@prometheus-ai/ai 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. package/CHANGELOG.md +7 -0
  2. package/README.md +1184 -0
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +6 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +101 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/snapshot-cache.d.ts +17 -0
  10. package/dist/types/auth-broker/types.d.ts +107 -0
  11. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  12. package/dist/types/auth-gateway/http.d.ts +39 -0
  13. package/dist/types/auth-gateway/index.d.ts +3 -0
  14. package/dist/types/auth-gateway/server.d.ts +36 -0
  15. package/dist/types/auth-gateway/types.d.ts +117 -0
  16. package/dist/types/auth-storage.d.ts +762 -0
  17. package/dist/types/index.d.ts +49 -0
  18. package/dist/types/model-cache.d.ts +17 -0
  19. package/dist/types/model-manager.d.ts +64 -0
  20. package/dist/types/model-thinking.d.ts +100 -0
  21. package/dist/types/models.d.ts +12 -0
  22. package/dist/types/provider-details.d.ts +24 -0
  23. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  24. package/dist/types/provider-models/descriptors.d.ts +50 -0
  25. package/dist/types/provider-models/google.d.ts +24 -0
  26. package/dist/types/provider-models/index.d.ts +5 -0
  27. package/dist/types/provider-models/ollama.d.ts +7 -0
  28. package/dist/types/provider-models/openai-compat.d.ts +323 -0
  29. package/dist/types/provider-models/special.d.ts +16 -0
  30. package/dist/types/providers/amazon-bedrock.d.ts +38 -0
  31. package/dist/types/providers/anthropic-client.d.ts +99 -0
  32. package/dist/types/providers/anthropic-messages-server-schema.d.ts +465 -0
  33. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  34. package/dist/types/providers/anthropic-wire.d.ts +262 -0
  35. package/dist/types/providers/anthropic.d.ts +206 -0
  36. package/dist/types/providers/aws-credentials.d.ts +43 -0
  37. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  38. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  39. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  40. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  41. package/dist/types/providers/cursor.d.ts +43 -0
  42. package/dist/types/providers/error-message.d.ts +27 -0
  43. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  44. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  45. package/dist/types/providers/google-auth.d.ts +24 -0
  46. package/dist/types/providers/google-gemini-cli.d.ts +81 -0
  47. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  48. package/dist/types/providers/google-shared.d.ts +171 -0
  49. package/dist/types/providers/google-types.d.ts +138 -0
  50. package/dist/types/providers/google-vertex.d.ts +7 -0
  51. package/dist/types/providers/google.d.ts +4 -0
  52. package/dist/types/providers/grammar.d.ts +1 -0
  53. package/dist/types/providers/kimi.d.ts +27 -0
  54. package/dist/types/providers/mock.d.ts +173 -0
  55. package/dist/types/providers/ollama.d.ts +6 -0
  56. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  57. package/dist/types/providers/openai-chat-server-schema.d.ts +817 -0
  58. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  59. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  60. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  61. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  62. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  63. package/dist/types/providers/openai-completions-compat.d.ts +27 -0
  64. package/dist/types/providers/openai-completions.d.ts +54 -0
  65. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  66. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  67. package/dist/types/providers/openai-responses-shared.d.ts +105 -0
  68. package/dist/types/providers/openai-responses.d.ts +66 -0
  69. package/dist/types/providers/prometheus-native-client.d.ts +13 -0
  70. package/dist/types/providers/prometheus-native-server.d.ts +68 -0
  71. package/dist/types/providers/register-builtins.d.ts +31 -0
  72. package/dist/types/providers/synthetic.d.ts +26 -0
  73. package/dist/types/providers/transform-messages.d.ts +12 -0
  74. package/dist/types/providers/vision-guard.d.ts +20 -0
  75. package/dist/types/providers/xai-responses.d.ts +23 -0
  76. package/dist/types/rate-limit-utils.d.ts +19 -0
  77. package/dist/types/stream.d.ts +28 -0
  78. package/dist/types/types.d.ts +819 -0
  79. package/dist/types/usage/claude.d.ts +4 -0
  80. package/dist/types/usage/gemini.d.ts +2 -0
  81. package/dist/types/usage/github-copilot.d.ts +7 -0
  82. package/dist/types/usage/google-antigravity.d.ts +2 -0
  83. package/dist/types/usage/kimi.d.ts +2 -0
  84. package/dist/types/usage/minimax-code.d.ts +2 -0
  85. package/dist/types/usage/openai-codex.d.ts +3 -0
  86. package/dist/types/usage/shared.d.ts +1 -0
  87. package/dist/types/usage/zai.d.ts +2 -0
  88. package/dist/types/usage.d.ts +260 -0
  89. package/dist/types/utils/abort.d.ts +19 -0
  90. package/dist/types/utils/abortable-iterator.d.ts +4 -0
  91. package/dist/types/utils/anthropic-auth.d.ts +35 -0
  92. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  93. package/dist/types/utils/discovery/codex.d.ts +38 -0
  94. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  95. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  96. package/dist/types/utils/discovery/index.d.ts +4 -0
  97. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  98. package/dist/types/utils/event-stream.d.ts +28 -0
  99. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  100. package/dist/types/utils/foundry.d.ts +1 -0
  101. package/dist/types/utils/http-inspector.d.ts +31 -0
  102. package/dist/types/utils/idle-iterator.d.ts +78 -0
  103. package/dist/types/utils/json-parse.d.ts +37 -0
  104. package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  105. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  106. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  107. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  108. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  109. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  110. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  111. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  112. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  113. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  114. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  115. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  116. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  117. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  118. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  119. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  120. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  121. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  122. package/dist/types/utils/oauth/index.d.ts +38 -0
  123. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  124. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  125. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  126. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  127. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  128. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  129. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  130. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  131. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  132. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  133. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  134. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  135. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  136. package/dist/types/utils/oauth/openrouter.d.ts +1 -0
  137. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  138. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  139. package/dist/types/utils/oauth/pkce.d.ts +8 -0
  140. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  141. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  142. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  143. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  144. package/dist/types/utils/oauth/together.d.ts +1 -0
  145. package/dist/types/utils/oauth/types.d.ts +44 -0
  146. package/dist/types/utils/oauth/venice.d.ts +18 -0
  147. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  148. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  149. package/dist/types/utils/oauth/wafer.d.ts +2 -0
  150. package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
  151. package/dist/types/utils/oauth/xiaomi.d.ts +25 -0
  152. package/dist/types/utils/oauth/zai.d.ts +18 -0
  153. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  154. package/dist/types/utils/oauth/zhipu.d.ts +18 -0
  155. package/dist/types/utils/overflow.d.ts +54 -0
  156. package/dist/types/utils/parse-bind.d.ts +23 -0
  157. package/dist/types/utils/provider-response.d.ts +3 -0
  158. package/dist/types/utils/request-debug.d.ts +29 -0
  159. package/dist/types/utils/retry-after.d.ts +3 -0
  160. package/dist/types/utils/retry.d.ts +26 -0
  161. package/dist/types/utils/schema/adapt.d.ts +24 -0
  162. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  163. package/dist/types/utils/schema/dereference.d.ts +11 -0
  164. package/dist/types/utils/schema/draft.d.ts +10 -0
  165. package/dist/types/utils/schema/equality.d.ts +4 -0
  166. package/dist/types/utils/schema/fields.d.ts +49 -0
  167. package/dist/types/utils/schema/index.d.ts +13 -0
  168. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  169. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  170. package/dist/types/utils/schema/normalize.d.ts +93 -0
  171. package/dist/types/utils/schema/spill.d.ts +8 -0
  172. package/dist/types/utils/schema/stamps.d.ts +25 -0
  173. package/dist/types/utils/schema/types.d.ts +4 -0
  174. package/dist/types/utils/schema/wire.d.ts +53 -0
  175. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  176. package/dist/types/utils/sdk-stream-timeout.d.ts +33 -0
  177. package/dist/types/utils/sse-debug.d.ts +10 -0
  178. package/dist/types/utils/stream-markup-healing.d.ts +80 -0
  179. package/dist/types/utils/tool-choice.d.ts +50 -0
  180. package/dist/types/utils/validation.d.ts +17 -0
  181. package/dist/types/utils.d.ts +28 -0
  182. package/package.json +142 -0
  183. package/src/api-registry.ts +96 -0
  184. package/src/auth-broker/client.ts +358 -0
  185. package/src/auth-broker/index.ts +6 -0
  186. package/src/auth-broker/refresher.ts +117 -0
  187. package/src/auth-broker/remote-store.ts +637 -0
  188. package/src/auth-broker/server.ts +644 -0
  189. package/src/auth-broker/snapshot-cache.ts +174 -0
  190. package/src/auth-broker/types.ts +130 -0
  191. package/src/auth-broker/wire-schemas.ts +200 -0
  192. package/src/auth-gateway/http.ts +194 -0
  193. package/src/auth-gateway/index.ts +3 -0
  194. package/src/auth-gateway/server.ts +822 -0
  195. package/src/auth-gateway/types.ts +143 -0
  196. package/src/auth-storage.ts +4608 -0
  197. package/src/index.ts +54 -0
  198. package/src/model-cache.ts +129 -0
  199. package/src/model-manager.ts +469 -0
  200. package/src/model-thinking.ts +756 -0
  201. package/src/models.json +60287 -0
  202. package/src/models.json.d.ts +9 -0
  203. package/src/models.ts +56 -0
  204. package/src/prompts/turn-aborted-guidance.md +4 -0
  205. package/src/provider-details.ts +90 -0
  206. package/src/provider-models/bundled-references.ts +38 -0
  207. package/src/provider-models/descriptors.ts +364 -0
  208. package/src/provider-models/google.ts +88 -0
  209. package/src/provider-models/index.ts +5 -0
  210. package/src/provider-models/ollama.ts +153 -0
  211. package/src/provider-models/openai-compat.ts +2904 -0
  212. package/src/provider-models/special.ts +67 -0
  213. package/src/providers/amazon-bedrock.ts +873 -0
  214. package/src/providers/anthropic-client.ts +318 -0
  215. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  216. package/src/providers/anthropic-messages-server.ts +681 -0
  217. package/src/providers/anthropic-wire.ts +268 -0
  218. package/src/providers/anthropic.ts +3106 -0
  219. package/src/providers/aws-credentials.ts +501 -0
  220. package/src/providers/aws-eventstream.ts +185 -0
  221. package/src/providers/aws-sigv4.ts +218 -0
  222. package/src/providers/azure-openai-responses.ts +361 -0
  223. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  224. package/src/providers/cursor/proto/agent.proto +3526 -0
  225. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  226. package/src/providers/cursor/proto/buf.yaml +17 -0
  227. package/src/providers/cursor.ts +2621 -0
  228. package/src/providers/error-message.ts +21 -0
  229. package/src/providers/github-copilot-headers.ts +140 -0
  230. package/src/providers/gitlab-duo.ts +372 -0
  231. package/src/providers/google-auth.ts +252 -0
  232. package/src/providers/google-gemini-cli.ts +809 -0
  233. package/src/providers/google-gemini-headers.ts +41 -0
  234. package/src/providers/google-shared.ts +917 -0
  235. package/src/providers/google-types.ts +167 -0
  236. package/src/providers/google-vertex.ts +91 -0
  237. package/src/providers/google.ts +41 -0
  238. package/src/providers/grammar.ts +70 -0
  239. package/src/providers/kimi.ts +52 -0
  240. package/src/providers/mock.ts +496 -0
  241. package/src/providers/ollama.ts +644 -0
  242. package/src/providers/openai-anthropic-shim.ts +138 -0
  243. package/src/providers/openai-chat-server-schema.ts +252 -0
  244. package/src/providers/openai-chat-server.ts +647 -0
  245. package/src/providers/openai-codex/constants.ts +43 -0
  246. package/src/providers/openai-codex/request-transformer.ts +161 -0
  247. package/src/providers/openai-codex/response-handler.ts +81 -0
  248. package/src/providers/openai-codex-responses.ts +3027 -0
  249. package/src/providers/openai-completions-compat.ts +320 -0
  250. package/src/providers/openai-completions.ts +2002 -0
  251. package/src/providers/openai-responses-server-schema.ts +290 -0
  252. package/src/providers/openai-responses-server.ts +1183 -0
  253. package/src/providers/openai-responses-shared.ts +956 -0
  254. package/src/providers/openai-responses.ts +679 -0
  255. package/src/providers/prometheus-native-client.ts +228 -0
  256. package/src/providers/prometheus-native-server.ts +212 -0
  257. package/src/providers/register-builtins.ts +457 -0
  258. package/src/providers/synthetic.ts +50 -0
  259. package/src/providers/transform-messages.ts +382 -0
  260. package/src/providers/vision-guard.ts +52 -0
  261. package/src/providers/xai-responses.ts +82 -0
  262. package/src/rate-limit-utils.ts +91 -0
  263. package/src/stream.ts +1068 -0
  264. package/src/types.ts +965 -0
  265. package/src/usage/claude.ts +482 -0
  266. package/src/usage/gemini.ts +250 -0
  267. package/src/usage/github-copilot.ts +421 -0
  268. package/src/usage/google-antigravity.ts +201 -0
  269. package/src/usage/kimi.ts +271 -0
  270. package/src/usage/minimax-code.ts +31 -0
  271. package/src/usage/openai-codex.ts +503 -0
  272. package/src/usage/shared.ts +10 -0
  273. package/src/usage/zai.ts +247 -0
  274. package/src/usage.ts +185 -0
  275. package/src/utils/abort.ts +51 -0
  276. package/src/utils/abortable-iterator.ts +69 -0
  277. package/src/utils/anthropic-auth.ts +93 -0
  278. package/src/utils/discovery/antigravity.ts +261 -0
  279. package/src/utils/discovery/codex.ts +371 -0
  280. package/src/utils/discovery/cursor.ts +306 -0
  281. package/src/utils/discovery/gemini.ts +248 -0
  282. package/src/utils/discovery/index.ts +4 -0
  283. package/src/utils/discovery/openai-compatible.ts +224 -0
  284. package/src/utils/event-stream.ts +142 -0
  285. package/src/utils/fireworks-model-id.ts +30 -0
  286. package/src/utils/foundry.ts +8 -0
  287. package/src/utils/http-inspector.ts +176 -0
  288. package/src/utils/idle-iterator.ts +273 -0
  289. package/src/utils/json-parse.ts +182 -0
  290. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  291. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  292. package/src/utils/oauth/anthropic.ts +273 -0
  293. package/src/utils/oauth/api-key-login.ts +87 -0
  294. package/src/utils/oauth/api-key-validation.ts +92 -0
  295. package/src/utils/oauth/callback-server.ts +276 -0
  296. package/src/utils/oauth/cerebras.ts +16 -0
  297. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  298. package/src/utils/oauth/cursor.ts +157 -0
  299. package/src/utils/oauth/deepseek.ts +53 -0
  300. package/src/utils/oauth/firepass.ts +24 -0
  301. package/src/utils/oauth/fireworks.ts +15 -0
  302. package/src/utils/oauth/github-copilot.ts +362 -0
  303. package/src/utils/oauth/gitlab-duo.ts +123 -0
  304. package/src/utils/oauth/google-antigravity.ts +200 -0
  305. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  306. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  307. package/src/utils/oauth/huggingface.ts +62 -0
  308. package/src/utils/oauth/index.ts +502 -0
  309. package/src/utils/oauth/kagi.ts +47 -0
  310. package/src/utils/oauth/kilo.ts +87 -0
  311. package/src/utils/oauth/kimi.ts +254 -0
  312. package/src/utils/oauth/litellm.ts +47 -0
  313. package/src/utils/oauth/lm-studio.ts +38 -0
  314. package/src/utils/oauth/minimax-code.ts +80 -0
  315. package/src/utils/oauth/moonshot.ts +23 -0
  316. package/src/utils/oauth/nanogpt.ts +15 -0
  317. package/src/utils/oauth/nvidia.ts +70 -0
  318. package/src/utils/oauth/oauth.html +199 -0
  319. package/src/utils/oauth/ollama-cloud.ts +28 -0
  320. package/src/utils/oauth/ollama.ts +47 -0
  321. package/src/utils/oauth/openai-codex.ts +299 -0
  322. package/src/utils/oauth/opencode.ts +49 -0
  323. package/src/utils/oauth/openrouter.ts +20 -0
  324. package/src/utils/oauth/parallel.ts +46 -0
  325. package/src/utils/oauth/perplexity.ts +206 -0
  326. package/src/utils/oauth/pkce.ts +18 -0
  327. package/src/utils/oauth/qianfan.ts +58 -0
  328. package/src/utils/oauth/qwen-portal.ts +60 -0
  329. package/src/utils/oauth/synthetic.ts +15 -0
  330. package/src/utils/oauth/tavily.ts +46 -0
  331. package/src/utils/oauth/together.ts +16 -0
  332. package/src/utils/oauth/types.ts +102 -0
  333. package/src/utils/oauth/venice.ts +59 -0
  334. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  335. package/src/utils/oauth/vllm.ts +40 -0
  336. package/src/utils/oauth/wafer.ts +50 -0
  337. package/src/utils/oauth/xai-oauth.ts +342 -0
  338. package/src/utils/oauth/xiaomi.ts +194 -0
  339. package/src/utils/oauth/zai.ts +60 -0
  340. package/src/utils/oauth/zenmux.ts +15 -0
  341. package/src/utils/oauth/zhipu.ts +60 -0
  342. package/src/utils/overflow.ts +137 -0
  343. package/src/utils/parse-bind.ts +54 -0
  344. package/src/utils/provider-response.ts +30 -0
  345. package/src/utils/request-debug.ts +336 -0
  346. package/src/utils/retry-after.ts +110 -0
  347. package/src/utils/retry.ts +54 -0
  348. package/src/utils/schema/CONSTRAINTS.md +164 -0
  349. package/src/utils/schema/adapt.ts +36 -0
  350. package/src/utils/schema/compatibility.ts +435 -0
  351. package/src/utils/schema/dereference.ts +98 -0
  352. package/src/utils/schema/draft.ts +341 -0
  353. package/src/utils/schema/equality.ts +97 -0
  354. package/src/utils/schema/fields.ts +191 -0
  355. package/src/utils/schema/index.ts +13 -0
  356. package/src/utils/schema/json-schema-validator.ts +577 -0
  357. package/src/utils/schema/meta-validator.ts +167 -0
  358. package/src/utils/schema/normalize.ts +1588 -0
  359. package/src/utils/schema/spill.ts +43 -0
  360. package/src/utils/schema/stamps.ts +97 -0
  361. package/src/utils/schema/types.ts +10 -0
  362. package/src/utils/schema/wire.ts +293 -0
  363. package/src/utils/schema/zod-decontaminate.ts +331 -0
  364. package/src/utils/sdk-stream-timeout.ts +43 -0
  365. package/src/utils/sse-debug.ts +289 -0
  366. package/src/utils/stream-markup-healing.ts +612 -0
  367. package/src/utils/tool-choice.ts +99 -0
  368. package/src/utils/validation.ts +1024 -0
  369. package/src/utils.ts +166 -0
package/README.md ADDED
@@ -0,0 +1,1184 @@
1
+ # @prometheus-ai/ai
2
+
3
+ Unified LLM API with automatic model discovery, provider configuration, token and cost tracking, and simple context persistence with mid-session hand-off to other models.
4
+
5
+ **Note**: This library only includes models that support tool calling (function calling), as this is essential for agentic workflows.
6
+
7
+ ## Table of Contents
8
+
9
+ - [Supported Providers](#supported-providers)
10
+ - [Installation](#installation)
11
+ - [Quick Start](#quick-start)
12
+ - [Tools](#tools)
13
+ - [Defining Tools](#defining-tools)
14
+ - [Handling Tool Calls](#handling-tool-calls)
15
+ - [Streaming Tool Calls with Partial JSON](#streaming-tool-calls-with-partial-json)
16
+ - [Validating Tool Arguments](#validating-tool-arguments)
17
+ - [Complete Event Reference](#complete-event-reference)
18
+ - [Image Input](#image-input)
19
+ - [Thinking/Reasoning](#thinkingreasoning)
20
+ - [Unified Interface](#unified-interface-streamsimplecompletesimple)
21
+ - [Provider-Specific Options](#provider-specific-options-streamcomplete)
22
+ - [Streaming Thinking Content](#streaming-thinking-content)
23
+ - [Stop Reasons](#stop-reasons)
24
+ - [Error Handling](#error-handling)
25
+ - [Aborting Requests](#aborting-requests)
26
+ - [Continuing After Abort](#continuing-after-abort)
27
+ - [APIs, Models, and Providers](#apis-models-and-providers)
28
+ - [Providers and Models](#providers-and-models)
29
+ - [Querying Providers and Models](#querying-providers-and-models)
30
+ - [Custom Models](#custom-models)
31
+ - [OpenAI Compatibility Settings](#openai-compatibility-settings)
32
+ - [Type Safety](#type-safety)
33
+ - [Cross-Provider Handoffs](#cross-provider-handoffs)
34
+ - [Context Serialization](#context-serialization)
35
+ - [Browser Usage](#browser-usage)
36
+ - [Environment Variables](#environment-variables-nodejs-only)
37
+ - [Checking Environment Variables](#checking-environment-variables)
38
+ - [OAuth Providers](#oauth-providers)
39
+ - [Vertex AI (ADC)](#vertex-ai-adc)
40
+ - [CLI Login](#cli-login)
41
+ - [Programmatic OAuth](#programmatic-oauth)
42
+ - [Login Flow Example](#login-flow-example)
43
+ - [Using OAuth Tokens](#using-oauth-tokens)
44
+ - [Provider Notes](#provider-notes)
45
+ - [License](#license)
46
+
47
+ ## Supported Providers
48
+
49
+ - **OpenAI**
50
+ - **OpenAI Codex** (ChatGPT Plus/Pro subscription, requires OAuth, see below)
51
+ - **Anthropic**
52
+ - **Google**
53
+ - **Vertex AI** (Gemini via Vertex AI)
54
+ - **Mistral**
55
+ - **Groq**
56
+ - **Cerebras**
57
+ - **Together**
58
+ - **Moonshot** (requires `MOONSHOT_API_KEY`)
59
+ - **Qianfan** (requires `QIANFAN_API_KEY`)
60
+ - **NVIDIA** (requires `NVIDIA_API_KEY`)
61
+ - **NanoGPT** (requires `NANO_GPT_API_KEY`)
62
+ - **Hugging Face Inference**
63
+ - **xAI**
64
+ - **Venice** (requires `VENICE_API_KEY`)
65
+ - **Wafer Pass** (requires `WAFER_PASS_API_KEY`; flat-rate subscription, includes GLM-5.1 and Qwen3.5-397B-A17B)
66
+ - **Wafer Serverless** (requires `WAFER_SERVERLESS_API_KEY`; pay-as-you-go)
67
+ - **OpenRouter**
68
+ - **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
69
+ - **LiteLLM** (requires `LITELLM_API_KEY`)
70
+ - **zAI** (requires `ZAI_API_KEY`)
71
+ - **MiniMax Coding Plan** (requires `MINIMAX_CODE_API_KEY` or `MINIMAX_CODE_CN_API_KEY`)
72
+ - **Xiaomi MiMo** (requires `XIAOMI_API_KEY`)
73
+ - **ZenMux** (requires `ZENMUX_API_KEY`)
74
+ - **Qwen Portal** (supports `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`)
75
+ - **Cloudflare AI Gateway** (requires `CLOUDFLARE_AI_GATEWAY_API_KEY` and provider-specific gateway base URL)
76
+ - **Ollama** (local OpenAI-compatible runtime; optional `OLLAMA_API_KEY`)
77
+ - **Ollama Cloud** (hosted native Ollama API; requires `OLLAMA_CLOUD_API_KEY`)
78
+ - **llama.cpp** (local OpenAI- and Anthropic-compatible inference server)
79
+ - **vLLM** (OpenAI-compatible server; `VLLM_API_KEY` for secured deployments)
80
+ - **GitHub Copilot** (requires OAuth, see below)
81
+ - **Google Gemini CLI** (requires OAuth, see below)
82
+ - **Antigravity** (requires OAuth, see below)
83
+ - **Any OpenAI-compatible API**: LM Studio, custom proxies, etc.
84
+
85
+ ## Installation
86
+
87
+ ```bash
88
+ npm install @prometheus-ai/ai
89
+ ```
90
+
91
+ ## Quick Start
92
+
93
+ ```typescript
94
+ import { z, getModel, stream, complete, Context, Tool } from "@prometheus-ai/ai";
95
+
96
+ // Fully typed with auto-complete support for both providers and models
97
+ const model = getModel("openai", "gpt-4o-mini");
98
+
99
+ // Define tools with Zod schemas for type safety and validation
100
+ const tools: Tool[] = [
101
+ {
102
+ name: "get_time",
103
+ description: "Get the current time",
104
+ parameters: z.object({
105
+ timezone: z
106
+ .string()
107
+ .optional()
108
+ .describe("Optional timezone (e.g., America/New_York)"),
109
+ }),
110
+ },
111
+ ];
112
+
113
+ // Build a conversation context (easily serializable and transferable between models)
114
+ const context: Context = {
115
+ systemPrompt: ["You are a helpful assistant."],
116
+ messages: [{ role: "user", content: "What time is it?" }],
117
+ tools,
118
+ };
119
+
120
+ // Option 1: Streaming with all event types
121
+ const s = stream(model, context);
122
+
123
+ for await (const event of s) {
124
+ switch (event.type) {
125
+ case "start":
126
+ console.log(`Starting with ${event.partial.model}`);
127
+ break;
128
+ case "text_start":
129
+ console.log("\n[Text started]");
130
+ break;
131
+ case "text_delta":
132
+ process.stdout.write(event.delta);
133
+ break;
134
+ case "text_end":
135
+ console.log("\n[Text ended]");
136
+ break;
137
+ case "thinking_start":
138
+ console.log("[Model is thinking...]");
139
+ break;
140
+ case "thinking_delta":
141
+ process.stdout.write(event.delta);
142
+ break;
143
+ case "thinking_end":
144
+ console.log("[Thinking complete]");
145
+ break;
146
+ case "toolcall_start":
147
+ console.log(`\n[Tool call started: index ${event.contentIndex}]`);
148
+ break;
149
+ case "toolcall_delta":
150
+ // Partial tool arguments are being streamed
151
+ const partialCall = event.partial.content[event.contentIndex];
152
+ if (partialCall.type === "toolCall") {
153
+ console.log(`[Streaming args for ${partialCall.name}]`);
154
+ }
155
+ break;
156
+ case "toolcall_end":
157
+ console.log(`\nTool called: ${event.toolCall.name}`);
158
+ console.log(`Arguments: ${JSON.stringify(event.toolCall.arguments)}`);
159
+ break;
160
+ case "done":
161
+ console.log(`\nFinished: ${event.reason}`);
162
+ break;
163
+ case "error":
164
+ console.error(`Error: ${event.error}`);
165
+ break;
166
+ }
167
+ }
168
+
169
+ // Get the final message after streaming, add it to the context
170
+ const finalMessage = await s.result();
171
+ context.messages.push(finalMessage);
172
+
173
+ // Handle tool calls if any
174
+ const toolCalls = finalMessage.content.filter((b) => b.type === "toolCall");
175
+ for (const call of toolCalls) {
176
+ // Execute the tool
177
+ const result =
178
+ call.name === "get_time"
179
+ ? new Date().toLocaleString("en-US", {
180
+ timeZone: call.arguments.timezone || "UTC",
181
+ dateStyle: "full",
182
+ timeStyle: "long",
183
+ })
184
+ : "Unknown tool";
185
+
186
+ // Add tool result to context (supports text and images)
187
+ context.messages.push({
188
+ role: "toolResult",
189
+ toolCallId: call.id,
190
+ toolName: call.name,
191
+ content: [{ type: "text", text: result }],
192
+ isError: false,
193
+ timestamp: Date.now(),
194
+ });
195
+ }
196
+
197
+ // Continue if there were tool calls
198
+ if (toolCalls.length > 0) {
199
+ const continuation = await complete(model, context);
200
+ context.messages.push(continuation);
201
+ console.log("After tool execution:", continuation.content);
202
+ }
203
+
204
+ console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
205
+ console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
206
+
207
+ // Option 2: Get complete response without streaming
208
+ const response = await complete(model, context);
209
+
210
+ for (const block of response.content) {
211
+ if (block.type === "text") {
212
+ console.log(block.text);
213
+ } else if (block.type === "toolCall") {
214
+ console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
215
+ }
216
+ }
217
+ ```
218
+
219
+ ## Tools
220
+
221
+ Tools enable LLMs to interact with external systems. This library uses **Zod** schemas for type-safe tool definitions with automatic validation. Schemas are converted to JSON Schema for providers as needed.
222
+
223
+ ### Defining Tools
224
+
225
+ ```typescript
226
+ import { z, Tool } from "@prometheus-ai/ai";
227
+
228
+ // Define tool parameters with Zod
229
+ const weatherTool: Tool = {
230
+ name: "get_weather",
231
+ description: "Get current weather for a location",
232
+ parameters: z.object({
233
+ location: z.string().describe("City name or coordinates"),
234
+ units: z.enum(["celsius", "fahrenheit"]).default("celsius"),
235
+ }),
236
+ };
237
+
238
+ const bookMeetingTool: Tool = {
239
+ name: "book_meeting",
240
+ description: "Schedule a meeting",
241
+ parameters: z.object({
242
+ title: z.string().min(1),
243
+ startTime: z.string().describe("ISO 8601 date-time"),
244
+ endTime: z.string().describe("ISO 8601 date-time"),
245
+ attendees: z.array(z.email()).min(1),
246
+ }),
247
+ };
248
+ ```
249
+
250
+ ### Handling Tool Calls
251
+
252
+ Tool results use content blocks and can include both text and images:
253
+
254
+ ```typescript
255
+ import * as fs from "node:fs";
256
+
257
+ const context: Context = {
258
+ messages: [{ role: "user", content: "What is the weather in London?" }],
259
+ tools: [weatherTool],
260
+ };
261
+
262
+ const response = await complete(model, context);
263
+
264
+ // Check for tool calls in the response
265
+ for (const block of response.content) {
266
+ if (block.type === "toolCall") {
267
+ // Execute your tool with the arguments
268
+ // See "Validating Tool Arguments" section for validation
269
+ const result = await executeWeatherApi(block.arguments);
270
+
271
+ // Add tool result with text content
272
+ context.messages.push({
273
+ role: "toolResult",
274
+ toolCallId: block.id,
275
+ toolName: block.name,
276
+ content: [{ type: "text", text: JSON.stringify(result) }],
277
+ isError: false,
278
+ timestamp: Date.now(),
279
+ });
280
+ }
281
+ }
282
+
283
+ // Tool results can also include images (for vision-capable models)
284
+ const imageBuffer = fs.readFileSync("chart.png");
285
+ context.messages.push({
286
+ role: "toolResult",
287
+ toolCallId: "tool_xyz",
288
+ toolName: "generate_chart",
289
+ content: [
290
+ { type: "text", text: "Generated chart showing temperature trends" },
291
+ { type: "image", data: imageBuffer.toBase64(), mimeType: "image/png" },
292
+ ],
293
+ isError: false,
294
+ timestamp: Date.now(),
295
+ });
296
+ ```
297
+
298
+ ### Streaming Tool Calls with Partial JSON
299
+
300
+ During streaming, tool call arguments are progressively parsed as they arrive. This enables real-time UI updates before the complete arguments are available:
301
+
302
+ ```typescript
303
+ const s = stream(model, context);
304
+
305
+ for await (const event of s) {
306
+ if (event.type === "toolcall_delta") {
307
+ const toolCall = event.partial.content[event.contentIndex];
308
+
309
+ // toolCall.arguments contains partially parsed JSON during streaming
310
+ // This allows for progressive UI updates
311
+ if (toolCall.type === "toolCall" && toolCall.arguments) {
312
+ // BE DEFENSIVE: arguments may be incomplete
313
+ // Example: Show file path being written even before content is complete
314
+ if (toolCall.name === "write_file" && toolCall.arguments.path) {
315
+ console.log(`Writing to: ${toolCall.arguments.path}`);
316
+
317
+ // Content might be partial or missing
318
+ if (toolCall.arguments.content) {
319
+ console.log(`Content preview: ${toolCall.arguments.content.substring(0, 100)}...`);
320
+ }
321
+ }
322
+ }
323
+ }
324
+
325
+ if (event.type === "toolcall_end") {
326
+ // Here toolCall.arguments is complete (but not yet validated)
327
+ const toolCall = event.toolCall;
328
+ console.log(`Tool completed: ${toolCall.name}`, toolCall.arguments);
329
+ }
330
+ }
331
+ ```
332
+
333
+ **Important notes about partial tool arguments:**
334
+
335
+ - During `toolcall_delta` events, `arguments` contains the best-effort parse of partial JSON
336
+ - Fields may be missing or incomplete - always check for existence before use
337
+ - String values may be truncated mid-word
338
+ - Arrays may be incomplete
339
+ - Nested objects may be partially populated
340
+ - At minimum, `arguments` will be an empty object `{}`, never `undefined`
341
+ - The Google provider does not support function call streaming. Instead, you will receive a single `toolcall_delta` event with the full arguments.
342
+
343
+ ### Validating Tool Arguments
344
+
345
+ When using `agentLoop`, tool arguments are automatically validated against your Zod parameter schemas before execution. If validation fails, the error is returned to the model as a tool result, allowing it to retry.
346
+
347
+ When implementing your own tool execution loop with `stream()` or `complete()`, use `validateToolCall` to validate arguments before passing them to your tools:
348
+
349
+ ```typescript
350
+ import { stream, validateToolCall, Tool } from "@prometheus-ai/ai";
351
+
352
+ const tools: Tool[] = [weatherTool, calculatorTool];
353
+ const s = stream(model, { messages, tools });
354
+
355
+ for await (const event of s) {
356
+ if (event.type === "toolcall_end") {
357
+ const toolCall = event.toolCall;
358
+
359
+ try {
360
+ // Validate arguments against the tool's schema (throws on invalid args)
361
+ const validatedArgs = validateToolCall(tools, toolCall);
362
+ const result = await executeMyTool(toolCall.name, validatedArgs);
363
+ // ... add tool result to context
364
+ } catch (error) {
365
+ // Validation failed - return error as tool result so model can retry
366
+ context.messages.push({
367
+ role: "toolResult",
368
+ toolCallId: toolCall.id,
369
+ toolName: toolCall.name,
370
+ content: [{ type: "text", text: error.message }],
371
+ isError: true,
372
+ timestamp: Date.now(),
373
+ });
374
+ }
375
+ }
376
+ }
377
+ ```
378
+
379
+ ### Complete Event Reference
380
+
381
+ All streaming events emitted during assistant message generation:
382
+
383
+ | Event Type | Description | Key Properties |
384
+ | ---------------- | ------------------------ | ------------------------------------------------------------------------------------------- |
385
+ | `start` | Stream begins | `partial`: Initial assistant message structure |
386
+ | `text_start` | Text block starts | `contentIndex`: Position in content array |
387
+ | `text_delta` | Text chunk received | `delta`: New text, `contentIndex`: Position |
388
+ | `text_end` | Text block complete | `content`: Full text, `contentIndex`: Position |
389
+ | `thinking_start` | Thinking block starts | `contentIndex`: Position in content array |
390
+ | `thinking_delta` | Thinking chunk received | `delta`: New text, `contentIndex`: Position |
391
+ | `thinking_end` | Thinking block complete | `content`: Full thinking, `contentIndex`: Position |
392
+ | `toolcall_start` | Tool call begins | `contentIndex`: Position in content array |
393
+ | `toolcall_delta` | Tool arguments streaming | `delta`: JSON chunk, `partial.content[contentIndex].arguments`: Partial parsed args |
394
+ | `toolcall_end`   | Tool call complete       | `toolCall`: Complete tool call (arguments not yet validated) with `id`, `name`, `arguments` |
395
+ | `done` | Stream complete | `reason`: Stop reason ("stop", "length", "toolUse"), `message`: Final assistant message |
396
+ | `error` | Error occurred | `reason`: Error type ("error" or "aborted"), `error`: AssistantMessage with partial content |
397
+
398
+ ## Image Input
399
+
400
+ Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
401
+
402
+ ```typescript
403
+ import * as fs from "node:fs";
404
+ import { getModel, complete } from "@prometheus-ai/ai";
405
+
406
+ const model = getModel("openai", "gpt-4o-mini");
407
+
408
+ // Check if model supports images
409
+ if (model.input.includes("image")) {
410
+ console.log("Model supports vision");
411
+ }
412
+
413
+ const imageBuffer = fs.readFileSync("image.png");
414
+ const base64Image = imageBuffer.toBase64();
415
+
416
+ const response = await complete(model, {
417
+ messages: [
418
+ {
419
+ role: "user",
420
+ content: [
421
+ { type: "text", text: "What is in this image?" },
422
+ { type: "image", data: base64Image, mimeType: "image/png" },
423
+ ],
424
+ },
425
+ ],
426
+ });
427
+
428
+ // Access the response
429
+ for (const block of response.content) {
430
+ if (block.type === "text") {
431
+ console.log(block.text);
432
+ }
433
+ }
434
+ ```
435
+
436
+ ## Thinking/Reasoning
437
+
438
+ Many models support thinking/reasoning capabilities where they can show their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
439
+
440
+ ### Unified Interface (streamSimple/completeSimple)
441
+
442
+ ```typescript
443
+ import { getModel, streamSimple, completeSimple } from "@prometheus-ai/ai";
444
+
445
+ // Many models across providers support thinking/reasoning
446
+ const model = getModel("anthropic", "claude-sonnet-4-20250514");
447
+ // or getModel('openai', 'gpt-5-mini');
448
+ // or getModel('google', 'gemini-2.5-flash');
449
+ // or getModel('xai', 'grok-code-fast-1');
450
+ // or getModel('groq', 'openai/gpt-oss-20b');
451
+ // or getModel('cerebras', 'gpt-oss-120b');
452
+ // or getModel('openrouter', 'z-ai/glm-4.5v');
453
+
454
+ // Check if model supports reasoning
455
+ if (model.reasoning) {
456
+ console.log("Model supports reasoning/thinking");
457
+ }
458
+
459
+ // Use the simplified reasoning option
460
+ const response = await completeSimple(
461
+ model,
462
+ {
463
+ messages: [{ role: "user", content: "Solve: 2x + 5 = 13" }],
464
+ },
465
+ {
466
+ reasoning: "medium", // 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' (xhigh maps to high on non-OpenAI providers)
467
+ }
468
+ );
469
+
470
+ // Access thinking and text blocks
471
+ for (const block of response.content) {
472
+ if (block.type === "thinking") {
473
+ console.log("Thinking:", block.thinking);
474
+ } else if (block.type === "text") {
475
+ console.log("Response:", block.text);
476
+ }
477
+ }
478
+ ```
479
+
480
+ ### Provider-Specific Options (stream/complete)
481
+
482
+ For fine-grained control, use the provider-specific options:
483
+
484
+ ```typescript
485
+ import { getModel, complete } from "@prometheus-ai/ai";
486
+
487
+ // OpenAI Reasoning (o1, o3, gpt-5)
488
+ const openaiModel = getModel("openai", "gpt-5-mini");
489
+ await complete(openaiModel, context, {
490
+ reasoningEffort: "medium",
491
+ reasoningSummary: "detailed", // OpenAI Responses API only
492
+ });
493
+
494
+ // Anthropic Thinking (Claude Sonnet 4)
495
+ const anthropicModel = getModel("anthropic", "claude-sonnet-4-20250514");
496
+ await complete(anthropicModel, context, {
497
+ thinkingEnabled: true,
498
+ thinkingBudgetTokens: 8192, // Optional token limit
499
+ });
500
+
501
+ // Google Gemini Thinking
502
+ const googleModel = getModel("google", "gemini-2.5-flash");
503
+ await complete(googleModel, context, {
504
+ thinking: {
505
+ enabled: true,
506
+ budgetTokens: 8192, // -1 for dynamic, 0 to disable
507
+ },
508
+ });
509
+ ```
510
+
511
+ ### Streaming Thinking Content
512
+
513
+ When streaming, thinking content is delivered through specific events:
514
+
515
+ ```typescript
516
+ const s = streamSimple(model, context, { reasoning: "high" });
517
+
518
+ for await (const event of s) {
519
+ switch (event.type) {
520
+ case "thinking_start":
521
+ console.log("[Model started thinking]");
522
+ break;
523
+ case "thinking_delta":
524
+ process.stdout.write(event.delta); // Stream thinking content
525
+ break;
526
+ case "thinking_end":
527
+ console.log("\n[Thinking complete]");
528
+ break;
529
+ }
530
+ }
531
+ ```
532
+
533
+ ## Stop Reasons
534
+
535
+ Every `AssistantMessage` includes a `stopReason` field that indicates how the generation ended:
536
+
537
+ - `"stop"` - Normal completion, the model finished its response
538
+ - `"length"` - Output hit the maximum token limit
539
+ - `"toolUse"` - Model is calling tools and expects tool results
540
+ - `"error"` - An error occurred during generation
541
+ - `"aborted"` - Request was cancelled via abort signal
542
+
543
+ ## Error Handling
544
+
545
+ When a request ends with an error (including aborts and tool call validation errors), the streaming API emits an error event:
546
+
547
+ ```typescript
548
+ // In streaming
549
+ for await (const event of stream) {
550
+ if (event.type === "error") {
551
+ // event.reason is either "error" or "aborted"
552
+ // event.error is the AssistantMessage with partial content
553
+ console.error(`Error (${event.reason}):`, event.error.errorMessage);
554
+ console.log("Partial content:", event.error.content);
555
+ }
556
+ }
557
+
558
+ // The final message will have the error details
559
+ const message = await stream.result();
560
+ if (message.stopReason === "error" || message.stopReason === "aborted") {
561
+ console.error("Request failed:", message.errorMessage);
562
+ // message.content contains any partial content received before the error
563
+ // message.usage contains partial token counts and costs
564
+ }
565
+ ```
566
+
567
+ ### Aborting Requests
568
+
569
+ The abort signal allows you to cancel in-progress requests. Aborted requests have `stopReason === 'aborted'`:
570
+
571
+ ```typescript
572
+ import { getModel, stream } from "@prometheus-ai/ai";
573
+
574
+ const model = getModel("openai", "gpt-4o-mini");
575
+
576
+ // Abort after 2 seconds
577
+ const signal = AbortSignal.timeout(2000);
578
+
579
+ const s = stream(
580
+ model,
581
+ {
582
+ messages: [{ role: "user", content: "Write a long story" }],
583
+ },
584
+ {
585
+ signal,
586
+ }
587
+ );
588
+
589
+ for await (const event of s) {
590
+ if (event.type === "text_delta") {
591
+ process.stdout.write(event.delta);
592
+ } else if (event.type === "error") {
593
+ // event.reason tells you if it was "error" or "aborted"
594
+ console.log(`${event.reason === "aborted" ? "Aborted" : "Error"}:`, event.error.errorMessage);
595
+ }
596
+ }
597
+
598
+ // Get results (may be partial if aborted)
599
+ const response = await s.result();
600
+ if (response.stopReason === "aborted") {
601
+ console.log("Request was aborted:", response.errorMessage);
602
+ console.log("Partial content received:", response.content);
603
+ console.log("Tokens used:", response.usage);
604
+ }
605
+ ```
606
+
607
+ ### Continuing After Abort
608
+
609
+ Aborted messages can be added to the conversation context and continued in subsequent requests:
610
+
611
+ ```typescript
612
+ const context = {
613
+ messages: [{ role: "user", content: "Explain quantum computing in detail" }],
614
+ };
615
+
616
+ // First request gets aborted after 2 seconds
617
+ const controller1 = new AbortController();
618
+ setTimeout(() => controller1.abort(), 2000);
619
+
620
+ const partial = await complete(model, context, { signal: controller1.signal });
621
+
622
+ // Add the partial response to context
623
+ context.messages.push(partial);
624
+ context.messages.push({ role: "user", content: "Please continue" });
625
+
626
+ // Continue the conversation
627
+ const continuation = await complete(model, context);
628
+ ```
629
+
630
+ ### Common Stream Options
631
+
632
+ All providers accept the base `StreamOptions` (in addition to provider-specific options):
633
+
634
+ - `apiKey`: Override the provider API key
635
+ - `headers`: Extra request headers merged on top of model-defined headers
636
+ - `sessionId`: Provider-specific session identifier (prompt caching/routing)
637
+ - `signal`: Abort in-flight requests
638
+ - `onPayload`: Callback invoked with the provider request payload just before sending
639
+
640
+ Example:
641
+
642
+ ```typescript
643
+ const response = await complete(model, context, {
644
+ apiKey: "sk-live",
645
+ headers: { "X-Debug-Trace": "true" },
646
+ onPayload: (payload) => {
647
+ console.log("request payload", payload);
648
+ },
649
+ });
650
+ ```
651
+
652
+ ## APIs, Models, and Providers
653
+
654
+ The library implements the following API interfaces, each with its own streaming function and options (additional APIs, such as `ollama-chat`, appear in the Custom Models examples below):
655
+
656
+ - **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
657
+ - **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
658
+ - **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
659
+ - **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
660
+
661
+ ### Providers and Models
662
+
663
+ A **provider** offers models through a specific API. For example:
664
+
665
+ - **Anthropic** models use the `anthropic-messages` API
666
+ - **Google** models use the `google-generative-ai` API
667
+ - **OpenAI** models use the `openai-responses` API
668
+ - **Mistral, xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
669
+
670
+ ### Querying Providers and Models
671
+
672
+ ```typescript
673
+ import { getProviders, getModels, getModel } from "@prometheus-ai/ai";
674
+
675
+ // Get all available providers
676
+ const providers = getProviders();
677
+ console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
678
+
679
+ // Get all models from a provider (fully typed)
680
+ const anthropicModels = getModels("anthropic");
681
+ for (const model of anthropicModels) {
682
+ console.log(`${model.id}: ${model.name}`);
683
+ console.log(` API: ${model.api}`); // 'anthropic-messages'
684
+ console.log(` Context: ${model.contextWindow} tokens`);
685
+ console.log(` Vision: ${model.input.includes("image")}`);
686
+ console.log(` Reasoning: ${model.reasoning}`);
687
+ }
688
+
689
+ // Get a specific model (both provider and model ID are auto-completed in IDEs)
690
+ const model = getModel("openai", "gpt-4o-mini");
691
+ console.log(`Using ${model.name} via ${model.api} API`);
692
+ ```
693
+
694
+ ### Custom Models
695
+
696
+ You can create custom models for local inference servers or custom endpoints.
697
+
698
+ For local Ollama, `OLLAMA_API_KEY` is optional and mainly needed for authenticated/self-hosted gateways. `ollama` remains the local OpenAI-compatible runtime integration.
699
+
700
+ ```typescript
701
+ import { Model, stream } from "@prometheus-ai/ai";
702
+
703
+ // Example: local Ollama using the OpenAI-compatible API
704
+ const ollamaModel: Model<"openai-completions"> = {
705
+ id: "llama-3.1-8b",
706
+ name: "Llama 3.1 8B (Ollama)",
707
+ api: "openai-completions",
708
+ provider: "ollama",
709
+ baseUrl: "http://localhost:11434/v1",
710
+ reasoning: false,
711
+ input: ["text"],
712
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
713
+ contextWindow: 128000,
714
+ maxTokens: 32000,
715
+ };
716
+
717
+ const localResponse = await stream(ollamaModel, context, {
718
+ apiKey: process.env.OLLAMA_API_KEY, // Optional; local Ollama usually runs without auth
719
+ });
720
+
721
+ // Example: Ollama Cloud using the native /api/chat transport
722
+ const ollamaCloudModel: Model<"ollama-chat"> = {
723
+ id: "gpt-oss:120b",
724
+ name: "GPT OSS 120B (Ollama Cloud)",
725
+ api: "ollama-chat",
726
+ provider: "ollama-cloud",
727
+ baseUrl: "https://ollama.com",
728
+ reasoning: true,
729
+ input: ["text", "image"],
730
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
731
+ contextWindow: 262144,
732
+ maxTokens: 8192,
733
+ };
734
+
735
+ const cloudResponse = await stream(ollamaCloudModel, context, {
736
+ apiKey: process.env.OLLAMA_CLOUD_API_KEY,
737
+ });
738
+
739
+ // Example: LiteLLM proxy with explicit compat settings
740
+ const litellmModel: Model<"openai-completions"> = {
741
+ id: "gpt-4o",
742
+ name: "GPT-4o (via LiteLLM)",
743
+ api: "openai-completions",
744
+ provider: "litellm",
745
+ baseUrl: "http://localhost:4000/v1",
746
+ reasoning: false,
747
+ input: ["text", "image"],
748
+ cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
749
+ contextWindow: 128000,
750
+ maxTokens: 16384,
751
+ compat: {
752
+ supportsStore: false, // LiteLLM doesn't support the store field
753
+ },
754
+ };
755
+
756
+ // Example: Custom endpoint with headers (bypassing Cloudflare bot detection)
757
+ const proxyModel: Model<"anthropic-messages"> = {
758
+ id: "claude-sonnet-4",
759
+ name: "Claude Sonnet 4 (Proxied)",
760
+ api: "anthropic-messages",
761
+ provider: "custom-proxy",
762
+ baseUrl: "https://proxy.example.com/v1",
763
+ reasoning: true,
764
+ input: ["text", "image"],
765
+ cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
766
+ contextWindow: 200000,
767
+ maxTokens: 8192,
768
+ headers: {
769
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
770
+ "X-Custom-Auth": "bearer-token-here",
771
+ },
772
+ };
773
+ ```
774
+
775
+ ### OpenAI Compatibility Settings
776
+
777
+ The `openai-completions` API is implemented by many providers with minor differences. By default, the library auto-detects compatibility settings based on `baseUrl` for known providers (Cerebras, xAI, Mistral, Chutes, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field:
778
+
779
+ ```typescript
780
+ interface OpenAICompat {
781
+ supportsStore?: boolean; // Whether provider supports the `store` field (default: true)
782
+ supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true)
783
+ supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true)
784
+ maxTokensField?: "max_completion_tokens" | "max_tokens"; // Which field name to use (default: max_completion_tokens)
785
+ extraBody?: Record<string, unknown>; // Extra request-body fields for custom proxy routing or provider-specific options
786
+ }
787
+ ```
788
+
789
+ If `compat` is not set, the library falls back to URL-based detection. If `compat` is partially set, unspecified fields use the detected defaults. Overriding `compat` is useful for:
790
+
791
+ - **LiteLLM proxies**: May not support `store` field
792
+ - **Custom inference servers**: May use non-standard field names
793
+ - **Self-hosted endpoints**: May have different feature support
794
+
795
+ ### Type Safety
796
+
797
+ Models are typed by their API, ensuring type-safe options:
798
+
799
+ ```typescript
800
+ // TypeScript knows this is an Anthropic model
801
+ const claude = getModel("anthropic", "claude-sonnet-4-20250514");
802
+
803
+ // So these options are type-checked for AnthropicOptions
804
+ await stream(claude, context, {
805
+ thinkingEnabled: true, // ✓ Valid for anthropic-messages
806
+ thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
807
+ // reasoningEffort: 'high' // ✗ TypeScript error: not valid for anthropic-messages
808
+ });
809
+ ```
810
+
811
+ ## Cross-Provider Handoffs
812
+
813
+ The library supports seamless handoffs between different LLM providers within the same conversation. This allows you to switch models mid-conversation while preserving context, including thinking blocks, tool calls, and tool results.
814
+
815
+ ### How It Works
816
+
817
+ When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
818
+
819
+ - **User and tool result messages** are passed through unchanged
820
+ - **Assistant messages from the same provider/API** are preserved as-is
821
+ - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
822
+ - **Tool calls and regular text** are preserved unchanged
823
+
824
+ ### Example: Multi-Provider Conversation
825
+
826
+ ```typescript
827
+ import { getModel, complete, Context } from "@prometheus-ai/ai";
828
+
829
+ // Start with Claude
830
+ const claude = getModel("anthropic", "claude-sonnet-4-20250514");
831
+ const context: Context = {
832
+ messages: [],
833
+ };
834
+
835
+ context.messages.push({ role: "user", content: "What is 25 * 18?" });
836
+ const claudeResponse = await complete(claude, context, {
837
+ thinkingEnabled: true,
838
+ });
839
+ context.messages.push(claudeResponse);
840
+
841
+ // Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
842
+ const gpt5 = getModel("openai", "gpt-5-mini");
843
+ context.messages.push({ role: "user", content: "Is that calculation correct?" });
844
+ const gptResponse = await complete(gpt5, context);
845
+ context.messages.push(gptResponse);
846
+
847
+ // Switch to Gemini
848
+ const gemini = getModel("google", "gemini-2.5-flash");
849
+ context.messages.push({ role: "user", content: "What was the original question?" });
850
+ const geminiResponse = await complete(gemini, context);
851
+ ```
852
+
853
+ ### Provider Compatibility
854
+
855
+ All providers can handle messages from other providers, including:
856
+
857
+ - Text content
858
+ - Tool calls and tool results (including images in tool results)
859
+ - Thinking/reasoning blocks (transformed to tagged text for cross-provider compatibility)
860
+ - Aborted messages with partial content
861
+
862
+ This enables flexible workflows where you can:
863
+
864
+ - Start with a fast model for initial responses
865
+ - Switch to a more capable model for complex reasoning
866
+ - Use specialized models for specific tasks
867
+ - Maintain conversation continuity across provider outages
868
+
869
+ ## Context Serialization
870
+
871
+ The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
872
+
873
+ ```typescript
874
+ import { Context, getModel, complete } from "@prometheus-ai/ai";
875
+
876
+ // Create and use a context
877
+ const context: Context = {
878
+ systemPrompt: ["You are a helpful assistant."],
879
+ messages: [{ role: "user", content: "What is TypeScript?" }],
880
+ };
881
+
882
+ const model = getModel("openai", "gpt-4o-mini");
883
+ const response = await complete(model, context);
884
+ context.messages.push(response);
885
+
886
+ // Serialize the entire context
887
+ const serialized = JSON.stringify(context);
888
+ console.log("Serialized context size:", serialized.length, "bytes");
889
+
890
+ // Save to database, localStorage, file, etc.
891
+ localStorage.setItem("conversation", serialized);
892
+
893
+ // Later: deserialize and continue the conversation
894
+ const restored: Context = JSON.parse(localStorage.getItem("conversation")!);
895
+ restored.messages.push({ role: "user", content: "Tell me more about its type system" });
896
+
897
+ // Continue with any model
898
+ const newModel = getModel("anthropic", "claude-haiku-4-5-20251001");
899
+ const continuation = await complete(newModel, restored);
900
+ ```
901
+
902
+ > **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
903
+
904
+ ## Browser Usage
905
+
906
+ The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
907
+
908
+ ```typescript
909
+ import { getModel, complete } from "@prometheus-ai/ai";
910
+
911
+ // API key must be passed explicitly in browser
912
+ const model = getModel("anthropic", "claude-haiku-4-5-20251001");
913
+
914
+ const response = await complete(
915
+ model,
916
+ {
917
+ messages: [{ role: "user", content: "Hello!" }],
918
+ },
919
+ {
920
+ apiKey: "your-api-key",
921
+ }
922
+ );
923
+ ```
924
+
925
+ > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
926
+
927
+ ### Environment Variables (Node.js only)
928
+
929
+ In Node.js environments, you can set environment variables to avoid passing API keys:
930
+
931
+ | Provider | Environment Variable(s) |
932
+ | -------------- | ---------------------------------------------------------------------------- |
933
+ | OpenAI | `OPENAI_API_KEY` |
934
+ | Anthropic | `ANTHROPIC_API_KEY` or `ANTHROPIC_OAUTH_TOKEN` (or `ANTHROPIC_FOUNDRY_API_KEY` when `CLAUDE_CODE_USE_FOUNDRY=true`) |
935
+ | Google | `GEMINI_API_KEY` |
936
+ | Vertex AI | `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) + `GOOGLE_CLOUD_LOCATION` + ADC |
937
+ | Mistral | `MISTRAL_API_KEY` |
938
+ | Groq | `GROQ_API_KEY` |
939
+ | Cerebras | `CEREBRAS_API_KEY` |
940
+ | Together | `TOGETHER_API_KEY` |
941
+ | Qianfan | `QIANFAN_API_KEY` |
942
+ | Hugging Face | `HUGGINGFACE_HUB_TOKEN` or `HF_TOKEN` |
943
+ | Synthetic | `SYNTHETIC_API_KEY` |
944
+ | NVIDIA | `NVIDIA_API_KEY` |
945
+ | NanoGPT | `NANO_GPT_API_KEY` |
946
+ | Venice | `VENICE_API_KEY` |
947
+ | Moonshot | `MOONSHOT_API_KEY` |
948
+ | xAI | `XAI_API_KEY` |
949
+ | OpenRouter | `OPENROUTER_API_KEY` |
950
+ | LiteLLM | `LITELLM_API_KEY` |
951
+ | Ollama | `OLLAMA_API_KEY` (optional for local deployments) |
952
+ | Ollama Cloud | `OLLAMA_CLOUD_API_KEY` |
953
+ | Qwen Portal | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY` |
954
+ | zAI | `ZAI_API_KEY` |
955
+ | MiniMax Code | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
956
+ | Xiaomi MiMo | `XIAOMI_API_KEY` |
957
+ | ZenMux | `ZENMUX_API_KEY` |
958
+ | vLLM | `VLLM_API_KEY` |
959
+ | Cloudflare AI Gateway | `CLOUDFLARE_AI_GATEWAY_API_KEY` |
960
+ | GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` |
961
+
962
+ For Cloudflare AI Gateway models, use the provider base URL format
963
+ `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic`.
964
+
965
+ For Anthropic Foundry routing, set `CLAUDE_CODE_USE_FOUNDRY=true` plus:
966
+ `FOUNDRY_BASE_URL`, `ANTHROPIC_FOUNDRY_API_KEY`, optional `ANTHROPIC_CUSTOM_HEADERS`,
967
+ and optional mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS`).
968
+
969
+ Default provider endpoints for the current integrations (OpenAI-compatible unless the URL or note indicates otherwise):
970
+
971
+ - Together: `https://api.together.xyz/v1`
972
+ - Moonshot: `https://api.moonshot.ai/v1`
973
+ - Qianfan: `https://qianfan.baidubce.com/v2`
974
+ - NVIDIA: `https://integrate.api.nvidia.com/v1`
975
+ - NanoGPT: `https://nano-gpt.com/api/v1`
976
+ - Hugging Face Inference: `https://router.huggingface.co/v1`
977
+ - Venice: `https://api.venice.ai/api/v1`
978
+ - Xiaomi MiMo: `https://api.xiaomimimo.com/anthropic`
979
+ - ZenMux (OpenAI): `https://zenmux.ai/api/v1`
980
+ - ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
981
+ - vLLM: `http://127.0.0.1:8000/v1`
982
+ - Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
983
+ - Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)
984
+ - LiteLLM: `http://localhost:4000/v1`
985
+ - Cloudflare AI Gateway: `https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic`
986
+ - Qwen Portal: `https://portal.qwen.ai/v1`
987
+
+ When the environment variables listed above are set, the library automatically uses the API keys they provide:
988
+
989
+ ```typescript
990
+ // Uses OPENAI_API_KEY from environment
991
+ const model = getModel("openai", "gpt-4o-mini");
992
+ const response = await complete(model, context);
993
+
994
+ // Or override with explicit key
995
+ const response = await complete(model, context, {
996
+ apiKey: "sk-different-key",
997
+ });
998
+ ```
999
+
1000
+ ### Checking Environment Variables
1001
+
1002
+ ```typescript
1003
+ import { getEnvApiKey } from "@prometheus-ai/ai";
1004
+
1005
+ // Check if an API key is set in environment variables
1006
+ const key = getEnvApiKey("openai"); // checks OPENAI_API_KEY
1007
+ ```
1008
+
1009
+ ## OAuth Providers
1010
+
1011
+ Several providers support OAuth authentication (some also support static API keys):
1012
+
1013
+ - **Anthropic** (Claude Pro/Max subscription)
1014
+ - **OpenAI Codex** (ChatGPT Plus/Pro subscription, access to GPT-5.x Codex models)
1015
+ - **GitHub Copilot** (Copilot subscription)
1016
+ - **Google Gemini CLI** (Gemini 2.0/2.5 via Google Cloud Code Assist; free tier or paid subscription)
1017
+ - **Antigravity** (Free Gemini 3, Claude, GPT-OSS via Google Cloud)
1018
+ - **Qwen Portal** (Qwen OAuth token or API key)
1019
+
1020
+ For paid Cloud Code Assist subscriptions, set `GOOGLE_CLOUD_PROJECT` or `GOOGLE_CLOUD_PROJECT_ID` to your project ID.
1021
+
1022
+ ### Vertex AI (ADC)
1023
+
1024
+ Vertex AI models use Application Default Credentials (ADC):
1025
+
1026
+ - **Local development**: Run `gcloud auth application-default login`
1027
+ - **CI/Production**: Set `GOOGLE_APPLICATION_CREDENTIALS` to point to a service account JSON key file
1028
+
1029
+ Also set `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) and `GOOGLE_CLOUD_LOCATION`. You can also pass `project`/`location` in the call options.
1030
+
1031
+ Example:
1032
+
1033
+ ```bash
1034
+ # Local (uses your user credentials)
1035
+ gcloud auth application-default login
1036
+ export GOOGLE_CLOUD_PROJECT="my-project"
1037
+ export GOOGLE_CLOUD_LOCATION="us-central1"
1038
+
1039
+ # CI/Production (service account key file)
1040
+ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
1041
+ ```
1042
+
1043
+ ```typescript
1044
+ import { getModel, complete } from "@prometheus-ai/ai";
1045
+
1046
+ (async () => {
1047
+ const model = getModel("google-vertex", "gemini-2.5-flash");
1048
+ const response = await complete(model, {
1049
+ messages: [{ role: "user", content: "Hello from Vertex AI" }],
1050
+ });
1051
+
1052
+ for (const block of response.content) {
1053
+ if (block.type === "text") console.log(block.text);
1054
+ }
1055
+ })().catch(console.error);
1056
+ ```
1057
+
1058
+ Official docs: [Application Default Credentials](https://cloud.google.com/docs/authentication/application-default-credentials)
1059
+
1060
+ ### CLI Login
1061
+
1062
+ Authenticate via the [`prometheus`](https://prometheus.trivlab.com) coding-agent CLI, which drives this library's OAuth/API-key flows in-process and persists the resulting credentials into `agent.db`:
1063
+
1064
+ ```bash
1065
+ prometheus auth-broker login # interactive provider selection
1066
+ prometheus auth-broker login anthropic # login to a specific provider
1067
+ prometheus auth-broker login vllm # store vLLM API key (or placeholder for local no-auth)
1068
+ prometheus auth-broker list # list supported providers
1069
+ prometheus auth-broker logout # interactive — pick a stored credential to remove
1070
+ ```
1071
+
1072
+ Credentials are saved to `agent.db` in the agent directory. `/login qianfan` opens the Qianfan console and stores the pasted API key.
1073
+
1074
+ `login` supports OAuth providers (Anthropic, OpenAI Codex, GitHub Copilot, Gemini CLI, Antigravity) and API-key onboarding flows.
1075
+
1076
+ The current API-key onboarding flows cover Together, Moonshot, Qianfan, NVIDIA, NanoGPT, Hugging Face, Venice, Xiaomi, vLLM, LiteLLM, Cloudflare AI Gateway, Qwen Portal, and Ollama Cloud. Ollama remains the local runtime integration; set `OLLAMA_API_KEY` only when your local or self-hosted deployment enforces bearer auth.
1077
+
1078
+ ### Programmatic OAuth
1079
+
1080
+ The library provides login and token refresh functions. Credential storage is the caller's responsibility.
1081
+
1082
+ ```typescript
1083
+ import {
1084
+ // Login functions (return credentials, do not store)
1085
+ loginAnthropic,
1086
+ loginOpenAICodex,
1087
+ loginGitHubCopilot,
1088
+ loginGeminiCli,
1089
+ loginAntigravity,
1090
+ loginCloudflareAiGateway,
1091
+ loginHuggingface,
1092
+ loginLiteLLM,
1093
+ loginMoonshot,
1094
+ loginNvidia,
1095
+ loginNanoGPT,
1096
+ loginQianfan,
1097
+ loginQwenPortal,
1098
+ loginTogether,
1099
+ loginVenice,
1100
+ loginVllm,
1101
+ loginXiaomi,
1102
+
1103
+ // Token management
1104
+ refreshOAuthToken, // (provider, credentials) => new credentials
1105
+ getOAuthApiKey, // (provider, credentialsMap) => { newCredentials, apiKey } | null
1106
+
1107
+ // Types
1108
+ type OAuthProvider, // includes 'anthropic', 'openai-codex', 'github-copilot', 'google-gemini-cli', 'google-antigravity', 'together', 'moonshot', 'qianfan', 'nvidia', 'nanogpt', 'huggingface', 'venice', 'xiaomi', 'vllm', 'litellm', 'cloudflare-ai-gateway', 'qwen-portal', ...
1109
+ type OAuthCredentials,
1110
+ } from "@prometheus-ai/ai";
1111
+ ```
1112
+
1113
+ `loginOpenAICodex` accepts an optional `originator` value used in the OAuth flow:
1114
+
1115
+ ```typescript
1116
+ await loginOpenAICodex({
1117
+ onAuth: ({ url }) => console.log(url),
1118
+ originator: "my-cli",
1119
+ });
1120
+ ```
1121
+
1122
+ ### Login Flow Example
1123
+
1124
+ ```typescript
1125
+ import { loginGitHubCopilot } from "@prometheus-ai/ai";
1126
+ import * as fs from "node:fs";
1127
+
1128
+ const credentials = await loginGitHubCopilot({
1129
+ onAuth: (url, instructions) => {
1130
+ console.log(`Open: ${url}`);
1131
+ if (instructions) console.log(instructions);
1132
+ },
1133
+ onPrompt: async (prompt) => {
1134
+ return await getUserInput(prompt.message); // getUserInput: your own helper that reads the user's answer
1135
+ },
1136
+ onProgress: (message) => console.log(message),
1137
+ });
1138
+
1139
+ // Store credentials yourself
1140
+ const auth = { "github-copilot": { type: "oauth", ...credentials } };
1141
+ fs.writeFileSync("credentials.json", JSON.stringify(auth, null, 2));
1142
+ ```
1143
+
1144
+ ### Using OAuth Tokens
1145
+
1146
+ Use `getOAuthApiKey()` to get an API key; it automatically refreshes the token if it has expired:
1147
+
1148
+ ```typescript
1149
+ import { getModel, complete, getOAuthApiKey } from "@prometheus-ai/ai";
1150
+ import * as fs from "node:fs";
1151
+
1152
+ // Load your stored credentials
1153
+ const auth = JSON.parse(fs.readFileSync("credentials.json", "utf-8"));
1154
+
1155
+ // Get API key (refreshes if expired)
1156
+ const result = await getOAuthApiKey("github-copilot", auth);
1157
+ if (!result) throw new Error("Not logged in");
1158
+
1159
+ // Save refreshed credentials
1160
+ auth["github-copilot"] = { type: "oauth", ...result.newCredentials };
1161
+ fs.writeFileSync("credentials.json", JSON.stringify(auth, null, 2));
1162
+
1163
+ // Use the API key
1164
+ const model = getModel("github-copilot", "gpt-4o");
1165
+ const response = await complete(
1166
+ model,
1167
+ {
1168
+ messages: [{ role: "user", content: "Hello!" }],
1169
+ },
1170
+ { apiKey: result.apiKey }
1171
+ );
1172
+ ```
1173
+
1174
+ ### Provider Notes
1175
+
1176
+ **OpenAI Codex**: Requires a ChatGPT Plus or Pro subscription. Provides access to GPT-5.x Codex models with extended context windows and reasoning capabilities. The library automatically handles session-based prompt caching when `sessionId` is provided in stream options.
1177
+
1178
+ **GitHub Copilot**: If you get a "The requested model is not supported" error, enable the model manually in VS Code: open Copilot Chat, click the model selector, select the model (warning icon), and click "Enable".
1179
+
1180
+ **Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically.
1181
+
1182
+ ## License
1183
+
1184
+ MIT