@aryee337/aery-ai 0.2.27 → 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417) hide show
  1. package/CHANGELOG.md +2914 -0
  2. package/README.md +614 -813
  3. package/package.json +140 -105
  4. package/src/api-registry.ts +96 -0
  5. package/src/auth-broker/client.ts +358 -0
  6. package/src/auth-broker/index.ts +5 -0
  7. package/src/auth-broker/refresher.ts +117 -0
  8. package/src/auth-broker/remote-store.ts +623 -0
  9. package/src/auth-broker/server.ts +644 -0
  10. package/src/auth-broker/types.ts +127 -0
  11. package/src/auth-broker/wire-schemas.ts +200 -0
  12. package/src/auth-gateway/http.ts +194 -0
  13. package/src/auth-gateway/index.ts +3 -0
  14. package/src/auth-gateway/server.ts +818 -0
  15. package/src/auth-gateway/types.ts +143 -0
  16. package/src/auth-storage.ts +4422 -0
  17. package/src/index.ts +54 -0
  18. package/src/model-cache.ts +129 -0
  19. package/src/model-manager.ts +469 -0
  20. package/src/model-thinking.ts +782 -0
  21. package/src/models.json +83530 -0
  22. package/src/models.json.d.ts +9 -0
  23. package/src/models.ts +56 -0
  24. package/src/prompts/turn-aborted-guidance.md +4 -0
  25. package/src/provider-details.ts +90 -0
  26. package/src/provider-models/bundled-references.ts +38 -0
  27. package/src/provider-models/descriptors.ts +355 -0
  28. package/src/provider-models/google.ts +88 -0
  29. package/src/provider-models/index.ts +5 -0
  30. package/src/provider-models/ollama.ts +153 -0
  31. package/src/provider-models/openai-compat.ts +2817 -0
  32. package/src/provider-models/special.ts +67 -0
  33. package/src/providers/aery-native-client.ts +228 -0
  34. package/src/providers/aery-native-server.ts +212 -0
  35. package/src/providers/amazon-bedrock.ts +873 -0
  36. package/src/providers/anthropic-client.ts +318 -0
  37. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  38. package/src/providers/anthropic-messages-server.ts +683 -0
  39. package/src/providers/anthropic-wire.ts +268 -0
  40. package/src/providers/anthropic.ts +3094 -0
  41. package/src/providers/aws-credentials.ts +501 -0
  42. package/src/providers/aws-eventstream.ts +185 -0
  43. package/src/providers/aws-sigv4.ts +218 -0
  44. package/src/providers/azure-openai-responses.ts +361 -0
  45. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  46. package/src/providers/cursor/proto/agent.proto +3526 -0
  47. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  48. package/src/providers/cursor/proto/buf.yaml +17 -0
  49. package/src/providers/cursor.ts +2621 -0
  50. package/src/providers/error-message.ts +21 -0
  51. package/src/providers/github-copilot-headers.ts +140 -0
  52. package/src/providers/gitlab-duo.ts +372 -0
  53. package/src/providers/google-auth.ts +252 -0
  54. package/src/providers/google-gemini-cli.ts +809 -0
  55. package/src/providers/google-gemini-headers.ts +41 -0
  56. package/src/providers/google-shared.ts +917 -0
  57. package/src/providers/google-types.ts +167 -0
  58. package/src/providers/google-vertex.ts +91 -0
  59. package/src/providers/google.ts +41 -0
  60. package/src/providers/grammar.ts +70 -0
  61. package/src/providers/kimi.ts +52 -0
  62. package/src/providers/mock.ts +496 -0
  63. package/src/providers/ollama.ts +644 -0
  64. package/src/providers/openai-anthropic-shim.ts +138 -0
  65. package/src/providers/openai-chat-server-schema.ts +252 -0
  66. package/src/providers/openai-chat-server.ts +647 -0
  67. package/src/providers/openai-codex/constants.ts +43 -0
  68. package/src/providers/openai-codex/request-transformer.ts +161 -0
  69. package/src/providers/openai-codex/response-handler.ts +81 -0
  70. package/src/providers/openai-codex-responses.ts +3018 -0
  71. package/src/providers/openai-completions-compat.ts +300 -0
  72. package/src/providers/openai-completions.ts +1979 -0
  73. package/src/providers/openai-responses-server-schema.ts +290 -0
  74. package/src/providers/openai-responses-server.ts +1183 -0
  75. package/src/providers/openai-responses-shared.ts +873 -0
  76. package/src/providers/openai-responses.ts +679 -0
  77. package/src/providers/register-builtins.ts +436 -0
  78. package/src/providers/synthetic.ts +50 -0
  79. package/src/providers/transform-messages.ts +382 -0
  80. package/src/providers/vision-guard.ts +31 -0
  81. package/src/providers/xai-responses.ts +82 -0
  82. package/src/rate-limit-utils.ts +84 -0
  83. package/src/stream.ts +1065 -0
  84. package/src/types.ts +944 -0
  85. package/src/usage/claude.ts +482 -0
  86. package/src/usage/gemini.ts +250 -0
  87. package/src/usage/github-copilot.ts +421 -0
  88. package/src/usage/google-antigravity.ts +201 -0
  89. package/src/usage/kimi.ts +271 -0
  90. package/src/usage/minimax-code.ts +31 -0
  91. package/src/usage/openai-codex.ts +503 -0
  92. package/src/usage/shared.ts +10 -0
  93. package/src/usage/zai.ts +247 -0
  94. package/src/usage.ts +185 -0
  95. package/src/utils/abort.ts +51 -0
  96. package/src/utils/abortable-iterator.ts +69 -0
  97. package/src/utils/anthropic-auth.ts +93 -0
  98. package/src/utils/discovery/antigravity.ts +261 -0
  99. package/src/utils/discovery/codex.ts +371 -0
  100. package/src/utils/discovery/cursor.ts +306 -0
  101. package/src/utils/discovery/gemini.ts +248 -0
  102. package/src/utils/discovery/index.ts +4 -0
  103. package/src/utils/discovery/openai-compatible.ts +224 -0
  104. package/src/utils/event-stream.ts +142 -0
  105. package/src/utils/fireworks-model-id.ts +30 -0
  106. package/src/utils/foundry.ts +8 -0
  107. package/src/utils/http-inspector.ts +176 -0
  108. package/src/utils/idle-iterator.ts +267 -0
  109. package/src/utils/json-parse.ts +182 -0
  110. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  111. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  112. package/src/utils/oauth/anthropic.ts +273 -0
  113. package/src/utils/oauth/api-key-login.ts +87 -0
  114. package/src/utils/oauth/api-key-validation.ts +92 -0
  115. package/src/utils/oauth/callback-server.ts +276 -0
  116. package/src/utils/oauth/cerebras.ts +16 -0
  117. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  118. package/src/utils/oauth/cursor.ts +157 -0
  119. package/src/utils/oauth/deepseek.ts +53 -0
  120. package/src/utils/oauth/firepass.ts +24 -0
  121. package/src/utils/oauth/fireworks.ts +15 -0
  122. package/src/utils/oauth/github-copilot.ts +362 -0
  123. package/src/utils/oauth/gitlab-duo.ts +123 -0
  124. package/src/utils/oauth/google-antigravity.ts +200 -0
  125. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  126. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  127. package/src/utils/oauth/huggingface.ts +62 -0
  128. package/src/utils/oauth/index.ts +484 -0
  129. package/src/utils/oauth/kagi.ts +47 -0
  130. package/src/utils/oauth/kilo.ts +87 -0
  131. package/src/utils/oauth/kimi.ts +254 -0
  132. package/src/utils/oauth/litellm.ts +47 -0
  133. package/src/utils/oauth/lm-studio.ts +38 -0
  134. package/src/utils/oauth/minimax-code.ts +78 -0
  135. package/src/utils/oauth/moonshot.ts +23 -0
  136. package/src/utils/oauth/nanogpt.ts +15 -0
  137. package/src/utils/oauth/nvidia.ts +70 -0
  138. package/src/utils/oauth/oauth.html +203 -0
  139. package/src/utils/oauth/ollama-cloud.ts +28 -0
  140. package/src/utils/oauth/ollama.ts +47 -0
  141. package/src/utils/oauth/openai-codex.ts +299 -0
  142. package/src/utils/oauth/opencode.ts +49 -0
  143. package/src/utils/oauth/openrouter.ts +20 -0
  144. package/src/utils/oauth/parallel.ts +46 -0
  145. package/src/utils/oauth/perplexity.ts +206 -0
  146. package/src/utils/oauth/pkce.ts +18 -0
  147. package/src/utils/oauth/qianfan.ts +58 -0
  148. package/src/utils/oauth/qwen-portal.ts +60 -0
  149. package/src/utils/oauth/synthetic.ts +15 -0
  150. package/src/utils/oauth/tavily.ts +46 -0
  151. package/src/utils/oauth/together.ts +16 -0
  152. package/src/utils/oauth/types.ts +99 -0
  153. package/src/utils/oauth/venice.ts +59 -0
  154. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  155. package/src/utils/oauth/vllm.ts +40 -0
  156. package/src/utils/oauth/wafer.ts +50 -0
  157. package/src/utils/oauth/xai-oauth.ts +342 -0
  158. package/src/utils/oauth/xiaomi.ts +139 -0
  159. package/src/utils/oauth/zai.ts +60 -0
  160. package/src/utils/oauth/zenmux.ts +15 -0
  161. package/src/utils/oauth/zhipu.ts +60 -0
  162. package/src/utils/overflow.ts +137 -0
  163. package/src/utils/parse-bind.ts +54 -0
  164. package/src/utils/provider-response.ts +30 -0
  165. package/src/utils/request-debug.ts +336 -0
  166. package/src/utils/retry-after.ts +110 -0
  167. package/src/utils/retry.ts +54 -0
  168. package/src/utils/schema/CONSTRAINTS.md +164 -0
  169. package/src/utils/schema/adapt.ts +36 -0
  170. package/src/utils/schema/compatibility.ts +435 -0
  171. package/src/utils/schema/dereference.ts +98 -0
  172. package/src/utils/schema/draft.ts +341 -0
  173. package/src/utils/schema/equality.ts +97 -0
  174. package/src/utils/schema/fields.ts +191 -0
  175. package/src/utils/schema/index.ts +13 -0
  176. package/src/utils/schema/json-schema-validator.ts +577 -0
  177. package/src/utils/schema/meta-validator.ts +167 -0
  178. package/src/utils/schema/normalize.ts +1588 -0
  179. package/src/utils/schema/spill.ts +43 -0
  180. package/src/utils/schema/stamps.ts +97 -0
  181. package/src/utils/schema/types.ts +10 -0
  182. package/src/utils/schema/wire.ts +293 -0
  183. package/src/utils/schema/zod-decontaminate.ts +331 -0
  184. package/src/utils/sdk-stream-timeout.ts +43 -0
  185. package/src/utils/sse-debug.ts +289 -0
  186. package/src/utils/stream-markup-healing.ts +612 -0
  187. package/src/utils/tool-choice.ts +99 -0
  188. package/src/utils/validation.ts +1024 -0
  189. package/src/utils.ts +166 -0
  190. package/dist/api-registry.d.ts +0 -20
  191. package/dist/api-registry.d.ts.map +0 -1
  192. package/dist/api-registry.js +0 -44
  193. package/dist/api-registry.js.map +0 -1
  194. package/dist/bedrock-provider.d.ts +0 -5
  195. package/dist/bedrock-provider.d.ts.map +0 -1
  196. package/dist/bedrock-provider.js +0 -6
  197. package/dist/bedrock-provider.js.map +0 -1
  198. package/dist/cli.d.ts +0 -3
  199. package/dist/cli.d.ts.map +0 -1
  200. package/dist/cli.js +0 -130
  201. package/dist/cli.js.map +0 -1
  202. package/dist/env-api-keys.d.ts +0 -18
  203. package/dist/env-api-keys.d.ts.map +0 -1
  204. package/dist/env-api-keys.js +0 -178
  205. package/dist/env-api-keys.js.map +0 -1
  206. package/dist/image-models.d.ts +0 -10
  207. package/dist/image-models.d.ts.map +0 -1
  208. package/dist/image-models.generated.d.ts +0 -440
  209. package/dist/image-models.generated.d.ts.map +0 -1
  210. package/dist/image-models.generated.js +0 -442
  211. package/dist/image-models.generated.js.map +0 -1
  212. package/dist/image-models.js +0 -23
  213. package/dist/image-models.js.map +0 -1
  214. package/dist/images-api-registry.d.ts +0 -14
  215. package/dist/images-api-registry.d.ts.map +0 -1
  216. package/dist/images-api-registry.js +0 -22
  217. package/dist/images-api-registry.js.map +0 -1
  218. package/dist/images.d.ts +0 -4
  219. package/dist/images.d.ts.map +0 -1
  220. package/dist/images.js +0 -14
  221. package/dist/images.js.map +0 -1
  222. package/dist/index.d.ts +0 -32
  223. package/dist/index.d.ts.map +0 -1
  224. package/dist/index.js +0 -20
  225. package/dist/index.js.map +0 -1
  226. package/dist/models.d.ts +0 -18
  227. package/dist/models.d.ts.map +0 -1
  228. package/dist/models.generated.d.ts +0 -17707
  229. package/dist/models.generated.d.ts.map +0 -1
  230. package/dist/models.generated.js +0 -16561
  231. package/dist/models.generated.js.map +0 -1
  232. package/dist/models.js +0 -71
  233. package/dist/models.js.map +0 -1
  234. package/dist/oauth.d.ts +0 -2
  235. package/dist/oauth.d.ts.map +0 -1
  236. package/dist/oauth.js +0 -2
  237. package/dist/oauth.js.map +0 -1
  238. package/dist/providers/aery-error-formatting.d.ts +0 -13
  239. package/dist/providers/aery-error-formatting.d.ts.map +0 -1
  240. package/dist/providers/aery-error-formatting.js +0 -112
  241. package/dist/providers/aery-error-formatting.js.map +0 -1
  242. package/dist/providers/amazon-bedrock.d.ts +0 -38
  243. package/dist/providers/amazon-bedrock.d.ts.map +0 -1
  244. package/dist/providers/amazon-bedrock.js +0 -763
  245. package/dist/providers/amazon-bedrock.js.map +0 -1
  246. package/dist/providers/anthropic.d.ts +0 -71
  247. package/dist/providers/anthropic.d.ts.map +0 -1
  248. package/dist/providers/anthropic.js +0 -949
  249. package/dist/providers/anthropic.js.map +0 -1
  250. package/dist/providers/azure-openai-responses.d.ts +0 -15
  251. package/dist/providers/azure-openai-responses.d.ts.map +0 -1
  252. package/dist/providers/azure-openai-responses.js +0 -225
  253. package/dist/providers/azure-openai-responses.js.map +0 -1
  254. package/dist/providers/cloudflare.d.ts +0 -13
  255. package/dist/providers/cloudflare.d.ts.map +0 -1
  256. package/dist/providers/cloudflare.js +0 -26
  257. package/dist/providers/cloudflare.js.map +0 -1
  258. package/dist/providers/faux.d.ts +0 -56
  259. package/dist/providers/faux.d.ts.map +0 -1
  260. package/dist/providers/faux.js +0 -368
  261. package/dist/providers/faux.js.map +0 -1
  262. package/dist/providers/github-copilot-headers.d.ts +0 -8
  263. package/dist/providers/github-copilot-headers.d.ts.map +0 -1
  264. package/dist/providers/github-copilot-headers.js +0 -29
  265. package/dist/providers/github-copilot-headers.js.map +0 -1
  266. package/dist/providers/google-gemini-cli.d.ts +0 -74
  267. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  268. package/dist/providers/google-gemini-cli.js +0 -779
  269. package/dist/providers/google-gemini-cli.js.map +0 -1
  270. package/dist/providers/google-shared.d.ts +0 -70
  271. package/dist/providers/google-shared.d.ts.map +0 -1
  272. package/dist/providers/google-shared.js +0 -329
  273. package/dist/providers/google-shared.js.map +0 -1
  274. package/dist/providers/google-vertex.d.ts +0 -15
  275. package/dist/providers/google-vertex.d.ts.map +0 -1
  276. package/dist/providers/google-vertex.js +0 -442
  277. package/dist/providers/google-vertex.js.map +0 -1
  278. package/dist/providers/google.d.ts +0 -13
  279. package/dist/providers/google.d.ts.map +0 -1
  280. package/dist/providers/google.js +0 -400
  281. package/dist/providers/google.js.map +0 -1
  282. package/dist/providers/images/openrouter.d.ts +0 -3
  283. package/dist/providers/images/openrouter.d.ts.map +0 -1
  284. package/dist/providers/images/openrouter.js +0 -129
  285. package/dist/providers/images/openrouter.js.map +0 -1
  286. package/dist/providers/images/register-builtins.d.ts +0 -4
  287. package/dist/providers/images/register-builtins.d.ts.map +0 -1
  288. package/dist/providers/images/register-builtins.js +0 -34
  289. package/dist/providers/images/register-builtins.js.map +0 -1
  290. package/dist/providers/mistral.d.ts +0 -25
  291. package/dist/providers/mistral.d.ts.map +0 -1
  292. package/dist/providers/mistral.js +0 -535
  293. package/dist/providers/mistral.js.map +0 -1
  294. package/dist/providers/openai-codex-responses.d.ts +0 -30
  295. package/dist/providers/openai-codex-responses.d.ts.map +0 -1
  296. package/dist/providers/openai-codex-responses.js +0 -1090
  297. package/dist/providers/openai-codex-responses.js.map +0 -1
  298. package/dist/providers/openai-completions.d.ts +0 -19
  299. package/dist/providers/openai-completions.d.ts.map +0 -1
  300. package/dist/providers/openai-completions.js +0 -950
  301. package/dist/providers/openai-completions.js.map +0 -1
  302. package/dist/providers/openai-prompt-cache.d.ts +0 -3
  303. package/dist/providers/openai-prompt-cache.d.ts.map +0 -1
  304. package/dist/providers/openai-prompt-cache.js +0 -10
  305. package/dist/providers/openai-prompt-cache.js.map +0 -1
  306. package/dist/providers/openai-responses-shared.d.ts +0 -18
  307. package/dist/providers/openai-responses-shared.d.ts.map +0 -1
  308. package/dist/providers/openai-responses-shared.js +0 -492
  309. package/dist/providers/openai-responses-shared.js.map +0 -1
  310. package/dist/providers/openai-responses.d.ts +0 -13
  311. package/dist/providers/openai-responses.d.ts.map +0 -1
  312. package/dist/providers/openai-responses.js +0 -237
  313. package/dist/providers/openai-responses.js.map +0 -1
  314. package/dist/providers/register-builtins.d.ts +0 -38
  315. package/dist/providers/register-builtins.d.ts.map +0 -1
  316. package/dist/providers/register-builtins.js +0 -278
  317. package/dist/providers/register-builtins.js.map +0 -1
  318. package/dist/providers/simple-options.d.ts +0 -8
  319. package/dist/providers/simple-options.d.ts.map +0 -1
  320. package/dist/providers/simple-options.js +0 -41
  321. package/dist/providers/simple-options.js.map +0 -1
  322. package/dist/providers/transform-messages.d.ts +0 -8
  323. package/dist/providers/transform-messages.d.ts.map +0 -1
  324. package/dist/providers/transform-messages.js +0 -184
  325. package/dist/providers/transform-messages.js.map +0 -1
  326. package/dist/session-resources.d.ts +0 -4
  327. package/dist/session-resources.d.ts.map +0 -1
  328. package/dist/session-resources.js +0 -22
  329. package/dist/session-resources.js.map +0 -1
  330. package/dist/stream.d.ts +0 -8
  331. package/dist/stream.d.ts.map +0 -1
  332. package/dist/stream.js +0 -27
  333. package/dist/stream.js.map +0 -1
  334. package/dist/types.d.ts +0 -498
  335. package/dist/types.d.ts.map +0 -1
  336. package/dist/types.js +0 -2
  337. package/dist/types.js.map +0 -1
  338. package/dist/utils/diagnostics.d.ts +0 -19
  339. package/dist/utils/diagnostics.d.ts.map +0 -1
  340. package/dist/utils/diagnostics.js +0 -25
  341. package/dist/utils/diagnostics.js.map +0 -1
  342. package/dist/utils/event-stream.d.ts +0 -21
  343. package/dist/utils/event-stream.d.ts.map +0 -1
  344. package/dist/utils/event-stream.js +0 -81
  345. package/dist/utils/event-stream.js.map +0 -1
  346. package/dist/utils/hash.d.ts +0 -3
  347. package/dist/utils/hash.d.ts.map +0 -1
  348. package/dist/utils/hash.js +0 -14
  349. package/dist/utils/hash.js.map +0 -1
  350. package/dist/utils/headers.d.ts +0 -2
  351. package/dist/utils/headers.d.ts.map +0 -1
  352. package/dist/utils/headers.js +0 -8
  353. package/dist/utils/headers.js.map +0 -1
  354. package/dist/utils/json-parse.d.ts +0 -16
  355. package/dist/utils/json-parse.d.ts.map +0 -1
  356. package/dist/utils/json-parse.js +0 -113
  357. package/dist/utils/json-parse.js.map +0 -1
  358. package/dist/utils/node-http-proxy.d.ts +0 -10
  359. package/dist/utils/node-http-proxy.d.ts.map +0 -1
  360. package/dist/utils/node-http-proxy.js +0 -97
  361. package/dist/utils/node-http-proxy.js.map +0 -1
  362. package/dist/utils/oauth/anthropic.d.ts +0 -25
  363. package/dist/utils/oauth/anthropic.d.ts.map +0 -1
  364. package/dist/utils/oauth/anthropic.js +0 -335
  365. package/dist/utils/oauth/anthropic.js.map +0 -1
  366. package/dist/utils/oauth/device-code.d.ts +0 -19
  367. package/dist/utils/oauth/device-code.d.ts.map +0 -1
  368. package/dist/utils/oauth/device-code.js +0 -55
  369. package/dist/utils/oauth/device-code.js.map +0 -1
  370. package/dist/utils/oauth/github-copilot.d.ts +0 -30
  371. package/dist/utils/oauth/github-copilot.d.ts.map +0 -1
  372. package/dist/utils/oauth/github-copilot.js +0 -268
  373. package/dist/utils/oauth/github-copilot.js.map +0 -1
  374. package/dist/utils/oauth/google-antigravity.d.ts +0 -26
  375. package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
  376. package/dist/utils/oauth/google-antigravity.js +0 -377
  377. package/dist/utils/oauth/google-antigravity.js.map +0 -1
  378. package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
  379. package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
  380. package/dist/utils/oauth/google-gemini-cli.js +0 -482
  381. package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
  382. package/dist/utils/oauth/index.d.ts +0 -63
  383. package/dist/utils/oauth/index.d.ts.map +0 -1
  384. package/dist/utils/oauth/index.js +0 -131
  385. package/dist/utils/oauth/index.js.map +0 -1
  386. package/dist/utils/oauth/oauth-page.d.ts +0 -3
  387. package/dist/utils/oauth/oauth-page.d.ts.map +0 -1
  388. package/dist/utils/oauth/oauth-page.js +0 -105
  389. package/dist/utils/oauth/oauth-page.js.map +0 -1
  390. package/dist/utils/oauth/openai-codex.d.ts +0 -34
  391. package/dist/utils/oauth/openai-codex.d.ts.map +0 -1
  392. package/dist/utils/oauth/openai-codex.js +0 -385
  393. package/dist/utils/oauth/openai-codex.js.map +0 -1
  394. package/dist/utils/oauth/pkce.d.ts +0 -13
  395. package/dist/utils/oauth/pkce.d.ts.map +0 -1
  396. package/dist/utils/oauth/pkce.js +0 -31
  397. package/dist/utils/oauth/pkce.js.map +0 -1
  398. package/dist/utils/oauth/types.d.ts +0 -64
  399. package/dist/utils/oauth/types.d.ts.map +0 -1
  400. package/dist/utils/oauth/types.js +0 -2
  401. package/dist/utils/oauth/types.js.map +0 -1
  402. package/dist/utils/overflow.d.ts +0 -56
  403. package/dist/utils/overflow.d.ts.map +0 -1
  404. package/dist/utils/overflow.js +0 -151
  405. package/dist/utils/overflow.js.map +0 -1
  406. package/dist/utils/sanitize-unicode.d.ts +0 -22
  407. package/dist/utils/sanitize-unicode.d.ts.map +0 -1
  408. package/dist/utils/sanitize-unicode.js +0 -26
  409. package/dist/utils/sanitize-unicode.js.map +0 -1
  410. package/dist/utils/typebox-helpers.d.ts +0 -17
  411. package/dist/utils/typebox-helpers.d.ts.map +0 -1
  412. package/dist/utils/typebox-helpers.js +0 -21
  413. package/dist/utils/typebox-helpers.js.map +0 -1
  414. package/dist/utils/validation.d.ts +0 -18
  415. package/dist/utils/validation.d.ts.map +0 -1
  416. package/dist/utils/validation.js +0 -281
  417. package/dist/utils/validation.js.map +0 -1
package/src/stream.ts ADDED
@@ -0,0 +1,1065 @@
1
+ import * as fs from "node:fs";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import { $env, $pickenv, extractHttpStatusFromError } from "@aryee337/aery-utils";
5
+ import { getCustomApi } from "./api-registry";
6
+ import type { Effort } from "./model-thinking";
7
+ import {
8
+ mapEffortToAnthropicAdaptiveEffort,
9
+ mapEffortToGoogleThinkingLevel,
10
+ modelOmitsReasoningEffort,
11
+ requireSupportedEffort,
12
+ } from "./model-thinking";
13
+ import { streamAeryNative } from "./providers/aery-native-client";
14
+ import type { BedrockOptions } from "./providers/amazon-bedrock";
15
+ import type { AnthropicOptions } from "./providers/anthropic";
16
+ import type { CursorOptions } from "./providers/cursor";
17
+ import { isGitLabDuoModel, streamGitLabDuo } from "./providers/gitlab-duo";
18
+ import type { GoogleOptions } from "./providers/google";
19
+ import { getVertexAccessToken } from "./providers/google-auth";
20
+ import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli";
21
+ import type { GoogleVertexOptions } from "./providers/google-vertex";
22
+ import { isKimiModel, streamKimi } from "./providers/kimi";
23
+ import type { OllamaChatOptions } from "./providers/ollama";
24
+ import type { OpenAICompletionsOptions } from "./providers/openai-completions";
25
+ // Heavy provider stream functions are imported lazily via register-builtins,
26
+ // which wraps each provider module in a dynamic import. This keeps the
27
+ // AWS SDK, google-auth-library, @google/genai, @bufbuild/protobuf, and
28
+ // other provider SDKs out of the CLI startup parse graph. The
29
+ // gitlab-duo / kimi / synthetic providers stay eager because their modules
30
+ // export routing predicates (isGitLabDuoModel, isKimiModel, isSyntheticModel)
31
+ // that must be callable synchronously before streaming begins, and their
32
+ // modules are thin wrappers with no heavy SDK dependencies.
33
+ import {
34
+ streamAnthropic,
35
+ streamAzureOpenAIResponses,
36
+ streamBedrock,
37
+ streamCursor,
38
+ streamGoogle,
39
+ streamGoogleGeminiCli,
40
+ streamGoogleVertex,
41
+ streamOllama,
42
+ streamOpenAICodexResponses,
43
+ streamOpenAICompletions,
44
+ streamOpenAIResponses,
45
+ } from "./providers/register-builtins";
46
+ import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
47
+ import { streamXAIResponses } from "./providers/xai-responses";
48
+ import { isUsageLimitError } from "./rate-limit-utils";
49
+ import type {
50
+ Api,
51
+ AssistantMessage,
52
+ AssistantMessageEvent,
53
+ Context,
54
+ FetchImpl,
55
+ Model,
56
+ OptionsForApi,
57
+ SimpleStreamOptions,
58
+ StreamOptions,
59
+ ThinkingBudgets,
60
+ ToolChoice,
61
+ } from "./types";
62
+ import { AssistantMessageEventStream } from "./utils/event-stream";
63
+ import { isFoundryEnabled } from "./utils/foundry";
64
+ import { withRequestDebugFetch } from "./utils/request-debug";
65
+
66
/** Memoised result of the ADC file probe; `null` until the first lookup has run. */
let cachedVertexAdcCredentialsExists: boolean | null = null;

/**
 * Reports whether an Application Default Credentials file is present on disk.
 *
 * The file named by `GOOGLE_APPLICATION_CREDENTIALS` is checked when that
 * variable is set; otherwise the gcloud location under the user's home
 * directory is checked. The filesystem is probed at most once: the first
 * answer is cached in `cachedVertexAdcCredentialsExists` and returned by
 * every later call.
 */
function hasVertexAdcCredentials(): boolean {
	if (cachedVertexAdcCredentialsExists !== null) {
		return cachedVertexAdcCredentialsExists;
	}
	const credentialsFile =
		$env.GOOGLE_APPLICATION_CREDENTIALS ||
		path.join(os.homedir(), ".config", "gcloud", "application_default_credentials.json");
	const found = fs.existsSync(credentialsFile);
	cachedVertexAdcCredentialsExists = found;
	return found;
}
81
/**
 * Returns true for `google-vertex` models that are reached through either the
 * OpenAI-compatible endpoint (`/endpoints/openapi` in the base URL) or the
 * Anthropic raw-predict endpoint (`:streamRawPredict` in the base URL).
 */
function isGoogleVertexAuthenticatedModel(model: Model<Api>): boolean {
	if (model.provider !== "google-vertex") {
		return false;
	}
	switch (model.api) {
		case "openai-completions":
			return model.baseUrl.includes("/endpoints/openapi");
		case "anthropic-messages":
			return model.baseUrl.includes(":streamRawPredict");
		default:
			return false;
	}
}
88
+
89
/**
 * Wraps a fetch implementation so every call is authenticated against Vertex AI.
 *
 * For each request the wrapper:
 *  1. obtains an access token via `getVertexAccessToken` and sets it as the
 *     `Authorization: Bearer …` header;
 *  2. passes the target through `resolveVertexRequest`;
 *  3. for Anthropic raw-predict URLs, re-issues the call by URL with the body
 *     rewritten by `transformVertexAnthropicBody`.
 *
 * @param options Stream options; `options.fetch` (default: global `fetch`) is
 *   the underlying transport and `options.signal` is forwarded to the token lookup.
 * @returns A fetch-compatible function; `preconnect` is carried over when the
 *   underlying fetch exposes it.
 */
function createVertexAuthenticatedFetch(options: StreamOptions | undefined): FetchImpl {
	const baseFetch = options?.fetch ?? fetch;
	const vertexFetch = async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
		const token = await getVertexAccessToken({ signal: options?.signal, fetch: baseFetch });
		const rewritten = resolveVertexRequest(input);
		const request = rewritten instanceof Request ? rewritten : undefined;

		// Passing `headers` in init replaces a Request's own headers wholesale, so seed
		// from the Request when the caller gave no init headers. Otherwise a Request
		// input would lose everything (content-type, API version headers, ...) except
		// Authorization.
		const headers = new Headers(init?.headers ?? request?.headers);
		headers.set("Authorization", `Bearer ${token}`);

		const url = request ? request.url : rewritten.toString();
		if (!isVertexAnthropicRawPredict(url)) {
			return baseFetch(rewritten, { ...init, headers });
		}

		const bodyText = await readVertexRequestBody(rewritten, init);
		return baseFetch(url, {
			...init,
			method: init?.method ?? request?.method ?? "POST",
			headers,
			// An empty read means there was no body or it could not be read as text.
			// Keep the caller's body in that case instead of overwriting it with "".
			body: bodyText ? transformVertexAnthropicBody(bodyText) : init?.body,
			// The call is re-issued by URL, so a Request's abort signal must be carried explicitly.
			signal: init?.signal ?? request?.signal,
		});
	};
	return Object.assign(vertexFetch, baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {});
}
111
+
112
/** True when the URL contains the `:streamRawPredict` or `:rawPredict` method marker. */
function isVertexAnthropicRawPredict(url: string): boolean {
	const markers = [":streamRawPredict", ":rawPredict"];
	return markers.some(marker => url.includes(marker));
}
115
+
116
/**
 * Reads the outgoing request body as text so it can be rewritten before sending.
 *
 * An explicit `init.body` takes precedence over the body of a `Request` input,
 * mirroring how `fetch(request, init)` resolves the two. Supported body kinds:
 * strings, `ArrayBuffer`, any `ArrayBufferView` (typed arrays, `DataView`),
 * `URLSearchParams` and `Blob`. A `Request` body is read from a clone so the
 * original stays usable.
 *
 * @param input Fetch target; only inspected for its body when it is a `Request`
 *   and `init` carries no body.
 * @param init Fetch init whose `body`, if present, is the one that would be sent.
 * @returns The body text, or `""` when there is no body or it is of a kind that
 *   cannot be read here without consuming it (e.g. streams, `FormData`).
 */
async function readVertexRequestBody(input: string | URL | Request, init: RequestInit | undefined): Promise<string> {
	const body = init?.body;
	if (body == null) {
		return input instanceof Request ? input.clone().text() : "";
	}
	if (typeof body === "string") return body;
	// ArrayBuffer.isView covers every typed array and DataView, not just Uint8Array.
	if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) return new TextDecoder().decode(body);
	if (body instanceof URLSearchParams) return body.toString();
	if (body instanceof Blob) return body.text();
	return "";
}
124
+
125
/**
 * Adapts an Anthropic Messages JSON body to the shape Vertex Claude accepts.
 *
 * Vertex carries the model in the URL path, so `model` is removed from the
 * payload, and it expects `anthropic_version: "vertex-2023-10-16"` inside the
 * JSON body rather than the `anthropic-version` HTTP header.
 *
 * Empty input, and input that cannot be parsed or rewritten, is returned unchanged.
 */
function transformVertexAnthropicBody(bodyText: string): string {
	let result = bodyText;
	if (bodyText) {
		try {
			const body = JSON.parse(bodyText) as Record<string, unknown>;
			delete body.model;
			body.anthropic_version = "vertex-2023-10-16";
			result = JSON.stringify(body);
		} catch {
			// Not rewritable JSON: fall through and hand back the text as received.
		}
	}
	return result;
}
139
+
140
/**
 * Resolves a templated Vertex AI request target into a concrete one.
 *
 * Two independent rewrites are applied to the URL:
 *  - `{project}` / `{location}` placeholders (raw or percent-encoded) are filled
 *    from the environment when BOTH a project and a location are configured.
 *    The `global` location maps to the un-prefixed `aiplatform.googleapis.com` host.
 *  - A `/v1/messages` path segment following `:streamRawPredict` is stripped.
 *    This needs no project or location, so it is applied even when those are
 *    unset; otherwise a fully specified base URL would keep the extra suffix.
 *
 * @param input Fetch target as a string, `URL` or `Request`.
 * @returns The same kind of value as `input`; the original object is returned
 *   untouched when the URL does not change.
 */
function resolveVertexRequest(input: string | URL | Request): string | URL | Request {
	const project = $env.GOOGLE_CLOUD_PROJECT || $env.GCP_PROJECT || $env.GCLOUD_PROJECT;
	const location = $env.GOOGLE_VERTEX_LOCATION || $env.GOOGLE_CLOUD_LOCATION || $env.VERTEX_LOCATION;

	const fillPlaceholders = (url: string): string => {
		// Placeholders can only be resolved when both values are known; leave them alone otherwise.
		if (!project || !location) return url;
		const host = location === "global" ? "aiplatform.googleapis.com" : `${location}-aiplatform.googleapis.com`;
		// Swap the templated host first so `global` does not produce a `global-` prefixed host.
		return url
			.replace("https://{location}-aiplatform.googleapis.com", `https://${host}`)
			.replace("https://%7Blocation%7D-aiplatform.googleapis.com", `https://${host}`)
			.replaceAll("{project}", encodeURIComponent(project))
			.replaceAll("%7Bproject%7D", encodeURIComponent(project))
			.replaceAll("{location}", encodeURIComponent(location))
			.replaceAll("%7Blocation%7D", encodeURIComponent(location));
	};

	const rewriteUrl = (url: string): string =>
		fillPlaceholders(url).replace(":streamRawPredict/v1/messages", ":streamRawPredict");

	if (input instanceof Request) {
		const rewrittenUrl = rewriteUrl(input.url);
		return rewrittenUrl === input.url ? input : new Request(rewrittenUrl, input);
	}
	if (input instanceof URL) {
		const original = input.toString();
		const rewrittenUrl = rewriteUrl(original);
		return rewrittenUrl === original ? input : new URL(rewrittenUrl);
	}
	return rewriteUrl(input);
}
174
+
175
/**
 * How a provider's credential is located: either the name of a single
 * environment variable, or a function that computes the value (or `undefined`
 * when nothing usable is configured).
 */
type KeyResolver = string | (() => string | undefined);

/**
 * Provider id -> credential resolver.
 *
 * Insertion order is significant: `Object.keys` over this map is what
 * provider enumeration returns. Function resolvers for Vertex AI and Bedrock
 * return the sentinel `"<authenticated>"` when ambient credentials are
 * detected instead of an actual key.
 */
const serviceProviderMap: Record<string, KeyResolver> = {
	"alibaba-coding-plan": "ALIBABA_CODING_PLAN_API_KEY",
	openai: "OPENAI_API_KEY",
	google: "GEMINI_API_KEY",
	groq: "GROQ_API_KEY",
	cerebras: "CEREBRAS_API_KEY",
	xai: "XAI_API_KEY",
	"xai-oauth": () => $pickenv("XAI_OAUTH_TOKEN", "XAI_API_KEY"),
	fireworks: "FIREWORKS_API_KEY",
	firepass: "FIREPASS_API_KEY",
	"wafer-pass": "WAFER_PASS_API_KEY",
	"wafer-serverless": "WAFER_SERVERLESS_API_KEY",
	openrouter: "OPENROUTER_API_KEY",
	kilo: "KILO_API_KEY",
	"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
	zai: "ZAI_API_KEY",
	"zhipu-coding-plan": "ZHIPU_API_KEY",
	mistral: "MISTRAL_API_KEY",
	minimax: "MINIMAX_API_KEY",
	"minimax-code": "MINIMAX_CODE_API_KEY",
	"minimax-code-cn": "MINIMAX_CODE_CN_API_KEY",
	"opencode-go": "OPENCODE_API_KEY",
	"opencode-zen": "OPENCODE_API_KEY",
	cursor: "CURSOR_ACCESS_TOKEN",
	deepseek: "DEEPSEEK_API_KEY",
	"openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
	"azure-openai-responses": "AZURE_OPENAI_API_KEY",
	exa: "EXA_API_KEY",
	jina: "JINA_API_KEY",
	brave: "BRAVE_API_KEY",
	perplexity: "PERPLEXITY_API_KEY",
	tavily: "TAVILY_API_KEY",
	parallel: "PARALLEL_API_KEY",
	kagi: "KAGI_API_KEY",
	// GitHub Copilot authenticates with a GitHub token; first variable set wins.
	"github-copilot": () => $pickenv("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
	// In Foundry mode the enterprise gateway key is tried before the regular
	// Anthropic credentials; otherwise only the regular ones are considered.
	anthropic: () => {
		if (isFoundryEnabled()) {
			return $pickenv("ANTHROPIC_FOUNDRY_API_KEY", "ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY");
		}
		return $pickenv("ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY");
	},
	"gitlab-duo": "GITLAB_TOKEN",
	// Vertex AI: an explicit GOOGLE_CLOUD_API_KEY wins; otherwise Application
	// Default Credentials plus a project and a location must all be present.
	"google-vertex": () => {
		const explicitKey = $env.GOOGLE_CLOUD_API_KEY;
		if (explicitKey) {
			return explicitKey;
		}
		const adcAvailable = hasVertexAdcCredentials();
		const projectSet = Boolean($env.GOOGLE_CLOUD_PROJECT || $env.GCP_PROJECT || $env.GCLOUD_PROJECT);
		const locationSet = Boolean(
			$env.GOOGLE_VERTEX_LOCATION || $env.GOOGLE_CLOUD_LOCATION || $env.VERTEX_LOCATION,
		);
		return adcAvailable && projectSet && locationSet ? "<authenticated>" : undefined;
	},
	// Amazon Bedrock accepts any one of these credential sources:
	//   - AWS_PROFILE: named profile from ~/.aws/credentials
	//   - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY: standard IAM keys
	//   - AWS_BEARER_TOKEN_BEDROCK: Bedrock API key (bearer token)
	//   - AWS_CONTAINER_CREDENTIALS_*: ECS/Task IAM role credentials
	//   - AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN: IRSA (EKS) web identity
	"amazon-bedrock": () => {
		const ecsRole =
			Boolean($env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI) || Boolean($env.AWS_CONTAINER_CREDENTIALS_FULL_URI);
		const webIdentity = Boolean($env.AWS_WEB_IDENTITY_TOKEN_FILE) && Boolean($env.AWS_ROLE_ARN);
		const staticKeys = Boolean($env.AWS_ACCESS_KEY_ID && $env.AWS_SECRET_ACCESS_KEY);
		const anySource =
			Boolean($env.AWS_PROFILE) || staticKeys || Boolean($env.AWS_BEARER_TOKEN_BEDROCK) || ecsRole || webIdentity;
		return anySource ? "<authenticated>" : undefined;
	},
	synthetic: "SYNTHETIC_API_KEY",
	"cloudflare-ai-gateway": "CLOUDFLARE_AI_GATEWAY_API_KEY",
	huggingface: () => $pickenv("HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"),
	litellm: "LITELLM_API_KEY",
	moonshot: "MOONSHOT_API_KEY",
	nvidia: "NVIDIA_API_KEY",
	nanogpt: "NANO_GPT_API_KEY",
	"lm-studio": "LM_STUDIO_API_KEY",
	ollama: "OLLAMA_API_KEY",
	"ollama-cloud": "OLLAMA_CLOUD_API_KEY",
	"llama.cpp": "LLAMA_CPP_API_KEY",
	qianfan: "QIANFAN_API_KEY",
	"qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
	together: "TOGETHER_API_KEY",
	zenmux: "ZENMUX_API_KEY",
	venice: "VENICE_API_KEY",
	vllm: "VLLM_API_KEY",
	xiaomi: "XIAOMI_API_KEY",
};
270
+
271
/**
 * Resolve a credential for `provider` from known environment variables,
 * e.g. `OPENAI_API_KEY` for `"openai"`.
 *
 * String entries in `serviceProviderMap` are read from `$env`; function
 * entries are invoked and may return either a real secret or the sentinel
 * `"<authenticated>"` (Vertex AI / Bedrock ambient credentials). Several
 * entries resolve OAuth-style tokens that were placed in the environment
 * (for example `ANTHROPIC_OAUTH_TOKEN`); no OAuth flow is performed here.
 *
 * NOTE(review): the original comment states the lookup order is Bun.env, then
 * cwd/.env, then ~/.env — that is a property of `$env` and cannot be
 * confirmed from this function.
 *
 * @param provider Provider id, matched against the map's own keys only.
 * @returns The resolved value, or `undefined` for unknown providers and for
 *   providers with nothing configured.
 */
export function getEnvApiKey(provider: string): string | undefined {
	// `serviceProviderMap` is a plain object, so an unguarded index would also
	// hit inherited members: "toString" would be *called* and yield
	// "[object Undefined]" as a key, "hasOwnProperty" would throw. Only own
	// entries count as providers.
	if (!Object.prototype.hasOwnProperty.call(serviceProviderMap, provider)) {
		return undefined;
	}
	const resolver = serviceProviderMap[provider];
	if (typeof resolver === "string") {
		return $env[resolver];
	}
	return resolver?.();
}
284
+
285
/**
 * List the id of every provider for which `getEnvApiKey` has an env-var
 * fallback, in the declaration order of `serviceProviderMap`.
 *
 * Consumed by `aery auth-broker migrate --include-env` to find env-sourced
 * keys that should be uploaded to the broker.
 */
export function listProvidersWithEnvKey(): string[] {
	return Object.entries(serviceProviderMap).map(([providerId]) => providerId);
}
293
+
294
/**
 * Start a streaming request for `model` using API-specific options.
 *
 * Routing order: extension-registered custom API, GitLab Duo, the two
 * credential-less APIs (Vertex AI and Bedrock), then the per-API switch, which
 * requires an API key from the options or from the environment.
 *
 * @param model Target model; `model.api` selects the provider implementation.
 * @param context Conversation context forwarded unchanged to the provider.
 * @param options API-specific options; passed through `withRequestDebugFetch` first.
 * @returns The provider's assistant-message event stream.
 * @throws Error when a key-based provider has no API key, or `model.api` is unhandled.
 */
export function stream<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: OptionsForApi<TApi>,
): AssistantMessageEventStream {
	const requestOptions = withRequestDebugFetch(options as StreamOptions | undefined) as
		| OptionsForApi<TApi>
		| undefined;

	// Extension-provided APIs (e.g. "vertex-claude-api") take precedence over built-ins.
	const extensionApi = getCustomApi(model.api);
	if (extensionApi) {
		return extensionApi.stream(model, context, requestOptions as StreamOptions);
	}

	if (isGitLabDuoModel(model)) {
		const gitlabKey = (requestOptions as StreamOptions | undefined)?.apiKey || getEnvApiKey(model.provider);
		if (!gitlabKey) {
			throw new Error(`No API key for provider: ${model.provider}`);
		}
		return streamGitLabDuo(model, context, {
			...(requestOptions as SimpleStreamOptions | undefined),
			apiKey: gitlabKey,
		});
	}

	// Vertex AI uses Application Default Credentials, not API keys.
	if (model.api === "google-vertex") {
		return streamGoogleVertex(model as Model<"google-vertex">, context, requestOptions as GoogleVertexOptions);
	}

	// Bedrock has no API keys either: credentials come from the standard AWS
	// environment variables or from the selected AWS profile.
	if (model.api === "bedrock-converse-stream") {
		return streamBedrock(
			model as Model<"bedrock-converse-stream">,
			context,
			(requestOptions || {}) as BedrockOptions,
		);
	}

	const resolvedKey = requestOptions?.apiKey || getEnvApiKey(model.provider);
	if (!resolvedKey) {
		throw new Error(`No API key for provider: ${model.provider}`);
	}

	// Vertex-authenticated models get a placeholder key plus an authenticating
	// fetch wrapper; every other model just receives the resolved key.
	const providerOptions = isGoogleVertexAuthenticatedModel(model)
		? {
				...requestOptions,
				apiKey: "vertex-adc",
				fetch: createVertexAuthenticatedFetch(requestOptions as StreamOptions | undefined),
			}
		: { ...requestOptions, apiKey: resolvedKey };

	const api: Api = model.api;
	switch (api) {
		case "anthropic-messages": {
			const anthropicOptions = providerOptions as AnthropicOptions;
			// An explicit option wins; otherwise inherit the model's OAuth flag.
			const isOAuth = anthropicOptions.isOAuth ?? model.isOAuth;
			return streamAnthropic(model as Model<"anthropic-messages">, context, { ...anthropicOptions, isOAuth });
		}

		case "openai-completions":
			return streamOpenAICompletions(model as Model<"openai-completions">, context, providerOptions as any);

		case "openai-responses": {
			const responsesModel = model as Model<"openai-responses">;
			// xai-oauth speaks the Responses API through its own handler.
			return model.provider === "xai-oauth"
				? streamXAIResponses(responsesModel, context, providerOptions as any)
				: streamOpenAIResponses(responsesModel, context, providerOptions as any);
		}

		case "azure-openai-responses":
			return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);

		case "openai-codex-responses":
			return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);

		case "google-generative-ai":
			return streamGoogle(model as Model<"google-generative-ai">, context, providerOptions);

		case "google-gemini-cli":
			return streamGoogleGeminiCli(
				model as Model<"google-gemini-cli">,
				context,
				providerOptions as GoogleGeminiCliOptions,
			);

		case "ollama-chat":
			return streamOllama(model as Model<"ollama-chat">, context, providerOptions as OllamaChatOptions);

		case "cursor-agent":
			return streamCursor(model as Model<"cursor-agent">, context, providerOptions as CursorOptions);

		default:
			throw new Error(`Unhandled API: ${api}`);
	}
}
390
+
391
/**
 * Non-streaming convenience wrapper: runs `stream` and resolves with the
 * stream's final result.
 *
 * @param model Target model.
 * @param context Conversation context.
 * @param options API-specific options, forwarded to `stream`.
 * @returns The final assistant message produced by the stream.
 */
export async function complete<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: OptionsForApi<TApi>,
): Promise<AssistantMessage> {
	return stream(model, context, options).result();
}
399
+
400
/**
 * Outcome of a stream attempt that failed in a way eligible for a credential
 * rotation retry, captured before anything was forwarded to the caller.
 */
interface AuthRetryFailure {
	/** The thrown value, or an `Error` synthesized from a terminal error event. */
	error: unknown;
	/** Events withheld during the attempt; replayed if the retry is abandoned. */
	bufferedEvents: AssistantMessageEvent[];
	/** The provider's terminal error event, when the failure arrived as an event rather than a throw. */
	terminalEvent?: Extract<AssistantMessageEvent, { type: "error" }>;
}
405
+
406
/**
 * Determine the HTTP status attached to an errored assistant message.
 *
 * An explicit `errorStatus` is returned as-is (including `0`); otherwise the
 * status is parsed out of `errorMessage` when one is present.
 *
 * @returns The status, or `undefined` when the message carries neither.
 */
function extractStatusFromAssistantError(message: AssistantMessage): number | undefined {
	const { errorStatus, errorMessage } = message;
	if (errorStatus !== undefined) {
		return errorStatus;
	}
	return errorMessage ? extractHttpStatusFromError({ message: errorMessage }) : undefined;
}
411
+
412
/**
 * Decide whether an upstream failure is worth retrying with a different
 * credential via `onAuthError`.
 *
 * Two situations qualify:
 *  - HTTP 401: the credential itself is bad.
 *  - Usage-limit phrasing (Codex's "You have hit your ChatGPT usage limit",
 *    Anthropic's "usage_limit_reached", Google's "resource_exhausted"): this
 *    account is parked, but a sibling credential can usually pick the request
 *    up.
 * The auth-gateway maps the former to `invalidateCredentialMatching` and the
 * latter to `markUsageLimitReached`.
 *
 * @param error Accepted for call-site symmetry; not inspected.
 * @param status HTTP status of the failure, if known.
 * @param message Error text of the failure, if any.
 */
function isRetryableUpstreamError(error: unknown, status: number | undefined, message: string | undefined): boolean {
	void error;
	if (status === 401) {
		return true;
	}
	if (!message) {
		return false;
	}
	return isUsageLimitError(message);
}
423
+
424
/**
 * Build an `Error` representing an errored assistant message, suitable for
 * handing to `onAuthError`.
 *
 * The error text is the message's `errorMessage`, falling back to a generic
 * authentication-failure string. A `status` property is attached only when a
 * status can be determined for the message.
 */
function createAssistantAuthError(message: AssistantMessage): Error & { status?: number } {
	const text = message.errorMessage ?? "Provider authentication failed";
	const authError: Error & { status?: number } = new Error(text);
	const httpStatus = extractStatusFromAssistantError(message);
	if (httpStatus !== undefined) {
		authError.status = httpStatus;
	}
	return authError;
}
430
+
431
/**
 * Forward previously withheld events to `stream`, preserving their order.
 * The `events` array itself is left untouched.
 */
function emitBufferedEvents(stream: AssistantMessageEventStream, events: AssistantMessageEvent[]): void {
	events.forEach((pending) => {
		stream.push(pending);
	});
}
436
+
437
/**
 * Start a streaming request using provider-agnostic `SimpleStreamOptions`.
 *
 * When the caller supplies `onAuthError` and a key is available, the request
 * runs inside a one-shot retry wrapper: `start` events are withheld until the
 * attempt either emits something else or fails. If it fails with a retryable
 * upstream error before anything else was forwarded, `onAuthError` is asked
 * for a replacement key and the request is re-run once with it. If no
 * different key comes back, the withheld events and the original failure are
 * surfaced as they were.
 *
 * Without the wrapper, routing is: aery-native transport, extension-registered
 * custom API, the credential-less APIs (Vertex AI / Bedrock), then key-based
 * providers (GitLab Duo, Kimi, Synthetic, and finally the generic mapping via
 * `mapOptionsForApi` + `stream`).
 *
 * @throws Error when a key-based provider has no API key available.
 */
export function streamSimple<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: SimpleStreamOptions,
): AssistantMessageEventStream {
	const requestOptions = withRequestDebugFetch(options);

	const authErrorHandler = requestOptions?.onAuthError;
	const retryApiKey = authErrorHandler ? (requestOptions?.apiKey ?? getEnvApiKey(model.provider)) : undefined;

	if (retryApiKey && authErrorHandler) {
		const relay = new AssistantMessageEventStream();

		// Runs one attempt with `apiKey`, forwarding events to `relay`. Returns a
		// failure record only when `captureAuthFailure` is set and the attempt died
		// with a retryable error before any non-`start` event was forwarded.
		const attempt = async (apiKey: string, captureAuthFailure: boolean): Promise<AuthRetryFailure | undefined> => {
			const held: AssistantMessageEvent[] = [];
			// Flips once an event that cannot be replayed has reached the caller.
			let committed = false;
			const release = (): void => {
				emitBufferedEvents(relay, held);
				held.length = 0;
			};
			const stillCapturing = (): boolean => !committed && captureAuthFailure;

			try {
				// The nested call has `onAuthError` cleared, so it takes the plain path.
				const source = streamSimple(model, context, { ...requestOptions, apiKey, onAuthError: undefined });
				for await (const event of source) {
					if (!committed && event.type === "start") {
						held.push(event);
						continue;
					}
					if (
						stillCapturing() &&
						event.type === "error" &&
						isRetryableUpstreamError(
							event.error,
							extractStatusFromAssistantError(event.error),
							event.error.errorMessage,
						)
					) {
						return { error: createAssistantAuthError(event.error), bufferedEvents: held, terminalEvent: event };
					}
					release();
					committed = true;
					relay.push(event);
					if (relay.done) return undefined;
				}
				release();
				if (!relay.done) relay.end(await source.result());
			} catch (error) {
				if (
					stillCapturing() &&
					isRetryableUpstreamError(
						error,
						extractHttpStatusFromError(error),
						error instanceof Error ? error.message : undefined,
					)
				) {
					return { error, bufferedEvents: held };
				}
				release();
				relay.fail(error);
			}
			return undefined;
		};

		// Surfaces a captured failure exactly as the provider produced it.
		const surfaceFailure = (failure: AuthRetryFailure): void => {
			emitBufferedEvents(relay, failure.bufferedEvents);
			const { terminalEvent } = failure;
			if (!terminalEvent) {
				relay.fail(failure.error);
				return;
			}
			relay.push(terminalEvent);
		};

		void (async () => {
			const firstFailure = await attempt(retryApiKey, true);
			if (!firstFailure) return;

			let rotatedKey: string | undefined;
			try {
				rotatedKey = await authErrorHandler(model.provider, retryApiKey, firstFailure.error);
			} catch {
				// A throwing handler is treated the same as "no replacement key".
				rotatedKey = undefined;
			}

			if (rotatedKey && rotatedKey !== retryApiKey) {
				await attempt(rotatedKey, false);
				return;
			}
			surfaceFailure(firstFailure);
		})();
		return relay;
	}

	// Aery-native transport short-circuits the per-provider dispatch entirely:
	// the gateway resolves provider + credential server-side, so no `apiKey`
	// from `getEnvApiKey` is needed here — `options.apiKey` carries the gateway
	// bearer instead. This comes BEFORE the custom-API check so that
	// extension-registered APIs can't accidentally override a configured
	// aery-native transport.
	if (model.transport === "aery-native") {
		return streamAeryNative(model, context, requestOptions);
	}

	// Extension-provided APIs from the custom API registry.
	const extensionApi = getCustomApi(model.api);
	if (extensionApi) {
		return extensionApi.streamSimple(model, context, requestOptions);
	}

	// Vertex AI uses Application Default Credentials and Bedrock sources its
	// credentials from the standard AWS env variables or the given AWS profile,
	// so neither is given an API key.
	if (model.api === "google-vertex" || model.api === "bedrock-converse-stream") {
		return stream(model, context, mapOptionsForApi(model, requestOptions, undefined));
	}

	const apiKey = requestOptions?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key for provider: ${model.provider}`);
	}

	// GitLab Duo wraps Anthropic/OpenAI behind GitLab AI Gateway direct access tokens.
	if (isGitLabDuoModel(model)) {
		return streamGitLabDuo(model, context, { ...requestOptions, apiKey });
	}

	// Kimi Code has a dedicated handler wrapping the OpenAI or Anthropic API; it
	// receives the raw SimpleStreamOptions and maps them itself.
	if (isKimiModel(model)) {
		const format = requestOptions?.kimiApiFormat ?? "anthropic";
		return streamKimi(model as Model<"openai-completions">, context, { ...requestOptions, apiKey, format });
	}

	// Synthetic likewise has a dedicated handler that takes the raw options; the
	// wire format defaults to OpenAI.
	if (isSyntheticModel(model)) {
		const format = requestOptions?.syntheticApiFormat ?? "openai";
		return streamSynthetic(model as Model<"openai-completions">, context, { ...requestOptions, apiKey, format });
	}

	return stream(model, context, mapOptionsForApi(model, requestOptions, apiKey));
}
588
+
589
/**
 * Non-streaming convenience wrapper: runs `streamSimple` and resolves with the
 * stream's final result.
 *
 * @param model Target model.
 * @param context Conversation context.
 * @param options Provider-agnostic options, forwarded to `streamSimple`.
 * @returns The final assistant message produced by the stream.
 */
export async function completeSimple<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: SimpleStreamOptions,
): Promise<AssistantMessage> {
	return streamSimple(model, context, options).result();
}
597
+
598
/** Output tokens kept free when a thinking budget has to be squeezed under a max-token limit. */
const MIN_OUTPUT_TOKENS = 1_024;

/** Exported token buffer. NOTE(review): not referenced in this part of the file — confirm its consumers. */
export const OUTPUT_FALLBACK_BUFFER = 4_000;

/** Interleaved thinking for Anthropic is on unless `PI_NO_INTERLEAVED_THINKING` is exactly "1". */
const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PI_NO_INTERLEAVED_THINKING !== "1";
601
+
602
/** Default thinking-token budget per effort level for Anthropic budget-based thinking. */
export const ANTHROPIC_THINKING: Record<Effort, number> = {
	minimal: 1_024,
	low: 4_096,
	medium: 8_192,
	high: 16_384,
	xhigh: 32_768,
};

/** Default thinking-token budget per effort level for the Google Gemini CLI API. */
const GOOGLE_THINKING: Record<Effort, number> = {
	minimal: 1_024,
	low: 4_096,
	medium: 8_192,
	high: 16_384,
	xhigh: 24_575,
};

/** Default thinking-token budget per effort level for Claude models on Bedrock. */
const BEDROCK_CLAUDE_THINKING: Record<Effort, number> = {
	minimal: 1_024,
	low: 2_048,
	medium: 8_192,
	high: 16_384,
	xhigh: 16_384,
};
625
+
626
/**
 * Work out the thinking budget for a Bedrock request.
 *
 * @returns `null` when no reasoning was requested or the model does not
 *   reason; otherwise the validated effort level together with its budget
 *   (caller-supplied per-level budget first, built-in table second).
 */
function resolveBedrockThinkingBudget(
	model: Model<"bedrock-converse-stream">,
	options?: SimpleStreamOptions,
): { budget: number; level: Effort } | null {
	const requested = options?.reasoning;
	if (!requested || !model.reasoning) {
		return null;
	}
	const level = requireSupportedEffort(model, requested);
	const callerBudget = options?.thinkingBudgets?.[level];
	return { budget: callerBudget ?? BEDROCK_CLAUDE_THINKING[level], level };
}
635
+
636
/**
 * Translate a provider-neutral tool choice into Anthropic's form.
 *
 * String choices: `"required"` becomes `"any"`; `"auto"`, `"none"` and `"any"`
 * pass through. Object choices naming a tool (either `{type: "tool", name}` or
 * the OpenAI-style `{type: "function", ...}`) become `{type: "tool", name}`.
 *
 * @returns `undefined` for a missing choice, an unknown choice, or a named
 *   choice without a name.
 */
export function mapAnthropicToolChoice(choice?: ToolChoice): AnthropicOptions["toolChoice"] {
	if (!choice) return undefined;

	if (typeof choice === "string") {
		switch (choice) {
			case "required":
				return "any";
			case "auto":
			case "none":
			case "any":
				return choice;
			default:
				return undefined;
		}
	}

	switch (choice.type) {
		case "tool":
			return choice.name ? { type: "tool", name: choice.name } : undefined;
		case "function": {
			// OpenAI-style choices nest the name under `function`; the flat form carries it directly.
			const toolName = "function" in choice ? choice.function?.name : choice.name;
			return toolName ? { type: "tool", name: toolName } : undefined;
		}
		default:
			return undefined;
	}
}
652
+
653
/**
 * Translate a provider-neutral tool choice into the form shared by the Google
 * APIs (Generative AI, Gemini CLI, Vertex).
 *
 * String choices: `"required"` becomes `"any"`; `"auto"`, `"none"` and `"any"`
 * pass through. A named tool is expressed as an `ANY`-mode allow-list with a
 * single entry, mirroring the Anthropic mapper's `{type: "tool", name}`.
 *
 * @returns `undefined` for a missing choice, an unknown choice, or a named
 *   choice without a name.
 */
export function mapGoogleToolChoice(
	choice?: ToolChoice,
): GoogleOptions["toolChoice"] | GoogleGeminiCliOptions["toolChoice"] | GoogleVertexOptions["toolChoice"] {
	if (!choice) return undefined;

	if (typeof choice === "string") {
		switch (choice) {
			case "required":
				return "any";
			case "auto":
			case "none":
			case "any":
				return choice;
			default:
				return undefined;
		}
	}

	switch (choice.type) {
		case "tool":
			return choice.name ? { mode: "ANY", allowedFunctionNames: [choice.name] } : undefined;
		case "function": {
			// OpenAI-style choices nest the name under `function`; the flat form carries it directly.
			const toolName = "function" in choice ? choice.function?.name : choice.name;
			return toolName ? { mode: "ANY", allowedFunctionNames: [toolName] } : undefined;
		}
		default:
			return undefined;
	}
}
673
+
674
/**
 * Translate a provider-neutral tool choice into OpenAI's form.
 *
 * String choices: `"any"` becomes `"required"`; `"auto"`, `"none"` and
 * `"required"` pass through. A named tool becomes
 * `{type: "function", function: {name}}`.
 *
 * @returns `undefined` for a missing choice, an unknown choice, or a named
 *   choice without a name.
 */
function mapOpenAiToolChoice(choice?: ToolChoice): OpenAICompletionsOptions["toolChoice"] {
	if (!choice) return undefined;

	if (typeof choice === "string") {
		switch (choice) {
			case "any":
				return "required";
			case "auto":
			case "none":
			case "required":
				return choice;
			default:
				return undefined;
		}
	}

	switch (choice.type) {
		case "tool":
			return choice.name ? { type: "function", function: { name: choice.name } } : undefined;
		case "function": {
			// Accept both the nested OpenAI shape and the flat `{type: "function", name}` shape.
			const toolName = "function" in choice ? choice.function?.name : choice.name;
			return toolName ? { type: "function", function: { name: toolName } } : undefined;
		}
		default:
			return undefined;
	}
}
690
+
691
/**
 * Determine the reasoning effort to send on OpenAI-style APIs.
 *
 * @returns `undefined` when no reasoning was requested, when the model does
 *   not reason, or when the model omits the wire effort parameter; otherwise
 *   the effort validated against the model.
 */
function resolveOpenAiReasoningEffort<TApi extends Api>(
	model: Model<TApi>,
	options?: SimpleStreamOptions,
): Effort | undefined {
	const requested = options?.reasoning;
	if (!requested) return undefined;
	if (!model.reasoning) return undefined;

	// Models with compat.supportsReasoningEffort: false reason natively but
	// reject the wire effort param. The wire-side omitReasoningEffort gate
	// (providers/xai-responses.ts:78) performs the actual strip; bailing out
	// here avoids a redundant requireSupportedEffort throw that would defeat
	// that gate and surface a confusing
	// "Compaction failed: Thinking effort high is not supported by..." to
	// the user.
	return modelOmitsReasoningEffort(model) ? undefined : requireSupportedEffort(model, requested);
}
707
+
708
/**
 * Type-only helper: checks `api` against the options type of one concrete API
 * and widens the result to the options type of the whole `Api` union. The
 * value is returned unchanged.
 */
function castApi<TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> {
	return api as OptionsForApi<Api>;
}
709
+
710
/**
 * Convert provider-agnostic `SimpleStreamOptions` into the option shape of the
 * API that `model` speaks.
 *
 * Every result starts from a shared `base` (sampling parameters, limits,
 * hooks, headers, key). Each API then adds its own reasoning/thinking and
 * tool-choice representation. `maxTokens` defaults to the model limit capped
 * at 32000 when the caller gives none.
 *
 * @param model Target model; `model.api` selects the mapping.
 * @param options Caller options (may be omitted).
 * @param apiKey Key to use; falls back to `options.apiKey` when empty.
 * @returns Options typed for `model.api`.
 * @throws Error for an API with no mapping. Effort validation helpers may also throw.
 */
function mapOptionsForApi<TApi extends Api>(
	model: Model<TApi>,
	options?: SimpleStreamOptions,
	apiKey?: string,
): OptionsForApi<TApi> {
	const base = {
		temperature: options?.temperature,
		topP: options?.topP,
		topK: options?.topK,
		minP: options?.minP,
		presencePenalty: options?.presencePenalty,
		repetitionPenalty: options?.repetitionPenalty,
		maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
		signal: options?.signal,
		apiKey: apiKey || options?.apiKey,
		cacheRetention: options?.cacheRetention,
		headers: options?.headers,
		initiatorOverride: options?.initiatorOverride,
		maxRetryDelayMs: options?.maxRetryDelayMs,
		metadata: options?.metadata,
		taskBudget: options?.taskBudget,
		sessionId: options?.sessionId,
		promptCacheKey: options?.promptCacheKey,
		streamFirstEventTimeoutMs: options?.streamFirstEventTimeoutMs,
		streamIdleTimeoutMs: options?.streamIdleTimeoutMs,
		providerSessionState: options?.providerSessionState,
		onPayload: options?.onPayload,
		onResponse: options?.onResponse,
		onSseEvent: options?.onSseEvent,
		execHandlers: options?.execHandlers,
		fetch: options?.fetch,
	};

	switch (model.api) {
		case "anthropic-messages": {
			// Fields that are identical in every Anthropic outcome below.
			const anthropicCommon = {
				...base,
				toolChoice: mapAnthropicToolChoice(options?.toolChoice),
				thinkingDisplay: options?.hideThinkingSummary ? ("omitted" as const) : undefined,
				serviceTier: options?.serviceTier,
			};
			const withoutThinking = () => castApi<"anthropic-messages">({ ...anthropicCommon, thinkingEnabled: false });

			// Explicitly disable thinking when reasoning is not specified or the model doesn't support it.
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return withoutThinking();
			}

			let thinkingBudget = options?.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
			if (thinkingBudget <= 0) {
				return withoutThinking();
			}

			// For Opus 4.6+ and Sonnet 4.6+: adaptive thinking driven by an effort level.
			// Older models use budget-based thinking (below).
			if (model.thinking?.mode === "anthropic-adaptive") {
				return castApi<"anthropic-messages">({
					...anthropicCommon,
					thinkingEnabled: true,
					effort: mapEffortToAnthropicAdaptiveEffort(model, reasoning),
				});
			}

			// Interleaved thinking: pass the budget through without touching maxTokens.
			if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
				return castApi<"anthropic-messages">({
					...anthropicCommon,
					thinkingEnabled: true,
					thinkingBudgetTokens: thinkingBudget,
				});
			}

			// Caller's maxTokens is the desired output; add the thinking budget on top, capped at the model limit.
			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);

			// Not enough room for thinking + output: shrink the thinking budget.
			if (maxTokens <= thinkingBudget) {
				thinkingBudget = maxTokens - MIN_OUTPUT_TOKENS;
			}

			// A budget that shrank to nothing disables thinking (and keeps the un-inflated maxTokens).
			if (thinkingBudget <= 0) {
				return withoutThinking();
			}
			return castApi<"anthropic-messages">({
				...anthropicCommon,
				maxTokens,
				thinkingEnabled: true,
				thinkingBudgetTokens: thinkingBudget,
			});
		}

		case "bedrock-converse-stream": {
			const bedrockBase: BedrockOptions = {
				...base,
				reasoning: options?.reasoning,
				thinkingBudgets: options?.thinkingBudgets,
				toolChoice: mapAnthropicToolChoice(options?.toolChoice),
				thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
			};
			// Adaptive mode sends effort directly, no budget_tokens — skip budget inflation.
			if (model.thinking?.mode === "anthropic-adaptive") {
				return castApi<"bedrock-converse-stream">(bedrockBase);
			}
			const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
			if (!budgetInfo) return castApi<"bedrock-converse-stream">(bedrockBase);

			let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
			let thinkingBudgets = bedrockBase.thinkingBudgets;
			// First try to make room by raising maxTokens (never above the model limit, never lowering it).
			if (maxTokens <= budgetInfo.budget) {
				const desiredMaxTokens = Math.min(model.maxTokens, budgetInfo.budget + MIN_OUTPUT_TOKENS);
				if (desiredMaxTokens > maxTokens) {
					maxTokens = desiredMaxTokens;
				}
			}
			// Still no room: shrink the budget for this level instead.
			if (maxTokens <= budgetInfo.budget) {
				const adjustedBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
				thinkingBudgets = { ...(thinkingBudgets ?? {}), [budgetInfo.level]: adjustedBudget };
			}
			return castApi<"bedrock-converse-stream">({ ...bedrockBase, maxTokens, thinkingBudgets });
		}

		case "openai-completions":
			return castApi<"openai-completions">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				disableReasoning: options?.disableReasoning,
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				openrouterVariant: options?.openrouterVariant,
			});

		case "openai-responses":
			return castApi<"openai-responses">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
			});

		case "azure-openai-responses":
			return castApi<"azure-openai-responses">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
			});

		case "openai-codex-responses":
			return castApi<"openai-codex-responses">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				preferWebsockets: options?.preferWebsockets,
				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
			});

		case "google-generative-ai": {
			const googleCommon = { ...base, toolChoice: mapGoogleToolChoice(options?.toolChoice) };

			// Explicitly disable thinking when reasoning is not specified or the model doesn't support it.
			// This is needed because Gemini has "dynamic thinking" enabled by default.
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return castApi<"google-generative-ai">({ ...googleCommon, thinking: { enabled: false } });
			}

			const googleModel = model as Model<"google-generative-ai">;
			const effort = requireSupportedEffort(googleModel, reasoning);

			// Gemini 3+ models use thinkingLevel exclusively instead of thinkingBudget.
			// https://ai.google.dev/gemini-api/docs/thinking#set-budget
			if (googleModel.thinking?.mode === "google-level") {
				return castApi<"google-generative-ai">({
					...googleCommon,
					thinking: {
						enabled: true,
						level: mapEffortToGoogleThinkingLevel(googleModel, effort),
					},
				});
			}

			// Tagged with this case's own API (the original used the gemini-cli tag here).
			return castApi<"google-generative-ai">({
				...googleCommon,
				thinking: {
					enabled: true,
					budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
				},
			});
		}

		case "google-gemini-cli": {
			const geminiCliCommon = { ...base, toolChoice: mapGoogleToolChoice(options?.toolChoice) };
			const withoutThinking = () => castApi<"google-gemini-cli">({ ...geminiCliCommon, thinking: { enabled: false } });

			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return withoutThinking();
			}

			const effort = requireSupportedEffort(model, reasoning);

			// Gemini 3+ models use thinkingLevel instead of thinkingBudget.
			if (model.thinking?.mode === "google-level") {
				return castApi<"google-gemini-cli">({
					...geminiCliCommon,
					thinking: {
						enabled: true,
						level: mapEffortToGoogleThinkingLevel(model, effort),
					},
				});
			}

			let thinkingBudget = options?.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];

			// Caller's maxTokens is the desired output; add the thinking budget on top, capped at the model limit.
			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);

			// Not enough room for thinking + output: shrink the thinking budget (never below zero).
			if (maxTokens <= thinkingBudget) {
				thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
			}

			// A budget that shrank to nothing disables thinking (and keeps the un-inflated maxTokens).
			if (thinkingBudget <= 0) {
				return withoutThinking();
			}
			return castApi<"google-gemini-cli">({
				...geminiCliCommon,
				maxTokens,
				thinking: { enabled: true, budgetTokens: thinkingBudget },
			});
		}

		case "google-vertex": {
			const vertexCommon = { ...base, toolChoice: mapGoogleToolChoice(options?.toolChoice) };

			// Explicitly disable thinking when reasoning is not specified or the model doesn't support it.
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return castApi<"google-vertex">({ ...vertexCommon, thinking: { enabled: false } });
			}

			const vertexModel = model as Model<"google-vertex">;
			const effort = requireSupportedEffort(vertexModel, reasoning);
			// The Gemini helpers are typed for the Generative AI model shape; reuse them for Vertex.
			const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;

			if (geminiModel.thinking?.mode === "google-level") {
				return castApi<"google-vertex">({
					...vertexCommon,
					thinking: {
						enabled: true,
						level: mapEffortToGoogleThinkingLevel(geminiModel, effort),
					},
				});
			}

			return castApi<"google-vertex">({
				...vertexCommon,
				thinking: {
					enabled: true,
					budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
				},
			});
		}

		case "ollama-chat":
			return castApi<"ollama-chat">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: options?.toolChoice,
			});

		case "cursor-agent": {
			// Cursor-specific hooks win over the generic ones.
			const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
			const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;
			return castApi<"cursor-agent">({
				...base,
				execHandlers,
				onToolResult,
			});
		}

		default:
			throw new Error(`Unhandled API in mapOptionsForApi: ${model.api}`);
	}
}
1036
+
1037
/**
 * Pick the thinking-token budget for a Google model at a given effort.
 *
 * The effort is validated against the model first (the validator's return
 * value is not used). A caller-supplied budget for this effort wins. Models
 * whose id contains "2.5-" then use a fixed table; anything else gets `-1`,
 * which requests dynamic thinking.
 *
 * See https://ai.google.dev/gemini-api/docs/thinking#set-budget
 */
function getGoogleBudget(
	model: Model<"google-generative-ai">,
	effort: Effort,
	customBudgets?: ThinkingBudgets,
): number {
	requireSupportedEffort(model, effort);

	// Custom budgets take precedence when one is provided for this level.
	const callerBudget = customBudgets?.[effort];
	if (callerBudget !== undefined) {
		return callerBudget;
	}

	// Unknown model family: let the model decide dynamically.
	if (!model.id.includes("2.5-")) {
		return -1;
	}

	if (effort === "minimal") return 128;
	if (effort === "low") return 2048;
	if (effort === "medium") return 8192;
	// Every remaining effort level: the flash variants get the smaller of the two values.
	return model.id.includes("2.5-flash") ? 24576 : 32768;
}