@gajae-code/ai 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/CHANGELOG.md +2644 -0
  2. package/README.md +1181 -0
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +5 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +96 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/types.d.ts +105 -0
  10. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  11. package/dist/types/auth-gateway/http.d.ts +39 -0
  12. package/dist/types/auth-gateway/index.d.ts +3 -0
  13. package/dist/types/auth-gateway/server.d.ts +17 -0
  14. package/dist/types/auth-gateway/types.d.ts +115 -0
  15. package/dist/types/auth-storage.d.ts +641 -0
  16. package/dist/types/cli.d.ts +2 -0
  17. package/dist/types/index.d.ts +49 -0
  18. package/dist/types/model-cache.d.ts +17 -0
  19. package/dist/types/model-manager.d.ts +62 -0
  20. package/dist/types/model-thinking.d.ts +71 -0
  21. package/dist/types/models.d.ts +12 -0
  22. package/dist/types/provider-details.d.ts +24 -0
  23. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  24. package/dist/types/provider-models/descriptors.d.ts +48 -0
  25. package/dist/types/provider-models/google.d.ts +20 -0
  26. package/dist/types/provider-models/index.d.ts +5 -0
  27. package/dist/types/provider-models/ollama.d.ts +7 -0
  28. package/dist/types/provider-models/openai-compat.d.ts +237 -0
  29. package/dist/types/provider-models/special.d.ts +16 -0
  30. package/dist/types/providers/amazon-bedrock.d.ts +36 -0
  31. package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
  32. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  33. package/dist/types/providers/anthropic.d.ts +188 -0
  34. package/dist/types/providers/aws-credentials.d.ts +43 -0
  35. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  36. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  37. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  38. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  39. package/dist/types/providers/cursor.d.ts +42 -0
  40. package/dist/types/providers/error-message.d.ts +27 -0
  41. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  42. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  43. package/dist/types/providers/google-auth.d.ts +24 -0
  44. package/dist/types/providers/google-gemini-cli.d.ts +72 -0
  45. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  46. package/dist/types/providers/google-shared.d.ts +163 -0
  47. package/dist/types/providers/google-types.d.ts +138 -0
  48. package/dist/types/providers/google-vertex.d.ts +7 -0
  49. package/dist/types/providers/google.d.ts +4 -0
  50. package/dist/types/providers/grammar.d.ts +1 -0
  51. package/dist/types/providers/kimi.d.ts +27 -0
  52. package/dist/types/providers/mock.d.ts +175 -0
  53. package/dist/types/providers/ollama.d.ts +6 -0
  54. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  55. package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
  56. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  57. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  58. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  59. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  60. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  61. package/dist/types/providers/openai-completions-compat.d.ts +25 -0
  62. package/dist/types/providers/openai-completions.d.ts +33 -0
  63. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  64. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  65. package/dist/types/providers/openai-responses-shared.d.ts +89 -0
  66. package/dist/types/providers/openai-responses.d.ts +32 -0
  67. package/dist/types/providers/pi-native-client.d.ts +13 -0
  68. package/dist/types/providers/pi-native-server.d.ts +68 -0
  69. package/dist/types/providers/register-builtins.d.ts +31 -0
  70. package/dist/types/providers/synthetic.d.ts +26 -0
  71. package/dist/types/providers/transform-messages.d.ts +12 -0
  72. package/dist/types/providers/vision-guard.d.ts +8 -0
  73. package/dist/types/rate-limit-utils.d.ts +19 -0
  74. package/dist/types/stream.d.ts +24 -0
  75. package/dist/types/types.d.ts +746 -0
  76. package/dist/types/usage/claude.d.ts +3 -0
  77. package/dist/types/usage/gemini.d.ts +2 -0
  78. package/dist/types/usage/github-copilot.d.ts +7 -0
  79. package/dist/types/usage/google-antigravity.d.ts +2 -0
  80. package/dist/types/usage/kimi.d.ts +2 -0
  81. package/dist/types/usage/minimax-code.d.ts +2 -0
  82. package/dist/types/usage/openai-codex.d.ts +3 -0
  83. package/dist/types/usage/shared.d.ts +1 -0
  84. package/dist/types/usage/zai.d.ts +2 -0
  85. package/dist/types/usage.d.ts +258 -0
  86. package/dist/types/utils/abort.d.ts +19 -0
  87. package/dist/types/utils/anthropic-auth.d.ts +31 -0
  88. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  89. package/dist/types/utils/discovery/codex.d.ts +38 -0
  90. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  91. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  92. package/dist/types/utils/discovery/index.d.ts +4 -0
  93. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  94. package/dist/types/utils/event-stream.d.ts +28 -0
  95. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  96. package/dist/types/utils/foundry.d.ts +1 -0
  97. package/dist/types/utils/h2-fetch.d.ts +22 -0
  98. package/dist/types/utils/http-inspector.d.ts +31 -0
  99. package/dist/types/utils/idle-iterator.d.ts +67 -0
  100. package/dist/types/utils/json-parse.d.ts +10 -0
  101. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  102. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  103. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  104. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  105. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  106. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  107. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  108. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  109. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  110. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  111. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  112. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  113. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  114. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  115. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  116. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  117. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  118. package/dist/types/utils/oauth/index.d.ts +38 -0
  119. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  120. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  121. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  122. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  123. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  124. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  125. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  126. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  127. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  128. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  129. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  130. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  131. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  132. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  133. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  134. package/dist/types/utils/oauth/pkce.d.ts +8 -0
  135. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  136. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  137. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  138. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  139. package/dist/types/utils/oauth/together.d.ts +1 -0
  140. package/dist/types/utils/oauth/types.d.ts +44 -0
  141. package/dist/types/utils/oauth/venice.d.ts +18 -0
  142. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  143. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  144. package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
  145. package/dist/types/utils/oauth/zai.d.ts +18 -0
  146. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  147. package/dist/types/utils/overflow.d.ts +54 -0
  148. package/dist/types/utils/parse-bind.d.ts +23 -0
  149. package/dist/types/utils/provider-response.d.ts +3 -0
  150. package/dist/types/utils/retry-after.d.ts +3 -0
  151. package/dist/types/utils/retry.d.ts +26 -0
  152. package/dist/types/utils/schema/adapt.d.ts +24 -0
  153. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  154. package/dist/types/utils/schema/dereference.d.ts +11 -0
  155. package/dist/types/utils/schema/draft.d.ts +10 -0
  156. package/dist/types/utils/schema/equality.d.ts +4 -0
  157. package/dist/types/utils/schema/fields.d.ts +49 -0
  158. package/dist/types/utils/schema/index.d.ts +13 -0
  159. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  160. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  161. package/dist/types/utils/schema/normalize.d.ts +93 -0
  162. package/dist/types/utils/schema/spill.d.ts +8 -0
  163. package/dist/types/utils/schema/stamps.d.ts +25 -0
  164. package/dist/types/utils/schema/types.d.ts +4 -0
  165. package/dist/types/utils/schema/wire.d.ts +54 -0
  166. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  167. package/dist/types/utils/sse-debug.d.ts +10 -0
  168. package/dist/types/utils/tool-call-healing.d.ts +71 -0
  169. package/dist/types/utils/tool-choice.d.ts +50 -0
  170. package/dist/types/utils/validation.d.ts +17 -0
  171. package/dist/types/utils.d.ts +28 -0
  172. package/package.json +146 -0
  173. package/src/api-registry.ts +96 -0
  174. package/src/auth-broker/client.ts +358 -0
  175. package/src/auth-broker/index.ts +5 -0
  176. package/src/auth-broker/refresher.ts +127 -0
  177. package/src/auth-broker/remote-store.ts +623 -0
  178. package/src/auth-broker/server.ts +644 -0
  179. package/src/auth-broker/types.ts +127 -0
  180. package/src/auth-broker/wire-schemas.ts +200 -0
  181. package/src/auth-gateway/http.ts +194 -0
  182. package/src/auth-gateway/index.ts +3 -0
  183. package/src/auth-gateway/server.ts +717 -0
  184. package/src/auth-gateway/types.ts +134 -0
  185. package/src/auth-storage.ts +4104 -0
  186. package/src/cli.ts +262 -0
  187. package/src/index.ts +54 -0
  188. package/src/model-cache.ts +129 -0
  189. package/src/model-manager.ts +450 -0
  190. package/src/model-thinking.ts +691 -0
  191. package/src/models.json +73853 -0
  192. package/src/models.json.d.ts +9 -0
  193. package/src/models.ts +56 -0
  194. package/src/prompts/turn-aborted-guidance.md +4 -0
  195. package/src/provider-details.ts +90 -0
  196. package/src/provider-models/bundled-references.ts +38 -0
  197. package/src/provider-models/descriptors.ts +308 -0
  198. package/src/provider-models/google.ts +91 -0
  199. package/src/provider-models/index.ts +5 -0
  200. package/src/provider-models/ollama.ts +153 -0
  201. package/src/provider-models/openai-compat.ts +2275 -0
  202. package/src/provider-models/special.ts +67 -0
  203. package/src/providers/amazon-bedrock.ts +849 -0
  204. package/src/providers/anthropic-messages-server-schema.ts +229 -0
  205. package/src/providers/anthropic-messages-server.ts +677 -0
  206. package/src/providers/anthropic.ts +2696 -0
  207. package/src/providers/aws-credentials.ts +501 -0
  208. package/src/providers/aws-eventstream.ts +185 -0
  209. package/src/providers/aws-sigv4.ts +218 -0
  210. package/src/providers/azure-openai-responses.ts +337 -0
  211. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  212. package/src/providers/cursor/proto/agent.proto +3526 -0
  213. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  214. package/src/providers/cursor/proto/buf.yaml +17 -0
  215. package/src/providers/cursor.ts +2561 -0
  216. package/src/providers/error-message.ts +21 -0
  217. package/src/providers/github-copilot-headers.ts +140 -0
  218. package/src/providers/gitlab-duo.ts +372 -0
  219. package/src/providers/google-auth.ts +252 -0
  220. package/src/providers/google-gemini-cli.ts +795 -0
  221. package/src/providers/google-gemini-headers.ts +41 -0
  222. package/src/providers/google-shared.ts +902 -0
  223. package/src/providers/google-types.ts +167 -0
  224. package/src/providers/google-vertex.ts +88 -0
  225. package/src/providers/google.ts +41 -0
  226. package/src/providers/grammar.ts +70 -0
  227. package/src/providers/kimi.ts +52 -0
  228. package/src/providers/mock.ts +500 -0
  229. package/src/providers/ollama.ts +544 -0
  230. package/src/providers/openai-anthropic-shim.ts +138 -0
  231. package/src/providers/openai-chat-server-schema.ts +243 -0
  232. package/src/providers/openai-chat-server.ts +628 -0
  233. package/src/providers/openai-codex/constants.ts +43 -0
  234. package/src/providers/openai-codex/request-transformer.ts +161 -0
  235. package/src/providers/openai-codex/response-handler.ts +81 -0
  236. package/src/providers/openai-codex-responses.ts +2598 -0
  237. package/src/providers/openai-completions-compat.ts +279 -0
  238. package/src/providers/openai-completions.ts +1853 -0
  239. package/src/providers/openai-responses-server-schema.ts +290 -0
  240. package/src/providers/openai-responses-server.ts +1183 -0
  241. package/src/providers/openai-responses-shared.ts +800 -0
  242. package/src/providers/openai-responses.ts +621 -0
  243. package/src/providers/pi-native-client.ts +228 -0
  244. package/src/providers/pi-native-server.ts +210 -0
  245. package/src/providers/register-builtins.ts +412 -0
  246. package/src/providers/synthetic.ts +50 -0
  247. package/src/providers/transform-messages.ts +309 -0
  248. package/src/providers/vision-guard.ts +31 -0
  249. package/src/rate-limit-utils.ts +84 -0
  250. package/src/stream.ts +895 -0
  251. package/src/types.ts +884 -0
  252. package/src/usage/claude.ts +431 -0
  253. package/src/usage/gemini.ts +250 -0
  254. package/src/usage/github-copilot.ts +421 -0
  255. package/src/usage/google-antigravity.ts +201 -0
  256. package/src/usage/kimi.ts +271 -0
  257. package/src/usage/minimax-code.ts +31 -0
  258. package/src/usage/openai-codex.ts +503 -0
  259. package/src/usage/shared.ts +10 -0
  260. package/src/usage/zai.ts +247 -0
  261. package/src/usage.ts +183 -0
  262. package/src/utils/abort.ts +51 -0
  263. package/src/utils/anthropic-auth.ts +87 -0
  264. package/src/utils/discovery/antigravity.ts +261 -0
  265. package/src/utils/discovery/codex.ts +371 -0
  266. package/src/utils/discovery/cursor.ts +306 -0
  267. package/src/utils/discovery/gemini.ts +248 -0
  268. package/src/utils/discovery/index.ts +4 -0
  269. package/src/utils/discovery/openai-compatible.ts +224 -0
  270. package/src/utils/event-stream.ts +142 -0
  271. package/src/utils/fireworks-model-id.ts +30 -0
  272. package/src/utils/foundry.ts +8 -0
  273. package/src/utils/h2-fetch.ts +60 -0
  274. package/src/utils/http-inspector.ts +176 -0
  275. package/src/utils/idle-iterator.ts +250 -0
  276. package/src/utils/json-parse.ts +148 -0
  277. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  278. package/src/utils/oauth/anthropic.ts +200 -0
  279. package/src/utils/oauth/api-key-login.ts +87 -0
  280. package/src/utils/oauth/api-key-validation.ts +92 -0
  281. package/src/utils/oauth/callback-server.ts +276 -0
  282. package/src/utils/oauth/cerebras.ts +16 -0
  283. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  284. package/src/utils/oauth/cursor.ts +157 -0
  285. package/src/utils/oauth/deepseek.ts +53 -0
  286. package/src/utils/oauth/firepass.ts +24 -0
  287. package/src/utils/oauth/fireworks.ts +15 -0
  288. package/src/utils/oauth/github-copilot.ts +362 -0
  289. package/src/utils/oauth/gitlab-duo.ts +123 -0
  290. package/src/utils/oauth/google-antigravity.ts +200 -0
  291. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  292. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  293. package/src/utils/oauth/huggingface.ts +62 -0
  294. package/src/utils/oauth/index.ts +444 -0
  295. package/src/utils/oauth/kagi.ts +47 -0
  296. package/src/utils/oauth/kilo.ts +87 -0
  297. package/src/utils/oauth/kimi.ts +254 -0
  298. package/src/utils/oauth/litellm.ts +47 -0
  299. package/src/utils/oauth/lm-studio.ts +38 -0
  300. package/src/utils/oauth/minimax-code.ts +78 -0
  301. package/src/utils/oauth/moonshot.ts +16 -0
  302. package/src/utils/oauth/nanogpt.ts +15 -0
  303. package/src/utils/oauth/nvidia.ts +70 -0
  304. package/src/utils/oauth/oauth.html +199 -0
  305. package/src/utils/oauth/ollama-cloud.ts +28 -0
  306. package/src/utils/oauth/ollama.ts +47 -0
  307. package/src/utils/oauth/openai-codex.ts +299 -0
  308. package/src/utils/oauth/opencode.ts +49 -0
  309. package/src/utils/oauth/parallel.ts +46 -0
  310. package/src/utils/oauth/perplexity.ts +206 -0
  311. package/src/utils/oauth/pkce.ts +18 -0
  312. package/src/utils/oauth/qianfan.ts +58 -0
  313. package/src/utils/oauth/qwen-portal.ts +60 -0
  314. package/src/utils/oauth/synthetic.ts +16 -0
  315. package/src/utils/oauth/tavily.ts +46 -0
  316. package/src/utils/oauth/together.ts +16 -0
  317. package/src/utils/oauth/types.ts +94 -0
  318. package/src/utils/oauth/venice.ts +59 -0
  319. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  320. package/src/utils/oauth/vllm.ts +40 -0
  321. package/src/utils/oauth/xiaomi.ts +137 -0
  322. package/src/utils/oauth/zai.ts +60 -0
  323. package/src/utils/oauth/zenmux.ts +15 -0
  324. package/src/utils/overflow.ts +137 -0
  325. package/src/utils/parse-bind.ts +54 -0
  326. package/src/utils/provider-response.ts +30 -0
  327. package/src/utils/retry-after.ts +110 -0
  328. package/src/utils/retry.ts +54 -0
  329. package/src/utils/schema/CONSTRAINTS.md +164 -0
  330. package/src/utils/schema/adapt.ts +36 -0
  331. package/src/utils/schema/compatibility.ts +435 -0
  332. package/src/utils/schema/dereference.ts +98 -0
  333. package/src/utils/schema/draft.ts +341 -0
  334. package/src/utils/schema/equality.ts +97 -0
  335. package/src/utils/schema/fields.ts +190 -0
  336. package/src/utils/schema/index.ts +13 -0
  337. package/src/utils/schema/json-schema-validator.ts +577 -0
  338. package/src/utils/schema/meta-validator.ts +167 -0
  339. package/src/utils/schema/normalize.ts +1588 -0
  340. package/src/utils/schema/spill.ts +43 -0
  341. package/src/utils/schema/stamps.ts +97 -0
  342. package/src/utils/schema/types.ts +11 -0
  343. package/src/utils/schema/wire.ts +213 -0
  344. package/src/utils/schema/zod-decontaminate.ts +331 -0
  345. package/src/utils/sse-debug.ts +289 -0
  346. package/src/utils/tool-call-healing.ts +271 -0
  347. package/src/utils/tool-choice.ts +99 -0
  348. package/src/utils/validation.ts +1019 -0
  349. package/src/utils.ts +166 -0
package/src/stream.ts ADDED
@@ -0,0 +1,895 @@
1
+ import * as fs from "node:fs";
2
+ import * as os from "node:os";
3
+ import * as path from "node:path";
4
+ import { $env, $pickenv, extractHttpStatusFromError } from "@gajae-code/utils";
5
+ import { getCustomApi } from "./api-registry";
6
+ import type { Effort } from "./model-thinking";
7
+ import {
8
+ mapEffortToAnthropicAdaptiveEffort,
9
+ mapEffortToGoogleThinkingLevel,
10
+ requireSupportedEffort,
11
+ } from "./model-thinking";
12
+ import type { BedrockOptions } from "./providers/amazon-bedrock";
13
+ import type { AnthropicOptions } from "./providers/anthropic";
14
+ import type { CursorOptions } from "./providers/cursor";
15
+ import { isGitLabDuoModel, streamGitLabDuo } from "./providers/gitlab-duo";
16
+ import type { GoogleOptions } from "./providers/google";
17
+ import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli";
18
+ import type { GoogleVertexOptions } from "./providers/google-vertex";
19
+ import { isKimiModel, streamKimi } from "./providers/kimi";
20
+ import type { OllamaChatOptions } from "./providers/ollama";
21
+ import type { OpenAICompletionsOptions } from "./providers/openai-completions";
22
+ import { streamPiNative } from "./providers/pi-native-client";
23
+ // Heavy provider stream functions are imported lazily via register-builtins,
24
+ // which wraps each provider module in a dynamic import. This keeps the
25
+ // AWS SDK, google-auth-library, @google/genai, @bufbuild/protobuf, and
26
+ // other provider SDKs out of the CLI startup parse graph. The
27
+ // gitlab-duo / kimi / synthetic providers stay eager because their modules
28
+ // export routing predicates (isGitLabDuoModel, isKimiModel, isSyntheticModel)
29
+ // that must be callable synchronously before streaming begins, and their
30
+ // modules are thin wrappers with no heavy SDK dependencies.
31
+ import {
32
+ streamAnthropic,
33
+ streamAzureOpenAIResponses,
34
+ streamBedrock,
35
+ streamCursor,
36
+ streamGoogle,
37
+ streamGoogleGeminiCli,
38
+ streamGoogleVertex,
39
+ streamOllama,
40
+ streamOpenAICodexResponses,
41
+ streamOpenAICompletions,
42
+ streamOpenAIResponses,
43
+ } from "./providers/register-builtins";
44
+ import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
45
+ import type {
46
+ Api,
47
+ AssistantMessage,
48
+ AssistantMessageEvent,
49
+ Context,
50
+ Model,
51
+ OptionsForApi,
52
+ SimpleStreamOptions,
53
+ StreamOptions,
54
+ ThinkingBudgets,
55
+ ToolChoice,
56
+ } from "./types";
57
+ import { AssistantMessageEventStream } from "./utils/event-stream";
58
+ import { isFoundryEnabled } from "./utils/foundry";
59
+
60
/** Memoized outcome of the ADC credentials-file probe; stays `null` until the first probe has run. */
let cachedVertexAdcCredentialsExists: boolean | null = null;

/**
 * Reports whether a Google Application Default Credentials file is present on disk.
 *
 * When `GOOGLE_APPLICATION_CREDENTIALS` is set, that path is probed; otherwise the
 * gcloud default location under the user's home directory is probed. The filesystem
 * is only consulted on the first call: the outcome is stored in
 * `cachedVertexAdcCredentialsExists` and returned unchanged on every later call.
 *
 * @returns `true` when the probed credentials file exists, `false` otherwise.
 */
function hasVertexAdcCredentials(): boolean {
	if (cachedVertexAdcCredentialsExists !== null) {
		return cachedVertexAdcCredentialsExists;
	}
	const explicitPath = $env.GOOGLE_APPLICATION_CREDENTIALS;
	// Only fall back to the home-directory location when no explicit path is configured.
	const credentialsFile = explicitPath
		? explicitPath
		: path.join(os.homedir(), ".config", "gcloud", "application_default_credentials.json");
	const exists = fs.existsSync(credentialsFile);
	cachedVertexAdcCredentialsExists = exists;
	return exists;
}
75
+
76
/**
 * How a provider's credential is located: either the name of a single environment
 * variable, or a function that computes the credential (or `undefined` when none is configured).
 */
type KeyResolver = string | (() => string | undefined);

/**
 * Provider id -> credential resolver. Insertion order is significant because the key
 * list is exposed through `Object.keys`.
 */
const serviceProviderMap: Record<string, KeyResolver> = {
	"alibaba-coding-plan": "ALIBABA_CODING_PLAN_API_KEY",
	openai: "OPENAI_API_KEY",
	google: "GEMINI_API_KEY",
	groq: "GROQ_API_KEY",
	cerebras: "CEREBRAS_API_KEY",
	xai: "XAI_API_KEY",
	fireworks: "FIREWORKS_API_KEY",
	firepass: "FIREPASS_API_KEY",
	openrouter: "OPENROUTER_API_KEY",
	kilo: "KILO_API_KEY",
	"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
	zai: "ZAI_API_KEY",
	mistral: "MISTRAL_API_KEY",
	minimax: "MINIMAX_API_KEY",
	"minimax-code": "MINIMAX_CODE_API_KEY",
	"minimax-code-cn": "MINIMAX_CODE_CN_API_KEY",
	"opencode-go": "OPENCODE_API_KEY",
	"opencode-zen": "OPENCODE_API_KEY",
	cursor: "CURSOR_ACCESS_TOKEN",
	deepseek: "DEEPSEEK_API_KEY",
	"openai-codex": "OPENAI_CODEX_OAUTH_TOKEN",
	"azure-openai-responses": "AZURE_OPENAI_API_KEY",
	exa: "EXA_API_KEY",
	jina: "JINA_API_KEY",
	brave: "BRAVE_API_KEY",
	perplexity: "PERPLEXITY_API_KEY",
	tavily: "TAVILY_API_KEY",
	parallel: "PARALLEL_API_KEY",
	kagi: "KAGI_API_KEY",
	// GitHub Copilot authenticates with a GitHub token; the first variable that is set wins.
	"github-copilot": () => $pickenv("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
	// In Foundry mode the enterprise gateway key is preferred over the regular Anthropic credentials.
	anthropic: () => {
		if (isFoundryEnabled()) {
			return $pickenv("ANTHROPIC_FOUNDRY_API_KEY", "ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY");
		}
		return $pickenv("ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY");
	},
	"gitlab-duo": "GITLAB_TOKEN",
	// Vertex AI accepts GOOGLE_CLOUD_API_KEY, or Application Default Credentials together
	// with a project and a location.
	"google-vertex": () => {
		const cloudApiKey = $env.GOOGLE_CLOUD_API_KEY;
		if (cloudApiKey) {
			return cloudApiKey;
		}
		const adcPresent = hasVertexAdcCredentials();
		const project = $env.GOOGLE_CLOUD_PROJECT || $env.GCLOUD_PROJECT;
		const location = $env.GOOGLE_CLOUD_LOCATION;
		// There is no literal key in ADC mode, so a placeholder marks the provider as usable.
		return adcPresent && project && location ? "<authenticated>" : undefined;
	},
	// Amazon Bedrock is considered configured when any of these credential sources is present:
	//   1. AWS_PROFILE - named profile from ~/.aws/credentials
	//   2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY - standard IAM keys
	//   3. AWS_BEARER_TOKEN_BEDROCK - Bedrock API keys (bearer token)
	//   4. AWS_CONTAINER_CREDENTIALS_* - ECS/Task IAM role credentials
	//   5. AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN - IRSA (EKS) web identity
	"amazon-bedrock": () => {
		const ecsCredentials =
			!!$env.AWS_CONTAINER_CREDENTIALS_RELATIVE_URI || !!$env.AWS_CONTAINER_CREDENTIALS_FULL_URI;
		const webIdentity = !!$env.AWS_WEB_IDENTITY_TOKEN_FILE && !!$env.AWS_ROLE_ARN;
		const credentialsConfigured =
			!!$env.AWS_PROFILE ||
			(!!$env.AWS_ACCESS_KEY_ID && !!$env.AWS_SECRET_ACCESS_KEY) ||
			!!$env.AWS_BEARER_TOKEN_BEDROCK ||
			ecsCredentials ||
			webIdentity;
		return credentialsConfigured ? "<authenticated>" : undefined;
	},
	synthetic: "SYNTHETIC_API_KEY",
	"cloudflare-ai-gateway": "CLOUDFLARE_AI_GATEWAY_API_KEY",
	huggingface: () => $pickenv("HUGGINGFACE_HUB_TOKEN", "HF_TOKEN"),
	litellm: "LITELLM_API_KEY",
	moonshot: "MOONSHOT_API_KEY",
	nvidia: "NVIDIA_API_KEY",
	nanogpt: "NANO_GPT_API_KEY",
	"lm-studio": "LM_STUDIO_API_KEY",
	ollama: "OLLAMA_API_KEY",
	"ollama-cloud": "OLLAMA_CLOUD_API_KEY",
	"llama.cpp": "LLAMA_CPP_API_KEY",
	qianfan: "QIANFAN_API_KEY",
	"qwen-portal": () => $pickenv("QWEN_OAUTH_TOKEN", "QWEN_PORTAL_API_KEY"),
	together: "TOGETHER_API_KEY",
	zenmux: "ZENMUX_API_KEY",
	venice: "VENICE_API_KEY",
	vllm: "VLLM_API_KEY",
	xiaomi: "XIAOMI_API_KEY",
};
167
+
168
/**
 * Get API key for provider from known environment variables, e.g. OPENAI_API_KEY.
 *
 * Will not return API keys for providers that require OAuth tokens.
 * Checks Bun.env, then cwd/.env, then ~/.env.
 *
 * Only providers that are own keys of `serviceProviderMap` are resolved. Names that
 * merely exist on `Object.prototype` (`"toString"`, `"constructor"`, `"__proto__"`, ...)
 * are treated as unknown providers instead of being looked up through the prototype
 * chain, which would otherwise yield a bogus value or throw.
 *
 * Function resolvers in `serviceProviderMap` may return the placeholder
 * `"<authenticated>"` rather than a literal key when credentials come from the
 * ambient environment.
 *
 * @param provider - Provider identifier, e.g. `"openai"`.
 * @returns The resolved credential, or `undefined` when the provider is unknown or
 *   nothing is configured for it.
 */
export function getEnvApiKey(provider: string): string | undefined {
	// Guard against inherited properties: a plain-object lookup would happily return
	// Object.prototype members for provider names such as "toString".
	if (!Object.prototype.hasOwnProperty.call(serviceProviderMap, provider)) {
		return undefined;
	}
	const resolver = serviceProviderMap[provider];
	if (typeof resolver === "string") {
		return $env[resolver];
	}
	return resolver?.();
}
181
+
182
/**
 * Lists every provider id that `getEnvApiKey` knows an environment fallback for,
 * i.e. the keys of `serviceProviderMap` in their declaration order.
 *
 * Used by `gjc auth-broker migrate --include-env` to discover env-sourced keys
 * that should be uploaded to the broker.
 *
 * @returns A fresh array of provider ids.
 */
export function listProvidersWithEnvKey(): string[] {
	const providers: string[] = [];
	for (const [provider] of Object.entries(serviceProviderMap)) {
		providers.push(provider);
	}
	return providers;
}
190
+
191
/**
 * Starts a streaming completion for `model`, dispatching to the provider
 * implementation that matches the model's API.
 *
 * Dispatch order:
 * 1. A custom API registered for `model.api` (via the API registry) handles the request.
 * 2. GitLab Duo models go to the GitLab Duo streamer; an API key is required.
 * 3. `google-vertex` and `bedrock-converse-stream` are dispatched without resolving an API key here.
 * 4. Every other built-in API needs a key from `options.apiKey` or `getEnvApiKey(model.provider)`.
 *
 * @param model - Model to stream from; `model.api` selects the provider implementation.
 * @param context - Conversation context forwarded unchanged to the provider.
 * @param options - Provider options; `apiKey` takes precedence over the environment lookup.
 * @returns The provider's assistant message event stream.
 * @throws Error `No API key for provider: ...` when a key is required but none is available.
 * @throws Error `Unhandled API: ...` when `model.api` matches no built-in API.
 */
export function stream<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: OptionsForApi<TApi>,
): AssistantMessageEventStream {
	// Extension-registered APIs take priority over every built-in route.
	const registeredApi = getCustomApi(model.api);
	if (registeredApi) {
		return registeredApi.stream(model, context, options as StreamOptions);
	}

	if (isGitLabDuoModel(model)) {
		const duoOptions = options as SimpleStreamOptions | undefined;
		const duoApiKey = duoOptions?.apiKey || getEnvApiKey(model.provider);
		if (!duoApiKey) {
			throw new Error(`No API key for provider: ${model.provider}`);
		}
		return streamGitLabDuo(model, context, { ...duoOptions, apiKey: duoApiKey });
	}

	// Vertex AI uses Application Default Credentials, not API keys.
	if (model.api === "google-vertex") {
		return streamGoogleVertex(model as Model<"google-vertex">, context, options as GoogleVertexOptions);
	}

	// Bedrock has no API key; credentials come from the standard AWS environment
	// variables or from the configured AWS profile.
	if (model.api === "bedrock-converse-stream") {
		return streamBedrock(model as Model<"bedrock-converse-stream">, context, (options || {}) as BedrockOptions);
	}

	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key for provider: ${model.provider}`);
	}
	// From here on every provider receives the caller's options plus the resolved key.
	const providerOptions = { ...options, apiKey };

	const api: Api = model.api;
	switch (api) {
		case "anthropic-messages": {
			const anthropicOptions = providerOptions as AnthropicOptions;
			// An explicit option wins; otherwise inherit the OAuth flag from the model.
			const isOAuth = anthropicOptions.isOAuth ?? model.isOAuth;
			return streamAnthropic(model as Model<"anthropic-messages">, context, { ...anthropicOptions, isOAuth });
		}

		case "openai-completions": {
			return streamOpenAICompletions(model as Model<"openai-completions">, context, providerOptions as any);
		}

		case "openai-responses": {
			return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
		}

		case "azure-openai-responses": {
			return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);
		}

		case "openai-codex-responses": {
			return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);
		}

		case "google-generative-ai": {
			return streamGoogle(model as Model<"google-generative-ai">, context, providerOptions);
		}

		case "google-gemini-cli": {
			const geminiCliOptions = providerOptions as GoogleGeminiCliOptions;
			return streamGoogleGeminiCli(model as Model<"google-gemini-cli">, context, geminiCliOptions);
		}

		case "ollama-chat": {
			return streamOllama(model as Model<"ollama-chat">, context, providerOptions as OllamaChatOptions);
		}

		case "cursor-agent": {
			return streamCursor(model as Model<"cursor-agent">, context, providerOptions as CursorOptions);
		}

		default:
			throw new Error(`Unhandled API: ${api}`);
	}
}
269
+
270
/**
 * Runs a completion to the end and resolves with the final assistant message.
 *
 * This is `stream(model, context, options)` followed by waiting on the stream's
 * `result()`; intermediate events are not surfaced to the caller.
 *
 * @param model - Model to run the completion against.
 * @param context - Conversation context forwarded to `stream`.
 * @param options - Provider options forwarded to `stream`.
 * @returns The final assistant message produced by the stream.
 */
export async function complete<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: OptionsForApi<TApi>,
): Promise<AssistantMessage> {
	return stream(model, context, options).result();
}
278
+
279
/**
 * Describes a stream attempt that was cut short by a captured HTTP 401.
 * Produced by `runAttempt` inside `streamSimple` instead of forwarding the
 * failure to the outer stream.
 */
type AuthRetryFailure = {
	/** The failure itself: the thrown value, or an `Error` built from the provider's error event. */
	error: unknown;
	/** Events that had been held back and not yet pushed to the outer stream when the attempt stopped. */
	bufferedEvents: AssistantMessageEvent[];
	/** The `error` event that ended the attempt; left unset when the failure was a thrown exception. */
	terminalEvent?: Extract<AssistantMessageEvent, { type: "error" }>;
};
284
+
285
/**
 * Determines the HTTP status associated with an errored assistant message.
 *
 * An explicit `errorStatus` on the message is returned as-is. Otherwise the
 * status is derived from `errorMessage` via `extractHttpStatusFromError`.
 *
 * @param message - Assistant message carrying error details.
 * @returns The status, or `undefined` when the message has neither a status
 *   nor a non-empty error message (or none can be extracted from it).
 */
function extractStatusFromAssistantError(message: AssistantMessage): number | undefined {
	const { errorStatus, errorMessage } = message;
	if (errorStatus !== undefined) {
		return errorStatus;
	}
	return errorMessage ? extractHttpStatusFromError({ message: errorMessage }) : undefined;
}
290
+
291
/**
 * Builds an `Error` from an errored assistant message.
 *
 * The error text is the message's `errorMessage`, falling back to
 * `"Provider authentication failed"`. When a status can be determined through
 * `extractStatusFromAssistantError`, it is attached as `status`; otherwise the
 * property is left off entirely.
 *
 * @param message - Assistant message carrying error details.
 * @returns The constructed error, optionally carrying a numeric `status`.
 */
function createAssistantAuthError(message: AssistantMessage): Error & { status?: number } {
	const text = message.errorMessage ?? "Provider authentication failed";
	const authError: Error & { status?: number } = new Error(text);
	const status = extractStatusFromAssistantError(message);
	if (status === undefined) {
		return authError;
	}
	authError.status = status;
	return authError;
}
297
+
298
/**
 * Pushes each event in `events`, in order, onto `stream`.
 *
 * The array itself is not modified; callers that want to reuse the buffer must
 * clear it themselves.
 *
 * @param stream - Destination event stream.
 * @param events - Events to forward.
 */
function emitBufferedEvents(stream: AssistantMessageEventStream, events: AssistantMessageEvent[]): void {
	for (let index = 0; index < events.length; index += 1) {
		stream.push(events[index]);
	}
}
303
+
304
/**
 * Streams a completion using the provider-agnostic `SimpleStreamOptions`.
 *
 * Routing, in order:
 * 1. When `options.onAuthError` is set and an API key is available, the call is
 *    wrapped in a one-shot auth retry. A 401 seen before anything other than
 *    `start` events has been forwarded is handed to `onAuthError`; if it
 *    returns a different key, exactly one more attempt is made with that key.
 * 2. Models whose `transport` is `"pi-native"` go to `streamPiNative`.
 * 3. APIs present in the custom API registry are delegated to that provider's
 *    own `streamSimple`.
 * 4. `google-vertex` and `bedrock-converse-stream` are dispatched without an API key.
 * 5. Everything else needs an API key (`options.apiKey` or `getEnvApiKey`), with
 *    dedicated handlers for GitLab Duo, Kimi and Synthetic models before the
 *    generic `mapOptionsForApi` + `stream` path.
 *
 * @param model - Model to call.
 * @param context - Conversation context forwarded to the selected handler.
 * @param options - Provider-agnostic options.
 * @returns The event stream for the request.
 * @throws Error when step 5 is reached and no API key can be resolved.
 */
export function streamSimple<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: SimpleStreamOptions,
): AssistantMessageEventStream {
	// The retry wrapper is only used when the caller supplied `onAuthError` AND a key is known.
	// Note the `??`: an empty-string `options.apiKey` is kept as-is, which is falsy and skips the wrapper.
	const retryApiKey = options?.onAuthError ? (options.apiKey ?? getEnvApiKey(model.provider)) : undefined;
	if (retryApiKey) {
		// `outer` is what the caller consumes; each attempt's inner stream is relayed into it.
		const outer = new AssistantMessageEventStream();
		// Safe to assert: `retryApiKey` is only truthy when `options.onAuthError` is set.
		const onAuthError = options!.onAuthError!;
		// Runs one attempt. Returns an `AuthRetryFailure` only when `captureAuthFailure` is true and a
		// 401 occurred before any non-`start` event was forwarded; otherwise `outer` has been driven
		// (events pushed, ended, or failed) and `undefined` is returned.
		const runAttempt = async (apiKey: string, captureAuthFailure: boolean): Promise<AuthRetryFailure | undefined> => {
			const bufferedEvents: AssistantMessageEvent[] = [];
			// Flips to true once a non-`start` event has been forwarded; after that a retry is no longer attempted.
			let emittedReplayUnsafeEvent = false;
			const flushBuffered = (): void => {
				emitBufferedEvents(outer, bufferedEvents);
				bufferedEvents.length = 0;
			};

			try {
				// Recurse with `onAuthError` cleared so the inner call takes the plain (non-retry) path.
				const inner = streamSimple(model, context, { ...options, apiKey, onAuthError: undefined });
				for await (const event of inner) {
					// Hold back `start` events until we know the attempt is not an immediate auth failure.
					if (!emittedReplayUnsafeEvent && event.type === "start") {
						bufferedEvents.push(event);
						continue;
					}
					// A 401 delivered as an `error` event before anything was forwarded: report it to the caller of runAttempt.
					if (
						!emittedReplayUnsafeEvent &&
						captureAuthFailure &&
						event.type === "error" &&
						extractStatusFromAssistantError(event.error) === 401
					) {
						return { error: createAssistantAuthError(event.error), bufferedEvents, terminalEvent: event };
					}
					flushBuffered();
					emittedReplayUnsafeEvent = true;
					outer.push(event);
					// Stop relaying as soon as the outer stream reports completion.
					if (outer.done) return undefined;
				}
				// Inner stream finished; release anything still buffered (e.g. only `start` events were seen).
				flushBuffered();
				if (!outer.done) outer.end(await inner.result());
			} catch (error) {
				// A thrown 401 before anything was forwarded is also eligible for retry.
				if (!emittedReplayUnsafeEvent && captureAuthFailure && extractHttpStatusFromError(error) === 401) {
					return { error, bufferedEvents };
				}
				flushBuffered();
				outer.fail(error);
			}
			return undefined;
		};
		// Surfaces a captured auth failure to the caller when no retry will happen.
		const emitFailure = (failure: AuthRetryFailure): void => {
			emitBufferedEvents(outer, failure.bufferedEvents);
			if (failure.terminalEvent) {
				outer.push(failure.terminalEvent);
			} else {
				outer.fail(failure.error);
			}
		};

		// Fire-and-forget driver: first attempt, optional key refresh, at most one retry.
		void (async () => {
			const failure = await runAttempt(retryApiKey, true);
			if (!failure) return;
			let nextKey: string | undefined;
			try {
				nextKey = await onAuthError(model.provider, retryApiKey, failure.error);
			} catch {
				// A throwing `onAuthError` is treated the same as "no replacement key".
				nextKey = undefined;
			}
			// No key, or the same key again: retrying would be pointless, so report the original failure.
			if (!nextKey || nextKey === retryApiKey) {
				emitFailure(failure);
				return;
			}
			// Second attempt does not capture auth failures; whatever happens is relayed to `outer`.
			await runAttempt(nextKey, false);
		})();
		return outer;
	}

	// Pi-native transport short-circuits the per-provider dispatch entirely:
	// the gateway resolves provider + credential server-side, so we don't
	// need an `apiKey` from `getEnvApiKey` here — `options.apiKey` carries
	// the gateway bearer instead. Comes BEFORE the custom-API check so
	// extension-registered APIs can't accidentally override a configured
	// pi-native transport.
	if (model.transport === "pi-native") {
		return streamPiNative(model, context, options);
	}

	// Check custom API registry (extension-provided APIs)
	const customApiProvider = getCustomApi(model.api);
	if (customApiProvider) {
		return customApiProvider.streamSimple(model, context, options);
	}

	// Vertex AI uses Application Default Credentials, not API keys
	if (model.api === "google-vertex") {
		const providerOptions = mapOptionsForApi(model, options, undefined);
		return stream(model, context, providerOptions);
	} else if (model.api === "bedrock-converse-stream") {
		// Bedrock has no API keys; credentials come from the standard AWS environment variables or a given AWS profile.
		const providerOptions = mapOptionsForApi(model, options, undefined);
		return stream(model, context, providerOptions);
	}

	// Unlike the retry path above this uses `||`, so an empty-string `options.apiKey` falls back to the environment.
	const apiKey = options?.apiKey || getEnvApiKey(model.provider);
	if (!apiKey) {
		throw new Error(`No API key for provider: ${model.provider}`);
	}

	// GitLab Duo - wraps Anthropic/OpenAI behind GitLab AI Gateway direct access tokens
	if (isGitLabDuoModel(model)) {
		return streamGitLabDuo(model, context, {
			...options,
			apiKey,
		});
	}

	// Kimi Code - route to dedicated handler that wraps OpenAI or Anthropic API
	if (isKimiModel(model)) {
		// Pass raw SimpleStreamOptions - streamKimi handles mapping internally
		return streamKimi(model as Model<"openai-completions">, context, {
			...options,
			apiKey,
			format: options?.kimiApiFormat ?? "anthropic",
		});
	}

	// Synthetic - route to dedicated handler that wraps OpenAI or Anthropic API
	if (isSyntheticModel(model)) {
		// Pass raw SimpleStreamOptions - streamSynthetic handles mapping internally
		return streamSynthetic(model as Model<"openai-completions">, context, {
			...options,
			apiKey,
			format: options?.syntheticApiFormat ?? "openai", // Default to OpenAI format
		});
	}

	// Generic path: translate the simple options into the API-specific shape and dispatch.
	const providerOptions = mapOptionsForApi(model, options, apiKey);
	return stream(model, context, providerOptions);
}
441
+
442
/**
 * Sends one request using provider-agnostic options and resolves with the
 * finished assistant message.
 *
 * Convenience wrapper: opens the event stream through `streamSimple` and hands
 * back the promise returned by that stream's `result()`.
 *
 * @param model - Model to call.
 * @param context - Conversation context forwarded unchanged to `streamSimple`.
 * @param options - Provider-agnostic options.
 * @returns The final assistant message produced by the stream.
 */
export async function completeSimple<TApi extends Api>(
	model: Model<TApi>,
	context: Context,
	options?: SimpleStreamOptions,
): Promise<AssistantMessage> {
	return streamSimple(model, context, options).result();
}
450
+
451
/** Output-token headroom kept in reserve whenever a thinking budget has to be trimmed (or `maxTokens` raised) to fit a token limit. */
const MIN_OUTPUT_TOKENS = 1024;
/** NOTE(review): not referenced in the visible part of this file; presumably consumed by other modules — confirm usage before changing. */
export const OUTPUT_FALLBACK_BUFFER = 4000;
/**
 * True unless the `PI_NO_INTERLEAVED_THINKING` environment variable is exactly "1".
 * When true, budget-based Anthropic requests pass the thinking budget through without raising `maxTokens`.
 */
const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PI_NO_INTERLEAVED_THINKING !== "1";
454
+
455
/**
 * Default thinking token budget per effort level for `anthropic-messages` models.
 * A matching entry in the caller's `thinkingBudgets` takes precedence over these values.
 */
export const ANTHROPIC_THINKING: Record<Effort, number> = {
	minimal: 1024,
	low: 4096,
	medium: 8192,
	high: 16384,
	xhigh: 32768,
};
462
+
463
/**
 * Default thinking token budget per effort level for budget-based `google-gemini-cli` models.
 * A matching entry in the caller's `thinkingBudgets` takes precedence over these values.
 */
const GOOGLE_THINKING: Record<Effort, number> = {
	minimal: 1024,
	low: 4096,
	medium: 8192,
	high: 16384,
	xhigh: 24575,
};
470
+
471
/**
 * Default thinking token budget per effort level used by `resolveBedrockThinkingBudget`.
 * A matching entry in the caller's `thinkingBudgets` takes precedence over these values.
 * Note that `high` and `xhigh` share the same budget.
 */
const BEDROCK_CLAUDE_THINKING: Record<Effort, number> = {
	minimal: 1024,
	low: 2048,
	medium: 8192,
	high: 16384,
	xhigh: 16384,
};
478
+
479
/**
 * Works out the thinking budget for a Bedrock request.
 *
 * @param model - Bedrock model being called.
 * @param options - Caller options; only `reasoning` and `thinkingBudgets` are read.
 * @returns `null` when no reasoning was requested or the model does not reason;
 *   otherwise the effort level accepted by `requireSupportedEffort` together
 *   with its budget (caller override first, `BEDROCK_CLAUDE_THINKING` second).
 */
function resolveBedrockThinkingBudget(
	model: Model<"bedrock-converse-stream">,
	options?: SimpleStreamOptions,
): { budget: number; level: Effort } | null {
	if (!(options?.reasoning && model.reasoning)) {
		return null;
	}
	const level = requireSupportedEffort(model, options.reasoning);
	const callerBudget = options.thinkingBudgets?.[level];
	return { budget: callerBudget ?? BEDROCK_CLAUDE_THINKING[level], level };
}
488
+
489
/**
 * Translates a generic tool choice into the Anthropic option shape.
 *
 * - `"required"` becomes `"any"`; `"auto"`, `"none"` and `"any"` pass through.
 * - `{ type: "tool" }` and both `{ type: "function" }` variants become
 *   `{ type: "tool", name }`, provided a non-empty name is present.
 * - Anything else (including a missing choice) yields `undefined`.
 *
 * @param choice - Generic tool choice, if any.
 * @returns The Anthropic tool choice, or `undefined` when nothing applies.
 */
export function mapAnthropicToolChoice(choice?: ToolChoice): AnthropicOptions["toolChoice"] {
	if (!choice) {
		return undefined;
	}

	if (typeof choice === "string") {
		switch (choice) {
			case "required":
				return "any";
			case "auto":
			case "none":
			case "any":
				return choice;
			default:
				return undefined;
		}
	}

	let toolName: string | undefined;
	switch (choice.type) {
		case "tool":
			toolName = choice.name;
			break;
		case "function":
			// Accept both the nested `{ function: { name } }` and the flat `{ name }` variants.
			toolName = "function" in choice ? choice.function?.name : choice.name;
			break;
		default:
			return undefined;
	}
	return toolName ? { type: "tool", name: toolName } : undefined;
}
505
+
506
/**
 * Translates a generic tool choice into the mode string shared by the Google APIs.
 *
 * - `"required"` becomes `"any"`; `"auto"`, `"none"` and `"any"` pass through.
 * - Any object-form choice collapses to `"any"`; the tool name is not carried over.
 * - A missing choice or an unrecognised string yields `undefined`.
 *
 * @param choice - Generic tool choice, if any.
 * @returns The Google tool-choice mode, or `undefined` when nothing applies.
 */
function mapGoogleToolChoice(
	choice?: ToolChoice,
): GoogleOptions["toolChoice"] | GoogleGeminiCliOptions["toolChoice"] | GoogleVertexOptions["toolChoice"] {
	if (!choice) {
		return undefined;
	}
	if (typeof choice !== "string") {
		return "any";
	}
	switch (choice) {
		case "required":
			return "any";
		case "auto":
		case "none":
		case "any":
			return choice;
		default:
			return undefined;
	}
}
517
+
518
/**
 * Translates a generic tool choice into the OpenAI option shape.
 *
 * - `"any"` becomes `"required"`; `"auto"`, `"none"` and `"required"` pass through.
 * - `{ type: "tool" }` and both `{ type: "function" }` variants become
 *   `{ type: "function", function: { name } }`, provided a non-empty name is present.
 * - Anything else (including a missing choice) yields `undefined`.
 *
 * @param choice - Generic tool choice, if any.
 * @returns The OpenAI tool choice, or `undefined` when nothing applies.
 */
function mapOpenAiToolChoice(choice?: ToolChoice): OpenAICompletionsOptions["toolChoice"] {
	if (!choice) {
		return undefined;
	}

	if (typeof choice === "string") {
		switch (choice) {
			case "any":
				return "required";
			case "auto":
			case "none":
			case "required":
				return choice;
			default:
				return undefined;
		}
	}

	let functionName: string | undefined;
	switch (choice.type) {
		case "tool":
			functionName = choice.name;
			break;
		case "function":
			// Accept both the nested `{ function: { name } }` and the flat `{ name }` variants.
			functionName = "function" in choice ? choice.function?.name : choice.name;
			break;
		default:
			return undefined;
	}
	return functionName ? { type: "function", function: { name: functionName } } : undefined;
}
534
+
535
/**
 * Picks the reasoning effort to send for effort-based APIs.
 *
 * @param model - Model being called.
 * @param options - Caller options; only `reasoning` is read.
 * @returns `undefined` when no reasoning was requested or the model does not
 *   reason; otherwise whatever `requireSupportedEffort` returns for the request.
 */
function resolveOpenAiReasoningEffort<TApi extends Api>(
	model: Model<TApi>,
	options?: SimpleStreamOptions,
): Effort | undefined {
	const requested = options?.reasoning;
	return requested && model.reasoning ? requireSupportedEffort(model, requested) : undefined;
}
543
+
544
/**
 * Type-level helper: checks `api` against the option shape of one specific API
 * (chosen via the type argument) and returns the very same object widened to
 * the union of all API option shapes. Performs no runtime work.
 */
function castApi<TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> {
	return api as OptionsForApi<Api>;
}
545
+
546
/**
 * Translates provider-agnostic `SimpleStreamOptions` into the option shape of
 * the model's concrete API.
 *
 * Fields common to every API are copied into `base`; each `case` then adds the
 * API-specific reasoning/thinking and tool-choice settings.
 *
 * @param model - Model being called; `model.api` selects the mapping.
 * @param options - Caller options (may be omitted).
 * @param apiKey - Resolved API key; takes precedence over `options.apiKey` when truthy.
 * @returns Options in the shape expected by the model's API.
 * @throws Error when `model.api` is not one of the handled APIs. Helpers such as
 *   `requireSupportedEffort` are also called and may throw — their contracts
 *   are defined outside this block.
 */
function mapOptionsForApi<TApi extends Api>(
	model: Model<TApi>,
	options?: SimpleStreamOptions,
	apiKey?: string,
): OptionsForApi<TApi> {
	const base = {
		temperature: options?.temperature,
		topP: options?.topP,
		topK: options?.topK,
		minP: options?.minP,
		presencePenalty: options?.presencePenalty,
		repetitionPenalty: options?.repetitionPenalty,
		// `||` (not `??`) on purpose-preserving grounds: a `maxTokens` of 0 is treated as "unset".
		maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
		signal: options?.signal,
		apiKey: apiKey || options?.apiKey,
		cacheRetention: options?.cacheRetention,
		headers: options?.headers,
		initiatorOverride: options?.initiatorOverride,
		maxRetryDelayMs: options?.maxRetryDelayMs,
		metadata: options?.metadata,
		sessionId: options?.sessionId,
		providerSessionState: options?.providerSessionState,
		onPayload: options?.onPayload,
		onResponse: options?.onResponse,
		onSseEvent: options?.onSseEvent,
		execHandlers: options?.execHandlers,
	};

	switch (model.api) {
		case "anthropic-messages": {
			// Fields that are identical in every Anthropic outcome below.
			const anthropicShared = {
				...base,
				toolChoice: mapAnthropicToolChoice(options?.toolChoice),
				thinkingDisplay: options?.hideThinkingSummary ? ("omitted" as const) : undefined,
				serviceTier: options?.serviceTier,
			};

			// Explicitly disable thinking when reasoning is not specified or model doesn't support it
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return castApi<"anthropic-messages">({ ...anthropicShared, thinkingEnabled: false });
			}

			let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
			// A non-positive budget (e.g. a caller override of 0) means "no thinking".
			if (thinkingBudget <= 0) {
				return castApi<"anthropic-messages">({ ...anthropicShared, thinkingEnabled: false });
			}

			// For Opus 4.6+ and Sonnet 4.6+: use adaptive thinking with effort level
			// For older models: use budget-based thinking
			if (model.thinking?.mode === "anthropic-adaptive") {
				const effort = mapEffortToAnthropicAdaptiveEffort(model, reasoning);
				return castApi<"anthropic-messages">({ ...anthropicShared, thinkingEnabled: true, effort });
			}

			// With interleaved thinking the budget is passed through and `maxTokens` is left as requested.
			if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
				return castApi<"anthropic-messages">({
					...anthropicShared,
					thinkingEnabled: true,
					thinkingBudgetTokens: thinkingBudget,
				});
			}

			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);

			// If not enough room for thinking + output, reduce thinking budget
			if (maxTokens <= thinkingBudget) {
				thinkingBudget = maxTokens - MIN_OUTPUT_TOKENS;
			}

			// If thinking budget is too low, disable thinking
			if (thinkingBudget <= 0) {
				return castApi<"anthropic-messages">({ ...anthropicShared, thinkingEnabled: false });
			}
			return castApi<"anthropic-messages">({
				...anthropicShared,
				maxTokens,
				thinkingEnabled: true,
				thinkingBudgetTokens: thinkingBudget,
			});
		}

		case "bedrock-converse-stream": {
			const bedrockBase: BedrockOptions = {
				...base,
				reasoning: options?.reasoning,
				thinkingBudgets: options?.thinkingBudgets,
				toolChoice: mapAnthropicToolChoice(options?.toolChoice),
				thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
			};
			// Adaptive mode sends effort directly, no budget_tokens — skip budget inflation.
			if (model.thinking?.mode === "anthropic-adaptive") {
				return castApi<"bedrock-converse-stream">(bedrockBase);
			}
			const budgetInfo = resolveBedrockThinkingBudget(model as Model<"bedrock-converse-stream">, options);
			// No reasoning requested/supported: nothing to adjust.
			if (!budgetInfo) return castApi<"bedrock-converse-stream">(bedrockBase);
			let maxTokens = bedrockBase.maxTokens ?? model.maxTokens;
			let thinkingBudgets = bedrockBase.thinkingBudgets;
			// First try to make room by raising maxTokens (never above the model limit, never lowering it).
			if (maxTokens <= budgetInfo.budget) {
				const desiredMaxTokens = Math.min(model.maxTokens, budgetInfo.budget + MIN_OUTPUT_TOKENS);
				if (desiredMaxTokens > maxTokens) {
					maxTokens = desiredMaxTokens;
				}
			}
			// Still no room: shrink the budget for this effort level instead.
			if (maxTokens <= budgetInfo.budget) {
				const adjustedBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
				thinkingBudgets = { ...(thinkingBudgets ?? {}), [budgetInfo.level]: adjustedBudget };
			}
			return castApi<"bedrock-converse-stream">({ ...bedrockBase, maxTokens, thinkingBudgets });
		}

		case "openai-completions":
			return castApi<"openai-completions">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				disableReasoning: options?.disableReasoning,
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
			});

		case "openai-responses":
			return castApi<"openai-responses">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
			});

		case "azure-openai-responses":
			return castApi<"azure-openai-responses">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
			});

		case "openai-codex-responses":
			return castApi<"openai-codex-responses">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: mapOpenAiToolChoice(options?.toolChoice),
				serviceTier: options?.serviceTier,
				preferWebsockets: options?.preferWebsockets,
				reasoningSummary: options?.hideThinkingSummary ? null : undefined,
			});

		case "google-generative-ai": {
			// Explicitly disable thinking when reasoning is not specified or model doesn't support it
			// This is needed because Gemini has "dynamic thinking" enabled by default
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return castApi<"google-generative-ai">({
					...base,
					thinking: { enabled: false },
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}

			const googleModel = model as Model<"google-generative-ai">;
			const effort = requireSupportedEffort(googleModel, reasoning);

			// Gemini 3+ models use thinkingLevel exclusively instead of thinkingBudget.
			// https://ai.google.dev/gemini-api/docs/thinking#set-budget
			if (googleModel.thinking?.mode === "google-level") {
				return castApi<"google-generative-ai">({
					...base,
					thinking: {
						enabled: true,
						level: mapEffortToGoogleThinkingLevel(googleModel, effort),
					},
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}

			// Budget-based models. Tagged with this case's own API type (previously mis-tagged as "google-gemini-cli").
			return castApi<"google-generative-ai">({
				...base,
				thinking: {
					enabled: true,
					budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
				},
				toolChoice: mapGoogleToolChoice(options?.toolChoice),
			});
		}

		case "google-gemini-cli": {
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return castApi<"google-gemini-cli">({
					...base,
					thinking: { enabled: false },
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}

			const effort = requireSupportedEffort(model, reasoning);

			// Gemini 3+ models use thinkingLevel instead of thinkingBudget
			if (model.thinking?.mode === "google-level") {
				return castApi<"google-gemini-cli">({
					...base,
					thinking: {
						enabled: true,
						level: mapEffortToGoogleThinkingLevel(model, effort),
					},
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}

			let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];

			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);

			// If not enough room for thinking + output, reduce thinking budget
			if (maxTokens <= thinkingBudget) {
				thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
			}

			// If thinking budget is too low, disable thinking
			if (thinkingBudget <= 0) {
				return castApi<"google-gemini-cli">({
					...base,
					thinking: { enabled: false },
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}
			return castApi<"google-gemini-cli">({
				...base,
				maxTokens,
				thinking: { enabled: true, budgetTokens: thinkingBudget },
				toolChoice: mapGoogleToolChoice(options?.toolChoice),
			});
		}

		case "google-vertex": {
			// Explicitly disable thinking when reasoning is not specified or model doesn't support it
			const reasoning = options?.reasoning;
			if (!reasoning || !model.reasoning) {
				return castApi<"google-vertex">({
					...base,
					thinking: { enabled: false },
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}

			const vertexModel = model as Model<"google-vertex">;
			const effort = requireSupportedEffort(vertexModel, reasoning);
			// The Gemini helpers are typed for "google-generative-ai" models; re-type the Vertex model to reuse them.
			const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;

			if (geminiModel.thinking?.mode === "google-level") {
				return castApi<"google-vertex">({
					...base,
					thinking: {
						enabled: true,
						level: mapEffortToGoogleThinkingLevel(geminiModel, effort),
					},
					toolChoice: mapGoogleToolChoice(options?.toolChoice),
				});
			}

			return castApi<"google-vertex">({
				...base,
				thinking: {
					enabled: true,
					budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
				},
				toolChoice: mapGoogleToolChoice(options?.toolChoice),
			});
		}

		case "ollama-chat":
			return castApi<"ollama-chat">({
				...base,
				reasoning: resolveOpenAiReasoningEffort(model, options),
				toolChoice: options?.toolChoice,
			});

		case "cursor-agent": {
			// Cursor-specific handlers win over the generic ones when both are supplied.
			const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
			const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;
			return castApi<"cursor-agent">({
				...base,
				execHandlers,
				onToolResult,
			});
		}

		default:
			throw new Error(`Unhandled API in mapOptionsForApi: ${model.api}`);
	}
}
866
+
867
/**
 * Resolves the thinking token budget for a budget-based Gemini model.
 *
 * Precedence: a caller-supplied budget for `effort`, then the built-in table
 * for model ids containing "2.5-", then `-1` for any other model.
 *
 * @param model - Gemini model being called.
 * @param effort - Requested effort level; first passed through `requireSupportedEffort`.
 * @param customBudgets - Optional per-effort overrides.
 * @returns The budget in tokens, or `-1` (dynamic) when the model is not recognised.
 */
function getGoogleBudget(
	model: Model<"google-generative-ai">,
	effort: Effort,
	customBudgets?: ThinkingBudgets,
): number {
	requireSupportedEffort(model, effort);

	// Custom budgets take precedence if provided for this level
	const override = customBudgets?.[effort];
	if (override !== undefined) {
		return override;
	}

	// Unknown model - use dynamic
	if (!model.id.includes("2.5-")) {
		return -1;
	}

	// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
	if (effort === "minimal") return 128;
	if (effort === "low") return 2048;
	if (effort === "medium") return 8192;
	// Remaining effort levels get the largest budget, which differs between flash and non-flash ids.
	return model.id.includes("2.5-flash") ? 24576 : 32768;
}