@prometheus-ai/ai 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -1,2904 +0,0 @@
1
- import type { ModelManagerOptions } from "../model-manager";
2
- import { Effort } from "../model-thinking";
3
- import { getBundledModels } from "../models";
4
- import type { Api, Model, Provider, ThinkingConfig } from "../types";
5
- import { isAnthropicOAuthToken, isRecord, toBoolean, toNumber, toPositiveNumber } from "../utils";
6
- import {
7
- fetchOpenAICompatibleModels,
8
- type OpenAICompatibleModelMapperContext,
9
- type OpenAICompatibleModelRecord,
10
- } from "../utils/discovery/openai-compatible";
11
- import { toFireworksPublicModelId } from "../utils/fireworks-model-id";
12
- import { getGitHubCopilotBaseUrl, OPENCODE_HEADERS, parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
13
- import { createBundledReferenceMap, createReferenceResolver } from "./bundled-references";
14
-
15
- const MODELS_DEV_URL = "https://models.dev/api.json";
16
- const ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1";
17
- const ANTHROPIC_OAUTH_BETA =
18
- "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,mid-conversation-system-2026-04-07,advanced-tool-use-2025-11-20,effort-2025-11-24,extended-cache-ttl-2025-04-11";
19
-
20
- export interface ModelsDevModel {
21
- id?: string;
22
- name?: string;
23
- tool_call?: boolean;
24
- reasoning?: boolean;
25
- limit?: {
26
- context?: number;
27
- output?: number;
28
- };
29
- cost?: {
30
- input?: number;
31
- output?: number;
32
- cache_read?: number;
33
- cache_write?: number;
34
- };
35
- modalities?: {
36
- input?: string[];
37
- };
38
- status?: string;
39
- provider?: { npm?: string };
40
- }
41
-
42
- function toModelName(value: unknown, fallback: string): string {
43
- if (typeof value !== "string") {
44
- return fallback;
45
- }
46
- const trimmed = value.trim();
47
- return trimmed.length > 0 ? trimmed : fallback;
48
- }
49
-
50
- function toInputCapabilities(value: unknown): ("text" | "image")[] {
51
- if (!Array.isArray(value)) {
52
- return ["text"];
53
- }
54
- const supportsImage = value.some(item => item === "image");
55
- return supportsImage ? ["text", "image"] : ["text"];
56
- }
57
-
58
- async function fetchModelsDevPayload(fetchImpl: typeof fetch = fetch): Promise<unknown> {
59
- const response = await fetchImpl(MODELS_DEV_URL, {
60
- method: "GET",
61
- headers: { Accept: "application/json" },
62
- });
63
- if (!response.ok) {
64
- throw new Error(`models.dev fetch failed: ${response.status}`);
65
- }
66
- return response.json();
67
- }
68
-
69
- function mapAnthropicModelsDev(payload: unknown, baseUrl: string): Model<"anthropic-messages">[] {
70
- if (!isRecord(payload)) {
71
- return [];
72
- }
73
- const anthropicPayload = payload.anthropic;
74
- if (!isRecord(anthropicPayload)) {
75
- return [];
76
- }
77
- const modelsValue = anthropicPayload.models;
78
- if (!isRecord(modelsValue)) {
79
- return [];
80
- }
81
-
82
- const models: Model<"anthropic-messages">[] = [];
83
- for (const [modelId, rawModel] of Object.entries(modelsValue)) {
84
- if (!isRecord(rawModel)) {
85
- continue;
86
- }
87
- const model = rawModel as ModelsDevModel;
88
- if (model.tool_call !== true) {
89
- continue;
90
- }
91
- models.push({
92
- id: modelId,
93
- name: toModelName(model.name, modelId),
94
- api: "anthropic-messages",
95
- provider: "anthropic",
96
- baseUrl,
97
- reasoning: model.reasoning === true,
98
- input: toInputCapabilities(model.modalities?.input),
99
- cost: {
100
- input: toNumber(model.cost?.input) ?? 0,
101
- output: toNumber(model.cost?.output) ?? 0,
102
- cacheRead: toNumber(model.cost?.cache_read) ?? 0,
103
- cacheWrite: toNumber(model.cost?.cache_write) ?? 0,
104
- },
105
- contextWindow: toPositiveNumber(model.limit?.context, UNK_CONTEXT_WINDOW),
106
- maxTokens: toPositiveNumber(model.limit?.output, UNK_MAX_TOKENS),
107
- });
108
- }
109
-
110
- models.sort((left, right) => left.id.localeCompare(right.id));
111
- return models;
112
- }
113
-
114
- function buildAnthropicDiscoveryHeaders(apiKey: string): Record<string, string> {
115
- const oauthToken = isAnthropicOAuthToken(apiKey);
116
- const headers: Record<string, string> = {
117
- "anthropic-version": "2023-06-01",
118
- "anthropic-dangerous-direct-browser-access": "true",
119
- "anthropic-beta": ANTHROPIC_OAUTH_BETA,
120
- };
121
- if (oauthToken) {
122
- headers.Authorization = `Bearer ${apiKey}`;
123
- } else {
124
- headers["x-api-key"] = apiKey;
125
- }
126
- return headers;
127
- }
128
-
129
- function buildAnthropicReferenceMap(
130
- modelsDevModels: readonly Model<"anthropic-messages">[],
131
- ): Map<string, Model<"anthropic-messages">> {
132
- const merged = new Map<string, Model<"anthropic-messages">>();
133
- for (const model of modelsDevModels) {
134
- merged.set(model.id, model);
135
- }
136
- // Anthropic /v1/models does not carry token limits, so bundled metadata stays canonical
137
- // for known models while models.dev only fills gaps for newly discovered ids.
138
- const bundledModels = getBundledModels("anthropic").filter(
139
- (model): model is Model<"anthropic-messages"> => model.api === "anthropic-messages",
140
- );
141
- for (const model of bundledModels) {
142
- merged.set(model.id, model);
143
- }
144
- return merged;
145
- }
146
-
147
- function mapWithBundledReference<TApi extends Api>(
148
- entry: OpenAICompatibleModelRecord,
149
- defaults: Model<TApi>,
150
- reference: Model<TApi> | undefined,
151
- ): Model<TApi> {
152
- const name = toModelName(entry.name, reference?.name ?? defaults.name);
153
- if (!reference) {
154
- return {
155
- ...defaults,
156
- name,
157
- };
158
- }
159
- return {
160
- ...reference,
161
- id: defaults.id,
162
- name,
163
- baseUrl: defaults.baseUrl,
164
- contextWindow: toPositiveNumber(entry.context_length, reference.contextWindow),
165
- maxTokens: toPositiveNumber(entry.max_completion_tokens, reference.maxTokens),
166
- };
167
- }
168
-
169
- function normalizeAnthropicBaseUrl(baseUrl: string | undefined, fallback: string): string {
170
- const value = baseUrl?.trim();
171
- if (!value) {
172
- return fallback;
173
- }
174
- return value.endsWith("/") ? value.slice(0, -1) : value;
175
- }
176
-
177
- function toAnthropicDiscoveryBaseUrl(baseUrl: string): string {
178
- return baseUrl.endsWith("/v1") ? baseUrl : `${baseUrl}/v1`;
179
- }
180
-
181
- function normalizeOllamaBaseUrl(baseUrl?: string): string {
182
- const value = baseUrl?.trim();
183
- if (!value) {
184
- return "http://127.0.0.1:11434/v1";
185
- }
186
- const trimmed = value.endsWith("/") ? value.slice(0, -1) : value;
187
- return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
188
- }
189
-
190
- function toOllamaNativeBaseUrl(baseUrl: string): string {
191
- return baseUrl.endsWith("/v1") ? baseUrl.slice(0, -3) : baseUrl;
192
- }
193
-
194
- async function fetchOllamaNativeModels(
195
- baseUrl: string,
196
- resolveMetadata: (modelId: string) => Promise<OllamaResolvedMetadata>,
197
- ): Promise<Model<"openai-responses">[] | null> {
198
- const nativeBaseUrl = toOllamaNativeBaseUrl(baseUrl);
199
- let response: Response;
200
- try {
201
- response = await fetch(`${nativeBaseUrl}/api/tags`, {
202
- method: "GET",
203
- headers: { Accept: "application/json" },
204
- });
205
- } catch {
206
- return null;
207
- }
208
- if (!response.ok) {
209
- return null;
210
- }
211
- const payload = (await response.json()) as { models?: Array<{ name?: string; model?: string }> };
212
- const entries = payload.models ?? [];
213
- const resolved = await Promise.all(
214
- entries.map(async (entry): Promise<Model<"openai-responses"> | null> => {
215
- const id = entry.model ?? entry.name;
216
- if (!id) return null;
217
- const metadata = await resolveMetadata(id);
218
- return {
219
- id,
220
- name: entry.name ?? id,
221
- api: "openai-responses",
222
- provider: "ollama",
223
- baseUrl,
224
- reasoning: metadata.reasoning ?? false,
225
- thinking: metadata.thinking,
226
- input: metadata.input ?? ["text"],
227
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
228
- contextWindow: metadata.contextWindow,
229
- maxTokens: metadata.maxTokens,
230
- };
231
- }),
232
- );
233
- const models: Model<"openai-responses">[] = resolved.filter((m): m is Model<"openai-responses"> => m !== null);
234
- return models.sort((left, right) => left.id.localeCompare(right.id));
235
- }
236
-
237
- /**
238
- * Fallback context window for Ollama models when `/api/show` is unavailable
239
- * or omits a `model_info.<arch>.context_length` field. Matches the size
240
- * Ollama's cloud catalog reports for stock models.
241
- */
242
- const OLLAMA_FALLBACK_CONTEXT_WINDOW = 128_000;
243
- /** Cap max output tokens at a value that matches Prometheus's other openai-responses defaults. */
244
- const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
245
- /**
246
- * Ollama's OpenAI-compatible `reasoning.effort` only accepts
247
- * `high|medium|low|max|none`; passing Prometheus's `minimal`/`xhigh` levels verbatim
248
- * makes the server reject the turn with HTTP 400 `invalid reasoning value`.
249
- * Map the two unsupported levels onto the closest accepted ones (`low`/`max`).
250
- */
251
- const OLLAMA_REASONING_EFFORT_MAP = { minimal: "low", xhigh: "max" } as const;
252
-
253
- /** Stamp the Ollama reasoning-effort map onto a reasoning-capable model. */
254
- function applyOllamaReasoningCompat(model: Model<"openai-responses">): void {
255
- if (!model.reasoning) return;
256
- model.compat = {
257
- ...model.compat,
258
- reasoningEffortMap: { ...OLLAMA_REASONING_EFFORT_MAP, ...model.compat?.reasoningEffortMap },
259
- };
260
- }
261
-
262
/** Metadata parsed from one `/api/show` response; every field is optional. */
interface OllamaShowMetadata {
	contextWindow?: number;
	maxTokens?: number;
	capabilities?: string[];
	reasoning?: boolean;
	thinking?: ThinkingConfig;
	input?: ("text" | "image")[];
}

/** `/api/show` metadata after fallbacks are applied: both token limits are always set. */
interface OllamaResolvedMetadata extends OllamaShowMetadata {
	contextWindow: number;
	maxTokens: number;
}
279
-
280
/**
 * Extract a context-window size from an Ollama `model_info` record.
 *
 * Keys are architecture-prefixed (for example `<arch>.context_length`), so the
 * lookup matches on suffix. Suffixes are tried in a fixed priority order:
 * `.context_length`, then `.num_ctx`, then `.context_window`. This keeps the
 * result independent of key iteration order when more than one is present.
 *
 * @param modelInfo The `model_info` object from `/api/show`, if any.
 * @returns The first positive, finite numeric value found, or `undefined`
 *   when `modelInfo` is missing or holds no usable entry.
 */
function getOllamaContextWindow(modelInfo: Record<string, unknown> | undefined): number | undefined {
	if (!modelInfo) {
		return undefined;
	}
	const suffixes = [".context_length", ".num_ctx", ".context_window"];
	const entries = Object.entries(modelInfo);
	for (const suffix of suffixes) {
		for (const [key, value] of entries) {
			// JSON can yield Infinity (e.g. `1e999`), so require a finite value.
			if (typeof value === "number" && Number.isFinite(value) && value > 0 && key.endsWith(suffix)) {
				return value;
			}
		}
	}
	return undefined;
}
293
-
294
/**
 * Normalise the `capabilities` field of an `/api/show` payload.
 *
 * @returns The string members of `value` when it is an array (non-string
 *   members are dropped), otherwise `undefined`.
 */
function getOllamaCapabilities(value: unknown): string[] | undefined {
	return Array.isArray(value) ? value.filter((item): item is string => typeof item === "string") : undefined;
}
300
-
301
/**
 * Derive a thinking configuration from an Ollama capability list.
 *
 * @returns An effort-mode config spanning `Effort.Minimal` to `Effort.High`
 *   when the list contains `"thinking"`; `undefined` when it does not or when
 *   no list is given.
 */
function getOllamaThinkingConfig(capabilities: string[] | undefined): ThinkingConfig | undefined {
	const supportsThinking = capabilities?.includes("thinking") ?? false;
	if (!supportsThinking) {
		return undefined;
	}
	const config: ThinkingConfig = { mode: "effort", minLevel: Effort.Minimal, maxLevel: Effort.High };
	return config;
}
311
-
312
/**
 * POST to Ollama's `/api/show` for one model and extract context and
 * capability metadata from the reply.
 *
 * Best-effort by design: a non-OK status, a network failure, or an
 * unparseable body all resolve to `undefined` so the caller can apply its own
 * fallback.
 *
 * @param nativeBaseUrl Base URL of the native Ollama API; `/api/show` is appended.
 * @param modelId Model identifier sent as `model` in the request body.
 */
async function fetchOllamaShowMetadata(
	nativeBaseUrl: string,
	modelId: string,
): Promise<OllamaShowMetadata | undefined> {
	try {
		const response = await fetch(`${nativeBaseUrl}/api/show`, {
			method: "POST",
			headers: { "Content-Type": "application/json", Accept: "application/json" },
			body: JSON.stringify({ model: modelId }),
		});
		if (!response.ok) {
			return undefined;
		}

		const payload = (await response.json()) as { capabilities?: unknown; model_info?: Record<string, unknown> };
		const capabilities = getOllamaCapabilities(payload.capabilities);
		const contextWindow = getOllamaContextWindow(payload.model_info);

		// Without a capability list, leave reasoning and input unspecified.
		let reasoning: boolean | undefined;
		let input: ("text" | "image")[] | undefined;
		if (capabilities) {
			reasoning = capabilities.includes("thinking");
			input = capabilities.includes("vision") ? ["text", "image"] : ["text"];
		}

		return {
			contextWindow,
			// Only advertise an output cap when a context window was found.
			maxTokens: contextWindow ? OLLAMA_DEFAULT_MAX_TOKENS : undefined,
			capabilities,
			reasoning,
			thinking: getOllamaThinkingConfig(capabilities),
			input,
		};
	} catch {
		// Swallowed on purpose; the caller decides on the fallback.
		return undefined;
	}
}
350
-
351
/**
 * Create a per-model metadata resolver backed by `/api/show`.
 *
 * The returned function memoises its promise per model id, so concurrent and
 * repeated lookups share one request. A lookup that yields no metadata
 * resolves to the fallback limits and removes itself from the cache, as does
 * a rejected lookup, so a later refresh can try again.
 *
 * @param nativeBaseUrl Base URL of the native Ollama API.
 */
function createOllamaMetadataResolver(nativeBaseUrl: string): (modelId: string) => Promise<OllamaResolvedMetadata> {
	const lookups = new Map<string, Promise<OllamaResolvedMetadata>>();

	const resolve = async (modelId: string): Promise<OllamaResolvedMetadata> => {
		const shown = await fetchOllamaShowMetadata(nativeBaseUrl, modelId);
		if (shown) {
			return {
				...shown,
				contextWindow: shown.contextWindow ?? OLLAMA_FALLBACK_CONTEXT_WINDOW,
				maxTokens: shown.maxTokens ?? OLLAMA_DEFAULT_MAX_TOKENS,
			};
		}
		// Failed lookup: do not keep the fallback cached.
		lookups.delete(modelId);
		return { contextWindow: OLLAMA_FALLBACK_CONTEXT_WINDOW, maxTokens: OLLAMA_DEFAULT_MAX_TOKENS };
	};

	return modelId => {
		const known = lookups.get(modelId);
		if (known) return known;
		const pending = resolve(modelId);
		lookups.set(modelId, pending);
		void pending.catch(() => lookups.delete(modelId));
		return pending;
	};
}
379
-
380
/** Lower-case id prefixes of OpenAI models that are not served by the Responses API. */
const OPENAI_NON_RESPONSES_PREFIXES = [
	"text-embedding",
	"whisper-",
	"tts-",
	"omni-moderation",
	"omni-transcribe",
	"omni-speech",
	"gpt-image-",
	"gpt-realtime",
] as const;

/**
 * Heuristic: does this id look like an OpenAI model usable with the Responses API?
 *
 * A trimmed id present in `references` is always accepted. Otherwise the id is
 * lower-cased and rejected when it starts with a known non-Responses prefix or
 * contains `embedding`, and accepted only when it starts with `gpt-`, `o1`,
 * `o3`, `o4` or `chatgpt`.
 *
 * @param id Raw model id; surrounding whitespace is ignored.
 * @param references Known models keyed by exact (case-sensitive) id.
 */
function isLikelyOpenAIResponsesModelId(id: string, references: Map<string, Model<"openai-responses">>): boolean {
	const trimmed = id.trim();
	if (trimmed.length === 0) return false;
	if (references.has(trimmed)) return true;

	const lowered = trimmed.toLowerCase();
	const excluded =
		OPENAI_NON_RESPONSES_PREFIXES.some(prefix => lowered.startsWith(prefix)) || lowered.includes("embedding");
	if (excluded) return false;

	return ["gpt-", "o1", "o3", "o4", "chatgpt"].some(prefix => lowered.startsWith(prefix));
}
414
-
415
/** Substrings that mark a NanoGPT model id as something other than a text chat model. */
const NANO_GPT_NON_TEXT_MODEL_TOKENS = [
	"embedding",
	"image",
	"vision",
	"audio",
	"speech",
	"transcribe",
	"moderation",
	"realtime",
	"whisper",
	"tts",
] as const;

/** Regex matching NanoGPT `:thinking` suffixed model IDs (with or without a level). */
const NANO_GPT_THINKING_SUFFIX_RE = /:thinking(:[^:]+)?$/;

/**
 * Heuristic: does this id look like a plain NanoGPT text model?
 *
 * The id is trimmed and lower-cased first. Empty ids, ids carrying a
 * `:thinking` suffix, and ids containing any non-text token are rejected.
 */
function isLikelyNanoGptTextModelId(id: string): boolean {
	const lowered = id.trim().toLowerCase();
	if (lowered === "" || NANO_GPT_THINKING_SUFFIX_RE.test(lowered)) {
		return false;
	}
	return NANO_GPT_NON_TEXT_MODEL_TOKENS.every(token => !lowered.includes(token));
}
441
-
442
/** Minimal credentials and endpoint override shared by the simple provider factories. */
type SimpleProviderConfig = { apiKey?: string; baseUrl?: string };

/**
 * Build model-manager options for a provider exposing an OpenAI-compatible
 * chat-completions model list.
 *
 * Dynamic discovery is only wired up when an API key is supplied; without one
 * the options carry just the provider id. Discovered entries are merged with
 * the bundled reference of the same id.
 *
 * @param providerId Bundled-catalog provider id.
 * @param defaultBaseUrl Endpoint used when `config.baseUrl` is absent.
 * @param config Optional API key and base-URL override.
 */
function createSimpleOpenAICompletionsOptions(
	providerId: Parameters<typeof getBundledModels>[0],
	defaultBaseUrl: string,
	config?: SimpleProviderConfig,
): ModelManagerOptions<"openai-completions"> {
	const apiKey = config?.apiKey;
	const baseUrl = config?.baseUrl ?? defaultBaseUrl;
	const references = createBundledReferenceMap<"openai-completions">(providerId);
	if (!apiKey) {
		return { providerId };
	}
	return {
		providerId,
		fetchDynamicModels: () =>
			fetchOpenAICompatibleModels({
				api: "openai-completions",
				provider: providerId,
				baseUrl,
				apiKey,
				mapModel: (entry, defaults) => mapWithBundledReference(entry, defaults, references.get(defaults.id)),
			}),
	};
}
469
-
470
/**
 * Build model-manager options for a provider exposing an OpenAI-compatible
 * model list whose models are driven through the Responses API.
 *
 * Dynamic discovery is only wired up when an API key is supplied; without one
 * the options carry just the provider id. Discovered entries are merged with
 * the bundled reference of the same id.
 *
 * @param providerId Bundled-catalog provider id.
 * @param defaultBaseUrl Endpoint used when `config.baseUrl` is absent.
 * @param config Optional API key and base-URL override.
 */
function createSimpleOpenAIResponsesOptions(
	providerId: Parameters<typeof getBundledModels>[0],
	defaultBaseUrl: string,
	config?: SimpleProviderConfig,
): ModelManagerOptions<"openai-responses"> {
	const apiKey = config?.apiKey;
	const baseUrl = config?.baseUrl ?? defaultBaseUrl;
	const references = createBundledReferenceMap<"openai-responses">(providerId);
	if (!apiKey) {
		return { providerId };
	}
	return {
		providerId,
		fetchDynamicModels: () =>
			fetchOpenAICompatibleModels({
				api: "openai-responses",
				provider: providerId,
				baseUrl,
				apiKey,
				mapModel: (entry, defaults) => mapWithBundledReference(entry, defaults, references.get(defaults.id)),
			}),
	};
}
495
-
496
/**
 * Build model-manager options for a provider speaking the Anthropic Messages API.
 *
 * The configured base URL is normalised once. Discovery runs against the
 * derived discovery URL with Anthropic discovery headers, while every
 * discovered model is stamped with the normalised (non-discovery) base URL.
 * The model name prefers the entry's `display_name`. Dynamic discovery is
 * only wired up when an API key is supplied.
 *
 * @param providerId Bundled-catalog provider id.
 * @param defaultBaseUrlFallback Endpoint used when `config.baseUrl` is absent.
 * @param config Optional API key and base-URL override.
 */
function createSimpleAnthropicProviderOptions(
	providerId: Parameters<typeof getBundledModels>[0],
	defaultBaseUrlFallback: string,
	config?: SimpleProviderConfig,
): ModelManagerOptions<"anthropic-messages"> {
	const apiKey = config?.apiKey;
	const baseUrl = normalizeAnthropicBaseUrl(config?.baseUrl, defaultBaseUrlFallback);
	const discoveryBaseUrl = toAnthropicDiscoveryBaseUrl(baseUrl);
	const references = createBundledReferenceMap<"anthropic-messages">(providerId);
	if (!apiKey) {
		return { providerId };
	}
	return {
		providerId,
		fetchDynamicModels: () =>
			fetchOpenAICompatibleModels({
				api: "anthropic-messages",
				provider: providerId,
				baseUrl: discoveryBaseUrl,
				headers: buildAnthropicDiscoveryHeaders(apiKey),
				mapModel: (entry, defaults) => {
					const merged = mapWithBundledReference(entry, defaults, references.get(defaults.id));
					const name = toModelName(entry.display_name, merged.name);
					return { ...merged, name, baseUrl };
				},
			}),
	};
}
527
-
528
- // ---------------------------------------------------------------------------
529
- // 1. OpenAI
530
- // ---------------------------------------------------------------------------
531
-
532
/** Credentials and endpoint override for the OpenAI model manager. */
export interface OpenAIModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/**
 * Model-manager options for OpenAI (Responses API).
 *
 * With an API key, dynamic discovery lists the provider's models, keeps those
 * that pass {@link isLikelyOpenAIResponsesModelId}, and merges each with its
 * bundled reference. Without a key only the provider id is returned.
 */
export function openaiModelManagerOptions(config?: OpenAIModelManagerConfig): ModelManagerOptions<"openai-responses"> {
	const apiKey = config?.apiKey;
	const baseUrl = config?.baseUrl ?? "https://api.openai.com/v1";
	const references = createBundledReferenceMap<"openai-responses">("openai");
	if (!apiKey) {
		return { providerId: "openai" };
	}
	return {
		providerId: "openai",
		fetchDynamicModels: () =>
			fetchOpenAICompatibleModels({
				api: "openai-responses",
				provider: "openai",
				baseUrl,
				apiKey,
				filterModel: (_entry, model) => isLikelyOpenAIResponsesModelId(model.id, references),
				mapModel: (entry, defaults) => mapWithBundledReference(entry, defaults, references.get(defaults.id)),
			}),
	};
}
559
-
560
- // ---------------------------------------------------------------------------
561
- // 2. Groq
562
- // ---------------------------------------------------------------------------
563
-
564
/** Credentials and endpoint override for the Groq model manager. */
export interface GroqModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/** Model-manager options for Groq's OpenAI-compatible chat-completions endpoint. */
export function groqModelManagerOptions(config?: GroqModelManagerConfig): ModelManagerOptions<"openai-completions"> {
	const defaultBaseUrl = "https://api.groq.com/openai/v1";
	return createSimpleOpenAICompletionsOptions("groq", defaultBaseUrl, config);
}
572
-
573
- // ---------------------------------------------------------------------------
574
- // 3. Cerebras
575
- // ---------------------------------------------------------------------------
576
-
577
/** Credentials and endpoint override for the Cerebras model manager. */
export interface CerebrasModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/** Model-manager options for Cerebras' OpenAI-compatible chat-completions endpoint. */
export function cerebrasModelManagerOptions(
	config?: CerebrasModelManagerConfig,
): ModelManagerOptions<"openai-completions"> {
	const defaultBaseUrl = "https://api.cerebras.ai/v1";
	return createSimpleOpenAICompletionsOptions("cerebras", defaultBaseUrl, config);
}
587
-
588
- // ---------------------------------------------------------------------------
589
- // 4. Hugging Face
590
- // ---------------------------------------------------------------------------
591
-
592
/** Credentials and endpoint override for the Hugging Face model manager. */
export interface HuggingfaceModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/** Model-manager options for the Hugging Face router's OpenAI-compatible endpoint. */
export function huggingfaceModelManagerOptions(
	config?: HuggingfaceModelManagerConfig,
): ModelManagerOptions<"openai-completions"> {
	const defaultBaseUrl = "https://router.huggingface.co/v1";
	return createSimpleOpenAICompletionsOptions("huggingface", defaultBaseUrl, config);
}
602
-
603
- // ---------------------------------------------------------------------------
604
- // 5. NVIDIA
605
- // ---------------------------------------------------------------------------
606
-
607
/** Credentials and endpoint override for the NVIDIA model manager. */
export interface NvidiaModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/** Model-manager options for NVIDIA's OpenAI-compatible chat-completions endpoint. */
export function nvidiaModelManagerOptions(
	config?: NvidiaModelManagerConfig,
): ModelManagerOptions<"openai-completions"> {
	const defaultBaseUrl = "https://integrate.api.nvidia.com/v1";
	return createSimpleOpenAICompletionsOptions("nvidia", defaultBaseUrl, config);
}
617
-
618
- // ---------------------------------------------------------------------------
619
- // 6. xAI
620
- // ---------------------------------------------------------------------------
621
-
622
/** Credentials and endpoint override for the API-key xAI model manager. */
export interface XaiModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/** Model-manager options for xAI's OpenAI-compatible chat-completions endpoint. */
export function xaiModelManagerOptions(config?: XaiModelManagerConfig): ModelManagerOptions<"openai-completions"> {
	const defaultBaseUrl = "https://api.x.ai/v1";
	return createSimpleOpenAICompletionsOptions("xai", defaultBaseUrl, config);
}

/** Credentials and endpoint override for the OAuth-backed xAI model manager. */
export interface XaiOAuthModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}
635
-
636
/** One hand-curated xai-oauth chat model, layered over seeded or dynamically fetched data. */
interface XAICuratedModel {
	id: string;
	contextWindow: number;
	name?: string;
	/** Whether the model reasons natively. Defaults to true for Grok-4.x family. */
	reasoning?: boolean;
	/**
	 * Whether xAI accepts the `reasoning.effort` wire param for this model.
	 * Default true. When false: picker hides the effort dial (via
	 * getSupportedEfforts in model-thinking.ts) AND wire-side already omits
	 * the param via GROK_EFFORT_CAPABLE_PREFIXES in providers/xai-responses.ts.
	 * Must agree with that allowlist; the two are kept in sync by
	 * curated-catalog author convention until a follow-up change unifies them.
	 */
	supportsReasoningEffort?: boolean;
	/**
	 * Input modalities this model accepts. Defaults to `["text"]` when absent.
	 * Vision-capable Grok models MUST list `"image"` here so the curated layer
	 * overrides `fetchOpenAICompatibleModels`' default of `["text"]` (which
	 * otherwise strips image capability on every online refresh).
	 */
	input?: ("text" | "image")[];
}

// Source of truth for the xai-oauth chat picker. Top of list = headline.
// Context windows from hermes-agent/agent/model_metadata.py:205-220
// ("Values sourced from models.dev (2026-04)"). grok-build is xAI's
// coding-fine-tuned chat model; 512K context per user spec (2026-05-17).
//
// supportsReasoningEffort=false entries reason natively but reject the wire
// `reasoning.effort` param (api.x.ai returns HTTP 400). Mirrors the HTTP-side
// GROK_EFFORT_CAPABLE_PREFIXES allowlist in providers/xai-responses.ts. The
// curated flag is the picker-visible truth; the HTTP allowlist is the wire
// truth. omitReasoningEffort in xai-responses.ts already prevents 400s; this
// fixes the picker UX wart of advertising an inert dial.
//
// No `as const` here: the explicit `readonly XAICuratedModel[]` annotation
// already fixes the element type, and a const assertion would turn each
// `input` literal into a readonly tuple while the field is a mutable array.
export const XAI_OAUTH_CURATED_MODELS: readonly XAICuratedModel[] = [
	// grok-build is text-only per the bundled catalog; omit `input` for the default.
	{ id: "grok-build", contextWindow: 512_000, name: "Grok Build", supportsReasoningEffort: false },
	{ id: "grok-4.3", contextWindow: 1_000_000, name: "Grok 4.3", input: ["text", "image"] },
	// grok-4.20-multi-agent-0309 is text-only per the bundled catalog; omit `input` for the default.
	{ id: "grok-4.20-multi-agent-0309", contextWindow: 2_000_000, name: "Grok 4.20 (Multi-Agent)" },
	{
		id: "grok-4.20-0309-reasoning",
		contextWindow: 2_000_000,
		name: "Grok 4.20 (Reasoning)",
		supportsReasoningEffort: false,
		input: ["text", "image"],
	},
	{
		id: "grok-4.20-0309-non-reasoning",
		contextWindow: 2_000_000,
		name: "Grok 4.20 (Non-Reasoning)",
		reasoning: false,
		input: ["text", "image"],
	},
];

// xAI /v1/models returns chat, image, voice, and STT entries. Tool surfaces
// route through dedicated tools (generate_image, tts) with their own model
// strings; the chat picker MUST exclude these prefixes or selecting them 400s.
const XAI_NON_CHAT_PREFIXES = ["grok-imagine-", "grok-stt-", "grok-voice-"] as const;

// Hermes-agent parity: only the `minimal -> low` clamp is applied (see
// hermes-agent/agent/transports/codex.py:92 `_effort_clamp = {"minimal":
// "low"}`). Hermes sends `xhigh` to xAI verbatim and we match that contract
// — let xAI decide if the level is valid for the specific Grok model.
// applyResponsesReasoningParams runs this through `model.compat.reasoningEffortMap`
// at request time, downstream of the omitReasoningEffort gate in xai-responses.ts.
const XAI_REASONING_EFFORT_MAP = { minimal: "low" } as const;

/**
 * Layer one curated entry on top of a base model.
 *
 * Used by the static seed and by the dynamic overlay/inject paths so curated
 * reasoning and effort flags survive an online refresh.
 *
 * - `contextWindow` always comes from the curated entry.
 * - `name` and `input` come from the curated entry when set, else from `base`.
 * - `reasoning` is the curated value, defaulting to `true`.
 * - `compat.reasoningEffortMap` starts from the `minimal -> low` clamp, with
 *   any mapping already on `base` taking precedence.
 * - `compat.supportsReasoningEffort` is written only when the curated entry
 *   specifies it.
 *
 * @param base Seed or dynamically fetched model; not mutated.
 * @param curated Curated metadata to apply.
 * @returns A new model object.
 */
function mergeCuratedIntoModel(base: Model<"openai-responses">, curated: XAICuratedModel): Model<"openai-responses"> {
	const effort = curated.supportsReasoningEffort;
	const compat = {
		...(base.compat ?? {}),
		reasoningEffortMap: { ...XAI_REASONING_EFFORT_MAP, ...(base.compat?.reasoningEffortMap ?? {}) },
		...(effort === undefined ? {} : { supportsReasoningEffort: effort }),
	};
	return {
		...base,
		contextWindow: curated.contextWindow,
		name: curated.name ?? base.name,
		reasoning: curated.reasoning ?? true,
		input: curated.input ?? base.input,
		compat,
	};
}

/**
 * Overlay/inject curated xai-oauth metadata onto dynamic-fetch results so
 * a successful online refresh doesn't regress vision capability, context
 * window, reasoning flags, or the effort-dial allowlist.
 *
 * Steps:
 *  1. Drop ids starting with any of `XAI_NON_CHAT_PREFIXES`.
 *  2. For each curated id present in the fetch, overlay the curated metadata.
 *  3. For each curated id missing from the fetch, inject an entry cloned from
 *     the first surviving dynamic model, so required fields (api, provider,
 *     baseUrl, cost, ...) inherit its values. The clone's `id`, `name` and
 *     `input` are reset first: `name` falls back to the curated id, and
 *     `input` falls back to `["text"]` rather than to the unrelated
 *     template's modalities. When nothing survives step 1, injection is
 *     skipped.
 *
 * @param dynamic Models returned by the dynamic fetch; not mutated.
 * @returns Curated models in declaration order, then the remaining dynamic
 *   models in their original order.
 */
function applyXAIOAuthCuration(dynamic: readonly Model<"openai-responses">[]): Model<"openai-responses">[] {
	const filtered = dynamic.filter(e => !XAI_NON_CHAT_PREFIXES.some(p => e.id.startsWith(p)));
	const byId = new Map<string, Model<"openai-responses">>(filtered.map(e => [e.id, e]));
	const template = filtered[0];

	for (const curated of XAI_OAUTH_CURATED_MODELS) {
		const existing = byId.get(curated.id);
		if (existing) {
			byId.set(curated.id, mergeCuratedIntoModel(existing, curated));
			continue;
		}
		if (!template) {
			continue;
		}
		// Reset the model-specific fields that the merge helper would otherwise
		// take from the template, so text-only curated entries never inherit
		// image input from whichever model happened to be listed first.
		const base: Model<"openai-responses"> = {
			...template,
			id: curated.id,
			name: curated.id,
			input: ["text"],
		};
		byId.set(curated.id, mergeCuratedIntoModel(base, curated));
	}

	const curatedIds = new Set(XAI_OAUTH_CURATED_MODELS.map(c => c.id));
	const curatedFirst = XAI_OAUTH_CURATED_MODELS.map(c => byId.get(c.id)).filter(
		(e): e is Model<"openai-responses"> => e !== undefined,
	);
	const rest = filtered.filter(e => !curatedIds.has(e.id));
	return [...curatedFirst, ...rest];
}
785
-
786
/**
 * Render `XAI_OAUTH_CURATED_MODELS` as full `Model<"openai-responses">` entries.
 *
 * Consumed by both
 *  - {@link xaiOAuthModelManagerOptions}, as the runtime static seed that
 *    populates the picker on a fresh login, and
 *  - `packages/ai/scripts/generate-models.ts`, which bundles the same entries
 *    into `models.json` so the synchronous `ModelRegistry.#loadModels()` boot
 *    path sees `xai-oauth` without waiting for a refresh (fixes the boot-time
 *    default-model reset when `modelRoles.default = "xai-oauth/<id>"`).
 *
 * `reasoning` defaults to `true` for the Grok-4.x family; the explicit
 * `grok-4.20-0309-non-reasoning` entry opts out via `XAICuratedModel.reasoning`.
 * `maxTokens` uses `UNK_MAX_TOKENS` so id-keyed overlays from a successful
 * dynamic fetch merge cleanly. Mirrors
 * `hermes-agent/hermes_cli/models.py:_XAI_STATIC_FALLBACK`.
 *
 * @param baseUrl Endpoint stamped on every entry; defaults to `https://api.x.ai/v1`.
 */
export function buildXaiOAuthStaticSeed(baseUrl?: string): Model<"openai-responses">[] {
	const resolvedBaseUrl = baseUrl ?? "https://api.x.ai/v1";

	// Bare text-only base for one curated id. `name` starts as the id, and the
	// merge helper replaces it with `curated.name` whenever that is set.
	const bareBase = (curated: XAICuratedModel): Model<"openai-responses"> => ({
		id: curated.id,
		name: curated.id,
		api: "openai-responses",
		provider: "xai-oauth",
		baseUrl: resolvedBaseUrl,
		reasoning: true,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: curated.contextWindow,
		maxTokens: UNK_MAX_TOKENS,
		compat: { reasoningEffortMap: XAI_REASONING_EFFORT_MAP },
	});

	const seed: Model<"openai-responses">[] = [];
	for (const curated of XAI_OAUTH_CURATED_MODELS) {
		// Same helper as the dynamic overlay/inject paths.
		seed.push(mergeCuratedIntoModel(bareBase(curated), curated));
	}
	return seed;
}
826
-
827
/**
 * Model-manager options for the OAuth-backed xAI provider (Responses API).
 *
 * Always carries the curated static seed, so the picker is populated on a
 * fresh login before `fetchDynamicModels` can run: dynamic discovery is gated
 * on `config.apiKey` at construction time, while OAuth tokens resolve later
 * via AuthStorage. `generate-models.ts` calls the same seed builder, so
 * `models.json` carries these entries too and the synchronous
 * `#loadModels()` boot path honors `modelRoles.default = "xai-oauth/<id>"`
 * without `await refresh()`.
 *
 * When dynamic discovery is available, its result is passed through
 * `applyXAIOAuthCuration`. This preserves curated context windows, vision
 * modality and reasoning flags, and filters tool-only ids (`grok-imagine-*`,
 * `grok-stt-*`, `grok-voice-*`) out of the chat picker. A null or undefined
 * fetch result is passed through unchanged.
 */
export function xaiOAuthModelManagerOptions(
	config?: XaiOAuthModelManagerConfig,
): ModelManagerOptions<"openai-responses"> {
	const defaultBaseUrl = "https://api.x.ai/v1";
	const resolvedBaseUrl = config?.baseUrl ?? defaultBaseUrl;
	const base = createSimpleOpenAIResponsesOptions(
		"xai-oauth" as Parameters<typeof getBundledModels>[0],
		defaultBaseUrl,
		config,
	);
	const staticModels = buildXaiOAuthStaticSeed(resolvedBaseUrl);

	const inner = base.fetchDynamicModels;
	if (!inner) {
		return { ...base, staticModels };
	}

	return {
		...base,
		staticModels,
		fetchDynamicModels: async () => {
			const dynamic = await inner();
			if (dynamic == null) {
				return dynamic;
			}
			return applyXAIOAuthCuration(dynamic);
		},
	};
}
861
-
862
- // ---------------------------------------------------------------------------
863
- // 6.5 DeepSeek
864
- // ---------------------------------------------------------------------------
865
-
866
/** Credentials and endpoint override for the DeepSeek model manager. */
export interface DeepSeekModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/** Model-manager options for DeepSeek's OpenAI-compatible chat-completions endpoint. */
export function deepseekModelManagerOptions(
	config?: DeepSeekModelManagerConfig,
): ModelManagerOptions<"openai-completions"> {
	const defaultBaseUrl = "https://api.deepseek.com";
	return createSimpleOpenAICompletionsOptions("deepseek", defaultBaseUrl, config);
}
876
- // ---------------------------------------------------------------------------
877
- // 6.7 Zhipu Coding Plan
878
- // ---------------------------------------------------------------------------
879
-
880
/** Credentials and endpoint override for the Zhipu coding-plan model manager. */
export interface ZhipuCodingPlanModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

// Reasoning-capable GLM models on the BigModel coding-plan SKU. Keep this
// explicit rather than regex-matching `glm-[45]\.\d` so newly-added integers
// like `glm-5` / `glm-5-turbo` are covered and unrelated future SKUs (e.g.
// `glm-5-preview`) do not silently flip into thinking mode.
const ZHIPU_REASONING_MODELS: Readonly<Record<string, true>> = {
	"glm-4.5": true,
	"glm-4.5-air": true,
	"glm-4.6": true,
	"glm-4.7": true,
	"glm-5": true,
	"glm-5-turbo": true,
	"glm-5.1": true,
};

// Vision-capable GLM models follow the `glm-<N>[.<N>]v[-<variant>]` shape
// (e.g. `glm-4v`, `glm-4.5v`, `glm-4v-plus`). The previous `id.includes("v")`
// check matched anything with a `v` — including the non-vision `glm-5-preview`.
const ZHIPU_VISION_PATTERN = /^glm-[45](?:\.\d+)?v(?:-|$)/;

/**
 * Model-manager options for the Zhipu (BigModel) coding plan.
 *
 * With an API key, dynamic discovery lists the provider's models and derives
 * per-model flags from the id alone:
 *  - `reasoning` is true for ids in `ZHIPU_REASONING_MODELS` or containing `thinking`;
 *  - `input` includes `"image"` for ids matching `ZHIPU_VISION_PATTERN`;
 *  - `compat` is replaced with the fixed Zhipu settings.
 * Without a key only the provider id is returned.
 */
export function zhipuCodingPlanModelManagerOptions(
	config?: ZhipuCodingPlanModelManagerConfig,
): ModelManagerOptions<"openai-completions"> {
	const apiKey = config?.apiKey;
	const baseUrl = config?.baseUrl ?? "https://open.bigmodel.cn/api/coding/paas/v4";
	if (!apiKey) {
		return { providerId: "zhipu-coding-plan" };
	}
	return {
		providerId: "zhipu-coding-plan",
		fetchDynamicModels: () =>
			fetchOpenAICompatibleModels({
				api: "openai-completions",
				provider: "zhipu-coding-plan",
				baseUrl,
				apiKey,
				mapModel: (
					_entry: OpenAICompatibleModelRecord,
					defaults: Model<"openai-completions">,
					_context: OpenAICompatibleModelMapperContext<"openai-completions">,
				): Model<"openai-completions"> => {
					const id = defaults.id;
					const isReasoning = ZHIPU_REASONING_MODELS[id] === true || id.includes("thinking");
					const hasVision = ZHIPU_VISION_PATTERN.test(id);
					return {
						...defaults,
						reasoning: isReasoning,
						input: hasVision ? (["text", "image"] as const) : ["text"],
						compat: {
							thinkingFormat: "zai",
							reasoningContentField: "reasoning_content",
							supportsDeveloperRole: false,
						},
					};
				},
			}),
	};
}
939
-
940
- // ---------------------------------------------------------------------------
941
- // 7.5 Fireworks
942
- // ---------------------------------------------------------------------------
943
-
944
/**
 * Fireworks-published cap for the Kimi K2 family. Fireworks' `/v1/models`
 * envelope generically reports `max_completion_tokens: 65536` for every Kimi
 * deployment, but Kimi K2 (instruct / thinking / turbo) on Fireworks is
 * documented to ship long reasoning traces that should be bounded — capping
 * at 32,768 prevents handing callers a budget the router cannot honor.
 * See https://github.com/uttamtrivedi/Prometheus/issues/1849.
 */
export const FIREWORKS_KIMI_MAX_TOKENS = 32_768;

/**
 * Returns true for any Kimi K2.x public model id served by Fireworks-backed
 * providers (`fireworks` direct, `firepass` router). Matches both the public
 * catalog id (`kimi-k2.5`, `kimi-k2.6`, `kimi-k2.6-turbo`) and the canonical
 * Fireworks wire id (`accounts/fireworks/{models,routers}/kimi-k2…`).
 *
 * Matching is case-insensitive and ignores surrounding whitespace, so a
 * padded id cannot slip past the clamp.
 *
 * @param modelId Public or wire model id.
 */
export function isFireworksKimiK2ModelId(modelId: string): boolean {
	const normalized = modelId.trim().toLowerCase();
	if (normalized.startsWith("kimi-k2")) return true;
	// Wire ids: `/kimi-k2`, optionally `p<digits>`, then a separator or end of string.
	return /\/kimi-k2(?:p\d+)?(?:[._-]|$)/.test(normalized);
}

/**
 * Clamp the Kimi K2 family's `maxTokens` to {@link FIREWORKS_KIMI_MAX_TOKENS}
 * on Fireworks-backed providers, leaving every other model untouched.
 *
 * @param modelId Public or wire model id.
 * @param candidate Proposed `maxTokens` value.
 * @returns `candidate`, lowered to the cap when `modelId` is a Kimi K2 id.
 */
export function clampFireworksKimiMaxTokens(modelId: string, candidate: number): number {
	return isFireworksKimiK2ModelId(modelId) ? Math.min(candidate, FIREWORKS_KIMI_MAX_TOKENS) : candidate;
}
973
-
974
/** Credentials and endpoint override for the Fireworks model manager. */
export interface FireworksModelManagerConfig {
	apiKey?: string;
	baseUrl?: string;
}

/**
 * Choose a display name for a Fireworks model entry.
 *
 * Preference order:
 *  1. the entry's own `name`, when `toModelName` yields a non-empty string;
 *  2. `fallback`, when it differs from both the entry id and the id's last
 *     `/`-separated segment;
 *  3. that last segment, split on `-`, with each part's first character
 *     upper-cased and the parts joined by spaces.
 *
 * @param entry Raw model record from the provider's model list.
 * @param fallback Name used when the entry carries none; also stands in for a non-string id.
 */
function toFireworksModelName(entry: OpenAICompatibleModelRecord, fallback: string): string {
	const explicitName = toModelName(entry.name, "");
	if (explicitName) {
		return explicitName;
	}

	const id = typeof entry.id === "string" ? entry.id : fallback;
	const shortName = id.split("/").at(-1) ?? fallback;
	const fallbackIsDistinct = fallback !== id && fallback !== shortName;
	if (fallbackIsDistinct) {
		return fallback;
	}

	const words: string[] = [];
	for (const part of shortName.split("-")) {
		if (part) {
			words.push(part.charAt(0).toUpperCase() + part.slice(1));
		}
	}
	return words.join(" ");
}
991
-
992
/**
 * Index models by id, keeping one entry per id.
 *
 * When several models share an id, the one with the larger `contextWindow`
 * wins; on equal context windows the larger `maxTokens` wins; on a full tie
 * the earliest entry is kept.
 *
 * @param models Candidate models, possibly with repeated ids.
 * @returns Map from model id to the winning model.
 */
function createModelsDevReferenceMap<TApi extends Api>(models: readonly Model<Api>[]): Map<string, Model<TApi>> {
	const references = new Map<string, Model<TApi>>();
	for (const model of models) {
		const candidate = model as Model<TApi>;
		const current = references.get(candidate.id);
		const wins =
			!current ||
			candidate.contextWindow > current.contextWindow ||
			(candidate.contextWindow === current.contextWindow && candidate.maxTokens > current.maxTokens);
		if (wins) {
			references.set(candidate.id, candidate);
		}
	}
	return references;
}
1011
-
1012
/**
 * Fetch the models.dev payload and index its models by id.
 *
 * Best-effort: any failure while fetching or mapping resolves to an empty
 * map, so callers can fall back to other references.
 */
async function loadModelsDevReferences<TApi extends Api>(): Promise<Map<string, Model<TApi>>> {
	try {
		const payload = (await fetchModelsDevPayload()) as Record<string, unknown>;
		const models = mapModelsDevToModels(payload, MODELS_DEV_PROVIDER_DESCRIPTORS);
		return createModelsDevReferenceMap<TApi>(models);
	} catch {
		return new Map<string, Model<TApi>>();
	}
}
1022
/**
 * Model-manager options for Fireworks (OpenAI-compatible chat completions).
 *
 * With an API key, each dynamic fetch first loads models.dev references, then
 * lists the provider's models and keeps only entries that report both chat
 * and tool support. Every kept entry is re-keyed to its public model id and
 * merged with a reference (models.dev first, bundled catalog second). Name,
 * image input, context window and max tokens are then taken from the entry
 * where present, with the Kimi K2 clamp applied to max tokens. Without a key
 * only the provider id is returned.
 */
export function fireworksModelManagerOptions(
	config?: FireworksModelManagerConfig,
): ModelManagerOptions<"openai-completions"> {
	const apiKey = config?.apiKey;
	const baseUrl = config?.baseUrl ?? "https://api.fireworks.ai/inference/v1";
	const bundledReferences = createReferenceResolver(createBundledReferenceMap<"openai-completions">("fireworks"));
	if (!apiKey) {
		return { providerId: "fireworks" };
	}
	return {
		providerId: "fireworks",
		fetchDynamicModels: async () => {
			const modelsDevReferences = await loadModelsDevReferences<"openai-completions">();
			return fetchOpenAICompatibleModels({
				api: "openai-completions",
				provider: "fireworks",
				baseUrl,
				apiKey,
				filterModel: entry => {
					const supportsChat = toBoolean(entry.supports_chat) === true;
					return supportsChat && toBoolean(entry.supports_tools) === true;
				},
				mapModel: (entry, defaults) => {
					const publicModelId = toFireworksPublicModelId(defaults.id);
					const reference = modelsDevReferences.get(publicModelId) ?? bundledReferences(publicModelId);
					const model = mapWithBundledReference(entry, defaults, reference);
					const acceptsImages = toBoolean(entry.supports_image_input) === true;
					return {
						...model,
						id: publicModelId,
						api: "openai-completions",
						provider: "fireworks",
						baseUrl,
						name: toFireworksModelName(entry, model.name),
						input: acceptsImages ? ["text", "image"] : ["text"],
						contextWindow: toPositiveNumber(entry.context_length, model.contextWindow),
						maxTokens: clampFireworksKimiMaxTokens(
							publicModelId,
							toPositiveNumber(entry.max_completion_tokens, model.maxTokens),
						),
					};
				},
			});
		},
	};
}
1064
-
1065
- // ---------------------------------------------------------------------------
1066
- // 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
1067
- // ---------------------------------------------------------------------------
1068
-
1069
- export interface FirepassModelManagerConfig {
1070
- apiKey?: string;
1071
- baseUrl?: string;
1072
- }
1073
-
1074
- /**
1075
- * Fire Pass is a Fireworks subscription product that exposes a single router
1076
- * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
1077
- * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
1078
- * never performs dynamic discovery — the bundled catalog entry is canonical.
1079
- * See https://docs.fireworks.ai/firepass.
1080
- */
1081
- export function firepassModelManagerOptions(
1082
- _config?: FirepassModelManagerConfig,
1083
- ): ModelManagerOptions<"openai-completions"> {
1084
- return {
1085
- providerId: "firepass",
1086
- };
1087
- }
1088
-
1089
- // ---------------------------------------------------------------------------
1090
- // 7.7 Wafer (Pass + Serverless)
1091
- // ---------------------------------------------------------------------------
1092
-
1093
- export interface WaferModelManagerConfig {
1094
- apiKey?: string;
1095
- baseUrl?: string;
1096
- }
1097
-
1098
- const WAFER_DEFAULT_BASE_URL = "https://pass.wafer.ai/v1";
1099
- const WAFER_MAX_TOKENS_CAP = 65536;
1100
-
1101
- /**
1102
- * Shared mapper for Wafer's `/v1/models` records.
1103
- *
1104
- * Wafer wraps each entry with a `wafer` envelope describing tier, capabilities,
1105
- * and cents-per-million pricing. The mapper folds that metadata into the
1106
- * canonical `Model<"openai-completions">` shape and applies zai-family thinking
1107
- * compat when the entry advertises reasoning support (GLM-family on the Pass
1108
- * SKU). Cents-per-million → dollars-per-million via /100.
1109
- */
1110
- interface WaferRecord {
1111
- context_length?: unknown;
1112
- tier?: unknown;
1113
- provider?: unknown;
1114
- capabilities?: { vision?: unknown; reasoning?: unknown; tools?: unknown };
1115
- pricing?: {
1116
- input_cents_per_million?: unknown;
1117
- output_cents_per_million?: unknown;
1118
- cache_read_cents_per_million?: unknown;
1119
- };
1120
- display_name?: unknown;
1121
- }
1122
-
1123
- function readWaferRecord(entry: OpenAICompatibleModelRecord): WaferRecord | undefined {
1124
- const raw = (entry as { wafer?: unknown }).wafer;
1125
- return raw && typeof raw === "object" ? (raw as WaferRecord) : undefined;
1126
- }
1127
-
1128
- function mapWaferModel(
1129
- providerId: "wafer-pass" | "wafer-serverless",
1130
- baseUrl: string,
1131
- entry: OpenAICompatibleModelRecord,
1132
- defaults: Model<"openai-completions">,
1133
- ): Model<"openai-completions"> {
1134
- const wafer = readWaferRecord(entry);
1135
- const capabilities = wafer?.capabilities ?? {};
1136
- const reasoning = capabilities.reasoning === true;
1137
- const vision = capabilities.vision === true;
1138
- const contextWindow = toPositiveNumber(
1139
- wafer?.context_length,
1140
- toPositiveNumber((entry as { max_model_len?: unknown }).max_model_len, defaults.contextWindow),
1141
- );
1142
- const maxTokens = Math.min(contextWindow, WAFER_MAX_TOKENS_CAP);
1143
- const pricing = wafer?.pricing ?? {};
1144
- // Wafer's `/v1/models` exposes pricing through `*_cents_per_million` fields,
1145
- // but the values are an internal wholesale unit, not literal cents — across
1146
- // every published Serverless model on wafer.ai the user-facing rate equals
1147
- // `cents × 125 / 10000` (i.e. wholesale × 1.25 / 100; GLM-5.1's `120` →
1148
- // $1.50/M, Kimi-K2.6's `88` → $1.10/M, etc.). The multiply-first form keeps
1149
- // the result a finite dyadic for every observed value.
1150
- // For the Pass SKU the per-token rate is bundled in the flat-rate
1151
- // subscription, so we follow the convention shared with
1152
- // `kimi-code`/`firepass`/`alibaba-coding-plan` and seed every Pass model with
1153
- // `cost: 0` regardless of what the upstream envelope says.
1154
- const isPassSku = providerId === "wafer-pass";
1155
- const cost = isPassSku
1156
- ? { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }
1157
- : {
1158
- input: (toPositiveNumber(pricing.input_cents_per_million, 0) * 125) / 10000,
1159
- output: (toPositiveNumber(pricing.output_cents_per_million, 0) * 125) / 10000,
1160
- cacheRead: (toPositiveNumber(pricing.cache_read_cents_per_million, 0) * 125) / 10000,
1161
- cacheWrite: 0,
1162
- };
1163
- const name = toModelName(wafer?.display_name, defaults.name);
1164
- const base: Model<"openai-completions"> = {
1165
- ...defaults,
1166
- id: defaults.id,
1167
- name,
1168
- api: "openai-completions",
1169
- provider: providerId,
1170
- baseUrl,
1171
- reasoning,
1172
- input: vision ? (["text", "image"] as const) : ["text"],
1173
- cost,
1174
- contextWindow,
1175
- maxTokens,
1176
- };
1177
- if (reasoning) {
1178
- // Wafer's `wafer.provider` envelope tells us which upstream backend serves
1179
- // the model. Each upstream accepts a different thinking-control parameter
1180
- // on the wire — Wafer passes the body through, so we must mirror the
1181
- // upstream's native shape:
1182
- // - zai (GLM) and moonshotai (Kimi) → `thinking: { type: "enabled" | "disabled" }`
1183
- // - qwen (Alibaba) → top-level `enable_thinking: boolean`
1184
- // - deepseek → `reasoning_effort` (DeepSeek effort map; the model always
1185
- // reasons when invoked, replay of `reasoning_content` is required on
1186
- // tool-call turns — both handled by `detectOpenAICompat` from the id).
1187
- // For unknown upstreams we omit `thinkingFormat` and let the per-id
1188
- // detection in `detectOpenAICompat` pick a safe default.
1189
- const upstream = typeof wafer?.provider === "string" ? wafer.provider : undefined;
1190
- const thinkingFormat: "zai" | "qwen" | undefined =
1191
- upstream === "zai" || upstream === "moonshotai" ? "zai" : upstream === "qwen" ? "qwen" : undefined;
1192
- return {
1193
- ...base,
1194
- compat: {
1195
- ...(thinkingFormat ? { thinkingFormat } : {}),
1196
- reasoningContentField: "reasoning_content",
1197
- supportsDeveloperRole: false,
1198
- },
1199
- };
1200
- }
1201
- return {
1202
- ...base,
1203
- compat: { supportsDeveloperRole: false },
1204
- };
1205
- }
1206
-
1207
- function createWaferOptions(
1208
- providerId: "wafer-pass" | "wafer-serverless",
1209
- config: WaferModelManagerConfig | undefined,
1210
- ): ModelManagerOptions<"openai-completions"> {
1211
- const apiKey = config?.apiKey;
1212
- const baseUrl = config?.baseUrl ?? WAFER_DEFAULT_BASE_URL;
1213
- const passOnly = providerId === "wafer-pass";
1214
- return {
1215
- providerId,
1216
- ...(apiKey && {
1217
- fetchDynamicModels: () =>
1218
- fetchOpenAICompatibleModels({
1219
- api: "openai-completions",
1220
- provider: providerId,
1221
- baseUrl,
1222
- apiKey,
1223
- filterModel: entry => {
1224
- if (!passOnly) return true;
1225
- const wafer = readWaferRecord(entry);
1226
- return wafer?.tier === "pass_included";
1227
- },
1228
- mapModel: (entry, defaults) => mapWaferModel(providerId, baseUrl, entry, defaults),
1229
- }),
1230
- }),
1231
- };
1232
- }
1233
-
1234
- export function waferPassModelManagerOptions(
1235
- config?: WaferModelManagerConfig,
1236
- ): ModelManagerOptions<"openai-completions"> {
1237
- return createWaferOptions("wafer-pass", config);
1238
- }
1239
-
1240
- export function waferServerlessModelManagerOptions(
1241
- config?: WaferModelManagerConfig,
1242
- ): ModelManagerOptions<"openai-completions"> {
1243
- return createWaferOptions("wafer-serverless", config);
1244
- }
1245
-
1246
- // ---------------------------------------------------------------------------
1247
- // 7. Mistral
1248
- // ---------------------------------------------------------------------------
1249
-
1250
- export interface MistralModelManagerConfig {
1251
- apiKey?: string;
1252
- baseUrl?: string;
1253
- }
1254
-
1255
- export function mistralModelManagerOptions(
1256
- config?: MistralModelManagerConfig,
1257
- ): ModelManagerOptions<"openai-completions"> {
1258
- return createSimpleOpenAICompletionsOptions("mistral", "https://api.mistral.ai/v1", config);
1259
- }
1260
-
1261
- // ---------------------------------------------------------------------------
1262
- // 8. OpenCode
1263
- // ---------------------------------------------------------------------------
1264
-
1265
- export interface OpenCodeModelManagerConfig {
1266
- apiKey?: string;
1267
- baseUrl?: string;
1268
- }
1269
-
1270
- function normalizeOpenCodeBasePath(baseUrl: string | undefined, fallbackBasePath: string): string {
1271
- const value = normalizeAnthropicBaseUrl(baseUrl, fallbackBasePath);
1272
- return value.endsWith("/v1") ? value.slice(0, -3) : value;
1273
- }
1274
-
1275
- function openCodeBaseUrlForApi(api: Api, basePath: string): string {
1276
- return api === "anthropic-messages" ? basePath : `${basePath}/v1`;
1277
- }
1278
-
1279
- function openCodeModelManagerOptions(
1280
- providerId: "opencode-go" | "opencode-zen",
1281
- defaultBasePath: string,
1282
- config?: OpenCodeModelManagerConfig,
1283
- ): ModelManagerOptions<Api> {
1284
- const apiKey = config?.apiKey;
1285
- const basePath = normalizeOpenCodeBasePath(config?.baseUrl, defaultBasePath);
1286
- const discoveryBaseUrl = openCodeBaseUrlForApi("openai-completions", basePath);
1287
- const references = createBundledReferenceMap<Api>(providerId);
1288
- return {
1289
- providerId,
1290
- ...(apiKey && {
1291
- fetchDynamicModels: () =>
1292
- fetchOpenAICompatibleModels<Api>({
1293
- api: "openai-completions",
1294
- provider: providerId,
1295
- baseUrl: discoveryBaseUrl,
1296
- apiKey,
1297
- mapModel: (entry, defaults) => {
1298
- const reference = references.get(defaults.id);
1299
- const name = toModelName(entry.name, reference?.name ?? defaults.name);
1300
- if (!reference) {
1301
- return {
1302
- ...defaults,
1303
- name,
1304
- };
1305
- }
1306
- return {
1307
- ...reference,
1308
- id: defaults.id,
1309
- name,
1310
- baseUrl: openCodeBaseUrlForApi(reference.api, basePath),
1311
- contextWindow: toPositiveNumber(entry.context_length, reference.contextWindow),
1312
- maxTokens: toPositiveNumber(entry.max_completion_tokens, reference.maxTokens),
1313
- };
1314
- },
1315
- }),
1316
- }),
1317
- };
1318
- }
1319
-
1320
- export function opencodeZenModelManagerOptions(config?: OpenCodeModelManagerConfig): ModelManagerOptions<Api> {
1321
- return openCodeModelManagerOptions("opencode-zen", "https://opencode.ai/zen", config);
1322
- }
1323
-
1324
- export function opencodeGoModelManagerOptions(config?: OpenCodeModelManagerConfig): ModelManagerOptions<Api> {
1325
- return openCodeModelManagerOptions("opencode-go", "https://opencode.ai/zen/go", config);
1326
- }
1327
-
1328
- // ---------------------------------------------------------------------------
1329
- // 9. Ollama
1330
- // ---------------------------------------------------------------------------
1331
-
1332
- export interface OllamaModelManagerConfig {
1333
- apiKey?: string;
1334
- baseUrl?: string;
1335
- }
1336
-
1337
- export function ollamaModelManagerOptions(config?: OllamaModelManagerConfig): ModelManagerOptions<"openai-responses"> {
1338
- const apiKey = config?.apiKey;
1339
- const baseUrl = normalizeOllamaBaseUrl(config?.baseUrl);
1340
- const nativeBaseUrl = toOllamaNativeBaseUrl(baseUrl);
1341
- const references = createBundledReferenceMap<"openai-responses">("ollama" as Parameters<typeof getBundledModels>[0]);
1342
- const resolveMetadata = createOllamaMetadataResolver(nativeBaseUrl);
1343
- return {
1344
- providerId: "ollama",
1345
- fetchDynamicModels: async () => {
1346
- const openAiCompatible = await fetchOpenAICompatibleModels({
1347
- api: "openai-responses",
1348
- provider: "ollama",
1349
- baseUrl,
1350
- apiKey,
1351
- mapModel: (entry, defaults) => {
1352
- const reference = references.get(defaults.id);
1353
- if (!reference) {
1354
- return {
1355
- ...defaults,
1356
- name: toModelName(entry.name, defaults.name),
1357
- contextWindow: OLLAMA_FALLBACK_CONTEXT_WINDOW,
1358
- maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
1359
- };
1360
- }
1361
- return mapWithBundledReference(entry, defaults, reference);
1362
- },
1363
- });
1364
- if (openAiCompatible && openAiCompatible.length > 0) {
1365
- await Promise.all(
1366
- openAiCompatible.map(async model => {
1367
- const metadata = await resolveMetadata(model.id);
1368
- model.contextWindow = metadata.contextWindow;
1369
- if (metadata.reasoning !== undefined) {
1370
- model.reasoning = metadata.reasoning;
1371
- model.thinking = metadata.thinking;
1372
- }
1373
- if (metadata.input) {
1374
- model.input = metadata.input;
1375
- }
1376
- applyOllamaReasoningCompat(model);
1377
- }),
1378
- );
1379
- return openAiCompatible;
1380
- }
1381
- const nativeFallback = await fetchOllamaNativeModels(baseUrl, resolveMetadata);
1382
- if (nativeFallback && nativeFallback.length > 0) {
1383
- for (const model of nativeFallback) applyOllamaReasoningCompat(model);
1384
- return nativeFallback;
1385
- }
1386
- return openAiCompatible;
1387
- },
1388
- };
1389
- }
1390
-
1391
- // ---------------------------------------------------------------------------
1392
- // 10. OpenRouter
1393
- // ---------------------------------------------------------------------------
1394
-
1395
- export interface OpenRouterModelManagerConfig {
1396
- apiKey?: string;
1397
- baseUrl?: string;
1398
- }
1399
-
1400
- export function openrouterModelManagerOptions(
1401
- config?: OpenRouterModelManagerConfig,
1402
- ): ModelManagerOptions<"openai-completions"> {
1403
- const apiKey = config?.apiKey;
1404
- const baseUrl = config?.baseUrl ?? "https://openrouter.ai/api/v1";
1405
- return {
1406
- providerId: "openrouter",
1407
- fetchDynamicModels: () =>
1408
- fetchOpenAICompatibleModels({
1409
- api: "openai-completions",
1410
- provider: "openrouter",
1411
- baseUrl,
1412
- apiKey,
1413
- filterModel: (entry: OpenAICompatibleModelRecord) => {
1414
- const params = entry.supported_parameters;
1415
- return Array.isArray(params) && params.includes("tools");
1416
- },
1417
- mapModel: (
1418
- entry: OpenAICompatibleModelRecord,
1419
- defaults: Model<"openai-completions">,
1420
- _context: OpenAICompatibleModelMapperContext<"openai-completions">,
1421
- ): Model<"openai-completions"> => {
1422
- const pricing = entry.pricing as Record<string, unknown> | undefined;
1423
- const params = Array.isArray(entry.supported_parameters) ? (entry.supported_parameters as string[]) : [];
1424
- const modality = String((entry.architecture as Record<string, unknown> | undefined)?.modality ?? "");
1425
- const topProvider = entry.top_provider as Record<string, unknown> | undefined;
1426
-
1427
- const supportsToolChoice = params.includes("tool_choice");
1428
-
1429
- return {
1430
- ...defaults,
1431
- reasoning: params.includes("reasoning"),
1432
- input: modality.includes("image") ? ["text", "image"] : ["text"],
1433
- cost: {
1434
- input: parseFloat(String(pricing?.prompt ?? "0")) * 1_000_000,
1435
- output: parseFloat(String(pricing?.completion ?? "0")) * 1_000_000,
1436
- cacheRead: parseFloat(String(pricing?.input_cache_read ?? "0")) * 1_000_000,
1437
- cacheWrite: parseFloat(String(pricing?.input_cache_write ?? "0")) * 1_000_000,
1438
- },
1439
- contextWindow:
1440
- typeof entry.context_length === "number" ? entry.context_length : defaults.contextWindow,
1441
- maxTokens:
1442
- typeof topProvider?.max_completion_tokens === "number"
1443
- ? topProvider.max_completion_tokens
1444
- : defaults.maxTokens,
1445
- ...(!supportsToolChoice && {
1446
- compat: { supportsToolChoice: false },
1447
- }),
1448
- };
1449
- },
1450
- }),
1451
- };
1452
- }
1453
-
1454
- const ZENMUX_OPENAI_BASE_URL = "https://zenmux.ai/api/v1";
1455
- const ZENMUX_ANTHROPIC_BASE_URL = "https://zenmux.ai/api/anthropic";
1456
-
1457
- function normalizeZenMuxOpenAiBaseUrl(baseUrl?: string): string {
1458
- const value = baseUrl?.trim();
1459
- if (!value) {
1460
- return ZENMUX_OPENAI_BASE_URL;
1461
- }
1462
- return value.endsWith("/") ? value.slice(0, -1) : value;
1463
- }
1464
-
1465
- function toZenMuxAnthropicBaseUrl(openAiBaseUrl: string): string {
1466
- try {
1467
- const parsed = new URL(openAiBaseUrl);
1468
- const trimmedPath = parsed.pathname.replace(/\/+$/g, "");
1469
- parsed.pathname = trimmedPath.endsWith("/api/v1")
1470
- ? `${trimmedPath.slice(0, -"/api/v1".length)}/api/anthropic`
1471
- : "/api/anthropic";
1472
- return `${parsed.protocol}//${parsed.host}${parsed.pathname}`;
1473
- } catch {
1474
- return ZENMUX_ANTHROPIC_BASE_URL;
1475
- }
1476
- }
1477
-
1478
- function isZenMuxAnthropicModel(entry: OpenAICompatibleModelRecord, modelId: string): boolean {
1479
- if (typeof entry.owned_by === "string" && entry.owned_by.toLowerCase() === "anthropic") {
1480
- return true;
1481
- }
1482
- return modelId.toLowerCase().startsWith("anthropic/");
1483
- }
1484
-
1485
- function getZenMuxPricingValue(pricings: Record<string, unknown> | undefined, key: string): number {
1486
- const bucket = pricings?.[key];
1487
- if (!Array.isArray(bucket)) {
1488
- return 0;
1489
- }
1490
- for (const item of bucket) {
1491
- if (!isRecord(item)) {
1492
- continue;
1493
- }
1494
- const value = toNumber(item.value);
1495
- if (value !== undefined) {
1496
- return value;
1497
- }
1498
- }
1499
- return 0;
1500
- }
1501
-
1502
- function getZenMuxCacheWritePrice(pricings: Record<string, unknown> | undefined): number {
1503
- const oneHour = getZenMuxPricingValue(pricings, "input_cache_write_1_h");
1504
- if (oneHour > 0) {
1505
- return oneHour;
1506
- }
1507
- const fiveMinute = getZenMuxPricingValue(pricings, "input_cache_write_5_min");
1508
- if (fiveMinute > 0) {
1509
- return fiveMinute;
1510
- }
1511
- return getZenMuxPricingValue(pricings, "input_cache_write");
1512
- }
1513
-
1514
- // ---------------------------------------------------------------------------
1515
- // 10.5 ZenMux
1516
- // ---------------------------------------------------------------------------
1517
-
1518
- export interface ZenMuxModelManagerConfig {
1519
- apiKey?: string;
1520
- baseUrl?: string;
1521
- }
1522
-
1523
- export function zenmuxModelManagerOptions(config?: ZenMuxModelManagerConfig): ModelManagerOptions<Api> {
1524
- const apiKey = config?.apiKey;
1525
- const openAiBaseUrl = normalizeZenMuxOpenAiBaseUrl(config?.baseUrl);
1526
- const anthropicBaseUrl = toZenMuxAnthropicBaseUrl(openAiBaseUrl);
1527
- return {
1528
- providerId: "zenmux",
1529
- ...(apiKey && {
1530
- fetchDynamicModels: () =>
1531
- fetchOpenAICompatibleModels<Api>({
1532
- api: "openai-completions",
1533
- provider: "zenmux",
1534
- baseUrl: openAiBaseUrl,
1535
- apiKey,
1536
- mapModel: (entry, defaults) => {
1537
- const pricings = isRecord(entry.pricings) ? entry.pricings : undefined;
1538
- const capabilities = isRecord(entry.capabilities) ? entry.capabilities : undefined;
1539
- const isAnthropicModel = isZenMuxAnthropicModel(entry, defaults.id);
1540
- return {
1541
- ...defaults,
1542
- name: toModelName(entry.display_name, defaults.name),
1543
- api: isAnthropicModel ? "anthropic-messages" : "openai-completions",
1544
- baseUrl: isAnthropicModel ? anthropicBaseUrl : openAiBaseUrl,
1545
- reasoning: capabilities?.reasoning === true || defaults.reasoning,
1546
- input: toInputCapabilities(entry.input_modalities),
1547
- cost: {
1548
- input: getZenMuxPricingValue(pricings, "prompt"),
1549
- output: getZenMuxPricingValue(pricings, "completion"),
1550
- cacheRead: getZenMuxPricingValue(pricings, "input_cache_read"),
1551
- cacheWrite: getZenMuxCacheWritePrice(pricings),
1552
- },
1553
- contextWindow: toPositiveNumber(entry.context_length, defaults.contextWindow),
1554
- maxTokens: toPositiveNumber(entry.max_completion_tokens, defaults.maxTokens),
1555
- };
1556
- },
1557
- }),
1558
- }),
1559
- };
1560
- }
1561
-
1562
- // ---------------------------------------------------------------------------
1563
- // 10.6 Kilo Gateway
1564
- // ---------------------------------------------------------------------------
1565
-
1566
- export interface KiloModelManagerConfig {
1567
- apiKey?: string;
1568
- baseUrl?: string;
1569
- }
1570
-
1571
- export function kiloModelManagerOptions(config?: KiloModelManagerConfig): ModelManagerOptions<"openai-completions"> {
1572
- const apiKey = config?.apiKey;
1573
- const baseUrl = config?.baseUrl ?? "https://api.kilo.ai/api/gateway";
1574
- return {
1575
- providerId: "kilo",
1576
- fetchDynamicModels: () =>
1577
- fetchOpenAICompatibleModels({
1578
- api: "openai-completions",
1579
- provider: "kilo",
1580
- baseUrl,
1581
- apiKey,
1582
- }),
1583
- };
1584
- }
1585
-
1586
- // ---------------------------------------------------------------------------
1587
- // Alibaba Coding Plan
1588
- // ---------------------------------------------------------------------------
1589
-
1590
- export interface AlibabaCodingPlanModelManagerConfig {
1591
- apiKey?: string;
1592
- baseUrl?: string;
1593
- }
1594
-
1595
- export function alibabaCodingPlanModelManagerOptions(
1596
- config?: AlibabaCodingPlanModelManagerConfig,
1597
- ): ModelManagerOptions<"openai-completions"> {
1598
- const apiKey = config?.apiKey;
1599
- const baseUrl = config?.baseUrl ?? "https://coding-intl.dashscope.aliyuncs.com/v1";
1600
- const references = createBundledReferenceMap<"openai-completions">("alibaba-coding-plan");
1601
- return {
1602
- providerId: "alibaba-coding-plan",
1603
- fetchDynamicModels: () =>
1604
- fetchOpenAICompatibleModels({
1605
- api: "openai-completions",
1606
- provider: "alibaba-coding-plan",
1607
- baseUrl,
1608
- apiKey,
1609
- mapModel: (entry, defaults) => {
1610
- const reference = references.get(defaults.id);
1611
- return mapWithBundledReference(entry, defaults, reference);
1612
- },
1613
- }),
1614
- };
1615
- }
1616
-
1617
- // ---------------------------------------------------------------------------
1618
- // 11. Vercel AI Gateway
1619
- // ---------------------------------------------------------------------------
1620
-
1621
- export interface VercelAiGatewayModelManagerConfig {
1622
- apiKey?: string;
1623
- baseUrl?: string;
1624
- }
1625
-
1626
- function normalizeVercelAiGatewayBaseUrls(rawBaseUrl: string | undefined): { baseUrl: string; catalogBaseUrl: string } {
1627
- const baseUrl = (rawBaseUrl === undefined ? "https://ai-gateway.vercel.sh" : rawBaseUrl.trim()).replace(/\/+$/, "");
1628
- const catalogBaseUrl = baseUrl === "" || baseUrl.endsWith("/v1") ? baseUrl : `${baseUrl}/v1`;
1629
-
1630
- return {
1631
- baseUrl: baseUrl.endsWith("/v1") ? baseUrl.slice(0, -3) : baseUrl,
1632
- catalogBaseUrl,
1633
- };
1634
- }
1635
-
1636
- export function vercelAiGatewayModelManagerOptions(
1637
- config?: VercelAiGatewayModelManagerConfig,
1638
- ): ModelManagerOptions<"anthropic-messages"> {
1639
- const apiKey = config?.apiKey;
1640
- const { baseUrl, catalogBaseUrl } = normalizeVercelAiGatewayBaseUrls(config?.baseUrl);
1641
- return {
1642
- providerId: "vercel-ai-gateway",
1643
- fetchDynamicModels: () =>
1644
- fetchOpenAICompatibleModels({
1645
- api: "anthropic-messages",
1646
- provider: "vercel-ai-gateway",
1647
- baseUrl: catalogBaseUrl,
1648
- apiKey,
1649
- filterModel: (entry: OpenAICompatibleModelRecord) => {
1650
- const tags = entry.tags;
1651
- return Array.isArray(tags) && tags.includes("tool-use");
1652
- },
1653
- mapModel: (
1654
- entry: OpenAICompatibleModelRecord,
1655
- defaults: Model<"anthropic-messages">,
1656
- _context: OpenAICompatibleModelMapperContext<"anthropic-messages">,
1657
- ): Model<"anthropic-messages"> => {
1658
- const pricing = entry.pricing as Record<string, unknown> | undefined;
1659
- const tags = Array.isArray(entry.tags) ? (entry.tags as string[]) : [];
1660
-
1661
- return {
1662
- ...defaults,
1663
- baseUrl,
1664
- reasoning: tags.includes("reasoning"),
1665
- input: tags.includes("vision") ? ["text", "image"] : ["text"],
1666
- cost: {
1667
- input: (toNumber(pricing?.input) ?? 0) * 1_000_000,
1668
- output: (toNumber(pricing?.output) ?? 0) * 1_000_000,
1669
- cacheRead: (toNumber(pricing?.input_cache_read) ?? 0) * 1_000_000,
1670
- cacheWrite: (toNumber(pricing?.input_cache_write) ?? 0) * 1_000_000,
1671
- },
1672
- contextWindow:
1673
- typeof entry.context_window === "number" ? entry.context_window : defaults.contextWindow,
1674
- maxTokens: typeof entry.max_tokens === "number" ? entry.max_tokens : defaults.maxTokens,
1675
- };
1676
- },
1677
- }),
1678
- };
1679
- }
1680
-
1681
- // ---------------------------------------------------------------------------
1682
- // 12. Kimi Code
1683
- // ---------------------------------------------------------------------------
1684
-
1685
- export interface KimiCodeModelManagerConfig {
1686
- apiKey?: string;
1687
- baseUrl?: string;
1688
- }
1689
-
1690
- export function kimiCodeModelManagerOptions(
1691
- config?: KimiCodeModelManagerConfig,
1692
- ): ModelManagerOptions<"openai-completions"> {
1693
- const apiKey = config?.apiKey;
1694
- const baseUrl = config?.baseUrl ?? "https://api.kimi.com/coding/v1";
1695
- return {
1696
- providerId: "kimi-code",
1697
- ...(apiKey && {
1698
- fetchDynamicModels: () =>
1699
- fetchOpenAICompatibleModels({
1700
- api: "openai-completions",
1701
- provider: "kimi-code",
1702
- baseUrl,
1703
- apiKey,
1704
- headers: {
1705
- "User-Agent": "KimiCLI/1.0",
1706
- "X-Msh-Platform": "kimi_cli",
1707
- },
1708
- mapModel: (
1709
- entry: OpenAICompatibleModelRecord,
1710
- defaults: Model<"openai-completions">,
1711
- _context: OpenAICompatibleModelMapperContext<"openai-completions">,
1712
- ): Model<"openai-completions"> => {
1713
- const id = defaults.id;
1714
- return {
1715
- ...defaults,
1716
- name: typeof entry.display_name === "string" ? entry.display_name : defaults.name,
1717
- reasoning: entry.supports_reasoning === true || id.includes("thinking"),
1718
- input: entry.supports_image_in === true || id.includes("k2.5") ? ["text", "image"] : ["text"],
1719
- contextWindow: typeof entry.context_length === "number" ? entry.context_length : 262144,
1720
- maxTokens: 32000,
1721
- compat: {
1722
- thinkingFormat: "zai",
1723
- reasoningContentField: "reasoning_content",
1724
- supportsDeveloperRole: false,
1725
- },
1726
- };
1727
- },
1728
- }),
1729
- }),
1730
- };
1731
- }
1732
-
1733
- // ---------------------------------------------------------------------------
1734
- // 12.5. LM Studio
1735
- // ---------------------------------------------------------------------------
1736
-
1737
- export interface LmStudioModelManagerConfig {
1738
- apiKey?: string;
1739
- baseUrl?: string;
1740
- }
1741
-
1742
- export function lmStudioModelManagerOptions(
1743
- config?: LmStudioModelManagerConfig,
1744
- ): ModelManagerOptions<"openai-completions"> {
1745
- const apiKey = config?.apiKey;
1746
- const baseUrl = config?.baseUrl ?? Bun.env.LM_STUDIO_BASE_URL ?? "http://127.0.0.1:1234/v1";
1747
- const references = createBundledReferenceMap<"openai-completions">("lm-studio" as any);
1748
- return {
1749
- providerId: "lm-studio",
1750
- fetchDynamicModels: () =>
1751
- fetchOpenAICompatibleModels({
1752
- api: "openai-completions",
1753
- provider: "lm-studio",
1754
- baseUrl,
1755
- apiKey,
1756
- mapModel: (entry, defaults) => {
1757
- const reference = references.get(defaults.id);
1758
- return mapWithBundledReference(entry, defaults, reference);
1759
- },
1760
- }),
1761
- };
1762
- }
1763
-
1764
- // ---------------------------------------------------------------------------
1765
- // 13. Synthetic
1766
- // ---------------------------------------------------------------------------
1767
-
1768
- export interface SyntheticModelManagerConfig {
1769
- apiKey?: string;
1770
- baseUrl?: string;
1771
- }
1772
-
1773
- export function syntheticModelManagerOptions(
1774
- config?: SyntheticModelManagerConfig,
1775
- ): ModelManagerOptions<"openai-completions"> {
1776
- const apiKey = config?.apiKey;
1777
- const baseUrl = config?.baseUrl ?? "https://api.synthetic.new/openai/v1";
1778
- const references = new Map(
1779
- (getBundledModels("synthetic") as Model<"openai-completions">[]).map(model => [model.id, model]),
1780
- );
1781
- return {
1782
- providerId: "synthetic",
1783
- dynamicModelsAuthoritative: true,
1784
- ...(apiKey && {
1785
- fetchDynamicModels: () =>
1786
- fetchOpenAICompatibleModels({
1787
- api: "openai-completions",
1788
- provider: "synthetic",
1789
- baseUrl,
1790
- apiKey,
1791
- mapModel: (
1792
- entry: OpenAICompatibleModelRecord,
1793
- defaults: Model<"openai-completions">,
1794
- _context: OpenAICompatibleModelMapperContext<"openai-completions">,
1795
- ): Model<"openai-completions"> => {
1796
- const reference = references.get(defaults.id);
1797
- const referenceSupportsImage = reference?.input.includes("image") ?? false;
1798
- return {
1799
- ...(reference ? { ...reference, id: defaults.id, baseUrl } : defaults),
1800
- name: toModelName(entry.name, reference?.name ?? defaults.name),
1801
- reasoning: entry.supports_reasoning === true || (reference?.reasoning ?? false),
1802
- input: entry.supports_vision === true || referenceSupportsImage ? ["text", "image"] : ["text"],
1803
- contextWindow: toPositiveNumber(
1804
- entry.context_length,
1805
- reference?.contextWindow ?? defaults.contextWindow,
1806
- ),
1807
- maxTokens: toPositiveNumber(entry.max_tokens, reference?.maxTokens ?? 8192),
1808
- };
1809
- },
1810
- }),
1811
- }),
1812
- };
1813
- }
1814
-
1815
- // ---------------------------------------------------------------------------
1816
- // 14. Venice
1817
- // ---------------------------------------------------------------------------
1818
-
1819
- export interface VeniceModelManagerConfig {
1820
- apiKey?: string;
1821
- baseUrl?: string;
1822
- }
1823
-
1824
- export function veniceModelManagerOptions(
1825
- config?: VeniceModelManagerConfig,
1826
- ): ModelManagerOptions<"openai-completions"> {
1827
- const apiKey = config?.apiKey;
1828
- const baseUrl = config?.baseUrl ?? "https://api.venice.ai/api/v1";
1829
- const references = createBundledReferenceMap<"openai-completions">("venice");
1830
- return {
1831
- providerId: "venice",
1832
- fetchDynamicModels: () =>
1833
- fetchOpenAICompatibleModels({
1834
- api: "openai-completions",
1835
- provider: "venice",
1836
- baseUrl,
1837
- apiKey,
1838
- mapModel: (entry, defaults) => {
1839
- const reference = references.get(defaults.id);
1840
- const model = mapWithBundledReference(entry, defaults, reference);
1841
- return {
1842
- ...model,
1843
- compat: { ...model.compat, supportsUsageInStreaming: false },
1844
- };
1845
- },
1846
- }),
1847
- };
1848
- }
1849
-
1850
- // ---------------------------------------------------------------------------
1851
- // 15. Together
1852
- // ---------------------------------------------------------------------------
1853
-
1854
- export interface TogetherModelManagerConfig {
1855
- apiKey?: string;
1856
- baseUrl?: string;
1857
- }
1858
-
1859
- export function togetherModelManagerOptions(
1860
- config?: TogetherModelManagerConfig,
1861
- ): ModelManagerOptions<"openai-completions"> {
1862
- return createSimpleOpenAICompletionsOptions("together", "https://api.together.xyz/v1", config);
1863
- }
1864
-
1865
- // ---------------------------------------------------------------------------
1866
- // 16. Moonshot
1867
- // ---------------------------------------------------------------------------
1868
-
1869
- export interface MoonshotModelManagerConfig {
1870
- apiKey?: string;
1871
- baseUrl?: string;
1872
- }
1873
-
1874
- export function moonshotModelManagerOptions(
1875
- config?: MoonshotModelManagerConfig,
1876
- ): ModelManagerOptions<"openai-completions"> {
1877
- const apiKey = config?.apiKey;
1878
- const baseUrl = config?.baseUrl ?? "https://api.moonshot.ai/v1";
1879
- const references = createBundledReferenceMap<"openai-completions">("moonshot");
1880
- return {
1881
- providerId: "moonshot",
1882
- ...(apiKey && {
1883
- fetchDynamicModels: () =>
1884
- fetchOpenAICompatibleModels({
1885
- api: "openai-completions",
1886
- provider: "moonshot",
1887
- baseUrl,
1888
- apiKey,
1889
- mapModel: (entry, defaults) => {
1890
- const reference = references.get(defaults.id);
1891
- const model = mapWithBundledReference(entry, defaults, reference);
1892
- const id = model.id.toLowerCase();
1893
- const isThinking = id.includes("thinking");
1894
- const isVision = id.includes("vision") || id.includes("vl") || id.includes("k2.5");
1895
- return {
1896
- ...model,
1897
- reasoning: isThinking || model.reasoning,
1898
- input: isVision ? ["text", "image"] : model.input,
1899
- };
1900
- },
1901
- }),
1902
- }),
1903
- };
1904
- }
1905
-
1906
- // ---------------------------------------------------------------------------
1907
- // 17. Qwen Portal
1908
- // ---------------------------------------------------------------------------
1909
-
1910
- export interface QwenPortalModelManagerConfig {
1911
- apiKey?: string;
1912
- baseUrl?: string;
1913
- }
1914
-
1915
- export function qwenPortalModelManagerOptions(
1916
- config?: QwenPortalModelManagerConfig,
1917
- ): ModelManagerOptions<"openai-completions"> {
1918
- return createSimpleOpenAICompletionsOptions("qwen-portal", "https://portal.qwen.ai/v1", config);
1919
- }
1920
-
1921
- // ---------------------------------------------------------------------------
1922
- // 18. Qianfan
1923
- // ---------------------------------------------------------------------------
1924
-
1925
- export interface QianfanModelManagerConfig {
1926
- apiKey?: string;
1927
- baseUrl?: string;
1928
- }
1929
-
1930
- export function qianfanModelManagerOptions(
1931
- config?: QianfanModelManagerConfig,
1932
- ): ModelManagerOptions<"openai-completions"> {
1933
- return createSimpleOpenAICompletionsOptions("qianfan", "https://qianfan.baidubce.com/v2", config);
1934
- }
1935
-
1936
- // ---------------------------------------------------------------------------
1937
- // 19. Cloudflare AI Gateway
1938
- // ---------------------------------------------------------------------------
1939
-
1940
- export interface CloudflareAiGatewayModelManagerConfig {
1941
- apiKey?: string;
1942
- baseUrl?: string;
1943
- }
1944
-
1945
- export function cloudflareAiGatewayModelManagerOptions(
1946
- config?: CloudflareAiGatewayModelManagerConfig,
1947
- ): ModelManagerOptions<"anthropic-messages"> {
1948
- return createSimpleAnthropicProviderOptions(
1949
- "cloudflare-ai-gateway",
1950
- "https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic",
1951
- config,
1952
- );
1953
- }
1954
-
1955
- // ---------------------------------------------------------------------------
1956
- // 20. Xiaomi
1957
- // ---------------------------------------------------------------------------
1958
-
1959
- /** Region codes for Xiaomi Token Plan clusters exposed as separate login providers. */
1960
- export type XiaomiTokenPlanRegion = "sgp" | "ams" | "cn";
1961
-
1962
- /** Configures Xiaomi standard or regional Token Plan OpenAI-compatible model discovery. */
1963
- export interface XiaomiModelManagerConfig {
1964
- apiKey?: string;
1965
- baseUrl?: string;
1966
- providerId?: Provider;
1967
- tokenPlanRegion?: XiaomiTokenPlanRegion;
1968
- }
1969
-
1970
- const XIAOMI_TOKEN_PLAN_BASE_URLS: Record<XiaomiTokenPlanRegion, string> = {
1971
- sgp: "https://token-plan-sgp.xiaomimimo.com/v1",
1972
- ams: "https://token-plan-ams.xiaomimimo.com/v1",
1973
- cn: "https://token-plan-cn.xiaomimimo.com/v1",
1974
- };
1975
-
1976
- const XIAOMI_TOKEN_PLAN_FALLBACK_BASE_URLS = [
1977
- XIAOMI_TOKEN_PLAN_BASE_URLS.sgp,
1978
- XIAOMI_TOKEN_PLAN_BASE_URLS.ams,
1979
- XIAOMI_TOKEN_PLAN_BASE_URLS.cn,
1980
- ];
1981
-
1982
- /** Builds a Xiaomi model manager, preserving Token Plan region provider ids during discovery. */
1983
- export function xiaomiModelManagerOptions(
1984
- config?: XiaomiModelManagerConfig,
1985
- ): ModelManagerOptions<"openai-completions"> {
1986
- const apiKey = config?.apiKey;
1987
- const providerId = config?.providerId ?? "xiaomi";
1988
- const tokenPlanBaseUrls = config?.tokenPlanRegion
1989
- ? [XIAOMI_TOKEN_PLAN_BASE_URLS[config.tokenPlanRegion]]
1990
- : XIAOMI_TOKEN_PLAN_FALLBACK_BASE_URLS;
1991
- const XIAOMI_STANDARD_BASE_URL = "https://api.xiaomimimo.com/v1";
1992
- const isTokenPlanProvider = config?.tokenPlanRegion !== undefined || providerId.startsWith("xiaomi-token-plan-");
1993
- const isTokenPlanKey = isTokenPlanProvider || apiKey?.startsWith("tp-");
1994
- // Token-plan keys always use a TP cluster; config?.baseUrl (from catalog)
1995
- // would incorrectly pin to the standard endpoint (api.xiaomimimo.com).
1996
- const baseUrl = isTokenPlanKey ? tokenPlanBaseUrls[0] : (config?.baseUrl ?? XIAOMI_STANDARD_BASE_URL);
1997
- const references = createBundledReferenceMap<"openai-completions">("xiaomi");
1998
- const fetchModels = (url: string) =>
1999
- fetchOpenAICompatibleModels({
2000
- api: "openai-completions",
2001
- provider: providerId,
2002
- baseUrl: url,
2003
- apiKey,
2004
- filterModel: (_entry, model) => !model.id.includes("-tts"),
2005
- mapModel: (entry, defaults) => {
2006
- const reference = references.get(defaults.id);
2007
- const model = mapWithBundledReference(entry, defaults, reference);
2008
- return {
2009
- ...model,
2010
- api: "openai-completions",
2011
- provider: providerId,
2012
- baseUrl: defaults.baseUrl,
2013
- name: toModelName(entry.display_name, model.name),
2014
- };
2015
- },
2016
- });
2017
- return {
2018
- providerId,
2019
- ...(apiKey && {
2020
- fetchDynamicModels: async () => {
2021
- if (!isTokenPlanKey) {
2022
- return fetchModels(baseUrl);
2023
- }
2024
- for (const url of tokenPlanBaseUrls) {
2025
- const result = await fetchModels(url);
2026
- if (result) return result;
2027
- }
2028
- return null;
2029
- },
2030
- }),
2031
- };
2032
- }
2033
- // ---------------------------------------------------------------------------
2034
- // 21. LiteLLM
2035
- // ---------------------------------------------------------------------------
2036
-
2037
- export interface LiteLLMModelManagerConfig {
2038
- apiKey?: string;
2039
- baseUrl?: string;
2040
- }
2041
-
2042
- export function litellmModelManagerOptions(
2043
- config?: LiteLLMModelManagerConfig,
2044
- ): ModelManagerOptions<"openai-completions"> {
2045
- const apiKey = config?.apiKey;
2046
- const baseUrl = config?.baseUrl ?? "http://localhost:4000/v1";
2047
- return {
2048
- providerId: "litellm",
2049
- // litellm is a local-only proxy whose /v1/models returns bare ids with no
2050
- // metadata, and it is never bundled in models.json (that would leak the
2051
- // machine's localhost catalog). It proxies known upstream models, so we
2052
- // enrich discovered ids against models.dev — the same reference source the
2053
- // gateway providers (fireworks et al.) use — instead of a bundled map.
2054
- fetchDynamicModels: async () => {
2055
- const modelsDevReferences = await loadModelsDevReferences<"openai-completions">();
2056
- return fetchOpenAICompatibleModels({
2057
- api: "openai-completions",
2058
- provider: "litellm",
2059
- baseUrl,
2060
- apiKey,
2061
- mapModel: (entry, defaults) =>
2062
- mapWithBundledReference(entry, defaults, modelsDevReferences.get(defaults.id)),
2063
- });
2064
- },
2065
- };
2066
- }
2067
-
2068
- // ---------------------------------------------------------------------------
2069
- // 22. vLLM
2070
- // ---------------------------------------------------------------------------
2071
-
2072
- export interface VllmModelManagerConfig {
2073
- apiKey?: string;
2074
- baseUrl?: string;
2075
- }
2076
-
2077
- export function vllmModelManagerOptions(config?: VllmModelManagerConfig): ModelManagerOptions<"openai-completions"> {
2078
- const apiKey = config?.apiKey;
2079
- const baseUrl = config?.baseUrl ?? "http://127.0.0.1:8000/v1";
2080
- const references = createBundledReferenceMap<"openai-completions">("vllm" as Parameters<typeof getBundledModels>[0]);
2081
- return {
2082
- providerId: "vllm",
2083
- fetchDynamicModels: () =>
2084
- fetchOpenAICompatibleModels({
2085
- api: "openai-completions",
2086
- provider: "vllm",
2087
- baseUrl,
2088
- apiKey,
2089
- mapModel: (entry, defaults) => {
2090
- const model = mapWithBundledReference(entry, defaults, references.get(defaults.id));
2091
- return {
2092
- ...model,
2093
- contextWindow: toPositiveNumber(entry.max_model_len, model.contextWindow),
2094
- };
2095
- },
2096
- }),
2097
- };
2098
- }
2099
-
2100
- // ---------------------------------------------------------------------------
2101
- // 23. NanoGPT
2102
- // ---------------------------------------------------------------------------
2103
-
2104
- export interface NanoGptModelManagerConfig {
2105
- apiKey?: string;
2106
- baseUrl?: string;
2107
- }
2108
-
2109
- export function nanoGptModelManagerOptions(
2110
- config?: NanoGptModelManagerConfig,
2111
- ): ModelManagerOptions<"openai-completions"> {
2112
- const apiKey = config?.apiKey;
2113
- const baseUrl = config?.baseUrl ?? "https://nano-gpt.com/api/v1";
2114
- const resolveReference = createReferenceResolver(
2115
- createBundledReferenceMap<"openai-completions">("nanogpt" as Parameters<typeof getBundledModels>[0]),
2116
- );
2117
- return {
2118
- providerId: "nanogpt",
2119
- ...(apiKey && {
2120
- fetchDynamicModels: async () => {
2121
- // Track base IDs that have :thinking variants so we can mark them reasoning-capable.
2122
- const thinkingBaseIds = new Set<string>();
2123
- const models = await fetchOpenAICompatibleModels({
2124
- api: "openai-completions",
2125
- provider: "nanogpt",
2126
- baseUrl,
2127
- apiKey,
2128
- mapModel: (entry, defaults) => {
2129
- const reference = resolveReference(defaults.id);
2130
- const mapped = mapWithBundledReference(entry, defaults, reference);
2131
- return { ...mapped, api: "openai-completions", provider: "nanogpt" };
2132
- },
2133
- filterModel: (_entry, model) => {
2134
- const match = NANO_GPT_THINKING_SUFFIX_RE.exec(model.id);
2135
- if (match) {
2136
- thinkingBaseIds.add(model.id.slice(0, match.index));
2137
- return false;
2138
- }
2139
- return isLikelyNanoGptTextModelId(model.id);
2140
- },
2141
- });
2142
- if (!models) return null;
2143
- // Mark base models as reasoning-capable when a :thinking variant existed.
2144
- for (const model of models) {
2145
- if (!model.reasoning && thinkingBaseIds.has(model.id)) {
2146
- (model as { reasoning: boolean }).reasoning = true;
2147
- }
2148
- }
2149
- return models;
2150
- },
2151
- }),
2152
- };
2153
- }
2154
-
2155
- // ---------------------------------------------------------------------------
2156
- // 24. GitHub Copilot
2157
- // ---------------------------------------------------------------------------
2158
-
2159
- export interface GithubCopilotModelManagerConfig {
2160
- apiKey?: string;
2161
- baseUrl?: string;
2162
- }
2163
-
2164
- function inferCopilotApi(modelId: string): Api {
2165
- if (/^claude-(haiku|sonnet|opus)-4([.-]|$)/.test(modelId)) {
2166
- return "anthropic-messages";
2167
- }
2168
- if (modelId.startsWith("gpt-5") || modelId.startsWith("oswe")) {
2169
- return "openai-responses";
2170
- }
2171
- return "openai-completions";
2172
- }
2173
-
2174
- function extractCopilotLimits(entry: OpenAICompatibleModelRecord): {
2175
- maxPromptTokens?: number;
2176
- maxContextWindowTokens?: number;
2177
- maxOutputTokens?: number;
2178
- maxNonStreamingOutputTokens?: number;
2179
- } {
2180
- if (!isRecord(entry.capabilities)) {
2181
- return {};
2182
- }
2183
- const limitsValue = entry.capabilities.limits;
2184
- if (!isRecord(limitsValue)) {
2185
- return {};
2186
- }
2187
- return {
2188
- maxPromptTokens: toNumber(limitsValue.max_prompt_tokens),
2189
- maxContextWindowTokens: toNumber(limitsValue.max_context_window_tokens),
2190
- maxOutputTokens: toNumber(limitsValue.max_output_tokens),
2191
- maxNonStreamingOutputTokens: toNumber(limitsValue.max_non_streaming_output_tokens),
2192
- };
2193
- }
2194
-
2195
- export function githubCopilotModelManagerOptions(config?: GithubCopilotModelManagerConfig): ModelManagerOptions<Api> {
2196
- const rawApiKey = config?.apiKey;
2197
- const configuredBaseUrl = config?.baseUrl ?? "https://api.githubcopilot.com";
2198
- const parsedApiKey = rawApiKey ? parseGitHubCopilotApiKey(rawApiKey) : undefined;
2199
- const apiKey = parsedApiKey?.accessToken;
2200
- const baseUrl =
2201
- parsedApiKey?.enterpriseUrl && configuredBaseUrl.includes("githubcopilot.com")
2202
- ? getGitHubCopilotBaseUrl(parsedApiKey.enterpriseUrl)
2203
- : configuredBaseUrl;
2204
- const providerRefs = createBundledReferenceMap<Api>("github-copilot");
2205
- const resolveReference = createReferenceResolver(providerRefs);
2206
- return {
2207
- providerId: "github-copilot",
2208
- ...(apiKey && {
2209
- fetchDynamicModels: () =>
2210
- fetchOpenAICompatibleModels<Api>({
2211
- api: "openai-completions",
2212
- provider: "github-copilot",
2213
- baseUrl,
2214
- apiKey,
2215
- headers: OPENCODE_HEADERS,
2216
- mapModel: (
2217
- entry: OpenAICompatibleModelRecord,
2218
- defaults: Model<Api>,
2219
- _context: OpenAICompatibleModelMapperContext<Api>,
2220
- ): Model<Api> => {
2221
- const reference = resolveReference(defaults.id);
2222
- const copilotLimits = extractCopilotLimits(entry);
2223
- // Copilot exposes token limits under capabilities.limits.*.
2224
- // max_context_window_tokens is the model's total usable window;
2225
- // max_prompt_tokens is Copilot's prompt/summarization budget and
2226
- // must only be a fallback when total-window fields are absent.
2227
- const contextWindow = toPositiveNumber(
2228
- copilotLimits.maxContextWindowTokens,
2229
- toPositiveNumber(
2230
- entry.context_length,
2231
- toPositiveNumber(
2232
- copilotLimits.maxPromptTokens,
2233
- reference?.contextWindow ?? defaults.contextWindow,
2234
- ),
2235
- ),
2236
- );
2237
- const maxTokens = toPositiveNumber(
2238
- copilotLimits.maxOutputTokens,
2239
- toPositiveNumber(
2240
- entry.max_completion_tokens,
2241
- toPositiveNumber(
2242
- copilotLimits.maxNonStreamingOutputTokens,
2243
- reference?.maxTokens ?? defaults.maxTokens,
2244
- ),
2245
- ),
2246
- );
2247
- const name =
2248
- typeof entry.name === "string" && entry.name.trim().length > 0
2249
- ? entry.name
2250
- : (reference?.name ?? defaults.name);
2251
- const api = inferCopilotApi(defaults.id);
2252
- if (reference) {
2253
- return {
2254
- ...reference,
2255
- api,
2256
- provider: "github-copilot",
2257
- baseUrl,
2258
- name,
2259
- contextWindow,
2260
- maxTokens,
2261
- headers: { ...OPENCODE_HEADERS, ...(providerRefs.get(defaults.id)?.headers ?? {}) },
2262
- ...(api === "openai-completions"
2263
- ? {
2264
- compat: {
2265
- supportsStore: false,
2266
- supportsDeveloperRole: false,
2267
- supportsReasoningEffort: false,
2268
- },
2269
- }
2270
- : {}),
2271
- };
2272
- }
2273
- return {
2274
- ...defaults,
2275
- api,
2276
- baseUrl,
2277
- name,
2278
- contextWindow,
2279
- maxTokens,
2280
- headers: { ...OPENCODE_HEADERS },
2281
- ...(api === "openai-completions"
2282
- ? {
2283
- compat: {
2284
- supportsStore: false,
2285
- supportsDeveloperRole: false,
2286
- supportsReasoningEffort: false,
2287
- },
2288
- }
2289
- : {}),
2290
- };
2291
- },
2292
- }),
2293
- }),
2294
- };
2295
- }
2296
-
2297
- // ---------------------------------------------------------------------------
2298
- // 24. Anthropic
2299
- // ---------------------------------------------------------------------------
2300
-
2301
- export interface AnthropicModelManagerConfig {
2302
- apiKey?: string;
2303
- baseUrl?: string;
2304
- }
2305
-
2306
- export function anthropicModelManagerOptions(
2307
- config?: AnthropicModelManagerConfig,
2308
- ): ModelManagerOptions<"anthropic-messages"> {
2309
- const apiKey = config?.apiKey;
2310
- const baseUrl = config?.baseUrl ?? ANTHROPIC_BASE_URL;
2311
- return {
2312
- providerId: "anthropic",
2313
- modelsDev: {
2314
- fetch: fetchModelsDevPayload,
2315
- map: payload => mapAnthropicModelsDev(payload, baseUrl),
2316
- },
2317
- ...(apiKey && {
2318
- fetchDynamicModels: async () => {
2319
- const modelsDevModels = await fetchModelsDevPayload()
2320
- .then(payload => mapAnthropicModelsDev(payload, baseUrl))
2321
- .catch(() => []);
2322
- const references = buildAnthropicReferenceMap(modelsDevModels);
2323
- return (
2324
- fetchOpenAICompatibleModels({
2325
- api: "anthropic-messages",
2326
- provider: "anthropic",
2327
- baseUrl,
2328
- headers: buildAnthropicDiscoveryHeaders(apiKey),
2329
- mapModel: (
2330
- entry: OpenAICompatibleModelRecord,
2331
- defaults: Model<"anthropic-messages">,
2332
- _context: OpenAICompatibleModelMapperContext<"anthropic-messages">,
2333
- ): Model<"anthropic-messages"> => {
2334
- const discoveredName = typeof entry.display_name === "string" ? entry.display_name : defaults.name;
2335
- const reference = references.get(defaults.id);
2336
- if (!reference) {
2337
- return {
2338
- ...defaults,
2339
- name: discoveredName,
2340
- };
2341
- }
2342
- return {
2343
- ...reference,
2344
- id: defaults.id,
2345
- name: discoveredName,
2346
- api: "anthropic-messages",
2347
- provider: "anthropic",
2348
- baseUrl,
2349
- };
2350
- },
2351
- }) ?? null
2352
- );
2353
- },
2354
- }),
2355
- };
2356
- }
2357
-
2358
- // ---------------------------------------------------------------------------
2359
- // Models.dev provider descriptors for generate-models.ts
2360
- // ---------------------------------------------------------------------------
2361
-
2362
- export const UNK_CONTEXT_WINDOW = 222_222;
2363
- export const UNK_MAX_TOKENS = 8_888;
2364
-
2365
- /** Describes how to map models.dev API data for a single provider. */
2366
- export interface ModelsDevProviderDescriptor {
2367
- /** Key in the models.dev API response JSON (e.g., "anthropic", "amazon-bedrock") */
2368
- modelsDevKey: string;
2369
- /** Provider ID in our system */
2370
- providerId: string;
2371
- /** Default API type for this provider's models */
2372
- api: Api;
2373
- /** Default base URL */
2374
- baseUrl: string;
2375
- /** Default context window fallback (default: UNKNNOWN_CONTEXT_WINDOW) */
2376
- defaultContextWindow?: number;
2377
- /** Default max tokens fallback (default: UNKNNOWN_MAX_TOKENS) */
2378
- defaultMaxTokens?: number;
2379
- /** Optional compat overrides applied to every model from this provider */
2380
- compat?: Model<Api>["compat"];
2381
- /** Optional static headers applied to every model */
2382
- headers?: Record<string, string>;
2383
- /**
2384
- * Optional filter: return false to skip a model.
2385
- * Called with (modelId, rawModel). Default: skip if tool_call !== true.
2386
- */
2387
- filterModel?: (modelId: string, model: ModelsDevModel) => boolean;
2388
- /**
2389
- * Optional transform: modify the mapped model before it's added.
2390
- * Can return null to skip the model, or an array to emit multiple models.
2391
- */
2392
- transformModel?: (model: Model<Api>, modelId: string, raw: ModelsDevModel) => Model<Api> | Model<Api>[] | null;
2393
- /**
2394
- * Optional: override the API type per-model.
2395
- * Called with (modelId, raw). Return the API type to use.
2396
- * If not provided, uses the `api` field.
2397
- */
2398
- resolveApi?: (modelId: string, raw: ModelsDevModel) => { api: Api; baseUrl: string } | null;
2399
- }
2400
-
2401
- /** Generic mapper that converts models.dev data using provider descriptors. */
2402
- export function mapModelsDevToModels(
2403
- data: Record<string, unknown>,
2404
- descriptors: readonly ModelsDevProviderDescriptor[],
2405
- ): Model<Api>[] {
2406
- const models: Model<Api>[] = [];
2407
- for (const desc of descriptors) {
2408
- const providerData = (data as Record<string, Record<string, unknown>>)[desc.modelsDevKey];
2409
- if (!isRecord(providerData) || !isRecord(providerData.models)) continue;
2410
-
2411
- for (const [modelId, rawModel] of Object.entries(providerData.models)) {
2412
- if (!isRecord(rawModel)) continue;
2413
- const m = rawModel as ModelsDevModel;
2414
-
2415
- // Default filter: tool_call must be true
2416
- if (desc.filterModel) {
2417
- if (!desc.filterModel(modelId, m)) continue;
2418
- } else {
2419
- if (m.tool_call !== true) continue;
2420
- }
2421
-
2422
- // Resolve API and baseUrl (may be per-model for providers like OpenCode)
2423
- const resolved = desc.resolveApi?.(modelId, m) ?? { api: desc.api, baseUrl: desc.baseUrl };
2424
- if (!resolved) continue;
2425
-
2426
- const mapped: Model<Api> = {
2427
- id: modelId,
2428
- name: toModelName(m.name, modelId),
2429
- api: resolved.api,
2430
- provider: desc.providerId as Model<Api>["provider"],
2431
- baseUrl: resolved.baseUrl,
2432
- reasoning: m.reasoning === true,
2433
- input: toInputCapabilities(m.modalities?.input),
2434
- cost: {
2435
- input: toNumber(m.cost?.input) ?? 0,
2436
- output: toNumber(m.cost?.output) ?? 0,
2437
- cacheRead: toNumber(m.cost?.cache_read) ?? 0,
2438
- cacheWrite: toNumber(m.cost?.cache_write) ?? 0,
2439
- },
2440
- contextWindow: toPositiveNumber(m.limit?.context, desc.defaultContextWindow ?? UNK_CONTEXT_WINDOW),
2441
- maxTokens: toPositiveNumber(m.limit?.output, desc.defaultMaxTokens ?? UNK_MAX_TOKENS),
2442
- ...(desc.compat && { compat: desc.compat }),
2443
- ...(desc.headers && { headers: { ...desc.headers } }),
2444
- };
2445
-
2446
- // Apply per-model transform
2447
- if (desc.transformModel) {
2448
- const result = desc.transformModel(mapped, modelId, m);
2449
- if (result === null) continue;
2450
- if (Array.isArray(result)) {
2451
- models.push(...result);
2452
- } else {
2453
- models.push(result);
2454
- }
2455
- } else {
2456
- models.push(mapped);
2457
- }
2458
- }
2459
- }
2460
- return models;
2461
- }
2462
-
2463
- // Bedrock cross-region prefix helpers
2464
- const BEDROCK_GLOBAL_PREFIXES = [
2465
- "anthropic.claude-haiku-4-5",
2466
- "anthropic.claude-sonnet-4",
2467
- "anthropic.claude-opus-4-5",
2468
- "amazon.nova-2-lite",
2469
- "cohere.embed-v4",
2470
- "twelvelabs.pegasus-1-2",
2471
- ];
2472
-
2473
- const BEDROCK_US_PREFIXES = [
2474
- "amazon.nova-lite",
2475
- "amazon.nova-micro",
2476
- "amazon.nova-premier",
2477
- "amazon.nova-pro",
2478
- "anthropic.claude-3-7-sonnet",
2479
- "anthropic.claude-opus-4-1",
2480
- "anthropic.claude-opus-4-20250514",
2481
- "deepseek.r1",
2482
- "meta.llama3-2",
2483
- "meta.llama3-3",
2484
- "meta.llama4",
2485
- ];
2486
-
2487
- function bedrockCrossRegionId(id: string): string {
2488
- if (BEDROCK_GLOBAL_PREFIXES.some(p => id.startsWith(p))) return `global.${id}`;
2489
- if (BEDROCK_US_PREFIXES.some(p => id.startsWith(p))) return `us.${id}`;
2490
- return id;
2491
- }
2492
-
2493
- interface ApiResolutionRule {
2494
- matches: (modelId: string, raw: ModelsDevModel) => boolean;
2495
- resolved: { api: Api; baseUrl: string };
2496
- }
2497
-
2498
- function resolveApiByRules(
2499
- modelId: string,
2500
- raw: ModelsDevModel,
2501
- rules: readonly ApiResolutionRule[],
2502
- fallback: { api: Api; baseUrl: string },
2503
- ): { api: Api; baseUrl: string } {
2504
- for (const rule of rules) {
2505
- if (rule.matches(modelId, raw)) return rule.resolved;
2506
- }
2507
- return fallback;
2508
- }
2509
-
2510
- function createOpenCodeApiResolution(
2511
- basePath: string,
2512
- idOverrides: Readonly<Record<string, Api>> = {},
2513
- ): {
2514
- defaultResolution: { api: Api; baseUrl: string };
2515
- rules: ApiResolutionRule[];
2516
- } {
2517
- const completionsBaseUrl = `${basePath}/v1`;
2518
- // Per-API base URLs on the OpenCode-style endpoint:
2519
- // - openai-completions / openai-responses / google-generative-ai → /v1
2520
- // - anthropic-messages → bare basePath (the Anthropic client appends /v1/messages)
2521
- const baseUrlForApi = (api: Api): string => (api === "anthropic-messages" ? basePath : completionsBaseUrl);
2522
- const overrideRules: ApiResolutionRule[] = Object.entries(idOverrides).map(([id, api]) => ({
2523
- matches: modelId => modelId === id,
2524
- resolved: { api, baseUrl: baseUrlForApi(api) },
2525
- }));
2526
- return {
2527
- defaultResolution: { api: "openai-completions", baseUrl: completionsBaseUrl },
2528
- rules: [
2529
- // Per-id overrides take precedence over npm-based heuristics so we can
2530
- // correct upstream metadata mismatches (see OPENCODE_GO_API_RESOLUTION).
2531
- ...overrideRules,
2532
- {
2533
- matches: (_modelId, raw) => raw.provider?.npm === "@ai-sdk/openai",
2534
- resolved: { api: "openai-responses", baseUrl: completionsBaseUrl },
2535
- },
2536
- {
2537
- matches: (_modelId, raw) => raw.provider?.npm === "@ai-sdk/anthropic",
2538
- resolved: { api: "anthropic-messages", baseUrl: basePath },
2539
- },
2540
- {
2541
- matches: (_modelId, raw) => raw.provider?.npm === "@ai-sdk/google",
2542
- resolved: { api: "google-generative-ai", baseUrl: completionsBaseUrl },
2543
- },
2544
- ],
2545
- };
2546
- }
2547
-
2548
- // OpenCode Zen: models.dev declares minimax-m3-free (and forward-compat
2549
- // minimax-m3) with `provider.npm = "@ai-sdk/anthropic"`, but the Zen gateway
2550
- // only serves them at https://opencode.ai/zen/v1/chat/completions (verified
2551
- // against the live /v1/models response — minimax-m3-free is listed there, and
2552
- // the gateway has no /v1/messages route for it). Without this override the
2553
- // resolver POSTs anthropic-shaped requests to /v1/messages and the UI surfaces
2554
- // raw <invoke>/<|minimax|>/<tool_call> markup (#1617).
2555
- const OPENCODE_ZEN_API_RESOLUTION = createOpenCodeApiResolution("https://opencode.ai/zen", {
2556
- "minimax-m3": "openai-completions",
2557
- "minimax-m3-free": "openai-completions",
2558
- });
2559
- // OpenCode Go: models.dev declares minimax-m2.7 / qwen3.5-plus / qwen3.6-plus
2560
- // (and now also minimax-m3) with `provider.npm = "@ai-sdk/anthropic"`, but
2561
- // the OpenCode Go gateway only serves them at
2562
- // `https://opencode.ai/zen/go/v1/chat/completions` (verified against
2563
- // https://opencode.ai/zen/go/v1/models and the upstream endpoint table at
2564
- // https://opencode.ai/docs/go/#endpoints — minimax-m2.5 works the same way
2565
- // and lacks an `npm` field on models.dev so it already falls through to the
2566
- // openai-completions default). Without this override the resolver would POST
2567
- // anthropic-style requests to /v1/messages and the gateway would return its
2568
- // `Page Not Found` HTML (issue #887 for the qwen/m2.7 entries; minimax-m3
2569
- // and minimax-m3-free added under #1617 for the same root cause).
2570
- const OPENCODE_GO_API_RESOLUTION = createOpenCodeApiResolution("https://opencode.ai/zen/go", {
2571
- "minimax-m2.7": "openai-completions",
2572
- "minimax-m3": "openai-completions",
2573
- "minimax-m3-free": "openai-completions",
2574
- "qwen3.5-plus": "openai-completions",
2575
- "qwen3.6-plus": "openai-completions",
2576
- });
2577
-
2578
- const COPILOT_BASE_URL = "https://api.githubcopilot.com";
2579
-
2580
- const COPILOT_DEFAULT_RESOLUTION = {
2581
- api: "openai-completions",
2582
- baseUrl: COPILOT_BASE_URL,
2583
- } as const satisfies { api: Api; baseUrl: string };
2584
-
2585
- const COPILOT_API_RESOLUTION_RULES: readonly ApiResolutionRule[] = [
2586
- {
2587
- matches: modelId => /^claude-(haiku|sonnet|opus)-4([.-]|$)/.test(modelId),
2588
- resolved: { api: "anthropic-messages", baseUrl: COPILOT_BASE_URL },
2589
- },
2590
- {
2591
- matches: modelId => modelId.startsWith("gpt-5") || modelId.startsWith("oswe"),
2592
- resolved: { api: "openai-responses", baseUrl: COPILOT_BASE_URL },
2593
- },
2594
- ];
2595
-
2596
- function simpleModelsDevDescriptor(
2597
- modelsDevKey: string,
2598
- providerId: string,
2599
- api: Api,
2600
- baseUrl: string,
2601
- options: Omit<ModelsDevProviderDescriptor, "modelsDevKey" | "providerId" | "api" | "baseUrl"> = {},
2602
- ): ModelsDevProviderDescriptor {
2603
- return {
2604
- modelsDevKey,
2605
- providerId,
2606
- api,
2607
- baseUrl,
2608
- ...options,
2609
- };
2610
- }
2611
-
2612
- function openAiCompletionsDescriptor(
2613
- modelsDevKey: string,
2614
- providerId: string,
2615
- baseUrl: string,
2616
- options: Omit<ModelsDevProviderDescriptor, "modelsDevKey" | "providerId" | "api" | "baseUrl"> = {},
2617
- ): ModelsDevProviderDescriptor {
2618
- return simpleModelsDevDescriptor(modelsDevKey, providerId, "openai-completions", baseUrl, options);
2619
- }
2620
-
2621
- function anthropicMessagesDescriptor(
2622
- modelsDevKey: string,
2623
- providerId: string,
2624
- baseUrl: string,
2625
- options: Omit<ModelsDevProviderDescriptor, "modelsDevKey" | "providerId" | "api" | "baseUrl"> = {},
2626
- ): ModelsDevProviderDescriptor {
2627
- return simpleModelsDevDescriptor(modelsDevKey, providerId, "anthropic-messages", baseUrl, options);
2628
- }
2629
-
2630
- const GOOGLE_VERTEX_BASE_URL = "https://{location}-aiplatform.googleapis.com";
2631
- const GOOGLE_VERTEX_OPENAI_BASE_URL =
2632
- "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi";
2633
- const GOOGLE_VERTEX_ANTHROPIC_BASE_URL =
2634
- "https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model}:streamRawPredict";
2635
-
2636
- function resolveGoogleVertexApi(modelId: string, raw: ModelsDevModel): { api: Api; baseUrl: string } {
2637
- if (raw.provider?.npm === "@ai-sdk/google-vertex/anthropic") {
2638
- return {
2639
- api: "anthropic-messages",
2640
- baseUrl: GOOGLE_VERTEX_ANTHROPIC_BASE_URL.replace("{model}", modelId),
2641
- };
2642
- }
2643
- if (modelId.includes("/") || raw.provider?.npm === "@ai-sdk/openai-compatible") {
2644
- return { api: "openai-completions", baseUrl: GOOGLE_VERTEX_OPENAI_BASE_URL };
2645
- }
2646
- return { api: "google-vertex", baseUrl: GOOGLE_VERTEX_BASE_URL };
2647
- }
2648
-
2649
- const MODELS_DEV_PROVIDER_DESCRIPTORS_BEDROCK: readonly ModelsDevProviderDescriptor[] = [
2650
- // --- Amazon Bedrock ---
2651
- {
2652
- modelsDevKey: "amazon-bedrock",
2653
- providerId: "amazon-bedrock",
2654
- api: "bedrock-converse-stream",
2655
- baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
2656
- filterModel: (id, m) => {
2657
- if (m.tool_call !== true) return false;
2658
- if (id.startsWith("ai21.jamba")) return false;
2659
- if (id.startsWith("amazon.titan-text-express") || id.startsWith("mistral.mistral-7b-instruct-v0"))
2660
- return false;
2661
- return true;
2662
- },
2663
- transformModel: (model, modelId, m) => {
2664
- const crossRegionId = bedrockCrossRegionId(modelId);
2665
- const bedrockModel: Model<Api> = {
2666
- ...model,
2667
- id: crossRegionId,
2668
- name: toModelName(m.name, crossRegionId),
2669
- };
2670
- // Also emit EU variants for Claude models
2671
- if (modelId.startsWith("anthropic.claude-")) {
2672
- return [
2673
- bedrockModel,
2674
- {
2675
- ...bedrockModel,
2676
- id: `eu.${modelId}`,
2677
- name: `${toModelName(m.name, modelId)} (EU)`,
2678
- },
2679
- ];
2680
- }
2681
- return bedrockModel;
2682
- },
2683
- },
2684
- ];
2685
-
2686
- const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor[] = [
2687
- // --- Anthropic ---
2688
- anthropicMessagesDescriptor("anthropic", "anthropic", "https://api.anthropic.com", {
2689
- filterModel: (id, m) => {
2690
- if (m.tool_call !== true) return false;
2691
- if (
2692
- id.startsWith("claude-3-5-haiku") ||
2693
- id.startsWith("claude-3-7-sonnet") ||
2694
- id === "claude-3-opus-20240229" ||
2695
- id === "claude-3-sonnet-20240229"
2696
- )
2697
- return false;
2698
- return true;
2699
- },
2700
- }),
2701
- // --- Google ---
2702
- simpleModelsDevDescriptor(
2703
- "google",
2704
- "google",
2705
- "google-generative-ai",
2706
- "https://generativelanguage.googleapis.com/v1beta",
2707
- ),
2708
- // --- OpenAI ---
2709
- simpleModelsDevDescriptor("openai", "openai", "openai-responses", "https://api.openai.com/v1"),
2710
- // --- Groq ---
2711
- openAiCompletionsDescriptor("groq", "groq", "https://api.groq.com/openai/v1"),
2712
- // --- Cerebras ---
2713
- openAiCompletionsDescriptor("cerebras", "cerebras", "https://api.cerebras.ai/v1"),
2714
- // --- Together ---
2715
- openAiCompletionsDescriptor("together", "together", "https://api.together.xyz/v1"),
2716
- // --- NVIDIA ---
2717
- openAiCompletionsDescriptor("nvidia", "nvidia", "https://integrate.api.nvidia.com/v1", {
2718
- defaultContextWindow: 131072,
2719
- }),
2720
- // --- xAI ---
2721
- openAiCompletionsDescriptor("xai", "xai", "https://api.x.ai/v1"),
2722
- // --- DeepSeek ---
2723
- openAiCompletionsDescriptor("deepseek", "deepseek", "https://api.deepseek.com", {
2724
- // Only ship the v4 family as built-ins; older deepseek-chat / deepseek-reasoner
2725
- // ids are kept off the catalog until the issue thread asks for them.
2726
- filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
2727
- compat: {
2728
- // DeepSeek V4 only accepts `high`/`max`; map lower Prometheus levels upward so
2729
- // subagent "minimal" turns stay in documented thinking mode instead of
2730
- // sending unsupported effort strings.
2731
- supportsDeveloperRole: false,
2732
- supportsReasoningEffort: true,
2733
- reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
2734
- maxTokensField: "max_tokens",
2735
- // DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
2736
- // Tool calls still work without it; the API defaults to auto when tools exist.
2737
- supportsToolChoice: false,
2738
- // DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
2739
- // reasoning_effort. Keep the toggle explicit for built-in models.
2740
- extraBody: { thinking: { type: "enabled" } },
2741
- // DeepSeek emits chain-of-thought via `reasoning_content` and requires it
2742
- // to round-trip on assistant tool-call messages so the model can resume
2743
- // from prior thinking (interleaved.field=reasoning_content on models.dev,
2744
- // matches the kimi/openrouter handling already in detectCompat).
2745
- reasoningContentField: "reasoning_content",
2746
- requiresReasoningContentForToolCalls: true,
2747
- requiresAssistantContentForToolCalls: true,
2748
- },
2749
- }),
2750
- ];
2751
-
2752
- const MODELS_DEV_PROVIDER_DESCRIPTORS_CODING_PLANS: readonly ModelsDevProviderDescriptor[] = [
2753
- // --- zAI ---
2754
- anthropicMessagesDescriptor("zai-coding-plan", "zai", "https://api.z.ai/api/anthropic"),
2755
- // --- Xiaomi ---
2756
- openAiCompletionsDescriptor("xiaomi", "xiaomi", "https://api.xiaomimimo.com/v1", {
2757
- defaultContextWindow: 262144,
2758
- defaultMaxTokens: 8192,
2759
- compat: {
2760
- supportsStore: false,
2761
- thinkingFormat: "zai",
2762
- reasoningContentField: "reasoning_content",
2763
- requiresReasoningContentForToolCalls: true,
2764
- allowsSyntheticReasoningContentForToolCalls: false,
2765
- },
2766
- }),
2767
- // --- MiniMax Coding Plan ---
2768
- openAiCompletionsDescriptor("minimax-coding-plan", "minimax-code", "https://api.minimax.io/v1", {
2769
- compat: {
2770
- supportsStore: false,
2771
- supportsDeveloperRole: false,
2772
- supportsReasoningEffort: false,
2773
- reasoningContentField: "reasoning_content",
2774
- },
2775
- }),
2776
- openAiCompletionsDescriptor("minimax-cn-coding-plan", "minimax-code-cn", "https://api.minimaxi.com/v1", {
2777
- compat: {
2778
- supportsStore: false,
2779
- supportsDeveloperRole: false,
2780
- supportsReasoningEffort: false,
2781
- reasoningContentField: "reasoning_content",
2782
- },
2783
- }),
2784
- // --- Alibaba Coding Plan ---
2785
- openAiCompletionsDescriptor(
2786
- "alibaba-coding-plan",
2787
- "alibaba-coding-plan",
2788
- "https://coding-intl.dashscope.aliyuncs.com/v1",
2789
- {
2790
- compat: {
2791
- supportsDeveloperRole: false,
2792
- },
2793
- },
2794
- ),
2795
- // --- Zhipu Coding Plan ---
2796
- openAiCompletionsDescriptor(
2797
- "zhipu-coding-plan",
2798
- "zhipu-coding-plan",
2799
- "https://open.bigmodel.cn/api/coding/paas/v4",
2800
- {
2801
- compat: {
2802
- thinkingFormat: "zai",
2803
- reasoningContentField: "reasoning_content",
2804
- supportsDeveloperRole: false,
2805
- },
2806
- },
2807
- ),
2808
- ];
2809
-
2810
- const filterActiveToolCallModels = (_id: string, m: ModelsDevModel): boolean => {
2811
- if (m.tool_call !== true) return false;
2812
- if (m.status === "deprecated") return false;
2813
- return true;
2814
- };
2815
-
2816
- const MODELS_DEV_PROVIDER_DESCRIPTORS_GOOGLE_VERTEX: readonly ModelsDevProviderDescriptor[] = [
2817
- simpleModelsDevDescriptor("google-vertex", "google-vertex", "google-vertex", GOOGLE_VERTEX_BASE_URL, {
2818
- filterModel: filterActiveToolCallModels,
2819
- resolveApi: resolveGoogleVertexApi,
2820
- }),
2821
- ];
2822
-
2823
- const MODELS_DEV_PROVIDER_DESCRIPTORS_SPECIALIZED: readonly ModelsDevProviderDescriptor[] = [
2824
- // --- Cloudflare AI Gateway ---
2825
- anthropicMessagesDescriptor(
2826
- "cloudflare-ai-gateway",
2827
- "cloudflare-ai-gateway",
2828
- "https://gateway.ai.cloudflare.com/v1/<account>/<gateway>/anthropic",
2829
- ),
2830
- // --- Mistral ---
2831
- openAiCompletionsDescriptor("mistral", "mistral", "https://api.mistral.ai/v1"),
2832
- // --- OpenCode Zen ---
2833
- openAiCompletionsDescriptor("opencode", "opencode-zen", "https://opencode.ai/zen/v1", {
2834
- filterModel: filterActiveToolCallModels,
2835
- resolveApi: (modelId, raw) =>
2836
- resolveApiByRules(
2837
- modelId,
2838
- raw,
2839
- OPENCODE_ZEN_API_RESOLUTION.rules,
2840
- OPENCODE_ZEN_API_RESOLUTION.defaultResolution,
2841
- ),
2842
- }),
2843
- // --- OpenCode Go ---
2844
- openAiCompletionsDescriptor("opencode-go", "opencode-go", "https://opencode.ai/zen/go/v1", {
2845
- filterModel: filterActiveToolCallModels,
2846
- resolveApi: (modelId, raw) =>
2847
- resolveApiByRules(
2848
- modelId,
2849
- raw,
2850
- OPENCODE_GO_API_RESOLUTION.rules,
2851
- OPENCODE_GO_API_RESOLUTION.defaultResolution,
2852
- ),
2853
- }),
2854
- // --- GitHub Copilot ---
2855
- openAiCompletionsDescriptor("github-copilot", "github-copilot", COPILOT_BASE_URL, {
2856
- defaultContextWindow: 128000,
2857
- defaultMaxTokens: 8192,
2858
- headers: { ...OPENCODE_HEADERS },
2859
- filterModel: filterActiveToolCallModels,
2860
- resolveApi: (modelId, raw) =>
2861
- resolveApiByRules(modelId, raw, COPILOT_API_RESOLUTION_RULES, COPILOT_DEFAULT_RESOLUTION),
2862
- transformModel: model => {
2863
- // compat only applies to openai-completions models
2864
- if (model.api === "openai-completions") {
2865
- return {
2866
- ...model,
2867
- compat: {
2868
- supportsStore: false,
2869
- supportsDeveloperRole: false,
2870
- supportsReasoningEffort: false,
2871
- },
2872
- };
2873
- }
2874
- return model;
2875
- },
2876
- }),
2877
- // --- MiniMax (Anthropic) ---
2878
- anthropicMessagesDescriptor("minimax", "minimax", "https://api.minimax.io/anthropic"),
2879
- anthropicMessagesDescriptor("minimax-cn", "minimax-cn", "https://api.minimaxi.com/anthropic"),
2880
- // --- Qwen Portal ---
2881
- openAiCompletionsDescriptor("qwen-portal", "qwen-portal", "https://portal.qwen.ai/v1", {
2882
- defaultContextWindow: 128000,
2883
- defaultMaxTokens: 8192,
2884
- }),
2885
-
2886
- // --- ZenMux ---
2887
- openAiCompletionsDescriptor("zenmux", "zenmux", ZENMUX_OPENAI_BASE_URL, {
2888
- filterModel: filterActiveToolCallModels,
2889
- resolveApi: modelId => {
2890
- if (modelId.startsWith("anthropic/")) {
2891
- return { api: "anthropic-messages" as const, baseUrl: ZENMUX_ANTHROPIC_BASE_URL };
2892
- }
2893
- return { api: "openai-completions" as const, baseUrl: ZENMUX_OPENAI_BASE_URL };
2894
- },
2895
- }),
2896
- ];
2897
- /** All provider descriptors for models.dev data mapping in generate-models.ts. */
2898
- export const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[] = [
2899
- ...MODELS_DEV_PROVIDER_DESCRIPTORS_BEDROCK,
2900
- ...MODELS_DEV_PROVIDER_DESCRIPTORS_GOOGLE_VERTEX,
2901
- ...MODELS_DEV_PROVIDER_DESCRIPTORS_CORE,
2902
- ...MODELS_DEV_PROVIDER_DESCRIPTORS_CODING_PLANS,
2903
- ...MODELS_DEV_PROVIDER_DESCRIPTORS_SPECIALIZED,
2904
- ];