@prometheus-ai/ai 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. package/CHANGELOG.md +7 -0
  2. package/README.md +1184 -0
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +6 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +101 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/snapshot-cache.d.ts +17 -0
  10. package/dist/types/auth-broker/types.d.ts +107 -0
  11. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  12. package/dist/types/auth-gateway/http.d.ts +39 -0
  13. package/dist/types/auth-gateway/index.d.ts +3 -0
  14. package/dist/types/auth-gateway/server.d.ts +36 -0
  15. package/dist/types/auth-gateway/types.d.ts +117 -0
  16. package/dist/types/auth-storage.d.ts +762 -0
  17. package/dist/types/index.d.ts +49 -0
  18. package/dist/types/model-cache.d.ts +17 -0
  19. package/dist/types/model-manager.d.ts +64 -0
  20. package/dist/types/model-thinking.d.ts +100 -0
  21. package/dist/types/models.d.ts +12 -0
  22. package/dist/types/provider-details.d.ts +24 -0
  23. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  24. package/dist/types/provider-models/descriptors.d.ts +50 -0
  25. package/dist/types/provider-models/google.d.ts +24 -0
  26. package/dist/types/provider-models/index.d.ts +5 -0
  27. package/dist/types/provider-models/ollama.d.ts +7 -0
  28. package/dist/types/provider-models/openai-compat.d.ts +323 -0
  29. package/dist/types/provider-models/special.d.ts +16 -0
  30. package/dist/types/providers/amazon-bedrock.d.ts +38 -0
  31. package/dist/types/providers/anthropic-client.d.ts +99 -0
  32. package/dist/types/providers/anthropic-messages-server-schema.d.ts +465 -0
  33. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  34. package/dist/types/providers/anthropic-wire.d.ts +262 -0
  35. package/dist/types/providers/anthropic.d.ts +206 -0
  36. package/dist/types/providers/aws-credentials.d.ts +43 -0
  37. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  38. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  39. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  40. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  41. package/dist/types/providers/cursor.d.ts +43 -0
  42. package/dist/types/providers/error-message.d.ts +27 -0
  43. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  44. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  45. package/dist/types/providers/google-auth.d.ts +24 -0
  46. package/dist/types/providers/google-gemini-cli.d.ts +81 -0
  47. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  48. package/dist/types/providers/google-shared.d.ts +171 -0
  49. package/dist/types/providers/google-types.d.ts +138 -0
  50. package/dist/types/providers/google-vertex.d.ts +7 -0
  51. package/dist/types/providers/google.d.ts +4 -0
  52. package/dist/types/providers/grammar.d.ts +1 -0
  53. package/dist/types/providers/kimi.d.ts +27 -0
  54. package/dist/types/providers/mock.d.ts +173 -0
  55. package/dist/types/providers/ollama.d.ts +6 -0
  56. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  57. package/dist/types/providers/openai-chat-server-schema.d.ts +817 -0
  58. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  59. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  60. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  61. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  62. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  63. package/dist/types/providers/openai-completions-compat.d.ts +27 -0
  64. package/dist/types/providers/openai-completions.d.ts +54 -0
  65. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  66. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  67. package/dist/types/providers/openai-responses-shared.d.ts +105 -0
  68. package/dist/types/providers/openai-responses.d.ts +66 -0
  69. package/dist/types/providers/prometheus-native-client.d.ts +13 -0
  70. package/dist/types/providers/prometheus-native-server.d.ts +68 -0
  71. package/dist/types/providers/register-builtins.d.ts +31 -0
  72. package/dist/types/providers/synthetic.d.ts +26 -0
  73. package/dist/types/providers/transform-messages.d.ts +12 -0
  74. package/dist/types/providers/vision-guard.d.ts +20 -0
  75. package/dist/types/providers/xai-responses.d.ts +23 -0
  76. package/dist/types/rate-limit-utils.d.ts +19 -0
  77. package/dist/types/stream.d.ts +28 -0
  78. package/dist/types/types.d.ts +819 -0
  79. package/dist/types/usage/claude.d.ts +4 -0
  80. package/dist/types/usage/gemini.d.ts +2 -0
  81. package/dist/types/usage/github-copilot.d.ts +7 -0
  82. package/dist/types/usage/google-antigravity.d.ts +2 -0
  83. package/dist/types/usage/kimi.d.ts +2 -0
  84. package/dist/types/usage/minimax-code.d.ts +2 -0
  85. package/dist/types/usage/openai-codex.d.ts +3 -0
  86. package/dist/types/usage/shared.d.ts +1 -0
  87. package/dist/types/usage/zai.d.ts +2 -0
  88. package/dist/types/usage.d.ts +260 -0
  89. package/dist/types/utils/abort.d.ts +19 -0
  90. package/dist/types/utils/abortable-iterator.d.ts +4 -0
  91. package/dist/types/utils/anthropic-auth.d.ts +35 -0
  92. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  93. package/dist/types/utils/discovery/codex.d.ts +38 -0
  94. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  95. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  96. package/dist/types/utils/discovery/index.d.ts +4 -0
  97. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  98. package/dist/types/utils/event-stream.d.ts +28 -0
  99. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  100. package/dist/types/utils/foundry.d.ts +1 -0
  101. package/dist/types/utils/http-inspector.d.ts +31 -0
  102. package/dist/types/utils/idle-iterator.d.ts +78 -0
  103. package/dist/types/utils/json-parse.d.ts +37 -0
  104. package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  105. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  106. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  107. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  108. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  109. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  110. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  111. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  112. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  113. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  114. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  115. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  116. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  117. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  118. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  119. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  120. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  121. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  122. package/dist/types/utils/oauth/index.d.ts +38 -0
  123. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  124. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  125. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  126. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  127. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  128. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  129. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  130. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  131. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  132. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  133. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  134. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  135. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  136. package/dist/types/utils/oauth/openrouter.d.ts +1 -0
  137. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  138. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  139. package/dist/types/utils/oauth/pkce.d.ts +8 -0
  140. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  141. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  142. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  143. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  144. package/dist/types/utils/oauth/together.d.ts +1 -0
  145. package/dist/types/utils/oauth/types.d.ts +44 -0
  146. package/dist/types/utils/oauth/venice.d.ts +18 -0
  147. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  148. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  149. package/dist/types/utils/oauth/wafer.d.ts +2 -0
  150. package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
  151. package/dist/types/utils/oauth/xiaomi.d.ts +25 -0
  152. package/dist/types/utils/oauth/zai.d.ts +18 -0
  153. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  154. package/dist/types/utils/oauth/zhipu.d.ts +18 -0
  155. package/dist/types/utils/overflow.d.ts +54 -0
  156. package/dist/types/utils/parse-bind.d.ts +23 -0
  157. package/dist/types/utils/provider-response.d.ts +3 -0
  158. package/dist/types/utils/request-debug.d.ts +29 -0
  159. package/dist/types/utils/retry-after.d.ts +3 -0
  160. package/dist/types/utils/retry.d.ts +26 -0
  161. package/dist/types/utils/schema/adapt.d.ts +24 -0
  162. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  163. package/dist/types/utils/schema/dereference.d.ts +11 -0
  164. package/dist/types/utils/schema/draft.d.ts +10 -0
  165. package/dist/types/utils/schema/equality.d.ts +4 -0
  166. package/dist/types/utils/schema/fields.d.ts +49 -0
  167. package/dist/types/utils/schema/index.d.ts +13 -0
  168. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  169. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  170. package/dist/types/utils/schema/normalize.d.ts +93 -0
  171. package/dist/types/utils/schema/spill.d.ts +8 -0
  172. package/dist/types/utils/schema/stamps.d.ts +25 -0
  173. package/dist/types/utils/schema/types.d.ts +4 -0
  174. package/dist/types/utils/schema/wire.d.ts +53 -0
  175. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  176. package/dist/types/utils/sdk-stream-timeout.d.ts +33 -0
  177. package/dist/types/utils/sse-debug.d.ts +10 -0
  178. package/dist/types/utils/stream-markup-healing.d.ts +80 -0
  179. package/dist/types/utils/tool-choice.d.ts +50 -0
  180. package/dist/types/utils/validation.d.ts +17 -0
  181. package/dist/types/utils.d.ts +28 -0
  182. package/package.json +142 -0
  183. package/src/api-registry.ts +96 -0
  184. package/src/auth-broker/client.ts +358 -0
  185. package/src/auth-broker/index.ts +6 -0
  186. package/src/auth-broker/refresher.ts +117 -0
  187. package/src/auth-broker/remote-store.ts +637 -0
  188. package/src/auth-broker/server.ts +644 -0
  189. package/src/auth-broker/snapshot-cache.ts +174 -0
  190. package/src/auth-broker/types.ts +130 -0
  191. package/src/auth-broker/wire-schemas.ts +200 -0
  192. package/src/auth-gateway/http.ts +194 -0
  193. package/src/auth-gateway/index.ts +3 -0
  194. package/src/auth-gateway/server.ts +822 -0
  195. package/src/auth-gateway/types.ts +143 -0
  196. package/src/auth-storage.ts +4608 -0
  197. package/src/index.ts +54 -0
  198. package/src/model-cache.ts +129 -0
  199. package/src/model-manager.ts +469 -0
  200. package/src/model-thinking.ts +756 -0
  201. package/src/models.json +60287 -0
  202. package/src/models.json.d.ts +9 -0
  203. package/src/models.ts +56 -0
  204. package/src/prompts/turn-aborted-guidance.md +4 -0
  205. package/src/provider-details.ts +90 -0
  206. package/src/provider-models/bundled-references.ts +38 -0
  207. package/src/provider-models/descriptors.ts +364 -0
  208. package/src/provider-models/google.ts +88 -0
  209. package/src/provider-models/index.ts +5 -0
  210. package/src/provider-models/ollama.ts +153 -0
  211. package/src/provider-models/openai-compat.ts +2904 -0
  212. package/src/provider-models/special.ts +67 -0
  213. package/src/providers/amazon-bedrock.ts +873 -0
  214. package/src/providers/anthropic-client.ts +318 -0
  215. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  216. package/src/providers/anthropic-messages-server.ts +681 -0
  217. package/src/providers/anthropic-wire.ts +268 -0
  218. package/src/providers/anthropic.ts +3106 -0
  219. package/src/providers/aws-credentials.ts +501 -0
  220. package/src/providers/aws-eventstream.ts +185 -0
  221. package/src/providers/aws-sigv4.ts +218 -0
  222. package/src/providers/azure-openai-responses.ts +361 -0
  223. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  224. package/src/providers/cursor/proto/agent.proto +3526 -0
  225. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  226. package/src/providers/cursor/proto/buf.yaml +17 -0
  227. package/src/providers/cursor.ts +2621 -0
  228. package/src/providers/error-message.ts +21 -0
  229. package/src/providers/github-copilot-headers.ts +140 -0
  230. package/src/providers/gitlab-duo.ts +372 -0
  231. package/src/providers/google-auth.ts +252 -0
  232. package/src/providers/google-gemini-cli.ts +809 -0
  233. package/src/providers/google-gemini-headers.ts +41 -0
  234. package/src/providers/google-shared.ts +917 -0
  235. package/src/providers/google-types.ts +167 -0
  236. package/src/providers/google-vertex.ts +91 -0
  237. package/src/providers/google.ts +41 -0
  238. package/src/providers/grammar.ts +70 -0
  239. package/src/providers/kimi.ts +52 -0
  240. package/src/providers/mock.ts +496 -0
  241. package/src/providers/ollama.ts +644 -0
  242. package/src/providers/openai-anthropic-shim.ts +138 -0
  243. package/src/providers/openai-chat-server-schema.ts +252 -0
  244. package/src/providers/openai-chat-server.ts +647 -0
  245. package/src/providers/openai-codex/constants.ts +43 -0
  246. package/src/providers/openai-codex/request-transformer.ts +161 -0
  247. package/src/providers/openai-codex/response-handler.ts +81 -0
  248. package/src/providers/openai-codex-responses.ts +3027 -0
  249. package/src/providers/openai-completions-compat.ts +320 -0
  250. package/src/providers/openai-completions.ts +2002 -0
  251. package/src/providers/openai-responses-server-schema.ts +290 -0
  252. package/src/providers/openai-responses-server.ts +1183 -0
  253. package/src/providers/openai-responses-shared.ts +956 -0
  254. package/src/providers/openai-responses.ts +679 -0
  255. package/src/providers/prometheus-native-client.ts +228 -0
  256. package/src/providers/prometheus-native-server.ts +212 -0
  257. package/src/providers/register-builtins.ts +457 -0
  258. package/src/providers/synthetic.ts +50 -0
  259. package/src/providers/transform-messages.ts +382 -0
  260. package/src/providers/vision-guard.ts +52 -0
  261. package/src/providers/xai-responses.ts +82 -0
  262. package/src/rate-limit-utils.ts +91 -0
  263. package/src/stream.ts +1068 -0
  264. package/src/types.ts +965 -0
  265. package/src/usage/claude.ts +482 -0
  266. package/src/usage/gemini.ts +250 -0
  267. package/src/usage/github-copilot.ts +421 -0
  268. package/src/usage/google-antigravity.ts +201 -0
  269. package/src/usage/kimi.ts +271 -0
  270. package/src/usage/minimax-code.ts +31 -0
  271. package/src/usage/openai-codex.ts +503 -0
  272. package/src/usage/shared.ts +10 -0
  273. package/src/usage/zai.ts +247 -0
  274. package/src/usage.ts +185 -0
  275. package/src/utils/abort.ts +51 -0
  276. package/src/utils/abortable-iterator.ts +69 -0
  277. package/src/utils/anthropic-auth.ts +93 -0
  278. package/src/utils/discovery/antigravity.ts +261 -0
  279. package/src/utils/discovery/codex.ts +371 -0
  280. package/src/utils/discovery/cursor.ts +306 -0
  281. package/src/utils/discovery/gemini.ts +248 -0
  282. package/src/utils/discovery/index.ts +4 -0
  283. package/src/utils/discovery/openai-compatible.ts +224 -0
  284. package/src/utils/event-stream.ts +142 -0
  285. package/src/utils/fireworks-model-id.ts +30 -0
  286. package/src/utils/foundry.ts +8 -0
  287. package/src/utils/http-inspector.ts +176 -0
  288. package/src/utils/idle-iterator.ts +273 -0
  289. package/src/utils/json-parse.ts +182 -0
  290. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  291. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  292. package/src/utils/oauth/anthropic.ts +273 -0
  293. package/src/utils/oauth/api-key-login.ts +87 -0
  294. package/src/utils/oauth/api-key-validation.ts +92 -0
  295. package/src/utils/oauth/callback-server.ts +276 -0
  296. package/src/utils/oauth/cerebras.ts +16 -0
  297. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  298. package/src/utils/oauth/cursor.ts +157 -0
  299. package/src/utils/oauth/deepseek.ts +53 -0
  300. package/src/utils/oauth/firepass.ts +24 -0
  301. package/src/utils/oauth/fireworks.ts +15 -0
  302. package/src/utils/oauth/github-copilot.ts +362 -0
  303. package/src/utils/oauth/gitlab-duo.ts +123 -0
  304. package/src/utils/oauth/google-antigravity.ts +200 -0
  305. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  306. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  307. package/src/utils/oauth/huggingface.ts +62 -0
  308. package/src/utils/oauth/index.ts +502 -0
  309. package/src/utils/oauth/kagi.ts +47 -0
  310. package/src/utils/oauth/kilo.ts +87 -0
  311. package/src/utils/oauth/kimi.ts +254 -0
  312. package/src/utils/oauth/litellm.ts +47 -0
  313. package/src/utils/oauth/lm-studio.ts +38 -0
  314. package/src/utils/oauth/minimax-code.ts +80 -0
  315. package/src/utils/oauth/moonshot.ts +23 -0
  316. package/src/utils/oauth/nanogpt.ts +15 -0
  317. package/src/utils/oauth/nvidia.ts +70 -0
  318. package/src/utils/oauth/oauth.html +199 -0
  319. package/src/utils/oauth/ollama-cloud.ts +28 -0
  320. package/src/utils/oauth/ollama.ts +47 -0
  321. package/src/utils/oauth/openai-codex.ts +299 -0
  322. package/src/utils/oauth/opencode.ts +49 -0
  323. package/src/utils/oauth/openrouter.ts +20 -0
  324. package/src/utils/oauth/parallel.ts +46 -0
  325. package/src/utils/oauth/perplexity.ts +206 -0
  326. package/src/utils/oauth/pkce.ts +18 -0
  327. package/src/utils/oauth/qianfan.ts +58 -0
  328. package/src/utils/oauth/qwen-portal.ts +60 -0
  329. package/src/utils/oauth/synthetic.ts +15 -0
  330. package/src/utils/oauth/tavily.ts +46 -0
  331. package/src/utils/oauth/together.ts +16 -0
  332. package/src/utils/oauth/types.ts +102 -0
  333. package/src/utils/oauth/venice.ts +59 -0
  334. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  335. package/src/utils/oauth/vllm.ts +40 -0
  336. package/src/utils/oauth/wafer.ts +50 -0
  337. package/src/utils/oauth/xai-oauth.ts +342 -0
  338. package/src/utils/oauth/xiaomi.ts +194 -0
  339. package/src/utils/oauth/zai.ts +60 -0
  340. package/src/utils/oauth/zenmux.ts +15 -0
  341. package/src/utils/oauth/zhipu.ts +60 -0
  342. package/src/utils/overflow.ts +137 -0
  343. package/src/utils/parse-bind.ts +54 -0
  344. package/src/utils/provider-response.ts +30 -0
  345. package/src/utils/request-debug.ts +336 -0
  346. package/src/utils/retry-after.ts +110 -0
  347. package/src/utils/retry.ts +54 -0
  348. package/src/utils/schema/CONSTRAINTS.md +164 -0
  349. package/src/utils/schema/adapt.ts +36 -0
  350. package/src/utils/schema/compatibility.ts +435 -0
  351. package/src/utils/schema/dereference.ts +98 -0
  352. package/src/utils/schema/draft.ts +341 -0
  353. package/src/utils/schema/equality.ts +97 -0
  354. package/src/utils/schema/fields.ts +191 -0
  355. package/src/utils/schema/index.ts +13 -0
  356. package/src/utils/schema/json-schema-validator.ts +577 -0
  357. package/src/utils/schema/meta-validator.ts +167 -0
  358. package/src/utils/schema/normalize.ts +1588 -0
  359. package/src/utils/schema/spill.ts +43 -0
  360. package/src/utils/schema/stamps.ts +97 -0
  361. package/src/utils/schema/types.ts +10 -0
  362. package/src/utils/schema/wire.ts +293 -0
  363. package/src/utils/schema/zod-decontaminate.ts +331 -0
  364. package/src/utils/sdk-stream-timeout.ts +43 -0
  365. package/src/utils/sse-debug.ts +289 -0
  366. package/src/utils/stream-markup-healing.ts +612 -0
  367. package/src/utils/tool-choice.ts +99 -0
  368. package/src/utils/validation.ts +1024 -0
  369. package/src/utils.ts +166 -0
@@ -0,0 +1,2002 @@
1
+ import { $env, APP_DISPLAY_NAME, extractHttpStatusFromError } from "@prometheus-ai/utils";
2
+ import OpenAI, { APIConnectionTimeoutError as OpenAIConnectionTimeoutError } from "openai";
3
+ import type {
4
+ ChatCompletionAssistantMessageParam,
5
+ ChatCompletionChunk,
6
+ ChatCompletionContentPart,
7
+ ChatCompletionContentPartImage,
8
+ ChatCompletionContentPartText,
9
+ ChatCompletionMessageParam,
10
+ ChatCompletionToolMessageParam,
11
+ } from "openai/resources/chat/completions";
12
+ import packageJson from "../../package.json" with { type: "json" };
13
+ import { type Effort, getSupportedEfforts } from "../model-thinking";
14
+ import { calculateCost } from "../models";
15
+ import { getEnvApiKey } from "../stream";
16
+ import {
17
+ type AssistantMessage,
18
+ type Context,
19
+ type FetchImpl,
20
+ type Message,
21
+ type MessageAttribution,
22
+ type Model,
23
+ type OpenAICompat,
24
+ type ProviderSessionState,
25
+ resolveServiceTier,
26
+ type ServiceTier,
27
+ type StopReason,
28
+ type StreamFunction,
29
+ type StreamOptions,
30
+ shouldSendServiceTier,
31
+ type TextContent,
32
+ type ThinkingContent,
33
+ type Tool,
34
+ type ToolCall,
35
+ type ToolChoice,
36
+ type ToolResultMessage,
37
+ } from "../types";
38
+ import { normalizeSystemPrompts } from "../utils";
39
+ import { createAbortSourceTracker } from "../utils/abort";
40
+ import { AssistantMessageEventStream } from "../utils/event-stream";
41
+ import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
42
+ import {
43
+ type CapturedHttpErrorResponse,
44
+ finalizeErrorMessage,
45
+ type RawHttpRequestDump,
46
+ rewriteCopilotError,
47
+ } from "../utils/http-inspector";
48
+ import {
49
+ getOpenAIStreamFirstEventTimeoutMs,
50
+ getOpenAIStreamIdleTimeoutMs,
51
+ iterateWithIdleTimeout,
52
+ } from "../utils/idle-iterator";
53
+ import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
54
+ import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
55
+ import { getKimiCommonHeaders } from "../utils/oauth/kimi";
56
+ import { notifyProviderResponse } from "../utils/provider-response";
57
+ import { callWithCopilotModelRetry } from "../utils/retry";
58
+ import { adaptSchemaForStrict, NO_STRICT, toolWireSchema } from "../utils/schema";
59
+ import { wrapFetchForSseDebug } from "../utils/sse-debug";
60
+ import {
61
+ getStreamMarkupHealingPattern,
62
+ type HealedToolCall,
63
+ StreamMarkupHealing,
64
+ type StreamMarkupHealingEvent,
65
+ } from "../utils/stream-markup-healing";
66
+ import { isForcedToolChoice, mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
67
+ import {
68
+ buildCopilotDynamicHeaders,
69
+ hasCopilotVisionInput,
70
+ resolveGitHubCopilotBaseUrl,
71
+ } from "./github-copilot-headers";
72
+ import { detectOpenAICompat, type ResolvedOpenAICompat, resolveOpenAICompat } from "./openai-completions-compat";
73
+ import { createInitialResponsesAssistantMessage } from "./openai-responses-shared";
74
+ import { transformMessages } from "./transform-messages";
75
+ import {
76
+ isDashscopeCompatibleModeTextOnlyQwen,
77
+ joinTextWithImagePlaceholder,
78
+ NON_VISION_IMAGE_PLACEHOLDER,
79
+ } from "./vision-guard";
80
+
/**
 * Coerce a tool-call ID into the shape Mistral accepts.
 * Mistral requires tool IDs to be exactly 9 alphanumeric characters (a-z, A-Z, 0-9).
 *
 * Every non-alphanumeric character is dropped. The remainder is cut down to
 * its first 9 characters, or right-filled from the fixed filler `ABCDEFGHI`
 * when it is shorter. The result depends only on `id`, so the same input
 * always yields the same output.
 *
 * NOTE(review): two IDs that share their first 9 alphanumeric characters
 * normalize to the same value — confirm callers never rely on uniqueness
 * beyond that prefix.
 *
 * @param id - Tool-call ID to normalize.
 * @param isMistral - When false, `id` is returned untouched.
 * @returns `id` unchanged, or a 9-character alphanumeric ID.
 */
function normalizeMistralToolId(id: string, isMistral: boolean): string {
	if (!isMistral) return id;

	const filler = "ABCDEFGHI";
	const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, "");

	// Appending the full filler and keeping the first 9 characters covers both
	// cases at once: long IDs are truncated, short IDs are padded.
	return (alphanumericOnly + filler).slice(0, 9);
}
/**
 * Pick the model identifier that goes on the wire for a chat-completions
 * request.
 *
 * Firepass and Fireworks ids are translated by their dedicated helpers, and
 * OpenRouter ids are passed through `applyOpenRouterRoutingVariant` together
 * with the optional `openrouterVariant`. Every other provider sends
 * `model.id` verbatim.
 *
 * That verbatim fallthrough is intentional for direct DeepSeek model ids on
 * NanoGPT: we deliberately do NOT append `:tools`. With `:tools`, NanoGPT
 * parses tool calls server-side on the upstream DeepSeek stream and answers
 * 502 with `code: "malformed_tool_call"` on more complex tool schemas
 * (issue #1488). The default route forwards `delta.content` (including any
 * DSML envelope leaks), which `StreamMarkupHealing` heals into a structured
 * call client-side.
 *
 * @param model - Model whose provider decides the translation.
 * @param options - Request options; only `openrouterVariant` is read here.
 * @returns The model id to send to the endpoint.
 */
function resolveOpenAICompletionsModelId(
	model: Model<"openai-completions">,
	options: OpenAICompletionsOptions | undefined,
): string {
	switch (model.provider) {
		case "firepass":
			return toFirepassWireModelId(model.id);
		case "fireworks":
			return toFireworksWireModelId(model.id);
		case "openrouter":
			return applyOpenRouterRoutingVariant(model.id, options?.openrouterVariant);
		default:
			return model.id;
	}
}
115
+
/**
 * Flatten an OpenAI-compatible streaming `delta.content` value into plain text.
 *
 * Most providers stream `delta.content` as a string, but some (notably Mistral
 * Medium 3.5 / `mistral-medium-2604`) send an array of typed content parts,
 * e.g. `[{ type: "text", text: "Hello" }]`. Appending such an array to a
 * string would coerce it into the literal `[object Object]` sequences seen in
 * issue #911, so the parts are unpacked here instead.
 *
 * Accepted shapes:
 * - a string, returned as-is;
 * - an array whose string entries and text parts are concatenated in order;
 * - a single part object.
 *
 * A part contributes its `text` only when `text` is a string and `type` is
 * either absent or `"text"`. Anything else is skipped, so no JS object sigils
 * ever reach visible output.
 *
 * @param content - Raw `delta.content` value from a stream chunk.
 * @returns The joined text, or `""` when nothing usable is present.
 */
function normalizeStreamingContentText(content: unknown): string {
	// Text carried by a single content part; "" for non-text or unknown shapes.
	const textOfPart = (candidate: unknown): string => {
		if (!candidate || typeof candidate !== "object") return "";
		const part = candidate as { type?: unknown; text?: unknown };
		const isTextPart = part.type === undefined || part.type === "text";
		return isTextPart && typeof part.text === "string" ? part.text : "";
	};

	if (typeof content === "string") return content;
	if (Array.isArray(content)) {
		return content.map(entry => (typeof entry === "string" ? entry : textOfPart(entry))).join("");
	}
	return textOfPart(content);
}
151
+
/**
 * Serialize tool-call arguments into a JSON *object* string.
 *
 * Accepts either an already-parsed value or a JSON string. The result is
 * always the JSON text of a plain object; anything that cannot be expressed
 * that way collapses to `"{}"`:
 * - arrays, primitives, `null` and `undefined`;
 * - blank strings, invalid JSON, or JSON that is not an object;
 * - objects that `JSON.stringify` cannot encode (cycles, BigInt members);
 * - objects whose `toJSON()` produces a non-object — `JSON.stringify` would
 *   return `undefined` or a quoted scalar (as it does for `Date`), neither of
 *   which is a valid arguments payload.
 *
 * String input is parsed and re-serialized, so surrounding whitespace and
 * formatting are normalized.
 *
 * @param value - Parsed arguments object or its JSON text.
 * @returns JSON text of an object; `"{}"` as the fallback.
 */
function serializeToolArguments(value: unknown): string {
	const emptyObject = "{}";

	// JSON text for a plain object, or undefined when `candidate` does not
	// encode to a JSON object.
	const encodeObject = (candidate: unknown): string | undefined => {
		if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) return undefined;
		try {
			// Typed as unknown because JSON.stringify yields undefined when
			// toJSON() returns undefined, despite its declared `string` type.
			const json: unknown = JSON.stringify(candidate);
			return typeof json === "string" && json.startsWith("{") ? json : undefined;
		} catch {
			return undefined;
		}
	};

	if (typeof value === "string") {
		const trimmed = value.trim();
		if (trimmed.length === 0) return emptyObject;
		try {
			return encodeObject(JSON.parse(trimmed)) ?? emptyObject;
		} catch {
			// Not valid JSON: fall back rather than forwarding raw text.
			return emptyObject;
		}
	}

	return encodeObject(value) ?? emptyObject;
}
175
+
/**
 * Report whether the conversation already contains tool traffic.
 *
 * Needed because Anthropic (via proxy) requires the `tools` param to be
 * present whenever the messages include tool_calls or tool role messages.
 *
 * @param messages - Conversation history to scan.
 * @returns `true` if any message is a tool result, or any assistant message
 *   holds a `toolCall` content block; `false` otherwise.
 */
function hasToolHistory(messages: Message[]): boolean {
	return messages.some(
		message =>
			message.role === "toolResult" ||
			(message.role === "assistant" && message.content.some(block => block.type === "toolCall")),
	);
}
/**
 * Tell "real progress" stream chunks apart from keepalives, role-only
 * preambles, and empty `{choices:[]}` no-ops emitted by some
 * OpenAI-compatible endpoints.
 *
 * Without this filter every keepalive would reset the deadline of
 * `iterateWithIdleTimeout`, so a provider that streams nothing but pings
 * keeps the watchdog asleep indefinitely — observed against z.ai/GLM via
 * OpenRouter, where a subagent stalled for hours with no error surfaced.
 *
 * A chunk is progress when it carries:
 * - a truthy top-level `usage`, or
 * - on its first choice: a truthy `finish_reason` or `usage`, or a delta with
 *   non-empty `content` (string or array), a non-empty `tool_calls` array, or
 *   a non-empty string in `reasoning`, `reasoning_content`, `reasoning_text`
 *   or `refusal`.
 *
 * Only `choices[0]` is inspected. Role-only `delta: { role: "assistant" }`
 * preambles do NOT count, so the (longer) first-event timeout keeps governing
 * until real output appears.
 *
 * @param chunk - A decoded stream chunk of unknown shape.
 * @returns `true` when the chunk represents model progress.
 */
export function isOpenAICompletionsProgressChunk(chunk: unknown): boolean {
	if (chunk === null || typeof chunk !== "object") return false;

	const envelope = chunk as { usage?: unknown; choices?: unknown };
	if (envelope.usage) return true;
	if (!Array.isArray(envelope.choices)) return false;

	const firstChoice = envelope.choices[0] as
		| { finish_reason?: unknown; usage?: unknown; delta?: Record<string, unknown> }
		| undefined;
	if (!firstChoice) return false;
	if (firstChoice.finish_reason || firstChoice.usage) return true;

	const delta = firstChoice.delta;
	if (!delta) return false;

	const isNonEmptyString = (candidate: unknown): boolean => typeof candidate === "string" && candidate.length > 0;
	const isNonEmptyArray = (candidate: unknown): boolean => Array.isArray(candidate) && candidate.length > 0;

	// `content` may arrive as text or as an array of parts; either counts.
	if (isNonEmptyString(delta.content) || isNonEmptyArray(delta.content)) return true;
	if (isNonEmptyArray(delta.tool_calls)) return true;

	const textFields = ["reasoning", "reasoning_content", "reasoning_text", "refusal"];
	return textFields.some(field => isNonEmptyString(delta[field]));
}
240
+
/**
 * Options accepted by the OpenAI chat-completions provider, layered on top of
 * the shared `StreamOptions`. Every field is optional.
 */
export interface OpenAICompletionsOptions extends StreamOptions {
	/** Tool-choice directive for the request. */
	toolChoice?: ToolChoice;
	/** Requested reasoning effort level. */
	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
	/** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
	disableReasoning?: boolean;
	/** Service tier to request. NOTE(review): presumably gated by `shouldSendServiceTier` / `resolveServiceTier` — confirm at the call site. */
	serviceTier?: ServiceTier;
	/**
	 * Routing-variant suffix appended to OpenRouter model IDs when none is
	 * already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
	 * values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
	 * resolved `model.id` already contains a colon-suffix after the last
	 * provider segment (explicit `:nitro` in the selector or a catalog entry
	 * with the variant baked in).
	 */
	openrouterVariant?: string;
}
257
+
/**
 * Request body type used by this provider: the OpenAI SDK's streaming
 * chat-completion create params, widened with additional optional fields.
 *
 * NOTE(review): the extra fields look like vendor-specific sampling, thinking
 * and routing knobs for OpenAI-compatible backends — confirm against the code
 * that populates them before relying on any single field's semantics.
 */
type OpenAICompletionsParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
	top_k?: number;
	min_p?: number;
	repetition_penalty?: number;
	thinking?: { type: "enabled" | "disabled"; keep?: "all" };
	enable_thinking?: boolean;
	chat_template_kwargs?: { enable_thinking: boolean };
	// Either an effort selector or an explicit "reasoning off" marker.
	reasoning?: { effort?: string } | { enabled: false };
	// Typed from the compat config's `openRouterRouting` entry.
	provider?: OpenAICompat["openRouterRouting"];
	providerOptions?: { gateway?: { only?: string[]; order?: string[] } };
};
269
+
270
/** Strict mode that ended up being used for the tool definitions of a request. */
type AppliedToolStrictMode = "none" | "all_strict" | "mixed";

/** Caller-supplied strict-mode override; `"mixed"` is excluded, and `undefined` means no override. */
type ToolStrictModeOverride = undefined | Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed">;

/** Tool definitions prepared for a request, together with the strict mode applied to them. */
type BuiltOpenAICompletionTools = {
  toolStrictMode: AppliedToolStrictMode;
  tools: OpenAI.Chat.Completions.ChatCompletionTool[];
};
277
+
278
/** Key prefix for this provider's entries in the shared provider-session-state map. */
const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";

/** Base session state extended with a flag that records whether strict tools were turned off. */
type OpenAICompletionsProviderSessionState = ProviderSessionState & { strictToolsDisabled: boolean };

/**
 * Creates a fresh session-state object with strict tools enabled.
 *
 * @returns A state whose `close()` resets `strictToolsDisabled` back to `false`.
 */
function createOpenAICompletionsProviderSessionState(): OpenAICompletionsProviderSessionState {
  const sessionState: OpenAICompletionsProviderSessionState = {
    strictToolsDisabled: false,
    close() {
      // Closing only resets the flag; the object itself stays usable.
      sessionState.strictToolsDisabled = false;
    },
  };
  return sessionState;
}
293
+
294
/**
 * Looks up — or lazily creates — this provider's session state for one
 * provider / base URL / model combination.
 *
 * @param model Model whose `provider` and `id` form part of the map key.
 * @param baseUrl Base URL used in the key; `undefined` is keyed as an empty string.
 * @param providerSessionState Shared state map; when absent no state is tracked.
 * @returns The existing or newly stored state, or `undefined` when no map was supplied.
 */
function getOpenAICompletionsProviderSessionState(
  model: Model<"openai-completions">,
  baseUrl: string | undefined,
  providerSessionState: Map<string, ProviderSessionState> | undefined,
): OpenAICompletionsProviderSessionState | undefined {
  if (!providerSessionState) return undefined;

  const stateKey =
    OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX + model.provider + ":" + (baseUrl ?? "") + ":" + model.id;

  let sessionState = providerSessionState.get(stateKey) as OpenAICompletionsProviderSessionState | undefined;
  if (!sessionState) {
    sessionState = createOpenAICompletionsProviderSessionState();
    providerSessionState.set(stateKey, sessionState);
  }
  return sessionState;
}
307
+
308
/** True when the model is served by the `openrouter` provider and its id starts with `anthropic/` (case-insensitive). */
function isOpenRouterAnthropicModel(model: Model<"openai-completions">): boolean {
  if (model.provider !== "openrouter") return false;
  const normalizedId = model.id.toLowerCase();
  return normalizedId.startsWith("anthropic/");
}
311
+
312
/**
 * Adds an OpenRouter routing-variant suffix (such as `:nitro`, `:floor`, `:online`
 * or `:exacto`) to a model id, unless the id already names a variant.
 *
 * The id counts as already carrying a variant when a colon appears after its last
 * `/` separator. That covers selectors typed by the user
 * (`anthropic/claude-haiku:nitro`) as well as catalog entries that ship with the
 * variant baked in (`deepseek/deepseek-v3.1-terminus:exacto`).
 *
 * Exported for unit testing.
 *
 * @param modelId Model id to decorate.
 * @param variant Variant name without the leading colon; an empty or missing value leaves the id as is.
 * @returns The id unchanged, or the id followed by `:` and the variant.
 */
export function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string {
  if (!variant) return modelId;
  // Only the segment after the final `/` decides whether a variant is present.
  const finalSegment = modelId.slice(modelId.lastIndexOf("/") + 1);
  return finalSegment.includes(":") ? modelId : `${modelId}:${variant}`;
}
329
+
330
/**
 * Detects the HTTP 400 failure whose text reports an `invalid_request_error`
 * about a "compiled grammar" that is "too large".
 *
 * @param error The thrown error; its message is inspected when it is an `Error`.
 * @param capturedErrorResponse Captured HTTP error response, used as a fallback
 *   for the status and as an extra source of text (`bodyText`).
 * @returns `true` only when the status is 400 and all three markers are present.
 */
function isCompiledGrammarTooLargeStrictError(
  error: unknown,
  capturedErrorResponse: CapturedHttpErrorResponse | undefined,
): boolean {
  const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
  if (httpStatus !== 400) return false;

  // Gather every non-blank piece of text that may carry the provider's explanation.
  const textSources = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText];
  const haystack = textSources
    .filter((value): value is string => typeof value === "string" && value.trim().length > 0)
    .join("\n");

  // All markers must appear (case-insensitively) somewhere in the combined text.
  const requiredMarkers = [/invalid_request_error/i, /compiled grammar/i, /too large/i];
  return requiredMarkers.every(marker => marker.test(haystack));
}
345
+
346
// DeepSeek models leak chat-template special tokens (e.g. `<｜tool_calls_begin｜>`,
// `<|DSML|tool_calls|>`) into visible `content` deltas when hosted behind providers
// (such as NVIDIA NIM) that don't strip them server-side. The structured `tool_calls`
// payload is still emitted correctly — we only need to filter the leaked markers from
// user-visible text. Tokens use either fullwidth pipes (｜, U+FF5C) or ASCII pipes.
// Body is restricted to identifier-like chars (with the DeepSeek tokenizer's `▁`, U+2581),
// capped at a sane length to avoid swallowing legitimate angle-bracket text.
//
// The fullwidth pipe and `▁` are spelled as `\uFF5C` / `\u2581` escapes on purpose: if the
// literal fullwidth pipe is ever normalized to ASCII `|`, the alternation degrades to
// `(?:||\|)`, whose empty alternatives make the pattern match ordinary tags such as `<div>`.
const DEEPSEEK_SPECIAL_TOKEN_REGEX = /<(?:\uFF5C|\|)[A-Za-z0-9_.|\uFF5C\u2581]{1,64}(?:\uFF5C|\|)>/g;
const DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX = /^\s*<(?:\uFF5C|\|)[A-Za-z0-9_.|\uFF5C\u2581]{1,64}(?:\uFF5C|\|)>/;
const DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX = /<(?:\uFF5C|\|)[A-Za-z0-9_.|\uFF5C\u2581]{1,64}(?:\uFF5C|\|)>\s*$/;
// Opening / closing delimiters in both the fullwidth and the ASCII spelling.
const DEEPSEEK_OPEN_DELIMS = ["<\uFF5C", "<|"] as const;
const DEEPSEEK_CLOSE_DELIMS = ["\uFF5C>", "|>"] as const;

/**
 * Removes every complete DeepSeek special token from `text`.
 *
 * When the original text began (or ended) with a token, the whitespace that the
 * removal leaves at that edge is trimmed as well. Text without any token is
 * returned unchanged.
 *
 * @param text Text fragment that may contain leaked special tokens.
 * @returns The fragment with all complete tokens removed.
 */
function stripDeepseekSpecialTokens(text: string): string {
  const stripped = text.replace(DEEPSEEK_SPECIAL_TOKEN_REGEX, "");
  if (stripped === text) return text;

  let normalized = stripped;
  // Only trim the side(s) where a token was actually removed from the edge.
  if (DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX.test(text)) normalized = normalized.replace(/^\s+/u, "");
  if (DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX.test(text)) normalized = normalized.replace(/\s+$/u, "");
  return normalized;
}

/**
 * Finds the trailing part of `text` that may be the beginning of a special token
 * still being streamed, so the caller can hold it back until the next chunk.
 *
 * The result is the text from the last opening delimiter (`<｜` or `<|`) onward when
 * that token has not been closed yet by `｜>` / `|>`. Otherwise a solo trailing `<`
 * is returned, since it may be the start of a new token. An unclosed tail longer
 * than 256 characters is not held back, so a stray `<|` in normal prose can't grow
 * the held-back text unboundedly.
 *
 * @param text Buffered text received so far.
 * @returns The suffix to hold back, or `""` when nothing needs holding.
 */
function getTrailingPartialDeepseekToken(text: string): string {
  // A lone `<` at the very end is held regardless of what precedes it.
  const soloOpener = text.endsWith("<") ? "<" : "";

  let bestIdx = -1;
  for (const delim of DEEPSEEK_OPEN_DELIMS) {
    const idx = text.lastIndexOf(delim);
    if (idx > bestIdx) bestIdx = idx;
  }
  if (bestIdx === -1) return soloOpener;

  const tail = text.slice(bestIdx);
  // The last opened token is already complete — nothing of it is pending.
  if (DEEPSEEK_CLOSE_DELIMS.some(close => tail.includes(close))) return soloOpener;
  // Cap the held-back length so a stray `<|` in normal prose can't grow unboundedly.
  if (tail.length > 256) return soloOpener;
  return tail;
}
386
/** Error message used when the stream produces no first event within the allowed time. */
const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE =
  "OpenAI completions stream timed out while waiting for the first event";

/** Widened idle timeout (10 minutes) returned for matching GLM coding-plan models. */
const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
/** Matches model ids that are `glm-5` exactly or `glm-5` followed by `.` or `-` (case-insensitive). */
const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;

/**
 * Returns the widened OpenAI stream watchdog floor for slow GLM coding-plan reasoning models.
 *
 * The widened value applies when the model id matches the GLM pattern and the model is
 * either served by the `zhipu-coding-plan` / `zai` provider or points at a base URL
 * containing `open.bigmodel.cn` or `api.z.ai`.
 *
 * @param model Model whose id, provider and base URL are inspected.
 * @returns The widened idle timeout in milliseconds, or `undefined` when no override applies.
 */
export function getOpenAICompletionsStreamIdleTimeoutFallbackMs(
  model: Model<"openai-completions">,
): number | undefined {
  if (!GLM_CODING_PLAN_MODEL_PATTERN.test(model.id)) return undefined;
  if (model.provider === "zhipu-coding-plan" || model.provider === "zai") {
    return GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
  }

  // Other code in this file treats the base URL as possibly undefined; a missing
  // value must mean "no override" here rather than a TypeError on `.toLowerCase()`.
  const baseUrl = (model.baseUrl ?? "").toLowerCase();
  if (baseUrl.includes("open.bigmodel.cn") || baseUrl.includes("api.z.ai")) {
    return GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
  }

  return undefined;
}
407
+
408
/**
 * Streams a chat completion from an OpenAI-compatible `/chat/completions` endpoint and
 * re-emits it as assistant-message events.
 *
 * The event stream is returned immediately. The request and all chunk processing run in a
 * detached async task that pushes `start`, text / thinking / tool-call events and finally a
 * `done` event. Any failure is caught and reported as an `error` event instead of a
 * rejection, and the stream is ended in both cases.
 *
 * @param model Model to call; its `provider` and `id` also select provider-specific handling.
 * @param context Conversation context (messages, tools) forwarded to the request builders.
 * @param options Optional per-request settings (API key, headers, timeouts, abort signal, hooks).
 * @returns The event stream the async task writes to.
 */
export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
  model: Model<"openai-completions">,
  context: Context,
  options?: OpenAICompletionsOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    const startTime = Date.now();
    let firstTokenTime: number | undefined;
    // Assigned once the client exists; the outer catch uses it to read the captured HTTP error.
    let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;

    const output: AssistantMessage = createInitialResponsesAssistantMessage(model.api, model.provider, model.id);
    let rawRequestDump: RawHttpRequestDump | undefined;
    const abortTracker = createAbortSourceTracker(options?.signal);
    const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
    const { requestAbortController, requestSignal } = abortTracker;

    try {
      const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
      // Explicit option values win; otherwise the helpers supply the timeouts.
      const idleTimeoutFallbackMs = getOpenAICompletionsStreamIdleTimeoutFallbackMs(model);
      const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs(idleTimeoutFallbackMs);
      const firstEventTimeoutMs =
        options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
      // Only a positive first-event timeout is applied to the HTTP request itself.
      const requestTimeoutMs =
        firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
      const {
        client,
        copilotPremiumRequests,
        baseUrl,
        requestHeaders,
        getCapturedErrorResponse: captureErrorResponse,
        clearCapturedErrorResponse,
      } = await createClient(
        model,
        context,
        apiKey,
        options?.headers,
        options?.initiatorOverride,
        options?.onSseEvent,
        options?.fetch,
      );
      const premiumRequestsTotal = copilotPremiumRequests;
      getCapturedErrorResponse = captureErrorResponse;
      let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
      const providerSessionState = getOpenAICompletionsProviderSessionState(
        model,
        baseUrl,
        options?.providerSessionState,
      );
      // Start from the session's remembered decision, if a session state is being tracked.
      let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
      let strictFallbackErrorMessage: string | undefined;
      // Builds the request payload and opens the streaming request. Once strict tools are
      // disabled, every call forces the "none" strict mode regardless of the override passed in.
      const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
        clearCapturedErrorResponse();
        const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
        const { params, toolStrictMode } = buildParams(
          model,
          context,
          options,
          baseUrl,
          effectiveToolStrictModeOverride,
        );
        appliedToolStrictMode = toolStrictMode;
        options?.onPayload?.(params);
        // Kept so a later failure can be reported together with the request that caused it.
        rawRequestDump = {
          provider: model.provider,
          api: output.api,
          model: model.id,
          method: "POST",
          url: `${baseUrl}/chat/completions`,
          headers: requestHeaders,
          body: params,
        };
        const requestOptions =
          requestTimeoutMs === undefined
            ? { signal: requestSignal }
            : { signal: requestSignal, timeout: requestTimeoutMs };
        // Local timer that aborts the request with the first-event timeout error.
        let requestTimeout: NodeJS.Timeout | undefined;
        if (requestTimeoutMs !== undefined) {
          requestTimeout = setTimeout(
            () => abortTracker.abortLocally(firstEventTimeoutAbortError),
            requestTimeoutMs,
          );
        }
        try {
          const { data, response, request_id } = await client.chat.completions
            .create(params, requestOptions)
            .withResponse();
          await notifyProviderResponse(options, response, model, request_id);
          return data;
        } catch (error) {
          // An SDK connection timeout that the caller did not trigger is reported as the
          // first-event timeout.
          if (error instanceof OpenAIConnectionTimeoutError && !abortTracker.wasCallerAbort()) {
            throw firstEventTimeoutAbortError;
          }
          throw error;
        } finally {
          if (requestTimeout !== undefined) clearTimeout(requestTimeout);
        }
      };
      let openaiStream: AsyncIterable<ChatCompletionChunk>;
      try {
        openaiStream = await callWithCopilotModelRetry(() => createCompletionsStream(), {
          provider: model.provider,
          signal: requestSignal,
        });
      } catch (error) {
        const capturedErrorResponse = getCapturedErrorResponse();
        if (
          isOpenRouterAnthropicModel(model) &&
          !disableStrictTools &&
          isCompiledGrammarTooLargeStrictError(error, capturedErrorResponse)
        ) {
          // "Compiled grammar too large" on an OpenRouter Anthropic model: remember the
          // failure, disable strict tools (also in the session state) and retry once.
          strictFallbackErrorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
          output.errorMessage = strictFallbackErrorMessage;
          if (providerSessionState) {
            providerSessionState.strictToolsDisabled = true;
          }
          disableStrictTools = true;
          openaiStream = await createCompletionsStream("none");
        } else {
          // Any other failure is retried without strict tools only when the helper allows it.
          if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
            throw error;
          }
          openaiStream = await createCompletionsStream("none");
        }
      }
      if (premiumRequestsTotal !== undefined) {
        output.usage.premiumRequests = premiumRequestsTotal;
      }
      stream.push({ type: "start", partial: output });

      const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
      // Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
      // native API) leak chat-template tool-call markers in `delta.content` even
      // though tool calls are also surfaced structurally. Strip the leaked markers
      // so users don't see raw `<|...|>` tokens.
      const stripDeepseekChatTemplateTokens =
        /deepseek/i.test(model.id) && (model.provider === "nvidia" || model.provider === "deepseek");
      // Tool-call block plus the bookkeeping used only while it is still streaming.
      type ToolCallStreamBlock = ToolCall & {
        partialArgs?: string | Record<string, unknown>;
        streamIndex?: number;
        lastParseLen?: number;
      };
      type OpenAIStreamBlock = TextContent | ThinkingContent | ToolCallStreamBlock;
      // Tool calls that have started but not yet emitted `toolcall_end`.
      const pendingToolCallBlocks: ToolCallStreamBlock[] = [];
      // Lookup from the provider's `tool_calls[].index` to the block being assembled.
      const toolCallBlockByIndex = new Map<number, ToolCallStreamBlock>();
      let currentBlock: OpenAIStreamBlock | undefined;
      // Position of a block inside `output.content`; without a block, the last index (min 0).
      const blockIndex = (block: OpenAIStreamBlock | undefined): number => {
        if (!block) return Math.max(0, output.content.length - 1);
        return output.content.indexOf(block);
      };
      // Finalizes one tool call: parses its arguments, drops the streaming bookkeeping and
      // emits `toolcall_end`. A block without `partialArgs` was already finished.
      const finishToolCallBlock = (block: ToolCallStreamBlock): void => {
        if (block.partialArgs === undefined) return;
        const contentIndex = blockIndex(block);
        if (contentIndex < 0) return;
        block.arguments =
          typeof block.partialArgs === "string" ? parseStreamingJson(block.partialArgs) : block.partialArgs;
        delete block.partialArgs;
        delete block.lastParseLen;
        if (block.streamIndex !== undefined) {
          toolCallBlockByIndex.delete(block.streamIndex);
          delete block.streamIndex;
        }
        const pendingIndex = pendingToolCallBlocks.indexOf(block);
        if (pendingIndex >= 0) pendingToolCallBlocks.splice(pendingIndex, 1);
        stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
      };
      const finishPendingToolCallBlocks = (): void => {
        // Iterate over a copy: finishing a block removes it from the pending list.
        for (const block of [...pendingToolCallBlocks]) {
          finishToolCallBlock(block);
        }
      };
      // Emits the matching `*_end` event for whatever kind of block is passed in.
      const finishCurrentBlock = (block: OpenAIStreamBlock | undefined): void => {
        if (!block) return;
        const contentIndex = blockIndex(block);
        if (contentIndex < 0) return;
        if (block.type === "text") {
          stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
          return;
        }
        if (block.type === "thinking") {
          stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
          return;
        }
        finishToolCallBlock(block);
      };
      // Appends text to the current text block, opening a new one (and closing the
      // previous block) when the current block is not a text block.
      const appendText = (
        message: AssistantMessage,
        eventStream: AssistantMessageEventStream,
        text: string,
      ): void => {
        if (currentBlock?.type !== "text") {
          finishCurrentBlock(currentBlock);
          currentBlock = { type: "text", text: "" };
          message.content.push(currentBlock);
          eventStream.push({ type: "text_start", contentIndex: blockIndex(currentBlock), partial: message });
        }
        currentBlock.text += text;
        eventStream.push({
          type: "text_delta",
          contentIndex: blockIndex(currentBlock),
          delta: text,
          partial: message,
        });
      };
      // Same as `appendText` for thinking content. A new block is also started when a
      // signature is given that differs from the current block's signature.
      const appendThinking = (
        message: AssistantMessage,
        eventStream: AssistantMessageEventStream,
        thinking: string,
        signature?: string,
      ): void => {
        if (
          currentBlock?.type !== "thinking" ||
          (signature !== undefined && currentBlock.thinkingSignature !== signature)
        ) {
          finishCurrentBlock(currentBlock);
          currentBlock = { type: "thinking", thinking: "", thinkingSignature: signature };
          message.content.push(currentBlock);
          eventStream.push({
            type: "thinking_start",
            contentIndex: blockIndex(currentBlock),
            partial: message,
          });
        }
        if (signature !== undefined && !currentBlock.thinkingSignature) {
          currentBlock.thinkingSignature = signature;
        }
        currentBlock.thinking += thinking;
        eventStream.push({
          type: "thinking_delta",
          contentIndex: blockIndex(currentBlock),
          delta: thinking,
          partial: message,
        });
      };

      // Thin wrappers that skip empty deltas and record the time of the first token.
      const appendTextDelta = (text: string): void => {
        if (!text) return;
        if (!firstTokenTime) firstTokenTime = Date.now();
        appendText(output, stream, text);
      };
      const appendThinkingDelta = (thinking: string, signature?: string): void => {
        if (!thinking) return;
        if (!firstTokenTime) firstTokenTime = Date.now();
        appendThinking(output, stream, thinking, signature);
      };

      // Text awaiting DeepSeek token stripping; may end with a partial token.
      let deepseekStripBuffer = "";
      // Emits the buffered text with special tokens removed. Unless `final`, a trailing
      // partial token stays in the buffer until more text arrives.
      const flushDeepseekStripBuffer = (final: boolean): void => {
        if (deepseekStripBuffer.length === 0) return;
        let flushable: string;
        if (final) {
          flushable = deepseekStripBuffer;
          deepseekStripBuffer = "";
        } else {
          const trailing = getTrailingPartialDeepseekToken(deepseekStripBuffer);
          flushable = deepseekStripBuffer.slice(0, deepseekStripBuffer.length - trailing.length);
          deepseekStripBuffer = trailing;
        }
        const stripped = stripDeepseekSpecialTokens(flushable);
        // When tokens were removed, whitespace-only leftovers are dropped.
        if (stripped && (stripped === flushable || stripped.trim().length > 0)) appendTextDelta(stripped);
      };
      // Routes visible text through the DeepSeek stripping buffer when it is enabled.
      const appendProcessedText = (processedText: string): void => {
        if (processedText.length === 0) return;
        if (stripDeepseekChatTemplateTokens) {
          deepseekStripBuffer += processedText;
          flushDeepseekStripBuffer(false);
        } else {
          appendTextDelta(processedText);
        }
      };

      // Markup healing is only active when a pattern exists for this provider / model.
      const streamMarkupHealingPattern = getStreamMarkupHealingPattern(model.provider, model.id, {
        parseThinkingTags: parseMiniMaxThinkTags,
      });
      const streamMarkupHealing = streamMarkupHealingPattern
        ? new StreamMarkupHealing({ pattern: streamMarkupHealingPattern })
        : undefined;
      let healedToolCallEmitted = false;
      // Emits a complete tool call recovered from text markup as start, delta and end
      // events in one go.
      const emitHealedToolCall = (call: HealedToolCall): void => {
        finishCurrentBlock(currentBlock);
        const block: ToolCall & { partialArgs: string } = {
          type: "toolCall",
          id: call.id,
          name: call.name,
          arguments: {},
          partialArgs: call.arguments,
        };
        block.arguments = parseStreamingJson(call.arguments);
        currentBlock = block;
        output.content.push(block);
        stream.push({ type: "toolcall_start", contentIndex: blockIndex(block), partial: output });
        stream.push({
          type: "toolcall_delta",
          contentIndex: blockIndex(block),
          delta: call.arguments,
          partial: output,
        });
        finishCurrentBlock(block);
        currentBlock = undefined;
        healedToolCallEmitted = true;
      };
      // Dispatches one healing event to the text, thinking or tool-call path.
      const emitHealingEvent = (event: StreamMarkupHealingEvent): void => {
        if (event.type === "text") {
          appendProcessedText(event.text);
        } else if (event.type === "thinking") {
          appendThinkingDelta(event.thinking);
        } else {
          emitHealedToolCall(event.call);
        }
      };
      const flushHealedToolCalls = (): void => {
        if (!streamMarkupHealing) return;
        const calls = streamMarkupHealing.drainCompleted();
        for (const call of calls) emitHealedToolCall(call);
      };

      // Main loop: consume chunks under the idle / first-event watchdogs.
      for await (const chunk of iterateWithIdleTimeout(openaiStream, {
        idleTimeoutMs,
        firstItemTimeoutMs: firstEventTimeoutMs,
        firstItemErrorMessage: OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE,
        errorMessage: "OpenAI completions stream stalled while waiting for the next event",
        onIdle: () => requestAbortController.abort(),
        onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
        abortSignal: options?.signal,
        isProgressItem: isOpenAICompletionsProgressChunk,
      })) {
        if (!chunk || typeof chunk !== "object") continue;

        // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
        // and each chunk in a streamed completion carries the same id.
        output.responseId ||= chunk.id;

        if (chunk.usage) {
          output.usage = parseChunkUsage(chunk.usage, model, premiumRequestsTotal);
        }

        const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
        if (!choice) continue;

        // Fall back to usage reported on the choice when the chunk itself has none.
        if (!chunk.usage) {
          const choiceUsage = getChoiceUsage(choice);
          if (choiceUsage) {
            output.usage = parseChunkUsage(choiceUsage, model, premiumRequestsTotal);
          }
        }

        if (choice.finish_reason) {
          const finishReasonResult = mapStopReason(choice.finish_reason);
          output.stopReason = finishReasonResult.stopReason;
          if (finishReasonResult.errorMessage) {
            output.errorMessage = finishReasonResult.errorMessage;
          }
        }

        if (choice.delta) {
          const normalizedDeltaText = normalizeStreamingContentText(choice.delta.content);
          if (normalizedDeltaText.length > 0) {
            if (!firstTokenTime) firstTokenTime = Date.now();
            const hasStructuredToolCalls =
              Array.isArray(choice.delta.tool_calls) && choice.delta.tool_calls.length > 0;

            if (streamMarkupHealing) {
              if (hasStructuredToolCalls) {
                // Same chunk leaks markers AND carries structured tool_calls.
                // Strip the marker text from visible output, but drop any
                // synthesized calls so the structured payload stays the
                // single source of truth (avoids double-dispatch).
                appendProcessedText(streamMarkupHealing.consumeWithoutCalls(normalizedDeltaText));
              } else {
                for (const event of streamMarkupHealing.feedEvents(normalizedDeltaText)) {
                  emitHealingEvent(event);
                }
              }
            } else {
              appendProcessedText(normalizedDeltaText);
            }
          }

          // Some endpoints return reasoning in reasoning_content (llama.cpp),
          // or reasoning (other openai compatible endpoints)
          // Use the first non-empty reasoning field to avoid duplication
          // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
          const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
          let foundReasoningField: string | null = null;
          for (const field of reasoningFields) {
            if (
              (choice.delta as any)[field] !== null &&
              (choice.delta as any)[field] !== undefined &&
              (choice.delta as any)[field].length > 0
            ) {
              if (!foundReasoningField) {
                foundReasoningField = field;
                break;
              }
            }
          }

          if (foundReasoningField) {
            const delta = (choice.delta as any)[foundReasoningField];
            // The name of the source field is passed along as the thinking signature.
            appendThinkingDelta(delta, foundReasoningField);
          }

          if (choice?.delta?.tool_calls && choice.delta.tool_calls.length > 0) {
            for (const toolCall of choice.delta.tool_calls) {
              // Resolve the block this delta belongs to: by stream index first, then by
              // id among pending blocks, then the current tool-call block.
              const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
              let block = streamIndex !== undefined ? toolCallBlockByIndex.get(streamIndex) : undefined;
              if (!block && toolCall.id) {
                block = pendingToolCallBlocks.find(candidate => candidate.id === toolCall.id);
              }
              if (
                !block &&
                currentBlock?.type === "toolCall" &&
                (!toolCall.id || currentBlock.id === toolCall.id)
              ) {
                block = currentBlock;
              }

              if (!block) {
                // New tool call. A preceding text / thinking block is closed here; an
                // earlier tool call stays pending and is finished later.
                if (currentBlock?.type !== "toolCall") {
                  finishCurrentBlock(currentBlock);
                }
                block = {
                  type: "toolCall",
                  id: toolCall.id || "",
                  name: toolCall.function?.name || "",
                  arguments: {},
                  partialArgs: "",
                  streamIndex,
                };
                if (streamIndex !== undefined) toolCallBlockByIndex.set(streamIndex, block);
                pendingToolCallBlocks.push(block);
                currentBlock = block;
                output.content.push(block);
                stream.push({
                  type: "toolcall_start",
                  contentIndex: blockIndex(block),
                  partial: output,
                });
              } else {
                currentBlock = block;
                // Adopt a stream index that only shows up on a later delta.
                if (streamIndex !== undefined && block.streamIndex === undefined) {
                  block.streamIndex = streamIndex;
                  toolCallBlockByIndex.set(streamIndex, block);
                }
              }

              if (toolCall.id) block.id = toolCall.id;
              if (toolCall.function?.name) block.name = toolCall.function.name;
              let delta = "";
              // The OpenAI SDK types `function.arguments` as a JSON string, but MiniMax-compatible
              // hosts stream a fully-formed object instead. Model both shapes so the branches below
              // narrow honestly rather than widening through `unknown`.
              const rawArgs = toolCall.function?.arguments as string | Record<string, unknown> | undefined;
              if (typeof rawArgs === "string") {
                if (rawArgs.length > 0) {
                  delta = rawArgs;
                  const prev = typeof block.partialArgs === "string" ? block.partialArgs : "";
                  block.partialArgs = prev + rawArgs;
                  // Throttled re-parse of the accumulated JSON; `arguments` is refreshed
                  // only when the helper returns a result.
                  const throttled = parseStreamingJsonThrottled(block.partialArgs, block.lastParseLen ?? 0);
                  if (throttled) {
                    block.arguments = throttled.value;
                    block.lastParseLen = throttled.parsedLen;
                  }
                }
              } else if (rawArgs && typeof rawArgs === "object" && !Array.isArray(rawArgs)) {
                // MiniMax-compatible hosts stream `function.arguments` as a complete object in a
                // single delta instead of the OpenAI JSON-string contract. Hold the object directly
                // — no `[object Object]` round-trip through the string buffer — and serialize once for
                // the wire delta that proxy servers forward verbatim as `input_json_delta`.
                block.partialArgs = rawArgs;
                block.arguments = rawArgs;
                delta = JSON.stringify(rawArgs);
              }
              stream.push({
                type: "toolcall_delta",
                contentIndex: blockIndex(block),
                delta,
                partial: output,
              });
            }
          }

          // Encrypted reasoning details are attached to the tool call with the same id.
          const reasoningDetails = (choice.delta as any).reasoning_details;
          if (reasoningDetails && Array.isArray(reasoningDetails)) {
            for (const detail of reasoningDetails) {
              if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
                const matchingToolCall = output.content.find(
                  b => b.type === "toolCall" && b.id === detail.id,
                ) as ToolCall | undefined;
                if (matchingToolCall) {
                  matchingToolCall.thoughtSignature = JSON.stringify(detail);
                }
              }
            }
          }
        }
      }

      // End of stream: drain whatever the healing layer still holds.
      if (streamMarkupHealing) {
        for (const event of streamMarkupHealing.flushEvents()) {
          emitHealingEvent(event);
        }
        flushHealedToolCalls();
        if (healedToolCallEmitted && output.stopReason === "stop") {
          // Hosts that leak tool-call templates often still report
          // `finish_reason: stop` for the surrounding turn. Promote
          // only that natural-completion finish — leave `error`,
          // `length`, `aborted`, etc. untouched.
          output.stopReason = "toolUse";
        }
      }

      // Release any text still held back by the DeepSeek stripping buffer.
      if (stripDeepseekChatTemplateTokens) {
        flushDeepseekStripBuffer(true);
      }

      // Close the open block; when it is a tool call it is covered by the pending list.
      if (currentBlock?.type === "toolCall") {
        finishPendingToolCallBlocks();
      } else {
        finishCurrentBlock(currentBlock);
        finishPendingToolCallBlocks();
      }

      // Some OpenAI-compatible hosts stream structured `tool_calls` but report
      // `finish_reason: "stop"` instead of `"tool_calls"`. In the OpenAI contract a
      // tool call always means "execute and continue", so promote that
      // natural-completion finish to `toolUse` whenever the turn produced tool-call
      // blocks — the agent loop gates execution on the stop reason. `error`,
      // `length`, and `aborted` are intentionally left untouched. (Anthropic's
      // distinct `end_turn`-with-tool-calls "abandon" semantics live in its own
      // provider and correctly keep `stop`.)
      if (output.stopReason === "stop" && output.content.some(b => b.type === "toolCall")) {
        output.stopReason = "toolUse";
      }

      // A local abort (first-event timeout) or a caller abort turns the turn into an
      // error, even though the loop ended without throwing.
      const firstEventTimeoutError = abortTracker.getLocalAbortReason();
      if (firstEventTimeoutError) {
        throw firstEventTimeoutError;
      }
      if (abortTracker.wasCallerAbort()) {
        throw new Error("Request was aborted");
      }

      if (output.stopReason === "aborted") {
        throw new Error("Request was aborted");
      }
      if (output.stopReason === "error") {
        throw new Error(output.errorMessage || "Provider returned an error stop reason");
      }

      // On success the error message is either cleared or carries the strict-fallback notice.
      output.errorMessage = strictFallbackErrorMessage;
      output.duration = Date.now() - startTime;
      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      for (const block of output.content) delete (block as any).index;
      const firstEventTimeoutError = abortTracker.getLocalAbortReason();
      output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
      output.errorStatus = extractHttpStatusFromError(error) ?? getCapturedErrorResponse?.()?.status;
      // The first-event timeout message takes precedence over the generic error text.
      output.errorMessage =
        firstEventTimeoutError?.message ??
        (await finalizeErrorMessage(error, rawRequestDump, getCapturedErrorResponse?.()));
      // Some providers via OpenRouter include extra details here.
      const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
      if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
      output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
      output.duration = Date.now() - startTime;
      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
985
+
986
/**
 * Builds the OpenAI SDK client for one chat-completions request.
 *
 * Resolves the API key (argument first, then `OPENAI_API_KEY`), assembles the
 * default headers (model headers, provider-specific additions, caller extras),
 * applies the GitHub Copilot and Azure OpenAI base-URL adjustments, and wraps
 * `fetch` so that the most recent non-OK HTTP response can be inspected later.
 *
 * @param model - Model descriptor; supplies provider, headers, base URL and id.
 * @param context - Conversation context; its messages feed the Copilot header builder.
 * @param apiKey - Explicit API key. Falls back to `OPENAI_API_KEY` when omitted.
 * @param extraHeaders - Caller headers merged over the model/provider headers.
 * @param initiatorOverride - Forwarded to the Copilot dynamic header builder.
 * @param onSseEvent - When set, the fetch is additionally wrapped for SSE debugging.
 * @param fetchOverride - Replacement for the global `fetch`.
 * @returns The client plus the resolved base URL, the headers sent by default,
 *   the Copilot premium-request count (Copilot only) and accessors for the
 *   captured error response.
 * @throws Error when no API key is passed and `OPENAI_API_KEY` is unset.
 */
async function createClient(
	model: Model<"openai-completions">,
	context: Context,
	apiKey?: string,
	extraHeaders?: Record<string, string>,
	initiatorOverride?: MessageAttribution,
	onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
	fetchOverride?: FetchImpl,
): Promise<{
	client: OpenAI;
	copilotPremiumRequests: number | undefined;
	baseUrl: string | undefined;
	requestHeaders: Record<string, string>;
	getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
	clearCapturedErrorResponse: () => void;
}> {
	if (!apiKey) {
		const envApiKey = $env.OPENAI_API_KEY;
		if (!envApiKey) {
			throw new Error(
				"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
			);
		}
		apiKey = envApiKey;
	}
	// Copilot rewrites `apiKey` below; the helpers still need the untouched value.
	const rawApiKey = apiKey;

	let headers = { ...model.headers };
	if (model.provider === "openrouter") {
		// OpenRouter app attribution (https://openrouter.ai/docs/app-attribution):
		// `HTTP-Referer` identifies the app, `X-OpenRouter-Title` is its display
		// name, `X-OpenRouter-Categories` places it in the `cli-agent` category and
		// `User-Agent` replaces the SDK default so traffic is identifiable upstream.
		// The cache headers turn on response caching with a one-hour TTL
		// (https://openrouter.ai/docs/features/response-caching).
		Object.assign(headers, {
			"User-Agent": `${APP_DISPLAY_NAME}/${packageJson.version}`,
			"HTTP-Referer": "https://prometheus.trivlab.com/",
			"X-OpenRouter-Title": APP_DISPLAY_NAME,
			"X-OpenRouter-Categories": "cli-agent",
			"X-OpenRouter-Cache": "true",
			"X-OpenRouter-Cache-TTL": "3600",
		});
	}
	Object.assign(headers, extraHeaders);
	if (model.provider === "kimi-code") {
		// Kimi's common headers act as defaults: anything already set wins.
		headers = { ...getKimiCommonHeaders(), ...headers };
	}

	let copilotPremiumRequests: number | undefined;
	let baseUrl = model.baseUrl;
	if (model.provider === "github-copilot") {
		apiKey = parseGitHubCopilotApiKey(rawApiKey).accessToken;
		const dynamic = buildCopilotDynamicHeaders({
			messages: context.messages,
			hasImages: hasCopilotVisionInput(context.messages),
			premiumMultiplier: model.premiumMultiplier,
			headers,
			initiatorOverride,
		});
		Object.assign(headers, dynamic.headers);
		copilotPremiumRequests = dynamic.premiumRequests;
		baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
	}

	// Azure OpenAI requires /deployments/{id}/chat/completions?api-version=YYYY-MM-DD.
	// The generic openai-completions path adds neither, producing silent 404s.
	let azureDefaultQuery: Record<string, string> | undefined;
	if (baseUrl?.includes(".openai.azure.com")) {
		const apiVersion = $env.AZURE_OPENAI_API_VERSION || "2024-10-21";
		const alreadyScopedToDeployment = baseUrl.includes("/deployments/");
		if (!alreadyScopedToDeployment) {
			baseUrl = `${baseUrl}/deployments/${model.id}`;
		}
		azureDefaultQuery = { "api-version": apiVersion };
	}

	let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
	const baseFetch = fetchOverride ?? fetch;

	// Best-effort read of a failed response's body. Works on a clone so the SDK
	// can still consume the original; read and parse failures are ignored.
	const readErrorBody = async (response: Response): Promise<{ bodyText?: string; bodyJson?: unknown }> => {
		let text: string;
		try {
			text = await response.clone().text();
		} catch {
			return {};
		}
		if (text.trim().length === 0) return { bodyText: text };
		try {
			return { bodyText: text, bodyJson: JSON.parse(text) };
		} catch {
			return { bodyText: text };
		}
	};

	const wrappedFetch = Object.assign(
		async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
			const response = await baseFetch(input, init);
			if (response.ok) {
				// A successful call invalidates whatever failure was captured before.
				capturedErrorResponse = undefined;
				return response;
			}
			const { bodyText, bodyJson } = await readErrorBody(response);
			capturedErrorResponse = {
				status: response.status,
				headers: response.headers,
				bodyText,
				bodyJson,
			};
			return response;
		},
		// Carry over `preconnect` when the underlying fetch exposes it.
		baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
	);
	const debugFetch = onSseEvent
		? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model))
		: wrappedFetch;

	const client = new OpenAI({
		apiKey,
		baseURL: baseUrl,
		dangerouslyAllowBrowser: true,
		maxRetries: 5,
		defaultHeaders: headers,
		defaultQuery: azureDefaultQuery,
		fetch: debugFetch,
	});

	return {
		client,
		copilotPremiumRequests,
		baseUrl,
		requestHeaders: headers,
		getCapturedErrorResponse: () => capturedErrorResponse,
		clearCapturedErrorResponse: () => {
			capturedErrorResponse = undefined;
		},
	};
}
1111
+
1112
/**
 * Assembles the streaming chat-completions request body for one turn.
 *
 * Converts the conversation into wire messages, then layers on token limits,
 * sampling options, service tier, tools / tool choice, the provider-specific
 * thinking controls and gateway routing hints. `compat.extraBody` is merged
 * last, so it overrides anything set earlier.
 *
 * @param model - Target model descriptor.
 * @param context - Conversation context (messages, optional tools).
 * @param options - Per-request options; may be `undefined`.
 * @param resolvedBaseUrl - Base URL actually used, forwarded to `getCompat`.
 * @param toolStrictModeOverride - Forwarded to `convertTools`.
 * @returns The request params plus the strict mode applied to tools (`"none"`
 *   unless tools were converted).
 * @throws Error when reasoning must be minimised but the model reports no
 *   supported reasoning efforts.
 */
function buildParams(
	model: Model<"openai-completions">,
	context: Context,
	options: OpenAICompletionsOptions | undefined,
	resolvedBaseUrl?: string,
	toolStrictModeOverride?: ToolStrictModeOverride,
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode } {
	const compat = getCompat(model, resolvedBaseUrl);

	// Opencode Zen's gateway (https://opencode.ai/zen/go/v1) ties
	// `reasoning_content` to the request's thinking state for every model it
	// fronts: it 400s with `Extra inputs are not permitted` when thinking is off
	// but the field is present (#1071), and with `thinking is enabled but
	// reasoning_content is missing in assistant tool call message at index N`
	// (#1484) when thinking is on and the field is absent. So the replay
	// requirement is switched on per request whenever this turn is in thinking
	// mode. Forced-tool turns are excluded: the `disableReasoningOnForcedToolChoice`
	// guard further down strips thinking from the body, and replaying under
	// those conditions would bring back the #1071 failure.
	//
	// Synthetic reasoning is switched off on the same path because the gateway
	// wants exactly `reasoning_content`; echoing whichever field the upstream
	// streamed (e.g. `reasoning`) would put the replay under the wrong key.
	const viaOpenCode = model.provider === "opencode-go" || model.provider === "opencode-zen";
	const thinkingOnThisTurn =
		Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
	const forcedToolDropsThinking =
		compat.disableReasoningOnForcedToolChoice &&
		isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
	if (viaOpenCode && thinkingOnThisTurn && !forcedToolDropsThinking) {
		compat.requiresReasoningContentForToolCalls = true;
		compat.allowsSyntheticReasoningContentForToolCalls = false;
		compat.reasoningContentField = "reasoning_content";
	}

	const messages = convertMessages(model, context, compat);
	maybeAddAnthropicCacheControl(compat, messages);

	const acceptsReasoningParams = model.provider !== "github-copilot";

	// Kimi (including OpenRouter and Fireworks router-form ids such as
	// `accounts/fireworks/routers/kimi-*`) computes TPM rate limits from
	// max_tokens rather than actual output, and the Kimi K2 guidance
	// (https://docs.fireworks.ai/models/kimi-k2) asks for `max_tokens` on every
	// call. Default to the model's limit for that family when the caller gave none.
	// The direct kimi-code provider is handled by the dedicated provider in kimi.ts.
	const looksLikeKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
	const maxTokens = options?.maxTokens ?? (looksLikeKimi ? model.maxTokens : undefined);

	const params: OpenAICompletionsParams = {
		model: resolveOpenAICompletionsModelId(model, options),
		messages,
		stream: true,
	};
	let toolStrictMode: AppliedToolStrictMode = "none";

	if (compat.supportsUsageInStreaming !== false) params.stream_options = { include_usage: true };
	if (compat.supportsStore) params.store = false;

	if (maxTokens && !model.omitMaxOutputTokens) {
		const limitField = compat.maxTokensField === "max_tokens" ? "max_tokens" : "max_completion_tokens";
		params[limitField] = maxTokens;
	}

	// Sampling knobs: forwarded only when explicitly provided.
	if (options?.temperature !== undefined) params.temperature = options.temperature;
	if (options?.topP !== undefined) params.top_p = options.topP;
	if (options?.topK !== undefined) params.top_k = options.topK;
	if (options?.minP !== undefined) params.min_p = options.minP;
	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
	if (options?.stopSequences?.length) {
		// A single sequence goes out as a bare string; otherwise at most four are sent.
		const stops = options.stopSequences;
		params.stop = stops.length === 1 ? stops[0] : stops.slice(0, 4);
	}
	if (options?.frequencyPenalty !== undefined) params.frequency_penalty = options.frequencyPenalty;

	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
		const tier = resolveServiceTier(options?.serviceTier, model.provider);
		if (tier === "flex" || tier === "scale" || tier === "priority") {
			params.service_tier = tier;
		}
	}

	if (context.tools?.length) {
		const converted = convertTools(context.tools, compat, toolStrictModeOverride);
		params.tools = converted.tools;
		toolStrictMode = converted.toolStrictMode;
	} else if (context.tools === undefined && hasToolHistory(context.messages)) {
		// Anthropic (via LiteLLM/proxy) requires the `tools` param when the history
		// contains tool_calls/tool_results, even if no tools are offered this turn.
		// The sentinel is only injected when tools were not specified at all; an
		// explicit `context.tools = []` is an opt-out (as /btw and IRC background
		// replies do via AgentSession.runEphemeralTurn) and emits nothing, so
		// LiteLLM → Bedrock never sees an empty `toolConfig` block.
		params.tools = [];
	}

	if (options?.toolChoice && compat.supportsToolChoice) {
		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
	}

	const noToolsOnWire = !Array.isArray(params.tools) || params.tools.length === 0;
	if (params.tool_choice === "none" && noToolsOnWire) {
		// `tool_choice: "none"` with nothing to gate is redundant and trips
		// LiteLLM → Bedrock: the proxy serializes it into a `toolConfig` block, and
		// Bedrock requires `toolConfig.tools` to be non-empty once the conversation
		// holds `toolUse`/`toolResult` content. Side-channel turns hit this: `/btw`
		// and IRC background replies go through `AgentSession.runEphemeralTurn`,
		// which sets `context.tools = []` and `toolChoice: "none"`
		// (see packages/coding-agent/src/session/agent-session.ts).
		delete params.tool_choice;
	}

	if (acceptsReasoningParams && model.reasoning) {
		switch (compat.thinkingFormat) {
			case "zai": {
				// Z.ai uses binary thinking: { type: "enabled" | "disabled" }. It must be
				// disabled explicitly because z.ai defaults to thinking enabled.
				const thinkingOn = options?.reasoning && !options?.disableReasoning;
				params.thinking = { type: thinkingOn ? "enabled" : "disabled" };
				if (thinkingOn && compat.thinkingKeep) {
					params.thinking.keep = compat.thinkingKeep;
				}
				break;
			}
			case "qwen":
				// Qwen uses a top-level enable_thinking boolean.
				params.enable_thinking = !!options?.reasoning && !options?.disableReasoning;
				break;
			case "qwen-chat-template":
				params.chat_template_kwargs = {
					enable_thinking: !!options?.reasoning && !options?.disableReasoning,
				};
				break;
			case "openrouter": {
				// OpenRouter normalizes reasoning through a nested `reasoning` object.
				// Without an explicit signal it defaults reasoning models to thinking,
				// which can consume the whole output budget on small `max_tokens`
				// requests (e.g. title generation), so `disableReasoning` is honored.
				const openRouterParams = params as typeof params & {
					reasoning?: { effort?: string } | { enabled: false };
				};
				if (options?.disableReasoning) {
					openRouterParams.reasoning = { enabled: false };
				} else if (options?.reasoning) {
					openRouterParams.reasoning = {
						effort: mapReasoningEffort(options.reasoning, compat.reasoningEffortMap),
					};
				}
				break;
			}
			default: {
				if (!compat.supportsReasoningEffort) break;
				if (options?.reasoning && !options?.disableReasoning) {
					// OpenAI-style reasoning_effort
					params.reasoning_effort = mapReasoningEffort(options.reasoning, compat.reasoningEffortMap) as Effort;
				} else if (options?.disableReasoning && !options?.reasoning) {
					// Generic OpenAI-compatible effort endpoints expose no true off
					// switch; the model's lowest supported effort is the closest
					// transport-level approximation of "disabled".
					const lowestEffort = getSupportedEfforts(model)[0];
					if (lowestEffort === undefined) {
						throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
					}
					params.reasoning_effort = mapReasoningEffort(lowestEffort, compat.reasoningEffortMap) as Effort;
				}
			}
		}
	}

	// Removes the effort-style reasoning controls from the body.
	const dropReasoningControls = (): void => {
		delete params.reasoning_effort;
		delete params.reasoning;
	};

	if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
		// DeepSeek reasoning models accept tools/tool_choice but reject that
		// control field while thinking is enabled. Keep the tool-selection
		// contract and suppress reasoning for this single request.
		dropReasoningControls();
	}

	if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
		// Backends like Kimi 400 with `tool_choice 'specified' is incompatible
		// with thinking enabled`. Suppress thinking for this forced-tool turn only.
		dropReasoningControls();
		if (compat.thinkingFormat === "zai") {
			params.thinking = { type: "disabled" };
		}
	}

	// OpenRouter provider routing preferences
	if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
		params.provider = compat.openRouterRouting;
	}

	// Vercel AI Gateway provider routing preferences
	if (model.baseUrl.includes("ai-gateway.vercel.sh") && model.compat?.vercelGatewayRouting) {
		const { only, order } = model.compat.vercelGatewayRouting;
		if (only || order) {
			const gateway: Record<string, string[]> = {};
			if (only) gateway.only = only;
			if (order) gateway.order = order;
			params.providerOptions = { gateway };
		}
	}

	// Merged last on purpose: extraBody overrides everything computed above.
	if (compat.extraBody) {
		Object.assign(params, compat.extraBody);
	}

	return { params, toolStrictMode };
}
1351
+
1352
/**
 * Reads `key` from `value` and returns it only when it is a number.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property value if its `typeof` is `"number"`, otherwise `undefined`.
 */
function getOptionalNumberProperty(value: object, key: string): number | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (typeof candidate !== "number") return undefined;
	return candidate;
}
1356
+
1357
/**
 * Reads `key` from `value` and returns it only when it is a non-null object.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property value if its `typeof` is `"object"` and it is not
 *   `null` (arrays qualify), otherwise `undefined`.
 */
function getOptionalObjectProperty(value: object, key: string): object | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (candidate === null || typeof candidate !== "object") return undefined;
	return candidate;
}
1361
+
1362
/**
 * Looks for a `usage` object attached directly to a streamed choice.
 *
 * @param choice - A choice from a chat-completion chunk.
 * @returns The choice's `usage` property when it is a non-null object,
 *   otherwise `undefined`.
 */
function getChoiceUsage(choice: ChatCompletionChunk.Choice): object | undefined {
	const usageOnChoice = getOptionalObjectProperty(choice, "usage");
	return usageOnChoice;
}
1365
+
1366
/**
 * Converts a raw provider usage payload into the normalized usage record and
 * runs cost calculation on it.
 *
 * Two cache-accounting dialects are recognised:
 * - OpenRouter reports cache writes in `prompt_tokens_details.cache_write_tokens`
 *   and INCLUDES them in `prompt_tokens`; they are subtracted to get the input.
 *   Ref: https://openrouter.ai/docs/guides/best-practices/prompt-caching
 * - DeepSeek reports `prompt_cache_hit_tokens` / `prompt_cache_miss_tokens` at
 *   the top level, with `prompt_tokens` equal to their sum. The miss portion IS
 *   the input, so it is neither subtracted nor reported as a cache write.
 *   Ref: https://api-docs.deepseek.com/api/create-chat-completion
 *
 * @param rawUsage - Usage object as received from the provider.
 * @param model - Model descriptor, passed to `calculateCost`.
 * @param premiumRequests - Included in the result only when defined.
 * @returns The normalized usage after `calculateCost` has been applied to it.
 */
export function parseChunkUsage(
	rawUsage: object,
	model: Model<"openai-completions">,
	premiumRequests: number | undefined,
): AssistantMessage["usage"] {
	// Reads a numeric field from an optional container.
	const numberFrom = (container: object | undefined, key: string): number | undefined =>
		container ? getOptionalNumberProperty(container, key) : undefined;

	const promptDetails = getOptionalObjectProperty(rawUsage, "prompt_tokens_details");
	const completionDetails = getOptionalObjectProperty(rawUsage, "completion_tokens_details");

	const deepSeekHit = numberFrom(rawUsage, "prompt_cache_hit_tokens");
	const deepSeekMiss = numberFrom(rawUsage, "prompt_cache_miss_tokens");
	const openRouterWrite = numberFrom(promptDetails, "cache_write_tokens");

	const cacheRead =
		numberFrom(rawUsage, "cached_tokens") ?? deepSeekHit ?? numberFrom(promptDetails, "cached_tokens") ?? 0;
	// OpenRouter's figure wins; DeepSeek's miss count is the fallback.
	const rawCacheWrite = openRouterWrite ?? deepSeekMiss ?? 0;

	const reasoningTokens = numberFrom(completionDetails, "reasoning_tokens") ?? 0;
	const promptTokens = numberFrom(rawUsage, "prompt_tokens") ?? 0;
	// Per OpenAI's CompletionUsage spec, `reasoning_tokens` is a subset of
	// `completion_tokens`, so they are not added on top of the output.
	const output = numberFrom(rawUsage, "completion_tokens") ?? 0;

	// The DeepSeek path applies only when both native fields are present, the
	// miss count is positive, and OpenRouter supplied no cache_write_tokens —
	// this avoids false positives when DeepSeek models are routed through
	// OpenRouter, which may pass the native fields through alongside its own.
	const isDeepSeekUsage =
		deepSeekHit !== undefined && deepSeekMiss !== undefined && deepSeekMiss > 0 && openRouterWrite === undefined;

	// For DeepSeek the miss tokens already are the input: subtracting them or
	// reporting them as cacheWrite would make consumers double-count.
	const cacheWrite = isDeepSeekUsage ? 0 : rawCacheWrite;
	const input = Math.max(0, promptTokens - cacheRead - cacheWrite);

	const usage: AssistantMessage["usage"] = {
		input,
		output,
		cacheRead,
		cacheWrite,
		totalTokens: input + output + cacheRead + cacheWrite,
		...(reasoningTokens > 0 ? { reasoningTokens } : {}),
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		...(premiumRequests !== undefined ? { premiumRequests } : {}),
	};
	calculateCost(model, usage);
	return usage;
}
1432
+
1433
/**
 * Translates a requested reasoning effort into the value sent on the wire.
 *
 * @param effort - Effort level requested by the caller.
 * @param reasoningEffortMap - Optional per-effort overrides.
 * @returns The override for `effort` when the map has one, otherwise `effort` itself.
 */
function mapReasoningEffort(
	effort: NonNullable<OpenAICompletionsOptions["reasoning"]>,
	reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoning"]>, string>>,
): string {
	const override = reasoningEffortMap[effort];
	if (override === undefined || override === null) return effort;
	return override;
}
1439
+
1440
/**
 * Marks one text part with an Anthropic-style ephemeral `cache_control`
 * breakpoint, mutating `messages` in place.
 *
 * Does nothing unless `compat.cacheControlFormat` is `"anthropic"`. Otherwise it
 * walks the messages from last to first, considering only user, assistant and
 * developer messages, and stops at the first one that has text: string content
 * is replaced by a single marked text part, array content gets the marker on
 * its last text part. Messages with no text are skipped.
 *
 * @param compat - Resolved compat settings; only `cacheControlFormat` is read.
 * @param messages - Wire messages to annotate.
 */
function maybeAddAnthropicCacheControl(compat: ResolvedOpenAICompat, messages: ChatCompletionMessageParam[]): void {
	if (compat.cacheControlFormat !== "anthropic") return;

	let index = messages.length;
	while (--index >= 0) {
		const message = messages[index];
		if (!(message.role === "user" || message.role === "assistant" || message.role === "developer")) continue;

		const body = message.content;
		if (typeof body === "string") {
			// cache_control must sit on a text part, so promote the string to one.
			const textPart = { type: "text" as const, text: body };
			message.content = [Object.assign(textPart, { cache_control: { type: "ephemeral" } })];
			return;
		}
		if (!Array.isArray(body)) continue;

		// Searching a reversed copy yields the last text part; the part itself
		// is shared with the original array, so the mutation lands in place.
		const lastTextPart = [...body].reverse().find(part => part?.type === "text");
		if (lastTextPart) {
			Object.assign(lastTextPart, { cache_control: { type: "ephemeral" } });
			return;
		}
	}
}
1468
+
1469
+ export function convertMessages(
1470
+ model: Model<"openai-completions">,
1471
+ context: Context,
1472
+ compat: ResolvedOpenAICompat,
1473
+ ): ChatCompletionMessageParam[] {
1474
+ const params: ChatCompletionMessageParam[] = [];
1475
+
1476
+ const normalizeToolCallId = (id: string): string => {
1477
+ if (compat.requiresMistralToolIds) return normalizeMistralToolId(id, true);
1478
+
1479
+ // Handle pipe-separated IDs from OpenAI Responses API
1480
+ // Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
1481
+ // These come from providers like github-copilot, openai-codex, opencode
1482
+ // Extract just the call_id part and normalize it
1483
+ if (id.includes("|")) {
1484
+ const [callId] = id.split("|");
1485
+ // Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
1486
+ return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
1487
+ }
1488
+
1489
+ if (model.provider === "openai") return id.length > 40 ? id.slice(0, 40) : id;
1490
+ return id;
1491
+ };
1492
+ const transformedMessages = transformMessages(context.messages, model, id => normalizeToolCallId(id));
1493
+
1494
+ const remappedToolCallIds = new Map<string, string[]>();
1495
+ let generatedToolCallIdCounter = 0;
1496
+
1497
+ const generateFallbackToolCallId = (seed: string): string => {
1498
+ generatedToolCallIdCounter += 1;
1499
+ const hash = Bun.hash(`${model.provider}:${model.id}:${seed}:${generatedToolCallIdCounter}`).toString(36);
1500
+ return `call_${hash}`;
1501
+ };
1502
+
1503
+ const rememberToolCallId = (originalId: string, normalizedId: string): void => {
1504
+ const queue = remappedToolCallIds.get(originalId);
1505
+ if (queue) {
1506
+ queue.push(normalizedId);
1507
+ return;
1508
+ }
1509
+ remappedToolCallIds.set(originalId, [normalizedId]);
1510
+ };
1511
+
1512
+ const consumeToolCallId = (originalId: string): string | null => {
1513
+ const queue = remappedToolCallIds.get(originalId);
1514
+ if (!queue || queue.length === 0) return null;
1515
+ const nextId = queue.shift() ?? null;
1516
+ if (queue.length === 0) remappedToolCallIds.delete(originalId);
1517
+ return nextId;
1518
+ };
1519
+
1520
+ const ensureToolCallId = (rawId: string, seed: string): string => {
1521
+ const normalized = normalizeToolCallId(rawId);
1522
+ if (normalized.trim().length > 0) return normalized;
1523
+ return generateFallbackToolCallId(seed);
1524
+ };
1525
+
1526
+ const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
1527
+ if (systemPrompts.length > 0) {
1528
+ const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
1529
+ const role = useDeveloperRole ? "developer" : "system";
1530
+ // Default to one block per ordered system prompt so the leading prefix
1531
+ // stays byte-identical between turns and the provider's KV cache can
1532
+ // reuse it. Hosts whose chat templates reject follow-up system messages
1533
+ // (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
1534
+ // via `compat.supportsMultipleSystemMessages = false`; in that mode we
1535
+ // coalesce into a single message joined by `\n\n`.
1536
+ if (compat.supportsMultipleSystemMessages) {
1537
+ for (const systemPrompt of systemPrompts) {
1538
+ params.push({ role, content: systemPrompt });
1539
+ }
1540
+ } else {
1541
+ params.push({ role, content: systemPrompts.join("\n\n") });
1542
+ }
1543
+ }
1544
+
1545
+ let lastRole: string | null = null;
1546
+
1547
+ for (let i = 0; i < transformedMessages.length; i++) {
1548
+ const msg = transformedMessages[i];
1549
+ // Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
1550
+ // Insert a synthetic assistant message to bridge the gap
1551
+ if (
1552
+ compat.requiresAssistantAfterToolResult &&
1553
+ lastRole === "toolResult" &&
1554
+ (msg.role === "user" || msg.role === "developer")
1555
+ ) {
1556
+ params.push({
1557
+ role: "assistant",
1558
+ content: "I have processed the tool results.",
1559
+ });
1560
+ }
1561
+
1562
+ const devAsUser = !compat.supportsDeveloperRole;
1563
+ if (msg.role === "user" || msg.role === "developer") {
1564
+ const role = !devAsUser && msg.role === "developer" ? "developer" : "user";
1565
+ if (typeof msg.content === "string") {
1566
+ const text = msg.content.toWellFormed();
1567
+ if (text.trim().length === 0) continue;
1568
+ params.push({
1569
+ role: role,
1570
+ content: text,
1571
+ });
1572
+ } else {
1573
+ const supportsImages = model.input.includes("image") && !isDashscopeCompatibleModeTextOnlyQwen(model);
1574
+ const content: ChatCompletionContentPart[] = [];
1575
+ let omittedImages = false;
1576
+ for (const item of msg.content) {
1577
+ if (item.type === "text") {
1578
+ const text = item.text.toWellFormed();
1579
+ if (text.trim().length === 0) continue;
1580
+ content.push({
1581
+ type: "text",
1582
+ text,
1583
+ } satisfies ChatCompletionContentPartText);
1584
+ } else if (supportsImages) {
1585
+ content.push({
1586
+ type: "image_url",
1587
+ image_url: {
1588
+ url: `data:${item.mimeType};base64,${item.data}`,
1589
+ },
1590
+ } satisfies ChatCompletionContentPartImage);
1591
+ } else {
1592
+ omittedImages = true;
1593
+ }
1594
+ }
1595
+ if (omittedImages) {
1596
+ content.push({
1597
+ type: "text",
1598
+ text: NON_VISION_IMAGE_PLACEHOLDER,
1599
+ } satisfies ChatCompletionContentPartText);
1600
+ }
1601
+ if (content.length === 0) continue;
1602
+ params.push({
1603
+ role: "user",
1604
+ content,
1605
+ });
1606
+ }
1607
+ } else if (msg.role === "assistant") {
1608
+ const assistantMsg: ChatCompletionAssistantMessageParam = {
1609
+ role: "assistant",
1610
+ content: null,
1611
+ };
1612
+
1613
+ const textBlocks = msg.content.filter(b => b.type === "text") as TextContent[];
1614
+ // Filter out empty text blocks to avoid API validation errors
1615
+ const nonEmptyTextBlocks = textBlocks.filter(b => b.text && b.text.trim().length > 0);
1616
+ if (nonEmptyTextBlocks.length > 0) {
1617
+ // Always send assistant content as a plain string. Some OpenAI-compatible
1618
+ // backends mirror array-of-text-block payloads back to the model literally,
1619
+ // causing recursive nested content in subsequent turns.
1620
+ assistantMsg.content = nonEmptyTextBlocks.map(b => b.text.toWellFormed()).join("");
1621
+ }
1622
+
1623
+ // Handle thinking blocks
1624
+ const thinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
1625
+ // Filter out empty thinking blocks to avoid API validation errors
1626
+ const nonEmptyThinkingBlocks = thinkingBlocks.filter(b => b.thinking && b.thinking.trim().length > 0);
1627
+ if (nonEmptyThinkingBlocks.length > 0) {
1628
+ if (compat.requiresThinkingAsText) {
1629
+ // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
1630
+ const thinkingText = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n\n");
1631
+ const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
1632
+ if (textContent) {
1633
+ textContent.unshift({ type: "text", text: thinkingText });
1634
+ } else {
1635
+ assistantMsg.content = [{ type: "text", text: thinkingText }];
1636
+ }
1637
+ } else if (compat.requiresReasoningContentForToolCalls) {
1638
+ // Use the streamed signature when the backend accepts whichever
1639
+ // recognized field name was emitted (allowsSynthetic=true). Backends
1640
+ // like opencode-kimi-with-thinking and DeepSeek demand the exact
1641
+ // configured `reasoningContentField` instead, so honor that here
1642
+ // rather than echoing the upstream field name.
1643
+ const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
1644
+ const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
1645
+ const wireField =
1646
+ compat.allowsSyntheticReasoningContentForToolCalls &&
1647
+ signature &&
1648
+ recognizedFields.includes(signature)
1649
+ ? signature
1650
+ : signature && recognizedFields.includes(signature)
1651
+ ? (compat.reasoningContentField ?? "reasoning_content")
1652
+ : undefined;
1653
+ if (wireField) {
1654
+ (assistantMsg as any)[wireField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
1655
+ }
1656
+ }
1657
+ }
1658
+
1659
+ if (compat.requiresReasoningContentForToolCalls) {
1660
+ const streamedReasoningField = nonEmptyThinkingBlocks[0]?.thinkingSignature;
1661
+ const reasoningField =
1662
+ compat.allowsSyntheticReasoningContentForToolCalls &&
1663
+ (streamedReasoningField === "reasoning_content" ||
1664
+ streamedReasoningField === "reasoning" ||
1665
+ streamedReasoningField === "reasoning_text")
1666
+ ? streamedReasoningField
1667
+ : (compat.reasoningContentField ?? "reasoning_content");
1668
+ const reasoningContent = (assistantMsg as any)[reasoningField];
1669
+ if (!reasoningContent) {
1670
+ const reasoning = (assistantMsg as any).reasoning;
1671
+ const reasoningText = (assistantMsg as any).reasoning_text;
1672
+ if (reasoning && reasoningField !== "reasoning") {
1673
+ (assistantMsg as any)[reasoningField] = reasoning;
1674
+ } else if (reasoningText && reasoningField !== "reasoning_text") {
1675
+ (assistantMsg as any)[reasoningField] = reasoningText;
1676
+ } else if (nonEmptyThinkingBlocks.length > 0) {
1677
+ (assistantMsg as any)[reasoningField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
1678
+ }
1679
+ }
1680
+ }
1681
+
1682
+ const toolCalls = msg.content.filter(b => b.type === "toolCall") as ToolCall[];
1683
+ // Replay reasoning_content on assistant turns for backends that validate
1684
+ // thinking-mode history. DeepSeek V4 requires reasoning_content on EVERY
1685
+ // assistant turn once any prior turn included it — not just tool-call turns.
1686
+ // The replay logic has three tiers:
1687
+ // 1. Recover from thinking blocks with valid signatures (covers same-model replay
1688
+ // where nonEmptyThinkingBlocks may have filtered out empty-text blocks)
1689
+ // 2. For providers that require the field but returned no reasoning at all
1690
+ // (e.g. proxy-stripped reasoning_content), emit an empty string
1691
+ // 3. For providers that accept synthetic placeholders (Kimi, OpenRouter), emit "."
1692
+ // DeepSeek V4 rejects synthetic "." placeholders — it validates the exact value —
1693
+ // so the allowsSyntheticReasoningContentForToolCalls flag controls tier 3.
1694
+ const canUseSyntheticReasoningContent =
1695
+ compat.requiresReasoningContentForToolCalls &&
1696
+ compat.allowsSyntheticReasoningContentForToolCalls &&
1697
+ (compat.thinkingFormat === "openai" ||
1698
+ compat.thinkingFormat === "openrouter" ||
1699
+ compat.thinkingFormat === "zai");
1700
+ // DeepSeek-compatible reasoning models require reasoning_content on all
1701
+ // assistant turns. Providers that allow placeholders only need it on
1702
+ // tool-call turns.
1703
+ const needsReasoningOnAllTurns =
1704
+ compat.requiresReasoningContentForToolCalls && !compat.allowsSyntheticReasoningContentForToolCalls;
1705
+ const needsReasoningField = needsReasoningOnAllTurns || toolCalls.length > 0;
1706
+ let hasReasoningField =
1707
+ (assistantMsg as any).reasoning_content !== undefined ||
1708
+ (assistantMsg as any).reasoning !== undefined ||
1709
+ (assistantMsg as any).reasoning_text !== undefined;
1710
+ // Tier 1: Recover reasoning_content from ALL thinking blocks (including empty-text
1711
+ // ones) when the provider requires exact replay and rejects synthetic placeholders.
1712
+ // This covers the case where thinking blocks have valid signatures but were excluded
1713
+ // by the nonEmptyThinkingBlocks filter above, or where thinking text is empty but
1714
+ // the signature identifies the correct field name for replay.
1715
+ // Only recognized OpenAI-compat reasoning field names qualify — opaque signatures
1716
+ // from other providers (Anthropic encrypted, OpenAI Responses JSON, etc.) are not
1717
+ // valid property names for the wire message.
1718
+ if (
1719
+ needsReasoningField &&
1720
+ !hasReasoningField &&
1721
+ compat.requiresReasoningContentForToolCalls &&
1722
+ !compat.allowsSyntheticReasoningContentForToolCalls
1723
+ ) {
1724
+ const allThinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
1725
+ if (allThinkingBlocks.length > 0) {
1726
+ const signature = allThinkingBlocks[0].thinkingSignature;
1727
+ const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
1728
+ if (signature && recognizedFields.includes(signature)) {
1729
+ const reasoningField = compat.reasoningContentField ?? "reasoning_content";
1730
+ (assistantMsg as any)[reasoningField] = allThinkingBlocks.map(b => b.thinking).join("\n");
1731
+ hasReasoningField = true;
1732
+ }
1733
+ }
1734
+ }
1735
+ // Tier 2: When the provider requires reasoning_content but there are genuinely no
1736
+ // thinking blocks at all (e.g. proxy stripped reasoning_content from the response),
1737
+ // emit an empty string. The field must be present; an empty string is the most honest
1738
+ // representation of "no reasoning was captured."
1739
+ if (
1740
+ needsReasoningField &&
1741
+ !hasReasoningField &&
1742
+ compat.requiresReasoningContentForToolCalls &&
1743
+ !compat.allowsSyntheticReasoningContentForToolCalls
1744
+ ) {
1745
+ const reasoningField = compat.reasoningContentField ?? "reasoning_content";
1746
+ (assistantMsg as any)[reasoningField] = "";
1747
+ hasReasoningField = true;
1748
+ }
1749
+ // Tier 3: For providers that accept synthetic placeholders (Kimi, OpenRouter).
1750
+ if (toolCalls.length > 0 && canUseSyntheticReasoningContent && !hasReasoningField) {
1751
+ const reasoningField = compat.reasoningContentField ?? "reasoning_content";
1752
+ (assistantMsg as any)[reasoningField] = ".";
1753
+ hasReasoningField = true;
1754
+ }
1755
+ if (toolCalls.length > 0) {
1756
+ assistantMsg.tool_calls = toolCalls.map((tc, toolCallIndex) => {
1757
+ const toolCallId = ensureToolCallId(tc.id, `${i}:${toolCallIndex}:${tc.name}`);
1758
+ rememberToolCallId(tc.id, toolCallId);
1759
+ return {
1760
+ id: normalizeMistralToolId(toolCallId, compat.requiresMistralToolIds),
1761
+ type: "function" as const,
1762
+ function: {
1763
+ name: tc.name,
1764
+ arguments: serializeToolArguments(tc.arguments),
1765
+ },
1766
+ };
1767
+ });
1768
+ const reasoningDetails = toolCalls
1769
+ .filter(tc => tc.thoughtSignature)
1770
+ .map(tc => {
1771
+ try {
1772
+ return JSON.parse(tc.thoughtSignature!);
1773
+ } catch {
1774
+ return null;
1775
+ }
1776
+ })
1777
+ .filter(Boolean);
1778
+ if (reasoningDetails.length > 0) {
1779
+ (assistantMsg as any).reasoning_details = reasoningDetails;
1780
+ }
1781
+ }
1782
+ // Some OpenAI-compatible backends concatenate assistant content as a
1783
+ // string even for tool-call replay. OpenAI accepts an empty string here;
1784
+ // null trips strict/proxy implementations before the tool result is read.
1785
+ if (assistantMsg.content === null && (hasReasoningField || assistantMsg.tool_calls)) {
1786
+ assistantMsg.content = "";
1787
+ }
1788
+ // Skip assistant messages that have no content, no tool calls, and no reasoning payload.
1789
+ // Some OpenAI-compatible backends require replaying reasoning-only assistant turns
1790
+ // so follow-up requests preserve the provider-specific reasoning field name.
1791
+ const content = assistantMsg.content;
1792
+ const hasContent =
1793
+ content !== null &&
1794
+ content !== undefined &&
1795
+ (typeof content === "string" ? content.length > 0 : content.length > 0);
1796
+ if (!hasContent && assistantMsg.tool_calls && compat.requiresAssistantContentForToolCalls) {
1797
+ assistantMsg.content = ".";
1798
+ }
1799
+ if (!hasContent && !assistantMsg.tool_calls && !hasReasoningField) {
1800
+ continue;
1801
+ }
1802
+ params.push(assistantMsg);
1803
+ } else if (msg.role === "toolResult") {
1804
+ // Batch consecutive tool results and collect all images
1805
+ const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
1806
+ let j = i;
1807
+
1808
+ for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
1809
+ const toolMsg = transformedMessages[j] as ToolResultMessage;
1810
+
1811
+ // Extract text and image content
1812
+ const textResult = toolMsg.content
1813
+ .filter(c => c.type === "text")
1814
+ .map(c => (c as TextContent).text)
1815
+ .join("\n");
1816
+ const supportsImages = model.input.includes("image") && !isDashscopeCompatibleModeTextOnlyQwen(model);
1817
+ const hasImages = toolMsg.content.some(c => c.type === "image");
1818
+ const omittedImages = hasImages && !supportsImages;
1819
+
1820
+ // Always send tool result with text (or placeholder if only images)
1821
+ const hasText = textResult.length > 0;
1822
+ const remappedToolCallId = consumeToolCallId(toolMsg.toolCallId);
1823
+ const resolvedToolCallId =
1824
+ remappedToolCallId ?? ensureToolCallId(toolMsg.toolCallId, `${j}:${toolMsg.toolName ?? "tool"}`);
1825
+ const toolResultContent = omittedImages
1826
+ ? joinTextWithImagePlaceholder(textResult, true)
1827
+ : hasText
1828
+ ? textResult
1829
+ : hasImages
1830
+ ? "(see attached image)"
1831
+ : "";
1832
+ const toolResultMsg: ChatCompletionToolMessageParam = {
1833
+ role: "tool",
1834
+ content: toolResultContent.toWellFormed(),
1835
+ tool_call_id: normalizeMistralToolId(resolvedToolCallId, compat.requiresMistralToolIds),
1836
+ };
1837
+ if (compat.requiresToolResultName && toolMsg.toolName) {
1838
+ (toolResultMsg as any).name = toolMsg.toolName;
1839
+ }
1840
+ params.push(toolResultMsg);
1841
+
1842
+ if (hasImages && supportsImages) {
1843
+ for (const block of toolMsg.content) {
1844
+ if (block.type === "image") {
1845
+ imageBlocks.push({
1846
+ type: "image_url",
1847
+ image_url: {
1848
+ url: `data:${block.mimeType};base64,${block.data}`,
1849
+ },
1850
+ });
1851
+ }
1852
+ }
1853
+ }
1854
+ }
1855
+
1856
+ i = j - 1;
1857
+
1858
+ // After all consecutive tool results, add a single user message with all images
1859
+ if (imageBlocks.length > 0) {
1860
+ if (compat.requiresAssistantAfterToolResult) {
1861
+ params.push({
1862
+ role: "assistant",
1863
+ content: "I have processed the tool results.",
1864
+ });
1865
+ }
1866
+
1867
+ params.push({
1868
+ role: "user",
1869
+ content: [
1870
+ {
1871
+ type: "text",
1872
+ text: "Attached image(s) from tool result:",
1873
+ },
1874
+ ...imageBlocks,
1875
+ ],
1876
+ });
1877
+ lastRole = "user";
1878
+ } else {
1879
+ lastRole = "toolResult";
1880
+ }
1881
+ continue;
1882
+ }
1883
+
1884
+ lastRole =
1885
+ msg.role === "developer"
1886
+ ? model.reasoning && compat.supportsDeveloperRole
1887
+ ? "developer"
1888
+ : "system"
1889
+ : msg.role;
1890
+ }
1891
+
1892
+ return params;
1893
+ }
1894
+
1895
/**
 * Builds the chat-completions `tools` payload and reports which strict mode was applied.
 *
 * Each tool is prepared in two variants: its base wire schema and the schema
 * returned by `adaptSchemaForStrict`. The applied mode then decides which
 * variant is sent and whether the `strict` flag is attached.
 *
 * @param tools - Tool definitions to convert.
 * @param compat - Resolved provider compatibility flags; `supportsStrictMode` and
 *   `toolStrictMode` are read here.
 * @param toolStrictModeOverride - When provided, replaces `compat.toolStrictMode`.
 * @returns The converted tool list together with the applied strict mode.
 */
function convertTools(
	tools: Tool[],
	compat: ResolvedOpenAICompat,
	toolStrictModeOverride?: ToolStrictModeOverride,
): BuiltOpenAICompletionTools {
	const prepared = tools.map(tool => {
		// Strict is only attempted when neither the global switch, the provider, nor the tool opts out.
		const wantsStrict = !NO_STRICT && compat.supportsStrictMode !== false && tool.strict !== false;
		const baseParameters = toolWireSchema(tool);
		const { schema, strict } = adaptSchemaForStrict(baseParameters, wantsStrict);
		return { tool, baseParameters, parameters: schema, strict };
	});

	const requested = toolStrictModeOverride ?? compat.toolStrictMode;
	const resolveAppliedMode = () => {
		if (requested === "none") return "none" as const;
		if (requested !== "all_strict") return "mixed" as const;
		// "all_strict" is all-or-nothing: a single non-strict tool downgrades the whole request.
		return prepared.every(entry => entry.strict) ? ("all_strict" as const) : ("none" as const);
	};
	const toolStrictMode = resolveAppliedMode();

	return {
		tools: prepared.map(entry => {
			const sendStrict = toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && entry.strict);
			return {
				type: "function",
				function: {
					name: entry.tool.name,
					description: entry.tool.description || "",
					parameters: sendStrict ? entry.parameters : entry.baseParameters,
					// The key is omitted entirely (not set to false) because some providers reject unknown fields.
					...(sendStrict && { strict: true }),
				},
			};
		}),
		toolStrictMode,
	};
}
1939
+
1940
/**
 * Decides whether a failed request should be retried with strict tool schemas disabled.
 *
 * A retry is suggested only when all of the following hold:
 * - tools were sent and the applied mode was `"all_strict"`,
 * - the HTTP status (from the error, else from the captured response) is 400 or 422,
 * - the error message or captured response body matches a strict/tool-related pattern.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if any; its `status`
 *   and `bodyText` are consulted.
 * @param toolStrictMode - Strict mode that was applied to the failed request.
 * @param tools - Tools that were sent with the request.
 * @returns `true` when the failure looks like a rejection of strict tool schemas.
 */
function shouldRetryWithoutStrictTools(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
	toolStrictMode: AppliedToolStrictMode,
	tools: Tool[] | undefined,
): boolean {
	if (toolStrictMode !== "all_strict" || !tools?.length) {
		return false;
	}

	const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	const isRequestRejection = httpStatus === 400 || httpStatus === 422;
	if (!isRequestRejection) {
		return false;
	}

	// Gather every non-blank piece of error text available; one line per source.
	const fragments: string[] = [];
	for (const candidate of [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText]) {
		if (typeof candidate === "string" && candidate.trim().length > 0) {
			fragments.push(candidate);
		}
	}
	const haystack = fragments.join("\n");

	return /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool/i.test(haystack);
}
1958
+
1959
/**
 * Translates a provider `finish_reason` into the internal stop reason.
 *
 * - `null`, `"stop"`, `"end"` map to `"stop"`
 * - `"length"` maps to `"length"`
 * - `"function_call"`, `"tool_calls"` map to `"toolUse"`
 * - anything else (including `"content_filter"` and `"network_error"`) maps to
 *   `"error"` with an `errorMessage` that echoes the raw reason
 *
 * @param reason - The finish reason reported by the provider.
 * @returns The mapped stop reason, plus an error message for unrecognized or failing reasons.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
	stopReason: StopReason;
	errorMessage?: string;
} {
	if (reason === null || reason === "stop" || reason === "end") {
		return { stopReason: "stop" };
	}
	if (reason === "length") {
		return { stopReason: "length" };
	}
	if (reason === "function_call" || reason === "tool_calls") {
		return { stopReason: "toolUse" };
	}
	// Every remaining value is surfaced as an error carrying the provider's own wording.
	return { stopReason: "error", errorMessage: `Provider finish_reason: ${reason}` };
}
1984
+
1985
/**
 * Public entry point for compatibility detection on an `openai-completions` model.
 *
 * This is a thin wrapper: the model is handed to `detectOpenAICompat` and its
 * result is returned unchanged.
 *
 * @param model - The model configuration to inspect.
 * @returns The resolved compatibility settings produced by `detectOpenAICompat`.
 */
export function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
	const detected = detectOpenAICompat(model);
	return detected;
}
1993
+
1994
/**
 * Resolves the compatibility settings to use for a model.
 *
 * This is a thin wrapper: both arguments are forwarded to `resolveOpenAICompat`
 * and its result is returned unchanged.
 *
 * @param model - The model configuration.
 * @param resolvedBaseUrl - Optional already-resolved base URL (e.g., after GitHub
 *   Copilot proxy-ep resolution); forwarded as-is, `undefined` when omitted.
 * @returns The resolved compatibility settings produced by `resolveOpenAICompat`.
 */
function getCompat(model: Model<"openai-completions">, resolvedBaseUrl?: string): ResolvedOpenAICompat {
	const resolved = resolveOpenAICompat(model, resolvedBaseUrl);
	return resolved;
}