@aryee337/aery-ai 0.1.148 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (592)
  1. package/CHANGELOG.md +2914 -0
  2. package/README.md +614 -813
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +5 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +96 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/types.d.ts +105 -0
  10. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  11. package/dist/types/auth-gateway/http.d.ts +39 -0
  12. package/dist/types/auth-gateway/index.d.ts +3 -0
  13. package/dist/types/auth-gateway/server.d.ts +36 -0
  14. package/dist/types/auth-gateway/types.d.ts +117 -0
  15. package/dist/types/auth-storage.d.ts +739 -0
  16. package/dist/types/index.d.ts +49 -0
  17. package/dist/types/model-cache.d.ts +17 -0
  18. package/dist/types/model-manager.d.ts +64 -0
  19. package/dist/types/model-thinking.d.ts +100 -0
  20. package/dist/types/models.d.ts +12 -0
  21. package/dist/types/provider-details.d.ts +24 -0
  22. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  23. package/dist/types/provider-models/descriptors.d.ts +50 -0
  24. package/dist/types/provider-models/google.d.ts +24 -0
  25. package/dist/types/provider-models/index.d.ts +5 -0
  26. package/dist/types/provider-models/ollama.d.ts +7 -0
  27. package/dist/types/provider-models/openai-compat.d.ts +296 -0
  28. package/dist/types/provider-models/special.d.ts +16 -0
  29. package/dist/types/providers/aery-native-client.d.ts +13 -0
  30. package/dist/types/providers/aery-native-server.d.ts +68 -0
  31. package/dist/types/providers/amazon-bedrock.d.ts +38 -0
  32. package/dist/types/providers/anthropic-client.d.ts +99 -0
  33. package/dist/types/providers/anthropic-messages-server-schema.d.ts +465 -0
  34. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  35. package/dist/types/providers/anthropic-wire.d.ts +262 -0
  36. package/dist/types/providers/anthropic.d.ts +206 -0
  37. package/dist/types/providers/aws-credentials.d.ts +43 -0
  38. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  39. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  40. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  41. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  42. package/dist/types/providers/cursor.d.ts +43 -0
  43. package/dist/types/providers/error-message.d.ts +27 -0
  44. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  45. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  46. package/dist/types/providers/google-auth.d.ts +24 -0
  47. package/dist/types/providers/google-gemini-cli.d.ts +81 -0
  48. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  49. package/dist/types/providers/google-shared.d.ts +171 -0
  50. package/dist/types/providers/google-types.d.ts +138 -0
  51. package/dist/types/providers/google-vertex.d.ts +7 -0
  52. package/dist/types/providers/google.d.ts +4 -0
  53. package/dist/types/providers/grammar.d.ts +1 -0
  54. package/dist/types/providers/kimi.d.ts +27 -0
  55. package/dist/types/providers/mock.d.ts +173 -0
  56. package/dist/types/providers/ollama.d.ts +6 -0
  57. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  58. package/dist/types/providers/openai-chat-server-schema.d.ts +817 -0
  59. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  60. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  61. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  62. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  63. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  64. package/dist/types/providers/openai-completions-compat.d.ts +25 -0
  65. package/dist/types/providers/openai-completions.d.ts +54 -0
  66. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  67. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  68. package/dist/types/providers/openai-responses-shared.d.ts +100 -0
  69. package/dist/types/providers/openai-responses.d.ts +66 -0
  70. package/dist/types/providers/register-builtins.d.ts +31 -0
  71. package/dist/types/providers/synthetic.d.ts +26 -0
  72. package/dist/{providers → types/providers}/transform-messages.d.ts +6 -2
  73. package/dist/types/providers/vision-guard.d.ts +8 -0
  74. package/dist/types/providers/xai-responses.d.ts +23 -0
  75. package/dist/types/rate-limit-utils.d.ts +19 -0
  76. package/dist/types/stream.d.ts +28 -0
  77. package/dist/types/types.d.ts +801 -0
  78. package/dist/types/usage/claude.d.ts +4 -0
  79. package/dist/types/usage/gemini.d.ts +2 -0
  80. package/dist/types/usage/github-copilot.d.ts +7 -0
  81. package/dist/types/usage/google-antigravity.d.ts +2 -0
  82. package/dist/types/usage/kimi.d.ts +2 -0
  83. package/dist/types/usage/minimax-code.d.ts +2 -0
  84. package/dist/types/usage/openai-codex.d.ts +3 -0
  85. package/dist/types/usage/shared.d.ts +1 -0
  86. package/dist/types/usage/zai.d.ts +2 -0
  87. package/dist/types/usage.d.ts +260 -0
  88. package/dist/types/utils/abort.d.ts +19 -0
  89. package/dist/types/utils/abortable-iterator.d.ts +4 -0
  90. package/dist/types/utils/anthropic-auth.d.ts +35 -0
  91. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  92. package/dist/types/utils/discovery/codex.d.ts +38 -0
  93. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  94. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  95. package/dist/types/utils/discovery/index.d.ts +4 -0
  96. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  97. package/dist/types/utils/event-stream.d.ts +28 -0
  98. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  99. package/dist/types/utils/foundry.d.ts +1 -0
  100. package/dist/types/utils/http-inspector.d.ts +31 -0
  101. package/dist/types/utils/idle-iterator.d.ts +78 -0
  102. package/dist/types/utils/json-parse.d.ts +37 -0
  103. package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  104. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  105. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  106. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  107. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  108. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  109. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  110. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  111. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  112. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  113. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  114. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  115. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  116. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  117. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  118. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  119. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  120. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  121. package/dist/types/utils/oauth/index.d.ts +38 -0
  122. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  123. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  124. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  125. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  126. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  127. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  128. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  129. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  130. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  131. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  132. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  133. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  134. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  135. package/dist/types/utils/oauth/openrouter.d.ts +1 -0
  136. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  137. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  138. package/dist/{utils → types/utils}/oauth/pkce.d.ts +0 -5
  139. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  140. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  141. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  142. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  143. package/dist/types/utils/oauth/together.d.ts +1 -0
  144. package/dist/types/utils/oauth/types.d.ts +44 -0
  145. package/dist/types/utils/oauth/venice.d.ts +18 -0
  146. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  147. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  148. package/dist/types/utils/oauth/wafer.d.ts +2 -0
  149. package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
  150. package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
  151. package/dist/types/utils/oauth/zai.d.ts +18 -0
  152. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  153. package/dist/types/utils/oauth/zhipu.d.ts +18 -0
  154. package/dist/{utils → types/utils}/overflow.d.ts +9 -11
  155. package/dist/types/utils/parse-bind.d.ts +23 -0
  156. package/dist/types/utils/provider-response.d.ts +3 -0
  157. package/dist/types/utils/request-debug.d.ts +29 -0
  158. package/dist/types/utils/retry-after.d.ts +3 -0
  159. package/dist/types/utils/retry.d.ts +26 -0
  160. package/dist/types/utils/schema/adapt.d.ts +24 -0
  161. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  162. package/dist/types/utils/schema/dereference.d.ts +11 -0
  163. package/dist/types/utils/schema/draft.d.ts +10 -0
  164. package/dist/types/utils/schema/equality.d.ts +4 -0
  165. package/dist/types/utils/schema/fields.d.ts +49 -0
  166. package/dist/types/utils/schema/index.d.ts +13 -0
  167. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  168. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  169. package/dist/types/utils/schema/normalize.d.ts +93 -0
  170. package/dist/types/utils/schema/spill.d.ts +8 -0
  171. package/dist/types/utils/schema/stamps.d.ts +25 -0
  172. package/dist/types/utils/schema/types.d.ts +4 -0
  173. package/dist/types/utils/schema/wire.d.ts +53 -0
  174. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  175. package/dist/types/utils/sdk-stream-timeout.d.ts +33 -0
  176. package/dist/types/utils/sse-debug.d.ts +10 -0
  177. package/dist/types/utils/stream-markup-healing.d.ts +80 -0
  178. package/dist/types/utils/tool-choice.d.ts +50 -0
  179. package/dist/types/utils/validation.d.ts +17 -0
  180. package/dist/types/utils.d.ts +28 -0
  181. package/package.json +139 -105
  182. package/src/api-registry.ts +96 -0
  183. package/src/auth-broker/client.ts +358 -0
  184. package/src/auth-broker/index.ts +5 -0
  185. package/src/auth-broker/refresher.ts +117 -0
  186. package/src/auth-broker/remote-store.ts +623 -0
  187. package/src/auth-broker/server.ts +644 -0
  188. package/src/auth-broker/types.ts +127 -0
  189. package/src/auth-broker/wire-schemas.ts +200 -0
  190. package/src/auth-gateway/http.ts +194 -0
  191. package/src/auth-gateway/index.ts +3 -0
  192. package/src/auth-gateway/server.ts +818 -0
  193. package/src/auth-gateway/types.ts +143 -0
  194. package/src/auth-storage.ts +4422 -0
  195. package/src/index.ts +54 -0
  196. package/src/model-cache.ts +129 -0
  197. package/src/model-manager.ts +469 -0
  198. package/src/model-thinking.ts +782 -0
  199. package/src/models.json +83530 -0
  200. package/src/models.json.d.ts +9 -0
  201. package/src/models.ts +56 -0
  202. package/src/prompts/turn-aborted-guidance.md +4 -0
  203. package/src/provider-details.ts +90 -0
  204. package/src/provider-models/bundled-references.ts +38 -0
  205. package/src/provider-models/descriptors.ts +355 -0
  206. package/src/provider-models/google.ts +88 -0
  207. package/src/provider-models/index.ts +5 -0
  208. package/src/provider-models/ollama.ts +153 -0
  209. package/src/provider-models/openai-compat.ts +2817 -0
  210. package/src/provider-models/special.ts +67 -0
  211. package/src/providers/aery-native-client.ts +228 -0
  212. package/src/providers/aery-native-server.ts +212 -0
  213. package/src/providers/amazon-bedrock.ts +873 -0
  214. package/src/providers/anthropic-client.ts +318 -0
  215. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  216. package/src/providers/anthropic-messages-server.ts +683 -0
  217. package/src/providers/anthropic-wire.ts +268 -0
  218. package/src/providers/anthropic.ts +3094 -0
  219. package/src/providers/aws-credentials.ts +501 -0
  220. package/src/providers/aws-eventstream.ts +185 -0
  221. package/src/providers/aws-sigv4.ts +218 -0
  222. package/src/providers/azure-openai-responses.ts +361 -0
  223. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  224. package/src/providers/cursor/proto/agent.proto +3526 -0
  225. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  226. package/src/providers/cursor/proto/buf.yaml +17 -0
  227. package/src/providers/cursor.ts +2621 -0
  228. package/src/providers/error-message.ts +21 -0
  229. package/src/providers/github-copilot-headers.ts +140 -0
  230. package/src/providers/gitlab-duo.ts +372 -0
  231. package/src/providers/google-auth.ts +252 -0
  232. package/src/providers/google-gemini-cli.ts +809 -0
  233. package/src/providers/google-gemini-headers.ts +41 -0
  234. package/src/providers/google-shared.ts +917 -0
  235. package/src/providers/google-types.ts +167 -0
  236. package/src/providers/google-vertex.ts +91 -0
  237. package/src/providers/google.ts +41 -0
  238. package/src/providers/grammar.ts +70 -0
  239. package/src/providers/kimi.ts +52 -0
  240. package/src/providers/mock.ts +496 -0
  241. package/src/providers/ollama.ts +644 -0
  242. package/src/providers/openai-anthropic-shim.ts +138 -0
  243. package/src/providers/openai-chat-server-schema.ts +252 -0
  244. package/src/providers/openai-chat-server.ts +647 -0
  245. package/src/providers/openai-codex/constants.ts +43 -0
  246. package/src/providers/openai-codex/request-transformer.ts +161 -0
  247. package/src/providers/openai-codex/response-handler.ts +81 -0
  248. package/src/providers/openai-codex-responses.ts +3018 -0
  249. package/src/providers/openai-completions-compat.ts +300 -0
  250. package/src/providers/openai-completions.ts +1979 -0
  251. package/src/providers/openai-responses-server-schema.ts +290 -0
  252. package/src/providers/openai-responses-server.ts +1183 -0
  253. package/src/providers/openai-responses-shared.ts +873 -0
  254. package/src/providers/openai-responses.ts +679 -0
  255. package/src/providers/register-builtins.ts +436 -0
  256. package/src/providers/synthetic.ts +50 -0
  257. package/src/providers/transform-messages.ts +382 -0
  258. package/src/providers/vision-guard.ts +31 -0
  259. package/src/providers/xai-responses.ts +82 -0
  260. package/src/rate-limit-utils.ts +84 -0
  261. package/src/stream.ts +1065 -0
  262. package/src/types.ts +944 -0
  263. package/src/usage/claude.ts +482 -0
  264. package/src/usage/gemini.ts +250 -0
  265. package/src/usage/github-copilot.ts +421 -0
  266. package/src/usage/google-antigravity.ts +201 -0
  267. package/src/usage/kimi.ts +271 -0
  268. package/src/usage/minimax-code.ts +31 -0
  269. package/src/usage/openai-codex.ts +503 -0
  270. package/src/usage/shared.ts +10 -0
  271. package/src/usage/zai.ts +247 -0
  272. package/src/usage.ts +185 -0
  273. package/src/utils/abort.ts +51 -0
  274. package/src/utils/abortable-iterator.ts +69 -0
  275. package/src/utils/anthropic-auth.ts +93 -0
  276. package/src/utils/discovery/antigravity.ts +261 -0
  277. package/src/utils/discovery/codex.ts +371 -0
  278. package/src/utils/discovery/cursor.ts +306 -0
  279. package/src/utils/discovery/gemini.ts +248 -0
  280. package/src/utils/discovery/index.ts +4 -0
  281. package/src/utils/discovery/openai-compatible.ts +224 -0
  282. package/src/utils/event-stream.ts +142 -0
  283. package/src/utils/fireworks-model-id.ts +30 -0
  284. package/src/utils/foundry.ts +8 -0
  285. package/src/utils/http-inspector.ts +176 -0
  286. package/src/utils/idle-iterator.ts +267 -0
  287. package/src/utils/json-parse.ts +182 -0
  288. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  289. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  290. package/src/utils/oauth/anthropic.ts +273 -0
  291. package/src/utils/oauth/api-key-login.ts +87 -0
  292. package/src/utils/oauth/api-key-validation.ts +92 -0
  293. package/src/utils/oauth/callback-server.ts +276 -0
  294. package/src/utils/oauth/cerebras.ts +16 -0
  295. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  296. package/src/utils/oauth/cursor.ts +157 -0
  297. package/src/utils/oauth/deepseek.ts +53 -0
  298. package/src/utils/oauth/firepass.ts +24 -0
  299. package/src/utils/oauth/fireworks.ts +15 -0
  300. package/src/utils/oauth/github-copilot.ts +362 -0
  301. package/src/utils/oauth/gitlab-duo.ts +123 -0
  302. package/src/utils/oauth/google-antigravity.ts +200 -0
  303. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  304. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  305. package/src/utils/oauth/huggingface.ts +62 -0
  306. package/src/utils/oauth/index.ts +484 -0
  307. package/src/utils/oauth/kagi.ts +47 -0
  308. package/src/utils/oauth/kilo.ts +87 -0
  309. package/src/utils/oauth/kimi.ts +254 -0
  310. package/src/utils/oauth/litellm.ts +47 -0
  311. package/src/utils/oauth/lm-studio.ts +38 -0
  312. package/src/utils/oauth/minimax-code.ts +78 -0
  313. package/src/utils/oauth/moonshot.ts +23 -0
  314. package/src/utils/oauth/nanogpt.ts +15 -0
  315. package/src/utils/oauth/nvidia.ts +70 -0
  316. package/src/utils/oauth/oauth.html +203 -0
  317. package/src/utils/oauth/ollama-cloud.ts +28 -0
  318. package/src/utils/oauth/ollama.ts +47 -0
  319. package/src/utils/oauth/openai-codex.ts +299 -0
  320. package/src/utils/oauth/opencode.ts +49 -0
  321. package/src/utils/oauth/openrouter.ts +20 -0
  322. package/src/utils/oauth/parallel.ts +46 -0
  323. package/src/utils/oauth/perplexity.ts +206 -0
  324. package/src/utils/oauth/pkce.ts +18 -0
  325. package/src/utils/oauth/qianfan.ts +58 -0
  326. package/src/utils/oauth/qwen-portal.ts +60 -0
  327. package/src/utils/oauth/synthetic.ts +15 -0
  328. package/src/utils/oauth/tavily.ts +46 -0
  329. package/src/utils/oauth/together.ts +16 -0
  330. package/src/utils/oauth/types.ts +99 -0
  331. package/src/utils/oauth/venice.ts +59 -0
  332. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  333. package/src/utils/oauth/vllm.ts +40 -0
  334. package/src/utils/oauth/wafer.ts +50 -0
  335. package/src/utils/oauth/xai-oauth.ts +342 -0
  336. package/src/utils/oauth/xiaomi.ts +139 -0
  337. package/src/utils/oauth/zai.ts +60 -0
  338. package/src/utils/oauth/zenmux.ts +15 -0
  339. package/src/utils/oauth/zhipu.ts +60 -0
  340. package/src/utils/overflow.ts +137 -0
  341. package/src/utils/parse-bind.ts +54 -0
  342. package/src/utils/provider-response.ts +30 -0
  343. package/src/utils/request-debug.ts +336 -0
  344. package/src/utils/retry-after.ts +110 -0
  345. package/src/utils/retry.ts +54 -0
  346. package/src/utils/schema/CONSTRAINTS.md +164 -0
  347. package/src/utils/schema/adapt.ts +36 -0
  348. package/src/utils/schema/compatibility.ts +435 -0
  349. package/src/utils/schema/dereference.ts +98 -0
  350. package/src/utils/schema/draft.ts +341 -0
  351. package/src/utils/schema/equality.ts +97 -0
  352. package/src/utils/schema/fields.ts +191 -0
  353. package/src/utils/schema/index.ts +13 -0
  354. package/src/utils/schema/json-schema-validator.ts +577 -0
  355. package/src/utils/schema/meta-validator.ts +167 -0
  356. package/src/utils/schema/normalize.ts +1588 -0
  357. package/src/utils/schema/spill.ts +43 -0
  358. package/src/utils/schema/stamps.ts +97 -0
  359. package/src/utils/schema/types.ts +10 -0
  360. package/src/utils/schema/wire.ts +293 -0
  361. package/src/utils/schema/zod-decontaminate.ts +331 -0
  362. package/src/utils/sdk-stream-timeout.ts +43 -0
  363. package/src/utils/sse-debug.ts +289 -0
  364. package/src/utils/stream-markup-healing.ts +612 -0
  365. package/src/utils/tool-choice.ts +99 -0
  366. package/src/utils/validation.ts +1024 -0
  367. package/src/utils.ts +166 -0
  368. package/dist/api-registry.d.ts +0 -20
  369. package/dist/api-registry.d.ts.map +0 -1
  370. package/dist/api-registry.js +0 -44
  371. package/dist/api-registry.js.map +0 -1
  372. package/dist/bedrock-provider.d.ts +0 -5
  373. package/dist/bedrock-provider.d.ts.map +0 -1
  374. package/dist/bedrock-provider.js +0 -6
  375. package/dist/bedrock-provider.js.map +0 -1
  376. package/dist/cli.d.ts +0 -3
  377. package/dist/cli.d.ts.map +0 -1
  378. package/dist/cli.js +0 -130
  379. package/dist/cli.js.map +0 -1
  380. package/dist/env-api-keys.d.ts +0 -18
  381. package/dist/env-api-keys.d.ts.map +0 -1
  382. package/dist/env-api-keys.js +0 -178
  383. package/dist/env-api-keys.js.map +0 -1
  384. package/dist/image-models.d.ts +0 -10
  385. package/dist/image-models.d.ts.map +0 -1
  386. package/dist/image-models.generated.d.ts +0 -440
  387. package/dist/image-models.generated.d.ts.map +0 -1
  388. package/dist/image-models.generated.js +0 -442
  389. package/dist/image-models.generated.js.map +0 -1
  390. package/dist/image-models.js +0 -23
  391. package/dist/image-models.js.map +0 -1
  392. package/dist/images-api-registry.d.ts +0 -14
  393. package/dist/images-api-registry.d.ts.map +0 -1
  394. package/dist/images-api-registry.js +0 -22
  395. package/dist/images-api-registry.js.map +0 -1
  396. package/dist/images.d.ts +0 -4
  397. package/dist/images.d.ts.map +0 -1
  398. package/dist/images.js +0 -14
  399. package/dist/images.js.map +0 -1
  400. package/dist/index.d.ts +0 -32
  401. package/dist/index.d.ts.map +0 -1
  402. package/dist/index.js +0 -20
  403. package/dist/index.js.map +0 -1
  404. package/dist/models.d.ts +0 -18
  405. package/dist/models.d.ts.map +0 -1
  406. package/dist/models.generated.d.ts +0 -17480
  407. package/dist/models.generated.d.ts.map +0 -1
  408. package/dist/models.generated.js +0 -16339
  409. package/dist/models.generated.js.map +0 -1
  410. package/dist/models.js +0 -71
  411. package/dist/models.js.map +0 -1
  412. package/dist/oauth.d.ts +0 -2
  413. package/dist/oauth.d.ts.map +0 -1
  414. package/dist/oauth.js +0 -2
  415. package/dist/oauth.js.map +0 -1
  416. package/dist/providers/aery-error-formatting.d.ts +0 -13
  417. package/dist/providers/aery-error-formatting.d.ts.map +0 -1
  418. package/dist/providers/aery-error-formatting.js +0 -112
  419. package/dist/providers/aery-error-formatting.js.map +0 -1
  420. package/dist/providers/amazon-bedrock.d.ts +0 -38
  421. package/dist/providers/amazon-bedrock.d.ts.map +0 -1
  422. package/dist/providers/amazon-bedrock.js +0 -763
  423. package/dist/providers/amazon-bedrock.js.map +0 -1
  424. package/dist/providers/anthropic.d.ts +0 -71
  425. package/dist/providers/anthropic.d.ts.map +0 -1
  426. package/dist/providers/anthropic.js +0 -949
  427. package/dist/providers/anthropic.js.map +0 -1
  428. package/dist/providers/azure-openai-responses.d.ts +0 -15
  429. package/dist/providers/azure-openai-responses.d.ts.map +0 -1
  430. package/dist/providers/azure-openai-responses.js +0 -225
  431. package/dist/providers/azure-openai-responses.js.map +0 -1
  432. package/dist/providers/cloudflare.d.ts +0 -13
  433. package/dist/providers/cloudflare.d.ts.map +0 -1
  434. package/dist/providers/cloudflare.js +0 -26
  435. package/dist/providers/cloudflare.js.map +0 -1
  436. package/dist/providers/faux.d.ts +0 -56
  437. package/dist/providers/faux.d.ts.map +0 -1
  438. package/dist/providers/faux.js +0 -368
  439. package/dist/providers/faux.js.map +0 -1
  440. package/dist/providers/github-copilot-headers.d.ts +0 -8
  441. package/dist/providers/github-copilot-headers.d.ts.map +0 -1
  442. package/dist/providers/github-copilot-headers.js +0 -29
  443. package/dist/providers/github-copilot-headers.js.map +0 -1
  444. package/dist/providers/google-gemini-cli.d.ts +0 -74
  445. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  446. package/dist/providers/google-gemini-cli.js +0 -779
  447. package/dist/providers/google-gemini-cli.js.map +0 -1
  448. package/dist/providers/google-shared.d.ts +0 -70
  449. package/dist/providers/google-shared.d.ts.map +0 -1
  450. package/dist/providers/google-shared.js +0 -329
  451. package/dist/providers/google-shared.js.map +0 -1
  452. package/dist/providers/google-vertex.d.ts +0 -15
  453. package/dist/providers/google-vertex.d.ts.map +0 -1
  454. package/dist/providers/google-vertex.js +0 -442
  455. package/dist/providers/google-vertex.js.map +0 -1
  456. package/dist/providers/google.d.ts +0 -13
  457. package/dist/providers/google.d.ts.map +0 -1
  458. package/dist/providers/google.js +0 -400
  459. package/dist/providers/google.js.map +0 -1
  460. package/dist/providers/images/openrouter.d.ts +0 -3
  461. package/dist/providers/images/openrouter.d.ts.map +0 -1
  462. package/dist/providers/images/openrouter.js +0 -129
  463. package/dist/providers/images/openrouter.js.map +0 -1
  464. package/dist/providers/images/register-builtins.d.ts +0 -4
  465. package/dist/providers/images/register-builtins.d.ts.map +0 -1
  466. package/dist/providers/images/register-builtins.js +0 -34
  467. package/dist/providers/images/register-builtins.js.map +0 -1
  468. package/dist/providers/mistral.d.ts +0 -25
  469. package/dist/providers/mistral.d.ts.map +0 -1
  470. package/dist/providers/mistral.js +0 -535
  471. package/dist/providers/mistral.js.map +0 -1
  472. package/dist/providers/openai-codex-responses.d.ts +0 -30
  473. package/dist/providers/openai-codex-responses.d.ts.map +0 -1
  474. package/dist/providers/openai-codex-responses.js +0 -1090
  475. package/dist/providers/openai-codex-responses.js.map +0 -1
  476. package/dist/providers/openai-completions.d.ts +0 -19
  477. package/dist/providers/openai-completions.d.ts.map +0 -1
  478. package/dist/providers/openai-completions.js +0 -950
  479. package/dist/providers/openai-completions.js.map +0 -1
  480. package/dist/providers/openai-prompt-cache.d.ts +0 -3
  481. package/dist/providers/openai-prompt-cache.d.ts.map +0 -1
  482. package/dist/providers/openai-prompt-cache.js +0 -10
  483. package/dist/providers/openai-prompt-cache.js.map +0 -1
  484. package/dist/providers/openai-responses-shared.d.ts +0 -18
  485. package/dist/providers/openai-responses-shared.d.ts.map +0 -1
  486. package/dist/providers/openai-responses-shared.js +0 -492
  487. package/dist/providers/openai-responses-shared.js.map +0 -1
  488. package/dist/providers/openai-responses.d.ts +0 -13
  489. package/dist/providers/openai-responses.d.ts.map +0 -1
  490. package/dist/providers/openai-responses.js +0 -237
  491. package/dist/providers/openai-responses.js.map +0 -1
  492. package/dist/providers/register-builtins.d.ts +0 -38
  493. package/dist/providers/register-builtins.d.ts.map +0 -1
  494. package/dist/providers/register-builtins.js +0 -278
  495. package/dist/providers/register-builtins.js.map +0 -1
  496. package/dist/providers/simple-options.d.ts +0 -8
  497. package/dist/providers/simple-options.d.ts.map +0 -1
  498. package/dist/providers/simple-options.js +0 -41
  499. package/dist/providers/simple-options.js.map +0 -1
  500. package/dist/providers/transform-messages.d.ts.map +0 -1
  501. package/dist/providers/transform-messages.js +0 -184
  502. package/dist/providers/transform-messages.js.map +0 -1
  503. package/dist/session-resources.d.ts +0 -4
  504. package/dist/session-resources.d.ts.map +0 -1
  505. package/dist/session-resources.js +0 -22
  506. package/dist/session-resources.js.map +0 -1
  507. package/dist/stream.d.ts +0 -8
  508. package/dist/stream.d.ts.map +0 -1
  509. package/dist/stream.js +0 -27
  510. package/dist/stream.js.map +0 -1
  511. package/dist/types.d.ts +0 -498
  512. package/dist/types.d.ts.map +0 -1
  513. package/dist/types.js +0 -2
  514. package/dist/types.js.map +0 -1
  515. package/dist/utils/diagnostics.d.ts +0 -19
  516. package/dist/utils/diagnostics.d.ts.map +0 -1
  517. package/dist/utils/diagnostics.js +0 -25
  518. package/dist/utils/diagnostics.js.map +0 -1
  519. package/dist/utils/event-stream.d.ts +0 -21
  520. package/dist/utils/event-stream.d.ts.map +0 -1
  521. package/dist/utils/event-stream.js +0 -81
  522. package/dist/utils/event-stream.js.map +0 -1
  523. package/dist/utils/hash.d.ts +0 -3
  524. package/dist/utils/hash.d.ts.map +0 -1
  525. package/dist/utils/hash.js +0 -14
  526. package/dist/utils/hash.js.map +0 -1
  527. package/dist/utils/headers.d.ts +0 -2
  528. package/dist/utils/headers.d.ts.map +0 -1
  529. package/dist/utils/headers.js +0 -8
  530. package/dist/utils/headers.js.map +0 -1
  531. package/dist/utils/json-parse.d.ts +0 -16
  532. package/dist/utils/json-parse.d.ts.map +0 -1
  533. package/dist/utils/json-parse.js +0 -113
  534. package/dist/utils/json-parse.js.map +0 -1
  535. package/dist/utils/node-http-proxy.d.ts +0 -10
  536. package/dist/utils/node-http-proxy.d.ts.map +0 -1
  537. package/dist/utils/node-http-proxy.js +0 -97
  538. package/dist/utils/node-http-proxy.js.map +0 -1
  539. package/dist/utils/oauth/anthropic.d.ts +0 -25
  540. package/dist/utils/oauth/anthropic.d.ts.map +0 -1
  541. package/dist/utils/oauth/anthropic.js +0 -335
  542. package/dist/utils/oauth/anthropic.js.map +0 -1
  543. package/dist/utils/oauth/device-code.d.ts +0 -19
  544. package/dist/utils/oauth/device-code.d.ts.map +0 -1
  545. package/dist/utils/oauth/device-code.js +0 -55
  546. package/dist/utils/oauth/device-code.js.map +0 -1
  547. package/dist/utils/oauth/github-copilot.d.ts +0 -30
  548. package/dist/utils/oauth/github-copilot.d.ts.map +0 -1
  549. package/dist/utils/oauth/github-copilot.js +0 -268
  550. package/dist/utils/oauth/github-copilot.js.map +0 -1
  551. package/dist/utils/oauth/google-antigravity.d.ts +0 -26
  552. package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
  553. package/dist/utils/oauth/google-antigravity.js +0 -377
  554. package/dist/utils/oauth/google-antigravity.js.map +0 -1
  555. package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
  556. package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
  557. package/dist/utils/oauth/google-gemini-cli.js +0 -482
  558. package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
  559. package/dist/utils/oauth/index.d.ts +0 -63
  560. package/dist/utils/oauth/index.d.ts.map +0 -1
  561. package/dist/utils/oauth/index.js +0 -131
  562. package/dist/utils/oauth/index.js.map +0 -1
  563. package/dist/utils/oauth/oauth-page.d.ts +0 -3
  564. package/dist/utils/oauth/oauth-page.d.ts.map +0 -1
  565. package/dist/utils/oauth/oauth-page.js +0 -105
  566. package/dist/utils/oauth/oauth-page.js.map +0 -1
  567. package/dist/utils/oauth/openai-codex.d.ts +0 -34
  568. package/dist/utils/oauth/openai-codex.d.ts.map +0 -1
  569. package/dist/utils/oauth/openai-codex.js +0 -385
  570. package/dist/utils/oauth/openai-codex.js.map +0 -1
  571. package/dist/utils/oauth/pkce.d.ts.map +0 -1
  572. package/dist/utils/oauth/pkce.js +0 -31
  573. package/dist/utils/oauth/pkce.js.map +0 -1
  574. package/dist/utils/oauth/types.d.ts +0 -64
  575. package/dist/utils/oauth/types.d.ts.map +0 -1
  576. package/dist/utils/oauth/types.js +0 -2
  577. package/dist/utils/oauth/types.js.map +0 -1
  578. package/dist/utils/overflow.d.ts.map +0 -1
  579. package/dist/utils/overflow.js +0 -151
  580. package/dist/utils/overflow.js.map +0 -1
  581. package/dist/utils/sanitize-unicode.d.ts +0 -22
  582. package/dist/utils/sanitize-unicode.d.ts.map +0 -1
  583. package/dist/utils/sanitize-unicode.js +0 -26
  584. package/dist/utils/sanitize-unicode.js.map +0 -1
  585. package/dist/utils/typebox-helpers.d.ts +0 -17
  586. package/dist/utils/typebox-helpers.d.ts.map +0 -1
  587. package/dist/utils/typebox-helpers.js +0 -21
  588. package/dist/utils/typebox-helpers.js.map +0 -1
  589. package/dist/utils/validation.d.ts +0 -18
  590. package/dist/utils/validation.d.ts.map +0 -1
  591. package/dist/utils/validation.js +0 -281
  592. package/dist/utils/validation.js.map +0 -1
@@ -0,0 +1,1979 @@
1
+ import { $env, extractHttpStatusFromError } from "@aryee337/aery-utils";
2
+ import OpenAI, { APIConnectionTimeoutError as OpenAIConnectionTimeoutError } from "openai";
3
+ import type {
4
+ ChatCompletionAssistantMessageParam,
5
+ ChatCompletionChunk,
6
+ ChatCompletionContentPart,
7
+ ChatCompletionContentPartImage,
8
+ ChatCompletionContentPartText,
9
+ ChatCompletionMessageParam,
10
+ ChatCompletionToolMessageParam,
11
+ } from "openai/resources/chat/completions";
12
+ import packageJson from "../../package.json" with { type: "json" };
13
+ import { type Effort, getSupportedEfforts } from "../model-thinking";
14
+ import { calculateCost } from "../models";
15
+ import { getEnvApiKey } from "../stream";
16
+ import {
17
+ type AssistantMessage,
18
+ type Context,
19
+ type FetchImpl,
20
+ type Message,
21
+ type MessageAttribution,
22
+ type Model,
23
+ type OpenAICompat,
24
+ type ProviderSessionState,
25
+ resolveServiceTier,
26
+ type ServiceTier,
27
+ type StopReason,
28
+ type StreamFunction,
29
+ type StreamOptions,
30
+ shouldSendServiceTier,
31
+ type TextContent,
32
+ type ThinkingContent,
33
+ type Tool,
34
+ type ToolCall,
35
+ type ToolChoice,
36
+ type ToolResultMessage,
37
+ } from "../types";
38
+ import { normalizeSystemPrompts } from "../utils";
39
+ import { createAbortSourceTracker } from "../utils/abort";
40
+ import { AssistantMessageEventStream } from "../utils/event-stream";
41
+ import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
42
+ import {
43
+ type CapturedHttpErrorResponse,
44
+ finalizeErrorMessage,
45
+ type RawHttpRequestDump,
46
+ rewriteCopilotError,
47
+ } from "../utils/http-inspector";
48
+ import {
49
+ getOpenAIStreamFirstEventTimeoutMs,
50
+ getOpenAIStreamIdleTimeoutMs,
51
+ iterateWithIdleTimeout,
52
+ } from "../utils/idle-iterator";
53
+ import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
54
+ import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
55
+ import { getKimiCommonHeaders } from "../utils/oauth/kimi";
56
+ import { notifyProviderResponse } from "../utils/provider-response";
57
+ import { callWithCopilotModelRetry } from "../utils/retry";
58
+ import { adaptSchemaForStrict, NO_STRICT, toolWireSchema } from "../utils/schema";
59
+ import { wrapFetchForSseDebug } from "../utils/sse-debug";
60
+ import {
61
+ getStreamMarkupHealingPattern,
62
+ type HealedToolCall,
63
+ StreamMarkupHealing,
64
+ type StreamMarkupHealingEvent,
65
+ } from "../utils/stream-markup-healing";
66
+ import { isForcedToolChoice, mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
67
+ import {
68
+ buildCopilotDynamicHeaders,
69
+ hasCopilotVisionInput,
70
+ resolveGitHubCopilotBaseUrl,
71
+ } from "./github-copilot-headers";
72
+ import { detectOpenAICompat, type ResolvedOpenAICompat, resolveOpenAICompat } from "./openai-completions-compat";
73
+ import { createInitialResponsesAssistantMessage } from "./openai-responses-shared";
74
+ import { transformMessages } from "./transform-messages";
75
+ import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER } from "./vision-guard";
76
+
77
/**
 * Coerce a tool call ID into the shape Mistral accepts.
 * Mistral only takes IDs made of exactly 9 alphanumeric characters (a-z, A-Z, 0-9);
 * for any other provider the ID is handed back untouched.
 *
 * @param id - Tool call ID as produced upstream.
 * @param isMistral - Whether the target endpoint enforces the Mistral ID format.
 * @returns The original ID, or a 9-character alphanumeric ID derived from it.
 */
function normalizeMistralToolId(id: string, isMistral: boolean): string {
	const requiredLength = 9;
	if (!isMistral) {
		return id;
	}
	// Drop everything outside [a-zA-Z0-9] first.
	const alphanumeric = id.replace(/[^a-zA-Z0-9]/g, "");
	if (alphanumeric.length >= requiredLength) {
		return alphanumeric.slice(0, requiredLength);
	}
	// Too short: top up from a fixed filler so the same input always yields the same ID.
	const filler = "ABCDEFGHI";
	return alphanumeric + filler.slice(0, requiredLength - alphanumeric.length);
}
95
// NanoGPT note: direct DeepSeek model ids go through the default tools-capable
// route, and `:tools` is intentionally NOT appended. With `:tools`, NanoGPT parses
// tool calls server-side on the upstream DeepSeek stream and answers 502 with
// `code: "malformed_tool_call"` for more complex tool schemas (issue #1488). The
// default route forwards `delta.content` as-is (DSML envelope leaks included), and
// `StreamMarkupHealing` turns those into a structured call on the client.
/**
 * Map a catalog model to the id that is sent on the wire.
 *
 * @param model - Model being invoked.
 * @param options - Request options; only `openrouterVariant` is consulted here.
 * @returns The provider-specific wire id, or `model.id` when no rewrite applies.
 */
function resolveOpenAICompletionsModelId(
	model: Model<"openai-completions">,
	options: OpenAICompletionsOptions | undefined,
): string {
	switch (model.provider) {
		case "firepass":
			return toFirepassWireModelId(model.id);
		case "fireworks":
			return toFireworksWireModelId(model.id);
		case "openrouter":
			return applyOpenRouterRoutingVariant(model.id, options?.openrouterVariant);
		default:
			return model.id;
	}
}
111
+
112
/**
 * Flatten an OpenAI-compatible streaming `delta.content` value into plain text.
 *
 * The usual shape is a string, but some providers (notably Mistral Medium 3.5 /
 * `mistral-medium-2604`) send an array of typed parts such as
 * `[{ type: "text", text: "Hello" }]`. Concatenating such an array directly
 * (`text += array`) string-coerces it and yields the literal `[object Object]`
 * output reported in issue #911.
 *
 * @param content - Raw `delta.content` value of unknown shape.
 * @returns The concatenated text. Non-text parts and unrecognized shapes contribute
 *   nothing, so object sigils never reach visible output.
 */
function normalizeStreamingContentText(content: unknown): string {
	// Text carried by a single `{ type?, text? }` part; "" when it is not a text part.
	const textOfPart = (part: unknown): string => {
		if (!part || typeof part !== "object") return "";
		const candidate = part as { type?: unknown; text?: unknown };
		if (candidate.type !== undefined && candidate.type !== "text") return "";
		return typeof candidate.text === "string" ? candidate.text : "";
	};

	if (typeof content === "string") {
		return content;
	}
	if (!Array.isArray(content)) {
		return textOfPart(content);
	}
	return content.map(part => (typeof part === "string" ? part : textOfPart(part))).join("");
}
147
+
148
/**
 * Produce the JSON text for a tool call's arguments.
 *
 * A non-array object is stringified directly. A string is accepted only when it
 * parses to a non-array object, in which case it is re-serialized. Everything else
 * (arrays, primitives, blank or unparseable strings, unserializable objects)
 * collapses to `"{}"`.
 *
 * @param value - Arguments as an object or as a JSON string.
 * @returns JSON text of an object, `"{}"` as the fallback.
 */
function serializeToolArguments(value: unknown): string {
	const emptyObjectJson = "{}";
	const isRecord = (candidate: unknown): boolean =>
		Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);

	if (isRecord(value)) {
		try {
			return JSON.stringify(value);
		} catch {
			// e.g. circular structures
			return emptyObjectJson;
		}
	}

	if (typeof value !== "string") {
		return emptyObjectJson;
	}

	const text = value.trim();
	if (text.length === 0) {
		return emptyObjectJson;
	}

	let decoded: unknown;
	try {
		decoded = JSON.parse(text);
	} catch {
		return emptyObjectJson;
	}
	return isRecord(decoded) ? JSON.stringify(decoded) : emptyObjectJson;
}
171
+
172
/**
 * Report whether the conversation already contains tool activity, i.e. a tool
 * result message or an assistant message with at least one tool call block.
 * Needed because Anthropic (via proxy) insists on the `tools` param being present
 * whenever the messages include tool_calls or tool role messages.
 *
 * @param messages - Conversation history to inspect.
 * @returns `true` as soon as any tool call or tool result is found.
 */
function hasToolHistory(messages: Message[]): boolean {
	return messages.some(message => {
		if (message.role === "toolResult") {
			return true;
		}
		return message.role === "assistant" && message.content.some(part => part.type === "toolCall");
	});
}
190
/**
 * Decide whether a stream chunk represents real progress, as opposed to a
 * keepalive, a role-only preamble, or an empty `{choices:[]}` no-op that some
 * OpenAI-compatible endpoints emit.
 *
 * Without this filter every keepalive would reset the deadline kept by
 * `iterateWithIdleTimeout`, so a provider that sends nothing but pings would keep
 * the watchdog asleep forever — seen against z.ai/GLM via OpenRouter, where a
 * subagent stalled for hours with no error surfaced.
 *
 * Progress means: terminal usage, a finish reason, or any model-produced delta
 * (content / tool calls / reasoning / refusal). A role-only
 * `delta: { role: "assistant" }` preamble is deliberately NOT progress, so the
 * (longer) first-event timeout stays in charge until real output shows up.
 *
 * @param chunk - A streamed item of unknown shape.
 * @returns `true` when the chunk carries usage, a finish reason, or non-empty delta payload.
 */
export function isOpenAICompletionsProgressChunk(chunk: unknown): boolean {
	const isNonEmptyString = (value: unknown): boolean => typeof value === "string" && value.length > 0;
	const isNonEmptyArray = (value: unknown): boolean => Array.isArray(value) && value.length > 0;

	if (!chunk || typeof chunk !== "object") {
		return false;
	}
	const payload = chunk as {
		usage?: unknown;
		choices?: ReadonlyArray<{
			finish_reason?: unknown;
			usage?: unknown;
			delta?: {
				content?: unknown;
				tool_calls?: unknown;
				reasoning?: unknown;
				reasoning_content?: unknown;
				reasoning_text?: unknown;
				refusal?: unknown;
			};
		}>;
	};

	// Chunk-level usage counts on its own, even with no choices.
	if (payload.usage) {
		return true;
	}

	const firstChoice = Array.isArray(payload.choices) ? payload.choices[0] : undefined;
	if (!firstChoice) {
		return false;
	}
	if (firstChoice.finish_reason || firstChoice.usage) {
		return true;
	}

	const delta = firstChoice.delta;
	if (!delta) {
		return false;
	}

	// Content may be a string or an array of parts; either must be non-empty.
	if (isNonEmptyString(delta.content) || isNonEmptyArray(delta.content)) {
		return true;
	}
	if (isNonEmptyArray(delta.tool_calls)) {
		return true;
	}
	return [delta.reasoning, delta.reasoning_content, delta.reasoning_text, delta.refusal].some(isNonEmptyString);
}
236
+
237
/**
 * Options accepted by the OpenAI chat-completions stream function, on top of the
 * shared `StreamOptions`.
 */
export interface OpenAICompletionsOptions extends StreamOptions {
	/** Tool-choice setting for the request. */
	toolChoice?: ToolChoice;
	/** Requested reasoning effort level. */
	reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
	/** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
	disableReasoning?: boolean;
	/** Requested service tier. NOTE(review): presumably gated by `shouldSendServiceTier` — confirm in `buildParams`. */
	serviceTier?: ServiceTier;
	/**
	 * Routing-variant suffix appended to OpenRouter model IDs when none is
	 * already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
	 * values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
	 * resolved `model.id` already contains a colon-suffix after the last
	 * provider segment (explicit `:nitro` in the selector or a catalog entry
	 * with the variant baked in).
	 */
	openrouterVariant?: string;
}
253
+
254
/**
 * Request body type: the OpenAI SDK's streaming chat-completion create params,
 * widened with optional fields that the SDK type does not declare.
 * NOTE(review): the extra fields look like provider-specific extensions (sampling
 * knobs, thinking toggles, routing hints) — confirm which endpoint consumes each
 * one where the params are built.
 */
type OpenAICompletionsParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
	top_k?: number;
	min_p?: number;
	repetition_penalty?: number;
	thinking?: { type: "enabled" | "disabled" };
	enable_thinking?: boolean;
	chat_template_kwargs?: { enable_thinking: boolean };
	// Either an effort request or an explicit "reasoning off" marker.
	reasoning?: { effort?: string } | { enabled: false };
	// Typed from the compat config's `openRouterRouting` entry.
	provider?: OpenAICompat["openRouterRouting"];
	providerOptions?: { gateway?: { only?: string[]; order?: string[] } };
};

/** Strict-mode outcome that was actually applied when the tool list was built. */
type AppliedToolStrictMode = "mixed" | "all_strict" | "none";
/** Forced strict mode: any resolved `toolStrictMode` except `"mixed"`, or `undefined` when nothing is forced. */
type ToolStrictModeOverride = Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed"> | undefined;

/** Tool definitions in wire format together with the strict mode used to produce them. */
type BuiltOpenAICompletionTools = {
	tools: OpenAI.Chat.Completions.ChatCompletionTool[];
	toolStrictMode: AppliedToolStrictMode;
};

/** Key prefix for this provider's entries in the shared provider-session-state map. */
const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";

/** Per-session state kept for this provider: whether strict tool schemas have been switched off. */
type OpenAICompletionsProviderSessionState = ProviderSessionState & {
	strictToolsDisabled: boolean;
};
279
+
280
/**
 * Build a fresh session-state record with strict tools enabled.
 *
 * @returns A state object whose `close()` resets `strictToolsDisabled` back to `false`.
 */
function createOpenAICompletionsProviderSessionState(): OpenAICompletionsProviderSessionState {
	const sessionState: OpenAICompletionsProviderSessionState = {
		strictToolsDisabled: false,
		close() {
			// Refers to the captured object rather than `this`, so it works when called detached.
			sessionState.strictToolsDisabled = false;
		},
	};
	return sessionState;
}
289
+
290
/**
 * Look up (or lazily create) the session state for a provider / base URL / model triple.
 *
 * @param model - Model whose `provider` and `id` form part of the key.
 * @param baseUrl - Resolved base URL; an absent value is keyed as the empty string.
 * @param providerSessionState - Shared state map, or `undefined` when the caller keeps no session state.
 * @returns The existing or newly stored state, or `undefined` when no map was supplied.
 */
function getOpenAICompletionsProviderSessionState(
	model: Model<"openai-completions">,
	baseUrl: string | undefined,
	providerSessionState: Map<string, ProviderSessionState> | undefined,
): OpenAICompletionsProviderSessionState | undefined {
	if (!providerSessionState) {
		return undefined;
	}

	const cacheKey =
		OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX + model.provider + ":" + (baseUrl ?? "") + ":" + model.id;

	let sessionState = providerSessionState.get(cacheKey) as OpenAICompletionsProviderSessionState | undefined;
	if (!sessionState) {
		// First request for this key: create the state and remember it for later calls.
		sessionState = createOpenAICompletionsProviderSessionState();
		providerSessionState.set(cacheKey, sessionState);
	}
	return sessionState;
}
303
+
304
/**
 * Whether the model is served by OpenRouter and its id starts with `anthropic/`
 * (compared case-insensitively).
 */
function isOpenRouterAnthropicModel(model: Model<"openai-completions">): boolean {
	if (model.provider !== "openrouter") {
		return false;
	}
	const loweredId = model.id.toLowerCase();
	return loweredId.startsWith("anthropic/");
}
307
+
308
/**
 * Add an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`,
 * `:exacto`) to a model id unless the id already carries one.
 *
 * An id "already carries one" when a colon appears after its last `/` separator.
 * That covers user-typed selectors (`anthropic/claude-haiku:nitro`) as well as
 * catalog entries with the variant baked in (`deepseek/deepseek-v3.1-terminus:exacto`).
 *
 * Exported for unit testing.
 *
 * @param modelId - Model id, possibly with an existing `:variant` suffix.
 * @param variant - Variant name without the leading colon; empty/undefined means "no change".
 * @returns `modelId` unchanged, or `modelId:variant`.
 */
export function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string {
	if (variant === undefined || variant === "") {
		return modelId;
	}
	// Only the part after the last `/` can hold a routing suffix.
	const finalSegment = modelId.slice(modelId.lastIndexOf("/") + 1);
	if (finalSegment.includes(":")) {
		return modelId;
	}
	return modelId + ":" + variant;
}
325
+
326
/**
 * Recognize the HTTP 400 failure whose text mentions `invalid_request_error`,
 * "compiled grammar" and "too large" (all matched case-insensitively).
 *
 * @param error - The thrown value from the request.
 * @param capturedErrorResponse - Captured HTTP error response, if any; supplies a
 *   fallback status and extra body text to search.
 * @returns `true` only when the status is 400 and all three markers are present.
 */
function isCompiledGrammarTooLargeStrictError(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
): boolean {
	const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	if (httpStatus !== 400) {
		return false;
	}

	// Search the error message and the captured body together, skipping blank pieces.
	const fragments: string[] = [];
	const candidates = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText];
	for (const candidate of candidates) {
		if (typeof candidate === "string" && candidate.trim().length > 0) {
			fragments.push(candidate);
		}
	}
	const searchText = fragments.join("\n");

	const requiredMarkers = [/invalid_request_error/i, /compiled grammar/i, /too large/i];
	return requiredMarkers.every(marker => marker.test(searchText));
}
341
+
342
// DeepSeek models leak chat-template special tokens (e.g. `<|tool_calls_begin|>`,
// `<|DSML|tool_calls|>`) into visible `content` deltas when hosted behind providers
// (such as NVIDIA NIM) that don't strip them server-side. The structured `tool_calls`
// payload is still emitted correctly — we only need to filter the leaked markers from
// user-visible text. Tokens are delimited by either the fullwidth vertical line
// (U+FF5C) or the ASCII pipe `|`.
// Body is restricted to identifier-like chars (plus the DeepSeek tokenizer's U+2581),
// capped at a sane length to avoid swallowing legitimate angle-bracket text.
//
// The non-ASCII characters are written as `\u` escapes on purpose: if the fullwidth
// pipe is ever collapsed to an ASCII `|` by an encoding round-trip, the delimiter
// group degrades to `(?:||\|)`, which matches the empty string and makes the pattern
// strip ordinary text such as `<div>`.
const DEEPSEEK_SPECIAL_TOKEN_REGEX = /<(?:\uFF5C|\|)[A-Za-z0-9_.\uFF5C|\u2581]{1,64}(?:\uFF5C|\|)>/g;
const DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX = /^\s*<(?:\uFF5C|\|)[A-Za-z0-9_.\uFF5C|\u2581]{1,64}(?:\uFF5C|\|)>/;
const DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX = /<(?:\uFF5C|\|)[A-Za-z0-9_.\uFF5C|\u2581]{1,64}(?:\uFF5C|\|)>\s*$/;
// Opening delimiters: fullwidth form first, then ASCII.
const DEEPSEEK_OPEN_DELIMS = ["<\uFF5C", "<|"] as const;

/**
 * Remove leaked DeepSeek special tokens from a piece of text.
 *
 * @param text - Text that may contain `<|...|>` tokens (ASCII or fullwidth pipes).
 * @returns `text` itself when nothing matched. Otherwise the text without the
 *   tokens, with leading whitespace dropped if the input started with a token and
 *   trailing whitespace dropped if it ended with one.
 */
function stripDeepseekSpecialTokens(text: string): string {
	const stripped = text.replace(DEEPSEEK_SPECIAL_TOKEN_REGEX, "");
	if (stripped === text) return text;

	let normalized = stripped;
	// Whitespace that only existed to separate a leading/trailing token from the
	// real text would otherwise be left dangling at the edge.
	if (DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX.test(text)) normalized = normalized.replace(/^\s+/u, "");
	if (DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX.test(text)) normalized = normalized.replace(/\s+$/u, "");
	return normalized;
}

/**
 * Find a trailing, not-yet-closed special token so it can be held back until the
 * next chunk arrives.
 *
 * @param text - Buffered text received so far.
 * @returns The unterminated tail starting at the last opening delimiter (ASCII or
 *   fullwidth); a lone trailing `<` when no opening delimiter exists; otherwise `""`.
 */
function getTrailingPartialDeepseekToken(text: string): string {
	let bestIdx = -1;
	for (const delim of DEEPSEEK_OPEN_DELIMS) {
		const idx = text.lastIndexOf(delim);
		if (idx > bestIdx) bestIdx = idx;
	}
	if (bestIdx === -1) {
		// A solo `<` may be the first character of a token split across chunks.
		return text.endsWith("<") ? "<" : "";
	}
	const tail = text.slice(bestIdx);
	// Already closed by either closing delimiter: nothing to hold back.
	if (tail.includes("\uFF5C>") || tail.includes("|>")) return "";
	// Cap the held-back length so a stray opening delimiter in normal prose can't grow unboundedly.
	if (tail.length > 256) return "";
	return tail;
}
382
/**
 * Message reported when the stream produces no first event in time. It is used both
 * for the locally raised abort error and as the first-item error message handed to
 * the idle-timeout iterator.
 */
const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE =
	"OpenAI completions stream timed out while waiting for the first event";
384
+
385
/** Idle-timeout floor (10 minutes, in ms) applied to GLM coding-plan models. */
const GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS = 600_000;
/** Matches ids that are exactly `glm-5` or start with `glm-5.` / `glm-5-`, case-insensitively. */
const GLM_CODING_PLAN_MODEL_PATTERN = /^glm-5(?:[.-]|$)/i;

/**
 * Widened OpenAI stream watchdog floor for slow GLM coding-plan reasoning models.
 *
 * @param model - Model about to be streamed.
 * @returns The GLM idle timeout when the id matches the GLM pattern and the model is
 *   served by the `zhipu-coding-plan` / `zai` provider or from an
 *   `open.bigmodel.cn` / `api.z.ai` base URL; `undefined` otherwise.
 */
export function getOpenAICompletionsStreamIdleTimeoutFallbackMs(
	model: Model<"openai-completions">,
): number | undefined {
	const isGlmModel = GLM_CODING_PLAN_MODEL_PATTERN.test(model.id);
	if (!isGlmModel) {
		return undefined;
	}

	const isGlmProvider = model.provider === "zhipu-coding-plan" || model.provider === "zai";
	if (isGlmProvider) {
		return GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS;
	}

	// Other providers qualify only when they point at a known GLM host.
	const loweredBaseUrl = model.baseUrl.toLowerCase();
	const servedFromGlmHost = ["open.bigmodel.cn", "api.z.ai"].some(host => loweredBaseUrl.includes(host));
	return servedFromGlmHost ? GLM_CODING_PLAN_STREAM_IDLE_TIMEOUT_MS : undefined;
}
403
+
404
+ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
405
+ model: Model<"openai-completions">,
406
+ context: Context,
407
+ options?: OpenAICompletionsOptions,
408
+ ): AssistantMessageEventStream => {
409
+ const stream = new AssistantMessageEventStream();
410
+
411
+ (async () => {
412
+ const startTime = Date.now();
413
+ let firstTokenTime: number | undefined;
414
+ let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;
415
+
416
+ const output: AssistantMessage = createInitialResponsesAssistantMessage(model.api, model.provider, model.id);
417
+ let rawRequestDump: RawHttpRequestDump | undefined;
418
+ const abortTracker = createAbortSourceTracker(options?.signal);
419
+ const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
420
+ const { requestAbortController, requestSignal } = abortTracker;
421
+
422
+ try {
423
+ const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
424
+ const idleTimeoutFallbackMs = getOpenAICompletionsStreamIdleTimeoutFallbackMs(model);
425
+ const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs(idleTimeoutFallbackMs);
426
+ const firstEventTimeoutMs =
427
+ options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
428
+ const requestTimeoutMs =
429
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
430
+ const {
431
+ client,
432
+ copilotPremiumRequests,
433
+ baseUrl,
434
+ requestHeaders,
435
+ getCapturedErrorResponse: captureErrorResponse,
436
+ clearCapturedErrorResponse,
437
+ } = await createClient(
438
+ model,
439
+ context,
440
+ apiKey,
441
+ options?.headers,
442
+ options?.initiatorOverride,
443
+ options?.onSseEvent,
444
+ options?.fetch,
445
+ );
446
+ const premiumRequestsTotal = copilotPremiumRequests;
447
+ getCapturedErrorResponse = captureErrorResponse;
448
+ let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
449
+ const providerSessionState = getOpenAICompletionsProviderSessionState(
450
+ model,
451
+ baseUrl,
452
+ options?.providerSessionState,
453
+ );
454
+ let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
455
+ let strictFallbackErrorMessage: string | undefined;
456
+ const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
457
+ clearCapturedErrorResponse();
458
+ const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
459
+ const { params, toolStrictMode } = buildParams(
460
+ model,
461
+ context,
462
+ options,
463
+ baseUrl,
464
+ effectiveToolStrictModeOverride,
465
+ );
466
+ appliedToolStrictMode = toolStrictMode;
467
+ options?.onPayload?.(params);
468
+ rawRequestDump = {
469
+ provider: model.provider,
470
+ api: output.api,
471
+ model: model.id,
472
+ method: "POST",
473
+ url: `${baseUrl}/chat/completions`,
474
+ headers: requestHeaders,
475
+ body: params,
476
+ };
477
+ const requestOptions =
478
+ requestTimeoutMs === undefined
479
+ ? { signal: requestSignal }
480
+ : { signal: requestSignal, timeout: requestTimeoutMs };
481
+ let requestTimeout: NodeJS.Timeout | undefined;
482
+ if (requestTimeoutMs !== undefined) {
483
+ requestTimeout = setTimeout(
484
+ () => abortTracker.abortLocally(firstEventTimeoutAbortError),
485
+ requestTimeoutMs,
486
+ );
487
+ }
488
+ try {
489
+ const { data, response, request_id } = await client.chat.completions
490
+ .create(params, requestOptions)
491
+ .withResponse();
492
+ await notifyProviderResponse(options, response, model, request_id);
493
+ return data;
494
+ } catch (error) {
495
+ if (error instanceof OpenAIConnectionTimeoutError && !abortTracker.wasCallerAbort()) {
496
+ throw firstEventTimeoutAbortError;
497
+ }
498
+ throw error;
499
+ } finally {
500
+ if (requestTimeout !== undefined) clearTimeout(requestTimeout);
501
+ }
502
+ };
503
+ let openaiStream: AsyncIterable<ChatCompletionChunk>;
504
+ try {
505
+ openaiStream = await callWithCopilotModelRetry(() => createCompletionsStream(), {
506
+ provider: model.provider,
507
+ signal: requestSignal,
508
+ });
509
+ } catch (error) {
510
+ const capturedErrorResponse = getCapturedErrorResponse();
511
+ if (
512
+ isOpenRouterAnthropicModel(model) &&
513
+ !disableStrictTools &&
514
+ isCompiledGrammarTooLargeStrictError(error, capturedErrorResponse)
515
+ ) {
516
+ strictFallbackErrorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
517
+ output.errorMessage = strictFallbackErrorMessage;
518
+ if (providerSessionState) {
519
+ providerSessionState.strictToolsDisabled = true;
520
+ }
521
+ disableStrictTools = true;
522
+ openaiStream = await createCompletionsStream("none");
523
+ } else {
524
+ if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
525
+ throw error;
526
+ }
527
+ openaiStream = await createCompletionsStream("none");
528
+ }
529
+ }
530
+ if (premiumRequestsTotal !== undefined) {
531
+ output.usage.premiumRequests = premiumRequestsTotal;
532
+ }
533
+ stream.push({ type: "start", partial: output });
534
+
535
+ const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
536
+ // Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
537
+ // native API) leak chat-template tool-call markers in `delta.content` even
538
+ // though tool calls are also surfaced structurally. Strip the leaked markers
539
+ // so users don't see raw `<|...|>` tokens.
540
+ const stripDeepseekChatTemplateTokens =
541
+ /deepseek/i.test(model.id) && (model.provider === "nvidia" || model.provider === "deepseek");
542
+ type ToolCallStreamBlock = ToolCall & { partialArgs?: string; streamIndex?: number; lastParseLen?: number };
543
+ type OpenAIStreamBlock = TextContent | ThinkingContent | ToolCallStreamBlock;
544
+ const pendingToolCallBlocks: ToolCallStreamBlock[] = [];
545
+ const toolCallBlockByIndex = new Map<number, ToolCallStreamBlock>();
546
+ let currentBlock: OpenAIStreamBlock | undefined;
547
+ const blockIndex = (block: OpenAIStreamBlock | undefined): number => {
548
+ if (!block) return Math.max(0, output.content.length - 1);
549
+ return output.content.indexOf(block);
550
+ };
551
+ const finishToolCallBlock = (block: ToolCallStreamBlock): void => {
552
+ if (block.partialArgs === undefined) return;
553
+ const contentIndex = blockIndex(block);
554
+ if (contentIndex < 0) return;
555
+ block.arguments = parseStreamingJson(block.partialArgs);
556
+ delete block.partialArgs;
557
+ delete block.lastParseLen;
558
+ if (block.streamIndex !== undefined) {
559
+ toolCallBlockByIndex.delete(block.streamIndex);
560
+ delete block.streamIndex;
561
+ }
562
+ const pendingIndex = pendingToolCallBlocks.indexOf(block);
563
+ if (pendingIndex >= 0) pendingToolCallBlocks.splice(pendingIndex, 1);
564
+ stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
565
+ };
566
+ const finishPendingToolCallBlocks = (): void => {
567
+ for (const block of [...pendingToolCallBlocks]) {
568
+ finishToolCallBlock(block);
569
+ }
570
+ };
571
+ const finishCurrentBlock = (block: OpenAIStreamBlock | undefined): void => {
572
+ if (!block) return;
573
+ const contentIndex = blockIndex(block);
574
+ if (contentIndex < 0) return;
575
+ if (block.type === "text") {
576
+ stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
577
+ return;
578
+ }
579
+ if (block.type === "thinking") {
580
+ stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
581
+ return;
582
+ }
583
+ finishToolCallBlock(block);
584
+ };
585
+ const appendText = (
586
+ message: AssistantMessage,
587
+ eventStream: AssistantMessageEventStream,
588
+ text: string,
589
+ ): void => {
590
+ if (currentBlock?.type !== "text") {
591
+ finishCurrentBlock(currentBlock);
592
+ currentBlock = { type: "text", text: "" };
593
+ message.content.push(currentBlock);
594
+ eventStream.push({ type: "text_start", contentIndex: blockIndex(currentBlock), partial: message });
595
+ }
596
+ currentBlock.text += text;
597
+ eventStream.push({
598
+ type: "text_delta",
599
+ contentIndex: blockIndex(currentBlock),
600
+ delta: text,
601
+ partial: message,
602
+ });
603
+ };
604
+ const appendThinking = (
605
+ message: AssistantMessage,
606
+ eventStream: AssistantMessageEventStream,
607
+ thinking: string,
608
+ signature?: string,
609
+ ): void => {
610
+ if (
611
+ currentBlock?.type !== "thinking" ||
612
+ (signature !== undefined && currentBlock.thinkingSignature !== signature)
613
+ ) {
614
+ finishCurrentBlock(currentBlock);
615
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: signature };
616
+ message.content.push(currentBlock);
617
+ eventStream.push({
618
+ type: "thinking_start",
619
+ contentIndex: blockIndex(currentBlock),
620
+ partial: message,
621
+ });
622
+ }
623
+ if (signature !== undefined && !currentBlock.thinkingSignature) {
624
+ currentBlock.thinkingSignature = signature;
625
+ }
626
+ currentBlock.thinking += thinking;
627
+ eventStream.push({
628
+ type: "thinking_delta",
629
+ contentIndex: blockIndex(currentBlock),
630
+ delta: thinking,
631
+ partial: message,
632
+ });
633
+ };
634
+
635
+ const appendTextDelta = (text: string): void => {
636
+ if (!text) return;
637
+ if (!firstTokenTime) firstTokenTime = Date.now();
638
+ appendText(output, stream, text);
639
+ };
640
+ const appendThinkingDelta = (thinking: string, signature?: string): void => {
641
+ if (!thinking) return;
642
+ if (!firstTokenTime) firstTokenTime = Date.now();
643
+ appendThinking(output, stream, thinking, signature);
644
+ };
645
+
646
+ let deepseekStripBuffer = "";
647
+ const flushDeepseekStripBuffer = (final: boolean): void => {
648
+ if (deepseekStripBuffer.length === 0) return;
649
+ let flushable: string;
650
+ if (final) {
651
+ flushable = deepseekStripBuffer;
652
+ deepseekStripBuffer = "";
653
+ } else {
654
+ const trailing = getTrailingPartialDeepseekToken(deepseekStripBuffer);
655
+ flushable = deepseekStripBuffer.slice(0, deepseekStripBuffer.length - trailing.length);
656
+ deepseekStripBuffer = trailing;
657
+ }
658
+ const stripped = stripDeepseekSpecialTokens(flushable);
659
+ if (stripped && (stripped === flushable || stripped.trim().length > 0)) appendTextDelta(stripped);
660
+ };
661
+ const appendProcessedText = (processedText: string): void => {
662
+ if (processedText.length === 0) return;
663
+ if (stripDeepseekChatTemplateTokens) {
664
+ deepseekStripBuffer += processedText;
665
+ flushDeepseekStripBuffer(false);
666
+ } else {
667
+ appendTextDelta(processedText);
668
+ }
669
+ };
670
+
671
+ const streamMarkupHealingPattern = getStreamMarkupHealingPattern(model.provider, model.id, {
672
+ parseThinkingTags: parseMiniMaxThinkTags,
673
+ });
674
+ const streamMarkupHealing = streamMarkupHealingPattern
675
+ ? new StreamMarkupHealing({ pattern: streamMarkupHealingPattern })
676
+ : undefined;
677
+ let healedToolCallEmitted = false;
678
+ const emitHealedToolCall = (call: HealedToolCall): void => {
679
+ finishCurrentBlock(currentBlock);
680
+ const block: ToolCall & { partialArgs: string } = {
681
+ type: "toolCall",
682
+ id: call.id,
683
+ name: call.name,
684
+ arguments: {},
685
+ partialArgs: call.arguments,
686
+ };
687
+ block.arguments = parseStreamingJson(call.arguments);
688
+ currentBlock = block;
689
+ output.content.push(block);
690
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(block), partial: output });
691
+ stream.push({
692
+ type: "toolcall_delta",
693
+ contentIndex: blockIndex(block),
694
+ delta: call.arguments,
695
+ partial: output,
696
+ });
697
+ finishCurrentBlock(block);
698
+ currentBlock = undefined;
699
+ healedToolCallEmitted = true;
700
+ };
701
+ const emitHealingEvent = (event: StreamMarkupHealingEvent): void => {
702
+ if (event.type === "text") {
703
+ appendProcessedText(event.text);
704
+ } else if (event.type === "thinking") {
705
+ appendThinkingDelta(event.thinking);
706
+ } else {
707
+ emitHealedToolCall(event.call);
708
+ }
709
+ };
710
+ const flushHealedToolCalls = (): void => {
711
+ if (!streamMarkupHealing) return;
712
+ const calls = streamMarkupHealing.drainCompleted();
713
+ for (const call of calls) emitHealedToolCall(call);
714
+ };
715
+
716
+ for await (const chunk of iterateWithIdleTimeout(openaiStream, {
717
+ idleTimeoutMs,
718
+ firstItemTimeoutMs: firstEventTimeoutMs,
719
+ firstItemErrorMessage: OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE,
720
+ errorMessage: "OpenAI completions stream stalled while waiting for the next event",
721
+ onIdle: () => requestAbortController.abort(),
722
+ onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
723
+ abortSignal: options?.signal,
724
+ isProgressItem: isOpenAICompletionsProgressChunk,
725
+ })) {
726
+ if (!chunk || typeof chunk !== "object") continue;
727
+
728
+ // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
729
+ // and each chunk in a streamed completion carries the same id.
730
+ output.responseId ||= chunk.id;
731
+
732
+ if (chunk.usage) {
733
+ output.usage = parseChunkUsage(chunk.usage, model, premiumRequestsTotal);
734
+ }
735
+
736
+ const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
737
+ if (!choice) continue;
738
+
739
+ if (!chunk.usage) {
740
+ const choiceUsage = getChoiceUsage(choice);
741
+ if (choiceUsage) {
742
+ output.usage = parseChunkUsage(choiceUsage, model, premiumRequestsTotal);
743
+ }
744
+ }
745
+
746
+ if (choice.finish_reason) {
747
+ const finishReasonResult = mapStopReason(choice.finish_reason);
748
+ output.stopReason = finishReasonResult.stopReason;
749
+ if (finishReasonResult.errorMessage) {
750
+ output.errorMessage = finishReasonResult.errorMessage;
751
+ }
752
+ }
753
+
754
+ if (choice.delta) {
755
+ const normalizedDeltaText = normalizeStreamingContentText(choice.delta.content);
756
+ if (normalizedDeltaText.length > 0) {
757
+ if (!firstTokenTime) firstTokenTime = Date.now();
758
+ const hasStructuredToolCalls =
759
+ Array.isArray(choice.delta.tool_calls) && choice.delta.tool_calls.length > 0;
760
+
761
+ if (streamMarkupHealing) {
762
+ if (hasStructuredToolCalls) {
763
+ // Same chunk leaks markers AND carries structured tool_calls.
764
+ // Strip the marker text from visible output, but drop any
765
+ // synthesized calls so the structured payload stays the
766
+ // single source of truth (avoids double-dispatch).
767
+ appendProcessedText(streamMarkupHealing.consumeWithoutCalls(normalizedDeltaText));
768
+ } else {
769
+ for (const event of streamMarkupHealing.feedEvents(normalizedDeltaText)) {
770
+ emitHealingEvent(event);
771
+ }
772
+ }
773
+ } else {
774
+ appendProcessedText(normalizedDeltaText);
775
+ }
776
+ }
777
+
778
+ // Some endpoints return reasoning in reasoning_content (llama.cpp),
779
+ // or reasoning (other openai compatible endpoints)
780
+ // Use the first non-empty reasoning field to avoid duplication
781
+ // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
782
+ const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
783
+ let foundReasoningField: string | null = null;
784
+ for (const field of reasoningFields) {
785
+ if (
786
+ (choice.delta as any)[field] !== null &&
787
+ (choice.delta as any)[field] !== undefined &&
788
+ (choice.delta as any)[field].length > 0
789
+ ) {
790
+ if (!foundReasoningField) {
791
+ foundReasoningField = field;
792
+ break;
793
+ }
794
+ }
795
+ }
796
+
797
+ if (foundReasoningField) {
798
+ const delta = (choice.delta as any)[foundReasoningField];
799
+ appendThinkingDelta(delta, foundReasoningField);
800
+ }
801
+
802
+ if (choice?.delta?.tool_calls && choice.delta.tool_calls.length > 0) {
803
+ for (const toolCall of choice.delta.tool_calls) {
804
+ const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
805
+ let block = streamIndex !== undefined ? toolCallBlockByIndex.get(streamIndex) : undefined;
806
+ if (!block && toolCall.id) {
807
+ block = pendingToolCallBlocks.find(candidate => candidate.id === toolCall.id);
808
+ }
809
+ if (
810
+ !block &&
811
+ currentBlock?.type === "toolCall" &&
812
+ (!toolCall.id || currentBlock.id === toolCall.id)
813
+ ) {
814
+ block = currentBlock;
815
+ }
816
+
817
+ if (!block) {
818
+ if (currentBlock?.type !== "toolCall") {
819
+ finishCurrentBlock(currentBlock);
820
+ }
821
+ block = {
822
+ type: "toolCall",
823
+ id: toolCall.id || "",
824
+ name: toolCall.function?.name || "",
825
+ arguments: {},
826
+ partialArgs: "",
827
+ streamIndex,
828
+ };
829
+ if (streamIndex !== undefined) toolCallBlockByIndex.set(streamIndex, block);
830
+ pendingToolCallBlocks.push(block);
831
+ currentBlock = block;
832
+ output.content.push(block);
833
+ stream.push({
834
+ type: "toolcall_start",
835
+ contentIndex: blockIndex(block),
836
+ partial: output,
837
+ });
838
+ } else {
839
+ currentBlock = block;
840
+ if (streamIndex !== undefined && block.streamIndex === undefined) {
841
+ block.streamIndex = streamIndex;
842
+ toolCallBlockByIndex.set(streamIndex, block);
843
+ }
844
+ }
845
+
846
+ if (toolCall.id) block.id = toolCall.id;
847
+ if (toolCall.function?.name) block.name = toolCall.function.name;
848
+ let delta = "";
849
+ if (toolCall.function?.arguments) {
850
+ delta = toolCall.function.arguments;
851
+ block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
852
+ const throttled = parseStreamingJsonThrottled(block.partialArgs, block.lastParseLen ?? 0);
853
+ if (throttled) {
854
+ block.arguments = throttled.value;
855
+ block.lastParseLen = throttled.parsedLen;
856
+ }
857
+ }
858
+ stream.push({
859
+ type: "toolcall_delta",
860
+ contentIndex: blockIndex(block),
861
+ delta,
862
+ partial: output,
863
+ });
864
+ }
865
+ }
866
+
867
+ const reasoningDetails = (choice.delta as any).reasoning_details;
868
+ if (reasoningDetails && Array.isArray(reasoningDetails)) {
869
+ for (const detail of reasoningDetails) {
870
+ if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
871
+ const matchingToolCall = output.content.find(
872
+ b => b.type === "toolCall" && b.id === detail.id,
873
+ ) as ToolCall | undefined;
874
+ if (matchingToolCall) {
875
+ matchingToolCall.thoughtSignature = JSON.stringify(detail);
876
+ }
877
+ }
878
+ }
879
+ }
880
+ }
881
+ }
882
+
883
+ if (streamMarkupHealing) {
884
+ for (const event of streamMarkupHealing.flushEvents()) {
885
+ emitHealingEvent(event);
886
+ }
887
+ flushHealedToolCalls();
888
+ if (healedToolCallEmitted && output.stopReason === "stop") {
889
+ // Hosts that leak tool-call templates often still report
890
+ // `finish_reason: stop` for the surrounding turn. Promote
891
+ // only that natural-completion finish — leave `error`,
892
+ // `length`, `aborted`, etc. untouched.
893
+ output.stopReason = "toolUse";
894
+ }
895
+ }
896
+
897
+ if (stripDeepseekChatTemplateTokens) {
898
+ flushDeepseekStripBuffer(true);
899
+ }
900
+
901
+ if (currentBlock?.type === "toolCall") {
902
+ finishPendingToolCallBlocks();
903
+ } else {
904
+ finishCurrentBlock(currentBlock);
905
+ finishPendingToolCallBlocks();
906
+ }
907
+
908
+ // Some OpenAI-compatible hosts stream structured `tool_calls` but report
909
+ // `finish_reason: "stop"` instead of `"tool_calls"`. In the OpenAI contract a
910
+ // tool call always means "execute and continue", so promote that
911
+ // natural-completion finish to `toolUse` whenever the turn produced tool-call
912
+ // blocks — the agent loop gates execution on the stop reason. `error`,
913
+ // `length`, and `aborted` are intentionally left untouched. (Anthropic's
914
+ // distinct `end_turn`-with-tool-calls "abandon" semantics live in its own
915
+ // provider and correctly keep `stop`.)
916
+ if (output.stopReason === "stop" && output.content.some(b => b.type === "toolCall")) {
917
+ output.stopReason = "toolUse";
918
+ }
919
+
920
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
921
+ if (firstEventTimeoutError) {
922
+ throw firstEventTimeoutError;
923
+ }
924
+ if (abortTracker.wasCallerAbort()) {
925
+ throw new Error("Request was aborted");
926
+ }
927
+
928
+ if (output.stopReason === "aborted") {
929
+ throw new Error("Request was aborted");
930
+ }
931
+ if (output.stopReason === "error") {
932
+ throw new Error(output.errorMessage || "Provider returned an error stop reason");
933
+ }
934
+
935
+ output.errorMessage = strictFallbackErrorMessage;
936
+ output.duration = Date.now() - startTime;
937
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
938
+ stream.push({ type: "done", reason: output.stopReason, message: output });
939
+ stream.end();
940
+ } catch (error) {
941
+ for (const block of output.content) delete (block as any).index;
942
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
943
+ output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
944
+ output.errorStatus = extractHttpStatusFromError(error) ?? getCapturedErrorResponse?.()?.status;
945
+ output.errorMessage =
946
+ firstEventTimeoutError?.message ??
947
+ (await finalizeErrorMessage(error, rawRequestDump, getCapturedErrorResponse?.()));
948
+ // Some providers via OpenRouter include extra details here.
949
+ const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
950
+ if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
951
+ output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
952
+ output.duration = Date.now() - startTime;
953
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
954
+ stream.push({ type: "error", reason: output.stopReason, error: output });
955
+ stream.end();
956
+ }
957
+ })();
958
+
959
+ return stream;
960
+ };
961
+
962
/**
 * Creates the OpenAI SDK client used for a single chat-completions request.
 *
 * Resolves the API key (argument first, then `OPENAI_API_KEY`), assembles the
 * request headers (model headers, OpenRouter attribution/caching headers,
 * caller-supplied extras, Kimi common headers, Copilot dynamic headers),
 * adjusts the base URL for GitHub Copilot and Azure OpenAI, and wraps `fetch`
 * so the most recent non-OK HTTP response can be inspected later.
 *
 * @param model - Model descriptor; supplies provider, id, headers and base URL.
 * @param context - Conversation context; its messages feed the Copilot header builder.
 * @param apiKey - Explicit API key; falls back to `OPENAI_API_KEY` when omitted.
 * @param extraHeaders - Headers merged over the model/OpenRouter headers.
 * @param initiatorOverride - Passed through to the Copilot header builder.
 * @param onSseEvent - When set, the fetch is additionally wrapped for SSE debugging.
 * @param fetchOverride - Replacement for the global `fetch`.
 * @returns The client plus the resolved base URL, the headers sent, the Copilot
 *   premium-request count, and accessors for the captured error response.
 * @throws Error when no API key is available from either source.
 */
async function createClient(
	model: Model<"openai-completions">,
	context: Context,
	apiKey?: string,
	extraHeaders?: Record<string, string>,
	initiatorOverride?: MessageAttribution,
	onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
	fetchOverride?: FetchImpl,
): Promise<{
	client: OpenAI;
	copilotPremiumRequests: number | undefined;
	baseUrl: string | undefined;
	requestHeaders: Record<string, string>;
	getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
	clearCapturedErrorResponse: () => void;
}> {
	if (!apiKey) {
		const envKey = $env.OPENAI_API_KEY;
		if (!envKey) {
			throw new Error(
				"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
			);
		}
		apiKey = envKey;
	}
	// Keep the untouched key: the Copilot branch below replaces `apiKey` with the
	// parsed access token but still needs the raw value to resolve the base URL.
	const rawApiKey = apiKey;

	let headers = { ...model.headers };
	if (model.provider === "openrouter") {
		// OpenRouter app attribution (https://openrouter.ai/docs/app-attribution):
		// `HTTP-Referer` identifies the app, `X-OpenRouter-Title` is its display
		// name, `X-OpenRouter-Categories` files it under `cli-agent`, and the custom
		// `User-Agent` replaces the SDK default so upstream logs can identify us.
		// The cache headers opt into OpenRouter response caching with a one-hour TTL
		// (https://openrouter.ai/docs/features/response-caching).
		Object.assign(headers, {
			"User-Agent": `Aery/${packageJson.version}`,
			"HTTP-Referer": "https://aery.dev/",
			"X-OpenRouter-Title": "Aery",
			"X-OpenRouter-Categories": "cli-agent",
			"X-OpenRouter-Cache": "true",
			"X-OpenRouter-Cache-TTL": "3600",
		});
	}
	// Caller-supplied headers win over everything set so far.
	Object.assign(headers, extraHeaders);
	if (model.provider === "kimi-code") {
		// Kimi's common headers act as defaults; anything already present overrides them.
		headers = { ...getKimiCommonHeaders(), ...headers };
	}

	let copilotPremiumRequests: number | undefined;
	let baseUrl = model.baseUrl;
	if (model.provider === "github-copilot") {
		apiKey = parseGitHubCopilotApiKey(rawApiKey).accessToken;
		const copilot = buildCopilotDynamicHeaders({
			messages: context.messages,
			hasImages: hasCopilotVisionInput(context.messages),
			premiumMultiplier: model.premiumMultiplier,
			headers,
			initiatorOverride,
		});
		Object.assign(headers, copilot.headers);
		copilotPremiumRequests = copilot.premiumRequests;
		baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
	}

	// Azure OpenAI expects /deployments/{id}/chat/completions?api-version=YYYY-MM-DD;
	// the generic path supplies neither piece, which shows up as a silent 404.
	let azureDefaultQuery: Record<string, string> | undefined;
	if (baseUrl?.includes(".openai.azure.com")) {
		if (!baseUrl.includes("/deployments/")) {
			baseUrl = `${baseUrl}/deployments/${model.id}`;
		}
		azureDefaultQuery = { "api-version": $env.AZURE_OPENAI_API_VERSION || "2024-10-21" };
	}

	let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
	const baseFetch = fetchOverride ?? fetch;

	// Best-effort read of a failed response body from a clone, so the SDK can
	// still consume the original. Read and parse failures are deliberately ignored.
	const readErrorBody = async (
		response: Response,
	): Promise<{ bodyText: string | undefined; bodyJson: unknown }> => {
		let bodyText: string | undefined;
		let bodyJson: unknown;
		try {
			bodyText = await response.clone().text();
			if (bodyText.trim().length > 0) {
				try {
					bodyJson = JSON.parse(bodyText);
				} catch {}
			}
		} catch {}
		return { bodyText, bodyJson };
	};

	const wrappedFetch = Object.assign(
		async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
			const response = await baseFetch(input, init);
			if (!response.ok) {
				const { bodyText, bodyJson } = await readErrorBody(response);
				capturedErrorResponse = {
					status: response.status,
					headers: response.headers,
					bodyText,
					bodyJson,
				};
			} else {
				// A successful response invalidates any previously captured failure.
				capturedErrorResponse = undefined;
			}
			return response;
		},
		// Carry over `preconnect` when the underlying fetch implementation has one.
		baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
	);
	const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;

	const client = new OpenAI({
		apiKey,
		baseURL: baseUrl,
		dangerouslyAllowBrowser: true,
		maxRetries: 5,
		defaultHeaders: headers,
		defaultQuery: azureDefaultQuery,
		fetch: debugFetch,
	});

	return {
		client,
		copilotPremiumRequests,
		baseUrl,
		requestHeaders: headers,
		getCapturedErrorResponse: () => capturedErrorResponse,
		clearCapturedErrorResponse: () => {
			capturedErrorResponse = undefined;
		},
	};
}
1087
+
1088
/**
 * Builds the streaming chat-completions request body for `model`.
 *
 * Converts the conversation, then layers on token limits, sampling options,
 * service tier, tools / tool choice, provider-specific reasoning controls,
 * gateway routing preferences and any compat-supplied extra body fields.
 *
 * @param model - Target model descriptor.
 * @param context - Conversation (messages, tools, system prompt).
 * @param options - Per-request options; may be undefined.
 * @param resolvedBaseUrl - Base URL actually used for the request; forwarded to compat detection.
 * @param toolStrictModeOverride - Forwarded to the tool converter.
 * @returns The request params and the tool strict mode that was applied.
 * @throws Error when reasoning must be disabled through the effort knob but the
 *   model reports no supported efforts.
 */
function buildParams(
	model: Model<"openai-completions">,
	context: Context,
	options: OpenAICompletionsOptions | undefined,
	resolvedBaseUrl?: string,
	toolStrictModeOverride?: ToolStrictModeOverride,
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode } {
	const compat = getCompat(model, resolvedBaseUrl);

	// Opencode Zen's gateway (https://opencode.ai/zen/go/v1) ties
	// `reasoning_content` to the request's thinking state for every model it
	// fronts (Kimi K2.x, DeepSeek V4, GLM-5.x, Qwen3.x, MiMo, MiniMax, …):
	//   - thinking off + field present → 400 `Extra inputs are not permitted` (#1071)
	//   - thinking on + field absent   → 400 `thinking is enabled but
	//     reasoning_content is missing in assistant tool call message at index N` (#1484)
	// `detectOpenAICompat` only turns on `requiresReasoningContentForToolCalls`
	// for the DeepSeek family (Kimi was carved out for opencode in #1071), so it
	// is switched back on here, per request, for any opencode model whose turn is
	// in thinking mode — prior tool-call turns then replay reasoning_content.
	// Forced-tool turns are left alone: the `disableReasoningOnForcedToolChoice`
	// guard further down removes thinking from the wire body for Kimi-style
	// models, and replaying under those conditions would bring #1071 back.
	//
	// `allowsSyntheticReasoningContentForToolCalls` is forced off on the same
	// path. The gateway insists on the `reasoning_content` key specifically; the
	// synthetic-friendly default would echo whichever field upstream streamed
	// (often `reasoning`), putting the replay under the wrong key and triggering
	// the 400 again.
	const servedByOpenCode = model.provider === "opencode-go" || model.provider === "opencode-zen";
	const thinkingActive = Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
	const forcedToolTurnDropsThinking =
		compat.disableReasoningOnForcedToolChoice &&
		isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
	if (servedByOpenCode && thinkingActive && !forcedToolTurnDropsThinking) {
		compat.requiresReasoningContentForToolCalls = true;
		compat.allowsSyntheticReasoningContentForToolCalls = false;
		compat.reasoningContentField = "reasoning_content";
	}

	const looksLikeKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
	const messages = convertMessages(model, context, compat);
	maybeAddOpenRouterAnthropicCacheControl(model, messages);
	const supportsReasoningParams = model.provider !== "github-copilot";

	// Kimi — including OpenRouter and Fireworks router-form ids such as
	// `accounts/fireworks/routers/kimi-*` — computes TPM rate limits from
	// max_tokens rather than from real output, and the official Kimi K2 guidance
	// (https://docs.fireworks.ai/models/kimi-k2) requires `max_tokens` on every
	// call because the family can otherwise produce very long reasoning traces
	// before answering. So max_tokens is always sent for Kimi-family ids, using
	// the same regex as the compat detector.
	// The direct kimi-code provider is served by the dedicated provider in kimi.ts.
	const maxTokens = options?.maxTokens ?? (looksLikeKimi ? model.maxTokens : undefined);

	const params: OpenAICompletionsParams = {
		model: resolveOpenAICompletionsModelId(model, options),
		messages,
		stream: true,
	};
	let toolStrictMode: AppliedToolStrictMode = "none";

	if (compat.supportsUsageInStreaming !== false) {
		params.stream_options = { include_usage: true };
	}
	if (compat.supportsStore) {
		params.store = false;
	}

	if (maxTokens) {
		// Hosts disagree on which field carries the output cap.
		if (compat.maxTokensField === "max_tokens") params.max_tokens = maxTokens;
		else params.max_completion_tokens = maxTokens;
	}

	// Sampling knobs: each is forwarded only when the caller set it explicitly.
	if (options?.temperature !== undefined) params.temperature = options.temperature;
	if (options?.topP !== undefined) params.top_p = options.topP;
	if (options?.topK !== undefined) params.top_k = options.topK;
	if (options?.minP !== undefined) params.min_p = options.minP;
	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
	const stopSequences = options?.stopSequences;
	if (stopSequences?.length) {
		// A single sequence is sent as a bare string; otherwise at most four are sent.
		params.stop = stopSequences.length === 1 ? stopSequences[0] : stopSequences.slice(0, 4);
	}
	if (options?.frequencyPenalty !== undefined) params.frequency_penalty = options.frequencyPenalty;

	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
		const tier = resolveServiceTier(options?.serviceTier, model.provider);
		if (tier === "flex" || tier === "scale" || tier === "priority") {
			params.service_tier = tier;
		}
	}

	if (context.tools?.length) {
		const converted = convertTools(context.tools, compat, toolStrictModeOverride);
		params.tools = converted.tools;
		toolStrictMode = converted.toolStrictMode;
	} else if (context.tools === undefined && hasToolHistory(context.messages)) {
		// Anthropic (via LiteLLM/proxy) needs the `tools` param whenever the
		// conversation holds tool_calls/tool_results, even if no tools are offered
		// this turn. The sentinel is injected only when the caller left
		// `context.tools` undefined (tools not specified at all). An explicit
		// `context.tools = []` is an opt-out for this turn (as /btw and IRC
		// background replies do via AgentSession.runEphemeralTurn) — that intent is
		// honoured and nothing is emitted, so LiteLLM → Bedrock never sees an empty
		// `toolConfig` block.
		params.tools = [];
	}

	if (options?.toolChoice && compat.supportsToolChoice) {
		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
	}

	const hasNoTools = !Array.isArray(params.tools) || params.tools.length === 0;
	if (params.tool_choice === "none" && hasNoTools) {
		// `tool_choice: "none"` with nothing to gate is redundant, and it breaks
		// LiteLLM → Bedrock: the proxy serializes the directive into a `toolConfig`
		// block, and Bedrock demands a non-empty `toolConfig.tools` whenever the
		// conversation already contains `toolUse`/`toolResult` content. Drop it when
		// the resolved tools list is missing or empty. Side-channel turns hit this:
		// `/btw` and IRC background replies go through
		// `AgentSession.runEphemeralTurn`, which sets `context.tools = []` and
		// `toolChoice: "none"` (see packages/coding-agent/src/session/agent-session.ts).
		delete params.tool_choice;
	}

	// Reasoning controls. Every variant requires a reasoning-capable model on a
	// provider that accepts reasoning params; the wire shape depends on the host.
	if (supportsReasoningParams && model.reasoning) {
		switch (compat.thinkingFormat) {
			case "zai": {
				// Z.ai thinking is binary ({ type: "enabled" | "disabled" }) and
				// defaults to enabled, so "off" has to be stated explicitly.
				const thinkingOn = options?.reasoning && !options?.disableReasoning;
				params.thinking = { type: thinkingOn ? "enabled" : "disabled" };
				break;
			}
			case "qwen":
				// Qwen: top-level boolean `enable_thinking`.
				params.enable_thinking = !!options?.reasoning && !options?.disableReasoning;
				break;
			case "qwen-chat-template":
				params.chat_template_kwargs = {
					enable_thinking: !!options?.reasoning && !options?.disableReasoning,
				};
				break;
			case "openrouter": {
				// OpenRouter normalizes reasoning through a nested `reasoning` object.
				// With no explicit signal it lets reasoning models think by default,
				// which can silently eat the whole output budget on small `max_tokens`
				// requests (e.g. title generation); `disableReasoning` opts out cleanly.
				const openRouterParams = params as typeof params & {
					reasoning?: { effort?: string } | { enabled: false };
				};
				if (options?.disableReasoning) {
					openRouterParams.reasoning = { enabled: false };
				} else if (options?.reasoning) {
					openRouterParams.reasoning = {
						effort: mapReasoningEffort(options.reasoning, compat.reasoningEffortMap),
					};
				}
				break;
			}
			default:
				if (compat.supportsReasoningEffort) {
					if (options?.reasoning && !options?.disableReasoning) {
						// OpenAI-style `reasoning_effort`.
						params.reasoning_effort = mapReasoningEffort(options.reasoning, compat.reasoningEffortMap) as Effort;
					} else if (options?.disableReasoning && !options?.reasoning) {
						// Generic effort-based endpoints have no real off switch, so the
						// model's lowest supported effort is the nearest transport-level
						// stand-in when the caller asks for reasoning to be disabled.
						const lowestEffort = getSupportedEfforts(model)[0];
						if (lowestEffort === undefined) {
							throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
						}
						params.reasoning_effort = mapReasoningEffort(lowestEffort, compat.reasoningEffortMap) as Effort;
					}
				}
		}
	}

	if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
		// DeepSeek reasoning models take tools/tool_choice but reject that control
		// field while thinking is on. Keep the tool-selection contract and drop
		// reasoning for just this request.
		delete params.reasoning_effort;
		delete params.reasoning;
	}

	if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
		// Backends such as Kimi answer 400 with `tool_choice 'specified' is
		// incompatible with thinking enabled`. Drop thinking for this one
		// forced-tool turn and leave the tool-selection contract untouched.
		delete params.reasoning_effort;
		delete params.reasoning;
		if (compat.thinkingFormat === "zai") {
			params.thinking = { type: "disabled" };
		}
	}

	// OpenRouter provider routing preferences.
	if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
		params.provider = compat.openRouterRouting;
	}

	// Vercel AI Gateway provider routing preferences.
	if (model.baseUrl.includes("ai-gateway.vercel.sh") && model.compat?.vercelGatewayRouting) {
		const { only, order } = model.compat.vercelGatewayRouting;
		if (only || order) {
			const gatewayOptions: Record<string, string[]> = {};
			if (only) gatewayOptions.only = only;
			if (order) gatewayOptions.order = order;
			params.providerOptions = { gateway: gatewayOptions };
		}
	}

	// Compat-supplied extra body fields are applied last and override anything above.
	if (compat.extraBody) {
		Object.assign(params, compat.extraBody);
	}

	return { params, toolStrictMode };
}
1324
+
1325
/**
 * Reads `key` from `value` and returns it only when it is a number.
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property value when `typeof` reports `"number"`, otherwise `undefined`.
 */
function getOptionalNumberProperty(value: object, key: string): number | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (typeof candidate !== "number") {
		return undefined;
	}
	return candidate;
}
1329
+
1330
/**
 * Reads `key` from `value` and returns it only when it is a non-null object
 * (arrays included, since `typeof` reports them as `"object"`).
 *
 * @param value - Object to read from.
 * @param key - Property name to look up.
 * @returns The property value when it is a non-null object, otherwise `undefined`.
 */
function getOptionalObjectProperty(value: object, key: string): object | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (candidate === null || typeof candidate !== "object") {
		return undefined;
	}
	return candidate;
}
1334
+
1335
/**
 * Returns the `usage` object attached to a streamed choice, if any.
 *
 * @param choice - One choice from a chat-completion chunk.
 * @returns The choice's `usage` value when it is a non-null object, otherwise `undefined`.
 */
function getChoiceUsage(choice: ChatCompletionChunk.Choice): object | undefined {
	const choiceUsage = getOptionalObjectProperty(choice, "usage");
	return choiceUsage;
}
1338
+
1339
/**
 * Converts a raw usage payload from a streamed chunk into the message usage shape.
 *
 * Field sources, in lookup order:
 * - cache reads: top-level `cached_tokens`, then top-level
 *   `prompt_cache_hit_tokens`, then `prompt_tokens_details.cached_tokens`.
 * - cache writes: `prompt_tokens_details.cache_write_tokens` (OpenRouter), then
 *   top-level `prompt_cache_miss_tokens` (DeepSeek).
 *
 * @param rawUsage - Usage object exactly as received.
 * @param model - Model whose pricing `calculateCost` applies to the result.
 * @param premiumRequests - Included in the result only when defined.
 * @returns Usage with token counts and cost filled in by `calculateCost`.
 */
export function parseChunkUsage(
	rawUsage: object,
	model: Model<"openai-completions">,
	premiumRequests: number | undefined,
): AssistantMessage["usage"] {
	// Numeric lookup that tolerates a missing container object.
	const readNumber = (source: object | undefined, key: string): number | undefined =>
		source === undefined ? undefined : getOptionalNumberProperty(source, key);

	const promptDetails = getOptionalObjectProperty(rawUsage, "prompt_tokens_details");
	const completionDetails = getOptionalObjectProperty(rawUsage, "completion_tokens_details");

	const deepSeekHitTokens = readNumber(rawUsage, "prompt_cache_hit_tokens");
	const deepSeekMissTokens = readNumber(rawUsage, "prompt_cache_miss_tokens");
	const openRouterWriteTokens = readNumber(promptDetails, "cache_write_tokens");

	const cacheRead =
		readNumber(rawUsage, "cached_tokens") ?? deepSeekHitTokens ?? readNumber(promptDetails, "cached_tokens") ?? 0;

	// The two cache-write sources are billed differently:
	//   - OpenRouter reports `prompt_tokens_details.cache_write_tokens` and counts
	//     them inside `prompt_tokens`, billed on top of input — so they are
	//     subtracted to recover the real billed input.
	//   - DeepSeek reports top-level hit/miss counts whose sum equals
	//     `prompt_tokens`; the miss share IS the billed input and must stay in.
	// Ref: https://openrouter.ai/docs/guides/best-practices/prompt-caching
	// Ref: https://api-docs.deepseek.com/api/create-chat-completion
	// OpenRouter's figure takes precedence; DeepSeek's is the fallback.
	const resolvedCacheWrite = openRouterWriteTokens ?? deepSeekMissTokens ?? 0;

	const reasoningTokens = readNumber(completionDetails, "reasoning_tokens") ?? 0;
	const promptTokens = readNumber(rawUsage, "prompt_tokens") ?? 0;
	// `reasoning_tokens` is a subset of `completion_tokens` in OpenAI's
	// CompletionUsage spec, so the completion count alone is the billed output;
	// summing the two would double-count.
	const outputTokens = readNumber(rawUsage, "completion_tokens") ?? 0;

	// DeepSeek accounting applies only when both native fields are present, the
	// miss count is positive, and no OpenRouter cache-write figure exists. The
	// last condition prevents false positives when a DeepSeek model is routed
	// through OpenRouter, which may forward native fields next to its own.
	const isDeepSeekUsage =
		deepSeekHitTokens !== undefined &&
		deepSeekMissTokens !== undefined &&
		openRouterWriteTokens === undefined &&
		deepSeekMissTokens > 0;

	// DeepSeek exposes hit/miss only, never cache writes. Reporting miss tokens
	// as cacheWrite would make consumers count them twice, because input already
	// includes them; so nothing is subtracted from input or reported as a write.
	const cacheWrite = isDeepSeekUsage ? 0 : resolvedCacheWrite;
	const input = Math.max(0, promptTokens - cacheRead - cacheWrite);

	const usage: AssistantMessage["usage"] = {
		input,
		output: outputTokens,
		cacheRead,
		cacheWrite,
		totalTokens: input + outputTokens + cacheRead + cacheWrite,
		...(reasoningTokens > 0 ? { reasoningTokens } : {}),
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		...(premiumRequests !== undefined ? { premiumRequests } : {}),
	};
	calculateCost(model, usage);
	return usage;
}
1405
+
1406
/**
 * Translates a reasoning effort into the provider-specific value.
 *
 * @param effort - Effort level requested by the caller.
 * @param reasoningEffortMap - Optional per-effort overrides.
 * @returns The mapped value when the map has a non-nullish entry for `effort`,
 *   otherwise `effort` itself.
 */
function mapReasoningEffort(
	effort: NonNullable<OpenAICompletionsOptions["reasoning"]>,
	reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoning"]>, string>>,
): string {
	const override = reasoningEffortMap[effort];
	if (override == null) {
		return effort;
	}
	return override;
}
1412
+
1413
/**
 * Adds an Anthropic-style `cache_control: { type: "ephemeral" }` breakpoint to
 * the last cacheable text of the conversation, for Anthropic models served
 * through OpenRouter. Mutates `messages` in place; does nothing for any other
 * provider or model id.
 *
 * Messages are walked from newest to oldest, considering only `user`,
 * `assistant` and `developer` roles. The first non-empty text found gets the
 * breakpoint: string content is rewritten into a single text part, and array
 * content has its last non-empty text part annotated.
 *
 * Empty text is skipped: Anthropic does not allow `cache_control` on empty
 * text blocks, and an assistant turn that only carries tool calls can have
 * empty content. In that case the walk continues to an earlier message.
 *
 * @param model - Model descriptor; only `provider` and `id` are inspected.
 * @param messages - Converted chat messages, modified in place.
 */
function maybeAddOpenRouterAnthropicCacheControl(
	model: Model<"openai-completions">,
	messages: ChatCompletionMessageParam[],
): void {
	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;

	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg.role !== "user" && msg.role !== "assistant" && msg.role !== "developer") continue;

		const content = msg.content;
		if (typeof content === "string") {
			// An empty string cannot carry a breakpoint; keep looking further back.
			if (content.length === 0) continue;
			msg.content = [
				Object.assign({ type: "text" as const, text: content }, { cache_control: { type: "ephemeral" } }),
			];
			return;
		}

		if (!Array.isArray(content)) continue;

		// Annotate the last text part that actually has text.
		for (let j = content.length - 1; j >= 0; j--) {
			const part = content[j];
			if (part?.type === "text" && typeof part.text === "string" && part.text.length > 0) {
				Object.assign(part, { cache_control: { type: "ephemeral" } });
				return;
			}
		}
	}
}
1445
+
1446
/**
 * Convert a provider-agnostic conversation into OpenAI Chat Completions wire messages.
 *
 * Steps, in order:
 * 1. `context.messages` is passed through `transformMessages` with a tool-call-id
 *    normalizer derived from `compat` / `model.provider`.
 * 2. System prompts are emitted first, as `developer` (reasoning model with
 *    developer-role support) or `system`, either one message per prompt or coalesced
 *    into one message joined by `\n\n` when the host rejects multiple system messages.
 * 3. Each transformed message is mapped by role:
 *    - user / developer: text is made well-formed, blank text is dropped, images are
 *      sent as data URLs or replaced by a placeholder for non-vision models;
 *    - assistant: text is sent as a plain string; thinking blocks are replayed as text
 *      or as a provider reasoning field depending on `compat`; tool calls are emitted
 *      with normalized ids that are remembered for the matching tool results;
 *    - toolResult: consecutive results are batched, and any images are forwarded in a
 *      single trailing user message.
 *
 * @param model - Target model; its provider, id, `reasoning` and `input` drive several branches.
 * @param context - Conversation holding `systemPrompt` and `messages`.
 * @param compat - Resolved compatibility flags for the target backend.
 * @returns The wire messages in request order.
 */
export function convertMessages(
	model: Model<"openai-completions">,
	context: Context,
	compat: ResolvedOpenAICompat,
): ChatCompletionMessageParam[] {
	const params: ChatCompletionMessageParam[] = [];

	const normalizeToolCallId = (id: string): string => {
		if (compat.requiresMistralToolIds) return normalizeMistralToolId(id, true);

		// Handle pipe-separated IDs from OpenAI Responses API
		// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
		// These come from providers like github-copilot, openai-codex, opencode
		// Extract just the call_id part and normalize it
		if (id.includes("|")) {
			const [callId] = id.split("|");
			// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
			return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
		}

		if (model.provider === "openai") return id.length > 40 ? id.slice(0, 40) : id;
		return id;
	};
	const transformedMessages = transformMessages(context.messages, model, id => normalizeToolCallId(id));

	// Original tool-call id -> FIFO queue of ids actually emitted on the wire, so that
	// tool results can be paired with the (possibly regenerated) id of their call.
	const remappedToolCallIds = new Map<string, string[]>();
	let generatedToolCallIdCounter = 0;

	// Builds a `call_<hash>` id for tool calls whose normalized id came out blank.
	const generateFallbackToolCallId = (seed: string): string => {
		generatedToolCallIdCounter += 1;
		const hash = Bun.hash(`${model.provider}:${model.id}:${seed}:${generatedToolCallIdCounter}`).toString(36);
		return `call_${hash}`;
	};

	const rememberToolCallId = (originalId: string, normalizedId: string): void => {
		const queue = remappedToolCallIds.get(originalId);
		if (queue) {
			queue.push(normalizedId);
			return;
		}
		remappedToolCallIds.set(originalId, [normalizedId]);
	};

	// Pops the oldest remembered wire id for `originalId`, or null when none is queued.
	const consumeToolCallId = (originalId: string): string | null => {
		const queue = remappedToolCallIds.get(originalId);
		if (!queue || queue.length === 0) return null;
		const nextId = queue.shift() ?? null;
		if (queue.length === 0) remappedToolCallIds.delete(originalId);
		return nextId;
	};

	const ensureToolCallId = (rawId: string, seed: string): string => {
		const normalized = normalizeToolCallId(rawId);
		if (normalized.trim().length > 0) return normalized;
		return generateFallbackToolCallId(seed);
	};

	const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
	if (systemPrompts.length > 0) {
		const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
		const role = useDeveloperRole ? "developer" : "system";
		// Default to one block per ordered system prompt so the leading prefix
		// stays byte-identical between turns and the provider's KV cache can
		// reuse it. Hosts whose chat templates reject follow-up system messages
		// (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
		// via `compat.supportsMultipleSystemMessages = false`; in that mode we
		// coalesce into a single message joined by `\n\n`.
		if (compat.supportsMultipleSystemMessages) {
			for (const systemPrompt of systemPrompts) {
				params.push({ role, content: systemPrompt });
			}
		} else {
			params.push({ role, content: systemPrompts.join("\n\n") });
		}
	}

	let lastRole: string | null = null;

	for (let i = 0; i < transformedMessages.length; i++) {
		const msg = transformedMessages[i];
		// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
		// Insert a synthetic assistant message to bridge the gap
		if (
			compat.requiresAssistantAfterToolResult &&
			lastRole === "toolResult" &&
			(msg.role === "user" || msg.role === "developer")
		) {
			params.push({
				role: "assistant",
				content: "I have processed the tool results.",
			});
		}

		const devAsUser = !compat.supportsDeveloperRole;
		if (msg.role === "user" || msg.role === "developer") {
			const role = !devAsUser && msg.role === "developer" ? "developer" : "user";
			if (typeof msg.content === "string") {
				const text = msg.content.toWellFormed();
				if (text.trim().length === 0) continue;
				params.push({
					role,
					content: text,
				});
			} else {
				const supportsImages = model.input.includes("image");
				const content: ChatCompletionContentPart[] = [];
				let omittedImages = false;
				for (const item of msg.content) {
					if (item.type === "text") {
						const text = item.text.toWellFormed();
						if (text.trim().length === 0) continue;
						content.push({
							type: "text",
							text,
						} satisfies ChatCompletionContentPartText);
					} else if (supportsImages) {
						content.push({
							type: "image_url",
							image_url: {
								url: `data:${item.mimeType};base64,${item.data}`,
							},
						} satisfies ChatCompletionContentPartImage);
					} else {
						omittedImages = true;
					}
				}
				if (omittedImages) {
					content.push({
						type: "text",
						text: NON_VISION_IMAGE_PLACEHOLDER,
					} satisfies ChatCompletionContentPartText);
				}
				if (content.length === 0) continue;
				// NOTE(review): multi-part content is always sent with role "user", even for
				// developer messages on backends that support the developer role — confirm
				// this is intentional (the string branch above honors `role`).
				params.push({
					role: "user",
					content,
				});
			}
		} else if (msg.role === "assistant") {
			const assistantMsg: ChatCompletionAssistantMessageParam = {
				role: "assistant",
				content: null,
			};

			const textBlocks = msg.content.filter(b => b.type === "text") as TextContent[];
			// Filter out empty text blocks to avoid API validation errors
			const nonEmptyTextBlocks = textBlocks.filter(b => b.text && b.text.trim().length > 0);
			if (nonEmptyTextBlocks.length > 0) {
				// Always send assistant content as a plain string. Some OpenAI-compatible
				// backends mirror array-of-text-block payloads back to the model literally,
				// causing recursive nested content in subsequent turns.
				assistantMsg.content = nonEmptyTextBlocks.map(b => b.text.toWellFormed()).join("");
			}

			// Handle thinking blocks
			const thinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
			// Filter out empty thinking blocks to avoid API validation errors
			const nonEmptyThinkingBlocks = thinkingBlocks.filter(b => b.thinking && b.thinking.trim().length > 0);
			if (nonEmptyThinkingBlocks.length > 0) {
				if (compat.requiresThinkingAsText) {
					// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
					const thinkingText = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n\n");
					const existingContent = assistantMsg.content;
					if (typeof existingContent === "string" && existingContent.length > 0) {
						// Content was already flattened to a string above, so prepend the
						// thinking text to it. Calling an array method such as `unshift` on
						// the string would throw a TypeError at runtime.
						assistantMsg.content = `${thinkingText}\n\n${existingContent}`;
					} else {
						assistantMsg.content = [{ type: "text", text: thinkingText }];
					}
				} else if (compat.requiresReasoningContentForToolCalls) {
					// Use the streamed signature when the backend accepts whichever
					// recognized field name was emitted (allowsSynthetic=true). Backends
					// like opencode-kimi-with-thinking and DeepSeek demand the exact
					// configured `reasoningContentField` instead, so honor that here
					// rather than echoing the upstream field name.
					const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
					const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
					const wireField =
						compat.allowsSyntheticReasoningContentForToolCalls &&
						signature &&
						recognizedFields.includes(signature)
							? signature
							: signature && recognizedFields.includes(signature)
								? (compat.reasoningContentField ?? "reasoning_content")
								: undefined;
					if (wireField) {
						(assistantMsg as any)[wireField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
					}
				}
			}

			if (compat.requiresReasoningContentForToolCalls) {
				const streamedReasoningField = nonEmptyThinkingBlocks[0]?.thinkingSignature;
				const reasoningField =
					compat.allowsSyntheticReasoningContentForToolCalls &&
					(streamedReasoningField === "reasoning_content" ||
						streamedReasoningField === "reasoning" ||
						streamedReasoningField === "reasoning_text")
						? streamedReasoningField
						: (compat.reasoningContentField ?? "reasoning_content");
				const reasoningContent = (assistantMsg as any)[reasoningField];
				if (!reasoningContent) {
					const reasoning = (assistantMsg as any).reasoning;
					const reasoningText = (assistantMsg as any).reasoning_text;
					if (reasoning && reasoningField !== "reasoning") {
						(assistantMsg as any)[reasoningField] = reasoning;
					} else if (reasoningText && reasoningField !== "reasoning_text") {
						(assistantMsg as any)[reasoningField] = reasoningText;
					} else if (nonEmptyThinkingBlocks.length > 0) {
						(assistantMsg as any)[reasoningField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
					}
				}
			}

			const toolCalls = msg.content.filter(b => b.type === "toolCall") as ToolCall[];
			// Replay reasoning_content on assistant turns for backends that validate
			// thinking-mode history. DeepSeek V4 requires reasoning_content on EVERY
			// assistant turn once any prior turn included it — not just tool-call turns.
			// The replay logic has three tiers:
			// 1. Recover from thinking blocks with valid signatures (covers same-model replay
			//    where nonEmptyThinkingBlocks may have filtered out empty-text blocks)
			// 2. For providers that require the field but returned no reasoning at all
			//    (e.g. proxy-stripped reasoning_content), emit an empty string
			// 3. For providers that accept synthetic placeholders (Kimi, OpenRouter), emit "."
			// DeepSeek V4 rejects synthetic "." placeholders — it validates the exact value —
			// so the allowsSyntheticReasoningContentForToolCalls flag controls tier 3.
			const canUseSyntheticReasoningContent =
				compat.requiresReasoningContentForToolCalls &&
				compat.allowsSyntheticReasoningContentForToolCalls &&
				(compat.thinkingFormat === "openai" ||
					compat.thinkingFormat === "openrouter" ||
					compat.thinkingFormat === "zai");
			// DeepSeek-compatible reasoning models require reasoning_content on all
			// assistant turns. Providers that allow placeholders only need it on
			// tool-call turns.
			const needsReasoningOnAllTurns =
				compat.requiresReasoningContentForToolCalls && !compat.allowsSyntheticReasoningContentForToolCalls;
			const needsReasoningField = needsReasoningOnAllTurns || toolCalls.length > 0;
			let hasReasoningField =
				(assistantMsg as any).reasoning_content !== undefined ||
				(assistantMsg as any).reasoning !== undefined ||
				(assistantMsg as any).reasoning_text !== undefined;
			// Tier 1: Recover reasoning_content from ALL thinking blocks (including empty-text
			// ones) when the provider requires exact replay and rejects synthetic placeholders.
			// This covers the case where thinking blocks have valid signatures but were excluded
			// by the nonEmptyThinkingBlocks filter above, or where thinking text is empty but
			// the signature identifies the correct field name for replay.
			// Only recognized OpenAI-compat reasoning field names qualify — opaque signatures
			// from other providers (Anthropic encrypted, OpenAI Responses JSON, etc.) are not
			// valid property names for the wire message.
			if (
				needsReasoningField &&
				!hasReasoningField &&
				compat.requiresReasoningContentForToolCalls &&
				!compat.allowsSyntheticReasoningContentForToolCalls
			) {
				const allThinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
				if (allThinkingBlocks.length > 0) {
					const signature = allThinkingBlocks[0].thinkingSignature;
					const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
					if (signature && recognizedFields.includes(signature)) {
						const reasoningField = compat.reasoningContentField ?? "reasoning_content";
						(assistantMsg as any)[reasoningField] = allThinkingBlocks.map(b => b.thinking).join("\n");
						hasReasoningField = true;
					}
				}
			}
			// Tier 2: When the provider requires reasoning_content but there are genuinely no
			// thinking blocks at all (e.g. proxy stripped reasoning_content from the response),
			// emit an empty string. The field must be present; an empty string is the most honest
			// representation of "no reasoning was captured."
			if (
				needsReasoningField &&
				!hasReasoningField &&
				compat.requiresReasoningContentForToolCalls &&
				!compat.allowsSyntheticReasoningContentForToolCalls
			) {
				const reasoningField = compat.reasoningContentField ?? "reasoning_content";
				(assistantMsg as any)[reasoningField] = "";
				hasReasoningField = true;
			}
			// Tier 3: For providers that accept synthetic placeholders (Kimi, OpenRouter).
			if (toolCalls.length > 0 && canUseSyntheticReasoningContent && !hasReasoningField) {
				const reasoningField = compat.reasoningContentField ?? "reasoning_content";
				(assistantMsg as any)[reasoningField] = ".";
				hasReasoningField = true;
			}
			if (toolCalls.length > 0) {
				assistantMsg.tool_calls = toolCalls.map((tc, toolCallIndex) => {
					const toolCallId = ensureToolCallId(tc.id, `${i}:${toolCallIndex}:${tc.name}`);
					// Queue the emitted id so the matching tool result reuses it.
					rememberToolCallId(tc.id, toolCallId);
					return {
						id: normalizeMistralToolId(toolCallId, compat.requiresMistralToolIds),
						type: "function" as const,
						function: {
							name: tc.name,
							arguments: serializeToolArguments(tc.arguments),
						},
					};
				});
				// Thought signatures that parse as JSON are replayed as `reasoning_details`;
				// signatures that fail to parse are dropped.
				const reasoningDetails = toolCalls
					.filter(tc => tc.thoughtSignature)
					.map(tc => {
						try {
							return JSON.parse(tc.thoughtSignature!);
						} catch {
							return null;
						}
					})
					.filter(Boolean);
				if (reasoningDetails.length > 0) {
					(assistantMsg as any).reasoning_details = reasoningDetails;
				}
			}
			// Some OpenAI-compatible backends concatenate assistant content as a
			// string even for tool-call replay. OpenAI accepts an empty string here;
			// null trips strict/proxy implementations before the tool result is read.
			if (assistantMsg.content === null && (hasReasoningField || assistantMsg.tool_calls)) {
				assistantMsg.content = "";
			}
			// Skip assistant messages that have no content, no tool calls, and no reasoning payload.
			// Some OpenAI-compatible backends require replaying reasoning-only assistant turns
			// so follow-up requests preserve the provider-specific reasoning field name.
			const content = assistantMsg.content;
			const hasContent =
				content !== null &&
				content !== undefined &&
				(typeof content === "string" ? content.length > 0 : content.length > 0);
			if (!hasContent && assistantMsg.tool_calls && compat.requiresAssistantContentForToolCalls) {
				assistantMsg.content = ".";
			}
			if (!hasContent && !assistantMsg.tool_calls && !hasReasoningField) {
				continue;
			}
			params.push(assistantMsg);
		} else if (msg.role === "toolResult") {
			// Batch consecutive tool results and collect all images
			const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
			let j = i;

			for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
				const toolMsg = transformedMessages[j] as ToolResultMessage;

				// Extract text and image content
				const textResult = toolMsg.content
					.filter(c => c.type === "text")
					.map(c => (c as TextContent).text)
					.join("\n");
				const supportsImages = model.input.includes("image");
				const hasImages = toolMsg.content.some(c => c.type === "image");
				const omittedImages = hasImages && !supportsImages;

				// Always send tool result with text (or placeholder if only images)
				const hasText = textResult.length > 0;
				const remappedToolCallId = consumeToolCallId(toolMsg.toolCallId);
				const resolvedToolCallId =
					remappedToolCallId ?? ensureToolCallId(toolMsg.toolCallId, `${j}:${toolMsg.toolName ?? "tool"}`);
				const toolResultContent = omittedImages
					? joinTextWithImagePlaceholder(textResult, true)
					: hasText
						? textResult
						: hasImages
							? "(see attached image)"
							: "";
				const toolResultMsg: ChatCompletionToolMessageParam = {
					role: "tool",
					content: toolResultContent.toWellFormed(),
					tool_call_id: normalizeMistralToolId(resolvedToolCallId, compat.requiresMistralToolIds),
				};
				if (compat.requiresToolResultName && toolMsg.toolName) {
					(toolResultMsg as any).name = toolMsg.toolName;
				}
				params.push(toolResultMsg);

				if (hasImages && supportsImages) {
					for (const block of toolMsg.content) {
						if (block.type === "image") {
							imageBlocks.push({
								type: "image_url",
								image_url: {
									url: `data:${block.mimeType};base64,${block.data}`,
								},
							});
						}
					}
				}
			}

			// Resume the outer loop after the last batched tool result.
			i = j - 1;

			// After all consecutive tool results, add a single user message with all images
			if (imageBlocks.length > 0) {
				if (compat.requiresAssistantAfterToolResult) {
					params.push({
						role: "assistant",
						content: "I have processed the tool results.",
					});
				}

				params.push({
					role: "user",
					content: [
						{
							type: "text",
							text: "Attached image(s) from tool result:",
						},
						...imageBlocks,
					],
				});
				lastRole = "user";
			} else {
				lastRole = "toolResult";
			}
			continue;
		}

		lastRole =
			msg.role === "developer"
				? model.reasoning && compat.supportsDeveloperRole
					? "developer"
					: "system"
				: msg.role;
	}

	return params;
}
1871
+
1872
/**
 * Build the Chat Completions `tools` payload and report which strict mode was applied.
 *
 * Each tool's wire schema is adapted for strict mode when strictness is not disabled
 * globally (`NO_STRICT`), by the backend (`compat.supportsStrictMode === false`) or by
 * the tool itself (`tool.strict === false`).
 *
 * The applied mode is derived from `toolStrictModeOverride ?? compat.toolStrictMode`:
 * - `"none"` stays `"none"`;
 * - `"all_strict"` is kept only when every tool adapted to strict, otherwise `"none"`;
 * - anything else becomes `"mixed"`, where only strict-capable tools carry `strict: true`.
 *
 * @param tools - Tool definitions to convert.
 * @param compat - Resolved compatibility flags for the target backend.
 * @param toolStrictModeOverride - Optional mode that takes precedence over `compat.toolStrictMode`.
 * @returns The wire tools together with the strict mode that was actually applied.
 */
function convertTools(
	tools: Tool[],
	compat: ResolvedOpenAICompat,
	toolStrictModeOverride?: ToolStrictModeOverride,
): BuiltOpenAICompletionTools {
	const prepared = tools.map(tool => {
		const wantsStrict = !NO_STRICT && compat.supportsStrictMode !== false && tool.strict !== false;
		const baseParameters = toolWireSchema(tool);
		const { schema, strict } = adaptSchemaForStrict(baseParameters, wantsStrict);
		return { tool, baseParameters, parameters: schema, strict };
	});

	const requested = toolStrictModeOverride ?? compat.toolStrictMode;
	let toolStrictMode: "none" | "all_strict" | "mixed" = "mixed";
	if (requested === "none") {
		toolStrictMode = "none";
	} else if (requested === "all_strict") {
		// A single non-strict tool downgrades the whole request to non-strict.
		toolStrictMode = prepared.every(entry => entry.strict) ? "all_strict" : "none";
	}

	return {
		tools: prepared.map(entry => {
			const emitStrict = toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && entry.strict);
			return {
				type: "function",
				function: {
					name: entry.tool.name,
					description: entry.tool.description || "",
					parameters: emitStrict ? entry.parameters : entry.baseParameters,
					// The strict flag is omitted entirely when not applied; some providers reject unknown fields.
					...(emitStrict ? { strict: true } : {}),
				},
			};
		}),
		toolStrictMode,
	};
}
1916
+
1917
/**
 * Decide whether a failed request should be retried with strict tool schemas disabled.
 *
 * A retry is suggested only when all of the following hold:
 * - at least one tool was sent and the applied mode was `"all_strict"`;
 * - the HTTP status (from the error, else from the captured response) is 400 or 422;
 * - the error message and/or captured response body matches a pattern naming a
 *   strict-mode or tool-format complaint.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if any.
 * @param toolStrictMode - Strict mode that was applied to the failed request.
 * @param tools - Tools that were sent with the request.
 * @returns `true` when retrying without strict tools is worthwhile.
 */
function shouldRetryWithoutStrictTools(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
	toolStrictMode: AppliedToolStrictMode,
	tools: Tool[] | undefined,
): boolean {
	if (toolStrictMode !== "all_strict" || !tools?.length) return false;

	const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	const isRequestValidationFailure = httpStatus === 400 || httpStatus === 422;
	if (!isRequestValidationFailure) return false;

	// Gather every non-blank piece of diagnostic text we have.
	const diagnostics: string[] = [];
	const candidates = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText];
	for (const candidate of candidates) {
		if (typeof candidate === "string" && candidate.trim().length > 0) {
			diagnostics.push(candidate);
		}
	}

	const strictComplaint = /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool/i;
	return strictComplaint.test(diagnostics.join("\n"));
}
1935
+
1936
/**
 * Translate a provider `finish_reason` into the internal stop reason.
 *
 * - `null`, `"stop"` and `"end"` map to `"stop"`;
 * - `"length"` maps to `"length"`;
 * - `"function_call"` and `"tool_calls"` map to `"toolUse"`;
 * - every other value (including `"content_filter"` and `"network_error"`) maps to
 *   `"error"` with an `errorMessage` that echoes the provider's reason.
 *
 * @param reason - The `finish_reason` reported by the provider.
 * @returns The mapped stop reason, plus an error message for error outcomes.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
	stopReason: StopReason;
	errorMessage?: string;
} {
	if (reason === null || reason === "stop" || reason === "end") {
		return { stopReason: "stop" };
	}
	if (reason === "length") {
		return { stopReason: "length" };
	}
	if (reason === "function_call" || reason === "tool_calls") {
		return { stopReason: "toolUse" };
	}
	// Known failure reasons and unrecognized values are reported the same way.
	return { stopReason: "error", errorMessage: `Provider finish_reason: ${reason}` };
}
1961
+
1962
/**
 * Auto-detect OpenAI compatibility settings for a model.
 *
 * Public entry point that forwards to `detectOpenAICompat`; the detection rules
 * themselves live in that helper.
 *
 * @param model - The model configuration to inspect.
 * @returns The resolved compatibility settings produced by the detector.
 */
export function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
	const detected = detectOpenAICompat(model);
	return detected;
}
1970
+
1971
/**
 * Resolve the compatibility settings to use for a model.
 *
 * Forwards to `resolveOpenAICompat`, passing along the optional resolved base URL.
 *
 * @param model - The model configuration.
 * @param resolvedBaseUrl - Optional base URL after resolution (e.g. after GitHub
 *   Copilot proxy-ep resolution); passed through unchanged.
 * @returns The resolved compatibility settings.
 */
function getCompat(model: Model<"openai-completions">, resolvedBaseUrl?: string): ResolvedOpenAICompat {
	const resolved = resolveOpenAICompat(model, resolvedBaseUrl);
	return resolved;
}