@gajae-code/ai 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/CHANGELOG.md +2644 -0
  2. package/README.md +1181 -0
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +5 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +96 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/types.d.ts +105 -0
  10. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  11. package/dist/types/auth-gateway/http.d.ts +39 -0
  12. package/dist/types/auth-gateway/index.d.ts +3 -0
  13. package/dist/types/auth-gateway/server.d.ts +17 -0
  14. package/dist/types/auth-gateway/types.d.ts +115 -0
  15. package/dist/types/auth-storage.d.ts +641 -0
  16. package/dist/types/cli.d.ts +2 -0
  17. package/dist/types/index.d.ts +49 -0
  18. package/dist/types/model-cache.d.ts +17 -0
  19. package/dist/types/model-manager.d.ts +62 -0
  20. package/dist/types/model-thinking.d.ts +71 -0
  21. package/dist/types/models.d.ts +12 -0
  22. package/dist/types/provider-details.d.ts +24 -0
  23. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  24. package/dist/types/provider-models/descriptors.d.ts +48 -0
  25. package/dist/types/provider-models/google.d.ts +20 -0
  26. package/dist/types/provider-models/index.d.ts +5 -0
  27. package/dist/types/provider-models/ollama.d.ts +7 -0
  28. package/dist/types/provider-models/openai-compat.d.ts +237 -0
  29. package/dist/types/provider-models/special.d.ts +16 -0
  30. package/dist/types/providers/amazon-bedrock.d.ts +36 -0
  31. package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
  32. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  33. package/dist/types/providers/anthropic.d.ts +188 -0
  34. package/dist/types/providers/aws-credentials.d.ts +43 -0
  35. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  36. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  37. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  38. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  39. package/dist/types/providers/cursor.d.ts +42 -0
  40. package/dist/types/providers/error-message.d.ts +27 -0
  41. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  42. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  43. package/dist/types/providers/google-auth.d.ts +24 -0
  44. package/dist/types/providers/google-gemini-cli.d.ts +72 -0
  45. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  46. package/dist/types/providers/google-shared.d.ts +163 -0
  47. package/dist/types/providers/google-types.d.ts +138 -0
  48. package/dist/types/providers/google-vertex.d.ts +7 -0
  49. package/dist/types/providers/google.d.ts +4 -0
  50. package/dist/types/providers/grammar.d.ts +1 -0
  51. package/dist/types/providers/kimi.d.ts +27 -0
  52. package/dist/types/providers/mock.d.ts +175 -0
  53. package/dist/types/providers/ollama.d.ts +6 -0
  54. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  55. package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
  56. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  57. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  58. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  59. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  60. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  61. package/dist/types/providers/openai-completions-compat.d.ts +25 -0
  62. package/dist/types/providers/openai-completions.d.ts +33 -0
  63. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  64. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  65. package/dist/types/providers/openai-responses-shared.d.ts +89 -0
  66. package/dist/types/providers/openai-responses.d.ts +32 -0
  67. package/dist/types/providers/pi-native-client.d.ts +13 -0
  68. package/dist/types/providers/pi-native-server.d.ts +68 -0
  69. package/dist/types/providers/register-builtins.d.ts +31 -0
  70. package/dist/types/providers/synthetic.d.ts +26 -0
  71. package/dist/types/providers/transform-messages.d.ts +12 -0
  72. package/dist/types/providers/vision-guard.d.ts +8 -0
  73. package/dist/types/rate-limit-utils.d.ts +19 -0
  74. package/dist/types/stream.d.ts +24 -0
  75. package/dist/types/types.d.ts +746 -0
  76. package/dist/types/usage/claude.d.ts +3 -0
  77. package/dist/types/usage/gemini.d.ts +2 -0
  78. package/dist/types/usage/github-copilot.d.ts +7 -0
  79. package/dist/types/usage/google-antigravity.d.ts +2 -0
  80. package/dist/types/usage/kimi.d.ts +2 -0
  81. package/dist/types/usage/minimax-code.d.ts +2 -0
  82. package/dist/types/usage/openai-codex.d.ts +3 -0
  83. package/dist/types/usage/shared.d.ts +1 -0
  84. package/dist/types/usage/zai.d.ts +2 -0
  85. package/dist/types/usage.d.ts +258 -0
  86. package/dist/types/utils/abort.d.ts +19 -0
  87. package/dist/types/utils/anthropic-auth.d.ts +31 -0
  88. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  89. package/dist/types/utils/discovery/codex.d.ts +38 -0
  90. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  91. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  92. package/dist/types/utils/discovery/index.d.ts +4 -0
  93. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  94. package/dist/types/utils/event-stream.d.ts +28 -0
  95. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  96. package/dist/types/utils/foundry.d.ts +1 -0
  97. package/dist/types/utils/h2-fetch.d.ts +22 -0
  98. package/dist/types/utils/http-inspector.d.ts +31 -0
  99. package/dist/types/utils/idle-iterator.d.ts +67 -0
  100. package/dist/types/utils/json-parse.d.ts +10 -0
  101. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  102. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  103. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  104. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  105. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  106. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  107. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  108. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  109. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  110. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  111. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  112. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  113. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  114. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  115. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  116. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  117. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  118. package/dist/types/utils/oauth/index.d.ts +38 -0
  119. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  120. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  121. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  122. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  123. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  124. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  125. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  126. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  127. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  128. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  129. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  130. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  131. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  132. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  133. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  134. package/dist/types/utils/oauth/pkce.d.ts +8 -0
  135. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  136. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  137. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  138. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  139. package/dist/types/utils/oauth/together.d.ts +1 -0
  140. package/dist/types/utils/oauth/types.d.ts +44 -0
  141. package/dist/types/utils/oauth/venice.d.ts +18 -0
  142. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  143. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  144. package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
  145. package/dist/types/utils/oauth/zai.d.ts +18 -0
  146. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  147. package/dist/types/utils/overflow.d.ts +54 -0
  148. package/dist/types/utils/parse-bind.d.ts +23 -0
  149. package/dist/types/utils/provider-response.d.ts +3 -0
  150. package/dist/types/utils/retry-after.d.ts +3 -0
  151. package/dist/types/utils/retry.d.ts +26 -0
  152. package/dist/types/utils/schema/adapt.d.ts +24 -0
  153. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  154. package/dist/types/utils/schema/dereference.d.ts +11 -0
  155. package/dist/types/utils/schema/draft.d.ts +10 -0
  156. package/dist/types/utils/schema/equality.d.ts +4 -0
  157. package/dist/types/utils/schema/fields.d.ts +49 -0
  158. package/dist/types/utils/schema/index.d.ts +13 -0
  159. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  160. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  161. package/dist/types/utils/schema/normalize.d.ts +93 -0
  162. package/dist/types/utils/schema/spill.d.ts +8 -0
  163. package/dist/types/utils/schema/stamps.d.ts +25 -0
  164. package/dist/types/utils/schema/types.d.ts +4 -0
  165. package/dist/types/utils/schema/wire.d.ts +54 -0
  166. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  167. package/dist/types/utils/sse-debug.d.ts +10 -0
  168. package/dist/types/utils/tool-call-healing.d.ts +71 -0
  169. package/dist/types/utils/tool-choice.d.ts +50 -0
  170. package/dist/types/utils/validation.d.ts +17 -0
  171. package/dist/types/utils.d.ts +28 -0
  172. package/package.json +146 -0
  173. package/src/api-registry.ts +96 -0
  174. package/src/auth-broker/client.ts +358 -0
  175. package/src/auth-broker/index.ts +5 -0
  176. package/src/auth-broker/refresher.ts +127 -0
  177. package/src/auth-broker/remote-store.ts +623 -0
  178. package/src/auth-broker/server.ts +644 -0
  179. package/src/auth-broker/types.ts +127 -0
  180. package/src/auth-broker/wire-schemas.ts +200 -0
  181. package/src/auth-gateway/http.ts +194 -0
  182. package/src/auth-gateway/index.ts +3 -0
  183. package/src/auth-gateway/server.ts +717 -0
  184. package/src/auth-gateway/types.ts +134 -0
  185. package/src/auth-storage.ts +4104 -0
  186. package/src/cli.ts +262 -0
  187. package/src/index.ts +54 -0
  188. package/src/model-cache.ts +129 -0
  189. package/src/model-manager.ts +450 -0
  190. package/src/model-thinking.ts +691 -0
  191. package/src/models.json +73853 -0
  192. package/src/models.json.d.ts +9 -0
  193. package/src/models.ts +56 -0
  194. package/src/prompts/turn-aborted-guidance.md +4 -0
  195. package/src/provider-details.ts +90 -0
  196. package/src/provider-models/bundled-references.ts +38 -0
  197. package/src/provider-models/descriptors.ts +308 -0
  198. package/src/provider-models/google.ts +91 -0
  199. package/src/provider-models/index.ts +5 -0
  200. package/src/provider-models/ollama.ts +153 -0
  201. package/src/provider-models/openai-compat.ts +2275 -0
  202. package/src/provider-models/special.ts +67 -0
  203. package/src/providers/amazon-bedrock.ts +849 -0
  204. package/src/providers/anthropic-messages-server-schema.ts +229 -0
  205. package/src/providers/anthropic-messages-server.ts +677 -0
  206. package/src/providers/anthropic.ts +2696 -0
  207. package/src/providers/aws-credentials.ts +501 -0
  208. package/src/providers/aws-eventstream.ts +185 -0
  209. package/src/providers/aws-sigv4.ts +218 -0
  210. package/src/providers/azure-openai-responses.ts +337 -0
  211. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  212. package/src/providers/cursor/proto/agent.proto +3526 -0
  213. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  214. package/src/providers/cursor/proto/buf.yaml +17 -0
  215. package/src/providers/cursor.ts +2561 -0
  216. package/src/providers/error-message.ts +21 -0
  217. package/src/providers/github-copilot-headers.ts +140 -0
  218. package/src/providers/gitlab-duo.ts +372 -0
  219. package/src/providers/google-auth.ts +252 -0
  220. package/src/providers/google-gemini-cli.ts +795 -0
  221. package/src/providers/google-gemini-headers.ts +41 -0
  222. package/src/providers/google-shared.ts +902 -0
  223. package/src/providers/google-types.ts +167 -0
  224. package/src/providers/google-vertex.ts +88 -0
  225. package/src/providers/google.ts +41 -0
  226. package/src/providers/grammar.ts +70 -0
  227. package/src/providers/kimi.ts +52 -0
  228. package/src/providers/mock.ts +500 -0
  229. package/src/providers/ollama.ts +544 -0
  230. package/src/providers/openai-anthropic-shim.ts +138 -0
  231. package/src/providers/openai-chat-server-schema.ts +243 -0
  232. package/src/providers/openai-chat-server.ts +628 -0
  233. package/src/providers/openai-codex/constants.ts +43 -0
  234. package/src/providers/openai-codex/request-transformer.ts +161 -0
  235. package/src/providers/openai-codex/response-handler.ts +81 -0
  236. package/src/providers/openai-codex-responses.ts +2598 -0
  237. package/src/providers/openai-completions-compat.ts +279 -0
  238. package/src/providers/openai-completions.ts +1853 -0
  239. package/src/providers/openai-responses-server-schema.ts +290 -0
  240. package/src/providers/openai-responses-server.ts +1183 -0
  241. package/src/providers/openai-responses-shared.ts +800 -0
  242. package/src/providers/openai-responses.ts +621 -0
  243. package/src/providers/pi-native-client.ts +228 -0
  244. package/src/providers/pi-native-server.ts +210 -0
  245. package/src/providers/register-builtins.ts +412 -0
  246. package/src/providers/synthetic.ts +50 -0
  247. package/src/providers/transform-messages.ts +309 -0
  248. package/src/providers/vision-guard.ts +31 -0
  249. package/src/rate-limit-utils.ts +84 -0
  250. package/src/stream.ts +895 -0
  251. package/src/types.ts +884 -0
  252. package/src/usage/claude.ts +431 -0
  253. package/src/usage/gemini.ts +250 -0
  254. package/src/usage/github-copilot.ts +421 -0
  255. package/src/usage/google-antigravity.ts +201 -0
  256. package/src/usage/kimi.ts +271 -0
  257. package/src/usage/minimax-code.ts +31 -0
  258. package/src/usage/openai-codex.ts +503 -0
  259. package/src/usage/shared.ts +10 -0
  260. package/src/usage/zai.ts +247 -0
  261. package/src/usage.ts +183 -0
  262. package/src/utils/abort.ts +51 -0
  263. package/src/utils/anthropic-auth.ts +87 -0
  264. package/src/utils/discovery/antigravity.ts +261 -0
  265. package/src/utils/discovery/codex.ts +371 -0
  266. package/src/utils/discovery/cursor.ts +306 -0
  267. package/src/utils/discovery/gemini.ts +248 -0
  268. package/src/utils/discovery/index.ts +4 -0
  269. package/src/utils/discovery/openai-compatible.ts +224 -0
  270. package/src/utils/event-stream.ts +142 -0
  271. package/src/utils/fireworks-model-id.ts +30 -0
  272. package/src/utils/foundry.ts +8 -0
  273. package/src/utils/h2-fetch.ts +60 -0
  274. package/src/utils/http-inspector.ts +176 -0
  275. package/src/utils/idle-iterator.ts +250 -0
  276. package/src/utils/json-parse.ts +148 -0
  277. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  278. package/src/utils/oauth/anthropic.ts +200 -0
  279. package/src/utils/oauth/api-key-login.ts +87 -0
  280. package/src/utils/oauth/api-key-validation.ts +92 -0
  281. package/src/utils/oauth/callback-server.ts +276 -0
  282. package/src/utils/oauth/cerebras.ts +16 -0
  283. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  284. package/src/utils/oauth/cursor.ts +157 -0
  285. package/src/utils/oauth/deepseek.ts +53 -0
  286. package/src/utils/oauth/firepass.ts +24 -0
  287. package/src/utils/oauth/fireworks.ts +15 -0
  288. package/src/utils/oauth/github-copilot.ts +362 -0
  289. package/src/utils/oauth/gitlab-duo.ts +123 -0
  290. package/src/utils/oauth/google-antigravity.ts +200 -0
  291. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  292. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  293. package/src/utils/oauth/huggingface.ts +62 -0
  294. package/src/utils/oauth/index.ts +444 -0
  295. package/src/utils/oauth/kagi.ts +47 -0
  296. package/src/utils/oauth/kilo.ts +87 -0
  297. package/src/utils/oauth/kimi.ts +254 -0
  298. package/src/utils/oauth/litellm.ts +47 -0
  299. package/src/utils/oauth/lm-studio.ts +38 -0
  300. package/src/utils/oauth/minimax-code.ts +78 -0
  301. package/src/utils/oauth/moonshot.ts +16 -0
  302. package/src/utils/oauth/nanogpt.ts +15 -0
  303. package/src/utils/oauth/nvidia.ts +70 -0
  304. package/src/utils/oauth/oauth.html +199 -0
  305. package/src/utils/oauth/ollama-cloud.ts +28 -0
  306. package/src/utils/oauth/ollama.ts +47 -0
  307. package/src/utils/oauth/openai-codex.ts +299 -0
  308. package/src/utils/oauth/opencode.ts +49 -0
  309. package/src/utils/oauth/parallel.ts +46 -0
  310. package/src/utils/oauth/perplexity.ts +206 -0
  311. package/src/utils/oauth/pkce.ts +18 -0
  312. package/src/utils/oauth/qianfan.ts +58 -0
  313. package/src/utils/oauth/qwen-portal.ts +60 -0
  314. package/src/utils/oauth/synthetic.ts +16 -0
  315. package/src/utils/oauth/tavily.ts +46 -0
  316. package/src/utils/oauth/together.ts +16 -0
  317. package/src/utils/oauth/types.ts +94 -0
  318. package/src/utils/oauth/venice.ts +59 -0
  319. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  320. package/src/utils/oauth/vllm.ts +40 -0
  321. package/src/utils/oauth/xiaomi.ts +137 -0
  322. package/src/utils/oauth/zai.ts +60 -0
  323. package/src/utils/oauth/zenmux.ts +15 -0
  324. package/src/utils/overflow.ts +137 -0
  325. package/src/utils/parse-bind.ts +54 -0
  326. package/src/utils/provider-response.ts +30 -0
  327. package/src/utils/retry-after.ts +110 -0
  328. package/src/utils/retry.ts +54 -0
  329. package/src/utils/schema/CONSTRAINTS.md +164 -0
  330. package/src/utils/schema/adapt.ts +36 -0
  331. package/src/utils/schema/compatibility.ts +435 -0
  332. package/src/utils/schema/dereference.ts +98 -0
  333. package/src/utils/schema/draft.ts +341 -0
  334. package/src/utils/schema/equality.ts +97 -0
  335. package/src/utils/schema/fields.ts +190 -0
  336. package/src/utils/schema/index.ts +13 -0
  337. package/src/utils/schema/json-schema-validator.ts +577 -0
  338. package/src/utils/schema/meta-validator.ts +167 -0
  339. package/src/utils/schema/normalize.ts +1588 -0
  340. package/src/utils/schema/spill.ts +43 -0
  341. package/src/utils/schema/stamps.ts +97 -0
  342. package/src/utils/schema/types.ts +11 -0
  343. package/src/utils/schema/wire.ts +213 -0
  344. package/src/utils/schema/zod-decontaminate.ts +331 -0
  345. package/src/utils/sse-debug.ts +289 -0
  346. package/src/utils/tool-call-healing.ts +271 -0
  347. package/src/utils/tool-choice.ts +99 -0
  348. package/src/utils/validation.ts +1019 -0
  349. package/src/utils.ts +166 -0
@@ -0,0 +1,1853 @@
1
+ import { $env, extractHttpStatusFromError } from "@gajae-code/utils";
2
+ import OpenAI from "openai";
3
+ import type {
4
+ ChatCompletionAssistantMessageParam,
5
+ ChatCompletionChunk,
6
+ ChatCompletionContentPart,
7
+ ChatCompletionContentPartImage,
8
+ ChatCompletionContentPartText,
9
+ ChatCompletionMessageParam,
10
+ ChatCompletionToolMessageParam,
11
+ } from "openai/resources/chat/completions";
12
+ import packageJson from "../../package.json" with { type: "json" };
13
+ import { type Effort, getSupportedEfforts } from "../model-thinking";
14
+ import { calculateCost } from "../models";
15
+ import { getEnvApiKey } from "../stream";
16
+ import {
17
+ type AssistantMessage,
18
+ type Context,
19
+ type FetchImpl,
20
+ type Message,
21
+ type MessageAttribution,
22
+ type Model,
23
+ type OpenAICompat,
24
+ type ProviderSessionState,
25
+ resolveServiceTier,
26
+ type ServiceTier,
27
+ type StopReason,
28
+ type StreamFunction,
29
+ type StreamOptions,
30
+ shouldSendServiceTier,
31
+ type TextContent,
32
+ type ThinkingContent,
33
+ type Tool,
34
+ type ToolCall,
35
+ type ToolChoice,
36
+ type ToolResultMessage,
37
+ } from "../types";
38
+ import { normalizeSystemPrompts } from "../utils";
39
+ import { createAbortSourceTracker } from "../utils/abort";
40
+ import { AssistantMessageEventStream } from "../utils/event-stream";
41
+ import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
42
+ import {
43
+ type CapturedHttpErrorResponse,
44
+ finalizeErrorMessage,
45
+ type RawHttpRequestDump,
46
+ rewriteCopilotError,
47
+ } from "../utils/http-inspector";
48
+ import {
49
+ createWatchdog,
50
+ getOpenAIStreamIdleTimeoutMs,
51
+ getStreamFirstEventTimeoutMs,
52
+ iterateWithIdleTimeout,
53
+ } from "../utils/idle-iterator";
54
+ import { parseStreamingJson } from "../utils/json-parse";
55
+ import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
56
+ import { getKimiCommonHeaders } from "../utils/oauth/kimi";
57
+ import { notifyProviderResponse } from "../utils/provider-response";
58
+ import { callWithCopilotModelRetry } from "../utils/retry";
59
+ import { adaptSchemaForStrict, NO_STRICT, toolWireSchema } from "../utils/schema";
60
+ import { wrapFetchForSseDebug } from "../utils/sse-debug";
61
+ import { type HealedToolCall, modelMayLeakKimiToolCalls, ToolCallHealer } from "../utils/tool-call-healing";
62
+ import { isForcedToolChoice, mapToOpenAICompletionsToolChoice } from "../utils/tool-choice";
63
+ import {
64
+ buildCopilotDynamicHeaders,
65
+ hasCopilotVisionInput,
66
+ resolveGitHubCopilotBaseUrl,
67
+ } from "./github-copilot-headers";
68
+ import { detectOpenAICompat, type ResolvedOpenAICompat, resolveOpenAICompat } from "./openai-completions-compat";
69
+ import { createInitialResponsesAssistantMessage } from "./openai-responses-shared";
70
+ import { transformMessages } from "./transform-messages";
71
+ import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER } from "./vision-guard";
72
+
73
/**
 * Coerce a tool-call ID into the format Mistral accepts.
 *
 * Mistral requires tool-call IDs to consist of exactly nine ASCII letters or
 * digits. IDs destined for any other provider are passed through untouched.
 *
 * @param id - Tool-call ID as produced upstream.
 * @param isMistral - Whether the Mistral ID format has to be enforced.
 * @returns `id` unchanged when `isMistral` is false; otherwise the ID with all
 *   non-alphanumeric characters removed, then right-padded or truncated to
 *   nine characters.
 */
function normalizeMistralToolId(id: string, isMistral: boolean): string {
  if (!isMistral) return id;

  const requiredLength = 9;
  const alphanumericOnly = id.replace(/[^a-zA-Z0-9]/g, "");

  // The filler is a fixed string (it does not depend on the ID), so the same
  // input always maps to the same output and call/result IDs keep matching.
  // `padEnd` covers short IDs, `slice` covers long ones.
  return alphanumericOnly.padEnd(requiredLength, "ABCDEFGHI").slice(0, requiredLength);
}
91
+
92
/**
 * Flatten an OpenAI-compatible streaming `delta.content` value into plain text.
 *
 * `delta.content` is normally a string, but some providers (notably Mistral
 * Medium 3.5 / `mistral-medium-2604`) send an array of typed parts such as
 * `[{ type: "text", text: "Hello" }]`. Appending such an array to a string
 * would coerce it and leak literal `[object Object]` sequences into the
 * output (issue #911), so the parts are unpacked explicitly here.
 *
 * @param content - Raw `delta.content` value of any shape.
 * @returns The string itself, the concatenated text of all string / text
 *   parts of an array, the `text` of a single text-part object, or `""` for
 *   anything else. Non-text parts and unknown shapes contribute nothing.
 */
function normalizeStreamingContentText(content: unknown): string {
  // Text carried by one content-part object; "" when it is not a text part.
  const textOfPart = (candidate: unknown): string => {
    if (typeof candidate !== "object" || candidate === null) return "";
    const { type, text } = candidate as { type?: unknown; text?: unknown };
    // A part without an explicit type is treated like a text part.
    const isTextual = type === undefined || type === "text";
    return isTextual && typeof text === "string" ? text : "";
  };

  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return textOfPart(content);

  return content.map(part => (typeof part === "string" ? part : textOfPart(part))).join("");
}
128
+
129
/**
 * Serialize tool-call arguments into the JSON-object string expected on the wire.
 *
 * Only plain (non-array) objects are considered valid arguments. Everything
 * else collapses to the empty object literal `"{}"`, so the result is always
 * a string.
 *
 * @param value - Arguments as an object, or as a string that should contain a
 *   JSON object.
 * @returns Compact JSON text for an object input or for a string that parses
 *   to a JSON object; `"{}"` for blank or unparseable strings, strings holding
 *   a non-object JSON value, arrays, primitives, nullish values, and objects
 *   that cannot be serialized.
 */
function serializeToolArguments(value: unknown): string {
  const emptyArguments = "{}";

  if (value && typeof value === "object" && !Array.isArray(value)) {
    try {
      // JSON.stringify returns undefined (despite its declared `string` type)
      // when the object's toJSON() yields undefined; fall back so the
      // function never hands a non-string to callers.
      return JSON.stringify(value) ?? emptyArguments;
    } catch {
      // Circular structures, BigInt members, etc. cannot be serialized.
      return emptyArguments;
    }
  }

  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed.length === 0) return emptyArguments;
    try {
      const parsed: unknown = JSON.parse(trimmed);
      if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
        // Re-serialize to normalize whitespace.
        return JSON.stringify(parsed);
      }
    } catch {
      // Deliberate best-effort: malformed JSON degrades to empty arguments.
    }
    return emptyArguments;
  }

  return emptyArguments;
}
152
+
153
/**
 * Report whether a conversation already contains tool activity.
 *
 * Needed because Anthropic (reached through a proxy) requires the `tools`
 * parameter to be present whenever the messages include tool calls or tool
 * results.
 *
 * @param messages - Conversation history to scan.
 * @returns `true` if any message has role `toolResult`, or any assistant
 *   message holds at least one `toolCall` content block; `false` otherwise.
 */
function hasToolHistory(messages: Message[]): boolean {
  return messages.some(message => {
    if (message.role === "toolResult") return true;
    return message.role === "assistant" && message.content.some(part => part.type === "toolCall");
  });
}
171
+
172
/**
 * Decide whether a streamed chunk is real progress rather than a keepalive,
 * a role-only preamble, or an empty `{choices:[]}` no-op that some
 * OpenAI-compatible endpoints emit.
 *
 * Without this filter every keepalive would reset the deadline of
 * `iterateWithIdleTimeout`, so a provider streaming nothing but pings would
 * keep the watchdog asleep indefinitely. This was observed against z.ai/GLM
 * via OpenRouter, where a subagent stalled for hours with no error surfaced.
 *
 * A chunk is progress when it carries usage (top-level or on the first
 * choice), a finish reason, or any model-produced delta: content, tool calls,
 * reasoning text, or a refusal. A role-only `delta: { role: "assistant" }`
 * preamble is NOT progress, so the (longer) first-event timeout keeps
 * governing until real output appears.
 *
 * @param chunk - One decoded stream chunk of unknown shape.
 * @returns `true` when the chunk counts as progress. Only the first entry of
 *   `choices` is inspected.
 */
export function isOpenAICompletionsProgressChunk(chunk: unknown): boolean {
  if (typeof chunk !== "object" || chunk === null) return false;

  const { usage, choices } = chunk as {
    usage?: unknown;
    choices?: ReadonlyArray<{
      finish_reason?: unknown;
      usage?: unknown;
      delta?: {
        content?: unknown;
        tool_calls?: unknown;
        reasoning?: unknown;
        reasoning_content?: unknown;
        reasoning_text?: unknown;
        refusal?: unknown;
      };
    }>;
  };
  if (usage) return true;

  const firstChoice = Array.isArray(choices) ? choices[0] : undefined;
  if (!firstChoice) return false;
  if (firstChoice.finish_reason || firstChoice.usage) return true;

  const delta = firstChoice.delta;
  if (!delta) return false;

  // Content may arrive as a string or as an array of parts; either must be non-empty.
  const { content } = delta;
  const hasContent =
    typeof content === "string" ? content.length > 0 : Array.isArray(content) && content.length > 0;
  if (hasContent) return true;

  if (Array.isArray(delta.tool_calls) && delta.tool_calls.length > 0) return true;

  // Remaining delta fields count only as non-empty strings.
  const textualFields = [delta.reasoning, delta.reasoning_content, delta.reasoning_text, delta.refusal];
  return textualFields.some(field => typeof field === "string" && field.length > 0);
}
218
+
219
/**
 * Options accepted by the OpenAI chat-completions provider, in addition to
 * everything inherited from `StreamOptions`. All members are optional.
 */
export interface OpenAICompletionsOptions extends StreamOptions {
  /** Requested reasoning effort level. */
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
  /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
  disableReasoning?: boolean;
  /** Tool-choice setting for the request. */
  toolChoice?: ToolChoice;
  /** Service tier for the request. */
  serviceTier?: ServiceTier;
}
226
+
227
/**
 * Request body for a streaming chat completion: the OpenAI SDK's streaming
 * params type widened with optional fields that the SDK type does not declare.
 *
 * NOTE(review): the extra fields look like vendor-specific extensions; which
 * endpoint honours which field is decided by the code that populates them,
 * not by this type.
 */
type OpenAICompletionsParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
  // Additional sampling controls.
  top_k?: number;
  min_p?: number;
  repetition_penalty?: number;

  // Alternative spellings of a thinking / reasoning switch.
  thinking?: { type: "enabled" | "disabled" };
  enable_thinking?: boolean;
  chat_template_kwargs?: { enable_thinking: boolean };
  reasoning?: { effort?: string } | { enabled: false };

  // Routing hints; `provider` reuses the `openRouterRouting` shape from `OpenAICompat`.
  provider?: OpenAICompat["openRouterRouting"];
  providerOptions?: { gateway?: { only?: string[]; order?: string[] } };
};
238
+
239
/** Strict-mode outcome recorded alongside a built tool list. */
type AppliedToolStrictMode = "none" | "all_strict" | "mixed";

/** Any resolved `toolStrictMode` except `"mixed"`, or `undefined` when there is no override. */
type ToolStrictModeOverride = Exclude<ResolvedOpenAICompat["toolStrictMode"], "mixed"> | undefined;

/** Tool definitions in OpenAI chat-completions wire format plus the strict mode recorded for them. */
type BuiltOpenAICompletionTools = {
  toolStrictMode: AppliedToolStrictMode;
  tools: OpenAI.Chat.Completions.ChatCompletionTool[];
};

/** Leading segment of every key this provider stores in the shared provider-session-state map. */
const OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX = "openai-completions:";

/** Provider session state extended with a flag recording whether strict tools have been disabled. */
type OpenAICompletionsProviderSessionState = ProviderSessionState & {
  strictToolsDisabled: boolean;
};
252
+
253
/**
 * Build a fresh session-state record for the OpenAI chat-completions provider.
 *
 * @returns A new object whose `strictToolsDisabled` flag starts as `false`
 *   and whose `close()` sets that flag back to `false`.
 */
function createOpenAICompletionsProviderSessionState(): OpenAICompletionsProviderSessionState {
  const sessionState: OpenAICompletionsProviderSessionState = {
    strictToolsDisabled: false,
    close() {
      // Refers to the captured object rather than `this`, so a detached call still works.
      sessionState.strictToolsDisabled = false;
    },
  };
  return sessionState;
}
262
+
263
/**
 * Look up the session state for a model / endpoint pair, creating it on first use.
 *
 * Entries are keyed by provider name, base URL (empty string when absent) and
 * model ID, all under this provider's key prefix.
 *
 * @param model - Model the request targets; its `provider` and `id` form part of the key.
 * @param baseUrl - Endpoint base URL, if any.
 * @param providerSessionState - Shared state map, or `undefined` when no session state is kept.
 * @returns The existing or newly stored state, or `undefined` when no map was supplied.
 */
function getOpenAICompletionsProviderSessionState(
  model: Model<"openai-completions">,
  baseUrl: string | undefined,
  providerSessionState: Map<string, ProviderSessionState> | undefined,
): OpenAICompletionsProviderSessionState | undefined {
  if (!providerSessionState) return undefined;

  const scope = `${model.provider}:${baseUrl ?? ""}:${model.id}`;
  const stateKey = `${OPENAI_COMPLETIONS_PROVIDER_SESSION_STATE_PREFIX}${scope}`;

  let sessionState = providerSessionState.get(stateKey) as OpenAICompletionsProviderSessionState | undefined;
  if (!sessionState) {
    sessionState = createOpenAICompletionsProviderSessionState();
    providerSessionState.set(stateKey, sessionState);
  }
  return sessionState;
}
276
+
277
/**
 * Tell whether a model is an Anthropic model served through OpenRouter.
 *
 * @param model - Model descriptor to inspect.
 * @returns `true` when the provider is exactly `"openrouter"` and the model ID
 *   starts with `anthropic/` (compared case-insensitively).
 */
function isOpenRouterAnthropicModel(model: Model<"openai-completions">): boolean {
  if (model.provider !== "openrouter") return false;
  const loweredId = model.id.toLowerCase();
  return loweredId.startsWith("anthropic/");
}
280
+
281
/**
 * Recognize the HTTP 400 failure whose text reports that a compiled grammar is too large.
 *
 * The status comes from the error itself when it can be extracted, otherwise
 * from the captured response. The error message and the captured response body
 * are searched together, and all three markers (`invalid_request_error`,
 * `compiled grammar`, `too large`) must appear, case-insensitively.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if one was recorded.
 * @returns `true` only for a 400 whose combined text contains every marker.
 */
function isCompiledGrammarTooLargeStrictError(
  error: unknown,
  capturedErrorResponse: CapturedHttpErrorResponse | undefined,
): boolean {
  const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
  if (httpStatus !== 400) return false;

  // Collect whichever text sources are present and non-blank.
  const fragments: string[] = [];
  const candidates = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText];
  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate.trim().length > 0) fragments.push(candidate);
  }
  const combinedText = fragments.join("\n");

  const requiredMarkers = [/invalid_request_error/i, /compiled grammar/i, /too large/i];
  return requiredMarkers.every(marker => marker.test(combinedText));
}
296
+
297
/**
 * Literal tags that open and close a thinking section in MiniMax output.
 *
 * LIMITATION: the think-tag parser matches `<think>` / `<thinking>` by naive
 * string comparison. If a MiniMax model prints these exact strings inside a
 * code block, an XML example, or an explanation, they are wrongly treated as
 * thinking delimiters and the visible output is truncated. A streaming parser
 * that sees arbitrary chunk boundaries cannot reliably tell whether it is
 * inside a code block. This is accepted because:
 *   1. the parser is only enabled for minimax-code providers,
 *   2. MiniMax models use these tags as their real thinking format, and
 *   3. false positives are rare in practice.
 */
const MINIMAX_THINK_OPEN_TAGS = ["<think>", "<thinking>"] as const;
const MINIMAX_THINK_CLOSE_TAGS = ["</think>", "</thinking>"] as const;
305
+
306
/**
 * Finds which of `tags` occurs earliest in `text`.
 *
 * @param text - Text to scan.
 * @param tags - Candidate tags; on a tie the tag listed first wins.
 * @returns The winning tag and its index, or `undefined` when no tag occurs
 * (or when the winning tag is the empty string).
 */
function findFirstTag(text: string, tags: readonly string[]): { index: number; tag: string } | undefined {
	let best: { index: number; tag: string } | undefined;
	for (const candidate of tags) {
		const position = text.indexOf(candidate);
		if (position === -1) continue;
		// Strict comparison keeps the earlier-listed tag when two tags share a position.
		if (best === undefined || position < best.index) {
			best = { index: position, tag: candidate };
		}
	}
	if (best === undefined || !best.tag) return undefined;
	return best;
}
319
+
320
/**
 * Returns the longest suffix of `text` that is a proper prefix of any tag,
 * i.e. the part that might turn into a full tag once more text arrives.
 *
 * @param text - Buffered text to inspect.
 * @param tags - Tags whose proper prefixes are considered.
 * @returns The trailing partial tag, or the empty string when there is none.
 */
function getTrailingPartialTag(text: string, tags: readonly string[]): string {
	const longest = tags.reduce((bestSoFar, tag) => {
		// Only proper prefixes count, so start one character short of the full tag.
		let size = Math.min(tag.length - 1, text.length);
		while (size > 0 && !text.endsWith(tag.slice(0, size))) size--;
		return Math.max(bestSoFar, size);
	}, 0);
	return longest === 0 ? "" : text.slice(-longest);
}
334
+
335
// DeepSeek models leak chat-template special tokens (e.g. `<|tool_calls_begin|>`,
// `<|DSML|tool_calls|>`) into visible `content` deltas when hosted behind providers
// (such as NVIDIA NIM) that don't strip them server-side. The structured `tool_calls`
// payload is still emitted correctly — we only need to filter the leaked markers from
// user-visible text. Tokens are delimited by either fullwidth pipes (U+FF5C) or ASCII
// pipes (U+007C). The body is restricted to identifier-like chars (plus pipes and the
// tokenizer's U+2581 word marker), capped at a sane length to avoid swallowing
// legitimate angle-bracket text.
//
// The non-ASCII characters are written as `\u` escapes on purpose: if the fullwidth
// pipe is ever normalized to an ASCII pipe, an alternation such as `(?:X|\|)` silently
// degrades into one that matches the empty string, and ordinary `<word>` text gets
// stripped. All three regexes are derived from one pattern so they cannot drift apart.
const DEEPSEEK_SPECIAL_TOKEN_SOURCE = "<[\\uFF5C|][A-Za-z0-9_.\\uFF5C|\\u2581]{1,64}[\\uFF5C|]>";
const DEEPSEEK_SPECIAL_TOKEN_REGEX = new RegExp(DEEPSEEK_SPECIAL_TOKEN_SOURCE, "g");
const DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX = new RegExp(`^\\s*${DEEPSEEK_SPECIAL_TOKEN_SOURCE}`);
const DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX = new RegExp(`${DEEPSEEK_SPECIAL_TOKEN_SOURCE}\\s*$`);
const DEEPSEEK_OPEN_DELIMS = ["<\uFF5C", "<|"] as const;
const DEEPSEEK_CLOSE_DELIMS = ["\uFF5C>", "|>"] as const;

/**
 * Removes every complete DeepSeek special token from `text`.
 *
 * When the original text started (or ended) with a token, the whitespace that
 * the removal leaves at that edge is trimmed as well. Text without any token is
 * returned unchanged.
 *
 * @param text - Text that may contain leaked special tokens.
 * @returns The text with all complete special tokens removed.
 */
function stripDeepseekSpecialTokens(text: string): string {
	const stripped = text.replace(DEEPSEEK_SPECIAL_TOKEN_REGEX, "");
	if (stripped === text) return text;

	let normalized = stripped;
	if (DEEPSEEK_SPECIAL_TOKEN_AT_START_REGEX.test(text)) normalized = normalized.replace(/^\s+/u, "");
	if (DEEPSEEK_SPECIAL_TOKEN_AT_END_REGEX.test(text)) normalized = normalized.replace(/\s+$/u, "");
	return normalized;
}

/**
 * Finds the trailing part of `text` that must be held back until the next
 * chunk arrives because it may be an incomplete special token.
 *
 * That is either an opening delimiter (fullwidth or ASCII) that has not been
 * closed yet, or a solo trailing `<` that could become one. The `<` is held
 * even when an earlier token in the same buffer is already closed.
 *
 * @param text - Buffered, not yet emitted text.
 * @returns The suffix to keep buffered, or the empty string when everything can be flushed.
 */
function getTrailingPartialDeepseekToken(text: string): string {
	const soloBracket = text.endsWith("<") ? "<" : "";

	let openIdx = -1;
	for (const delim of DEEPSEEK_OPEN_DELIMS) {
		openIdx = Math.max(openIdx, text.lastIndexOf(delim));
	}
	if (openIdx === -1) return soloBracket;

	const tail = text.slice(openIdx);
	// The last opened token is already closed; only a fresh `<` can still be pending.
	if (DEEPSEEK_CLOSE_DELIMS.some(delim => tail.includes(delim))) return soloBracket;
	// Cap the held-back length so a stray opening delimiter in normal prose can't grow unboundedly.
	if (tail.length > 256) return soloBracket;
	return tail;
}
375
+
376
// Message of the error used as the local abort reason when the first-event watchdog fires.
+ const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE =
377
+ "OpenAI completions stream timed out while waiting for the first event";
378
+
// Streams one chat-completions request and republishes it as assistant message events.
// The event stream is returned synchronously; the async IIFE below feeds it and always
// finishes it with either a done event or an error event.
379
+ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
380
+ model: Model<"openai-completions">,
381
+ context: Context,
382
+ options?: OpenAICompletionsOptions,
383
+ ): AssistantMessageEventStream => {
384
+ const stream = new AssistantMessageEventStream();
385
+
386
+ (async () => {
387
+ const startTime = Date.now();
388
+ let firstTokenTime: number | undefined;
389
+ let getCapturedErrorResponse: (() => CapturedHttpErrorResponse | undefined) | undefined;
390
+
391
+ const output: AssistantMessage = createInitialResponsesAssistantMessage(model.api, model.provider, model.id);
392
+ let rawRequestDump: RawHttpRequestDump | undefined;
393
+ const abortTracker = createAbortSourceTracker(options?.signal);
394
+ const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
395
+ const { requestAbortController, requestSignal } = abortTracker;
396
+
397
+ try {
398
+ const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
399
+ const idleTimeoutMs = getOpenAIStreamIdleTimeoutMs();
400
+ const {
401
+ client,
402
+ copilotPremiumRequests,
403
+ baseUrl,
404
+ requestHeaders,
405
+ getCapturedErrorResponse: captureErrorResponse,
406
+ clearCapturedErrorResponse,
407
+ } = await createClient(
408
+ model,
409
+ context,
410
+ apiKey,
411
+ options?.headers,
412
+ options?.initiatorOverride,
413
+ options?.onSseEvent,
414
+ options?.fetch,
415
+ options?.streamFirstEventTimeoutMs,
416
+ );
417
+ const premiumRequestsTotal = copilotPremiumRequests;
418
+ getCapturedErrorResponse = captureErrorResponse;
419
+ let appliedToolStrictMode: AppliedToolStrictMode = "mixed";
420
+ const providerSessionState = getOpenAICompletionsProviderSessionState(
421
+ model,
422
+ baseUrl,
423
+ options?.providerSessionState,
424
+ );
425
+ let disableStrictTools = providerSessionState?.strictToolsDisabled ?? false;
426
+ let strictFallbackErrorMessage: string | undefined;
// Builds the request params, records a raw request dump for error reporting, and issues
// the streaming request. Once disableStrictTools is set, the override is forced to none.
427
+ const createCompletionsStream = async (toolStrictModeOverride?: ToolStrictModeOverride) => {
428
+ clearCapturedErrorResponse();
429
+ const effectiveToolStrictModeOverride = disableStrictTools ? "none" : toolStrictModeOverride;
430
+ const { params, toolStrictMode } = buildParams(
431
+ model,
432
+ context,
433
+ options,
434
+ baseUrl,
435
+ effectiveToolStrictModeOverride,
436
+ );
437
+ appliedToolStrictMode = toolStrictMode;
438
+ options?.onPayload?.(params);
439
+ rawRequestDump = {
440
+ provider: model.provider,
441
+ api: output.api,
442
+ model: model.id,
443
+ method: "POST",
444
+ url: `${baseUrl}/chat/completions`,
445
+ headers: requestHeaders,
446
+ body: params,
447
+ };
448
+ const { data, response, request_id } = await client.chat.completions
449
+ .create(params, { signal: requestSignal })
450
+ .withResponse();
451
+ await notifyProviderResponse(options, response, model, request_id);
452
+ return data;
453
+ };
454
+ let openaiStream: AsyncIterable<ChatCompletionChunk>;
// First attempt uses the default tool strict mode. The catch below retries exactly once
// with strict tools turned off, or rethrows when neither fallback condition applies.
455
+ try {
456
+ openaiStream = await callWithCopilotModelRetry(() => createCompletionsStream(), {
457
+ provider: model.provider,
458
+ signal: requestSignal,
459
+ });
460
+ } catch (error) {
461
+ const capturedErrorResponse = getCapturedErrorResponse();
// OpenRouter Anthropic models: a compiled-grammar-too-large 400 also flips the shared
// session state, so later requests that reuse that state start with strict tools off.
462
+ if (
463
+ isOpenRouterAnthropicModel(model) &&
464
+ !disableStrictTools &&
465
+ isCompiledGrammarTooLargeStrictError(error, capturedErrorResponse)
466
+ ) {
467
+ strictFallbackErrorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
468
+ output.errorMessage = strictFallbackErrorMessage;
469
+ if (providerSessionState) {
470
+ providerSessionState.strictToolsDisabled = true;
471
+ }
472
+ disableStrictTools = true;
473
+ openaiStream = await createCompletionsStream("none");
474
+ } else {
475
+ if (!shouldRetryWithoutStrictTools(error, capturedErrorResponse, appliedToolStrictMode, context.tools)) {
476
+ throw error;
477
+ }
478
+ openaiStream = await createCompletionsStream("none");
479
+ }
480
+ }
// The watchdog callback is the only place in this function that calls abortLocally.
481
+ const firstEventWatchdog = createWatchdog(
482
+ options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
483
+ () => abortTracker.abortLocally(firstEventTimeoutAbortError),
484
+ );
485
+ if (premiumRequestsTotal !== undefined) {
486
+ output.usage.premiumRequests = premiumRequestsTotal;
487
+ }
488
+ stream.push({ type: "start", partial: output });
489
+
490
+ const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
491
+ // Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
492
+ // native API) leak chat-template tool-call markers in `delta.content` even
493
+ // though tool calls are also surfaced structurally. Strip the leaked markers
494
+ // so users don't see raw `<|...|>` tokens.
495
+ const stripDeepseekChatTemplateTokens =
496
+ /deepseek/i.test(model.id) && (model.provider === "nvidia" || model.provider === "deepseek");
497
+ type OpenAIStreamBlock = TextContent | ThinkingContent | (ToolCall & { partialArgs: string });
498
+ let currentBlock: OpenAIStreamBlock | undefined;
// Position of a block inside output.content; without a block it falls back to the last index (or 0).
499
+ const blockIndex = (block: OpenAIStreamBlock | undefined): number => {
500
+ if (!block) return Math.max(0, output.content.length - 1);
501
+ return output.content.indexOf(block);
502
+ };
// Emits the end event that matches the block type. Tool calls get their final parsed
// arguments and lose the partialArgs scratch field before the event is pushed.
503
+ const finishCurrentBlock = (block: OpenAIStreamBlock | undefined): void => {
504
+ if (!block) return;
505
+ const contentIndex = blockIndex(block);
506
+ if (contentIndex < 0) return;
507
+ if (block.type === "text") {
508
+ stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
509
+ return;
510
+ }
511
+ if (block.type === "thinking") {
512
+ stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
513
+ return;
514
+ }
515
+ block.arguments = parseStreamingJson(block.partialArgs);
516
+ delete (block as { partialArgs?: string }).partialArgs;
517
+ stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
518
+ };
// Appends visible text, first closing the current block and opening a text block when needed.
519
+ const appendText = (
520
+ message: AssistantMessage,
521
+ eventStream: AssistantMessageEventStream,
522
+ text: string,
523
+ ): void => {
524
+ if (!currentBlock || currentBlock.type !== "text") {
525
+ finishCurrentBlock(currentBlock);
526
+ currentBlock = { type: "text", text: "" };
527
+ message.content.push(currentBlock);
528
+ eventStream.push({ type: "text_start", contentIndex: blockIndex(currentBlock), partial: message });
529
+ }
530
+ currentBlock.text += text;
531
+ eventStream.push({
532
+ type: "text_delta",
533
+ contentIndex: blockIndex(currentBlock),
534
+ delta: text,
535
+ partial: message,
536
+ });
537
+ };
// Appends reasoning text. A new thinking block is opened when the current block is not a
// thinking block or when a signature is given that differs from the current one.
538
+ const appendThinking = (
539
+ message: AssistantMessage,
540
+ eventStream: AssistantMessageEventStream,
541
+ thinking: string,
542
+ signature?: string,
543
+ ): void => {
544
+ if (
545
+ !currentBlock ||
546
+ currentBlock.type !== "thinking" ||
547
+ (signature !== undefined && currentBlock.thinkingSignature !== signature)
548
+ ) {
549
+ finishCurrentBlock(currentBlock);
550
+ currentBlock = { type: "thinking", thinking: "", thinkingSignature: signature };
551
+ message.content.push(currentBlock);
552
+ eventStream.push({
553
+ type: "thinking_start",
554
+ contentIndex: blockIndex(currentBlock),
555
+ partial: message,
556
+ });
557
+ }
558
+ if (signature !== undefined && !currentBlock.thinkingSignature) {
559
+ currentBlock.thinkingSignature = signature;
560
+ }
561
+ currentBlock.thinking += thinking;
562
+ eventStream.push({
563
+ type: "thinking_delta",
564
+ contentIndex: blockIndex(currentBlock),
565
+ delta: thinking,
566
+ partial: message,
567
+ });
568
+ };
569
+
570
+ let taggedTextBuffer = "";
571
+ let insideTaggedThinking = false;
// Thin wrappers that skip empty input and record the time of the first emitted token.
572
+ const appendTextDelta = (text: string) => {
573
+ if (!text) return;
574
+ if (!firstTokenTime) firstTokenTime = Date.now();
575
+ appendText(output, stream, text);
576
+ };
577
+ const appendThinkingDelta = (thinking: string, signature?: string) => {
578
+ if (!thinking) return;
579
+ if (!firstTokenTime) firstTokenTime = Date.now();
580
+ appendThinking(output, stream, thinking, signature);
581
+ };
582
+
// Splits the buffered MiniMax text on think tags. Everything up to a possible partial tag
// at the end is emitted; the partial tag stays buffered until the next chunk arrives.
583
+ const flushTaggedTextBuffer = () => {
584
+ while (taggedTextBuffer.length > 0) {
585
+ if (insideTaggedThinking) {
586
+ const closingTag = findFirstTag(taggedTextBuffer, MINIMAX_THINK_CLOSE_TAGS);
587
+ if (closingTag) {
588
+ appendThinkingDelta(taggedTextBuffer.slice(0, closingTag.index));
589
+ taggedTextBuffer = taggedTextBuffer.slice(closingTag.index + closingTag.tag.length);
590
+ insideTaggedThinking = false;
591
+ continue;
592
+ }
593
+
594
+ const trailingPartialTag = getTrailingPartialTag(taggedTextBuffer, MINIMAX_THINK_CLOSE_TAGS);
595
+ const flushLength = taggedTextBuffer.length - trailingPartialTag.length;
596
+ appendThinkingDelta(taggedTextBuffer.slice(0, flushLength));
597
+ taggedTextBuffer = trailingPartialTag;
598
+ break;
599
+ }
600
+
601
+ const openingTag = findFirstTag(taggedTextBuffer, MINIMAX_THINK_OPEN_TAGS);
602
+ if (openingTag) {
603
+ appendTextDelta(taggedTextBuffer.slice(0, openingTag.index));
604
+ taggedTextBuffer = taggedTextBuffer.slice(openingTag.index + openingTag.tag.length);
605
+ insideTaggedThinking = true;
606
+ continue;
607
+ }
608
+
609
+ const trailingPartialTag = getTrailingPartialTag(taggedTextBuffer, MINIMAX_THINK_OPEN_TAGS);
610
+ const flushLength = taggedTextBuffer.length - trailingPartialTag.length;
611
+ appendTextDelta(taggedTextBuffer.slice(0, flushLength));
612
+ taggedTextBuffer = trailingPartialTag;
613
+ break;
614
+ }
615
+ };
616
+
617
+ let deepseekStripBuffer = "";
// Emits buffered DeepSeek text with special tokens removed. Unless final is set, a trailing
// partial token stays buffered. Output that consists only of whitespace left over from
// stripping is dropped.
618
+ const flushDeepseekStripBuffer = (final: boolean): void => {
619
+ if (deepseekStripBuffer.length === 0) return;
620
+ let flushable: string;
621
+ if (final) {
622
+ flushable = deepseekStripBuffer;
623
+ deepseekStripBuffer = "";
624
+ } else {
625
+ const trailing = getTrailingPartialDeepseekToken(deepseekStripBuffer);
626
+ flushable = deepseekStripBuffer.slice(0, deepseekStripBuffer.length - trailing.length);
627
+ deepseekStripBuffer = trailing;
628
+ }
629
+ const stripped = stripDeepseekSpecialTokens(flushable);
630
+ if (stripped && (stripped === flushable || stripped.trim().length > 0)) appendTextDelta(stripped);
631
+ };
632
+
633
+ const kimiHealer = modelMayLeakKimiToolCalls(model.provider, model.id) ? new ToolCallHealer() : undefined;
634
+ let healedToolCallEmitted = false;
// Publishes a tool call recovered by the healer as a complete start, delta, end sequence.
635
+ const emitHealedToolCall = (call: HealedToolCall): void => {
636
+ finishCurrentBlock(currentBlock);
637
+ const block: ToolCall & { partialArgs: string } = {
638
+ type: "toolCall",
639
+ id: call.id,
640
+ name: call.name,
641
+ arguments: {},
642
+ partialArgs: call.arguments,
643
+ };
644
+ block.arguments = parseStreamingJson(call.arguments);
645
+ currentBlock = block;
646
+ output.content.push(block);
647
+ stream.push({ type: "toolcall_start", contentIndex: blockIndex(block), partial: output });
648
+ stream.push({
649
+ type: "toolcall_delta",
650
+ contentIndex: blockIndex(block),
651
+ delta: call.arguments,
652
+ partial: output,
653
+ });
654
+ finishCurrentBlock(block);
655
+ currentBlock = undefined;
656
+ healedToolCallEmitted = true;
657
+ };
658
+ const flushHealedToolCalls = (): void => {
659
+ if (!kimiHealer) return;
660
+ const calls = kimiHealer.drainCompleted();
661
+ for (const call of calls) emitHealedToolCall(call);
662
+ };
663
+
// Main loop: one iteration per streamed chunk. Only the first choice of each chunk is used.
664
+ for await (const chunk of iterateWithIdleTimeout(openaiStream, {
665
+ watchdog: firstEventWatchdog,
666
+ idleTimeoutMs,
667
+ errorMessage: "OpenAI completions stream stalled while waiting for the next event",
668
+ onIdle: () => requestAbortController.abort(),
669
+ abortSignal: options?.signal,
670
+ isProgressItem: isOpenAICompletionsProgressChunk,
671
+ })) {
672
+ if (!chunk || typeof chunk !== "object") continue;
673
+
674
+ // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
675
+ // and each chunk in a streamed completion carries the same id.
676
+ output.responseId ||= chunk.id;
677
+
678
+ if (chunk.usage) {
679
+ output.usage = parseChunkUsage(chunk.usage, model, premiumRequestsTotal);
680
+ }
681
+
682
+ const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
683
+ if (!choice) continue;
684
+
// Usage reported on the choice is only consulted when the chunk itself carries none.
685
+ if (!chunk.usage) {
686
+ const choiceUsage = getChoiceUsage(choice);
687
+ if (choiceUsage) {
688
+ output.usage = parseChunkUsage(choiceUsage, model, premiumRequestsTotal);
689
+ }
690
+ }
691
+
692
+ if (choice.finish_reason) {
693
+ const finishReasonResult = mapStopReason(choice.finish_reason);
694
+ output.stopReason = finishReasonResult.stopReason;
695
+ if (finishReasonResult.errorMessage) {
696
+ output.errorMessage = finishReasonResult.errorMessage;
697
+ }
698
+ }
699
+
700
+ if (choice.delta) {
701
+ const normalizedDeltaText = normalizeStreamingContentText(choice.delta.content);
// Content routing is exclusive: MiniMax tag parsing, then DeepSeek token stripping,
// then the Kimi healer, and plain text otherwise.
702
+ if (normalizedDeltaText.length > 0) {
703
+ if (!firstTokenTime) firstTokenTime = Date.now();
704
+ if (parseMiniMaxThinkTags) {
705
+ taggedTextBuffer += normalizedDeltaText;
706
+ flushTaggedTextBuffer();
707
+ } else if (stripDeepseekChatTemplateTokens) {
708
+ deepseekStripBuffer += normalizedDeltaText;
709
+ flushDeepseekStripBuffer(false);
710
+ } else if (kimiHealer) {
711
+ const hasStructuredToolCalls =
712
+ Array.isArray(choice.delta.tool_calls) && choice.delta.tool_calls.length > 0;
713
+ if (hasStructuredToolCalls) {
714
+ // Same chunk leaks markers AND carries structured tool_calls.
715
+ // Strip the marker text from visible output, but drop any
716
+ // synthesized calls so the structured payload stays the
717
+ // single source of truth (avoids double-dispatch).
718
+ const clean = kimiHealer.consumeWithoutCalls(normalizedDeltaText);
719
+ if (clean.length > 0) appendTextDelta(clean);
720
+ } else {
721
+ const clean = kimiHealer.feed(normalizedDeltaText);
722
+ if (clean.length > 0) appendTextDelta(clean);
723
+ flushHealedToolCalls();
724
+ }
725
+ } else {
726
+ appendTextDelta(normalizedDeltaText);
727
+ }
728
+ }
729
+
730
+ // Some endpoints return reasoning in reasoning_content (llama.cpp),
731
+ // or reasoning (other openai compatible endpoints)
732
+ // Use the first non-empty reasoning field to avoid duplication
733
+ // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
734
+ const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
735
+ let foundReasoningField: string | null = null;
736
+ for (const field of reasoningFields) {
737
+ if (
738
+ (choice.delta as any)[field] !== null &&
739
+ (choice.delta as any)[field] !== undefined &&
740
+ (choice.delta as any)[field].length > 0
741
+ ) {
742
+ if (!foundReasoningField) {
743
+ foundReasoningField = field;
744
+ break;
745
+ }
746
+ }
747
+ }
748
+
// The field name doubles as the thinking signature passed to appendThinkingDelta.
749
+ if (foundReasoningField) {
750
+ const delta = (choice.delta as any)[foundReasoningField];
751
+ appendThinkingDelta(delta, foundReasoningField);
752
+ }
753
+
754
+ if (choice?.delta?.tool_calls && choice.delta.tool_calls.length > 0) {
755
+ for (const toolCall of choice.delta.tool_calls) {
// A new tool call block starts when the current block is not a tool call or when
// the delta carries an id that differs from the current one.
756
+ if (
757
+ !currentBlock ||
758
+ currentBlock.type !== "toolCall" ||
759
+ (toolCall.id && currentBlock.id !== toolCall.id)
760
+ ) {
761
+ finishCurrentBlock(currentBlock);
762
+ currentBlock = {
763
+ type: "toolCall",
764
+ id: toolCall.id || "",
765
+ name: toolCall.function?.name || "",
766
+ arguments: {},
767
+ partialArgs: "",
768
+ };
769
+ output.content.push(currentBlock);
770
+ stream.push({
771
+ type: "toolcall_start",
772
+ contentIndex: blockIndex(currentBlock),
773
+ partial: output,
774
+ });
775
+ }
776
+
777
+ if (currentBlock.type === "toolCall") {
778
+ if (toolCall.id) currentBlock.id = toolCall.id;
779
+ if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
780
+ let delta = "";
781
+ if (toolCall.function?.arguments) {
782
+ delta = toolCall.function.arguments;
783
+ currentBlock.partialArgs += toolCall.function.arguments;
784
+ currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
785
+ }
786
+ stream.push({
787
+ type: "toolcall_delta",
788
+ contentIndex: blockIndex(currentBlock),
789
+ delta,
790
+ partial: output,
791
+ });
792
+ }
793
+ }
794
+ }
795
+
// Encrypted reasoning details are stored, serialized, on the tool call with the same id.
796
+ const reasoningDetails = (choice.delta as any).reasoning_details;
797
+ if (reasoningDetails && Array.isArray(reasoningDetails)) {
798
+ for (const detail of reasoningDetails) {
799
+ if (detail.type === "reasoning.encrypted" && detail.id && detail.data) {
800
+ const matchingToolCall = output.content.find(
801
+ b => b.type === "toolCall" && b.id === detail.id,
802
+ ) as ToolCall | undefined;
803
+ if (matchingToolCall) {
804
+ matchingToolCall.thoughtSignature = JSON.stringify(detail);
805
+ }
806
+ }
807
+ }
808
+ }
809
+ }
810
+ }
811
+
// End of stream: drain whatever the provider-specific buffers still hold.
812
+ if (parseMiniMaxThinkTags && taggedTextBuffer.length > 0) {
813
+ if (insideTaggedThinking) {
814
+ appendThinkingDelta(taggedTextBuffer);
815
+ } else {
816
+ appendTextDelta(taggedTextBuffer);
817
+ }
818
+ taggedTextBuffer = "";
819
+ }
820
+
821
+ if (stripDeepseekChatTemplateTokens) {
822
+ flushDeepseekStripBuffer(true);
823
+ }
824
+
825
+ if (kimiHealer) {
826
+ const trailing = kimiHealer.flushPending();
827
+ if (trailing.length > 0) appendTextDelta(trailing);
828
+ flushHealedToolCalls();
829
+ if (healedToolCallEmitted && output.stopReason === "stop") {
830
+ // Hosts that leak Kimi tool tokens often still report
831
+ // `finish_reason: stop` for the surrounding turn. Promote
832
+ // only that natural-completion finish — leave `error`,
833
+ // `length`, `aborted`, etc. untouched.
834
+ output.stopReason = "toolUse";
835
+ }
836
+ }
837
+
838
+ finishCurrentBlock(currentBlock);
839
+
// A local abort reason or a caller abort turns an otherwise complete stream into a failure.
840
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
841
+ if (firstEventTimeoutError) {
842
+ throw firstEventTimeoutError;
843
+ }
844
+ if (abortTracker.wasCallerAbort()) {
845
+ throw new Error("Request was aborted");
846
+ }
847
+
848
+ if (output.stopReason === "aborted") {
849
+ throw new Error("Request was aborted");
850
+ }
851
+ if (output.stopReason === "error") {
852
+ throw new Error(output.errorMessage || "Provider returned an error stop reason");
853
+ }
854
+
// Success: errorMessage is reset to the strict-fallback message, which is undefined
// unless the strict-tools fallback ran.
855
+ output.errorMessage = strictFallbackErrorMessage;
856
+ output.duration = Date.now() - startTime;
857
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
858
+ stream.push({ type: "done", reason: output.stopReason, message: output });
859
+ stream.end();
// Failure path: classify as aborted or error, build the error message, and close the
// stream with an error event. A local abort reason takes precedence as the message.
860
+ } catch (error) {
861
+ for (const block of output.content) delete (block as any).index;
862
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
863
+ output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
864
+ output.errorStatus = extractHttpStatusFromError(error) ?? getCapturedErrorResponse?.()?.status;
865
+ output.errorMessage =
866
+ firstEventTimeoutError?.message ??
867
+ (await finalizeErrorMessage(error, rawRequestDump, getCapturedErrorResponse?.()));
868
+ // Some providers via OpenRouter include extra details here.
869
+ const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
870
+ if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
871
+ output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
872
+ output.duration = Date.now() - startTime;
873
+ if (firstTokenTime) output.ttft = firstTokenTime - startTime;
874
+ stream.push({ type: "error", reason: output.stopReason, error: output });
875
+ stream.end();
876
+ }
877
+ })();
878
+
879
+ return stream;
880
+ };
881
+
882
/**
 * Builds the OpenAI SDK client for one chat-completions request, together with
 * the resolved request metadata the caller needs for logging and error reporting.
 *
 * @param model - Model descriptor; supplies provider, id, base URL and default headers.
 * @param context - Conversation context; its messages are only read for GitHub Copilot headers.
 * @param apiKey - Explicit API key; `OPENAI_API_KEY` is used when this is empty.
 * @param extraHeaders - Caller headers, applied on top of the provider defaults.
 * @param initiatorOverride - Forwarded to the Copilot dynamic-header builder.
 * @param onSseEvent - Optional SSE debug hook; when set, fetch is wrapped to report events.
 * @param fetchOverride - Fetch implementation used instead of the global `fetch`.
 * @param streamFirstEventTimeoutOverride - Caller-supplied first-event timeout in ms;
 * `0` means no SDK request timeout is configured.
 * @returns The client, the Copilot premium request count (if any), the resolved base URL,
 * the headers sent by default, and accessors for the last captured non-OK HTTP response.
 * @throws Error when neither `apiKey` nor `OPENAI_API_KEY` is available.
 */
async function createClient(
	model: Model<"openai-completions">,
	context: Context,
	apiKey?: string,
	extraHeaders?: Record<string, string>,
	initiatorOverride?: MessageAttribution,
	onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
	fetchOverride?: FetchImpl,
	streamFirstEventTimeoutOverride?: number,
): Promise<{
	client: OpenAI;
	copilotPremiumRequests: number | undefined;
	baseUrl: string | undefined;
	requestHeaders: Record<string, string>;
	getCapturedErrorResponse: () => CapturedHttpErrorResponse | undefined;
	clearCapturedErrorResponse: () => void;
}> {
	// The key as supplied (argument first, environment second); Copilot derives its
	// access token and base URL from this raw value.
	const rawApiKey = apiKey || $env.OPENAI_API_KEY;
	if (!rawApiKey) {
		throw new Error(
			"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
		);
	}
	let resolvedApiKey = rawApiKey;

	let headers = { ...model.headers };
	if (model.provider === "openrouter") {
		// OpenRouter app attribution (https://openrouter.ai/docs/app-attribution):
		// `HTTP-Referer` is the unique app identifier that enables rankings and per-app
		// analytics, `X-OpenRouter-Title` is the display name (`X-Title` is the legacy
		// alias), `X-OpenRouter-Categories` places the app in the `cli-agent` marketplace
		// category, and `User-Agent` replaces the default OpenAI SDK UA so traffic is
		// identifiable in upstream provider logs.
		//
		// Response caching is always on: identical requests within the one-hour TTL are
		// replayed from OpenRouter's edge cache.
		// https://openrouter.ai/docs/features/response-caching
		Object.assign(headers, {
			"User-Agent": `Gajae-Code/${packageJson.version}`,
			"HTTP-Referer": "https://gajae-code.dev/",
			"X-OpenRouter-Title": "Gajae-Code",
			"X-OpenRouter-Categories": "cli-agent",
			"X-OpenRouter-Cache": "true",
			"X-OpenRouter-Cache-TTL": "3600",
		});
	}
	Object.assign(headers, extraHeaders);
	if (model.provider === "kimi-code") {
		// Kimi common headers act as defaults; anything already set wins.
		headers = { ...getKimiCommonHeaders(), ...headers };
	}

	let copilotPremiumRequests: number | undefined;
	let baseUrl = model.baseUrl;
	if (model.provider === "github-copilot") {
		resolvedApiKey = parseGitHubCopilotApiKey(rawApiKey).accessToken;
		const copilot = buildCopilotDynamicHeaders({
			messages: context.messages,
			hasImages: hasCopilotVisionInput(context.messages),
			premiumMultiplier: model.premiumMultiplier,
			headers,
			initiatorOverride,
		});
		Object.assign(headers, copilot.headers);
		copilotPremiumRequests = copilot.premiumRequests;
		baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
	}

	// Azure OpenAI requires /deployments/{id}/chat/completions?api-version=YYYY-MM-DD.
	// The generic openai-completions path adds neither, producing silent 404s.
	let azureDefaultQuery: Record<string, string> | undefined;
	if (baseUrl?.includes(".openai.azure.com")) {
		const apiVersion = $env.AZURE_OPENAI_API_VERSION || "2024-10-21";
		const hasDeploymentSegment = baseUrl.includes("/deployments/");
		if (!hasDeploymentSegment) {
			baseUrl = `${baseUrl}/deployments/${model.id}`;
		}
		azureDefaultQuery = { "api-version": apiVersion };
	}

	// Remember the most recent non-OK response so callers can inspect status and body
	// after the SDK has turned it into an exception.
	let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
	const baseFetch = fetchOverride ?? fetch;
	const capturingFetch = async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
		const response = await baseFetch(input, init);
		if (response.ok) {
			capturedErrorResponse = undefined;
			return response;
		}

		let bodyText: string | undefined;
		let bodyJson: unknown;
		try {
			// Read from a clone so the SDK can still consume the original body.
			const text = await response.clone().text();
			bodyText = text;
			if (text.trim().length > 0) {
				try {
					bodyJson = JSON.parse(text);
				} catch {
					// Body is not JSON; the text alone is kept.
				}
			}
		} catch {
			// Body could not be read; status and headers are still captured.
		}
		capturedErrorResponse = {
			status: response.status,
			headers: response.headers,
			bodyText,
			bodyJson,
		};
		return response;
	};
	const wrappedFetch = Object.assign(
		capturingFetch,
		baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
	);
	const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;

	// Bound the HTTP request timeout to roughly the first-event watchdog window.
	// The OpenAI SDK's default is 10 minutes per attempt × `maxRetries`, which turns a
	// stalled-before-headers fetch into a multi-minute hang invisible to the agent loop
	// (the iterator watchdog only arms AFTER `create()` returns). Using the first-event
	// timeout keeps both layers aligned: the SDK gives up before the agent watchdog
	// would have, surfacing a real error to the caller's catch.
	//
	// A caller may raise `StreamOptions.streamFirstEventTimeoutMs` for a slow-before-headers
	// provider; that is respected so the SDK doesn't give up before the wrapping watchdog
	// arms. An explicit `0` disables the first-event watchdog, and the SDK treats
	// `timeout: 0` as an immediate timeout, so no request timeout is passed in that case.
	const envSdkTimeoutMs = getStreamFirstEventTimeoutMs(getOpenAIStreamIdleTimeoutMs());
	const resolveSdkTimeoutMs = () => {
		if (streamFirstEventTimeoutOverride === 0) return undefined;
		if (streamFirstEventTimeoutOverride === undefined) return envSdkTimeoutMs;
		return Math.max(envSdkTimeoutMs ?? 0, streamFirstEventTimeoutOverride);
	};
	const sdkTimeoutMs = resolveSdkTimeoutMs();

	const client = new OpenAI({
		apiKey: resolvedApiKey,
		baseURL: baseUrl,
		dangerouslyAllowBrowser: true,
		maxRetries: 5,
		defaultHeaders: headers,
		defaultQuery: azureDefaultQuery,
		fetch: debugFetch,
		...(sdkTimeoutMs !== undefined ? { timeout: sdkTimeoutMs } : {}),
	});

	return {
		client,
		copilotPremiumRequests,
		baseUrl,
		requestHeaders: headers,
		getCapturedErrorResponse: () => capturedErrorResponse,
		clearCapturedErrorResponse: () => {
			capturedErrorResponse = undefined;
		},
	};
}
1028
+
1029
/**
 * Assemble the streamed Chat Completions request body for a single call.
 *
 * The body is built incrementally in a fixed order: transport flags, token
 * limit, sampling options, service tier, tools / tool choice, reasoning
 * controls, provider routing, and finally `compat.extraBody`, which is merged
 * last and can therefore override anything set earlier.
 *
 * @param model Target model; its provider, id, baseUrl and reasoning flag drive most branches.
 * @param context Conversation context (messages, optional tools).
 * @param options Per-request options; may be undefined.
 * @param resolvedBaseUrl Optional base URL forwarded to `getCompat`.
 * @param toolStrictModeOverride Optional override forwarded to `convertTools`.
 * @returns The request params plus the strict mode that was applied to the tools.
 * @throws Error when reasoning must be disabled via effort but the model lists no supported efforts.
 */
function buildParams(
	model: Model<"openai-completions">,
	context: Context,
	options: OpenAICompletionsOptions | undefined,
	resolvedBaseUrl?: string,
	toolStrictModeOverride?: ToolStrictModeOverride,
): { params: OpenAICompletionsParams; toolStrictMode: AppliedToolStrictMode } {
	const compat = getCompat(model, resolvedBaseUrl);
	const wireMessages = convertMessages(model, context, compat);
	maybeAddOpenRouterAnthropicCacheControl(model, wireMessages);

	// Kimi (including via OpenRouter and Fireworks router-form IDs such as
	// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
	// max_tokens, not actual output. The official Kimi K2 model guidance
	// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
	// every call since the family can otherwise emit very long reasoning traces
	// before the final answer. Always send max_tokens — match the same
	// Kimi-family regex used by the compat detector.
	// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
	const kimiFamily = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
	const tokenLimit = options?.maxTokens ?? (kimiFamily ? model.maxTokens : undefined);

	// Fireworks and Firepass use their own wire-level model identifiers.
	let wireModelId = model.id;
	if (model.provider === "fireworks") {
		wireModelId = toFireworksWireModelId(model.id);
	} else if (model.provider === "firepass") {
		wireModelId = toFirepassWireModelId(model.id);
	}

	const params: OpenAICompletionsParams = {
		model: wireModelId,
		messages: wireMessages,
		stream: true,
	};
	let toolStrictMode: AppliedToolStrictMode = "none";

	// --- Transport flags -------------------------------------------------
	if (compat.supportsUsageInStreaming !== false) {
		params.stream_options = { include_usage: true };
	}
	if (compat.supportsStore) {
		params.store = false;
	}

	// --- Output token limit (field name depends on the backend) ----------
	if (tokenLimit) {
		if (compat.maxTokensField === "max_tokens") {
			params.max_tokens = tokenLimit;
		} else {
			params.max_completion_tokens = tokenLimit;
		}
	}

	// --- Sampling options: forwarded only when explicitly provided --------
	if (options?.temperature !== undefined) params.temperature = options.temperature;
	if (options?.topP !== undefined) params.top_p = options.topP;
	if (options?.topK !== undefined) params.top_k = options.topK;
	if (options?.minP !== undefined) params.min_p = options.minP;
	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
	if (options?.stopSequences?.length) {
		// A single sequence is sent as a bare string; otherwise at most four are sent.
		const stops = options.stopSequences;
		params.stop = stops.length === 1 ? stops[0] : stops.slice(0, 4);
	}
	if (options?.frequencyPenalty !== undefined) params.frequency_penalty = options.frequencyPenalty;

	// --- Service tier ------------------------------------------------------
	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
		const tier = resolveServiceTier(options?.serviceTier, model.provider);
		if (tier === "flex" || tier === "scale" || tier === "priority") {
			params.service_tier = tier;
		}
	}

	// --- Tools ---------------------------------------------------------------
	if (context.tools?.length) {
		const built = convertTools(context.tools, compat, toolStrictModeOverride);
		params.tools = built.tools;
		toolStrictMode = built.toolStrictMode;
	} else if (context.tools === undefined && hasToolHistory(context.messages)) {
		// Anthropic (via LiteLLM/proxy) requires the `tools` param when the conversation
		// contains tool_calls/tool_results, even when no tools are offered this turn.
		// Only inject the sentinel when the caller passed `context.tools = undefined`
		// (i.e. tools were not specified at all). An explicit `context.tools = []` means
		// the caller opted out of tools for this turn (as /btw and IRC background replies
		// do via AgentSession.runEphemeralTurn) — honour that intent and emit nothing,
		// so LiteLLM → Bedrock never sees an empty `toolConfig` block.
		params.tools = [];
	}

	if (options?.toolChoice && compat.supportsToolChoice) {
		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
	}

	const hasOfferedTools = Array.isArray(params.tools) && params.tools.length > 0;
	if (params.tool_choice === "none" && !hasOfferedTools) {
		// `tool_choice: "none"` with no tools to gate is redundant and also
		// trips LiteLLM → Bedrock: the proxy serializes the directive into a
		// `toolConfig` block, and Bedrock requires `toolConfig.tools` to be
		// non-empty whenever the conversation already holds `toolUse`/`toolResult`
		// content. Drop it whenever the resolved tools list is missing or empty.
		// Side-channel turns hit this: `/btw` and IRC background replies route
		// through `AgentSession.runEphemeralTurn`, which sets `context.tools = []`
		// and `toolChoice: "none"` (see packages/coding-agent/src/session/agent-session.ts).
		delete params.tool_choice;
	}

	// --- Reasoning controls --------------------------------------------------
	// Every reasoning branch requires a reasoning-capable model on a provider
	// other than github-copilot, so that shared guard is hoisted here.
	const reasoningEligible = model.provider !== "github-copilot" && model.reasoning;
	if (reasoningEligible) {
		const requestedEffort = options?.reasoning;
		const reasoningOff = options?.disableReasoning;

		switch (compat.thinkingFormat) {
			case "zai": {
				// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
				// Must explicitly disable since z.ai defaults to thinking enabled.
				const enabled = requestedEffort && !reasoningOff;
				params.thinking = { type: enabled ? "enabled" : "disabled" };
				break;
			}
			case "qwen":
				// Qwen uses top-level enable_thinking: boolean
				params.enable_thinking = !!requestedEffort && !reasoningOff;
				break;
			case "qwen-chat-template":
				params.chat_template_kwargs = {
					enable_thinking: !!requestedEffort && !reasoningOff,
				};
				break;
			case "openrouter": {
				// OpenRouter normalizes reasoning across providers via a nested reasoning object.
				// Without an explicit signal, OpenRouter defaults reasoning models to thinking, which
				// silently consumes the entire output budget on small `max_tokens` requests (e.g.
				// title generation). Honor `disableReasoning` to opt out cleanly.
				const openRouterParams = params as typeof params & {
					reasoning?: { effort?: string } | { enabled: false };
				};
				if (reasoningOff) {
					openRouterParams.reasoning = { enabled: false };
				} else if (requestedEffort) {
					openRouterParams.reasoning = {
						effort: mapReasoningEffort(requestedEffort, compat.reasoningEffortMap),
					};
				}
				break;
			}
			default:
				if (!compat.supportsReasoningEffort) break;
				if (requestedEffort && !reasoningOff) {
					// OpenAI-style reasoning_effort
					params.reasoning_effort = mapReasoningEffort(requestedEffort, compat.reasoningEffortMap) as Effort;
				} else if (reasoningOff && !requestedEffort) {
					// Generic OpenAI-compatible effort endpoints do not expose a true off
					// switch. Use the model's lowest supported effort as the closest
					// transport-level approximation when callers request disabled reasoning.
					const lowestEffort = getSupportedEfforts(model)[0];
					if (lowestEffort === undefined) {
						throw new Error(`Model ${model.provider}/${model.id} has no supported reasoning efforts`);
					}
					params.reasoning_effort = mapReasoningEffort(lowestEffort, compat.reasoningEffortMap) as Effort;
				}
				break;
		}
	}

	if (compat.disableReasoningOnToolChoice && params.tool_choice !== undefined) {
		// DeepSeek reasoning models accept tools/tool_choice, but reject that
		// control field while thinking is enabled. Keep the tool-selection
		// contract and suppress reasoning for this single request.
		delete params.reasoning_effort;
		delete params.reasoning;
	}

	if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
		// Backends like Kimi 400 with `tool_choice 'specified' is incompatible
		// with thinking enabled`. Suppress thinking for this single forced-tool
		// turn while keeping the tool-selection contract intact.
		delete params.reasoning_effort;
		delete params.reasoning;
		if (compat.thinkingFormat === "zai") {
			params.thinking = { type: "disabled" };
		}
	}

	// --- Provider routing ------------------------------------------------------
	// OpenRouter provider routing preferences
	if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
		params.provider = compat.openRouterRouting;
	}

	// Vercel AI Gateway provider routing preferences
	if (model.baseUrl.includes("ai-gateway.vercel.sh") && model.compat?.vercelGatewayRouting) {
		const { only, order } = model.compat.vercelGatewayRouting;
		if (only || order) {
			const gateway: Record<string, string[]> = {};
			if (only) gateway.only = only;
			if (order) gateway.order = order;
			params.providerOptions = { gateway };
		}
	}

	// Merged last so configured extra body fields win over everything above.
	if (compat.extraBody) {
		Object.assign(params, compat.extraBody);
	}

	return { params, toolStrictMode };
}
1238
+
1239
/**
 * Read `key` from `value` and return it only when it is a number.
 *
 * @param value Object to read from.
 * @param key Property name to look up.
 * @returns The property value when `typeof` reports `"number"`, otherwise `undefined`.
 */
function getOptionalNumberProperty(value: object, key: string): number | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (typeof candidate !== "number") {
		return undefined;
	}
	return candidate;
}
1243
+
1244
/**
 * Read `key` from `value` and return it only when it is a non-null object.
 *
 * @param value Object to read from.
 * @param key Property name to look up.
 * @returns The property value when it is an object (arrays included) and not
 *   `null`, otherwise `undefined`.
 */
function getOptionalObjectProperty(value: object, key: string): object | undefined {
	const candidate: unknown = Reflect.get(value, key);
	if (candidate === null || typeof candidate !== "object") {
		return undefined;
	}
	return candidate;
}
1248
+
1249
/**
 * Extract a `usage` object attached directly to a streamed choice, if any.
 *
 * @param choice Streamed chunk choice to inspect.
 * @returns The choice's `usage` property when it is a non-null object, otherwise `undefined`.
 */
function getChoiceUsage(choice: ChatCompletionChunk.Choice): object | undefined {
	const usagePayload = getOptionalObjectProperty(choice, "usage");
	return usagePayload;
}
1252
+
1253
/**
 * Convert a raw streamed usage payload into the normalized assistant usage record.
 *
 * @param rawUsage Usage object as received from the provider.
 * @param model Model passed to `calculateCost` together with the built usage record.
 * @param premiumRequests Optional premium-request count; included in the result only when defined.
 * @returns Usage with `input` excluding cache reads and cache writes, and cost
 *   fields initialised to zero before `calculateCost` is invoked on the record.
 */
export function parseChunkUsage(
	rawUsage: object,
	model: Model<"openai-completions">,
	premiumRequests: number | undefined,
): AssistantMessage["usage"] {
	const promptDetails = getOptionalObjectProperty(rawUsage, "prompt_tokens_details");
	const completionDetails = getOptionalObjectProperty(rawUsage, "completion_tokens_details");

	// Cache reads: a top-level `cached_tokens` wins, then the nested
	// `prompt_tokens_details.cached_tokens`, then zero.
	let cacheRead = getOptionalNumberProperty(rawUsage, "cached_tokens");
	if (cacheRead === undefined && promptDetails) {
		cacheRead = getOptionalNumberProperty(promptDetails, "cached_tokens");
	}
	cacheRead ??= 0;

	// OpenRouter exposes cache writes via `prompt_tokens_details.cache_write_tokens`
	// and INCLUDES them in `prompt_tokens`. Without subtracting, cache-write tokens
	// leak into `input` (e.g. GLM/Anthropic via OpenRouter on a fresh cache).
	// Ref: https://openrouter.ai/docs/guides/best-practices/prompt-caching
	let cacheWrite = 0;
	if (promptDetails) {
		cacheWrite = getOptionalNumberProperty(promptDetails, "cache_write_tokens") ?? 0;
	}

	let reasoningTokens = 0;
	if (completionDetails) {
		reasoningTokens = getOptionalNumberProperty(completionDetails, "reasoning_tokens") ?? 0;
	}

	const promptTotal = getOptionalNumberProperty(rawUsage, "prompt_tokens") ?? 0;
	// Clamp at zero in case the reported cache figures exceed `prompt_tokens`.
	const input = Math.max(0, promptTotal - cacheRead - cacheWrite);

	// Per OpenAI's CompletionUsage spec, `reasoning_tokens` is a subset of
	// `completion_tokens` (which is the total billed output). Adding them would
	// double-count.
	const output = getOptionalNumberProperty(rawUsage, "completion_tokens") ?? 0;

	const reasoningPart = reasoningTokens > 0 ? { reasoningTokens } : {};
	const premiumPart = premiumRequests !== undefined ? { premiumRequests } : {};
	const usage: AssistantMessage["usage"] = {
		input,
		output,
		cacheRead,
		cacheWrite,
		totalTokens: input + output + cacheRead + cacheWrite,
		...reasoningPart,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		...premiumPart,
	};
	calculateCost(model, usage);
	return usage;
}
1292
+
1293
/**
 * Translate a requested reasoning effort into the provider's wire value.
 *
 * @param effort Requested effort level.
 * @param reasoningEffortMap Optional per-effort overrides.
 * @returns The override for `effort` when one is present (not null/undefined),
 *   otherwise `effort` itself.
 */
function mapReasoningEffort(
	effort: NonNullable<OpenAICompletionsOptions["reasoning"]>,
	reasoningEffortMap: Partial<Record<NonNullable<OpenAICompletionsOptions["reasoning"]>, string>>,
): string {
	const override = reasoningEffortMap[effort];
	if (override == null) {
		return effort;
	}
	return override;
}
1299
+
1300
/**
 * Mark one cache breakpoint on the conversation for Anthropic models served
 * through OpenRouter. Mutates `messages` in place; does nothing for any other
 * provider/model combination.
 *
 * Anthropic-style caching requires `cache_control` on a text part, so the
 * messages are scanned from the end for the last user/assistant/developer
 * message that carries text:
 *  - string content is rewritten into a single text part with `cache_control`;
 *  - array content gets `cache_control` on its last text part;
 *  - messages with no usable text are skipped and the scan continues backwards.
 *
 * @param model Model whose provider and id decide whether the breakpoint applies.
 * @param messages Wire messages to annotate in place.
 */
function maybeAddOpenRouterAnthropicCacheControl(
	model: Model<"openai-completions">,
	messages: ChatCompletionMessageParam[],
): void {
	const applies = model.provider === "openrouter" && model.id.startsWith("anthropic/");
	if (!applies) return;

	for (let index = messages.length; index-- > 0; ) {
		const message = messages[index];
		const eligibleRole =
			message.role === "user" || message.role === "assistant" || message.role === "developer";
		if (!eligibleRole) continue;

		const body = message.content;
		if (typeof body === "string") {
			const textPart = { type: "text" as const, text: body };
			message.content = [Object.assign(textPart, { cache_control: { type: "ephemeral" } })];
			return;
		}

		if (!Array.isArray(body)) continue;

		// Walk the parts backwards and tag the last text part, if there is one.
		let partIndex = body.length - 1;
		while (partIndex >= 0) {
			const candidate = body[partIndex];
			if (candidate?.type === "text") {
				Object.assign(candidate, { cache_control: { type: "ephemeral" } });
				return;
			}
			partIndex -= 1;
		}
	}
}
1332
+
1333
/**
 * Convert the provider-agnostic conversation into Chat Completions wire messages.
 *
 * Steps, in order:
 *  1. Run `transformMessages` over the history with a tool-call-id normalizer.
 *  2. Emit system prompts (as `developer` or `system`; one message per prompt
 *     or coalesced, depending on `compat`).
 *  3. Walk the transformed messages and emit user/developer, assistant and
 *     tool-result messages, applying the `compat` quirks documented inline.
 *
 * Tool-call IDs rewritten on assistant turns are remembered in a FIFO queue per
 * original ID so the matching tool results reference the same wire ID.
 *
 * @param model Target model (provider, id, reasoning flag and input modalities are consulted).
 * @param context Conversation context supplying `systemPrompt` and `messages`.
 * @param compat Resolved compatibility flags for the target backend.
 * @returns Wire messages ready to be placed in the request body.
 */
export function convertMessages(
	model: Model<"openai-completions">,
	context: Context,
	compat: ResolvedOpenAICompat,
): ChatCompletionMessageParam[] {
	const params: ChatCompletionMessageParam[] = [];

	const normalizeToolCallId = (id: string): string => {
		if (compat.requiresMistralToolIds) return normalizeMistralToolId(id, true);

		// Handle pipe-separated IDs from OpenAI Responses API
		// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)
		// These come from providers like github-copilot, OpenAI code provider, opencode
		// Extract just the call_id part and normalize it
		if (id.includes("|")) {
			const [callId] = id.split("|");
			// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)
			return callId.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 40);
		}

		if (model.provider === "openai") return id.length > 40 ? id.slice(0, 40) : id;
		return id;
	};
	const transformedMessages = transformMessages(context.messages, model, id => normalizeToolCallId(id));

	// Original tool-call ID -> queue of wire IDs handed out for it (FIFO), so
	// repeated original IDs pair up with their results in emission order.
	const remappedToolCallIds = new Map<string, string[]>();
	let generatedToolCallIdCounter = 0;

	// Produce a non-empty ID when normalization leaves nothing usable.
	const generateFallbackToolCallId = (seed: string): string => {
		generatedToolCallIdCounter += 1;
		const hash = Bun.hash(`${model.provider}:${model.id}:${seed}:${generatedToolCallIdCounter}`).toString(36);
		return `call_${hash}`;
	};

	const rememberToolCallId = (originalId: string, normalizedId: string): void => {
		const queue = remappedToolCallIds.get(originalId);
		if (queue) {
			queue.push(normalizedId);
			return;
		}
		remappedToolCallIds.set(originalId, [normalizedId]);
	};

	const consumeToolCallId = (originalId: string): string | null => {
		const queue = remappedToolCallIds.get(originalId);
		if (!queue || queue.length === 0) return null;
		const nextId = queue.shift() ?? null;
		if (queue.length === 0) remappedToolCallIds.delete(originalId);
		return nextId;
	};

	const ensureToolCallId = (rawId: string, seed: string): string => {
		const normalized = normalizeToolCallId(rawId);
		if (normalized.trim().length > 0) return normalized;
		return generateFallbackToolCallId(seed);
	};

	const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
	if (systemPrompts.length > 0) {
		const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
		const role = useDeveloperRole ? "developer" : "system";
		// Default to one block per ordered system prompt so the leading prefix
		// stays byte-identical between turns and the provider's KV cache can
		// reuse it. Hosts whose chat templates reject follow-up system messages
		// (Qwen via vLLM, MiniMax, Alibaba Dashscope, Qwen Portal, …) opt out
		// via `compat.supportsMultipleSystemMessages = false`; in that mode we
		// coalesce into a single message joined by `\n\n`.
		if (compat.supportsMultipleSystemMessages) {
			for (const systemPrompt of systemPrompts) {
				params.push({ role, content: systemPrompt });
			}
		} else {
			params.push({ role, content: systemPrompts.join("\n\n") });
		}
	}

	let lastRole: string | null = null;

	for (let i = 0; i < transformedMessages.length; i++) {
		const msg = transformedMessages[i];
		// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
		// Insert a synthetic assistant message to bridge the gap
		if (
			compat.requiresAssistantAfterToolResult &&
			lastRole === "toolResult" &&
			(msg.role === "user" || msg.role === "developer")
		) {
			params.push({
				role: "assistant",
				content: "I have processed the tool results.",
			});
		}

		const devAsUser = !compat.supportsDeveloperRole;
		if (msg.role === "user" || msg.role === "developer") {
			const role = !devAsUser && msg.role === "developer" ? "developer" : "user";
			if (typeof msg.content === "string") {
				const text = msg.content.toWellFormed();
				// Whitespace-only messages are dropped (and do not update lastRole).
				if (text.trim().length === 0) continue;
				params.push({
					role: role,
					content: text,
				});
			} else {
				const supportsImages = model.input.includes("image");
				const content: ChatCompletionContentPart[] = [];
				let omittedImages = false;
				for (const item of msg.content) {
					if (item.type === "text") {
						const text = item.text.toWellFormed();
						if (text.trim().length === 0) continue;
						content.push({
							type: "text",
							text,
						} satisfies ChatCompletionContentPartText);
					} else if (supportsImages) {
						content.push({
							type: "image_url",
							image_url: {
								url: `data:${item.mimeType};base64,${item.data}`,
							},
						} satisfies ChatCompletionContentPartImage);
					} else {
						omittedImages = true;
					}
				}
				if (omittedImages) {
					content.push({
						type: "text",
						text: NON_VISION_IMAGE_PLACEHOLDER,
					} satisfies ChatCompletionContentPartText);
				}
				if (content.length === 0) continue;
				// NOTE(review): multi-part content is always sent with role "user",
				// even for developer messages — confirm this is intentional.
				params.push({
					role: "user",
					content,
				});
			}
		} else if (msg.role === "assistant") {
			// Some providers (e.g. Mistral) don't accept null content, use empty string instead
			const assistantMsg: ChatCompletionAssistantMessageParam = {
				role: "assistant",
				content: compat.requiresAssistantAfterToolResult ? "" : null,
			};

			const textBlocks = msg.content.filter(b => b.type === "text") as TextContent[];
			// Filter out empty text blocks to avoid API validation errors
			const nonEmptyTextBlocks = textBlocks.filter(b => b.text && b.text.trim().length > 0);
			if (nonEmptyTextBlocks.length > 0) {
				// Always send assistant content as a plain string. Some OpenAI-compatible
				// backends mirror array-of-text-block payloads back to the model literally,
				// causing recursive nested content in subsequent turns.
				assistantMsg.content = nonEmptyTextBlocks.map(b => b.text.toWellFormed()).join("");
			}

			// Handle thinking blocks
			const thinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
			// Filter out empty thinking blocks to avoid API validation errors
			const nonEmptyThinkingBlocks = thinkingBlocks.filter(b => b.thinking && b.thinking.trim().length > 0);
			if (nonEmptyThinkingBlocks.length > 0) {
				if (compat.requiresThinkingAsText) {
					// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
					const thinkingText = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n\n");
					const existingContent = assistantMsg.content;
					if (typeof existingContent === "string" && existingContent.length > 0) {
						// Text content is a plain string at this point (see above), so the
						// thinking text must be prepended as a string. Treating it as an
						// array and calling `unshift` throws a TypeError at runtime.
						assistantMsg.content = `${thinkingText}\n\n${existingContent}`;
					} else if (Array.isArray(existingContent)) {
						existingContent.unshift({ type: "text", text: thinkingText });
					} else {
						// No text content yet (null or empty string): thinking becomes the content.
						assistantMsg.content = [{ type: "text", text: thinkingText }];
					}
				} else if (compat.requiresReasoningContentForToolCalls) {
					// Use the signature from the first thinking block if available, but only for
					// recognized OpenAI-compat reasoning field names. Opaque signatures from other
					// providers (Anthropic encrypted, OpenAI Responses JSON) are not valid property names.
					const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
					const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
					if (signature && recognizedFields.includes(signature)) {
						(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
					}
				}
			}

			if (compat.thinkingFormat === "openai" && compat.requiresReasoningContentForToolCalls) {
				// Pick the wire field: the streamed field name when recognized,
				// otherwise the configured field, otherwise `reasoning_content`.
				const streamedReasoningField = nonEmptyThinkingBlocks[0]?.thinkingSignature;
				const reasoningField =
					streamedReasoningField === "reasoning_content" ||
					streamedReasoningField === "reasoning" ||
					streamedReasoningField === "reasoning_text"
						? streamedReasoningField
						: (compat.reasoningContentField ?? "reasoning_content");
				const reasoningContent = (assistantMsg as any)[reasoningField];
				if (!reasoningContent) {
					const reasoning = (assistantMsg as any).reasoning;
					const reasoningText = (assistantMsg as any).reasoning_text;
					if (reasoning && reasoningField !== "reasoning") {
						(assistantMsg as any)[reasoningField] = reasoning;
					} else if (reasoningText && reasoningField !== "reasoning_text") {
						(assistantMsg as any)[reasoningField] = reasoningText;
					} else if (nonEmptyThinkingBlocks.length > 0) {
						(assistantMsg as any)[reasoningField] = nonEmptyThinkingBlocks.map(b => b.thinking).join("\n");
					}
				}
			}

			const toolCalls = msg.content.filter(b => b.type === "toolCall") as ToolCall[];
			// Replay reasoning_content on assistant turns for backends that validate
			// thinking-mode history. DeepSeek V4 requires reasoning_content on EVERY
			// assistant turn once any prior turn included it — not just tool-call turns.
			// The replay logic has three tiers:
			// 1. Recover from thinking blocks with valid signatures (covers same-model replay
			//    where nonEmptyThinkingBlocks may have filtered out empty-text blocks)
			// 2. For providers that require the field but returned no reasoning at all
			//    (e.g. proxy-stripped reasoning_content), emit an empty string
			// 3. For providers that accept synthetic placeholders (Kimi, OpenRouter), emit "."
			// DeepSeek V4 rejects synthetic "." placeholders — it validates the exact value —
			// so the allowsSyntheticReasoningContentForToolCalls flag controls tier 3.
			const canUseSyntheticReasoningContent =
				compat.requiresReasoningContentForToolCalls &&
				compat.allowsSyntheticReasoningContentForToolCalls &&
				(compat.thinkingFormat === "openai" ||
					compat.thinkingFormat === "openrouter" ||
					compat.thinkingFormat === "zai");
			// DeepSeek reasoning models require reasoning_content on ALL assistant turns,
			// not just tool-call turns. Other providers (Kimi, OpenRouter) only require it
			// on tool-call turns.
			const needsReasoningOnAllTurns =
				compat.requiresReasoningContentForToolCalls && !compat.allowsSyntheticReasoningContentForToolCalls;
			const needsReasoningField = needsReasoningOnAllTurns || toolCalls.length > 0;
			let hasReasoningField =
				(assistantMsg as any).reasoning_content !== undefined ||
				(assistantMsg as any).reasoning !== undefined ||
				(assistantMsg as any).reasoning_text !== undefined;
			// Tier 1: Recover reasoning_content from ALL thinking blocks (including empty-text
			// ones) when the provider requires exact replay and rejects synthetic placeholders.
			// This covers the case where thinking blocks have valid signatures but were excluded
			// by the nonEmptyThinkingBlocks filter above, or where thinking text is empty but
			// the signature identifies the correct field name for replay.
			// Only recognized OpenAI-compat reasoning field names qualify — opaque signatures
			// from other providers (Anthropic encrypted, OpenAI Responses JSON, etc.) are not
			// valid property names for the wire message.
			if (
				needsReasoningField &&
				!hasReasoningField &&
				compat.requiresReasoningContentForToolCalls &&
				!compat.allowsSyntheticReasoningContentForToolCalls
			) {
				const allThinkingBlocks = msg.content.filter(b => b.type === "thinking") as ThinkingContent[];
				if (allThinkingBlocks.length > 0) {
					const signature = allThinkingBlocks[0].thinkingSignature;
					const recognizedFields = ["reasoning_content", "reasoning", "reasoning_text"];
					if (signature && recognizedFields.includes(signature)) {
						(assistantMsg as any)[signature] = allThinkingBlocks.map(b => b.thinking).join("\n");
						hasReasoningField = true;
					}
				}
			}
			// Tier 2: When the provider requires reasoning_content but there are genuinely no
			// thinking blocks at all (e.g. proxy stripped reasoning_content from the response),
			// emit an empty string. The field must be present; an empty string is the most honest
			// representation of "no reasoning was captured."
			if (
				needsReasoningField &&
				!hasReasoningField &&
				compat.requiresReasoningContentForToolCalls &&
				!compat.allowsSyntheticReasoningContentForToolCalls
			) {
				const reasoningField = compat.reasoningContentField ?? "reasoning_content";
				(assistantMsg as any)[reasoningField] = "";
				hasReasoningField = true;
			}
			// Tier 3: For providers that accept synthetic placeholders (Kimi, OpenRouter).
			if (toolCalls.length > 0 && canUseSyntheticReasoningContent && !hasReasoningField) {
				const reasoningField = compat.reasoningContentField ?? "reasoning_content";
				(assistantMsg as any)[reasoningField] = ".";
				hasReasoningField = true;
			}
			if (toolCalls.length > 0) {
				assistantMsg.tool_calls = toolCalls.map((tc, toolCallIndex) => {
					const toolCallId = ensureToolCallId(tc.id, `${i}:${toolCallIndex}:${tc.name}`);
					// Remember the wire ID so the matching tool result can reuse it.
					rememberToolCallId(tc.id, toolCallId);
					return {
						id: normalizeMistralToolId(toolCallId, compat.requiresMistralToolIds),
						type: "function" as const,
						function: {
							name: tc.name,
							arguments: serializeToolArguments(tc.arguments),
						},
					};
				});
				// Thought signatures that parse as JSON are replayed as
				// `reasoning_details`; unparseable ones are silently dropped.
				const reasoningDetails = toolCalls
					.filter(tc => tc.thoughtSignature)
					.map(tc => {
						try {
							return JSON.parse(tc.thoughtSignature!);
						} catch {
							return null;
						}
					})
					.filter(Boolean);
				if (reasoningDetails.length > 0) {
					(assistantMsg as any).reasoning_details = reasoningDetails;
				}
			}
			// DeepSeek requires non-null content when reasoning_content is present
			if (assistantMsg.content === null && hasReasoningField) {
				assistantMsg.content = "";
			}
			// Skip assistant messages that have no content, no tool calls, and no reasoning payload.
			// Some OpenAI-compatible backends require replaying reasoning-only assistant turns
			// so follow-up requests preserve the provider-specific reasoning field name.
			const content = assistantMsg.content;
			const hasContent =
				content !== null &&
				content !== undefined &&
				(typeof content === "string" ? content.length > 0 : content.length > 0);
			if (!hasContent && assistantMsg.tool_calls && compat.requiresAssistantContentForToolCalls) {
				assistantMsg.content = ".";
			}
			if (!hasContent && !assistantMsg.tool_calls && !hasReasoningField) {
				continue;
			}
			params.push(assistantMsg);
		} else if (msg.role === "toolResult") {
			// Batch consecutive tool results and collect all images
			const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
			let j = i;

			for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
				const toolMsg = transformedMessages[j] as ToolResultMessage;

				// Extract text and image content
				const textResult = toolMsg.content
					.filter(c => c.type === "text")
					.map(c => (c as TextContent).text)
					.join("\n");
				const supportsImages = model.input.includes("image");
				const hasImages = toolMsg.content.some(c => c.type === "image");
				const omittedImages = hasImages && !supportsImages;

				// Always send tool result with text (or placeholder if only images)
				const hasText = textResult.length > 0;
				// Prefer the wire ID handed out on the assistant turn; otherwise
				// normalize (or generate) one from the result's own ID.
				const remappedToolCallId = consumeToolCallId(toolMsg.toolCallId);
				const resolvedToolCallId =
					remappedToolCallId ?? ensureToolCallId(toolMsg.toolCallId, `${j}:${toolMsg.toolName ?? "tool"}`);
				const toolResultContent = omittedImages
					? joinTextWithImagePlaceholder(textResult, true)
					: hasText
						? textResult
						: hasImages
							? "(see attached image)"
							: "";
				const toolResultMsg: ChatCompletionToolMessageParam = {
					role: "tool",
					content: toolResultContent.toWellFormed(),
					tool_call_id: normalizeMistralToolId(resolvedToolCallId, compat.requiresMistralToolIds),
				};
				if (compat.requiresToolResultName && toolMsg.toolName) {
					(toolResultMsg as any).name = toolMsg.toolName;
				}
				params.push(toolResultMsg);

				if (hasImages && supportsImages) {
					for (const block of toolMsg.content) {
						if (block.type === "image") {
							imageBlocks.push({
								type: "image_url",
								image_url: {
									url: `data:${block.mimeType};base64,${block.data}`,
								},
							});
						}
					}
				}
			}

			// Resume the outer loop after the last batched tool result.
			i = j - 1;

			// After all consecutive tool results, add a single user message with all images
			if (imageBlocks.length > 0) {
				if (compat.requiresAssistantAfterToolResult) {
					params.push({
						role: "assistant",
						content: "I have processed the tool results.",
					});
				}

				params.push({
					role: "user",
					content: [
						{
							type: "text",
							text: "Attached image(s) from tool result:",
						},
						...imageBlocks,
					],
				});
				lastRole = "user";
			} else {
				lastRole = "toolResult";
			}
			continue;
		}

		lastRole =
			msg.role === "developer"
				? model.reasoning && compat.supportsDeveloperRole
					? "developer"
					: "system"
				: msg.role;
	}

	return params;
}
1745
+
1746
/**
 * Build the wire-level tool definitions and decide which strict mode applies.
 *
 * Each tool is first adapted for strict mode individually. The requested mode
 * (override first, then `compat.toolStrictMode`) is then resolved:
 *  - `"none"` stays `"none"`;
 *  - `"all_strict"` is kept only when every tool adapted to strict, otherwise
 *    it degrades to `"none"`;
 *  - anything else becomes `"mixed"`, where only the strict-capable tools carry
 *    `strict: true`.
 *
 * @param tools Tools offered for this request.
 * @param compat Resolved compatibility flags.
 * @param toolStrictModeOverride Optional mode that takes precedence over `compat.toolStrictMode`.
 * @returns The wire tool list and the strict mode actually applied.
 */
function convertTools(
	tools: Tool[],
	compat: ResolvedOpenAICompat,
	toolStrictModeOverride?: ToolStrictModeOverride,
): BuiltOpenAICompletionTools {
	const prepared = tools.map(tool => {
		const strictWanted = !(NO_STRICT || compat.supportsStrictMode === false || tool.strict === false);
		const baseParameters = toolWireSchema(tool);
		const { schema, strict } = adaptSchemaForStrict(baseParameters, strictWanted);
		return { tool, baseParameters, parameters: schema, strict };
	});

	const requestedMode = toolStrictModeOverride ?? compat.toolStrictMode;
	let toolStrictMode: BuiltOpenAICompletionTools["toolStrictMode"];
	if (requestedMode === "none") {
		toolStrictMode = "none";
	} else if (requestedMode === "all_strict") {
		toolStrictMode = prepared.every(entry => entry.strict) ? "all_strict" : "none";
	} else {
		toolStrictMode = "mixed";
	}

	return {
		tools: prepared.map(entry => {
			const emitStrict = toolStrictMode === "all_strict" || (toolStrictMode === "mixed" && entry.strict);
			return {
				type: "function",
				function: {
					name: entry.tool.name,
					description: entry.tool.description || "",
					parameters: emitStrict ? entry.parameters : entry.baseParameters,
					// Only include strict if provider supports it. Some reject unknown fields.
					...(emitStrict ? { strict: true } : {}),
				},
			};
		}),
		toolStrictMode,
	};
}
1790
+
1791
/**
 * Decide whether a failed request should be retried with strict tool schemas
 * turned off.
 *
 * A retry is suggested only when all of the following hold:
 * - at least one tool was sent and the applied mode was `"all_strict"`;
 * - the HTTP status (from the error, else from the captured response) is 400 or 422;
 * - the error message or captured body text mentions a strict/tool format problem.
 *
 * @param error - The thrown value from the failed request.
 * @param capturedErrorResponse - Captured HTTP error response, if any (`status`, `bodyText` are read).
 * @param toolStrictMode - Strict mode that was applied to the failed request.
 * @param tools - Tools that were sent with the request.
 * @returns `true` when retrying without strict tools is warranted.
 */
function shouldRetryWithoutStrictTools(
	error: unknown,
	capturedErrorResponse: CapturedHttpErrorResponse | undefined,
	toolStrictMode: AppliedToolStrictMode,
	tools: Tool[] | undefined,
): boolean {
	if (!tools?.length) return false;
	if (toolStrictMode !== "all_strict") return false;

	const httpStatus = extractHttpStatusFromError(error) ?? capturedErrorResponse?.status;
	const statusAllowsRetry = httpStatus === 400 || httpStatus === 422;
	if (!statusAllowsRetry) return false;

	// Collect every non-blank piece of diagnostic text we have.
	const fragments: string[] = [];
	const candidates = [error instanceof Error ? error.message : undefined, capturedErrorResponse?.bodyText];
	for (const candidate of candidates) {
		if (typeof candidate === "string" && candidate.trim().length > 0) {
			fragments.push(candidate);
		}
	}

	const strictComplaint = /wrong_api_format|mixed values for 'strict'|tool[s]?\b.*strict|\bstrict\b.*tool/i;
	return strictComplaint.test(fragments.join("\n"));
}
1809
+
1810
/**
 * Translate a provider `finish_reason` into the internal stop reason.
 *
 * - `null`, `"stop"`, `"end"`             -> `"stop"`
 * - `"length"`                            -> `"length"`
 * - `"function_call"`, `"tool_calls"`     -> `"toolUse"`
 * - anything else (including `"content_filter"` and `"network_error"`)
 *   -> `"error"` with message `Provider finish_reason: <reason>`
 *
 * @param reason - The raw finish reason reported by the provider.
 * @returns The mapped stop reason, plus an error message for error outcomes.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
	stopReason: StopReason;
	errorMessage?: string;
} {
	if (reason === null || reason === "stop" || reason === "end") {
		return { stopReason: "stop" };
	}
	if (reason === "length") {
		return { stopReason: "length" };
	}
	if (reason === "function_call" || reason === "tool_calls") {
		return { stopReason: "toolUse" };
	}
	// Every remaining value is surfaced as an error that echoes the raw reason.
	return {
		stopReason: "error",
		errorMessage: `Provider finish_reason: ${reason}`,
	};
}
1835
+
1836
/**
 * Auto-detect OpenAI-compat settings for a model.
 *
 * This is a public alias that forwards to `detectOpenAICompat`. The detection
 * rules themselves live in that helper; it is documented as preferring the
 * explicitly configured provider over base-URL heuristics and as returning a
 * fully populated settings object (NOTE(review): confirm against the helper).
 *
 * @param model - The openai-completions model configuration to inspect.
 * @returns The resolved compat settings produced by `detectOpenAICompat`.
 */
export function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
	const detected = detectOpenAICompat(model);
	return detected;
}
1844
+
1845
/**
 * Resolve the compat settings to use for a model.
 *
 * Forwards to `resolveOpenAICompat`, which is documented as honoring an
 * explicit `model.compat` and otherwise auto-detecting from provider/URL
 * (NOTE(review): confirm against the helper).
 *
 * @param model - The model configuration.
 * @param resolvedBaseUrl - Optional already-resolved base URL (e.g., after
 *   GitHub Copilot proxy-ep resolution), passed through unchanged.
 * @returns The resolved compat settings produced by `resolveOpenAICompat`.
 */
function getCompat(model: Model<"openai-completions">, resolvedBaseUrl?: string): ResolvedOpenAICompat {
	const resolved = resolveOpenAICompat(model, resolvedBaseUrl);
	return resolved;
}