@prometheus-ai/ai 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -2,6 +2,11 @@ import * as nodeCrypto from "node:crypto";
2
2
  import * as fs from "node:fs";
3
3
  import { scheduler } from "node:timers/promises";
4
4
  import * as tls from "node:tls";
5
+ import { isOfficialAnthropicApiUrl } from "@prometheus-ai/catalog/compat/anthropic";
6
+ import { mapEffortToAnthropicAdaptiveEffort } from "@prometheus-ai/catalog/model-thinking";
7
+ import { calculateCost } from "@prometheus-ai/catalog/models";
8
+ import { isAnthropicOAuthToken } from "@prometheus-ai/catalog/utils";
9
+ import { parseGitHubCopilotApiKey } from "@prometheus-ai/catalog/wire/github-copilot";
5
10
  import {
6
11
  $env,
7
12
  extractHttpStatusFromError,
@@ -12,13 +17,7 @@ import {
12
17
  logger,
13
18
  readSseEvents,
14
19
  } from "@prometheus-ai/utils";
15
- import {
16
- disablesParallelToolUse,
17
- hasOpus47ApiRestrictions,
18
- mapEffortToAnthropicAdaptiveEffort,
19
- supportsMidConversationSystemMessages,
20
- } from "../model-thinking";
21
- import { calculateCost } from "../models";
20
+ import { isUsageLimitError } from "../rate-limit-utils";
22
21
  import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
23
22
  import type {
24
23
  Api,
@@ -30,6 +29,7 @@ import type {
30
29
  Message,
31
30
  Model,
32
31
  ProviderSessionState,
32
+ RawSseEvent,
33
33
  RedactedThinkingContent,
34
34
  ServiceTier,
35
35
  SimpleStreamOptions,
@@ -44,31 +44,27 @@ import type {
44
44
  Usage,
45
45
  } from "../types";
46
46
  import { resolveServiceTier } from "../types";
47
- import {
48
- isAnthropicOAuthToken,
49
- isRecord,
50
- normalizeSystemPrompts,
51
- normalizeToolCallId,
52
- resolveCacheRetention,
53
- } from "../utils";
47
+ import { isRecord, normalizeSystemPrompts, normalizeToolCallId, resolveCacheRetention } from "../utils";
54
48
  import { createAbortSourceTracker } from "../utils/abort";
55
49
  import { AssistantMessageEventStream } from "../utils/event-stream";
56
50
  import { isFoundryEnabled } from "../utils/foundry";
57
51
  import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotError } from "../utils/http-inspector";
58
52
  import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTimeout } from "../utils/idle-iterator";
59
- import { parseJsonWithRepair, parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
60
- import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
53
+ import { parseStreamingJsonThrottled } from "../utils/json-parse";
61
54
  import { notifyProviderResponse } from "../utils/provider-response";
62
55
  import { isCopilotTransientModelError } from "../utils/retry";
63
56
  import { COMBINATOR_KEYS, NO_STRICT, toolWireSchema } from "../utils/schema";
64
57
  import { spillToDescription } from "../utils/schema/spill";
65
58
  import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
66
- import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
59
+ import { notifyRawSseEvent } from "../utils/sse-debug";
67
60
  import {
61
+ AnthropicApiError,
68
62
  AnthropicConnectionTimeoutError,
69
63
  type AnthropicFetchOptions,
70
64
  AnthropicMessagesClient,
71
65
  type AnthropicMessagesClientLike,
66
+ calculateAnthropicRetryDelayMs,
67
+ retryDelayFromHeaders,
72
68
  } from "./anthropic-client";
73
69
  import type {
74
70
  ToolInputSchema as AnthropicToolInputSchema,
@@ -122,6 +118,7 @@ export function buildBetaHeader(baseBetas: readonly string[], extraBetas: readon
122
118
  return result.join(",");
123
119
  }
124
120
 
121
+ const midConversationSystemBeta = "mid-conversation-system-2026-04-07";
125
122
  const claudeCodeUtilityBetaDefaults = [
126
123
  "oauth-2025-04-20",
127
124
  "interleaved-thinking-2025-05-14",
@@ -135,7 +132,7 @@ const claudeCodeAgentBetaDefaults = [
135
132
  "interleaved-thinking-2025-05-14",
136
133
  "context-management-2025-06-27",
137
134
  "prompt-caching-scope-2026-01-05",
138
- "mid-conversation-system-2026-04-07",
135
+ midConversationSystemBeta,
139
136
  "advanced-tool-use-2025-11-20",
140
137
  ] as const;
141
138
  const claudeCodeAgentPostEffortBetas = ["extended-cache-ttl-2025-04-11"] as const;
@@ -181,105 +178,125 @@ function isClaudeCodeClientUserAgent(userAgent: string | undefined): userAgent i
181
178
  return userAgent.toLowerCase().startsWith("claude-cli");
182
179
  }
183
180
 
184
- export function isAnthropicApiBaseUrl(baseUrl?: string): boolean {
185
- if (!baseUrl) return true;
186
- try {
187
- const url = new URL(baseUrl);
188
- return url.protocol.toLowerCase() === "https:" && url.hostname.toLowerCase() === "api.anthropic.com";
189
- } catch {
190
- return false;
191
- }
192
- }
193
-
194
181
  const sharedHeaders = {
195
182
  "Accept-Encoding": "gzip, deflate, br, zstd",
196
183
  Connection: "keep-alive",
197
184
  "Content-Type": "application/json",
198
- "Anthropic-Version": "2023-06-01",
199
- "Anthropic-Dangerous-Direct-Browser-Access": "true",
200
- "X-App": "cli",
185
+ "anthropic-version": "2023-06-01",
186
+ "anthropic-dangerous-direct-browser-access": "true",
187
+ "x-app": "cli",
201
188
  };
202
189
 
203
190
  export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<string, string> {
204
191
  const oauthToken = options.isOAuth ?? isAnthropicOAuthToken(options.apiKey);
205
192
  const extraBetas = options.extraBetas ?? [];
206
193
  const stream = options.stream ?? false;
207
- const betaHeader = buildBetaHeader(options.claudeCodeBetas ?? buildClaudeCodeBetas(true, true, false), extraBetas);
208
- const acceptHeader = oauthToken ? "application/json" : stream ? "text/event-stream" : "application/json";
209
- const modelHeaders = Object.fromEntries(
210
- Object.entries(options.modelHeaders ?? {}).filter(([key]) => !enforcedHeaderKeys.has(key.toLowerCase())),
194
+ // `enforcedHeaderKeys` strips User-Agent out of modelHeaders so a spread can't
195
+ // produce case-duplicate keys; re-add the caller's value explicitly per branch
196
+ // (OAuth replaces non-claude-cli values, the other branches forward verbatim).
197
+ const incomingUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
198
+ // Claude Code betas (oauth-2025-04-20, claude-code-20250219, …) are part of
199
+ // the OAuth fingerprint; API-key requests default to extras only, matching
200
+ // the streaming path (buildAnthropicClientOptions passes [] for non-OAuth).
201
+ const betaHeader = buildBetaHeader(
202
+ options.claudeCodeBetas ?? (oauthToken ? buildClaudeCodeBetas(true, true, false) : []),
203
+ extraBetas,
211
204
  );
205
+ const acceptHeader = oauthToken ? "application/json" : stream ? "text/event-stream" : "application/json";
206
+ const modelHeaders: Record<string, string> = {};
207
+ const filteredEnforcedKeys: string[] = [];
208
+ for (const [key, value] of Object.entries(options.modelHeaders ?? {})) {
209
+ const lowerKey = key.toLowerCase();
210
+ if (enforcedHeaderKeys.has(lowerKey)) {
211
+ // User-Agent is filtered only to dedup the spread; every branch re-adds
212
+ // the caller's value explicitly, so it is not "ignored".
213
+ if (lowerKey !== "user-agent") filteredEnforcedKeys.push(key);
214
+ continue;
215
+ }
216
+ modelHeaders[key] = value;
217
+ }
218
+ if (filteredEnforcedKeys.length > 0) {
219
+ // Caller/env-supplied values (options.headers, ANTHROPIC_CUSTOM_HEADERS)
220
+ // for enforced headers are replaced by our own values; say so instead of
221
+ // dropping them silently. Keys only — values may carry credentials.
222
+ logger.debug("anthropic: ignoring caller-supplied enforced headers", {
223
+ headers: filteredEnforcedKeys,
224
+ });
225
+ }
212
226
 
213
227
  if (options.isCloudflareAiGateway) {
214
228
  return {
215
229
  ...modelHeaders,
216
230
  Accept: acceptHeader,
217
231
  ...sharedHeaders,
218
- "Anthropic-Beta": betaHeader,
232
+ ...(incomingUserAgent ? { "User-Agent": incomingUserAgent } : {}),
233
+ ...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
219
234
  "cf-aig-authorization": `Bearer ${options.apiKey}`,
220
235
  };
221
236
  }
222
237
 
223
238
  if (oauthToken) {
224
- const incomingUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
225
239
  const userAgent = isClaudeCodeClientUserAgent(incomingUserAgent)
226
240
  ? incomingUserAgent
227
- : `claude-cli/${claudeCodeVersion} (external, cli)`;
241
+ : `claude-cli/${claudeCodeVersion} (external, local-agent, agent-sdk/${claudeAgentSdkVersion})`;
228
242
  return {
229
243
  ...modelHeaders,
230
244
  ...claudeCodeHeaders,
231
245
  Accept: acceptHeader,
232
246
  Authorization: `Bearer ${options.apiKey}`,
233
247
  ...sharedHeaders,
234
- "Anthropic-Beta": betaHeader,
248
+ ...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
235
249
  ...(options.claudeCodeSessionId ? { "X-Claude-Code-Session-Id": options.claudeCodeSessionId } : {}),
236
250
  "x-client-request-id": nodeCrypto.randomUUID(),
237
251
  "User-Agent": userAgent,
238
252
  };
239
- } else if (!isAnthropicApiBaseUrl(options.baseUrl)) {
253
+ } else if (!isOfficialAnthropicApiUrl(options.baseUrl)) {
240
254
  return {
241
255
  ...modelHeaders,
242
256
  Accept: acceptHeader,
243
257
  Authorization: `Bearer ${options.apiKey}`,
244
258
  ...sharedHeaders,
245
- "Anthropic-Beta": betaHeader,
259
+ ...(incomingUserAgent ? { "User-Agent": incomingUserAgent } : {}),
260
+ ...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
246
261
  };
247
262
  } else {
248
263
  return {
249
264
  ...modelHeaders,
250
265
  Accept: acceptHeader,
251
266
  ...sharedHeaders,
252
- "Anthropic-Beta": betaHeader,
267
+ ...(incomingUserAgent ? { "User-Agent": incomingUserAgent } : {}),
268
+ ...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
253
269
  "X-Api-Key": options.apiKey,
254
270
  };
255
271
  }
256
272
  }
257
273
 
258
274
  type AnthropicCacheControl = NonNullable<TextBlockParam["cache_control"]>;
275
+ type AnthropicImageMediaType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
259
276
 
260
- type AnthropicOutputConfig = NonNullable<MessageCreateParamsStreaming["output_config"]>;
277
+ function normalizeAnthropicImageMediaType(mimeType: string): AnthropicImageMediaType | undefined {
278
+ const normalized = mimeType.trim().toLowerCase();
279
+ if (normalized === "image/jpg") return "image/jpeg";
280
+ if (
281
+ normalized === "image/jpeg" ||
282
+ normalized === "image/png" ||
283
+ normalized === "image/gif" ||
284
+ normalized === "image/webp"
285
+ ) {
286
+ return normalized;
287
+ }
288
+ return undefined;
289
+ }
261
290
 
262
- function getAnthropicOutputConfig(params: MessageCreateParamsStreaming): AnthropicOutputConfig {
263
- const outputConfig = params.output_config ?? {};
264
- params.output_config = outputConfig;
265
- return outputConfig;
291
+ function cloneAnthropicCacheControl(cacheControl: AnthropicCacheControl): AnthropicCacheControl {
292
+ return { ...cacheControl };
266
293
  }
267
294
 
295
+ type AnthropicOutputConfig = NonNullable<MessageCreateParamsStreaming["output_config"]>;
296
+
268
297
  const ANTHROPIC_STOP_SEQUENCES_MAX = 4;
269
298
  let warnedStopSequencesTrim = false;
270
299
 
271
- /**
272
- * Adaptive thinking `display` is supported starting with Claude Opus 4.7.
273
- * Older adaptive-thinking models (Opus 4.6, Sonnet 4.6+) reject the field.
274
- */
275
- function supportsAdaptiveThinkingDisplay(modelId: string): boolean {
276
- const match = /claude-opus-(\d+)-(\d+)/.exec(modelId);
277
- if (!match) return false;
278
- const major = Number(match[1]);
279
- const minor = Number(match[2]);
280
- return major > 4 || (major === 4 && minor >= 7);
281
- }
282
-
283
300
  const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
284
301
 
285
302
  type AnthropicProviderSessionState = ProviderSessionState & {
@@ -299,16 +316,29 @@ function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
299
316
  return state;
300
317
  }
301
318
 
319
+ /**
320
+ * Key the sticky strict-tools / fast-mode learning per endpoint+model. A
321
+ * grammar-too-large 400 or a fast-mode rejection is specific to the model (its
322
+ * tool grammar / entitlement) and the endpoint (direct Anthropic vs a gateway /
323
+ * Foundry / Bedrock proxy), so it MUST NOT bleed onto unrelated anthropic-messages
324
+ * requests in the same session. NUL separates the two components so neither can
325
+ * forge the boundary.
326
+ */
327
+ function anthropicProviderSessionStateKey(baseUrl: string, modelId: string): string {
328
+ return `${ANTHROPIC_PROVIDER_SESSION_STATE_KEY}:${baseUrl}\u0000${modelId}`;
329
+ }
330
+
302
331
  function getAnthropicProviderSessionState(
303
332
  providerSessionState: Map<string, ProviderSessionState> | undefined,
333
+ baseUrl: string,
334
+ modelId: string,
304
335
  ): AnthropicProviderSessionState | undefined {
305
336
  if (!providerSessionState) return undefined;
306
- const existing = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
307
- | AnthropicProviderSessionState
308
- | undefined;
337
+ const key = anthropicProviderSessionStateKey(baseUrl, modelId);
338
+ const existing = providerSessionState.get(key) as AnthropicProviderSessionState | undefined;
309
339
  if (existing) return existing;
310
340
  const created = createAnthropicProviderSessionState();
311
- providerSessionState.set(ANTHROPIC_PROVIDER_SESSION_STATE_KEY, created);
341
+ providerSessionState.set(key, created);
312
342
  return created;
313
343
  }
314
344
 
@@ -323,10 +353,14 @@ export function clearAnthropicFastModeFallback(
323
353
  providerSessionState: Map<string, ProviderSessionState> | undefined,
324
354
  ): void {
325
355
  if (!providerSessionState) return;
326
- const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
327
- | AnthropicProviderSessionState
328
- | undefined;
329
- if (state) state.fastModeDisabled = false;
356
+ // Fast mode is re-armed session-wide (user toggled `/fast on`), so clear the
357
+ // sticky flag on every per-endpoint/model Anthropic entry — plus the legacy
358
+ // unscoped key — rather than a single shared object.
359
+ const prefix = `${ANTHROPIC_PROVIDER_SESSION_STATE_KEY}:`;
360
+ for (const [key, value] of providerSessionState) {
361
+ if (key !== ANTHROPIC_PROVIDER_SESSION_STATE_KEY && !key.startsWith(prefix)) continue;
362
+ (value as AnthropicProviderSessionState).fastModeDisabled = false;
363
+ }
330
364
  }
331
365
 
332
366
  function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
@@ -378,7 +412,6 @@ function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
378
412
 
379
413
  function getCacheControl(
380
414
  model: Model<"anthropic-messages">,
381
- baseUrl: string,
382
415
  cacheRetention: CacheRetention | undefined,
383
416
  isOAuthToken: boolean,
384
417
  ): { retention: CacheRetention; cacheControl?: AnthropicCacheControl } {
@@ -386,10 +419,7 @@ function getCacheControl(
386
419
  if (retention === "none") {
387
420
  return { retention };
388
421
  }
389
- const ttl =
390
- retention === "long" && isAnthropicApiBaseUrl(baseUrl) && getAnthropicCompat(model).supportsLongCacheRetention
391
- ? "1h"
392
- : undefined;
422
+ const ttl = retention === "long" && model.compat.supportsLongCacheRetention ? "1h" : undefined;
393
423
  return {
394
424
  retention,
395
425
  cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
@@ -397,9 +427,15 @@ function getCacheControl(
397
427
  }
398
428
 
399
429
  // Stealth mode: mimic Claude Code's request fingerprint.
400
- export const claudeCodeVersion = "2.1.160";
401
- export const claudeToolPrefix: string = "proxy_";
402
- export const claudeCodeSystemInstruction = "You are Claude Code, Anthropic's official CLI for Claude.";
430
+ export const claudeCodeVersion = "2.1.165";
431
+ export const claudeAgentSdkVersion = "0.3.165";
432
+ export const claudeClientVersion = "1.11187.4";
433
+ export const claudeToolPrefix: string = "_";
434
+ export const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
435
+ // Claude Code caps requested output at 64k tokens even when the model ceiling is
436
+ // higher (e.g. Opus 4.8 supports 128k); OAuth requests clamp to match the wire
437
+ // fingerprint. API-key requests keep the full model ceiling.
438
+ export const CLAUDE_CODE_MAX_OUTPUT_TOKENS = 64000;
403
439
 
404
440
  export function mapStainlessOs(platform: string): "MacOS" | "Windows" | "Linux" | "FreeBSD" | `Other::${string}` {
405
441
  switch (platform.toLowerCase()) {
@@ -442,7 +478,9 @@ export const claudeCodeHeaders = {
442
478
  "X-Stainless-Lang": "js",
443
479
  "X-Stainless-Arch": mapStainlessArch(process.arch),
444
480
  "X-Stainless-OS": mapStainlessOs(process.platform),
445
- "X-Stainless-Timeout": "600",
481
+ "X-Stainless-Timeout": "900",
482
+ "anthropic-client-platform": "desktop_app",
483
+ "anthropic-client-version": claudeClientVersion,
446
484
  };
447
485
 
448
486
  const enforcedHeaderKeys = new Set(
@@ -452,11 +490,11 @@ const enforcedHeaderKeys = new Set(
452
490
  "Accept-Encoding",
453
491
  "Connection",
454
492
  "Content-Type",
455
- "Anthropic-Version",
456
- "Anthropic-Dangerous-Direct-Browser-Access",
457
- "Anthropic-Beta",
493
+ "anthropic-version",
494
+ "anthropic-dangerous-direct-browser-access",
495
+ "anthropic-beta",
458
496
  "User-Agent",
459
- "X-App",
497
+ "x-app",
460
498
  "Authorization",
461
499
  "X-Api-Key",
462
500
  "X-Claude-Code-Session-Id",
@@ -479,7 +517,7 @@ function createClaudeBillingHeader(firstUserMessageText: string): string {
479
517
  .slice(0, 3);
480
518
  // cch=00000: placeholder replaced with the real attestation hash by wrapFetchForCch
481
519
  // before the request hits the wire (see below).
482
- return `${CLAUDE_BILLING_HEADER_PREFIX} cc_version=${claudeCodeVersion}.${versionSuffix}; cc_entrypoint=cli; ${CCH_PLACEHOLDER_STR};`;
520
+ return `${CLAUDE_BILLING_HEADER_PREFIX} cc_version=${claudeCodeVersion}.${versionSuffix}; cc_entrypoint=local-agent; ${CCH_PLACEHOLDER_STR};`;
483
521
  }
484
522
 
485
523
  // cch attestation: XXHash64(body_with_placeholder, seed) low-20-bits, 5 hex chars.
@@ -497,47 +535,49 @@ const CCH_PLACEHOLDER = cchEncoder.encode(CCH_PLACEHOLDER_STR);
497
535
  const BILLING_SYSTEM_MARKER = cchEncoder.encode(`"system":[{"type":"text","text":"${CLAUDE_BILLING_HEADER_PREFIX}`);
498
536
  const CCH_BILLING_SEARCH_WINDOW = 150;
499
537
 
500
- function patchCch(body: Uint8Array): Uint8Array {
538
+ function patchCch(body: Uint8Array): "patched" | "no-billing-header" | "unanchored" {
539
+ // Zero-copy Buffer view over the same memory; its `indexOf` is a native memmem,
540
+ // ~7.5x faster than a hand-rolled byte loop here — the marker sits ~99% through
541
+ // the body because `messages` serializes before `system`, so a JS scan would
542
+ // walk almost the entire payload (benchmarked: 563µs -> 75µs on a 1MB body).
543
+ const view = Buffer.from(body.buffer, body.byteOffset, body.byteLength);
544
+
501
545
  // Find the combined system[0] + billing-header prefix marker.
502
- let markerIdx = -1;
503
- outer: for (let i = 0; i <= body.length - BILLING_SYSTEM_MARKER.length; i++) {
504
- for (let j = 0; j < BILLING_SYSTEM_MARKER.length; j++) {
505
- if (body[i + j] !== BILLING_SYSTEM_MARKER[j]) continue outer;
506
- }
507
- markerIdx = i;
508
- break;
509
- }
510
- if (markerIdx === -1) return body; // no CC billing header injected
546
+ const markerIdx = view.indexOf(BILLING_SYSTEM_MARKER);
547
+ if (markerIdx === -1) return "no-billing-header"; // no CC billing header injected
511
548
 
512
- // Scan at most CCH_BILLING_SEARCH_WINDOW bytes after the marker for the placeholder.
549
+ // Placeholder must sit within CCH_BILLING_SEARCH_WINDOW bytes after the marker.
513
550
  const searchFrom = markerIdx + BILLING_SYSTEM_MARKER.length;
514
- const searchTo = Math.min(searchFrom + CCH_BILLING_SEARCH_WINDOW, body.length - CCH_PLACEHOLDER.length);
515
- let idx = -1;
516
- outer2: for (let i = searchFrom; i <= searchTo; i++) {
517
- for (let j = 0; j < CCH_PLACEHOLDER.length; j++) {
518
- if (body[i + j] !== CCH_PLACEHOLDER[j]) continue outer2;
519
- }
520
- idx = i;
521
- break;
522
- }
523
- if (idx === -1) return body; // placeholder not within the billing header value
551
+ const idx = view.indexOf(CCH_PLACEHOLDER, searchFrom);
552
+ if (idx === -1 || idx - searchFrom > CCH_BILLING_SEARCH_WINDOW) return "unanchored";
524
553
 
525
554
  // Hash the body with the placeholder in place (matches CC's in-place behaviour).
526
555
  const h = Bun.hash.xxHash64(body, CCH_SEED);
527
556
  const cch = (h & 0xfffffn).toString(16).padStart(5, "0");
528
557
 
529
558
  for (let i = 0; i < 5; i++) body[idx + 4 + i] = cch.charCodeAt(i);
530
- return body;
559
+ return "patched";
531
560
  }
532
561
 
533
- type FetchFn = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
534
-
535
- function wrapFetchForCch(base: FetchFn): FetchFn {
562
+ /**
563
+ * Wraps a fetch implementation to patch the Claude Code billing-header `cch`
564
+ * attestation into outgoing request bodies. Bodies without the placeholder
565
+ * pass through untouched, so installing it on every OAuth flow is safe.
566
+ */
567
+ export function wrapFetchForCch(base: FetchImpl): FetchImpl {
536
568
  return (input, init) => {
537
569
  if (init?.body && typeof init.body === "string" && init.body.includes(CCH_PLACEHOLDER_STR)) {
538
570
  const encoded = cchEncoder.encode(init.body);
539
- const patched = patchCch(encoded);
540
- return base(input, { ...init, body: patched });
571
+ if (patchCch(encoded) === "unanchored") {
572
+ // The OAuth billing placeholder is anchored to system[0] but we couldn't
573
+ // patch it — e.g. an `onPayload` hook reordered the first system block's keys
574
+ // so BILLING_SYSTEM_MARKER no longer matches. Send the body as-is (cch stays
575
+ // `00000`, the prior behaviour) rather than failing the request, but surface the
576
+ // fingerprint regression instead of letting it ship silently. A `cch=00000`
577
+ // literal in user content alone ("no-billing-header") is not a regression.
578
+ logger.warn("anthropic: cch billing placeholder present but not patched; sending unattested request");
579
+ }
580
+ return base(input, { ...init, body: encoded });
541
581
  }
542
582
  return base(input, init);
543
583
  };
@@ -594,20 +634,65 @@ export function generateClaudeCloakingUserId(): string {
594
634
  return `user_${userHash}_account_${accountId}_session_${sessionId}`;
595
635
  }
596
636
 
597
- function deriveClaudeDeviceIdFromInstallId(): string {
598
- return nodeCrypto.createHash("sha256").update(`prometheus-claude-device-id-v1:${getInstallId()}`).digest("hex");
637
+ const CLAUDE_DEVICE_ID_INSTALL_HASH_DOMAIN = "prometheus-claude-device-id-v1:";
638
+ const CLAUDE_DEVICE_ID_ACCOUNT_HASH_DOMAIN = "prometheus-claude-device-id-v2";
639
+
640
+ export function deriveClaudeDeviceId(installId: string, accountId?: string): string {
641
+ const hash = nodeCrypto.createHash("sha256");
642
+ if (accountId && accountId.length > 0) {
643
+ return hash
644
+ .update(CLAUDE_DEVICE_ID_ACCOUNT_HASH_DOMAIN)
645
+ .update("\0")
646
+ .update(installId)
647
+ .update("\0")
648
+ .update(accountId)
649
+ .digest("hex");
650
+ }
651
+ return hash.update(CLAUDE_DEVICE_ID_INSTALL_HASH_DOMAIN).update(installId).digest("hex");
652
+ }
653
+
654
+ function readMetadataString(metadata: Record<string, unknown> | undefined, key: string): string | undefined {
655
+ const value = metadata?.[key];
656
+ return typeof value === "string" && value.length > 0 ? value : undefined;
657
+ }
658
+
659
+ function readAnthropicMetadataAccountId(metadata: Record<string, unknown> | undefined): string | undefined {
660
+ return (
661
+ readMetadataString(metadata, "account_uuid") ??
662
+ readMetadataString(metadata, "accountId") ??
663
+ readMetadataString(metadata, "account_id")
664
+ );
599
665
  }
600
- function generateClaudeJsonUserId(sessionId?: string): string {
601
- return JSON.stringify({
602
- device_id: deriveClaudeDeviceIdFromInstallId(),
666
+
667
+ function deriveClaudeDeviceIdFromInstallId(accountId?: string): string {
668
+ return deriveClaudeDeviceId(getInstallId(), accountId);
669
+ }
670
+
671
+ function generateClaudeJsonUserId(sessionId?: string, accountId?: string): string {
672
+ const userId: Record<string, string> = {
673
+ device_id: deriveClaudeDeviceIdFromInstallId(accountId),
603
674
  session_id: sessionId ?? nodeCrypto.randomUUID().toLowerCase(),
604
- });
675
+ };
676
+ if (accountId && accountId.length > 0) userId.account_uuid = accountId;
677
+ return JSON.stringify(userId);
605
678
  }
606
679
 
607
- function resolveAnthropicMetadataUserId(
680
+ /**
681
+ * Resolve the `metadata.user_id` field for an Anthropic Messages request.
682
+ *
683
+ * For API-key tokens, an explicit caller-supplied `userId` is forwarded
684
+ * verbatim and `undefined` yields no metadata. For OAuth tokens the value
685
+ * must match the Claude Code attribution shape (`isClaudeCloakingUserId` or
686
+ * the `{session_id, account_uuid?, device_id?}` JSON envelope) — anything
687
+ * else is dropped and a fresh Claude-Code-style JSON id is generated from
688
+ * `sessionId`/`accountId` so attribution stays consistent across the main
689
+ * streaming path and provider-specific request builders (e.g. web search).
690
+ */
691
+ export function resolveAnthropicMetadataUserId(
608
692
  userId: unknown,
609
693
  isOAuthToken: boolean,
610
694
  sessionId?: string,
695
+ accountId?: string,
611
696
  ): string | undefined {
612
697
  if (typeof userId === "string") {
613
698
  if (!isOAuthToken || isClaudeCloakingUserId(userId) || isClaudeJsonUserId(userId)) {
@@ -616,22 +701,24 @@ function resolveAnthropicMetadataUserId(
616
701
  }
617
702
 
618
703
  if (!isOAuthToken) return undefined;
619
- return generateClaudeJsonUserId(sessionId);
704
+ return generateClaudeJsonUserId(sessionId, accountId);
620
705
  }
621
706
  const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);
622
- export const applyClaudeToolPrefix = (name: string, prefixOverride: string = claudeToolPrefix) => {
623
- if (!prefixOverride) return name;
707
+ export const applyClaudeToolPrefix = (name: string): string => {
708
+ if (!claudeToolPrefix) return name;
624
709
  if (ANTHROPIC_BUILTIN_TOOL_NAMES.has(name.toLowerCase())) return name;
625
- const prefix = prefixOverride.toLowerCase();
626
- if (name.toLowerCase().startsWith(prefix)) return name;
627
- return `${prefixOverride}${name}`;
710
+ // Always prepend (no "already prefixed" short-circuit): the prefix is a wire
711
+ // transport detail applied once to internal tool names, and `stripClaudeToolPrefix`
712
+ // removes exactly one prefix on receive. Skipping names that already start with the
713
+ // prefix would make a tool literally named `_foo` lose its leading underscore on the
714
+ // return trip (`_foo` → wire `_foo` → strip → `foo`), so the agent loop can't find it.
715
+ return `${claudeToolPrefix}${name}`;
628
716
  };
629
717
 
630
- export const stripClaudeToolPrefix = (name: string, prefixOverride: string = claudeToolPrefix) => {
631
- if (!prefixOverride) return name;
632
- const prefix = prefixOverride.toLowerCase();
633
- if (!name.toLowerCase().startsWith(prefix)) return name;
634
- return name.slice(prefixOverride.length);
718
+ export const stripClaudeToolPrefix = (name: string): string => {
719
+ if (!claudeToolPrefix) return name;
720
+ if (!name.toLowerCase().startsWith(claudeToolPrefix.toLowerCase())) return name;
721
+ return name.slice(claudeToolPrefix.length);
635
722
  };
636
723
 
637
724
  const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
@@ -649,6 +736,46 @@ function countAnthropicImageBlocks(messages: Message[]): number {
649
736
  return count;
650
737
  }
651
738
 
739
+ const ANTHROPIC_IMAGE_RESIZE_CONCURRENCY = 4;
740
+
741
+ /**
742
+ * Memoized resize results keyed on ImageContent identity. Callers keep message
743
+ * objects stable across turns, so without this every request (and every
744
+ * in-provider retry of a fresh turn) re-decodes and re-encodes the same
745
+ * oversized screenshots. A cached value identical to the key means "already
746
+ * within bounds / unresizable — skip the decode".
747
+ */
748
+ const anthropicManyImageResizeCache = new WeakMap<ImageContent, ImageContent>();
749
+
750
+ type ResizeLimiter = <R>(fn: () => Promise<R>) => Promise<R>;
751
+
752
+ /**
753
+ * Bounded-concurrency gate for image decode/encode work. The many-image path
754
+ * fans out over every block of every message; unbounded, 100+ oversized images
755
+ * would decode concurrently (two encode pipelines each) and spike memory by
756
+ * gigabytes. Slots are handed off directly to the next waiter on release.
757
+ */
758
+ function createResizeLimiter(limit: number): ResizeLimiter {
759
+ let active = 0;
760
+ const queue: (() => void)[] = [];
761
+ return async fn => {
762
+ if (active >= limit) {
763
+ const { promise, resolve } = Promise.withResolvers<void>();
764
+ queue.push(resolve);
765
+ await promise;
766
+ } else {
767
+ active++;
768
+ }
769
+ try {
770
+ return await fn();
771
+ } finally {
772
+ const next = queue.shift();
773
+ if (next) next();
774
+ else active--;
775
+ }
776
+ };
777
+ }
778
+
652
779
  async function resizeAnthropicManyImageBlock(block: ImageContent): Promise<ImageContent> {
653
780
  try {
654
781
  const inputBuffer = Buffer.from(block.data, "base64");
@@ -684,12 +811,17 @@ async function resizeAnthropicManyImageBlock(block: ImageContent): Promise<Image
684
811
  async function resizeAnthropicManyImageContent(
685
812
  content: (TextContent | ImageContent)[],
686
813
  state: { resized: number },
814
+ limit: ResizeLimiter,
687
815
  ): Promise<(TextContent | ImageContent)[]> {
688
816
  let changed = false;
689
817
  const next = await Promise.all(
690
818
  content.map(async block => {
691
819
  if (block.type !== "image") return block;
692
- const resized = await resizeAnthropicManyImageBlock(block);
820
+ let resized = anthropicManyImageResizeCache.get(block);
821
+ if (resized === undefined) {
822
+ resized = await limit(() => resizeAnthropicManyImageBlock(block));
823
+ anthropicManyImageResizeCache.set(block, resized);
824
+ }
693
825
  if (resized !== block) {
694
826
  changed = true;
695
827
  state.resized++;
@@ -700,14 +832,18 @@ async function resizeAnthropicManyImageContent(
700
832
  return changed ? next : content;
701
833
  }
702
834
 
703
- async function resizeAnthropicManyImageMessage(message: Message, state: { resized: number }): Promise<Message> {
835
+ async function resizeAnthropicManyImageMessage(
836
+ message: Message,
837
+ state: { resized: number },
838
+ limit: ResizeLimiter,
839
+ ): Promise<Message> {
704
840
  if (message.role === "user" || message.role === "developer") {
705
841
  if (!Array.isArray(message.content)) return message;
706
- const content = await resizeAnthropicManyImageContent(message.content, state);
842
+ const content = await resizeAnthropicManyImageContent(message.content, state, limit);
707
843
  return content === message.content ? message : { ...message, content };
708
844
  }
709
845
  if (message.role === "toolResult") {
710
- const content = await resizeAnthropicManyImageContent(message.content, state);
846
+ const content = await resizeAnthropicManyImageContent(message.content, state, limit);
711
847
  return content === message.content ? message : { ...message, content };
712
848
  }
713
849
  return message;
@@ -720,9 +856,10 @@ async function prepareAnthropicManyImageContext(context: Context, supportsImages
720
856
 
721
857
  let changed = false;
722
858
  const state = { resized: 0 };
859
+ const limit = createResizeLimiter(ANTHROPIC_IMAGE_RESIZE_CONCURRENCY);
723
860
  const messages = await Promise.all(
724
861
  context.messages.map(async message => {
725
- const next = await resizeAnthropicManyImageMessage(message, state);
862
+ const next = await resizeAnthropicManyImageMessage(message, state, limit);
726
863
  if (next !== message) changed = true;
727
864
  return next;
728
865
  }),
@@ -736,13 +873,7 @@ async function prepareAnthropicManyImageContext(context: Context, supportsImages
736
873
  return { ...context, messages };
737
874
  }
738
875
 
739
- /**
740
- * Convert content blocks to Anthropic API format
741
- */
742
- function convertContentBlocks(
743
- content: (TextContent | ImageContent)[],
744
- supportsImages = true,
745
- ):
876
+ type AnthropicToolResultContent =
746
877
  | string
747
878
  | Array<
748
879
  | { type: "text"; text: string }
@@ -750,42 +881,75 @@ function convertContentBlocks(
750
881
  type: "image";
751
882
  source: {
752
883
  type: "base64";
753
- media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
884
+ media_type: AnthropicImageMediaType;
754
885
  data: string;
755
886
  };
756
887
  }
757
- > {
758
- const textBlocks = content
759
- .filter((block): block is TextContent => block.type === "text")
760
- .map(block => block.text.toWellFormed())
761
- .filter(text => text.trim().length > 0);
762
- const imageBlocks = content.filter((block): block is ImageContent => block.type === "image");
763
- const omittedImages = !supportsImages && imageBlocks.length > 0;
764
- if (imageBlocks.length === 0 || !supportsImages) {
765
- if (omittedImages) {
766
- textBlocks.push(NON_VISION_IMAGE_PLACEHOLDER);
888
+ >;
889
+
890
+ /**
891
+ * Convert content blocks to Anthropic API format
892
+ */
893
+ function convertContentBlocks(
894
+ content: (TextContent | ImageContent)[],
895
+ supportsImages = true,
896
+ ): AnthropicToolResultContent {
897
+ const blocks: Array<
898
+ | { type: "text"; text: string }
899
+ | {
900
+ type: "image";
901
+ source: {
902
+ type: "base64";
903
+ media_type: AnthropicImageMediaType;
904
+ data: string;
905
+ };
906
+ }
907
+ > = [];
908
+ let sawText = false;
909
+ let sawImage = false;
910
+
911
+ for (const block of content) {
912
+ if (block.type === "text") {
913
+ const text = block.text.toWellFormed();
914
+ if (text.trim().length === 0) continue;
915
+ sawText = true;
916
+ blocks.push({ type: "text", text });
917
+ continue;
918
+ }
919
+
920
+ if (!supportsImages) {
921
+ blocks.push({ type: "text", text: NON_VISION_IMAGE_PLACEHOLDER });
922
+ continue;
923
+ }
924
+
925
+ const mediaType = normalizeAnthropicImageMediaType(block.mimeType);
926
+ if (!mediaType) {
927
+ blocks.push({ type: "text", text: `[unsupported image: ${block.mimeType}]` });
928
+ continue;
767
929
  }
768
- return textBlocks.join("\n").toWellFormed();
769
- }
770
930
 
771
- const blocks = [
772
- ...textBlocks.map(text => ({
773
- type: "text" as const,
774
- text,
775
- })),
776
- ...imageBlocks.map(block => ({
777
- type: "image" as const,
931
+ sawImage = true;
932
+ blocks.push({
933
+ type: "image",
778
934
  source: {
779
- type: "base64" as const,
780
- media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
935
+ type: "base64",
936
+ media_type: mediaType,
781
937
  data: block.data,
782
938
  },
783
- })),
784
- ];
939
+ });
940
+ }
785
941
 
786
- if (!textBlocks.length) {
942
+ if (!supportsImages) {
943
+ return blocks
944
+ .filter((block): block is { type: "text"; text: string } => block.type === "text")
945
+ .map(block => block.text)
946
+ .join("\n")
947
+ .toWellFormed();
948
+ }
949
+
950
+ if (sawImage && !sawText) {
787
951
  blocks.unshift({
788
- type: "text" as const,
952
+ type: "text",
789
953
  text: "(see attached image)",
790
954
  });
791
955
  }
@@ -799,17 +963,23 @@ export type AnthropicThinkingDisplay = "summarized" | "omitted";
799
963
  export interface AnthropicOptions extends StreamOptions {
800
964
  /**
801
965
  * Enable extended thinking.
802
- * For Opus 4.6+: uses adaptive thinking (Claude decides when/how much to think).
803
- * For older models: uses budget-based thinking with thinkingBudgetTokens.
966
+ * For adaptive-capable models (Opus 4.6+, Sonnet 4.6+, Fable/Mythos 5):
967
+ * uses adaptive thinking (Claude decides when/how much to think). For older
968
+ * models: uses budget-based thinking with thinkingBudgetTokens.
804
969
  */
805
970
  thinkingEnabled?: boolean;
806
971
  /**
807
972
  * Token budget for extended thinking (older models only).
808
- * Ignored for Opus 4.6+ which uses adaptive thinking.
973
+ * Ignored for adaptive-capable models.
809
974
  */
810
975
  thinkingBudgetTokens?: number;
811
976
  /**
812
- * Effort level for adaptive thinking (Opus 4.6+ only).
977
+ * Upstream wire model id override for collapsed effort-tier variants.
978
+ * Serialized as `requestModelId ?? model.requestModelId ?? model.id`.
979
+ */
980
+ requestModelId?: string;
981
+ /**
982
+ * Effort level for adaptive thinking.
813
983
  * Controls how much thinking Claude allocates:
814
984
  * - "max": Always thinks with no constraints
815
985
  * - "high": Always thinks, deep reasoning (default)
@@ -864,7 +1034,6 @@ export type AnthropicClientOptionsArgs = {
864
1034
  hasTools?: boolean;
865
1035
  thinkingEnabled?: boolean;
866
1036
  thinkingDisplay?: AnthropicThinkingDisplay;
867
- onSseEvent?: AnthropicOptions["onSseEvent"];
868
1037
  fetch?: FetchImpl;
869
1038
  claudeCodeSessionId?: string;
870
1039
  };
@@ -888,6 +1057,32 @@ type FoundryTlsOptions = {
888
1057
  key?: string;
889
1058
  };
890
1059
 
1060
+ const foundryTlsOptionsCache = new Map<string, FoundryTlsOptions | undefined>();
1061
+
1062
+ function foundryTlsCacheKeyComponent(value: string | undefined): string | null {
1063
+ if (!value) return null;
1064
+ const trimmed = value.trim();
1065
+ // For path-valued vars, fold the file mtime into the key so on-disk cert
1066
+ // rotation (common for short-lived corporate mTLS certs) invalidates the
1067
+ // cached TLS options instead of pinning the first read forever.
1068
+ if (trimmed && !trimmed.includes("-----BEGIN") && looksLikeFilePath(trimmed)) {
1069
+ try {
1070
+ return `${trimmed}@${fs.statSync(trimmed).mtimeMs}`;
1071
+ } catch {
1072
+ return trimmed;
1073
+ }
1074
+ }
1075
+ return value;
1076
+ }
1077
+
1078
+ function foundryTlsOptionsCacheKey(): string {
1079
+ return JSON.stringify([
1080
+ foundryTlsCacheKeyComponent($env.NODE_EXTRA_CA_CERTS),
1081
+ foundryTlsCacheKeyComponent($env.CLAUDE_CODE_CLIENT_CERT),
1082
+ foundryTlsCacheKeyComponent($env.CLAUDE_CODE_CLIENT_KEY),
1083
+ ]);
1084
+ }
1085
+
891
1086
  function resolveAnthropicBaseUrl(model: Model<"anthropic-messages">, apiKey?: string): string | undefined {
892
1087
  if (model.provider === "github-copilot") {
893
1088
  return normalizeAnthropicBaseUrl(resolveGitHubCopilotBaseUrl(model.baseUrl, apiKey) ?? model.baseUrl);
@@ -936,7 +1131,7 @@ function parseAnthropicCustomHeaders(rawHeaders: string | undefined): Record<str
936
1131
  export function resolveAnthropicCustomHeadersForBaseUrl(
937
1132
  baseUrl: string | undefined,
938
1133
  ): Record<string, string> | undefined {
939
- if (!isFoundryEnabled() && isAnthropicApiBaseUrl(baseUrl)) return undefined;
1134
+ if (!isFoundryEnabled() && isOfficialAnthropicApiUrl(baseUrl)) return undefined;
940
1135
  return parseAnthropicCustomHeaders($env.ANTHROPIC_CUSTOM_HEADERS);
941
1136
  }
942
1137
 
@@ -976,6 +1171,9 @@ function resolveFoundryTlsOptions(model: Model<"anthropic-messages">): FoundryTl
976
1171
  if (model.provider !== "anthropic") return undefined;
977
1172
  if (!isFoundryEnabled()) return undefined;
978
1173
 
1174
+ const cacheKey = foundryTlsOptionsCacheKey();
1175
+ if (foundryTlsOptionsCache.has(cacheKey)) return foundryTlsOptionsCache.get(cacheKey);
1176
+
979
1177
  const ca = resolvePemValue($env.NODE_EXTRA_CA_CERTS, "NODE_EXTRA_CA_CERTS");
980
1178
  const cert = resolvePemValue($env.CLAUDE_CODE_CLIENT_CERT, "CLAUDE_CODE_CLIENT_CERT");
981
1179
  const key = resolvePemValue($env.CLAUDE_CODE_CLIENT_KEY, "CLAUDE_CODE_CLIENT_KEY");
@@ -988,7 +1186,9 @@ function resolveFoundryTlsOptions(model: Model<"anthropic-messages">): FoundryTl
988
1186
  if (ca) options.ca = [...tls.rootCertificates, ca];
989
1187
  if (cert) options.cert = cert;
990
1188
  if (key) options.key = key;
991
- return Object.keys(options).length > 0 ? options : undefined;
1189
+ const resolved = Object.keys(options).length > 0 ? options : undefined;
1190
+ foundryTlsOptionsCache.set(cacheKey, resolved);
1191
+ return resolved;
992
1192
  }
993
1193
 
994
1194
  function buildClaudeCodeTlsFetchOptions(
@@ -1019,10 +1219,19 @@ function buildClaudeCodeTlsFetchOptions(
1019
1219
  };
1020
1220
  }
1021
1221
  function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
1222
+ // Case-insensitive merge: later sources win and keep their casing. A plain
1223
+ // Object.assign would let `authorization` and `Authorization` coexist, and
1224
+ // the Headers constructor then joins both values comma-separated on the wire.
1022
1225
  const merged: Record<string, string> = {};
1226
+ const keyByLower = new Map<string, string>();
1023
1227
  for (const headers of headerSources) {
1024
- if (headers) {
1025
- Object.assign(merged, headers);
1228
+ if (!headers) continue;
1229
+ for (const [key, value] of Object.entries(headers)) {
1230
+ const lower = key.toLowerCase();
1231
+ const existing = keyByLower.get(lower);
1232
+ if (existing !== undefined && existing !== key) delete merged[existing];
1233
+ keyByLower.set(lower, key);
1234
+ merged[key] = value;
1026
1235
  }
1027
1236
  }
1028
1237
  return merged;
@@ -1037,11 +1246,44 @@ const ANTHROPIC_MESSAGE_EVENTS: ReadonlySet<string> = new Set([
1037
1246
  "content_block_stop",
1038
1247
  ]);
1039
1248
 
1249
+ /**
1250
+ * Iterate over Anthropic SSE events from a raw Response, preserving ping events
1251
+ * for liveness. Malformed event envelopes are logged and skipped (non-fatal)
1252
+ * rather than aborting the stream.
1253
+ */
1254
+ type RawMessagePingEvent = { type: "ping" };
1255
+ type AnthropicStreamEvent = RawMessageStreamEvent | RawMessagePingEvent;
1256
+ const ANTHROPIC_PING_EVENT: RawMessagePingEvent = { type: "ping" };
1257
+
1258
+ /**
1259
+ * In-stream `error` SSE frames carry an Anthropic error envelope:
1260
+ * `{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}`.
1261
+ * Surface the structured type + message instead of the raw JSON blob; the
1262
+ * error type token (e.g. `overloaded_error`, `rate_limit_error`) is kept in
1263
+ * the message so `isProviderRetryableError`'s classification keys off the
1264
+ * structured type rather than incidental JSON substrings.
1265
+ */
1266
+ function createAnthropicSseStreamError(data: string): Error {
1267
+ try {
1268
+ const parsed = JSON.parse(data) as { error?: { type?: unknown; message?: unknown } };
1269
+ const errorType = typeof parsed?.error?.type === "string" ? parsed.error.type : undefined;
1270
+ const message = typeof parsed?.error?.message === "string" ? parsed.error.message : undefined;
1271
+ if (message) {
1272
+ return new Error(
1273
+ errorType ? `Anthropic stream error (${errorType}): ${message}` : `Anthropic stream error: ${message}`,
1274
+ );
1275
+ }
1276
+ } catch {
1277
+ // Not a JSON envelope; fall through to the raw payload.
1278
+ }
1279
+ return new Error(data);
1280
+ }
1281
+
1040
1282
  async function* iterateAnthropicEvents(
1041
1283
  response: Response,
1042
1284
  signal?: AbortSignal,
1043
1285
  onSseEvent?: AnthropicOptions["onSseEvent"],
1044
- ): AsyncGenerator<RawMessageStreamEvent> {
1286
+ ): AsyncGenerator<AnthropicStreamEvent> {
1045
1287
  if (!response.body) {
1046
1288
  throw new Error("Attempted to iterate over an Anthropic response with no body");
1047
1289
  }
@@ -1052,7 +1294,13 @@ async function* iterateAnthropicEvents(
1052
1294
  for await (const sse of readSseEvents(response.body, signal)) {
1053
1295
  notifyRawSseEvent(onSseEvent, sse);
1054
1296
  if (sse.event === "error") {
1055
- throw new Error(sse.data);
1297
+ throw createAnthropicSseStreamError(sse.data);
1298
+ }
1299
+
1300
+ if (sse.event === "ping") {
1301
+ // Surface keepalives so the idle watchdog treats them as liveness.
1302
+ yield ANTHROPIC_PING_EVENT;
1303
+ continue;
1056
1304
  }
1057
1305
 
1058
1306
  if (!ANTHROPIC_MESSAGE_EVENTS.has(sse.event ?? "")) {
@@ -1060,7 +1308,10 @@ async function* iterateAnthropicEvents(
1060
1308
  }
1061
1309
 
1062
1310
  try {
1063
- const event = parseJsonWithRepair<RawMessageStreamEvent>(sse.data);
1311
+ const event = JSON.parse(sse.data) as RawMessageStreamEvent;
1312
+ if (event.type !== sse.event) {
1313
+ reportAnthropicEnvelopeAnomaly(`event type ${event.type} does not match SSE event ${sse.event}`);
1314
+ }
1064
1315
  if (event.type === "message_start") {
1065
1316
  sawMessageStart = true;
1066
1317
  } else if (event.type === "message_stop") {
@@ -1069,14 +1320,14 @@ async function* iterateAnthropicEvents(
1069
1320
  yield event;
1070
1321
  } catch (error) {
1071
1322
  const message = error instanceof Error ? error.message : String(error);
1072
- throw new Error(
1073
- `Could not parse Anthropic SSE event ${sse.event}: ${message}; data=${sse.data}; raw=${sse.raw.join("\\n")}`,
1323
+ reportAnthropicEnvelopeAnomaly(
1324
+ `could not parse SSE event ${sse.event}: ${message}; skipping frame; data=${sse.data}`,
1074
1325
  );
1075
1326
  }
1076
1327
  }
1077
1328
 
1078
- if (sawMessageStart && !sawMessageEnd) {
1079
- throw createAnthropicStreamEnvelopeError("stream ended before message_stop");
1329
+ if (sawMessageStart && !sawMessageEnd && !signal?.aborted) {
1330
+ reportAnthropicEnvelopeAnomaly("stream ended before message_stop");
1080
1331
  }
1081
1332
  }
1082
1333
 
@@ -1104,53 +1355,48 @@ async function getAnthropicStreamResponse(
1104
1355
  request: unknown,
1105
1356
  signal?: AbortSignal,
1106
1357
  onSseEvent?: AnthropicOptions["onSseEvent"],
1107
- ): Promise<{ events: AsyncIterable<RawMessageStreamEvent>; response: Response; requestId: string | null }> {
1358
+ ): Promise<{
1359
+ events: AsyncIterable<AnthropicStreamEvent>;
1360
+ response: Response;
1361
+ requestId: string | null;
1362
+ recordsRawSseEvents: boolean;
1363
+ }> {
1108
1364
  if (hasAnthropicRawResponseRequest(request)) {
1109
1365
  const response = await request.asResponse();
1110
1366
  return {
1111
1367
  events: iterateAnthropicEvents(response, signal, onSseEvent),
1112
1368
  response,
1113
1369
  requestId: response.headers.get("request-id"),
1370
+ recordsRawSseEvents: true,
1114
1371
  };
1115
1372
  }
1116
1373
  if (hasAnthropicStreamWithResponseRequest(request)) {
1117
1374
  const { data, response, request_id } = await request.withResponse();
1118
- return { events: data, response, requestId: request_id };
1375
+ return { events: data, response, requestId: request_id, recordsRawSseEvents: false };
1119
1376
  }
1120
1377
  throw new Error("Anthropic SDK request did not expose a stream response");
1121
1378
  }
1122
1379
 
1123
- function getAnthropicCompat(
1124
- model: Model<"anthropic-messages">,
1125
- ): Required<NonNullable<Model<"anthropic-messages">["compat"]>> {
1126
- return {
1127
- disableStrictTools: model.compat?.disableStrictTools ?? false,
1128
- disableAdaptiveThinking: model.compat?.disableAdaptiveThinking ?? false,
1129
- supportsEagerToolInputStreaming: model.compat?.supportsEagerToolInputStreaming ?? true,
1130
- supportsLongCacheRetention: model.compat?.supportsLongCacheRetention ?? true,
1131
- supportsMidConversationSystem:
1132
- model.compat?.supportsMidConversationSystem ??
1133
- // First-party Claude API only. Bedrock/Vertex/Foundry and other
1134
- // Anthropic-compatible proxies reject the role; gate auto-detection on
1135
- // the canonical api.anthropic.com host plus an Opus 4.8+ model id.
1136
- (isAnthropicApiBaseUrl(model.baseUrl) && supportsMidConversationSystemMessages(model.id)),
1137
- };
1380
+ async function* observeDecodedAnthropicSdkEvents(
1381
+ events: AsyncIterable<AnthropicStreamEvent>,
1382
+ observer: (event: RawSseEvent) => void,
1383
+ ): AsyncGenerator<AnthropicStreamEvent> {
1384
+ for await (const event of events) {
1385
+ const data = JSON.stringify(event);
1386
+ // Reconstructed from decoded SDK event; not literal wire bytes.
1387
+ notifyRawSseEvent(observer, { event: event.type, data, raw: [`event: ${event.type}`, `data: ${data}`] });
1388
+ yield event;
1389
+ }
1138
1390
  }
1139
1391
 
1140
- const PROVIDER_MAX_RETRIES = 3;
1141
- const PROVIDER_BASE_DELAY_MS = 2000;
1392
+ const PROVIDER_MAX_RETRIES = 10;
1142
1393
 
1143
- /**
1144
- * Check if an error from the Anthropic SDK is a rate-limit/transient error that
1145
- * should be retried before any content has been emitted.
1146
- *
1147
- * Includes malformed JSON stream-envelope parse errors seen from some
1148
- * Anthropic-compatible proxy endpoints.
1149
- */
1150
1394
  /** Transient stream corruption errors where the response was truncated mid-JSON. */
1151
1395
  function isTransientStreamParseError(error: unknown): boolean {
1152
1396
  if (!(error instanceof Error)) return false;
1153
- return /json parse error|unterminated string|unexpected end of json input/i.test(error.message);
1397
+ return /unterminated string|unexpected end of json input|unexpected end of data|unexpected eof|end of file|eof while parsing|truncated/i.test(
1398
+ error.message,
1399
+ );
1154
1400
  }
1155
1401
 
1156
1402
  const ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX = "Anthropic stream envelope error:";
@@ -1159,26 +1405,28 @@ function createAnthropicStreamEnvelopeError(message: string): Error {
1159
1405
  return new Error(`${ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX} ${message}`);
1160
1406
  }
1161
1407
 
1162
- const ANTHROPIC_PRE_MESSAGE_START_EVENT_TYPES = new Set([
1163
- "content_block_start",
1164
- "content_block_delta",
1165
- "content_block_stop",
1166
- "message_delta",
1167
- "message_stop",
1168
- "message_start",
1169
- ]);
1408
+ /**
1409
+ * Log a malformed-stream-envelope anomaly without aborting the turn. The strict
1410
+ * parser would `throw createAnthropicStreamEnvelopeError(...)` here; we instead
1411
+ * surface a warning and let the caller skip the offending event (or finalize what
1412
+ * already streamed) so a non-conforming endpoint degrades to best-effort content
1413
+ * rather than failing the request.
1414
+ */
1415
+ function reportAnthropicEnvelopeAnomaly(detail: string): void {
1416
+ logger.warn(`anthropic: ignoring malformed stream envelope: ${detail}`);
1417
+ }
1170
1418
 
1171
1419
  function shouldIgnoreAnthropicPreambleEvent(eventType: unknown): boolean {
1172
1420
  if (typeof eventType !== "string") return false;
1173
1421
  if (eventType === "ping") return true;
1174
- return !ANTHROPIC_PRE_MESSAGE_START_EVENT_TYPES.has(eventType);
1422
+ return !ANTHROPIC_MESSAGE_EVENTS.has(eventType);
1175
1423
  }
1176
1424
 
1177
1425
  function isTransientStreamEnvelopeError(error: unknown): boolean {
1178
1426
  if (!(error instanceof Error)) return false;
1179
1427
  return (
1180
1428
  error.message.includes(ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX) ||
1181
- /stream event order|before message_start|before terminal stop signal/i.test(error.message)
1429
+ /stream event order|before message_start/i.test(error.message)
1182
1430
  );
1183
1431
  }
1184
1432
 
@@ -1190,6 +1438,16 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
1190
1438
  export function isProviderRetryableError(error: unknown, provider?: string): boolean {
1191
1439
  if (!(error instanceof Error)) return false;
1192
1440
  if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
1441
+ // Account-level usage/quota limits ("usage_limit_reached", "exceed your
1442
+ // account's rate limit", "quota exceeded") are persistent — the server
1443
+ // parks the credential for minutes-to-hours (see the long `retry-after`).
1444
+ // Retrying the same key with the provider's seconds-scale backoff never
1445
+ // helps; these are owned by the credential-rotation layer (auth-gateway /
1446
+ // `streamSimple` a/b/c policy), so surface them immediately instead of
1447
+ // burning the retry budget here.
1448
+ if (isUsageLimitError(error.message)) return false;
1449
+ const status = extractHttpStatusFromError(error);
1450
+ if (status !== undefined && status >= 400 && status < 500 && status !== 408 && status !== 429) return false;
1193
1451
  const msg = error.message.toLowerCase();
1194
1452
  if (
1195
1453
  isUnexpectedSocketCloseMessage(msg) ||
@@ -1223,13 +1481,12 @@ export type AnthropicUsageLike = {
1223
1481
 
1224
1482
  /**
1225
1483
  * Capture Anthropic's optional cache-creation TTL breakdown and server-tool-use
1226
- * counters into the harness Usage shape. Only sets fields that were reported, so
1227
- * a `message_delta` that omits `cache_creation` does not clobber the breakdown
1228
- * established at `message_start`.
1484
+ * counters into the harness Usage shape. Omitted/null fields are no-ops; explicit
1485
+ * zero-valued objects clear prior extras from earlier stream usage snapshots.
1229
1486
  */
1230
1487
  export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLike): void {
1231
1488
  const cacheCreation = source.cache_creation;
1232
- if (cacheCreation) {
1489
+ if (cacheCreation != null) {
1233
1490
  const fiveMinute = cacheCreation.ephemeral_5m_input_tokens ?? 0;
1234
1491
  const oneHour = cacheCreation.ephemeral_1h_input_tokens ?? 0;
1235
1492
  if (fiveMinute > 0 || oneHour > 0) {
@@ -1237,10 +1494,12 @@ export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLi
1237
1494
  ...(fiveMinute > 0 ? { ephemeral5m: fiveMinute } : {}),
1238
1495
  ...(oneHour > 0 ? { ephemeral1h: oneHour } : {}),
1239
1496
  };
1497
+ } else {
1498
+ delete usage.cttl;
1240
1499
  }
1241
1500
  }
1242
1501
  const serverToolUse = source.server_tool_use;
1243
- if (serverToolUse) {
1502
+ if (serverToolUse != null) {
1244
1503
  const webSearch = serverToolUse.web_search_requests ?? 0;
1245
1504
  const webFetch = serverToolUse.web_fetch_requests ?? 0;
1246
1505
  if (webSearch > 0 || webFetch > 0) {
@@ -1248,6 +1507,8 @@ export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLi
1248
1507
  ...(webSearch > 0 ? { webSearch } : {}),
1249
1508
  ...(webFetch > 0 ? { webFetch } : {}),
1250
1509
  };
1510
+ } else {
1511
+ delete usage.server;
1251
1512
  }
1252
1513
  }
1253
1514
  }
@@ -1263,30 +1524,50 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1263
1524
  const startTime = Date.now();
1264
1525
  let firstTokenTime: number | undefined;
1265
1526
 
1266
- const copilotDynamicHeaders =
1267
- model.provider === "github-copilot"
1268
- ? buildCopilotDynamicHeaders({
1269
- messages: context.messages,
1270
- hasImages: hasCopilotVisionInput(context.messages),
1271
- premiumMultiplier: model.premiumMultiplier,
1272
- headers: { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
1273
- initiatorOverride: options?.initiatorOverride,
1274
- })
1275
- : undefined;
1276
1527
  const output: AssistantMessage = {
1277
1528
  role: "assistant",
1278
1529
  content: [],
1279
1530
  api: model.api as Api,
1280
1531
  provider: model.provider,
1281
1532
  model: model.id,
1282
- usage: createEmptyUsage(copilotDynamicHeaders?.premiumRequests),
1533
+ usage: createEmptyUsage(),
1283
1534
  stopReason: "stop",
1284
1535
  timestamp: Date.now(),
1285
1536
  };
1286
1537
  let rawRequestDump: RawHttpRequestDump | undefined;
1287
1538
  let activeAbortTracker = createAbortSourceTracker(options?.signal);
1288
1539
 
1540
+ const onSseEvent = options?.onSseEvent;
1541
+ const rawSseObserver = onSseEvent ? (event: RawSseEvent) => onSseEvent(event, model) : undefined;
1542
+
1289
1543
  try {
1544
+ // Built inside the try so a copilot credential/header failure surfaces as
1545
+ // an error event instead of an unhandled rejection that leaves the stream
1546
+ // (and any consumer awaiting `result()`) hanging forever.
1547
+ const copilotDynamicHeaders =
1548
+ model.provider === "github-copilot"
1549
+ ? buildCopilotDynamicHeaders({
1550
+ messages: context.messages,
1551
+ hasImages: hasCopilotVisionInput(context.messages),
1552
+ premiumMultiplier: model.premiumMultiplier,
1553
+ headers: { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
1554
+ initiatorOverride: options?.initiatorOverride,
1555
+ })
1556
+ : undefined;
1557
+ if (copilotDynamicHeaders?.premiumRequests !== undefined) {
1558
+ output.usage.premiumRequests = copilotDynamicHeaders.premiumRequests;
1559
+ }
1560
+ const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
1561
+ const baseUrl = resolveAnthropicBaseUrl(model, apiKey) ?? "https://api.anthropic.com";
1562
+ const providerSessionState = getAnthropicProviderSessionState(
1563
+ options?.providerSessionState,
1564
+ baseUrl,
1565
+ model.id,
1566
+ );
1567
+ let disableStrictTools =
1568
+ (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
1569
+ let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
1570
+
1290
1571
  let client: AnthropicMessagesClientLike;
1291
1572
  let isOAuthToken: boolean;
1292
1573
 
@@ -1294,19 +1575,38 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1294
1575
  client = options.client;
1295
1576
  isOAuthToken = false;
1296
1577
  } else {
1297
- const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
1298
-
1299
1578
  const extraBetas = normalizeExtraBetas(options?.betas);
1300
1579
  const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
1301
- if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
1580
+ // Skip the fast-mode beta when this session already learned the
1581
+ // endpoint+model rejects fast mode; `speed` is dropped from the params
1582
+ // too (dropFastMode), so the request stays a faithful non-fast request.
1583
+ if (wantsAnthropicPriority && !dropFastMode && !extraBetas.includes(fastModeBeta)) {
1302
1584
  extraBetas.push(fastModeBeta);
1303
1585
  }
1304
1586
  if (options?.taskBudget && !extraBetas.includes(taskBudgetBeta)) {
1305
1587
  extraBetas.push(taskBudgetBeta);
1306
1588
  }
1307
- if (options?.thinkingEnabled && model.reasoning && !extraBetas.includes(effortBeta)) {
1589
+ // `output_config.effort` ships on thinking-on requests AND on the
1590
+ // thinking-off adaptive pin (adaptive-only models get effort:"low" so
1591
+ // the toggle cannot 400); the beta must accompany the field in both.
1592
+ const sendsAdaptiveEffortPin =
1593
+ options?.thinkingEnabled === false &&
1594
+ model.thinking?.mode === "anthropic-adaptive" &&
1595
+ !model.compat.disableAdaptiveThinking;
1596
+ if (
1597
+ model.reasoning &&
1598
+ (options?.thinkingEnabled || sendsAdaptiveEffortPin) &&
1599
+ !extraBetas.includes(effortBeta)
1600
+ ) {
1308
1601
  extraBetas.push(effortBeta);
1309
1602
  }
1603
+ if (model.compat.supportsMidConversationSystem && !extraBetas.includes(midConversationSystemBeta)) {
1604
+ // convertAnthropicMessages may upgrade developer turns to the
1605
+ // mid-conversation `system` role on these models; API-key requests
1606
+ // need the beta alongside the role (OAuth agent requests already
1607
+ // carry it in the Claude Code list).
1608
+ extraBetas.push(midConversationSystemBeta);
1609
+ }
1310
1610
 
1311
1611
  const created = createClient(model, {
1312
1612
  model,
@@ -1320,24 +1620,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1320
1620
  hasTools: !!context.tools?.length,
1321
1621
  thinkingEnabled: options?.thinkingEnabled,
1322
1622
  thinkingDisplay: options?.thinkingDisplay,
1323
- onSseEvent: options?.onSseEvent,
1324
1623
  fetch: options?.fetch,
1325
1624
  claudeCodeSessionId: options?.sessionId ?? extractClaudeMetadataSessionId(options?.metadata?.user_id),
1326
1625
  });
1327
1626
  client = created.client;
1328
1627
  isOAuthToken = created.isOAuthToken;
1329
1628
  }
1330
- const baseUrl =
1331
- resolveAnthropicBaseUrl(model, options?.apiKey ?? getEnvApiKey(model.provider) ?? "") ??
1332
- "https://api.anthropic.com";
1333
- const providerSessionState = getAnthropicProviderSessionState(options?.providerSessionState);
1334
- let disableStrictTools =
1335
- (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
1336
- let strictFallbackErrorMessage: string | undefined;
1337
- let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
1338
1629
  const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
1339
1630
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
1340
- let nextParams = buildParams(model, baseUrl, preparedContext, isOAuthToken, options, disableStrictTools);
1631
+ let nextParams = buildParams(model, preparedContext, isOAuthToken, options, disableStrictTools);
1341
1632
  if (disableStrictTools) {
1342
1633
  dropAnthropicStrictTools(nextParams);
1343
1634
  }
@@ -1348,6 +1639,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1348
1639
  if (replacementPayload !== undefined) {
1349
1640
  nextParams = replacementPayload as typeof nextParams;
1350
1641
  }
1642
+ nextParams = toWellFormedDeep(nextParams) as typeof nextParams;
1351
1643
  rawRequestDump = {
1352
1644
  provider: model.provider,
1353
1645
  api: output.api,
@@ -1371,6 +1663,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1371
1663
  const requestTimeoutMs =
1372
1664
  firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
1373
1665
  const blocks = output.content as Block[];
1666
+ const finalizeStreamBlock = (block: Block, contentIndex: number): void => {
1667
+ delete (block as { index?: number }).index;
1668
+ if (block.type === "text") {
1669
+ stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
1670
+ } else if (block.type === "thinking") {
1671
+ stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
1672
+ } else if (block.type === "toolCall") {
1673
+ const finalJson =
1674
+ block.partialJson.length > 0 ? block.partialJson : JSON.stringify(block.arguments ?? {});
1675
+ try {
1676
+ block.arguments = JSON.parse(finalJson) as ToolCall["arguments"];
1677
+ } catch (parseError) {
1678
+ // Non-fatal: keep the best-effort arguments recovered by the throttled streaming
1679
+ // parser instead of failing the turn on malformed/truncated tool-argument JSON.
1680
+ reportAnthropicEnvelopeAnomaly(
1681
+ `tool_use ${block.id} arguments are not valid JSON: ${parseError instanceof Error ? parseError.message : String(parseError)}`,
1682
+ );
1683
+ block.arguments = (block.arguments ?? {}) as ToolCall["arguments"];
1684
+ }
1685
+ delete (block as { partialJson?: string }).partialJson;
1686
+ delete (block as { lastParseLen?: number }).lastParseLen;
1687
+ stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
1688
+ }
1689
+ };
1374
1690
  stream.push({ type: "start", partial: output });
1375
1691
  // Retry loop for transient errors from the stream.
1376
1692
  // Provider-level transport/rate-limit failures: only before any streamed content starts.
@@ -1381,7 +1697,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1381
1697
  while (true) {
1382
1698
  activeAbortTracker = createAbortSourceTracker(options?.signal);
1383
1699
  const { requestSignal } = activeAbortTracker;
1384
- const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
1700
+ // The provider loop owns retries: pin the client's internal retry loop
1701
+ // to zero even when no watchdog timeout is configured (the helper only
1702
+ // pins it alongside a timeout; a client retry budget of 5 would otherwise
1703
+ // multiply with PROVIDER_MAX_RETRIES into up to 66 wire attempts).
1704
+ const requestOptions = { ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs), maxRetries: 0 };
1385
1705
  const anthropicRequest: unknown =
1386
1706
  isOAuthToken && client.beta
1387
1707
  ? client.beta.messages.create({ ...params, stream: true }, requestOptions)
@@ -1396,19 +1716,17 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1396
1716
  requestTimeoutMs,
1397
1717
  );
1398
1718
  }
1399
- let anthropicStream: AsyncIterable<RawMessageStreamEvent>;
1719
+ let anthropicStream: AsyncIterable<AnthropicStreamEvent>;
1400
1720
  let response: Response;
1401
1721
  let requestId: string | null;
1722
+ let recordsRawSseEvents: boolean;
1402
1723
  try {
1403
1724
  ({
1404
1725
  events: anthropicStream,
1405
1726
  response,
1406
1727
  requestId,
1407
- } = await getAnthropicStreamResponse(
1408
- anthropicRequest,
1409
- requestSignal,
1410
- options?.client ? event => options?.onSseEvent?.(event, model) : undefined,
1411
- ));
1728
+ recordsRawSseEvents,
1729
+ } = await getAnthropicStreamResponse(anthropicRequest, requestSignal, rawSseObserver));
1412
1730
  } catch (error) {
1413
1731
  if (error instanceof AnthropicConnectionTimeoutError && !activeAbortTracker.wasCallerAbort()) {
1414
1732
  throw firstEventTimeoutAbortError;
@@ -1421,8 +1739,23 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1421
1739
  let sawEvent = false;
1422
1740
  let sawMessageStart = false;
1423
1741
  let sawTerminalEnvelope = false;
1424
-
1425
- for await (const event of iterateWithIdleTimeout(anthropicStream, {
1742
+ let sawMessageStop = false;
1743
+ // Set when a duplicate message_start splices a second envelope onto
1744
+ // the stream; closed indexes then refuse to reopen so replayed
1745
+ // content cannot duplicate (see content_block_start guard).
1746
+ let sawSplicedEnvelope = false;
1747
+ const closedBlockIndexes = new Set<number>();
1748
+ const openBlocks = new Map<
1749
+ number,
1750
+ { contentIndex: number; kind: "text" | "thinking" | "redactedThinking" | "toolCall" | "ignored" }
1751
+ >();
1752
+
1753
+ // Pings keep the idle deadline alive once content is flowing, but a
1754
+ // ping before message_start must not consume the first-event watchdog:
1755
+ // it would flip the (retryable) pre-content stall classification into
1756
+ // a terminal mid-stream idle timeout.
1757
+ let sawNonPingEvent = false;
1758
+ const timedAnthropicStream = iterateWithIdleTimeout(anthropicStream, {
1426
1759
  idleTimeoutMs,
1427
1760
  firstItemTimeoutMs: firstEventTimeoutMs,
1428
1761
  errorMessage: idleTimeoutAbortError.message,
@@ -1430,23 +1763,45 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1430
1763
  onIdle: () => activeAbortTracker.abortLocally(idleTimeoutAbortError),
1431
1764
  onFirstItemTimeout: () => activeAbortTracker.abortLocally(firstEventTimeoutAbortError),
1432
1765
  abortSignal: options?.signal,
1433
- })) {
1766
+ isProgressItem: item => {
1767
+ if ((item as AnthropicStreamEvent).type === "ping") return sawNonPingEvent;
1768
+ sawNonPingEvent = true;
1769
+ return true;
1770
+ },
1771
+ });
1772
+ const observedAnthropicStream =
1773
+ rawSseObserver && !recordsRawSseEvents
1774
+ ? observeDecodedAnthropicSdkEvents(timedAnthropicStream, rawSseObserver)
1775
+ : timedAnthropicStream;
1776
+ for await (const event of observedAnthropicStream) {
1434
1777
  sawEvent = true;
1435
1778
 
1436
1779
  if (event.type === "message_start") {
1437
1780
  if (sawMessageStart) {
1781
+ // Transparent reconnects can splice a fresh envelope onto the same
1782
+ // stream; keep the original message but surface the anomaly. Events
1783
+ // for blocks still open from the first envelope continue to apply,
1784
+ // but replayed blocks are dropped below (see closedBlockIndexes).
1785
+ reportAnthropicEnvelopeAnomaly("duplicate message_start event");
1786
+ sawSplicedEnvelope = true;
1438
1787
  continue;
1439
1788
  }
1440
1789
  sawMessageStart = true;
1441
- applyAnthropicUsageExtras(output.usage, event.message.usage);
1442
- output.responseId = event.message.id;
1443
- output.usage.input = event.message.usage.input_tokens || 0;
1444
- output.usage.output = event.message.usage.output_tokens || 0;
1445
- output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
1446
- output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
1447
- output.usage.totalTokens =
1448
- output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
1449
- calculateCost(model, output.usage);
1790
+ const startMessage = event.message;
1791
+ if (startMessage?.id) output.responseId = startMessage.id;
1792
+ const startUsage = startMessage?.usage;
1793
+ if (startUsage) {
1794
+ applyAnthropicUsageExtras(output.usage, startUsage);
1795
+ output.usage.input = startUsage.input_tokens || 0;
1796
+ output.usage.output = startUsage.output_tokens || 0;
1797
+ output.usage.cacheRead = startUsage.cache_read_input_tokens || 0;
1798
+ output.usage.cacheWrite = startUsage.cache_creation_input_tokens || 0;
1799
+ output.usage.totalTokens =
1800
+ output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
1801
+ calculateCost(model, output.usage);
1802
+ } else {
1803
+ reportAnthropicEnvelopeAnomaly("message_start missing usage");
1804
+ }
1450
1805
  continue;
1451
1806
  }
1452
1807
 
@@ -1458,6 +1813,28 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1458
1813
  }
1459
1814
 
1460
1815
  if (event.type === "content_block_start") {
1816
+ if (sawTerminalEnvelope) {
1817
+ reportAnthropicEnvelopeAnomaly(`received ${event.type} after terminal stop signal`);
1818
+ continue;
1819
+ }
1820
+ if (openBlocks.has(event.index)) {
1821
+ reportAnthropicEnvelopeAnomaly(`duplicate content_block_start index ${event.index}`);
1822
+ continue;
1823
+ }
1824
+ if (sawSplicedEnvelope && closedBlockIndexes.has(event.index)) {
1825
+ // A spliced envelope replaying an index this stream already
1826
+ // completed would append duplicate text/tool calls; consume its
1827
+ // events silently instead.
1828
+ reportAnthropicEnvelopeAnomaly(
1829
+ `replayed content_block_start index ${event.index} after duplicate message_start`,
1830
+ );
1831
+ openBlocks.set(event.index, { contentIndex: -1, kind: "ignored" });
1832
+ continue;
1833
+ }
1834
+ if (!event.content_block?.type) {
1835
+ reportAnthropicEnvelopeAnomaly("content_block_start missing content_block payload");
1836
+ continue;
1837
+ }
1461
1838
  if (!firstTokenTime) firstTokenTime = Date.now();
1462
1839
  if (event.content_block.type === "text") {
1463
1840
  streamedReplayUnsafeContent = true;
@@ -1467,12 +1844,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1467
1844
  index: event.index,
1468
1845
  };
1469
1846
  output.content.push(block);
1847
+ const contentIndex = output.content.length - 1;
1848
+ openBlocks.set(event.index, { contentIndex, kind: "text" });
1470
1849
  stream.push({
1471
1850
  type: "text_start",
1472
- contentIndex: output.content.length - 1,
1851
+ contentIndex,
1473
1852
  partial: output,
1474
1853
  });
1475
1854
  } else if (event.content_block.type === "thinking") {
1855
+ streamedReplayUnsafeContent = true;
1476
1856
  const block: Block = {
1477
1857
  type: "thinking",
1478
1858
  thinking: "",
@@ -1480,18 +1860,25 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1480
1860
  index: event.index,
1481
1861
  };
1482
1862
  output.content.push(block);
1863
+ const contentIndex = output.content.length - 1;
1864
+ openBlocks.set(event.index, { contentIndex, kind: "thinking" });
1483
1865
  stream.push({
1484
1866
  type: "thinking_start",
1485
- contentIndex: output.content.length - 1,
1867
+ contentIndex,
1486
1868
  partial: output,
1487
1869
  });
1488
1870
  } else if (event.content_block.type === "redacted_thinking") {
1871
+ streamedReplayUnsafeContent = true;
1489
1872
  const block: Block = {
1490
1873
  type: "redactedThinking",
1491
1874
  data: event.content_block.data,
1492
1875
  index: event.index,
1493
1876
  };
1494
1877
  output.content.push(block);
1878
+ openBlocks.set(event.index, {
1879
+ contentIndex: output.content.length - 1,
1880
+ kind: "redactedThinking",
1881
+ });
1495
1882
  } else if (event.content_block.type === "tool_use") {
1496
1883
  streamedReplayUnsafeContent = true;
1497
1884
  const block: Block = {
@@ -1505,134 +1892,165 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1505
1892
  index: event.index,
1506
1893
  };
1507
1894
  output.content.push(block);
1895
+ const contentIndex = output.content.length - 1;
1896
+ openBlocks.set(event.index, { contentIndex, kind: "toolCall" });
1508
1897
  stream.push({
1509
1898
  type: "toolcall_start",
1510
- contentIndex: output.content.length - 1,
1899
+ contentIndex,
1511
1900
  partial: output,
1512
1901
  });
1902
+ } else {
1903
+ openBlocks.set(event.index, { contentIndex: -1, kind: "ignored" });
1513
1904
  }
1514
1905
  } else if (event.type === "content_block_delta") {
1906
+ if (sawTerminalEnvelope) {
1907
+ reportAnthropicEnvelopeAnomaly(`received ${event.type} after terminal stop signal`);
1908
+ continue;
1909
+ }
1910
+ const openBlock = openBlocks.get(event.index);
1911
+ if (!openBlock) {
1912
+ reportAnthropicEnvelopeAnomaly(
1913
+ `received content_block_delta for unopened index ${event.index}`,
1914
+ );
1915
+ continue;
1916
+ }
1917
+ if (openBlock.kind === "ignored") continue;
1918
+ if (!event.delta?.type) {
1919
+ reportAnthropicEnvelopeAnomaly("content_block_delta missing delta payload");
1920
+ continue;
1921
+ }
1922
+ const block = blocks[openBlock.contentIndex];
1515
1923
  if (event.delta.type === "text_delta") {
1516
- const index = blocks.findIndex(b => b.index === event.index);
1517
- const block = blocks[index];
1518
- if (block && block.type === "text") {
1519
- block.text += event.delta.text;
1520
- stream.push({
1521
- type: "text_delta",
1522
- contentIndex: index,
1523
- delta: event.delta.text,
1524
- partial: output,
1525
- });
1924
+ if (openBlock.kind !== "text" || block?.type !== "text") {
1925
+ reportAnthropicEnvelopeAnomaly(`received text_delta for ${openBlock.kind} block`);
1926
+ continue;
1526
1927
  }
1928
+ streamedReplayUnsafeContent = true;
1929
+ block.text += event.delta.text;
1930
+ stream.push({
1931
+ type: "text_delta",
1932
+ contentIndex: openBlock.contentIndex,
1933
+ delta: event.delta.text,
1934
+ partial: output,
1935
+ });
1527
1936
  } else if (event.delta.type === "thinking_delta") {
1528
- const index = blocks.findIndex(b => b.index === event.index);
1529
- const block = blocks[index];
1530
- if (block && block.type === "thinking") {
1531
- block.thinking += event.delta.thinking;
1532
- stream.push({
1533
- type: "thinking_delta",
1534
- contentIndex: index,
1535
- delta: event.delta.thinking,
1536
- partial: output,
1537
- });
1937
+ if (openBlock.kind !== "thinking" || block?.type !== "thinking") {
1938
+ reportAnthropicEnvelopeAnomaly(`received thinking_delta for ${openBlock.kind} block`);
1939
+ continue;
1538
1940
  }
1941
+ streamedReplayUnsafeContent = true;
1942
+ block.thinking += event.delta.thinking;
1943
+ stream.push({
1944
+ type: "thinking_delta",
1945
+ contentIndex: openBlock.contentIndex,
1946
+ delta: event.delta.thinking,
1947
+ partial: output,
1948
+ });
1539
1949
  } else if (event.delta.type === "input_json_delta") {
1540
- const index = blocks.findIndex(b => b.index === event.index);
1541
- const block = blocks[index];
1542
- if (block && block.type === "toolCall") {
1543
- block.partialJson += event.delta.partial_json;
1544
- const throttled = parseStreamingJsonThrottled(block.partialJson, block.lastParseLen ?? 0);
1545
- if (throttled) {
1546
- block.arguments = throttled.value;
1547
- block.lastParseLen = throttled.parsedLen;
1548
- }
1549
- stream.push({
1550
- type: "toolcall_delta",
1551
- contentIndex: index,
1552
- delta: event.delta.partial_json,
1553
- partial: output,
1554
- });
1950
+ if (openBlock.kind !== "toolCall" || block?.type !== "toolCall") {
1951
+ reportAnthropicEnvelopeAnomaly(`received input_json_delta for ${openBlock.kind} block`);
1952
+ continue;
1953
+ }
1954
+ streamedReplayUnsafeContent = true;
1955
+ block.partialJson += event.delta.partial_json;
1956
+ const throttled = parseStreamingJsonThrottled(block.partialJson, block.lastParseLen ?? 0);
1957
+ if (throttled) {
1958
+ block.arguments = throttled.value;
1959
+ block.lastParseLen = throttled.parsedLen;
1555
1960
  }
1961
+ stream.push({
1962
+ type: "toolcall_delta",
1963
+ contentIndex: openBlock.contentIndex,
1964
+ delta: event.delta.partial_json,
1965
+ partial: output,
1966
+ });
1556
1967
  } else if (event.delta.type === "signature_delta") {
1557
- const index = blocks.findIndex(b => b.index === event.index);
1558
- const block = blocks[index];
1559
- if (block && block.type === "thinking") {
1560
- block.thinkingSignature = block.thinkingSignature || "";
1561
- block.thinkingSignature += event.delta.signature;
1968
+ if (openBlock.kind !== "thinking" || block?.type !== "thinking") {
1969
+ reportAnthropicEnvelopeAnomaly(`received signature_delta for ${openBlock.kind} block`);
1970
+ continue;
1562
1971
  }
1972
+ streamedReplayUnsafeContent = true;
1973
+ block.thinkingSignature = block.thinkingSignature || "";
1974
+ block.thinkingSignature += event.delta.signature;
1563
1975
  }
1564
1976
  } else if (event.type === "content_block_stop") {
1565
- const index = blocks.findIndex(b => b.index === event.index);
1566
- const block = blocks[index];
1567
- if (block) {
1568
- delete (block as { index?: number }).index;
1569
- if (block.type === "text") {
1570
- stream.push({
1571
- type: "text_end",
1572
- contentIndex: index,
1573
- content: block.text,
1574
- partial: output,
1575
- });
1576
- } else if (block.type === "thinking") {
1577
- stream.push({
1578
- type: "thinking_end",
1579
- contentIndex: index,
1580
- content: block.thinking,
1581
- partial: output,
1582
- });
1583
- } else if (block.type === "toolCall") {
1584
- block.arguments = parseStreamingJson(block.partialJson);
1585
- delete (block as { partialJson?: string }).partialJson;
1586
- delete (block as { lastParseLen?: number }).lastParseLen;
1587
- stream.push({
1588
- type: "toolcall_end",
1589
- contentIndex: index,
1590
- toolCall: block,
1591
- partial: output,
1592
- });
1593
- }
1977
+ if (sawTerminalEnvelope) {
1978
+ reportAnthropicEnvelopeAnomaly(`received ${event.type} after terminal stop signal`);
1979
+ continue;
1980
+ }
1981
+ const openBlock = openBlocks.get(event.index);
1982
+ if (!openBlock) {
1983
+ reportAnthropicEnvelopeAnomaly(`received content_block_stop for unopened index ${event.index}`);
1984
+ continue;
1985
+ }
1986
+ if (openBlock.kind === "ignored") {
1987
+ openBlocks.delete(event.index);
1988
+ continue;
1989
+ }
1990
+ const block = blocks[openBlock.contentIndex];
1991
+ if (!block || block.type !== openBlock.kind) {
1992
+ reportAnthropicEnvelopeAnomaly(`content_block_stop kind mismatch for index ${event.index}`);
1993
+ openBlocks.delete(event.index);
1994
+ continue;
1594
1995
  }
1996
+ openBlocks.delete(event.index);
1997
+ closedBlockIndexes.add(event.index);
1998
+ finalizeStreamBlock(block, openBlock.contentIndex);
1595
1999
  } else if (event.type === "message_delta") {
1596
- const rawStopReason = event.delta.stop_reason;
2000
+ if (sawTerminalEnvelope) {
2001
+ // A spliced reconnect's second envelope must not overwrite the
2002
+ // completed message's stop reason or usage.
2003
+ reportAnthropicEnvelopeAnomaly("received message_delta after terminal stop signal");
2004
+ continue;
2005
+ }
2006
+ const delta = event.delta;
2007
+ const rawStopReason = delta?.stop_reason;
1597
2008
  if (rawStopReason) {
1598
2009
  output.stopReason = mapStopReason(rawStopReason);
1599
2010
  sawTerminalEnvelope = true;
1600
2011
  }
1601
- const stopDetails = event.delta.stop_details;
1602
- if (stopDetails && stopDetails.type === "refusal") {
1603
- const explanation = stopDetails.explanation?.trim();
1604
- const category = stopDetails.category;
1605
- const label = category ? `Refusal (${category})` : "Refusal";
1606
- output.errorMessage = explanation ? `${label}: ${explanation}` : label;
1607
- } else if (output.stopReason === "error" && !output.errorMessage) {
1608
- // Anthropic flagged an error-class stop (refusal / sensitive) without
1609
- // populating stop_details. Surface the raw reason instead of falling
1610
- // through to the generic "unknown error" string when we throw below.
1611
- output.errorMessage =
1612
- rawStopReason === "refusal"
1613
- ? "Refusal (no details provided)"
1614
- : rawStopReason === "sensitive"
1615
- ? "Content flagged by safety filters"
1616
- : `Anthropic stream ended with stop_reason: ${rawStopReason ?? "unknown"}`;
1617
- }
1618
- if (event.usage.input_tokens != null) {
1619
- output.usage.input = event.usage.input_tokens;
1620
- }
1621
- if (event.usage.output_tokens != null) {
1622
- output.usage.output = event.usage.output_tokens;
1623
- }
1624
- if (event.usage.cache_read_input_tokens != null) {
1625
- output.usage.cacheRead = event.usage.cache_read_input_tokens;
2012
+ if (output.stopReason === "error") {
2013
+ const stopDetails = delta?.stop_details;
2014
+ output.stopDetails = stopDetails ?? (rawStopReason ? { type: rawStopReason } : null);
2015
+ if (stopDetails?.type === "refusal") {
2016
+ const explanation = stopDetails.explanation?.trim();
2017
+ const category = stopDetails.category;
2018
+ const label = category ? `Refusal (${category})` : "Refusal";
2019
+ output.errorMessage = explanation ? `${label}: ${explanation}` : label;
2020
+ } else if (!output.errorMessage) {
2021
+ // Anthropic flagged an error-class stop (refusal / sensitive) without
2022
+ // populating stop_details. Surface the raw reason instead of falling
2023
+ // through to the generic "unknown error" string when we throw below.
2024
+ output.errorMessage =
2025
+ rawStopReason === "refusal"
2026
+ ? "Refusal (no details provided)"
2027
+ : rawStopReason === "sensitive"
2028
+ ? "Content flagged by safety filters"
2029
+ : `Anthropic stream ended with stop_reason: ${rawStopReason ?? "unknown"}`;
2030
+ }
1626
2031
  }
1627
- if (event.usage.cache_creation_input_tokens != null) {
1628
- output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
2032
+ const deltaUsage = event.usage;
2033
+ if (deltaUsage) {
2034
+ if (deltaUsage.input_tokens != null) {
2035
+ output.usage.input = deltaUsage.input_tokens;
2036
+ }
2037
+ if (deltaUsage.output_tokens != null) {
2038
+ output.usage.output = deltaUsage.output_tokens;
2039
+ }
2040
+ if (deltaUsage.cache_read_input_tokens != null) {
2041
+ output.usage.cacheRead = deltaUsage.cache_read_input_tokens;
2042
+ }
2043
+ if (deltaUsage.cache_creation_input_tokens != null) {
2044
+ output.usage.cacheWrite = deltaUsage.cache_creation_input_tokens;
2045
+ }
2046
+ applyAnthropicUsageExtras(output.usage, deltaUsage);
2047
+ output.usage.totalTokens =
2048
+ output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
2049
+ calculateCost(model, output.usage);
1629
2050
  }
1630
- applyAnthropicUsageExtras(output.usage, event.usage);
1631
- output.usage.totalTokens =
1632
- output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
1633
- calculateCost(model, output.usage);
1634
2051
  } else if (event.type === "message_stop") {
1635
2052
  sawTerminalEnvelope = true;
2053
+ sawMessageStop = true;
1636
2054
  }
1637
2055
  }
1638
2056
 
@@ -1646,8 +2064,19 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1646
2064
  if (!sawEvent || !sawMessageStart) {
1647
2065
  throw createAnthropicStreamEnvelopeError("stream ended before message_start");
1648
2066
  }
1649
- if (!sawTerminalEnvelope) {
1650
- throw createAnthropicStreamEnvelopeError("stream ended before terminal stop signal");
2067
+ if (!sawMessageStop) {
2068
+ reportAnthropicEnvelopeAnomaly("stream ended before message_stop");
2069
+ }
2070
+ if (openBlocks.size > 0) {
2071
+ for (const [openIndex, openBlock] of openBlocks) {
2072
+ reportAnthropicEnvelopeAnomaly(
2073
+ `stream ended with an unterminated ${openBlock.kind} block at index ${openIndex}`,
2074
+ );
2075
+ if (openBlock.kind === "ignored" || openBlock.contentIndex < 0) continue;
2076
+ const danglingBlock = blocks[openBlock.contentIndex];
2077
+ if (danglingBlock) finalizeStreamBlock(danglingBlock, openBlock.contentIndex);
2078
+ }
2079
+ openBlocks.clear();
1651
2080
  }
1652
2081
 
1653
2082
  if (output.stopReason === "aborted" || output.stopReason === "error") {
@@ -1662,8 +2091,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1662
2091
  hasStrictAnthropicTools(params) &&
1663
2092
  isAnthropicStrictGrammarTooLargeError(streamFailure)
1664
2093
  ) {
1665
- strictFallbackErrorMessage = await finalizeErrorMessage(streamFailure, rawRequestDump);
1666
- output.errorMessage = strictFallbackErrorMessage;
2094
+ // Log-only: the retried turn must not carry an errorMessage on
2095
+ // success (consumers treat its presence as failure).
2096
+ logger.warn("anthropic: strict tool grammar rejected, retrying without strict tools", {
2097
+ model: model.id,
2098
+ error: await finalizeErrorMessage(streamFailure, rawRequestDump),
2099
+ });
1667
2100
  if (providerSessionState) {
1668
2101
  providerSessionState.strictToolsDisabled = true;
1669
2102
  }
@@ -1672,6 +2105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1672
2105
  providerRetryAttempt = 0;
1673
2106
  output.content.length = 0;
1674
2107
  output.responseId = undefined;
2108
+ output.errorMessage = undefined;
1675
2109
  output.providerPayload = undefined;
1676
2110
  output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1677
2111
  output.stopReason = "stop";
@@ -1696,6 +2130,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1696
2130
  providerRetryAttempt = 0;
1697
2131
  output.content.length = 0;
1698
2132
  output.responseId = undefined;
2133
+ output.errorMessage = undefined;
1699
2134
  output.providerPayload = undefined;
1700
2135
  output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1701
2136
  output.stopReason = "stop";
@@ -1721,7 +2156,13 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1721
2156
  throw streamFailure;
1722
2157
  }
1723
2158
  providerRetryAttempt++;
1724
- const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
2159
+ const backoffDelayMs = calculateAnthropicRetryDelayMs(providerRetryAttempt - 1);
2160
+ // Honor the server's retry hint (`retry-after-ms`/`retry-after`) on
2161
+ // 429/529-style failures: retrying sooner than the server asked is a
2162
+ // guaranteed failure that just burns the retry budget.
2163
+ const headerDelayMs =
2164
+ streamFailure instanceof AnthropicApiError ? retryDelayFromHeaders(streamFailure.headers) : undefined;
2165
+ const delayMs = headerDelayMs !== undefined ? Math.max(headerDelayMs, backoffDelayMs) : backoffDelayMs;
1725
2166
  if (options?.providerRetryWait) {
1726
2167
  await options.providerRetryWait(delayMs, options.signal);
1727
2168
  } else {
@@ -1729,14 +2170,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1729
2170
  }
1730
2171
  output.content.length = 0;
1731
2172
  output.responseId = undefined;
1732
- output.errorMessage = strictFallbackErrorMessage;
2173
+ output.errorMessage = undefined;
2174
+ output.stopDetails = undefined;
1733
2175
  output.providerPayload = undefined;
1734
2176
  output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1735
2177
  output.stopReason = "stop";
1736
2178
  firstTokenTime = undefined;
1737
2179
  }
1738
2180
  }
1739
-
1740
2181
  output.duration = Date.now() - startTime;
1741
2182
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
1742
2183
  if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
@@ -1753,8 +2194,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1753
2194
  const firstEventTimeoutError = activeAbortTracker.getLocalAbortReason();
1754
2195
  output.stopReason = activeAbortTracker.wasCallerAbort() ? "aborted" : "error";
1755
2196
  output.errorStatus = extractHttpStatusFromError(error);
1756
- output.errorMessage = firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
1757
- output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
2197
+ try {
2198
+ output.errorMessage =
2199
+ firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
2200
+ output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
2201
+ } catch {
2202
+ // finalizeErrorMessage must never take the stream down with it — a
2203
+ // throw here would skip stream.end() and hang result() forever.
2204
+ output.errorMessage = error instanceof Error ? error.message : String(error);
2205
+ }
1758
2206
  output.duration = Date.now() - startTime;
1759
2207
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
1760
2208
  stream.push({ type: "error", reason: output.stopReason, error: output });
@@ -1782,12 +2230,11 @@ function applyClaudeCodeSystemCache(
1782
2230
  blocks: AnthropicSystemBlock[],
1783
2231
  cacheControl: AnthropicCacheControl | undefined,
1784
2232
  ): number {
1785
- if (!cacheControl || blocks.length <= 2) return 0;
1786
- blocks[2] = { ...blocks[2], cache_control: cacheControl };
1787
- if (blocks.length === 3) return 1;
2233
+ if (!cacheControl || blocks.length === 0) return 0;
1788
2234
  const lastIndex = blocks.length - 1;
1789
- blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
1790
- return 2;
2235
+ if (blocks[lastIndex].cache_control != null) return 0;
2236
+ blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cloneAnthropicCacheControl(cacheControl) };
2237
+ return 1;
1791
2238
  }
1792
2239
 
1793
2240
  export function buildAnthropicSystemBlocks(
@@ -1797,7 +2244,7 @@ export function buildAnthropicSystemBlocks(
1797
2244
  const { includeClaudeCodeInstruction = false, extraInstructions = [], firstUserMessageText, cacheControl } = options;
1798
2245
  const sanitizedPrompts = normalizeSystemPrompts(systemPrompt);
1799
2246
  const trimmedInstructions = extraInstructions.map(instruction => instruction.trim()).filter(Boolean);
1800
- const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.includes(CLAUDE_BILLING_HEADER_PREFIX));
2247
+ const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.startsWith(CLAUDE_BILLING_HEADER_PREFIX));
1801
2248
 
1802
2249
  if (includeClaudeCodeInstruction && !hasBillingHeader) {
1803
2250
  const blocks: AnthropicSystemBlock[] = [
@@ -1824,8 +2271,8 @@ export function buildAnthropicSystemBlocks(
1824
2271
  blocks.push({ type: "text", text: prompt });
1825
2272
  }
1826
2273
  const lastIndex = blocks.length - 1;
1827
- if (cacheControl && lastIndex >= 0) {
1828
- blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
2274
+ if (cacheControl && lastIndex >= 0 && blocks[lastIndex].cache_control == null) {
2275
+ blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cloneAnthropicCacheControl(cacheControl) };
1829
2276
  }
1830
2277
  return blocks.length > 0 ? blocks : undefined;
1831
2278
  }
@@ -1849,30 +2296,36 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1849
2296
  thinkingEnabled = false,
1850
2297
  thinkingDisplay,
1851
2298
  isOAuth,
1852
- onSseEvent,
1853
2299
  claudeCodeSessionId,
1854
2300
  } = args;
1855
- const compat = getAnthropicCompat(model);
1856
- const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinkingDisplay(model.id);
2301
+ const compat = model.compat;
2302
+ const needsInterleavedBeta = interleavedThinking && !model.thinking?.supportsDisplay;
1857
2303
  const needsFineGrainedToolStreamingBeta = hasTools && !compat.supportsEagerToolInputStreaming;
1858
2304
  const oauthToken = isOAuth ?? isAnthropicOAuthToken(apiKey);
1859
2305
  const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
1860
2306
  const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
1861
2307
  const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
2308
+ // Disable Bun's native ~300s pre-response fetch timeout (issue #2422).
2309
+ // `AnthropicMessagesClient` already arms its own DEFAULT_TIMEOUT_MS timer
2310
+ // per request, so the native ceiling can only short-circuit slow-prefill
2311
+ // streams before the configured watchdog gets to govern them.
2312
+ const fetchOptions: AnthropicFetchOptions = { ...(tlsFetchOptions ?? {}), timeout: false };
1862
2313
  const baseFetch = args.fetch ?? fetch;
1863
2314
  // Only OAuth requests inject the CC billing header; no API-key request can ever
1864
2315
  // contain it, so there is no need to install the rewriter for those.
1865
2316
  const cchFetch = oauthToken ? wrapFetchForCch(baseFetch) : baseFetch;
1866
- const debugFetch = onSseEvent ? wrapFetchForSseDebug(cchFetch, event => onSseEvent(event, model)) : cchFetch;
1867
2317
  if (model.provider === "github-copilot") {
1868
2318
  const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
2319
+ // The GitHub Copilot Anthropic proxy doesn't accept Anthropic beta
2320
+ // features (and the catalog already forces `supportsEagerToolInputStreaming
2321
+ // = false` for this host, so `needsFineGrainedToolStreamingBeta` is true
2322
+ // whenever tools are present). Forward only caller-supplied betas.
1869
2323
  const betaFeatures = [...extraBetas];
1870
- if (needsFineGrainedToolStreamingBeta) {
1871
- betaFeatures.push(fineGrainedToolStreamingBeta);
1872
- }
1873
2324
  const defaultHeaders = mergeHeaders(
1874
2325
  {
1875
2326
  Accept: stream ? "text/event-stream" : "application/json",
2327
+ "Content-Type": "application/json",
2328
+ "anthropic-version": "2023-06-01",
1876
2329
  "Anthropic-Dangerous-Direct-Browser-Access": "true",
1877
2330
  Authorization: `Bearer ${copilotApiKey}`,
1878
2331
  ...(betaFeatures.length > 0 ? { "anthropic-beta": buildBetaHeader([], betaFeatures) } : {}),
@@ -1889,8 +2342,8 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1889
2342
  baseURL: baseUrl,
1890
2343
  maxRetries: 5,
1891
2344
  defaultHeaders,
1892
- fetch: debugFetch,
1893
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2345
+ fetch: cchFetch,
2346
+ fetchOptions,
1894
2347
  };
1895
2348
  }
1896
2349
 
@@ -1924,7 +2377,8 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1924
2377
  baseURL: baseUrl,
1925
2378
  maxRetries: 5,
1926
2379
  defaultHeaders,
1927
- fetch: debugFetch,
2380
+ fetch: cchFetch,
2381
+ fetchOptions,
1928
2382
  };
1929
2383
  }
1930
2384
 
@@ -1940,11 +2394,10 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1940
2394
  baseURL: baseUrl,
1941
2395
  maxRetries: 5,
1942
2396
  defaultHeaders,
1943
- ...(debugFetch ? { fetch: debugFetch } : {}),
1944
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2397
+ fetch: cchFetch,
2398
+ fetchOptions,
1945
2399
  };
1946
2400
  }
1947
-
1948
2401
  // OpenCode Zen's Anthropic-compatible gateway accepts bearer auth only;
1949
2402
  // leaving apiKey set lets the client add X-Api-Key, which upstream Alibaba rejects.
1950
2403
  if (model.provider === "opencode-zen") {
@@ -1955,20 +2408,27 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1955
2408
  baseURL: baseUrl,
1956
2409
  maxRetries: 5,
1957
2410
  defaultHeaders,
1958
- ...(debugFetch ? { fetch: debugFetch } : {}),
1959
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2411
+ fetch: cchFetch,
2412
+ fetchOptions,
1960
2413
  };
1961
2414
  }
1962
2415
 
2416
+ const authorizationHeader = getHeaderCaseInsensitive(defaultHeaders, "Authorization");
2417
+ const shouldSuppressClientApiKey =
2418
+ !oauthToken &&
2419
+ !model.compat.officialEndpoint &&
2420
+ typeof authorizationHeader === "string" &&
2421
+ /^Bearer\s+/i.test(authorizationHeader);
2422
+
1963
2423
  return {
1964
2424
  isOAuthToken: oauthToken,
1965
- apiKey: oauthToken ? null : apiKey,
2425
+ apiKey: oauthToken || shouldSuppressClientApiKey ? null : apiKey,
1966
2426
  authToken: oauthToken ? apiKey : undefined,
1967
2427
  baseURL: baseUrl,
1968
2428
  maxRetries: 5,
1969
2429
  defaultHeaders,
1970
- fetch: debugFetch,
1971
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2430
+ fetch: cchFetch,
2431
+ fetchOptions,
1972
2432
  };
1973
2433
  }
1974
2434
 
@@ -1987,6 +2447,7 @@ function disableThinkingIfToolChoiceForced(params: MessageCreateParamsStreaming)
1987
2447
  if (toolChoice.type !== "any" && toolChoice.type !== "tool") return;
1988
2448
 
1989
2449
  delete params.thinking;
2450
+ delete params.context_management;
1990
2451
  const outputConfig = params.output_config as AnthropicOutputConfig | undefined;
1991
2452
  if (!outputConfig) return;
1992
2453
 
@@ -1996,18 +2457,29 @@ function disableThinkingIfToolChoiceForced(params: MessageCreateParamsStreaming)
1996
2457
  }
1997
2458
  }
1998
2459
 
1999
- function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, model: Model<"anthropic-messages">): void {
2460
+ function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, maxAllowedTokens: number): void {
2000
2461
  const thinking = params.thinking;
2001
2462
  if (thinking?.type !== "enabled") return;
2002
2463
 
2003
2464
  const budgetTokens = thinking.budget_tokens ?? 0;
2004
2465
  if (budgetTokens <= 0) return;
2005
2466
 
2006
- const maxTokens = params.max_tokens ?? 0;
2007
- const requiredMaxTokens = budgetTokens + OUTPUT_FALLBACK_BUFFER;
2008
- if (maxTokens < requiredMaxTokens) {
2009
- params.max_tokens = Math.min(requiredMaxTokens, model.maxTokens);
2467
+ const currentMaxTokens = Math.min(params.max_tokens ?? maxAllowedTokens, maxAllowedTokens);
2468
+ const raisedMaxTokens = Math.min(
2469
+ Math.max(currentMaxTokens, budgetTokens + OUTPUT_FALLBACK_BUFFER),
2470
+ maxAllowedTokens,
2471
+ );
2472
+ params.max_tokens = raisedMaxTokens;
2473
+
2474
+ if (budgetTokens + OUTPUT_FALLBACK_BUFFER <= raisedMaxTokens) return;
2475
+
2476
+ const clampedBudget = raisedMaxTokens - OUTPUT_FALLBACK_BUFFER;
2477
+ if (clampedBudget <= 0) {
2478
+ throw new Error(
2479
+ `Anthropic thinking budget requires max_tokens greater than ${OUTPUT_FALLBACK_BUFFER}; got ${raisedMaxTokens}`,
2480
+ );
2010
2481
  }
2482
+ thinking.budget_tokens = clampedBudget;
2011
2483
  }
2012
2484
 
2013
2485
  type CacheControlBlock = {
@@ -2017,39 +2489,44 @@ type CacheControlBlock = {
2017
2489
  function applyCacheControlToLastBlock<T extends CacheControlBlock>(
2018
2490
  blocks: T[],
2019
2491
  cacheControl: AnthropicCacheControl,
2020
- ): void {
2021
- if (blocks.length === 0) return;
2492
+ ): boolean {
2493
+ if (blocks.length === 0) return false;
2022
2494
  const lastIndex = blocks.length - 1;
2023
- blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
2495
+ if (blocks[lastIndex].cache_control != null) return false;
2496
+ blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cloneAnthropicCacheControl(cacheControl) };
2497
+ return true;
2024
2498
  }
2025
2499
 
2026
2500
  function applyCacheControlToLastTextBlock(
2027
2501
  blocks: Array<ContentBlockParam & CacheControlBlock>,
2028
2502
  cacheControl: AnthropicCacheControl,
2029
- ): void {
2030
- if (blocks.length === 0) return;
2503
+ ): boolean {
2504
+ if (blocks.length === 0) return false;
2031
2505
  for (let i = blocks.length - 1; i >= 0; i--) {
2032
2506
  if (blocks[i].type === "text") {
2033
- blocks[i] = { ...blocks[i], cache_control: cacheControl };
2034
- return;
2507
+ if (blocks[i].cache_control != null) return false;
2508
+ blocks[i] = { ...blocks[i], cache_control: cloneAnthropicCacheControl(cacheControl) };
2509
+ return true;
2035
2510
  }
2036
2511
  }
2037
- applyCacheControlToLastBlock(blocks, cacheControl);
2512
+ // No text block — fall back to the last block that accepts cache_control;
2513
+ // thinking/redacted_thinking blocks reject the field with a 400.
2514
+ for (let i = blocks.length - 1; i >= 0; i--) {
2515
+ const type = blocks[i].type;
2516
+ if (type === "thinking" || type === "redacted_thinking") continue;
2517
+ if (blocks[i].cache_control != null) return false;
2518
+ blocks[i] = { ...blocks[i], cache_control: cloneAnthropicCacheControl(cacheControl) };
2519
+ return true;
2520
+ }
2521
+ return false;
2038
2522
  }
2039
2523
 
2040
2524
  function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?: AnthropicCacheControl): void {
2041
2525
  if (!cacheControl) return;
2042
2526
 
2043
- // Skip if cache_control breakpoints were already placed externally on messages.
2044
- for (const message of params.messages) {
2045
- if (Array.isArray(message.content)) {
2046
- if ((message.content as Array<ContentBlockParam & CacheControlBlock>).some(b => b.cache_control != null))
2047
- return;
2048
- }
2049
- }
2050
-
2051
2527
  const MAX_CACHE_BREAKPOINTS = 4;
2052
- let cacheBreakpointsUsed = 0;
2528
+ let cacheBreakpointsUsed = countCacheControlBreakpoints(params);
2529
+ if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
2053
2530
  let isCCLayout = false;
2054
2531
 
2055
2532
  if (params.system && Array.isArray(params.system) && params.system.length > 0) {
@@ -2057,9 +2534,12 @@ function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?:
2057
2534
  params.system.length >= 3 &&
2058
2535
  (params.system[0] as { text?: string }).text?.startsWith(CLAUDE_BILLING_HEADER_PREFIX) === true;
2059
2536
  if (isCCLayout) {
2060
- cacheBreakpointsUsed += applyClaudeCodeSystemCache(params.system as AnthropicSystemBlock[], cacheControl);
2061
- } else {
2062
- applyCacheControlToLastBlock(params.system, cacheControl);
2537
+ const placed = Math.min(
2538
+ MAX_CACHE_BREAKPOINTS - cacheBreakpointsUsed,
2539
+ applyClaudeCodeSystemCache(params.system as AnthropicSystemBlock[], cacheControl),
2540
+ );
2541
+ cacheBreakpointsUsed += placed;
2542
+ } else if (applyCacheControlToLastBlock(params.system, cacheControl)) {
2063
2543
  cacheBreakpointsUsed++;
2064
2544
  }
2065
2545
  }
@@ -2072,14 +2552,19 @@ function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?:
2072
2552
  const message = params.messages[i];
2073
2553
  if (!message) continue;
2074
2554
  if (typeof message.content === "string") {
2075
- message.content = [{ type: "text", text: message.content, cache_control: cacheControl }];
2555
+ message.content = [
2556
+ { type: "text", text: message.content, cache_control: cloneAnthropicCacheControl(cacheControl) },
2557
+ ];
2076
2558
  cacheBreakpointsUsed++;
2077
2559
  } else if (Array.isArray(message.content) && message.content.length > 0) {
2078
- applyCacheControlToLastTextBlock(
2079
- message.content as Array<ContentBlockParam & CacheControlBlock>,
2080
- cacheControl,
2081
- );
2082
- cacheBreakpointsUsed++;
2560
+ if (
2561
+ applyCacheControlToLastTextBlock(
2562
+ message.content as Array<ContentBlockParam & CacheControlBlock>,
2563
+ cacheControl,
2564
+ )
2565
+ ) {
2566
+ cacheBreakpointsUsed++;
2567
+ }
2083
2568
  }
2084
2569
  }
2085
2570
  }
@@ -2092,7 +2577,9 @@ function normalizeCacheControlBlockTtl(block: CacheControlBlock, seenFiveMinute:
2092
2577
  return;
2093
2578
  }
2094
2579
  if (seenFiveMinute.value) {
2095
- delete cacheControl.ttl;
2580
+ const normalized = cloneAnthropicCacheControl(cacheControl);
2581
+ delete normalized.ttl;
2582
+ block.cache_control = normalized;
2096
2583
  }
2097
2584
  }
2098
2585
 
@@ -2222,139 +2709,163 @@ function resolveAnthropicAdaptiveEffort(
2222
2709
  return mapEffortToAnthropicAdaptiveEffort(model, requestedEffort);
2223
2710
  }
2224
2711
 
2225
- function startsWithAfterAsciiWhitespace(value: string, prefix: string): boolean {
2226
- let index = 0;
2227
- while (index < value.length) {
2228
- const code = value.charCodeAt(index);
2229
- if (code !== 9 && code !== 10 && code !== 13 && code !== 32) break;
2230
- index++;
2231
- }
2232
- return value.startsWith(prefix, index);
2233
- }
2234
-
2235
- function isClaudeSyntheticUserText(value: string): boolean {
2236
- return startsWithAfterAsciiWhitespace(value, "<system-reminder>");
2237
- }
2238
-
2239
2712
  function extractClaudeCodeFirstUserMessageText(messages: readonly Message[]): string {
2240
2713
  for (const message of messages) {
2241
2714
  if (message.role !== "user") continue;
2242
2715
  const { content } = message;
2243
2716
  if (typeof content === "string") return content;
2244
2717
  if (!Array.isArray(content)) return "";
2245
- let fallback: string | undefined;
2246
2718
  for (const block of content) {
2247
- if (block.type !== "text") continue;
2248
- fallback ??= block.text;
2249
- if (!isClaudeSyntheticUserText(block.text)) return block.text;
2719
+ if (block.type === "text") return block.text;
2250
2720
  }
2251
- return fallback ?? "";
2721
+ return "";
2252
2722
  }
2253
2723
  return "";
2254
2724
  }
2255
2725
 
2256
- function applyClaudeCodeContextManagement(params: MessageCreateParamsStreaming, isOAuthToken: boolean): void {
2257
- if (!isOAuthToken || params.thinking?.type !== "adaptive") return;
2258
- params.context_management = {
2259
- edits: [{ type: "clear_thinking_20251015", keep: "all" }],
2260
- };
2261
- }
2262
-
2263
2726
  function buildParams(
2264
2727
  model: Model<"anthropic-messages">,
2265
- baseUrl: string,
2266
2728
  context: Context,
2267
2729
  isOAuthToken: boolean,
2268
2730
  options?: AnthropicOptions,
2269
2731
  disableStrictTools = false,
2270
2732
  ): MessageCreateParamsStreaming {
2271
- const { cacheControl } = getCacheControl(model, baseUrl, options?.cacheRetention, isOAuthToken);
2272
- const params: MessageCreateParamsStreaming = {
2273
- model: model.id,
2274
- messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
2275
- max_tokens: options?.maxTokens || model.maxTokens,
2276
- stream: true,
2277
- };
2278
- if (options?.temperature !== undefined && !options?.thinkingEnabled) {
2279
- params.temperature = options.temperature;
2280
- }
2281
-
2282
- if (options?.topP !== undefined) {
2283
- params.top_p = options.topP;
2284
- }
2285
- if (options?.topK !== undefined) {
2286
- params.top_k = options.topK;
2287
- }
2288
- if (options?.stopSequences?.length) {
2289
- const seqs = options.stopSequences;
2290
- if (seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX && !warnedStopSequencesTrim) {
2291
- warnedStopSequencesTrim = true;
2292
- logger.warn("anthropic: stop_sequences exceeds 4; extra entries dropped", {
2293
- received: seqs.length,
2294
- kept: ANTHROPIC_STOP_SEQUENCES_MAX,
2295
- });
2296
- }
2297
- params.stop_sequences =
2298
- seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX ? seqs.slice(0, ANTHROPIC_STOP_SEQUENCES_MAX) : seqs;
2299
- }
2733
+ const { cacheControl } = getCacheControl(model, options?.cacheRetention, isOAuthToken);
2300
2734
 
2301
- // Opus 4.7+ rejects non-default sampling parameters with 400 error.
2302
- if (hasOpus47ApiRestrictions(model.id)) {
2303
- delete params.top_p;
2304
- delete params.top_k;
2305
- delete params.temperature;
2306
- }
2735
+ // Pre-compute system blocks so they occupy the right slot in the serialized body.
2736
+ const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
2737
+ const firstUserMessageText = shouldInjectClaudeCodeInstruction
2738
+ ? extractClaudeCodeFirstUserMessageText(context.messages)
2739
+ : "";
2740
+ const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
2741
+ includeClaudeCodeInstruction: shouldInjectClaudeCodeInstruction,
2742
+ firstUserMessageText,
2743
+ });
2307
2744
 
2745
+ // Pre-compute tools.
2746
+ let tools: AnthropicWireTool[] | undefined;
2308
2747
  if (context.tools) {
2309
- params.tools = convertTools(
2748
+ tools = convertTools(
2310
2749
  context.tools,
2311
2750
  isOAuthToken,
2312
2751
  disableStrictTools || model.provider === "github-copilot",
2313
- getAnthropicCompat(model).supportsEagerToolInputStreaming,
2752
+ model.compat.supportsEagerToolInputStreaming,
2314
2753
  );
2315
2754
  } else if (isOAuthToken) {
2316
- params.tools = [];
2755
+ tools = [];
2317
2756
  }
2318
2757
 
2758
+ // Pre-compute metadata.
2759
+ const metadataAccountId = readAnthropicMetadataAccountId(options?.metadata);
2760
+ const metadataUserId = resolveAnthropicMetadataUserId(
2761
+ options?.metadata?.user_id,
2762
+ isOAuthToken,
2763
+ options?.sessionId,
2764
+ metadataAccountId,
2765
+ );
2766
+ const metadata = metadataUserId ? { user_id: metadataUserId } : undefined;
2767
+
2768
+ // Pre-compute thinking + output_config effort.
2769
+ let thinking: MessageCreateParamsStreaming["thinking"] | undefined;
2770
+ let outputConfigEffort: AnthropicEffort | undefined;
2319
2771
  if (model.reasoning) {
2320
2772
  if (options?.thinkingEnabled) {
2321
2773
  const mode = model.thinking?.mode;
2322
2774
  const effort = resolveAnthropicAdaptiveEffort(model, options);
2323
-
2324
- const compat = getAnthropicCompat(model);
2775
+ const compat = model.compat;
2325
2776
  if (mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
2326
2777
  const adaptive: { type: "adaptive"; display?: AnthropicThinkingDisplay } = { type: "adaptive" };
2327
- // Starting with Claude Opus 4.7, adaptive thinking content is omitted from the
2328
- // response by default. Opt into summarized reasoning so thinking deltas keep
2329
- // streaming with human-readable content for callers that rely on it.
2330
- if (options.thinkingDisplay !== undefined || supportsAdaptiveThinkingDisplay(model.id)) {
2778
+ // Starting with Claude Opus 4.7 and Claude Fable/Mythos 5, adaptive thinking
2779
+ // content is omitted from the response by default. Opt into summarized
2780
+ // reasoning so thinking deltas keep streaming with human-readable content for
2781
+ // callers that rely on it. The `display` field is gated strictly on model
2782
+ // support: Opus 4.6 / Sonnet 4.6+ reject it with a 400, so an explicit
2783
+ // `thinkingDisplay` MUST NOT force it onto a model that can't accept it.
2784
+ if (model.thinking?.supportsDisplay) {
2331
2785
  adaptive.display = options.thinkingDisplay ?? "summarized";
2332
2786
  }
2333
- params.thinking = adaptive;
2334
- if (effort) {
2335
- getAnthropicOutputConfig(params).effort = effort;
2336
- }
2787
+ thinking = adaptive;
2788
+ if (effort) outputConfigEffort = effort;
2337
2789
  } else {
2338
- params.thinking = {
2790
+ thinking = {
2339
2791
  type: "enabled",
2340
2792
  budget_tokens: options.thinkingBudgetTokens || 1024,
2341
2793
  display: options.thinkingDisplay ?? "summarized",
2342
2794
  };
2343
- if (mode === "anthropic-budget-effort" && effort) {
2344
- getAnthropicOutputConfig(params).effort = effort;
2345
- }
2795
+ if (mode === "anthropic-budget-effort" && effort) outputConfigEffort = effort;
2346
2796
  }
2347
2797
  } else if (options?.thinkingEnabled === false) {
2348
- params.thinking = { type: "disabled" };
2798
+ const compat = model.compat;
2799
+ if (model.thinking?.mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
2800
+ // Adaptive-only Claude models (Opus 4.6+, Sonnet 4.6+, Fable/Mythos 5) reject
2801
+ // `thinking.type: "disabled"` — adaptive thinking cannot be switched off.
2802
+ // Omit the thinking field (the API defaults to adaptive) and pin the
2803
+ // lowest effort so "thinking off" calls stay cheap instead of failing
2804
+ // the request with a 400 (a hidden-thinking toggle must never break it).
2805
+ outputConfigEffort = "low";
2806
+ } else {
2807
+ thinking = { type: "disabled" };
2808
+ }
2349
2809
  }
2350
2810
  }
2351
2811
 
2352
- if (options?.taskBudget) {
2353
- getAnthropicOutputConfig(params).task_budget = options.taskBudget;
2812
+ // Pre-compute context_management (depends on thinking).
2813
+ const contextManagement =
2814
+ isOAuthToken && thinking?.type === "adaptive"
2815
+ ? { edits: [{ type: "clear_thinking_20251015" as const, keep: "all" as const }] }
2816
+ : undefined;
2817
+
2818
+ // Pre-compute output_config.
2819
+ const outputConfigEntries: AnthropicOutputConfig = {};
2820
+ if (outputConfigEffort) outputConfigEntries.effort = outputConfigEffort;
2821
+ if (options?.taskBudget) outputConfigEntries.task_budget = options.taskBudget;
2822
+ const outputConfig = Object.keys(outputConfigEntries).length ? outputConfigEntries : undefined;
2823
+
2824
+ // Claude Code requests at most 64k output tokens; clamp only OAuth requests,
2825
+ // where the wire fingerprint must match. API-key callers keep the full model
2826
+ // ceiling (e.g. 128k on Opus 4.8).
2827
+ const modelMaxTokens = model.maxTokens ?? CLAUDE_CODE_MAX_OUTPUT_TOKENS;
2828
+ const maxOutputTokens = isOAuthToken ? Math.min(CLAUDE_CODE_MAX_OUTPUT_TOKENS, modelMaxTokens) : modelMaxTokens;
2829
+
2830
+ // Build params in the canonical field order: model → messages → system → tools →
2831
+ // metadata → max_tokens → thinking → context_management → output_config → stream.
2832
+ const params: MessageCreateParamsStreaming = {
2833
+ model: options?.requestModelId ?? model.requestModelId ?? model.id,
2834
+ messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
2835
+ ...(systemBlocks && { system: systemBlocks }),
2836
+ ...(tools !== undefined && { tools }),
2837
+ ...(metadata && { metadata }),
2838
+ max_tokens: Math.min(maxOutputTokens, options?.maxTokens || modelMaxTokens),
2839
+ ...(thinking && { thinking }),
2840
+ ...(contextManagement && { context_management: contextManagement }),
2841
+ ...(outputConfig && { output_config: outputConfig }),
2842
+ stream: true,
2843
+ };
2844
+
2845
+ // Opus 4.7+ and Fable/Mythos 5 reject non-default sampling parameters with 400 error.
2846
+ const thinkingType = params.thinking?.type;
2847
+ const allowSamplingParams =
2848
+ model.compat.supportsSamplingParams && (thinkingType === undefined || thinkingType === "disabled");
2849
+ if (allowSamplingParams && options?.temperature !== undefined) {
2850
+ params.temperature = options.temperature;
2354
2851
  }
2355
- const metadataUserId = resolveAnthropicMetadataUserId(options?.metadata?.user_id, isOAuthToken, options?.sessionId);
2356
- if (metadataUserId) {
2357
- params.metadata = { user_id: metadataUserId };
2852
+ if (allowSamplingParams && options?.topP !== undefined) {
2853
+ params.top_p = options.topP;
2854
+ }
2855
+ if (allowSamplingParams && options?.topK !== undefined) {
2856
+ params.top_k = options.topK;
2857
+ }
2858
+ if (options?.stopSequences?.length) {
2859
+ const seqs = options.stopSequences;
2860
+ if (seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX && !warnedStopSequencesTrim) {
2861
+ warnedStopSequencesTrim = true;
2862
+ logger.warn("anthropic: stop_sequences exceeds 4; extra entries dropped", {
2863
+ received: seqs.length,
2864
+ kept: ANTHROPIC_STOP_SEQUENCES_MAX,
2865
+ });
2866
+ }
2867
+ params.stop_sequences =
2868
+ seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX ? seqs.slice(0, ANTHROPIC_STOP_SEQUENCES_MAX) : seqs;
2358
2869
  }
2359
2870
 
2360
2871
  if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
@@ -2369,37 +2880,18 @@ function buildParams(
2369
2880
  } else {
2370
2881
  params.tool_choice = options.toolChoice;
2371
2882
  }
2372
- }
2373
-
2374
- // Claude Opus 4.8 must emit at most one tool call per turn. Force
2375
- // `disable_parallel_tool_use` onto the outgoing tool_choice (synthesizing an
2376
- // `auto` choice when none is set). Gated on tools being present: Anthropic
2377
- // rejects `tool_choice` without `tools`, and parallelism is moot otherwise.
2378
- // `none` rejects the field, so leave it untouched. A fresh object is built
2379
- // rather than mutated so the caller's `options.toolChoice` is never aliased.
2380
- if (disablesParallelToolUse(model.id) && params.tools && params.tools.length > 0) {
2381
- const current = params.tool_choice;
2382
- if (!current) {
2383
- params.tool_choice = { type: "auto", disable_parallel_tool_use: true };
2384
- } else if (current.type !== "none") {
2385
- params.tool_choice = { ...current, disable_parallel_tool_use: true };
2883
+ // Claude Fable/Mythos 5 reject forced tool use outright ("tool_choice forces
2884
+ // tool use is not compatible with this model"). Downgrade any/tool → auto so the
2885
+ // request succeeds; the tool stays available and the caller's prompt steers
2886
+ // the model toward it.
2887
+ const choiceType = params.tool_choice?.type;
2888
+ if ((choiceType === "any" || choiceType === "tool") && !model.compat.supportsForcedToolChoice) {
2889
+ params.tool_choice = { type: "auto" };
2386
2890
  }
2387
2891
  }
2388
2892
 
2389
- const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
2390
- const firstUserMessageText = shouldInjectClaudeCodeInstruction
2391
- ? extractClaudeCodeFirstUserMessageText(context.messages)
2392
- : "";
2393
- const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
2394
- includeClaudeCodeInstruction: shouldInjectClaudeCodeInstruction,
2395
- firstUserMessageText,
2396
- });
2397
- if (systemBlocks) {
2398
- params.system = systemBlocks;
2399
- }
2400
2893
  disableThinkingIfToolChoiceForced(params);
2401
- applyClaudeCodeContextManagement(params, isOAuthToken);
2402
- ensureMaxTokensForThinking(params, model);
2894
+ ensureMaxTokensForThinking(params, maxOutputTokens);
2403
2895
  applyPromptCaching(params, cacheControl);
2404
2896
  enforceCacheControlLimit(params, 4);
2405
2897
  normalizeCacheControlTtlOrdering(params);
@@ -2407,52 +2899,50 @@ function buildParams(
2407
2899
  return params;
2408
2900
  }
2409
2901
 
2410
- /**
2411
- * Z.AI's Anthropic-compatible proxy at `api.z.ai/api/anthropic` deserializes
2412
- * tool_result blocks into a Python class that accesses `.id`, even though
2413
- * Anthropic's standard tool_result schema only carries `tool_use_id`. Detect
2414
- * that endpoint so we can emit the non-standard alias for it without
2415
- * polluting requests to api.anthropic.com or other compatible proxies.
2416
- * See: https://github.com/uttamtrivedi/Prometheus/issues/814
2417
- */
2418
- function isZaiAnthropicEndpoint(model: Model<"anthropic-messages">): boolean {
2419
- if (model.provider === "zai") return true;
2420
- const baseUrl = model.baseUrl;
2421
- if (!baseUrl) return false;
2422
- try {
2423
- return new URL(baseUrl).hostname.toLowerCase() === "api.z.ai";
2424
- } catch {
2425
- return false;
2902
+ const EMPTY_ERROR_TOOL_RESULT_TEXT = "Tool failed with no output.";
2903
+
2904
+ function isEmptyToolResultWireContent(content: AnthropicToolResultContent): boolean {
2905
+ if (typeof content === "string") {
2906
+ return content.trim().length === 0;
2426
2907
  }
2908
+ return content.length === 0;
2427
2909
  }
2428
2910
 
2429
- /**
2430
- * Returns true for providers whose Anthropic-compatible endpoints do NOT
2431
- * implement signature-based thinking-chain integrity (DeepSeek, Z.AI, etc.).
2432
- * For these providers, unsigned thinking blocks must be preserved as
2433
- * `type: "thinking"` instead of being degraded to text.
2434
- */
2435
- function isNonSigningAnthropicEndpoint(model: Model<"anthropic-messages">): boolean {
2436
- // Known non-signing providers
2437
- if (model.provider === "zai" || model.provider === "deepseek") return true;
2438
- const baseUrl = model.baseUrl;
2439
- if (!baseUrl) return false;
2440
- try {
2441
- const hostname = new URL(baseUrl).hostname.toLowerCase();
2442
- return hostname === "api.deepseek.com" || hostname.endsWith(".deepseek.com");
2443
- } catch {
2444
- return false;
2911
+ function ensureErrorToolResultWireContent(
2912
+ content: AnthropicToolResultContent,
2913
+ isError: boolean | undefined,
2914
+ ): AnthropicToolResultContent {
2915
+ if (!isError || !isEmptyToolResultWireContent(content)) {
2916
+ return content;
2445
2917
  }
2918
+ return typeof content === "string"
2919
+ ? EMPTY_ERROR_TOOL_RESULT_TEXT
2920
+ : [{ type: "text", text: EMPTY_ERROR_TOOL_RESULT_TEXT }];
2446
2921
  }
2447
2922
 
2448
- function buildToolResultBlock(model: Model<"anthropic-messages">, msg: ToolResultMessage): ContentBlockParam {
2923
+ function buildToolResultBlock(
2924
+ model: Model<"anthropic-messages">,
2925
+ msg: ToolResultMessage,
2926
+ hoistedImages: ContentBlockParam[],
2927
+ ): ContentBlockParam {
2928
+ let content = convertContentBlocks(msg.content, model.input.includes("image"));
2929
+ // Anthropic rejects images inside error tool results ("all content must be
2930
+ // type `text` if `is_error` is true") — keep the text in the block and
2931
+ // hoist the images after the message's tool_result run.
2932
+ if (msg.isError && typeof content !== "string" && content.some(block => block.type === "image")) {
2933
+ for (const block of content) {
2934
+ if (block.type === "image") hoistedImages.push(block);
2935
+ }
2936
+ content = content.filter(block => block.type === "text");
2937
+ }
2938
+ content = ensureErrorToolResultWireContent(content, msg.isError);
2449
2939
  const block: ContentBlockParam = {
2450
2940
  type: "tool_result",
2451
2941
  tool_use_id: msg.toolCallId,
2452
- content: convertContentBlocks(msg.content, model.input.includes("image")),
2942
+ content,
2453
2943
  is_error: msg.isError,
2454
2944
  };
2455
- if (isZaiAnthropicEndpoint(model)) {
2945
+ if (model.compat.requiresToolResultId) {
2456
2946
  // Z.AI workaround (issue #814): include `id` aliased to `tool_use_id`.
2457
2947
  (block as unknown as Record<string, unknown>).id = msg.toolCallId;
2458
2948
  }
@@ -2461,20 +2951,51 @@ function buildToolResultBlock(model: Model<"anthropic-messages">, msg: ToolResul
2461
2951
 
2462
2952
  /**
2463
2953
  * A single Anthropic conversation turn, including the mid-conversation
2464
- * `system` role (Opus 4.8+).
2954
+ * `system` role (Opus 4.8+ and Fable/Mythos 5).
2465
2955
  */
2466
2956
  export type AnthropicMessageParam = MessageParam;
2467
2957
 
2958
+ /**
2959
+ * Recursively replace lone surrogates in string leaves. Identity-preserving:
2960
+ * returns the input object/array when nothing changed.
2961
+ */
2962
+ function toWellFormedDeep(value: unknown): unknown {
2963
+ if (typeof value === "string") {
2964
+ const wellFormed = value.toWellFormed();
2965
+ return wellFormed === value ? value : wellFormed;
2966
+ }
2967
+ if (Array.isArray(value)) {
2968
+ let changed = false;
2969
+ const next = value.map(entry => {
2970
+ const sanitized = toWellFormedDeep(entry);
2971
+ if (sanitized !== entry) changed = true;
2972
+ return sanitized;
2973
+ });
2974
+ return changed ? next : value;
2975
+ }
2976
+ if (isRecord(value)) {
2977
+ let changed = false;
2978
+ const next: Record<string, unknown> = {};
2979
+ for (const [key, entry] of Object.entries(value)) {
2980
+ const sanitized = toWellFormedDeep(entry);
2981
+ if (sanitized !== entry) changed = true;
2982
+ next[key] = sanitized;
2983
+ }
2984
+ return changed ? next : value;
2985
+ }
2986
+ return value;
2987
+ }
2988
+
2468
2989
  export function convertAnthropicMessages(
2469
2990
  messages: Message[],
2470
2991
  model: Model<"anthropic-messages">,
2471
2992
  isOAuthToken: boolean,
2472
2993
  ): AnthropicMessageParam[] {
2473
- const params: AnthropicMessageParam[] = [];
2474
2994
  // Indices of params emitted from `developer` messages. After the main pass,
2475
2995
  // the ones whose placement satisfies Anthropic's mid-conversation rules are
2476
2996
  // upgraded from the `user` role to the authoritative `system` role.
2477
2997
  const developerParamIndices: number[] = [];
2998
+ const params: AnthropicMessageParam[] = [];
2478
2999
 
2479
3000
  const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
2480
3001
 
@@ -2533,7 +3054,7 @@ export function convertAnthropicMessages(
2533
3054
  }
2534
3055
  if (block.thinking.trim().length === 0) continue;
2535
3056
  if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
2536
- if (isNonSigningAnthropicEndpoint(model)) {
3057
+ if (model.compat.replayUnsignedThinking) {
2537
3058
  blocks.push({
2538
3059
  type: "thinking",
2539
3060
  thinking: block.thinking.toWellFormed(),
@@ -2563,7 +3084,12 @@ export function convertAnthropicMessages(
2563
3084
  type: "tool_use",
2564
3085
  id: block.id,
2565
3086
  name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
2566
- input: block.arguments ?? {},
3087
+ // Always sanitize: the model itself can emit lone-surrogate escapes
3088
+ // in tool-argument JSON (streamed out fine, rejected with a 400 on
3089
+ // replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
3090
+ // is identity-preserving, so well-formed arguments stay
3091
+ // byte-identical and prompt-cache prefixes are unaffected.
3092
+ input: toWellFormedDeep(block.arguments ?? {}),
2567
3093
  });
2568
3094
  }
2569
3095
  }
@@ -2575,21 +3101,30 @@ export function convertAnthropicMessages(
2575
3101
  } else if (msg.role === "toolResult") {
2576
3102
  // Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
2577
3103
  const toolResults: ContentBlockParam[] = [];
3104
+ // Images stripped out of error tool results, re-attached after the run.
3105
+ const hoistedImages: ContentBlockParam[] = [];
2578
3106
 
2579
3107
  // Add the current tool result
2580
- toolResults.push(buildToolResultBlock(model, msg));
3108
+ toolResults.push(buildToolResultBlock(model, msg, hoistedImages));
2581
3109
 
2582
3110
  // Look ahead for consecutive toolResult messages
2583
3111
  let j = i + 1;
2584
3112
  while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
2585
3113
  const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
2586
- toolResults.push(buildToolResultBlock(model, nextMsg));
3114
+ toolResults.push(buildToolResultBlock(model, nextMsg, hoistedImages));
2587
3115
  j++;
2588
3116
  }
2589
3117
 
2590
3118
  // Skip the messages we've already processed
2591
3119
  i = j - 1;
2592
3120
 
3121
+ if (hoistedImages.length > 0) {
3122
+ toolResults.push(
3123
+ { type: "text", text: "Attached image(s) from the tool result(s) above:" },
3124
+ ...hoistedImages,
3125
+ );
3126
+ }
3127
+
2593
3128
  // Add a single user message with all tool results
2594
3129
  params.push({
2595
3130
  role: "user",
@@ -2599,22 +3134,34 @@ export function convertAnthropicMessages(
2599
3134
  }
2600
3135
 
2601
3136
  // Upgrade developer-origin params to mid-conversation `system` messages where
2602
- // Anthropic's placement rules allow it (Opus 4.8+ on the first-party API).
3137
+ // Anthropic's placement rules allow it (Opus 4.8+ / Fable/Mythos 5 on first-party API).
2603
3138
  // Rules: a system message must immediately follow a `user` turn and must be
2604
3139
  // the last entry or be followed by an `assistant` turn — never first, and
2605
3140
  // never consecutive. Requiring the next param to be `assistant` (or absent)
2606
3141
  // covers both the "followed by assistant / last" and "no consecutive system"
2607
3142
  // constraints. Anything that does not qualify stays a `user` message.
2608
- if (developerParamIndices.length > 0 && getAnthropicCompat(model).supportsMidConversationSystem) {
3143
+ if (developerParamIndices.length > 0 && model.compat.supportsMidConversationSystem) {
2609
3144
  for (const idx of developerParamIndices) {
2610
3145
  const followsUser = idx > 0 && params[idx - 1]?.role === "user";
2611
3146
  const next = params[idx + 1];
2612
3147
  const lastOrBeforeAssistant = idx === params.length - 1 || next?.role === "assistant";
2613
- if (followsUser && lastOrBeforeAssistant) {
2614
- params[idx] = { role: "system", content: params[idx].content };
3148
+ // System content is text-only on the wire; a developer turn carrying
3149
+ // image blocks must stay a `user` message or the API rejects it.
3150
+ const content = params[idx].content;
3151
+ const textOnly = typeof content === "string" || content.every(block => block.type === "text");
3152
+ if (followsUser && lastOrBeforeAssistant && textOnly) {
3153
+ params[idx] = { role: "system", content };
2615
3154
  }
2616
3155
  }
2617
3156
  }
3157
+ // Dropped empty user/developer turns can leave two assistant params adjacent;
3158
+ // the API rejects consecutive assistant messages. Repair with the same neutral
3159
+ // nudge used for trailing-assistant prefill below.
3160
+ for (let i = params.length - 1; i > 0; i--) {
3161
+ if (params[i].role === "assistant" && params[i - 1]?.role === "assistant") {
3162
+ params.splice(i, 0, { role: "user", content: "Continue." });
3163
+ }
3164
+ }
2618
3165
  if (params.length > 0 && params[params.length - 1]?.role === "assistant") {
2619
3166
  params.push({ role: "user", content: "Continue." });
2620
3167
  }
@@ -2683,6 +3230,7 @@ function isJsonSchemaArrayNode(schema: Record<string, unknown>): boolean {
2683
3230
  const t = schema.type;
2684
3231
  if (t === "array") return true;
2685
3232
  if (Array.isArray(t) && t.includes("array") && !t.includes("object")) return true;
3233
+ if (schema.items !== undefined || Array.isArray(schema.prefixItems)) return true;
2686
3234
  return false;
2687
3235
  }
2688
3236
 
@@ -2709,6 +3257,13 @@ function pickAnthropicScalarType(type: unknown): string | undefined {
2709
3257
  }
2710
3258
  return undefined;
2711
3259
  }
3260
+ function pickAnthropicEffectiveScalarType(schema: Record<string, unknown>): string | undefined {
3261
+ const explicit = pickAnthropicScalarType(schema.type);
3262
+ if (explicit) return explicit;
3263
+ if (isRecord(schema.properties)) return "object";
3264
+ if (schema.items !== undefined || Array.isArray(schema.prefixItems)) return "array";
3265
+ return undefined;
3266
+ }
2712
3267
 
2713
3268
  function anthropicPerTypeKeep(scalarType: string | undefined): Set<string> | undefined {
2714
3269
  switch (scalarType) {
@@ -2723,14 +3278,6 @@ function anthropicPerTypeKeep(scalarType: string | undefined): Set<string> | und
2723
3278
  }
2724
3279
  }
2725
3280
 
2726
- /**
2727
- * Per-schema-object memoization slot for the normalized Anthropic tool form. We stamp
2728
- * the result onto the host via a `Symbol` property (mirroring `utils/schema/stamps.ts`)
2729
- * instead of using a `WeakMap`: it's a single hidden-class slot, so warm reads are
2730
- * direct property access and write-once cycles resolve to the in-progress result.
2731
- */
2732
- const kAnthropicToolNormal = Symbol("pi.schema.anthropic.toolNormal");
2733
-
2734
3281
  /**
2735
3282
  * Normalize a JSON Schema node for Anthropic tool `input_schema`.
2736
3283
  *
@@ -2751,20 +3298,20 @@ const kAnthropicToolNormal = Symbol("pi.schema.anthropic.toolNormal");
2751
3298
  * pass downstream demotes those shapes to non-strict instead of fabricating a closed
2752
3299
  * object, so callers like the resolve tool keep working open-map semantics.
2753
3300
  */
2754
- export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2755
- if (Array.isArray(schema)) return schema.map(entry => normalizeAnthropicToolSchema(entry));
3301
+ function normalizeAnthropicToolSchemaNode(
3302
+ schema: unknown,
3303
+ cache: WeakMap<Record<string, unknown>, Record<string, unknown>>,
3304
+ ): unknown {
3305
+ if (Array.isArray(schema)) return schema.map(entry => normalizeAnthropicToolSchemaNode(entry, cache));
2756
3306
  if (!isRecord(schema)) return schema;
2757
3307
 
2758
- const slot = schema as Record<symbol, Record<string, unknown> | undefined>;
2759
- const existing = slot[kAnthropicToolNormal];
3308
+ const existing = cache.get(schema);
2760
3309
  if (existing !== undefined) return existing;
2761
3310
 
2762
3311
  const result: Record<string, unknown> = {};
2763
- // Pre-stamp before recursion so cyclic schemas resolve to the in-progress object
2764
- // (mirrors the WeakMap-set-before-recurse pattern the original implementation used).
2765
- Object.defineProperty(schema, kAnthropicToolNormal, { value: result, writable: true, configurable: true });
3312
+ cache.set(schema, result);
2766
3313
 
2767
- const scalarType = pickAnthropicScalarType(schema.type);
3314
+ const scalarType = pickAnthropicEffectiveScalarType(schema);
2768
3315
  const perTypeKeep = anthropicPerTypeKeep(scalarType);
2769
3316
  const spill: Array<[string, unknown]> = [];
2770
3317
 
@@ -2803,12 +3350,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2803
3350
  const sourceProperties = result.properties as Record<string, unknown>;
2804
3351
  for (const propName in sourceProperties) {
2805
3352
  if (!Object.hasOwn(sourceProperties, propName)) continue;
2806
- normalizedProperties[propName] = normalizeAnthropicToolSchema(sourceProperties[propName]);
3353
+ normalizedProperties[propName] = normalizeAnthropicToolSchemaNode(sourceProperties[propName], cache);
2807
3354
  }
2808
3355
  result.properties = normalizedProperties;
2809
3356
  }
2810
3357
  if (isRecord(result.additionalProperties)) {
2811
- const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
3358
+ const normalized = normalizeAnthropicToolSchemaNode(result.additionalProperties, cache);
2812
3359
  if (isRecord(normalized) && Object.keys(normalized).length === 0) {
2813
3360
  result.additionalProperties = true;
2814
3361
  } else {
@@ -2816,17 +3363,17 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2816
3363
  }
2817
3364
  }
2818
3365
  if (Array.isArray(result.items)) {
2819
- result.items = result.items.map(item => normalizeAnthropicToolSchema(item));
3366
+ result.items = result.items.map(item => normalizeAnthropicToolSchemaNode(item, cache));
2820
3367
  } else if (isRecord(result.items)) {
2821
- result.items = normalizeAnthropicToolSchema(result.items);
3368
+ result.items = normalizeAnthropicToolSchemaNode(result.items, cache);
2822
3369
  }
2823
3370
  if (Array.isArray(result.prefixItems)) {
2824
- result.prefixItems = result.prefixItems.map(item => normalizeAnthropicToolSchema(item));
3371
+ result.prefixItems = result.prefixItems.map(item => normalizeAnthropicToolSchemaNode(item, cache));
2825
3372
  }
2826
3373
  for (const key of COMBINATOR_KEYS) {
2827
3374
  const variants = result[key];
2828
3375
  if (Array.isArray(variants)) {
2829
- result[key] = variants.map(variant => normalizeAnthropicToolSchema(variant));
3376
+ result[key] = variants.map(variant => normalizeAnthropicToolSchemaNode(variant, cache));
2830
3377
  }
2831
3378
  }
2832
3379
  for (const defsKey of ["$defs", "definitions"] as const) {
@@ -2836,7 +3383,7 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2836
3383
  const sourceDefs = definitions as Record<string, unknown>;
2837
3384
  for (const name in sourceDefs) {
2838
3385
  if (!Object.hasOwn(sourceDefs, name)) continue;
2839
- normalizedDefs[name] = normalizeAnthropicToolSchema(sourceDefs[name]);
3386
+ normalizedDefs[name] = normalizeAnthropicToolSchemaNode(sourceDefs[name], cache);
2840
3387
  }
2841
3388
  result[defsKey] = normalizedDefs;
2842
3389
  }
@@ -2845,6 +3392,10 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2845
3392
  return result;
2846
3393
  }
2847
3394
 
3395
+ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
3396
+ return normalizeAnthropicToolSchemaNode(schema, new WeakMap());
3397
+ }
3398
+
2848
3399
  type AnthropicToolSchemaPlan = {
2849
3400
  inputSchema: AnthropicToolInputSchema;
2850
3401
  strict: boolean;
@@ -2865,6 +3416,24 @@ function hasNullVariant(schema: Record<string, unknown>): boolean {
2865
3416
  if (Array.isArray(schema.type) && schema.type.includes("null")) return true;
2866
3417
  return Array.isArray(schema.anyOf) && schema.anyOf.some(variant => isRecord(variant) && variant.type === "null");
2867
3418
  }
3419
/**
 * Fixed keywords that count as "defining" a schema node. Combinator keywords
 * are checked separately through `COMBINATOR_KEYS`.
 */
const ANTHROPIC_SCHEMA_DEFINING_KEYWORDS = [
  "type",
  "properties",
  "additionalProperties",
  "items",
  "prefixItems",
  "enum",
  "const",
  "$ref",
  "$defs",
  "definitions",
] as const;

/**
 * Reports whether a schema node sets at least one defining keyword.
 *
 * A keyword counts as set when its value is anything other than `undefined`
 * (so `null`, `false` and empty containers all count). The node qualifies if
 * it sets any entry of `ANTHROPIC_SCHEMA_DEFINING_KEYWORDS` or any key listed
 * in `COMBINATOR_KEYS`.
 *
 * @param schema - Schema node to inspect; it is only read, never modified.
 * @returns `true` when a defining keyword is present, otherwise `false`.
 */
function hasAnthropicSchemaDefiningKeyword(schema: Record<string, unknown>): boolean {
  const setsFixedKeyword = ANTHROPIC_SCHEMA_DEFINING_KEYWORDS.some(keyword => schema[keyword] !== undefined);
  if (setsFixedKeyword) return true;

  for (const combinator of COMBINATOR_KEYS) {
    if (schema[combinator] !== undefined) return true;
  }
  return false;
}
2868
3437
 
2869
3438
  function makeAnthropicNullableSchema(schema: unknown, budget: AnthropicStrictBudget): unknown | undefined {
2870
3439
  if (isRecord(schema)) {
@@ -2903,6 +3472,8 @@ function normalizeAnthropicStrictSchemaNode(
2903
3472
  const cached = cache.get(schema);
2904
3473
  if (cached) return cached;
2905
3474
 
3475
+ if (!hasAnthropicSchemaDefiningKeyword(schema)) return undefined;
3476
+
2906
3477
  // Strict tool use only supports closed objects. Open maps stay available on
2907
3478
  // the non-strict schema plan instead of producing an Anthropic 400.
2908
3479
  if (isJsonSchemaObjectNode(schema) && schema.additionalProperties !== false) {
@@ -2992,6 +3563,38 @@ function normalizeAnthropicStrictSchemaNode(
2992
3563
  return result;
2993
3564
  }
2994
3565
 
3566
const ANTHROPIC_STRICT_INCOMPATIBLE_KEYWORDS = [
  "oneOf",
  "allOf",
  "$ref",
  "patternProperties",
  "propertyNames",
] as const;

/**
 * Keywords whose value is a map from caller-chosen names to subschemas. The
 * keys of such a map are property/definition names, not JSON Schema keywords,
 * so they must never be matched against the incompatible-keyword list.
 */
const ANTHROPIC_SCHEMA_NAME_MAP_KEYWORDS: ReadonlySet<string> = new Set([
  "properties",
  "$defs",
  "definitions",
  "dependentSchemas",
]);

/**
 * Keywords whose value is instance data rather than a schema. Anything nested
 * below them is payload, so keyword-looking keys there are not keywords.
 */
const ANTHROPIC_SCHEMA_DATA_KEYWORDS: ReadonlySet<string> = new Set(["const", "enum", "default", "examples"]);

/**
 * Anthropic's strict grammar subset supports anyOf/type-array unions only.
 * oneOf/allOf/$ref compile unpredictably (rejections arrive as 400s the
 * grammar-too-large fallback does not recognize, so they would hard-fail the
 * turn), and patternProperties/propertyNames describe open key sets that the
 * strict pipeline's injected `additionalProperties: false` would contradict.
 * Runs against the raw wire schema — the base normalizer spills several of
 * these keywords into the description, erasing the evidence.
 *
 * Only keys in keyword position are inspected:
 * - the values of `properties` / `$defs` / `definitions` / `dependentSchemas`
 *   are scanned as schemas, but the map's own keys are skipped, so a tool
 *   argument that happens to be called `oneOf` or `$ref` is not a match;
 * - `const` / `enum` / `default` / `examples` hold data and are not descended
 *   into.
 * Every other value is walked generically, which keeps the check conservative
 * for keywords this function does not know about.
 *
 * @param schema - Raw schema, or any value nested inside one.
 * @param seen - Objects and arrays already visited; guards against cycles and
 *   repeated work on shared subtrees. Callers normally omit it.
 * @returns `true` when a strict-incompatible keyword is found in keyword
 *   position anywhere in the tree.
 */
function hasAnthropicStrictIncompatibleKeyword(schema: unknown, seen = new Set<object>()): boolean {
  if (Array.isArray(schema)) {
    if (seen.has(schema)) return false;
    seen.add(schema);
    return schema.some(entry => hasAnthropicStrictIncompatibleKeyword(entry, seen));
  }
  if (!isRecord(schema)) return false;
  if (seen.has(schema)) return false;
  seen.add(schema);

  for (const keyword of ANTHROPIC_STRICT_INCOMPATIBLE_KEYWORDS) {
    if (schema[keyword] !== undefined) return true;
  }

  for (const [key, value] of Object.entries(schema)) {
    // Instance data: nothing below here is a schema keyword.
    if (ANTHROPIC_SCHEMA_DATA_KEYWORDS.has(key)) continue;

    if (ANTHROPIC_SCHEMA_NAME_MAP_KEYWORDS.has(key) && isRecord(value)) {
      // Scan each named subschema, but do not test the map's keys themselves.
      // The map is deliberately not added to `seen`: if the same object is also
      // used as a schema node elsewhere it still has to be scanned as one.
      const nestedHit = Object.values(value).some(subschema =>
        hasAnthropicStrictIncompatibleKeyword(subschema, seen),
      );
      if (nestedHit) return true;
      continue;
    }

    if (hasAnthropicStrictIncompatibleKeyword(value, seen)) return true;
  }
  return false;
}
3597
+
2995
3598
  function normalizeAnthropicStrictSchema(
2996
3599
  schema: Record<string, unknown>,
2997
3600
  optionalRemaining: number,
@@ -3031,7 +3634,9 @@ function buildAnthropicToolSchemaPlans(tools: Tool[], disableStrictTools = false
3031
3634
 
3032
3635
  const candidateIndexes = tools.flatMap((tool, index) => {
3033
3636
  if (!ANTHROPIC_STRICT_TOOL_ALLOWLIST.has(tool.name)) return [];
3034
- return tool.strict === false ? [] : [index];
3637
+ if (tool.strict === false) return [];
3638
+ if (hasAnthropicStrictIncompatibleKeyword(toolWireSchema(tool))) return [];
3639
+ return [index];
3035
3640
  });
3036
3641
 
3037
3642
  let strictToolCount = 0;
@@ -3089,6 +3694,10 @@ function mapStopReason(reason: string): StopReason {
3089
3694
  return "stop";
3090
3695
  case "max_tokens":
3091
3696
  return "length";
3697
+ // Generation ran into the model's context window (default behavior on
3698
+ // Sonnet 4.5+); the streamed content is valid, just truncated.
3699
+ case "model_context_window_exceeded":
3700
+ return "length";
3092
3701
  case "tool_use":
3093
3702
  return "toolUse";
3094
3703
  case "refusal":
@@ -3096,11 +3705,15 @@ function mapStopReason(reason: string): StopReason {
3096
3705
  case "pause_turn": // Stop is good enough -> resubmit
3097
3706
  return "stop";
3098
3707
  case "stop_sequence":
3099
- return "stop"; // We don't supply stop sequences, so this should never happen
3708
+ return "stop"; // A caller-supplied stop_sequences entry matched; the turn completed normally.
3100
3709
  case "sensitive": // Content flagged by safety filters (not yet in SDK types)
3101
3710
  return "error";
3102
3711
  default:
3103
- // Handle unknown stop reasons gracefully (API may add new values)
3104
- throw new Error(`Unhandled stop reason: ${reason}`);
3712
+ // New stop reasons ship server-side first ("sensitive",
3713
+ // "model_context_window_exceeded") and arrive on the trailing
3714
+ // message_delta after all content has streamed. Degrade to a normal
3715
+ // stop instead of failing the fully streamed turn.
3716
+ reportAnthropicEnvelopeAnomaly(`unhandled stop reason: ${reason}`);
3717
+ return "stop";
3105
3718
  }
3106
3719
  }