@aryee337/aery-ai 0.2.27 → 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417)
  1. package/CHANGELOG.md +2914 -0
  2. package/README.md +614 -813
  3. package/package.json +140 -105
  4. package/src/api-registry.ts +96 -0
  5. package/src/auth-broker/client.ts +358 -0
  6. package/src/auth-broker/index.ts +5 -0
  7. package/src/auth-broker/refresher.ts +117 -0
  8. package/src/auth-broker/remote-store.ts +623 -0
  9. package/src/auth-broker/server.ts +644 -0
  10. package/src/auth-broker/types.ts +127 -0
  11. package/src/auth-broker/wire-schemas.ts +200 -0
  12. package/src/auth-gateway/http.ts +194 -0
  13. package/src/auth-gateway/index.ts +3 -0
  14. package/src/auth-gateway/server.ts +818 -0
  15. package/src/auth-gateway/types.ts +143 -0
  16. package/src/auth-storage.ts +4422 -0
  17. package/src/index.ts +54 -0
  18. package/src/model-cache.ts +129 -0
  19. package/src/model-manager.ts +469 -0
  20. package/src/model-thinking.ts +782 -0
  21. package/src/models.json +83530 -0
  22. package/src/models.json.d.ts +9 -0
  23. package/src/models.ts +56 -0
  24. package/src/prompts/turn-aborted-guidance.md +4 -0
  25. package/src/provider-details.ts +90 -0
  26. package/src/provider-models/bundled-references.ts +38 -0
  27. package/src/provider-models/descriptors.ts +355 -0
  28. package/src/provider-models/google.ts +88 -0
  29. package/src/provider-models/index.ts +5 -0
  30. package/src/provider-models/ollama.ts +153 -0
  31. package/src/provider-models/openai-compat.ts +2817 -0
  32. package/src/provider-models/special.ts +67 -0
  33. package/src/providers/aery-native-client.ts +228 -0
  34. package/src/providers/aery-native-server.ts +212 -0
  35. package/src/providers/amazon-bedrock.ts +873 -0
  36. package/src/providers/anthropic-client.ts +318 -0
  37. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  38. package/src/providers/anthropic-messages-server.ts +683 -0
  39. package/src/providers/anthropic-wire.ts +268 -0
  40. package/src/providers/anthropic.ts +3094 -0
  41. package/src/providers/aws-credentials.ts +501 -0
  42. package/src/providers/aws-eventstream.ts +185 -0
  43. package/src/providers/aws-sigv4.ts +218 -0
  44. package/src/providers/azure-openai-responses.ts +361 -0
  45. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  46. package/src/providers/cursor/proto/agent.proto +3526 -0
  47. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  48. package/src/providers/cursor/proto/buf.yaml +17 -0
  49. package/src/providers/cursor.ts +2621 -0
  50. package/src/providers/error-message.ts +21 -0
  51. package/src/providers/github-copilot-headers.ts +140 -0
  52. package/src/providers/gitlab-duo.ts +372 -0
  53. package/src/providers/google-auth.ts +252 -0
  54. package/src/providers/google-gemini-cli.ts +809 -0
  55. package/src/providers/google-gemini-headers.ts +41 -0
  56. package/src/providers/google-shared.ts +917 -0
  57. package/src/providers/google-types.ts +167 -0
  58. package/src/providers/google-vertex.ts +91 -0
  59. package/src/providers/google.ts +41 -0
  60. package/src/providers/grammar.ts +70 -0
  61. package/src/providers/kimi.ts +52 -0
  62. package/src/providers/mock.ts +496 -0
  63. package/src/providers/ollama.ts +644 -0
  64. package/src/providers/openai-anthropic-shim.ts +138 -0
  65. package/src/providers/openai-chat-server-schema.ts +252 -0
  66. package/src/providers/openai-chat-server.ts +647 -0
  67. package/src/providers/openai-codex/constants.ts +43 -0
  68. package/src/providers/openai-codex/request-transformer.ts +161 -0
  69. package/src/providers/openai-codex/response-handler.ts +81 -0
  70. package/src/providers/openai-codex-responses.ts +3018 -0
  71. package/src/providers/openai-completions-compat.ts +300 -0
  72. package/src/providers/openai-completions.ts +1979 -0
  73. package/src/providers/openai-responses-server-schema.ts +290 -0
  74. package/src/providers/openai-responses-server.ts +1183 -0
  75. package/src/providers/openai-responses-shared.ts +873 -0
  76. package/src/providers/openai-responses.ts +679 -0
  77. package/src/providers/register-builtins.ts +436 -0
  78. package/src/providers/synthetic.ts +50 -0
  79. package/src/providers/transform-messages.ts +382 -0
  80. package/src/providers/vision-guard.ts +31 -0
  81. package/src/providers/xai-responses.ts +82 -0
  82. package/src/rate-limit-utils.ts +84 -0
  83. package/src/stream.ts +1065 -0
  84. package/src/types.ts +944 -0
  85. package/src/usage/claude.ts +482 -0
  86. package/src/usage/gemini.ts +250 -0
  87. package/src/usage/github-copilot.ts +421 -0
  88. package/src/usage/google-antigravity.ts +201 -0
  89. package/src/usage/kimi.ts +271 -0
  90. package/src/usage/minimax-code.ts +31 -0
  91. package/src/usage/openai-codex.ts +503 -0
  92. package/src/usage/shared.ts +10 -0
  93. package/src/usage/zai.ts +247 -0
  94. package/src/usage.ts +185 -0
  95. package/src/utils/abort.ts +51 -0
  96. package/src/utils/abortable-iterator.ts +69 -0
  97. package/src/utils/anthropic-auth.ts +93 -0
  98. package/src/utils/discovery/antigravity.ts +261 -0
  99. package/src/utils/discovery/codex.ts +371 -0
  100. package/src/utils/discovery/cursor.ts +306 -0
  101. package/src/utils/discovery/gemini.ts +248 -0
  102. package/src/utils/discovery/index.ts +4 -0
  103. package/src/utils/discovery/openai-compatible.ts +224 -0
  104. package/src/utils/event-stream.ts +142 -0
  105. package/src/utils/fireworks-model-id.ts +30 -0
  106. package/src/utils/foundry.ts +8 -0
  107. package/src/utils/http-inspector.ts +176 -0
  108. package/src/utils/idle-iterator.ts +267 -0
  109. package/src/utils/json-parse.ts +182 -0
  110. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  111. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  112. package/src/utils/oauth/anthropic.ts +273 -0
  113. package/src/utils/oauth/api-key-login.ts +87 -0
  114. package/src/utils/oauth/api-key-validation.ts +92 -0
  115. package/src/utils/oauth/callback-server.ts +276 -0
  116. package/src/utils/oauth/cerebras.ts +16 -0
  117. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  118. package/src/utils/oauth/cursor.ts +157 -0
  119. package/src/utils/oauth/deepseek.ts +53 -0
  120. package/src/utils/oauth/firepass.ts +24 -0
  121. package/src/utils/oauth/fireworks.ts +15 -0
  122. package/src/utils/oauth/github-copilot.ts +362 -0
  123. package/src/utils/oauth/gitlab-duo.ts +123 -0
  124. package/src/utils/oauth/google-antigravity.ts +200 -0
  125. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  126. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  127. package/src/utils/oauth/huggingface.ts +62 -0
  128. package/src/utils/oauth/index.ts +484 -0
  129. package/src/utils/oauth/kagi.ts +47 -0
  130. package/src/utils/oauth/kilo.ts +87 -0
  131. package/src/utils/oauth/kimi.ts +254 -0
  132. package/src/utils/oauth/litellm.ts +47 -0
  133. package/src/utils/oauth/lm-studio.ts +38 -0
  134. package/src/utils/oauth/minimax-code.ts +78 -0
  135. package/src/utils/oauth/moonshot.ts +23 -0
  136. package/src/utils/oauth/nanogpt.ts +15 -0
  137. package/src/utils/oauth/nvidia.ts +70 -0
  138. package/src/utils/oauth/oauth.html +203 -0
  139. package/src/utils/oauth/ollama-cloud.ts +28 -0
  140. package/src/utils/oauth/ollama.ts +47 -0
  141. package/src/utils/oauth/openai-codex.ts +299 -0
  142. package/src/utils/oauth/opencode.ts +49 -0
  143. package/src/utils/oauth/openrouter.ts +20 -0
  144. package/src/utils/oauth/parallel.ts +46 -0
  145. package/src/utils/oauth/perplexity.ts +206 -0
  146. package/src/utils/oauth/pkce.ts +18 -0
  147. package/src/utils/oauth/qianfan.ts +58 -0
  148. package/src/utils/oauth/qwen-portal.ts +60 -0
  149. package/src/utils/oauth/synthetic.ts +15 -0
  150. package/src/utils/oauth/tavily.ts +46 -0
  151. package/src/utils/oauth/together.ts +16 -0
  152. package/src/utils/oauth/types.ts +99 -0
  153. package/src/utils/oauth/venice.ts +59 -0
  154. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  155. package/src/utils/oauth/vllm.ts +40 -0
  156. package/src/utils/oauth/wafer.ts +50 -0
  157. package/src/utils/oauth/xai-oauth.ts +342 -0
  158. package/src/utils/oauth/xiaomi.ts +139 -0
  159. package/src/utils/oauth/zai.ts +60 -0
  160. package/src/utils/oauth/zenmux.ts +15 -0
  161. package/src/utils/oauth/zhipu.ts +60 -0
  162. package/src/utils/overflow.ts +137 -0
  163. package/src/utils/parse-bind.ts +54 -0
  164. package/src/utils/provider-response.ts +30 -0
  165. package/src/utils/request-debug.ts +336 -0
  166. package/src/utils/retry-after.ts +110 -0
  167. package/src/utils/retry.ts +54 -0
  168. package/src/utils/schema/CONSTRAINTS.md +164 -0
  169. package/src/utils/schema/adapt.ts +36 -0
  170. package/src/utils/schema/compatibility.ts +435 -0
  171. package/src/utils/schema/dereference.ts +98 -0
  172. package/src/utils/schema/draft.ts +341 -0
  173. package/src/utils/schema/equality.ts +97 -0
  174. package/src/utils/schema/fields.ts +191 -0
  175. package/src/utils/schema/index.ts +13 -0
  176. package/src/utils/schema/json-schema-validator.ts +577 -0
  177. package/src/utils/schema/meta-validator.ts +167 -0
  178. package/src/utils/schema/normalize.ts +1588 -0
  179. package/src/utils/schema/spill.ts +43 -0
  180. package/src/utils/schema/stamps.ts +97 -0
  181. package/src/utils/schema/types.ts +10 -0
  182. package/src/utils/schema/wire.ts +293 -0
  183. package/src/utils/schema/zod-decontaminate.ts +331 -0
  184. package/src/utils/sdk-stream-timeout.ts +43 -0
  185. package/src/utils/sse-debug.ts +289 -0
  186. package/src/utils/stream-markup-healing.ts +612 -0
  187. package/src/utils/tool-choice.ts +99 -0
  188. package/src/utils/validation.ts +1024 -0
  189. package/src/utils.ts +166 -0
  190. package/dist/api-registry.d.ts +0 -20
  191. package/dist/api-registry.d.ts.map +0 -1
  192. package/dist/api-registry.js +0 -44
  193. package/dist/api-registry.js.map +0 -1
  194. package/dist/bedrock-provider.d.ts +0 -5
  195. package/dist/bedrock-provider.d.ts.map +0 -1
  196. package/dist/bedrock-provider.js +0 -6
  197. package/dist/bedrock-provider.js.map +0 -1
  198. package/dist/cli.d.ts +0 -3
  199. package/dist/cli.d.ts.map +0 -1
  200. package/dist/cli.js +0 -130
  201. package/dist/cli.js.map +0 -1
  202. package/dist/env-api-keys.d.ts +0 -18
  203. package/dist/env-api-keys.d.ts.map +0 -1
  204. package/dist/env-api-keys.js +0 -178
  205. package/dist/env-api-keys.js.map +0 -1
  206. package/dist/image-models.d.ts +0 -10
  207. package/dist/image-models.d.ts.map +0 -1
  208. package/dist/image-models.generated.d.ts +0 -440
  209. package/dist/image-models.generated.d.ts.map +0 -1
  210. package/dist/image-models.generated.js +0 -442
  211. package/dist/image-models.generated.js.map +0 -1
  212. package/dist/image-models.js +0 -23
  213. package/dist/image-models.js.map +0 -1
  214. package/dist/images-api-registry.d.ts +0 -14
  215. package/dist/images-api-registry.d.ts.map +0 -1
  216. package/dist/images-api-registry.js +0 -22
  217. package/dist/images-api-registry.js.map +0 -1
  218. package/dist/images.d.ts +0 -4
  219. package/dist/images.d.ts.map +0 -1
  220. package/dist/images.js +0 -14
  221. package/dist/images.js.map +0 -1
  222. package/dist/index.d.ts +0 -32
  223. package/dist/index.d.ts.map +0 -1
  224. package/dist/index.js +0 -20
  225. package/dist/index.js.map +0 -1
  226. package/dist/models.d.ts +0 -18
  227. package/dist/models.d.ts.map +0 -1
  228. package/dist/models.generated.d.ts +0 -17707
  229. package/dist/models.generated.d.ts.map +0 -1
  230. package/dist/models.generated.js +0 -16561
  231. package/dist/models.generated.js.map +0 -1
  232. package/dist/models.js +0 -71
  233. package/dist/models.js.map +0 -1
  234. package/dist/oauth.d.ts +0 -2
  235. package/dist/oauth.d.ts.map +0 -1
  236. package/dist/oauth.js +0 -2
  237. package/dist/oauth.js.map +0 -1
  238. package/dist/providers/aery-error-formatting.d.ts +0 -13
  239. package/dist/providers/aery-error-formatting.d.ts.map +0 -1
  240. package/dist/providers/aery-error-formatting.js +0 -112
  241. package/dist/providers/aery-error-formatting.js.map +0 -1
  242. package/dist/providers/amazon-bedrock.d.ts +0 -38
  243. package/dist/providers/amazon-bedrock.d.ts.map +0 -1
  244. package/dist/providers/amazon-bedrock.js +0 -763
  245. package/dist/providers/amazon-bedrock.js.map +0 -1
  246. package/dist/providers/anthropic.d.ts +0 -71
  247. package/dist/providers/anthropic.d.ts.map +0 -1
  248. package/dist/providers/anthropic.js +0 -949
  249. package/dist/providers/anthropic.js.map +0 -1
  250. package/dist/providers/azure-openai-responses.d.ts +0 -15
  251. package/dist/providers/azure-openai-responses.d.ts.map +0 -1
  252. package/dist/providers/azure-openai-responses.js +0 -225
  253. package/dist/providers/azure-openai-responses.js.map +0 -1
  254. package/dist/providers/cloudflare.d.ts +0 -13
  255. package/dist/providers/cloudflare.d.ts.map +0 -1
  256. package/dist/providers/cloudflare.js +0 -26
  257. package/dist/providers/cloudflare.js.map +0 -1
  258. package/dist/providers/faux.d.ts +0 -56
  259. package/dist/providers/faux.d.ts.map +0 -1
  260. package/dist/providers/faux.js +0 -368
  261. package/dist/providers/faux.js.map +0 -1
  262. package/dist/providers/github-copilot-headers.d.ts +0 -8
  263. package/dist/providers/github-copilot-headers.d.ts.map +0 -1
  264. package/dist/providers/github-copilot-headers.js +0 -29
  265. package/dist/providers/github-copilot-headers.js.map +0 -1
  266. package/dist/providers/google-gemini-cli.d.ts +0 -74
  267. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  268. package/dist/providers/google-gemini-cli.js +0 -779
  269. package/dist/providers/google-gemini-cli.js.map +0 -1
  270. package/dist/providers/google-shared.d.ts +0 -70
  271. package/dist/providers/google-shared.d.ts.map +0 -1
  272. package/dist/providers/google-shared.js +0 -329
  273. package/dist/providers/google-shared.js.map +0 -1
  274. package/dist/providers/google-vertex.d.ts +0 -15
  275. package/dist/providers/google-vertex.d.ts.map +0 -1
  276. package/dist/providers/google-vertex.js +0 -442
  277. package/dist/providers/google-vertex.js.map +0 -1
  278. package/dist/providers/google.d.ts +0 -13
  279. package/dist/providers/google.d.ts.map +0 -1
  280. package/dist/providers/google.js +0 -400
  281. package/dist/providers/google.js.map +0 -1
  282. package/dist/providers/images/openrouter.d.ts +0 -3
  283. package/dist/providers/images/openrouter.d.ts.map +0 -1
  284. package/dist/providers/images/openrouter.js +0 -129
  285. package/dist/providers/images/openrouter.js.map +0 -1
  286. package/dist/providers/images/register-builtins.d.ts +0 -4
  287. package/dist/providers/images/register-builtins.d.ts.map +0 -1
  288. package/dist/providers/images/register-builtins.js +0 -34
  289. package/dist/providers/images/register-builtins.js.map +0 -1
  290. package/dist/providers/mistral.d.ts +0 -25
  291. package/dist/providers/mistral.d.ts.map +0 -1
  292. package/dist/providers/mistral.js +0 -535
  293. package/dist/providers/mistral.js.map +0 -1
  294. package/dist/providers/openai-codex-responses.d.ts +0 -30
  295. package/dist/providers/openai-codex-responses.d.ts.map +0 -1
  296. package/dist/providers/openai-codex-responses.js +0 -1090
  297. package/dist/providers/openai-codex-responses.js.map +0 -1
  298. package/dist/providers/openai-completions.d.ts +0 -19
  299. package/dist/providers/openai-completions.d.ts.map +0 -1
  300. package/dist/providers/openai-completions.js +0 -950
  301. package/dist/providers/openai-completions.js.map +0 -1
  302. package/dist/providers/openai-prompt-cache.d.ts +0 -3
  303. package/dist/providers/openai-prompt-cache.d.ts.map +0 -1
  304. package/dist/providers/openai-prompt-cache.js +0 -10
  305. package/dist/providers/openai-prompt-cache.js.map +0 -1
  306. package/dist/providers/openai-responses-shared.d.ts +0 -18
  307. package/dist/providers/openai-responses-shared.d.ts.map +0 -1
  308. package/dist/providers/openai-responses-shared.js +0 -492
  309. package/dist/providers/openai-responses-shared.js.map +0 -1
  310. package/dist/providers/openai-responses.d.ts +0 -13
  311. package/dist/providers/openai-responses.d.ts.map +0 -1
  312. package/dist/providers/openai-responses.js +0 -237
  313. package/dist/providers/openai-responses.js.map +0 -1
  314. package/dist/providers/register-builtins.d.ts +0 -38
  315. package/dist/providers/register-builtins.d.ts.map +0 -1
  316. package/dist/providers/register-builtins.js +0 -278
  317. package/dist/providers/register-builtins.js.map +0 -1
  318. package/dist/providers/simple-options.d.ts +0 -8
  319. package/dist/providers/simple-options.d.ts.map +0 -1
  320. package/dist/providers/simple-options.js +0 -41
  321. package/dist/providers/simple-options.js.map +0 -1
  322. package/dist/providers/transform-messages.d.ts +0 -8
  323. package/dist/providers/transform-messages.d.ts.map +0 -1
  324. package/dist/providers/transform-messages.js +0 -184
  325. package/dist/providers/transform-messages.js.map +0 -1
  326. package/dist/session-resources.d.ts +0 -4
  327. package/dist/session-resources.d.ts.map +0 -1
  328. package/dist/session-resources.js +0 -22
  329. package/dist/session-resources.js.map +0 -1
  330. package/dist/stream.d.ts +0 -8
  331. package/dist/stream.d.ts.map +0 -1
  332. package/dist/stream.js +0 -27
  333. package/dist/stream.js.map +0 -1
  334. package/dist/types.d.ts +0 -498
  335. package/dist/types.d.ts.map +0 -1
  336. package/dist/types.js +0 -2
  337. package/dist/types.js.map +0 -1
  338. package/dist/utils/diagnostics.d.ts +0 -19
  339. package/dist/utils/diagnostics.d.ts.map +0 -1
  340. package/dist/utils/diagnostics.js +0 -25
  341. package/dist/utils/diagnostics.js.map +0 -1
  342. package/dist/utils/event-stream.d.ts +0 -21
  343. package/dist/utils/event-stream.d.ts.map +0 -1
  344. package/dist/utils/event-stream.js +0 -81
  345. package/dist/utils/event-stream.js.map +0 -1
  346. package/dist/utils/hash.d.ts +0 -3
  347. package/dist/utils/hash.d.ts.map +0 -1
  348. package/dist/utils/hash.js +0 -14
  349. package/dist/utils/hash.js.map +0 -1
  350. package/dist/utils/headers.d.ts +0 -2
  351. package/dist/utils/headers.d.ts.map +0 -1
  352. package/dist/utils/headers.js +0 -8
  353. package/dist/utils/headers.js.map +0 -1
  354. package/dist/utils/json-parse.d.ts +0 -16
  355. package/dist/utils/json-parse.d.ts.map +0 -1
  356. package/dist/utils/json-parse.js +0 -113
  357. package/dist/utils/json-parse.js.map +0 -1
  358. package/dist/utils/node-http-proxy.d.ts +0 -10
  359. package/dist/utils/node-http-proxy.d.ts.map +0 -1
  360. package/dist/utils/node-http-proxy.js +0 -97
  361. package/dist/utils/node-http-proxy.js.map +0 -1
  362. package/dist/utils/oauth/anthropic.d.ts +0 -25
  363. package/dist/utils/oauth/anthropic.d.ts.map +0 -1
  364. package/dist/utils/oauth/anthropic.js +0 -335
  365. package/dist/utils/oauth/anthropic.js.map +0 -1
  366. package/dist/utils/oauth/device-code.d.ts +0 -19
  367. package/dist/utils/oauth/device-code.d.ts.map +0 -1
  368. package/dist/utils/oauth/device-code.js +0 -55
  369. package/dist/utils/oauth/device-code.js.map +0 -1
  370. package/dist/utils/oauth/github-copilot.d.ts +0 -30
  371. package/dist/utils/oauth/github-copilot.d.ts.map +0 -1
  372. package/dist/utils/oauth/github-copilot.js +0 -268
  373. package/dist/utils/oauth/github-copilot.js.map +0 -1
  374. package/dist/utils/oauth/google-antigravity.d.ts +0 -26
  375. package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
  376. package/dist/utils/oauth/google-antigravity.js +0 -377
  377. package/dist/utils/oauth/google-antigravity.js.map +0 -1
  378. package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
  379. package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
  380. package/dist/utils/oauth/google-gemini-cli.js +0 -482
  381. package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
  382. package/dist/utils/oauth/index.d.ts +0 -63
  383. package/dist/utils/oauth/index.d.ts.map +0 -1
  384. package/dist/utils/oauth/index.js +0 -131
  385. package/dist/utils/oauth/index.js.map +0 -1
  386. package/dist/utils/oauth/oauth-page.d.ts +0 -3
  387. package/dist/utils/oauth/oauth-page.d.ts.map +0 -1
  388. package/dist/utils/oauth/oauth-page.js +0 -105
  389. package/dist/utils/oauth/oauth-page.js.map +0 -1
  390. package/dist/utils/oauth/openai-codex.d.ts +0 -34
  391. package/dist/utils/oauth/openai-codex.d.ts.map +0 -1
  392. package/dist/utils/oauth/openai-codex.js +0 -385
  393. package/dist/utils/oauth/openai-codex.js.map +0 -1
  394. package/dist/utils/oauth/pkce.d.ts +0 -13
  395. package/dist/utils/oauth/pkce.d.ts.map +0 -1
  396. package/dist/utils/oauth/pkce.js +0 -31
  397. package/dist/utils/oauth/pkce.js.map +0 -1
  398. package/dist/utils/oauth/types.d.ts +0 -64
  399. package/dist/utils/oauth/types.d.ts.map +0 -1
  400. package/dist/utils/oauth/types.js +0 -2
  401. package/dist/utils/oauth/types.js.map +0 -1
  402. package/dist/utils/overflow.d.ts +0 -56
  403. package/dist/utils/overflow.d.ts.map +0 -1
  404. package/dist/utils/overflow.js +0 -151
  405. package/dist/utils/overflow.js.map +0 -1
  406. package/dist/utils/sanitize-unicode.d.ts +0 -22
  407. package/dist/utils/sanitize-unicode.d.ts.map +0 -1
  408. package/dist/utils/sanitize-unicode.js +0 -26
  409. package/dist/utils/sanitize-unicode.js.map +0 -1
  410. package/dist/utils/typebox-helpers.d.ts +0 -17
  411. package/dist/utils/typebox-helpers.d.ts.map +0 -1
  412. package/dist/utils/typebox-helpers.js +0 -21
  413. package/dist/utils/typebox-helpers.js.map +0 -1
  414. package/dist/utils/validation.d.ts +0 -18
  415. package/dist/utils/validation.d.ts.map +0 -1
  416. package/dist/utils/validation.js +0 -281
  417. package/dist/utils/validation.js.map +0 -1
@@ -0,0 +1,3094 @@
1
+ import * as nodeCrypto from "node:crypto";
2
+ import * as fs from "node:fs";
3
+ import { scheduler } from "node:timers/promises";
4
+ import * as tls from "node:tls";
5
+ import {
6
+ $env,
7
+ extractHttpStatusFromError,
8
+ getInstallId,
9
+ isEnoent,
10
+ isRetryableError,
11
+ isUnexpectedSocketCloseMessage,
12
+ logger,
13
+ readSseEvents,
14
+ } from "@aryee337/aery-utils";
15
+ import {
16
+ disablesParallelToolUse,
17
+ hasOpus47ApiRestrictions,
18
+ mapEffortToAnthropicAdaptiveEffort,
19
+ supportsMidConversationSystemMessages,
20
+ } from "../model-thinking";
21
+ import { calculateCost } from "../models";
22
+ import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
23
+ import type {
24
+ Api,
25
+ AssistantMessage,
26
+ CacheRetention,
27
+ Context,
28
+ FetchImpl,
29
+ ImageContent,
30
+ Message,
31
+ Model,
32
+ ProviderSessionState,
33
+ RedactedThinkingContent,
34
+ ServiceTier,
35
+ SimpleStreamOptions,
36
+ StopReason,
37
+ StreamFunction,
38
+ StreamOptions,
39
+ TextContent,
40
+ ThinkingContent,
41
+ Tool,
42
+ ToolCall,
43
+ ToolResultMessage,
44
+ Usage,
45
+ } from "../types";
46
+ import { resolveServiceTier } from "../types";
47
+ import {
48
+ isAnthropicOAuthToken,
49
+ isRecord,
50
+ normalizeSystemPrompts,
51
+ normalizeToolCallId,
52
+ resolveCacheRetention,
53
+ } from "../utils";
54
+ import { createAbortSourceTracker } from "../utils/abort";
55
+ import { AssistantMessageEventStream } from "../utils/event-stream";
56
+ import { isFoundryEnabled } from "../utils/foundry";
57
+ import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotError } from "../utils/http-inspector";
58
+ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTimeout } from "../utils/idle-iterator";
59
+ import { parseJsonWithRepair, parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
60
+ import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
61
+ import { notifyProviderResponse } from "../utils/provider-response";
62
+ import { isCopilotTransientModelError } from "../utils/retry";
63
+ import { COMBINATOR_KEYS, NO_STRICT, toolWireSchema } from "../utils/schema";
64
+ import { spillToDescription } from "../utils/schema/spill";
65
+ import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
66
+ import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
67
+ import {
68
+ AnthropicConnectionTimeoutError,
69
+ type AnthropicFetchOptions,
70
+ AnthropicMessagesClient,
71
+ type AnthropicMessagesClientLike,
72
+ } from "./anthropic-client";
73
+ import type {
74
+ ToolInputSchema as AnthropicToolInputSchema,
75
+ Tool as AnthropicWireTool,
76
+ ContentBlockParam,
77
+ MessageCreateParamsStreaming,
78
+ MessageParam,
79
+ RawMessageStreamEvent,
80
+ TextBlockParam,
81
+ } from "./anthropic-wire";
82
+ import {
83
+ buildCopilotDynamicHeaders,
84
+ hasCopilotVisionInput,
85
+ resolveGitHubCopilotBaseUrl,
86
+ } from "./github-copilot-headers";
87
+ import { transformMessages } from "./transform-messages";
88
+ import { NON_VISION_IMAGE_PLACEHOLDER } from "./vision-guard";
89
+
90
/**
 * Inputs read by `buildAnthropicHeaders` when assembling the header map for a request.
 */
export type AnthropicHeaderOptions = {
	/** Credential written into the auth header (`X-Api-Key`, `Authorization`, or `cf-aig-authorization`). */
	apiKey: string;
	/** Target base URL; a non-Anthropic host switches API-key auth to a `Bearer` `Authorization` header. */
	baseUrl?: string;
	/** Forces OAuth handling on or off; when omitted the token shape of `apiKey` decides. */
	isOAuth?: boolean;
	/** Additional beta flags appended (deduplicated) after the base beta list. */
	extraBetas?: string[];
	/** Requests `text/event-stream` in `Accept` for non-OAuth credentials; defaults to `false`. */
	stream?: boolean;
	/** Per-model headers; entries whose lower-cased name is in the enforced set are dropped. */
	modelHeaders?: Record<string, string>;
	/** Routes auth through the `cf-aig-authorization` header instead of the Anthropic auth headers. */
	isCloudflareAiGateway?: boolean;
	/** When set on an OAuth request, emitted as `X-Claude-Code-Session-Id`. */
	claudeCodeSessionId?: string;
	/** Replaces the default base beta list used for `Anthropic-Beta`. */
	claudeCodeBetas?: readonly string[];
};
101
+
102
/**
 * Normalizes a configured Anthropic base URL to its root form.
 *
 * Surrounding whitespace and trailing slashes are removed, and a single trailing
 * `/v1` path segment is dropped (any slashes left exposed by that removal are
 * trimmed as well, so `https://host//v1` yields `https://host`).
 *
 * @param baseUrl - Raw base URL, possibly empty or undefined.
 * @returns The normalized URL, or `undefined` when nothing usable remains
 * (missing, blank, or consisting only of slashes and/or a `/v1` suffix).
 */
export function normalizeAnthropicBaseUrl(baseUrl?: string): string | undefined {
	// Linear-time trailing-slash trim; avoids regex backtracking on long slash runs.
	const stripTrailingSlashes = (value: string): string => {
		let end = value.length;
		while (end > 0 && value.charCodeAt(end - 1) === 47 /* "/" */) end--;
		return value.slice(0, end);
	};

	const trimmed = baseUrl?.trim();
	if (!trimmed) {
		return undefined;
	}

	let normalized = stripTrailingSlashes(trimmed);
	if (normalized.endsWith("/v1")) {
		// Removing the suffix can expose further slashes (e.g. "host//v1"); trim again.
		normalized = stripTrailingSlashes(normalized.slice(0, -3));
	}
	// An empty string is not a usable base URL; report it the same way as "not provided".
	return normalized || undefined;
}
110
+
111
/**
 * Builds the comma-separated beta header value.
 *
 * Entries from `baseBetas` followed by `extraBetas` are trimmed, blank entries
 * are discarded, and only the first occurrence of each name is kept, in order.
 */
export function buildBetaHeader(baseBetas: readonly string[], extraBetas: readonly string[]): string {
	// A Set iterates in insertion order, so it gives ordered de-duplication.
	const unique = new Set<string>();
	for (const raw of baseBetas.concat(extraBetas)) {
		const name = raw.trim();
		if (name) unique.add(name);
	}
	return Array.from(unique).join(",");
}
124
+
125
/** Base beta flags `buildClaudeCodeBetas` uses for non-agent requests. */
const claudeCodeUtilityBetaDefaults = [
	"oauth-2025-04-20",
	"interleaved-thinking-2025-05-14",
	"context-management-2025-06-27",
	"prompt-caching-scope-2026-01-05",
	"structured-outputs-2025-12-15",
] as const;

/** Base beta flags `buildClaudeCodeBetas` uses for agent requests. */
const claudeCodeAgentBetaDefaults = [
	"claude-code-20250219",
	"oauth-2025-04-20",
	"context-1m-2025-08-07",
	"interleaved-thinking-2025-05-14",
	"context-management-2025-06-27",
	"prompt-caching-scope-2026-01-05",
	"mid-conversation-system-2026-04-07",
	"advanced-tool-use-2025-11-20",
] as const;

/** Agent-only flags appended after the optional effort beta. */
const claudeCodeAgentPostEffortBetas = ["extended-cache-ttl-2025-04-11"] as const;

const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";

/** Marker entry in the default lists; the redact-thinking beta is inserted right after it. */
const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";

// Asks the API to redact thinking blocks from responses. Only sent when the
// caller explicitly hides thinking (`thinkingDisplay: "omitted"`); sending it
// by default suppresses the thinking traces callers expect to stream.
const redactThinkingBeta = "redact-thinking-2026-02-12";

const fastModeBeta = "fast-mode-2026-02-01";
const taskBudgetBeta = "task-budgets-2026-03-13";

/** Added to agent requests when thinking is requested. */
const effortBeta = "effort-2025-11-24";
152
+
153
/**
 * Selects the base beta list for a request.
 *
 * @param agentRequest - Use the agent defaults (plus agent-only trailing flags) instead of the utility defaults.
 * @param thinkingRequest - For agent requests, adds the effort beta before the trailing flags.
 * @param redactThinking - Inserts the redact-thinking beta directly after the interleaved-thinking entry.
 * @returns The shared utility defaults array itself when neither `agentRequest` nor
 * `redactThinking` is set; otherwise a freshly built list.
 */
function buildClaudeCodeBetas(
	agentRequest: boolean,
	thinkingRequest: boolean,
	redactThinking: boolean,
): readonly string[] {
	const defaults: readonly string[] = agentRequest ? claudeCodeAgentBetaDefaults : claudeCodeUtilityBetaDefaults;
	if (!agentRequest && !redactThinking) return defaults;

	// Match CC's header order: redact-thinking immediately follows interleaved-thinking.
	const betas = defaults.flatMap(beta =>
		redactThinking && beta === interleavedThinkingBeta ? [beta, redactThinkingBeta] : [beta],
	);

	if (agentRequest) {
		if (thinkingRequest) betas.push(effortBeta);
		betas.push(...claudeCodeAgentPostEffortBetas);
	}
	return betas;
}
170
+
171
/**
 * Looks up a header value ignoring the case of the header name.
 *
 * @returns The value of the first key (in object key order) whose lower-cased
 * form equals the lower-cased `headerName`, or `undefined` when there is no
 * header map or no matching key.
 */
function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
	if (!headers) return undefined;
	const wanted = headerName.toLowerCase();
	const matchedKey = Object.keys(headers).find(key => key.toLowerCase() === wanted);
	return matchedKey === undefined ? undefined : headers[matchedKey];
}
179
+
180
/**
 * Reports whether a User-Agent value starts with `claude-cli` (case-insensitive).
 * Missing or empty values are rejected.
 */
function isClaudeCodeClientUserAgent(userAgent: string | undefined): userAgent is string {
	return userAgent?.toLowerCase().startsWith("claude-cli") ?? false;
}
184
+
185
/**
 * Reports whether a base URL points at the first-party Anthropic API.
 *
 * @param baseUrl - Base URL to inspect; a missing or empty value counts as the default API.
 * @returns `true` for no URL or an `https:` URL whose host is `api.anthropic.com`;
 * `false` for any other URL or a string that does not parse as a URL.
 */
export function isAnthropicApiBaseUrl(baseUrl?: string): boolean {
	if (!baseUrl) return true;

	let parsed: URL;
	try {
		parsed = new URL(baseUrl);
	} catch {
		return false;
	}

	const scheme = parsed.protocol.toLowerCase();
	const host = parsed.hostname.toLowerCase();
	return scheme === "https:" && host === "api.anthropic.com";
}
194
+
195
/**
 * Headers included in every branch of `buildAnthropicHeaders`. They are spread
 * after the model-supplied headers, so they win over identically-cased keys.
 */
const sharedHeaders = {
	"Accept-Encoding": "gzip, deflate, br, zstd",
	Connection: "keep-alive",
	"Content-Type": "application/json",
	"Anthropic-Version": "2023-06-01",
	"Anthropic-Dangerous-Direct-Browser-Access": "true",
	"X-App": "cli",
};
203
+
204
/**
 * Assembles the HTTP headers for an Anthropic Messages request.
 *
 * The auth scheme is chosen in this order:
 * 1. Cloudflare AI Gateway: `cf-aig-authorization: Bearer <apiKey>`.
 * 2. OAuth token: Claude Code headers, `Authorization: Bearer <apiKey>`, a fresh
 *    `x-client-request-id`, and a `claude-cli` User-Agent (a model-supplied
 *    User-Agent is kept only if it already starts with `claude-cli`).
 * 3. API key against a non-Anthropic base URL: `Authorization: Bearer <apiKey>`.
 * 4. API key against the Anthropic API: `X-Api-Key`.
 *
 * Model-supplied headers whose lower-cased name is in `enforcedHeaderKeys` are
 * dropped; the rest are spread first so later entries override them.
 *
 * @param options - See `AnthropicHeaderOptions`.
 * @returns A new header map; key insertion order follows the spreads below.
 */
export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<string, string> {
	const { apiKey } = options;
	const usesOAuth = options.isOAuth ?? isAnthropicOAuthToken(apiKey);
	const baseBetas = options.claudeCodeBetas ?? buildClaudeCodeBetas(true, true, false);
	const betaHeader = buildBetaHeader(baseBetas, options.extraBetas ?? []);
	// OAuth requests always advertise JSON; only API-key streaming asks for SSE.
	const wantsEventStream = !usesOAuth && (options.stream ?? false);
	const acceptHeader = wantsEventStream ? "text/event-stream" : "application/json";
	const modelHeaders = Object.fromEntries(
		Object.entries(options.modelHeaders ?? {}).filter(([name]) => !enforcedHeaderKeys.has(name.toLowerCase())),
	);

	if (options.isCloudflareAiGateway) {
		return {
			...modelHeaders,
			Accept: acceptHeader,
			...sharedHeaders,
			"Anthropic-Beta": betaHeader,
			"cf-aig-authorization": `Bearer ${apiKey}`,
		};
	}

	if (usesOAuth) {
		// Look at the unfiltered model headers: User-Agent may have been stripped above.
		const suppliedUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
		let userAgent = `claude-cli/${claudeCodeVersion} (external, cli)`;
		if (isClaudeCodeClientUserAgent(suppliedUserAgent)) {
			userAgent = suppliedUserAgent;
		}

		const headers: Record<string, string> = {
			...modelHeaders,
			...claudeCodeHeaders,
			Accept: acceptHeader,
			Authorization: `Bearer ${apiKey}`,
			...sharedHeaders,
			"Anthropic-Beta": betaHeader,
		};
		if (options.claudeCodeSessionId) {
			headers["X-Claude-Code-Session-Id"] = options.claudeCodeSessionId;
		}
		headers["x-client-request-id"] = nodeCrypto.randomUUID();
		headers["User-Agent"] = userAgent;
		return headers;
	}

	if (!isAnthropicApiBaseUrl(options.baseUrl)) {
		return {
			...modelHeaders,
			Accept: acceptHeader,
			Authorization: `Bearer ${apiKey}`,
			...sharedHeaders,
			"Anthropic-Beta": betaHeader,
		};
	}

	return {
		...modelHeaders,
		Accept: acceptHeader,
		...sharedHeaders,
		"Anthropic-Beta": betaHeader,
		"X-Api-Key": apiKey,
	};
}
258
+
259
type AnthropicCacheControl = NonNullable<TextBlockParam["cache_control"]>;

type AnthropicOutputConfig = NonNullable<MessageCreateParamsStreaming["output_config"]>;

/**
 * Returns the request's `output_config`, creating and attaching an empty one
 * first when the request has none. The returned object is the one stored on
 * `params`, so mutating it mutates the request.
 */
function getAnthropicOutputConfig(params: MessageCreateParamsStreaming): AnthropicOutputConfig {
	if (params.output_config == null) {
		params.output_config = {};
	}
	return params.output_config;
}
268
+
269
// Cap on the number of stop sequences.
// NOTE(review): presumably the API limit; the trimming code is outside this chunk — confirm.
const ANTHROPIC_STOP_SEQUENCES_MAX = 4;
// NOTE(review): looks like a warn-once latch for the stop-sequence trim warning — confirm at the use site.
let warnedStopSequencesTrim = false;
271
+
272
/**
 * Adaptive thinking `display` is supported starting with Claude Opus 4.7.
 * Older adaptive-thinking models (Opus 4.6, Sonnet 4.6+) reject the field.
 *
 * The minor version is limited to one or two digits that are not followed by
 * another digit, so a date suffix placed directly after the major version
 * (e.g. `claude-opus-4-20250514`) is not misread as minor version 20250514.
 * When no minor version is present it is treated as 0.
 *
 * @param modelId Model identifier, e.g. `claude-opus-4-7` or `claude-opus-4-1-20250805`.
 * @returns `true` for Opus 4.7 and newer, `false` for everything else.
 */
function supportsAdaptiveThinkingDisplay(modelId: string): boolean {
	const match = /claude-opus-(\d+)(?:-(\d{1,2})(?!\d))?/.exec(modelId);
	if (!match) return false;
	const major = Number(match[1]);
	const minor = match[2] === undefined ? 0 : Number(match[2]);
	return major > 4 || (major === 4 && minor >= 7);
}
283
+
284
// Key under which this provider keeps its entry in the shared provider-session-state map.
const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";

/**
 * Per-session sticky flags kept by the Anthropic provider on top of the
 * generic `ProviderSessionState`.
 */
type AnthropicProviderSessionState = ProviderSessionState & {
	// NOTE(review): presumably set once the server rejects strict tools for this session — writer is outside this chunk.
	strictToolsDisabled: boolean;
	// Sticky "server rejected fast mode" flag; reset by clearAnthropicFastModeFallback.
	fastModeDisabled: boolean;
};
290
+
291
/**
 * Builds a fresh session-state entry with both fallback flags cleared.
 * Its `close` handler resets the flags on that same object.
 */
function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
	const state: AnthropicProviderSessionState = {
		strictToolsDisabled: false,
		fastModeDisabled: false,
		close: () => {
			Object.assign(state, { strictToolsDisabled: false, fastModeDisabled: false });
		},
	};
	return state;
}
302
+
303
/**
 * Returns this provider's entry in the session-state map, creating and storing
 * one on first use. Returns `undefined` when no map is supplied.
 */
function getAnthropicProviderSessionState(
	providerSessionState: Map<string, ProviderSessionState> | undefined,
): AnthropicProviderSessionState | undefined {
	if (!providerSessionState) return undefined;
	let state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
		| AnthropicProviderSessionState
		| undefined;
	if (!state) {
		state = createAnthropicProviderSessionState();
		providerSessionState.set(ANTHROPIC_PROVIDER_SESSION_STATE_KEY, state);
	}
	return state;
}
315
+
316
/**
 * Resets the in-session "server rejected fast mode" sticky flag. Intended for
 * callers that explicitly re-arm `serviceTier: "priority"` (e.g. the user
 * toggled `/fast on` after an earlier turn auto-disabled it) so the next
 * request carries `speed: "fast"` again. Does nothing when the map or this
 * provider's state entry does not exist yet.
 */
export function clearAnthropicFastModeFallback(
	providerSessionState: Map<string, ProviderSessionState> | undefined,
): void {
	const state = providerSessionState?.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
		| AnthropicProviderSessionState
		| undefined;
	if (!state) return;
	state.fastModeDisabled = false;
}
332
+
333
/**
 * Detects the HTTP 400 `invalid_request_error` raised when strict tool schemas
 * cannot be compiled: either "compiled grammar … too large" or
 * "schema … too complex … compil…".
 */
function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
	if (extractHttpStatusFromError(error) !== 400) return false;
	const message = error instanceof Error ? error.message : String(error);
	if (!/invalid_request_error/i.test(message)) return false;

	const mentionsAll = (...patterns: RegExp[]): boolean => patterns.every(pattern => pattern.test(message));
	return mentionsAll(/compiled grammar/i, /too large/i) || mentionsAll(/schema/i, /too complex/i, /compil/i);
}
341
+
342
/**
 * Recognizes the two server responses that mean fast mode cannot be used.
 *
 * - 400 `invalid_request_error`: the model does not accept `speed` at all.
 *   Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
 *   Matching is tolerant of phrasing drift ("is not supported", quoted vs backticked field).
 * - 429 `rate_limit_error`: the account lacks the extra-usage entitlement fast
 *   mode requires. Observed: "Extra usage is required for fast mode."
 */
export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
	const status = extractHttpStatusFromError(error);
	if (status !== 400 && status !== 429) return false;

	const message = error instanceof Error ? error.message : String(error);
	if (status === 400) {
		return /invalid_request_error/i.test(message) && /\bspeed\b/i.test(message) && /not support/i.test(message);
	}
	return /rate_limit_error/i.test(message) && /fast mode/i.test(message);
}
364
+
365
/** Reports whether any tool on the request is marked `strict: true`. */
function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
	const tools = params.tools;
	if (tools == null) return false;
	return tools.some(tool => tool.strict === true);
}
368
+
369
/** Removes the `speed` field from the request in place. */
function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
	delete params.speed;
}
372
+
373
/** Removes the `strict` flag from every tool on the request, in place. */
function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
	const tools = params.tools;
	if (!tools) return;
	tools.forEach(tool => {
		delete tool.strict;
	});
}
379
+
380
/**
 * Resolves the effective cache retention and the matching `cache_control` value.
 *
 * When `cacheRetention` is not given, OAuth tokens default to `"long"` and
 * everything else to `resolveCacheRetention(undefined)`. `"none"` yields no
 * `cacheControl`. A `"1h"` TTL is attached only for `"long"` retention against
 * an Anthropic API base URL on a model whose compat flags allow it.
 */
function getCacheControl(
	model: Model<"anthropic-messages">,
	baseUrl: string,
	cacheRetention: CacheRetention | undefined,
	isOAuthToken: boolean,
): { retention: CacheRetention; cacheControl?: AnthropicCacheControl } {
	const retention = cacheRetention ?? (isOAuthToken ? "long" : resolveCacheRetention(undefined));
	if (retention === "none") return { retention };

	const useHourTtl =
		retention === "long" && isAnthropicApiBaseUrl(baseUrl) && getAnthropicCompat(model).supportsLongCacheRetention;
	const cacheControl: AnthropicCacheControl = useHourTtl ? { type: "ephemeral", ttl: "1h" } : { type: "ephemeral" };
	return { retention, cacheControl };
}
399
+
400
// Stealth mode: mimic Claude Code's request fingerprint.
// Version string embedded in the fallback `claude-cli/<version>` User-Agent and in the billing header.
export const claudeCodeVersion = "2.1.160";
// Default prefix used by applyClaudeToolPrefix / stripClaudeToolPrefix.
export const claudeToolPrefix: string = "proxy_";
// NOTE(review): presumably injected as a system block for OAuth requests — the use site is outside this chunk.
export const claudeCodeSystemInstruction = "You are Claude Code, Anthropic's official CLI for Claude.";
404
+
405
/**
 * Maps a platform identifier (case-insensitive) to the OS label used in the
 * `X-Stainless-OS` header. Unknown platforms become `Other::<lower-cased platform>`.
 */
export function mapStainlessOs(platform: string): "MacOS" | "Windows" | "Linux" | "FreeBSD" | `Other::${string}` {
	const normalized = platform.toLowerCase();
	if (normalized === "darwin") return "MacOS";
	if (normalized === "windows" || normalized === "win32") return "Windows";
	if (normalized === "linux") return "Linux";
	if (normalized === "freebsd") return "FreeBSD";
	return `Other::${normalized}`;
}
420
+
421
/**
 * Maps a CPU architecture identifier (case-insensitive) to the label used in
 * the `X-Stainless-Arch` header. Unknown values become `other::<lower-cased arch>`.
 */
export function mapStainlessArch(arch: string): "x64" | "arm64" | "x86" | `other::${string}` {
	const normalized = arch.toLowerCase();
	if (normalized === "amd64" || normalized === "x64") return "x64";
	if (normalized === "arm64" || normalized === "aarch64") return "arm64";
	if (normalized === "386" || normalized === "x86" || normalized === "ia32") return "x86";
	return `other::${normalized}`;
}
437
+
438
/**
 * Static `X-Stainless-*` headers spread into OAuth requests. Their names are
 * also added to `enforcedHeaderKeys`, so caller-supplied model headers cannot
 * override them. Arch and OS are derived once, at module load, from
 * `process.arch` / `process.platform`.
 */
export const claudeCodeHeaders = {
	"X-Stainless-Retry-Count": "0",
	"X-Stainless-Runtime-Version": "v24.3.0",
	"X-Stainless-Package-Version": "0.94.0",
	"X-Stainless-Runtime": "node",
	"X-Stainless-Lang": "js",
	"X-Stainless-Arch": mapStainlessArch(process.arch),
	"X-Stainless-OS": mapStainlessOs(process.platform),
	"X-Stainless-Timeout": "600",
};
448
+
449
/**
 * Lower-cased names of headers this provider sets itself. The header builder
 * drops any caller-supplied model header whose lower-cased name is in this set
 * before merging, so these cannot be overridden.
 */
const enforcedHeaderKeys = new Set(
	[
		...Object.keys(claudeCodeHeaders),
		"Accept",
		"Accept-Encoding",
		"Connection",
		"Content-Type",
		"Anthropic-Version",
		"Anthropic-Dangerous-Direct-Browser-Access",
		"Anthropic-Beta",
		"User-Agent",
		"X-App",
		"Authorization",
		"X-Api-Key",
		"X-Claude-Code-Session-Id",
		"x-client-request-id",
		"cf-aig-authorization",
	].map(key => key.toLowerCase()),
);
468
+
469
const CLAUDE_BILLING_HEADER_PREFIX = "x-anthropic-billing-header:";

/**
 * Builds the billing-header text for a request.
 *
 * Fingerprint: SHA256(salt + msg[4] + msg[7] + msg[20] + version)[:3], matching
 * CC's computeFingerprint in utils/fingerprint.ts. The sampled characters come
 * from the first user message (not the system prompt); a missing position
 * contributes "0".
 *
 * The trailing `cch=00000` is a placeholder that wrapFetchForCch replaces with
 * the real attestation hash before the request hits the wire.
 */
function createClaudeBillingHeader(firstUserMessageText: string): string {
	let sampled = "";
	for (const position of [4, 7, 20]) {
		sampled += firstUserMessageText[position] ?? "0";
	}

	const hasher = nodeCrypto.createHash("sha256");
	hasher.update(`59cf53e54c78${sampled}${claudeCodeVersion}`);
	const versionSuffix = hasher.digest("hex").slice(0, 3);

	return `${CLAUDE_BILLING_HEADER_PREFIX} cc_version=${claudeCodeVersion}.${versionSuffix}; cc_entrypoint=cli; ${CCH_PLACEHOLDER_STR};`;
}
485
+
486
// cch attestation: XXHash64(body_with_placeholder, seed) low-20-bits, 5 hex chars.
const CCH_SEED = 0x4d659218e32a3268n;
// Placeholder emitted by createClaudeBillingHeader; patchCch overwrites its five zero digits in place.
const CCH_PLACEHOLDER_STR = "cch=00000";
// Shared encoder for turning the placeholder, marker and request bodies into bytes.
const cchEncoder = new TextEncoder();
const CCH_PLACEHOLDER = cchEncoder.encode(CCH_PLACEHOLDER_STR);
// Combined anchor for the billing-header placeholder inside system[0].
// "system":[{"type":"text","text":"x-anthropic-billing-header:
// Matches the exact JSON prefix of the first system block when
// createClaudeBillingHeader injects system[0]. "messages" serializes before
// "system" in Anthropic SDK payloads (~byte 29 vs ~byte 4705), so user content
// in the messages array can never match this sequence. User system prompt text
// lives in system[2] and therefore also cannot match.
const BILLING_SYSTEM_MARKER = cchEncoder.encode(`"system":[{"type":"text","text":"${CLAUDE_BILLING_HEADER_PREFIX}`);
// How many bytes past the marker patchCch scans for the placeholder.
const CCH_BILLING_SEARCH_WINDOW = 150;
500
+
501
/**
 * Replaces the `cch=00000` placeholder inside the billing header of a
 * serialized request body with the computed attestation value.
 *
 * The body is modified in place and also returned. It is returned untouched
 * when the system[0] billing-header marker is absent, or when the placeholder
 * is not found within the search window that follows the marker.
 */
function patchCch(body: Uint8Array): Uint8Array {
	// First offset in [start, lastStart] at which `needle` occurs in `body`, or -1.
	const locate = (needle: Uint8Array, start: number, lastStart: number): number => {
		for (let at = start; at <= lastStart; at++) {
			let matched = 0;
			while (matched < needle.length && body[at + matched] === needle[matched]) matched++;
			if (matched === needle.length) return at;
		}
		return -1;
	};

	// Find the combined system[0] + billing-header prefix marker.
	const markerAt = locate(BILLING_SYSTEM_MARKER, 0, body.length - BILLING_SYSTEM_MARKER.length);
	if (markerAt === -1) return body; // no CC billing header injected

	// Scan at most CCH_BILLING_SEARCH_WINDOW bytes after the marker for the placeholder.
	const windowStart = markerAt + BILLING_SYSTEM_MARKER.length;
	const windowEnd = Math.min(windowStart + CCH_BILLING_SEARCH_WINDOW, body.length - CCH_PLACEHOLDER.length);
	const placeholderAt = locate(CCH_PLACEHOLDER, windowStart, windowEnd);
	if (placeholderAt === -1) return body; // placeholder not within the billing header value

	// Hash the body with the placeholder in place (matches CC's in-place behaviour).
	const digest = Bun.hash.xxHash64(body, CCH_SEED);
	const cch = (digest & 0xfffffn).toString(16).padStart(5, "0");

	// Skip the 4-byte "cch=" prefix and overwrite the five placeholder digits.
	const digitsAt = placeholderAt + 4;
	for (let k = 0; k < 5; k++) body[digitsAt + k] = cch.charCodeAt(k);
	return body;
}
533
+
534
type FetchFn = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;

/**
 * Wraps a fetch implementation so that string request bodies containing the
 * cch placeholder are encoded to bytes and patched by patchCch before being
 * sent. All other requests are forwarded unchanged.
 */
function wrapFetchForCch(base: FetchFn): FetchFn {
	return (input, init) => {
		const body = init?.body;
		if (typeof body !== "string" || !body.includes(CCH_PLACEHOLDER_STR)) {
			return base(input, init);
		}
		const patched = patchCch(cchEncoder.encode(body));
		return base(input, { ...init, body: patched });
	};
}
546
+
547
// Lower-case hyphenated UUID, as it appears in the account and session segments.
const CLAUDE_CLOAKING_UUID_SOURCE = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}";
// user_<64 hex chars>_account_<uuid>_session_<uuid>
const CLAUDE_CLOAKING_USER_ID_REGEX = new RegExp(
	`^user_[0-9a-fA-F]{64}_account_${CLAUDE_CLOAKING_UUID_SOURCE}_session_${CLAUDE_CLOAKING_UUID_SOURCE}$`,
);

/** Reports whether `userId` has the `user_<hash>_account_<uuid>_session_<uuid>` shape. */
export function isClaudeCloakingUserId(userId: string): boolean {
	return CLAUDE_CLOAKING_USER_ID_REGEX.test(userId);
}
553
+
554
/**
 * Real Claude Code sends `metadata.user_id` as a JSON-stringified object of the
 * shape `{ device_id, account_uuid, session_id, ...extra }` (see
 * services/api/claude.ts → getAPIMetadata). That shape is accepted here so
 * callers supplying a stable `session_id` are not silently overwritten with
 * fresh entropy on every request, which would inflate the backend session count.
 *
 * @returns `true` when `userId` is a JSON object with a non-empty string `session_id`.
 */
function isClaudeJsonUserId(userId: string): boolean {
	if (!userId.startsWith("{")) return false;
	try {
		const parsed: unknown = JSON.parse(userId);
		if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return false;
		const sessionId = (parsed as Record<string, unknown>).session_id;
		return typeof sessionId === "string" && sessionId !== "";
	} catch {
		return false;
	}
}
573
+
574
/**
 * Extracts the session id from a metadata `user_id`, supporting both the
 * cloaking form (`…_session_<uuid>`) and the JSON form (`{"session_id": …}`).
 * Returns `undefined` for non-strings and for values matching neither form.
 */
function extractClaudeMetadataSessionId(userId: unknown): string | undefined {
	if (typeof userId !== "string") return undefined;

	if (isClaudeCloakingUserId(userId)) {
		const marker = "_session_";
		return userId.slice(userId.lastIndexOf(marker) + marker.length);
	}

	if (!userId.startsWith("{")) return undefined;
	try {
		const parsed: unknown = JSON.parse(userId);
		if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return undefined;
		const sessionId = (parsed as Record<string, unknown>).session_id;
		return typeof sessionId === "string" && sessionId !== "" ? sessionId : undefined;
	} catch {
		return undefined;
	}
}
590
+
591
/**
 * Generates a random id of the form
 * `user_<64 hex chars>_account_<uuid>_session_<uuid>`.
 */
export function generateClaudeCloakingUserId(): string {
	const segments = [
		"user",
		nodeCrypto.randomBytes(32).toString("hex"),
		"account",
		nodeCrypto.randomUUID().toLowerCase(),
		"session",
		nodeCrypto.randomUUID().toLowerCase(),
	];
	return segments.join("_");
}
597
+
598
/** Derives a hex device id as SHA-256 of a fixed namespace prefix plus the install id. */
function deriveClaudeDeviceIdFromInstallId(): string {
	const hasher = nodeCrypto.createHash("sha256");
	hasher.update(`aery-claude-device-id-v1:${getInstallId()}`);
	return hasher.digest("hex");
}
601
/**
 * Builds the JSON-form `user_id`: `{ device_id, session_id }`, using the given
 * session id or a fresh lower-cased random UUID when none is supplied.
 */
function generateClaudeJsonUserId(sessionId?: string): string {
	const payload = {
		device_id: deriveClaudeDeviceIdFromInstallId(),
		session_id: sessionId ?? nodeCrypto.randomUUID().toLowerCase(),
	};
	return JSON.stringify(payload);
}
607
+
608
/**
 * Chooses the `metadata.user_id` to send.
 *
 * - Non-OAuth: a string `userId` is passed through as-is; anything else yields `undefined`.
 * - OAuth: a string `userId` is kept only when it already has the cloaking or
 *   JSON shape; otherwise a JSON-form id is generated (reusing `sessionId` when given).
 */
function resolveAnthropicMetadataUserId(
	userId: unknown,
	isOAuthToken: boolean,
	sessionId?: string,
): string | undefined {
	if (!isOAuthToken) {
		return typeof userId === "string" ? userId : undefined;
	}
	if (typeof userId === "string" && (isClaudeCloakingUserId(userId) || isClaudeJsonUserId(userId))) {
		return userId;
	}
	return generateClaudeJsonUserId(sessionId);
}
622
// Tool names that are never prefixed (compared case-insensitively).
const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);

/**
 * Prepends the tool prefix to `name` unless the prefix is empty, the name is a
 * built-in tool, or the name already starts with the prefix (case-insensitive).
 */
export const applyClaudeToolPrefix = (name: string, prefixOverride: string = claudeToolPrefix) => {
	const loweredName = name.toLowerCase();
	const leaveUntouched =
		!prefixOverride ||
		ANTHROPIC_BUILTIN_TOOL_NAMES.has(loweredName) ||
		loweredName.startsWith(prefixOverride.toLowerCase());
	return leaveUntouched ? name : `${prefixOverride}${name}`;
};
630
+
631
/**
 * Removes a leading tool prefix from `name` (matched case-insensitively).
 * Returns `name` unchanged when the prefix is empty or not present.
 */
export const stripClaudeToolPrefix = (name: string, prefixOverride: string = claudeToolPrefix) => {
	const carriesPrefix = prefixOverride.length > 0 && name.toLowerCase().startsWith(prefixOverride.toLowerCase());
	return carriesPrefix ? name.slice(prefixOverride.length) : name;
};
637
+
638
// Requests with more image blocks than this have their oversized images downscaled.
const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
// Largest width or height allowed for an image in such a request.
const ANTHROPIC_MANY_IMAGE_MAX_DIMENSION = 2000;
640
+
641
/**
 * Counts image blocks across user, developer and tool-result messages whose
 * content is an array. Other roles and string content are ignored.
 */
function countAnthropicImageBlocks(messages: Message[]): number {
	return messages.reduce((total, message) => {
		if (message.role !== "user" && message.role !== "developer" && message.role !== "toolResult") return total;
		if (!Array.isArray(message.content)) return total;
		let images = 0;
		for (const block of message.content) {
			if (block.type === "image") images += 1;
		}
		return total + images;
	}, 0);
}
652
+
653
/**
 * Downscales one image block so neither side exceeds
 * ANTHROPIC_MANY_IMAGE_MAX_DIMENSION, preserving aspect ratio. The image is
 * encoded as both PNG and JPEG (quality 85) and the smaller result is kept
 * (PNG on a tie).
 *
 * Returns the original block unchanged when its dimensions are unknown, when it
 * already fits, or when any step fails (the failure is logged as a warning).
 */
async function resizeAnthropicManyImageBlock(block: ImageContent): Promise<ImageContent> {
	try {
		const source = Buffer.from(block.data, "base64");
		const { width, height } = await new Bun.Image(source).metadata();
		if (!width || !height) return block;

		const limit = ANTHROPIC_MANY_IMAGE_MAX_DIMENSION;
		if (width <= limit && height <= limit) return block;

		const scale = Math.min(limit / width, limit / height);
		// Rounding may land on 0 or just above the limit; clamp into [1, limit].
		const clampSide = (side: number): number => Math.max(1, Math.min(limit, Math.round(side * scale)));
		const targetWidth = clampSide(width);
		const targetHeight = clampSide(height);
		const scaled = () => new Bun.Image(source).resize(targetWidth, targetHeight);

		const [png, jpeg] = await Promise.all([scaled().png().bytes(), scaled().jpeg({ quality: 85 }).bytes()]);
		const preferPng = png.length <= jpeg.length;

		return {
			type: "image",
			data: Buffer.from(preferPng ? png : jpeg).toString("base64"),
			mimeType: preferPng ? "image/png" : "image/jpeg",
		};
	} catch (error) {
		logger.warn("anthropic: failed to resize oversized image for many-image request", {
			mimeType: block.mimeType,
			error: error instanceof Error ? error.message : String(error),
		});
		return block;
	}
}
684
+
685
/**
 * Resizes every image block in `content` concurrently, incrementing
 * `state.resized` once per block that actually changed. Returns the original
 * array instance when nothing changed, so callers can detect this by identity.
 */
async function resizeAnthropicManyImageContent(
	content: (TextContent | ImageContent)[],
	state: { resized: number },
): Promise<(TextContent | ImageContent)[]> {
	let anyReplaced = false;

	const processBlock = async (block: TextContent | ImageContent): Promise<TextContent | ImageContent> => {
		if (block.type !== "image") return block;
		const candidate = await resizeAnthropicManyImageBlock(block);
		if (candidate === block) return block;
		anyReplaced = true;
		state.resized++;
		return candidate;
	};

	const processed = await Promise.all(content.map(block => processBlock(block)));
	return anyReplaced ? processed : content;
}
703
+
704
/**
 * Applies image resizing to one message. Tool-result messages and
 * user/developer messages with array content are processed; every other
 * message is returned as-is. The original message object is returned when no
 * block changed.
 */
async function resizeAnthropicManyImageMessage(message: Message, state: { resized: number }): Promise<Message> {
	if (message.role === "toolResult") {
		const content = await resizeAnthropicManyImageContent(message.content, state);
		return content === message.content ? message : { ...message, content };
	}
	if (message.role !== "user" && message.role !== "developer") return message;
	if (!Array.isArray(message.content)) return message;

	const content = await resizeAnthropicManyImageContent(message.content, state);
	return content === message.content ? message : { ...message, content };
}
716
+
717
/**
 * When the model supports images and the context holds more than
 * ANTHROPIC_MANY_IMAGE_THRESHOLD image blocks, downsizes oversized images in
 * all messages concurrently. Returns the original context when nothing needed
 * resizing; otherwise logs a debug summary and returns a copy with the updated
 * messages.
 */
async function prepareAnthropicManyImageContext(context: Context, supportsImages: boolean): Promise<Context> {
	if (!supportsImages) return context;
	const imageCount = countAnthropicImageBlocks(context.messages);
	if (imageCount <= ANTHROPIC_MANY_IMAGE_THRESHOLD) return context;

	const state = { resized: 0 };
	const pairs = await Promise.all(
		context.messages.map(async before => ({
			before,
			after: await resizeAnthropicManyImageMessage(before, state),
		})),
	);
	if (pairs.every(pair => pair.after === pair.before)) return context;

	logger.debug("anthropic: resized oversized images for many-image request", {
		imageCount,
		resized: state.resized,
		maxDimension: ANTHROPIC_MANY_IMAGE_MAX_DIMENSION,
	});
	return { ...context, messages: pairs.map(pair => pair.after) };
}
739
+
740
/**
 * Convert content blocks to Anthropic API format.
 *
 * Text blocks are made well-formed and dropped when blank. With no images, or
 * when images are unsupported, the result is a single newline-joined string
 * (with a placeholder line appended if images had to be omitted). Otherwise
 * the result is an array of text blocks followed by image blocks; if there is
 * no text at all, a "(see attached image)" text block is placed first.
 */
function convertContentBlocks(
	content: (TextContent | ImageContent)[],
	supportsImages = true,
):
	| string
	| Array<
			| { type: "text"; text: string }
			| {
					type: "image";
					source: {
						type: "base64";
						media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp";
						data: string;
					};
			  }
	  > {
	const texts: string[] = [];
	const images: ImageContent[] = [];
	for (const block of content) {
		if (block.type === "text") {
			const text = block.text.toWellFormed();
			if (text.trim().length > 0) texts.push(text);
		} else if (block.type === "image") {
			images.push(block);
		}
	}

	if (images.length === 0 || !supportsImages) {
		// Reaching here with images present means they are being omitted for a non-vision model.
		if (images.length > 0) {
			texts.push(NON_VISION_IMAGE_PLACEHOLDER);
		}
		return texts.join("\n").toWellFormed();
	}

	const leadingTexts = texts.length > 0 ? texts : ["(see attached image)"];
	return [
		...leadingTexts.map(text => ({ type: "text" as const, text })),
		...images.map(image => ({
			type: "image" as const,
			source: {
				type: "base64" as const,
				media_type: image.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
				data: image.data,
			},
		})),
	];
}
796
+
797
/**
 * Effort levels accepted for adaptive thinking.
 * NOTE(review): "xhigh" is not described in the `effort` option docs below — confirm its semantics.
 */
export type AnthropicEffort = "low" | "medium" | "high" | "xhigh" | "max";
/** How thinking content is returned when the transport supports a display option. */
export type AnthropicThinkingDisplay = "summarized" | "omitted";

/** Stream options specific to the Anthropic Messages provider. */
export interface AnthropicOptions extends StreamOptions {
	/**
	 * Enable extended thinking.
	 * For Opus 4.6+: uses adaptive thinking (Claude decides when/how much to think).
	 * For older models: uses budget-based thinking with thinkingBudgetTokens.
	 */
	thinkingEnabled?: boolean;
	/**
	 * Token budget for extended thinking (older models only).
	 * Ignored for Opus 4.6+ which uses adaptive thinking.
	 */
	thinkingBudgetTokens?: number;
	/**
	 * Effort level for adaptive thinking (Opus 4.6+ only).
	 * Controls how much thinking Claude allocates:
	 * - "max": Always thinks with no constraints
	 * - "high": Always thinks, deep reasoning (default)
	 * - "medium": Moderate thinking, may skip for simple queries
	 * - "low": Minimal thinking, skips for simple tasks
	 * Ignored for older models.
	 */
	effort?: AnthropicEffort;
	/**
	 * Optional reasoning level fallback for direct Anthropic provider usage.
	 * Converted to adaptive effort when effort is not explicitly provided.
	 */
	reasoning?: SimpleStreamOptions["reasoning"];
	/**
	 * Controls how Anthropic returns thinking content when the selected thinking
	 * transport supports a display option. Defaults to "summarized" where the
	 * API accepts it.
	 */
	thinkingDisplay?: AnthropicThinkingDisplay;
	/** NOTE(review): presumably toggles interleaved thinking; the handling is outside this chunk — confirm. */
	interleavedThinking?: boolean;
	/** Tool selection: one of the named modes, or an object naming a specific tool. */
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
	/** Extra beta identifiers, given as a list or as a single string. NOTE(review): string format not visible here — confirm. */
	betas?: string[] | string;
	/**
	 * Realization of `serviceTier: "priority"` on Anthropic models. When
	 * `"priority"`, sets `speed: "fast"` on the request and appends the
	 * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
	 * with `invalid_request_error`, which triggers an in-provider one-shot
	 * fallback (see `fastModeDisabled` provider state).
	 *
	 * Other `ServiceTier` values are currently ignored on this provider.
	 */
	serviceTier?: ServiceTier;
	/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
	isOAuth?: boolean;
	/**
	 * Pre-built Anthropic Messages client. When provided, skips internal client
	 * construction entirely. Accepts any structurally compatible client,
	 * including SDK clients such as `AnthropicVertex`.
	 */
	client?: AnthropicMessagesClientLike;
}
855
+
856
/**
 * Inputs for building Anthropic client options.
 * NOTE(review): the function consuming this type is outside this chunk; field
 * semantics beyond their names and types should be confirmed there.
 */
export type AnthropicClientOptionsArgs = {
	model: Model<"anthropic-messages">;
	apiKey: string;
	extraBetas?: string[];
	stream?: boolean;
	interleavedThinking?: boolean;
	headers?: Record<string, string>;
	dynamicHeaders?: Record<string, string>;
	isOAuth?: boolean;
	hasTools?: boolean;
	thinkingEnabled?: boolean;
	thinkingDisplay?: AnthropicThinkingDisplay;
	onSseEvent?: AnthropicOptions["onSseEvent"];
	fetch?: FetchImpl;
	claudeCodeSessionId?: string;
};
872
+
873
/**
 * Resolved client construction options.
 * NOTE(review): the producer is outside this chunk; presumably exactly one of
 * `apiKey` / `authToken` is non-null depending on `isOAuthToken` — confirm.
 */
export type AnthropicClientOptionsResult = {
	isOAuthToken: boolean;
	apiKey: string | null;
	authToken?: string | null;
	baseURL?: string;
	maxRetries: number;
	defaultHeaders: Record<string, string>;
	fetch?: FetchImpl;
	fetchOptions?: AnthropicFetchOptions;
};
883
+
884
// Cipher list applied to the TLS fetch options; currently Node's default list.
const CLAUDE_CODE_TLS_CIPHERS = tls.DEFAULT_CIPHERS;

/** Optional CA bundle and client certificate/key pair resolved from the environment in Foundry mode. */
type FoundryTlsOptions = {
	ca?: string | string[];
	cert?: string;
	key?: string;
};
891
+
892
/**
 * Picks the base URL for a model.
 *
 * - `github-copilot`: the Copilot-resolved URL, falling back to the model's own.
 * - `anthropic`: `FOUNDRY_BASE_URL` when Foundry is enabled and that value
 *   normalizes to something; otherwise the model's URL, defaulting to
 *   `https://api.anthropic.com`.
 * - anything else: the model's normalized URL (possibly `undefined`).
 */
function resolveAnthropicBaseUrl(model: Model<"anthropic-messages">, apiKey?: string): string | undefined {
	switch (model.provider) {
		case "github-copilot":
			return normalizeAnthropicBaseUrl(resolveGitHubCopilotBaseUrl(model.baseUrl, apiKey) ?? model.baseUrl);
		case "anthropic": {
			if (isFoundryEnabled()) {
				const foundryBaseUrl = normalizeAnthropicBaseUrl($env.FOUNDRY_BASE_URL);
				if (foundryBaseUrl) return foundryBaseUrl;
			}
			return normalizeAnthropicBaseUrl(model.baseUrl) ?? "https://api.anthropic.com";
		}
		default:
			return normalizeAnthropicBaseUrl(model.baseUrl);
	}
}
907
+
908
/**
 * Parses a `Name: value` list separated by newlines or commas into a header
 * map. Entries without a colon, with an empty name, or with an empty value are
 * skipped; a repeated name keeps the last value. Returns `undefined` when the
 * input is empty or yields no usable entries.
 */
function parseAnthropicCustomHeaders(rawHeaders: string | undefined): Record<string, string> | undefined {
	const source = rawHeaders?.trim();
	if (!source) return undefined;

	const parsed: Record<string, string> = {};
	for (const token of source.split(/\r?\n|,/)) {
		// Split at the first colon; at least one character must precede it.
		const pieces = /^([^:]+):(.*)$/s.exec(token.trim());
		if (!pieces) continue;
		const [, rawKey = "", rawValue = ""] = pieces;
		const key = rawKey.trim();
		const value = rawValue.trim();
		if (key && value) parsed[key] = value;
	}

	return Object.keys(parsed).length > 0 ? parsed : undefined;
}
926
+
927
/**
 * Returns env-supplied custom headers (`ANTHROPIC_CUSTOM_HEADERS`) when they
 * should be forwarded to the upstream endpoint.
 *
 * Foundry mode forwards them unconditionally. Outside Foundry, they're applied
 * only when the configured base URL is a non-Anthropic host — i.e. an
 * enterprise/corporate gateway that may require its own proprietary auth
 * header. Stock `api.anthropic.com` would reject unknown headers, so they're
 * omitted there.
 */
export function resolveAnthropicCustomHeadersForBaseUrl(
	baseUrl: string | undefined,
): Record<string, string> | undefined {
	const shouldForward = isFoundryEnabled() || !isAnthropicApiBaseUrl(baseUrl);
	return shouldForward ? parseAnthropicCustomHeaders($env.ANTHROPIC_CUSTOM_HEADERS) : undefined;
}
943
+
944
/** Custom env headers for a model; only the `anthropic` provider is eligible. */
function resolveAnthropicCustomHeaders(model: Model<"anthropic-messages">): Record<string, string> | undefined {
	return model.provider === "anthropic" ? resolveAnthropicCustomHeadersForBaseUrl(model.baseUrl) : undefined;
}
948
+
949
/** Heuristic: contains a path separator, or ends in a certificate/key file extension. */
function looksLikeFilePath(value: string): boolean {
	if (/[\\/]/.test(value)) return true;
	return /\.(pem|crt|cer|key)$/i.test(value);
}
952
+
953
/**
 * Resolves a PEM setting that may be given inline or as a file path.
 *
 * Literal `\n` sequences are expanded to newlines. A value containing
 * `-----BEGIN` is returned inline; a value that looks like a file path is read
 * from disk; anything else is returned inline as-is.
 *
 * @param name Setting name used in the "path does not exist" error.
 * @returns The PEM text, or `undefined` when the value is missing or blank.
 * @throws Error when the path does not exist; other read errors are rethrown.
 */
function resolvePemValue(value: string | undefined, name: string): string | undefined {
	const trimmed = value?.trim();
	if (!trimmed) return undefined;

	const inline = trimmed.replace(/\\n/g, "\n");
	if (inline.includes("-----BEGIN") || !looksLikeFilePath(trimmed)) {
		return inline;
	}

	try {
		return fs.readFileSync(trimmed, "utf8");
	} catch (error) {
		throw isEnoent(error) ? new Error(`${name} path does not exist: ${trimmed}`) : error;
	}
}
975
+
976
/**
 * Collects Foundry TLS material from the environment for the `anthropic`
 * provider: an extra CA (appended to the built-in roots) and an optional
 * client certificate/key pair for mTLS.
 *
 * @returns The options, or `undefined` when not applicable or nothing is configured.
 * @throws Error when only one of the client cert / key is set.
 */
function resolveFoundryTlsOptions(model: Model<"anthropic-messages">): FoundryTlsOptions | undefined {
	if (model.provider !== "anthropic" || !isFoundryEnabled()) return undefined;

	const ca = resolvePemValue($env.NODE_EXTRA_CA_CERTS, "NODE_EXTRA_CA_CERTS");
	const cert = resolvePemValue($env.CLAUDE_CODE_CLIENT_CERT, "CLAUDE_CODE_CLIENT_CERT");
	const key = resolvePemValue($env.CLAUDE_CODE_CLIENT_KEY, "CLAUDE_CODE_CLIENT_KEY");

	if (Boolean(cert) !== Boolean(key)) {
		throw new Error("Both CLAUDE_CODE_CLIENT_CERT and CLAUDE_CODE_CLIENT_KEY must be set for mTLS.");
	}

	const options: FoundryTlsOptions = {
		...(ca ? { ca: [...tls.rootCertificates, ca] } : {}),
		...(cert ? { cert } : {}),
		...(key ? { key } : {}),
	};
	return Object.keys(options).length > 0 ? options : undefined;
}
994
+
995
/**
 * Builds TLS fetch options for the `anthropic` provider: certificate
 * verification on, the server name taken from the base URL's hostname, the
 * configured cipher list, plus any Foundry TLS material.
 *
 * Returns `undefined` for other providers, or when the base URL is missing,
 * unparsable, or has no hostname.
 */
function buildClaudeCodeTlsFetchOptions(
	model: Model<"anthropic-messages">,
	baseUrl: string | undefined,
): AnthropicFetchOptions | undefined {
	if (model.provider !== "anthropic" || !baseUrl) return undefined;

	let serverName: string;
	try {
		({ hostname: serverName } = new URL(baseUrl));
	} catch {
		return undefined;
	}
	if (!serverName) return undefined;

	const foundryTlsOptions = resolveFoundryTlsOptions(model) ?? {};
	const cipherOptions = CLAUDE_CODE_TLS_CIPHERS ? { ciphers: CLAUDE_CODE_TLS_CIPHERS } : {};

	return {
		tls: {
			rejectUnauthorized: true,
			serverName,
			...cipherOptions,
			...foundryTlsOptions,
		},
	};
}
1022
/**
 * Merges header maps left to right into a new object; later sources win and
 * `undefined` sources are ignored.
 */
function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
	const merged: Record<string, string> = {};
	// Object.assign skips undefined sources on its own.
	return Object.assign(merged, ...headerSources);
}
1031
+
1032
// SSE event names that iterateAnthropicEvents parses and yields. Any other
// event name is skipped there (apart from "error", which it turns into a throw).
const ANTHROPIC_MESSAGE_EVENTS: ReadonlySet<string> = new Set([
	"message_start",
	"message_delta",
	"message_stop",
	"content_block_start",
	"content_block_delta",
	"content_block_stop",
]);
1040
+
1041
/**
 * Turns an Anthropic SSE response into a stream of parsed message events.
 *
 * Every raw SSE event is first reported to `onSseEvent`. An `error` event
 * aborts the stream with its data as the error message. Events whose name is
 * not in ANTHROPIC_MESSAGE_EVENTS are skipped. Remaining events are JSON-parsed
 * (with repair) and yielded.
 *
 * The `yield` deliberately sits outside the parse `try` block so that an error
 * injected by the consumer at the yield point is not re-labelled as a parse
 * failure.
 *
 * @throws Error when the response has no body, on an SSE `error` event, when an
 *   event cannot be parsed, or when the stream ends after `message_start`
 *   without a `message_stop`.
 */
async function* iterateAnthropicEvents(
	response: Response,
	signal?: AbortSignal,
	onSseEvent?: AnthropicOptions["onSseEvent"],
): AsyncGenerator<RawMessageStreamEvent> {
	if (!response.body) {
		throw new Error("Attempted to iterate over an Anthropic response with no body");
	}

	let sawMessageStart = false;
	let sawMessageEnd = false;

	for await (const sse of readSseEvents(response.body, signal)) {
		notifyRawSseEvent(onSseEvent, sse);
		if (sse.event === "error") {
			throw new Error(sse.data);
		}

		if (!ANTHROPIC_MESSAGE_EVENTS.has(sse.event ?? "")) {
			continue;
		}

		let event: RawMessageStreamEvent;
		try {
			event = parseJsonWithRepair<RawMessageStreamEvent>(sse.data);
			// Kept inside the try: a payload that parses to a non-object fails here
			// and is still reported as a parse problem.
			if (event.type === "message_start") {
				sawMessageStart = true;
			} else if (event.type === "message_stop") {
				sawMessageEnd = true;
			}
		} catch (error) {
			const message = error instanceof Error ? error.message : String(error);
			throw new Error(
				`Could not parse Anthropic SSE event ${sse.event}: ${message}; data=${sse.data}; raw=${sse.raw.join("\\n")}`,
			);
		}
		yield event;
	}

	if (sawMessageStart && !sawMessageEnd) {
		throw createAnthropicStreamEnvelopeError("stream ended before message_stop");
	}
}
1083
+
1084
/** Request handle that can hand back the underlying HTTP `Response`. */
type AnthropicRawResponseRequest = {
  asResponse(): Promise<Response>;
};

/** Type guard: true when `request` is a record exposing a callable `asResponse`. */
function hasAnthropicRawResponseRequest(request: unknown): request is AnthropicRawResponseRequest {
  if (!isRecord(request)) {
    return false;
  }
  return typeof request.asResponse === "function";
}
1091
+
1092
/**
 * Request handle that resolves to an already-parsed event stream together with
 * the HTTP `Response` and the request id.
 */
type AnthropicStreamWithResponseRequest = {
  withResponse(): Promise<{
    data: AsyncIterable<RawMessageStreamEvent>;
    response: Response;
    request_id: string | null;
  }>;
};

/** Type guard: true when `request` is a record exposing a callable `withResponse`. */
function hasAnthropicStreamWithResponseRequest(request: unknown): request is AnthropicStreamWithResponseRequest {
  if (!isRecord(request)) {
    return false;
  }
  return typeof request.withResponse === "function";
}
1103
+
1104
/**
 * Resolves an SDK request handle into an event stream plus response metadata.
 *
 * Handles exposing `asResponse()` are preferred: the raw HTTP response is
 * parsed locally by `iterateAnthropicEvents` and the request id is read from
 * the `request-id` response header. Otherwise a handle exposing
 * `withResponse()` is used and its already-parsed event stream is returned.
 *
 * @param request Value returned by the SDK's `messages.create(...)` call.
 * @param signal Abort signal forwarded to the local SSE parser (raw path only).
 * @param onSseEvent Raw SSE observer forwarded to the local parser (raw path only).
 * @throws Error when the handle exposes neither accessor.
 */
async function getAnthropicStreamResponse(
  request: unknown,
  signal?: AbortSignal,
  onSseEvent?: AnthropicOptions["onSseEvent"],
): Promise<{ events: AsyncIterable<RawMessageStreamEvent>; response: Response; requestId: string | null }> {
  if (hasAnthropicRawResponseRequest(request)) {
    const rawResponse = await request.asResponse();
    const events = iterateAnthropicEvents(rawResponse, signal, onSseEvent);
    const requestId = rawResponse.headers.get("request-id");
    return { events, response: rawResponse, requestId };
  }

  if (!hasAnthropicStreamWithResponseRequest(request)) {
    throw new Error("Anthropic SDK request did not expose a stream response");
  }

  const resolved = await request.withResponse();
  return { events: resolved.data, response: resolved.response, requestId: resolved.request_id };
}
1123
+
1124
/**
 * Expands a model's optional `compat` overrides into a fully populated object.
 *
 * Explicit values on `model.compat` always win. Defaults applied otherwise:
 * strict tools and adaptive thinking stay enabled (their `disable*` flags are
 * `false`), eager tool-input streaming and long cache retention are assumed
 * supported, and mid-conversation system messages are auto-detected.
 */
function getAnthropicCompat(
  model: Model<"anthropic-messages">,
): Required<NonNullable<Model<"anthropic-messages">["compat"]>> {
  const overrides = model.compat;
  const disableStrictTools = overrides?.disableStrictTools ?? false;
  const disableAdaptiveThinking = overrides?.disableAdaptiveThinking ?? false;
  const supportsEagerToolInputStreaming = overrides?.supportsEagerToolInputStreaming ?? true;
  const supportsLongCacheRetention = overrides?.supportsLongCacheRetention ?? true;
  // First-party Claude API only. Bedrock/Vertex/Foundry and other
  // Anthropic-compatible proxies reject the role; gate auto-detection on
  // the canonical api.anthropic.com host plus an Opus 4.8+ model id.
  const supportsMidConversationSystem =
    overrides?.supportsMidConversationSystem ??
    (isAnthropicApiBaseUrl(model.baseUrl) && supportsMidConversationSystemMessages(model.id));

  return {
    disableStrictTools,
    disableAdaptiveThinking,
    supportsEagerToolInputStreaming,
    supportsLongCacheRetention,
    supportsMidConversationSystem,
  };
}
1140
+
1141
/** Upper bound on retry attempts in `streamAnthropic`'s retry loop before the failure is rethrown. */
const PROVIDER_MAX_RETRIES = 3;
/** Base backoff in milliseconds; `streamAnthropic` doubles it on each successive retry attempt. */
const PROVIDER_BASE_DELAY_MS = 2000;
1143
+
1144
/**
 * Detects transient stream corruption where the response was truncated
 * mid-JSON (malformed JSON stream-envelope parse errors, as seen from some
 * Anthropic-compatible proxy endpoints).
 *
 * `isProviderRetryableError` counts these among the failures that may be
 * retried before any content has been emitted.
 *
 * @returns `true` only for `Error` instances whose message mentions a JSON
 *   parse error, an unterminated string, or an unexpected end of JSON input
 *   (case-insensitive).
 */
function isTransientStreamParseError(error: unknown): boolean {
  return (
    error instanceof Error &&
    /json parse error|unterminated string|unexpected end of json input/i.test(error.message)
  );
}
1156
+
1157
/** Marker placed at the start of every locally raised stream-envelope error message. */
const ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX = "Anthropic stream envelope error:";

/**
 * Builds an `Error` describing a malformed event envelope (wrong ordering or
 * a truncated stream). The message is the shared prefix, one space, then `message`.
 */
function createAnthropicStreamEnvelopeError(message: string): Error {
  const fullMessage = [ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX, message].join(" ");
  return new Error(fullMessage);
}
1162
+
1163
/**
 * Message-envelope event types. Seeing any of these before `message_start`
 * means the stream is out of order, so they are never silently ignored.
 */
const ANTHROPIC_PRE_MESSAGE_START_EVENT_TYPES = new Set([
  "content_block_start",
  "content_block_delta",
  "content_block_stop",
  "message_delta",
  "message_stop",
  "message_start",
]);

/**
 * Decides whether an event arriving before `message_start` can be skipped.
 *
 * @returns `true` for `ping` and for any string type that is not one of the
 *   envelope event types above; `false` for envelope events and for
 *   non-string values.
 */
function shouldIgnoreAnthropicPreambleEvent(eventType: unknown): boolean {
  return (
    typeof eventType === "string" &&
    (eventType === "ping" || !ANTHROPIC_PRE_MESSAGE_START_EVENT_TYPES.has(eventType))
  );
}
1177
+
1178
/**
 * Detects stream-envelope failures: either an error raised locally with the
 * envelope prefix, or a message mentioning stream event order, an event
 * arriving before `message_start`, or a stream ending before its terminal
 * stop signal (case-insensitive).
 */
function isTransientStreamEnvelopeError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;
  const { message } = error;
  if (message.includes(ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX)) {
    return true;
  }
  return /stream event order|before message_start|before terminal stop signal/i.test(message);
}
1185
+
1186
/**
 * Narrower envelope check used by `isProviderRetryableError`: matches only
 * event-ordering failures (message mentions "stream event order" or
 * "before message_start", case-insensitive).
 */
function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
  return error instanceof Error && /stream event order|before message_start/i.test(error.message);
}
1190
+
1191
/**
 * Checks whether a provider error is a rate-limit or transient failure that
 * is worth retrying.
 *
 * Checks run in this order and the first hit wins:
 * 1. GitHub Copilot transient model errors (only when `provider` is `github-copilot`).
 * 2. Unexpected socket-close messages.
 * 3. Known transient phrases in the lower-cased message (rate limit, too many
 *    requests, overloaded, service unavailable, internal_error, peer stream
 *    errors, `1302`, first-event timeouts).
 * 4. Truncated-JSON parse errors and event-ordering envelope errors.
 * 5. The generic `isRetryableError` fallback.
 *
 * @param error Value caught from the request or stream; non-`Error` values are never retryable.
 * @param provider Provider id, used only for the Copilot-specific check.
 */
export function isProviderRetryableError(error: unknown, provider?: string): boolean {
  if (!(error instanceof Error)) return false;
  if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;

  const lowered = error.message.toLowerCase();
  if (isUnexpectedSocketCloseMessage(lowered)) return true;

  const mentionsTransientCondition =
    /rate.?limit|too many requests|overloaded|service.?unavailable|internal_error|stream error.*received from peer|1302|timed?\s*out while waiting for the first event|timeout waiting for first/i.test(
      lowered,
    );
  if (mentionsTransientCondition) return true;

  if (isTransientStreamParseError(error)) return true;
  if (isProviderRetryableStreamEnvelopeError(error)) return true;

  return isRetryableError(error);
}
1207
+
1208
/**
 * Creates a zeroed usage record.
 *
 * @param premiumRequests When provided, copied onto the record as
 *   `premiumRequests`; when `undefined` the key is omitted entirely.
 * @returns A fresh object on every call (including a fresh `cost` object).
 */
function createEmptyUsage(premiumRequests?: number): Usage {
  const zeroTokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0 };
  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
  if (premiumRequests === undefined) {
    return { ...zeroTokens, cost: zeroCost };
  }
  // Keep `premiumRequests` between the token counters and `cost` so the
  // serialized key order is unchanged.
  return { ...zeroTokens, premiumRequests, cost: zeroCost };
}
1219
+
1220
/**
 * The optional usage fields that `applyAnthropicUsageExtras` reads. Every
 * field may be absent or `null`.
 */
export type AnthropicUsageLike = {
  /** Cache-creation token counts split by the 5-minute and 1-hour ephemeral buckets. */
  cache_creation?: { ephemeral_5m_input_tokens?: number | null; ephemeral_1h_input_tokens?: number | null } | null;
  /** Request counters for the server-side web search and web fetch tools. */
  server_tool_use?: { web_search_requests?: number | null; web_fetch_requests?: number | null } | null;
};
1224
+
1225
/**
 * Copies Anthropic's optional cache-creation TTL breakdown and server-tool-use
 * counters onto the harness usage record.
 *
 * `usage.cttl` and `usage.server` are only assigned when the matching source
 * section is present and at least one of its counters is positive, and each
 * assigned object contains only the positive counters. A later event that
 * omits a section (or reports only zeros) therefore leaves a previously
 * recorded breakdown untouched.
 *
 * @param usage Usage record to update in place.
 * @param source Usage payload from a `message_start` or `message_delta` event.
 */
export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLike): void {
  const ttlBreakdown = source.cache_creation;
  if (ttlBreakdown) {
    const ephemeral5m = ttlBreakdown.ephemeral_5m_input_tokens ?? 0;
    const ephemeral1h = ttlBreakdown.ephemeral_1h_input_tokens ?? 0;
    if (ephemeral5m > 0 && ephemeral1h > 0) {
      usage.cttl = { ephemeral5m, ephemeral1h };
    } else if (ephemeral5m > 0) {
      usage.cttl = { ephemeral5m };
    } else if (ephemeral1h > 0) {
      usage.cttl = { ephemeral1h };
    }
  }

  const toolCounters = source.server_tool_use;
  if (toolCounters) {
    const webSearch = toolCounters.web_search_requests ?? 0;
    const webFetch = toolCounters.web_fetch_requests ?? 0;
    if (webSearch > 0 && webFetch > 0) {
      usage.server = { webSearch, webFetch };
    } else if (webSearch > 0) {
      usage.server = { webSearch };
    } else if (webFetch > 0) {
      usage.server = { webFetch };
    }
  }
}
1255
+
1256
/**
 * Streams one assistant turn through the Anthropic Messages API.
 *
 * The returned `AssistantMessageEventStream` is handed back synchronously; an
 * async task then pushes a `start` event, incremental text/thinking/tool-call
 * events, and finally either a `done` or an `error` event before ending the
 * stream. Failures never reject to the caller: they are reported as the
 * terminal `error` event carrying the partially built message.
 *
 * The request is retried inside the task in three situations:
 * - a strict-tool grammar-too-large rejection (retried with strict tools dropped),
 * - a fast-mode-unsupported rejection (retried without fast mode),
 * - transient provider/envelope failures (exponential backoff, bounded by
 *   `PROVIDER_MAX_RETRIES`).
 *
 * @param model Model descriptor; supplies provider, id, headers and compat flags.
 * @param context Conversation context (messages, tools) to send.
 * @param options Per-call options such as credentials, abort signal, timeouts and hooks.
 * @returns The event stream the task publishes into.
 */
export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
  model: Model<"anthropic-messages">,
  context: Context,
  options?: AnthropicOptions,
): AssistantMessageEventStream => {
  const stream = new AssistantMessageEventStream();

  (async () => {
    const startTime = Date.now();
    let firstTokenTime: number | undefined;

    // Copilot-only request headers; the result also carries the premium-request
    // count that seeds the usage record.
    const copilotDynamicHeaders =
      model.provider === "github-copilot"
        ? buildCopilotDynamicHeaders({
            messages: context.messages,
            hasImages: hasCopilotVisionInput(context.messages),
            premiumMultiplier: model.premiumMultiplier,
            headers: { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
            initiatorOverride: options?.initiatorOverride,
          })
        : undefined;
    // Single message object mutated in place; every pushed event references it as `partial`.
    const output: AssistantMessage = {
      role: "assistant",
      content: [],
      api: model.api as Api,
      provider: model.provider,
      model: model.id,
      usage: createEmptyUsage(copilotDynamicHeaders?.premiumRequests),
      stopReason: "stop",
      timestamp: Date.now(),
    };
    let rawRequestDump: RawHttpRequestDump | undefined;
    let activeAbortTracker = createAbortSourceTracker(options?.signal);

    try {
      let client: AnthropicMessagesClientLike;
      let isOAuthToken: boolean;

      if (options?.client) {
        // A caller-supplied client is used as-is and treated as non-OAuth.
        client = options.client;
        isOAuthToken = false;
      } else {
        const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";

        // Add the beta flags implied by the requested options, without duplicates.
        const extraBetas = normalizeExtraBetas(options?.betas);
        const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
        if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
          extraBetas.push(fastModeBeta);
        }
        if (options?.taskBudget && !extraBetas.includes(taskBudgetBeta)) {
          extraBetas.push(taskBudgetBeta);
        }
        if (options?.thinkingEnabled && model.reasoning && !extraBetas.includes(effortBeta)) {
          extraBetas.push(effortBeta);
        }

        const created = createClient(model, {
          model,
          apiKey,
          extraBetas,
          stream: true,
          interleavedThinking: options?.interleavedThinking ?? true,
          headers: options?.headers,
          dynamicHeaders: copilotDynamicHeaders?.headers,
          isOAuth: options?.isOAuth,
          hasTools: !!context.tools?.length,
          thinkingEnabled: options?.thinkingEnabled,
          thinkingDisplay: options?.thinkingDisplay,
          onSseEvent: options?.onSseEvent,
          fetch: options?.fetch,
          claudeCodeSessionId: options?.sessionId ?? extractClaudeMetadataSessionId(options?.metadata?.user_id),
        });
        client = created.client;
        isOAuthToken = created.isOAuthToken;
      }
      const baseUrl =
        resolveAnthropicBaseUrl(model, options?.apiKey ?? getEnvApiKey(model.provider) ?? "") ??
        "https://api.anthropic.com";
      // Fallback flags start from the shared session state so an earlier
      // downgrade in the same session is not re-discovered on every request.
      const providerSessionState = getAnthropicProviderSessionState(options?.providerSessionState);
      let disableStrictTools =
        (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
      let strictFallbackErrorMessage: string | undefined;
      let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
      const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
      // Rebuilds the request body from the current fallback flags, lets
      // `onPayload` substitute it, and records the request for error reporting.
      const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
        let nextParams = buildParams(model, baseUrl, preparedContext, isOAuthToken, options, disableStrictTools);
        if (disableStrictTools) {
          dropAnthropicStrictTools(nextParams);
        }
        if (dropFastMode) {
          dropAnthropicFastMode(nextParams);
        }
        const replacementPayload = await options?.onPayload?.(nextParams, model);
        if (replacementPayload !== undefined) {
          nextParams = replacementPayload as typeof nextParams;
        }
        rawRequestDump = {
          provider: model.provider,
          api: output.api,
          model: model.id,
          method: "POST",
          url: `${baseUrl}/v1/messages${isOAuthToken ? "?beta=true" : ""}`,
          body: nextParams,
        };
        return nextParams;
      };
      let params = await prepareParams();

      // Content blocks carry temporary bookkeeping while streaming: `index` is
      // the provider's block index, `partialJson`/`lastParseLen` track tool
      // arguments. All three are deleted before the block is finalized.
      type Block = (
        | ThinkingContent
        | RedactedThinkingContent
        | TextContent
        | (ToolCall & { partialJson: string; lastParseLen?: number })
      ) & { index: number };
      const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getStreamIdleTimeoutMs();
      const firstEventTimeoutMs = options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs);
      // The first-event timeout doubles as the request timeout; a non-positive value disables it.
      const requestTimeoutMs =
        firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
      const blocks = output.content as Block[];
      stream.push({ type: "start", partial: output });
      // Retry loop for transient errors from the stream.
      // Provider-level transport/rate-limit failures: only before any streamed content starts.
      // Malformed envelopes/JSON: only before replay-unsafe text/tool events are visible on this stream.
      let providerRetryAttempt = 0;
      // Sentinel errors: created once so the catch block can recognise them by identity.
      const firstEventTimeoutAbortError = new Error("Anthropic stream timed out while waiting for the first event");
      const idleTimeoutAbortError = new Error("Anthropic stream stalled while waiting for the next event");
      while (true) {
        // Fresh abort tracker per attempt so a local abort from a previous attempt does not leak into this one.
        activeAbortTracker = createAbortSourceTracker(options?.signal);
        const { requestSignal } = activeAbortTracker;
        const requestOptions = createSdkStreamRequestOptions(requestSignal, requestTimeoutMs);
        // OAuth requests use the beta endpoint when the client exposes one.
        const anthropicRequest: unknown =
          isOAuthToken && client.beta
            ? client.beta.messages.create({ ...params, stream: true }, requestOptions)
            : client.messages.create({ ...params, stream: true }, requestOptions);
        // Set once text or tool-call events have been pushed for this attempt.
        let streamedReplayUnsafeContent = false;

        try {
          let requestTimeout: NodeJS.Timeout | undefined;
          if (requestTimeoutMs !== undefined) {
            // Local watchdog for obtaining the response; cleared in the `finally` below.
            requestTimeout = setTimeout(
              () => activeAbortTracker.abortLocally(firstEventTimeoutAbortError),
              requestTimeoutMs,
            );
          }
          let anthropicStream: AsyncIterable<RawMessageStreamEvent>;
          let response: Response;
          let requestId: string | null;
          try {
            ({
              events: anthropicStream,
              response,
              requestId,
            } = await getAnthropicStreamResponse(
              anthropicRequest,
              requestSignal,
              // Only injected clients get the SSE observer here; clients built by
              // `createClient` above already received `onSseEvent`.
              options?.client ? event => options?.onSseEvent?.(event, model) : undefined,
            ));
          } catch (error) {
            // Report an SDK connection timeout as the first-event timeout unless the caller aborted.
            if (error instanceof AnthropicConnectionTimeoutError && !activeAbortTracker.wasCallerAbort()) {
              throw firstEventTimeoutAbortError;
            }
            throw error;
          } finally {
            if (requestTimeout !== undefined) clearTimeout(requestTimeout);
          }
          await notifyProviderResponse(options, response, model, requestId);
          let sawEvent = false;
          let sawMessageStart = false;
          let sawTerminalEnvelope = false;

          for await (const event of iterateWithIdleTimeout(anthropicStream, {
            idleTimeoutMs,
            firstItemTimeoutMs: firstEventTimeoutMs,
            errorMessage: idleTimeoutAbortError.message,
            firstItemErrorMessage: firstEventTimeoutAbortError.message,
            onIdle: () => activeAbortTracker.abortLocally(idleTimeoutAbortError),
            onFirstItemTimeout: () => activeAbortTracker.abortLocally(firstEventTimeoutAbortError),
            abortSignal: options?.signal,
          })) {
            sawEvent = true;

            if (event.type === "message_start") {
              // Only the first message_start is honoured; repeats are ignored.
              if (sawMessageStart) {
                continue;
              }
              sawMessageStart = true;
              applyAnthropicUsageExtras(output.usage, event.message.usage);
              output.responseId = event.message.id;
              output.usage.input = event.message.usage.input_tokens || 0;
              output.usage.output = event.message.usage.output_tokens || 0;
              output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
              output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
              output.usage.totalTokens =
                output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
              calculateCost(model, output.usage);
              continue;
            }

            if (!sawMessageStart) {
              // Before message_start, non-envelope events are skipped; envelope events are an ordering error.
              if (shouldIgnoreAnthropicPreambleEvent(event.type)) {
                continue;
              }
              throw createAnthropicStreamEnvelopeError(`received ${event.type} before message_start`);
            }

            if (event.type === "content_block_start") {
              // Any content block start (including thinking) counts as the first token.
              if (!firstTokenTime) firstTokenTime = Date.now();
              if (event.content_block.type === "text") {
                streamedReplayUnsafeContent = true;
                const block: Block = {
                  type: "text",
                  text: "",
                  index: event.index,
                };
                output.content.push(block);
                stream.push({
                  type: "text_start",
                  contentIndex: output.content.length - 1,
                  partial: output,
                });
              } else if (event.content_block.type === "thinking") {
                const block: Block = {
                  type: "thinking",
                  thinking: "",
                  thinkingSignature: "",
                  index: event.index,
                };
                output.content.push(block);
                stream.push({
                  type: "thinking_start",
                  contentIndex: output.content.length - 1,
                  partial: output,
                });
              } else if (event.content_block.type === "redacted_thinking") {
                // Redacted thinking is recorded on the message but emits no stream event.
                const block: Block = {
                  type: "redactedThinking",
                  data: event.content_block.data,
                  index: event.index,
                };
                output.content.push(block);
              } else if (event.content_block.type === "tool_use") {
                streamedReplayUnsafeContent = true;
                const block: Block = {
                  type: "toolCall",
                  id: event.content_block.id,
                  name: isOAuthToken
                    ? stripClaudeToolPrefix(event.content_block.name)
                    : event.content_block.name,
                  arguments: event.content_block.input ?? {},
                  partialJson: "",
                  index: event.index,
                };
                output.content.push(block);
                stream.push({
                  type: "toolcall_start",
                  contentIndex: output.content.length - 1,
                  partial: output,
                });
              }
            } else if (event.type === "content_block_delta") {
              // Deltas find their block by the provider's `event.index`; the emitted
              // `contentIndex` is the block's position in `output.content`. Deltas
              // with no matching block of the expected type are dropped.
              if (event.delta.type === "text_delta") {
                const index = blocks.findIndex(b => b.index === event.index);
                const block = blocks[index];
                if (block && block.type === "text") {
                  block.text += event.delta.text;
                  stream.push({
                    type: "text_delta",
                    contentIndex: index,
                    delta: event.delta.text,
                    partial: output,
                  });
                }
              } else if (event.delta.type === "thinking_delta") {
                const index = blocks.findIndex(b => b.index === event.index);
                const block = blocks[index];
                if (block && block.type === "thinking") {
                  block.thinking += event.delta.thinking;
                  stream.push({
                    type: "thinking_delta",
                    contentIndex: index,
                    delta: event.delta.thinking,
                    partial: output,
                  });
                }
              } else if (event.delta.type === "input_json_delta") {
                const index = blocks.findIndex(b => b.index === event.index);
                const block = blocks[index];
                if (block && block.type === "toolCall") {
                  block.partialJson += event.delta.partial_json;
                  // Arguments are refreshed only when the throttled parser returns a result.
                  const throttled = parseStreamingJsonThrottled(block.partialJson, block.lastParseLen ?? 0);
                  if (throttled) {
                    block.arguments = throttled.value;
                    block.lastParseLen = throttled.parsedLen;
                  }
                  stream.push({
                    type: "toolcall_delta",
                    contentIndex: index,
                    delta: event.delta.partial_json,
                    partial: output,
                  });
                }
              } else if (event.delta.type === "signature_delta") {
                const index = blocks.findIndex(b => b.index === event.index);
                const block = blocks[index];
                if (block && block.type === "thinking") {
                  block.thinkingSignature = block.thinkingSignature || "";
                  block.thinkingSignature += event.delta.signature;
                }
              }
            } else if (event.type === "content_block_stop") {
              const index = blocks.findIndex(b => b.index === event.index);
              const block = blocks[index];
              if (block) {
                // Strip streaming bookkeeping before the block is exposed as final.
                delete (block as { index?: number }).index;
                if (block.type === "text") {
                  stream.push({
                    type: "text_end",
                    contentIndex: index,
                    content: block.text,
                    partial: output,
                  });
                } else if (block.type === "thinking") {
                  stream.push({
                    type: "thinking_end",
                    contentIndex: index,
                    content: block.thinking,
                    partial: output,
                  });
                } else if (block.type === "toolCall") {
                  // Final, unthrottled parse of the full accumulated argument JSON.
                  block.arguments = parseStreamingJson(block.partialJson);
                  delete (block as { partialJson?: string }).partialJson;
                  delete (block as { lastParseLen?: number }).lastParseLen;
                  stream.push({
                    type: "toolcall_end",
                    contentIndex: index,
                    toolCall: block,
                    partial: output,
                  });
                }
              }
            } else if (event.type === "message_delta") {
              const rawStopReason = event.delta.stop_reason;
              if (rawStopReason) {
                // A stop reason counts as a terminal signal even if message_stop never arrives.
                output.stopReason = mapStopReason(rawStopReason);
                sawTerminalEnvelope = true;
              }
              const stopDetails = event.delta.stop_details;
              if (stopDetails && stopDetails.type === "refusal") {
                const explanation = stopDetails.explanation?.trim();
                const category = stopDetails.category;
                const label = category ? `Refusal (${category})` : "Refusal";
                output.errorMessage = explanation ? `${label}: ${explanation}` : label;
              } else if (output.stopReason === "error" && !output.errorMessage) {
                // Anthropic flagged an error-class stop (refusal / sensitive) without
                // populating stop_details. Surface the raw reason instead of falling
                // through to the generic "unknown error" string when we throw below.
                output.errorMessage =
                  rawStopReason === "refusal"
                    ? "Refusal (no details provided)"
                    : rawStopReason === "sensitive"
                      ? "Content flagged by safety filters"
                      : `Anthropic stream ended with stop_reason: ${rawStopReason ?? "unknown"}`;
              }
              // Only counters present on this delta overwrite the values from message_start.
              if (event.usage.input_tokens != null) {
                output.usage.input = event.usage.input_tokens;
              }
              if (event.usage.output_tokens != null) {
                output.usage.output = event.usage.output_tokens;
              }
              if (event.usage.cache_read_input_tokens != null) {
                output.usage.cacheRead = event.usage.cache_read_input_tokens;
              }
              if (event.usage.cache_creation_input_tokens != null) {
                output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
              }
              applyAnthropicUsageExtras(output.usage, event.usage);
              output.usage.totalTokens =
                output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
              calculateCost(model, output.usage);
            } else if (event.type === "message_stop") {
              sawTerminalEnvelope = true;
            }
          }

          // The stream ended without throwing; decide whether it really completed.
          // NOTE(review): despite its name this holds whatever reason was passed to
          // `abortLocally` (first-event or idle timeout) - confirm against the tracker.
          const firstEventTimeoutError = activeAbortTracker.getLocalAbortReason();
          if (firstEventTimeoutError) {
            throw firstEventTimeoutError;
          }
          if (activeAbortTracker.wasCallerAbort()) {
            throw new Error("Request was aborted");
          }
          if (!sawEvent || !sawMessageStart) {
            throw createAnthropicStreamEnvelopeError("stream ended before message_start");
          }
          if (!sawTerminalEnvelope) {
            throw createAnthropicStreamEnvelopeError("stream ended before terminal stop signal");
          }

          if (output.stopReason === "aborted" || output.stopReason === "error") {
            throw new Error(output.errorMessage ?? "An unknown error occurred");
          }
          break;
        } catch (streamError) {
          // Prefer the locally recorded abort reason over the error that surfaced from the SDK.
          const streamFailure = activeAbortTracker.getLocalAbortReason() ?? streamError;
          // Fallback 1: strict tool grammar too large -> retry once with strict tools
          // dropped, and remember the downgrade in the session state.
          if (
            !disableStrictTools &&
            firstTokenTime === undefined &&
            hasStrictAnthropicTools(params) &&
            isAnthropicStrictGrammarTooLargeError(streamFailure)
          ) {
            strictFallbackErrorMessage = await finalizeErrorMessage(streamFailure, rawRequestDump);
            output.errorMessage = strictFallbackErrorMessage;
            if (providerSessionState) {
              providerSessionState.strictToolsDisabled = true;
            }
            disableStrictTools = true;
            params = await prepareParams();
            providerRetryAttempt = 0;
            // Truncate in place: `blocks` aliases `output.content`.
            output.content.length = 0;
            output.responseId = undefined;
            output.providerPayload = undefined;
            output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
            output.stopReason = "stop";
            firstTokenTime = undefined;
            continue;
          }
          // Fallback 2: fast mode rejected on a priority-tier request -> retry without it.
          if (
            !dropFastMode &&
            resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
            firstTokenTime === undefined &&
            isAnthropicFastModeUnsupportedError(streamFailure)
          ) {
            logger.debug("anthropic: fast mode unsupported, retrying without speed", {
              model: model.id,
              error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
            });
            if (providerSessionState) {
              providerSessionState.fastModeDisabled = true;
            }
            dropFastMode = true;
            params = await prepareParams();
            providerRetryAttempt = 0;
            output.content.length = 0;
            output.responseId = undefined;
            output.providerPayload = undefined;
            output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
            output.stopReason = "stop";
            firstTokenTime = undefined;
            continue;
          }
          // Generic transient retry: envelope/JSON failures are retryable until
          // text or tool content has been emitted; provider failures only before
          // the first content block, and never for a local idle timeout.
          const isTransientEnvelopeFailure =
            isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
          const isLocalIdleTimeout =
            streamFailure === idleTimeoutAbortError ||
            (streamFailure instanceof Error && streamFailure.message === idleTimeoutAbortError.message);
          const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
          const canRetryProviderFailure =
            !isLocalIdleTimeout &&
            firstTokenTime === undefined &&
            !streamedReplayUnsafeContent &&
            isProviderRetryableError(streamFailure, model.provider);
          if (
            activeAbortTracker.wasCallerAbort() ||
            providerRetryAttempt >= PROVIDER_MAX_RETRIES ||
            (!canRetryTransientEnvelopeFailure && !canRetryProviderFailure)
          ) {
            throw streamFailure;
          }
          providerRetryAttempt++;
          // Exponential backoff: base delay doubled for each attempt after the first.
          const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
          if (options?.providerRetryWait) {
            await options.providerRetryWait(delayMs, options.signal);
          } else {
            await scheduler.wait(delayMs, { signal: options?.signal });
          }
          // Reset the partial message before replaying; a strict-tools fallback
          // message recorded earlier is kept as the error message.
          output.content.length = 0;
          output.responseId = undefined;
          output.errorMessage = strictFallbackErrorMessage;
          output.providerPayload = undefined;
          output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
          output.stopReason = "stop";
          firstTokenTime = undefined;
        }
      }

      output.duration = Date.now() - startTime;
      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
      // Record that the requested priority tier was dropped for this response.
      if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
        output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
      }
      stream.push({ type: "done", reason: output.stopReason, message: output });
      stream.end();
    } catch (error) {
      // Terminal failure: strip streaming bookkeeping from any unfinished blocks
      // and publish the partial message as an error event.
      for (const block of output.content) {
        delete (block as { index?: number }).index;
        delete (block as { partialJson?: string }).partialJson;
        delete (block as { lastParseLen?: number }).lastParseLen;
      }
      const firstEventTimeoutError = activeAbortTracker.getLocalAbortReason();
      output.stopReason = activeAbortTracker.wasCallerAbort() ? "aborted" : "error";
      output.errorStatus = extractHttpStatusFromError(error);
      // A local timeout message takes precedence over the formatted provider error.
      output.errorMessage = firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
      output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
      output.duration = Date.now() - startTime;
      if (firstTokenTime) output.ttft = firstTokenTime - startTime;
      stream.push({ type: "error", reason: output.stopReason, error: output });
      stream.end();
    }
  })();

  return stream;
};
1768
+
1769
/** One text entry of the `system` array sent to Anthropic, optionally tagged with cache control. */
export type AnthropicSystemBlock = {
  type: "text";
  text: string;
  cache_control?: AnthropicCacheControl;
};
/** Options accepted by `buildAnthropicSystemBlocks`. */
type SystemBlockOptions = {
  /**
   * When true (and no prompt already contains the billing header prefix), the
   * billing header and the Claude Code system instruction are placed first.
   */
  includeClaudeCodeInstruction?: boolean;
  /** Extra instruction texts; trimmed, empties dropped, placed before the system prompts. */
  extraInstructions?: string[];
  /** Text of the first user message — used as fingerprint seed for the billing header. */
  firstUserMessageText?: string;
  /** Cache control to attach to the generated blocks; omitted means no cache markers. */
  cacheControl?: AnthropicCacheControl;
};
1781
+
1782
/** Returns a shallow copy of `cacheControl` with `scope` forced to `"global"`; the input is not mutated. */
function withGlobalCacheScope(cacheControl: AnthropicCacheControl): AnthropicCacheControl {
  return Object.assign({}, cacheControl, { scope: "global" as const });
}
1785
+
1786
/**
 * Places cache-control markers on a Claude Code style system block list, in place.
 *
 * The first two entries (billing header and Claude Code instruction in
 * `buildAnthropicSystemBlocks`) are never marked. The third entry receives a
 * globally scoped marker; when more entries follow, the last entry also
 * receives the unmodified `cacheControl`.
 *
 * @returns Number of markers applied: 0 (no cache control or at most two
 *   blocks), 1 (exactly three blocks) or 2.
 */
function applyClaudeCodeSystemCache(
  blocks: AnthropicSystemBlock[],
  cacheControl: AnthropicCacheControl | undefined,
): number {
  const firstCacheableIndex = 2;
  if (!cacheControl) return 0;

  const finalIndex = blocks.length - 1;
  if (finalIndex < firstCacheableIndex) return 0;

  blocks[firstCacheableIndex] = {
    ...blocks[firstCacheableIndex],
    cache_control: withGlobalCacheScope(cacheControl),
  };
  if (finalIndex === firstCacheableIndex) return 1;

  blocks[finalIndex] = { ...blocks[finalIndex], cache_control: cacheControl };
  return 2;
}
1797
+
1798
/**
 * Builds the `system` block array for an Anthropic request.
 *
 * Extra instructions (trimmed, empties removed) always precede the normalized
 * system prompts. Two layouts exist:
 *
 * - Claude Code layout: chosen when `includeClaudeCodeInstruction` is set and
 *   no prompt already contains the billing header prefix. The list starts with
 *   the billing header (seeded from `firstUserMessageText`) and the Claude
 *   Code system instruction, cache markers are placed by
 *   `applyClaudeCodeSystemCache`, and the array is always returned.
 * - Plain layout: only instructions and prompts, with `cacheControl` attached
 *   to the last block. Returns `undefined` when there are no blocks at all.
 *
 * @param systemPrompt Raw system prompt strings, normalized via `normalizeSystemPrompts`.
 * @param options See `SystemBlockOptions`.
 */
export function buildAnthropicSystemBlocks(
  systemPrompt: readonly string[] | undefined,
  options: SystemBlockOptions = {},
): AnthropicSystemBlock[] | undefined {
  const { includeClaudeCodeInstruction = false, extraInstructions = [], firstUserMessageText, cacheControl } = options;
  const sanitizedPrompts = normalizeSystemPrompts(systemPrompt);
  const trimmedInstructions = extraInstructions.map(instruction => instruction.trim()).filter(Boolean);
  const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.includes(CLAUDE_BILLING_HEADER_PREFIX));

  const asTextBlock = (text: string): AnthropicSystemBlock => ({ type: "text", text });
  const contentBlocks: AnthropicSystemBlock[] = [...trimmedInstructions, ...sanitizedPrompts].map(text =>
    asTextBlock(text),
  );

  if (includeClaudeCodeInstruction && !hasBillingHeader) {
    const claudeCodeBlocks: AnthropicSystemBlock[] = [
      asTextBlock(createClaudeBillingHeader(firstUserMessageText ?? "")),
      asTextBlock(claudeCodeSystemInstruction),
      ...contentBlocks,
    ];
    applyClaudeCodeSystemCache(claudeCodeBlocks, cacheControl);
    return claudeCodeBlocks;
  }

  if (contentBlocks.length === 0) {
    return undefined;
  }
  if (cacheControl) {
    const tailIndex = contentBlocks.length - 1;
    contentBlocks[tailIndex] = { ...contentBlocks[tailIndex], cache_control: cacheControl };
  }
  return contentBlocks;
}
1837
+
1838
/**
 * Normalizes a beta-flag option into a clean list.
 *
 * @param betas Either an array of flags or a single comma-separated string.
 * @returns The flags with surrounding whitespace removed and empty entries
 *   dropped, in input order; `[]` when `betas` is missing or empty.
 */
export function normalizeExtraBetas(betas?: string[] | string): string[] {
  if (!betas) return [];
  const candidates = Array.isArray(betas) ? betas : betas.split(",");
  return candidates.flatMap(candidate => {
    const flag = candidate.trim();
    return flag.length > 0 ? [flag] : [];
  });
}
1843
+
1844
/**
 * Resolves the constructor options for the Anthropic messages client.
 *
 * Picks the base URL, auth mode, beta features, default headers and the `fetch`
 * wrapper chain, then returns one of four option shapes depending on
 * `model.provider`:
 *
 * - `github-copilot`: bearer auth with the parsed Copilot access token; headers are
 *   assembled locally and only `extraBetas` plus the fine-grained tool streaming beta
 *   are advertised (the interleaved-thinking beta is not added on this path).
 * - `cloudflare-ai-gateway`: no `apiKey`/`authToken`; no TLS `fetchOptions`.
 * - `opencode-go` / `opencode-zen`: no `apiKey`/`authToken` (see inline comment).
 * - everything else: `authToken` for OAuth tokens, `apiKey` otherwise.
 *
 * @param args Model, credentials and per-request feature switches. `stream` and
 *             `interleavedThinking` default to `true`; `hasTools` and
 *             `thinkingEnabled` default to `false`; `extraBetas` defaults to `[]`.
 * @returns Client options plus `isOAuthToken`, which is `false` on the three
 *          provider-specific branches regardless of the token shape.
 */
export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): AnthropicClientOptionsResult {
	const {
		model,
		apiKey,
		extraBetas = [],
		stream = true,
		interleavedThinking = true,
		headers,
		dynamicHeaders,
		hasTools = false,
		thinkingEnabled = false,
		thinkingDisplay,
		isOAuth,
		onSseEvent,
		claudeCodeSessionId,
	} = args;
	const compat = getAnthropicCompat(model);
	const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinkingDisplay(model.id);
	const needsFineGrainedToolStreamingBeta = hasTools && !compat.supportsEagerToolInputStreaming;
	// An explicit `isOAuth` from the caller wins over sniffing the key.
	const oauthToken = isOAuth ?? isAnthropicOAuthToken(apiKey);
	const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
	const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
	const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
	// Fetch wrapper chain: caller/global fetch -> optional CCH rewriter -> optional SSE debug tap.
	const baseFetch = args.fetch ?? fetch;
	// Only OAuth requests inject the CC billing header; no API-key request can ever
	// contain it, so there is no need to install the rewriter for those.
	const cchFetch = oauthToken ? wrapFetchForCch(baseFetch) : baseFetch;
	const debugFetch = onSseEvent ? wrapFetchForSseDebug(cchFetch, event => onSseEvent(event, model)) : cchFetch;
	if (model.provider === "github-copilot") {
		const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
		const betaFeatures = [...extraBetas];
		if (needsFineGrainedToolStreamingBeta) {
			betaFeatures.push(fineGrainedToolStreamingBeta);
		}
		// NOTE(review): argument order here is model.headers, dynamicHeaders, headers, whereas the
		// generic path below passes headers before dynamicHeaders — confirm the intended precedence
		// against mergeHeaders.
		const defaultHeaders = mergeHeaders(
			{
				Accept: stream ? "text/event-stream" : "application/json",
				"Anthropic-Dangerous-Direct-Browser-Access": "true",
				Authorization: `Bearer ${copilotApiKey}`,
				...(betaFeatures.length > 0 ? { "anthropic-beta": buildBetaHeader([], betaFeatures) } : {}),
			},
			model.headers,
			dynamicHeaders,
			headers,
		);

		return {
			isOAuthToken: false,
			apiKey: null,
			authToken: copilotApiKey,
			baseURL: baseUrl,
			maxRetries: 5,
			defaultHeaders,
			fetch: debugFetch,
			...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
		};
	}

	const betaFeatures = [...extraBetas];
	if (needsFineGrainedToolStreamingBeta) {
		betaFeatures.push(fineGrainedToolStreamingBeta);
	}
	if (needsInterleavedBeta) {
		betaFeatures.push(interleavedThinkingBeta);
	}

	const defaultHeaders = buildAnthropicHeaders({
		apiKey,
		baseUrl,
		isOAuth: oauthToken,
		extraBetas: betaFeatures,
		stream,
		modelHeaders: mergeHeaders(model.headers, foundryCustomHeaders, headers, dynamicHeaders),
		isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
		claudeCodeSessionId,
		// Claude Code betas are only requested for OAuth tokens.
		claudeCodeBetas: oauthToken
			? buildClaudeCodeBetas(hasTools || thinkingEnabled, thinkingEnabled, thinkingDisplay === "omitted")
			: [],
	});

	if (model.provider === "cloudflare-ai-gateway") {
		// Neither apiKey nor authToken is handed to the client on this path.
		// NOTE(review): presumably authentication travels in defaultHeaders — confirm in buildAnthropicHeaders.
		return {
			isOAuthToken: false,
			apiKey: null,
			authToken: null,
			baseURL: baseUrl,
			maxRetries: 5,
			defaultHeaders,
			fetch: debugFetch,
		};
	}

	// OpenCode's Anthropic-compatible gateway accepts bearer auth only; leaving
	// apiKey set lets the SDK add X-Api-Key, which upstream Alibaba rejects.
	if (model.provider === "opencode-go" || model.provider === "opencode-zen") {
		return {
			isOAuthToken: false,
			apiKey: null,
			authToken: null,
			baseURL: baseUrl,
			maxRetries: 5,
			defaultHeaders,
			...(debugFetch ? { fetch: debugFetch } : {}),
			...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
		};
	}

	// Default path: OAuth tokens go out as authToken, plain keys as apiKey.
	return {
		isOAuthToken: oauthToken,
		apiKey: oauthToken ? null : apiKey,
		authToken: oauthToken ? apiKey : undefined,
		baseURL: baseUrl,
		maxRetries: 5,
		defaultHeaders,
		fetch: debugFetch,
		...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
	};
}
1962
+
1963
/**
 * Builds an `AnthropicMessagesClient` for `model`.
 *
 * The options come from `buildAnthropicClientOptions`; the `model` argument
 * overrides any `model` carried in `args`. The `isOAuthToken` flag is split off
 * and returned alongside the client rather than being passed to the constructor.
 */
function createClient(
	model: Model<"anthropic-messages">,
	args: AnthropicClientOptionsArgs,
): { client: AnthropicMessagesClient; isOAuthToken: boolean } {
	const resolved = buildAnthropicClientOptions({ ...args, model });
	const { isOAuthToken, ...constructorOptions } = resolved;
	return { client: new AnthropicMessagesClient(constructorOptions), isOAuthToken };
}
1971
+
1972
/**
 * Removes thinking-related settings when the request forces a tool call.
 *
 * Applies only when `tool_choice.type` is `"any"` or `"tool"`. In that case
 * `params.thinking` is deleted, `output_config.effort` is deleted, and
 * `output_config` itself is deleted when nothing else remains in it.
 * Mutates `params` in place.
 */
function disableThinkingIfToolChoiceForced(params: MessageCreateParamsStreaming): void {
	const choiceKind = params.tool_choice?.type;
	const isForced = choiceKind === "any" || choiceKind === "tool";
	if (!isForced) return;

	delete params.thinking;

	const outputConfig = params.output_config as AnthropicOutputConfig | undefined;
	if (outputConfig) {
		delete outputConfig.effort;
		const isEmpty = Object.keys(outputConfig).length === 0;
		if (isEmpty) delete params.output_config;
	}
}
1986
+
1987
/**
 * Raises `max_tokens` so a budgeted thinking request leaves room for output.
 *
 * Applies only when `thinking.type` is `"enabled"` with a positive
 * `budget_tokens`. If `max_tokens` is below `budget_tokens + OUTPUT_FALLBACK_BUFFER`,
 * it is set to that sum, capped at `model.maxTokens`. Mutates `params` in place.
 */
function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, model: Model<"anthropic-messages">): void {
	const thinking = params.thinking;
	if (thinking?.type !== "enabled") return;

	const thinkingBudget = thinking.budget_tokens ?? 0;
	if (thinkingBudget <= 0) return;

	const minimumMaxTokens = thinkingBudget + OUTPUT_FALLBACK_BUFFER;
	const currentMaxTokens = params.max_tokens ?? 0;
	if (currentMaxTokens < minimumMaxTokens) {
		params.max_tokens = Math.min(minimumMaxTokens, model.maxTokens);
	}
}
2000
+
2001
/**
 * Structural shape for any request element that may carry a `cache_control`
 * marker. In this file it is intersected with tool definitions, system blocks
 * and message content blocks so the caching helpers can treat them uniformly.
 */
type CacheControlBlock = {
	/** Cache breakpoint marker; absent or `null` means the element has no breakpoint. */
	cache_control?: AnthropicCacheControl | null;
};
2004
+
2005
/**
 * Places `cacheControl` on the final element of `blocks`.
 *
 * The last slot is replaced with a shallow copy carrying the marker, so the
 * original element object is not mutated. Does nothing for an empty array.
 */
function applyCacheControlToLastBlock<T extends CacheControlBlock>(
	blocks: T[],
	cacheControl: AnthropicCacheControl,
): void {
	const tail = blocks.length - 1;
	if (tail < 0) return;
	blocks[tail] = { ...blocks[tail], cache_control: cacheControl };
}
2013
+
2014
/**
 * Places `cacheControl` on the last `text` block of `blocks`.
 *
 * Scans from the end; the first `text` block found is replaced with a shallow
 * copy carrying the marker. When there is no `text` block at all, the marker
 * goes on the last block of any type instead. Does nothing for an empty array.
 */
function applyCacheControlToLastTextBlock(
	blocks: Array<ContentBlockParam & CacheControlBlock>,
	cacheControl: AnthropicCacheControl,
): void {
	let cursor = blocks.length;
	if (cursor === 0) return;
	while (cursor-- > 0) {
		const candidate = blocks[cursor];
		if (candidate.type !== "text") continue;
		blocks[cursor] = { ...candidate, cache_control: cacheControl };
		return;
	}
	// No text block present: fall back to marking whatever block comes last.
	applyCacheControlToLastBlock(blocks, cacheControl);
}
2027
+
2028
/**
 * Adds prompt-cache breakpoints to the system prompt and the trailing messages.
 *
 * No-op when `cacheControl` is missing, or when any message content block
 * already carries a `cache_control` (breakpoints were placed externally).
 *
 * System prompt: when the system array has at least three blocks and the first
 * block's text starts with `CLAUDE_BILLING_HEADER_PREFIX`, placement is delegated
 * to `applyClaudeCodeSystemCache` and its return value is counted as breakpoints
 * used; otherwise the last system block is marked and counts as one.
 *
 * Messages: the last message (Claude Code layout) or the last two messages
 * (otherwise) are marked, stopping once four breakpoints have been used.
 * String content is converted into a single marked text block.
 */
function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?: AnthropicCacheControl): void {
	if (!cacheControl) return;

	// Skip if cache_control breakpoints were already placed externally on messages.
	const placedExternally = params.messages.some(
		message =>
			Array.isArray(message.content) &&
			(message.content as Array<ContentBlockParam & CacheControlBlock>).some(
				block => block.cache_control != null,
			),
	);
	if (placedExternally) return;

	const breakpointLimit = 4;
	let used = 0;
	let claudeCodeLayout = false;

	const system = params.system;
	if (system && Array.isArray(system) && system.length > 0) {
		claudeCodeLayout =
			system.length >= 3 &&
			(system[0] as { text?: string }).text?.startsWith(CLAUDE_BILLING_HEADER_PREFIX) === true;
		if (claudeCodeLayout) {
			used += applyClaudeCodeSystemCache(system as AnthropicSystemBlock[], cacheControl);
		} else {
			applyCacheControlToLastBlock(system, cacheControl);
			used += 1;
		}
	}

	if (used >= breakpointLimit) return;

	const trailingCount = claudeCodeLayout ? 1 : 2;
	const firstIndex = Math.max(0, params.messages.length - trailingCount);
	for (const message of params.messages.slice(firstIndex)) {
		if (used >= breakpointLimit) break;
		if (!message) continue;
		const { content } = message;
		if (typeof content === "string") {
			message.content = [{ type: "text", text: content, cache_control: cacheControl }];
			used += 1;
			continue;
		}
		if (Array.isArray(content) && content.length > 0) {
			applyCacheControlToLastTextBlock(content as Array<ContentBlockParam & CacheControlBlock>, cacheControl);
			used += 1;
		}
	}
}
2074
+
2075
/**
 * Enforces TTL ordering for one block while walking a request in order.
 *
 * A block whose `cache_control` has no `"1h"` TTL marks `seenFiveMinute`. Any
 * later block that still asks for `"1h"` has its TTL removed, so a longer-lived
 * breakpoint never follows a shorter-lived one.
 *
 * The TTL is removed by swapping in a copy of the `cache_control` object. The
 * same object is commonly shared by several blocks (and owned by the caller),
 * so deleting `ttl` in place would also strip it from earlier blocks that are
 * entitled to keep `"1h"`.
 *
 * @param block Element to inspect; its `cache_control` may be replaced.
 * @param seenFiveMinute Shared flag carried across the walk; set to `true` once a
 *                       non-`"1h"` breakpoint has been encountered.
 */
function normalizeCacheControlBlockTtl(block: CacheControlBlock, seenFiveMinute: { value: boolean }): void {
	const cacheControl = block.cache_control;
	if (!cacheControl) return;
	if (cacheControl.ttl !== "1h") {
		seenFiveMinute.value = true;
		return;
	}
	if (seenFiveMinute.value) {
		// Copy-on-write: never mutate a cache_control object other blocks may reference.
		const downgraded = { ...cacheControl };
		delete downgraded.ttl;
		block.cache_control = downgraded;
	}
}
2086
+
2087
/**
 * Applies `normalizeCacheControlBlockTtl` across the whole request.
 *
 * Elements are visited in the order tools, then system blocks, then message
 * content blocks, sharing one "seen a non-1h breakpoint" flag. Messages whose
 * content is not an array are skipped.
 */
function normalizeCacheControlTtlOrdering(params: MessageCreateParamsStreaming): void {
	const seenFiveMinute = { value: false };
	const visitAll = (blocks: CacheControlBlock[]): void => {
		for (const block of blocks) {
			normalizeCacheControlBlockTtl(block, seenFiveMinute);
		}
	};

	if (params.tools) {
		visitAll(params.tools as Array<AnthropicWireTool & CacheControlBlock>);
	}
	if (params.system && Array.isArray(params.system)) {
		visitAll(params.system as Array<AnthropicSystemBlock & CacheControlBlock>);
	}
	for (const { content } of params.messages) {
		if (Array.isArray(content)) {
			visitAll(content as Array<ContentBlockParam & CacheControlBlock>);
		}
	}
}
2106
+
2107
/**
 * Finds the position of the last element that carries a `cache_control`.
 *
 * @returns The index of the last element whose `cache_control` is neither `null`
 *          nor `undefined`, or `-1` when there is none.
 */
function findLastCacheControlIndex<T extends CacheControlBlock>(blocks: T[]): number {
	let cursor = blocks.length;
	while (cursor-- > 0) {
		if (blocks[cursor]?.cache_control != null) return cursor;
	}
	return -1;
}
2113
+
2114
/**
 * Removes `cache_control` from elements of `blocks`, front to back, while
 * leaving the element at `preserveIndex` untouched.
 *
 * Each removal decrements `excessCounter.value`; the walk stops as soon as the
 * counter reaches zero. Elements are mutated in place.
 */
function stripCacheControlExceptIndex<T extends CacheControlBlock>(
	blocks: T[],
	preserveIndex: number,
	excessCounter: { value: number },
): void {
	for (const [position, block] of blocks.entries()) {
		if (excessCounter.value <= 0) return;
		if (position === preserveIndex || !block?.cache_control) continue;
		delete block.cache_control;
		excessCounter.value--;
	}
}
2126
+
2127
/**
 * Removes `cache_control` from elements of `blocks`, front to back, with no
 * element exempt.
 *
 * Each removal decrements `excessCounter.value`; the walk stops as soon as the
 * counter reaches zero. Elements are mutated in place.
 */
function stripAllCacheControl<T extends CacheControlBlock>(blocks: T[], excessCounter: { value: number }): void {
	for (let position = 0; position < blocks.length && excessCounter.value > 0; position++) {
		const block = blocks[position];
		if (block.cache_control) {
			delete block.cache_control;
			excessCounter.value--;
		}
	}
}
2135
+
2136
/**
 * Removes `cache_control` from message content blocks, oldest message first.
 *
 * Messages whose content is not an array are skipped. Each removal decrements
 * `excessCounter.value`; the walk stops as soon as the counter reaches zero.
 * Blocks are mutated in place.
 */
function stripMessageCacheControl(
	messages: MessageCreateParamsStreaming["messages"],
	excessCounter: { value: number },
): void {
	for (let m = 0; m < messages.length && excessCounter.value > 0; m++) {
		const content = messages[m].content;
		if (!Array.isArray(content)) continue;
		const blocks = content as Array<ContentBlockParam & CacheControlBlock>;
		for (let b = 0; b < blocks.length && excessCounter.value > 0; b++) {
			const block = blocks[b];
			if (!block.cache_control) continue;
			delete block.cache_control;
			excessCounter.value--;
		}
	}
}
2151
+
2152
/**
 * Counts every truthy `cache_control` marker in the request.
 *
 * Tools, system blocks (when `system` is an array) and message content blocks
 * (when content is an array) are all included.
 */
function countCacheControlBreakpoints(params: MessageCreateParamsStreaming): number {
	const countMarked = (blocks: readonly CacheControlBlock[]): number =>
		blocks.filter(block => block.cache_control).length;

	let total = 0;
	if (params.tools) {
		total += countMarked(params.tools as Array<AnthropicWireTool & CacheControlBlock>);
	}
	if (params.system && Array.isArray(params.system)) {
		total += countMarked(params.system as Array<AnthropicSystemBlock & CacheControlBlock>);
	}
	for (const { content } of params.messages) {
		if (Array.isArray(content)) {
			total += countMarked(content as Array<ContentBlockParam & CacheControlBlock>);
		}
	}
	return total;
}
2172
+
2173
/**
 * Trims `cache_control` markers until the request has at most `maxBreakpoints`.
 *
 * Markers are removed in this order, stopping as soon as the excess is gone:
 * 1. system blocks, except the last marked one;
 * 2. tools, except the last marked one;
 * 3. message content blocks, oldest first;
 * 4. any remaining system blocks;
 * 5. any remaining tools.
 *
 * Mutates the request in place; does nothing when already within the limit.
 */
function enforceCacheControlLimit(params: MessageCreateParamsStreaming, maxBreakpoints: number): void {
	const total = countCacheControlBreakpoints(params);
	if (total <= maxBreakpoints) return;

	const excessCounter = { value: total - maxBreakpoints };
	const systemBlocks =
		params.system && Array.isArray(params.system)
			? (params.system as Array<AnthropicSystemBlock & CacheControlBlock>)
			: [];
	const toolBlocks = (params.tools ?? []) as Array<AnthropicWireTool & CacheControlBlock>;
	// Captured up front so the "keep the last one" passes agree on which marker to spare.
	const lastSystemIndex = findLastCacheControlIndex(systemBlocks);
	const lastToolIndex = findLastCacheControlIndex(toolBlocks);

	const passes: Array<() => void> = [
		() => stripCacheControlExceptIndex(systemBlocks, lastSystemIndex, excessCounter),
		() => stripCacheControlExceptIndex(toolBlocks, lastToolIndex, excessCounter),
		() => stripMessageCacheControl(params.messages, excessCounter),
		() => stripAllCacheControl(systemBlocks, excessCounter),
		() => stripAllCacheControl(toolBlocks, excessCounter),
	];
	for (const runPass of passes) {
		runPass();
		if (excessCounter.value <= 0) return;
	}
}
2202
+
2203
/**
 * Chooses the effort level to send for this request.
 *
 * An explicit `options.effort` wins. Otherwise `options.reasoning`, when set, is
 * translated through `mapEffortToAnthropicAdaptiveEffort`. Returns `undefined`
 * when neither is provided.
 */
function resolveAnthropicAdaptiveEffort(
	model: Model<"anthropic-messages">,
	options: AnthropicOptions,
): AnthropicEffort | undefined {
	const { effort, reasoning } = options;
	if (effort) return effort;
	return reasoning ? mapEffortToAnthropicAdaptiveEffort(model, reasoning) : undefined;
}
2212
+
2213
/**
 * Tests whether `value` begins with `prefix` once leading tab, line feed,
 * carriage return and space characters are skipped.
 *
 * Only those four ASCII characters count as whitespace here; other Unicode
 * spacing characters are not skipped.
 */
function startsWithAfterAsciiWhitespace(value: string, prefix: string): boolean {
	let offset = 0;
	for (; offset < value.length; offset++) {
		const unit = value[offset];
		const isAsciiWhitespace = unit === "\t" || unit === "\n" || unit === "\r" || unit === " ";
		if (!isAsciiWhitespace) break;
	}
	return value.startsWith(prefix, offset);
}
2222
+
2223
/**
 * Reports whether `value` opens with a `<system-reminder>` tag, ignoring
 * leading ASCII whitespace.
 */
function isClaudeSyntheticUserText(value: string): boolean {
	const syntheticTag = "<system-reminder>";
	return startsWithAfterAsciiWhitespace(value, syntheticTag);
}
2226
+
2227
/**
 * Returns the text of the first `user` message in `messages`.
 *
 * - String content is returned as-is.
 * - For block content, the first text block that is not synthetic (per
 *   `isClaudeSyntheticUserText`) is returned; if every text block is synthetic,
 *   the first text block is returned instead.
 * - Returns `""` when there is no user message, the content is neither a string
 *   nor an array, or there are no text blocks.
 *
 * Only the first user message is ever inspected.
 */
function extractClaudeCodeFirstUserMessageText(messages: readonly Message[]): string {
	for (const message of messages) {
		if (message.role === "user") {
			const { content } = message;
			if (typeof content === "string") return content;
			if (!Array.isArray(content)) return "";

			let firstText: string | undefined;
			for (const block of content) {
				if (block.type === "text") {
					firstText = firstText ?? block.text;
					if (!isClaudeSyntheticUserText(block.text)) return block.text;
				}
			}
			return firstText ?? "";
		}
	}
	return "";
}
2243
+
2244
/**
 * Sets `context_management` for OAuth requests that use adaptive thinking.
 *
 * When both conditions hold, `params.context_management` is overwritten with a
 * single `clear_thinking_20251015` edit using `keep: "all"`. Otherwise `params`
 * is left untouched.
 */
function applyClaudeCodeContextManagement(params: MessageCreateParamsStreaming, isOAuthToken: boolean): void {
	const usesAdaptiveThinking = isOAuthToken && params.thinking?.type === "adaptive";
	if (usesAdaptiveThinking) {
		params.context_management = {
			edits: [{ type: "clear_thinking_20251015", keep: "all" }],
		};
	}
}
2250
+
2251
/**
 * Assembles the streaming `messages.create` request body for one turn.
 *
 * Steps, in order:
 * 1. Base fields: model id, converted messages, `max_tokens` (caller value or the
 *    model maximum), `stream: true`.
 * 2. Sampling: `temperature` (only when thinking is not enabled), `top_p`, `top_k`,
 *    and `stop_sequences` trimmed to `ANTHROPIC_STOP_SEQUENCES_MAX`; all three
 *    sampling fields are removed again for models with Opus 4.7 API restrictions.
 * 3. Tools: converted from the context; OAuth requests without tools send `[]`.
 * 4. Thinking: adaptive or budgeted, depending on `model.thinking.mode` and compat
 *    flags; an explicit `thinkingEnabled === false` sends `{ type: "disabled" }`.
 * 5. Task budget, metadata user id, `speed`, and `tool_choice`.
 * 6. System blocks, then the post-processing passes. Their order matters: forced
 *    tool choice strips thinking before context management and the max-token
 *    adjustment look at it, and cache markers are placed before they are limited
 *    and TTL-normalized.
 *
 * @param model Target model descriptor.
 * @param baseUrl Resolved API base URL (only passed on to `getCacheControl`).
 * @param context Conversation: messages, optional tools, optional system prompt.
 * @param isOAuthToken Whether the request authenticates with an OAuth token.
 * @param options Per-request options.
 * @param disableStrictTools When `true`, passed to `convertTools` to turn strict
 *                           tools off; also forced on for `github-copilot`.
 * @returns The request parameters.
 */
function buildParams(
	model: Model<"anthropic-messages">,
	baseUrl: string,
	context: Context,
	isOAuthToken: boolean,
	options?: AnthropicOptions,
	disableStrictTools = false,
): MessageCreateParamsStreaming {
	const { cacheControl } = getCacheControl(model, baseUrl, options?.cacheRetention, isOAuthToken);
	const params: MessageCreateParamsStreaming = {
		model: model.id,
		messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
		// `||` rather than `??`: a maxTokens of 0 also falls back to the model maximum.
		max_tokens: options?.maxTokens || model.maxTokens,
		stream: true,
	};
	// Temperature is not forwarded while thinking is enabled.
	if (options?.temperature !== undefined && !options?.thinkingEnabled) {
		params.temperature = options.temperature;
	}

	if (options?.topP !== undefined) {
		params.top_p = options.topP;
	}
	if (options?.topK !== undefined) {
		params.top_k = options.topK;
	}
	if (options?.stopSequences?.length) {
		const seqs = options.stopSequences;
		// Warn once per process (module-level flag), not once per request.
		if (seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX && !warnedStopSequencesTrim) {
			warnedStopSequencesTrim = true;
			logger.warn("anthropic: stop_sequences exceeds 4; extra entries dropped", {
				received: seqs.length,
				kept: ANTHROPIC_STOP_SEQUENCES_MAX,
			});
		}
		params.stop_sequences =
			seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX ? seqs.slice(0, ANTHROPIC_STOP_SEQUENCES_MAX) : seqs;
	}

	// Opus 4.7+ rejects non-default sampling parameters with 400 error.
	if (hasOpus47ApiRestrictions(model.id)) {
		delete params.top_p;
		delete params.top_k;
		delete params.temperature;
	}

	if (context.tools) {
		params.tools = convertTools(
			context.tools,
			isOAuthToken,
			disableStrictTools || model.provider === "github-copilot",
			getAnthropicCompat(model).supportsEagerToolInputStreaming,
		);
	} else if (isOAuthToken) {
		// OAuth requests always carry a `tools` field, even when empty.
		params.tools = [];
	}

	if (model.reasoning) {
		if (options?.thinkingEnabled) {
			const mode = model.thinking?.mode;
			const effort = resolveAnthropicAdaptiveEffort(model, options);

			const compat = getAnthropicCompat(model);
			if (mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
				const adaptive: { type: "adaptive"; display?: AnthropicThinkingDisplay } = { type: "adaptive" };
				// Starting with Claude Opus 4.7, adaptive thinking content is omitted from the
				// response by default. Opt into summarized reasoning so thinking deltas keep
				// streaming with human-readable content for callers that rely on it.
				if (options.thinkingDisplay !== undefined || supportsAdaptiveThinkingDisplay(model.id)) {
					adaptive.display = options.thinkingDisplay ?? "summarized";
				}
				params.thinking = adaptive;
				if (effort) {
					getAnthropicOutputConfig(params).effort = effort;
				}
			} else {
				// Budgeted thinking; a missing or zero budget falls back to 1024 tokens.
				params.thinking = {
					type: "enabled",
					budget_tokens: options.thinkingBudgetTokens || 1024,
					display: options.thinkingDisplay ?? "summarized",
				};
				if (mode === "anthropic-budget-effort" && effort) {
					getAnthropicOutputConfig(params).effort = effort;
				}
			}
		} else if (options?.thinkingEnabled === false) {
			// Only an explicit `false` disables thinking; `undefined` leaves the field unset.
			params.thinking = { type: "disabled" };
		}
	}

	if (options?.taskBudget) {
		getAnthropicOutputConfig(params).task_budget = options.taskBudget;
	}
	const metadataUserId = resolveAnthropicMetadataUserId(options?.metadata?.user_id, isOAuthToken, options?.sessionId);
	if (metadataUserId) {
		params.metadata = { user_id: metadataUserId };
	}

	if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
		params.speed = "fast";
	}

	if (options?.toolChoice) {
		if (typeof options.toolChoice === "string") {
			params.tool_choice = { type: options.toolChoice };
		} else if (isOAuthToken && options.toolChoice.name) {
			// Named tool choices get the same name prefix that OAuth tool definitions receive.
			params.tool_choice = { ...options.toolChoice, name: applyClaudeToolPrefix(options.toolChoice.name) };
		} else {
			params.tool_choice = options.toolChoice;
		}
	}

	// Claude Opus 4.8 must emit at most one tool call per turn. Force
	// `disable_parallel_tool_use` onto the outgoing tool_choice (synthesizing an
	// `auto` choice when none is set). Gated on tools being present: Anthropic
	// rejects `tool_choice` without `tools`, and parallelism is moot otherwise.
	// `none` rejects the field, so leave it untouched. A fresh object is built
	// rather than mutated so the caller's `options.toolChoice` is never aliased.
	if (disablesParallelToolUse(model.id) && params.tools && params.tools.length > 0) {
		const current = params.tool_choice;
		if (!current) {
			params.tool_choice = { type: "auto", disable_parallel_tool_use: true };
		} else if (current.type !== "none") {
			params.tool_choice = { ...current, disable_parallel_tool_use: true };
		}
	}

	// The Claude Code instruction is injected for OAuth requests, except on claude-3-5-haiku models.
	const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
	const firstUserMessageText = shouldInjectClaudeCodeInstruction
		? extractClaudeCodeFirstUserMessageText(context.messages)
		: "";
	const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
		includeClaudeCodeInstruction: shouldInjectClaudeCodeInstruction,
		firstUserMessageText,
	});
	if (systemBlocks) {
		params.system = systemBlocks;
	}
	// Post-processing passes; each one mutates `params` and later ones depend on earlier ones.
	disableThinkingIfToolChoiceForced(params);
	applyClaudeCodeContextManagement(params, isOAuthToken);
	ensureMaxTokensForThinking(params, model);
	applyPromptCaching(params, cacheControl);
	enforceCacheControlLimit(params, 4);
	normalizeCacheControlTtlOrdering(params);

	return params;
}
2397
+
2398
/**
 * Detects Z.AI's Anthropic-compatible proxy (`api.z.ai/api/anthropic`).
 *
 * That proxy deserializes tool_result blocks into a Python class that reads
 * `.id`, although Anthropic's standard tool_result schema only carries
 * `tool_use_id`. Identifying the endpoint lets callers emit the non-standard
 * alias for it alone, without polluting requests to api.anthropic.com or other
 * compatible proxies.
 * See: https://github.com/eminent337/aery/issues/814
 *
 * @returns `true` when the provider is `"zai"` or the base URL's host is
 *          `api.z.ai` (case-insensitive); `false` for a missing or unparsable URL.
 */
function isZaiAnthropicEndpoint(model: Model<"anthropic-messages">): boolean {
	if (model.provider === "zai") return true;
	if (!model.baseUrl) return false;

	let host: string;
	try {
		host = new URL(model.baseUrl).hostname;
	} catch {
		return false;
	}
	return host.toLowerCase() === "api.z.ai";
}
2416
+
2417
/**
 * Returns true for providers whose Anthropic-compatible endpoints do NOT
 * implement signature-based thinking-chain integrity (DeepSeek, Z.AI, etc.).
 * For these providers, unsigned thinking blocks must be preserved as
 * `type: "thinking"` instead of being degraded to text.
 *
 * Detection is by provider id (`"zai"`, `"deepseek"`) or by the base URL's
 * host: `api.z.ai`, or any subdomain of `deepseek.com`. Z.AI is matched by host
 * as well as by provider id so that a custom provider pointed at `api.z.ai` is
 * treated the same way as the built-in one.
 *
 * @returns `false` when the base URL is missing or cannot be parsed and the
 *          provider id is not one of the known ones.
 */
function isNonSigningAnthropicEndpoint(model: Model<"anthropic-messages">): boolean {
	// Known non-signing providers
	if (model.provider === "zai" || model.provider === "deepseek") return true;
	const baseUrl = model.baseUrl;
	if (!baseUrl) return false;
	try {
		const hostname = new URL(baseUrl).hostname.toLowerCase();
		// `.deepseek.com` suffix covers api.deepseek.com and any other subdomain.
		return hostname === "api.z.ai" || hostname.endsWith(".deepseek.com");
	} catch {
		return false;
	}
}
2435
+
2436
/**
 * Converts one tool result message into an Anthropic `tool_result` block.
 *
 * Content is converted with `convertContentBlocks`, with the image flag taken
 * from whether the model's inputs include `"image"`. For Z.AI endpoints the
 * block additionally carries an `id` property equal to the tool call id.
 */
function buildToolResultBlock(model: Model<"anthropic-messages">, msg: ToolResultMessage): ContentBlockParam {
	const toolUseId = msg.toolCallId;
	const supportsImages = model.input.includes("image");
	const block: ContentBlockParam = {
		type: "tool_result",
		tool_use_id: toolUseId,
		content: convertContentBlocks(msg.content, supportsImages),
		is_error: msg.isError,
	};
	if (!isZaiAnthropicEndpoint(model)) return block;
	// Z.AI workaround (issue #814): include `id` aliased to `tool_use_id`.
	return Object.assign(block, { id: toolUseId });
}
2449
+
2450
/**
 * A single Anthropic conversation turn, including the mid-conversation
 * `system` role (Opus 4.8+).
 *
 * Alias of `MessageParam`. `convertAnthropicMessages` returns an array of these
 * and emits `user` and `assistant` turns, plus `system` turns for developer
 * messages that qualify for the upgrade.
 */
export type AnthropicMessageParam = MessageParam;
2455
+
2456
/**
 * Converts internal conversation messages into Anthropic message params.
 *
 * - `user` / `developer` messages become `user` turns; empty or whitespace-only
 *   content is dropped. Qualifying developer turns are later upgraded to `system`.
 * - `assistant` messages become `assistant` turns built from text, thinking,
 *   redacted-thinking and tool-call blocks; turns with no surviving blocks are dropped.
 * - Runs of consecutive `toolResult` messages are merged into one `user` turn.
 * - If the result would end on an `assistant` turn, a `"Continue."` user turn is appended.
 *
 * Thinking blocks are routed as follows:
 * - signed (non-blank signature): sent as `thinking` with the original text and signature;
 * - unsigned with blank text: dropped;
 * - unsigned, in a message that has no signed thinking at all, on a non-signing
 *   endpoint: sent as `thinking` with an empty signature;
 * - any other unsigned block: sent as plain `text`.
 *
 * @param messages Conversation history.
 * @param model Target model; controls image support, non-signing handling and the
 *              mid-conversation system upgrade.
 * @param isOAuthToken When `true`, tool call names are passed through `applyClaudeToolPrefix`.
 * @returns The converted turns, in order.
 */
export function convertAnthropicMessages(
	messages: Message[],
	model: Model<"anthropic-messages">,
	isOAuthToken: boolean,
): AnthropicMessageParam[] {
	const params: AnthropicMessageParam[] = [];
	// Indices of params emitted from `developer` messages. After the main pass,
	// the ones whose placement satisfies Anthropic's mid-conversation rules are
	// upgraded from the `user` role to the authoritative `system` role.
	const developerParamIndices: number[] = [];

	const transformedMessages = transformMessages(messages, model, normalizeToolCallId);

	for (let i = 0; i < transformedMessages.length; i++) {
		const msg = transformedMessages[i];

		if (msg.role === "user" || msg.role === "developer") {
			if (!msg.content) continue;

			let content: string | ContentBlockParam[];
			if (typeof msg.content === "string") {
				if (msg.content.trim().length === 0) continue;
				content = msg.content.toWellFormed();
			} else {
				const contentBlocks = convertContentBlocks(msg.content, model.input.includes("image"));
				if (typeof contentBlocks === "string") {
					if (contentBlocks.trim().length === 0) continue;
					content = contentBlocks;
				} else {
					if (contentBlocks.length === 0) continue;
					content = contentBlocks;
				}
			}
			if (msg.role === "developer") developerParamIndices.push(params.length);
			params.push({ role: "user", content });
		} else if (msg.role === "assistant") {
			const blocks: ContentBlockParam[] = [];
			const hasSignedThinking = msg.content.some(
				block =>
					block.type === "thinking" && !!block.thinkingSignature && block.thinkingSignature.trim().length > 0,
			);

			for (const block of msg.content) {
				if (block.type === "text") {
					if (block.text.trim().length === 0) continue;
					blocks.push({
						type: "text",
						text: block.text.toWellFormed(),
					});
				} else if (block.type === "thinking") {
					const signature = block.thinkingSignature;
					if (signature && signature.trim().length > 0) {
						// Signed blocks are forwarded with their original text, even when blank.
						blocks.push({
							type: "thinking",
							thinking: block.thinking,
							signature,
						});
						continue;
					}
					if (block.thinking.trim().length === 0) continue;
					// Unsigned thinking is only kept as `thinking` when the message carries no
					// signed thinking at all and the endpoint does not verify signatures.
					if (!hasSignedThinking && isNonSigningAnthropicEndpoint(model)) {
						blocks.push({
							type: "thinking",
							thinking: block.thinking.toWellFormed(),
							signature: "",
						});
					} else {
						blocks.push({
							type: "text",
							text: block.thinking.toWellFormed(),
						});
					}
				} else if (block.type === "redactedThinking") {
					if (block.data.trim().length === 0) continue;
					blocks.push({
						type: "redacted_thinking",
						data: block.data,
					});
				} else if (block.type === "toolCall") {
					blocks.push({
						type: "tool_use",
						id: block.id,
						name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
						input: block.arguments ?? {},
					});
				}
			}
			if (blocks.length === 0) continue;
			params.push({
				role: "assistant",
				content: blocks,
			});
		} else if (msg.role === "toolResult") {
			// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
			const toolResults: ContentBlockParam[] = [];

			// Add the current tool result
			toolResults.push(buildToolResultBlock(model, msg));

			// Look ahead for consecutive toolResult messages
			let j = i + 1;
			while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
				const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
				toolResults.push(buildToolResultBlock(model, nextMsg));
				j++;
			}

			// Skip the messages we've already processed
			i = j - 1;

			// Add a single user message with all tool results
			params.push({
				role: "user",
				content: toolResults,
			});
		}
	}

	// Upgrade developer-origin params to mid-conversation `system` messages where
	// Anthropic's placement rules allow it (Opus 4.8+ on the first-party API).
	// Rules: a system message must immediately follow a `user` turn and must be
	// the last entry or be followed by an `assistant` turn — never first, and
	// never consecutive. Requiring the next param to be `assistant` (or absent)
	// covers both the "followed by assistant / last" and "no consecutive system"
	// constraints. Anything that does not qualify stays a `user` message.
	if (developerParamIndices.length > 0 && getAnthropicCompat(model).supportsMidConversationSystem) {
		for (const idx of developerParamIndices) {
			const followsUser = idx > 0 && params[idx - 1]?.role === "user";
			const next = params[idx + 1];
			const lastOrBeforeAssistant = idx === params.length - 1 || next?.role === "assistant";
			if (followsUser && lastOrBeforeAssistant) {
				params[idx] = { role: "system", content: params[idx].content };
			}
		}
	}
	// The conversation must not end on an assistant turn; append a continuation prompt.
	if (params.length > 0 && params[params.length - 1]?.role === "assistant") {
		params.push({ role: "user", content: "Continue." });
	}

	return params;
}
2612
+
2613
/**
 * JSON Schema whitelist for Anthropic tool `input_schema` nodes.
 *
 * Mirrors the Anthropic Python SDK's `lib/_parse/_transform.py::transform_schema`:
 * we keep only structural/metadata keywords Anthropic's validator honors, and demote
 * anything else into the node's `description` as `\n\n{key: value, ...}` so the model
 * still sees the constraint as a natural-language hint.
 *
 * `Set` (not `Record<string, true>`) because membership is probed against arbitrary
 * user/Zod-derived schema keys: a literal Record would falsely match prototype names
 * like `"toString"` and silently strip valid properties.
 */
const ANTHROPIC_TOOL_SCHEMA_UNIVERSAL_KEEP = new Set([
	"$ref",
	"$defs",
	"$schema",
	"definitions",
	"type",
	"anyOf",
	"oneOf",
	"allOf",
	"enum",
	"const",
	"description",
	"title",
	"default",
	"nullable",
]);
/** Keys preserved on `type: "object"` nodes (in addition to the universal set). */
const ANTHROPIC_TOOL_SCHEMA_OBJECT_KEEP = new Set(["properties", "required", "additionalProperties"]);
/** Keys preserved on `type: "array"` nodes; `minItems` only when its value is 0 or 1. */
const ANTHROPIC_TOOL_SCHEMA_ARRAY_KEEP = new Set(["items", "prefixItems", "minItems"]);
/** Keys preserved on `type: "string"` nodes; `format` only when its value is in the supported list. */
const ANTHROPIC_TOOL_SCHEMA_STRING_KEEP = new Set(["format"]);
/**
 * String `format` values Anthropic accepts; everything else (including `pattern`-style
 * format hints) gets demoted into `description`. Matches `SupportedStringFormats` in the
 * Anthropic SDK's `_transform.py`.
 */
const ANTHROPIC_TOOL_SCHEMA_STRING_FORMATS = new Set([
	"date-time",
	"time",
	"date",
	"duration",
	"email",
	"hostname",
	"uri",
	"ipv4",
	"ipv6",
	"uuid",
]);
// NOTE(review): presumably the tool names eligible for strict-mode schemas — the
// consumer is outside this section; confirm against its use site.
const ANTHROPIC_STRICT_TOOL_ALLOWLIST = new Set(["bash", "python", "edit", "find"]);
// NOTE(review): the three limits below look like caps applied when deciding whether
// tools can be sent in strict mode (tool count, optional parameters, union-typed
// parameters) — their use sites are outside this section; confirm before relying on this.
const MAX_ANTHROPIC_STRICT_TOOLS = 20;
const MAX_ANTHROPIC_STRICT_OPTIONAL_PARAMETERS = 24;
const MAX_ANTHROPIC_STRICT_UNION_PARAMETERS = 16;
2668
+
2669
/**
 * Reports whether a schema node is array-typed.
 *
 * A node counts as an array when `type` is exactly `"array"`, or when `type` is a list
 * that contains `"array"` and does not also contain `"object"`. The distinction matters
 * because `minItems` / `maxItems` apply to arrays only; Anthropic rejects them on
 * `type: "object"` (including `minItems: 0`/`1`).
 */
function isJsonSchemaArrayNode(schema: Record<string, unknown>): boolean {
	const declared = schema.type;
	if (!Array.isArray(declared)) return declared === "array";
	return declared.includes("array") && !declared.includes("object");
}
2676
+
2677
/**
 * Reports whether a schema node should be treated as an object.
 *
 * Array nodes (per `isJsonSchemaArrayNode`) are never objects. Otherwise the node is an
 * object when `type` is `"object"`, when `type` is a list containing `"object"`, or when
 * it carries a record-valued `properties` key even without an explicit `type`.
 */
function isJsonSchemaObjectNode(schema: Record<string, unknown>): boolean {
	if (isJsonSchemaArrayNode(schema)) return false;

	const declared = schema.type;
	const declaresObject = Array.isArray(declared) ? declared.includes("object") : declared === "object";
	return declaresObject || isRecord(schema.properties);
}
2684
+
2685
/**
 * Resolve the principal scalar type named by a `type` keyword.
 *
 * Anthropic accepts `type` either as one string or as a list such as
 * `["number", "null"]` for a nullable value. A plain string is returned as-is. For a
 * list, the first string entry other than `"null"` wins, so nullable variants are
 * treated as their underlying type.
 *
 * @param type Raw value of the schema's `type` keyword.
 * @returns The selected type name, or `undefined` when `type` is neither a string nor a
 *   list containing a non-`"null"` string.
 */
function pickAnthropicScalarType(type: unknown): string | undefined {
	if (typeof type === "string") return type;
	if (!Array.isArray(type)) return undefined;
	return type.find((entry: unknown): entry is string => typeof entry === "string" && entry !== "null");
}
2700
+
2701
/**
 * Look up the per-type keep-set for a scalar type.
 *
 * @param scalarType Type name as produced by `pickAnthropicScalarType`.
 * @returns The keep-set for `"object"`, `"array"` or `"string"`; `undefined` for every
 *   other type (only the universal keys apply to those nodes).
 */
function anthropicPerTypeKeep(scalarType: string | undefined): Set<string> | undefined {
	if (scalarType === "object") return ANTHROPIC_TOOL_SCHEMA_OBJECT_KEEP;
	if (scalarType === "array") return ANTHROPIC_TOOL_SCHEMA_ARRAY_KEEP;
	if (scalarType === "string") return ANTHROPIC_TOOL_SCHEMA_STRING_KEEP;
	return undefined;
}
2713
+
2714
/**
 * Per-schema-object memoization slot for the normalized Anthropic tool form. The result
 * is stamped onto the host via a `Symbol` property (mirroring `utils/schema/stamps.ts`)
 * rather than kept only in a `WeakMap`: warm reads are a direct property access, and a
 * cyclic schema resolves to the in-progress result because the stamp is written before
 * recursion starts.
 */
const kAnthropicToolNormal = Symbol("aery.schema.anthropic.toolNormal");

/**
 * Memo for schema objects that cannot carry the symbol stamp. `Object.defineProperty`
 * throws a `TypeError` on frozen, sealed, or otherwise non-extensible hosts, so those
 * are tracked here instead. This keeps both memoization and cycle resolution working
 * for them.
 */
const anthropicToolNormalFallback = new WeakMap<object, Record<string, unknown>>();

/**
 * Normalize a JSON Schema node for Anthropic tool `input_schema`.
 *
 * Applies the full whitelist semantics from the Anthropic Python SDK's
 * `lib/_parse/_transform.py::transform_schema`:
 *
 * 1. Universal keys (`$ref`, `$defs`, `type`, `anyOf`/`oneOf`/`allOf`, `enum`, `const`,
 *    `description`, `title`, `default`, `nullable`) are preserved on every node.
 * 2. Per-type keys are kept additively (object → `properties`/`required`/`additionalProperties`,
 *    array → `items`/`prefixItems` plus `minItems` only when 0 or 1, string → `format`
 *    only when in the supported value set).
 * 3. Everything else is demoted into the node's `description` as `\n\n{key: value, ...}`
 *    so the model still sees the constraint as a natural-language hint.
 *
 * Object nodes default to `additionalProperties: false`, but explicit open-map
 * declarations (`additionalProperties: true` or a schema literal — Zod's
 * `z.record(z.string(), z.unknown())` produces `{}`) are preserved. The strict-mode
 * pass downstream demotes those shapes to non-strict instead of fabricating a closed
 * object, so callers like the resolve tool keep working open-map semantics.
 *
 * The input is never structurally modified. The only write to it is the hidden,
 * non-enumerable memo stamp, and that write is skipped for non-extensible inputs.
 *
 * @param schema A schema node, an array of schema nodes, or any other value.
 * @returns Arrays are mapped element-wise, non-record values are returned unchanged,
 *   and records yield a new normalized object. Normalizing the same record again
 *   returns the same memoized object.
 */
export function normalizeAnthropicToolSchema(schema: unknown): unknown {
	if (Array.isArray(schema)) return schema.map(entry => normalizeAnthropicToolSchema(entry));
	if (!isRecord(schema)) return schema;

	const slot = schema as Record<symbol, Record<string, unknown> | undefined>;
	const existing = slot[kAnthropicToolNormal] ?? anthropicToolNormalFallback.get(schema);
	if (existing !== undefined) return existing;

	const result: Record<string, unknown> = {};
	// Register the result before recursing so cyclic schemas resolve to the in-progress
	// object. Non-extensible hosts cannot take the stamp, so they go through the WeakMap.
	if (Object.isExtensible(schema)) {
		Object.defineProperty(schema, kAnthropicToolNormal, { value: result, writable: true, configurable: true });
	} else {
		anthropicToolNormalFallback.set(schema, result);
	}

	const scalarType = pickAnthropicScalarType(schema.type);
	const perTypeKeep = anthropicPerTypeKeep(scalarType);
	// Keys that are not whitelisted; folded into `description` at the end.
	const spill: Array<[string, unknown]> = [];

	for (const key in schema) {
		if (!Object.hasOwn(schema, key)) continue;
		const value = schema[key];
		if (ANTHROPIC_TOOL_SCHEMA_UNIVERSAL_KEEP.has(key) || perTypeKeep?.has(key)) {
			result[key] = value;
		} else {
			spill.push([key, value]);
		}
	}

	// Per-type conditional keys: prune within the kept set.
	if (scalarType === "string") {
		const format = result.format;
		if (typeof format === "string" && !ANTHROPIC_TOOL_SCHEMA_STRING_FORMATS.has(format)) {
			spill.push(["format", format]);
			delete result.format;
		}
	}
	if (scalarType === "array" && result.minItems !== undefined) {
		const minItems = result.minItems;
		// Strict equality already implies a number, so only 0 and 1 pass.
		if (minItems !== 0 && minItems !== 1) {
			spill.push(["minItems", minItems]);
			delete result.minItems;
		}
	}
	if (scalarType === "object" && result.additionalProperties === undefined) {
		result.additionalProperties = false;
	}

	// Recurse on structural keys.
	if (isRecord(result.properties)) {
		const normalizedProperties: Record<string, unknown> = {};
		const sourceProperties = result.properties as Record<string, unknown>;
		for (const propName in sourceProperties) {
			if (!Object.hasOwn(sourceProperties, propName)) continue;
			normalizedProperties[propName] = normalizeAnthropicToolSchema(sourceProperties[propName]);
		}
		result.properties = normalizedProperties;
	}
	if (isRecord(result.additionalProperties)) {
		const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
		// A schema that normalizes to `{}` accepts anything, so it is expressed as `true`.
		if (isRecord(normalized) && Object.keys(normalized).length === 0) {
			result.additionalProperties = true;
		} else {
			result.additionalProperties = normalized;
		}
	}
	if (Array.isArray(result.items)) {
		result.items = result.items.map(item => normalizeAnthropicToolSchema(item));
	} else if (isRecord(result.items)) {
		result.items = normalizeAnthropicToolSchema(result.items);
	}
	if (Array.isArray(result.prefixItems)) {
		result.prefixItems = result.prefixItems.map(item => normalizeAnthropicToolSchema(item));
	}
	for (const key of COMBINATOR_KEYS) {
		const variants = result[key];
		if (Array.isArray(variants)) {
			result[key] = variants.map(variant => normalizeAnthropicToolSchema(variant));
		}
	}
	for (const defsKey of ["$defs", "definitions"] as const) {
		const definitions = result[defsKey];
		if (!isRecord(definitions)) continue;
		const normalizedDefs: Record<string, unknown> = {};
		const sourceDefs = definitions as Record<string, unknown>;
		for (const name in sourceDefs) {
			if (!Object.hasOwn(sourceDefs, name)) continue;
			normalizedDefs[name] = normalizeAnthropicToolSchema(sourceDefs[name]);
		}
		result[defsKey] = normalizedDefs;
	}

	spillToDescription(result, spill);
	return result;
}
2835
+
2836
/** The schema chosen for one tool, plus whether that tool is sent in strict mode. */
type AnthropicToolSchemaPlan = {
	/** Whether the tool is emitted with `strict: true`. */
	strict: boolean;
	/** Schema sent as the tool's `input_schema`. */
	inputSchema: AnthropicToolInputSchema;
};

/** Mutable counters threaded through one strict-schema normalization. */
type AnthropicStrictBudget = {
	/** Optional properties that may still be left out of `required`. */
	optionalRemaining: number;
	/** Optional properties consumed so far. */
	optionalCount: number;
	/** Union-typed nodes that may still be accepted or introduced. */
	unionRemaining: number;
	/** Union-typed nodes consumed so far. */
	unionCount: number;
};
2847
+
2848
/** Reports whether a node is union-typed: a list-valued `type` or a list-valued `anyOf`. */
function hasAnthropicUnionType(schema: Record<string, unknown>): boolean {
	if (Array.isArray(schema.type)) return true;
	return Array.isArray(schema.anyOf);
}
2851
+
2852
/**
 * Reports whether a node already admits `null`: either its list-valued `type` includes
 * `"null"`, or one of its `anyOf` variants is a record with `type: "null"`.
 */
function hasNullVariant(schema: Record<string, unknown>): boolean {
	const declared = schema.type;
	if (Array.isArray(declared) && declared.includes("null")) return true;

	const variants = schema.anyOf;
	if (!Array.isArray(variants)) return false;
	for (const variant of variants) {
		if (isRecord(variant) && variant.type === "null") return true;
	}
	return false;
}
2856
+
2857
/**
 * Produce a variant of `schema` that also accepts `null`.
 *
 * - A record that already has a null variant is returned as-is.
 * - A record with a list-valued `anyOf` or `type` gets `null` appended to that list on a
 *   shallow copy; this costs no union budget.
 * - Anything else is wrapped in a fresh `anyOf` with a `null` variant, which consumes
 *   one unit of union budget.
 *
 * @param schema Schema node to make nullable.
 * @param budget Strict budget; mutated only when a new `anyOf` wrapper is created.
 * @returns The nullable schema, or `undefined` when a wrapper is needed but the union
 *   budget is exhausted.
 */
function makeAnthropicNullableSchema(schema: unknown, budget: AnthropicStrictBudget): unknown | undefined {
	if (isRecord(schema)) {
		if (hasNullVariant(schema)) return schema;

		const existingVariants = schema.anyOf;
		if (Array.isArray(existingVariants)) {
			return { ...schema, anyOf: [...existingVariants, { type: "null" }] };
		}

		const existingTypes = schema.type;
		if (Array.isArray(existingTypes)) {
			return { ...schema, type: [...existingTypes, "null"] };
		}
	}

	const canSpendUnion = budget.unionRemaining > 0;
	if (!canSpendUnion) return undefined;

	budget.unionRemaining -= 1;
	budget.unionCount += 1;
	return { anyOf: [schema, { type: "null" }] };
}
2873
+
2874
/**
 * Recursively rewrite a schema node into its strict-mode form.
 *
 * Arrays are rewritten element by element and non-record values pass through. For a
 * record, the work happens on a shallow copy, in this order: charge the union budget
 * if the node is union-typed, rewrite `properties`, then `items`, `prefixItems`, the
 * combinator keywords and finally `$defs` / `definitions`.
 *
 * Properties not listed in the original `required` stay optional while optional
 * budget remains. After that they are made nullable and added to `required`.
 *
 * @param schema Node (or list of nodes) to rewrite.
 * @param budget Shared counters; mutated as optional and union slots are consumed.
 * @param cache Source-record → rewritten-record memo, registered before descending so
 *   repeated or cyclic references reuse the same copy.
 * @returns The rewritten node, or `undefined` when it cannot be expressed in strict
 *   mode (an object that is not closed with `additionalProperties: false`, an
 *   exhausted union budget, or any nested node that fails).
 */
function normalizeAnthropicStrictSchemaNode(
	schema: unknown,
	budget: AnthropicStrictBudget,
	cache: WeakMap<Record<string, unknown>, Record<string, unknown>>,
): unknown | undefined {
	const recurse = (node: unknown): unknown | undefined => normalizeAnthropicStrictSchemaNode(node, budget, cache);

	if (Array.isArray(schema)) {
		const strictEntries: unknown[] = [];
		for (const entry of schema) {
			const strictEntry = recurse(entry);
			if (strictEntry === undefined) return undefined;
			strictEntries.push(strictEntry);
		}
		return strictEntries;
	}

	if (!isRecord(schema)) return schema;

	const memoized = cache.get(schema);
	if (memoized) return memoized;

	// Strict tool use only supports closed objects. Open maps stay available on
	// the non-strict schema plan instead of producing an Anthropic 400.
	if (isJsonSchemaObjectNode(schema)) {
		if (schema.additionalProperties !== false) return undefined;
	}

	const strictNode: Record<string, unknown> = { ...schema };
	cache.set(schema, strictNode);

	if (hasAnthropicUnionType(strictNode)) {
		if (budget.unionRemaining <= 0) return undefined;
		budget.unionRemaining--;
		budget.unionCount++;
	}

	if (isRecord(strictNode.properties)) {
		// Only string entries of the original `required` list are honoured.
		const declaredRequired = new Set<string>();
		if (Array.isArray(strictNode.required)) {
			for (const entry of strictNode.required) {
				if (typeof entry === "string") declaredRequired.add(entry);
			}
		}

		const strictProperties: Record<string, unknown> = {};
		const strictRequired: string[] = [];

		for (const [propertyName, propertySchema] of Object.entries(strictNode.properties)) {
			const strictProperty = recurse(propertySchema);
			if (strictProperty === undefined) return undefined;

			if (declaredRequired.has(propertyName)) {
				strictProperties[propertyName] = strictProperty;
				strictRequired.push(propertyName);
			} else if (budget.optionalRemaining > 0) {
				// Still within budget: the property may remain optional.
				budget.optionalRemaining--;
				budget.optionalCount++;
				strictProperties[propertyName] = strictProperty;
			} else {
				// Out of optional budget: require the property but let it be null.
				const nullableProperty = makeAnthropicNullableSchema(strictProperty, budget);
				if (nullableProperty === undefined) return undefined;
				strictProperties[propertyName] = nullableProperty;
				strictRequired.push(propertyName);
			}
		}

		strictNode.properties = strictProperties;
		strictNode.required = strictRequired;
	}

	// `items` may be a single schema or a list of schemas; both go through the same path.
	if (Array.isArray(strictNode.items) || isRecord(strictNode.items)) {
		const strictItems = recurse(strictNode.items);
		if (strictItems === undefined) return undefined;
		strictNode.items = strictItems;
	}

	// Rewrites a list-valued keyword in place; returns false when any entry fails.
	const rewriteListKeyword = (keyword: string): boolean => {
		const current = strictNode[keyword];
		if (!Array.isArray(current)) return true;
		const strictList = recurse(current);
		if (strictList === undefined) return false;
		strictNode[keyword] = strictList;
		return true;
	};

	if (!rewriteListKeyword("prefixItems")) return undefined;
	for (const key of COMBINATOR_KEYS) {
		if (!rewriteListKeyword(key)) return undefined;
	}

	for (const defsKey of ["$defs", "definitions"] as const) {
		const definitions = strictNode[defsKey];
		if (!isRecord(definitions)) continue;

		const strictDefinitions: Record<string, unknown> = {};
		for (const [definitionName, definitionSchema] of Object.entries(definitions)) {
			const strictDefinition = recurse(definitionSchema);
			if (strictDefinition === undefined) return undefined;
			strictDefinitions[definitionName] = strictDefinition;
		}
		strictNode[defsKey] = strictDefinitions;
	}

	return strictNode;
}
2982
+
2983
/**
 * Rewrite a whole tool schema into strict form under the given budgets.
 *
 * @param schema Root schema of the tool.
 * @param optionalRemaining Optional-parameter slots still available.
 * @param unionRemaining Union-parameter slots still available.
 * @returns The strict schema together with how many optional and union slots it
 *   consumed, or `undefined` when the schema cannot be made strict.
 */
function normalizeAnthropicStrictSchema(
	schema: Record<string, unknown>,
	optionalRemaining: number,
	unionRemaining: number,
): { schema: Record<string, unknown>; optionalCount: number; unionCount: number } | undefined {
	const budget: AnthropicStrictBudget = { optionalRemaining, unionRemaining, optionalCount: 0, unionCount: 0 };
	const cache = new WeakMap<Record<string, unknown>, Record<string, unknown>>();

	const strictSchema = normalizeAnthropicStrictSchemaNode(schema, budget, cache);
	if (!isRecord(strictSchema)) return undefined;

	const { optionalCount, unionCount } = budget;
	return { schema: strictSchema, optionalCount, unionCount };
}
2998
+
2999
/**
 * Build the non-strict `input_schema` for a tool.
 *
 * Starts from the tool's wire schema, forces the root to `type: "object"`, substitutes
 * an empty `properties` record and an empty `required` list when those are missing or
 * malformed (non-string `required` entries are dropped), and normalizes the result.
 */
function buildAnthropicBaseToolInputSchema(tool: Tool): Record<string, unknown> {
	const jsonSchema = toolWireSchema(tool);

	const properties = isRecord(jsonSchema.properties) ? jsonSchema.properties : {};
	const required = Array.isArray(jsonSchema.required)
		? jsonSchema.required.filter((entry): entry is string => typeof entry === "string")
		: [];

	const rootSchema = { ...jsonSchema, type: "object", properties, required };
	return normalizeAnthropicToolSchema(rootSchema) as Record<string, unknown>;
}
3010
+
3011
/**
 * Decide, for every tool, which schema to send and whether to mark it strict.
 *
 * Every tool starts on its non-strict base schema. Unless strict mode is disabled
 * (globally via `NO_STRICT` or per call), allow-listed tools that have not opted out
 * with `strict: false` are upgraded in order while their schema can be made strict
 * within the shared tool, optional-parameter and union-parameter limits.
 *
 * @param tools Tools to plan for.
 * @param disableStrictTools When true, no tool is upgraded to strict.
 * @returns One plan per tool, index-aligned with `tools`.
 */
function buildAnthropicToolSchemaPlans(tools: Tool[], disableStrictTools = false): AnthropicToolSchemaPlan[] {
	const plans = tools.map((tool): AnthropicToolSchemaPlan => {
		const inputSchema = buildAnthropicBaseToolInputSchema(tool) as AnthropicToolInputSchema;
		return { inputSchema, strict: false };
	});
	if (NO_STRICT || disableStrictTools) return plans;

	const candidateIndexes: number[] = [];
	tools.forEach((tool, index) => {
		if (ANTHROPIC_STRICT_TOOL_ALLOWLIST.has(tool.name) && tool.strict !== false) {
			candidateIndexes.push(index);
		}
	});

	// Running totals across all tools promoted so far.
	const used = { tools: 0, optional: 0, union: 0 };
	for (const index of candidateIndexes) {
		if (used.tools >= MAX_ANTHROPIC_STRICT_TOOLS) break;

		const strictResult = normalizeAnthropicStrictSchema(
			plans[index].inputSchema as Record<string, unknown>,
			MAX_ANTHROPIC_STRICT_OPTIONAL_PARAMETERS - used.optional,
			MAX_ANTHROPIC_STRICT_UNION_PARAMETERS - used.union,
		);
		// A tool that cannot be made strict keeps its base plan; later tools still get a chance.
		if (!strictResult) continue;

		plans[index] = { inputSchema: strictResult.schema as AnthropicToolInputSchema, strict: true };
		used.tools += 1;
		used.optional += strictResult.optionalCount;
		used.union += strictResult.unionCount;
	}

	return plans;
}
3049
+
3050
/**
 * Convert tools into their Anthropic wire representation.
 *
 * @param tools Tools to convert; a falsy value yields an empty list.
 * @param isOAuthToken When true, tool names are passed through `applyClaudeToolPrefix`.
 * @param disableStrictTools When true, no tool is sent in strict mode.
 * @param supportsEagerToolInputStreaming When true, every tool carries
 *   `eager_input_streaming: true`.
 * @returns One wire tool per input tool, in the same order.
 */
function convertTools(
	tools: Tool[],
	isOAuthToken: boolean,
	disableStrictTools = false,
	supportsEagerToolInputStreaming = true,
): AnthropicWireTool[] {
	if (!tools) return [];

	const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
	return tools.map((tool, index) => {
		const plan = schemaPlans[index];
		const wireName = isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name;
		const wireBase = {
			name: wireName,
			description: tool.description || "",
			input_schema: plan.inputSchema,
		};
		// Optional flags are spread in only when set, so they are absent rather than false.
		return {
			...wireBase,
			...(supportsEagerToolInputStreaming ? { eager_input_streaming: true } : {}),
			...(plan.strict ? { strict: true } : {}),
		};
	});
}
3073
+
3074
/**
 * Translate an Anthropic stop reason into the internal `StopReason`.
 *
 * @param reason Stop reason string as reported by the API.
 * @returns The corresponding internal stop reason.
 * @throws Error when `reason` is not one of the known values. Unknown reasons are not
 *   mapped to a fallback.
 */
function mapStopReason(reason: string): StopReason {
	switch (reason) {
		case "end_turn":
		case "pause_turn": // Stop is good enough -> resubmit
		case "stop_sequence": // We don't supply stop sequences, so this should never happen
			return "stop";
		case "max_tokens":
			return "length";
		case "tool_use":
			return "toolUse";
		case "refusal":
		case "sensitive": // Content flagged by safety filters (not yet in SDK types)
			return "error";
		default:
			// The API may add new values; surface them loudly instead of guessing.
			throw new Error(`Unhandled stop reason: ${reason}`);
	}
}