@aryee337/aery-ai 0.2.28 → 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417)
  1. package/CHANGELOG.md +2914 -0
  2. package/README.md +614 -813
  3. package/package.json +140 -105
  4. package/src/api-registry.ts +96 -0
  5. package/src/auth-broker/client.ts +358 -0
  6. package/src/auth-broker/index.ts +5 -0
  7. package/src/auth-broker/refresher.ts +117 -0
  8. package/src/auth-broker/remote-store.ts +623 -0
  9. package/src/auth-broker/server.ts +644 -0
  10. package/src/auth-broker/types.ts +127 -0
  11. package/src/auth-broker/wire-schemas.ts +200 -0
  12. package/src/auth-gateway/http.ts +194 -0
  13. package/src/auth-gateway/index.ts +3 -0
  14. package/src/auth-gateway/server.ts +818 -0
  15. package/src/auth-gateway/types.ts +143 -0
  16. package/src/auth-storage.ts +4422 -0
  17. package/src/index.ts +54 -0
  18. package/src/model-cache.ts +129 -0
  19. package/src/model-manager.ts +469 -0
  20. package/src/model-thinking.ts +782 -0
  21. package/src/models.json +83530 -0
  22. package/src/models.json.d.ts +9 -0
  23. package/src/models.ts +56 -0
  24. package/src/prompts/turn-aborted-guidance.md +4 -0
  25. package/src/provider-details.ts +90 -0
  26. package/src/provider-models/bundled-references.ts +38 -0
  27. package/src/provider-models/descriptors.ts +355 -0
  28. package/src/provider-models/google.ts +88 -0
  29. package/src/provider-models/index.ts +5 -0
  30. package/src/provider-models/ollama.ts +153 -0
  31. package/src/provider-models/openai-compat.ts +2817 -0
  32. package/src/provider-models/special.ts +67 -0
  33. package/src/providers/aery-native-client.ts +228 -0
  34. package/src/providers/aery-native-server.ts +212 -0
  35. package/src/providers/amazon-bedrock.ts +873 -0
  36. package/src/providers/anthropic-client.ts +318 -0
  37. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  38. package/src/providers/anthropic-messages-server.ts +683 -0
  39. package/src/providers/anthropic-wire.ts +268 -0
  40. package/src/providers/anthropic.ts +3094 -0
  41. package/src/providers/aws-credentials.ts +501 -0
  42. package/src/providers/aws-eventstream.ts +185 -0
  43. package/src/providers/aws-sigv4.ts +218 -0
  44. package/src/providers/azure-openai-responses.ts +361 -0
  45. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  46. package/src/providers/cursor/proto/agent.proto +3526 -0
  47. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  48. package/src/providers/cursor/proto/buf.yaml +17 -0
  49. package/src/providers/cursor.ts +2621 -0
  50. package/src/providers/error-message.ts +21 -0
  51. package/src/providers/github-copilot-headers.ts +140 -0
  52. package/src/providers/gitlab-duo.ts +372 -0
  53. package/src/providers/google-auth.ts +252 -0
  54. package/src/providers/google-gemini-cli.ts +809 -0
  55. package/src/providers/google-gemini-headers.ts +41 -0
  56. package/src/providers/google-shared.ts +917 -0
  57. package/src/providers/google-types.ts +167 -0
  58. package/src/providers/google-vertex.ts +91 -0
  59. package/src/providers/google.ts +41 -0
  60. package/src/providers/grammar.ts +70 -0
  61. package/src/providers/kimi.ts +52 -0
  62. package/src/providers/mock.ts +496 -0
  63. package/src/providers/ollama.ts +644 -0
  64. package/src/providers/openai-anthropic-shim.ts +138 -0
  65. package/src/providers/openai-chat-server-schema.ts +252 -0
  66. package/src/providers/openai-chat-server.ts +647 -0
  67. package/src/providers/openai-codex/constants.ts +43 -0
  68. package/src/providers/openai-codex/request-transformer.ts +161 -0
  69. package/src/providers/openai-codex/response-handler.ts +81 -0
  70. package/src/providers/openai-codex-responses.ts +3018 -0
  71. package/src/providers/openai-completions-compat.ts +300 -0
  72. package/src/providers/openai-completions.ts +1979 -0
  73. package/src/providers/openai-responses-server-schema.ts +290 -0
  74. package/src/providers/openai-responses-server.ts +1183 -0
  75. package/src/providers/openai-responses-shared.ts +873 -0
  76. package/src/providers/openai-responses.ts +679 -0
  77. package/src/providers/register-builtins.ts +436 -0
  78. package/src/providers/synthetic.ts +50 -0
  79. package/src/providers/transform-messages.ts +382 -0
  80. package/src/providers/vision-guard.ts +31 -0
  81. package/src/providers/xai-responses.ts +82 -0
  82. package/src/rate-limit-utils.ts +84 -0
  83. package/src/stream.ts +1065 -0
  84. package/src/types.ts +944 -0
  85. package/src/usage/claude.ts +482 -0
  86. package/src/usage/gemini.ts +250 -0
  87. package/src/usage/github-copilot.ts +421 -0
  88. package/src/usage/google-antigravity.ts +201 -0
  89. package/src/usage/kimi.ts +271 -0
  90. package/src/usage/minimax-code.ts +31 -0
  91. package/src/usage/openai-codex.ts +503 -0
  92. package/src/usage/shared.ts +10 -0
  93. package/src/usage/zai.ts +247 -0
  94. package/src/usage.ts +185 -0
  95. package/src/utils/abort.ts +51 -0
  96. package/src/utils/abortable-iterator.ts +69 -0
  97. package/src/utils/anthropic-auth.ts +93 -0
  98. package/src/utils/discovery/antigravity.ts +261 -0
  99. package/src/utils/discovery/codex.ts +371 -0
  100. package/src/utils/discovery/cursor.ts +306 -0
  101. package/src/utils/discovery/gemini.ts +248 -0
  102. package/src/utils/discovery/index.ts +4 -0
  103. package/src/utils/discovery/openai-compatible.ts +224 -0
  104. package/src/utils/event-stream.ts +142 -0
  105. package/src/utils/fireworks-model-id.ts +30 -0
  106. package/src/utils/foundry.ts +8 -0
  107. package/src/utils/http-inspector.ts +176 -0
  108. package/src/utils/idle-iterator.ts +267 -0
  109. package/src/utils/json-parse.ts +182 -0
  110. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  111. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  112. package/src/utils/oauth/anthropic.ts +273 -0
  113. package/src/utils/oauth/api-key-login.ts +87 -0
  114. package/src/utils/oauth/api-key-validation.ts +92 -0
  115. package/src/utils/oauth/callback-server.ts +276 -0
  116. package/src/utils/oauth/cerebras.ts +16 -0
  117. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  118. package/src/utils/oauth/cursor.ts +157 -0
  119. package/src/utils/oauth/deepseek.ts +53 -0
  120. package/src/utils/oauth/firepass.ts +24 -0
  121. package/src/utils/oauth/fireworks.ts +15 -0
  122. package/src/utils/oauth/github-copilot.ts +362 -0
  123. package/src/utils/oauth/gitlab-duo.ts +123 -0
  124. package/src/utils/oauth/google-antigravity.ts +200 -0
  125. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  126. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  127. package/src/utils/oauth/huggingface.ts +62 -0
  128. package/src/utils/oauth/index.ts +484 -0
  129. package/src/utils/oauth/kagi.ts +47 -0
  130. package/src/utils/oauth/kilo.ts +87 -0
  131. package/src/utils/oauth/kimi.ts +254 -0
  132. package/src/utils/oauth/litellm.ts +47 -0
  133. package/src/utils/oauth/lm-studio.ts +38 -0
  134. package/src/utils/oauth/minimax-code.ts +78 -0
  135. package/src/utils/oauth/moonshot.ts +23 -0
  136. package/src/utils/oauth/nanogpt.ts +15 -0
  137. package/src/utils/oauth/nvidia.ts +70 -0
  138. package/src/utils/oauth/oauth.html +203 -0
  139. package/src/utils/oauth/ollama-cloud.ts +28 -0
  140. package/src/utils/oauth/ollama.ts +47 -0
  141. package/src/utils/oauth/openai-codex.ts +299 -0
  142. package/src/utils/oauth/opencode.ts +49 -0
  143. package/src/utils/oauth/openrouter.ts +20 -0
  144. package/src/utils/oauth/parallel.ts +46 -0
  145. package/src/utils/oauth/perplexity.ts +206 -0
  146. package/src/utils/oauth/pkce.ts +18 -0
  147. package/src/utils/oauth/qianfan.ts +58 -0
  148. package/src/utils/oauth/qwen-portal.ts +60 -0
  149. package/src/utils/oauth/synthetic.ts +15 -0
  150. package/src/utils/oauth/tavily.ts +46 -0
  151. package/src/utils/oauth/together.ts +16 -0
  152. package/src/utils/oauth/types.ts +99 -0
  153. package/src/utils/oauth/venice.ts +59 -0
  154. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  155. package/src/utils/oauth/vllm.ts +40 -0
  156. package/src/utils/oauth/wafer.ts +50 -0
  157. package/src/utils/oauth/xai-oauth.ts +342 -0
  158. package/src/utils/oauth/xiaomi.ts +139 -0
  159. package/src/utils/oauth/zai.ts +60 -0
  160. package/src/utils/oauth/zenmux.ts +15 -0
  161. package/src/utils/oauth/zhipu.ts +60 -0
  162. package/src/utils/overflow.ts +137 -0
  163. package/src/utils/parse-bind.ts +54 -0
  164. package/src/utils/provider-response.ts +30 -0
  165. package/src/utils/request-debug.ts +336 -0
  166. package/src/utils/retry-after.ts +110 -0
  167. package/src/utils/retry.ts +54 -0
  168. package/src/utils/schema/CONSTRAINTS.md +164 -0
  169. package/src/utils/schema/adapt.ts +36 -0
  170. package/src/utils/schema/compatibility.ts +435 -0
  171. package/src/utils/schema/dereference.ts +98 -0
  172. package/src/utils/schema/draft.ts +341 -0
  173. package/src/utils/schema/equality.ts +97 -0
  174. package/src/utils/schema/fields.ts +191 -0
  175. package/src/utils/schema/index.ts +13 -0
  176. package/src/utils/schema/json-schema-validator.ts +577 -0
  177. package/src/utils/schema/meta-validator.ts +167 -0
  178. package/src/utils/schema/normalize.ts +1588 -0
  179. package/src/utils/schema/spill.ts +43 -0
  180. package/src/utils/schema/stamps.ts +97 -0
  181. package/src/utils/schema/types.ts +10 -0
  182. package/src/utils/schema/wire.ts +293 -0
  183. package/src/utils/schema/zod-decontaminate.ts +331 -0
  184. package/src/utils/sdk-stream-timeout.ts +43 -0
  185. package/src/utils/sse-debug.ts +289 -0
  186. package/src/utils/stream-markup-healing.ts +612 -0
  187. package/src/utils/tool-choice.ts +99 -0
  188. package/src/utils/validation.ts +1024 -0
  189. package/src/utils.ts +166 -0
  190. package/dist/api-registry.d.ts +0 -20
  191. package/dist/api-registry.d.ts.map +0 -1
  192. package/dist/api-registry.js +0 -44
  193. package/dist/api-registry.js.map +0 -1
  194. package/dist/bedrock-provider.d.ts +0 -5
  195. package/dist/bedrock-provider.d.ts.map +0 -1
  196. package/dist/bedrock-provider.js +0 -6
  197. package/dist/bedrock-provider.js.map +0 -1
  198. package/dist/cli.d.ts +0 -3
  199. package/dist/cli.d.ts.map +0 -1
  200. package/dist/cli.js +0 -130
  201. package/dist/cli.js.map +0 -1
  202. package/dist/env-api-keys.d.ts +0 -18
  203. package/dist/env-api-keys.d.ts.map +0 -1
  204. package/dist/env-api-keys.js +0 -178
  205. package/dist/env-api-keys.js.map +0 -1
  206. package/dist/image-models.d.ts +0 -10
  207. package/dist/image-models.d.ts.map +0 -1
  208. package/dist/image-models.generated.d.ts +0 -440
  209. package/dist/image-models.generated.d.ts.map +0 -1
  210. package/dist/image-models.generated.js +0 -442
  211. package/dist/image-models.generated.js.map +0 -1
  212. package/dist/image-models.js +0 -23
  213. package/dist/image-models.js.map +0 -1
  214. package/dist/images-api-registry.d.ts +0 -14
  215. package/dist/images-api-registry.d.ts.map +0 -1
  216. package/dist/images-api-registry.js +0 -22
  217. package/dist/images-api-registry.js.map +0 -1
  218. package/dist/images.d.ts +0 -4
  219. package/dist/images.d.ts.map +0 -1
  220. package/dist/images.js +0 -14
  221. package/dist/images.js.map +0 -1
  222. package/dist/index.d.ts +0 -32
  223. package/dist/index.d.ts.map +0 -1
  224. package/dist/index.js +0 -20
  225. package/dist/index.js.map +0 -1
  226. package/dist/models.d.ts +0 -18
  227. package/dist/models.d.ts.map +0 -1
  228. package/dist/models.generated.d.ts +0 -17707
  229. package/dist/models.generated.d.ts.map +0 -1
  230. package/dist/models.generated.js +0 -16561
  231. package/dist/models.generated.js.map +0 -1
  232. package/dist/models.js +0 -71
  233. package/dist/models.js.map +0 -1
  234. package/dist/oauth.d.ts +0 -2
  235. package/dist/oauth.d.ts.map +0 -1
  236. package/dist/oauth.js +0 -2
  237. package/dist/oauth.js.map +0 -1
  238. package/dist/providers/aery-error-formatting.d.ts +0 -13
  239. package/dist/providers/aery-error-formatting.d.ts.map +0 -1
  240. package/dist/providers/aery-error-formatting.js +0 -112
  241. package/dist/providers/aery-error-formatting.js.map +0 -1
  242. package/dist/providers/amazon-bedrock.d.ts +0 -38
  243. package/dist/providers/amazon-bedrock.d.ts.map +0 -1
  244. package/dist/providers/amazon-bedrock.js +0 -763
  245. package/dist/providers/amazon-bedrock.js.map +0 -1
  246. package/dist/providers/anthropic.d.ts +0 -71
  247. package/dist/providers/anthropic.d.ts.map +0 -1
  248. package/dist/providers/anthropic.js +0 -949
  249. package/dist/providers/anthropic.js.map +0 -1
  250. package/dist/providers/azure-openai-responses.d.ts +0 -15
  251. package/dist/providers/azure-openai-responses.d.ts.map +0 -1
  252. package/dist/providers/azure-openai-responses.js +0 -225
  253. package/dist/providers/azure-openai-responses.js.map +0 -1
  254. package/dist/providers/cloudflare.d.ts +0 -13
  255. package/dist/providers/cloudflare.d.ts.map +0 -1
  256. package/dist/providers/cloudflare.js +0 -26
  257. package/dist/providers/cloudflare.js.map +0 -1
  258. package/dist/providers/faux.d.ts +0 -56
  259. package/dist/providers/faux.d.ts.map +0 -1
  260. package/dist/providers/faux.js +0 -368
  261. package/dist/providers/faux.js.map +0 -1
  262. package/dist/providers/github-copilot-headers.d.ts +0 -8
  263. package/dist/providers/github-copilot-headers.d.ts.map +0 -1
  264. package/dist/providers/github-copilot-headers.js +0 -29
  265. package/dist/providers/github-copilot-headers.js.map +0 -1
  266. package/dist/providers/google-gemini-cli.d.ts +0 -74
  267. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  268. package/dist/providers/google-gemini-cli.js +0 -779
  269. package/dist/providers/google-gemini-cli.js.map +0 -1
  270. package/dist/providers/google-shared.d.ts +0 -70
  271. package/dist/providers/google-shared.d.ts.map +0 -1
  272. package/dist/providers/google-shared.js +0 -329
  273. package/dist/providers/google-shared.js.map +0 -1
  274. package/dist/providers/google-vertex.d.ts +0 -15
  275. package/dist/providers/google-vertex.d.ts.map +0 -1
  276. package/dist/providers/google-vertex.js +0 -442
  277. package/dist/providers/google-vertex.js.map +0 -1
  278. package/dist/providers/google.d.ts +0 -13
  279. package/dist/providers/google.d.ts.map +0 -1
  280. package/dist/providers/google.js +0 -400
  281. package/dist/providers/google.js.map +0 -1
  282. package/dist/providers/images/openrouter.d.ts +0 -3
  283. package/dist/providers/images/openrouter.d.ts.map +0 -1
  284. package/dist/providers/images/openrouter.js +0 -129
  285. package/dist/providers/images/openrouter.js.map +0 -1
  286. package/dist/providers/images/register-builtins.d.ts +0 -4
  287. package/dist/providers/images/register-builtins.d.ts.map +0 -1
  288. package/dist/providers/images/register-builtins.js +0 -34
  289. package/dist/providers/images/register-builtins.js.map +0 -1
  290. package/dist/providers/mistral.d.ts +0 -25
  291. package/dist/providers/mistral.d.ts.map +0 -1
  292. package/dist/providers/mistral.js +0 -535
  293. package/dist/providers/mistral.js.map +0 -1
  294. package/dist/providers/openai-codex-responses.d.ts +0 -30
  295. package/dist/providers/openai-codex-responses.d.ts.map +0 -1
  296. package/dist/providers/openai-codex-responses.js +0 -1090
  297. package/dist/providers/openai-codex-responses.js.map +0 -1
  298. package/dist/providers/openai-completions.d.ts +0 -19
  299. package/dist/providers/openai-completions.d.ts.map +0 -1
  300. package/dist/providers/openai-completions.js +0 -950
  301. package/dist/providers/openai-completions.js.map +0 -1
  302. package/dist/providers/openai-prompt-cache.d.ts +0 -3
  303. package/dist/providers/openai-prompt-cache.d.ts.map +0 -1
  304. package/dist/providers/openai-prompt-cache.js +0 -10
  305. package/dist/providers/openai-prompt-cache.js.map +0 -1
  306. package/dist/providers/openai-responses-shared.d.ts +0 -18
  307. package/dist/providers/openai-responses-shared.d.ts.map +0 -1
  308. package/dist/providers/openai-responses-shared.js +0 -492
  309. package/dist/providers/openai-responses-shared.js.map +0 -1
  310. package/dist/providers/openai-responses.d.ts +0 -13
  311. package/dist/providers/openai-responses.d.ts.map +0 -1
  312. package/dist/providers/openai-responses.js +0 -237
  313. package/dist/providers/openai-responses.js.map +0 -1
  314. package/dist/providers/register-builtins.d.ts +0 -38
  315. package/dist/providers/register-builtins.d.ts.map +0 -1
  316. package/dist/providers/register-builtins.js +0 -278
  317. package/dist/providers/register-builtins.js.map +0 -1
  318. package/dist/providers/simple-options.d.ts +0 -8
  319. package/dist/providers/simple-options.d.ts.map +0 -1
  320. package/dist/providers/simple-options.js +0 -41
  321. package/dist/providers/simple-options.js.map +0 -1
  322. package/dist/providers/transform-messages.d.ts +0 -8
  323. package/dist/providers/transform-messages.d.ts.map +0 -1
  324. package/dist/providers/transform-messages.js +0 -184
  325. package/dist/providers/transform-messages.js.map +0 -1
  326. package/dist/session-resources.d.ts +0 -4
  327. package/dist/session-resources.d.ts.map +0 -1
  328. package/dist/session-resources.js +0 -22
  329. package/dist/session-resources.js.map +0 -1
  330. package/dist/stream.d.ts +0 -8
  331. package/dist/stream.d.ts.map +0 -1
  332. package/dist/stream.js +0 -27
  333. package/dist/stream.js.map +0 -1
  334. package/dist/types.d.ts +0 -498
  335. package/dist/types.d.ts.map +0 -1
  336. package/dist/types.js +0 -2
  337. package/dist/types.js.map +0 -1
  338. package/dist/utils/diagnostics.d.ts +0 -19
  339. package/dist/utils/diagnostics.d.ts.map +0 -1
  340. package/dist/utils/diagnostics.js +0 -25
  341. package/dist/utils/diagnostics.js.map +0 -1
  342. package/dist/utils/event-stream.d.ts +0 -21
  343. package/dist/utils/event-stream.d.ts.map +0 -1
  344. package/dist/utils/event-stream.js +0 -81
  345. package/dist/utils/event-stream.js.map +0 -1
  346. package/dist/utils/hash.d.ts +0 -3
  347. package/dist/utils/hash.d.ts.map +0 -1
  348. package/dist/utils/hash.js +0 -14
  349. package/dist/utils/hash.js.map +0 -1
  350. package/dist/utils/headers.d.ts +0 -2
  351. package/dist/utils/headers.d.ts.map +0 -1
  352. package/dist/utils/headers.js +0 -8
  353. package/dist/utils/headers.js.map +0 -1
  354. package/dist/utils/json-parse.d.ts +0 -16
  355. package/dist/utils/json-parse.d.ts.map +0 -1
  356. package/dist/utils/json-parse.js +0 -113
  357. package/dist/utils/json-parse.js.map +0 -1
  358. package/dist/utils/node-http-proxy.d.ts +0 -10
  359. package/dist/utils/node-http-proxy.d.ts.map +0 -1
  360. package/dist/utils/node-http-proxy.js +0 -97
  361. package/dist/utils/node-http-proxy.js.map +0 -1
  362. package/dist/utils/oauth/anthropic.d.ts +0 -25
  363. package/dist/utils/oauth/anthropic.d.ts.map +0 -1
  364. package/dist/utils/oauth/anthropic.js +0 -335
  365. package/dist/utils/oauth/anthropic.js.map +0 -1
  366. package/dist/utils/oauth/device-code.d.ts +0 -19
  367. package/dist/utils/oauth/device-code.d.ts.map +0 -1
  368. package/dist/utils/oauth/device-code.js +0 -55
  369. package/dist/utils/oauth/device-code.js.map +0 -1
  370. package/dist/utils/oauth/github-copilot.d.ts +0 -30
  371. package/dist/utils/oauth/github-copilot.d.ts.map +0 -1
  372. package/dist/utils/oauth/github-copilot.js +0 -268
  373. package/dist/utils/oauth/github-copilot.js.map +0 -1
  374. package/dist/utils/oauth/google-antigravity.d.ts +0 -26
  375. package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
  376. package/dist/utils/oauth/google-antigravity.js +0 -377
  377. package/dist/utils/oauth/google-antigravity.js.map +0 -1
  378. package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
  379. package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
  380. package/dist/utils/oauth/google-gemini-cli.js +0 -482
  381. package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
  382. package/dist/utils/oauth/index.d.ts +0 -63
  383. package/dist/utils/oauth/index.d.ts.map +0 -1
  384. package/dist/utils/oauth/index.js +0 -131
  385. package/dist/utils/oauth/index.js.map +0 -1
  386. package/dist/utils/oauth/oauth-page.d.ts +0 -3
  387. package/dist/utils/oauth/oauth-page.d.ts.map +0 -1
  388. package/dist/utils/oauth/oauth-page.js +0 -105
  389. package/dist/utils/oauth/oauth-page.js.map +0 -1
  390. package/dist/utils/oauth/openai-codex.d.ts +0 -34
  391. package/dist/utils/oauth/openai-codex.d.ts.map +0 -1
  392. package/dist/utils/oauth/openai-codex.js +0 -385
  393. package/dist/utils/oauth/openai-codex.js.map +0 -1
  394. package/dist/utils/oauth/pkce.d.ts +0 -13
  395. package/dist/utils/oauth/pkce.d.ts.map +0 -1
  396. package/dist/utils/oauth/pkce.js +0 -31
  397. package/dist/utils/oauth/pkce.js.map +0 -1
  398. package/dist/utils/oauth/types.d.ts +0 -64
  399. package/dist/utils/oauth/types.d.ts.map +0 -1
  400. package/dist/utils/oauth/types.js +0 -2
  401. package/dist/utils/oauth/types.js.map +0 -1
  402. package/dist/utils/overflow.d.ts +0 -56
  403. package/dist/utils/overflow.d.ts.map +0 -1
  404. package/dist/utils/overflow.js +0 -151
  405. package/dist/utils/overflow.js.map +0 -1
  406. package/dist/utils/sanitize-unicode.d.ts +0 -22
  407. package/dist/utils/sanitize-unicode.d.ts.map +0 -1
  408. package/dist/utils/sanitize-unicode.js +0 -26
  409. package/dist/utils/sanitize-unicode.js.map +0 -1
  410. package/dist/utils/typebox-helpers.d.ts +0 -17
  411. package/dist/utils/typebox-helpers.d.ts.map +0 -1
  412. package/dist/utils/typebox-helpers.js +0 -21
  413. package/dist/utils/typebox-helpers.js.map +0 -1
  414. package/dist/utils/validation.d.ts +0 -18
  415. package/dist/utils/validation.d.ts.map +0 -1
  416. package/dist/utils/validation.js +0 -281
  417. package/dist/utils/validation.js.map +0 -1
package/CHANGELOG.md ADDED
@@ -0,0 +1,2914 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [15.8.0] - 2026-06-02
6
+ ### Added
7
+
8
+ - Added `AnthropicMessagesClient` and related Anthropic wire types/errors via `anthropic-client` export so callers can build a standalone Anthropic Messages client without depending on `@anthropic-ai/sdk`
9
+ - Added `parseClaudeRateLimitHeaders` and `AuthStorage.ingestUsageHeaders` so Anthropic rate-limit response headers can warm the per-credential usage cache with throttling while preserving per-tier data from the last full usage report.
10
+
11
+ ### Changed
12
+
13
+ - Changed Anthropic request handling to use the package-local `AnthropicMessagesClient` implementation instead of `@anthropic-ai/sdk` as the default transport
14
+ - Updated the `AnthropicOptions.client` surface to accept any `AnthropicMessagesClientLike` implementation with `messages.create`, enabling custom compatible clients
15
+ - Changed generated OAuth metadata `user_id` to use a deterministic `device_id` derived from the install ID instead of a random value
16
+ - `claudeCodeVersion` bumped to `2.1.148` to match the current Claude Code release.
17
+ - `X-Stainless-Package-Version` updated to `0.94.0` (matches the bundled `@anthropic-ai/sdk` version); `X-Stainless-Runtime-Version` pinned to `v24.3.0` (Bun version bundled with CC 2.1.148); `X-Stainless-Os` header key corrected to `X-Stainless-OS`.
18
+ - `createClaudeBillingHeader` now emits a deterministic billing header (`cc_version=<claudeCodeVersion>.<suffix>; cc_entrypoint=cli; cch=00000;`), where `<suffix>` is the first 3 hex chars of `SHA-256(salt + msg[4] + msg[7] + msg[20] + version)` instead of random bytes. The fingerprint seed is taken from the first **user** message (skipping synthetic/developer injections), mirroring Claude Code's `computeFingerprintFromMessages`.
19
+ - `cch` attestation implemented: `cch=00000` is a placeholder that, for OAuth requests, `wrapFetchForCch` rewrites on the wire to `XXHash64(body, 0x4D659218E32A3268) & 0xFFFFF` formatted as 5 lowercase hex chars, computed in-place via `Bun.hash.xxHash64`. The rewrite is anchored to the `system[0]` billing-header prefix so user content is never mutated, and is installed only when a billing-header prefix is present (OAuth turns).
20
+ - `anthropic-beta` header set for OAuth model discovery and Claude usage-API requests expanded to add `context-1m-2025-08-07`, `redact-thinking-2026-02-12`, `mid-conversation-system-2026-04-07`, `advanced-tool-use-2025-11-20`, `effort-2025-11-24`, and `extended-cache-ttl-2025-04-11`. The usage-API `user-agent` is bumped to `claude-cli/2.1.158 (external, cli)`.
21
+ - Reasoning models now append `effort-2025-11-24` to the per-request `Anthropic-Beta` header (matches Claude Code).
22
+ - `buildAnthropicSystemBlocks` (CC-instruction mode) now emits the same 3-block layout as Claude Code: billing header (never cached), system instruction (cached), and all user content merged into one block joined with `\n\n` (cached). Previously it emitted one block per item and cached only the last one, which fingerprinted the caller by block count.
23
+ - `applyPromptCaching` now matches Claude Code's breakpoint layout: 2 system (instruction + merged content) + 2 message, with no tool breakpoint. The tool breakpoint was redundant — tools follow system in the token sequence, so when system changes the tool cache prefix also changes. The instruction block (system[1]) is stable across every request and now gets its own guaranteed-hit breakpoint.
24
+ - `applyPromptCaching` now caches the last two messages regardless of role instead of the last two *user* messages. The penultimate assistant message (tool calls + response from the previous turn) is larger and more recently created than the penultimate user message, making it the higher-value cache target.
25
+ - OAuth scope set expanded: added `user:sessions:claude_code`, `user:mcp_servers`, `user:file_upload`. `AUTHORIZE_URL` stays at `claude.ai/oauth/authorize` and `TOKEN_URL` stays at `api.anthropic.com/v1/oauth/token` — the `platform.claude.com` equivalents are CC's console-credential flow and do not grant `user:inference`, which AERY requires for direct OAuth-token inference.
26
+ - Token refresh POST now sends `anthropic-beta: oauth-2025-04-20` and `User-Agent: anthropic-sdk-typescript/0.94.0 userOAuthProvider` (CC sends these on refresh but not on the initial code exchange).
27
+
28
+ ### Fixed
29
+
30
+ - Fixed tool argument validation to wrap a plain string in a singleton array when the schema requires an array, allowing tool-level path/list normalization to recover from bare string arguments.
31
+ - Restored `eager_input_streaming` and strict flags on OAuth Anthropic tool definitions when model compatibility allows eager streaming.
32
+ - Fixed OAuth stream calls with injected custom clients missing a `beta` client by falling back to `client.messages.create` instead of requiring `client.beta.messages.create`
33
+ - Fixed direct use of the internal API client typing so retries/timeouts and malformed-error classification remain compatible without requiring the external SDK
34
+ - Fixed Cursor provider requests failing with `Cannot send empty user message to Cursor API` after tool-result history by selecting the latest user/developer turn instead of assuming the final context message is the active user turn.
35
+ - Fixed Anthropic web search dropping `ANTHROPIC_CUSTOM_HEADERS` when `CLAUDE_CODE_USE_FOUNDRY` was unset, causing 401s from corporate API gateways. `resolveAnthropicCustomHeadersForBaseUrl` now forwards the parsed headers whenever the base URL is non-Anthropic (or Foundry is enabled), and `buildAnthropicSearchHeaders` threads them through `buildAnthropicHeaders` so the search and streaming paths behave identically ([#1693](https://github.com/eminent337/aery/issues/1693)).
36
+ - Fixed OpenCode Go Anthropic-format models such as `qwen3.7-max` sending Anthropic `X-Api-Key` auth alongside the OpenCode bearer token, avoiding spurious Alibaba `401 Invalid API-key provided` errors. ([#1661](https://github.com/eminent337/aery/issues/1661))
37
+ - Fixed OAuth token exchange and refresh flows to fetch Claude CLI bootstrap identity when token responses omit account information, so `accountId` and `email` are now recovered when available
38
+ - Fixed Anthropic thinking traces being lost on direct OAuth requests. OAuth requests no longer send `redact-thinking-2026-02-12` unless thinking is explicitly hidden, Opus 4.7+ adaptive thinking opts into `display: "summarized"`, and the top user-facing thinking tier now sends Anthropic's `output_config.effort = "max"` rather than the next-lower `"xhigh"` tier.
39
+
40
+ ### Removed
41
+
42
+ - Removed the `@anthropic-ai/sdk` runtime dependency. The Anthropic provider now uses the package-local `AnthropicMessagesClient` and hand-maintained wire types in `providers/anthropic-wire.ts`; the SDK was only ever used for URL assembly, auth-header injection, bounded retries, the pre-response timeout, and HTTP-error-to-status mapping, all of which are reproduced with identical observable behavior.
43
+
44
+ ## [15.7.5] - 2026-06-01
45
+
46
+ ### Added
47
+
48
+ - Added Anthropic task budget support, forwarding `taskBudget` as `output_config.task_budget` with the required `task-budgets-2026-03-13` beta header and accepting Anthropic gateway requests that send `output_config.task_budget`.
49
+
50
+ ### Fixed
51
+
52
+ - Fixed OpenAI-family first-event timeouts so `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` cannot be undercut by a lower generic `PI_STREAM_FIRST_EVENT_TIMEOUT_MS` while local OpenAI-compatible servers are still processing large prompts. `PI_OPENAI_STREAM_FIRST_EVENT_TIMEOUT_MS` is now available for an explicit OpenAI-specific first-event override. ([#1603](https://github.com/eminent337/aery/issues/1603))
53
+
54
+ ## [15.7.4] - 2026-05-31
55
+
56
+ ### Fixed
57
+
58
+ - Fixed Anthropic stream idle-timeout retries after the provider stream has already begun.
59
+ - Fixed Xiaomi MiMo `/login` rejecting token-plan (`tp-`) keys with `401 Invalid API Key`. The validation request was still sending the legacy Anthropic `x-api-key` header against the OpenAI-compatible `/v1/chat/completions` endpoint; switched to `Authorization: Bearer`, matching the runtime path. ([#1580](https://github.com/eminent337/aery/issues/1580))
60
+ - Fixed OpenAI-compatible tool-call replay to send empty assistant content instead of `null`, avoiding strict custom backends that crash with `str`/`NoneType` concatenation after subagent tool results. ([#1585](https://github.com/eminent337/aery/issues/1585))
61
+
62
+ ## [15.7.3] - 2026-05-31
63
+
64
+ ### Changed
65
+
66
+ - Throttled per-delta streaming JSON re-parsing of OpenAI Responses/Codex tool-call arguments (reducing mid-stream parse cost from O(N²) to O(N)). Finalization via `response.output_item.done` now writes the authoritative full arguments back to the persisted assistant-message block, so tool calls finalized without a trailing `response.function_call_arguments.done` no longer retain stale/empty (`{}`) arguments. ([#1507](https://github.com/eminent337/aery/pull/1507))
67
+
68
+ ## [15.6.0] - 2026-05-30
69
+
70
+ ### Fixed
71
+
72
+ - Fixed Anthropic adaptive-thinking replay preserving signed thinking blocks on the latest abandoned tool-use assistant message, avoiding `thinking blocks in the latest assistant message cannot be modified` 400s. ([#1531](https://github.com/eminent337/aery/issues/1531))
73
+
74
+ ## [15.5.15] - 2026-05-30
75
+
76
+ ### Added
77
+
78
+ - Added `PI_REQ_DEBUG=1` request/response recording for provider transports. Each request writes `rr-session-N.json`; each received response writes `rr-session-N.res.log` with response headers followed by raw body bytes.
79
+
80
+ ### Fixed
81
+
82
+ - Fixed OpenCode-Go dynamic model refresh downgrading `qwen3.7-max` from Anthropic Messages to OpenAI-compatible transport, which caused `401 Model qwen3.7-max is not supported for format oa-compat` after `/v1/models` cache refreshes.
83
+
84
+ ## [15.5.12] - 2026-05-29
85
+
86
+ ### Removed
87
+
88
+ - Removed ANTML stream markup healing for `antml:function_calls` and `antml:thinking` envelopes, so Anthropic-compatible providers no longer parse those tags into `toolCall`/`thinking` events
89
+
90
+ ### Fixed
91
+
92
+ - Fixed GLM-5.x coding-plan OpenAI-compatible streams to use a longer default watchdog window, avoiding spurious `OpenAI completions stream stalled while waiting for the next event` errors during slow `glm-5.1` thinking/output phases. ([#1494](https://github.com/eminent337/aery/issues/1494))
93
+ - Fixed `zhipu-coding-plan` model discovery and credential validation to use the dedicated GLM Coding Plan endpoint (`https://open.bigmodel.cn/api/coding/paas/v4`) instead of the general BigModel endpoint, preventing requests from consuming ordinary account balance. ([#1494](https://github.com/eminent337/aery/issues/1494))
94
+ - Fixed DeepSeek tool calls failing on NanoGPT (e.g. `nanogpt/deepseek/deepseek-v4-pro` with reasoning enabled) by routing tool-bearing DeepSeek requests through NanoGPT's `:tools` model route and adding `nanogpt` to the DSML leak allowlist so streamed `<|DSML|tool_calls>...</|DSML|tool_calls>` envelopes are healed into structured tool calls instead of being passed through as visible text. ([#1488](https://github.com/eminent337/aery/issues/1488))
95
+ - Fixed DeepSeek tool calls failing on NanoGPT (e.g. `nanogpt/deepseek/deepseek-v4-pro` with reasoning enabled) by adding `nanogpt` to the DSML leak allowlist so streamed `<|DSML|tool_calls>...</|DSML|tool_calls>` envelopes are healed into structured tool calls instead of being passed through as visible text. The `:tools` model suffix is no longer appended on NanoGPT; that route triggered NanoGPT's server-side tool-call parser and 502'd with `code: "malformed_tool_call"` on complex tool schemas (`todo_write`) — the default route forwards `delta.content` (including DSML envelopes) which is healed client-side. ([#1488](https://github.com/eminent337/aery/issues/1488))
96
+ - Fixed OpenAI-compatible streamed parallel tool calls losing indexed argument deltas by tracking active tool-call blocks by the provider's `tool_calls[].index`; this keeps parallel NanoGPT `read` calls from merging or dropping their `path` arguments. ([#1488](https://github.com/eminent337/aery/issues/1488))
97
+
98
+ ## [15.5.11] - 2026-05-29
99
+
100
+ ### Added
101
+
102
+ - Added mid-conversation `system` message support for Anthropic Messages by upgrading eligible `developer` turns to `role: "system"` on the first-party Claude API with Claude Opus 4.8 and newer
103
+ - Added `supportsMidConversationSystem` to Anthropic compatibility settings so consumers can opt in to or disable mid-conversation `system` role handling per model
104
+ - Added `anthropic.claude-opus-4-8` model metadata in the model registry for Bedrock Converse streaming with effort-based thinking support through `xhigh`
105
+
106
+ ### Changed
107
+
108
+ - Changed Anthropic adaptive-thinking effort mapping for Opus 4.7+ on the Messages API to use the model's full five-tier scale: user-facing efforts now shift up one notch (`minimal→low`, `low→medium`, `medium→high`, `high→xhigh`, `xhigh→max`) so the top tier reaches the genuine `max` level and `high` lands on Anthropic's recommended `xhigh` coding/agentic default. Older adaptive models (Opus 4.6) and Bedrock Converse keep the four-tier legacy mapping where `xhigh` aliases to `max`.
109
+
110
+ ### Fixed
111
+
112
+ - Fixed OpenCode Zen `400 thinking is enabled but reasoning_content is missing in assistant tool call message` for every model behind `opencode-go`/`opencode-zen` (Kimi K2.x, DeepSeek V4 Pro/Flash, GLM-5.x, Qwen3.x, MiMo, MiniMax) by reactivating `requiresReasoningContentForToolCalls` and pinning the wire field to `reasoning_content` for any opencode request in thinking mode. The static compat default still omits the field for thinking-disabled turns to preserve the `Extra inputs are not permitted` guard from #1071; forced-tool turns also stay off because the existing `disableReasoningOnForcedToolChoice` guard strips thinking from the wire body. ([#1484](https://github.com/eminent337/aery/issues/1484))
113
+
114
+ ## [15.5.8] - 2026-05-28
115
+
116
+ ### Added
117
+
118
+ - Added `CheckCredentialsOptions.completionProbe` (and `completionTimeoutMs`) so `AuthStorage.checkCredentials` can additionally exercise each credential against the provider's chat-completion endpoint after refresh-on-expiry. Result lands on `CredentialHealthResult.completion` ({ok, reason?, modelId?, latencyMs?}) without disturbing the usage `ok` field. Public types: `CompletionProbe`, `CompletionProbeInput`, `CompletionProbeCredential`, `CredentialCompletionResult`. The probe is invoked even when no `UsageProvider` is registered for the row, and is skipped when OAuth refresh fails (the stale bytes would only mask the upstream failure).
119
+ - Added Wafer Pass and Wafer Serverless providers (`wafer-pass`, `wafer-serverless`). OpenAI-compatible (`https://pass.wafer.ai/v1`), bearer auth, `wfr_…` keys. `/login wafer-pass` and `/login wafer-serverless` paste-and-validate the key against `/v1/models`. `WAFER_PASS_API_KEY` and `WAFER_SERVERLESS_API_KEY` environment variables wired into `getEnvApiKey`. Bundled catalog seeds `wafer-pass/{GLM-5.1, Qwen3.5-397B-A17B}` and `wafer-serverless/{GLM-5.1, Kimi-K2.6, Qwen3.5-397B-A17B, Qwen3.6-35B-A3B, qwen3.7-max, deepseek-v4-flash, deepseek-v4-pro}`; dynamic discovery via `/v1/models` overlays additional models at runtime. Pass-tier discovery filters `wafer.tier === "pass_included"`. Pass-SKU costs are seeded at `0` (flat-rate subscription, no per-token charge — matches `kimi-code`/`firepass`/`alibaba-coding-plan`). Serverless costs are the wafer.ai retail rate, derived from the `*_cents_per_million` envelope via `value × 125 / 10000` (e.g. GLM-5.1 `120` → $1.50/M, Kimi-K2.6 `88` → $1.10/M). Reasoning entries get a thinking compat picked from the `wafer.provider` envelope: `zai`/`moonshotai` → zai-style `thinking: { type }`, `qwen` → top-level `enable_thinking`, `deepseek` and unknown upstreams stay unset so `detectOpenAICompat` can pick `reasoning_effort` from the id pattern at request time.
120
+
121
+ ### Changed
122
+
123
+ - Changed auth-gateway credential resolution to use per-conversation `promptCacheKey`/`sessionId` when calling `AuthStorage.getApiKey`, so repeated turns can keep the same credential until it becomes unavailable
124
+ - Changed auth-gateway and aery-native request handling to align `sessionId` with prompt/context identity before credential lookup
125
+ - Changed Anthropic prompt preparation to downscale image blocks over 2000px when a request includes 20+ images, reducing oversized payloads automatically
126
+ - Changed OpenAI chat request parsing to accept `name` on `tool` messages and fall back to the matching assistant `tool_calls` name, so parsed tool results now carry a proper tool name when the wire omits it
127
+ - Changed `checkCredentials` to skip running `completionProbe` when OAuth refresh fails, so stale bearer tokens are never probed and the refresh failure remains the returned `reason`
128
+ - Changed completion reporting to return `completion: { ok: null, reason: ... }` when a credential has no usable bearer bytes instead of attempting the probe
129
+ - Refactored `AuthStorage.checkCredentials` so OAuth refresh-on-expiry runs up-front and the refreshed credential is shared between the usage probe and the new completion probe; rows without a registered `UsageProvider` no longer short-circuit before the completion probe runs.
130
+
131
+ ### Fixed
132
+
133
+ - Fixed DeepSeek DSML tool-call envelope leaks on Ollama Cloud and OpenAI-compatible streams by healing leaked envelopes into structured tool calls without displaying raw DSML markers. ([#1462](https://github.com/eminent337/aery/issues/1462))
134
+ - Fixed auth-gateway to classify usage-limit messages such as `usage_limit_reached`, `resource_exhausted`, and Codex-style `Try again in ~X min` text as 429 `rate_limit_error` responses
135
+ - Fixed auth-gateway usage-limit handling to honor parsed retry hints and switch to a sibling credential via `markUsageLimitReached` instead of invalidating the rate-limited credential
136
+ - Fixed `streamSimple` to retry on usage-limit errors (including message-only error events) before any content is emitted, so `onAuthError` can rotate credentials automatically
137
+ - Fixed auth-gateway error classification to extract embedded status codes and use word-boundary matching, so `GenerateContentRequest` and similar messages are no longer misreported as rate-limit errors
138
+ - Fixed `checkCredentials` to handle `completionProbe` exceptions by recording the failure in `CredentialHealthResult.completion.reason` while still returning the usage probe result
139
+ - Fixed Google Vertex's bundled model list to use the authoritative models.dev catalog, including MaaS entries such as `deepseek-ai/deepseek-v3.2-maas` and removing retired Gemini 1.5 fallbacks. ([#1456](https://github.com/eminent337/aery/issues/1456))
140
+
141
+ ## [15.5.7] - 2026-05-27
142
+
143
+ ### Added
144
+
145
+ - `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the aery-native gateway forwarder.
146
+
147
+ - xAI Grok OAuth (SuperGrok Subscription) provider in `/login`. Loopback PKCE flow on `127.0.0.1:56121`; the token unlocks Grok-4.x chat. Ported from NousResearch/hermes-agent (MIT).
148
+ - OpenRouter provider in `/login`. API-key paste flow validated against `https://openrouter.ai/api/v1/auth/key` (the `/models` endpoint is public and cannot validate auth). The pasted key is stored under the existing `openrouter` provider id used by `OPENROUTER_API_KEY`.
149
+ - `XAI_OAUTH_TOKEN` environment variable accepted as a headless fallback for the xAI Grok OAuth provider.
150
+
151
+ ### Changed
152
+
153
+ - `OpenAIResponsesOptions` gains four optional, provider-agnostic fields that adapter wrappers can use to compose provider-specific behavior on top of the generic transport: `includeEncryptedReasoning` (gates `include: ["reasoning.encrypted_content"]`; default `true`, preserves current behavior), `filterReasoningHistory` (strips replayed `type: "reasoning"` items from conversation history; default `false`), `headers` (merged onto the client's default headers), and `extraBody` (merged into the request payload).
154
+ - The existing `XAI_API_KEY` path is unchanged — it continues to use the OpenAI-completions transport.
155
+
156
+ ### Fixed
157
+
158
+ - Fixed OpenRouter DeepSeek V4 tool-call follow-up requests replaying normalized `reasoning` as-is instead of DeepSeek's required `reasoning_content`, which caused HTTP 400 errors in thinking mode. ([#1445](https://github.com/eminent337/aery/issues/1445))
159
+
160
+ ## [15.5.6] - 2026-05-27
161
+
162
+ ### Added
163
+
164
+ - Added `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` to control how long an idle Codex WebSocket stays eligible for reuse, with `0` disabling the check
165
+
166
+ ### Fixed
167
+
168
+ - Fixed reuse of Codex WebSocket connections that had gone silent: a connection idle beyond the idle-reuse threshold is now dropped and replaced with a fresh handshake, preventing the next request from stalling
169
+ - Fixed stale response frames left in the websocket queue from a completed turn so subsequent requests no longer process terminal frames from the previous response
170
+ - Fixed websocket dead-socket detection to fail a stale connection when no inbound traffic or pong is observed after a ping timeout, improving recovery on runtimes that do not emit pong events
171
+
172
+ ## [15.5.5] - 2026-05-27
173
+
174
+ ### Added
175
+
176
+ - Added `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` to configure the interval for Codex WebSocket protocol ping heartbeats
177
+ - Added `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` to configure the Codex WebSocket pong timeout used to detect unresponsive connections
178
+ - Added `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` to configure the maximum buffered Codex WebSocket inbound queue size before transport fallback
179
+ - Added `parseStreamingJsonThrottled` to `@aryee337/aery-ai/utils/json-parse` — a per-delta wrapper around `parseStreamingJson` that skips re-parses until the buffer has grown by `minGrowthBytes` (default 256). Wired into the streaming hot path of every provider's tool-call argument accumulator (`anthropic`, `amazon-bedrock`, `openai-completions`, `openai-codex-responses`, `openai-responses-shared`) so per-delta cost is O(N) in total buffer length instead of O(N²). Each provider's `toolcall_end` still runs a final unthrottled parse, so the published `block.arguments` is unchanged.
180
+ - Added named-tool routing support to Google providers: `GoogleSharedStreamOptions.toolChoice` and `GoogleGeminiCliOptions.toolChoice` now accept `{ mode: "ANY"; allowedFunctionNames: [string, ...string[]] }` in addition to the string forms. `mapGoogleToolChoice` converts `ToolChoice` objects of shape `{ type: "tool" | "function", name }` to the wire form. Mirrors the equivalent Anthropic mapper.
181
+
182
+ ### Changed
183
+
184
+ - Improved Codex WebSocket timeout diagnostics to include last event type and time since last progress event
185
+ - Enhanced Codex WebSocket error classification to recognize ping, pong, send, and queue-overflow failures as retryable
186
+ - Changed `mapGoogleToolChoice` to be exported from `@aryee337/aery-ai/stream` so callers can build the wire-shape allow-list directly without re-deriving it.
187
+
188
+ ### Fixed
189
+
190
+ - Fixed Codex WebSocket send failures by wrapping socket.send() in try-catch and surfacing errors as retryable transport errors
191
+ - Fixed Codex WebSocket inbound queue overflow by adding capacity bounds and triggering fallback to SSE when exceeded
192
+ - Fixed Codex WebSocket pong timeout detection by tracking pong events and failing the connection when no pong is received within the configured timeout
193
+ - Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
194
+ - Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
195
+ - Fixed Synthetic model discovery to treat the provider `/models` response as authoritative so deprecated bundled IDs are pruned from the runtime cache, and changed Synthetic login validation to avoid probing a specific model ([#1417](https://github.com/eminent337/aery/issues/1417)).
196
+
197
+ ## [15.5.0] - 2026-05-26
198
+
199
+ ### Added
200
+
201
+ - Added `zhipu-coding-plan` provider for Zhipu (智谱) BigModel's domestic coding-plan SKU at `https://open.bigmodel.cn/api/coding/paas/v4`, with dynamic model discovery (`ZHIPU_API_KEY`), zai-format thinking, `reasoning_content` field, and OAuth login flow ([#1340](https://github.com/eminent337/aery/issues/1340)).
202
+
203
+ ### Removed
204
+
205
+ - Removed the `aery-ai` CLI binary (`packages/ai/src/cli.ts`) and its `bin` entry. Use the in-process equivalent in the aery coding-agent CLI: `aery auth-broker login [provider]`, `aery auth-broker logout [provider]`, and `aery auth-broker list`. The library API (`AuthStorage.login()`, `getOAuthProviders()`, etc.) is unchanged.
206
+
207
+ ### Fixed
208
+
209
+ - Fixed delayed `toolResult` emissions so real tool results are emitted in the correct assistant `toolCall` window after handoff/compaction, preventing out-of-order or orphaned tool results
210
+ - Fixed delayed `toolResult` handling for aborted calls so a late real result is emitted instead of a synthetic `aborted` result for the same `toolCallId`
211
+ - Fixed usage polling to disable credentials when OAuth refresh fails definitively (for example `invalid_grant`) and clear cached last-good usage data so stale reports no longer remain visible
212
+
213
+ ## [15.4.3] - 2026-05-26
214
+
215
+ ### Fixed
216
+
217
+ - Fixed Google Vertex model discovery to use the project-scoped OpenAI-compatible model list so Vertex Model Garden models such as GLM and Claude are available through ADC auth ([#1412](https://github.com/eminent337/aery/issues/1412)).
218
+
219
+ ## [15.4.2] - 2026-05-26
220
+
221
+ ### Fixed
222
+
223
+ - Fixed OpenCode Zen `big-pickle` follow-up requests replaying assistant tool-call turns without DeepSeek-required `reasoning_content`, which caused HTTP 400 errors in thinking mode.
224
+
225
+ ## [15.4.1] - 2026-05-26
226
+
227
+ ### Added
228
+
229
+ - Added `isOpenAICompletionsProgressChunk` export to identify real progress chunks vs. keepalives in OpenAI completions streams
230
+ - Added per-provider stream watchdog overrides via `getStreamIdleTimeoutMs(fallbackMs)` and `getStreamFirstEventTimeoutMs(idleTimeoutMs, fallbackMs)` to allow providers like Google Gemini CLI to extend first-event timeouts without affecting global defaults
231
+ - Added `promptCacheKey` to `StreamOptions` and passed it through stream option mapping so callers can specify an explicit prompt-cache key separate from `sessionId`
232
+ - Added `promptCacheKey` support to the native server option whitelist so `promptCacheKey` is accepted by `aery-native-server` streams
233
+ - Restored the per-provider stream watchdog (`iterateWithIdleTimeout`) on top of the abortable iterator. The lazy stream forwarder in `register-builtins` now wraps every provider's event stream with the first-event + steady-state idle watchdog (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_STREAM_IDLE_TIMEOUT_MS`; aliases honored), and Anthropic / OpenAI Completions / OpenAI Responses / Azure OpenAI Responses / Codex SSE re-emit their per-provider progress predicates so empty keepalive frames cannot keep a stalled stream alive. Reverts the partial regression from #1392 that left Codex WebSocket subagent runs hanging silently for hours when the broker dropped frames between deltas. The Codex WebSocket transport additionally now resets `lastProgressAt` only on progress events (not keepalives), giving the 300s WS-internal idle ceiling the same liveness semantics as the SSE path.
234
+
235
+ ### Changed
236
+
237
+ - Enabled OpenAI Codex WebSocket streams to apply `streamIdleTimeoutMs` and `streamFirstEventTimeoutMs` from `StreamOptions` per request instead of fixed internal defaults
238
+ - Changed stream idle watchdog implementation from `iterateUntilAbort` to `iterateWithIdleTimeout`, which now enforces maximum idle gaps between streamed events and distinguishes between first-event and steady-state timeouts
239
+ - Changed Anthropic, OpenAI Responses, OpenAI Completions, Azure OpenAI Responses, and OpenAI Codex Responses providers to use the new idle-timeout iterator with per-provider progress predicates so empty keepalive frames cannot keep a stalled stream alive
240
+ - Changed Codex WebSocket transport to reset `lastProgressAt` only on progress events (not keepalives), giving the 300s WS-internal idle ceiling the same liveness semantics as the SSE path
241
+ - Changed Google Gemini CLI stream forwarding defaults to use a 5-minute first-event floor via per-provider lazy-stream limits to avoid premature first-event timeouts on slow startup
242
+ - Changed OpenAI Responses and OpenAI Codex request handling to keep `sessionId` for provider routing and conversation headers while `promptCacheKey` controls the `prompt_cache_key` payload independently
243
+ - Changed `StreamOptions.streamIdleTimeoutMs` documentation to clarify it is now wired into every built-in provider and the lazy stream forwarder, and that `streamFirstEventTimeoutMs` is honored at both the SDK-request layer and the iterator-watchdog layer
244
+ - Changed OpenAI Responses and OpenAI Codex request handling so `sessionId` continues to drive provider routing and state while `promptCacheKey` controls the `prompt_cache_key` payload
245
+ - Changed Google Gemini CLI stream forwarding defaults to use a 5-minute first-event floor to avoid premature first-event timeouts on slow startup
246
+ - Changed auth-gateway request mapping to preserve incoming `prompt_cache_key` as both `promptCacheKey` and `sessionId` when routing OpenAI-compatible sessions
247
+ - Un-deprecated `StreamOptions.streamIdleTimeoutMs`; the option is wired into every built-in provider and the lazy stream forwarder again. `streamFirstEventTimeoutMs` is now honored at both the SDK-request layer (via `createSdkStreamRequestOptions`) and the iterator-watchdog layer, in cooperation.
248
+
249
+ ### Removed
250
+
251
+ - Removed `installH2Fetch` and the `fetch` patch that forced HTTP/2 on HTTPS requests; callers now use the default Bun `fetch` transport
252
+
253
+ ### Fixed
254
+
255
+ - Fixed first-item timeout handling so `iterateWithIdleTimeout` no longer keeps first-event timers active after the source throws or the consumer stops before semantic progress
256
+ - Fixed silent multi-hour hangs on Codex WebSocket subagent runs when the broker dropped frames between deltas by restoring per-provider stream watchdogs with progress-event filtering
257
+ - Fixed z.ai/GLM-via-OpenRouter subagent stalls where no-op keepalive chunks reset the idle watchdog indefinitely by filtering non-progress items before resetting the deadline
258
+
259
+ ## [15.4.0] - 2026-05-26
260
+
261
+ ### Breaking Changes
262
+
263
+ - Removed `findAnthropicAuth` from `anthropic-auth` and replaced store-driven auth discovery with `buildAnthropicAuthConfig`, requiring callers to provide an already-resolved API key before building Anthropic auth config
264
+
265
+ ### Added
266
+
267
+ - Added `PI_CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS` and `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` options to tune Codex WebSocket timeout behavior before fallback
268
+ - Added `AuthStorage.getOAuthAccess` to return a refreshed OAuth access token with identity metadata (`accountId`, `email`, `projectId`, `enterpriseUrl`) for callers that need to build bearer-token headers together with that identity metadata
269
+ - Added Codex WebSocket forwarding to the `onSseEvent` observer so the raw provider-stream debug viewer captures the inbound JSON frames and the outbound request frame from the WS transport using the same synthesized SSE-wire shape (`event:` + `data:` lines, prefixed with a `: ws ← <type>` (inbound) or `: ws → <type>` (outbound) comment).
270
+
271
+ ### Changed
272
+
273
+ - Changed OAuth selection in `AuthStorage` to treat credentials as stale when they are within 60 seconds of expiry and rotate them preemptively
274
+ - Changed Google Gemini CLI, Google Gemini usage, Antigravity usage, and Kimi usage flows to stop refreshing OAuth tokens directly and rely on `AuthStorage` for token rotation
275
+
276
+ ### Deprecated
277
+
278
+ - Deprecated `streamIdleTimeoutMs` in `StreamOptions` as a compatibility-only field that is no longer used by providers
279
+
280
+ ### Removed
281
+
282
+ - Removed provider-local OAuth refresh helpers from Google Gemini CLI and Google/Kimi/Antigravity usage probes, preventing direct refresh calls from those usage paths
283
+
284
+ ### Fixed
285
+
286
+ - Dropped truncated, thinking-only assistant turns with only `thinking`/`redacted_thinking` blocks and no `text` or `tool` content during message transformation, preventing Anthropic requests from sending consecutive assistant messages after a `max_tokens`/`error`/`aborted` interruption
287
+ - Fixed Amazon Bedrock bearer-token authentication to honor `AWS_BEARER_TOKEN_BEDROCK` before resolving AWS profiles or running `credential_process`, matching Bedrock API-key precedence. ([#1399](https://github.com/eminent337/aery/issues/1399))
288
+ - Updated `isRetryableError` to treat Bun HTTP/2 transport errors (`HTTP2StreamReset`, `HTTP2RefusedStream`) as retryable so transient stream-reset failures can be retried
289
+ - Fixed Codex WebSocket streaming to recover from stalled sessions by falling back to SSE when the first event or subsequent progress is delayed beyond the configured websocket timeout
290
+ - Fixed expired OAuth handling so provider-level paths no longer attempt direct token refresh calls for expired credentials and instead rely on `AuthStorage` for rotation
291
+ - Fixed provider streams aborting slow-but-valid first tokens or silent inter-event gaps with AERY-owned first-event/idle watchdog errors. Built-in lazy streams, OpenAI/Anthropic/Azure/Codex SSE, and Codex WebSocket streams now wait for provider output, provider/socket errors, caller aborts, or explicit request-layer timeouts instead of treating provider silence as failure ([#1392](https://github.com/eminent337/aery/issues/1392)).
292
+ - Fixed Claude Opus 4.7 on Amazon Bedrock streaming no reasoning output (and appearing to hang on long reasoning runs) because Anthropic silently switched the adaptive-thinking display default to `"omitted"`. The Bedrock provider now sends `thinking.display = "summarized"` by default on Opus 4.7+ adaptive models and on budget-based Claude models, mirroring the existing direct-Anthropic behavior. `BedrockOptions.thinkingDisplay` (`"summarized" | "omitted"`) is exposed for callers that want to opt out, and `hideThinkingSummary` now wires through to the Bedrock case ([#1373](https://github.com/eminent337/aery/issues/1373)).
293
+ - Fixed Cursor Composer resume/tool-continuation turns failing with `Cannot send empty user message to Cursor API`. Empty current user turns now use Cursor's `resumeAction` instead of constructing an invalid `userMessageAction` ([#1376](https://github.com/eminent337/aery/issues/1376)).
294
+ - Fixed `aery-ai login moonshot` failing with `invalid temperature: only 1 is allowed for this model` (HTTP 400) because the API-key validator probed `kimi-k2.5` with `temperature: 0`. Moonshot login now validates against `GET /v1/models`, matching the DeepSeek/Fireworks/NanoGPT/ZenMux pattern and authenticating the key without invoking model-specific parameter restrictions.
295
+
296
+ ## [15.3.2] - 2026-05-25
297
+
298
+ ### Added
299
+
300
+ - Added `GET /v1/snapshot/stream` for live auth-broker snapshot updates via SSE with `snapshot`, `entry`, and `removed` event frames
301
+ - Added `AuthBrokerClient.openSnapshotStream()` for consuming SSE snapshot streams from `/v1/snapshot/stream`
302
+ - Added `streamSnapshots` option to `RemoteAuthCredentialStore` (default `true`) to enable or disable SSE-based snapshot synchronization
303
+ - Added `streamKeepaliveMs` to `startAuthBroker()` to tune heartbeat frequency for the SSE stream
304
+ - Added `AuthStorage.checkCredentials({ signal?, timeoutMs?, baseUrlResolver? })` that returns a per-credential `CredentialHealthResult` with tri-state `ok` (`true` / `false` / `null`-unverifiable), the credential's identity (provider, type, email/accountId, broker-refresh flag), and the upstream error string when the probe fails. Iterates sequentially over `listAuthCredentials()`, exercises OAuth refresh on expiry, then calls the per-provider `UsageProvider.fetchUsage` without swallowing errors — so callers can identify which row in a multi-account broker is producing 401s instead of getting a silently-deduplicated `fetchUsageReports` list.
305
+ - Added `GET /v1/credentials/check` to `startAuthGateway()` that forwards to `AuthStorage.checkCredentials` and returns `{ generatedAt, credentials }`. Gated by the same bearer as the rest of the gateway.
306
+
307
+ ### Changed
308
+
309
+ - Changed `RemoteAuthCredentialStore` to prefer SSE snapshot streaming and automatically fall back to long-polling when a broker returns 404 for `/v1/snapshot/stream`
310
+ - Changed snapshot write-refresh flow so `RemoteAuthCredentialStore` skips immediate `/v1/snapshot` refreshes when SSE streaming is active
311
+ - Changed broker SSE stream behavior to keep connections open with periodic keepalives and an increased server idle timeout
312
+
313
+ ## [15.3.0] - 2026-05-25
314
+
315
+ ### Added
316
+
317
+ - Added DeepSeek to the built-in API-key login provider catalog so `aery login deepseek` stores a reusable `DEEPSEEK_API_KEY` credential for the bundled DeepSeek models.
318
+
319
+ ### Fixed
320
+
321
+ - Fixed `openai-responses` requests intermittently 400ing with `No tool call found for function call output with call_id …` after an aborted turn or a locally-rejected tool call (e.g. argument-validation failure). `convertConversationMessages` now folds orphan `function_call_output` / `custom_tool_call_output` items — those whose matching `function_call` was wiped by an earlier `dt: false` snapshot splice or never landed in any persisted provider payload — into assistant text notes, preserving the payload while keeping the request grammatically valid ([#1351](https://github.com/eminent337/aery/issues/1351)).
322
+
323
+ ## [15.2.4] - 2026-05-22
324
+
325
+ ### Fixed
326
+
327
+ - Fixed ChatGPT Plus/Pro (Codex) OAuth login returning `Token exchange failed: 403` on Windows. When port 1455 was in use, the callback server silently fell back to a random port; OpenAI's authorization endpoint accepts any localhost redirect URI (loose validation), so the browser callback succeeds and shows "Authentication Successful", but the token endpoint rejects the non-registered port with 403. The `OpenAICodexOAuthFlow` now enforces a fixed `redirectUri` option so a busy port immediately surfaces as "port unavailable" instead of producing a confusing 403 ([#1277](https://github.com/eminent337/aery/issues/1277)).
328
+ - Improved `exchangeCodeForToken` error diagnostics: the 403 response body (`error` / `error_description` fields) is now included in the thrown message, matching the existing `refreshOpenAICodexToken` behaviour.
329
+
330
+ ### Added
331
+
332
+ - Added `ChatGPT Plus/Pro (Codex, headless/device)` (`openai-codex-device`) as an alternative login method for the Codex provider. Uses OpenAI's device-code flow (`/api/accounts/deviceauth/usercode` → poll `/api/accounts/deviceauth/token`), which avoids a local callback server and port 1455 entirely. Credentials are stored under the existing `openai-codex` provider key so all models and tooling continue to work without reconfiguration ([#1277](https://github.com/eminent337/aery/issues/1277)).
333
+
334
+ ## [15.2.2] - 2026-05-22
335
+
336
+ ### Fixed
337
+
338
+ - Fixed `gemini-3.1-pro-high` and `gemini-3.1-pro-low` on the `google-antigravity` provider always returning HTTP 400 from Cloud Code Assist. The `ANTIGRAVITY_SYSTEM_INSTRUCTION` identity header was not injected for these models because the internal check matched the string `"gemini-3-pro-high"` (hyphen) instead of the versioned `"gemini-3.1-pro-..."` form. The guard now matches all `gemini-3` model variants ([#1274](https://github.com/eminent337/aery/issues/1274)).
339
+
340
+ ## [15.2.0] - 2026-05-21
341
+
342
+ ### Fixed
343
+
344
+ - Fixed `/login` (and `/logout`, plus any `AuthStorage.set` / `remove` call) against a remote auth-broker throwing `RemoteAuthCredentialStore is read-only on the client. Use 'aery auth-broker login <provider>' to mutate credentials.` Added three optional async write hooks to `AuthCredentialStore` (`upsertAuthCredentialRemote`, `replaceAuthCredentialsRemote`, `deleteAuthCredentialsRemote`); `RemoteAuthCredentialStore` implements them via the broker's `POST /v1/credential` and `POST /v1/credential/:id/disable` endpoints and applies the broker's authoritative post-write entries to the local snapshot. `AuthStorage` routes through the hooks when present, so OAuth and API-key logins (and logouts) initiated from a broker-backed client now persist server-side and surface immediately without waiting for the long-poll snapshot tick.
345
+
346
+ ## [15.1.9] - 2026-05-21
347
+
348
+ ### Fixed
349
+
350
+ - Fixed Ollama named tool forcing to send only the requested tool when the caller passes a named `toolChoice`, preserving `tool_choice: "required"` while preventing local models from selecting a different tool. ([#1236](https://github.com/eminent337/aery/issues/1236))
351
+ - Fixed `/btw` (and IRC background replies) returning a `BedrockException` 400 (`The toolConfig field must be defined when using toolUse and toolResult content blocks.`) on LiteLLM → Bedrock once the session has tool-call history. Two source fixes in `buildParams`: (1) `if (context.tools)` → `if (context.tools?.length)` so an explicit `context.tools = []` (the /btw opt-out) never routes through `convertTools` and never emits an empty `"tools"` array; (2) `else if (hasToolHistory(...))` → `else if (context.tools === undefined && hasToolHistory(...))` so the Anthropic-proxy sentinel that injects `tools: []` for tool-history turns is suppressed when the caller explicitly opted out, preventing it from re-introducing the empty array. As defence-in-depth, `tool_choice: "none"` is also dropped when the resolved tools list is missing or empty. ([#1227](https://github.com/eminent337/aery/issues/1227))
352
+
353
+ ## [15.1.8] - 2026-05-20
354
+
355
+ ### Added
356
+
357
+ - Added Fireworks Fire Pass as a separate `firepass` provider with API-key login flow, bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router and reject `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the per-Fireworks-docs "always send `max_tokens`" default ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
358
+
359
+ ### Fixed
360
+
361
+ - Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower AERY efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/eminent337/aery/issues/1207))
362
+ - Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/eminent337/aery/issues/1219))
363
+ - Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the credential expired and the search provider's loader would silently skip it, surfacing as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
364
+
365
+ ## [15.1.7] - 2026-05-19
366
+
367
+ ### Added
368
+
369
+ - Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
370
+ - Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
371
+
372
+ ### Fixed
373
+
374
+ - Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
375
+ - Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/eminent337/aery/issues/1203))
376
+
377
+ ## [15.1.6] - 2026-05-19
378
+
379
+ ### Fixed
380
+
381
+ - Fixed `{}` (empty JSON Schema, the wire representation of `z.unknown()`) being passed verbatim to grammar-constrained samplers (llama.cpp, etc.) in `additionalProperties`, `items`, and other schema-valued positions across **every provider** (OpenAI, Anthropic, Google, Ollama, Bedrock, Cursor). Grammar builders treat `{}` as "generate an empty object" rather than "any JSON value", causing open-typed fields (e.g. `extra.title` from `z.record(z.string(), z.unknown())`) to always emit `{}` instead of the intended string/number/etc. `toolWireSchema` now applies a new `normalizeEmptySchemas` pass (exported) to both the Zod and TypeBox/raw-JSON-Schema branches, converting `{}` → `true` (semantically identical per JSON Schema draft 2020-12 §4.3.2) in all schema-valued positions. Strict-mode opt-out is preserved across all providers: OpenAI's `hasUnrepresentableStrictObjectMap` hits the `=== true` branch instead of the `isJsonObject({})` branch (same result); Anthropic's `normalizeAnthropicStrictSchemaNode` opts out via `additionalProperties !== false` (still true for `true`); Google's `normalizeSchemaForGoogle` strips `additionalProperties` regardless (pre-existing). ([#1179](https://github.com/eminent337/aery/issues/1179))
382
+ - Fixed `aery-ai login <provider>` crashing with `Unknown provider` for providers that only the `auth-storage` `login()` switch knew about (perplexity, alibaba-coding-plan, gitlab-duo, huggingface, opencode-zen/go, lm-studio, ollama, cerebras, fireworks, qianfan, synthetic, venice, litellm, moonshot, together, cloudflare/vercel ai gateways, vllm, qwen-portal, nvidia, xiaomi, and any custom OAuth provider). The CLI now delegates to `SqliteAuthCredentialStore.login()` instead of duplicating a smaller switch, so the auth-broker `aery auth-broker login <provider>` flow works for every registered OAuth provider.
383
+
384
+ ## [15.1.4] - 2026-05-19
385
+
386
+ ### Changed
387
+
388
+ - Updated auth-gateway format and aery-native request handling to invalidate the failed API key and retry the provider request with a replacement key when authentication fails
389
+
390
+ ### Fixed
391
+
392
+ - Fixed OpenCode-Go and OpenCode-Zen chat-completions replay to omit stored reasoning fields on Kimi assistant tool-call messages, avoiding provider 400s for rejected `messages[].reasoning` payloads. ([#1157](https://github.com/eminent337/aery/issues/1157))
393
+ - Fixed OpenAI Responses and Codex tool schema normalization to emit `properties: {}` for no-argument object schemas without rewriting literal payloads. ([#1147](https://github.com/eminent337/aery/issues/1147))
394
+ - Fixed Anthropic 400 (`unexpected tool_use_id found in tool_result blocks ... Each tool_result block must have a corresponding tool_use block in the previous message`) when handoff/compaction folds an assistant `tool_use` into the handoff summary string but leaves the matching user-side `tool_result` message in the history. `transformMessages` now indexes every `tool_use` id surviving the first pass and drops orphan `tool_result` messages whose originator was compacted away, preserving the text payload as a user-level `<stale-tool-result>` note so the model still sees what the tool returned. The note is emitted with `role: "user"` rather than `role: "developer"` so providers that elevate developer-role messages (Ollama: `developer` → `system`; OpenAI chat-completions reasoning models: `developer` → `developer`) cannot lift stale tool output to an instruction-priority tier above the surrounding user/developer messages.
395
+ - Fixed streaming authentication retry to trigger when a provider emits a 401 `error` event after a `start` event but before any replay-unsafe content is emitted
396
+ - Added `credential_process` support to the Bedrock provider's AWS credential resolver so profiles delegating to external brokers (`aws-vault`, `granted`, in-house tools) resolve instead of falling through to `Unable to resolve AWS credentials`. Parses the AWS SDK `Version: 1` JSON envelope, honors `Expiration` in the per-profile cache, propagates `AbortSignal` to the spawned helper, routes Windows `.cmd`/`.bat` helpers through `cmd.exe /c`, and ships a POSIX-shell-style tokenizer that preserves backslashes inside double quotes so Windows paths survive ([#1142](https://github.com/eminent337/aery/issues/1142))
397
+
398
+ ## [15.1.3] - 2026-05-17
399
+
400
+ ### Breaking Changes
401
+
402
+ - Changed `AuthBrokerClient.fetchSnapshot()` to return status-based results (`200` or `304`) instead of always returning a raw snapshot body, so callers now need to branch on `status`
403
+ - Renamed public schema utilities in `@aryee337/aery-ai/utils/schema` by replacing `sanitizeSchemaForGoogle`, `sanitizeSchemaForCCA`, `prepareSchemaForCCA`, and `sanitizeSchemaForMCP` with `normalizeSchemaForGoogle`, `normalizeSchemaForCCA`, and `normalizeSchemaForMCP`
404
+ - Added MCP schema normalization via `normalizeSchemaForMCP` for compatibility checks
405
+ - Removed the `StringEnum` helper from `@aryee337/aery-ai/utils/schema`. Use `z.enum([...])` directly; Zod's emitted JSON Schema is already wire-compatible with Google and other providers.
406
+ - Renamed the concrete SQLite credential store class from `AuthCredentialStore` to `SqliteAuthCredentialStore`. `AuthCredentialStore` is now the persistence interface implemented by both the SQLite store and the new `RemoteAuthCredentialStore`. Update `new AuthCredentialStore(db)` / `AuthCredentialStore.open(...)` call-sites to `SqliteAuthCredentialStore`; type-position uses (`store: AuthCredentialStore`) continue to work unchanged.
407
+
408
+ ### Added
409
+
410
+ - Added `onAuthError` to `StreamOptions` and wired `streamSimple()` to retry once with a replacement API key when the first provider response is a 401 before any assistant events are emitted
411
+ - Added generation-aware snapshot metadata (`generation`, `serverNowMs`, `refresher`, and `rotatesInMs`) to auth-broker snapshot responses to support client-side credential-rotation planning
412
+ - Added `transport: "aery-native"` on `Model` and the matching `streamAeryNative` client. When `model.transport === "aery-native"`, `streamSimple` short-circuits the per-provider dispatch and POSTs the canonical `Context` to the auth-gateway's `POST /v1/aery/stream` endpoint. The response is SSE-framed `AssistantMessageEvent`s parsed by `readSseJson` and pushed verbatim into the local `AssistantMessageEventStream` — no wire-format translation, no partial-stripping reconstruction. Used by containerized aery installs (robomp slots, swarm extension, etc.) to route every LLM call through a credential-holding sidecar; the slot itself never sees the real provider tokens. Server-controlled fields (`apiKey`, `signal`, `fetch`, lifecycle callbacks, the provider-session map) are stripped from the wire body — `apiKey` rides in the `Authorization` header as the gateway bearer.
413
+ - Added `POST /v1/aery/stream` to the auth-gateway. Same auth + abort + model-resolution + codex-compat + prefix-cache plumbing as the foreign-wire routes; only the wire-format translation is skipped. Request body is `{ modelId, context, options?, stream? }` where `context` is the canonical aery-ai `Context` and `options` is `SimpleStreamOptions` with non-serializable fields stripped. Response is SSE-framed `AssistantMessageEvent` (terminated by `data: [DONE]`) when streaming, or `{ message: AssistantMessage }` JSON when `stream: false`.
414
+ - Added Vertex AI authentication via Google Application Default Credentials from `GOOGLE_APPLICATION_CREDENTIALS`, `~/.config/gcloud/application_default_credentials.json`, or metadata server tokens, with token caching and refresh skew control via `GOOGLE_VERTEX_REFRESH_SKEW_MS`
415
+ - Added support for Anthropic image message parts with `type: "url"` and `type: "file"` sources
416
+ - Added `stopSequences` and `frequencyPenalty` to shared stream options and wired them through to OpenAI request translation
417
+ - Added optional request cancellation support to auth-broker interactions by propagating `AbortSignal` into health, snapshot, usage, and refresh calls
418
+ - Added `AuthStorage.setConfigApiKey` / `removeConfigApiKey` / `clearConfigApiKeys` for config-sourced per-provider bearers (e.g. `models.yml` `providers.<name>.apiKey`). The new tier sits between runtime `--api-key` and stored credentials in `getApiKey`/`peekApiKey` resolution, so a bearer pinned in config now beats the broker's OAuth access token. Also suppresses OAuth `account_uuid` attribution when active, since outbound auth is the explicit config bearer, not OAuth. `describeCredentialSource` reports `"config override (models.yml)"` for visibility.
419
+ - Added per-model `additional_rate_limits` parsing to `openaiCodexUsageProvider`. The Codex `wham/usage` endpoint surfaces a separate `GPT-5.3-Codex-Spark` rate limit (`metered_feature: codex_bengalfox`) on Pro accounts; these now emit dedicated `openai-codex:spark:{primary,secondary}` `UsageLimit` entries with `scope.tier = "spark"`, mirroring how Anthropic exposes `anthropic:7d:sonnet` separately from the umbrella `anthropic:7d` bucket. The osx-widgets client already keyed spark detection off `limit.id.includes("spark")`; this populates that contract end-to-end.
420
+ - Added `GET /v1/usage` to the auth-broker API to expose aggregated usage reports from `AuthStorage.fetchUsageReports`
421
+ - Added auth-broker usage polling response handling that returns normalized usage reports plus a generation timestamp for clients (5-min per-credential cache via `AuthStorage`)
422
+ - Added the auth-broker subsystem (`@aryee337/aery-ai/auth-broker`) for sharing OAuth credentials across machines without leaking refresh tokens.
423
+ - `startAuthBroker(...)` boots a `Bun.serve` HTTP server exposing `GET /v1/healthz`, `GET /v1/snapshot`, `POST /v1/credential` (upsert), `POST /v1/credential/:id/refresh`, and `POST /v1/credential/:id/disable`.
424
+ - `AuthBrokerClient` is the matching HTTP client used by remote clients.
425
+ - `RemoteAuthCredentialStore` is a client-side `AuthCredentialStore` that mirrors a broker snapshot in memory; mutating methods (`replace*`, `upsert*`, `delete*ForProvider`) throw because writes are server-side only.
426
+ - `AuthBrokerRefresher` is the background refresh loop that pre-refreshes credentials within `refreshSkewMs` and disables on definitive failure (`invalid_grant` / non-network 401-403).
427
+ - Added `AuthStorage.exportSnapshot()`, `AuthStorage.upsertCredential(provider, credential)`, `AuthStorage.forceRefreshCredentialById(id)`, and `AuthStorage.disableCredentialById(id, cause)` public methods consumed by the auth-broker server.
428
+ - Added `AuthStorageOptions.refreshOAuthCredential` override so a remote-store client can route every OAuth refresh through the broker instead of the local OAuth endpoint.
429
+ - Added `REMOTE_REFRESH_SENTINEL` (`"__remote__"`) — the wire placeholder substituted for OAuth refresh tokens in broker snapshots; clients never see the real refresh token.
430
+ - Exposed the OAuth provider catalog (`getOAuthProviders`, `OAuthProvider`, `OAuthProviderInfo`) and `refreshOAuthToken` through the package barrel so the coding-agent CLI can target them without reaching into `utils/oauth`.
431
+ - Added the auth-gateway subsystem (`@aryee337/aery-ai/auth-gateway`) — a forward-proxy that sits between unauthenticated clients (the macOS usage widget, llm-git, robomp containers, …) and the broker. Clients send standard provider-format requests; the gateway parses them into aery's canonical `Context`, dispatches through aery-ai's `streamSimple()`, and translates the canonical event stream back to the matching wire format. `Authorization` is injected server-side so access tokens never leave the gateway host. Wire surface:
432
+ - `GET /healthz` — unauth liveness.
433
+ - `GET /v1/usage` — aggregated provider usage; 5-min per-credential cache via `AuthStorage.fetchUsageReports`.
434
+ - `GET /v1/models` — model catalog (scoped to providers with credentials).
435
+ - `POST /v1/chat/completions` — OpenAI chat-completions in/out.
436
+ - `POST /v1/messages` — Anthropic messages in/out (text + thinking + tool_use blocks, SSE event taxonomy preserved).
437
+ - `POST /v1/responses` — OpenAI Responses in/out (reasoning items + function_call output items, SSE pass-through).
438
+ - Added exports from `@aryee337/aery-ai/auth-gateway`: `startAuthGateway`, `AuthGatewayServerOptions`, `AuthGatewayBootOptions`, `AuthGatewayServerHandle`, `ModelResolver`, `DEFAULT_AUTH_GATEWAY_BIND`. Per-format `parseRequest` / `encodeResponse` / `encodeStream` triples are reachable via the `./providers/*` subpath as `openai-chat-server`, `anthropic-messages-server`, and `openai-responses-server`.
439
+ - Added `listProvidersWithEnvKey()` to enumerate every provider with an env-var fallback (used by the new migrate command in coding-agent).
440
+
441
+ ### Changed
442
+
443
+ - Changed `GET /v1/snapshot` to support generation-based polling with `If-None-Match` and `wait` for long-poll updates and to return `304` when no snapshot changes are available
444
+ - Changed Bedrock credential resolution for streaming calls to prefer environment keys, AWS profile/SSO credentials, and IMDSv2 fallback when available
445
+ - Changed auth-gateway parsing for OpenAI chat-completions and Responses to ignore unsupported SDK-only fields instead of rejecting requests
446
+ - Changed auth-gateway protocol handling to include CORS headers on responses and support browser-origin requests
447
+ - Changed prompt-cache handling to resolve cache keys from request metadata and headers and preserve them through protocol translation
448
+ - Changed Anthropic messages parsing to forward request `metadata` through to downstream execution
449
+ - Changed usage report caching to use a 5-minute per-credential TTL with jittered refresh timing to reduce usage endpoint rate-limit collisions
450
+ - Changed usage polling failure handling so transient errors continue serving the last known report instead of returning null and dropping the credential from usage aggregates after cache expiry
451
+ - Changed `sanitizeSchemaForGoogle` to normalize snake_case schema keys (such as `any_of` and `additional_properties`) to camelCase and auto-generate `propertyOrdering` for multi-property objects
452
+ - Changed strict-mode sanitization to resolve `$ref` nodes with sibling keys by inlining and merging referenced local definitions
453
+ - Changed strict-mode sanitization to flatten single-entry `allOf` nodes and remove the `allOf` wrapper
454
+ - Changed Anthropic tool schema normalization to preserve supported metadata keywords such as `$ref`, `$defs`, `$schema`, `enum`, `const`, `default`, `title`, and `nullable` instead of stripping them
455
+ - Changed string schema processing to retain only supported `format` values (`date-time`, `time`, `date`, `duration`, `email`, `hostname`, `uri`, `ipv4`, `ipv6`, `uuid`) and demote unsupported `format` values to `description` hints
456
+
457
+ ### Fixed
458
+
459
+ - Fixed OAuth credential refresh flow so concurrent manual and background refreshes now share one in-flight attempt per credential, and `RemoteAuthCredentialStore` now re-synchronizes before using near-expiring OAuth credentials
460
+ - Fixed stale-credential handling after auth failures by waiting for updated broker snapshots and refreshing suspect credentials through broker endpoints before continuing
461
+ - Fixed Google Generative AI startup behavior to throw a clear API-key-required error when no key is configured
462
+ - Fixed AWS Bedrock image message serialization to preserve base64 `source.bytes` payloads instead of decoding and rebuilding them
463
+ - Fixed Google provider error handling to extract the API-reported `error.message` from JSON response bodies when available
464
+ - Fixed `RemoteAuthCredentialStore.getUsageReport` to return the matching credential-specific usage report and coalesce parallel callers into one broker `/v1/usage` fetch
465
+ - Fixed auth-broker credential upload validation to reject the remote refresh-token sentinel and prevent storing a non-refresh value
466
+ - Fixed OpenAI Responses streaming output to emit `reasoning_summary_text` events and parse/send `summary_text` reasoning payloads
467
+ - Fixed Anthropic stop-sequence handling by trimming requests to the API limit of four entries before forwarding
468
+ - Fixed prompt caching behavior across protocol translations so cached-token usage is preserved when Anthropic and OpenAI requests are routed through each other
469
+ - Fixed Claude usage fetching to retry transient `429` and `5xx` responses with exponential backoff, respecting `Retry-After` before returning failure
470
+ - Fixed auth-gateway request translation to preserve OpenAI Responses string/system message content, reasoning replay payloads, completed item text in stream item-done events, Anthropic tool-result ordering, and OpenAI Chat/Responses cached-token usage totals
471
+ - Fixed auth-gateway failure handling so unsupported request controls, upstream terminal errors, non-streaming aborts, and already-aborted client requests fail explicitly instead of being accepted, ignored, or encoded as successful HTTP 200 responses
472
+ - Fixed Gemini CLI / Antigravity tool schema normalization to run the full Cloud Code Assist pipeline, matching shared Google schema handling for union/object merging and nullable extraction
473
+ - Fixed stripped validation hints to be preserved as description spill text (`{key: value}` blocks) when `normalizeSchemaForGoogle` and `normalizeSchemaForCCA` drop unsupported schema keywords
474
+ - Fixed `sanitizeSchemaForGoogle` to collapse nullability forms (`type:'null'` and null-bearing `anyOf` variants) into `nullable` while preserving remaining variants
475
+ - Fixed `sanitizeSchemaForGoogle` to inline local `$defs` references instead of dropping `$ref`/`$defs` structure during Google schema sanitization
476
+ - Fixed `normalizeAnthropicToolSchema` to handle self-referential schemas without infinite recursion
477
+ - Fixed object schema normalization so explicit open-map declarations (`additionalProperties: true` and schema-valued `additionalProperties`) are preserved instead of being converted to closed objects
478
+ - Fixed unsupported schema constraints on arrays and strings (`maxItems`, `uniqueItems`, `pattern`, `minLength`, `maxLength`, and `minItems` when greater than 1) by demoting them into `description` rather than dropping them
479
+
480
+ ### Security
481
+
482
+ - Hardened auth-gateway bearer-token checks with constant-time comparison to avoid timing-side-channel leaks
483
+
484
+ ## [15.1.2] - 2026-05-15
485
+
486
+ ### Breaking Changes
487
+
488
+ - Rejected draft-07 tuple and dependency keywords (`items` arrays, `dependencies`, `additionalItems`) in JSON Schema validation
489
+
490
+ ### Added
491
+
492
+ - Added `responseHeaders`, `responseStatus`, and `responseRequestId` fields to `MockResponse` so mock providers can supply synthetic `ProviderResponseMetadata`
493
+ - Added `onResponse` metadata emission for mocks that sends lowercased headers and a default status of 200 before streaming when response headers are configured
494
+ - Added recursive strict-mode sanitization for array `prefixItems` entries so tuple schemas now enforce object constraints per item
495
+
496
+ ### Changed
497
+
498
+ - Normalized legacy draft-07 JSON Schema constructs used in tool parameters (`items` arrays, `additionalItems`, `definitions`, `dependencies`) to draft 2020-12 before OpenAI/Google/CCA sanitization, wire conversion, and argument validation
499
+ - Reworked OpenAI response schema adaptation to rewrite `oneOf` into `anyOf` while preserving existing `anyOf` branches
500
+ - Changed tuple array validation to validate per-index schemas from `prefixItems` and apply `items` only to remaining elements
501
+
502
+ ### Fixed
503
+
504
+ - Fixed validation of plain JSON Schema tool arguments that omitted a `$schema` URI so draft-07-shaped schemas now pass validation instead of being rejected
505
+ - Fixed tuple-array validation for legacy JSON Schema tool schemas to enforce `additionalItems: false` and per-position constraints after automatic draft upgrade
506
+ - Fixed Anthropic tool schema normalization to recurse into `prefixItems` so unsupported constraints inside tuple items are stripped in the generated input schema
507
+ - Fixed Anthropic tool-schema normalization stripping the body of explicit open `additionalProperties` (e.g. Zod's `z.record(z.string(), z.unknown())` compiling to `additionalProperties: {}`) by unconditionally overwriting it with `false`, which closed record-style fields and prevented models from supplying any key. The coding-agent's `resolve` tool exposes plan-approval titles via such a field, so Kimi K2 (and any other Anthropic-shaped provider) could not pass `extra: { title }`, blocking plan mode entirely ([#1104](https://github.com/eminent337/aery/issues/1104))
508
+ - Fixed Anthropic strict tool planning to leave tools with open `additionalProperties` maps non-strict instead of sending schemas Anthropic rejects.
509
+
510
+ ## [15.1.0] - 2026-05-15
511
+
512
+ ### Breaking Changes
513
+
514
+ - Removed TypeBox root exports (`Type`, `Static`, and `TSchema`) from the package entrypoint, so callers importing those symbols from `@aryee337/aery-ai` must migrate to `zod` or `@aryee337/aery-ai/types`
515
+
516
+ ### Added
517
+
518
+ - Added support for defining tool schemas with Zod (`z.object`, `z.string`, etc.) by allowing `Tool.parameters` to be either Zod schemas or legacy JSON Schema objects and converting them to provider wire format automatically
519
+ - Added package-level schema helpers in the `zod/v4` style by exporting `z` and `ZodType` from the root entrypoint
520
+ - Added a `mock` API provider via `createMockModel` to build `Model<"mock">` instances for fully in-memory, deterministic assistant streams in tests
521
+ - Added `streamMock` and `registerMockApi` so mock responses can be consumed through `stream()` and the global custom API registry without an external model backend
522
+ - Added async/sync response scripting with optional context-based handlers, and new `push()`/`reset()` controls to drive multi-turn mock interactions and inspect per-call invocation state
523
+ - Added support in mock responses for simulating tool calls, usage metadata, custom stop reasons, delayed emissions, and terminal error/aborted outcomes
524
+
525
+ ### Changed
526
+
527
+ - Changed Azure OpenAI Responses tool schema conversion to sanitize tool parameter schemas and rewrite `oneOf` branches as `anyOf` so tool calls remain compatible with Azure's schema expectations
528
+ - Changed `Static<S>` to extract a schema object’s `static` type when present, improving inferred tool argument types for non-Zod parameter definitions
529
+ - Changed `Static` typing behavior so it now infers argument types from Zod schemas and defaults to `unknown` for non-Zod JSON Schema parameter definitions
530
+ - Restored the default steady-state stream idle timeout to 120s (regressed in 15.0.0). 30s was too aggressive for reasoning models, slow proxies, and tool-call planning gaps, surfacing as repeated `Provider stream stalled while waiting for the next event` errors. Existing `PI_STREAM_IDLE_TIMEOUT_MS` / `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` overrides are unchanged.
531
+
532
+ ### Fixed
533
+
534
+ - Preserved top-level unknown fields in validated tool-call arguments so extra root properties are retained after schema coercion
535
+ - Fixed coercion for Zod `record` fields by parsing JSON-stringified record arguments into objects
536
+ - Validated legacy draft-07 JSON Schema tool parameters directly instead of converting through Zod, improving support for features like `$ref`, `definitions`, `nullable`, and `uniqueItems`
537
+ - Fixed Cloud Code Assist schema preparation to strip unsupported `propertyNames` and fall back to a minimal tool schema when schema meta-validation detects malformed keywords
538
+ - Fixed OpenAI Completions streaming to avoid treating non-output chunks (including role-only preambles) as progress events so idle-timeout watchdog behavior no longer hangs on no-op streamed chunks
539
+ - Fixed Cloud Code Assist schema compatibility checks by replacing strict AJV meta-schema validation with structural JSON Schema validation to avoid rejecting structurally valid tool schemas
540
+ - Fixed lazy built-in provider streams (`anthropic-messages`, `bedrock-converse-stream`, `cursor-agent`, `google-*`, `ollama-chat`, `openai-*`) prematurely aborting slow first-token responses with `Provider stream stalled while waiting for the next event`. The lazy-stream watchdog wrapper was treating the synthetic `start` event (yielded immediately by every provider before the model emits any tokens) as the first real item, which caused the watchdog to drop from `firstItemTimeoutMs` (100s) to `idleTimeoutMs` (30s) before the upstream model had produced anything. The shared `iterateWithIdleTimeout` now keeps `awaitingFirstItem` true until a real progress item arrives, and the lazy-stream wrapper marks `start` as a non-progress keepalive ([#1073](https://github.com/eminent337/aery/pull/1073) regression).
541
+ - Healed leaked Kimi K2 chat-template tool-call tokens (`<|tool_calls_section_begin|>` … `<|tool_call_argument_begin|>` … `<|tool_calls_section_end|>`) that some hosts (native `kimi-code` API, OpenRouter, Fireworks, etc.) emit into `delta.content` instead of structured `tool_calls`. The OpenAI-completions stream consumer now strips the markers from visible text, reconstructs the embedded calls as proper `toolCall` content blocks (stream-aware, token-boundary-safe), and promotes `finish_reason: stop` to `toolUse` when calls were healed.
542
+ - Fixed OpenAI-completions Kimi K2 healed-call promotion clobbering non-stop terminal finish reasons (`error`, `length`, `aborted`); promotion now only fires when the prior stop reason is the natural-completion `stop`
543
+ - Fixed OpenAI-completions duplicate Kimi tool calls when a single chunk delivers both leaked markers and a structured `delta.tool_calls`; the healer now strips visible markers but discards its synthesized calls so structured payloads remain the single source of truth
544
+ - Fixed Kimi tool-call healer synthesizing a bogus empty call when assistant text mentions a literal `<|tool_call_end|>` (or `<|tool_call_begin|>` / `<|tool_call_argument_begin|>`) outside an active `<|tool_calls_section_begin|>…<|tool_calls_section_end|>` section; the tokens now survive as text
545
+ - Fixed OpenAI-completions ignoring per-request `StreamOptions.streamFirstEventTimeoutMs` when configuring the underlying OpenAI SDK HTTP timeout, causing slow-before-headers providers to be aborted at the env default before the wrapping watchdog armed
546
+ - Fixed JSON Schema validator silently accepting values that violate `propertyNames`, `patternProperties`, `dependentRequired`, `dependencies`, `if`/`then`/`else`, `contains`, and `prefixItems`; the in-tree validator now enforces these keywords instead of falling through. `unevaluatedProperties`/`unevaluatedItems` remain permissive but log a one-time warning so tool authors are not surprised.
547
+ - Fixed recursive `$ref` schemas being treated as universally valid: the validator previously short-circuited on the second occurrence of any ref it had already seen, so nested values violating the referenced sub-schema passed. Cycle detection now keys on (ref, value-identity) pairs with a depth cap for primitive values, so genuine sub-tree violations are still caught.
548
+ - Fixed JSON Schema meta-validator accepting malformed `if`/`then`/`else` and `dependencies` keywords; each conditional sub-schema is now structurally validated and draft-07 `dependencies` accepts either a schema or a string array of dependent keys.
549
+ - Fixed Zod-emitted wire schemas dropping null-valued unknown root fields before `preserveUnknownRootFields` could snapshot them, so callers like `task.simple` no longer lose a `schema: null` argument and downstream rejection paths fire as intended.
550
+ - Fixed mock provider partial `Usage` to recompute `totalTokens` (and `cost.total` when cost components are supplied) when omitted, instead of reporting 0
551
+ - Fixed mock provider auto-generated tool-call IDs to use a per-instance counter (now reset by `reset()`), so test order no longer affects IDs across `createMockModel()` instances
552
+
553
+ ## [15.0.2] - 2026-05-15
554
+
555
+ ### Fixed
556
+
557
+ - Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes
558
+ - Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/eminent337/aery/issues/1077)).
559
+
560
+ ## [15.0.1] - 2026-05-14
561
+
562
+ ### Breaking Changes
563
+
564
+ - Increased the minimum Bun runtime version to `>=1.3.14` for the `@aryee337/aery-ai` package
565
+
566
+ ### Added
567
+
568
+ - Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
569
+ - Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `aery stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
570
+
571
+ ## [15.0.0] - 2026-05-13
572
+
573
+ ### Added
574
+
575
+ - Added `AuthStorage.onCredentialDisabled(listener)` — a multi-subscriber `on/off` API for `credential_disabled` events. Returns an unsubscribe function; calling it more than once is a no-op. Multiple subscribers all receive every disable event, with synchronous and async exceptions isolated per-listener so a misbehaving subscriber cannot starve the rest of the chain. Buffer-and-replay semantics are preserved: events emitted while no listener is subscribed are buffered (FIFO, capped at 32) and replayed once to the listener that triggers the empty→non-empty transition. After every subscriber unsubscribes, subsequent disable events buffer again until the next subscribe.
576
+
577
+ ### Fixed
578
+
579
+ - Fixed OAuth credentials being silently disabled when two aery processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
580
+ - Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
581
+ - Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
582
+
583
+ ### Changed
584
+
585
+ - Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
586
+
587
+ ## [14.9.3] - 2026-05-10
588
+
589
+ ### Fixed
590
+
591
+ - Anthropic provider now retries generic transient connect failures (`unable to connect`, `fetch failed`, `connection error`, etc.) by falling back to the shared `isRetryableError` allowlist after the provider-specific patterns. Previously these errors bypassed the hand-curated regex in `isProviderRetryableError` and aborted the stream on the first attempt, while the OpenAI SDK and Codex `fetchWithRetry` paths already handled them.
592
+
593
+ ## [14.9.0] - 2026-05-10
594
+
595
+ ### Fixed
596
+
597
+ - Fixed silent forwarding of image content (for example Python plot output rendered in the terminal) to models without vision support, which produced opaque 404 errors from upstream. Image blocks are now stripped and replaced with a `[image omitted: model does not support vision]` placeholder for non-vision models, including tool-result payloads ([#967](https://github.com/eminent337/aery/issues/967), [#968](https://github.com/eminent337/aery/issues/968)).
598
+
599
+ - Added `AuthStorage` `onCredentialDisabled` callback (sync or async) so embedders can react when a credential is automatically disabled (e.g. OAuth refresh fails with `invalid_grant`) — useful for surfacing a banner or auto-launching a re-login flow instead of letting the credential silently disappear. Sync throws and async rejections are both caught and logged so a misbehaving subscriber cannot break the disable path.
600
+ - Added Anthropic OAuth `account.uuid` and `account.email_address` extraction from the `/v1/oauth/token` exchange and refresh responses; both `AnthropicOAuthFlow.exchangeToken()` and `refreshAnthropicToken()` now populate `OAuthCredentials.{accountId, email}` so downstream consumers can attribute requests to the authenticated account without a separate `/api/oauth/profile` round-trip.
601
+ - Added `onSseEvent` stream diagnostics so HTTP SSE providers can expose raw SSE frames without changing parsed model output.
602
+ - Added `streamIdleTimeoutMs` option (and `PI_STREAM_IDLE_TIMEOUT_MS` env override; `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS` remains a backward-compatible alias) for a steady-state inter-event watchdog. Set to `0` to disable.
603
+ - Added a semantic-progress predicate to OpenAI Responses and Codex SSE/WebSocket transports so `response.in_progress`-style keepalives no longer reset the idle deadline on stalled tool calls.
604
+
605
+ ### Changed
606
+
607
+ - Anthropic streams now enforce a steady-state idle timeout (defaults to 120s, same control as `PI_STREAM_IDLE_TIMEOUT_MS`) in addition to the first-event watchdog. Long-running responses that go fully silent between events will now surface as `Anthropic stream stalled while waiting for the next event` instead of hanging.
608
+ - Fixed `resolveAnthropicMetadataUserId()` to accept JSON-format `user_id` values that match real Claude Code's payload shape (`{ device_id, account_uuid, session_id, ... }` from `services/api/claude.ts:getAPIMetadata`). Previously only the synthetic `user_<hex>_account_<uuid>_session_<uuid>` cloaking format was accepted on OAuth, which caused stable session-keyed metadata supplied by callers to be discarded and replaced with fresh random entropy on every request — defeating session-count attribution on the Claude OAuth path.
609
+
610
+ ## [14.8.0] - 2026-05-09
611
+
612
+ ### Fixed
613
+
614
+ - Fixed Gemini 3 Pro thinking metadata so `medium` effort is rejected with the expected error instead of being silently accepted: `ThinkingConfig` now carries an optional explicit `levels` list that survives `expandEffortRange`, letting non-contiguous supported sets (e.g. `[low, high]`) round-trip through enrichment.
615
+ - Fixed Kimi Code OAuth expiry handling to refresh access tokens 5 minutes before server expiry, avoiding daily 401s from using tokens right up to the cutoff.
616
+ - Fixed OpenAI Responses custom tool replay to preserve custom tool call item IDs with the `ctc_` prefix instead of rewriting them as `fc_` function-call IDs ([#977](https://github.com/eminent337/aery/issues/977)).
617
+
618
+ ## [14.7.6] - 2026-05-07
619
+
620
+ ### Added
621
+
622
+ - Added `hideThinkingSummary` option to `SimpleStreamOptions`. When true, `streamSimple` requests that the underlying provider omit reasoning/thinking summaries: Anthropic receives `thinking.display = "omitted"` (where supported), and OpenAI Responses / Azure / Codex providers leave `reasoning.summary` unset so the server skips emitting the human-readable summary stream entirely.
623
+
624
+ ### Changed
625
+
626
+ - Changed OpenAI Responses, Azure OpenAI Responses, and OpenAI Codex providers to omit `reasoning.summary` from requests when `reasoningSummary` is explicitly `null` (previously fell back to `"auto"`).
627
+
628
+ ## [14.7.5] - 2026-05-07
629
+
630
+ ### Added
631
+
632
+ - Added `OpenAICompat.supportsMultipleSystemMessages` so chat-completions hosts can opt out of separate leading system blocks. Auto-detected as `true` for OpenAI, Azure, OpenRouter, Cerebras, Together, Fireworks, Groq, DeepSeek, Mistral, xAI, Z.ai, GitHub Copilot, and Zenmux; `false` for MiniMax, Alibaba Dashscope, and Qwen Portal whose chat templates reject follow-up system messages. Unknown OpenAI-compatible hosts (custom vLLM/local) default to `false`; users can opt back in via `compat.supportsMultipleSystemMessages: true`.
633
+
634
+ ### Fixed
635
+
636
+ - Fixed strict-template OpenAI-compatible hosts (e.g. Qwen 3.5+ via vLLM, MiniMax) rejecting follow-up `system`/`developer` messages by coalescing ordered system prompts into a single block joined by `\n\n` when `compat.supportsMultipleSystemMessages` is false. Canonical hosts continue to receive separate blocks so KV-cache reuse stays effective when only the trailing prompt changes ([#958](https://github.com/eminent337/aery/issues/958)).
637
+
638
+ ## [14.7.2] - 2026-05-06
639
+
640
+ ### Fixed
641
+
642
+ - Fixed VLLM model discovery to use `max_model_len` as the context window when the endpoint reports it.
643
+ - Fixed custom Ollama Cloud/local-proxy model aliases (for example `deepseek-v4-pro:cloud`) to inherit bundled cache-pricing metadata when the upstream model is known ([#937](https://github.com/eminent337/aery/issues/937)).
644
+ - Fixed local Ollama model discovery to apply `/api/show` thinking and vision capabilities in addition to native context windows ([#928](https://github.com/eminent337/aery/issues/928)).
645
+
646
+ ## [14.7.0] - 2026-05-04
647
+
648
+ ### Breaking Changes
649
+
650
+ - Changed `Context.systemPrompt` from a string to `string[]`, so callers must now pass an array of prompts instead of a single string
651
+ - Changed request builders to normalize system prompts as an array, so non-array system prompts now throw at runtime
652
+
653
+ ### Added
654
+
655
+ - Added support for multiple system prompts by changing `Context.systemPrompt` to an ordered string array and preserving provider-appropriate instruction precedence
656
+
657
+ ### Changed
658
+
659
+ - Changed request builders for Anthropic, OpenAI, Bedrock, Azure, Cursor, Google, and Ollama to propagate every non-empty system prompt entry without demoting durable instructions into ordinary conversation turns
660
+
661
+ ### Fixed
662
+
663
+ - Filtered out empty normalized system prompts so blank entries are no longer sent to providers
664
+ - Removed blank system prompt strings from provider payloads to avoid unnecessary empty instruction messages
665
+
666
+ ## [14.6.6] - 2026-05-04
667
+
668
+ ### Added
669
+
670
+ - Added always-on OpenRouter response caching (1h TTL) by sending `X-OpenRouter-Cache: true` and `X-OpenRouter-Cache-TTL: 3600` on every OpenRouter request — identical requests replay from OpenRouter's edge cache for free. See https://openrouter.ai/docs/features/response-caching
671
+
672
+ ## [14.6.4] - 2026-05-03
673
+
674
+ ### Fixed
675
+
676
+ - Fixed OpenAI Codex websocket continuations to retry with full context when `previous_response_id` expires server-side instead of surfacing `previous_response_not_found`.
677
+
678
+ ## [14.6.2] - 2026-05-03
679
+
680
+ ### Added
681
+
682
+ - Added `EventStream.fail(err)` method to terminate the async iterator with an error, enabling consumers to catch stream-level failures via `for await` without hanging
683
+
684
+ ### Fixed
685
+
686
+ - Fixed OpenAI Responses tool schema conversion to rewrite non-strict `oneOf` unions to `anyOf` before sending tools to the Responses API ([#920](https://github.com/eminent337/aery/issues/920))
687
+
688
+ ## [14.6.0] - 2026-05-02
689
+
690
+ ### Added
691
+
692
+ - Added `disableReasoning` to stream and OpenAI completion options to force reasoning off for models that support it, sending `reasoning: { enabled: false }` for OpenRouter-compatible requests
693
+ - Added `thinkingDisplay` option to Anthropic options to control whether adaptive and explicit reasoning is returned as `summarized` or `omitted`
694
+ - Added Anthropic model compatibility flags `supportsEagerToolInputStreaming` and `supportsLongCacheRetention` for API-capability-specific request behavior
695
+
696
+ ### Changed
697
+
698
+ - Changed Anthropic request payloads to send `thinking: { type: "disabled" }` when `thinkingEnabled` is explicitly `false` on reasoning-enabled models
699
+ - Changed Anthropic cache retention handling so `cacheRetention: "long"` now uses `ttl: "1h"` only for canonical Anthropic endpoints with long-cache support
700
+ - Changed Anthropic tool schema generation to include `eager_input_streaming` only on models that advertise support
701
+ - Changed Anthropic OAuth login flow to include browser fallback guidance and richer error context when token exchange or refresh fails
702
+
703
+ ### Fixed
704
+
705
+ - Fixed Anthropic non-thinking requests to include the caller-provided `temperature` value in request payloads
706
+ - Fixed Anthropic `claude-opus-4-7` non-thinking payloads to omit sampling fields (`temperature`, `top_p`, and `top_k`)
707
+ - Fixed OpenAI Codex base URL normalization so configured base URLs with or without `/codex` or `/codex/responses` now resolve to `/codex/responses`
708
+ - Fixed OpenAI Codex websocket handling to parse JSON from non-string message payloads including `ArrayBuffer`, typed arrays, and `Blob` values
709
+ - Fixed OpenAI Codex websocket handshakes to replace stale `openai-beta` values with the websocket beta and avoid sending request-body headers over websocket transport
710
+ - Fixed abort tracking so caller-initiated cancellations are treated as user aborts even after local watchdog timeouts, preventing unintended automatic retries
711
+ - Fixed Anthropic stream handling to parse raw SSE envelopes directly, ignore unrelated events, and repair malformed JSON in SSE payloads
712
+ - Fixed Anthropic streaming to emit an explicit error when the SSE stream ends without a `message_stop` event
713
+ - Fixed OpenAI Codex websocket continuations to send true `previous_response_id` deltas for `store: false` transcripts, expose request stats, and default text verbosity to `low` unless explicitly overridden.
714
+ - Fixed OpenAI Codex websocket append reuse after `response.completed` terminal events.
715
+
716
+ ## [14.5.14] - 2026-05-01
717
+
718
+ ### Added
719
+
720
+ - Added package-level `google-gemini-headers` exports (`getGeminiCliHeaders`, `getGeminiCliUserAgent`, `getAntigravityHeaders`, `extractRetryDelay`, and `ANTIGRAVITY_SYSTEM_INSTRUCTION`) for header and retry handling reuse without importing full Google providers
721
+
722
+ ### Changed
723
+
724
+ - Changed package exports and streaming/provider wiring to load heavy Google/Kimi/GitLab/synthetic provider modules lazily through `register-builtins`, reducing startup import overhead from optional provider SDKs
725
+
726
+ ### Fixed
727
+
728
+ - Fixed DeepSeek V4 tool-call follow-up 400 errors from three root causes:
729
+   - Mapped `reasoning_effort` "xhigh" to "max" for DeepSeek-family models on any provider (NVIDIA, OpenCode-Go, etc.), not just `deepseek`
730
+   - Recovered `reasoning_content` from thinking blocks with valid signatures that were filtered by the non-empty-text check
731
+   - Added empty-string fallback when `reasoning_content` is genuinely absent (e.g. proxy-stripped) but the provider requires the field
732
+
733
+ ## [14.5.13] - 2026-05-01
734
+
735
+ ### Breaking Changes
736
+
737
+ - Removed `utils/oauth` re-exports from the package entrypoint, so OAuth helper imports from the root module must be updated
738
+
739
+ ## [14.5.10] - 2026-04-30
740
+
741
+ ### Added
742
+
743
+ - Added provider response metadata callbacks for Anthropic and OpenAI streaming requests.
744
+
745
+ ## [14.5.9] - 2026-04-30
746
+
747
+ ### Added
748
+
749
+ - Added `usage.reasoningTokens` to OpenAI and Google usage output when providers report reasoning/thinking tokens
750
+ - Added `usage.cttl.ephemeral5m` and `usage.cttl.ephemeral1h` to report Anthropic cache-write TTL token buckets
751
+ - Added `usage.server.webSearch` and `usage.server.webFetch` to report Anthropic server tool-call request counts
752
+
753
+ ### Fixed
754
+
755
+ - Fixed OpenAI usage attribution to avoid double-counting `reasoning_tokens` in output totals
756
+ - Fixed Anthropic streaming usage handling so a previously populated cache TTL breakdown is preserved when later events omit `cache_creation`
757
+
758
+ ## [14.5.4] - 2026-04-28
759
+
760
+ ### Changed
761
+
762
+ - Changed OpenAI custom Lark grammar payloads to strip comments and blank lines before sending provider requests.
763
+
764
+ ### Fixed
765
+
766
+ - Fixed OpenAI Codex GPT model pricing by inheriting matching OpenAI catalog rates for zero-priced discovered Codex entries.
767
+
768
+ ## [14.5.3] - 2026-04-27
769
+
770
+ ### Added
771
+
772
+ - Added `fireworks` as a supported provider with API key login flow and credential storage
773
+ - Added Fireworks model catalog support with `fireworks`-scoped openai-completions models `glm-5`, `glm-5.1`, `kimi-k2.5`, `kimi-k2.6`, and `minimax-m2.7`
774
+ - Added built-in discovery wiring so providers with base URL `api.fireworks.ai` are recognized as OpenAI-compatible and can use streaming token control
775
+
776
+ ### Changed
777
+
778
+ - Updated the built-in model catalog to use corrected `contextWindow` and `maxTokens` values for many existing models instead of placeholder limits
779
+ - Updated several model cost entries, including cache-read pricing, to corrected values
780
+
781
+ ### Fixed
782
+
783
+ - Fixed Fireworks request formatting by translating between public model IDs and API wire IDs when sending OpenAI-completions requests
784
+ - Fixed OpenAI-compatible model parameter handling for Fireworks by allowing `max_tokens` to be sent during requests
785
+
786
+ ## [14.5.1] - 2026-04-26
787
+
788
+ ### Fixed
789
+
790
+ - Fixed NVIDIA NIM DeepSeek-V4 models leaking chat-template tool-call markers (e.g. `<|DSML|tool_calls|>`) into visible response text by stripping the special tokens from streamed `delta.content` ([#798](https://github.com/eminent337/aery/issues/798))
791
+
792
+ ## [14.4.0] - 2026-04-26
793
+
794
+ ### Added
795
+
796
+ - Added an `examples` option to `StringEnum` to include example values in the generated schema
797
+
798
+ ### Changed
799
+
800
+ - Changed Anthropic tool schema generation to strip unsupported schema fields (including `patternProperties`), add `additionalProperties: false` for object types, and apply Anthropic strict-mode limits when marking tools as strict
801
+ - Changed Anthropic strict tool planning to cap strict `tools` at twenty entries and convert excess optional/union parameters to nullable schemas to stay within provider constraints
802
+
803
+ ### Fixed
804
+
805
+ - Fixed Anthropic tool schema compilation failures by keeping the `write` tool out of the strict-tool allowlist when the full coding-agent tool set is active
806
+ - Fixed Anthropic 400 `tools.*.custom: For 'object' type, property 'minItems' is not supported` by stripping `minItems` from object-shaped JSON schema nodes (array nodes still keep supported `minItems` values)
807
+ - Fixed Anthropic tool schemas that used tuple-style arrays by stripping unsupported `maxItems` and only preserving provider-supported `minItems` values
808
+ - Fixed Anthropic and OpenRouter Anthropic tool calls that previously failed with `compiled grammar is too large` by retrying automatically without strict tool schemas and reusing non-strict mode for subsequent requests in the same provider session
809
+ - Fixed parsing of JSON tool arguments containing raw control characters inside string values (such as embedded newlines) by escaping them before JSON parsing
810
+ - Fixed `validateToolArguments` to accept stringified objects and arrays that include literal control characters inside string fields
811
+ - Fixed OpenAI Codex Spark OAuth selection to fall back to non-Pro accounts when no ChatGPT Pro account is connected, so users without a Pro account can still attempt Spark requests in case the server permits access.
812
+
813
+ ## [14.3.0] - 2026-04-25
814
+
815
+ ### Added
816
+
817
+ - Added support for Claude Opus 4.7 (`claude-opus-4-7`) model ([#726](https://github.com/eminent337/aery/issues/726))
818
+   - Suppresses sampling parameters (temperature/top_p/top_k) that Opus 4.7 rejects
819
+   - Enables `display: "summarized"` for adaptive thinking to restore visible thinking content
820
+
821
+ ### Fixed
822
+
823
+ - Fixed Cursor provider losing conversation history on follow-up turns (model responding "this appears to be the start of our session") by populating `ConversationStateStructure.rootPromptMessagesJson` with JSON blob IDs for the system prompt plus prior user/assistant/tool-result messages. Cursor's server builds the model prompt from `rootPromptMessagesJson`, not from the protobuf `turns[]` tree, so sending only the system prompt there caused prior turns to be dropped
824
+ - Fixed Cursor provider multi-turn conversations failing with `Connect error internal: Blob not found` on the second message by storing `ConversationStateStructure.turns`, `AgentConversationTurnStructure.user_message`, and `AgentConversationTurnStructure.steps` as content-addressed blob IDs in the KV store (matching the existing handling for `rootPromptMessagesJson`) rather than sending the raw serialized bytes inline ([#678](https://github.com/eminent337/aery/issues/678))
825
+
826
+ ## [14.2.1] - 2026-04-24
827
+
828
+ ### Fixed
829
+
830
+ - Fixed OpenAI Codex Spark OAuth selection to require a verified ChatGPT Pro account instead of falling back to Plus or unknown-plan accounts.
831
+
832
+ ## [14.2.0] - 2026-04-23
833
+
834
+ ### Added
835
+
836
+ - Added `gpt-5.5` to the built-in model catalog for both OpenAI Responses (`openai`) and local `litellm` (`openai-completions`) providers
837
+ - Added `gpt-image-2` to the `litellm` built-in model catalog
838
+ - Added `isCopilotTransientModelError()` and `callWithCopilotModelRetry()` helpers in `utils/retry` that detect GitHub Copilot's intermittent `HTTP 400 model_not_supported` responses for preview models (`gpt-5.3-codex`, `gpt-5.4`, `gpt-5.4-mini`, ...) and retry the request up to three times with backoff. OpenAI Responses, OpenAI Completions, and Anthropic provider paths now participate in this retry when the model is served through Copilot.
839
+ - Added OpenAI Responses custom-tool grammar support for Codex-style `apply_patch` calls, including freeform streaming, history replay, and forced tool-choice mapping to the custom wire name.
840
+
841
+ ### Changed
842
+
843
+ - Updated built-in model metadata with revised `contextWindow`, `maxTokens`, and pricing values for existing entries
844
+ - Changed generated model policies to assign `applyPatchToolType: "freeform"` for first-party GPT-5 OpenAI Responses and Codex models, so regenerated `models.json` preserves the `apply_patch` custom-tool metadata.
845
+ - Renamed `rewriteCopilotAuthError` to `rewriteCopilotError` and extended it to rewrite `HTTP 400 model_not_supported` after retries are exhausted with guidance about Copilot's OAuth-client-specific rollout gap (see opencode#13313).
846
+
847
+ ### Fixed
848
+
849
+ - Fixed Amazon Bedrock proxy handling to honor lowercase `http_proxy`, `https_proxy`, and `all_proxy` environment variables when using HTTP/1 fallback
850
+ - Fixed Amazon Bedrock streaming behind corporate HTTP proxies by using a proxy-aware HTTP/1 transport when `HTTPS_PROXY`, `HTTP_PROXY`, or `ALL_PROXY` is configured, including AWS SSO credential calls.
851
+ - Fixed Amazon Bedrock requests to retry once with HTTP/1 when the AWS SDK's default HTTP/2 transport fails before streaming begins.
852
+ - Fixed OpenAI Responses streaming to display thinking tokens from local providers (llama.cpp, etc.) that send raw `reasoning_text.delta` events and empty `summary` arrays in `output_item.done`. Previously, thinking content was silently dropped during streaming while non-streaming mode worked correctly.
853
+ - Synced the bundled OpenCode Go catalog with the current docs so `kimi-k2.6`, `mimo-v2.5`, and `mimo-v2.5-pro` appear in offline/default model lists.
854
+
855
+ ## [14.1.3] - 2026-04-17
856
+
857
+ ### Fixed
858
+
859
+ - Preserved user-provided `session_id` and `x-client-request-id` headers in OpenAI Responses requests instead of overriding them with automatic session-derived values
860
+ - Stopped sending `session_id` and `x-client-request-id` headers for OpenAI Responses requests when `cacheRetention` is set to `none`
861
+ - Fixed direct OpenAI Responses requests to send `session_id` and `x-client-request-id` from the same session-derived value as `prompt_cache_key`, improving prompt cache affinity for append-only sessions
862
+
863
+ ## [14.1.1] - 2026-04-14
864
+
865
+ ### Added
866
+
867
+ - Added `toolStrictMode` compatibility option (`"all_strict"` or `"none"`) to OpenAI-compatible model config to force tool schemas to be sent uniformly strict, uniformly non-strict, or keep mixed per-tool behavior
868
+
869
+ ### Changed
870
+
871
+ - Changed Cerebras OpenAI-compatible providers to default `toolStrictMode` to `"all_strict"` unless explicitly overridden
872
+
873
+ ### Fixed
874
+
875
+ - Fixed OpenAI Completions handling for providers that reject mixed `strict` flags by automatically retrying with non-strict tool schemas when an initial all-strict tool request fails with strict-format 400/422 errors
876
+ - Fixed OpenAI-completions error reporting by including captured JSON error body details such as type, param, and code when a request fails without a body in the thrown SDK error
877
+ - Fixed shell execution failure responses to preserve all result fields when sanitizing, preventing truncated metadata in stream results
878
+ - Fixed context overflow detection to recognize `model_context_window_exceeded` from z.ai / GLM providers, preventing infinite retry loops when context window is exceeded ([#638](https://github.com/eminent337/aery/issues/638))
879
+ - Fixed strict tool schema enforcement to preserve `additionalProperties: false` and required keys for reused nested object schemas, preventing invalid `todo_write` function schemas in Codex/OpenAI requests
880
+ - Fixed GitHub Copilot reasoning regressions by preserving GPT-5.x / Claude 4.x reasoning controls instead of stripping them from requests ([#773](https://github.com/eminent337/aery/issues/773))
881
+
882
+ ## [14.1.0] - 2026-04-11
883
+
884
+ ### Added
885
+
886
+ - Added `accountId` to usage report metadata
887
+
888
+ ### Changed
889
+
890
+ - Changed usage parsing to emit a usage report with available fields when parsing fails, rather than returning null
891
+
892
+ ### Fixed
893
+
894
+ - Fixed `planType` resolution to fall back to the raw payload `plan_type` when parsed value is absent
895
+ - Fixed usage metadata `raw` fallback to preserve the original payload when parsed raw output is missing
896
+
897
+ ## [14.0.5] - 2026-04-11
898
+
899
+ ### Changed
900
+
901
+ - Replaced GitHub Copilot authentication from VSCode extension impersonation to the opencode OAuth flow, eliminating TOS concerns. Existing users will need to re-authenticate once with `/login github-copilot`.
902
+ - Simplified Copilot token handling: GitHub OAuth token is used directly for all API requests (no JWT exchange or refresh cycle).
903
+ - Changed GitHub Copilot API base URL from `api.individual.githubcopilot.com` to `api.githubcopilot.com`.
904
+ - Updated default OpenAI stream idle timeout to 120,000 milliseconds to keep stream generation alive longer
905
+
906
+ ### Fixed
907
+
908
+ - Fixed duplicate synthetic tool results being generated when a real tool result appears later in message history
909
+ - Fixed GitHub Copilot `/models` discovery to unwrap structured OAuth credentials before sending the bearer token, preserving dynamic catalog refresh for OAuth-backed callers.
910
+
911
+ ### Removed
912
+
913
+ - Removed Copilot JWT proxy-ep base URL resolution (no longer needed with opencode auth).
914
+
915
+ ## [14.0.3] - 2026-04-09
916
+
917
+ ### Fixed
918
+
919
+ - Fixed Ollama discovery cache normalization so cached models upgrade to the OpenAI Responses transport after the provider change
920
+
921
+ ## [14.0.0] - 2026-04-08
922
+
923
+ ### Breaking Changes
924
+
925
+ - Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing
926
+
927
+ ### Added
928
+
929
+ - Added support for OpenRouter provider with strict mode detection
930
+ - Added automatic cleaning of literal escape sequences (`\n`, `\t`, `\r`) in JSON parsing to handle LLM encoding confusion
931
+ - Added support for healing JSON with trailing junk after balanced containers (e.g., `]\n</invoke>`)
932
+ - Added `CODEX_STARTUP_EVENT_CHANNEL` constant and `CodexStartupEvent` type for monitoring Codex provider initialization status
933
+ - Added automatic healing of malformed JSON with single-character bracket errors at the end of strings, improving LLM tool argument parsing robustness
934
+
935
+ ## [13.19.0] - 2026-04-05
936
+
937
+ ### Fixed
938
+
939
+ - Fixed GitHub Copilot model context window detection by correcting fallback priority for `maxContextWindowTokens` and `maxPromptTokens`
940
+ - Fixed Gemini 2.5 Pro context window detection in GitHub Copilot model limits test
941
+ - Fixed Claude Opus 4.6 context window detection in GitHub Copilot model limits test
942
+ - Fixed Anthropic streaming to suppress transient SDK console errors for malformed SSE keep-alive frames so the TUI only shows surfaced provider errors
943
+
944
+ - Added environment-based credential fallback for the OpenAI Codex provider.
945
+
946
+ ## [13.17.6] - 2026-04-01
947
+
948
+ ### Fixed
949
+
950
+ - Fixed Anthropic first-event timeouts to exclude stream connection setup from the watchdog, preserve timeout-specific retry classification after local aborts, and reset retry state cleanly between attempts
951
+
952
+ ## [13.17.5] - 2026-04-01
953
+
954
+ ### Changed
955
+
956
+ - Increased default first-event timeout from 15s to 45s to better accommodate longer request setup times
957
+ - Modified first-event watchdog to inherit idle timeout when it exceeds the default, ensuring consistent timeout behavior across different configurations
958
+
959
+ ### Fixed
960
+
961
+ - Fixed first-event watchdog initialization timing so it no longer starts before the actual stream request is created, preventing premature timeouts during request setup
962
+ - Fixed first-event watchdog timing so OpenAI-family providers no longer count slow request setup against the first streamed event timeout, and raised the default first-event timeout to avoid false aborts after long tool turns
963
+
964
+ ## [13.17.2] - 2026-04-01
965
+
966
+ ### Fixed
967
+
968
+ - Fixed OpenAI-family first-event timeouts to preserve provider-specific timeout errors for retry classification instead of flattening them to generic aborts ([#591](https://github.com/eminent337/aery/issues/591))
969
+
970
+ ## [13.17.1] - 2026-04-01
971
+
972
+ ### Added
973
+
974
+ - Added `thinkingSignature` field to thinking content blocks to preserve the original reasoning field name (e.g., `reasoning_text`, `reasoning_content`) for accurate follow-up requests
975
+ - Added first-event timeout detection for streaming responses to abort stuck requests before user-visible content arrives
976
+ - Added `PI_STREAM_FIRST_EVENT_TIMEOUT_MS` environment variable to configure first-event timeout (defaults to 15 seconds or idle timeout, whichever is lower)
977
+ - Added Vercel AI Gateway to `/login` providers for interactive API key setup
978
+
979
+ ### Changed
980
+
981
+ - Changed thinking block handling to track and distinguish between different reasoning field types, enabling proper field name preservation across multiple turns
982
+
983
+ ### Fixed
984
+
985
+ - Fixed Anthropic stream timeout errors to be properly retried by recognizing first-event timeout messages
986
+ - Fixed stream stall detection to distinguish between first-event timeouts and idle timeouts, enabling faster recovery for stuck connections
987
+ - Fixed `aery commit` failing with HTTP 400 errors when using reasoning-enabled models on OpenAI-compatible endpoints that don't support the `developer` role (e.g., GitHub Copilot, custom proxies). Now falls back to `system` role when `developer` is unsupported.
988
+
989
+ ## [13.17.0] - 2026-03-30
990
+
991
+ ### Changed
992
+
993
+ - Bumped zai provider default model from glm-4.6 to glm-5.1
994
+
995
+ ## [13.16.5] - 2026-03-29
996
+
997
+ ### Added
998
+
999
+ - Added Gemma 3 27B model support for Google Generative AI
1000
+
1001
+ ### Changed
1002
+
1003
+ - Updated Kwaipilot KAT-Coder-Pro V2 model display name and pricing information
1004
+ - Updated Kwaipilot KAT-Coder-Pro V2 context window from 222,222 to 256,000 tokens and max tokens from 8,888 to 80,000
1005
+
1006
+ ### Fixed
1007
+
1008
+ - Fixed `normalizeAnthropicBaseUrl` returning an empty string instead of `undefined` when `baseUrl` is empty
1009
+
1010
+ ## [13.16.4] - 2026-03-28
1011
+
1012
+ ### Added
1013
+
1014
+ - Added support for Groq Compound and Compound Mini models with extended context window (131K tokens) and configurable thinking levels
1015
+ - Added support for OpenAI GPT-OSS-Safeguard-20B model with reasoning capabilities across multiple providers
1016
+ - Added support for Kwaipilot KAT-Coder-Pro V2 model across Kilo, NanoGPT, and OpenRouter providers
1017
+ - Added support for GLM-5.1 model with extended context window (200K tokens) and max output of 131K tokens
1018
+ - Added support for Qwen3.5-27B-Musica-v1 model
1019
+ - Added support for zai-org/glm-5.1 model with reasoning capabilities
1020
+ - Added support for Sapiens AI Agnes-1.5-Lite model with multimodal input (text and image) and reasoning
1021
+ - Added support for Venice openai-gpt-54-mini model
1022
+
1023
+ ### Changed
1024
+
1025
+ - Updated Qwen QwQ 32B max tokens from 16,384 to 40,960 across multiple providers
1026
+ - Updated OpenAI GPT-OSS-Safeguard-20B model name to 'Safety GPT OSS 20B' and enabled reasoning capabilities
1027
+ - Updated OpenAI GPT-OSS-Safeguard-20B context window from 222,222 to 131,072 tokens and max tokens from 8,888 to 65,536
1028
+ - Updated OpenRouter Qwen QwQ 32B pricing: input from 0.2 to 0.19, output from 1.17 to 1.15, cache read from 0.1 to 0.095
1029
+ - Updated OpenRouter Claude 3.5 Sonnet pricing: input from 0.45 to 0.42, cache read from 0.225 to 0.21
1030
+
1031
+ ## [13.16.3] - 2026-03-28
1032
+
1033
+ ### Changed
1034
+
1035
+ - Modified OAuth credential saving to preserve unrelated identities instead of replacing all credentials for a provider
1036
+ - Updated credential identity resolution to use provider context for more accurate email deduplication
1037
+
1038
+ ### Fixed
1039
+
1040
+ - Fixed OAuth credential updates to replace matching credentials in-place rather than creating disabled rows, preventing unbounded accumulation of soft-deleted credentials
1041
+
1042
+ ## [13.15.0] - 2026-03-23
1043
+
1044
+ ### Added
1045
+
1046
+ - Added `isUsageLimitError()` to `rate-limit-utils` as a single source of truth for detecting usage/quota limit errors across all providers
1047
+
1048
+ ### Fixed
1049
+
1050
+ - Fixed lazy stream forwarding to properly handle final results from source streams with `result()` methods
1051
+ - Fixed lazy stream error handling to convert iterator failures into terminal error results instead of silently failing
1052
+ - Fixed `parseRateLimitReason` to recognize "usage limit" in error messages and correctly classify them as `QUOTA_EXHAUSTED`
1053
+ - Fixed Codex `fetchWithRetry` to return immediately on 429 responses with `usage_limit_reached` errors so credentials can be switched, instead of retrying them for up to 5 minutes
1054
+ - Removed `usage.?limit` from `TRANSIENT_MESSAGE_PATTERN` in retry utils since usage limits are not transient and require credential rotation
1055
+ - Fixed `parseRateLimitReason` not recognizing "usage limit" in Codex error messages, causing incorrect fallback to `UNKNOWN` classification instead of `QUOTA_EXHAUSTED`
1056
+
1057
+ ## [13.14.2] - 2026-03-21
1058
+
1059
+ ### Changed
1060
+
1061
+ - Updated thinking configuration format from `levels` array to `minLevel` and `maxLevel` properties for improved clarity
1062
+ - Corrected context window from 400000 to 272000 tokens for GPT-5.4 mini and nano variants on Codex transport
1063
+ - Normalized GPT-5.4 variant priority handling to use parsed variant instead of special-casing raw model IDs
1064
+ - Added support for `mini` variant in OpenAI model parsing regex
1065
+
1066
+ ### Fixed
1067
+
1068
+ - Fixed inconsistent thinking level configuration across multiple model definitions
1069
+
1070
+ ## [13.14.0] - 2026-03-20
1071
+
1072
+ ### Fixed
1073
+
1074
+ - Fixed resumed OpenAI Responses sessions to avoid replaying stale same-provider native history on the first follow-up after process restart ([#488](https://github.com/eminent337/aery/issues/488))
1075
+
1076
+ ### Added
1077
+
1078
+ - Added bundled GPT-5.4 mini model metadata for OpenAI, OpenAI Codex, and GitHub Copilot, including low-to-xhigh thinking support and GitHub Copilot premium multiplier metadata
1079
+ - Added bundled GPT-5.4 nano model metadata for OpenAI and OpenAI Codex, including low-to-xhigh thinking support
1080
+
1081
+ ## [13.13.2] - 2026-03-18
1082
+
1083
+ ### Changed
1084
+
1085
+ - Modified tool result handling for aborted assistant messages to preserve existing tool results when already recorded, instead of always replacing them with synthetic 'aborted' results
1086
+
1087
+ ## [13.13.0] - 2026-03-18
1088
+
1089
+ ### Changed
1090
+
1091
+ - Changed tool argument validation to always normalize optional null values before type coercion, ensuring consistent handling of LLM-generated 'null' strings
1092
+
1093
+ ### Fixed
1094
+
1095
+ - Fixed tool argument validation to properly handle string 'null' values from LLMs on optional fields by stripping them during normalization
1096
+ - Improved type safety of `validateToolCall` and `validateToolArguments` functions by returning properly typed `ToolCall["arguments"]` instead of `any`
1097
+
1098
+ ## [13.12.9] - 2026-03-17
1099
+
1100
+ ### Changed
1101
+
1102
+ - Extracted OpenAI compatibility detection and resolution logic into dedicated `openai-completions-compat` module for improved maintainability and reusability
1103
+
1104
+ ### Fixed
1105
+
1106
+ - Fixed `openai-responses` manual history replay to strip replay-only item IDs and preserve normalized tool `call_id` values for GitHub Copilot follow-up turns ([#457](https://github.com/eminent337/aery/issues/457))
1107
+
1108
+ ## [13.12.0] - 2026-03-14
1109
+
1110
+ ### Added
1111
+
1112
+ - Added support for `qwen-chat-template` thinking format to enable reasoning via `chat_template_kwargs.enable_thinking`
1113
+ - Added `reasoningEffortMap` option to `OpenAICompat` for mapping aery-ai reasoning levels to provider-specific `reasoning_effort` values
1114
+ - Added `extraBody` to `OpenAICompat` to support provider-specific request body routing fields in OpenAI-completions requests
1115
+ - Added support for reading token usage from choice-level `usage` field as fallback when root-level usage is unavailable
1116
+ - Added new models: DeepSeek-V3.2 (Bedrock), Llama 3.1 405B Instruct, Magistral Small 1.2, Ministral 3 3B, Mistral Large 3, Pixtral Large (25.02), NVIDIA Nemotron Nano 3 30B, and Qwen3-5-9b
1117
+ - Added `close()` method to `AuthStorage` for properly closing the underlying credential store
1118
+ - Added `initiatorOverride` option in OpenAI and Anthropic providers to customize message attribution
1119
+
1120
+ ### Changed
1121
+
1122
+ - Changed assistant message content serialization to always use plain string format instead of text block arrays to prevent recursive nesting in OpenAI-compatible backends
1123
+ - Changed Bedrock Opus 4.6 context window from 1M to 1M and added max tokens limit of 128K
1124
+ - Changed OpenCode Zen/Go Sonnet 4.0/4.5 context window from 1M to 200K
1125
+ - Changed GitHub Copilot context windows from 200K to 128K for both gpt-4o and gpt-4o-mini
1126
+ - Changed Claude 3.5 Sonnet (Anthropic API) pricing: input from $0.5 to $0.25, output from $3 to $1.5, cache read from $0.05 to $0.025, cache write from $0 to $1
1127
+ - Changed Devstral 2 model name from '135B' to '123B'
1128
+ - Changed ByteDance Seed 2.0-Lite to support reasoning with effort-based thinking mode and image inputs
1129
+ - Changed Qwen3-32b (Groq) reasoning effort mapping to normalize all levels to 'default'
1130
+ - Changed finish_reason 'end' to map to 'stop' for improved compatibility with additional providers
1131
+ - Changed Anthropic reference model merging to prioritize bundled metadata for known models while using models.dev for newly discovered IDs
1132
+
1133
+ ### Fixed
1134
+
1135
+ - Fixed reasoning_effort parameter handling to use provider-specific mappings instead of raw effort values
1136
+ - Fixed assistant content serialization for GitHub Copilot and other OpenAI-compatible backends that mirror array payloads
1137
+ - Fixed token usage calculation to properly extract cached tokens from both root and nested `prompt_tokens_details` fields
1138
+ - Fixed stop reason mapping to handle string values and unknown finish reasons gracefully
1139
+ - Fixed resource cleanup in `AuthCredentialStore.close()` to properly finalize all prepared statements before closing the database
1140
+
1141
+ ## [13.11.1] - 2026-03-13
1142
+
1143
+ ### Fixed
1144
+
1145
+ - Added `llama.cpp` as local provider
1146
+ - Fixed auth schema V0-to-V1 migration crash when the V0 table lacks a `disabled` column
1147
+
1148
+ ## [13.11.0] - 2026-03-12
1149
+
1150
+ ### Added
1151
+
1152
+ - Added support for Parallel AI provider with API key authentication
1153
+ - Added `PARALLEL_API_KEY` environment variable support for Parallel provider configuration
1154
+ - Added automatic websocket reconnection handling for connection limit errors, with fallback to SSE replay when content has already been emitted
1155
+
1156
+ ### Changed
1157
+
1158
+ - Enhanced `CodexProviderStreamError` to include an optional error code field for better error categorization and handling
1159
+
1160
+ ### Fixed
1161
+
1162
+ - Improved retry logic to handle HTTP/2 stream errors and internal_error responses from Anthropic API
1163
+
1164
+ ## [13.9.16] - 2026-03-10
1165
+
1166
+ ### Added
1167
+
1168
+ - Support for `onPayload` callback to replace provider request payloads before sending, enabling request interception and modification
1169
+ - Support for structured text signature metadata with phase information (commentary/final_answer) in OpenAI and Azure OpenAI Responses providers
1170
+ - Support for OpenAI Codex Spark model selection with plan-based account prioritization
1171
+ - Added `modelId` option to `getApiKey()` to enable model-specific credential ranking
1172
+
1173
+ ### Changed
1174
+
1175
+ - Enhanced `onPayload` callback signature to accept model parameter and support async payload replacement
1176
+ - Improved error messages for `response.failed` events to include detailed error codes, messages, and incomplete reasons
1177
+ - Refactored OpenAI Codex response streaming to improve code organization and maintainability with extracted helper functions and type definitions
1178
+ - Enhanced websocket fallback logic to safely replay buffered output over SSE when websocket connections fail mid-stream
1179
+ - Improved error recovery for websocket streams by distinguishing between fatal connection errors and retryable stream errors
1180
+ - Updated credential ranking strategy to prioritize Pro plan accounts when requesting OpenAI Codex Spark models
1181
+
1182
+ ### Fixed
1183
+
1184
+ - Fixed websocket stream recovery to properly reset output state and clear buffered items when falling back to SSE after partial output
1185
+ - Fixed handling of malformed JSON messages in websocket streams to trigger immediate fallback to SSE without retry attempts
1186
+
1187
+ ## [13.9.13] - 2026-03-10
1188
+
1189
+ ### Added
1190
+
1191
+ - Added `isSpecialServiceTier` utility function to validate OpenAI service tier values
1192
+
1193
+ ## [13.9.12] - 2026-03-09
1194
+
1195
+ ### Added
1196
+
1197
+ - Added Tavily web search provider support with API key authentication
1198
+
1199
+ ### Fixed
1200
+
1201
+ - Fixed OpenAI-family streaming transports to fail with an explicit idle-timeout error instead of hanging indefinitely when the provider stops sending events mid-response
1202
+ - Fixed OpenAI Codex OAuth refresh and usage-limit lookups to respect request timeouts instead of waiting indefinitely during account selection or rotation
1203
+ - Fixed OpenAI Codex prewarmed websocket requests to fall back quickly when the socket connects but never starts the response stream
1204
+
1205
+ ## [13.9.10] - 2026-03-08
1206
+
1207
+ ### Added
1208
+
1209
+ - Added `identity_key` column to auth credentials storage for improved credential deduplication
1210
+ - Added schema versioning system to auth credentials database for safer migrations
1211
+ - Added automatic backfilling of identity keys during database schema migrations
1212
+
1213
+ ### Changed
1214
+
1215
+ - Changed credential deduplication logic to use single identity key instead of multiple identifiers for better performance
1216
+ - Changed database schema to store normalized identity keys alongside credentials
1217
+ - Changed auth schema migration to support upgrading from legacy database versions with automatic data backfill
1218
+
1219
+ ### Fixed
1220
+
1221
+ - Fixed API key credential matching to correctly identify when the same key is re-stored, preventing unnecessary row duplication on re-login
1222
+ - Fixed credential deduplication to correctly handle OAuth accounts with matching emails but different account IDs
1223
+ - Fixed API key replacement to reuse existing stored rows instead of accumulating disabled duplicates
1224
+ - Fixed auth storage to preserve newer recorded schema versions when opened by older binaries
1225
+
1226
+ ## [13.9.8] - 2026-03-08
1227
+
1228
+ ### Fixed
1229
+
1230
+ - Fixed WebSocket stream fallback logic to safely replay buffered output over SSE when WebSocket fails after partial content has been streamed
1231
+
1232
+ ## [13.9.4] - 2026-03-07
1233
+
1234
+ ### Changed
1235
+
1236
+ - Simplified API key credential storage to always replace existing credentials on re-login instead of accumulating multiple keys
1237
+ - Updated Kagi API key placeholder from `kagi_...` to `KG_...` to match current API key format
1238
+ - Updated Kagi login instructions to clarify Search API access is beta-only and provide support contact
1239
+ - Disabled usage reporting in streaming responses for Cerebras models due to compatibility issues
1240
+
1241
+ ### Fixed
1242
+
1243
+ - Fixed Cerebras model compatibility by preventing `stream_options` usage requests in chat completions
1244
+
1245
+ ## [13.9.3] - 2026-03-07
1246
+
1247
+ ### Breaking Changes
1248
+
1249
+ - Changed `reasoning` parameter from `ThinkingLevel | undefined` to `Effort | undefined` in `SimpleStreamOptions`; 'off' is no longer valid (omit the field instead)
1250
+ - Removed `supportsXhigh()` function; check `model.thinking?.maxLevel` instead
1251
+ - Removed `ThinkingLevel` and `ThinkingEffort` types; use `Effort` enum
1252
+ - Removed `getAvailableThinkingLevels()` and `getAvailableThinkingEfforts()` functions
1253
+ - Changed `transformRequestBody()` signature to require `Model` parameter as second argument for effort validation
1254
+ - Removed `thinking.ts` module export; import from `model-thinking.ts` instead
1255
+
1256
+ ### Added
1257
+
1258
+ - Added `incremental` flag to `OpenAIResponsesHistoryPayload` to support building conversation history from multiple assistant messages instead of replacing it
1259
+ - Added `dt` flag to `OpenAIResponsesHistoryPayload` for transport-level metadata
1260
+ - Added `ThinkingConfig` interface to models for canonical thinking transport metadata with min/max effort levels and provider-specific mode
1261
+ - Added `thinking` field to `Model` type containing per-model thinking capabilities used to clamp and map user-facing effort levels
1262
+ - Added `Effort` enum (minimal, low, medium, high, xhigh) as canonical user-facing thinking levels replacing `ThinkingLevel`
1263
+ - Added `enrichModelThinking()` function to automatically populate thinking metadata on models based on their capabilities
1264
+ - Added `mapEffortToAnthropicAdaptiveEffort()` function to map user effort levels to Anthropic adaptive thinking effort
1265
+ - Added `mapEffortToGoogleThinkingLevel()` function to map user effort levels to Google thinking levels
1266
+ - Added `requireSupportedEffort()` function to validate and clamp effort levels per model, throwing errors for unsupported combinations
1267
+ - Added `clampThinkingLevelForModel()` function to clamp thinking levels to model-supported range
1268
+ - Added `applyGeneratedModelPolicies()` and `linkSparkPromotionTargets()` exports from model-thinking module
1269
+ - Added `serviceTier` option to control OpenAI processing priority and cost (auto, default, flex, scale, priority)
1270
+ - Added `providerPayload` field to messages and responses for reconstructing transport-native history
1271
+ - Added Gemini usage provider for tracking quota and tier information
1272
+ - Added `getCodexAccountId()` utility to extract account ID from Codex JWT tokens
1273
+ - Added email extraction from OpenAI Codex OAuth tokens for credential deduplication
1274
+
1275
+ ### Changed
1276
+
1277
+ - Changed credential disabling mechanism from boolean `disabled` flag to `disabled_cause` text field for tracking why credentials were disabled
1278
+ - Changed `deleteAuthCredential()` and `deleteAuthCredentialsForProvider()` methods to require a `disabledCause` parameter explaining the reason for disabling
1279
+ - Changed Gemini model parsing to strip `-preview` suffix for consistent model identification
1280
+ - Changed OpenAI Codex websocket error handling to detect fatal connection errors and immediately fall back to SSE without retrying
1281
+ - Changed OpenAI Codex to always use websockets v2 protocol (removed v1 support)
1282
+ - Changed `reasoning` parameter type from `ThinkingLevel` to `Effort` in `SimpleStreamOptions`, removing 'off' value (callers should omit the field instead)
1283
+ - Changed thinking configuration to use model-specific metadata instead of hardcoded provider logic for effort mapping
1284
+ - Changed OpenAI Codex request transformer to accept `Model` parameter for effort validation instead of string model ID
1285
+ - Changed Anthropic provider to use model thinking metadata for determining adaptive thinking support instead of model ID pattern matching
1286
+ - Changed Google Vertex and Google providers to use shorter variable names for thinking config construction
1287
+ - Moved thinking-related utilities from `thinking.ts` to new `model-thinking.ts` module with expanded functionality
1288
+ - Moved model policy functions from `provider-models/model-policies.ts` to `model-thinking.ts`
1289
+ - Moved `googleGeminiCliUsageProvider` from `providers/google-gemini-cli-usage.ts` to `usage/gemini.ts`
1290
+ - Changed default OpenAI model from gpt-5.1-codex to gpt-5.4 across all providers
1291
+ - Changed `UsageFetchContext` to remove cache and `now()` dependencies — usage fetchers now use `Date.now()` directly
1292
+ - Removed `resetInMs` field from usage windows; consumers should calculate from `resetsAt` timestamp
1293
+ - Changed OpenAI Codex credential ranking to deduplicate by email when accountId matches
1294
+ - Improved OpenAI Codex error handling with retryable error detection
1295
+
1296
+ ### Removed
1297
+
1298
+ - Removed `thinking.ts` module; use `model-thinking.ts` instead
1299
+ - Removed `provider-models/model-policies.ts` module; functionality moved to `model-thinking.ts`
1300
+ - Removed `supportsXhigh()` function from `models.ts`; use `model.thinking` metadata instead
1301
+ - Removed `ThinkingLevel` and `ThinkingEffort` types; use `Effort` enum instead
1302
+ - Removed `getAvailableThinkingLevels()` and `getAvailableThinkingEfforts()` functions
1303
+ - Removed `model-policies` export from `provider-models/index.ts`
1304
+ - Removed hardcoded thinking level clamping logic from OpenAI Codex request transformer; now uses model metadata
1305
+ - Removed `UsageCache` and `UsageCacheEntry` interfaces—caching is now handled internally by AuthStorage
1306
+ - Removed `google-gemini-cli-usage` export; use new `gemini` usage provider instead
1307
+ - Removed `resetInMs` computation from all usage providers
1308
+ - Removed cache TTL constants and cache management from usage fetchers (claude, github-copilot, google-antigravity, kimi, openai-codex, zai)
1309
+
1310
+ ### Fixed
1311
+
1312
+ - Fixed credential purging to respect disabled credentials when deduplicating by email, preventing re-enablement of intentionally disabled credentials
1313
+ - Fixed OpenAI Codex websocket error reporting to include detailed error messages from error events
1314
+ - Fixed conversation history reconstruction to support incremental updates from multiple assistant messages while maintaining backward compatibility with full-snapshot payloads
1315
+ - Fixed OpenAI Codex to reject unsupported effort levels instead of silently clamping them, providing clear error messages about supported efforts
1316
+ - Fixed model cache normalization to properly apply thinking enrichment when loading cached models
1317
+ - Fixed dynamic model merging to apply thinking enrichment to merged model results
1318
+ - Fixed OpenAI Codex streaming to properly include service_tier in SSE payloads
1319
+ - Fixed type safety in OpenAI responses by removing unsafe type casts on image content blocks
1320
+ - Fixed credential purging to respect disabled credentials when deduplicating by email
1321
+ - Fixed API-key provider re-login to replace the active stored key instead of appending stale credentials that were still selected first
1322
+ - Fixed Kagi login guidance to use the correct `KG_...` key format and mention Search API beta access requirements
1323
+
1324
+ ## [13.9.2] - 2026-03-05
1325
+
1326
+ ### Added
1327
+
1328
+ - Support for redacted thinking blocks in Anthropic messages, enabling secure handling of encrypted reasoning content
1329
+ - Preservation of latest Anthropic thinking blocks and redacted thinking content during message transformation, even when switching between Anthropic models
1330
+
1331
+ ### Changed
1332
+
1333
+ - Assistant message content now includes `RedactedThinkingContent` type alongside existing text, thinking, and tool call blocks
1334
+ - Message transformation logic now preserves signed thinking blocks and redacted thinking for the latest assistant message in Anthropic conversations
1335
+
1336
+ ### Fixed
1337
+
1338
+ - Fixed Unicode normalization to consistently apply `toWellFormed()` to all text content, including thinking blocks, ensuring proper handling of malformed UTF-16 sequences
1339
+
1340
+ ## [13.9.1] - 2026-03-05
1341
+
1342
+ ### Breaking Changes
1343
+
1344
+ - Removed `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, `ALL_THINKING_MODES`, `THINKING_MODE_DESCRIPTIONS`, and `THINKING_MODE_LABELS` exports
1345
+ - Renamed `formatThinking()` to `getThinkingMetadata()` with changed return type from string to `ThinkingMetadata` object
1346
+ - Renamed `getAvailableThinkingLevel()` to `getAvailableThinkingLevels()` and added default parameter
1347
+ - Renamed `getAvailableEffort()` to `getAvailableEfforts()` and added default parameter
1348
+
1349
+ ### Added
1350
+
1351
+ - Added `ThinkingMetadata` type to provide structured access to thinking mode information (value, label, description)
1352
+
1353
+ ## [13.9.0] - 2026-03-05
1354
+
1355
+ ### Added
1356
+
1357
+ - Exported new thinking module with `Effort`, `ThinkingLevel`, and `ThinkingMode` types for managing reasoning effort levels
1358
+ - Added `getAvailableEffort()` function to determine supported thinking effort levels based on model capabilities
1359
+ - Added `parseEffort()`, `parseThinkingLevel()`, and `parseThinkingMode()` functions for parsing thinking configuration strings
1360
+ - Added `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, and `ALL_THINKING_MODES` constants for iterating over available thinking options
1361
+ - Added `THINKING_MODE_DESCRIPTIONS` and `THINKING_MODE_LABELS` for displaying thinking modes in user interfaces
1362
+ - Added `formatThinking()` function to format thinking modes as compact display labels
1363
+
1364
+ ### Changed
1365
+
1366
+ - Refactored thinking level handling to distinguish between `Effort` (provider-level, no "off") and `ThinkingLevel` (user-facing, includes "off")
1367
+ - Updated `ThinkingBudgets` type to use `Effort` instead of `ThinkingLevel` for more precise token budget configuration
1368
+ - Improved reasoning option handling to explicitly support "off" value for disabling reasoning across all providers
1369
+ - Simplified thinking effort mapping logic by centralizing provider-specific clamping behavior
1370
+
1371
+ ## [13.7.8] - 2026-03-04
1372
+
1373
+ ### Added
1374
+
1375
+ - Added ZenMux provider support with mixed API routing: Anthropic-owned models discovered from `https://zenmux.ai/api/v1/models` now use the Anthropic transport (`https://zenmux.ai/api/anthropic`), while other ZenMux models use the OpenAI-compatible transport.
1376
+
1377
+ ## [13.7.7] - 2026-03-04
1378
+
1379
+ ### Changed
1380
+
1381
+ - Modified response ID normalization to preserve existing item ID prefixes when truncating oversized IDs
1382
+ - Updated tool call ID normalization to use `fc_` prefix for generated item IDs instead of `item_` prefix
1383
+
1384
+ ### Fixed
1385
+
1386
+ - Fixed handling of reasoning item IDs to remain untouched during response normalization while function call IDs are properly normalized
1387
+
1388
+ ## [13.7.2] - 2026-03-04
1389
+
1390
+ ### Added
1391
+
1392
+ - Added support for Kagi API key authentication via `login kagi` command
1393
+ - Added Kagi to the list of available OAuth providers
1394
+
1395
+ ### Fixed
1396
+
1397
+ - MCP tool schemas with `$ref`/`$defs` are now dereferenced before being sent to LLM providers, fixing dangling references that left models without type definitions
1398
+ - Ajv schema validation no longer emits `console.warn()` for non-standard format keywords (e.g. `"uint"`) from MCP servers, preventing TUI corruption
1399
+ - Tool schema compilation is now cached per schema identity, eliminating redundant recompilation on every tool call
1400
+
1401
+ ## [13.6.0] - 2026-03-03
1402
+
1403
+ ### Added
1404
+
1405
+ - Added Anthropic Foundry gateway mode controlled by `CLAUDE_CODE_USE_FOUNDRY`, with support for `FOUNDRY_BASE_URL`, `ANTHROPIC_FOUNDRY_API_KEY`, `ANTHROPIC_CUSTOM_HEADERS`, and optional mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS`)
1406
+ - Added LM Studio provider support with OpenAI-compatible model discovery and OAuth login.
1407
+ - Added support for `LM_STUDIO_API_KEY` and `LM_STUDIO_BASE_URL` environment variables for authentication and custom host configuration.
1408
+
1409
+ ### Changed
1410
+
1411
+ - Anthropic key resolution now prefers `ANTHROPIC_FOUNDRY_API_KEY` over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled
1412
+ - Anthropic auth base-URL fallback now prefers `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled
1413
+
1414
+ ## [13.5.8] - 2026-03-02
1415
+
1416
+ ### Fixed
1417
+
1418
+ - Fixed schema compatibility issue where patternProperties in tool parameters caused failures when converting to legacy Antigravity format
1419
+
1420
+ ## [13.5.5] - 2026-03-01
1421
+
1422
+ ### Changed
1423
+
1424
+ - Anthropic Claude system-block cloaking now leaves the agent identity block uncached and applies `cache_control: { type: "ephemeral" }` to injected user system blocks without forcing `ttl: "1h"`
1425
+
1426
+ ### Fixed
1427
+
1428
+ - Anthropic request payload construction now enforces a maximum of 4 `cache_control` breakpoints (tools/system/messages priority order) before dispatch
1429
+ - Anthropic cache-control normalization now removes later `ttl: "1h"` entries when a default/5m block has already appeared earlier in evaluation order
1430
+
1431
+ ## [13.5.3] - 2026-03-01
1432
+
1433
+ ### Fixed
1434
+
1435
+ - Fixed tool argument coercion to handle malformed JSON with trailing wrapper braces by parsing leading JSON containers
1436
+
1437
+ ## [13.4.0] - 2026-03-01
1438
+
1439
+ ### Breaking Changes
1440
+
1441
+ - Removed `TInput` generic parameter from `ToolResultMessage` interface and removed `$normative` property
1442
+
1443
+ ### Added
1444
+
1445
+ - `hasUnrepresentableStrictObjectMap()` pre-flight check in `tryEnforceStrictSchema`: schemas with `patternProperties` or schema-valued `additionalProperties` now degrade gracefully to non-strict mode instead of throwing during enforcement
1446
+ - `generateClaudeCloakingUserId()` generates structured user IDs for Anthropic OAuth metadata (`user_{hex64}_account_{uuid}_session_{uuid}`)
1447
+ - `isClaudeCloakingUserId()` validates whether a string matches the cloaking user-ID format
1448
+ - `mapStainlessOs()` and `mapStainlessArch()` map `process.platform`/`process.arch` to Stainless header values; X-Stainless-Os and X-Stainless-Arch in `claudeCodeHeaders` are now runtime-computed
1449
+ - `buildClaudeCodeTlsFetchOptions()` attaches SNI and default TLS ciphers for direct `api.anthropic.com` connections
1450
+ - `createClaudeBillingHeader()` generates the `x-anthropic-billing-header` block (SHA-256 payload fingerprint + random build hash)
1451
+ - `buildAnthropicSystemBlocks()` now injects a billing header block and the Claude Agent SDK identity block with `ephemeral` 1h cache-control when `includeClaudeCodeInstruction` is set
1452
+ - `resolveAnthropicMetadataUserId()` auto-generates a cloaking user ID for OAuth requests when `metadata.user_id` is absent or invalid
1453
+ - `AnthropicOAuthFlow` is now exported for direct use
1454
+ - OAuth callback server timeout extended from 2 min to 5 min
1455
+ - `parseGeminiCliCredentials()` parses Google Cloud credential JSON with support for legacy (`{token,projectId}`), alias (`project_id`/`refresh`/`expires`), and enriched formats
1456
+ - `shouldRefreshGeminiCliCredentials()` and proactive token refresh before requests for both Gemini CLI and Antigravity providers (60s pre-expiry buffer)
1457
+ - `normalizeAntigravityTools()` converts `parametersJsonSchema` → `parameters` in function declarations for Antigravity compatibility
1458
+ - `ANTIGRAVITY_SYSTEM_INSTRUCTION` is now exported for use by search and other consumers
1459
+ - `ANTIGRAVITY_LOAD_CODE_ASSIST_METADATA` constant exported from OAuth module with `ANTIGRAVITY` ideType
1460
+ - Antigravity project onboarding: `onboardProjectWithRetries()` provisions a new project via `onboardUser` LRO when `loadCodeAssist` returns no existing project (up to 5 attempts, 2s interval)
1461
+ - `getOAuthApiKey` now includes `refreshToken`, `expiresAt`, `email`, and `accountId` in the Gemini/Antigravity JSON credential payload to enable proactive refresh
1462
+ - Antigravity model discovery now tries the production daily endpoint first, with sandbox as fallback
1463
+ - `ANTIGRAVITY_DISCOVERY_DENYLIST` filters low-quality/internal models from discovery results
1464
+
1465
+ ### Changed
1466
+
1467
+ - Replaced `sanitizeSurrogates()` utility with native `String.prototype.toWellFormed()` for handling unpaired Unicode surrogates across all providers
1468
+ - Extended `ANTHROPIC_OAUTH_BETA` constant in the OpenAI-compat Anthropic route with `interleaved-thinking-2025-05-14`, `context-management-2025-06-27`, and `prompt-caching-scope-2026-01-05` beta flags
1469
+ - `claudeCodeVersion` bumped to `2.1.63`; `claudeCodeSystemInstruction` updated to identify as Claude Agent SDK
1470
+ - `claudeCodeHeaders`: removed `X-Stainless-Helper-Method`, updated package version to `0.74.0`, runtime version to `v24.3.0`
1471
+ - `applyClaudeToolPrefix` / `stripClaudeToolPrefix` now accept an optional prefix override and skip Anthropic built-in tool names (`web_search`, `code_execution`, `text_editor`, `computer`)
1472
+ - Accept-Encoding header updated to `gzip, deflate, br, zstd`
1473
+ - Non-Anthropic base URLs now receive `Authorization: Bearer` regardless of OAuth status
1474
+ - Prompt-caching logic now skips applying breakpoints when any block already carries `cache_control`, instead of stripping then re-applying
1475
+ - `fine-grained-tool-streaming-2025-05-14` removed from default beta set
1476
+ - Anthropic OAuth token URL changed from `platform.claude.com` to `api.anthropic.com`
1477
+ - Anthropic OAuth scopes reduced to `org:create_api_key user:profile user:inference`
1478
+ - OAuth code exchange now strips URL fragment from callback code, using the fragment as state override when present
1479
+ - Claude usage headers aligned: user-agent updated to `claude-cli/2.1.63 (external, cli)`, anthropic-beta extended with full beta set
1480
+ - Antigravity session ID format changed to signed decimal (negative int63 derived from SHA-256 of first user message, or random bounded int63)
1481
+ - Antigravity `requestId` now uses `agent-{uuid}` format; non-Antigravity requests no longer include requestId/userAgent/requestType in the payload
1482
+ - `ANTIGRAVITY_DAILY_ENDPOINT` corrected to `daily-cloudcode-pa.googleapis.com`; sandbox endpoint kept as fallback only
1483
+ - Antigravity discovery: removed `recommended`/`agentModelSorts` filter; now includes all non-internal, non-denylisted models
1484
+ - Antigravity discovery no longer sends `project` in the request body
1485
+ - Gemini/Antigravity OAuth flows no longer use PKCE (code_challenge removed)
1486
+ - Antigravity `loadCodeAssist` metadata ideType changed from `IDE_UNSPECIFIED` to `ANTIGRAVITY`
1487
+ - Antigravity `discoverProject` now uses a single canonical production endpoint; falls back to project onboarding instead of a hardcoded default project ID
1488
+ - `VALIDATED` tool calling config applied to Antigravity requests with Claude models
1489
+ - `maxOutputTokens` removed from Antigravity generation config for non-Claude models
1490
+ - System instruction injection for Antigravity scoped to Claude and `gemini-3-pro-high` models only
1491
+
1492
+ ### Removed
1493
+
1494
+ - Removed `sanitizeSurrogates()` utility function; use native `String.prototype.toWellFormed()` instead
1495
+
1496
+ ## [13.3.14] - 2026-02-28
1497
+
1498
+ ### Added
1499
+
1500
+ - Exported schema utilities from new `./utils/schema` module, consolidating JSON Schema handling across providers
1501
+ - Added `CredentialRankingStrategy` interface for providers to implement usage-based credential selection
1502
+ - Added `claudeRankingStrategy` for Anthropic OAuth credentials to enable smart multi-account selection based on usage windows
1503
+ - Added `codexRankingStrategy` for OpenAI Codex OAuth credentials with priority boost for fresh 5-hour window starts
1504
+ - Added `adaptSchemaForStrict()` helper for unified OpenAI strict schema enforcement across providers
1505
+ - Added schema equality and merging utilities: `areJsonValuesEqual()`, `mergeCompatibleEnumSchemas()`, `mergePropertySchemas()`
1506
+ - Added Cloud Code Assist schema normalization: `copySchemaWithout()`, `stripResidualCombiners()`, `prepareSchemaForCCA()`
1507
+ - Added `sanitizeSchemaForGoogle()` and `sanitizeSchemaForCCA()` for provider-specific schema sanitization
1508
+ - Added `StringEnum()` helper for creating string enum schemas compatible with Google and other providers
1509
+ - Added `enforceStrictSchema()` and `sanitizeSchemaForStrictMode()` for OpenAI strict mode schema validation
1510
+ - Added package exports for `./utils/schema` and `./utils/schema/*` subpaths
1511
+ - Added `validateSchemaCompatibility()` to statically audit a JSON Schema against provider-specific rules (`openai-strict`, `google`, `cloud-code-assist-claude`) and return structured violations
1512
+ - Added `validateStrictSchemaEnforcement()` to verify the strict-fail-open contract: enforced schemas pass strict validation, failed schemas return the original object identity
1513
+ - Added `COMBINATOR_KEYS` (`anyOf`, `allOf`, `oneOf`) and `CCA_UNSUPPORTED_SCHEMA_FIELDS` as exported constants in `fields.ts` to eliminate duplication across modules
1514
+ - Added `tryEnforceStrictSchema` result cache (`WeakMap`) to avoid redundant sanitize + enforce work for the same schema object
1515
+ - Added comprehensive schema normalization test suite (`schema-normalization.test.ts`) covering strict mode, Google, and Cloud Code Assist normalization paths
1516
+ - Added schema compatibility validation test suite (`schema-compatibility.test.ts`) covering all three provider targets
1517
+
1518
+ ### Changed
1519
+
1520
+ - Moved schema utilities from `./utils/typebox-helpers` to new `./utils/schema` module with expanded functionality
1521
+ - Refactored OpenAI provider tool conversion to use unified `adaptSchemaForStrict()` helper across codex, completions, and responses
1522
+ - Updated `AuthStorage` to support generic credential ranking via `CredentialRankingStrategy` instead of Codex-only logic
1523
+ - Moved Google schema sanitization functions from `google-shared.ts` to `./utils/schema` module
1524
+ - Changed export path: `./utils/typebox-helpers` → `./utils/schema` in main index
1525
+ - `sanitizeSchemaForGoogle()` / `sanitizeSchemaForCCA()` now accept a parameterized `unsupportedFields` set internally, enabling code reuse between the two sanitizers
1526
+ - `copySchemaWithout()` rewritten using object-rest destructuring for clarity
1527
+
1528
+ ### Fixed
1529
+
1530
+ - Fixed cycle detection: `WeakSet` guards added to all recursive schema traversals (`sanitizeSchemaForStrictMode`, `enforceStrictSchema`, `normalizeSchemaForCCA`, `normalizeNullablePropertiesForCloudCodeAssist`, `stripResidualCombiners`, `sanitizeSchemaImpl`, `hasResidualCloudCodeAssistIncompatibilities`) — circular schemas no longer cause infinite loops or stack overflows
1531
+ - Fixed `hasResidualCloudCodeAssistIncompatibilities`: cycle detection now returns `false` (not `true`) for already-visited nodes, eliminating false positives that forced the CCA fallback schema on valid recursive inputs
1532
+ - Fixed `stripResidualCombiners` to iterate to a fixpoint rather than making a single pass, ensuring chained combiner reductions (where one reduction enables another) are fully resolved
1533
+ - Fixed `mergeObjectCombinerVariants` required-field computation: the flattened object now takes the intersection of all variants' `required` arrays (unioned with own-level required properties that exist in the merged schema), preventing required fields from being silently dropped or over-included
1534
+ - Fixed `mergeCompatibleEnumSchemas` to use deep structural equality (`areJsonValuesEqual`) instead of `Object.is` when deduplicating object-valued enum members
1535
+ - Fixed `sanitizeSchemaForGoogle` const-to-enum deduplication to use deep equality instead of reference equality
1536
+ - Fixed `sanitizeSchemaForGoogle` type inference for `anyOf`/`oneOf`-flattened const enums: type is now derived from all variants (must agree), falling back to inference from enum values; mixed null/non-null infers the non-null type and sets `nullable`
1537
+ - Fixed `sanitizeSchemaForGoogle` recursion to spread options when descending (previously only `insideProperties`, `normalizeTypeArrayToNullable`, `stripNullableKeyword` were forwarded; new fields `unsupportedFields` and `seen` were silently dropped)
1538
+ - Fixed `sanitizeSchemaForGoogle` array-valued `type` filtering to exclude non-string entries before processing
1539
+ - Removed incorrect `additionalProperties: false` stripping from `sanitizeSchemaForGoogle` (the field is valid in Google schemas when `false`)
1540
+ - Fixed `sanitizeSchemaForStrictMode` to strip the `nullable` keyword and expand it into `anyOf: [schema, {type: "null"}]` in the output, matching what OpenAI strict mode actually expects
1541
+ - Fixed `sanitizeSchemaForStrictMode` to infer `type: "array"` when `items` is present but `type` is absent
1542
+ - Fixed `sanitizeSchemaForStrictMode` to infer a scalar `type` from uniform `enum` values when `type` is not explicitly set
1543
+ - Fixed `sanitizeSchemaForStrictMode` const-to-enum merge to use deep equality, preventing duplicate enum entries when `const` and `enum` both exist with the same value
1544
+ - Fixed `enforceStrictSchema` to drop `additionalProperties` unconditionally (previously only object-valued `additionalProperties` was recursed into; non-object values were passed through, violating strict schema requirements)
1545
+ - Fixed `enforceStrictSchema` to recurse into `$defs` and `definitions` blocks so referenced sub-schemas are also made strict-compliant
1546
+ - Fixed `enforceStrictSchema` to handle tuple-style `items` arrays (previously only single-schema `items` objects were recursed)
1547
+ - Fixed `enforceStrictSchema` double-wrapping: optional properties already expressed as `anyOf: [..., {type: "null"}]` are not wrapped again
1548
+ - Fixed `enforceStrictSchema` `Array.isArray` type-narrowing for `type` field to filter non-string entries before checking for `"object"`
1549
+
1550
+ ## [13.3.8] - 2026-02-28
1551
+
1552
+ ### Fixed
1553
+
1554
+ - Fixed response body reuse error when handling 429 rate limit responses with retry logic
1555
+
1556
+ ## [13.3.7] - 2026-02-27
1557
+
1558
+ ### Added
1559
+
1560
+ - Added `tryEnforceStrictSchema` function that gracefully downgrades to non-strict mode when schema enforcement fails, enabling better compatibility with malformed or circular schemas
1561
+ - Added `sanitizeSchemaForStrictMode` function to normalize JSON schemas by stripping non-structural keywords, converting `const` to `enum`, and expanding type arrays into `anyOf` variants
1562
+ - Added Kilo Gateway provider support with OpenAI-compatible model discovery, OAuth `/login kilo`, and `KILO_API_KEY` environment variable support ([#193](https://github.com/eminent337/aery/issues/193))
1563
+
1564
+ ### Changed
1565
+
1566
+ - Changed strict mode handling in OpenAI providers to use `tryEnforceStrictSchema` for safer schema enforcement with automatic fallback to non-strict mode
1567
+ - Enhanced `enforceStrictSchema` to properly handle schemas with type arrays containing `object` (e.g., `type: ["object", "null"]`)
1568
+
1569
+ ### Fixed
1570
+
1571
+ - Fixed `enforceStrictSchema` to properly handle malformed object schemas with required keys but missing properties
1572
+ - Fixed `enforceStrictSchema` to correctly process nested object schemas within `anyOf`, `allOf`, and `oneOf` combinators
1573
+
1574
+ ## [13.3.1] - 2026-02-26
1575
+
1576
+ ### Added
1577
+
1578
+ - Added `topP`, `topK`, `minP`, `presencePenalty`, and `repetitionPenalty` options to `StreamOptions` for fine-grained control over model sampling behavior
1579
+
1580
+ ## [13.3.0] - 2026-02-26
1581
+
1582
+ ### Changed
1583
+
1584
+ - Allowed OAuth provider logins to supply a manual authorization code handler with a default prompt when none is provided
1585
+
1586
+ ## [13.2.0] - 2026-02-23
1587
+
1588
+ ### Added
1589
+
1590
+ - Added support for GitHub Copilot provider in strict mode for both openai-completions and openai-responses tool schemas
1591
+
1592
+ ### Fixed
1593
+
1594
+ - Fixed tool descriptions being rejected when undefined by providing empty string fallback across all providers
1595
+
1596
+ ## [12.19.1] - 2026-02-22
1597
+
1598
+ ### Added
1599
+
1600
+ - Exported `isProviderRetryableError` function for detecting rate-limit and transient stream errors
1601
+ - Support for retrying malformed JSON stream-envelope parse errors from Anthropic-compatible proxy endpoints
1602
+
1603
+ ### Changed
1604
+
1605
+ - Expanded retry detection to include JSON parse errors (unterminated strings, unexpected end of input) in addition to rate-limit errors
1606
+
1607
+ ## [12.19.0] - 2026-02-22
1608
+
1609
+ ### Added
1610
+
1611
+ - Added GitLab Duo provider with support for Claude, GPT-5, and other models via GitLab AI Gateway
1612
+ - Added OAuth authentication for GitLab Duo with automatic token refresh and direct access caching
1613
+ - Added 16 new GitLab Duo models including Claude Opus/Sonnet/Haiku variants and GPT-5 series models
1614
+ - Added `isOAuth` option to Anthropic provider to force OAuth bearer auth mode for proxy tokens
1615
+ - Added `streamGitLabDuo` function to route requests through GitLab AI Gateway with direct access tokens
1616
+ - Added `getGitLabDuoModels` function to retrieve available GitLab Duo model configurations
1617
+ - Added `clearGitLabDuoDirectAccessCache` function to manually clear cached direct access tokens
1618
+
1619
+ ### Changed
1620
+
1621
+ - Enhanced `getModelMapping()` to support both GitLab Duo alias IDs (e.g., `duo-chat-gpt-5-codex`) and canonical model IDs (e.g., `gpt-5-codex`) for improved model resolution flexibility
1622
+ - Migrated `AuthCredentialStore` and `AuthStorage` into `@aryee337/aery-ai` as shared credential primitives for downstream packages
1623
+ - Moved Anthropic auth helpers (`findAnthropicAuth`, `isOAuthToken`, `buildAnthropicSearchHeaders`, `buildAnthropicUrl`) into shared AI utilities for reuse across providers
1624
+ - Replaced `CliAuthStorage` with `AuthCredentialStore` for improved credential management with multiple credentials per provider
1625
+ - Updated models.json pricing for Claude 3.5 Sonnet (input: 0.23→0.45, output: 3→2.2, added cache read: 0.225) and Claude 3 Opus (input: 0.3→0.95)
1626
+ - Moved `mapAnthropicToolChoice` function from gitlab-duo provider to stream module for broader reusability
1627
+ - Enhanced HTTP status code extraction to handle string-formatted status codes in error objects
1628
+
1629
+ ### Removed
1630
+
1631
+ - Removed `CliAuthStorage` class in favor of new `AuthCredentialStore` with enhanced functionality
1632
+
1633
+ ## [12.17.2] - 2026-02-21
1634
+
1635
+ ### Added
1636
+
1637
+ - Exported `getAntigravityUserAgent()` function for constructing Antigravity User-Agent headers
1638
+
1639
+ ### Changed
1640
+
1641
+ - Updated default Antigravity version from 1.15.8 to 1.18.3
1642
+ - Unified User-Agent header generation across Antigravity API calls to use centralized `getAntigravityUserAgent()` function
1643
+
1644
+ ## [12.17.1] - 2026-02-21
1645
+
1646
+ ### Added
1647
+
1648
+ - Added new export paths for provider models via `./provider-models` and `./provider-models/*`
1649
+ - Added new export paths for Cursor and OpenAI Codex providers via `./providers/cursor/gen/*` and `./providers/openai-codex/*`
1650
+ - Added new export paths for usage utilities via `./usage/*`
1651
+ - Added new export paths for discovery and OAuth utilities via `./utils/discovery` and `./utils/oauth` with subpath exports
1652
+
1653
+ ### Changed
1654
+
1655
+ - Simplified main export path to use wildcard pattern `./src/*.ts` for broader module access
1656
+ - Updated `models.json` export to include TypeScript declaration file at `./src/models.json.d.ts`
1657
+ - Reorganized package.json field ordering for improved readability
1658
+
1659
+ ## [12.17.0] - 2026-02-21
1660
+
1661
+ ### Fixed
1662
+
1663
+ - Cursor provider: bind `execHandlers` when passing handler methods to the exec protocol so handlers receive correct `this` context (fixes "undefined is not an object (evaluating 'this.options')" when using exec tools such as web search with Cursor)
1664
+
1665
+ ## [12.16.0] - 2026-02-21
1666
+
1667
+ ### Added
1668
+
1669
+ - Exported `readModelCache` and `writeModelCache` functions for direct SQLite-backed model cache access
1670
+ - Added `<turn_aborted>` guidance marker as synthetic user message when assistant messages are aborted or errored, informing the model that tools may have partially executed
1671
+ - Added support for Sonnet 4.6 models in adaptive thinking detection
1672
+
1673
+ ### Changed
1674
+
1675
+ - Updated model cache schema version to support improved global model fallback resolution
1676
+ - Improved GitHub Copilot model resolution to prefer provider-specific model definitions over global references when context window is larger, ensuring optimal model capabilities
1677
+ - Migrated model cache from per-provider JSON files to unified SQLite database (models.db) for atomic cross-process access
1678
+ - Renamed `cachePath` option to `cacheDbPath` in ModelManagerOptions to reflect database-backed storage
1679
+ - Improved non-authoritative cache handling with 5-minute retry backoff instead of retrying on every startup
1680
+ - Modified handling of aborted/errored assistant messages to preserve tool call structure instead of converting to text summaries, with synthetic 'aborted' tool results injected
1681
+ - Updated tool call tracking to use status map (Resolved/Aborted) instead of separate sets for better handling of duplicate and aborted tool results
1682
+
1683
+ ## [12.15.0] - 2026-02-20
1684
+
1685
+ ### Fixed
1686
+
1687
+ - Improved error messages for OAuth token refresh failures by including detailed error information from the provider
1688
+ - Separated rate limit and usage limit error handling to provide distinct user-friendly messages for ChatGPT rate limits vs subscription usage limits
1689
+
1690
+ ### Changed
1691
+
1692
+ - Increased SDK retry attempts to 5 for OpenAI, Azure OpenAI, and Anthropic clients (was SDK default of 2)
1693
+ - Changed 429 retry strategy for OpenAI Codex and Google Gemini CLI to use a 5-minute time budget when the server provides a retry delay, instead of a fixed attempt cap
1694
+
1695
+ ## [12.14.0] - 2026-02-19
1696
+
1697
+ ### Added
1698
+
1699
+ - Added `gemini-3.1-pro` model to opencode provider with text and image input support
1700
+ - Added `trinity-large-preview-free` model to opencode provider
1701
+ - Added `google/gemini-3.1-pro-preview` model to nanogpt provider
1702
+ - Added `google/gemini-3.1-pro-preview` model to openrouter provider with text and image input support
1703
+ - Added `gemini-3.1-pro` model to cursor provider
1704
+ - Added optional `intent` field to `ToolCall` interface for harness-level intent metadata
1705
+
1706
+ ### Changed
1707
+
1708
+ - Changed `big-pickle` model API from `openai-completions` to `anthropic-messages`
1709
+ - Changed `big-pickle` model baseUrl from `https://opencode.ai/zen/v1` to `https://opencode.ai/zen`
1710
+ - Changed `minimax-m2.5-free` model API from `openai-completions` to `anthropic-messages`
1711
+ - Changed `minimax-m2.5-free` model baseUrl from `https://opencode.ai/zen/v1` to `https://opencode.ai/zen`
1712
+
1713
+ ### Fixed
1714
+
1715
+ - Fixed tool argument validation to iteratively coerce nested JSON strings across multiple passes, enabling proper handling of deeply nested JSON-serialized objects and arrays
1716
+
1717
+ ## [12.13.0] - 2026-02-19
1718
+
1719
+ ### Added
1720
+
1721
+ - Added NanoGPT provider support with API-key login, dynamic model discovery from `https://nano-gpt.com/api/v1/models`, and text-model filtering for catalog/runtime discovery ([#111](https://github.com/eminent337/aery/issues/111))
1722
+
1723
+ ## [12.12.3] - 2026-02-19
1724
+
1725
+ ### Fixed
1726
+
1727
+ - Fixed retry logic to recognize 'unable to connect' errors as transient failures
1728
+
1729
+ ## [12.11.3] - 2026-02-19
1730
+
1731
+ ### Fixed
1732
+
1733
+ - Fixed OpenAI Codex streaming to fail truncated responses that end without a terminal completion event, preventing partial outputs from being treated as successful completions.
1734
+ - Fixed Codex websocket append fallback by resetting stale turn-state/model-etag session metadata when request shape diverges from appendable history.
1735
+
1736
+ ## [12.11.1] - 2026-02-19
1737
+
1738
+ ### Added
1739
+
1740
+ - Added support for Claude 4.6 Opus and Sonnet models via Cursor API
1741
+ - Added support for Composer 1.5 model via Cursor API
1742
+ - Added support for GPT-5.1 Codex Mini and GPT-5.1 High models via Cursor API
1743
+ - Added support for GPT-5.2 and GPT-5.3 Codex variants (Fast, High, Low, Extra High) via Cursor API
1744
+ - Added HTTP/2 transport support for Cursor API requests (required by Cursor API)
1745
+
1746
+ ### Changed
1747
+
1748
+ - Updated pricing for Claude 3.5 Sonnet model
1749
+ - Updated Claude 3.5 Sonnet context window from 262,144 to 131,072 tokens
1750
+ - Simplified Cursor model display names by removing '(Cursor)' suffix
1751
+ - Changed Cursor API timeout from 15 seconds to 5 seconds
1752
+ - Switched Cursor API transport from HTTP/1.1 to HTTP/2
1753
+
1754
+ ## [12.11.0] - 2026-02-19
1755
+
1756
+ ### Added
1757
+
1758
+ - Added `priority` field to Model interface for provider-assigned model prioritization
1759
+ - Added `CatalogDiscoveryConfig` interface to standardize catalog discovery configuration across providers
1760
+ - Added type guards `isCatalogDescriptor()` and `allowsUnauthenticatedCatalogDiscovery()` for safer descriptor handling
1761
+ - Added `DEFAULT_MODEL_PER_PROVIDER` export from descriptors module for centralized default model management
1762
+ - Support for 12 new AI providers: Cloudflare AI Gateway, Hugging Face Inference, LiteLLM, Moonshot, NVIDIA, Ollama, Qianfan, Qwen Portal, Together, Venice, vLLM, and Xiaomi MiMo
1763
+ - Login flows for new providers with API key validation and OAuth token support
1764
+ - Extended `KnownProvider` type to include all newly supported providers
1765
+ - API key environment variable mappings for all new providers in service provider map
1766
+ - Model discovery and configuration for Cloudflare AI Gateway, Hugging Face, LiteLLM, Moonshot, NVIDIA, Ollama, Qianfan, Qwen Portal, Together, Venice, vLLM, and Xiaomi MiMo
1767
+
1768
+ ### Changed
1769
+
1770
+ - Refactored OAuth credential retrieval to simplify storage lifecycle management in model generation script
1771
+ - Parallelized special model discovery sources (Antigravity, Codex) for improved generation performance
1772
+ - Reorganized model JSON structure to place `contextWindow` and `maxTokens` before `compat` field for consistency
1773
+ - Added `priority` field to OpenAI Codex models for provider-assigned model prioritization
1774
+ - Refactored provider descriptors to use helper functions (`descriptor`, `catalog`, `catalogDescriptor`) for reduced code duplication
1775
+ - Refactored models.dev provider descriptors to use helper functions (`simpleModelsDevDescriptor`, `openAiCompletionsDescriptor`, `anthropicMessagesDescriptor`) for improved maintainability
1776
+ - Unified provider descriptors into single source of truth in `descriptors.ts` for both runtime model discovery and catalog generation, improving maintainability
1777
+ - Refactored model generation script to use declarative `CatalogProviderDescriptor` interface instead of separate descriptor types, reducing code duplication
1778
+ - Reorganized models.dev provider descriptors into logical groups (Bedrock, Core, Coding Plans, Specialized) for better code organization
1779
+ - Simplified API resolution for OpenCode and GitHub Copilot providers using rule-based matching instead of inline conditionals
1780
+ - Refactored model generation script to use declarative provider descriptors instead of inline provider-specific logic, improving maintainability and reducing code duplication
1781
+ - Extracted model post-processing policies (cache pricing corrections, context window normalization) into dedicated `model-policies.ts` module for better testability and clarity
1782
+ - Removed static bundled models for Ollama and vLLM from `models.json` to rely on dynamic discovery instead, reducing static catalog size
1783
+ - Updated `OAuthProvider` type to include new provider identifiers
1784
+ - Expanded model registry (models.json) with thousands of new model entries across all new providers
1785
+ - Modified environment variable resolution to use `$pickenv` for providers with multiple possible env var names
1786
+ - Updated README documentation to list all newly supported providers and their authentication requirements
1787
+
1788
+ ## [12.10.1] - 2026-02-18
1789
+
1790
+ - Added Synthetic provider
1791
+ - Added API-key login helpers for Synthetic and Cerebras providers
1792
+
1793
+ ## [12.10.0] - 2026-02-18
1794
+
1795
+ ### Breaking Changes
1796
+
1797
+ - Renamed public API functions: `getModel()` → `getBundledModel()`, `getModels()` → `getBundledModels()`, `getProviders()` → `getBundledProviders()`
1798
+
1799
+ ### Added
1800
+
1801
+ - Exported `ModelManager` API for runtime-aware model resolution with dynamic endpoint discovery
1802
+ - Exported provider-specific model manager configuration helpers for Google, OpenAI-compatible, Codex, and Cursor providers
1803
+ - Exported discovery utilities for fetching models from Antigravity, Codex, Cursor, Gemini, and OpenAI-compatible endpoints
1804
+ - Added `createModelManager()` function to manage bundled and dynamically discovered models with configurable refresh strategies
1805
+ - Added support for on-disk model caching with TTL-based invalidation
1806
+ - Added `resolveProviderModels()` function for runtime model resolution across multiple providers
1807
+ - Added EU cross-region inference variants for Claude Haiku 3.5 on Bedrock
1808
+ - Added Claude Sonnet 4.6 and Claude Sonnet 4.6 Thinking models to Antigravity provider
1809
+ - Added GLM-5 Free model via OpenCode provider
1810
+ - Added GLM-4.7-FlashX model via ZAI provider
1811
+ - Added MiniMax-M2.5-highspeed model across multiple providers (minimax-code, minimax-code-cn, minimax, minimax-cn)
1812
+ - Added Claude Sonnet 4.6 model to OpenRouter provider
1813
+ - Added Qwen 3.5 Plus model to Vercel AI Gateway provider
1814
+ - Added Claude Sonnet 4.6 model to Vercel AI Gateway provider
1815
+
1816
+ ### Changed
1817
+
1818
+ - Renamed `getModel()` to `getBundledModel()` to clarify it returns compile-time bundled models only
1819
+ - Renamed `getModels()` to `getBundledModels()` for consistency
1820
+ - Renamed `getProviders()` to `getBundledProviders()` for consistency
1821
+ - Refactored model generation script to use modular discovery functions instead of monolithic provider-specific logic
1822
+ - Updated models.json with new model entries and pricing updates across multiple providers
1823
+ - Updated pricing for deepseek/deepseek-v3 model on OpenRouter
1824
+ - Updated maxTokens from 65536 to 4096 for deepseek/deepseek-v3 on OpenRouter
1825
+ - Updated pricing and maxTokens for mistralai/mistral-large-2411 on OpenRouter
1826
+ - Updated pricing for qwen/qwen-max on Together AI
1827
+ - Updated pricing for qwen/qwen-vl-plus on Together AI
1828
+ - Updated pricing for qwen/qwen-plus on Together AI
1829
+ - Updated pricing for qwen/qwen-turbo on Together AI
1830
+ - Expanded EU cross-region inference variant support to all Claude models on Bedrock (previously limited to Haiku, Sonnet, and Opus 4.5)
1831
+
1832
+ ## [12.8.0] - 2026-02-16
1833
+
1834
+ ### Added
1835
+
1836
+ - Added `contextPromotionTarget` model property to specify preferred fallback model when context promotion is triggered
1837
+ - Added automatic context promotion target assignment for Spark models to their base model equivalents
1838
+ - Added support for Brave search provider with `BRAVE_API_KEY` environment variable
1839
+
1840
+ ### Changed
1841
+
1842
+ - Updated Qwen model context window and max token limits for improved accuracy
1843
+
1844
+ ## [12.7.0] - 2026-02-16
1845
+
1846
+ ### Added
1847
+
1848
+ - Added DeepSeek-V3.2 model support via Amazon Bedrock
1849
+ - Added GLM-5 model support via OpenCode
1850
+ - Added MiniMax M2.5 model support via OpenCode
1851
+
1852
+ ### Changed
1853
+
1854
+ - Updated GLM-4.5, GLM-4.5-Air, GLM-4.5-Flash, GLM-4.5V, GLM-4.6, GLM-4.6V, GLM-4.7, GLM-4.7-Flash, and GLM-5 models to use `anthropic-messages` API instead of `openai-completions`
1855
+ - Updated GLM models base URL from `https://api.z.ai/api/coding/paas/v4` to `https://api.z.ai/api/anthropic`
1856
+ - Updated pricing for multiple models including Mistral, Moonshot, and Qwen variants
1857
+ - Updated context window and max tokens for several models to reflect accurate specifications
1858
+
1859
+ ### Removed
1860
+
1861
+ - Removed compat field with supportsDeveloperRole and thinkingFormat properties from GLM models
1862
+
1863
+ ## [12.6.0] - 2026-02-16
1864
+
1865
+ ### Added
1866
+
1867
+ - Added source-scoped custom API and OAuth provider registration helpers for extension-defined providers.
1868
+
1869
+ ### Changed
1870
+
1871
+ - Expanded `Api` typing to allow extension-defined API identifiers while preserving built-in API exhaustiveness checks.
1872
+
1873
+ ### Fixed
1874
+
1875
+ - Fixed custom API registration to reject built-in API identifiers and prevent accidental provider overrides.
1876
+
1877
+ ## [12.2.0] - 2026-02-13
1878
+
1879
+ ### Added
1880
+
1881
+ - Added automatic retry logic for WebSocket stream closures before response completion, with configurable retry budget to improve reliability on flaky connections
1882
+ - Added `providerSessionState` option to enable provider-scoped mutable state persistence across agent turns
1883
+ - Added WebSocket retry logic with configurable retry budget and delay via `PI_CODEX_WEBSOCKET_RETRY_BUDGET` and `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` environment variables
1884
+ - Added WebSocket idle timeout detection via `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` environment variable to fail stalled connections
1885
+ - Added WebSocket v2 beta header support via `PI_CODEX_WEBSOCKET_V2` environment variable for newer OpenAI API versions
1886
+ - Added WebSocket handshake header capture to extract and replay session metadata (turn state, models etag, reasoning flags) across SSE fallback requests
1887
+ - Added `preferWebsockets` option to enable WebSocket transport for OpenAI Codex responses when supported
1888
+ - Added `prewarmOpenAICodexResponses()` function to establish and reuse WebSocket connections across multiple requests
1889
+ - Added `getOpenAICodexTransportDetails()` function to inspect transport layer details including WebSocket status and fallback information
1890
+ - Added `getProviderDetails()` function to retrieve formatted provider configuration and transport information
1891
+ - Added automatic fallback from WebSocket to SSE when connection fails, with transparent retry logic
1892
+ - Added session state management to reuse WebSocket connections and enable request appending across turns
1893
+ - Added support for x-codex-turn-state header to maintain conversation state across SSE requests
1894
+
1895
+ ### Changed
1896
+
1897
+ - Changed WebSocket session state storage from global maps to provider-scoped session state for multi-agent isolation
1898
+ - Changed WebSocket connection initialization to accept idle timeout configuration and handshake header callbacks
1899
+ - Changed WebSocket error handling to use standardized transport error messages with `Codex websocket transport error` prefix
1900
+ - Changed WebSocket retry behavior to retry transient failures before activating sticky fallback, improving reliability on flaky connections
1901
+ - Changed OpenAI Codex model configuration to prefer WebSocket transport by default with `preferWebsockets: true`
1902
+ - Changed header handling to use appropriate OpenAI-Beta header values for WebSocket vs SSE transports
1903
+ - Perplexity OAuth token refresh now uses JWT expiry extraction instead of Socket.IO RPC, improving reliability when server is unreachable
1904
+ - Removed Socket.IO client implementation for Perplexity token refresh; tokens are now validated using embedded JWT expiry claims
1905
+
1906
+ ### Removed
1907
+
1908
+ - Removed `refreshPerplexityToken` export; token refresh is now handled internally via JWT expiry detection
1909
+
1910
+ ### Fixed
1911
+
1912
+ - Fixed WebSocket stream retry logic to properly handle mid-stream connection closures and retry before falling back to SSE transport
1913
+ - Fixed `preferWebsockets` option handling to correctly respect explicit `false` values when determining transport preference
1914
+ - Fixed WebSocket append state not being reset after aborted requests, preventing stale state from affecting subsequent turns
1915
+ - Fixed WebSocket append state not being reset after stream errors, preventing failed append attempts from blocking future requests
1916
+ - Fixed Codex model context window metadata to use 272000 input tokens (instead of 400000 total budget) for non-Spark Codex variants
1917
+
1918
+ ## [12.0.0] - 2026-02-12
1919
+
1920
+ ### Added
1921
+
1922
+ - Added GPT-5.3 Codex Spark model with 128K context window and extended reasoning capabilities
1923
+ - Added MiniMax M2.5 and M2.5 Lightning models via OpenAI-compatible API (minimax-code provider)
1924
+ - Added MiniMax M2.5 and M2.5 Lightning models via OpenAI-compatible API (minimax-code-cn provider for China region)
1925
+ - Added MiniMax M2.5 and M2.5 Lightning models via Anthropic API (minimax and minimax-cn providers)
1926
+ - Added Llama 3.1 8B model via Cerebras API
1927
+ - Added MiniMax M2.5 model via OpenRouter
1928
+ - Added MiniMax M2.5 model via Vercel AI Gateway
1929
+ - Added MiniMax M2.5 Free model via OpenCode
1930
+ - Added Qwen3 VL 32B Instruct multimodal model via OpenRouter
1931
+
1932
+ ### Changed
1933
+
1934
+ - Updated Z.ai GLM-5 pricing and context window configuration on OpenRouter
1935
+ - Updated Qwen3 Max Thinking max tokens from 32768 to 65536 on OpenRouter
1936
+ - Updated OpenAI GPT-5 Image Mini pricing on OpenRouter
1937
+ - Updated OpenAI GPT-5 Pro pricing and context window on OpenRouter
1938
+ - Updated OpenAI o4-mini pricing and context window on OpenRouter
1939
+ - Updated Claude Opus 4.5 Thinking model name formatting (removed parentheses)
1940
+ - Updated Claude Opus 4.6 Thinking model name formatting (removed parentheses)
1941
+ - Updated Claude Sonnet 4.5 Thinking model name formatting (removed parentheses)
1942
+ - Updated Gemini 2.5 Flash Thinking model name formatting (removed parentheses)
1943
+ - Updated Gemini 3 Pro High and Low model name formatting (removed parentheses)
1944
+ - Updated GPT-OSS 120B Medium model name formatting (removed parentheses) and context window to 131072
1945
+
1946
+ ### Removed
1947
+
1948
+ - Removed GLM-5 model from Z.ai provider
1949
+ - Removed Trinity Large Preview Free model from OpenCode provider
1950
+ - Removed MiniMax M2.1 Free model from OpenCode provider
1951
+ - Removed deprecated Anthropic model entries: `claude-3-5-haiku-latest`, `claude-3-5-haiku-20241022`, `claude-3-7-sonnet-20250219`, `claude-3-7-sonnet-latest`, `claude-3-opus-20240229`, `claude-3-sonnet-20240229` ([#33](https://github.com/eminent337/aery/issues/33))
1952
+
1953
+ ### Fixed
1954
+
1955
+ - Added deprecation filter in model generation script to prevent re-adding deprecated Anthropic models ([#33](https://github.com/eminent337/aery/issues/33))
1956
+
1957
+ ## [11.14.1] - 2026-02-12
1958
+
1959
+ ### Added
1960
+
1961
+ - Added prompt-caching-scope-2026-01-05 beta feature support
1962
+
1963
+ ### Changed
1964
+
1965
+ - Updated Claude Code version header to 2.1.39
1966
+ - Updated runtime version header to v24.13.1 and package version to 0.73.0
1967
+ - Increased request timeout from 60s to 600s
1968
+ - Reordered Accept-Encoding header values for compression preference
1969
+ - Updated OAuth authorization and token endpoints to use platform.claude.com
1970
+ - Expanded OAuth scopes to include user:sessions:claude_code and user:mcp_servers
1971
+
1972
+ ### Removed
1973
+
1974
+ - Removed claude-code-20250219 beta feature from default models
1975
+ - Removed fine-grained-tool-streaming-2025-05-14 beta feature
1976
+
1977
+ ## [11.13.1] - 2026-02-12
1978
+
1979
+ ### Added
1980
+
1981
+ - Added Perplexity (Pro/Max) OAuth login support via native macOS app extraction or email OTP authentication
1982
+ - Added `loginPerplexity` and `refreshPerplexityToken` functions for Perplexity account integration
1983
+ - Added Socket.IO v4 client implementation for authenticated WebSocket communication with Perplexity API
1984
+
1985
+ ## [11.12.0] - 2026-02-11
1986
+
1987
+ ### Changed
1988
+
1989
+ - Increased maximum retry attempts for Codex requests from 2 to 5 to improve reliability on transient failures
1990
+
1991
+ ### Fixed
1992
+
1993
+ - Fixed tool result content handling in Anthropic provider to provide fallback error message when content is empty
1994
+ - Improved retry delay calculation to parse delay values from error response bodies (e.g., 'Please try again in 225ms')
1995
+
1996
+ ## [11.11.0] - 2026-02-10
1997
+
1998
+ ### Breaking Changes
1999
+
2000
+ - Replaced `./models.generated` export with `./models.json` - update imports from `import { MODELS } from './models.generated'` to `import MODELS from './models.json' with { type: 'json' }`
2001
+
2002
+ ### Added
2003
+
2004
+ - Added TypeScript type declarations for `models.json` to enable proper type inference when importing the JSON file
2005
+
2006
+ ### Changed
2007
+
2008
+ - Updated available models in google-antigravity provider with new model variants and updated context window/token limits
2009
+ - Simplified type signatures for `getModel()` and `getModels()` functions for improved usability
2010
+ - Changed models export from TypeScript module to JSON format for improved performance and reduced bundle size
2011
+ - Updated `@anthropic-ai/sdk` dependency from ^0.72.1 to ^0.74.0
2012
+
2013
+ ## [11.10.0] - 2026-02-10
2014
+
2015
+ ### Added
2016
+
2017
+ - Added support for Kimi K2, K2 Turbo Preview, and K2.5 models with reasoning capabilities
2018
+
2019
+ ### Fixed
2020
+
2021
+ - Fixed Claude Opus 4.6 context window to 200K across all providers (was incorrectly set to 1M)
2022
+ - Fixed Claude Sonnet 4 context window to 200K across multiple providers (was incorrectly set to 1M)
2023
+
2024
+ ## [11.8.0] - 2026-02-10
2025
+
2026
+ ### Added
2027
+
2028
+ - Added `auto` model alias for OpenRouter with automatic model routing
2029
+ - Added `openrouter/aurora-alpha` model with reasoning capabilities
2030
+ - Added `qwen/qwen3-max-thinking` model with extended context window support
2031
+ - Added support for `parametersJsonSchema` in Google Gemini tool definitions for improved JSON Schema compatibility
2032
+
2033
+ ### Changed
2034
+
2035
+ - Updated Claude Sonnet 4 and 4.5 context window from 1M to 200K tokens to reflect actual limits
2036
+ - Updated Claude Opus 4.6 context window to 200K tokens across providers
2037
+ - Changed default `reasoningSummary` for OpenAI Codex from `undefined` to `auto`
2038
+ - Updated Qwen model pricing and context window specifications across multiple variants
2039
+ - Modified Google Gemini CLI system instruction to use compact format
2040
+ - Changed tool parameter handling for Claude models on Google Cloud Code Assist to use legacy `parameters` field for API translation
2041
+
2042
+ ### Removed
2043
+
2044
+ - Removed `glm-4.7-free` model from OpenCode provider
2045
+ - Removed `qwen3-coder` model from OpenCode provider
2046
+ - Removed `ai21/jamba-mini-1.7` model from OpenRouter
2047
+ - Removed `stepfun-ai/step3` model from OpenRouter
2048
+ - Removed duplicate test suite for Google Antigravity Provider with `gemini-3-pro-high`
2049
+
2050
+ ### Fixed
2051
+
2052
+ - Fixed Amazon Bedrock HTTP/1.1 handler import to use direct import instead of dynamic import
2053
+ - Fixed Qwen model context window and pricing inconsistencies across OpenRouter
2054
+ - Fixed cache read pricing for multiple Qwen models
2055
+ - Fixed OpenAI Codex reasoning effort clamping for `gpt-5.3-codex` model
2056
+
2057
+ ## [11.7.1] - 2026-02-07
2058
+
2059
+ ### Added
2060
+
2061
+ - Added Claude Opus 4.6 Thinking model for Antigravity provider
2062
+ - Added Gemini 2.5 Flash, Gemini 2.5 Flash Thinking, and Gemini 2.5 Pro models for Antigravity provider
2063
+ - Added Pony Alpha model via OpenRouter
2064
+
2065
+ ### Changed
2066
+
2067
+ - Updated Antigravity models to use free tier pricing (0 cost) across all models
2068
+ - Changed Antigravity model fetching to dynamically load from API when credentials are available, with hardcoded fallback models
2069
+ - Updated Claude Opus 4.6 context window from 200,000 to 1,000,000 tokens across Bedrock regions
2070
+ - Updated Claude Opus 4.6 cache read/write pricing from 1.5/18.75 to 0.5/6.25 for EU and US regions
2071
+ - Updated Antigravity model pricing to free tier (0 cost) for Claude Opus 4.5 Thinking, Claude Sonnet 4.5 Thinking, Gemini 3 Flash, Gemini 3 Pro variants, and GPT-OSS 120B Medium
2072
+ - Updated GPT-OSS 120B Medium reasoning capability from false to true
2073
+ - Updated Gemini 3 Flash max tokens from 65,535 to 65,536
2074
+ - Updated Claude Opus 4.5 Thinking display name formatting to include parentheses
2075
+ - Updated various model pricing and context window parameters across OpenRouter and other providers
2076
+ - Removed Claude Opus 4.6 20260205 model from Anthropic provider
2077
+
2078
+ ### Fixed
2079
+
2080
+ - Fixed Claude Opus 4.6 model ID format by removing version suffix (:0) in Bedrock configurations
2081
+ - Fixed Llama 3.1 70B Instruct pricing and context window parameters
2082
+ - Fixed Mistral model pricing and cache read costs
2083
+ - Fixed DeepSeek and other model pricing inconsistencies
2084
+ - Fixed Qwen model pricing and token limits
2085
+ - Fixed GLM model pricing and context window specifications
2086
+
2087
+ ## [11.6.0] - 2026-02-07
2088
+
2089
+ ### Added
2090
+
2091
+ - Added Bedrock cache retention support with `PI_CACHE_RETENTION` env var and per-request `cacheRetention` option
2092
+ - Added adaptive thinking support for Bedrock Opus 4.6+ models
2093
+ - Added `AWS_BEDROCK_SKIP_AUTH` env var to support unauthenticated Bedrock proxies
2094
+ - Added `AWS_BEDROCK_FORCE_HTTP1` env var to force HTTP/1.1 for custom Bedrock endpoints
2095
+ - Re-exported `Static`, `TSchema`, and `Type` from `@sinclair/typebox`
2096
+
2097
+ ### Fixed
2098
+
2099
+ - Fixed OpenAI Responses requests to disable storage by default (`store: false`)
2100
+ - Fixed reasoning effort clamping for gpt-5.3 Codex models (minimal -> low)
2101
+ - Fixed Bedrock `supportsPromptCaching` to also check model cost fields
2102
+
2103
+ ## [11.5.1] - 2026-02-07
2104
+
2105
+ ### Fixed
2106
+
2107
+ - Fixed schema normalization to handle array-valued `type` fields by converting them to a single type with nullable flag for Google provider compatibility
2108
+
2109
+ ## [11.3.0] - 2026-02-06
2110
+
2111
+ ### Added
2112
+
2113
+ - Added `cacheRetention` option to control prompt cache retention preference ('none', 'short', 'long') across providers
2114
+ - Added `maxRetryDelayMs` option to cap server-requested retry delays and fail fast when delays exceed the limit
2115
+ - Added `effort` option for Anthropic Opus 4.6+ models to control adaptive thinking effort levels ('low', 'medium', 'high', 'max')
2116
+ - Added support for Anthropic Opus 4.6+ adaptive thinking mode that lets Claude decide when and how much to think
2117
+ - Added `PI_AI_ANTIGRAVITY_VERSION` environment variable to customize Antigravity sandbox endpoint version
2118
+ - Exported `convertAnthropicMessages` function for converting message formats to Anthropic API
2119
+ - Automatic fallback for Anthropic assistant-prefill requests: appends synthetic user "Continue." message when conversation ends with assistant turn to maintain API compatibility
2120
+
2121
+ ### Changed
2122
+
2123
+ - Changed `supportsXhigh()` to include GPT-5.1 Codex Max and broaden Anthropic support to all Anthropic Messages API models with budget-based thinking capability
2124
+ - Changed Anthropic thinking mode to use adaptive thinking for Opus 4.6+ models instead of budget-based thinking
2125
+ - Changed `supportsXhigh()` to support GPT-5.2/5.3 and Anthropic Opus 4.6+ models with adaptive thinking
2126
+ - Changed prompt caching to respect `cacheRetention` option and support TTL configuration for Anthropic
2127
+ - Changed OpenAI tool definitions to conditionally include `strict` field only when provider supports it
2128
+ - Changed Qwen model support to use `enable_thinking` boolean parameter instead of OpenAI-style reasoning_effort
2129
+
2130
+ ### Fixed
2131
+
2132
+ - Fixed indentation and formatting in `convertAnthropicMessages` function
2133
+ - Fixed handling of conversations ending with assistant messages on Anthropic-routed models that reject assistant prefill requests
2134
+
2135
+ ## [11.2.3] - 2026-02-05
2136
+
2137
+ ### Added
2138
+
2139
+ - Added Claude Opus 4.6 model support across multiple providers (Anthropic, Amazon Bedrock, GitHub Copilot, OpenRouter, OpenCode, Vercel AI Gateway)
2140
+ - Added GPT-5.3 Codex model support for OpenAI
2141
+ - Added `readSseJson` utility import for improved SSE stream handling in Google Gemini CLI provider
2142
+
2143
+ ### Changed
2144
+
2145
+ - Updated Google Gemini CLI provider to use `readSseJson` utility for cleaner SSE stream parsing
2146
+ - Updated pricing for Llama 3.1 405B model on Vercel AI Gateway (cache read rate adjusted)
2147
+ - Updated Llama 3.1 405B context window and max tokens on Vercel AI Gateway (256000 for both)
2148
+
2149
+ ### Removed
2150
+
2151
+ - Removed Kimi K2, Kimi K2 Turbo Preview, and Kimi K2.5 models
2152
+ - Removed Deep Cogito Cogito V2 Preview models from OpenRouter
2153
+
2154
+ ## [11.0.0] - 2026-02-05
2155
+
2156
+ ### Changed
2157
+
2158
+ - Replaced direct `Bun.env` access with `getEnv()` utility from `@aryee337/aery-utils` for consistent environment variable handling across all providers
2159
+ - Updated environment variable names from `AERY_*` prefix to `PI_*` prefix for consistency (e.g., `AERY_CODING_AGENT_DIR` → `PI_CODING_AGENT_DIR`)
2160
+
2161
+ ### Removed
2162
+
2163
+ - Removed automatic environment variable migration from `PI_*` to `AERY_*` prefixes via `migrate-env.ts` module
2164
+
2165
+ ## [10.5.0] - 2026-02-04
2166
+
2167
+ ### Changed
2168
+
2169
+ - Updated @anthropic-ai/sdk to ^0.72.1
2170
+ - Updated @aws-sdk/client-bedrock-runtime to ^3.982.0
2171
+ - Updated @google/genai to ^1.39.0
2172
+ - Updated @smithy/node-http-handler to ^4.4.9
2173
+ - Updated openai to ^6.17.0
2174
+ - Updated @types/node to ^25.2.0
2175
+
2176
+ ### Removed
2177
+
2178
+ - Removed proxy-agent dependency
2179
+ - Removed undici dependency
2180
+
2181
+ ## [9.4.0] - 2026-01-31
2182
+
2183
+ ### Added
2184
+
2185
+ - Added `getEnv()` function to retrieve environment variables from Bun.env, cwd/.env, or ~/.env
2186
+ - Added support for reading .env files from home directory and current working directory
2187
+ - Added support for `exa` and `perplexity` as known providers in `getEnvApiKey()`
2188
+
2189
+ ### Changed
2190
+
2191
+ - Changed `getEnvApiKey()` to check Bun.env, cwd/.env, and ~/.env files in order of precedence
2192
+ - Refactored provider API key resolution to use a declarative service provider map
2193
+
2194
+ ## [9.2.2] - 2026-01-31
2195
+
2196
+ ### Added
2197
+
2198
+ - Added OpenCode Zen provider with API key authentication for accessing multiple AI models
2199
+ - Added 4 new free models via OpenCode: glm-4.7-free, kimi-k2.5-free, minimax-m2.1-free, trinity-large-preview-free
2200
+ - Added glm-4.7-flash model via Zai provider
2201
+ - Added Kimi Code provider with OpenAI and Anthropic API format support
2202
+ - Added prompt cache retention support with PI_CACHE_RETENTION env var
2203
+ - Added overflow patterns for Bedrock, MiniMax, Kimi; reclassified 429 as rate limiting
2204
+ - Added profile endpoint integration to resolve user emails with 24-hour caching
2205
+ - Added automatic token refresh for expired Kimi OAuth credentials
2206
+ - Added Kimi Code OAuth handler with device authorization flow
2207
+ - Added Kimi Code usage provider with quota caching
2208
+ - Added 4 new Kimi Code models (kimi-for-coding, kimi-k2, kimi-k2-turbo-preview, kimi-k2.5)
2209
+ - Added Kimi Code provider integration with OAuth and token management
2210
+ - Added tool-choice utility for mapping unified ToolChoice to provider-specific formats
2211
+ - Added ToolChoice type for controlling tool selection (auto, none, any, required, function)
2212
+
2213
+ ### Changed
2214
+
2215
+ - Updated Kimi K2.5 cache read pricing from 0.1 to 0.08
2216
+ - Updated MiniMax M2 cache read pricing from 0.1 to 0.09999999999999999 (input 0.6 and output 3 unchanged)
2217
+ - Updated OpenRouter DeepSeek V3.1 pricing and max tokens: input 0.6→0.5, output 3→2.8, maxTokens 262144→4096
2218
+ - Updated OpenRouter DeepSeek R1 pricing and max tokens: input 0.06→0.049999999999999996, output 0.24→0.19999999999999998, maxTokens 262144→4096
2219
+ - Updated Anthropic Claude 3.5 Sonnet max tokens from 256000 to 65536 on OpenRouter
2220
+ - Updated Vercel AI Gateway Claude 3.5 Sonnet cache read pricing from 0.125 to 0.13
2221
+ - Updated Vercel AI Gateway Claude 3.5 Sonnet New cache read pricing from 0.125 to 0.13
2222
+ - Updated Vercel AI Gateway GPT-5.2 cache read pricing from 0.175 to 0.18 and display name to 'GPT 5.2'
2223
+ - Updated Zai GLM-4.6 cache read pricing from 0.024999999999999998 to 0.03
2224
+ - Updated Zai Qwen QwQ max tokens from 66000 to 16384
2225
+ - Added delta event batching and throttling (50ms, 20 updates/sec max) to AssistantMessageEventStream
2226
+ - Updated MiniMax-M2 pricing: input 1.2→0.6, output 1.2→3, cacheRead 0.6→0.1
2227
+
2228
+ ### Removed
2229
+
2230
+ - Removed OpenRouter google/gemini-2.0-flash-exp:free model
2231
+ - Removed Vercel AI Gateway stealth/sonoma-dusk-alpha and stealth/sonoma-sky-alpha models
2232
+
2233
+ ### Fixed
2234
+
2235
+ - Fixed rate limit issues with Kimi models by always sending max_tokens
2236
+ - Added handling for sensitive stop reason from Anthropic API safety filters
2237
+ - Added optional chaining for safer JSON schema property access in Anthropic provider
2238
+
2239
+ ## [8.6.0] - 2026-01-27
2240
+
2241
+ ### Changed
2242
+
2243
+ - Replaced JSON5 dependency with Bun.JSON5 parsing
2244
+
2245
+ ### Fixed
2246
+
2247
+ - Filtered empty user text blocks for OpenAI-compatible completions and normalized Kimi reasoning_content for OpenRouter tool-call messages
2248
+
2249
+ ## [8.4.0] - 2026-01-25
2250
+
2251
+ ### Added
2252
+
2253
+ - Added Azure OpenAI Responses provider with deployment mapping and resource-based base URL support
2254
+
2255
+ ### Changed
2256
+
2257
+ - Added OpenRouter routing preferences for OpenAI-compatible completions
2258
+
2259
+ ### Fixed
2260
+
2261
+ - Defaulted Google tool call arguments to empty objects when providers omit args
2262
+ - Guarded Responses/Codex streaming deltas against missing content parts and handled arguments.done events
2263
+
2264
+ ## [8.2.1] - 2026-01-24
2265
+
2266
+ ### Fixed
2267
+
2268
+ - Fixed handling of streaming function call arguments in OpenAI responses to properly parse arguments when sent via `response.function_call_arguments.done` events
2269
+
2270
+ ## [8.2.0] - 2026-01-24
2271
+
2272
+ ### Changed
2273
+
2274
+ - Migrated node module imports from named to namespace imports across all packages for consistency with project guidelines
2275
+
2276
+ ## [8.0.0] - 2026-01-23
2277
+
2278
+ ### Fixed
2279
+
2280
+ - Fixed OpenAI Responses API 400 error "function_call without required reasoning item" when switching between models (same provider, different model). The fix omits the `id` field for function_calls from different models to avoid triggering OpenAI's reasoning/function_call pairing validation
2281
+ - Fixed 400 errors when reading multiple images via GitHub Copilot's Claude models. Claude requires tool_use -> tool_result adjacency with no user messages interleaved. Images from consecutive tool results are now batched into a single user message
2282
+
2283
+ ## [7.0.0] - 2026-01-21
2284
+
2285
+ ### Added
2286
+
2287
+ - Added usage tracking system with normalized schema for provider quota/limit endpoints
2288
+ - Added Claude usage provider for 5-hour and 7-day quota windows
2289
+ - Added GitHub Copilot usage provider for chat, completions, and premium requests
2290
+ - Added Google Antigravity usage provider for model quota tracking
2291
+ - Added Google Gemini CLI usage provider for tier-based quota monitoring
2292
+ - Added OpenAI Codex usage provider for primary and secondary rate limit windows
2293
+ - Added ZAI usage provider for token and request quota tracking
2294
+
2295
+ ### Changed
2296
+
2297
+ - Updated Claude usage provider to extract account identifiers from response headers
2298
+ - Updated GitHub Copilot usage provider to include account identifiers in usage reports
2299
+ - Updated Google Gemini CLI usage provider to handle missing reset time gracefully
2300
+
2301
+ ### Fixed
2302
+
2303
+ - Fixed GitHub Copilot usage provider to simplify token handling and improve reliability
2304
+ - Fixed GitHub Copilot usage provider to properly resolve account identifiers for OAuth credentials
2305
+ - Fixed API validation errors when sending empty user messages (resume with `.`) across all providers:
2306
+   - Google Cloud Code Assist (google-shared.ts)
2307
+   - OpenAI Responses API (openai-responses.ts)
2308
+   - OpenAI Codex Responses API (openai-codex-responses.ts)
2309
+   - Cursor (cursor.ts)
2310
+   - Amazon Bedrock (amazon-bedrock.ts)
2311
+ - Clamped OpenAI Codex reasoning effort "minimal" to "low" for gpt-5.2 models to avoid API errors
2312
+ - Fixed GitHub Copilot usage fallback to internal quota endpoints when billing usage is unavailable
2313
+ - Fixed GitHub Copilot usage metadata to include account identifiers for report dedupe
2314
+ - Fixed Anthropic usage metadata extraction to include account identifiers when provided by the usage endpoint
2315
+ - Fixed Gemini CLI usage windows to consistently label quota windows for display suppression
2316
+
2317
+ ## [6.9.69] - 2026-01-21
2318
+
2319
+ ### Added
2320
+
2321
+ - Added duration and time-to-first-token (ttft) metrics to all AI provider responses
2322
+ - Added performance tracking for streaming responses across all providers
2323
+
2324
+ ## [6.9.0] - 2026-01-21
2325
+
2326
+ ### Removed
2327
+
2328
+ - Removed openai-codex provider exports from main package index
2329
+ - Removed openai-codex prompt utilities and moved them inline
2330
+ - Removed vitest configuration file
2331
+
2332
+ ## [6.8.4] - 2026-01-21
2333
+
2334
+ ### Changed
2335
+
2336
+ - Updated prompt caching strategy to follow Anthropic's recommended hierarchy
2337
+ - Fixed token usage tracking to properly handle cumulative output tokens from message_delta events
2338
+ - Improved message validation to filter out empty or invalid content blocks
2339
+ - Changed the OAuth callback timeout unit from seconds (120) to milliseconds (120,000); the duration is unchanged
2340
+
2341
+ ## [6.8.3] - 2026-01-21
2342
+
2343
+ ### Added
2344
+
2345
+ - Added `headers` option to all providers for custom request headers
2346
+ - Added `onPayload` hook to observe provider request payloads before sending
2347
+ - Added `strictResponsesPairing` option for Azure OpenAI Responses API compatibility
2348
+ - Added `originator` option to `loginOpenAICodex` for custom OAuth flow identification
2349
+ - Added per-request `headers` and `onPayload` hooks to `StreamOptions`
2351
+
2352
+ ### Fixed
2353
+
2354
+ - Fixed tool call ID normalization for OpenAI Responses API cross-provider handoffs
2355
+ - Skipped errored or aborted assistant messages during cross-provider transforms
2356
+ - Detected AWS ECS/IRSA credentials for Bedrock authentication checks
2358
+ - Normalized Responses API tool call IDs during handoffs and refreshed handoff tests
2359
+ - Enforced strict tool call/result pairing for Azure OpenAI Responses API
2361
+
2362
+ ### Security
2363
+
2364
+ - Enhanced AWS credential detection to support ECS task roles and IRSA web identity tokens
2365
+
2366
+ ## [6.8.2] - 2026-01-21
2367
+
2368
+ ### Fixed
2369
+
2370
+ - Improved error handling for aborted requests in Google Gemini CLI provider
2371
+ - Enhanced OAuth callback flow to handle manual input errors gracefully
2372
+ - Fixed login cancellation handling in GitHub Copilot OAuth flow
2373
+ - Removed fallback manual input from OpenAI Codex OAuth flow
2374
+
2375
+ ### Security
2376
+
2377
+ - Hardened database file permissions to prevent credential leakage
2378
+ - Set secure directory permissions (0o700) for credential storage
2379
+
2380
+ ## [6.8.0] - 2026-01-20
2381
+
2382
+ ### Added
2383
+
2384
+ - Added `logout` command to CLI for OAuth provider logout
2385
+ - Added `status` command to show logged-in providers and token expiry
2386
+ - Added persistent credential storage using SQLite database
2387
+ - Added OAuth callback server with automatic port fallback
2388
+ - Added HTML callback page with success/error states
2389
+ - Added support for Cursor OAuth provider
2390
+
2391
+ ### Changed
2392
+
2393
+ - Updated Promise.withResolvers usage for better compatibility
2394
+ - Replaced custom sleep implementations with Bun.sleep and abortableSleep
2395
+ - Simplified SSE stream parsing using readLines utility
2396
+ - Updated test framework from vitest to bun:test
2397
+ - Replaced temp directory creation with TempDir API
2398
+ - Changed credential storage from auth.json to ~/.aery/agent/agent.db
2399
+ - Changed CLI command examples from npx to bunx
2400
+ - Refactored OAuth flows to use common callback server base class
2401
+ - Updated OAuth provider interfaces to use controller pattern
2402
+
2403
+ ### Fixed
2404
+
2405
+ - Fixed OAuth callback handling with improved error states
2406
+ - Fixed token refresh for all OAuth providers
2407
+
2408
+ ## [6.7.670] - 2026-01-19
2409
+
2410
+ ### Changed
2411
+
2412
+ - Updated Claude Code compatibility headers and version
2413
+ - Improved OAuth token handling with proper state generation
2414
+ - Enhanced cache control for tool and user message blocks
2415
+ - Simplified tool name prefixing for OAuth traffic
2416
+ - Updated PKCE verifier generation for better security
2417
+
2418
+ ## [5.7.67] - 2026-01-18
2419
+
2420
+ ### Fixed
2421
+
2422
+ - Added error handling for unknown OAuth providers
2423
+
2424
+ ## [5.6.77] - 2026-01-18
2425
+
2426
+ ### Fixed
2427
+
2428
+ - Prevented duplicate tool results for errored or aborted messages when results already exist
2429
+
2430
+ ## [5.6.7] - 2026-01-18
2431
+
2432
+ ### Added
2433
+
2434
+ - Added automatic retry logic for OpenAI Codex responses with configurable delay and max retries
2435
+ - Added tool call ID sanitization for Amazon Bedrock to ensure valid characters
2436
+ - Added tool argument validation that coerces JSON-encoded strings for expected non-string types
2437
+
2438
+ ### Changed
2439
+
2440
+ - Updated environment variable prefix from `PI_*` to `AERY_*` for better consistency
2441
+ - Added automatic migration for legacy `PI_*` environment variables to `AERY_*` equivalents
2442
+ - Adjusted Bedrock Claude thinking budgets to reserve output tokens when maxTokens is too low
2443
+
2444
+ ### Fixed
2445
+
2446
+ - Fixed orphaned tool call handling to ensure proper tool_use/tool_result pairing for all assistant messages
2447
+ - Fixed message transformation to insert synthetic tool results for errored/aborted assistant messages with tool calls
2448
+ - Fixed tool prefix handling in Claude provider to use case-insensitive comparison
2449
+ - Fixed Gemini 3 model handling to treat unsigned tool calls as context-only with anti-mimicry context
2450
+ - Fixed message transformation to filter out empty error messages from conversation history
2451
+ - Fixed OpenAI completions provider compatibility detection to use provider metadata
2452
+ - Fixed OpenAI completions provider to avoid using developer role for opencode provider
2453
+ - Fixed orphaned tool call handling to skip synthetic results for errored assistant messages
2454
+
2455
+ ## [5.5.0] - 2026-01-18
2456
+
2457
+ ### Changed
2458
+
2459
+ - Updated User-Agent header from 'opencode' to 'aery' for OpenAI Codex requests
2460
+ - Simplified Codex system prompt instructions
2461
+ - Removed bridge text override from Codex system prompt builder
2462
+
2463
+ ## [5.3.0] - 2026-01-15
2464
+
2465
+ ### Changed
2466
+
2467
+ - Replaced detailed Codex system instructions with simplified aery assistant instructions
2468
+ - Updated internal documentation references to use aery-internal:// protocol
2469
+
2470
+ ## [5.1.0] - 2026-01-14
2471
+
2472
+ ### Added
2473
+
2474
+ - Added Amazon Bedrock provider with `bedrock-converse-stream` API for Claude models via AWS
2475
+ - Added MiniMax provider with OpenAI-compatible API
2476
+ - Added EU cross-region inference model variants for Claude models on Bedrock
2477
+
2478
+ ### Fixed
2479
+
2480
+ - Fixed Gemini CLI provider retries with proper error handling, retry delays from headers, and empty stream retry logic
2481
+ - Fixed numbered list items showing "1." for all items when code blocks break list continuity (via `start` property)
2482
+
2483
+ ## [5.0.0] - 2026-01-12
2484
+
2485
+ ### Added
2486
+
2487
+ - Added support for `xhigh` thinking level in `thinkingBudgets` configuration
2488
+
2489
+ ### Changed
2490
+
2491
+ - Changed Anthropic thinking token budgets: minimal (1024→3072), low (2048→6144), medium (8192→12288), high (16384→24576)
2492
+ - Changed Google thinking token budgets: minimal (1024), low (2048→4096), medium (8192), high (16384), xhigh (24575)
2493
+ - Changed `supportsXhigh()` to return true for all Anthropic models
2494
+
2495
+ ## [4.6.0] - 2026-01-12
2496
+
2497
+ ### Fixed
2498
+
2499
+ - Fixed incorrect classification of thought signatures in Google Gemini responses—thought signatures are now correctly treated as metadata rather than thinking content indicators
2500
+ - Fixed thought signature handling in Google Gemini CLI and Vertex AI streaming to properly preserve signatures across text deltas
2501
+ - Fixed Google schema sanitization stripping property names that match schema keywords (e.g., "pattern", "format") from tool definitions
2502
+
2503
+ ## [4.4.9] - 2026-01-12
2504
+
2505
+ ### Fixed
2506
+
2507
+ - Fixed Google provider schema sanitization to strip additional unsupported JSON Schema fields (patternProperties, additionalProperties, min/max constraints, pattern, format)
2508
+
2509
+ ## [4.4.8] - 2026-01-12
2510
+
2511
+ ### Fixed
2512
+
2513
+ - Fixed Google provider schema sanitization to properly collapse `anyOf`/`oneOf` with const values into enum arrays
2514
+ - Fixed const-to-enum conversion to infer type from the const value when type is not specified
2515
+
2516
+ ## [4.4.6] - 2026-01-11
2517
+
2518
+ ### Fixed
2519
+
2520
+ - Fixed tool parameter schema sanitization to only apply Google-specific transformations for Gemini models, preserving original schemas for other model types
2521
+
2522
+ ## [4.4.5] - 2026-01-11
2523
+
2524
+ ### Changed
2525
+
2526
+ - Exported `sanitizeSchemaForGoogle` utility function for external use
2527
+
2528
+ ### Fixed
2529
+
2530
+ - Fixed Google provider schema sanitization to strip additional unsupported JSON Schema fields ($schema, $ref, $defs, format, examples, and others)
2531
+ - Fixed Google provider to ignore `additionalProperties: false` which is unsupported by the API
2532
+
2533
+ ## [4.4.4] - 2026-01-11
2534
+
2535
+ ### Fixed
2536
+
2537
+ - Fixed Cursor todo updates to bridge update_todos tool calls to the local todo_write tool
2538
+
2539
+ ## [4.3.0] - 2026-01-11
2540
+
2541
+ ### Added
2542
+
2543
+ - Added debug log filtering and display script for Cursor JSONL logs with follow mode and coalescing support
2544
+ - Added protobuf definition extractor script to reconstruct .proto files from bundled JavaScript
2545
+ - Added conversation state caching to persist context across multiple Cursor API requests in the same session
2546
+ - Added shell streaming support for real-time stdout/stderr output during command execution
2547
+ - Added JSON5 parsing for MCP tool arguments with Python-style boolean and None value normalization
2548
+ - Added Cursor provider with support for Claude, GPT, and Gemini models via Cursor's agent API
2549
+ - Added OAuth authentication flow for Cursor including login, token refresh, and expiry detection
2550
+ - Added `cursor-agent` API type with streaming support and tool execution handlers
2551
+ - Added Cursor model definitions including Claude 4.5, GPT-5.x, Gemini 3, and Grok variants
2552
+ - Added model generation script to automatically fetch and update AI model definitions from models.dev and OpenRouter APIs
2553
+
2554
+ ### Changed
2555
+
2556
+ - Changed Cursor debug logging to use structured JSONL format with automatic MCP argument decoding
2557
+ - Changed MCP tool argument decoding to use protobuf Value schema for improved type handling
2558
+ - Changed tool advertisement to filter Cursor native tools (bash, read, write, delete, ls, grep, lsp) instead of only exposing mcp\_ prefixed tools
2559
+
2560
+ ### Fixed
2561
+
2562
+ - Fixed Cursor conversation history serialization so subagents retain task context and can call complete
2563
+
2564
+ ## [4.2.1] - 2026-01-11
2565
+
2566
+ ### Changed
2567
+
2568
+ - Updated `reasoningSummary` option to accept only `"auto"`, `"concise"`, `"detailed"`, or `null` (removed `"off"` and `"on"` values)
2569
+ - Changed default `reasoningSummary` from `"auto"` to `"detailed"`
2570
+ - OpenAI Codex: switched to bundled system prompt matching opencode, changed originator to "opencode", simplified prompt handling
2571
+
2572
+ ### Fixed
2573
+
2574
+ - Fixed Cloud Code Assist tool schema conversion to avoid unsupported `const` fields
2575
+
2576
+ ## [4.0.0] - 2026-01-10
2577
+
2578
+ ### Added
2579
+
2580
+ - Added `betas` option in `AnthropicOptions` for passing custom Anthropic beta feature flags
2581
+ - OpenCode Zen provider support with 26 models (Claude, GPT, Gemini, Grok, Kimi, GLM, Qwen, etc.). Set `OPENCODE_API_KEY` env var to use.
2582
+ - `thinkingBudgets` option in `SimpleStreamOptions` for customizing token budgets per thinking level on token-based providers
2583
+ - `sessionId` option in `StreamOptions` for providers that support session-based caching. OpenAI Codex provider uses this to set `prompt_cache_key` and routing headers.
2584
+ - `supportsUsageInStreaming` compatibility flag for OpenAI-compatible providers that reject `stream_options: { include_usage: true }`. Defaults to `true`. Set to `false` in model config for providers like gatewayz.ai.
2585
+ - `GOOGLE_APPLICATION_CREDENTIALS` env var support for Vertex AI credential detection (standard for CI/production)
2586
+ - Exported OpenAI Codex utilities: `CacheMetadata`, `getCodexInstructions`, `getModelFamily`, `ModelFamily`, `buildCodexPiBridge`, `buildCodexSystemPrompt`, `CodexSystemPrompt`
2587
+ - Headless OAuth support for all callback-server providers (Google Gemini CLI, Antigravity, OpenAI Codex): paste redirect URL when browser callback is unreachable
2588
+ - Cancellable GitHub Copilot device code polling via AbortSignal
2589
+ - Improved error messages for OpenRouter providers by including raw metadata from upstream errors
2590
+
2591
+ ### Changed
2592
+
2593
+ - Changed Anthropic provider to include Claude Code system instruction for all API key types, not just OAuth tokens (except Haiku models)
2594
+ - Changed Anthropic OAuth tool naming to use `proxy_` prefix instead of mapping to Claude Code tool names, avoiding potential name collisions
2595
+ - Changed Anthropic provider to include Claude Code headers for all requests, not just OAuth tokens
2596
+ - Anthropic provider now maps tool names to Claude Code's exact tool names (Read, Write, Edit, Bash, Grep, Glob) instead of using prefixed names
2597
+ - OpenAI Completions provider now disables strict mode on tools to allow optional parameters without null unions
2598
+
2599
+ ### Fixed
2600
+
2601
+ - Fixed Anthropic OAuth code parsing to accept full redirect URLs in addition to raw authorization codes
2602
+ - Fixed Anthropic token refresh to preserve existing refresh token when server doesn't return a new one
2603
+ - Fixed thinking mode being enabled when tool_choice forces a specific tool, which is unsupported
2604
+ - Fixed max_tokens being too low when thinking budget is set, now auto-adjusts to model's maxTokens
2605
+ - Google Cloud Code Assist OAuth for paid subscriptions: properly handles long-running operations for project provisioning, supports `GOOGLE_CLOUD_PROJECT` / `GOOGLE_CLOUD_PROJECT_ID` env vars for paid tiers
2606
+ - Fixed `os.homedir()` being called at module load time; it is now resolved lazily when needed
2607
+ - Fixed OpenAI Responses tool strict flag to use a boolean for LM Studio compatibility
2608
+ - Fixed Gemini CLI abort handling: detect native `AbortError` in retry catch block, cancel SSE reader when abort signal fires
2609
+ - Fixed Antigravity provider 429 errors by aligning request payload with CLIProxyAPI v6.6.89
2610
+ - Fixed thinking block handling for cross-model conversations: thinking blocks are now converted to plain text when switching models
2611
+ - Fixed OpenAI Codex context window from 400,000 to 272,000 tokens to match Codex CLI defaults
2612
+ - Fixed Codex SSE error events to surface message, code, and status
2613
+ - Fixed context overflow detection for `context_length_exceeded` error codes
2614
+ - Codex provider now always includes `reasoning.encrypted_content` even when custom `include` options are passed
2615
+ - Codex requests now omit the `reasoning` field entirely when thinking is off
2616
+ - Fixed crash when pasting text with trailing whitespace exceeding terminal width
2617
+
2618
+ ## [3.37.1] - 2026-01-10
2619
+
2620
+ ### Added
2621
+
2622
+ - Added automatic type coercion for tool arguments when LLMs return JSON-encoded strings instead of native types (numbers, booleans, arrays, objects)
2623
+
2624
+ ### Changed
2625
+
2626
+ - Changed tool argument validation to attempt JSON parsing and type coercion before rejecting mismatched types
2627
+ - Changed validation error messages to include both original and normalized arguments when coercion was attempted
2628
+
2629
+ ## [3.37.0] - 2026-01-10
2630
+
2631
+ ### Changed
2632
+
2633
+ - Enabled type coercion in JSON schema validation to automatically convert compatible types
2634
+
2635
+ ## [3.35.0] - 2026-01-09
2636
+
2637
+ ### Added
2638
+
2639
+ - Enhanced error messages to include retry-after timing information from API rate limit headers
2640
+
2641
+ ## [0.42.0] - 2026-01-09
2642
+
2643
+ ### Added
2644
+
2645
+ - Added OpenCode Zen provider support with 26 models (Claude, GPT, Gemini, Grok, Kimi, GLM, Qwen, etc.). Set `OPENCODE_API_KEY` env var to use.
2646
+
2647
+ ## [0.39.0] - 2026-01-08
2648
+
2649
+ ### Fixed
2650
+
2651
+ - Fixed Gemini CLI abort handling: detect native `AbortError` in retry catch block, cancel SSE reader when abort signal fires ([#568](https://github.com/badlogic/aery-monorepo/pull/568) by [@tmustier](https://github.com/tmustier))
2652
+ - Fixed Antigravity provider 429 errors by aligning request payload with CLIProxyAPI v6.6.89: inject Antigravity system instruction with `role: "user"`, set `requestType: "agent"`, and use `antigravity` userAgent. Added bridge prompt to override Antigravity behavior (identity, paths, web dev guidelines) with Aery defaults. ([#571](https://github.com/badlogic/aery-monorepo/pull/571) by [@ben-vargas](https://github.com/ben-vargas))
2653
+ - Fixed thinking block handling for cross-model conversations: thinking blocks are now converted to plain text (no `<thinking>` tags) when switching models. Previously, `<thinking>` tags caused models to mimic the pattern and output literal tags. Also fixed empty thinking blocks causing API errors. ([#561](https://github.com/badlogic/aery-monorepo/issues/561))
2654
+
2655
+ ## [0.38.0] - 2026-01-08
2656
+
2657
+ ### Added
2658
+
2659
+ - `thinkingBudgets` option in `SimpleStreamOptions` for customizing token budgets per thinking level on token-based providers ([#529](https://github.com/badlogic/aery-monorepo/pull/529) by [@melihmucuk](https://github.com/melihmucuk))
2660
+
2661
+ ### Breaking Changes
2662
+
2663
+ - Removed OpenAI Codex model aliases (`gpt-5`, `gpt-5-mini`, `gpt-5-nano`, `codex-mini-latest`, `gpt-5-codex`, `gpt-5.1-codex`, `gpt-5.1-chat-latest`). Use canonical model IDs: `gpt-5.1`, `gpt-5.1-codex-max`, `gpt-5.1-codex-mini`, `gpt-5.2`, `gpt-5.2-codex`. ([#536](https://github.com/badlogic/aery-monorepo/pull/536) by [@ghoulr](https://github.com/ghoulr))
2664
+
2665
+ ### Fixed
2666
+
2667
+ - Fixed OpenAI Codex context window from 400,000 to 272,000 tokens to match Codex CLI defaults and prevent 400 errors. ([#536](https://github.com/badlogic/aery-monorepo/pull/536) by [@ghoulr](https://github.com/ghoulr))
2668
+ - Fixed Codex SSE error events to surface message, code, and status. ([#551](https://github.com/badlogic/aery-monorepo/pull/551) by [@tmustier](https://github.com/tmustier))
2669
+ - Fixed context overflow detection for `context_length_exceeded` error codes.
2670
+
2671
+ ## [0.37.6] - 2026-01-06
2672
+
2673
+ ### Added
2674
+
2675
+ - Exported OpenAI Codex utilities: `CacheMetadata`, `getCodexInstructions`, `getModelFamily`, `ModelFamily`, `buildCodexPiBridge`, `buildCodexSystemPrompt`, `CodexSystemPrompt` ([#510](https://github.com/badlogic/aery-monorepo/pull/510) by [@mitsuhiko](https://github.com/mitsuhiko))
2676
+
2677
+ ## [0.37.3] - 2026-01-06
2678
+
2679
+ ### Added
2680
+
2681
+ - `sessionId` option in `StreamOptions` for providers that support session-based caching. OpenAI Codex provider uses this to set `prompt_cache_key` and routing headers.
2682
+
2683
+ ## [0.37.2] - 2026-01-05
2684
+
2685
+ ### Fixed
2686
+
2687
+ - Codex provider now always includes `reasoning.encrypted_content` even when custom `include` options are passed ([#484](https://github.com/badlogic/aery-monorepo/pull/484) by [@kim0](https://github.com/kim0))
2688
+
2689
+ ## [0.37.0] - 2026-01-05
2690
+
2691
+ ### Breaking Changes
2692
+
2693
+ - OpenAI Codex models no longer have per-thinking-level variants (e.g., `gpt-5.2-codex-high`). Use the base model ID and set thinking level separately. The Codex provider clamps reasoning effort to what each model supports internally. (initial implementation by [@ben-vargas](https://github.com/ben-vargas) in [#472](https://github.com/badlogic/aery-monorepo/pull/472))
2694
+
2695
+ ### Added
2696
+
2697
+ - Headless OAuth support for all callback-server providers (Google Gemini CLI, Antigravity, OpenAI Codex): paste redirect URL when browser callback is unreachable ([#428](https://github.com/badlogic/aery-monorepo/pull/428) by [@ben-vargas](https://github.com/ben-vargas), [#468](https://github.com/badlogic/aery-monorepo/pull/468) by [@crcatala](https://github.com/crcatala))
2698
+ - Cancellable GitHub Copilot device code polling via AbortSignal
2699
+
2700
+ ### Fixed
2701
+
2702
+ - Codex requests now omit the `reasoning` field entirely when thinking is off, letting the backend use its default instead of forcing a value. ([#472](https://github.com/badlogic/aery-monorepo/pull/472))
2703
+
2704
+ ## [0.36.0] - 2026-01-05
2705
+
2706
+ ### Added
2707
+
2708
+ - OpenAI Codex OAuth provider with Responses API streaming support: `openai-codex-responses` streaming provider with SSE parsing, tool-call handling, usage/cost tracking, and PKCE OAuth flow ([#451](https://github.com/badlogic/aery-monorepo/pull/451) by [@kim0](https://github.com/kim0))
2709
+
2710
+ ### Fixed
2711
+
2712
+ - Vertex AI dummy value for `getEnvApiKey()`: Returns `"<authenticated>"` when Application Default Credentials are configured (`~/.config/gcloud/application_default_credentials.json` exists) and both `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) and `GOOGLE_CLOUD_LOCATION` are set. This allows `streamSimple()` to work with Vertex AI without explicit `apiKey` option. The ADC credentials file existence check is cached per-process to avoid repeated filesystem access.
2713
+
2714
+ ## [0.32.3] - 2026-01-03
2715
+
2716
+ ### Fixed
2717
+
2718
+ - Google Vertex AI models no longer appear in available models list without explicit authentication. Previously, `getEnvApiKey()` returned a dummy value for `google-vertex`, causing models to show up even when Google Cloud ADC was not configured.
2719
+
2720
+ ## [0.32.0] - 2026-01-03
2721
+
2722
+ ### Added
2723
+
2724
+ - Vertex AI provider with ADC (Application Default Credentials) support. Authenticate with `gcloud auth application-default login`, set `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`, and access Gemini models via Vertex AI. ([#300](https://github.com/badlogic/aery-monorepo/pull/300) by [@default-anton](https://github.com/default-anton))
2725
+
2726
+ ### Fixed
2727
+
2728
+ - **Gemini CLI rate limit handling**: Added automatic retry with server-provided delay for 429 errors. Parses delay from error messages like "Your quota will reset after 39s" and waits accordingly. Falls back to exponential backoff for other transient errors. ([#370](https://github.com/badlogic/aery-monorepo/issues/370))
2729
+
2730
+ ## [0.31.0] - 2026-01-02
2731
+
2732
+ ### Breaking Changes
2733
+
2734
+ - **Agent API moved**: All agent functionality (`agentLoop`, `agentLoopContinue`, `AgentContext`, `AgentEvent`, `AgentTool`, `AgentToolResult`, etc.) has moved to `@eminent337/aery-agent-core`. Import from that package instead of `@aryee337/aery-ai`.
2735
+
2736
+ ### Added
2737
+
2738
+ - **`GoogleThinkingLevel` type**: Exported type that mirrors Google's `ThinkingLevel` enum values (`"THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH"`). Allows configuring Gemini thinking levels without importing from `@google/genai`.
2739
+ - **`ANTHROPIC_OAUTH_TOKEN` env var**: Now checked before `ANTHROPIC_API_KEY` in `getEnvApiKey()`, allowing OAuth tokens to take precedence.
2740
+ - **`event-stream.js` export**: `AssistantMessageEventStream` utility now exported from package index.
2741
+
2742
+ ### Changed
2743
+
2744
+ - **OAuth uses Web Crypto API**: PKCE generation and OAuth flows now use Web Crypto API (`crypto.subtle`) instead of Node.js `crypto` module. This improves browser compatibility while still working in Node.js 20+.
2745
+ - **Deterministic model generation**: `generate-models.ts` now sorts providers and models alphabetically for consistent output across runs. ([#332](https://github.com/badlogic/aery-monorepo/pull/332) by [@mrexodia](https://github.com/mrexodia))
2746
+
2747
+ ### Fixed
2748
+
2749
+ - **OpenAI completions empty content blocks**: Empty text or thinking blocks in assistant messages are now filtered out before sending to the OpenAI completions API, preventing validation errors. ([#344](https://github.com/badlogic/aery-monorepo/pull/344) by [@default-anton](https://github.com/default-anton))
2750
+ - **Thinking token duplication**: Fixed thinking content duplication with chutes.ai provider. The provider was returning thinking content in both `reasoning_content` and `reasoning` fields, causing each chunk to be processed twice. Now only the first non-empty reasoning field is used.
2751
+ - **zAi provider API mapping**: Fixed zAi models to use `openai-completions` API with correct base URL (`https://api.z.ai/api/coding/paas/v4`) instead of incorrect Anthropic API mapping. ([#344](https://github.com/badlogic/aery-monorepo/pull/344), [#358](https://github.com/badlogic/aery-monorepo/pull/358) by [@default-anton](https://github.com/default-anton))
2752
+
2753
+ ## [0.28.0] - 2025-12-25
2754
+
2755
+ ### Breaking Changes
2756
+
2757
+ - **OAuth storage removed** ([#296](https://github.com/badlogic/aery-monorepo/issues/296)): All storage functions (`loadOAuthCredentials`, `saveOAuthCredentials`, `setOAuthStorage`, etc.) removed. Callers are responsible for storing credentials.
2758
+ - **OAuth login functions**: `loginAnthropic`, `loginGitHubCopilot`, `loginGeminiCli`, `loginAntigravity` now return `OAuthCredentials` instead of saving to disk.
2759
+ - **refreshOAuthToken**: Now takes `(provider, credentials)` and returns new `OAuthCredentials` instead of saving.
2760
+ - **getOAuthApiKey**: Now takes `(provider, credentials)` and returns `{ newCredentials, apiKey }` or null.
2761
+ - **OAuthCredentials type**: No longer includes `type: "oauth"` discriminator. Callers add discriminator when storing.
2762
+ - **setApiKey, resolveApiKey**: Removed. Callers must manage their own API key storage/resolution.
2763
+ - **getApiKey**: Renamed to `getEnvApiKey`. Only checks environment variables for known providers.
2764
+
2765
+ ## [0.27.7] - 2025-12-24
2766
+
2767
+ ### Fixed
2768
+
2769
+ - **Thinking tag leakage**: Fixed Claude mimicking literal `</thinking>` tags in responses. Unsigned thinking blocks (from aborted streams) are now converted to plain text without `<thinking>` tags. The TUI still displays them as thinking blocks. ([#302](https://github.com/badlogic/aery-monorepo/pull/302) by [@nicobailon](https://github.com/nicobailon))
2770
+
2771
+ ## [0.25.1] - 2025-12-21
2772
+
2773
+ ### Added
2774
+
2775
+ - **xhigh thinking level support**: Added `supportsXhigh()` function to check if a model supports xhigh reasoning level. Also clamps xhigh to high for OpenAI models that don't support it. ([#236](https://github.com/badlogic/aery-monorepo/pull/236) by [@theBucky](https://github.com/theBucky))
2776
+
2777
+ ### Fixed
2778
+
2779
+ - **Gemini multimodal tool results**: Fixed images in tool results causing flaky/broken responses with Gemini models. For Gemini 3, images are now nested inside `functionResponse.parts` per the [docs](https://ai.google.dev/gemini-api/docs/function-calling#multimodal). For older models (which don't support multimodal function responses), images are sent in a separate user message.
2780
+
2781
+ - **Queued message steering**: When `getQueuedMessages` is provided, the agent loop now checks for queued user messages after each tool call and skips remaining tool calls in the current assistant message when a queued message arrives (emitting error tool results).
2782
+
2783
+ - **Double API version path in Google provider URL**: Fixed Gemini API calls returning 404 after baseUrl support was added. The SDK was appending its default apiVersion to baseUrl which already included the version path. ([#251](https://github.com/badlogic/aery-monorepo/pull/251) by [@shellfyred](https://github.com/shellfyred))
2784
+
2785
+ - **Anthropic SDK retries disabled**: Re-enabled SDK-level retries (default 2) for transient HTTP failures. ([#252](https://github.com/badlogic/aery-monorepo/issues/252))
2786
+
2787
+ ## [0.23.5] - 2025-12-19
2788
+
2789
+ ### Added
2790
+
2791
+ - **Gemini 3 Flash thinking support**: Extended thinking level support for Gemini 3 Flash models (MINIMAL, LOW, MEDIUM, HIGH) to match Pro models' capabilities. ([#212](https://github.com/badlogic/aery-monorepo/pull/212) by [@markusylisiurunen](https://github.com/markusylisiurunen))
2792
+
2793
+ - **GitHub Copilot thinking models**: Added thinking support for additional Copilot models (o3-mini, o1-mini, o1-preview). ([#234](https://github.com/badlogic/aery-monorepo/pull/234) by [@aadishv](https://github.com/aadishv))
2794
+
2795
+ ### Fixed
2796
+
2797
+ - **Gemini tool result format**: Fixed tool result format for Gemini 3 Flash Preview which strictly requires `{ output: value }` for success and `{ error: value }` for errors. Previous format using `{ result, isError }` was rejected by newer Gemini models. Also improved type safety by removing `as any` casts. ([#213](https://github.com/badlogic/aery-monorepo/issues/213), [#220](https://github.com/badlogic/aery-monorepo/pull/220))
2798
+
2799
+ - **Google baseUrl configuration**: Google provider now respects `baseUrl` configuration for custom endpoints or API proxies. ([#216](https://github.com/badlogic/aery-monorepo/issues/216), [#221](https://github.com/badlogic/aery-monorepo/pull/221) by [@theBucky](https://github.com/theBucky))
2800
+
2801
+ - **GitHub Copilot vision requests**: Added `Copilot-Vision-Request` header when sending images to GitHub Copilot models. ([#222](https://github.com/badlogic/aery-monorepo/issues/222))
2802
+
2803
+ - **GitHub Copilot X-Initiator header**: Fixed X-Initiator logic to check last message role instead of any message in history. This ensures proper billing when users send follow-up messages. ([#209](https://github.com/badlogic/aery-monorepo/issues/209))
2804
+
2805
+ ## [0.22.3] - 2025-12-16
2806
+
2807
+ ### Added
2808
+
2809
+ - **Image limits test suite**: Added comprehensive tests for provider-specific image limitations (max images, max size, max dimensions). Discovered actual limits: Anthropic (100 images, 5MB, 8000px), OpenAI (500 images, ≥25MB), Gemini (~2500 images, ≥40MB), Mistral (8 images, ~15MB), OpenRouter (~40 images context-limited, ~15MB). ([#120](https://github.com/badlogic/aery-monorepo/pull/120))
2810
+
2811
+ - **Tool result streaming**: Added `tool_execution_update` event and optional `onUpdate` callback to `AgentTool.execute()` for streaming tool output during execution. Tools can now emit partial results (e.g., bash stdout) that are forwarded to subscribers. ([#44](https://github.com/badlogic/aery-monorepo/issues/44))
2812
+
2813
+ - **X-Initiator header for GitHub Copilot**: Added X-Initiator header handling for GitHub Copilot provider to ensure correct call accounting (agent calls are not deducted from quota). Sets initiator based on last message role. ([#200](https://github.com/badlogic/aery-monorepo/pull/200) by [@kim0](https://github.com/kim0))
2814
+
2815
+ ### Changed
2816
+
2817
+ - **Normalized tool_execution_end result**: `tool_execution_end` event now always contains `AgentToolResult` (no longer `AgentToolResult | string`). Errors are wrapped in the standard result format.
2818
+
2819
+ ### Fixed
2820
+
2821
+ - **Reasoning disabled by default**: When `reasoning` option is not specified, thinking is now explicitly disabled for all providers. Previously, some providers like Gemini with "dynamic thinking" would use their default (thinking ON), causing unexpected token usage. This was the original intended behavior. ([#180](https://github.com/badlogic/aery-monorepo/pull/180) by [@markusylisiurunen](https://github.com/markusylisiurunen))
2822
+
2823
+ ## [0.22.2] - 2025-12-15
2824
+
2825
+ ### Added
2826
+
2827
+ - **Interleaved thinking for Anthropic**: Added `interleavedThinking` option to `AnthropicOptions`. When enabled, Claude 4 models can think between tool calls and reason after receiving tool results. Enabled by default (no extra token cost, just unlocks the capability). Set `interleavedThinking: false` to disable.
2828
+
2829
+ ## [0.22.1] - 2025-12-15
2830
+
2831
+ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
2832
+
2833
+ ### Added
2834
+
2835
+ - **Interleaved thinking for Anthropic**: Enabled interleaved thinking in the Anthropic provider, allowing Claude models to output thinking blocks interspersed with text responses.
2836
+
2837
+ ## [0.22.0] - 2025-12-15
2838
+
2839
+ ### Added
2840
+
2841
+ - **GitHub Copilot provider**: Added `github-copilot` as a known provider with models sourced from models.dev. Includes Claude, GPT, Gemini, Grok, and other models available through GitHub Copilot. ([#191](https://github.com/badlogic/aery-monorepo/pull/191) by [@cau1k](https://github.com/cau1k))
2842
+
2843
+ ### Fixed
2844
+
2845
+ - **GitHub Copilot gpt-5 models**: Fixed API selection for gpt-5 models to use `openai-responses` instead of `openai-completions` (gpt-5 models are not accessible via completions endpoint)
2846
+
2847
+ - **GitHub Copilot cross-model context handoff**: Fixed context handoff failing when switching between GitHub Copilot models using different APIs (e.g., gpt-5 to claude-sonnet-4). Tool call IDs from OpenAI Responses API were incompatible with other models. ([#198](https://github.com/badlogic/aery-monorepo/issues/198))
2848
+
2849
+ - **Gemini 3 Pro thinking levels**: Thinking level configuration now works correctly for Gemini 3 Pro models. Previously all levels mapped to -1 (minimal thinking). Now LOW/MEDIUM/HIGH properly control test-time computation. ([#176](https://github.com/badlogic/aery-monorepo/pull/176) by [@markusylisiurunen](https://github.com/markusylisiurunen))
2850
+
2851
+ ## [0.18.2] - 2025-12-11
2852
+
2853
+ ### Changed
2854
+
2855
+ - **Anthropic SDK retries disabled**: Set `maxRetries: 0` on Anthropic client to allow application-level retry handling. The SDK's built-in retries were interfering with coding-agent's retry logic. ([#157](https://github.com/badlogic/aery-monorepo/issues/157))
2856
+
2857
+ ## [0.18.1] - 2025-12-10
2858
+
2859
+ ### Added
2860
+
2861
+ - **Mistral provider**: Added support for Mistral AI models via the OpenAI-compatible API. Includes automatic handling of Mistral-specific requirements (tool call ID format). Set `MISTRAL_API_KEY` environment variable to use.
2862
+
2863
+ ### Fixed
2864
+
2865
+ - Fixed Mistral 400 errors after aborted assistant messages by skipping empty assistant messages (no content, no tool calls) ([#165](https://github.com/badlogic/aery-monorepo/issues/165))
2866
+
2867
+ - Removed synthetic assistant bridge message after tool results for Mistral (no longer required as of Dec 2025) ([#165](https://github.com/badlogic/aery-monorepo/issues/165))
2868
+
2869
+ - Fixed bug where `ANTHROPIC_API_KEY` environment variable was deleted globally after first OAuth token usage, causing subsequent prompts to fail ([#164](https://github.com/badlogic/aery-monorepo/pull/164))
2870
+
2871
+ ## [0.17.0] - 2025-12-09
2872
+
2873
+ ### Added
2874
+
2875
+ - **`agentLoopContinue` function**: Continue an agent loop from existing context without adding a new user message. Validates that the last message is `user` or `toolResult`. Useful for retry after context overflow or resuming from manually-added tool results.
2876
+ - Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments.
2877
+
2878
+ - **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/aery-monorepo/issues/133), thanks @fink-andreas for the initial idea and PR)
2879
+
2880
+ - **xhigh reasoning level**: Added `xhigh` to `ReasoningEffort` type for OpenAI codex-max models. For non-OpenAI providers (Anthropic, Google), `xhigh` is automatically mapped to `high`. ([#143](https://github.com/badlogic/aery-monorepo/issues/143))
2881
+
2882
+ ### Breaking Changes
2883
+
2884
+ - Removed provider-level tool argument validation. Validation now happens in `agentLoop` via `executeToolCalls`, allowing models to retry on validation errors. For manual tool execution, use `validateToolCall(tools, toolCall)` or `validateToolArguments(tool, toolCall)`.
2885
+
2886
+ ### Changed
2887
+
2888
+ - **Updated SDK versions**: OpenAI SDK 5.21.0 → 6.10.0, Anthropic SDK 0.61.0 → 0.71.2, Google GenAI SDK 1.30.0 → 1.31.0
2889
+
2890
+ ## [0.13.0] - 2025-12-06
2891
+
2892
+ ### Breaking Changes
2893
+
2894
+ - **Added `totalTokens` field to `Usage` type**: All code that constructs `Usage` objects must now include the `totalTokens` field. This field represents the total tokens processed by the LLM (input + output + cache). For OpenAI and Google, this uses native API values (`total_tokens`, `totalTokenCount`). For Anthropic, it's computed as `input + output + cacheRead + cacheWrite`.
2895
+
2896
+ ## [0.12.10] - 2025-12-04
2897
+
2898
+ ### Added
2899
+
2900
+ - Added `gpt-5.1-codex-max` model support
2901
+
2902
+ ### Fixed
2903
+
2904
+ - **OpenAI Token Counting**: Fixed `usage.input` to exclude cached tokens for OpenAI providers. Previously, `input` included cached tokens, causing double-counting when calculating total context size via `input + cacheRead`. Now `input` represents non-cached input tokens across all providers, making `input + output + cacheRead + cacheWrite` the correct formula for total context size.
2905
+
2906
+ - **Fixed Claude Opus 4.5 cache pricing** (was 3x too expensive)
2907
+ - Corrected cache_read: $1.50 → $0.50 per MTok
2908
+ - Corrected cache_write: $18.75 → $6.25 per MTok
2909
+ - Added manual override in `scripts/generate-models.ts` until upstream fix is merged
2910
+ - Submitted PR to models.dev: https://github.com/sst/models.dev/pull/439
2911
+
2912
+ ## [0.9.4] - 2025-11-26
2913
+
2914
+ Initial release with multi-provider LLM support.