@aryee337/aery-ai 0.2.27 → 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (417)
  1. package/CHANGELOG.md +2914 -0
  2. package/README.md +614 -813
  3. package/package.json +140 -105
  4. package/src/api-registry.ts +96 -0
  5. package/src/auth-broker/client.ts +358 -0
  6. package/src/auth-broker/index.ts +5 -0
  7. package/src/auth-broker/refresher.ts +117 -0
  8. package/src/auth-broker/remote-store.ts +623 -0
  9. package/src/auth-broker/server.ts +644 -0
  10. package/src/auth-broker/types.ts +127 -0
  11. package/src/auth-broker/wire-schemas.ts +200 -0
  12. package/src/auth-gateway/http.ts +194 -0
  13. package/src/auth-gateway/index.ts +3 -0
  14. package/src/auth-gateway/server.ts +818 -0
  15. package/src/auth-gateway/types.ts +143 -0
  16. package/src/auth-storage.ts +4422 -0
  17. package/src/index.ts +54 -0
  18. package/src/model-cache.ts +129 -0
  19. package/src/model-manager.ts +469 -0
  20. package/src/model-thinking.ts +782 -0
  21. package/src/models.json +83530 -0
  22. package/src/models.json.d.ts +9 -0
  23. package/src/models.ts +56 -0
  24. package/src/prompts/turn-aborted-guidance.md +4 -0
  25. package/src/provider-details.ts +90 -0
  26. package/src/provider-models/bundled-references.ts +38 -0
  27. package/src/provider-models/descriptors.ts +355 -0
  28. package/src/provider-models/google.ts +88 -0
  29. package/src/provider-models/index.ts +5 -0
  30. package/src/provider-models/ollama.ts +153 -0
  31. package/src/provider-models/openai-compat.ts +2817 -0
  32. package/src/provider-models/special.ts +67 -0
  33. package/src/providers/aery-native-client.ts +228 -0
  34. package/src/providers/aery-native-server.ts +212 -0
  35. package/src/providers/amazon-bedrock.ts +873 -0
  36. package/src/providers/anthropic-client.ts +318 -0
  37. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  38. package/src/providers/anthropic-messages-server.ts +683 -0
  39. package/src/providers/anthropic-wire.ts +268 -0
  40. package/src/providers/anthropic.ts +3094 -0
  41. package/src/providers/aws-credentials.ts +501 -0
  42. package/src/providers/aws-eventstream.ts +185 -0
  43. package/src/providers/aws-sigv4.ts +218 -0
  44. package/src/providers/azure-openai-responses.ts +361 -0
  45. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  46. package/src/providers/cursor/proto/agent.proto +3526 -0
  47. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  48. package/src/providers/cursor/proto/buf.yaml +17 -0
  49. package/src/providers/cursor.ts +2621 -0
  50. package/src/providers/error-message.ts +21 -0
  51. package/src/providers/github-copilot-headers.ts +140 -0
  52. package/src/providers/gitlab-duo.ts +372 -0
  53. package/src/providers/google-auth.ts +252 -0
  54. package/src/providers/google-gemini-cli.ts +809 -0
  55. package/src/providers/google-gemini-headers.ts +41 -0
  56. package/src/providers/google-shared.ts +917 -0
  57. package/src/providers/google-types.ts +167 -0
  58. package/src/providers/google-vertex.ts +91 -0
  59. package/src/providers/google.ts +41 -0
  60. package/src/providers/grammar.ts +70 -0
  61. package/src/providers/kimi.ts +52 -0
  62. package/src/providers/mock.ts +496 -0
  63. package/src/providers/ollama.ts +644 -0
  64. package/src/providers/openai-anthropic-shim.ts +138 -0
  65. package/src/providers/openai-chat-server-schema.ts +252 -0
  66. package/src/providers/openai-chat-server.ts +647 -0
  67. package/src/providers/openai-codex/constants.ts +43 -0
  68. package/src/providers/openai-codex/request-transformer.ts +161 -0
  69. package/src/providers/openai-codex/response-handler.ts +81 -0
  70. package/src/providers/openai-codex-responses.ts +3018 -0
  71. package/src/providers/openai-completions-compat.ts +300 -0
  72. package/src/providers/openai-completions.ts +1979 -0
  73. package/src/providers/openai-responses-server-schema.ts +290 -0
  74. package/src/providers/openai-responses-server.ts +1183 -0
  75. package/src/providers/openai-responses-shared.ts +873 -0
  76. package/src/providers/openai-responses.ts +679 -0
  77. package/src/providers/register-builtins.ts +436 -0
  78. package/src/providers/synthetic.ts +50 -0
  79. package/src/providers/transform-messages.ts +382 -0
  80. package/src/providers/vision-guard.ts +31 -0
  81. package/src/providers/xai-responses.ts +82 -0
  82. package/src/rate-limit-utils.ts +84 -0
  83. package/src/stream.ts +1065 -0
  84. package/src/types.ts +944 -0
  85. package/src/usage/claude.ts +482 -0
  86. package/src/usage/gemini.ts +250 -0
  87. package/src/usage/github-copilot.ts +421 -0
  88. package/src/usage/google-antigravity.ts +201 -0
  89. package/src/usage/kimi.ts +271 -0
  90. package/src/usage/minimax-code.ts +31 -0
  91. package/src/usage/openai-codex.ts +503 -0
  92. package/src/usage/shared.ts +10 -0
  93. package/src/usage/zai.ts +247 -0
  94. package/src/usage.ts +185 -0
  95. package/src/utils/abort.ts +51 -0
  96. package/src/utils/abortable-iterator.ts +69 -0
  97. package/src/utils/anthropic-auth.ts +93 -0
  98. package/src/utils/discovery/antigravity.ts +261 -0
  99. package/src/utils/discovery/codex.ts +371 -0
  100. package/src/utils/discovery/cursor.ts +306 -0
  101. package/src/utils/discovery/gemini.ts +248 -0
  102. package/src/utils/discovery/index.ts +4 -0
  103. package/src/utils/discovery/openai-compatible.ts +224 -0
  104. package/src/utils/event-stream.ts +142 -0
  105. package/src/utils/fireworks-model-id.ts +30 -0
  106. package/src/utils/foundry.ts +8 -0
  107. package/src/utils/http-inspector.ts +176 -0
  108. package/src/utils/idle-iterator.ts +267 -0
  109. package/src/utils/json-parse.ts +182 -0
  110. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  111. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  112. package/src/utils/oauth/anthropic.ts +273 -0
  113. package/src/utils/oauth/api-key-login.ts +87 -0
  114. package/src/utils/oauth/api-key-validation.ts +92 -0
  115. package/src/utils/oauth/callback-server.ts +276 -0
  116. package/src/utils/oauth/cerebras.ts +16 -0
  117. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  118. package/src/utils/oauth/cursor.ts +157 -0
  119. package/src/utils/oauth/deepseek.ts +53 -0
  120. package/src/utils/oauth/firepass.ts +24 -0
  121. package/src/utils/oauth/fireworks.ts +15 -0
  122. package/src/utils/oauth/github-copilot.ts +362 -0
  123. package/src/utils/oauth/gitlab-duo.ts +123 -0
  124. package/src/utils/oauth/google-antigravity.ts +200 -0
  125. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  126. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  127. package/src/utils/oauth/huggingface.ts +62 -0
  128. package/src/utils/oauth/index.ts +484 -0
  129. package/src/utils/oauth/kagi.ts +47 -0
  130. package/src/utils/oauth/kilo.ts +87 -0
  131. package/src/utils/oauth/kimi.ts +254 -0
  132. package/src/utils/oauth/litellm.ts +47 -0
  133. package/src/utils/oauth/lm-studio.ts +38 -0
  134. package/src/utils/oauth/minimax-code.ts +78 -0
  135. package/src/utils/oauth/moonshot.ts +23 -0
  136. package/src/utils/oauth/nanogpt.ts +15 -0
  137. package/src/utils/oauth/nvidia.ts +70 -0
  138. package/src/utils/oauth/oauth.html +203 -0
  139. package/src/utils/oauth/ollama-cloud.ts +28 -0
  140. package/src/utils/oauth/ollama.ts +47 -0
  141. package/src/utils/oauth/openai-codex.ts +299 -0
  142. package/src/utils/oauth/opencode.ts +49 -0
  143. package/src/utils/oauth/openrouter.ts +20 -0
  144. package/src/utils/oauth/parallel.ts +46 -0
  145. package/src/utils/oauth/perplexity.ts +206 -0
  146. package/src/utils/oauth/pkce.ts +18 -0
  147. package/src/utils/oauth/qianfan.ts +58 -0
  148. package/src/utils/oauth/qwen-portal.ts +60 -0
  149. package/src/utils/oauth/synthetic.ts +15 -0
  150. package/src/utils/oauth/tavily.ts +46 -0
  151. package/src/utils/oauth/together.ts +16 -0
  152. package/src/utils/oauth/types.ts +99 -0
  153. package/src/utils/oauth/venice.ts +59 -0
  154. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  155. package/src/utils/oauth/vllm.ts +40 -0
  156. package/src/utils/oauth/wafer.ts +50 -0
  157. package/src/utils/oauth/xai-oauth.ts +342 -0
  158. package/src/utils/oauth/xiaomi.ts +139 -0
  159. package/src/utils/oauth/zai.ts +60 -0
  160. package/src/utils/oauth/zenmux.ts +15 -0
  161. package/src/utils/oauth/zhipu.ts +60 -0
  162. package/src/utils/overflow.ts +137 -0
  163. package/src/utils/parse-bind.ts +54 -0
  164. package/src/utils/provider-response.ts +30 -0
  165. package/src/utils/request-debug.ts +336 -0
  166. package/src/utils/retry-after.ts +110 -0
  167. package/src/utils/retry.ts +54 -0
  168. package/src/utils/schema/CONSTRAINTS.md +164 -0
  169. package/src/utils/schema/adapt.ts +36 -0
  170. package/src/utils/schema/compatibility.ts +435 -0
  171. package/src/utils/schema/dereference.ts +98 -0
  172. package/src/utils/schema/draft.ts +341 -0
  173. package/src/utils/schema/equality.ts +97 -0
  174. package/src/utils/schema/fields.ts +191 -0
  175. package/src/utils/schema/index.ts +13 -0
  176. package/src/utils/schema/json-schema-validator.ts +577 -0
  177. package/src/utils/schema/meta-validator.ts +167 -0
  178. package/src/utils/schema/normalize.ts +1588 -0
  179. package/src/utils/schema/spill.ts +43 -0
  180. package/src/utils/schema/stamps.ts +97 -0
  181. package/src/utils/schema/types.ts +10 -0
  182. package/src/utils/schema/wire.ts +293 -0
  183. package/src/utils/schema/zod-decontaminate.ts +331 -0
  184. package/src/utils/sdk-stream-timeout.ts +43 -0
  185. package/src/utils/sse-debug.ts +289 -0
  186. package/src/utils/stream-markup-healing.ts +612 -0
  187. package/src/utils/tool-choice.ts +99 -0
  188. package/src/utils/validation.ts +1024 -0
  189. package/src/utils.ts +166 -0
  190. package/dist/api-registry.d.ts +0 -20
  191. package/dist/api-registry.d.ts.map +0 -1
  192. package/dist/api-registry.js +0 -44
  193. package/dist/api-registry.js.map +0 -1
  194. package/dist/bedrock-provider.d.ts +0 -5
  195. package/dist/bedrock-provider.d.ts.map +0 -1
  196. package/dist/bedrock-provider.js +0 -6
  197. package/dist/bedrock-provider.js.map +0 -1
  198. package/dist/cli.d.ts +0 -3
  199. package/dist/cli.d.ts.map +0 -1
  200. package/dist/cli.js +0 -130
  201. package/dist/cli.js.map +0 -1
  202. package/dist/env-api-keys.d.ts +0 -18
  203. package/dist/env-api-keys.d.ts.map +0 -1
  204. package/dist/env-api-keys.js +0 -178
  205. package/dist/env-api-keys.js.map +0 -1
  206. package/dist/image-models.d.ts +0 -10
  207. package/dist/image-models.d.ts.map +0 -1
  208. package/dist/image-models.generated.d.ts +0 -440
  209. package/dist/image-models.generated.d.ts.map +0 -1
  210. package/dist/image-models.generated.js +0 -442
  211. package/dist/image-models.generated.js.map +0 -1
  212. package/dist/image-models.js +0 -23
  213. package/dist/image-models.js.map +0 -1
  214. package/dist/images-api-registry.d.ts +0 -14
  215. package/dist/images-api-registry.d.ts.map +0 -1
  216. package/dist/images-api-registry.js +0 -22
  217. package/dist/images-api-registry.js.map +0 -1
  218. package/dist/images.d.ts +0 -4
  219. package/dist/images.d.ts.map +0 -1
  220. package/dist/images.js +0 -14
  221. package/dist/images.js.map +0 -1
  222. package/dist/index.d.ts +0 -32
  223. package/dist/index.d.ts.map +0 -1
  224. package/dist/index.js +0 -20
  225. package/dist/index.js.map +0 -1
  226. package/dist/models.d.ts +0 -18
  227. package/dist/models.d.ts.map +0 -1
  228. package/dist/models.generated.d.ts +0 -17707
  229. package/dist/models.generated.d.ts.map +0 -1
  230. package/dist/models.generated.js +0 -16561
  231. package/dist/models.generated.js.map +0 -1
  232. package/dist/models.js +0 -71
  233. package/dist/models.js.map +0 -1
  234. package/dist/oauth.d.ts +0 -2
  235. package/dist/oauth.d.ts.map +0 -1
  236. package/dist/oauth.js +0 -2
  237. package/dist/oauth.js.map +0 -1
  238. package/dist/providers/aery-error-formatting.d.ts +0 -13
  239. package/dist/providers/aery-error-formatting.d.ts.map +0 -1
  240. package/dist/providers/aery-error-formatting.js +0 -112
  241. package/dist/providers/aery-error-formatting.js.map +0 -1
  242. package/dist/providers/amazon-bedrock.d.ts +0 -38
  243. package/dist/providers/amazon-bedrock.d.ts.map +0 -1
  244. package/dist/providers/amazon-bedrock.js +0 -763
  245. package/dist/providers/amazon-bedrock.js.map +0 -1
  246. package/dist/providers/anthropic.d.ts +0 -71
  247. package/dist/providers/anthropic.d.ts.map +0 -1
  248. package/dist/providers/anthropic.js +0 -949
  249. package/dist/providers/anthropic.js.map +0 -1
  250. package/dist/providers/azure-openai-responses.d.ts +0 -15
  251. package/dist/providers/azure-openai-responses.d.ts.map +0 -1
  252. package/dist/providers/azure-openai-responses.js +0 -225
  253. package/dist/providers/azure-openai-responses.js.map +0 -1
  254. package/dist/providers/cloudflare.d.ts +0 -13
  255. package/dist/providers/cloudflare.d.ts.map +0 -1
  256. package/dist/providers/cloudflare.js +0 -26
  257. package/dist/providers/cloudflare.js.map +0 -1
  258. package/dist/providers/faux.d.ts +0 -56
  259. package/dist/providers/faux.d.ts.map +0 -1
  260. package/dist/providers/faux.js +0 -368
  261. package/dist/providers/faux.js.map +0 -1
  262. package/dist/providers/github-copilot-headers.d.ts +0 -8
  263. package/dist/providers/github-copilot-headers.d.ts.map +0 -1
  264. package/dist/providers/github-copilot-headers.js +0 -29
  265. package/dist/providers/github-copilot-headers.js.map +0 -1
  266. package/dist/providers/google-gemini-cli.d.ts +0 -74
  267. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  268. package/dist/providers/google-gemini-cli.js +0 -779
  269. package/dist/providers/google-gemini-cli.js.map +0 -1
  270. package/dist/providers/google-shared.d.ts +0 -70
  271. package/dist/providers/google-shared.d.ts.map +0 -1
  272. package/dist/providers/google-shared.js +0 -329
  273. package/dist/providers/google-shared.js.map +0 -1
  274. package/dist/providers/google-vertex.d.ts +0 -15
  275. package/dist/providers/google-vertex.d.ts.map +0 -1
  276. package/dist/providers/google-vertex.js +0 -442
  277. package/dist/providers/google-vertex.js.map +0 -1
  278. package/dist/providers/google.d.ts +0 -13
  279. package/dist/providers/google.d.ts.map +0 -1
  280. package/dist/providers/google.js +0 -400
  281. package/dist/providers/google.js.map +0 -1
  282. package/dist/providers/images/openrouter.d.ts +0 -3
  283. package/dist/providers/images/openrouter.d.ts.map +0 -1
  284. package/dist/providers/images/openrouter.js +0 -129
  285. package/dist/providers/images/openrouter.js.map +0 -1
  286. package/dist/providers/images/register-builtins.d.ts +0 -4
  287. package/dist/providers/images/register-builtins.d.ts.map +0 -1
  288. package/dist/providers/images/register-builtins.js +0 -34
  289. package/dist/providers/images/register-builtins.js.map +0 -1
  290. package/dist/providers/mistral.d.ts +0 -25
  291. package/dist/providers/mistral.d.ts.map +0 -1
  292. package/dist/providers/mistral.js +0 -535
  293. package/dist/providers/mistral.js.map +0 -1
  294. package/dist/providers/openai-codex-responses.d.ts +0 -30
  295. package/dist/providers/openai-codex-responses.d.ts.map +0 -1
  296. package/dist/providers/openai-codex-responses.js +0 -1090
  297. package/dist/providers/openai-codex-responses.js.map +0 -1
  298. package/dist/providers/openai-completions.d.ts +0 -19
  299. package/dist/providers/openai-completions.d.ts.map +0 -1
  300. package/dist/providers/openai-completions.js +0 -950
  301. package/dist/providers/openai-completions.js.map +0 -1
  302. package/dist/providers/openai-prompt-cache.d.ts +0 -3
  303. package/dist/providers/openai-prompt-cache.d.ts.map +0 -1
  304. package/dist/providers/openai-prompt-cache.js +0 -10
  305. package/dist/providers/openai-prompt-cache.js.map +0 -1
  306. package/dist/providers/openai-responses-shared.d.ts +0 -18
  307. package/dist/providers/openai-responses-shared.d.ts.map +0 -1
  308. package/dist/providers/openai-responses-shared.js +0 -492
  309. package/dist/providers/openai-responses-shared.js.map +0 -1
  310. package/dist/providers/openai-responses.d.ts +0 -13
  311. package/dist/providers/openai-responses.d.ts.map +0 -1
  312. package/dist/providers/openai-responses.js +0 -237
  313. package/dist/providers/openai-responses.js.map +0 -1
  314. package/dist/providers/register-builtins.d.ts +0 -38
  315. package/dist/providers/register-builtins.d.ts.map +0 -1
  316. package/dist/providers/register-builtins.js +0 -278
  317. package/dist/providers/register-builtins.js.map +0 -1
  318. package/dist/providers/simple-options.d.ts +0 -8
  319. package/dist/providers/simple-options.d.ts.map +0 -1
  320. package/dist/providers/simple-options.js +0 -41
  321. package/dist/providers/simple-options.js.map +0 -1
  322. package/dist/providers/transform-messages.d.ts +0 -8
  323. package/dist/providers/transform-messages.d.ts.map +0 -1
  324. package/dist/providers/transform-messages.js +0 -184
  325. package/dist/providers/transform-messages.js.map +0 -1
  326. package/dist/session-resources.d.ts +0 -4
  327. package/dist/session-resources.d.ts.map +0 -1
  328. package/dist/session-resources.js +0 -22
  329. package/dist/session-resources.js.map +0 -1
  330. package/dist/stream.d.ts +0 -8
  331. package/dist/stream.d.ts.map +0 -1
  332. package/dist/stream.js +0 -27
  333. package/dist/stream.js.map +0 -1
  334. package/dist/types.d.ts +0 -498
  335. package/dist/types.d.ts.map +0 -1
  336. package/dist/types.js +0 -2
  337. package/dist/types.js.map +0 -1
  338. package/dist/utils/diagnostics.d.ts +0 -19
  339. package/dist/utils/diagnostics.d.ts.map +0 -1
  340. package/dist/utils/diagnostics.js +0 -25
  341. package/dist/utils/diagnostics.js.map +0 -1
  342. package/dist/utils/event-stream.d.ts +0 -21
  343. package/dist/utils/event-stream.d.ts.map +0 -1
  344. package/dist/utils/event-stream.js +0 -81
  345. package/dist/utils/event-stream.js.map +0 -1
  346. package/dist/utils/hash.d.ts +0 -3
  347. package/dist/utils/hash.d.ts.map +0 -1
  348. package/dist/utils/hash.js +0 -14
  349. package/dist/utils/hash.js.map +0 -1
  350. package/dist/utils/headers.d.ts +0 -2
  351. package/dist/utils/headers.d.ts.map +0 -1
  352. package/dist/utils/headers.js +0 -8
  353. package/dist/utils/headers.js.map +0 -1
  354. package/dist/utils/json-parse.d.ts +0 -16
  355. package/dist/utils/json-parse.d.ts.map +0 -1
  356. package/dist/utils/json-parse.js +0 -113
  357. package/dist/utils/json-parse.js.map +0 -1
  358. package/dist/utils/node-http-proxy.d.ts +0 -10
  359. package/dist/utils/node-http-proxy.d.ts.map +0 -1
  360. package/dist/utils/node-http-proxy.js +0 -97
  361. package/dist/utils/node-http-proxy.js.map +0 -1
  362. package/dist/utils/oauth/anthropic.d.ts +0 -25
  363. package/dist/utils/oauth/anthropic.d.ts.map +0 -1
  364. package/dist/utils/oauth/anthropic.js +0 -335
  365. package/dist/utils/oauth/anthropic.js.map +0 -1
  366. package/dist/utils/oauth/device-code.d.ts +0 -19
  367. package/dist/utils/oauth/device-code.d.ts.map +0 -1
  368. package/dist/utils/oauth/device-code.js +0 -55
  369. package/dist/utils/oauth/device-code.js.map +0 -1
  370. package/dist/utils/oauth/github-copilot.d.ts +0 -30
  371. package/dist/utils/oauth/github-copilot.d.ts.map +0 -1
  372. package/dist/utils/oauth/github-copilot.js +0 -268
  373. package/dist/utils/oauth/github-copilot.js.map +0 -1
  374. package/dist/utils/oauth/google-antigravity.d.ts +0 -26
  375. package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
  376. package/dist/utils/oauth/google-antigravity.js +0 -377
  377. package/dist/utils/oauth/google-antigravity.js.map +0 -1
  378. package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
  379. package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
  380. package/dist/utils/oauth/google-gemini-cli.js +0 -482
  381. package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
  382. package/dist/utils/oauth/index.d.ts +0 -63
  383. package/dist/utils/oauth/index.d.ts.map +0 -1
  384. package/dist/utils/oauth/index.js +0 -131
  385. package/dist/utils/oauth/index.js.map +0 -1
  386. package/dist/utils/oauth/oauth-page.d.ts +0 -3
  387. package/dist/utils/oauth/oauth-page.d.ts.map +0 -1
  388. package/dist/utils/oauth/oauth-page.js +0 -105
  389. package/dist/utils/oauth/oauth-page.js.map +0 -1
  390. package/dist/utils/oauth/openai-codex.d.ts +0 -34
  391. package/dist/utils/oauth/openai-codex.d.ts.map +0 -1
  392. package/dist/utils/oauth/openai-codex.js +0 -385
  393. package/dist/utils/oauth/openai-codex.js.map +0 -1
  394. package/dist/utils/oauth/pkce.d.ts +0 -13
  395. package/dist/utils/oauth/pkce.d.ts.map +0 -1
  396. package/dist/utils/oauth/pkce.js +0 -31
  397. package/dist/utils/oauth/pkce.js.map +0 -1
  398. package/dist/utils/oauth/types.d.ts +0 -64
  399. package/dist/utils/oauth/types.d.ts.map +0 -1
  400. package/dist/utils/oauth/types.js +0 -2
  401. package/dist/utils/oauth/types.js.map +0 -1
  402. package/dist/utils/overflow.d.ts +0 -56
  403. package/dist/utils/overflow.d.ts.map +0 -1
  404. package/dist/utils/overflow.js +0 -151
  405. package/dist/utils/overflow.js.map +0 -1
  406. package/dist/utils/sanitize-unicode.d.ts +0 -22
  407. package/dist/utils/sanitize-unicode.d.ts.map +0 -1
  408. package/dist/utils/sanitize-unicode.js +0 -26
  409. package/dist/utils/sanitize-unicode.js.map +0 -1
  410. package/dist/utils/typebox-helpers.d.ts +0 -17
  411. package/dist/utils/typebox-helpers.d.ts.map +0 -1
  412. package/dist/utils/typebox-helpers.js +0 -21
  413. package/dist/utils/typebox-helpers.js.map +0 -1
  414. package/dist/utils/validation.d.ts +0 -18
  415. package/dist/utils/validation.d.ts.map +0 -1
  416. package/dist/utils/validation.js +0 -281
  417. package/dist/utils/validation.js.map +0 -1
@@ -0,0 +1,818 @@
1
+ /**
2
+ * aery auth-gateway HTTP server.
3
+ *
4
+ * Accepts any provider-format request (OpenAI chat-completions, Anthropic
5
+ * messages, OpenAI Responses) and dispatches through aery-ai's `streamSimple()`
6
+ * — which handles credential injection, anthropic-beta headers, codex
7
+ * websocket transport, and all the per-provider intricacies. The gateway is
8
+ * pure protocol translation: foreign wire → aery Context → aery-ai stream() →
9
+ * aery events → foreign wire.
10
+ *
11
+ * Endpoints:
12
+ * GET /healthz → unauth; ok + version
13
+ * GET /v1/usage → aggregated provider usage (5-min per-credential cache via AuthStorage)
14
+ * GET /v1/credentials/check → per-credential auth probe (diagnose 401s in a multi-account pool)
15
+ * GET /v1/models → list known models from the registry
16
+ * POST /v1/chat/completions → OpenAI chat-completions in/out
17
+ * POST /v1/messages → Anthropic messages in/out
18
+ * POST /v1/responses → OpenAI Responses in/out
19
+ */
20
+ import { extractRetryHint, logger } from "@aryee337/aery-utils";
21
+ import type { AuthStorage } from "../auth-storage";
22
+ import { Effort } from "../model-thinking";
23
+ import * as aeryNative from "../providers/aery-native-server";
24
+ import * as anthropicMessages from "../providers/anthropic-messages-server";
25
+ import * as openaiChat from "../providers/openai-chat-server";
26
+ import * as openaiResponses from "../providers/openai-responses-server";
27
+ import { isUsageLimitError } from "../rate-limit-utils";
28
+ import { streamSimple } from "../stream";
29
+ import type { Api, AssistantMessageEventStream, Context, Model, SimpleStreamOptions } from "../types";
30
+ import { parseBind } from "../utils/parse-bind";
31
+ import { captureRequestHeaders, corsHeaders, isAuthorized, json, resolvePeer, withCors } from "./http";
32
+ import type {
33
+ AuthGatewayServerHandle,
34
+ AuthGatewayServerOptions,
35
+ AuthGatewayFormatModule as FormatModule,
36
+ AuthGatewayParsedRequest as ParsedFormatRequest,
37
+ } from "./types";
38
+ import { DEFAULT_AUTH_GATEWAY_BIND } from "./types";
39
+
40
+ // ParsedFormatRequest / ParsedFormatOptions / FormatModule come from ./types.
41
+
42
+ export type ModelResolver = (modelId: string) => Model<Api> | undefined;
43
+
44
/**
 * Everything needed to boot the auth gateway: the base server options plus
 * the credential store and the model lookup hooks supplied by the caller.
 */
export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
	/** Credential source; the caller is expected to pass a broker-backed AuthStorage. */
	storage: AuthStorage;
	/**
	 * Maps a model id requested by a client onto an aery-ai Model, or
	 * `undefined` when the id is unknown. The caller provides this from a
	 * ModelRegistry, which lives in `coding-agent` so that `aery-ai` does not
	 * take on an inverse dependency.
	 */
	resolveModel: ModelResolver;
	/** Optional source for the `/v1/models` listing; yields the full set of models. */
	listModels?: () => Iterable<Model<Api>>;
}
56
+
57
+ // `parseBind` lives in ../utils/parse-bind so the gateway and broker can't
58
+ // drift on accepted inputs (e.g. empty hostname, IPv6 brackets).
59
+
60
+ const FORMAT_ROUTES: Record<string, { module: FormatModule; label: string }> = {
61
+ "/v1/chat/completions": { module: openaiChat, label: "openai-chat" },
62
+ "/v1/messages": { module: anthropicMessages, label: "anthropic-messages" },
63
+ "/v1/responses": { module: openaiResponses, label: "openai-responses" },
64
+ };
65
+
66
+ // (passthrough fast-path removed — it bypassed aery-ai provider logic, in
67
+ // particular the Anthropic Claude-Code OAuth system-prompt prefix injection.
68
+ // Every request now takes the translate path so credential-specific request
69
+ // shaping always applies.)
70
+
71
+ // Options the caller's wire format may carry but the resolved provider can't
72
+ // honour are dropped silently in `buildStreamOptions`. We used to 400 here
73
+ // (`Unsupported option: temperature for openai-codex-responses`), but every
74
+ // realistic client (llm-git, openai SDK, anthropic SDK) bakes some of these
75
+ // defaults in without knowing which model they'll resolve to. Failing loudly
76
+ // just turned that into per-call config hell. Silent strip is what the
77
+ // upstream provider would do anyway when it ignores extra fields.
78
+
79
/**
 * Build a deterministic cache identity for a request from the pieces that
 * stay fixed for the lifetime of one logical conversation:
 *
 *   - the model id,
 *   - the system prompt (entries joined with a blank line), when non-empty,
 *   - the tool definitions (JSON-serialised), when non-empty,
 *   - the first message, reduced to `{ role, content }`.
 *
 * Why: Codex-class backends only cache prefixes when an explicit
 * `prompt_cache_key` is supplied, whereas Anthropic-style clients signal
 * caching through `cache_control` markers and never send an opaque key.
 * Deriving a key here lets cross-protocol prefix caching engage without the
 * client knowing about it. Seeding with the first message keeps two unrelated
 * chats that share a system prompt out of the same cache bucket.
 *
 * Anthropic-backed requests ignore `sessionId`, so the key is harmless there.
 *
 * @param modelId - Client-requested model id; always the first seed segment.
 * @param context - Parsed request context (system prompt, tools, messages).
 * @returns The first 128 bits of the SHA-256 seed digest, rendered as
 *   lowercase hex in UUID shape (8-4-4-4-12, 36 characters).
 */
function deriveSessionId(modelId: string, context: Context): string {
	const { systemPrompt, tools, messages } = context;
	const segments: string[] = [modelId];

	if (systemPrompt && systemPrompt.length > 0) {
		segments.push(systemPrompt.join("\n\n"));
	}
	if (tools && tools.length > 0) {
		segments.push(JSON.stringify(tools));
	}

	const opener = messages?.[0];
	if (opener) {
		// Only role + content go into the hash: timestamps and provider
		// metadata are re-stamped on every parsed Message, and including them
		// would make the key drift between turns of the same conversation.
		segments.push(JSON.stringify({ role: opener.role, content: opener.content }));
	}

	// NUL-separated so adjacent segments cannot run into each other.
	const digest = new Bun.CryptoHasher("sha256").update(segments.join("\u0000")).digest("hex");

	// Slice the leading 32 hex chars into UUID-shaped groups. The resulting
	// 36-char string stays within the ≤64-char limit that Codex's
	// `normalizeOpenAIResponsesPromptCacheKey` passes through verbatim.
	const cuts = [0, 8, 12, 16, 20, 32];
	const groups: string[] = [];
	for (let i = 1; i < cuts.length; i++) {
		groups.push(digest.slice(cuts[i - 1], cuts[i]));
	}
	return groups.join("-");
}
117
+
118
/**
 * Translate the typed options parsed from a foreign wire request into the
 * `SimpleStreamOptions` handed to `streamSimple()`.
 *
 * Only fields the client actually set are copied across, so provider
 * defaults stay in force for everything else. Options with no
 * `SimpleStreamOptions` counterpart are not forwarded; their presence is
 * reported once through a debug log.
 *
 * @param parsed - Parsed request: model id, context and typed options.
 * @param api - API of the resolved model; used to drop sampling options for
 *   `openai-codex-responses`.
 * @param signal - Abort signal forwarded to the stream.
 * @returns Stream options ready to pass to `streamSimple()`.
 */
function buildStreamOptions(parsed: ParsedFormatRequest, api: Api, signal: AbortSignal): SimpleStreamOptions {
	const { options: requested } = parsed;
	const streamOptions: SimpleStreamOptions = { signal };

	// The Codex backend rejects `temperature` / `top_p` (per-model defaults
	// only), so those two are silently withheld for that one provider.
	const acceptsSampling = api !== "openai-codex-responses";

	if (requested.maxOutputTokens !== undefined) {
		streamOptions.maxTokens = requested.maxOutputTokens;
	}
	if (acceptsSampling && requested.temperature !== undefined) {
		streamOptions.temperature = requested.temperature;
	}
	if (acceptsSampling && requested.topP !== undefined) {
		streamOptions.topP = requested.topP;
	}
	if (requested.topK !== undefined) {
		streamOptions.topK = requested.topK;
	}
	if (requested.minP !== undefined) {
		streamOptions.minP = requested.minP;
	}
	if (requested.stopSequences !== undefined) {
		streamOptions.stopSequences = requested.stopSequences;
	}
	if (requested.presencePenalty !== undefined) {
		streamOptions.presencePenalty = requested.presencePenalty;
	}
	if (requested.frequencyPenalty !== undefined) {
		streamOptions.frequencyPenalty = requested.frequencyPenalty;
	}
	if (requested.repetitionPenalty !== undefined) {
		streamOptions.repetitionPenalty = requested.repetitionPenalty;
	}
	if (requested.metadata !== undefined) {
		streamOptions.metadata = requested.metadata;
	}
	if (requested.headers !== undefined) {
		// Copy rather than alias so later changes to the stream options never
		// write through to the parsed request.
		streamOptions.headers = { ...requested.headers };
	}

	const { toolChoice } = requested;
	if (toolChoice !== undefined) {
		if (typeof toolChoice === "object") {
			// Object form names one specific tool.
			streamOptions.toolChoice = { type: "tool", name: toolChoice.name };
		} else {
			streamOptions.toolChoice = toolChoice;
		}
	}

	if (requested.reasoning !== undefined) {
		streamOptions.reasoning = requested.reasoning;
	}
	if (requested.disableReasoning !== undefined) {
		streamOptions.disableReasoning = requested.disableReasoning;
	}
	if (requested.hideThinkingSummary !== undefined) {
		streamOptions.hideThinkingSummary = requested.hideThinkingSummary;
	}
	if (requested.taskBudget !== undefined) {
		streamOptions.taskBudget = requested.taskBudget;
	}
	if (requested.serviceTier !== undefined) {
		streamOptions.serviceTier = requested.serviceTier;
	}
	if (requested.cacheRetention !== undefined) {
		streamOptions.cacheRetention = requested.cacheRetention;
	}

	// A client-supplied `prompt_cache_key` wins. Otherwise fall back to the
	// key from `deriveSessionId` (model id, system prompt, tools and first
	// message) so prefix caching engages on Codex-class backends across turns
	// of the same logical conversation. The same value doubles as session id.
	const cacheKey = requested.promptCacheKey ?? deriveSessionId(parsed.modelId, parsed.context);
	streamOptions.promptCacheKey = cacheKey;
	streamOptions.sessionId = cacheKey;

	if (requested.thinkingBudgets) {
		streamOptions.thinkingBudgets = { ...requested.thinkingBudgets };
	}

	const explicitBudget = requested.explicitThinkingBudgetTokens;
	if (explicitBudget !== undefined) {
		// Mirrors Rust's `resolve_thinking_budget`: the explicit budget is
		// pinned to the effort the client asked for (High when unspecified),
		// and the effort itself is set too so providers that gate on
		// `reasoning` actually surface the budget.
		const pinnedEffort = requested.reasoning ?? Effort.High;
		streamOptions.thinkingBudgets = {
			...(streamOptions.thinkingBudgets ?? {}),
			[pinnedEffort]: explicitBudget,
		};
		streamOptions.reasoning ??= pinnedEffort;
	}

	// Fields without a matching `SimpleStreamOptions` slot yet. They are
	// reported once at debug level so they are visible when wiring a new
	// provider, but are deliberately NOT widened into `options.extra`: every
	// consumer would have to re-implement the typed parse to read them back.
	// TODO(aery-ai): land first-class fields and replace these blocks.
	const unforwarded = [
		requested.parallelToolCalls,
		requested.previousResponseId,
		requested.seed,
		requested.logitBias,
		requested.user,
		requested.responseFormat,
	];
	if (unforwarded.some(value => value !== undefined)) {
		logger.debug("auth-gateway dropped unsupported typed options", {
			api,
			parallelToolCalls: requested.parallelToolCalls,
			previousResponseId: requested.previousResponseId,
			seed: requested.seed,
			hasLogitBias: requested.logitBias !== undefined,
			user: requested.user,
			hasResponseFormat: requested.responseFormat !== undefined,
		});
	}

	return streamOptions;
}
193
+
/**
 * Map an arbitrary thrown value onto the gateway's `{ status, type, message }`
 * error triple. Signals are consulted from most to least trustworthy, and the
 * first one that applies wins:
 *
 * 1. A numeric `status` property on the thrown object (truncated to an
 *    integer) is handed straight to `bucketStatus`.
 * 2. An `Error` whose `name` is `AbortError` becomes 499 / `request_aborted`.
 * 3. A status code embedded in the message text (`Google API error (400): …`,
 *    `HTTP 429`, `status=503`) is extracted and bucketed.
 * 4. Phrase heuristics run last. Every pattern is anchored on word
 *    boundaries: a bare `"rate"` substring test would also match provider
 *    identifiers such as `GenerateContentRequest` and misreport a 400 as a
 *    429 `rate_limit_error`.
 * 5. Anything still unrecognised is reported as 502 / `upstream_error`.
 *
 * @param err - The thrown value. Non-`Error` values are stringified to form
 *   the message.
 * @returns The status code, format-neutral error type, and message to emit.
 */
export function classifyGatewayError(err: unknown): { status: number; type: string; message: string } {
	const message = err instanceof Error ? err.message : String(err);
	const verdict = (status: number, type: string): { status: number; type: string; message: string } => ({
		status,
		type,
		message,
	});

	// Step 1: an explicit numeric `status` on the thrown object is authoritative.
	if (typeof err === "object" && err !== null) {
		const { status } = err as { status?: unknown };
		if (typeof status === "number") return bucketStatus(status | 0, message);
	}

	// Step 2: aborts identified by error name.
	if (err instanceof Error && err.name === "AbortError") return verdict(499, "request_aborted");

	// Step 3: a status code written into the message. The extractor only
	// accepts numbers next to a contextual keyword or wrapped in parentheses,
	// so incidental three-digit values ("took 200ms") are ignored.
	const fromMessage = extractEmbeddedStatus(message);
	if (fromMessage !== undefined) return bucketStatus(fromMessage, message);

	// Step 4: word-boundaried phrase heuristics, checked in a fixed order.
	const abortPhrase = /\baborted\b|\babort signal\b/i;
	if (abortPhrase.test(message)) return verdict(499, "request_aborted");

	const authPhrase = /\b(?:unauthorized|forbidden)\b/i;
	if (authPhrase.test(message)) return verdict(401, "authentication_error");

	// Rate-limit wording that cannot collide with `GenerateContentRequest`,
	// `accelerate`, `iterate`, `deprecated`, and similar.
	const rateLimitPhrase =
		/\brate[- _]?limit(?:s|ed|ing)?\b|\bquota(?:_exceeded| exceeded)?\b|\btoo[- _]many[- _]requests\b/i;
	// Usage-limit messages carry no embedded status (e.g. "You have hit your
	// ChatGPT usage limit … Try again in ~158 min."). The shared
	// `isUsageLimitError` helper is reused rather than duplicating its
	// patterns here; without it these errors would fall through to the
	// default 502 / `upstream_error`.
	if (rateLimitPhrase.test(message) || isUsageLimitError(message)) return verdict(429, "rate_limit_error");

	const badRequestPhrase = /\b(?:unsupported|invalid_request|invalid request|bad request|malformed)\b/i;
	if (badRequestPhrase.test(message)) return verdict(400, "invalid_request_error");

	// Step 5: nothing matched.
	return verdict(502, "upstream_error");
}
255
+
/**
 * Translate a numeric HTTP status into the gateway's error triple.
 *
 * - 401 / 403 → `authentication_error`
 * - 429 → `rate_limit_error`
 * - 499 → `request_aborted`, the same pairing every abort path in this file
 *   emits; previously a 499 arriving here was labelled
 *   `invalid_request_error`
 * - any other 4xx → `invalid_request_error`
 * - 500 and above → `upstream_error`
 * - everything else (below 400, or not a comparable number) is not a usable
 *   error status and is reported as 502 / `upstream_error`
 *
 * @param status - Status code taken from the error object or its message.
 * @param message - Error text passed through unchanged.
 * @returns The status to emit, its format-neutral type, and the message.
 */
function bucketStatus(status: number, message: string): { status: number; type: string; message: string } {
	if (status === 401 || status === 403) return { status, type: "authentication_error", message };
	if (status === 429) return { status, type: "rate_limit_error", message };
	// Keep "client closed request" distinguishable from a malformed request.
	if (status === 499) return { status, type: "request_aborted", message };
	if (status >= 400 && status < 500) return { status, type: "invalid_request_error", message };
	if (status >= 500) return { status, type: "upstream_error", message };
	return { status: 502, type: "upstream_error", message };
}
263
+
/**
 * Find an HTTP status code written into an error message.
 *
 * Two shapes are recognised, case-insensitively, and the first match in the
 * string is used:
 *
 * - three digits following `HTTP`, `API error`, or `status` / `status code` /
 *   `status_code` / `status-code`, optionally separated by `:` or `=` and an
 *   opening parenthesis (`Google API error (400)`, `HTTP 429: …`,
 *   `status_code=429`);
 * - three digits wrapped in parentheses on their own (`(503)`).
 *
 * @param message - Error text to scan.
 * @returns The code when one is found and lies in 100–599; otherwise
 *   `undefined`, so numbers without such context are never guessed at.
 */
function extractEmbeddedStatus(message: string): number | undefined {
	const statusPattern =
		/(?:\bHTTP\b|\bAPI error\b|\bstatus(?:[- _]?code)?\b)\s*[:=]?\s*\(?\s*(\d{3})\b|\((\d{3})\)/i;
	const found = statusPattern.exec(message);
	if (found === null) return undefined;

	// Group 1 is the keyword-prefixed form, group 2 the bare parenthesised form.
	const [, afterKeyword, inParens] = found;
	const digits = afterKeyword ?? inParens;
	if (!digits) return undefined;

	const code = Number.parseInt(digits, 10);
	if (!Number.isFinite(code)) return undefined;
	return code >= 100 && code < 600 ? code : undefined;
}
280
+
/**
 * `onAuthError` hook body shared by the gateway's request handlers: decide
 * what to do with the credential that just failed and, where possible,
 * return a replacement key so the caller can retry.
 *
 * The failure is split on {@link isUsageLimitError}:
 *
 * - **Usage limit** → {@link AuthStorage.markUsageLimitReached} is called for
 *   this `sessionId`, passing any retry hint extracted from the message and
 *   the model's `baseUrl`. When it reports `false`, `undefined` is returned
 *   and no replacement key is requested. The credential is deliberately not
 *   invalidated on this path — per the original design note, doing so would
 *   orphan accounts whose limit resets hours later.
 * - **Anything else** → {@link AuthStorage.invalidateCredentialMatching} is
 *   called for `oldKey`, then a replacement is requested.
 *
 * A replacement is always fetched with `getApiKey` under the same
 * `sessionId` and model id. Each path writes one debug log line.
 *
 * @param storage - Credential store to update and query.
 * @param model - Resolved model; supplies `id` and `baseUrl`.
 * @param sessionId - Session id the failing credential was obtained under.
 * @param provider - Provider name reported by the failing request.
 * @param oldKey - The key that failed.
 * @param error - The failure; non-`Error` values are stringified.
 * @param signal - Abort signal forwarded to every storage call.
 * @param format - Route label, used only for logging.
 * @param peer - Caller identity, used only for logging.
 * @returns The replacement key, or `undefined` when no retry should happen.
 */
async function refreshGatewayApiKeyAfterAuthError(
	storage: AuthStorage,
	model: Model<Api>,
	sessionId: string,
	provider: string,
	oldKey: string,
	error: unknown,
	signal: AbortSignal,
	format: string,
	peer: string,
): Promise<string | undefined> {
	const reason = error instanceof Error ? error.message : String(error);
	const fetchReplacement = () => storage.getApiKey(provider, sessionId, { modelId: model.id, signal });

	if (!isUsageLimitError(reason)) {
		// Not a usage cap: retire the failing credential, then pick again.
		await storage.invalidateCredentialMatching(provider, oldKey, { sessionId, signal });
		logger.debug("auth-gateway retrying provider request after credential invalidation", {
			format,
			provider,
			peer,
			error: reason,
		});
		return fetchReplacement();
	}

	// Usage cap: block the credential temporarily instead of invalidating it.
	const retryAfterMs = extractRetryHint(undefined, reason);
	const switched = await storage.markUsageLimitReached(provider, sessionId, {
		retryAfterMs,
		baseUrl: model.baseUrl,
		signal,
	});
	logger.debug("auth-gateway retrying provider request after usage-limit block", {
		format,
		provider,
		peer,
		switched,
		retryAfterMs,
		error: reason,
	});
	return switched ? fetchReplacement() : undefined;
}
342
+
/**
 * Build the route-specific error response for a client that disconnected:
 * status 499, type `request_aborted`, message `client closed request`.
 *
 * @param route - Route whose format module renders the error envelope.
 * @returns Whatever the format module's `formatError` produces.
 */
function clientClosedResponse(route: { module: FormatModule }): Response {
	const { module: formatModule } = route;
	return formatModule.formatError(499, "request_aborted", "client closed request");
}
346
+
/**
 * Create an `AbortController` that follows the request's own abort signal.
 *
 * If the request is already aborted the new controller is aborted at once
 * with the same reason; otherwise a one-shot `abort` listener forwards the
 * reason when the request signal fires.
 *
 * @param req - Incoming request whose `signal` is mirrored.
 * @returns The mirroring controller.
 */
function mirrorRequestAbort(req: Request): AbortController {
	const mirror = new AbortController();
	const source = req.signal;
	const forward = (): void => mirror.abort(source.reason);

	if (source.aborted) forward();
	else source.addEventListener("abort", forward, { once: true });

	return mirror;
}
356
+
357
+ // (handlePassthrough removed — see note above.)
358
+
/**
 * Serve one request on a wire-format route (a `FORMAT_ROUTES` entry).
 *
 * Pipeline, in order:
 *
 * 1. Mirror the request's abort signal and read the JSON body.
 * 2. Read the top-level `model` string and resolve it via
 *    `bootOpts.resolveModel` (400 when missing, 404 when unknown).
 * 3. Parse the body with the route's format module, then merge the
 *    gateway-captured request headers underneath the parser's own headers.
 * 4. Pick the session id — the request's `promptCacheKey` when present,
 *    otherwise `deriveSessionId(modelId, context)` — and store it back as
 *    `promptCacheKey` when that was unset.
 * 5. Fetch the credential from `bootOpts.storage.getApiKey` (401 when none).
 * 6. Build stream options, attach the key and the auth-error retry hook,
 *    and start `streamSimple`.
 * 7. Non-streaming requests await the final message and return it encoded
 *    as JSON (or a formatted error); streaming requests return an SSE
 *    response built from the format module's `encodeStream`.
 *
 * Between steps the mirrored signal is re-checked; once it is aborted the
 * handler answers with the route's 499 `request_aborted` envelope.
 *
 * @param route - Format module plus a label used in log lines.
 * @param bootOpts - Gateway boot options (model resolver, credential storage).
 * @param req - Incoming request.
 * @param peer - Caller identity, used only for logging.
 * @returns The response to send; errors are rendered by the format module.
 */
async function handleFormatEndpoint(
	route: { module: FormatModule; label: string },
	bootOpts: AuthGatewayBootOptions,
	req: Request,
	peer: string,
): Promise<Response> {
	const controller = mirrorRequestAbort(req);
	const { signal } = controller;
	const format = route.module;
	const closed = (): Response => clientClosedResponse(route);
	const invalidRequest = (status: number, detail: string): Response =>
		format.formatError(status, "invalid_request_error", detail);

	if (signal.aborted) return closed();

	let payload: unknown;
	try {
		payload = await req.json();
	} catch (error) {
		return signal.aborted ? closed() : invalidRequest(400, `Invalid JSON body: ${String(error)}`);
	}
	if (signal.aborted) return closed();

	// Every supported wire format carries the model id in a top-level `model`
	// field, so it is read here ahead of the strict parse; a missing id then
	// gets a coherent error envelope.
	const rawModel =
		typeof payload === "object" && payload !== null ? (payload as { model?: unknown }).model : undefined;
	const requestedModel = typeof rawModel === "string" ? rawModel : undefined;
	if (!requestedModel) return invalidRequest(400, "Missing top-level `model` field");

	const model = bootOpts.resolveModel(requestedModel);
	if (!model) return invalidRequest(404, `Unknown model: ${requestedModel}`);

	// The wire request is parsed before the credential lookup because the
	// session id handed to AuthStorage comes out of the parsed request.
	let parsed: ParsedFormatRequest;
	try {
		parsed = format.parseRequest(payload, req.headers);
	} catch (error) {
		if (signal.aborted) return closed();
		return invalidRequest(400, error instanceof Error ? error.message : String(error));
	}

	// Gateway-captured passthrough headers go underneath the parser's own:
	// on a key collision the parser's value wins.
	const passthroughHeaders = captureRequestHeaders(req.headers);
	parsed.options.headers = { ...passthroughHeaders, ...(parsed.options.headers ?? {}) };
	if (signal.aborted) return closed();

	// A client-supplied `promptCacheKey` doubles as the session id; otherwise
	// one is derived. Either way it is written back so `buildStreamOptions`
	// sees the same value.
	const sessionId = parsed.options.promptCacheKey ?? deriveSessionId(parsed.modelId, parsed.context);
	parsed.options.promptCacheKey ??= sessionId;

	// The credential is resolved here and passed along as `apiKey`.
	let apiKey: string | undefined;
	try {
		apiKey = await bootOpts.storage.getApiKey(model.provider, sessionId, { modelId: model.id, signal });
	} catch (error) {
		if (signal.aborted) return closed();
		const failure = classifyGatewayError(error);
		logger.warn("auth-gateway getApiKey threw", { provider: model.provider, peer, error: failure.message });
		return format.formatError(failure.status, failure.type, failure.message);
	}
	if (signal.aborted) return closed();
	if (!apiKey) {
		return format.formatError(
			401,
			"authentication_error",
			`No credential available for provider ${model.provider}`,
		);
	}

	const streamOpts = buildStreamOptions(parsed, model.api, signal);
	streamOpts.apiKey = apiKey;
	streamOpts.onAuthError = (provider, oldKey, error) =>
		refreshGatewayApiKeyAfterAuthError(
			bootOpts.storage,
			model,
			sessionId,
			provider,
			oldKey,
			error,
			signal,
			route.label,
			peer,
		);

	logger.info("auth-gateway request", {
		format: route.label,
		model: parsed.modelId,
		resolvedProvider: model.provider,
		resolvedModel: model.id,
		stream: parsed.stream,
		peer,
	});

	let events: AssistantMessageEventStream;
	try {
		if (signal.aborted) return closed();
		events = streamSimple(model, parsed.context, streamOpts);
	} catch (error) {
		const failure = classifyGatewayError(error);
		logger.warn("auth-gateway streamSimple threw", { format: route.label, error: failure.message, peer });
		return format.formatError(failure.status, failure.type, failure.message);
	}

	if (!parsed.stream) {
		try {
			if (signal.aborted) return closed();
			const message = await events.result();
			const wasAborted = message.stopReason === "aborted";
			if (!wasAborted && message.stopReason !== "error") {
				return json(200, format.encodeResponse(message, parsed.modelId));
			}

			const errorMessage = message.errorMessage ?? (wasAborted ? "Request was aborted" : "Upstream request failed");
			logger.warn("auth-gateway non-streaming failed", {
				format: route.label,
				reason: message.stopReason,
				error: errorMessage,
				peer,
			});
			if (wasAborted) return format.formatError(499, "request_aborted", errorMessage);
			const failure = classifyGatewayError(new Error(errorMessage));
			return format.formatError(failure.status, failure.type, errorMessage);
		} catch (error) {
			if (signal.aborted) return closed();
			const failure = classifyGatewayError(error);
			logger.warn("auth-gateway non-streaming aborted", {
				format: route.label,
				error: failure.message,
				peer,
			});
			return format.formatError(failure.status, failure.type, failure.message);
		}
	}
	if (signal.aborted) return closed();

	const sseHeaders = {
		"Content-Type": "text/event-stream; charset=utf-8",
		"Cache-Control": "no-cache",
		Connection: "keep-alive",
		// Ask buffering proxies (nginx, ingress controllers) to pass events
		// through immediately rather than holding them until a buffer fills.
		"X-Accel-Buffering": "no",
	};
	const sseStream = format.encodeStream(events, parsed.modelId, parsed.options);
	return new Response(sseStream, { status: 200, headers: sseHeaders });
}
531
+
/**
 * Serve `POST /v1/aery/stream`, the aery-native route.
 *
 * The request is parsed by `aeryNative.parseRequest` and the result is fed
 * to `streamSimple` without going through a foreign wire-format module;
 * responses are either `{ message }` JSON (non-streaming) or the stream
 * produced by `aeryNative.encodeStream`.
 *
 * The surrounding plumbing mirrors the format-endpoint handler: abort
 * mirroring, model resolution, credential lookup keyed by session id, the
 * auth-error retry hook, removal of `temperature` / `topP` for
 * `openai-codex-responses` models, and passthrough-header merging.
 *
 * @param bootOpts - Gateway boot options (model resolver, credential storage).
 * @param req - Incoming request.
 * @param peer - Caller identity, used only for logging.
 * @returns The response to send; errors are rendered by `aeryNative.formatError`.
 */
async function handleAeryNative(bootOpts: AuthGatewayBootOptions, req: Request, peer: string): Promise<Response> {
	const controller = mirrorRequestAbort(req);
	const { signal } = controller;
	const aborted = (): Response => aeryNative.formatError(499, "request_aborted", "client closed request");
	const invalidRequest = (status: number, detail: string): Response =>
		aeryNative.formatError(status, "invalid_request_error", detail);

	if (signal.aborted) return aborted();

	let payload: unknown;
	try {
		payload = await req.json();
	} catch (error) {
		return signal.aborted ? aborted() : invalidRequest(400, `Invalid JSON body: ${String(error)}`);
	}
	if (signal.aborted) return aborted();

	let parsed: aeryNative.AeryNativeParsedRequest;
	try {
		parsed = aeryNative.parseRequest(payload, req.headers);
	} catch (error) {
		if (signal.aborted) return aborted();
		return invalidRequest(400, error instanceof Error ? error.message : String(error));
	}

	const model = bootOpts.resolveModel(parsed.modelId);
	if (!model) return invalidRequest(404, `Unknown model: ${parsed.modelId}`);

	// A client-supplied `sessionId` is kept; otherwise one is derived from the
	// model id and context. The same id keys the credential lookup below, so
	// credential stickiness follows the conversation.
	const sessionId = parsed.options.sessionId ?? deriveSessionId(parsed.modelId, parsed.context);
	parsed.options.sessionId ??= sessionId;

	let apiKey: string | undefined;
	try {
		apiKey = await bootOpts.storage.getApiKey(model.provider, sessionId, { modelId: model.id, signal });
	} catch (error) {
		if (signal.aborted) return aborted();
		const failure = classifyGatewayError(error);
		logger.warn("auth-gateway getApiKey threw", { provider: model.provider, peer, error: failure.message });
		return aeryNative.formatError(failure.status, failure.type, failure.message);
	}
	if (signal.aborted) return aborted();
	if (!apiKey) {
		return aeryNative.formatError(
			401,
			"authentication_error",
			`No credential available for provider ${model.provider}`,
		);
	}

	// Start from the client's parsed options and layer the server-controlled
	// fields (credential, abort signal, retry hook) on top.
	const streamOpts: SimpleStreamOptions = { ...parsed.options, apiKey, signal };
	streamOpts.onAuthError = (provider, oldKey, error) =>
		refreshGatewayApiKeyAfterAuthError(
			bootOpts.storage,
			model,
			sessionId,
			provider,
			oldKey,
			error,
			signal,
			"aery-native",
			peer,
		);
	// Codex-responses models must not receive sampling parameters.
	if (model.api === "openai-codex-responses") {
		delete streamOpts.temperature;
		delete streamOpts.topP;
	}
	// Gateway-captured passthrough headers go underneath the client's own:
	// on a key collision the client's value wins.
	const passthroughHeaders = captureRequestHeaders(req.headers);
	streamOpts.headers = { ...passthroughHeaders, ...(streamOpts.headers ?? {}) };
	streamOpts.sessionId ??= sessionId;

	logger.info("auth-gateway request", {
		format: "aery-native",
		model: parsed.modelId,
		resolvedProvider: model.provider,
		resolvedModel: model.id,
		stream: parsed.stream,
		peer,
	});

	let events: AssistantMessageEventStream;
	try {
		if (signal.aborted) return aborted();
		events = streamSimple(model, parsed.context, streamOpts);
	} catch (error) {
		const failure = classifyGatewayError(error);
		logger.warn("auth-gateway streamSimple threw", { format: "aery-native", error: failure.message, peer });
		return aeryNative.formatError(failure.status, failure.type, failure.message);
	}

	if (!parsed.stream) {
		try {
			if (signal.aborted) return aborted();
			const message = await events.result();
			const wasAborted = message.stopReason === "aborted";
			if (!wasAborted && message.stopReason !== "error") return json(200, { message });

			const errorMessage = message.errorMessage ?? (wasAborted ? "Request was aborted" : "Upstream request failed");
			logger.warn("auth-gateway non-streaming failed", {
				format: "aery-native",
				reason: message.stopReason,
				error: errorMessage,
				peer,
			});
			if (wasAborted) return aeryNative.formatError(499, "request_aborted", errorMessage);
			const failure = classifyGatewayError(new Error(errorMessage));
			return aeryNative.formatError(failure.status, failure.type, errorMessage);
		} catch (error) {
			if (signal.aborted) return aborted();
			const failure = classifyGatewayError(error);
			logger.warn("auth-gateway non-streaming aborted", { format: "aery-native", error: failure.message, peer });
			return aeryNative.formatError(failure.status, failure.type, failure.message);
		}
	}
	if (signal.aborted) return aborted();

	const sseHeaders = {
		"Content-Type": "text/event-stream; charset=utf-8",
		"Cache-Control": "no-cache",
		Connection: "keep-alive",
		"X-Accel-Buffering": "no",
	};
	return new Response(aeryNative.encodeStream(events), { status: 200, headers: sseHeaders });
}
689
+
/**
 * Handler for `GET /v1/usage`: return the usage reports known to
 * `AuthStorage`, minus each report's `raw` field.
 *
 * `fetchUsageReports` is optional on the storage object; when it is absent
 * or yields nothing, an empty list is returned. Any caching of the reports
 * happens inside `AuthStorage`, not in this function.
 *
 * @param storage - Credential store exposing `fetchUsageReports`.
 * @param signal - Abort signal forwarded to the fetch.
 * @returns A 200 JSON response `{ generatedAt, reports }`, where
 *   `generatedAt` is the current `Date.now()` value.
 */
async function handleUsage(storage: AuthStorage, signal: AbortSignal): Promise<Response> {
	const fetched = await storage.fetchUsageReports?.({ signal });
	// Strip the provider-specific `raw` payload; the remaining fields are
	// passed through untouched.
	const reports = (fetched ?? []).map(report => {
		const { raw: _raw, ...slim } = report;
		return slim;
	});
	return json(200, { generatedAt: Date.now(), reports });
}
704
+
/**
 * Handler for `GET /v1/credentials/check`: run the per-credential probe in
 * `AuthStorage.checkCredentials` and return its result verbatim.
 *
 * This endpoint exists alongside `/v1/usage` so a caller can see which
 * individual credential is failing. How the probe itself is scheduled
 * (the original note describes it as one credential at a time, to avoid
 * tripping provider rate limits) is decided inside `checkCredentials` and
 * is not visible from this function.
 *
 * @param storage - Credential store exposing `checkCredentials`.
 * @param signal - Abort signal forwarded to the probe.
 * @returns A 200 JSON response `{ generatedAt, credentials }`, where
 *   `generatedAt` is the `Date.now()` value taken after the probe finishes.
 */
async function handleCredentialsCheck(storage: AuthStorage, signal: AbortSignal): Promise<Response> {
	const probeResults = await storage.checkCredentials({ signal });
	const generatedAt = Date.now();
	return json(200, { generatedAt, credentials: probeResults });
}
720
+
/**
 * Handler for `GET /v1/models`: render the model catalog as a list object.
 *
 * Each model from the optional `opts.listModels()` becomes
 * `{ id, object: "model", owned_by: <provider>, api }`. Without a
 * `listModels` callback the list is empty.
 *
 * @param opts - Gateway boot options; only `listModels` is read.
 * @returns A 200 JSON response `{ object: "list", data }`.
 */
function handleModelsList(opts: AuthGatewayBootOptions): Response {
	const data = opts.listModels
		? Array.from(opts.listModels(), ({ id, provider, api }) => ({
				id,
				object: "model" as const,
				owned_by: provider,
				api,
			}))
		: [];
	return json(200, { object: "list", data });
}
731
+
/**
 * Start the auth-gateway HTTP server on `opts.bind` (or the default bind)
 * and return a handle describing where it is listening.
 *
 * Request dispatch, in order:
 *
 * 1. `OPTIONS *` → 204 with CORS headers, before any auth check.
 * 2. `GET /healthz` → `{ ok, version }`, also unauthenticated.
 * 3. Everything else must pass `isAuthorized` against `opts.bearerTokens`
 *    (401 otherwise).
 * 4. `GET /v1/usage`, `GET /v1/credentials/check`, `POST` on any
 *    `FORMAT_ROUTES` path, `POST /v1/aery/stream`, `GET /v1/models`.
 * 5. Anything else → plain JSON 404.
 *
 * An exception escaping a handler is logged and answered with a JSON 500.
 * Every response on these paths is passed through `withCors`.
 *
 * @param opts - Boot options: bind address, bearer tokens, version string,
 *   credential storage, and model resolver/lister.
 * @returns Handle with the listening `url`, `port`, `hostname`, and an async
 *   `close()` that resolves once the server has been stopped. An IPv6
 *   literal host is wrapped in brackets in `url` so the URL is valid;
 *   `hostname` is reported as-is.
 */
export function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle {
	const bind = parseBind(opts.bind ?? DEFAULT_AUTH_GATEWAY_BIND);
	const tokens = new Set<string>(opts.bearerTokens);
	const version = opts.version;

	const server = Bun.serve({
		hostname: bind.hostname,
		port: bind.port,
		fetch: async (req): Promise<Response> => {
			const url = new URL(req.url);
			const pathname = url.pathname;
			const peer = resolvePeer(req);
			// CORS preflight is always answered without auth — browsers send
			// preflights pre-authentication and a 401 here breaks the actual
			// request before the bearer is ever attached.
			if (req.method === "OPTIONS") {
				return new Response(null, { status: 204, headers: corsHeaders(req) });
			}
			try {
				if (req.method === "GET" && pathname === "/healthz") {
					return withCors(json(200, { ok: true, version }), req);
				}
				if (!isAuthorized(req, tokens)) {
					logger.info("auth-gateway request unauthorized", { method: req.method, path: pathname, peer });
					return withCors(json(401, { error: "unauthorized" }), req);
				}

				// Aggregated usage report.
				if (req.method === "GET" && pathname === "/v1/usage") {
					return withCors(await handleUsage(opts.storage, req.signal), req);
				}

				// Per-credential auth probe — shows which credential in a
				// multi-account pool is failing.
				if (req.method === "GET" && pathname === "/v1/credentials/check") {
					return withCors(await handleCredentialsCheck(opts.storage, req.signal), req);
				}

				// Provider-format dispatch.
				const formatRoute = FORMAT_ROUTES[pathname];
				if (formatRoute && req.method === "POST") {
					return withCors(await handleFormatEndpoint(formatRoute, opts, req, peer), req);
				}

				// Aery-native fast path. Same auth + provider plumbing as the
				// foreign-wire routes, just without the wire-format translation.
				if (req.method === "POST" && pathname === "/v1/aery/stream") {
					return withCors(await handleAeryNative(opts, req, peer), req);
				}

				// Model catalog.
				if (req.method === "GET" && pathname === "/v1/models") {
					return withCors(handleModelsList(opts), req);
				}

				// Route-table miss: no format module to defer to, so we emit a
				// plain JSON 404 rather than guessing at a protocol-specific envelope.
				return withCors(json(404, { error: `No route: ${req.method} ${pathname}` }), req);
			} catch (error) {
				logger.error("auth-gateway handler crashed", {
					method: req.method,
					path: pathname,
					peer,
					error: String(error),
				});
				return withCors(json(500, { error: "internal error" }), req);
			}
		},
		// Max-out Bun's idle timeout. Long thinking-budget calls can sit idle
		// for minutes before the first token arrives; the default kills them.
		idleTimeout: 255,
	});

	const boundHost = server.hostname ?? bind.hostname;
	const boundPort = server.port ?? bind.port;
	// An IPv6 literal must be bracketed inside a URL authority
	// (`http://[::1]:8080`); without brackets the colons are ambiguous with
	// the port separator and the URL does not parse.
	const urlHost = boundHost.includes(":") && !boundHost.startsWith("[") ? `[${boundHost}]` : boundHost;
	return {
		url: `http://${urlHost}:${boundPort}`,
		port: boundPort,
		hostname: boundHost,
		close: async () => {
			// `stop` returns a promise; awaiting it means `close()` resolves
			// only after the listener is actually down, and a failed stop is
			// surfaced to the caller instead of becoming an unhandled rejection.
			await server.stop(true);
		},
	};
}