@gajae-code/ai 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (349) hide show
  1. package/CHANGELOG.md +2644 -0
  2. package/README.md +1181 -0
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +5 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +96 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/types.d.ts +105 -0
  10. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  11. package/dist/types/auth-gateway/http.d.ts +39 -0
  12. package/dist/types/auth-gateway/index.d.ts +3 -0
  13. package/dist/types/auth-gateway/server.d.ts +17 -0
  14. package/dist/types/auth-gateway/types.d.ts +115 -0
  15. package/dist/types/auth-storage.d.ts +641 -0
  16. package/dist/types/cli.d.ts +2 -0
  17. package/dist/types/index.d.ts +49 -0
  18. package/dist/types/model-cache.d.ts +17 -0
  19. package/dist/types/model-manager.d.ts +62 -0
  20. package/dist/types/model-thinking.d.ts +71 -0
  21. package/dist/types/models.d.ts +12 -0
  22. package/dist/types/provider-details.d.ts +24 -0
  23. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  24. package/dist/types/provider-models/descriptors.d.ts +48 -0
  25. package/dist/types/provider-models/google.d.ts +20 -0
  26. package/dist/types/provider-models/index.d.ts +5 -0
  27. package/dist/types/provider-models/ollama.d.ts +7 -0
  28. package/dist/types/provider-models/openai-compat.d.ts +237 -0
  29. package/dist/types/provider-models/special.d.ts +16 -0
  30. package/dist/types/providers/amazon-bedrock.d.ts +36 -0
  31. package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
  32. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  33. package/dist/types/providers/anthropic.d.ts +188 -0
  34. package/dist/types/providers/aws-credentials.d.ts +43 -0
  35. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  36. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  37. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  38. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  39. package/dist/types/providers/cursor.d.ts +42 -0
  40. package/dist/types/providers/error-message.d.ts +27 -0
  41. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  42. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  43. package/dist/types/providers/google-auth.d.ts +24 -0
  44. package/dist/types/providers/google-gemini-cli.d.ts +72 -0
  45. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  46. package/dist/types/providers/google-shared.d.ts +163 -0
  47. package/dist/types/providers/google-types.d.ts +138 -0
  48. package/dist/types/providers/google-vertex.d.ts +7 -0
  49. package/dist/types/providers/google.d.ts +4 -0
  50. package/dist/types/providers/grammar.d.ts +1 -0
  51. package/dist/types/providers/kimi.d.ts +27 -0
  52. package/dist/types/providers/mock.d.ts +175 -0
  53. package/dist/types/providers/ollama.d.ts +6 -0
  54. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  55. package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
  56. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  57. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  58. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  59. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  60. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  61. package/dist/types/providers/openai-completions-compat.d.ts +25 -0
  62. package/dist/types/providers/openai-completions.d.ts +33 -0
  63. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  64. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  65. package/dist/types/providers/openai-responses-shared.d.ts +89 -0
  66. package/dist/types/providers/openai-responses.d.ts +32 -0
  67. package/dist/types/providers/pi-native-client.d.ts +13 -0
  68. package/dist/types/providers/pi-native-server.d.ts +68 -0
  69. package/dist/types/providers/register-builtins.d.ts +31 -0
  70. package/dist/types/providers/synthetic.d.ts +26 -0
  71. package/dist/types/providers/transform-messages.d.ts +12 -0
  72. package/dist/types/providers/vision-guard.d.ts +8 -0
  73. package/dist/types/rate-limit-utils.d.ts +19 -0
  74. package/dist/types/stream.d.ts +24 -0
  75. package/dist/types/types.d.ts +746 -0
  76. package/dist/types/usage/claude.d.ts +3 -0
  77. package/dist/types/usage/gemini.d.ts +2 -0
  78. package/dist/types/usage/github-copilot.d.ts +7 -0
  79. package/dist/types/usage/google-antigravity.d.ts +2 -0
  80. package/dist/types/usage/kimi.d.ts +2 -0
  81. package/dist/types/usage/minimax-code.d.ts +2 -0
  82. package/dist/types/usage/openai-codex.d.ts +3 -0
  83. package/dist/types/usage/shared.d.ts +1 -0
  84. package/dist/types/usage/zai.d.ts +2 -0
  85. package/dist/types/usage.d.ts +258 -0
  86. package/dist/types/utils/abort.d.ts +19 -0
  87. package/dist/types/utils/anthropic-auth.d.ts +31 -0
  88. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  89. package/dist/types/utils/discovery/codex.d.ts +38 -0
  90. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  91. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  92. package/dist/types/utils/discovery/index.d.ts +4 -0
  93. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  94. package/dist/types/utils/event-stream.d.ts +28 -0
  95. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  96. package/dist/types/utils/foundry.d.ts +1 -0
  97. package/dist/types/utils/h2-fetch.d.ts +22 -0
  98. package/dist/types/utils/http-inspector.d.ts +31 -0
  99. package/dist/types/utils/idle-iterator.d.ts +67 -0
  100. package/dist/types/utils/json-parse.d.ts +10 -0
  101. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  102. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  103. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  104. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  105. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  106. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  107. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  108. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  109. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  110. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  111. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  112. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  113. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  114. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  115. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  116. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  117. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  118. package/dist/types/utils/oauth/index.d.ts +38 -0
  119. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  120. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  121. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  122. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  123. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  124. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  125. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  126. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  127. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  128. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  129. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  130. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  131. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  132. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  133. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  134. package/dist/types/utils/oauth/pkce.d.ts +8 -0
  135. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  136. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  137. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  138. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  139. package/dist/types/utils/oauth/together.d.ts +1 -0
  140. package/dist/types/utils/oauth/types.d.ts +44 -0
  141. package/dist/types/utils/oauth/venice.d.ts +18 -0
  142. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  143. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  144. package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
  145. package/dist/types/utils/oauth/zai.d.ts +18 -0
  146. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  147. package/dist/types/utils/overflow.d.ts +54 -0
  148. package/dist/types/utils/parse-bind.d.ts +23 -0
  149. package/dist/types/utils/provider-response.d.ts +3 -0
  150. package/dist/types/utils/retry-after.d.ts +3 -0
  151. package/dist/types/utils/retry.d.ts +26 -0
  152. package/dist/types/utils/schema/adapt.d.ts +24 -0
  153. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  154. package/dist/types/utils/schema/dereference.d.ts +11 -0
  155. package/dist/types/utils/schema/draft.d.ts +10 -0
  156. package/dist/types/utils/schema/equality.d.ts +4 -0
  157. package/dist/types/utils/schema/fields.d.ts +49 -0
  158. package/dist/types/utils/schema/index.d.ts +13 -0
  159. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  160. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  161. package/dist/types/utils/schema/normalize.d.ts +93 -0
  162. package/dist/types/utils/schema/spill.d.ts +8 -0
  163. package/dist/types/utils/schema/stamps.d.ts +25 -0
  164. package/dist/types/utils/schema/types.d.ts +4 -0
  165. package/dist/types/utils/schema/wire.d.ts +54 -0
  166. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  167. package/dist/types/utils/sse-debug.d.ts +10 -0
  168. package/dist/types/utils/tool-call-healing.d.ts +71 -0
  169. package/dist/types/utils/tool-choice.d.ts +50 -0
  170. package/dist/types/utils/validation.d.ts +17 -0
  171. package/dist/types/utils.d.ts +28 -0
  172. package/package.json +146 -0
  173. package/src/api-registry.ts +96 -0
  174. package/src/auth-broker/client.ts +358 -0
  175. package/src/auth-broker/index.ts +5 -0
  176. package/src/auth-broker/refresher.ts +127 -0
  177. package/src/auth-broker/remote-store.ts +623 -0
  178. package/src/auth-broker/server.ts +644 -0
  179. package/src/auth-broker/types.ts +127 -0
  180. package/src/auth-broker/wire-schemas.ts +200 -0
  181. package/src/auth-gateway/http.ts +194 -0
  182. package/src/auth-gateway/index.ts +3 -0
  183. package/src/auth-gateway/server.ts +717 -0
  184. package/src/auth-gateway/types.ts +134 -0
  185. package/src/auth-storage.ts +4104 -0
  186. package/src/cli.ts +262 -0
  187. package/src/index.ts +54 -0
  188. package/src/model-cache.ts +129 -0
  189. package/src/model-manager.ts +450 -0
  190. package/src/model-thinking.ts +691 -0
  191. package/src/models.json +73853 -0
  192. package/src/models.json.d.ts +9 -0
  193. package/src/models.ts +56 -0
  194. package/src/prompts/turn-aborted-guidance.md +4 -0
  195. package/src/provider-details.ts +90 -0
  196. package/src/provider-models/bundled-references.ts +38 -0
  197. package/src/provider-models/descriptors.ts +308 -0
  198. package/src/provider-models/google.ts +91 -0
  199. package/src/provider-models/index.ts +5 -0
  200. package/src/provider-models/ollama.ts +153 -0
  201. package/src/provider-models/openai-compat.ts +2275 -0
  202. package/src/provider-models/special.ts +67 -0
  203. package/src/providers/amazon-bedrock.ts +849 -0
  204. package/src/providers/anthropic-messages-server-schema.ts +229 -0
  205. package/src/providers/anthropic-messages-server.ts +677 -0
  206. package/src/providers/anthropic.ts +2696 -0
  207. package/src/providers/aws-credentials.ts +501 -0
  208. package/src/providers/aws-eventstream.ts +185 -0
  209. package/src/providers/aws-sigv4.ts +218 -0
  210. package/src/providers/azure-openai-responses.ts +337 -0
  211. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  212. package/src/providers/cursor/proto/agent.proto +3526 -0
  213. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  214. package/src/providers/cursor/proto/buf.yaml +17 -0
  215. package/src/providers/cursor.ts +2561 -0
  216. package/src/providers/error-message.ts +21 -0
  217. package/src/providers/github-copilot-headers.ts +140 -0
  218. package/src/providers/gitlab-duo.ts +372 -0
  219. package/src/providers/google-auth.ts +252 -0
  220. package/src/providers/google-gemini-cli.ts +795 -0
  221. package/src/providers/google-gemini-headers.ts +41 -0
  222. package/src/providers/google-shared.ts +902 -0
  223. package/src/providers/google-types.ts +167 -0
  224. package/src/providers/google-vertex.ts +88 -0
  225. package/src/providers/google.ts +41 -0
  226. package/src/providers/grammar.ts +70 -0
  227. package/src/providers/kimi.ts +52 -0
  228. package/src/providers/mock.ts +500 -0
  229. package/src/providers/ollama.ts +544 -0
  230. package/src/providers/openai-anthropic-shim.ts +138 -0
  231. package/src/providers/openai-chat-server-schema.ts +243 -0
  232. package/src/providers/openai-chat-server.ts +628 -0
  233. package/src/providers/openai-codex/constants.ts +43 -0
  234. package/src/providers/openai-codex/request-transformer.ts +161 -0
  235. package/src/providers/openai-codex/response-handler.ts +81 -0
  236. package/src/providers/openai-codex-responses.ts +2598 -0
  237. package/src/providers/openai-completions-compat.ts +279 -0
  238. package/src/providers/openai-completions.ts +1853 -0
  239. package/src/providers/openai-responses-server-schema.ts +290 -0
  240. package/src/providers/openai-responses-server.ts +1183 -0
  241. package/src/providers/openai-responses-shared.ts +800 -0
  242. package/src/providers/openai-responses.ts +621 -0
  243. package/src/providers/pi-native-client.ts +228 -0
  244. package/src/providers/pi-native-server.ts +210 -0
  245. package/src/providers/register-builtins.ts +412 -0
  246. package/src/providers/synthetic.ts +50 -0
  247. package/src/providers/transform-messages.ts +309 -0
  248. package/src/providers/vision-guard.ts +31 -0
  249. package/src/rate-limit-utils.ts +84 -0
  250. package/src/stream.ts +895 -0
  251. package/src/types.ts +884 -0
  252. package/src/usage/claude.ts +431 -0
  253. package/src/usage/gemini.ts +250 -0
  254. package/src/usage/github-copilot.ts +421 -0
  255. package/src/usage/google-antigravity.ts +201 -0
  256. package/src/usage/kimi.ts +271 -0
  257. package/src/usage/minimax-code.ts +31 -0
  258. package/src/usage/openai-codex.ts +503 -0
  259. package/src/usage/shared.ts +10 -0
  260. package/src/usage/zai.ts +247 -0
  261. package/src/usage.ts +183 -0
  262. package/src/utils/abort.ts +51 -0
  263. package/src/utils/anthropic-auth.ts +87 -0
  264. package/src/utils/discovery/antigravity.ts +261 -0
  265. package/src/utils/discovery/codex.ts +371 -0
  266. package/src/utils/discovery/cursor.ts +306 -0
  267. package/src/utils/discovery/gemini.ts +248 -0
  268. package/src/utils/discovery/index.ts +4 -0
  269. package/src/utils/discovery/openai-compatible.ts +224 -0
  270. package/src/utils/event-stream.ts +142 -0
  271. package/src/utils/fireworks-model-id.ts +30 -0
  272. package/src/utils/foundry.ts +8 -0
  273. package/src/utils/h2-fetch.ts +60 -0
  274. package/src/utils/http-inspector.ts +176 -0
  275. package/src/utils/idle-iterator.ts +250 -0
  276. package/src/utils/json-parse.ts +148 -0
  277. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  278. package/src/utils/oauth/anthropic.ts +200 -0
  279. package/src/utils/oauth/api-key-login.ts +87 -0
  280. package/src/utils/oauth/api-key-validation.ts +92 -0
  281. package/src/utils/oauth/callback-server.ts +276 -0
  282. package/src/utils/oauth/cerebras.ts +16 -0
  283. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  284. package/src/utils/oauth/cursor.ts +157 -0
  285. package/src/utils/oauth/deepseek.ts +53 -0
  286. package/src/utils/oauth/firepass.ts +24 -0
  287. package/src/utils/oauth/fireworks.ts +15 -0
  288. package/src/utils/oauth/github-copilot.ts +362 -0
  289. package/src/utils/oauth/gitlab-duo.ts +123 -0
  290. package/src/utils/oauth/google-antigravity.ts +200 -0
  291. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  292. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  293. package/src/utils/oauth/huggingface.ts +62 -0
  294. package/src/utils/oauth/index.ts +444 -0
  295. package/src/utils/oauth/kagi.ts +47 -0
  296. package/src/utils/oauth/kilo.ts +87 -0
  297. package/src/utils/oauth/kimi.ts +254 -0
  298. package/src/utils/oauth/litellm.ts +47 -0
  299. package/src/utils/oauth/lm-studio.ts +38 -0
  300. package/src/utils/oauth/minimax-code.ts +78 -0
  301. package/src/utils/oauth/moonshot.ts +16 -0
  302. package/src/utils/oauth/nanogpt.ts +15 -0
  303. package/src/utils/oauth/nvidia.ts +70 -0
  304. package/src/utils/oauth/oauth.html +199 -0
  305. package/src/utils/oauth/ollama-cloud.ts +28 -0
  306. package/src/utils/oauth/ollama.ts +47 -0
  307. package/src/utils/oauth/openai-codex.ts +299 -0
  308. package/src/utils/oauth/opencode.ts +49 -0
  309. package/src/utils/oauth/parallel.ts +46 -0
  310. package/src/utils/oauth/perplexity.ts +206 -0
  311. package/src/utils/oauth/pkce.ts +18 -0
  312. package/src/utils/oauth/qianfan.ts +58 -0
  313. package/src/utils/oauth/qwen-portal.ts +60 -0
  314. package/src/utils/oauth/synthetic.ts +16 -0
  315. package/src/utils/oauth/tavily.ts +46 -0
  316. package/src/utils/oauth/together.ts +16 -0
  317. package/src/utils/oauth/types.ts +94 -0
  318. package/src/utils/oauth/venice.ts +59 -0
  319. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  320. package/src/utils/oauth/vllm.ts +40 -0
  321. package/src/utils/oauth/xiaomi.ts +137 -0
  322. package/src/utils/oauth/zai.ts +60 -0
  323. package/src/utils/oauth/zenmux.ts +15 -0
  324. package/src/utils/overflow.ts +137 -0
  325. package/src/utils/parse-bind.ts +54 -0
  326. package/src/utils/provider-response.ts +30 -0
  327. package/src/utils/retry-after.ts +110 -0
  328. package/src/utils/retry.ts +54 -0
  329. package/src/utils/schema/CONSTRAINTS.md +164 -0
  330. package/src/utils/schema/adapt.ts +36 -0
  331. package/src/utils/schema/compatibility.ts +435 -0
  332. package/src/utils/schema/dereference.ts +98 -0
  333. package/src/utils/schema/draft.ts +341 -0
  334. package/src/utils/schema/equality.ts +97 -0
  335. package/src/utils/schema/fields.ts +190 -0
  336. package/src/utils/schema/index.ts +13 -0
  337. package/src/utils/schema/json-schema-validator.ts +577 -0
  338. package/src/utils/schema/meta-validator.ts +167 -0
  339. package/src/utils/schema/normalize.ts +1588 -0
  340. package/src/utils/schema/spill.ts +43 -0
  341. package/src/utils/schema/stamps.ts +97 -0
  342. package/src/utils/schema/types.ts +11 -0
  343. package/src/utils/schema/wire.ts +213 -0
  344. package/src/utils/schema/zod-decontaminate.ts +331 -0
  345. package/src/utils/sse-debug.ts +289 -0
  346. package/src/utils/tool-call-healing.ts +271 -0
  347. package/src/utils/tool-choice.ts +99 -0
  348. package/src/utils/validation.ts +1019 -0
  349. package/src/utils.ts +166 -0
@@ -0,0 +1,717 @@
1
+ /**
2
+ * gjc auth-gateway HTTP server.
3
+ *
4
+ * Accepts any provider-format request (OpenAI chat-completions, Anthropic
5
+ * messages, OpenAI Responses) and dispatches through pi-ai's `streamSimple()`
6
+ * — which handles credential injection, anthropic-beta headers, OpenAI code backend
7
+ * websocket transport, and all the per-provider intricacies. The gateway is
8
+ * pure protocol translation: foreign wire → gjc Context → pi-ai stream() →
9
+ * gjc events → foreign wire.
10
+ *
11
+ * Endpoints:
12
+ * GET /healthz → unauth; ok + version
13
+ * GET /v1/usage → aggregated provider usage (5-min per-credential cache via AuthStorage)
14
+ * GET /v1/credentials/check → per-credential auth probe (diagnose 401s in a multi-account pool)
15
+ * GET /v1/models → list known models from the registry
16
+ * POST /v1/chat/completions → OpenAI chat-completions in/out
17
+ * POST /v1/messages → Anthropic messages in/out
18
+ * POST /v1/responses → OpenAI Responses in/out
19
+ */
20
+ import { logger } from "@gajae-code/utils";
21
+ import type { AuthStorage } from "../auth-storage";
22
+ import { Effort } from "../model-thinking";
23
+ import * as anthropicMessages from "../providers/anthropic-messages-server";
24
+ import * as openaiChat from "../providers/openai-chat-server";
25
+ import * as openaiResponses from "../providers/openai-responses-server";
26
+ import * as piNative from "../providers/pi-native-server";
27
+ import { streamSimple } from "../stream";
28
+ import type { Api, AssistantMessageEventStream, Context, Model, SimpleStreamOptions } from "../types";
29
+ import { parseBind } from "../utils/parse-bind";
30
+ import { captureRequestHeaders, corsHeaders, isAuthorized, json, resolvePeer, withCors } from "./http";
31
+ import type {
32
+ AuthGatewayServerHandle,
33
+ AuthGatewayServerOptions,
34
+ AuthGatewayFormatModule as FormatModule,
35
+ AuthGatewayParsedRequest as ParsedFormatRequest,
36
+ } from "./types";
37
+ import { DEFAULT_AUTH_GATEWAY_BIND } from "./types";
38
+
39
+ // ParsedFormatRequest / ParsedFormatOptions / FormatModule come from ./types.
40
+
41
/**
 * Lookup from a client-requested model id to a `Model`. `undefined` is part
 * of the return type — presumably for ids the caller's registry doesn't know.
 */
export type ModelResolver = (modelId: string) => Model<Api> | undefined;

/**
 * Everything needed to boot the gateway: the base server options plus the
 * credential store and model lookups supplied by the caller.
 */
export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
	/** Where credentials come from; callers wire in a broker-backed AuthStorage. */
	storage: AuthStorage;
	/**
	 * Turns the model id a client asked for into a pi-ai Model. The caller
	 * provides this from a ModelRegistry, which lives in `coding-agent` so
	 * that `pi-ai` does not take on an inverse dependency.
	 */
	resolveModel: ModelResolver;
	/** Optional provider of the complete model array behind the `/v1/models` listing. */
	listModels?: () => Iterable<Model<Api>>;
}
55
+
56
+ // `parseBind` lives in ../utils/parse-bind so the gateway and broker can't
57
+ // drift on accepted inputs (e.g. empty hostname, IPv6 brackets).
58
+
59
// Route table for the wire-format endpoints: each request path maps to the
// format module imported for it and a short label naming that format.
const FORMAT_ROUTES: Record<string, { module: FormatModule; label: string }> = {
	"/v1/chat/completions": {
		module: openaiChat,
		label: "openai-chat",
	},
	"/v1/messages": {
		module: anthropicMessages,
		label: "anthropic-messages",
	},
	"/v1/responses": {
		module: openaiResponses,
		label: "openai-responses",
	},
};
64
+
65
+ // (passthrough fast-path removed — it bypassed pi-ai provider logic, in
66
+ // particular the Anthropic Anthropic-code OAuth system-prompt prefix injection.
67
+ // Every request now takes the translate path so credential-specific request
68
+ // shaping always applies.)
69
+
70
+ // Options the caller's wire format may carry but the resolved provider can't
71
+ // honour are dropped silently in `buildStreamOptions`. We used to 400 here
72
+ // (`Unsupported option: temperature for OpenAI code provider-responses`), but every
73
+ // realistic client (llm-git, openai SDK, anthropic SDK) bakes some of these
74
+ // defaults in without knowing which model they'll resolve to. Failing loudly
75
+ // just turned that into per-call config hell. Silent strip is what the
76
+ // upstream provider would do anyway when it ignores extra fields.
77
+
78
/**
 * Compute a deterministic cache identity for a request.
 *
 * The identity is a SHA-256 digest over the request parts that stay fixed
 * for the lifetime of one logical conversation: the model id, the system
 * prompt (when non-empty), the tool definitions (when non-empty) and the
 * first message (the conversation seed). Parts are joined with a NUL
 * separator before hashing.
 *
 * Why: backends that only cache prefixes under an explicit
 * `prompt_cache_key` would otherwise never coalesce two requests that share
 * a prefix but differ in their trailing messages. Deriving the key here lets
 * clients that signal caching through `cache_control` markers benefit from
 * keyed caching too. Folding in the first message keeps two different chats
 * with the same system prompt out of the same cache bucket.
 *
 * Anthropic-backed requests ignore `sessionId`, so the key is harmless there.
 *
 * @param modelId - Model id as requested by the client.
 * @param context - Parsed conversation context (system prompt, tools, messages).
 * @returns The first 128 bits of the digest, laid out 8-4-4-4-12 like a UUID.
 */
function deriveSessionId(modelId: string, context: Context): string {
	const { systemPrompt, tools, messages } = context;
	const seedParts: string[] = [modelId];

	if (systemPrompt && systemPrompt.length > 0) {
		seedParts.push(systemPrompt.join("\n\n"));
	}
	if (tools && tools.length > 0) {
		seedParts.push(JSON.stringify(tools));
	}

	const opener = messages?.[0];
	if (opener) {
		// Only role + content go into the hash: timestamps and provider
		// metadata are re-stamped on every parse and would make the key drift
		// between turns of the same conversation.
		const { role, content } = opener;
		seedParts.push(JSON.stringify({ role, content }));
	}

	const digest = new Bun.CryptoHasher("sha256").update(seedParts.join("\u0000")).digest("hex");

	// UUID-shaped grouping (8-4-4-4-12) of the leading 32 hex chars. The
	// result is 36 chars long, which per the upstream prompt-cache-key
	// normalizer's ≤64-char rule passes through unchanged.
	const groupBounds: ReadonlyArray<readonly [number, number]> = [
		[0, 8],
		[8, 12],
		[12, 16],
		[16, 20],
		[20, 32],
	];
	return groupBounds.map(([from, to]) => digest.slice(from, to)).join("-");
}
116
+
117
/**
 * Translate the parsed request options into the `SimpleStreamOptions` object
 * passed on to the streaming layer.
 *
 * Optional fields are copied only when the client set them. `sessionId` is
 * always populated: the client's prompt cache key when present, otherwise a
 * key derived from the request via `deriveSessionId`.
 *
 * @param parsed - Parsed request; its `options`, `modelId` and `context` are read.
 * @param api - API id of the resolved model; only `"openai-codex-responses"` is special-cased.
 * @param signal - Abort signal stored on the returned options.
 * @returns A freshly built options object.
 */
function buildStreamOptions(parsed: ParsedFormatRequest, api: Api, signal: AbortSignal): SimpleStreamOptions {
	const { options, modelId, context } = parsed;
	const {
		maxOutputTokens,
		temperature,
		topP,
		topK,
		minP,
		stopSequences,
		presencePenalty,
		frequencyPenalty,
		repetitionPenalty,
		metadata,
		headers,
		toolChoice,
		reasoning,
		disableReasoning,
		hideThinkingSummary,
		serviceTier,
		cacheRetention,
		promptCacheKey,
		thinkingBudgets,
		explicitThinkingBudgetTokens,
		parallelToolCalls,
		previousResponseId,
		seed,
		logitBias,
		user,
		responseFormat,
	} = options;

	// This backend rejects `temperature` / `top_p` (per-model defaults only),
	// so those two are dropped silently for it. Any other option a provider
	// cannot honour is simply ignored further down the stack.
	const samplingAllowed = api !== "openai-codex-responses";

	const opts: SimpleStreamOptions = { signal };

	if (maxOutputTokens !== undefined) {
		opts.maxTokens = maxOutputTokens;
	}
	if (samplingAllowed && temperature !== undefined) {
		opts.temperature = temperature;
	}
	if (samplingAllowed && topP !== undefined) {
		opts.topP = topP;
	}
	if (topK !== undefined) {
		opts.topK = topK;
	}
	if (minP !== undefined) {
		opts.minP = minP;
	}
	if (stopSequences !== undefined) {
		opts.stopSequences = stopSequences;
	}
	if (presencePenalty !== undefined) {
		opts.presencePenalty = presencePenalty;
	}
	if (frequencyPenalty !== undefined) {
		opts.frequencyPenalty = frequencyPenalty;
	}
	if (repetitionPenalty !== undefined) {
		opts.repetitionPenalty = repetitionPenalty;
	}
	if (metadata !== undefined) {
		opts.metadata = metadata;
	}
	if (headers !== undefined) {
		// Nothing has set `opts.headers` yet, so a shallow copy is the whole merge.
		opts.headers = { ...headers };
	}
	if (toolChoice !== undefined) {
		// Object-form choices are reduced to a `{ type: "tool", name }` pair;
		// anything else is passed through untouched.
		if (typeof toolChoice === "object") {
			opts.toolChoice = { type: "tool", name: toolChoice.name };
		} else {
			opts.toolChoice = toolChoice;
		}
	}
	if (reasoning !== undefined) {
		opts.reasoning = reasoning;
	}
	if (disableReasoning !== undefined) {
		opts.disableReasoning = disableReasoning;
	}
	if (hideThinkingSummary !== undefined) {
		opts.hideThinkingSummary = hideThinkingSummary;
	}
	if (serviceTier !== undefined) {
		opts.serviceTier = serviceTier;
	}
	if (cacheRetention !== undefined) {
		opts.cacheRetention = cacheRetention;
	}

	// A client-supplied cache key takes priority. Without one, derive a
	// stable key from the request so prefix caching can engage across turns
	// of the same logical conversation.
	opts.sessionId = promptCacheKey ?? deriveSessionId(modelId, context);

	if (thinkingBudgets) {
		// `opts.thinkingBudgets` is still unset here, so this is a plain copy.
		opts.thinkingBudgets = { ...thinkingBudgets };
	}
	if (explicitThinkingBudgetTokens !== undefined) {
		// An explicit budget is pinned to the requested effort (High when the
		// client named none), and the effort itself is set as well so that
		// providers gating on `reasoning` actually surface the budget.
		const pinnedEffort = reasoning ?? Effort.High;
		opts.thinkingBudgets = {
			...(opts.thinkingBudgets ?? {}),
			[pinnedEffort]: explicitThinkingBudgetTokens,
		};
		opts.reasoning ??= pinnedEffort;
	}

	// These typed options have no `SimpleStreamOptions` slot yet. They are
	// reported in a debug log (so they are visible when wiring a new
	// provider) and deliberately not forwarded through an untyped bag.
	// TODO(pi-ai): land first-class fields and replace these blocks.
	const droppedOptions = [parallelToolCalls, previousResponseId, seed, logitBias, user, responseFormat];
	if (droppedOptions.some((value) => value !== undefined)) {
		logger.debug("auth-gateway dropped unsupported typed options", {
			api,
			parallelToolCalls,
			previousResponseId,
			seed,
			hasLogitBias: logitBias !== undefined,
			user,
			hasResponseFormat: responseFormat !== undefined,
		});
	}

	return opts;
}
189
+
190
/**
 * Classify an upstream or gateway-internal error into an HTTP status code
 * and a provider-style error type tag, so the caller can hand both to a
 * format module's `formatError` and emit that wire format's native envelope.
 *
 * Resolution order:
 *  1. A numeric `status` property on the error object (truncated to an
 *     integer) decides when it is 400 or above.
 *  2. Abort errors (by `name` or by message) map to 499.
 *  3. Otherwise the lower-cased message is scanned for auth, rate-limit and
 *     invalid-request hints.
 *  4. Anything else is reported as a 502 upstream error.
 *
 * The message scan matches status codes only as standalone numbers and
 * "rate" only as its own word (or as part of "rate limit"), so text such as
 * "failed to generate" or "request 14012" is no longer misclassified.
 *
 * @param err - Any thrown value; non-`Error` values are stringified.
 * @returns The status, type tag and the original message text.
 */
function classifyGatewayError(err: unknown): { status: number; type: string; message: string } {
	const message = err instanceof Error ? err.message : String(err);
	const lower = message.toLowerCase();

	// Custom errors may attach a numeric `status` property; honor it when
	// present and pick the matching tag. `| 0` truncates to an integer.
	const statusProp =
		typeof err === "object" && err !== null && typeof (err as { status?: unknown }).status === "number"
			? (err as { status: number }).status | 0
			: undefined;
	if (statusProp !== undefined) {
		if (statusProp === 401 || statusProp === 403) {
			return { status: statusProp, type: "authentication_error", message };
		}
		if (statusProp === 429) return { status: 429, type: "rate_limit_error", message };
		if (statusProp >= 400 && statusProp < 500) return { status: statusProp, type: "invalid_request_error", message };
		if (statusProp >= 500) return { status: statusProp, type: "upstream_error", message };
		// Statuses below 400 carry no error meaning; fall through to the heuristics.
	}

	if (err instanceof Error && err.name === "AbortError") return { status: 499, type: "request_aborted", message };
	if (lower.includes("aborted") || lower.includes("abortsignal")) {
		return { status: 499, type: "request_aborted", message };
	}

	// Status codes must not be embedded in a longer digit run (ids, ports,
	// byte counts), hence the digit look-arounds.
	const mentionsAuthStatus = /(?<!\d)40[13](?!\d)/.test(lower);
	if (mentionsAuthStatus || lower.includes("unauthorized") || lower.includes("forbidden")) {
		return { status: 401, type: "authentication_error", message };
	}

	// "rate" must start a word and either stand alone or lead into "limit"
	// ("rate limit", "rate_limit", "rate-limited", "ratelimit"); this keeps
	// "generate", "integrate", "separate" and friends out of the 429 bucket.
	const mentionsRateLimit =
		/(?<!\d)429(?!\d)/.test(lower) || /(?<![a-z])rate(?:[\s_-]?limit|(?![a-z]))/.test(lower);
	if (mentionsRateLimit || lower.includes("quota")) {
		return { status: 429, type: "rate_limit_error", message };
	}

	if (lower.includes("unsupported") || lower.includes("invalid")) {
		return { status: 400, type: "invalid_request_error", message };
	}
	return { status: 502, type: "upstream_error", message };
}
234
+
235
/**
 * Recovery hook for a provider auth failure: invalidate the credential that
 * matches `oldKey`, log the retry at debug level, then ask `storage` for a
 * key again.
 *
 * @param storage  Credential store to invalidate against and re-query.
 * @param model    Model being served; only its `id` is forwarded to `getApiKey`.
 * @param provider Provider whose credential failed.
 * @param oldKey   The key that produced the auth error.
 * @param error    The triggering error (logged as text only).
 * @param signal   Abort signal forwarded to both storage calls.
 * @param format   Route label recorded in the log entry.
 * @param peer     Caller identity recorded in the log entry.
 * @returns Whatever `storage.getApiKey` resolves to (possibly `undefined`).
 */
async function refreshGatewayApiKeyAfterAuthError(
	storage: AuthStorage,
	model: Model<Api>,
	provider: string,
	oldKey: string,
	error: unknown,
	signal: AbortSignal,
	format: string,
	peer: string,
): Promise<string | undefined> {
	await storage.invalidateCredentialMatching(provider, oldKey, signal);

	let reason: string;
	if (error instanceof Error) {
		reason = error.message;
	} else {
		reason = String(error);
	}
	logger.debug("auth-gateway retrying provider request after credential invalidation", {
		format,
		provider,
		peer,
		error: reason,
	});

	const lookup = { modelId: model.id, signal };
	return storage.getApiKey(provider, undefined, lookup);
}
254
+
255
/**
 * Build the 499 "client closed request" error in the wire format of the
 * given route by delegating to its module's `formatError`.
 */
function clientClosedResponse(route: { module: FormatModule }): Response {
	const { module: formatModule } = route;
	return formatModule.formatError(499, "request_aborted", "client closed request");
}
258
+
259
/**
 * Create a fresh `AbortController` that follows the request's own signal:
 * it is aborted immediately when `req.signal` is already aborted, otherwise
 * on the first `abort` event, carrying over `req.signal.reason` either way.
 */
function mirrorRequestAbort(req: Request): AbortController {
	const mirror = new AbortController();
	const forwardAbort = (): void => mirror.abort(req.signal.reason);
	if (!req.signal.aborted) {
		req.signal.addEventListener("abort", forwardAbort, { once: true });
		return mirror;
	}
	forwardAbort();
	return mirror;
}
268
+
269
+ // (handlePassthrough removed — see note above.)
270
+
271
/**
 * Serve one foreign-wire-format route.
 *
 * In order: mirror the client's abort signal, read the JSON body, pull the
 * top-level `model` id, resolve it through `bootOpts.resolveModel`, fetch a
 * credential from `bootOpts.storage`, run the format module's `parseRequest`,
 * dispatch through `streamSimple`, and encode the outcome with the same
 * format module (a JSON body when `parsed.stream` is false, SSE otherwise).
 *
 * Every failure is rendered through `route.module.formatError`; whenever the
 * mirrored signal is found aborted the handler answers with the 499
 * client-closed envelope instead.
 *
 * @param route    Format module plus the label used in log entries.
 * @param bootOpts Gateway boot options (`resolveModel`, `storage`).
 * @param req      Incoming request.
 * @param peer     Caller identity used in log entries.
 */
async function handleFormatEndpoint(
	route: { module: FormatModule; label: string },
	bootOpts: AuthGatewayBootOptions,
	req: Request,
	peer: string,
): Promise<Response> {
	const abortMirror = mirrorRequestAbort(req);
	const { signal } = abortMirror;
	const closed = (): Response => clientClosedResponse(route);
	const fail = (status: number, type: string, text: string): Response =>
		route.module.formatError(status, type, text);
	if (signal.aborted) return closed();

	let body: unknown;
	try {
		body = await req.json();
	} catch (error) {
		if (signal.aborted) return closed();
		return fail(400, "invalid_request_error", `Invalid JSON body: ${String(error)}`);
	}
	if (signal.aborted) return closed();

	// The supported wire formats carry the model id on a top-level `model`
	// field. It is read here, ahead of the strict schema, so a missing id
	// still gets a coherent error envelope.
	let modelId: string | undefined;
	if (typeof body === "object" && body !== null) {
		const candidate = (body as { model?: unknown }).model;
		if (typeof candidate === "string") modelId = candidate;
	}
	if (!modelId) {
		return fail(400, "invalid_request_error", "Missing top-level `model` field");
	}

	const model = bootOpts.resolveModel(modelId);
	if (!model) {
		return fail(404, "invalid_request_error", `Unknown model: ${modelId}`);
	}

	// pi-ai's stream() does NOT consult AuthStorage: the gateway resolves the
	// credential itself and hands it over as `options.apiKey`.
	let apiKey: string | undefined;
	try {
		apiKey = await bootOpts.storage.getApiKey(model.provider, undefined, { modelId: model.id, signal });
	} catch (error) {
		if (signal.aborted) return closed();
		const { status, type, message } = classifyGatewayError(error);
		logger.warn("auth-gateway getApiKey threw", { provider: model.provider, peer, error: message });
		return fail(status, type, message);
	}
	if (signal.aborted) return closed();
	if (!apiKey) {
		return fail(401, "authentication_error", `No credential available for provider ${model.provider}`);
	}

	// Strict parse of the inbound format. There is no passthrough fast-path:
	// every request is rebuilt and dispatched through `streamSimple`.
	let parsed: ParsedFormatRequest;
	try {
		parsed = route.module.parseRequest(body, req.headers);
	} catch (error) {
		if (signal.aborted) return closed();
		return fail(400, "invalid_request_error", error instanceof Error ? error.message : String(error));
	}
	// Gateway-captured headers go underneath the parser's own captures, so the
	// parser's values win on collision.
	parsed.options.headers = { ...captureRequestHeaders(req.headers), ...parsed.options.headers };
	if (signal.aborted) return closed();

	const streamOpts = buildStreamOptions(parsed, model.api, signal);
	streamOpts.apiKey = apiKey;
	streamOpts.onAuthError = (provider, oldKey, error) =>
		refreshGatewayApiKeyAfterAuthError(bootOpts.storage, model, provider, oldKey, error, signal, route.label, peer);

	logger.info("auth-gateway request", {
		format: route.label,
		model: parsed.modelId,
		resolvedProvider: model.provider,
		resolvedModel: model.id,
		stream: parsed.stream,
		peer,
	});

	let events: AssistantMessageEventStream;
	try {
		if (signal.aborted) return closed();
		events = streamSimple(model, parsed.context, streamOpts);
	} catch (error) {
		const { status, type, message } = classifyGatewayError(error);
		logger.warn("auth-gateway streamSimple threw", { format: route.label, error: message, peer });
		return fail(status, type, message);
	}

	if (!parsed.stream) {
		try {
			if (signal.aborted) return closed();
			const finalMessage = await events.result();
			const { stopReason } = finalMessage;
			if (stopReason !== "aborted" && stopReason !== "error") {
				return json(200, route.module.encodeResponse(finalMessage, parsed.modelId));
			}

			const wasAborted = stopReason === "aborted";
			const fallbackText = wasAborted ? "Request was aborted" : "Upstream request failed";
			const errorMessage = finalMessage.errorMessage ?? fallbackText;
			logger.warn("auth-gateway non-streaming failed", {
				format: route.label,
				reason: stopReason,
				error: errorMessage,
				peer,
			});
			if (wasAborted) {
				return fail(499, "request_aborted", errorMessage);
			}
			const classified = classifyGatewayError(new Error(errorMessage));
			return fail(classified.status, classified.type, errorMessage);
		} catch (error) {
			if (signal.aborted) return closed();
			const { status, type, message } = classifyGatewayError(error);
			logger.warn("auth-gateway non-streaming aborted", { format: route.label, error: message, peer });
			return fail(status, type, message);
		}
	}
	if (signal.aborted) return closed();

	const sseStream = route.module.encodeStream(events, parsed.modelId, parsed.options);
	const sseHeaders = {
		"Content-Type": "text/event-stream; charset=utf-8",
		"Cache-Control": "no-cache",
		Connection: "keep-alive",
		// Turns off proxy buffering (nginx and ingress controllers honor it) so
		// long-thinking-budget streams are not held back until a buffer flushes.
		"X-Accel-Buffering": "no",
	};
	return new Response(sseStream, { status: 200, headers: sseHeaders });
}
435
+
436
/**
 * Pi-native fast path: `POST /v1/pi/stream`.
 *
 * The body is parsed by `piNative.parseRequest` and the resulting context is
 * handed to `streamSimple` directly, so no foreign wire format is involved.
 * Model resolution, credential fetch, abort mirroring, the
 * `openai-codex-responses` temperature/topP strip, header capture and the
 * session-id default are all still applied here; errors are rendered with
 * `piNative.formatError`. Non-streaming requests get `{ message }` as JSON,
 * streaming requests get `piNative.encodeStream(events)` as SSE.
 *
 * @param bootOpts Gateway boot options (`resolveModel`, `storage`).
 * @param req      Incoming request.
 * @param peer     Caller identity used in log entries.
 */
async function handlePiNative(bootOpts: AuthGatewayBootOptions, req: Request, peer: string): Promise<Response> {
	const abortMirror = mirrorRequestAbort(req);
	const { signal } = abortMirror;
	const clientGone = (): Response => piNative.formatError(499, "request_aborted", "client closed request");
	if (signal.aborted) return clientGone();

	let body: unknown;
	try {
		body = await req.json();
	} catch (error) {
		if (signal.aborted) return clientGone();
		return piNative.formatError(400, "invalid_request_error", `Invalid JSON body: ${String(error)}`);
	}
	if (signal.aborted) return clientGone();

	let parsed: piNative.PiNativeParsedRequest;
	try {
		parsed = piNative.parseRequest(body, req.headers);
	} catch (error) {
		if (signal.aborted) return clientGone();
		const text = error instanceof Error ? error.message : String(error);
		return piNative.formatError(400, "invalid_request_error", text);
	}

	const model = bootOpts.resolveModel(parsed.modelId);
	if (!model) {
		return piNative.formatError(404, "invalid_request_error", `Unknown model: ${parsed.modelId}`);
	}

	let apiKey: string | undefined;
	try {
		apiKey = await bootOpts.storage.getApiKey(model.provider, undefined, { modelId: model.id, signal });
	} catch (error) {
		if (signal.aborted) return clientGone();
		const { status, type, message } = classifyGatewayError(error);
		logger.warn("auth-gateway getApiKey threw", { provider: model.provider, peer, error: message });
		return piNative.formatError(status, type, message);
	}
	if (signal.aborted) return clientGone();
	if (!apiKey) {
		return piNative.formatError(
			401,
			"authentication_error",
			`No credential available for provider ${model.provider}`,
		);
	}

	// Start from the client's parsed options and overlay the server-controlled
	// fields: credential, abort signal and the auth-error retry hook.
	const streamOpts: SimpleStreamOptions = {
		...parsed.options,
		apiKey,
		signal,
		onAuthError: (provider, oldKey, error) =>
			refreshGatewayApiKeyAfterAuthError(
				bootOpts.storage,
				model,
				provider,
				oldKey,
				error,
				signal,
				"pi-native",
				peer,
			),
	};
	// Sampling knobs are dropped for the `openai-codex-responses` API.
	if (model.api === "openai-codex-responses") {
		delete streamOpts.temperature;
		delete streamOpts.topP;
	}
	// Gateway-captured headers sit underneath the client's own headers, so
	// the client's values win on collision.
	streamOpts.headers = { ...captureRequestHeaders(req.headers), ...streamOpts.headers };
	// An explicit `sessionId` wins; otherwise one is derived from the model
	// id and the context.
	if (streamOpts.sessionId == null) {
		streamOpts.sessionId = deriveSessionId(parsed.modelId, parsed.context);
	}

	logger.info("auth-gateway request", {
		format: "pi-native",
		model: parsed.modelId,
		resolvedProvider: model.provider,
		resolvedModel: model.id,
		stream: parsed.stream,
		peer,
	});

	let events: AssistantMessageEventStream;
	try {
		if (signal.aborted) return clientGone();
		events = streamSimple(model, parsed.context, streamOpts);
	} catch (error) {
		const { status, type, message } = classifyGatewayError(error);
		logger.warn("auth-gateway streamSimple threw", { format: "pi-native", error: message, peer });
		return piNative.formatError(status, type, message);
	}

	if (!parsed.stream) {
		try {
			if (signal.aborted) return clientGone();
			const finalMessage = await events.result();
			const { stopReason } = finalMessage;
			if (stopReason !== "aborted" && stopReason !== "error") {
				return json(200, { message: finalMessage });
			}

			const wasAborted = stopReason === "aborted";
			const fallbackText = wasAborted ? "Request was aborted" : "Upstream request failed";
			const errorMessage = finalMessage.errorMessage ?? fallbackText;
			logger.warn("auth-gateway non-streaming failed", {
				format: "pi-native",
				reason: stopReason,
				error: errorMessage,
				peer,
			});
			if (wasAborted) {
				return piNative.formatError(499, "request_aborted", errorMessage);
			}
			const classified = classifyGatewayError(new Error(errorMessage));
			return piNative.formatError(classified.status, classified.type, errorMessage);
		} catch (error) {
			if (signal.aborted) return clientGone();
			const { status, type, message } = classifyGatewayError(error);
			logger.warn("auth-gateway non-streaming aborted", { format: "pi-native", error: message, peer });
			return piNative.formatError(status, type, message);
		}
	}
	if (signal.aborted) return clientGone();

	const sseStream = piNative.encodeStream(events);
	const sseHeaders = {
		"Content-Type": "text/event-stream; charset=utf-8",
		"Cache-Control": "no-cache",
		Connection: "keep-alive",
		"X-Accel-Buffering": "no",
	};
	return new Response(sseStream, { status: 200, headers: sseHeaders });
}
588
+
589
/**
 * `GET /v1/usage` handler: returns the reports produced by
 * `storage.fetchUsageReports` (an empty list when the store does not
 * implement it or yields nothing), each stripped of its `raw` field, together
 * with a `generatedAt` timestamp.
 *
 * NOTE(review): caching/TTL of the reports is presumably handled inside
 * `AuthStorage`; nothing in this handler caches.
 */
async function handleUsage(storage: AuthStorage, signal: AbortSignal): Promise<Response> {
	const fetched = await storage.fetchUsageReports?.({ signal });
	const reports = fetched ?? [];
	// Consumers only need the remaining fields; the provider-specific `raw`
	// payload is left out of the response.
	const trimmed = reports.map(report => {
		const { raw: _raw, ...rest } = report;
		return rest;
	});
	const payload = { generatedAt: Date.now(), reports: trimmed };
	return json(200, payload);
}
603
+
604
/**
 * `GET /v1/credentials/check` handler: returns whatever
 * `storage.checkCredentials` reports, wrapped with a `generatedAt` timestamp,
 * so a caller can inspect credentials individually rather than through the
 * aggregate `/v1/usage` view.
 *
 * NOTE(review): the probe is expected to walk credentials one at a time to
 * avoid fan-out against provider rate limits; that behavior lives in
 * `checkCredentials`, not in this handler.
 */
async function handleCredentialsCheck(storage: AuthStorage, signal: AbortSignal): Promise<Response> {
	const probeResults = await storage.checkCredentials({ signal });
	const payload = { generatedAt: Date.now(), credentials: probeResults };
	return json(200, payload);
}
619
+
620
/**
 * `GET /v1/models` handler: renders the models yielded by `opts.listModels`
 * (none when the option is absent) as an `{ object: "list", data }` catalog
 * whose entries expose `id`, `owned_by` (the provider) and `api`.
 */
function handleModelsList(opts: AuthGatewayBootOptions): Response {
	const data = opts.listModels
		? Array.from(opts.listModels(), model => ({
				id: model.id,
				object: "model" as const,
				owned_by: model.provider,
				api: model.api,
			}))
		: [];
	return json(200, { object: "list", data });
}
630
+
631
/**
 * Start the auth-gateway HTTP server on `opts.bind` (falling back to
 * `DEFAULT_AUTH_GATEWAY_BIND`) and return a handle to it.
 *
 * Routing, in order:
 *  - `OPTIONS *`                  -> 204 CORS preflight, no auth.
 *  - `GET /healthz`               -> liveness + version, no auth.
 *  - everything below requires a bearer token from `opts.bearerTokens`:
 *  - `GET /v1/usage`              -> `handleUsage`
 *  - `GET /v1/credentials/check`  -> `handleCredentialsCheck`
 *  - `POST <FORMAT_ROUTES key>`   -> `handleFormatEndpoint`
 *  - `POST /v1/pi/stream`         -> `handlePiNative`
 *  - `GET /v1/models`             -> `handleModelsList`
 *  - anything else                -> plain JSON 404.
 * An exception escaping a handler is logged and answered with a JSON 500.
 *
 * @param opts Boot options: bind address, bearer tokens, version, storage,
 *             model resolution/listing callbacks.
 * @returns Handle exposing the bound `url`, `port`, `hostname`, and an async
 *          `close()` that resolves once the server has been stopped.
 */
export function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle {
	const bind = parseBind(opts.bind ?? DEFAULT_AUTH_GATEWAY_BIND);
	const tokens = new Set<string>(opts.bearerTokens);
	const version = opts.version;

	const server = Bun.serve({
		hostname: bind.hostname,
		port: bind.port,
		fetch: async (req): Promise<Response> => {
			const url = new URL(req.url);
			const pathname = url.pathname;
			const peer = resolvePeer(req);
			// CORS preflight is always answered without auth — browsers send
			// preflights pre-authentication and a 401 here breaks the actual
			// request before the bearer is ever attached.
			if (req.method === "OPTIONS") {
				return new Response(null, { status: 204, headers: corsHeaders(req) });
			}
			try {
				if (req.method === "GET" && pathname === "/healthz") {
					return withCors(json(200, { ok: true, version }), req);
				}
				if (!isAuthorized(req, tokens)) {
					logger.info("auth-gateway request unauthorized", { method: req.method, path: pathname, peer });
					return withCors(json(401, { error: "unauthorized" }), req);
				}

				// Aggregated usage, in the same shape as the broker's `/v1/usage`
				// so one client struct works against either endpoint.
				if (req.method === "GET" && pathname === "/v1/usage") {
					return withCors(await handleUsage(opts.storage, req.signal), req);
				}

				// Per-credential auth probe — diagnoses which row in a multi-account
				// pool is producing 401s.
				if (req.method === "GET" && pathname === "/v1/credentials/check") {
					return withCors(await handleCredentialsCheck(opts.storage, req.signal), req);
				}

				// Provider-format dispatch.
				const formatRoute = FORMAT_ROUTES[pathname];
				if (formatRoute && req.method === "POST") {
					return withCors(await handleFormatEndpoint(formatRoute, opts, req, peer), req);
				}

				// Pi-native fast path. Same auth + provider plumbing as the
				// foreign-wire routes, just without the wire-format translation.
				if (req.method === "POST" && pathname === "/v1/pi/stream") {
					return withCors(await handlePiNative(opts, req, peer), req);
				}

				// Model catalog.
				if (req.method === "GET" && pathname === "/v1/models") {
					return withCors(handleModelsList(opts), req);
				}

				// Route-table miss: no format module to defer to, so we emit a
				// plain JSON 404 rather than guessing at a protocol-specific envelope.
				return withCors(json(404, { error: `No route: ${req.method} ${pathname}` }), req);
			} catch (error) {
				logger.error("auth-gateway handler crashed", {
					method: req.method,
					path: pathname,
					peer,
					error: String(error),
				});
				return withCors(json(500, { error: "internal error" }), req);
			}
		},
		// Max-out Bun's idle timeout. Long thinking-budget calls can sit idle
		// for minutes before the first token arrives; the default kills them.
		idleTimeout: 255,
	});

	const boundHost = server.hostname ?? bind.hostname;
	const boundPort = server.port ?? bind.port;
	// An IPv6 literal must be wrapped in brackets inside a URL authority,
	// otherwise its colons are indistinguishable from the port separator
	// (`http://::1:8080` does not parse). `hostname` stays unbracketed.
	const urlHost = boundHost.includes(":") && !boundHost.startsWith("[") ? `[${boundHost}]` : boundHost;
	return {
		url: `http://${urlHost}:${boundPort}`,
		port: boundPort,
		hostname: boundHost,
		close: async () => {
			// `stop()` is asynchronous; await it so `close()` only resolves after
			// the listener and its active connections are actually torn down.
			await server.stop(true);
		},
	};
}