@gajae-code/ai 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2644 -0
- package/README.md +1181 -0
- package/dist/types/api-registry.d.ts +30 -0
- package/dist/types/auth-broker/client.d.ts +66 -0
- package/dist/types/auth-broker/index.d.ts +5 -0
- package/dist/types/auth-broker/refresher.d.ts +25 -0
- package/dist/types/auth-broker/remote-store.d.ts +96 -0
- package/dist/types/auth-broker/server.d.ts +32 -0
- package/dist/types/auth-broker/types.d.ts +105 -0
- package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
- package/dist/types/auth-gateway/http.d.ts +39 -0
- package/dist/types/auth-gateway/index.d.ts +3 -0
- package/dist/types/auth-gateway/server.d.ts +17 -0
- package/dist/types/auth-gateway/types.d.ts +115 -0
- package/dist/types/auth-storage.d.ts +641 -0
- package/dist/types/cli.d.ts +2 -0
- package/dist/types/index.d.ts +49 -0
- package/dist/types/model-cache.d.ts +17 -0
- package/dist/types/model-manager.d.ts +62 -0
- package/dist/types/model-thinking.d.ts +71 -0
- package/dist/types/models.d.ts +12 -0
- package/dist/types/provider-details.d.ts +24 -0
- package/dist/types/provider-models/bundled-references.d.ts +4 -0
- package/dist/types/provider-models/descriptors.d.ts +48 -0
- package/dist/types/provider-models/google.d.ts +20 -0
- package/dist/types/provider-models/index.d.ts +5 -0
- package/dist/types/provider-models/ollama.d.ts +7 -0
- package/dist/types/provider-models/openai-compat.d.ts +237 -0
- package/dist/types/provider-models/special.d.ts +16 -0
- package/dist/types/providers/amazon-bedrock.d.ts +36 -0
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +450 -0
- package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
- package/dist/types/providers/anthropic.d.ts +188 -0
- package/dist/types/providers/aws-credentials.d.ts +43 -0
- package/dist/types/providers/aws-eventstream.d.ts +38 -0
- package/dist/types/providers/aws-sigv4.d.ts +55 -0
- package/dist/types/providers/azure-openai-responses.d.ts +15 -0
- package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
- package/dist/types/providers/cursor.d.ts +42 -0
- package/dist/types/providers/error-message.d.ts +27 -0
- package/dist/types/providers/github-copilot-headers.d.ts +40 -0
- package/dist/types/providers/gitlab-duo.d.ts +27 -0
- package/dist/types/providers/google-auth.d.ts +24 -0
- package/dist/types/providers/google-gemini-cli.d.ts +72 -0
- package/dist/types/providers/google-gemini-headers.d.ts +18 -0
- package/dist/types/providers/google-shared.d.ts +163 -0
- package/dist/types/providers/google-types.d.ts +138 -0
- package/dist/types/providers/google-vertex.d.ts +7 -0
- package/dist/types/providers/google.d.ts +4 -0
- package/dist/types/providers/grammar.d.ts +1 -0
- package/dist/types/providers/kimi.d.ts +27 -0
- package/dist/types/providers/mock.d.ts +175 -0
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +814 -0
- package/dist/types/providers/openai-chat-server.d.ts +16 -0
- package/dist/types/providers/openai-codex/constants.d.ts +26 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
- package/dist/types/providers/openai-codex-responses.d.ts +67 -0
- package/dist/types/providers/openai-completions-compat.d.ts +25 -0
- package/dist/types/providers/openai-completions.d.ts +33 -0
- package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
- package/dist/types/providers/openai-responses-server.d.ts +17 -0
- package/dist/types/providers/openai-responses-shared.d.ts +89 -0
- package/dist/types/providers/openai-responses.d.ts +32 -0
- package/dist/types/providers/pi-native-client.d.ts +13 -0
- package/dist/types/providers/pi-native-server.d.ts +68 -0
- package/dist/types/providers/register-builtins.d.ts +31 -0
- package/dist/types/providers/synthetic.d.ts +26 -0
- package/dist/types/providers/transform-messages.d.ts +12 -0
- package/dist/types/providers/vision-guard.d.ts +8 -0
- package/dist/types/rate-limit-utils.d.ts +19 -0
- package/dist/types/stream.d.ts +24 -0
- package/dist/types/types.d.ts +746 -0
- package/dist/types/usage/claude.d.ts +3 -0
- package/dist/types/usage/gemini.d.ts +2 -0
- package/dist/types/usage/github-copilot.d.ts +7 -0
- package/dist/types/usage/google-antigravity.d.ts +2 -0
- package/dist/types/usage/kimi.d.ts +2 -0
- package/dist/types/usage/minimax-code.d.ts +2 -0
- package/dist/types/usage/openai-codex.d.ts +3 -0
- package/dist/types/usage/shared.d.ts +1 -0
- package/dist/types/usage/zai.d.ts +2 -0
- package/dist/types/usage.d.ts +258 -0
- package/dist/types/utils/abort.d.ts +19 -0
- package/dist/types/utils/anthropic-auth.d.ts +31 -0
- package/dist/types/utils/discovery/antigravity.d.ts +61 -0
- package/dist/types/utils/discovery/codex.d.ts +38 -0
- package/dist/types/utils/discovery/cursor.d.ts +23 -0
- package/dist/types/utils/discovery/gemini.d.ts +25 -0
- package/dist/types/utils/discovery/index.d.ts +4 -0
- package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
- package/dist/types/utils/event-stream.d.ts +28 -0
- package/dist/types/utils/fireworks-model-id.d.ts +10 -0
- package/dist/types/utils/foundry.d.ts +1 -0
- package/dist/types/utils/h2-fetch.d.ts +22 -0
- package/dist/types/utils/http-inspector.d.ts +31 -0
- package/dist/types/utils/idle-iterator.d.ts +67 -0
- package/dist/types/utils/json-parse.d.ts +10 -0
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
- package/dist/types/utils/oauth/anthropic.d.ts +22 -0
- package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
- package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
- package/dist/types/utils/oauth/callback-server.d.ts +57 -0
- package/dist/types/utils/oauth/cerebras.d.ts +1 -0
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/cursor.d.ts +15 -0
- package/dist/types/utils/oauth/deepseek.d.ts +10 -0
- package/dist/types/utils/oauth/firepass.d.ts +1 -0
- package/dist/types/utils/oauth/fireworks.d.ts +1 -0
- package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
- package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
- package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
- package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
- package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
- package/dist/types/utils/oauth/huggingface.d.ts +19 -0
- package/dist/types/utils/oauth/index.d.ts +38 -0
- package/dist/types/utils/oauth/kagi.d.ts +17 -0
- package/dist/types/utils/oauth/kilo.d.ts +5 -0
- package/dist/types/utils/oauth/kimi.d.ts +21 -0
- package/dist/types/utils/oauth/litellm.d.ts +18 -0
- package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
- package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
- package/dist/types/utils/oauth/moonshot.d.ts +1 -0
- package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
- package/dist/types/utils/oauth/nvidia.d.ts +18 -0
- package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
- package/dist/types/utils/oauth/ollama.d.ts +18 -0
- package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
- package/dist/types/utils/oauth/opencode.d.ts +18 -0
- package/dist/types/utils/oauth/parallel.d.ts +17 -0
- package/dist/types/utils/oauth/perplexity.d.ts +9 -0
- package/dist/types/utils/oauth/pkce.d.ts +8 -0
- package/dist/types/utils/oauth/qianfan.d.ts +17 -0
- package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
- package/dist/types/utils/oauth/synthetic.d.ts +1 -0
- package/dist/types/utils/oauth/tavily.d.ts +17 -0
- package/dist/types/utils/oauth/together.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +44 -0
- package/dist/types/utils/oauth/venice.d.ts +18 -0
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
- package/dist/types/utils/oauth/vllm.d.ts +16 -0
- package/dist/types/utils/oauth/xiaomi.d.ts +19 -0
- package/dist/types/utils/oauth/zai.d.ts +18 -0
- package/dist/types/utils/oauth/zenmux.d.ts +1 -0
- package/dist/types/utils/overflow.d.ts +54 -0
- package/dist/types/utils/parse-bind.d.ts +23 -0
- package/dist/types/utils/provider-response.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +3 -0
- package/dist/types/utils/retry.d.ts +26 -0
- package/dist/types/utils/schema/adapt.d.ts +24 -0
- package/dist/types/utils/schema/compatibility.d.ts +30 -0
- package/dist/types/utils/schema/dereference.d.ts +11 -0
- package/dist/types/utils/schema/draft.d.ts +10 -0
- package/dist/types/utils/schema/equality.d.ts +4 -0
- package/dist/types/utils/schema/fields.d.ts +49 -0
- package/dist/types/utils/schema/index.d.ts +13 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
- package/dist/types/utils/schema/meta-validator.d.ts +2 -0
- package/dist/types/utils/schema/normalize.d.ts +93 -0
- package/dist/types/utils/schema/spill.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +25 -0
- package/dist/types/utils/schema/types.d.ts +4 -0
- package/dist/types/utils/schema/wire.d.ts +54 -0
- package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
- package/dist/types/utils/sse-debug.d.ts +10 -0
- package/dist/types/utils/tool-call-healing.d.ts +71 -0
- package/dist/types/utils/tool-choice.d.ts +50 -0
- package/dist/types/utils/validation.d.ts +17 -0
- package/dist/types/utils.d.ts +28 -0
- package/package.json +146 -0
- package/src/api-registry.ts +96 -0
- package/src/auth-broker/client.ts +358 -0
- package/src/auth-broker/index.ts +5 -0
- package/src/auth-broker/refresher.ts +127 -0
- package/src/auth-broker/remote-store.ts +623 -0
- package/src/auth-broker/server.ts +644 -0
- package/src/auth-broker/types.ts +127 -0
- package/src/auth-broker/wire-schemas.ts +200 -0
- package/src/auth-gateway/http.ts +194 -0
- package/src/auth-gateway/index.ts +3 -0
- package/src/auth-gateway/server.ts +717 -0
- package/src/auth-gateway/types.ts +134 -0
- package/src/auth-storage.ts +4104 -0
- package/src/cli.ts +262 -0
- package/src/index.ts +54 -0
- package/src/model-cache.ts +129 -0
- package/src/model-manager.ts +450 -0
- package/src/model-thinking.ts +691 -0
- package/src/models.json +73853 -0
- package/src/models.json.d.ts +9 -0
- package/src/models.ts +56 -0
- package/src/prompts/turn-aborted-guidance.md +4 -0
- package/src/provider-details.ts +90 -0
- package/src/provider-models/bundled-references.ts +38 -0
- package/src/provider-models/descriptors.ts +308 -0
- package/src/provider-models/google.ts +91 -0
- package/src/provider-models/index.ts +5 -0
- package/src/provider-models/ollama.ts +153 -0
- package/src/provider-models/openai-compat.ts +2275 -0
- package/src/provider-models/special.ts +67 -0
- package/src/providers/amazon-bedrock.ts +849 -0
- package/src/providers/anthropic-messages-server-schema.ts +229 -0
- package/src/providers/anthropic-messages-server.ts +677 -0
- package/src/providers/anthropic.ts +2696 -0
- package/src/providers/aws-credentials.ts +501 -0
- package/src/providers/aws-eventstream.ts +185 -0
- package/src/providers/aws-sigv4.ts +218 -0
- package/src/providers/azure-openai-responses.ts +337 -0
- package/src/providers/cursor/gen/agent_pb.ts +15274 -0
- package/src/providers/cursor/proto/agent.proto +3526 -0
- package/src/providers/cursor/proto/buf.gen.yaml +6 -0
- package/src/providers/cursor/proto/buf.yaml +17 -0
- package/src/providers/cursor.ts +2561 -0
- package/src/providers/error-message.ts +21 -0
- package/src/providers/github-copilot-headers.ts +140 -0
- package/src/providers/gitlab-duo.ts +372 -0
- package/src/providers/google-auth.ts +252 -0
- package/src/providers/google-gemini-cli.ts +795 -0
- package/src/providers/google-gemini-headers.ts +41 -0
- package/src/providers/google-shared.ts +902 -0
- package/src/providers/google-types.ts +167 -0
- package/src/providers/google-vertex.ts +88 -0
- package/src/providers/google.ts +41 -0
- package/src/providers/grammar.ts +70 -0
- package/src/providers/kimi.ts +52 -0
- package/src/providers/mock.ts +500 -0
- package/src/providers/ollama.ts +544 -0
- package/src/providers/openai-anthropic-shim.ts +138 -0
- package/src/providers/openai-chat-server-schema.ts +243 -0
- package/src/providers/openai-chat-server.ts +628 -0
- package/src/providers/openai-codex/constants.ts +43 -0
- package/src/providers/openai-codex/request-transformer.ts +161 -0
- package/src/providers/openai-codex/response-handler.ts +81 -0
- package/src/providers/openai-codex-responses.ts +2598 -0
- package/src/providers/openai-completions-compat.ts +279 -0
- package/src/providers/openai-completions.ts +1853 -0
- package/src/providers/openai-responses-server-schema.ts +290 -0
- package/src/providers/openai-responses-server.ts +1183 -0
- package/src/providers/openai-responses-shared.ts +800 -0
- package/src/providers/openai-responses.ts +621 -0
- package/src/providers/pi-native-client.ts +228 -0
- package/src/providers/pi-native-server.ts +210 -0
- package/src/providers/register-builtins.ts +412 -0
- package/src/providers/synthetic.ts +50 -0
- package/src/providers/transform-messages.ts +309 -0
- package/src/providers/vision-guard.ts +31 -0
- package/src/rate-limit-utils.ts +84 -0
- package/src/stream.ts +895 -0
- package/src/types.ts +884 -0
- package/src/usage/claude.ts +431 -0
- package/src/usage/gemini.ts +250 -0
- package/src/usage/github-copilot.ts +421 -0
- package/src/usage/google-antigravity.ts +201 -0
- package/src/usage/kimi.ts +271 -0
- package/src/usage/minimax-code.ts +31 -0
- package/src/usage/openai-codex.ts +503 -0
- package/src/usage/shared.ts +10 -0
- package/src/usage/zai.ts +247 -0
- package/src/usage.ts +183 -0
- package/src/utils/abort.ts +51 -0
- package/src/utils/anthropic-auth.ts +87 -0
- package/src/utils/discovery/antigravity.ts +261 -0
- package/src/utils/discovery/codex.ts +371 -0
- package/src/utils/discovery/cursor.ts +306 -0
- package/src/utils/discovery/gemini.ts +248 -0
- package/src/utils/discovery/index.ts +4 -0
- package/src/utils/discovery/openai-compatible.ts +224 -0
- package/src/utils/event-stream.ts +142 -0
- package/src/utils/fireworks-model-id.ts +30 -0
- package/src/utils/foundry.ts +8 -0
- package/src/utils/h2-fetch.ts +60 -0
- package/src/utils/http-inspector.ts +176 -0
- package/src/utils/idle-iterator.ts +250 -0
- package/src/utils/json-parse.ts +148 -0
- package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
- package/src/utils/oauth/anthropic.ts +200 -0
- package/src/utils/oauth/api-key-login.ts +87 -0
- package/src/utils/oauth/api-key-validation.ts +92 -0
- package/src/utils/oauth/callback-server.ts +276 -0
- package/src/utils/oauth/cerebras.ts +16 -0
- package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
- package/src/utils/oauth/cursor.ts +157 -0
- package/src/utils/oauth/deepseek.ts +53 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/fireworks.ts +15 -0
- package/src/utils/oauth/github-copilot.ts +362 -0
- package/src/utils/oauth/gitlab-duo.ts +123 -0
- package/src/utils/oauth/google-antigravity.ts +200 -0
- package/src/utils/oauth/google-gemini-cli.ts +256 -0
- package/src/utils/oauth/google-oauth-shared.ts +110 -0
- package/src/utils/oauth/huggingface.ts +62 -0
- package/src/utils/oauth/index.ts +444 -0
- package/src/utils/oauth/kagi.ts +47 -0
- package/src/utils/oauth/kilo.ts +87 -0
- package/src/utils/oauth/kimi.ts +254 -0
- package/src/utils/oauth/litellm.ts +47 -0
- package/src/utils/oauth/lm-studio.ts +38 -0
- package/src/utils/oauth/minimax-code.ts +78 -0
- package/src/utils/oauth/moonshot.ts +16 -0
- package/src/utils/oauth/nanogpt.ts +15 -0
- package/src/utils/oauth/nvidia.ts +70 -0
- package/src/utils/oauth/oauth.html +199 -0
- package/src/utils/oauth/ollama-cloud.ts +28 -0
- package/src/utils/oauth/ollama.ts +47 -0
- package/src/utils/oauth/openai-codex.ts +299 -0
- package/src/utils/oauth/opencode.ts +49 -0
- package/src/utils/oauth/parallel.ts +46 -0
- package/src/utils/oauth/perplexity.ts +206 -0
- package/src/utils/oauth/pkce.ts +18 -0
- package/src/utils/oauth/qianfan.ts +58 -0
- package/src/utils/oauth/qwen-portal.ts +60 -0
- package/src/utils/oauth/synthetic.ts +16 -0
- package/src/utils/oauth/tavily.ts +46 -0
- package/src/utils/oauth/together.ts +16 -0
- package/src/utils/oauth/types.ts +94 -0
- package/src/utils/oauth/venice.ts +59 -0
- package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
- package/src/utils/oauth/vllm.ts +40 -0
- package/src/utils/oauth/xiaomi.ts +137 -0
- package/src/utils/oauth/zai.ts +60 -0
- package/src/utils/oauth/zenmux.ts +15 -0
- package/src/utils/overflow.ts +137 -0
- package/src/utils/parse-bind.ts +54 -0
- package/src/utils/provider-response.ts +30 -0
- package/src/utils/retry-after.ts +110 -0
- package/src/utils/retry.ts +54 -0
- package/src/utils/schema/CONSTRAINTS.md +164 -0
- package/src/utils/schema/adapt.ts +36 -0
- package/src/utils/schema/compatibility.ts +435 -0
- package/src/utils/schema/dereference.ts +98 -0
- package/src/utils/schema/draft.ts +341 -0
- package/src/utils/schema/equality.ts +97 -0
- package/src/utils/schema/fields.ts +190 -0
- package/src/utils/schema/index.ts +13 -0
- package/src/utils/schema/json-schema-validator.ts +577 -0
- package/src/utils/schema/meta-validator.ts +167 -0
- package/src/utils/schema/normalize.ts +1588 -0
- package/src/utils/schema/spill.ts +43 -0
- package/src/utils/schema/stamps.ts +97 -0
- package/src/utils/schema/types.ts +11 -0
- package/src/utils/schema/wire.ts +213 -0
- package/src/utils/schema/zod-decontaminate.ts +331 -0
- package/src/utils/sse-debug.ts +289 -0
- package/src/utils/tool-call-healing.ts +271 -0
- package/src/utils/tool-choice.ts +99 -0
- package/src/utils/validation.ts +1019 -0
- package/src/utils.ts +166 -0
|
@@ -0,0 +1,717 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* gjc auth-gateway HTTP server.
|
|
3
|
+
*
|
|
4
|
+
* Accepts any provider-format request (OpenAI chat-completions, Anthropic
|
|
5
|
+
* messages, OpenAI Responses) and dispatches through pi-ai's `streamSimple()`
|
|
6
|
+
* — which handles credential injection, anthropic-beta headers, OpenAI code backend
|
|
7
|
+
* websocket transport, and all the per-provider intricacies. The gateway is
|
|
8
|
+
* pure protocol translation: foreign wire → gjc Context → pi-ai stream() →
|
|
9
|
+
* gjc events → foreign wire.
|
|
10
|
+
*
|
|
11
|
+
* Endpoints:
|
|
12
|
+
* GET /healthz → unauth; ok + version
|
|
13
|
+
* GET /v1/usage → aggregated provider usage (5-min per-credential cache via AuthStorage)
|
|
14
|
+
* GET /v1/credentials/check → per-credential auth probe (diagnose 401s in a multi-account pool)
|
|
15
|
+
* GET /v1/models → list known models from the registry
|
|
16
|
+
* POST /v1/chat/completions → OpenAI chat-completions in/out
|
|
17
|
+
* POST /v1/messages → Anthropic messages in/out
|
|
18
|
+
* POST /v1/responses → OpenAI Responses in/out
|
|
19
|
+
*/
|
|
20
|
+
import { logger } from "@gajae-code/utils";
|
|
21
|
+
import type { AuthStorage } from "../auth-storage";
|
|
22
|
+
import { Effort } from "../model-thinking";
|
|
23
|
+
import * as anthropicMessages from "../providers/anthropic-messages-server";
|
|
24
|
+
import * as openaiChat from "../providers/openai-chat-server";
|
|
25
|
+
import * as openaiResponses from "../providers/openai-responses-server";
|
|
26
|
+
import * as piNative from "../providers/pi-native-server";
|
|
27
|
+
import { streamSimple } from "../stream";
|
|
28
|
+
import type { Api, AssistantMessageEventStream, Context, Model, SimpleStreamOptions } from "../types";
|
|
29
|
+
import { parseBind } from "../utils/parse-bind";
|
|
30
|
+
import { captureRequestHeaders, corsHeaders, isAuthorized, json, resolvePeer, withCors } from "./http";
|
|
31
|
+
import type {
|
|
32
|
+
AuthGatewayServerHandle,
|
|
33
|
+
AuthGatewayServerOptions,
|
|
34
|
+
AuthGatewayFormatModule as FormatModule,
|
|
35
|
+
AuthGatewayParsedRequest as ParsedFormatRequest,
|
|
36
|
+
} from "./types";
|
|
37
|
+
import { DEFAULT_AUTH_GATEWAY_BIND } from "./types";
|
|
38
|
+
|
|
39
|
+
// ParsedFormatRequest / ParsedFormatOptions / FormatModule come from ./types.
|
|
40
|
+
|
|
41
|
+
/**
 * Callback that maps a client-requested model id to a `Model<Api>`.
 * The signature allows `undefined` as a result (presumably for ids the
 * resolver does not recognise — confirm against the caller's registry).
 */
export type ModelResolver = (modelId: string) => Model<Api> | undefined;
|
|
42
|
+
|
|
43
|
+
/**
 * Boot-time configuration for the auth gateway: everything in
 * `AuthGatewayServerOptions` plus the collaborators the gateway cannot
 * construct on its own (credential storage and model lookup).
 */
export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
	/** Credential source; the caller is expected to pass a broker-backed AuthStorage. */
	storage: AuthStorage;
	/**
	 * Turns the model id sent by the client into a pi-ai Model. The caller
	 * provides it from a ModelRegistry, which lives in `coding-agent` so that
	 * `pi-ai` does not pick up an inverse dependency.
	 */
	resolveModel: ModelResolver;
	/** Optional provider of the complete model list served by `/v1/models`. */
	listModels?: () => Iterable<Model<Api>>;
}
|
|
55
|
+
|
|
56
|
+
// `parseBind` lives in ../utils/parse-bind so the gateway and broker can't
|
|
57
|
+
// drift on accepted inputs (e.g. empty hostname, IPv6 brackets).
|
|
58
|
+
|
|
59
|
+
/**
 * Request path → wire-format handler table. Each entry pairs the format
 * module used for that path with a short label identifying the format.
 */
const FORMAT_ROUTES: Record<string, { module: FormatModule; label: string }> = {
	// OpenAI chat-completions wire format.
	"/v1/chat/completions": { module: openaiChat, label: "openai-chat" },
	// Anthropic messages wire format.
	"/v1/messages": { module: anthropicMessages, label: "anthropic-messages" },
	// OpenAI Responses wire format.
	"/v1/responses": { module: openaiResponses, label: "openai-responses" },
};
|
|
64
|
+
|
|
65
|
+
// (passthrough fast-path removed — it bypassed pi-ai provider logic, in
|
|
66
|
+
// particular the Anthropic Anthropic-code OAuth system-prompt prefix injection.
|
|
67
|
+
// Every request now takes the translate path so credential-specific request
|
|
68
|
+
// shaping always applies.)
|
|
69
|
+
|
|
70
|
+
// Options the caller's wire format may carry but the resolved provider can't
|
|
71
|
+
// honour are dropped silently in `buildStreamOptions`. We used to 400 here
|
|
72
|
+
// (`Unsupported option: temperature for OpenAI code provider-responses`), but every
|
|
73
|
+
// realistic client (llm-git, openai SDK, anthropic SDK) bakes some of these
|
|
74
|
+
// defaults in without knowing which model they'll resolve to. Failing loudly
|
|
75
|
+
// just turned that into per-call config hell. Silent strip is what the
|
|
76
|
+
// upstream provider would do anyway when it ignores extra fields.
|
|
77
|
+
|
|
78
|
+
/**
 * Build a deterministic cache identity for a request from the pieces that
 * stay constant across the turns of one logical conversation:
 *
 *   - the model id,
 *   - the system prompt (segments joined with a blank line), when non-empty,
 *   - the tool definitions (JSON-serialised), when non-empty,
 *   - the first message (role + content only), when present.
 *
 * Per the original notes, `openai-codex-responses`-class backends only cache
 * prefixes when an explicit `prompt_cache_key` is supplied, whereas
 * Anthropic-style clients signal caching through `cache_control` markers.
 * Deriving a key here lets those clients benefit from keyed prefix caching.
 * Seeding with the first message keeps two different chats that share a
 * system prompt in separate cache buckets. Anthropic-backed requests ignore
 * `sessionId`, so the value is harmless there.
 *
 * @param modelId Model id requested by the client.
 * @param context Parsed conversation context (system prompt, tools, messages).
 * @returns The first 128 bits of the SHA-256 of the seed, laid out in
 *   UUID shape (8-4-4-4-12 hex digits, 36 characters).
 */
function deriveSessionId(modelId: string, context: Context): string {
	const { systemPrompt, tools, messages } = context;
	const segments: string[] = [modelId];

	if (systemPrompt?.length) {
		segments.push(systemPrompt.join("\n\n"));
	}
	if (tools?.length) {
		segments.push(JSON.stringify(tools));
	}

	const opener = messages?.[0];
	if (opener) {
		// Only role + content go into the hash: timestamps and provider
		// metadata are re-stamped on every parse and would destabilise the key
		// between turns of the same conversation.
		segments.push(JSON.stringify({ role: opener.role, content: opener.content }));
	}

	// NUL-separated so adjacent segments cannot blur into one another.
	const digest = new Bun.CryptoHasher("sha256").update(segments.join("\u0000")).digest("hex");

	// Slice boundaries for the 8-4-4-4-12 UUID layout. The 36-character result
	// is short enough (≤64 chars) to pass through
	// `normalizeOpenAIResponsesPromptCacheKey` verbatim, per the original note.
	const cuts = [0, 8, 12, 16, 20, 32];
	return cuts
		.slice(1)
		.map((end, index) => digest.slice(cuts[index], end))
		.join("-");
}
|
|
116
|
+
|
|
117
|
+
/**
 * Translate the typed options of a parsed wire-format request into the
 * `SimpleStreamOptions` passed to the provider stream.
 *
 * Options are copied only when the client actually supplied them. For the
 * `openai-codex-responses` api, `temperature` and `topP` are dropped instead
 * of forwarded (the original note says that backend rejects them). Options
 * that have no slot in `SimpleStreamOptions` are not forwarded at all; their
 * presence is reported once through a debug log.
 *
 * @param parsed Parsed request (model id, context, typed options).
 * @param api Api identifier of the resolved model.
 * @param signal Abort signal to attach to the stream options.
 * @returns Freshly built stream options; `parsed` is not mutated.
 */
function buildStreamOptions(parsed: ParsedFormatRequest, api: Api, signal: AbortSignal): SimpleStreamOptions {
	const { options } = parsed;
	const streamOpts: SimpleStreamOptions = { signal };

	// Sampling knobs are withheld for this one api only; other providers are
	// left to ignore whatever they do not honour.
	const samplingAllowed = api !== "openai-codex-responses";

	if (options.maxOutputTokens !== undefined) {
		streamOpts.maxTokens = options.maxOutputTokens;
	}
	if (samplingAllowed && options.temperature !== undefined) {
		streamOpts.temperature = options.temperature;
	}
	if (samplingAllowed && options.topP !== undefined) {
		streamOpts.topP = options.topP;
	}
	if (options.topK !== undefined) {
		streamOpts.topK = options.topK;
	}
	if (options.minP !== undefined) {
		streamOpts.minP = options.minP;
	}
	if (options.stopSequences !== undefined) {
		streamOpts.stopSequences = options.stopSequences;
	}
	if (options.presencePenalty !== undefined) {
		streamOpts.presencePenalty = options.presencePenalty;
	}
	if (options.frequencyPenalty !== undefined) {
		streamOpts.frequencyPenalty = options.frequencyPenalty;
	}
	if (options.repetitionPenalty !== undefined) {
		streamOpts.repetitionPenalty = options.repetitionPenalty;
	}
	if (options.metadata !== undefined) {
		streamOpts.metadata = options.metadata;
	}
	if (options.headers !== undefined) {
		// Copy rather than alias so later edits cannot leak back into `parsed`.
		streamOpts.headers = { ...streamOpts.headers, ...options.headers };
	}

	const { toolChoice } = options;
	if (toolChoice !== undefined) {
		// Object form names a specific tool; string forms pass through as-is.
		streamOpts.toolChoice = typeof toolChoice === "object" ? { type: "tool", name: toolChoice.name } : toolChoice;
	}

	if (options.reasoning !== undefined) {
		streamOpts.reasoning = options.reasoning;
	}
	if (options.disableReasoning !== undefined) {
		streamOpts.disableReasoning = options.disableReasoning;
	}
	if (options.hideThinkingSummary !== undefined) {
		streamOpts.hideThinkingSummary = options.hideThinkingSummary;
	}
	if (options.serviceTier !== undefined) {
		streamOpts.serviceTier = options.serviceTier;
	}
	if (options.cacheRetention !== undefined) {
		streamOpts.cacheRetention = options.cacheRetention;
	}

	// A client-supplied `prompt_cache_key` takes precedence; otherwise fall
	// back to the key derived from the request by `deriveSessionId`.
	streamOpts.sessionId = options.promptCacheKey ?? deriveSessionId(parsed.modelId, parsed.context);

	if (options.thinkingBudgets) {
		streamOpts.thinkingBudgets = { ...streamOpts.thinkingBudgets, ...options.thinkingBudgets };
	}

	const pinnedBudget = options.explicitThinkingBudgetTokens;
	if (pinnedBudget !== undefined) {
		// An explicit budget is pinned onto the effort the client asked for
		// (High when none was given) and the effort itself is set as well, so
		// providers that gate on `reasoning` actually apply the budget. The
		// original note describes this as mirroring Rust's
		// `resolve_thinking_budget`.
		const effort = options.reasoning ?? Effort.High;
		streamOpts.thinkingBudgets = { ...streamOpts.thinkingBudgets, [effort]: pinnedBudget };
		streamOpts.reasoning ??= effort;
	}

	// Typed fields without a `SimpleStreamOptions` counterpart. They are only
	// logged, never tunnelled through an untyped bag, so consumers do not have
	// to re-parse them.
	// TODO(pi-ai): land first-class fields and replace these blocks.
	const { parallelToolCalls, previousResponseId, seed, logitBias, user, responseFormat } = options;
	const unmapped = [parallelToolCalls, previousResponseId, seed, logitBias, user, responseFormat];
	if (unmapped.some(value => value !== undefined)) {
		logger.debug("auth-gateway dropped unsupported typed options", {
			api,
			parallelToolCalls,
			previousResponseId,
			seed,
			hasLogitBias: logitBias !== undefined,
			user,
			hasResponseFormat: responseFormat !== undefined,
		});
	}

	return streamOpts;
}
|
|
189
|
+
|
|
190
|
+
// Message heuristics used by `classifyGatewayError`. They are boundary-aware
// so that a status code only counts when it stands alone (not inside a longer
// digit run such as a request id) and "rate" only counts at the start of a
// word ("rate limit", "rate_limit", "x-ratelimit"), not inside words such as
// "generate" or "operate".
const GATEWAY_AUTH_CODE_RE = /(?<!\d)(?:401|403)(?!\d)/;
const GATEWAY_RATE_CODE_RE = /(?<!\d)429(?!\d)/;
const GATEWAY_RATE_WORD_RE = /(?<![a-z])rate/;

/**
 * Classify an upstream / gateway-internal error into an HTTP status code and
 * a provider-style error type tag, so the caller can hand both to
 * `route.module.formatError` and every wire format emits its native error
 * envelope.
 *
 * Resolution order:
 *   1. A numeric `status` property on the error (truncated to an integer):
 *      401/403 → `authentication_error`, 429 → `rate_limit_error`, any other
 *      4xx → `invalid_request_error`, 5xx → `upstream_error`. Values below
 *      400 fall through to the message heuristics.
 *   2. `AbortError` by name, or "aborted" / "abortsignal" in the message →
 *      499 `request_aborted`.
 *   3. Message heuristics (case-insensitive): auth markers → 401, rate/quota
 *      markers → 429, "unsupported" / "invalid" → 400.
 *   4. Anything else → 502 `upstream_error`.
 *
 * @param err Thrown value; may be an `Error`, an object with `status`, or
 *   anything else (stringified for the message).
 * @returns `{ status, type, message }` where `message` is the error's message
 *   (or its string form for non-Error values).
 */
function classifyGatewayError(err: unknown): { status: number; type: string; message: string } {
	const message = err instanceof Error ? err.message : String(err);
	const lower = message.toLowerCase();

	// Custom errors may carry a numeric `status`; trust it over the message.
	// `| 0` truncates to an integer (and maps NaN / Infinity to 0, which then
	// falls through to the heuristics below).
	const statusProp =
		typeof err === "object" && err !== null && typeof (err as { status?: unknown }).status === "number"
			? (err as { status: number }).status | 0
			: undefined;
	if (statusProp !== undefined) {
		if (statusProp === 401 || statusProp === 403) {
			return { status: statusProp, type: "authentication_error", message };
		}
		if (statusProp === 429) {
			return { status: 429, type: "rate_limit_error", message };
		}
		if (statusProp >= 400 && statusProp < 500) {
			return { status: statusProp, type: "invalid_request_error", message };
		}
		if (statusProp >= 500) {
			return { status: statusProp, type: "upstream_error", message };
		}
	}

	if (err instanceof Error && err.name === "AbortError") {
		return { status: 499, type: "request_aborted", message };
	}
	if (lower.includes("aborted") || lower.includes("abortsignal")) {
		return { status: 499, type: "request_aborted", message };
	}

	// Without a status property both 401- and 403-flavoured messages are
	// reported as 401 (unchanged from the previous behaviour).
	if (GATEWAY_AUTH_CODE_RE.test(lower) || lower.includes("unauthorized") || lower.includes("forbidden")) {
		return { status: 401, type: "authentication_error", message };
	}
	if (GATEWAY_RATE_CODE_RE.test(lower) || GATEWAY_RATE_WORD_RE.test(lower) || lower.includes("quota")) {
		return { status: 429, type: "rate_limit_error", message };
	}
	if (lower.includes("unsupported") || lower.includes("invalid")) {
		return { status: 400, type: "invalid_request_error", message };
	}
	return { status: 502, type: "upstream_error", message };
}
|
|
234
|
+
|
|
235
|
+
/**
 * Recovery hook invoked after a provider request failed with an auth error.
 *
 * Invalidates the stored credential matching `oldKey` for `provider`, emits a
 * debug log line describing the retry, and then asks storage for an API key
 * again.
 *
 * @param storage  Credential store to invalidate against and re-query.
 * @param model    Model being served; only its `id` is forwarded to the lookup.
 * @param provider Provider whose credential was rejected.
 * @param oldKey   The key that was rejected.
 * @param error    The auth failure; only its message is logged.
 * @param signal   Abort signal forwarded to both storage calls.
 * @param format   Route label, used for logging only.
 * @param peer     Peer identifier, used for logging only.
 * @returns Whatever `storage.getApiKey` resolves to (possibly `undefined`).
 */
async function refreshGatewayApiKeyAfterAuthError(
	storage: AuthStorage,
	model: Model<Api>,
	provider: string,
	oldKey: string,
	error: unknown,
	signal: AbortSignal,
	format: string,
	peer: string,
): Promise<string | undefined> {
	// The rejected key must be invalidated before we look up a replacement.
	await storage.invalidateCredentialMatching(provider, oldKey, signal);

	const reason = error instanceof Error ? error.message : String(error);
	logger.debug("auth-gateway retrying provider request after credential invalidation", {
		format,
		provider,
		peer,
		error: reason,
	});

	const lookup = { modelId: model.id, signal };
	return storage.getApiKey(provider, undefined, lookup);
}
|
|
254
|
+
|
|
255
|
+
/**
 * Builds the 499 `request_aborted` error response ("client closed request")
 * using the error envelope of the route's own format module.
 */
function clientClosedResponse(route: { module: FormatModule }): Response {
	const { module: wire } = route;
	return wire.formatError(499, "request_aborted", "client closed request");
}
|
|
258
|
+
|
|
259
|
+
/**
 * Creates a new AbortController that follows the request's abort signal.
 *
 * If the request is already aborted the returned controller is aborted
 * immediately with the same reason; otherwise a one-shot `abort` listener
 * forwards the abort (and its reason) when it happens.
 */
function mirrorRequestAbort(req: Request): AbortController {
	const mirror = new AbortController();
	const source = req.signal;
	if (!source.aborted) {
		source.addEventListener("abort", () => mirror.abort(source.reason), { once: true });
		return mirror;
	}
	mirror.abort(source.reason);
	return mirror;
}
|
|
268
|
+
|
|
269
|
+
// (handlePassthrough removed — see note above.)
|
|
270
|
+
|
|
271
|
+
/**
 * Serves one foreign-wire-format endpoint (the format is chosen by `route`).
 *
 * Pipeline: mirror the client's abort signal, decode the JSON body, read the
 * top-level `model` id, resolve the model, fetch a credential from storage,
 * run the format module's parser, dispatch through `streamSimple`, and encode
 * the result back into the inbound format (a JSON body for non-streaming
 * requests, an SSE stream otherwise).
 *
 * The mirrored abort signal is re-checked after each step; once it is aborted
 * the handler answers with the route's 499 "client closed request" envelope.
 * All other failures are reported through `route.module.formatError`.
 *
 * @param route    Format module plus a label used in log lines.
 * @param bootOpts Gateway boot options (model resolver and credential storage).
 * @param req      Incoming HTTP request.
 * @param peer     Peer identifier, used for logging only.
 */
async function handleFormatEndpoint(
	route: { module: FormatModule; label: string },
	bootOpts: AuthGatewayBootOptions,
	req: Request,
	peer: string,
): Promise<Response> {
	const { module: wire, label } = route;
	const { signal } = mirrorRequestAbort(req);
	const closed = (): Response => clientClosedResponse(route);
	if (signal.aborted) return closed();

	// Decode the body. A client disconnect mid-read surfaces as a decode
	// failure, so the abort state is consulted before blaming the JSON.
	let body: unknown;
	try {
		body = await req.json();
	} catch (decodeError) {
		return signal.aborted
			? closed()
			: wire.formatError(400, "invalid_request_error", `Invalid JSON body: ${String(decodeError)}`);
	}
	if (signal.aborted) return closed();

	// The model id is read from the top-level `model` field before the strict
	// schema runs, so a missing id yields a coherent error envelope.
	let requestedModel: string | undefined;
	if (typeof body === "object" && body !== null) {
		const candidate = (body as { model?: unknown }).model;
		if (typeof candidate === "string") requestedModel = candidate;
	}
	if (!requestedModel) {
		return wire.formatError(400, "invalid_request_error", "Missing top-level `model` field");
	}

	const model = bootOpts.resolveModel(requestedModel);
	if (!model) {
		return wire.formatError(404, "invalid_request_error", `Unknown model: ${requestedModel}`);
	}

	// streamSimple does not look credentials up itself; the gateway resolves
	// the key here and hands it over as `apiKey` on the stream options.
	let apiKey: string | undefined;
	try {
		apiKey = await bootOpts.storage.getApiKey(model.provider, undefined, { modelId: model.id, signal });
	} catch (lookupError) {
		if (signal.aborted) return closed();
		const { status, type, message } = classifyGatewayError(lookupError);
		logger.warn("auth-gateway getApiKey threw", { provider: model.provider, peer, error: message });
		return wire.formatError(status, type, message);
	}
	if (signal.aborted) return closed();
	if (!apiKey) {
		return wire.formatError(
			401,
			"authentication_error",
			`No credential available for provider ${model.provider}`,
		);
	}

	// Strict validation and conversion to the canonical request. There is no
	// passthrough fast-path: every request goes through streamSimple.
	let parsed: ParsedFormatRequest;
	try {
		parsed = wire.parseRequest(body, req.headers);
	} catch (parseError) {
		if (signal.aborted) return closed();
		const reason = parseError instanceof Error ? parseError.message : String(parseError);
		return wire.formatError(400, "invalid_request_error", reason);
	}

	// Gateway-captured headers go underneath the parser's own headers, so any
	// value the parser set wins on collision.
	const passthrough = captureRequestHeaders(req.headers);
	parsed.options.headers = { ...passthrough, ...parsed.options.headers };
	if (signal.aborted) return closed();

	const streamOpts = buildStreamOptions(parsed, model.api, signal);
	streamOpts.apiKey = apiKey;
	streamOpts.onAuthError = (failedProvider, staleKey, cause) =>
		refreshGatewayApiKeyAfterAuthError(
			bootOpts.storage,
			model,
			failedProvider,
			staleKey,
			cause,
			signal,
			label,
			peer,
		);

	logger.info("auth-gateway request", {
		format: label,
		model: parsed.modelId,
		resolvedProvider: model.provider,
		resolvedModel: model.id,
		stream: parsed.stream,
		peer,
	});

	let events: AssistantMessageEventStream;
	try {
		if (signal.aborted) return closed();
		events = streamSimple(model, parsed.context, streamOpts);
	} catch (startError) {
		const { status, type, message } = classifyGatewayError(startError);
		logger.warn("auth-gateway streamSimple threw", { format: label, error: message, peer });
		return wire.formatError(status, type, message);
	}

	if (!parsed.stream) {
		// Non-streaming: wait for the final message and answer with one JSON body.
		try {
			if (signal.aborted) return closed();
			const message = await events.result();
			const { stopReason } = message;
			if (stopReason !== "aborted" && stopReason !== "error") {
				return json(200, wire.encodeResponse(message, parsed.modelId));
			}

			const wasAborted = stopReason === "aborted";
			const errorMessage =
				message.errorMessage ?? (wasAborted ? "Request was aborted" : "Upstream request failed");
			logger.warn("auth-gateway non-streaming failed", {
				format: label,
				reason: stopReason,
				error: errorMessage,
				peer,
			});
			if (wasAborted) {
				return wire.formatError(499, "request_aborted", errorMessage);
			}
			// Only the message text survives on the final message, so the
			// status/type are derived by classifying that text.
			const { status, type } = classifyGatewayError(new Error(errorMessage));
			return wire.formatError(status, type, errorMessage);
		} catch (resultError) {
			if (signal.aborted) return closed();
			const { status, type, message } = classifyGatewayError(resultError);
			logger.warn("auth-gateway non-streaming aborted", {
				format: label,
				error: message,
				peer,
			});
			return wire.formatError(status, type, message);
		}
	}
	if (signal.aborted) return closed();

	const sseBody = wire.encodeStream(events, parsed.modelId, parsed.options);
	return new Response(sseBody, {
		status: 200,
		headers: {
			"Content-Type": "text/event-stream; charset=utf-8",
			"Cache-Control": "no-cache",
			Connection: "keep-alive",
			// Turns off proxy buffering (nginx and ingress controllers honor
			// this header) so SSE events are not held back until a buffer
			// flushes during long-thinking-budget calls.
			"X-Accel-Buffering": "no",
		},
	});
}
|
|
435
|
+
|
|
436
|
+
/**
 * Pi-native fast path: `POST /v1/pi/stream`.
 *
 * Takes the canonical pi-ai `Context` directly, skipping the OpenAI /
 * Anthropic / Responses wire-format translation used to bridge foreign SDKs,
 * and emits the event stream produced by `piNative.encodeStream`.
 *
 * The remaining gateway concerns still apply on this path: model resolution,
 * credential lookup, abort mirroring, the temperature/topP strip for the
 * `openai-codex-responses` API, passthrough-header capture and session-id
 * derivation. Only request parsing and response/stream encoding differ from
 * the format-endpoint path.
 *
 * @param bootOpts Gateway boot options (model resolver and credential storage).
 * @param req      Incoming HTTP request.
 * @param peer     Peer identifier, used for logging only.
 */
async function handlePiNative(bootOpts: AuthGatewayBootOptions, req: Request, peer: string): Promise<Response> {
	const { signal } = mirrorRequestAbort(req);
	const closed = (): Response => piNative.formatError(499, "request_aborted", "client closed request");
	if (signal.aborted) return closed();

	// Decode the body; a client disconnect mid-read is reported as 499, not 400.
	let body: unknown;
	try {
		body = await req.json();
	} catch (decodeError) {
		return signal.aborted
			? closed()
			: piNative.formatError(400, "invalid_request_error", `Invalid JSON body: ${String(decodeError)}`);
	}
	if (signal.aborted) return closed();

	let parsed: piNative.PiNativeParsedRequest;
	try {
		parsed = piNative.parseRequest(body, req.headers);
	} catch (parseError) {
		if (signal.aborted) return closed();
		const reason = parseError instanceof Error ? parseError.message : String(parseError);
		return piNative.formatError(400, "invalid_request_error", reason);
	}

	const model = bootOpts.resolveModel(parsed.modelId);
	if (!model) {
		return piNative.formatError(404, "invalid_request_error", `Unknown model: ${parsed.modelId}`);
	}

	let apiKey: string | undefined;
	try {
		apiKey = await bootOpts.storage.getApiKey(model.provider, undefined, { modelId: model.id, signal });
	} catch (lookupError) {
		if (signal.aborted) return closed();
		const { status, type, message } = classifyGatewayError(lookupError);
		logger.warn("auth-gateway getApiKey threw", { provider: model.provider, peer, error: message });
		return piNative.formatError(status, type, message);
	}
	if (signal.aborted) return closed();
	if (!apiKey) {
		return piNative.formatError(
			401,
			"authentication_error",
			`No credential available for provider ${model.provider}`,
		);
	}

	// Options handed to `streamSimple`: the client's options (allow-listed by
	// `parseRequest`) with the server-controlled fields layered on top.
	const streamOpts: SimpleStreamOptions = { ...parsed.options, apiKey, signal };
	streamOpts.onAuthError = (failedProvider, staleKey, cause) =>
		refreshGatewayApiKeyAfterAuthError(
			bootOpts.storage,
			model,
			failedProvider,
			staleKey,
			cause,
			signal,
			"pi-native",
			peer,
		);
	// The OpenAI code backend rejects temperature/topP with a 400, so they are
	// removed for that API (same strip as `buildStreamOptions`).
	if (model.api === "openai-codex-responses") {
		delete streamOpts.temperature;
		delete streamOpts.topP;
	}
	// Gateway-captured headers sit underneath the client's own headers, so the
	// client's values win on collision.
	const passthrough = captureRequestHeaders(req.headers);
	streamOpts.headers = { ...passthrough, ...streamOpts.headers };
	// Cache identity: an explicit `sessionId` wins; otherwise one is derived
	// from the model id and context so prefix caching can engage across turns.
	if (streamOpts.sessionId == null) {
		streamOpts.sessionId = deriveSessionId(parsed.modelId, parsed.context);
	}

	logger.info("auth-gateway request", {
		format: "pi-native",
		model: parsed.modelId,
		resolvedProvider: model.provider,
		resolvedModel: model.id,
		stream: parsed.stream,
		peer,
	});

	let events: AssistantMessageEventStream;
	try {
		if (signal.aborted) return closed();
		events = streamSimple(model, parsed.context, streamOpts);
	} catch (startError) {
		const { status, type, message } = classifyGatewayError(startError);
		logger.warn("auth-gateway streamSimple threw", { format: "pi-native", error: message, peer });
		return piNative.formatError(status, type, message);
	}

	if (!parsed.stream) {
		// Non-streaming: wait for the final message and answer with one JSON body.
		try {
			if (signal.aborted) return closed();
			const message = await events.result();
			const { stopReason } = message;
			if (stopReason !== "aborted" && stopReason !== "error") {
				return json(200, { message });
			}

			const wasAborted = stopReason === "aborted";
			const errorMessage =
				message.errorMessage ?? (wasAborted ? "Request was aborted" : "Upstream request failed");
			logger.warn("auth-gateway non-streaming failed", {
				format: "pi-native",
				reason: stopReason,
				error: errorMessage,
				peer,
			});
			if (wasAborted) {
				return piNative.formatError(499, "request_aborted", errorMessage);
			}
			// Status/type are derived by classifying the error text.
			const { status, type } = classifyGatewayError(new Error(errorMessage));
			return piNative.formatError(status, type, errorMessage);
		} catch (resultError) {
			if (signal.aborted) return closed();
			const { status, type, message } = classifyGatewayError(resultError);
			logger.warn("auth-gateway non-streaming aborted", { format: "pi-native", error: message, peer });
			return piNative.formatError(status, type, message);
		}
	}
	if (signal.aborted) return closed();

	const sseBody = piNative.encodeStream(events);
	return new Response(sseBody, {
		status: 200,
		headers: {
			"Content-Type": "text/event-stream; charset=utf-8",
			"Cache-Control": "no-cache",
			Connection: "keep-alive",
			"X-Accel-Buffering": "no",
		},
	});
}
|
|
588
|
+
|
|
589
|
+
/**
 * `GET /v1/usage` snapshot.
 *
 * `fetchUsageReports` already caches reports inside `AuthStorage` (5-minute
 * per-credential TTL with jitter, plus a last-good fallback on failure), so
 * this handler is a thin wrapper exposing the same data to HTTP callers such
 * as the macOS usage widget. When storage has no `fetchUsageReports` method,
 * or it yields nothing, an empty report list is returned.
 */
async function handleUsage(storage: AuthStorage, signal: AbortSignal): Promise<Response> {
	const fetched = await storage.fetchUsageReports?.({ signal });
	// The heavy provider-specific `raw` payload is dropped: UI consumers only
	// need `limits` + `metadata`, and the result matches the broker's
	// `/v1/usage` shape so one client struct works against either endpoint.
	const slim = (fetched ?? []).map(report => {
		const { raw: _raw, ...kept } = report;
		return kept;
	});
	return json(200, { generatedAt: Date.now(), reports: slim });
}
|
|
603
|
+
|
|
604
|
+
/**
 * `GET /v1/credentials/check` — per-credential health probe.
 *
 * Shows the caller exactly which row in their broker is producing 401s. The
 * aggregate `/v1/usage` endpoint silently drops failed credentials, which
 * makes it the wrong tool for diagnosing auth problems.
 *
 * Per the storage contract the probe runs sequentially, one credential at a
 * time, so a multi-account pool does not fan out in lockstep and trip per-IP
 * rate limits on provider `/usage` endpoints.
 */
async function handleCredentialsCheck(storage: AuthStorage, signal: AbortSignal): Promise<Response> {
	const probeResults = await storage.checkCredentials({ signal });
	const payload = { generatedAt: Date.now(), credentials: probeResults };
	return json(200, payload);
}
|
|
619
|
+
|
|
620
|
+
/**
 * Model catalog response: `{ object: "list", data: [...] }`, with one entry
 * per model yielded by `opts.listModels()`. The list is empty when no
 * `listModels` callback was configured.
 */
function handleModelsList(opts: AuthGatewayBootOptions): Response {
	if (!opts.listModels) {
		return json(200, { object: "list", data: [] });
	}
	const data = Array.from(opts.listModels(), ({ id, provider, api }) => ({
		id,
		object: "model" as const,
		owned_by: provider,
		api,
	}));
	return json(200, { object: "list", data });
}
|
|
630
|
+
|
|
631
|
+
/**
 * Boots the auth-gateway HTTP server on the configured bind address.
 *
 * Routing, in order:
 *  - `OPTIONS *`                 → 204 CORS preflight (no auth)
 *  - `GET /healthz`              → liveness + version (no auth)
 *  - everything below requires a bearer token accepted by `isAuthorized`
 *  - `GET /v1/usage`             → aggregated usage reports
 *  - `GET /v1/credentials/check` → per-credential auth probe
 *  - `POST <FORMAT_ROUTES key>`  → foreign wire-format endpoints
 *  - `POST /v1/pi/stream`        → pi-native fast path
 *  - `GET /v1/models`            → model catalog
 *  - anything else               → plain JSON 404
 *
 * Any exception escaping a handler is logged and answered with a JSON 500.
 *
 * @param opts Boot options: bind address, bearer tokens, version, storage and
 *             the model resolver / lister callbacks.
 * @returns A handle with the bound `url`, `port`, `hostname` and an async
 *          `close()` that force-stops the server and resolves once the stop
 *          has completed.
 */
export function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle {
	const bind = parseBind(opts.bind ?? DEFAULT_AUTH_GATEWAY_BIND);
	const tokens = new Set<string>(opts.bearerTokens);
	const version = opts.version;

	const server = Bun.serve({
		hostname: bind.hostname,
		port: bind.port,
		fetch: async (req): Promise<Response> => {
			const url = new URL(req.url);
			const pathname = url.pathname;
			const peer = resolvePeer(req);
			// CORS preflight is always answered without auth — browsers send
			// preflights pre-authentication and a 401 here breaks the actual
			// request before the bearer is ever attached.
			if (req.method === "OPTIONS") {
				return new Response(null, { status: 204, headers: corsHeaders(req) });
			}
			try {
				if (req.method === "GET" && pathname === "/healthz") {
					return withCors(json(200, { ok: true, version }), req);
				}
				if (!isAuthorized(req, tokens)) {
					logger.info("auth-gateway request unauthorized", { method: req.method, path: pathname, peer });
					return withCors(json(401, { error: "unauthorized" }), req);
				}

				// Aggregated usage — backed by AuthStorage's 5-min per-credential cache.
				// Same shape as the broker's `/v1/usage`, so widget/llm-git speak to
				// either with the same client struct.
				if (req.method === "GET" && pathname === "/v1/usage") {
					return withCors(await handleUsage(opts.storage, req.signal), req);
				}

				// Per-credential auth probe — diagnoses which row in a multi-account
				// pool is producing 401s. Aggregated `/v1/usage` silently drops failed
				// credentials, so we need a separate endpoint that captures errors.
				if (req.method === "GET" && pathname === "/v1/credentials/check") {
					return withCors(await handleCredentialsCheck(opts.storage, req.signal), req);
				}

				// Provider-format dispatch.
				const formatRoute = FORMAT_ROUTES[pathname];
				if (formatRoute && req.method === "POST") {
					return withCors(await handleFormatEndpoint(formatRoute, opts, req, peer), req);
				}

				// Pi-native fast path. Same auth + provider plumbing as the
				// foreign-wire routes, just without the wire-format translation.
				if (req.method === "POST" && pathname === "/v1/pi/stream") {
					return withCors(await handlePiNative(opts, req, peer), req);
				}

				// Model catalog.
				if (req.method === "GET" && pathname === "/v1/models") {
					return withCors(handleModelsList(opts), req);
				}

				// Route-table miss: no format module to defer to, so we emit a
				// plain JSON 404 rather than guessing at a protocol-specific envelope.
				return withCors(json(404, { error: `No route: ${req.method} ${pathname}` }), req);
			} catch (error) {
				logger.error("auth-gateway handler crashed", {
					method: req.method,
					path: pathname,
					peer,
					error: String(error),
				});
				return withCors(json(500, { error: "internal error" }), req);
			}
		},
		// Max-out Bun's idle timeout. Long thinking-budget calls can sit idle
		// for minutes before the first token arrives; the default kills them.
		idleTimeout: 255,
	});

	const boundHost = server.hostname ?? bind.hostname;
	const boundPort = server.port ?? bind.port;
	// An IPv6 literal must be bracketed inside a URL authority (RFC 3986
	// §3.2.2); without brackets `http://::1:8080` is not a valid URL. Hosts
	// that are already bracketed, and non-IPv6 hosts, are left unchanged.
	const urlHost = boundHost.includes(":") && !boundHost.startsWith("[") ? `[${boundHost}]` : boundHost;
	return {
		url: `http://${urlHost}:${boundPort}`,
		port: boundPort,
		hostname: boundHost,
		close: async () => {
			// Await the stop so `close()` resolves only after shutdown finished
			// instead of leaving the returned promise floating.
			await server.stop(true);
		},
	};
}
|