@prometheus-ai/ai 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -17,3 +17,9 @@ export interface AbortSourceTracker {
|
|
|
17
17
|
* the UI showing a spinner the user already tried to cancel).
|
|
18
18
|
*/
|
|
19
19
|
export declare function createAbortSourceTracker(callerSignal?: AbortSignal): AbortSourceTracker;
|
|
20
|
+
/**
|
|
21
|
+
* Race a shared promise against a caller's AbortSignal without coupling the
|
|
22
|
+
* underlying work to that signal. The shared promise keeps running (and caches
|
|
23
|
+
* its result) even when an individual caller bails out.
|
|
24
|
+
*/
|
|
25
|
+
export declare function raceWithSignal<T>(promise: Promise<T>, signal: AbortSignal | undefined): Promise<T>;
|
|
@@ -7,6 +7,8 @@ export declare class EventStream<T, R = T> implements AsyncIterable<T> {
|
|
|
7
7
|
reject: (err: unknown) => void;
|
|
8
8
|
}>;
|
|
9
9
|
done: boolean;
|
|
10
|
+
/** True once finalResultPromise has been resolved or rejected. */
|
|
11
|
+
resultSettled: boolean;
|
|
10
12
|
finalResultPromise: Promise<R>;
|
|
11
13
|
resolveFinalResult: (result: R) => void;
|
|
12
14
|
rejectFinalResult: (err: unknown) => void;
|
|
@@ -15,7 +15,6 @@ export type CapturedHttpErrorResponse = {
|
|
|
15
15
|
};
|
|
16
16
|
export declare function appendRawHttpRequestDumpFor400(message: string, error: unknown, dump: RawHttpRequestDump | undefined): Promise<string>;
|
|
17
17
|
export declare function finalizeErrorMessage(error: unknown, rawRequestDump: RawHttpRequestDump | undefined, capturedErrorResponse?: CapturedHttpErrorResponse): Promise<string>;
|
|
18
|
-
export declare function withHttpStatus(error: unknown, status: number): Error;
|
|
19
18
|
/**
|
|
20
19
|
* Rewrite error message for GitHub Copilot request failures.
|
|
21
20
|
* Must run AFTER finalizeErrorMessage since it replaces the message entirely.
|
|
@@ -76,3 +76,38 @@ export interface IdleTimeoutIteratorOptions {
|
|
|
76
76
|
* before any user-visible content has streamed.
|
|
77
77
|
*/
|
|
78
78
|
export declare function iterateWithIdleTimeout<T>(iterable: AsyncIterable<T>, options: IdleTimeoutIteratorOptions): AsyncGenerator<T>;
|
|
79
|
+
export interface TerminalGraceIteratorOptions {
|
|
80
|
+
/**
|
|
81
|
+
* Epoch-ms timestamp at which the consumer observed a logically terminal
|
|
82
|
+
* item (e.g. a chat-completions chunk carrying `finish_reason`), or
|
|
83
|
+
* `undefined` while the stream is still mid-response. Read before every
|
|
84
|
+
* pull, so the consumer can flip it between yields.
|
|
85
|
+
*/
|
|
86
|
+
finishedAtMs: () => number | undefined;
|
|
87
|
+
/**
|
|
88
|
+
* Post-terminal budget: how long after `finishedAtMs()` to keep draining
|
|
89
|
+
* trailing items (e.g. a usage-only chunk or the `[DONE]` sentinel) before
|
|
90
|
+
* ending the iteration cleanly. The deadline is fixed at
|
|
91
|
+
* `finishedAtMs() + graceMs`; trailing items do not extend it, so
|
|
92
|
+
* keepalive-only servers cannot hold the stream open.
|
|
93
|
+
*/
|
|
94
|
+
graceMs: number;
|
|
95
|
+
/**
|
|
96
|
+
* Invoked when the grace window closes with the source still open. Use it
|
|
97
|
+
* to abort the underlying request: the source generator is typically parked
|
|
98
|
+
* mid-`next()` (not at a yield), so a queued `.return()` alone cannot reach
|
|
99
|
+
* the transport until that pending read settles.
|
|
100
|
+
*/
|
|
101
|
+
onGraceEnd?: () => void;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Yields items from an async iterable until the consumer marks the stream
|
|
105
|
+
* logically finished AND the source stays silent past a short grace window.
|
|
106
|
+
*
|
|
107
|
+
* Misbehaving OpenAI-compatible servers deliver the terminal chunk but never
|
|
108
|
+
* send `[DONE]` nor close the connection; without this guard the consumer
|
|
109
|
+
* hangs on `iterator.next()` until the idle watchdog converts an
|
|
110
|
+
* already-successful turn into a timeout error. Grace expiry is a clean end
|
|
111
|
+
* of iteration, never an error.
|
|
112
|
+
*/
|
|
113
|
+
export declare function iterateWithTerminalGrace<T>(iterable: AsyncIterable<T>, options: TerminalGraceIteratorOptions): AsyncGenerator<T>;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON-POST → SSE transport for OpenAI-wire streaming endpoints (chat
|
|
3
|
+
* completions, responses, azure responses). Replaces the `openai` SDK client:
|
|
4
|
+
*
|
|
5
|
+
* - Retries: `fetchWithRetry` (Retry-After/quota-hint aware; 5xx/408/429 and
|
|
6
|
+
* transient network errors). Default 6 total attempts — parity with the
|
|
7
|
+
* SDK's former `maxRetries: 5`.
|
|
8
|
+
* - SSE decode: `readSseJson` (spec-compliant framing, `[DONE]`-aware).
|
|
9
|
+
* `onSseEvent` observers now receive real wire frames instead of events
|
|
10
|
+
* re-synthesized from decoded SDK objects.
|
|
11
|
+
* - Errors: {@link OpenAIHttpError} exposes `status`/`headers`/`code`
|
|
12
|
+
* structurally (ProviderHttpError contract — `extractHttpStatusFromError`,
|
|
13
|
+
* retry-after extraction, copilot transient classification) and carries the
|
|
14
|
+
* captured response body for the strict-tools fallback and the responses
|
|
15
|
+
* chain-state detectors, which regex over `error.message`.
|
|
16
|
+
*/
|
|
17
|
+
import { type SseEventObserver } from "@prometheus-ai/utils";
|
|
18
|
+
import { ProviderHttpError } from "../errors";
|
|
19
|
+
import type { FetchImpl } from "../types";
|
|
20
|
+
import type { CapturedHttpErrorResponse } from "./http-inspector";
|
|
21
|
+
/** Non-2xx response from an OpenAI-wire endpoint, with the decoded body attached. */
|
|
22
|
+
export declare class OpenAIHttpError extends ProviderHttpError {
|
|
23
|
+
readonly captured: CapturedHttpErrorResponse;
|
|
24
|
+
constructor(message: string, captured: CapturedHttpErrorResponse, code: string | undefined);
|
|
25
|
+
}
|
|
26
|
+
export interface OpenAIStreamRequestInit {
|
|
27
|
+
url: string;
|
|
28
|
+
headers: Record<string, string>;
|
|
29
|
+
/** JSON request body; serialized once per call (retries resend the same bytes). */
|
|
30
|
+
body: unknown;
|
|
31
|
+
signal: AbortSignal;
|
|
32
|
+
fetch?: FetchImpl;
|
|
33
|
+
/**
|
|
34
|
+
* Total attempts (initial + retries). Defaults to {@link DEFAULT_MAX_ATTEMPTS}.
|
|
35
|
+
* Pass `1` when a first-event watchdog is armed so retries cannot silently
|
|
36
|
+
* extend the caller's deadline (mirrors the old `maxRetries: 0` hint).
|
|
37
|
+
*/
|
|
38
|
+
maxAttempts?: number;
|
|
39
|
+
/** Raw wire-frame observer (`onSseEvent` debug pipeline). */
|
|
40
|
+
onSseEvent?: SseEventObserver;
|
|
41
|
+
}
|
|
42
|
+
export interface OpenAIStreamHandle<TEvent> {
|
|
43
|
+
/** Decoded `data:` payloads; terminates on `[DONE]` or stream end. */
|
|
44
|
+
events: AsyncGenerator<TEvent>;
|
|
45
|
+
response: Response;
|
|
46
|
+
/** `x-request-id` response header (the SDK's former `request_id`). */
|
|
47
|
+
requestId: string | null;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* POST a JSON body and stream back decoded SSE events.
|
|
51
|
+
*
|
|
52
|
+
* Throws {@link OpenAIHttpError} on a non-2xx terminal response. Aborts on
|
|
53
|
+
* `signal` propagate from `fetchWithRetry`/`readSseJson`; callers own the
|
|
54
|
+
* watchdog timers and abort-reason bookkeeping.
|
|
55
|
+
*/
|
|
56
|
+
export declare function postOpenAIStream<TEvent>(init: OpenAIStreamRequestInit): Promise<OpenAIStreamHandle<TEvent>>;
|
|
57
|
+
/** Decode a non-2xx response into an {@link OpenAIHttpError} without consuming it twice. */
|
|
58
|
+
export declare function captureOpenAIHttpError(response: Response): Promise<OpenAIHttpError>;
|
|
@@ -22,6 +22,9 @@ export interface RequestDebugSession {
|
|
|
22
22
|
wrapResponse(response: Response): Promise<Response>;
|
|
23
23
|
}
|
|
24
24
|
export declare function isRequestDebugEnabled(): boolean;
|
|
25
|
+
export declare function setNextRequestDebugPath(requestPath: string): void;
|
|
26
|
+
export declare function clearNextRequestDebugPath(): void;
|
|
27
|
+
export declare function getNextRequestDebugPath(): string | undefined;
|
|
25
28
|
export declare function wrapFetchForRequestDebug(fetchImpl: FetchImpl): FetchImpl;
|
|
26
29
|
export declare function withRequestDebugFetch<T extends {
|
|
27
30
|
fetch?: FetchImpl;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
export type HeadersLike = Headers | Record<string, string | undefined> | undefined | null;
|
|
2
2
|
export declare function formatErrorMessageWithRetryAfter(error: unknown, headers?: HeadersLike): string;
|
|
3
3
|
export declare function getRetryAfterMsFromHeaders(headers: HeadersLike): number | undefined;
|
|
4
|
+
export declare function getHeadersFromError(error: unknown): HeadersLike;
|
|
@@ -30,6 +30,11 @@ export declare const NON_STRUCTURAL_SCHEMA_KEYS: Record<string, true>;
|
|
|
30
30
|
* Used when collapsing mixed-type combiner variants for CCA Claude.
|
|
31
31
|
*/
|
|
32
32
|
export declare const CLOUD_CODE_ASSIST_TYPE_SPECIFIC_KEYS: Record<string, Record<string, true>>;
|
|
33
|
+
/**
|
|
34
|
+
* Flat set of every type-specific key across all CCA types.
|
|
35
|
+
* Used to identify sibling keys that need filtering during mixed-type collapse.
|
|
36
|
+
*/
|
|
37
|
+
export declare const ALL_CCA_TYPE_SPECIFIC_KEYS: Record<string, true>;
|
|
33
38
|
/**
|
|
34
39
|
* Cloud Code Assist shared schema keys allowed on any type.
|
|
35
40
|
* Used alongside CLOUD_CODE_ASSIST_TYPE_SPECIFIC_KEYS for CCA combiner collapsing.
|
|
@@ -3,6 +3,14 @@ export interface JsonSchemaValidationIssue {
|
|
|
3
3
|
message: string;
|
|
4
4
|
expectedTypes?: string[];
|
|
5
5
|
keyword?: string;
|
|
6
|
+
/**
|
|
7
|
+
* Marks issues that originate inside a failed `anyOf` / `oneOf` branch.
|
|
8
|
+
* Consumers such as the tool-argument coercion layer use this to avoid
|
|
9
|
+
* applying type repairs (e.g. singleton-array wrapping) that would be
|
|
10
|
+
* authoritative outside of a combinator but are only one candidate
|
|
11
|
+
* branch's expectation here.
|
|
12
|
+
*/
|
|
13
|
+
fromUnionBranch?: boolean;
|
|
6
14
|
}
|
|
7
15
|
export interface JsonSchemaValidationResult {
|
|
8
16
|
success: boolean;
|
|
@@ -1,17 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Symbol-keyed lazy memoization stamped directly onto the host object.
|
|
3
|
-
*
|
|
4
|
-
* Faster than a module-level `WeakMap` in V8/JSC because the symbol slot is
|
|
5
|
-
* resolved through the object's hidden class instead of a side-table hash
|
|
6
|
-
* lookup. The slot is defined as a non-enumerable property so the stamp
|
|
7
|
-
* does not leak through `{...spread}`, `Object.keys`, `JSON.stringify`, or
|
|
8
|
-
* `toEqual`-style deep equality.
|
|
9
|
-
*
|
|
10
|
-
* Caveats: the stamp lives as long as the host object, even after callers
|
|
11
|
-
* release their references to the cached value — only use this for caches
|
|
12
|
-
* whose lifetime should match the host. Frozen hosts will throw on write in
|
|
13
|
-
* strict mode; callers that may receive frozen input must handle that.
|
|
14
|
-
*/
|
|
15
1
|
export declare function stamp<T extends object, V>(target: T, key: symbol, compute: (target: T) => V): V;
|
|
16
2
|
export declare function epochNext(): number;
|
|
17
3
|
/**
|
|
@@ -20,6 +6,12 @@ export declare function epochNext(): number;
|
|
|
20
6
|
* subsequent call within the same epoch.
|
|
21
7
|
*/
|
|
22
8
|
export declare function once<T extends object>(target: T, epoch: number): boolean;
|
|
23
|
-
/**
|
|
9
|
+
/**
|
|
10
|
+
* Returns `true` on first entry, `false` if `target` is already on the
|
|
11
|
+
* current path. A `false` return does NOT deepen the counter — callers pair
|
|
12
|
+
* `exit` only with successful enters (`if (!enter(n)) bail; try {…} finally
|
|
13
|
+
* { exit(n); }`), so incrementing on the cycle branch would leak depth and
|
|
14
|
+
* make every later top-level walk of the same object misreport a cycle.
|
|
15
|
+
*/
|
|
24
16
|
export declare function enter<T extends object>(target: T): boolean;
|
|
25
17
|
export declare function exit<T extends object>(target: T): void;
|
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
import type { ServerSentEvent } from "@prometheus-ai/utils";
|
|
2
2
|
import type { RawSseEvent } from "../types";
|
|
3
|
-
type FetchFunction = (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
|
|
4
|
-
type FetchWithPreconnect = FetchFunction & {
|
|
5
|
-
preconnect?: typeof fetch.preconnect;
|
|
6
|
-
};
|
|
7
3
|
type RawSseObserver = (event: RawSseEvent) => void;
|
|
8
4
|
export declare function notifyRawSseEvent(observer: RawSseObserver | undefined, event: ServerSentEvent | RawSseEvent): void;
|
|
9
|
-
export declare function wrapFetchForSseDebug(fetchImpl: FetchWithPreconnect, observer: RawSseObserver | undefined): FetchWithPreconnect;
|
|
10
5
|
export {};
|
|
@@ -75,6 +75,8 @@ export declare class StreamMarkupHealing {
|
|
|
75
75
|
export declare function modelMayLeakKimiToolCalls(provider: string, modelId: string): boolean;
|
|
76
76
|
/** Cheap model/provider gate for DeepSeek DSML envelope leaks. */
|
|
77
77
|
export declare function modelMayLeakDsmlToolCalls(provider: string, modelId: string): boolean;
|
|
78
|
+
/** Cheap model/provider gate for MiniMax plain thinking tag leaks. */
|
|
79
|
+
export declare function modelMayLeakThinkingTags(provider: string, modelId: string): boolean;
|
|
78
80
|
export declare function getStreamMarkupHealingPattern(provider: string, modelId: string, options?: {
|
|
79
81
|
readonly parseThinkingTags?: boolean;
|
|
80
82
|
}): StreamMarkupHealingPattern | undefined;
|
package/dist/types/utils.d.ts
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
|
-
import type { ResponseInput } from "openai
|
|
1
|
+
import type { ResponseInput } from "./providers/openai-responses-wire";
|
|
2
2
|
import type { CacheRetention, OpenAIResponsesHistoryPayload, ProviderPayload } from "./types";
|
|
3
3
|
export { isRecord } from "@prometheus-ai/utils";
|
|
4
4
|
export declare function normalizeSystemPrompts(systemPrompt: readonly string[] | string | undefined | null): string[];
|
|
5
|
-
export declare function toNumber(value: unknown): number | undefined;
|
|
6
|
-
export declare function toPositiveNumber(value: unknown, fallback: number): number;
|
|
7
|
-
export declare function toBoolean(value: unknown): boolean | undefined;
|
|
8
5
|
export declare function normalizeToolCallId(id: string): string;
|
|
9
6
|
type ResponsesToolItemIdPrefix = "fc" | "ctc";
|
|
10
7
|
export declare function normalizeResponsesToolCallId(id: string, itemPrefix?: ResponsesToolItemIdPrefix): {
|
|
@@ -25,4 +22,3 @@ export declare function getOpenAIResponsesHistoryItems(providerPayload: Provider
|
|
|
25
22
|
* Defaults to "short" and uses PROMETHEUS_CACHE_RETENTION for backward compatibility.
|
|
26
23
|
*/
|
|
27
24
|
export declare function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention;
|
|
28
|
-
export declare function isAnthropicOAuthToken(key: string): boolean;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@prometheus-ai/ai",
|
|
4
|
-
"version": "0.5.4",
|
|
4
|
+
"version": "0.5.8",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://prometheus.trivlab.com",
|
|
7
7
|
"author": "Uttam Trivedi",
|
|
@@ -31,15 +31,15 @@
|
|
|
31
31
|
"lint": "biome lint .",
|
|
32
32
|
"test": "bun test --parallel --timeout=15000",
|
|
33
33
|
"fix": "biome check --write --unsafe .",
|
|
34
|
-
"fmt": "biome format --write .",
|
|
35
|
-
"generate-models": "bun scripts/generate-models.ts"
|
|
34
|
+
"fmt": "biome format --write ."
|
|
36
35
|
},
|
|
37
36
|
"dependencies": {
|
|
38
37
|
"@bufbuild/protobuf": "^2.12.0",
|
|
39
|
-
"@prometheus-ai/
|
|
38
|
+
"@prometheus-ai/catalog": "0.5.8",
|
|
39
|
+
"@prometheus-ai/utils": "0.5.8",
|
|
40
40
|
"openai": "^6.39.0",
|
|
41
41
|
"partial-json": "^0.1.7",
|
|
42
|
-
"zod": "4
|
|
42
|
+
"zod": "^4"
|
|
43
43
|
},
|
|
44
44
|
"devDependencies": {
|
|
45
45
|
"@types/bun": "^1.3.14"
|
|
@@ -78,18 +78,6 @@
|
|
|
78
78
|
"types": "./dist/types/auth-gateway/*.d.ts",
|
|
79
79
|
"import": "./src/auth-gateway/*.ts"
|
|
80
80
|
},
|
|
81
|
-
"./models.json": {
|
|
82
|
-
"types": "./dist/types/models.json.d.d.ts",
|
|
83
|
-
"import": "./src/models.json"
|
|
84
|
-
},
|
|
85
|
-
"./provider-models": {
|
|
86
|
-
"types": "./dist/types/provider-models/index.d.ts",
|
|
87
|
-
"import": "./src/provider-models/index.ts"
|
|
88
|
-
},
|
|
89
|
-
"./provider-models/*": {
|
|
90
|
-
"types": "./dist/types/provider-models/*.d.ts",
|
|
91
|
-
"import": "./src/provider-models/*.ts"
|
|
92
|
-
},
|
|
93
81
|
"./providers/*": {
|
|
94
82
|
"types": "./dist/types/providers/*.d.ts",
|
|
95
83
|
"import": "./src/providers/*.ts"
|
|
@@ -110,21 +98,21 @@
|
|
|
110
98
|
"types": "./dist/types/utils/*.d.ts",
|
|
111
99
|
"import": "./src/utils/*.ts"
|
|
112
100
|
},
|
|
113
|
-
"./
|
|
114
|
-
"types": "./dist/types/
|
|
115
|
-
"import": "./src/
|
|
101
|
+
"./oauth": {
|
|
102
|
+
"types": "./dist/types/registry/oauth/index.d.ts",
|
|
103
|
+
"import": "./src/registry/oauth/index.ts"
|
|
116
104
|
},
|
|
117
|
-
"./
|
|
118
|
-
"types": "./dist/types/
|
|
119
|
-
"import": "./src/
|
|
105
|
+
"./oauth/*": {
|
|
106
|
+
"types": "./dist/types/registry/oauth/*.d.ts",
|
|
107
|
+
"import": "./src/registry/oauth/*.ts"
|
|
120
108
|
},
|
|
121
|
-
"./
|
|
122
|
-
"types": "./dist/types/
|
|
123
|
-
"import": "./src/
|
|
109
|
+
"./registry": {
|
|
110
|
+
"types": "./dist/types/registry/index.d.ts",
|
|
111
|
+
"import": "./src/registry/index.ts"
|
|
124
112
|
},
|
|
125
|
-
"./
|
|
126
|
-
"types": "./dist/types/
|
|
127
|
-
"import": "./src/
|
|
113
|
+
"./registry/oauth": {
|
|
114
|
+
"types": "./dist/types/registry/oauth/index.d.ts",
|
|
115
|
+
"import": "./src/registry/oauth/index.ts"
|
|
128
116
|
},
|
|
129
117
|
"./utils/schema": {
|
|
130
118
|
"types": "./dist/types/utils/schema/index.d.ts",
|
|
@@ -17,9 +17,9 @@ import {
|
|
|
17
17
|
REMOTE_REFRESH_SENTINEL,
|
|
18
18
|
type StoredAuthCredential,
|
|
19
19
|
} from "../auth-storage";
|
|
20
|
+
import type { OAuthCredentials } from "../registry/oauth/types";
|
|
20
21
|
import type { Provider } from "../types";
|
|
21
22
|
import type { UsageReport } from "../usage";
|
|
22
|
-
import type { OAuthCredentials } from "../utils/oauth/types";
|
|
23
23
|
import { type AuthBrokerClient, AuthBrokerStreamUnsupportedError } from "./client";
|
|
24
24
|
import type { RefresherSchedule, SnapshotEntry, SnapshotResponse, SnapshotStreamEvent } from "./types";
|
|
25
25
|
|
|
@@ -275,6 +275,15 @@ export class RemoteAuthCredentialStore implements AuthCredentialStore {
|
|
|
275
275
|
});
|
|
276
276
|
}
|
|
277
277
|
|
|
278
|
+
async deleteAuthCredentialRemote(id: number, disabledCause: string): Promise<boolean> {
|
|
279
|
+
const found = this.#snapshot.credentials.some(entry => entry.id === id);
|
|
280
|
+
if (!found) return false;
|
|
281
|
+
await this.#client.disableCredential(id, disabledCause);
|
|
282
|
+
this.#removeCredentialById(id);
|
|
283
|
+
this.#maybeRefreshSnapshot("delete credential");
|
|
284
|
+
return true;
|
|
285
|
+
}
|
|
286
|
+
|
|
278
287
|
tryDisableAuthCredentialIfMatches(id: number, _expectedData: string, disabledCause: string): boolean {
|
|
279
288
|
const found = this.#snapshot.credentials.find(entry => entry.id === id);
|
|
280
289
|
if (!found) return false;
|
|
@@ -12,7 +12,7 @@ import { isEnoent, logger } from "@prometheus-ai/utils";
|
|
|
12
12
|
import type { SnapshotResponse } from "./types";
|
|
13
13
|
import { snapshotResponseSchema } from "./wire-schemas";
|
|
14
14
|
|
|
15
|
-
const MAGIC = new Uint8Array([
|
|
15
|
+
const MAGIC = new Uint8Array([0x4f, 0x4d, 0x50, 0x53]); // "OMPS"
|
|
16
16
|
const VERSION = 1;
|
|
17
17
|
const VERSION_OFFSET = MAGIC.byteLength;
|
|
18
18
|
const IV_OFFSET = VERSION_OFFSET + 1;
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* keys are rejected — the previous implementation used a hand-rolled
|
|
11
11
|
* `hasOnlyFields` allowlist for the same effect.
|
|
12
12
|
*/
|
|
13
|
-
import
|
|
13
|
+
import { z } from "zod/v4";
|
|
14
14
|
import { REMOTE_REFRESH_SENTINEL } from "../auth-storage";
|
|
15
15
|
import { usageReportSchema } from "../usage";
|
|
16
16
|
|
package/src/auth-gateway/http.ts
CHANGED
|
@@ -74,7 +74,7 @@ const PASSTHROUGH_HEADER_NAMES: Record<string, true> = {
|
|
|
74
74
|
"openai-organization": true,
|
|
75
75
|
"openai-project": true,
|
|
76
76
|
"openai-beta": true,
|
|
77
|
-
// Codex / ChatGPT-OAuth backend headers (see
|
|
77
|
+
// Codex / ChatGPT-OAuth backend headers (see @prometheus-ai/catalog/wire/codex).
|
|
78
78
|
// `session_id` and `conversation_id` thread the upstream session so prompt
|
|
79
79
|
// caching and per-conversation rate limiting work; `chatgpt-account-id` and
|
|
80
80
|
// `originator` identify the calling account and client surface.
|
|
@@ -17,9 +17,11 @@
|
|
|
17
17
|
* POST /v1/messages → Anthropic messages in/out
|
|
18
18
|
* POST /v1/responses → OpenAI Responses in/out
|
|
19
19
|
*/
|
|
20
|
+
|
|
21
|
+
import { Effort } from "@prometheus-ai/catalog/effort";
|
|
20
22
|
import { extractRetryHint, logger } from "@prometheus-ai/utils";
|
|
23
|
+
import type { ApiKeyResolver } from "../auth-retry";
|
|
21
24
|
import type { AuthStorage } from "../auth-storage";
|
|
22
|
-
import { Effort } from "../model-thinking";
|
|
23
25
|
import * as anthropicMessages from "../providers/anthropic-messages-server";
|
|
24
26
|
import * as openaiChat from "../providers/openai-chat-server";
|
|
25
27
|
import * as openaiResponses from "../providers/openai-responses-server";
|
|
@@ -314,9 +316,10 @@ async function refreshGatewayApiKeyAfterAuthError(
|
|
|
314
316
|
const message = error instanceof Error ? error.message : String(error);
|
|
315
317
|
if (isUsageLimitError(message)) {
|
|
316
318
|
const retryAfterMs = extractRetryHint(undefined, message);
|
|
317
|
-
const switched = await storage.markUsageLimitReached(provider, sessionId, {
|
|
319
|
+
const { switched, retryAtMs } = await storage.markUsageLimitReached(provider, sessionId, {
|
|
318
320
|
retryAfterMs,
|
|
319
321
|
baseUrl: model.baseUrl,
|
|
322
|
+
modelId: model.id,
|
|
320
323
|
signal,
|
|
321
324
|
});
|
|
322
325
|
logger.debug("auth-gateway retrying provider request after usage-limit block", {
|
|
@@ -325,6 +328,7 @@ async function refreshGatewayApiKeyAfterAuthError(
|
|
|
325
328
|
peer,
|
|
326
329
|
switched,
|
|
327
330
|
retryAfterMs,
|
|
331
|
+
retryAtMs,
|
|
328
332
|
error: message,
|
|
329
333
|
});
|
|
330
334
|
if (!switched) return undefined;
|
|
@@ -340,6 +344,60 @@ async function refreshGatewayApiKeyAfterAuthError(
|
|
|
340
344
|
return storage.getApiKey(provider, sessionId, { modelId: model.id, signal });
|
|
341
345
|
}
|
|
342
346
|
|
|
347
|
+
/**
|
|
348
|
+
* Build the {@link ApiKeyResolver} handed to `streamSimple` for a gateway
|
|
349
|
+
* request. Drives the central a/b/c auth-retry policy server-side:
|
|
350
|
+
*
|
|
351
|
+
* - initial resolve → the credential already resolved for this request.
|
|
352
|
+
* - step (b) `!lastChance` → force-refresh the SAME session-sticky credential
|
|
353
|
+
* (a peer/broker may have rotated its token out from under our cached copy).
|
|
354
|
+
* - step (c) `lastChance` → {@link refreshGatewayApiKeyAfterAuthError} switches
|
|
355
|
+
* to a sibling (usage-limit block vs credential invalidation by error class).
|
|
356
|
+
*
|
|
357
|
+
* `lastKey` tracks the most recent bearer so the switch step invalidates the
|
|
358
|
+
* credential that actually failed.
|
|
359
|
+
*/
|
|
360
|
+
function buildGatewayApiKeyResolver(
|
|
361
|
+
storage: AuthStorage,
|
|
362
|
+
model: Model<Api>,
|
|
363
|
+
sessionId: string,
|
|
364
|
+
initialKey: string,
|
|
365
|
+
requestSignal: AbortSignal,
|
|
366
|
+
format: string,
|
|
367
|
+
peer: string,
|
|
368
|
+
): ApiKeyResolver {
|
|
369
|
+
let lastKey = initialKey;
|
|
370
|
+
return async ({ lastChance, error, signal }) => {
|
|
371
|
+
const sig = signal ?? requestSignal;
|
|
372
|
+
if (error === undefined) {
|
|
373
|
+
lastKey = initialKey;
|
|
374
|
+
return initialKey;
|
|
375
|
+
}
|
|
376
|
+
if (!lastChance) {
|
|
377
|
+
const refreshed = await storage.getApiKey(model.provider, sessionId, {
|
|
378
|
+
modelId: model.id,
|
|
379
|
+
signal: sig,
|
|
380
|
+
forceRefresh: true,
|
|
381
|
+
});
|
|
382
|
+
lastKey = refreshed ?? lastKey;
|
|
383
|
+
return refreshed;
|
|
384
|
+
}
|
|
385
|
+
const next = await refreshGatewayApiKeyAfterAuthError(
|
|
386
|
+
storage,
|
|
387
|
+
model,
|
|
388
|
+
sessionId,
|
|
389
|
+
model.provider,
|
|
390
|
+
lastKey,
|
|
391
|
+
error,
|
|
392
|
+
sig,
|
|
393
|
+
format,
|
|
394
|
+
peer,
|
|
395
|
+
);
|
|
396
|
+
lastKey = next ?? lastKey;
|
|
397
|
+
return next;
|
|
398
|
+
};
|
|
399
|
+
}
|
|
400
|
+
|
|
343
401
|
function clientClosedResponse(route: { module: FormatModule }): Response {
|
|
344
402
|
return route.module.formatError(499, "request_aborted", "client closed request");
|
|
345
403
|
}
|
|
@@ -447,19 +505,15 @@ async function handleFormatEndpoint(
|
|
|
447
505
|
}
|
|
448
506
|
|
|
449
507
|
const streamOpts = buildStreamOptions(parsed, model.api, controller.signal);
|
|
450
|
-
streamOpts.apiKey =
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
controller.signal,
|
|
460
|
-
route.label,
|
|
461
|
-
peer,
|
|
462
|
-
);
|
|
508
|
+
streamOpts.apiKey = buildGatewayApiKeyResolver(
|
|
509
|
+
bootOpts.storage,
|
|
510
|
+
model,
|
|
511
|
+
sessionId,
|
|
512
|
+
apiKey,
|
|
513
|
+
controller.signal,
|
|
514
|
+
route.label,
|
|
515
|
+
peer,
|
|
516
|
+
);
|
|
463
517
|
|
|
464
518
|
logger.info("auth-gateway request", {
|
|
465
519
|
format: route.label,
|
|
@@ -514,7 +568,14 @@ async function handleFormatEndpoint(
|
|
|
514
568
|
}
|
|
515
569
|
if (controller.signal.aborted) return clientClosedResponse(route);
|
|
516
570
|
|
|
517
|
-
const sseStream = route.module.encodeStream(events, parsed.modelId, parsed.options);
|
|
571
|
+
const sseStream = route.module.encodeStream(events, parsed.modelId, parsed.options, {
|
|
572
|
+
signal: controller.signal,
|
|
573
|
+
onCancel: reason => {
|
|
574
|
+
if (!controller.signal.aborted) {
|
|
575
|
+
controller.abort(reason instanceof Error ? reason : new Error("client closed request"));
|
|
576
|
+
}
|
|
577
|
+
},
|
|
578
|
+
});
|
|
518
579
|
return new Response(sseStream, {
|
|
519
580
|
status: 200,
|
|
520
581
|
headers: {
|
|
@@ -532,7 +593,7 @@ async function handleFormatEndpoint(
|
|
|
532
593
|
/**
|
|
533
594
|
* Prometheus-native fast path: `POST /v1/prometheus/stream`. Accepts the canonical Prometheus AI
|
|
534
595
|
* `Context` directly (no wire-format round-trip) and emits a bandwidth-shrunk
|
|
535
|
-
* event stream matching
|
|
596
|
+
* event stream matching Prometheus' `streamProxy`. Skips the OpenAI /
|
|
536
597
|
* Anthropic / Responses translation layers — those exist to bridge foreign
|
|
537
598
|
* SDKs (llm-git, anthropic-sdk, openai-sdk), and bridging back to prometheus-native
|
|
538
599
|
* just to bridge forward again is wasted work.
|
|
@@ -604,18 +665,15 @@ async function handlePrometheusNative(bootOpts: AuthGatewayBootOptions, req: Req
|
|
|
604
665
|
// only inject server-controlled fields. The codex temperature/topP strip
|
|
605
666
|
// matches `buildStreamOptions` — Codex rejects them with a 400.
|
|
606
667
|
const streamOpts: SimpleStreamOptions = { ...parsed.options, apiKey, signal: controller.signal };
|
|
607
|
-
streamOpts.
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
"prometheus-native",
|
|
617
|
-
peer,
|
|
618
|
-
);
|
|
668
|
+
streamOpts.apiKey = buildGatewayApiKeyResolver(
|
|
669
|
+
bootOpts.storage,
|
|
670
|
+
model,
|
|
671
|
+
sessionId,
|
|
672
|
+
apiKey,
|
|
673
|
+
controller.signal,
|
|
674
|
+
"prometheus-native",
|
|
675
|
+
peer,
|
|
676
|
+
);
|
|
619
677
|
if (model.api === "openai-codex-responses") {
|
|
620
678
|
delete streamOpts.temperature;
|
|
621
679
|
delete streamOpts.topP;
|
|
@@ -679,7 +737,14 @@ async function handlePrometheusNative(bootOpts: AuthGatewayBootOptions, req: Req
|
|
|
679
737
|
}
|
|
680
738
|
if (controller.signal.aborted) return aborted();
|
|
681
739
|
|
|
682
|
-
const sseStream = prometheusNative.encodeStream(events
|
|
740
|
+
const sseStream = prometheusNative.encodeStream(events, parsed.modelId, parsed.options, {
|
|
741
|
+
signal: controller.signal,
|
|
742
|
+
onCancel: reason => {
|
|
743
|
+
if (!controller.signal.aborted) {
|
|
744
|
+
controller.abort(reason instanceof Error ? reason : new Error("client closed request"));
|
|
745
|
+
}
|
|
746
|
+
},
|
|
747
|
+
});
|
|
683
748
|
return new Response(sseStream, {
|
|
684
749
|
status: 200,
|
|
685
750
|
headers: {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Effort } from "../model-thinking";
|
|
1
|
+
import type { Effort } from "@prometheus-ai/catalog/effort";
|
|
2
2
|
import type {
|
|
3
3
|
AssistantMessage,
|
|
4
4
|
AssistantMessageEventStream,
|
|
@@ -14,7 +14,7 @@ import type {
|
|
|
14
14
|
* The gateway sits between unauthenticated clients (containerized prometheus,
|
|
15
15
|
* llm-git, …) and the broker. It accepts provider-format HTTP requests
|
|
16
16
|
* (OpenAI chat-completions / Anthropic messages / OpenAI Responses),
|
|
17
|
-
* dispatches them through
|
|
17
|
+
* dispatches them through Prometheus AI's `streamSimple()`, and translates the
|
|
18
18
|
* canonical event stream back to the matching wire format. The gateway
|
|
19
19
|
* injects `Authorization` server-side so clients never see access tokens.
|
|
20
20
|
*/
|
|
@@ -110,6 +110,13 @@ export interface AuthGatewayParsedRequest {
|
|
|
110
110
|
options: AuthGatewayParsedRequestOptions;
|
|
111
111
|
}
|
|
112
112
|
|
|
113
|
+
export interface AuthGatewayStreamControl {
|
|
114
|
+
/** Gateway request signal. Encoders stop producing frames when it aborts. */
|
|
115
|
+
signal?: AbortSignal;
|
|
116
|
+
/** Called when the HTTP response body is cancelled by the client. */
|
|
117
|
+
onCancel?: (reason?: unknown) => void;
|
|
118
|
+
}
|
|
119
|
+
|
|
113
120
|
export interface AuthGatewayFormatModule {
|
|
114
121
|
parseRequest(body: unknown, headers?: Headers): AuthGatewayParsedRequest;
|
|
115
122
|
encodeResponse(message: AssistantMessage, requestedModelId: string): Record<string, unknown>;
|
|
@@ -117,6 +124,7 @@ export interface AuthGatewayFormatModule {
|
|
|
117
124
|
events: AssistantMessageEventStream,
|
|
118
125
|
requestedModelId: string,
|
|
119
126
|
options?: AuthGatewayParsedRequestOptions,
|
|
127
|
+
control?: AuthGatewayStreamControl,
|
|
120
128
|
): ReadableStream<Uint8Array>;
|
|
121
129
|
/**
|
|
122
130
|
* Emit a protocol-specific error envelope. OpenAI returns
|