@prometheus-ai/ai 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
package/src/usage.ts
CHANGED
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
* Provides a normalized schema to represent multiple limit windows, model tiers,
|
|
5
5
|
* and shared quotas across providers.
|
|
6
6
|
*/
|
|
7
|
-
import
|
|
8
|
-
import type { Provider } from "./types";
|
|
7
|
+
import { z } from "zod/v4";
|
|
8
|
+
import type { FetchImpl, Provider } from "./types";
|
|
9
9
|
export type UsageUnit = "percent" | "tokens" | "requests" | "usd" | "minutes" | "bytes" | "unknown";
|
|
10
10
|
|
|
11
11
|
export type UsageStatus = "ok" | "warning" | "exhausted" | "unknown";
|
|
@@ -63,15 +63,78 @@ export interface UsageLimit {
|
|
|
63
63
|
notes?: string[];
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
+
/**
|
|
67
|
+
* Saved/banked rate-limit resets an account can redeem on demand.
|
|
68
|
+
*
|
|
69
|
+
* Surfaced by providers that let users defer a usage-window reset and spend it
|
|
70
|
+
* later (OpenAI Codex "saved rate limit resets"). The redeem itself is a
|
|
71
|
+
* separate, provider-specific action; this is the read-only count for display.
|
|
72
|
+
*/
|
|
73
|
+
export interface UsageResetCredits {
|
|
74
|
+
/** Number of resets available to redeem right now. */
|
|
75
|
+
availableCount: number;
|
|
76
|
+
}
|
|
77
|
+
|
|
66
78
|
/** Aggregated usage report for a provider. */
|
|
67
79
|
export interface UsageReport {
|
|
68
80
|
provider: Provider;
|
|
69
81
|
fetchedAt: number;
|
|
70
82
|
limits: UsageLimit[];
|
|
83
|
+
/** Saved rate-limit resets the account can redeem, when the provider reports them. */
|
|
84
|
+
resetCredits?: UsageResetCredits;
|
|
71
85
|
metadata?: Record<string, unknown>;
|
|
72
86
|
raw?: unknown;
|
|
73
87
|
}
|
|
74
88
|
|
|
89
|
+
/**
|
|
90
|
+
* Resolve a limit's used fraction (0..1; >1 means overage) from whichever
|
|
91
|
+
* amount fields the provider populated. Precedence mirrors the usage UIs:
|
|
92
|
+
* explicit fraction > used/limit > percent-unit used > inverted remaining.
|
|
93
|
+
*/
|
|
94
|
+
export function resolveUsedFraction(limit: UsageLimit): number | undefined {
|
|
95
|
+
const amount = limit.amount;
|
|
96
|
+
if (amount.usedFraction !== undefined) return amount.usedFraction;
|
|
97
|
+
if (amount.used !== undefined && amount.limit !== undefined && amount.limit > 0) {
|
|
98
|
+
return amount.used / amount.limit;
|
|
99
|
+
}
|
|
100
|
+
if (amount.unit === "percent" && amount.used !== undefined) return amount.used / 100;
|
|
101
|
+
if (amount.remainingFraction !== undefined) return Math.max(0, 1 - amount.remainingFraction);
|
|
102
|
+
return undefined;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* One recorded usage-limit snapshot: a single limit window of one account at
|
|
107
|
+
* a point in time. The usage cache itself is latest-snapshot-only; history
|
|
108
|
+
* rows are appended by the auth storage layer whenever a fresh report is
|
|
109
|
+
* fetched, so limit utilization stays inspectable over time.
|
|
110
|
+
*/
|
|
111
|
+
export interface UsageHistoryEntry {
|
|
112
|
+
/** Epoch ms the report was fetched. */
|
|
113
|
+
recordedAt: number;
|
|
114
|
+
provider: Provider;
|
|
115
|
+
/** Stable credential identity key (account/email/project derived). */
|
|
116
|
+
accountKey: string;
|
|
117
|
+
email?: string;
|
|
118
|
+
accountId?: string;
|
|
119
|
+
/** {@link UsageLimit.id} of the recorded window. */
|
|
120
|
+
limitId: string;
|
|
121
|
+
/** Human label of the limit. */
|
|
122
|
+
label: string;
|
|
123
|
+
windowLabel?: string;
|
|
124
|
+
/** Used fraction (0..1) when resolvable. */
|
|
125
|
+
usedFraction?: number;
|
|
126
|
+
status?: UsageStatus;
|
|
127
|
+
/** Epoch ms the window resets, when known. */
|
|
128
|
+
resetsAt?: number;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/** Filter for reading recorded usage history. */
|
|
132
|
+
export interface UsageHistoryQuery {
|
|
133
|
+
provider?: string;
|
|
134
|
+
/** Inclusive lower bound on {@link UsageHistoryEntry.recordedAt} (epoch ms). */
|
|
135
|
+
sinceMs?: number;
|
|
136
|
+
}
|
|
137
|
+
|
|
75
138
|
// ─── Zod schemas (wire-shape validation for the broker `/v1/usage` endpoint) ─
|
|
76
139
|
|
|
77
140
|
export const usageUnitSchema = z.enum(["percent", "tokens", "requests", "usd", "minutes", "bytes", "unknown"]);
|
|
@@ -114,10 +177,15 @@ export const usageLimitSchema = z.object({
|
|
|
114
177
|
notes: z.array(z.string()).optional(),
|
|
115
178
|
});
|
|
116
179
|
|
|
180
|
+
export const usageResetCreditsSchema = z.object({
|
|
181
|
+
availableCount: z.number(),
|
|
182
|
+
});
|
|
183
|
+
|
|
117
184
|
export const usageReportSchema = z.object({
|
|
118
185
|
provider: z.string(),
|
|
119
186
|
fetchedAt: z.number(),
|
|
120
187
|
limits: z.array(usageLimitSchema),
|
|
188
|
+
resetCredits: usageResetCreditsSchema.optional(),
|
|
121
189
|
metadata: z.record(z.string(), z.unknown()).optional(),
|
|
122
190
|
// `raw` is provider-specific and may be anything; the broker strips it before
|
|
123
191
|
// sending the report over the wire, so accept-but-ignore here.
|
|
@@ -154,7 +222,7 @@ export interface UsageFetchParams {
|
|
|
154
222
|
|
|
155
223
|
/** Shared runtime utilities for fetchers. */
|
|
156
224
|
export interface UsageFetchContext {
|
|
157
|
-
fetch:
|
|
225
|
+
fetch: FetchImpl;
|
|
158
226
|
logger?: UsageLogger;
|
|
159
227
|
retryWait?: (delayMs: number, signal?: AbortSignal) => Promise<void>;
|
|
160
228
|
}
|
|
@@ -168,13 +236,34 @@ export interface UsageProvider {
|
|
|
168
236
|
supports?(params: UsageFetchParams): boolean;
|
|
169
237
|
}
|
|
170
238
|
|
|
239
|
+
/** Request context used when ranking usage for a specific model. */
|
|
240
|
+
export interface CredentialRankingContext {
|
|
241
|
+
/** Provider model id, when the caller is selecting a credential for one model. */
|
|
242
|
+
modelId?: string;
|
|
243
|
+
}
|
|
244
|
+
|
|
171
245
|
/** Strategy for usage-based credential ranking. Providers implement this to opt into smart credential selection. */
|
|
172
246
|
export interface CredentialRankingStrategy {
|
|
173
247
|
/** Extract the primary (short) and secondary (long) window limits from a usage report. */
|
|
174
|
-
findWindowLimits(
|
|
248
|
+
findWindowLimits(
|
|
249
|
+
report: UsageReport,
|
|
250
|
+
context?: CredentialRankingContext,
|
|
251
|
+
): {
|
|
175
252
|
primary?: UsageLimit;
|
|
176
253
|
secondary?: UsageLimit;
|
|
177
254
|
};
|
|
255
|
+
/**
|
|
256
|
+
* Restrict limits to the ones relevant for the requested model before
|
|
257
|
+
* credential-wide exhaustion checks and ranking. Providers with shared
|
|
258
|
+
* account-wide quotas can omit this and use all limits.
|
|
259
|
+
*/
|
|
260
|
+
scopeLimits?(report: UsageReport, context?: CredentialRankingContext): UsageLimit[];
|
|
261
|
+
/**
|
|
262
|
+
* Return a provider-local backoff scope for the requested model. Providers
|
|
263
|
+
* with backend-specific quotas use this so one exhausted model family does
|
|
264
|
+
* not block unrelated families on the same OAuth credential.
|
|
265
|
+
*/
|
|
266
|
+
blockScope?(context?: CredentialRankingContext): string | undefined;
|
|
178
267
|
/** Fallback window durations (ms) when limits don't specify durationMs. */
|
|
179
268
|
windowDefaults: {
|
|
180
269
|
primaryMs: number;
|
package/src/utils/abort.ts
CHANGED
|
@@ -49,3 +49,17 @@ export function createAbortSourceTracker(callerSignal?: AbortSignal): AbortSourc
|
|
|
49
49
|
},
|
|
50
50
|
};
|
|
51
51
|
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Race a shared promise against a caller's AbortSignal without coupling the
|
|
55
|
+
* underlying work to that signal. The shared promise keeps running (and caches
|
|
56
|
+
* its result) even when an individual caller bails out.
|
|
57
|
+
*/
|
|
58
|
+
export function raceWithSignal<T>(promise: Promise<T>, signal: AbortSignal | undefined): Promise<T> {
|
|
59
|
+
if (!signal) return promise;
|
|
60
|
+
if (signal.aborted) return Promise.reject(signal.reason ?? new Error("Request was aborted"));
|
|
61
|
+
const { promise: aborted, reject } = Promise.withResolvers<never>();
|
|
62
|
+
const onAbort = () => reject(signal.reason ?? new Error("Request was aborted"));
|
|
63
|
+
signal.addEventListener("abort", onAbort, { once: true });
|
|
64
|
+
return Promise.race([promise, aborted]).finally(() => signal.removeEventListener("abort", onAbort));
|
|
65
|
+
}
|
package/src/utils/event-stream.ts
CHANGED
|
@@ -5,6 +5,8 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
|
|
5
5
|
queue: T[] = [];
|
|
6
6
|
waiting: Array<{ resolve: (value: IteratorResult<T>) => void; reject: (err: unknown) => void }> = [];
|
|
7
7
|
done = false;
|
|
8
|
+
/** True once finalResultPromise has been resolved or rejected. */
|
|
9
|
+
resultSettled = false;
|
|
8
10
|
#failed = false;
|
|
9
11
|
#error: unknown = undefined;
|
|
10
12
|
finalResultPromise: Promise<R>;
|
|
@@ -30,6 +32,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
|
|
30
32
|
|
|
31
33
|
if (this.isComplete(event)) {
|
|
32
34
|
this.done = true;
|
|
35
|
+
this.resultSettled = true;
|
|
33
36
|
this.resolveFinalResult(this.extractResult(event));
|
|
34
37
|
}
|
|
35
38
|
|
|
@@ -54,7 +57,13 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
|
|
54
57
|
end(result?: R): void {
|
|
55
58
|
this.done = true;
|
|
56
59
|
if (result !== undefined) {
|
|
60
|
+
this.resultSettled = true;
|
|
57
61
|
this.resolveFinalResult(result);
|
|
62
|
+
} else if (!this.resultSettled) {
|
|
63
|
+
// end() without a terminal value must still settle result() —
|
|
64
|
+
// otherwise complete()/result() awaits hang forever.
|
|
65
|
+
this.resultSettled = true;
|
|
66
|
+
this.rejectFinalResult(new Error("Stream ended without a final result"));
|
|
58
67
|
}
|
|
59
68
|
// Notify all waiting consumers that we're done
|
|
60
69
|
while (this.waiting.length > 0) {
|
|
@@ -75,6 +84,7 @@ export class EventStream<T, R = T> implements AsyncIterable<T> {
|
|
|
75
84
|
this.done = true;
|
|
76
85
|
this.#failed = true;
|
|
77
86
|
this.#error = err;
|
|
87
|
+
this.resultSettled = true;
|
|
78
88
|
this.rejectFinalResult(err);
|
|
79
89
|
while (this.waiting.length > 0) {
|
|
80
90
|
const waiter = this.waiting.shift()!;
|
|
@@ -126,6 +136,7 @@ export class AssistantMessageEventStream extends EventStream<AssistantMessageEve
|
|
|
126
136
|
// Completion resolves the final result and still emits the terminal event.
|
|
127
137
|
if (this.isComplete(event)) {
|
|
128
138
|
this.done = true;
|
|
139
|
+
this.resultSettled = true;
|
|
129
140
|
this.resolveFinalResult(this.extractResult(event));
|
|
130
141
|
}
|
|
131
142
|
|
|
@@ -135,7 +146,13 @@ export class AssistantMessageEventStream extends EventStream<AssistantMessageEve
|
|
|
135
146
|
override end(result?: AssistantMessage): void {
|
|
136
147
|
this.done = true;
|
|
137
148
|
if (result !== undefined) {
|
|
149
|
+
this.resultSettled = true;
|
|
138
150
|
this.resolveFinalResult(result);
|
|
151
|
+
} else if (!this.resultSettled) {
|
|
152
|
+
// Mirror the base class: a result-less end() must not leave
|
|
153
|
+
// result() pending forever.
|
|
154
|
+
this.resultSettled = true;
|
|
155
|
+
this.rejectFinalResult(new Error("Stream ended without a final result"));
|
|
139
156
|
}
|
|
140
157
|
this.endWaiting();
|
|
141
158
|
}
|
package/src/utils/http-inspector.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as path from "node:path";
|
|
2
|
-
import { extractHttpStatusFromError, getLogsDir } from "@prometheus-ai/utils";
|
|
2
|
+
import { extractHttpStatusFromError, getLogsDir, isBunTestRuntime } from "@prometheus-ai/utils";
|
|
3
3
|
import { isCopilotTransientModelError } from "./retry.js";
|
|
4
4
|
import { formatErrorMessageWithRetryAfter } from "./retry-after.js";
|
|
5
5
|
|
|
@@ -20,10 +20,6 @@ export type CapturedHttpErrorResponse = {
|
|
|
20
20
|
bodyJson?: unknown;
|
|
21
21
|
};
|
|
22
22
|
|
|
23
|
-
type ErrorWithStatus = {
|
|
24
|
-
status?: unknown;
|
|
25
|
-
};
|
|
26
|
-
|
|
27
23
|
const SENSITIVE_HEADERS = ["authorization", "x-api-key", "api-key", "cookie", "set-cookie", "proxy-authorization"];
|
|
28
24
|
|
|
29
25
|
export async function appendRawHttpRequestDumpFor400(
|
|
@@ -31,7 +27,9 @@ export async function appendRawHttpRequestDumpFor400(
|
|
|
31
27
|
error: unknown,
|
|
32
28
|
dump: RawHttpRequestDump | undefined,
|
|
33
29
|
): Promise<string> {
|
|
34
|
-
|
|
30
|
+
// Never persist dumps under the test runner: providers exercise the 400 path
|
|
31
|
+
// with mocked fetch responses, which would otherwise litter the real ~/.prometheus logs.
|
|
32
|
+
if (!dump || isBunTestRuntime() || extractHttpStatusFromError(error) !== 400) {
|
|
35
33
|
return message;
|
|
36
34
|
}
|
|
37
35
|
|
|
@@ -65,12 +63,6 @@ export async function finalizeErrorMessage(
|
|
|
65
63
|
return appendRawHttpRequestDumpFor400(message, error, rawRequestDump);
|
|
66
64
|
}
|
|
67
65
|
|
|
68
|
-
export function withHttpStatus(error: unknown, status: number): Error {
|
|
69
|
-
const wrapped = error instanceof Error ? error : new Error(String(error));
|
|
70
|
-
(wrapped as ErrorWithStatus).status = status;
|
|
71
|
-
return wrapped;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
66
|
/**
|
|
75
67
|
* Rewrite error message for GitHub Copilot request failures.
|
|
76
68
|
* Must run AFTER finalizeErrorMessage since it replaces the message entirely.
|
|
@@ -2,6 +2,8 @@ import { $env } from "@prometheus-ai/utils";
|
|
|
2
2
|
|
|
3
3
|
const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 120_000;
|
|
4
4
|
const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 100_000;
|
|
5
|
+
/** Re-mint persistent race promises every N iterations (see hoisted-racer comment). */
|
|
6
|
+
const RACER_REMINT_INTERVAL = 1024;
|
|
5
7
|
|
|
6
8
|
function normalizeIdleTimeoutMs(value: string | undefined, fallback: number): number | undefined {
|
|
7
9
|
if (value === undefined) return fallback;
|
|
@@ -130,8 +132,11 @@ export async function* iterateWithIdleTimeout<T>(
|
|
|
130
132
|
firstItemTimeoutMs !== undefined && firstItemTimeoutMs > 0 ? Date.now() + firstItemTimeoutMs : undefined;
|
|
131
133
|
const abortSignal = options.abortSignal;
|
|
132
134
|
const iterator = iterable[Symbol.asyncIterator]();
|
|
135
|
+
let iteratorClosed = false;
|
|
133
136
|
|
|
134
137
|
const closeIterator = (): void => {
|
|
138
|
+
if (iteratorClosed) return;
|
|
139
|
+
iteratorClosed = true;
|
|
135
140
|
const returnPromise = iterator.return?.();
|
|
136
141
|
if (returnPromise) {
|
|
137
142
|
void returnPromise.catch(() => {});
|
|
@@ -167,100 +172,266 @@ export async function* iterateWithIdleTimeout<T>(
|
|
|
167
172
|
(firstItemTimeoutMs === undefined || firstItemTimeoutMs <= 0) &&
|
|
168
173
|
(options.idleTimeoutMs === undefined || options.idleTimeoutMs <= 0);
|
|
169
174
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
|
|
175
|
+
// Persistent racers, hoisted out of the per-item loop. The abort promise can
|
|
176
|
+
// only ever resolve once (abort latches), and a timeout resolution always
|
|
177
|
+
// precedes a throw — so neither needs per-item re-creation. This keeps the
|
|
178
|
+
// token hot path free of timer create/destroy and listener churn.
|
|
179
|
+
//
|
|
180
|
+
// Each Promise.race() call still attaches a reaction record to every pending
|
|
181
|
+
// racer, and those records live until the racer settles — so a never-firing
|
|
182
|
+
// abort/timeout promise would accumulate one record per streamed item for
|
|
183
|
+
// the stream's whole life. The loop re-mints both promises every
|
|
184
|
+
// RACER_REMINT_INTERVAL iterations to keep that retention bounded; the
|
|
185
|
+
// listener and timer callbacks resolve through late-bound variables so a
|
|
186
|
+
// re-mint never strands them.
|
|
187
|
+
let abortPromise: Promise<{ kind: "abort" }> | undefined;
|
|
188
|
+
let abortListener: (() => void) | undefined;
|
|
189
|
+
let resolveAbort: ((value: { kind: "abort" }) => void) | undefined;
|
|
190
|
+
if (abortSignal) {
|
|
191
|
+
const { promise, resolve } = Promise.withResolvers<{ kind: "abort" }>();
|
|
192
|
+
resolveAbort = resolve;
|
|
193
|
+
abortListener = () => resolveAbort?.({ kind: "abort" });
|
|
194
|
+
abortSignal.addEventListener("abort", abortListener, { once: true });
|
|
195
|
+
abortPromise = promise;
|
|
196
|
+
}
|
|
191
197
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
| { kind: "abort" }
|
|
198
|
-
>
|
|
199
|
-
> = [nextResultPromise];
|
|
198
|
+
let timeoutPromise: Promise<{ kind: "timeout" }> | undefined;
|
|
199
|
+
let resolveTimeout: ((value: { kind: "timeout" }) => void) | undefined;
|
|
200
|
+
let timeoutFired = false;
|
|
201
|
+
let timer: NodeJS.Timeout | undefined;
|
|
202
|
+
let timerFireAtMs = Infinity;
|
|
200
203
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
204
|
+
const currentDeadlineMs = (): number | undefined => {
|
|
205
|
+
if (awaitingFirstItem) return firstItemDeadlineMs;
|
|
206
|
+
if (options.idleTimeoutMs !== undefined && options.idleTimeoutMs > 0) {
|
|
207
|
+
return lastProgressAt + options.idleTimeoutMs;
|
|
208
|
+
}
|
|
209
|
+
return undefined;
|
|
210
|
+
};
|
|
211
|
+
const onTimerFire = (): void => {
|
|
212
|
+
timer = undefined;
|
|
213
|
+
timerFireAtMs = Infinity;
|
|
214
|
+
const deadlineMs = currentDeadlineMs();
|
|
215
|
+
if (deadlineMs === undefined) return;
|
|
216
|
+
const remainingMs = deadlineMs - Date.now();
|
|
217
|
+
if (remainingMs > 0) {
|
|
218
|
+
// Progress moved the deadline since this timer was armed — re-arm for
|
|
219
|
+
// the remainder. One stale wake per idle period, not one per item.
|
|
220
|
+
timerFireAtMs = deadlineMs;
|
|
221
|
+
timer = setTimeout(onTimerFire, remainingMs);
|
|
222
|
+
return;
|
|
223
|
+
}
|
|
224
|
+
timeoutFired = true;
|
|
225
|
+
resolveTimeout?.({ kind: "timeout" });
|
|
226
|
+
};
|
|
227
|
+
const armTimer = (deadlineMs: number): void => {
|
|
228
|
+
if (timeoutPromise === undefined || timeoutFired) {
|
|
229
|
+
// A fired-but-unconsumed resolution (the item won the same race) is
|
|
230
|
+
// stale — racing it again would fake a timeout, so mint a fresh one.
|
|
205
231
|
const { promise, resolve } = Promise.withResolvers<{ kind: "timeout" }>();
|
|
232
|
+
timeoutPromise = promise;
|
|
206
233
|
resolveTimeout = resolve;
|
|
207
|
-
|
|
208
|
-
racers.push(promise);
|
|
234
|
+
timeoutFired = false;
|
|
209
235
|
}
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
const { promise, resolve } = Promise.withResolvers<{ kind: "abort" }>();
|
|
215
|
-
resolveAbort = resolve;
|
|
216
|
-
abortListener = () => resolve({ kind: "abort" });
|
|
217
|
-
abortSignal.addEventListener("abort", abortListener, { once: true });
|
|
218
|
-
racers.push(promise);
|
|
236
|
+
if (timer !== undefined) {
|
|
237
|
+
// An armed timer firing at or before the new deadline re-arms itself.
|
|
238
|
+
if (timerFireAtMs <= deadlineMs) return;
|
|
239
|
+
clearTimeout(timer);
|
|
219
240
|
}
|
|
241
|
+
timerFireAtMs = deadlineMs;
|
|
242
|
+
timer = setTimeout(onTimerFire, Math.max(0, deadlineMs - Date.now()));
|
|
243
|
+
};
|
|
220
244
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
245
|
+
try {
|
|
246
|
+
let raceCount = 0;
|
|
247
|
+
while (true) {
|
|
248
|
+
if (++raceCount % RACER_REMINT_INTERVAL === 0) {
|
|
249
|
+
if (abortPromise !== undefined && !abortSignal!.aborted) {
|
|
250
|
+
const { promise, resolve } = Promise.withResolvers<{ kind: "abort" }>();
|
|
251
|
+
resolveAbort = resolve;
|
|
252
|
+
abortPromise = promise;
|
|
253
|
+
}
|
|
254
|
+
if (timeoutPromise !== undefined && !timeoutFired) {
|
|
255
|
+
const { promise, resolve } = Promise.withResolvers<{ kind: "timeout" }>();
|
|
256
|
+
resolveTimeout = resolve;
|
|
257
|
+
timeoutPromise = promise;
|
|
258
|
+
}
|
|
226
259
|
}
|
|
227
|
-
|
|
228
|
-
|
|
260
|
+
let activeTimeoutMs: number | undefined;
|
|
261
|
+
if (awaitingFirstItem) {
|
|
262
|
+
if (firstItemDeadlineMs !== undefined) {
|
|
263
|
+
activeTimeoutMs = firstItemDeadlineMs - Date.now();
|
|
264
|
+
if (activeTimeoutMs <= 0) {
|
|
265
|
+
options.onFirstItemTimeout?.();
|
|
266
|
+
closeIterator();
|
|
267
|
+
throw new Error(options.firstItemErrorMessage ?? options.errorMessage);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
} else if (options.idleTimeoutMs !== undefined && options.idleTimeoutMs > 0) {
|
|
271
|
+
activeTimeoutMs = options.idleTimeoutMs - (Date.now() - lastProgressAt);
|
|
272
|
+
if (activeTimeoutMs <= 0) {
|
|
229
273
|
options.onIdle?.();
|
|
230
|
-
|
|
231
|
-
options.
|
|
274
|
+
closeIterator();
|
|
275
|
+
throw new Error(options.errorMessage);
|
|
232
276
|
}
|
|
233
|
-
closeIterator();
|
|
234
|
-
throw new Error(
|
|
235
|
-
!awaitingFirstItem ? options.errorMessage : (options.firstItemErrorMessage ?? options.errorMessage),
|
|
236
|
-
);
|
|
237
277
|
}
|
|
238
|
-
|
|
239
|
-
|
|
278
|
+
|
|
279
|
+
const nextResultPromise = withRacy(iterator.next());
|
|
280
|
+
|
|
281
|
+
const racers: Array<
|
|
282
|
+
Promise<
|
|
283
|
+
| { kind: "next"; result: IteratorResult<T> }
|
|
284
|
+
| { kind: "error"; error: unknown }
|
|
285
|
+
| { kind: "timeout" }
|
|
286
|
+
| { kind: "abort" }
|
|
287
|
+
>
|
|
288
|
+
> = [nextResultPromise];
|
|
289
|
+
|
|
290
|
+
const enforceTimeout = !noTimeoutEnforced && activeTimeoutMs !== undefined && activeTimeoutMs > 0;
|
|
291
|
+
if (enforceTimeout) {
|
|
292
|
+
armTimer(Date.now() + activeTimeoutMs!);
|
|
293
|
+
racers.push(timeoutPromise!);
|
|
294
|
+
}
|
|
295
|
+
if (abortPromise) {
|
|
296
|
+
racers.push(abortPromise);
|
|
240
297
|
}
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
298
|
+
|
|
299
|
+
// Tracks whether this iteration handed an item to the consumer and resumed
|
|
300
|
+
// normally. Any other exit — internal throw, `done` return, or the consumer
|
|
301
|
+
// abandoning us via `.return()`/`.throw()` at the `yield` below — must close
|
|
302
|
+
// the upstream iterator so the underlying SSE body / SDK stream (and its
|
|
303
|
+
// socket) is released instead of being left suspended.
|
|
304
|
+
let continuing = false;
|
|
305
|
+
try {
|
|
306
|
+
const outcome = await Promise.race(racers);
|
|
307
|
+
if (outcome.kind === "abort") {
|
|
308
|
+
closeIterator();
|
|
309
|
+
throw abortReason(abortSignal!);
|
|
310
|
+
}
|
|
311
|
+
if (outcome.kind === "timeout") {
|
|
312
|
+
if (!awaitingFirstItem) {
|
|
313
|
+
options.onIdle?.();
|
|
314
|
+
} else {
|
|
315
|
+
options.onFirstItemTimeout?.();
|
|
316
|
+
}
|
|
317
|
+
closeIterator();
|
|
318
|
+
throw new Error(
|
|
319
|
+
!awaitingFirstItem ? options.errorMessage : (options.firstItemErrorMessage ?? options.errorMessage),
|
|
320
|
+
);
|
|
321
|
+
}
|
|
322
|
+
if (outcome.kind === "error") {
|
|
323
|
+
throw outcome.error;
|
|
324
|
+
}
|
|
325
|
+
if (outcome.result.done) {
|
|
326
|
+
markFirstItemReceived();
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
329
|
+
const item = outcome.result.value;
|
|
330
|
+
// Non-progress items (e.g. provider keepalives, synthetic `start` events that
|
|
331
|
+
// arrive before the model has produced any tokens) MUST NOT flip us out of
|
|
332
|
+
// `awaitingFirstItem`. Otherwise the next iteration switches from the (longer)
|
|
333
|
+
// first-item watchdog to the (shorter) idle watchdog while we're still waiting
|
|
334
|
+
// on the model's first real output.
|
|
335
|
+
if (isProgressItem(item)) {
|
|
336
|
+
markFirstItemReceived();
|
|
337
|
+
lastProgressAt = Date.now();
|
|
338
|
+
}
|
|
339
|
+
yield item;
|
|
340
|
+
continuing = true;
|
|
341
|
+
} finally {
|
|
342
|
+
if (!continuing) closeIterator();
|
|
244
343
|
}
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
344
|
+
}
|
|
345
|
+
} finally {
|
|
346
|
+
if (timer !== undefined) clearTimeout(timer);
|
|
347
|
+
// Settle the persistent racers so the final Promise.race releases them.
|
|
348
|
+
resolveTimeout?.({ kind: "timeout" });
|
|
349
|
+
if (abortListener && abortSignal) {
|
|
350
|
+
abortSignal.removeEventListener("abort", abortListener);
|
|
351
|
+
}
|
|
352
|
+
resolveAbort?.({ kind: "abort" });
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
export interface TerminalGraceIteratorOptions {
|
|
357
|
+
/**
|
|
358
|
+
* Epoch-ms timestamp at which the consumer observed a logically terminal
|
|
359
|
+
* item (e.g. a chat-completions chunk carrying `finish_reason`), or
|
|
360
|
+
* `undefined` while the stream is still mid-response. Read before every
|
|
361
|
+
* pull, so the consumer can flip it between yields.
|
|
362
|
+
*/
|
|
363
|
+
finishedAtMs: () => number | undefined;
|
|
364
|
+
/**
|
|
365
|
+
* Post-terminal budget: how long after `finishedAtMs()` to keep draining
|
|
366
|
+
* trailing items (e.g. a usage-only chunk or the `[DONE]` sentinel) before
|
|
367
|
+
* ending the iteration cleanly. The deadline is fixed at
|
|
368
|
+
* `finishedAtMs() + graceMs`; trailing items do not extend it, so
|
|
369
|
+
* keepalive-only servers cannot hold the stream open.
|
|
370
|
+
*/
|
|
371
|
+
graceMs: number;
|
|
372
|
+
/**
|
|
373
|
+
* Invoked when the grace window closes with the source still open. Use it
|
|
374
|
+
* to abort the underlying request: the source generator is typically parked
|
|
375
|
+
* mid-`next()` (not at a yield), so a queued `.return()` alone cannot reach
|
|
376
|
+
* the transport until that pending read settles.
|
|
377
|
+
*/
|
|
378
|
+
onGraceEnd?: () => void;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Yields items from an async iterable until the consumer marks the stream
|
|
383
|
+
* logically finished AND the source stays silent past a short grace window.
|
|
384
|
+
*
|
|
385
|
+
* Misbehaving OpenAI-compatible servers deliver the terminal chunk but never
|
|
386
|
+
* send `[DONE]` nor close the connection; without this guard the consumer
|
|
387
|
+
* hangs on `iterator.next()` until the idle watchdog converts an
|
|
388
|
+
* already-successful turn into a timeout error. Grace expiry is a clean end
|
|
389
|
+
* of iteration, never an error.
|
|
390
|
+
*/
|
|
391
|
+
export async function* iterateWithTerminalGrace<T>(
|
|
392
|
+
iterable: AsyncIterable<T>,
|
|
393
|
+
options: TerminalGraceIteratorOptions,
|
|
394
|
+
): AsyncGenerator<T> {
|
|
395
|
+
const iterator = iterable[Symbol.asyncIterator]();
|
|
396
|
+
try {
|
|
397
|
+
while (true) {
|
|
398
|
+
const finishedAtMs = options.finishedAtMs();
|
|
399
|
+
if (finishedAtMs === undefined) {
|
|
400
|
+
const result = await iterator.next();
|
|
401
|
+
if (result.done) return;
|
|
402
|
+
yield result.value;
|
|
403
|
+
continue;
|
|
404
|
+
}
|
|
405
|
+
const remainingMs = finishedAtMs + options.graceMs - Date.now();
|
|
406
|
+
if (remainingMs <= 0) {
|
|
407
|
+
options.onGraceEnd?.();
|
|
408
|
+
return;
|
|
254
409
|
}
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
410
|
+
const nextPromise = iterator.next();
|
|
411
|
+
let timer: NodeJS.Timeout | undefined;
|
|
412
|
+
const timeoutPromise = new Promise<"timeout">(resolve => {
|
|
413
|
+
timer = setTimeout(() => resolve("timeout"), remainingMs);
|
|
414
|
+
});
|
|
415
|
+
try {
|
|
416
|
+
const outcome = await Promise.race([nextPromise, timeoutPromise]);
|
|
417
|
+
if (outcome === "timeout") {
|
|
418
|
+
// The abandoned read settles (likely rejects) once onGraceEnd
|
|
419
|
+
// aborts the transport — mark it handled so it cannot surface
|
|
420
|
+
// as an unhandled rejection.
|
|
421
|
+
nextPromise.catch(() => {});
|
|
422
|
+
options.onGraceEnd?.();
|
|
423
|
+
return;
|
|
424
|
+
}
|
|
425
|
+
if (outcome.done) return;
|
|
426
|
+
yield outcome.value;
|
|
427
|
+
} finally {
|
|
428
|
+
if (timer !== undefined) clearTimeout(timer);
|
|
262
429
|
}
|
|
263
|
-
|
|
430
|
+
}
|
|
431
|
+
} finally {
|
|
432
|
+
const returnPromise = iterator.return?.();
|
|
433
|
+
if (returnPromise) {
|
|
434
|
+
void Promise.resolve(returnPromise).catch(() => {});
|
|
264
435
|
}
|
|
265
436
|
}
|
|
266
437
|
}
|