@prometheus-ai/ai 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
* Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import { scheduler } from "node:timers/promises";
|
|
6
|
+
import { calculateCost } from "@prometheus-ai/catalog/models";
|
|
5
7
|
import { extractHttpStatusFromError, readSseJson } from "@prometheus-ai/utils";
|
|
6
|
-
import {
|
|
8
|
+
import { ProviderHttpError } from "../errors";
|
|
7
9
|
import type {
|
|
8
10
|
Api,
|
|
9
11
|
AssistantMessage,
|
|
@@ -20,7 +22,7 @@ import type {
|
|
|
20
22
|
} from "../types";
|
|
21
23
|
import { normalizeSystemPrompts } from "../utils";
|
|
22
24
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
23
|
-
import { finalizeErrorMessage, type RawHttpRequestDump
|
|
25
|
+
import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
24
26
|
import { normalizeSchemaForCCA, normalizeSchemaForGoogle, toolWireSchema } from "../utils/schema";
|
|
25
27
|
import type {
|
|
26
28
|
Content,
|
|
@@ -45,6 +47,11 @@ export type {
|
|
|
45
47
|
} from "./google-types";
|
|
46
48
|
export { normalizeSchemaForGoogle };
|
|
47
49
|
|
|
50
|
+
/** Non-2xx response (or in-stream error chunk) from the Google Generative Language / Vertex API. */
|
|
51
|
+
export class GoogleApiError extends ProviderHttpError {
|
|
52
|
+
override readonly name = "GoogleApiError";
|
|
53
|
+
}
|
|
54
|
+
|
|
48
55
|
type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";
|
|
49
56
|
|
|
50
57
|
/**
|
|
@@ -160,7 +167,19 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
|
|
|
160
167
|
|
|
161
168
|
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
|
|
162
169
|
|
|
170
|
+
// Gemini < 3 image tool results go in a separate user turn, but parallel tool results must
|
|
171
|
+
// stay a single contiguous functionResponse turn ("number of function response parts is not
|
|
172
|
+
// equal to number of function call parts"). Buffer image turns and flush them only after the
|
|
173
|
+
// merged functionResponse turn is complete.
|
|
174
|
+
let pendingToolImageParts: Part[] = [];
|
|
175
|
+
const flushPendingToolImages = () => {
|
|
176
|
+
if (pendingToolImageParts.length === 0) return;
|
|
177
|
+
contents.push({ role: "user", parts: pendingToolImageParts });
|
|
178
|
+
pendingToolImageParts = [];
|
|
179
|
+
};
|
|
180
|
+
|
|
163
181
|
for (const msg of transformedMessages) {
|
|
182
|
+
if (msg.role !== "toolResult") flushPendingToolImages();
|
|
164
183
|
if (msg.role === "user" || msg.role === "developer") {
|
|
165
184
|
if (typeof msg.content === "string") {
|
|
166
185
|
// Skip empty user messages
|
|
@@ -314,15 +333,13 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
|
|
|
314
333
|
});
|
|
315
334
|
}
|
|
316
335
|
|
|
317
|
-
// For Gemini < 3,
|
|
336
|
+
// For Gemini < 3, buffer images for a separate user message after the functionResponse turn
|
|
318
337
|
if (hasImages && !modelSupportsMultimodalFunctionResponse) {
|
|
319
|
-
|
|
320
|
-
role: "user",
|
|
321
|
-
parts: [{ text: "Tool result image:" }, ...imageParts],
|
|
322
|
-
});
|
|
338
|
+
pendingToolImageParts.push({ text: "Tool result image:" }, ...imageParts);
|
|
323
339
|
}
|
|
324
340
|
}
|
|
325
341
|
}
|
|
342
|
+
flushPendingToolImages();
|
|
326
343
|
|
|
327
344
|
return contents;
|
|
328
345
|
}
|
|
@@ -355,7 +372,7 @@ export function convertTools(
|
|
|
355
372
|
description: tool.description || "",
|
|
356
373
|
...(useParameters
|
|
357
374
|
? { parameters: normalizeSchemaForCCA(toolWireSchema(tool)) }
|
|
358
|
-
: { parametersJsonSchema: toolWireSchema(tool) }),
|
|
375
|
+
: { parametersJsonSchema: normalizeSchemaForGoogle(toolWireSchema(tool)) }),
|
|
359
376
|
})),
|
|
360
377
|
},
|
|
361
378
|
];
|
|
@@ -422,6 +439,47 @@ export function mapStopReasonString(reason: string): StopReason {
|
|
|
422
439
|
}
|
|
423
440
|
}
|
|
424
441
|
|
|
442
|
+
/**
|
|
443
|
+
* Bounded retries for the well-known Gemini "empty response" failure: a benign
|
|
444
|
+
* `finishReason: STOP` carrying only an empty/whitespace text part and no tool call.
|
|
445
|
+
* Shared by the public/Vertex `streamGoogleGenAI` path and the Cloud Code Assist
|
|
446
|
+
* (`google-gemini-cli`/`google-antigravity`) provider so both apply the same policy.
|
|
447
|
+
*/
|
|
448
|
+
export const MAX_EMPTY_STREAM_RETRIES = 2;
|
|
449
|
+
export const EMPTY_STREAM_BASE_DELAY_MS = 500;
|
|
450
|
+
|
|
451
|
+
/**
|
|
452
|
+
* Whether a completed Google assistant message carries content worth delivering.
|
|
453
|
+
*
|
|
454
|
+
* A tool call or any non-whitespace text counts as meaningful. An empty/whitespace-only
|
|
455
|
+
* text part — or thinking that never produced an answer — is the "empty response" failure:
|
|
456
|
+
* delivered as-is the agent loop has nothing to act on and silently halts, so the request
|
|
457
|
+
* must be retried instead of surfaced.
|
|
458
|
+
*/
|
|
459
|
+
export function hasMeaningfulGoogleContent(output: AssistantMessage): boolean {
|
|
460
|
+
for (const block of output.content) {
|
|
461
|
+
if (block.type === "toolCall") return true;
|
|
462
|
+
if (block.type === "text" && block.text.trim().length > 0) return true;
|
|
463
|
+
}
|
|
464
|
+
return false;
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
/** Wipe a streamed message between empty-response retries so the next attempt starts clean. */
|
|
468
|
+
function resetGoogleStreamOutputForRetry(output: AssistantMessage): void {
|
|
469
|
+
output.content = [];
|
|
470
|
+
output.usage = {
|
|
471
|
+
input: 0,
|
|
472
|
+
output: 0,
|
|
473
|
+
cacheRead: 0,
|
|
474
|
+
cacheWrite: 0,
|
|
475
|
+
totalTokens: 0,
|
|
476
|
+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
477
|
+
};
|
|
478
|
+
output.stopReason = "stop";
|
|
479
|
+
output.errorMessage = undefined;
|
|
480
|
+
output.timestamp = Date.now();
|
|
481
|
+
}
|
|
482
|
+
|
|
425
483
|
/**
|
|
426
484
|
* Module-local counter for generating unique tool call IDs across Google providers.
|
|
427
485
|
* Shared so that a single monotonically-increasing sequence is used regardless of which
|
|
@@ -527,6 +585,7 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
527
585
|
const blockIndex = () => blocks.length - 1;
|
|
528
586
|
let currentBlock: TextContent | ThinkingContent | null = null;
|
|
529
587
|
let firstTokenSeen = false;
|
|
588
|
+
let sawFinishReason = false;
|
|
530
589
|
|
|
531
590
|
const flushCurrent = () => {
|
|
532
591
|
if (!currentBlock) return;
|
|
@@ -534,10 +593,23 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
534
593
|
};
|
|
535
594
|
|
|
536
595
|
for await (const chunk of googleStream) {
|
|
596
|
+
if (chunk.error) {
|
|
597
|
+
const detail = chunk.error.message || chunk.error.status || "unknown error";
|
|
598
|
+
const message = `Google API stream error: ${detail}`;
|
|
599
|
+
throw typeof chunk.error.code === "number" && chunk.error.code >= 400
|
|
600
|
+
? new GoogleApiError(message, chunk.error.code)
|
|
601
|
+
: new Error(message);
|
|
602
|
+
}
|
|
603
|
+
if (!chunk.candidates?.length && chunk.promptFeedback?.blockReason) {
|
|
604
|
+
const detail = chunk.promptFeedback.blockReasonMessage;
|
|
605
|
+
throw new Error(
|
|
606
|
+
`Request blocked by Google (${chunk.promptFeedback.blockReason})${detail ? `: ${detail}` : ""}`,
|
|
607
|
+
);
|
|
608
|
+
}
|
|
537
609
|
const candidate = chunk.candidates?.[0];
|
|
538
610
|
if (candidate?.content?.parts) {
|
|
539
611
|
for (const part of candidate.content.parts) {
|
|
540
|
-
if (part.text !== undefined) {
|
|
612
|
+
if (part.text !== undefined && part.text !== "") {
|
|
541
613
|
if (!firstTokenSeen) {
|
|
542
614
|
firstTokenSeen = true;
|
|
543
615
|
onFirstToken?.();
|
|
@@ -578,6 +650,18 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
578
650
|
partial: output,
|
|
579
651
|
});
|
|
580
652
|
}
|
|
653
|
+
} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
|
|
654
|
+
if (currentBlock.type === "thinking") {
|
|
655
|
+
currentBlock.thinkingSignature = retainThoughtSignature(
|
|
656
|
+
currentBlock.thinkingSignature,
|
|
657
|
+
part.thoughtSignature,
|
|
658
|
+
);
|
|
659
|
+
} else if (retainTextSignature) {
|
|
660
|
+
currentBlock.textSignature = retainThoughtSignature(
|
|
661
|
+
currentBlock.textSignature,
|
|
662
|
+
part.thoughtSignature,
|
|
663
|
+
);
|
|
664
|
+
}
|
|
581
665
|
}
|
|
582
666
|
|
|
583
667
|
if (part.functionCall) {
|
|
@@ -606,9 +690,17 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
606
690
|
}
|
|
607
691
|
|
|
608
692
|
if (candidate?.finishReason) {
|
|
609
|
-
|
|
610
|
-
|
|
693
|
+
sawFinishReason = true;
|
|
694
|
+
const mapped = mapStopReason(candidate.finishReason);
|
|
695
|
+
// Only let a trailing tool call upgrade benign finishes; SAFETY/MALFORMED_FUNCTION_CALL
|
|
696
|
+
// and friends must surface as errors even when earlier chunks carried valid tool calls.
|
|
697
|
+
if ((mapped === "stop" || mapped === "length") && output.content.some(b => b.type === "toolCall")) {
|
|
611
698
|
output.stopReason = "toolUse";
|
|
699
|
+
} else {
|
|
700
|
+
output.stopReason = mapped;
|
|
701
|
+
if (mapped === "error") {
|
|
702
|
+
output.errorMessage = `Generation failed with finish reason: ${candidate.finishReason}`;
|
|
703
|
+
}
|
|
612
704
|
}
|
|
613
705
|
}
|
|
614
706
|
|
|
@@ -645,6 +737,10 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
645
737
|
throw new Error("Request was aborted");
|
|
646
738
|
}
|
|
647
739
|
|
|
740
|
+
if (!sawFinishReason) {
|
|
741
|
+
throw new Error("Google API stream ended without a finish reason (connection dropped or response truncated)");
|
|
742
|
+
}
|
|
743
|
+
|
|
648
744
|
if (output.stopReason === "aborted" || output.stopReason === "error") {
|
|
649
745
|
throw new Error(output.errorMessage ?? "An unknown error occurred");
|
|
650
746
|
}
|
|
@@ -804,41 +900,65 @@ export function streamGoogleGenAI<T extends "google-generative-ai" | "google-ver
|
|
|
804
900
|
headers: plan.headers,
|
|
805
901
|
};
|
|
806
902
|
|
|
807
|
-
const
|
|
903
|
+
const bodyJson = JSON.stringify(paramsToWireBody(params));
|
|
808
904
|
const fetchImpl = plan.fetch ?? options?.fetch ?? (globalThis.fetch.bind(globalThis) as FetchImpl);
|
|
809
|
-
const
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
new
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
905
|
+
const openStream = async (): Promise<ReadableStream<Uint8Array>> => {
|
|
906
|
+
const response = await fetchImpl(plan.url, {
|
|
907
|
+
method: "POST",
|
|
908
|
+
headers: { ...plan.headers, "Content-Type": "application/json", Accept: "text/event-stream" },
|
|
909
|
+
body: bodyJson,
|
|
910
|
+
signal: options?.signal,
|
|
911
|
+
});
|
|
912
|
+
if (!response.ok) {
|
|
913
|
+
const errorText = await response.text().catch(() => "");
|
|
914
|
+
throw new GoogleApiError(
|
|
915
|
+
`Google API error (${response.status}): ${extractGoogleErrorMessage(errorText)}`,
|
|
916
|
+
response.status,
|
|
917
|
+
{ headers: response.headers },
|
|
918
|
+
);
|
|
919
|
+
}
|
|
920
|
+
if (!response.body) {
|
|
921
|
+
throw new Error("Google API returned an empty response body");
|
|
922
|
+
}
|
|
923
|
+
return response.body as ReadableStream<Uint8Array>;
|
|
924
|
+
};
|
|
829
925
|
|
|
926
|
+
let body = await openStream();
|
|
830
927
|
stream.push({ type: "start", partial: output });
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
options,
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
928
|
+
|
|
929
|
+
// Gemini occasionally finishes with `finishReason: STOP` while emitting only an empty
|
|
930
|
+
// text part and no tool call. Delivered as-is the agent receives a blank message and
|
|
931
|
+
// silently halts mid-task, so retry a bounded number of times before giving up.
|
|
932
|
+
for (let emptyAttempt = 0; ; emptyAttempt++) {
|
|
933
|
+
const googleStream = readSseJson<GenerateContentResponse>(body, options?.signal, event =>
|
|
934
|
+
options?.onSseEvent?.({ event: event.event, data: event.data, raw: [...event.raw] }, model),
|
|
935
|
+
);
|
|
936
|
+
await consumeGoogleStream({
|
|
937
|
+
googleStream,
|
|
938
|
+
output,
|
|
939
|
+
stream,
|
|
940
|
+
model,
|
|
941
|
+
options,
|
|
942
|
+
retainTextSignature,
|
|
943
|
+
onFirstToken: () => {
|
|
944
|
+
firstTokenTime = Date.now();
|
|
945
|
+
},
|
|
946
|
+
});
|
|
947
|
+
|
|
948
|
+
if (output.stopReason !== "stop" || hasMeaningfulGoogleContent(output)) break;
|
|
949
|
+
if (emptyAttempt >= MAX_EMPTY_STREAM_RETRIES) {
|
|
950
|
+
throw new Error(
|
|
951
|
+
`Google API returned an empty response (finishReason STOP with no content) after ${MAX_EMPTY_STREAM_RETRIES + 1} attempts`,
|
|
952
|
+
);
|
|
953
|
+
}
|
|
954
|
+
try {
|
|
955
|
+
await scheduler.wait(EMPTY_STREAM_BASE_DELAY_MS * 2 ** emptyAttempt, { signal: options?.signal });
|
|
956
|
+
} catch {
|
|
957
|
+
throw new Error("Request was aborted");
|
|
958
|
+
}
|
|
959
|
+
resetGoogleStreamOutputForRetry(output);
|
|
960
|
+
body = await openStream();
|
|
961
|
+
}
|
|
842
962
|
|
|
843
963
|
output.duration = Date.now() - startTime;
|
|
844
964
|
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
@@ -157,11 +157,20 @@ export interface UsageMetadata {
|
|
|
157
157
|
cachedContentTokenCount?: number;
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
+
/** Prompt-level safety feedback; `blockReason` is set (with no candidates) when the prompt is blocked. */
|
|
161
|
+
export interface PromptFeedback {
|
|
162
|
+
blockReason?: string;
|
|
163
|
+
blockReasonMessage?: string;
|
|
164
|
+
[key: string]: unknown;
|
|
165
|
+
}
|
|
166
|
+
|
|
160
167
|
/** Single SSE chunk's parsed JSON body. */
|
|
161
168
|
export interface GenerateContentResponse {
|
|
162
169
|
candidates?: Candidate[];
|
|
163
170
|
usageMetadata?: UsageMetadata;
|
|
164
171
|
modelVersion?: string;
|
|
165
172
|
responseId?: string;
|
|
166
|
-
promptFeedback?:
|
|
173
|
+
promptFeedback?: PromptFeedback;
|
|
174
|
+
/** In-band stream failure (quota, internal error) delivered as a final JSON event. */
|
|
175
|
+
error?: { code?: number; message?: string; status?: string };
|
|
167
176
|
}
|
package/src/providers/kimi.ts
CHANGED
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
* Note: Kimi calculates TPM rate limits based on max_tokens, not actual output.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
+
import { getKimiCommonHeaders } from "../registry/oauth/kimi";
|
|
12
13
|
import type { Api, Context, Model } from "../types";
|
|
13
14
|
import type { AssistantMessageEventStream } from "../utils/event-stream";
|
|
14
|
-
import { getKimiCommonHeaders } from "../utils/oauth/kimi";
|
|
15
15
|
import {
|
|
16
16
|
type OpenAIAnthropicApiFormat,
|
|
17
17
|
type OpenAIAnthropicShimOptions,
|
package/src/providers/mock.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Mock provider for tests.
|
|
3
3
|
*
|
|
4
4
|
* Implements `Model<"mock">` + `streamMock` so test code can drive
|
|
5
|
-
*
|
|
5
|
+
* agent-core / streamSimple-shaped consumers without an HTTP client.
|
|
6
6
|
*
|
|
7
7
|
* Usage:
|
|
8
8
|
*
|
|
@@ -49,6 +49,7 @@ import type {
|
|
|
49
49
|
Context,
|
|
50
50
|
Model,
|
|
51
51
|
SimpleStreamOptions,
|
|
52
|
+
StopDetails,
|
|
52
53
|
StopReason,
|
|
53
54
|
TextContent,
|
|
54
55
|
ThinkingContent,
|
|
@@ -81,6 +82,10 @@ export interface MockResponse {
|
|
|
81
82
|
content?: ReadonlyArray<MockContent>;
|
|
82
83
|
/** Stop reason. Defaults to `"toolUse"` when content has tool calls, else `"stop"`. */
|
|
83
84
|
stopReason?: StopReason;
|
|
85
|
+
/** Structured terminal stop classification, e.g. Anthropic refusal metadata. */
|
|
86
|
+
stopDetails?: StopDetails | null;
|
|
87
|
+
/** Error text paired with an explicit `"error"` stop reason. */
|
|
88
|
+
errorMessage?: string;
|
|
84
89
|
/** Usage stats. Missing fields default to 0; missing `cost.total` is recomputed from components. */
|
|
85
90
|
usage?: Partial<Omit<Usage, "cost">> & { cost?: Partial<Usage["cost"]> };
|
|
86
91
|
/** Pre-set responseId. */
|
|
@@ -168,6 +173,7 @@ export class MockModel implements Model<MockApi> {
|
|
|
168
173
|
readonly cost: Model["cost"];
|
|
169
174
|
readonly contextWindow: number;
|
|
170
175
|
readonly maxTokens: number;
|
|
176
|
+
readonly compat = undefined;
|
|
171
177
|
|
|
172
178
|
/** Recorded calls in invocation order. */
|
|
173
179
|
readonly calls: MockCall[] = [];
|
|
@@ -226,7 +232,7 @@ export function createMockModel(options: MockModelOptions = {}): MockModel {
|
|
|
226
232
|
return new MockModel(options);
|
|
227
233
|
}
|
|
228
234
|
|
|
229
|
-
/** Stream function for `Model<"mock">`. Matches the
|
|
235
|
+
/** Stream function for `Model<"mock">`. Matches the Prometheus AI per-provider stream signature. */
|
|
230
236
|
export function streamMock(
|
|
231
237
|
model: Model<Api>,
|
|
232
238
|
context: Context,
|
|
@@ -250,7 +256,7 @@ export function streamMock(
|
|
|
250
256
|
}
|
|
251
257
|
|
|
252
258
|
/** Convenience: register the mock provider with the global custom API registry. */
|
|
253
|
-
export function registerMockApi(sourceId = "prometheus-ai/mock"): void {
|
|
259
|
+
export function registerMockApi(sourceId = "@prometheus-ai/ai/mock"): void {
|
|
254
260
|
registerCustomApi(MOCK_API, streamMock, sourceId);
|
|
255
261
|
}
|
|
256
262
|
|
|
@@ -388,6 +394,8 @@ async function runMock(
|
|
|
388
394
|
const reason: StopReason = response.stopReason ?? (hasToolCall ? ("toolUse" as StopReason) : ("stop" as StopReason));
|
|
389
395
|
|
|
390
396
|
partial.stopReason = reason;
|
|
397
|
+
partial.stopDetails = response.stopDetails;
|
|
398
|
+
partial.errorMessage = response.errorMessage;
|
|
391
399
|
partial.usage = mergeUsage(response.usage);
|
|
392
400
|
partial.duration = Date.now() - startedAt;
|
|
393
401
|
|
package/src/providers/ollama.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { extractHttpStatusFromError, fetchWithRetry } from "@prometheus-ai/utils";
|
|
2
|
+
import { ProviderHttpError } from "../errors";
|
|
2
3
|
import { getEnvApiKey } from "../stream";
|
|
3
4
|
import type {
|
|
4
5
|
Api,
|
|
@@ -16,7 +17,8 @@ import type {
|
|
|
16
17
|
} from "../types";
|
|
17
18
|
import { normalizeSystemPrompts } from "../utils";
|
|
18
19
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
19
|
-
import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
20
|
+
import { type CapturedHttpErrorResponse, finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
21
|
+
import { getOpenAIStreamFirstEventTimeoutMs, getOpenAIStreamIdleTimeoutMs } from "../utils/idle-iterator";
|
|
20
22
|
import { parseStreamingJson } from "../utils/json-parse";
|
|
21
23
|
import { toolWireSchema } from "../utils/schema/wire";
|
|
22
24
|
import {
|
|
@@ -27,8 +29,14 @@ import {
|
|
|
27
29
|
} from "../utils/stream-markup-healing";
|
|
28
30
|
import { transformMessages } from "./transform-messages";
|
|
29
31
|
|
|
32
|
+
/** Non-2xx response from the Ollama `/api/chat` endpoint. */
|
|
33
|
+
export class OllamaApiError extends ProviderHttpError {
|
|
34
|
+
override readonly name = "OllamaApiError";
|
|
35
|
+
}
|
|
36
|
+
|
|
30
37
|
export interface OllamaChatOptions extends StreamOptions {
|
|
31
38
|
reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
39
|
+
disableReasoning?: boolean;
|
|
32
40
|
toolChoice?: ToolChoice;
|
|
33
41
|
}
|
|
34
42
|
|
|
@@ -91,7 +99,14 @@ function normalizeBaseUrl(baseUrl?: string): string {
|
|
|
91
99
|
return trimmed.endsWith("/api") ? trimmed.slice(0, -4) : trimmed;
|
|
92
100
|
}
|
|
93
101
|
|
|
94
|
-
function mapReasoning(
|
|
102
|
+
function mapReasoning(
|
|
103
|
+
reasoning: OllamaChatOptions["reasoning"],
|
|
104
|
+
disableReasoning: boolean | undefined,
|
|
105
|
+
modelReasoning: boolean,
|
|
106
|
+
): boolean | "low" | "medium" | "high" | undefined {
|
|
107
|
+
if (disableReasoning && modelReasoning) {
|
|
108
|
+
return false;
|
|
109
|
+
}
|
|
95
110
|
switch (reasoning) {
|
|
96
111
|
case "minimal":
|
|
97
112
|
case "low":
|
|
@@ -258,7 +273,7 @@ function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefin
|
|
|
258
273
|
}
|
|
259
274
|
|
|
260
275
|
function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
|
|
261
|
-
const think = mapReasoning(options?.reasoning);
|
|
276
|
+
const think = mapReasoning(options?.reasoning, options?.disableReasoning, model.reasoning);
|
|
262
277
|
const toolChoice = mapToolChoice(options?.toolChoice);
|
|
263
278
|
const selectedTools = selectToolsForToolChoice(context.tools, options?.toolChoice);
|
|
264
279
|
const tools = convertTools(selectedTools);
|
|
@@ -268,11 +283,32 @@ function createChatBody(model: Model<"ollama-chat">, context: Context, options:
|
|
|
268
283
|
...(tools ? { tools } : {}),
|
|
269
284
|
...(think !== undefined ? { think } : {}),
|
|
270
285
|
...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
|
|
271
|
-
...(options?.maxTokens !== undefined
|
|
286
|
+
...(options?.maxTokens !== undefined && !model.omitMaxOutputTokens
|
|
287
|
+
? { options: { num_predict: options.maxTokens } }
|
|
288
|
+
: {}),
|
|
272
289
|
stream: true,
|
|
273
290
|
};
|
|
274
291
|
}
|
|
275
292
|
|
|
293
|
+
async function captureHttpErrorResponse(response: Response): Promise<CapturedHttpErrorResponse> {
|
|
294
|
+
let bodyText: string | undefined;
|
|
295
|
+
let bodyJson: unknown;
|
|
296
|
+
try {
|
|
297
|
+
bodyText = await response.text();
|
|
298
|
+
if (bodyText.trim()) {
|
|
299
|
+
try {
|
|
300
|
+
bodyJson = JSON.parse(bodyText) as unknown;
|
|
301
|
+
} catch {}
|
|
302
|
+
}
|
|
303
|
+
} catch {}
|
|
304
|
+
return {
|
|
305
|
+
status: response.status,
|
|
306
|
+
headers: response.headers,
|
|
307
|
+
bodyText,
|
|
308
|
+
bodyJson,
|
|
309
|
+
};
|
|
310
|
+
}
|
|
311
|
+
|
|
276
312
|
async function* iterateNdjson(stream: ReadableStream<Uint8Array>): AsyncGenerator<OllamaChatChunk> {
|
|
277
313
|
const reader = stream.getReader();
|
|
278
314
|
const decoder = new TextDecoder();
|
|
@@ -376,6 +412,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
376
412
|
let firstTokenTime: number | undefined;
|
|
377
413
|
const output = createEmptyOutput(model);
|
|
378
414
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
415
|
+
let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
|
|
379
416
|
let activeThinkingIndex: number | undefined;
|
|
380
417
|
let activeTextIndex: number | undefined;
|
|
381
418
|
const activeToolIndices = new Set<number>();
|
|
@@ -489,6 +526,22 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
489
526
|
url: `${baseUrl}/api/chat`,
|
|
490
527
|
body,
|
|
491
528
|
};
|
|
529
|
+
// Direct callers that bypass `register-builtins` (which installs
|
|
530
|
+
// the iterator-level watchdog) need a pre-response timer alongside
|
|
531
|
+
// `timeout: false`; otherwise an Ollama server that accepts the
|
|
532
|
+
// POST and never streams headers would hang forever (issue #2422).
|
|
533
|
+
const idleTimeoutMs = options.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
|
|
534
|
+
const firstEventTimeoutMs =
|
|
535
|
+
options.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
|
|
536
|
+
const preResponseWatchdog =
|
|
537
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
538
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
539
|
+
: undefined;
|
|
540
|
+
const fetchSignal = preResponseWatchdog
|
|
541
|
+
? options.signal
|
|
542
|
+
? AbortSignal.any([options.signal, preResponseWatchdog])
|
|
543
|
+
: preResponseWatchdog
|
|
544
|
+
: options.signal;
|
|
492
545
|
const response = await fetchWithRetry(`${baseUrl}/api/chat`, {
|
|
493
546
|
method: "POST",
|
|
494
547
|
headers: {
|
|
@@ -498,12 +551,16 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
498
551
|
"Content-Type": "application/json",
|
|
499
552
|
},
|
|
500
553
|
body: JSON.stringify(body),
|
|
501
|
-
signal:
|
|
554
|
+
signal: fetchSignal,
|
|
502
555
|
defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
|
|
503
556
|
fetch: options.fetch,
|
|
557
|
+
timeout: false,
|
|
504
558
|
});
|
|
505
559
|
if (!response.ok) {
|
|
506
|
-
|
|
560
|
+
capturedErrorResponse = await captureHttpErrorResponse(response);
|
|
561
|
+
throw new OllamaApiError(`HTTP ${response.status} from ${baseUrl}/api/chat`, response.status, {
|
|
562
|
+
headers: response.headers,
|
|
563
|
+
});
|
|
507
564
|
}
|
|
508
565
|
if (!response.body) {
|
|
509
566
|
throw new Error("Ollama returned an empty response body");
|
|
@@ -631,7 +688,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
631
688
|
}
|
|
632
689
|
output.stopReason = options.signal?.aborted ? "aborted" : "error";
|
|
633
690
|
output.errorStatus = extractHttpStatusFromError(error);
|
|
634
|
-
output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
|
|
691
|
+
output.errorMessage = await finalizeErrorMessage(error, rawRequestDump, capturedErrorResponse);
|
|
635
692
|
output.duration = Date.now() - startTime;
|
|
636
693
|
if (firstTokenTime) {
|
|
637
694
|
output.ttft = firstTokenTime - startTime;
|
|
@@ -8,8 +8,9 @@
|
|
|
8
8
|
* here once.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
import { buildModel } from "@prometheus-ai/catalog/build";
|
|
11
12
|
import { ANTHROPIC_THINKING } from "../stream";
|
|
12
|
-
import type { Context, Model, SimpleStreamOptions } from "../types";
|
|
13
|
+
import type { Context, Model, ModelSpec, SimpleStreamOptions } from "../types";
|
|
13
14
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
14
15
|
import { createProviderErrorMessage } from "./error-message";
|
|
15
16
|
import { streamAnthropic, streamOpenAICompletions } from "./register-builtins";
|
|
@@ -44,6 +45,9 @@ export function streamOpenAIAnthropicShim(
|
|
|
44
45
|
): AssistantMessageEventStream {
|
|
45
46
|
const stream = new AssistantMessageEventStream();
|
|
46
47
|
const format = options?.format ?? config.defaultFormat;
|
|
48
|
+
// The resolver form of `apiKey` is resolved upstream in `streamSimple`;
|
|
49
|
+
// this shim only ever receives a static bearer string.
|
|
50
|
+
const apiKey = typeof options?.apiKey === "string" ? options.apiKey : undefined;
|
|
47
51
|
|
|
48
52
|
(async () => {
|
|
49
53
|
try {
|
|
@@ -53,7 +57,7 @@ export function streamOpenAIAnthropicShim(
|
|
|
53
57
|
};
|
|
54
58
|
|
|
55
59
|
if (format === "anthropic") {
|
|
56
|
-
const anthropicModel
|
|
60
|
+
const anthropicModel = buildModel({
|
|
57
61
|
id: model.id,
|
|
58
62
|
name: model.name,
|
|
59
63
|
api: "anthropic-messages",
|
|
@@ -65,7 +69,7 @@ export function streamOpenAIAnthropicShim(
|
|
|
65
69
|
reasoning: model.reasoning,
|
|
66
70
|
input: model.input,
|
|
67
71
|
cost: model.cost,
|
|
68
|
-
};
|
|
72
|
+
} as ModelSpec<"anthropic-messages">);
|
|
69
73
|
|
|
70
74
|
const reasoningEffort = options?.reasoning;
|
|
71
75
|
const thinkingEnabled = !!reasoningEffort && model.reasoning;
|
|
@@ -74,14 +78,14 @@ export function streamOpenAIAnthropicShim(
|
|
|
74
78
|
: undefined;
|
|
75
79
|
|
|
76
80
|
const innerStream = streamAnthropic(anthropicModel, context, {
|
|
77
|
-
apiKey
|
|
81
|
+
apiKey,
|
|
78
82
|
temperature: options?.temperature,
|
|
79
83
|
topP: options?.topP,
|
|
80
84
|
topK: options?.topK,
|
|
81
85
|
minP: options?.minP,
|
|
82
86
|
presencePenalty: options?.presencePenalty,
|
|
83
87
|
repetitionPenalty: options?.repetitionPenalty,
|
|
84
|
-
maxTokens: options?.maxTokens ?? model.maxTokens,
|
|
88
|
+
maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
|
|
85
89
|
signal: options?.signal,
|
|
86
90
|
headers: mergedHeaders,
|
|
87
91
|
sessionId: options?.sessionId,
|
|
@@ -98,19 +102,24 @@ export function streamOpenAIAnthropicShim(
|
|
|
98
102
|
}
|
|
99
103
|
} else {
|
|
100
104
|
const openaiModel: Model<"openai-completions"> = config.openaiBaseUrl
|
|
101
|
-
? {
|
|
105
|
+
? buildModel({
|
|
106
|
+
...model,
|
|
107
|
+
baseUrl: config.openaiBaseUrl,
|
|
108
|
+
headers: mergedHeaders,
|
|
109
|
+
compat: model.compatConfig,
|
|
110
|
+
} as ModelSpec<"openai-completions">)
|
|
102
111
|
: model;
|
|
103
112
|
|
|
104
113
|
const reasoningEffort = options?.reasoning;
|
|
105
114
|
const innerStream = streamOpenAICompletions(openaiModel, context, {
|
|
106
|
-
apiKey
|
|
115
|
+
apiKey,
|
|
107
116
|
temperature: options?.temperature,
|
|
108
117
|
topP: options?.topP,
|
|
109
118
|
topK: options?.topK,
|
|
110
119
|
minP: options?.minP,
|
|
111
120
|
presencePenalty: options?.presencePenalty,
|
|
112
121
|
repetitionPenalty: options?.repetitionPenalty,
|
|
113
|
-
maxTokens: options?.maxTokens ?? model.maxTokens,
|
|
122
|
+
maxTokens: options?.maxTokens ?? model.maxTokens ?? undefined,
|
|
114
123
|
signal: options?.signal,
|
|
115
124
|
headers: mergedHeaders,
|
|
116
125
|
sessionId: options?.sessionId,
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
* non-strict defaults (e.g. `stream_options.include_obfuscation`) — does not
|
|
8
8
|
* trip 400s on shapes we simply ignore.
|
|
9
9
|
*/
|
|
10
|
+
|
|
11
|
+
import { z } from "zod/v4";
|
|
10
12
|
import type {
|
|
11
13
|
ChatCompletionContentPart,
|
|
12
14
|
ChatCompletionCreateParams,
|
|
@@ -14,8 +16,7 @@ import type {
|
|
|
14
16
|
ChatCompletionMessageToolCall,
|
|
15
17
|
ChatCompletionTool,
|
|
16
18
|
ChatCompletionToolChoiceOption,
|
|
17
|
-
} from "openai
|
|
18
|
-
import * as z from "zod/v4";
|
|
19
|
+
} from "./openai-chat-wire";
|
|
19
20
|
|
|
20
21
|
// ─── User-message content parts ─────────────────────────────────────────────
|
|
21
22
|
|
|
@@ -27,7 +28,7 @@ export const textPartSchema = z.object({
|
|
|
27
28
|
/**
|
|
28
29
|
* OpenAI documents `image_url` as either `{ url: string, detail?: ... }` or —
|
|
29
30
|
* older clients — a bare string. Accept both shapes; downstream we extract a
|
|
30
|
-
* URL. `detail` is accepted for forward-compat but currently dropped (
|
|
31
|
+
* URL. `detail` is accepted for forward-compat but currently dropped (Prometheus AI's
|
|
31
32
|
* `ImageContent` has no detail field — TODO: plumb through if/when added).
|
|
32
33
|
*/
|
|
33
34
|
export const imagePartSchema = z.object({
|
|
@@ -145,6 +146,11 @@ export const assistantMessageSchema = z.object({
|
|
|
145
146
|
role: z.literal("assistant"),
|
|
146
147
|
content: baseContent.optional(),
|
|
147
148
|
tool_calls: z.array(toolCallSchema).optional(),
|
|
149
|
+
// DeepSeek-style reasoning channel. The gateway emits it on the way out
|
|
150
|
+
// (encodeResponse/encodeStream); accept it back so thinking-mode
|
|
151
|
+
// continuations replay the model's actual reasoning instead of a
|
|
152
|
+
// synthesized placeholder.
|
|
153
|
+
reasoning_content: z.string().nullish(),
|
|
148
154
|
});
|
|
149
155
|
|
|
150
156
|
export const toolMessageSchema = z.object({
|