@prometheus-ai/ai 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -17,6 +17,7 @@ import * as os from "node:os";
|
|
|
17
17
|
import * as path from "node:path";
|
|
18
18
|
import { $envpos, isEnoent, logger } from "@prometheus-ai/utils";
|
|
19
19
|
import type { FetchImpl } from "../types";
|
|
20
|
+
import { raceWithSignal } from "../utils/abort";
|
|
20
21
|
|
|
21
22
|
const OAUTH_TOKEN_URL = "https://oauth2.googleapis.com/token";
|
|
22
23
|
const METADATA_TOKEN_URL = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token";
|
|
@@ -42,7 +43,14 @@ interface AuthorizedUserCredentials {
|
|
|
42
43
|
refresh_token: string;
|
|
43
44
|
}
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
interface ImpersonatedServiceAccountCredentials {
|
|
47
|
+
type: "impersonated_service_account";
|
|
48
|
+
service_account_impersonation_url: string;
|
|
49
|
+
source_credentials: AuthorizedUserCredentials | ServiceAccountCredentials;
|
|
50
|
+
delegates?: string[];
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
type AdcFileCredentials = ServiceAccountCredentials | AuthorizedUserCredentials | ImpersonatedServiceAccountCredentials;
|
|
46
54
|
|
|
47
55
|
interface TokenResponse {
|
|
48
56
|
access_token: string;
|
|
@@ -196,10 +204,52 @@ async function resolveAccessTokenUncached(
|
|
|
196
204
|
): Promise<{ source: string; token: TokenResponse }> {
|
|
197
205
|
const adc = await loadAdcCredentials();
|
|
198
206
|
if (adc) {
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
207
|
+
const creds = adc.creds;
|
|
208
|
+
let token: TokenResponse;
|
|
209
|
+
|
|
210
|
+
if (creds.type === "impersonated_service_account") {
|
|
211
|
+
const targetPrincipalMatch = /(?<target>[^/]+):(generateAccessToken|generateIdToken)$/.exec(
|
|
212
|
+
creds.service_account_impersonation_url,
|
|
213
|
+
);
|
|
214
|
+
const targetPrincipal = targetPrincipalMatch?.groups?.target;
|
|
215
|
+
if (!targetPrincipal) {
|
|
216
|
+
throw new RangeError(`Cannot extract target principal from ${creds.service_account_impersonation_url}`);
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
const sourceToken =
|
|
220
|
+
creds.source_credentials.type === "service_account"
|
|
221
|
+
? await exchangeJwtForToken(creds.source_credentials, signal, fetchImpl)
|
|
222
|
+
: await exchangeRefreshToken(creds.source_credentials, signal, fetchImpl);
|
|
223
|
+
|
|
224
|
+
const response = await fetchImpl(
|
|
225
|
+
`https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/${targetPrincipal}:generateAccessToken`,
|
|
226
|
+
{
|
|
227
|
+
method: "POST",
|
|
228
|
+
headers: {
|
|
229
|
+
"Content-Type": "application/json",
|
|
230
|
+
Authorization: `Bearer ${sourceToken.access_token}`,
|
|
231
|
+
},
|
|
232
|
+
body: JSON.stringify({
|
|
233
|
+
delegates: creds.delegates ?? [],
|
|
234
|
+
scope: [CLOUD_PLATFORM_SCOPE],
|
|
235
|
+
lifetime: "3600s",
|
|
236
|
+
}),
|
|
237
|
+
signal,
|
|
238
|
+
},
|
|
239
|
+
);
|
|
240
|
+
if (!response.ok) {
|
|
241
|
+
const detail = await response.text().catch(() => "");
|
|
242
|
+
throw new Error(`Google Impersonation token exchange failed (${response.status}): ${detail}`);
|
|
243
|
+
}
|
|
244
|
+
const data = (await response.json()) as { accessToken: string; expireTime: string };
|
|
245
|
+
const expiresIn = Math.max(0, Math.floor((new Date(data.expireTime).getTime() - Date.now()) / 1000));
|
|
246
|
+
token = { access_token: data.accessToken, expires_in: expiresIn, token_type: "Bearer" };
|
|
247
|
+
} else {
|
|
248
|
+
token =
|
|
249
|
+
creds.type === "service_account"
|
|
250
|
+
? await exchangeJwtForToken(creds, signal, fetchImpl)
|
|
251
|
+
: await exchangeRefreshToken(creds, signal, fetchImpl);
|
|
252
|
+
}
|
|
203
253
|
return { source: adc.source, token };
|
|
204
254
|
}
|
|
205
255
|
const metadata = await fetchMetadataToken(signal, fetchImpl);
|
|
@@ -209,6 +259,13 @@ async function resolveAccessTokenUncached(
|
|
|
209
259
|
);
|
|
210
260
|
}
|
|
211
261
|
|
|
262
|
+
/**
|
|
263
|
+
* Bound for the detached (signal-free) shared token resolution: a hung OAuth
|
|
264
|
+
* exchange or metadata fetch must not pin the inflight slot forever — every
|
|
265
|
+
* later call would await the stuck promise until process restart.
|
|
266
|
+
*/
|
|
267
|
+
const SHARED_TOKEN_RESOLVE_TIMEOUT_MS = 30_000;
|
|
268
|
+
|
|
212
269
|
/**
|
|
213
270
|
* Returns a Bearer access token suitable for the `Authorization` header on Vertex AI calls.
|
|
214
271
|
* The token is cached in module scope and refreshed `GOOGLE_VERTEX_REFRESH_SKEW_MS` ms before it expires.
|
|
@@ -228,11 +285,17 @@ export async function getVertexAccessToken(options?: { signal?: AbortSignal; fet
|
|
|
228
285
|
|
|
229
286
|
const cacheKey = "vertex-adc";
|
|
230
287
|
const existing = inflight.get(cacheKey);
|
|
231
|
-
if (existing) return existing;
|
|
288
|
+
if (existing) return raceWithSignal(existing, options?.signal);
|
|
232
289
|
|
|
290
|
+
// Deliberately resolve without any caller's signal: the in-flight promise is shared
|
|
291
|
+
// by every concurrent caller, so aborting one request must not fail the whole batch.
|
|
292
|
+
// Each caller races its own signal against the shared promise instead.
|
|
233
293
|
const promise = (async () => {
|
|
234
294
|
try {
|
|
235
|
-
const { source, token } = await resolveAccessTokenUncached(
|
|
295
|
+
const { source, token } = await resolveAccessTokenUncached(
|
|
296
|
+
AbortSignal.timeout(SHARED_TOKEN_RESOLVE_TIMEOUT_MS),
|
|
297
|
+
fetchImpl,
|
|
298
|
+
);
|
|
236
299
|
const expiresAtMs = Date.now() + Math.max(0, token.expires_in * 1000);
|
|
237
300
|
tokenCache.set(source, { token: token.access_token, expiresAtMs });
|
|
238
301
|
logger.debug("vertex.adc acquired access token", { source, expiresInSec: token.expires_in });
|
|
@@ -242,7 +305,7 @@ export async function getVertexAccessToken(options?: { signal?: AbortSignal; fet
|
|
|
242
305
|
}
|
|
243
306
|
})();
|
|
244
307
|
inflight.set(cacheKey, promise);
|
|
245
|
-
return promise;
|
|
308
|
+
return raceWithSignal(promise, options?.signal);
|
|
246
309
|
}
|
|
247
310
|
|
|
248
311
|
/** Test seam: clears every cached token. */
|
|
@@ -5,8 +5,15 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { createHash, randomBytes, randomUUID } from "node:crypto";
|
|
7
7
|
import { scheduler } from "node:timers/promises";
|
|
8
|
+
import { calculateCost } from "@prometheus-ai/catalog/models";
|
|
9
|
+
import {
|
|
10
|
+
ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
|
|
11
|
+
ANTIGRAVITY_SYSTEM_INSTRUCTION,
|
|
12
|
+
getAntigravityUserAgent,
|
|
13
|
+
getGeminiCliHeaders,
|
|
14
|
+
} from "@prometheus-ai/catalog/wire/gemini-headers";
|
|
8
15
|
import { extractHttpStatusFromError, fetchWithRetry, readSseJson } from "@prometheus-ai/utils";
|
|
9
|
-
import {
|
|
16
|
+
import { ProviderHttpError } from "../errors";
|
|
10
17
|
import type {
|
|
11
18
|
Api,
|
|
12
19
|
AssistantMessage,
|
|
@@ -20,17 +27,20 @@ import type {
|
|
|
20
27
|
} from "../types";
|
|
21
28
|
import { normalizeSystemPrompts } from "../utils";
|
|
22
29
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
23
|
-
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump
|
|
30
|
+
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
31
|
+
import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
|
|
24
32
|
// Refresh is the sole responsibility of AuthStorage (broker-aware, single-flighted);
|
|
25
33
|
// the stream provider trusts the access token threaded through `options.apiKey`.
|
|
26
34
|
import { normalizeSchemaForCCA } from "../utils/schema";
|
|
27
|
-
import { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders } from "./google-gemini-headers";
|
|
28
35
|
import type { Content, FunctionCallingConfigMode, ThinkingConfig } from "./google-shared";
|
|
29
36
|
import {
|
|
30
37
|
convertMessages,
|
|
31
38
|
convertTools,
|
|
39
|
+
EMPTY_STREAM_BASE_DELAY_MS,
|
|
32
40
|
type GoogleThinkingLevel,
|
|
41
|
+
hasMeaningfulGoogleContent,
|
|
33
42
|
isThinkingPart,
|
|
43
|
+
MAX_EMPTY_STREAM_RETRIES,
|
|
34
44
|
mapStopReasonString,
|
|
35
45
|
mapToolChoice,
|
|
36
46
|
nextToolCallId,
|
|
@@ -46,6 +56,11 @@ import {
|
|
|
46
56
|
*/
|
|
47
57
|
export type { GoogleThinkingLevel };
|
|
48
58
|
|
|
59
|
+
/** Non-2xx response (or in-stream error chunk) from the Cloud Code Assist API. */
|
|
60
|
+
export class GeminiCliApiError extends ProviderHttpError {
|
|
61
|
+
override readonly name = "GeminiCliApiError";
|
|
62
|
+
}
|
|
63
|
+
|
|
49
64
|
export interface GoogleGeminiCliOptions extends StreamOptions {
|
|
50
65
|
/**
|
|
51
66
|
* Tool selection mode. String forms map directly to Gemini
|
|
@@ -66,7 +81,19 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
|
|
|
66
81
|
budgetTokens?: number;
|
|
67
82
|
/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */
|
|
68
83
|
level?: GoogleThinkingLevel;
|
|
84
|
+
/**
|
|
85
|
+
* Explicit wire suppression when `enabled` is false. Cloud Code Assist
|
|
86
|
+
* re-applies the per-id baked server default when thinkingConfig is
|
|
87
|
+
* omitted, so models with `thinking.suppressWhenOff` must send
|
|
88
|
+
* `includeThoughts: false` plus a MINIMAL level (or zero budget).
|
|
89
|
+
*/
|
|
90
|
+
suppress?: { level: GoogleThinkingLevel } | { budget: number };
|
|
69
91
|
};
|
|
92
|
+
/**
|
|
93
|
+
* Upstream wire model id override for collapsed effort-tier variants.
|
|
94
|
+
* Serialized as `requestModelId ?? model.requestModelId ?? model.id`.
|
|
95
|
+
*/
|
|
96
|
+
requestModelId?: string;
|
|
70
97
|
projectId?: string;
|
|
71
98
|
}
|
|
72
99
|
|
|
@@ -76,17 +103,16 @@ const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googlea
|
|
|
76
103
|
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
|
|
77
104
|
|
|
78
105
|
export {
|
|
106
|
+
ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
|
|
79
107
|
ANTIGRAVITY_SYSTEM_INSTRUCTION,
|
|
80
108
|
getAntigravityUserAgent,
|
|
81
109
|
getGeminiCliHeaders,
|
|
82
110
|
getGeminiCliUserAgent,
|
|
83
|
-
} from "
|
|
111
|
+
} from "@prometheus-ai/catalog/wire/gemini-headers";
|
|
84
112
|
|
|
85
113
|
// Retry configuration
|
|
86
114
|
const MAX_RETRIES = 3;
|
|
87
115
|
const BASE_DELAY_MS = 1000;
|
|
88
|
-
const MAX_EMPTY_STREAM_RETRIES = 2;
|
|
89
|
-
const EMPTY_STREAM_BASE_DELAY_MS = 500;
|
|
90
116
|
const RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
|
|
91
117
|
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
|
|
92
118
|
const GOOGLE_GEMINI_REFRESH_SKEW_MS = 60_000;
|
|
@@ -253,7 +279,10 @@ interface CloudCodeAssistResponseChunk {
|
|
|
253
279
|
};
|
|
254
280
|
modelVersion?: string;
|
|
255
281
|
responseId?: string;
|
|
282
|
+
promptFeedback?: { blockReason?: string; blockReasonMessage?: string };
|
|
256
283
|
};
|
|
284
|
+
/** In-band stream failure (quota, internal error) delivered as a final JSON event. */
|
|
285
|
+
error?: { code?: number; message?: string; status?: string };
|
|
257
286
|
traceId?: string;
|
|
258
287
|
}
|
|
259
288
|
|
|
@@ -339,29 +368,48 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
339
368
|
headers: requestHeaders,
|
|
340
369
|
};
|
|
341
370
|
|
|
371
|
+
// Direct callers that skip `register-builtins` (which installs the
|
|
372
|
+
// iterator-level watchdog) need a pre-response timer alongside
|
|
373
|
+
// `timeout: false`; otherwise a stalled Cloud Code Assist proxy
|
|
374
|
+
// would hang forever. Floor matches the lazy wrapper's 5min default.
|
|
375
|
+
const firstEventTimeoutMs =
|
|
376
|
+
options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(undefined, 300_000);
|
|
377
|
+
const preResponseWatchdog =
|
|
378
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
379
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
380
|
+
: undefined;
|
|
381
|
+
const callerSignal = options?.signal;
|
|
382
|
+
const fetchSignal = preResponseWatchdog
|
|
383
|
+
? callerSignal
|
|
384
|
+
? AbortSignal.any([callerSignal, preResponseWatchdog])
|
|
385
|
+
: preResponseWatchdog
|
|
386
|
+
: callerSignal;
|
|
342
387
|
const response = await fetchWithRetry(
|
|
343
388
|
attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
|
|
344
389
|
{
|
|
345
390
|
method: "POST",
|
|
346
391
|
headers: requestHeaders,
|
|
347
392
|
body: requestBodyJson,
|
|
348
|
-
signal:
|
|
393
|
+
signal: fetchSignal,
|
|
349
394
|
maxAttempts: MAX_RETRIES + 1,
|
|
350
395
|
defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
|
|
351
396
|
maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
|
|
352
397
|
fetch: options?.fetch,
|
|
398
|
+
timeout: false,
|
|
353
399
|
},
|
|
354
400
|
);
|
|
355
401
|
if (!response.ok) {
|
|
356
402
|
const errorText = await response.text();
|
|
357
|
-
throw
|
|
358
|
-
|
|
403
|
+
throw new GeminiCliApiError(
|
|
404
|
+
`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`,
|
|
359
405
|
response.status,
|
|
406
|
+
{ headers: response.headers },
|
|
360
407
|
);
|
|
361
408
|
}
|
|
362
409
|
const requestUrl = response.url;
|
|
363
410
|
|
|
364
411
|
let started = false;
|
|
412
|
+
let sawFinishReason = false;
|
|
365
413
|
const ensureStarted = () => {
|
|
366
414
|
if (!started) {
|
|
367
415
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
@@ -383,7 +431,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
383
431
|
output.stopReason = "stop";
|
|
384
432
|
output.errorMessage = undefined;
|
|
385
433
|
output.timestamp = Date.now();
|
|
386
|
-
|
|
434
|
+
sawFinishReason = false;
|
|
387
435
|
};
|
|
388
436
|
|
|
389
437
|
const streamResponse = async (activeResponse: Response): Promise<boolean> => {
|
|
@@ -391,7 +439,6 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
391
439
|
throw new Error("No response body");
|
|
392
440
|
}
|
|
393
441
|
|
|
394
|
-
let hasContent = false;
|
|
395
442
|
let currentBlock: TextContent | ThinkingContent | null = null;
|
|
396
443
|
const blocks = output.content;
|
|
397
444
|
const blockIndex = () => blocks.length - 1;
|
|
@@ -401,14 +448,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
401
448
|
options?.signal,
|
|
402
449
|
event => options?.onSseEvent?.({ event: event.event, data: event.data, raw: [...event.raw] }, model),
|
|
403
450
|
)) {
|
|
451
|
+
if (chunk.error) {
|
|
452
|
+
const detail = chunk.error.message || chunk.error.status || "unknown error";
|
|
453
|
+
const message = `Cloud Code Assist stream error: ${detail}`;
|
|
454
|
+
throw typeof chunk.error.code === "number" && chunk.error.code >= 400
|
|
455
|
+
? new GeminiCliApiError(message, chunk.error.code)
|
|
456
|
+
: new Error(message);
|
|
457
|
+
}
|
|
404
458
|
const responseData = chunk.response;
|
|
405
459
|
if (!responseData) continue;
|
|
460
|
+
if (!responseData.candidates?.length && responseData.promptFeedback?.blockReason) {
|
|
461
|
+
const detail = responseData.promptFeedback.blockReasonMessage;
|
|
462
|
+
throw new Error(
|
|
463
|
+
`Request blocked by Google (${responseData.promptFeedback.blockReason})${detail ? `: ${detail}` : ""}`,
|
|
464
|
+
);
|
|
465
|
+
}
|
|
406
466
|
|
|
407
467
|
const candidate = responseData.candidates?.[0];
|
|
408
468
|
if (candidate?.content?.parts) {
|
|
409
469
|
for (const part of candidate.content.parts) {
|
|
410
|
-
if (part.text !== undefined) {
|
|
411
|
-
hasContent = true;
|
|
470
|
+
if (part.text !== undefined && part.text !== "") {
|
|
412
471
|
const isThinking = isThinkingPart(part);
|
|
413
472
|
if (
|
|
414
473
|
!currentBlock ||
|
|
@@ -445,10 +504,21 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
445
504
|
partial: output,
|
|
446
505
|
});
|
|
447
506
|
}
|
|
507
|
+
} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
|
|
508
|
+
if (currentBlock.type === "thinking") {
|
|
509
|
+
currentBlock.thinkingSignature = retainThoughtSignature(
|
|
510
|
+
currentBlock.thinkingSignature,
|
|
511
|
+
part.thoughtSignature,
|
|
512
|
+
);
|
|
513
|
+
} else {
|
|
514
|
+
currentBlock.textSignature = retainThoughtSignature(
|
|
515
|
+
currentBlock.textSignature,
|
|
516
|
+
part.thoughtSignature,
|
|
517
|
+
);
|
|
518
|
+
}
|
|
448
519
|
}
|
|
449
520
|
|
|
450
521
|
if (part.functionCall) {
|
|
451
|
-
hasContent = true;
|
|
452
522
|
if (currentBlock) {
|
|
453
523
|
pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
|
|
454
524
|
currentBlock = null;
|
|
@@ -463,7 +533,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
463
533
|
type: "toolCall",
|
|
464
534
|
id: toolCallId,
|
|
465
535
|
name: part.functionCall.name || "",
|
|
466
|
-
arguments: part.functionCall.args as Record<string, unknown>,
|
|
536
|
+
arguments: (part.functionCall.args ?? {}) as Record<string, unknown>,
|
|
467
537
|
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
468
538
|
};
|
|
469
539
|
|
|
@@ -475,9 +545,17 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
475
545
|
}
|
|
476
546
|
|
|
477
547
|
if (candidate?.finishReason) {
|
|
478
|
-
|
|
479
|
-
|
|
548
|
+
sawFinishReason = true;
|
|
549
|
+
const mapped = mapStopReasonString(candidate.finishReason);
|
|
550
|
+
// Only let a trailing tool call upgrade benign finishes; error finishes
|
|
551
|
+
// (SAFETY, MALFORMED_FUNCTION_CALL, ...) must surface even with tool calls present.
|
|
552
|
+
if ((mapped === "stop" || mapped === "length") && output.content.some(b => b.type === "toolCall")) {
|
|
480
553
|
output.stopReason = "toolUse";
|
|
554
|
+
} else {
|
|
555
|
+
output.stopReason = mapped;
|
|
556
|
+
if (mapped === "error") {
|
|
557
|
+
output.errorMessage = `Generation failed with finish reason: ${candidate.finishReason}`;
|
|
558
|
+
}
|
|
481
559
|
}
|
|
482
560
|
}
|
|
483
561
|
|
|
@@ -509,7 +587,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
509
587
|
pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
|
|
510
588
|
}
|
|
511
589
|
|
|
512
|
-
return
|
|
590
|
+
return hasMeaningfulGoogleContent(output);
|
|
513
591
|
};
|
|
514
592
|
|
|
515
593
|
let receivedContent = false;
|
|
@@ -542,9 +620,10 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
542
620
|
|
|
543
621
|
if (!currentResponse.ok) {
|
|
544
622
|
const retryErrorText = await currentResponse.text();
|
|
545
|
-
throw
|
|
546
|
-
|
|
623
|
+
throw new GeminiCliApiError(
|
|
624
|
+
`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`,
|
|
547
625
|
currentResponse.status,
|
|
626
|
+
{ headers: currentResponse.headers },
|
|
548
627
|
);
|
|
549
628
|
}
|
|
550
629
|
}
|
|
@@ -568,6 +647,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
568
647
|
throw new Error("Request was aborted");
|
|
569
648
|
}
|
|
570
649
|
|
|
650
|
+
if (!sawFinishReason) {
|
|
651
|
+
throw new Error(
|
|
652
|
+
"Cloud Code Assist stream ended without a finish reason (connection dropped or response truncated)",
|
|
653
|
+
);
|
|
654
|
+
}
|
|
655
|
+
|
|
571
656
|
if (output.stopReason === "aborted" || output.stopReason === "error") {
|
|
572
657
|
throw new Error(output.errorMessage ?? "An unknown error occurred");
|
|
573
658
|
}
|
|
@@ -726,6 +811,17 @@ export function buildRequest(
|
|
|
726
811
|
} else if (options.thinking.budgetTokens !== undefined) {
|
|
727
812
|
generationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
|
|
728
813
|
}
|
|
814
|
+
} else if (options.thinking?.suppress && model.reasoning) {
|
|
815
|
+
// Explicit off: omitting thinkingConfig re-applies the per-id baked
|
|
816
|
+
// server default (the model silently thinks and bills the tokens).
|
|
817
|
+
const suppress = options.thinking.suppress;
|
|
818
|
+
generationConfig.thinkingConfig = { includeThoughts: false };
|
|
819
|
+
if ("level" in suppress) {
|
|
820
|
+
// Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values
|
|
821
|
+
generationConfig.thinkingConfig.thinkingLevel = suppress.level as any;
|
|
822
|
+
} else {
|
|
823
|
+
generationConfig.thinkingConfig.thinkingBudget = suppress.budget;
|
|
824
|
+
}
|
|
729
825
|
}
|
|
730
826
|
|
|
731
827
|
const request: CloudCodeAssistRequest["request"] = {
|
|
@@ -785,10 +881,10 @@ export function buildRequest(
|
|
|
785
881
|
if (isAntigravity && shouldInjectAntigravitySystemInstruction(model.id)) {
|
|
786
882
|
const existingParts = request.systemInstruction?.parts ?? [];
|
|
787
883
|
request.systemInstruction = {
|
|
788
|
-
role: "user",
|
|
789
884
|
parts: [
|
|
790
885
|
{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION },
|
|
791
886
|
{ text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },
|
|
887
|
+
{ text: ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION },
|
|
792
888
|
...existingParts,
|
|
793
889
|
],
|
|
794
890
|
};
|
|
@@ -796,7 +892,7 @@ export function buildRequest(
|
|
|
796
892
|
|
|
797
893
|
return {
|
|
798
894
|
project: projectId,
|
|
799
|
-
model: model.id,
|
|
895
|
+
model: options.requestModelId ?? model.requestModelId ?? model.id,
|
|
800
896
|
request,
|
|
801
897
|
...(isAntigravity
|
|
802
898
|
? {
|