@prometheus-ai/ai 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -2,6 +2,11 @@ import * as nodeCrypto from "node:crypto";
|
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
3
|
import { scheduler } from "node:timers/promises";
|
|
4
4
|
import * as tls from "node:tls";
|
|
5
|
+
import { isOfficialAnthropicApiUrl } from "@prometheus-ai/catalog/compat/anthropic";
|
|
6
|
+
import { mapEffortToAnthropicAdaptiveEffort } from "@prometheus-ai/catalog/model-thinking";
|
|
7
|
+
import { calculateCost } from "@prometheus-ai/catalog/models";
|
|
8
|
+
import { isAnthropicOAuthToken } from "@prometheus-ai/catalog/utils";
|
|
9
|
+
import { parseGitHubCopilotApiKey } from "@prometheus-ai/catalog/wire/github-copilot";
|
|
5
10
|
import {
|
|
6
11
|
$env,
|
|
7
12
|
extractHttpStatusFromError,
|
|
@@ -12,13 +17,7 @@ import {
|
|
|
12
17
|
logger,
|
|
13
18
|
readSseEvents,
|
|
14
19
|
} from "@prometheus-ai/utils";
|
|
15
|
-
import {
|
|
16
|
-
disablesParallelToolUse,
|
|
17
|
-
hasOpus47ApiRestrictions,
|
|
18
|
-
mapEffortToAnthropicAdaptiveEffort,
|
|
19
|
-
supportsMidConversationSystemMessages,
|
|
20
|
-
} from "../model-thinking";
|
|
21
|
-
import { calculateCost } from "../models";
|
|
20
|
+
import { isUsageLimitError } from "../rate-limit-utils";
|
|
22
21
|
import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
|
|
23
22
|
import type {
|
|
24
23
|
Api,
|
|
@@ -30,6 +29,7 @@ import type {
|
|
|
30
29
|
Message,
|
|
31
30
|
Model,
|
|
32
31
|
ProviderSessionState,
|
|
32
|
+
RawSseEvent,
|
|
33
33
|
RedactedThinkingContent,
|
|
34
34
|
ServiceTier,
|
|
35
35
|
SimpleStreamOptions,
|
|
@@ -44,31 +44,27 @@ import type {
|
|
|
44
44
|
Usage,
|
|
45
45
|
} from "../types";
|
|
46
46
|
import { resolveServiceTier } from "../types";
|
|
47
|
-
import {
|
|
48
|
-
isAnthropicOAuthToken,
|
|
49
|
-
isRecord,
|
|
50
|
-
normalizeSystemPrompts,
|
|
51
|
-
normalizeToolCallId,
|
|
52
|
-
resolveCacheRetention,
|
|
53
|
-
} from "../utils";
|
|
47
|
+
import { isRecord, normalizeSystemPrompts, normalizeToolCallId, resolveCacheRetention } from "../utils";
|
|
54
48
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
55
49
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
56
50
|
import { isFoundryEnabled } from "../utils/foundry";
|
|
57
51
|
import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotError } from "../utils/http-inspector";
|
|
58
52
|
import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTimeout } from "../utils/idle-iterator";
|
|
59
|
-
import {
|
|
60
|
-
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
53
|
+
import { parseStreamingJsonThrottled } from "../utils/json-parse";
|
|
61
54
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
62
55
|
import { isCopilotTransientModelError } from "../utils/retry";
|
|
63
56
|
import { COMBINATOR_KEYS, NO_STRICT, toolWireSchema } from "../utils/schema";
|
|
64
57
|
import { spillToDescription } from "../utils/schema/spill";
|
|
65
58
|
import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
|
|
66
|
-
import { notifyRawSseEvent
|
|
59
|
+
import { notifyRawSseEvent } from "../utils/sse-debug";
|
|
67
60
|
import {
|
|
61
|
+
AnthropicApiError,
|
|
68
62
|
AnthropicConnectionTimeoutError,
|
|
69
63
|
type AnthropicFetchOptions,
|
|
70
64
|
AnthropicMessagesClient,
|
|
71
65
|
type AnthropicMessagesClientLike,
|
|
66
|
+
calculateAnthropicRetryDelayMs,
|
|
67
|
+
retryDelayFromHeaders,
|
|
72
68
|
} from "./anthropic-client";
|
|
73
69
|
import type {
|
|
74
70
|
ToolInputSchema as AnthropicToolInputSchema,
|
|
@@ -122,6 +118,7 @@ export function buildBetaHeader(baseBetas: readonly string[], extraBetas: readon
|
|
|
122
118
|
return result.join(",");
|
|
123
119
|
}
|
|
124
120
|
|
|
121
|
+
const midConversationSystemBeta = "mid-conversation-system-2026-04-07";
|
|
125
122
|
const claudeCodeUtilityBetaDefaults = [
|
|
126
123
|
"oauth-2025-04-20",
|
|
127
124
|
"interleaved-thinking-2025-05-14",
|
|
@@ -135,7 +132,7 @@ const claudeCodeAgentBetaDefaults = [
|
|
|
135
132
|
"interleaved-thinking-2025-05-14",
|
|
136
133
|
"context-management-2025-06-27",
|
|
137
134
|
"prompt-caching-scope-2026-01-05",
|
|
138
|
-
|
|
135
|
+
midConversationSystemBeta,
|
|
139
136
|
"advanced-tool-use-2025-11-20",
|
|
140
137
|
] as const;
|
|
141
138
|
const claudeCodeAgentPostEffortBetas = ["extended-cache-ttl-2025-04-11"] as const;
|
|
@@ -181,105 +178,125 @@ function isClaudeCodeClientUserAgent(userAgent: string | undefined): userAgent i
|
|
|
181
178
|
return userAgent.toLowerCase().startsWith("claude-cli");
|
|
182
179
|
}
|
|
183
180
|
|
|
184
|
-
export function isAnthropicApiBaseUrl(baseUrl?: string): boolean {
|
|
185
|
-
if (!baseUrl) return true;
|
|
186
|
-
try {
|
|
187
|
-
const url = new URL(baseUrl);
|
|
188
|
-
return url.protocol.toLowerCase() === "https:" && url.hostname.toLowerCase() === "api.anthropic.com";
|
|
189
|
-
} catch {
|
|
190
|
-
return false;
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
|
|
194
181
|
const sharedHeaders = {
|
|
195
182
|
"Accept-Encoding": "gzip, deflate, br, zstd",
|
|
196
183
|
Connection: "keep-alive",
|
|
197
184
|
"Content-Type": "application/json",
|
|
198
|
-
"
|
|
199
|
-
"
|
|
200
|
-
"
|
|
185
|
+
"anthropic-version": "2023-06-01",
|
|
186
|
+
"anthropic-dangerous-direct-browser-access": "true",
|
|
187
|
+
"x-app": "cli",
|
|
201
188
|
};
|
|
202
189
|
|
|
203
190
|
export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<string, string> {
|
|
204
191
|
const oauthToken = options.isOAuth ?? isAnthropicOAuthToken(options.apiKey);
|
|
205
192
|
const extraBetas = options.extraBetas ?? [];
|
|
206
193
|
const stream = options.stream ?? false;
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
194
|
+
// `enforcedHeaderKeys` strips User-Agent out of modelHeaders so a spread can't
|
|
195
|
+
// produce case-duplicate keys; re-add the caller's value explicitly per branch
|
|
196
|
+
// (OAuth replaces non-claude-cli values, the other branches forward verbatim).
|
|
197
|
+
const incomingUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
|
|
198
|
+
// Claude Code betas (oauth-2025-04-20, claude-code-20250219, …) are part of
|
|
199
|
+
// the OAuth fingerprint; API-key requests default to extras only, matching
|
|
200
|
+
// the streaming path (buildAnthropicClientOptions passes [] for non-OAuth).
|
|
201
|
+
const betaHeader = buildBetaHeader(
|
|
202
|
+
options.claudeCodeBetas ?? (oauthToken ? buildClaudeCodeBetas(true, true, false) : []),
|
|
203
|
+
extraBetas,
|
|
211
204
|
);
|
|
205
|
+
const acceptHeader = oauthToken ? "application/json" : stream ? "text/event-stream" : "application/json";
|
|
206
|
+
const modelHeaders: Record<string, string> = {};
|
|
207
|
+
const filteredEnforcedKeys: string[] = [];
|
|
208
|
+
for (const [key, value] of Object.entries(options.modelHeaders ?? {})) {
|
|
209
|
+
const lowerKey = key.toLowerCase();
|
|
210
|
+
if (enforcedHeaderKeys.has(lowerKey)) {
|
|
211
|
+
// User-Agent is filtered only to dedup the spread; every branch re-adds
|
|
212
|
+
// the caller's value explicitly, so it is not "ignored".
|
|
213
|
+
if (lowerKey !== "user-agent") filteredEnforcedKeys.push(key);
|
|
214
|
+
continue;
|
|
215
|
+
}
|
|
216
|
+
modelHeaders[key] = value;
|
|
217
|
+
}
|
|
218
|
+
if (filteredEnforcedKeys.length > 0) {
|
|
219
|
+
// Caller/env-supplied values (options.headers, ANTHROPIC_CUSTOM_HEADERS)
|
|
220
|
+
// for enforced headers are replaced by our own values; say so instead of
|
|
221
|
+
// dropping them silently. Keys only — values may carry credentials.
|
|
222
|
+
logger.debug("anthropic: ignoring caller-supplied enforced headers", {
|
|
223
|
+
headers: filteredEnforcedKeys,
|
|
224
|
+
});
|
|
225
|
+
}
|
|
212
226
|
|
|
213
227
|
if (options.isCloudflareAiGateway) {
|
|
214
228
|
return {
|
|
215
229
|
...modelHeaders,
|
|
216
230
|
Accept: acceptHeader,
|
|
217
231
|
...sharedHeaders,
|
|
218
|
-
"
|
|
232
|
+
...(incomingUserAgent ? { "User-Agent": incomingUserAgent } : {}),
|
|
233
|
+
...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
|
|
219
234
|
"cf-aig-authorization": `Bearer ${options.apiKey}`,
|
|
220
235
|
};
|
|
221
236
|
}
|
|
222
237
|
|
|
223
238
|
if (oauthToken) {
|
|
224
|
-
const incomingUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
|
|
225
239
|
const userAgent = isClaudeCodeClientUserAgent(incomingUserAgent)
|
|
226
240
|
? incomingUserAgent
|
|
227
|
-
: `claude-cli/${claudeCodeVersion} (external,
|
|
241
|
+
: `claude-cli/${claudeCodeVersion} (external, local-agent, agent-sdk/${claudeAgentSdkVersion})`;
|
|
228
242
|
return {
|
|
229
243
|
...modelHeaders,
|
|
230
244
|
...claudeCodeHeaders,
|
|
231
245
|
Accept: acceptHeader,
|
|
232
246
|
Authorization: `Bearer ${options.apiKey}`,
|
|
233
247
|
...sharedHeaders,
|
|
234
|
-
"
|
|
248
|
+
...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
|
|
235
249
|
...(options.claudeCodeSessionId ? { "X-Claude-Code-Session-Id": options.claudeCodeSessionId } : {}),
|
|
236
250
|
"x-client-request-id": nodeCrypto.randomUUID(),
|
|
237
251
|
"User-Agent": userAgent,
|
|
238
252
|
};
|
|
239
|
-
} else if (!
|
|
253
|
+
} else if (!isOfficialAnthropicApiUrl(options.baseUrl)) {
|
|
240
254
|
return {
|
|
241
255
|
...modelHeaders,
|
|
242
256
|
Accept: acceptHeader,
|
|
243
257
|
Authorization: `Bearer ${options.apiKey}`,
|
|
244
258
|
...sharedHeaders,
|
|
245
|
-
"
|
|
259
|
+
...(incomingUserAgent ? { "User-Agent": incomingUserAgent } : {}),
|
|
260
|
+
...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
|
|
246
261
|
};
|
|
247
262
|
} else {
|
|
248
263
|
return {
|
|
249
264
|
...modelHeaders,
|
|
250
265
|
Accept: acceptHeader,
|
|
251
266
|
...sharedHeaders,
|
|
252
|
-
"
|
|
267
|
+
...(incomingUserAgent ? { "User-Agent": incomingUserAgent } : {}),
|
|
268
|
+
...(betaHeader ? { "anthropic-beta": betaHeader } : {}),
|
|
253
269
|
"X-Api-Key": options.apiKey,
|
|
254
270
|
};
|
|
255
271
|
}
|
|
256
272
|
}
|
|
257
273
|
|
|
258
274
|
type AnthropicCacheControl = NonNullable<TextBlockParam["cache_control"]>;
|
|
275
|
+
type AnthropicImageMediaType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
|
|
259
276
|
|
|
260
|
-
|
|
277
|
+
function normalizeAnthropicImageMediaType(mimeType: string): AnthropicImageMediaType | undefined {
|
|
278
|
+
const normalized = mimeType.trim().toLowerCase();
|
|
279
|
+
if (normalized === "image/jpg") return "image/jpeg";
|
|
280
|
+
if (
|
|
281
|
+
normalized === "image/jpeg" ||
|
|
282
|
+
normalized === "image/png" ||
|
|
283
|
+
normalized === "image/gif" ||
|
|
284
|
+
normalized === "image/webp"
|
|
285
|
+
) {
|
|
286
|
+
return normalized;
|
|
287
|
+
}
|
|
288
|
+
return undefined;
|
|
289
|
+
}
|
|
261
290
|
|
|
262
|
-
function
|
|
263
|
-
|
|
264
|
-
params.output_config = outputConfig;
|
|
265
|
-
return outputConfig;
|
|
291
|
+
function cloneAnthropicCacheControl(cacheControl: AnthropicCacheControl): AnthropicCacheControl {
|
|
292
|
+
return { ...cacheControl };
|
|
266
293
|
}
|
|
267
294
|
|
|
295
|
+
type AnthropicOutputConfig = NonNullable<MessageCreateParamsStreaming["output_config"]>;
|
|
296
|
+
|
|
268
297
|
const ANTHROPIC_STOP_SEQUENCES_MAX = 4;
|
|
269
298
|
let warnedStopSequencesTrim = false;
|
|
270
299
|
|
|
271
|
-
/**
|
|
272
|
-
* Adaptive thinking `display` is supported starting with Claude Opus 4.7.
|
|
273
|
-
* Older adaptive-thinking models (Opus 4.6, Sonnet 4.6+) reject the field.
|
|
274
|
-
*/
|
|
275
|
-
function supportsAdaptiveThinkingDisplay(modelId: string): boolean {
|
|
276
|
-
const match = /claude-opus-(\d+)-(\d+)/.exec(modelId);
|
|
277
|
-
if (!match) return false;
|
|
278
|
-
const major = Number(match[1]);
|
|
279
|
-
const minor = Number(match[2]);
|
|
280
|
-
return major > 4 || (major === 4 && minor >= 7);
|
|
281
|
-
}
|
|
282
|
-
|
|
283
300
|
const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
|
|
284
301
|
|
|
285
302
|
type AnthropicProviderSessionState = ProviderSessionState & {
|
|
@@ -299,16 +316,29 @@ function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
|
|
|
299
316
|
return state;
|
|
300
317
|
}
|
|
301
318
|
|
|
319
|
+
/**
 * Build the provider-session-state map key for one endpoint + model pair.
 *
 * Sticky strict-tools / fast-mode learning is stored per key: a
 * grammar-too-large 400 or a fast-mode rejection is specific to the model (its
 * tool grammar / entitlement) and to the endpoint (direct Anthropic vs a
 * gateway / Foundry / Bedrock proxy), so it MUST NOT bleed onto unrelated
 * anthropic-messages requests in the same session.
 *
 * The key has the shape `<state key>:<baseUrl>\0<modelId>`; the NUL character
 * separates the two components so neither can forge the boundary.
 */
function anthropicProviderSessionStateKey(baseUrl: string, modelId: string): string {
	const scope = [baseUrl, modelId].join("\u0000");
	return ANTHROPIC_PROVIDER_SESSION_STATE_KEY + ":" + scope;
}
|
|
330
|
+
|
|
302
331
|
/**
 * Look up the Anthropic session state scoped to one endpoint + model, creating
 * and storing a fresh entry the first time that scope is requested.
 *
 * @param providerSessionState - Session-wide state map; when absent nothing is
 *   created or stored.
 * @param baseUrl - Endpoint component of the scoping key.
 * @param modelId - Model component of the scoping key.
 * @returns The scoped state entry, or `undefined` when no map was supplied.
 */
function getAnthropicProviderSessionState(
	providerSessionState: Map<string, ProviderSessionState> | undefined,
	baseUrl: string,
	modelId: string,
): AnthropicProviderSessionState | undefined {
	if (!providerSessionState) return undefined;
	const scopedKey = anthropicProviderSessionStateKey(baseUrl, modelId);
	let state = providerSessionState.get(scopedKey) as AnthropicProviderSessionState | undefined;
	if (!state) {
		state = createAnthropicProviderSessionState();
		providerSessionState.set(scopedKey, state);
	}
	return state;
}
|
|
314
344
|
|
|
@@ -323,10 +353,14 @@ export function clearAnthropicFastModeFallback(
|
|
|
323
353
|
providerSessionState: Map<string, ProviderSessionState> | undefined,
|
|
324
354
|
): void {
|
|
325
355
|
if (!providerSessionState) return;
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
356
|
+
// Fast mode is re-armed session-wide (user toggled `/fast on`), so clear the
|
|
357
|
+
// sticky flag on every per-endpoint/model Anthropic entry — plus the legacy
|
|
358
|
+
// unscoped key — rather than a single shared object.
|
|
359
|
+
const prefix = `${ANTHROPIC_PROVIDER_SESSION_STATE_KEY}:`;
|
|
360
|
+
for (const [key, value] of providerSessionState) {
|
|
361
|
+
if (key !== ANTHROPIC_PROVIDER_SESSION_STATE_KEY && !key.startsWith(prefix)) continue;
|
|
362
|
+
(value as AnthropicProviderSessionState).fastModeDisabled = false;
|
|
363
|
+
}
|
|
330
364
|
}
|
|
331
365
|
|
|
332
366
|
function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
|
|
@@ -378,7 +412,6 @@ function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
|
|
|
378
412
|
|
|
379
413
|
function getCacheControl(
|
|
380
414
|
model: Model<"anthropic-messages">,
|
|
381
|
-
baseUrl: string,
|
|
382
415
|
cacheRetention: CacheRetention | undefined,
|
|
383
416
|
isOAuthToken: boolean,
|
|
384
417
|
): { retention: CacheRetention; cacheControl?: AnthropicCacheControl } {
|
|
@@ -386,10 +419,7 @@ function getCacheControl(
|
|
|
386
419
|
if (retention === "none") {
|
|
387
420
|
return { retention };
|
|
388
421
|
}
|
|
389
|
-
const ttl =
|
|
390
|
-
retention === "long" && isAnthropicApiBaseUrl(baseUrl) && getAnthropicCompat(model).supportsLongCacheRetention
|
|
391
|
-
? "1h"
|
|
392
|
-
: undefined;
|
|
422
|
+
const ttl = retention === "long" && model.compat.supportsLongCacheRetention ? "1h" : undefined;
|
|
393
423
|
return {
|
|
394
424
|
retention,
|
|
395
425
|
cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
|
|
@@ -397,9 +427,15 @@ function getCacheControl(
|
|
|
397
427
|
}
|
|
398
428
|
|
|
399
429
|
// Stealth mode: mimic Claude Code's request fingerprint.
|
|
400
|
-
export const claudeCodeVersion = "2.1.
|
|
401
|
-
export const
|
|
402
|
-
export const
|
|
430
|
+
export const claudeCodeVersion = "2.1.165";
|
|
431
|
+
export const claudeAgentSdkVersion = "0.3.165";
|
|
432
|
+
export const claudeClientVersion = "1.11187.4";
|
|
433
|
+
export const claudeToolPrefix: string = "_";
|
|
434
|
+
export const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
|
|
435
|
+
// Claude Code caps requested output at 64k tokens even when the model ceiling is
|
|
436
|
+
// higher (e.g. Opus 4.8 supports 128k); OAuth requests clamp to match the wire
|
|
437
|
+
// fingerprint. API-key requests keep the full model ceiling.
|
|
438
|
+
export const CLAUDE_CODE_MAX_OUTPUT_TOKENS = 64000;
|
|
403
439
|
|
|
404
440
|
export function mapStainlessOs(platform: string): "MacOS" | "Windows" | "Linux" | "FreeBSD" | `Other::${string}` {
|
|
405
441
|
switch (platform.toLowerCase()) {
|
|
@@ -442,7 +478,9 @@ export const claudeCodeHeaders = {
|
|
|
442
478
|
"X-Stainless-Lang": "js",
|
|
443
479
|
"X-Stainless-Arch": mapStainlessArch(process.arch),
|
|
444
480
|
"X-Stainless-OS": mapStainlessOs(process.platform),
|
|
445
|
-
"X-Stainless-Timeout": "
|
|
481
|
+
"X-Stainless-Timeout": "900",
|
|
482
|
+
"anthropic-client-platform": "desktop_app",
|
|
483
|
+
"anthropic-client-version": claudeClientVersion,
|
|
446
484
|
};
|
|
447
485
|
|
|
448
486
|
const enforcedHeaderKeys = new Set(
|
|
@@ -452,11 +490,11 @@ const enforcedHeaderKeys = new Set(
|
|
|
452
490
|
"Accept-Encoding",
|
|
453
491
|
"Connection",
|
|
454
492
|
"Content-Type",
|
|
455
|
-
"
|
|
456
|
-
"
|
|
457
|
-
"
|
|
493
|
+
"anthropic-version",
|
|
494
|
+
"anthropic-dangerous-direct-browser-access",
|
|
495
|
+
"anthropic-beta",
|
|
458
496
|
"User-Agent",
|
|
459
|
-
"
|
|
497
|
+
"x-app",
|
|
460
498
|
"Authorization",
|
|
461
499
|
"X-Api-Key",
|
|
462
500
|
"X-Claude-Code-Session-Id",
|
|
@@ -479,7 +517,7 @@ function createClaudeBillingHeader(firstUserMessageText: string): string {
|
|
|
479
517
|
.slice(0, 3);
|
|
480
518
|
// cch=00000: placeholder replaced with the real attestation hash by wrapFetchForCch
|
|
481
519
|
// before the request hits the wire (see below).
|
|
482
|
-
return `${CLAUDE_BILLING_HEADER_PREFIX} cc_version=${claudeCodeVersion}.${versionSuffix}; cc_entrypoint=
|
|
520
|
+
return `${CLAUDE_BILLING_HEADER_PREFIX} cc_version=${claudeCodeVersion}.${versionSuffix}; cc_entrypoint=local-agent; ${CCH_PLACEHOLDER_STR};`;
|
|
483
521
|
}
|
|
484
522
|
|
|
485
523
|
// cch attestation: XXHash64(body_with_placeholder, seed) low-20-bits, 5 hex chars.
|
|
@@ -497,47 +535,49 @@ const CCH_PLACEHOLDER = cchEncoder.encode(CCH_PLACEHOLDER_STR);
|
|
|
497
535
|
const BILLING_SYSTEM_MARKER = cchEncoder.encode(`"system":[{"type":"text","text":"${CLAUDE_BILLING_HEADER_PREFIX}`);
|
|
498
536
|
const CCH_BILLING_SEARCH_WINDOW = 150;
|
|
499
537
|
|
|
500
|
-
function patchCch(body: Uint8Array):
|
|
538
|
+
/**
 * Overwrite the `cch` placeholder inside an encoded request body, in place.
 *
 * @param body - Encoded request body; mutated only when the result is `"patched"`.
 * @returns
 *   - `"no-billing-header"` when `BILLING_SYSTEM_MARKER` does not occur in the body;
 *   - `"unanchored"` when the marker is present but `CCH_PLACEHOLDER` is missing
 *     or starts more than `CCH_BILLING_SEARCH_WINDOW` bytes after the marker;
 *   - `"patched"` once the five hex digits have been written.
 */
function patchCch(body: Uint8Array): "patched" | "no-billing-header" | "unanchored" {
	// Zero-copy Buffer view over the same memory; its `indexOf` is a native memmem,
	// ~7.5x faster than a hand-rolled byte loop here — the marker sits ~99% through
	// the body because `messages` serializes before `system`, so a JS scan would
	// walk almost the entire payload (benchmarked: 563µs -> 75µs on a 1MB body).
	const view = Buffer.from(body.buffer, body.byteOffset, body.byteLength);

	// Find the combined system[0] + billing-header prefix marker.
	const markerIdx = view.indexOf(BILLING_SYSTEM_MARKER);
	if (markerIdx === -1) return "no-billing-header"; // no CC billing header injected

	// Placeholder must sit within CCH_BILLING_SEARCH_WINDOW bytes after the marker.
	// The search itself is unbounded; the distance check below rejects hits that
	// fall outside the window.
	const searchFrom = markerIdx + BILLING_SYSTEM_MARKER.length;
	const idx = view.indexOf(CCH_PLACEHOLDER, searchFrom);
	if (idx === -1 || idx - searchFrom > CCH_BILLING_SEARCH_WINDOW) return "unanchored";

	// Hash the body with the placeholder in place (matches CC's in-place behaviour).
	const h = Bun.hash.xxHash64(body, CCH_SEED);
	// Keep the low 20 bits and render them as exactly five lowercase hex digits.
	const cch = (h & 0xfffffn).toString(16).padStart(5, "0");

	// Writes start 4 bytes into the placeholder — presumably skipping a `cch=`
	// prefix; confirm against CCH_PLACEHOLDER_STR.
	for (let i = 0; i < 5; i++) body[idx + 4 + i] = cch.charCodeAt(i);
	return "patched";
}
|
|
532
561
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
562
|
+
/**
|
|
563
|
+
* Wraps a fetch implementation to patch the Claude Code billing-header `cch`
|
|
564
|
+
* attestation into outgoing request bodies. Bodies without the placeholder
|
|
565
|
+
* pass through untouched, so installing it on every OAuth flow is safe.
|
|
566
|
+
*/
|
|
567
|
+
export function wrapFetchForCch(base: FetchImpl): FetchImpl {
|
|
536
568
|
return (input, init) => {
|
|
537
569
|
if (init?.body && typeof init.body === "string" && init.body.includes(CCH_PLACEHOLDER_STR)) {
|
|
538
570
|
const encoded = cchEncoder.encode(init.body);
|
|
539
|
-
|
|
540
|
-
|
|
571
|
+
if (patchCch(encoded) === "unanchored") {
|
|
572
|
+
// The OAuth billing placeholder is anchored to system[0] but we couldn't
|
|
573
|
+
// patch it — e.g. an `onPayload` hook reordered the first system block's keys
|
|
574
|
+
// so BILLING_SYSTEM_MARKER no longer matches. Send the body as-is (cch stays
|
|
575
|
+
// `00000`, the prior behaviour) rather than failing the request, but surface the
|
|
576
|
+
// fingerprint regression instead of letting it ship silently. A `cch=00000`
|
|
577
|
+
// literal in user content alone ("no-billing-header") is not a regression.
|
|
578
|
+
logger.warn("anthropic: cch billing placeholder present but not patched; sending unattested request");
|
|
579
|
+
}
|
|
580
|
+
return base(input, { ...init, body: encoded });
|
|
541
581
|
}
|
|
542
582
|
return base(input, init);
|
|
543
583
|
};
|
|
@@ -594,20 +634,65 @@ export function generateClaudeCloakingUserId(): string {
|
|
|
594
634
|
return `user_${userHash}_account_${accountId}_session_${sessionId}`;
|
|
595
635
|
}
|
|
596
636
|
|
|
597
|
-
|
|
598
|
-
|
|
637
|
+
const CLAUDE_DEVICE_ID_INSTALL_HASH_DOMAIN = "prometheus-claude-device-id-v1:";
const CLAUDE_DEVICE_ID_ACCOUNT_HASH_DOMAIN = "prometheus-claude-device-id-v2";

/**
 * Derive a hex-encoded SHA-256 device id from an install id and, optionally, an
 * account id.
 *
 * Two hash domains keep the schemes apart:
 * - no (or empty) `accountId`: `sha256(<v1 domain> + installId)`;
 * - non-empty `accountId`: `sha256(<v2 domain> + NUL + installId + NUL + accountId)`.
 *
 * @param installId - Installation identifier mixed into the hash.
 * @param accountId - Optional account identifier; an empty string is treated as absent.
 * @returns 64-character lowercase hex digest.
 */
export function deriveClaudeDeviceId(installId: string, accountId?: string): string {
	// Feeding the parts to one hasher in order hashes their concatenation.
	const parts = accountId
		? [CLAUDE_DEVICE_ID_ACCOUNT_HASH_DOMAIN, "\0", installId, "\0", accountId]
		: [CLAUDE_DEVICE_ID_INSTALL_HASH_DOMAIN, installId];
	const hasher = nodeCrypto.createHash("sha256");
	for (const part of parts) hasher.update(part);
	return hasher.digest("hex");
}
|
|
653
|
+
|
|
654
|
+
/**
 * Read `key` from an optional metadata bag, accepting only non-empty strings.
 *
 * @returns The stored string, or `undefined` when the bag is missing, the key
 *   is absent, the value is not a string, or the string is empty.
 */
function readMetadataString(metadata: Record<string, unknown> | undefined, key: string): string | undefined {
	const candidate = metadata?.[key];
	if (typeof candidate !== "string") return undefined;
	return candidate.length === 0 ? undefined : candidate;
}
|
|
658
|
+
|
|
659
|
+
/**
 * Pick the account id out of request metadata, trying the spellings
 * `account_uuid`, `accountId`, then `account_id` and returning the first one
 * that holds a non-empty string.
 */
function readAnthropicMetadataAccountId(metadata: Record<string, unknown> | undefined): string | undefined {
	for (const key of ["account_uuid", "accountId", "account_id"]) {
		const accountId = readMetadataString(metadata, key);
		if (accountId !== undefined) return accountId;
	}
	return undefined;
}
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
666
|
+
|
|
667
|
+
/**
 * Derive the device id for this installation: the value returned by
 * `getInstallId()` combined with the optional account id.
 */
function deriveClaudeDeviceIdFromInstallId(accountId?: string): string {
	const installId = getInstallId();
	return deriveClaudeDeviceId(installId, accountId);
}
|
|
670
|
+
|
|
671
|
+
/**
 * Serialize the JSON user-id envelope: `device_id`, `session_id`, and — only
 * when a non-empty account id is supplied — `account_uuid`, in that key order.
 *
 * @param sessionId - Session id to embed; a lowercased random UUID is generated
 *   when omitted.
 * @param accountId - Optional account id; also feeds the device-id derivation.
 */
function generateClaudeJsonUserId(sessionId?: string, accountId?: string): string {
	const deviceId = deriveClaudeDeviceIdFromInstallId(accountId);
	const resolvedSessionId = sessionId ?? nodeCrypto.randomUUID().toLowerCase();
	return JSON.stringify({
		device_id: deviceId,
		session_id: resolvedSessionId,
		...(accountId ? { account_uuid: accountId } : {}),
	});
}
|
|
606
679
|
|
|
607
|
-
|
|
680
|
+
/**
|
|
681
|
+
* Resolve the `metadata.user_id` field for an Anthropic Messages request.
|
|
682
|
+
*
|
|
683
|
+
* For API-key tokens, an explicit caller-supplied `userId` is forwarded
|
|
684
|
+
* verbatim and `undefined` yields no metadata. For OAuth tokens the value
|
|
685
|
+
* must match the Claude Code attribution shape (`isClaudeCloakingUserId` or
|
|
686
|
+
* the `{session_id, account_uuid?, device_id?}` JSON envelope) — anything
|
|
687
|
+
* else is dropped and a fresh Claude-Code-style JSON id is generated from
|
|
688
|
+
* `sessionId`/`accountId` so attribution stays consistent across the main
|
|
689
|
+
* streaming path and provider-specific request builders (e.g. web search).
|
|
690
|
+
*/
|
|
691
|
+
export function resolveAnthropicMetadataUserId(
|
|
608
692
|
userId: unknown,
|
|
609
693
|
isOAuthToken: boolean,
|
|
610
694
|
sessionId?: string,
|
|
695
|
+
accountId?: string,
|
|
611
696
|
): string | undefined {
|
|
612
697
|
if (typeof userId === "string") {
|
|
613
698
|
if (!isOAuthToken || isClaudeCloakingUserId(userId) || isClaudeJsonUserId(userId)) {
|
|
@@ -616,22 +701,24 @@ function resolveAnthropicMetadataUserId(
|
|
|
616
701
|
}
|
|
617
702
|
|
|
618
703
|
if (!isOAuthToken) return undefined;
|
|
619
|
-
return generateClaudeJsonUserId(sessionId);
|
|
704
|
+
return generateClaudeJsonUserId(sessionId, accountId);
|
|
620
705
|
}
|
|
621
706
|
const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);
|
|
622
|
-
export const applyClaudeToolPrefix = (name: string
|
|
623
|
-
if (!
|
|
707
|
+
/**
 * Prepend `claudeToolPrefix` to a tool name for the wire.
 *
 * Names are returned unchanged when the prefix is empty or when the name
 * (compared case-insensitively) is in `ANTHROPIC_BUILTIN_TOOL_NAMES`.
 */
export const applyClaudeToolPrefix = (name: string): string => {
	const passThrough = !claudeToolPrefix || ANTHROPIC_BUILTIN_TOOL_NAMES.has(name.toLowerCase());
	// Always prepend (no "already prefixed" short-circuit): the prefix is a wire
	// transport detail applied once to internal tool names, and `stripClaudeToolPrefix`
	// removes exactly one prefix on receive. Skipping names that already start with the
	// prefix would make a tool literally named `_foo` lose its leading underscore on the
	// return trip (`_foo` → wire `_foo` → strip → `foo`), so the agent loop can't find it.
	return passThrough ? name : claudeToolPrefix + name;
};
|
|
629
717
|
|
|
630
|
-
export const stripClaudeToolPrefix = (name: string
|
|
631
|
-
if (!
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
return name.slice(prefixOverride.length);
|
|
718
|
+
/**
 * Remove one leading `claudeToolPrefix` from a tool name.
 *
 * The prefix comparison is case-insensitive; names that do not start with the
 * prefix — and every name when the prefix is empty — are returned unchanged.
 */
export const stripClaudeToolPrefix = (name: string): string => {
	if (!claudeToolPrefix) return name;
	const hasPrefix = name.toLowerCase().startsWith(claudeToolPrefix.toLowerCase());
	return hasPrefix ? name.slice(claudeToolPrefix.length) : name;
};
|
|
636
723
|
|
|
637
724
|
const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
|
|
@@ -649,6 +736,46 @@ function countAnthropicImageBlocks(messages: Message[]): number {
|
|
|
649
736
|
return count;
|
|
650
737
|
}
|
|
651
738
|
|
|
739
|
+
const ANTHROPIC_IMAGE_RESIZE_CONCURRENCY = 4;
|
|
740
|
+
|
|
741
|
+
/**
|
|
742
|
+
* Memoized resize results keyed on ImageContent identity. Callers keep message
|
|
743
|
+
* objects stable across turns, so without this every request (and every
|
|
744
|
+
* in-provider retry of a fresh turn) re-decodes and re-encodes the same
|
|
745
|
+
* oversized screenshots. A cached value identical to the key means "already
|
|
746
|
+
* within bounds / unresizable — skip the decode".
|
|
747
|
+
*/
|
|
748
|
+
const anthropicManyImageResizeCache = new WeakMap<ImageContent, ImageContent>();
|
|
749
|
+
|
|
750
|
+
type ResizeLimiter = <R>(fn: () => Promise<R>) => Promise<R>;

/**
 * Bounded-concurrency gate for image decode/encode work. The many-image path
 * fans out over every block of every message; unbounded, 100+ oversized images
 * would decode concurrently (two encode pipelines each) and spike memory by
 * gigabytes. Slots are handed off directly to the next waiter on release.
 *
 * @param limit - Maximum number of tasks allowed to run at once. Fractions are
 *   floored; values below 1 (including `NaN`) are treated as 1, because a gate
 *   with no slots would park the first task forever with nothing running to
 *   release it.
 * @returns A wrapper that runs `fn` once a slot is free and settles with its
 *   result. The slot is released whether `fn` resolves or rejects.
 */
function createResizeLimiter(limit: number): ResizeLimiter {
	const maxActive = limit >= 1 ? Math.floor(limit) : 1;
	let active = 0;
	const waiters: (() => void)[] = [];
	return async fn => {
		if (active >= maxActive) {
			// Wait for a finishing task to hand its slot over. `active` stays
			// unchanged: the slot is transferred, not freed and re-acquired.
			await new Promise<void>(resolve => {
				waiters.push(resolve);
			});
		} else {
			active++;
		}
		try {
			return await fn();
		} finally {
			const next = waiters.shift();
			if (next) next();
			else active--;
		}
	};
}
|
|
778
|
+
|
|
652
779
|
async function resizeAnthropicManyImageBlock(block: ImageContent): Promise<ImageContent> {
|
|
653
780
|
try {
|
|
654
781
|
const inputBuffer = Buffer.from(block.data, "base64");
|
|
@@ -684,12 +811,17 @@ async function resizeAnthropicManyImageBlock(block: ImageContent): Promise<Image
|
|
|
684
811
|
async function resizeAnthropicManyImageContent(
|
|
685
812
|
content: (TextContent | ImageContent)[],
|
|
686
813
|
state: { resized: number },
|
|
814
|
+
limit: ResizeLimiter,
|
|
687
815
|
): Promise<(TextContent | ImageContent)[]> {
|
|
688
816
|
let changed = false;
|
|
689
817
|
const next = await Promise.all(
|
|
690
818
|
content.map(async block => {
|
|
691
819
|
if (block.type !== "image") return block;
|
|
692
|
-
|
|
820
|
+
let resized = anthropicManyImageResizeCache.get(block);
|
|
821
|
+
if (resized === undefined) {
|
|
822
|
+
resized = await limit(() => resizeAnthropicManyImageBlock(block));
|
|
823
|
+
anthropicManyImageResizeCache.set(block, resized);
|
|
824
|
+
}
|
|
693
825
|
if (resized !== block) {
|
|
694
826
|
changed = true;
|
|
695
827
|
state.resized++;
|
|
@@ -700,14 +832,18 @@ async function resizeAnthropicManyImageContent(
|
|
|
700
832
|
return changed ? next : content;
|
|
701
833
|
}
|
|
702
834
|
|
|
703
|
-
async function resizeAnthropicManyImageMessage(
|
|
835
|
+
async function resizeAnthropicManyImageMessage(
|
|
836
|
+
message: Message,
|
|
837
|
+
state: { resized: number },
|
|
838
|
+
limit: ResizeLimiter,
|
|
839
|
+
): Promise<Message> {
|
|
704
840
|
if (message.role === "user" || message.role === "developer") {
|
|
705
841
|
if (!Array.isArray(message.content)) return message;
|
|
706
|
-
const content = await resizeAnthropicManyImageContent(message.content, state);
|
|
842
|
+
const content = await resizeAnthropicManyImageContent(message.content, state, limit);
|
|
707
843
|
return content === message.content ? message : { ...message, content };
|
|
708
844
|
}
|
|
709
845
|
if (message.role === "toolResult") {
|
|
710
|
-
const content = await resizeAnthropicManyImageContent(message.content, state);
|
|
846
|
+
const content = await resizeAnthropicManyImageContent(message.content, state, limit);
|
|
711
847
|
return content === message.content ? message : { ...message, content };
|
|
712
848
|
}
|
|
713
849
|
return message;
|
|
@@ -720,9 +856,10 @@ async function prepareAnthropicManyImageContext(context: Context, supportsImages
|
|
|
720
856
|
|
|
721
857
|
let changed = false;
|
|
722
858
|
const state = { resized: 0 };
|
|
859
|
+
const limit = createResizeLimiter(ANTHROPIC_IMAGE_RESIZE_CONCURRENCY);
|
|
723
860
|
const messages = await Promise.all(
|
|
724
861
|
context.messages.map(async message => {
|
|
725
|
-
const next = await resizeAnthropicManyImageMessage(message, state);
|
|
862
|
+
const next = await resizeAnthropicManyImageMessage(message, state, limit);
|
|
726
863
|
if (next !== message) changed = true;
|
|
727
864
|
return next;
|
|
728
865
|
}),
|
|
@@ -736,13 +873,7 @@ async function prepareAnthropicManyImageContext(context: Context, supportsImages
|
|
|
736
873
|
return { ...context, messages };
|
|
737
874
|
}
|
|
738
875
|
|
|
739
|
-
|
|
740
|
-
* Convert content blocks to Anthropic API format
|
|
741
|
-
*/
|
|
742
|
-
function convertContentBlocks(
|
|
743
|
-
content: (TextContent | ImageContent)[],
|
|
744
|
-
supportsImages = true,
|
|
745
|
-
):
|
|
876
|
+
type AnthropicToolResultContent =
|
|
746
877
|
| string
|
|
747
878
|
| Array<
|
|
748
879
|
| { type: "text"; text: string }
|
|
@@ -750,42 +881,75 @@ function convertContentBlocks(
|
|
|
750
881
|
type: "image";
|
|
751
882
|
source: {
|
|
752
883
|
type: "base64";
|
|
753
|
-
media_type:
|
|
884
|
+
media_type: AnthropicImageMediaType;
|
|
754
885
|
data: string;
|
|
755
886
|
};
|
|
756
887
|
}
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
888
|
+
>;
|
|
889
|
+
|
|
890
|
+
/**
|
|
891
|
+
* Convert content blocks to Anthropic API format
|
|
892
|
+
*/
|
|
893
|
+
function convertContentBlocks(
|
|
894
|
+
content: (TextContent | ImageContent)[],
|
|
895
|
+
supportsImages = true,
|
|
896
|
+
): AnthropicToolResultContent {
|
|
897
|
+
const blocks: Array<
|
|
898
|
+
| { type: "text"; text: string }
|
|
899
|
+
| {
|
|
900
|
+
type: "image";
|
|
901
|
+
source: {
|
|
902
|
+
type: "base64";
|
|
903
|
+
media_type: AnthropicImageMediaType;
|
|
904
|
+
data: string;
|
|
905
|
+
};
|
|
906
|
+
}
|
|
907
|
+
> = [];
|
|
908
|
+
let sawText = false;
|
|
909
|
+
let sawImage = false;
|
|
910
|
+
|
|
911
|
+
for (const block of content) {
|
|
912
|
+
if (block.type === "text") {
|
|
913
|
+
const text = block.text.toWellFormed();
|
|
914
|
+
if (text.trim().length === 0) continue;
|
|
915
|
+
sawText = true;
|
|
916
|
+
blocks.push({ type: "text", text });
|
|
917
|
+
continue;
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
if (!supportsImages) {
|
|
921
|
+
blocks.push({ type: "text", text: NON_VISION_IMAGE_PLACEHOLDER });
|
|
922
|
+
continue;
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
const mediaType = normalizeAnthropicImageMediaType(block.mimeType);
|
|
926
|
+
if (!mediaType) {
|
|
927
|
+
blocks.push({ type: "text", text: `[unsupported image: ${block.mimeType}]` });
|
|
928
|
+
continue;
|
|
767
929
|
}
|
|
768
|
-
return textBlocks.join("\n").toWellFormed();
|
|
769
|
-
}
|
|
770
930
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
type: "
|
|
774
|
-
text,
|
|
775
|
-
})),
|
|
776
|
-
...imageBlocks.map(block => ({
|
|
777
|
-
type: "image" as const,
|
|
931
|
+
sawImage = true;
|
|
932
|
+
blocks.push({
|
|
933
|
+
type: "image",
|
|
778
934
|
source: {
|
|
779
|
-
type: "base64"
|
|
780
|
-
media_type:
|
|
935
|
+
type: "base64",
|
|
936
|
+
media_type: mediaType,
|
|
781
937
|
data: block.data,
|
|
782
938
|
},
|
|
783
|
-
})
|
|
784
|
-
|
|
939
|
+
});
|
|
940
|
+
}
|
|
785
941
|
|
|
786
|
-
if (!
|
|
942
|
+
if (!supportsImages) {
|
|
943
|
+
return blocks
|
|
944
|
+
.filter((block): block is { type: "text"; text: string } => block.type === "text")
|
|
945
|
+
.map(block => block.text)
|
|
946
|
+
.join("\n")
|
|
947
|
+
.toWellFormed();
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
if (sawImage && !sawText) {
|
|
787
951
|
blocks.unshift({
|
|
788
|
-
type: "text"
|
|
952
|
+
type: "text",
|
|
789
953
|
text: "(see attached image)",
|
|
790
954
|
});
|
|
791
955
|
}
|
|
@@ -799,17 +963,23 @@ export type AnthropicThinkingDisplay = "summarized" | "omitted";
|
|
|
799
963
|
export interface AnthropicOptions extends StreamOptions {
|
|
800
964
|
/**
|
|
801
965
|
* Enable extended thinking.
|
|
802
|
-
* For Opus 4.6
|
|
803
|
-
*
|
|
966
|
+
* For adaptive-capable models (Opus 4.6+, Sonnet 4.6+, Fable/Mythos 5):
|
|
967
|
+
* uses adaptive thinking (Claude decides when/how much to think). For older
|
|
968
|
+
* models: uses budget-based thinking with thinkingBudgetTokens.
|
|
804
969
|
*/
|
|
805
970
|
thinkingEnabled?: boolean;
|
|
806
971
|
/**
|
|
807
972
|
* Token budget for extended thinking (older models only).
|
|
808
|
-
* Ignored for
|
|
973
|
+
* Ignored for adaptive-capable models.
|
|
809
974
|
*/
|
|
810
975
|
thinkingBudgetTokens?: number;
|
|
811
976
|
/**
|
|
812
|
-
*
|
|
977
|
+
* Upstream wire model id override for collapsed effort-tier variants.
|
|
978
|
+
* Serialized as `requestModelId ?? model.requestModelId ?? model.id`.
|
|
979
|
+
*/
|
|
980
|
+
requestModelId?: string;
|
|
981
|
+
/**
|
|
982
|
+
* Effort level for adaptive thinking.
|
|
813
983
|
* Controls how much thinking Claude allocates:
|
|
814
984
|
* - "max": Always thinks with no constraints
|
|
815
985
|
* - "high": Always thinks, deep reasoning (default)
|
|
@@ -864,7 +1034,6 @@ export type AnthropicClientOptionsArgs = {
|
|
|
864
1034
|
hasTools?: boolean;
|
|
865
1035
|
thinkingEnabled?: boolean;
|
|
866
1036
|
thinkingDisplay?: AnthropicThinkingDisplay;
|
|
867
|
-
onSseEvent?: AnthropicOptions["onSseEvent"];
|
|
868
1037
|
fetch?: FetchImpl;
|
|
869
1038
|
claudeCodeSessionId?: string;
|
|
870
1039
|
};
|
|
@@ -888,6 +1057,32 @@ type FoundryTlsOptions = {
|
|
|
888
1057
|
key?: string;
|
|
889
1058
|
};
|
|
890
1059
|
|
|
1060
|
+
const foundryTlsOptionsCache = new Map<string, FoundryTlsOptions | undefined>();
|
|
1061
|
+
|
|
1062
|
+
/**
 * Turn one TLS-related environment value into a cache-key component.
 *
 * @param value - Raw environment value (a path or inline PEM), possibly unset.
 * @returns `null` for an unset/empty value; `<path>@<mtimeMs>` for a readable
 *   file path; the trimmed path when `stat` fails; otherwise the value unchanged.
 */
function foundryTlsCacheKeyComponent(value: string | undefined): string | null {
	if (!value) return null;
	const candidate = value.trim();
	const isPathValued =
		candidate.length > 0 && !candidate.includes("-----BEGIN") && looksLikeFilePath(candidate);
	if (!isPathValued) return value;
	// For path-valued vars, fold the file mtime into the key so on-disk cert
	// rotation (common for short-lived corporate mTLS certs) invalidates the
	// cached TLS options instead of pinning the first read forever.
	try {
		const { mtimeMs } = fs.statSync(candidate);
		return `${candidate}@${mtimeMs}`;
	} catch {
		return candidate;
	}
}
|
|
1077
|
+
|
|
1078
|
+
/**
 * Compose the cache key for resolved Foundry TLS options: a JSON array of the
 * key components for `NODE_EXTRA_CA_CERTS`, `CLAUDE_CODE_CLIENT_CERT`, and
 * `CLAUDE_CODE_CLIENT_KEY`, in that order.
 */
function foundryTlsOptionsCacheKey(): string {
	const caComponent = foundryTlsCacheKeyComponent($env.NODE_EXTRA_CA_CERTS);
	const certComponent = foundryTlsCacheKeyComponent($env.CLAUDE_CODE_CLIENT_CERT);
	const keyComponent = foundryTlsCacheKeyComponent($env.CLAUDE_CODE_CLIENT_KEY);
	return JSON.stringify([caComponent, certComponent, keyComponent]);
}
|
|
1085
|
+
|
|
891
1086
|
function resolveAnthropicBaseUrl(model: Model<"anthropic-messages">, apiKey?: string): string | undefined {
|
|
892
1087
|
if (model.provider === "github-copilot") {
|
|
893
1088
|
return normalizeAnthropicBaseUrl(resolveGitHubCopilotBaseUrl(model.baseUrl, apiKey) ?? model.baseUrl);
|
|
@@ -936,7 +1131,7 @@ function parseAnthropicCustomHeaders(rawHeaders: string | undefined): Record<str
|
|
|
936
1131
|
/**
 * Resolve the headers configured through `ANTHROPIC_CUSTOM_HEADERS` for a
 * request to `baseUrl`.
 *
 * @param baseUrl - Endpoint the request targets.
 * @returns `undefined` when Foundry is disabled and `baseUrl` is the official
 *   Anthropic API URL; otherwise whatever `parseAnthropicCustomHeaders` yields
 *   for the environment value.
 */
export function resolveAnthropicCustomHeadersForBaseUrl(
	baseUrl: string | undefined,
): Record<string, string> | undefined {
	const customHeadersApply = isFoundryEnabled() || !isOfficialAnthropicApiUrl(baseUrl);
	return customHeadersApply ? parseAnthropicCustomHeaders($env.ANTHROPIC_CUSTOM_HEADERS) : undefined;
}
|
|
942
1137
|
|
|
@@ -976,6 +1171,9 @@ function resolveFoundryTlsOptions(model: Model<"anthropic-messages">): FoundryTl
|
|
|
976
1171
|
if (model.provider !== "anthropic") return undefined;
|
|
977
1172
|
if (!isFoundryEnabled()) return undefined;
|
|
978
1173
|
|
|
1174
|
+
const cacheKey = foundryTlsOptionsCacheKey();
|
|
1175
|
+
if (foundryTlsOptionsCache.has(cacheKey)) return foundryTlsOptionsCache.get(cacheKey);
|
|
1176
|
+
|
|
979
1177
|
const ca = resolvePemValue($env.NODE_EXTRA_CA_CERTS, "NODE_EXTRA_CA_CERTS");
|
|
980
1178
|
const cert = resolvePemValue($env.CLAUDE_CODE_CLIENT_CERT, "CLAUDE_CODE_CLIENT_CERT");
|
|
981
1179
|
const key = resolvePemValue($env.CLAUDE_CODE_CLIENT_KEY, "CLAUDE_CODE_CLIENT_KEY");
|
|
@@ -988,7 +1186,9 @@ function resolveFoundryTlsOptions(model: Model<"anthropic-messages">): FoundryTl
|
|
|
988
1186
|
if (ca) options.ca = [...tls.rootCertificates, ca];
|
|
989
1187
|
if (cert) options.cert = cert;
|
|
990
1188
|
if (key) options.key = key;
|
|
991
|
-
|
|
1189
|
+
const resolved = Object.keys(options).length > 0 ? options : undefined;
|
|
1190
|
+
foundryTlsOptionsCache.set(cacheKey, resolved);
|
|
1191
|
+
return resolved;
|
|
992
1192
|
}
|
|
993
1193
|
|
|
994
1194
|
function buildClaudeCodeTlsFetchOptions(
|
|
@@ -1019,10 +1219,19 @@ function buildClaudeCodeTlsFetchOptions(
|
|
|
1019
1219
|
};
|
|
1020
1220
|
}
|
|
1021
1221
|
function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {
|
|
1222
|
+
// Case-insensitive merge: later sources win and keep their casing. A plain
|
|
1223
|
+
// Object.assign would let `authorization` and `Authorization` coexist, and
|
|
1224
|
+
// the Headers constructor then joins both values comma-separated on the wire.
|
|
1022
1225
|
const merged: Record<string, string> = {};
|
|
1226
|
+
const keyByLower = new Map<string, string>();
|
|
1023
1227
|
for (const headers of headerSources) {
|
|
1024
|
-
if (headers)
|
|
1025
|
-
|
|
1228
|
+
if (!headers) continue;
|
|
1229
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
1230
|
+
const lower = key.toLowerCase();
|
|
1231
|
+
const existing = keyByLower.get(lower);
|
|
1232
|
+
if (existing !== undefined && existing !== key) delete merged[existing];
|
|
1233
|
+
keyByLower.set(lower, key);
|
|
1234
|
+
merged[key] = value;
|
|
1026
1235
|
}
|
|
1027
1236
|
}
|
|
1028
1237
|
return merged;
|
|
@@ -1037,11 +1246,44 @@ const ANTHROPIC_MESSAGE_EVENTS: ReadonlySet<string> = new Set([
|
|
|
1037
1246
|
"content_block_stop",
|
|
1038
1247
|
]);
|
|
1039
1248
|
|
|
1249
|
+
/**
|
|
1250
|
+
* Iterate over Anthropic SSE events from a raw Response, preserving ping events
|
|
1251
|
+
* for liveness. Malformed event envelopes are logged and skipped (non-fatal)
|
|
1252
|
+
* rather than aborting the stream.
|
|
1253
|
+
*/
|
|
1254
|
+
type RawMessagePingEvent = { type: "ping" };
|
|
1255
|
+
type AnthropicStreamEvent = RawMessageStreamEvent | RawMessagePingEvent;
|
|
1256
|
+
const ANTHROPIC_PING_EVENT: RawMessagePingEvent = { type: "ping" };
|
|
1257
|
+
|
|
1258
|
+
/**
 * Build the `Error` thrown for an in-stream `error` SSE frame.
 *
 * Such frames carry an Anthropic error envelope, e.g.
 * `{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}`.
 * When the envelope has a non-empty string message, the error reads
 * `Anthropic stream error (<type>): <message>` (the `(<type>)` part is omitted
 * when the type is missing or empty). The error type token (e.g.
 * `overloaded_error`, `rate_limit_error`) is kept in the message so
 * `isProviderRetryableError`'s classification keys off the structured type
 * rather than incidental JSON substrings.
 *
 * @param data - Raw `data` payload of the SSE frame.
 * @returns The structured error, or `new Error(data)` when the payload is not
 *   JSON or has no usable message.
 */
function createAnthropicSseStreamError(data: string): Error {
	let envelope: { error?: { type?: unknown; message?: unknown } } | null | undefined;
	try {
		envelope = JSON.parse(data) as { error?: { type?: unknown; message?: unknown } };
	} catch {
		// Not a JSON envelope; surface the raw payload.
		return new Error(data);
	}

	const detail = envelope?.error;
	const message = typeof detail?.message === "string" ? detail.message : undefined;
	if (!message) return new Error(data);

	const errorType = typeof detail?.type === "string" ? detail.type : undefined;
	const label = errorType ? ` (${errorType})` : "";
	return new Error(`Anthropic stream error${label}: ${message}`);
}
|
|
1281
|
+
|
|
1040
1282
|
async function* iterateAnthropicEvents(
|
|
1041
1283
|
response: Response,
|
|
1042
1284
|
signal?: AbortSignal,
|
|
1043
1285
|
onSseEvent?: AnthropicOptions["onSseEvent"],
|
|
1044
|
-
): AsyncGenerator<
|
|
1286
|
+
): AsyncGenerator<AnthropicStreamEvent> {
|
|
1045
1287
|
if (!response.body) {
|
|
1046
1288
|
throw new Error("Attempted to iterate over an Anthropic response with no body");
|
|
1047
1289
|
}
|
|
@@ -1052,7 +1294,13 @@ async function* iterateAnthropicEvents(
|
|
|
1052
1294
|
for await (const sse of readSseEvents(response.body, signal)) {
|
|
1053
1295
|
notifyRawSseEvent(onSseEvent, sse);
|
|
1054
1296
|
if (sse.event === "error") {
|
|
1055
|
-
throw
|
|
1297
|
+
throw createAnthropicSseStreamError(sse.data);
|
|
1298
|
+
}
|
|
1299
|
+
|
|
1300
|
+
if (sse.event === "ping") {
|
|
1301
|
+
// Surface keepalives so the idle watchdog treats them as liveness.
|
|
1302
|
+
yield ANTHROPIC_PING_EVENT;
|
|
1303
|
+
continue;
|
|
1056
1304
|
}
|
|
1057
1305
|
|
|
1058
1306
|
if (!ANTHROPIC_MESSAGE_EVENTS.has(sse.event ?? "")) {
|
|
@@ -1060,7 +1308,10 @@ async function* iterateAnthropicEvents(
|
|
|
1060
1308
|
}
|
|
1061
1309
|
|
|
1062
1310
|
try {
|
|
1063
|
-
const event =
|
|
1311
|
+
const event = JSON.parse(sse.data) as RawMessageStreamEvent;
|
|
1312
|
+
if (event.type !== sse.event) {
|
|
1313
|
+
reportAnthropicEnvelopeAnomaly(`event type ${event.type} does not match SSE event ${sse.event}`);
|
|
1314
|
+
}
|
|
1064
1315
|
if (event.type === "message_start") {
|
|
1065
1316
|
sawMessageStart = true;
|
|
1066
1317
|
} else if (event.type === "message_stop") {
|
|
@@ -1069,14 +1320,14 @@ async function* iterateAnthropicEvents(
|
|
|
1069
1320
|
yield event;
|
|
1070
1321
|
} catch (error) {
|
|
1071
1322
|
const message = error instanceof Error ? error.message : String(error);
|
|
1072
|
-
|
|
1073
|
-
`
|
|
1323
|
+
reportAnthropicEnvelopeAnomaly(
|
|
1324
|
+
`could not parse SSE event ${sse.event}: ${message}; skipping frame; data=${sse.data}`,
|
|
1074
1325
|
);
|
|
1075
1326
|
}
|
|
1076
1327
|
}
|
|
1077
1328
|
|
|
1078
|
-
if (sawMessageStart && !sawMessageEnd) {
|
|
1079
|
-
|
|
1329
|
+
if (sawMessageStart && !sawMessageEnd && !signal?.aborted) {
|
|
1330
|
+
reportAnthropicEnvelopeAnomaly("stream ended before message_stop");
|
|
1080
1331
|
}
|
|
1081
1332
|
}
|
|
1082
1333
|
|
|
@@ -1104,53 +1355,48 @@ async function getAnthropicStreamResponse(
|
|
|
1104
1355
|
request: unknown,
|
|
1105
1356
|
signal?: AbortSignal,
|
|
1106
1357
|
onSseEvent?: AnthropicOptions["onSseEvent"],
|
|
1107
|
-
): Promise<{
|
|
1358
|
+
): Promise<{
|
|
1359
|
+
events: AsyncIterable<AnthropicStreamEvent>;
|
|
1360
|
+
response: Response;
|
|
1361
|
+
requestId: string | null;
|
|
1362
|
+
recordsRawSseEvents: boolean;
|
|
1363
|
+
}> {
|
|
1108
1364
|
if (hasAnthropicRawResponseRequest(request)) {
|
|
1109
1365
|
const response = await request.asResponse();
|
|
1110
1366
|
return {
|
|
1111
1367
|
events: iterateAnthropicEvents(response, signal, onSseEvent),
|
|
1112
1368
|
response,
|
|
1113
1369
|
requestId: response.headers.get("request-id"),
|
|
1370
|
+
recordsRawSseEvents: true,
|
|
1114
1371
|
};
|
|
1115
1372
|
}
|
|
1116
1373
|
if (hasAnthropicStreamWithResponseRequest(request)) {
|
|
1117
1374
|
const { data, response, request_id } = await request.withResponse();
|
|
1118
|
-
return { events: data, response, requestId: request_id };
|
|
1375
|
+
return { events: data, response, requestId: request_id, recordsRawSseEvents: false };
|
|
1119
1376
|
}
|
|
1120
1377
|
throw new Error("Anthropic SDK request did not expose a stream response");
|
|
1121
1378
|
}
|
|
1122
1379
|
|
|
1123
|
-
function
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
// First-party Claude API only. Bedrock/Vertex/Foundry and other
|
|
1134
|
-
// Anthropic-compatible proxies reject the role; gate auto-detection on
|
|
1135
|
-
// the canonical api.anthropic.com host plus an Opus 4.8+ model id.
|
|
1136
|
-
(isAnthropicApiBaseUrl(model.baseUrl) && supportsMidConversationSystemMessages(model.id)),
|
|
1137
|
-
};
|
|
1380
|
+
/**
 * Mirror decoded SDK stream events to a raw-SSE observer while passing them through.
 *
 * Each event is re-serialized with `JSON.stringify` and reported through
 * `notifyRawSseEvent` as an `{ event, data, raw }` record before the event itself
 * is yielded unchanged, so the observer is notified ahead of the consumer.
 *
 * @param events - Decoded Anthropic stream events to forward.
 * @param observer - Callback handed to `notifyRawSseEvent` for every event.
 * @returns An async generator yielding the same events in the same order.
 */
async function* observeDecodedAnthropicSdkEvents(
	events: AsyncIterable<AnthropicStreamEvent>,
	observer: (event: RawSseEvent) => void,
): AsyncGenerator<AnthropicStreamEvent> {
	for await (const event of events) {
		const data = JSON.stringify(event);
		// Reconstructed from decoded SDK event; not literal wire bytes.
		notifyRawSseEvent(observer, { event: event.type, data, raw: [`event: ${event.type}`, `data: ${data}`] });
		yield event;
	}
}
|
|
1139
1391
|
|
|
1140
|
-
const PROVIDER_MAX_RETRIES =
|
|
1141
|
-
const PROVIDER_BASE_DELAY_MS = 2000;
|
|
1392
|
+
const PROVIDER_MAX_RETRIES = 10;
|
|
1142
1393
|
|
|
1143
|
-
/**
|
|
1144
|
-
* Check if an error from the Anthropic SDK is a rate-limit/transient error that
|
|
1145
|
-
* should be retried before any content has been emitted.
|
|
1146
|
-
*
|
|
1147
|
-
* Includes malformed JSON stream-envelope parse errors seen from some
|
|
1148
|
-
* Anthropic-compatible proxy endpoints.
|
|
1149
|
-
*/
|
|
1150
1394
|
/**
 * Transient stream corruption errors where the response was truncated mid-JSON.
 *
 * @param error - Value caught while handling the stream; anything that is not an
 *   `Error` instance is rejected.
 * @returns `true` when the error message contains, case-insensitively, one of the
 *   truncation phrasings listed in the pattern below (e.g. "unexpected end of JSON
 *   input", "unterminated string", "unexpected EOF", "truncated").
 */
function isTransientStreamParseError(error: unknown): boolean {
	if (!(error instanceof Error)) return false;
	return /unterminated string|unexpected end of json input|unexpected end of data|unexpected eof|end of file|eof while parsing|truncated/i.test(
		error.message,
	);
}
|
|
1155
1401
|
|
|
1156
1402
|
const ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX = "Anthropic stream envelope error:";
|
|
@@ -1159,26 +1405,28 @@ function createAnthropicStreamEnvelopeError(message: string): Error {
|
|
|
1159
1405
|
return new Error(`${ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX} ${message}`);
|
|
1160
1406
|
}
|
|
1161
1407
|
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1408
|
+
/**
 * Log a malformed-stream-envelope anomaly without aborting the turn. The strict
 * parser would `throw createAnthropicStreamEnvelopeError(...)` here; we instead
 * surface a warning and let the caller skip the offending event (or finalize what
 * already streamed) so a non-conforming endpoint degrades to best-effort content
 * rather than failing the request.
 *
 * @param detail - Human-readable description of the anomaly, appended to the
 *   fixed warning prefix.
 */
function reportAnthropicEnvelopeAnomaly(detail: string): void {
	logger.warn(`anthropic: ignoring malformed stream envelope: ${detail}`);
}
|
|
1170
1418
|
|
|
1171
1419
|
/**
 * Decide whether an event type can be skipped as stream preamble.
 *
 * @param eventType - Candidate event type; non-string values are never ignorable.
 * @returns `true` for `"ping"` and for any string that is not a member of
 *   `ANTHROPIC_MESSAGE_EVENTS`; `false` for known message events and non-strings.
 */
function shouldIgnoreAnthropicPreambleEvent(eventType: unknown): boolean {
	if (typeof eventType !== "string") return false;
	if (eventType === "ping") return true;
	return !ANTHROPIC_MESSAGE_EVENTS.has(eventType);
}
|
|
1176
1424
|
|
|
1177
1425
|
/**
 * Recognize stream-envelope failures by their error message.
 *
 * @param error - Value caught while handling the stream; anything that is not an
 *   `Error` instance is rejected.
 * @returns `true` when the message contains `ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX`
 *   or mentions "stream event order" / "before message_start" (case-insensitive).
 */
function isTransientStreamEnvelopeError(error: unknown): boolean {
	if (!(error instanceof Error)) return false;
	return (
		error.message.includes(ANTHROPIC_STREAM_ENVELOPE_ERROR_PREFIX) ||
		/stream event order|before message_start/i.test(error.message)
	);
}
|
|
1184
1432
|
|
|
@@ -1190,6 +1438,16 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
|
|
|
1190
1438
|
export function isProviderRetryableError(error: unknown, provider?: string): boolean {
|
|
1191
1439
|
if (!(error instanceof Error)) return false;
|
|
1192
1440
|
if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
|
|
1441
|
+
// Account-level usage/quota limits ("usage_limit_reached", "exceed your
|
|
1442
|
+
// account's rate limit", "quota exceeded") are persistent — the server
|
|
1443
|
+
// parks the credential for minutes-to-hours (see the long `retry-after`).
|
|
1444
|
+
// Retrying the same key with the provider's seconds-scale backoff never
|
|
1445
|
+
// helps; these are owned by the credential-rotation layer (auth-gateway /
|
|
1446
|
+
// `streamSimple` a/b/c policy), so surface them immediately instead of
|
|
1447
|
+
// burning the retry budget here.
|
|
1448
|
+
if (isUsageLimitError(error.message)) return false;
|
|
1449
|
+
const status = extractHttpStatusFromError(error);
|
|
1450
|
+
if (status !== undefined && status >= 400 && status < 500 && status !== 408 && status !== 429) return false;
|
|
1193
1451
|
const msg = error.message.toLowerCase();
|
|
1194
1452
|
if (
|
|
1195
1453
|
isUnexpectedSocketCloseMessage(msg) ||
|
|
@@ -1223,13 +1481,12 @@ export type AnthropicUsageLike = {
|
|
|
1223
1481
|
|
|
1224
1482
|
/**
|
|
1225
1483
|
* Capture Anthropic's optional cache-creation TTL breakdown and server-tool-use
|
|
1226
|
-
* counters into the harness Usage shape.
|
|
1227
|
-
*
|
|
1228
|
-
* established at `message_start`.
|
|
1484
|
+
* counters into the harness Usage shape. Omitted/null fields are no-ops; explicit
|
|
1485
|
+
* zero-valued objects clear prior extras from earlier stream usage snapshots.
|
|
1229
1486
|
*/
|
|
1230
1487
|
export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLike): void {
|
|
1231
1488
|
const cacheCreation = source.cache_creation;
|
|
1232
|
-
if (cacheCreation) {
|
|
1489
|
+
if (cacheCreation != null) {
|
|
1233
1490
|
const fiveMinute = cacheCreation.ephemeral_5m_input_tokens ?? 0;
|
|
1234
1491
|
const oneHour = cacheCreation.ephemeral_1h_input_tokens ?? 0;
|
|
1235
1492
|
if (fiveMinute > 0 || oneHour > 0) {
|
|
@@ -1237,10 +1494,12 @@ export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLi
|
|
|
1237
1494
|
...(fiveMinute > 0 ? { ephemeral5m: fiveMinute } : {}),
|
|
1238
1495
|
...(oneHour > 0 ? { ephemeral1h: oneHour } : {}),
|
|
1239
1496
|
};
|
|
1497
|
+
} else {
|
|
1498
|
+
delete usage.cttl;
|
|
1240
1499
|
}
|
|
1241
1500
|
}
|
|
1242
1501
|
const serverToolUse = source.server_tool_use;
|
|
1243
|
-
if (serverToolUse) {
|
|
1502
|
+
if (serverToolUse != null) {
|
|
1244
1503
|
const webSearch = serverToolUse.web_search_requests ?? 0;
|
|
1245
1504
|
const webFetch = serverToolUse.web_fetch_requests ?? 0;
|
|
1246
1505
|
if (webSearch > 0 || webFetch > 0) {
|
|
@@ -1248,6 +1507,8 @@ export function applyAnthropicUsageExtras(usage: Usage, source: AnthropicUsageLi
|
|
|
1248
1507
|
...(webSearch > 0 ? { webSearch } : {}),
|
|
1249
1508
|
...(webFetch > 0 ? { webFetch } : {}),
|
|
1250
1509
|
};
|
|
1510
|
+
} else {
|
|
1511
|
+
delete usage.server;
|
|
1251
1512
|
}
|
|
1252
1513
|
}
|
|
1253
1514
|
}
|
|
@@ -1263,30 +1524,50 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1263
1524
|
const startTime = Date.now();
|
|
1264
1525
|
let firstTokenTime: number | undefined;
|
|
1265
1526
|
|
|
1266
|
-
const copilotDynamicHeaders =
|
|
1267
|
-
model.provider === "github-copilot"
|
|
1268
|
-
? buildCopilotDynamicHeaders({
|
|
1269
|
-
messages: context.messages,
|
|
1270
|
-
hasImages: hasCopilotVisionInput(context.messages),
|
|
1271
|
-
premiumMultiplier: model.premiumMultiplier,
|
|
1272
|
-
headers: { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
|
|
1273
|
-
initiatorOverride: options?.initiatorOverride,
|
|
1274
|
-
})
|
|
1275
|
-
: undefined;
|
|
1276
1527
|
const output: AssistantMessage = {
|
|
1277
1528
|
role: "assistant",
|
|
1278
1529
|
content: [],
|
|
1279
1530
|
api: model.api as Api,
|
|
1280
1531
|
provider: model.provider,
|
|
1281
1532
|
model: model.id,
|
|
1282
|
-
usage: createEmptyUsage(
|
|
1533
|
+
usage: createEmptyUsage(),
|
|
1283
1534
|
stopReason: "stop",
|
|
1284
1535
|
timestamp: Date.now(),
|
|
1285
1536
|
};
|
|
1286
1537
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
1287
1538
|
let activeAbortTracker = createAbortSourceTracker(options?.signal);
|
|
1288
1539
|
|
|
1540
|
+
const onSseEvent = options?.onSseEvent;
|
|
1541
|
+
const rawSseObserver = onSseEvent ? (event: RawSseEvent) => onSseEvent(event, model) : undefined;
|
|
1542
|
+
|
|
1289
1543
|
try {
|
|
1544
|
+
// Built inside the try so a copilot credential/header failure surfaces as
|
|
1545
|
+
// an error event instead of an unhandled rejection that leaves the stream
|
|
1546
|
+
// (and any consumer awaiting `result()`) hanging forever.
|
|
1547
|
+
const copilotDynamicHeaders =
|
|
1548
|
+
model.provider === "github-copilot"
|
|
1549
|
+
? buildCopilotDynamicHeaders({
|
|
1550
|
+
messages: context.messages,
|
|
1551
|
+
hasImages: hasCopilotVisionInput(context.messages),
|
|
1552
|
+
premiumMultiplier: model.premiumMultiplier,
|
|
1553
|
+
headers: { ...(model.headers ?? {}), ...(options?.headers ?? {}) },
|
|
1554
|
+
initiatorOverride: options?.initiatorOverride,
|
|
1555
|
+
})
|
|
1556
|
+
: undefined;
|
|
1557
|
+
if (copilotDynamicHeaders?.premiumRequests !== undefined) {
|
|
1558
|
+
output.usage.premiumRequests = copilotDynamicHeaders.premiumRequests;
|
|
1559
|
+
}
|
|
1560
|
+
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
|
|
1561
|
+
const baseUrl = resolveAnthropicBaseUrl(model, apiKey) ?? "https://api.anthropic.com";
|
|
1562
|
+
const providerSessionState = getAnthropicProviderSessionState(
|
|
1563
|
+
options?.providerSessionState,
|
|
1564
|
+
baseUrl,
|
|
1565
|
+
model.id,
|
|
1566
|
+
);
|
|
1567
|
+
let disableStrictTools =
|
|
1568
|
+
(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
|
|
1569
|
+
let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
|
|
1570
|
+
|
|
1290
1571
|
let client: AnthropicMessagesClientLike;
|
|
1291
1572
|
let isOAuthToken: boolean;
|
|
1292
1573
|
|
|
@@ -1294,19 +1575,38 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1294
1575
|
client = options.client;
|
|
1295
1576
|
isOAuthToken = false;
|
|
1296
1577
|
} else {
|
|
1297
|
-
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
|
|
1298
|
-
|
|
1299
1578
|
const extraBetas = normalizeExtraBetas(options?.betas);
|
|
1300
1579
|
const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
|
|
1301
|
-
|
|
1580
|
+
// Skip the fast-mode beta when this session already learned the
|
|
1581
|
+
// endpoint+model rejects fast mode; `speed` is dropped from the params
|
|
1582
|
+
// too (dropFastMode), so the request stays a faithful non-fast request.
|
|
1583
|
+
if (wantsAnthropicPriority && !dropFastMode && !extraBetas.includes(fastModeBeta)) {
|
|
1302
1584
|
extraBetas.push(fastModeBeta);
|
|
1303
1585
|
}
|
|
1304
1586
|
if (options?.taskBudget && !extraBetas.includes(taskBudgetBeta)) {
|
|
1305
1587
|
extraBetas.push(taskBudgetBeta);
|
|
1306
1588
|
}
|
|
1307
|
-
|
|
1589
|
+
// `output_config.effort` ships on thinking-on requests AND on the
|
|
1590
|
+
// thinking-off adaptive pin (adaptive-only models get effort:"low" so
|
|
1591
|
+
// the toggle cannot 400); the beta must accompany the field in both.
|
|
1592
|
+
const sendsAdaptiveEffortPin =
|
|
1593
|
+
options?.thinkingEnabled === false &&
|
|
1594
|
+
model.thinking?.mode === "anthropic-adaptive" &&
|
|
1595
|
+
!model.compat.disableAdaptiveThinking;
|
|
1596
|
+
if (
|
|
1597
|
+
model.reasoning &&
|
|
1598
|
+
(options?.thinkingEnabled || sendsAdaptiveEffortPin) &&
|
|
1599
|
+
!extraBetas.includes(effortBeta)
|
|
1600
|
+
) {
|
|
1308
1601
|
extraBetas.push(effortBeta);
|
|
1309
1602
|
}
|
|
1603
|
+
if (model.compat.supportsMidConversationSystem && !extraBetas.includes(midConversationSystemBeta)) {
|
|
1604
|
+
// convertAnthropicMessages may upgrade developer turns to the
|
|
1605
|
+
// mid-conversation `system` role on these models; API-key requests
|
|
1606
|
+
// need the beta alongside the role (OAuth agent requests already
|
|
1607
|
+
// carry it in the Claude Code list).
|
|
1608
|
+
extraBetas.push(midConversationSystemBeta);
|
|
1609
|
+
}
|
|
1310
1610
|
|
|
1311
1611
|
const created = createClient(model, {
|
|
1312
1612
|
model,
|
|
@@ -1320,24 +1620,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1320
1620
|
hasTools: !!context.tools?.length,
|
|
1321
1621
|
thinkingEnabled: options?.thinkingEnabled,
|
|
1322
1622
|
thinkingDisplay: options?.thinkingDisplay,
|
|
1323
|
-
onSseEvent: options?.onSseEvent,
|
|
1324
1623
|
fetch: options?.fetch,
|
|
1325
1624
|
claudeCodeSessionId: options?.sessionId ?? extractClaudeMetadataSessionId(options?.metadata?.user_id),
|
|
1326
1625
|
});
|
|
1327
1626
|
client = created.client;
|
|
1328
1627
|
isOAuthToken = created.isOAuthToken;
|
|
1329
1628
|
}
|
|
1330
|
-
const baseUrl =
|
|
1331
|
-
resolveAnthropicBaseUrl(model, options?.apiKey ?? getEnvApiKey(model.provider) ?? "") ??
|
|
1332
|
-
"https://api.anthropic.com";
|
|
1333
|
-
const providerSessionState = getAnthropicProviderSessionState(options?.providerSessionState);
|
|
1334
|
-
let disableStrictTools =
|
|
1335
|
-
(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
|
|
1336
|
-
let strictFallbackErrorMessage: string | undefined;
|
|
1337
|
-
let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
|
|
1338
1629
|
const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
|
|
1339
1630
|
const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
|
|
1340
|
-
let nextParams = buildParams(model,
|
|
1631
|
+
let nextParams = buildParams(model, preparedContext, isOAuthToken, options, disableStrictTools);
|
|
1341
1632
|
if (disableStrictTools) {
|
|
1342
1633
|
dropAnthropicStrictTools(nextParams);
|
|
1343
1634
|
}
|
|
@@ -1348,6 +1639,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1348
1639
|
if (replacementPayload !== undefined) {
|
|
1349
1640
|
nextParams = replacementPayload as typeof nextParams;
|
|
1350
1641
|
}
|
|
1642
|
+
nextParams = toWellFormedDeep(nextParams) as typeof nextParams;
|
|
1351
1643
|
rawRequestDump = {
|
|
1352
1644
|
provider: model.provider,
|
|
1353
1645
|
api: output.api,
|
|
@@ -1371,6 +1663,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1371
1663
|
const requestTimeoutMs =
|
|
1372
1664
|
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
|
|
1373
1665
|
const blocks = output.content as Block[];
|
|
1666
|
+
// Close out one streamed content block and emit its `*_end` event.
// - Every block loses its `index` field (set from the provider's event index when the block was opened).
// - text / thinking blocks emit `text_end` / `thinking_end` with the accumulated content.
// - toolCall blocks get their arguments re-parsed from the accumulated JSON, lose their
//   streaming-only fields (`partialJson`, `lastParseLen`), then emit `toolcall_end`.
// - Any other block kind only has `index` removed; no event is pushed.
const finalizeStreamBlock = (block: Block, contentIndex: number): void => {
	delete (block as { index?: number }).index;
	if (block.type === "text") {
		stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
	} else if (block.type === "thinking") {
		stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
	} else if (block.type === "toolCall") {
		// With no streamed JSON, round-trip the arguments already on the block.
		const finalJson =
			block.partialJson.length > 0 ? block.partialJson : JSON.stringify(block.arguments ?? {});
		try {
			block.arguments = JSON.parse(finalJson) as ToolCall["arguments"];
		} catch (parseError) {
			// Non-fatal: keep the best-effort arguments recovered by the throttled streaming
			// parser instead of failing the turn on malformed/truncated tool-argument JSON.
			reportAnthropicEnvelopeAnomaly(
				`tool_use ${block.id} arguments are not valid JSON: ${parseError instanceof Error ? parseError.message : String(parseError)}`,
			);
			block.arguments = (block.arguments ?? {}) as ToolCall["arguments"];
		}
		delete (block as { partialJson?: string }).partialJson;
		delete (block as { lastParseLen?: number }).lastParseLen;
		stream.push({ type: "toolcall_end", contentIndex, toolCall: block, partial: output });
	}
};
|
|
1374
1690
|
stream.push({ type: "start", partial: output });
|
|
1375
1691
|
// Retry loop for transient errors from the stream.
|
|
1376
1692
|
// Provider-level transport/rate-limit failures: only before any streamed content starts.
|
|
@@ -1381,7 +1697,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1381
1697
|
while (true) {
|
|
1382
1698
|
activeAbortTracker = createAbortSourceTracker(options?.signal);
|
|
1383
1699
|
const { requestSignal } = activeAbortTracker;
|
|
1384
|
-
|
|
1700
|
+
// The provider loop owns retries: pin the client's internal retry loop
|
|
1701
|
+
// to zero even when no watchdog timeout is configured (the helper only
|
|
1702
|
+
// pins it alongside a timeout; a client retry budget of 5 would otherwise
|
|
1703
|
+
// multiply with PROVIDER_MAX_RETRIES into up to 66 wire attempts).
|
|
1704
|
+
const requestOptions = { ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs), maxRetries: 0 };
|
|
1385
1705
|
const anthropicRequest: unknown =
|
|
1386
1706
|
isOAuthToken && client.beta
|
|
1387
1707
|
? client.beta.messages.create({ ...params, stream: true }, requestOptions)
|
|
@@ -1396,19 +1716,17 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1396
1716
|
requestTimeoutMs,
|
|
1397
1717
|
);
|
|
1398
1718
|
}
|
|
1399
|
-
let anthropicStream: AsyncIterable<
|
|
1719
|
+
let anthropicStream: AsyncIterable<AnthropicStreamEvent>;
|
|
1400
1720
|
let response: Response;
|
|
1401
1721
|
let requestId: string | null;
|
|
1722
|
+
let recordsRawSseEvents: boolean;
|
|
1402
1723
|
try {
|
|
1403
1724
|
({
|
|
1404
1725
|
events: anthropicStream,
|
|
1405
1726
|
response,
|
|
1406
1727
|
requestId,
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
requestSignal,
|
|
1410
|
-
options?.client ? event => options?.onSseEvent?.(event, model) : undefined,
|
|
1411
|
-
));
|
|
1728
|
+
recordsRawSseEvents,
|
|
1729
|
+
} = await getAnthropicStreamResponse(anthropicRequest, requestSignal, rawSseObserver));
|
|
1412
1730
|
} catch (error) {
|
|
1413
1731
|
if (error instanceof AnthropicConnectionTimeoutError && !activeAbortTracker.wasCallerAbort()) {
|
|
1414
1732
|
throw firstEventTimeoutAbortError;
|
|
@@ -1421,8 +1739,23 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1421
1739
|
let sawEvent = false;
|
|
1422
1740
|
let sawMessageStart = false;
|
|
1423
1741
|
let sawTerminalEnvelope = false;
|
|
1424
|
-
|
|
1425
|
-
|
|
1742
|
+
let sawMessageStop = false;
|
|
1743
|
+
// Set when a duplicate message_start splices a second envelope onto
|
|
1744
|
+
// the stream; closed indexes then refuse to reopen so replayed
|
|
1745
|
+
// content cannot duplicate (see content_block_start guard).
|
|
1746
|
+
let sawSplicedEnvelope = false;
|
|
1747
|
+
const closedBlockIndexes = new Set<number>();
|
|
1748
|
+
const openBlocks = new Map<
|
|
1749
|
+
number,
|
|
1750
|
+
{ contentIndex: number; kind: "text" | "thinking" | "redactedThinking" | "toolCall" | "ignored" }
|
|
1751
|
+
>();
|
|
1752
|
+
|
|
1753
|
+
// Pings keep the idle deadline alive once content is flowing, but a
|
|
1754
|
+
// ping before message_start must not consume the first-event watchdog:
|
|
1755
|
+
// it would flip the (retryable) pre-content stall classification into
|
|
1756
|
+
// a terminal mid-stream idle timeout.
|
|
1757
|
+
let sawNonPingEvent = false;
|
|
1758
|
+
const timedAnthropicStream = iterateWithIdleTimeout(anthropicStream, {
|
|
1426
1759
|
idleTimeoutMs,
|
|
1427
1760
|
firstItemTimeoutMs: firstEventTimeoutMs,
|
|
1428
1761
|
errorMessage: idleTimeoutAbortError.message,
|
|
@@ -1430,23 +1763,45 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1430
1763
|
onIdle: () => activeAbortTracker.abortLocally(idleTimeoutAbortError),
|
|
1431
1764
|
onFirstItemTimeout: () => activeAbortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
1432
1765
|
abortSignal: options?.signal,
|
|
1433
|
-
|
|
1766
|
+
isProgressItem: item => {
|
|
1767
|
+
if ((item as AnthropicStreamEvent).type === "ping") return sawNonPingEvent;
|
|
1768
|
+
sawNonPingEvent = true;
|
|
1769
|
+
return true;
|
|
1770
|
+
},
|
|
1771
|
+
});
|
|
1772
|
+
const observedAnthropicStream =
|
|
1773
|
+
rawSseObserver && !recordsRawSseEvents
|
|
1774
|
+
? observeDecodedAnthropicSdkEvents(timedAnthropicStream, rawSseObserver)
|
|
1775
|
+
: timedAnthropicStream;
|
|
1776
|
+
for await (const event of observedAnthropicStream) {
|
|
1434
1777
|
sawEvent = true;
|
|
1435
1778
|
|
|
1436
1779
|
if (event.type === "message_start") {
|
|
1437
1780
|
if (sawMessageStart) {
|
|
1781
|
+
// Transparent reconnects can splice a fresh envelope onto the same
|
|
1782
|
+
// stream; keep the original message but surface the anomaly. Events
|
|
1783
|
+
// for blocks still open from the first envelope continue to apply,
|
|
1784
|
+
// but replayed blocks are dropped below (see closedBlockIndexes).
|
|
1785
|
+
reportAnthropicEnvelopeAnomaly("duplicate message_start event");
|
|
1786
|
+
sawSplicedEnvelope = true;
|
|
1438
1787
|
continue;
|
|
1439
1788
|
}
|
|
1440
1789
|
sawMessageStart = true;
|
|
1441
|
-
|
|
1442
|
-
output.responseId =
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
output.usage.
|
|
1449
|
-
|
|
1790
|
+
const startMessage = event.message;
|
|
1791
|
+
if (startMessage?.id) output.responseId = startMessage.id;
|
|
1792
|
+
const startUsage = startMessage?.usage;
|
|
1793
|
+
if (startUsage) {
|
|
1794
|
+
applyAnthropicUsageExtras(output.usage, startUsage);
|
|
1795
|
+
output.usage.input = startUsage.input_tokens || 0;
|
|
1796
|
+
output.usage.output = startUsage.output_tokens || 0;
|
|
1797
|
+
output.usage.cacheRead = startUsage.cache_read_input_tokens || 0;
|
|
1798
|
+
output.usage.cacheWrite = startUsage.cache_creation_input_tokens || 0;
|
|
1799
|
+
output.usage.totalTokens =
|
|
1800
|
+
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
|
1801
|
+
calculateCost(model, output.usage);
|
|
1802
|
+
} else {
|
|
1803
|
+
reportAnthropicEnvelopeAnomaly("message_start missing usage");
|
|
1804
|
+
}
|
|
1450
1805
|
continue;
|
|
1451
1806
|
}
|
|
1452
1807
|
|
|
@@ -1458,6 +1813,28 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1458
1813
|
}
|
|
1459
1814
|
|
|
1460
1815
|
if (event.type === "content_block_start") {
|
|
1816
|
+
if (sawTerminalEnvelope) {
|
|
1817
|
+
reportAnthropicEnvelopeAnomaly(`received ${event.type} after terminal stop signal`);
|
|
1818
|
+
continue;
|
|
1819
|
+
}
|
|
1820
|
+
if (openBlocks.has(event.index)) {
|
|
1821
|
+
reportAnthropicEnvelopeAnomaly(`duplicate content_block_start index ${event.index}`);
|
|
1822
|
+
continue;
|
|
1823
|
+
}
|
|
1824
|
+
if (sawSplicedEnvelope && closedBlockIndexes.has(event.index)) {
|
|
1825
|
+
// A spliced envelope replaying an index this stream already
|
|
1826
|
+
// completed would append duplicate text/tool calls; consume its
|
|
1827
|
+
// events silently instead.
|
|
1828
|
+
reportAnthropicEnvelopeAnomaly(
|
|
1829
|
+
`replayed content_block_start index ${event.index} after duplicate message_start`,
|
|
1830
|
+
);
|
|
1831
|
+
openBlocks.set(event.index, { contentIndex: -1, kind: "ignored" });
|
|
1832
|
+
continue;
|
|
1833
|
+
}
|
|
1834
|
+
if (!event.content_block?.type) {
|
|
1835
|
+
reportAnthropicEnvelopeAnomaly("content_block_start missing content_block payload");
|
|
1836
|
+
continue;
|
|
1837
|
+
}
|
|
1461
1838
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
1462
1839
|
if (event.content_block.type === "text") {
|
|
1463
1840
|
streamedReplayUnsafeContent = true;
|
|
@@ -1467,12 +1844,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1467
1844
|
index: event.index,
|
|
1468
1845
|
};
|
|
1469
1846
|
output.content.push(block);
|
|
1847
|
+
const contentIndex = output.content.length - 1;
|
|
1848
|
+
openBlocks.set(event.index, { contentIndex, kind: "text" });
|
|
1470
1849
|
stream.push({
|
|
1471
1850
|
type: "text_start",
|
|
1472
|
-
contentIndex
|
|
1851
|
+
contentIndex,
|
|
1473
1852
|
partial: output,
|
|
1474
1853
|
});
|
|
1475
1854
|
} else if (event.content_block.type === "thinking") {
|
|
1855
|
+
streamedReplayUnsafeContent = true;
|
|
1476
1856
|
const block: Block = {
|
|
1477
1857
|
type: "thinking",
|
|
1478
1858
|
thinking: "",
|
|
@@ -1480,18 +1860,25 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1480
1860
|
index: event.index,
|
|
1481
1861
|
};
|
|
1482
1862
|
output.content.push(block);
|
|
1863
|
+
const contentIndex = output.content.length - 1;
|
|
1864
|
+
openBlocks.set(event.index, { contentIndex, kind: "thinking" });
|
|
1483
1865
|
stream.push({
|
|
1484
1866
|
type: "thinking_start",
|
|
1485
|
-
contentIndex
|
|
1867
|
+
contentIndex,
|
|
1486
1868
|
partial: output,
|
|
1487
1869
|
});
|
|
1488
1870
|
} else if (event.content_block.type === "redacted_thinking") {
|
|
1871
|
+
streamedReplayUnsafeContent = true;
|
|
1489
1872
|
const block: Block = {
|
|
1490
1873
|
type: "redactedThinking",
|
|
1491
1874
|
data: event.content_block.data,
|
|
1492
1875
|
index: event.index,
|
|
1493
1876
|
};
|
|
1494
1877
|
output.content.push(block);
|
|
1878
|
+
openBlocks.set(event.index, {
|
|
1879
|
+
contentIndex: output.content.length - 1,
|
|
1880
|
+
kind: "redactedThinking",
|
|
1881
|
+
});
|
|
1495
1882
|
} else if (event.content_block.type === "tool_use") {
|
|
1496
1883
|
streamedReplayUnsafeContent = true;
|
|
1497
1884
|
const block: Block = {
|
|
@@ -1505,134 +1892,165 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1505
1892
|
index: event.index,
|
|
1506
1893
|
};
|
|
1507
1894
|
output.content.push(block);
|
|
1895
|
+
const contentIndex = output.content.length - 1;
|
|
1896
|
+
openBlocks.set(event.index, { contentIndex, kind: "toolCall" });
|
|
1508
1897
|
stream.push({
|
|
1509
1898
|
type: "toolcall_start",
|
|
1510
|
-
contentIndex
|
|
1899
|
+
contentIndex,
|
|
1511
1900
|
partial: output,
|
|
1512
1901
|
});
|
|
1902
|
+
} else {
|
|
1903
|
+
openBlocks.set(event.index, { contentIndex: -1, kind: "ignored" });
|
|
1513
1904
|
}
|
|
1514
1905
|
} else if (event.type === "content_block_delta") {
|
|
1906
|
+
if (sawTerminalEnvelope) {
|
|
1907
|
+
reportAnthropicEnvelopeAnomaly(`received ${event.type} after terminal stop signal`);
|
|
1908
|
+
continue;
|
|
1909
|
+
}
|
|
1910
|
+
const openBlock = openBlocks.get(event.index);
|
|
1911
|
+
if (!openBlock) {
|
|
1912
|
+
reportAnthropicEnvelopeAnomaly(
|
|
1913
|
+
`received content_block_delta for unopened index ${event.index}`,
|
|
1914
|
+
);
|
|
1915
|
+
continue;
|
|
1916
|
+
}
|
|
1917
|
+
if (openBlock.kind === "ignored") continue;
|
|
1918
|
+
if (!event.delta?.type) {
|
|
1919
|
+
reportAnthropicEnvelopeAnomaly("content_block_delta missing delta payload");
|
|
1920
|
+
continue;
|
|
1921
|
+
}
|
|
1922
|
+
const block = blocks[openBlock.contentIndex];
|
|
1515
1923
|
if (event.delta.type === "text_delta") {
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
block.text += event.delta.text;
|
|
1520
|
-
stream.push({
|
|
1521
|
-
type: "text_delta",
|
|
1522
|
-
contentIndex: index,
|
|
1523
|
-
delta: event.delta.text,
|
|
1524
|
-
partial: output,
|
|
1525
|
-
});
|
|
1924
|
+
if (openBlock.kind !== "text" || block?.type !== "text") {
|
|
1925
|
+
reportAnthropicEnvelopeAnomaly(`received text_delta for ${openBlock.kind} block`);
|
|
1926
|
+
continue;
|
|
1526
1927
|
}
|
|
1928
|
+
streamedReplayUnsafeContent = true;
|
|
1929
|
+
block.text += event.delta.text;
|
|
1930
|
+
stream.push({
|
|
1931
|
+
type: "text_delta",
|
|
1932
|
+
contentIndex: openBlock.contentIndex,
|
|
1933
|
+
delta: event.delta.text,
|
|
1934
|
+
partial: output,
|
|
1935
|
+
});
|
|
1527
1936
|
} else if (event.delta.type === "thinking_delta") {
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
block.thinking += event.delta.thinking;
|
|
1532
|
-
stream.push({
|
|
1533
|
-
type: "thinking_delta",
|
|
1534
|
-
contentIndex: index,
|
|
1535
|
-
delta: event.delta.thinking,
|
|
1536
|
-
partial: output,
|
|
1537
|
-
});
|
|
1937
|
+
if (openBlock.kind !== "thinking" || block?.type !== "thinking") {
|
|
1938
|
+
reportAnthropicEnvelopeAnomaly(`received thinking_delta for ${openBlock.kind} block`);
|
|
1939
|
+
continue;
|
|
1538
1940
|
}
|
|
1941
|
+
streamedReplayUnsafeContent = true;
|
|
1942
|
+
block.thinking += event.delta.thinking;
|
|
1943
|
+
stream.push({
|
|
1944
|
+
type: "thinking_delta",
|
|
1945
|
+
contentIndex: openBlock.contentIndex,
|
|
1946
|
+
delta: event.delta.thinking,
|
|
1947
|
+
partial: output,
|
|
1948
|
+
});
|
|
1539
1949
|
} else if (event.delta.type === "input_json_delta") {
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
type: "toolcall_delta",
|
|
1551
|
-
contentIndex: index,
|
|
1552
|
-
delta: event.delta.partial_json,
|
|
1553
|
-
partial: output,
|
|
1554
|
-
});
|
|
1950
|
+
if (openBlock.kind !== "toolCall" || block?.type !== "toolCall") {
|
|
1951
|
+
reportAnthropicEnvelopeAnomaly(`received input_json_delta for ${openBlock.kind} block`);
|
|
1952
|
+
continue;
|
|
1953
|
+
}
|
|
1954
|
+
streamedReplayUnsafeContent = true;
|
|
1955
|
+
block.partialJson += event.delta.partial_json;
|
|
1956
|
+
const throttled = parseStreamingJsonThrottled(block.partialJson, block.lastParseLen ?? 0);
|
|
1957
|
+
if (throttled) {
|
|
1958
|
+
block.arguments = throttled.value;
|
|
1959
|
+
block.lastParseLen = throttled.parsedLen;
|
|
1555
1960
|
}
|
|
1961
|
+
stream.push({
|
|
1962
|
+
type: "toolcall_delta",
|
|
1963
|
+
contentIndex: openBlock.contentIndex,
|
|
1964
|
+
delta: event.delta.partial_json,
|
|
1965
|
+
partial: output,
|
|
1966
|
+
});
|
|
1556
1967
|
} else if (event.delta.type === "signature_delta") {
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
block.thinkingSignature = block.thinkingSignature || "";
|
|
1561
|
-
block.thinkingSignature += event.delta.signature;
|
|
1968
|
+
if (openBlock.kind !== "thinking" || block?.type !== "thinking") {
|
|
1969
|
+
reportAnthropicEnvelopeAnomaly(`received signature_delta for ${openBlock.kind} block`);
|
|
1970
|
+
continue;
|
|
1562
1971
|
}
|
|
1972
|
+
streamedReplayUnsafeContent = true;
|
|
1973
|
+
block.thinkingSignature = block.thinkingSignature || "";
|
|
1974
|
+
block.thinkingSignature += event.delta.signature;
|
|
1563
1975
|
}
|
|
1564
1976
|
} else if (event.type === "content_block_stop") {
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
} else if (block.type === "toolCall") {
|
|
1584
|
-
block.arguments = parseStreamingJson(block.partialJson);
|
|
1585
|
-
delete (block as { partialJson?: string }).partialJson;
|
|
1586
|
-
delete (block as { lastParseLen?: number }).lastParseLen;
|
|
1587
|
-
stream.push({
|
|
1588
|
-
type: "toolcall_end",
|
|
1589
|
-
contentIndex: index,
|
|
1590
|
-
toolCall: block,
|
|
1591
|
-
partial: output,
|
|
1592
|
-
});
|
|
1593
|
-
}
|
|
1977
|
+
if (sawTerminalEnvelope) {
|
|
1978
|
+
reportAnthropicEnvelopeAnomaly(`received ${event.type} after terminal stop signal`);
|
|
1979
|
+
continue;
|
|
1980
|
+
}
|
|
1981
|
+
const openBlock = openBlocks.get(event.index);
|
|
1982
|
+
if (!openBlock) {
|
|
1983
|
+
reportAnthropicEnvelopeAnomaly(`received content_block_stop for unopened index ${event.index}`);
|
|
1984
|
+
continue;
|
|
1985
|
+
}
|
|
1986
|
+
if (openBlock.kind === "ignored") {
|
|
1987
|
+
openBlocks.delete(event.index);
|
|
1988
|
+
continue;
|
|
1989
|
+
}
|
|
1990
|
+
const block = blocks[openBlock.contentIndex];
|
|
1991
|
+
if (!block || block.type !== openBlock.kind) {
|
|
1992
|
+
reportAnthropicEnvelopeAnomaly(`content_block_stop kind mismatch for index ${event.index}`);
|
|
1993
|
+
openBlocks.delete(event.index);
|
|
1994
|
+
continue;
|
|
1594
1995
|
}
|
|
1996
|
+
openBlocks.delete(event.index);
|
|
1997
|
+
closedBlockIndexes.add(event.index);
|
|
1998
|
+
finalizeStreamBlock(block, openBlock.contentIndex);
|
|
1595
1999
|
} else if (event.type === "message_delta") {
|
|
1596
|
-
|
|
2000
|
+
if (sawTerminalEnvelope) {
|
|
2001
|
+
// A spliced reconnect's second envelope must not overwrite the
|
|
2002
|
+
// completed message's stop reason or usage.
|
|
2003
|
+
reportAnthropicEnvelopeAnomaly("received message_delta after terminal stop signal");
|
|
2004
|
+
continue;
|
|
2005
|
+
}
|
|
2006
|
+
const delta = event.delta;
|
|
2007
|
+
const rawStopReason = delta?.stop_reason;
|
|
1597
2008
|
if (rawStopReason) {
|
|
1598
2009
|
output.stopReason = mapStopReason(rawStopReason);
|
|
1599
2010
|
sawTerminalEnvelope = true;
|
|
1600
2011
|
}
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
? "
|
|
1616
|
-
:
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
}
|
|
1621
|
-
if (event.usage.output_tokens != null) {
|
|
1622
|
-
output.usage.output = event.usage.output_tokens;
|
|
1623
|
-
}
|
|
1624
|
-
if (event.usage.cache_read_input_tokens != null) {
|
|
1625
|
-
output.usage.cacheRead = event.usage.cache_read_input_tokens;
|
|
2012
|
+
if (output.stopReason === "error") {
|
|
2013
|
+
const stopDetails = delta?.stop_details;
|
|
2014
|
+
output.stopDetails = stopDetails ?? (rawStopReason ? { type: rawStopReason } : null);
|
|
2015
|
+
if (stopDetails?.type === "refusal") {
|
|
2016
|
+
const explanation = stopDetails.explanation?.trim();
|
|
2017
|
+
const category = stopDetails.category;
|
|
2018
|
+
const label = category ? `Refusal (${category})` : "Refusal";
|
|
2019
|
+
output.errorMessage = explanation ? `${label}: ${explanation}` : label;
|
|
2020
|
+
} else if (!output.errorMessage) {
|
|
2021
|
+
// Anthropic flagged an error-class stop (refusal / sensitive) without
|
|
2022
|
+
// populating stop_details. Surface the raw reason instead of falling
|
|
2023
|
+
// through to the generic "unknown error" string when we throw below.
|
|
2024
|
+
output.errorMessage =
|
|
2025
|
+
rawStopReason === "refusal"
|
|
2026
|
+
? "Refusal (no details provided)"
|
|
2027
|
+
: rawStopReason === "sensitive"
|
|
2028
|
+
? "Content flagged by safety filters"
|
|
2029
|
+
: `Anthropic stream ended with stop_reason: ${rawStopReason ?? "unknown"}`;
|
|
2030
|
+
}
|
|
1626
2031
|
}
|
|
1627
|
-
|
|
1628
|
-
|
|
2032
|
+
const deltaUsage = event.usage;
|
|
2033
|
+
if (deltaUsage) {
|
|
2034
|
+
if (deltaUsage.input_tokens != null) {
|
|
2035
|
+
output.usage.input = deltaUsage.input_tokens;
|
|
2036
|
+
}
|
|
2037
|
+
if (deltaUsage.output_tokens != null) {
|
|
2038
|
+
output.usage.output = deltaUsage.output_tokens;
|
|
2039
|
+
}
|
|
2040
|
+
if (deltaUsage.cache_read_input_tokens != null) {
|
|
2041
|
+
output.usage.cacheRead = deltaUsage.cache_read_input_tokens;
|
|
2042
|
+
}
|
|
2043
|
+
if (deltaUsage.cache_creation_input_tokens != null) {
|
|
2044
|
+
output.usage.cacheWrite = deltaUsage.cache_creation_input_tokens;
|
|
2045
|
+
}
|
|
2046
|
+
applyAnthropicUsageExtras(output.usage, deltaUsage);
|
|
2047
|
+
output.usage.totalTokens =
|
|
2048
|
+
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
|
2049
|
+
calculateCost(model, output.usage);
|
|
1629
2050
|
}
|
|
1630
|
-
applyAnthropicUsageExtras(output.usage, event.usage);
|
|
1631
|
-
output.usage.totalTokens =
|
|
1632
|
-
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
|
1633
|
-
calculateCost(model, output.usage);
|
|
1634
2051
|
} else if (event.type === "message_stop") {
|
|
1635
2052
|
sawTerminalEnvelope = true;
|
|
2053
|
+
sawMessageStop = true;
|
|
1636
2054
|
}
|
|
1637
2055
|
}
|
|
1638
2056
|
|
|
@@ -1646,8 +2064,19 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1646
2064
|
if (!sawEvent || !sawMessageStart) {
|
|
1647
2065
|
throw createAnthropicStreamEnvelopeError("stream ended before message_start");
|
|
1648
2066
|
}
|
|
1649
|
-
if (!
|
|
1650
|
-
|
|
2067
|
+
if (!sawMessageStop) {
|
|
2068
|
+
reportAnthropicEnvelopeAnomaly("stream ended before message_stop");
|
|
2069
|
+
}
|
|
2070
|
+
if (openBlocks.size > 0) {
|
|
2071
|
+
for (const [openIndex, openBlock] of openBlocks) {
|
|
2072
|
+
reportAnthropicEnvelopeAnomaly(
|
|
2073
|
+
`stream ended with an unterminated ${openBlock.kind} block at index ${openIndex}`,
|
|
2074
|
+
);
|
|
2075
|
+
if (openBlock.kind === "ignored" || openBlock.contentIndex < 0) continue;
|
|
2076
|
+
const danglingBlock = blocks[openBlock.contentIndex];
|
|
2077
|
+
if (danglingBlock) finalizeStreamBlock(danglingBlock, openBlock.contentIndex);
|
|
2078
|
+
}
|
|
2079
|
+
openBlocks.clear();
|
|
1651
2080
|
}
|
|
1652
2081
|
|
|
1653
2082
|
if (output.stopReason === "aborted" || output.stopReason === "error") {
|
|
@@ -1662,8 +2091,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1662
2091
|
hasStrictAnthropicTools(params) &&
|
|
1663
2092
|
isAnthropicStrictGrammarTooLargeError(streamFailure)
|
|
1664
2093
|
) {
|
|
1665
|
-
|
|
1666
|
-
|
|
2094
|
+
// Log-only: the retried turn must not carry an errorMessage on
|
|
2095
|
+
// success (consumers treat its presence as failure).
|
|
2096
|
+
logger.warn("anthropic: strict tool grammar rejected, retrying without strict tools", {
|
|
2097
|
+
model: model.id,
|
|
2098
|
+
error: await finalizeErrorMessage(streamFailure, rawRequestDump),
|
|
2099
|
+
});
|
|
1667
2100
|
if (providerSessionState) {
|
|
1668
2101
|
providerSessionState.strictToolsDisabled = true;
|
|
1669
2102
|
}
|
|
@@ -1672,6 +2105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1672
2105
|
providerRetryAttempt = 0;
|
|
1673
2106
|
output.content.length = 0;
|
|
1674
2107
|
output.responseId = undefined;
|
|
2108
|
+
output.errorMessage = undefined;
|
|
1675
2109
|
output.providerPayload = undefined;
|
|
1676
2110
|
output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
|
|
1677
2111
|
output.stopReason = "stop";
|
|
@@ -1696,6 +2130,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1696
2130
|
providerRetryAttempt = 0;
|
|
1697
2131
|
output.content.length = 0;
|
|
1698
2132
|
output.responseId = undefined;
|
|
2133
|
+
output.errorMessage = undefined;
|
|
1699
2134
|
output.providerPayload = undefined;
|
|
1700
2135
|
output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
|
|
1701
2136
|
output.stopReason = "stop";
|
|
@@ -1721,7 +2156,13 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1721
2156
|
throw streamFailure;
|
|
1722
2157
|
}
|
|
1723
2158
|
providerRetryAttempt++;
|
|
1724
|
-
const
|
|
2159
|
+
const backoffDelayMs = calculateAnthropicRetryDelayMs(providerRetryAttempt - 1);
|
|
2160
|
+
// Honor the server's retry hint (`retry-after-ms`/`retry-after`) on
|
|
2161
|
+
// 429/529-style failures: retrying sooner than the server asked is a
|
|
2162
|
+
// guaranteed failure that just burns the retry budget.
|
|
2163
|
+
const headerDelayMs =
|
|
2164
|
+
streamFailure instanceof AnthropicApiError ? retryDelayFromHeaders(streamFailure.headers) : undefined;
|
|
2165
|
+
const delayMs = headerDelayMs !== undefined ? Math.max(headerDelayMs, backoffDelayMs) : backoffDelayMs;
|
|
1725
2166
|
if (options?.providerRetryWait) {
|
|
1726
2167
|
await options.providerRetryWait(delayMs, options.signal);
|
|
1727
2168
|
} else {
|
|
@@ -1729,14 +2170,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1729
2170
|
}
|
|
1730
2171
|
output.content.length = 0;
|
|
1731
2172
|
output.responseId = undefined;
|
|
1732
|
-
output.errorMessage =
|
|
2173
|
+
output.errorMessage = undefined;
|
|
2174
|
+
output.stopDetails = undefined;
|
|
1733
2175
|
output.providerPayload = undefined;
|
|
1734
2176
|
output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
|
|
1735
2177
|
output.stopReason = "stop";
|
|
1736
2178
|
firstTokenTime = undefined;
|
|
1737
2179
|
}
|
|
1738
2180
|
}
|
|
1739
|
-
|
|
1740
2181
|
output.duration = Date.now() - startTime;
|
|
1741
2182
|
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
1742
2183
|
if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
|
|
@@ -1753,8 +2194,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1753
2194
|
const firstEventTimeoutError = activeAbortTracker.getLocalAbortReason();
|
|
1754
2195
|
output.stopReason = activeAbortTracker.wasCallerAbort() ? "aborted" : "error";
|
|
1755
2196
|
output.errorStatus = extractHttpStatusFromError(error);
|
|
1756
|
-
|
|
1757
|
-
|
|
2197
|
+
try {
|
|
2198
|
+
output.errorMessage =
|
|
2199
|
+
firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
|
|
2200
|
+
output.errorMessage = rewriteCopilotError(output.errorMessage, error, model.provider);
|
|
2201
|
+
} catch {
|
|
2202
|
+
// finalizeErrorMessage must never take the stream down with it — a
|
|
2203
|
+
// throw here would skip stream.end() and hang result() forever.
|
|
2204
|
+
output.errorMessage = error instanceof Error ? error.message : String(error);
|
|
2205
|
+
}
|
|
1758
2206
|
output.duration = Date.now() - startTime;
|
|
1759
2207
|
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
1760
2208
|
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
@@ -1782,12 +2230,11 @@ function applyClaudeCodeSystemCache(
|
|
|
1782
2230
|
blocks: AnthropicSystemBlock[],
|
|
1783
2231
|
cacheControl: AnthropicCacheControl | undefined,
|
|
1784
2232
|
): number {
|
|
1785
|
-
if (!cacheControl || blocks.length
|
|
1786
|
-
blocks[2] = { ...blocks[2], cache_control: cacheControl };
|
|
1787
|
-
if (blocks.length === 3) return 1;
|
|
2233
|
+
if (!cacheControl || blocks.length === 0) return 0;
|
|
1788
2234
|
const lastIndex = blocks.length - 1;
|
|
1789
|
-
blocks[lastIndex]
|
|
1790
|
-
|
|
2235
|
+
if (blocks[lastIndex].cache_control != null) return 0;
|
|
2236
|
+
blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cloneAnthropicCacheControl(cacheControl) };
|
|
2237
|
+
return 1;
|
|
1791
2238
|
}
|
|
1792
2239
|
|
|
1793
2240
|
export function buildAnthropicSystemBlocks(
|
|
@@ -1797,7 +2244,7 @@ export function buildAnthropicSystemBlocks(
|
|
|
1797
2244
|
const { includeClaudeCodeInstruction = false, extraInstructions = [], firstUserMessageText, cacheControl } = options;
|
|
1798
2245
|
const sanitizedPrompts = normalizeSystemPrompts(systemPrompt);
|
|
1799
2246
|
const trimmedInstructions = extraInstructions.map(instruction => instruction.trim()).filter(Boolean);
|
|
1800
|
-
const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.
|
|
2247
|
+
const hasBillingHeader = sanitizedPrompts.some(prompt => prompt.startsWith(CLAUDE_BILLING_HEADER_PREFIX));
|
|
1801
2248
|
|
|
1802
2249
|
if (includeClaudeCodeInstruction && !hasBillingHeader) {
|
|
1803
2250
|
const blocks: AnthropicSystemBlock[] = [
|
|
@@ -1824,8 +2271,8 @@ export function buildAnthropicSystemBlocks(
|
|
|
1824
2271
|
blocks.push({ type: "text", text: prompt });
|
|
1825
2272
|
}
|
|
1826
2273
|
const lastIndex = blocks.length - 1;
|
|
1827
|
-
if (cacheControl && lastIndex >= 0) {
|
|
1828
|
-
blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cacheControl };
|
|
2274
|
+
if (cacheControl && lastIndex >= 0 && blocks[lastIndex].cache_control == null) {
|
|
2275
|
+
blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cloneAnthropicCacheControl(cacheControl) };
|
|
1829
2276
|
}
|
|
1830
2277
|
return blocks.length > 0 ? blocks : undefined;
|
|
1831
2278
|
}
|
|
@@ -1849,30 +2296,36 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1849
2296
|
thinkingEnabled = false,
|
|
1850
2297
|
thinkingDisplay,
|
|
1851
2298
|
isOAuth,
|
|
1852
|
-
onSseEvent,
|
|
1853
2299
|
claudeCodeSessionId,
|
|
1854
2300
|
} = args;
|
|
1855
|
-
const compat =
|
|
1856
|
-
const needsInterleavedBeta = interleavedThinking && !
|
|
2301
|
+
const compat = model.compat;
|
|
2302
|
+
const needsInterleavedBeta = interleavedThinking && !model.thinking?.supportsDisplay;
|
|
1857
2303
|
const needsFineGrainedToolStreamingBeta = hasTools && !compat.supportsEagerToolInputStreaming;
|
|
1858
2304
|
const oauthToken = isOAuth ?? isAnthropicOAuthToken(apiKey);
|
|
1859
2305
|
const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
|
|
1860
2306
|
const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
|
|
1861
2307
|
const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
|
|
2308
|
+
// Disable Bun's native ~300s pre-response fetch timeout (issue #2422).
|
|
2309
|
+
// `AnthropicMessagesClient` already arms its own DEFAULT_TIMEOUT_MS timer
|
|
2310
|
+
// per request, so the native ceiling can only short-circuit slow-prefill
|
|
2311
|
+
// streams before the configured watchdog gets to govern them.
|
|
2312
|
+
const fetchOptions: AnthropicFetchOptions = { ...(tlsFetchOptions ?? {}), timeout: false };
|
|
1862
2313
|
const baseFetch = args.fetch ?? fetch;
|
|
1863
2314
|
// Only OAuth requests inject the CC billing header; no API-key request can ever
|
|
1864
2315
|
// contain it, so there is no need to install the rewriter for those.
|
|
1865
2316
|
const cchFetch = oauthToken ? wrapFetchForCch(baseFetch) : baseFetch;
|
|
1866
|
-
const debugFetch = onSseEvent ? wrapFetchForSseDebug(cchFetch, event => onSseEvent(event, model)) : cchFetch;
|
|
1867
2317
|
if (model.provider === "github-copilot") {
|
|
1868
2318
|
const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
|
|
2319
|
+
// The GitHub Copilot Anthropic proxy doesn't accept Anthropic beta
|
|
2320
|
+
// features (and the catalog already forces `supportsEagerToolInputStreaming
|
|
2321
|
+
// = false` for this host, so `needsFineGrainedToolStreamingBeta` is true
|
|
2322
|
+
// whenever tools are present). Forward only caller-supplied betas.
|
|
1869
2323
|
const betaFeatures = [...extraBetas];
|
|
1870
|
-
if (needsFineGrainedToolStreamingBeta) {
|
|
1871
|
-
betaFeatures.push(fineGrainedToolStreamingBeta);
|
|
1872
|
-
}
|
|
1873
2324
|
const defaultHeaders = mergeHeaders(
|
|
1874
2325
|
{
|
|
1875
2326
|
Accept: stream ? "text/event-stream" : "application/json",
|
|
2327
|
+
"Content-Type": "application/json",
|
|
2328
|
+
"anthropic-version": "2023-06-01",
|
|
1876
2329
|
"Anthropic-Dangerous-Direct-Browser-Access": "true",
|
|
1877
2330
|
Authorization: `Bearer ${copilotApiKey}`,
|
|
1878
2331
|
...(betaFeatures.length > 0 ? { "anthropic-beta": buildBetaHeader([], betaFeatures) } : {}),
|
|
@@ -1889,8 +2342,8 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1889
2342
|
baseURL: baseUrl,
|
|
1890
2343
|
maxRetries: 5,
|
|
1891
2344
|
defaultHeaders,
|
|
1892
|
-
fetch:
|
|
1893
|
-
|
|
2345
|
+
fetch: cchFetch,
|
|
2346
|
+
fetchOptions,
|
|
1894
2347
|
};
|
|
1895
2348
|
}
|
|
1896
2349
|
|
|
@@ -1924,7 +2377,8 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1924
2377
|
baseURL: baseUrl,
|
|
1925
2378
|
maxRetries: 5,
|
|
1926
2379
|
defaultHeaders,
|
|
1927
|
-
fetch:
|
|
2380
|
+
fetch: cchFetch,
|
|
2381
|
+
fetchOptions,
|
|
1928
2382
|
};
|
|
1929
2383
|
}
|
|
1930
2384
|
|
|
@@ -1940,11 +2394,10 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1940
2394
|
baseURL: baseUrl,
|
|
1941
2395
|
maxRetries: 5,
|
|
1942
2396
|
defaultHeaders,
|
|
1943
|
-
|
|
1944
|
-
|
|
2397
|
+
fetch: cchFetch,
|
|
2398
|
+
fetchOptions,
|
|
1945
2399
|
};
|
|
1946
2400
|
}
|
|
1947
|
-
|
|
1948
2401
|
// OpenCode Zen's Anthropic-compatible gateway accepts bearer auth only;
|
|
1949
2402
|
// leaving apiKey set lets the client add X-Api-Key, which upstream Alibaba rejects.
|
|
1950
2403
|
if (model.provider === "opencode-zen") {
|
|
@@ -1955,20 +2408,27 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1955
2408
|
baseURL: baseUrl,
|
|
1956
2409
|
maxRetries: 5,
|
|
1957
2410
|
defaultHeaders,
|
|
1958
|
-
|
|
1959
|
-
|
|
2411
|
+
fetch: cchFetch,
|
|
2412
|
+
fetchOptions,
|
|
1960
2413
|
};
|
|
1961
2414
|
}
|
|
1962
2415
|
|
|
2416
|
+
const authorizationHeader = getHeaderCaseInsensitive(defaultHeaders, "Authorization");
|
|
2417
|
+
const shouldSuppressClientApiKey =
|
|
2418
|
+
!oauthToken &&
|
|
2419
|
+
!model.compat.officialEndpoint &&
|
|
2420
|
+
typeof authorizationHeader === "string" &&
|
|
2421
|
+
/^Bearer\s+/i.test(authorizationHeader);
|
|
2422
|
+
|
|
1963
2423
|
return {
|
|
1964
2424
|
isOAuthToken: oauthToken,
|
|
1965
|
-
apiKey: oauthToken ? null : apiKey,
|
|
2425
|
+
apiKey: oauthToken || shouldSuppressClientApiKey ? null : apiKey,
|
|
1966
2426
|
authToken: oauthToken ? apiKey : undefined,
|
|
1967
2427
|
baseURL: baseUrl,
|
|
1968
2428
|
maxRetries: 5,
|
|
1969
2429
|
defaultHeaders,
|
|
1970
|
-
fetch:
|
|
1971
|
-
|
|
2430
|
+
fetch: cchFetch,
|
|
2431
|
+
fetchOptions,
|
|
1972
2432
|
};
|
|
1973
2433
|
}
|
|
1974
2434
|
|
|
@@ -1987,6 +2447,7 @@ function disableThinkingIfToolChoiceForced(params: MessageCreateParamsStreaming)
|
|
|
1987
2447
|
if (toolChoice.type !== "any" && toolChoice.type !== "tool") return;
|
|
1988
2448
|
|
|
1989
2449
|
delete params.thinking;
|
|
2450
|
+
delete params.context_management;
|
|
1990
2451
|
const outputConfig = params.output_config as AnthropicOutputConfig | undefined;
|
|
1991
2452
|
if (!outputConfig) return;
|
|
1992
2453
|
|
|
@@ -1996,18 +2457,29 @@ function disableThinkingIfToolChoiceForced(params: MessageCreateParamsStreaming)
|
|
|
1996
2457
|
}
|
|
1997
2458
|
}
|
|
1998
2459
|
|
|
1999
|
-
function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming,
|
|
2460
|
+
function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, maxAllowedTokens: number): void {
|
|
2000
2461
|
const thinking = params.thinking;
|
|
2001
2462
|
if (thinking?.type !== "enabled") return;
|
|
2002
2463
|
|
|
2003
2464
|
const budgetTokens = thinking.budget_tokens ?? 0;
|
|
2004
2465
|
if (budgetTokens <= 0) return;
|
|
2005
2466
|
|
|
2006
|
-
const
|
|
2007
|
-
const
|
|
2008
|
-
|
|
2009
|
-
|
|
2467
|
+
const currentMaxTokens = Math.min(params.max_tokens ?? maxAllowedTokens, maxAllowedTokens);
|
|
2468
|
+
const raisedMaxTokens = Math.min(
|
|
2469
|
+
Math.max(currentMaxTokens, budgetTokens + OUTPUT_FALLBACK_BUFFER),
|
|
2470
|
+
maxAllowedTokens,
|
|
2471
|
+
);
|
|
2472
|
+
params.max_tokens = raisedMaxTokens;
|
|
2473
|
+
|
|
2474
|
+
if (budgetTokens + OUTPUT_FALLBACK_BUFFER <= raisedMaxTokens) return;
|
|
2475
|
+
|
|
2476
|
+
const clampedBudget = raisedMaxTokens - OUTPUT_FALLBACK_BUFFER;
|
|
2477
|
+
if (clampedBudget <= 0) {
|
|
2478
|
+
throw new Error(
|
|
2479
|
+
`Anthropic thinking budget requires max_tokens greater than ${OUTPUT_FALLBACK_BUFFER}; got ${raisedMaxTokens}`,
|
|
2480
|
+
);
|
|
2010
2481
|
}
|
|
2482
|
+
thinking.budget_tokens = clampedBudget;
|
|
2011
2483
|
}
|
|
2012
2484
|
|
|
2013
2485
|
type CacheControlBlock = {
|
|
@@ -2017,39 +2489,44 @@ type CacheControlBlock = {
|
|
|
2017
2489
|
function applyCacheControlToLastBlock<T extends CacheControlBlock>(
|
|
2018
2490
|
blocks: T[],
|
|
2019
2491
|
cacheControl: AnthropicCacheControl,
|
|
2020
|
-
):
|
|
2021
|
-
if (blocks.length === 0) return;
|
|
2492
|
+
): boolean {
|
|
2493
|
+
if (blocks.length === 0) return false;
|
|
2022
2494
|
const lastIndex = blocks.length - 1;
|
|
2023
|
-
blocks[lastIndex]
|
|
2495
|
+
if (blocks[lastIndex].cache_control != null) return false;
|
|
2496
|
+
blocks[lastIndex] = { ...blocks[lastIndex], cache_control: cloneAnthropicCacheControl(cacheControl) };
|
|
2497
|
+
return true;
|
|
2024
2498
|
}
|
|
2025
2499
|
|
|
2026
2500
|
function applyCacheControlToLastTextBlock(
|
|
2027
2501
|
blocks: Array<ContentBlockParam & CacheControlBlock>,
|
|
2028
2502
|
cacheControl: AnthropicCacheControl,
|
|
2029
|
-
):
|
|
2030
|
-
if (blocks.length === 0) return;
|
|
2503
|
+
): boolean {
|
|
2504
|
+
if (blocks.length === 0) return false;
|
|
2031
2505
|
for (let i = blocks.length - 1; i >= 0; i--) {
|
|
2032
2506
|
if (blocks[i].type === "text") {
|
|
2033
|
-
blocks[i]
|
|
2034
|
-
|
|
2507
|
+
if (blocks[i].cache_control != null) return false;
|
|
2508
|
+
blocks[i] = { ...blocks[i], cache_control: cloneAnthropicCacheControl(cacheControl) };
|
|
2509
|
+
return true;
|
|
2035
2510
|
}
|
|
2036
2511
|
}
|
|
2037
|
-
|
|
2512
|
+
// No text block — fall back to the last block that accepts cache_control;
|
|
2513
|
+
// thinking/redacted_thinking blocks reject the field with a 400.
|
|
2514
|
+
for (let i = blocks.length - 1; i >= 0; i--) {
|
|
2515
|
+
const type = blocks[i].type;
|
|
2516
|
+
if (type === "thinking" || type === "redacted_thinking") continue;
|
|
2517
|
+
if (blocks[i].cache_control != null) return false;
|
|
2518
|
+
blocks[i] = { ...blocks[i], cache_control: cloneAnthropicCacheControl(cacheControl) };
|
|
2519
|
+
return true;
|
|
2520
|
+
}
|
|
2521
|
+
return false;
|
|
2038
2522
|
}
|
|
2039
2523
|
|
|
2040
2524
|
function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?: AnthropicCacheControl): void {
|
|
2041
2525
|
if (!cacheControl) return;
|
|
2042
2526
|
|
|
2043
|
-
// Skip if cache_control breakpoints were already placed externally on messages.
|
|
2044
|
-
for (const message of params.messages) {
|
|
2045
|
-
if (Array.isArray(message.content)) {
|
|
2046
|
-
if ((message.content as Array<ContentBlockParam & CacheControlBlock>).some(b => b.cache_control != null))
|
|
2047
|
-
return;
|
|
2048
|
-
}
|
|
2049
|
-
}
|
|
2050
|
-
|
|
2051
2527
|
const MAX_CACHE_BREAKPOINTS = 4;
|
|
2052
|
-
let cacheBreakpointsUsed =
|
|
2528
|
+
let cacheBreakpointsUsed = countCacheControlBreakpoints(params);
|
|
2529
|
+
if (cacheBreakpointsUsed >= MAX_CACHE_BREAKPOINTS) return;
|
|
2053
2530
|
let isCCLayout = false;
|
|
2054
2531
|
|
|
2055
2532
|
if (params.system && Array.isArray(params.system) && params.system.length > 0) {
|
|
@@ -2057,9 +2534,12 @@ function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?:
|
|
|
2057
2534
|
params.system.length >= 3 &&
|
|
2058
2535
|
(params.system[0] as { text?: string }).text?.startsWith(CLAUDE_BILLING_HEADER_PREFIX) === true;
|
|
2059
2536
|
if (isCCLayout) {
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2537
|
+
const placed = Math.min(
|
|
2538
|
+
MAX_CACHE_BREAKPOINTS - cacheBreakpointsUsed,
|
|
2539
|
+
applyClaudeCodeSystemCache(params.system as AnthropicSystemBlock[], cacheControl),
|
|
2540
|
+
);
|
|
2541
|
+
cacheBreakpointsUsed += placed;
|
|
2542
|
+
} else if (applyCacheControlToLastBlock(params.system, cacheControl)) {
|
|
2063
2543
|
cacheBreakpointsUsed++;
|
|
2064
2544
|
}
|
|
2065
2545
|
}
|
|
@@ -2072,14 +2552,19 @@ function applyPromptCaching(params: MessageCreateParamsStreaming, cacheControl?:
|
|
|
2072
2552
|
const message = params.messages[i];
|
|
2073
2553
|
if (!message) continue;
|
|
2074
2554
|
if (typeof message.content === "string") {
|
|
2075
|
-
message.content = [
|
|
2555
|
+
message.content = [
|
|
2556
|
+
{ type: "text", text: message.content, cache_control: cloneAnthropicCacheControl(cacheControl) },
|
|
2557
|
+
];
|
|
2076
2558
|
cacheBreakpointsUsed++;
|
|
2077
2559
|
} else if (Array.isArray(message.content) && message.content.length > 0) {
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2560
|
+
if (
|
|
2561
|
+
applyCacheControlToLastTextBlock(
|
|
2562
|
+
message.content as Array<ContentBlockParam & CacheControlBlock>,
|
|
2563
|
+
cacheControl,
|
|
2564
|
+
)
|
|
2565
|
+
) {
|
|
2566
|
+
cacheBreakpointsUsed++;
|
|
2567
|
+
}
|
|
2083
2568
|
}
|
|
2084
2569
|
}
|
|
2085
2570
|
}
|
|
@@ -2092,7 +2577,9 @@ function normalizeCacheControlBlockTtl(block: CacheControlBlock, seenFiveMinute:
|
|
|
2092
2577
|
return;
|
|
2093
2578
|
}
|
|
2094
2579
|
if (seenFiveMinute.value) {
|
|
2095
|
-
|
|
2580
|
+
const normalized = cloneAnthropicCacheControl(cacheControl);
|
|
2581
|
+
delete normalized.ttl;
|
|
2582
|
+
block.cache_control = normalized;
|
|
2096
2583
|
}
|
|
2097
2584
|
}
|
|
2098
2585
|
|
|
@@ -2222,139 +2709,163 @@ function resolveAnthropicAdaptiveEffort(
|
|
|
2222
2709
|
return mapEffortToAnthropicAdaptiveEffort(model, requestedEffort);
|
|
2223
2710
|
}
|
|
2224
2711
|
|
|
2225
|
-
function startsWithAfterAsciiWhitespace(value: string, prefix: string): boolean {
|
|
2226
|
-
let index = 0;
|
|
2227
|
-
while (index < value.length) {
|
|
2228
|
-
const code = value.charCodeAt(index);
|
|
2229
|
-
if (code !== 9 && code !== 10 && code !== 13 && code !== 32) break;
|
|
2230
|
-
index++;
|
|
2231
|
-
}
|
|
2232
|
-
return value.startsWith(prefix, index);
|
|
2233
|
-
}
|
|
2234
|
-
|
|
2235
|
-
function isClaudeSyntheticUserText(value: string): boolean {
|
|
2236
|
-
return startsWithAfterAsciiWhitespace(value, "<system-reminder>");
|
|
2237
|
-
}
|
|
2238
|
-
|
|
2239
2712
|
function extractClaudeCodeFirstUserMessageText(messages: readonly Message[]): string {
|
|
2240
2713
|
for (const message of messages) {
|
|
2241
2714
|
if (message.role !== "user") continue;
|
|
2242
2715
|
const { content } = message;
|
|
2243
2716
|
if (typeof content === "string") return content;
|
|
2244
2717
|
if (!Array.isArray(content)) return "";
|
|
2245
|
-
let fallback: string | undefined;
|
|
2246
2718
|
for (const block of content) {
|
|
2247
|
-
if (block.type
|
|
2248
|
-
fallback ??= block.text;
|
|
2249
|
-
if (!isClaudeSyntheticUserText(block.text)) return block.text;
|
|
2719
|
+
if (block.type === "text") return block.text;
|
|
2250
2720
|
}
|
|
2251
|
-
return
|
|
2721
|
+
return "";
|
|
2252
2722
|
}
|
|
2253
2723
|
return "";
|
|
2254
2724
|
}
|
|
2255
2725
|
|
|
2256
|
-
function applyClaudeCodeContextManagement(params: MessageCreateParamsStreaming, isOAuthToken: boolean): void {
|
|
2257
|
-
if (!isOAuthToken || params.thinking?.type !== "adaptive") return;
|
|
2258
|
-
params.context_management = {
|
|
2259
|
-
edits: [{ type: "clear_thinking_20251015", keep: "all" }],
|
|
2260
|
-
};
|
|
2261
|
-
}
|
|
2262
|
-
|
|
2263
2726
|
function buildParams(
|
|
2264
2727
|
model: Model<"anthropic-messages">,
|
|
2265
|
-
baseUrl: string,
|
|
2266
2728
|
context: Context,
|
|
2267
2729
|
isOAuthToken: boolean,
|
|
2268
2730
|
options?: AnthropicOptions,
|
|
2269
2731
|
disableStrictTools = false,
|
|
2270
2732
|
): MessageCreateParamsStreaming {
|
|
2271
|
-
const { cacheControl } = getCacheControl(model,
|
|
2272
|
-
const params: MessageCreateParamsStreaming = {
|
|
2273
|
-
model: model.id,
|
|
2274
|
-
messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
|
|
2275
|
-
max_tokens: options?.maxTokens || model.maxTokens,
|
|
2276
|
-
stream: true,
|
|
2277
|
-
};
|
|
2278
|
-
if (options?.temperature !== undefined && !options?.thinkingEnabled) {
|
|
2279
|
-
params.temperature = options.temperature;
|
|
2280
|
-
}
|
|
2281
|
-
|
|
2282
|
-
if (options?.topP !== undefined) {
|
|
2283
|
-
params.top_p = options.topP;
|
|
2284
|
-
}
|
|
2285
|
-
if (options?.topK !== undefined) {
|
|
2286
|
-
params.top_k = options.topK;
|
|
2287
|
-
}
|
|
2288
|
-
if (options?.stopSequences?.length) {
|
|
2289
|
-
const seqs = options.stopSequences;
|
|
2290
|
-
if (seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX && !warnedStopSequencesTrim) {
|
|
2291
|
-
warnedStopSequencesTrim = true;
|
|
2292
|
-
logger.warn("anthropic: stop_sequences exceeds 4; extra entries dropped", {
|
|
2293
|
-
received: seqs.length,
|
|
2294
|
-
kept: ANTHROPIC_STOP_SEQUENCES_MAX,
|
|
2295
|
-
});
|
|
2296
|
-
}
|
|
2297
|
-
params.stop_sequences =
|
|
2298
|
-
seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX ? seqs.slice(0, ANTHROPIC_STOP_SEQUENCES_MAX) : seqs;
|
|
2299
|
-
}
|
|
2733
|
+
const { cacheControl } = getCacheControl(model, options?.cacheRetention, isOAuthToken);
|
|
2300
2734
|
|
|
2301
|
-
//
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2306
|
-
|
|
2735
|
+
// Pre-compute system blocks so they occupy the right slot in the serialized body.
|
|
2736
|
+
const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
|
|
2737
|
+
const firstUserMessageText = shouldInjectClaudeCodeInstruction
|
|
2738
|
+
? extractClaudeCodeFirstUserMessageText(context.messages)
|
|
2739
|
+
: "";
|
|
2740
|
+
const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
|
|
2741
|
+
includeClaudeCodeInstruction: shouldInjectClaudeCodeInstruction,
|
|
2742
|
+
firstUserMessageText,
|
|
2743
|
+
});
|
|
2307
2744
|
|
|
2745
|
+
// Pre-compute tools.
|
|
2746
|
+
let tools: AnthropicWireTool[] | undefined;
|
|
2308
2747
|
if (context.tools) {
|
|
2309
|
-
|
|
2748
|
+
tools = convertTools(
|
|
2310
2749
|
context.tools,
|
|
2311
2750
|
isOAuthToken,
|
|
2312
2751
|
disableStrictTools || model.provider === "github-copilot",
|
|
2313
|
-
|
|
2752
|
+
model.compat.supportsEagerToolInputStreaming,
|
|
2314
2753
|
);
|
|
2315
2754
|
} else if (isOAuthToken) {
|
|
2316
|
-
|
|
2755
|
+
tools = [];
|
|
2317
2756
|
}
|
|
2318
2757
|
|
|
2758
|
+
// Pre-compute metadata.
|
|
2759
|
+
const metadataAccountId = readAnthropicMetadataAccountId(options?.metadata);
|
|
2760
|
+
const metadataUserId = resolveAnthropicMetadataUserId(
|
|
2761
|
+
options?.metadata?.user_id,
|
|
2762
|
+
isOAuthToken,
|
|
2763
|
+
options?.sessionId,
|
|
2764
|
+
metadataAccountId,
|
|
2765
|
+
);
|
|
2766
|
+
const metadata = metadataUserId ? { user_id: metadataUserId } : undefined;
|
|
2767
|
+
|
|
2768
|
+
// Pre-compute thinking + output_config effort.
|
|
2769
|
+
let thinking: MessageCreateParamsStreaming["thinking"] | undefined;
|
|
2770
|
+
let outputConfigEffort: AnthropicEffort | undefined;
|
|
2319
2771
|
if (model.reasoning) {
|
|
2320
2772
|
if (options?.thinkingEnabled) {
|
|
2321
2773
|
const mode = model.thinking?.mode;
|
|
2322
2774
|
const effort = resolveAnthropicAdaptiveEffort(model, options);
|
|
2323
|
-
|
|
2324
|
-
const compat = getAnthropicCompat(model);
|
|
2775
|
+
const compat = model.compat;
|
|
2325
2776
|
if (mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
|
|
2326
2777
|
const adaptive: { type: "adaptive"; display?: AnthropicThinkingDisplay } = { type: "adaptive" };
|
|
2327
|
-
// Starting with Claude Opus 4.7
|
|
2328
|
-
// response by default. Opt into summarized
|
|
2329
|
-
// streaming with human-readable content for
|
|
2330
|
-
|
|
2778
|
+
// Starting with Claude Opus 4.7 and Claude Fable/Mythos 5, adaptive thinking
|
|
2779
|
+
// content is omitted from the response by default. Opt into summarized
|
|
2780
|
+
// reasoning so thinking deltas keep streaming with human-readable content for
|
|
2781
|
+
// callers that rely on it. The `display` field is gated strictly on model
|
|
2782
|
+
// support: Opus 4.6 / Sonnet 4.6+ reject it with a 400, so an explicit
|
|
2783
|
+
// `thinkingDisplay` MUST NOT force it onto a model that can't accept it.
|
|
2784
|
+
if (model.thinking?.supportsDisplay) {
|
|
2331
2785
|
adaptive.display = options.thinkingDisplay ?? "summarized";
|
|
2332
2786
|
}
|
|
2333
|
-
|
|
2334
|
-
if (effort)
|
|
2335
|
-
getAnthropicOutputConfig(params).effort = effort;
|
|
2336
|
-
}
|
|
2787
|
+
thinking = adaptive;
|
|
2788
|
+
if (effort) outputConfigEffort = effort;
|
|
2337
2789
|
} else {
|
|
2338
|
-
|
|
2790
|
+
thinking = {
|
|
2339
2791
|
type: "enabled",
|
|
2340
2792
|
budget_tokens: options.thinkingBudgetTokens || 1024,
|
|
2341
2793
|
display: options.thinkingDisplay ?? "summarized",
|
|
2342
2794
|
};
|
|
2343
|
-
if (mode === "anthropic-budget-effort" && effort)
|
|
2344
|
-
getAnthropicOutputConfig(params).effort = effort;
|
|
2345
|
-
}
|
|
2795
|
+
if (mode === "anthropic-budget-effort" && effort) outputConfigEffort = effort;
|
|
2346
2796
|
}
|
|
2347
2797
|
} else if (options?.thinkingEnabled === false) {
|
|
2348
|
-
|
|
2798
|
+
const compat = model.compat;
|
|
2799
|
+
if (model.thinking?.mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
|
|
2800
|
+
// Adaptive-only Claude models (Opus 4.6+, Sonnet 4.6+, Fable/Mythos 5) reject
|
|
2801
|
+
// `thinking.type: "disabled"` — adaptive thinking cannot be switched off.
|
|
2802
|
+
// Omit the thinking field (the API defaults to adaptive) and pin the
|
|
2803
|
+
// lowest effort so "thinking off" calls stay cheap instead of failing
|
|
2804
|
+
// the request with a 400 (a hidden-thinking toggle must never break it).
|
|
2805
|
+
outputConfigEffort = "low";
|
|
2806
|
+
} else {
|
|
2807
|
+
thinking = { type: "disabled" };
|
|
2808
|
+
}
|
|
2349
2809
|
}
|
|
2350
2810
|
}
|
|
2351
2811
|
|
|
2352
|
-
|
|
2353
|
-
|
|
2812
|
+
// Pre-compute context_management (depends on thinking).
|
|
2813
|
+
const contextManagement =
|
|
2814
|
+
isOAuthToken && thinking?.type === "adaptive"
|
|
2815
|
+
? { edits: [{ type: "clear_thinking_20251015" as const, keep: "all" as const }] }
|
|
2816
|
+
: undefined;
|
|
2817
|
+
|
|
2818
|
+
// Pre-compute output_config.
|
|
2819
|
+
const outputConfigEntries: AnthropicOutputConfig = {};
|
|
2820
|
+
if (outputConfigEffort) outputConfigEntries.effort = outputConfigEffort;
|
|
2821
|
+
if (options?.taskBudget) outputConfigEntries.task_budget = options.taskBudget;
|
|
2822
|
+
const outputConfig = Object.keys(outputConfigEntries).length ? outputConfigEntries : undefined;
|
|
2823
|
+
|
|
2824
|
+
// Claude Code requests at most 64k output tokens; clamp only OAuth requests,
|
|
2825
|
+
// where the wire fingerprint must match. API-key callers keep the full model
|
|
2826
|
+
// ceiling (e.g. 128k on Opus 4.8).
|
|
2827
|
+
const modelMaxTokens = model.maxTokens ?? CLAUDE_CODE_MAX_OUTPUT_TOKENS;
|
|
2828
|
+
const maxOutputTokens = isOAuthToken ? Math.min(CLAUDE_CODE_MAX_OUTPUT_TOKENS, modelMaxTokens) : modelMaxTokens;
|
|
2829
|
+
|
|
2830
|
+
// Build params in the canonical field order: model → messages → system → tools →
|
|
2831
|
+
// metadata → max_tokens → thinking → context_management → output_config → stream.
|
|
2832
|
+
const params: MessageCreateParamsStreaming = {
|
|
2833
|
+
model: options?.requestModelId ?? model.requestModelId ?? model.id,
|
|
2834
|
+
messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
|
|
2835
|
+
...(systemBlocks && { system: systemBlocks }),
|
|
2836
|
+
...(tools !== undefined && { tools }),
|
|
2837
|
+
...(metadata && { metadata }),
|
|
2838
|
+
max_tokens: Math.min(maxOutputTokens, options?.maxTokens || modelMaxTokens),
|
|
2839
|
+
...(thinking && { thinking }),
|
|
2840
|
+
...(contextManagement && { context_management: contextManagement }),
|
|
2841
|
+
...(outputConfig && { output_config: outputConfig }),
|
|
2842
|
+
stream: true,
|
|
2843
|
+
};
|
|
2844
|
+
|
|
2845
|
+
// Opus 4.7+ and Fable/Mythos 5 reject non-default sampling parameters with 400 error.
|
|
2846
|
+
const thinkingType = params.thinking?.type;
|
|
2847
|
+
const allowSamplingParams =
|
|
2848
|
+
model.compat.supportsSamplingParams && (thinkingType === undefined || thinkingType === "disabled");
|
|
2849
|
+
if (allowSamplingParams && options?.temperature !== undefined) {
|
|
2850
|
+
params.temperature = options.temperature;
|
|
2354
2851
|
}
|
|
2355
|
-
|
|
2356
|
-
|
|
2357
|
-
|
|
2852
|
+
if (allowSamplingParams && options?.topP !== undefined) {
|
|
2853
|
+
params.top_p = options.topP;
|
|
2854
|
+
}
|
|
2855
|
+
if (allowSamplingParams && options?.topK !== undefined) {
|
|
2856
|
+
params.top_k = options.topK;
|
|
2857
|
+
}
|
|
2858
|
+
if (options?.stopSequences?.length) {
|
|
2859
|
+
const seqs = options.stopSequences;
|
|
2860
|
+
if (seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX && !warnedStopSequencesTrim) {
|
|
2861
|
+
warnedStopSequencesTrim = true;
|
|
2862
|
+
logger.warn("anthropic: stop_sequences exceeds 4; extra entries dropped", {
|
|
2863
|
+
received: seqs.length,
|
|
2864
|
+
kept: ANTHROPIC_STOP_SEQUENCES_MAX,
|
|
2865
|
+
});
|
|
2866
|
+
}
|
|
2867
|
+
params.stop_sequences =
|
|
2868
|
+
seqs.length > ANTHROPIC_STOP_SEQUENCES_MAX ? seqs.slice(0, ANTHROPIC_STOP_SEQUENCES_MAX) : seqs;
|
|
2358
2869
|
}
|
|
2359
2870
|
|
|
2360
2871
|
if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
|
|
@@ -2369,37 +2880,18 @@ function buildParams(
|
|
|
2369
2880
|
} else {
|
|
2370
2881
|
params.tool_choice = options.toolChoice;
|
|
2371
2882
|
}
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2379
|
-
// rather than mutated so the caller's `options.toolChoice` is never aliased.
|
|
2380
|
-
if (disablesParallelToolUse(model.id) && params.tools && params.tools.length > 0) {
|
|
2381
|
-
const current = params.tool_choice;
|
|
2382
|
-
if (!current) {
|
|
2383
|
-
params.tool_choice = { type: "auto", disable_parallel_tool_use: true };
|
|
2384
|
-
} else if (current.type !== "none") {
|
|
2385
|
-
params.tool_choice = { ...current, disable_parallel_tool_use: true };
|
|
2883
|
+
// Claude Fable/Mythos 5 reject forced tool use outright ("tool_choice forces
|
|
2884
|
+
// tool use is not compatible with this model"). Downgrade any/tool → auto so the
|
|
2885
|
+
// request succeeds; the tool stays available and the caller's prompt steers
|
|
2886
|
+
// the model toward it.
|
|
2887
|
+
const choiceType = params.tool_choice?.type;
|
|
2888
|
+
if ((choiceType === "any" || choiceType === "tool") && !model.compat.supportsForcedToolChoice) {
|
|
2889
|
+
params.tool_choice = { type: "auto" };
|
|
2386
2890
|
}
|
|
2387
2891
|
}
|
|
2388
2892
|
|
|
2389
|
-
const shouldInjectClaudeCodeInstruction = isOAuthToken && !model.id.startsWith("claude-3-5-haiku");
|
|
2390
|
-
const firstUserMessageText = shouldInjectClaudeCodeInstruction
|
|
2391
|
-
? extractClaudeCodeFirstUserMessageText(context.messages)
|
|
2392
|
-
: "";
|
|
2393
|
-
const systemBlocks = buildAnthropicSystemBlocks(context.systemPrompt, {
|
|
2394
|
-
includeClaudeCodeInstruction: shouldInjectClaudeCodeInstruction,
|
|
2395
|
-
firstUserMessageText,
|
|
2396
|
-
});
|
|
2397
|
-
if (systemBlocks) {
|
|
2398
|
-
params.system = systemBlocks;
|
|
2399
|
-
}
|
|
2400
2893
|
disableThinkingIfToolChoiceForced(params);
|
|
2401
|
-
|
|
2402
|
-
ensureMaxTokensForThinking(params, model);
|
|
2894
|
+
ensureMaxTokensForThinking(params, maxOutputTokens);
|
|
2403
2895
|
applyPromptCaching(params, cacheControl);
|
|
2404
2896
|
enforceCacheControlLimit(params, 4);
|
|
2405
2897
|
normalizeCacheControlTtlOrdering(params);
|
|
@@ -2407,52 +2899,50 @@ function buildParams(
|
|
|
2407
2899
|
return params;
|
|
2408
2900
|
}
|
|
2409
2901
|
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2415
|
-
* polluting requests to api.anthropic.com or other compatible proxies.
|
|
2416
|
-
* See: https://github.com/uttamtrivedi/Prometheus/issues/814
|
|
2417
|
-
*/
|
|
2418
|
-
function isZaiAnthropicEndpoint(model: Model<"anthropic-messages">): boolean {
|
|
2419
|
-
if (model.provider === "zai") return true;
|
|
2420
|
-
const baseUrl = model.baseUrl;
|
|
2421
|
-
if (!baseUrl) return false;
|
|
2422
|
-
try {
|
|
2423
|
-
return new URL(baseUrl).hostname.toLowerCase() === "api.z.ai";
|
|
2424
|
-
} catch {
|
|
2425
|
-
return false;
|
|
2902
|
+
const EMPTY_ERROR_TOOL_RESULT_TEXT = "Tool failed with no output.";
|
|
2903
|
+
|
|
2904
|
+
function isEmptyToolResultWireContent(content: AnthropicToolResultContent): boolean {
|
|
2905
|
+
if (typeof content === "string") {
|
|
2906
|
+
return content.trim().length === 0;
|
|
2426
2907
|
}
|
|
2908
|
+
return content.length === 0;
|
|
2427
2909
|
}
|
|
2428
2910
|
|
|
2429
|
-
|
|
2430
|
-
|
|
2431
|
-
|
|
2432
|
-
|
|
2433
|
-
|
|
2434
|
-
|
|
2435
|
-
function isNonSigningAnthropicEndpoint(model: Model<"anthropic-messages">): boolean {
|
|
2436
|
-
// Known non-signing providers
|
|
2437
|
-
if (model.provider === "zai" || model.provider === "deepseek") return true;
|
|
2438
|
-
const baseUrl = model.baseUrl;
|
|
2439
|
-
if (!baseUrl) return false;
|
|
2440
|
-
try {
|
|
2441
|
-
const hostname = new URL(baseUrl).hostname.toLowerCase();
|
|
2442
|
-
return hostname === "api.deepseek.com" || hostname.endsWith(".deepseek.com");
|
|
2443
|
-
} catch {
|
|
2444
|
-
return false;
|
|
2911
|
+
function ensureErrorToolResultWireContent(
|
|
2912
|
+
content: AnthropicToolResultContent,
|
|
2913
|
+
isError: boolean | undefined,
|
|
2914
|
+
): AnthropicToolResultContent {
|
|
2915
|
+
if (!isError || !isEmptyToolResultWireContent(content)) {
|
|
2916
|
+
return content;
|
|
2445
2917
|
}
|
|
2918
|
+
return typeof content === "string"
|
|
2919
|
+
? EMPTY_ERROR_TOOL_RESULT_TEXT
|
|
2920
|
+
: [{ type: "text", text: EMPTY_ERROR_TOOL_RESULT_TEXT }];
|
|
2446
2921
|
}
|
|
2447
2922
|
|
|
2448
|
-
function buildToolResultBlock(
|
|
2923
|
+
function buildToolResultBlock(
|
|
2924
|
+
model: Model<"anthropic-messages">,
|
|
2925
|
+
msg: ToolResultMessage,
|
|
2926
|
+
hoistedImages: ContentBlockParam[],
|
|
2927
|
+
): ContentBlockParam {
|
|
2928
|
+
let content = convertContentBlocks(msg.content, model.input.includes("image"));
|
|
2929
|
+
// Anthropic rejects images inside error tool results ("all content must be
|
|
2930
|
+
// type `text` if `is_error` is true") — keep the text in the block and
|
|
2931
|
+
// hoist the images after the message's tool_result run.
|
|
2932
|
+
if (msg.isError && typeof content !== "string" && content.some(block => block.type === "image")) {
|
|
2933
|
+
for (const block of content) {
|
|
2934
|
+
if (block.type === "image") hoistedImages.push(block);
|
|
2935
|
+
}
|
|
2936
|
+
content = content.filter(block => block.type === "text");
|
|
2937
|
+
}
|
|
2938
|
+
content = ensureErrorToolResultWireContent(content, msg.isError);
|
|
2449
2939
|
const block: ContentBlockParam = {
|
|
2450
2940
|
type: "tool_result",
|
|
2451
2941
|
tool_use_id: msg.toolCallId,
|
|
2452
|
-
content
|
|
2942
|
+
content,
|
|
2453
2943
|
is_error: msg.isError,
|
|
2454
2944
|
};
|
|
2455
|
-
if (
|
|
2945
|
+
if (model.compat.requiresToolResultId) {
|
|
2456
2946
|
// Z.AI workaround (issue #814): include `id` aliased to `tool_use_id`.
|
|
2457
2947
|
(block as unknown as Record<string, unknown>).id = msg.toolCallId;
|
|
2458
2948
|
}
|
|
@@ -2461,20 +2951,51 @@ function buildToolResultBlock(model: Model<"anthropic-messages">, msg: ToolResul
|
|
|
2461
2951
|
|
|
2462
2952
|
/**
|
|
2463
2953
|
* A single Anthropic conversation turn, including the mid-conversation
|
|
2464
|
-
* `system` role (Opus 4.8+).
|
|
2954
|
+
* `system` role (Opus 4.8+ and Fable/Mythos 5).
|
|
2465
2955
|
*/
|
|
2466
2956
|
export type AnthropicMessageParam = MessageParam;
|
|
2467
2957
|
|
|
2958
|
+
/**
|
|
2959
|
+
* Recursively replace lone surrogates in string leaves. Identity-preserving:
|
|
2960
|
+
* returns the input object/array when nothing changed.
|
|
2961
|
+
*/
|
|
2962
|
+
function toWellFormedDeep(value: unknown): unknown {
|
|
2963
|
+
if (typeof value === "string") {
|
|
2964
|
+
const wellFormed = value.toWellFormed();
|
|
2965
|
+
return wellFormed === value ? value : wellFormed;
|
|
2966
|
+
}
|
|
2967
|
+
if (Array.isArray(value)) {
|
|
2968
|
+
let changed = false;
|
|
2969
|
+
const next = value.map(entry => {
|
|
2970
|
+
const sanitized = toWellFormedDeep(entry);
|
|
2971
|
+
if (sanitized !== entry) changed = true;
|
|
2972
|
+
return sanitized;
|
|
2973
|
+
});
|
|
2974
|
+
return changed ? next : value;
|
|
2975
|
+
}
|
|
2976
|
+
if (isRecord(value)) {
|
|
2977
|
+
let changed = false;
|
|
2978
|
+
const next: Record<string, unknown> = {};
|
|
2979
|
+
for (const [key, entry] of Object.entries(value)) {
|
|
2980
|
+
const sanitized = toWellFormedDeep(entry);
|
|
2981
|
+
if (sanitized !== entry) changed = true;
|
|
2982
|
+
next[key] = sanitized;
|
|
2983
|
+
}
|
|
2984
|
+
return changed ? next : value;
|
|
2985
|
+
}
|
|
2986
|
+
return value;
|
|
2987
|
+
}
|
|
2988
|
+
|
|
2468
2989
|
export function convertAnthropicMessages(
|
|
2469
2990
|
messages: Message[],
|
|
2470
2991
|
model: Model<"anthropic-messages">,
|
|
2471
2992
|
isOAuthToken: boolean,
|
|
2472
2993
|
): AnthropicMessageParam[] {
|
|
2473
|
-
const params: AnthropicMessageParam[] = [];
|
|
2474
2994
|
// Indices of params emitted from `developer` messages. After the main pass,
|
|
2475
2995
|
// the ones whose placement satisfies Anthropic's mid-conversation rules are
|
|
2476
2996
|
// upgraded from the `user` role to the authoritative `system` role.
|
|
2477
2997
|
const developerParamIndices: number[] = [];
|
|
2998
|
+
const params: AnthropicMessageParam[] = [];
|
|
2478
2999
|
|
|
2479
3000
|
const transformedMessages = transformMessages(messages, model, normalizeToolCallId);
|
|
2480
3001
|
|
|
@@ -2533,7 +3054,7 @@ export function convertAnthropicMessages(
|
|
|
2533
3054
|
}
|
|
2534
3055
|
if (block.thinking.trim().length === 0) continue;
|
|
2535
3056
|
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
|
|
2536
|
-
if (
|
|
3057
|
+
if (model.compat.replayUnsignedThinking) {
|
|
2537
3058
|
blocks.push({
|
|
2538
3059
|
type: "thinking",
|
|
2539
3060
|
thinking: block.thinking.toWellFormed(),
|
|
@@ -2563,7 +3084,12 @@ export function convertAnthropicMessages(
|
|
|
2563
3084
|
type: "tool_use",
|
|
2564
3085
|
id: block.id,
|
|
2565
3086
|
name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
|
|
2566
|
-
|
|
3087
|
+
// Always sanitize: the model itself can emit lone-surrogate escapes
|
|
3088
|
+
// in tool-argument JSON (streamed out fine, rejected with a 400 on
|
|
3089
|
+
// replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
|
|
3090
|
+
// is identity-preserving, so well-formed arguments stay
|
|
3091
|
+
// byte-identical and prompt-cache prefixes are unaffected.
|
|
3092
|
+
input: toWellFormedDeep(block.arguments ?? {}),
|
|
2567
3093
|
});
|
|
2568
3094
|
}
|
|
2569
3095
|
}
|
|
@@ -2575,21 +3101,30 @@ export function convertAnthropicMessages(
|
|
|
2575
3101
|
} else if (msg.role === "toolResult") {
|
|
2576
3102
|
// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint
|
|
2577
3103
|
const toolResults: ContentBlockParam[] = [];
|
|
3104
|
+
// Images stripped out of error tool results, re-attached after the run.
|
|
3105
|
+
const hoistedImages: ContentBlockParam[] = [];
|
|
2578
3106
|
|
|
2579
3107
|
// Add the current tool result
|
|
2580
|
-
toolResults.push(buildToolResultBlock(model, msg));
|
|
3108
|
+
toolResults.push(buildToolResultBlock(model, msg, hoistedImages));
|
|
2581
3109
|
|
|
2582
3110
|
// Look ahead for consecutive toolResult messages
|
|
2583
3111
|
let j = i + 1;
|
|
2584
3112
|
while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
|
|
2585
3113
|
const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
|
|
2586
|
-
toolResults.push(buildToolResultBlock(model, nextMsg));
|
|
3114
|
+
toolResults.push(buildToolResultBlock(model, nextMsg, hoistedImages));
|
|
2587
3115
|
j++;
|
|
2588
3116
|
}
|
|
2589
3117
|
|
|
2590
3118
|
// Skip the messages we've already processed
|
|
2591
3119
|
i = j - 1;
|
|
2592
3120
|
|
|
3121
|
+
if (hoistedImages.length > 0) {
|
|
3122
|
+
toolResults.push(
|
|
3123
|
+
{ type: "text", text: "Attached image(s) from the tool result(s) above:" },
|
|
3124
|
+
...hoistedImages,
|
|
3125
|
+
);
|
|
3126
|
+
}
|
|
3127
|
+
|
|
2593
3128
|
// Add a single user message with all tool results
|
|
2594
3129
|
params.push({
|
|
2595
3130
|
role: "user",
|
|
@@ -2599,22 +3134,34 @@ export function convertAnthropicMessages(
|
|
|
2599
3134
|
}
|
|
2600
3135
|
|
|
2601
3136
|
// Upgrade developer-origin params to mid-conversation `system` messages where
|
|
2602
|
-
// Anthropic's placement rules allow it (Opus 4.8+ on
|
|
3137
|
+
// Anthropic's placement rules allow it (Opus 4.8+ / Fable/Mythos 5 on first-party API).
|
|
2603
3138
|
// Rules: a system message must immediately follow a `user` turn and must be
|
|
2604
3139
|
// the last entry or be followed by an `assistant` turn — never first, and
|
|
2605
3140
|
// never consecutive. Requiring the next param to be `assistant` (or absent)
|
|
2606
3141
|
// covers both the "followed by assistant / last" and "no consecutive system"
|
|
2607
3142
|
// constraints. Anything that does not qualify stays a `user` message.
|
|
2608
|
-
if (developerParamIndices.length > 0 &&
|
|
3143
|
+
if (developerParamIndices.length > 0 && model.compat.supportsMidConversationSystem) {
|
|
2609
3144
|
for (const idx of developerParamIndices) {
|
|
2610
3145
|
const followsUser = idx > 0 && params[idx - 1]?.role === "user";
|
|
2611
3146
|
const next = params[idx + 1];
|
|
2612
3147
|
const lastOrBeforeAssistant = idx === params.length - 1 || next?.role === "assistant";
|
|
2613
|
-
|
|
2614
|
-
|
|
3148
|
+
// System content is text-only on the wire; a developer turn carrying
|
|
3149
|
+
// image blocks must stay a `user` message or the API rejects it.
|
|
3150
|
+
const content = params[idx].content;
|
|
3151
|
+
const textOnly = typeof content === "string" || content.every(block => block.type === "text");
|
|
3152
|
+
if (followsUser && lastOrBeforeAssistant && textOnly) {
|
|
3153
|
+
params[idx] = { role: "system", content };
|
|
2615
3154
|
}
|
|
2616
3155
|
}
|
|
2617
3156
|
}
|
|
3157
|
+
// Dropped empty user/developer turns can leave two assistant params adjacent;
|
|
3158
|
+
// the API rejects consecutive assistant messages. Repair with the same neutral
|
|
3159
|
+
// nudge used for trailing-assistant prefill below.
|
|
3160
|
+
for (let i = params.length - 1; i > 0; i--) {
|
|
3161
|
+
if (params[i].role === "assistant" && params[i - 1]?.role === "assistant") {
|
|
3162
|
+
params.splice(i, 0, { role: "user", content: "Continue." });
|
|
3163
|
+
}
|
|
3164
|
+
}
|
|
2618
3165
|
if (params.length > 0 && params[params.length - 1]?.role === "assistant") {
|
|
2619
3166
|
params.push({ role: "user", content: "Continue." });
|
|
2620
3167
|
}
|
|
@@ -2683,6 +3230,7 @@ function isJsonSchemaArrayNode(schema: Record<string, unknown>): boolean {
|
|
|
2683
3230
|
const t = schema.type;
|
|
2684
3231
|
if (t === "array") return true;
|
|
2685
3232
|
if (Array.isArray(t) && t.includes("array") && !t.includes("object")) return true;
|
|
3233
|
+
if (schema.items !== undefined || Array.isArray(schema.prefixItems)) return true;
|
|
2686
3234
|
return false;
|
|
2687
3235
|
}
|
|
2688
3236
|
|
|
@@ -2709,6 +3257,13 @@ function pickAnthropicScalarType(type: unknown): string | undefined {
|
|
|
2709
3257
|
}
|
|
2710
3258
|
return undefined;
|
|
2711
3259
|
}
|
|
3260
|
+
function pickAnthropicEffectiveScalarType(schema: Record<string, unknown>): string | undefined {
|
|
3261
|
+
const explicit = pickAnthropicScalarType(schema.type);
|
|
3262
|
+
if (explicit) return explicit;
|
|
3263
|
+
if (isRecord(schema.properties)) return "object";
|
|
3264
|
+
if (schema.items !== undefined || Array.isArray(schema.prefixItems)) return "array";
|
|
3265
|
+
return undefined;
|
|
3266
|
+
}
|
|
2712
3267
|
|
|
2713
3268
|
function anthropicPerTypeKeep(scalarType: string | undefined): Set<string> | undefined {
|
|
2714
3269
|
switch (scalarType) {
|
|
@@ -2723,14 +3278,6 @@ function anthropicPerTypeKeep(scalarType: string | undefined): Set<string> | und
|
|
|
2723
3278
|
}
|
|
2724
3279
|
}
|
|
2725
3280
|
|
|
2726
|
-
/**
|
|
2727
|
-
* Per-schema-object memoization slot for the normalized Anthropic tool form. We stamp
|
|
2728
|
-
* the result onto the host via a `Symbol` property (mirroring `utils/schema/stamps.ts`)
|
|
2729
|
-
* instead of using a `WeakMap`: it's a single hidden-class slot, so warm reads are
|
|
2730
|
-
* direct property access and write-once cycles resolve to the in-progress result.
|
|
2731
|
-
*/
|
|
2732
|
-
const kAnthropicToolNormal = Symbol("pi.schema.anthropic.toolNormal");
|
|
2733
|
-
|
|
2734
3281
|
/**
|
|
2735
3282
|
* Normalize a JSON Schema node for Anthropic tool `input_schema`.
|
|
2736
3283
|
*
|
|
@@ -2751,20 +3298,20 @@ const kAnthropicToolNormal = Symbol("pi.schema.anthropic.toolNormal");
|
|
|
2751
3298
|
* pass downstream demotes those shapes to non-strict instead of fabricating a closed
|
|
2752
3299
|
* object, so callers like the resolve tool keep working open-map semantics.
|
|
2753
3300
|
*/
|
|
2754
|
-
|
|
2755
|
-
|
|
3301
|
+
function normalizeAnthropicToolSchemaNode(
|
|
3302
|
+
schema: unknown,
|
|
3303
|
+
cache: WeakMap<Record<string, unknown>, Record<string, unknown>>,
|
|
3304
|
+
): unknown {
|
|
3305
|
+
if (Array.isArray(schema)) return schema.map(entry => normalizeAnthropicToolSchemaNode(entry, cache));
|
|
2756
3306
|
if (!isRecord(schema)) return schema;
|
|
2757
3307
|
|
|
2758
|
-
const
|
|
2759
|
-
const existing = slot[kAnthropicToolNormal];
|
|
3308
|
+
const existing = cache.get(schema);
|
|
2760
3309
|
if (existing !== undefined) return existing;
|
|
2761
3310
|
|
|
2762
3311
|
const result: Record<string, unknown> = {};
|
|
2763
|
-
|
|
2764
|
-
// (mirrors the WeakMap-set-before-recurse pattern the original implementation used).
|
|
2765
|
-
Object.defineProperty(schema, kAnthropicToolNormal, { value: result, writable: true, configurable: true });
|
|
3312
|
+
cache.set(schema, result);
|
|
2766
3313
|
|
|
2767
|
-
const scalarType =
|
|
3314
|
+
const scalarType = pickAnthropicEffectiveScalarType(schema);
|
|
2768
3315
|
const perTypeKeep = anthropicPerTypeKeep(scalarType);
|
|
2769
3316
|
const spill: Array<[string, unknown]> = [];
|
|
2770
3317
|
|
|
@@ -2803,12 +3350,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
|
2803
3350
|
const sourceProperties = result.properties as Record<string, unknown>;
|
|
2804
3351
|
for (const propName in sourceProperties) {
|
|
2805
3352
|
if (!Object.hasOwn(sourceProperties, propName)) continue;
|
|
2806
|
-
normalizedProperties[propName] =
|
|
3353
|
+
normalizedProperties[propName] = normalizeAnthropicToolSchemaNode(sourceProperties[propName], cache);
|
|
2807
3354
|
}
|
|
2808
3355
|
result.properties = normalizedProperties;
|
|
2809
3356
|
}
|
|
2810
3357
|
if (isRecord(result.additionalProperties)) {
|
|
2811
|
-
const normalized =
|
|
3358
|
+
const normalized = normalizeAnthropicToolSchemaNode(result.additionalProperties, cache);
|
|
2812
3359
|
if (isRecord(normalized) && Object.keys(normalized).length === 0) {
|
|
2813
3360
|
result.additionalProperties = true;
|
|
2814
3361
|
} else {
|
|
@@ -2816,17 +3363,17 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
|
2816
3363
|
}
|
|
2817
3364
|
}
|
|
2818
3365
|
if (Array.isArray(result.items)) {
|
|
2819
|
-
result.items = result.items.map(item =>
|
|
3366
|
+
result.items = result.items.map(item => normalizeAnthropicToolSchemaNode(item, cache));
|
|
2820
3367
|
} else if (isRecord(result.items)) {
|
|
2821
|
-
result.items =
|
|
3368
|
+
result.items = normalizeAnthropicToolSchemaNode(result.items, cache);
|
|
2822
3369
|
}
|
|
2823
3370
|
if (Array.isArray(result.prefixItems)) {
|
|
2824
|
-
result.prefixItems = result.prefixItems.map(item =>
|
|
3371
|
+
result.prefixItems = result.prefixItems.map(item => normalizeAnthropicToolSchemaNode(item, cache));
|
|
2825
3372
|
}
|
|
2826
3373
|
for (const key of COMBINATOR_KEYS) {
|
|
2827
3374
|
const variants = result[key];
|
|
2828
3375
|
if (Array.isArray(variants)) {
|
|
2829
|
-
result[key] = variants.map(variant =>
|
|
3376
|
+
result[key] = variants.map(variant => normalizeAnthropicToolSchemaNode(variant, cache));
|
|
2830
3377
|
}
|
|
2831
3378
|
}
|
|
2832
3379
|
for (const defsKey of ["$defs", "definitions"] as const) {
|
|
@@ -2836,7 +3383,7 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
|
2836
3383
|
const sourceDefs = definitions as Record<string, unknown>;
|
|
2837
3384
|
for (const name in sourceDefs) {
|
|
2838
3385
|
if (!Object.hasOwn(sourceDefs, name)) continue;
|
|
2839
|
-
normalizedDefs[name] =
|
|
3386
|
+
normalizedDefs[name] = normalizeAnthropicToolSchemaNode(sourceDefs[name], cache);
|
|
2840
3387
|
}
|
|
2841
3388
|
result[defsKey] = normalizedDefs;
|
|
2842
3389
|
}
|
|
@@ -2845,6 +3392,10 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
|
2845
3392
|
return result;
|
|
2846
3393
|
}
|
|
2847
3394
|
|
|
3395
|
+
export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
3396
|
+
return normalizeAnthropicToolSchemaNode(schema, new WeakMap());
|
|
3397
|
+
}
|
|
3398
|
+
|
|
2848
3399
|
type AnthropicToolSchemaPlan = {
|
|
2849
3400
|
inputSchema: AnthropicToolInputSchema;
|
|
2850
3401
|
strict: boolean;
|
|
@@ -2865,6 +3416,24 @@ function hasNullVariant(schema: Record<string, unknown>): boolean {
|
|
|
2865
3416
|
if (Array.isArray(schema.type) && schema.type.includes("null")) return true;
|
|
2866
3417
|
return Array.isArray(schema.anyOf) && schema.anyOf.some(variant => isRecord(variant) && variant.type === "null");
|
|
2867
3418
|
}
|
|
3419
|
+
function hasAnthropicSchemaDefiningKeyword(schema: Record<string, unknown>): boolean {
|
|
3420
|
+
if (
|
|
3421
|
+
schema.type !== undefined ||
|
|
3422
|
+
schema.properties !== undefined ||
|
|
3423
|
+
schema.additionalProperties !== undefined ||
|
|
3424
|
+
schema.items !== undefined ||
|
|
3425
|
+
schema.prefixItems !== undefined ||
|
|
3426
|
+
schema.enum !== undefined ||
|
|
3427
|
+
schema.const !== undefined ||
|
|
3428
|
+
schema.$ref !== undefined
|
|
3429
|
+
) {
|
|
3430
|
+
return true;
|
|
3431
|
+
}
|
|
3432
|
+
for (const key of COMBINATOR_KEYS) {
|
|
3433
|
+
if (schema[key] !== undefined) return true;
|
|
3434
|
+
}
|
|
3435
|
+
return schema.$defs !== undefined || schema.definitions !== undefined;
|
|
3436
|
+
}
|
|
2868
3437
|
|
|
2869
3438
|
function makeAnthropicNullableSchema(schema: unknown, budget: AnthropicStrictBudget): unknown | undefined {
|
|
2870
3439
|
if (isRecord(schema)) {
|
|
@@ -2903,6 +3472,8 @@ function normalizeAnthropicStrictSchemaNode(
|
|
|
2903
3472
|
const cached = cache.get(schema);
|
|
2904
3473
|
if (cached) return cached;
|
|
2905
3474
|
|
|
3475
|
+
if (!hasAnthropicSchemaDefiningKeyword(schema)) return undefined;
|
|
3476
|
+
|
|
2906
3477
|
// Strict tool use only supports closed objects. Open maps stay available on
|
|
2907
3478
|
// the non-strict schema plan instead of producing an Anthropic 400.
|
|
2908
3479
|
if (isJsonSchemaObjectNode(schema) && schema.additionalProperties !== false) {
|
|
@@ -2992,6 +3563,38 @@ function normalizeAnthropicStrictSchemaNode(
|
|
|
2992
3563
|
return result;
|
|
2993
3564
|
}
|
|
2994
3565
|
|
|
3566
|
+
const ANTHROPIC_STRICT_INCOMPATIBLE_KEYWORDS = [
|
|
3567
|
+
"oneOf",
|
|
3568
|
+
"allOf",
|
|
3569
|
+
"$ref",
|
|
3570
|
+
"patternProperties",
|
|
3571
|
+
"propertyNames",
|
|
3572
|
+
] as const;
|
|
3573
|
+
|
|
3574
|
+
/**
|
|
3575
|
+
* Anthropic's strict grammar subset supports anyOf/type-array unions only.
|
|
3576
|
+
* oneOf/allOf/$ref compile unpredictably (rejections arrive as 400s the
|
|
3577
|
+
* grammar-too-large fallback does not recognize, so they would hard-fail the
|
|
3578
|
+
* turn), and patternProperties/propertyNames describe open key sets that the
|
|
3579
|
+
* strict pipeline's injected `additionalProperties: false` would contradict.
|
|
3580
|
+
* Runs against the raw wire schema — the base normalizer spills several of
|
|
3581
|
+
* these keywords into the description, erasing the evidence.
|
|
3582
|
+
*/
|
|
3583
|
+
function hasAnthropicStrictIncompatibleKeyword(schema: unknown, seen = new Set<object>()): boolean {
|
|
3584
|
+
if (Array.isArray(schema)) {
|
|
3585
|
+
if (seen.has(schema)) return false;
|
|
3586
|
+
seen.add(schema);
|
|
3587
|
+
return schema.some(entry => hasAnthropicStrictIncompatibleKeyword(entry, seen));
|
|
3588
|
+
}
|
|
3589
|
+
if (!isRecord(schema)) return false;
|
|
3590
|
+
if (seen.has(schema)) return false;
|
|
3591
|
+
seen.add(schema);
|
|
3592
|
+
for (const keyword of ANTHROPIC_STRICT_INCOMPATIBLE_KEYWORDS) {
|
|
3593
|
+
if (schema[keyword] !== undefined) return true;
|
|
3594
|
+
}
|
|
3595
|
+
return Object.values(schema).some(value => hasAnthropicStrictIncompatibleKeyword(value, seen));
|
|
3596
|
+
}
|
|
3597
|
+
|
|
2995
3598
|
function normalizeAnthropicStrictSchema(
|
|
2996
3599
|
schema: Record<string, unknown>,
|
|
2997
3600
|
optionalRemaining: number,
|
|
@@ -3031,7 +3634,9 @@ function buildAnthropicToolSchemaPlans(tools: Tool[], disableStrictTools = false
|
|
|
3031
3634
|
|
|
3032
3635
|
const candidateIndexes = tools.flatMap((tool, index) => {
|
|
3033
3636
|
if (!ANTHROPIC_STRICT_TOOL_ALLOWLIST.has(tool.name)) return [];
|
|
3034
|
-
|
|
3637
|
+
if (tool.strict === false) return [];
|
|
3638
|
+
if (hasAnthropicStrictIncompatibleKeyword(toolWireSchema(tool))) return [];
|
|
3639
|
+
return [index];
|
|
3035
3640
|
});
|
|
3036
3641
|
|
|
3037
3642
|
let strictToolCount = 0;
|
|
@@ -3089,6 +3694,10 @@ function mapStopReason(reason: string): StopReason {
|
|
|
3089
3694
|
return "stop";
|
|
3090
3695
|
case "max_tokens":
|
|
3091
3696
|
return "length";
|
|
3697
|
+
// Generation ran into the model's context window (default behavior on
|
|
3698
|
+
// Sonnet 4.5+); the streamed content is valid, just truncated.
|
|
3699
|
+
case "model_context_window_exceeded":
|
|
3700
|
+
return "length";
|
|
3092
3701
|
case "tool_use":
|
|
3093
3702
|
return "toolUse";
|
|
3094
3703
|
case "refusal":
|
|
@@ -3096,11 +3705,15 @@ function mapStopReason(reason: string): StopReason {
|
|
|
3096
3705
|
case "pause_turn": // Stop is good enough -> resubmit
|
|
3097
3706
|
return "stop";
|
|
3098
3707
|
case "stop_sequence":
|
|
3099
|
-
return "stop"; //
|
|
3708
|
+
return "stop"; // A caller-supplied stop_sequences entry matched; the turn completed normally.
|
|
3100
3709
|
case "sensitive": // Content flagged by safety filters (not yet in SDK types)
|
|
3101
3710
|
return "error";
|
|
3102
3711
|
default:
|
|
3103
|
-
//
|
|
3104
|
-
|
|
3712
|
+
// New stop reasons ship server-side first ("sensitive",
|
|
3713
|
+
// "model_context_window_exceeded") and arrive on the trailing
|
|
3714
|
+
// message_delta after all content has streamed. Degrade to a normal
|
|
3715
|
+
// stop instead of failing the fully streamed turn.
|
|
3716
|
+
reportAnthropicEnvelopeAnomaly(`unhandled stop reason: ${reason}`);
|
|
3717
|
+
return "stop";
|
|
3105
3718
|
}
|
|
3106
3719
|
}
|