@prometheus-ai/ai 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -11,11 +11,12 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Activated when a {@link Model} has `transport: "prometheus-native"` set; the
|
|
13
13
|
* dispatch hook lives in `streamSimple()` (see `../stream.ts`). Used by
|
|
14
|
-
* containerized prometheus deployments (
|
|
14
|
+
* containerized prometheus deployments (isolated slots, the swarm extension) that
|
|
15
15
|
* route every LLM call through a credential-holding sidecar so the slot
|
|
16
16
|
* itself stays credential-free.
|
|
17
17
|
*/
|
|
18
18
|
import { readSseJson } from "@prometheus-ai/utils";
|
|
19
|
+
import { ProviderHttpError } from "../errors";
|
|
19
20
|
import type {
|
|
20
21
|
Api,
|
|
21
22
|
AssistantMessage,
|
|
@@ -58,7 +59,19 @@ function buildWireOptions(options: SimpleStreamOptions | undefined): Record<stri
|
|
|
58
59
|
return wire;
|
|
59
60
|
}
|
|
60
61
|
|
|
61
|
-
|
|
62
|
+
/**
|
|
63
|
+
* Non-2xx response from the auth-gateway `/v1/prometheus/stream` endpoint. `code`
|
|
64
|
+
* carries the gateway's error-type token (`authentication_error`,
|
|
65
|
+
* `rate_limit_error`, `upstream_error`, ...).
|
|
66
|
+
*/
|
|
67
|
+
export class AuthGatewayError extends ProviderHttpError {
|
|
68
|
+
constructor(message: string, status: number, headers?: Headers, code?: string) {
|
|
69
|
+
super(message, status, { headers, code });
|
|
70
|
+
this.name = "AuthGatewayError";
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
async function decodeGatewayError(response: Response): Promise<AuthGatewayError> {
|
|
62
75
|
const status = response.status;
|
|
63
76
|
let body: unknown;
|
|
64
77
|
try {
|
|
@@ -71,16 +84,16 @@ async function decodeGatewayError(response: Response): Promise<Error> {
|
|
|
71
84
|
if (typeof err === "object" && err !== null) {
|
|
72
85
|
const message = (err as { message?: unknown }).message;
|
|
73
86
|
const type = (err as { type?: unknown }).type;
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
87
|
+
return new AuthGatewayError(
|
|
88
|
+
typeof message === "string" ? message : `auth-gateway ${status}`,
|
|
89
|
+
status,
|
|
90
|
+
response.headers,
|
|
91
|
+
typeof type === "string" ? type : undefined,
|
|
92
|
+
);
|
|
78
93
|
}
|
|
79
94
|
}
|
|
80
95
|
const text = typeof body === "string" ? body : JSON.stringify(body);
|
|
81
|
-
|
|
82
|
-
(err as { status?: number }).status = status;
|
|
83
|
-
return err;
|
|
96
|
+
return new AuthGatewayError(`auth-gateway ${status}: ${text || response.statusText}`, status, response.headers);
|
|
84
97
|
}
|
|
85
98
|
|
|
86
99
|
/**
|
|
@@ -149,9 +162,12 @@ export function streamPrometheusNative<TApi extends Api>(
|
|
|
149
162
|
try {
|
|
150
163
|
const url = resolveStreamUrl(model as Model<Api>);
|
|
151
164
|
const fetchImpl = options?.fetch ?? globalThis.fetch;
|
|
152
|
-
const headers = buildHeaders(
|
|
165
|
+
const headers = buildHeaders(
|
|
166
|
+
model as Model<Api>,
|
|
167
|
+
typeof options?.apiKey === "string" ? options.apiKey : undefined,
|
|
168
|
+
);
|
|
153
169
|
const body = JSON.stringify({
|
|
154
|
-
modelId: model.id
|
|
170
|
+
modelId: `${model.provider}/${model.id}`,
|
|
155
171
|
context,
|
|
156
172
|
options: buildWireOptions(options),
|
|
157
173
|
stream: true,
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Where the OpenAI / Anthropic / Responses route modules translate foreign
|
|
5
5
|
* wire shapes through Prometheus AI's canonical {@link Context}, this module accepts
|
|
6
6
|
* the canonical shape *directly* — for clients that already speak Prometheus AI
|
|
7
|
-
* (containerized prometheus, the swarm extension,
|
|
7
|
+
* (containerized prometheus, the swarm extension, sidecar auth-gateways).
|
|
8
8
|
* Skipping the wire-format → Context → wire-format round-trip cuts
|
|
9
9
|
* per-request CPU but, more importantly, avoids the quantization that those
|
|
10
10
|
* translations impose on first-class Prometheus AI fields (service tier, cache
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
* 200 JSON (stream=false): { message: AssistantMessage }
|
|
26
26
|
* 4xx/5xx: { error: { type, message } }
|
|
27
27
|
*/
|
|
28
|
+
import type { AuthGatewayStreamControl } from "../auth-gateway/types";
|
|
28
29
|
import type { AssistantMessageEventStream, Context, SimpleStreamOptions } from "../types";
|
|
29
30
|
|
|
30
31
|
export interface PrometheusNativeParsedRequest {
|
|
@@ -156,36 +157,62 @@ const SSE_DONE = SSE_ENCODER.encode("data: [DONE]\n\n");
|
|
|
156
157
|
* canonical event type IS the wire type. Including the rolling
|
|
157
158
|
* `partial: AssistantMessage` on every delta is quadratic in turn length
|
|
158
159
|
* on the wire, but for the loopback / sidecar topology this transport
|
|
159
|
-
* targets (containerized prometheus → host gateway,
|
|
160
|
+
* targets (containerized prometheus → host gateway, isolated slot → prometheus-auth-gateway
|
|
160
161
|
* sidecar) the bandwidth cost is negligible compared to provider latency —
|
|
161
162
|
* and the client gets to feed the events straight into its existing
|
|
162
163
|
* `AssistantMessageEventStream.push()` plumbing with zero translation.
|
|
163
164
|
*/
|
|
164
|
-
export function encodeStream(
|
|
165
|
+
export function encodeStream(
|
|
166
|
+
events: AssistantMessageEventStream,
|
|
167
|
+
_requestedModelId?: string,
|
|
168
|
+
_options?: SimpleStreamOptions,
|
|
169
|
+
control?: AuthGatewayStreamControl,
|
|
170
|
+
): ReadableStream<Uint8Array> {
|
|
171
|
+
let cancelled = control?.signal?.aborted === true;
|
|
172
|
+
const markCancelled = () => {
|
|
173
|
+
cancelled = true;
|
|
174
|
+
};
|
|
175
|
+
control?.signal?.addEventListener("abort", markCancelled, { once: true });
|
|
165
176
|
return new ReadableStream<Uint8Array>({
|
|
166
177
|
async start(controller) {
|
|
167
178
|
try {
|
|
179
|
+
if (cancelled) {
|
|
180
|
+
controller.close();
|
|
181
|
+
return;
|
|
182
|
+
}
|
|
168
183
|
for await (const event of events) {
|
|
184
|
+
if (cancelled) return;
|
|
169
185
|
controller.enqueue(SSE_ENCODER.encode(`data: ${JSON.stringify(event)}\n\n`));
|
|
170
186
|
if (event.type === "done" || event.type === "error") break;
|
|
171
187
|
}
|
|
172
|
-
|
|
173
|
-
|
|
188
|
+
if (!cancelled) {
|
|
189
|
+
controller.enqueue(SSE_DONE);
|
|
190
|
+
controller.close();
|
|
191
|
+
}
|
|
174
192
|
} catch (err) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
193
|
+
if (!cancelled) {
|
|
194
|
+
// Best-effort error envelope so the client iterator resolves
|
|
195
|
+
// instead of hanging on the dropped connection. Shape matches the
|
|
196
|
+
// canonical `error` event minus the unrecoverable `error:
|
|
197
|
+
// AssistantMessage` payload (we don't have a usable one here).
|
|
198
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
199
|
+
controller.enqueue(
|
|
200
|
+
SSE_ENCODER.encode(
|
|
201
|
+
`data: ${JSON.stringify({ type: "error", reason: "error", errorMessage: message })}\n\n`,
|
|
202
|
+
),
|
|
203
|
+
);
|
|
204
|
+
controller.enqueue(SSE_DONE);
|
|
205
|
+
controller.close();
|
|
206
|
+
}
|
|
207
|
+
} finally {
|
|
208
|
+
control?.signal?.removeEventListener("abort", markCancelled);
|
|
187
209
|
}
|
|
188
210
|
},
|
|
211
|
+
cancel(reason) {
|
|
212
|
+
cancelled = true;
|
|
213
|
+
control?.signal?.removeEventListener("abort", markCancelled);
|
|
214
|
+
control?.onCancel?.(reason);
|
|
215
|
+
},
|
|
189
216
|
});
|
|
190
217
|
}
|
|
191
218
|
|
|
@@ -1,14 +1,4 @@
|
|
|
1
|
-
import
|
|
2
|
-
import type {
|
|
3
|
-
Api,
|
|
4
|
-
AssistantMessage,
|
|
5
|
-
DeveloperMessage,
|
|
6
|
-
Message,
|
|
7
|
-
Model,
|
|
8
|
-
ToolCall,
|
|
9
|
-
ToolResultMessage,
|
|
10
|
-
UserMessage,
|
|
11
|
-
} from "../types";
|
|
1
|
+
import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage, UserMessage } from "../types";
|
|
12
2
|
|
|
13
3
|
const enum ToolCallStatus {
|
|
14
4
|
/** A tool result has already been emitted for this tool call; later duplicates must be skipped. */
|
|
@@ -17,6 +7,123 @@ const enum ToolCallStatus {
|
|
|
17
7
|
Aborted = 2,
|
|
18
8
|
}
|
|
19
9
|
|
|
10
|
+
/**
|
|
11
|
+
* Maximum tool-call id length the strictest replay provider accepts.
|
|
12
|
+
*
|
|
13
|
+
* Anthropic requires `^[a-zA-Z0-9_-]+$` with a 64-char cap; Google and Codex
|
|
14
|
+
* `normalizeToolCallId` implementations cap individual id segments to the same
|
|
15
|
+
* 64-char ceiling. Replacement ids minted here flow back through
|
|
16
|
+
* `convertAnthropicMessages` (and friends) unchanged, so the `_dupN` suffix
|
|
17
|
+
* MUST not push a normalized id past this bound.
|
|
18
|
+
*/
|
|
19
|
+
const MAX_TOOL_CALL_ID_LENGTH = 64;
|
|
20
|
+
|
|
21
|
+
function appendDuplicateSuffix(originalId: string, suffix: string, maxLength: number): string {
|
|
22
|
+
// Responses-family ids are composites (`callId|itemId`): the wire call_id is
|
|
23
|
+
// the FIRST segment (normalizeResponsesToolCallId splits on `|`), so the
|
|
24
|
+
// suffix must land on every segment or the duplicate collapses back onto the
|
|
25
|
+
// original call_id at encode time. The length budget applies per segment,
|
|
26
|
+
// matching the per-segment caps of the provider normalizers.
|
|
27
|
+
if (originalId.includes("|")) {
|
|
28
|
+
return originalId
|
|
29
|
+
.split("|")
|
|
30
|
+
.map(segment => appendSegmentDuplicateSuffix(segment, suffix, maxLength))
|
|
31
|
+
.join("|");
|
|
32
|
+
}
|
|
33
|
+
return appendSegmentDuplicateSuffix(originalId, suffix, maxLength);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function appendSegmentDuplicateSuffix(segment: string, suffix: string, maxLength: number): string {
|
|
37
|
+
if (segment.length + suffix.length <= maxLength) return `${segment}${suffix}`;
|
|
38
|
+
const prefixBudget = Math.max(0, maxLength - suffix.length);
|
|
39
|
+
return `${segment.slice(0, prefixBudget)}${suffix}`;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
 * Queue entry describing how the next tool result for an id must be handled:
 * `undefined` leaves the result's `toolCallId` untouched, an object reroutes
 * it to `replacementId`.
 */
type PendingToolResultRewrite = { replacementId: string } | undefined;

/**
 * Makes tool call ids unique across a conversation and keeps tool results
 * pointing at the call they belong to.
 *
 * Assistant messages are scanned in order. The first tool call seen with a
 * given id keeps it; every later call with the same id receives a suffixed
 * replacement built by `appendDuplicateSuffix` from
 * `duplicateSuffixPrefix` + a counter, bumping the counter until the
 * generated id is unused. For each call an entry is queued under the ORIGINAL
 * id; each subsequent `toolResult` carrying that id consumes the oldest entry
 * and is rewritten to the replacement id when the entry holds one.
 *
 * Messages and blocks that need no change are returned by reference; changed
 * ones are shallow copies. The input array is not mutated.
 *
 * @param messages - Conversation to process, in chronological order.
 * @param maxToolCallIdLength - Length limit forwarded to `appendDuplicateSuffix`.
 * @param duplicateSuffixPrefix - Text placed before the numeric duplicate counter.
 * @returns A new array with the same number of messages.
 */
function deduplicateToolCallIds(
	messages: Message[],
	maxToolCallIdLength = MAX_TOOL_CALL_ID_LENGTH,
	duplicateSuffixPrefix = "_dup",
): Message[] {
	// Per id (original or generated): the counter to start from for its next duplicate.
	const usageById = new Map<string, number>();
	// Per original id: FIFO of calls that are still waiting for their result.
	const resultQueues = new Map<string, PendingToolResultRewrite[]>();

	const queueRewrite = (id: string, entry: PendingToolResultRewrite): void => {
		const queue = resultQueues.get(id);
		if (queue === undefined) {
			resultQueues.set(id, [entry]);
		} else {
			queue.push(entry);
		}
	};

	const suffixedId = (id: string, counter: number): string =>
		appendDuplicateSuffix(id, `${duplicateSuffixPrefix}${counter}`, maxToolCallIdLength);

	return messages.map(message => {
		switch (message.role) {
			case "toolResult": {
				const queue = resultQueues.get(message.toolCallId);
				if (queue === undefined || queue.length === 0) return message;

				// Results are matched to calls in emission order.
				const entry = queue.shift();
				if (queue.length === 0) resultQueues.delete(message.toolCallId);
				return entry === undefined ? message : { ...message, toolCallId: entry.replacementId };
			}

			case "assistant": {
				// Ids already encountered within THIS assistant message. Only the first
				// occurrence per turn clears stale queue entries, so repeated ids inside
				// one turn still pile up as duplicates of each other.
				const idsSeenThisTurn = new Set<string>();
				let renamedAny = false;

				const blocks = message.content.map(block => {
					if (block.type !== "toolCall") return block;
					const originalId = block.id;

					// A later turn re-emitting an id means the earlier call's result never
					// arrived in between. Forget the leftover entries so the upcoming
					// result routes to one of this turn's calls instead of being claimed
					// by an older suffixed id.
					if (!idsSeenThisTurn.has(originalId)) {
						idsSeenThisTurn.add(originalId);
						resultQueues.delete(originalId);
					}

					const priorUses = usageById.get(originalId) ?? 0;
					if (priorUses === 0) {
						// First ever use: the id stays as-is and so does its result.
						usageById.set(originalId, 1);
						queueRewrite(originalId, undefined);
						return block;
					}

					// Probe counters upward until the generated id collides with nothing
					// handed out (or observed) so far.
					let counter = priorUses;
					let freshId = suffixedId(originalId, counter);
					for (; usageById.has(freshId); freshId = suffixedId(originalId, counter)) {
						counter += 1;
					}

					usageById.set(originalId, counter + 1);
					usageById.set(freshId, 1);
					queueRewrite(originalId, { replacementId: freshId });
					renamedAny = true;
					return { ...block, id: freshId };
				});

				// Preserve object identity when nothing in the turn was renamed.
				return renamedAny ? { ...message, content: blocks } : message;
			}

			default:
				return message;
		}
	});
}
|
|
126
|
+
|
|
20
127
|
function shouldDropTruncatedThinkingOnlyAssistant(msg: AssistantMessage): boolean {
|
|
21
128
|
const isTruncatedStop = msg.stopReason === "length" || msg.stopReason === "error" || msg.stopReason === "aborted";
|
|
22
129
|
return isTruncatedStop && !msg.content.some(block => block.type === "toolCall" || block.type === "text");
|
|
@@ -32,6 +139,10 @@ function getLatestSurvivingAssistantIndex(messages: readonly Message[]): number
|
|
|
32
139
|
return -1;
|
|
33
140
|
}
|
|
34
141
|
|
|
142
|
+
/**
 * Type guard: narrows `model` to `Model<"anthropic-messages">` when its `api`
 * field equals `"anthropic-messages"`.
 *
 * @param model - Model descriptor to inspect.
 * @returns `true` exactly when `model.api` is `"anthropic-messages"`.
 */
function isAnthropicMessagesModel(model: Model): model is Model<"anthropic-messages"> {
	const { api } = model;
	return api === "anthropic-messages";
}
|
|
145
|
+
|
|
35
146
|
/**
|
|
36
147
|
* Normalize tool call ID for cross-provider compatibility.
|
|
37
148
|
* OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`.
|
|
@@ -40,128 +151,218 @@ function getLatestSurvivingAssistantIndex(messages: readonly Message[]): number
|
|
|
40
151
|
* For aborted/errored turns, this function:
|
|
41
152
|
* - Preserves tool call structure (unlike converting to text summaries)
|
|
42
153
|
* - Injects synthetic "aborted" tool results
|
|
43
|
-
* - Adds a <turn-aborted> guidance marker for the model
|
|
44
154
|
*/
|
|
45
155
|
export function transformMessages<TApi extends Api>(
|
|
46
156
|
messages: Message[],
|
|
47
157
|
model: Model<TApi>,
|
|
48
158
|
normalizeToolCallId?: (id: string, model: Model<TApi>, source: AssistantMessage) => string,
|
|
159
|
+
maxNormalizedToolCallIdLength = MAX_TOOL_CALL_ID_LENGTH,
|
|
160
|
+
duplicateToolCallIdSuffixPrefix = "_dup",
|
|
49
161
|
): Message[] {
|
|
50
162
|
// Build a map of original tool call IDs to normalized IDs
|
|
51
163
|
const toolCallIdMap = new Map<string, string>();
|
|
52
164
|
|
|
53
165
|
const latestSurvivingAssistantIndex = getLatestSurvivingAssistantIndex(messages);
|
|
54
166
|
// First pass: transform messages (thinking blocks, tool call ID normalization)
|
|
55
|
-
const transformed =
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
// Handle toolResult messages - normalize toolCallId if we have a mapping
|
|
62
|
-
if (msg.role === "toolResult") {
|
|
63
|
-
const normalizedId = toolCallIdMap.get(msg.toolCallId);
|
|
64
|
-
if (normalizedId && normalizedId !== msg.toolCallId) {
|
|
65
|
-
return { ...msg, toolCallId: normalizedId };
|
|
167
|
+
const transformed = deduplicateToolCallIds(
|
|
168
|
+
messages.map((msg, index) => {
|
|
169
|
+
// User and developer messages pass through unchanged
|
|
170
|
+
if (msg.role === "user" || msg.role === "developer") {
|
|
171
|
+
return msg;
|
|
66
172
|
}
|
|
67
|
-
return msg;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
// Assistant messages need transformation check
|
|
71
|
-
if (msg.role === "assistant") {
|
|
72
|
-
const assistantMsg = msg as AssistantMessage;
|
|
73
|
-
const isSameModel =
|
|
74
|
-
assistantMsg.provider === model.provider &&
|
|
75
|
-
assistantMsg.api === model.api &&
|
|
76
|
-
assistantMsg.model === model.id;
|
|
77
|
-
|
|
78
|
-
const mustPreserveLatestAnthropicThinking =
|
|
79
|
-
index === latestSurvivingAssistantIndex &&
|
|
80
|
-
model.api === "anthropic-messages" &&
|
|
81
|
-
assistantMsg.api === "anthropic-messages";
|
|
82
|
-
// Aborted/errored messages may have partially-streamed thinking signatures.
|
|
83
|
-
// A partial signature is invalid and will be rejected by the API, so we must
|
|
84
|
-
// strip signatures from thinking blocks in these messages.
|
|
85
|
-
//
|
|
86
|
-
// Abandoned tool-use turns get the same treatment once they are no longer
|
|
87
|
-
// the latest assistant message. When a turn carries toolCall blocks but did
|
|
88
|
-
// NOT request tool execution (stopReason !== "toolUse" — e.g.
|
|
89
|
-
// adaptive-thinking Opus emitting tool calls and then ending the turn on
|
|
90
|
-
// `end_turn`/`stop`), the agent loop pairs those calls with placeholder
|
|
91
|
-
// tool_results to keep the tool_use/tool_result contract valid. Historical
|
|
92
|
-
// abandoned turns cannot safely replay their end_turn-bound signatures in
|
|
93
|
-
// that continuation, so stripping downgrades them to plain text downstream.
|
|
94
|
-
// Latest abandoned turns are exempt because Anthropic requires thinking
|
|
95
|
-
// blocks from its most recent response to remain byte-for-byte unmodified.
|
|
96
|
-
const invalidStopReason = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
|
|
97
|
-
const abandonedToolUse =
|
|
98
|
-
!invalidStopReason &&
|
|
99
|
-
assistantMsg.stopReason !== "toolUse" &&
|
|
100
|
-
assistantMsg.content.some(b => b.type === "toolCall");
|
|
101
|
-
const hasInvalidSignatures = invalidStopReason || abandonedToolUse;
|
|
102
|
-
|
|
103
|
-
const transformedContent = assistantMsg.content.flatMap(block => {
|
|
104
|
-
if (block.type === "thinking") {
|
|
105
|
-
// Strip untrustworthy signatures so the encoder can downgrade to text.
|
|
106
|
-
const sanitized =
|
|
107
|
-
hasInvalidSignatures && block.thinkingSignature ? { ...block, thinkingSignature: undefined } : block;
|
|
108
|
-
if (mustPreserveLatestAnthropicThinking) return abandonedToolUse ? block : sanitized;
|
|
109
|
-
// For same model: keep thinking blocks with signatures (needed for replay)
|
|
110
|
-
// even if the thinking text is empty (OpenAI encrypted reasoning)
|
|
111
|
-
if (isSameModel && sanitized.thinkingSignature) return sanitized;
|
|
112
|
-
// Skip empty thinking blocks, convert others to plain text
|
|
113
|
-
if (!sanitized.thinking || sanitized.thinking.trim() === "") return [];
|
|
114
|
-
if (isSameModel) return sanitized;
|
|
115
|
-
return {
|
|
116
|
-
type: "text" as const,
|
|
117
|
-
text: sanitized.thinking,
|
|
118
|
-
};
|
|
119
|
-
}
|
|
120
173
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
174
|
+
// Handle toolResult messages - normalize toolCallId if we have a mapping
|
|
175
|
+
if (msg.role === "toolResult") {
|
|
176
|
+
const normalizedId = toolCallIdMap.get(msg.toolCallId);
|
|
177
|
+
if (normalizedId && normalizedId !== msg.toolCallId) {
|
|
178
|
+
return { ...msg, toolCallId: normalizedId };
|
|
125
179
|
}
|
|
180
|
+
return msg;
|
|
181
|
+
}
|
|
126
182
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
183
|
+
// Assistant messages need transformation check
|
|
184
|
+
if (msg.role === "assistant") {
|
|
185
|
+
const assistantMsg = msg as AssistantMessage;
|
|
186
|
+
const isSameModel =
|
|
187
|
+
assistantMsg.provider === model.provider &&
|
|
188
|
+
assistantMsg.api === model.api &&
|
|
189
|
+
assistantMsg.model === model.id;
|
|
190
|
+
|
|
191
|
+
const isAnthropicTarget = isAnthropicMessagesModel(model);
|
|
192
|
+
// Anthropic's all-or-none contract on prior-turn thinking blocks
|
|
193
|
+
// applies to every `anthropic-messages → anthropic-messages` replay,
|
|
194
|
+
// not just the latest assistant turn. The legacy
|
|
195
|
+
// `mustPreserveLatestAnthropicThinking` flag only honored it for the
|
|
196
|
+
// latest turn; every prior turn fell through to the cross-API
|
|
197
|
+
// text-demotion path whenever the conversation crossed a model id,
|
|
198
|
+
// silently dropping the reasoning chain on continuation for custom
|
|
199
|
+
// anthropic-messages providers configured via `models.yaml` and
|
|
200
|
+
// session-level model swaps (#2257).
|
|
201
|
+
const isAnthropicReplay = isAnthropicTarget && assistantMsg.api === "anthropic-messages";
|
|
202
|
+
const isLatestSurvivingAssistant = index === latestSurvivingAssistantIndex;
|
|
203
|
+
// Signature policy is a second axis. Anthropic cryptographically
|
|
204
|
+
// binds reasoning signatures to its key+session+model, so cross-model
|
|
205
|
+
// signatures must be stripped whenever official Anthropic is on
|
|
206
|
+
// either end of the replay:
|
|
207
|
+
// * official → 3p: the 3p target can't reverify the signature;
|
|
208
|
+
// keeping it leaks private continuation metadata for no benefit.
|
|
209
|
+
// * 3p → official: official rejects a foreign signature outright.
|
|
210
|
+
// * official → official cross-model: the new model rejects the
|
|
211
|
+
// previous model's signature.
|
|
212
|
+
// 3p ↔ 3p replays preserve signatures because compatible providers
|
|
213
|
+
// (Z.AI, DeepSeek, custom `models.yaml` providers) treat them as
|
|
214
|
+
// opaque continuation hints rather than verified material; stripping
|
|
215
|
+
// degrades the reasoning chain into unsigned/text on the next turn
|
|
216
|
+
// (#2265). Source-side official detection uses the canonical catalog
|
|
217
|
+
// provider id `"anthropic"` because assistant messages carry no
|
|
218
|
+
// `baseUrl` — a user who manually points `provider: "anthropic"` at
|
|
219
|
+
// a custom proxy via `models.yaml` will see signatures stripped, the
|
|
220
|
+
// conservative direction (degraded reasoning, not broken requests).
|
|
221
|
+
const isOfficialAnthropicSource = isAnthropicReplay && assistantMsg.provider === "anthropic";
|
|
222
|
+
const isOfficialAnthropicTarget = isAnthropicTarget && model.compat.officialEndpoint;
|
|
223
|
+
const officialAnthropicInvolved = isOfficialAnthropicSource || isOfficialAnthropicTarget;
|
|
224
|
+
// Compatible Anthropic-messages reasoning targets that accept
|
|
225
|
+
// unsigned thinking natively (Z.AI, DeepSeek, the generic
|
|
226
|
+
// `reasoning && !official` case in the compat builder). Used to keep
|
|
227
|
+
// `redacted_thinking` siblings beside unsigned visible thinking on
|
|
228
|
+
// targets that won't text-demote it.
|
|
229
|
+
const replaysUnsignedAnthropicThinking = isAnthropicTarget && model.compat.replayUnsignedThinking;
|
|
230
|
+
// Thinking signatures can be untrustworthy for two distinct reasons with very
|
|
231
|
+
// different blast radii:
|
|
232
|
+
//
|
|
233
|
+
// 1. Aborted/errored turns: the stream stopped mid-block, so only the block
|
|
234
|
+
// that was streaming at the abort point — always the FINAL content block —
|
|
235
|
+
// can carry a partially-streamed (invalid) signature. Every earlier block
|
|
236
|
+
// completed: Anthropic delivers a block's signature at its
|
|
237
|
+
// `content_block_stop`, which necessarily fired before the next block began,
|
|
238
|
+
// so those signatures are whole and valid. Stripping them would needlessly
|
|
239
|
+
// discard a replayable thinking chain — e.g. interrupting during the visible
|
|
240
|
+
// text output after thinking already finished leaves a fully-signed thinking
|
|
241
|
+
// block that must be kept, or Anthropic rejects the replay with HTTP 400
|
|
242
|
+
// "Invalid `signature` in `thinking` block".
|
|
243
|
+
//
|
|
244
|
+
// 2. Abandoned tool-use turns: a turn that carries toolCall blocks but did NOT
|
|
245
|
+
// request tool execution (stopReason !== "toolUse" — e.g. adaptive-thinking
|
|
246
|
+
// Opus emitting tool calls and then ending on `end_turn`/`stop`). The agent
|
|
247
|
+
// loop pairs those calls with placeholder tool_results to keep the
|
|
248
|
+
// tool_use/tool_result contract valid. The turn completed cleanly, but its
|
|
249
|
+
// signatures are end_turn-bound and cannot be replayed in that synthesized
|
|
250
|
+
// continuation, so EVERY thinking signature is stripped.
|
|
251
|
+
//
|
|
252
|
+
// Latest abandoned turns are exempt because Anthropic requires thinking blocks
|
|
253
|
+
// from its most recent response to remain byte-for-byte unmodified.
|
|
254
|
+
const invalidStopReason = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
|
|
255
|
+
const abandonedToolUse =
|
|
256
|
+
!invalidStopReason &&
|
|
257
|
+
assistantMsg.stopReason !== "toolUse" &&
|
|
258
|
+
assistantMsg.content.some(b => b.type === "toolCall");
|
|
259
|
+
const lastBlockIndex = assistantMsg.content.length - 1;
|
|
260
|
+
|
|
261
|
+
const transformedContent = assistantMsg.content.flatMap((block, blockIndex) => {
|
|
262
|
+
if (block.type === "thinking") {
|
|
263
|
+
// Only an aborted/errored turn's final (mid-stream) block can hold a
|
|
264
|
+
// partial signature; abandoned tool-use turns strip all. Drop the
|
|
265
|
+
// untrustworthy signature so the encoder can downgrade the block to text.
|
|
266
|
+
const signatureUntrustworthy =
|
|
267
|
+
abandonedToolUse || (invalidStopReason && blockIndex === lastBlockIndex);
|
|
268
|
+
let sanitized: typeof block =
|
|
269
|
+
signatureUntrustworthy && block.thinkingSignature
|
|
270
|
+
? { ...block, thinkingSignature: undefined }
|
|
271
|
+
: block;
|
|
272
|
+
if (isAnthropicReplay) {
|
|
273
|
+
// Latest abandoned turn: Anthropic's byte-for-byte rule forbids
|
|
274
|
+
// even stripping a signature on the latest message.
|
|
275
|
+
if (isLatestSurvivingAssistant && abandonedToolUse) return block;
|
|
276
|
+
// Cross-model prior turns crossing an official Anthropic endpoint
|
|
277
|
+
// must strip the source signature so the downstream encoder
|
|
278
|
+
// applies its `replayUnsignedThinking` policy (unsigned thinking
|
|
279
|
+
// is emitted natively on Anthropic-compatible reasoning endpoints
|
|
280
|
+
// and demoted to text on official Anthropic). 3p ↔ 3p replays
|
|
281
|
+
// keep the signature so the reasoning chain stays signed on
|
|
282
|
+
// continuation (#2265).
|
|
283
|
+
if (
|
|
284
|
+
!isLatestSurvivingAssistant &&
|
|
285
|
+
!isSameModel &&
|
|
286
|
+
officialAnthropicInvolved &&
|
|
287
|
+
sanitized.thinkingSignature
|
|
288
|
+
) {
|
|
289
|
+
sanitized = { ...sanitized, thinkingSignature: undefined };
|
|
290
|
+
}
|
|
291
|
+
// Drop blocks with neither a signature anchor nor any text —
|
|
292
|
+
// nothing for the next turn to replay.
|
|
293
|
+
if (!sanitized.thinkingSignature && (!sanitized.thinking || sanitized.thinking.trim() === "")) {
|
|
294
|
+
return [];
|
|
295
|
+
}
|
|
296
|
+
return sanitized;
|
|
297
|
+
}
|
|
298
|
+
// Cross-API target: keep the existing text-demotion fallback.
|
|
299
|
+
// For same model: keep thinking blocks with signatures (needed for replay)
|
|
300
|
+
// even if the thinking text is empty (OpenAI encrypted reasoning)
|
|
301
|
+
if (isSameModel && sanitized.thinkingSignature) return sanitized;
|
|
302
|
+
// Skip empty thinking blocks, convert others to plain text
|
|
303
|
+
if (!sanitized.thinking || sanitized.thinking.trim() === "") return [];
|
|
304
|
+
if (isSameModel) return sanitized;
|
|
305
|
+
return {
|
|
306
|
+
type: "text" as const,
|
|
307
|
+
text: sanitized.thinking,
|
|
308
|
+
};
|
|
309
|
+
}
|
|
134
310
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
311
|
+
if (block.type === "redactedThinking") {
|
|
312
|
+
// Redacted thinking is native-only. Keep it for same-model
|
|
313
|
+
// signed replay, the latest byte-for-byte Anthropic turn, or
|
|
314
|
+
// compatible targets that will also emit sibling unsigned
|
|
315
|
+
// thinking natively. Drop it when the visible thinking was
|
|
316
|
+
// cross-model stripped and will be demoted to text.
|
|
317
|
+
if (isAnthropicReplay) {
|
|
318
|
+
if (isSameModel || isLatestSurvivingAssistant || replaysUnsignedAnthropicThinking) return block;
|
|
319
|
+
return [];
|
|
320
|
+
}
|
|
321
|
+
if (isSameModel) return block;
|
|
322
|
+
return [];
|
|
323
|
+
}
|
|
138
324
|
|
|
139
|
-
if (
|
|
140
|
-
|
|
141
|
-
|
|
325
|
+
if (block.type === "text") {
|
|
326
|
+
if (isSameModel) return block;
|
|
327
|
+
return {
|
|
328
|
+
type: "text" as const,
|
|
329
|
+
text: block.text,
|
|
330
|
+
};
|
|
142
331
|
}
|
|
143
332
|
|
|
144
|
-
if (
|
|
145
|
-
const
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
333
|
+
if (block.type === "toolCall") {
|
|
334
|
+
const toolCall = block as ToolCall;
|
|
335
|
+
let normalizedToolCall: ToolCall = toolCall;
|
|
336
|
+
|
|
337
|
+
if (!isSameModel && toolCall.thoughtSignature) {
|
|
338
|
+
normalizedToolCall = { ...toolCall };
|
|
339
|
+
delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature;
|
|
149
340
|
}
|
|
150
|
-
}
|
|
151
341
|
|
|
152
|
-
|
|
153
|
-
|
|
342
|
+
if (!isSameModel && normalizeToolCallId) {
|
|
343
|
+
const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg);
|
|
344
|
+
if (normalizedId !== toolCall.id) {
|
|
345
|
+
toolCallIdMap.set(toolCall.id, normalizedId);
|
|
346
|
+
normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
|
|
347
|
+
}
|
|
348
|
+
}
|
|
154
349
|
|
|
155
|
-
|
|
156
|
-
|
|
350
|
+
return normalizedToolCall;
|
|
351
|
+
}
|
|
157
352
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
353
|
+
return block;
|
|
354
|
+
});
|
|
355
|
+
|
|
356
|
+
return {
|
|
357
|
+
...assistantMsg,
|
|
358
|
+
content: transformedContent,
|
|
359
|
+
};
|
|
360
|
+
}
|
|
361
|
+
return msg;
|
|
362
|
+
}),
|
|
363
|
+
maxNormalizedToolCallIdLength,
|
|
364
|
+
duplicateToolCallIdSuffixPrefix,
|
|
365
|
+
);
|
|
165
366
|
const realToolResultsById = new Map<string, ToolResultMessage>();
|
|
166
367
|
for (const msg of transformed) {
|
|
167
368
|
if (msg.role === "toolResult" && !realToolResultsById.has(msg.toolCallId)) {
|
|
@@ -235,11 +436,6 @@ export function transformMessages<TApi extends Api>(
|
|
|
235
436
|
} as ToolResultMessage);
|
|
236
437
|
toolCallStatus.set(tc.id, ToolCallStatus.Aborted);
|
|
237
438
|
}
|
|
238
|
-
result.push({
|
|
239
|
-
role: "developer",
|
|
240
|
-
content: turnAbortedGuidance,
|
|
241
|
-
timestamp: pendingAbortedTimestamp + 1,
|
|
242
|
-
} as DeveloperMessage);
|
|
243
439
|
pendingAbortedToolCalls = new Map();
|
|
244
440
|
pendingAbortedTimestamp = undefined;
|
|
245
441
|
};
|
|
@@ -268,11 +464,6 @@ export function transformMessages<TApi extends Api>(
|
|
|
268
464
|
// (OpenAI completions `reasoning_text`, Google signed thought parts).
|
|
269
465
|
const originalMsg = messages[i]!;
|
|
270
466
|
if (originalMsg.role === "assistant" && shouldDropTruncatedThinkingOnlyAssistant(originalMsg)) {
|
|
271
|
-
if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") {
|
|
272
|
-
// Still arm the aborted-turn note so downstream guidance fires.
|
|
273
|
-
pendingAbortedToolCalls = new Map();
|
|
274
|
-
pendingAbortedTimestamp = assistantMsg.timestamp;
|
|
275
|
-
}
|
|
276
467
|
continue;
|
|
277
468
|
}
|
|
278
469
|
|