@prometheus-ai/ai 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -1,20 +1,14 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
Tool as OpenAITool,
|
|
5
|
-
ResponseCreateParamsStreaming,
|
|
6
|
-
ResponseInput,
|
|
7
|
-
} from "openai/resources/responses/responses";
|
|
1
|
+
import { hostMatchesUrl } from "@prometheus-ai/catalog/hosts";
|
|
2
|
+
import { parseGitHubCopilotApiKey } from "@prometheus-ai/catalog/wire/github-copilot";
|
|
3
|
+
import { $env, $flag, extractHttpStatusFromError, logger, structuredCloneJSON } from "@prometheus-ai/utils";
|
|
8
4
|
import { getEnvApiKey } from "../stream";
|
|
9
5
|
import type {
|
|
10
6
|
AssistantMessage,
|
|
11
|
-
CacheRetention,
|
|
12
7
|
Context,
|
|
13
|
-
FetchImpl,
|
|
14
8
|
MessageAttribution,
|
|
15
9
|
Model,
|
|
16
|
-
OpenAICompat,
|
|
17
10
|
ProviderSessionState,
|
|
11
|
+
RawSseEvent,
|
|
18
12
|
ServiceTier,
|
|
19
13
|
StreamFunction,
|
|
20
14
|
StreamOptions,
|
|
@@ -37,12 +31,10 @@ import {
|
|
|
37
31
|
getOpenAIStreamIdleTimeoutMs,
|
|
38
32
|
iterateWithIdleTimeout,
|
|
39
33
|
} from "../utils/idle-iterator";
|
|
40
|
-
import {
|
|
34
|
+
import { postOpenAIStream } from "../utils/openai-http";
|
|
41
35
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
42
36
|
import { callWithCopilotModelRetry } from "../utils/retry";
|
|
43
37
|
import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
|
|
44
|
-
import { createSdkStreamRequestOptions } from "../utils/sdk-stream-timeout";
|
|
45
|
-
import { wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
46
38
|
import { mapToOpenAIResponsesToolChoice, type OpenAIResponsesToolChoice } from "../utils/tool-choice";
|
|
47
39
|
import {
|
|
48
40
|
buildCopilotDynamicHeaders,
|
|
@@ -54,6 +46,7 @@ import {
|
|
|
54
46
|
appendResponsesToolResultMessages,
|
|
55
47
|
applyCommonResponsesSamplingParams,
|
|
56
48
|
applyResponsesReasoningParams,
|
|
49
|
+
buildResponsesDeltaInput,
|
|
57
50
|
collectCustomCallIds,
|
|
58
51
|
collectKnownCallIds,
|
|
59
52
|
convertResponsesAssistantMessage,
|
|
@@ -62,24 +55,17 @@ import {
|
|
|
62
55
|
isOpenAIResponsesProgressEvent,
|
|
63
56
|
normalizeResponsesToolCallIdForTransform,
|
|
64
57
|
processResponsesStream,
|
|
58
|
+
repairOrphanResponsesToolCalls,
|
|
65
59
|
repairOrphanResponsesToolOutputs,
|
|
66
60
|
} from "./openai-responses-shared";
|
|
61
|
+
import type {
|
|
62
|
+
Tool as OpenAITool,
|
|
63
|
+
ResponseCreateParamsStreaming,
|
|
64
|
+
ResponseInput,
|
|
65
|
+
ResponseStreamEvent,
|
|
66
|
+
} from "./openai-responses-wire";
|
|
67
67
|
import { transformMessages } from "./transform-messages";
|
|
68
68
|
|
|
69
|
-
/**
|
|
70
|
-
* Get prompt cache retention based on cacheRetention and base URL.
|
|
71
|
-
* Only applies to direct OpenAI API calls (api.openai.com).
|
|
72
|
-
*/
|
|
73
|
-
function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention): "24h" | undefined {
|
|
74
|
-
if (cacheRetention !== "long") {
|
|
75
|
-
return undefined;
|
|
76
|
-
}
|
|
77
|
-
if (baseUrl.includes("api.openai.com")) {
|
|
78
|
-
return "24h";
|
|
79
|
-
}
|
|
80
|
-
return undefined;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
69
|
export function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined {
|
|
84
70
|
if (!sessionId || sessionId.length === 0) return undefined;
|
|
85
71
|
const wellFormed = sessionId.toWellFormed();
|
|
@@ -93,6 +79,16 @@ export interface OpenAIResponsesOptions extends StreamOptions {
|
|
|
93
79
|
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
|
94
80
|
serviceTier?: ServiceTier;
|
|
95
81
|
toolChoice?: ToolChoice;
|
|
82
|
+
/**
|
|
83
|
+
* Stateful turns: chain via `previous_response_id` + delta input instead of
|
|
84
|
+
* replaying the full transcript. Forces `store: true` (the platform only
|
|
85
|
+
* resolves stored responses). Defaults ON against the official OpenAI API
|
|
86
|
+
* and OFF for other Responses endpoints; `PROMETHEUS_OPENAI_STATEFUL` overrides the
|
|
87
|
+
* default, and `false` here vetoes everything. Requires `sessionId` +
|
|
88
|
+
* `providerSessionState`. Falls back to a full replay whenever history
|
|
89
|
+
* mutates or the server reports a stale id.
|
|
90
|
+
*/
|
|
91
|
+
statefulResponses?: boolean;
|
|
96
92
|
/**
|
|
97
93
|
* Enforce strict tool call/result pairing when building Responses API inputs.
|
|
98
94
|
* Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
|
|
@@ -120,8 +116,8 @@ export interface OpenAIResponsesOptions extends StreamOptions {
|
|
|
120
116
|
*/
|
|
121
117
|
omitReasoningEffort?: boolean;
|
|
122
118
|
/**
|
|
123
|
-
* Extra request headers merged onto the
|
|
124
|
-
*
|
|
119
|
+
* Extra request headers merged onto the model/copilot defaults. Used by
|
|
120
|
+
* adapter wrappers to inject provider-specific
|
|
125
121
|
* routing or cache hints.
|
|
126
122
|
*/
|
|
127
123
|
headers?: Record<string, string>;
|
|
@@ -136,16 +132,38 @@ export interface OpenAIResponsesOptions extends StreamOptions {
|
|
|
136
132
|
const OPENAI_RESPONSES_PROVIDER_SESSION_STATE_PREFIX = "openai-responses:";
|
|
137
133
|
const OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE =
|
|
138
134
|
"OpenAI responses stream timed out while waiting for the first event";
|
|
135
|
+
/** Consecutive stale-previous-response failures before chaining is disabled for the session. */
|
|
136
|
+
const OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT = 3;
|
|
139
137
|
|
|
140
138
|
interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
|
|
141
139
|
nativeHistoryReplayWarmed: boolean;
|
|
140
|
+
/** Stateful `previous_response_id` chain baselines, keyed by baseUrl/model/session. */
|
|
141
|
+
chains: Map<string, OpenAIResponsesChainState>;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
interface OpenAIResponsesChainState {
|
|
145
|
+
/**
|
|
146
|
+
* Wire params of the last successful turn, with per-turn trailing
|
|
147
|
+
* scaffolding stripped from `input` (never carries previous_response_id).
|
|
148
|
+
*/
|
|
149
|
+
lastParams?: OpenAIResponsesSamplingParams;
|
|
150
|
+
lastResponseId?: string;
|
|
151
|
+
/** Output items of the last response, in replay-sanitized form (matches next-turn input). */
|
|
152
|
+
lastResponseItems?: ResponseInput;
|
|
153
|
+
canAppend: boolean;
|
|
154
|
+
/** Consecutive stale-previous-response failures; reset on a successful chained completion. */
|
|
155
|
+
staleFailures: number;
|
|
156
|
+
/** Set once chaining is judged unsupported for this session (circuit breaker). */
|
|
157
|
+
disabled: boolean;
|
|
142
158
|
}
|
|
143
159
|
|
|
144
160
|
function createOpenAIResponsesProviderSessionState(): OpenAIResponsesProviderSessionState {
|
|
145
161
|
const state: OpenAIResponsesProviderSessionState = {
|
|
146
162
|
nativeHistoryReplayWarmed: false,
|
|
163
|
+
chains: new Map(),
|
|
147
164
|
close: () => {
|
|
148
165
|
state.nativeHistoryReplayWarmed = false;
|
|
166
|
+
state.chains.clear();
|
|
149
167
|
},
|
|
150
168
|
};
|
|
151
169
|
return state;
|
|
@@ -174,6 +192,142 @@ function canReplayOpenAIResponsesNativeHistory(
|
|
|
174
192
|
return providerSessionState?.nativeHistoryReplayWarmed ?? true;
|
|
175
193
|
}
|
|
176
194
|
|
|
195
|
+
function isOpenAIResponsesStatefulEnabled(
|
|
196
|
+
options: OpenAIResponsesOptions | undefined,
|
|
197
|
+
baseUrl: string | undefined,
|
|
198
|
+
): boolean {
|
|
199
|
+
if (options?.statefulResponses === false) return false;
|
|
200
|
+
if (options?.statefulResponses === true) return true;
|
|
201
|
+
// Default ON only against the official OpenAI API: chaining forces
|
|
202
|
+
// `store: true`, and third-party /v1/responses proxies routinely ignore or
|
|
203
|
+
// reject `previous_response_id`. An unset baseUrl means the default
|
|
204
|
+
// endpoint (api.openai.com).
|
|
205
|
+
return $flag("PROMETHEUS_OPENAI_STATEFUL", !baseUrl || hostMatchesUrl(baseUrl, "openai"));
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
function getOpenAIResponsesChainState(
|
|
209
|
+
providerSessionState: OpenAIResponsesProviderSessionState,
|
|
210
|
+
model: Model<"openai-responses">,
|
|
211
|
+
sessionId: string,
|
|
212
|
+
): OpenAIResponsesChainState {
|
|
213
|
+
const key = `${model.baseUrl ?? ""}\u0000${model.id}\u0000${sessionId}`;
|
|
214
|
+
const existing = providerSessionState.chains.get(key);
|
|
215
|
+
if (existing) return existing;
|
|
216
|
+
const created: OpenAIResponsesChainState = { canAppend: false, staleFailures: 0, disabled: false };
|
|
217
|
+
providerSessionState.chains.set(key, created);
|
|
218
|
+
return created;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
function resetOpenAIResponsesChainState(state: OpenAIResponsesChainState): void {
|
|
222
|
+
state.canAppend = false;
|
|
223
|
+
state.lastParams = undefined;
|
|
224
|
+
state.lastResponseId = undefined;
|
|
225
|
+
state.lastResponseItems = undefined;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
interface OpenAIResponsesChainedParams {
|
|
229
|
+
params: OpenAIResponsesSamplingParams;
|
|
230
|
+
/** Set iff the params carry previous_response_id (delta request). */
|
|
231
|
+
previousResponseId?: string;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Drop the per-turn trailing scaffolding (the GPT-5 "Juice: 0" developer item)
|
|
236
|
+
* from `input`, yielding the wire form of the conversation arguments alone.
|
|
237
|
+
*/
|
|
238
|
+
function stripTrailingScaffolding(
|
|
239
|
+
params: OpenAIResponsesSamplingParams,
|
|
240
|
+
trailingScaffoldingItems: number,
|
|
241
|
+
): OpenAIResponsesSamplingParams {
|
|
242
|
+
if (trailingScaffoldingItems <= 0 || !Array.isArray(params.input)) return params;
|
|
243
|
+
return { ...params, input: params.input.slice(0, params.input.length - trailingScaffoldingItems) };
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/**
 * Decide how the upcoming turn is put on the wire.
 *
 * If the session's append baseline still holds, the request is chained: it
 * carries `previous_response_id` and an `input` made of only the new delta
 * items. In every other case the caller's full-transcript `params` are
 * returned untouched, and a previously appendable chain is reset first.
 *
 * The delta is computed on the conversation with the per-turn trailing
 * scaffolding removed; the scaffolding items are then re-attached after the
 * delta, so a decoration that trails every request is never mistaken for a
 * change to history.
 *
 * @param params Full-transcript request parameters for this turn.
 * @param trailingScaffoldingItems Count of trailing `input` items that are per-turn scaffolding.
 * @param chain Mutable chain state for the session; reset when chaining is abandoned.
 * @returns The parameters to send, plus `previousResponseId` when the request is chained.
 */
function buildOpenAIResponsesChainedParams(
	params: OpenAIResponsesSamplingParams,
	trailingScaffoldingItems: number,
	chain: OpenAIResponsesChainState,
): OpenAIResponsesChainedParams {
	const historyParams = stripTrailingScaffolding(params, trailingScaffoldingItems);
	const deltaInput = chain.canAppend
		? buildResponsesDeltaInput<ResponseInput[number]>(chain.lastParams, chain.lastResponseItems, historyParams)
		: null;
	const previousResponseId = chain.lastResponseId;

	// Fallback path first: no usable delta or no id to chain onto.
	if (!deltaInput || deltaInput.length === 0 || !previousResponseId) {
		if (chain.canAppend) {
			// History mutated or options changed — break the chain and replay in full.
			resetOpenAIResponsesChainState(chain);
		}
		return { params };
	}

	// Scaffolding only exists when stripping actually produced a new params object.
	const wasStripped = historyParams !== params;
	const scaffolding =
		wasStripped && Array.isArray(params.input)
			? params.input.slice(params.input.length - trailingScaffoldingItems)
			: [];

	const chainedParams = {
		...params,
		previous_response_id: previousResponseId,
		input: [...deltaInput, ...scaffolding],
	};
	return { params: chainedParams, previousResponseId };
}
|
|
281
|
+
|
|
282
|
+
/**
 * Classify an error as a rejection of the `previous_response_id` baseline.
 *
 * Only `Error` instances qualify. A match is either an error whose `code` is
 * exactly `previous_response_not_found`, or a message that mentions the
 * previous response together with one of: not found, invalid, expired, stale,
 * unsupported.
 *
 * @param error The value caught from the failed request.
 * @returns `true` when the error looks like a stale or unsupported previous-response rejection.
 */
function isOpenAIResponsesStalePreviousResponseError(error: unknown): boolean {
	if (error instanceof Error) {
		const { code } = error as { code?: string };
		if (code === "previous_response_not_found") {
			return true;
		}
		const text = error.message;
		const mentionsPreviousResponse = /previous[ _]?response/i.test(text);
		// "unsupported" covers endpoints that reject the parameter outright
		// (e.g. "Unsupported parameter: previous_response_id").
		return mentionsPreviousResponse && /not[ _]?found|invalid|expired|stale|unsupported/i.test(text);
	}
	return false;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Detect the Zero Data Retention rejection of a chained request.
 *
 * ZDR orgs accept `store: true` yet refuse to resolve any
 * `previous_response_id`, because the prior response was never persisted
 * server-side. The 400 uses a fixed "Zero Data Retention" phrasing that the
 * keyword list in `isOpenAIResponsesStalePreviousResponseError` does not
 * cover, so it gets its own classifier; callers use it to disable chaining
 * outright rather than counting strikes.
 *
 * @param error The value caught from the failed request.
 * @returns `true` when an `Error` message mentions both the previous response and zero data retention.
 */
function isOpenAIResponsesZeroDataRetentionError(error: unknown): boolean {
	if (error instanceof Error) {
		const { message } = error;
		// Both phrases must appear; `every` stops at the first miss.
		const requiredPhrases = [/previous[ _]?response/i, /zero[ _-]?data[ _-]?retention/i];
		return requiredPhrases.every(phrase => phrase.test(message));
	}
	return false;
}
|
|
304
|
+
|
|
305
|
+
/**
 * Record one rejected `previous_response_id` for the session.
 *
 * Resets the chain state, bumps `staleFailures`, and flips `disabled` once the
 * counter reaches `OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT`. The outcome is
 * reported through a debug log entry.
 *
 * @param chain Mutable chain state for the session.
 * @param error The rejection that triggered the fallback; only used for logging.
 */
function registerOpenAIResponsesChainStaleFailure(chain: OpenAIResponsesChainState, error: unknown): void {
	resetOpenAIResponsesChainState(chain);

	const failures = chain.staleFailures + 1;
	chain.staleFailures = failures;
	const limitReached = failures >= OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT;
	if (limitReached) {
		chain.disabled = true;
	}

	const reason = error instanceof Error ? error.message : String(error);
	logger.debug("OpenAI responses previous_response_id rejected; falling back to full context", {
		error: reason,
		consecutiveFailures: chain.staleFailures,
		disabled: chain.disabled,
	});
}
|
|
317
|
+
|
|
318
|
+
/**
 * Handle the one-shot Zero Data Retention signal for a session.
 *
 * The org will never resolve a stored response, so there is no point counting
 * strikes: the chain state is reset, chaining is disabled immediately, and
 * `staleFailures` is pinned to `OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT`.
 *
 * @param chain Mutable chain state for the session.
 * @param error The ZDR rejection; only used for logging.
 */
function markOpenAIResponsesChainZeroDataRetention(chain: OpenAIResponsesChainState, error: unknown): void {
	resetOpenAIResponsesChainState(chain);

	// Same assignment order as two separate statements: `disabled`, then `staleFailures`.
	Object.assign(chain, {
		disabled: true,
		staleFailures: OPENAI_RESPONSES_CHAIN_STALE_FAILURE_LIMIT,
	});

	const reason = error instanceof Error ? error.message : String(error);
	logger.debug("OpenAI responses chaining disabled (Zero Data Retention)", { error: reason });
}
|
|
330
|
+
|
|
177
331
|
type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
|
|
178
332
|
top_p?: number;
|
|
179
333
|
top_k?: number;
|
|
@@ -204,9 +358,32 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
204
358
|
model.id,
|
|
205
359
|
);
|
|
206
360
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
361
|
+
let chainState: OpenAIResponsesChainState | undefined;
|
|
362
|
+
let sentPreviousResponseId: string | undefined;
|
|
207
363
|
const abortTracker = createAbortSourceTracker(options?.signal);
|
|
208
364
|
const firstEventTimeoutAbortError = new Error(OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
|
|
209
365
|
const { requestAbortController, requestSignal } = abortTracker;
|
|
366
|
+
const onSseEvent = options?.onSseEvent;
|
|
367
|
+
const rawSseObserver = onSseEvent
|
|
368
|
+
? (event: RawSseEvent) => {
|
|
369
|
+
if (!event.event && event.data && event.data !== "[DONE]") {
|
|
370
|
+
try {
|
|
371
|
+
const parsed = JSON.parse(event.data);
|
|
372
|
+
const resolvedEvent =
|
|
373
|
+
typeof parsed.type === "string"
|
|
374
|
+
? parsed.type
|
|
375
|
+
: typeof parsed.object === "string"
|
|
376
|
+
? parsed.object
|
|
377
|
+
: null;
|
|
378
|
+
if (resolvedEvent) {
|
|
379
|
+
event.event = resolvedEvent;
|
|
380
|
+
event.raw = [`event: ${resolvedEvent}`, ...event.raw];
|
|
381
|
+
}
|
|
382
|
+
} catch {}
|
|
383
|
+
}
|
|
384
|
+
onSseEvent(event, model);
|
|
385
|
+
}
|
|
386
|
+
: undefined;
|
|
210
387
|
|
|
211
388
|
try {
|
|
212
389
|
// Keep request routing on `sessionId` while allowing callers to pin a
|
|
@@ -214,88 +391,138 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
214
391
|
// avoid perturbing provider conversation state without cold-starting the cache.
|
|
215
392
|
const routingSessionId = getOpenAIResponsesRoutingSessionId(options);
|
|
216
393
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
|
|
217
|
-
const {
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
options?.initiatorOverride,
|
|
223
|
-
routingSessionId,
|
|
224
|
-
options?.onSseEvent,
|
|
225
|
-
options?.fetch,
|
|
226
|
-
);
|
|
394
|
+
const {
|
|
395
|
+
headers: requestHeaders,
|
|
396
|
+
copilotPremiumRequests,
|
|
397
|
+
baseUrl,
|
|
398
|
+
} = createRequestSetup(model, context, apiKey, options?.headers, options?.initiatorOverride, routingSessionId);
|
|
227
399
|
const premiumRequestsTotal = copilotPremiumRequests;
|
|
228
400
|
const providerSessionState = getOpenAIResponsesProviderSessionState(model, options?.providerSessionState);
|
|
229
|
-
const { params } = buildParams(model, context, options, providerSessionState
|
|
401
|
+
const { params, trailingScaffoldingItems } = buildParams(model, context, options, providerSessionState);
|
|
402
|
+
if (isOpenAIResponsesStatefulEnabled(options, baseUrl) && routingSessionId && providerSessionState) {
|
|
403
|
+
chainState = getOpenAIResponsesChainState(providerSessionState, model, routingSessionId);
|
|
404
|
+
if (!chainState.disabled) {
|
|
405
|
+
// Platform `previous_response_id` chaining only resolves stored responses.
|
|
406
|
+
params.store = true;
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
const chained: OpenAIResponsesChainedParams =
|
|
410
|
+
chainState && !chainState.disabled
|
|
411
|
+
? buildOpenAIResponsesChainedParams(params, trailingScaffoldingItems, chainState)
|
|
412
|
+
: { params };
|
|
413
|
+
sentPreviousResponseId = chained.previousResponseId;
|
|
230
414
|
const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
|
|
231
415
|
const firstEventTimeoutMs =
|
|
232
416
|
options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
|
|
233
417
|
const requestTimeoutMs =
|
|
234
418
|
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0 ? firstEventTimeoutMs : undefined;
|
|
235
419
|
options?.onPayload?.(params);
|
|
420
|
+
const requestUrl = `${(baseUrl ?? "https://api.openai.com/v1").replace(/\/+$/, "")}/responses`;
|
|
236
421
|
rawRequestDump = {
|
|
237
422
|
provider: model.provider,
|
|
238
423
|
api: output.api,
|
|
239
424
|
model: model.id,
|
|
240
425
|
method: "POST",
|
|
241
|
-
url:
|
|
242
|
-
body: params,
|
|
426
|
+
url: requestUrl,
|
|
427
|
+
body: chained.params,
|
|
243
428
|
};
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
}
|
|
254
|
-
try {
|
|
255
|
-
const { data, response, request_id } = await client.responses
|
|
256
|
-
.create(params, requestOptions)
|
|
257
|
-
.withResponse();
|
|
258
|
-
await notifyProviderResponse(options, response, model, request_id);
|
|
259
|
-
return data;
|
|
260
|
-
} catch (error) {
|
|
261
|
-
if (error instanceof OpenAIConnectionTimeoutError && !abortTracker.wasCallerAbort()) {
|
|
262
|
-
throw firstEventTimeoutAbortError;
|
|
429
|
+
const openResponsesStream = (requestParams: OpenAIResponsesSamplingParams) =>
|
|
430
|
+
callWithCopilotModelRetry(
|
|
431
|
+
async () => {
|
|
432
|
+
let requestTimeout: NodeJS.Timeout | undefined;
|
|
433
|
+
if (requestTimeoutMs !== undefined) {
|
|
434
|
+
requestTimeout = setTimeout(
|
|
435
|
+
() => abortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
436
|
+
requestTimeoutMs,
|
|
437
|
+
);
|
|
263
438
|
}
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
439
|
+
try {
|
|
440
|
+
const headers = { ...requestHeaders };
|
|
441
|
+
if (requestTimeoutMs !== undefined) {
|
|
442
|
+
headers["X-Stainless-Timeout"] = Math.floor(requestTimeoutMs / 1000).toString();
|
|
443
|
+
}
|
|
444
|
+
const { events, response, requestId } = await postOpenAIStream<ResponseStreamEvent>({
|
|
445
|
+
url: requestUrl,
|
|
446
|
+
headers,
|
|
447
|
+
body: requestParams,
|
|
448
|
+
signal: requestSignal,
|
|
449
|
+
fetch: options?.fetch,
|
|
450
|
+
// With a first-event watchdog armed, transport retries must
|
|
451
|
+
// not silently extend the caller's deadline.
|
|
452
|
+
maxAttempts: requestTimeoutMs !== undefined ? 1 : undefined,
|
|
453
|
+
onSseEvent: rawSseObserver,
|
|
454
|
+
});
|
|
455
|
+
// Disarm the first-event watchdog as soon as headers arrive — a slow
|
|
456
|
+
// onResponse callback must not abort an already-connected stream.
|
|
457
|
+
if (requestTimeout !== undefined) {
|
|
458
|
+
clearTimeout(requestTimeout);
|
|
459
|
+
requestTimeout = undefined;
|
|
460
|
+
}
|
|
461
|
+
await notifyProviderResponse(options, response, model, requestId);
|
|
462
|
+
return events;
|
|
463
|
+
} finally {
|
|
464
|
+
if (requestTimeout !== undefined) clearTimeout(requestTimeout);
|
|
465
|
+
}
|
|
466
|
+
},
|
|
467
|
+
{ provider: model.provider, signal: requestSignal },
|
|
468
|
+
);
|
|
469
|
+
let openaiStream: AsyncIterable<ResponseStreamEvent>;
|
|
470
|
+
try {
|
|
471
|
+
openaiStream = await openResponsesStream(chained.params);
|
|
472
|
+
} catch (error) {
|
|
473
|
+
if (!chainState || !sentPreviousResponseId || requestSignal.aborted) {
|
|
474
|
+
throw error;
|
|
475
|
+
}
|
|
476
|
+
const zdrRejection = isOpenAIResponsesZeroDataRetentionError(error);
|
|
477
|
+
if (!zdrRejection && !isOpenAIResponsesStalePreviousResponseError(error)) {
|
|
478
|
+
throw error;
|
|
479
|
+
}
|
|
480
|
+
// Server rejected the chain baseline: reset, count the failure (or
|
|
481
|
+
// disable categorically on ZDR), and retry once with the full
|
|
482
|
+
// transcript. Structurally cannot loop — the retry carries no
|
|
483
|
+
// previous_response_id.
|
|
484
|
+
if (zdrRejection) {
|
|
485
|
+
markOpenAIResponsesChainZeroDataRetention(chainState, error);
|
|
486
|
+
// ZDR orgs cannot store responses; the original request forced
|
|
487
|
+
// `store: true` for chaining, which is meaningless here and would
|
|
488
|
+
// otherwise leave subsequent turns asking the server to retain
|
|
489
|
+
// data it must discard.
|
|
490
|
+
params.store = false;
|
|
491
|
+
} else {
|
|
492
|
+
registerOpenAIResponsesChainStaleFailure(chainState, error);
|
|
493
|
+
}
|
|
494
|
+
sentPreviousResponseId = undefined;
|
|
495
|
+
rawRequestDump.body = params;
|
|
496
|
+
openaiStream = await openResponsesStream(params);
|
|
497
|
+
}
|
|
271
498
|
if (premiumRequestsTotal !== undefined) output.usage.premiumRequests = premiumRequestsTotal;
|
|
272
499
|
stream.push({ type: "start", partial: output });
|
|
273
500
|
|
|
274
501
|
const nativeOutputItems: Array<Record<string, unknown>> = [];
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
{
|
|
290
|
-
onFirstToken: () => {
|
|
291
|
-
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
292
|
-
},
|
|
293
|
-
onOutputItemDone: item => {
|
|
294
|
-
nativeOutputItems.push(structuredCloneJSON<unknown>(item) as unknown as Record<string, unknown>);
|
|
295
|
-
},
|
|
502
|
+
let sawCompleted = false;
|
|
503
|
+
const timedOpenaiStream = iterateWithIdleTimeout(openaiStream, {
|
|
504
|
+
idleTimeoutMs,
|
|
505
|
+
firstItemTimeoutMs: firstEventTimeoutMs,
|
|
506
|
+
firstItemErrorMessage: OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE,
|
|
507
|
+
errorMessage: "OpenAI responses stream stalled while waiting for the next event",
|
|
508
|
+
onFirstItemTimeout: () => abortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
509
|
+
onIdle: () => requestAbortController.abort(),
|
|
510
|
+
abortSignal: options?.signal,
|
|
511
|
+
isProgressItem: isOpenAIResponsesProgressEvent,
|
|
512
|
+
});
|
|
513
|
+
await processResponsesStream(timedOpenaiStream, output, stream, model, {
|
|
514
|
+
onFirstToken: () => {
|
|
515
|
+
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
296
516
|
},
|
|
297
|
-
|
|
298
|
-
|
|
517
|
+
onOutputItemDone: item => {
|
|
518
|
+
// `processResponsesStream` hands over a private clone already; no
|
|
519
|
+
// second deep copy needed (reasoning items carry multi-KB blobs).
|
|
520
|
+
nativeOutputItems.push(item as unknown as Record<string, unknown>);
|
|
521
|
+
},
|
|
522
|
+
onCompleted: () => {
|
|
523
|
+
sawCompleted = true;
|
|
524
|
+
},
|
|
525
|
+
});
|
|
299
526
|
|
|
300
527
|
const firstEventTimeoutError = abortTracker.getLocalAbortReason();
|
|
301
528
|
if (firstEventTimeoutError) {
|
|
@@ -305,12 +532,36 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
305
532
|
throw new Error("Request was aborted");
|
|
306
533
|
}
|
|
307
534
|
|
|
535
|
+
// Detect premature stream closure: the HTTP stream ended without the
|
|
536
|
+
// provider sending `response.completed`. Custom/proxy providers may
|
|
537
|
+
// drop the connection mid-stream; without this guard the incomplete
|
|
538
|
+
// output is silently surfaced as a successful "stop".
|
|
539
|
+
if (!sawCompleted) {
|
|
540
|
+
throw new Error("OpenAI responses stream closed before response.completed was received");
|
|
541
|
+
}
|
|
542
|
+
|
|
308
543
|
if (output.stopReason === "aborted" || output.stopReason === "error") {
|
|
309
544
|
throw new Error(output.errorMessage ?? "An unknown error occurred");
|
|
310
545
|
}
|
|
311
546
|
|
|
312
547
|
output.providerPayload = createOpenAIResponsesHistoryPayload(model.provider, nativeOutputItems);
|
|
313
548
|
if (providerSessionState) providerSessionState.nativeHistoryReplayWarmed = true;
|
|
549
|
+
if (chainState) {
|
|
550
|
+
chainState.lastParams = structuredCloneJSON(stripTrailingScaffolding(params, trailingScaffoldingItems));
|
|
551
|
+
if (output.responseId) {
|
|
552
|
+
chainState.lastResponseId = output.responseId;
|
|
553
|
+
chainState.lastResponseItems = sanitizeOpenAIResponsesHistoryItemsForReplay(
|
|
554
|
+
structuredCloneJSON(nativeOutputItems),
|
|
555
|
+
);
|
|
556
|
+
chainState.canAppend = true;
|
|
557
|
+
// Only a successful CHAINED completion clears the stale counter — a
|
|
558
|
+
// full-context success must not mask categorical rejection.
|
|
559
|
+
if (sentPreviousResponseId) chainState.staleFailures = 0;
|
|
560
|
+
} else {
|
|
561
|
+
// Without a response id the append baseline cannot be trusted.
|
|
562
|
+
chainState.canAppend = false;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
314
565
|
|
|
315
566
|
output.duration = Date.now() - startTime;
|
|
316
567
|
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
@@ -318,6 +569,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
318
569
|
stream.end();
|
|
319
570
|
} catch (error) {
|
|
320
571
|
for (const block of output.content) delete (block as { index?: number }).index;
|
|
572
|
+
if (chainState) resetOpenAIResponsesChainState(chainState);
|
|
321
573
|
const firstEventTimeoutError = abortTracker.getLocalAbortReason();
|
|
322
574
|
output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
|
|
323
575
|
output.errorStatus = extractHttpStatusFromError(error);
|
|
@@ -333,17 +585,15 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
333
585
|
return stream;
|
|
334
586
|
};
|
|
335
587
|
|
|
336
|
-
function
|
|
588
|
+
function createRequestSetup(
|
|
337
589
|
model: Model<"openai-responses">,
|
|
338
590
|
context: Context,
|
|
339
591
|
apiKey?: string,
|
|
340
592
|
extraHeaders?: Record<string, string>,
|
|
341
593
|
initiatorOverride?: MessageAttribution,
|
|
342
594
|
sessionId?: string,
|
|
343
|
-
onSseEvent?: OpenAIResponsesOptions["onSseEvent"],
|
|
344
|
-
fetchOverride?: FetchImpl,
|
|
345
595
|
): {
|
|
346
|
-
|
|
596
|
+
headers: Record<string, string>;
|
|
347
597
|
copilotPremiumRequests: number | undefined;
|
|
348
598
|
baseUrl: string | undefined;
|
|
349
599
|
} {
|
|
@@ -375,23 +625,12 @@ function createClient(
|
|
|
375
625
|
copilotPremiumRequests = copilot.premiumRequests;
|
|
376
626
|
baseUrl = resolveGitHubCopilotBaseUrl(model.baseUrl, rawApiKey) ?? model.baseUrl;
|
|
377
627
|
}
|
|
378
|
-
if (sessionId && model.provider === "openai"
|
|
628
|
+
if (sessionId && model.provider === "openai") {
|
|
379
629
|
headers.session_id ??= sessionId;
|
|
380
630
|
headers["x-client-request-id"] ??= sessionId;
|
|
381
631
|
}
|
|
382
|
-
|
|
383
|
-
return {
|
|
384
|
-
client: new OpenAI({
|
|
385
|
-
apiKey,
|
|
386
|
-
baseURL: baseUrl,
|
|
387
|
-
dangerouslyAllowBrowser: true,
|
|
388
|
-
maxRetries: 5,
|
|
389
|
-
defaultHeaders: headers,
|
|
390
|
-
fetch: onSseEvent ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model)) : baseFetch,
|
|
391
|
-
}),
|
|
392
|
-
copilotPremiumRequests,
|
|
393
|
-
baseUrl,
|
|
394
|
-
};
|
|
632
|
+
headers.Authorization ??= `Bearer ${apiKey}`;
|
|
633
|
+
return { headers, copilotPremiumRequests, baseUrl };
|
|
395
634
|
}
|
|
396
635
|
|
|
397
636
|
function getOpenAIResponsesPromptCacheKey(
|
|
@@ -418,24 +657,14 @@ function buildParams(
|
|
|
418
657
|
context: Context,
|
|
419
658
|
options: OpenAIResponsesOptions | undefined,
|
|
420
659
|
providerSessionState: OpenAIResponsesProviderSessionState | undefined,
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
const strictResponsesPairing
|
|
424
|
-
options?.strictResponsesPairing ??
|
|
425
|
-
(isAzureOpenAIBaseUrl(model.baseUrl ?? "") || model.provider === "github-copilot");
|
|
426
|
-
const conversationMessages = convertConversationMessages(
|
|
427
|
-
model,
|
|
428
|
-
context,
|
|
429
|
-
strictResponsesPairing,
|
|
430
|
-
providerSessionState,
|
|
431
|
-
options,
|
|
432
|
-
);
|
|
433
|
-
const messages: ResponseInput = [...conversationMessages];
|
|
660
|
+
): { params: OpenAIResponsesSamplingParams; trailingScaffoldingItems: number } {
|
|
661
|
+
const strictResponsesPairing = options?.strictResponsesPairing ?? model.compat.strictResponsesPairing;
|
|
662
|
+
const messages = convertConversationMessages(model, context, strictResponsesPairing, providerSessionState, options);
|
|
434
663
|
|
|
435
664
|
const systemPrompts = normalizeSystemPrompts(context.systemPrompt);
|
|
436
665
|
let systemInstructions: string | undefined;
|
|
437
666
|
if (systemPrompts.length > 0) {
|
|
438
|
-
const needsDeveloperRole = model.reasoning && supportsDeveloperRole
|
|
667
|
+
const needsDeveloperRole = model.reasoning && model.compat.supportsDeveloperRole;
|
|
439
668
|
if (needsDeveloperRole) {
|
|
440
669
|
// Reasoning models on known OpenAI-compatible endpoints require the
|
|
441
670
|
// `developer` role. Send all system prompts inline in `input`.
|
|
@@ -453,12 +682,16 @@ function buildParams(
|
|
|
453
682
|
const cacheRetention = resolveCacheRetention(options?.cacheRetention);
|
|
454
683
|
const promptCacheKey = getOpenAIResponsesPromptCacheKey(options);
|
|
455
684
|
const params: OpenAIResponsesSamplingParams = {
|
|
456
|
-
model: model.id,
|
|
685
|
+
model: model.requestModelId ?? model.id,
|
|
457
686
|
input: messages,
|
|
458
687
|
instructions: systemInstructions,
|
|
459
688
|
stream: true,
|
|
460
689
|
prompt_cache_key: promptCacheKey,
|
|
461
|
-
prompt_cache_retention: promptCacheKey
|
|
690
|
+
prompt_cache_retention: promptCacheKey
|
|
691
|
+
? cacheRetention === "long" && model.compat.supportsLongPromptCacheRetention
|
|
692
|
+
? "24h"
|
|
693
|
+
: undefined
|
|
694
|
+
: undefined,
|
|
462
695
|
store: false,
|
|
463
696
|
stream_options: model.provider === "openai" ? { include_obfuscation: false } : undefined,
|
|
464
697
|
};
|
|
@@ -469,8 +702,8 @@ function buildParams(
|
|
|
469
702
|
// TODO: openai responses has no top-level `frequency_penalty` field as of the current SDK;
|
|
470
703
|
// `StreamOptions.frequencyPenalty` is intentionally dropped for this provider.
|
|
471
704
|
|
|
472
|
-
if (context.tools
|
|
473
|
-
params.tools = convertTools(context.tools, supportsStrictMode
|
|
705
|
+
if (context.tools) {
|
|
706
|
+
params.tools = convertTools(context.tools, model.compat.supportsStrictMode, model);
|
|
474
707
|
if (options?.toolChoice) {
|
|
475
708
|
params.tool_choice = mapOpenAIResponsesToolChoiceForTools(options.toolChoice, context.tools, model);
|
|
476
709
|
}
|
|
@@ -485,16 +718,15 @@ function buildParams(
|
|
|
485
718
|
}
|
|
486
719
|
}
|
|
487
720
|
|
|
488
|
-
applyResponsesReasoningParams(
|
|
721
|
+
const trailingScaffoldingItems = applyResponsesReasoningParams(
|
|
489
722
|
params,
|
|
490
723
|
model,
|
|
491
724
|
options,
|
|
492
725
|
messages,
|
|
493
726
|
effort =>
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
),
|
|
727
|
+
model.compat.reasoningEffortMap?.[effort as NonNullable<OpenAIResponsesOptions["reasoning"]>] ??
|
|
728
|
+
model.thinking?.effortMap?.[effort as NonNullable<OpenAIResponsesOptions["reasoning"]>] ??
|
|
729
|
+
effort,
|
|
498
730
|
options?.includeEncryptedReasoning ?? true,
|
|
499
731
|
options?.omitReasoningEffort ?? false,
|
|
500
732
|
);
|
|
@@ -503,42 +735,7 @@ function buildParams(
|
|
|
503
735
|
Object.assign(params, options.extraBody);
|
|
504
736
|
}
|
|
505
737
|
|
|
506
|
-
return {
|
|
507
|
-
}
|
|
508
|
-
|
|
509
|
-
function mapReasoningEffort(
|
|
510
|
-
effort: NonNullable<OpenAIResponsesOptions["reasoning"]>,
|
|
511
|
-
reasoningEffortMap: OpenAICompat["reasoningEffortMap"] | undefined,
|
|
512
|
-
): string {
|
|
513
|
-
return reasoningEffortMap?.[effort] ?? effort;
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
function isAzureOpenAIBaseUrl(baseUrl: string): boolean {
|
|
517
|
-
return baseUrl.includes(".openai.azure.com") || baseUrl.includes("azure.com/openai");
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
function supportsStrictMode(model: Model<"openai-responses">): boolean {
|
|
521
|
-
if (model.provider === "openai" || model.provider === "azure" || model.provider === "github-copilot") return true;
|
|
522
|
-
|
|
523
|
-
const baseUrl = model.baseUrl.toLowerCase();
|
|
524
|
-
return (
|
|
525
|
-
baseUrl.includes("api.openai.com") ||
|
|
526
|
-
baseUrl.includes(".openai.azure.com") ||
|
|
527
|
-
baseUrl.includes("models.inference.ai.azure.com")
|
|
528
|
-
);
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
export function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean {
|
|
532
|
-
const baseUrl =
|
|
533
|
-
typeof modelOrBaseUrl === "string" ? modelOrBaseUrl.toLowerCase() : (modelOrBaseUrl.baseUrl ?? "").toLowerCase();
|
|
534
|
-
return (
|
|
535
|
-
baseUrl.includes("api.openai.com") ||
|
|
536
|
-
baseUrl.includes(".openai.azure.com") ||
|
|
537
|
-
baseUrl.includes("azure.com/openai") ||
|
|
538
|
-
baseUrl.includes("models.inference.ai.azure.com") ||
|
|
539
|
-
baseUrl.includes("githubcopilot.com") ||
|
|
540
|
-
baseUrl.includes("copilot-api.")
|
|
541
|
-
);
|
|
738
|
+
return { params, trailingScaffoldingItems };
|
|
542
739
|
}
|
|
543
740
|
|
|
544
741
|
function convertConversationMessages(
|
|
@@ -581,9 +778,13 @@ function convertConversationMessages(
|
|
|
581
778
|
messages.push({ role: "user", content });
|
|
582
779
|
} else if (msg.role === "assistant") {
|
|
583
780
|
const assistantMsg = msg as AssistantMessage;
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
781
|
+
// Native items are model-bound (reasoning carries encrypted content minted
|
|
782
|
+
// by the producing model); after a mid-session model switch fall back to
|
|
783
|
+
// block re-encode, which strips foreign signatures.
|
|
784
|
+
const providerPayload =
|
|
785
|
+
shouldReplayNativeHistory && assistantMsg.api === model.api && assistantMsg.model === model.id
|
|
786
|
+
? getOpenAIResponsesHistoryPayload(assistantMsg.providerPayload, model.provider, assistantMsg.provider)
|
|
787
|
+
: undefined;
|
|
587
788
|
const historyItems = providerPayload?.items;
|
|
588
789
|
if (historyItems) {
|
|
589
790
|
const sanitizedHistoryItems = sanitizeOpenAIResponsesHistoryItemsForReplay(filterReasoning(historyItems));
|
|
@@ -614,7 +815,7 @@ function convertConversationMessages(
|
|
|
614
815
|
msgIndex++;
|
|
615
816
|
}
|
|
616
817
|
|
|
617
|
-
return repairOrphanResponsesToolOutputs(messages);
|
|
818
|
+
return repairOrphanResponsesToolCalls(repairOrphanResponsesToolOutputs(messages));
|
|
618
819
|
}
|
|
619
820
|
|
|
620
821
|
/**
|
|
@@ -635,13 +836,18 @@ export function mapOpenAIResponsesToolChoiceForTools(
|
|
|
635
836
|
model: Model<"openai-responses">,
|
|
636
837
|
): OpenAIResponsesToolChoice {
|
|
637
838
|
const mapped = mapToOpenAIResponsesToolChoice(choice);
|
|
638
|
-
if (!mapped || typeof mapped === "string" || mapped.type !== "function"
|
|
839
|
+
if (!mapped || typeof mapped === "string" || mapped.type !== "function") {
|
|
639
840
|
return mapped;
|
|
640
841
|
}
|
|
641
842
|
|
|
642
|
-
const
|
|
643
|
-
|
|
644
|
-
|
|
843
|
+
const directTool = tools.find(tool => tool.name === mapped.name);
|
|
844
|
+
const customTool = supportsFreeformApplyPatch(model)
|
|
845
|
+
? tools.find(tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name))
|
|
846
|
+
: undefined;
|
|
847
|
+
const offeredTool = customTool ?? directTool;
|
|
848
|
+
if (!offeredTool) {
|
|
849
|
+
return undefined;
|
|
850
|
+
}
|
|
645
851
|
return customTool ? { type: "custom", name: customTool.customWireName ?? customTool.name } : mapped;
|
|
646
852
|
}
|
|
647
853
|
|