@prometheus-ai/ai 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -1,22 +1,11 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import
|
|
3
|
-
import type {
|
|
4
|
-
ResponseCustomToolCall,
|
|
5
|
-
ResponseFunctionToolCall,
|
|
6
|
-
ResponseInput,
|
|
7
|
-
ResponseInputContent,
|
|
8
|
-
ResponseInputImage,
|
|
9
|
-
ResponseInputText,
|
|
10
|
-
ResponseOutputItem,
|
|
11
|
-
ResponseOutputMessage,
|
|
12
|
-
ResponseReasoningItem,
|
|
13
|
-
} from "openai/resources/responses/responses";
|
|
14
|
-
import { calculateCost } from "../models";
|
|
1
|
+
import { calculateCost } from "@prometheus-ai/catalog/models";
|
|
2
|
+
import { logger, structuredCloneJSON } from "@prometheus-ai/utils";
|
|
15
3
|
import {
|
|
16
4
|
type Api,
|
|
17
5
|
type AssistantMessage,
|
|
18
6
|
type ImageContent,
|
|
19
7
|
type Model,
|
|
8
|
+
OPENAI_MAX_OUTPUT_TOKENS,
|
|
20
9
|
resolveServiceTier,
|
|
21
10
|
type ServiceTier,
|
|
22
11
|
type StopReason,
|
|
@@ -31,6 +20,20 @@ import {
|
|
|
31
20
|
import { normalizeResponsesToolCallId } from "../utils";
|
|
32
21
|
import type { AssistantMessageEventStream } from "../utils/event-stream";
|
|
33
22
|
import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
|
|
23
|
+
import type {
|
|
24
|
+
ResponseCreateParamsStreaming,
|
|
25
|
+
ResponseCustomToolCall,
|
|
26
|
+
ResponseFunctionToolCall,
|
|
27
|
+
ResponseInput,
|
|
28
|
+
ResponseInputContent,
|
|
29
|
+
ResponseInputImage,
|
|
30
|
+
ResponseInputText,
|
|
31
|
+
ResponseOutputItem,
|
|
32
|
+
ResponseOutputMessage,
|
|
33
|
+
ResponseReasoningItem,
|
|
34
|
+
ResponseStatus,
|
|
35
|
+
ResponseStreamEvent,
|
|
36
|
+
} from "./openai-responses-wire";
|
|
34
37
|
import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER, partitionVisionContent } from "./vision-guard";
|
|
35
38
|
export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Set([
|
|
36
39
|
"response.created",
|
|
@@ -48,6 +51,7 @@ export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Se
|
|
|
48
51
|
"response.custom_tool_call_input.done",
|
|
49
52
|
"response.output_item.done",
|
|
50
53
|
"response.completed",
|
|
54
|
+
"response.incomplete",
|
|
51
55
|
"response.failed",
|
|
52
56
|
"error",
|
|
53
57
|
]);
|
|
@@ -212,6 +216,59 @@ export function repairOrphanResponsesToolOutputs(input: ResponseInput): Response
|
|
|
212
216
|
});
|
|
213
217
|
}
|
|
214
218
|
|
|
219
|
+
/** Placeholder output for a tool call whose result is absent from the input. */
|
|
220
|
+
const ORPHAN_TOOL_CALL_PLACEHOLDER =
|
|
221
|
+
"[No tool output recorded: the tool call was interrupted before it produced a result.]";
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Synthesize a placeholder `function_call_output` / `custom_tool_call_output`
|
|
225
|
+
* for every `function_call` / `custom_tool_call` whose `call_id` has no matching
|
|
226
|
+
* output later in the same input. The Responses API rejects an unpaired call
|
|
227
|
+
* with `400 No tool output found for function call …`.
|
|
228
|
+
*
|
|
229
|
+
* Orphan calls surface when the user branches/navigates the session tree to a
|
|
230
|
+
* node that ends on a tool call (the tool-result child is excluded from the
|
|
231
|
+
* reconstructed history) or when a turn is aborted/crashes after the call
|
|
232
|
+
* streamed but before its result persisted. Dropping the call would erase the
|
|
233
|
+
* assistant's action; a placeholder output keeps the call visible so the model
|
|
234
|
+
* can recover (e.g. re-issue the call). Symmetric to
|
|
235
|
+
* {@link repairOrphanResponsesToolOutputs}.
|
|
236
|
+
*/
|
|
237
|
+
export function repairOrphanResponsesToolCalls(input: ResponseInput): ResponseInput {
|
|
238
|
+
const outputCallIds = new Set<string>();
|
|
239
|
+
for (const item of input) {
|
|
240
|
+
const t = (item as { type?: string }).type;
|
|
241
|
+
if (t !== "function_call_output" && t !== "custom_tool_call_output") continue;
|
|
242
|
+
const callId = (item as { call_id?: unknown }).call_id;
|
|
243
|
+
if (typeof callId === "string") outputCallIds.add(callId);
|
|
244
|
+
}
|
|
245
|
+
let hasOrphan = false;
|
|
246
|
+
for (const item of input) {
|
|
247
|
+
const t = (item as { type?: string }).type;
|
|
248
|
+
if (t !== "function_call" && t !== "custom_tool_call") continue;
|
|
249
|
+
const callId = (item as { call_id?: unknown }).call_id;
|
|
250
|
+
if (typeof callId === "string" && !outputCallIds.has(callId)) {
|
|
251
|
+
hasOrphan = true;
|
|
252
|
+
break;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
if (!hasOrphan) return input;
|
|
256
|
+
const repaired: ResponseInput = [];
|
|
257
|
+
for (const item of input) {
|
|
258
|
+
repaired.push(item);
|
|
259
|
+
const t = (item as { type?: string }).type;
|
|
260
|
+
if (t !== "function_call" && t !== "custom_tool_call") continue;
|
|
261
|
+
const callId = (item as { call_id?: unknown }).call_id;
|
|
262
|
+
if (typeof callId !== "string" || outputCallIds.has(callId)) continue;
|
|
263
|
+
repaired.push({
|
|
264
|
+
type: t === "custom_tool_call" ? "custom_tool_call_output" : "function_call_output",
|
|
265
|
+
call_id: callId,
|
|
266
|
+
output: ORPHAN_TOOL_CALL_PLACEHOLDER,
|
|
267
|
+
} as ResponseInput[number]);
|
|
268
|
+
}
|
|
269
|
+
return repaired;
|
|
270
|
+
}
|
|
271
|
+
|
|
215
272
|
export function convertResponsesInputContent(
|
|
216
273
|
content: string | Array<TextContent | ImageContent>,
|
|
217
274
|
supportsImages: boolean,
|
|
@@ -234,7 +291,7 @@ export function convertResponsesInputContent(
|
|
|
234
291
|
for (const item of imageBlocks) {
|
|
235
292
|
normalizedContent.push({
|
|
236
293
|
type: "input_image",
|
|
237
|
-
detail: "auto",
|
|
294
|
+
detail: item.detail ?? "auto",
|
|
238
295
|
image_url: `data:${item.mimeType};base64,${item.data}`,
|
|
239
296
|
} satisfies ResponseInputImage);
|
|
240
297
|
}
|
|
@@ -256,6 +313,7 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
|
|
|
256
313
|
customCallIds?: Set<string>,
|
|
257
314
|
): ResponseInput {
|
|
258
315
|
const outputItems: ResponseInput = [];
|
|
316
|
+
let unsignedTextBlocks = 0;
|
|
259
317
|
const isDifferentModel =
|
|
260
318
|
assistantMsg.model !== model.id && assistantMsg.provider === model.provider && assistantMsg.api === model.api;
|
|
261
319
|
|
|
@@ -265,7 +323,12 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
|
|
|
265
323
|
continue;
|
|
266
324
|
}
|
|
267
325
|
if (block.thinkingSignature) {
|
|
268
|
-
|
|
326
|
+
try {
|
|
327
|
+
outputItems.push(JSON.parse(block.thinkingSignature) as ResponseReasoningItem);
|
|
328
|
+
} catch {
|
|
329
|
+
// Legacy/corrupt persisted signature — skip the reasoning item
|
|
330
|
+
// rather than failing the whole request build.
|
|
331
|
+
}
|
|
269
332
|
}
|
|
270
333
|
continue;
|
|
271
334
|
}
|
|
@@ -274,7 +337,10 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
|
|
|
274
337
|
const parsedSignature = parseTextSignature(block.textSignature);
|
|
275
338
|
let msgId = parsedSignature?.id;
|
|
276
339
|
if (!msgId) {
|
|
277
|
-
|
|
340
|
+
// Distinct ids per unsigned block: several text blocks in one message
|
|
341
|
+
// (cross-provider replay downgrades thinking → text) must not share an id.
|
|
342
|
+
msgId = unsignedTextBlocks === 0 ? `msg_${msgIndex}` : `msg_${msgIndex}_${unsignedTextBlocks}`;
|
|
343
|
+
unsignedTextBlocks += 1;
|
|
278
344
|
} else if (msgId.length > 64) {
|
|
279
345
|
msgId = `msg_${Bun.hash(msgId).toString(36)}`;
|
|
280
346
|
}
|
|
@@ -339,10 +405,6 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
|
|
|
339
405
|
const hasImages = toolResult.content.some((block): block is ImageContent => block.type === "image");
|
|
340
406
|
const omittedImages = hasImages && !supportsImages;
|
|
341
407
|
const normalized = normalizeResponsesToolCallId(toolResult.toolCallId);
|
|
342
|
-
if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
|
|
343
|
-
return;
|
|
344
|
-
}
|
|
345
|
-
|
|
346
408
|
const output = (
|
|
347
409
|
omittedImages
|
|
348
410
|
? joinTextWithImagePlaceholder(textResult, true)
|
|
@@ -350,6 +412,19 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
|
|
|
350
412
|
? textResult
|
|
351
413
|
: "(see attached image)"
|
|
352
414
|
).toWellFormed();
|
|
415
|
+
if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
|
|
416
|
+
// Strict backends (Azure, Copilot) reject unpaired outputs outright, but
|
|
417
|
+
// silently dropping the result loses information the model needs. Fold it
|
|
418
|
+
// into an assistant note instead (same shape as repairOrphanResponsesToolOutputs).
|
|
419
|
+
const limit = 16_000;
|
|
420
|
+
const noteText = output.length > limit ? `${output.slice(0, limit)}\n...[truncated]` : output;
|
|
421
|
+
messages.push({
|
|
422
|
+
type: "message",
|
|
423
|
+
role: "assistant",
|
|
424
|
+
content: `[Orphan ${toolResult.toolName || "tool"} result; call_id=${normalized.callId}]: ${noteText}`,
|
|
425
|
+
} as ResponseInput[number]);
|
|
426
|
+
return;
|
|
427
|
+
}
|
|
353
428
|
if (customCallIds?.has(normalized.callId)) {
|
|
354
429
|
messages.push({
|
|
355
430
|
type: "custom_tool_call_output",
|
|
@@ -375,7 +450,7 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
|
|
|
375
450
|
if (block.type === "image") {
|
|
376
451
|
contentParts.push({
|
|
377
452
|
type: "input_image",
|
|
378
|
-
detail: "auto",
|
|
453
|
+
detail: block.detail ?? "auto",
|
|
379
454
|
image_url: `data:${block.mimeType};base64,${block.data}`,
|
|
380
455
|
} satisfies ResponseInputImage);
|
|
381
456
|
}
|
|
@@ -386,10 +461,18 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
|
|
|
386
461
|
export interface ProcessResponsesStreamOptions {
|
|
387
462
|
onFirstToken?: () => void;
|
|
388
463
|
onOutputItemDone?: (item: ResponseOutputItem) => void;
|
|
464
|
+
/**
|
|
465
|
+
* Called when a terminal `response.completed` or `response.incomplete` event
|
|
466
|
+
* is successfully processed. Only invoked on the successful-completion path;
|
|
467
|
+
* thrown failure (`response.failed`) and cancellation paths never call this.
|
|
468
|
+
* Used by callers to detect premature stream closure (i.e. the stream ended
|
|
469
|
+
* without a recognized terminal event).
|
|
470
|
+
*/
|
|
471
|
+
onCompleted?: () => void;
|
|
389
472
|
}
|
|
390
473
|
|
|
391
474
|
export async function processResponsesStream<TApi extends Api>(
|
|
392
|
-
openaiStream: AsyncIterable<
|
|
475
|
+
openaiStream: AsyncIterable<ResponseStreamEvent>,
|
|
393
476
|
output: AssistantMessage,
|
|
394
477
|
stream: AssistantMessageEventStream,
|
|
395
478
|
model: Model<TApi>,
|
|
@@ -403,9 +486,16 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
403
486
|
|
|
404
487
|
// Multiple items (parallel function_calls in particular) can be open at the same
|
|
405
488
|
// time. OpenAI's spec routes every per-item event by `output_index`/`item_id`;
|
|
406
|
-
// see https://github.com/uttamtrivedi/
|
|
489
|
+
// see https://github.com/uttamtrivedi/prometheus/issues/1880 — llama.cpp emits parallel
|
|
407
490
|
// function_call deltas interleaved, and a singleton `current` reference would
|
|
408
491
|
// fold them into the wrong block and drop arguments on every call but the last.
|
|
492
|
+
//
|
|
493
|
+
// llama.cpp's `to_json_oaicompat_resp` (issue #2015) compounds this: `output_item.added`
|
|
494
|
+
// for function_call/custom_tool_call carries `item.call_id` but no `item.id` and no
|
|
495
|
+
// `output_index`, while the matching `function_call_arguments.delta` carries
|
|
496
|
+
// `item_id = "fc_<call_id>"`. Registering function-call items by `call_id` as a
|
|
497
|
+
// secondary key lets the delta lookup find the right block on hosts that emit one
|
|
498
|
+
// identifier but not the other.
|
|
409
499
|
const openItemsByOutputIndex = new Map<number, StreamingItem>();
|
|
410
500
|
const openItemsByItemId = new Map<string, StreamingItem>();
|
|
411
501
|
let lastOpenItem: StreamingItem | null = null;
|
|
@@ -415,9 +505,11 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
415
505
|
outputIndex: number | undefined,
|
|
416
506
|
itemId: string | undefined,
|
|
417
507
|
entry: StreamingItem,
|
|
508
|
+
alternateItemKey?: string,
|
|
418
509
|
): void => {
|
|
419
510
|
if (typeof outputIndex === "number") openItemsByOutputIndex.set(outputIndex, entry);
|
|
420
511
|
if (itemId) openItemsByItemId.set(itemId, entry);
|
|
512
|
+
if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.set(alternateItemKey, entry);
|
|
421
513
|
openItemsInOrder.push(entry);
|
|
422
514
|
lastOpenItem = entry;
|
|
423
515
|
};
|
|
@@ -455,9 +547,11 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
455
547
|
outputIndex: number | undefined,
|
|
456
548
|
itemId: string | undefined,
|
|
457
549
|
entry: StreamingItem | undefined,
|
|
550
|
+
alternateItemKey?: string,
|
|
458
551
|
): void => {
|
|
459
552
|
if (typeof outputIndex === "number") openItemsByOutputIndex.delete(outputIndex);
|
|
460
553
|
if (itemId) openItemsByItemId.delete(itemId);
|
|
554
|
+
if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.delete(alternateItemKey);
|
|
461
555
|
if (entry) {
|
|
462
556
|
const index = openItemsInOrder.indexOf(entry);
|
|
463
557
|
if (index >= 0) openItemsInOrder.splice(index, 1);
|
|
@@ -497,7 +591,7 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
497
591
|
partialJson: item.arguments || "",
|
|
498
592
|
};
|
|
499
593
|
output.content.push(block);
|
|
500
|
-
registerOpenItem(event.output_index, item.id, { item, block });
|
|
594
|
+
registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
|
|
501
595
|
stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
|
|
502
596
|
} else if (item.type === "custom_tool_call") {
|
|
503
597
|
const block: StreamingToolCallBlock = {
|
|
@@ -515,7 +609,7 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
515
609
|
partialJson: item.input ?? "",
|
|
516
610
|
};
|
|
517
611
|
output.content.push(block);
|
|
518
|
-
registerOpenItem(event.output_index, item.id, { item, block });
|
|
612
|
+
registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
|
|
519
613
|
stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
|
|
520
614
|
}
|
|
521
615
|
} else if (event.type === "response.reasoning_summary_part.added") {
|
|
@@ -580,32 +674,42 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
580
674
|
} else if (event.type === "response.output_text.delta") {
|
|
581
675
|
const entry = lookupOpenItem(event);
|
|
582
676
|
if (entry?.item.type === "message" && entry.block.type === "text") {
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
partial: output,
|
|
592
|
-
});
|
|
677
|
+
entry.item.content = entry.item.content || [];
|
|
678
|
+
let lastPart = entry.item.content[entry.item.content.length - 1];
|
|
679
|
+
if (lastPart?.type !== "output_text") {
|
|
680
|
+
// `content_part.added` never arrived (lossy proxy) — synthesize the
|
|
681
|
+
// part so live text still streams instead of freezing until the
|
|
682
|
+
// item's output_item.done recovers the final text.
|
|
683
|
+
lastPart = { type: "output_text", text: "", annotations: [] };
|
|
684
|
+
entry.item.content.push(lastPart);
|
|
593
685
|
}
|
|
686
|
+
entry.block.text += event.delta;
|
|
687
|
+
lastPart.text += event.delta;
|
|
688
|
+
stream.push({
|
|
689
|
+
type: "text_delta",
|
|
690
|
+
contentIndex: contentIndexOf(entry.block),
|
|
691
|
+
delta: event.delta,
|
|
692
|
+
partial: output,
|
|
693
|
+
});
|
|
594
694
|
}
|
|
595
695
|
} else if (event.type === "response.refusal.delta") {
|
|
596
696
|
const entry = lookupOpenItem(event);
|
|
597
697
|
if (entry?.item.type === "message" && entry.block.type === "text") {
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
contentIndex: contentIndexOf(entry.block),
|
|
605
|
-
delta: event.delta,
|
|
606
|
-
partial: output,
|
|
607
|
-
});
|
|
698
|
+
entry.item.content = entry.item.content || [];
|
|
699
|
+
let lastPart = entry.item.content[entry.item.content.length - 1];
|
|
700
|
+
if (lastPart?.type !== "refusal") {
|
|
701
|
+
// Same lossy-proxy hardening as the output_text branch above.
|
|
702
|
+
lastPart = { type: "refusal", refusal: "" };
|
|
703
|
+
entry.item.content.push(lastPart);
|
|
608
704
|
}
|
|
705
|
+
entry.block.text += event.delta;
|
|
706
|
+
lastPart.refusal += event.delta;
|
|
707
|
+
stream.push({
|
|
708
|
+
type: "text_delta",
|
|
709
|
+
contentIndex: contentIndexOf(entry.block),
|
|
710
|
+
delta: event.delta,
|
|
711
|
+
partial: output,
|
|
712
|
+
});
|
|
609
713
|
}
|
|
610
714
|
} else if (event.type === "response.function_call_arguments.delta") {
|
|
611
715
|
const entry = lookupOpenFunctionCallItem(event);
|
|
@@ -656,7 +760,10 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
656
760
|
} else if (event.type === "response.output_item.done") {
|
|
657
761
|
const item = structuredCloneJSON(event.item);
|
|
658
762
|
options?.onOutputItemDone?.(item);
|
|
659
|
-
const entry =
|
|
763
|
+
const entry =
|
|
764
|
+
item.type === "function_call" || item.type === "custom_tool_call"
|
|
765
|
+
? lookupOpenItem({ output_index: event.output_index, item_id: item.id ?? item.call_id })
|
|
766
|
+
: lookupOpenItem({ output_index: event.output_index, item_id: item.id });
|
|
660
767
|
if (item.type === "reasoning") {
|
|
661
768
|
const thinking =
|
|
662
769
|
item.summary?.length > 0
|
|
@@ -664,9 +771,15 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
664
771
|
: item.content?.[0]?.type === "reasoning_text"
|
|
665
772
|
? (item.content[0].text ?? "")
|
|
666
773
|
: "";
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
774
|
+
// Prefer the routed entry; the bare itemId find misroutes when ids are
|
|
775
|
+
// absent (`undefined === undefined` matches the FIRST thinking block) and
|
|
776
|
+
// misses entirely when the done-event id drifts from the added-event id.
|
|
777
|
+
const reasoningBlock =
|
|
778
|
+
entry?.block.type === "thinking"
|
|
779
|
+
? entry.block
|
|
780
|
+
: (output.content.find(b => b.type === "thinking" && (b as ThinkingContent).itemId === item.id) as
|
|
781
|
+
| ThinkingContent
|
|
782
|
+
| undefined);
|
|
670
783
|
if (reasoningBlock) {
|
|
671
784
|
reasoningBlock.thinking = thinking;
|
|
672
785
|
reasoningBlock.thinkingSignature = JSON.stringify(item);
|
|
@@ -678,18 +791,25 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
678
791
|
});
|
|
679
792
|
}
|
|
680
793
|
closeOpenItem(event.output_index, item.id, entry);
|
|
681
|
-
} else if (item.type === "message"
|
|
682
|
-
const block = entry.block;
|
|
683
|
-
|
|
794
|
+
} else if (item.type === "message") {
|
|
795
|
+
const block = entry?.block.type === "text" ? entry.block : undefined;
|
|
796
|
+
const text = item.content
|
|
684
797
|
.map(part => (part.type === "output_text" ? (part.text ?? "") : (part.refusal ?? "")))
|
|
685
798
|
.join("");
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
}
|
|
799
|
+
const textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined);
|
|
800
|
+
let contentIndex: number;
|
|
801
|
+
if (block) {
|
|
802
|
+
block.text = text;
|
|
803
|
+
block.textSignature = textSignature;
|
|
804
|
+
contentIndex = contentIndexOf(block);
|
|
805
|
+
} else {
|
|
806
|
+
// `output_item.added` never arrived (lossy proxy) — synthesize the
|
|
807
|
+
// block so the final message still carries the authoritative text.
|
|
808
|
+
const synthesized: TextContent = { type: "text", text, textSignature };
|
|
809
|
+
output.content.push(synthesized);
|
|
810
|
+
contentIndex = output.content.length - 1;
|
|
811
|
+
}
|
|
812
|
+
stream.push({ type: "text_end", contentIndex, content: text, partial: output });
|
|
693
813
|
closeOpenItem(event.output_index, item.id, entry);
|
|
694
814
|
} else if (item.type === "function_call") {
|
|
695
815
|
const block = entry?.block.type === "toolCall" ? entry.block : undefined;
|
|
@@ -704,6 +824,7 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
704
824
|
name: item.name,
|
|
705
825
|
arguments: args,
|
|
706
826
|
};
|
|
827
|
+
let contentIndex: number;
|
|
707
828
|
if (block) {
|
|
708
829
|
// Persist the authoritative final args on the stored block. The
|
|
709
830
|
// throttled delta parser may have skipped the last partial parse,
|
|
@@ -713,9 +834,15 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
713
834
|
delete (block as { partialJson?: string }).partialJson;
|
|
714
835
|
delete (block as { lastParseLen?: number }).lastParseLen;
|
|
715
836
|
delete (block as { argumentsDone?: boolean }).argumentsDone;
|
|
837
|
+
contentIndex = contentIndexOf(block);
|
|
838
|
+
} else {
|
|
839
|
+
// `output_item.added` never arrived (lossy proxy) — synthesize the
|
|
840
|
+
// block so the final message carries the call the consumer was told
|
|
841
|
+
// completed (the agent loop executes tools from message.content).
|
|
842
|
+
output.content.push(toolCall);
|
|
843
|
+
contentIndex = output.content.length - 1;
|
|
716
844
|
}
|
|
717
|
-
|
|
718
|
-
closeOpenItem(event.output_index, item.id, entry);
|
|
845
|
+
closeOpenItem(event.output_index, item.id, entry, item.call_id);
|
|
719
846
|
stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
|
|
720
847
|
} else if (item.type === "custom_tool_call") {
|
|
721
848
|
const block = entry?.block.type === "toolCall" ? entry.block : undefined;
|
|
@@ -727,12 +854,39 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
727
854
|
arguments: { input: rawInput },
|
|
728
855
|
customWireName: item.name,
|
|
729
856
|
};
|
|
730
|
-
|
|
731
|
-
|
|
857
|
+
let contentIndex: number;
|
|
858
|
+
if (block) {
|
|
859
|
+
// Persist the final input on the stored block and drop the transient
|
|
860
|
+
// accumulation buffer, mirroring the function_call branch above.
|
|
861
|
+
block.arguments = { input: rawInput };
|
|
862
|
+
delete (block as { partialJson?: string }).partialJson;
|
|
863
|
+
delete (block as { lastParseLen?: number }).lastParseLen;
|
|
864
|
+
contentIndex = contentIndexOf(block);
|
|
865
|
+
} else {
|
|
866
|
+
output.content.push(toolCall);
|
|
867
|
+
contentIndex = output.content.length - 1;
|
|
868
|
+
}
|
|
869
|
+
closeOpenItem(event.output_index, item.id, entry, item.call_id);
|
|
732
870
|
stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
|
|
733
871
|
}
|
|
734
|
-
} else if (event.type === "response.completed") {
|
|
872
|
+
} else if (event.type === "response.completed" || event.type === "response.incomplete") {
|
|
735
873
|
const response = event.response;
|
|
874
|
+
// Finalize any toolCall block whose output_item.done never arrived: the
|
|
875
|
+
// throttled delta parser may have left block.arguments stale, and the
|
|
876
|
+
// toolUse override below would hand the agent incomplete arguments.
|
|
877
|
+
for (const open of openItemsInOrder) {
|
|
878
|
+
if (open.block.type !== "toolCall") continue;
|
|
879
|
+
const block = open.block;
|
|
880
|
+
if (block.partialJson && !block.argumentsDone) {
|
|
881
|
+
block.arguments =
|
|
882
|
+
open.item.type === "custom_tool_call"
|
|
883
|
+
? { input: block.partialJson }
|
|
884
|
+
: parseStreamingJson(block.partialJson);
|
|
885
|
+
}
|
|
886
|
+
delete (block as { partialJson?: string }).partialJson;
|
|
887
|
+
delete (block as { lastParseLen?: number }).lastParseLen;
|
|
888
|
+
delete (block as { argumentsDone?: boolean }).argumentsDone;
|
|
889
|
+
}
|
|
736
890
|
if (response?.id) {
|
|
737
891
|
output.responseId = response.id;
|
|
738
892
|
}
|
|
@@ -752,12 +906,40 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
752
906
|
: "Unknown error (no error details in response)";
|
|
753
907
|
throw new Error(message);
|
|
754
908
|
}
|
|
909
|
+
if (response?.status === "incomplete" && response.incomplete_details?.reason === "content_filter") {
|
|
910
|
+
// A content-filtered turn is a failure, not a token-cap truncation —
|
|
911
|
+
// mapping it to "length" would route the agent loop into "shorten your
|
|
912
|
+
// output" recovery against a filtered prompt.
|
|
913
|
+
throw new Error("incomplete: content_filter");
|
|
914
|
+
}
|
|
755
915
|
if (output.content.some(block => block.type === "toolCall") && output.stopReason === "stop") {
|
|
756
916
|
output.stopReason = "toolUse";
|
|
757
917
|
}
|
|
918
|
+
// Codex-lineage backends/gateways mark an unfinished turn with
|
|
919
|
+
// `end_turn: false` on the terminal event (the response ended on
|
|
920
|
+
// commentary only). Not in the SDK types or the platform API today —
|
|
921
|
+
// inert when absent. Same mapping as openai-codex-responses: surface a
|
|
922
|
+
// non-terminal stop so the agent loop re-samples instead of ending the
|
|
923
|
+
// turn.
|
|
924
|
+
if ((response as { end_turn?: boolean } | undefined)?.end_turn === false && output.stopReason === "stop") {
|
|
925
|
+
output.stopDetails = { type: "pause_turn" };
|
|
926
|
+
}
|
|
927
|
+
options?.onCompleted?.();
|
|
928
|
+
// `response.completed`/`response.incomplete` is the last event of a
|
|
929
|
+
// Responses stream. Stop pulling instead of waiting for the server to
|
|
930
|
+
// close the connection: misbehaving providers keep the socket open
|
|
931
|
+
// after the terminal event, which would park this loop until the idle
|
|
932
|
+
// watchdog converts an already-successful turn into a timeout error.
|
|
933
|
+
// Breaking unwinds the iterator chain (the consumer's `.return()`
|
|
934
|
+
// reaches the SDK stream), actively releasing the connection.
|
|
935
|
+
break;
|
|
758
936
|
} else if (event.type === "error") {
|
|
759
|
-
|
|
937
|
+
const err = (event as any).error ?? event;
|
|
938
|
+
const code = err.code ?? "unknown";
|
|
939
|
+
const message = err.message ?? "no message";
|
|
940
|
+
throw new Error(`Error Code ${code}: ${message}`);
|
|
760
941
|
} else if (event.type === "response.failed") {
|
|
942
|
+
populateResponsesUsageFromResponse(output, event.response?.usage);
|
|
761
943
|
const error = event.response?.error ?? (event.response as any)?.status_details?.error;
|
|
762
944
|
const details = event.response?.incomplete_details;
|
|
763
945
|
const message = error
|
|
@@ -770,7 +952,7 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
770
952
|
}
|
|
771
953
|
}
|
|
772
954
|
|
|
773
|
-
export function mapOpenAIResponsesStopReason(status:
|
|
955
|
+
export function mapOpenAIResponsesStopReason(status: ResponseStatus | undefined): StopReason {
|
|
774
956
|
if (!status) return "stop";
|
|
775
957
|
switch (status) {
|
|
776
958
|
case "completed":
|
|
@@ -784,8 +966,12 @@ export function mapOpenAIResponsesStopReason(status: OpenAI.Responses.ResponseSt
|
|
|
784
966
|
case "queued":
|
|
785
967
|
return "stop";
|
|
786
968
|
default: {
|
|
969
|
+
// Compile-time exhaustiveness; at runtime a brand-new status from the
|
|
970
|
+
// server must degrade gracefully instead of failing a fully-streamed
|
|
971
|
+
// response.
|
|
787
972
|
const exhaustive: never = status;
|
|
788
|
-
|
|
973
|
+
logger.warn("Unhandled OpenAI Responses stop reason", { status: exhaustive });
|
|
974
|
+
return "stop";
|
|
789
975
|
}
|
|
790
976
|
}
|
|
791
977
|
}
|
|
@@ -820,7 +1006,7 @@ export type ResponsesSamplingParamsExtras = {
|
|
|
820
1006
|
repetition_penalty?: number;
|
|
821
1007
|
};
|
|
822
1008
|
|
|
823
|
-
type CommonResponsesParams =
|
|
1009
|
+
type CommonResponsesParams = ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;
|
|
824
1010
|
|
|
825
1011
|
type CommonSamplingOptions = Pick<
|
|
826
1012
|
StreamOptions,
|
|
@@ -839,9 +1025,15 @@ type CommonSamplingOptions = Pick<
|
|
|
839
1025
|
export function applyCommonResponsesSamplingParams<P extends CommonResponsesParams>(
|
|
840
1026
|
params: P,
|
|
841
1027
|
options: CommonSamplingOptions | undefined,
|
|
842
|
-
model: Pick<Model, "provider" | "omitMaxOutputTokens">,
|
|
1028
|
+
model: Pick<Model, "provider" | "omitMaxOutputTokens" | "maxTokens">,
|
|
843
1029
|
): void {
|
|
844
|
-
if (options?.maxTokens && !model.omitMaxOutputTokens)
|
|
1030
|
+
if (options?.maxTokens && !model.omitMaxOutputTokens) {
|
|
1031
|
+
params.max_output_tokens = Math.min(
|
|
1032
|
+
options.maxTokens,
|
|
1033
|
+
model.maxTokens ?? Number.POSITIVE_INFINITY,
|
|
1034
|
+
OPENAI_MAX_OUTPUT_TOKENS,
|
|
1035
|
+
);
|
|
1036
|
+
}
|
|
845
1037
|
if (options?.temperature !== undefined) params.temperature = options.temperature;
|
|
846
1038
|
if (options?.topP !== undefined) params.top_p = options.topP;
|
|
847
1039
|
if (options?.topK !== undefined) params.top_k = options.topK;
|
|
@@ -863,8 +1055,12 @@ type ReasoningOptions = {
|
|
|
863
1055
|
|
|
864
1056
|
/**
|
|
865
1057
|
* Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
|
|
866
|
-
* set effort/summary when requested, and otherwise inject the
|
|
867
|
-
*
|
|
1058
|
+
* set effort/summary when requested, and otherwise inject the "Juice: 0" no-reasoning hack
|
|
1059
|
+
* when `model.compat.requiresJuiceZeroHack` is set (GPT-5 family by default).
|
|
1060
|
+
* Mutates `params` and may push a developer message into `messages`. Returns
|
|
1061
|
+
* the number of per-turn trailing scaffolding items appended to `messages`
|
|
1062
|
+
* (the "Juice: 0" developer item), so callers doing stateful
|
|
1063
|
+
* `previous_response_id` chaining can exclude them from append-baseline math.
|
|
868
1064
|
*
|
|
869
1065
|
* @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
|
|
870
1066
|
* body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
|
|
@@ -875,21 +1071,23 @@ type ReasoningOptions = {
|
|
|
875
1071
|
* without needing explicit activation. Callers that pass `options.reasoning` for such models
|
|
876
1072
|
* should expect this documented downgrade: the model will reason, but at its default effort.
|
|
877
1073
|
*/
|
|
878
|
-
export function applyResponsesReasoningParams<P extends
|
|
1074
|
+
export function applyResponsesReasoningParams<P extends ResponseCreateParamsStreaming>(
|
|
879
1075
|
params: P,
|
|
880
|
-
model: Model<
|
|
1076
|
+
model: Model<"openai-responses" | "azure-openai-responses" | "openai-codex-responses">,
|
|
881
1077
|
options: ReasoningOptions | undefined,
|
|
882
1078
|
messages: ResponseInput,
|
|
883
1079
|
mapEffort?: (effort: string) => string,
|
|
884
1080
|
includeEncryptedReasoning: boolean = true,
|
|
885
1081
|
omitReasoningEffort: boolean = false,
|
|
886
|
-
):
|
|
887
|
-
if (!model.reasoning) return;
|
|
1082
|
+
): number {
|
|
1083
|
+
if (!model.reasoning) return 0;
|
|
888
1084
|
// Always request encrypted reasoning content so reasoning items can be replayed in
|
|
889
1085
|
// multi-turn conversations when store is false (items aren't persisted server-side, so
|
|
890
|
-
// we must include the full content). See: https://github.com/uttamtrivedi/
|
|
1086
|
+
// we must include the full content). See: https://github.com/uttamtrivedi/prometheus/issues/41
|
|
891
1087
|
if (includeEncryptedReasoning) {
|
|
892
|
-
params.include
|
|
1088
|
+
const include = params.include ?? [];
|
|
1089
|
+
if (!include.includes("reasoning.encrypted_content")) include.push("reasoning.encrypted_content");
|
|
1090
|
+
params.include = include;
|
|
893
1091
|
}
|
|
894
1092
|
|
|
895
1093
|
if (options?.reasoning || options?.reasoningSummary !== undefined) {
|
|
@@ -904,12 +1102,12 @@ export function applyResponsesReasoningParams<P extends OpenAI.Responses.Respons
|
|
|
904
1102
|
// When only options.reasoning (effort level) is set, params.reasoning
|
|
905
1103
|
// is intentionally omitted — see @param omitReasoningEffort above.
|
|
906
1104
|
if (options?.reasoningSummary !== undefined && options?.reasoningSummary !== null) {
|
|
907
|
-
type ReasoningParam = NonNullable<
|
|
1105
|
+
type ReasoningParam = NonNullable<ResponseCreateParamsStreaming["reasoning"]>;
|
|
908
1106
|
params.reasoning = { summary: options.reasoningSummary || "auto" } as P["reasoning"] & ReasoningParam;
|
|
909
1107
|
}
|
|
910
1108
|
} else {
|
|
911
1109
|
const requested = options?.reasoning || "medium";
|
|
912
|
-
type ReasoningParam = NonNullable<
|
|
1110
|
+
type ReasoningParam = NonNullable<ResponseCreateParamsStreaming["reasoning"]>;
|
|
913
1111
|
const reasoningParams: ReasoningParam = {
|
|
914
1112
|
effort: (mapEffort ? mapEffort(requested) : requested) as ReasoningParam["effort"],
|
|
915
1113
|
};
|
|
@@ -918,13 +1116,15 @@ export function applyResponsesReasoningParams<P extends OpenAI.Responses.Respons
|
|
|
918
1116
|
}
|
|
919
1117
|
params.reasoning = reasoningParams as P["reasoning"];
|
|
920
1118
|
}
|
|
921
|
-
} else if (model.
|
|
1119
|
+
} else if (model.compat.requiresJuiceZeroHack) {
|
|
922
1120
|
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
|
923
1121
|
messages.push({
|
|
924
1122
|
role: "developer",
|
|
925
1123
|
content: [{ type: "input_text", text: "# Juice: 0 !important" }],
|
|
926
1124
|
});
|
|
1125
|
+
return 1;
|
|
927
1126
|
}
|
|
1127
|
+
return 0;
|
|
928
1128
|
}
|
|
929
1129
|
|
|
930
1130
|
/** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
|
|
@@ -944,6 +1144,10 @@ export function populateResponsesUsageFromResponse(
|
|
|
944
1144
|
if (!usage) return;
|
|
945
1145
|
const cachedTokens = usage.input_tokens_details?.cached_tokens || 0;
|
|
946
1146
|
const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0;
|
|
1147
|
+
// Wholesale replacement must not drop provider-annotated extras (Copilot
|
|
1148
|
+
// premium-request accounting): the failed/cancelled paths throw right after
|
|
1149
|
+
// this call with no later chance to re-apply.
|
|
1150
|
+
const premiumRequests = output.usage.premiumRequests;
|
|
947
1151
|
output.usage = {
|
|
948
1152
|
input: (usage.input_tokens || 0) - cachedTokens,
|
|
949
1153
|
output: usage.output_tokens || 0,
|
|
@@ -953,4 +1157,38 @@ export function populateResponsesUsageFromResponse(
|
|
|
953
1157
|
...(reasoningTokens > 0 ? { reasoningTokens } : {}),
|
|
954
1158
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
955
1159
|
};
|
|
1160
|
+
if (premiumRequests !== undefined) {
|
|
1161
|
+
output.usage.premiumRequests = premiumRequests;
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
/**
|
|
1166
|
+
* Strict-prefix delta for stateful `previous_response_id` chaining (used by the
|
|
1167
|
+
* platform Responses provider and the Codex provider on both transports):
|
|
1168
|
+
* returns the input items the current request appends beyond the previous
|
|
1169
|
+
* request's input plus the previous response's output items, or null when the
|
|
1170
|
+
* request options differ or history mutated (the chain must break). Per-turn
|
|
1171
|
+
* `client_metadata` (e.g. rotating turn ids) is excluded from the option
|
|
1172
|
+
* comparison; codex-rs excludes it from the same check.
|
|
1173
|
+
*/
|
|
1174
|
+
export function buildResponsesDeltaInput<TItem>(
|
|
1175
|
+
previous: { input?: unknown } | undefined,
|
|
1176
|
+
previousResponseItems: readonly TItem[] | undefined,
|
|
1177
|
+
current: { input?: unknown },
|
|
1178
|
+
): TItem[] | null {
|
|
1179
|
+
if (!previous) return null;
|
|
1180
|
+
if (!Array.isArray(previous.input) || !Array.isArray(current.input)) return null;
|
|
1181
|
+
const previousWithoutInput = { ...previous, input: undefined, client_metadata: undefined };
|
|
1182
|
+
const currentWithoutInput = { ...current, input: undefined, client_metadata: undefined };
|
|
1183
|
+
if (JSON.stringify(previousWithoutInput) !== JSON.stringify(currentWithoutInput)) {
|
|
1184
|
+
return null;
|
|
1185
|
+
}
|
|
1186
|
+
const baseline = [...previous.input, ...(previousResponseItems ?? [])];
|
|
1187
|
+
if (current.input.length <= baseline.length) return null;
|
|
1188
|
+
for (let index = 0; index < baseline.length; index += 1) {
|
|
1189
|
+
if (JSON.stringify(baseline[index]) !== JSON.stringify(current.input[index])) {
|
|
1190
|
+
return null;
|
|
1191
|
+
}
|
|
1192
|
+
}
|
|
1193
|
+
return current.input.slice(baseline.length) as TItem[];
|
|
956
1194
|
}
|