@prometheus-ai/ai 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/auth-broker/remote-store.d.ts +2 -1
- package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
- package/dist/types/auth-gateway/server.d.ts +19 -0
- package/dist/types/auth-gateway/types.d.ts +9 -3
- package/dist/types/auth-retry.d.ts +119 -0
- package/dist/types/auth-storage.d.ts +217 -8
- package/dist/types/errors.d.ts +24 -0
- package/dist/types/index.d.ts +5 -9
- package/dist/types/provider-details.d.ts +1 -1
- package/dist/types/providers/amazon-bedrock.d.ts +12 -6
- package/dist/types/providers/anthropic-client.d.ts +10 -3
- package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
- package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
- package/dist/types/providers/anthropic-wire.d.ts +3 -3
- package/dist/types/providers/anthropic.d.ts +41 -34
- package/dist/types/providers/aws-credentials.d.ts +8 -0
- package/dist/types/providers/azure-openai-responses.d.ts +1 -0
- package/dist/types/providers/google-gemini-cli.d.ts +22 -1
- package/dist/types/providers/google-shared.d.ts +22 -0
- package/dist/types/providers/google-types.d.ts +13 -1
- package/dist/types/providers/mock.d.ts +8 -3
- package/dist/types/providers/ollama.d.ts +6 -0
- package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
- package/dist/types/providers/openai-chat-server.d.ts +3 -3
- package/dist/types/providers/openai-chat-wire.d.ts +644 -0
- package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
- package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
- package/dist/types/providers/openai-codex-responses.d.ts +31 -2
- package/dist/types/providers/openai-completions-compat.d.ts +2 -25
- package/dist/types/providers/openai-completions.d.ts +2 -10
- package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
- package/dist/types/providers/openai-responses-server.d.ts +2 -2
- package/dist/types/providers/openai-responses-shared.d.ts +49 -9
- package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
- package/dist/types/providers/openai-responses.d.ts +13 -4
- package/dist/types/providers/prometheus-native-client.d.ts +9 -0
- package/dist/types/providers/prometheus-native-server.d.ts +4 -3
- package/dist/types/providers/transform-messages.d.ts +1 -2
- package/dist/types/rate-limit-utils.d.ts +3 -2
- package/dist/types/registry/aimlapi.d.ts +4 -0
- package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
- package/dist/types/registry/amazon-bedrock.d.ts +5 -0
- package/dist/types/registry/anthropic.d.ts +10 -0
- package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
- package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
- package/dist/types/registry/cerebras.d.ts +7 -0
- package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
- package/dist/types/registry/cursor.d.ts +7 -0
- package/dist/types/registry/deepseek.d.ts +8 -0
- package/dist/types/registry/derived.d.ts +5 -0
- package/dist/types/registry/firepass.d.ts +16 -0
- package/dist/types/registry/fireworks.d.ts +7 -0
- package/dist/types/registry/github-copilot.d.ts +7 -0
- package/dist/types/registry/gitlab-duo.d.ts +9 -0
- package/dist/types/registry/google-antigravity.d.ts +9 -0
- package/dist/types/registry/google-gemini-cli.d.ts +9 -0
- package/dist/types/registry/google-vertex.d.ts +5 -0
- package/dist/types/registry/google.d.ts +4 -0
- package/dist/types/registry/groq.d.ts +4 -0
- package/dist/types/registry/huggingface.d.ts +7 -0
- package/dist/types/registry/index.d.ts +4 -0
- package/dist/types/registry/kagi.d.ts +14 -0
- package/dist/types/registry/kilo.d.ts +7 -0
- package/dist/types/registry/kimi-code.d.ts +7 -0
- package/dist/types/registry/litellm.d.ts +13 -0
- package/dist/types/registry/lm-studio.d.ts +8 -0
- package/dist/types/registry/minimax-code-cn.d.ts +6 -0
- package/dist/types/registry/minimax-code.d.ts +6 -0
- package/dist/types/registry/minimax.d.ts +4 -0
- package/dist/types/registry/mistral.d.ts +4 -0
- package/dist/types/registry/moonshot.d.ts +7 -0
- package/dist/types/registry/nanogpt.d.ts +7 -0
- package/dist/types/registry/nvidia.d.ts +7 -0
- package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
- package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
- package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
- package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
- package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
- package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
- package/dist/types/registry/ollama-cloud.d.ts +7 -0
- package/dist/types/registry/ollama.d.ts +12 -0
- package/dist/types/registry/openai-codex-device.d.ts +8 -0
- package/dist/types/registry/openai-codex.d.ts +9 -0
- package/dist/types/registry/openai.d.ts +4 -0
- package/dist/types/registry/opencode-go.d.ts +6 -0
- package/dist/types/registry/opencode-zen.d.ts +6 -0
- package/dist/types/registry/openrouter.d.ts +13 -0
- package/dist/types/registry/parallel.d.ts +14 -0
- package/dist/types/registry/perplexity.d.ts +7 -0
- package/dist/types/registry/qianfan.d.ts +7 -0
- package/dist/types/registry/qwen-portal.d.ts +7 -0
- package/dist/types/registry/registry.d.ts +272 -0
- package/dist/types/registry/synthetic.d.ts +6 -0
- package/dist/types/registry/tavily.d.ts +14 -0
- package/dist/types/registry/together.d.ts +6 -0
- package/dist/types/registry/types.d.ts +51 -0
- package/dist/types/registry/venice.d.ts +13 -0
- package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
- package/dist/types/registry/vllm.d.ts +7 -0
- package/dist/types/registry/wafer-pass.d.ts +6 -0
- package/dist/types/registry/wafer-serverless.d.ts +6 -0
- package/dist/types/registry/xai-oauth.d.ts +7 -0
- package/dist/types/registry/xai.d.ts +4 -0
- package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
- package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
- package/dist/types/registry/xiaomi.d.ts +6 -0
- package/dist/types/registry/zai.d.ts +7 -0
- package/dist/types/registry/zenmux.d.ts +7 -0
- package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
- package/dist/types/stream.d.ts +9 -1
- package/dist/types/types.d.ts +56 -295
- package/dist/types/usage/google-antigravity.d.ts +15 -1
- package/dist/types/usage/openai-codex-reset.d.ts +79 -0
- package/dist/types/usage/openai-codex.d.ts +1 -0
- package/dist/types/usage.d.ts +77 -4
- package/dist/types/utils/abort.d.ts +6 -0
- package/dist/types/utils/event-stream.d.ts +2 -0
- package/dist/types/utils/http-inspector.d.ts +0 -1
- package/dist/types/utils/idle-iterator.d.ts +35 -0
- package/dist/types/utils/openai-http.d.ts +58 -0
- package/dist/types/utils/request-debug.d.ts +3 -0
- package/dist/types/utils/retry-after.d.ts +1 -0
- package/dist/types/utils/schema/fields.d.ts +5 -0
- package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
- package/dist/types/utils/schema/stamps.d.ts +7 -15
- package/dist/types/utils/sse-debug.d.ts +0 -5
- package/dist/types/utils/stream-markup-healing.d.ts +2 -0
- package/dist/types/utils.d.ts +1 -5
- package/package.json +17 -29
- package/src/auth-broker/remote-store.ts +10 -1
- package/src/auth-broker/snapshot-cache.ts +1 -1
- package/src/auth-broker/wire-schemas.ts +1 -1
- package/src/auth-gateway/http.ts +1 -1
- package/src/auth-gateway/server.ts +95 -30
- package/src/auth-gateway/types.ts +10 -2
- package/src/auth-retry.ts +238 -0
- package/src/auth-storage.ts +935 -430
- package/src/errors.ts +32 -0
- package/src/index.ts +9 -14
- package/src/provider-details.ts +1 -1
- package/src/providers/__tests__/google-auth.test.ts +144 -0
- package/src/providers/amazon-bedrock.ts +70 -40
- package/src/providers/anthropic-client.ts +15 -13
- package/src/providers/anthropic-messages-server-schema.ts +17 -7
- package/src/providers/anthropic-messages-server.ts +88 -20
- package/src/providers/anthropic-wire.ts +4 -3
- package/src/providers/anthropic.ts +1234 -621
- package/src/providers/aws-credentials.ts +47 -5
- package/src/providers/aws-eventstream.ts +5 -0
- package/src/providers/azure-openai-responses.ts +117 -67
- package/src/providers/cursor.ts +30 -30
- package/src/providers/github-copilot-headers.ts +1 -1
- package/src/providers/gitlab-duo.ts +36 -29
- package/src/providers/google-auth.ts +71 -8
- package/src/providers/google-gemini-cli.ts +118 -22
- package/src/providers/google-shared.ts +163 -43
- package/src/providers/google-types.ts +10 -1
- package/src/providers/kimi.ts +1 -1
- package/src/providers/mock.ts +11 -3
- package/src/providers/ollama.ts +64 -7
- package/src/providers/openai-anthropic-shim.ts +17 -8
- package/src/providers/openai-chat-server-schema.ts +9 -3
- package/src/providers/openai-chat-server.ts +82 -16
- package/src/providers/openai-chat-wire.ts +847 -0
- package/src/providers/openai-codex/request-transformer.ts +129 -34
- package/src/providers/openai-codex/response-handler.ts +22 -1
- package/src/providers/openai-codex-responses.ts +699 -247
- package/src/providers/openai-completions-compat.ts +8 -308
- package/src/providers/openai-completions.ts +416 -267
- package/src/providers/openai-responses-server-schema.ts +15 -9
- package/src/providers/openai-responses-server.ts +162 -114
- package/src/providers/openai-responses-shared.ts +320 -82
- package/src/providers/openai-responses-wire.ts +6391 -0
- package/src/providers/openai-responses.ts +382 -176
- package/src/providers/prometheus-native-client.ts +27 -11
- package/src/providers/prometheus-native-server.ts +44 -17
- package/src/providers/transform-messages.ts +311 -120
- package/src/providers/vision-guard.ts +5 -3
- package/src/rate-limit-utils.ts +13 -3
- package/src/registry/aimlapi.ts +6 -0
- package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
- package/src/registry/amazon-bedrock.ts +22 -0
- package/src/registry/anthropic.ts +26 -0
- package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
- package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
- package/src/{utils/oauth → registry}/cerebras.ts +8 -1
- package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
- package/src/registry/cursor.ts +20 -0
- package/src/{utils/oauth → registry}/deepseek.ts +9 -17
- package/src/registry/derived.ts +9 -0
- package/src/{utils/oauth → registry}/firepass.ts +10 -2
- package/src/{utils/oauth → registry}/fireworks.ts +8 -1
- package/src/registry/github-copilot.ts +22 -0
- package/src/registry/gitlab-duo.ts +19 -0
- package/src/registry/google-antigravity.ts +21 -0
- package/src/registry/google-gemini-cli.ts +21 -0
- package/src/registry/google-vertex.ts +38 -0
- package/src/registry/google.ts +6 -0
- package/src/registry/groq.ts +6 -0
- package/src/{utils/oauth → registry}/huggingface.ts +8 -19
- package/src/registry/index.ts +4 -0
- package/src/{utils/oauth → registry}/kagi.ts +9 -11
- package/src/{utils/oauth → registry}/kilo.ts +11 -6
- package/src/registry/kimi-code.ts +17 -0
- package/src/{utils/oauth → registry}/litellm.ts +8 -12
- package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
- package/src/registry/minimax-code-cn.ts +12 -0
- package/src/registry/minimax-code.ts +12 -0
- package/src/registry/minimax.ts +6 -0
- package/src/registry/mistral.ts +6 -0
- package/src/{utils/oauth → registry}/moonshot.ts +8 -9
- package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
- package/src/{utils/oauth → registry}/nvidia.ts +8 -18
- package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
- package/src/{utils → registry}/oauth/anthropic.ts +38 -17
- package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
- package/src/registry/oauth/gitlab-duo.ts +198 -0
- package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
- package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
- package/src/registry/oauth/index.ts +164 -0
- package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
- package/src/{utils → registry}/oauth/types.ts +7 -51
- package/src/{utils → registry}/oauth/wafer.ts +1 -1
- package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
- package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
- package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
- package/src/{utils/oauth → registry}/ollama.ts +8 -13
- package/src/registry/openai-codex-device.ts +18 -0
- package/src/registry/openai-codex.ts +19 -0
- package/src/registry/openai.ts +6 -0
- package/src/registry/opencode-go.ts +12 -0
- package/src/registry/opencode-zen.ts +12 -0
- package/src/{utils/oauth → registry}/openrouter.ts +10 -2
- package/src/{utils/oauth → registry}/parallel.ts +9 -11
- package/src/registry/perplexity.ts +13 -0
- package/src/{utils/oauth → registry}/qianfan.ts +8 -17
- package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
- package/src/registry/registry.ts +149 -0
- package/src/{utils/oauth → registry}/synthetic.ts +7 -1
- package/src/{utils/oauth → registry}/tavily.ts +10 -12
- package/src/{utils/oauth → registry}/together.ts +7 -1
- package/src/registry/types.ts +56 -0
- package/src/{utils/oauth → registry}/venice.ts +8 -12
- package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
- package/src/{utils/oauth → registry}/vllm.ts +9 -16
- package/src/registry/wafer-pass.ts +12 -0
- package/src/registry/wafer-serverless.ts +12 -0
- package/src/registry/xai-oauth.ts +17 -0
- package/src/registry/xai.ts +6 -0
- package/src/registry/xiaomi-token-plan-ams.ts +12 -0
- package/src/registry/xiaomi-token-plan-cn.ts +12 -0
- package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
- package/src/registry/xiaomi.ts +12 -0
- package/src/{utils/oauth → registry}/zai.ts +10 -22
- package/src/{utils/oauth → registry}/zenmux.ts +8 -1
- package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
- package/src/stream.ts +229 -199
- package/src/types.ts +63 -384
- package/src/usage/claude.ts +4 -2
- package/src/usage/github-copilot.ts +4 -2
- package/src/usage/google-antigravity.ts +196 -28
- package/src/usage/kimi.ts +1 -1
- package/src/usage/minimax-code.ts +5 -6
- package/src/usage/openai-codex-reset.ts +174 -0
- package/src/usage/openai-codex.ts +19 -2
- package/src/usage/zai.ts +2 -1
- package/src/usage.ts +93 -4
- package/src/utils/abort.ts +14 -0
- package/src/utils/event-stream.ts +17 -0
- package/src/utils/http-inspector.ts +4 -12
- package/src/utils/idle-iterator.ts +250 -79
- package/src/utils/openai-http.ts +157 -0
- package/src/utils/request-debug.ts +67 -19
- package/src/utils/retry-after.ts +1 -1
- package/src/utils/retry.ts +23 -2
- package/src/utils/schema/CONSTRAINTS.md +4 -2
- package/src/utils/schema/fields.ts +16 -0
- package/src/utils/schema/json-schema-validator.ts +19 -1
- package/src/utils/schema/normalize.ts +80 -8
- package/src/utils/schema/stamps.ts +22 -10
- package/src/utils/schema/wire.ts +2 -2
- package/src/utils/sse-debug.ts +0 -271
- package/src/utils/stream-markup-healing.ts +50 -8
- package/src/utils/validation.ts +49 -13
- package/src/utils.ts +2 -26
- package/dist/types/model-cache.d.ts +0 -17
- package/dist/types/model-manager.d.ts +0 -64
- package/dist/types/model-thinking.d.ts +0 -100
- package/dist/types/models.d.ts +0 -12
- package/dist/types/provider-models/bundled-references.d.ts +0 -4
- package/dist/types/provider-models/descriptors.d.ts +0 -50
- package/dist/types/provider-models/google.d.ts +0 -24
- package/dist/types/provider-models/index.d.ts +0 -5
- package/dist/types/provider-models/ollama.d.ts +0 -7
- package/dist/types/provider-models/openai-compat.d.ts +0 -323
- package/dist/types/provider-models/special.d.ts +0 -16
- package/dist/types/utils/discovery/antigravity.d.ts +0 -61
- package/dist/types/utils/discovery/codex.d.ts +0 -38
- package/dist/types/utils/discovery/cursor.d.ts +0 -23
- package/dist/types/utils/discovery/gemini.d.ts +0 -25
- package/dist/types/utils/discovery/index.d.ts +0 -4
- package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
- package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
- package/dist/types/utils/oauth/cerebras.d.ts +0 -1
- package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/deepseek.d.ts +0 -10
- package/dist/types/utils/oauth/firepass.d.ts +0 -1
- package/dist/types/utils/oauth/fireworks.d.ts +0 -1
- package/dist/types/utils/oauth/huggingface.d.ts +0 -19
- package/dist/types/utils/oauth/kagi.d.ts +0 -17
- package/dist/types/utils/oauth/kilo.d.ts +0 -5
- package/dist/types/utils/oauth/litellm.d.ts +0 -18
- package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
- package/dist/types/utils/oauth/moonshot.d.ts +0 -1
- package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
- package/dist/types/utils/oauth/nvidia.d.ts +0 -18
- package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
- package/dist/types/utils/oauth/ollama.d.ts +0 -18
- package/dist/types/utils/oauth/openrouter.d.ts +0 -1
- package/dist/types/utils/oauth/parallel.d.ts +0 -17
- package/dist/types/utils/oauth/qianfan.d.ts +0 -17
- package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
- package/dist/types/utils/oauth/synthetic.d.ts +0 -1
- package/dist/types/utils/oauth/tavily.d.ts +0 -17
- package/dist/types/utils/oauth/together.d.ts +0 -1
- package/dist/types/utils/oauth/venice.d.ts +0 -18
- package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
- package/dist/types/utils/oauth/vllm.d.ts +0 -16
- package/dist/types/utils/oauth/zai.d.ts +0 -18
- package/dist/types/utils/oauth/zenmux.d.ts +0 -1
- package/dist/types/utils/oauth/zhipu.d.ts +0 -18
- package/src/model-cache.ts +0 -129
- package/src/model-manager.ts +0 -469
- package/src/model-thinking.ts +0 -756
- package/src/models.json +0 -60287
- package/src/models.json.d.ts +0 -9
- package/src/models.ts +0 -56
- package/src/provider-models/bundled-references.ts +0 -38
- package/src/provider-models/descriptors.ts +0 -364
- package/src/provider-models/google.ts +0 -88
- package/src/provider-models/index.ts +0 -5
- package/src/provider-models/ollama.ts +0 -153
- package/src/provider-models/openai-compat.ts +0 -2904
- package/src/provider-models/special.ts +0 -67
- package/src/utils/discovery/antigravity.ts +0 -261
- package/src/utils/discovery/codex.ts +0 -371
- package/src/utils/discovery/cursor.ts +0 -306
- package/src/utils/discovery/gemini.ts +0 -248
- package/src/utils/discovery/index.ts +0 -4
- package/src/utils/discovery/openai-compatible.ts +0 -224
- package/src/utils/oauth/gitlab-duo.ts +0 -123
- package/src/utils/oauth/index.ts +0 -502
- /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
- /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
- /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
- /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
- /package/src/{utils → registry}/oauth/cursor.ts +0 -0
- /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
- /package/src/{utils → registry}/oauth/kimi.ts +0 -0
- /package/src/{utils → registry}/oauth/oauth.html +0 -0
- /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
- /package/src/{utils → registry}/oauth/opencode.ts +0 -0
- /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
- /package/src/{utils → registry}/oauth/pkce.ts +0 -0
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
import * as os from "node:os";
|
|
2
2
|
import { scheduler } from "node:timers/promises";
|
|
3
|
+
import { calculateCost } from "@prometheus-ai/catalog/models";
|
|
4
|
+
import {
|
|
5
|
+
CODEX_BASE_URL,
|
|
6
|
+
getCodexAccountId,
|
|
7
|
+
OPENAI_HEADER_VALUES,
|
|
8
|
+
OPENAI_HEADERS,
|
|
9
|
+
} from "@prometheus-ai/catalog/wire/codex";
|
|
3
10
|
import {
|
|
4
11
|
$env,
|
|
5
12
|
$flag,
|
|
@@ -10,17 +17,7 @@ import {
|
|
|
10
17
|
readSseJson,
|
|
11
18
|
structuredCloneJSON,
|
|
12
19
|
} from "@prometheus-ai/utils";
|
|
13
|
-
import type OpenAI from "openai";
|
|
14
|
-
import type {
|
|
15
|
-
ResponseCustomToolCall,
|
|
16
|
-
ResponseFunctionToolCall,
|
|
17
|
-
ResponseInput,
|
|
18
|
-
ResponseInputContent,
|
|
19
|
-
ResponseOutputMessage,
|
|
20
|
-
ResponseReasoningItem,
|
|
21
|
-
} from "openai/resources/responses/responses";
|
|
22
20
|
import packageJson from "../../package.json" with { type: "json" };
|
|
23
|
-
import { calculateCost } from "../models";
|
|
24
21
|
import { getEnvApiKey } from "../stream";
|
|
25
22
|
import {
|
|
26
23
|
type Api,
|
|
@@ -58,17 +55,18 @@ import { createRequestDebugSession, isRequestDebugEnabled, type RequestDebugResp
|
|
|
58
55
|
import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
|
|
59
56
|
import { notifyRawSseEvent } from "../utils/sse-debug";
|
|
60
57
|
import { compactGrammarDefinition } from "./grammar";
|
|
61
|
-
import { CODEX_BASE_URL, getCodexAccountId, OPENAI_HEADER_VALUES, OPENAI_HEADERS } from "./openai-codex/constants";
|
|
62
58
|
import {
|
|
59
|
+
type CodexReasoningContext,
|
|
63
60
|
type CodexRequestOptions,
|
|
64
61
|
type InputItem,
|
|
65
62
|
type RequestBody,
|
|
66
63
|
transformRequestBody,
|
|
67
64
|
} from "./openai-codex/request-transformer";
|
|
68
|
-
import {
|
|
65
|
+
import { CodexApiError } from "./openai-codex/response-handler";
|
|
69
66
|
import { normalizeOpenAIResponsesPromptCacheKey } from "./openai-responses";
|
|
70
67
|
import {
|
|
71
68
|
appendResponsesToolResultMessages,
|
|
69
|
+
buildResponsesDeltaInput,
|
|
72
70
|
convertResponsesAssistantMessage,
|
|
73
71
|
convertResponsesInputContent,
|
|
74
72
|
encodeResponsesToolCallId,
|
|
@@ -77,17 +75,49 @@ import {
|
|
|
77
75
|
mapOpenAIResponsesStopReason,
|
|
78
76
|
populateResponsesUsageFromResponse,
|
|
79
77
|
} from "./openai-responses-shared";
|
|
78
|
+
import type {
|
|
79
|
+
ResponseCustomToolCall,
|
|
80
|
+
ResponseFunctionToolCall,
|
|
81
|
+
ResponseInput,
|
|
82
|
+
ResponseInputContent,
|
|
83
|
+
ResponseOutputMessage,
|
|
84
|
+
ResponseReasoningItem,
|
|
85
|
+
ResponseStatus,
|
|
86
|
+
} from "./openai-responses-wire";
|
|
80
87
|
import { transformMessages } from "./transform-messages";
|
|
81
88
|
|
|
82
89
|
export interface OpenAICodexResponsesOptions extends StreamOptions {
|
|
83
90
|
reasoning?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
84
91
|
reasoningSummary?: "auto" | "concise" | "detailed" | null;
|
|
92
|
+
/** `reasoning.context` replay scope. Defaults to `all_turns` under {@link OpenAICodexResponsesOptions.responsesLite}, otherwise omitted (server default is `current_turn`). */
|
|
93
|
+
reasoningContext?: CodexReasoningContext;
|
|
85
94
|
textVerbosity?: "low" | "medium" | "high";
|
|
86
95
|
include?: string[];
|
|
87
96
|
codexMode?: boolean;
|
|
88
97
|
toolChoice?: ToolChoice;
|
|
89
98
|
preferWebsockets?: boolean;
|
|
90
99
|
serviceTier?: ServiceTier;
|
|
100
|
+
/**
|
|
101
|
+
* Opt into the Responses Lite transport contract. Sends
|
|
102
|
+
* `x-openai-internal-codex-responses-lite: true` on HTTP requests and on the
|
|
103
|
+
* WebSocket upgrade (the marker is connection-scoped there, so lite and
|
|
104
|
+
* non-lite turns never share a pooled socket), strips image detail from
|
|
105
|
+
* input, and defaults `reasoning.context` to `all_turns` — mirroring codex-rs.
|
|
106
|
+
*/
|
|
107
|
+
responsesLite?: boolean;
|
|
108
|
+
/**
|
|
109
|
+
* Extra `client_metadata` to include in the request body on both transports.
|
|
110
|
+
* The canonical Codex envelope is `client_metadata["x-codex-turn-metadata"]`
|
|
111
|
+
* (JSON string of thread/turn identifiers); flat keys are also accepted.
|
|
112
|
+
*/
|
|
113
|
+
clientMetadata?: Record<string, string>;
|
|
114
|
+
/**
|
|
115
|
+
* Invoked when the server streams a `response.metadata` event carrying
|
|
116
|
+
* ChatGPT moderation metadata (`metadata.openai_chatgpt_moderation_metadata`)
|
|
117
|
+
* for first-party presentation parity. Diagnostic observer: failures are
|
|
118
|
+
* swallowed and must not alter the stream.
|
|
119
|
+
*/
|
|
120
|
+
onModerationMetadata?: (metadata: unknown) => void;
|
|
91
121
|
}
|
|
92
122
|
|
|
93
123
|
const CODEX_DEBUG = $flag("PROMETHEUS_CODEX_DEBUG");
|
|
@@ -107,8 +137,8 @@ const CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY = 4096;
|
|
|
107
137
|
* heartbeat below also catches dead sockets, but only after `pongTimeoutMs`
|
|
108
138
|
* (default 60s) and only while a request is active — this gate closes the door
|
|
109
139
|
* earlier and even when the gap between requests is purely client-side (tool
|
|
110
|
-
* execution, user typing, etc.). Set
|
|
111
|
-
* to disable.
|
|
140
|
+
* execution, user typing, etc.). Set
|
|
141
|
+
* `PROMETHEUS_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS=0` to disable.
|
|
112
142
|
*/
|
|
113
143
|
const CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS = 30_000;
|
|
114
144
|
/**
|
|
@@ -136,12 +166,24 @@ const CODEX_RETRYABLE_EVENT_MESSAGE =
|
|
|
136
166
|
const CODEX_PROVIDER_SESSION_STATE_KEY = "openai-codex-responses";
|
|
137
167
|
const X_CODEX_TURN_STATE_HEADER = "x-codex-turn-state";
|
|
138
168
|
const X_MODELS_ETAG_HEADER = "x-models-etag";
|
|
139
|
-
const
|
|
169
|
+
const X_OPENAI_INTERNAL_CODEX_RESPONSES_LITE_HEADER = "x-openai-internal-codex-responses-lite";
|
|
170
|
+
/** WebSocket frames cannot carry per-request HTTP headers; codex-rs mirrors the lite marker into `client_metadata` under this key. */
|
|
171
|
+
const CODEX_WS_RESPONSES_LITE_CLIENT_METADATA_KEY = "ws_request_header_x_openai_internal_codex_responses_lite";
|
|
172
|
+
/** `response.metadata` payload key carrying ChatGPT moderation metadata. */
|
|
173
|
+
const CODEX_MODERATION_METADATA_KEY = "openai_chatgpt_moderation_metadata";
|
|
140
174
|
/** Connection-level websocket failures that should immediately fall back to SSE without retrying. */
|
|
141
175
|
const CODEX_WEBSOCKET_FATAL_PATTERNS = ["websocket error:", "websocket closed before open", "connection timeout"];
|
|
142
176
|
/** Max total time to spend retrying 429s with server-provided delays (5 minutes). */
|
|
143
177
|
const CODEX_RATE_LIMIT_BUDGET_MS = 5 * 60 * 1000;
|
|
144
178
|
const CODEX_ADDITIONAL_PROGRESS_EVENT_TYPES = new Set(["response.done", "response.incomplete"]);
|
|
179
|
+
// Provider/model failure mode: Codex can keep a response alive by streaming
|
|
180
|
+
// whitespace-only function-call argument deltas forever. Those frames count as
|
|
181
|
+
// transport activity, so idle timers never fire; cap the run before raw debug
|
|
182
|
+
// buffers and partial JSON grow without semantic progress.
|
|
183
|
+
const CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_EVENT_LIMIT = 256;
|
|
184
|
+
const CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_CHAR_LIMIT = 16 * 1024;
|
|
185
|
+
const CODEX_WHITESPACE_LOOP_RETRY_LIMIT = 2;
|
|
186
|
+
const CODEX_WHITESPACE_LOOP_RETRY_DELAY_MS = 250;
|
|
145
187
|
|
|
146
188
|
function isCodexStreamProgressEvent(event: unknown): boolean {
|
|
147
189
|
if (isOpenAIResponsesProgressEvent(event)) return true;
|
|
@@ -172,6 +214,11 @@ type CodexTransport = "sse" | "websocket";
|
|
|
172
214
|
type CodexEventItem = ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | ResponseCustomToolCall;
|
|
173
215
|
type CodexOutputBlock = ThinkingContent | TextContent | (ToolCall & { partialJson: string; lastParseLen?: number });
|
|
174
216
|
|
|
217
|
+
/**
|
|
218
|
+
* Per-session request-shape counters. Despite the name, these cover both
|
|
219
|
+
* transports: once stateful SSE chaining is enabled, SSE requests are counted
|
|
220
|
+
* too (the shared chained-request builder records every request it shapes).
|
|
221
|
+
*/
|
|
175
222
|
export interface OpenAICodexWebSocketDebugStats {
|
|
176
223
|
fullContextRequests: number;
|
|
177
224
|
deltaRequests: number;
|
|
@@ -180,6 +227,12 @@ export interface OpenAICodexWebSocketDebugStats {
|
|
|
180
227
|
lastPreviousResponseId?: string;
|
|
181
228
|
}
|
|
182
229
|
|
|
230
|
+
/**
|
|
231
|
+
* Per-session transport state shared by BOTH transports: websocket turn
|
|
232
|
+
* chaining (`previous_response_id` baseline), turn-state/models-etag headers,
|
|
233
|
+
* websocket connection pooling, and debug stats. The name is historical — SSE-only
|
|
234
|
+
* sessions use it too.
|
|
235
|
+
*/
|
|
183
236
|
type CodexWebSocketSessionState = {
|
|
184
237
|
disableWebsocket: boolean;
|
|
185
238
|
lastRequest?: RequestBody;
|
|
@@ -188,7 +241,6 @@ type CodexWebSocketSessionState = {
|
|
|
188
241
|
canAppend: boolean;
|
|
189
242
|
turnState?: string;
|
|
190
243
|
modelsEtag?: string;
|
|
191
|
-
reasoningIncluded?: boolean;
|
|
192
244
|
connection?: CodexWebSocketConnection;
|
|
193
245
|
lastTransport?: CodexTransport;
|
|
194
246
|
fallbackCount: number;
|
|
@@ -211,6 +263,7 @@ interface CodexRequestContext {
|
|
|
211
263
|
transportSessionId?: string;
|
|
212
264
|
providerSessionState?: CodexProviderSessionState;
|
|
213
265
|
websocketState?: CodexWebSocketSessionState;
|
|
266
|
+
responsesLite: boolean;
|
|
214
267
|
transformedBody: RequestBody;
|
|
215
268
|
rawRequestDump: RawHttpRequestDump;
|
|
216
269
|
}
|
|
@@ -219,6 +272,7 @@ interface CodexRequestSetup {
|
|
|
219
272
|
requestSignal: AbortSignal;
|
|
220
273
|
wrapCodexSseStream: (source: AsyncGenerator<Record<string, unknown>>) => AsyncGenerator<Record<string, unknown>>;
|
|
221
274
|
requestAbortController: AbortController;
|
|
275
|
+
firstEventTimeoutMs: number | undefined;
|
|
222
276
|
websocketIdleTimeoutMs: number | undefined;
|
|
223
277
|
websocketFirstEventTimeoutMs: number | undefined;
|
|
224
278
|
}
|
|
@@ -235,6 +289,21 @@ interface CodexStreamRuntime {
|
|
|
235
289
|
providerRetryAttempt: number;
|
|
236
290
|
sawTerminalEvent: boolean;
|
|
237
291
|
canSafelyReplayWebsocketOverSse: boolean;
|
|
292
|
+
whitespaceToolCallArgumentsDelta?: CodexWhitespaceToolCallArgumentsDeltaState;
|
|
293
|
+
whitespaceLoopRetries: number;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
interface CodexWhitespaceToolCallArgumentsDeltaState {
|
|
297
|
+
itemId: string;
|
|
298
|
+
outputIndex?: number;
|
|
299
|
+
consecutiveEvents: number;
|
|
300
|
+
consecutiveChars: number;
|
|
301
|
+
firstSequenceNumber?: number;
|
|
302
|
+
lastSequenceNumber?: number;
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
interface CodexWhitespaceToolCallArgumentsDeltaInterruption {
|
|
306
|
+
message: string;
|
|
238
307
|
}
|
|
239
308
|
|
|
240
309
|
interface CodexStreamProcessingContext {
|
|
@@ -266,54 +335,55 @@ function parseCodexPositiveInteger(value: string | undefined, fallback: number):
|
|
|
266
335
|
return Math.trunc(parsed);
|
|
267
336
|
}
|
|
268
337
|
|
|
338
|
+
function getCodexEnv(name: string): string | undefined {
|
|
339
|
+
return $env[`PROMETHEUS_${name}`] ?? $env[`PI_${name}`];
|
|
340
|
+
}
|
|
341
|
+
|
|
269
342
|
function isCodexWebSocketEnvEnabled(): boolean {
|
|
270
343
|
return $flag("PROMETHEUS_CODEX_WEBSOCKET");
|
|
271
344
|
}
|
|
272
345
|
|
|
273
346
|
function getCodexWebSocketRetryBudget(): number {
|
|
274
|
-
return parseCodexNonNegativeInteger(
|
|
347
|
+
return parseCodexNonNegativeInteger(getCodexEnv("CODEX_WEBSOCKET_RETRY_BUDGET"), CODEX_WEBSOCKET_RETRY_BUDGET);
|
|
275
348
|
}
|
|
276
349
|
|
|
277
350
|
function getCodexWebSocketRetryDelayMs(retry: number): number {
|
|
278
|
-
const baseDelay = parseCodexPositiveInteger(
|
|
351
|
+
const baseDelay = parseCodexPositiveInteger(getCodexEnv("CODEX_WEBSOCKET_RETRY_DELAY_MS"), CODEX_RETRY_DELAY_MS);
|
|
279
352
|
return baseDelay * Math.max(1, retry);
|
|
280
353
|
}
|
|
281
354
|
|
|
282
355
|
function getCodexWebSocketIdleTimeoutMs(): number {
|
|
283
|
-
return parseCodexPositiveInteger(
|
|
356
|
+
return parseCodexPositiveInteger(getCodexEnv("CODEX_WEBSOCKET_IDLE_TIMEOUT_MS"), CODEX_WEBSOCKET_IDLE_TIMEOUT_MS);
|
|
284
357
|
}
|
|
285
358
|
|
|
286
359
|
function getCodexWebSocketFirstEventTimeoutMs(): number {
|
|
287
360
|
return parseCodexPositiveInteger(
|
|
288
|
-
|
|
361
|
+
getCodexEnv("CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS"),
|
|
289
362
|
CODEX_WEBSOCKET_FIRST_EVENT_TIMEOUT_MS,
|
|
290
363
|
);
|
|
291
364
|
}
|
|
292
365
|
|
|
293
366
|
function getCodexWebSocketPingIntervalMs(): number {
|
|
294
367
|
return parseCodexNonNegativeInteger(
|
|
295
|
-
|
|
368
|
+
getCodexEnv("CODEX_WEBSOCKET_PING_INTERVAL_MS"),
|
|
296
369
|
CODEX_WEBSOCKET_PING_INTERVAL_MS,
|
|
297
370
|
);
|
|
298
371
|
}
|
|
299
372
|
|
|
300
373
|
function getCodexWebSocketPongTimeoutMs(): number {
|
|
301
|
-
return parseCodexNonNegativeInteger(
|
|
302
|
-
$env.PROMETHEUS_CODEX_WEBSOCKET_PONG_TIMEOUT_MS,
|
|
303
|
-
CODEX_WEBSOCKET_PONG_TIMEOUT_MS,
|
|
304
|
-
);
|
|
374
|
+
return parseCodexNonNegativeInteger(getCodexEnv("CODEX_WEBSOCKET_PONG_TIMEOUT_MS"), CODEX_WEBSOCKET_PONG_TIMEOUT_MS);
|
|
305
375
|
}
|
|
306
376
|
|
|
307
377
|
function getCodexWebSocketMessageQueueCapacity(): number {
|
|
308
378
|
return parseCodexPositiveInteger(
|
|
309
|
-
|
|
379
|
+
getCodexEnv("CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY"),
|
|
310
380
|
CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY,
|
|
311
381
|
);
|
|
312
382
|
}
|
|
313
383
|
|
|
314
384
|
function getCodexWebSocketMaxIdleReuseMs(): number {
|
|
315
385
|
return parseCodexNonNegativeInteger(
|
|
316
|
-
|
|
386
|
+
getCodexEnv("CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS"),
|
|
317
387
|
CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS,
|
|
318
388
|
);
|
|
319
389
|
}
|
|
@@ -369,6 +439,7 @@ function isCodexWebSocketRetryableStreamError(error: unknown): boolean {
|
|
|
369
439
|
message.includes("websocket ping failed") ||
|
|
370
440
|
message.includes("websocket pong timeout") ||
|
|
371
441
|
message.includes("websocket message queue exceeded") ||
|
|
442
|
+
message.includes("websocket request already in progress") ||
|
|
372
443
|
message.includes("idle timeout waiting for websocket") ||
|
|
373
444
|
message.includes("timeout waiting for first websocket event") ||
|
|
374
445
|
message.includes("syntaxerror") ||
|
|
@@ -420,11 +491,6 @@ function updateCodexSessionMetadataFromHeaders(
|
|
|
420
491
|
if (modelsEtag && modelsEtag.length > 0) {
|
|
421
492
|
state.modelsEtag = modelsEtag;
|
|
422
493
|
}
|
|
423
|
-
const reasoningIncluded = resolvedHeaders.get(X_REASONING_INCLUDED_HEADER);
|
|
424
|
-
if (reasoningIncluded !== null) {
|
|
425
|
-
const normalized = reasoningIncluded.trim().toLowerCase();
|
|
426
|
-
state.reasoningIncluded = normalized.length === 0 ? true : normalized !== "false";
|
|
427
|
-
}
|
|
428
494
|
}
|
|
429
495
|
|
|
430
496
|
function extractCodexWebSocketHandshakeHeaders(socket: Bun.WebSocket, openEvent?: Event): Headers | undefined {
|
|
@@ -499,13 +565,16 @@ export function normalizeCodexToolChoice(
|
|
|
499
565
|
if (!choice) return undefined;
|
|
500
566
|
if (typeof choice === "string") return choice;
|
|
501
567
|
const allowFreeform = model ? supportsFreeformApplyPatchCodex(model) : false;
|
|
502
|
-
const mapName = (name: string): Record<string, string> => {
|
|
568
|
+
const mapName = (name: string): Record<string, string> | undefined => {
|
|
569
|
+
const directTool = tools.find(tool => tool.name === name);
|
|
503
570
|
const customTool = allowFreeform
|
|
504
571
|
? tools.find(tool => tool.customFormat && (tool.name === name || tool.customWireName === name))
|
|
505
572
|
: undefined;
|
|
573
|
+
const offeredTool = customTool ?? directTool;
|
|
574
|
+
if (!offeredTool) return undefined;
|
|
506
575
|
return customTool
|
|
507
576
|
? { type: "custom", name: customTool.customWireName ?? customTool.name }
|
|
508
|
-
: { type: "function", name };
|
|
577
|
+
: { type: "function", name: offeredTool.name };
|
|
509
578
|
};
|
|
510
579
|
if (choice.type === "function") {
|
|
511
580
|
if ("function" in choice && choice.function?.name) {
|
|
@@ -597,6 +666,7 @@ function resetOutputState(output: AssistantMessage): void {
|
|
|
597
666
|
output.content.length = 0;
|
|
598
667
|
output.usage = createEmptyUsage();
|
|
599
668
|
output.stopReason = "stop";
|
|
669
|
+
output.stopDetails = undefined;
|
|
600
670
|
}
|
|
601
671
|
|
|
602
672
|
function removeTransientBlockIndices(output: AssistantMessage): void {
|
|
@@ -631,6 +701,7 @@ function createRequestSetup(options: OpenAICodexResponsesOptions | undefined): C
|
|
|
631
701
|
requestAbortController,
|
|
632
702
|
requestSignal,
|
|
633
703
|
wrapCodexSseStream,
|
|
704
|
+
firstEventTimeoutMs,
|
|
634
705
|
websocketIdleTimeoutMs,
|
|
635
706
|
websocketFirstEventTimeoutMs,
|
|
636
707
|
};
|
|
@@ -666,13 +737,19 @@ async function buildCodexRequestContext(
|
|
|
666
737
|
};
|
|
667
738
|
|
|
668
739
|
const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
|
|
669
|
-
const
|
|
740
|
+
const responsesLite = options?.responsesLite === true;
|
|
741
|
+
const sessionKey = getCodexWebSocketSessionKey(transportSessionId, model, accountId, baseUrl, responsesLite);
|
|
670
742
|
const publicSessionKey = getCodexPublicSessionKey(transportSessionId, model, baseUrl);
|
|
671
743
|
if (sessionKey && publicSessionKey) {
|
|
672
744
|
providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
|
|
673
745
|
}
|
|
674
746
|
const websocketState =
|
|
675
747
|
sessionKey && providerSessionState ? getCodexWebSocketSessionState(sessionKey, providerSessionState) : undefined;
|
|
748
|
+
if (websocketState && !isCodexWithinTurnContinuation(context)) {
|
|
749
|
+
// codex-rs scopes `x-codex-turn-state` to a single user turn: tool-loop
|
|
750
|
+
// follow-ups echo it, a new user turn starts without it.
|
|
751
|
+
websocketState.turnState = undefined;
|
|
752
|
+
}
|
|
676
753
|
return {
|
|
677
754
|
apiKey,
|
|
678
755
|
accountId,
|
|
@@ -682,6 +759,7 @@ async function buildCodexRequestContext(
|
|
|
682
759
|
transportSessionId,
|
|
683
760
|
providerSessionState,
|
|
684
761
|
websocketState,
|
|
762
|
+
responsesLite,
|
|
685
763
|
transformedBody,
|
|
686
764
|
rawRequestDump,
|
|
687
765
|
};
|
|
@@ -695,14 +773,14 @@ async function buildTransformedCodexRequestBody(
|
|
|
695
773
|
): Promise<RequestBody> {
|
|
696
774
|
const params: RequestBody = {
|
|
697
775
|
model: model.id,
|
|
698
|
-
input:
|
|
776
|
+
input: convertMessages(model, context),
|
|
699
777
|
stream: true,
|
|
700
778
|
prompt_cache_key: promptCacheKey,
|
|
701
779
|
};
|
|
702
780
|
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
781
|
+
// `maxTokens` is intentionally not forwarded: transformRequestBody strips
|
|
782
|
+
// `max_output_tokens`/`max_completion_tokens` (the Codex backend rejects
|
|
783
|
+
// caller-supplied output caps).
|
|
706
784
|
if (options?.temperature !== undefined) {
|
|
707
785
|
params.temperature = options.temperature;
|
|
708
786
|
}
|
|
@@ -750,11 +828,16 @@ async function buildTransformedCodexRequestBody(
|
|
|
750
828
|
params.instructions = systemPrompts[0];
|
|
751
829
|
}
|
|
752
830
|
const developerMessages = systemPrompts.slice(1);
|
|
831
|
+
if (options?.clientMetadata && Object.keys(options.clientMetadata).length > 0) {
|
|
832
|
+
params.client_metadata = { ...options.clientMetadata };
|
|
833
|
+
}
|
|
753
834
|
const codexOptions: CodexRequestOptions = {
|
|
754
835
|
reasoningEffort: options?.reasoning,
|
|
755
|
-
reasoningSummary: options?.reasoningSummary
|
|
836
|
+
reasoningSummary: options?.reasoningSummary === undefined ? "auto" : options.reasoningSummary,
|
|
837
|
+
reasoningContext: options?.reasoningContext,
|
|
756
838
|
textVerbosity: options?.textVerbosity,
|
|
757
839
|
include: options?.include,
|
|
840
|
+
responsesLite: options?.responsesLite,
|
|
758
841
|
};
|
|
759
842
|
|
|
760
843
|
return transformRequestBody(params, model, codexOptions, { developerMessages });
|
|
@@ -819,7 +902,21 @@ async function openCodexWebSocketTransport(
|
|
|
819
902
|
requestBodyForState: RequestBody;
|
|
820
903
|
transport: CodexTransport;
|
|
821
904
|
}> {
|
|
822
|
-
const
|
|
905
|
+
const chainedBody = buildCodexChainedRequestBody(requestContext.transformedBody, websocketState);
|
|
906
|
+
// WebSocket frames cannot carry per-request HTTP headers, so the Responses
|
|
907
|
+
// Lite marker rides in `client_metadata` on every `response.create`.
|
|
908
|
+
const websocketRequest: Record<string, unknown> = {
|
|
909
|
+
type: "response.create",
|
|
910
|
+
...chainedBody,
|
|
911
|
+
...(requestContext.responsesLite
|
|
912
|
+
? {
|
|
913
|
+
client_metadata: {
|
|
914
|
+
...(chainedBody.client_metadata ?? {}),
|
|
915
|
+
[CODEX_WS_RESPONSES_LITE_CLIENT_METADATA_KEY]: "true",
|
|
916
|
+
},
|
|
917
|
+
}
|
|
918
|
+
: {}),
|
|
919
|
+
};
|
|
823
920
|
const websocketHeaders = createCodexHeaders(
|
|
824
921
|
requestContext.requestHeaders,
|
|
825
922
|
requestContext.accountId,
|
|
@@ -827,6 +924,7 @@ async function openCodexWebSocketTransport(
|
|
|
827
924
|
requestContext.transportSessionId,
|
|
828
925
|
"websocket",
|
|
829
926
|
websocketState,
|
|
927
|
+
requestContext.responsesLite,
|
|
830
928
|
);
|
|
831
929
|
const requestBodyForState = structuredCloneJSON(requestContext.transformedBody);
|
|
832
930
|
logCodexDebug("codex websocket request", {
|
|
@@ -852,7 +950,26 @@ async function openCodexWebSocketTransport(
|
|
|
852
950
|
requestSetup.requestSignal,
|
|
853
951
|
onSseEvent,
|
|
854
952
|
);
|
|
855
|
-
return {
|
|
953
|
+
return {
|
|
954
|
+
eventStream,
|
|
955
|
+
requestBodyForState,
|
|
956
|
+
transport: "websocket",
|
|
957
|
+
};
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
/**
|
|
961
|
+
* True when the request continues the current turn (everything after the
|
|
962
|
+
* last assistant message is tool results), false when a new user turn starts.
|
|
963
|
+
* Mirrors codex-rs, which scopes `x-codex-turn-state` to a single turn and
|
|
964
|
+
* clears it when the next one begins.
|
|
965
|
+
*/
|
|
966
|
+
function isCodexWithinTurnContinuation(context: Context): boolean {
|
|
967
|
+
for (let i = context.messages.length - 1; i >= 0; i--) {
|
|
968
|
+
const role = context.messages[i]?.role;
|
|
969
|
+
if (role === "toolResult") continue;
|
|
970
|
+
return role === "assistant";
|
|
971
|
+
}
|
|
972
|
+
return false;
|
|
856
973
|
}
|
|
857
974
|
|
|
858
975
|
async function openCodexSseTransport(
|
|
@@ -867,21 +984,28 @@ async function openCodexSseTransport(
|
|
|
867
984
|
requestBodyForState: RequestBody;
|
|
868
985
|
transport: CodexTransport;
|
|
869
986
|
}> {
|
|
870
|
-
const
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
987
|
+
const open = async (wireBody: RequestBody) => {
|
|
988
|
+
// Keep the 400 dump honest: record the body actually sent on the wire.
|
|
989
|
+
requestContext.rawRequestDump.body = wireBody;
|
|
990
|
+
return requestSetup.wrapCodexSseStream(
|
|
991
|
+
await openCodexSseEventStream(
|
|
992
|
+
requestContext.url,
|
|
993
|
+
requestContext.requestHeaders,
|
|
994
|
+
requestContext.accountId,
|
|
995
|
+
requestContext.apiKey,
|
|
996
|
+
requestContext.transportSessionId,
|
|
997
|
+
wireBody,
|
|
998
|
+
state,
|
|
999
|
+
requestContext.responsesLite,
|
|
1000
|
+
requestSetup.requestSignal,
|
|
1001
|
+
requestSetup.firstEventTimeoutMs,
|
|
1002
|
+
event => options?.onSseEvent?.(event, model),
|
|
1003
|
+
options?.fetch,
|
|
1004
|
+
),
|
|
1005
|
+
);
|
|
1006
|
+
};
|
|
1007
|
+
recordCodexWebSocketRequestStats(state, body);
|
|
1008
|
+
return { eventStream: await open(body), requestBodyForState: structuredCloneJSON(body), transport: "sse" };
|
|
885
1009
|
}
|
|
886
1010
|
|
|
887
1011
|
async function reopenCodexWebSocketRuntimeStream(
|
|
@@ -952,11 +1076,94 @@ function createCodexStreamRuntime(initial: {
|
|
|
952
1076
|
nativeOutputItems: [],
|
|
953
1077
|
websocketStreamRetries: 0,
|
|
954
1078
|
providerRetryAttempt: 0,
|
|
1079
|
+
whitespaceLoopRetries: 0,
|
|
955
1080
|
sawTerminalEvent: false,
|
|
956
1081
|
canSafelyReplayWebsocketOverSse: true,
|
|
1082
|
+
whitespaceToolCallArgumentsDelta: undefined,
|
|
1083
|
+
};
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
function resetWhitespaceToolCallArgumentsDelta(runtime: CodexStreamRuntime): void {
|
|
1087
|
+
runtime.whitespaceToolCallArgumentsDelta = undefined;
|
|
1088
|
+
}
|
|
1089
|
+
|
|
1090
|
+
function isJsonWhitespaceOnly(value: string): boolean {
|
|
1091
|
+
for (let index = 0; index < value.length; index += 1) {
|
|
1092
|
+
const code = value.charCodeAt(index);
|
|
1093
|
+
if (code !== 0x09 && code !== 0x0a && code !== 0x0d && code !== 0x20) {
|
|
1094
|
+
return false;
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
return true;
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
function readOptionalInteger(value: unknown): number | undefined {
|
|
1101
|
+
if (typeof value !== "number" || !Number.isFinite(value)) return undefined;
|
|
1102
|
+
return Math.trunc(value);
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
function observeWhitespaceToolCallArgumentsDelta(
|
|
1106
|
+
runtime: CodexStreamRuntime,
|
|
1107
|
+
rawEvent: Record<string, unknown>,
|
|
1108
|
+
delta: string,
|
|
1109
|
+
): CodexWhitespaceToolCallArgumentsDeltaInterruption | undefined {
|
|
1110
|
+
if (!isJsonWhitespaceOnly(delta)) {
|
|
1111
|
+
resetWhitespaceToolCallArgumentsDelta(runtime);
|
|
1112
|
+
return undefined;
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
const itemId =
|
|
1116
|
+
typeof rawEvent.item_id === "string" && rawEvent.item_id.length > 0
|
|
1117
|
+
? rawEvent.item_id
|
|
1118
|
+
: (runtime.currentItem?.id ?? "");
|
|
1119
|
+
const outputIndex = readOptionalInteger(rawEvent.output_index);
|
|
1120
|
+
const sequenceNumber = readOptionalInteger(rawEvent.sequence_number);
|
|
1121
|
+
let state = runtime.whitespaceToolCallArgumentsDelta;
|
|
1122
|
+
if (!state || state.itemId !== itemId || state.outputIndex !== outputIndex) {
|
|
1123
|
+
state = {
|
|
1124
|
+
itemId,
|
|
1125
|
+
outputIndex,
|
|
1126
|
+
consecutiveEvents: 0,
|
|
1127
|
+
consecutiveChars: 0,
|
|
1128
|
+
firstSequenceNumber: sequenceNumber,
|
|
1129
|
+
};
|
|
1130
|
+
runtime.whitespaceToolCallArgumentsDelta = state;
|
|
1131
|
+
}
|
|
1132
|
+
|
|
1133
|
+
state.consecutiveEvents += 1;
|
|
1134
|
+
state.consecutiveChars += delta.length;
|
|
1135
|
+
state.lastSequenceNumber = sequenceNumber;
|
|
1136
|
+
if (
|
|
1137
|
+
state.consecutiveEvents < CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_EVENT_LIMIT &&
|
|
1138
|
+
state.consecutiveChars < CODEX_WHITESPACE_TOOL_CALL_ARGUMENT_DELTA_CHAR_LIMIT
|
|
1139
|
+
) {
|
|
1140
|
+
return undefined;
|
|
1141
|
+
}
|
|
1142
|
+
|
|
1143
|
+
const itemLabel = itemId ? ` for item ${itemId}` : "";
|
|
1144
|
+
const sequenceLabel =
|
|
1145
|
+
state.firstSequenceNumber === undefined || state.lastSequenceNumber === undefined
|
|
1146
|
+
? ""
|
|
1147
|
+
: `, sequence ${state.firstSequenceNumber}..${state.lastSequenceNumber}`;
|
|
1148
|
+
return {
|
|
1149
|
+
message: `Interrupted OpenAI Codex response after ${state.consecutiveEvents} consecutive whitespace-only tool-call argument delta events (${state.consecutiveChars} chars${sequenceLabel})${itemLabel}.`,
|
|
957
1150
|
};
|
|
958
1151
|
}
|
|
959
1152
|
|
|
1153
|
+
function interruptWhitespaceToolCallArgumentsDelta(
|
|
1154
|
+
runtime: CodexStreamRuntime,
|
|
1155
|
+
interruption: CodexWhitespaceToolCallArgumentsDeltaInterruption,
|
|
1156
|
+
): never {
|
|
1157
|
+
// Close the degenerate websocket so the server stops streaming whitespace
|
|
1158
|
+
// frames. Do NOT abort requestSetup.requestAbortController: reopen*RuntimeStream
|
|
1159
|
+
// reuses the same setup across retries, and requestSignal is an AbortSignal.any
|
|
1160
|
+
// over that controller — aborting it stays latched and makes recovery
|
|
1161
|
+
// impossible. Throwing unwinds the for-await, which returns the SSE generator
|
|
1162
|
+
// and cancels its underlying body.
|
|
1163
|
+
runtime.websocketState?.connection?.close("degenerate-tool-call");
|
|
1164
|
+
throw new CodexWhitespaceToolCallLoopError(interruption.message);
|
|
1165
|
+
}
|
|
1166
|
+
|
|
960
1167
|
async function processCodexResponseStream(
|
|
961
1168
|
context: CodexStreamProcessingContext,
|
|
962
1169
|
runtime: CodexStreamRuntime,
|
|
@@ -968,12 +1175,7 @@ async function processCodexResponseStream(
|
|
|
968
1175
|
try {
|
|
969
1176
|
let firstTokenTime = context.firstTokenTime;
|
|
970
1177
|
for await (const rawEvent of runtime.eventStream) {
|
|
971
|
-
firstTokenTime = handleCodexStreamEvent(
|
|
972
|
-
...context,
|
|
973
|
-
runtime,
|
|
974
|
-
rawEvent,
|
|
975
|
-
firstTokenTime,
|
|
976
|
-
});
|
|
1178
|
+
firstTokenTime = handleCodexStreamEvent(context, runtime, rawEvent, firstTokenTime);
|
|
977
1179
|
if (runtime.sawTerminalEvent) break;
|
|
978
1180
|
}
|
|
979
1181
|
return { firstTokenTime };
|
|
@@ -986,23 +1188,18 @@ async function processCodexResponseStream(
|
|
|
986
1188
|
}
|
|
987
1189
|
}
|
|
988
1190
|
|
|
989
|
-
function handleCodexStreamEvent(
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
}): number | undefined {
|
|
997
|
-
const { model, output, stream, runtime, rawEvent } = args;
|
|
1191
|
+
function handleCodexStreamEvent(
|
|
1192
|
+
context: CodexStreamProcessingContext,
|
|
1193
|
+
runtime: CodexStreamRuntime,
|
|
1194
|
+
rawEvent: Record<string, unknown>,
|
|
1195
|
+
firstTokenTime: number | undefined,
|
|
1196
|
+
): number | undefined {
|
|
1197
|
+
const { model, output, stream } = context;
|
|
998
1198
|
const eventType = typeof rawEvent.type === "string" ? rawEvent.type : "";
|
|
999
|
-
if (!eventType) return
|
|
1000
|
-
|
|
1001
|
-
const blocks = output.content;
|
|
1002
|
-
const blockIndex = () => blocks.length - 1;
|
|
1003
|
-
let firstTokenTime = args.firstTokenTime;
|
|
1199
|
+
if (!eventType) return firstTokenTime;
|
|
1004
1200
|
|
|
1005
1201
|
if (eventType === "response.output_item.added") {
|
|
1202
|
+
resetWhitespaceToolCallArgumentsDelta(runtime);
|
|
1006
1203
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
1007
1204
|
const item = rawEvent.item as CodexEventItem;
|
|
1008
1205
|
runtime.currentItem = item;
|
|
@@ -1011,7 +1208,7 @@ function handleCodexStreamEvent(args: {
|
|
|
1011
1208
|
output.content.push(runtime.currentBlock);
|
|
1012
1209
|
stream.push({
|
|
1013
1210
|
type: getOutputBlockStartEventType(runtime.currentBlock),
|
|
1014
|
-
contentIndex:
|
|
1211
|
+
contentIndex: output.content.length - 1,
|
|
1015
1212
|
partial: output,
|
|
1016
1213
|
});
|
|
1017
1214
|
return firstTokenTime;
|
|
@@ -1023,12 +1220,12 @@ function handleCodexStreamEvent(args: {
|
|
|
1023
1220
|
}
|
|
1024
1221
|
|
|
1025
1222
|
if (eventType === "response.reasoning_summary_text.delta") {
|
|
1026
|
-
handleReasoningSummaryTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output
|
|
1223
|
+
handleReasoningSummaryTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output);
|
|
1027
1224
|
return firstTokenTime;
|
|
1028
1225
|
}
|
|
1029
1226
|
|
|
1030
1227
|
if (eventType === "response.reasoning_summary_part.done") {
|
|
1031
|
-
handleReasoningSummaryPartDone(runtime.currentItem, runtime.currentBlock, stream, output
|
|
1228
|
+
handleReasoningSummaryPartDone(runtime.currentItem, runtime.currentBlock, stream, output);
|
|
1032
1229
|
return firstTokenTime;
|
|
1033
1230
|
}
|
|
1034
1231
|
|
|
@@ -1038,58 +1235,48 @@ function handleCodexStreamEvent(args: {
|
|
|
1038
1235
|
}
|
|
1039
1236
|
|
|
1040
1237
|
if (eventType === "response.output_text.delta") {
|
|
1041
|
-
handleMessageTextDelta(
|
|
1042
|
-
runtime.currentItem,
|
|
1043
|
-
runtime.currentBlock,
|
|
1044
|
-
rawEvent,
|
|
1045
|
-
stream,
|
|
1046
|
-
output,
|
|
1047
|
-
blockIndex,
|
|
1048
|
-
"output_text",
|
|
1049
|
-
);
|
|
1238
|
+
handleMessageTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, "output_text");
|
|
1050
1239
|
return firstTokenTime;
|
|
1051
1240
|
}
|
|
1052
1241
|
|
|
1053
1242
|
if (eventType === "response.refusal.delta") {
|
|
1054
|
-
handleMessageTextDelta(
|
|
1055
|
-
runtime.currentItem,
|
|
1056
|
-
runtime.currentBlock,
|
|
1057
|
-
rawEvent,
|
|
1058
|
-
stream,
|
|
1059
|
-
output,
|
|
1060
|
-
blockIndex,
|
|
1061
|
-
"refusal",
|
|
1062
|
-
);
|
|
1243
|
+
handleMessageTextDelta(runtime.currentItem, runtime.currentBlock, rawEvent, stream, output, "refusal");
|
|
1063
1244
|
return firstTokenTime;
|
|
1064
1245
|
}
|
|
1065
1246
|
|
|
1066
1247
|
if (eventType === "response.function_call_arguments.delta") {
|
|
1067
|
-
handleToolCallArgumentsDelta(runtime
|
|
1248
|
+
const interruption = handleToolCallArgumentsDelta(runtime, rawEvent, stream, output);
|
|
1249
|
+
if (interruption) interruptWhitespaceToolCallArgumentsDelta(runtime, interruption);
|
|
1068
1250
|
return firstTokenTime;
|
|
1069
1251
|
}
|
|
1070
1252
|
|
|
1071
1253
|
if (eventType === "response.function_call_arguments.done") {
|
|
1254
|
+
resetWhitespaceToolCallArgumentsDelta(runtime);
|
|
1072
1255
|
handleToolCallArgumentsDone(runtime.currentItem, runtime.currentBlock, rawEvent);
|
|
1073
1256
|
return firstTokenTime;
|
|
1074
1257
|
}
|
|
1075
1258
|
|
|
1076
1259
|
if (eventType === "response.custom_tool_call_input.delta") {
|
|
1077
|
-
handleCustomToolCallInputDelta(runtime
|
|
1260
|
+
const interruption = handleCustomToolCallInputDelta(runtime, rawEvent, stream, output);
|
|
1261
|
+
if (interruption) interruptWhitespaceToolCallArgumentsDelta(runtime, interruption);
|
|
1078
1262
|
return firstTokenTime;
|
|
1079
1263
|
}
|
|
1080
1264
|
|
|
1081
1265
|
if (eventType === "response.custom_tool_call_input.done") {
|
|
1266
|
+
resetWhitespaceToolCallArgumentsDelta(runtime);
|
|
1082
1267
|
handleCustomToolCallInputDone(runtime.currentItem, runtime.currentBlock, rawEvent);
|
|
1083
1268
|
return firstTokenTime;
|
|
1084
1269
|
}
|
|
1085
1270
|
|
|
1086
1271
|
if (eventType === "response.output_item.done") {
|
|
1087
|
-
|
|
1272
|
+
resetWhitespaceToolCallArgumentsDelta(runtime);
|
|
1273
|
+
handleOutputItemDone(model, output, stream, runtime, rawEvent);
|
|
1088
1274
|
return firstTokenTime;
|
|
1089
1275
|
}
|
|
1090
1276
|
|
|
1091
1277
|
if (eventType === "response.created") {
|
|
1092
|
-
|
|
1278
|
+
handleResponseCreated(runtime, rawEvent);
|
|
1279
|
+
return firstTokenTime;
|
|
1093
1280
|
}
|
|
1094
1281
|
|
|
1095
1282
|
if (eventType === "response.completed" || eventType === "response.done" || eventType === "response.incomplete") {
|
|
@@ -1097,6 +1284,18 @@ function handleCodexStreamEvent(args: {
|
|
|
1097
1284
|
return firstTokenTime;
|
|
1098
1285
|
}
|
|
1099
1286
|
|
|
1287
|
+
if (eventType === "response.metadata") {
|
|
1288
|
+
const moderation = asRecord(rawEvent.metadata)?.[CODEX_MODERATION_METADATA_KEY];
|
|
1289
|
+
if (moderation !== undefined) {
|
|
1290
|
+
try {
|
|
1291
|
+
context.options?.onModerationMetadata?.(moderation);
|
|
1292
|
+
} catch {
|
|
1293
|
+
// Diagnostic observer: failures must not disturb the stream.
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1296
|
+
return firstTokenTime;
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1100
1299
|
if (eventType === "error" || eventType === "response.failed") {
|
|
1101
1300
|
throw createCodexProviderStreamError(rawEvent);
|
|
1102
1301
|
}
|
|
@@ -1154,7 +1353,6 @@ function handleReasoningSummaryTextDelta(
|
|
|
1154
1353
|
rawEvent: Record<string, unknown>,
|
|
1155
1354
|
stream: AssistantMessageEventStream,
|
|
1156
1355
|
output: AssistantMessage,
|
|
1157
|
-
blockIndex: () => number,
|
|
1158
1356
|
): void {
|
|
1159
1357
|
if (currentItem?.type !== "reasoning" || currentBlock?.type !== "thinking") return;
|
|
1160
1358
|
currentItem.summary = currentItem.summary || [];
|
|
@@ -1163,7 +1361,7 @@ function handleReasoningSummaryTextDelta(
|
|
|
1163
1361
|
const delta = (rawEvent as { delta?: string }).delta || "";
|
|
1164
1362
|
currentBlock.thinking += delta;
|
|
1165
1363
|
lastPart.text += delta;
|
|
1166
|
-
stream.push({ type: "thinking_delta", contentIndex:
|
|
1364
|
+
stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta, partial: output });
|
|
1167
1365
|
}
|
|
1168
1366
|
|
|
1169
1367
|
function handleReasoningSummaryPartDone(
|
|
@@ -1171,7 +1369,6 @@ function handleReasoningSummaryPartDone(
|
|
|
1171
1369
|
currentBlock: CodexOutputBlock | null,
|
|
1172
1370
|
stream: AssistantMessageEventStream,
|
|
1173
1371
|
output: AssistantMessage,
|
|
1174
|
-
blockIndex: () => number,
|
|
1175
1372
|
): void {
|
|
1176
1373
|
if (currentItem?.type !== "reasoning" || currentBlock?.type !== "thinking") return;
|
|
1177
1374
|
currentItem.summary = currentItem.summary || [];
|
|
@@ -1179,7 +1376,7 @@ function handleReasoningSummaryPartDone(
|
|
|
1179
1376
|
if (!lastPart) return;
|
|
1180
1377
|
currentBlock.thinking += "\n\n";
|
|
1181
1378
|
lastPart.text += "\n\n";
|
|
1182
|
-
stream.push({ type: "thinking_delta", contentIndex:
|
|
1379
|
+
stream.push({ type: "thinking_delta", contentIndex: output.content.length - 1, delta: "\n\n", partial: output });
|
|
1183
1380
|
}
|
|
1184
1381
|
|
|
1185
1382
|
function handleContentPartAdded(currentItem: CodexEventItem | null, rawEvent: Record<string, unknown>): void {
|
|
@@ -1197,13 +1394,20 @@ function handleMessageTextDelta(
|
|
|
1197
1394
|
rawEvent: Record<string, unknown>,
|
|
1198
1395
|
stream: AssistantMessageEventStream,
|
|
1199
1396
|
output: AssistantMessage,
|
|
1200
|
-
blockIndex: () => number,
|
|
1201
1397
|
partType: "output_text" | "refusal",
|
|
1202
1398
|
): void {
|
|
1203
1399
|
if (currentItem?.type !== "message" || currentBlock?.type !== "text") return;
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
if (
|
|
1400
|
+
currentItem.content = currentItem.content || [];
|
|
1401
|
+
let lastPart = currentItem.content[currentItem.content.length - 1];
|
|
1402
|
+
if (lastPart?.type !== partType) {
|
|
1403
|
+
// `content_part.added` never arrived (lossy proxy) — synthesize the part
|
|
1404
|
+
// so live text still streams instead of freezing until output_item.done.
|
|
1405
|
+
lastPart =
|
|
1406
|
+
partType === "output_text"
|
|
1407
|
+
? { type: "output_text", text: "", annotations: [] }
|
|
1408
|
+
: { type: "refusal", refusal: "" };
|
|
1409
|
+
currentItem.content.push(lastPart);
|
|
1410
|
+
}
|
|
1207
1411
|
const delta = (rawEvent as { delta?: string }).delta || "";
|
|
1208
1412
|
currentBlock.text += delta;
|
|
1209
1413
|
if (lastPart.type === "output_text") {
|
|
@@ -1211,26 +1415,33 @@ function handleMessageTextDelta(
|
|
|
1211
1415
|
} else {
|
|
1212
1416
|
lastPart.refusal += delta;
|
|
1213
1417
|
}
|
|
1214
|
-
stream.push({ type: "text_delta", contentIndex:
|
|
1418
|
+
stream.push({ type: "text_delta", contentIndex: output.content.length - 1, delta, partial: output });
|
|
1215
1419
|
}
|
|
1216
1420
|
|
|
1217
1421
|
function handleToolCallArgumentsDelta(
|
|
1218
|
-
|
|
1219
|
-
currentBlock: CodexOutputBlock | null,
|
|
1422
|
+
runtime: CodexStreamRuntime,
|
|
1220
1423
|
rawEvent: Record<string, unknown>,
|
|
1221
1424
|
stream: AssistantMessageEventStream,
|
|
1222
1425
|
output: AssistantMessage,
|
|
1223
|
-
|
|
1224
|
-
): void {
|
|
1225
|
-
if (currentItem?.type !== "function_call" || currentBlock?.type !== "toolCall") return;
|
|
1426
|
+
): CodexWhitespaceToolCallArgumentsDeltaInterruption | undefined {
|
|
1226
1427
|
const delta = (rawEvent as { delta?: string }).delta || "";
|
|
1428
|
+
// Observe BEFORE the item/block guard: degenerate whitespace frames can keep
|
|
1429
|
+
// arriving after the item closed (currentBlock detached) and still count as
|
|
1430
|
+
// progress for the idle watchdogs — dropping them unobserved would reopen
|
|
1431
|
+
// the infinite-loop hole the breaker exists for.
|
|
1432
|
+
const interruption = observeWhitespaceToolCallArgumentsDelta(runtime, rawEvent, delta);
|
|
1433
|
+
if (interruption) return interruption;
|
|
1434
|
+
const currentItem = runtime.currentItem;
|
|
1435
|
+
const currentBlock = runtime.currentBlock;
|
|
1436
|
+
if (currentItem?.type !== "function_call" || currentBlock?.type !== "toolCall") return undefined;
|
|
1227
1437
|
currentBlock.partialJson += delta;
|
|
1228
1438
|
const throttled = parseStreamingJsonThrottled(currentBlock.partialJson, currentBlock.lastParseLen ?? 0);
|
|
1229
1439
|
if (throttled) {
|
|
1230
1440
|
currentBlock.arguments = throttled.value;
|
|
1231
1441
|
currentBlock.lastParseLen = throttled.parsedLen;
|
|
1232
1442
|
}
|
|
1233
|
-
stream.push({ type: "toolcall_delta", contentIndex:
|
|
1443
|
+
stream.push({ type: "toolcall_delta", contentIndex: output.content.length - 1, delta, partial: output });
|
|
1444
|
+
return undefined;
|
|
1234
1445
|
}
|
|
1235
1446
|
|
|
1236
1447
|
function handleToolCallArgumentsDone(
|
|
@@ -1249,18 +1460,22 @@ function handleToolCallArgumentsDone(
|
|
|
1249
1460
|
}
|
|
1250
1461
|
|
|
1251
1462
|
function handleCustomToolCallInputDelta(
|
|
1252
|
-
|
|
1253
|
-
currentBlock: CodexOutputBlock | null,
|
|
1463
|
+
runtime: CodexStreamRuntime,
|
|
1254
1464
|
rawEvent: Record<string, unknown>,
|
|
1255
1465
|
stream: AssistantMessageEventStream,
|
|
1256
1466
|
output: AssistantMessage,
|
|
1257
|
-
|
|
1258
|
-
): void {
|
|
1259
|
-
if (currentItem?.type !== "custom_tool_call" || currentBlock?.type !== "toolCall") return;
|
|
1467
|
+
): CodexWhitespaceToolCallArgumentsDeltaInterruption | undefined {
|
|
1260
1468
|
const delta = (rawEvent as { delta?: string }).delta || "";
|
|
1469
|
+
// Observe BEFORE the item/block guard — see handleToolCallArgumentsDelta.
|
|
1470
|
+
const interruption = observeWhitespaceToolCallArgumentsDelta(runtime, rawEvent, delta);
|
|
1471
|
+
if (interruption) return interruption;
|
|
1472
|
+
const currentItem = runtime.currentItem;
|
|
1473
|
+
const currentBlock = runtime.currentBlock;
|
|
1474
|
+
if (currentItem?.type !== "custom_tool_call" || currentBlock?.type !== "toolCall") return undefined;
|
|
1261
1475
|
currentBlock.partialJson += delta;
|
|
1262
|
-
currentBlock.arguments
|
|
1263
|
-
stream.push({ type: "toolcall_delta", contentIndex:
|
|
1476
|
+
(currentBlock.arguments as { input?: string }).input = currentBlock.partialJson;
|
|
1477
|
+
stream.push({ type: "toolcall_delta", contentIndex: output.content.length - 1, delta, partial: output });
|
|
1478
|
+
return undefined;
|
|
1264
1479
|
}
|
|
1265
1480
|
|
|
1266
1481
|
function handleCustomToolCallInputDone(
|
|
@@ -1282,9 +1497,10 @@ function handleOutputItemDone(
|
|
|
1282
1497
|
stream: AssistantMessageEventStream,
|
|
1283
1498
|
runtime: CodexStreamRuntime,
|
|
1284
1499
|
rawEvent: Record<string, unknown>,
|
|
1285
|
-
blockIndex: () => number,
|
|
1286
1500
|
): void {
|
|
1287
|
-
const
|
|
1501
|
+
const rawItem = rawEvent.item;
|
|
1502
|
+
if (!rawItem || typeof rawItem !== "object") return;
|
|
1503
|
+
const item = structuredCloneJSON(rawItem) as CodexEventItem;
|
|
1288
1504
|
runtime.nativeOutputItems.push(item as unknown as Record<string, unknown>);
|
|
1289
1505
|
|
|
1290
1506
|
if (item.type === "reasoning" && runtime.currentBlock?.type === "thinking") {
|
|
@@ -1292,7 +1508,7 @@ function handleOutputItemDone(
|
|
|
1292
1508
|
runtime.currentBlock.thinkingSignature = JSON.stringify(item);
|
|
1293
1509
|
stream.push({
|
|
1294
1510
|
type: "thinking_end",
|
|
1295
|
-
contentIndex:
|
|
1511
|
+
contentIndex: output.content.length - 1,
|
|
1296
1512
|
content: runtime.currentBlock.thinking,
|
|
1297
1513
|
partial: output,
|
|
1298
1514
|
});
|
|
@@ -1308,7 +1524,7 @@ function handleOutputItemDone(
|
|
|
1308
1524
|
runtime.currentBlock.textSignature = encodeTextSignatureV1(item.id, phase);
|
|
1309
1525
|
stream.push({
|
|
1310
1526
|
type: "text_end",
|
|
1311
|
-
contentIndex:
|
|
1527
|
+
contentIndex: output.content.length - 1,
|
|
1312
1528
|
content: runtime.currentBlock.text,
|
|
1313
1529
|
partial: output,
|
|
1314
1530
|
});
|
|
@@ -1329,9 +1545,12 @@ function handleOutputItemDone(
|
|
|
1329
1545
|
runtime.currentBlock.arguments = toolCall.arguments;
|
|
1330
1546
|
delete (runtime.currentBlock as { partialJson?: string }).partialJson;
|
|
1331
1547
|
delete (runtime.currentBlock as { lastParseLen?: number }).lastParseLen;
|
|
1548
|
+
// Detach so a late/duplicate arguments.delta cannot append to the
|
|
1549
|
+
// finished block or trip the whitespace-loop guard against it.
|
|
1550
|
+
runtime.currentBlock = null;
|
|
1332
1551
|
}
|
|
1333
1552
|
runtime.canSafelyReplayWebsocketOverSse = false;
|
|
1334
|
-
stream.push({ type: "toolcall_end", contentIndex:
|
|
1553
|
+
stream.push({ type: "toolcall_end", contentIndex: output.content.length - 1, toolCall, partial: output });
|
|
1335
1554
|
return;
|
|
1336
1555
|
}
|
|
1337
1556
|
|
|
@@ -1347,21 +1566,25 @@ function handleOutputItemDone(
|
|
|
1347
1566
|
arguments: { input: rawInput },
|
|
1348
1567
|
customWireName: item.name,
|
|
1349
1568
|
};
|
|
1569
|
+
if (runtime.currentBlock?.type === "toolCall") {
|
|
1570
|
+
runtime.currentBlock.arguments = { input: rawInput };
|
|
1571
|
+
delete (runtime.currentBlock as { partialJson?: string }).partialJson;
|
|
1572
|
+
runtime.currentBlock = null;
|
|
1573
|
+
}
|
|
1350
1574
|
runtime.canSafelyReplayWebsocketOverSse = false;
|
|
1351
|
-
stream.push({ type: "toolcall_end", contentIndex:
|
|
1575
|
+
stream.push({ type: "toolcall_end", contentIndex: output.content.length - 1, toolCall, partial: output });
|
|
1352
1576
|
return;
|
|
1353
1577
|
}
|
|
1354
1578
|
|
|
1355
1579
|
void model;
|
|
1356
1580
|
}
|
|
1357
1581
|
|
|
1358
|
-
function handleResponseCreated(runtime: CodexStreamRuntime, rawEvent: Record<string, unknown>):
|
|
1582
|
+
function handleResponseCreated(runtime: CodexStreamRuntime, rawEvent: Record<string, unknown>): void {
|
|
1359
1583
|
const response = (rawEvent as { response?: { id?: string } }).response;
|
|
1360
1584
|
const state = runtime.websocketState;
|
|
1361
|
-
if (runtime.transport === "websocket" &&
|
|
1585
|
+
if (state && runtime.transport === "websocket" && typeof response?.id === "string" && response.id.length > 0) {
|
|
1362
1586
|
state.lastResponseId = response.id;
|
|
1363
1587
|
}
|
|
1364
|
-
return undefined;
|
|
1365
1588
|
}
|
|
1366
1589
|
|
|
1367
1590
|
function handleResponseCompleted(
|
|
@@ -1384,6 +1607,7 @@ function handleResponseCompleted(
|
|
|
1384
1607
|
};
|
|
1385
1608
|
status?: string;
|
|
1386
1609
|
service_tier?: ServiceTier | "default";
|
|
1610
|
+
end_turn?: boolean;
|
|
1387
1611
|
};
|
|
1388
1612
|
}
|
|
1389
1613
|
).response;
|
|
@@ -1394,21 +1618,58 @@ function handleResponseCompleted(
|
|
|
1394
1618
|
}
|
|
1395
1619
|
|
|
1396
1620
|
const state = runtime.websocketState;
|
|
1397
|
-
if (
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1621
|
+
if (state) {
|
|
1622
|
+
if (runtime.transport !== "websocket") {
|
|
1623
|
+
// SSE turns never chain (previous_response_id is websocket-only on this
|
|
1624
|
+
// endpoint); a completed SSE turn also invalidates any websocket append
|
|
1625
|
+
// baseline, which no longer matches the transcript.
|
|
1626
|
+
resetCodexWebSocketAppendState(state);
|
|
1627
|
+
} else {
|
|
1628
|
+
state.lastRequest = structuredCloneJSON(runtime.requestBodyForState);
|
|
1629
|
+
if (typeof response?.id === "string" && response.id.length > 0) {
|
|
1630
|
+
state.lastResponseId = response.id;
|
|
1631
|
+
state.lastResponseItems = stripInputItemIds(structuredCloneJSON(runtime.nativeOutputItems));
|
|
1632
|
+
state.canAppend = rawEvent.type === "response.done" || rawEvent.type === "response.completed";
|
|
1633
|
+
} else {
|
|
1634
|
+
// Without a response id the append baseline cannot be trusted.
|
|
1635
|
+
state.canAppend = false;
|
|
1636
|
+
}
|
|
1637
|
+
}
|
|
1638
|
+
}
|
|
1639
|
+
|
|
1640
|
+
// Finalize any toolCall block whose output_item.done never arrived: the
|
|
1641
|
+
// throttled delta parser may have left block.arguments stale, and the
|
|
1642
|
+
// toolUse promotion below would hand the agent incomplete arguments.
|
|
1643
|
+
// Mirrors the shared decoder's response.completed sweep; also strips the
|
|
1644
|
+
// transient partialJson/lastParseLen fields so they never persist.
|
|
1645
|
+
for (const block of output.content) {
|
|
1646
|
+
if (block.type !== "toolCall") continue;
|
|
1647
|
+
const pending = block as ToolCall & { partialJson?: string; lastParseLen?: number };
|
|
1648
|
+
if (pending.partialJson) {
|
|
1649
|
+
pending.arguments =
|
|
1650
|
+
pending.customWireName !== undefined
|
|
1651
|
+
? { input: pending.partialJson }
|
|
1652
|
+
: parseStreamingJson(pending.partialJson);
|
|
1402
1653
|
}
|
|
1403
|
-
|
|
1654
|
+
delete pending.partialJson;
|
|
1655
|
+
delete pending.lastParseLen;
|
|
1404
1656
|
}
|
|
1405
1657
|
|
|
1406
1658
|
calculateCost(model, output.usage);
|
|
1407
1659
|
applyCodexServiceTierPricing(model, output.usage, response?.service_tier, runtime.requestBodyForState.service_tier);
|
|
1408
|
-
output.stopReason = mapOpenAIResponsesStopReason(response?.status as
|
|
1660
|
+
output.stopReason = mapOpenAIResponsesStopReason(response?.status as ResponseStatus | undefined);
|
|
1409
1661
|
if (output.content.some(block => block.type === "toolCall") && output.stopReason === "stop") {
|
|
1410
1662
|
output.stopReason = "toolUse";
|
|
1411
1663
|
}
|
|
1664
|
+
// The Codex backend marks an unfinished turn with `end_turn: false` on the
|
|
1665
|
+
// terminal event: this response ended on commentary only and the model
|
|
1666
|
+
// expects to be sampled again (mirrors codex-rs `needs_follow_up`). Surface
|
|
1667
|
+
// it as a non-terminal stop so the agent loop replays history and
|
|
1668
|
+
// re-samples instead of ending the turn. Gated on "stop": with tool calls
|
|
1669
|
+
// present the loop continues through tool execution anyway.
|
|
1670
|
+
if (response?.end_turn === false && output.stopReason === "stop") {
|
|
1671
|
+
output.stopDetails = { type: "pause_turn" };
|
|
1672
|
+
}
|
|
1412
1673
|
}
|
|
1413
1674
|
|
|
1414
1675
|
async function recoverCodexStreamError(
|
|
@@ -1416,6 +1677,9 @@ async function recoverCodexStreamError(
|
|
|
1416
1677
|
runtime: CodexStreamRuntime,
|
|
1417
1678
|
error: unknown,
|
|
1418
1679
|
): Promise<boolean> {
|
|
1680
|
+
if (await tryRecoverCodexWhitespaceToolCallLoop(context, runtime, error)) {
|
|
1681
|
+
return true;
|
|
1682
|
+
}
|
|
1419
1683
|
if (await tryReconnectCodexWebSocketOnConnectionLimit(context, runtime, error)) {
|
|
1420
1684
|
return true;
|
|
1421
1685
|
}
|
|
@@ -1431,11 +1695,90 @@ async function recoverCodexStreamError(
|
|
|
1431
1695
|
return false;
|
|
1432
1696
|
}
|
|
1433
1697
|
|
|
1698
|
+
/**
|
|
1699
|
+
* Pop the half-built degenerate tool-call block (the one whose arguments were
|
|
1700
|
+
* nothing but whitespace) off the output accumulator so it never surfaces in the
|
|
1701
|
+
* caller's message. Any legitimate content produced before it is preserved.
|
|
1702
|
+
*/
|
|
1703
|
+
function dropTrailingDegenerateToolCall(output: AssistantMessage, runtime: CodexStreamRuntime): void {
|
|
1704
|
+
const block = runtime.currentBlock;
|
|
1705
|
+
if (block && block.type === "toolCall" && output.content[output.content.length - 1] === block) {
|
|
1706
|
+
output.content.pop();
|
|
1707
|
+
}
|
|
1708
|
+
runtime.currentItem = null;
|
|
1709
|
+
runtime.currentBlock = null;
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
/**
|
|
1713
|
+
* Recover from the degenerate whitespace-only tool-call argument loop
|
|
1714
|
+
* ({@link CodexWhitespaceToolCallLoopError}). The interrupted function call has
|
|
1715
|
+
* no usable arguments, so drop the partial turn and replay the request from
|
|
1716
|
+
* scratch — bounded by {@link CODEX_WHITESPACE_LOOP_RETRY_LIMIT}. Sampling
|
|
1717
|
+
* nondeterminism usually breaks the loop on a fresh attempt; once the budget is
|
|
1718
|
+
* exhausted the original error is surfaced (now without the junk tool call
|
|
1719
|
+
* polluting the message). Replay is refused once a toolcall_end was already
|
|
1720
|
+
* delivered to the consumer (`canSafelyReplayWebsocketOverSse`) — it would
|
|
1721
|
+
* re-emit the same tool calls.
|
|
1722
|
+
*/
|
|
1723
|
+
async function tryRecoverCodexWhitespaceToolCallLoop(
|
|
1724
|
+
context: CodexStreamProcessingContext,
|
|
1725
|
+
runtime: CodexStreamRuntime,
|
|
1726
|
+
error: unknown,
|
|
1727
|
+
): Promise<boolean> {
|
|
1728
|
+
if (!(error instanceof CodexWhitespaceToolCallLoopError)) {
|
|
1729
|
+
return false;
|
|
1730
|
+
}
|
|
1731
|
+
// Drop the half-built degenerate tool call whether or not we retry, so it
|
|
1732
|
+
// never reaches the caller's message.
|
|
1733
|
+
dropTrailingDegenerateToolCall(context.output, runtime);
|
|
1734
|
+
if (
|
|
1735
|
+
runtime.whitespaceLoopRetries >= CODEX_WHITESPACE_LOOP_RETRY_LIMIT ||
|
|
1736
|
+
!runtime.canSafelyReplayWebsocketOverSse ||
|
|
1737
|
+
context.options?.signal?.aborted
|
|
1738
|
+
) {
|
|
1739
|
+
return false;
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
runtime.whitespaceLoopRetries += 1;
|
|
1743
|
+
const websocketState = context.requestContext.websocketState;
|
|
1744
|
+
if (websocketState) {
|
|
1745
|
+
resetCodexWebSocketAppendState(websocketState);
|
|
1746
|
+
resetCodexSessionMetadata(websocketState);
|
|
1747
|
+
}
|
|
1748
|
+
|
|
1749
|
+
logCodexDebug("retrying codex turn after whitespace-only tool-call argument loop", {
|
|
1750
|
+
retry: runtime.whitespaceLoopRetries,
|
|
1751
|
+
retryBudget: CODEX_WHITESPACE_LOOP_RETRY_LIMIT,
|
|
1752
|
+
transport: runtime.transport,
|
|
1753
|
+
});
|
|
1754
|
+
|
|
1755
|
+
runtime.currentItem = null;
|
|
1756
|
+
runtime.currentBlock = null;
|
|
1757
|
+
runtime.sawTerminalEvent = false;
|
|
1758
|
+
runtime.nativeOutputItems.length = 0;
|
|
1759
|
+
resetWhitespaceToolCallArgumentsDelta(runtime);
|
|
1760
|
+
resetOutputState(context.output);
|
|
1761
|
+
context.firstTokenTime = undefined;
|
|
1762
|
+
await scheduler.wait(CODEX_WHITESPACE_LOOP_RETRY_DELAY_MS * runtime.whitespaceLoopRetries, {
|
|
1763
|
+
signal: context.requestSetup.requestSignal,
|
|
1764
|
+
});
|
|
1765
|
+
|
|
1766
|
+
if (runtime.transport === "websocket" && websocketState) {
|
|
1767
|
+
await reopenCodexWebSocketRuntimeStream(context, runtime, websocketState);
|
|
1768
|
+
return true;
|
|
1769
|
+
}
|
|
1770
|
+
|
|
1771
|
+
await reopenCodexSseRuntimeStream(context, runtime, websocketState);
|
|
1772
|
+
return true;
|
|
1773
|
+
}
|
|
1774
|
+
|
|
1434
1775
|
/**
|
|
1435
1776
|
* Handles `websocket_connection_limit_reached` errors by closing the stale connection
|
|
1436
1777
|
* and opening a fresh websocket. If content has already been emitted to the caller,
|
|
1437
1778
|
* falls back to SSE replay (same as other WS failures) since we cannot safely
|
|
1438
|
-
* continue a partial response on a new connection.
|
|
1779
|
+
* continue a partial response on a new connection. If a tool call was already
|
|
1780
|
+
* delivered (`canSafelyReplayWebsocketOverSse` is false), the error surfaces
|
|
1781
|
+
* instead — replaying would re-emit the same tool calls.
|
|
1439
1782
|
*/
|
|
1440
1783
|
async function tryReconnectCodexWebSocketOnConnectionLimit(
|
|
1441
1784
|
context: CodexStreamProcessingContext,
|
|
@@ -1455,6 +1798,12 @@ async function tryReconnectCodexWebSocketOnConnectionLimit(
|
|
|
1455
1798
|
websocketState.connection = undefined;
|
|
1456
1799
|
resetCodexWebSocketAppendState(websocketState);
|
|
1457
1800
|
|
|
1801
|
+
if (context.output.content.length > 0 && !runtime.canSafelyReplayWebsocketOverSse) {
|
|
1802
|
+
// A toolcall_end already reached the consumer; a full replay would emit
|
|
1803
|
+
// the same tool calls a second time. Let the error surface instead.
|
|
1804
|
+
return false;
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1458
1807
|
logCodexDebug("codex websocket connection limit reached, reconnecting", {
|
|
1459
1808
|
hadContent: context.output.content.length > 0,
|
|
1460
1809
|
retry: runtime.websocketStreamRetries,
|
|
@@ -1463,7 +1812,6 @@ async function tryReconnectCodexWebSocketOnConnectionLimit(
|
|
|
1463
1812
|
if (context.output.content.length > 0) {
|
|
1464
1813
|
// Content already emitted to the caller — cannot safely continue on a new WS.
|
|
1465
1814
|
// Reset and replay the full request over SSE.
|
|
1466
|
-
runtime.canSafelyReplayWebsocketOverSse = true;
|
|
1467
1815
|
runtime.currentItem = null;
|
|
1468
1816
|
runtime.currentBlock = null;
|
|
1469
1817
|
runtime.nativeOutputItems.length = 0;
|
|
@@ -1474,14 +1822,40 @@ async function tryReconnectCodexWebSocketOnConnectionLimit(
|
|
|
1474
1822
|
return true;
|
|
1475
1823
|
}
|
|
1476
1824
|
|
|
1477
|
-
// No content emitted yet —
|
|
1825
|
+
// No content emitted yet — clear accumulator state from the failed attempt
|
|
1826
|
+
// (blockless native items can exist even with empty content) and reconnect
|
|
1827
|
+
// over websocket, bounded by the shared retry budget: an account-scoped
|
|
1828
|
+
// limit can reject every fresh connection, and an unbounded loop would
|
|
1829
|
+
// hammer the endpoint with zero backoff.
|
|
1830
|
+
runtime.currentItem = null;
|
|
1831
|
+
runtime.currentBlock = null;
|
|
1832
|
+
runtime.nativeOutputItems.length = 0;
|
|
1833
|
+
context.firstTokenTime = undefined;
|
|
1834
|
+
if (runtime.websocketStreamRetries >= getCodexWebSocketRetryBudget()) {
|
|
1835
|
+
recordCodexWebSocketFailure(websocketState, true);
|
|
1836
|
+
await reopenCodexSseRuntimeStream(context, runtime, websocketState);
|
|
1837
|
+
return true;
|
|
1838
|
+
}
|
|
1478
1839
|
runtime.websocketStreamRetries += 1;
|
|
1840
|
+
await scheduler.wait(getCodexWebSocketRetryDelayMs(runtime.websocketStreamRetries), {
|
|
1841
|
+
signal: context.requestSetup.requestSignal,
|
|
1842
|
+
});
|
|
1479
1843
|
await reopenCodexWebSocketRuntimeStream(context, runtime, websocketState);
|
|
1480
1844
|
return true;
|
|
1481
1845
|
}
|
|
1482
1846
|
|
|
1483
|
-
function
|
|
1484
|
-
|
|
1847
|
+
function isCodexStalePreviousResponseError(error: unknown): boolean {
|
|
1848
|
+
if (error instanceof CodexProviderStreamError) return error.code === "previous_response_not_found";
|
|
1849
|
+
if (!(error instanceof Error)) return false;
|
|
1850
|
+
if ((error as { code?: string }).code === "previous_response_not_found") return true;
|
|
1851
|
+
// "unsupported": the backend intermittently rejects the parameter outright
|
|
1852
|
+
// with `{"detail":"Unsupported parameter: previous_response_id"}` (no
|
|
1853
|
+
// `error.code`); treat it like a stale chain so the turn replays with full
|
|
1854
|
+
// context instead of surfacing the 400.
|
|
1855
|
+
return (
|
|
1856
|
+
/previous[ _]?response/i.test(error.message) &&
|
|
1857
|
+
/not[ _]?found|invalid|expired|stale|unsupported/i.test(error.message)
|
|
1858
|
+
);
|
|
1485
1859
|
}
|
|
1486
1860
|
|
|
1487
1861
|
async function tryRecoverCodexPreviousResponseNotFound(
|
|
@@ -1491,15 +1865,18 @@ async function tryRecoverCodexPreviousResponseNotFound(
|
|
|
1491
1865
|
): Promise<boolean> {
|
|
1492
1866
|
const websocketState = context.requestContext.websocketState;
|
|
1493
1867
|
if (
|
|
1494
|
-
!
|
|
1868
|
+
!isCodexStalePreviousResponseError(error) ||
|
|
1495
1869
|
!websocketState ||
|
|
1496
|
-
runtime.transport !== "websocket" ||
|
|
1497
1870
|
context.output.content.length > 0 ||
|
|
1498
1871
|
context.options?.signal?.aborted ||
|
|
1499
1872
|
runtime.providerRetryAttempt >= CODEX_MAX_RETRIES
|
|
1500
1873
|
) {
|
|
1501
1874
|
return false;
|
|
1502
1875
|
}
|
|
1876
|
+
if (runtime.transport !== "websocket") {
|
|
1877
|
+
// SSE never sends previous_response_id; let other recovery handle it.
|
|
1878
|
+
return false;
|
|
1879
|
+
}
|
|
1503
1880
|
|
|
1504
1881
|
runtime.providerRetryAttempt += 1;
|
|
1505
1882
|
resetCodexWebSocketAppendState(websocketState);
|
|
@@ -1551,6 +1928,13 @@ async function tryReplayWebsocketFailureOverSse(
|
|
|
1551
1928
|
|
|
1552
1929
|
if (!activateFallback) {
|
|
1553
1930
|
runtime.websocketStreamRetries += 1;
|
|
1931
|
+
// Full re-send on a fresh socket: clear accumulator state from the failed
|
|
1932
|
+
// attempt. Content is empty here, but blockless native items (e.g.
|
|
1933
|
+
// web_search_call) may already have accumulated.
|
|
1934
|
+
runtime.currentItem = null;
|
|
1935
|
+
runtime.currentBlock = null;
|
|
1936
|
+
runtime.nativeOutputItems.length = 0;
|
|
1937
|
+
context.firstTokenTime = undefined;
|
|
1554
1938
|
await scheduler.wait(getCodexWebSocketRetryDelayMs(runtime.websocketStreamRetries), {
|
|
1555
1939
|
signal: context.requestSetup.requestSignal,
|
|
1556
1940
|
});
|
|
@@ -1558,14 +1942,11 @@ async function tryReplayWebsocketFailureOverSse(
|
|
|
1558
1942
|
return true;
|
|
1559
1943
|
}
|
|
1560
1944
|
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
resetOutputState(context.output);
|
|
1567
|
-
context.firstTokenTime = undefined;
|
|
1568
|
-
}
|
|
1945
|
+
runtime.currentItem = null;
|
|
1946
|
+
runtime.currentBlock = null;
|
|
1947
|
+
runtime.nativeOutputItems.length = 0;
|
|
1948
|
+
resetOutputState(context.output);
|
|
1949
|
+
context.firstTokenTime = undefined;
|
|
1569
1950
|
|
|
1570
1951
|
await reopenCodexSseRuntimeStream(context, runtime, state);
|
|
1571
1952
|
return true;
|
|
@@ -1587,7 +1968,7 @@ async function tryRetryCodexProviderError(
|
|
|
1587
1968
|
|
|
1588
1969
|
runtime.providerRetryAttempt += 1;
|
|
1589
1970
|
const websocketState = context.requestContext.websocketState;
|
|
1590
|
-
if (
|
|
1971
|
+
if (websocketState) {
|
|
1591
1972
|
resetCodexWebSocketAppendState(websocketState);
|
|
1592
1973
|
resetCodexSessionMetadata(websocketState);
|
|
1593
1974
|
}
|
|
@@ -1602,6 +1983,7 @@ async function tryRetryCodexProviderError(
|
|
|
1602
1983
|
runtime.currentItem = null;
|
|
1603
1984
|
runtime.currentBlock = null;
|
|
1604
1985
|
runtime.sawTerminalEvent = false;
|
|
1986
|
+
runtime.nativeOutputItems.length = 0;
|
|
1605
1987
|
resetOutputState(context.output);
|
|
1606
1988
|
context.firstTokenTime = undefined;
|
|
1607
1989
|
await scheduler.wait(CODEX_RETRY_DELAY_MS * runtime.providerRetryAttempt, {
|
|
@@ -1627,7 +2009,7 @@ function finalizeCodexResponse(
|
|
|
1627
2009
|
throw new Error("Request was aborted");
|
|
1628
2010
|
}
|
|
1629
2011
|
if (!runtime.sawTerminalEvent) {
|
|
1630
|
-
if (
|
|
2012
|
+
if (context.requestContext.websocketState) {
|
|
1631
2013
|
resetCodexWebSocketAppendState(context.requestContext.websocketState);
|
|
1632
2014
|
resetCodexSessionMetadata(context.requestContext.websocketState);
|
|
1633
2015
|
}
|
|
@@ -1684,9 +2066,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
1684
2066
|
const output = createAssistantOutput(model);
|
|
1685
2067
|
const requestSetup = createRequestSetup(options);
|
|
1686
2068
|
let processingContext: CodexStreamProcessingContext | undefined;
|
|
2069
|
+
let requestContext: CodexRequestContext | undefined;
|
|
1687
2070
|
|
|
1688
2071
|
try {
|
|
1689
|
-
|
|
2072
|
+
requestContext = await buildCodexRequestContext(model, context, options, output);
|
|
1690
2073
|
const initialTransport = await openInitialCodexEventStream(model, options, requestSetup, requestContext);
|
|
1691
2074
|
const runtime = createCodexStreamRuntime({
|
|
1692
2075
|
...initialTransport,
|
|
@@ -1720,12 +2103,13 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
1720
2103
|
stream,
|
|
1721
2104
|
options,
|
|
1722
2105
|
requestSetup,
|
|
1723
|
-
requestContext: {
|
|
2106
|
+
requestContext: requestContext ?? {
|
|
1724
2107
|
apiKey: "",
|
|
1725
2108
|
accountId: "",
|
|
1726
2109
|
baseUrl: model.baseUrl || CODEX_BASE_URL,
|
|
1727
2110
|
url: "",
|
|
1728
2111
|
requestHeaders: {},
|
|
2112
|
+
responsesLite: options?.responsesLite === true,
|
|
1729
2113
|
transformedBody: { model: model.id },
|
|
1730
2114
|
rawRequestDump: {
|
|
1731
2115
|
provider: model.provider,
|
|
@@ -1738,8 +2122,19 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
1738
2122
|
},
|
|
1739
2123
|
startTime,
|
|
1740
2124
|
} satisfies CodexStreamProcessingContext);
|
|
1741
|
-
|
|
1742
|
-
|
|
2125
|
+
try {
|
|
2126
|
+
const failure = await handleCodexStreamFailure(failureContext, error);
|
|
2127
|
+
stream.push({ type: "error", reason: failure.stopReason as "error" | "aborted", error: failure });
|
|
2128
|
+
} catch (failureError) {
|
|
2129
|
+
// Last resort — the failure handler itself threw (exotic error object or
|
|
2130
|
+
// request-dump formatting). Never leave the stream un-ended.
|
|
2131
|
+
logger.error("Codex stream failure handler threw", {
|
|
2132
|
+
error: failureError instanceof Error ? failureError.message : String(failureError),
|
|
2133
|
+
});
|
|
2134
|
+
output.stopReason = "error";
|
|
2135
|
+
output.errorMessage ??= error instanceof Error ? error.message : String(error);
|
|
2136
|
+
stream.push({ type: "error", reason: "error", error: output });
|
|
2137
|
+
}
|
|
1743
2138
|
stream.end();
|
|
1744
2139
|
}
|
|
1745
2140
|
})();
|
|
@@ -1751,7 +2146,7 @@ export async function prewarmOpenAICodexResponses(
|
|
|
1751
2146
|
model: Model<"openai-codex-responses">,
|
|
1752
2147
|
options?: Pick<
|
|
1753
2148
|
OpenAICodexResponsesOptions,
|
|
1754
|
-
"apiKey" | "headers" | "sessionId" | "signal" | "preferWebsockets" | "providerSessionState"
|
|
2149
|
+
"apiKey" | "headers" | "sessionId" | "signal" | "preferWebsockets" | "providerSessionState" | "responsesLite"
|
|
1755
2150
|
>,
|
|
1756
2151
|
): Promise<void> {
|
|
1757
2152
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
|
|
@@ -1761,7 +2156,8 @@ export async function prewarmOpenAICodexResponses(
|
|
|
1761
2156
|
const url = resolveCodexResponsesUrl(baseUrl);
|
|
1762
2157
|
const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
|
|
1763
2158
|
const providerSessionState = getCodexProviderSessionState(options?.providerSessionState);
|
|
1764
|
-
const
|
|
2159
|
+
const responsesLite = options?.responsesLite === true;
|
|
2160
|
+
const sessionKey = getCodexWebSocketSessionKey(promptCacheKey, model, accountId, baseUrl, responsesLite);
|
|
1765
2161
|
const publicSessionKey = getCodexPublicSessionKey(promptCacheKey, model, baseUrl);
|
|
1766
2162
|
if (publicSessionKey && sessionKey) {
|
|
1767
2163
|
providerSessionState?.webSocketPublicToPrivate.set(publicSessionKey, sessionKey);
|
|
@@ -1778,6 +2174,7 @@ export async function prewarmOpenAICodexResponses(
|
|
|
1778
2174
|
promptCacheKey,
|
|
1779
2175
|
"websocket",
|
|
1780
2176
|
state,
|
|
2177
|
+
responsesLite,
|
|
1781
2178
|
);
|
|
1782
2179
|
await logger.time(
|
|
1783
2180
|
"prewarmCodex:establishWs",
|
|
@@ -1807,10 +2204,14 @@ function getCodexWebSocketSessionKey(
|
|
|
1807
2204
|
model: Model<"openai-codex-responses">,
|
|
1808
2205
|
accountId: string,
|
|
1809
2206
|
baseUrl: string,
|
|
2207
|
+
responsesLite: boolean,
|
|
1810
2208
|
): string | undefined {
|
|
1811
2209
|
const promptCacheKey = normalizeOpenAIResponsesPromptCacheKey(sessionId);
|
|
1812
2210
|
if (!promptCacheKey) return undefined;
|
|
1813
|
-
|
|
2211
|
+
// Responses Lite is connection-scoped on the WebSocket upgrade, so lite and
|
|
2212
|
+
// non-lite turns must never share a pooled socket or append state.
|
|
2213
|
+
const liteSuffix = responsesLite ? ":lite" : "";
|
|
2214
|
+
return `${accountId}:${baseUrl}:${model.id}:${promptCacheKey}${liteSuffix}`;
|
|
1814
2215
|
}
|
|
1815
2216
|
|
|
1816
2217
|
function getCodexPublicSessionKey(
|
|
@@ -1854,13 +2255,18 @@ function resetCodexWebSocketAppendState(state: CodexWebSocketSessionState): void
|
|
|
1854
2255
|
function resetCodexSessionMetadata(state: CodexWebSocketSessionState): void {
|
|
1855
2256
|
state.turnState = undefined;
|
|
1856
2257
|
state.modelsEtag = undefined;
|
|
1857
|
-
state.reasoningIncluded = undefined;
|
|
1858
2258
|
}
|
|
1859
2259
|
|
|
1860
2260
|
function recordCodexWebSocketFailure(state: CodexWebSocketSessionState, activateFallback: boolean): void {
|
|
1861
2261
|
resetCodexWebSocketAppendState(state);
|
|
1862
|
-
|
|
1863
|
-
|
|
2262
|
+
// Never tear down a CONNECTING socket: it belongs to a concurrent caller's
|
|
2263
|
+
// in-flight handshake (prewarm/request race); closing it would reject that
|
|
2264
|
+
// caller with a fatal "websocket closed before open" and disable websockets
|
|
2265
|
+
// for the whole session.
|
|
2266
|
+
if (state.connection && !state.connection.isConnecting()) {
|
|
2267
|
+
state.connection.close("fallback");
|
|
2268
|
+
state.connection = undefined;
|
|
2269
|
+
}
|
|
1864
2270
|
state.lastFallbackAt = Date.now();
|
|
1865
2271
|
if (activateFallback && !state.disableWebsocket) {
|
|
1866
2272
|
state.disableWebsocket = true;
|
|
@@ -1949,28 +2355,6 @@ export function getOpenAICodexTransportDetails(
|
|
|
1949
2355
|
};
|
|
1950
2356
|
}
|
|
1951
2357
|
|
|
1952
|
-
function buildAppendInput(
|
|
1953
|
-
previous: RequestBody | undefined,
|
|
1954
|
-
previousResponseItems: InputItem[] | undefined,
|
|
1955
|
-
current: RequestBody,
|
|
1956
|
-
): InputItem[] | null {
|
|
1957
|
-
if (!previous) return null;
|
|
1958
|
-
if (!Array.isArray(previous.input) || !Array.isArray(current.input)) return null;
|
|
1959
|
-
const previousWithoutInput = { ...previous, input: undefined };
|
|
1960
|
-
const currentWithoutInput = { ...current, input: undefined };
|
|
1961
|
-
if (JSON.stringify(previousWithoutInput) !== JSON.stringify(currentWithoutInput)) {
|
|
1962
|
-
return null;
|
|
1963
|
-
}
|
|
1964
|
-
const baseline = [...previous.input, ...(previousResponseItems ?? [])];
|
|
1965
|
-
if (current.input.length <= baseline.length) return null;
|
|
1966
|
-
for (let index = 0; index < baseline.length; index += 1) {
|
|
1967
|
-
if (JSON.stringify(baseline[index]) !== JSON.stringify(current.input[index])) {
|
|
1968
|
-
return null;
|
|
1969
|
-
}
|
|
1970
|
-
}
|
|
1971
|
-
return current.input.slice(baseline.length) as InputItem[];
|
|
1972
|
-
}
|
|
1973
|
-
|
|
1974
2358
|
function stripInputItemIds(items: Array<Record<string, unknown>>): InputItem[] {
|
|
1975
2359
|
return items.map(item => {
|
|
1976
2360
|
if (item.id == null) return item as InputItem;
|
|
@@ -1997,37 +2381,40 @@ function recordCodexWebSocketRequestStats(
|
|
|
1997
2381
|
state.stats.lastPreviousResponseId = undefined;
|
|
1998
2382
|
}
|
|
1999
2383
|
|
|
2000
|
-
|
|
2384
|
+
/**
|
|
2385
|
+
* Shape the next websocket turn's request body: when the session's append
|
|
2386
|
+
* baseline is intact (same options, strict history prefix), chain via
|
|
2387
|
+
* `previous_response_id` + delta-only `input`; otherwise break the chain and
|
|
2388
|
+
* replay the full transcript. SSE requests never chain — the HTTP endpoint's
|
|
2389
|
+
* request schema has no `previous_response_id` (codex-rs carries it only on
|
|
2390
|
+
* websocket `response.create` frames) and strict gateway validators 400 it
|
|
2391
|
+
* with `{"detail":"Unsupported parameter: previous_response_id"}`.
|
|
2392
|
+
*/
|
|
2393
|
+
function buildCodexChainedRequestBody(
|
|
2001
2394
|
requestBody: RequestBody,
|
|
2002
2395
|
state: CodexWebSocketSessionState | undefined,
|
|
2003
|
-
):
|
|
2004
|
-
const
|
|
2005
|
-
|
|
2396
|
+
): RequestBody {
|
|
2397
|
+
const chainable = state?.canAppend === true;
|
|
2398
|
+
const appendInput = chainable
|
|
2399
|
+
? buildResponsesDeltaInput<InputItem>(state.lastRequest, state.lastResponseItems, requestBody)
|
|
2006
2400
|
: null;
|
|
2007
2401
|
if (appendInput && appendInput.length > 0 && state?.lastResponseId) {
|
|
2008
|
-
const
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
}
|
|
2017
|
-
if (state?.canAppend) {
|
|
2018
|
-
logCodexDebug("codex websocket append reset", {
|
|
2402
|
+
const body: RequestBody = { ...requestBody, previous_response_id: state.lastResponseId, input: appendInput };
|
|
2403
|
+
recordCodexWebSocketRequestStats(state, body);
|
|
2404
|
+
return body;
|
|
2405
|
+
}
|
|
2406
|
+
if (chainable && state) {
|
|
2407
|
+
// Chaining was eligible but the prefix/options check failed: history
|
|
2408
|
+
// mutated or options changed — break the chain.
|
|
2409
|
+
logCodexDebug("codex append reset", {
|
|
2019
2410
|
hadTurnStateHeader: Boolean(state.turnState),
|
|
2020
2411
|
hadModelsEtagHeader: Boolean(state.modelsEtag),
|
|
2021
2412
|
});
|
|
2022
2413
|
resetCodexWebSocketAppendState(state);
|
|
2023
2414
|
resetCodexSessionMetadata(state);
|
|
2024
2415
|
}
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
...requestBody,
|
|
2028
|
-
};
|
|
2029
|
-
recordCodexWebSocketRequestStats(state, request);
|
|
2030
|
-
return request;
|
|
2416
|
+
recordCodexWebSocketRequestStats(state, requestBody);
|
|
2417
|
+
return requestBody;
|
|
2031
2418
|
}
|
|
2032
2419
|
|
|
2033
2420
|
function toWebSocketUrl(url: string): string {
|
|
@@ -2091,6 +2478,11 @@ class CodexWebSocketConnection {
|
|
|
2091
2478
|
return this.#socket?.readyState === WebSocket.OPEN;
|
|
2092
2479
|
}
|
|
2093
2480
|
|
|
2481
|
+
/** True while a handshake (possibly started by another caller) is still in flight. */
|
|
2482
|
+
isConnecting(): boolean {
|
|
2483
|
+
return this.#connectPromise !== undefined;
|
|
2484
|
+
}
|
|
2485
|
+
|
|
2094
2486
|
/**
|
|
2095
2487
|
* Stricter variant of {@link isOpen} for the connection-pool reuse gate.
|
|
2096
2488
|
* Refuses sockets that have been silent past {@link CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS}.
|
|
@@ -2146,10 +2538,18 @@ class CodexWebSocketConnection {
|
|
|
2146
2538
|
this.#socket = socket;
|
|
2147
2539
|
let settled = false;
|
|
2148
2540
|
let timeout: NodeJS.Timeout | undefined;
|
|
2541
|
+
const clearPending = () => {
|
|
2542
|
+
if (timeout !== undefined) {
|
|
2543
|
+
clearTimeout(timeout);
|
|
2544
|
+
timeout = undefined;
|
|
2545
|
+
}
|
|
2546
|
+
if (signal) signal.removeEventListener("abort", onAbort);
|
|
2547
|
+
};
|
|
2149
2548
|
const onAbort = () => {
|
|
2150
2549
|
socket.close(1000, "aborted");
|
|
2151
2550
|
if (!settled) {
|
|
2152
2551
|
settled = true;
|
|
2552
|
+
clearPending();
|
|
2153
2553
|
reject(createCodexWebSocketTransportError("request was aborted"));
|
|
2154
2554
|
}
|
|
2155
2555
|
};
|
|
@@ -2160,17 +2560,16 @@ class CodexWebSocketConnection {
|
|
|
2160
2560
|
signal.addEventListener("abort", onAbort, { once: true });
|
|
2161
2561
|
}
|
|
2162
2562
|
}
|
|
2163
|
-
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
|
|
2168
|
-
|
|
2169
|
-
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
}, CODEX_WEBSOCKET_CONNECT_TIMEOUT_MS);
|
|
2563
|
+
if (!settled) {
|
|
2564
|
+
timeout = setTimeout(() => {
|
|
2565
|
+
socket.close(1000, "connect-timeout");
|
|
2566
|
+
if (!settled) {
|
|
2567
|
+
settled = true;
|
|
2568
|
+
clearPending();
|
|
2569
|
+
reject(createCodexWebSocketTransportError("connection timeout"));
|
|
2570
|
+
}
|
|
2571
|
+
}, CODEX_WEBSOCKET_CONNECT_TIMEOUT_MS);
|
|
2572
|
+
}
|
|
2174
2573
|
|
|
2175
2574
|
socket.onopen = event => {
|
|
2176
2575
|
if (!settled) {
|
|
@@ -2256,6 +2655,9 @@ class CodexWebSocketConnection {
|
|
|
2256
2655
|
if (this.#activeRequest) {
|
|
2257
2656
|
throw createCodexWebSocketTransportError("websocket request already in progress");
|
|
2258
2657
|
}
|
|
2658
|
+
if (signal?.aborted) {
|
|
2659
|
+
throw createCodexWebSocketTransportError("request was aborted");
|
|
2660
|
+
}
|
|
2259
2661
|
this.#activeRequest = true;
|
|
2260
2662
|
this.#streamObserver = onSseEvent;
|
|
2261
2663
|
// Drain any non-error frames left over from a prior request before sending.
|
|
@@ -2273,13 +2675,7 @@ class CodexWebSocketConnection {
|
|
|
2273
2675
|
this.close("aborted");
|
|
2274
2676
|
this.#push(createCodexWebSocketTransportError("request was aborted"));
|
|
2275
2677
|
};
|
|
2276
|
-
if (signal) {
|
|
2277
|
-
if (signal.aborted) {
|
|
2278
|
-
onAbort();
|
|
2279
|
-
} else {
|
|
2280
|
-
signal.addEventListener("abort", onAbort, { once: true });
|
|
2281
|
-
}
|
|
2282
|
-
}
|
|
2678
|
+
if (signal) signal.addEventListener("abort", onAbort, { once: true });
|
|
2283
2679
|
|
|
2284
2680
|
try {
|
|
2285
2681
|
const debugSession = isRequestDebugEnabled()
|
|
@@ -2297,8 +2693,13 @@ class CodexWebSocketConnection {
|
|
|
2297
2693
|
|
|
2298
2694
|
const requestPayload = JSON.stringify(request);
|
|
2299
2695
|
notifyCodexWebSocketOutbound(onSseEvent, request, requestPayload);
|
|
2696
|
+
// Re-check liveness: the debug-session await above can outlive the socket.
|
|
2697
|
+
const socket = this.#socket;
|
|
2698
|
+
if (!socket || socket.readyState !== WebSocket.OPEN) {
|
|
2699
|
+
throw createCodexWebSocketTransportError("websocket connection is unavailable");
|
|
2700
|
+
}
|
|
2300
2701
|
try {
|
|
2301
|
-
|
|
2702
|
+
socket.send(requestPayload);
|
|
2302
2703
|
} catch (error) {
|
|
2303
2704
|
throw createCodexWebSocketTransportError(
|
|
2304
2705
|
`websocket send failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
@@ -2517,9 +2918,11 @@ class CodexWebSocketConnection {
|
|
|
2517
2918
|
|
|
2518
2919
|
#push(item: Record<string, unknown> | Error | null): void {
|
|
2519
2920
|
if (item instanceof Error) {
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2921
|
+
// Append after frames already received instead of wiping them: a queued
|
|
2922
|
+
// terminal event (e.g. `response.completed` followed by an eager server
|
|
2923
|
+
// close) must still reach the consumer rather than morph into a spurious
|
|
2924
|
+
// transport failure. `#dropStaleFrames` keeps errors across requests, so
|
|
2925
|
+
// the death signal still surfaces if the data frames go unconsumed.
|
|
2523
2926
|
this.#queue.push(item);
|
|
2524
2927
|
this.#wakeWaiters();
|
|
2525
2928
|
return;
|
|
@@ -2574,6 +2977,22 @@ async function getOrCreateCodexWebSocketConnection(
|
|
|
2574
2977
|
signal?: AbortSignal,
|
|
2575
2978
|
): Promise<CodexWebSocketConnection> {
|
|
2576
2979
|
const headerRecord = headersToRecord(headers);
|
|
2980
|
+
// Join an in-flight handshake instead of tearing it down: closing a
|
|
2981
|
+
// CONNECTING socket rejects the concurrent caller (prewarm racing the first
|
|
2982
|
+
// request) with a fatal "websocket closed before open", which would disable
|
|
2983
|
+
// websockets for the entire session.
|
|
2984
|
+
// Bounded re-join: a fresh handshake may have been started by yet another
|
|
2985
|
+
// caller while we awaited the previous one.
|
|
2986
|
+
for (let joinAttempt = 0; joinAttempt < 3; joinAttempt += 1) {
|
|
2987
|
+
const pending = state.connection;
|
|
2988
|
+
if (!pending || pending.isOpen() || !pending.isConnecting()) break;
|
|
2989
|
+
try {
|
|
2990
|
+
await pending.connect(signal);
|
|
2991
|
+
} catch {
|
|
2992
|
+
// The handshake owner surfaces its own failure; re-evaluate below
|
|
2993
|
+
// (state.connection may have been replaced or cleared).
|
|
2994
|
+
}
|
|
2995
|
+
}
|
|
2577
2996
|
if (state.connection?.isOpen()) {
|
|
2578
2997
|
if (!state.connection.matchesAuth(headerRecord)) {
|
|
2579
2998
|
state.connection.close("token-refresh");
|
|
@@ -2612,11 +3031,13 @@ async function openCodexSseEventStream(
|
|
|
2612
3031
|
sessionId: string | undefined,
|
|
2613
3032
|
body: RequestBody,
|
|
2614
3033
|
state: CodexWebSocketSessionState | undefined,
|
|
2615
|
-
|
|
3034
|
+
responsesLite: boolean,
|
|
3035
|
+
signal: AbortSignal | undefined,
|
|
3036
|
+
firstEventTimeoutMs: number | undefined,
|
|
2616
3037
|
onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
|
|
2617
3038
|
fetchOverride?: FetchImpl,
|
|
2618
3039
|
): Promise<AsyncGenerator<Record<string, unknown>>> {
|
|
2619
|
-
const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state);
|
|
3040
|
+
const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state, responsesLite);
|
|
2620
3041
|
logCodexDebug("codex request", {
|
|
2621
3042
|
url,
|
|
2622
3043
|
model: body.model,
|
|
@@ -2624,15 +3045,31 @@ async function openCodexSseEventStream(
|
|
|
2624
3045
|
sentTurnStateHeader: headers.has(X_CODEX_TURN_STATE_HEADER),
|
|
2625
3046
|
sentModelsEtagHeader: headers.has(X_MODELS_ETAG_HEADER),
|
|
2626
3047
|
});
|
|
3048
|
+
// `wrapCodexSseStream` arms a first-event watchdog only after this fetch
|
|
3049
|
+
// resolves (it wraps the SSE generator). With `timeout: false` disabling
|
|
3050
|
+
// Bun's native 300s ceiling, a stalled pre-response request needs its own
|
|
3051
|
+
// watchdog — combine the caller signal with a fresh
|
|
3052
|
+
// `AbortSignal.timeout(firstEventTimeoutMs)` so headers must arrive
|
|
3053
|
+
// within the configured budget (issue #2422).
|
|
3054
|
+
const preResponseWatchdog =
|
|
3055
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
3056
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
3057
|
+
: undefined;
|
|
3058
|
+
const fetchSignal = preResponseWatchdog
|
|
3059
|
+
? signal
|
|
3060
|
+
? AbortSignal.any([signal, preResponseWatchdog])
|
|
3061
|
+
: preResponseWatchdog
|
|
3062
|
+
: signal;
|
|
2627
3063
|
const response = await fetchWithRetry(url, {
|
|
2628
3064
|
method: "POST",
|
|
2629
3065
|
headers,
|
|
2630
3066
|
body: JSON.stringify(body),
|
|
2631
|
-
signal,
|
|
3067
|
+
signal: fetchSignal,
|
|
2632
3068
|
maxAttempts: CODEX_MAX_RETRIES + 1,
|
|
2633
3069
|
defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
|
|
2634
3070
|
maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
|
|
2635
3071
|
fetch: fetchOverride,
|
|
3072
|
+
timeout: false,
|
|
2636
3073
|
});
|
|
2637
3074
|
logCodexDebug("codex response", {
|
|
2638
3075
|
url: response.url,
|
|
@@ -2641,14 +3078,10 @@ async function openCodexSseEventStream(
|
|
|
2641
3078
|
contentType: response.headers.get("content-type") || null,
|
|
2642
3079
|
cfRay: response.headers.get("cf-ray") || null,
|
|
2643
3080
|
});
|
|
2644
|
-
updateCodexSessionMetadataFromHeaders(state, response.headers);
|
|
2645
3081
|
if (!response.ok) {
|
|
2646
|
-
|
|
2647
|
-
const error = new Error(info.friendlyMessage || info.message);
|
|
2648
|
-
(error as { headers?: Headers; status?: number }).headers = response.headers;
|
|
2649
|
-
(error as { headers?: Headers; status?: number }).status = response.status;
|
|
2650
|
-
throw error;
|
|
3082
|
+
throw await CodexApiError.fromResponse(response);
|
|
2651
3083
|
}
|
|
3084
|
+
updateCodexSessionMetadataFromHeaders(state, response.headers);
|
|
2652
3085
|
if (!response.body) {
|
|
2653
3086
|
throw new Error("No response body");
|
|
2654
3087
|
}
|
|
@@ -2677,6 +3110,7 @@ function createCodexHeaders(
|
|
|
2677
3110
|
sessionId?: string,
|
|
2678
3111
|
transport: CodexTransport = "sse",
|
|
2679
3112
|
state?: CodexWebSocketSessionState,
|
|
3113
|
+
responsesLite = false,
|
|
2680
3114
|
): Headers {
|
|
2681
3115
|
const headers = new Headers(initHeaders ?? {});
|
|
2682
3116
|
headers.delete("x-api-key");
|
|
@@ -2698,6 +3132,7 @@ function createCodexHeaders(
|
|
|
2698
3132
|
} else {
|
|
2699
3133
|
headers.delete(OPENAI_HEADERS.CONVERSATION_ID);
|
|
2700
3134
|
headers.delete(OPENAI_HEADERS.SESSION_ID);
|
|
3135
|
+
headers.delete("x-client-request-id");
|
|
2701
3136
|
}
|
|
2702
3137
|
if (state?.turnState) {
|
|
2703
3138
|
headers.set(X_CODEX_TURN_STATE_HEADER, state.turnState);
|
|
@@ -2709,6 +3144,11 @@ function createCodexHeaders(
|
|
|
2709
3144
|
} else {
|
|
2710
3145
|
headers.delete(X_MODELS_ETAG_HEADER);
|
|
2711
3146
|
}
|
|
3147
|
+
if (responsesLite) {
|
|
3148
|
+
headers.set(X_OPENAI_INTERNAL_CODEX_RESPONSES_LITE_HEADER, "true");
|
|
3149
|
+
} else {
|
|
3150
|
+
headers.delete(X_OPENAI_INTERNAL_CODEX_RESPONSES_LITE_HEADER);
|
|
3151
|
+
}
|
|
2712
3152
|
if (transport === "sse") {
|
|
2713
3153
|
headers.set("accept", "text/event-stream");
|
|
2714
3154
|
headers.set("content-type", "application/json");
|
|
@@ -2736,6 +3176,7 @@ function redactHeaders(headers: Headers): Record<string, string> {
|
|
|
2736
3176
|
lower.includes("account") ||
|
|
2737
3177
|
lower.includes("session") ||
|
|
2738
3178
|
lower.includes("conversation") ||
|
|
3179
|
+
lower === "x-client-request-id" ||
|
|
2739
3180
|
lower === "cookie"
|
|
2740
3181
|
) {
|
|
2741
3182
|
redacted[key] = "[redacted]";
|
|
@@ -2815,11 +3256,13 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Contex
|
|
|
2815
3256
|
|
|
2816
3257
|
if (msg.role === "assistant") {
|
|
2817
3258
|
const assistantMsg = msg as AssistantMessage;
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
|
|
3259
|
+
// Native items are model-bound (reasoning carries encrypted content
|
|
3260
|
+
// minted by the producing model); after a mid-session model switch fall
|
|
3261
|
+
// back to block re-encode, which strips foreign signatures.
|
|
3262
|
+
const providerPayload =
|
|
3263
|
+
assistantMsg.api === model.api && assistantMsg.model === model.id
|
|
3264
|
+
? getOpenAIResponsesHistoryPayload(assistantMsg.providerPayload, model.provider, assistantMsg.provider)
|
|
3265
|
+
: undefined;
|
|
2823
3266
|
const historyItems = providerPayload?.items as Array<ResponseInput[number]> | undefined;
|
|
2824
3267
|
if (historyItems) {
|
|
2825
3268
|
for (const item of historyItems) {
|
|
@@ -2941,6 +3384,13 @@ function getString(value: unknown): string | undefined {
|
|
|
2941
3384
|
return typeof value === "string" ? value : undefined;
|
|
2942
3385
|
}
|
|
2943
3386
|
|
|
3387
|
+
class CodexWhitespaceToolCallLoopError extends Error {
|
|
3388
|
+
constructor(message: string) {
|
|
3389
|
+
super(message);
|
|
3390
|
+
this.name = "CodexWhitespaceToolCallLoopError";
|
|
3391
|
+
}
|
|
3392
|
+
}
|
|
3393
|
+
|
|
2944
3394
|
class CodexProviderStreamError extends Error {
|
|
2945
3395
|
readonly retryable: boolean;
|
|
2946
3396
|
readonly code?: string;
|
|
@@ -2965,7 +3415,9 @@ function isRetryableCodexFailureEvent(rawEvent: Record<string, unknown>): boolea
|
|
|
2965
3415
|
}
|
|
2966
3416
|
|
|
2967
3417
|
function createCodexProviderStreamError(rawEvent: Record<string, unknown>): CodexProviderStreamError {
|
|
2968
|
-
const
|
|
3418
|
+
const response = asRecord(rawEvent.response);
|
|
3419
|
+
const nestedError = asRecord(rawEvent.error) ?? (response ? asRecord(response.error) : null);
|
|
3420
|
+
const code = getString(rawEvent.code) ?? getString(nestedError?.code) ?? getString(nestedError?.type) ?? "";
|
|
2969
3421
|
const message = getString(rawEvent.message) ?? "";
|
|
2970
3422
|
const formattedMessage =
|
|
2971
3423
|
typeof rawEvent.type === "string" && rawEvent.type === "error"
|