npm - @prometheus-ai/ai - Versions diffs - 0.5.4 → 0.5.8 - Mend

@prometheus-ai/ai 0.5.4 → 0.5.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (377)

package/dist/types/auth-broker/remote-store.d.ts +2 -1
package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
package/dist/types/auth-gateway/server.d.ts +19 -0
package/dist/types/auth-gateway/types.d.ts +9 -3
package/dist/types/auth-retry.d.ts +119 -0
package/dist/types/auth-storage.d.ts +217 -8
package/dist/types/errors.d.ts +24 -0
package/dist/types/index.d.ts +5 -9
package/dist/types/provider-details.d.ts +1 -1
package/dist/types/providers/amazon-bedrock.d.ts +12 -6
package/dist/types/providers/anthropic-client.d.ts +10 -3
package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
package/dist/types/providers/anthropic-wire.d.ts +3 -3
package/dist/types/providers/anthropic.d.ts +41 -34
package/dist/types/providers/aws-credentials.d.ts +8 -0
package/dist/types/providers/azure-openai-responses.d.ts +1 -0
package/dist/types/providers/google-gemini-cli.d.ts +22 -1
package/dist/types/providers/google-shared.d.ts +22 -0
package/dist/types/providers/google-types.d.ts +13 -1
package/dist/types/providers/mock.d.ts +8 -3
package/dist/types/providers/ollama.d.ts +6 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
package/dist/types/providers/openai-chat-server.d.ts +3 -3
package/dist/types/providers/openai-chat-wire.d.ts +644 -0
package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
package/dist/types/providers/openai-codex-responses.d.ts +31 -2
package/dist/types/providers/openai-completions-compat.d.ts +2 -25
package/dist/types/providers/openai-completions.d.ts +2 -10
package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
package/dist/types/providers/openai-responses-server.d.ts +2 -2
package/dist/types/providers/openai-responses-shared.d.ts +49 -9
package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
package/dist/types/providers/openai-responses.d.ts +13 -4
package/dist/types/providers/prometheus-native-client.d.ts +9 -0
package/dist/types/providers/prometheus-native-server.d.ts +4 -3
package/dist/types/providers/transform-messages.d.ts +1 -2
package/dist/types/rate-limit-utils.d.ts +3 -2
package/dist/types/registry/aimlapi.d.ts +4 -0
package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
package/dist/types/registry/amazon-bedrock.d.ts +5 -0
package/dist/types/registry/anthropic.d.ts +10 -0
package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
package/dist/types/registry/cerebras.d.ts +7 -0
package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
package/dist/types/registry/cursor.d.ts +7 -0
package/dist/types/registry/deepseek.d.ts +8 -0
package/dist/types/registry/derived.d.ts +5 -0
package/dist/types/registry/firepass.d.ts +16 -0
package/dist/types/registry/fireworks.d.ts +7 -0
package/dist/types/registry/github-copilot.d.ts +7 -0
package/dist/types/registry/gitlab-duo.d.ts +9 -0
package/dist/types/registry/google-antigravity.d.ts +9 -0
package/dist/types/registry/google-gemini-cli.d.ts +9 -0
package/dist/types/registry/google-vertex.d.ts +5 -0
package/dist/types/registry/google.d.ts +4 -0
package/dist/types/registry/groq.d.ts +4 -0
package/dist/types/registry/huggingface.d.ts +7 -0
package/dist/types/registry/index.d.ts +4 -0
package/dist/types/registry/kagi.d.ts +14 -0
package/dist/types/registry/kilo.d.ts +7 -0
package/dist/types/registry/kimi-code.d.ts +7 -0
package/dist/types/registry/litellm.d.ts +13 -0
package/dist/types/registry/lm-studio.d.ts +8 -0
package/dist/types/registry/minimax-code-cn.d.ts +6 -0
package/dist/types/registry/minimax-code.d.ts +6 -0
package/dist/types/registry/minimax.d.ts +4 -0
package/dist/types/registry/mistral.d.ts +4 -0
package/dist/types/registry/moonshot.d.ts +7 -0
package/dist/types/registry/nanogpt.d.ts +7 -0
package/dist/types/registry/nvidia.d.ts +7 -0
package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
package/dist/types/registry/ollama-cloud.d.ts +7 -0
package/dist/types/registry/ollama.d.ts +12 -0
package/dist/types/registry/openai-codex-device.d.ts +8 -0
package/dist/types/registry/openai-codex.d.ts +9 -0
package/dist/types/registry/openai.d.ts +4 -0
package/dist/types/registry/opencode-go.d.ts +6 -0
package/dist/types/registry/opencode-zen.d.ts +6 -0
package/dist/types/registry/openrouter.d.ts +13 -0
package/dist/types/registry/parallel.d.ts +14 -0
package/dist/types/registry/perplexity.d.ts +7 -0
package/dist/types/registry/qianfan.d.ts +7 -0
package/dist/types/registry/qwen-portal.d.ts +7 -0
package/dist/types/registry/registry.d.ts +272 -0
package/dist/types/registry/synthetic.d.ts +6 -0
package/dist/types/registry/tavily.d.ts +14 -0
package/dist/types/registry/together.d.ts +6 -0
package/dist/types/registry/types.d.ts +51 -0
package/dist/types/registry/venice.d.ts +13 -0
package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
package/dist/types/registry/vllm.d.ts +7 -0
package/dist/types/registry/wafer-pass.d.ts +6 -0
package/dist/types/registry/wafer-serverless.d.ts +6 -0
package/dist/types/registry/xai-oauth.d.ts +7 -0
package/dist/types/registry/xai.d.ts +4 -0
package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
package/dist/types/registry/xiaomi.d.ts +6 -0
package/dist/types/registry/zai.d.ts +7 -0
package/dist/types/registry/zenmux.d.ts +7 -0
package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
package/dist/types/stream.d.ts +9 -1
package/dist/types/types.d.ts +56 -295
package/dist/types/usage/google-antigravity.d.ts +15 -1
package/dist/types/usage/openai-codex-reset.d.ts +79 -0
package/dist/types/usage/openai-codex.d.ts +1 -0
package/dist/types/usage.d.ts +77 -4
package/dist/types/utils/abort.d.ts +6 -0
package/dist/types/utils/event-stream.d.ts +2 -0
package/dist/types/utils/http-inspector.d.ts +0 -1
package/dist/types/utils/idle-iterator.d.ts +35 -0
package/dist/types/utils/openai-http.d.ts +58 -0
package/dist/types/utils/request-debug.d.ts +3 -0
package/dist/types/utils/retry-after.d.ts +1 -0
package/dist/types/utils/schema/fields.d.ts +5 -0
package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
package/dist/types/utils/schema/stamps.d.ts +7 -15
package/dist/types/utils/sse-debug.d.ts +0 -5
package/dist/types/utils/stream-markup-healing.d.ts +2 -0
package/dist/types/utils.d.ts +1 -5
package/package.json +17 -29
package/src/auth-broker/remote-store.ts +10 -1
package/src/auth-broker/snapshot-cache.ts +1 -1
package/src/auth-broker/wire-schemas.ts +1 -1
package/src/auth-gateway/http.ts +1 -1
package/src/auth-gateway/server.ts +95 -30
package/src/auth-gateway/types.ts +10 -2
package/src/auth-retry.ts +238 -0
package/src/auth-storage.ts +935 -430
package/src/errors.ts +32 -0
package/src/index.ts +9 -14
package/src/provider-details.ts +1 -1
package/src/providers/__tests__/google-auth.test.ts +144 -0
package/src/providers/amazon-bedrock.ts +70 -40
package/src/providers/anthropic-client.ts +15 -13
package/src/providers/anthropic-messages-server-schema.ts +17 -7
package/src/providers/anthropic-messages-server.ts +88 -20
package/src/providers/anthropic-wire.ts +4 -3
package/src/providers/anthropic.ts +1234 -621
package/src/providers/aws-credentials.ts +47 -5
package/src/providers/aws-eventstream.ts +5 -0
package/src/providers/azure-openai-responses.ts +117 -67
package/src/providers/cursor.ts +30 -30
package/src/providers/github-copilot-headers.ts +1 -1
package/src/providers/gitlab-duo.ts +36 -29
package/src/providers/google-auth.ts +71 -8
package/src/providers/google-gemini-cli.ts +118 -22
package/src/providers/google-shared.ts +163 -43
package/src/providers/google-types.ts +10 -1
package/src/providers/kimi.ts +1 -1
package/src/providers/mock.ts +11 -3
package/src/providers/ollama.ts +64 -7
package/src/providers/openai-anthropic-shim.ts +17 -8
package/src/providers/openai-chat-server-schema.ts +9 -3
package/src/providers/openai-chat-server.ts +82 -16
package/src/providers/openai-chat-wire.ts +847 -0
package/src/providers/openai-codex/request-transformer.ts +129 -34
package/src/providers/openai-codex/response-handler.ts +22 -1
package/src/providers/openai-codex-responses.ts +699 -247
package/src/providers/openai-completions-compat.ts +8 -308
package/src/providers/openai-completions.ts +416 -267
package/src/providers/openai-responses-server-schema.ts +15 -9
package/src/providers/openai-responses-server.ts +162 -114
package/src/providers/openai-responses-shared.ts +320 -82
package/src/providers/openai-responses-wire.ts +6391 -0
package/src/providers/openai-responses.ts +382 -176
package/src/providers/prometheus-native-client.ts +27 -11
package/src/providers/prometheus-native-server.ts +44 -17
package/src/providers/transform-messages.ts +311 -120
package/src/providers/vision-guard.ts +5 -3
package/src/rate-limit-utils.ts +13 -3
package/src/registry/aimlapi.ts +6 -0
package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
package/src/registry/amazon-bedrock.ts +22 -0
package/src/registry/anthropic.ts +26 -0
package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
package/src/{utils/oauth → registry}/cerebras.ts +8 -1
package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
package/src/registry/cursor.ts +20 -0
package/src/{utils/oauth → registry}/deepseek.ts +9 -17
package/src/registry/derived.ts +9 -0
package/src/{utils/oauth → registry}/firepass.ts +10 -2
package/src/{utils/oauth → registry}/fireworks.ts +8 -1
package/src/registry/github-copilot.ts +22 -0
package/src/registry/gitlab-duo.ts +19 -0
package/src/registry/google-antigravity.ts +21 -0
package/src/registry/google-gemini-cli.ts +21 -0
package/src/registry/google-vertex.ts +38 -0
package/src/registry/google.ts +6 -0
package/src/registry/groq.ts +6 -0
package/src/{utils/oauth → registry}/huggingface.ts +8 -19
package/src/registry/index.ts +4 -0
package/src/{utils/oauth → registry}/kagi.ts +9 -11
package/src/{utils/oauth → registry}/kilo.ts +11 -6
package/src/registry/kimi-code.ts +17 -0
package/src/{utils/oauth → registry}/litellm.ts +8 -12
package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
package/src/registry/minimax-code-cn.ts +12 -0
package/src/registry/minimax-code.ts +12 -0
package/src/registry/minimax.ts +6 -0
package/src/registry/mistral.ts +6 -0
package/src/{utils/oauth → registry}/moonshot.ts +8 -9
package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
package/src/{utils/oauth → registry}/nvidia.ts +8 -18
package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
package/src/{utils → registry}/oauth/anthropic.ts +38 -17
package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
package/src/registry/oauth/gitlab-duo.ts +198 -0
package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
package/src/registry/oauth/index.ts +164 -0
package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
package/src/{utils → registry}/oauth/types.ts +7 -51
package/src/{utils → registry}/oauth/wafer.ts +1 -1
package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
package/src/{utils/oauth → registry}/ollama.ts +8 -13
package/src/registry/openai-codex-device.ts +18 -0
package/src/registry/openai-codex.ts +19 -0
package/src/registry/openai.ts +6 -0
package/src/registry/opencode-go.ts +12 -0
package/src/registry/opencode-zen.ts +12 -0
package/src/{utils/oauth → registry}/openrouter.ts +10 -2
package/src/{utils/oauth → registry}/parallel.ts +9 -11
package/src/registry/perplexity.ts +13 -0
package/src/{utils/oauth → registry}/qianfan.ts +8 -17
package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
package/src/registry/registry.ts +149 -0
package/src/{utils/oauth → registry}/synthetic.ts +7 -1
package/src/{utils/oauth → registry}/tavily.ts +10 -12
package/src/{utils/oauth → registry}/together.ts +7 -1
package/src/registry/types.ts +56 -0
package/src/{utils/oauth → registry}/venice.ts +8 -12
package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
package/src/{utils/oauth → registry}/vllm.ts +9 -16
package/src/registry/wafer-pass.ts +12 -0
package/src/registry/wafer-serverless.ts +12 -0
package/src/registry/xai-oauth.ts +17 -0
package/src/registry/xai.ts +6 -0
package/src/registry/xiaomi-token-plan-ams.ts +12 -0
package/src/registry/xiaomi-token-plan-cn.ts +12 -0
package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
package/src/registry/xiaomi.ts +12 -0
package/src/{utils/oauth → registry}/zai.ts +10 -22
package/src/{utils/oauth → registry}/zenmux.ts +8 -1
package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
package/src/stream.ts +229 -199
package/src/types.ts +63 -384
package/src/usage/claude.ts +4 -2
package/src/usage/github-copilot.ts +4 -2
package/src/usage/google-antigravity.ts +196 -28
package/src/usage/kimi.ts +1 -1
package/src/usage/minimax-code.ts +5 -6
package/src/usage/openai-codex-reset.ts +174 -0
package/src/usage/openai-codex.ts +19 -2
package/src/usage/zai.ts +2 -1
package/src/usage.ts +93 -4
package/src/utils/abort.ts +14 -0
package/src/utils/event-stream.ts +17 -0
package/src/utils/http-inspector.ts +4 -12
package/src/utils/idle-iterator.ts +250 -79
package/src/utils/openai-http.ts +157 -0
package/src/utils/request-debug.ts +67 -19
package/src/utils/retry-after.ts +1 -1
package/src/utils/retry.ts +23 -2
package/src/utils/schema/CONSTRAINTS.md +4 -2
package/src/utils/schema/fields.ts +16 -0
package/src/utils/schema/json-schema-validator.ts +19 -1
package/src/utils/schema/normalize.ts +80 -8
package/src/utils/schema/stamps.ts +22 -10
package/src/utils/schema/wire.ts +2 -2
package/src/utils/sse-debug.ts +0 -271
package/src/utils/stream-markup-healing.ts +50 -8
package/src/utils/validation.ts +49 -13
package/src/utils.ts +2 -26
package/dist/types/model-cache.d.ts +0 -17
package/dist/types/model-manager.d.ts +0 -64
package/dist/types/model-thinking.d.ts +0 -100
package/dist/types/models.d.ts +0 -12
package/dist/types/provider-models/bundled-references.d.ts +0 -4
package/dist/types/provider-models/descriptors.d.ts +0 -50
package/dist/types/provider-models/google.d.ts +0 -24
package/dist/types/provider-models/index.d.ts +0 -5
package/dist/types/provider-models/ollama.d.ts +0 -7
package/dist/types/provider-models/openai-compat.d.ts +0 -323
package/dist/types/provider-models/special.d.ts +0 -16
package/dist/types/utils/discovery/antigravity.d.ts +0 -61
package/dist/types/utils/discovery/codex.d.ts +0 -38
package/dist/types/utils/discovery/cursor.d.ts +0 -23
package/dist/types/utils/discovery/gemini.d.ts +0 -25
package/dist/types/utils/discovery/index.d.ts +0 -4
package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
package/dist/types/utils/oauth/cerebras.d.ts +0 -1
package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/deepseek.d.ts +0 -10
package/dist/types/utils/oauth/firepass.d.ts +0 -1
package/dist/types/utils/oauth/fireworks.d.ts +0 -1
package/dist/types/utils/oauth/huggingface.d.ts +0 -19
package/dist/types/utils/oauth/kagi.d.ts +0 -17
package/dist/types/utils/oauth/kilo.d.ts +0 -5
package/dist/types/utils/oauth/litellm.d.ts +0 -18
package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
package/dist/types/utils/oauth/moonshot.d.ts +0 -1
package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
package/dist/types/utils/oauth/nvidia.d.ts +0 -18
package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
package/dist/types/utils/oauth/ollama.d.ts +0 -18
package/dist/types/utils/oauth/openrouter.d.ts +0 -1
package/dist/types/utils/oauth/parallel.d.ts +0 -17
package/dist/types/utils/oauth/qianfan.d.ts +0 -17
package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
package/dist/types/utils/oauth/synthetic.d.ts +0 -1
package/dist/types/utils/oauth/tavily.d.ts +0 -17
package/dist/types/utils/oauth/together.d.ts +0 -1
package/dist/types/utils/oauth/venice.d.ts +0 -18
package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/vllm.d.ts +0 -16
package/dist/types/utils/oauth/zai.d.ts +0 -18
package/dist/types/utils/oauth/zenmux.d.ts +0 -1
package/dist/types/utils/oauth/zhipu.d.ts +0 -18
package/src/model-cache.ts +0 -129
package/src/model-manager.ts +0 -469
package/src/model-thinking.ts +0 -756
package/src/models.json +0 -60287
package/src/models.json.d.ts +0 -9
package/src/models.ts +0 -56
package/src/provider-models/bundled-references.ts +0 -38
package/src/provider-models/descriptors.ts +0 -364
package/src/provider-models/google.ts +0 -88
package/src/provider-models/index.ts +0 -5
package/src/provider-models/ollama.ts +0 -153
package/src/provider-models/openai-compat.ts +0 -2904
package/src/provider-models/special.ts +0 -67
package/src/utils/discovery/antigravity.ts +0 -261
package/src/utils/discovery/codex.ts +0 -371
package/src/utils/discovery/cursor.ts +0 -306
package/src/utils/discovery/gemini.ts +0 -248
package/src/utils/discovery/index.ts +0 -4
package/src/utils/discovery/openai-compatible.ts +0 -224
package/src/utils/oauth/gitlab-duo.ts +0 -123
package/src/utils/oauth/index.ts +0 -502
/package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
/package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
/package/src/{utils → registry}/oauth/callback-server.ts +0 -0
/package/src/{utils → registry}/oauth/cursor.ts +0 -0
/package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
/package/src/{utils → registry}/oauth/kimi.ts +0 -0
/package/src/{utils → registry}/oauth/oauth.html +0 -0
/package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
/package/src/{utils → registry}/oauth/opencode.ts +0 -0
/package/src/{utils → registry}/oauth/perplexity.ts +0 -0
/package/src/{utils → registry}/oauth/pkce.ts +0 -0

package/src/providers/openai-responses-shared.ts CHANGED Viewed

@@ -1,22 +1,11 @@
-import { structuredCloneJSON } from "@prometheus-ai/utils";
-import type OpenAI from "openai";
-import type {
-	ResponseCustomToolCall,
-	ResponseFunctionToolCall,
-	ResponseInput,
-	ResponseInputContent,
-	ResponseInputImage,
-	ResponseInputText,
-	ResponseOutputItem,
-	ResponseOutputMessage,
-	ResponseReasoningItem,
-} from "openai/resources/responses/responses";
-import { calculateCost } from "../models";
+import { calculateCost } from "@prometheus-ai/catalog/models";
+import { logger, structuredCloneJSON } from "@prometheus-ai/utils";
 import {
 	type Api,
 	type AssistantMessage,
 	type ImageContent,
 	type Model,
+	OPENAI_MAX_OUTPUT_TOKENS,
 	resolveServiceTier,
 	type ServiceTier,
 	type StopReason,
@@ -31,6 +20,20 @@ import {
 import { normalizeResponsesToolCallId } from "../utils";
 import type { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
+import type {
+	ResponseCreateParamsStreaming,
+	ResponseCustomToolCall,
+	ResponseFunctionToolCall,
+	ResponseInput,
+	ResponseInputContent,
+	ResponseInputImage,
+	ResponseInputText,
+	ResponseOutputItem,
+	ResponseOutputMessage,
+	ResponseReasoningItem,
+	ResponseStatus,
+	ResponseStreamEvent,
+} from "./openai-responses-wire";
 import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER, partitionVisionContent } from "./vision-guard";
 export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Set([
 	"response.created",
@@ -48,6 +51,7 @@ export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Se
 	"response.custom_tool_call_input.done",
 	"response.output_item.done",
 	"response.completed",
+	"response.incomplete",
 	"response.failed",
 	"error",
 ]);
@@ -212,6 +216,59 @@ export function repairOrphanResponsesToolOutputs(input: ResponseInput): Response
 	});
 }
+/** Placeholder output for a tool call whose result is absent from the input. */
+const ORPHAN_TOOL_CALL_PLACEHOLDER =
+	"[No tool output recorded: the tool call was interrupted before it produced a result.]";
+/**
+ * Synthesize a placeholder `function_call_output` / `custom_tool_call_output`
+ * for every `function_call` / `custom_tool_call` whose `call_id` has no matching
+ * output later in the same input. The Responses API rejects an unpaired call
+ * with `400 No tool output found for function call …`.
+ *
+ * Orphan calls surface when the user branches/navigates the session tree to a
+ * node that ends on a tool call (the tool-result child is excluded from the
+ * reconstructed history) or when a turn is aborted/crashes after the call
+ * streamed but before its result persisted. Dropping the call would erase the
+ * assistant's action; a placeholder output keeps the call visible so the model
+ * can recover (e.g. re-issue the call). Symmetric to
+ * {@link repairOrphanResponsesToolOutputs}.
+ */
+export function repairOrphanResponsesToolCalls(input: ResponseInput): ResponseInput {
+	const outputCallIds = new Set<string>();
+	for (const item of input) {
+		const t = (item as { type?: string }).type;
+		if (t !== "function_call_output" && t !== "custom_tool_call_output") continue;
+		const callId = (item as { call_id?: unknown }).call_id;
+		if (typeof callId === "string") outputCallIds.add(callId);
+	}
+	let hasOrphan = false;
+	for (const item of input) {
+		const t = (item as { type?: string }).type;
+		if (t !== "function_call" && t !== "custom_tool_call") continue;
+		const callId = (item as { call_id?: unknown }).call_id;
+		if (typeof callId === "string" && !outputCallIds.has(callId)) {
+			hasOrphan = true;
+			break;
+		}
+	}
+	if (!hasOrphan) return input;
+	const repaired: ResponseInput = [];
+	for (const item of input) {
+		repaired.push(item);
+		const t = (item as { type?: string }).type;
+		if (t !== "function_call" && t !== "custom_tool_call") continue;
+		const callId = (item as { call_id?: unknown }).call_id;
+		if (typeof callId !== "string" || outputCallIds.has(callId)) continue;
+		repaired.push({
+			type: t === "custom_tool_call" ? "custom_tool_call_output" : "function_call_output",
+			call_id: callId,
+			output: ORPHAN_TOOL_CALL_PLACEHOLDER,
+		} as ResponseInput[number]);
+	}
+	return repaired;
+}
 export function convertResponsesInputContent(
 	content: string | Array<TextContent | ImageContent>,
 	supportsImages: boolean,
@@ -234,7 +291,7 @@ export function convertResponsesInputContent(
 	for (const item of imageBlocks) {
 		normalizedContent.push({
 			type: "input_image",
-			detail: "auto",
+			detail: item.detail ?? "auto",
 			image_url: `data:${item.mimeType};base64,${item.data}`,
 		} satisfies ResponseInputImage);
 	}
@@ -256,6 +313,7 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
 	customCallIds?: Set<string>,
 ): ResponseInput {
 	const outputItems: ResponseInput = [];
+	let unsignedTextBlocks = 0;
 	const isDifferentModel =
 		assistantMsg.model !== model.id && assistantMsg.provider === model.provider && assistantMsg.api === model.api;
@@ -265,7 +323,12 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
 				continue;
 			}
 			if (block.thinkingSignature) {
-				outputItems.push(JSON.parse(block.thinkingSignature) as ResponseReasoningItem);
+				try {
+					outputItems.push(JSON.parse(block.thinkingSignature) as ResponseReasoningItem);
+				} catch {
+					// Legacy/corrupt persisted signature — skip the reasoning item
+					// rather than failing the whole request build.
+				}
 			}
 			continue;
 		}
@@ -274,7 +337,10 @@ export function convertResponsesAssistantMessage<TApi extends Api>(
 			const parsedSignature = parseTextSignature(block.textSignature);
 			let msgId = parsedSignature?.id;
 			if (!msgId) {
-				msgId = `msg_${msgIndex}`;
+				// Distinct ids per unsigned block: several text blocks in one message
+				// (cross-provider replay downgrades thinking → text) must not share an id.
+				msgId = unsignedTextBlocks === 0 ? `msg_${msgIndex}` : `msg_${msgIndex}_${unsignedTextBlocks}`;
+				unsignedTextBlocks += 1;
 			} else if (msgId.length > 64) {
 				msgId = `msg_${Bun.hash(msgId).toString(36)}`;
 			}
@@ -339,10 +405,6 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
 	const hasImages = toolResult.content.some((block): block is ImageContent => block.type === "image");
 	const omittedImages = hasImages && !supportsImages;
 	const normalized = normalizeResponsesToolCallId(toolResult.toolCallId);
-	if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
-		return;
-	}
 	const output = (
 		omittedImages
 			? joinTextWithImagePlaceholder(textResult, true)
@@ -350,6 +412,19 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
 				? textResult
 				: "(see attached image)"
 	).toWellFormed();
+	if (strictResponsesPairing && !knownCallIds.has(normalized.callId)) {
+		// Strict backends (Azure, Copilot) reject unpaired outputs outright, but
+		// silently dropping the result loses information the model needs. Fold it
+		// into an assistant note instead (same shape as repairOrphanResponsesToolOutputs).
+		const limit = 16_000;
+		const noteText = output.length > limit ? `${output.slice(0, limit)}\n...[truncated]` : output;
+		messages.push({
+			type: "message",
+			role: "assistant",
+			content: `[Orphan ${toolResult.toolName || "tool"} result; call_id=${normalized.callId}]: ${noteText}`,
+		} as ResponseInput[number]);
+		return;
+	}
 	if (customCallIds?.has(normalized.callId)) {
 		messages.push({
 			type: "custom_tool_call_output",
@@ -375,7 +450,7 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
 		if (block.type === "image") {
 			contentParts.push({
 				type: "input_image",
-				detail: "auto",
+				detail: block.detail ?? "auto",
 				image_url: `data:${block.mimeType};base64,${block.data}`,
 			} satisfies ResponseInputImage);
 		}
@@ -386,10 +461,18 @@ export function appendResponsesToolResultMessages<TApi extends Api>(
 export interface ProcessResponsesStreamOptions {
 	onFirstToken?: () => void;
 	onOutputItemDone?: (item: ResponseOutputItem) => void;
+	/**
+	 * Called when a terminal `response.completed` or `response.incomplete` event
+	 * is successfully processed. Only invoked on the successful-completion path;
+	 * thrown failure (`response.failed`) and cancellation paths never call this.
+	 * Used by callers to detect premature stream closure (i.e. the stream ended
+	 * without a recognized terminal event).
+	 */
+	onCompleted?: () => void;
 }
 export async function processResponsesStream<TApi extends Api>(
-	openaiStream: AsyncIterable<OpenAI.Responses.ResponseStreamEvent>,
+	openaiStream: AsyncIterable<ResponseStreamEvent>,
 	output: AssistantMessage,
 	stream: AssistantMessageEventStream,
 	model: Model<TApi>,
@@ -403,9 +486,16 @@ export async function processResponsesStream<TApi extends Api>(
 	// Multiple items (parallel function_calls in particular) can be open at the same
 	// time. OpenAI's spec routes every per-item event by `output_index`/`item_id`;
-	// see https://github.com/uttamtrivedi/Prometheus/issues/1880 — llama.cpp emits parallel
+	// see https://github.com/uttamtrivedi/prometheus/issues/1880 — llama.cpp emits parallel
 	// function_call deltas interleaved, and a singleton `current` reference would
 	// fold them into the wrong block and drop arguments on every call but the last.
+	//
+	// llama.cpp's `to_json_oaicompat_resp` (issue #2015) compounds this: `output_item.added`
+	// for function_call/custom_tool_call carries `item.call_id` but no `item.id` and no
+	// `output_index`, while the matching `function_call_arguments.delta` carries
+	// `item_id = "fc_<call_id>"`. Registering function-call items by `call_id` as a
+	// secondary key lets the delta lookup find the right block on hosts that emit one
+	// identifier but not the other.
 	const openItemsByOutputIndex = new Map<number, StreamingItem>();
 	const openItemsByItemId = new Map<string, StreamingItem>();
 	let lastOpenItem: StreamingItem | null = null;
@@ -415,9 +505,11 @@ export async function processResponsesStream<TApi extends Api>(
 		outputIndex: number | undefined,
 		itemId: string | undefined,
 		entry: StreamingItem,
+		alternateItemKey?: string,
 	): void => {
 		if (typeof outputIndex === "number") openItemsByOutputIndex.set(outputIndex, entry);
 		if (itemId) openItemsByItemId.set(itemId, entry);
+		if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.set(alternateItemKey, entry);
 		openItemsInOrder.push(entry);
 		lastOpenItem = entry;
 	};
@@ -455,9 +547,11 @@ export async function processResponsesStream<TApi extends Api>(
 		outputIndex: number | undefined,
 		itemId: string | undefined,
 		entry: StreamingItem | undefined,
+		alternateItemKey?: string,
 	): void => {
 		if (typeof outputIndex === "number") openItemsByOutputIndex.delete(outputIndex);
 		if (itemId) openItemsByItemId.delete(itemId);
+		if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.delete(alternateItemKey);
 		if (entry) {
 			const index = openItemsInOrder.indexOf(entry);
 			if (index >= 0) openItemsInOrder.splice(index, 1);
@@ -497,7 +591,7 @@ export async function processResponsesStream<TApi extends Api>(
 					partialJson: item.arguments || "",
 				};
 				output.content.push(block);
-				registerOpenItem(event.output_index, item.id, { item, block });
+				registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
 				stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
 			} else if (item.type === "custom_tool_call") {
 				const block: StreamingToolCallBlock = {
@@ -515,7 +609,7 @@ export async function processResponsesStream<TApi extends Api>(
 					partialJson: item.input ?? "",
 				};
 				output.content.push(block);
-				registerOpenItem(event.output_index, item.id, { item, block });
+				registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
 				stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
 			}
 		} else if (event.type === "response.reasoning_summary_part.added") {
@@ -580,32 +674,42 @@ export async function processResponsesStream<TApi extends Api>(
 		} else if (event.type === "response.output_text.delta") {
 			const entry = lookupOpenItem(event);
 			if (entry?.item.type === "message" && entry.block.type === "text") {
-				const lastPart = entry.item.content?.[entry.item.content.length - 1];
-				if (lastPart?.type === "output_text") {
-					entry.block.text += event.delta;
-					lastPart.text += event.delta;
-					stream.push({
-						type: "text_delta",
-						contentIndex: contentIndexOf(entry.block),
-						delta: event.delta,
-						partial: output,
-					});
+				entry.item.content = entry.item.content || [];
+				let lastPart = entry.item.content[entry.item.content.length - 1];
+				if (lastPart?.type !== "output_text") {
+					// `content_part.added` never arrived (lossy proxy) — synthesize the
+					// part so live text still streams instead of freezing until the
+					// item's output_item.done recovers the final text.
+					lastPart = { type: "output_text", text: "", annotations: [] };
+					entry.item.content.push(lastPart);
 				}
+				entry.block.text += event.delta;
+				lastPart.text += event.delta;
+				stream.push({
+					type: "text_delta",
+					contentIndex: contentIndexOf(entry.block),
+					delta: event.delta,
+					partial: output,
+				});
 			}
 		} else if (event.type === "response.refusal.delta") {
 			const entry = lookupOpenItem(event);
 			if (entry?.item.type === "message" && entry.block.type === "text") {
-				const lastPart = entry.item.content?.[entry.item.content.length - 1];
-				if (lastPart?.type === "refusal") {
-					entry.block.text += event.delta;
-					lastPart.refusal += event.delta;
-					stream.push({
-						type: "text_delta",
-						contentIndex: contentIndexOf(entry.block),
-						delta: event.delta,
-						partial: output,
-					});
+				entry.item.content = entry.item.content || [];
+				let lastPart = entry.item.content[entry.item.content.length - 1];
+				if (lastPart?.type !== "refusal") {
+					// Same lossy-proxy hardening as the output_text branch above.
+					lastPart = { type: "refusal", refusal: "" };
+					entry.item.content.push(lastPart);
 				}
+				entry.block.text += event.delta;
+				lastPart.refusal += event.delta;
+				stream.push({
+					type: "text_delta",
+					contentIndex: contentIndexOf(entry.block),
+					delta: event.delta,
+					partial: output,
+				});
 			}
 		} else if (event.type === "response.function_call_arguments.delta") {
 			const entry = lookupOpenFunctionCallItem(event);
@@ -656,7 +760,10 @@ export async function processResponsesStream<TApi extends Api>(
 		} else if (event.type === "response.output_item.done") {
 			const item = structuredCloneJSON(event.item);
 			options?.onOutputItemDone?.(item);
-			const entry = lookupOpenItem({ output_index: event.output_index, item_id: item.id });
+			const entry =
+				item.type === "function_call" || item.type === "custom_tool_call"
+					? lookupOpenItem({ output_index: event.output_index, item_id: item.id ?? item.call_id })
+					: lookupOpenItem({ output_index: event.output_index, item_id: item.id });
 			if (item.type === "reasoning") {
 				const thinking =
 					item.summary?.length > 0
@@ -664,9 +771,15 @@ export async function processResponsesStream<TApi extends Api>(
 						: item.content?.[0]?.type === "reasoning_text"
 							? (item.content[0].text ?? "")
 							: "";
-				const reasoningBlock = output.content.find(
-					b => b.type === "thinking" && (b as ThinkingContent).itemId === item.id,
-				) as ThinkingContent | undefined;
+				// Prefer the routed entry; the bare itemId find misroutes when ids are
+				// absent (`undefined === undefined` matches the FIRST thinking block) and
+				// misses entirely when the done-event id drifts from the added-event id.
+				const reasoningBlock =
+					entry?.block.type === "thinking"
+						? entry.block
+						: (output.content.find(b => b.type === "thinking" && (b as ThinkingContent).itemId === item.id) as
+								| ThinkingContent
+								| undefined);
 				if (reasoningBlock) {
 					reasoningBlock.thinking = thinking;
 					reasoningBlock.thinkingSignature = JSON.stringify(item);
@@ -678,18 +791,25 @@ export async function processResponsesStream<TApi extends Api>(
 					});
 				}
 				closeOpenItem(event.output_index, item.id, entry);
-			} else if (item.type === "message" && entry?.block.type === "text") {
-				const block = entry.block;
-				block.text = item.content
+			} else if (item.type === "message") {
+				const block = entry?.block.type === "text" ? entry.block : undefined;
+				const text = item.content
 					.map(part => (part.type === "output_text" ? (part.text ?? "") : (part.refusal ?? "")))
 					.join("");
-				block.textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined);
-				stream.push({
-					type: "text_end",
-					contentIndex: contentIndexOf(block),
-					content: block.text,
-					partial: output,
-				});
+				const textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined);
+				let contentIndex: number;
+				if (block) {
+					block.text = text;
+					block.textSignature = textSignature;
+					contentIndex = contentIndexOf(block);
+				} else {
+					// `output_item.added` never arrived (lossy proxy) — synthesize the
+					// block so the final message still carries the authoritative text.
+					const synthesized: TextContent = { type: "text", text, textSignature };
+					output.content.push(synthesized);
+					contentIndex = output.content.length - 1;
+				}
+				stream.push({ type: "text_end", contentIndex, content: text, partial: output });
 				closeOpenItem(event.output_index, item.id, entry);
 			} else if (item.type === "function_call") {
 				const block = entry?.block.type === "toolCall" ? entry.block : undefined;
@@ -704,6 +824,7 @@ export async function processResponsesStream<TApi extends Api>(
 					name: item.name,
 					arguments: args,
 				};
+				let contentIndex: number;
 				if (block) {
 					// Persist the authoritative final args on the stored block. The
 					// throttled delta parser may have skipped the last partial parse,
@@ -713,9 +834,15 @@ export async function processResponsesStream<TApi extends Api>(
 					delete (block as { partialJson?: string }).partialJson;
 					delete (block as { lastParseLen?: number }).lastParseLen;
 					delete (block as { argumentsDone?: boolean }).argumentsDone;
+					contentIndex = contentIndexOf(block);
+				} else {
+					// `output_item.added` never arrived (lossy proxy) — synthesize the
+					// block so the final message carries the call the consumer was told
+					// completed (the agent loop executes tools from message.content).
+					output.content.push(toolCall);
+					contentIndex = output.content.length - 1;
 				}
-				const contentIndex = block ? contentIndexOf(block) : output.content.length - 1;
-				closeOpenItem(event.output_index, item.id, entry);
+				closeOpenItem(event.output_index, item.id, entry, item.call_id);
 				stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
 			} else if (item.type === "custom_tool_call") {
 				const block = entry?.block.type === "toolCall" ? entry.block : undefined;
@@ -727,12 +854,39 @@ export async function processResponsesStream<TApi extends Api>(
 					arguments: { input: rawInput },
 					customWireName: item.name,
 				};
-				const contentIndex = block ? contentIndexOf(block) : output.content.length - 1;
-				closeOpenItem(event.output_index, item.id, entry);
+				let contentIndex: number;
+				if (block) {
+					// Persist the final input on the stored block and drop the transient
+					// accumulation buffer, mirroring the function_call branch above.
+					block.arguments = { input: rawInput };
+					delete (block as { partialJson?: string }).partialJson;
+					delete (block as { lastParseLen?: number }).lastParseLen;
+					contentIndex = contentIndexOf(block);
+				} else {
+					output.content.push(toolCall);
+					contentIndex = output.content.length - 1;
+				}
+				closeOpenItem(event.output_index, item.id, entry, item.call_id);
 				stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
 			}
-		} else if (event.type === "response.completed") {
+		} else if (event.type === "response.completed" || event.type === "response.incomplete") {
 			const response = event.response;
+			// Finalize any toolCall block whose output_item.done never arrived: the
+			// throttled delta parser may have left block.arguments stale, and the
+			// toolUse override below would hand the agent incomplete arguments.
+			for (const open of openItemsInOrder) {
+				if (open.block.type !== "toolCall") continue;
+				const block = open.block;
+				if (block.partialJson && !block.argumentsDone) {
+					block.arguments =
+						open.item.type === "custom_tool_call"
+							? { input: block.partialJson }
+							: parseStreamingJson(block.partialJson);
+				}
+				delete (block as { partialJson?: string }).partialJson;
+				delete (block as { lastParseLen?: number }).lastParseLen;
+				delete (block as { argumentsDone?: boolean }).argumentsDone;
+			}
 			if (response?.id) {
 				output.responseId = response.id;
 			}
@@ -752,12 +906,40 @@ export async function processResponsesStream<TApi extends Api>(
 							: "Unknown error (no error details in response)";
 				throw new Error(message);
 			}
+			if (response?.status === "incomplete" && response.incomplete_details?.reason === "content_filter") {
+				// A content-filtered turn is a failure, not a token-cap truncation —
+				// mapping it to "length" would route the agent loop into "shorten your
+				// output" recovery against a filtered prompt.
+				throw new Error("incomplete: content_filter");
+			}
 			if (output.content.some(block => block.type === "toolCall") && output.stopReason === "stop") {
 				output.stopReason = "toolUse";
 			}
+			// Codex-lineage backends/gateways mark an unfinished turn with
+			// `end_turn: false` on the terminal event (the response ended on
+			// commentary only). Not in the SDK types or the platform API today —
+			// inert when absent. Same mapping as openai-codex-responses: surface a
+			// non-terminal stop so the agent loop re-samples instead of ending the
+			// turn.
+			if ((response as { end_turn?: boolean } | undefined)?.end_turn === false && output.stopReason === "stop") {
+				output.stopDetails = { type: "pause_turn" };
+			}
+			options?.onCompleted?.();
+			// `response.completed`/`response.incomplete` is the last event of a
+			// Responses stream. Stop pulling instead of waiting for the server to
+			// close the connection: misbehaving providers keep the socket open
+			// after the terminal event, which would park this loop until the idle
+			// watchdog converts an already-successful turn into a timeout error.
+			// Breaking unwinds the iterator chain (the consumer's `.return()`
+			// reaches the SDK stream), actively releasing the connection.
+			break;
 		} else if (event.type === "error") {
-			throw new Error(`Error Code ${event.code}: ${event.message}` || "Unknown error");
+			const err = (event as any).error ?? event;
+			const code = err.code ?? "unknown";
+			const message = err.message ?? "no message";
+			throw new Error(`Error Code ${code}: ${message}`);
 		} else if (event.type === "response.failed") {
+			populateResponsesUsageFromResponse(output, event.response?.usage);
 			const error = event.response?.error ?? (event.response as any)?.status_details?.error;
 			const details = event.response?.incomplete_details;
 			const message = error
@@ -770,7 +952,7 @@ export async function processResponsesStream<TApi extends Api>(
 	}
 }
-export function mapOpenAIResponsesStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
+export function mapOpenAIResponsesStopReason(status: ResponseStatus | undefined): StopReason {
 	if (!status) return "stop";
 	switch (status) {
 		case "completed":
@@ -784,8 +966,12 @@ export function mapOpenAIResponsesStopReason(status: OpenAI.Responses.ResponseSt
 		case "queued":
 			return "stop";
 		default: {
+			// Compile-time exhaustiveness; at runtime a brand-new status from the
+			// server must degrade gracefully instead of failing a fully-streamed
+			// response.
 			const exhaustive: never = status;
-			throw new Error(`Unhandled stop reason: ${exhaustive}`);
+			logger.warn("Unhandled OpenAI Responses stop reason", { status: exhaustive });
+			return "stop";
 		}
 	}
 }
@@ -820,7 +1006,7 @@ export type ResponsesSamplingParamsExtras = {
 	repetition_penalty?: number;
 };
-type CommonResponsesParams = OpenAI.Responses.ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;
+type CommonResponsesParams = ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;
 type CommonSamplingOptions = Pick<
 	StreamOptions,
@@ -839,9 +1025,15 @@ type CommonSamplingOptions = Pick<
 export function applyCommonResponsesSamplingParams<P extends CommonResponsesParams>(
 	params: P,
 	options: CommonSamplingOptions | undefined,
-	model: Pick<Model, "provider" | "omitMaxOutputTokens">,
+	model: Pick<Model, "provider" | "omitMaxOutputTokens" | "maxTokens">,
 ): void {
-	if (options?.maxTokens && !model.omitMaxOutputTokens) params.max_output_tokens = options.maxTokens;
+	if (options?.maxTokens && !model.omitMaxOutputTokens) {
+		params.max_output_tokens = Math.min(
+			options.maxTokens,
+			model.maxTokens ?? Number.POSITIVE_INFINITY,
+			OPENAI_MAX_OUTPUT_TOKENS,
+		);
+	}
 	if (options?.temperature !== undefined) params.temperature = options.temperature;
 	if (options?.topP !== undefined) params.top_p = options.topP;
 	if (options?.topK !== undefined) params.top_k = options.topK;
@@ -863,8 +1055,12 @@ type ReasoningOptions = {
 /**
  * Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
- * set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
- * Mutates `params` and may push a developer message into `messages`.
+ * set effort/summary when requested, and otherwise inject the "Juice: 0" no-reasoning hack
+ * when `model.compat.requiresJuiceZeroHack` is set (GPT-5 family by default).
+ * Mutates `params` and may push a developer message into `messages`. Returns
+ * the number of per-turn trailing scaffolding items appended to `messages`
+ * (the "Juice: 0" developer item), so callers doing stateful
+ * `previous_response_id` chaining can exclude them from append-baseline math.
  *
  * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
  *   body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
@@ -875,21 +1071,23 @@ type ReasoningOptions = {
  *   without needing explicit activation. Callers that pass `options.reasoning` for such models
  *   should expect this documented downgrade: the model will reason, but at its default effort.
  */
-export function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(
+export function applyResponsesReasoningParams<P extends ResponseCreateParamsStreaming>(
 	params: P,
-	model: Model<Api>,
+	model: Model<"openai-responses" | "azure-openai-responses" | "openai-codex-responses">,
 	options: ReasoningOptions | undefined,
 	messages: ResponseInput,
 	mapEffort?: (effort: string) => string,
 	includeEncryptedReasoning: boolean = true,
 	omitReasoningEffort: boolean = false,
-): void {
-	if (!model.reasoning) return;
+): number {
+	if (!model.reasoning) return 0;
 	// Always request encrypted reasoning content so reasoning items can be replayed in
 	// multi-turn conversations when store is false (items aren't persisted server-side, so
-	// we must include the full content). See: https://github.com/uttamtrivedi/Prometheus/issues/41
+	// we must include the full content). See: https://github.com/uttamtrivedi/prometheus/issues/41
 	if (includeEncryptedReasoning) {
-		params.include = ["reasoning.encrypted_content"];
+		const include = params.include ?? [];
+		if (!include.includes("reasoning.encrypted_content")) include.push("reasoning.encrypted_content");
+		params.include = include;
 	}
 	if (options?.reasoning || options?.reasoningSummary !== undefined) {
@@ -904,12 +1102,12 @@ export function applyResponsesReasoningParams<P extends OpenAI.Responses.Respons
 			// When only options.reasoning (effort level) is set, params.reasoning
 			// is intentionally omitted — see @param omitReasoningEffort above.
 			if (options?.reasoningSummary !== undefined && options?.reasoningSummary !== null) {
-				type ReasoningParam = NonNullable<OpenAI.Responses.ResponseCreateParamsStreaming["reasoning"]>;
+				type ReasoningParam = NonNullable<ResponseCreateParamsStreaming["reasoning"]>;
 				params.reasoning = { summary: options.reasoningSummary || "auto" } as P["reasoning"] & ReasoningParam;
 			}
 		} else {
 			const requested = options?.reasoning || "medium";
-			type ReasoningParam = NonNullable<OpenAI.Responses.ResponseCreateParamsStreaming["reasoning"]>;
+			type ReasoningParam = NonNullable<ResponseCreateParamsStreaming["reasoning"]>;
 			const reasoningParams: ReasoningParam = {
 				effort: (mapEffort ? mapEffort(requested) : requested) as ReasoningParam["effort"],
 			};
@@ -918,13 +1116,15 @@ export function applyResponsesReasoningParams<P extends OpenAI.Responses.Respons
 			}
 			params.reasoning = reasoningParams as P["reasoning"];
 		}
-	} else if (model.name.toLowerCase().startsWith("gpt-5")) {
+	} else if (model.compat.requiresJuiceZeroHack) {
 		// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
 		messages.push({
 			role: "developer",
 			content: [{ type: "input_text", text: "# Juice: 0 !important" }],
 		});
+		return 1;
 	}
+	return 0;
 }
 /** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
@@ -944,6 +1144,10 @@ export function populateResponsesUsageFromResponse(
 	if (!usage) return;
 	const cachedTokens = usage.input_tokens_details?.cached_tokens || 0;
 	const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0;
+	// Wholesale replacement must not drop provider-annotated extras (Copilot
+	// premium-request accounting): the failed/cancelled paths throw right after
+	// this call with no later chance to re-apply.
+	const premiumRequests = output.usage.premiumRequests;
 	output.usage = {
 		input: (usage.input_tokens || 0) - cachedTokens,
 		output: usage.output_tokens || 0,
@@ -953,4 +1157,38 @@ export function populateResponsesUsageFromResponse(
 		...(reasoningTokens > 0 ? { reasoningTokens } : {}),
 		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 	};
+	if (premiumRequests !== undefined) {
+		output.usage.premiumRequests = premiumRequests;
+	}
+}
+/**
+ * Strict-prefix delta for stateful `previous_response_id` chaining (used by the
+ * platform Responses provider and the Codex provider on both transports):
+ * returns the input items the current request appends beyond the previous
+ * request's input plus the previous response's output items, or null when the
+ * request options differ or history mutated (the chain must break). Per-turn
+ * `client_metadata` (e.g. rotating turn ids) is excluded from the option
+ * comparison; codex-rs excludes it from the same check.
+ *
+ * How the check works:
+ * - Options: both request objects are shallow-copied with `input` and
+ *   `client_metadata` overwritten to `undefined`, then compared as
+ *   `JSON.stringify` text. Properties whose value is `undefined` are omitted
+ *   from that text, so a field that is absent on one side and explicitly
+ *   `undefined` on the other still compares equal. Because the comparison is
+ *   textual, it also depends on property order: two requests with the same
+ *   options in a different key order are reported as different (null).
+ * - History: the baseline is `previous.input` followed by
+ *   `previousResponseItems` (treated as empty when undefined). Each baseline
+ *   item must serialize to the same JSON text as the item at the same index of
+ *   `current.input`.
+ * - The current input must be strictly longer than the baseline; a request
+ *   that appends nothing yields null rather than an empty array.
+ *
+ * Neither argument is mutated; the returned array is a fresh slice.
+ *
+ * @param previous - The previous request body, or undefined when there is no
+ *   previous request (always yields null). Its `input` must be an array.
+ * @param previousResponseItems - Output items of the previous response, appended
+ *   to `previous.input` to form the baseline; undefined is treated as empty.
+ * @param current - The request body about to be sent. Its `input` must be an array.
+ * @returns The items of `current.input` past the baseline, or null when the
+ *   chain must break. The elements are cast to `TItem` without runtime
+ *   validation.
+ */
+export function buildResponsesDeltaInput<TItem>(
+	previous: { input?: unknown } | undefined,
+	previousResponseItems: readonly TItem[] | undefined,
+	current: { input?: unknown },
+): TItem[] | null {
+	if (!previous) return null;
+	if (!Array.isArray(previous.input) || !Array.isArray(current.input)) return null;
+	const previousWithoutInput = { ...previous, input: undefined, client_metadata: undefined };
+	const currentWithoutInput = { ...current, input: undefined, client_metadata: undefined };
+	if (JSON.stringify(previousWithoutInput) !== JSON.stringify(currentWithoutInput)) {
+		return null;
+	}
+	const baseline = [...previous.input, ...(previousResponseItems ?? [])];
+	if (current.input.length <= baseline.length) return null;
+	for (let index = 0; index < baseline.length; index += 1) {
+		if (JSON.stringify(baseline[index]) !== JSON.stringify(current.input[index])) {
+			return null;
+		}
+	}
+	return current.input.slice(baseline.length) as TItem[];
 }