npm - @prometheus-ai/ai - Versions diffs - 0.5.3 → 0.5.8 - Mend

@prometheus-ai/ai 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (377) hide show

package/dist/types/auth-broker/remote-store.d.ts +2 -1
package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
package/dist/types/auth-gateway/server.d.ts +19 -0
package/dist/types/auth-gateway/types.d.ts +9 -3
package/dist/types/auth-retry.d.ts +119 -0
package/dist/types/auth-storage.d.ts +217 -8
package/dist/types/errors.d.ts +24 -0
package/dist/types/index.d.ts +5 -9
package/dist/types/provider-details.d.ts +1 -1
package/dist/types/providers/amazon-bedrock.d.ts +12 -6
package/dist/types/providers/anthropic-client.d.ts +10 -3
package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
package/dist/types/providers/anthropic-wire.d.ts +3 -3
package/dist/types/providers/anthropic.d.ts +41 -34
package/dist/types/providers/aws-credentials.d.ts +8 -0
package/dist/types/providers/azure-openai-responses.d.ts +1 -0
package/dist/types/providers/google-gemini-cli.d.ts +22 -1
package/dist/types/providers/google-shared.d.ts +22 -0
package/dist/types/providers/google-types.d.ts +13 -1
package/dist/types/providers/mock.d.ts +8 -3
package/dist/types/providers/ollama.d.ts +6 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
package/dist/types/providers/openai-chat-server.d.ts +3 -3
package/dist/types/providers/openai-chat-wire.d.ts +644 -0
package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
package/dist/types/providers/openai-codex-responses.d.ts +31 -2
package/dist/types/providers/openai-completions-compat.d.ts +2 -25
package/dist/types/providers/openai-completions.d.ts +2 -10
package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
package/dist/types/providers/openai-responses-server.d.ts +2 -2
package/dist/types/providers/openai-responses-shared.d.ts +49 -9
package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
package/dist/types/providers/openai-responses.d.ts +13 -4
package/dist/types/providers/prometheus-native-client.d.ts +9 -0
package/dist/types/providers/prometheus-native-server.d.ts +4 -3
package/dist/types/providers/transform-messages.d.ts +1 -2
package/dist/types/rate-limit-utils.d.ts +3 -2
package/dist/types/registry/aimlapi.d.ts +4 -0
package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
package/dist/types/registry/amazon-bedrock.d.ts +5 -0
package/dist/types/registry/anthropic.d.ts +10 -0
package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
package/dist/types/registry/cerebras.d.ts +7 -0
package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
package/dist/types/registry/cursor.d.ts +7 -0
package/dist/types/registry/deepseek.d.ts +8 -0
package/dist/types/registry/derived.d.ts +5 -0
package/dist/types/registry/firepass.d.ts +16 -0
package/dist/types/registry/fireworks.d.ts +7 -0
package/dist/types/registry/github-copilot.d.ts +7 -0
package/dist/types/registry/gitlab-duo.d.ts +9 -0
package/dist/types/registry/google-antigravity.d.ts +9 -0
package/dist/types/registry/google-gemini-cli.d.ts +9 -0
package/dist/types/registry/google-vertex.d.ts +5 -0
package/dist/types/registry/google.d.ts +4 -0
package/dist/types/registry/groq.d.ts +4 -0
package/dist/types/registry/huggingface.d.ts +7 -0
package/dist/types/registry/index.d.ts +4 -0
package/dist/types/registry/kagi.d.ts +14 -0
package/dist/types/registry/kilo.d.ts +7 -0
package/dist/types/registry/kimi-code.d.ts +7 -0
package/dist/types/registry/litellm.d.ts +13 -0
package/dist/types/registry/lm-studio.d.ts +8 -0
package/dist/types/registry/minimax-code-cn.d.ts +6 -0
package/dist/types/registry/minimax-code.d.ts +6 -0
package/dist/types/registry/minimax.d.ts +4 -0
package/dist/types/registry/mistral.d.ts +4 -0
package/dist/types/registry/moonshot.d.ts +7 -0
package/dist/types/registry/nanogpt.d.ts +7 -0
package/dist/types/registry/nvidia.d.ts +7 -0
package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
package/dist/types/registry/ollama-cloud.d.ts +7 -0
package/dist/types/registry/ollama.d.ts +12 -0
package/dist/types/registry/openai-codex-device.d.ts +8 -0
package/dist/types/registry/openai-codex.d.ts +9 -0
package/dist/types/registry/openai.d.ts +4 -0
package/dist/types/registry/opencode-go.d.ts +6 -0
package/dist/types/registry/opencode-zen.d.ts +6 -0
package/dist/types/registry/openrouter.d.ts +13 -0
package/dist/types/registry/parallel.d.ts +14 -0
package/dist/types/registry/perplexity.d.ts +7 -0
package/dist/types/registry/qianfan.d.ts +7 -0
package/dist/types/registry/qwen-portal.d.ts +7 -0
package/dist/types/registry/registry.d.ts +272 -0
package/dist/types/registry/synthetic.d.ts +6 -0
package/dist/types/registry/tavily.d.ts +14 -0
package/dist/types/registry/together.d.ts +6 -0
package/dist/types/registry/types.d.ts +51 -0
package/dist/types/registry/venice.d.ts +13 -0
package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
package/dist/types/registry/vllm.d.ts +7 -0
package/dist/types/registry/wafer-pass.d.ts +6 -0
package/dist/types/registry/wafer-serverless.d.ts +6 -0
package/dist/types/registry/xai-oauth.d.ts +7 -0
package/dist/types/registry/xai.d.ts +4 -0
package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
package/dist/types/registry/xiaomi.d.ts +6 -0
package/dist/types/registry/zai.d.ts +7 -0
package/dist/types/registry/zenmux.d.ts +7 -0
package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
package/dist/types/stream.d.ts +9 -1
package/dist/types/types.d.ts +56 -295
package/dist/types/usage/google-antigravity.d.ts +15 -1
package/dist/types/usage/openai-codex-reset.d.ts +79 -0
package/dist/types/usage/openai-codex.d.ts +1 -0
package/dist/types/usage.d.ts +77 -4
package/dist/types/utils/abort.d.ts +6 -0
package/dist/types/utils/event-stream.d.ts +2 -0
package/dist/types/utils/http-inspector.d.ts +0 -1
package/dist/types/utils/idle-iterator.d.ts +35 -0
package/dist/types/utils/openai-http.d.ts +58 -0
package/dist/types/utils/request-debug.d.ts +3 -0
package/dist/types/utils/retry-after.d.ts +1 -0
package/dist/types/utils/schema/fields.d.ts +5 -0
package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
package/dist/types/utils/schema/stamps.d.ts +7 -15
package/dist/types/utils/sse-debug.d.ts +0 -5
package/dist/types/utils/stream-markup-healing.d.ts +2 -0
package/dist/types/utils.d.ts +1 -5
package/package.json +17 -29
package/src/auth-broker/remote-store.ts +10 -1
package/src/auth-broker/snapshot-cache.ts +1 -1
package/src/auth-broker/wire-schemas.ts +1 -1
package/src/auth-gateway/http.ts +1 -1
package/src/auth-gateway/server.ts +95 -30
package/src/auth-gateway/types.ts +10 -2
package/src/auth-retry.ts +238 -0
package/src/auth-storage.ts +935 -430
package/src/errors.ts +32 -0
package/src/index.ts +9 -14
package/src/provider-details.ts +1 -1
package/src/providers/__tests__/google-auth.test.ts +144 -0
package/src/providers/amazon-bedrock.ts +70 -40
package/src/providers/anthropic-client.ts +15 -13
package/src/providers/anthropic-messages-server-schema.ts +17 -7
package/src/providers/anthropic-messages-server.ts +88 -20
package/src/providers/anthropic-wire.ts +4 -3
package/src/providers/anthropic.ts +1234 -621
package/src/providers/aws-credentials.ts +47 -5
package/src/providers/aws-eventstream.ts +5 -0
package/src/providers/azure-openai-responses.ts +117 -67
package/src/providers/cursor.ts +30 -30
package/src/providers/github-copilot-headers.ts +1 -1
package/src/providers/gitlab-duo.ts +36 -29
package/src/providers/google-auth.ts +71 -8
package/src/providers/google-gemini-cli.ts +118 -22
package/src/providers/google-shared.ts +163 -43
package/src/providers/google-types.ts +10 -1
package/src/providers/kimi.ts +1 -1
package/src/providers/mock.ts +11 -3
package/src/providers/ollama.ts +64 -7
package/src/providers/openai-anthropic-shim.ts +17 -8
package/src/providers/openai-chat-server-schema.ts +9 -3
package/src/providers/openai-chat-server.ts +82 -16
package/src/providers/openai-chat-wire.ts +847 -0
package/src/providers/openai-codex/request-transformer.ts +129 -34
package/src/providers/openai-codex/response-handler.ts +22 -1
package/src/providers/openai-codex-responses.ts +699 -247
package/src/providers/openai-completions-compat.ts +8 -308
package/src/providers/openai-completions.ts +416 -267
package/src/providers/openai-responses-server-schema.ts +15 -9
package/src/providers/openai-responses-server.ts +162 -114
package/src/providers/openai-responses-shared.ts +320 -82
package/src/providers/openai-responses-wire.ts +6391 -0
package/src/providers/openai-responses.ts +382 -176
package/src/providers/prometheus-native-client.ts +27 -11
package/src/providers/prometheus-native-server.ts +44 -17
package/src/providers/transform-messages.ts +311 -120
package/src/providers/vision-guard.ts +5 -3
package/src/rate-limit-utils.ts +13 -3
package/src/registry/aimlapi.ts +6 -0
package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
package/src/registry/amazon-bedrock.ts +22 -0
package/src/registry/anthropic.ts +26 -0
package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
package/src/{utils/oauth → registry}/cerebras.ts +8 -1
package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
package/src/registry/cursor.ts +20 -0
package/src/{utils/oauth → registry}/deepseek.ts +9 -17
package/src/registry/derived.ts +9 -0
package/src/{utils/oauth → registry}/firepass.ts +10 -2
package/src/{utils/oauth → registry}/fireworks.ts +8 -1
package/src/registry/github-copilot.ts +22 -0
package/src/registry/gitlab-duo.ts +19 -0
package/src/registry/google-antigravity.ts +21 -0
package/src/registry/google-gemini-cli.ts +21 -0
package/src/registry/google-vertex.ts +38 -0
package/src/registry/google.ts +6 -0
package/src/registry/groq.ts +6 -0
package/src/{utils/oauth → registry}/huggingface.ts +8 -19
package/src/registry/index.ts +4 -0
package/src/{utils/oauth → registry}/kagi.ts +9 -11
package/src/{utils/oauth → registry}/kilo.ts +11 -6
package/src/registry/kimi-code.ts +17 -0
package/src/{utils/oauth → registry}/litellm.ts +8 -12
package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
package/src/registry/minimax-code-cn.ts +12 -0
package/src/registry/minimax-code.ts +12 -0
package/src/registry/minimax.ts +6 -0
package/src/registry/mistral.ts +6 -0
package/src/{utils/oauth → registry}/moonshot.ts +8 -9
package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
package/src/{utils/oauth → registry}/nvidia.ts +8 -18
package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
package/src/{utils → registry}/oauth/anthropic.ts +38 -17
package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
package/src/registry/oauth/gitlab-duo.ts +198 -0
package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
package/src/registry/oauth/index.ts +164 -0
package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
package/src/{utils → registry}/oauth/types.ts +7 -51
package/src/{utils → registry}/oauth/wafer.ts +1 -1
package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
package/src/{utils/oauth → registry}/ollama.ts +8 -13
package/src/registry/openai-codex-device.ts +18 -0
package/src/registry/openai-codex.ts +19 -0
package/src/registry/openai.ts +6 -0
package/src/registry/opencode-go.ts +12 -0
package/src/registry/opencode-zen.ts +12 -0
package/src/{utils/oauth → registry}/openrouter.ts +10 -2
package/src/{utils/oauth → registry}/parallel.ts +9 -11
package/src/registry/perplexity.ts +13 -0
package/src/{utils/oauth → registry}/qianfan.ts +8 -17
package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
package/src/registry/registry.ts +149 -0
package/src/{utils/oauth → registry}/synthetic.ts +7 -1
package/src/{utils/oauth → registry}/tavily.ts +10 -12
package/src/{utils/oauth → registry}/together.ts +7 -1
package/src/registry/types.ts +56 -0
package/src/{utils/oauth → registry}/venice.ts +8 -12
package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
package/src/{utils/oauth → registry}/vllm.ts +9 -16
package/src/registry/wafer-pass.ts +12 -0
package/src/registry/wafer-serverless.ts +12 -0
package/src/registry/xai-oauth.ts +17 -0
package/src/registry/xai.ts +6 -0
package/src/registry/xiaomi-token-plan-ams.ts +12 -0
package/src/registry/xiaomi-token-plan-cn.ts +12 -0
package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
package/src/registry/xiaomi.ts +12 -0
package/src/{utils/oauth → registry}/zai.ts +10 -22
package/src/{utils/oauth → registry}/zenmux.ts +8 -1
package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
package/src/stream.ts +229 -199
package/src/types.ts +63 -384
package/src/usage/claude.ts +4 -2
package/src/usage/github-copilot.ts +4 -2
package/src/usage/google-antigravity.ts +196 -28
package/src/usage/kimi.ts +1 -1
package/src/usage/minimax-code.ts +5 -6
package/src/usage/openai-codex-reset.ts +174 -0
package/src/usage/openai-codex.ts +19 -2
package/src/usage/zai.ts +2 -1
package/src/usage.ts +93 -4
package/src/utils/abort.ts +14 -0
package/src/utils/event-stream.ts +17 -0
package/src/utils/http-inspector.ts +4 -12
package/src/utils/idle-iterator.ts +250 -79
package/src/utils/openai-http.ts +157 -0
package/src/utils/request-debug.ts +67 -19
package/src/utils/retry-after.ts +1 -1
package/src/utils/retry.ts +23 -2
package/src/utils/schema/CONSTRAINTS.md +4 -2
package/src/utils/schema/fields.ts +16 -0
package/src/utils/schema/json-schema-validator.ts +19 -1
package/src/utils/schema/normalize.ts +80 -8
package/src/utils/schema/stamps.ts +22 -10
package/src/utils/schema/wire.ts +2 -2
package/src/utils/sse-debug.ts +0 -271
package/src/utils/stream-markup-healing.ts +50 -8
package/src/utils/validation.ts +49 -13
package/src/utils.ts +2 -26
package/dist/types/model-cache.d.ts +0 -17
package/dist/types/model-manager.d.ts +0 -64
package/dist/types/model-thinking.d.ts +0 -100
package/dist/types/models.d.ts +0 -12
package/dist/types/provider-models/bundled-references.d.ts +0 -4
package/dist/types/provider-models/descriptors.d.ts +0 -50
package/dist/types/provider-models/google.d.ts +0 -24
package/dist/types/provider-models/index.d.ts +0 -5
package/dist/types/provider-models/ollama.d.ts +0 -7
package/dist/types/provider-models/openai-compat.d.ts +0 -323
package/dist/types/provider-models/special.d.ts +0 -16
package/dist/types/utils/discovery/antigravity.d.ts +0 -61
package/dist/types/utils/discovery/codex.d.ts +0 -38
package/dist/types/utils/discovery/cursor.d.ts +0 -23
package/dist/types/utils/discovery/gemini.d.ts +0 -25
package/dist/types/utils/discovery/index.d.ts +0 -4
package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
package/dist/types/utils/oauth/cerebras.d.ts +0 -1
package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/deepseek.d.ts +0 -10
package/dist/types/utils/oauth/firepass.d.ts +0 -1
package/dist/types/utils/oauth/fireworks.d.ts +0 -1
package/dist/types/utils/oauth/huggingface.d.ts +0 -19
package/dist/types/utils/oauth/kagi.d.ts +0 -17
package/dist/types/utils/oauth/kilo.d.ts +0 -5
package/dist/types/utils/oauth/litellm.d.ts +0 -18
package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
package/dist/types/utils/oauth/moonshot.d.ts +0 -1
package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
package/dist/types/utils/oauth/nvidia.d.ts +0 -18
package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
package/dist/types/utils/oauth/ollama.d.ts +0 -18
package/dist/types/utils/oauth/openrouter.d.ts +0 -1
package/dist/types/utils/oauth/parallel.d.ts +0 -17
package/dist/types/utils/oauth/qianfan.d.ts +0 -17
package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
package/dist/types/utils/oauth/synthetic.d.ts +0 -1
package/dist/types/utils/oauth/tavily.d.ts +0 -17
package/dist/types/utils/oauth/together.d.ts +0 -1
package/dist/types/utils/oauth/venice.d.ts +0 -18
package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
package/dist/types/utils/oauth/vllm.d.ts +0 -16
package/dist/types/utils/oauth/zai.d.ts +0 -18
package/dist/types/utils/oauth/zenmux.d.ts +0 -1
package/dist/types/utils/oauth/zhipu.d.ts +0 -18
package/src/model-cache.ts +0 -129
package/src/model-manager.ts +0 -469
package/src/model-thinking.ts +0 -756
package/src/models.json +0 -60287
package/src/models.json.d.ts +0 -9
package/src/models.ts +0 -56
package/src/provider-models/bundled-references.ts +0 -38
package/src/provider-models/descriptors.ts +0 -364
package/src/provider-models/google.ts +0 -88
package/src/provider-models/index.ts +0 -5
package/src/provider-models/ollama.ts +0 -153
package/src/provider-models/openai-compat.ts +0 -2904
package/src/provider-models/special.ts +0 -67
package/src/utils/discovery/antigravity.ts +0 -261
package/src/utils/discovery/codex.ts +0 -371
package/src/utils/discovery/cursor.ts +0 -306
package/src/utils/discovery/gemini.ts +0 -248
package/src/utils/discovery/index.ts +0 -4
package/src/utils/discovery/openai-compatible.ts +0 -224
package/src/utils/oauth/gitlab-duo.ts +0 -123
package/src/utils/oauth/index.ts +0 -502
/package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
/package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
/package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
/package/src/{utils → registry}/oauth/callback-server.ts +0 -0
/package/src/{utils → registry}/oauth/cursor.ts +0 -0
/package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
/package/src/{utils → registry}/oauth/kimi.ts +0 -0
/package/src/{utils → registry}/oauth/oauth.html +0 -0
/package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
/package/src/{utils → registry}/oauth/opencode.ts +0 -0
/package/src/{utils → registry}/oauth/perplexity.ts +0 -0
/package/src/{utils → registry}/oauth/pkce.ts +0 -0

package/src/providers/prometheus-native-client.ts CHANGED Viewed

@@ -11,11 +11,12 @@
  *
  * Activated when a {@link Model} has `transport: "prometheus-native"` set; the
  * dispatch hook lives in `streamSimple()` (see `../stream.ts`). Used by
- * containerized prometheus deployments (roboprometheus slots, the swarm extension) that
+ * containerized prometheus deployments (isolated slots, the swarm extension) that
  * route every LLM call through a credential-holding sidecar so the slot
  * itself stays credential-free.
  */
 import { readSseJson } from "@prometheus-ai/utils";
+import { ProviderHttpError } from "../errors";
 import type {
 	Api,
 	AssistantMessage,
@@ -58,7 +59,19 @@ function buildWireOptions(options: SimpleStreamOptions | undefined): Record<stri
 	return wire;
 }
-async function decodeGatewayError(response: Response): Promise<Error> {
+/**
+ * Non-2xx response from the auth-gateway `/v1/prometheus/stream` endpoint. `code`
+ * carries the gateway's error-type token (`authentication_error`,
+ * `rate_limit_error`, `upstream_error`, ...).
+ */
+export class AuthGatewayError extends ProviderHttpError {
+	constructor(message: string, status: number, headers?: Headers, code?: string) {
+		super(message, status, { headers, code });
+		this.name = "AuthGatewayError";
+	}
+}
+async function decodeGatewayError(response: Response): Promise<AuthGatewayError> {
 	const status = response.status;
 	let body: unknown;
 	try {
@@ -71,16 +84,16 @@ async function decodeGatewayError(response: Response): Promise<Error> {
 		if (typeof err === "object" && err !== null) {
 			const message = (err as { message?: unknown }).message;
 			const type = (err as { type?: unknown }).type;
-			const out = new Error(typeof message === "string" ? message : `auth-gateway ${status}`);
-			(out as { status?: number; type?: string }).status = status;
-			if (typeof type === "string") (out as { type?: string }).type = type;
-			return out;
+			return new AuthGatewayError(
+				typeof message === "string" ? message : `auth-gateway ${status}`,
+				status,
+				response.headers,
+				typeof type === "string" ? type : undefined,
+			);
 		}
 	}
 	const text = typeof body === "string" ? body : JSON.stringify(body);
-	const err = new Error(`auth-gateway ${status}: ${text || response.statusText}`);
-	(err as { status?: number }).status = status;
-	return err;
+	return new AuthGatewayError(`auth-gateway ${status}: ${text || response.statusText}`, status, response.headers);
 }
 /**
@@ -149,9 +162,12 @@ export function streamPrometheusNative<TApi extends Api>(
 		try {
 			const url = resolveStreamUrl(model as Model<Api>);
 			const fetchImpl = options?.fetch ?? globalThis.fetch;
-			const headers = buildHeaders(model as Model<Api>, options?.apiKey);
+			const headers = buildHeaders(
+				model as Model<Api>,
+				typeof options?.apiKey === "string" ? options.apiKey : undefined,
+			);
 			const body = JSON.stringify({
-				modelId: model.id,
+				modelId: `${model.provider}/${model.id}`,
 				context,
 				options: buildWireOptions(options),
 				stream: true,

package/src/providers/prometheus-native-server.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  * Where the OpenAI / Anthropic / Responses route modules translate foreign
  * wire shapes through Prometheus AI's canonical {@link Context}, this module accepts
  * the canonical shape *directly* — for clients that already speak Prometheus AI
- * (containerized prometheus, the swarm extension, roboprometheus's sidecar auth-gateway).
+ * (containerized prometheus, the swarm extension, sidecar auth-gateways).
  * Skipping the wire-format → Context → wire-format round-trip cuts
  * per-request CPU but, more importantly, avoids the quantization that those
  * translations impose on first-class Prometheus AI fields (service tier, cache
@@ -25,6 +25,7 @@
  *   200 JSON (stream=false): { message: AssistantMessage }
  *   4xx/5xx: { error: { type, message } }
  */
+import type { AuthGatewayStreamControl } from "../auth-gateway/types";
 import type { AssistantMessageEventStream, Context, SimpleStreamOptions } from "../types";
 export interface PrometheusNativeParsedRequest {
@@ -156,36 +157,62 @@ const SSE_DONE = SSE_ENCODER.encode("data: [DONE]\n\n");
  * canonical event type IS the wire type. Including the rolling
  * `partial: AssistantMessage` on every delta is quadratic in turn length
  * on the wire, but for the loopback / sidecar topology this transport
- * targets (containerized prometheus → host gateway, roboprometheus slot → prometheus-auth-gateway
+ * targets (containerized prometheus → host gateway, isolated slot → prometheus-auth-gateway
  * sidecar) the bandwidth cost is negligible compared to provider latency —
  * and the client gets to feed the events straight into its existing
  * `AssistantMessageEventStream.push()` plumbing with zero translation.
  */
-export function encodeStream(events: AssistantMessageEventStream): ReadableStream<Uint8Array> {
+export function encodeStream(
+	events: AssistantMessageEventStream,
+	_requestedModelId?: string,
+	_options?: SimpleStreamOptions,
+	control?: AuthGatewayStreamControl,
+): ReadableStream<Uint8Array> {
+	let cancelled = control?.signal?.aborted === true;
+	const markCancelled = () => {
+		cancelled = true;
+	};
+	control?.signal?.addEventListener("abort", markCancelled, { once: true });
 	return new ReadableStream<Uint8Array>({
 		async start(controller) {
 			try {
+				if (cancelled) {
+					controller.close();
+					return;
+				}
 				for await (const event of events) {
+					if (cancelled) return;
 					controller.enqueue(SSE_ENCODER.encode(`data: ${JSON.stringify(event)}\n\n`));
 					if (event.type === "done" || event.type === "error") break;
 				}
-				controller.enqueue(SSE_DONE);
-				controller.close();
+				if (!cancelled) {
+					controller.enqueue(SSE_DONE);
+					controller.close();
+				}
 			} catch (err) {
-				// Best-effort error envelope so the client iterator resolves
-				// instead of hanging on the dropped connection. Shape matches the
-				// canonical `error` event minus the unrecoverable `error:
-				// AssistantMessage` payload (we don't have a usable one here).
-				const message = err instanceof Error ? err.message : String(err);
-				controller.enqueue(
-					SSE_ENCODER.encode(
-						`data: ${JSON.stringify({ type: "error", reason: "error", errorMessage: message })}\n\n`,
-					),
-				);
-				controller.enqueue(SSE_DONE);
-				controller.close();
+				if (!cancelled) {
+					// Best-effort error envelope so the client iterator resolves
+					// instead of hanging on the dropped connection. Shape matches the
+					// canonical `error` event minus the unrecoverable `error:
+					// AssistantMessage` payload (we don't have a usable one here).
+					const message = err instanceof Error ? err.message : String(err);
+					controller.enqueue(
+						SSE_ENCODER.encode(
+							`data: ${JSON.stringify({ type: "error", reason: "error", errorMessage: message })}\n\n`,
+						),
+					);
+					controller.enqueue(SSE_DONE);
+					controller.close();
+				}
+			} finally {
+				control?.signal?.removeEventListener("abort", markCancelled);
 			}
 		},
+		cancel(reason) {
+			cancelled = true;
+			control?.signal?.removeEventListener("abort", markCancelled);
+			control?.onCancel?.(reason);
+		},
 	});
 }

package/src/providers/transform-messages.ts CHANGED Viewed

@@ -1,14 +1,4 @@
-import turnAbortedGuidance from "../prompts/turn-aborted-guidance.md" with { type: "text" };
-import type {
-	Api,
-	AssistantMessage,
-	DeveloperMessage,
-	Message,
-	Model,
-	ToolCall,
-	ToolResultMessage,
-	UserMessage,
-} from "../types";
+import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage, UserMessage } from "../types";
 const enum ToolCallStatus {
 	/** A tool result has already been emitted for this tool call; later duplicates must be skipped. */
@@ -17,6 +7,123 @@ const enum ToolCallStatus {
 	Aborted = 2,
 }
+/**
+ * Maximum tool-call id length the strictest replay provider accepts.
+ *
+ * Anthropic requires `^[a-zA-Z0-9_-]+$` with a 64-char cap; Google and Codex
+ * `normalizeToolCallId` implementations cap individual id segments to the same
+ * 64-char ceiling. Replacement ids minted here flow back through
+ * `convertAnthropicMessages` (and friends) unchanged, so the `_dupN` suffix
+ * MUST not push a normalized id past this bound.
+ */
+const MAX_TOOL_CALL_ID_LENGTH = 64;
+function appendDuplicateSuffix(originalId: string, suffix: string, maxLength: number): string {
+	// Responses-family ids are composites (`callId|itemId`): the wire call_id is
+	// the FIRST segment (normalizeResponsesToolCallId splits on `|`), so the
+	// suffix must land on every segment or the duplicate collapses back onto the
+	// original call_id at encode time. The length budget applies per segment,
+	// matching the per-segment caps of the provider normalizers.
+	if (originalId.includes("|")) {
+		return originalId
+			.split("|")
+			.map(segment => appendSegmentDuplicateSuffix(segment, suffix, maxLength))
+			.join("|");
+	}
+	return appendSegmentDuplicateSuffix(originalId, suffix, maxLength);
+}
+function appendSegmentDuplicateSuffix(segment: string, suffix: string, maxLength: number): string {
+	if (segment.length + suffix.length <= maxLength) return `${segment}${suffix}`;
+	const prefixBudget = Math.max(0, maxLength - suffix.length);
+	return `${segment.slice(0, prefixBudget)}${suffix}`;
+}
+type PendingToolResultRewrite = { replacementId: string } | undefined;
+function deduplicateToolCallIds(
+	messages: Message[],
+	maxToolCallIdLength = MAX_TOOL_CALL_ID_LENGTH,
+	duplicateSuffixPrefix = "_dup",
+): Message[] {
+	const seenToolCallIds = new Map<string, number>();
+	const pendingToolResultRewrites = new Map<string, PendingToolResultRewrite[]>();
+	return messages.map(msg => {
+		if (msg.role === "toolResult") {
+			const rewrites = pendingToolResultRewrites.get(msg.toolCallId);
+			if (!rewrites || rewrites.length === 0) return msg;
+			const rewrite = rewrites.shift();
+			if (rewrites.length === 0) pendingToolResultRewrites.delete(msg.toolCallId);
+			if (rewrite) return { ...msg, toolCallId: rewrite.replacementId };
+			return msg;
+		}
+		if (msg.role !== "assistant") return msg;
+		const enqueueToolResultRewrite = (id: string, rewrite: PendingToolResultRewrite): void => {
+			const rewrites = pendingToolResultRewrites.get(id);
+			if (rewrites) {
+				rewrites.push(rewrite);
+				return;
+			}
+			pendingToolResultRewrites.set(id, [rewrite]);
+		};
+		// Ids this turn has already touched; used to scope the "drop carried-over
+		// pending rewrites" semantics to the FIRST occurrence per turn so multiple
+		// blocks of the same id within one turn still accumulate as duplicates.
+		const idsTouchedInTurn = new Set<string>();
+		let contentChanged = false;
+		const content = msg.content.map(block => {
+			if (block.type !== "toolCall") return block;
+			// Drop any pending rewrites carried over from a prior assistant turn
+			// for this id on its first appearance this turn. When a later turn
+			// re-emits the same id, the older duplicate call's expected result
+			// never landed in time — the second pass synthesizes
+			// "No result provided" for it, and the upcoming real result(id) must
+			// route to one of THIS turn's calls. Without this guard the older
+			// `_dup` id would steal the next result.
+			if (!idsTouchedInTurn.has(block.id)) {
+				pendingToolResultRewrites.delete(block.id);
+				idsTouchedInTurn.add(block.id);
+			}
+			const previousCount = seenToolCallIds.get(block.id) ?? 0;
+			if (previousCount === 0) {
+				seenToolCallIds.set(block.id, 1);
+				enqueueToolResultRewrite(block.id, undefined);
+				return block;
+			}
+			let duplicateIndex = previousCount;
+			let replacementId = appendDuplicateSuffix(
+				block.id,
+				`${duplicateSuffixPrefix}${duplicateIndex}`,
+				maxToolCallIdLength,
+			);
+			while (seenToolCallIds.has(replacementId)) {
+				duplicateIndex += 1;
+				replacementId = appendDuplicateSuffix(
+					block.id,
+					`${duplicateSuffixPrefix}${duplicateIndex}`,
+					maxToolCallIdLength,
+				);
+			}
+			seenToolCallIds.set(block.id, duplicateIndex + 1);
+			seenToolCallIds.set(replacementId, 1);
+			enqueueToolResultRewrite(block.id, { replacementId });
+			contentChanged = true;
+			return { ...block, id: replacementId };
+		});
+		if (!contentChanged) return msg;
+		return { ...msg, content };
+	});
+}
 function shouldDropTruncatedThinkingOnlyAssistant(msg: AssistantMessage): boolean {
 	const isTruncatedStop = msg.stopReason === "length" || msg.stopReason === "error" || msg.stopReason === "aborted";
 	return isTruncatedStop && !msg.content.some(block => block.type === "toolCall" || block.type === "text");
@@ -32,6 +139,10 @@ function getLatestSurvivingAssistantIndex(messages: readonly Message[]): number
 	return -1;
 }
+function isAnthropicMessagesModel(model: Model): model is Model<"anthropic-messages"> { // Type guard: a true result narrows `model` to Model<"anthropic-messages">.
+	return model.api === "anthropic-messages"; // Decided solely by the `api` field; no other model property is read.
+}
 /**
  * Normalize tool call ID for cross-provider compatibility.
  * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`.
@@ -40,128 +151,218 @@ function getLatestSurvivingAssistantIndex(messages: readonly Message[]): number
  * For aborted/errored turns, this function:
  * - Preserves tool call structure (unlike converting to text summaries)
  * - Injects synthetic "aborted" tool results
- * - Adds a <turn-aborted> guidance marker for the model
  */
 export function transformMessages<TApi extends Api>(
 	messages: Message[],
 	model: Model<TApi>,
 	normalizeToolCallId?: (id: string, model: Model<TApi>, source: AssistantMessage) => string,
+	maxNormalizedToolCallIdLength = MAX_TOOL_CALL_ID_LENGTH,
+	duplicateToolCallIdSuffixPrefix = "_dup",
 ): Message[] {
 	// Build a map of original tool call IDs to normalized IDs
 	const toolCallIdMap = new Map<string, string>();
 	const latestSurvivingAssistantIndex = getLatestSurvivingAssistantIndex(messages);
 	// First pass: transform messages (thinking blocks, tool call ID normalization)
-	const transformed = messages.map((msg, index) => {
-		// User and developer messages pass through unchanged
-		if (msg.role === "user" || msg.role === "developer") {
-			return msg;
-		}
-		// Handle toolResult messages - normalize toolCallId if we have a mapping
-		if (msg.role === "toolResult") {
-			const normalizedId = toolCallIdMap.get(msg.toolCallId);
-			if (normalizedId && normalizedId !== msg.toolCallId) {
-				return { ...msg, toolCallId: normalizedId };
+	const transformed = deduplicateToolCallIds(
+		messages.map((msg, index) => {
+			// User and developer messages pass through unchanged
+			if (msg.role === "user" || msg.role === "developer") {
+				return msg;
 			}
-			return msg;
-		}
-		// Assistant messages need transformation check
-		if (msg.role === "assistant") {
-			const assistantMsg = msg as AssistantMessage;
-			const isSameModel =
-				assistantMsg.provider === model.provider &&
-				assistantMsg.api === model.api &&
-				assistantMsg.model === model.id;
-			const mustPreserveLatestAnthropicThinking =
-				index === latestSurvivingAssistantIndex &&
-				model.api === "anthropic-messages" &&
-				assistantMsg.api === "anthropic-messages";
-			// Aborted/errored messages may have partially-streamed thinking signatures.
-			// A partial signature is invalid and will be rejected by the API, so we must
-			// strip signatures from thinking blocks in these messages.
-			//
-			// Abandoned tool-use turns get the same treatment once they are no longer
-			// the latest assistant message. When a turn carries toolCall blocks but did
-			// NOT request tool execution (stopReason !== "toolUse" — e.g.
-			// adaptive-thinking Opus emitting tool calls and then ending the turn on
-			// `end_turn`/`stop`), the agent loop pairs those calls with placeholder
-			// tool_results to keep the tool_use/tool_result contract valid. Historical
-			// abandoned turns cannot safely replay their end_turn-bound signatures in
-			// that continuation, so stripping downgrades them to plain text downstream.
-			// Latest abandoned turns are exempt because Anthropic requires thinking
-			// blocks from its most recent response to remain byte-for-byte unmodified.
-			const invalidStopReason = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
-			const abandonedToolUse =
-				!invalidStopReason &&
-				assistantMsg.stopReason !== "toolUse" &&
-				assistantMsg.content.some(b => b.type === "toolCall");
-			const hasInvalidSignatures = invalidStopReason || abandonedToolUse;
-			const transformedContent = assistantMsg.content.flatMap(block => {
-				if (block.type === "thinking") {
-					// Strip untrustworthy signatures so the encoder can downgrade to text.
-					const sanitized =
-						hasInvalidSignatures && block.thinkingSignature ? { ...block, thinkingSignature: undefined } : block;
-					if (mustPreserveLatestAnthropicThinking) return abandonedToolUse ? block : sanitized;
-					// For same model: keep thinking blocks with signatures (needed for replay)
-					// even if the thinking text is empty (OpenAI encrypted reasoning)
-					if (isSameModel && sanitized.thinkingSignature) return sanitized;
-					// Skip empty thinking blocks, convert others to plain text
-					if (!sanitized.thinking || sanitized.thinking.trim() === "") return [];
-					if (isSameModel) return sanitized;
-					return {
-						type: "text" as const,
-						text: sanitized.thinking,
-					};
-				}
-				if (block.type === "redactedThinking") {
-					if (mustPreserveLatestAnthropicThinking) return block;
-					if (isSameModel) return block;
-					return [];
+			// Handle toolResult messages - normalize toolCallId if we have a mapping
+			if (msg.role === "toolResult") {
+				const normalizedId = toolCallIdMap.get(msg.toolCallId);
+				if (normalizedId && normalizedId !== msg.toolCallId) {
+					return { ...msg, toolCallId: normalizedId };
 				}
+				return msg;
+			}
-				if (block.type === "text") {
-					if (isSameModel) return block;
-					return {
-						type: "text" as const,
-						text: block.text,
-					};
-				}
+			// Assistant messages need transformation check
+			if (msg.role === "assistant") {
+				const assistantMsg = msg as AssistantMessage;
+				const isSameModel =
+					assistantMsg.provider === model.provider &&
+					assistantMsg.api === model.api &&
+					assistantMsg.model === model.id;
+				const isAnthropicTarget = isAnthropicMessagesModel(model);
+				// Anthropic's all-or-none contract on prior-turn thinking blocks
+				// applies to every `anthropic-messages → anthropic-messages` replay,
+				// not just the latest assistant turn. The legacy
+				// `mustPreserveLatestAnthropicThinking` flag only honored it for the
+				// latest turn; every prior turn fell through to the cross-API
+				// text-demotion path whenever the conversation crossed a model id,
+				// silently dropping the reasoning chain on continuation for custom
+				// anthropic-messages providers configured via `models.yaml` and
+				// session-level model swaps (#2257).
+				const isAnthropicReplay = isAnthropicTarget && assistantMsg.api === "anthropic-messages";
+				const isLatestSurvivingAssistant = index === latestSurvivingAssistantIndex;
+				// Signature policy is a second axis. Anthropic cryptographically
+				// binds reasoning signatures to its key+session+model, so cross-model
+				// signatures must be stripped whenever official Anthropic is on
+				// either end of the replay:
+				//   * official → 3p: the 3p target can't reverify the signature;
+				//     keeping it leaks private continuation metadata for no benefit.
+				//   * 3p → official: official rejects a foreign signature outright.
+				//   * official → official cross-model: the new model rejects the
+				//     previous model's signature.
+				// 3p ↔ 3p replays preserve signatures because compatible providers
+				// (Z.AI, DeepSeek, custom `models.yaml` providers) treat them as
+				// opaque continuation hints rather than verified material; stripping
+				// degrades the reasoning chain into unsigned/text on the next turn
+				// (#2265). Source-side official detection uses the canonical catalog
+				// provider id `"anthropic"` because assistant messages carry no
+				// `baseUrl` — a user who manually points `provider: "anthropic"` at
+				// a custom proxy via `models.yaml` will see signatures stripped, the
+				// conservative direction (degraded reasoning, not broken requests).
+				const isOfficialAnthropicSource = isAnthropicReplay && assistantMsg.provider === "anthropic";
+				const isOfficialAnthropicTarget = isAnthropicTarget && model.compat.officialEndpoint;
+				const officialAnthropicInvolved = isOfficialAnthropicSource || isOfficialAnthropicTarget;
+				// Compatible Anthropic-messages reasoning targets that accept
+				// unsigned thinking natively (Z.AI, DeepSeek, the generic
+				// `reasoning && !official` case in the compat builder). Used to keep
+				// `redacted_thinking` siblings beside unsigned visible thinking on
+				// targets that won't text-demote it.
+				const replaysUnsignedAnthropicThinking = isAnthropicTarget && model.compat.replayUnsignedThinking;
+				// Thinking signatures can be untrustworthy for two distinct reasons with very
+				// different blast radii:
+				//
+				// 1. Aborted/errored turns: the stream stopped mid-block, so only the block
+				//    that was streaming at the abort point — always the FINAL content block —
+				//    can carry a partially-streamed (invalid) signature. Every earlier block
+				//    completed: Anthropic delivers a block's signature at its
+				//    `content_block_stop`, which necessarily fired before the next block began,
+				//    so those signatures are whole and valid. Stripping them would needlessly
+				//    discard a replayable thinking chain — e.g. interrupting during the visible
+				//    text output after thinking already finished leaves a fully-signed thinking
+				//    block that must be kept, or Anthropic rejects the replay with HTTP 400
+				//    "Invalid `signature` in `thinking` block".
+				//
+				// 2. Abandoned tool-use turns: a turn that carries toolCall blocks but did NOT
+				//    request tool execution (stopReason !== "toolUse" — e.g. adaptive-thinking
+				//    Opus emitting tool calls and then ending on `end_turn`/`stop`). The agent
+				//    loop pairs those calls with placeholder tool_results to keep the
+				//    tool_use/tool_result contract valid. The turn completed cleanly, but its
+				//    signatures are end_turn-bound and cannot be replayed in that synthesized
+				//    continuation, so EVERY thinking signature is stripped.
+				//
+				// Latest abandoned turns are exempt because Anthropic requires thinking blocks
+				// from its most recent response to remain byte-for-byte unmodified.
+				const invalidStopReason = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
+				const abandonedToolUse =
+					!invalidStopReason &&
+					assistantMsg.stopReason !== "toolUse" &&
+					assistantMsg.content.some(b => b.type === "toolCall");
+				const lastBlockIndex = assistantMsg.content.length - 1;
+				const transformedContent = assistantMsg.content.flatMap((block, blockIndex) => {
+					if (block.type === "thinking") {
+						// Only an aborted/errored turn's final (mid-stream) block can hold a
+						// partial signature; abandoned tool-use turns strip all. Drop the
+						// untrustworthy signature so the encoder can downgrade the block to text.
+						const signatureUntrustworthy =
+							abandonedToolUse || (invalidStopReason && blockIndex === lastBlockIndex);
+						let sanitized: typeof block =
+							signatureUntrustworthy && block.thinkingSignature
+								? { ...block, thinkingSignature: undefined }
+								: block;
+						if (isAnthropicReplay) {
+							// Latest abandoned turn: Anthropic's byte-for-byte rule forbids
+							// even stripping a signature on the latest message.
+							if (isLatestSurvivingAssistant && abandonedToolUse) return block;
+							// Cross-model prior turns crossing an official Anthropic endpoint
+							// must strip the source signature so the downstream encoder
+							// applies its `replayUnsignedThinking` policy (unsigned thinking
+							// is emitted natively on Anthropic-compatible reasoning endpoints
+							// and demoted to text on official Anthropic). 3p ↔ 3p replays
+							// keep the signature so the reasoning chain stays signed on
+							// continuation (#2265).
+							if (
+								!isLatestSurvivingAssistant &&
+								!isSameModel &&
+								officialAnthropicInvolved &&
+								sanitized.thinkingSignature
+							) {
+								sanitized = { ...sanitized, thinkingSignature: undefined };
+							}
+							// Drop blocks with neither a signature anchor nor any text —
+							// nothing for the next turn to replay.
+							if (!sanitized.thinkingSignature && (!sanitized.thinking || sanitized.thinking.trim() === "")) {
+								return [];
+							}
+							return sanitized;
+						}
+						// Cross-API target: keep the existing text-demotion fallback.
+						// For same model: keep thinking blocks with signatures (needed for replay)
+						// even if the thinking text is empty (OpenAI encrypted reasoning)
+						if (isSameModel && sanitized.thinkingSignature) return sanitized;
+						// Skip empty thinking blocks, convert others to plain text
+						if (!sanitized.thinking || sanitized.thinking.trim() === "") return [];
+						if (isSameModel) return sanitized;
+						return {
+							type: "text" as const,
+							text: sanitized.thinking,
+						};
+					}
-				if (block.type === "toolCall") {
-					const toolCall = block as ToolCall;
-					let normalizedToolCall: ToolCall = toolCall;
+					if (block.type === "redactedThinking") {
+						// Redacted thinking is native-only. Keep it for same-model
+						// signed replay, the latest byte-for-byte Anthropic turn, or
+						// compatible targets that will also emit sibling unsigned
+						// thinking natively. Drop it when the visible thinking was
+						// cross-model stripped and will be demoted to text.
+						if (isAnthropicReplay) {
+							if (isSameModel || isLatestSurvivingAssistant || replaysUnsignedAnthropicThinking) return block;
+							return [];
+						}
+						if (isSameModel) return block;
+						return [];
+					}
-					if (!isSameModel && toolCall.thoughtSignature) {
-						normalizedToolCall = { ...toolCall };
-						delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature;
+					if (block.type === "text") {
+						if (isSameModel) return block;
+						return {
+							type: "text" as const,
+							text: block.text,
+						};
 					}
-					if (!isSameModel && normalizeToolCallId) {
-						const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg);
-						if (normalizedId !== toolCall.id) {
-							toolCallIdMap.set(toolCall.id, normalizedId);
-							normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
+					if (block.type === "toolCall") {
+						const toolCall = block as ToolCall;
+						let normalizedToolCall: ToolCall = toolCall;
+						if (!isSameModel && toolCall.thoughtSignature) {
+							normalizedToolCall = { ...toolCall };
+							delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature;
 						}
-					}
-					return normalizedToolCall;
-				}
+						if (!isSameModel && normalizeToolCallId) {
+							const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg);
+							if (normalizedId !== toolCall.id) {
+								toolCallIdMap.set(toolCall.id, normalizedId);
+								normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
+							}
+						}
-				return block;
-			});
+						return normalizedToolCall;
+					}
-			return {
-				...assistantMsg,
-				content: transformedContent,
-			};
-		}
-		return msg;
-	});
+					return block;
+				});
+				return {
+					...assistantMsg,
+					content: transformedContent,
+				};
+			}
+			return msg;
+		}),
+		maxNormalizedToolCallIdLength,
+		duplicateToolCallIdSuffixPrefix,
+	);
 	const realToolResultsById = new Map<string, ToolResultMessage>();
 	for (const msg of transformed) {
 		if (msg.role === "toolResult" && !realToolResultsById.has(msg.toolCallId)) {
@@ -235,11 +436,6 @@ export function transformMessages<TApi extends Api>(
 			} as ToolResultMessage);
 			toolCallStatus.set(tc.id, ToolCallStatus.Aborted);
 		}
-		result.push({
-			role: "developer",
-			content: turnAbortedGuidance,
-			timestamp: pendingAbortedTimestamp + 1,
-		} as DeveloperMessage);
 		pendingAbortedToolCalls = new Map();
 		pendingAbortedTimestamp = undefined;
 	};
@@ -268,11 +464,6 @@ export function transformMessages<TApi extends Api>(
 			// (OpenAI completions `reasoning_text`, Google signed thought parts).
 			const originalMsg = messages[i]!;
 			if (originalMsg.role === "assistant" && shouldDropTruncatedThinkingOnlyAssistant(originalMsg)) {
-				if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") {
-					// Still arm the aborted-turn note so downstream guidance fires.
-					pendingAbortedToolCalls = new Map();
-					pendingAbortedTimestamp = assistantMsg.timestamp;
-				}
 				continue;
 			}