@prometheus-ai/ai 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377):
  1. package/dist/types/auth-broker/remote-store.d.ts +2 -1
  2. package/dist/types/auth-broker/wire-schemas.d.ts +4 -1
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-gateway/types.d.ts +9 -3
  5. package/dist/types/auth-retry.d.ts +119 -0
  6. package/dist/types/auth-storage.d.ts +217 -8
  7. package/dist/types/errors.d.ts +24 -0
  8. package/dist/types/index.d.ts +5 -9
  9. package/dist/types/provider-details.d.ts +1 -1
  10. package/dist/types/providers/amazon-bedrock.d.ts +12 -6
  11. package/dist/types/providers/anthropic-client.d.ts +10 -3
  12. package/dist/types/providers/anthropic-messages-server-schema.d.ts +2 -2
  13. package/dist/types/providers/anthropic-messages-server.d.ts +3 -3
  14. package/dist/types/providers/anthropic-wire.d.ts +3 -3
  15. package/dist/types/providers/anthropic.d.ts +41 -34
  16. package/dist/types/providers/aws-credentials.d.ts +8 -0
  17. package/dist/types/providers/azure-openai-responses.d.ts +1 -0
  18. package/dist/types/providers/google-gemini-cli.d.ts +22 -1
  19. package/dist/types/providers/google-shared.d.ts +22 -0
  20. package/dist/types/providers/google-types.d.ts +13 -1
  21. package/dist/types/providers/mock.d.ts +8 -3
  22. package/dist/types/providers/ollama.d.ts +6 -0
  23. package/dist/types/providers/openai-chat-server-schema.d.ts +6 -3
  24. package/dist/types/providers/openai-chat-server.d.ts +3 -3
  25. package/dist/types/providers/openai-chat-wire.d.ts +644 -0
  26. package/dist/types/providers/openai-codex/request-transformer.d.ts +8 -0
  27. package/dist/types/providers/openai-codex/response-handler.d.ts +9 -0
  28. package/dist/types/providers/openai-codex-responses.d.ts +31 -2
  29. package/dist/types/providers/openai-completions-compat.d.ts +2 -25
  30. package/dist/types/providers/openai-completions.d.ts +2 -10
  31. package/dist/types/providers/openai-responses-server-schema.d.ts +4 -4
  32. package/dist/types/providers/openai-responses-server.d.ts +2 -2
  33. package/dist/types/providers/openai-responses-shared.d.ts +49 -9
  34. package/dist/types/providers/openai-responses-wire.d.ts +6065 -0
  35. package/dist/types/providers/openai-responses.d.ts +13 -4
  36. package/dist/types/providers/prometheus-native-client.d.ts +9 -0
  37. package/dist/types/providers/prometheus-native-server.d.ts +4 -3
  38. package/dist/types/providers/transform-messages.d.ts +1 -2
  39. package/dist/types/rate-limit-utils.d.ts +3 -2
  40. package/dist/types/registry/aimlapi.d.ts +4 -0
  41. package/dist/types/registry/alibaba-coding-plan.d.ts +7 -0
  42. package/dist/types/registry/amazon-bedrock.d.ts +5 -0
  43. package/dist/types/registry/anthropic.d.ts +10 -0
  44. package/dist/types/{utils/oauth → registry}/api-key-login.d.ts +8 -2
  45. package/dist/types/{utils/oauth → registry}/api-key-validation.d.ts +15 -0
  46. package/dist/types/registry/cerebras.d.ts +7 -0
  47. package/dist/types/registry/cloudflare-ai-gateway.d.ts +13 -0
  48. package/dist/types/registry/cursor.d.ts +7 -0
  49. package/dist/types/registry/deepseek.d.ts +8 -0
  50. package/dist/types/registry/derived.d.ts +5 -0
  51. package/dist/types/registry/firepass.d.ts +16 -0
  52. package/dist/types/registry/fireworks.d.ts +7 -0
  53. package/dist/types/registry/github-copilot.d.ts +7 -0
  54. package/dist/types/registry/gitlab-duo.d.ts +9 -0
  55. package/dist/types/registry/google-antigravity.d.ts +9 -0
  56. package/dist/types/registry/google-gemini-cli.d.ts +9 -0
  57. package/dist/types/registry/google-vertex.d.ts +5 -0
  58. package/dist/types/registry/google.d.ts +4 -0
  59. package/dist/types/registry/groq.d.ts +4 -0
  60. package/dist/types/registry/huggingface.d.ts +7 -0
  61. package/dist/types/registry/index.d.ts +4 -0
  62. package/dist/types/registry/kagi.d.ts +14 -0
  63. package/dist/types/registry/kilo.d.ts +7 -0
  64. package/dist/types/registry/kimi-code.d.ts +7 -0
  65. package/dist/types/registry/litellm.d.ts +13 -0
  66. package/dist/types/registry/lm-studio.d.ts +8 -0
  67. package/dist/types/registry/minimax-code-cn.d.ts +6 -0
  68. package/dist/types/registry/minimax-code.d.ts +6 -0
  69. package/dist/types/registry/minimax.d.ts +4 -0
  70. package/dist/types/registry/mistral.d.ts +4 -0
  71. package/dist/types/registry/moonshot.d.ts +7 -0
  72. package/dist/types/registry/nanogpt.d.ts +7 -0
  73. package/dist/types/registry/nvidia.d.ts +7 -0
  74. package/dist/types/registry/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  75. package/dist/types/{utils → registry}/oauth/anthropic.d.ts +2 -1
  76. package/dist/types/{utils → registry}/oauth/github-copilot.d.ts +15 -23
  77. package/dist/types/{utils → registry}/oauth/index.d.ts +1 -0
  78. package/dist/types/{utils → registry}/oauth/minimax-code.d.ts +5 -5
  79. package/dist/types/{utils → registry}/oauth/types.d.ts +6 -1
  80. package/dist/types/{utils → registry}/oauth/xai-oauth.d.ts +2 -1
  81. package/dist/types/registry/ollama-cloud.d.ts +7 -0
  82. package/dist/types/registry/ollama.d.ts +12 -0
  83. package/dist/types/registry/openai-codex-device.d.ts +8 -0
  84. package/dist/types/registry/openai-codex.d.ts +9 -0
  85. package/dist/types/registry/openai.d.ts +4 -0
  86. package/dist/types/registry/opencode-go.d.ts +6 -0
  87. package/dist/types/registry/opencode-zen.d.ts +6 -0
  88. package/dist/types/registry/openrouter.d.ts +13 -0
  89. package/dist/types/registry/parallel.d.ts +14 -0
  90. package/dist/types/registry/perplexity.d.ts +7 -0
  91. package/dist/types/registry/qianfan.d.ts +7 -0
  92. package/dist/types/registry/qwen-portal.d.ts +7 -0
  93. package/dist/types/registry/registry.d.ts +272 -0
  94. package/dist/types/registry/synthetic.d.ts +6 -0
  95. package/dist/types/registry/tavily.d.ts +14 -0
  96. package/dist/types/registry/together.d.ts +6 -0
  97. package/dist/types/registry/types.d.ts +51 -0
  98. package/dist/types/registry/venice.d.ts +13 -0
  99. package/dist/types/registry/vercel-ai-gateway.d.ts +7 -0
  100. package/dist/types/registry/vllm.d.ts +7 -0
  101. package/dist/types/registry/wafer-pass.d.ts +6 -0
  102. package/dist/types/registry/wafer-serverless.d.ts +6 -0
  103. package/dist/types/registry/xai-oauth.d.ts +7 -0
  104. package/dist/types/registry/xai.d.ts +4 -0
  105. package/dist/types/registry/xiaomi-token-plan-ams.d.ts +6 -0
  106. package/dist/types/registry/xiaomi-token-plan-cn.d.ts +6 -0
  107. package/dist/types/registry/xiaomi-token-plan-sgp.d.ts +6 -0
  108. package/dist/types/registry/xiaomi.d.ts +6 -0
  109. package/dist/types/registry/zai.d.ts +7 -0
  110. package/dist/types/registry/zenmux.d.ts +7 -0
  111. package/dist/types/registry/zhipu-coding-plan.d.ts +7 -0
  112. package/dist/types/stream.d.ts +9 -1
  113. package/dist/types/types.d.ts +56 -295
  114. package/dist/types/usage/google-antigravity.d.ts +15 -1
  115. package/dist/types/usage/openai-codex-reset.d.ts +79 -0
  116. package/dist/types/usage/openai-codex.d.ts +1 -0
  117. package/dist/types/usage.d.ts +77 -4
  118. package/dist/types/utils/abort.d.ts +6 -0
  119. package/dist/types/utils/event-stream.d.ts +2 -0
  120. package/dist/types/utils/http-inspector.d.ts +0 -1
  121. package/dist/types/utils/idle-iterator.d.ts +35 -0
  122. package/dist/types/utils/openai-http.d.ts +58 -0
  123. package/dist/types/utils/request-debug.d.ts +3 -0
  124. package/dist/types/utils/retry-after.d.ts +1 -0
  125. package/dist/types/utils/schema/fields.d.ts +5 -0
  126. package/dist/types/utils/schema/json-schema-validator.d.ts +8 -0
  127. package/dist/types/utils/schema/stamps.d.ts +7 -15
  128. package/dist/types/utils/sse-debug.d.ts +0 -5
  129. package/dist/types/utils/stream-markup-healing.d.ts +2 -0
  130. package/dist/types/utils.d.ts +1 -5
  131. package/package.json +17 -29
  132. package/src/auth-broker/remote-store.ts +10 -1
  133. package/src/auth-broker/snapshot-cache.ts +1 -1
  134. package/src/auth-broker/wire-schemas.ts +1 -1
  135. package/src/auth-gateway/http.ts +1 -1
  136. package/src/auth-gateway/server.ts +95 -30
  137. package/src/auth-gateway/types.ts +10 -2
  138. package/src/auth-retry.ts +238 -0
  139. package/src/auth-storage.ts +935 -430
  140. package/src/errors.ts +32 -0
  141. package/src/index.ts +9 -14
  142. package/src/provider-details.ts +1 -1
  143. package/src/providers/__tests__/google-auth.test.ts +144 -0
  144. package/src/providers/amazon-bedrock.ts +70 -40
  145. package/src/providers/anthropic-client.ts +15 -13
  146. package/src/providers/anthropic-messages-server-schema.ts +17 -7
  147. package/src/providers/anthropic-messages-server.ts +88 -20
  148. package/src/providers/anthropic-wire.ts +4 -3
  149. package/src/providers/anthropic.ts +1234 -621
  150. package/src/providers/aws-credentials.ts +47 -5
  151. package/src/providers/aws-eventstream.ts +5 -0
  152. package/src/providers/azure-openai-responses.ts +117 -67
  153. package/src/providers/cursor.ts +30 -30
  154. package/src/providers/github-copilot-headers.ts +1 -1
  155. package/src/providers/gitlab-duo.ts +36 -29
  156. package/src/providers/google-auth.ts +71 -8
  157. package/src/providers/google-gemini-cli.ts +118 -22
  158. package/src/providers/google-shared.ts +163 -43
  159. package/src/providers/google-types.ts +10 -1
  160. package/src/providers/kimi.ts +1 -1
  161. package/src/providers/mock.ts +11 -3
  162. package/src/providers/ollama.ts +64 -7
  163. package/src/providers/openai-anthropic-shim.ts +17 -8
  164. package/src/providers/openai-chat-server-schema.ts +9 -3
  165. package/src/providers/openai-chat-server.ts +82 -16
  166. package/src/providers/openai-chat-wire.ts +847 -0
  167. package/src/providers/openai-codex/request-transformer.ts +129 -34
  168. package/src/providers/openai-codex/response-handler.ts +22 -1
  169. package/src/providers/openai-codex-responses.ts +699 -247
  170. package/src/providers/openai-completions-compat.ts +8 -308
  171. package/src/providers/openai-completions.ts +416 -267
  172. package/src/providers/openai-responses-server-schema.ts +15 -9
  173. package/src/providers/openai-responses-server.ts +162 -114
  174. package/src/providers/openai-responses-shared.ts +320 -82
  175. package/src/providers/openai-responses-wire.ts +6391 -0
  176. package/src/providers/openai-responses.ts +382 -176
  177. package/src/providers/prometheus-native-client.ts +27 -11
  178. package/src/providers/prometheus-native-server.ts +44 -17
  179. package/src/providers/transform-messages.ts +311 -120
  180. package/src/providers/vision-guard.ts +5 -3
  181. package/src/rate-limit-utils.ts +13 -3
  182. package/src/registry/aimlapi.ts +6 -0
  183. package/src/{utils/oauth → registry}/alibaba-coding-plan.ts +8 -18
  184. package/src/registry/amazon-bedrock.ts +22 -0
  185. package/src/registry/anthropic.ts +26 -0
  186. package/src/{utils/oauth → registry}/api-key-login.ts +25 -3
  187. package/src/{utils/oauth → registry}/api-key-validation.ts +62 -2
  188. package/src/{utils/oauth → registry}/cerebras.ts +8 -1
  189. package/src/{utils/oauth → registry}/cloudflare-ai-gateway.ts +8 -12
  190. package/src/registry/cursor.ts +20 -0
  191. package/src/{utils/oauth → registry}/deepseek.ts +9 -17
  192. package/src/registry/derived.ts +9 -0
  193. package/src/{utils/oauth → registry}/firepass.ts +10 -2
  194. package/src/{utils/oauth → registry}/fireworks.ts +8 -1
  195. package/src/registry/github-copilot.ts +22 -0
  196. package/src/registry/gitlab-duo.ts +19 -0
  197. package/src/registry/google-antigravity.ts +21 -0
  198. package/src/registry/google-gemini-cli.ts +21 -0
  199. package/src/registry/google-vertex.ts +38 -0
  200. package/src/registry/google.ts +6 -0
  201. package/src/registry/groq.ts +6 -0
  202. package/src/{utils/oauth → registry}/huggingface.ts +8 -19
  203. package/src/registry/index.ts +4 -0
  204. package/src/{utils/oauth → registry}/kagi.ts +9 -11
  205. package/src/{utils/oauth → registry}/kilo.ts +11 -6
  206. package/src/registry/kimi-code.ts +17 -0
  207. package/src/{utils/oauth → registry}/litellm.ts +8 -12
  208. package/src/{utils/oauth → registry}/lm-studio.ts +9 -17
  209. package/src/registry/minimax-code-cn.ts +12 -0
  210. package/src/registry/minimax-code.ts +12 -0
  211. package/src/registry/minimax.ts +6 -0
  212. package/src/registry/mistral.ts +6 -0
  213. package/src/{utils/oauth → registry}/moonshot.ts +8 -9
  214. package/src/{utils/oauth → registry}/nanogpt.ts +8 -1
  215. package/src/{utils/oauth → registry}/nvidia.ts +8 -18
  216. package/src/{utils → registry}/oauth/__tests__/xai-oauth.test.ts +4 -7
  217. package/src/{utils → registry}/oauth/anthropic.ts +38 -17
  218. package/src/{utils → registry}/oauth/github-copilot.ts +79 -115
  219. package/src/registry/oauth/gitlab-duo.ts +198 -0
  220. package/src/{utils → registry}/oauth/google-antigravity.ts +1 -4
  221. package/src/{utils → registry}/oauth/google-gemini-cli.ts +1 -4
  222. package/src/registry/oauth/index.ts +164 -0
  223. package/src/{utils → registry}/oauth/minimax-code.ts +16 -14
  224. package/src/{utils → registry}/oauth/types.ts +7 -51
  225. package/src/{utils → registry}/oauth/wafer.ts +1 -1
  226. package/src/{utils → registry}/oauth/xai-oauth.ts +16 -8
  227. package/src/{utils → registry}/oauth/xiaomi.ts +9 -4
  228. package/src/{utils/oauth → registry}/ollama-cloud.ts +8 -1
  229. package/src/{utils/oauth → registry}/ollama.ts +8 -13
  230. package/src/registry/openai-codex-device.ts +18 -0
  231. package/src/registry/openai-codex.ts +19 -0
  232. package/src/registry/openai.ts +6 -0
  233. package/src/registry/opencode-go.ts +12 -0
  234. package/src/registry/opencode-zen.ts +12 -0
  235. package/src/{utils/oauth → registry}/openrouter.ts +10 -2
  236. package/src/{utils/oauth → registry}/parallel.ts +9 -11
  237. package/src/registry/perplexity.ts +13 -0
  238. package/src/{utils/oauth → registry}/qianfan.ts +8 -17
  239. package/src/{utils/oauth → registry}/qwen-portal.ts +8 -19
  240. package/src/registry/registry.ts +149 -0
  241. package/src/{utils/oauth → registry}/synthetic.ts +7 -1
  242. package/src/{utils/oauth → registry}/tavily.ts +10 -12
  243. package/src/{utils/oauth → registry}/together.ts +7 -1
  244. package/src/registry/types.ts +56 -0
  245. package/src/{utils/oauth → registry}/venice.ts +8 -12
  246. package/src/{utils/oauth → registry}/vercel-ai-gateway.ts +8 -18
  247. package/src/{utils/oauth → registry}/vllm.ts +9 -16
  248. package/src/registry/wafer-pass.ts +12 -0
  249. package/src/registry/wafer-serverless.ts +12 -0
  250. package/src/registry/xai-oauth.ts +17 -0
  251. package/src/registry/xai.ts +6 -0
  252. package/src/registry/xiaomi-token-plan-ams.ts +12 -0
  253. package/src/registry/xiaomi-token-plan-cn.ts +12 -0
  254. package/src/registry/xiaomi-token-plan-sgp.ts +12 -0
  255. package/src/registry/xiaomi.ts +12 -0
  256. package/src/{utils/oauth → registry}/zai.ts +10 -22
  257. package/src/{utils/oauth → registry}/zenmux.ts +8 -1
  258. package/src/{utils/oauth/zhipu.ts → registry/zhipu-coding-plan.ts} +9 -21
  259. package/src/stream.ts +229 -199
  260. package/src/types.ts +63 -384
  261. package/src/usage/claude.ts +4 -2
  262. package/src/usage/github-copilot.ts +4 -2
  263. package/src/usage/google-antigravity.ts +196 -28
  264. package/src/usage/kimi.ts +1 -1
  265. package/src/usage/minimax-code.ts +5 -6
  266. package/src/usage/openai-codex-reset.ts +174 -0
  267. package/src/usage/openai-codex.ts +19 -2
  268. package/src/usage/zai.ts +2 -1
  269. package/src/usage.ts +93 -4
  270. package/src/utils/abort.ts +14 -0
  271. package/src/utils/event-stream.ts +17 -0
  272. package/src/utils/http-inspector.ts +4 -12
  273. package/src/utils/idle-iterator.ts +250 -79
  274. package/src/utils/openai-http.ts +157 -0
  275. package/src/utils/request-debug.ts +67 -19
  276. package/src/utils/retry-after.ts +1 -1
  277. package/src/utils/retry.ts +23 -2
  278. package/src/utils/schema/CONSTRAINTS.md +4 -2
  279. package/src/utils/schema/fields.ts +16 -0
  280. package/src/utils/schema/json-schema-validator.ts +19 -1
  281. package/src/utils/schema/normalize.ts +80 -8
  282. package/src/utils/schema/stamps.ts +22 -10
  283. package/src/utils/schema/wire.ts +2 -2
  284. package/src/utils/sse-debug.ts +0 -271
  285. package/src/utils/stream-markup-healing.ts +50 -8
  286. package/src/utils/validation.ts +49 -13
  287. package/src/utils.ts +2 -26
  288. package/dist/types/model-cache.d.ts +0 -17
  289. package/dist/types/model-manager.d.ts +0 -64
  290. package/dist/types/model-thinking.d.ts +0 -100
  291. package/dist/types/models.d.ts +0 -12
  292. package/dist/types/provider-models/bundled-references.d.ts +0 -4
  293. package/dist/types/provider-models/descriptors.d.ts +0 -50
  294. package/dist/types/provider-models/google.d.ts +0 -24
  295. package/dist/types/provider-models/index.d.ts +0 -5
  296. package/dist/types/provider-models/ollama.d.ts +0 -7
  297. package/dist/types/provider-models/openai-compat.d.ts +0 -323
  298. package/dist/types/provider-models/special.d.ts +0 -16
  299. package/dist/types/utils/discovery/antigravity.d.ts +0 -61
  300. package/dist/types/utils/discovery/codex.d.ts +0 -38
  301. package/dist/types/utils/discovery/cursor.d.ts +0 -23
  302. package/dist/types/utils/discovery/gemini.d.ts +0 -25
  303. package/dist/types/utils/discovery/index.d.ts +0 -4
  304. package/dist/types/utils/discovery/openai-compatible.d.ts +0 -72
  305. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +0 -18
  306. package/dist/types/utils/oauth/cerebras.d.ts +0 -1
  307. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +0 -18
  308. package/dist/types/utils/oauth/deepseek.d.ts +0 -10
  309. package/dist/types/utils/oauth/firepass.d.ts +0 -1
  310. package/dist/types/utils/oauth/fireworks.d.ts +0 -1
  311. package/dist/types/utils/oauth/huggingface.d.ts +0 -19
  312. package/dist/types/utils/oauth/kagi.d.ts +0 -17
  313. package/dist/types/utils/oauth/kilo.d.ts +0 -5
  314. package/dist/types/utils/oauth/litellm.d.ts +0 -18
  315. package/dist/types/utils/oauth/lm-studio.d.ts +0 -17
  316. package/dist/types/utils/oauth/moonshot.d.ts +0 -1
  317. package/dist/types/utils/oauth/nanogpt.d.ts +0 -1
  318. package/dist/types/utils/oauth/nvidia.d.ts +0 -18
  319. package/dist/types/utils/oauth/ollama-cloud.d.ts +0 -2
  320. package/dist/types/utils/oauth/ollama.d.ts +0 -18
  321. package/dist/types/utils/oauth/openrouter.d.ts +0 -1
  322. package/dist/types/utils/oauth/parallel.d.ts +0 -17
  323. package/dist/types/utils/oauth/qianfan.d.ts +0 -17
  324. package/dist/types/utils/oauth/qwen-portal.d.ts +0 -19
  325. package/dist/types/utils/oauth/synthetic.d.ts +0 -1
  326. package/dist/types/utils/oauth/tavily.d.ts +0 -17
  327. package/dist/types/utils/oauth/together.d.ts +0 -1
  328. package/dist/types/utils/oauth/venice.d.ts +0 -18
  329. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +0 -18
  330. package/dist/types/utils/oauth/vllm.d.ts +0 -16
  331. package/dist/types/utils/oauth/zai.d.ts +0 -18
  332. package/dist/types/utils/oauth/zenmux.d.ts +0 -1
  333. package/dist/types/utils/oauth/zhipu.d.ts +0 -18
  334. package/src/model-cache.ts +0 -129
  335. package/src/model-manager.ts +0 -469
  336. package/src/model-thinking.ts +0 -756
  337. package/src/models.json +0 -60287
  338. package/src/models.json.d.ts +0 -9
  339. package/src/models.ts +0 -56
  340. package/src/provider-models/bundled-references.ts +0 -38
  341. package/src/provider-models/descriptors.ts +0 -364
  342. package/src/provider-models/google.ts +0 -88
  343. package/src/provider-models/index.ts +0 -5
  344. package/src/provider-models/ollama.ts +0 -153
  345. package/src/provider-models/openai-compat.ts +0 -2904
  346. package/src/provider-models/special.ts +0 -67
  347. package/src/utils/discovery/antigravity.ts +0 -261
  348. package/src/utils/discovery/codex.ts +0 -371
  349. package/src/utils/discovery/cursor.ts +0 -306
  350. package/src/utils/discovery/gemini.ts +0 -248
  351. package/src/utils/discovery/index.ts +0 -4
  352. package/src/utils/discovery/openai-compatible.ts +0 -224
  353. package/src/utils/oauth/gitlab-duo.ts +0 -123
  354. package/src/utils/oauth/index.ts +0 -502
  355. /package/dist/types/{utils/oauth/__tests__/xai-oauth.test.d.ts → providers/__tests__/google-auth.test.d.ts} +0 -0
  356. /package/dist/types/{utils → registry}/oauth/callback-server.d.ts +0 -0
  357. /package/dist/types/{utils → registry}/oauth/cursor.d.ts +0 -0
  358. /package/dist/types/{utils → registry}/oauth/gitlab-duo.d.ts +0 -0
  359. /package/dist/types/{utils → registry}/oauth/google-antigravity.d.ts +0 -0
  360. /package/dist/types/{utils → registry}/oauth/google-gemini-cli.d.ts +0 -0
  361. /package/dist/types/{utils → registry}/oauth/google-oauth-shared.d.ts +0 -0
  362. /package/dist/types/{utils → registry}/oauth/kimi.d.ts +0 -0
  363. /package/dist/types/{utils → registry}/oauth/openai-codex.d.ts +0 -0
  364. /package/dist/types/{utils → registry}/oauth/opencode.d.ts +0 -0
  365. /package/dist/types/{utils → registry}/oauth/perplexity.d.ts +0 -0
  366. /package/dist/types/{utils → registry}/oauth/pkce.d.ts +0 -0
  367. /package/dist/types/{utils → registry}/oauth/wafer.d.ts +0 -0
  368. /package/dist/types/{utils → registry}/oauth/xiaomi.d.ts +0 -0
  369. /package/src/{utils → registry}/oauth/callback-server.ts +0 -0
  370. /package/src/{utils → registry}/oauth/cursor.ts +0 -0
  371. /package/src/{utils → registry}/oauth/google-oauth-shared.ts +0 -0
  372. /package/src/{utils → registry}/oauth/kimi.ts +0 -0
  373. /package/src/{utils → registry}/oauth/oauth.html +0 -0
  374. /package/src/{utils → registry}/oauth/openai-codex.ts +0 -0
  375. /package/src/{utils → registry}/oauth/opencode.ts +0 -0
  376. /package/src/{utils → registry}/oauth/perplexity.ts +0 -0
  377. /package/src/{utils → registry}/oauth/pkce.ts +0 -0
@@ -11,11 +11,12 @@
11
11
  *
12
12
  * Activated when a {@link Model} has `transport: "prometheus-native"` set; the
13
13
  * dispatch hook lives in `streamSimple()` (see `../stream.ts`). Used by
14
- * containerized prometheus deployments (roboprometheus slots, the swarm extension) that
14
+ * containerized prometheus deployments (isolated slots, the swarm extension) that
15
15
  * route every LLM call through a credential-holding sidecar so the slot
16
16
  * itself stays credential-free.
17
17
  */
18
18
  import { readSseJson } from "@prometheus-ai/utils";
19
+ import { ProviderHttpError } from "../errors";
19
20
  import type {
20
21
  Api,
21
22
  AssistantMessage,
@@ -58,7 +59,19 @@ function buildWireOptions(options: SimpleStreamOptions | undefined): Record<stri
58
59
  return wire;
59
60
  }
60
61
 
61
- async function decodeGatewayError(response: Response): Promise<Error> {
62
+ /**
63
+ * Non-2xx response from the auth-gateway `/v1/prometheus/stream` endpoint. `code`
64
+ * carries the gateway's error-type token (`authentication_error`,
65
+ * `rate_limit_error`, `upstream_error`, ...).
66
+ */
67
+ export class AuthGatewayError extends ProviderHttpError {
68
+ constructor(message: string, status: number, headers?: Headers, code?: string) {
69
+ super(message, status, { headers, code });
70
+ this.name = "AuthGatewayError";
71
+ }
72
+ }
73
+
74
+ async function decodeGatewayError(response: Response): Promise<AuthGatewayError> {
62
75
  const status = response.status;
63
76
  let body: unknown;
64
77
  try {
@@ -71,16 +84,16 @@ async function decodeGatewayError(response: Response): Promise<Error> {
71
84
  if (typeof err === "object" && err !== null) {
72
85
  const message = (err as { message?: unknown }).message;
73
86
  const type = (err as { type?: unknown }).type;
74
- const out = new Error(typeof message === "string" ? message : `auth-gateway ${status}`);
75
- (out as { status?: number; type?: string }).status = status;
76
- if (typeof type === "string") (out as { type?: string }).type = type;
77
- return out;
87
+ return new AuthGatewayError(
88
+ typeof message === "string" ? message : `auth-gateway ${status}`,
89
+ status,
90
+ response.headers,
91
+ typeof type === "string" ? type : undefined,
92
+ );
78
93
  }
79
94
  }
80
95
  const text = typeof body === "string" ? body : JSON.stringify(body);
81
- const err = new Error(`auth-gateway ${status}: ${text || response.statusText}`);
82
- (err as { status?: number }).status = status;
83
- return err;
96
+ return new AuthGatewayError(`auth-gateway ${status}: ${text || response.statusText}`, status, response.headers);
84
97
  }
85
98
 
86
99
  /**
@@ -149,9 +162,12 @@ export function streamPrometheusNative<TApi extends Api>(
149
162
  try {
150
163
  const url = resolveStreamUrl(model as Model<Api>);
151
164
  const fetchImpl = options?.fetch ?? globalThis.fetch;
152
- const headers = buildHeaders(model as Model<Api>, options?.apiKey);
165
+ const headers = buildHeaders(
166
+ model as Model<Api>,
167
+ typeof options?.apiKey === "string" ? options.apiKey : undefined,
168
+ );
153
169
  const body = JSON.stringify({
154
- modelId: model.id,
170
+ modelId: `${model.provider}/${model.id}`,
155
171
  context,
156
172
  options: buildWireOptions(options),
157
173
  stream: true,
@@ -4,7 +4,7 @@
4
4
  * Where the OpenAI / Anthropic / Responses route modules translate foreign
5
5
  * wire shapes through Prometheus AI's canonical {@link Context}, this module accepts
6
6
  * the canonical shape *directly* — for clients that already speak Prometheus AI
7
- * (containerized prometheus, the swarm extension, roboprometheus's sidecar auth-gateway).
7
+ * (containerized prometheus, the swarm extension, sidecar auth-gateways).
8
8
  * Skipping the wire-format → Context → wire-format round-trip cuts
9
9
  * per-request CPU but, more importantly, avoids the quantization that those
10
10
  * translations impose on first-class Prometheus AI fields (service tier, cache
@@ -25,6 +25,7 @@
25
25
  * 200 JSON (stream=false): { message: AssistantMessage }
26
26
  * 4xx/5xx: { error: { type, message } }
27
27
  */
28
+ import type { AuthGatewayStreamControl } from "../auth-gateway/types";
28
29
  import type { AssistantMessageEventStream, Context, SimpleStreamOptions } from "../types";
29
30
 
30
31
  export interface PrometheusNativeParsedRequest {
@@ -156,36 +157,62 @@ const SSE_DONE = SSE_ENCODER.encode("data: [DONE]\n\n");
156
157
  * canonical event type IS the wire type. Including the rolling
157
158
  * `partial: AssistantMessage` on every delta is quadratic in turn length
158
159
  * on the wire, but for the loopback / sidecar topology this transport
159
- * targets (containerized prometheus → host gateway, roboprometheus slot → prometheus-auth-gateway
160
+ * targets (containerized prometheus → host gateway, isolated slot → prometheus-auth-gateway
160
161
  * sidecar) the bandwidth cost is negligible compared to provider latency —
161
162
  * and the client gets to feed the events straight into its existing
162
163
  * `AssistantMessageEventStream.push()` plumbing with zero translation.
163
164
  */
164
- export function encodeStream(events: AssistantMessageEventStream): ReadableStream<Uint8Array> {
165
+ export function encodeStream(
166
+ events: AssistantMessageEventStream,
167
+ _requestedModelId?: string,
168
+ _options?: SimpleStreamOptions,
169
+ control?: AuthGatewayStreamControl,
170
+ ): ReadableStream<Uint8Array> {
171
+ let cancelled = control?.signal?.aborted === true;
172
+ const markCancelled = () => {
173
+ cancelled = true;
174
+ };
175
+ control?.signal?.addEventListener("abort", markCancelled, { once: true });
165
176
  return new ReadableStream<Uint8Array>({
166
177
  async start(controller) {
167
178
  try {
179
+ if (cancelled) {
180
+ controller.close();
181
+ return;
182
+ }
168
183
  for await (const event of events) {
184
+ if (cancelled) return;
169
185
  controller.enqueue(SSE_ENCODER.encode(`data: ${JSON.stringify(event)}\n\n`));
170
186
  if (event.type === "done" || event.type === "error") break;
171
187
  }
172
- controller.enqueue(SSE_DONE);
173
- controller.close();
188
+ if (!cancelled) {
189
+ controller.enqueue(SSE_DONE);
190
+ controller.close();
191
+ }
174
192
  } catch (err) {
175
- // Best-effort error envelope so the client iterator resolves
176
- // instead of hanging on the dropped connection. Shape matches the
177
- // canonical `error` event minus the unrecoverable `error:
178
- // AssistantMessage` payload (we don't have a usable one here).
179
- const message = err instanceof Error ? err.message : String(err);
180
- controller.enqueue(
181
- SSE_ENCODER.encode(
182
- `data: ${JSON.stringify({ type: "error", reason: "error", errorMessage: message })}\n\n`,
183
- ),
184
- );
185
- controller.enqueue(SSE_DONE);
186
- controller.close();
193
+ if (!cancelled) {
194
+ // Best-effort error envelope so the client iterator resolves
195
+ // instead of hanging on the dropped connection. Shape matches the
196
+ // canonical `error` event minus the unrecoverable `error:
197
+ // AssistantMessage` payload (we don't have a usable one here).
198
+ const message = err instanceof Error ? err.message : String(err);
199
+ controller.enqueue(
200
+ SSE_ENCODER.encode(
201
+ `data: ${JSON.stringify({ type: "error", reason: "error", errorMessage: message })}\n\n`,
202
+ ),
203
+ );
204
+ controller.enqueue(SSE_DONE);
205
+ controller.close();
206
+ }
207
+ } finally {
208
+ control?.signal?.removeEventListener("abort", markCancelled);
187
209
  }
188
210
  },
211
+ cancel(reason) {
212
+ cancelled = true;
213
+ control?.signal?.removeEventListener("abort", markCancelled);
214
+ control?.onCancel?.(reason);
215
+ },
189
216
  });
190
217
  }
191
218
 
@@ -1,14 +1,4 @@
1
- import turnAbortedGuidance from "../prompts/turn-aborted-guidance.md" with { type: "text" };
2
- import type {
3
- Api,
4
- AssistantMessage,
5
- DeveloperMessage,
6
- Message,
7
- Model,
8
- ToolCall,
9
- ToolResultMessage,
10
- UserMessage,
11
- } from "../types";
1
+ import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage, UserMessage } from "../types";
12
2
 
13
3
  const enum ToolCallStatus {
14
4
  /** A tool result has already been emitted for this tool call; later duplicates must be skipped. */
@@ -17,6 +7,123 @@ const enum ToolCallStatus {
17
7
  Aborted = 2,
18
8
  }
19
9
 
10
+ /**
11
+ * Maximum tool-call id length the strictest replay provider accepts.
12
+ *
13
+ * Anthropic requires `^[a-zA-Z0-9_-]+$` with a 64-char cap; Google and Codex
14
+ * `normalizeToolCallId` implementations cap individual id segments to the same
15
+ * 64-char ceiling. Replacement ids minted here flow back through
16
+ * `convertAnthropicMessages` (and friends) unchanged, so the `_dupN` suffix
17
+ * MUST NOT push a normalized id past this bound.
18
+ */
19
+ const MAX_TOOL_CALL_ID_LENGTH = 64;
20
+
21
/**
 * Appends `suffix` to a tool-call id, treating `|` as a segment separator.
 *
 * A composite id (`callId|itemId`) has the suffix applied to every segment and
 * is then re-joined with `|`; an id without `|` is handled as a single segment.
 * The `maxLength` budget is passed to the per-segment helper, so it applies to
 * each segment separately rather than to the joined result.
 *
 * @param originalId - Id to derive the duplicate id from.
 * @param suffix - Text to append to each segment.
 * @param maxLength - Per-segment length budget.
 * @returns The id with every segment suffixed.
 */
function appendDuplicateSuffix(originalId: string, suffix: string, maxLength: number): string {
	// `split` yields a one-element array when there is no `|`, so plain and
	// composite ids share the same path.
	const suffixedSegments: string[] = [];
	for (const segment of originalId.split("|")) {
		suffixedSegments.push(appendSegmentDuplicateSuffix(segment, suffix, maxLength));
	}
	return suffixedSegments.join("|");
}
35
+
36
/**
 * Appends `suffix` to one id segment, trimming the segment's tail when the
 * combined length would exceed `maxLength`.
 *
 * The suffix is always emitted in full. When the suffix alone is longer than
 * `maxLength`, the segment is dropped entirely and the result is just the
 * suffix, which is then longer than `maxLength`.
 *
 * @param segment - Id segment to extend.
 * @param suffix - Text that must end the result.
 * @param maxLength - Target upper bound for the result length.
 * @returns The (possibly truncated) segment followed by `suffix`.
 */
function appendSegmentDuplicateSuffix(segment: string, suffix: string, maxLength: number): string {
	// Characters of `segment` that still fit in front of the suffix. `slice`
	// returns the whole segment when the budget is at least its length, so the
	// fitting and truncating cases need no separate branch.
	const prefixBudget = Math.max(0, maxLength - suffix.length);
	return segment.slice(0, prefixBudget) + suffix;
}
41
+
42
+ type PendingToolResultRewrite = { replacementId: string } | undefined;
43
+
44
/**
 * Gives every assistant `toolCall` block in the transcript a distinct id and
 * re-points the tool results that follow at the rewritten ids.
 *
 * The first call seen with a given id keeps it. Each later call with the same
 * id is renamed via `appendDuplicateSuffix` using
 * `<duplicateSuffixPrefix><n>`, with `n` advanced until the candidate id has
 * not been seen. Tool results consume the pending entries for their id in FIFO
 * order. When an assistant turn re-uses an id, entries still pending from
 * earlier turns are discarded on that id's first appearance in the turn, so
 * the next result routes to one of the current turn's calls.
 *
 * Messages that need no change are returned by reference; changed messages are
 * shallow copies. The input array is not mutated.
 *
 * @param messages - Transcript to scan, in order.
 * @param maxToolCallIdLength - Length budget handed to `appendDuplicateSuffix`.
 * @param duplicateSuffixPrefix - Text placed before the numeric duplicate index.
 * @returns A new array with the same length and ordering as `messages`.
 */
function deduplicateToolCallIds(
	messages: Message[],
	maxToolCallIdLength = MAX_TOOL_CALL_ID_LENGTH,
	duplicateSuffixPrefix = "_dup",
): Message[] {
	// id -> first duplicate index to try next time. Minted replacement ids are
	// recorded here too, so they can never be handed out twice.
	const nextDuplicateIndexById = new Map<string, number>();
	// original id -> FIFO of outcomes awaiting a tool result; `undefined` means
	// "leave the result's id alone".
	const rewriteQueues = new Map<string, PendingToolResultRewrite[]>();

	const queueRewrite = (id: string, rewrite: PendingToolResultRewrite): void => {
		const queue = rewriteQueues.get(id);
		if (queue) {
			queue.push(rewrite);
		} else {
			rewriteQueues.set(id, [rewrite]);
		}
	};

	const output: Message[] = [];
	for (const msg of messages) {
		if (msg.role === "toolResult") {
			const queue = rewriteQueues.get(msg.toolCallId);
			if (!queue || queue.length === 0) {
				output.push(msg);
				continue;
			}
			const next = queue.shift();
			if (queue.length === 0) rewriteQueues.delete(msg.toolCallId);
			output.push(next ? { ...msg, toolCallId: next.replacementId } : msg);
			continue;
		}

		if (msg.role !== "assistant") {
			output.push(msg);
			continue;
		}

		// Tracks which ids this turn has already handled, so the stale-queue
		// reset below fires only once per id per turn and repeated blocks
		// within the same turn still accumulate as duplicates.
		const idsSeenThisTurn = new Set<string>();
		let rewroteAnyBlock = false;
		const content = msg.content.map(block => {
			if (block.type !== "toolCall") return block;

			const originalId = block.id;
			if (!idsSeenThisTurn.has(originalId)) {
				// Entries left over from an earlier turn would otherwise claim
				// the next result for this id.
				rewriteQueues.delete(originalId);
				idsSeenThisTurn.add(originalId);
			}

			const startIndex = nextDuplicateIndexById.get(originalId) ?? 0;
			if (startIndex === 0) {
				nextDuplicateIndexById.set(originalId, 1);
				queueRewrite(originalId, undefined);
				return block;
			}

			// Probe suffix indices from `startIndex` upward until the candidate
			// id is unused.
			let index = startIndex - 1;
			let replacementId: string;
			do {
				index += 1;
				replacementId = appendDuplicateSuffix(
					originalId,
					`${duplicateSuffixPrefix}${index}`,
					maxToolCallIdLength,
				);
			} while (nextDuplicateIndexById.has(replacementId));

			nextDuplicateIndexById.set(originalId, index + 1);
			nextDuplicateIndexById.set(replacementId, 1);
			queueRewrite(originalId, { replacementId });
			rewroteAnyBlock = true;
			return { ...block, id: replacementId };
		});

		output.push(rewroteAnyBlock ? { ...msg, content } : msg);
	}
	return output;
}
126
+
20
127
  function shouldDropTruncatedThinkingOnlyAssistant(msg: AssistantMessage): boolean {
21
128
  const isTruncatedStop = msg.stopReason === "length" || msg.stopReason === "error" || msg.stopReason === "aborted";
22
129
  return isTruncatedStop && !msg.content.some(block => block.type === "toolCall" || block.type === "text");
@@ -32,6 +139,10 @@ function getLatestSurvivingAssistantIndex(messages: readonly Message[]): number
32
139
  return -1;
33
140
  }
34
141
 
142
/**
 * Type guard: reports whether `model` targets the `anthropic-messages` API,
 * narrowing it to `Model<"anthropic-messages">` for the caller.
 *
 * @param model - Model descriptor to inspect.
 * @returns `true` when `model.api` is exactly `"anthropic-messages"`.
 */
function isAnthropicMessagesModel(model: Model): model is Model<"anthropic-messages"> {
	const { api } = model;
	return api === "anthropic-messages";
}
145
+
35
146
  /**
36
147
  * Normalize tool call ID for cross-provider compatibility.
37
148
  * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`.
@@ -40,128 +151,218 @@ function getLatestSurvivingAssistantIndex(messages: readonly Message[]): number
40
151
  * For aborted/errored turns, this function:
41
152
  * - Preserves tool call structure (unlike converting to text summaries)
42
153
  * - Injects synthetic "aborted" tool results
43
- * - Adds a <turn-aborted> guidance marker for the model
44
154
  */
45
155
  export function transformMessages<TApi extends Api>(
46
156
  messages: Message[],
47
157
  model: Model<TApi>,
48
158
  normalizeToolCallId?: (id: string, model: Model<TApi>, source: AssistantMessage) => string,
159
+ maxNormalizedToolCallIdLength = MAX_TOOL_CALL_ID_LENGTH,
160
+ duplicateToolCallIdSuffixPrefix = "_dup",
49
161
  ): Message[] {
50
162
  // Build a map of original tool call IDs to normalized IDs
51
163
  const toolCallIdMap = new Map<string, string>();
52
164
 
53
165
  const latestSurvivingAssistantIndex = getLatestSurvivingAssistantIndex(messages);
54
166
  // First pass: transform messages (thinking blocks, tool call ID normalization)
55
- const transformed = messages.map((msg, index) => {
56
- // User and developer messages pass through unchanged
57
- if (msg.role === "user" || msg.role === "developer") {
58
- return msg;
59
- }
60
-
61
- // Handle toolResult messages - normalize toolCallId if we have a mapping
62
- if (msg.role === "toolResult") {
63
- const normalizedId = toolCallIdMap.get(msg.toolCallId);
64
- if (normalizedId && normalizedId !== msg.toolCallId) {
65
- return { ...msg, toolCallId: normalizedId };
167
+ const transformed = deduplicateToolCallIds(
168
+ messages.map((msg, index) => {
169
+ // User and developer messages pass through unchanged
170
+ if (msg.role === "user" || msg.role === "developer") {
171
+ return msg;
66
172
  }
67
- return msg;
68
- }
69
-
70
- // Assistant messages need transformation check
71
- if (msg.role === "assistant") {
72
- const assistantMsg = msg as AssistantMessage;
73
- const isSameModel =
74
- assistantMsg.provider === model.provider &&
75
- assistantMsg.api === model.api &&
76
- assistantMsg.model === model.id;
77
-
78
- const mustPreserveLatestAnthropicThinking =
79
- index === latestSurvivingAssistantIndex &&
80
- model.api === "anthropic-messages" &&
81
- assistantMsg.api === "anthropic-messages";
82
- // Aborted/errored messages may have partially-streamed thinking signatures.
83
- // A partial signature is invalid and will be rejected by the API, so we must
84
- // strip signatures from thinking blocks in these messages.
85
- //
86
- // Abandoned tool-use turns get the same treatment once they are no longer
87
- // the latest assistant message. When a turn carries toolCall blocks but did
88
- // NOT request tool execution (stopReason !== "toolUse" — e.g.
89
- // adaptive-thinking Opus emitting tool calls and then ending the turn on
90
- // `end_turn`/`stop`), the agent loop pairs those calls with placeholder
91
- // tool_results to keep the tool_use/tool_result contract valid. Historical
92
- // abandoned turns cannot safely replay their end_turn-bound signatures in
93
- // that continuation, so stripping downgrades them to plain text downstream.
94
- // Latest abandoned turns are exempt because Anthropic requires thinking
95
- // blocks from its most recent response to remain byte-for-byte unmodified.
96
- const invalidStopReason = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
97
- const abandonedToolUse =
98
- !invalidStopReason &&
99
- assistantMsg.stopReason !== "toolUse" &&
100
- assistantMsg.content.some(b => b.type === "toolCall");
101
- const hasInvalidSignatures = invalidStopReason || abandonedToolUse;
102
-
103
- const transformedContent = assistantMsg.content.flatMap(block => {
104
- if (block.type === "thinking") {
105
- // Strip untrustworthy signatures so the encoder can downgrade to text.
106
- const sanitized =
107
- hasInvalidSignatures && block.thinkingSignature ? { ...block, thinkingSignature: undefined } : block;
108
- if (mustPreserveLatestAnthropicThinking) return abandonedToolUse ? block : sanitized;
109
- // For same model: keep thinking blocks with signatures (needed for replay)
110
- // even if the thinking text is empty (OpenAI encrypted reasoning)
111
- if (isSameModel && sanitized.thinkingSignature) return sanitized;
112
- // Skip empty thinking blocks, convert others to plain text
113
- if (!sanitized.thinking || sanitized.thinking.trim() === "") return [];
114
- if (isSameModel) return sanitized;
115
- return {
116
- type: "text" as const,
117
- text: sanitized.thinking,
118
- };
119
- }
120
173
 
121
- if (block.type === "redactedThinking") {
122
- if (mustPreserveLatestAnthropicThinking) return block;
123
- if (isSameModel) return block;
124
- return [];
174
+ // Handle toolResult messages - normalize toolCallId if we have a mapping
175
+ if (msg.role === "toolResult") {
176
+ const normalizedId = toolCallIdMap.get(msg.toolCallId);
177
+ if (normalizedId && normalizedId !== msg.toolCallId) {
178
+ return { ...msg, toolCallId: normalizedId };
125
179
  }
180
+ return msg;
181
+ }
126
182
 
127
- if (block.type === "text") {
128
- if (isSameModel) return block;
129
- return {
130
- type: "text" as const,
131
- text: block.text,
132
- };
133
- }
183
+ // Assistant messages need transformation check
184
+ if (msg.role === "assistant") {
185
+ const assistantMsg = msg as AssistantMessage;
186
+ const isSameModel =
187
+ assistantMsg.provider === model.provider &&
188
+ assistantMsg.api === model.api &&
189
+ assistantMsg.model === model.id;
190
+
191
+ const isAnthropicTarget = isAnthropicMessagesModel(model);
192
+ // Anthropic's all-or-none contract on prior-turn thinking blocks
193
+ // applies to every `anthropic-messages → anthropic-messages` replay,
194
+ // not just the latest assistant turn. The legacy
195
+ // `mustPreserveLatestAnthropicThinking` flag only honored it for the
196
+ // latest turn; every prior turn fell through to the cross-API
197
+ // text-demotion path whenever the conversation crossed a model id,
198
+ // silently dropping the reasoning chain on continuation for custom
199
+ // anthropic-messages providers configured via `models.yaml` and
200
+ // session-level model swaps (#2257).
201
+ const isAnthropicReplay = isAnthropicTarget && assistantMsg.api === "anthropic-messages";
202
+ const isLatestSurvivingAssistant = index === latestSurvivingAssistantIndex;
203
+ // Signature policy is a second axis. Anthropic cryptographically
204
+ // binds reasoning signatures to its key+session+model, so cross-model
205
+ // signatures must be stripped whenever official Anthropic is on
206
+ // either end of the replay:
207
+ // * official → 3p: the 3p target can't reverify the signature;
208
+ // keeping it leaks private continuation metadata for no benefit.
209
+ // * 3p → official: official rejects a foreign signature outright.
210
+ // * official → official cross-model: the new model rejects the
211
+ // previous model's signature.
212
+ // 3p ↔ 3p replays preserve signatures because compatible providers
213
+ // (Z.AI, DeepSeek, custom `models.yaml` providers) treat them as
214
+ // opaque continuation hints rather than verified material; stripping
215
+ // degrades the reasoning chain into unsigned/text on the next turn
216
+ // (#2265). Source-side official detection uses the canonical catalog
217
+ // provider id `"anthropic"` because assistant messages carry no
218
+ // `baseUrl` — a user who manually points `provider: "anthropic"` at
219
+ // a custom proxy via `models.yaml` will see signatures stripped, the
220
+ // conservative direction (degraded reasoning, not broken requests).
221
+ const isOfficialAnthropicSource = isAnthropicReplay && assistantMsg.provider === "anthropic";
222
+ const isOfficialAnthropicTarget = isAnthropicTarget && model.compat.officialEndpoint;
223
+ const officialAnthropicInvolved = isOfficialAnthropicSource || isOfficialAnthropicTarget;
224
+ // Compatible Anthropic-messages reasoning targets that accept
225
+ // unsigned thinking natively (Z.AI, DeepSeek, the generic
226
+ // `reasoning && !official` case in the compat builder). Used to keep
227
+ // `redacted_thinking` siblings beside unsigned visible thinking on
228
+ // targets that won't text-demote it.
229
+ const replaysUnsignedAnthropicThinking = isAnthropicTarget && model.compat.replayUnsignedThinking;
230
+ // Thinking signatures can be untrustworthy for two distinct reasons with very
231
+ // different blast radii:
232
+ //
233
+ // 1. Aborted/errored turns: the stream stopped mid-block, so only the block
234
+ // that was streaming at the abort point — always the FINAL content block —
235
+ // can carry a partially-streamed (invalid) signature. Every earlier block
236
+ // completed: Anthropic delivers a block's signature at its
237
+ // `content_block_stop`, which necessarily fired before the next block began,
238
+ // so those signatures are whole and valid. Stripping them would needlessly
239
+ // discard a replayable thinking chain — e.g. interrupting during the visible
240
+ // text output after thinking already finished leaves a fully-signed thinking
241
+ // block that must be kept, or Anthropic rejects the replay with HTTP 400
242
+ // "Invalid `signature` in `thinking` block".
243
+ //
244
+ // 2. Abandoned tool-use turns: a turn that carries toolCall blocks but did NOT
245
+ // request tool execution (stopReason !== "toolUse" — e.g. adaptive-thinking
246
+ // Opus emitting tool calls and then ending on `end_turn`/`stop`). The agent
247
+ // loop pairs those calls with placeholder tool_results to keep the
248
+ // tool_use/tool_result contract valid. The turn completed cleanly, but its
249
+ // signatures are end_turn-bound and cannot be replayed in that synthesized
250
+ // continuation, so EVERY thinking signature is stripped.
251
+ //
252
+ // Latest abandoned turns are exempt because Anthropic requires thinking blocks
253
+ // from its most recent response to remain byte-for-byte unmodified.
254
+ const invalidStopReason = assistantMsg.stopReason === "aborted" || assistantMsg.stopReason === "error";
255
+ const abandonedToolUse =
256
+ !invalidStopReason &&
257
+ assistantMsg.stopReason !== "toolUse" &&
258
+ assistantMsg.content.some(b => b.type === "toolCall");
259
+ const lastBlockIndex = assistantMsg.content.length - 1;
260
+
261
+ const transformedContent = assistantMsg.content.flatMap((block, blockIndex) => {
262
+ if (block.type === "thinking") {
263
+ // Only an aborted/errored turn's final (mid-stream) block can hold a
264
+ // partial signature; abandoned tool-use turns strip all. Drop the
265
+ // untrustworthy signature so the encoder can downgrade the block to text.
266
+ const signatureUntrustworthy =
267
+ abandonedToolUse || (invalidStopReason && blockIndex === lastBlockIndex);
268
+ let sanitized: typeof block =
269
+ signatureUntrustworthy && block.thinkingSignature
270
+ ? { ...block, thinkingSignature: undefined }
271
+ : block;
272
+ if (isAnthropicReplay) {
273
+ // Latest abandoned turn: Anthropic's byte-for-byte rule forbids
274
+ // even stripping a signature on the latest message.
275
+ if (isLatestSurvivingAssistant && abandonedToolUse) return block;
276
+ // Cross-model prior turns crossing an official Anthropic endpoint
277
+ // must strip the source signature so the downstream encoder
278
+ // applies its `replayUnsignedThinking` policy (unsigned thinking
279
+ // is emitted natively on Anthropic-compatible reasoning endpoints
280
+ // and demoted to text on official Anthropic). 3p ↔ 3p replays
281
+ // keep the signature so the reasoning chain stays signed on
282
+ // continuation (#2265).
283
+ if (
284
+ !isLatestSurvivingAssistant &&
285
+ !isSameModel &&
286
+ officialAnthropicInvolved &&
287
+ sanitized.thinkingSignature
288
+ ) {
289
+ sanitized = { ...sanitized, thinkingSignature: undefined };
290
+ }
291
+ // Drop blocks with neither a signature anchor nor any text —
292
+ // nothing for the next turn to replay.
293
+ if (!sanitized.thinkingSignature && (!sanitized.thinking || sanitized.thinking.trim() === "")) {
294
+ return [];
295
+ }
296
+ return sanitized;
297
+ }
298
+ // Cross-API target: keep the existing text-demotion fallback.
299
+ // For same model: keep thinking blocks with signatures (needed for replay)
300
+ // even if the thinking text is empty (OpenAI encrypted reasoning)
301
+ if (isSameModel && sanitized.thinkingSignature) return sanitized;
302
+ // Skip empty thinking blocks, convert others to plain text
303
+ if (!sanitized.thinking || sanitized.thinking.trim() === "") return [];
304
+ if (isSameModel) return sanitized;
305
+ return {
306
+ type: "text" as const,
307
+ text: sanitized.thinking,
308
+ };
309
+ }
134
310
 
135
- if (block.type === "toolCall") {
136
- const toolCall = block as ToolCall;
137
- let normalizedToolCall: ToolCall = toolCall;
311
+ if (block.type === "redactedThinking") {
312
+ // Redacted thinking is native-only. Keep it for same-model
313
+ // signed replay, the latest byte-for-byte Anthropic turn, or
314
+ // compatible targets that will also emit sibling unsigned
315
+ // thinking natively. Drop it when the visible thinking was
316
+ // cross-model stripped and will be demoted to text.
317
+ if (isAnthropicReplay) {
318
+ if (isSameModel || isLatestSurvivingAssistant || replaysUnsignedAnthropicThinking) return block;
319
+ return [];
320
+ }
321
+ if (isSameModel) return block;
322
+ return [];
323
+ }
138
324
 
139
- if (!isSameModel && toolCall.thoughtSignature) {
140
- normalizedToolCall = { ...toolCall };
141
- delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature;
325
+ if (block.type === "text") {
326
+ if (isSameModel) return block;
327
+ return {
328
+ type: "text" as const,
329
+ text: block.text,
330
+ };
142
331
  }
143
332
 
144
- if (!isSameModel && normalizeToolCallId) {
145
- const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg);
146
- if (normalizedId !== toolCall.id) {
147
- toolCallIdMap.set(toolCall.id, normalizedId);
148
- normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
333
+ if (block.type === "toolCall") {
334
+ const toolCall = block as ToolCall;
335
+ let normalizedToolCall: ToolCall = toolCall;
336
+
337
+ if (!isSameModel && toolCall.thoughtSignature) {
338
+ normalizedToolCall = { ...toolCall };
339
+ delete (normalizedToolCall as { thoughtSignature?: string }).thoughtSignature;
149
340
  }
150
- }
151
341
 
152
- return normalizedToolCall;
153
- }
342
+ if (!isSameModel && normalizeToolCallId) {
343
+ const normalizedId = normalizeToolCallId(toolCall.id, model, assistantMsg);
344
+ if (normalizedId !== toolCall.id) {
345
+ toolCallIdMap.set(toolCall.id, normalizedId);
346
+ normalizedToolCall = { ...normalizedToolCall, id: normalizedId };
347
+ }
348
+ }
154
349
 
155
- return block;
156
- });
350
+ return normalizedToolCall;
351
+ }
157
352
 
158
- return {
159
- ...assistantMsg,
160
- content: transformedContent,
161
- };
162
- }
163
- return msg;
164
- });
353
+ return block;
354
+ });
355
+
356
+ return {
357
+ ...assistantMsg,
358
+ content: transformedContent,
359
+ };
360
+ }
361
+ return msg;
362
+ }),
363
+ maxNormalizedToolCallIdLength,
364
+ duplicateToolCallIdSuffixPrefix,
365
+ );
165
366
  const realToolResultsById = new Map<string, ToolResultMessage>();
166
367
  for (const msg of transformed) {
167
368
  if (msg.role === "toolResult" && !realToolResultsById.has(msg.toolCallId)) {
@@ -235,11 +436,6 @@ export function transformMessages<TApi extends Api>(
235
436
  } as ToolResultMessage);
236
437
  toolCallStatus.set(tc.id, ToolCallStatus.Aborted);
237
438
  }
238
- result.push({
239
- role: "developer",
240
- content: turnAbortedGuidance,
241
- timestamp: pendingAbortedTimestamp + 1,
242
- } as DeveloperMessage);
243
439
  pendingAbortedToolCalls = new Map();
244
440
  pendingAbortedTimestamp = undefined;
245
441
  };
@@ -268,11 +464,6 @@ export function transformMessages<TApi extends Api>(
268
464
  // (OpenAI completions `reasoning_text`, Google signed thought parts).
269
465
  const originalMsg = messages[i]!;
270
466
  if (originalMsg.role === "assistant" && shouldDropTruncatedThinkingOnlyAssistant(originalMsg)) {
271
- if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") {
272
- // Still arm the aborted-turn note so downstream guidance fires.
273
- pendingAbortedToolCalls = new Map();
274
- pendingAbortedTimestamp = assistantMsg.timestamp;
275
- }
276
467
  continue;
277
468
  }
278
469