@prometheus-ai/ai 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. package/CHANGELOG.md +7 -0
  2. package/README.md +1184 -0
  3. package/dist/types/api-registry.d.ts +30 -0
  4. package/dist/types/auth-broker/client.d.ts +66 -0
  5. package/dist/types/auth-broker/index.d.ts +6 -0
  6. package/dist/types/auth-broker/refresher.d.ts +25 -0
  7. package/dist/types/auth-broker/remote-store.d.ts +101 -0
  8. package/dist/types/auth-broker/server.d.ts +32 -0
  9. package/dist/types/auth-broker/snapshot-cache.d.ts +17 -0
  10. package/dist/types/auth-broker/types.d.ts +107 -0
  11. package/dist/types/auth-broker/wire-schemas.d.ts +412 -0
  12. package/dist/types/auth-gateway/http.d.ts +39 -0
  13. package/dist/types/auth-gateway/index.d.ts +3 -0
  14. package/dist/types/auth-gateway/server.d.ts +36 -0
  15. package/dist/types/auth-gateway/types.d.ts +117 -0
  16. package/dist/types/auth-storage.d.ts +762 -0
  17. package/dist/types/index.d.ts +49 -0
  18. package/dist/types/model-cache.d.ts +17 -0
  19. package/dist/types/model-manager.d.ts +64 -0
  20. package/dist/types/model-thinking.d.ts +100 -0
  21. package/dist/types/models.d.ts +12 -0
  22. package/dist/types/provider-details.d.ts +24 -0
  23. package/dist/types/provider-models/bundled-references.d.ts +4 -0
  24. package/dist/types/provider-models/descriptors.d.ts +50 -0
  25. package/dist/types/provider-models/google.d.ts +24 -0
  26. package/dist/types/provider-models/index.d.ts +5 -0
  27. package/dist/types/provider-models/ollama.d.ts +7 -0
  28. package/dist/types/provider-models/openai-compat.d.ts +323 -0
  29. package/dist/types/provider-models/special.d.ts +16 -0
  30. package/dist/types/providers/amazon-bedrock.d.ts +38 -0
  31. package/dist/types/providers/anthropic-client.d.ts +99 -0
  32. package/dist/types/providers/anthropic-messages-server-schema.d.ts +465 -0
  33. package/dist/types/providers/anthropic-messages-server.d.ts +17 -0
  34. package/dist/types/providers/anthropic-wire.d.ts +262 -0
  35. package/dist/types/providers/anthropic.d.ts +206 -0
  36. package/dist/types/providers/aws-credentials.d.ts +43 -0
  37. package/dist/types/providers/aws-eventstream.d.ts +38 -0
  38. package/dist/types/providers/aws-sigv4.d.ts +55 -0
  39. package/dist/types/providers/azure-openai-responses.d.ts +15 -0
  40. package/dist/types/providers/cursor/gen/agent_pb.d.ts +13022 -0
  41. package/dist/types/providers/cursor.d.ts +43 -0
  42. package/dist/types/providers/error-message.d.ts +27 -0
  43. package/dist/types/providers/github-copilot-headers.d.ts +40 -0
  44. package/dist/types/providers/gitlab-duo.d.ts +27 -0
  45. package/dist/types/providers/google-auth.d.ts +24 -0
  46. package/dist/types/providers/google-gemini-cli.d.ts +81 -0
  47. package/dist/types/providers/google-gemini-headers.d.ts +18 -0
  48. package/dist/types/providers/google-shared.d.ts +171 -0
  49. package/dist/types/providers/google-types.d.ts +138 -0
  50. package/dist/types/providers/google-vertex.d.ts +7 -0
  51. package/dist/types/providers/google.d.ts +4 -0
  52. package/dist/types/providers/grammar.d.ts +1 -0
  53. package/dist/types/providers/kimi.d.ts +27 -0
  54. package/dist/types/providers/mock.d.ts +173 -0
  55. package/dist/types/providers/ollama.d.ts +6 -0
  56. package/dist/types/providers/openai-anthropic-shim.d.ts +31 -0
  57. package/dist/types/providers/openai-chat-server-schema.d.ts +817 -0
  58. package/dist/types/providers/openai-chat-server.d.ts +16 -0
  59. package/dist/types/providers/openai-codex/constants.d.ts +26 -0
  60. package/dist/types/providers/openai-codex/request-transformer.d.ts +49 -0
  61. package/dist/types/providers/openai-codex/response-handler.d.ts +17 -0
  62. package/dist/types/providers/openai-codex-responses.d.ts +67 -0
  63. package/dist/types/providers/openai-completions-compat.d.ts +27 -0
  64. package/dist/types/providers/openai-completions.d.ts +54 -0
  65. package/dist/types/providers/openai-responses-server-schema.d.ts +392 -0
  66. package/dist/types/providers/openai-responses-server.d.ts +17 -0
  67. package/dist/types/providers/openai-responses-shared.d.ts +105 -0
  68. package/dist/types/providers/openai-responses.d.ts +66 -0
  69. package/dist/types/providers/prometheus-native-client.d.ts +13 -0
  70. package/dist/types/providers/prometheus-native-server.d.ts +68 -0
  71. package/dist/types/providers/register-builtins.d.ts +31 -0
  72. package/dist/types/providers/synthetic.d.ts +26 -0
  73. package/dist/types/providers/transform-messages.d.ts +12 -0
  74. package/dist/types/providers/vision-guard.d.ts +20 -0
  75. package/dist/types/providers/xai-responses.d.ts +23 -0
  76. package/dist/types/rate-limit-utils.d.ts +19 -0
  77. package/dist/types/stream.d.ts +28 -0
  78. package/dist/types/types.d.ts +819 -0
  79. package/dist/types/usage/claude.d.ts +4 -0
  80. package/dist/types/usage/gemini.d.ts +2 -0
  81. package/dist/types/usage/github-copilot.d.ts +7 -0
  82. package/dist/types/usage/google-antigravity.d.ts +2 -0
  83. package/dist/types/usage/kimi.d.ts +2 -0
  84. package/dist/types/usage/minimax-code.d.ts +2 -0
  85. package/dist/types/usage/openai-codex.d.ts +3 -0
  86. package/dist/types/usage/shared.d.ts +1 -0
  87. package/dist/types/usage/zai.d.ts +2 -0
  88. package/dist/types/usage.d.ts +260 -0
  89. package/dist/types/utils/abort.d.ts +19 -0
  90. package/dist/types/utils/abortable-iterator.d.ts +4 -0
  91. package/dist/types/utils/anthropic-auth.d.ts +35 -0
  92. package/dist/types/utils/discovery/antigravity.d.ts +61 -0
  93. package/dist/types/utils/discovery/codex.d.ts +38 -0
  94. package/dist/types/utils/discovery/cursor.d.ts +23 -0
  95. package/dist/types/utils/discovery/gemini.d.ts +25 -0
  96. package/dist/types/utils/discovery/index.d.ts +4 -0
  97. package/dist/types/utils/discovery/openai-compatible.d.ts +72 -0
  98. package/dist/types/utils/event-stream.d.ts +28 -0
  99. package/dist/types/utils/fireworks-model-id.d.ts +10 -0
  100. package/dist/types/utils/foundry.d.ts +1 -0
  101. package/dist/types/utils/http-inspector.d.ts +31 -0
  102. package/dist/types/utils/idle-iterator.d.ts +78 -0
  103. package/dist/types/utils/json-parse.d.ts +37 -0
  104. package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  105. package/dist/types/utils/oauth/alibaba-coding-plan.d.ts +18 -0
  106. package/dist/types/utils/oauth/anthropic.d.ts +22 -0
  107. package/dist/types/utils/oauth/api-key-login.d.ts +35 -0
  108. package/dist/types/utils/oauth/api-key-validation.d.ts +27 -0
  109. package/dist/types/utils/oauth/callback-server.d.ts +57 -0
  110. package/dist/types/utils/oauth/cerebras.d.ts +1 -0
  111. package/dist/types/utils/oauth/cloudflare-ai-gateway.d.ts +18 -0
  112. package/dist/types/utils/oauth/cursor.d.ts +15 -0
  113. package/dist/types/utils/oauth/deepseek.d.ts +10 -0
  114. package/dist/types/utils/oauth/firepass.d.ts +1 -0
  115. package/dist/types/utils/oauth/fireworks.d.ts +1 -0
  116. package/dist/types/utils/oauth/github-copilot.d.ts +38 -0
  117. package/dist/types/utils/oauth/gitlab-duo.d.ts +3 -0
  118. package/dist/types/utils/oauth/google-antigravity.d.ts +11 -0
  119. package/dist/types/utils/oauth/google-gemini-cli.d.ts +10 -0
  120. package/dist/types/utils/oauth/google-oauth-shared.d.ts +28 -0
  121. package/dist/types/utils/oauth/huggingface.d.ts +19 -0
  122. package/dist/types/utils/oauth/index.d.ts +38 -0
  123. package/dist/types/utils/oauth/kagi.d.ts +17 -0
  124. package/dist/types/utils/oauth/kilo.d.ts +5 -0
  125. package/dist/types/utils/oauth/kimi.d.ts +21 -0
  126. package/dist/types/utils/oauth/litellm.d.ts +18 -0
  127. package/dist/types/utils/oauth/lm-studio.d.ts +17 -0
  128. package/dist/types/utils/oauth/minimax-code.d.ts +28 -0
  129. package/dist/types/utils/oauth/moonshot.d.ts +1 -0
  130. package/dist/types/utils/oauth/nanogpt.d.ts +1 -0
  131. package/dist/types/utils/oauth/nvidia.d.ts +18 -0
  132. package/dist/types/utils/oauth/ollama-cloud.d.ts +2 -0
  133. package/dist/types/utils/oauth/ollama.d.ts +18 -0
  134. package/dist/types/utils/oauth/openai-codex.d.ts +21 -0
  135. package/dist/types/utils/oauth/opencode.d.ts +18 -0
  136. package/dist/types/utils/oauth/openrouter.d.ts +1 -0
  137. package/dist/types/utils/oauth/parallel.d.ts +17 -0
  138. package/dist/types/utils/oauth/perplexity.d.ts +9 -0
  139. package/dist/types/utils/oauth/pkce.d.ts +8 -0
  140. package/dist/types/utils/oauth/qianfan.d.ts +17 -0
  141. package/dist/types/utils/oauth/qwen-portal.d.ts +19 -0
  142. package/dist/types/utils/oauth/synthetic.d.ts +1 -0
  143. package/dist/types/utils/oauth/tavily.d.ts +17 -0
  144. package/dist/types/utils/oauth/together.d.ts +1 -0
  145. package/dist/types/utils/oauth/types.d.ts +44 -0
  146. package/dist/types/utils/oauth/venice.d.ts +18 -0
  147. package/dist/types/utils/oauth/vercel-ai-gateway.d.ts +18 -0
  148. package/dist/types/utils/oauth/vllm.d.ts +16 -0
  149. package/dist/types/utils/oauth/wafer.d.ts +2 -0
  150. package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
  151. package/dist/types/utils/oauth/xiaomi.d.ts +25 -0
  152. package/dist/types/utils/oauth/zai.d.ts +18 -0
  153. package/dist/types/utils/oauth/zenmux.d.ts +1 -0
  154. package/dist/types/utils/oauth/zhipu.d.ts +18 -0
  155. package/dist/types/utils/overflow.d.ts +54 -0
  156. package/dist/types/utils/parse-bind.d.ts +23 -0
  157. package/dist/types/utils/provider-response.d.ts +3 -0
  158. package/dist/types/utils/request-debug.d.ts +29 -0
  159. package/dist/types/utils/retry-after.d.ts +3 -0
  160. package/dist/types/utils/retry.d.ts +26 -0
  161. package/dist/types/utils/schema/adapt.d.ts +24 -0
  162. package/dist/types/utils/schema/compatibility.d.ts +30 -0
  163. package/dist/types/utils/schema/dereference.d.ts +11 -0
  164. package/dist/types/utils/schema/draft.d.ts +10 -0
  165. package/dist/types/utils/schema/equality.d.ts +4 -0
  166. package/dist/types/utils/schema/fields.d.ts +49 -0
  167. package/dist/types/utils/schema/index.d.ts +13 -0
  168. package/dist/types/utils/schema/json-schema-validator.d.ts +12 -0
  169. package/dist/types/utils/schema/meta-validator.d.ts +2 -0
  170. package/dist/types/utils/schema/normalize.d.ts +93 -0
  171. package/dist/types/utils/schema/spill.d.ts +8 -0
  172. package/dist/types/utils/schema/stamps.d.ts +25 -0
  173. package/dist/types/utils/schema/types.d.ts +4 -0
  174. package/dist/types/utils/schema/wire.d.ts +53 -0
  175. package/dist/types/utils/schema/zod-decontaminate.d.ts +31 -0
  176. package/dist/types/utils/sdk-stream-timeout.d.ts +33 -0
  177. package/dist/types/utils/sse-debug.d.ts +10 -0
  178. package/dist/types/utils/stream-markup-healing.d.ts +80 -0
  179. package/dist/types/utils/tool-choice.d.ts +50 -0
  180. package/dist/types/utils/validation.d.ts +17 -0
  181. package/dist/types/utils.d.ts +28 -0
  182. package/package.json +142 -0
  183. package/src/api-registry.ts +96 -0
  184. package/src/auth-broker/client.ts +358 -0
  185. package/src/auth-broker/index.ts +6 -0
  186. package/src/auth-broker/refresher.ts +117 -0
  187. package/src/auth-broker/remote-store.ts +637 -0
  188. package/src/auth-broker/server.ts +644 -0
  189. package/src/auth-broker/snapshot-cache.ts +174 -0
  190. package/src/auth-broker/types.ts +130 -0
  191. package/src/auth-broker/wire-schemas.ts +200 -0
  192. package/src/auth-gateway/http.ts +194 -0
  193. package/src/auth-gateway/index.ts +3 -0
  194. package/src/auth-gateway/server.ts +822 -0
  195. package/src/auth-gateway/types.ts +143 -0
  196. package/src/auth-storage.ts +4608 -0
  197. package/src/index.ts +54 -0
  198. package/src/model-cache.ts +129 -0
  199. package/src/model-manager.ts +469 -0
  200. package/src/model-thinking.ts +756 -0
  201. package/src/models.json +60287 -0
  202. package/src/models.json.d.ts +9 -0
  203. package/src/models.ts +56 -0
  204. package/src/prompts/turn-aborted-guidance.md +4 -0
  205. package/src/provider-details.ts +90 -0
  206. package/src/provider-models/bundled-references.ts +38 -0
  207. package/src/provider-models/descriptors.ts +364 -0
  208. package/src/provider-models/google.ts +88 -0
  209. package/src/provider-models/index.ts +5 -0
  210. package/src/provider-models/ollama.ts +153 -0
  211. package/src/provider-models/openai-compat.ts +2904 -0
  212. package/src/provider-models/special.ts +67 -0
  213. package/src/providers/amazon-bedrock.ts +873 -0
  214. package/src/providers/anthropic-client.ts +318 -0
  215. package/src/providers/anthropic-messages-server-schema.ts +243 -0
  216. package/src/providers/anthropic-messages-server.ts +681 -0
  217. package/src/providers/anthropic-wire.ts +268 -0
  218. package/src/providers/anthropic.ts +3106 -0
  219. package/src/providers/aws-credentials.ts +501 -0
  220. package/src/providers/aws-eventstream.ts +185 -0
  221. package/src/providers/aws-sigv4.ts +218 -0
  222. package/src/providers/azure-openai-responses.ts +361 -0
  223. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  224. package/src/providers/cursor/proto/agent.proto +3526 -0
  225. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  226. package/src/providers/cursor/proto/buf.yaml +17 -0
  227. package/src/providers/cursor.ts +2621 -0
  228. package/src/providers/error-message.ts +21 -0
  229. package/src/providers/github-copilot-headers.ts +140 -0
  230. package/src/providers/gitlab-duo.ts +372 -0
  231. package/src/providers/google-auth.ts +252 -0
  232. package/src/providers/google-gemini-cli.ts +809 -0
  233. package/src/providers/google-gemini-headers.ts +41 -0
  234. package/src/providers/google-shared.ts +917 -0
  235. package/src/providers/google-types.ts +167 -0
  236. package/src/providers/google-vertex.ts +91 -0
  237. package/src/providers/google.ts +41 -0
  238. package/src/providers/grammar.ts +70 -0
  239. package/src/providers/kimi.ts +52 -0
  240. package/src/providers/mock.ts +496 -0
  241. package/src/providers/ollama.ts +644 -0
  242. package/src/providers/openai-anthropic-shim.ts +138 -0
  243. package/src/providers/openai-chat-server-schema.ts +252 -0
  244. package/src/providers/openai-chat-server.ts +647 -0
  245. package/src/providers/openai-codex/constants.ts +43 -0
  246. package/src/providers/openai-codex/request-transformer.ts +161 -0
  247. package/src/providers/openai-codex/response-handler.ts +81 -0
  248. package/src/providers/openai-codex-responses.ts +3027 -0
  249. package/src/providers/openai-completions-compat.ts +320 -0
  250. package/src/providers/openai-completions.ts +2002 -0
  251. package/src/providers/openai-responses-server-schema.ts +290 -0
  252. package/src/providers/openai-responses-server.ts +1183 -0
  253. package/src/providers/openai-responses-shared.ts +956 -0
  254. package/src/providers/openai-responses.ts +679 -0
  255. package/src/providers/prometheus-native-client.ts +228 -0
  256. package/src/providers/prometheus-native-server.ts +212 -0
  257. package/src/providers/register-builtins.ts +457 -0
  258. package/src/providers/synthetic.ts +50 -0
  259. package/src/providers/transform-messages.ts +382 -0
  260. package/src/providers/vision-guard.ts +52 -0
  261. package/src/providers/xai-responses.ts +82 -0
  262. package/src/rate-limit-utils.ts +91 -0
  263. package/src/stream.ts +1068 -0
  264. package/src/types.ts +965 -0
  265. package/src/usage/claude.ts +482 -0
  266. package/src/usage/gemini.ts +250 -0
  267. package/src/usage/github-copilot.ts +421 -0
  268. package/src/usage/google-antigravity.ts +201 -0
  269. package/src/usage/kimi.ts +271 -0
  270. package/src/usage/minimax-code.ts +31 -0
  271. package/src/usage/openai-codex.ts +503 -0
  272. package/src/usage/shared.ts +10 -0
  273. package/src/usage/zai.ts +247 -0
  274. package/src/usage.ts +185 -0
  275. package/src/utils/abort.ts +51 -0
  276. package/src/utils/abortable-iterator.ts +69 -0
  277. package/src/utils/anthropic-auth.ts +93 -0
  278. package/src/utils/discovery/antigravity.ts +261 -0
  279. package/src/utils/discovery/codex.ts +371 -0
  280. package/src/utils/discovery/cursor.ts +306 -0
  281. package/src/utils/discovery/gemini.ts +248 -0
  282. package/src/utils/discovery/index.ts +4 -0
  283. package/src/utils/discovery/openai-compatible.ts +224 -0
  284. package/src/utils/event-stream.ts +142 -0
  285. package/src/utils/fireworks-model-id.ts +30 -0
  286. package/src/utils/foundry.ts +8 -0
  287. package/src/utils/http-inspector.ts +176 -0
  288. package/src/utils/idle-iterator.ts +273 -0
  289. package/src/utils/json-parse.ts +182 -0
  290. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  291. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  292. package/src/utils/oauth/anthropic.ts +273 -0
  293. package/src/utils/oauth/api-key-login.ts +87 -0
  294. package/src/utils/oauth/api-key-validation.ts +92 -0
  295. package/src/utils/oauth/callback-server.ts +276 -0
  296. package/src/utils/oauth/cerebras.ts +16 -0
  297. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  298. package/src/utils/oauth/cursor.ts +157 -0
  299. package/src/utils/oauth/deepseek.ts +53 -0
  300. package/src/utils/oauth/firepass.ts +24 -0
  301. package/src/utils/oauth/fireworks.ts +15 -0
  302. package/src/utils/oauth/github-copilot.ts +362 -0
  303. package/src/utils/oauth/gitlab-duo.ts +123 -0
  304. package/src/utils/oauth/google-antigravity.ts +200 -0
  305. package/src/utils/oauth/google-gemini-cli.ts +256 -0
  306. package/src/utils/oauth/google-oauth-shared.ts +110 -0
  307. package/src/utils/oauth/huggingface.ts +62 -0
  308. package/src/utils/oauth/index.ts +502 -0
  309. package/src/utils/oauth/kagi.ts +47 -0
  310. package/src/utils/oauth/kilo.ts +87 -0
  311. package/src/utils/oauth/kimi.ts +254 -0
  312. package/src/utils/oauth/litellm.ts +47 -0
  313. package/src/utils/oauth/lm-studio.ts +38 -0
  314. package/src/utils/oauth/minimax-code.ts +80 -0
  315. package/src/utils/oauth/moonshot.ts +23 -0
  316. package/src/utils/oauth/nanogpt.ts +15 -0
  317. package/src/utils/oauth/nvidia.ts +70 -0
  318. package/src/utils/oauth/oauth.html +199 -0
  319. package/src/utils/oauth/ollama-cloud.ts +28 -0
  320. package/src/utils/oauth/ollama.ts +47 -0
  321. package/src/utils/oauth/openai-codex.ts +299 -0
  322. package/src/utils/oauth/opencode.ts +49 -0
  323. package/src/utils/oauth/openrouter.ts +20 -0
  324. package/src/utils/oauth/parallel.ts +46 -0
  325. package/src/utils/oauth/perplexity.ts +206 -0
  326. package/src/utils/oauth/pkce.ts +18 -0
  327. package/src/utils/oauth/qianfan.ts +58 -0
  328. package/src/utils/oauth/qwen-portal.ts +60 -0
  329. package/src/utils/oauth/synthetic.ts +15 -0
  330. package/src/utils/oauth/tavily.ts +46 -0
  331. package/src/utils/oauth/together.ts +16 -0
  332. package/src/utils/oauth/types.ts +102 -0
  333. package/src/utils/oauth/venice.ts +59 -0
  334. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  335. package/src/utils/oauth/vllm.ts +40 -0
  336. package/src/utils/oauth/wafer.ts +50 -0
  337. package/src/utils/oauth/xai-oauth.ts +342 -0
  338. package/src/utils/oauth/xiaomi.ts +194 -0
  339. package/src/utils/oauth/zai.ts +60 -0
  340. package/src/utils/oauth/zenmux.ts +15 -0
  341. package/src/utils/oauth/zhipu.ts +60 -0
  342. package/src/utils/overflow.ts +137 -0
  343. package/src/utils/parse-bind.ts +54 -0
  344. package/src/utils/provider-response.ts +30 -0
  345. package/src/utils/request-debug.ts +336 -0
  346. package/src/utils/retry-after.ts +110 -0
  347. package/src/utils/retry.ts +54 -0
  348. package/src/utils/schema/CONSTRAINTS.md +164 -0
  349. package/src/utils/schema/adapt.ts +36 -0
  350. package/src/utils/schema/compatibility.ts +435 -0
  351. package/src/utils/schema/dereference.ts +98 -0
  352. package/src/utils/schema/draft.ts +341 -0
  353. package/src/utils/schema/equality.ts +97 -0
  354. package/src/utils/schema/fields.ts +191 -0
  355. package/src/utils/schema/index.ts +13 -0
  356. package/src/utils/schema/json-schema-validator.ts +577 -0
  357. package/src/utils/schema/meta-validator.ts +167 -0
  358. package/src/utils/schema/normalize.ts +1588 -0
  359. package/src/utils/schema/spill.ts +43 -0
  360. package/src/utils/schema/stamps.ts +97 -0
  361. package/src/utils/schema/types.ts +10 -0
  362. package/src/utils/schema/wire.ts +293 -0
  363. package/src/utils/schema/zod-decontaminate.ts +331 -0
  364. package/src/utils/sdk-stream-timeout.ts +43 -0
  365. package/src/utils/sse-debug.ts +289 -0
  366. package/src/utils/stream-markup-healing.ts +612 -0
  367. package/src/utils/tool-choice.ts +99 -0
  368. package/src/utils/validation.ts +1024 -0
  369. package/src/utils.ts +166 -0
@@ -0,0 +1,956 @@
1
+ import { structuredCloneJSON } from "@prometheus-ai/utils";
2
+ import type OpenAI from "openai";
3
+ import type {
4
+ ResponseCustomToolCall,
5
+ ResponseFunctionToolCall,
6
+ ResponseInput,
7
+ ResponseInputContent,
8
+ ResponseInputImage,
9
+ ResponseInputText,
10
+ ResponseOutputItem,
11
+ ResponseOutputMessage,
12
+ ResponseReasoningItem,
13
+ } from "openai/resources/responses/responses";
14
+ import { calculateCost } from "../models";
15
+ import {
16
+ type Api,
17
+ type AssistantMessage,
18
+ type ImageContent,
19
+ type Model,
20
+ resolveServiceTier,
21
+ type ServiceTier,
22
+ type StopReason,
23
+ type StreamOptions,
24
+ shouldSendServiceTier,
25
+ type TextContent,
26
+ type TextSignatureV1,
27
+ type ThinkingContent,
28
+ type ToolCall,
29
+ type ToolResultMessage,
30
+ } from "../types";
31
+ import { normalizeResponsesToolCallId } from "../utils";
32
+ import type { AssistantMessageEventStream } from "../utils/event-stream";
33
+ import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
34
+ import { joinTextWithImagePlaceholder, NON_VISION_IMAGE_PLACEHOLDER, partitionVisionContent } from "./vision-guard";
35
/**
 * Event `type` strings that count as stream progress for the Responses API.
 * Membership is checked by `isOpenAIResponsesProgressEvent`.
 */
export const OPENAI_RESPONSES_PROGRESS_EVENT_TYPES: ReadonlySet<string> = new Set<string>([
	// Response and output-item start
	"response.created",
	"response.output_item.added",
	// Reasoning summary / reasoning text
	"response.reasoning_summary_part.added",
	"response.reasoning_summary_text.delta",
	"response.reasoning_summary_part.done",
	"response.reasoning_text.delta",
	// Assistant content
	"response.content_part.added",
	"response.output_text.delta",
	"response.refusal.delta",
	// Function and custom tool call payloads
	"response.function_call_arguments.delta",
	"response.function_call_arguments.done",
	"response.custom_tool_call_input.delta",
	"response.custom_tool_call_input.done",
	// Output-item / response completion and failures
	"response.output_item.done",
	"response.completed",
	"response.failed",
	"error",
]);
54
+
55
/**
 * Report whether `event` is an object whose string `type` is listed in
 * {@link OPENAI_RESPONSES_PROGRESS_EVENT_TYPES}.
 *
 * @param event - Arbitrary value; non-objects and `null` yield `false`.
 * @returns `true` only for objects carrying a recognised progress `type`.
 */
export function isOpenAIResponsesProgressEvent(event: unknown): boolean {
	if (typeof event !== "object" || event === null) return false;
	const { type: eventType } = event as { type?: unknown };
	if (typeof eventType !== "string") return false;
	return OPENAI_RESPONSES_PROGRESS_EVENT_TYPES.has(eventType);
}
60
+
61
/**
 * Serialize a version-1 text signature to JSON.
 *
 * @param id - Identifier stored under `id`.
 * @param phase - Optional phase; the `phase` key is emitted only when this is truthy.
 * @returns JSON text of the form `{"v":1,"id":…}` with an optional trailing `phase`.
 */
export function encodeTextSignatureV1(id: string, phase?: TextSignatureV1["phase"]): string {
	// Key order (v, id, phase) is kept stable so the encoded string is deterministic.
	const signature: TextSignatureV1 = phase ? { v: 1, id, phase } : { v: 1, id };
	return JSON.stringify(signature);
}
66
+
67
/**
 * Decode a text signature into its id and optional phase.
 *
 * Signatures starting with `{` are tried as version-1 JSON (`v === 1` with a
 * string `id`). Anything else, including JSON that fails to parse or does not
 * match that shape, is treated as a legacy plain-string signature whose whole
 * text is the id.
 *
 * @param signature - Encoded signature, or `undefined`/empty for "none".
 * @returns `undefined` when `signature` is falsy; otherwise the id, plus
 *   `phase` when it is `"commentary"` or `"final_answer"`.
 */
export function parseTextSignature(
	signature: string | undefined,
): { id: string; phase?: TextSignatureV1["phase"] } | undefined {
	if (!signature) return undefined;
	if (!signature.startsWith("{")) return { id: signature };

	let decoded: Partial<TextSignatureV1>;
	try {
		decoded = JSON.parse(signature) as Partial<TextSignatureV1>;
	} catch {
		// Not valid JSON after all: legacy plain-string handling.
		return { id: signature };
	}

	if (decoded.v !== 1 || typeof decoded.id !== "string") return { id: signature };

	const id = decoded.id;
	const phase = decoded.phase;
	return phase === "commentary" || phase === "final_answer" ? { id, phase } : { id };
}
86
+
87
/**
 * Join a call id and an item id into the `callId|itemId` form.
 *
 * @param callId - The tool call's call id.
 * @param itemId - The item id; when `null`, `undefined` or empty, a
 *   replacement of the form `fc_<base-36 Bun.hash(callId)>` is used instead.
 * @returns The combined `callId|itemId` string.
 */
export function encodeResponsesToolCallId(callId: string, itemId: string | null | undefined): string {
	// A truthy string is by definition non-empty, so one check covers null/undefined/"".
	if (itemId) return `${callId}|${itemId}`;
	const derivedItemId = `fc_${Bun.hash(callId).toString(36)}`;
	return `${callId}|${derivedItemId}`;
}
91
+
92
/**
 * Normalize a `callId|itemId` tool call id for replay.
 *
 * - Ids without a `|` are returned unchanged.
 * - When both `model` and `source` are given and differ in provider or api,
 *   the call-id part has characters outside `[a-zA-Z0-9_-]` replaced by `_`,
 *   is capped at 64 characters and loses trailing underscores; the item-id
 *   part is replaced by `fc_` plus the base-36 `Bun.hash` of the original.
 * - Otherwise the id is passed through `normalizeResponsesToolCallId`.
 *
 * @param id - Tool call id, possibly in `callId|itemId` form.
 * @param model - Target model, if known.
 * @param source - Assistant message the tool call came from, if known.
 * @returns The normalized id.
 */
export function normalizeResponsesToolCallIdForTransform(
	id: string,
	model?: Model<Api>,
	source?: AssistantMessage,
): string {
	if (id.indexOf("|") === -1) return id;

	const sameOrigin =
		source == null || model == null || (source.provider === model.provider && source.api === model.api);
	if (sameOrigin) {
		const { callId, itemId } = normalizeResponsesToolCallId(id);
		return `${callId}|${itemId}`;
	}

	// Foreign tool call: only the first two `|`-separated segments are considered.
	const [rawCallId, rawItemId] = id.split("|");
	const safeCallId = rawCallId
		.replace(/[^a-zA-Z0-9_-]/g, "_")
		.slice(0, 64)
		.replace(/_+$/, "");
	const hashedItemId = `fc_${Bun.hash(rawItemId).toString(36)}`.slice(0, 64);
	return `${safeCallId}|${hashedItemId}`;
}
115
+
116
/**
 * Gather the `call_id` of every `function_call` and `custom_tool_call` item.
 *
 * @param messages - Responses input items to scan.
 * @returns The set of string call ids found; items with a non-string
 *   `call_id` or any other `type` are ignored.
 */
export function collectKnownCallIds(messages: ResponseInput): Set<string> {
	const seen = new Set<string>();
	for (const entry of messages) {
		const { type, call_id: callId } = entry as { type?: string; call_id?: unknown };
		if (typeof callId !== "string") continue;
		if (type === "function_call" || type === "custom_tool_call") seen.add(callId);
	}
	return seen;
}
130
+
131
/**
 * Gather the `call_id` of every `custom_tool_call` item in the replay input.
 *
 * @param messages - Responses input items to scan.
 * @returns The set of string call ids belonging to custom tool calls.
 */
export function collectCustomCallIds(messages: ResponseInput): Set<string> {
	const ids = new Set<string>();
	for (const entry of messages) {
		const candidate = entry as { type?: string; call_id?: unknown };
		if (candidate.type !== "custom_tool_call") continue;
		if (typeof candidate.call_id === "string") ids.add(candidate.call_id);
	}
	return ids;
}
144
+
145
/**
 * Convert orphan `function_call_output` / `custom_tool_call_output` items —
 * those whose `call_id` matches no `function_call` / `custom_tool_call`
 * anywhere in the same input — into assistant text notes.
 *
 * The Responses API rejects unpaired outputs with
 * `400 No tool call found for function call output with call_id …`. Orphans
 * sneak in through two paths today:
 *
 * - A previous turn's `providerPayload` snapshot replaces the input array via
 *   the `dt: false` splice (see {@link convertConversationMessages}), wiping
 *   the matching `function_call` while leaving the matching
 *   `function_call_output` queued in a later `toolResult`.
 * - A locally-rejected tool call (argument-validation failure, hook reject,
 *   aborted turn before the call streamed) produces a tool result without a
 *   `function_call` ever landing in any persisted provider payload.
 *
 * Dropping the result loses information the model needs to recover; sending
 * it as-is 400s the request. Folding it into an assistant `message` preserves
 * the payload (call_id + truncated output) while staying within the Responses
 * input grammar. Matches the behavior of {@link transformRequestBody} in the
 * codex provider — issue #1351 / regression of #472.
 *
 * @param input - Responses input items to inspect; never mutated.
 * @returns `input` itself (same reference) when it contains no orphan,
 *   otherwise a new array in which each orphan output is replaced by an
 *   assistant `message` item and every other item is kept as-is.
 */
export function repairOrphanResponsesToolOutputs(input: ResponseInput): ResponseInput {
	// Maximum number of UTF-16 code units of tool output kept in the note.
	const ORPHAN_OUTPUT_LIMIT = 16_000;

	const knownCallIds = new Set<string>();
	for (const item of input) {
		const t = (item as { type?: string }).type;
		const callId = (item as { call_id?: unknown }).call_id;
		if (typeof callId !== "string") continue;
		if (t === "function_call" || t === "custom_tool_call") knownCallIds.add(callId);
	}

	// An orphan is a tool output with a string call_id that no call item declares.
	const isOrphan = (item: ResponseInput[number]): boolean => {
		const t = (item as { type?: string }).type;
		if (t !== "function_call_output" && t !== "custom_tool_call_output") return false;
		const callId = (item as { call_id?: unknown }).call_id;
		return typeof callId === "string" && !knownCallIds.has(callId);
	};

	// Fast path: hand back the original array so callers can rely on identity.
	if (!input.some(isOrphan)) return input;

	return input.map(item => {
		if (!isOrphan(item)) return item;
		const record = item as { call_id?: unknown; output?: unknown; name?: unknown };
		const callId = record.call_id as string;
		const toolName = typeof record.name === "string" && record.name.length > 0 ? record.name : "tool";
		const rawOutput = record.output;

		let text: string;
		if (typeof rawOutput === "string") text = rawOutput;
		else if (rawOutput == null) text = "";
		else {
			try {
				// JSON.stringify yields undefined (not a string) for functions and
				// symbols; fall back to String() so `text` is always a string.
				const encoded: string | undefined = JSON.stringify(rawOutput);
				text = encoded ?? String(rawOutput);
			} catch {
				text = String(rawOutput);
			}
		}

		if (text.length > ORPHAN_OUTPUT_LIMIT) {
			let cut = ORPHAN_OUTPUT_LIMIT;
			const lastKept = text.charCodeAt(cut - 1);
			const firstDropped = text.charCodeAt(cut);
			// Never cut between the two halves of a surrogate pair: that would
			// leave a lone high surrogate and an ill-formed string in the request.
			const splitsPair =
				lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
			if (splitsPair) cut -= 1;
			text = `${text.slice(0, cut)}\n...[truncated]`;
		}

		return {
			type: "message",
			role: "assistant",
			content: `[Orphan ${toolName} result; call_id=${callId}]: ${text}`,
		} as ResponseInput[number];
	});
}
214
+
215
/**
 * Convert message content into Responses API input content parts.
 *
 * String content becomes a single `input_text` part. Block content is split
 * by `partitionVisionContent`: text blocks become `input_text` parts, image
 * blocks become `input_image` data URLs, and a placeholder text part is
 * appended when images were omitted. Text is passed through `toWellFormed()`
 * and whitespace-only text is dropped.
 *
 * @param content - Plain string or an array of text/image blocks.
 * @param supportsImages - Forwarded to `partitionVisionContent`.
 * @returns The converted parts, or `undefined` when nothing remains.
 */
export function convertResponsesInputContent(
	content: string | Array<TextContent | ImageContent>,
	supportsImages: boolean,
): ResponseInputContent[] | undefined {
	if (typeof content === "string") {
		return content.trim().length > 0
			? [{ type: "input_text", text: content.toWellFormed() } satisfies ResponseInputText]
			: undefined;
	}

	const partitioned = partitionVisionContent(content, supportsImages);
	const parts: ResponseInputContent[] = [];

	for (const { text: rawText } of partitioned.textBlocks) {
		const text = rawText.toWellFormed();
		if (text.trim().length > 0) {
			parts.push({ type: "input_text", text } satisfies ResponseInputText);
		}
	}

	for (const image of partitioned.imageBlocks) {
		const dataUrl = `data:${image.mimeType};base64,${image.data}`;
		parts.push({ type: "input_image", detail: "auto", image_url: dataUrl } satisfies ResponseInputImage);
	}

	if (partitioned.omittedImages) {
		parts.push({ type: "input_text", text: NON_VISION_IMAGE_PLACEHOLDER } satisfies ResponseInputText);
	}

	return parts.length === 0 ? undefined : parts;
}
249
+
250
/**
 * Convert one stored assistant message into Responses API input items.
 *
 * - `thinking` blocks (on messages whose `stopReason` is not `"error"`) are
 *   replayed from their JSON `thinkingSignature` when
 *   `includeThinkingSignatures` is set; blocks without a signature, or whose
 *   signature does not decode to a JSON object, are skipped.
 * - `text` blocks become completed assistant `message` items. The id comes
 *   from the text signature, falls back to `msg_<msgIndex>`, and is replaced
 *   by a `msg_<hash>` form when longer than 64 characters.
 * - `toolCall` blocks become `custom_tool_call` items (when the block has a
 *   `customWireName`) or `function_call` items. Their call ids are recorded
 *   in `knownCallIds`, and custom ones also in `customCallIds`.
 * - Every other block, including `thinking` on errored messages, is ignored.
 *
 * @param assistantMsg - The assistant message to convert.
 * @param model - Target model; compared with the message's model/provider/api.
 * @param msgIndex - Index used to build the fallback message id.
 * @param knownCallIds - Mutated: receives the call id of every tool call emitted.
 * @param includeThinkingSignatures - When `false`, thinking blocks are dropped.
 * @param customCallIds - Mutated when provided: receives custom tool call ids.
 * @returns The input items, in block order.
 */
export function convertResponsesAssistantMessage<TApi extends Api>(
	assistantMsg: AssistantMessage,
	model: Model<TApi>,
	msgIndex: number,
	knownCallIds: Set<string>,
	includeThinkingSignatures = true,
	customCallIds?: Set<string>,
): ResponseInput {
	const outputItems: ResponseInput = [];
	// Same provider and api, but a different model id than the one that produced the message.
	const isDifferentModel =
		assistantMsg.model !== model.id && assistantMsg.provider === model.provider && assistantMsg.api === model.api;

	for (const block of assistantMsg.content) {
		if (block.type === "thinking" && assistantMsg.stopReason !== "error") {
			if (includeThinkingSignatures && block.thinkingSignature) {
				// The signature is persisted data. A value that is not a JSON object
				// cannot be replayed as a reasoning item, so skip it rather than let
				// a SyntaxError abort conversion of the whole conversation.
				let reasoningItem: unknown;
				try {
					reasoningItem = JSON.parse(block.thinkingSignature);
				} catch {
					reasoningItem = undefined;
				}
				if (typeof reasoningItem === "object" && reasoningItem !== null && !Array.isArray(reasoningItem)) {
					outputItems.push(reasoningItem as ResponseReasoningItem);
				}
			}
			continue;
		}

		if (block.type === "text") {
			const parsedSignature = parseTextSignature(block.textSignature);
			let msgId = parsedSignature?.id;
			if (!msgId) {
				msgId = `msg_${msgIndex}`;
			} else if (msgId.length > 64) {
				// Over-long ids are replaced by a short hash-derived id.
				msgId = `msg_${Bun.hash(msgId).toString(36)}`;
			}
			outputItems.push({
				type: "message",
				role: "assistant",
				content: [{ type: "output_text", text: block.text.toWellFormed(), annotations: [] }],
				status: "completed",
				id: msgId,
				phase: parsedSignature?.phase,
			} satisfies ResponseOutputMessage);
			continue;
		}

		if (block.type !== "toolCall") {
			continue;
		}

		const normalized = normalizeResponsesToolCallId(block.id, block.customWireName ? "ctc" : "fc");
		let itemId: string | undefined = normalized.itemId;
		// For a different model on the same provider/api, omit item ids carrying
		// one of the tool-call item prefixes.
		if (isDifferentModel && (itemId?.startsWith("fc_") || itemId?.startsWith("fcr_") || itemId?.startsWith("ctc_"))) {
			itemId = undefined;
		}
		knownCallIds.add(normalized.callId);
		if (block.customWireName) {
			const rawInput = typeof block.arguments?.input === "string" ? block.arguments.input : "";
			customCallIds?.add(normalized.callId);
			outputItems.push({
				type: "custom_tool_call",
				id: itemId,
				call_id: normalized.callId,
				name: block.customWireName,
				input: rawInput,
			} as ResponseInput[number]);
			continue;
		}
		outputItems.push({
			type: "function_call",
			id: itemId,
			call_id: normalized.callId,
			name: block.name,
			// JSON.stringify(undefined) is undefined, which would drop the
			// `arguments` field from the request; fall back to an empty object.
			arguments: JSON.stringify(block.arguments ?? {}),
		});
	}

	return outputItems;
}
325
+
326
/**
 * Append the Responses-API representation of one tool result to `messages`.
 *
 * The text blocks of the result are joined with newlines and pushed as a
 * `custom_tool_call_output` (when the call id is listed in `customCallIds`) or a
 * `function_call_output`. When the result has images and the model lists `"image"` among its
 * inputs, a follow-up `user` message carrying the images as data URLs is appended as well.
 *
 * Output text selection:
 * - images present but unsupported by the model: text joined with the image placeholder;
 * - otherwise the joined text when non-empty;
 * - otherwise the literal `"(see attached image)"` (this also applies when the result has
 *   neither text nor images).
 *
 * @param messages - Mutated: receives the output item and the optional image message.
 * @param toolResult - The tool result to convert.
 * @param model - Target model; only `model.input` is consulted.
 * @param strictResponsesPairing - When `true`, results whose call id is not in `knownCallIds`
 *   are dropped without appending anything.
 * @param knownCallIds - Call ids of tool calls already present in the converted history.
 * @param customCallIds - Call ids that belong to custom tool calls.
 */
export function appendResponsesToolResultMessages<TApi extends Api>(
	messages: ResponseInput,
	toolResult: ToolResultMessage,
	model: Model<TApi>,
	strictResponsesPairing: boolean,
	knownCallIds: ReadonlySet<string>,
	customCallIds?: ReadonlySet<string>,
): void {
	const modelAcceptsImages = model.input.includes("image");

	// Single pass: split the result into its text chunks and its image blocks.
	const textChunks: string[] = [];
	const imageBlocks: ImageContent[] = [];
	for (const block of toolResult.content) {
		if (block.type === "text") {
			textChunks.push(block.text);
		} else if (block.type === "image") {
			imageBlocks.push(block);
		}
	}
	const joinedText = textChunks.join("\n");
	const hasImages = imageBlocks.length > 0;

	const { callId } = normalizeResponsesToolCallId(toolResult.toolCallId);
	if (strictResponsesPairing && !knownCallIds.has(callId)) {
		return;
	}

	let rawOutput: string;
	if (hasImages && !modelAcceptsImages) {
		rawOutput = joinTextWithImagePlaceholder(joinedText, true);
	} else if (joinedText.length > 0) {
		rawOutput = joinedText;
	} else {
		rawOutput = "(see attached image)";
	}
	const output = rawOutput.toWellFormed();

	if (customCallIds?.has(callId)) {
		messages.push({
			type: "custom_tool_call_output",
			call_id: callId,
			output,
		} as ResponseInput[number]);
	} else {
		messages.push({
			type: "function_call_output",
			call_id: callId,
			output,
		});
	}

	if (!(hasImages && modelAcceptsImages)) {
		return;
	}

	// Images travel in a separate user message that follows the tool output.
	const heading: ResponseInputText = { type: "input_text", text: "Attached image(s) from tool result:" };
	const contentParts: ResponseInputContent[] = [
		heading,
		...imageBlocks.map(
			(image): ResponseInputImage => ({
				type: "input_image",
				detail: "auto",
				image_url: `data:${image.mimeType};base64,${image.data}`,
			}),
		),
	];
	messages.push({ role: "user", content: contentParts });
}
385
+
386
/** Optional hooks accepted by `processResponsesStream`. */
export interface ProcessResponsesStreamOptions {
	/**
	 * Called for every `response.output_item.done` event with a cloned copy of the finished
	 * item, before the item is folded into the assistant message.
	 */
	onOutputItemDone?: (item: ResponseOutputItem) => void;
	/** Called once, when the first `response.output_item.added` event arrives. */
	onFirstToken?: () => void;
}
390
+
391
/**
 * Consume a Responses-API event stream, folding every event into `output` and re-emitting
 * normalized assistant-message events on `stream`.
 *
 * Event handling, in short:
 * - `response.created` records the response id.
 * - `response.output_item.added` pushes a new content block (thinking / text / tool call) and
 *   emits the matching `*_start` event.
 * - the various `*.delta` events append to the open block and emit `*_delta` events.
 * - `response.output_item.done` finalizes the block, emits the matching `*_end` event, and
 *   first hands a cloned copy of the item to `options.onOutputItemDone`.
 * - `response.completed` records usage, cost and the stop reason; a `"stop"` reason is
 *   upgraded to `"toolUse"` when the message contains any tool call.
 *
 * @param openaiStream - The provider event stream to consume.
 * @param output - Mutated in place: content blocks, response id, usage and stop reason.
 * @param stream - Receives the normalized events; `partial` always references `output`.
 * @param model - Passed to `calculateCost` when the response completes.
 * @param options - Optional hooks, see {@link ProcessResponsesStreamOptions}.
 * @throws Error when the stream yields an `error` or `response.failed` event, or when
 *   `response.completed` reports a `failed` / `cancelled` status.
 */
export async function processResponsesStream<TApi extends Api>(
	openaiStream: AsyncIterable<OpenAI.Responses.ResponseStreamEvent>,
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
	model: Model<TApi>,
	options?: ProcessResponsesStreamOptions,
): Promise<void> {
	type StreamingToolCallBlock = ToolCall & { partialJson: string; lastParseLen?: number; argumentsDone?: boolean };
	interface StreamingItem {
		item: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | ResponseCustomToolCall;
		block: ThinkingContent | TextContent | StreamingToolCallBlock;
	}

	// Multiple items (parallel function_calls in particular) can be open at the same
	// time. OpenAI's spec routes every per-item event by `output_index`/`item_id`;
	// see https://github.com/uttamtrivedi/Prometheus/issues/1880 — llama.cpp emits parallel
	// function_call deltas interleaved, and a singleton `current` reference would
	// fold them into the wrong block and drop arguments on every call but the last.
	const openItemsByOutputIndex = new Map<number, StreamingItem>();
	const openItemsByItemId = new Map<string, StreamingItem>();
	let lastOpenItem: StreamingItem | null = null;
	const openItemsInOrder: StreamingItem[] = [];

	const registerOpenItem = (
		outputIndex: number | undefined,
		itemId: string | undefined,
		entry: StreamingItem,
	): void => {
		if (typeof outputIndex === "number") openItemsByOutputIndex.set(outputIndex, entry);
		if (itemId) openItemsByItemId.set(itemId, entry);
		openItemsInOrder.push(entry);
		lastOpenItem = entry;
	};
	const lookupOpenItem = (event: { output_index?: number; item_id?: string }): StreamingItem | undefined => {
		if (typeof event.output_index === "number") {
			const found = openItemsByOutputIndex.get(event.output_index);
			if (found) return found;
		}
		if (event.item_id) {
			const found = openItemsByItemId.get(event.item_id);
			if (found) return found;
		}
		// Fallback for tests / mock providers that omit identifiers on stream events.
		return lastOpenItem ?? undefined;
	};
	const hasOpenItemKey = (event: { output_index?: number; item_id?: string }): boolean =>
		typeof event.output_index === "number" || event.item_id !== undefined;
	// Without any routing key, argument events go to the oldest function call whose
	// arguments are not finished yet, and only then to the most recently opened item.
	const lookupOpenFunctionCallItem = (event: {
		output_index?: number;
		item_id?: string;
	}): StreamingItem | undefined => {
		if (hasOpenItemKey(event)) return lookupOpenItem(event);
		for (const candidate of openItemsInOrder) {
			if (
				candidate.item.type === "function_call" &&
				candidate.block.type === "toolCall" &&
				!candidate.block.argumentsDone
			) {
				return candidate;
			}
		}
		return lastOpenItem?.item.type === "function_call" ? lastOpenItem : undefined;
	};
	const closeOpenItem = (
		outputIndex: number | undefined,
		itemId: string | undefined,
		entry: StreamingItem | undefined,
	): void => {
		if (typeof outputIndex === "number") openItemsByOutputIndex.delete(outputIndex);
		if (itemId) openItemsByItemId.delete(itemId);
		if (entry) {
			const index = openItemsInOrder.indexOf(entry);
			if (index >= 0) openItemsInOrder.splice(index, 1);
		}
		if (entry && lastOpenItem === entry) lastOpenItem = null;
	};
	const contentIndexOf = (block: ThinkingContent | TextContent | StreamingToolCallBlock): number =>
		output.content.indexOf(block);

	// Single source of truth for failure messages so that `response.completed` (with a
	// failed/cancelled status) and `response.failed` report the same details.
	const describeFailedResponse = (response: OpenAI.Responses.Response | undefined): string => {
		const statusDetails = (response as any)?.status_details;
		const error = response?.error ?? statusDetails?.error;
		if (error) {
			return `${error.code || "unknown"}: ${error.message || "no message"}`;
		}
		const incompleteReason = response?.incomplete_details?.reason;
		if (incompleteReason) {
			return `incomplete: ${incompleteReason}`;
		}
		const statusDetailsReason = statusDetails?.reason;
		if (typeof statusDetailsReason === "string" && statusDetailsReason.length > 0) {
			return `status_details: ${statusDetailsReason}`;
		}
		return "Unknown error (no error details in response)";
	};

	let sawFirstToken = false;

	for await (const event of openaiStream) {
		if (event.type === "response.created") {
			output.responseId = event.response.id;
		} else if (event.type === "response.output_item.added") {
			if (!sawFirstToken) {
				sawFirstToken = true;
				options?.onFirstToken?.();
			}
			const item = event.item;
			if (item.type === "reasoning") {
				const block: ThinkingContent = { type: "thinking", thinking: "", itemId: item.id };
				output.content.push(block);
				registerOpenItem(event.output_index, item.id, { item, block });
				stream.push({ type: "thinking_start", contentIndex: contentIndexOf(block), partial: output });
			} else if (item.type === "message") {
				const block: TextContent = { type: "text", text: "" };
				output.content.push(block);
				registerOpenItem(event.output_index, item.id, { item, block });
				stream.push({ type: "text_start", contentIndex: contentIndexOf(block), partial: output });
			} else if (item.type === "function_call") {
				const block: StreamingToolCallBlock = {
					type: "toolCall",
					id: encodeResponsesToolCallId(item.call_id, item.id),
					name: item.name,
					arguments: {},
					partialJson: item.arguments || "",
				};
				output.content.push(block);
				registerOpenItem(event.output_index, item.id, { item, block });
				stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
			} else if (item.type === "custom_tool_call") {
				const block: StreamingToolCallBlock = {
					type: "toolCall",
					id: encodeResponsesToolCallId(item.call_id, item.id),
					// Preserve the raw wire name (e.g. `apply_patch`). The agent-loop
					// dispatcher matches it against both `Tool.name` and
					// `Tool.customWireName`, so this stays wire-accurate through
					// history replay while still routing to the right handler.
					name: item.name,
					arguments: { input: item.input ?? "" },
					customWireName: item.name,
					// Custom tools stream a raw string, but we reuse `partialJson` as the
					// accumulation buffer so later code that inspects the field still works.
					partialJson: item.input ?? "",
				};
				output.content.push(block);
				registerOpenItem(event.output_index, item.id, { item, block });
				stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
			}
		} else if (event.type === "response.reasoning_summary_part.added") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "reasoning") {
				entry.item.summary = entry.item.summary || [];
				entry.item.summary.push(event.part);
			}
		} else if (event.type === "response.reasoning_summary_text.delta") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "reasoning" && entry.block.type === "thinking") {
				entry.item.summary = entry.item.summary || [];
				const lastPart = entry.item.summary[entry.item.summary.length - 1];
				if (lastPart) {
					entry.block.thinking += event.delta;
					lastPart.text += event.delta;
					stream.push({
						type: "thinking_delta",
						contentIndex: contentIndexOf(entry.block),
						delta: event.delta,
						partial: output,
					});
				}
			}
		} else if (event.type === "response.reasoning_summary_part.done") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "reasoning" && entry.block.type === "thinking") {
				entry.item.summary = entry.item.summary || [];
				const lastPart = entry.item.summary[entry.item.summary.length - 1];
				if (lastPart) {
					// Separate consecutive summary parts with a blank line.
					entry.block.thinking += "\n\n";
					lastPart.text += "\n\n";
					stream.push({
						type: "thinking_delta",
						contentIndex: contentIndexOf(entry.block),
						delta: "\n\n",
						partial: output,
					});
				}
			}
		} else if (event.type === "response.reasoning_text.delta") {
			// Raw reasoning text delta from local providers that stream thinking
			// directly rather than via the OpenAI summary tracking protocol.
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "reasoning" && entry.block.type === "thinking") {
				entry.block.thinking += event.delta;
				stream.push({
					type: "thinking_delta",
					contentIndex: contentIndexOf(entry.block),
					delta: event.delta,
					partial: output,
				});
			}
		} else if (event.type === "response.content_part.added") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "message") {
				entry.item.content = entry.item.content || [];
				if (event.part.type === "output_text" || event.part.type === "refusal") {
					entry.item.content.push(event.part);
				}
			}
		} else if (event.type === "response.output_text.delta") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "message" && entry.block.type === "text") {
				const lastPart = entry.item.content?.[entry.item.content.length - 1];
				if (lastPart?.type === "output_text") {
					entry.block.text += event.delta;
					lastPart.text += event.delta;
					stream.push({
						type: "text_delta",
						contentIndex: contentIndexOf(entry.block),
						delta: event.delta,
						partial: output,
					});
				}
			}
		} else if (event.type === "response.refusal.delta") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "message" && entry.block.type === "text") {
				const lastPart = entry.item.content?.[entry.item.content.length - 1];
				if (lastPart?.type === "refusal") {
					entry.block.text += event.delta;
					lastPart.refusal += event.delta;
					stream.push({
						type: "text_delta",
						contentIndex: contentIndexOf(entry.block),
						delta: event.delta,
						partial: output,
					});
				}
			}
		} else if (event.type === "response.function_call_arguments.delta") {
			const entry = lookupOpenFunctionCallItem(event);
			if (entry?.item.type === "function_call" && entry.block.type === "toolCall") {
				const block = entry.block;
				block.partialJson += event.delta;
				const throttled = parseStreamingJsonThrottled(block.partialJson, block.lastParseLen ?? 0);
				if (throttled) {
					block.arguments = throttled.value;
					block.lastParseLen = throttled.parsedLen;
				}
				stream.push({
					type: "toolcall_delta",
					contentIndex: contentIndexOf(block),
					delta: event.delta,
					partial: output,
				});
			}
		} else if (event.type === "response.function_call_arguments.done") {
			const entry = lookupOpenFunctionCallItem(event);
			if (entry?.item.type === "function_call" && entry.block.type === "toolCall") {
				const block = entry.block;
				block.partialJson = event.arguments;
				block.arguments = parseStreamingJson(block.partialJson);
				block.argumentsDone = true;
				delete (block as { partialJson?: string }).partialJson;
				delete (block as { lastParseLen?: number }).lastParseLen;
			}
		} else if (event.type === "response.custom_tool_call_input.delta") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "custom_tool_call" && entry.block.type === "toolCall") {
				const block = entry.block;
				block.partialJson += event.delta;
				block.arguments = { input: block.partialJson };
				stream.push({
					type: "toolcall_delta",
					contentIndex: contentIndexOf(block),
					delta: event.delta,
					partial: output,
				});
			}
		} else if (event.type === "response.custom_tool_call_input.done") {
			const entry = lookupOpenItem(event);
			if (entry?.item.type === "custom_tool_call" && entry.block.type === "toolCall") {
				entry.block.partialJson = event.input;
				entry.block.arguments = { input: event.input };
			}
		} else if (event.type === "response.output_item.done") {
			// Work on a clone so neither the hook nor the code below mutates the event payload.
			const item = structuredCloneJSON(event.item);
			options?.onOutputItemDone?.(item);
			const entry = lookupOpenItem({ output_index: event.output_index, item_id: item.id });
			if (item.type === "reasoning") {
				const thinking =
					item.summary?.length > 0
						? item.summary.map(part => part.text).join("\n\n")
						: item.content?.[0]?.type === "reasoning_text"
							? (item.content[0].text ?? "")
							: "";
				const reasoningBlock = output.content.find(
					b => b.type === "thinking" && (b as ThinkingContent).itemId === item.id,
				) as ThinkingContent | undefined;
				if (reasoningBlock) {
					reasoningBlock.thinking = thinking;
					reasoningBlock.thinkingSignature = JSON.stringify(item);
					stream.push({
						type: "thinking_end",
						contentIndex: contentIndexOf(reasoningBlock),
						content: thinking,
						partial: output,
					});
				}
				closeOpenItem(event.output_index, item.id, entry);
			} else if (item.type === "message" && entry?.block.type === "text") {
				const block = entry.block;
				block.text = item.content
					.map(part => (part.type === "output_text" ? (part.text ?? "") : (part.refusal ?? "")))
					.join("");
				block.textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined);
				stream.push({
					type: "text_end",
					contentIndex: contentIndexOf(block),
					content: block.text,
					partial: output,
				});
				closeOpenItem(event.output_index, item.id, entry);
			} else if (item.type === "function_call") {
				const block = entry?.block.type === "toolCall" ? entry.block : undefined;
				const args = block?.argumentsDone
					? block.arguments
					: block?.partialJson
						? parseStreamingJson(block.partialJson)
						: parseStreamingJson(item.arguments || "{}");
				const toolCall: ToolCall = {
					type: "toolCall",
					id: encodeResponsesToolCallId(item.call_id, item.id),
					name: item.name,
					arguments: args,
				};
				if (block) {
					// Persist the authoritative final args on the stored block. The
					// throttled delta parser may have skipped the last partial parse,
					// leaving block.arguments stale (often `{}`); the emitted toolCall
					// and the persisted block must agree.
					block.arguments = args;
					delete (block as { partialJson?: string }).partialJson;
					delete (block as { lastParseLen?: number }).lastParseLen;
					delete (block as { argumentsDone?: boolean }).argumentsDone;
				}
				const contentIndex = block ? contentIndexOf(block) : output.content.length - 1;
				closeOpenItem(event.output_index, item.id, entry);
				stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
			} else if (item.type === "custom_tool_call") {
				const block = entry?.block.type === "toolCall" ? entry.block : undefined;
				const rawInput = block?.partialJson ? block.partialJson : (item.input ?? "");
				const toolCall: ToolCall = {
					type: "toolCall",
					id: encodeResponsesToolCallId(item.call_id, item.id),
					name: item.name,
					arguments: { input: rawInput },
					customWireName: item.name,
				};
				if (block) {
					// When no input delta/done event arrived, the buffer is empty and the
					// input comes from the finished item; keep the stored block in
					// agreement with the emitted toolCall, as the function_call branch does.
					block.arguments = { input: rawInput };
				}
				const contentIndex = block ? contentIndexOf(block) : output.content.length - 1;
				closeOpenItem(event.output_index, item.id, entry);
				stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
			}
		} else if (event.type === "response.completed") {
			const response = event.response;
			if (response?.id) {
				output.responseId = response.id;
			}
			populateResponsesUsageFromResponse(output, response?.usage);
			calculateCost(model, output.usage);
			output.stopReason = mapOpenAIResponsesStopReason(response?.status);
			if (response?.status === "failed" || response?.status === "cancelled") {
				throw new Error(describeFailedResponse(response));
			}
			if (output.content.some(block => block.type === "toolCall") && output.stopReason === "stop") {
				output.stopReason = "toolUse";
			}
		} else if (event.type === "error") {
			// A template literal is always truthy, so the fallback has to be chosen
			// explicitly when the event carries neither a code nor a message.
			const hasDetails = Boolean(event.code) || Boolean(event.message);
			throw new Error(hasDetails ? `Error Code ${event.code}: ${event.message}` : "Unknown error");
		} else if (event.type === "response.failed") {
			throw new Error(describeFailedResponse(event.response));
		}
	}
}
772
+
773
/**
 * Translate a Responses-API response status into the assistant message `StopReason`.
 *
 * - missing status, `completed`, `in_progress`, `queued` → `"stop"`
 * - `incomplete` → `"length"`
 * - `failed`, `cancelled` → `"error"`
 *
 * @throws Error for any other status value; the `never` assignment makes the compiler flag
 *   statuses that are added to the union but not handled here.
 */
export function mapOpenAIResponsesStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
	if (!status) return "stop";

	if (status === "failed" || status === "cancelled") return "error";
	if (status === "incomplete") return "length";
	if (status === "completed" || status === "in_progress" || status === "queued") return "stop";

	const exhaustive: never = status;
	throw new Error(`Unhandled stop reason: ${exhaustive}`);
}
792
+
793
/**
 * Build the empty `AssistantMessage` that streaming providers accumulate into.
 *
 * The message starts with no content, all usage and cost counters at zero, a `"stop"` stop
 * reason, and `timestamp` set to the current time.
 *
 * @param api - Stored as the message's `api`.
 * @param provider - Stored as the message's `provider`.
 * @param modelId - Stored as the message's `model`.
 */
export function createInitialResponsesAssistantMessage(api: Api, provider: string, modelId: string): AssistantMessage {
	const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	const zeroUsage = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: zeroCost };
	const message: AssistantMessage = {
		role: "assistant",
		content: [],
		api,
		provider,
		model: modelId,
		usage: zeroUsage,
		stopReason: "stop",
		timestamp: Date.now(),
	};
	return message;
}
813
+
814
/**
 * Extension fields we add on top of `ResponseCreateParamsStreaming` across the
 * Responses-family providers. Every field is an optional number.
 */
export type ResponsesSamplingParamsExtras = Partial<
	Record<"top_p" | "top_k" | "min_p" | "presence_penalty" | "repetition_penalty", number>
>;
822
+
823
/** Streaming create-params extended with the extra sampling fields this module may set. */
type CommonResponsesParams = OpenAI.Responses.ResponseCreateParamsStreaming & ResponsesSamplingParamsExtras;

/** The subset of `StreamOptions` consumed by the sampling-parameter mapping, plus an optional service tier. */
interface CommonSamplingOptions
	extends Pick<
		StreamOptions,
		"temperature" | "topP" | "topK" | "minP" | "presencePenalty" | "repetitionPenalty" | "maxTokens"
	> {
	serviceTier?: ServiceTier;
}
829
+
830
/**
 * Copy the common `StreamOptions` sampling settings onto a Responses request body
 * (max output tokens, temperature, top-p/k, min-p, presence/repetition penalties,
 * service tier). Mutates `params`; fields whose option is `undefined` are left untouched.
 *
 * `max_output_tokens` is suppressed when {@link Model.omitMaxOutputTokens} is `true`, so
 * proxies (notably Ollama) that forward to upstream APIs with an unknown output-token cap
 * can let the upstream apply its own default instead of 400-ing on `maxTokens` values that
 * reflect the model's context window rather than the upstream output limit.
 *
 * `service_tier` is only written when `shouldSendServiceTier` allows it for the provider and
 * the resolved tier is `"flex"`, `"scale"` or `"priority"`.
 *
 * @param params - Request body to mutate.
 * @param options - Caller-supplied sampling options; may be `undefined`.
 * @param model - Only `provider` and `omitMaxOutputTokens` are read.
 */
export function applyCommonResponsesSamplingParams<P extends CommonResponsesParams>(
	params: P,
	options: CommonSamplingOptions | undefined,
	model: Pick<Model, "provider" | "omitMaxOutputTokens">,
): void {
	if (options) {
		// A falsy maxTokens (including 0) never sets a cap.
		if (options.maxTokens && !model.omitMaxOutputTokens) {
			params.max_output_tokens = options.maxTokens;
		}
		if (options.temperature !== undefined) {
			params.temperature = options.temperature;
		}
		if (options.topP !== undefined) {
			params.top_p = options.topP;
		}
		if (options.topK !== undefined) {
			params.top_k = options.topK;
		}
		if (options.minP !== undefined) {
			params.min_p = options.minP;
		}
		if (options.presencePenalty !== undefined) {
			params.presence_penalty = options.presencePenalty;
		}
		if (options.repetitionPenalty !== undefined) {
			params.repetition_penalty = options.repetitionPenalty;
		}
	}

	const requestedTier = options?.serviceTier;
	if (!shouldSendServiceTier(requestedTier, model.provider)) {
		return;
	}
	const tier = resolveServiceTier(requestedTier, model.provider);
	if (tier === "flex" || tier === "scale" || tier === "priority") {
		params.service_tier = tier;
	}
}
858
+
859
/** Reasoning-related caller options read by `applyResponsesReasoningParams`. */
interface ReasoningOptions {
	/** Requested reasoning effort; an empty or missing value counts as "not requested". */
	reasoning?: string;
	/** Requested summary mode; `null` explicitly disables the summary field. */
	reasoningSummary?: "auto" | "detailed" | "concise" | null;
}
863
+
864
/**
 * Apply reasoning-related Responses parameters. Mutates `params` and may push a developer
 * message into `messages`. Does nothing when `model.reasoning` is falsy.
 *
 * Behavior for reasoning-capable models:
 * - `params.include` is set to request encrypted reasoning content, unless
 *   `includeEncryptedReasoning` is `false`.
 * - When `options.reasoning` is set or `options.reasoningSummary` is not `undefined`, the
 *   effort (default `"medium"`, optionally passed through `mapEffort`) and the summary
 *   (default `"auto"`, omitted when `reasoningSummary` is `null`) are written to
 *   `params.reasoning`.
 * - Otherwise, for models whose name starts with `gpt-5` (case-insensitive), the
 *   "Juice: 0" no-reasoning developer message is appended to `messages`.
 *
 * @param params - Request body to mutate.
 * @param model - Target model; `reasoning` and `name` are read.
 * @param options - Caller reasoning options; may be `undefined`.
 * @param messages - Conversation input; receives the developer message described above.
 * @param mapEffort - Optional translation of the requested effort to a provider value.
 * @param includeEncryptedReasoning - When `false`, `params.include` is left untouched.
 * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
 *   body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
 *   xAI Grok models that return HTTP 400 on any `reasoning.effort` value (e.g. grok-build,
 *   grok-4.20-0309-reasoning). When `true` and `options.reasoning` is set but
 *   `options.reasoningSummary` is absent, `params.reasoning` is intentionally omitted from the
 *   wire body entirely — these models reason natively at their own internal default effort
 *   level without needing explicit activation. Callers that pass `options.reasoning` for such
 *   models should expect this documented downgrade: the model will reason, but at its
 *   default effort.
 */
export function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(
	params: P,
	model: Model<Api>,
	options: ReasoningOptions | undefined,
	messages: ResponseInput,
	mapEffort?: (effort: string) => string,
	includeEncryptedReasoning: boolean = true,
	omitReasoningEffort: boolean = false,
): void {
	type ReasoningParam = NonNullable<OpenAI.Responses.ResponseCreateParamsStreaming["reasoning"]>;

	if (!model.reasoning) return;

	// Always request encrypted reasoning content so reasoning items can be replayed in
	// multi-turn conversations when store is false (items aren't persisted server-side, so
	// we must include the full content). See: https://github.com/uttamtrivedi/Prometheus/issues/41
	if (includeEncryptedReasoning) {
		params.include = ["reasoning.encrypted_content"];
	}

	const effort = options?.reasoning;
	const summary = options?.reasoningSummary;
	const reasoningRequested = Boolean(effort) || summary !== undefined;

	if (!reasoningRequested) {
		if (model.name.toLowerCase().startsWith("gpt-5")) {
			// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
			messages.push({
				role: "developer",
				content: [{ type: "input_text", text: "# Juice: 0 !important" }],
			});
		}
		return;
	}

	// Suppress the effort dial entirely when the upstream provider rejects
	// `reasoning.effort` for this model (xAI Grok models outside the
	// effort-capable allowlist 400 with "Model X does not support parameter
	// reasoningEffort"). Default is false to preserve existing behavior for
	// every non-xAI caller.
	if (omitReasoningEffort) {
		// Still honor reasoningSummary when explicitly requested; xAI accepts the
		// summary field on every reasoning-capable model. With only an effort level
		// requested, params.reasoning is intentionally left unset.
		if (summary !== undefined && summary !== null) {
			params.reasoning = { summary: summary || "auto" } as P["reasoning"] & ReasoningParam;
		}
		return;
	}

	const requestedEffort = effort || "medium";
	const wireEffort = mapEffort ? mapEffort(requestedEffort) : requestedEffort;
	const reasoningParams: ReasoningParam = { effort: wireEffort as ReasoningParam["effort"] };
	if (summary !== null) {
		reasoningParams.summary = summary || "auto";
	}
	params.reasoning = reasoningParams as P["reasoning"];
}
929
+
930
/**
 * Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke
 * `calculateCost`; every cost field is reset to zero.
 *
 * Cached input tokens are reported separately as `cacheRead` and subtracted from `input`.
 * The subtraction is clamped at zero so that a payload whose `cached_tokens` exceeds
 * `input_tokens` (or that omits `input_tokens`) cannot yield a negative token count.
 * `reasoningTokens` is only included when the payload reports a positive value.
 *
 * @param output - Assistant message whose `usage` is replaced.
 * @param usage - Usage payload; when `null` or `undefined`, `output` is left untouched.
 */
export function populateResponsesUsageFromResponse(
	output: AssistantMessage,
	usage:
		| {
				input_tokens?: number | null;
				output_tokens?: number | null;
				total_tokens?: number | null;
				input_tokens_details?: { cached_tokens?: number | null } | null;
				output_tokens_details?: { reasoning_tokens?: number | null } | null;
		  }
		| null
		| undefined,
): void {
	if (!usage) return;
	const cachedTokens = usage.input_tokens_details?.cached_tokens || 0;
	const reasoningTokens = usage.output_tokens_details?.reasoning_tokens || 0;
	output.usage = {
		// Never report a negative uncached-input count.
		input: Math.max(0, (usage.input_tokens || 0) - cachedTokens),
		output: usage.output_tokens || 0,
		cacheRead: cachedTokens,
		cacheWrite: 0,
		totalTokens: usage.total_tokens || 0,
		...(reasoningTokens > 0 ? { reasoningTokens } : {}),
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
	};
}