promptfoo 0.121.4 → 0.121.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. package/dist/src/{ListApp-DQkFNqE9.js → ListApp-BRUsT43Y.js} +1 -1
  2. package/dist/src/{accounts-Dy17bs4D.cjs → accounts-BIFntVWB.cjs} +4 -4
  3. package/dist/src/{accounts-F9d_5sMC.js → accounts-CLJHCDDb.js} +6 -6
  4. package/dist/src/{accounts-DhMYUUbu.js → accounts-CaLNYnf7.js} +4 -4
  5. package/dist/src/{accounts-DdJ2pHMI.js → accounts-bnyHT7Ju.js} +5 -5
  6. package/dist/src/{agentic-utils-w68v6_Dz.js → agentic-utils-B5krlibj.js} +3 -3
  7. package/dist/src/{agentic-utils-P172hM8B.js → agentic-utils-Ba67xmgs.js} +2 -2
  8. package/dist/src/{agentic-utils-qFlm6zes.js → agentic-utils-BclbiXiq.js} +3 -3
  9. package/dist/src/{agentic-utils-BpX5b23w.cjs → agentic-utils-D2x0wGhB.cjs} +2 -2
  10. package/dist/src/{agents-CgaMXvLM.js → agents-BGqaTDnr.js} +5 -5
  11. package/dist/src/{agents-8FDnTriG.js → agents-BV9yFpXX.js} +5 -5
  12. package/dist/src/{agents-aYPQLf8W.js → agents-BYdMl1UE.js} +4 -4
  13. package/dist/src/{agents-pQeBEXMm.js → agents-DhxWMCtH.js} +5 -5
  14. package/dist/src/{agents-D7-HGxUj.cjs → agents-DiWmQYH9.cjs} +4 -4
  15. package/dist/src/{agents-BahDpe5G.cjs → agents-WULPVjbH.cjs} +4 -4
  16. package/dist/src/{agents-DJ35I3Nt.js → agents-emVcx3yh.js} +5 -5
  17. package/dist/src/{agents-C-R_jfzI.js → agents-n6vPqV3i.js} +4 -4
  18. package/dist/src/{aimlapi-BCq3MHeL.js → aimlapi-BxqK9HF_.js} +7 -7
  19. package/dist/src/{aimlapi-qcK4OT55.cjs → aimlapi-BzLjZI_m.cjs} +6 -6
  20. package/dist/src/{aimlapi-BD6J9oKt.js → aimlapi-DR4pgeiC.js} +6 -6
  21. package/dist/src/{aimlapi-sgYnkE54.js → aimlapi-uPGp0Zdo.js} +7 -7
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-vjzrbgce.js +1 -0
  24. package/dist/src/app/assets/index-B3NQ8HTd.js +385 -0
  25. package/dist/src/app/assets/{index-BXGkeMwh.css → index-Cli2yAXv.css} +1 -1
  26. package/dist/src/app/index.html +27 -2
  27. package/dist/src/{audio-DcVKoInv.js → audio-BvpTOArF.js} +4 -4
  28. package/dist/src/{audio-BQtNuYBj.cjs → audio-C0vDeS0j.cjs} +3 -3
  29. package/dist/src/{audio-B7izf48x.js → audio-CScmnmEB.js} +4 -4
  30. package/dist/src/{audio-COrn8rM6.js → audio-Da8U9IS5.js} +3 -3
  31. package/dist/src/{base-fZ9wgg50.js → base-BOMaNEes.js} +3 -3
  32. package/dist/src/{base-PYJvBE1i.js → base-BTux96b1.js} +2 -2
  33. package/dist/src/{base-D-670DX8.cjs → base-Tw6uhH8K.cjs} +2 -2
  34. package/dist/src/{base-yrI1Yal4.js → base-dYsl2hmL.js} +3 -3
  35. package/dist/src/{blobs-D2FAd1Q5.cjs → blobs-B95F_7vE.cjs} +2 -2
  36. package/dist/src/{blobs-C-F78Kfn.js → blobs-BW4U31ue.js} +2 -2
  37. package/dist/src/{blobs-BCZavS8s.js → blobs-D_gg8nbm.js} +3 -3
  38. package/dist/src/{blobs-BQWqnnvL.js → blobs-DjLby-uP.js} +3 -3
  39. package/dist/src/{cache-mb7c8hbp.js → cache-BI5BY7ey.js} +4 -4
  40. package/dist/src/{cache-DbLsVWB2.cjs → cache-BRkhlH3k.cjs} +1 -1
  41. package/dist/src/cache-BlC6aeJ0.js +3 -0
  42. package/dist/src/{cache-D5NZmMiT.js → cache-Bzttsk0X.js} +2 -2
  43. package/dist/src/{cache-C4Xb-hNb.js → cache-Cr-qWIbP.js} +3 -3
  44. package/dist/src/{cache-BIyPcp5v.cjs → cache-DGg-yTZG.cjs} +2 -2
  45. package/dist/src/{chat-Dr3DUQ0D.js → chat-BLOdH60v.js} +12 -12
  46. package/dist/src/{chat-BfPaS15_.js → chat-Cx_LkwvZ.js} +12 -12
  47. package/dist/src/{chat-mW0ORo8G.js → chat-D9nudO9b.js} +4 -4
  48. package/dist/src/{chat-I9izLm49.js → chat-DChSH_Es.js} +12 -12
  49. package/dist/src/{chat-MKxMnZJZ.js → chat-DG2LkwLq.js} +2 -2
  50. package/dist/src/{chat-BPXSW8Bv.cjs → chat-DH97tVV9.cjs} +2 -2
  51. package/dist/src/{chat-0bwXjVP0.js → chat-aMQZw6R7.js} +4 -4
  52. package/dist/src/{chat-CclRbxGf.cjs → chat-vYqqv1gP.cjs} +11 -11
  53. package/dist/src/{chatkit-zUIVoDos.js → chatkit-B8X34dQc.js} +4 -4
  54. package/dist/src/{chatkit-Cv6AhukM.js → chatkit-BXu42Qwt.js} +3 -3
  55. package/dist/src/{chatkit-CJnHRRMM.js → chatkit-CbMRoeYw.js} +4 -4
  56. package/dist/src/{chatkit-BoWoSgXl.cjs → chatkit-D44VyUyB.cjs} +3 -3
  57. package/dist/src/{claude-agent-sdk-CPJo3dBQ.cjs → claude-agent-sdk-BRq0bbIK.cjs} +8 -8
  58. package/dist/src/{claude-agent-sdk-BQNuLaAK.js → claude-agent-sdk-BjriSVRZ.js} +7 -7
  59. package/dist/src/{claude-agent-sdk-Dtq_L-Sc.js → claude-agent-sdk-BzNZeZ0N.js} +7 -7
  60. package/dist/src/{claude-agent-sdk-nfAIcxNf.js → claude-agent-sdk-DYv_AJ8u.js} +7 -7
  61. package/dist/src/cloud-CoD5OacT.js +3 -0
  62. package/dist/src/{cloud-DQZ5sVjW.js → cloud-Da0bofJd.js} +3 -3
  63. package/dist/src/{cloudflare-ai-BIB567w6.js → cloudflare-ai-CXC4b1EU.js} +4 -4
  64. package/dist/src/{cloudflare-ai-DlKr0rY7.js → cloudflare-ai-CyBoIs1Q.js} +6 -6
  65. package/dist/src/{cloudflare-ai-DGLte7Py.js → cloudflare-ai-DGOwgexC.js} +6 -6
  66. package/dist/src/{cloudflare-ai-Dl3N9OVD.cjs → cloudflare-ai-DJv5qnyb.cjs} +4 -4
  67. package/dist/src/{cloudflare-gateway-BDZrYydE.js → cloudflare-gateway-1sAoOyft.js} +5 -5
  68. package/dist/src/{cloudflare-gateway-CiIZHU0Q.js → cloudflare-gateway-D-dnkzCF.js} +5 -5
  69. package/dist/src/{cloudflare-gateway-BYDp495F.cjs → cloudflare-gateway-DKVjkDav.cjs} +3 -3
  70. package/dist/src/{cloudflare-gateway-DI1HNP5F.js → cloudflare-gateway-TJkVrZlB.js} +3 -3
  71. package/dist/src/codex-app-server-CCLjqCh9.js +1915 -0
  72. package/dist/src/codex-app-server-CCe0TiDc.js +1915 -0
  73. package/dist/src/codex-app-server-CPW1LFwh.js +1916 -0
  74. package/dist/src/codex-app-server-VMRnjZ68.cjs +1920 -0
  75. package/dist/src/codex-sdk-1jm_qPHf.js +3 -0
  76. package/dist/src/{codex-sdk-C2_M2pl_.cjs → codex-sdk-Bd8UbO9q.cjs} +5 -5
  77. package/dist/src/{codex-sdk-CpqiOqDO.js → codex-sdk-BgEFQ70r.js} +6 -6
  78. package/dist/src/{codex-sdk-Rtky3M4I.js → codex-sdk-Bzb_TqX9.js} +6 -6
  79. package/dist/src/{codex-sdk-CWEnH70W.cjs → codex-sdk-Danroptg.cjs} +1 -1
  80. package/dist/src/{codex-sdk-CErXn7qh.js → codex-sdk-DfvDTN33.js} +5 -5
  81. package/dist/src/{cometapi-CtJ-mS8R.js → cometapi-B5ImDlSm.js} +8 -8
  82. package/dist/src/{cometapi-UVOryo4W.cjs → cometapi-BgAkuYCw.cjs} +7 -7
  83. package/dist/src/{cometapi-BUlt_ELa.js → cometapi-CC7hWxmX.js} +8 -8
  84. package/dist/src/{cometapi-DT-jlVCB.js → cometapi-CCbpHkuF.js} +7 -7
  85. package/dist/src/{completion-x0a_c2y1.js → completion-2iuYVxwi.js} +6 -6
  86. package/dist/src/{completion-Dnxn7E-j.js → completion-CrD6MQ93.js} +5 -5
  87. package/dist/src/{completion-BozdoXba.cjs → completion-DtQ72Bm3.cjs} +5 -5
  88. package/dist/src/{completion-HUe8wDhZ.js → completion-Vq_ad618.js} +6 -6
  89. package/dist/src/{createHash-ChI45QR1.js → createHash-DPpsZgFF.js} +1 -1
  90. package/dist/src/{createHash-CwDVU5xr.js → createHash-Un4Q_huE.js} +1 -1
  91. package/dist/src/{createHash-B7KvgoOD.cjs → createHash-VvBIc-AW.cjs} +1 -1
  92. package/dist/src/{docker-DCgsveLD.js → docker--3qzPa-6.js} +6 -6
  93. package/dist/src/{docker-DS4_Osau.cjs → docker-D3AY-5F5.cjs} +5 -5
  94. package/dist/src/{docker-CQmlA2NU.js → docker-DCsCDvwM.js} +6 -6
  95. package/dist/src/{docker-ClnmCf1Z.js → docker-Dorv4_Dg.js} +5 -5
  96. package/dist/src/{embedding-I45KG3o7.cjs → embedding-BXhN5lCH.cjs} +5 -5
  97. package/dist/src/{embedding-nFbumxcv.js → embedding-ChS1ivFS.js} +5 -5
  98. package/dist/src/{embedding-D3xTseo7.js → embedding-DNRvZwRN.js} +6 -6
  99. package/dist/src/{embedding-DD9wa3ae.js → embedding-D_bI4NDq.js} +6 -6
  100. package/dist/src/{errors-Cw810C93.js → errors-DFHe4L-n.js} +1 -1
  101. package/dist/src/{esm-Dh4dOLlt.js → esm-B6whoAcf.js} +2 -2
  102. package/dist/src/{esm-C7PnfdF8.js → esm-BRkfNsYs.js} +1 -1
  103. package/dist/src/{esm-tVgYPY-f.js → esm-BX8fwlAO.js} +2 -2
  104. package/dist/src/{esm-CtEPLdAj.cjs → esm-B_rGuPTo.cjs} +1 -1
  105. package/dist/src/{eval-CzJFfFO9.js → eval-BQPLBJbw.js} +1 -1
  106. package/dist/src/{eval-u4UVafl6.js → eval-DJ_4A-tr.js} +14 -14
  107. package/dist/src/evalResult-BBJAHAtw.cjs +2 -0
  108. package/dist/src/evalResult-BBK58h2B.js +3 -0
  109. package/dist/src/{evalResult-KZqXl4XP.cjs → evalResult-Cx-8OWkb.cjs} +28 -10
  110. package/dist/src/{evalResult-D3hVYFis.js → evalResult-D6P5I5il.js} +29 -11
  111. package/dist/src/{evalResult-Bgm9ZH31.js → evalResult-pSvGWFMo.js} +29 -11
  112. package/dist/src/{evaluator-IvuDYSvQ.js → evaluator-D-UIbbYq.js} +845 -98
  113. package/dist/src/evaluator-DgLKaZk8.js +3 -0
  114. package/dist/src/{extractor-Dk6bRWkv.js → extractor-BM3jRERL.js} +5 -5
  115. package/dist/src/{extractor-WVPOrH43.cjs → extractor-Dxr2J_wK.cjs} +5 -5
  116. package/dist/src/{extractor-DNSeBVOJ.js → extractor-DxyiFhPk.js} +6 -6
  117. package/dist/src/{extractor-CAfTSraf.js → extractor-YlZbUMsL.js} +6 -6
  118. package/dist/src/fetch-8viavNv8.js +3 -0
  119. package/dist/src/{fetch-BEWnXrrG.js → fetch-B6ch2nU2.js} +9 -20
  120. package/dist/src/{fetch-Di00EQrc.js → fetch-D9xxyC1p.js} +221 -232
  121. package/dist/src/{fetch-CJU5ELPa.cjs → fetch-NuqXW1Xb.cjs} +221 -244
  122. package/dist/src/{fetch-B0Z3Oe4k.js → fetch-Y5qX_kST.js} +8 -19
  123. package/dist/src/{fileExtensions-BArZuxsI.js → fileExtensions-8CjoL7vB.js} +1 -1
  124. package/dist/src/{fileExtensions-DnqA1y9x.js → fileExtensions-BGh-W-HT.js} +1 -1
  125. package/dist/src/{fileExtensions-bYh77CN8.cjs → fileExtensions-D9h-8Wxg.cjs} +1 -1
  126. package/dist/src/{fileExtensions-AWa2ZML4.js → fileExtensions-DysCsxNG.js} +1 -1
  127. package/dist/src/{formatDuration-DZzPsexs.js → formatDuration-Ch4A7G3o.js} +1 -1
  128. package/dist/src/{genaiTracer-yRuxj9-L.cjs → genaiTracer-BokHC-MW.cjs} +1 -1
  129. package/dist/src/{genaiTracer-DWdZ28hY.js → genaiTracer-C3ZPQU60.js} +1 -1
  130. package/dist/src/{genaiTracer-XnrcgDCe.js → genaiTracer-CFny3gOy.js} +1 -1
  131. package/dist/src/{genaiTracer-COYDi-tC.js → genaiTracer-DxODqT9e.js} +1 -1
  132. package/dist/src/{graders-Zy3x0zqX.js → graders-BoUqsCEm.js} +1303 -2044
  133. package/dist/src/{graders--zknU_uk.cjs → graders-Bw1wk_21.cjs} +1553 -2240
  134. package/dist/src/graders-C84JI-m5.js +2 -0
  135. package/dist/src/graders-CBbd0K0Q.cjs +2 -0
  136. package/dist/src/graders-CbQqpHSN.js +3 -0
  137. package/dist/src/{graders-eIHhRqoC.js → graders-CgPn32yp.js} +1300 -2041
  138. package/dist/src/{graders-pvbReLLn.js → graders-CwrbifOo.js} +747 -1488
  139. package/dist/src/graders-DS42d3ZG.js +2 -0
  140. package/dist/src/{image-9302QVqR.js → image-BeWaInPF.js} +3 -3
  141. package/dist/src/{image-DVz2RiMF.js → image-BmilRNqO.js} +7 -7
  142. package/dist/src/{image-x6KqLQl4.cjs → image-CxJoa3aW.cjs} +6 -6
  143. package/dist/src/{image-De2FBmYV.cjs → image-D10dNAav.cjs} +3 -3
  144. package/dist/src/{image-dnoUgPrC.js → image-Dr_3I3nK.js} +4 -4
  145. package/dist/src/{image-B5Mv-Z3h.js → image-DsGRlkh7.js} +7 -7
  146. package/dist/src/{image-qUpPvmNZ.js → image-a_SGUobh.js} +6 -6
  147. package/dist/src/{image-u7-rKnYU.js → image-qjO6FWPs.js} +4 -4
  148. package/dist/src/index.cjs +1052 -296
  149. package/dist/src/index.d.cts +124 -13
  150. package/dist/src/index.d.ts +125 -14
  151. package/dist/src/index.js +1018 -262
  152. package/dist/src/{interactiveCheck-CLERUB0c.js → interactiveCheck-CCICw2cy.js} +2 -2
  153. package/dist/src/{invariant-BtWWVVhl.js → invariant-B2Rf6avk.js} +1 -1
  154. package/dist/src/{invariant-vgHWClmd.js → invariant-DIYf9sP1.js} +1 -1
  155. package/dist/src/{knowledgeBase-RhFPGWDc.js → knowledgeBase-BBETc5-S.js} +6 -6
  156. package/dist/src/{knowledgeBase-Bpoe_nLu.cjs → knowledgeBase-C8qOo26M.cjs} +5 -5
  157. package/dist/src/{knowledgeBase-lm9RXSAm.js → knowledgeBase-CzAi2rUI.js} +6 -6
  158. package/dist/src/{knowledgeBase-Dgc7CBWF.js → knowledgeBase-Dr3Kib7F.js} +5 -5
  159. package/dist/src/{litellm-C2kqjxqp.js → litellm-BLSiANhk.js} +5 -5
  160. package/dist/src/{litellm-CoyI4IAl.cjs → litellm-CaUmV7Mk.cjs} +4 -4
  161. package/dist/src/{litellm-p37R1dzQ.js → litellm-DQGo_juI.js} +4 -4
  162. package/dist/src/{litellm-DRjpcSa7.js → litellm-DRc4qWfc.js} +5 -5
  163. package/dist/src/{logger-DksKw1Qc.js → logger-BbY6ypFL.js} +2 -2
  164. package/dist/src/{logger-B88EkIn6.js → logger-KD8JjCRJ.js} +2 -2
  165. package/dist/src/{luma-ray-KgTCXrZC.js → luma-ray-B-tNZzqW.js} +6 -6
  166. package/dist/src/{luma-ray-B863CmuZ.js → luma-ray-CtS3OlGq.js} +5 -5
  167. package/dist/src/{luma-ray-BTTLtqQ8.js → luma-ray-PJJgUjOc.js} +6 -6
  168. package/dist/src/{luma-ray-BxVKaW2a.cjs → luma-ray-if-Ml4R9.cjs} +5 -5
  169. package/dist/src/main.js +242 -198
  170. package/dist/src/{messages-zWbkLLHz.js → messages-B9dSjrNf.js} +264 -16
  171. package/dist/src/{messages-811uVVW5.cjs → messages-BnsVHUnm.cjs} +266 -15
  172. package/dist/src/{messages-MYTQ2TWp.js → messages-CI69Lasb.js} +264 -16
  173. package/dist/src/{messages-BTQz42fn.js → messages-CewuNcNS.js} +264 -16
  174. package/dist/src/{meteor-Co1VQ1u5.cjs → meteor-BBGcGeCa.cjs} +1 -1
  175. package/dist/src/{meteor-DuAFv6gF.js → meteor-BKTM-7KS.js} +1 -1
  176. package/dist/src/{meteor-DHdzY1Ss.js → meteor-CeGo0Lu2.js} +2 -2
  177. package/dist/src/{meteor-CU5UAE-H.js → meteor-Wc_aUVvu.js} +2 -2
  178. package/dist/src/{modelslab-wu9yi5GE.js → modelslab-BCLOtfek.js} +7 -7
  179. package/dist/src/{modelslab-Dk1JAtVo.cjs → modelslab-BkapYJhh.cjs} +6 -6
  180. package/dist/src/{modelslab-DIq-6y7x.js → modelslab-D73OnKSx.js} +6 -6
  181. package/dist/src/{modelslab-D0erNWKe.js → modelslab-zpz9JcK0.js} +7 -7
  182. package/dist/src/{nova-reel-CCFRfeRb.js → nova-reel-B8F_TK5w.js} +6 -6
  183. package/dist/src/{nova-reel-DQrm74ng.js → nova-reel-Bx0NFV2f.js} +5 -5
  184. package/dist/src/{nova-reel-gr11WG7f.js → nova-reel-CNGJTLtG.js} +6 -6
  185. package/dist/src/{nova-reel-CrLXVKQf.cjs → nova-reel-DkT7tnoB.cjs} +5 -5
  186. package/dist/src/{nova-sonic-BYdp-QLs.js → nova-sonic-BaXRN1cr.js} +4 -4
  187. package/dist/src/{nova-sonic-TDgrlTk7.js → nova-sonic-BeTRaFOh.js} +4 -4
  188. package/dist/src/{nova-sonic-B_ZXcUJB.js → nova-sonic-CL7Zqv0G.js} +3 -3
  189. package/dist/src/{nova-sonic-i5tUvXKn.cjs → nova-sonic-YT426juD.cjs} +3 -3
  190. package/dist/src/{openai-DhVEmgeZ.js → openai-BMHD2Huo.js} +2 -2
  191. package/dist/src/{openai-Qsvz25mV.js → openai-BT-JvDse.js} +2 -2
  192. package/dist/src/{openai-URNyItar.cjs → openai-Cy1XLs0c.cjs} +1 -1
  193. package/dist/src/{openai-iYtrXzOX.js → openai-D4fxGvRx.js} +1 -1
  194. package/dist/src/{openclaw-CwzlQSQX.js → openclaw-Bq7RVR3k.js} +7 -6
  195. package/dist/src/{openclaw-CLWrW03k.js → openclaw-DA8U4DsD.js} +8 -7
  196. package/dist/src/{openclaw-CnQ363Wi.js → openclaw-DObVgpjC.js} +8 -7
  197. package/dist/src/{openclaw-wX9rtfke.cjs → openclaw-DUBZP3GL.cjs} +8 -7
  198. package/dist/src/{opencode-sdk-BUu5Nevv.js → opencode-sdk-BB40Wir1.js} +4 -4
  199. package/dist/src/{opencode-sdk-GI2KaAXq.js → opencode-sdk-BM1UAIv1.js} +3 -3
  200. package/dist/src/{opencode-sdk-BZ2idgYA.cjs → opencode-sdk-CeqiOcOU.cjs} +4 -4
  201. package/dist/src/{opencode-sdk-BxD8vXp_.js → opencode-sdk-ChdK7F7z.js} +4 -4
  202. package/dist/src/{otlpReceiver-DmVulbhC.js → otlpReceiver-C6thJRXi.js} +4 -4
  203. package/dist/src/{otlpReceiver-B2z58l4e.js → otlpReceiver-CcdIikOu.js} +3 -3
  204. package/dist/src/{otlpReceiver-BfcVq2Nq.cjs → otlpReceiver-DNSQj6bf.cjs} +3 -3
  205. package/dist/src/{otlpReceiver-BntK801g.js → otlpReceiver-UYMQx3sy.js} +4 -4
  206. package/dist/src/{providerRegistry-CPQ_CmVO.js → providerRegistry-1gB5vtzQ.js} +2 -2
  207. package/dist/src/{providerRegistry-CQMdTmHP.cjs → providerRegistry-BESeALrr.cjs} +1 -1
  208. package/dist/src/{providerRegistry-Bvh8mv85.js → providerRegistry-DoACwqhD.js} +1 -1
  209. package/dist/src/{providerRegistry-CWoPjKFZ.js → providerRegistry-PMsleEzs.js} +2 -2
  210. package/dist/src/{providers-Bp4S-FvO.js → providers-BuyzKt7C.js} +1 -1
  211. package/dist/src/{providers-DV3ax9e_.cjs → providers-C7lNVBjX.cjs} +1 -1
  212. package/dist/src/{providers-u9Enmfok.js → providers-CCE2COJi2.js} +1 -1
  213. package/dist/src/{providers-DruaQfwu.js → providers-CJh7iriU.js} +18103 -17952
  214. package/dist/src/{providers-iUt5fbAN.js → providers-Ctcc592x.js} +1 -1
  215. package/dist/src/{providers-Domz_llv.js → providers-DRrerKra.js} +432 -281
  216. package/dist/src/{providers-BV_KMZje.js → providers-DT-GtF2t.js} +19094 -18943
  217. package/dist/src/{providers-1eKkXBKp.cjs → providers-eDShy16E.cjs} +17946 -17795
  218. package/dist/src/{pythonUtils-Cldx7huE.js → pythonUtils-C4tltmIn.js} +3 -3
  219. package/dist/src/{pythonUtils-tAJvvpS-.cjs → pythonUtils-CoLaCwNY.cjs} +3 -3
  220. package/dist/src/{pythonUtils-C2UQ30Rz.js → pythonUtils-DMO68Jg7.js} +3 -3
  221. package/dist/src/{pythonUtils-CnndUbW-.js → pythonUtils-DNqbnRdx.js} +3 -3
  222. package/dist/src/{quiverai-DR0SnIQV.js → quiverai-BSS9a7wV.js} +3 -3
  223. package/dist/src/{quiverai-CtWi6x_g.js → quiverai-Bk1KrvL6.js} +4 -4
  224. package/dist/src/{quiverai-DFotyafY.cjs → quiverai-Bpx6MZ7T.cjs} +3 -3
  225. package/dist/src/{quiverai-aPPvXOgn.js → quiverai-CPKhWgaT.js} +4 -4
  226. package/dist/src/{render-DHIZ6_k8.js → render-7uNJ2V14.js} +2 -2
  227. package/dist/src/{render-CH-62LbA.js → render-DlscvAUJ.js} +1 -1
  228. package/dist/src/{render-CMEpfLaO.js → render-eui5p5mL.js} +2 -2
  229. package/dist/src/{render-CgVDrJmM.js → render-nj-UaPdn.js} +2 -2
  230. package/dist/src/{render-DfQSFxGE.cjs → render-tG6ir9_g.cjs} +1 -1
  231. package/dist/src/{responses--OsX2aYW.js → responses-1ztiVYsx.js} +49 -15
  232. package/dist/src/{responses-DL9m8CyY.js → responses-B8haB-mD.js} +49 -15
  233. package/dist/src/{responses-C-flexAY.js → responses-BiaBguAu.js} +49 -15
  234. package/dist/src/{responses-Bi9vBuW_.cjs → responses-CF-ayauu.cjs} +48 -14
  235. package/dist/src/rubyUtils-4hjGxvju.js +3 -0
  236. package/dist/src/{rubyUtils-DVLeA2jg.js → rubyUtils-BI0p46eZ.js} +3 -3
  237. package/dist/src/{rubyUtils-DsGrTx8R.js → rubyUtils-CIQFnVz4.js} +3 -3
  238. package/dist/src/rubyUtils-CO-tuszQ.cjs +2 -0
  239. package/dist/src/{rubyUtils-CYSQEG4a.js → rubyUtils-DGnoCYL2.js} +3 -3
  240. package/dist/src/{rubyUtils-B6eljPuh.cjs → rubyUtils-DoifqkiA.cjs} +4 -3
  241. package/dist/src/{sagemaker-BveBvuxm.js → sagemaker-BDLeW29y.js} +12 -12
  242. package/dist/src/{sagemaker-D67yzMzs.js → sagemaker-C5T60MKf.js} +13 -13
  243. package/dist/src/{sagemaker-BVkaG2-l.js → sagemaker-ClS_NB07.js} +13 -13
  244. package/dist/src/{sagemaker-XnfhheQv.cjs → sagemaker-ljtY12VM.cjs} +12 -12
  245. package/dist/src/{scanner-1DqWi1Ej.js → scanner-nOCWNIXa.js} +7 -7
  246. package/dist/src/server/index.js +1067 -265
  247. package/dist/src/{server-Dx2TyCH2.cjs → server-BEECpeGG.cjs} +5 -5
  248. package/dist/src/{server-BNYztJkh.js → server-ByiF3qlg.js} +9 -8
  249. package/dist/src/{server-BSB45Nt9.js → server-ByxbqAcQ.js} +8 -7
  250. package/dist/src/{server-DaA2eR26.cjs → server-C0XKRNB_.cjs} +1 -1
  251. package/dist/src/server-C_15p79-.js +3 -0
  252. package/dist/src/{server-D6Il2Sob.js → server-gyd6d4Hc.js} +5 -5
  253. package/dist/src/{signal-CE5G3a7x.js → signal-DTtUuU3l.js} +3 -3
  254. package/dist/src/{slack-acRb0IqQ.js → slack-4zZX1OKP.js} +1 -1
  255. package/dist/src/{slack-1Rhq0EoV.cjs → slack-BLlsDpfG.cjs} +1 -1
  256. package/dist/src/{slack-D5Wpy8LM.js → slack-BPYLQLgb.js} +2 -2
  257. package/dist/src/{slack-DDUe-5MC.js → slack-Bamy_7te.js} +2 -2
  258. package/dist/src/{store-DAAyxcy6.cjs → store-2K0kDi80.cjs} +2 -2
  259. package/dist/src/{store-Dn9HUkdW.js → store-2OXm_eBY.js} +3 -3
  260. package/dist/src/store-BELqNwvz.js +3 -0
  261. package/dist/src/{store-M0b1WfYb.js → store-BPkzEyFM.js} +2 -2
  262. package/dist/src/{store-CYEy5J2D.js → store-CPh25336.js} +3 -3
  263. package/dist/src/store-uQZ4AjPe.cjs +2 -0
  264. package/dist/src/{tables-CsWou1Bx.js → tables-BMSOS2Gg.js} +3 -3
  265. package/dist/src/{tables-DUfh1F7Z.cjs → tables-CXbaZ9y1.cjs} +2 -2
  266. package/dist/src/{tables-C4CH3zRr.js → tables-NlvH23ky.js} +3 -3
  267. package/dist/src/{tables-DQ4WU5tX.js → tables-WgdUZ8Ck.js} +2 -2
  268. package/dist/src/{telemetry-dbaJ0E98.js → telemetry--iqaGyaS.js} +5 -4
  269. package/dist/src/{telemetry-Dsw_faFj.cjs → telemetry-CEQxGnMZ.cjs} +7 -6
  270. package/dist/src/{telemetry-Dvqxv3YC.js → telemetry-CgdVGV8N.js} +4 -3
  271. package/dist/src/{telemetry-CQPez_Jp.js → telemetry-DWdGHvEf.js} +5 -4
  272. package/dist/src/telemetry-DjNoC_n3.cjs +2 -0
  273. package/dist/src/telemetry-ZdPZc0fm.js +3 -0
  274. package/dist/src/{text-BVi-cLPJ.cjs → text-BiNME7QG.cjs} +1 -1
  275. package/dist/src/{text-KvuD2Iko.js → text-D4lz-Jg_.js} +1 -1
  276. package/dist/src/{text-DHxdyQqT.js → text-DDQP0tuQ.js} +1 -1
  277. package/dist/src/{text-CZr46tp_.js → text-NWvfMfkF.js} +1 -1
  278. package/dist/src/{tokenUsageUtils-CXrvO-wA.js → tokenUsageUtils-2wIvAhB3.js} +1 -1
  279. package/dist/src/{tokenUsageUtils-C-bmyHoE.js → tokenUsageUtils-4c780gFd.js} +1 -1
  280. package/dist/src/tokenUsageUtils-BjVkdk18.js +142 -0
  281. package/dist/src/{tokenUsageUtils-Bb7DkZPz.cjs → tokenUsageUtils-C9odhsbW.cjs} +1 -1
  282. package/dist/src/{transcription-DuWDupG7.js → transcription-84t4ALo2.js} +5 -5
  283. package/dist/src/{transcription-CJspiD2c.js → transcription-Bm2emLmJ.js} +6 -6
  284. package/dist/src/{transcription-BvjmiYB1.cjs → transcription-CZ4LG5hQ.cjs} +5 -5
  285. package/dist/src/{transcription-V2HaAmy2.js → transcription-D7Q0vJsh.js} +6 -6
  286. package/dist/src/{transform-zDhMmzwX.js → transform-B-b6Cq-q.js} +5 -5
  287. package/dist/src/transform-BQt0BeAW.js +3 -0
  288. package/dist/src/{transform-DgKlRr73.cjs → transform-Bq5oqC0s.cjs} +1 -1
  289. package/dist/src/{transform-CUnzlsbn.cjs → transform-C9izGX54.cjs} +4 -4
  290. package/dist/src/{transform-DYX1_Xnh.js → transform-CwbAZ84V.js} +5 -5
  291. package/dist/src/{transform-CTeuTR3S.cjs → transform-Dg4LcO1Y.cjs} +6 -6
  292. package/dist/src/{transform-CG0ehZNG.js → transform-DtooZqYY.js} +6 -6
  293. package/dist/src/{transform-UN5UGu8U.js → transform-DzCF-wqV.js} +5 -5
  294. package/dist/src/{transform-lQrDE1BQ.js → transform-_DpNB4qp.js} +5 -5
  295. package/dist/src/{transform-Bbg6A8Jk.js → transform-eGiUAv86.js} +4 -4
  296. package/dist/src/{transformersAvailability-Cju9mHgR.cjs → transformersAvailability-B22swDxr.cjs} +1 -1
  297. package/dist/src/{transformersAvailability-CcHusyhw.js → transformersAvailability-lvCCvuPT.js} +1 -1
  298. package/dist/src/{transformersAvailability-DLlROWhg.js → transformersAvailability-rJGPccjr.js} +1 -1
  299. package/dist/src/{types-Bgh5SOn6.js → types-BDjGOq4E.js} +4 -2
  300. package/dist/src/{types-Dm9JM6Vb.js → types-BVH9hjgW.js} +4 -2
  301. package/dist/src/{types-CeaeaZdP.cjs → types-CgG2rKiW.cjs} +151 -149
  302. package/dist/src/{types-BGQDAP8i.js → types-DNRZVOue.js} +152 -150
  303. package/dist/src/{util-C8e5uydV.js → util-3pBZZb_H.js} +142 -17
  304. package/dist/src/{util-CN3SrLT4.cjs → util-A5_ZsQUn.cjs} +65 -43
  305. package/dist/src/{util-D3q0WQ-0.js → util-B9CNhyac.js} +66 -44
  306. package/dist/src/{util-DxWpWjhc.js → util-BQOCAHQC.js} +700 -575
  307. package/dist/src/{util-BYvQUPp7.js → util-BVXcTwXu.js} +3 -3
  308. package/dist/src/{util-D9TisOyk.js → util-BlFVL0UF.js} +65 -43
  309. package/dist/src/{util-C9J8ahRn.js → util-C-kmRosx.js} +66 -44
  310. package/dist/src/{util-DvU2Pw8c.js → util-DFPeFkiV.js} +3 -3
  311. package/dist/src/{util-DDs-7g6-.js → util-DN0-b81k.js} +3 -3
  312. package/dist/src/{util-olYL5C6N.cjs → util-Dpmm_dAI.cjs} +3 -3
  313. package/dist/src/{util-oGMLA7vc.js → util-Dub0f_ej.js} +700 -575
  314. package/dist/src/{util-Bxn8emtE.cjs → util-DvpHnLt0.cjs} +718 -570
  315. package/dist/src/{utils-DJfvjyMj.js → utils-BUMN8orw.js} +3 -3
  316. package/dist/src/{utils-B05gLxER.cjs → utils-DkVeShIB.cjs} +2 -2
  317. package/dist/src/{utils-BLJKfv0y.js → utils-kt7lv30R.js} +3 -3
  318. package/dist/src/{utils-hXtCYanr.js → utils-o8S5huU2.js} +2 -2
  319. package/dist/src/version-0frU0UTr.js +16 -0
  320. package/dist/src/version-CbpiUINz.js +17 -0
  321. package/dist/src/version-CbuBKu2U.js +16 -0
  322. package/dist/src/version-D9zu9FWB.cjs +27 -0
  323. package/dist/tsconfig.tsbuildinfo +1 -1
  324. package/package.json +22 -20
  325. package/dist/src/app/assets/Report-CQYFezYu.js +0 -1
  326. package/dist/src/app/assets/index-BzJt18Jz.js +0 -385
  327. package/dist/src/cache-Cr9oLMUa.js +0 -3
  328. package/dist/src/cloud-Hphvo8kr.js +0 -3
  329. package/dist/src/codex-sdk-BAmYE7qy.js +0 -3
  330. package/dist/src/evalResult-D8MT9p0s.js +0 -3
  331. package/dist/src/evalResult-Dvc-iucu.cjs +0 -2
  332. package/dist/src/evaluator-CVessDWe.js +0 -3
  333. package/dist/src/fetch-C7bGKDlQ.js +0 -3
  334. package/dist/src/graders-BOAzQEUe.cjs +0 -2
  335. package/dist/src/graders-D4BTsZdG2.js +0 -3
  336. package/dist/src/graders-DOJK1XpV.js +0 -2
  337. package/dist/src/graders-NAv9LcBn.js +0 -2
  338. package/dist/src/rubyUtils-D1L2d3jb.js +0 -3
  339. package/dist/src/rubyUtils-DUbq4tff.cjs +0 -2
  340. package/dist/src/server-DCtHUqlp.js +0 -3
  341. package/dist/src/store-CWOSz6D_.cjs +0 -2
  342. package/dist/src/store-DCDBhv7B.js +0 -3
  343. package/dist/src/telemetry-C1IqxcdW.js +0 -3
  344. package/dist/src/telemetry-C4ZEa_es.cjs +0 -2
  345. package/dist/src/transform-M6ITAESf.js +0 -3
  346. package/dist/src/{evalResult-DElBuddX.js → evalResult-spPqh1G_.js} +0 -0
package/dist/src/main.js CHANGED
@@ -1,34 +1,35 @@
1
1
  #!/usr/bin/env node
2
- import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-DksKw1Qc.js";
3
- import { A as getDefaultPort, F as VERSION, L as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, T as CLOUD_PROVIDER_PREFIX, i as fetchWithTimeout, j as getDefaultShareViewBaseUrl, k as TERMINAL_MAX_WIDTH, l as sleep, m as REQUEST_TIMEOUT_MS, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, v as isPromptfooSampleTarget } from "./fetch-BEWnXrrG.js";
4
- import { t as invariant } from "./invariant-BtWWVVhl.js";
5
- import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-F9d_5sMC.js";
6
- import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-DQZ5sVjW.js";
7
- import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-CQPez_Jp.js";
8
- import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-Dm9JM6Vb.js";
9
- import { r as importModule, t as getDirectory } from "./esm-tVgYPY-f.js";
10
- import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-CMEpfLaO.js";
11
- import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-BNYztJkh.js";
12
- import { A as renderPrompt, B as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, C as extractVariablesFromJson, F as TokenUsageTracker, H as getMaxCharsPerMessageModifierValue, P as redteamProviderManager, S as extractPromptFromTags, T as getShortPluginId, V as getGeneratedPromptOverLimit, _ as pluginMatchesStrategyTargets, _t as getUserTeams, a as resolveProviderConfigs, b as extractGoalFromPrompt, bt as resolveTeamFromIdentifier, c as createTransformRequest, ct as canCreateTargets, d as Strategies, dt as getConfigFromCloud, f as loadStrategy, ft as getEvalConfigFromCloud, g as retrieveMedia, gt as getProviderFromCloud, h as mediaExists, i as resolveProvider, l as createTransformResponse, lt as checkCloudPermissions, m as getMediaStorage, mt as getPluginSeverityOverridesFromCloud, n as loadApiProvider, o as MCPProvider, p as validateStrategies, pt as getOrgContext, r as loadApiProviders, s as HttpProvider, t as getProviderIds, ut as getCloudDatabaseId, vt as isCloudProvider, xt as resolveTeamId, yt as makeRequest$1, z as PromptfooHarmfulCompletionProvider } from "./providers-BV_KMZje.js";
13
- import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-mb7c8hbp.js";
14
- import { i as isJavascriptFile } from "./fileExtensions-AWa2ZML4.js";
15
- import { A as getProviderDescription, C as deduplicateTestCases, D as resultIsForTestCase, E as getTestCaseDeduplicationKey, O as checkProviderApiKeys, S as setupEnv, T as filterRuntimeVars, a as ComparisonEvalNotFoundError, c as mergeComparisonTables, f as maybeLoadConfigFromExternalFile, i as writeOutput, k as doesProviderRefMatch, l as fetchCsvFromGoogleSheet, n as createOutputMetadata, o as evalTableToJson, p as maybeLoadFromExternalFile, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, v as readFilters, w as extractRuntimeVars, y as readOutput } from "./util-DxWpWjhc.js";
16
- import { r as runPython } from "./pythonUtils-CnndUbW-.js";
17
- import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, dt as processPrompts, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, ft as readPrompts, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, lt as doRemoteGrading, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, pt as readProviderPromptMap, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, ut as getDefaultProviders, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-eIHhRqoC.js";
18
- import { i as generateIdFromPrompt } from "./utils-BLJKfv0y.js";
19
- import { n as sha256, t as randomSequence } from "./createHash-ChI45QR1.js";
20
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CXrvO-wA.js";
21
- import { n as getBlobUrl, t as getBlobByHash } from "./blobs-BQWqnnvL.js";
22
- import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-C4CH3zRr.js";
23
- import { n as isBlobStorageEnabled } from "./extractor-DNSeBVOJ.js";
24
- import { n as escapeRegExp, t as ellipsize } from "./text-KvuD2Iko.js";
25
- import { n as getTraceStore } from "./store-CYEy5J2D.js";
26
- import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-IvuDYSvQ.js";
27
- import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CE5G3a7x.js";
28
- import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-u4UVafl6.js";
29
- import { t as EvalResult } from "./evalResult-D3hVYFis.js";
30
- import { t as formatDuration } from "./formatDuration-DZzPsexs.js";
31
- import { n as shouldUseInkUI } from "./interactiveCheck-CLERUB0c.js";
2
+ import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
3
+ import { A as getDefaultPort, F as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, T as CLOUD_PROVIDER_PREFIX, i as fetchWithTimeout, j as getDefaultShareViewBaseUrl, k as TERMINAL_MAX_WIDTH, l as sleep, m as REQUEST_TIMEOUT_MS, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, v as isPromptfooSampleTarget } from "./fetch-B6ch2nU2.js";
4
+ import { n as VERSION } from "./version-CbpiUINz.js";
5
+ import { t as invariant } from "./invariant-B2Rf6avk.js";
6
+ import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CLJHCDDb.js";
7
+ import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-Da0bofJd.js";
8
+ import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-DWdGHvEf.js";
9
+ import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BVH9hjgW.js";
10
+ import { r as importModule, t as getDirectory } from "./esm-BX8fwlAO.js";
11
+ import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-eui5p5mL.js";
12
+ import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-ByiF3qlg.js";
13
+ import { A as validateStrategies, B as extractVariablesFromJson, H as getShortPluginId, L as extractGoalFromPrompt, M as mediaExists, N as retrieveMedia, O as Strategies, P as pluginMatchesStrategyTargets, Q as TokenUsageTracker, Z as redteamProviderManager, _t as getUserTeams, a as resolveProviderConfigs, at as getMaxCharsPerMessageModifierValue, bt as resolveTeamFromIdentifier, ct as canCreateTargets, d as MCPProvider, dt as getConfigFromCloud, f as HttpProvider, ft as getEvalConfigFromCloud, gt as getProviderFromCloud, i as resolveProvider, it as getGeneratedPromptOverLimit, j as getMediaStorage, k as loadStrategy, lt as checkCloudPermissions, m as createTransformResponse, mt as getPluginSeverityOverridesFromCloud, n as loadApiProvider, nt as PromptfooHarmfulCompletionProvider, p as createTransformRequest, pt as getOrgContext, q as renderPrompt, r as loadApiProviders, rt as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, t as getProviderIds, ut as getCloudDatabaseId, vt as isCloudProvider, xt as resolveTeamId, yt as makeRequest$1, z as extractPromptFromTags } from "./providers-DT-GtF2t.js";
14
+ import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-BI5BY7ey.js";
15
+ import { i as isJavascriptFile } from "./fileExtensions-DysCsxNG.js";
16
+ import { r as runPython } from "./pythonUtils-DNqbnRdx.js";
17
+ import { A as maybeLoadFromExternalFile, F as readFilters, I as readOutput, T as normalizeProviderRef, _ as doesProviderRefMatch, a as ComparisonEvalNotFoundError, c as mergeComparisonTables, d as deduplicateTestCases, f as extractRuntimeVars, g as checkProviderApiKeys, h as resultIsForTestCase, i as writeOutput, k as maybeLoadConfigFromExternalFile, l as fetchCsvFromGoogleSheet, m as getTestCaseDeduplicationKey, n as createOutputMetadata, o as evalTableToJson, p as filterRuntimeVars, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as setupEnv, v as getProviderDescription } from "./util-BQOCAHQC.js";
18
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-2wIvAhB3.js";
19
+ import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DjLby-uP.js";
20
+ import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-NlvH23ky.js";
21
+ import { n as isBlobStorageEnabled } from "./extractor-DxyiFhPk.js";
22
+ import { n as escapeRegExp, t as ellipsize } from "./text-D4lz-Jg_.js";
23
+ import { n as getTraceStore } from "./store-CPh25336.js";
24
+ import { n as sha256, t as randomSequence } from "./createHash-DPpsZgFF.js";
25
+ import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, J as doRemoteGrading, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, R as getCustomPolicies, S as PlinyPlugin, St as getDefaultProviders, T as IntentPlugin, V as fetchHuggingFaceDataset, X as readPrompts, Y as processPrompts, Z as readProviderPromptMap, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-CgPn32yp.js";
26
+ import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-D-UIbbYq.js";
27
+ import { i as generateIdFromPrompt } from "./utils-kt7lv30R.js";
28
+ import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-DTtUuU3l.js";
29
+ import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-DJ_4A-tr.js";
30
+ import { t as EvalResult } from "./evalResult-D6P5I5il.js";
31
+ import { t as formatDuration } from "./formatDuration-Ch4A7G3o.js";
32
+ import { n as shouldUseInkUI } from "./interactiveCheck-CCICw2cy.js";
32
33
  import { fileURLToPath } from "node:url";
33
34
  import { Command, InvalidArgumentError } from "commander";
34
35
  import { z } from "zod";
@@ -100,7 +101,7 @@ function runCommand(program) {
100
101
  hasGithubPr: !!cmdObj.githubPr,
101
102
  hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
102
103
  });
103
- const { executeScan } = await import("./scanner-1DqWi1Ej.js");
104
+ const { executeScan } = await import("./scanner-nOCWNIXa.js");
104
105
  await executeScan(repoPath, cmdObj);
105
106
  });
106
107
  }
@@ -572,47 +573,11 @@ function filterPrompts(prompts, filterPromptsOption) {
572
573
  //#endregion
573
574
  //#region src/commands/eval/filterProviders.ts
574
575
  /**
575
- * Checks if a value is a valid provider ID (non-empty string).
576
- */
577
- function isValidProviderId(id) {
578
- return id !== null && id !== void 0 && typeof id === "string" && id !== "";
579
- }
580
- /**
581
576
  * Extracts the id and label from a raw provider config without instantiating it.
582
577
  * Handles all provider config formats: string, function, ProviderOptions, ProviderOptionsMap.
583
578
  */
584
579
  function getProviderIdAndLabel(provider, index) {
585
- if (typeof provider === "string") return { id: provider };
586
- if (typeof provider === "function") {
587
- const label = provider.label;
588
- return {
589
- id: label ?? `custom-function-${index}`,
590
- label
591
- };
592
- }
593
- const providerId = provider.id;
594
- if ("id" in provider && isValidProviderId(providerId)) return {
595
- id: providerId,
596
- label: provider.label
597
- };
598
- const keys = Object.keys(provider);
599
- if (keys.length > 0) {
600
- const id = keys[0];
601
- const value = provider[id];
602
- if (typeof value === "object" && value !== null) return {
603
- id: value.id || id,
604
- label: value.label
605
- };
606
- }
607
- const label = provider.label;
608
- if (isValidProviderId(label)) return {
609
- id: label,
610
- label
611
- };
612
- return {
613
- id: `unknown-${index}`,
614
- label
615
- };
580
+ return normalizeProviderRef(provider, { index });
616
581
  }
617
582
  /**
618
583
  * Filters raw provider configs BEFORE instantiation.
@@ -3208,7 +3173,7 @@ function generateTable(evaluateTable, tableCellMaxLength = 250, maxRows = 25) {
3208
3173
  for (const row of evaluateTable.body.slice(0, maxRows)) table.push([...row.vars.map((v) => ellipsize(v, tableCellMaxLength)), ...row.outputs.map(({ pass, text, failureReason: failureType }) => {
3209
3174
  text = ellipsize(text, tableCellMaxLength);
3210
3175
  if (pass) return chalk.green("[PASS] ") + text;
3211
- else return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
3176
+ return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
3212
3177
  })]);
3213
3178
  return table.toString();
3214
3179
  }
@@ -3293,6 +3258,115 @@ function shouldShareResults(opts) {
3293
3258
  }
3294
3259
  //#endregion
3295
3260
  //#region src/commands/eval/summary.ts
3261
+ function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
3262
+ if (wasAborted) {
3263
+ const idSuffix = writeToDatabase ? ` (ID: ${chalk.cyan(evalId)})` : "";
3264
+ return `${chalk.red("✗")} ${completionType} aborted${idSuffix}`;
3265
+ }
3266
+ if (writeToDatabase && shareableUrl) return `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
3267
+ if (writeToDatabase && activelySharing) return `${chalk.green("✓")} ${completionType} complete`;
3268
+ if (writeToDatabase) return `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
3269
+ return `${chalk.green("✓")} ${completionType} complete`;
3270
+ }
3271
+ function getAbortSummaryLines(targetErrorStatus) {
3272
+ if (targetErrorStatus == null) return [];
3273
+ return [
3274
+ "",
3275
+ chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."),
3276
+ chalk.red(` Target returned HTTP ${targetErrorStatus}`),
3277
+ "",
3278
+ chalk.yellow("Possible causes:"),
3279
+ chalk.yellow(" • Invalid API key or authentication (401/403)"),
3280
+ chalk.yellow(" • Target endpoint does not exist (404)"),
3281
+ chalk.yellow(" • Server does not support the request (501)"),
3282
+ "",
3283
+ chalk.cyan("To fix: Check your target configuration and credentials.")
3284
+ ];
3285
+ }
3286
+ function getGuidanceLines({ writeToDatabase, shareableUrl, wantsToShare, activelySharing, hasExplicitDisable, cloudEnabled }) {
3287
+ if (!writeToDatabase || shareableUrl || wantsToShare || activelySharing) return [];
3288
+ const lines = ["", `» View results: ${chalk.green.bold("promptfoo view")}`];
3289
+ if (!hasExplicitDisable) lines.push(cloudEnabled ? `» Create shareable URL: ${chalk.green.bold("promptfoo share")}` : `» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
3290
+ lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
3291
+ return lines;
3292
+ }
3293
+ function buildUsageDetails(usage, total) {
3294
+ const parts = [];
3295
+ if (usage.prompt && usage.prompt > 0) parts.push(`${usage.prompt.toLocaleString()} prompt`);
3296
+ if (usage.completion && usage.completion > 0) parts.push(`${usage.completion.toLocaleString()} completion`);
3297
+ if (usage.cached && usage.cached > 0) parts.push(usage.cached === total && parts.length === 0 ? "cached" : `${usage.cached.toLocaleString()} cached`);
3298
+ if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) parts.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
3299
+ return parts;
3300
+ }
3301
+ function getTokenUsageLines(tokenUsage, isRedteam, tracker) {
3302
+ const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
3303
+ const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
3304
+ if (!hasEvalTokens && !hasGradingTokens) return [];
3305
+ const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
3306
+ const evalTokens = {
3307
+ prompt: tokenUsage.prompt || 0,
3308
+ completion: tokenUsage.completion || 0,
3309
+ total: tokenUsage.total || combinedTotal,
3310
+ cached: tokenUsage.cached || 0,
3311
+ numRequests: tokenUsage.numRequests || 0,
3312
+ completionDetails: tokenUsage.completionDetails || {
3313
+ reasoning: 0,
3314
+ acceptedPrediction: 0,
3315
+ rejectedPrediction: 0
3316
+ }
3317
+ };
3318
+ const lines = [`${chalk.bold("Total Tokens:")} ${chalk.white.bold((evalTokens.total + (tokenUsage.assertions?.total || 0)).toLocaleString())}`];
3319
+ if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
3320
+ if (evalTokens.total > 0) {
3321
+ const evalParts = buildUsageDetails(evalTokens, evalTokens.total);
3322
+ lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
3323
+ }
3324
+ if (tokenUsage.assertions?.total && tokenUsage.assertions.total > 0) {
3325
+ const gradingParts = buildUsageDetails(tokenUsage.assertions, tokenUsage.assertions.total);
3326
+ lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
3327
+ }
3328
+ lines.push(...getProviderUsageLines(tracker));
3329
+ return lines;
3330
+ }
3331
+ function getProviderUsageLines(tracker) {
3332
+ const providerIds = tracker.getProviderIds();
3333
+ if (providerIds.length <= 1) return [];
3334
+ const sortedProviders = providerIds.map((id) => ({
3335
+ id,
3336
+ usage: tracker.getProviderUsage(id)
3337
+ })).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
3338
+ const lines = ["", chalk.bold("Providers:")];
3339
+ for (const { id, usage } of sortedProviders) {
3340
+ if ((usage.total || 0) === 0 && (usage.prompt || 0) + (usage.completion || 0) === 0) continue;
3341
+ const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
3342
+ const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
3343
+ const details = buildUsageDetails(usage, displayTotal);
3344
+ const requestInfo = `${usage.numRequests || 0} requests`;
3345
+ const separator = details.length > 0 ? "; " : "";
3346
+ lines.push(` ${chalk.gray(`${displayId}:`)} ${chalk.white(displayTotal.toLocaleString())} (${requestInfo}${separator}${details.join(", ")})`);
3347
+ }
3348
+ return lines;
3349
+ }
3350
+ function formatResultPercentage(count, totalTests) {
3351
+ const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
3352
+ return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
3353
+ }
3354
+ function formatResultLine(count, label, icon, iconColor, totalTests) {
3355
+ return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count, totalTests)})`)}`;
3356
+ }
3357
+ function getResultsLines({ successes, failures, errors, duration, maxConcurrency }) {
3358
+ const totalTests = successes + failures + errors;
3359
+ const errorLabel = errors === 1 ? "error" : "errors";
3360
+ return [
3361
+ "",
3362
+ chalk.bold("Results:"),
3363
+ formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green, totalTests),
3364
+ formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red, totalTests),
3365
+ formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red, totalTests),
3366
+ chalk.gray(`Duration: ${formatDuration(duration)} (concurrency: ${maxConcurrency})`),
3367
+ ""
3368
+ ];
3369
+ }
3296
3370
  /**
3297
3371
  * Generate formatted evaluation summary output for CLI display.
3298
3372
  *
@@ -3331,115 +3405,28 @@ function shouldShareResults(opts) {
3331
3405
  * ```
3332
3406
  */
3333
3407
  function generateEvalSummary(params) {
3334
- const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker, targetErrorStatus } = params;
3335
- const lines = [];
3336
- const completionType = isRedteam ? "Red team" : "Eval";
3337
- const wasAborted = targetErrorStatus != null;
3338
- let completionMessage;
3339
- if (wasAborted) {
3340
- completionMessage = `${chalk.red("✗")} ${completionType} aborted`;
3341
- if (writeToDatabase) completionMessage += ` (ID: ${chalk.cyan(evalId)})`;
3342
- } else if (writeToDatabase && shareableUrl) completionMessage = `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
3343
- else if (writeToDatabase && activelySharing) completionMessage = `${chalk.green("✓")} ${completionType} complete`;
3344
- else if (writeToDatabase) completionMessage = `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
3345
- else completionMessage = `${chalk.green("✓")} ${completionType} complete`;
3346
- lines.push(completionMessage);
3347
- if (wasAborted && targetErrorStatus != null) {
3348
- lines.push("");
3349
- lines.push(chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
3350
- lines.push(chalk.red(` Target returned HTTP ${targetErrorStatus}`));
3351
- lines.push("");
3352
- lines.push(chalk.yellow("Possible causes:"));
3353
- lines.push(chalk.yellow(" • Invalid API key or authentication (401/403)"));
3354
- lines.push(chalk.yellow(" • Target endpoint does not exist (404)"));
3355
- lines.push(chalk.yellow(" • Server does not support the request (501)"));
3356
- lines.push("");
3357
- lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
3358
- }
3359
- if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
3360
- lines.push("");
3361
- lines.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
3362
- if (!hasExplicitDisable) if (cloudEnabled) lines.push(`» Create shareable URL: ${chalk.green.bold("promptfoo share")}`);
3363
- else lines.push(`» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
3364
- lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
3365
- }
3366
- lines.push("");
3367
- const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
3368
- const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
3369
- if (hasEvalTokens || hasGradingTokens) {
3370
- const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
3371
- const evalTokens = {
3372
- prompt: tokenUsage.prompt || 0,
3373
- completion: tokenUsage.completion || 0,
3374
- total: tokenUsage.total || combinedTotal,
3375
- cached: tokenUsage.cached || 0,
3376
- completionDetails: tokenUsage.completionDetails || {
3377
- reasoning: 0,
3378
- acceptedPrediction: 0,
3379
- rejectedPrediction: 0
3380
- }
3381
- };
3382
- const grandTotal = evalTokens.total + (tokenUsage.assertions?.total || 0);
3383
- lines.push(`${chalk.bold("Total Tokens:")} ${chalk.white.bold(grandTotal.toLocaleString())}`);
3384
- if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
3385
- if (evalTokens.total > 0) {
3386
- const evalParts = [];
3387
- if (evalTokens.prompt > 0) evalParts.push(`${evalTokens.prompt.toLocaleString()} prompt`);
3388
- if (evalTokens.completion > 0) evalParts.push(`${evalTokens.completion.toLocaleString()} completion`);
3389
- if (evalTokens.cached > 0) if (evalTokens.cached === evalTokens.total && evalParts.length === 0) evalParts.push("cached");
3390
- else evalParts.push(`${evalTokens.cached.toLocaleString()} cached`);
3391
- if (evalTokens.completionDetails?.reasoning && evalTokens.completionDetails.reasoning > 0) evalParts.push(`${evalTokens.completionDetails.reasoning.toLocaleString()} reasoning`);
3392
- lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
3393
- }
3394
- if (tokenUsage.assertions && tokenUsage.assertions.total && tokenUsage.assertions.total > 0) {
3395
- const gradingParts = [];
3396
- if (tokenUsage.assertions.prompt && tokenUsage.assertions.prompt > 0) gradingParts.push(`${tokenUsage.assertions.prompt.toLocaleString()} prompt`);
3397
- if (tokenUsage.assertions.completion && tokenUsage.assertions.completion > 0) gradingParts.push(`${tokenUsage.assertions.completion.toLocaleString()} completion`);
3398
- if (tokenUsage.assertions.cached && tokenUsage.assertions.cached > 0) if (tokenUsage.assertions.cached === tokenUsage.assertions.total && gradingParts.length === 0) gradingParts.push("cached");
3399
- else gradingParts.push(`${tokenUsage.assertions.cached.toLocaleString()} cached`);
3400
- if (tokenUsage.assertions.completionDetails?.reasoning && tokenUsage.assertions.completionDetails.reasoning > 0) gradingParts.push(`${tokenUsage.assertions.completionDetails.reasoning.toLocaleString()} reasoning`);
3401
- lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
3402
- }
3403
- const providerIds = tracker.getProviderIds();
3404
- if (providerIds.length > 1) {
3405
- lines.push("");
3406
- lines.push(chalk.bold("Providers:"));
3407
- const sortedProviders = providerIds.map((id) => ({
3408
- id,
3409
- usage: tracker.getProviderUsage(id)
3410
- })).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
3411
- for (const { id, usage } of sortedProviders) if ((usage.total || 0) > 0 || (usage.prompt || 0) + (usage.completion || 0) > 0) {
3412
- const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
3413
- const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
3414
- const details = [];
3415
- if (usage.prompt && usage.prompt > 0) details.push(`${usage.prompt.toLocaleString()} prompt`);
3416
- if (usage.completion && usage.completion > 0) details.push(`${usage.completion.toLocaleString()} completion`);
3417
- if (usage.cached && usage.cached > 0) if (usage.cached === displayTotal && details.length === 0) details.push("cached");
3418
- else details.push(`${usage.cached.toLocaleString()} cached`);
3419
- if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) details.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
3420
- const breakdown = ` (${`${usage.numRequests || 0} requests`}${details.length > 0 ? "; " : ""}${details.join(", ")})`;
3421
- lines.push(` ${chalk.gray(displayId + ":")} ${chalk.white(displayTotal.toLocaleString())}${breakdown}`);
3422
- }
3423
- }
3424
- }
3425
- lines.push("");
3426
- const totalTests = successes + failures + errors;
3427
- const formatResultPercentage = (count) => {
3428
- const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
3429
- return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
3430
- };
3431
- const formatResultLine = (count, label, icon, iconColor) => {
3432
- return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count)})`)}`;
3433
- };
3434
- const errorLabel = errors === 1 ? "error" : "errors";
3435
- lines.push(chalk.bold("Results:"));
3436
- lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green));
3437
- lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red));
3438
- lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red));
3439
- const durationDisplay = formatDuration(duration);
3440
- lines.push(chalk.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
3441
- lines.push("");
3442
- return lines;
3408
+ return [
3409
+ getCompletionMessage({
3410
+ completionType: params.isRedteam ? "Red team" : "Eval",
3411
+ evalId: params.evalId,
3412
+ shareableUrl: params.shareableUrl,
3413
+ wasAborted: params.targetErrorStatus != null,
3414
+ writeToDatabase: params.writeToDatabase,
3415
+ activelySharing: params.activelySharing ?? false
3416
+ }),
3417
+ ...getAbortSummaryLines(params.targetErrorStatus),
3418
+ ...getGuidanceLines({
3419
+ writeToDatabase: params.writeToDatabase,
3420
+ shareableUrl: params.shareableUrl,
3421
+ wantsToShare: params.wantsToShare,
3422
+ activelySharing: params.activelySharing ?? false,
3423
+ hasExplicitDisable: params.hasExplicitDisable,
3424
+ cloudEnabled: params.cloudEnabled
3425
+ }),
3426
+ "",
3427
+ ...getTokenUsageLines(params.tokenUsage, params.isRedteam, params.tracker),
3428
+ ...getResultsLines(params)
3429
+ ];
3443
3430
  }
3444
3431
  //#endregion
3445
3432
  //#region src/commands/retry.ts
@@ -6517,9 +6504,10 @@ function dedupeTestCases(testCases) {
6517
6504
  return deduped;
6518
6505
  }
6519
6506
  function buildMaxCharsRetryInstructions(rejectedPromptLengths, limit) {
6507
+ const longestRejectedPromptText = rejectedPromptLengths.length > 0 ? `${Math.max(...rejectedPromptLengths)} characters` : "unknown length";
6520
6508
  return dedent`
6521
6509
  Your previous response included ${rejectedPromptLengths.length} generated prompt${rejectedPromptLengths.length === 1 ? "" : "s"} that exceeded the ${limit ?? "configured"}-character limit.
6522
- The longest rejected prompt was ${Math.max(...rejectedPromptLengths)} characters.
6510
+ The longest rejected prompt was ${longestRejectedPromptText}.
6523
6511
  Generate replacement prompts only, and keep every user message within the character limit.
6524
6512
  `.trim();
6525
6513
  }
@@ -8359,6 +8347,26 @@ async function doRedteamRun(options) {
8359
8347
  }
8360
8348
  //#endregion
8361
8349
  //#region src/index.ts
8350
+ /**
8351
+ * Shallow-clone a test case so the caller can swap in resolved ApiProvider
8352
+ * instances on `options.provider` / `assert[].provider` without leaking those
8353
+ * mutations back to the input. The input may alias the unified config written
8354
+ * to the Eval record, and a live SDK client (e.g. Bedrock's BedrockRuntime,
8355
+ * Anthropic's client) holds circular references that break drizzle's JSON
8356
+ * serialization on `evalRecord.save()`. Fixes #8687.
8357
+ *
8358
+ * Detaches only `options` and `assert[]`. Other reference fields (`provider`,
8359
+ * `vars`, `metadata`, `providerOutput`) remain aliased — callers must reassign
8360
+ * those by reference rather than mutating in place. `assert-set` children are
8361
+ * not deep-cloned because the resolve loop skips `assert-set`; if that ever
8362
+ * changes, extend this helper.
8363
+ */
8364
+ function cloneTestForResolve(test) {
8365
+ const cloned = { ...test };
8366
+ if (test.options) cloned.options = { ...test.options };
8367
+ if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
8368
+ return cloned;
8369
+ }
8362
8370
  async function evaluate(testSuite, options = {}) {
8363
8371
  if (testSuite.writeLatestResults) await runDbMigrations();
8364
8372
  const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
@@ -8378,22 +8386,24 @@ async function evaluate(testSuite, options = {}) {
8378
8386
  nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
8379
8387
  prompts: await processPrompts(testSuite.prompts)
8380
8388
  };
8381
- if (typeof constructedTestSuite.defaultTest === "object") {
8382
- if (constructedTestSuite.defaultTest?.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
8389
+ if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
8390
+ constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
8391
+ if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
8383
8392
  env: testSuite.env,
8384
8393
  basePath: state.basePath
8385
8394
  });
8386
- if (constructedTestSuite.defaultTest?.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
8395
+ if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
8387
8396
  env: testSuite.env,
8388
8397
  basePath: state.basePath
8389
8398
  });
8390
8399
  }
8391
- for (const test of constructedTestSuite.tests || []) {
8400
+ constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
8401
+ for (const test of constructedTestSuite.tests) {
8392
8402
  if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
8393
8403
  env: testSuite.env,
8394
8404
  basePath: state.basePath
8395
8405
  });
8396
- if (test.assert) for (const assertion of test.assert) {
8406
+ for (const assertion of test.assert || []) {
8397
8407
  if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
8398
8408
  if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
8399
8409
  env: testSuite.env,
@@ -9353,7 +9363,7 @@ async function checkForUpdates() {
9353
9363
  } catch {
9354
9364
  return false;
9355
9365
  }
9356
- if (semverGt(latestVersion, "0.121.4")) {
9366
+ if (semverGt(latestVersion, "0.121.5")) {
9357
9367
  const border = "=".repeat(TERMINAL_MAX_WIDTH);
9358
9368
  logger.info(`\n${border}
9359
9369
  ${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
@@ -10701,6 +10711,38 @@ function normalizeTargetPurposeDiscoveryResult(result) {
10701
10711
  tools: cleanTools(result.tools)
10702
10712
  };
10703
10713
  }
10714
+ function extractStringField(value) {
10715
+ if (typeof value !== "string") return;
10716
+ return value.trim() || void 0;
10717
+ }
10718
+ async function getRemoteResponseErrorDetail(response) {
10719
+ const rawText = (await response.text()).trim();
10720
+ const fallback = rawText || response.statusText || "Unknown error";
10721
+ if (!rawText) return fallback;
10722
+ try {
10723
+ const parsed = JSON.parse(rawText);
10724
+ return extractStringField(parsed?.message) ?? extractStringField(parsed?.error) ?? fallback;
10725
+ } catch {
10726
+ return fallback;
10727
+ }
10728
+ }
10729
+ const REMOTE_ERROR_HINTS = {
10730
+ 400: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
10731
+ 401: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
10732
+ 403: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
10733
+ 404: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
10734
+ 429: "You are being rate limited. Wait a moment and try again."
10735
+ };
10736
+ function getRemoteErrorHint(status) {
10737
+ if (REMOTE_ERROR_HINTS[status]) return REMOTE_ERROR_HINTS[status];
10738
+ if (status >= 500) return "The remote generation service may be temporarily unavailable. Retry in a few minutes or contact support if the issue persists.";
10739
+ }
10740
+ async function buildRemoteErrorFromResponse(response) {
10741
+ const detail = await getRemoteResponseErrorDetail(response);
10742
+ const hint = getRemoteErrorHint(response.status);
10743
+ const base = `Remote server returned HTTP ${response.status}: ${detail}`;
10744
+ return new Error(hint ? `${base}\n${hint}` : base);
10745
+ }
10704
10746
  /**
10705
10747
  * Queries Cloud for the purpose-discovery logic, sends each logic to the target,
10706
10748
  * and summarizes the results.
@@ -10750,11 +10792,7 @@ async function doTargetPurposeDiscovery(target, prompt, showProgress = true) {
10750
10792
  email: getUserEmail()
10751
10793
  }))
10752
10794
  });
10753
- if (!response.ok) {
10754
- const error = await response.text();
10755
- logger.error(`${LOG_PREFIX} Error getting the next question from remote server: ${error}`);
10756
- continue;
10757
- }
10795
+ if (!response.ok) throw await buildRemoteErrorFromResponse(response);
10758
10796
  const responseData = await response.json();
10759
10797
  const data = TargetPurposeDiscoveryTaskResponseSchema.parse(responseData);
10760
10798
  logger.debug(`${LOG_PREFIX} Received response from remote server: ${JSON.stringify(data, null, 2)}`);
@@ -12743,7 +12781,7 @@ router.get("/", async (_req, res) => {
12743
12781
  };
12744
12782
  } catch (error) {
12745
12783
  logger.debug(`Failed to fetch latest version: ${error}`);
12746
- latestVersion = versionCache.latestVersion ?? "0.121.4";
12784
+ latestVersion = versionCache.latestVersion ?? "0.121.5";
12747
12785
  }
12748
12786
  }
12749
12787
  const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
@@ -12752,7 +12790,7 @@ router.get("/", async (_req, res) => {
12752
12790
  selfHosted,
12753
12791
  isNpx
12754
12792
  });
12755
- const resolvedLatestVersion = latestVersion ?? "0.121.4";
12793
+ const resolvedLatestVersion = latestVersion ?? "0.121.5";
12756
12794
  const response = {
12757
12795
  currentVersion: VERSION,
12758
12796
  latestVersion: resolvedLatestVersion,
@@ -15174,7 +15212,11 @@ const EXAMPLE_ALIASES = {
15174
15212
  "google-adk-example": "integration-google-adk",
15175
15213
  "google-sheets": "integration-google-sheets",
15176
15214
  "gpt-4o-temperature-comparison": "compare-gpt-temperature",
15177
- "gpt-4o-vs-4o-mini": "compare-gpt-4o-vs-4o-mini",
15215
+ "compare-gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
15216
+ "compare-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
15217
+ "compare-gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
15218
+ "gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
15219
+ "gpt-model-tiers": "compare-gpt-model-tiers",
15178
15220
  "gpt-vs-claude-vs-gemini": "compare-gpt-vs-claude-vs-gemini",
15179
15221
  "grok-4-political-bias": "redteam-grok-4-political-bias",
15180
15222
  groq: "provider-groq",
@@ -15225,7 +15267,9 @@ const EXAMPLE_ALIASES = {
15225
15267
  "node-package-typescript": "config-node-package-typescript",
15226
15268
  nscale: "provider-nscale",
15227
15269
  "nunjucks-custom-filters": "config-nunjucks-custom-filters",
15228
- "openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-5-vs-gpt-5-mini-mmlu",
15270
+ "gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
15271
+ "gpt-model-tiers-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
15272
+ "openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
15229
15273
  "openai-model-comparison": "compare-openai-models",
15230
15274
  openclaw: "provider-openclaw",
15231
15275
  "opencode-sdk": "provider-opencode-sdk/basic",
@@ -15597,8 +15641,8 @@ function shouldUseInkList() {
15597
15641
  async function runInkList(options) {
15598
15642
  const [React, { renderInteractive }, { ListApp }] = await Promise.all([
15599
15643
  import("react"),
15600
- import("./render-CgVDrJmM.js"),
15601
- import("./ListApp-DQkFNqE9.js")
15644
+ import("./render-nj-UaPdn.js"),
15645
+ import("./ListApp-BRUsT43Y.js")
15602
15646
  ]);
15603
15647
  let result = { cancelled: false };
15604
15648
  let resolveResult;
@@ -17906,8 +17950,8 @@ function registerRunEvaluationTool(server) {
17906
17950
  }
17907
17951
  filteredTestSuite.tests = filteredTests;
17908
17952
  }
17909
- const { evaluate } = await import("./evaluator-CVessDWe.js");
17910
- const evalRecord = await (await import("./eval-CzJFfFO9.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
17953
+ const { evaluate } = await import("./evaluator-DgLKaZk8.js");
17954
+ const evalRecord = await (await import("./eval-BQPLBJbw.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
17911
17955
  logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
17912
17956
  const startTime = Date.now();
17913
17957
  const result = await evaluate(filteredTestSuite, evalRecord, {