promptfoo 0.121.4 → 0.121.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (497) hide show
  1. package/dist/src/{ListApp-DQkFNqE9.js → ListApp-DLmM02JS.js} +1 -1
  2. package/dist/src/{accounts-DhMYUUbu.js → accounts-Ca7WIoPY.js} +12 -7
  3. package/dist/src/{accounts-F9d_5sMC.js → accounts-CjFnOPmb.js} +14 -9
  4. package/dist/src/{accounts-Dy17bs4D.cjs → accounts-CmWzeD2d.cjs} +16 -10
  5. package/dist/src/{accounts-DdJ2pHMI.js → accounts-DanM1wq_.js} +13 -8
  6. package/dist/src/{agentic-utils-qFlm6zes.js → agentic-utils-CJ0j3fBi.js} +3 -3
  7. package/dist/src/{agentic-utils-w68v6_Dz.js → agentic-utils-DDEGRV9v.js} +3 -3
  8. package/dist/src/{agentic-utils-BpX5b23w.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
  9. package/dist/src/{agentic-utils-P172hM8B.js → agentic-utils-TxUEMPYS.js} +2 -2
  10. package/dist/src/{agents-BahDpe5G.cjs → agents-B4sRuXg3.cjs} +7 -6
  11. package/dist/src/{agents-pQeBEXMm.js → agents-B8q7h_ek.js} +5 -5
  12. package/dist/src/{agents-CgaMXvLM.js → agents-CBgJvRkB.js} +21 -10
  13. package/dist/src/{agents-C-R_jfzI.js → agents-CYn2n3QP.js} +4 -4
  14. package/dist/src/{agents-8FDnTriG.js → agents-D-vDNFx4.js} +21 -10
  15. package/dist/src/{agents-aYPQLf8W.js → agents-LrHuQqr1.js} +20 -9
  16. package/dist/src/{agents-DJ35I3Nt.js → agents-QGg76OF-.js} +5 -5
  17. package/dist/src/{agents-D7-HGxUj.cjs → agents-eHZ9nlgA.cjs} +21 -10
  18. package/dist/src/{aimlapi-sgYnkE54.js → aimlapi-CJEbQ0o6.js} +7 -7
  19. package/dist/src/{aimlapi-BD6J9oKt.js → aimlapi-D5HXzZ0s.js} +6 -6
  20. package/dist/src/{aimlapi-qcK4OT55.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
  21. package/dist/src/{aimlapi-BCq3MHeL.js → aimlapi-eYv3a_DK.js} +7 -7
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
  24. package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
  25. package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
  26. package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
  27. package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
  28. package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
  29. package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
  30. package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
  31. package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
  32. package/dist/src/app/index.html +31 -6
  33. package/dist/src/{audio-COrn8rM6.js → audio-BqnRvcWG.js} +3 -3
  34. package/dist/src/{audio-DcVKoInv.js → audio-CPMtV1yR.js} +4 -4
  35. package/dist/src/{audio-B7izf48x.js → audio-DyiebVB3.js} +4 -4
  36. package/dist/src/{audio-BQtNuYBj.cjs → audio-FnxbEnSE.cjs} +4 -4
  37. package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
  38. package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
  39. package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
  40. package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
  41. package/dist/src/{base-PYJvBE1i.js → base-CKLo890h.js} +4 -3
  42. package/dist/src/{base-fZ9wgg50.js → base-Co80MMCi.js} +5 -4
  43. package/dist/src/{base-D-670DX8.cjs → base-DGJW48uz.cjs} +5 -4
  44. package/dist/src/{base-yrI1Yal4.js → base-E9I8zXjz.js} +5 -4
  45. package/dist/src/bestOfN-B3wNzjSB.js +137 -0
  46. package/dist/src/bestOfN-BBsO41z4.js +136 -0
  47. package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
  48. package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
  49. package/dist/src/{blobs-D2FAd1Q5.cjs → blobs-B0977K1O.cjs} +7 -6
  50. package/dist/src/{blobs-BCZavS8s.js → blobs-CeFdPn_T.js} +3 -3
  51. package/dist/src/{blobs-BQWqnnvL.js → blobs-DODuTK-a.js} +3 -3
  52. package/dist/src/{blobs-C-F78Kfn.js → blobs-Dwef1Ao1.js} +2 -2
  53. package/dist/src/{cache-BIyPcp5v.cjs → cache-CPGUA4Yl.cjs} +135 -25
  54. package/dist/src/cache-Cf7b4pWE.js +3 -0
  55. package/dist/src/{cache-D5NZmMiT.js → cache-DIXbtkNO.js} +125 -10
  56. package/dist/src/{cache-mb7c8hbp.js → cache-DpPWrkTE.js} +128 -12
  57. package/dist/src/{cache-C4Xb-hNb.js → cache-roFAE0cI.js} +126 -11
  58. package/dist/src/{chat-I9izLm49.js → chat-CUCorGiL.js} +12 -12
  59. package/dist/src/{chat-BPXSW8Bv.cjs → chat-DG1wG4w0.cjs} +6 -6
  60. package/dist/src/{chat-BfPaS15_.js → chat-Dabu84Br.js} +12 -12
  61. package/dist/src/{chat-Dr3DUQ0D.js → chat-DqUFcWI0.js} +12 -12
  62. package/dist/src/{chat-CclRbxGf.cjs → chat-DxTDQ83C.cjs} +14 -13
  63. package/dist/src/{chat-MKxMnZJZ.js → chat-GmlolEwo.js} +4 -4
  64. package/dist/src/{chat-0bwXjVP0.js → chat-TP8Qifkh.js} +6 -6
  65. package/dist/src/{chat-mW0ORo8G.js → chat-iwaM5UTQ.js} +6 -6
  66. package/dist/src/{chatkit-zUIVoDos.js → chatkit-B6DWi70Q.js} +4 -4
  67. package/dist/src/{chatkit-BoWoSgXl.cjs → chatkit-BYveR48_.cjs} +6 -5
  68. package/dist/src/{chatkit-Cv6AhukM.js → chatkit-fARZwEfV.js} +3 -3
  69. package/dist/src/{chatkit-CJnHRRMM.js → chatkit-lb6FK02w.js} +4 -4
  70. package/dist/src/{claude-agent-sdk-Dtq_L-Sc.js → claude-agent-sdk-BQNp_y-F.js} +212 -67
  71. package/dist/src/{claude-agent-sdk-BQNuLaAK.js → claude-agent-sdk-D5Jl0SDh.js} +212 -67
  72. package/dist/src/{claude-agent-sdk-CPJo3dBQ.cjs → claude-agent-sdk-DH416NBD.cjs} +218 -72
  73. package/dist/src/{claude-agent-sdk-nfAIcxNf.js → claude-agent-sdk-x1XJ1-pU.js} +212 -67
  74. package/dist/src/{cloud-DQZ5sVjW.js → cloud-D3DiFqH6.js} +3 -3
  75. package/dist/src/cloud-p96PA4MH.js +3 -0
  76. package/dist/src/{cloudflare-ai-BIB567w6.js → cloudflare-ai-B6NVI3ax.js} +4 -4
  77. package/dist/src/{cloudflare-ai-Dl3N9OVD.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
  78. package/dist/src/{cloudflare-ai-DlKr0rY7.js → cloudflare-ai-RFSojyXG.js} +6 -6
  79. package/dist/src/{cloudflare-ai-DGLte7Py.js → cloudflare-ai-r4tbYmWU.js} +6 -6
  80. package/dist/src/{cloudflare-gateway-CiIZHU0Q.js → cloudflare-gateway-BCkLouto.js} +5 -5
  81. package/dist/src/{cloudflare-gateway-DI1HNP5F.js → cloudflare-gateway-BaZ4insB.js} +3 -3
  82. package/dist/src/{cloudflare-gateway-BDZrYydE.js → cloudflare-gateway-CF-Vb-2Z.js} +5 -5
  83. package/dist/src/{cloudflare-gateway-BYDp495F.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
  84. package/dist/src/codex-app-server-B8KHEiF4.js +1915 -0
  85. package/dist/src/codex-app-server-CnrLBCeA.cjs +1921 -0
  86. package/dist/src/codex-app-server-DIXZ230V.js +1915 -0
  87. package/dist/src/codex-app-server-Dd22dC_N.js +1916 -0
  88. package/dist/src/{codex-sdk-CpqiOqDO.js → codex-sdk-B6Wah8Pa.js} +6 -6
  89. package/dist/src/codex-sdk-BGjVAk23.js +3 -0
  90. package/dist/src/{codex-sdk-C2_M2pl_.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
  91. package/dist/src/{codex-sdk-Rtky3M4I.js → codex-sdk-CmQABzV3.js} +6 -6
  92. package/dist/src/{codex-sdk-CErXn7qh.js → codex-sdk-D2d54RL8.js} +5 -5
  93. package/dist/src/{cometapi-CtJ-mS8R.js → cometapi-Bu9B8NUY.js} +8 -8
  94. package/dist/src/{cometapi-DT-jlVCB.js → cometapi-CtzNCHKu.js} +7 -7
  95. package/dist/src/{cometapi-UVOryo4W.cjs → cometapi-DHCDlQUI.cjs} +8 -8
  96. package/dist/src/{cometapi-BUlt_ELa.js → cometapi-OBILPLlu.js} +8 -8
  97. package/dist/src/{completion-HUe8wDhZ.js → completion-CO2e1_62.js} +6 -6
  98. package/dist/src/{completion-BozdoXba.cjs → completion-CSYfl2cd.cjs} +6 -6
  99. package/dist/src/{completion-x0a_c2y1.js → completion-DZNxcyfG.js} +6 -6
  100. package/dist/src/{completion-Dnxn7E-j.js → completion-sNvCLTAP.js} +5 -5
  101. package/dist/src/constants-BjJV0cRr.js +6 -0
  102. package/dist/src/constants-DH5XYLKZ.js +7 -0
  103. package/dist/src/constants-DZGEFLsu.js +6 -0
  104. package/dist/src/constants-a2kYssQk.cjs +11 -0
  105. package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
  106. package/dist/src/{createHash-CwDVU5xr.js → createHash-CGVzWdjj.js} +1 -1
  107. package/dist/src/{createHash-B7KvgoOD.cjs → createHash-CSiqnK5P.cjs} +2 -2
  108. package/dist/src/{createHash-ChI45QR1.js → createHash-CgRvs4Fn.js} +1 -1
  109. package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
  110. package/dist/src/crescendo-CU_Y2i-m.js +702 -0
  111. package/dist/src/crescendo-J1Xx4_zb.js +703 -0
  112. package/dist/src/crescendo-QiaSLW0d.js +701 -0
  113. package/dist/src/custom-BJfP00Bh.js +619 -0
  114. package/dist/src/custom-CZVn-1-r.js +620 -0
  115. package/dist/src/custom-Cqia7M0D.cjs +621 -0
  116. package/dist/src/custom-notggYVl.js +618 -0
  117. package/dist/src/{docker-DCgsveLD.js → docker-4D1eL6Gq.js} +6 -6
  118. package/dist/src/{docker-ClnmCf1Z.js → docker-BBv1WUDu.js} +5 -5
  119. package/dist/src/{docker-DS4_Osau.cjs → docker-D06JUoe2.cjs} +6 -6
  120. package/dist/src/{docker-CQmlA2NU.js → docker-DdJQBxK9.js} +6 -6
  121. package/dist/src/{embedding-D3xTseo7.js → embedding--UZVe4_7.js} +6 -6
  122. package/dist/src/{embedding-I45KG3o7.cjs → embedding-BbrwopfX.cjs} +6 -6
  123. package/dist/src/{embedding-nFbumxcv.js → embedding-Bi3rxrZF.js} +5 -5
  124. package/dist/src/{embedding-DD9wa3ae.js → embedding-C251p1-8.js} +6 -6
  125. package/dist/src/{errors-Cw810C93.js → errors-9PcUL8BC.js} +1 -1
  126. package/dist/src/{esm-Dh4dOLlt.js → esm-B6whoAcf.js} +2 -2
  127. package/dist/src/{esm-CtEPLdAj.cjs → esm-BIKakvNa.cjs} +8 -7
  128. package/dist/src/{esm-C7PnfdF8.js → esm-BTK1W7lG.js} +1 -1
  129. package/dist/src/{esm-tVgYPY-f.js → esm-Bexx2PFc.js} +2 -2
  130. package/dist/src/{eval-u4UVafl6.js → eval-0VRANImH.js} +21 -21
  131. package/dist/src/{eval-CzJFfFO9.js → eval-DscR5iOM.js} +1 -1
  132. package/dist/src/{evalResult-Bgm9ZH31.js → evalResult-2RRJvFyB.js} +41 -16
  133. package/dist/src/{evalResult-KZqXl4XP.cjs → evalResult-CvtS8h8u.cjs} +51 -15
  134. package/dist/src/evalResult-DqzsS6_W.js +3 -0
  135. package/dist/src/{evalResult-D3hVYFis.js → evalResult-eUkJv9Ko.js} +40 -15
  136. package/dist/src/evaluator-DNdJF1Gv.js +3 -0
  137. package/dist/src/{evaluator-IvuDYSvQ.js → evaluator-DRoiYB2q.js} +1060 -187
  138. package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
  139. package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
  140. package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
  141. package/dist/src/{extractor-CAfTSraf.js → extractor-BR7XAzAL.js} +6 -6
  142. package/dist/src/{extractor-WVPOrH43.cjs → extractor-BdxEtt3J.cjs} +6 -6
  143. package/dist/src/{extractor-DNSeBVOJ.js → extractor-CIW3iN-b.js} +6 -6
  144. package/dist/src/{extractor-Dk6bRWkv.js → extractor-CxRtnaHl.js} +5 -5
  145. package/dist/src/{fetch-B0Z3Oe4k.js → fetch-BufrQtvR.js} +93 -40
  146. package/dist/src/{fetch-BEWnXrrG.js → fetch-DXUnXkVU.js} +89 -40
  147. package/dist/src/{fetch-CJU5ELPa.cjs → fetch-Dw4XZHjj.cjs} +330 -270
  148. package/dist/src/{fetch-Di00EQrc.js → fetch-It34O8Ur.js} +305 -252
  149. package/dist/src/fetch-_YgGd2qv.js +3 -0
  150. package/dist/src/{fileExtensions-bYh77CN8.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
  151. package/dist/src/{fileExtensions-DnqA1y9x.js → fileExtensions-CXRfY3Ss.js} +12 -2
  152. package/dist/src/{fileExtensions-AWa2ZML4.js → fileExtensions-D4GCJ67J.js} +12 -2
  153. package/dist/src/{formatDuration-DZzPsexs.js → formatDuration-CMVNrYvE.js} +1 -1
  154. package/dist/src/{genaiTracer-yRuxj9-L.cjs → genaiTracer-14nugQQx.cjs} +14 -2
  155. package/dist/src/{genaiTracer-DWdZ28hY.js → genaiTracer-BPVvltoW.js} +2 -2
  156. package/dist/src/{genaiTracer-XnrcgDCe.js → genaiTracer-D18lYzhB.js} +2 -2
  157. package/dist/src/{genaiTracer-COYDi-tC.js → genaiTracer-jJKYsnjc.js} +2 -2
  158. package/dist/src/goat-Ckd3q3AY.js +467 -0
  159. package/dist/src/goat-Qgurm-NP.js +466 -0
  160. package/dist/src/goat-ghadEDdy.js +465 -0
  161. package/dist/src/goat-una6pZGP.cjs +469 -0
  162. package/dist/src/graders-BDT7dif6.js +3 -0
  163. package/dist/src/{graders-eIHhRqoC.js → graders-BGP99PdK.js} +2416 -2224
  164. package/dist/src/{graders-Zy3x0zqX.js → graders-BX0f2tvS.js} +2423 -2226
  165. package/dist/src/{graders-pvbReLLn.js → graders-C0nXU_ZP.js} +1806 -1609
  166. package/dist/src/{graders--zknU_uk.cjs → graders-ClrU2fnd.cjs} +2219 -1949
  167. package/dist/src/hydra-BSNZZm2M.js +543 -0
  168. package/dist/src/hydra-BxdG4nkg.js +541 -0
  169. package/dist/src/hydra-DE4xWwyc.js +542 -0
  170. package/dist/src/hydra-DrJttnvw.cjs +542 -0
  171. package/dist/src/image-B4oBtu6J.js +443 -0
  172. package/dist/src/{image-dnoUgPrC.js → image-BN-hjLL9.js} +4 -4
  173. package/dist/src/{image-9302QVqR.js → image-B_fPIwdg.js} +3 -3
  174. package/dist/src/image-BvUAW344.js +442 -0
  175. package/dist/src/image-Cvjwx1uY.js +442 -0
  176. package/dist/src/{image-De2FBmYV.cjs → image-DfVCGPbI.cjs} +4 -4
  177. package/dist/src/{image-u7-rKnYU.js → image-QzmydkiG.js} +4 -4
  178. package/dist/src/image-X0oY4350.cjs +465 -0
  179. package/dist/src/index.cjs +1689 -558
  180. package/dist/src/index.d.cts +3270 -1624
  181. package/dist/src/index.d.ts +3270 -1624
  182. package/dist/src/index.js +1553 -438
  183. package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
  184. package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
  185. package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
  186. package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
  187. package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
  188. package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
  189. package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
  190. package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
  191. package/dist/src/inputVariables-B0qUChbV.js +467 -0
  192. package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
  193. package/dist/src/inputVariables-DXFdi7AI.js +468 -0
  194. package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
  195. package/dist/src/{interactiveCheck-CLERUB0c.js → interactiveCheck-C4QlIuoR.js} +2 -2
  196. package/dist/src/{invariant-BtWWVVhl.js → invariant-B2Rf6avk.js} +1 -1
  197. package/dist/src/{invariant-vgHWClmd.js → invariant-DIYf9sP1.js} +1 -1
  198. package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
  199. package/dist/src/iterative-CpU6i2As.js +490 -0
  200. package/dist/src/iterative-DJQEQpG3.js +491 -0
  201. package/dist/src/iterative-DQBuWM-j.cjs +493 -0
  202. package/dist/src/iterative-FTS4Bz67.js +492 -0
  203. package/dist/src/iterativeImage-BUABMVOA.js +413 -0
  204. package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
  205. package/dist/src/iterativeImage-BzUapOUi.js +414 -0
  206. package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
  207. package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
  208. package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
  209. package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
  210. package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
  211. package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
  212. package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
  213. package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
  214. package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
  215. package/dist/src/{knowledgeBase-Dgc7CBWF.js → knowledgeBase-BadkINlJ.js} +24 -10
  216. package/dist/src/{knowledgeBase-RhFPGWDc.js → knowledgeBase-Bi_8sV-H.js} +25 -11
  217. package/dist/src/{knowledgeBase-lm9RXSAm.js → knowledgeBase-CkMljjdg.js} +25 -11
  218. package/dist/src/{knowledgeBase-Bpoe_nLu.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
  219. package/dist/src/{litellm-DRjpcSa7.js → litellm-BKBo0jpC.js} +5 -5
  220. package/dist/src/{litellm-C2kqjxqp.js → litellm-BXyn5kZK.js} +5 -5
  221. package/dist/src/{litellm-p37R1dzQ.js → litellm-CNcfbCfa.js} +4 -4
  222. package/dist/src/{litellm-CoyI4IAl.cjs → litellm-CtAr7bKG.cjs} +5 -5
  223. package/dist/src/{logger-DksKw1Qc.js → logger-BbY6ypFL.js} +2 -2
  224. package/dist/src/{logger-B88EkIn6.js → logger-KD8JjCRJ.js} +2 -2
  225. package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
  226. package/dist/src/{luma-ray-KgTCXrZC.js → luma-ray-BMX1iEB6.js} +5 -5
  227. package/dist/src/{luma-ray-B863CmuZ.js → luma-ray-CR5TSpp4.js} +5 -5
  228. package/dist/src/{luma-ray-BxVKaW2a.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
  229. package/dist/src/{luma-ray-BTTLtqQ8.js → luma-ray-OEMmS1RB.js} +6 -6
  230. package/dist/src/main.js +909 -369
  231. package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
  232. package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
  233. package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
  234. package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
  235. package/dist/src/{messages-BTQz42fn.js → messages-BabO-cX8.js} +273 -17
  236. package/dist/src/{messages-811uVVW5.cjs → messages-DBPir0TQ.cjs} +278 -18
  237. package/dist/src/{messages-zWbkLLHz.js → messages-DGUlSNU7.js} +273 -17
  238. package/dist/src/{messages-MYTQ2TWp.js → messages-vsE_-Lv0.js} +273 -17
  239. package/dist/src/{meteor-DHdzY1Ss.js → meteor--TZYICTI.js} +2 -2
  240. package/dist/src/{meteor-Co1VQ1u5.cjs → meteor-CR226f7Z.cjs} +2 -2
  241. package/dist/src/{meteor-CU5UAE-H.js → meteor-Cl_yd7rJ.js} +2 -2
  242. package/dist/src/{meteor-DuAFv6gF.js → meteor-Dce-_zGQ.js} +1 -1
  243. package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
  244. package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
  245. package/dist/src/mischievousUser-frFYKxu6.js +47 -0
  246. package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
  247. package/dist/src/{modelslab-Dk1JAtVo.cjs → modelslab-CNV5bMSk.cjs} +7 -7
  248. package/dist/src/{modelslab-D0erNWKe.js → modelslab-Cogmu4mG.js} +6 -6
  249. package/dist/src/{modelslab-DIq-6y7x.js → modelslab-Dzst7VTU.js} +6 -6
  250. package/dist/src/{modelslab-wu9yi5GE.js → modelslab-EyDczZ5A.js} +7 -7
  251. package/dist/src/{nova-reel-CCFRfeRb.js → nova-reel-BGPNBOMS.js} +6 -6
  252. package/dist/src/{nova-reel-DQrm74ng.js → nova-reel-B_5NKFu1.js} +5 -5
  253. package/dist/src/{nova-reel-gr11WG7f.js → nova-reel-C4eUJGse.js} +5 -5
  254. package/dist/src/{nova-reel-CrLXVKQf.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
  255. package/dist/src/{nova-sonic-BYdp-QLs.js → nova-sonic-BNGmgfFz.js} +4 -4
  256. package/dist/src/{nova-sonic-TDgrlTk7.js → nova-sonic-ChPlh5na.js} +4 -4
  257. package/dist/src/{nova-sonic-B_ZXcUJB.js → nova-sonic-CrV0iaY_.js} +3 -3
  258. package/dist/src/{nova-sonic-i5tUvXKn.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
  259. package/dist/src/{openai-DhVEmgeZ.js → openai-BMHD2Huo.js} +2 -2
  260. package/dist/src/{openai-URNyItar.cjs → openai-C3uXv8wS.cjs} +2 -2
  261. package/dist/src/{openai-Qsvz25mV.js → openai-CJrsh9n4.js} +2 -2
  262. package/dist/src/{openai-iYtrXzOX.js → openai-zgwBb4Ff.js} +1 -1
  263. package/dist/src/{openclaw-CnQ363Wi.js → openclaw-BIHlu_36.js} +10 -8
  264. package/dist/src/{openclaw-CwzlQSQX.js → openclaw-CF7fMido.js} +9 -7
  265. package/dist/src/{openclaw-wX9rtfke.cjs → openclaw-Dphc01BY.cjs} +18 -15
  266. package/dist/src/{openclaw-CLWrW03k.js → openclaw-zIJAsz3P.js} +10 -8
  267. package/dist/src/{opencode-sdk-BUu5Nevv.js → opencode-sdk-B3vlPLsp.js} +40 -5
  268. package/dist/src/{opencode-sdk-BxD8vXp_.js → opencode-sdk-D05JSgMQ.js} +40 -5
  269. package/dist/src/{opencode-sdk-BZ2idgYA.cjs → opencode-sdk-DoY6GbWw.cjs} +46 -10
  270. package/dist/src/{opencode-sdk-GI2KaAXq.js → opencode-sdk-sRKYHGoI.js} +39 -4
  271. package/dist/src/{otlpReceiver-BntK801g.js → otlpReceiver--gTpSagc.js} +120 -4
  272. package/dist/src/{otlpReceiver-DmVulbhC.js → otlpReceiver-B2eaKC8C.js} +120 -4
  273. package/dist/src/{otlpReceiver-B2z58l4e.js → otlpReceiver-BXjcRqAM.js} +119 -3
  274. package/dist/src/{otlpReceiver-BfcVq2Nq.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
  275. package/dist/src/packageParser--MWTSrPW.js +36 -0
  276. package/dist/src/packageParser-CgE-ziRo.js +35 -0
  277. package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
  278. package/dist/src/packageParser-hwwSGnAZ.js +35 -0
  279. package/dist/src/processShim-BBxt7LKO.js +95 -0
  280. package/dist/src/processShim-BcGzU8fY.js +94 -0
  281. package/dist/src/processShim-C_z3aRvF.js +94 -0
  282. package/dist/src/processShim-DSY9BV2T.cjs +98 -0
  283. package/dist/src/promptLength-0qIHyhA5.js +71 -0
  284. package/dist/src/promptLength-4X-Wd8PG.js +72 -0
  285. package/dist/src/promptLength-B9nZEfO6.js +71 -0
  286. package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
  287. package/dist/src/promptfoo-BDrfT30-.js +180 -0
  288. package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
  289. package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
  290. package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
  291. package/dist/src/prompts-BYMtqPCw.js +259 -0
  292. package/dist/src/prompts-C-bqE1Yp.js +260 -0
  293. package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
  294. package/dist/src/prompts-DHhQsANy.js +259 -0
  295. package/dist/src/prompts-D_QpZ2Dm.js +271 -0
  296. package/dist/src/prompts-hNvWBD3z.cjs +284 -0
  297. package/dist/src/prompts-huDVH2CI.js +270 -0
  298. package/dist/src/prompts-p78Hul5i.cjs +289 -0
  299. package/dist/src/{providerRegistry-CPQ_CmVO.js → providerRegistry-1gB5vtzQ.js} +2 -2
  300. package/dist/src/{providerRegistry-CQMdTmHP.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
  301. package/dist/src/{providerRegistry-Bvh8mv85.js → providerRegistry-DHcFiVWX.js} +1 -1
  302. package/dist/src/{providerRegistry-CWoPjKFZ.js → providerRegistry-ReCd0sFa.js} +2 -2
  303. package/dist/src/{providers-BV_KMZje.js → providers-B9KzWxAX.js} +10558 -21587
  304. package/dist/src/{providers-DruaQfwu.js → providers-BCCz6_IX.js} +1228 -12196
  305. package/dist/src/{providers-1eKkXBKp.cjs → providers-BDVVIQM6.cjs} +10649 -21843
  306. package/dist/src/{providers-iUt5fbAN.js → providers-BYAn82cf.js} +1 -1
  307. package/dist/src/{providers-Domz_llv.js → providers-DVYRZP4E.js} +10589 -21570
  308. package/dist/src/{pythonUtils-Cldx7huE.js → pythonUtils-CLCgQ9tt.js} +3 -3
  309. package/dist/src/{pythonUtils-CnndUbW-.js → pythonUtils-CgYxeSmO.js} +3 -3
  310. package/dist/src/{pythonUtils-tAJvvpS-.cjs → pythonUtils-Cokhluq3.cjs} +8 -7
  311. package/dist/src/{pythonUtils-C2UQ30Rz.js → pythonUtils-D0BYebvX.js} +3 -3
  312. package/dist/src/{quiverai-DFotyafY.cjs → quiverai-BAp6iTZD.cjs} +4 -4
  313. package/dist/src/{quiverai-aPPvXOgn.js → quiverai-BvIhI_0l.js} +4 -4
  314. package/dist/src/{quiverai-DR0SnIQV.js → quiverai-CdTWPe-A.js} +3 -3
  315. package/dist/src/{quiverai-CtWi6x_g.js → quiverai-Cv7rJKDz.js} +4 -4
  316. package/dist/src/registry-BUJrgjwv.js +124 -0
  317. package/dist/src/registry-DXm1t_x0.js +125 -0
  318. package/dist/src/registry-Dp5EqoXc.js +124 -0
  319. package/dist/src/registry-KCVF1CFC.cjs +124 -0
  320. package/dist/src/{server-D6Il2Sob.js → remoteGeneration-B1_XsKXU.js} +16 -108
  321. package/dist/src/{server-BSB45Nt9.js → remoteGeneration-COpWcmWd.js} +15 -146
  322. package/dist/src/{server-Dx2TyCH2.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
  323. package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
  324. package/dist/src/render-BNTrbmBw.cjs +384 -0
  325. package/dist/src/render-CSP99NLm.js +348 -0
  326. package/dist/src/render-DFfDeYUK.js +347 -0
  327. package/dist/src/{render-CgVDrJmM.js → render-DznWrxGO.js} +2 -2
  328. package/dist/src/render-_6ur1fhE.js +347 -0
  329. package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
  330. package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
  331. package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
  332. package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
  333. package/dist/src/{responses-Bi9vBuW_.cjs → responses-1UFFF9N_.cjs} +51 -16
  334. package/dist/src/{responses-DL9m8CyY.js → responses-B3W2JvOQ.js} +49 -15
  335. package/dist/src/{responses--OsX2aYW.js → responses-B6ktc3Ra.js} +49 -15
  336. package/dist/src/{responses-C-flexAY.js → responses-URRzV8qE.js} +49 -15
  337. package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
  338. package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
  339. package/dist/src/{rubyUtils-DsGrTx8R.js → rubyUtils-CXlFM2rR.js} +3 -3
  340. package/dist/src/{rubyUtils-DVLeA2jg.js → rubyUtils-CnlW8AYb.js} +3 -3
  341. package/dist/src/{rubyUtils-B6eljPuh.cjs → rubyUtils-CqUWBZAt.cjs} +18 -27
  342. package/dist/src/{rubyUtils-CYSQEG4a.js → rubyUtils-DdGojpfv.js} +3 -3
  343. package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
  344. package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
  345. package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
  346. package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
  347. package/dist/src/{sagemaker-BVkaG2-l.js → sagemaker-CujrzP1a.js} +62 -51
  348. package/dist/src/{sagemaker-XnfhheQv.cjs → sagemaker-DzffAqo_.cjs} +65 -53
  349. package/dist/src/{sagemaker-D67yzMzs.js → sagemaker-vhtSV7JI.js} +62 -51
  350. package/dist/src/{sagemaker-BveBvuxm.js → sagemaker-yr1QKeBs.js} +61 -50
  351. package/dist/src/{scanner-1DqWi1Ej.js → scanner-DS0109SS.js} +7 -7
  352. package/dist/src/server/index.js +5105 -605
  353. package/dist/src/server-B8rqV126.cjs +126 -0
  354. package/dist/src/server-BaLytskk.js +3 -0
  355. package/dist/src/server-CMJD10J4.js +107 -0
  356. package/dist/src/server-Ddp8GNMp.js +146 -0
  357. package/dist/src/server-DhMHosWj.js +182 -0
  358. package/dist/src/shared-7pmVZLNO.js +1334 -0
  359. package/dist/src/shared-9WHQ1oNE.js +1335 -0
  360. package/dist/src/{fileExtensions-BArZuxsI.js → shared-BoG7qLMv.js} +12 -2
  361. package/dist/src/shared-D6IjElRI.js +1334 -0
  362. package/dist/src/shared-WkgnDkcg.cjs +1436 -0
  363. package/dist/src/{signal-CE5G3a7x.js → signal-CSurUUyV.js} +3 -3
  364. package/dist/src/simulatedUser-C9aQObBI.js +222 -0
  365. package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
  366. package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
  367. package/dist/src/simulatedUser-p3tACcmw.js +223 -0
  368. package/dist/src/{slack-DDUe-5MC.js → slack-Bapo-7_8.js} +2 -2
  369. package/dist/src/{slack-1Rhq0EoV.cjs → slack-DMC1QVEg.cjs} +3 -2
  370. package/dist/src/{slack-D5Wpy8LM.js → slack-DTEFhrMn.js} +2 -2
  371. package/dist/src/{slack-acRb0IqQ.js → slack-k-_CP84Q.js} +1 -1
  372. package/dist/src/storage-BU4qcnOb.js +875 -0
  373. package/dist/src/storage-CA-v9V2v.cjs +911 -0
  374. package/dist/src/storage-CD-GWAdx.js +822 -0
  375. package/dist/src/storage-QdU-SmvD.js +834 -0
  376. package/dist/src/{store-DAAyxcy6.cjs → store-B2NDDooM.cjs} +60 -24
  377. package/dist/src/{store-CYEy5J2D.js → store-DKd5592Q.js} +51 -20
  378. package/dist/src/{store-M0b1WfYb.js → store-HpopRVzl.js} +50 -19
  379. package/dist/src/store-IbiRIF3k.js +3 -0
  380. package/dist/src/strategies-7CS3Alao.cjs +2360 -0
  381. package/dist/src/strategies-CiSeroPH.js +2331 -0
  382. package/dist/src/strategies-DRJjGTIY.js +2333 -0
  383. package/dist/src/{tables-DQ4WU5tX.js → tables-CRSXQ2Ke.js} +2 -2
  384. package/dist/src/{tables-CsWou1Bx.js → tables-CxjU7bBd.js} +3 -3
  385. package/dist/src/{tables-DUfh1F7Z.cjs → tables-DBIJU0WE.cjs} +6 -5
  386. package/dist/src/{tables-C4CH3zRr.js → tables-DafUHOeh.js} +3 -3
  387. package/dist/src/{telemetry-CQPez_Jp.js → telemetry-00ezXr_t.js} +5 -4
  388. package/dist/src/telemetry-ByPqDcKC.js +3 -0
  389. package/dist/src/{telemetry-Dsw_faFj.cjs → telemetry-CJ7FnCsc.cjs} +18 -11
  390. package/dist/src/{telemetry-dbaJ0E98.js → telemetry-DmXYcJNV.js} +5 -4
  391. package/dist/src/{telemetry-Dvqxv3YC.js → telemetry-DwX9XUN5.js} +4 -3
  392. package/dist/src/{text-KvuD2Iko.js → text-Db-Wt2u2.js} +1 -1
  393. package/dist/src/{text-DHxdyQqT.js → text-DwYK5EBn.js} +1 -1
  394. package/dist/src/{text-BVi-cLPJ.cjs → text-nywWsRBM.cjs} +1 -1
  395. package/dist/src/{tokenUsageUtils-C-bmyHoE.js → tokenUsageUtils-BjVkdk18.js} +1 -1
  396. package/dist/src/{tokenUsageUtils-CXrvO-wA.js → tokenUsageUtils-CDet74yk.js} +1 -1
  397. package/dist/src/tokenUsageUtils-CmnQ0G2m.js +142 -0
  398. package/dist/src/{tokenUsageUtils-Bb7DkZPz.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
  399. package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
  400. package/dist/src/toolAttributes-COVgDrBG.js +87 -0
  401. package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
  402. package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
  403. package/dist/src/tracingOptions-Chi74lOD.js +219 -0
  404. package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
  405. package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
  406. package/dist/src/{transcription-DuWDupG7.js → transcription-B8uIgCYX.js} +5 -5
  407. package/dist/src/{transcription-CJspiD2c.js → transcription-CfU5loSq.js} +6 -6
  408. package/dist/src/{transcription-V2HaAmy2.js → transcription-Dkd22_4K.js} +6 -6
  409. package/dist/src/{transcription-BvjmiYB1.cjs → transcription-mzuf18Mq.cjs} +9 -8
  410. package/dist/src/{transform-lQrDE1BQ.js → transform-BIMynQsA.js} +9 -9
  411. package/dist/src/transform-BnSTnFlp.js +187 -0
  412. package/dist/src/transform-BnSXWmU_2.cjs +221 -0
  413. package/dist/src/transform-CGt7Kt3y2.js +186 -0
  414. package/dist/src/transform-CrPGTsij.js +186 -0
  415. package/dist/src/{transform-CTeuTR3S.cjs → transform-DhNkAUs8.cjs} +13 -12
  416. package/dist/src/{transform-CG0ehZNG.js → transform-DmvYBRll.js} +9 -9
  417. package/dist/src/{transform-zDhMmzwX.js → transform-EtD4jAWi.js} +9 -9
  418. package/dist/src/{transformersAvailability-CcHusyhw.js → transformersAvailability-0ThtPved.js} +1 -1
  419. package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
  420. package/dist/src/{transformersAvailability-DLlROWhg.js → transformersAvailability-BvyU9vDD.js} +1 -1
  421. package/dist/src/{transformersAvailability-Cju9mHgR.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
  422. package/dist/src/{types-Dm9JM6Vb.js → types-BFevViUY.js} +115 -19
  423. package/dist/src/{types-Bgh5SOn6.js → types-BJQBBPTP.js} +115 -19
  424. package/dist/src/{types-CeaeaZdP.cjs → types-CxJvaY2S.cjs} +357 -172
  425. package/dist/src/{types-BGQDAP8i.js → types-D6glLbdF.js} +271 -170
  426. package/dist/src/{util-BYvQUPp7.js → util--WMgw7wM.js} +28 -8
  427. package/dist/src/{util-C9J8ahRn.js → util-5WnCSb0h.js} +72 -48
  428. package/dist/src/{util-CN3SrLT4.cjs → util-BSIuSLVK.cjs} +74 -49
  429. package/dist/src/{util-C8e5uydV.js → util-Bx677_k2.js} +154 -147
  430. package/dist/src/util-CN8om2rz.cjs +386 -0
  431. package/dist/src/{util-DDs-7g6-.js → util-CoQWM76y.js} +28 -8
  432. package/dist/src/util-DNl96nNs.js +327 -0
  433. package/dist/src/{util-DxWpWjhc.js → util-DURocbYR.js} +667 -507
  434. package/dist/src/util-Df8YMvS1.js +327 -0
  435. package/dist/src/{util-DvU2Pw8c.js → util-DiQ3QvBB.js} +28 -8
  436. package/dist/src/{util-oGMLA7vc.js → util-I-Rf-KaD.js} +862 -577
  437. package/dist/src/{util-olYL5C6N.cjs → util-IYzs5Y04.cjs} +33 -7
  438. package/dist/src/{util-D9TisOyk.js → util-LKTmNsMQ.js} +71 -47
  439. package/dist/src/{util-Bxn8emtE.cjs → util-SPsvFONY.cjs} +738 -582
  440. package/dist/src/{util-D3q0WQ-0.js → util-efByNxcr.js} +72 -48
  441. package/dist/src/util-kDURhgJW.js +328 -0
  442. package/dist/src/{utils-DJfvjyMj.js → utils-B0lzitHZ.js} +3 -3
  443. package/dist/src/{utils-BLJKfv0y.js → utils-BFOh20Gb.js} +3 -3
  444. package/dist/src/{utils-hXtCYanr.js → utils-BGY69tk_.js} +2 -2
  445. package/dist/src/{utils-B05gLxER.cjs → utils-Ve6kuJsa.cjs} +3 -3
  446. package/dist/src/version-BK20a4sw.js +16 -0
  447. package/dist/src/version-BWCSaByA.cjs +27 -0
  448. package/dist/src/version-eRkNuGv8.js +17 -0
  449. package/dist/src/version-lpHV_53E.js +16 -0
  450. package/dist/tsconfig.tsbuildinfo +1 -1
  451. package/package.json +56 -28
  452. package/dist/src/app/assets/Report-CQYFezYu.js +0 -1
  453. package/dist/src/app/assets/index-BXGkeMwh.css +0 -1
  454. package/dist/src/app/assets/index-BzJt18Jz.js +0 -385
  455. package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
  456. package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
  457. package/dist/src/cache-Cr9oLMUa.js +0 -3
  458. package/dist/src/cache-DbLsVWB2.cjs +0 -3
  459. package/dist/src/cloud-Hphvo8kr.js +0 -3
  460. package/dist/src/codex-sdk-BAmYE7qy.js +0 -3
  461. package/dist/src/codex-sdk-CWEnH70W.cjs +0 -2
  462. package/dist/src/evalResult-D8MT9p0s.js +0 -3
  463. package/dist/src/evalResult-DElBuddX.js +0 -2
  464. package/dist/src/evalResult-Dvc-iucu.cjs +0 -2
  465. package/dist/src/evaluator-CVessDWe.js +0 -3
  466. package/dist/src/fetch-C7bGKDlQ.js +0 -3
  467. package/dist/src/graders-BOAzQEUe.cjs +0 -2
  468. package/dist/src/graders-D4BTsZdG2.js +0 -3
  469. package/dist/src/graders-DOJK1XpV.js +0 -2
  470. package/dist/src/graders-NAv9LcBn.js +0 -2
  471. package/dist/src/image-B5Mv-Z3h.js +0 -257
  472. package/dist/src/image-DVz2RiMF.js +0 -258
  473. package/dist/src/image-qUpPvmNZ.js +0 -257
  474. package/dist/src/image-x6KqLQl4.cjs +0 -280
  475. package/dist/src/providers-Bp4S-FvO.js +0 -2
  476. package/dist/src/providers-DV3ax9e_.cjs +0 -3
  477. package/dist/src/providers-u9Enmfok.js +0 -2
  478. package/dist/src/render-CH-62LbA.js +0 -135
  479. package/dist/src/render-CMEpfLaO.js +0 -136
  480. package/dist/src/render-DHIZ6_k8.js +0 -135
  481. package/dist/src/render-DfQSFxGE.cjs +0 -165
  482. package/dist/src/rubyUtils-D1L2d3jb.js +0 -3
  483. package/dist/src/rubyUtils-DUbq4tff.cjs +0 -2
  484. package/dist/src/server-BNYztJkh.js +0 -385
  485. package/dist/src/server-DCtHUqlp.js +0 -3
  486. package/dist/src/server-DaA2eR26.cjs +0 -2
  487. package/dist/src/store-CWOSz6D_.cjs +0 -2
  488. package/dist/src/store-DCDBhv7B.js +0 -3
  489. package/dist/src/store-Dn9HUkdW.js +0 -240
  490. package/dist/src/telemetry-C1IqxcdW.js +0 -3
  491. package/dist/src/telemetry-C4ZEa_es.cjs +0 -2
  492. package/dist/src/transform-Bbg6A8Jk.js +0 -216
  493. package/dist/src/transform-CUnzlsbn.cjs +0 -228
  494. package/dist/src/transform-DYX1_Xnh.js +0 -216
  495. package/dist/src/transform-DgKlRr73.cjs +0 -2
  496. package/dist/src/transform-M6ITAESf.js +0 -3
  497. package/dist/src/transform-UN5UGu8U.js +0 -213
package/dist/src/main.js CHANGED
@@ -1,34 +1,45 @@
1
1
  #!/usr/bin/env node
2
- import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-DksKw1Qc.js";
3
- import { A as getDefaultPort, F as VERSION, L as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, T as CLOUD_PROVIDER_PREFIX, i as fetchWithTimeout, j as getDefaultShareViewBaseUrl, k as TERMINAL_MAX_WIDTH, l as sleep, m as REQUEST_TIMEOUT_MS, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, v as isPromptfooSampleTarget } from "./fetch-BEWnXrrG.js";
4
- import { t as invariant } from "./invariant-BtWWVVhl.js";
5
- import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-F9d_5sMC.js";
6
- import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-DQZ5sVjW.js";
7
- import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-CQPez_Jp.js";
8
- import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-Dm9JM6Vb.js";
9
- import { r as importModule, t as getDirectory } from "./esm-tVgYPY-f.js";
10
- import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-CMEpfLaO.js";
11
- import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-BNYztJkh.js";
12
- import { A as renderPrompt, B as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, C as extractVariablesFromJson, F as TokenUsageTracker, H as getMaxCharsPerMessageModifierValue, P as redteamProviderManager, S as extractPromptFromTags, T as getShortPluginId, V as getGeneratedPromptOverLimit, _ as pluginMatchesStrategyTargets, _t as getUserTeams, a as resolveProviderConfigs, b as extractGoalFromPrompt, bt as resolveTeamFromIdentifier, c as createTransformRequest, ct as canCreateTargets, d as Strategies, dt as getConfigFromCloud, f as loadStrategy, ft as getEvalConfigFromCloud, g as retrieveMedia, gt as getProviderFromCloud, h as mediaExists, i as resolveProvider, l as createTransformResponse, lt as checkCloudPermissions, m as getMediaStorage, mt as getPluginSeverityOverridesFromCloud, n as loadApiProvider, o as MCPProvider, p as validateStrategies, pt as getOrgContext, r as loadApiProviders, s as HttpProvider, t as getProviderIds, ut as getCloudDatabaseId, vt as isCloudProvider, xt as resolveTeamId, yt as makeRequest$1, z as PromptfooHarmfulCompletionProvider } from "./providers-BV_KMZje.js";
13
- import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-mb7c8hbp.js";
14
- import { i as isJavascriptFile } from "./fileExtensions-AWa2ZML4.js";
15
- import { A as getProviderDescription, C as deduplicateTestCases, D as resultIsForTestCase, E as getTestCaseDeduplicationKey, O as checkProviderApiKeys, S as setupEnv, T as filterRuntimeVars, a as ComparisonEvalNotFoundError, c as mergeComparisonTables, f as maybeLoadConfigFromExternalFile, i as writeOutput, k as doesProviderRefMatch, l as fetchCsvFromGoogleSheet, n as createOutputMetadata, o as evalTableToJson, p as maybeLoadFromExternalFile, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, v as readFilters, w as extractRuntimeVars, y as readOutput } from "./util-DxWpWjhc.js";
16
- import { r as runPython } from "./pythonUtils-CnndUbW-.js";
17
- import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, dt as processPrompts, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, ft as readPrompts, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, lt as doRemoteGrading, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, pt as readProviderPromptMap, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, ut as getDefaultProviders, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-eIHhRqoC.js";
18
- import { i as generateIdFromPrompt } from "./utils-BLJKfv0y.js";
19
- import { n as sha256, t as randomSequence } from "./createHash-ChI45QR1.js";
20
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CXrvO-wA.js";
21
- import { n as getBlobUrl, t as getBlobByHash } from "./blobs-BQWqnnvL.js";
22
- import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-C4CH3zRr.js";
23
- import { n as isBlobStorageEnabled } from "./extractor-DNSeBVOJ.js";
24
- import { n as escapeRegExp, t as ellipsize } from "./text-KvuD2Iko.js";
25
- import { n as getTraceStore } from "./store-CYEy5J2D.js";
26
- import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-IvuDYSvQ.js";
27
- import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CE5G3a7x.js";
28
- import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-u4UVafl6.js";
29
- import { t as EvalResult } from "./evalResult-D3hVYFis.js";
30
- import { t as formatDuration } from "./formatDuration-DZzPsexs.js";
31
- import { n as shouldUseInkUI } from "./interactiveCheck-CLERUB0c.js";
2
+ import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
3
+ import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, M as TERMINAL_MAX_WIDTH, N as getDefaultPort, O as CLOUD_PROVIDER_PREFIX, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, _ as REQUEST_TIMEOUT_MS, f as sleep, i as fetchWithTimeout, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, x as isPromptfooSampleTarget } from "./fetch-DXUnXkVU.js";
4
+ import { n as VERSION } from "./version-eRkNuGv8.js";
5
+ import { t as invariant } from "./invariant-B2Rf6avk.js";
6
+ import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CjFnOPmb.js";
7
+ import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-D3DiFqH6.js";
8
+ import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-00ezXr_t.js";
9
+ import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, _t as CODING_AGENT_PLUGINS, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BFevViUY.js";
10
+ import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
11
+ import { r as importModule, t as getDirectory } from "./esm-Bexx2PFc.js";
12
+ import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, t as renderEnvOnlyInObject } from "./render-CSP99NLm.js";
13
+ import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-B1_XsKXU.js";
14
+ import { a as openAuthBrowser, i as checkServerRunning, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-DhMHosWj.js";
15
+ import { _ as makeRequest$1, c as getCloudDatabaseId, d as getOrgContext, f as getPluginSeverityOverridesFromCloud, g as isCloudProvider, h as getUserTeams, i as retrieveMedia, l as getConfigFromCloud, m as getProviderFromCloud, o as canCreateTargets, r as mediaExists, s as checkCloudPermissions, t as getMediaStorage, u as getEvalConfigFromCloud, v as resolveTeamFromIdentifier, y as resolveTeamId } from "./storage-BU4qcnOb.js";
16
+ import { n as sha256, t as randomSequence } from "./createHash-CgRvs4Fn.js";
17
+ import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-DpPWrkTE.js";
18
+ import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
19
+ import { D as normalizeProviderRef, L as readFilters, M as maybeLoadFromExternalFile, R as readOutput, _ as resultIsForTestCase, a as ComparisonEvalNotFoundError, b as getProviderDescription, c as getEvalTableOutputPromptLocationsBySize, d as fetchCsvFromGoogleSheet, f as setupEnv, g as getTestCaseDeduplicationKey, h as filterRuntimeVars, i as writeOutput, j as maybeLoadConfigFromExternalFile, l as getEvalTablePromptStrippedPayload, m as extractRuntimeVars, n as createOutputMetadata, o as evalTableToJson, p as deduplicateTestCases, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as mergeComparisonTables, v as checkProviderApiKeys, y as doesProviderRefMatch } from "./util-DURocbYR.js";
20
+ import { a as resolveProviderConfigs, d as MCPProvider, f as HttpProvider, i as resolveProvider, m as createTransformResponse, n as loadApiProvider, p as createTransformRequest, r as loadApiProviders, t as getProviderIds } from "./providers-B9KzWxAX.js";
21
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
22
+ import { n as escapeRegExp, t as ellipsize } from "./text-Db-Wt2u2.js";
23
+ import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DafUHOeh.js";
24
+ import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DODuTK-a.js";
25
+ import { n as createPlaceholderInputValue, r as materializeInputVariablesWithMetadata, t as buildPromptInputDescriptions } from "./inputVariables-DXFdi7AI.js";
26
+ import { a as extractPromptFromTags, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, r as extractInputVarsFromPrompt, s as getShortPluginId } from "./util-kDURhgJW.js";
27
+ import { n as PromptfooHarmfulCompletionProvider } from "./promptfoo-Rjp-MeBb.js";
28
+ import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as getDefaultProviders, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as readProviderPromptMap, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, X as processPrompts, Y as doRemoteGrading, Z as readPrompts, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-BGP99PdK.js";
29
+ import { f as redteamProviderManager, m as TokenUsageTracker } from "./shared-9WHQ1oNE.js";
30
+ import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-DRoiYB2q.js";
31
+ import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
32
+ import { n as TRANSFORM_KEYS, t as INLINE_FUNCTION_LABEL } from "./transform-BnSTnFlp.js";
33
+ import { n as getTraceStore } from "./store-DKd5592Q.js";
34
+ import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CSurUUyV.js";
35
+ import { n as isBlobStorageEnabled } from "./extractor-CIW3iN-b.js";
36
+ import { n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-4X-Wd8PG.js";
37
+ import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-0VRANImH.js";
38
+ import { n as sanitizeProvider, t as EvalResult } from "./evalResult-eUkJv9Ko.js";
39
+ import { n as renderPrompt } from "./evaluatorHelpers-BsYP_muT.js";
40
+ import { t as formatDuration } from "./formatDuration-CMVNrYvE.js";
41
+ import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-DRJjGTIY.js";
42
+ import { n as shouldUseInkUI } from "./interactiveCheck-C4QlIuoR.js";
32
43
  import { fileURLToPath } from "node:url";
33
44
  import { Command, InvalidArgumentError } from "commander";
34
45
  import { z } from "zod";
@@ -61,9 +72,9 @@ import process$1 from "process";
61
72
  import $RefParser from "@apidevtools/json-schema-ref-parser";
62
73
  import async from "async";
63
74
  import { and, asc, count, desc, eq, inArray, isNotNull, like, or, sql } from "drizzle-orm";
64
- import cliProgress from "cli-progress";
65
75
  import { URL as URL$1 } from "url";
66
76
  import editor from "@inquirer/editor";
77
+ import cliProgress from "cli-progress";
67
78
  import debounce from "debounce";
68
79
  import { LRUCache } from "lru-cache";
69
80
  import chokidar from "chokidar";
@@ -100,7 +111,7 @@ function runCommand(program) {
100
111
  hasGithubPr: !!cmdObj.githubPr,
101
112
  hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
102
113
  });
103
- const { executeScan } = await import("./scanner-1DqWi1Ej.js");
114
+ const { executeScan } = await import("./scanner-DS0109SS.js");
104
115
  await executeScan(repoPath, cmdObj);
105
116
  });
106
117
  }
@@ -572,47 +583,11 @@ function filterPrompts(prompts, filterPromptsOption) {
572
583
  //#endregion
573
584
  //#region src/commands/eval/filterProviders.ts
574
585
  /**
575
- * Checks if a value is a valid provider ID (non-empty string).
576
- */
577
- function isValidProviderId(id) {
578
- return id !== null && id !== void 0 && typeof id === "string" && id !== "";
579
- }
580
- /**
581
586
  * Extracts the id and label from a raw provider config without instantiating it.
582
587
  * Handles all provider config formats: string, function, ProviderOptions, ProviderOptionsMap.
583
588
  */
584
589
  function getProviderIdAndLabel(provider, index) {
585
- if (typeof provider === "string") return { id: provider };
586
- if (typeof provider === "function") {
587
- const label = provider.label;
588
- return {
589
- id: label ?? `custom-function-${index}`,
590
- label
591
- };
592
- }
593
- const providerId = provider.id;
594
- if ("id" in provider && isValidProviderId(providerId)) return {
595
- id: providerId,
596
- label: provider.label
597
- };
598
- const keys = Object.keys(provider);
599
- if (keys.length > 0) {
600
- const id = keys[0];
601
- const value = provider[id];
602
- if (typeof value === "object" && value !== null) return {
603
- id: value.id || id,
604
- label: value.label
605
- };
606
- }
607
- const label = provider.label;
608
- if (isValidProviderId(label)) return {
609
- id: label,
610
- label
611
- };
612
- return {
613
- id: `unknown-${index}`,
614
- label
615
- };
590
+ return normalizeProviderRef(provider, { index });
616
591
  }
617
592
  /**
618
593
  * Filters raw provider configs BEFORE instantiation.
@@ -3030,6 +3005,10 @@ function stripAuthFromUrl(urlString) {
3030
3005
  }
3031
3006
  }
3032
3007
  async function handleEmailCollection(evalRecord) {
3008
+ if (evalRecord.author) {
3009
+ logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
3010
+ return;
3011
+ }
3033
3012
  if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
3034
3013
  let email = getUserEmail();
3035
3014
  if (!email) {
@@ -3208,7 +3187,7 @@ function generateTable(evaluateTable, tableCellMaxLength = 250, maxRows = 25) {
3208
3187
  for (const row of evaluateTable.body.slice(0, maxRows)) table.push([...row.vars.map((v) => ellipsize(v, tableCellMaxLength)), ...row.outputs.map(({ pass, text, failureReason: failureType }) => {
3209
3188
  text = ellipsize(text, tableCellMaxLength);
3210
3189
  if (pass) return chalk.green("[PASS] ") + text;
3211
- else return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
3190
+ return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
3212
3191
  })]);
3213
3192
  return table.toString();
3214
3193
  }
@@ -3292,7 +3271,124 @@ function shouldShareResults(opts) {
3292
3271
  return cloudConfig.isEnabled() && sharing !== false;
3293
3272
  }
3294
3273
  //#endregion
3274
+ //#region src/commands/eval/redteamWarning.ts
3275
+ function warnIfRedteamConfigHasNoTests(config, testSuite) {
3276
+ if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
3277
+ Warning: Config file has a redteam section but no test cases.
3278
+ Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
3279
+ `));
3280
+ }
3281
+ //#endregion
3295
3282
  //#region src/commands/eval/summary.ts
3283
+ function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
3284
+ if (wasAborted) {
3285
+ const idSuffix = writeToDatabase ? ` (ID: ${chalk.cyan(evalId)})` : "";
3286
+ return `${chalk.red("✗")} ${completionType} aborted${idSuffix}`;
3287
+ }
3288
+ if (writeToDatabase && shareableUrl) return `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
3289
+ if (writeToDatabase && activelySharing) return `${chalk.green("✓")} ${completionType} complete`;
3290
+ if (writeToDatabase) return `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
3291
+ return `${chalk.green("✓")} ${completionType} complete`;
3292
+ }
3293
+ function getAbortSummaryLines(targetErrorStatus) {
3294
+ if (targetErrorStatus == null) return [];
3295
+ return [
3296
+ "",
3297
+ chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."),
3298
+ chalk.red(` Target returned HTTP ${targetErrorStatus}`),
3299
+ "",
3300
+ chalk.yellow("Possible causes:"),
3301
+ chalk.yellow(" • Invalid API key or authentication (401/403)"),
3302
+ chalk.yellow(" • Target endpoint does not exist (404)"),
3303
+ chalk.yellow(" • Server does not support the request (501)"),
3304
+ "",
3305
+ chalk.cyan("To fix: Check your target configuration and credentials.")
3306
+ ];
3307
+ }
3308
+ function getGuidanceLines({ writeToDatabase, shareableUrl, wantsToShare, activelySharing, hasExplicitDisable, cloudEnabled }) {
3309
+ if (!writeToDatabase || shareableUrl || wantsToShare || activelySharing) return [];
3310
+ const lines = ["", `» View results: ${chalk.green.bold("promptfoo view")}`];
3311
+ if (!hasExplicitDisable) lines.push(cloudEnabled ? `» Create shareable URL: ${chalk.green.bold("promptfoo share")}` : `» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
3312
+ lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
3313
+ return lines;
3314
+ }
3315
+ function buildUsageDetails(usage, total) {
3316
+ const parts = [];
3317
+ if (usage.prompt && usage.prompt > 0) parts.push(`${usage.prompt.toLocaleString()} prompt`);
3318
+ if (usage.completion && usage.completion > 0) parts.push(`${usage.completion.toLocaleString()} completion`);
3319
+ if (usage.cached && usage.cached > 0) parts.push(usage.cached === total && parts.length === 0 ? "cached" : `${usage.cached.toLocaleString()} cached`);
3320
+ if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) parts.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
3321
+ return parts;
3322
+ }
3323
+ function getTokenUsageLines(tokenUsage, isRedteam, tracker) {
3324
+ const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
3325
+ const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
3326
+ if (!hasEvalTokens && !hasGradingTokens) return [];
3327
+ const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
3328
+ const evalTokens = {
3329
+ prompt: tokenUsage.prompt || 0,
3330
+ completion: tokenUsage.completion || 0,
3331
+ total: tokenUsage.total || combinedTotal,
3332
+ cached: tokenUsage.cached || 0,
3333
+ numRequests: tokenUsage.numRequests || 0,
3334
+ completionDetails: tokenUsage.completionDetails || {
3335
+ reasoning: 0,
3336
+ acceptedPrediction: 0,
3337
+ rejectedPrediction: 0
3338
+ }
3339
+ };
3340
+ const lines = [`${chalk.bold("Total Tokens:")} ${chalk.white.bold((evalTokens.total + (tokenUsage.assertions?.total || 0)).toLocaleString())}`];
3341
+ if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
3342
+ if (evalTokens.total > 0) {
3343
+ const evalParts = buildUsageDetails(evalTokens, evalTokens.total);
3344
+ lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
3345
+ }
3346
+ if (tokenUsage.assertions?.total && tokenUsage.assertions.total > 0) {
3347
+ const gradingParts = buildUsageDetails(tokenUsage.assertions, tokenUsage.assertions.total);
3348
+ lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
3349
+ }
3350
+ lines.push(...getProviderUsageLines(tracker));
3351
+ return lines;
3352
+ }
3353
+ function getProviderUsageLines(tracker) {
3354
+ const providerIds = tracker.getProviderIds();
3355
+ if (providerIds.length <= 1) return [];
3356
+ const sortedProviders = providerIds.map((id) => ({
3357
+ id,
3358
+ usage: tracker.getProviderUsage(id)
3359
+ })).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
3360
+ const lines = ["", chalk.bold("Providers:")];
3361
+ for (const { id, usage } of sortedProviders) {
3362
+ if ((usage.total || 0) === 0 && (usage.prompt || 0) + (usage.completion || 0) === 0) continue;
3363
+ const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
3364
+ const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
3365
+ const details = buildUsageDetails(usage, displayTotal);
3366
+ const requestInfo = `${usage.numRequests || 0} requests`;
3367
+ const separator = details.length > 0 ? "; " : "";
3368
+ lines.push(` ${chalk.gray(`${displayId}:`)} ${chalk.white(displayTotal.toLocaleString())} (${requestInfo}${separator}${details.join(", ")})`);
3369
+ }
3370
+ return lines;
3371
+ }
3372
+ function formatResultPercentage(count, totalTests) {
3373
+ const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
3374
+ return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
3375
+ }
3376
+ function formatResultLine(count, label, icon, iconColor, totalTests) {
3377
+ return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count, totalTests)})`)}`;
3378
+ }
3379
+ function getResultsLines({ successes, failures, errors, duration, maxConcurrency }) {
3380
+ const totalTests = successes + failures + errors;
3381
+ const errorLabel = errors === 1 ? "error" : "errors";
3382
+ return [
3383
+ "",
3384
+ chalk.bold("Results:"),
3385
+ formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green, totalTests),
3386
+ formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red, totalTests),
3387
+ formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red, totalTests),
3388
+ chalk.gray(`Duration: ${formatDuration(duration)} (concurrency: ${maxConcurrency})`),
3389
+ ""
3390
+ ];
3391
+ }
3296
3392
  /**
3297
3393
  * Generate formatted evaluation summary output for CLI display.
3298
3394
  *
@@ -3331,115 +3427,28 @@ function shouldShareResults(opts) {
3331
3427
  * ```
3332
3428
  */
3333
3429
  function generateEvalSummary(params) {
3334
- const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker, targetErrorStatus } = params;
3335
- const lines = [];
3336
- const completionType = isRedteam ? "Red team" : "Eval";
3337
- const wasAborted = targetErrorStatus != null;
3338
- let completionMessage;
3339
- if (wasAborted) {
3340
- completionMessage = `${chalk.red("✗")} ${completionType} aborted`;
3341
- if (writeToDatabase) completionMessage += ` (ID: ${chalk.cyan(evalId)})`;
3342
- } else if (writeToDatabase && shareableUrl) completionMessage = `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
3343
- else if (writeToDatabase && activelySharing) completionMessage = `${chalk.green("✓")} ${completionType} complete`;
3344
- else if (writeToDatabase) completionMessage = `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
3345
- else completionMessage = `${chalk.green("✓")} ${completionType} complete`;
3346
- lines.push(completionMessage);
3347
- if (wasAborted && targetErrorStatus != null) {
3348
- lines.push("");
3349
- lines.push(chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
3350
- lines.push(chalk.red(` Target returned HTTP ${targetErrorStatus}`));
3351
- lines.push("");
3352
- lines.push(chalk.yellow("Possible causes:"));
3353
- lines.push(chalk.yellow(" • Invalid API key or authentication (401/403)"));
3354
- lines.push(chalk.yellow(" • Target endpoint does not exist (404)"));
3355
- lines.push(chalk.yellow(" • Server does not support the request (501)"));
3356
- lines.push("");
3357
- lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
3358
- }
3359
- if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
3360
- lines.push("");
3361
- lines.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
3362
- if (!hasExplicitDisable) if (cloudEnabled) lines.push(`» Create shareable URL: ${chalk.green.bold("promptfoo share")}`);
3363
- else lines.push(`» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
3364
- lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
3365
- }
3366
- lines.push("");
3367
- const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
3368
- const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
3369
- if (hasEvalTokens || hasGradingTokens) {
3370
- const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
3371
- const evalTokens = {
3372
- prompt: tokenUsage.prompt || 0,
3373
- completion: tokenUsage.completion || 0,
3374
- total: tokenUsage.total || combinedTotal,
3375
- cached: tokenUsage.cached || 0,
3376
- completionDetails: tokenUsage.completionDetails || {
3377
- reasoning: 0,
3378
- acceptedPrediction: 0,
3379
- rejectedPrediction: 0
3380
- }
3381
- };
3382
- const grandTotal = evalTokens.total + (tokenUsage.assertions?.total || 0);
3383
- lines.push(`${chalk.bold("Total Tokens:")} ${chalk.white.bold(grandTotal.toLocaleString())}`);
3384
- if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
3385
- if (evalTokens.total > 0) {
3386
- const evalParts = [];
3387
- if (evalTokens.prompt > 0) evalParts.push(`${evalTokens.prompt.toLocaleString()} prompt`);
3388
- if (evalTokens.completion > 0) evalParts.push(`${evalTokens.completion.toLocaleString()} completion`);
3389
- if (evalTokens.cached > 0) if (evalTokens.cached === evalTokens.total && evalParts.length === 0) evalParts.push("cached");
3390
- else evalParts.push(`${evalTokens.cached.toLocaleString()} cached`);
3391
- if (evalTokens.completionDetails?.reasoning && evalTokens.completionDetails.reasoning > 0) evalParts.push(`${evalTokens.completionDetails.reasoning.toLocaleString()} reasoning`);
3392
- lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
3393
- }
3394
- if (tokenUsage.assertions && tokenUsage.assertions.total && tokenUsage.assertions.total > 0) {
3395
- const gradingParts = [];
3396
- if (tokenUsage.assertions.prompt && tokenUsage.assertions.prompt > 0) gradingParts.push(`${tokenUsage.assertions.prompt.toLocaleString()} prompt`);
3397
- if (tokenUsage.assertions.completion && tokenUsage.assertions.completion > 0) gradingParts.push(`${tokenUsage.assertions.completion.toLocaleString()} completion`);
3398
- if (tokenUsage.assertions.cached && tokenUsage.assertions.cached > 0) if (tokenUsage.assertions.cached === tokenUsage.assertions.total && gradingParts.length === 0) gradingParts.push("cached");
3399
- else gradingParts.push(`${tokenUsage.assertions.cached.toLocaleString()} cached`);
3400
- if (tokenUsage.assertions.completionDetails?.reasoning && tokenUsage.assertions.completionDetails.reasoning > 0) gradingParts.push(`${tokenUsage.assertions.completionDetails.reasoning.toLocaleString()} reasoning`);
3401
- lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
3402
- }
3403
- const providerIds = tracker.getProviderIds();
3404
- if (providerIds.length > 1) {
3405
- lines.push("");
3406
- lines.push(chalk.bold("Providers:"));
3407
- const sortedProviders = providerIds.map((id) => ({
3408
- id,
3409
- usage: tracker.getProviderUsage(id)
3410
- })).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
3411
- for (const { id, usage } of sortedProviders) if ((usage.total || 0) > 0 || (usage.prompt || 0) + (usage.completion || 0) > 0) {
3412
- const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
3413
- const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
3414
- const details = [];
3415
- if (usage.prompt && usage.prompt > 0) details.push(`${usage.prompt.toLocaleString()} prompt`);
3416
- if (usage.completion && usage.completion > 0) details.push(`${usage.completion.toLocaleString()} completion`);
3417
- if (usage.cached && usage.cached > 0) if (usage.cached === displayTotal && details.length === 0) details.push("cached");
3418
- else details.push(`${usage.cached.toLocaleString()} cached`);
3419
- if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) details.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
3420
- const breakdown = ` (${`${usage.numRequests || 0} requests`}${details.length > 0 ? "; " : ""}${details.join(", ")})`;
3421
- lines.push(` ${chalk.gray(displayId + ":")} ${chalk.white(displayTotal.toLocaleString())}${breakdown}`);
3422
- }
3423
- }
3424
- }
3425
- lines.push("");
3426
- const totalTests = successes + failures + errors;
3427
- const formatResultPercentage = (count) => {
3428
- const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
3429
- return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
3430
- };
3431
- const formatResultLine = (count, label, icon, iconColor) => {
3432
- return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count)})`)}`;
3433
- };
3434
- const errorLabel = errors === 1 ? "error" : "errors";
3435
- lines.push(chalk.bold("Results:"));
3436
- lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green));
3437
- lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red));
3438
- lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red));
3439
- const durationDisplay = formatDuration(duration);
3440
- lines.push(chalk.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
3441
- lines.push("");
3442
- return lines;
3430
+ return [
3431
+ getCompletionMessage({
3432
+ completionType: params.isRedteam ? "Red team" : "Eval",
3433
+ evalId: params.evalId,
3434
+ shareableUrl: params.shareableUrl,
3435
+ wasAborted: params.targetErrorStatus != null,
3436
+ writeToDatabase: params.writeToDatabase,
3437
+ activelySharing: params.activelySharing ?? false
3438
+ }),
3439
+ ...getAbortSummaryLines(params.targetErrorStatus),
3440
+ ...getGuidanceLines({
3441
+ writeToDatabase: params.writeToDatabase,
3442
+ shareableUrl: params.shareableUrl,
3443
+ wantsToShare: params.wantsToShare,
3444
+ activelySharing: params.activelySharing ?? false,
3445
+ hasExplicitDisable: params.hasExplicitDisable,
3446
+ cloudEnabled: params.cloudEnabled
3447
+ }),
3448
+ "",
3449
+ ...getTokenUsageLines(params.tokenUsage, params.isRedteam, params.tracker),
3450
+ ...getResultsLines(params)
3451
+ ];
3443
3452
  }
3444
3453
  //#endregion
3445
3454
  //#region src/commands/retry.ts
@@ -3657,6 +3666,9 @@ function setupRetryCommand(program) {
3657
3666
  }
3658
3667
  //#endregion
3659
3668
  //#region src/models/modelAudit.ts
3669
+ function getModelAuditSortColumn(sortField) {
3670
+ return modelAuditsTable[sortField];
3671
+ }
3660
3672
  function createScanId(createdAt = /* @__PURE__ */ new Date()) {
3661
3673
  return `scan-${randomSequence(3)}-${createdAt.toISOString().slice(0, 19)}`;
3662
3674
  }
@@ -3697,8 +3709,10 @@ var ModelAudit = class ModelAudit {
3697
3709
  this.issues = data.issues || data.results?.issues || null;
3698
3710
  const issues = data.issues || data.results?.issues;
3699
3711
  const resultsHasErrors = data.results?.has_errors ?? false;
3700
- if (data.hasErrors === void 0) this.hasErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
3701
- else this.hasErrors = data.hasErrors;
3712
+ if (data.hasErrors === void 0) {
3713
+ const hasActualErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
3714
+ this.hasErrors = hasActualErrors;
3715
+ } else this.hasErrors = data.hasErrors;
3702
3716
  this.totalChecks = data.totalChecks;
3703
3717
  this.passedChecks = data.passedChecks;
3704
3718
  this.failedChecks = data.failedChecks;
@@ -3796,9 +3810,9 @@ var ModelAudit = class ModelAudit {
3796
3810
  static async getMany(limit = 100, offset = 0, sortField = "createdAt", sortOrder = "desc", search) {
3797
3811
  let query = getDb().select().from(modelAuditsTable);
3798
3812
  if (search) query = query.where(or(like(modelAuditsTable.name, `%${search}%`), like(modelAuditsTable.modelPath, `%${search}%`), like(modelAuditsTable.id, `%${search}%`)));
3799
- const sortColumn = sortField === "name" ? modelAuditsTable.name : sortField === "modelPath" ? modelAuditsTable.modelPath : modelAuditsTable.createdAt;
3800
- if (sortOrder === "asc") query = query.orderBy(asc(sortColumn));
3801
- else query = query.orderBy(desc(sortColumn));
3813
+ const sortColumn = getModelAuditSortColumn(sortField);
3814
+ if (sortOrder === "asc") query = sortField === "id" ? query.orderBy(asc(sortColumn)) : query.orderBy(asc(sortColumn), asc(modelAuditsTable.id));
3815
+ else query = sortField === "id" ? query.orderBy(desc(sortColumn)) : query.orderBy(desc(sortColumn), desc(modelAuditsTable.id));
3802
3816
  return (await query.limit(limit).offset(offset).all()).map((r) => new ModelAudit({
3803
3817
  ...r,
3804
3818
  persisted: true
@@ -4157,14 +4171,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
4157
4171
  state.resume = true;
4158
4172
  state.retryMode = true;
4159
4173
  } else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
4160
- if (!cmdObj.envPath && commandLineOptions?.envPath) {
4174
+ if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
4161
4175
  logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
4162
4176
  setupEnv(commandLineOptions.envPath);
4163
4177
  }
4164
- if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
4165
- Warning: Config file has a redteam section but no test cases.
4166
- Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
4167
- `));
4178
+ warnIfRedteamConfigHasNoTests(config, testSuite);
4168
4179
  if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
4169
4180
  const maybeUrl = config.providers[0]?.config?.url;
4170
4181
  if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
@@ -4272,7 +4283,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
4272
4283
  ${z.prettifyError(testSuiteSchema.error)}
4273
4284
 
4274
4285
  Please review your promptfooconfig.yaml configuration.`));
4275
- const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, { runtimeOptions: options }) : new Eval(config, { runtimeOptions: options });
4286
+ const author = getAuthor();
4287
+ const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
4288
+ author,
4289
+ runtimeOptions: options
4290
+ }) : new Eval(config, {
4291
+ author,
4292
+ runtimeOptions: options
4293
+ });
4276
4294
  const abortController = new AbortController();
4277
4295
  const previousAbortSignal = evaluateOptions.abortSignal;
4278
4296
  evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
@@ -6045,25 +6063,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
6045
6063
  getAssertions(_prompt) {
6046
6064
  return getHarmfulAssertions(this.harmCategory);
6047
6065
  }
6048
- promptsToTestCases(prompts) {
6066
+ async promptsToTestCases(prompts) {
6049
6067
  const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
6050
6068
  const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
6051
- return prompts.map(({ __prompt }) => {
6069
+ const pluginId = getShortPluginId(this.harmCategory);
6070
+ return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
6052
6071
  const vars = { [this.injectVar]: __prompt };
6053
- if (hasMultipleInputs) try {
6054
- const parsed = JSON.parse(__prompt);
6055
- Object.assign(vars, extractVariablesFromJson(parsed, this.config.inputs));
6056
- } catch {}
6072
+ let inputMaterialization;
6073
+ if (hasMultipleInputs) {
6074
+ let parsed;
6075
+ try {
6076
+ parsed = JSON.parse(__prompt);
6077
+ } catch (error) {
6078
+ logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
6079
+ }
6080
+ if (parsed) try {
6081
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
6082
+ materializationIndex,
6083
+ pluginId,
6084
+ provider: this.provider,
6085
+ purpose: this.purpose
6086
+ });
6087
+ Object.assign(vars, materializedVars.vars);
6088
+ inputMaterialization = materializedVars.metadata;
6089
+ } catch (error) {
6090
+ logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
6091
+ throw error;
6092
+ }
6093
+ }
6057
6094
  return {
6058
6095
  vars,
6059
6096
  metadata: {
6060
6097
  harmCategory: harmCategoryLabel,
6061
- pluginId: getShortPluginId(this.harmCategory),
6062
- pluginConfig: this.config
6098
+ pluginId,
6099
+ pluginConfig: this.config,
6100
+ ...inputMaterialization ? { inputMaterialization } : {}
6063
6101
  },
6064
6102
  assert: getHarmfulAssertions(this.harmCategory)
6065
6103
  };
6066
- });
6104
+ }));
6067
6105
  }
6068
6106
  };
6069
6107
  //#endregion
@@ -6072,20 +6110,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
6072
6110
  * Extract content from <Prompt> tags and parse JSON if inputs are defined.
6073
6111
  * Returns the processed prompt and any additional vars extracted from JSON.
6074
6112
  */
6075
- function processPromptForInputs(prompt, _injectVar, inputs) {
6113
+ async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
6076
6114
  let processedPrompt = prompt.trim();
6077
6115
  const additionalVars = {};
6116
+ let additionalMetadata;
6078
6117
  const extractedPrompt = extractPromptFromTags(processedPrompt);
6079
6118
  if (extractedPrompt) processedPrompt = extractedPrompt;
6080
- if (inputs && Object.keys(inputs).length > 0) try {
6081
- const parsed = JSON.parse(processedPrompt);
6082
- Object.assign(additionalVars, extractVariablesFromJson(parsed, inputs));
6083
- } catch {
6084
- logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode");
6119
+ if (inputs && Object.keys(inputs).length > 0) {
6120
+ let parsed;
6121
+ try {
6122
+ parsed = JSON.parse(processedPrompt);
6123
+ } catch (error) {
6124
+ logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
6125
+ }
6126
+ if (parsed) try {
6127
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
6128
+ materializationIndex,
6129
+ pluginId: plugin,
6130
+ provider,
6131
+ purpose
6132
+ });
6133
+ Object.assign(additionalVars, materializedVars.vars);
6134
+ additionalMetadata = materializedVars.metadata;
6135
+ } catch (error) {
6136
+ logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
6137
+ throw error;
6138
+ }
6085
6139
  }
6086
6140
  return {
6087
6141
  processedPrompt,
6088
- additionalVars
6142
+ additionalVars,
6143
+ additionalMetadata
6089
6144
  };
6090
6145
  }
6091
6146
  async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
@@ -6106,15 +6161,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
6106
6161
  };
6107
6162
  const allPrompts = await retryWithDeduplication(generatePrompts, n);
6108
6163
  const inputs = config?.inputs;
6109
- return sampleArray(allPrompts, n).map((prompt) => {
6110
- const { processedPrompt, additionalVars } = processPromptForInputs(prompt, injectVar, inputs);
6164
+ return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
6165
+ const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
6111
6166
  const testCase = createTestCase(injectVar, processedPrompt, plugin);
6112
6167
  if (Object.keys(additionalVars).length > 0) testCase.vars = {
6113
6168
  ...testCase.vars,
6114
6169
  ...additionalVars
6115
6170
  };
6171
+ if (additionalMetadata) testCase.metadata = {
6172
+ ...testCase.metadata,
6173
+ inputMaterialization: additionalMetadata
6174
+ };
6116
6175
  return testCase;
6117
- });
6176
+ }));
6118
6177
  }
6119
6178
  //#endregion
6120
6179
  //#region src/redteam/plugins/teenSafety/graderExamples.ts
@@ -6432,7 +6491,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
6432
6491
  function computeModifiersFromConfig(config) {
6433
6492
  const modifiers = { ...config?.modifiers };
6434
6493
  if (config?.language && typeof config.language === "string") modifiers.language = config.language;
6435
- if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
6494
+ if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
6436
6495
  const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
6437
6496
  if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
6438
6497
  return modifiers;
@@ -6517,9 +6576,10 @@ function dedupeTestCases(testCases) {
6517
6576
  return deduped;
6518
6577
  }
6519
6578
  function buildMaxCharsRetryInstructions(rejectedPromptLengths, limit) {
6579
+ const longestRejectedPromptText = rejectedPromptLengths.length > 0 ? `${Math.max(...rejectedPromptLengths)} characters` : "unknown length";
6520
6580
  return dedent`
6521
6581
  Your previous response included ${rejectedPromptLengths.length} generated prompt${rejectedPromptLengths.length === 1 ? "" : "s"} that exceeded the ${limit ?? "configured"}-character limit.
6522
- The longest rejected prompt was ${Math.max(...rejectedPromptLengths)} characters.
6582
+ The longest rejected prompt was ${longestRejectedPromptText}.
6523
6583
  Generate replacement prompts only, and keep every user message within the character limit.
6524
6584
  `.trim();
6525
6585
  }
@@ -6597,6 +6657,31 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
6597
6657
  return [];
6598
6658
  }
6599
6659
  }
6660
+ async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
6661
+ const inputs = config.inputs;
6662
+ if (!inputs || Object.keys(inputs).length === 0) return testCases;
6663
+ return Promise.all(testCases.map(async (testCase, materializationIndex) => {
6664
+ const inputVars = extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
6665
+ if (!inputVars) return testCase;
6666
+ const materializedVars = await materializeInputVariablesWithMetadata(inputVars, inputs, {
6667
+ materializationIndex,
6668
+ pluginId,
6669
+ provider,
6670
+ purpose
6671
+ });
6672
+ return {
6673
+ ...testCase,
6674
+ vars: {
6675
+ ...testCase.vars || {},
6676
+ ...materializedVars.vars
6677
+ },
6678
+ metadata: {
6679
+ ...testCase.metadata || {},
6680
+ ...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
6681
+ }
6682
+ };
6683
+ }));
6684
+ }
6600
6685
  function createPluginFactory(PluginClass, key, validate) {
6601
6686
  return {
6602
6687
  key,
@@ -6607,13 +6692,21 @@ function createPluginFactory(PluginClass, key, validate) {
6607
6692
  logger.debug(`Using local redteam generation for ${key}`);
6608
6693
  return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
6609
6694
  }
6610
- const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
6695
+ const pluginId = getShortPluginId(key);
6696
+ const testCases = await materializeRemoteTestCaseInputs({
6697
+ config: configWithDefaults ?? {},
6698
+ injectVar,
6699
+ pluginId,
6700
+ provider,
6701
+ purpose,
6702
+ testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
6703
+ });
6611
6704
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
6612
6705
  return testCases.map((testCase) => ({
6613
6706
  ...testCase,
6614
6707
  metadata: {
6615
6708
  ...testCase.metadata,
6616
- pluginId: getShortPluginId(key),
6709
+ pluginId,
6617
6710
  pluginConfig: {
6618
6711
  ...configWithDefaults,
6619
6712
  modifiers: computedModifiers
@@ -6670,7 +6763,7 @@ const pluginFactories = [
6670
6763
  key: category,
6671
6764
  action: async (params) => {
6672
6765
  if (neverGenerateRemote()) {
6673
- logger.error(`${category} plugin requires remote generation to be enabled`);
6766
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
6674
6767
  return [];
6675
6768
  }
6676
6769
  const testCases = await getHarmfulTests(params, category);
@@ -6693,13 +6786,21 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
6693
6786
  key: category,
6694
6787
  action: async (params) => {
6695
6788
  if (shouldGenerateRemote()) {
6696
- const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
6789
+ const pluginId = getShortPluginId(category);
6790
+ const testCases = await materializeRemoteTestCaseInputs({
6791
+ config: params.config ?? {},
6792
+ injectVar: params.injectVar,
6793
+ pluginId,
6794
+ provider: params.provider,
6795
+ purpose: params.purpose,
6796
+ testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
6797
+ });
6697
6798
  const computedModifiers = computeModifiersFromConfig(params.config);
6698
6799
  return testCases.map((testCase) => ({
6699
6800
  ...testCase,
6700
6801
  metadata: {
6701
6802
  ...testCase.metadata,
6702
- pluginId: getShortPluginId(category),
6803
+ pluginId,
6703
6804
  pluginConfig: {
6704
6805
  ...params.config,
6705
6806
  modifiers: computedModifiers
@@ -6721,16 +6822,24 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
6721
6822
  key: category,
6722
6823
  action: async (params) => {
6723
6824
  if (neverGenerateRemote()) {
6724
- logger.error(`${category} plugin requires remote generation to be enabled`);
6825
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
6725
6826
  return [];
6726
6827
  }
6727
- const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
6828
+ const pluginId = getShortPluginId(category);
6829
+ const testCases = await materializeRemoteTestCaseInputs({
6830
+ config: params.config ?? {},
6831
+ injectVar: params.injectVar,
6832
+ pluginId,
6833
+ provider: params.provider,
6834
+ purpose: params.purpose,
6835
+ testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
6836
+ });
6728
6837
  const computedModifiers = computeModifiersFromConfig(params.config);
6729
6838
  return testCases.map((testCase) => ({
6730
6839
  ...testCase,
6731
6840
  metadata: {
6732
6841
  ...testCase.metadata,
6733
- pluginId: getShortPluginId(category),
6842
+ pluginId,
6734
6843
  pluginConfig: {
6735
6844
  ...params.config,
6736
6845
  modifiers: computedModifiers
@@ -6743,19 +6852,27 @@ function createRemotePlugin(key, validate) {
6743
6852
  return {
6744
6853
  key,
6745
6854
  validate,
6746
- action: async ({ purpose, injectVar, n, config }) => {
6855
+ action: async ({ provider, purpose, injectVar, n, config }) => {
6747
6856
  const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
6748
6857
  if (neverGenerateRemote()) {
6749
- logger.error(`${key} plugin requires remote generation to be enabled`);
6858
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
6750
6859
  return [];
6751
6860
  }
6752
- const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
6861
+ const pluginId = getShortPluginId(key);
6862
+ const testCases = await materializeRemoteTestCaseInputs({
6863
+ config: configWithDefaults ?? {},
6864
+ injectVar,
6865
+ pluginId,
6866
+ provider,
6867
+ purpose,
6868
+ testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
6869
+ });
6753
6870
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
6754
6871
  const testsWithMetadata = testCases.map((testCase) => ({
6755
6872
  ...testCase,
6756
6873
  metadata: {
6757
6874
  ...testCase.metadata,
6758
- pluginId: getShortPluginId(key),
6875
+ pluginId,
6759
6876
  pluginConfig: {
6760
6877
  ...configWithDefaults,
6761
6878
  modifiers: computedModifiers
@@ -6825,6 +6942,37 @@ function getPolicyText(metadata) {
6825
6942
  return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
6826
6943
  }
6827
6944
  }
6945
+ async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
6946
+ const inputs = testCase.metadata?.pluginConfig?.inputs;
6947
+ const inputMaterialization = testCase.metadata?.inputMaterialization;
6948
+ if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
6949
+ inputMaterialization,
6950
+ vars: testCase.vars
6951
+ };
6952
+ try {
6953
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
6954
+ materializationIndex,
6955
+ pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
6956
+ provider,
6957
+ purpose
6958
+ });
6959
+ return {
6960
+ inputMaterialization: materializedVars.metadata ? {
6961
+ ...inputMaterialization,
6962
+ ...materializedVars.metadata
6963
+ } : inputMaterialization,
6964
+ vars: {
6965
+ ...testCase.vars,
6966
+ ...materializedVars.vars
6967
+ }
6968
+ };
6969
+ } catch {
6970
+ return {
6971
+ inputMaterialization,
6972
+ vars: testCase.vars
6973
+ };
6974
+ }
6975
+ }
6828
6976
  /**
6829
6977
  * Gets the severity level for a plugin based on its ID and configuration.
6830
6978
  * @param pluginId - The ID of the plugin.
@@ -6966,6 +7114,7 @@ const categories = {
6966
7114
  foundation: FOUNDATION_PLUGINS,
6967
7115
  harmful: Object.keys(HARM_PLUGINS),
6968
7116
  "coding-agent:core": CODING_AGENT_CORE_PLUGINS,
7117
+ "coding-agent:all": CODING_AGENT_PLUGINS,
6969
7118
  bias: BIAS_PLUGINS,
6970
7119
  pii: PII_PLUGINS,
6971
7120
  medical: MEDICAL_PLUGINS,
@@ -7060,7 +7209,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
7060
7209
  * @param injectVar - The variable to inject.
7061
7210
  * @returns An array of new test cases generated by strategies.
7062
7211
  */
7063
- async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
7212
+ async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
7064
7213
  const newTestCases = [];
7065
7214
  const strategyResults = {};
7066
7215
  for (const strategy of strategies) {
@@ -7116,14 +7265,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
7116
7265
  }
7117
7266
  }
7118
7267
  resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
7119
- newTestCases.push(...resultTestCases.map((t) => {
7120
- const inputs = t?.metadata?.pluginConfig?.inputs;
7121
- let updatedVars = t.vars;
7122
- if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
7123
- const parsed = JSON.parse(String(t.vars[injectVar]));
7124
- updatedVars = { ...t.vars };
7125
- Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
7126
- } catch {}
7268
+ newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
7269
+ const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
7127
7270
  const strategyConfig = {
7128
7271
  ...strategy.config || {},
7129
7272
  ...maxCharsPerMessage ? { maxCharsPerMessage } : {},
@@ -7131,16 +7274,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
7131
7274
  };
7132
7275
  return {
7133
7276
  ...t,
7134
- vars: updatedVars,
7277
+ vars,
7135
7278
  metadata: {
7136
7279
  ...t?.metadata || {},
7137
7280
  ...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
7138
7281
  ...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
7139
7282
  ...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
7283
+ ...inputMaterialization && { inputMaterialization },
7140
7284
  ...Object.keys(strategyConfig).length > 0 && { strategyConfig }
7141
7285
  }
7142
7286
  };
7143
- }));
7287
+ })));
7144
7288
  const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
7145
7289
  const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
7146
7290
  const applyNumTestsCap = (calculatedRequested) => {
@@ -7595,7 +7739,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7595
7739
  targetIds,
7596
7740
  ...retryStrategy.config
7597
7741
  };
7598
- const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
7742
+ const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
7599
7743
  pluginTestCases.push(...retryTestCases);
7600
7744
  Object.assign(strategyResults, retryResults);
7601
7745
  if (showProgressBar) progressBar?.increment(retryTestCases.length);
@@ -7603,7 +7747,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7603
7747
  checkAbort();
7604
7748
  const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
7605
7749
  if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
7606
- const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
7750
+ const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
7607
7751
  Object.assign(strategyResults, otherStrategyResults);
7608
7752
  if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
7609
7753
  const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
@@ -8358,65 +8502,175 @@ async function doRedteamRun(options) {
8358
8502
  return evalResult;
8359
8503
  }
8360
8504
  //#endregion
8505
+ //#region src/types/transform.ts
8506
+ /** Runtime type guard for `TransformFunction` values. */
8507
+ function isTransformFunction(value) {
8508
+ return typeof value === "function";
8509
+ }
8510
+ //#endregion
8361
8511
  //#region src/index.ts
8512
+ /**
8513
+ * Shallow-clone a test case so the caller can swap in resolved ApiProvider
8514
+ * instances on `options.provider` / `assert[].provider` without leaking those
8515
+ * mutations back to the input. The input may alias the unified config written
8516
+ * to the Eval record, and a live SDK client (e.g. Bedrock's BedrockRuntime,
8517
+ * Anthropic's client) holds circular references that break drizzle's JSON
8518
+ * serialization on `evalRecord.save()`. Fixes #8687.
8519
+ *
8520
+ * Detaches only `options` and `assert[]`. Other reference fields (`provider`,
8521
+ * `vars`, `metadata`, `providerOutput`) remain aliased — callers must reassign
8522
+ * those by reference rather than mutating in place. `assert-set` children are
8523
+ * not deep-cloned because the resolve loop skips `assert-set`; if that ever
8524
+ * changes, extend this helper.
8525
+ */
8526
+ function cloneTestForResolve(test) {
8527
+ const cloned = { ...test };
8528
+ if (test.options) cloned.options = { ...test.options };
8529
+ if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
8530
+ return cloned;
8531
+ }
8532
+ function toSerializableProviderRef(provider) {
8533
+ if (isApiProvider(provider)) return sanitizeProvider(provider);
8534
+ if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
8535
+ return provider;
8536
+ }
8537
+ function isRecord(value) {
8538
+ return Boolean(value && typeof value === "object" && !Array.isArray(value));
8539
+ }
8540
+ function withSerializableProvider(record) {
8541
+ if (!isApiProvider(record.provider)) return record;
8542
+ return {
8543
+ ...record,
8544
+ provider: sanitizeProvider(record.provider)
8545
+ };
8546
+ }
8547
+ /**
8548
+ * Function-valued transforms are first-class at runtime but are silently dropped
8549
+ * by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
8550
+ * a function reference, so replace every `transform`-like field with a
8551
+ * `[inline function]: name` marker. Non-function values pass through unchanged.
8552
+ *
8553
+ * `droppedRef.value` is flipped to `true` the first time a function is replaced
8554
+ * so the caller can emit a single warning instead of logging per field.
8555
+ */
8556
+ function replaceFunctionTransforms(record, droppedRef) {
8557
+ let result;
8558
+ for (const key of TRANSFORM_KEYS) {
8559
+ const value = record[key];
8560
+ if (!isTransformFunction(value)) continue;
8561
+ if (!result) result = { ...record };
8562
+ result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
8563
+ droppedRef.value = true;
8564
+ }
8565
+ return result ?? record;
8566
+ }
8567
+ function toSerializableAssertion(assertion, droppedRef) {
8568
+ if (!isRecord(assertion)) return assertion;
8569
+ let sanitizedAssertion = withSerializableProvider(assertion);
8570
+ sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
8571
+ if (Array.isArray(assertion.assert)) sanitizedAssertion = {
8572
+ ...sanitizedAssertion,
8573
+ assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
8574
+ };
8575
+ return sanitizedAssertion;
8576
+ }
8577
+ function toSerializableTestCase(test, droppedRef) {
8578
+ if (!isRecord(test)) return test;
8579
+ let sanitizedTest = withSerializableProvider(test);
8580
+ if (isRecord(test.options)) {
8581
+ let options = withSerializableProvider(test.options);
8582
+ options = replaceFunctionTransforms(options, droppedRef);
8583
+ if (options !== test.options) sanitizedTest = {
8584
+ ...sanitizedTest,
8585
+ options
8586
+ };
8587
+ }
8588
+ if (Array.isArray(test.assert)) sanitizedTest = {
8589
+ ...sanitizedTest,
8590
+ assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
8591
+ };
8592
+ return sanitizedTest;
8593
+ }
8594
+ function toSerializableScenario(scenario, droppedRef) {
8595
+ if (!isRecord(scenario)) return scenario;
8596
+ if (!Array.isArray(scenario.tests)) return scenario;
8597
+ return {
8598
+ ...scenario,
8599
+ tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
8600
+ };
8601
+ }
8602
+ function createSerializableUnifiedConfig(testSuite, prompts) {
8603
+ const droppedRef = { value: false };
8604
+ const config = {
8605
+ ...testSuite,
8606
+ providers: toSerializableProviderRef(testSuite.providers),
8607
+ defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
8608
+ tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
8609
+ scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
8610
+ prompts
8611
+ };
8612
+ if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
8613
+ return config;
8614
+ }
8362
8615
  async function evaluate(testSuite, options = {}) {
8363
- if (testSuite.writeLatestResults) await runDbMigrations();
8364
- const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
8616
+ const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
8617
+ if (testSuiteConfig.writeLatestResults) await runDbMigrations();
8618
+ const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
8365
8619
  const providerMap = {};
8366
8620
  for (const p of loadedProviders) {
8367
8621
  providerMap[p.id()] = p;
8368
8622
  if (p.label) providerMap[p.label] = p;
8369
8623
  }
8370
- let resolvedDefaultTest = testSuite.defaultTest;
8371
- if (typeof testSuite.defaultTest === "string" && testSuite.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuite.defaultTest);
8624
+ let resolvedDefaultTest = testSuiteConfig.defaultTest;
8625
+ if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
8372
8626
  const constructedTestSuite = {
8373
- ...testSuite,
8627
+ ...testSuiteConfig,
8374
8628
  defaultTest: resolvedDefaultTest,
8375
- scenarios: testSuite.scenarios,
8629
+ scenarios: testSuiteConfig.scenarios,
8376
8630
  providers: loadedProviders,
8377
- tests: await readTests(testSuite.tests),
8378
- nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
8379
- prompts: await processPrompts(testSuite.prompts)
8631
+ tests: await readTests(testSuiteConfig.tests),
8632
+ nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
8633
+ prompts: await processPrompts(testSuiteConfig.prompts)
8380
8634
  };
8381
- if (typeof constructedTestSuite.defaultTest === "object") {
8382
- if (constructedTestSuite.defaultTest?.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
8383
- env: testSuite.env,
8635
+ if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
8636
+ constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
8637
+ if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
8638
+ env: testSuiteConfig.env,
8384
8639
  basePath: state.basePath
8385
8640
  });
8386
- if (constructedTestSuite.defaultTest?.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
8387
- env: testSuite.env,
8641
+ if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
8642
+ env: testSuiteConfig.env,
8388
8643
  basePath: state.basePath
8389
8644
  });
8390
8645
  }
8391
- for (const test of constructedTestSuite.tests || []) {
8646
+ constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
8647
+ for (const test of constructedTestSuite.tests) {
8392
8648
  if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
8393
- env: testSuite.env,
8649
+ env: testSuiteConfig.env,
8394
8650
  basePath: state.basePath
8395
8651
  });
8396
- if (test.assert) for (const assertion of test.assert) {
8652
+ for (const assertion of test.assert || []) {
8397
8653
  if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
8398
8654
  if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
8399
- env: testSuite.env,
8655
+ env: testSuiteConfig.env,
8400
8656
  basePath: state.basePath
8401
8657
  });
8402
8658
  }
8403
8659
  }
8404
8660
  if (options.cache === false) disableCache();
8405
- const parsedProviderPromptMap = readProviderPromptMap(testSuite, constructedTestSuite.prompts);
8406
- const unifiedConfig = {
8407
- ...testSuite,
8408
- prompts: constructedTestSuite.prompts
8409
- };
8410
- const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
8661
+ const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
8662
+ const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
8663
+ const author = getAuthor(suiteAuthor);
8664
+ const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
8411
8665
  const ret = await evaluate$1({
8412
8666
  ...constructedTestSuite,
8413
8667
  providerPromptMap: parsedProviderPromptMap
8414
8668
  }, evalRecord, {
8415
8669
  eventSource: "library",
8416
- isRedteam: Boolean(testSuite.redteam),
8670
+ isRedteam: Boolean(testSuiteConfig.redteam),
8417
8671
  ...options
8418
8672
  });
8419
- if (testSuite.writeLatestResults && testSuite.sharing) if (isSharingEnabled(ret)) try {
8673
+ if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
8420
8674
  const shareableUrl = await createShareableUrl(ret, { silent: true });
8421
8675
  if (shareableUrl) {
8422
8676
  ret.shareableUrl = shareableUrl;
@@ -8427,9 +8681,9 @@ async function evaluate(testSuite, options = {}) {
8427
8681
  logger.warn(`Failed to create shareable URL: ${error}`);
8428
8682
  }
8429
8683
  else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
8430
- if (testSuite.outputPath) {
8431
- if (typeof testSuite.outputPath === "string") await writeOutput(testSuite.outputPath, evalRecord, null);
8432
- else if (Array.isArray(testSuite.outputPath)) await writeMultipleOutputs(testSuite.outputPath, evalRecord, null);
8684
+ if (testSuiteConfig.outputPath) {
8685
+ if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
8686
+ else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
8433
8687
  }
8434
8688
  return ret;
8435
8689
  }
@@ -8658,6 +8912,58 @@ function setDownloadHeaders(res, fileName, contentType) {
8658
8912
  //#region src/server/routes/eval.ts
8659
8913
  const evalRouter = Router();
8660
8914
  const evalJobs = /* @__PURE__ */ new Map();
8915
+ function sendEvalTableResponse(res, evalId, responsePayload) {
8916
+ try {
8917
+ res.json(responsePayload);
8918
+ } catch (error) {
8919
+ if (!(error instanceof RangeError)) throw error;
8920
+ logger.warn("[GET /:id/table] Response too large, stripping per-cell prompts by size", { evalId });
8921
+ const promptLocations = getEvalTableOutputPromptLocationsBySize(responsePayload);
8922
+ if (promptLocations.length === 0) {
8923
+ logger.error("[GET /:id/table] Response too large and has no prompts to strip", { evalId });
8924
+ res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
8925
+ return;
8926
+ }
8927
+ const tryStringifyWithStrippedPrompts = (promptCountToStrip) => {
8928
+ const responseWithoutPrompts = getEvalTablePromptStrippedPayload(responsePayload, promptLocations, promptCountToStrip);
8929
+ try {
8930
+ const responseBody = JSON.stringify(responseWithoutPrompts);
8931
+ invariant(typeof responseBody === "string", "Eval table response must serialize to JSON");
8932
+ return responseBody;
8933
+ } catch (retryError) {
8934
+ if (!(retryError instanceof RangeError)) throw retryError;
8935
+ return null;
8936
+ }
8937
+ };
8938
+ let lowerBound = 0;
8939
+ let upperBound = 1;
8940
+ let responseBody = null;
8941
+ while (upperBound < promptLocations.length) {
8942
+ responseBody = tryStringifyWithStrippedPrompts(upperBound);
8943
+ if (responseBody) break;
8944
+ lowerBound = upperBound;
8945
+ upperBound *= 2;
8946
+ }
8947
+ if (!responseBody) {
8948
+ upperBound = promptLocations.length;
8949
+ responseBody = tryStringifyWithStrippedPrompts(upperBound);
8950
+ }
8951
+ if (responseBody) {
8952
+ while (upperBound - lowerBound > 1) {
8953
+ const midPoint = lowerBound + Math.floor((upperBound - lowerBound) / 2);
8954
+ const midpointResponseBody = tryStringifyWithStrippedPrompts(midPoint);
8955
+ if (midpointResponseBody) {
8956
+ upperBound = midPoint;
8957
+ responseBody = midpointResponseBody;
8958
+ } else lowerBound = midPoint;
8959
+ }
8960
+ res.type("json").send(responseBody);
8961
+ return;
8962
+ }
8963
+ logger.error("[GET /:id/table] Response still too large after stripping prompts", { evalId });
8964
+ res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
8965
+ }
8966
+ }
8661
8967
  evalRouter.post("/job", (req, res) => {
8662
8968
  const result = EvalSchemas.CreateJob.Request.safeParse(req.body);
8663
8969
  if (!result.success) {
@@ -8698,9 +9004,10 @@ evalRouter.post("/job", (req, res) => {
8698
9004
  job.evalId = evalResult.id;
8699
9005
  console.log(`[${id}] Complete`);
8700
9006
  }).catch((error) => {
8701
- logger.error(dedent`Failed to eval tests:
8702
- Error: ${error}
8703
- Body: ${JSON.stringify(req.body, null, 2)}`);
9007
+ logger.error("Failed to eval tests", {
9008
+ error,
9009
+ body: sanitizeObject(testSuite, { context: "request body" })
9010
+ });
8704
9011
  const job = evalJobs.get(id);
8705
9012
  invariant(job, "Job not found");
8706
9013
  job.status = "error";
@@ -8897,7 +9204,7 @@ evalRouter.get("/:id/table", async (req, res) => {
8897
9204
  evalId: id
8898
9205
  });
8899
9206
  }
8900
- res.json({
9207
+ sendEvalTableResponse(res, id, {
8901
9208
  table: returnTable,
8902
9209
  totalCount: table.totalCount,
8903
9210
  filteredCount: table.filteredCount,
@@ -9063,43 +9370,53 @@ evalRouter.post("/:evalId/results/:id/rating", async (req, res) => {
9063
9370
  res.status(400).json({ error: z.prettifyError(bodyResult.error) });
9064
9371
  return;
9065
9372
  }
9066
- const { id } = paramsResult.data;
9067
- const gradingResult = bodyResult.data;
9068
- const result = await EvalResult.findById(id);
9069
- invariant(result, "Result not found");
9070
- const eval_ = await Eval.findById(result.evalId);
9071
- invariant(eval_, "Eval not found");
9072
- const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
9073
- const successChanged = result.success !== gradingResult.pass;
9074
- const scoreChange = gradingResult.score - result.score;
9075
- result.gradingResult = gradingResult;
9076
- result.success = gradingResult.pass;
9077
- result.score = gradingResult.score;
9078
- const prompt = eval_.prompts[result.promptIdx];
9079
- invariant(prompt, "Prompt not found");
9080
- if (!prompt.metrics) {
9081
- logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
9082
- res.status(400).json({ error: "Prompt metrics not found" });
9083
- return;
9373
+ try {
9374
+ const { evalId, id } = paramsResult.data;
9375
+ const gradingResult = bodyResult.data;
9376
+ const result = await EvalResult.findById(id);
9377
+ if (!result || result.evalId !== evalId) {
9378
+ res.status(404).json({ error: "Result not found" });
9379
+ return;
9380
+ }
9381
+ const eval_ = await Eval.findById(evalId);
9382
+ if (!eval_) {
9383
+ res.status(404).json({ error: "Eval not found" });
9384
+ return;
9385
+ }
9386
+ const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
9387
+ const successChanged = result.success !== gradingResult.pass;
9388
+ const scoreChange = gradingResult.score - result.score;
9389
+ result.gradingResult = gradingResult;
9390
+ result.success = gradingResult.pass;
9391
+ result.score = gradingResult.score;
9392
+ const prompt = eval_.prompts[result.promptIdx];
9393
+ invariant(prompt, "Prompt not found");
9394
+ if (!prompt.metrics) {
9395
+ logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
9396
+ res.status(400).json({ error: "Prompt metrics not found" });
9397
+ return;
9398
+ }
9399
+ if (successChanged) if (result.success) {
9400
+ prompt.metrics.testPassCount += 1;
9401
+ prompt.metrics.testFailCount -= 1;
9402
+ prompt.metrics.assertPassCount += 1;
9403
+ prompt.metrics.score += scoreChange;
9404
+ if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
9405
+ } else {
9406
+ prompt.metrics.testPassCount -= 1;
9407
+ prompt.metrics.testFailCount += 1;
9408
+ prompt.metrics.assertFailCount += 1;
9409
+ prompt.metrics.score += scoreChange;
9410
+ if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
9411
+ }
9412
+ else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
9413
+ else prompt.metrics.assertFailCount += 1;
9414
+ await eval_.save();
9415
+ await result.save();
9416
+ res.json(result);
9417
+ } catch (error) {
9418
+ sendError(res, 500, "Failed to submit rating", error);
9084
9419
  }
9085
- if (successChanged) if (result.success) {
9086
- prompt.metrics.testPassCount += 1;
9087
- prompt.metrics.testFailCount -= 1;
9088
- prompt.metrics.assertPassCount += 1;
9089
- prompt.metrics.score += scoreChange;
9090
- if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
9091
- } else {
9092
- prompt.metrics.testPassCount -= 1;
9093
- prompt.metrics.testFailCount += 1;
9094
- prompt.metrics.assertFailCount += 1;
9095
- prompt.metrics.score += scoreChange;
9096
- if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
9097
- }
9098
- else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
9099
- else prompt.metrics.assertFailCount += 1;
9100
- await eval_.save();
9101
- await result.save();
9102
- res.json(result);
9103
9420
  });
9104
9421
  evalRouter.post("/", async (req, res) => {
9105
9422
  const bodyResult = EvalSchemas.Save.Request.safeParse(req.body);
@@ -9133,9 +9450,10 @@ evalRouter.post("/", async (req, res) => {
9133
9450
  res.json(EvalSchemas.Save.Response.parse({ id: eval_.id }));
9134
9451
  }
9135
9452
  } catch (error) {
9136
- logger.error(dedent`Failed to write eval to database:
9137
- Error: ${error}
9138
- Body: ${JSON.stringify(body, null, 2)}`);
9453
+ logger.error("Failed to write eval to database", {
9454
+ error,
9455
+ body: sanitizeObject(body, { context: "request body" })
9456
+ });
9139
9457
  res.status(500).json({ error: "Failed to write eval to database" });
9140
9458
  }
9141
9459
  });
@@ -9353,7 +9671,7 @@ async function checkForUpdates() {
9353
9671
  } catch {
9354
9672
  return false;
9355
9673
  }
9356
- if (semverGt(latestVersion, "0.121.4")) {
9674
+ if (semverGt(latestVersion, "0.121.7")) {
9357
9675
  const border = "=".repeat(TERMINAL_MAX_WIDTH);
9358
9676
  logger.info(`\n${border}
9359
9677
  ${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
@@ -9512,6 +9830,9 @@ const ModelAuditCliOptionsSchema = z.object({
9512
9830
  dryRun: z.boolean().optional(),
9513
9831
  cache: z.boolean().optional(),
9514
9832
  stream: z.boolean().optional(),
9833
+ scanners: z.array(z.string()).optional(),
9834
+ excludeScanner: z.array(z.string()).optional(),
9835
+ listScanners: z.boolean().optional(),
9515
9836
  share: z.boolean().optional(),
9516
9837
  noShare: z.boolean().optional()
9517
9838
  });
@@ -9599,6 +9920,18 @@ const CLI_ARG_MAP = {
9599
9920
  stream: {
9600
9921
  flag: "--stream",
9601
9922
  type: "boolean"
9923
+ },
9924
+ scanners: {
9925
+ flag: "--scanners",
9926
+ type: "array"
9927
+ },
9928
+ excludeScanner: {
9929
+ flag: "--exclude-scanner",
9930
+ type: "array"
9931
+ },
9932
+ listScanners: {
9933
+ flag: "--list-scanners",
9934
+ type: "boolean"
9602
9935
  }
9603
9936
  };
9604
9937
  /**
@@ -9767,6 +10100,62 @@ function spawnModelAudit(args, options) {
9767
10100
  });
9768
10101
  });
9769
10102
  }
10103
+ function collectRepeatableOption(value, previous = []) {
10104
+ return [...previous, value];
10105
+ }
10106
+ function hasScannerSelectionOptions(options) {
10107
+ return Boolean(options.scanners?.length || options.excludeScanner?.length);
10108
+ }
10109
+ function hasScannerSelectionValue(value) {
10110
+ if (Array.isArray(value)) return value.length > 0;
10111
+ return typeof value === "string" && value.trim().length > 0;
10112
+ }
10113
+ function hasPersistedScannerSelection(metadata) {
10114
+ const options = metadata?.options;
10115
+ if (!options || typeof options !== "object" || Array.isArray(options)) return false;
10116
+ return hasScannerSelectionValue(options.scanners) || hasScannerSelectionValue(options.excludeScanner);
10117
+ }
10118
+ /**
10119
+ * Parse CLI options through Zod, logging validation errors to the CLI.
10120
+ * Returns null when validation fails (and sets process.exitCode to 1).
10121
+ */
10122
+ function buildCliArgs(paths, cliOptions) {
10123
+ try {
10124
+ return parseModelAuditArgs(paths, cliOptions);
10125
+ } catch (error) {
10126
+ if (error instanceof z.ZodError) {
10127
+ logger.error(`Invalid model audit options provided:\n${z.prettifyError(error)}`);
10128
+ process.exitCode = 1;
10129
+ return null;
10130
+ }
10131
+ throw error;
10132
+ }
10133
+ }
10134
+ /**
10135
+ * Run modelaudit with inherited stdio and propagate its exit code.
10136
+ *
10137
+ * `treatExitOneAsIssues=true` suppresses the error log for exit code 1, which
10138
+ * modelaudit uses to mean "scan completed, issues found" — callers that expect
10139
+ * findings (like the main scan flow) should set this, while list/help flows
10140
+ * (where a non-zero exit is always unexpected) should leave it false.
10141
+ */
10142
+ async function runPassthroughModelAudit(args, env, treatExitOneAsIssues = false) {
10143
+ try {
10144
+ const spawnResult = await spawnModelAudit(args, {
10145
+ captureOutput: false,
10146
+ env
10147
+ });
10148
+ const isIssuesExit = treatExitOneAsIssues && spawnResult.code === 1;
10149
+ if (spawnResult.code !== null && spawnResult.code !== 0 && !isIssuesExit) logger.error(`Model scan process exited with code ${spawnResult.code}`);
10150
+ process.exitCode = spawnResult.code || 0;
10151
+ } catch (error) {
10152
+ const message = error instanceof Error ? error.message : String(error);
10153
+ logger.error(`Failed to start modelaudit: ${message}`);
10154
+ logger.info("Make sure modelaudit is installed and available in your PATH.");
10155
+ logger.info("Install it using: pip install modelaudit");
10156
+ process.exitCode = 1;
10157
+ }
10158
+ }
9770
10159
  /**
9771
10160
  * Check for existing scan and determine if re-scan is needed.
9772
10161
  * Returns the existing audit if found and re-scan should happen.
@@ -9789,6 +10178,20 @@ async function checkExistingScan(paths, options, currentScannerVersion) {
9789
10178
  shouldSkip: false,
9790
10179
  existingAudit: null
9791
10180
  };
10181
+ if (hasScannerSelectionOptions(options)) {
10182
+ logger.debug("Re-scanning with scanner selection options");
10183
+ return {
10184
+ shouldSkip: false,
10185
+ existingAudit: existing
10186
+ };
10187
+ }
10188
+ if (hasPersistedScannerSelection(existing.metadata)) {
10189
+ logger.debug("Re-scanning because cached revision used scanner selection options");
10190
+ return {
10191
+ shouldSkip: false,
10192
+ existingAudit: existing
10193
+ };
10194
+ }
9792
10195
  if (options.force) {
9793
10196
  logger.debug(`Re-scanning (--force): ${modelId}`);
9794
10197
  return {
@@ -9920,7 +10323,9 @@ async function saveAuditRecord(paths, results, options, currentScannerVersion, e
9920
10323
  cache: options.cache,
9921
10324
  quiet: options.quiet,
9922
10325
  progress: options.progress,
9923
- stream: options.stream
10326
+ stream: options.stream,
10327
+ scanners: options.scanners,
10328
+ excludeScanner: options.excludeScanner
9924
10329
  }
9925
10330
  };
9926
10331
  if (existingAudit) {
@@ -10052,8 +10457,8 @@ async function processScanResultsFromStdout(spawnResult, paths, options, current
10052
10457
  return processJsonResults(jsonOutput, spawnResult.code || 0, paths, options, currentScannerVersion, existingAudit);
10053
10458
  }
10054
10459
  function modelScanCommand(program) {
10055
- program.command("scan-model").description("Scan model files for security and quality issues").argument("<paths...>", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
10056
- if (!paths || paths.length === 0) {
10460
+ program.command("scan-model").description("Scan model files for security and quality issues").argument("[paths...]", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("--scanners <scanner>", "Only run selected ModelAudit scanners (IDs/classes; comma-separated or repeated)", collectRepeatableOption).option("--exclude-scanner <scanner>", "Exclude a ModelAudit scanner from the active set (comma-separated or repeated)", collectRepeatableOption).option("--list-scanners", "List registered ModelAudit scanners and exit").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
10461
+ if (!options.listScanners && (!paths || paths.length === 0)) {
10057
10462
  logger.error("No paths specified. Provide at least one model file or directory to scan.");
10058
10463
  process.exitCode = 1;
10059
10464
  return;
@@ -10069,6 +10474,21 @@ function modelScanCommand(program) {
10069
10474
  }
10070
10475
  await checkModelAuditUpdates();
10071
10476
  if (currentScannerVersion) logger.debug(`Using modelaudit version: ${currentScannerVersion}`);
10477
+ const delegationEnv = {
10478
+ ...process.env,
10479
+ PROMPTFOO_DELEGATED: "true"
10480
+ };
10481
+ if (options.listScanners) {
10482
+ const parsed = buildCliArgs(paths || [], {
10483
+ ...options,
10484
+ format: options.format || "text",
10485
+ output: options.output,
10486
+ timeout: void 0
10487
+ });
10488
+ if (!parsed) return;
10489
+ await runPassthroughModelAudit(parsed.args, delegationEnv);
10490
+ return;
10491
+ }
10072
10492
  const saveToDatabase = options.write === void 0 || options.write === true;
10073
10493
  let existingAuditToUpdate = null;
10074
10494
  if (saveToDatabase) {
@@ -10080,33 +10500,22 @@ function modelScanCommand(program) {
10080
10500
  existingAuditToUpdate = existingAudit;
10081
10501
  }
10082
10502
  const outputFormat = saveToDatabase ? "json" : options.format || "text";
10083
- const cliOptions = {
10503
+ const parsed = buildCliArgs(paths, {
10084
10504
  ...options,
10085
10505
  format: outputFormat,
10086
10506
  output: options.output && !saveToDatabase ? options.output : void 0,
10087
10507
  timeout: options.timeout ? parseInt(options.timeout, 10) : void 0
10088
- };
10089
- let args;
10090
- try {
10091
- const result = parseModelAuditArgs(paths, cliOptions);
10092
- args = result.args;
10093
- if (result.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${result.unsupportedOptions.join(", ")}`);
10094
- } catch (error) {
10095
- if (error instanceof z.ZodError) {
10096
- logger.error("Invalid model audit options provided:");
10097
- for (const err of error.issues) logger.error(` - ${err.path.join(".")}: ${err.message}`);
10098
- process.exitCode = 1;
10099
- return;
10100
- }
10101
- throw error;
10508
+ });
10509
+ if (!parsed) return;
10510
+ const args = parsed.args;
10511
+ if (parsed.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${parsed.unsupportedOptions.join(", ")}`);
10512
+ if (saveToDatabase || outputFormat === "text") logger.info(`Running model scan on: ${paths.join(", ")}`);
10513
+ if (!saveToDatabase) {
10514
+ await runPassthroughModelAudit(args, delegationEnv, true);
10515
+ return;
10102
10516
  }
10103
- logger.info(`Running model scan on: ${paths.join(", ")}`);
10104
- const delegationEnv = {
10105
- ...process.env,
10106
- PROMPTFOO_DELEGATED: "true"
10107
- };
10108
10517
  try {
10109
- if (saveToDatabase) if (supportsCliUiWithOutput(currentScannerVersion)) {
10518
+ if (supportsCliUiWithOutput(currentScannerVersion)) {
10110
10519
  const tempOutputPath = createTempOutputPath();
10111
10520
  args.push("--output", tempOutputPath);
10112
10521
  let cleanedUp = false;
@@ -10140,14 +10549,6 @@ function modelScanCommand(program) {
10140
10549
  });
10141
10550
  process.exitCode = await processScanResultsFromStdout(spawnResult, paths, options, currentScannerVersion, existingAuditToUpdate);
10142
10551
  }
10143
- else {
10144
- const spawnResult = await spawnModelAudit(args, {
10145
- captureOutput: false,
10146
- env: delegationEnv
10147
- });
10148
- if (spawnResult.code !== null && spawnResult.code !== 0 && spawnResult.code !== 1) logger.error(`Model scan process exited with code ${spawnResult.code}`);
10149
- process.exitCode = spawnResult.code || 0;
10150
- }
10151
10552
  } catch (error) {
10152
10553
  const message = error instanceof Error ? error.message : String(error);
10153
10554
  logger.error(`Failed to start modelaudit: ${message}`);
@@ -10164,6 +10565,14 @@ const CheckInstalledResponseSchema = z.object({
10164
10565
  version: z.string().nullable(),
10165
10566
  cwd: z.string()
10166
10567
  });
10568
+ const ScannerInfoSchema = z.object({
10569
+ id: z.string(),
10570
+ class: z.string().optional().default(""),
10571
+ description: z.string().optional().default(""),
10572
+ extensions: z.array(z.string()).optional().default([]),
10573
+ dependencies: z.array(z.string()).optional().default([])
10574
+ }).passthrough();
10575
+ const ListScannersResponseSchema = z.object({ scanners: z.array(ScannerInfoSchema) }).passthrough();
10167
10576
  const CheckPathRequestSchema = z.object({ path: z.string().trim().min(1, "No path provided") });
10168
10577
  const CheckPathResponseSchema = z.union([z.object({
10169
10578
  exists: z.literal(false),
@@ -10195,6 +10604,8 @@ const ScanRequestSchema = z.object({
10195
10604
  sbom: z.string().optional(),
10196
10605
  output: z.string().optional(),
10197
10606
  maxSize: z.string().optional(),
10607
+ scanners: z.array(z.string()).optional(),
10608
+ excludeScanner: z.array(z.string()).optional(),
10198
10609
  persist: z.boolean().optional(),
10199
10610
  name: z.string().optional(),
10200
10611
  author: z.string().optional()
@@ -10205,8 +10616,13 @@ const ListScansQuerySchema = z.object({
10205
10616
  offset: z.coerce.number().int().min(0).optional().default(0),
10206
10617
  sort: z.enum([
10207
10618
  "createdAt",
10619
+ "failedChecks",
10620
+ "hasErrors",
10621
+ "id",
10622
+ "modelPath",
10208
10623
  "name",
10209
- "modelPath"
10624
+ "passedChecks",
10625
+ "totalChecks"
10210
10626
  ]).optional().default("createdAt"),
10211
10627
  order: z.enum(["asc", "desc"]).optional().default("desc"),
10212
10628
  search: z.string().optional()
@@ -10245,6 +10661,7 @@ const DeleteScanResponseSchema = z.object({
10245
10661
  });
10246
10662
  const ModelAuditSchemas = {
10247
10663
  CheckInstalled: { Response: CheckInstalledResponseSchema },
10664
+ ListScanners: { Response: ListScannersResponseSchema },
10248
10665
  CheckPath: {
10249
10666
  Request: CheckPathRequestSchema,
10250
10667
  Response: CheckPathResponseSchema
@@ -10267,6 +10684,47 @@ const ModelAuditSchemas = {
10267
10684
  //#endregion
10268
10685
  //#region src/server/routes/modelAudit.ts
10269
10686
  const modelAuditRouter = Router();
10687
+ const LIST_SCANNERS_ARGS = parseModelAuditArgs([], {
10688
+ listScanners: true,
10689
+ format: "json"
10690
+ }).args;
10691
+ function getModelAuditDelegationEnv() {
10692
+ return {
10693
+ ...process.env,
10694
+ PROMPTFOO_DELEGATED: "true"
10695
+ };
10696
+ }
10697
+ function spawnModelAuditCapture(args, options = {}) {
10698
+ return new Promise((resolve, reject) => {
10699
+ const child = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
10700
+ let stdout = "";
10701
+ let stderr = "";
10702
+ const onAbort = () => {
10703
+ if (!child.killed) child.kill("SIGTERM");
10704
+ };
10705
+ if (options.signal?.aborted) onAbort();
10706
+ else options.signal?.addEventListener("abort", onAbort, { once: true });
10707
+ const cleanupAbort = () => options.signal?.removeEventListener("abort", onAbort);
10708
+ child.stdout?.on("data", (data) => {
10709
+ stdout += data.toString();
10710
+ });
10711
+ child.stderr?.on("data", (data) => {
10712
+ stderr += data.toString();
10713
+ });
10714
+ child.on("error", (error) => {
10715
+ cleanupAbort();
10716
+ reject(error);
10717
+ });
10718
+ child.on("close", (code) => {
10719
+ cleanupAbort();
10720
+ resolve({
10721
+ code,
10722
+ stdout,
10723
+ stderr
10724
+ });
10725
+ });
10726
+ });
10727
+ }
10270
10728
  modelAuditRouter.get("/check-installed", async (_req, res) => {
10271
10729
  try {
10272
10730
  const { installed, version } = await checkModelAuditInstalled();
@@ -10283,6 +10741,34 @@ modelAuditRouter.get("/check-installed", async (_req, res) => {
10283
10741
  }));
10284
10742
  }
10285
10743
  });
10744
+ modelAuditRouter.get("/scanners", async (req, res) => {
10745
+ const abortController = new AbortController();
10746
+ const onClientClose = () => abortController.abort();
10747
+ req.on("close", onClientClose);
10748
+ try {
10749
+ const { installed } = await checkModelAuditInstalled();
10750
+ if (!installed) {
10751
+ res.status(400).json({ error: "ModelAudit is not installed. Please install it using: pip install modelaudit" });
10752
+ return;
10753
+ }
10754
+ const { code, stdout, stderr } = await spawnModelAuditCapture(LIST_SCANNERS_ARGS, { signal: abortController.signal });
10755
+ if (abortController.signal.aborted) return;
10756
+ if (code !== null && code !== 0) {
10757
+ sendError(res, 500, "Failed to list ModelAudit scanners", {
10758
+ code,
10759
+ stderr
10760
+ });
10761
+ return;
10762
+ }
10763
+ const parsedOutput = JSON.parse(stdout);
10764
+ res.json(ModelAuditSchemas.ListScanners.Response.parse(parsedOutput));
10765
+ } catch (error) {
10766
+ if (abortController.signal.aborted) return;
10767
+ sendError(res, 500, "Failed to list ModelAudit scanners", error);
10768
+ } finally {
10769
+ req.removeListener("close", onClientClose);
10770
+ }
10771
+ });
10286
10772
  modelAuditRouter.post("/check-path", async (req, res) => {
10287
10773
  const bodyResult = ModelAuditSchemas.CheckPath.Request.safeParse(req.body);
10288
10774
  if (!bodyResult.success) {
@@ -10353,11 +10839,12 @@ modelAuditRouter.post("/scan", async (req, res) => {
10353
10839
  event: "model_scan",
10354
10840
  pathCount: paths.length,
10355
10841
  hasBlacklist: (options.blacklist?.length ?? 0) > 0,
10842
+ hasScannerSelection: Boolean(options.scanners?.length || options.excludeScanner?.length),
10356
10843
  timeout: options.timeout ?? 0,
10357
10844
  verbose: options.verbose ?? false,
10358
10845
  persist
10359
10846
  });
10360
- const modelAudit = spawn("modelaudit", args);
10847
+ const modelAudit = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
10361
10848
  let stdout = "";
10362
10849
  let stderr = "";
10363
10850
  let responded = false;
@@ -10543,9 +11030,20 @@ modelAuditRouter.post("/scan", async (req, res) => {
10543
11030
  options: {
10544
11031
  blacklist: options.blacklist,
10545
11032
  timeout: options.timeout,
11033
+ maxSize: options.maxSize,
10546
11034
  maxFileSize: options.maxFileSize,
10547
11035
  maxTotalSize: options.maxTotalSize,
10548
- verbose: options.verbose
11036
+ verbose: options.verbose,
11037
+ format: options.format,
11038
+ strict: options.strict,
11039
+ dryRun: options.dryRun,
11040
+ cache: options.cache,
11041
+ quiet: options.quiet,
11042
+ progress: options.progress,
11043
+ sbom: options.sbom,
11044
+ output: options.output,
11045
+ scanners: options.scanners,
11046
+ excludeScanner: options.excludeScanner
10549
11047
  }
10550
11048
  }
10551
11049
  })).id;
@@ -10701,6 +11199,38 @@ function normalizeTargetPurposeDiscoveryResult(result) {
10701
11199
  tools: cleanTools(result.tools)
10702
11200
  };
10703
11201
  }
11202
+ function extractStringField(value) {
11203
+ if (typeof value !== "string") return;
11204
+ return value.trim() || void 0;
11205
+ }
11206
+ async function getRemoteResponseErrorDetail(response) {
11207
+ const rawText = (await response.text()).trim();
11208
+ const fallback = rawText || response.statusText || "Unknown error";
11209
+ if (!rawText) return fallback;
11210
+ try {
11211
+ const parsed = JSON.parse(rawText);
11212
+ return extractStringField(parsed?.message) ?? extractStringField(parsed?.error) ?? fallback;
11213
+ } catch {
11214
+ return fallback;
11215
+ }
11216
+ }
11217
+ const REMOTE_ERROR_HINTS = {
11218
+ 400: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
11219
+ 401: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
11220
+ 403: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
11221
+ 404: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
11222
+ 429: "You are being rate limited. Wait a moment and try again."
11223
+ };
11224
+ function getRemoteErrorHint(status) {
11225
+ if (REMOTE_ERROR_HINTS[status]) return REMOTE_ERROR_HINTS[status];
11226
+ if (status >= 500) return "The remote generation service may be temporarily unavailable. Retry in a few minutes or contact support if the issue persists.";
11227
+ }
11228
+ async function buildRemoteErrorFromResponse(response) {
11229
+ const detail = await getRemoteResponseErrorDetail(response);
11230
+ const hint = getRemoteErrorHint(response.status);
11231
+ const base = `Remote server returned HTTP ${response.status}: ${detail}`;
11232
+ return new Error(hint ? `${base}\n${hint}` : base);
11233
+ }
10704
11234
  /**
10705
11235
  * Queries Cloud for the purpose-discovery logic, sends each logic to the target,
10706
11236
  * and summarizes the results.
@@ -10750,11 +11280,7 @@ async function doTargetPurposeDiscovery(target, prompt, showProgress = true) {
10750
11280
  email: getUserEmail()
10751
11281
  }))
10752
11282
  });
10753
- if (!response.ok) {
10754
- const error = await response.text();
10755
- logger.error(`${LOG_PREFIX} Error getting the next question from remote server: ${error}`);
10756
- continue;
10757
- }
11283
+ if (!response.ok) throw await buildRemoteErrorFromResponse(response);
10758
11284
  const responseData = await response.json();
10759
11285
  const data = TargetPurposeDiscoveryTaskResponseSchema.parse(responseData);
10760
11286
  logger.debug(`${LOG_PREFIX} Received response from remote server: ${JSON.stringify(data, null, 2)}`);
@@ -10971,7 +11497,7 @@ function determineEffectiveSessionSource({ provider, sessionConfig }) {
10971
11497
  async function testProviderConnectivity({ provider, prompt = "Hello World!", inputs }) {
10972
11498
  const vars = {};
10973
11499
  if (!provider?.config?.sessionParser) vars["sessionId"] = crypto.randomUUID();
10974
- if (inputs && typeof inputs === "object") for (const [varName, _description] of Object.entries(inputs)) vars[varName] = `test_${varName}`;
11500
+ if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) vars[varName] = createPlaceholderInputValue(varName, definition);
10975
11501
  const testSuite = {
10976
11502
  providers: [provider],
10977
11503
  prompts: [{
@@ -11202,10 +11728,15 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
11202
11728
  sessionConfig
11203
11729
  });
11204
11730
  const initialSessionId = effectiveSessionSource === "server" ? void 0 : crypto.randomUUID();
11731
+ const materializeSessionPrompt = (prompt) => {
11732
+ if (!mainInputVariable) return prompt;
11733
+ const definition = inputs?.[mainInputVariable];
11734
+ return definition ? createPlaceholderInputValue(mainInputVariable, definition, prompt) : prompt;
11735
+ };
11205
11736
  const inputVars = {};
11206
- if (inputs && typeof inputs === "object") for (const [varName, _description] of Object.entries(inputs)) {
11737
+ if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) {
11207
11738
  if (varName === mainInputVariable) continue;
11208
- inputVars[varName] = `test_${varName}`;
11739
+ inputVars[varName] = createPlaceholderInputValue(varName, definition);
11209
11740
  }
11210
11741
  const firstPrompt = "What can you help me with?";
11211
11742
  const secondPrompt = "What was the last thing I asked you?";
@@ -11218,7 +11749,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
11218
11749
  vars: {
11219
11750
  ...initialSessionId ? { sessionId: initialSessionId } : {},
11220
11751
  ...inputVars,
11221
- ...mainInputVariable ? { [mainInputVariable]: firstPrompt } : {}
11752
+ ...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(firstPrompt) } : {}
11222
11753
  },
11223
11754
  prompt: {
11224
11755
  raw: firstPrompt,
@@ -11265,7 +11796,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
11265
11796
  vars: {
11266
11797
  ...extractedSessionId ? { sessionId: extractedSessionId } : {},
11267
11798
  ...inputVars,
11268
- ...mainInputVariable ? { [mainInputVariable]: secondPrompt } : {}
11799
+ ...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(secondPrompt) } : {}
11269
11800
  },
11270
11801
  prompt: {
11271
11802
  raw: secondPrompt,
@@ -11790,7 +12321,7 @@ providersRouter.post("/test-session", async (req, res) => {
11790
12321
  const TestCaseGenerationSchema = z.object({
11791
12322
  plugin: z.object({
11792
12323
  id: z.string().refine((val) => ALL_PLUGINS.includes(val), { message: `Invalid plugin ID. Must be one of: ${ALL_PLUGINS.join(", ")}` }),
11793
- config: PluginConfigSchema.optional().prefault({})
12324
+ config: PluginConfigSchema.catchall(z.unknown()).optional().prefault({})
11794
12325
  }),
11795
12326
  strategy: z.object({
11796
12327
  id: z.string().refine((val) => ALL_STRATEGIES.includes(val), { message: `Invalid strategy ID. Must be one of: ${ALL_STRATEGIES.join(", ")}` }),
@@ -12367,11 +12898,11 @@ redteamRouter.post("/:taskId", async (req, res) => {
12367
12898
  }
12368
12899
  const { taskId } = paramsResult.data;
12369
12900
  const cloudFunctionUrl = getRemoteGenerationUrl();
12370
- logger.debug(`Received ${taskId} task request: ${JSON.stringify({
12901
+ logger.debug(`Received ${taskId} task request`, {
12371
12902
  method: req.method,
12372
12903
  url: req.url,
12373
- body: req.body
12374
- })}`);
12904
+ body: sanitizeObject(bodyResult.data, { context: "request body" })
12905
+ });
12375
12906
  try {
12376
12907
  logger.debug(`Sending request to cloud function: ${cloudFunctionUrl}`);
12377
12908
  const response = await fetchWithProxy(cloudFunctionUrl, {
@@ -12743,7 +13274,7 @@ router.get("/", async (_req, res) => {
12743
13274
  };
12744
13275
  } catch (error) {
12745
13276
  logger.debug(`Failed to fetch latest version: ${error}`);
12746
- latestVersion = versionCache.latestVersion ?? "0.121.4";
13277
+ latestVersion = versionCache.latestVersion ?? "0.121.7";
12747
13278
  }
12748
13279
  }
12749
13280
  const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
@@ -12752,7 +13283,7 @@ router.get("/", async (_req, res) => {
12752
13283
  selfHosted,
12753
13284
  isNpx
12754
13285
  });
12755
- const resolvedLatestVersion = latestVersion ?? "0.121.4";
13286
+ const resolvedLatestVersion = latestVersion ?? "0.121.7";
12756
13287
  const response = {
12757
13288
  currentVersion: VERSION,
12758
13289
  latestVersion: resolvedLatestVersion,
@@ -15174,7 +15705,11 @@ const EXAMPLE_ALIASES = {
15174
15705
  "google-adk-example": "integration-google-adk",
15175
15706
  "google-sheets": "integration-google-sheets",
15176
15707
  "gpt-4o-temperature-comparison": "compare-gpt-temperature",
15177
- "gpt-4o-vs-4o-mini": "compare-gpt-4o-vs-4o-mini",
15708
+ "compare-gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
15709
+ "compare-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
15710
+ "compare-gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
15711
+ "gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
15712
+ "gpt-model-tiers": "compare-gpt-model-tiers",
15178
15713
  "gpt-vs-claude-vs-gemini": "compare-gpt-vs-claude-vs-gemini",
15179
15714
  "grok-4-political-bias": "redteam-grok-4-political-bias",
15180
15715
  groq: "provider-groq",
@@ -15225,7 +15760,9 @@ const EXAMPLE_ALIASES = {
15225
15760
  "node-package-typescript": "config-node-package-typescript",
15226
15761
  nscale: "provider-nscale",
15227
15762
  "nunjucks-custom-filters": "config-nunjucks-custom-filters",
15228
- "openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-5-vs-gpt-5-mini-mmlu",
15763
+ "gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
15764
+ "gpt-model-tiers-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
15765
+ "openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
15229
15766
  "openai-model-comparison": "compare-openai-models",
15230
15767
  openclaw: "provider-openclaw",
15231
15768
  "opencode-sdk": "provider-opencode-sdk/basic",
@@ -15597,8 +16134,8 @@ function shouldUseInkList() {
15597
16134
  async function runInkList(options) {
15598
16135
  const [React, { renderInteractive }, { ListApp }] = await Promise.all([
15599
16136
  import("react"),
15600
- import("./render-CgVDrJmM.js"),
15601
- import("./ListApp-DQkFNqE9.js")
16137
+ import("./render-DznWrxGO.js"),
16138
+ import("./ListApp-DLmM02JS.js")
15602
16139
  ]);
15603
16140
  let result = { cancelled: false };
15604
16141
  let resolveResult;
@@ -17906,8 +18443,8 @@ function registerRunEvaluationTool(server) {
17906
18443
  }
17907
18444
  filteredTestSuite.tests = filteredTests;
17908
18445
  }
17909
- const { evaluate } = await import("./evaluator-CVessDWe.js");
17910
- const evalRecord = await (await import("./eval-CzJFfFO9.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
18446
+ const { evaluate } = await import("./evaluator-DNdJF1Gv.js");
18447
+ const evalRecord = await (await import("./eval-DscR5iOM.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
17911
18448
  logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
17912
18449
  const startTime = Date.now();
17913
18450
  const result = await evaluate(filteredTestSuite, evalRecord, {
@@ -18473,6 +19010,9 @@ function analyzeConfiguration(config) {
18473
19010
  }
18474
19011
  //#endregion
18475
19012
  //#region src/commands/mcp/server.ts
19013
+ function setMcpTransport(transport) {
19014
+ Object.assign(process.env, { MCP_TRANSPORT: transport });
19015
+ }
18476
19016
  /**
18477
19017
  * Creates an MCP server with tools for interacting with promptfoo
18478
19018
  */
@@ -18508,7 +19048,7 @@ async function createMcpServer() {
18508
19048
  */
18509
19049
  async function startHttpMcpServer(port) {
18510
19050
  if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error(`Invalid port number: ${port}. Port must be an integer between 1 and 65535.`);
18511
- process.env.MCP_TRANSPORT = "http";
19051
+ setMcpTransport("http");
18512
19052
  const app = express();
18513
19053
  app.use(express.json());
18514
19054
  const mcpServer = await createMcpServer();
@@ -18565,7 +19105,7 @@ async function startHttpMcpServer(port) {
18565
19105
  * Starts an MCP server with stdio transport
18566
19106
  */
18567
19107
  async function startStdioMcpServer() {
18568
- process.env.MCP_TRANSPORT = "stdio";
19108
+ setMcpTransport("stdio");
18569
19109
  logger.transports.forEach((transport) => {
18570
19110
  if (transport.constructor.name === "Console" || transport.name === "console") transport.silent = true;
18571
19111
  });
@@ -19434,7 +19974,7 @@ function printErrorInformation(errorLogFile, debugLogFile) {
19434
19974
  async function main() {
19435
19975
  setupEnvFilesFromArgv();
19436
19976
  initializeRunLogging();
19437
- if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") process.env.PROMPTFOO_DISABLE_UPDATE = "true";
19977
+ if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") Object.assign(process.env, { PROMPTFOO_DISABLE_UPDATE: "true" });
19438
19978
  await checkForUpdates();
19439
19979
  await runDbMigrations();
19440
19980
  const { defaultConfig, defaultConfigPath } = await loadDefaultConfig();