promptfoo 0.121.5 → 0.121.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (490) hide show
  1. package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
  2. package/dist/src/{accounts-BIFntVWB.cjs → accounts-BVz5gHLK.cjs} +16 -10
  3. package/dist/src/{accounts-bnyHT7Ju.js → accounts-BWjqwsrf.js} +12 -7
  4. package/dist/src/{accounts-CLJHCDDb.js → accounts-D6IBfEE0.js} +13 -8
  5. package/dist/src/{accounts-CaLNYnf7.js → accounts-DAv_0iE7.js} +12 -7
  6. package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-BJKAkz2e.js} +2 -2
  7. package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-C-A92xhn.cjs} +8 -7
  8. package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-DmoS_S4B.js} +2 -2
  9. package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-GdToujHu.js} +2 -2
  10. package/dist/src/{agents-BGqaTDnr.js → agents-2C8NN6I1.js} +20 -9
  11. package/dist/src/{agents-n6vPqV3i.js → agents-BMAiSR2o.js} +4 -4
  12. package/dist/src/{agents-BYdMl1UE.js → agents-C-PGaxwj.js} +20 -9
  13. package/dist/src/{agents-BV9yFpXX.js → agents-C98cz5pl.js} +20 -9
  14. package/dist/src/{agents-WULPVjbH.cjs → agents-DB8Ub2Ld.cjs} +7 -6
  15. package/dist/src/{agents-DhxWMCtH.js → agents-Dwshy2H8.js} +4 -4
  16. package/dist/src/{agents-emVcx3yh.js → agents-mlKjx-cK.js} +2 -2
  17. package/dist/src/{agents-DiWmQYH9.cjs → agents-n2ej-c4H.cjs} +21 -10
  18. package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-Bi-laUlp.js} +6 -6
  19. package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-D0OFV4Vj.cjs} +7 -7
  20. package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-WyUK0wYy.js} +6 -6
  21. package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-v-63ZjEI.js} +6 -6
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-sCXUfaWo.js +1 -0
  24. package/dist/src/app/assets/index-BopgkZEh.js +388 -0
  25. package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
  26. package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
  27. package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
  28. package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
  29. package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
  30. package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
  31. package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
  32. package/dist/src/app/index.html +6 -6
  33. package/dist/src/{audio-CScmnmEB.js → audio-DnEIHWZR.js} +3 -3
  34. package/dist/src/{audio-Da8U9IS5.js → audio-heR0mu0n.js} +3 -3
  35. package/dist/src/{audio-BvpTOArF.js → audio-wafFO1wn.js} +3 -3
  36. package/dist/src/{audio-C0vDeS0j.cjs → audio-x44tsxIo.cjs} +4 -4
  37. package/dist/src/authoritativeMarkupInjection-2G3Be6oL.cjs +74 -0
  38. package/dist/src/authoritativeMarkupInjection-81PPynHR.js +75 -0
  39. package/dist/src/authoritativeMarkupInjection-D9O70HPi.js +74 -0
  40. package/dist/src/authoritativeMarkupInjection-PxSf3Rh8.js +74 -0
  41. package/dist/src/{base-dYsl2hmL.js → base-0Gzzue9Z.js} +4 -3
  42. package/dist/src/{base-BOMaNEes.js → base-Bp4c52YZ.js} +4 -3
  43. package/dist/src/{base-BTux96b1.js → base-BuNn-YBX.js} +4 -3
  44. package/dist/src/{base-Tw6uhH8K.cjs → base-BzdS8tod.cjs} +5 -4
  45. package/dist/src/bestOfN-BHiOjeaq.js +136 -0
  46. package/dist/src/bestOfN-CdkNPPbX.js +136 -0
  47. package/dist/src/bestOfN-ClUSOhN0.js +137 -0
  48. package/dist/src/bestOfN-DWwXEg1h.cjs +140 -0
  49. package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
  50. package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
  51. package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
  52. package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
  53. package/dist/src/cache-BKYi3kAA.js +3 -0
  54. package/dist/src/{cache-BI5BY7ey.js → cache-BR77mdIR.js} +127 -11
  55. package/dist/src/{cache-Cr-qWIbP.js → cache-CrioYnaa.js} +125 -10
  56. package/dist/src/{cache-Bzttsk0X.js → cache-DdriHsNX.js} +125 -10
  57. package/dist/src/{cache-DGg-yTZG.cjs → cache-h5MWOBZI.cjs} +135 -25
  58. package/dist/src/{chat-Cx_LkwvZ.js → chat-BBEnnpQk.js} +11 -11
  59. package/dist/src/{chat-DChSH_Es.js → chat-BSos6PvZ.js} +9 -9
  60. package/dist/src/{chat-aMQZw6R7.js → chat-Bnt7ieO0.js} +4 -4
  61. package/dist/src/{chat-vYqqv1gP.cjs → chat-DSyYuTYT.cjs} +14 -13
  62. package/dist/src/{chat-BLOdH60v.js → chat-DTdf-J5Q.js} +11 -11
  63. package/dist/src/{chat-DH97tVV9.cjs → chat-Dq3DomYU.cjs} +6 -6
  64. package/dist/src/{chat-DG2LkwLq.js → chat-g5QLeLOo.js} +4 -4
  65. package/dist/src/{chat-D9nudO9b.js → chat-mTTuUAYb.js} +4 -4
  66. package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
  67. package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
  68. package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
  69. package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
  70. package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-BGUac_kS.js} +213 -66
  71. package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-BP__YGfK.js} +213 -66
  72. package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-Ddgx5BIQ.js} +212 -65
  73. package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-n2XcEclh.cjs} +219 -71
  74. package/dist/src/cloud-DiWbUiVP.js +3 -0
  75. package/dist/src/{cloud-Da0bofJd.js → cloud-DqF5N1aJ.js} +2 -2
  76. package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-BGamMotN.js} +4 -4
  77. package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-BwW8W-w7.js} +4 -4
  78. package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-DCRGnsyL.cjs} +6 -6
  79. package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-sD26nP6V.js} +4 -4
  80. package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-2lnOT4qM.js} +3 -3
  81. package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-B1-8KNCt.js} +3 -3
  82. package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-CCC1DFrC.cjs} +5 -5
  83. package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-DOghiU6r.js} +3 -3
  84. package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-BWeWamEb.js} +19 -4
  85. package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-DyQB1P0p.js} +20 -5
  86. package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-R9u_G7W9.cjs} +27 -11
  87. package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-ZQRJSJjU.js} +20 -5
  88. package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-B62H0fe7.js} +16 -5
  89. package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-BRPUvJG8.cjs} +31 -12
  90. package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-Dio1zJBS.js} +18 -7
  91. package/dist/src/codex-sdk-DxukZs_K.js +3 -0
  92. package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-iEmW1eS2.js} +18 -7
  93. package/dist/src/{cometapi-B5ImDlSm.js → cometapi--hh7dESS.js} +7 -7
  94. package/dist/src/{cometapi-CC7hWxmX.js → cometapi-C7yWNGqt.js} +7 -7
  95. package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CSIi16a0.js} +7 -7
  96. package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-sZgBZtbU.cjs} +8 -8
  97. package/dist/src/{completion-DtQ72Bm3.cjs → completion-CWtqdn3z.cjs} +6 -6
  98. package/dist/src/{completion-Vq_ad618.js → completion-DT8cxo9T.js} +4 -4
  99. package/dist/src/{completion-2iuYVxwi.js → completion-DUScduXp.js} +5 -5
  100. package/dist/src/{completion-CrD6MQ93.js → completion-D_2IOAoS.js} +5 -5
  101. package/dist/src/constants-BjJV0cRr.js +6 -0
  102. package/dist/src/constants-DH5XYLKZ.js +7 -0
  103. package/dist/src/constants-DZGEFLsu.js +6 -0
  104. package/dist/src/constants-a2kYssQk.cjs +11 -0
  105. package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
  106. package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
  107. package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
  108. package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
  109. package/dist/src/crescendo-B41TwUHM.js +722 -0
  110. package/dist/src/crescendo-Bfic7KC4.js +724 -0
  111. package/dist/src/crescendo-DXFB7rHP.cjs +725 -0
  112. package/dist/src/crescendo-DvvYxMLA.js +723 -0
  113. package/dist/src/custom-BdzuqFTN.js +619 -0
  114. package/dist/src/custom-Cf5Q6r-P.js +618 -0
  115. package/dist/src/custom-DLgufezC.js +620 -0
  116. package/dist/src/custom-LEXHCRe3.cjs +621 -0
  117. package/dist/src/{docker--3qzPa-6.js → docker-BOSO_6hK.js} +5 -5
  118. package/dist/src/{docker-D3AY-5F5.cjs → docker-D0h2vFrc.cjs} +6 -6
  119. package/dist/src/{docker-DCsCDvwM.js → docker-D2TWGyTP.js} +5 -5
  120. package/dist/src/{docker-Dorv4_Dg.js → docker-Vj_4_cPg.js} +5 -5
  121. package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BQIApR18.cjs} +6 -6
  122. package/dist/src/{embedding-ChS1ivFS.js → embedding-CStK0TV6.js} +5 -5
  123. package/dist/src/{embedding-DNRvZwRN.js → embedding-CU78FMnw.js} +5 -5
  124. package/dist/src/{embedding-D_bI4NDq.js → embedding-CV8lmCnU.js} +4 -4
  125. package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
  126. package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
  127. package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
  128. package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
  129. package/dist/src/{eval-DJ_4A-tr.js → eval-B3BaNBbO.js} +19 -19
  130. package/dist/src/{eval-BQPLBJbw.js → eval-DfR9885C.js} +1 -1
  131. package/dist/src/evalResult-BtZSUgQv.js +3 -0
  132. package/dist/src/{evalResult-D6P5I5il.js → evalResult-CcSqNl_Y.js} +17 -10
  133. package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-DHXs-9TL.cjs} +29 -11
  134. package/dist/src/{evalResult-pSvGWFMo.js → evalResult-PGqEbasb.js} +18 -11
  135. package/dist/src/evaluator-B9LGbKI8.js +3 -0
  136. package/dist/src/{evaluator-D-UIbbYq.js → evaluator-n_dEb00o.js} +258 -132
  137. package/dist/src/evaluatorHelpers-CrRObe2z.js +510 -0
  138. package/dist/src/evaluatorHelpers-D06I9WFL.cjs +537 -0
  139. package/dist/src/evaluatorHelpers-D1_kwvyp.js +511 -0
  140. package/dist/src/{extractor-DxyiFhPk.js → extractor-BVkZtk4R.js} +5 -5
  141. package/dist/src/{extractor-BM3jRERL.js → extractor-D3Fv_Tdh.js} +5 -5
  142. package/dist/src/{extractor-YlZbUMsL.js → extractor-Jp53vs-6.js} +5 -5
  143. package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-bV_NOoaz.cjs} +6 -6
  144. package/dist/src/{fetch-Y5qX_kST.js → fetch-BSSAcMxf.js} +90 -26
  145. package/dist/src/fetch-BodQTrMU.js +3 -0
  146. package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Cfm4EuOB.cjs} +115 -32
  147. package/dist/src/{fetch-B6ch2nU2.js → fetch-Cpf1U1nO.js} +86 -26
  148. package/dist/src/{fetch-D9xxyC1p.js → fetch-Doks14zQ.js} +90 -26
  149. package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
  150. package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
  151. package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
  152. package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
  153. package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
  154. package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
  155. package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
  156. package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
  157. package/dist/src/goat-Ba7Gznzq.js +467 -0
  158. package/dist/src/goat-CJngS-WU.js +468 -0
  159. package/dist/src/goat-CwsbfQeu.js +466 -0
  160. package/dist/src/goat-DOMbozoX.cjs +470 -0
  161. package/dist/src/graders-B3D7kCcD.js +3 -0
  162. package/dist/src/{graders-CgPn32yp.js → graders-BQt1BaQe.js} +1019 -86
  163. package/dist/src/{graders-Bw1wk_21.cjs → graders-CBQ2s6gz.cjs} +1087 -130
  164. package/dist/src/{graders-CwrbifOo.js → graders-DaRU98zs.js} +1022 -84
  165. package/dist/src/{graders-BoUqsCEm.js → graders-KXzjnIim.js} +1024 -86
  166. package/dist/src/hydra-BU6GjYoQ.js +560 -0
  167. package/dist/src/hydra-BmXHxlyl.js +559 -0
  168. package/dist/src/hydra-CR0KyYDb.js +561 -0
  169. package/dist/src/hydra-RO9jBks7.cjs +560 -0
  170. package/dist/src/{image-BeWaInPF.js → image-BBmZdKO3.js} +3 -3
  171. package/dist/src/{image-qjO6FWPs.js → image-B_PFG7IG.js} +3 -3
  172. package/dist/src/image-BtODOZjh.js +442 -0
  173. package/dist/src/image-CYuNJIqd.js +442 -0
  174. package/dist/src/{image-D10dNAav.cjs → image-E00WFZkq.cjs} +4 -4
  175. package/dist/src/image-HK2Cfqb1.cjs +465 -0
  176. package/dist/src/{image-Dr_3I3nK.js → image-LGj8dTcr.js} +3 -3
  177. package/dist/src/image-YMKejC0r.js +443 -0
  178. package/dist/src/index.cjs +711 -339
  179. package/dist/src/index.d.cts +3159 -1611
  180. package/dist/src/index.d.ts +3158 -1610
  181. package/dist/src/index.js +605 -249
  182. package/dist/src/indirectWebPwn-B80dLlFC.js +260 -0
  183. package/dist/src/indirectWebPwn-BMTXXznx.js +386 -0
  184. package/dist/src/indirectWebPwn-BZFPV7Q9.js +385 -0
  185. package/dist/src/indirectWebPwn-BaEQEOIO.cjs +260 -0
  186. package/dist/src/indirectWebPwn-BzpyMnFS.js +259 -0
  187. package/dist/src/indirectWebPwn-CiWB-vVH.js +385 -0
  188. package/dist/src/indirectWebPwn-D7NA9Nsv.cjs +397 -0
  189. package/dist/src/indirectWebPwn-o_bEFMjP.js +259 -0
  190. package/dist/src/inputVariables-B0qUChbV.js +467 -0
  191. package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
  192. package/dist/src/inputVariables-DXFdi7AI.js +468 -0
  193. package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
  194. package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
  195. package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
  196. package/dist/src/iterative-B-l0syBV.js +501 -0
  197. package/dist/src/iterative-BACUeCCz.cjs +503 -0
  198. package/dist/src/iterative-CVwoExo8.js +502 -0
  199. package/dist/src/iterative-QDrGSyss.js +500 -0
  200. package/dist/src/iterativeImage-BQg2OwA6.js +413 -0
  201. package/dist/src/iterativeImage-Bwn0fM75.js +413 -0
  202. package/dist/src/iterativeImage-CcgVyASo.cjs +415 -0
  203. package/dist/src/iterativeImage-D_UbQXg4.js +414 -0
  204. package/dist/src/iterativeMeta-CIu-CHRS.js +405 -0
  205. package/dist/src/iterativeMeta-CN8CNjFA.js +406 -0
  206. package/dist/src/iterativeMeta-DN6BTjpq.js +404 -0
  207. package/dist/src/iterativeMeta-DlqY3BsS.cjs +405 -0
  208. package/dist/src/iterativeTree-CJ8a8G9T.js +820 -0
  209. package/dist/src/iterativeTree-DcuFXnjL.js +819 -0
  210. package/dist/src/iterativeTree-FrDDYAN0.js +818 -0
  211. package/dist/src/iterativeTree-LOgAi0nU.cjs +821 -0
  212. package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-CdbcGBZF.cjs} +25 -11
  213. package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-CjOXp6Lr.js} +23 -9
  214. package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-DxAq4n4z.js} +24 -10
  215. package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-o_wTLzrt.js} +24 -10
  216. package/dist/src/{litellm-DRc4qWfc.js → litellm-B2gHwya_.js} +4 -4
  217. package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CPpdlO7n.cjs} +5 -5
  218. package/dist/src/{litellm-BLSiANhk.js → litellm-CYfgxLrM.js} +4 -4
  219. package/dist/src/{litellm-DQGo_juI.js → litellm-ojWBKU3C.js} +4 -4
  220. package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
  221. package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-BCCO9XXG.cjs} +9 -8
  222. package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BPrdihAb.js} +5 -5
  223. package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-DP5N79lB.js} +5 -5
  224. package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-DTDyoAlM.js} +5 -5
  225. package/dist/src/main.js +727 -234
  226. package/dist/src/memoryPoisoning-B6N3us35.cjs +106 -0
  227. package/dist/src/memoryPoisoning-Bc_BK_k2.js +106 -0
  228. package/dist/src/memoryPoisoning-D375zwSX.js +107 -0
  229. package/dist/src/memoryPoisoning-DJA0YjJT.js +106 -0
  230. package/dist/src/{messages-CewuNcNS.js → messages-Bg29Nbit.js} +17 -9
  231. package/dist/src/{messages-BnsVHUnm.cjs → messages-BrZEnHsV.cjs} +24 -15
  232. package/dist/src/{messages-CI69Lasb.js → messages-CBulRaud.js} +18 -10
  233. package/dist/src/{messages-B9dSjrNf.js → messages-K9A8RxBM.js} +18 -10
  234. package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
  235. package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
  236. package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
  237. package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
  238. package/dist/src/mischievousUser-BDLwSGN0.js +46 -0
  239. package/dist/src/mischievousUser-Blx-OtT3.js +47 -0
  240. package/dist/src/mischievousUser-CHuTTvBg.js +46 -0
  241. package/dist/src/mischievousUser-CSUrH3fq.cjs +46 -0
  242. package/dist/src/{modelslab-zpz9JcK0.js → modelslab-BjEk7yCP.js} +6 -6
  243. package/dist/src/{modelslab-BCLOtfek.js → modelslab-CBCrdIBc.js} +6 -6
  244. package/dist/src/{modelslab-D73OnKSx.js → modelslab-CrGpXuhv.js} +6 -6
  245. package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-Cvh0klQx.cjs} +7 -7
  246. package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-CUDDZcSA.js} +5 -5
  247. package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-CyDESu5J.js} +5 -5
  248. package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-DH_Ksu6X.cjs} +9 -8
  249. package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-YQAqUYJY.js} +5 -5
  250. package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-BVFFB7JZ.js} +2 -2
  251. package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-Bg3cxMMu.cjs} +5 -4
  252. package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-BhmnRGyh.js} +3 -3
  253. package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-DxmK95c1.js} +3 -3
  254. package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
  255. package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
  256. package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
  257. package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-CHQaZi_-.cjs} +17 -15
  258. package/dist/src/{openclaw-DA8U4DsD.js → openclaw-CWOm2eOr.js} +9 -8
  259. package/dist/src/{openclaw-DObVgpjC.js → openclaw-Cdk9JKfY.js} +9 -8
  260. package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-DhF8rUpI.js} +9 -8
  261. package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-BRjiPV-g.js} +38 -3
  262. package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-CPFMw0ed.cjs} +45 -9
  263. package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-CaSOCsBA.js} +39 -4
  264. package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-CqkyG8De.js} +39 -4
  265. package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
  266. package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
  267. package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
  268. package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
  269. package/dist/src/packageParser--MWTSrPW.js +36 -0
  270. package/dist/src/packageParser-CgE-ziRo.js +35 -0
  271. package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
  272. package/dist/src/packageParser-hwwSGnAZ.js +35 -0
  273. package/dist/src/processShim-BBxt7LKO.js +95 -0
  274. package/dist/src/processShim-BcGzU8fY.js +94 -0
  275. package/dist/src/processShim-C_z3aRvF.js +94 -0
  276. package/dist/src/processShim-DSY9BV2T.cjs +98 -0
  277. package/dist/src/promptLength-0qIHyhA5.js +71 -0
  278. package/dist/src/promptLength-4X-Wd8PG.js +72 -0
  279. package/dist/src/promptLength-B9nZEfO6.js +71 -0
  280. package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
  281. package/dist/src/promptfoo-BU4_0J85.js +238 -0
  282. package/dist/src/promptfoo-CReYAtfb.js +237 -0
  283. package/dist/src/promptfoo-ClChwT74.cjs +292 -0
  284. package/dist/src/promptfoo-CqEpj6Sr.js +237 -0
  285. package/dist/src/prompts-BYMtqPCw.js +259 -0
  286. package/dist/src/prompts-C-bqE1Yp.js +260 -0
  287. package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
  288. package/dist/src/prompts-DHhQsANy.js +259 -0
  289. package/dist/src/prompts-D_QpZ2Dm.js +271 -0
  290. package/dist/src/prompts-hNvWBD3z.cjs +284 -0
  291. package/dist/src/prompts-huDVH2CI.js +270 -0
  292. package/dist/src/prompts-p78Hul5i.cjs +289 -0
  293. package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
  294. package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
  295. package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
  296. package/dist/src/{providers-DT-GtF2t.js → providers-B7TyByfj.js} +789 -11925
  297. package/dist/src/{providers-CJh7iriU.js → providers-BGc7tDtQ.js} +796 -11872
  298. package/dist/src/{providers-Ctcc592x.js → providers-CSOp-bCm.js} +1 -1
  299. package/dist/src/{providers-eDShy16E.cjs → providers-V6RBuieY.cjs} +837 -12138
  300. package/dist/src/{providers-DRrerKra.js → providers-iH3Sw1yo.js} +851 -11938
  301. package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
  302. package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
  303. package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
  304. package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
  305. package/dist/src/{quiverai-CPKhWgaT.js → quiverai-58BtRRet.js} +3 -3
  306. package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-C1dYu5MW.js} +3 -3
  307. package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CNMIpZQg.js} +3 -3
  308. package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-fmSfU43k.cjs} +4 -4
  309. package/dist/src/registry-6Jw6ebor.cjs +124 -0
  310. package/dist/src/registry-BTGk2ZkB.js +124 -0
  311. package/dist/src/registry-BTNqmP5o.js +125 -0
  312. package/dist/src/registry-NGnOG2xa.js +124 -0
  313. package/dist/src/{server-gyd6d4Hc.js → remoteGeneration--D6WjzUm.js} +15 -108
  314. package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-BpIYlb_O.cjs} +30 -119
  315. package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-CJC3E0aW.js} +15 -147
  316. package/dist/src/remoteGeneration-D6UjE2JT.js +218 -0
  317. package/dist/src/render-BNTrbmBw.cjs +384 -0
  318. package/dist/src/render-CSP99NLm.js +348 -0
  319. package/dist/src/render-DFfDeYUK.js +347 -0
  320. package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
  321. package/dist/src/render-_6ur1fhE.js +347 -0
  322. package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
  323. package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
  324. package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
  325. package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
  326. package/dist/src/{responses-1ztiVYsx.js → responses-5Gf5HNOi.js} +11 -7
  327. package/dist/src/{responses-BiaBguAu.js → responses-BVi7xIdv.js} +13 -9
  328. package/dist/src/{responses-CF-ayauu.cjs → responses-CUARGrhY.cjs} +16 -11
  329. package/dist/src/{responses-B8haB-mD.js → responses-CrmWv6iz.js} +13 -9
  330. package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
  331. package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
  332. package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
  333. package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
  334. package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
  335. package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
  336. package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
  337. package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
  338. package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
  339. package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
  340. package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-BeVTKfrv.cjs} +65 -53
  341. package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-BwbV5U7_.js} +61 -50
  342. package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-ClAZ6Qwc.js} +61 -50
  343. package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-DY2qrOpq.js} +61 -50
  344. package/dist/src/{scanner-nOCWNIXa.js → scanner-BS-iFIp3.js} +6 -6
  345. package/dist/src/server/index.js +4115 -420
  346. package/dist/src/server-BHOEL8p8.cjs +126 -0
  347. package/dist/src/server-CbiJppij.js +107 -0
  348. package/dist/src/server-D18AAlAc.js +3 -0
  349. package/dist/src/server-DLYjSFm2.js +182 -0
  350. package/dist/src/server-JQYD_Nws.js +146 -0
  351. package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
  352. package/dist/src/shared-CRq0gGVf.js +1334 -0
  353. package/dist/src/shared-CSIGeGLl.js +1334 -0
  354. package/dist/src/shared-DNvim54U.js +1335 -0
  355. package/dist/src/shared-aHWko3P1.cjs +1436 -0
  356. package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
  357. package/dist/src/simulatedUser-C7sOFSF6.js +222 -0
  358. package/dist/src/simulatedUser-DH_7RzEQ.js +222 -0
  359. package/dist/src/simulatedUser-lgMMmniD.js +223 -0
  360. package/dist/src/simulatedUser-mnCUS9Bm.cjs +227 -0
  361. package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
  362. package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
  363. package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
  364. package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
  365. package/dist/src/storage-95htjpLs.js +834 -0
  366. package/dist/src/storage-A4WnAeN3.cjs +911 -0
  367. package/dist/src/storage-B2Ql_oq4.js +822 -0
  368. package/dist/src/storage-EKVWZBNY.js +875 -0
  369. package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
  370. package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
  371. package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
  372. package/dist/src/store-IbiRIF3k.js +3 -0
  373. package/dist/src/strategies-CBI1brSy.js +2333 -0
  374. package/dist/src/strategies-CCcnEbFO.cjs +2360 -0
  375. package/dist/src/strategies-CD1gHeeQ.js +2331 -0
  376. package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
  377. package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
  378. package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
  379. package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
  380. package/dist/src/telemetry-BUm_krVX.js +3 -0
  381. package/dist/src/{telemetry--iqaGyaS.js → telemetry-BbpmrenM.js} +4 -4
  382. package/dist/src/{telemetry-CgdVGV8N.js → telemetry-C4bX-6Sr.js} +4 -4
  383. package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-C_ImlCOk.cjs} +15 -9
  384. package/dist/src/{telemetry-DWdGHvEf.js → telemetry-DOE567Wj.js} +4 -4
  385. package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
  386. package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
  387. package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
  388. package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
  389. package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
  390. package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
  391. package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
  392. package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
  393. package/dist/src/toolAttributes-COVgDrBG.js +87 -0
  394. package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
  395. package/dist/src/tracingOptions-BOP1FsRt.js +220 -0
  396. package/dist/src/tracingOptions-BvmDa_jH.js +219 -0
  397. package/dist/src/tracingOptions-DFBoE8O0.cjs +249 -0
  398. package/dist/src/tracingOptions-xhjOg2b1.js +221 -0
  399. package/dist/src/{transcription-D7Q0vJsh.js → transcription-BOXDoehT.js} +4 -4
  400. package/dist/src/{transcription-Bm2emLmJ.js → transcription-CpVdKc4P.js} +5 -5
  401. package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-D8kkPXg_.cjs} +9 -8
  402. package/dist/src/{transcription-84t4ALo2.js → transcription-Z94eV9LR.js} +5 -5
  403. package/dist/src/transform-BnSTnFlp.js +187 -0
  404. package/dist/src/transform-BnSXWmU_2.cjs +221 -0
  405. package/dist/src/{transform-DtooZqYY.js → transform-BufxPIQL.js} +8 -8
  406. package/dist/src/transform-CGt7Kt3y2.js +186 -0
  407. package/dist/src/transform-CrPGTsij.js +186 -0
  408. package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DJkt81VY.cjs} +12 -11
  409. package/dist/src/{transform-B-b6Cq-q.js → transform-DtTfiGoh.js} +8 -8
  410. package/dist/src/{transform-_DpNB4qp.js → transform-Wp6s_5QE.js} +8 -8
  411. package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
  412. package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
  413. package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
  414. package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
  415. package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
  416. package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
  417. package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
  418. package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
  419. package/dist/src/{util-Dpmm_dAI.cjs → util-BHCAeuXx.cjs} +33 -7
  420. package/dist/src/{util-DFPeFkiV.js → util-CMrHV35u.js} +28 -8
  421. package/dist/src/{util-DvpHnLt0.cjs → util-COnzevXE.cjs} +29 -21
  422. package/dist/src/{util-BlFVL0UF.js → util-CYev3d-r.js} +22 -7
  423. package/dist/src/util-DGqkTb2-.js +327 -0
  424. package/dist/src/util-DNtUsus_.cjs +386 -0
  425. package/dist/src/{util-Dub0f_ej.js → util-DRfqa4xz.js} +17 -10
  426. package/dist/src/{util-BVXcTwXu.js → util-DTq3jq2z.js} +28 -8
  427. package/dist/src/{util-C-kmRosx.js → util-DeY58psG.js} +22 -7
  428. package/dist/src/util-DpV6KT5i.js +327 -0
  429. package/dist/src/{util-3pBZZb_H.js → util-DwNJzqOV.js} +45 -10
  430. package/dist/src/util-ETfU_sS9.js +328 -0
  431. package/dist/src/{util-A5_ZsQUn.cjs → util-NYQvo1C7.cjs} +25 -9
  432. package/dist/src/{util-DN0-b81k.js → util-X4KQgyVD.js} +28 -8
  433. package/dist/src/{util-BQOCAHQC.js → util-jZRrXe1P.js} +46 -11
  434. package/dist/src/{util-B9CNhyac.js → util-o2Qg5rZv.js} +22 -7
  435. package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
  436. package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
  437. package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
  438. package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
  439. package/dist/src/{version-CbuBKu2U.js → version-CHR-EFec.js} +2 -2
  440. package/dist/src/{version-CbpiUINz.js → version-F0YDgb7J.js} +2 -2
  441. package/dist/src/{version-D9zu9FWB.cjs → version-VzUqOBZk.cjs} +2 -2
  442. package/dist/src/{version-0frU0UTr.js → version-qVEN5qCm.js} +2 -2
  443. package/dist/tsconfig.tsbuildinfo +1 -1
  444. package/package.json +49 -23
  445. package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
  446. package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
  447. package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
  448. package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
  449. package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
  450. package/dist/src/cache-BRkhlH3k.cjs +0 -3
  451. package/dist/src/cache-BlC6aeJ0.js +0 -3
  452. package/dist/src/cloud-CoD5OacT.js +0 -3
  453. package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
  454. package/dist/src/codex-sdk-Danroptg.cjs +0 -2
  455. package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
  456. package/dist/src/evalResult-BBK58h2B.js +0 -3
  457. package/dist/src/evalResult-spPqh1G_.js +0 -2
  458. package/dist/src/evaluator-DgLKaZk8.js +0 -3
  459. package/dist/src/fetch-8viavNv8.js +0 -3
  460. package/dist/src/graders-C84JI-m5.js +0 -2
  461. package/dist/src/graders-CBbd0K0Q.cjs +0 -2
  462. package/dist/src/graders-CbQqpHSN.js +0 -3
  463. package/dist/src/graders-DS42d3ZG.js +0 -2
  464. package/dist/src/image-BmilRNqO.js +0 -258
  465. package/dist/src/image-CxJoa3aW.cjs +0 -280
  466. package/dist/src/image-DsGRlkh7.js +0 -257
  467. package/dist/src/image-a_SGUobh.js +0 -257
  468. package/dist/src/providers-BuyzKt7C.js +0 -2
  469. package/dist/src/providers-C7lNVBjX.cjs +0 -3
  470. package/dist/src/providers-CCE2COJi2.js +0 -2
  471. package/dist/src/render-7uNJ2V14.js +0 -135
  472. package/dist/src/render-DlscvAUJ.js +0 -135
  473. package/dist/src/render-eui5p5mL.js +0 -136
  474. package/dist/src/render-tG6ir9_g.cjs +0 -165
  475. package/dist/src/rubyUtils-4hjGxvju.js +0 -3
  476. package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
  477. package/dist/src/server-ByiF3qlg.js +0 -386
  478. package/dist/src/server-C0XKRNB_.cjs +0 -2
  479. package/dist/src/server-C_15p79-.js +0 -3
  480. package/dist/src/store-2OXm_eBY.js +0 -240
  481. package/dist/src/store-BELqNwvz.js +0 -3
  482. package/dist/src/store-uQZ4AjPe.cjs +0 -2
  483. package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
  484. package/dist/src/telemetry-ZdPZc0fm.js +0 -3
  485. package/dist/src/transform-BQt0BeAW.js +0 -3
  486. package/dist/src/transform-Bq5oqC0s.cjs +0 -2
  487. package/dist/src/transform-C9izGX54.cjs +0 -228
  488. package/dist/src/transform-CwbAZ84V.js +0 -216
  489. package/dist/src/transform-DzCF-wqV.js +0 -213
  490. package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/main.js CHANGED
@@ -1,35 +1,45 @@
1
1
  #!/usr/bin/env node
2
2
  import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
3
- import { A as getDefaultPort, F as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, T as CLOUD_PROVIDER_PREFIX, i as fetchWithTimeout, j as getDefaultShareViewBaseUrl, k as TERMINAL_MAX_WIDTH, l as sleep, m as REQUEST_TIMEOUT_MS, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, v as isPromptfooSampleTarget } from "./fetch-B6ch2nU2.js";
4
- import { n as VERSION } from "./version-CbpiUINz.js";
3
+ import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, M as TERMINAL_MAX_WIDTH, N as getDefaultPort, O as CLOUD_PROVIDER_PREFIX, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, _ as REQUEST_TIMEOUT_MS, f as sleep, i as fetchWithTimeout, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, x as isPromptfooSampleTarget } from "./fetch-Cpf1U1nO.js";
4
+ import { n as VERSION } from "./version-F0YDgb7J.js";
5
5
  import { t as invariant } from "./invariant-B2Rf6avk.js";
6
- import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CLJHCDDb.js";
7
- import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-Da0bofJd.js";
8
- import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-DWdGHvEf.js";
9
- import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BVH9hjgW.js";
10
- import { r as importModule, t as getDirectory } from "./esm-BX8fwlAO.js";
11
- import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-eui5p5mL.js";
12
- import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-ByiF3qlg.js";
13
- import { A as validateStrategies, B as extractVariablesFromJson, H as getShortPluginId, L as extractGoalFromPrompt, M as mediaExists, N as retrieveMedia, O as Strategies, P as pluginMatchesStrategyTargets, Q as TokenUsageTracker, Z as redteamProviderManager, _t as getUserTeams, a as resolveProviderConfigs, at as getMaxCharsPerMessageModifierValue, bt as resolveTeamFromIdentifier, ct as canCreateTargets, d as MCPProvider, dt as getConfigFromCloud, f as HttpProvider, ft as getEvalConfigFromCloud, gt as getProviderFromCloud, i as resolveProvider, it as getGeneratedPromptOverLimit, j as getMediaStorage, k as loadStrategy, lt as checkCloudPermissions, m as createTransformResponse, mt as getPluginSeverityOverridesFromCloud, n as loadApiProvider, nt as PromptfooHarmfulCompletionProvider, p as createTransformRequest, pt as getOrgContext, q as renderPrompt, r as loadApiProviders, rt as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, t as getProviderIds, ut as getCloudDatabaseId, vt as isCloudProvider, xt as resolveTeamId, yt as makeRequest$1, z as extractPromptFromTags } from "./providers-DT-GtF2t.js";
14
- import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-BI5BY7ey.js";
15
- import { i as isJavascriptFile } from "./fileExtensions-DysCsxNG.js";
16
- import { r as runPython } from "./pythonUtils-DNqbnRdx.js";
17
- import { A as maybeLoadFromExternalFile, F as readFilters, I as readOutput, T as normalizeProviderRef, _ as doesProviderRefMatch, a as ComparisonEvalNotFoundError, c as mergeComparisonTables, d as deduplicateTestCases, f as extractRuntimeVars, g as checkProviderApiKeys, h as resultIsForTestCase, i as writeOutput, k as maybeLoadConfigFromExternalFile, l as fetchCsvFromGoogleSheet, m as getTestCaseDeduplicationKey, n as createOutputMetadata, o as evalTableToJson, p as filterRuntimeVars, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as setupEnv, v as getProviderDescription } from "./util-BQOCAHQC.js";
18
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-2wIvAhB3.js";
19
- import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DjLby-uP.js";
20
- import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-NlvH23ky.js";
21
- import { n as isBlobStorageEnabled } from "./extractor-DxyiFhPk.js";
22
- import { n as escapeRegExp, t as ellipsize } from "./text-D4lz-Jg_.js";
23
- import { n as getTraceStore } from "./store-CPh25336.js";
24
- import { n as sha256, t as randomSequence } from "./createHash-DPpsZgFF.js";
25
- import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, J as doRemoteGrading, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, R as getCustomPolicies, S as PlinyPlugin, St as getDefaultProviders, T as IntentPlugin, V as fetchHuggingFaceDataset, X as readPrompts, Y as processPrompts, Z as readProviderPromptMap, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-CgPn32yp.js";
26
- import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-D-UIbbYq.js";
27
- import { i as generateIdFromPrompt } from "./utils-kt7lv30R.js";
28
- import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-DTtUuU3l.js";
29
- import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-DJ_4A-tr.js";
30
- import { t as EvalResult } from "./evalResult-D6P5I5il.js";
31
- import { t as formatDuration } from "./formatDuration-Ch4A7G3o.js";
32
- import { n as shouldUseInkUI } from "./interactiveCheck-CCICw2cy.js";
6
+ import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-D6IBfEE0.js";
7
+ import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-DqF5N1aJ.js";
8
+ import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-DOE567Wj.js";
9
+ import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, _t as CODING_AGENT_PLUGINS, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BFevViUY.js";
10
+ import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
11
+ import { r as importModule, t as getDirectory } from "./esm-Bexx2PFc.js";
12
+ import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, t as renderEnvOnlyInObject } from "./render-CSP99NLm.js";
13
+ import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-D6UjE2JT.js";
14
+ import { a as openAuthBrowser, i as checkServerRunning, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-DLYjSFm2.js";
15
+ import { _ as makeRequest$1, c as getCloudDatabaseId, d as getOrgContext, f as getPluginSeverityOverridesFromCloud, g as isCloudProvider, h as getUserTeams, i as retrieveMedia, l as getConfigFromCloud, m as getProviderFromCloud, o as canCreateTargets, r as mediaExists, s as checkCloudPermissions, t as getMediaStorage, u as getEvalConfigFromCloud, v as resolveTeamFromIdentifier, y as resolveTeamId } from "./storage-EKVWZBNY.js";
16
+ import { n as sha256, t as randomSequence } from "./createHash-CgRvs4Fn.js";
17
+ import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-BR77mdIR.js";
18
+ import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
19
+ import { D as normalizeProviderRef, L as readFilters, M as maybeLoadFromExternalFile, R as readOutput, _ as resultIsForTestCase, a as ComparisonEvalNotFoundError, b as getProviderDescription, c as getEvalTableOutputPromptLocationsBySize, d as fetchCsvFromGoogleSheet, f as setupEnv, g as getTestCaseDeduplicationKey, h as filterRuntimeVars, i as writeOutput, j as maybeLoadConfigFromExternalFile, l as getEvalTablePromptStrippedPayload, m as extractRuntimeVars, n as createOutputMetadata, o as evalTableToJson, p as deduplicateTestCases, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as mergeComparisonTables, v as checkProviderApiKeys, y as doesProviderRefMatch } from "./util-jZRrXe1P.js";
20
+ import { a as resolveProviderConfigs, d as MCPProvider, f as HttpProvider, i as resolveProvider, m as createTransformResponse, n as loadApiProvider, p as createTransformRequest, r as loadApiProviders, t as getProviderIds } from "./providers-B7TyByfj.js";
21
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
22
+ import { n as escapeRegExp, t as ellipsize } from "./text-Db-Wt2u2.js";
23
+ import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DafUHOeh.js";
24
+ import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DODuTK-a.js";
25
+ import { n as createPlaceholderInputValue, t as buildPromptInputDescriptions } from "./inputVariables-DXFdi7AI.js";
26
+ import { a as extractPromptFromTags, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, s as getShortPluginId } from "./util-ETfU_sS9.js";
27
+ import { a as assertRemoteMaterializationHandled, l as requiresRemoteMaterialization, n as PromptfooHarmfulCompletionProvider } from "./promptfoo-BU4_0J85.js";
28
+ import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as getDefaultProviders, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as readProviderPromptMap, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, X as processPrompts, Y as doRemoteGrading, Z as readPrompts, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-BQt1BaQe.js";
29
+ import { f as redteamProviderManager, m as TokenUsageTracker } from "./shared-DNvim54U.js";
30
+ import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-n_dEb00o.js";
31
+ import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
32
+ import { n as TRANSFORM_KEYS, t as INLINE_FUNCTION_LABEL } from "./transform-BnSTnFlp.js";
33
+ import { n as getTraceStore } from "./store-DKd5592Q.js";
34
+ import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CSurUUyV.js";
35
+ import { n as isBlobStorageEnabled } from "./extractor-BVkZtk4R.js";
36
+ import { n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-4X-Wd8PG.js";
37
+ import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-B3BaNBbO.js";
38
+ import { n as sanitizeProvider, t as EvalResult } from "./evalResult-CcSqNl_Y.js";
39
+ import { n as renderPrompt } from "./evaluatorHelpers-D1_kwvyp.js";
40
+ import { t as formatDuration } from "./formatDuration-CMVNrYvE.js";
41
+ import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-CBI1brSy.js";
42
+ import { n as shouldUseInkUI } from "./interactiveCheck-C4QlIuoR.js";
33
43
  import { fileURLToPath } from "node:url";
34
44
  import { Command, InvalidArgumentError } from "commander";
35
45
  import { z } from "zod";
@@ -62,9 +72,9 @@ import process$1 from "process";
62
72
  import $RefParser from "@apidevtools/json-schema-ref-parser";
63
73
  import async from "async";
64
74
  import { and, asc, count, desc, eq, inArray, isNotNull, like, or, sql } from "drizzle-orm";
65
- import cliProgress from "cli-progress";
66
75
  import { URL as URL$1 } from "url";
67
76
  import editor from "@inquirer/editor";
77
+ import cliProgress from "cli-progress";
68
78
  import debounce from "debounce";
69
79
  import { LRUCache } from "lru-cache";
70
80
  import chokidar from "chokidar";
@@ -101,7 +111,7 @@ function runCommand(program) {
101
111
  hasGithubPr: !!cmdObj.githubPr,
102
112
  hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
103
113
  });
104
- const { executeScan } = await import("./scanner-nOCWNIXa.js");
114
+ const { executeScan } = await import("./scanner-BS-iFIp3.js");
105
115
  await executeScan(repoPath, cmdObj);
106
116
  });
107
117
  }
@@ -2995,6 +3005,10 @@ function stripAuthFromUrl(urlString) {
2995
3005
  }
2996
3006
  }
2997
3007
  async function handleEmailCollection(evalRecord) {
3008
+ if (evalRecord.author) {
3009
+ logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
3010
+ return;
3011
+ }
2998
3012
  if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
2999
3013
  let email = getUserEmail();
3000
3014
  if (!email) {
@@ -3257,6 +3271,14 @@ function shouldShareResults(opts) {
3257
3271
  return cloudConfig.isEnabled() && sharing !== false;
3258
3272
  }
3259
3273
  //#endregion
3274
+ //#region src/commands/eval/redteamWarning.ts
3275
+ function warnIfRedteamConfigHasNoTests(config, testSuite) {
3276
+ if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
3277
+ Warning: Config file has a redteam section but no test cases.
3278
+ Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
3279
+ `));
3280
+ }
3281
+ //#endregion
3260
3282
  //#region src/commands/eval/summary.ts
3261
3283
  function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
3262
3284
  if (wasAborted) {
@@ -3644,6 +3666,9 @@ function setupRetryCommand(program) {
3644
3666
  }
3645
3667
  //#endregion
3646
3668
  //#region src/models/modelAudit.ts
3669
+ function getModelAuditSortColumn(sortField) {
3670
+ return modelAuditsTable[sortField];
3671
+ }
3647
3672
  function createScanId(createdAt = /* @__PURE__ */ new Date()) {
3648
3673
  return `scan-${randomSequence(3)}-${createdAt.toISOString().slice(0, 19)}`;
3649
3674
  }
@@ -3684,8 +3709,10 @@ var ModelAudit = class ModelAudit {
3684
3709
  this.issues = data.issues || data.results?.issues || null;
3685
3710
  const issues = data.issues || data.results?.issues;
3686
3711
  const resultsHasErrors = data.results?.has_errors ?? false;
3687
- if (data.hasErrors === void 0) this.hasErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
3688
- else this.hasErrors = data.hasErrors;
3712
+ if (data.hasErrors === void 0) {
3713
+ const hasActualErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
3714
+ this.hasErrors = hasActualErrors;
3715
+ } else this.hasErrors = data.hasErrors;
3689
3716
  this.totalChecks = data.totalChecks;
3690
3717
  this.passedChecks = data.passedChecks;
3691
3718
  this.failedChecks = data.failedChecks;
@@ -3783,9 +3810,9 @@ var ModelAudit = class ModelAudit {
3783
3810
  static async getMany(limit = 100, offset = 0, sortField = "createdAt", sortOrder = "desc", search) {
3784
3811
  let query = getDb().select().from(modelAuditsTable);
3785
3812
  if (search) query = query.where(or(like(modelAuditsTable.name, `%${search}%`), like(modelAuditsTable.modelPath, `%${search}%`), like(modelAuditsTable.id, `%${search}%`)));
3786
- const sortColumn = sortField === "name" ? modelAuditsTable.name : sortField === "modelPath" ? modelAuditsTable.modelPath : modelAuditsTable.createdAt;
3787
- if (sortOrder === "asc") query = query.orderBy(asc(sortColumn));
3788
- else query = query.orderBy(desc(sortColumn));
3813
+ const sortColumn = getModelAuditSortColumn(sortField);
3814
+ if (sortOrder === "asc") query = sortField === "id" ? query.orderBy(asc(sortColumn)) : query.orderBy(asc(sortColumn), asc(modelAuditsTable.id));
3815
+ else query = sortField === "id" ? query.orderBy(desc(sortColumn)) : query.orderBy(desc(sortColumn), desc(modelAuditsTable.id));
3789
3816
  return (await query.limit(limit).offset(offset).all()).map((r) => new ModelAudit({
3790
3817
  ...r,
3791
3818
  persisted: true
@@ -4144,14 +4171,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
4144
4171
  state.resume = true;
4145
4172
  state.retryMode = true;
4146
4173
  } else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
4147
- if (!cmdObj.envPath && commandLineOptions?.envPath) {
4174
+ if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
4148
4175
  logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
4149
4176
  setupEnv(commandLineOptions.envPath);
4150
4177
  }
4151
- if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
4152
- Warning: Config file has a redteam section but no test cases.
4153
- Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
4154
- `));
4178
+ warnIfRedteamConfigHasNoTests(config, testSuite);
4155
4179
  if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
4156
4180
  const maybeUrl = config.providers[0]?.config?.url;
4157
4181
  if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
@@ -4259,7 +4283,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
4259
4283
  ${z.prettifyError(testSuiteSchema.error)}
4260
4284
 
4261
4285
  Please review your promptfooconfig.yaml configuration.`));
4262
- const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, { runtimeOptions: options }) : new Eval(config, { runtimeOptions: options });
4286
+ const author = getAuthor();
4287
+ const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
4288
+ author,
4289
+ runtimeOptions: options
4290
+ }) : new Eval(config, {
4291
+ author,
4292
+ runtimeOptions: options
4293
+ });
4263
4294
  const abortController = new AbortController();
4264
4295
  const previousAbortSignal = evaluateOptions.abortSignal;
4265
4296
  evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
@@ -6032,25 +6063,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
6032
6063
  getAssertions(_prompt) {
6033
6064
  return getHarmfulAssertions(this.harmCategory);
6034
6065
  }
6035
- promptsToTestCases(prompts) {
6066
+ async promptsToTestCases(prompts) {
6036
6067
  const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
6037
6068
  const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
6038
- return prompts.map(({ __prompt }) => {
6069
+ const pluginId = getShortPluginId(this.harmCategory);
6070
+ return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
6039
6071
  const vars = { [this.injectVar]: __prompt };
6040
- if (hasMultipleInputs) try {
6041
- const parsed = JSON.parse(__prompt);
6042
- Object.assign(vars, extractVariablesFromJson(parsed, this.config.inputs));
6043
- } catch {}
6072
+ let inputMaterialization;
6073
+ if (hasMultipleInputs) {
6074
+ let parsed;
6075
+ try {
6076
+ parsed = JSON.parse(__prompt);
6077
+ } catch (error) {
6078
+ logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
6079
+ }
6080
+ if (parsed) try {
6081
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
6082
+ materializationIndex,
6083
+ pluginId,
6084
+ provider: this.provider,
6085
+ purpose: this.purpose
6086
+ });
6087
+ Object.assign(vars, materializedVars.vars);
6088
+ inputMaterialization = materializedVars.metadata;
6089
+ } catch (error) {
6090
+ logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
6091
+ throw error;
6092
+ }
6093
+ }
6044
6094
  return {
6045
6095
  vars,
6046
6096
  metadata: {
6047
6097
  harmCategory: harmCategoryLabel,
6048
- pluginId: getShortPluginId(this.harmCategory),
6049
- pluginConfig: this.config
6098
+ pluginId,
6099
+ pluginConfig: this.config,
6100
+ ...inputMaterialization ? { inputMaterialization } : {}
6050
6101
  },
6051
6102
  assert: getHarmfulAssertions(this.harmCategory)
6052
6103
  };
6053
- });
6104
+ }));
6054
6105
  }
6055
6106
  };
6056
6107
  //#endregion
@@ -6059,20 +6110,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
6059
6110
  * Extract content from <Prompt> tags and parse JSON if inputs are defined.
6060
6111
  * Returns the processed prompt and any additional vars extracted from JSON.
6061
6112
  */
6062
- function processPromptForInputs(prompt, _injectVar, inputs) {
6113
+ async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
6063
6114
  let processedPrompt = prompt.trim();
6064
6115
  const additionalVars = {};
6116
+ let additionalMetadata;
6065
6117
  const extractedPrompt = extractPromptFromTags(processedPrompt);
6066
6118
  if (extractedPrompt) processedPrompt = extractedPrompt;
6067
- if (inputs && Object.keys(inputs).length > 0) try {
6068
- const parsed = JSON.parse(processedPrompt);
6069
- Object.assign(additionalVars, extractVariablesFromJson(parsed, inputs));
6070
- } catch {
6071
- logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode");
6119
+ if (inputs && Object.keys(inputs).length > 0) {
6120
+ let parsed;
6121
+ try {
6122
+ parsed = JSON.parse(processedPrompt);
6123
+ } catch (error) {
6124
+ logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
6125
+ }
6126
+ if (parsed) try {
6127
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
6128
+ materializationIndex,
6129
+ pluginId: plugin,
6130
+ provider,
6131
+ purpose
6132
+ });
6133
+ Object.assign(additionalVars, materializedVars.vars);
6134
+ additionalMetadata = materializedVars.metadata;
6135
+ } catch (error) {
6136
+ logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
6137
+ throw error;
6138
+ }
6072
6139
  }
6073
6140
  return {
6074
6141
  processedPrompt,
6075
- additionalVars
6142
+ additionalVars,
6143
+ additionalMetadata
6076
6144
  };
6077
6145
  }
6078
6146
  async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
@@ -6093,15 +6161,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
6093
6161
  };
6094
6162
  const allPrompts = await retryWithDeduplication(generatePrompts, n);
6095
6163
  const inputs = config?.inputs;
6096
- return sampleArray(allPrompts, n).map((prompt) => {
6097
- const { processedPrompt, additionalVars } = processPromptForInputs(prompt, injectVar, inputs);
6164
+ return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
6165
+ const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
6098
6166
  const testCase = createTestCase(injectVar, processedPrompt, plugin);
6099
6167
  if (Object.keys(additionalVars).length > 0) testCase.vars = {
6100
6168
  ...testCase.vars,
6101
6169
  ...additionalVars
6102
6170
  };
6171
+ if (additionalMetadata) testCase.metadata = {
6172
+ ...testCase.metadata,
6173
+ inputMaterialization: additionalMetadata
6174
+ };
6103
6175
  return testCase;
6104
- });
6176
+ }));
6105
6177
  }
6106
6178
  //#endregion
6107
6179
  //#region src/redteam/plugins/teenSafety/graderExamples.ts
@@ -6419,7 +6491,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
6419
6491
  function computeModifiersFromConfig(config) {
6420
6492
  const modifiers = { ...config?.modifiers };
6421
6493
  if (config?.language && typeof config.language === "string") modifiers.language = config.language;
6422
- if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
6494
+ if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
6423
6495
  const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
6424
6496
  if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
6425
6497
  return modifiers;
@@ -6577,6 +6649,7 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
6577
6649
  logger.error(`Error generating test cases for ${key}: ${statusText} ${JSON.stringify(data)}`);
6578
6650
  return [];
6579
6651
  }
6652
+ if (requiresRemoteMaterialization(config?.inputs)) assertRemoteMaterializationHandled(data, `Remote plugin generation for ${key}`);
6580
6653
  const ret = data.result;
6581
6654
  logger.debug(`Received remote generation for ${key}:\n${JSON.stringify(ret)}`);
6582
6655
  return ret;
@@ -6595,13 +6668,14 @@ function createPluginFactory(PluginClass, key, validate) {
6595
6668
  logger.debug(`Using local redteam generation for ${key}`);
6596
6669
  return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
6597
6670
  }
6671
+ const pluginId = getShortPluginId(key);
6598
6672
  const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
6599
6673
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
6600
6674
  return testCases.map((testCase) => ({
6601
6675
  ...testCase,
6602
6676
  metadata: {
6603
6677
  ...testCase.metadata,
6604
- pluginId: getShortPluginId(key),
6678
+ pluginId,
6605
6679
  pluginConfig: {
6606
6680
  ...configWithDefaults,
6607
6681
  modifiers: computedModifiers
@@ -6658,7 +6732,7 @@ const pluginFactories = [
6658
6732
  key: category,
6659
6733
  action: async (params) => {
6660
6734
  if (neverGenerateRemote()) {
6661
- logger.error(`${category} plugin requires remote generation to be enabled`);
6735
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
6662
6736
  return [];
6663
6737
  }
6664
6738
  const testCases = await getHarmfulTests(params, category);
@@ -6681,13 +6755,14 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
6681
6755
  key: category,
6682
6756
  action: async (params) => {
6683
6757
  if (shouldGenerateRemote()) {
6758
+ const pluginId = getShortPluginId(category);
6684
6759
  const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
6685
6760
  const computedModifiers = computeModifiersFromConfig(params.config);
6686
6761
  return testCases.map((testCase) => ({
6687
6762
  ...testCase,
6688
6763
  metadata: {
6689
6764
  ...testCase.metadata,
6690
- pluginId: getShortPluginId(category),
6765
+ pluginId,
6691
6766
  pluginConfig: {
6692
6767
  ...params.config,
6693
6768
  modifiers: computedModifiers
@@ -6709,16 +6784,17 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
6709
6784
  key: category,
6710
6785
  action: async (params) => {
6711
6786
  if (neverGenerateRemote()) {
6712
- logger.error(`${category} plugin requires remote generation to be enabled`);
6787
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
6713
6788
  return [];
6714
6789
  }
6790
+ const pluginId = getShortPluginId(category);
6715
6791
  const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
6716
6792
  const computedModifiers = computeModifiersFromConfig(params.config);
6717
6793
  return testCases.map((testCase) => ({
6718
6794
  ...testCase,
6719
6795
  metadata: {
6720
6796
  ...testCase.metadata,
6721
- pluginId: getShortPluginId(category),
6797
+ pluginId,
6722
6798
  pluginConfig: {
6723
6799
  ...params.config,
6724
6800
  modifiers: computedModifiers
@@ -6734,16 +6810,17 @@ function createRemotePlugin(key, validate) {
6734
6810
  action: async ({ purpose, injectVar, n, config }) => {
6735
6811
  const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
6736
6812
  if (neverGenerateRemote()) {
6737
- logger.error(`${key} plugin requires remote generation to be enabled`);
6813
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
6738
6814
  return [];
6739
6815
  }
6816
+ const pluginId = getShortPluginId(key);
6740
6817
  const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
6741
6818
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
6742
6819
  const testsWithMetadata = testCases.map((testCase) => ({
6743
6820
  ...testCase,
6744
6821
  metadata: {
6745
6822
  ...testCase.metadata,
6746
- pluginId: getShortPluginId(key),
6823
+ pluginId,
6747
6824
  pluginConfig: {
6748
6825
  ...configWithDefaults,
6749
6826
  modifiers: computedModifiers
@@ -6804,6 +6881,15 @@ async function validateSharpDependency(strategies, plugins, checkSharp = isSharp
6804
6881
  }
6805
6882
  //#endregion
6806
6883
  //#region src/redteam/index.ts
6884
+ const MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY = "__promptfooMaterializedMultiInputPrompt";
6885
+ function getMaterializedMultiInputPromptSnapshot(metadata) {
6886
+ const snapshot = metadata?.[MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY];
6887
+ return typeof snapshot === "string" ? snapshot : void 0;
6888
+ }
6889
+ function getMaterializedMultiInputPromptMetadata(vars) {
6890
+ const prompt = vars?.[MULTI_INPUT_VAR];
6891
+ return typeof prompt === "string" ? { [MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY]: prompt } : void 0;
6892
+ }
6807
6893
  function getPolicyText(metadata) {
6808
6894
  if (!metadata || metadata.policy === void 0 || metadata.policy === null) return;
6809
6895
  const policyValue = metadata.policy;
@@ -6813,6 +6899,44 @@ function getPolicyText(metadata) {
6813
6899
  return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
6814
6900
  }
6815
6901
  }
6902
+ async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
6903
+ const inputs = testCase.metadata?.pluginConfig?.inputs;
6904
+ const inputMaterialization = testCase.metadata?.inputMaterialization;
6905
+ const materializedPromptSnapshot = getMaterializedMultiInputPromptSnapshot(testCase.metadata);
6906
+ const currentInjectVar = testCase.vars?.[injectVar];
6907
+ if (!inputs || Object.keys(inputs).length === 0 || !currentInjectVar) return {
6908
+ inputMaterialization,
6909
+ vars: testCase.vars
6910
+ };
6911
+ const promptChangedSinceMaterialization = typeof currentInjectVar === "string" && typeof materializedPromptSnapshot === "string" && currentInjectVar !== materializedPromptSnapshot;
6912
+ if (Boolean(inputMaterialization) && Object.keys(inputs).every((key) => Object.prototype.hasOwnProperty.call(testCase.vars ?? {}, key)) && !promptChangedSinceMaterialization) return {
6913
+ inputMaterialization,
6914
+ vars: testCase.vars
6915
+ };
6916
+ try {
6917
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(currentInjectVar)), inputs, {
6918
+ materializationIndex,
6919
+ pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
6920
+ provider,
6921
+ purpose
6922
+ });
6923
+ return {
6924
+ inputMaterialization: materializedVars.metadata ? {
6925
+ ...inputMaterialization,
6926
+ ...materializedVars.metadata
6927
+ } : inputMaterialization,
6928
+ vars: {
6929
+ ...testCase.vars,
6930
+ ...materializedVars.vars
6931
+ }
6932
+ };
6933
+ } catch {
6934
+ return {
6935
+ inputMaterialization,
6936
+ vars: testCase.vars
6937
+ };
6938
+ }
6939
+ }
6816
6940
  /**
6817
6941
  * Gets the severity level for a plugin based on its ID and configuration.
6818
6942
  * @param pluginId - The ID of the plugin.
@@ -6954,6 +7078,7 @@ const categories = {
6954
7078
  foundation: FOUNDATION_PLUGINS,
6955
7079
  harmful: Object.keys(HARM_PLUGINS),
6956
7080
  "coding-agent:core": CODING_AGENT_CORE_PLUGINS,
7081
+ "coding-agent:all": CODING_AGENT_PLUGINS,
6957
7082
  bias: BIAS_PLUGINS,
6958
7083
  pii: PII_PLUGINS,
6959
7084
  medical: MEDICAL_PLUGINS,
@@ -7021,7 +7146,8 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
7021
7146
  ...test.metadata?.modifiers,
7022
7147
  ...languageToAdd
7023
7148
  },
7024
- ...languageToAdd
7149
+ ...languageToAdd,
7150
+ ...getMaterializedMultiInputPromptMetadata(test.vars)
7025
7151
  }
7026
7152
  };
7027
7153
  }
@@ -7048,7 +7174,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
7048
7174
  * @param injectVar - The variable to inject.
7049
7175
  * @returns An array of new test cases generated by strategies.
7050
7176
  */
7051
- async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
7177
+ async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
7052
7178
  const newTestCases = [];
7053
7179
  const strategyResults = {};
7054
7180
  for (const strategy of strategies) {
@@ -7104,14 +7230,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
7104
7230
  }
7105
7231
  }
7106
7232
  resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
7107
- newTestCases.push(...resultTestCases.map((t) => {
7108
- const inputs = t?.metadata?.pluginConfig?.inputs;
7109
- let updatedVars = t.vars;
7110
- if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
7111
- const parsed = JSON.parse(String(t.vars[injectVar]));
7112
- updatedVars = { ...t.vars };
7113
- Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
7114
- } catch {}
7233
+ newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
7234
+ const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
7115
7235
  const strategyConfig = {
7116
7236
  ...strategy.config || {},
7117
7237
  ...maxCharsPerMessage ? { maxCharsPerMessage } : {},
@@ -7119,16 +7239,18 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
7119
7239
  };
7120
7240
  return {
7121
7241
  ...t,
7122
- vars: updatedVars,
7242
+ vars,
7123
7243
  metadata: {
7124
7244
  ...t?.metadata || {},
7125
7245
  ...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
7126
7246
  ...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
7127
7247
  ...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
7128
- ...Object.keys(strategyConfig).length > 0 && { strategyConfig }
7248
+ ...inputMaterialization && { inputMaterialization },
7249
+ ...Object.keys(strategyConfig).length > 0 && { strategyConfig },
7250
+ ...getMaterializedMultiInputPromptMetadata(vars)
7129
7251
  }
7130
7252
  };
7131
- }));
7253
+ })));
7132
7254
  const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
7133
7255
  const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
7134
7256
  const applyNumTestsCap = (calculatedRequested) => {
@@ -7471,14 +7593,21 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7471
7593
  }
7472
7594
  });
7473
7595
  const languageResults = await Promise.allSettled(languagePromises);
7474
- for (const result of languageResults) if (result.status === "fulfilled") {
7596
+ for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
7475
7597
  const { lang, tests, requested, generated } = result.value;
7476
7598
  allPluginTests.push(...tests);
7477
7599
  resultsPerLanguage[lang || "default"] = {
7478
7600
  requested,
7479
7601
  generated
7480
7602
  };
7481
- } else logger.warn(`[Language Processing] Error generating tests for ${plugin.id}: ${result.reason}`);
7603
+ } else {
7604
+ const lang = languages[index];
7605
+ logger.warn(`[Language Processing] Error generating tests for ${plugin.id}: ${result.reason}`);
7606
+ resultsPerLanguage[lang || "default"] = {
7607
+ requested: plugin.numTests,
7608
+ generated: 0
7609
+ };
7610
+ }
7482
7611
  logger.debug(`[Language Processing] Total tests generated for ${plugin.id}: ${allPluginTests.length} (across ${languages.length} language(s))`);
7483
7612
  if (!Array.isArray(allPluginTests) || allPluginTests.length === 0) logger.warn(`Failed to generate tests for ${plugin.id}`);
7484
7613
  else {
@@ -7512,35 +7641,51 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7512
7641
  generated: allPluginTests.length
7513
7642
  };
7514
7643
  } else if (plugin.id.startsWith("file://")) try {
7515
- const customTests = await new CustomPlugin(redteamProvider, purpose, injectVar, plugin.id, resolvePluginConfigWithMaxChars(plugin.config, maxCharsPerMessage)).generateTests(plugin.numTests, delay);
7516
- const testCasesWithMetadata = filterOversizedTestCases(customTests.map((t) => {
7517
- const includePluginConfig = !(t.metadata && Object.hasOwn(t.metadata, "pluginConfig") && t.metadata.pluginConfig === void 0);
7518
- const pluginConfigWithMaxChars = {
7644
+ const languageConfig = plugin.config?.language ?? language;
7645
+ const languages = Array.isArray(languageConfig) ? languageConfig : languageConfig ? [languageConfig] : [void 0];
7646
+ const allCustomTests = [];
7647
+ const resultsPerLanguage = {};
7648
+ const languagePromises = languages.map(async (lang) => {
7649
+ const resolvedConfig = {
7519
7650
  ...resolvePluginConfigWithMaxChars(plugin.config, maxCharsPerMessage),
7520
- ...t.metadata?.pluginConfig ?? {}
7651
+ ...lang ? { language: lang } : {},
7652
+ ...hasMultipleInputs ? { inputs } : {}
7521
7653
  };
7522
- const modifiers = {
7523
- ...buildRedteamModifiers({
7654
+ const customPluginConfig = {
7655
+ ...resolvedConfig,
7656
+ modifiers: buildRedteamModifiers({
7524
7657
  maxCharsPerMessage,
7525
- pluginConfig: pluginConfigWithMaxChars,
7658
+ pluginConfig: resolvedConfig,
7526
7659
  testGenerationInstructions
7527
- }),
7528
- ...t.metadata?.modifiers
7660
+ })
7529
7661
  };
7662
+ const testCasesWithMetadata = filterOversizedTestCases((await new CustomPlugin(redteamProvider, purpose, injectVar, plugin.id, customPluginConfig).generateTests(plugin.numTests, delay)).map((t) => addLanguageToPluginMetadata(t, lang, plugin, maxCharsPerMessage, testGenerationInstructions)), injectVar, `Custom plugin ${plugin.id}`, maxCharsPerMessage);
7530
7663
  return {
7531
- ...t,
7532
- metadata: {
7533
- ...t.metadata || {},
7534
- pluginId: plugin.id,
7535
- ...includePluginConfig && { pluginConfig: pluginConfigWithMaxChars },
7536
- severity: plugin.severity || getPluginSeverity(plugin.id, resolvePluginConfig(plugin.config)),
7537
- modifiers
7538
- }
7664
+ lang,
7665
+ tests: testCasesWithMetadata,
7666
+ requested: plugin.numTests,
7667
+ generated: testCasesWithMetadata.length
7539
7668
  };
7540
- }), injectVar, `Custom plugin ${plugin.id}`, maxCharsPerMessage);
7669
+ });
7670
+ const languageResults = await Promise.allSettled(languagePromises);
7671
+ for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
7672
+ const { lang, tests, requested, generated } = result.value;
7673
+ allCustomTests.push(...tests);
7674
+ resultsPerLanguage[lang || "default"] = {
7675
+ requested,
7676
+ generated
7677
+ };
7678
+ } else {
7679
+ const lang = languages[index];
7680
+ logger.warn(`[Language Processing] Error generating tests for custom plugin ${plugin.id}: ${result.reason}`);
7681
+ resultsPerLanguage[lang || "default"] = {
7682
+ requested: plugin.numTests,
7683
+ generated: 0
7684
+ };
7685
+ }
7541
7686
  if (needsGoalExtraction) {
7542
- logger.debug(`Extracting goal for ${testCasesWithMetadata.length} custom tests from ${plugin.id}...`);
7543
- for (const testCase of testCasesWithMetadata) {
7687
+ logger.debug(`Extracting goal for ${allCustomTests.length} custom tests from ${plugin.id}...`);
7688
+ for (const testCase of allCustomTests) {
7544
7689
  const promptVar = testCase.vars?.[injectVar];
7545
7690
  const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
7546
7691
  const policy = getPolicyText(testCase.metadata);
@@ -7548,13 +7693,21 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7548
7693
  testCase.metadata.goal = extractedGoal;
7549
7694
  }
7550
7695
  }
7551
- testCases.push(...testCasesWithMetadata);
7552
- logger.debug(`Added ${customTests.length} custom test cases from ${plugin.id}`);
7553
- const displayId = getPluginDisplayId(plugin);
7554
- pluginResults[displayId] = {
7555
- requested: plugin.numTests,
7556
- generated: testCasesWithMetadata.length
7696
+ testCases.push(...allCustomTests);
7697
+ logger.debug(`Added ${allCustomTests.length} custom test cases from ${plugin.id}`);
7698
+ const baseDisplayId = getPluginDisplayId(plugin);
7699
+ if (languages.filter((lang) => lang !== void 0).length > 1) for (const [langKey, result] of Object.entries(resultsPerLanguage)) {
7700
+ const displayId = langKey === "en" ? baseDisplayId : `(${langKey}) ${baseDisplayId}`;
7701
+ pluginResults[displayId] = {
7702
+ requested: result.requested,
7703
+ generated: result.generated
7704
+ };
7705
+ }
7706
+ else pluginResults[baseDisplayId] = {
7707
+ requested: plugin.numTests * languages.length,
7708
+ generated: allCustomTests.length
7557
7709
  };
7710
+ progressBar?.increment(plugin.numTests * languages.length);
7558
7711
  } catch (e) {
7559
7712
  logger.error(`Error generating tests for custom plugin ${plugin.id}: ${e}`);
7560
7713
  const displayId = getPluginDisplayId(plugin);
@@ -7583,7 +7736,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7583
7736
  targetIds,
7584
7737
  ...retryStrategy.config
7585
7738
  };
7586
- const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
7739
+ const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
7587
7740
  pluginTestCases.push(...retryTestCases);
7588
7741
  Object.assign(strategyResults, retryResults);
7589
7742
  if (showProgressBar) progressBar?.increment(retryTestCases.length);
@@ -7591,7 +7744,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
7591
7744
  checkAbort();
7592
7745
  const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
7593
7746
  if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
7594
- const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
7747
+ const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
7595
7748
  Object.assign(strategyResults, otherStrategyResults);
7596
7749
  if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
7597
7750
  const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
@@ -8346,6 +8499,12 @@ async function doRedteamRun(options) {
8346
8499
  return evalResult;
8347
8500
  }
8348
8501
  //#endregion
8502
+ //#region src/types/transform.ts
8503
+ /** Runtime type guard for `TransformFunction` values. */
8504
+ function isTransformFunction(value) {
8505
+ return typeof value === "function";
8506
+ }
8507
+ //#endregion
8349
8508
  //#region src/index.ts
8350
8509
  /**
8351
8510
  * Shallow-clone a test case so the caller can swap in resolved ApiProvider
@@ -8367,66 +8526,148 @@ function cloneTestForResolve(test) {
8367
8526
  if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
8368
8527
  return cloned;
8369
8528
  }
8529
+ function toSerializableProviderRef(provider) {
8530
+ if (isApiProvider(provider)) return sanitizeProvider(provider);
8531
+ if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
8532
+ return provider;
8533
+ }
8534
+ function isRecord(value) {
8535
+ return Boolean(value && typeof value === "object" && !Array.isArray(value));
8536
+ }
8537
+ function withSerializableProvider(record) {
8538
+ if (!isApiProvider(record.provider)) return record;
8539
+ return {
8540
+ ...record,
8541
+ provider: sanitizeProvider(record.provider)
8542
+ };
8543
+ }
8544
+ /**
8545
+ * Function-valued transforms are first-class at runtime but are silently dropped
8546
+ * by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
8547
+ * a function reference, so replace every `transform`-like field with a
8548
+ * `[inline function]: name` marker. Non-function values pass through unchanged.
8549
+ *
8550
+ * `droppedRef.value` is flipped to `true` the first time a function is replaced
8551
+ * so the caller can emit a single warning instead of logging per field.
8552
+ */
8553
+ function replaceFunctionTransforms(record, droppedRef) {
8554
+ let result;
8555
+ for (const key of TRANSFORM_KEYS) {
8556
+ const value = record[key];
8557
+ if (!isTransformFunction(value)) continue;
8558
+ if (!result) result = { ...record };
8559
+ result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
8560
+ droppedRef.value = true;
8561
+ }
8562
+ return result ?? record;
8563
+ }
8564
+ function toSerializableAssertion(assertion, droppedRef) {
8565
+ if (!isRecord(assertion)) return assertion;
8566
+ let sanitizedAssertion = withSerializableProvider(assertion);
8567
+ sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
8568
+ if (Array.isArray(assertion.assert)) sanitizedAssertion = {
8569
+ ...sanitizedAssertion,
8570
+ assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
8571
+ };
8572
+ return sanitizedAssertion;
8573
+ }
8574
+ function toSerializableTestCase(test, droppedRef) {
8575
+ if (!isRecord(test)) return test;
8576
+ let sanitizedTest = withSerializableProvider(test);
8577
+ if (isRecord(test.options)) {
8578
+ let options = withSerializableProvider(test.options);
8579
+ options = replaceFunctionTransforms(options, droppedRef);
8580
+ if (options !== test.options) sanitizedTest = {
8581
+ ...sanitizedTest,
8582
+ options
8583
+ };
8584
+ }
8585
+ if (Array.isArray(test.assert)) sanitizedTest = {
8586
+ ...sanitizedTest,
8587
+ assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
8588
+ };
8589
+ return sanitizedTest;
8590
+ }
8591
+ function toSerializableScenario(scenario, droppedRef) {
8592
+ if (!isRecord(scenario)) return scenario;
8593
+ if (!Array.isArray(scenario.tests)) return scenario;
8594
+ return {
8595
+ ...scenario,
8596
+ tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
8597
+ };
8598
+ }
8599
+ function createSerializableUnifiedConfig(testSuite, prompts) {
8600
+ const droppedRef = { value: false };
8601
+ const config = {
8602
+ ...testSuite,
8603
+ providers: toSerializableProviderRef(testSuite.providers),
8604
+ defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
8605
+ tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
8606
+ scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
8607
+ prompts
8608
+ };
8609
+ if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
8610
+ return config;
8611
+ }
8370
8612
  async function evaluate(testSuite, options = {}) {
8371
- if (testSuite.writeLatestResults) await runDbMigrations();
8372
- const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
8613
+ const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
8614
+ if (testSuiteConfig.writeLatestResults) await runDbMigrations();
8615
+ const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
8373
8616
  const providerMap = {};
8374
8617
  for (const p of loadedProviders) {
8375
8618
  providerMap[p.id()] = p;
8376
8619
  if (p.label) providerMap[p.label] = p;
8377
8620
  }
8378
- let resolvedDefaultTest = testSuite.defaultTest;
8379
- if (typeof testSuite.defaultTest === "string" && testSuite.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuite.defaultTest);
8621
+ let resolvedDefaultTest = testSuiteConfig.defaultTest;
8622
+ if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
8380
8623
  const constructedTestSuite = {
8381
- ...testSuite,
8624
+ ...testSuiteConfig,
8382
8625
  defaultTest: resolvedDefaultTest,
8383
- scenarios: testSuite.scenarios,
8626
+ scenarios: testSuiteConfig.scenarios,
8384
8627
  providers: loadedProviders,
8385
- tests: await readTests(testSuite.tests),
8386
- nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
8387
- prompts: await processPrompts(testSuite.prompts)
8628
+ tests: await readTests(testSuiteConfig.tests),
8629
+ nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
8630
+ prompts: await processPrompts(testSuiteConfig.prompts)
8388
8631
  };
8389
8632
  if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
8390
8633
  constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
8391
8634
  if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
8392
- env: testSuite.env,
8635
+ env: testSuiteConfig.env,
8393
8636
  basePath: state.basePath
8394
8637
  });
8395
8638
  if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
8396
- env: testSuite.env,
8639
+ env: testSuiteConfig.env,
8397
8640
  basePath: state.basePath
8398
8641
  });
8399
8642
  }
8400
8643
  constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
8401
8644
  for (const test of constructedTestSuite.tests) {
8402
8645
  if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
8403
- env: testSuite.env,
8646
+ env: testSuiteConfig.env,
8404
8647
  basePath: state.basePath
8405
8648
  });
8406
8649
  for (const assertion of test.assert || []) {
8407
8650
  if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
8408
8651
  if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
8409
- env: testSuite.env,
8652
+ env: testSuiteConfig.env,
8410
8653
  basePath: state.basePath
8411
8654
  });
8412
8655
  }
8413
8656
  }
8414
8657
  if (options.cache === false) disableCache();
8415
- const parsedProviderPromptMap = readProviderPromptMap(testSuite, constructedTestSuite.prompts);
8416
- const unifiedConfig = {
8417
- ...testSuite,
8418
- prompts: constructedTestSuite.prompts
8419
- };
8420
- const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
8658
+ const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
8659
+ const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
8660
+ const author = getAuthor(suiteAuthor);
8661
+ const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
8421
8662
  const ret = await evaluate$1({
8422
8663
  ...constructedTestSuite,
8423
8664
  providerPromptMap: parsedProviderPromptMap
8424
8665
  }, evalRecord, {
8425
8666
  eventSource: "library",
8426
- isRedteam: Boolean(testSuite.redteam),
8667
+ isRedteam: Boolean(testSuiteConfig.redteam),
8427
8668
  ...options
8428
8669
  });
8429
- if (testSuite.writeLatestResults && testSuite.sharing) if (isSharingEnabled(ret)) try {
8670
+ if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
8430
8671
  const shareableUrl = await createShareableUrl(ret, { silent: true });
8431
8672
  if (shareableUrl) {
8432
8673
  ret.shareableUrl = shareableUrl;
@@ -8437,9 +8678,9 @@ async function evaluate(testSuite, options = {}) {
8437
8678
  logger.warn(`Failed to create shareable URL: ${error}`);
8438
8679
  }
8439
8680
  else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
8440
- if (testSuite.outputPath) {
8441
- if (typeof testSuite.outputPath === "string") await writeOutput(testSuite.outputPath, evalRecord, null);
8442
- else if (Array.isArray(testSuite.outputPath)) await writeMultipleOutputs(testSuite.outputPath, evalRecord, null);
8681
+ if (testSuiteConfig.outputPath) {
8682
+ if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
8683
+ else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
8443
8684
  }
8444
8685
  return ret;
8445
8686
  }
@@ -8668,6 +8909,58 @@ function setDownloadHeaders(res, fileName, contentType) {
8668
8909
  //#region src/server/routes/eval.ts
8669
8910
  const evalRouter = Router();
8670
8911
  const evalJobs = /* @__PURE__ */ new Map();
8912
+ function sendEvalTableResponse(res, evalId, responsePayload) {
8913
+ try {
8914
+ res.json(responsePayload);
8915
+ } catch (error) {
8916
+ if (!(error instanceof RangeError)) throw error;
8917
+ logger.warn("[GET /:id/table] Response too large, stripping per-cell prompts by size", { evalId });
8918
+ const promptLocations = getEvalTableOutputPromptLocationsBySize(responsePayload);
8919
+ if (promptLocations.length === 0) {
8920
+ logger.error("[GET /:id/table] Response too large and has no prompts to strip", { evalId });
8921
+ res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
8922
+ return;
8923
+ }
8924
+ const tryStringifyWithStrippedPrompts = (promptCountToStrip) => {
8925
+ const responseWithoutPrompts = getEvalTablePromptStrippedPayload(responsePayload, promptLocations, promptCountToStrip);
8926
+ try {
8927
+ const responseBody = JSON.stringify(responseWithoutPrompts);
8928
+ invariant(typeof responseBody === "string", "Eval table response must serialize to JSON");
8929
+ return responseBody;
8930
+ } catch (retryError) {
8931
+ if (!(retryError instanceof RangeError)) throw retryError;
8932
+ return null;
8933
+ }
8934
+ };
8935
+ let lowerBound = 0;
8936
+ let upperBound = 1;
8937
+ let responseBody = null;
8938
+ while (upperBound < promptLocations.length) {
8939
+ responseBody = tryStringifyWithStrippedPrompts(upperBound);
8940
+ if (responseBody) break;
8941
+ lowerBound = upperBound;
8942
+ upperBound *= 2;
8943
+ }
8944
+ if (!responseBody) {
8945
+ upperBound = promptLocations.length;
8946
+ responseBody = tryStringifyWithStrippedPrompts(upperBound);
8947
+ }
8948
+ if (responseBody) {
8949
+ while (upperBound - lowerBound > 1) {
8950
+ const midPoint = lowerBound + Math.floor((upperBound - lowerBound) / 2);
8951
+ const midpointResponseBody = tryStringifyWithStrippedPrompts(midPoint);
8952
+ if (midpointResponseBody) {
8953
+ upperBound = midPoint;
8954
+ responseBody = midpointResponseBody;
8955
+ } else lowerBound = midPoint;
8956
+ }
8957
+ res.type("json").send(responseBody);
8958
+ return;
8959
+ }
8960
+ logger.error("[GET /:id/table] Response still too large after stripping prompts", { evalId });
8961
+ res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
8962
+ }
8963
+ }
8671
8964
  evalRouter.post("/job", (req, res) => {
8672
8965
  const result = EvalSchemas.CreateJob.Request.safeParse(req.body);
8673
8966
  if (!result.success) {
@@ -8708,9 +9001,10 @@ evalRouter.post("/job", (req, res) => {
8708
9001
  job.evalId = evalResult.id;
8709
9002
  console.log(`[${id}] Complete`);
8710
9003
  }).catch((error) => {
8711
- logger.error(dedent`Failed to eval tests:
8712
- Error: ${error}
8713
- Body: ${JSON.stringify(req.body, null, 2)}`);
9004
+ logger.error("Failed to eval tests", {
9005
+ error,
9006
+ body: sanitizeObject(testSuite, { context: "request body" })
9007
+ });
8714
9008
  const job = evalJobs.get(id);
8715
9009
  invariant(job, "Job not found");
8716
9010
  job.status = "error";
@@ -8907,7 +9201,7 @@ evalRouter.get("/:id/table", async (req, res) => {
8907
9201
  evalId: id
8908
9202
  });
8909
9203
  }
8910
- res.json({
9204
+ sendEvalTableResponse(res, id, {
8911
9205
  table: returnTable,
8912
9206
  totalCount: table.totalCount,
8913
9207
  filteredCount: table.filteredCount,
@@ -9073,43 +9367,53 @@ evalRouter.post("/:evalId/results/:id/rating", async (req, res) => {
9073
9367
  res.status(400).json({ error: z.prettifyError(bodyResult.error) });
9074
9368
  return;
9075
9369
  }
9076
- const { id } = paramsResult.data;
9077
- const gradingResult = bodyResult.data;
9078
- const result = await EvalResult.findById(id);
9079
- invariant(result, "Result not found");
9080
- const eval_ = await Eval.findById(result.evalId);
9081
- invariant(eval_, "Eval not found");
9082
- const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
9083
- const successChanged = result.success !== gradingResult.pass;
9084
- const scoreChange = gradingResult.score - result.score;
9085
- result.gradingResult = gradingResult;
9086
- result.success = gradingResult.pass;
9087
- result.score = gradingResult.score;
9088
- const prompt = eval_.prompts[result.promptIdx];
9089
- invariant(prompt, "Prompt not found");
9090
- if (!prompt.metrics) {
9091
- logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
9092
- res.status(400).json({ error: "Prompt metrics not found" });
9093
- return;
9370
+ try {
9371
+ const { evalId, id } = paramsResult.data;
9372
+ const gradingResult = bodyResult.data;
9373
+ const result = await EvalResult.findById(id);
9374
+ if (!result || result.evalId !== evalId) {
9375
+ res.status(404).json({ error: "Result not found" });
9376
+ return;
9377
+ }
9378
+ const eval_ = await Eval.findById(evalId);
9379
+ if (!eval_) {
9380
+ res.status(404).json({ error: "Eval not found" });
9381
+ return;
9382
+ }
9383
+ const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
9384
+ const successChanged = result.success !== gradingResult.pass;
9385
+ const scoreChange = gradingResult.score - result.score;
9386
+ result.gradingResult = gradingResult;
9387
+ result.success = gradingResult.pass;
9388
+ result.score = gradingResult.score;
9389
+ const prompt = eval_.prompts[result.promptIdx];
9390
+ invariant(prompt, "Prompt not found");
9391
+ if (!prompt.metrics) {
9392
+ logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
9393
+ res.status(400).json({ error: "Prompt metrics not found" });
9394
+ return;
9395
+ }
9396
+ if (successChanged) if (result.success) {
9397
+ prompt.metrics.testPassCount += 1;
9398
+ prompt.metrics.testFailCount -= 1;
9399
+ prompt.metrics.assertPassCount += 1;
9400
+ prompt.metrics.score += scoreChange;
9401
+ if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
9402
+ } else {
9403
+ prompt.metrics.testPassCount -= 1;
9404
+ prompt.metrics.testFailCount += 1;
9405
+ prompt.metrics.assertFailCount += 1;
9406
+ prompt.metrics.score += scoreChange;
9407
+ if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
9408
+ }
9409
+ else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
9410
+ else prompt.metrics.assertFailCount += 1;
9411
+ await eval_.save();
9412
+ await result.save();
9413
+ res.json(result);
9414
+ } catch (error) {
9415
+ sendError(res, 500, "Failed to submit rating", error);
9094
9416
  }
9095
- if (successChanged) if (result.success) {
9096
- prompt.metrics.testPassCount += 1;
9097
- prompt.metrics.testFailCount -= 1;
9098
- prompt.metrics.assertPassCount += 1;
9099
- prompt.metrics.score += scoreChange;
9100
- if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
9101
- } else {
9102
- prompt.metrics.testPassCount -= 1;
9103
- prompt.metrics.testFailCount += 1;
9104
- prompt.metrics.assertFailCount += 1;
9105
- prompt.metrics.score += scoreChange;
9106
- if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
9107
- }
9108
- else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
9109
- else prompt.metrics.assertFailCount += 1;
9110
- await eval_.save();
9111
- await result.save();
9112
- res.json(result);
9113
9417
  });
9114
9418
  evalRouter.post("/", async (req, res) => {
9115
9419
  const bodyResult = EvalSchemas.Save.Request.safeParse(req.body);
@@ -9143,9 +9447,10 @@ evalRouter.post("/", async (req, res) => {
9143
9447
  res.json(EvalSchemas.Save.Response.parse({ id: eval_.id }));
9144
9448
  }
9145
9449
  } catch (error) {
9146
- logger.error(dedent`Failed to write eval to database:
9147
- Error: ${error}
9148
- Body: ${JSON.stringify(body, null, 2)}`);
9450
+ logger.error("Failed to write eval to database", {
9451
+ error,
9452
+ body: sanitizeObject(body, { context: "request body" })
9453
+ });
9149
9454
  res.status(500).json({ error: "Failed to write eval to database" });
9150
9455
  }
9151
9456
  });
@@ -9363,7 +9668,7 @@ async function checkForUpdates() {
9363
9668
  } catch {
9364
9669
  return false;
9365
9670
  }
9366
- if (semverGt(latestVersion, "0.121.5")) {
9671
+ if (semverGt(latestVersion, "0.121.8")) {
9367
9672
  const border = "=".repeat(TERMINAL_MAX_WIDTH);
9368
9673
  logger.info(`\n${border}
9369
9674
  ${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
@@ -9522,6 +9827,9 @@ const ModelAuditCliOptionsSchema = z.object({
9522
9827
  dryRun: z.boolean().optional(),
9523
9828
  cache: z.boolean().optional(),
9524
9829
  stream: z.boolean().optional(),
9830
+ scanners: z.array(z.string()).optional(),
9831
+ excludeScanner: z.array(z.string()).optional(),
9832
+ listScanners: z.boolean().optional(),
9525
9833
  share: z.boolean().optional(),
9526
9834
  noShare: z.boolean().optional()
9527
9835
  });
@@ -9609,6 +9917,18 @@ const CLI_ARG_MAP = {
9609
9917
  stream: {
9610
9918
  flag: "--stream",
9611
9919
  type: "boolean"
9920
+ },
9921
+ scanners: {
9922
+ flag: "--scanners",
9923
+ type: "array"
9924
+ },
9925
+ excludeScanner: {
9926
+ flag: "--exclude-scanner",
9927
+ type: "array"
9928
+ },
9929
+ listScanners: {
9930
+ flag: "--list-scanners",
9931
+ type: "boolean"
9612
9932
  }
9613
9933
  };
9614
9934
  /**
@@ -9777,6 +10097,62 @@ function spawnModelAudit(args, options) {
9777
10097
  });
9778
10098
  });
9779
10099
  }
10100
+ function collectRepeatableOption(value, previous = []) {
10101
+ return [...previous, value];
10102
+ }
10103
+ function hasScannerSelectionOptions(options) {
10104
+ return Boolean(options.scanners?.length || options.excludeScanner?.length);
10105
+ }
10106
+ function hasScannerSelectionValue(value) {
10107
+ if (Array.isArray(value)) return value.length > 0;
10108
+ return typeof value === "string" && value.trim().length > 0;
10109
+ }
10110
+ function hasPersistedScannerSelection(metadata) {
10111
+ const options = metadata?.options;
10112
+ if (!options || typeof options !== "object" || Array.isArray(options)) return false;
10113
+ return hasScannerSelectionValue(options.scanners) || hasScannerSelectionValue(options.excludeScanner);
10114
+ }
10115
+ /**
10116
+ * Parse CLI options through Zod, logging validation errors to the CLI.
10117
+ * Returns null when validation fails (and sets process.exitCode to 1).
10118
+ */
10119
+ function buildCliArgs(paths, cliOptions) {
10120
+ try {
10121
+ return parseModelAuditArgs(paths, cliOptions);
10122
+ } catch (error) {
10123
+ if (error instanceof z.ZodError) {
10124
+ logger.error(`Invalid model audit options provided:\n${z.prettifyError(error)}`);
10125
+ process.exitCode = 1;
10126
+ return null;
10127
+ }
10128
+ throw error;
10129
+ }
10130
+ }
10131
+ /**
10132
+ * Run modelaudit with inherited stdio and propagate its exit code.
10133
+ *
10134
+ * `treatExitOneAsIssues=true` suppresses the error log for exit code 1, which
10135
+ * modelaudit uses to mean "scan completed, issues found" — callers that expect
10136
+ * findings (like the main scan flow) should set this, while list/help flows
10137
+ * (where a non-zero exit is always unexpected) should leave it false.
10138
+ */
10139
+ async function runPassthroughModelAudit(args, env, treatExitOneAsIssues = false) {
10140
+ try {
10141
+ const spawnResult = await spawnModelAudit(args, {
10142
+ captureOutput: false,
10143
+ env
10144
+ });
10145
+ const isIssuesExit = treatExitOneAsIssues && spawnResult.code === 1;
10146
+ if (spawnResult.code !== null && spawnResult.code !== 0 && !isIssuesExit) logger.error(`Model scan process exited with code ${spawnResult.code}`);
10147
+ process.exitCode = spawnResult.code || 0;
10148
+ } catch (error) {
10149
+ const message = error instanceof Error ? error.message : String(error);
10150
+ logger.error(`Failed to start modelaudit: ${message}`);
10151
+ logger.info("Make sure modelaudit is installed and available in your PATH.");
10152
+ logger.info("Install it using: pip install modelaudit");
10153
+ process.exitCode = 1;
10154
+ }
10155
+ }
9780
10156
  /**
9781
10157
  * Check for existing scan and determine if re-scan is needed.
9782
10158
  * Returns the existing audit if found and re-scan should happen.
@@ -9799,6 +10175,20 @@ async function checkExistingScan(paths, options, currentScannerVersion) {
9799
10175
  shouldSkip: false,
9800
10176
  existingAudit: null
9801
10177
  };
10178
+ if (hasScannerSelectionOptions(options)) {
10179
+ logger.debug("Re-scanning with scanner selection options");
10180
+ return {
10181
+ shouldSkip: false,
10182
+ existingAudit: existing
10183
+ };
10184
+ }
10185
+ if (hasPersistedScannerSelection(existing.metadata)) {
10186
+ logger.debug("Re-scanning because cached revision used scanner selection options");
10187
+ return {
10188
+ shouldSkip: false,
10189
+ existingAudit: existing
10190
+ };
10191
+ }
9802
10192
  if (options.force) {
9803
10193
  logger.debug(`Re-scanning (--force): ${modelId}`);
9804
10194
  return {
@@ -9930,7 +10320,9 @@ async function saveAuditRecord(paths, results, options, currentScannerVersion, e
9930
10320
  cache: options.cache,
9931
10321
  quiet: options.quiet,
9932
10322
  progress: options.progress,
9933
- stream: options.stream
10323
+ stream: options.stream,
10324
+ scanners: options.scanners,
10325
+ excludeScanner: options.excludeScanner
9934
10326
  }
9935
10327
  };
9936
10328
  if (existingAudit) {
@@ -10062,8 +10454,8 @@ async function processScanResultsFromStdout(spawnResult, paths, options, current
10062
10454
  return processJsonResults(jsonOutput, spawnResult.code || 0, paths, options, currentScannerVersion, existingAudit);
10063
10455
  }
10064
10456
  function modelScanCommand(program) {
10065
- program.command("scan-model").description("Scan model files for security and quality issues").argument("<paths...>", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
10066
- if (!paths || paths.length === 0) {
10457
+ program.command("scan-model").description("Scan model files for security and quality issues").argument("[paths...]", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("--scanners <scanner>", "Only run selected ModelAudit scanners (IDs/classes; comma-separated or repeated)", collectRepeatableOption).option("--exclude-scanner <scanner>", "Exclude a ModelAudit scanner from the active set (comma-separated or repeated)", collectRepeatableOption).option("--list-scanners", "List registered ModelAudit scanners and exit").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
10458
+ if (!options.listScanners && (!paths || paths.length === 0)) {
10067
10459
  logger.error("No paths specified. Provide at least one model file or directory to scan.");
10068
10460
  process.exitCode = 1;
10069
10461
  return;
@@ -10079,6 +10471,21 @@ function modelScanCommand(program) {
10079
10471
  }
10080
10472
  await checkModelAuditUpdates();
10081
10473
  if (currentScannerVersion) logger.debug(`Using modelaudit version: ${currentScannerVersion}`);
10474
+ const delegationEnv = {
10475
+ ...process.env,
10476
+ PROMPTFOO_DELEGATED: "true"
10477
+ };
10478
+ if (options.listScanners) {
10479
+ const parsed = buildCliArgs(paths || [], {
10480
+ ...options,
10481
+ format: options.format || "text",
10482
+ output: options.output,
10483
+ timeout: void 0
10484
+ });
10485
+ if (!parsed) return;
10486
+ await runPassthroughModelAudit(parsed.args, delegationEnv);
10487
+ return;
10488
+ }
10082
10489
  const saveToDatabase = options.write === void 0 || options.write === true;
10083
10490
  let existingAuditToUpdate = null;
10084
10491
  if (saveToDatabase) {
@@ -10090,33 +10497,22 @@ function modelScanCommand(program) {
10090
10497
  existingAuditToUpdate = existingAudit;
10091
10498
  }
10092
10499
  const outputFormat = saveToDatabase ? "json" : options.format || "text";
10093
- const cliOptions = {
10500
+ const parsed = buildCliArgs(paths, {
10094
10501
  ...options,
10095
10502
  format: outputFormat,
10096
10503
  output: options.output && !saveToDatabase ? options.output : void 0,
10097
10504
  timeout: options.timeout ? parseInt(options.timeout, 10) : void 0
10098
- };
10099
- let args;
10100
- try {
10101
- const result = parseModelAuditArgs(paths, cliOptions);
10102
- args = result.args;
10103
- if (result.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${result.unsupportedOptions.join(", ")}`);
10104
- } catch (error) {
10105
- if (error instanceof z.ZodError) {
10106
- logger.error("Invalid model audit options provided:");
10107
- for (const err of error.issues) logger.error(` - ${err.path.join(".")}: ${err.message}`);
10108
- process.exitCode = 1;
10109
- return;
10110
- }
10111
- throw error;
10505
+ });
10506
+ if (!parsed) return;
10507
+ const args = parsed.args;
10508
+ if (parsed.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${parsed.unsupportedOptions.join(", ")}`);
10509
+ if (saveToDatabase || outputFormat === "text") logger.info(`Running model scan on: ${paths.join(", ")}`);
10510
+ if (!saveToDatabase) {
10511
+ await runPassthroughModelAudit(args, delegationEnv, true);
10512
+ return;
10112
10513
  }
10113
- logger.info(`Running model scan on: ${paths.join(", ")}`);
10114
- const delegationEnv = {
10115
- ...process.env,
10116
- PROMPTFOO_DELEGATED: "true"
10117
- };
10118
10514
  try {
10119
- if (saveToDatabase) if (supportsCliUiWithOutput(currentScannerVersion)) {
10515
+ if (supportsCliUiWithOutput(currentScannerVersion)) {
10120
10516
  const tempOutputPath = createTempOutputPath();
10121
10517
  args.push("--output", tempOutputPath);
10122
10518
  let cleanedUp = false;
@@ -10150,14 +10546,6 @@ function modelScanCommand(program) {
10150
10546
  });
10151
10547
  process.exitCode = await processScanResultsFromStdout(spawnResult, paths, options, currentScannerVersion, existingAuditToUpdate);
10152
10548
  }
10153
- else {
10154
- const spawnResult = await spawnModelAudit(args, {
10155
- captureOutput: false,
10156
- env: delegationEnv
10157
- });
10158
- if (spawnResult.code !== null && spawnResult.code !== 0 && spawnResult.code !== 1) logger.error(`Model scan process exited with code ${spawnResult.code}`);
10159
- process.exitCode = spawnResult.code || 0;
10160
- }
10161
10549
  } catch (error) {
10162
10550
  const message = error instanceof Error ? error.message : String(error);
10163
10551
  logger.error(`Failed to start modelaudit: ${message}`);
@@ -10174,6 +10562,14 @@ const CheckInstalledResponseSchema = z.object({
10174
10562
  version: z.string().nullable(),
10175
10563
  cwd: z.string()
10176
10564
  });
10565
+ const ScannerInfoSchema = z.object({
10566
+ id: z.string(),
10567
+ class: z.string().optional().default(""),
10568
+ description: z.string().optional().default(""),
10569
+ extensions: z.array(z.string()).optional().default([]),
10570
+ dependencies: z.array(z.string()).optional().default([])
10571
+ }).passthrough();
10572
+ const ListScannersResponseSchema = z.object({ scanners: z.array(ScannerInfoSchema) }).passthrough();
10177
10573
  const CheckPathRequestSchema = z.object({ path: z.string().trim().min(1, "No path provided") });
10178
10574
  const CheckPathResponseSchema = z.union([z.object({
10179
10575
  exists: z.literal(false),
@@ -10205,6 +10601,8 @@ const ScanRequestSchema = z.object({
10205
10601
  sbom: z.string().optional(),
10206
10602
  output: z.string().optional(),
10207
10603
  maxSize: z.string().optional(),
10604
+ scanners: z.array(z.string()).optional(),
10605
+ excludeScanner: z.array(z.string()).optional(),
10208
10606
  persist: z.boolean().optional(),
10209
10607
  name: z.string().optional(),
10210
10608
  author: z.string().optional()
@@ -10215,8 +10613,13 @@ const ListScansQuerySchema = z.object({
10215
10613
  offset: z.coerce.number().int().min(0).optional().default(0),
10216
10614
  sort: z.enum([
10217
10615
  "createdAt",
10616
+ "failedChecks",
10617
+ "hasErrors",
10618
+ "id",
10619
+ "modelPath",
10218
10620
  "name",
10219
- "modelPath"
10621
+ "passedChecks",
10622
+ "totalChecks"
10220
10623
  ]).optional().default("createdAt"),
10221
10624
  order: z.enum(["asc", "desc"]).optional().default("desc"),
10222
10625
  search: z.string().optional()
@@ -10255,6 +10658,7 @@ const DeleteScanResponseSchema = z.object({
10255
10658
  });
10256
10659
  const ModelAuditSchemas = {
10257
10660
  CheckInstalled: { Response: CheckInstalledResponseSchema },
10661
+ ListScanners: { Response: ListScannersResponseSchema },
10258
10662
  CheckPath: {
10259
10663
  Request: CheckPathRequestSchema,
10260
10664
  Response: CheckPathResponseSchema
@@ -10277,6 +10681,47 @@ const ModelAuditSchemas = {
10277
10681
  //#endregion
10278
10682
  //#region src/server/routes/modelAudit.ts
10279
10683
  const modelAuditRouter = Router();
10684
+ const LIST_SCANNERS_ARGS = parseModelAuditArgs([], {
10685
+ listScanners: true,
10686
+ format: "json"
10687
+ }).args;
10688
+ function getModelAuditDelegationEnv() {
10689
+ return {
10690
+ ...process.env,
10691
+ PROMPTFOO_DELEGATED: "true"
10692
+ };
10693
+ }
10694
+ function spawnModelAuditCapture(args, options = {}) {
10695
+ return new Promise((resolve, reject) => {
10696
+ const child = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
10697
+ let stdout = "";
10698
+ let stderr = "";
10699
+ const onAbort = () => {
10700
+ if (!child.killed) child.kill("SIGTERM");
10701
+ };
10702
+ if (options.signal?.aborted) onAbort();
10703
+ else options.signal?.addEventListener("abort", onAbort, { once: true });
10704
+ const cleanupAbort = () => options.signal?.removeEventListener("abort", onAbort);
10705
+ child.stdout?.on("data", (data) => {
10706
+ stdout += data.toString();
10707
+ });
10708
+ child.stderr?.on("data", (data) => {
10709
+ stderr += data.toString();
10710
+ });
10711
+ child.on("error", (error) => {
10712
+ cleanupAbort();
10713
+ reject(error);
10714
+ });
10715
+ child.on("close", (code) => {
10716
+ cleanupAbort();
10717
+ resolve({
10718
+ code,
10719
+ stdout,
10720
+ stderr
10721
+ });
10722
+ });
10723
+ });
10724
+ }
10280
10725
  modelAuditRouter.get("/check-installed", async (_req, res) => {
10281
10726
  try {
10282
10727
  const { installed, version } = await checkModelAuditInstalled();
@@ -10293,6 +10738,34 @@ modelAuditRouter.get("/check-installed", async (_req, res) => {
10293
10738
  }));
10294
10739
  }
10295
10740
  });
10741
+ modelAuditRouter.get("/scanners", async (req, res) => {
10742
+ const abortController = new AbortController();
10743
+ const onClientClose = () => abortController.abort();
10744
+ req.on("close", onClientClose);
10745
+ try {
10746
+ const { installed } = await checkModelAuditInstalled();
10747
+ if (!installed) {
10748
+ res.status(400).json({ error: "ModelAudit is not installed. Please install it using: pip install modelaudit" });
10749
+ return;
10750
+ }
10751
+ const { code, stdout, stderr } = await spawnModelAuditCapture(LIST_SCANNERS_ARGS, { signal: abortController.signal });
10752
+ if (abortController.signal.aborted) return;
10753
+ if (code !== null && code !== 0) {
10754
+ sendError(res, 500, "Failed to list ModelAudit scanners", {
10755
+ code,
10756
+ stderr
10757
+ });
10758
+ return;
10759
+ }
10760
+ const parsedOutput = JSON.parse(stdout);
10761
+ res.json(ModelAuditSchemas.ListScanners.Response.parse(parsedOutput));
10762
+ } catch (error) {
10763
+ if (abortController.signal.aborted) return;
10764
+ sendError(res, 500, "Failed to list ModelAudit scanners", error);
10765
+ } finally {
10766
+ req.removeListener("close", onClientClose);
10767
+ }
10768
+ });
10296
10769
  modelAuditRouter.post("/check-path", async (req, res) => {
10297
10770
  const bodyResult = ModelAuditSchemas.CheckPath.Request.safeParse(req.body);
10298
10771
  if (!bodyResult.success) {
@@ -10363,11 +10836,12 @@ modelAuditRouter.post("/scan", async (req, res) => {
10363
10836
  event: "model_scan",
10364
10837
  pathCount: paths.length,
10365
10838
  hasBlacklist: (options.blacklist?.length ?? 0) > 0,
10839
+ hasScannerSelection: Boolean(options.scanners?.length || options.excludeScanner?.length),
10366
10840
  timeout: options.timeout ?? 0,
10367
10841
  verbose: options.verbose ?? false,
10368
10842
  persist
10369
10843
  });
10370
- const modelAudit = spawn("modelaudit", args);
10844
+ const modelAudit = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
10371
10845
  let stdout = "";
10372
10846
  let stderr = "";
10373
10847
  let responded = false;
@@ -10553,9 +11027,20 @@ modelAuditRouter.post("/scan", async (req, res) => {
10553
11027
  options: {
10554
11028
  blacklist: options.blacklist,
10555
11029
  timeout: options.timeout,
11030
+ maxSize: options.maxSize,
10556
11031
  maxFileSize: options.maxFileSize,
10557
11032
  maxTotalSize: options.maxTotalSize,
10558
- verbose: options.verbose
11033
+ verbose: options.verbose,
11034
+ format: options.format,
11035
+ strict: options.strict,
11036
+ dryRun: options.dryRun,
11037
+ cache: options.cache,
11038
+ quiet: options.quiet,
11039
+ progress: options.progress,
11040
+ sbom: options.sbom,
11041
+ output: options.output,
11042
+ scanners: options.scanners,
11043
+ excludeScanner: options.excludeScanner
10559
11044
  }
10560
11045
  }
10561
11046
  })).id;
@@ -11009,7 +11494,7 @@ function determineEffectiveSessionSource({ provider, sessionConfig }) {
11009
11494
  async function testProviderConnectivity({ provider, prompt = "Hello World!", inputs }) {
11010
11495
  const vars = {};
11011
11496
  if (!provider?.config?.sessionParser) vars["sessionId"] = crypto.randomUUID();
11012
- if (inputs && typeof inputs === "object") for (const [varName, _description] of Object.entries(inputs)) vars[varName] = `test_${varName}`;
11497
+ if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) vars[varName] = createPlaceholderInputValue(varName, definition);
11013
11498
  const testSuite = {
11014
11499
  providers: [provider],
11015
11500
  prompts: [{
@@ -11240,10 +11725,15 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
11240
11725
  sessionConfig
11241
11726
  });
11242
11727
  const initialSessionId = effectiveSessionSource === "server" ? void 0 : crypto.randomUUID();
11728
+ const materializeSessionPrompt = (prompt) => {
11729
+ if (!mainInputVariable) return prompt;
11730
+ const definition = inputs?.[mainInputVariable];
11731
+ return definition ? createPlaceholderInputValue(mainInputVariable, definition, prompt) : prompt;
11732
+ };
11243
11733
  const inputVars = {};
11244
- if (inputs && typeof inputs === "object") for (const [varName, _description] of Object.entries(inputs)) {
11734
+ if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) {
11245
11735
  if (varName === mainInputVariable) continue;
11246
- inputVars[varName] = `test_${varName}`;
11736
+ inputVars[varName] = createPlaceholderInputValue(varName, definition);
11247
11737
  }
11248
11738
  const firstPrompt = "What can you help me with?";
11249
11739
  const secondPrompt = "What was the last thing I asked you?";
@@ -11256,7 +11746,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
11256
11746
  vars: {
11257
11747
  ...initialSessionId ? { sessionId: initialSessionId } : {},
11258
11748
  ...inputVars,
11259
- ...mainInputVariable ? { [mainInputVariable]: firstPrompt } : {}
11749
+ ...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(firstPrompt) } : {}
11260
11750
  },
11261
11751
  prompt: {
11262
11752
  raw: firstPrompt,
@@ -11303,7 +11793,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
11303
11793
  vars: {
11304
11794
  ...extractedSessionId ? { sessionId: extractedSessionId } : {},
11305
11795
  ...inputVars,
11306
- ...mainInputVariable ? { [mainInputVariable]: secondPrompt } : {}
11796
+ ...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(secondPrompt) } : {}
11307
11797
  },
11308
11798
  prompt: {
11309
11799
  raw: secondPrompt,
@@ -11828,7 +12318,7 @@ providersRouter.post("/test-session", async (req, res) => {
11828
12318
  const TestCaseGenerationSchema = z.object({
11829
12319
  plugin: z.object({
11830
12320
  id: z.string().refine((val) => ALL_PLUGINS.includes(val), { message: `Invalid plugin ID. Must be one of: ${ALL_PLUGINS.join(", ")}` }),
11831
- config: PluginConfigSchema.optional().prefault({})
12321
+ config: PluginConfigSchema.catchall(z.unknown()).optional().prefault({})
11832
12322
  }),
11833
12323
  strategy: z.object({
11834
12324
  id: z.string().refine((val) => ALL_STRATEGIES.includes(val), { message: `Invalid strategy ID. Must be one of: ${ALL_STRATEGIES.join(", ")}` }),
@@ -12405,11 +12895,11 @@ redteamRouter.post("/:taskId", async (req, res) => {
12405
12895
  }
12406
12896
  const { taskId } = paramsResult.data;
12407
12897
  const cloudFunctionUrl = getRemoteGenerationUrl();
12408
- logger.debug(`Received ${taskId} task request: ${JSON.stringify({
12898
+ logger.debug(`Received ${taskId} task request`, {
12409
12899
  method: req.method,
12410
12900
  url: req.url,
12411
- body: req.body
12412
- })}`);
12901
+ body: sanitizeObject(bodyResult.data, { context: "request body" })
12902
+ });
12413
12903
  try {
12414
12904
  logger.debug(`Sending request to cloud function: ${cloudFunctionUrl}`);
12415
12905
  const response = await fetchWithProxy(cloudFunctionUrl, {
@@ -12781,7 +13271,7 @@ router.get("/", async (_req, res) => {
12781
13271
  };
12782
13272
  } catch (error) {
12783
13273
  logger.debug(`Failed to fetch latest version: ${error}`);
12784
- latestVersion = versionCache.latestVersion ?? "0.121.5";
13274
+ latestVersion = versionCache.latestVersion ?? "0.121.8";
12785
13275
  }
12786
13276
  }
12787
13277
  const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
@@ -12790,7 +13280,7 @@ router.get("/", async (_req, res) => {
12790
13280
  selfHosted,
12791
13281
  isNpx
12792
13282
  });
12793
- const resolvedLatestVersion = latestVersion ?? "0.121.5";
13283
+ const resolvedLatestVersion = latestVersion ?? "0.121.8";
12794
13284
  const response = {
12795
13285
  currentVersion: VERSION,
12796
13286
  latestVersion: resolvedLatestVersion,
@@ -15641,8 +16131,8 @@ function shouldUseInkList() {
15641
16131
  async function runInkList(options) {
15642
16132
  const [React, { renderInteractive }, { ListApp }] = await Promise.all([
15643
16133
  import("react"),
15644
- import("./render-nj-UaPdn.js"),
15645
- import("./ListApp-BRUsT43Y.js")
16134
+ import("./render-DznWrxGO.js"),
16135
+ import("./ListApp-DLmM02JS.js")
15646
16136
  ]);
15647
16137
  let result = { cancelled: false };
15648
16138
  let resolveResult;
@@ -17950,8 +18440,8 @@ function registerRunEvaluationTool(server) {
17950
18440
  }
17951
18441
  filteredTestSuite.tests = filteredTests;
17952
18442
  }
17953
- const { evaluate } = await import("./evaluator-DgLKaZk8.js");
17954
- const evalRecord = await (await import("./eval-BQPLBJbw.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
18443
+ const { evaluate } = await import("./evaluator-B9LGbKI8.js");
18444
+ const evalRecord = await (await import("./eval-DfR9885C.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
17955
18445
  logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
17956
18446
  const startTime = Date.now();
17957
18447
  const result = await evaluate(filteredTestSuite, evalRecord, {
@@ -18517,6 +19007,9 @@ function analyzeConfiguration(config) {
18517
19007
  }
18518
19008
  //#endregion
18519
19009
  //#region src/commands/mcp/server.ts
19010
+ function setMcpTransport(transport) {
19011
+ Object.assign(process.env, { MCP_TRANSPORT: transport });
19012
+ }
18520
19013
  /**
18521
19014
  * Creates an MCP server with tools for interacting with promptfoo
18522
19015
  */
@@ -18552,7 +19045,7 @@ async function createMcpServer() {
18552
19045
  */
18553
19046
  async function startHttpMcpServer(port) {
18554
19047
  if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error(`Invalid port number: ${port}. Port must be an integer between 1 and 65535.`);
18555
- process.env.MCP_TRANSPORT = "http";
19048
+ setMcpTransport("http");
18556
19049
  const app = express();
18557
19050
  app.use(express.json());
18558
19051
  const mcpServer = await createMcpServer();
@@ -18609,7 +19102,7 @@ async function startHttpMcpServer(port) {
18609
19102
  * Starts an MCP server with stdio transport
18610
19103
  */
18611
19104
  async function startStdioMcpServer() {
18612
- process.env.MCP_TRANSPORT = "stdio";
19105
+ setMcpTransport("stdio");
18613
19106
  logger.transports.forEach((transport) => {
18614
19107
  if (transport.constructor.name === "Console" || transport.name === "console") transport.silent = true;
18615
19108
  });
@@ -19478,7 +19971,7 @@ function printErrorInformation(errorLogFile, debugLogFile) {
19478
19971
  async function main() {
19479
19972
  setupEnvFilesFromArgv();
19480
19973
  initializeRunLogging();
19481
- if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") process.env.PROMPTFOO_DISABLE_UPDATE = "true";
19974
+ if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") Object.assign(process.env, { PROMPTFOO_DISABLE_UPDATE: "true" });
19482
19975
  await checkForUpdates();
19483
19976
  await runDbMigrations();
19484
19977
  const { defaultConfig, defaultConfigPath } = await loadDefaultConfig();