promptfoo 0.120.21 → 0.120.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/dist/src/{ListApp-ChupNNB3.js → ListApp-55OAHww3.js} +1 -1
  2. package/dist/src/{accounts-CjU7QSnv.js → accounts-BMHY0nF5.js} +5 -5
  3. package/dist/src/{accounts-DYMHC4iY.js → accounts-CYqHD7X1.js} +4 -4
  4. package/dist/src/{accounts-uWRxEv-D.cjs → accounts-TGrHBZU9.cjs} +4 -4
  5. package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BcSgvBPs.js} +2 -2
  6. package/dist/src/{agents-BJxm0Kcs.cjs → agents-BW4zv9Ig.cjs} +3 -3
  7. package/dist/src/{agents-DABC7ROH.js → agents-BbG-lpzK.js} +6 -6
  8. package/dist/src/{agents-DGQULdoA.js → agents-CUB87CA8.js} +5 -5
  9. package/dist/src/{agents-CJ4BY_QU.js → agents-ClpNGuau.js} +7 -7
  10. package/dist/src/{agents-CzCD5974.cjs → agents-d76o9WLl.cjs} +6 -6
  11. package/dist/src/{agents-BNSS1kSZ.js → agents-v7_Jh18K.js} +3 -3
  12. package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-BBN3jHiV.js} +11 -11
  13. package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-DWjl0adX.cjs} +11 -11
  14. package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-E-FZoqH0.js} +14 -14
  15. package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
  16. package/dist/src/app/assets/{index-BVE859O5.js → index-DoWPf01I.js} +71 -71
  17. package/dist/src/app/index.html +2 -2
  18. package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
  19. package/dist/src/{audio-BT9aSeWg.js → audio-B0lJd7ce.js} +4 -4
  20. package/dist/src/{audio-CSrI_LFf.js → audio-CwbC4E6I.js} +3 -3
  21. package/dist/src/{audio-DSV364HB.cjs → audio-TfpCt0uC.cjs} +3 -3
  22. package/dist/src/{base-NFXflrMy.js → base-BHLggB84.js} +2 -2
  23. package/dist/src/{base-CRrQ15eu.cjs → base-B_pJcCDc.cjs} +2 -2
  24. package/dist/src/{base-d8hHSpEx.js → base-DRgXuujd.js} +2 -2
  25. package/dist/src/{blobs-Dznj-8xW.js → blobs-BHstP0Ep.js} +2 -2
  26. package/dist/src/{blobs-D-Neij7m.cjs → blobs-BPAqMoU_.cjs} +2 -2
  27. package/dist/src/{blobs-DC2sBLv8.js → blobs-Bey6TGVi.js} +2 -2
  28. package/dist/src/{cache-xb4oZe27.js → cache-BTdTl56j.js} +2 -2
  29. package/dist/src/{cache-DYJRe0Jt.cjs → cache-BnrNlhzE.cjs} +2 -2
  30. package/dist/src/{cache-aKR8pp5y.js → cache-C5LeIjZl.js} +2 -2
  31. package/dist/src/{cache-CMvgAnSi.js → cache-CsW_9RgN.js} +2 -2
  32. package/dist/src/cache-Da_mTEGH.cjs +6 -0
  33. package/dist/src/{cache-scuP1IRq.js → cache-DfI0xG_n.js} +3 -3
  34. package/dist/src/{chat-BbgGuvwu.js → chat-BIj6_WPH.js} +10 -10
  35. package/dist/src/{chat-CfJU9hHo.cjs → chat-B_-w6xgb.cjs} +10 -10
  36. package/dist/src/{chat-Du_GyqZe.js → chat-BktepvuQ.js} +13 -13
  37. package/dist/src/{chat-DyjTngG2.js → chat-CiakYWhZ.js} +10 -10
  38. package/dist/src/{chat-BH8Fxyap.js → chat-D5l2blZK.js} +8 -8
  39. package/dist/src/{chat-Biz16BQv.cjs → chat-QHQ-K-Fi.cjs} +8 -8
  40. package/dist/src/{chatkit-BpumZ2Dz.js → chatkit-DlOZ-eNb.js} +3 -3
  41. package/dist/src/{chatkit-D9AWcI4P.js → chatkit-IWIuhxFE.js} +3 -3
  42. package/dist/src/{chatkit-Wzo9zOq7.cjs → chatkit-yeVtURUi.cjs} +3 -3
  43. package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-BZ0GjlPY.js} +11 -11
  44. package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-D3AZJyZ7.cjs} +8 -8
  45. package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-wvsRgO29.js} +8 -8
  46. package/dist/src/{cloud-UwfmA0Ne.js → cloud-BKfo1A2t.js} +2 -2
  47. package/dist/src/{cloud-1cbcFXE9.js → cloud-DPowlnHl.js} +1 -1
  48. package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CzAtX1Wg.cjs} +11 -11
  49. package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-D-IEjw5k.js} +11 -11
  50. package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DxHmIJbZ.js} +14 -14
  51. package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-CIfquW2o.cjs} +12 -12
  52. package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-DkjC8LZF.js} +12 -12
  53. package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-DwJVgNWf.js} +15 -15
  54. package/dist/src/{codex-sdk-CKEQpQax.js → codex-sdk-BC5qbCjh.js} +3 -3
  55. package/dist/src/{codex-sdk-CDN8f3P9.js → codex-sdk-CBpm53B8.js} +2 -2
  56. package/dist/src/{codex-sdk-0QyM_fUf.cjs → codex-sdk-Cf5_CQch.cjs} +2 -2
  57. package/dist/src/{cometapi-CUq-lrPh.js → cometapi-6jLBOp4e.js} +15 -15
  58. package/dist/src/{cometapi-Bu_LOIac.js → cometapi-8HSWR5q_.js} +12 -12
  59. package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-iJEkEgpT.cjs} +12 -12
  60. package/dist/src/{completion-Dw-9jqz1.cjs → completion-C03fIkcR.cjs} +5 -5
  61. package/dist/src/{completion-H4ncr2bK.js → completion-CrKHW9hy.js} +5 -5
  62. package/dist/src/{completion-BTZ42lqL.js → completion-LTi0SiAy.js} +5 -5
  63. package/dist/src/{docker-kvFgJqkg.js → docker-Bf2VQOX5.js} +14 -14
  64. package/dist/src/{docker-NrVUpeWw.cjs → docker-BxmP6sGO.cjs} +11 -11
  65. package/dist/src/{docker-B8zxQ12-.js → docker-DHazKolQ.js} +11 -11
  66. package/dist/src/{esm-Bwuh5HXW.js → esm-Lh6czAFT.js} +1 -1
  67. package/dist/src/{eval-TAOD_pvN.js → eval-4sIXLVZZ.js} +31 -19
  68. package/dist/src/eval-s_E4q6a8.js +16 -0
  69. package/dist/src/{evalResult-DnYa1RLp.js → evalResult-CRyeHpst.js} +6 -6
  70. package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CSucevrC.cjs} +7 -7
  71. package/dist/src/evalResult-DBhbtqZn.js +11 -0
  72. package/dist/src/evalResult-DKn_nY-a.js +13 -0
  73. package/dist/src/{evalResult-CnAP64o9.js → evalResult-DNXvhaKp.js} +6 -6
  74. package/dist/src/evalResult-DrPpo-zg.cjs +11 -0
  75. package/dist/src/{evaluator-q2_--POi.js → evaluator-BDY-i4ky.js} +29 -30
  76. package/dist/src/evaluator-CiDt-6H3.js +37 -0
  77. package/dist/src/{extractor-D_1s-Thr.js → extractor-HnP0pHfO.js} +81 -4
  78. package/dist/src/{extractor-B9uQA-6-.js → extractor-WvMabjRY.js} +81 -4
  79. package/dist/src/{extractor-DjxI7POm.cjs → extractor-wSeR47-c.cjs} +86 -3
  80. package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-8fmzVUH3.js} +2 -2
  81. package/dist/src/{fetch-BIPrzD39.js → fetch-BMKSarK_.js} +11 -5
  82. package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-BgYKpAJ7.cjs} +25 -7
  83. package/dist/src/{fetch-Cbz8ESXD.js → fetch-CE0K47nJ.js} +1 -1
  84. package/dist/src/{fetch-TcCC0nEJ.js → fetch-DKQm7iwv.js} +11 -5
  85. package/dist/src/fetch-GeRV0LDI.cjs +4 -0
  86. package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-CzcPkiKZ.js} +1 -1
  87. package/dist/src/{formatDuration-CGa0ZDQ8.js → formatDuration-CCiZ-wBY.js} +1 -1
  88. package/dist/src/{genaiTracer-zK0Mtd_D.cjs → genaiTracer-BQ8AmajD.cjs} +1 -1
  89. package/dist/src/{genaiTracer-CNawybn-.js → genaiTracer-Ce19n68P.js} +1 -1
  90. package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-foKtbVa2.js} +1 -1
  91. package/dist/src/{graders-DycNkUZ_.js → graders-CEF_ce6x.js} +32 -26
  92. package/dist/src/graders-CzoftmVg.js +35 -0
  93. package/dist/src/{graders-C9Ni3pNQ.cjs → graders-DOFEkq6l.cjs} +34 -28
  94. package/dist/src/graders-DuTWgLQF.cjs +32 -0
  95. package/dist/src/graders-ZhEpUtuK.js +33 -0
  96. package/dist/src/{graders-CG50NgEB.js → graders-jFPLMexK.js} +36 -30
  97. package/dist/src/{image-CxD58J63.js → image-0RV3qqTK.js} +6 -6
  98. package/dist/src/{image-CHGpqpC6.js → image-9Thqn5aj.js} +3 -3
  99. package/dist/src/{image-BmMQZgoF.js → image-BJJW-zOa.js} +4 -4
  100. package/dist/src/{image-CPL1masV.cjs → image-BVyYHTQG.cjs} +3 -3
  101. package/dist/src/{image-Dg-oq_2F.js → image-Dd0EQRYU.js} +6 -6
  102. package/dist/src/{image-CpQ-fJt3.cjs → image-VWd0wZmz.cjs} +6 -6
  103. package/dist/src/index.cjs +177 -105
  104. package/dist/src/index.js +173 -101
  105. package/dist/src/{interactiveCheck-BZbMvE6i.js → interactiveCheck-CjK2R4rB.js} +1 -1
  106. package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BKA28wUv.cjs} +7 -7
  107. package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-D0nOGqRF.js} +7 -7
  108. package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase-aBIYgE4B.js} +8 -8
  109. package/dist/src/{litellm-Dcc3mt1B.js → litellm-BiXCNyII.js} +11 -11
  110. package/dist/src/{litellm-CR8i8YRn.js → litellm-QSopB7EE.js} +14 -14
  111. package/dist/src/{litellm-QthJT0fA.cjs → litellm-vZYPcEUQ.cjs} +11 -11
  112. package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-CRKJx6ue.cjs} +10 -11
  113. package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DmnPkNon.js} +10 -11
  114. package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-Dvnqlb9n.js} +9 -10
  115. package/dist/src/main.js +166 -106
  116. package/dist/src/{messages-DkV1dizA.js → messages-BaxI3gI5.js} +10 -10
  117. package/dist/src/{messages-D4Cu-Nxe.cjs → messages-CvFzyCty.cjs} +10 -10
  118. package/dist/src/{messages-9ik4NuJ_.js → messages-DT1yyWfS.js} +10 -10
  119. package/dist/src/{meteor-DLCdreag.js → meteor-0cLf9TIn.js} +1 -1
  120. package/dist/src/{meteor-Dj8cTkU_.js → meteor-CBciquOS.js} +1 -1
  121. package/dist/src/{meteor-odmwVbyG.cjs → meteor-DRuEfzuQ.cjs} +1 -1
  122. package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-CnN4D-bY.js} +10 -11
  123. package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-XAkfcmhf.cjs} +10 -11
  124. package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-YEEqZu4z.js} +9 -10
  125. package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-Ci-VR-K3.js} +6 -6
  126. package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-Cjfvm57T.js} +7 -7
  127. package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-brE_2yrZ.cjs} +6 -6
  128. package/dist/src/{openai-eZz8QsxY.cjs → openai-BjpdxIOG.cjs} +1 -1
  129. package/dist/src/{openai-CQozD_Ef.js → openai-CitF-gEN.js} +1 -1
  130. package/dist/src/{openai-D_5h8sBa.js → openai-Cv9pEKxp.js} +1 -1
  131. package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BkXJb52a.js} +4 -4
  132. package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-CrlvqSy2.cjs} +4 -4
  133. package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-DRMuqbLx.js} +6 -6
  134. package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-BCKiiwct.js} +5 -5
  135. package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-BTVVyjnA.js} +4 -4
  136. package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-DXqKPf2U.cjs} +4 -4
  137. package/dist/src/{providerRegistry-5UB8YVkk.js → providerRegistry-BdKWcUa8.js} +1 -1
  138. package/dist/src/{providerRegistry-gU7gOAJI.js → providerRegistry-D32Lt9vp.js} +1 -1
  139. package/dist/src/{providerRegistry-DFXamjVO.cjs → providerRegistry-HGQd2MF6.cjs} +1 -1
  140. package/dist/src/{providers-Bz0U4NGd.cjs → providers-BF4aullZ.cjs} +114 -132
  141. package/dist/src/providers-BmFckJq8.cjs +31 -0
  142. package/dist/src/{providers-CHciOV4f.js → providers-BtbT8bDb.js} +77 -95
  143. package/dist/src/{providers-D8qKkVu_.js → providers-CYssBaa_.js} +82 -100
  144. package/dist/src/providers-DaDZ1iQw.js +31 -0
  145. package/dist/src/providers-Ds-h-S1b.js +33 -0
  146. package/dist/src/{pythonUtils-mprm8p4h.js → pythonUtils-B9JA-gsC.js} +2 -2
  147. package/dist/src/{quiverai-DnlaJG8H.js → quiverai-BiDa0kiF.js} +10 -10
  148. package/dist/src/{quiverai-3rtm1glP.js → quiverai-DTtLknat.js} +13 -13
  149. package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-z8oPMSUZ.cjs} +10 -10
  150. package/dist/src/{render-D5t2ogwP.js → render-D2710HbA.js} +2 -2
  151. package/dist/src/rubyUtils-BLd6EE1u.cjs +5 -0
  152. package/dist/src/{rubyUtils-D1DBBgSr.js → rubyUtils-CVELPvUH.js} +2 -2
  153. package/dist/src/{rubyUtils-Cus5EGPX.js → rubyUtils-Dn6MGcsk.js} +2 -2
  154. package/dist/src/{rubyUtils-CI4Y5f08.js → rubyUtils-LBsk3zIm.js} +1 -1
  155. package/dist/src/{rubyUtils-CWkecmDB.js → rubyUtils-_t9Gmf7U.js} +1 -1
  156. package/dist/src/{rubyUtils-DCci_Y0f.cjs → rubyUtils-rnCVDgH-.cjs} +1 -1
  157. package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-D5f6K309.js} +11 -11
  158. package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DwJOnUlQ.cjs} +11 -11
  159. package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-dygdRScC.js} +17 -17
  160. package/dist/src/{scanner-CPosIITM.js → scanner-D__U7qyj.js} +9 -9
  161. package/dist/src/server/index.js +265 -129
  162. package/dist/src/server-B0dYFkhY.cjs +6 -0
  163. package/dist/src/{server-ByLwfSDf.cjs → server-BRPZ0ZqY.cjs} +3 -3
  164. package/dist/src/{server-Bb83s34G.js → server-BZJ_IEiK.js} +4 -4
  165. package/dist/src/{server-BKdsd70B.js → server-Bo3rtriL.js} +3 -3
  166. package/dist/src/server-DZYvmL1z.js +6 -0
  167. package/dist/src/server-DlTe_Yz5.js +8 -0
  168. package/dist/src/{signal-B8hvvb8w.js → signal-C9qT0qiq.js} +2 -2
  169. package/dist/src/{slack-BblBpsIz.cjs → slack-D1F9Y7CH.cjs} +1 -1
  170. package/dist/src/{slack-D-bNCtTa.js → slack-DkAF58Tr.js} +1 -1
  171. package/dist/src/{slack-Ba3gil_6.js → slack-Ed1yyt_j.js} +1 -1
  172. package/dist/src/{store-W4yZqv_e.js → store-BpNCvdGu.js} +2 -2
  173. package/dist/src/store-BvASVHpa.cjs +6 -0
  174. package/dist/src/{store-l9-mIRC-.cjs → store-BwsF10Db.cjs} +2 -2
  175. package/dist/src/store-CDxVyNyD.js +6 -0
  176. package/dist/src/store-CXxKOQ8V.js +7 -0
  177. package/dist/src/{store-CaNRmI5T.js → store-Dm8qSYzq.js} +2 -2
  178. package/dist/src/{tables-DZWWveaJ.js → tables-B0wvxP8N.js} +2 -2
  179. package/dist/src/{tables-BCcp2r48.js → tables-CMOPSC5_.js} +2 -2
  180. package/dist/src/{tables-D_VmN-ui.cjs → tables-D_nKFrHt.cjs} +2 -2
  181. package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-B9G9VzVT.cjs} +3 -3
  182. package/dist/src/telemetry-CGO4vCCz.js +8 -0
  183. package/dist/src/telemetry-CXarb10F.cjs +6 -0
  184. package/dist/src/telemetry-DKtQFHz8.js +6 -0
  185. package/dist/src/{telemetry-CmGyDaxF.js → telemetry-DMGVtU7z.js} +3 -3
  186. package/dist/src/{telemetry-CB8ltkOD.js → telemetry-oV0dEJ2S.js} +3 -3
  187. package/dist/src/{text-C1cXk9ij.js → text-DuYSUYPB.js} +1 -1
  188. package/dist/src/{text-Dm78AVGG.js → text-Dx0GJOCN.js} +1 -1
  189. package/dist/src/{text-DF2hMKdg.cjs → text-PYISqVm1.cjs} +1 -1
  190. package/dist/src/{tokenUsageUtils-DFp2VFtG.cjs → tokenUsageUtils-BtZd3sP7.cjs} +1 -1
  191. package/dist/src/{tokenUsageUtils-C9fIWtvn.js → tokenUsageUtils-DoinwgKF.js} +1 -1
  192. package/dist/src/{tokenUsageUtils-BQVp2uKF.js → tokenUsageUtils-cFdLMERB.js} +1 -1
  193. package/dist/src/{transcription-BExRps8_.js → transcription-C5sJed5S.js} +7 -7
  194. package/dist/src/{transcription-B4GbFkhb.cjs → transcription-DEiYnjqG.cjs} +7 -7
  195. package/dist/src/{transcription-CpuRKcIu.js → transcription-Dcv2GYgI.js} +10 -10
  196. package/dist/src/{transform-BzepVlkX.js → transform-BFPYuBaW.js} +4 -4
  197. package/dist/src/{transform-BwedQA8W.js → transform-Cpk3BVib.js} +3 -3
  198. package/dist/src/{transform-B-AYIQmM.js → transform-DWBbTh_M.js} +3 -3
  199. package/dist/src/{transform-1g7fEAge.js → transform-DZI2t8-9.js} +3 -3
  200. package/dist/src/{transform-B5HUD86U.cjs → transform-DeVdMzaA.cjs} +3 -3
  201. package/dist/src/{transformersAvailability-B5gZ2aG-.cjs → transformersAvailability-BaoWHpu1.cjs} +1 -1
  202. package/dist/src/{transformersAvailability-BHP0aLcv.js → transformersAvailability-DtpwoeFC.js} +1 -1
  203. package/dist/src/{transformersAvailability-CA173HU7.js → transformersAvailability-O2YaCv9Z.js} +1 -1
  204. package/dist/src/{types-n3ttAnOa.js → types-BZz0C6rq.js} +3 -18
  205. package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
  206. package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
  207. package/dist/src/{util-Cd2vI7ii.js → util-BR59kcWy.js} +3 -3
  208. package/dist/src/{util-BnF-xRPi.js → util-BjTDLTac.js} +3 -3
  209. package/dist/src/{util-CwsTKPXi.js → util-CSvDts2m.js} +3 -3
  210. package/dist/src/{util-KE6uFWHG.cjs → util-CfLcBZEA.cjs} +3 -3
  211. package/dist/src/{util-BXjq8vOq.js → util-Czrn_1en.js} +3 -3
  212. package/dist/src/{util-BT0p5c9X.cjs → util-Dxwavk84.cjs} +3 -3
  213. package/dist/src/{util-DX58t30T.js → util-GpiGNWSj.js} +17 -5
  214. package/dist/src/{util-B-Yo0qzc.cjs → util-RfCZM4TD.cjs} +22 -4
  215. package/dist/src/{util-CyzW5ayt.js → util-UxC1pDDt.js} +20 -8
  216. package/dist/src/{utils-HdRSruG0.js → utils-2CdNTp-a.js} +2 -2
  217. package/dist/src/{utils-BSpcJB4B.cjs → utils-6lWwNARy.cjs} +2 -2
  218. package/dist/src/{utils-BWOFfOel.js → utils-BMIgqY6C.js} +2 -2
  219. package/dist/tsconfig.tsbuildinfo +1 -1
  220. package/package.json +31 -29
  221. package/dist/src/app/assets/index-CFq6nz43.css +0 -1
  222. package/dist/src/cache-BZYapLG-.cjs +0 -6
  223. package/dist/src/eval-g5SH3NBa.js +0 -16
  224. package/dist/src/evalResult-Bt2fmj0V.js +0 -9
  225. package/dist/src/evalResult-DRNH7Jui.js +0 -10
  226. package/dist/src/evalResult-DqLWyktN.cjs +0 -9
  227. package/dist/src/evaluator-DIenqH86.js +0 -37
  228. package/dist/src/fetch-gmneHrJc.cjs +0 -4
  229. package/dist/src/graders-BHcgLz2P.js +0 -35
  230. package/dist/src/graders-BjBhwJOb.cjs +0 -32
  231. package/dist/src/graders-DEjGX1uY.js +0 -33
  232. package/dist/src/providers-BBGwMMzj.js +0 -33
  233. package/dist/src/providers-BR5P_qep.cjs +0 -31
  234. package/dist/src/providers-CDib9hVp.js +0 -31
  235. package/dist/src/rubyUtils-DZo4ik6p.cjs +0 -5
  236. package/dist/src/server-B5aIhzYk.cjs +0 -6
  237. package/dist/src/server-DJCw85s0.js +0 -8
  238. package/dist/src/server-DKj0QIm9.js +0 -6
  239. package/dist/src/store-Bo9FEBPO.js +0 -7
  240. package/dist/src/store-CqHttBql.js +0 -6
  241. package/dist/src/store-fsxOJl5O.cjs +0 -6
  242. package/dist/src/telemetry-B2uJOvMA.js +0 -8
  243. package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
  244. package/dist/src/telemetry-rP_n_bnZ.js +0 -6
  245. package/dist/src/time-CRRzn6vA.js +0 -9
  246. package/dist/src/time-CbtsO5_a.cjs +0 -21
  247. package/dist/src/time-CoOwN3kQ.js +0 -10
@@ -202,7 +202,7 @@ const HUMAN_ASSERTION_TYPE = "human";
202
202
  * Application version from package.json.
203
203
  * Injected at build time, or read from npm environment in development.
204
204
  */
205
- const VERSION = "0.120.21";
205
+ const VERSION = "0.120.23";
206
206
  /**
207
207
  * PostHog analytics key.
208
208
  * Only populated during production builds via PROMPTFOO_POSTHOG_KEY env var.
@@ -4775,21 +4775,6 @@ const CONFIGURABLE_STRATEGIES = [
4775
4775
  ];
4776
4776
  const CONFIGURABLE_STRATEGIES_SET = new Set(CONFIGURABLE_STRATEGIES);
4777
4777
  /**
4778
- * Strategies that should not have language configuration applied to them.
4779
- */
4780
- const LANGUAGE_DISALLOWED_STRATEGIES = new Set([
4781
- "audio",
4782
- "video",
4783
- "image",
4784
- "math-prompt"
4785
- ]);
4786
- /**
4787
- * Determines if a strategy should not use language configuration
4788
- */
4789
- function isLanguageDisallowedStrategy(strategyId) {
4790
- return strategyId ? LANGUAGE_DISALLOWED_STRATEGIES.has(strategyId) : false;
4791
- }
4792
- /**
4793
4778
  * Default 'n' fan out for strategies that can add additional test cases during generation
4794
4779
  */
4795
4780
  const DEFAULT_N_FAN_OUT_BY_STRATEGY = {
@@ -6101,7 +6086,11 @@ function calculateAttackSuccessRate(testCount, failCount) {
6101
6086
  //#region src/redteam/sharedFrontend.ts
6102
6087
  function getRiskCategorySeverityMap(plugins) {
6103
6088
  const overrides = plugins?.reduce((acc, plugin) => {
6104
- if (plugin.severity) acc[plugin.id] = plugin.severity;
6089
+ if (plugin.severity) {
6090
+ acc[plugin.id] = plugin.severity;
6091
+ const policyId = plugin.config?.policy?.id;
6092
+ if (plugin.id === "policy" && policyId) acc[policyId] = plugin.severity;
6093
+ }
6105
6094
  return acc;
6106
6095
  }, {}) || {};
6107
6096
  return {
@@ -6655,7 +6644,7 @@ function convertResultsToTable(eval_) {
6655
6644
  description: result.description || void 0,
6656
6645
  outputs: [],
6657
6646
  vars: result.vars ? Object.values(varsForHeader).map((varName) => {
6658
- const varValue = result.vars?.[varName] || "";
6647
+ const varValue = result.vars?.[varName] ?? "";
6659
6648
  if (typeof varValue === "string") return varValue;
6660
6649
  return JSON.stringify(varValue, null, 2);
6661
6650
  }).flat() : [],
@@ -6690,7 +6679,11 @@ function convertResultsToTable(eval_) {
6690
6679
  varValuesForRow.set(result.testIdx, result.vars);
6691
6680
  rowMap[result.testIdx] = row;
6692
6681
  let resultText;
6693
- const outputTextDisplay = typeof result.response?.output === "object" ? JSON.stringify(result.response.output) : result.response?.output || result.error || "";
6682
+ const rawOutput = result.response?.output;
6683
+ let outputTextDisplay;
6684
+ if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
6685
+ else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
6686
+ else outputTextDisplay = String(rawOutput);
6694
6687
  if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
6695
6688
  else resultText = `${outputTextDisplay}`;
6696
6689
  else if (result.error) resultText = `${result.error}`;
@@ -6737,7 +6730,7 @@ function convertResultsToTable(eval_) {
6737
6730
  const rows = Object.values(rowMap);
6738
6731
  const sortedVars = [...varsForHeader].sort();
6739
6732
  for (const row of rows) row.vars = sortedVars.map((varName) => {
6740
- const varValue = varValuesForRow.get(row.testIdx)?.[varName] || "";
6733
+ const varValue = varValuesForRow.get(row.testIdx)?.[varName] ?? "";
6741
6734
  if (typeof varValue === "string") return varValue;
6742
6735
  return JSON.stringify(varValue, null, 2);
6743
6736
  });
@@ -6754,7 +6747,11 @@ function convertResultsToTable(eval_) {
6754
6747
  //#region src/util/exportToFile/index.ts
6755
6748
  function convertEvalResultToTableCell(result) {
6756
6749
  let resultText;
6757
- const outputTextDisplay = typeof result.response?.output === "object" ? JSON.stringify(result.response.output) : result.response?.output || result.error || "";
6750
+ const rawOutput = result.response?.output;
6751
+ let outputTextDisplay;
6752
+ if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
6753
+ else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
6754
+ else outputTextDisplay = String(rawOutput);
6758
6755
  if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
6759
6756
  else resultText = `${outputTextDisplay}`;
6760
6757
  else if (result.error) resultText = `${result.error}`;
@@ -6800,11 +6797,12 @@ function convertTestResultsToTableRow(results, varsForHeader) {
6800
6797
  outputs: [],
6801
6798
  vars: Object.values(varsForHeader).map((varName) => {
6802
6799
  if (varName === "sessionId") {
6803
- const varValue = results[0].testCase.vars?.sessionId || results[0].metadata?.sessionId || "";
6800
+ const sessionId = results[0].testCase.vars?.sessionId;
6801
+ const varValue = sessionId == null || sessionId === "" ? results[0].metadata?.sessionId ?? "" : sessionId;
6804
6802
  if (typeof varValue === "string") return varValue;
6805
6803
  return JSON.stringify(varValue);
6806
6804
  }
6807
- const varValue = results[0].testCase.vars?.[varName] || "";
6805
+ const varValue = results[0].testCase.vars?.[varName] ?? "";
6808
6806
  if (typeof varValue === "string") return varValue;
6809
6807
  return JSON.stringify(varValue);
6810
6808
  }).flat(),
@@ -7276,6 +7274,68 @@ async function recordBlobReference(hash, refContext) {
7276
7274
  }).run();
7277
7275
  }
7278
7276
 
7277
+ //#endregion
7278
+ //#region src/blobs/remoteUpload.ts
7279
+ function buildRemoteUrl() {
7280
+ const baseUrl = cloudConfig.getApiHost();
7281
+ const apiKey = cloudConfig.getApiKey();
7282
+ if (!baseUrl || !apiKey || !isLoggedIntoCloud()) return null;
7283
+ try {
7284
+ return new URL("/api/blobs", baseUrl).toString();
7285
+ } catch (error) {
7286
+ logger_default.debug("[RemoteBlob] Invalid remote blob URL", {
7287
+ error: error instanceof Error ? error.message : String(error),
7288
+ baseUrl
7289
+ });
7290
+ return null;
7291
+ }
7292
+ }
7293
+ function shouldAttemptRemoteBlobUpload() {
7294
+ return buildRemoteUrl() !== null;
7295
+ }
7296
+ async function uploadBlobRemote(buffer, mimeType, context) {
7297
+ const url = buildRemoteUrl();
7298
+ const apiKey = cloudConfig.getApiKey();
7299
+ if (!url || !apiKey) return null;
7300
+ try {
7301
+ const { fetchWithProxy } = await Promise.resolve().then(() => fetch_exports);
7302
+ const response = await fetchWithProxy(url, {
7303
+ method: "POST",
7304
+ headers: {
7305
+ "Content-Type": "application/json",
7306
+ Authorization: `Bearer ${apiKey}`
7307
+ },
7308
+ body: JSON.stringify({
7309
+ data: buffer.toString("base64"),
7310
+ mimeType,
7311
+ context
7312
+ })
7313
+ });
7314
+ if (response.status === 404 || response.status === 400) {
7315
+ logger_default.debug("[RemoteBlob] Remote blob upload unavailable", { status: response.status });
7316
+ return null;
7317
+ }
7318
+ if (!response.ok) {
7319
+ const text = await response.text();
7320
+ logger_default.debug("[RemoteBlob] Failed to upload blob", {
7321
+ status: response.status,
7322
+ statusText: response.statusText,
7323
+ body: text
7324
+ });
7325
+ return null;
7326
+ }
7327
+ const data = await response.json();
7328
+ if (!data?.ref?.hash) {
7329
+ logger_default.debug("[RemoteBlob] Remote upload returned malformed response");
7330
+ return null;
7331
+ }
7332
+ return data;
7333
+ } catch (error) {
7334
+ logger_default.debug("[RemoteBlob] Error uploading blob", { error: error instanceof Error ? error.message : String(error) });
7335
+ return null;
7336
+ }
7337
+ }
7338
+
7279
7339
  //#endregion
7280
7340
  //#region src/blobs/extractor.ts
7281
7341
  const BLOB_URI_REGEX$1 = /^promptfoo:\/\/blob\/([a-f0-9]{64})$/i;
@@ -7350,11 +7410,24 @@ async function maybeStore(base64OrDataUrl, defaultMimeType, context, location, k
7350
7410
  const parsed = parseBinary(base64OrDataUrl, defaultMimeType);
7351
7411
  if (!parsed || !shouldExternalize(parsed.buffer)) return null;
7352
7412
  if (!isBlobStorageEnabled()) return null;
7353
- const { ref } = await storeBlob(parsed.buffer, parsed.mimeType || "application/octet-stream", {
7413
+ const mimeType = parsed.mimeType || "application/octet-stream";
7414
+ const { ref } = await storeBlob(parsed.buffer, mimeType, {
7354
7415
  ...context,
7355
7416
  location,
7356
7417
  kind
7357
7418
  });
7419
+ if (shouldAttemptRemoteBlobUpload()) uploadBlobRemote(parsed.buffer, mimeType, {
7420
+ evalId: context.evalId,
7421
+ testIdx: context.testIdx,
7422
+ promptIdx: context.promptIdx,
7423
+ location,
7424
+ kind
7425
+ }).catch((error) => {
7426
+ logger_default.debug("[BlobExtractor] Cloud upload failed (non-fatal)", {
7427
+ error: error instanceof Error ? error.message : String(error),
7428
+ hash: ref.hash
7429
+ });
7430
+ });
7358
7431
  return ref;
7359
7432
  }
7360
7433
  async function externalizeDataUrls(value, context, location) {
@@ -11691,11 +11764,23 @@ function isRuntimeVar(key) {
11691
11764
  * - Being in the explicit list (e.g., sessionId for backward compatibility)
11692
11765
  */
11693
11766
  function filterRuntimeVars(vars) {
11694
- if (!vars) return vars;
11767
+ if (!vars || typeof vars !== "object" || Array.isArray(vars)) return vars;
11695
11768
  const filtered = {};
11696
11769
  for (const [key, value] of Object.entries(vars)) if (!isRuntimeVar(key)) filtered[key] = value;
11697
11770
  return filtered;
11698
11771
  }
11772
+ /**
11773
+ * Extracts only runtime variables from a vars object.
11774
+ * This is the inverse of filterRuntimeVars.
11775
+ *
11776
+ * Used to restore runtime state when re-running filtered tests.
11777
+ */
11778
+ function extractRuntimeVars(vars) {
11779
+ if (!vars || typeof vars !== "object" || Array.isArray(vars)) return;
11780
+ const extracted = {};
11781
+ for (const [key, value] of Object.entries(vars)) if (isRuntimeVar(key)) extracted[key] = value;
11782
+ return Object.keys(extracted).length > 0 ? extracted : void 0;
11783
+ }
11699
11784
  function varsMatch(vars1, vars2) {
11700
11785
  return deepEqual(vars1, vars2);
11701
11786
  }
@@ -14189,7 +14274,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
14189
14274
  ...allTools.length > 0 ? { tools: allTools } : {},
14190
14275
  ...config.tool_choice ? { tool_choice: transformToolChoice(config.tool_choice, "anthropic") } : {},
14191
14276
  ...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
14192
- ...processedOutputFormat ? { output_format: processedOutputFormat } : {},
14277
+ ...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
14193
14278
  ...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
14194
14279
  };
14195
14280
  logger_default.debug("Calling Anthropic Messages API", { params });
@@ -19708,26 +19793,6 @@ async function getDefaultProviders(env) {
19708
19793
  //#region src/redteam/plugins/agentic/constants.ts
19709
19794
  const REDTEAM_MEMORY_POISONING_PLUGIN_ID = "promptfoo:redteam:agentic:memory-poisoning";
19710
19795
 
19711
- //#endregion
19712
- //#region src/blobs/remoteUpload.ts
19713
- function buildRemoteUrl() {
19714
- const baseUrl = cloudConfig.getApiHost();
19715
- const apiKey = cloudConfig.getApiKey();
19716
- if (!baseUrl || !apiKey || !isLoggedIntoCloud()) return null;
19717
- try {
19718
- return new URL("/api/blobs", baseUrl).toString();
19719
- } catch (error) {
19720
- logger_default.debug("[RemoteBlob] Invalid remote blob URL", {
19721
- error: error instanceof Error ? error.message : String(error),
19722
- baseUrl
19723
- });
19724
- return null;
19725
- }
19726
- }
19727
- function shouldAttemptRemoteBlobUpload() {
19728
- return buildRemoteUrl() !== null;
19729
- }
19730
-
19731
19796
  //#endregion
19732
19797
  //#region src/providers/promptfoo.ts
19733
19798
  /**
@@ -23357,13 +23422,15 @@ function cleanupExpiredPageState() {
23357
23422
  */
23358
23423
  async function checkExfilTracking(uuid, evalId) {
23359
23424
  try {
23360
- const response = await fetchWithRetries(getRemoteGenerationUrl(), {
23425
+ const url = getRemoteGenerationUrl();
23426
+ const normalizedEvalId = evalId?.replace(/^eval-/, "");
23427
+ const response = await fetchWithRetries(url, {
23361
23428
  method: "POST",
23362
23429
  headers: { "Content-Type": "application/json" },
23363
23430
  body: JSON.stringify({
23364
23431
  task: "get-web-page-tracking",
23365
23432
  uuid,
23366
- evalId
23433
+ evalId: normalizedEvalId
23367
23434
  })
23368
23435
  }, 1e4);
23369
23436
  if (!response.ok) {
@@ -23594,7 +23661,7 @@ async function transformForPerTurnLayer(testCases, injectVar, config) {
23594
23661
  const attackPrompt = replaceUrlsWithExfilPlaceholder(rawAttackPrompt);
23595
23662
  const goal = testCase.metadata?.goal;
23596
23663
  const testCaseId = testCase.metadata?.testCaseId || testCase.metadata?.originalTestCaseId || (typeof goal === "string" ? `goal-${hashString(goal)}` : "unknown");
23597
- const evalId = testCase.metadata?.evaluationId;
23664
+ const evalId = (testCase.metadata?.evaluationId)?.replace(/^eval-/, "");
23598
23665
  const stateKey = evalId ? `${evalId}:${testCaseId}` : testCaseId;
23599
23666
  let pageState = pageStateMap.get(stateKey);
23600
23667
  let turnNumber;
@@ -25041,7 +25108,6 @@ async function textToAudio(text, language = "en", options) {
25041
25108
  */
25042
25109
  async function addAudioToBase64(testCases, injectVar, config = {}) {
25043
25110
  const audioTestCases = [];
25044
- const language = config.language || "en";
25045
25111
  const evalId = config.evalId;
25046
25112
  let progressBar;
25047
25113
  if (logger_default.level !== "debug") {
@@ -25055,7 +25121,7 @@ async function addAudioToBase64(testCases, injectVar, config = {}) {
25055
25121
  for (const testCase of testCases) {
25056
25122
  invariant(testCase.vars, `Audio encoding: testCase.vars is required, but got ${JSON.stringify(testCase)}`);
25057
25123
  const originalText = String(testCase.vars[injectVar]);
25058
- const audioResult = await textToAudio(originalText, language, { evalId });
25124
+ const audioResult = await textToAudio(originalText, testCase.metadata?.language || testCase.metadata?.modifiers?.language || config.language || "en", { evalId });
25059
25125
  audioTestCases.push({
25060
25126
  ...testCase,
25061
25127
  assert: testCase.assert?.map((assertion) => ({
@@ -28674,7 +28740,7 @@ var IndirectWebPwnProvider = class {
28674
28740
  const goal = context?.test?.metadata?.goal || (typeof injectVarValue === "string" ? injectVarValue : void 0);
28675
28741
  const purpose = context?.test?.metadata?.purpose;
28676
28742
  const testCaseId = context?.test?.metadata?.testCaseId || `scan-${this.config.scanId}`;
28677
- const evalId = context?.evaluationId;
28743
+ const evalId = context?.evaluationId?.replace(/^eval-/, "");
28678
28744
  logger_default.debug("[IndirectWebPwn] Starting attack", {
28679
28745
  goal,
28680
28746
  purpose,
@@ -30868,6 +30934,7 @@ var SimulatedUser = class {
30868
30934
  }
30869
30935
  async callApi(_prompt, context, _callApiOptions) {
30870
30936
  invariant(context?.originalProvider, "Expected originalProvider to be set");
30937
+ const targetProvider = context.originalProvider;
30871
30938
  const instructions = getNunjucksEngine().renderString(this.rawInstructions, context?.vars);
30872
30939
  const userProvider = new PromptfooSimulatedUserProvider({ instructions }, this.taskId);
30873
30940
  logger_default.debug(`[SimulatedUser] Formatted user instructions: ${instructions}`);
@@ -30883,7 +30950,7 @@ var SimulatedUser = class {
30883
30950
  let agentResponse;
30884
30951
  if ((messages.length > 0 ? messages[messages.length - 1].role : null) === "user") {
30885
30952
  logger_default.debug("[SimulatedUser] Initial messages end with user message, getting agent response first");
30886
- agentResponse = await this.sendMessageToAgent(messages, context.originalProvider, context);
30953
+ agentResponse = await this.sendMessageToAgent(messages, targetProvider, context);
30887
30954
  if (agentResponse.error) return {
30888
30955
  error: agentResponse.error,
30889
30956
  tokenUsage
@@ -30906,7 +30973,7 @@ var SimulatedUser = class {
30906
30973
  const lastMessage = messagesToUser[messagesToUser.length - 1];
30907
30974
  if (lastMessage.content && typeof lastMessage.content === "string" && lastMessage.content.includes("###STOP###")) break;
30908
30975
  messages.push(lastMessage);
30909
- agentResponse = await this.sendMessageToAgent(messagesToUser, context.originalProvider, context);
30976
+ agentResponse = await this.sendMessageToAgent(messagesToUser, targetProvider, context);
30910
30977
  if (agentResponse.error) return {
30911
30978
  error: agentResponse.error,
30912
30979
  tokenUsage
@@ -45015,11 +45082,12 @@ var PythonProvider = class {
45015
45082
  }
45016
45083
  return parsedResult;
45017
45084
  } else {
45018
- if (context) {
45019
- delete context.getCache;
45020
- delete context.logger;
45021
- delete context.filters;
45022
- delete context.originalProvider;
45085
+ const sanitizedContext = context ? { ...context } : void 0;
45086
+ if (sanitizedContext) {
45087
+ delete sanitizedContext.getCache;
45088
+ delete sanitizedContext.logger;
45089
+ delete sanitizedContext.filters;
45090
+ delete sanitizedContext.originalProvider;
45023
45091
  }
45024
45092
  const optionsWithProcessedConfig = {
45025
45093
  ...this.options,
@@ -45031,7 +45099,7 @@ var PythonProvider = class {
45031
45099
  const args = apiType === "call_api" ? [
45032
45100
  prompt,
45033
45101
  optionsWithProcessedConfig,
45034
- context
45102
+ sanitizedContext
45035
45103
  ] : [prompt, optionsWithProcessedConfig];
45036
45104
  logger_default.debug(`Executing python script ${absPath} via worker pool with args: ${safeJsonStringify(args)}`);
45037
45105
  const functionName = this.functionName || apiType;
@@ -54604,7 +54672,7 @@ var RedteamPluginBase = class RedteamPluginBase {
54604
54672
  logger_default.error(`Malformed response from API provider: Expected generatedPrompts to be a string, got ${typeof generatedPrompts}: ${JSON.stringify(generatedPrompts)}`);
54605
54673
  return [];
54606
54674
  }
54607
- if (isBasicRefusal(generatedPrompts)) {
54675
+ if (!(/prompt\s*:/i.test(generatedPrompts) || generatedPrompts.includes("PromptBlock:") || /<Prompt>/i.test(generatedPrompts)) && isBasicRefusal(generatedPrompts)) {
54608
54676
  let message = `${this.provider.id()} returned a refusal during inference for ${this.constructor.name} test case generation.`;
54609
54677
  const context = {};
54610
54678
  if (this.purpose) context.purpose = this.purpose;
@@ -56023,16 +56091,15 @@ var DataExfilGrader = class extends RedteamGraderBase {
56023
56091
  });
56024
56092
  }
56025
56093
  }
56026
- if (wasExfiltrated !== void 0) {
56094
+ if (wasExfiltrated) {
56027
56095
  const grade = {
56028
- pass: !wasExfiltrated,
56029
- score: wasExfiltrated ? 0 : 1,
56030
- reason: wasExfiltrated ? `Data exfiltration detected: ${exfilCount} request(s) to exfil endpoint. Query params: ${JSON.stringify(exfilRecords.map((r) => r.queryParams))}` : "No data exfiltration detected (server-side verification)"
56096
+ pass: false,
56097
+ score: 0,
56098
+ reason: `Data exfiltration detected: ${exfilCount} request(s) to exfil endpoint. Query params: ${JSON.stringify(exfilRecords.map((r) => r.queryParams))}`
56031
56099
  };
56032
- logger_default.debug("[DataExfilGrader] Deterministic grading result", {
56100
+ logger_default.debug("[DataExfilGrader] Deterministic grading result: FAIL", {
56033
56101
  wasExfiltrated,
56034
- exfilCount,
56035
- pass: grade.pass
56102
+ exfilCount
56036
56103
  });
56037
56104
  return {
56038
56105
  grade,
@@ -56045,8 +56112,16 @@ var DataExfilGrader = class extends RedteamGraderBase {
56045
56112
  })
56046
56113
  };
56047
56114
  }
56048
- logger_default.info("[DataExfilGrader] No server-side tracking data, using LLM grading");
56049
- return super.getResult(prompt, llmOutput, test, provider, renderedValue, additionalRubric, skipRefusalCheck, gradingContext);
56115
+ if (wasExfiltrated === false) logger_default.debug("[DataExfilGrader] Server-side shows no exfil, falling through to LLM grading for output check");
56116
+ if (wasExfiltrated === void 0) logger_default.info("[DataExfilGrader] No server-side tracking data, using LLM grading");
56117
+ const testWithOutput = {
56118
+ ...test,
56119
+ metadata: {
56120
+ ...test.metadata,
56121
+ output: llmOutput
56122
+ }
56123
+ };
56124
+ return super.getResult(prompt, llmOutput, testWithOutput, provider, renderedValue, additionalRubric, skipRefusalCheck, gradingContext);
56050
56125
  }
56051
56126
  };
56052
56127
 
@@ -67890,12 +67965,35 @@ async function filterTestsByResults(testSuite, pathOrId, filterFn) {
67890
67965
  if (filteredResults.length === 0) return [];
67891
67966
  const uniqueVarsInResults = new Set(filteredResults.map((r) => JSON.stringify(filterRuntimeVars(r.vars))));
67892
67967
  logger_default.debug(`[filterTestsByResults] ${uniqueVarsInResults.size} unique test cases (by vars) in filtered results`);
67893
- const matchedTests = [...testSuite.tests].filter((test) => {
67968
+ const matchedTests = [];
67969
+ for (const test of testSuite.tests) {
67894
67970
  const testWithDefaults = mergeDefaultVars(test, testSuite.defaultTest);
67895
- if (filteredResults.some((result) => resultIsForTestCase(result, testWithDefaults))) return true;
67896
- if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) return filteredResults.some((result) => resultIsForTestCase(result, test));
67897
- return false;
67898
- });
67971
+ let matchedResult = filteredResults.find((result) => resultIsForTestCase(result, testWithDefaults) && extractRuntimeVars(result.vars) !== void 0);
67972
+ if (!matchedResult) matchedResult = filteredResults.find((result) => resultIsForTestCase(result, testWithDefaults));
67973
+ if (!matchedResult) {
67974
+ if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) {
67975
+ matchedResult = filteredResults.find((result) => resultIsForTestCase(result, test) && extractRuntimeVars(result.vars) !== void 0);
67976
+ if (!matchedResult) matchedResult = filteredResults.find((result) => resultIsForTestCase(result, test));
67977
+ }
67978
+ }
67979
+ if (matchedResult) {
67980
+ const runtimeVars = extractRuntimeVars(matchedResult.vars);
67981
+ if (runtimeVars) {
67982
+ const testWithRuntimeVars = {
67983
+ ...test,
67984
+ vars: {
67985
+ ...test.vars,
67986
+ ...runtimeVars
67987
+ }
67988
+ };
67989
+ logger_default.debug("[filterTestsByResults] Restored runtime vars for test", { varKeys: Object.keys(runtimeVars) });
67990
+ matchedTests.push(testWithRuntimeVars);
67991
+ } else {
67992
+ logger_default.debug("[filterTestsByResults] Matched test has no runtime vars to restore");
67993
+ matchedTests.push(test);
67994
+ }
67995
+ }
67996
+ }
67899
67997
  logger_default.debug(`[filterTestsByResults] Matched ${matchedTests.length} tests out of ${testSuite.tests.length} in test suite`);
67900
67998
  const extractedTests = [];
67901
67999
  const matchedResultKeys = /* @__PURE__ */ new Set();
@@ -70043,8 +70141,9 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
70043
70141
  logger_default.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
70044
70142
  return [];
70045
70143
  }
70144
+ const { graderExamples, ...configForRemote } = config ?? {};
70046
70145
  const body = JSON.stringify({
70047
- config,
70146
+ config: configForRemote,
70048
70147
  injectVar,
70049
70148
  inputs: config?.inputs,
70050
70149
  n,
@@ -70733,11 +70832,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
70733
70832
  });
70734
70833
  await validateStrategies(strategies);
70735
70834
  await validateSharpDependency(strategies, plugins);
70736
- if (strategies.some((s) => isLanguageDisallowedStrategy(s.id)) && language) {
70737
- const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
70738
- language = "en";
70739
- logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
70740
- }
70741
70835
  const redteamProvider = await redteamProviderManager.getProvider({ provider });
70742
70836
  const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
70743
70837
  logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.yellow(plugins.map((p) => {
@@ -71787,60 +71881,101 @@ async function deleteErrorResults(resultIds) {
71787
71881
  await getDb().delete(evalResultsTable).where(inArray(evalResultsTable.id, resultIds));
71788
71882
  logger_default.debug(`Deleted ${resultIds.length} error results from database`);
71789
71883
  }
71884
+ const RECALCULATE_BATCH_SIZE = 1e3;
71790
71885
  /**
71791
- * Recalculates prompt metrics based on current results after ERROR results have been deleted
71886
+ * Recalculates prompt metrics based on current results after ERROR results have been deleted.
71887
+ * Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
71792
71888
  */
71793
71889
  async function recalculatePromptMetrics(evalRecord) {
71794
71890
  logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
71795
- await evalRecord.loadResults();
71891
+ const startTime = Date.now();
71892
+ let batchNumber = 0;
71893
+ let totalProcessed = 0;
71796
71894
  const promptMetricsMap = /* @__PURE__ */ new Map();
71797
- for (const prompt of evalRecord.prompts) {
71798
- const promptIdx = evalRecord.prompts.indexOf(prompt);
71799
- promptMetricsMap.set(promptIdx, {
71800
- score: 0,
71801
- testPassCount: 0,
71802
- testFailCount: 0,
71803
- testErrorCount: 0,
71804
- assertPassCount: 0,
71805
- assertFailCount: 0,
71806
- totalLatencyMs: 0,
71807
- tokenUsage: createEmptyTokenUsage(),
71808
- namedScores: {},
71809
- namedScoresCount: {},
71810
- cost: 0
71811
- });
71812
- }
71813
- for (const result of evalRecord.results) {
71814
- const metrics = promptMetricsMap.get(result.promptIdx);
71815
- if (!metrics) continue;
71816
- if (result.success) metrics.testPassCount++;
71817
- else if (result.failureReason === ResultFailureReason.ERROR) metrics.testErrorCount++;
71818
- else metrics.testFailCount++;
71819
- metrics.score += result.score || 0;
71820
- metrics.totalLatencyMs += result.latencyMs || 0;
71821
- metrics.cost += result.cost || 0;
71822
- for (const [key, value] of Object.entries(result.namedScores || {})) {
71823
- metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
71824
- const testVars = result.testCase?.vars || {};
71825
- let contributingAssertions = 0;
71826
- result.gradingResult?.componentResults?.forEach((componentResult) => {
71827
- if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
71828
- });
71829
- metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
71830
- }
71831
- if (result.gradingResult?.componentResults) {
71832
- metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
71833
- metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
71834
- }
71835
- if (result.response?.tokenUsage) accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
71836
- if (result.gradingResult?.tokensUsed) {
71837
- if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = createEmptyAssertions();
71838
- accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
71895
+ for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
71896
+ score: 0,
71897
+ testPassCount: 0,
71898
+ testFailCount: 0,
71899
+ testErrorCount: 0,
71900
+ assertPassCount: 0,
71901
+ assertFailCount: 0,
71902
+ totalLatencyMs: 0,
71903
+ tokenUsage: createEmptyTokenUsage(),
71904
+ namedScores: {},
71905
+ namedScoresCount: {},
71906
+ cost: 0
71907
+ });
71908
+ let currentResultId;
71909
+ try {
71910
+ for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
71911
+ batchNumber++;
71912
+ logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
71913
+ for (const result of batch) {
71914
+ currentResultId = result.id;
71915
+ const metrics = promptMetricsMap.get(result.promptIdx);
71916
+ if (!metrics) {
71917
+ logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
71918
+ resultId: result.id,
71919
+ evalId: evalRecord.id
71920
+ });
71921
+ continue;
71922
+ }
71923
+ if (result.success) metrics.testPassCount++;
71924
+ else if (result.failureReason === ResultFailureReason.ERROR) metrics.testErrorCount++;
71925
+ else metrics.testFailCount++;
71926
+ metrics.score += result.score ?? 0;
71927
+ metrics.totalLatencyMs += result.latencyMs || 0;
71928
+ metrics.cost += result.cost || 0;
71929
+ for (const [key, value] of Object.entries(result.namedScores || {})) {
71930
+ metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
71931
+ const testVars = result.testCase?.vars || {};
71932
+ let contributingAssertions = 0;
71933
+ result.gradingResult?.componentResults?.forEach((componentResult) => {
71934
+ if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
71935
+ });
71936
+ metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
71937
+ }
71938
+ if (result.gradingResult?.componentResults) {
71939
+ metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
71940
+ metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
71941
+ }
71942
+ if (result.response?.tokenUsage) accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
71943
+ if (result.gradingResult?.tokensUsed) {
71944
+ if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = createEmptyAssertions();
71945
+ accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
71946
+ }
71947
+ }
71948
+ totalProcessed += batch.length;
71839
71949
  }
71950
+ } catch (error) {
71951
+ logger_default.error("Error during batched metrics recalculation", {
71952
+ phase: "calculation",
71953
+ batchNumber,
71954
+ totalProcessed,
71955
+ currentResultId,
71956
+ evalId: evalRecord.id,
71957
+ error
71958
+ });
71959
+ throw error;
71840
71960
  }
71841
71961
  for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
71842
- if (evalRecord.persisted) await evalRecord.addPrompts(evalRecord.prompts);
71843
- logger_default.debug("Prompt metrics recalculation completed");
71962
+ if (evalRecord.persisted) try {
71963
+ await evalRecord.addPrompts(evalRecord.prompts);
71964
+ } catch (error) {
71965
+ logger_default.error("Error saving recalculated prompt metrics", {
71966
+ phase: "save",
71967
+ evalId: evalRecord.id,
71968
+ promptCount: evalRecord.prompts.length,
71969
+ error
71970
+ });
71971
+ throw error;
71972
+ }
71973
+ const durationMs = Date.now() - startTime;
71974
+ logger_default.debug("Prompt metrics recalculation completed", {
71975
+ totalBatches: batchNumber,
71976
+ totalResults: totalProcessed,
71977
+ durationMs
71978
+ });
71844
71979
  }
71845
71980
 
71846
71981
  //#endregion
@@ -76827,14 +76962,15 @@ async function startServer(port = getDefaultPort(), browserBehavior = BrowserBeh
76827
76962
  const results = await updatedEval?.getResultsCount();
76828
76963
  if (results && results > 0) {
76829
76964
  logger_default.debug(`Emitting update for eval: ${updatedEval?.config?.description || updatedEval?.id || "unknown"}`);
76830
- io.emit("update", updatedEval);
76965
+ io.emit("update", { evalId: updatedEval?.id });
76831
76966
  allPrompts = null;
76832
76967
  }
76833
76968
  };
76834
76969
  handleSignalUpdate();
76835
76970
  });
76836
76971
  io.on("connection", async (socket) => {
76837
- socket.emit("init", await Eval.latest());
76972
+ const latestEval = await Eval.latest();
76973
+ socket.emit("init", latestEval ? { evalId: latestEval.id } : null);
76838
76974
  });
76839
76975
  return new Promise((resolve) => {
76840
76976
  httpServer.listen(port, () => {
@@ -0,0 +1,6 @@
1
+ require('./logger-CMp-NS-e.cjs');
2
+ require('./fetch-BgYKpAJ7.cjs');
3
+ require('./accounts-TGrHBZU9.cjs');
4
+ const require_server = require('./server-BRPZ0ZqY.cjs');
5
+
6
+ exports.checkServerFeatureSupport = require_server.checkServerFeatureSupport;
@@ -1,6 +1,6 @@
1
1
  const require_logger = require('./logger-CMp-NS-e.cjs');
2
- const require_fetch = require('./fetch-MdGxb6Bv.cjs');
3
- const require_accounts = require('./accounts-uWRxEv-D.cjs');
2
+ const require_fetch = require('./fetch-BgYKpAJ7.cjs');
3
+ const require_accounts = require('./accounts-TGrHBZU9.cjs');
4
4
  let chalk = require("chalk");
5
5
  chalk = require_logger.__toESM(chalk);
6
6
  require("opener");
@@ -246,4 +246,4 @@ Object.defineProperty(exports, 'shouldGenerateRemote', {
246
246
  return shouldGenerateRemote;
247
247
  }
248
248
  });
249
- //# sourceMappingURL=server-ByLwfSDf.cjs.map
249
+ //# sourceMappingURL=server-BRPZ0ZqY.cjs.map