promptfoo 0.120.21 → 0.120.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/dist/src/{ListApp-ChupNNB3.js → ListApp-55OAHww3.js} +1 -1
  2. package/dist/src/{accounts-CjU7QSnv.js → accounts-BMHY0nF5.js} +5 -5
  3. package/dist/src/{accounts-DYMHC4iY.js → accounts-CYqHD7X1.js} +4 -4
  4. package/dist/src/{accounts-uWRxEv-D.cjs → accounts-TGrHBZU9.cjs} +4 -4
  5. package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BcSgvBPs.js} +2 -2
  6. package/dist/src/{agents-BJxm0Kcs.cjs → agents-BW4zv9Ig.cjs} +3 -3
  7. package/dist/src/{agents-DABC7ROH.js → agents-BbG-lpzK.js} +6 -6
  8. package/dist/src/{agents-DGQULdoA.js → agents-CUB87CA8.js} +5 -5
  9. package/dist/src/{agents-CJ4BY_QU.js → agents-ClpNGuau.js} +7 -7
  10. package/dist/src/{agents-CzCD5974.cjs → agents-d76o9WLl.cjs} +6 -6
  11. package/dist/src/{agents-BNSS1kSZ.js → agents-v7_Jh18K.js} +3 -3
  12. package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-BBN3jHiV.js} +11 -11
  13. package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-DWjl0adX.cjs} +11 -11
  14. package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-E-FZoqH0.js} +14 -14
  15. package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
  16. package/dist/src/app/assets/{index-BVE859O5.js → index-DoWPf01I.js} +71 -71
  17. package/dist/src/app/index.html +2 -2
  18. package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
  19. package/dist/src/{audio-BT9aSeWg.js → audio-B0lJd7ce.js} +4 -4
  20. package/dist/src/{audio-CSrI_LFf.js → audio-CwbC4E6I.js} +3 -3
  21. package/dist/src/{audio-DSV364HB.cjs → audio-TfpCt0uC.cjs} +3 -3
  22. package/dist/src/{base-NFXflrMy.js → base-BHLggB84.js} +2 -2
  23. package/dist/src/{base-CRrQ15eu.cjs → base-B_pJcCDc.cjs} +2 -2
  24. package/dist/src/{base-d8hHSpEx.js → base-DRgXuujd.js} +2 -2
  25. package/dist/src/{blobs-Dznj-8xW.js → blobs-BHstP0Ep.js} +2 -2
  26. package/dist/src/{blobs-D-Neij7m.cjs → blobs-BPAqMoU_.cjs} +2 -2
  27. package/dist/src/{blobs-DC2sBLv8.js → blobs-Bey6TGVi.js} +2 -2
  28. package/dist/src/{cache-xb4oZe27.js → cache-BTdTl56j.js} +2 -2
  29. package/dist/src/{cache-DYJRe0Jt.cjs → cache-BnrNlhzE.cjs} +2 -2
  30. package/dist/src/{cache-aKR8pp5y.js → cache-C5LeIjZl.js} +2 -2
  31. package/dist/src/{cache-CMvgAnSi.js → cache-CsW_9RgN.js} +2 -2
  32. package/dist/src/cache-Da_mTEGH.cjs +6 -0
  33. package/dist/src/{cache-scuP1IRq.js → cache-DfI0xG_n.js} +3 -3
  34. package/dist/src/{chat-BbgGuvwu.js → chat-BIj6_WPH.js} +10 -10
  35. package/dist/src/{chat-CfJU9hHo.cjs → chat-B_-w6xgb.cjs} +10 -10
  36. package/dist/src/{chat-Du_GyqZe.js → chat-BktepvuQ.js} +13 -13
  37. package/dist/src/{chat-DyjTngG2.js → chat-CiakYWhZ.js} +10 -10
  38. package/dist/src/{chat-BH8Fxyap.js → chat-D5l2blZK.js} +8 -8
  39. package/dist/src/{chat-Biz16BQv.cjs → chat-QHQ-K-Fi.cjs} +8 -8
  40. package/dist/src/{chatkit-BpumZ2Dz.js → chatkit-DlOZ-eNb.js} +3 -3
  41. package/dist/src/{chatkit-D9AWcI4P.js → chatkit-IWIuhxFE.js} +3 -3
  42. package/dist/src/{chatkit-Wzo9zOq7.cjs → chatkit-yeVtURUi.cjs} +3 -3
  43. package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-BZ0GjlPY.js} +11 -11
  44. package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-D3AZJyZ7.cjs} +8 -8
  45. package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-wvsRgO29.js} +8 -8
  46. package/dist/src/{cloud-UwfmA0Ne.js → cloud-BKfo1A2t.js} +2 -2
  47. package/dist/src/{cloud-1cbcFXE9.js → cloud-DPowlnHl.js} +1 -1
  48. package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CzAtX1Wg.cjs} +11 -11
  49. package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-D-IEjw5k.js} +11 -11
  50. package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DxHmIJbZ.js} +14 -14
  51. package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-CIfquW2o.cjs} +12 -12
  52. package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-DkjC8LZF.js} +12 -12
  53. package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-DwJVgNWf.js} +15 -15
  54. package/dist/src/{codex-sdk-CKEQpQax.js → codex-sdk-BC5qbCjh.js} +3 -3
  55. package/dist/src/{codex-sdk-CDN8f3P9.js → codex-sdk-CBpm53B8.js} +2 -2
  56. package/dist/src/{codex-sdk-0QyM_fUf.cjs → codex-sdk-Cf5_CQch.cjs} +2 -2
  57. package/dist/src/{cometapi-CUq-lrPh.js → cometapi-6jLBOp4e.js} +15 -15
  58. package/dist/src/{cometapi-Bu_LOIac.js → cometapi-8HSWR5q_.js} +12 -12
  59. package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-iJEkEgpT.cjs} +12 -12
  60. package/dist/src/{completion-Dw-9jqz1.cjs → completion-C03fIkcR.cjs} +5 -5
  61. package/dist/src/{completion-H4ncr2bK.js → completion-CrKHW9hy.js} +5 -5
  62. package/dist/src/{completion-BTZ42lqL.js → completion-LTi0SiAy.js} +5 -5
  63. package/dist/src/{docker-kvFgJqkg.js → docker-Bf2VQOX5.js} +14 -14
  64. package/dist/src/{docker-NrVUpeWw.cjs → docker-BxmP6sGO.cjs} +11 -11
  65. package/dist/src/{docker-B8zxQ12-.js → docker-DHazKolQ.js} +11 -11
  66. package/dist/src/{esm-Bwuh5HXW.js → esm-Lh6czAFT.js} +1 -1
  67. package/dist/src/{eval-TAOD_pvN.js → eval-4sIXLVZZ.js} +31 -19
  68. package/dist/src/eval-s_E4q6a8.js +16 -0
  69. package/dist/src/{evalResult-DnYa1RLp.js → evalResult-CRyeHpst.js} +6 -6
  70. package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CSucevrC.cjs} +7 -7
  71. package/dist/src/evalResult-DBhbtqZn.js +11 -0
  72. package/dist/src/evalResult-DKn_nY-a.js +13 -0
  73. package/dist/src/{evalResult-CnAP64o9.js → evalResult-DNXvhaKp.js} +6 -6
  74. package/dist/src/evalResult-DrPpo-zg.cjs +11 -0
  75. package/dist/src/{evaluator-q2_--POi.js → evaluator-BDY-i4ky.js} +29 -30
  76. package/dist/src/evaluator-CiDt-6H3.js +37 -0
  77. package/dist/src/{extractor-D_1s-Thr.js → extractor-HnP0pHfO.js} +81 -4
  78. package/dist/src/{extractor-B9uQA-6-.js → extractor-WvMabjRY.js} +81 -4
  79. package/dist/src/{extractor-DjxI7POm.cjs → extractor-wSeR47-c.cjs} +86 -3
  80. package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-8fmzVUH3.js} +2 -2
  81. package/dist/src/{fetch-BIPrzD39.js → fetch-BMKSarK_.js} +11 -5
  82. package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-BgYKpAJ7.cjs} +25 -7
  83. package/dist/src/{fetch-Cbz8ESXD.js → fetch-CE0K47nJ.js} +1 -1
  84. package/dist/src/{fetch-TcCC0nEJ.js → fetch-DKQm7iwv.js} +11 -5
  85. package/dist/src/fetch-GeRV0LDI.cjs +4 -0
  86. package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-CzcPkiKZ.js} +1 -1
  87. package/dist/src/{formatDuration-CGa0ZDQ8.js → formatDuration-CCiZ-wBY.js} +1 -1
  88. package/dist/src/{genaiTracer-zK0Mtd_D.cjs → genaiTracer-BQ8AmajD.cjs} +1 -1
  89. package/dist/src/{genaiTracer-CNawybn-.js → genaiTracer-Ce19n68P.js} +1 -1
  90. package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-foKtbVa2.js} +1 -1
  91. package/dist/src/{graders-DycNkUZ_.js → graders-CEF_ce6x.js} +32 -26
  92. package/dist/src/graders-CzoftmVg.js +35 -0
  93. package/dist/src/{graders-C9Ni3pNQ.cjs → graders-DOFEkq6l.cjs} +34 -28
  94. package/dist/src/graders-DuTWgLQF.cjs +32 -0
  95. package/dist/src/graders-ZhEpUtuK.js +33 -0
  96. package/dist/src/{graders-CG50NgEB.js → graders-jFPLMexK.js} +36 -30
  97. package/dist/src/{image-CxD58J63.js → image-0RV3qqTK.js} +6 -6
  98. package/dist/src/{image-CHGpqpC6.js → image-9Thqn5aj.js} +3 -3
  99. package/dist/src/{image-BmMQZgoF.js → image-BJJW-zOa.js} +4 -4
  100. package/dist/src/{image-CPL1masV.cjs → image-BVyYHTQG.cjs} +3 -3
  101. package/dist/src/{image-Dg-oq_2F.js → image-Dd0EQRYU.js} +6 -6
  102. package/dist/src/{image-CpQ-fJt3.cjs → image-VWd0wZmz.cjs} +6 -6
  103. package/dist/src/index.cjs +177 -105
  104. package/dist/src/index.js +173 -101
  105. package/dist/src/{interactiveCheck-BZbMvE6i.js → interactiveCheck-CjK2R4rB.js} +1 -1
  106. package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BKA28wUv.cjs} +7 -7
  107. package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-D0nOGqRF.js} +7 -7
  108. package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase-aBIYgE4B.js} +8 -8
  109. package/dist/src/{litellm-Dcc3mt1B.js → litellm-BiXCNyII.js} +11 -11
  110. package/dist/src/{litellm-CR8i8YRn.js → litellm-QSopB7EE.js} +14 -14
  111. package/dist/src/{litellm-QthJT0fA.cjs → litellm-vZYPcEUQ.cjs} +11 -11
  112. package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-CRKJx6ue.cjs} +10 -11
  113. package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DmnPkNon.js} +10 -11
  114. package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-Dvnqlb9n.js} +9 -10
  115. package/dist/src/main.js +166 -106
  116. package/dist/src/{messages-DkV1dizA.js → messages-BaxI3gI5.js} +10 -10
  117. package/dist/src/{messages-D4Cu-Nxe.cjs → messages-CvFzyCty.cjs} +10 -10
  118. package/dist/src/{messages-9ik4NuJ_.js → messages-DT1yyWfS.js} +10 -10
  119. package/dist/src/{meteor-DLCdreag.js → meteor-0cLf9TIn.js} +1 -1
  120. package/dist/src/{meteor-Dj8cTkU_.js → meteor-CBciquOS.js} +1 -1
  121. package/dist/src/{meteor-odmwVbyG.cjs → meteor-DRuEfzuQ.cjs} +1 -1
  122. package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-CnN4D-bY.js} +10 -11
  123. package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-XAkfcmhf.cjs} +10 -11
  124. package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-YEEqZu4z.js} +9 -10
  125. package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-Ci-VR-K3.js} +6 -6
  126. package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-Cjfvm57T.js} +7 -7
  127. package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-brE_2yrZ.cjs} +6 -6
  128. package/dist/src/{openai-eZz8QsxY.cjs → openai-BjpdxIOG.cjs} +1 -1
  129. package/dist/src/{openai-CQozD_Ef.js → openai-CitF-gEN.js} +1 -1
  130. package/dist/src/{openai-D_5h8sBa.js → openai-Cv9pEKxp.js} +1 -1
  131. package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BkXJb52a.js} +4 -4
  132. package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-CrlvqSy2.cjs} +4 -4
  133. package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-DRMuqbLx.js} +6 -6
  134. package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-BCKiiwct.js} +5 -5
  135. package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-BTVVyjnA.js} +4 -4
  136. package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-DXqKPf2U.cjs} +4 -4
  137. package/dist/src/{providerRegistry-5UB8YVkk.js → providerRegistry-BdKWcUa8.js} +1 -1
  138. package/dist/src/{providerRegistry-gU7gOAJI.js → providerRegistry-D32Lt9vp.js} +1 -1
  139. package/dist/src/{providerRegistry-DFXamjVO.cjs → providerRegistry-HGQd2MF6.cjs} +1 -1
  140. package/dist/src/{providers-Bz0U4NGd.cjs → providers-BF4aullZ.cjs} +114 -132
  141. package/dist/src/providers-BmFckJq8.cjs +31 -0
  142. package/dist/src/{providers-CHciOV4f.js → providers-BtbT8bDb.js} +77 -95
  143. package/dist/src/{providers-D8qKkVu_.js → providers-CYssBaa_.js} +82 -100
  144. package/dist/src/providers-DaDZ1iQw.js +31 -0
  145. package/dist/src/providers-Ds-h-S1b.js +33 -0
  146. package/dist/src/{pythonUtils-mprm8p4h.js → pythonUtils-B9JA-gsC.js} +2 -2
  147. package/dist/src/{quiverai-DnlaJG8H.js → quiverai-BiDa0kiF.js} +10 -10
  148. package/dist/src/{quiverai-3rtm1glP.js → quiverai-DTtLknat.js} +13 -13
  149. package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-z8oPMSUZ.cjs} +10 -10
  150. package/dist/src/{render-D5t2ogwP.js → render-D2710HbA.js} +2 -2
  151. package/dist/src/rubyUtils-BLd6EE1u.cjs +5 -0
  152. package/dist/src/{rubyUtils-D1DBBgSr.js → rubyUtils-CVELPvUH.js} +2 -2
  153. package/dist/src/{rubyUtils-Cus5EGPX.js → rubyUtils-Dn6MGcsk.js} +2 -2
  154. package/dist/src/{rubyUtils-CI4Y5f08.js → rubyUtils-LBsk3zIm.js} +1 -1
  155. package/dist/src/{rubyUtils-CWkecmDB.js → rubyUtils-_t9Gmf7U.js} +1 -1
  156. package/dist/src/{rubyUtils-DCci_Y0f.cjs → rubyUtils-rnCVDgH-.cjs} +1 -1
  157. package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-D5f6K309.js} +11 -11
  158. package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DwJOnUlQ.cjs} +11 -11
  159. package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-dygdRScC.js} +17 -17
  160. package/dist/src/{scanner-CPosIITM.js → scanner-D__U7qyj.js} +9 -9
  161. package/dist/src/server/index.js +265 -129
  162. package/dist/src/server-B0dYFkhY.cjs +6 -0
  163. package/dist/src/{server-ByLwfSDf.cjs → server-BRPZ0ZqY.cjs} +3 -3
  164. package/dist/src/{server-Bb83s34G.js → server-BZJ_IEiK.js} +4 -4
  165. package/dist/src/{server-BKdsd70B.js → server-Bo3rtriL.js} +3 -3
  166. package/dist/src/server-DZYvmL1z.js +6 -0
  167. package/dist/src/server-DlTe_Yz5.js +8 -0
  168. package/dist/src/{signal-B8hvvb8w.js → signal-C9qT0qiq.js} +2 -2
  169. package/dist/src/{slack-BblBpsIz.cjs → slack-D1F9Y7CH.cjs} +1 -1
  170. package/dist/src/{slack-D-bNCtTa.js → slack-DkAF58Tr.js} +1 -1
  171. package/dist/src/{slack-Ba3gil_6.js → slack-Ed1yyt_j.js} +1 -1
  172. package/dist/src/{store-W4yZqv_e.js → store-BpNCvdGu.js} +2 -2
  173. package/dist/src/store-BvASVHpa.cjs +6 -0
  174. package/dist/src/{store-l9-mIRC-.cjs → store-BwsF10Db.cjs} +2 -2
  175. package/dist/src/store-CDxVyNyD.js +6 -0
  176. package/dist/src/store-CXxKOQ8V.js +7 -0
  177. package/dist/src/{store-CaNRmI5T.js → store-Dm8qSYzq.js} +2 -2
  178. package/dist/src/{tables-DZWWveaJ.js → tables-B0wvxP8N.js} +2 -2
  179. package/dist/src/{tables-BCcp2r48.js → tables-CMOPSC5_.js} +2 -2
  180. package/dist/src/{tables-D_VmN-ui.cjs → tables-D_nKFrHt.cjs} +2 -2
  181. package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-B9G9VzVT.cjs} +3 -3
  182. package/dist/src/telemetry-CGO4vCCz.js +8 -0
  183. package/dist/src/telemetry-CXarb10F.cjs +6 -0
  184. package/dist/src/telemetry-DKtQFHz8.js +6 -0
  185. package/dist/src/{telemetry-CmGyDaxF.js → telemetry-DMGVtU7z.js} +3 -3
  186. package/dist/src/{telemetry-CB8ltkOD.js → telemetry-oV0dEJ2S.js} +3 -3
  187. package/dist/src/{text-C1cXk9ij.js → text-DuYSUYPB.js} +1 -1
  188. package/dist/src/{text-Dm78AVGG.js → text-Dx0GJOCN.js} +1 -1
  189. package/dist/src/{text-DF2hMKdg.cjs → text-PYISqVm1.cjs} +1 -1
  190. package/dist/src/{tokenUsageUtils-DFp2VFtG.cjs → tokenUsageUtils-BtZd3sP7.cjs} +1 -1
  191. package/dist/src/{tokenUsageUtils-C9fIWtvn.js → tokenUsageUtils-DoinwgKF.js} +1 -1
  192. package/dist/src/{tokenUsageUtils-BQVp2uKF.js → tokenUsageUtils-cFdLMERB.js} +1 -1
  193. package/dist/src/{transcription-BExRps8_.js → transcription-C5sJed5S.js} +7 -7
  194. package/dist/src/{transcription-B4GbFkhb.cjs → transcription-DEiYnjqG.cjs} +7 -7
  195. package/dist/src/{transcription-CpuRKcIu.js → transcription-Dcv2GYgI.js} +10 -10
  196. package/dist/src/{transform-BzepVlkX.js → transform-BFPYuBaW.js} +4 -4
  197. package/dist/src/{transform-BwedQA8W.js → transform-Cpk3BVib.js} +3 -3
  198. package/dist/src/{transform-B-AYIQmM.js → transform-DWBbTh_M.js} +3 -3
  199. package/dist/src/{transform-1g7fEAge.js → transform-DZI2t8-9.js} +3 -3
  200. package/dist/src/{transform-B5HUD86U.cjs → transform-DeVdMzaA.cjs} +3 -3
  201. package/dist/src/{transformersAvailability-B5gZ2aG-.cjs → transformersAvailability-BaoWHpu1.cjs} +1 -1
  202. package/dist/src/{transformersAvailability-BHP0aLcv.js → transformersAvailability-DtpwoeFC.js} +1 -1
  203. package/dist/src/{transformersAvailability-CA173HU7.js → transformersAvailability-O2YaCv9Z.js} +1 -1
  204. package/dist/src/{types-n3ttAnOa.js → types-BZz0C6rq.js} +3 -18
  205. package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
  206. package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
  207. package/dist/src/{util-Cd2vI7ii.js → util-BR59kcWy.js} +3 -3
  208. package/dist/src/{util-BnF-xRPi.js → util-BjTDLTac.js} +3 -3
  209. package/dist/src/{util-CwsTKPXi.js → util-CSvDts2m.js} +3 -3
  210. package/dist/src/{util-KE6uFWHG.cjs → util-CfLcBZEA.cjs} +3 -3
  211. package/dist/src/{util-BXjq8vOq.js → util-Czrn_1en.js} +3 -3
  212. package/dist/src/{util-BT0p5c9X.cjs → util-Dxwavk84.cjs} +3 -3
  213. package/dist/src/{util-DX58t30T.js → util-GpiGNWSj.js} +17 -5
  214. package/dist/src/{util-B-Yo0qzc.cjs → util-RfCZM4TD.cjs} +22 -4
  215. package/dist/src/{util-CyzW5ayt.js → util-UxC1pDDt.js} +20 -8
  216. package/dist/src/{utils-HdRSruG0.js → utils-2CdNTp-a.js} +2 -2
  217. package/dist/src/{utils-BSpcJB4B.cjs → utils-6lWwNARy.cjs} +2 -2
  218. package/dist/src/{utils-BWOFfOel.js → utils-BMIgqY6C.js} +2 -2
  219. package/dist/tsconfig.tsbuildinfo +1 -1
  220. package/package.json +31 -29
  221. package/dist/src/app/assets/index-CFq6nz43.css +0 -1
  222. package/dist/src/cache-BZYapLG-.cjs +0 -6
  223. package/dist/src/eval-g5SH3NBa.js +0 -16
  224. package/dist/src/evalResult-Bt2fmj0V.js +0 -9
  225. package/dist/src/evalResult-DRNH7Jui.js +0 -10
  226. package/dist/src/evalResult-DqLWyktN.cjs +0 -9
  227. package/dist/src/evaluator-DIenqH86.js +0 -37
  228. package/dist/src/fetch-gmneHrJc.cjs +0 -4
  229. package/dist/src/graders-BHcgLz2P.js +0 -35
  230. package/dist/src/graders-BjBhwJOb.cjs +0 -32
  231. package/dist/src/graders-DEjGX1uY.js +0 -33
  232. package/dist/src/providers-BBGwMMzj.js +0 -33
  233. package/dist/src/providers-BR5P_qep.cjs +0 -31
  234. package/dist/src/providers-CDib9hVp.js +0 -31
  235. package/dist/src/rubyUtils-DZo4ik6p.cjs +0 -5
  236. package/dist/src/server-B5aIhzYk.cjs +0 -6
  237. package/dist/src/server-DJCw85s0.js +0 -8
  238. package/dist/src/server-DKj0QIm9.js +0 -6
  239. package/dist/src/store-Bo9FEBPO.js +0 -7
  240. package/dist/src/store-CqHttBql.js +0 -6
  241. package/dist/src/store-fsxOJl5O.cjs +0 -6
  242. package/dist/src/telemetry-B2uJOvMA.js +0 -8
  243. package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
  244. package/dist/src/telemetry-rP_n_bnZ.js +0 -6
  245. package/dist/src/time-CRRzn6vA.js +0 -9
  246. package/dist/src/time-CbtsO5_a.cjs +0 -21
  247. package/dist/src/time-CoOwN3kQ.js +0 -10
@@ -1,9 +1,9 @@
1
1
  const require_logger = require('./logger-CMp-NS-e.cjs');
2
- const require_fetch = require('./fetch-MdGxb6Bv.cjs');
3
- const require_cache = require('./cache-DYJRe0Jt.cjs');
4
- const require_openai = require('./openai-eZz8QsxY.cjs');
5
- const require_util = require('./util-BT0p5c9X.cjs');
6
- const require_text = require('./text-DF2hMKdg.cjs');
2
+ const require_fetch = require('./fetch-BgYKpAJ7.cjs');
3
+ const require_cache = require('./cache-BnrNlhzE.cjs');
4
+ const require_openai = require('./openai-BjpdxIOG.cjs');
5
+ const require_util = require('./util-Dxwavk84.cjs');
6
+ const require_text = require('./text-PYISqVm1.cjs');
7
7
 
8
8
  //#region src/providers/openai/image.ts
9
9
  const DALLE2_VALID_SIZES = [
@@ -232,4 +232,4 @@ Object.defineProperty(exports, 'formatOutput', {
232
232
  return formatOutput;
233
233
  }
234
234
  });
235
- //# sourceMappingURL=image-CpQ-fJt3.cjs.map
235
+ //# sourceMappingURL=image-VWd0wZmz.cjs.map
@@ -5,36 +5,35 @@ const require_esm = require('./esm-gWVPXn3r.cjs');
5
5
  const require_pythonUtils = require('./pythonUtils-rOCm9w_5.cjs');
6
6
  const require_fileExtensions = require('./fileExtensions-DkJYkWUy.cjs');
7
7
  const require_transform = require('./transform-DshYLyBq.cjs');
8
- const require_graders = require('./graders-C9Ni3pNQ.cjs');
9
- const require_types = require('./types-DAmPdu4_.cjs');
10
- const require_util = require('./util-B-Yo0qzc.cjs');
11
- const require_fetch = require('./fetch-MdGxb6Bv.cjs');
12
- const require_time = require('./time-CbtsO5_a.cjs');
13
- const require_cache = require('./cache-DYJRe0Jt.cjs');
14
- const require_providers = require('./providers-Bz0U4NGd.cjs');
15
- const require_utils = require('./utils-BSpcJB4B.cjs');
16
- const require_extractor = require('./extractor-DjxI7POm.cjs');
17
- require('./genaiTracer-zK0Mtd_D.cjs');
18
- const require_chat = require('./chat-Biz16BQv.cjs');
19
- const require_tokenUsageUtils = require('./tokenUsageUtils-DFp2VFtG.cjs');
20
- const require_transform$1 = require('./transform-B5HUD86U.cjs');
21
- require('./messages-D4Cu-Nxe.cjs');
22
- require('./util-KE6uFWHG.cjs');
23
- require('./openai-eZz8QsxY.cjs');
24
- const require_util$2 = require('./util-BT0p5c9X.cjs');
25
- require('./completion-Dw-9jqz1.cjs');
26
- const require_accounts = require('./accounts-uWRxEv-D.cjs');
27
- const require_server = require('./server-ByLwfSDf.cjs');
28
- const require_blobs = require('./blobs-D-Neij7m.cjs');
29
- const require_tables = require('./tables-D_VmN-ui.cjs');
30
- const require_telemetry = require('./telemetry-DrSTNTH-.cjs');
31
- const require_text = require('./text-DF2hMKdg.cjs');
32
- const require_store = require('./store-l9-mIRC-.cjs');
33
- require('./base-CRrQ15eu.cjs');
34
- require('./image-CpQ-fJt3.cjs');
35
- const require_providerRegistry = require('./providerRegistry-DFXamjVO.cjs');
36
- const require_rubyUtils = require('./rubyUtils-DCci_Y0f.cjs');
37
- const require_evalResult = require('./evalResult-CFSvp3Zh.cjs');
8
+ const require_graders = require('./graders-DOFEkq6l.cjs');
9
+ const require_types = require('./types-Cixl-wRO.cjs');
10
+ const require_util = require('./util-RfCZM4TD.cjs');
11
+ const require_fetch = require('./fetch-BgYKpAJ7.cjs');
12
+ const require_cache = require('./cache-BnrNlhzE.cjs');
13
+ const require_providers = require('./providers-BF4aullZ.cjs');
14
+ const require_utils = require('./utils-6lWwNARy.cjs');
15
+ const require_extractor = require('./extractor-wSeR47-c.cjs');
16
+ require('./genaiTracer-BQ8AmajD.cjs');
17
+ const require_chat = require('./chat-QHQ-K-Fi.cjs');
18
+ const require_tokenUsageUtils = require('./tokenUsageUtils-BtZd3sP7.cjs');
19
+ const require_transform$1 = require('./transform-DeVdMzaA.cjs');
20
+ require('./messages-CvFzyCty.cjs');
21
+ require('./util-CfLcBZEA.cjs');
22
+ require('./openai-BjpdxIOG.cjs');
23
+ const require_util$2 = require('./util-Dxwavk84.cjs');
24
+ require('./completion-C03fIkcR.cjs');
25
+ const require_accounts = require('./accounts-TGrHBZU9.cjs');
26
+ const require_server = require('./server-BRPZ0ZqY.cjs');
27
+ const require_blobs = require('./blobs-BPAqMoU_.cjs');
28
+ const require_tables = require('./tables-D_nKFrHt.cjs');
29
+ const require_telemetry = require('./telemetry-B9G9VzVT.cjs');
30
+ const require_text = require('./text-PYISqVm1.cjs');
31
+ const require_store = require('./store-BwsF10Db.cjs');
32
+ require('./base-B_pJcCDc.cjs');
33
+ require('./image-VWd0wZmz.cjs');
34
+ const require_providerRegistry = require('./providerRegistry-HGQd2MF6.cjs');
35
+ const require_rubyUtils = require('./rubyUtils-rnCVDgH-.cjs');
36
+ const require_evalResult = require('./evalResult-CSucevrC.cjs');
38
37
  let fs = require("fs");
39
38
  fs = require_logger.__toESM(fs);
40
39
  let path = require("path");
@@ -307,7 +306,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
307
306
  require_telemetry.telemetry_default.record("feature_used", { feature: "tracing" });
308
307
  try {
309
308
  require_logger.logger_default.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
310
- const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-COmdVimi.cjs"));
309
+ const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-DXqKPf2U.cjs"));
311
310
  const port = testSuite.tracing.otlp.http.port || 4318;
312
311
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
313
312
  require_logger.logger_default.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
@@ -330,7 +329,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
330
329
  async function stopOtlpReceiverIfNeeded() {
331
330
  if (otlpReceiverStarted) try {
332
331
  require_logger.logger_default.debug("[EvaluatorTracing] Stopping OTLP receiver");
333
- const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-COmdVimi.cjs"));
332
+ const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-DXqKPf2U.cjs"));
334
333
  await stopOTLPReceiver();
335
334
  otlpReceiverStarted = false;
336
335
  require_logger.logger_default.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -365,7 +364,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
365
364
  }
366
365
  if (!tracingEnabled) return null;
367
366
  require_logger.logger_default.debug("[EvaluatorTracing] Importing trace store");
368
- const { getTraceStore } = await Promise.resolve().then(() => require("./store-fsxOJl5O.cjs"));
367
+ const { getTraceStore } = await Promise.resolve().then(() => require("./store-BvASVHpa.cjs"));
369
368
  const traceStore = getTraceStore();
370
369
  const traceId = generateTraceId();
371
370
  const spanId = generateSpanId();
@@ -2776,7 +2775,7 @@ const ASSERTION_HANDLERS = {
2776
2775
  "llm-rubric": handleLlmRubric,
2777
2776
  meteor: async (params) => {
2778
2777
  try {
2779
- const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-odmwVbyG.cjs"));
2778
+ const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-DRuEfzuQ.cjs"));
2780
2779
  return handleMeteorAssertion(params);
2781
2780
  } catch (error) {
2782
2781
  if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
@@ -2906,7 +2905,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
2906
2905
  };
2907
2906
  }
2908
2907
  else if (filePath.endsWith(".rb")) try {
2909
- const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-DZo4ik6p.cjs"));
2908
+ const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-BLd6EE1u.cjs"));
2910
2909
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
2911
2910
  require_logger.logger_default.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
2912
2911
  } catch (error) {
@@ -3810,7 +3809,7 @@ async function runEval({ provider, prompt, test, testSuite, delay, nunjucksFilte
3810
3809
  require_logger.logger_default.debug(`Evaluator checking cached flag: response.cached = ${Boolean(response.cached)}, provider.delay = ${provider.delay}`);
3811
3810
  if (!response.cached && provider.delay > 0) {
3812
3811
  require_logger.logger_default.debug(`Sleeping for ${provider.delay}ms`);
3813
- await require_time.sleep(provider.delay);
3812
+ await require_fetch.sleep(provider.delay);
3814
3813
  } else if (response.cached) require_logger.logger_default.debug(`Skipping delay because response is cached`);
3815
3814
  const ret = {
3816
3815
  ...setup,
@@ -4266,7 +4265,7 @@ var Evaluator = class {
4266
4265
  const defaultProvider = testSuite.defaultTest.provider;
4267
4266
  if (require_types.isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
4268
4267
  else if (typeof defaultProvider === "object" && defaultProvider.id) {
4269
- const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BR5P_qep.cjs"));
4268
+ const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BmFckJq8.cjs"));
4270
4269
  testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
4271
4270
  } else testCase.provider = defaultProvider;
4272
4271
  }
@@ -4341,7 +4340,7 @@ var Evaluator = class {
4341
4340
  if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
4342
4341
  }
4343
4342
  if (require_logger.cliState_default.resume && this.evalRecord.persisted) try {
4344
- const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-DqLWyktN.cjs"));
4343
+ const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-DrPpo-zg.cjs"));
4345
4344
  const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: require_logger.cliState_default.retryMode });
4346
4345
  const originalCount = runEvalOptions.length;
4347
4346
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -4857,7 +4856,7 @@ var Evaluator = class {
4857
4856
  }
4858
4857
  if (isOtlpReceiverStarted()) {
4859
4858
  require_logger.logger_default.debug("[Evaluator] Waiting for span exports to complete...");
4860
- await require_time.sleep(3e3);
4859
+ await require_fetch.sleep(3e3);
4861
4860
  }
4862
4861
  await stopOtlpReceiverIfNeeded();
4863
4862
  await require_providerRegistry.providerRegistry.shutdownAll();
@@ -4984,7 +4983,11 @@ async function runDbMigrations() {
4984
4983
  //#region src/redteam/sharedFrontend.ts
4985
4984
  function getRiskCategorySeverityMap(plugins) {
4986
4985
  const overrides = plugins?.reduce((acc, plugin) => {
4987
- if (plugin.severity) acc[plugin.id] = plugin.severity;
4986
+ if (plugin.severity) {
4987
+ acc[plugin.id] = plugin.severity;
4988
+ const policyId = plugin.config?.policy?.id;
4989
+ if (plugin.id === "policy" && policyId) acc[policyId] = plugin.severity;
4990
+ }
4988
4991
  return acc;
4989
4992
  }, {}) || {};
4990
4993
  return {
@@ -5273,7 +5276,7 @@ function convertResultsToTable(eval_) {
5273
5276
  description: result.description || void 0,
5274
5277
  outputs: [],
5275
5278
  vars: result.vars ? Object.values(varsForHeader).map((varName) => {
5276
- const varValue = result.vars?.[varName] || "";
5279
+ const varValue = result.vars?.[varName] ?? "";
5277
5280
  if (typeof varValue === "string") return varValue;
5278
5281
  return JSON.stringify(varValue, null, 2);
5279
5282
  }).flat() : [],
@@ -5308,7 +5311,11 @@ function convertResultsToTable(eval_) {
5308
5311
  varValuesForRow.set(result.testIdx, result.vars);
5309
5312
  rowMap[result.testIdx] = row;
5310
5313
  let resultText;
5311
- const outputTextDisplay = typeof result.response?.output === "object" ? JSON.stringify(result.response.output) : result.response?.output || result.error || "";
5314
+ const rawOutput = result.response?.output;
5315
+ let outputTextDisplay;
5316
+ if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
5317
+ else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
5318
+ else outputTextDisplay = String(rawOutput);
5312
5319
  if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
5313
5320
  else resultText = `${outputTextDisplay}`;
5314
5321
  else if (result.error) resultText = `${result.error}`;
@@ -5355,7 +5362,7 @@ function convertResultsToTable(eval_) {
5355
5362
  const rows = Object.values(rowMap);
5356
5363
  const sortedVars = [...varsForHeader].sort();
5357
5364
  for (const row of rows) row.vars = sortedVars.map((varName) => {
5358
- const varValue = varValuesForRow.get(row.testIdx)?.[varName] || "";
5365
+ const varValue = varValuesForRow.get(row.testIdx)?.[varName] ?? "";
5359
5366
  if (typeof varValue === "string") return varValue;
5360
5367
  return JSON.stringify(varValue, null, 2);
5361
5368
  });
@@ -5372,7 +5379,11 @@ function convertResultsToTable(eval_) {
5372
5379
  //#region src/util/exportToFile/index.ts
5373
5380
  function convertEvalResultToTableCell(result) {
5374
5381
  let resultText;
5375
- const outputTextDisplay = typeof result.response?.output === "object" ? JSON.stringify(result.response.output) : result.response?.output || result.error || "";
5382
+ const rawOutput = result.response?.output;
5383
+ let outputTextDisplay;
5384
+ if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
5385
+ else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
5386
+ else outputTextDisplay = String(rawOutput);
5376
5387
  if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
5377
5388
  else resultText = `${outputTextDisplay}`;
5378
5389
  else if (result.error) resultText = `${result.error}`;
@@ -5418,11 +5429,12 @@ function convertTestResultsToTableRow(results, varsForHeader) {
5418
5429
  outputs: [],
5419
5430
  vars: Object.values(varsForHeader).map((varName) => {
5420
5431
  if (varName === "sessionId") {
5421
- const varValue = results[0].testCase.vars?.sessionId || results[0].metadata?.sessionId || "";
5432
+ const sessionId = results[0].testCase.vars?.sessionId;
5433
+ const varValue = sessionId == null || sessionId === "" ? results[0].metadata?.sessionId ?? "" : sessionId;
5422
5434
  if (typeof varValue === "string") return varValue;
5423
5435
  return JSON.stringify(varValue);
5424
5436
  }
5425
- const varValue = results[0].testCase.vars?.[varName] || "";
5437
+ const varValue = results[0].testCase.vars?.[varName] ?? "";
5426
5438
  if (typeof varValue === "string") return varValue;
5427
5439
  return JSON.stringify(varValue);
5428
5440
  }).flat(),
@@ -5879,7 +5891,7 @@ var Eval = class Eval {
5879
5891
  prompts: this.prompts,
5880
5892
  description: this.config.description,
5881
5893
  author: this.author,
5882
- updatedAt: require_time.getCurrentTimestamp(),
5894
+ updatedAt: require_fetch.getCurrentTimestamp(),
5883
5895
  vars: Array.from(this.vars),
5884
5896
  runtimeOptions: sanitizeRuntimeOptions(this.runtimeOptions)
5885
5897
  };
@@ -6596,12 +6608,35 @@ async function filterTestsByResults(testSuite, pathOrId, filterFn) {
6596
6608
  if (filteredResults.length === 0) return [];
6597
6609
  const uniqueVarsInResults = new Set(filteredResults.map((r) => JSON.stringify(require_util.filterRuntimeVars(r.vars))));
6598
6610
  require_logger.logger_default.debug(`[filterTestsByResults] ${uniqueVarsInResults.size} unique test cases (by vars) in filtered results`);
6599
- const matchedTests = [...testSuite.tests].filter((test) => {
6611
+ const matchedTests = [];
6612
+ for (const test of testSuite.tests) {
6600
6613
  const testWithDefaults = mergeDefaultVars(test, testSuite.defaultTest);
6601
- if (filteredResults.some((result) => require_util.resultIsForTestCase(result, testWithDefaults))) return true;
6602
- if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) return filteredResults.some((result) => require_util.resultIsForTestCase(result, test));
6603
- return false;
6604
- });
6614
+ let matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, testWithDefaults) && require_util.extractRuntimeVars(result.vars) !== void 0);
6615
+ if (!matchedResult) matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, testWithDefaults));
6616
+ if (!matchedResult) {
6617
+ if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) {
6618
+ matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, test) && require_util.extractRuntimeVars(result.vars) !== void 0);
6619
+ if (!matchedResult) matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, test));
6620
+ }
6621
+ }
6622
+ if (matchedResult) {
6623
+ const runtimeVars = require_util.extractRuntimeVars(matchedResult.vars);
6624
+ if (runtimeVars) {
6625
+ const testWithRuntimeVars = {
6626
+ ...test,
6627
+ vars: {
6628
+ ...test.vars,
6629
+ ...runtimeVars
6630
+ }
6631
+ };
6632
+ require_logger.logger_default.debug("[filterTestsByResults] Restored runtime vars for test", { varKeys: Object.keys(runtimeVars) });
6633
+ matchedTests.push(testWithRuntimeVars);
6634
+ } else {
6635
+ require_logger.logger_default.debug("[filterTestsByResults] Matched test has no runtime vars to restore");
6636
+ matchedTests.push(test);
6637
+ }
6638
+ }
6639
+ }
6605
6640
  require_logger.logger_default.debug(`[filterTestsByResults] Matched ${matchedTests.length} tests out of ${testSuite.tests.length} in test suite`);
6606
6641
  const extractedTests = [];
6607
6642
  const matchedResultKeys = /* @__PURE__ */ new Set();
@@ -8651,7 +8686,7 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
8651
8686
  const generatePrompts = async () => {
8652
8687
  const result = await unalignedProvider.callApi("");
8653
8688
  if (result.output) {
8654
- if (delayMs > 0) await require_time.sleep(delayMs);
8689
+ if (delayMs > 0) await require_fetch.sleep(delayMs);
8655
8690
  return result.output;
8656
8691
  }
8657
8692
  return [];
@@ -8816,8 +8851,9 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
8816
8851
  require_logger.logger_default.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
8817
8852
  return [];
8818
8853
  }
8854
+ const { graderExamples, ...configForRemote } = config ?? {};
8819
8855
  const body = JSON.stringify({
8820
- config,
8856
+ config: configForRemote,
8821
8857
  injectVar,
8822
8858
  inputs: config?.inputs,
8823
8859
  n,
@@ -9506,11 +9542,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
9506
9542
  });
9507
9543
  await require_providers.validateStrategies(strategies);
9508
9544
  await validateSharpDependency(strategies, plugins);
9509
- if (strategies.some((s) => require_types.isLanguageDisallowedStrategy(s.id)) && language) {
9510
- const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
9511
- language = "en";
9512
- require_logger.logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
9513
- }
9514
9545
  const redteamProvider = await require_providers.redteamProviderManager.getProvider({ provider });
9515
9546
  const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
9516
9547
  require_logger.logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
@@ -10995,60 +11026,101 @@ async function deleteErrorResults(resultIds) {
10995
11026
  await require_tables.getDb().delete(require_tables.evalResultsTable).where((0, drizzle_orm.inArray)(require_tables.evalResultsTable.id, resultIds));
10996
11027
  require_logger.logger_default.debug(`Deleted ${resultIds.length} error results from database`);
10997
11028
  }
11029
+ const RECALCULATE_BATCH_SIZE = 1e3;
10998
11030
  /**
10999
- * Recalculates prompt metrics based on current results after ERROR results have been deleted
11031
+ * Recalculates prompt metrics based on current results after ERROR results have been deleted.
11032
+ * Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
11000
11033
  */
11001
11034
  async function recalculatePromptMetrics(evalRecord) {
11002
11035
  require_logger.logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
11003
- await evalRecord.loadResults();
11036
+ const startTime = Date.now();
11037
+ let batchNumber = 0;
11038
+ let totalProcessed = 0;
11004
11039
  const promptMetricsMap = /* @__PURE__ */ new Map();
11005
- for (const prompt of evalRecord.prompts) {
11006
- const promptIdx = evalRecord.prompts.indexOf(prompt);
11007
- promptMetricsMap.set(promptIdx, {
11008
- score: 0,
11009
- testPassCount: 0,
11010
- testFailCount: 0,
11011
- testErrorCount: 0,
11012
- assertPassCount: 0,
11013
- assertFailCount: 0,
11014
- totalLatencyMs: 0,
11015
- tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
11016
- namedScores: {},
11017
- namedScoresCount: {},
11018
- cost: 0
11019
- });
11020
- }
11021
- for (const result of evalRecord.results) {
11022
- const metrics = promptMetricsMap.get(result.promptIdx);
11023
- if (!metrics) continue;
11024
- if (result.success) metrics.testPassCount++;
11025
- else if (result.failureReason === require_types.ResultFailureReason.ERROR) metrics.testErrorCount++;
11026
- else metrics.testFailCount++;
11027
- metrics.score += result.score || 0;
11028
- metrics.totalLatencyMs += result.latencyMs || 0;
11029
- metrics.cost += result.cost || 0;
11030
- for (const [key, value] of Object.entries(result.namedScores || {})) {
11031
- metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
11032
- const testVars = result.testCase?.vars || {};
11033
- let contributingAssertions = 0;
11034
- result.gradingResult?.componentResults?.forEach((componentResult) => {
11035
- if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
11036
- });
11037
- metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
11038
- }
11039
- if (result.gradingResult?.componentResults) {
11040
- metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
11041
- metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
11042
- }
11043
- if (result.response?.tokenUsage) require_tokenUsageUtils.accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
11044
- if (result.gradingResult?.tokensUsed) {
11045
- if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = require_tokenUsageUtils.createEmptyAssertions();
11046
- require_tokenUsageUtils.accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
11040
+ for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
11041
+ score: 0,
11042
+ testPassCount: 0,
11043
+ testFailCount: 0,
11044
+ testErrorCount: 0,
11045
+ assertPassCount: 0,
11046
+ assertFailCount: 0,
11047
+ totalLatencyMs: 0,
11048
+ tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
11049
+ namedScores: {},
11050
+ namedScoresCount: {},
11051
+ cost: 0
11052
+ });
11053
+ let currentResultId;
11054
+ try {
11055
+ for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
11056
+ batchNumber++;
11057
+ require_logger.logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
11058
+ for (const result of batch) {
11059
+ currentResultId = result.id;
11060
+ const metrics = promptMetricsMap.get(result.promptIdx);
11061
+ if (!metrics) {
11062
+ require_logger.logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
11063
+ resultId: result.id,
11064
+ evalId: evalRecord.id
11065
+ });
11066
+ continue;
11067
+ }
11068
+ if (result.success) metrics.testPassCount++;
11069
+ else if (result.failureReason === require_types.ResultFailureReason.ERROR) metrics.testErrorCount++;
11070
+ else metrics.testFailCount++;
11071
+ metrics.score += result.score ?? 0;
11072
+ metrics.totalLatencyMs += result.latencyMs || 0;
11073
+ metrics.cost += result.cost || 0;
11074
+ for (const [key, value] of Object.entries(result.namedScores || {})) {
11075
+ metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
11076
+ const testVars = result.testCase?.vars || {};
11077
+ let contributingAssertions = 0;
11078
+ result.gradingResult?.componentResults?.forEach((componentResult) => {
11079
+ if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
11080
+ });
11081
+ metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
11082
+ }
11083
+ if (result.gradingResult?.componentResults) {
11084
+ metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
11085
+ metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
11086
+ }
11087
+ if (result.response?.tokenUsage) require_tokenUsageUtils.accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
11088
+ if (result.gradingResult?.tokensUsed) {
11089
+ if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = require_tokenUsageUtils.createEmptyAssertions();
11090
+ require_tokenUsageUtils.accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
11091
+ }
11092
+ }
11093
+ totalProcessed += batch.length;
11047
11094
  }
11095
+ } catch (error) {
11096
+ require_logger.logger_default.error("Error during batched metrics recalculation", {
11097
+ phase: "calculation",
11098
+ batchNumber,
11099
+ totalProcessed,
11100
+ currentResultId,
11101
+ evalId: evalRecord.id,
11102
+ error
11103
+ });
11104
+ throw error;
11048
11105
  }
11049
11106
  for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
11050
- if (evalRecord.persisted) await evalRecord.addPrompts(evalRecord.prompts);
11051
- require_logger.logger_default.debug("Prompt metrics recalculation completed");
11107
+ if (evalRecord.persisted) try {
11108
+ await evalRecord.addPrompts(evalRecord.prompts);
11109
+ } catch (error) {
11110
+ require_logger.logger_default.error("Error saving recalculated prompt metrics", {
11111
+ phase: "save",
11112
+ evalId: evalRecord.id,
11113
+ promptCount: evalRecord.prompts.length,
11114
+ error
11115
+ });
11116
+ throw error;
11117
+ }
11118
+ const durationMs = Date.now() - startTime;
11119
+ require_logger.logger_default.debug("Prompt metrics recalculation completed", {
11120
+ totalBatches: batchNumber,
11121
+ totalResults: totalProcessed,
11122
+ durationMs
11123
+ });
11052
11124
  }
11053
11125
 
11054
11126
  //#endregion