promptfoo 0.120.21 → 0.120.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/dist/src/{accounts-uWRxEv-D.cjs → accounts-BGa3oLcq.cjs} +4 -4
  2. package/dist/src/{accounts-DYMHC4iY.js → accounts-BR9WiyfR.js} +4 -4
  3. package/dist/src/{accounts-CjU7QSnv.js → accounts-Ckp-8szu.js} +5 -5
  4. package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BE3QlHKy.js} +2 -2
  5. package/dist/src/{agents-CzCD5974.cjs → agents-BhbWppc2.cjs} +6 -6
  6. package/dist/src/{agents-DGQULdoA.js → agents-BtgWQypw.js} +3 -3
  7. package/dist/src/{agents-CJ4BY_QU.js → agents-BxHVTa7v.js} +7 -7
  8. package/dist/src/{agents-DABC7ROH.js → agents-CMv_PdKa.js} +6 -6
  9. package/dist/src/{agents-BNSS1kSZ.js → agents-CTESpLg_.js} +2 -2
  10. package/dist/src/{agents-BJxm0Kcs.cjs → agents-CnjTsnFv.cjs} +2 -2
  11. package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-BDfWTIuB.js} +10 -10
  12. package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-B_JaNK7C.cjs} +9 -9
  13. package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-DzmOPVaB.js} +9 -9
  14. package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
  15. package/dist/src/app/assets/{index-BVE859O5.js → index-gBlK8Ooo.js} +74 -74
  16. package/dist/src/app/index.html +2 -2
  17. package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
  18. package/dist/src/{audio-BT9aSeWg.js → audio-CLv1koN0.js} +4 -4
  19. package/dist/src/{audio-CSrI_LFf.js → audio-DCfFE8hp.js} +3 -3
  20. package/dist/src/{audio-DSV364HB.cjs → audio-UH7JqpeT.cjs} +3 -3
  21. package/dist/src/{base-d8hHSpEx.js → base-BMtzQZDN.js} +2 -2
  22. package/dist/src/{base-NFXflrMy.js → base-DGXF05vt.js} +2 -2
  23. package/dist/src/{base-CRrQ15eu.cjs → base-oCiJ630X.cjs} +2 -2
  24. package/dist/src/{blobs-Dznj-8xW.js → blobs-CNXTPz_3.js} +2 -2
  25. package/dist/src/{blobs-DC2sBLv8.js → blobs-eYQ1PJ-z.js} +2 -2
  26. package/dist/src/{blobs-D-Neij7m.cjs → blobs-h2nW1G8J.cjs} +2 -2
  27. package/dist/src/cache-BbrUrAzE.cjs +6 -0
  28. package/dist/src/{cache-aKR8pp5y.js → cache-CG_VUeq6.js} +2 -2
  29. package/dist/src/{cache-CMvgAnSi.js → cache-CkRahBA6.js} +2 -2
  30. package/dist/src/{cache-scuP1IRq.js → cache-CuMs-Ct_.js} +3 -3
  31. package/dist/src/{cache-DYJRe0Jt.cjs → cache-PWdK7nbt.cjs} +2 -2
  32. package/dist/src/{cache-xb4oZe27.js → cache-_erPRF2X.js} +2 -2
  33. package/dist/src/{chat-DyjTngG2.js → chat-4dCIDs0X.js} +8 -8
  34. package/dist/src/{chat-BbgGuvwu.js → chat-Bvvgb30H.js} +6 -6
  35. package/dist/src/{chat-CfJU9hHo.cjs → chat-CPZqlTJM.cjs} +8 -8
  36. package/dist/src/{chat-BH8Fxyap.js → chat-Cz8uiybK.js} +6 -6
  37. package/dist/src/{chat-Biz16BQv.cjs → chat-DKNIBnnw.cjs} +6 -6
  38. package/dist/src/{chat-Du_GyqZe.js → chat-Noh-ZAEA.js} +9 -9
  39. package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-C_1rEeQS.js} +8 -8
  40. package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-Cf9ilftQ.cjs} +8 -8
  41. package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-Duik96Pj.js} +9 -9
  42. package/dist/src/{cloud-UwfmA0Ne.js → cloud-CZHXB86j.js} +2 -2
  43. package/dist/src/{cloud-1cbcFXE9.js → cloud-Ckel3stH.js} +1 -1
  44. package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-8EaAKyYe.js} +9 -9
  45. package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CCHnFeAX.cjs} +9 -9
  46. package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DM055RzH.js} +10 -10
  47. package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-BMUSnpcR.js} +11 -11
  48. package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-Bcw05eOu.js} +10 -10
  49. package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-DNxNiVNW.cjs} +10 -10
  50. package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-Bc95q8BF.cjs} +10 -10
  51. package/dist/src/{cometapi-CUq-lrPh.js → cometapi-CHlpzlEb.js} +11 -11
  52. package/dist/src/{cometapi-Bu_LOIac.js → cometapi-DWXoEmta.js} +10 -10
  53. package/dist/src/{completion-BTZ42lqL.js → completion-2IaHHyEe.js} +4 -4
  54. package/dist/src/{completion-H4ncr2bK.js → completion-D25ktKRj.js} +4 -4
  55. package/dist/src/{completion-Dw-9jqz1.cjs → completion-Xj-aJd1b.cjs} +4 -4
  56. package/dist/src/{docker-kvFgJqkg.js → docker-CJbB16zr.js} +10 -10
  57. package/dist/src/{docker-B8zxQ12-.js → docker-Cc26CxVP.js} +9 -9
  58. package/dist/src/{docker-NrVUpeWw.cjs → docker-jL9w9fA6.cjs} +9 -9
  59. package/dist/src/eval-D4RlVjjL.js +16 -0
  60. package/dist/src/{eval-TAOD_pvN.js → eval-nWuy1AG2.js} +25 -16
  61. package/dist/src/evalResult-BeU2oOzP.cjs +9 -0
  62. package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CPVLU2Np.cjs} +5 -5
  63. package/dist/src/evalResult-Cf2g8jAB.js +10 -0
  64. package/dist/src/{evalResult-CnAP64o9.js → evalResult-ChJlD0Kq.js} +5 -5
  65. package/dist/src/{evalResult-DnYa1RLp.js → evalResult-DIIXLdjz.js} +5 -5
  66. package/dist/src/evalResult-XDigzbtJ.js +9 -0
  67. package/dist/src/evaluator-8HYi1Fph.js +37 -0
  68. package/dist/src/{evaluator-q2_--POi.js → evaluator-GYXFbA_Y.js} +21 -21
  69. package/dist/src/{extractor-DjxI7POm.cjs → extractor-6TFoLBb3.cjs} +2 -2
  70. package/dist/src/{extractor-D_1s-Thr.js → extractor-BEtCUN8X.js} +2 -2
  71. package/dist/src/{extractor-B9uQA-6-.js → extractor-CjCWhyEf.js} +2 -2
  72. package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-24MJ7lmX.js} +2 -2
  73. package/dist/src/fetch-BNRxuf6y.cjs +4 -0
  74. package/dist/src/{fetch-BIPrzD39.js → fetch-Cn7G4hKv.js} +3 -3
  75. package/dist/src/{fetch-TcCC0nEJ.js → fetch-CruohpfF.js} +3 -3
  76. package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-E3xbPP9w.cjs} +3 -3
  77. package/dist/src/{fetch-Cbz8ESXD.js → fetch-YrlVkoyU.js} +1 -1
  78. package/dist/src/{graders-C9Ni3pNQ.cjs → graders-B6ThhLLt.cjs} +14 -14
  79. package/dist/src/{graders-CG50NgEB.js → graders-DcCxY5P5.js} +14 -14
  80. package/dist/src/{graders-DycNkUZ_.js → graders-DmOCxaH5.js} +14 -14
  81. package/dist/src/graders-R44KsWy_.js +35 -0
  82. package/dist/src/graders-femSFbBL.js +33 -0
  83. package/dist/src/graders-q--EyLqs.cjs +32 -0
  84. package/dist/src/{image-CPL1masV.cjs → image-C2ceOyBf.cjs} +3 -3
  85. package/dist/src/{image-CxD58J63.js → image-CKdRHN-J.js} +4 -4
  86. package/dist/src/{image-Dg-oq_2F.js → image-CQjDlt1F.js} +4 -4
  87. package/dist/src/{image-CpQ-fJt3.cjs → image-CVIQmm2j.cjs} +4 -4
  88. package/dist/src/{image-BmMQZgoF.js → image-KltLhC3X.js} +4 -4
  89. package/dist/src/{image-CHGpqpC6.js → image-KxTS2kF1.js} +3 -3
  90. package/dist/src/index.cjs +130 -85
  91. package/dist/src/index.js +130 -85
  92. package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase--WN82LQi.js} +7 -7
  93. package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BqWUVrpf.cjs} +6 -6
  94. package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-Cj6dsMqo.js} +6 -6
  95. package/dist/src/{litellm-CR8i8YRn.js → litellm-Cr4cLT5U.js} +10 -10
  96. package/dist/src/{litellm-Dcc3mt1B.js → litellm-LBVMdo0D.js} +9 -9
  97. package/dist/src/{litellm-QthJT0fA.cjs → litellm-fFlC5dSL.cjs} +9 -9
  98. package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DIn2tYU5.js} +9 -9
  99. package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-DX0-i4oF.js} +8 -8
  100. package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-Di9Jozxk.cjs} +8 -8
  101. package/dist/src/main.js +122 -85
  102. package/dist/src/{messages-DkV1dizA.js → messages-CrKRl5Pk.js} +8 -8
  103. package/dist/src/{messages-D4Cu-Nxe.cjs → messages-DR9VV_jC.cjs} +8 -8
  104. package/dist/src/{messages-9ik4NuJ_.js → messages-HEhixC8X.js} +8 -8
  105. package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-B0gtacpW.js} +8 -8
  106. package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-Bq0thILx.js} +9 -9
  107. package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-DGvKrmSN.cjs} +8 -8
  108. package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-CjysF4hQ.js} +5 -5
  109. package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-D5qOssIP.js} +6 -6
  110. package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-a0F7idWe.cjs} +5 -5
  111. package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BJQlARp2.js} +4 -4
  112. package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-BsA1jKss.js} +5 -5
  113. package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-DLgVrMGX.cjs} +4 -4
  114. package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-14NR1X1K.js} +4 -4
  115. package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-BGH8dMy4.cjs} +4 -4
  116. package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-K8eh39vC.js} +4 -4
  117. package/dist/src/providers-BGK6Bn3A.cjs +31 -0
  118. package/dist/src/providers-BMWePlwX.js +31 -0
  119. package/dist/src/{providers-CHciOV4f.js → providers-C5LI9356.js} +51 -52
  120. package/dist/src/{providers-D8qKkVu_.js → providers-DbetV5MB.js} +52 -53
  121. package/dist/src/providers-EXgGEohn.js +33 -0
  122. package/dist/src/{providers-Bz0U4NGd.cjs → providers-lmV8J0jR.cjs} +51 -52
  123. package/dist/src/{quiverai-DnlaJG8H.js → quiverai-3Uc6za3c.js} +8 -8
  124. package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-QLaorfoo.cjs} +8 -8
  125. package/dist/src/{quiverai-3rtm1glP.js → quiverai-h9NdOMN2.js} +9 -9
  126. package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-BghZ9VSg.js} +12 -12
  127. package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DIBiJ2A1.cjs} +11 -11
  128. package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-Dd4VJGu5.js} +11 -11
  129. package/dist/src/{scanner-CPosIITM.js → scanner-CMWzx1QH.js} +6 -6
  130. package/dist/src/server/index.js +108 -78
  131. package/dist/src/server-BrgtL_8J.js +8 -0
  132. package/dist/src/server-CJPqXqVd.js +6 -0
  133. package/dist/src/{server-BKdsd70B.js → server-DHV0r-FB.js} +3 -3
  134. package/dist/src/server-DZlHxFYI.cjs +6 -0
  135. package/dist/src/{server-Bb83s34G.js → server-JDwxLoXT.js} +4 -4
  136. package/dist/src/{server-ByLwfSDf.cjs → server-PndEy2Nj.cjs} +3 -3
  137. package/dist/src/{signal-B8hvvb8w.js → signal-V7dUBr64.js} +2 -2
  138. package/dist/src/{store-W4yZqv_e.js → store-BSsuF2Jj.js} +2 -2
  139. package/dist/src/store-BppHaTbi.cjs +6 -0
  140. package/dist/src/{store-CaNRmI5T.js → store-C7WtvRII.js} +2 -2
  141. package/dist/src/store-DyZAxX01.js +7 -0
  142. package/dist/src/{store-l9-mIRC-.cjs → store-j9IVHCUH.cjs} +2 -2
  143. package/dist/src/store-jXGyn3ME.js +6 -0
  144. package/dist/src/{tables-BCcp2r48.js → tables-0EDD8777.js} +2 -2
  145. package/dist/src/{tables-D_VmN-ui.cjs → tables-B5ootZ1x.cjs} +2 -2
  146. package/dist/src/{tables-DZWWveaJ.js → tables-DchWyfYe.js} +2 -2
  147. package/dist/src/{telemetry-CB8ltkOD.js → telemetry-Ar0--jvU.js} +3 -3
  148. package/dist/src/telemetry-D186sF_u.js +6 -0
  149. package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-DMNm_KFn.cjs} +3 -3
  150. package/dist/src/telemetry-DPa6sGTP.js +8 -0
  151. package/dist/src/{telemetry-CmGyDaxF.js → telemetry-J9g1ySRA.js} +3 -3
  152. package/dist/src/telemetry-XUPEBnrQ.cjs +6 -0
  153. package/dist/src/{transcription-BExRps8_.js → transcription-BhomPVy1.js} +6 -6
  154. package/dist/src/{transcription-CpuRKcIu.js → transcription-Dijlv9vZ.js} +7 -7
  155. package/dist/src/{transcription-B4GbFkhb.cjs → transcription-hPpt68W9.cjs} +6 -6
  156. package/dist/src/{transform-B5HUD86U.cjs → transform-Bow1252l.cjs} +3 -3
  157. package/dist/src/{transform-B-AYIQmM.js → transform-DFkyOjQ7.js} +3 -3
  158. package/dist/src/{transform-BwedQA8W.js → transform-yRqaT_HG.js} +3 -3
  159. package/dist/src/{types-n3ttAnOa.js → types-BjfZC_QB.js} +2 -17
  160. package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
  161. package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
  162. package/dist/src/{util-DX58t30T.js → util-07lmo96G.js} +3 -3
  163. package/dist/src/{util-Cd2vI7ii.js → util-4LvhctH4.js} +3 -3
  164. package/dist/src/{util-BnF-xRPi.js → util-7G7p_iRM.js} +3 -3
  165. package/dist/src/{util-KE6uFWHG.cjs → util-9SFCdUzk.cjs} +3 -3
  166. package/dist/src/{util-BXjq8vOq.js → util-CD-FiF5u.js} +3 -3
  167. package/dist/src/{util-B-Yo0qzc.cjs → util-CxWghxGa.cjs} +3 -3
  168. package/dist/src/{util-CyzW5ayt.js → util-DhvhJDlT.js} +3 -3
  169. package/dist/src/{util-BT0p5c9X.cjs → util-Dqg4mr2c.cjs} +3 -3
  170. package/dist/src/{util-CwsTKPXi.js → util-ImmJ_Dxe.js} +3 -3
  171. package/dist/src/{utils-BWOFfOel.js → utils-CF4fxllU.js} +2 -2
  172. package/dist/src/{utils-HdRSruG0.js → utils-D_6Jp7Sl.js} +2 -2
  173. package/dist/src/{utils-BSpcJB4B.cjs → utils-u1jU5_Iq.cjs} +2 -2
  174. package/dist/tsconfig.tsbuildinfo +1 -1
  175. package/package.json +10 -8
  176. package/dist/src/app/assets/index-CFq6nz43.css +0 -1
  177. package/dist/src/cache-BZYapLG-.cjs +0 -6
  178. package/dist/src/eval-g5SH3NBa.js +0 -16
  179. package/dist/src/evalResult-Bt2fmj0V.js +0 -9
  180. package/dist/src/evalResult-DRNH7Jui.js +0 -10
  181. package/dist/src/evalResult-DqLWyktN.cjs +0 -9
  182. package/dist/src/evaluator-DIenqH86.js +0 -37
  183. package/dist/src/fetch-gmneHrJc.cjs +0 -4
  184. package/dist/src/graders-BHcgLz2P.js +0 -35
  185. package/dist/src/graders-BjBhwJOb.cjs +0 -32
  186. package/dist/src/graders-DEjGX1uY.js +0 -33
  187. package/dist/src/providers-BBGwMMzj.js +0 -33
  188. package/dist/src/providers-BR5P_qep.cjs +0 -31
  189. package/dist/src/providers-CDib9hVp.js +0 -31
  190. package/dist/src/server-B5aIhzYk.cjs +0 -6
  191. package/dist/src/server-DJCw85s0.js +0 -8
  192. package/dist/src/server-DKj0QIm9.js +0 -6
  193. package/dist/src/store-Bo9FEBPO.js +0 -7
  194. package/dist/src/store-CqHttBql.js +0 -6
  195. package/dist/src/store-fsxOJl5O.cjs +0 -6
  196. package/dist/src/telemetry-B2uJOvMA.js +0 -8
  197. package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
  198. package/dist/src/telemetry-rP_n_bnZ.js +0 -6
@@ -5,36 +5,36 @@ const require_esm = require('./esm-gWVPXn3r.cjs');
5
5
  const require_pythonUtils = require('./pythonUtils-rOCm9w_5.cjs');
6
6
  const require_fileExtensions = require('./fileExtensions-DkJYkWUy.cjs');
7
7
  const require_transform = require('./transform-DshYLyBq.cjs');
8
- const require_graders = require('./graders-C9Ni3pNQ.cjs');
9
- const require_types = require('./types-DAmPdu4_.cjs');
10
- const require_util = require('./util-B-Yo0qzc.cjs');
11
- const require_fetch = require('./fetch-MdGxb6Bv.cjs');
8
+ const require_graders = require('./graders-B6ThhLLt.cjs');
9
+ const require_types = require('./types-Cixl-wRO.cjs');
10
+ const require_util = require('./util-CxWghxGa.cjs');
11
+ const require_fetch = require('./fetch-E3xbPP9w.cjs');
12
12
  const require_time = require('./time-CbtsO5_a.cjs');
13
- const require_cache = require('./cache-DYJRe0Jt.cjs');
14
- const require_providers = require('./providers-Bz0U4NGd.cjs');
15
- const require_utils = require('./utils-BSpcJB4B.cjs');
16
- const require_extractor = require('./extractor-DjxI7POm.cjs');
13
+ const require_cache = require('./cache-PWdK7nbt.cjs');
14
+ const require_providers = require('./providers-lmV8J0jR.cjs');
15
+ const require_utils = require('./utils-u1jU5_Iq.cjs');
16
+ const require_extractor = require('./extractor-6TFoLBb3.cjs');
17
17
  require('./genaiTracer-zK0Mtd_D.cjs');
18
- const require_chat = require('./chat-Biz16BQv.cjs');
18
+ const require_chat = require('./chat-DKNIBnnw.cjs');
19
19
  const require_tokenUsageUtils = require('./tokenUsageUtils-DFp2VFtG.cjs');
20
- const require_transform$1 = require('./transform-B5HUD86U.cjs');
21
- require('./messages-D4Cu-Nxe.cjs');
22
- require('./util-KE6uFWHG.cjs');
20
+ const require_transform$1 = require('./transform-Bow1252l.cjs');
21
+ require('./messages-DR9VV_jC.cjs');
22
+ require('./util-9SFCdUzk.cjs');
23
23
  require('./openai-eZz8QsxY.cjs');
24
- const require_util$2 = require('./util-BT0p5c9X.cjs');
25
- require('./completion-Dw-9jqz1.cjs');
26
- const require_accounts = require('./accounts-uWRxEv-D.cjs');
27
- const require_server = require('./server-ByLwfSDf.cjs');
28
- const require_blobs = require('./blobs-D-Neij7m.cjs');
29
- const require_tables = require('./tables-D_VmN-ui.cjs');
30
- const require_telemetry = require('./telemetry-DrSTNTH-.cjs');
24
+ const require_util$2 = require('./util-Dqg4mr2c.cjs');
25
+ require('./completion-Xj-aJd1b.cjs');
26
+ const require_accounts = require('./accounts-BGa3oLcq.cjs');
27
+ const require_server = require('./server-PndEy2Nj.cjs');
28
+ const require_blobs = require('./blobs-h2nW1G8J.cjs');
29
+ const require_tables = require('./tables-B5ootZ1x.cjs');
30
+ const require_telemetry = require('./telemetry-DMNm_KFn.cjs');
31
31
  const require_text = require('./text-DF2hMKdg.cjs');
32
- const require_store = require('./store-l9-mIRC-.cjs');
33
- require('./base-CRrQ15eu.cjs');
34
- require('./image-CpQ-fJt3.cjs');
32
+ const require_store = require('./store-j9IVHCUH.cjs');
33
+ require('./base-oCiJ630X.cjs');
34
+ require('./image-CVIQmm2j.cjs');
35
35
  const require_providerRegistry = require('./providerRegistry-DFXamjVO.cjs');
36
36
  const require_rubyUtils = require('./rubyUtils-DCci_Y0f.cjs');
37
- const require_evalResult = require('./evalResult-CFSvp3Zh.cjs');
37
+ const require_evalResult = require('./evalResult-CPVLU2Np.cjs');
38
38
  let fs = require("fs");
39
39
  fs = require_logger.__toESM(fs);
40
40
  let path = require("path");
@@ -307,7 +307,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
307
307
  require_telemetry.telemetry_default.record("feature_used", { feature: "tracing" });
308
308
  try {
309
309
  require_logger.logger_default.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
310
- const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-COmdVimi.cjs"));
310
+ const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-BGH8dMy4.cjs"));
311
311
  const port = testSuite.tracing.otlp.http.port || 4318;
312
312
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
313
313
  require_logger.logger_default.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
@@ -330,7 +330,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
330
330
  async function stopOtlpReceiverIfNeeded() {
331
331
  if (otlpReceiverStarted) try {
332
332
  require_logger.logger_default.debug("[EvaluatorTracing] Stopping OTLP receiver");
333
- const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-COmdVimi.cjs"));
333
+ const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-BGH8dMy4.cjs"));
334
334
  await stopOTLPReceiver();
335
335
  otlpReceiverStarted = false;
336
336
  require_logger.logger_default.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -365,7 +365,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
365
365
  }
366
366
  if (!tracingEnabled) return null;
367
367
  require_logger.logger_default.debug("[EvaluatorTracing] Importing trace store");
368
- const { getTraceStore } = await Promise.resolve().then(() => require("./store-fsxOJl5O.cjs"));
368
+ const { getTraceStore } = await Promise.resolve().then(() => require("./store-BppHaTbi.cjs"));
369
369
  const traceStore = getTraceStore();
370
370
  const traceId = generateTraceId();
371
371
  const spanId = generateSpanId();
@@ -4266,7 +4266,7 @@ var Evaluator = class {
4266
4266
  const defaultProvider = testSuite.defaultTest.provider;
4267
4267
  if (require_types.isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
4268
4268
  else if (typeof defaultProvider === "object" && defaultProvider.id) {
4269
- const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BR5P_qep.cjs"));
4269
+ const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BGK6Bn3A.cjs"));
4270
4270
  testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
4271
4271
  } else testCase.provider = defaultProvider;
4272
4272
  }
@@ -4341,7 +4341,7 @@ var Evaluator = class {
4341
4341
  if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
4342
4342
  }
4343
4343
  if (require_logger.cliState_default.resume && this.evalRecord.persisted) try {
4344
- const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-DqLWyktN.cjs"));
4344
+ const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-BeU2oOzP.cjs"));
4345
4345
  const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: require_logger.cliState_default.retryMode });
4346
4346
  const originalCount = runEvalOptions.length;
4347
4347
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -5273,7 +5273,7 @@ function convertResultsToTable(eval_) {
5273
5273
  description: result.description || void 0,
5274
5274
  outputs: [],
5275
5275
  vars: result.vars ? Object.values(varsForHeader).map((varName) => {
5276
- const varValue = result.vars?.[varName] || "";
5276
+ const varValue = result.vars?.[varName] ?? "";
5277
5277
  if (typeof varValue === "string") return varValue;
5278
5278
  return JSON.stringify(varValue, null, 2);
5279
5279
  }).flat() : [],
@@ -5308,7 +5308,11 @@ function convertResultsToTable(eval_) {
5308
5308
  varValuesForRow.set(result.testIdx, result.vars);
5309
5309
  rowMap[result.testIdx] = row;
5310
5310
  let resultText;
5311
- const outputTextDisplay = typeof result.response?.output === "object" ? JSON.stringify(result.response.output) : result.response?.output || result.error || "";
5311
+ const rawOutput = result.response?.output;
5312
+ let outputTextDisplay;
5313
+ if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
5314
+ else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
5315
+ else outputTextDisplay = String(rawOutput);
5312
5316
  if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
5313
5317
  else resultText = `${outputTextDisplay}`;
5314
5318
  else if (result.error) resultText = `${result.error}`;
@@ -5355,7 +5359,7 @@ function convertResultsToTable(eval_) {
5355
5359
  const rows = Object.values(rowMap);
5356
5360
  const sortedVars = [...varsForHeader].sort();
5357
5361
  for (const row of rows) row.vars = sortedVars.map((varName) => {
5358
- const varValue = varValuesForRow.get(row.testIdx)?.[varName] || "";
5362
+ const varValue = varValuesForRow.get(row.testIdx)?.[varName] ?? "";
5359
5363
  if (typeof varValue === "string") return varValue;
5360
5364
  return JSON.stringify(varValue, null, 2);
5361
5365
  });
@@ -5372,7 +5376,11 @@ function convertResultsToTable(eval_) {
5372
5376
  //#region src/util/exportToFile/index.ts
5373
5377
  function convertEvalResultToTableCell(result) {
5374
5378
  let resultText;
5375
- const outputTextDisplay = typeof result.response?.output === "object" ? JSON.stringify(result.response.output) : result.response?.output || result.error || "";
5379
+ const rawOutput = result.response?.output;
5380
+ let outputTextDisplay;
5381
+ if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
5382
+ else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
5383
+ else outputTextDisplay = String(rawOutput);
5376
5384
  if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
5377
5385
  else resultText = `${outputTextDisplay}`;
5378
5386
  else if (result.error) resultText = `${result.error}`;
@@ -5418,11 +5426,12 @@ function convertTestResultsToTableRow(results, varsForHeader) {
5418
5426
  outputs: [],
5419
5427
  vars: Object.values(varsForHeader).map((varName) => {
5420
5428
  if (varName === "sessionId") {
5421
- const varValue = results[0].testCase.vars?.sessionId || results[0].metadata?.sessionId || "";
5429
+ const sessionId = results[0].testCase.vars?.sessionId;
5430
+ const varValue = sessionId == null || sessionId === "" ? results[0].metadata?.sessionId ?? "" : sessionId;
5422
5431
  if (typeof varValue === "string") return varValue;
5423
5432
  return JSON.stringify(varValue);
5424
5433
  }
5425
- const varValue = results[0].testCase.vars?.[varName] || "";
5434
+ const varValue = results[0].testCase.vars?.[varName] ?? "";
5426
5435
  if (typeof varValue === "string") return varValue;
5427
5436
  return JSON.stringify(varValue);
5428
5437
  }).flat(),
@@ -9506,11 +9515,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
9506
9515
  });
9507
9516
  await require_providers.validateStrategies(strategies);
9508
9517
  await validateSharpDependency(strategies, plugins);
9509
- if (strategies.some((s) => require_types.isLanguageDisallowedStrategy(s.id)) && language) {
9510
- const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
9511
- language = "en";
9512
- require_logger.logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
9513
- }
9514
9518
  const redteamProvider = await require_providers.redteamProviderManager.getProvider({ provider });
9515
9519
  const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
9516
9520
  require_logger.logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
@@ -10995,60 +10999,101 @@ async function deleteErrorResults(resultIds) {
10995
10999
  await require_tables.getDb().delete(require_tables.evalResultsTable).where((0, drizzle_orm.inArray)(require_tables.evalResultsTable.id, resultIds));
10996
11000
  require_logger.logger_default.debug(`Deleted ${resultIds.length} error results from database`);
10997
11001
  }
11002
+ const RECALCULATE_BATCH_SIZE = 1e3;
10998
11003
  /**
10999
- * Recalculates prompt metrics based on current results after ERROR results have been deleted
11004
+ * Recalculates prompt metrics based on current results after ERROR results have been deleted.
11005
+ * Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
11000
11006
  */
11001
11007
  async function recalculatePromptMetrics(evalRecord) {
11002
11008
  require_logger.logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
11003
- await evalRecord.loadResults();
11009
+ const startTime = Date.now();
11010
+ let batchNumber = 0;
11011
+ let totalProcessed = 0;
11004
11012
  const promptMetricsMap = /* @__PURE__ */ new Map();
11005
- for (const prompt of evalRecord.prompts) {
11006
- const promptIdx = evalRecord.prompts.indexOf(prompt);
11007
- promptMetricsMap.set(promptIdx, {
11008
- score: 0,
11009
- testPassCount: 0,
11010
- testFailCount: 0,
11011
- testErrorCount: 0,
11012
- assertPassCount: 0,
11013
- assertFailCount: 0,
11014
- totalLatencyMs: 0,
11015
- tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
11016
- namedScores: {},
11017
- namedScoresCount: {},
11018
- cost: 0
11019
- });
11020
- }
11021
- for (const result of evalRecord.results) {
11022
- const metrics = promptMetricsMap.get(result.promptIdx);
11023
- if (!metrics) continue;
11024
- if (result.success) metrics.testPassCount++;
11025
- else if (result.failureReason === require_types.ResultFailureReason.ERROR) metrics.testErrorCount++;
11026
- else metrics.testFailCount++;
11027
- metrics.score += result.score || 0;
11028
- metrics.totalLatencyMs += result.latencyMs || 0;
11029
- metrics.cost += result.cost || 0;
11030
- for (const [key, value] of Object.entries(result.namedScores || {})) {
11031
- metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
11032
- const testVars = result.testCase?.vars || {};
11033
- let contributingAssertions = 0;
11034
- result.gradingResult?.componentResults?.forEach((componentResult) => {
11035
- if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
11036
- });
11037
- metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
11038
- }
11039
- if (result.gradingResult?.componentResults) {
11040
- metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
11041
- metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
11042
- }
11043
- if (result.response?.tokenUsage) require_tokenUsageUtils.accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
11044
- if (result.gradingResult?.tokensUsed) {
11045
- if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = require_tokenUsageUtils.createEmptyAssertions();
11046
- require_tokenUsageUtils.accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
11013
+ for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
11014
+ score: 0,
11015
+ testPassCount: 0,
11016
+ testFailCount: 0,
11017
+ testErrorCount: 0,
11018
+ assertPassCount: 0,
11019
+ assertFailCount: 0,
11020
+ totalLatencyMs: 0,
11021
+ tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
11022
+ namedScores: {},
11023
+ namedScoresCount: {},
11024
+ cost: 0
11025
+ });
11026
+ let currentResultId;
11027
+ try {
11028
+ for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
11029
+ batchNumber++;
11030
+ require_logger.logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
11031
+ for (const result of batch) {
11032
+ currentResultId = result.id;
11033
+ const metrics = promptMetricsMap.get(result.promptIdx);
11034
+ if (!metrics) {
11035
+ require_logger.logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
11036
+ resultId: result.id,
11037
+ evalId: evalRecord.id
11038
+ });
11039
+ continue;
11040
+ }
11041
+ if (result.success) metrics.testPassCount++;
11042
+ else if (result.failureReason === require_types.ResultFailureReason.ERROR) metrics.testErrorCount++;
11043
+ else metrics.testFailCount++;
11044
+ metrics.score += result.score ?? 0;
11045
+ metrics.totalLatencyMs += result.latencyMs || 0;
11046
+ metrics.cost += result.cost || 0;
11047
+ for (const [key, value] of Object.entries(result.namedScores || {})) {
11048
+ metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
11049
+ const testVars = result.testCase?.vars || {};
11050
+ let contributingAssertions = 0;
11051
+ result.gradingResult?.componentResults?.forEach((componentResult) => {
11052
+ if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
11053
+ });
11054
+ metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
11055
+ }
11056
+ if (result.gradingResult?.componentResults) {
11057
+ metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
11058
+ metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
11059
+ }
11060
+ if (result.response?.tokenUsage) require_tokenUsageUtils.accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
11061
+ if (result.gradingResult?.tokensUsed) {
11062
+ if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = require_tokenUsageUtils.createEmptyAssertions();
11063
+ require_tokenUsageUtils.accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
11064
+ }
11065
+ }
11066
+ totalProcessed += batch.length;
11047
11067
  }
11068
+ } catch (error) {
11069
+ require_logger.logger_default.error("Error during batched metrics recalculation", {
11070
+ phase: "calculation",
11071
+ batchNumber,
11072
+ totalProcessed,
11073
+ currentResultId,
11074
+ evalId: evalRecord.id,
11075
+ error
11076
+ });
11077
+ throw error;
11048
11078
  }
11049
11079
  for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
11050
- if (evalRecord.persisted) await evalRecord.addPrompts(evalRecord.prompts);
11051
- require_logger.logger_default.debug("Prompt metrics recalculation completed");
11080
+ if (evalRecord.persisted) try {
11081
+ await evalRecord.addPrompts(evalRecord.prompts);
11082
+ } catch (error) {
11083
+ require_logger.logger_default.error("Error saving recalculated prompt metrics", {
11084
+ phase: "save",
11085
+ evalId: evalRecord.id,
11086
+ promptCount: evalRecord.prompts.length,
11087
+ error
11088
+ });
11089
+ throw error;
11090
+ }
11091
+ const durationMs = Date.now() - startTime;
11092
+ require_logger.logger_default.debug("Prompt metrics recalculation completed", {
11093
+ totalBatches: batchNumber,
11094
+ totalResults: totalProcessed,
11095
+ durationMs
11096
+ });
11052
11097
  }
11053
11098
 
11054
11099
  //#endregion