promptfoo 0.121.4 → 0.121.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (497) hide show
  1. package/dist/src/{ListApp-DQkFNqE9.js → ListApp-DLmM02JS.js} +1 -1
  2. package/dist/src/{accounts-DhMYUUbu.js → accounts-Ca7WIoPY.js} +12 -7
  3. package/dist/src/{accounts-F9d_5sMC.js → accounts-CjFnOPmb.js} +14 -9
  4. package/dist/src/{accounts-Dy17bs4D.cjs → accounts-CmWzeD2d.cjs} +16 -10
  5. package/dist/src/{accounts-DdJ2pHMI.js → accounts-DanM1wq_.js} +13 -8
  6. package/dist/src/{agentic-utils-qFlm6zes.js → agentic-utils-CJ0j3fBi.js} +3 -3
  7. package/dist/src/{agentic-utils-w68v6_Dz.js → agentic-utils-DDEGRV9v.js} +3 -3
  8. package/dist/src/{agentic-utils-BpX5b23w.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
  9. package/dist/src/{agentic-utils-P172hM8B.js → agentic-utils-TxUEMPYS.js} +2 -2
  10. package/dist/src/{agents-BahDpe5G.cjs → agents-B4sRuXg3.cjs} +7 -6
  11. package/dist/src/{agents-pQeBEXMm.js → agents-B8q7h_ek.js} +5 -5
  12. package/dist/src/{agents-CgaMXvLM.js → agents-CBgJvRkB.js} +21 -10
  13. package/dist/src/{agents-C-R_jfzI.js → agents-CYn2n3QP.js} +4 -4
  14. package/dist/src/{agents-8FDnTriG.js → agents-D-vDNFx4.js} +21 -10
  15. package/dist/src/{agents-aYPQLf8W.js → agents-LrHuQqr1.js} +20 -9
  16. package/dist/src/{agents-DJ35I3Nt.js → agents-QGg76OF-.js} +5 -5
  17. package/dist/src/{agents-D7-HGxUj.cjs → agents-eHZ9nlgA.cjs} +21 -10
  18. package/dist/src/{aimlapi-sgYnkE54.js → aimlapi-CJEbQ0o6.js} +7 -7
  19. package/dist/src/{aimlapi-BD6J9oKt.js → aimlapi-D5HXzZ0s.js} +6 -6
  20. package/dist/src/{aimlapi-qcK4OT55.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
  21. package/dist/src/{aimlapi-BCq3MHeL.js → aimlapi-eYv3a_DK.js} +7 -7
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
  24. package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
  25. package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
  26. package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
  27. package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
  28. package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
  29. package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
  30. package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
  31. package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
  32. package/dist/src/app/index.html +31 -6
  33. package/dist/src/{audio-COrn8rM6.js → audio-BqnRvcWG.js} +3 -3
  34. package/dist/src/{audio-DcVKoInv.js → audio-CPMtV1yR.js} +4 -4
  35. package/dist/src/{audio-B7izf48x.js → audio-DyiebVB3.js} +4 -4
  36. package/dist/src/{audio-BQtNuYBj.cjs → audio-FnxbEnSE.cjs} +4 -4
  37. package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
  38. package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
  39. package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
  40. package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
  41. package/dist/src/{base-PYJvBE1i.js → base-CKLo890h.js} +4 -3
  42. package/dist/src/{base-fZ9wgg50.js → base-Co80MMCi.js} +5 -4
  43. package/dist/src/{base-D-670DX8.cjs → base-DGJW48uz.cjs} +5 -4
  44. package/dist/src/{base-yrI1Yal4.js → base-E9I8zXjz.js} +5 -4
  45. package/dist/src/bestOfN-B3wNzjSB.js +137 -0
  46. package/dist/src/bestOfN-BBsO41z4.js +136 -0
  47. package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
  48. package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
  49. package/dist/src/{blobs-D2FAd1Q5.cjs → blobs-B0977K1O.cjs} +7 -6
  50. package/dist/src/{blobs-BCZavS8s.js → blobs-CeFdPn_T.js} +3 -3
  51. package/dist/src/{blobs-BQWqnnvL.js → blobs-DODuTK-a.js} +3 -3
  52. package/dist/src/{blobs-C-F78Kfn.js → blobs-Dwef1Ao1.js} +2 -2
  53. package/dist/src/{cache-BIyPcp5v.cjs → cache-CPGUA4Yl.cjs} +135 -25
  54. package/dist/src/cache-Cf7b4pWE.js +3 -0
  55. package/dist/src/{cache-D5NZmMiT.js → cache-DIXbtkNO.js} +125 -10
  56. package/dist/src/{cache-mb7c8hbp.js → cache-DpPWrkTE.js} +128 -12
  57. package/dist/src/{cache-C4Xb-hNb.js → cache-roFAE0cI.js} +126 -11
  58. package/dist/src/{chat-I9izLm49.js → chat-CUCorGiL.js} +12 -12
  59. package/dist/src/{chat-BPXSW8Bv.cjs → chat-DG1wG4w0.cjs} +6 -6
  60. package/dist/src/{chat-BfPaS15_.js → chat-Dabu84Br.js} +12 -12
  61. package/dist/src/{chat-Dr3DUQ0D.js → chat-DqUFcWI0.js} +12 -12
  62. package/dist/src/{chat-CclRbxGf.cjs → chat-DxTDQ83C.cjs} +14 -13
  63. package/dist/src/{chat-MKxMnZJZ.js → chat-GmlolEwo.js} +4 -4
  64. package/dist/src/{chat-0bwXjVP0.js → chat-TP8Qifkh.js} +6 -6
  65. package/dist/src/{chat-mW0ORo8G.js → chat-iwaM5UTQ.js} +6 -6
  66. package/dist/src/{chatkit-zUIVoDos.js → chatkit-B6DWi70Q.js} +4 -4
  67. package/dist/src/{chatkit-BoWoSgXl.cjs → chatkit-BYveR48_.cjs} +6 -5
  68. package/dist/src/{chatkit-Cv6AhukM.js → chatkit-fARZwEfV.js} +3 -3
  69. package/dist/src/{chatkit-CJnHRRMM.js → chatkit-lb6FK02w.js} +4 -4
  70. package/dist/src/{claude-agent-sdk-Dtq_L-Sc.js → claude-agent-sdk-BQNp_y-F.js} +212 -67
  71. package/dist/src/{claude-agent-sdk-BQNuLaAK.js → claude-agent-sdk-D5Jl0SDh.js} +212 -67
  72. package/dist/src/{claude-agent-sdk-CPJo3dBQ.cjs → claude-agent-sdk-DH416NBD.cjs} +218 -72
  73. package/dist/src/{claude-agent-sdk-nfAIcxNf.js → claude-agent-sdk-x1XJ1-pU.js} +212 -67
  74. package/dist/src/{cloud-DQZ5sVjW.js → cloud-D3DiFqH6.js} +3 -3
  75. package/dist/src/cloud-p96PA4MH.js +3 -0
  76. package/dist/src/{cloudflare-ai-BIB567w6.js → cloudflare-ai-B6NVI3ax.js} +4 -4
  77. package/dist/src/{cloudflare-ai-Dl3N9OVD.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
  78. package/dist/src/{cloudflare-ai-DlKr0rY7.js → cloudflare-ai-RFSojyXG.js} +6 -6
  79. package/dist/src/{cloudflare-ai-DGLte7Py.js → cloudflare-ai-r4tbYmWU.js} +6 -6
  80. package/dist/src/{cloudflare-gateway-CiIZHU0Q.js → cloudflare-gateway-BCkLouto.js} +5 -5
  81. package/dist/src/{cloudflare-gateway-DI1HNP5F.js → cloudflare-gateway-BaZ4insB.js} +3 -3
  82. package/dist/src/{cloudflare-gateway-BDZrYydE.js → cloudflare-gateway-CF-Vb-2Z.js} +5 -5
  83. package/dist/src/{cloudflare-gateway-BYDp495F.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
  84. package/dist/src/codex-app-server-B8KHEiF4.js +1915 -0
  85. package/dist/src/codex-app-server-CnrLBCeA.cjs +1921 -0
  86. package/dist/src/codex-app-server-DIXZ230V.js +1915 -0
  87. package/dist/src/codex-app-server-Dd22dC_N.js +1916 -0
  88. package/dist/src/{codex-sdk-CpqiOqDO.js → codex-sdk-B6Wah8Pa.js} +6 -6
  89. package/dist/src/codex-sdk-BGjVAk23.js +3 -0
  90. package/dist/src/{codex-sdk-C2_M2pl_.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
  91. package/dist/src/{codex-sdk-Rtky3M4I.js → codex-sdk-CmQABzV3.js} +6 -6
  92. package/dist/src/{codex-sdk-CErXn7qh.js → codex-sdk-D2d54RL8.js} +5 -5
  93. package/dist/src/{cometapi-CtJ-mS8R.js → cometapi-Bu9B8NUY.js} +8 -8
  94. package/dist/src/{cometapi-DT-jlVCB.js → cometapi-CtzNCHKu.js} +7 -7
  95. package/dist/src/{cometapi-UVOryo4W.cjs → cometapi-DHCDlQUI.cjs} +8 -8
  96. package/dist/src/{cometapi-BUlt_ELa.js → cometapi-OBILPLlu.js} +8 -8
  97. package/dist/src/{completion-HUe8wDhZ.js → completion-CO2e1_62.js} +6 -6
  98. package/dist/src/{completion-BozdoXba.cjs → completion-CSYfl2cd.cjs} +6 -6
  99. package/dist/src/{completion-x0a_c2y1.js → completion-DZNxcyfG.js} +6 -6
  100. package/dist/src/{completion-Dnxn7E-j.js → completion-sNvCLTAP.js} +5 -5
  101. package/dist/src/constants-BjJV0cRr.js +6 -0
  102. package/dist/src/constants-DH5XYLKZ.js +7 -0
  103. package/dist/src/constants-DZGEFLsu.js +6 -0
  104. package/dist/src/constants-a2kYssQk.cjs +11 -0
  105. package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
  106. package/dist/src/{createHash-CwDVU5xr.js → createHash-CGVzWdjj.js} +1 -1
  107. package/dist/src/{createHash-B7KvgoOD.cjs → createHash-CSiqnK5P.cjs} +2 -2
  108. package/dist/src/{createHash-ChI45QR1.js → createHash-CgRvs4Fn.js} +1 -1
  109. package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
  110. package/dist/src/crescendo-CU_Y2i-m.js +702 -0
  111. package/dist/src/crescendo-J1Xx4_zb.js +703 -0
  112. package/dist/src/crescendo-QiaSLW0d.js +701 -0
  113. package/dist/src/custom-BJfP00Bh.js +619 -0
  114. package/dist/src/custom-CZVn-1-r.js +620 -0
  115. package/dist/src/custom-Cqia7M0D.cjs +621 -0
  116. package/dist/src/custom-notggYVl.js +618 -0
  117. package/dist/src/{docker-DCgsveLD.js → docker-4D1eL6Gq.js} +6 -6
  118. package/dist/src/{docker-ClnmCf1Z.js → docker-BBv1WUDu.js} +5 -5
  119. package/dist/src/{docker-DS4_Osau.cjs → docker-D06JUoe2.cjs} +6 -6
  120. package/dist/src/{docker-CQmlA2NU.js → docker-DdJQBxK9.js} +6 -6
  121. package/dist/src/{embedding-D3xTseo7.js → embedding--UZVe4_7.js} +6 -6
  122. package/dist/src/{embedding-I45KG3o7.cjs → embedding-BbrwopfX.cjs} +6 -6
  123. package/dist/src/{embedding-nFbumxcv.js → embedding-Bi3rxrZF.js} +5 -5
  124. package/dist/src/{embedding-DD9wa3ae.js → embedding-C251p1-8.js} +6 -6
  125. package/dist/src/{errors-Cw810C93.js → errors-9PcUL8BC.js} +1 -1
  126. package/dist/src/{esm-Dh4dOLlt.js → esm-B6whoAcf.js} +2 -2
  127. package/dist/src/{esm-CtEPLdAj.cjs → esm-BIKakvNa.cjs} +8 -7
  128. package/dist/src/{esm-C7PnfdF8.js → esm-BTK1W7lG.js} +1 -1
  129. package/dist/src/{esm-tVgYPY-f.js → esm-Bexx2PFc.js} +2 -2
  130. package/dist/src/{eval-u4UVafl6.js → eval-0VRANImH.js} +21 -21
  131. package/dist/src/{eval-CzJFfFO9.js → eval-DscR5iOM.js} +1 -1
  132. package/dist/src/{evalResult-Bgm9ZH31.js → evalResult-2RRJvFyB.js} +41 -16
  133. package/dist/src/{evalResult-KZqXl4XP.cjs → evalResult-CvtS8h8u.cjs} +51 -15
  134. package/dist/src/evalResult-DqzsS6_W.js +3 -0
  135. package/dist/src/{evalResult-D3hVYFis.js → evalResult-eUkJv9Ko.js} +40 -15
  136. package/dist/src/evaluator-DNdJF1Gv.js +3 -0
  137. package/dist/src/{evaluator-IvuDYSvQ.js → evaluator-DRoiYB2q.js} +1060 -187
  138. package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
  139. package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
  140. package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
  141. package/dist/src/{extractor-CAfTSraf.js → extractor-BR7XAzAL.js} +6 -6
  142. package/dist/src/{extractor-WVPOrH43.cjs → extractor-BdxEtt3J.cjs} +6 -6
  143. package/dist/src/{extractor-DNSeBVOJ.js → extractor-CIW3iN-b.js} +6 -6
  144. package/dist/src/{extractor-Dk6bRWkv.js → extractor-CxRtnaHl.js} +5 -5
  145. package/dist/src/{fetch-B0Z3Oe4k.js → fetch-BufrQtvR.js} +93 -40
  146. package/dist/src/{fetch-BEWnXrrG.js → fetch-DXUnXkVU.js} +89 -40
  147. package/dist/src/{fetch-CJU5ELPa.cjs → fetch-Dw4XZHjj.cjs} +330 -270
  148. package/dist/src/{fetch-Di00EQrc.js → fetch-It34O8Ur.js} +305 -252
  149. package/dist/src/fetch-_YgGd2qv.js +3 -0
  150. package/dist/src/{fileExtensions-bYh77CN8.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
  151. package/dist/src/{fileExtensions-DnqA1y9x.js → fileExtensions-CXRfY3Ss.js} +12 -2
  152. package/dist/src/{fileExtensions-AWa2ZML4.js → fileExtensions-D4GCJ67J.js} +12 -2
  153. package/dist/src/{formatDuration-DZzPsexs.js → formatDuration-CMVNrYvE.js} +1 -1
  154. package/dist/src/{genaiTracer-yRuxj9-L.cjs → genaiTracer-14nugQQx.cjs} +14 -2
  155. package/dist/src/{genaiTracer-DWdZ28hY.js → genaiTracer-BPVvltoW.js} +2 -2
  156. package/dist/src/{genaiTracer-XnrcgDCe.js → genaiTracer-D18lYzhB.js} +2 -2
  157. package/dist/src/{genaiTracer-COYDi-tC.js → genaiTracer-jJKYsnjc.js} +2 -2
  158. package/dist/src/goat-Ckd3q3AY.js +467 -0
  159. package/dist/src/goat-Qgurm-NP.js +466 -0
  160. package/dist/src/goat-ghadEDdy.js +465 -0
  161. package/dist/src/goat-una6pZGP.cjs +469 -0
  162. package/dist/src/graders-BDT7dif6.js +3 -0
  163. package/dist/src/{graders-eIHhRqoC.js → graders-BGP99PdK.js} +2416 -2224
  164. package/dist/src/{graders-Zy3x0zqX.js → graders-BX0f2tvS.js} +2423 -2226
  165. package/dist/src/{graders-pvbReLLn.js → graders-C0nXU_ZP.js} +1806 -1609
  166. package/dist/src/{graders--zknU_uk.cjs → graders-ClrU2fnd.cjs} +2219 -1949
  167. package/dist/src/hydra-BSNZZm2M.js +543 -0
  168. package/dist/src/hydra-BxdG4nkg.js +541 -0
  169. package/dist/src/hydra-DE4xWwyc.js +542 -0
  170. package/dist/src/hydra-DrJttnvw.cjs +542 -0
  171. package/dist/src/image-B4oBtu6J.js +443 -0
  172. package/dist/src/{image-dnoUgPrC.js → image-BN-hjLL9.js} +4 -4
  173. package/dist/src/{image-9302QVqR.js → image-B_fPIwdg.js} +3 -3
  174. package/dist/src/image-BvUAW344.js +442 -0
  175. package/dist/src/image-Cvjwx1uY.js +442 -0
  176. package/dist/src/{image-De2FBmYV.cjs → image-DfVCGPbI.cjs} +4 -4
  177. package/dist/src/{image-u7-rKnYU.js → image-QzmydkiG.js} +4 -4
  178. package/dist/src/image-X0oY4350.cjs +465 -0
  179. package/dist/src/index.cjs +1689 -558
  180. package/dist/src/index.d.cts +3270 -1624
  181. package/dist/src/index.d.ts +3270 -1624
  182. package/dist/src/index.js +1553 -438
  183. package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
  184. package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
  185. package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
  186. package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
  187. package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
  188. package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
  189. package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
  190. package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
  191. package/dist/src/inputVariables-B0qUChbV.js +467 -0
  192. package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
  193. package/dist/src/inputVariables-DXFdi7AI.js +468 -0
  194. package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
  195. package/dist/src/{interactiveCheck-CLERUB0c.js → interactiveCheck-C4QlIuoR.js} +2 -2
  196. package/dist/src/{invariant-BtWWVVhl.js → invariant-B2Rf6avk.js} +1 -1
  197. package/dist/src/{invariant-vgHWClmd.js → invariant-DIYf9sP1.js} +1 -1
  198. package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
  199. package/dist/src/iterative-CpU6i2As.js +490 -0
  200. package/dist/src/iterative-DJQEQpG3.js +491 -0
  201. package/dist/src/iterative-DQBuWM-j.cjs +493 -0
  202. package/dist/src/iterative-FTS4Bz67.js +492 -0
  203. package/dist/src/iterativeImage-BUABMVOA.js +413 -0
  204. package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
  205. package/dist/src/iterativeImage-BzUapOUi.js +414 -0
  206. package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
  207. package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
  208. package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
  209. package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
  210. package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
  211. package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
  212. package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
  213. package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
  214. package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
  215. package/dist/src/{knowledgeBase-Dgc7CBWF.js → knowledgeBase-BadkINlJ.js} +24 -10
  216. package/dist/src/{knowledgeBase-RhFPGWDc.js → knowledgeBase-Bi_8sV-H.js} +25 -11
  217. package/dist/src/{knowledgeBase-lm9RXSAm.js → knowledgeBase-CkMljjdg.js} +25 -11
  218. package/dist/src/{knowledgeBase-Bpoe_nLu.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
  219. package/dist/src/{litellm-DRjpcSa7.js → litellm-BKBo0jpC.js} +5 -5
  220. package/dist/src/{litellm-C2kqjxqp.js → litellm-BXyn5kZK.js} +5 -5
  221. package/dist/src/{litellm-p37R1dzQ.js → litellm-CNcfbCfa.js} +4 -4
  222. package/dist/src/{litellm-CoyI4IAl.cjs → litellm-CtAr7bKG.cjs} +5 -5
  223. package/dist/src/{logger-DksKw1Qc.js → logger-BbY6ypFL.js} +2 -2
  224. package/dist/src/{logger-B88EkIn6.js → logger-KD8JjCRJ.js} +2 -2
  225. package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
  226. package/dist/src/{luma-ray-KgTCXrZC.js → luma-ray-BMX1iEB6.js} +5 -5
  227. package/dist/src/{luma-ray-B863CmuZ.js → luma-ray-CR5TSpp4.js} +5 -5
  228. package/dist/src/{luma-ray-BxVKaW2a.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
  229. package/dist/src/{luma-ray-BTTLtqQ8.js → luma-ray-OEMmS1RB.js} +6 -6
  230. package/dist/src/main.js +909 -369
  231. package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
  232. package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
  233. package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
  234. package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
  235. package/dist/src/{messages-BTQz42fn.js → messages-BabO-cX8.js} +273 -17
  236. package/dist/src/{messages-811uVVW5.cjs → messages-DBPir0TQ.cjs} +278 -18
  237. package/dist/src/{messages-zWbkLLHz.js → messages-DGUlSNU7.js} +273 -17
  238. package/dist/src/{messages-MYTQ2TWp.js → messages-vsE_-Lv0.js} +273 -17
  239. package/dist/src/{meteor-DHdzY1Ss.js → meteor--TZYICTI.js} +2 -2
  240. package/dist/src/{meteor-Co1VQ1u5.cjs → meteor-CR226f7Z.cjs} +2 -2
  241. package/dist/src/{meteor-CU5UAE-H.js → meteor-Cl_yd7rJ.js} +2 -2
  242. package/dist/src/{meteor-DuAFv6gF.js → meteor-Dce-_zGQ.js} +1 -1
  243. package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
  244. package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
  245. package/dist/src/mischievousUser-frFYKxu6.js +47 -0
  246. package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
  247. package/dist/src/{modelslab-Dk1JAtVo.cjs → modelslab-CNV5bMSk.cjs} +7 -7
  248. package/dist/src/{modelslab-D0erNWKe.js → modelslab-Cogmu4mG.js} +6 -6
  249. package/dist/src/{modelslab-DIq-6y7x.js → modelslab-Dzst7VTU.js} +6 -6
  250. package/dist/src/{modelslab-wu9yi5GE.js → modelslab-EyDczZ5A.js} +7 -7
  251. package/dist/src/{nova-reel-CCFRfeRb.js → nova-reel-BGPNBOMS.js} +6 -6
  252. package/dist/src/{nova-reel-DQrm74ng.js → nova-reel-B_5NKFu1.js} +5 -5
  253. package/dist/src/{nova-reel-gr11WG7f.js → nova-reel-C4eUJGse.js} +5 -5
  254. package/dist/src/{nova-reel-CrLXVKQf.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
  255. package/dist/src/{nova-sonic-BYdp-QLs.js → nova-sonic-BNGmgfFz.js} +4 -4
  256. package/dist/src/{nova-sonic-TDgrlTk7.js → nova-sonic-ChPlh5na.js} +4 -4
  257. package/dist/src/{nova-sonic-B_ZXcUJB.js → nova-sonic-CrV0iaY_.js} +3 -3
  258. package/dist/src/{nova-sonic-i5tUvXKn.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
  259. package/dist/src/{openai-DhVEmgeZ.js → openai-BMHD2Huo.js} +2 -2
  260. package/dist/src/{openai-URNyItar.cjs → openai-C3uXv8wS.cjs} +2 -2
  261. package/dist/src/{openai-Qsvz25mV.js → openai-CJrsh9n4.js} +2 -2
  262. package/dist/src/{openai-iYtrXzOX.js → openai-zgwBb4Ff.js} +1 -1
  263. package/dist/src/{openclaw-CnQ363Wi.js → openclaw-BIHlu_36.js} +10 -8
  264. package/dist/src/{openclaw-CwzlQSQX.js → openclaw-CF7fMido.js} +9 -7
  265. package/dist/src/{openclaw-wX9rtfke.cjs → openclaw-Dphc01BY.cjs} +18 -15
  266. package/dist/src/{openclaw-CLWrW03k.js → openclaw-zIJAsz3P.js} +10 -8
  267. package/dist/src/{opencode-sdk-BUu5Nevv.js → opencode-sdk-B3vlPLsp.js} +40 -5
  268. package/dist/src/{opencode-sdk-BxD8vXp_.js → opencode-sdk-D05JSgMQ.js} +40 -5
  269. package/dist/src/{opencode-sdk-BZ2idgYA.cjs → opencode-sdk-DoY6GbWw.cjs} +46 -10
  270. package/dist/src/{opencode-sdk-GI2KaAXq.js → opencode-sdk-sRKYHGoI.js} +39 -4
  271. package/dist/src/{otlpReceiver-BntK801g.js → otlpReceiver--gTpSagc.js} +120 -4
  272. package/dist/src/{otlpReceiver-DmVulbhC.js → otlpReceiver-B2eaKC8C.js} +120 -4
  273. package/dist/src/{otlpReceiver-B2z58l4e.js → otlpReceiver-BXjcRqAM.js} +119 -3
  274. package/dist/src/{otlpReceiver-BfcVq2Nq.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
  275. package/dist/src/packageParser--MWTSrPW.js +36 -0
  276. package/dist/src/packageParser-CgE-ziRo.js +35 -0
  277. package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
  278. package/dist/src/packageParser-hwwSGnAZ.js +35 -0
  279. package/dist/src/processShim-BBxt7LKO.js +95 -0
  280. package/dist/src/processShim-BcGzU8fY.js +94 -0
  281. package/dist/src/processShim-C_z3aRvF.js +94 -0
  282. package/dist/src/processShim-DSY9BV2T.cjs +98 -0
  283. package/dist/src/promptLength-0qIHyhA5.js +71 -0
  284. package/dist/src/promptLength-4X-Wd8PG.js +72 -0
  285. package/dist/src/promptLength-B9nZEfO6.js +71 -0
  286. package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
  287. package/dist/src/promptfoo-BDrfT30-.js +180 -0
  288. package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
  289. package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
  290. package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
  291. package/dist/src/prompts-BYMtqPCw.js +259 -0
  292. package/dist/src/prompts-C-bqE1Yp.js +260 -0
  293. package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
  294. package/dist/src/prompts-DHhQsANy.js +259 -0
  295. package/dist/src/prompts-D_QpZ2Dm.js +271 -0
  296. package/dist/src/prompts-hNvWBD3z.cjs +284 -0
  297. package/dist/src/prompts-huDVH2CI.js +270 -0
  298. package/dist/src/prompts-p78Hul5i.cjs +289 -0
  299. package/dist/src/{providerRegistry-CPQ_CmVO.js → providerRegistry-1gB5vtzQ.js} +2 -2
  300. package/dist/src/{providerRegistry-CQMdTmHP.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
  301. package/dist/src/{providerRegistry-Bvh8mv85.js → providerRegistry-DHcFiVWX.js} +1 -1
  302. package/dist/src/{providerRegistry-CWoPjKFZ.js → providerRegistry-ReCd0sFa.js} +2 -2
  303. package/dist/src/{providers-BV_KMZje.js → providers-B9KzWxAX.js} +10558 -21587
  304. package/dist/src/{providers-DruaQfwu.js → providers-BCCz6_IX.js} +1228 -12196
  305. package/dist/src/{providers-1eKkXBKp.cjs → providers-BDVVIQM6.cjs} +10649 -21843
  306. package/dist/src/{providers-iUt5fbAN.js → providers-BYAn82cf.js} +1 -1
  307. package/dist/src/{providers-Domz_llv.js → providers-DVYRZP4E.js} +10589 -21570
  308. package/dist/src/{pythonUtils-Cldx7huE.js → pythonUtils-CLCgQ9tt.js} +3 -3
  309. package/dist/src/{pythonUtils-CnndUbW-.js → pythonUtils-CgYxeSmO.js} +3 -3
  310. package/dist/src/{pythonUtils-tAJvvpS-.cjs → pythonUtils-Cokhluq3.cjs} +8 -7
  311. package/dist/src/{pythonUtils-C2UQ30Rz.js → pythonUtils-D0BYebvX.js} +3 -3
  312. package/dist/src/{quiverai-DFotyafY.cjs → quiverai-BAp6iTZD.cjs} +4 -4
  313. package/dist/src/{quiverai-aPPvXOgn.js → quiverai-BvIhI_0l.js} +4 -4
  314. package/dist/src/{quiverai-DR0SnIQV.js → quiverai-CdTWPe-A.js} +3 -3
  315. package/dist/src/{quiverai-CtWi6x_g.js → quiverai-Cv7rJKDz.js} +4 -4
  316. package/dist/src/registry-BUJrgjwv.js +124 -0
  317. package/dist/src/registry-DXm1t_x0.js +125 -0
  318. package/dist/src/registry-Dp5EqoXc.js +124 -0
  319. package/dist/src/registry-KCVF1CFC.cjs +124 -0
  320. package/dist/src/{server-D6Il2Sob.js → remoteGeneration-B1_XsKXU.js} +16 -108
  321. package/dist/src/{server-BSB45Nt9.js → remoteGeneration-COpWcmWd.js} +15 -146
  322. package/dist/src/{server-Dx2TyCH2.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
  323. package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
  324. package/dist/src/render-BNTrbmBw.cjs +384 -0
  325. package/dist/src/render-CSP99NLm.js +348 -0
  326. package/dist/src/render-DFfDeYUK.js +347 -0
  327. package/dist/src/{render-CgVDrJmM.js → render-DznWrxGO.js} +2 -2
  328. package/dist/src/render-_6ur1fhE.js +347 -0
  329. package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
  330. package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
  331. package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
  332. package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
  333. package/dist/src/{responses-Bi9vBuW_.cjs → responses-1UFFF9N_.cjs} +51 -16
  334. package/dist/src/{responses-DL9m8CyY.js → responses-B3W2JvOQ.js} +49 -15
  335. package/dist/src/{responses--OsX2aYW.js → responses-B6ktc3Ra.js} +49 -15
  336. package/dist/src/{responses-C-flexAY.js → responses-URRzV8qE.js} +49 -15
  337. package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
  338. package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
  339. package/dist/src/{rubyUtils-DsGrTx8R.js → rubyUtils-CXlFM2rR.js} +3 -3
  340. package/dist/src/{rubyUtils-DVLeA2jg.js → rubyUtils-CnlW8AYb.js} +3 -3
  341. package/dist/src/{rubyUtils-B6eljPuh.cjs → rubyUtils-CqUWBZAt.cjs} +18 -27
  342. package/dist/src/{rubyUtils-CYSQEG4a.js → rubyUtils-DdGojpfv.js} +3 -3
  343. package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
  344. package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
  345. package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
  346. package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
  347. package/dist/src/{sagemaker-BVkaG2-l.js → sagemaker-CujrzP1a.js} +62 -51
  348. package/dist/src/{sagemaker-XnfhheQv.cjs → sagemaker-DzffAqo_.cjs} +65 -53
  349. package/dist/src/{sagemaker-D67yzMzs.js → sagemaker-vhtSV7JI.js} +62 -51
  350. package/dist/src/{sagemaker-BveBvuxm.js → sagemaker-yr1QKeBs.js} +61 -50
  351. package/dist/src/{scanner-1DqWi1Ej.js → scanner-DS0109SS.js} +7 -7
  352. package/dist/src/server/index.js +5105 -605
  353. package/dist/src/server-B8rqV126.cjs +126 -0
  354. package/dist/src/server-BaLytskk.js +3 -0
  355. package/dist/src/server-CMJD10J4.js +107 -0
  356. package/dist/src/server-Ddp8GNMp.js +146 -0
  357. package/dist/src/server-DhMHosWj.js +182 -0
  358. package/dist/src/shared-7pmVZLNO.js +1334 -0
  359. package/dist/src/shared-9WHQ1oNE.js +1335 -0
  360. package/dist/src/{fileExtensions-BArZuxsI.js → shared-BoG7qLMv.js} +12 -2
  361. package/dist/src/shared-D6IjElRI.js +1334 -0
  362. package/dist/src/shared-WkgnDkcg.cjs +1436 -0
  363. package/dist/src/{signal-CE5G3a7x.js → signal-CSurUUyV.js} +3 -3
  364. package/dist/src/simulatedUser-C9aQObBI.js +222 -0
  365. package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
  366. package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
  367. package/dist/src/simulatedUser-p3tACcmw.js +223 -0
  368. package/dist/src/{slack-DDUe-5MC.js → slack-Bapo-7_8.js} +2 -2
  369. package/dist/src/{slack-1Rhq0EoV.cjs → slack-DMC1QVEg.cjs} +3 -2
  370. package/dist/src/{slack-D5Wpy8LM.js → slack-DTEFhrMn.js} +2 -2
  371. package/dist/src/{slack-acRb0IqQ.js → slack-k-_CP84Q.js} +1 -1
  372. package/dist/src/storage-BU4qcnOb.js +875 -0
  373. package/dist/src/storage-CA-v9V2v.cjs +911 -0
  374. package/dist/src/storage-CD-GWAdx.js +822 -0
  375. package/dist/src/storage-QdU-SmvD.js +834 -0
  376. package/dist/src/{store-DAAyxcy6.cjs → store-B2NDDooM.cjs} +60 -24
  377. package/dist/src/{store-CYEy5J2D.js → store-DKd5592Q.js} +51 -20
  378. package/dist/src/{store-M0b1WfYb.js → store-HpopRVzl.js} +50 -19
  379. package/dist/src/store-IbiRIF3k.js +3 -0
  380. package/dist/src/strategies-7CS3Alao.cjs +2360 -0
  381. package/dist/src/strategies-CiSeroPH.js +2331 -0
  382. package/dist/src/strategies-DRJjGTIY.js +2333 -0
  383. package/dist/src/{tables-DQ4WU5tX.js → tables-CRSXQ2Ke.js} +2 -2
  384. package/dist/src/{tables-CsWou1Bx.js → tables-CxjU7bBd.js} +3 -3
  385. package/dist/src/{tables-DUfh1F7Z.cjs → tables-DBIJU0WE.cjs} +6 -5
  386. package/dist/src/{tables-C4CH3zRr.js → tables-DafUHOeh.js} +3 -3
  387. package/dist/src/{telemetry-CQPez_Jp.js → telemetry-00ezXr_t.js} +5 -4
  388. package/dist/src/telemetry-ByPqDcKC.js +3 -0
  389. package/dist/src/{telemetry-Dsw_faFj.cjs → telemetry-CJ7FnCsc.cjs} +18 -11
  390. package/dist/src/{telemetry-dbaJ0E98.js → telemetry-DmXYcJNV.js} +5 -4
  391. package/dist/src/{telemetry-Dvqxv3YC.js → telemetry-DwX9XUN5.js} +4 -3
  392. package/dist/src/{text-KvuD2Iko.js → text-Db-Wt2u2.js} +1 -1
  393. package/dist/src/{text-DHxdyQqT.js → text-DwYK5EBn.js} +1 -1
  394. package/dist/src/{text-BVi-cLPJ.cjs → text-nywWsRBM.cjs} +1 -1
  395. package/dist/src/{tokenUsageUtils-C-bmyHoE.js → tokenUsageUtils-BjVkdk18.js} +1 -1
  396. package/dist/src/{tokenUsageUtils-CXrvO-wA.js → tokenUsageUtils-CDet74yk.js} +1 -1
  397. package/dist/src/tokenUsageUtils-CmnQ0G2m.js +142 -0
  398. package/dist/src/{tokenUsageUtils-Bb7DkZPz.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
  399. package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
  400. package/dist/src/toolAttributes-COVgDrBG.js +87 -0
  401. package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
  402. package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
  403. package/dist/src/tracingOptions-Chi74lOD.js +219 -0
  404. package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
  405. package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
  406. package/dist/src/{transcription-DuWDupG7.js → transcription-B8uIgCYX.js} +5 -5
  407. package/dist/src/{transcription-CJspiD2c.js → transcription-CfU5loSq.js} +6 -6
  408. package/dist/src/{transcription-V2HaAmy2.js → transcription-Dkd22_4K.js} +6 -6
  409. package/dist/src/{transcription-BvjmiYB1.cjs → transcription-mzuf18Mq.cjs} +9 -8
  410. package/dist/src/{transform-lQrDE1BQ.js → transform-BIMynQsA.js} +9 -9
  411. package/dist/src/transform-BnSTnFlp.js +187 -0
  412. package/dist/src/transform-BnSXWmU_2.cjs +221 -0
  413. package/dist/src/transform-CGt7Kt3y2.js +186 -0
  414. package/dist/src/transform-CrPGTsij.js +186 -0
  415. package/dist/src/{transform-CTeuTR3S.cjs → transform-DhNkAUs8.cjs} +13 -12
  416. package/dist/src/{transform-CG0ehZNG.js → transform-DmvYBRll.js} +9 -9
  417. package/dist/src/{transform-zDhMmzwX.js → transform-EtD4jAWi.js} +9 -9
  418. package/dist/src/{transformersAvailability-CcHusyhw.js → transformersAvailability-0ThtPved.js} +1 -1
  419. package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
  420. package/dist/src/{transformersAvailability-DLlROWhg.js → transformersAvailability-BvyU9vDD.js} +1 -1
  421. package/dist/src/{transformersAvailability-Cju9mHgR.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
  422. package/dist/src/{types-Dm9JM6Vb.js → types-BFevViUY.js} +115 -19
  423. package/dist/src/{types-Bgh5SOn6.js → types-BJQBBPTP.js} +115 -19
  424. package/dist/src/{types-CeaeaZdP.cjs → types-CxJvaY2S.cjs} +357 -172
  425. package/dist/src/{types-BGQDAP8i.js → types-D6glLbdF.js} +271 -170
  426. package/dist/src/{util-BYvQUPp7.js → util--WMgw7wM.js} +28 -8
  427. package/dist/src/{util-C9J8ahRn.js → util-5WnCSb0h.js} +72 -48
  428. package/dist/src/{util-CN3SrLT4.cjs → util-BSIuSLVK.cjs} +74 -49
  429. package/dist/src/{util-C8e5uydV.js → util-Bx677_k2.js} +154 -147
  430. package/dist/src/util-CN8om2rz.cjs +386 -0
  431. package/dist/src/{util-DDs-7g6-.js → util-CoQWM76y.js} +28 -8
  432. package/dist/src/util-DNl96nNs.js +327 -0
  433. package/dist/src/{util-DxWpWjhc.js → util-DURocbYR.js} +667 -507
  434. package/dist/src/util-Df8YMvS1.js +327 -0
  435. package/dist/src/{util-DvU2Pw8c.js → util-DiQ3QvBB.js} +28 -8
  436. package/dist/src/{util-oGMLA7vc.js → util-I-Rf-KaD.js} +862 -577
  437. package/dist/src/{util-olYL5C6N.cjs → util-IYzs5Y04.cjs} +33 -7
  438. package/dist/src/{util-D9TisOyk.js → util-LKTmNsMQ.js} +71 -47
  439. package/dist/src/{util-Bxn8emtE.cjs → util-SPsvFONY.cjs} +738 -582
  440. package/dist/src/{util-D3q0WQ-0.js → util-efByNxcr.js} +72 -48
  441. package/dist/src/util-kDURhgJW.js +328 -0
  442. package/dist/src/{utils-DJfvjyMj.js → utils-B0lzitHZ.js} +3 -3
  443. package/dist/src/{utils-BLJKfv0y.js → utils-BFOh20Gb.js} +3 -3
  444. package/dist/src/{utils-hXtCYanr.js → utils-BGY69tk_.js} +2 -2
  445. package/dist/src/{utils-B05gLxER.cjs → utils-Ve6kuJsa.cjs} +3 -3
  446. package/dist/src/version-BK20a4sw.js +16 -0
  447. package/dist/src/version-BWCSaByA.cjs +27 -0
  448. package/dist/src/version-eRkNuGv8.js +17 -0
  449. package/dist/src/version-lpHV_53E.js +16 -0
  450. package/dist/tsconfig.tsbuildinfo +1 -1
  451. package/package.json +56 -28
  452. package/dist/src/app/assets/Report-CQYFezYu.js +0 -1
  453. package/dist/src/app/assets/index-BXGkeMwh.css +0 -1
  454. package/dist/src/app/assets/index-BzJt18Jz.js +0 -385
  455. package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
  456. package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
  457. package/dist/src/cache-Cr9oLMUa.js +0 -3
  458. package/dist/src/cache-DbLsVWB2.cjs +0 -3
  459. package/dist/src/cloud-Hphvo8kr.js +0 -3
  460. package/dist/src/codex-sdk-BAmYE7qy.js +0 -3
  461. package/dist/src/codex-sdk-CWEnH70W.cjs +0 -2
  462. package/dist/src/evalResult-D8MT9p0s.js +0 -3
  463. package/dist/src/evalResult-DElBuddX.js +0 -2
  464. package/dist/src/evalResult-Dvc-iucu.cjs +0 -2
  465. package/dist/src/evaluator-CVessDWe.js +0 -3
  466. package/dist/src/fetch-C7bGKDlQ.js +0 -3
  467. package/dist/src/graders-BOAzQEUe.cjs +0 -2
  468. package/dist/src/graders-D4BTsZdG2.js +0 -3
  469. package/dist/src/graders-DOJK1XpV.js +0 -2
  470. package/dist/src/graders-NAv9LcBn.js +0 -2
  471. package/dist/src/image-B5Mv-Z3h.js +0 -257
  472. package/dist/src/image-DVz2RiMF.js +0 -258
  473. package/dist/src/image-qUpPvmNZ.js +0 -257
  474. package/dist/src/image-x6KqLQl4.cjs +0 -280
  475. package/dist/src/providers-Bp4S-FvO.js +0 -2
  476. package/dist/src/providers-DV3ax9e_.cjs +0 -3
  477. package/dist/src/providers-u9Enmfok.js +0 -2
  478. package/dist/src/render-CH-62LbA.js +0 -135
  479. package/dist/src/render-CMEpfLaO.js +0 -136
  480. package/dist/src/render-DHIZ6_k8.js +0 -135
  481. package/dist/src/render-DfQSFxGE.cjs +0 -165
  482. package/dist/src/rubyUtils-D1L2d3jb.js +0 -3
  483. package/dist/src/rubyUtils-DUbq4tff.cjs +0 -2
  484. package/dist/src/server-BNYztJkh.js +0 -385
  485. package/dist/src/server-DCtHUqlp.js +0 -3
  486. package/dist/src/server-DaA2eR26.cjs +0 -2
  487. package/dist/src/store-CWOSz6D_.cjs +0 -2
  488. package/dist/src/store-DCDBhv7B.js +0 -3
  489. package/dist/src/store-Dn9HUkdW.js +0 -240
  490. package/dist/src/telemetry-C1IqxcdW.js +0 -3
  491. package/dist/src/telemetry-C4ZEa_es.cjs +0 -2
  492. package/dist/src/transform-Bbg6A8Jk.js +0 -216
  493. package/dist/src/transform-CUnzlsbn.cjs +0 -228
  494. package/dist/src/transform-DYX1_Xnh.js +0 -216
  495. package/dist/src/transform-DgKlRr73.cjs +0 -2
  496. package/dist/src/transform-M6ITAESf.js +0 -3
  497. package/dist/src/transform-UN5UGu8U.js +0 -213
@@ -2,77 +2,93 @@ Object.defineProperties(exports, {
2
2
  __esModule: { value: true },
3
3
  [Symbol.toStringTag]: { value: "Module" }
4
4
  });
5
- const require_logger = require("./logger-COuQb2xB.cjs");
6
- const require_invariant = require("./invariant-kfQ8Bu82.cjs");
7
- const require_esm = require("./esm-CtEPLdAj.cjs");
8
- const require_pythonUtils = require("./pythonUtils-tAJvvpS-.cjs");
9
- const require_fileExtensions = require("./fileExtensions-bYh77CN8.cjs");
10
- const require_transform = require("./transform-CUnzlsbn.cjs");
11
- const require_graders = require("./graders--zknU_uk.cjs");
12
- const require_types = require("./types-CeaeaZdP.cjs");
13
- const require_util = require("./util-Bxn8emtE.cjs");
14
- const require_render = require("./render-DfQSFxGE.cjs");
15
- const require_fetch = require("./fetch-CJU5ELPa.cjs");
16
- const require_cache = require("./cache-BIyPcp5v.cjs");
17
- const require_providers = require("./providers-1eKkXBKp.cjs");
18
- const require_utils = require("./utils-B05gLxER.cjs");
19
- const require_createHash = require("./createHash-B7KvgoOD.cjs");
20
- const require_chat = require("./chat-CclRbxGf.cjs");
21
- const require_tokenUsageUtils = require("./tokenUsageUtils-Bb7DkZPz.cjs");
22
- const require_transform$1 = require("./transform-CTeuTR3S.cjs");
23
- const require_util$1 = require("./util-CN3SrLT4.cjs");
24
- const require_providerRegistry = require("./providerRegistry-CQMdTmHP.cjs");
25
- const require_server = require("./server-Dx2TyCH2.cjs");
26
- const require_accounts = require("./accounts-Dy17bs4D.cjs");
27
- const require_blobs = require("./blobs-D2FAd1Q5.cjs");
28
- const require_tables = require("./tables-DUfh1F7Z.cjs");
29
- const require_extractor = require("./extractor-WVPOrH43.cjs");
30
- const require_telemetry = require("./telemetry-Dsw_faFj.cjs");
31
- const require_text = require("./text-BVi-cLPJ.cjs");
32
- const require_store = require("./store-DAAyxcy6.cjs");
33
- const require_rubyUtils = require("./rubyUtils-B6eljPuh.cjs");
34
- const require_evalResult = require("./evalResult-KZqXl4XP.cjs");
5
+ const require_rolldown_runtime = require("./rolldown-runtime-D_mwlA32.cjs");
6
+ const require_logger = require("./logger-cfNpzI4o.cjs");
7
+ const require_invariant = require("./invariant-QtnLD03y.cjs");
8
+ const require_types = require("./types-CxJvaY2S.cjs");
9
+ const require_fetch = require("./fetch-Dw4XZHjj.cjs");
10
+ const require_version = require("./version-BWCSaByA.cjs");
11
+ const require_fileExtensions = require("./fileExtensions-BhdwzYaD.cjs");
12
+ const require_accounts = require("./accounts-CmWzeD2d.cjs");
13
+ const require_esm = require("./esm-BIKakvNa.cjs");
14
+ const require_render = require("./render-BNTrbmBw.cjs");
15
+ const require_providerRegistry = require("./providerRegistry-CZO_w7ue.cjs");
16
+ const require_remoteGeneration = require("./remoteGeneration-DS9N3pgB.cjs");
17
+ const require_server = require("./server-B8rqV126.cjs");
18
+ const require_storage = require("./storage-CA-v9V2v.cjs");
19
+ const require_pythonUtils = require("./pythonUtils-Cokhluq3.cjs");
20
+ const require_util = require("./util-SPsvFONY.cjs");
21
+ const require_createHash = require("./createHash-CSiqnK5P.cjs");
22
+ const require_cache = require("./cache-CPGUA4Yl.cjs");
23
+ const require_chat = require("./chat-DxTDQ83C.cjs");
24
+ const require_transform = require("./transform-DhNkAUs8.cjs");
25
+ const require_util$1 = require("./util-BSIuSLVK.cjs");
26
+ const require_providers = require("./providers-BDVVIQM6.cjs");
27
+ const require_tokenUsageUtils = require("./tokenUsageUtils-_B-P8IAi.cjs");
28
+ const require_text = require("./text-nywWsRBM.cjs");
29
+ const require_telemetry = require("./telemetry-CJ7FnCsc.cjs");
30
+ const require_tables = require("./tables-DBIJU0WE.cjs");
31
+ const require_blobs = require("./blobs-B0977K1O.cjs");
32
+ const require_processShim = require("./processShim-DSY9BV2T.cjs");
33
+ const require_packageParser = require("./packageParser-QoCS1FMl.cjs");
34
+ const require_rubyUtils = require("./rubyUtils-CqUWBZAt.cjs");
35
+ const require_inputVariables = require("./inputVariables-Dq9W-Z3a.cjs");
36
+ const require_util$2 = require("./util-CN8om2rz.cjs");
37
+ const require_promptfoo = require("./promptfoo-b-baRMj-.cjs");
38
+ const require_graders = require("./graders-ClrU2fnd.cjs");
39
+ const require_shared = require("./shared-WkgnDkcg.cjs");
40
+ const require_utils = require("./utils-Ve6kuJsa.cjs");
41
+ const require_transform$1 = require("./transform-BnSXWmU_2.cjs");
42
+ const require_store = require("./store-B2NDDooM.cjs");
43
+ const require_extractor = require("./extractor-BdxEtt3J.cjs");
44
+ const require_promptLength = require("./promptLength-BbBbDHNj.cjs");
45
+ const require_indirectWebPwn = require("./indirectWebPwn-BJ22AbQa.cjs");
46
+ const require_toolAttributes = require("./toolAttributes-BAjwcBf0.cjs");
47
+ const require_evaluatorHelpers = require("./evaluatorHelpers-CRqTvSux.cjs");
48
+ const require_evalResult = require("./evalResult-CvtS8h8u.cjs");
49
+ const require_strategies = require("./strategies-7CS3Alao.cjs");
35
50
  let fs = require("fs");
36
- fs = require_logger.__toESM(fs);
51
+ fs = require_rolldown_runtime.__toESM(fs, 1);
37
52
  let path = require("path");
38
- path = require_logger.__toESM(path);
53
+ path = require_rolldown_runtime.__toESM(path, 1);
39
54
  let async = require("async");
40
- async = require_logger.__toESM(async);
55
+ async = require_rolldown_runtime.__toESM(async, 1);
41
56
  let js_yaml = require("js-yaml");
42
- js_yaml = require_logger.__toESM(js_yaml);
57
+ js_yaml = require_rolldown_runtime.__toESM(js_yaml, 1);
43
58
  let node_async_hooks = require("node:async_hooks");
44
- require("node:path");
45
- require("node:url");
46
59
  let chalk = require("chalk");
47
- chalk = require_logger.__toESM(chalk);
60
+ chalk = require_rolldown_runtime.__toESM(chalk, 1);
48
61
  let os = require("os");
49
- os = require_logger.__toESM(os);
50
- let util = require("util");
51
- util = require_logger.__toESM(util);
62
+ os = require_rolldown_runtime.__toESM(os, 1);
52
63
  let dedent = require("dedent");
53
- dedent = require_logger.__toESM(dedent);
54
- let fs_promises = require("fs/promises");
55
- fs_promises = require_logger.__toESM(fs_promises);
56
- let glob = require("glob");
64
+ dedent = require_rolldown_runtime.__toESM(dedent, 1);
57
65
  let zod = require("zod");
58
- let csv_parse_sync = require("csv-parse/sync");
59
- let fast_xml_parser = require("fast-xml-parser");
66
+ let fs_promises = require("fs/promises");
67
+ fs_promises = require_rolldown_runtime.__toESM(fs_promises, 1);
68
+ let util = require("util");
69
+ util = require_rolldown_runtime.__toESM(util, 1);
70
+ let _inquirer_input = require("@inquirer/input");
71
+ _inquirer_input = require_rolldown_runtime.__toESM(_inquirer_input, 1);
72
+ require("node:path");
73
+ require("node:url");
60
74
  let crypto$1 = require("crypto");
61
- crypto$1 = require_logger.__toESM(crypto$1);
75
+ crypto$1 = require_rolldown_runtime.__toESM(crypto$1, 1);
62
76
  let _opentelemetry_api = require("@opentelemetry/api");
63
- let _inquirer_input = require("@inquirer/input");
64
- _inquirer_input = require_logger.__toESM(_inquirer_input);
65
77
  let readline = require("readline");
66
- readline = require_logger.__toESM(readline);
78
+ readline = require_rolldown_runtime.__toESM(readline, 1);
79
+ let csv_parse_sync = require("csv-parse/sync");
80
+ let glob = require("glob");
81
+ let fast_xml_parser = require("fast-xml-parser");
67
82
  let drizzle_orm = require("drizzle-orm");
68
- let cli_progress = require("cli-progress");
69
- cli_progress = require_logger.__toESM(cli_progress);
70
83
  let url = require("url");
71
- let jsdom = require("jsdom");
84
+ let parse5 = require("parse5");
72
85
  let fastest_levenshtein = require("fastest-levenshtein");
86
+ let cli_progress = require("cli-progress");
87
+ cli_progress = require_rolldown_runtime.__toESM(cli_progress, 1);
73
88
  let js_rouge = require("js-rouge");
74
- js_rouge = require_logger.__toESM(js_rouge);
89
+ js_rouge = require_rolldown_runtime.__toESM(js_rouge, 1);
75
90
  let node_util = require("node:util");
91
+ let lru_cache = require("lru-cache");
76
92
  require("debounce");
77
93
  let _opentelemetry_core = require("@opentelemetry/core");
78
94
  let _opentelemetry_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
@@ -81,15 +97,15 @@ let _opentelemetry_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
81
97
  let _opentelemetry_semantic_conventions = require("@opentelemetry/semantic-conventions");
82
98
  let drizzle_orm_better_sqlite3_migrator = require("drizzle-orm/better-sqlite3/migrator");
83
99
  let process$1 = require("process");
84
- process$1 = require_logger.__toESM(process$1);
100
+ process$1 = require_rolldown_runtime.__toESM(process$1, 1);
85
101
  let _apidevtools_json_schema_ref_parser = require("@apidevtools/json-schema-ref-parser");
86
- _apidevtools_json_schema_ref_parser = require_logger.__toESM(_apidevtools_json_schema_ref_parser);
102
+ _apidevtools_json_schema_ref_parser = require_rolldown_runtime.__toESM(_apidevtools_json_schema_ref_parser, 1);
87
103
  let cli_table3 = require("cli-table3");
88
- cli_table3 = require_logger.__toESM(cli_table3);
104
+ cli_table3 = require_rolldown_runtime.__toESM(cli_table3, 1);
89
105
  let chokidar = require("chokidar");
90
- chokidar = require_logger.__toESM(chokidar);
106
+ chokidar = require_rolldown_runtime.__toESM(chokidar, 1);
91
107
  let ora = require("ora");
92
- ora = require_logger.__toESM(ora);
108
+ ora = require_rolldown_runtime.__toESM(ora, 1);
93
109
  require("@inquirer/confirm");
94
110
  //#region src/external/matchers/conversationRelevancyTemplate.ts
95
111
  var ConversationRelevancyTemplate = class {
@@ -262,6 +278,505 @@ const handleConversationRelevance = async ({ assertion, outputString, prompt, pr
262
278
  };
263
279
  };
264
280
  //#endregion
281
+ //#region src/matchers/classification.ts
282
+ /**
283
+ *
284
+ * @param expected Expected classification. If undefined, matches any classification.
285
+ * @param output Text to classify.
286
+ * @param threshold Value between 0 and 1. If the expected classification is undefined, the threshold is the minimum score for any classification. If the expected classification is defined, the threshold is the minimum score for that classification.
287
+ * @param grading
288
+ * @returns Pass if the output matches the classification with a score greater than or equal to the threshold.
289
+ */
290
+ async function matchesClassification(expected, output, threshold, grading) {
291
+ const resp = await (await require_graders.getAndCheckProvider("classification", grading?.provider, null, "classification check")).callClassificationApi(output);
292
+ if (!resp.classification) return require_graders.fail(resp.error || "Unknown error fetching classification");
293
+ let score;
294
+ if (expected === void 0) {
295
+ const scores = Object.values(resp.classification);
296
+ if (scores.length === 0) return {
297
+ pass: false,
298
+ score: 0,
299
+ reason: "No classification scores returned"
300
+ };
301
+ score = Math.max(...scores);
302
+ } else score = resp.classification[expected] || 0;
303
+ if (score >= threshold - Number.EPSILON) {
304
+ const reason = expected === void 0 ? `Maximum classification score ${score.toFixed(2)} >= ${threshold}` : `Classification ${expected} has score ${score.toFixed(2)} >= ${threshold}`;
305
+ return {
306
+ pass: true,
307
+ score,
308
+ reason
309
+ };
310
+ }
311
+ return {
312
+ pass: false,
313
+ score,
314
+ reason: expected === void 0 ? `Maximum classification score ${score.toFixed(2)} < ${threshold}` : `Classification ${expected} has score ${score.toFixed(2)} < ${threshold}`
315
+ };
316
+ }
317
+ //#endregion
318
+ //#region src/matchers/comparison.ts
319
+ async function matchesSelectBest(criteria, outputs, grading, vars, providerCallContext) {
320
+ require_invariant.invariant(outputs.length >= 2, "select-best assertion must have at least two outputs to compare between");
321
+ const resp = await require_graders.callProviderWithContext(await require_graders.getAndCheckProvider("text", grading?.provider, (await require_graders.getDefaultProviders()).gradingProvider, "select-best check"), await require_graders.renderLlmRubricPrompt(await require_graders.loadRubricPrompt(grading?.rubricPrompt, require_graders.SELECT_BEST_PROMPT), {
322
+ criteria,
323
+ outputs: outputs.map((o) => require_graders.tryParse(o)),
324
+ ...vars || {}
325
+ }), "select-best", {
326
+ criteria,
327
+ outputs: outputs.map((o) => require_graders.tryParse(o)),
328
+ ...vars || {}
329
+ }, providerCallContext);
330
+ if (resp.error || !resp.output) return Array.from({ length: outputs.length }, () => require_graders.fail(resp.error || "No output", resp.tokenUsage));
331
+ require_invariant.invariant(typeof resp.output === "string", "select-best produced malformed response");
332
+ const firstIntegerMatch = resp.output.trim().match(/\d+/);
333
+ const verdict = firstIntegerMatch ? Number.parseInt(firstIntegerMatch[0], 10) : NaN;
334
+ if (Number.isNaN(verdict) || verdict < 0 || verdict >= outputs.length) return Array.from({ length: outputs.length }, () => require_graders.fail(`Invalid select-best verdict: ${verdict}`, resp.tokenUsage));
335
+ const tokensUsed = require_graders.normalizeMatcherTokenUsage(resp.tokenUsage);
336
+ return outputs.map((_output, index) => {
337
+ if (index === verdict) return {
338
+ pass: true,
339
+ score: 1,
340
+ reason: `Output selected as the best: ${criteria}`,
341
+ tokensUsed
342
+ };
343
+ else return {
344
+ pass: false,
345
+ score: 0,
346
+ reason: `Output not selected: ${criteria}`,
347
+ tokensUsed
348
+ };
349
+ });
350
+ }
351
+ async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
352
+ require_invariant.invariant(outputs.length >= 2, "max-score assertion must have at least two outputs to compare between");
353
+ const value = assertion.value || {};
354
+ const options = {
355
+ method: typeof value === "object" && "method" in value ? value.method : "average",
356
+ weights: typeof value === "object" && "weights" in value ? value.weights : {},
357
+ threshold: typeof value === "object" && "threshold" in value ? value.threshold : void 0
358
+ };
359
+ const scores = resultsWithGradingResults.map((result, index) => {
360
+ const relevantResults = (result.gradingResult?.componentResults || []).filter((r) => r.assertion && r.assertion.type !== "max-score" && r.assertion.type !== "select-best");
361
+ if (relevantResults.length === 0) throw new Error("max-score requires at least one other assertion (besides max-score or select-best) to aggregate scores from");
362
+ let totalWeightedScore = 0;
363
+ let totalWeight = 0;
364
+ relevantResults.forEach((componentResult) => {
365
+ const assertionType = componentResult.assertion?.type || "unknown";
366
+ const weight = options.weights[assertionType] === void 0 ? 1 : options.weights[assertionType];
367
+ const score = componentResult.score || 0;
368
+ totalWeightedScore += score * weight;
369
+ totalWeight += weight;
370
+ });
371
+ let aggregateScore;
372
+ if (options.method === "sum") aggregateScore = totalWeightedScore;
373
+ else aggregateScore = totalWeight > 0 ? totalWeightedScore / totalWeight : 0;
374
+ return {
375
+ index,
376
+ score: aggregateScore,
377
+ componentCount: relevantResults.length,
378
+ totalWeight
379
+ };
380
+ });
381
+ let maxScore = -Infinity;
382
+ let winnerIndex = 0;
383
+ for (let i = 0; i < scores.length; i++) if (scores[i].score > maxScore) {
384
+ maxScore = scores[i].score;
385
+ winnerIndex = i;
386
+ }
387
+ const meetsThreshold = options.threshold === void 0 || maxScore >= options.threshold;
388
+ return scores.map(({ index, score, componentCount, totalWeight }) => {
389
+ const isWinner = index === winnerIndex && meetsThreshold;
390
+ return {
391
+ pass: isWinner,
392
+ score: isWinner ? 1 : 0,
393
+ reason: isWinner ? `Selected as highest scoring output (score: ${score.toFixed(3)})` : score === maxScore && !meetsThreshold ? `Not selected - score ${score.toFixed(3)} below threshold ${options.threshold}` : `Not selected (score: ${score.toFixed(3)}, max: ${maxScore.toFixed(3)})`,
394
+ namedScores: {
395
+ maxScore: score,
396
+ assertionCount: componentCount,
397
+ totalWeight
398
+ }
399
+ };
400
+ });
401
+ }
402
+ //#endregion
403
+ //#region src/matchers/moderation.ts
404
+ async function matchesModeration({ userPrompt, assistantResponse, categories = [] }, grading) {
405
+ if (!assistantResponse) return {
406
+ pass: true,
407
+ score: 1,
408
+ reason: "No output to moderate"
409
+ };
410
+ const defaultProviders = await require_graders.getDefaultProviders();
411
+ const defaultModerationProvider = !require_logger.getEnvString("OPENAI_API_KEY") && (require_logger.getEnvString("REPLICATE_API_KEY") || require_logger.getEnvString("REPLICATE_API_TOKEN")) ? await require_providers.loadApiProvider(require_types.LLAMA_GUARD_REPLICATE_PROVIDER) : defaultProviders.moderationProvider;
412
+ const moderationProvider = await require_graders.getAndCheckProvider("moderation", grading?.provider, defaultModerationProvider, "moderation check");
413
+ require_invariant.invariant(moderationProvider, "Moderation provider must be defined");
414
+ const resp = await moderationProvider.callModerationApi(userPrompt, assistantResponse);
415
+ if (resp.error) return {
416
+ pass: false,
417
+ score: 0,
418
+ reason: `Moderation API error: ${resp.error}`
419
+ };
420
+ const { flags } = resp;
421
+ if (!flags || flags.length === 0) return {
422
+ pass: true,
423
+ score: 1,
424
+ reason: "No moderation flags detected"
425
+ };
426
+ const filteredFlags = categories.length === 0 ? flags : flags.filter((flag) => categories.includes(flag.code));
427
+ if (filteredFlags.length > 0) return {
428
+ pass: false,
429
+ score: 0,
430
+ reason: `Moderation flags detected: ${filteredFlags.map((flag) => flag.description).join(", ")}`
431
+ };
432
+ return {
433
+ pass: true,
434
+ score: 1,
435
+ reason: "No relevant moderation flags detected"
436
+ };
437
+ }
438
+ //#endregion
439
+ //#region src/assertions/contextUtils.ts
440
+ /**
441
+ * Resolves the context value for context-based assertions.
442
+ * Supports extracting context from test variables or transforming from output.
443
+ * Can return either a single context string or an array of context chunks.
444
+ *
445
+ * @param assertion - The assertion configuration
446
+ * @param test - The test case
447
+ * @param output - The provider output (after provider transform, before test transform)
448
+ * @param prompt - The prompt text
449
+ * @param fallbackContext - Optional fallback context (e.g., prompt for context-recall)
450
+ * @param providerResponse - Optional full provider response for contextTransform
451
+ * @returns The resolved context string or array of strings
452
+ * @throws Error if context cannot be resolved or transform fails
453
+ */
454
+ async function resolveContext(assertion, test, output, prompt, fallbackContext, providerResponse) {
455
+ let contextValue;
456
+ if (test.vars?.context) {
457
+ if (typeof test.vars.context === "string") contextValue = test.vars.context;
458
+ else if (Array.isArray(test.vars.context)) {
459
+ const invalidEntry = [...test.vars.context.entries()].find(([, v]) => typeof v !== "string");
460
+ if (invalidEntry) {
461
+ const [idx, val] = invalidEntry;
462
+ require_invariant.invariant(false, `Invalid context: expected an array of strings, but found ${typeof val} at index ${idx}`);
463
+ }
464
+ contextValue = test.vars.context;
465
+ }
466
+ } else if (fallbackContext) contextValue = fallbackContext;
467
+ if (assertion.contextTransform) {
468
+ const getLabel = () => require_transform$1.getTransformLabel(assertion.contextTransform);
469
+ try {
470
+ const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
471
+ const transformed = await require_transform$1.transform(assertion.contextTransform, outputForTransform, {
472
+ vars: test.vars,
473
+ prompt: { label: prompt },
474
+ ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
475
+ });
476
+ require_invariant.invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
477
+ contextValue = transformed;
478
+ } catch (error) {
479
+ throw new Error(`Failed to transform context using expression '${getLabel()}': ${require_transform$1.getTransformErrorMessage(error)}`);
480
+ }
481
+ }
482
+ require_invariant.invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
483
+ return contextValue;
484
+ }
485
+ /**
486
+ * Serializes context (string or string[]) to a single string for prompts.
487
+ * Joins chunks with double newlines to preserve separation.
488
+ */
489
+ function serializeContext(context) {
490
+ return Array.isArray(context) ? context.join("\n\n") : context;
491
+ }
492
+ //#endregion
493
+ //#region src/matchers/rag.ts
494
+ async function matchesAnswerRelevance(input, output, threshold, grading, providerCallContext) {
495
+ const defaults = await require_graders.getDefaultProviders();
496
+ const embeddingProvider = await require_graders.getAndCheckProvider("embedding", grading?.provider, defaults.embeddingProvider, "answer relevancy check");
497
+ const textProvider = await require_graders.getAndCheckProvider("text", grading?.provider, defaults.gradingProvider, "answer relevancy check");
498
+ const tokensUsed = require_graders.normalizeMatcherTokenUsage(void 0);
499
+ const rubricPrompt = await require_graders.loadRubricPrompt(grading?.rubricPrompt, require_graders.ANSWER_RELEVANCY_GENERATE);
500
+ const parsedOutput = require_graders.tryParse(output);
501
+ const promptText = await require_graders.renderLlmRubricPrompt(rubricPrompt, { answer: parsedOutput });
502
+ const candidateQuestions = [];
503
+ for (let i = 0; i < 3; i++) {
504
+ const resp = await require_graders.callProviderWithContext(textProvider, promptText, "answer-relevance", { answer: parsedOutput }, providerCallContext);
505
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, resp.tokenUsage);
506
+ if (resp.error || !resp.output) return require_graders.fail(resp.error || "No output", tokensUsed);
507
+ require_invariant.invariant(typeof resp.output === "string", "answer relevancy check produced malformed response");
508
+ candidateQuestions.push(resp.output);
509
+ }
510
+ require_invariant.invariant(typeof embeddingProvider.callEmbeddingApi === "function", `Provider ${embeddingProvider.id()} must implement callEmbeddingApi for similarity check`);
511
+ const inputEmbeddingResp = await embeddingProvider.callEmbeddingApi(input);
512
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, inputEmbeddingResp.tokenUsage);
513
+ if (inputEmbeddingResp.error || !inputEmbeddingResp.embedding) return require_graders.fail(inputEmbeddingResp.error || "No embedding", tokensUsed);
514
+ const inputEmbedding = inputEmbeddingResp.embedding;
515
+ const similarities = [];
516
+ const questionsWithScores = [];
517
+ for (const question of candidateQuestions) {
518
+ const resp = await embeddingProvider.callEmbeddingApi(question);
519
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, resp.tokenUsage);
520
+ if (resp.error || !resp.embedding) return require_graders.fail(resp.error || "No embedding", tokensUsed);
521
+ const questionSimilarity = require_graders.cosineSimilarity(inputEmbedding, resp.embedding);
522
+ similarities.push(questionSimilarity);
523
+ questionsWithScores.push({
524
+ question,
525
+ similarity: questionSimilarity
526
+ });
527
+ }
528
+ const similarity = similarities.reduce((a, b) => a + b, 0) / similarities.length;
529
+ const pass = similarity >= threshold - Number.EPSILON;
530
+ const greaterThanReason = `Relevance ${similarity.toFixed(2)} is greater than threshold ${threshold}`;
531
+ const lessThanReason = `Relevance ${similarity.toFixed(2)} is less than threshold ${threshold}`;
532
+ const metadata = {
533
+ generatedQuestions: questionsWithScores,
534
+ averageSimilarity: similarity,
535
+ threshold
536
+ };
537
+ if (pass) return {
538
+ pass: true,
539
+ score: similarity,
540
+ reason: greaterThanReason,
541
+ tokensUsed,
542
+ metadata
543
+ };
544
+ return {
545
+ pass: false,
546
+ score: similarity,
547
+ reason: lessThanReason,
548
+ tokensUsed,
549
+ metadata
550
+ };
551
+ }
552
+ async function matchesContextRecall(context, groundTruth, threshold, grading, vars, providerCallContext) {
553
+ const textProvider = await require_graders.getAndCheckProvider("text", grading?.provider, (await require_graders.getDefaultProviders()).gradingProvider, "context recall check");
554
+ const contextString = serializeContext(context);
555
+ const resp = await require_graders.callProviderWithContext(textProvider, await require_graders.renderLlmRubricPrompt(await require_graders.loadRubricPrompt(grading?.rubricPrompt, require_graders.CONTEXT_RECALL), {
556
+ context: contextString,
557
+ groundTruth,
558
+ ...vars || {}
559
+ }), "context-recall", {
560
+ context: contextString,
561
+ groundTruth,
562
+ ...vars || {}
563
+ }, providerCallContext);
564
+ if (resp.error || !resp.output) return require_graders.fail(resp.error || "No output", resp.tokenUsage);
565
+ require_invariant.invariant(typeof resp.output === "string", "context-recall produced malformed response");
566
+ const attributedTokenLower = require_graders.CONTEXT_RECALL_ATTRIBUTED_TOKEN.toLowerCase();
567
+ const notAttributedTokenLower = require_graders.CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN.toLowerCase();
568
+ const sentences = require_graders.splitIntoSentences(resp.output).filter((line) => {
569
+ const lowerLine = line.toLowerCase();
570
+ return lowerLine.includes(attributedTokenLower) || lowerLine.includes(notAttributedTokenLower);
571
+ });
572
+ const sentenceAttributions = [];
573
+ let numerator = 0;
574
+ for (const sentence of sentences) {
575
+ const lowerSentence = sentence.toLowerCase();
576
+ const isAttributed = !lowerSentence.includes(notAttributedTokenLower) && lowerSentence.includes(attributedTokenLower);
577
+ if (isAttributed) numerator++;
578
+ const sentenceMatch = sentence.match(/^\d+\.\s*([^\.]+\.)/);
579
+ const cleanSentence = sentenceMatch ? sentenceMatch[1].trim() : sentence.split(".")[0].trim();
580
+ sentenceAttributions.push({
581
+ sentence: cleanSentence,
582
+ attributed: isAttributed
583
+ });
584
+ }
585
+ const score = sentences.length > 0 ? numerator / sentences.length : 0;
586
+ const pass = score >= threshold - Number.EPSILON;
587
+ const metadata = {
588
+ sentenceAttributions,
589
+ totalSentences: sentences.length,
590
+ attributedSentences: numerator,
591
+ score
592
+ };
593
+ return {
594
+ pass,
595
+ score,
596
+ reason: pass ? `Recall ${score.toFixed(2)} is >= ${threshold}` : `Recall ${score.toFixed(2)} is < ${threshold}`,
597
+ tokensUsed: require_graders.normalizeMatcherTokenUsage(resp.tokenUsage),
598
+ metadata
599
+ };
600
+ }
601
+ async function matchesContextRelevance(question, context, threshold, grading, providerCallContext) {
602
+ const textProvider = await require_graders.getAndCheckProvider("text", grading?.provider, (await require_graders.getDefaultProviders()).gradingProvider, "context relevance check");
603
+ const contextString = serializeContext(context);
604
+ const resp = await require_graders.callProviderWithContext(textProvider, await require_graders.renderLlmRubricPrompt(await require_graders.loadRubricPrompt(grading?.rubricPrompt, require_graders.CONTEXT_RELEVANCE), {
605
+ context: contextString,
606
+ query: question
607
+ }), "context-relevance", {
608
+ context: contextString,
609
+ query: question
610
+ }, providerCallContext);
611
+ if (resp.error || !resp.output) return require_graders.fail(resp.error || "No output", resp.tokenUsage);
612
+ require_invariant.invariant(typeof resp.output === "string", "context-relevance produced malformed response");
613
+ const contextUnits = Array.isArray(context) ? context.filter((chunk) => chunk.trim().length > 0) : require_graders.splitIntoSentences(context);
614
+ const totalContextUnits = contextUnits.length;
615
+ const extractedSentences = require_graders.splitIntoSentences(resp.output);
616
+ const relevantSentences = [];
617
+ const insufficientInformation = resp.output.includes(require_graders.CONTEXT_RELEVANCE_BAD);
618
+ let numerator = 0;
619
+ if (insufficientInformation) numerator = 0;
620
+ else {
621
+ const uniqueRelevantSentences = [...new Set(extractedSentences)];
622
+ numerator = Math.min(uniqueRelevantSentences.length, totalContextUnits);
623
+ relevantSentences.push(...uniqueRelevantSentences);
624
+ }
625
+ const score = totalContextUnits > 0 ? numerator / totalContextUnits : 0;
626
+ const pass = score >= threshold - Number.EPSILON;
627
+ const metadata = {
628
+ extractedSentences: relevantSentences,
629
+ totalContextUnits,
630
+ totalContextSentences: totalContextUnits,
631
+ contextUnits,
632
+ relevantSentenceCount: numerator,
633
+ insufficientInformation,
634
+ score
635
+ };
636
+ return {
637
+ pass,
638
+ score,
639
+ reason: pass ? `Context relevance ${score.toFixed(2)} is >= ${threshold}` : `Context relevance ${score.toFixed(2)} is < ${threshold}`,
640
+ tokensUsed: require_graders.normalizeMatcherTokenUsage(resp.tokenUsage),
641
+ metadata
642
+ };
643
+ }
644
+ async function matchesContextFaithfulness(query, output, context, threshold, grading, vars, providerCallContext) {
645
+ const textProvider = await require_graders.getAndCheckProvider("text", grading?.provider, (await require_graders.getDefaultProviders()).gradingProvider, "faithfulness check");
646
+ const tokensUsed = require_graders.normalizeMatcherTokenUsage(void 0);
647
+ if (grading?.rubricPrompt) require_invariant.invariant(Array.isArray(grading.rubricPrompt), "rubricPrompt must be an array");
648
+ const rawLongformPrompt = typeof grading?.rubricPrompt?.[0] === "string" ? grading?.rubricPrompt?.[0] : grading?.rubricPrompt?.[0]?.content;
649
+ const rawNliPrompt = typeof grading?.rubricPrompt?.[1] === "string" ? grading?.rubricPrompt?.[1] : grading?.rubricPrompt?.[1]?.content;
650
+ const longformPrompt = await require_graders.loadRubricPrompt(rawLongformPrompt, require_graders.CONTEXT_FAITHFULNESS_LONGFORM);
651
+ const nliPrompt = await require_graders.loadRubricPrompt(rawNliPrompt, require_graders.CONTEXT_FAITHFULNESS_NLI_STATEMENTS);
652
+ let promptText = await require_graders.renderLlmRubricPrompt(longformPrompt, {
653
+ question: query,
654
+ answer: require_graders.tryParse(output),
655
+ ...vars || {}
656
+ });
657
+ let resp = await require_graders.callProviderWithContext(textProvider, promptText, "context-faithfulness-longform", {
658
+ question: query,
659
+ answer: require_graders.tryParse(output),
660
+ ...vars || {}
661
+ }, providerCallContext);
662
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, resp.tokenUsage);
663
+ if (resp.error || !resp.output) return require_graders.fail(resp.error || "No output", tokensUsed);
664
+ require_invariant.invariant(typeof resp.output === "string", "context-faithfulness produced malformed response");
665
+ const contextString = serializeContext(context);
666
+ const statements = require_graders.splitIntoSentences(resp.output);
667
+ promptText = await require_graders.renderLlmRubricPrompt(nliPrompt, {
668
+ context: contextString,
669
+ statements,
670
+ ...vars || {}
671
+ });
672
+ resp = await require_graders.callProviderWithContext(textProvider, promptText, "context-faithfulness-nli", {
673
+ context: contextString,
674
+ statements,
675
+ ...vars || {}
676
+ }, providerCallContext);
677
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, resp.tokenUsage);
678
+ if (resp.error || !resp.output) return require_graders.fail(resp.error || "No output", tokensUsed);
679
+ require_invariant.invariant(typeof resp.output === "string", "context-faithfulness produced malformed response");
680
+ let finalAnswer = "Final verdict for each statement in order:";
681
+ finalAnswer = finalAnswer.toLowerCase();
682
+ let verdicts = resp.output.toLowerCase().trim();
683
+ let score = 0;
684
+ if (statements.length > 0) if (verdicts.includes(finalAnswer)) {
685
+ verdicts = verdicts.slice(verdicts.indexOf(finalAnswer) + finalAnswer.length);
686
+ const parsedVerdicts = verdicts.split(".").filter((answer) => answer.trim() !== "");
687
+ if (parsedVerdicts.length > 0) score = 1 - parsedVerdicts.filter((answer) => !answer.includes("yes")).length / statements.length;
688
+ } else {
689
+ const noVerdictCount = verdicts.split("verdict: no").length - 1;
690
+ if (noVerdictCount + (verdicts.split("verdict: yes").length - 1) > 0) score = 1 - noVerdictCount / statements.length;
691
+ }
692
+ score = Math.min(1, Math.max(0, score));
693
+ const pass = score >= threshold - Number.EPSILON;
694
+ return {
695
+ pass,
696
+ score,
697
+ reason: pass ? `Faithfulness ${score.toFixed(2)} is >= ${threshold}` : `Faithfulness ${score.toFixed(2)} is < ${threshold}`,
698
+ tokensUsed
699
+ };
700
+ }
701
+ //#endregion
702
+ //#region src/matchers/similarity.ts
703
+ function calculateSimilarityScore(expectedEmbedding, outputEmbedding, metric, tokensUsed) {
704
+ switch (metric) {
705
+ case "cosine": return require_graders.cosineSimilarity(expectedEmbedding, outputEmbedding);
706
+ case "dot_product": return require_graders.dotProduct(expectedEmbedding, outputEmbedding);
707
+ case "euclidean": return require_graders.euclideanDistance(expectedEmbedding, outputEmbedding);
708
+ default: return require_graders.fail(`Unsupported metric: ${metric}`, tokensUsed);
709
+ }
710
+ }
711
+ function buildSimilarityResult(similarity, threshold, inverse, metric, tokensUsed) {
712
+ if (metric === "euclidean") {
713
+ const distance = similarity;
714
+ const pass = inverse ? distance >= threshold - Number.EPSILON : distance <= threshold + Number.EPSILON;
715
+ const normalizedScore = 1 / (1 + distance);
716
+ const score = inverse ? 1 - normalizedScore : normalizedScore;
717
+ const belowThresholdReason = `Distance ${distance.toFixed(2)} is less than or equal to threshold ${threshold}`;
718
+ const aboveThresholdReason = `Distance ${distance.toFixed(2)} is greater than threshold ${threshold}`;
719
+ return {
720
+ pass,
721
+ score,
722
+ reason: pass ? inverse ? aboveThresholdReason : belowThresholdReason : inverse ? belowThresholdReason : aboveThresholdReason,
723
+ tokensUsed
724
+ };
725
+ }
726
+ const pass = inverse ? similarity <= threshold + Number.EPSILON : similarity >= threshold - Number.EPSILON;
727
+ const score = inverse ? 1 - similarity : similarity;
728
+ const greaterThanReason = `Similarity ${similarity.toFixed(2)} is greater than or equal to threshold ${threshold}`;
729
+ const lessThanReason = `Similarity ${similarity.toFixed(2)} is less than threshold ${threshold}`;
730
+ return {
731
+ pass,
732
+ score,
733
+ reason: pass ? inverse ? lessThanReason : greaterThanReason : inverse ? greaterThanReason : lessThanReason,
734
+ tokensUsed
735
+ };
736
+ }
737
+ async function calculateProviderSimilarity(finalProvider, expected, output, metric, tokensUsed) {
738
+ if (metric === "cosine" && "callSimilarityApi" in finalProvider) {
739
+ const similarityResp = await finalProvider.callSimilarityApi(expected, output);
740
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, similarityResp.tokenUsage);
741
+ if (similarityResp.error) return require_graders.fail(similarityResp.error, tokensUsed);
742
+ if (similarityResp.similarity == null) return require_graders.fail("Unknown error fetching similarity", tokensUsed);
743
+ if (!Number.isFinite(similarityResp.similarity)) return require_graders.fail(`Invalid similarity score: ${similarityResp.similarity}`, tokensUsed);
744
+ return similarityResp.similarity;
745
+ }
746
+ const callEmbeddingApi = "callEmbeddingApi" in finalProvider ? finalProvider.callEmbeddingApi : void 0;
747
+ if (typeof callEmbeddingApi !== "function") {
748
+ if ("callSimilarityApi" in finalProvider) return require_graders.fail(`Provider ${finalProvider.id()} only supports cosine similarity via callSimilarityApi`, tokensUsed);
749
+ throw new Error("Provider must implement callSimilarityApi or callEmbeddingApi");
750
+ }
751
+ const [expectedEmbedding, outputEmbedding] = await Promise.all([callEmbeddingApi.call(finalProvider, expected), callEmbeddingApi.call(finalProvider, output)]);
752
+ const mergedUsage = require_graders.normalizeMatcherTokenUsage(void 0);
753
+ require_tokenUsageUtils.accumulateTokenUsage(mergedUsage, expectedEmbedding.tokenUsage);
754
+ require_tokenUsageUtils.accumulateTokenUsage(mergedUsage, outputEmbedding.tokenUsage);
755
+ require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, mergedUsage);
756
+ if (expectedEmbedding.error || outputEmbedding.error) return require_graders.fail(expectedEmbedding.error || outputEmbedding.error || "Unknown error fetching embeddings", tokensUsed);
757
+ if (!expectedEmbedding.embedding || !outputEmbedding.embedding) return require_graders.fail("Embedding not found", tokensUsed);
758
+ return calculateSimilarityScore(expectedEmbedding.embedding, outputEmbedding.embedding, metric, tokensUsed);
759
+ }
760
+ async function matchesSimilarity(expected, output, threshold, inverse = false, grading, metric = "cosine") {
761
+ if (metric === "cosine" && require_logger.state.config?.redteam && require_remoteGeneration.shouldGenerateRemote({ requireEmbeddingProvider: true })) try {
762
+ return await require_graders.doRemoteGrading({
763
+ task: "similar",
764
+ expected,
765
+ output,
766
+ threshold,
767
+ inverse
768
+ });
769
+ } catch (error) {
770
+ return require_graders.fail(`Could not perform remote grading: ${error}`);
771
+ }
772
+ const defaults = await require_graders.getDefaultProviders();
773
+ const finalProvider = await require_graders.getAndCheckProvider("embedding", grading?.provider, defaults.embeddingProvider, "similarity check");
774
+ const tokensUsed = require_graders.normalizeMatcherTokenUsage(void 0);
775
+ const similarity = await calculateProviderSimilarity(finalProvider, expected, output, metric, tokensUsed);
776
+ if (typeof similarity !== "number") return similarity;
777
+ return buildSimilarityResult(similarity, threshold, inverse, metric, tokensUsed);
778
+ }
779
+ //#endregion
265
780
  //#region src/tracing/evaluatorTracing.ts
266
781
  let otlpReceiverStarted = false;
267
782
  const DEFAULT_OTLP_ACCEPT_FORMATS = ["json", "protobuf"];
@@ -305,7 +820,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
305
820
  require_telemetry.telemetry.record("feature_used", { feature: "tracing" });
306
821
  try {
307
822
  require_logger.logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
308
- const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-BfcVq2Nq.cjs"));
823
+ const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CvJdBGSc.cjs"));
309
824
  const port = testSuite.tracing.otlp.http.port || 4318;
310
825
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
311
826
  const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
@@ -329,7 +844,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
329
844
  async function stopOtlpReceiverIfNeeded() {
330
845
  if (otlpReceiverStarted) try {
331
846
  require_logger.logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
332
- const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-BfcVq2Nq.cjs"));
847
+ const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CvJdBGSc.cjs"));
333
848
  await stopOTLPReceiver();
334
849
  otlpReceiverStarted = false;
335
850
  require_logger.logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -364,7 +879,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
364
879
  }
365
880
  if (!tracingEnabled) return null;
366
881
  require_logger.logger.debug("[EvaluatorTracing] Importing trace store");
367
- const { getTraceStore } = await Promise.resolve().then(() => require("./store-CWOSz6D_.cjs"));
882
+ const { getTraceStore } = await Promise.resolve().then(() => require("./store-B2NDDooM.cjs")).then((n) => n.store_exports);
368
883
  const traceStore = getTraceStore();
369
884
  const traceId = generateTraceId();
370
885
  const spanId = generateSpanId();
@@ -406,7 +921,7 @@ const handleAnswerRelevance = async ({ assertion, output, prompt, test, provider
406
921
  require_invariant.invariant(prompt, "answer-relevance assertion type must have a prompt");
407
922
  return {
408
923
  assertion,
409
- ...await require_graders.matchesAnswerRelevance(typeof test?.vars?.query === "string" ? test.vars.query : prompt, output, assertion.threshold ?? 0, test.options, providerCallContext)
924
+ ...await matchesAnswerRelevance(typeof test?.vars?.query === "string" ? test.vars.query : prompt, output, assertion.threshold ?? 0, test.options, providerCallContext)
410
925
  };
411
926
  };
412
927
  //#endregion
@@ -662,7 +1177,7 @@ function handleBleuScore({ assertion, inverse, outputString, renderedValue }) {
662
1177
  //#region src/assertions/classifier.ts
663
1178
  async function handleClassifier({ assertion, renderedValue, outputString, test, inverse }) {
664
1179
  require_invariant.invariant(typeof renderedValue === "string" || typeof renderedValue === "undefined", "\"classifier\" assertion type must have a string value or be undefined");
665
- const classificationResult = await require_graders.matchesClassification(renderedValue, outputString, assertion.threshold ?? 1, test.options);
1180
+ const classificationResult = await matchesClassification(renderedValue, outputString, assertion.threshold ?? 1, test.options);
666
1181
  if (inverse) {
667
1182
  classificationResult.pass = !classificationResult.pass;
668
1183
  classificationResult.score = 1 - classificationResult.score;
@@ -674,38 +1189,84 @@ async function handleClassifier({ assertion, renderedValue, outputString, test,
674
1189
  }
675
1190
  //#endregion
676
1191
  //#region src/assertions/contains.ts
1192
+ /**
1193
+ * Advance over separators between parsed fields.
1194
+ *
1195
+ * Contains-any values allow whitespace around comma delimiters, and historical
1196
+ * parsing ignored repeated commas rather than producing empty fields.
1197
+ */
1198
+ function skipWhitespaceAndCommas(value, startIndex) {
1199
+ let i = startIndex;
1200
+ while (i < value.length) {
1201
+ i = skipWhitespace(value, i);
1202
+ if (value[i] !== ",") break;
1203
+ i++;
1204
+ }
1205
+ return i;
1206
+ }
1207
+ /**
1208
+ * Advance over whitespace while preserving comma delimiter handling for callers.
1209
+ */
1210
+ function skipWhitespace(value, startIndex) {
1211
+ let i = startIndex;
1212
+ while (i < value.length && /\s/.test(value[i])) i++;
1213
+ return i;
1214
+ }
1215
+ /**
1216
+ * Parse a quoted field using the assertion parser's CSV-like escape rules.
1217
+ *
1218
+ * Supports backslash-escaped quotes/backslashes and doubled quotes, and rejects
1219
+ * unterminated fields so malformed assertion values do not silently pass.
1220
+ */
1221
+ function parseQuotedField(value, startIndex) {
1222
+ let i = startIndex + 1;
1223
+ let field = "";
1224
+ let terminated = false;
1225
+ while (i < value.length) if (value[i] === "\\" && i + 1 < value.length && ["\"", "\\"].includes(value[i + 1])) {
1226
+ field += value[i + 1];
1227
+ i += 2;
1228
+ } else if (value[i] === "\"" && i + 1 < value.length && value[i + 1] === "\"") {
1229
+ field += "\"";
1230
+ i += 2;
1231
+ } else if (value[i] === "\"") {
1232
+ i++;
1233
+ terminated = true;
1234
+ break;
1235
+ } else {
1236
+ field += value[i];
1237
+ i++;
1238
+ }
1239
+ require_invariant.invariant(terminated, "Unterminated quoted field in contains assertion value");
1240
+ return {
1241
+ field,
1242
+ nextIndex: i
1243
+ };
1244
+ }
1245
+ /**
1246
+ * Parse an unquoted field up to the next comma, trimming surrounding whitespace.
1247
+ */
1248
+ function parseUnquotedField(value, startIndex) {
1249
+ let i = startIndex;
1250
+ while (i < value.length && value[i] !== ",") i++;
1251
+ return {
1252
+ field: value.substring(startIndex, i).trim(),
1253
+ nextIndex: i
1254
+ };
1255
+ }
1256
+ /**
1257
+ * Split a contains-any string into fields while preserving quoted commas.
1258
+ */
677
1259
  function parseCommaSeparatedValues(value) {
678
1260
  const results = [];
679
1261
  let i = 0;
680
1262
  while (i < value.length) {
681
- while (i < value.length && /\s/.test(value[i])) i++;
1263
+ i = skipWhitespaceAndCommas(value, i);
682
1264
  if (i >= value.length) break;
683
- if (value[i] === ",") {
684
- i++;
685
- continue;
686
- }
687
- if (value[i] === "\"") {
688
- i++;
689
- let field = "";
690
- while (i < value.length) if (value[i] === "\\" && i + 1 < value.length && (value[i + 1] === "\"" || value[i + 1] === "\\")) {
691
- field += value[i + 1];
692
- i += 2;
693
- } else if (value[i] === "\"" && i + 1 < value.length && value[i + 1] === "\"") {
694
- field += "\"";
695
- i += 2;
696
- } else if (value[i] === "\"") {
697
- i++;
698
- break;
699
- } else {
700
- field += value[i];
701
- i++;
702
- }
703
- results.push(field);
704
- } else {
705
- const start = i;
706
- while (i < value.length && value[i] !== ",") i++;
707
- results.push(value.substring(start, i).trim());
708
- }
1265
+ const isQuotedField = value[i] === "\"";
1266
+ const parsed = isQuotedField ? parseQuotedField(value, i) : parseUnquotedField(value, i);
1267
+ results.push(parsed.field);
1268
+ i = isQuotedField ? skipWhitespace(value, parsed.nextIndex) : parsed.nextIndex;
1269
+ require_invariant.invariant(!isQuotedField || i >= value.length || value[i] === ",", "Expected comma after quoted field in contains assertion value");
709
1270
  }
710
1271
  return results;
711
1272
  }
@@ -803,10 +1364,10 @@ async function handleContextFaithfulness({ assertion, test, output, prompt, prov
803
1364
  require_invariant.invariant(test.vars, "context-faithfulness assertion requires a test with variables");
804
1365
  require_invariant.invariant(typeof test.vars.query === "string", "context-faithfulness assertion requires a \"query\" variable with the user question");
805
1366
  require_invariant.invariant(typeof output === "string", "context-faithfulness assertion requires string output from the provider");
806
- const context = await require_graders.resolveContext(assertion, test, output, prompt, void 0, providerResponse);
1367
+ const context = await resolveContext(assertion, test, output, prompt, void 0, providerResponse);
807
1368
  return {
808
1369
  assertion,
809
- ...await require_graders.matchesContextFaithfulness(test.vars.query, output, context, assertion.threshold ?? 0, test.options, test.vars, providerCallContext),
1370
+ ...await matchesContextFaithfulness(test.vars.query, output, context, assertion.threshold ?? 0, test.options, test.vars, providerCallContext),
810
1371
  metadata: { context }
811
1372
  };
812
1373
  }
@@ -825,8 +1386,8 @@ async function handleContextFaithfulness({ assertion, test, output, prompt, prov
825
1386
  const handleContextRecall = async ({ assertion, renderedValue, prompt, test, output, providerResponse, providerCallContext }) => {
826
1387
  require_invariant.invariant(typeof renderedValue === "string", "context-recall assertion requires a string value (expected answer or fact to verify)");
827
1388
  require_invariant.invariant(prompt, "context-recall assertion requires a prompt");
828
- const context = await require_graders.resolveContext(assertion, test, output, prompt, prompt, providerResponse);
829
- const result = await require_graders.matchesContextRecall(context, renderedValue, assertion.threshold ?? 0, test.options, test.vars, providerCallContext);
1389
+ const context = await resolveContext(assertion, test, output, prompt, prompt, providerResponse);
1390
+ const result = await matchesContextRecall(context, renderedValue, assertion.threshold ?? 0, test.options, test.vars, providerCallContext);
830
1391
  return {
831
1392
  assertion,
832
1393
  ...result,
@@ -851,8 +1412,8 @@ const handleContextRecall = async ({ assertion, renderedValue, prompt, test, out
851
1412
  const handleContextRelevance = async ({ assertion, test, output, prompt, providerResponse, providerCallContext }) => {
852
1413
  require_invariant.invariant(test.vars, "context-relevance assertion requires a test with variables");
853
1414
  require_invariant.invariant(typeof test.vars.query === "string", "context-relevance assertion requires a \"query\" variable with the user question");
854
- const context = await require_graders.resolveContext(assertion, test, output, prompt, void 0, providerResponse);
855
- const result = await require_graders.matchesContextRelevance(test.vars.query, context, assertion.threshold ?? 0, test.options, providerCallContext);
1415
+ const context = await resolveContext(assertion, test, output, prompt, void 0, providerResponse);
1416
+ const result = await matchesContextRelevance(test.vars.query, context, assertion.threshold ?? 0, test.options, providerCallContext);
856
1417
  return {
857
1418
  assertion,
858
1419
  ...result,
@@ -930,7 +1491,7 @@ function handleFinishReason({ assertion, inverse = false, renderedValue, provide
930
1491
  //#region src/assertions/functionToolCall.ts
931
1492
  const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
932
1493
  try {
933
- if (provider instanceof require_providers.AIStudioChatProvider || provider instanceof require_providers.GoogleLiveProvider || provider instanceof require_providers.VertexChatProvider) require_transform$1.validateFunctionCall(output, provider.config?.tools, test.vars);
1494
+ if (provider instanceof require_providers.AIStudioChatProvider || provider instanceof require_providers.GoogleLiveProvider || provider instanceof require_providers.VertexChatProvider) require_transform.validateFunctionCall(output, provider.config?.tools, test.vars);
934
1495
  else if (provider instanceof require_chat.OpenAiChatCompletionProvider) require_util$1.validateFunctionCall(output, provider.config.functions, test.vars);
935
1496
  else throw new Error(`Provider does not have functionality for checking function call.`);
936
1497
  return {
@@ -950,27 +1511,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
950
1511
  };
951
1512
  //#endregion
952
1513
  //#region src/assertions/geval.ts
953
- const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
954
- require_invariant.invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
1514
+ const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
1515
+ require_invariant.invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
955
1516
  const threshold = assertion.threshold ?? .7;
956
1517
  if (Array.isArray(renderedValue)) {
957
- const scores = [];
958
- const reasons = [];
959
- for (const value of renderedValue) {
1518
+ if (renderedValue.length === 0) return {
1519
+ assertion,
1520
+ pass: false,
1521
+ score: 0,
1522
+ reason: "G-Eval assertion requires at least one criterion string in the value array."
1523
+ };
1524
+ const responses = [];
1525
+ let failure;
1526
+ for (const [index, value] of renderedValue.entries()) {
960
1527
  const resp = await require_graders.matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
961
- scores.push(resp.score);
962
- reasons.push(resp.reason);
1528
+ responses.push(resp);
1529
+ if (require_graders.isGraderFailure(resp)) {
1530
+ failure = {
1531
+ index,
1532
+ resp
1533
+ };
1534
+ break;
1535
+ }
1536
+ }
1537
+ const tokensUsed = require_tokenUsageUtils.createEmptyTokenUsage();
1538
+ for (const r of responses) require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, r.tokensUsed);
1539
+ if (failure) {
1540
+ const criterion = renderedValue[failure.index];
1541
+ return {
1542
+ assertion,
1543
+ pass: false,
1544
+ score: 0,
1545
+ reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
1546
+ tokensUsed,
1547
+ metadata: failure.resp.metadata
1548
+ };
963
1549
  }
964
- const scoresSum = scores.reduce((a, b) => a + b, 0);
1550
+ const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
1551
+ const combinedReason = responses.map((r) => r.reason).join("\n\n");
965
1552
  return {
966
1553
  assertion,
967
- pass: scoresSum / scores.length >= threshold,
968
- score: scoresSum / scores.length,
969
- reason: reasons.join("\n\n")
1554
+ pass: averageScore >= threshold !== inverse,
1555
+ score: inverse ? 1 - averageScore : averageScore,
1556
+ reason: combinedReason,
1557
+ tokensUsed
970
1558
  };
971
- } else return {
1559
+ }
1560
+ const resp = await require_graders.matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
1561
+ if (require_graders.isGraderFailure(resp)) return {
1562
+ assertion,
1563
+ pass: false,
1564
+ score: 0,
1565
+ reason: resp.reason,
1566
+ tokensUsed: resp.tokensUsed,
1567
+ metadata: resp.metadata
1568
+ };
1569
+ const passed = resp.score >= threshold !== inverse;
1570
+ return {
972
1571
  assertion,
973
- ...await require_graders.matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext)
1572
+ ...resp,
1573
+ pass: passed,
1574
+ score: inverse ? 1 - resp.score : resp.score
974
1575
  };
975
1576
  };
976
1577
  //#endregion
@@ -1110,6 +1711,43 @@ const handleGuardrails = async ({ assertion, inverse, providerResponse }) => {
1110
1711
  };
1111
1712
  //#endregion
1112
1713
  //#region src/assertions/html.ts
1714
+ const LITERAL_WRAPPER_PATTERNS = {
1715
+ html: /<html(?=[\s>/])/,
1716
+ head: /<head(?=[\s>/])/,
1717
+ body: /<body(?=[\s>/])/
1718
+ };
1719
+ function isWrapperTagName(tagName) {
1720
+ return tagName === "html" || tagName === "head" || tagName === "body";
1721
+ }
1722
+ function isTextNode(node) {
1723
+ return node.nodeName === "#text";
1724
+ }
1725
+ function isElementNode(node) {
1726
+ return "tagName" in node;
1727
+ }
1728
+ function hasSourceCodeLocation(element) {
1729
+ return "sourceCodeLocation" in element && element.sourceCodeLocation !== null && element.sourceCodeLocation !== void 0;
1730
+ }
1731
+ function getChildNodes(node) {
1732
+ return "childNodes" in node ? node.childNodes : [];
1733
+ }
1734
+ function findFirstElement(root, predicate) {
1735
+ const stack = [root];
1736
+ while (stack.length > 0) {
1737
+ const current = stack.pop();
1738
+ if (isElementNode(current) && predicate(current)) return current;
1739
+ const children = getChildNodes(current);
1740
+ for (let i = children.length - 1; i >= 0; i--) stack.push(children[i]);
1741
+ }
1742
+ }
1743
+ function hasTopLevelText(parentNode) {
1744
+ return parentNode.childNodes.some((node) => isTextNode(node) && Boolean(node.value.trim()));
1745
+ }
1746
+ function isUserProvidedElement(element, inputLowercase) {
1747
+ const tagName = element.tagName.toLowerCase();
1748
+ if (isWrapperTagName(tagName)) return LITERAL_WRAPPER_PATTERNS[tagName].test(inputLowercase) && hasSourceCodeLocation(element);
1749
+ return VALID_HTML_ELEMENTS.has(tagName) || tagName.includes("-");
1750
+ }
1113
1751
  const HTML_PATTERNS = {
1114
1752
  openingTag: /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?>/,
1115
1753
  closingTag: /<\/[a-zA-Z][a-zA-Z0-9-]*\s*>/,
@@ -1265,37 +1903,21 @@ function validateHtml(htmlString) {
1265
1903
  isValid: false,
1266
1904
  reason: "Output appears to be XML, not HTML"
1267
1905
  };
1268
- try {
1269
- const { document } = new jsdom.JSDOM(trimmed, { contentType: "text/html" }).window;
1270
- if (document.body && !trimmed.toLowerCase().includes("<body")) {
1271
- if (Array.from(document.body.childNodes).some((node) => node.nodeType === 3 && node.textContent?.trim())) return {
1272
- isValid: false,
1273
- reason: "Output must be wrapped in HTML tags"
1274
- };
1275
- }
1276
- const allElements = document.querySelectorAll("*");
1277
- if (!Array.from(allElements).find((element) => {
1278
- const tagName = element.tagName.toLowerCase();
1279
- if ([
1280
- "html",
1281
- "head",
1282
- "body"
1283
- ].includes(tagName) && !trimmed.toLowerCase().includes(`<${tagName}`)) return false;
1284
- return VALID_HTML_ELEMENTS.has(tagName) || tagName.includes("-");
1285
- })) return {
1286
- isValid: false,
1287
- reason: "Output does not contain recognized HTML elements"
1288
- };
1289
- return {
1290
- isValid: true,
1291
- reason: "Output is valid HTML"
1292
- };
1293
- } catch (error) {
1294
- return {
1295
- isValid: false,
1296
- reason: `HTML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`
1297
- };
1298
- }
1906
+ const document = (0, parse5.parse)(trimmed, { sourceCodeLocationInfo: true });
1907
+ const inputLowercase = trimmed.toLowerCase();
1908
+ const body = findFirstElement(document, (element) => element.tagName === "body");
1909
+ if (!(body !== void 0 && LITERAL_WRAPPER_PATTERNS.body.test(inputLowercase) && hasSourceCodeLocation(body)) && body && hasTopLevelText(body)) return {
1910
+ isValid: false,
1911
+ reason: "Output must be wrapped in HTML tags"
1912
+ };
1913
+ if (!findFirstElement(document, (element) => isUserProvidedElement(element, inputLowercase))) return {
1914
+ isValid: false,
1915
+ reason: "Output does not contain recognized HTML elements"
1916
+ };
1917
+ return {
1918
+ isValid: true,
1919
+ reason: "Output is valid HTML"
1920
+ };
1299
1921
  }
1300
1922
  const handleContainsHtml = ({ assertion, outputString, inverse }) => {
1301
1923
  const pass = containsHtml(outputString) !== inverse;
@@ -1460,7 +2082,7 @@ const handleJavascript = async ({ assertion, renderedValue, valueFromScript, ass
1460
2082
  let result;
1461
2083
  if (typeof valueFromScript === "undefined") {
1462
2084
  const functionBody = renderedValue.includes("\n") ? renderedValue : buildFunctionBody(renderedValue);
1463
- result = await validateResult(new Function("output", "context", "process", functionBody)(output, assertionValueContext, require_transform.getProcessShim()));
2085
+ result = await validateResult(new Function("output", "context", "process", functionBody)(output, assertionValueContext, require_processShim.getProcessShim()));
1464
2086
  } else {
1465
2087
  require_invariant.invariant(typeof valueFromScript === "boolean" || typeof valueFromScript === "number" || typeof valueFromScript === "object", `Javascript assertion script must return a boolean, number, or object (${assertion.value})`);
1466
2088
  result = await validateResult(valueFromScript);
@@ -1667,7 +2289,7 @@ const handleModeration = async ({ assertion, test, outputString, providerRespons
1667
2289
  const parsedPrompt = require_fetch.parseChatPrompt(promptToModerate, null);
1668
2290
  if (parsedPrompt && parsedPrompt.length > 0) promptToModerate = getLastModerationPrompt(parsedPrompt) ?? promptToModerate;
1669
2291
  } catch {}
1670
- const moderationResult = await require_graders.matchesModeration({
2292
+ const moderationResult = await matchesModeration({
1671
2293
  userPrompt: promptToModerate,
1672
2294
  assistantResponse: outputString,
1673
2295
  categories: Array.isArray(assertion.value) ? assertion.value : []
@@ -1952,45 +2574,6 @@ function matchesPattern(spanName, pattern) {
1952
2574
  }
1953
2575
  //#endregion
1954
2576
  //#region src/assertions/trajectoryUtils.ts
1955
- const TOOL_ATTRIBUTE_KEYS = [
1956
- "tool.name",
1957
- "tool_name",
1958
- "tool",
1959
- "function.name",
1960
- "function_name",
1961
- "gen_ai.tool.name",
1962
- "codex.mcp.tool",
1963
- "agent.tool",
1964
- "agent.tool_name",
1965
- "agent.toolName"
1966
- ];
1967
- const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
1968
- "tool.arguments",
1969
- "tool.args",
1970
- "tool.input",
1971
- "tool_arguments",
1972
- "tool_args",
1973
- "tool_input",
1974
- "function.arguments",
1975
- "function.args",
1976
- "function.input",
1977
- "function_arguments",
1978
- "function_args",
1979
- "gen_ai.tool.arguments",
1980
- "gen_ai.tool.args",
1981
- "gen_ai.tool.input",
1982
- "gen_ai.tool.call.arguments",
1983
- "gen_ai.tool.call.args",
1984
- "agent.tool.arguments",
1985
- "agent.tool.args",
1986
- "agent.tool.input",
1987
- "codex.mcp.arguments",
1988
- "codex.mcp.args",
1989
- "codex.mcp.input",
1990
- "arguments",
1991
- "args",
1992
- "input"
1993
- ];
1994
2577
  const COMMAND_ATTRIBUTE_KEYS = [
1995
2578
  "codex.command",
1996
2579
  "command",
@@ -2003,16 +2586,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
2003
2586
  "search_query"
2004
2587
  ];
2005
2588
  const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
2589
+ const COMMAND_TOOL_NAMES = new Set([
2590
+ "exec_command",
2591
+ "local_shell",
2592
+ "shell"
2593
+ ]);
2006
2594
  const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
2007
2595
  const MAX_JUDGE_SUMMARY_STEPS = 24;
2008
2596
  const JUDGE_SUMMARY_HEAD_STEPS = 12;
2009
2597
  const JUDGE_SUMMARY_TAIL_STEPS = 12;
2010
- function getStringAttribute(attributes, keys) {
2011
- for (const key of keys) {
2012
- const value = attributes[key];
2013
- if (typeof value === "string" && value.trim()) return value.trim();
2014
- }
2015
- }
2016
2598
  function normalizeStructuredAttribute(value) {
2017
2599
  if (value === void 0 || value === null) return;
2018
2600
  if (typeof value === "string") {
@@ -2044,9 +2626,12 @@ function getTrajectoryStepStatus(step) {
2044
2626
  function getCommandExecutable(command) {
2045
2627
  return command.trim().split(/\s+/)[0] || void 0;
2046
2628
  }
2629
+ function isCommandToolName(toolName) {
2630
+ return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
2631
+ }
2047
2632
  function extractToolName(span) {
2048
2633
  const attributes = span.attributes || {};
2049
- const directMatch = getStringAttribute(attributes, TOOL_ATTRIBUTE_KEYS);
2634
+ const directMatch = require_toolAttributes.getToolNameFromAttributes(attributes);
2050
2635
  if (directMatch) return directMatch;
2051
2636
  for (const [key, value] of Object.entries(attributes)) {
2052
2637
  if (typeof value !== "string" || !value.trim()) continue;
@@ -2060,7 +2645,7 @@ function extractToolName(span) {
2060
2645
  }
2061
2646
  function extractToolArgs(span) {
2062
2647
  const attributes = span.attributes || {};
2063
- for (const key of TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
2648
+ for (const key of require_toolAttributes.TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
2064
2649
  const value = normalizeStructuredAttribute(attributes[key]);
2065
2650
  if (value !== void 0) return value;
2066
2651
  }
@@ -2071,21 +2656,31 @@ function extractToolArgs(span) {
2071
2656
  if (value !== void 0) return value;
2072
2657
  }
2073
2658
  }
2074
- function extractCommand(span) {
2659
+ function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
2075
2660
  const attributes = span.attributes || {};
2076
- const directMatch = getStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2661
+ const directMatch = require_toolAttributes.getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2077
2662
  if (directMatch) return directMatch;
2078
2663
  for (const [key, value] of Object.entries(attributes)) {
2079
2664
  if (typeof value !== "string" || !value.trim()) continue;
2080
2665
  if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
2081
2666
  }
2667
+ const toolArgs = getToolArgs();
2668
+ if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
2669
+ const args = toolArgs;
2670
+ const command = args.cmd ?? args.command;
2671
+ if (typeof command === "string" && command.trim()) return command.trim();
2672
+ if (Array.isArray(command)) {
2673
+ const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
2674
+ if (joined) return joined;
2675
+ }
2676
+ }
2082
2677
  if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
2083
2678
  }
2084
2679
  function extractSearchQuery(span) {
2085
2680
  const attributes = span.attributes || {};
2086
- const directMatch = getStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2681
+ const directMatch = require_toolAttributes.getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2087
2682
  if (directMatch) return directMatch;
2088
- const genericQuery = getStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2683
+ const genericQuery = require_toolAttributes.getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2089
2684
  if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
2090
2685
  if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
2091
2686
  }
@@ -2109,17 +2704,34 @@ function extractTrajectorySteps(trace) {
2109
2704
  return left.index - right.index;
2110
2705
  }).map(({ span }) => {
2111
2706
  const toolName = extractToolName(span);
2112
- const command = extractCommand(span);
2707
+ let toolArgs;
2708
+ let hasExtractedToolArgs = false;
2709
+ const getToolArgs = () => {
2710
+ if (!hasExtractedToolArgs) {
2711
+ toolArgs = extractToolArgs(span);
2712
+ hasExtractedToolArgs = true;
2713
+ }
2714
+ return toolArgs;
2715
+ };
2716
+ const command = extractCommand(span, toolName, getToolArgs);
2113
2717
  const searchQuery = extractSearchQuery(span);
2114
2718
  let type = "span";
2115
2719
  let name = span.name;
2116
2720
  const aliases = new Set([span.name]);
2117
2721
  let args;
2118
- if (toolName) {
2722
+ if (command && isCommandToolName(toolName)) {
2723
+ type = "command";
2724
+ name = command;
2725
+ aliases.add(command);
2726
+ args = getToolArgs();
2727
+ if (toolName) aliases.add(toolName);
2728
+ const executable = getCommandExecutable(command);
2729
+ if (executable) aliases.add(executable);
2730
+ } else if (toolName) {
2119
2731
  type = "tool";
2120
2732
  name = toolName;
2121
2733
  aliases.add(toolName);
2122
- args = extractToolArgs(span);
2734
+ args = getToolArgs();
2123
2735
  } else if (command) {
2124
2736
  type = "command";
2125
2737
  name = command;
@@ -2295,7 +2907,7 @@ const handleRedteam = async ({ assertion, baseType, test, prompt, outputString,
2295
2907
  if (match) evalId = match[1];
2296
2908
  }
2297
2909
  }
2298
- const tracking = await require_providers.checkExfilTracking(webPageUuid, evalId);
2910
+ const tracking = await require_indirectWebPwn.checkExfilTracking(webPageUuid, evalId);
2299
2911
  if (tracking) gradingContext = {
2300
2912
  ...gradingContext,
2301
2913
  wasExfiltrated: tracking.wasExfiltrated,
@@ -2361,7 +2973,7 @@ function handleIsRefusal(params) {
2361
2973
  assertion
2362
2974
  };
2363
2975
  }
2364
- const pass = require_providers.isBasicRefusal(output) !== inverse;
2976
+ const pass = require_util$2.isBasicRefusal(output) !== inverse;
2365
2977
  return {
2366
2978
  pass,
2367
2979
  score: pass ? 1 : 0,
@@ -2400,11 +3012,10 @@ function handleRougeScore({ baseType, assertion, renderedValue, outputString, in
2400
3012
  const rougeMethod = js_rouge[baseType[baseType.length - 1]];
2401
3013
  const score = rougeMethod(outputString, renderedValue, {});
2402
3014
  const threshold = assertion.threshold ?? .75;
2403
- const pass = score >= threshold != inverse;
2404
3015
  return {
2405
- pass,
3016
+ pass: score >= threshold !== inverse,
2406
3017
  score: inverse ? 1 - score : score,
2407
- reason: pass ? `${baseType.toUpperCase()} score ${score.toFixed(2)} is greater than or equal to threshold ${threshold}` : `${baseType.toUpperCase()} score ${score.toFixed(2)} is less than threshold ${threshold}`,
3018
+ reason: `${baseType.toUpperCase()} score ${score.toFixed(2)} is ${score >= threshold ? "greater than or equal to" : "less than"} threshold ${threshold}`,
2408
3019
  assertion
2409
3020
  };
2410
3021
  }
@@ -2466,10 +3077,196 @@ const handleRuby = async ({ assertion, renderedValue, valueFromScript, assertion
2466
3077
  }
2467
3078
  };
2468
3079
  //#endregion
3080
+ //#region src/providers/webSearchUtils.ts
3081
+ function hasTool(provider, predicate) {
3082
+ return Array.isArray(provider.config?.tools) && provider.config.tools.some(predicate);
3083
+ }
3084
+ function getProviderId(provider) {
3085
+ if (typeof provider.id !== "function") return null;
3086
+ try {
3087
+ return provider.id();
3088
+ } catch (err) {
3089
+ require_logger.logger.debug(`Failed to read provider id: ${err}`);
3090
+ return null;
3091
+ }
3092
+ }
3093
+ function isOpenAiResponsesProvider(provider, id) {
3094
+ return id.includes("openai:responses") || provider.constructor?.name === "OpenAiResponsesProvider";
3095
+ }
3096
+ /**
3097
+ * Check if a provider has web search capabilities
3098
+ * @param provider The provider to check
3099
+ * @returns true if the provider supports web search
3100
+ */
3101
+ function hasWebSearchCapability(provider) {
3102
+ if (!provider) return false;
3103
+ const id = getProviderId(provider);
3104
+ if (!id) return false;
3105
+ if (id.includes("perplexity")) return true;
3106
+ if ((id.includes("google") || id.includes("gemini") || id.includes("vertex")) && hasTool(provider, (t) => t.googleSearch !== void 0)) return true;
3107
+ if (id.includes("xai") && provider.config?.search_parameters?.mode === "on") return true;
3108
+ if (isOpenAiResponsesProvider(provider, id) && hasTool(provider, (t) => t.type === "web_search_preview")) return true;
3109
+ if (id.startsWith("openai:codex") && (provider.config?.web_search_mode === "live" || provider.config?.web_search_mode === "cached" || provider.config?.web_search_enabled === true)) return true;
3110
+ if (id.includes("anthropic") && hasTool(provider, (t) => t.type === "web_search_20250305")) return true;
3111
+ return false;
3112
+ }
3113
+ /**
3114
+ * Load a provider with web search capabilities.
3115
+ * Tries multiple providers in order of preference until one succeeds.
3116
+ * Uses the latest and most capable models from each provider with specific checkpoint IDs.
3117
+ *
3118
+ * @param preferAnthropic Whether to try Anthropic first (true) or OpenAI first (false)
3119
+ * @returns A provider with web search capabilities or null
3120
+ */
3121
+ async function loadWebSearchProvider(preferAnthropic = false) {
3122
+ const loadAnthropicWebSearch = async () => {
3123
+ try {
3124
+ return await require_providers.loadApiProvider("anthropic:messages:claude-opus-4-6", { options: { config: { tools: [{
3125
+ type: "web_search_20250305",
3126
+ name: "web_search",
3127
+ max_uses: 5
3128
+ }] } } });
3129
+ } catch (err) {
3130
+ require_logger.logger.debug(`Failed to load Anthropic web search provider: ${err}`);
3131
+ return null;
3132
+ }
3133
+ };
3134
+ const loadOpenAIWebSearch = async () => {
3135
+ try {
3136
+ return await require_providers.loadApiProvider("openai:responses:gpt-5.4-2026-03-05", { options: { config: { tools: [{ type: "web_search_preview" }] } } });
3137
+ } catch (err) {
3138
+ require_logger.logger.debug(`Failed to load OpenAI web search provider: ${err}`);
3139
+ return null;
3140
+ }
3141
+ };
3142
+ const loadPerplexity = async () => {
3143
+ try {
3144
+ return await require_providers.loadApiProvider("perplexity:sonar-pro");
3145
+ } catch (err) {
3146
+ require_logger.logger.debug(`Failed to load Perplexity provider: ${err}`);
3147
+ return null;
3148
+ }
3149
+ };
3150
+ const loadGoogleWebSearch = async () => {
3151
+ try {
3152
+ return await require_providers.loadApiProvider("google:gemini-3-pro-preview", { options: { config: { tools: [{ googleSearch: {} }] } } });
3153
+ } catch (err) {
3154
+ require_logger.logger.debug(`Failed to load Google web search provider: ${err}`);
3155
+ return null;
3156
+ }
3157
+ };
3158
+ const loadVertexWebSearch = async () => {
3159
+ try {
3160
+ return await require_providers.loadApiProvider("vertex:gemini-3-pro-preview", { options: { config: { tools: [{ googleSearch: {} }] } } });
3161
+ } catch (err) {
3162
+ require_logger.logger.debug(`Failed to load Vertex web search provider: ${err}`);
3163
+ return null;
3164
+ }
3165
+ };
3166
+ const loadXaiWebSearch = async () => {
3167
+ try {
3168
+ return await require_providers.loadApiProvider("xai:grok-4-1-fast-reasoning", { options: { config: { search_parameters: { mode: "on" } } } });
3169
+ } catch (err) {
3170
+ require_logger.logger.debug(`Failed to load xAI web search provider: ${err}`);
3171
+ return null;
3172
+ }
3173
+ };
3174
+ const providers = preferAnthropic ? [
3175
+ loadAnthropicWebSearch,
3176
+ loadOpenAIWebSearch,
3177
+ loadPerplexity,
3178
+ loadGoogleWebSearch,
3179
+ loadVertexWebSearch,
3180
+ loadXaiWebSearch
3181
+ ] : [
3182
+ loadOpenAIWebSearch,
3183
+ loadAnthropicWebSearch,
3184
+ loadPerplexity,
3185
+ loadGoogleWebSearch,
3186
+ loadVertexWebSearch,
3187
+ loadXaiWebSearch
3188
+ ];
3189
+ for (const getProvider of providers) {
3190
+ const provider = await getProvider();
3191
+ if (provider && hasWebSearchCapability(provider)) {
3192
+ require_logger.logger.info(`Using ${getProviderId(provider) ?? "loaded provider"} as web search provider`);
3193
+ return provider;
3194
+ }
3195
+ if (provider) require_logger.logger.debug(`Loaded provider ${getProviderId(provider) ?? "unknown"} does not support web search`);
3196
+ }
3197
+ return null;
3198
+ }
3199
+ //#endregion
3200
+ //#region src/matchers/search.ts
3201
+ async function matchesSearchRubric(rubric, llmOutput, grading, vars, assertion, _provider, providerCallContext) {
3202
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
3203
+ const defaultProviders = await require_graders.getDefaultProviders();
3204
+ const defaultSearchProviders = [
3205
+ defaultProviders.webSearchProvider,
3206
+ defaultProviders.llmRubricProvider,
3207
+ defaultProviders.gradingProvider
3208
+ ];
3209
+ let searchProvider = (grading.provider ? await require_graders.getGradingProvider("text", grading.provider, null) : null) || defaultSearchProviders.find((provider) => Boolean(provider));
3210
+ if (!hasWebSearchCapability(searchProvider)) {
3211
+ const webSearchDefault = defaultSearchProviders.find((provider) => hasWebSearchCapability(provider));
3212
+ if (webSearchDefault) searchProvider = webSearchDefault;
3213
+ }
3214
+ if (!hasWebSearchCapability(searchProvider)) {
3215
+ const webSearchProvider = await loadWebSearchProvider(true);
3216
+ if (webSearchProvider) searchProvider = webSearchProvider;
3217
+ }
3218
+ if (!searchProvider || !hasWebSearchCapability(searchProvider)) throw new Error(`search-rubric assertion requires a grading provider with web search capabilities. Use --grader with a web search provider (e.g., anthropic:messages:${require_graders.DEFAULT_ANTHROPIC_MODEL}, openai:responses:o4-mini with tools configured, perplexity:sonar) or configure one in defaultTest.options.provider`);
3219
+ const prompt = await require_graders.renderLlmRubricPrompt(await require_graders.loadRubricPrompt(grading?.rubricPrompt, require_graders.DEFAULT_WEB_SEARCH_PROMPT), {
3220
+ output: require_graders.tryParse(llmOutput),
3221
+ rubric,
3222
+ ...vars || {}
3223
+ });
3224
+ const resp = await require_graders.callProviderWithContext(searchProvider, prompt, "search-rubric", {
3225
+ output: require_graders.tryParse(llmOutput),
3226
+ rubric,
3227
+ ...vars || {}
3228
+ }, providerCallContext);
3229
+ if (resp.error || !resp.output) return {
3230
+ pass: false,
3231
+ score: 0,
3232
+ reason: `Search rubric evaluation failed: ${resp.error || "No output"}`,
3233
+ tokensUsed: resp.tokenUsage,
3234
+ assertion
3235
+ };
3236
+ try {
3237
+ const result = require_logger.extractFirstJsonObject(String(resp.output));
3238
+ let pass = result.pass ?? false;
3239
+ const score = typeof result.score === "number" ? result.score : pass ? 1 : 0;
3240
+ if (assertion?.threshold !== void 0) pass = pass && score >= assertion.threshold;
3241
+ return {
3242
+ pass,
3243
+ score,
3244
+ reason: result.reason || "No reason provided",
3245
+ tokensUsed: resp.tokenUsage,
3246
+ assertion,
3247
+ metadata: {
3248
+ searchResults: result.searchResults || [],
3249
+ searchProvider: searchProvider.id()
3250
+ }
3251
+ };
3252
+ } catch (err) {
3253
+ require_logger.logger.warn(`[search-rubric] Could not parse structured JSON from provider response, falling back to substring matching: ${err.message}`);
3254
+ const outputLower = String(resp.output).toLowerCase();
3255
+ const pass = outputLower.includes("\"pass\":true") || outputLower.includes("\"pass\": true");
3256
+ return {
3257
+ pass,
3258
+ score: pass ? 1 : 0,
3259
+ reason: resp.output,
3260
+ tokensUsed: resp.tokenUsage,
3261
+ assertion
3262
+ };
3263
+ }
3264
+ }
3265
+ //#endregion
2469
3266
  //#region src/assertions/searchRubric.ts
2470
3267
  async function handleSearchRubric({ assertion, baseType: _baseType, inverse, provider, providerCallContext, renderedValue, test, providerResponse }) {
2471
3268
  if (renderedValue == null) throw new Error("search-rubric assertion type must have a string value");
2472
- const result = await require_graders.matchesSearchRubric(String(renderedValue), providerResponse.output, test.options, test.vars, assertion, provider, providerCallContext);
3269
+ const result = await matchesSearchRubric(String(renderedValue), providerResponse.output, test.options, test.vars, assertion, provider, providerCallContext);
2473
3270
  if (inverse) {
2474
3271
  result.pass = !result.pass;
2475
3272
  result.reason = result.pass ? `Output does not require web search verification: ${result.reason}` : `Output requires web search verification: ${result.reason}`;
@@ -2500,7 +3297,7 @@ const handleSimilar = async ({ assertion, renderedValue, outputString, inverse,
2500
3297
  if (Array.isArray(renderedValue)) {
2501
3298
  let minScore = Number.POSITIVE_INFINITY;
2502
3299
  for (const value of renderedValue) {
2503
- const result = await require_graders.matchesSimilarity(value, outputString, threshold, inverse, test.options, metric);
3300
+ const result = await matchesSimilarity(value, outputString, threshold, inverse, test.options, metric);
2504
3301
  if (result.pass) return {
2505
3302
  assertion,
2506
3303
  ...result
@@ -2515,7 +3312,7 @@ const handleSimilar = async ({ assertion, renderedValue, outputString, inverse,
2515
3312
  };
2516
3313
  } else return {
2517
3314
  assertion,
2518
- ...await require_graders.matchesSimilarity(renderedValue, outputString, threshold, inverse, test.options, metric)
3315
+ ...await matchesSimilarity(renderedValue, outputString, threshold, inverse, test.options, metric)
2519
3316
  };
2520
3317
  };
2521
3318
  //#endregion
@@ -3121,13 +3918,13 @@ function resolveSequenceValue(value) {
3121
3918
  }
3122
3919
  throw new Error("trajectory:tool-sequence assertion must have an array or object value");
3123
3920
  }
3124
- function isRecord(value) {
3921
+ function isRecord$1(value) {
3125
3922
  return typeof value === "object" && value !== null && !Array.isArray(value);
3126
3923
  }
3127
3924
  function matchesExpectedArgsPartial(actual, expected) {
3128
3925
  if (Array.isArray(expected)) return Array.isArray(actual) && actual.length === expected.length && expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
3129
- if (isRecord(expected)) {
3130
- if (!isRecord(actual)) return false;
3926
+ if (isRecord$1(expected)) {
3927
+ if (!isRecord$1(actual)) return false;
3131
3928
  return Object.entries(expected).every(([key, expectedValue]) => Object.prototype.hasOwnProperty.call(actual, key) && matchesExpectedArgsPartial(actual[key], expectedValue));
3132
3929
  }
3133
3930
  return (0, node_util.isDeepStrictEqual)(actual, expected);
@@ -3484,7 +4281,7 @@ function assertionMayNeedTraceContext(assertion) {
3484
4281
  if (assertionUsesTrace(assertion)) return true;
3485
4282
  if (assertion.type === "assert-set") return assertion.assert.some(assertionMayNeedTraceContext);
3486
4283
  if (assertion.type.startsWith("promptfoo:redteam:coding-agent:")) return true;
3487
- return typeof assertion.value === "string" ? assertion.value.startsWith("file://") || require_providers.isPackagePath(assertion.value) : false;
4284
+ return typeof assertion.value === "string" ? assertion.value.startsWith("file://") || require_packageParser.isPackagePath(assertion.value) : false;
3488
4285
  }
3489
4286
  function hasTraceAwareAssertions(assertions) {
3490
4287
  return Boolean(assertions?.some(assertionMayNeedTraceContext));
@@ -3498,7 +4295,7 @@ async function loadTraceData(traceId) {
3498
4295
  let stableObservations = 0;
3499
4296
  let latestTrace = null;
3500
4297
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
3501
- latestTrace = await traceStore.getTrace(traceId);
4298
+ latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
3502
4299
  const spanCount = latestTrace?.spans?.length ?? 0;
3503
4300
  if (spanCount > 0) {
3504
4301
  stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
@@ -3551,7 +4348,7 @@ const ASSERTION_HANDLERS = {
3551
4348
  "llm-rubric": handleLlmRubric,
3552
4349
  meteor: async (params) => {
3553
4350
  try {
3554
- const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-Co1VQ1u5.cjs"));
4351
+ const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-CR226f7Z.cjs"));
3555
4352
  return handleMeteorAssertion(params);
3556
4353
  } catch (error) {
3557
4354
  if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
@@ -3633,7 +4430,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
3633
4430
  const { cost, logProbs, output: originalOutput } = providerResponse;
3634
4431
  let output = originalOutput;
3635
4432
  require_invariant.invariant(assertion.type, `Assertion must have a type: ${JSON.stringify(assertion)}`);
3636
- if (assertion.transform) output = await require_transform.transform(assertion.transform, output, {
4433
+ if (assertion.transform) output = await require_transform$1.transform(assertion.transform, output, {
3637
4434
  vars: resolvedVars,
3638
4435
  prompt: { label: prompt },
3639
4436
  ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
@@ -3687,7 +4484,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
3687
4484
  };
3688
4485
  }
3689
4486
  else if (filePath.endsWith(".rb")) try {
3690
- const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-DUbq4tff.cjs"));
4487
+ const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-CqUWBZAt.cjs")).then((n) => n.rubyUtils_exports);
3691
4488
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
3692
4489
  require_logger.logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
3693
4490
  } catch (error) {
@@ -3699,9 +4496,9 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
3699
4496
  };
3700
4497
  }
3701
4498
  else renderedValue = require_graders.processFileReference(renderedValue);
3702
- } else if (require_providers.isPackagePath(renderedValue)) {
4499
+ } else if (require_packageParser.isPackagePath(renderedValue)) {
3703
4500
  const basePath = require_logger.state.basePath || "";
3704
- const requiredModule = await require_providers.loadFromPackage(renderedValue, basePath);
4501
+ const requiredModule = await require_packageParser.loadFromPackage(renderedValue, basePath);
3705
4502
  if (typeof requiredModule !== "function") throw new Error(`Assertion malformed: ${renderedValue} must be a function. Received: ${typeof requiredModule}`);
3706
4503
  valueFromScript = await Promise.resolve(requiredModule(output, context));
3707
4504
  } else renderedValue = nunjucks.renderString(renderedValue, resolvedVars);
@@ -3804,7 +4601,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
3804
4601
  require_logger.logger.debug(`Failed to preload trace data for assertions: ${error}`);
3805
4602
  preloadedTraceData = null;
3806
4603
  }
3807
- await async.default.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
4604
+ const concurrency = require_graders.getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
4605
+ await async.default.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
3808
4606
  if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
3809
4607
  const result = await runAssertion({
3810
4608
  prompt,
@@ -3840,7 +4638,7 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
3840
4638
  async function runCompareAssertion(test, assertion, outputs, context) {
3841
4639
  require_invariant.invariant(typeof assertion.value === "string", "select-best must have a string value");
3842
4640
  test = require_graders.getFinalTest(test, assertion);
3843
- return (await require_graders.matchesSelectBest(assertion.value, outputs, test.options, test.vars, context)).map((result) => ({
4641
+ return (await matchesSelectBest(assertion.value, outputs, test.options, test.vars, context)).map((result) => ({
3844
4642
  ...result,
3845
4643
  assertion
3846
4644
  }));
@@ -3857,17 +4655,17 @@ async function readAssertions(filePath) {
3857
4655
  var assertions_default = {
3858
4656
  runAssertion,
3859
4657
  runAssertions,
3860
- matchesSimilarity: require_graders.matchesSimilarity,
3861
- matchesClassification: require_graders.matchesClassification,
4658
+ matchesSimilarity,
4659
+ matchesClassification,
3862
4660
  matchesLlmRubric: require_graders.matchesLlmRubric,
3863
4661
  matchesFactuality: require_graders.matchesFactuality,
3864
4662
  matchesClosedQa: require_graders.matchesClosedQa,
3865
- matchesAnswerRelevance: require_graders.matchesAnswerRelevance,
3866
- matchesContextRecall: require_graders.matchesContextRecall,
3867
- matchesContextRelevance: require_graders.matchesContextRelevance,
3868
- matchesContextFaithfulness: require_graders.matchesContextFaithfulness,
3869
- matchesComparisonBoolean: require_graders.matchesSelectBest,
3870
- matchesModeration: require_graders.matchesModeration,
4663
+ matchesAnswerRelevance,
4664
+ matchesContextRecall,
4665
+ matchesContextRelevance,
4666
+ matchesContextFaithfulness,
4667
+ matchesComparisonBoolean: matchesSelectBest,
4668
+ matchesModeration,
3871
4669
  matchesConversationRelevance
3872
4670
  };
3873
4671
  //#endregion
@@ -3931,7 +4729,8 @@ var CIProgressReporter = class {
3931
4729
  }
3932
4730
  updateTotalTests(newTotal) {
3933
4731
  this.totalTests = Math.max(newTotal, 1);
3934
- this.highestPercentageSeen = Math.floor(this.completedTests / this.totalTests * 100);
4732
+ const percentage = Math.floor(this.completedTests / this.totalTests * 100);
4733
+ this.highestPercentageSeen = percentage;
3935
4734
  }
3936
4735
  finish() {
3937
4736
  if (this.intervalId) {
@@ -4104,6 +4903,10 @@ function getDefaultOtelConfig() {
4104
4903
  }
4105
4904
  //#endregion
4106
4905
  //#region src/tracing/localSpanExporter.ts
4906
+ const MISSING_TRACE_RETRY_DELAY_MS = 50;
4907
+ function delay(ms) {
4908
+ return new Promise((resolve) => setTimeout(resolve, ms));
4909
+ }
4107
4910
  /**
4108
4911
  * A span exporter that writes spans to the local TraceStore (SQLite).
4109
4912
  * This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
@@ -4145,7 +4948,7 @@ var LocalSpanExporter = class {
4145
4948
  }
4146
4949
  let firstError;
4147
4950
  for (const [traceId, spanDataList] of spansByTrace) try {
4148
- const result = await traceStore.addSpans(traceId, spanDataList, { skipTraceCheck: false });
4951
+ const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
4149
4952
  if (result.stored) require_logger.logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
4150
4953
  else require_logger.logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
4151
4954
  } catch (error) {
@@ -4157,6 +4960,16 @@ var LocalSpanExporter = class {
4157
4960
  }
4158
4961
  return firstError;
4159
4962
  }
4963
+ async addSpansWithTraceRetry(traceStore, traceId, spans) {
4964
+ const options = {
4965
+ skipTraceCheck: false,
4966
+ warnIfMissingTrace: false
4967
+ };
4968
+ const result = await traceStore.addSpans(traceId, spans, options);
4969
+ if (result.stored) return result;
4970
+ await delay(MISSING_TRACE_RETRY_DELAY_MS);
4971
+ return traceStore.addSpans(traceId, spans, options);
4972
+ }
4160
4973
  /**
4161
4974
  * Convert an OTEL ReadableSpan to our SpanData format.
4162
4975
  */
@@ -4242,7 +5055,7 @@ function initializeOtel(config) {
4242
5055
  require_logger.logger.debug("[OtelSdk] Registered W3C Trace Context propagator");
4243
5056
  const resource = (0, _opentelemetry_resources.resourceFromAttributes)({
4244
5057
  [_opentelemetry_semantic_conventions.ATTR_SERVICE_NAME]: config.serviceName,
4245
- [_opentelemetry_semantic_conventions.ATTR_SERVICE_VERSION]: require_fetch.VERSION
5058
+ [_opentelemetry_semantic_conventions.ATTR_SERVICE_VERSION]: require_version.VERSION
4246
5059
  });
4247
5060
  const spanProcessors = [];
4248
5061
  if (config.localExport) {
@@ -4439,6 +5252,15 @@ function isPromptAllowed(prompt, allowedPrompts) {
4439
5252
  }
4440
5253
  //#endregion
4441
5254
  //#region src/evaluator.ts
5255
+ const CONVERSATION_VAR_NAME = "_conversation";
5256
+ const promptUsesConversationVariableCache = new lru_cache.LRUCache({ max: 1024 });
5257
+ function promptUsesConversationVariable(prompt) {
5258
+ const cached = promptUsesConversationVariableCache.get(prompt.raw);
5259
+ if (cached !== void 0) return cached;
5260
+ const { referenced, parsed } = require_render.analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
5261
+ if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
5262
+ return referenced;
5263
+ }
4442
5264
  /**
4443
5265
  * Manages a single progress bar for the evaluation
4444
5266
  */
@@ -4638,6 +5460,18 @@ function hasProviderGroupedAssertion(assertion) {
4638
5460
  function shouldDeferGradingForTest(test) {
4639
5461
  return Boolean(test.assert?.some(hasProviderGroupedAssertion));
4640
5462
  }
5463
+ function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
5464
+ if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
5465
+ if (shouldGroupGradingByProvider) {
5466
+ require_logger.logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
5467
+ return;
5468
+ }
5469
+ if (concurrency !== 1) return;
5470
+ const reasons = [];
5471
+ if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
5472
+ if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
5473
+ if (reasons.length > 0) require_logger.logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
5474
+ }
4641
5475
  function applyGradingResult(row, checkResult) {
4642
5476
  if (!checkResult.pass) {
4643
5477
  row.error = checkResult.reason;
@@ -4652,14 +5486,29 @@ function applyGradingResult(row, checkResult) {
4652
5486
  if (checkResult.tokensUsed) require_tokenUsageUtils.accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
4653
5487
  row.gradingResult = checkResult;
4654
5488
  }
4655
- function applyGradingError(row, error) {
4656
- const errorMessage = error instanceof Error ? error.stack ?? error.message : String(error);
4657
- require_logger.logger.error("Assertion grading failed during eval", {
4658
- error: errorMessage,
4659
- promptIdx: row.promptIdx,
4660
- testIdx: row.testIdx
4661
- });
4662
- row.error = errorMessage;
5489
+ const ABORTED_GRADING_PREFIX = "Aborted: ";
5490
+ function isAbortShapedError(error) {
5491
+ return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
5492
+ }
5493
+ function applyGradingError(row, error, abortSignal) {
5494
+ const errorAsError = error instanceof Error ? error : void 0;
5495
+ if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
5496
+ const shortMessage = errorAsError?.message ?? String(error);
5497
+ require_logger.logger.debug("Assertion grading aborted", {
5498
+ error: shortMessage,
5499
+ promptIdx: row.promptIdx,
5500
+ testIdx: row.testIdx
5501
+ });
5502
+ row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
5503
+ } else {
5504
+ const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
5505
+ require_logger.logger.error("Assertion grading failed during eval", {
5506
+ error: fullMessage,
5507
+ promptIdx: row.promptIdx,
5508
+ testIdx: row.testIdx
5509
+ });
5510
+ row.error = fullMessage;
5511
+ }
4663
5512
  row.failureReason = require_types.ResultFailureReason.ERROR;
4664
5513
  row.success = false;
4665
5514
  row.score = 0;
@@ -4671,7 +5520,7 @@ function getNonTransientTargetStatus(row) {
4671
5520
  }
4672
5521
  function createRunEvalState({ provider, prompt, test }) {
4673
5522
  const vars = structuredClone(test.vars || {});
4674
- const fileMetadata = require_providers.collectFileMetadata(vars);
5523
+ const fileMetadata = require_evaluatorHelpers.collectFileMetadata(vars);
4675
5524
  const conversationKey = `${provider.label || provider.id()}:${prompt.id}${test.metadata?.conversationId ? `:${test.metadata.conversationId}` : ""}`;
4676
5525
  const setup = createRunEvalSetup({
4677
5526
  provider,
@@ -4691,7 +5540,7 @@ function createRunEvalState({ provider, prompt, test }) {
4691
5540
  };
4692
5541
  }
4693
5542
  function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
4694
- const usesConversation = prompt.raw.includes("_conversation");
5543
+ const usesConversation = promptUsesConversationVariable(prompt);
4695
5544
  if (!require_logger.getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
4696
5545
  }
4697
5546
  function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
@@ -4710,8 +5559,8 @@ function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
4710
5559
  };
4711
5560
  }
4712
5561
  async function renderRunEvalPrompt({ filters, isRedteam, provider, promptForRender, test, testSuite, vars }) {
4713
- const renderedPrompt = await require_providers.renderPrompt(promptForRender, vars, filters, provider, shouldSkipRedteamInjectVar(test, testSuite, isRedteam) ? [getRedteamInjectVar(test, promptForRender, testSuite)] : void 0);
4714
- if (isRedteam) require_providers.throwIfTargetPromptExceedsMaxChars(renderedPrompt, testSuite?.redteam?.maxCharsPerMessage);
5562
+ const renderedPrompt = await require_evaluatorHelpers.renderPrompt(promptForRender, vars, filters, provider, shouldSkipRedteamInjectVar(test, testSuite, isRedteam) ? [getRedteamInjectVar(test, promptForRender, testSuite)] : void 0);
5563
+ if (isRedteam) require_promptLength.throwIfTargetPromptExceedsMaxChars(renderedPrompt, testSuite?.redteam?.maxCharsPerMessage);
4715
5564
  const setup = createRunEvalSetup({
4716
5565
  provider,
4717
5566
  prompt: promptForRender,
@@ -4778,7 +5627,7 @@ async function callActiveProvider({ abortSignal, evalId, filters, promptForRende
4778
5627
  });
4779
5628
  const callApiOptions = abortSignal ? { abortSignal } : void 0;
4780
5629
  const callApi = () => activeProvider.callApi(renderedPrompt, callApiContext, callApiOptions);
4781
- const response = rateLimitRegistry ? await rateLimitRegistry.execute(activeProvider, callApi, require_providers.createProviderRateLimitOptions()) : await callApi();
5630
+ const response = rateLimitRegistry ? await rateLimitRegistry.execute(activeProvider, callApi, require_shared.createProviderRateLimitOptions()) : await callApi();
4782
5631
  require_logger.logger.debug(`Provider response properties: ${Object.keys(response).join(", ")}`);
4783
5632
  require_logger.logger.debug(`Provider response cached property explicitly: ${response.cached}`);
4784
5633
  return response;
@@ -4856,7 +5705,7 @@ function createEvaluateResult({ fileMetadata, latencyMs, prompt, promptIdx, rend
4856
5705
  };
4857
5706
  if (!ret.metadata?.sessionIds && !ret.metadata?.sessionId) {
4858
5707
  ret.metadata ??= {};
4859
- ret.metadata.sessionId = require_providers.getSessionId(response, { vars });
5708
+ ret.metadata.sessionId = require_util$2.getSessionId(response, { vars });
4860
5709
  }
4861
5710
  return ret;
4862
5711
  }
@@ -4864,7 +5713,7 @@ function trackProviderUsage(provider, response) {
4864
5713
  if (!response.tokenUsage) return;
4865
5714
  const providerId = provider.id();
4866
5715
  const trackingId = provider.constructor?.name ? `${providerId} (${provider.constructor.name})` : providerId;
4867
- require_providers.TokenUsageTracker.getInstance().trackUsage(trackingId, response.tokenUsage);
5716
+ require_shared.TokenUsageTracker.getInstance().trackUsage(trackingId, response.tokenUsage);
4868
5717
  }
4869
5718
  async function applyRunEvalResponseOutcome({ abortSignal, deferGrading, evalId, isRedteam, latencyMs, prompt, promptIdx, provider, providerCallQueue, rateLimitRegistry, renderedPrompt, response, ret, test, testIdx, traceContext, vars }) {
4870
5719
  if (response.error) {
@@ -4938,7 +5787,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
4938
5787
  assertScoringFunction: test.assertScoringFunction,
4939
5788
  traceId
4940
5789
  }).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
4941
- applyGradingError(ret, error);
5790
+ applyGradingError(ret, error, abortSignal);
4942
5791
  });
4943
5792
  deferredGradingPromises.set(ret, gradingPromise);
4944
5793
  return;
@@ -4960,13 +5809,13 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
4960
5809
  }
4961
5810
  async function transformRunEvalResponse({ evalId, prompt, promptIdx, provider, response, test, testIdx, vars }) {
4962
5811
  const processedResponse = { ...response };
4963
- if (provider.transform) processedResponse.output = await require_transform.transform(provider.transform, processedResponse.output, {
5812
+ if (provider.transform) processedResponse.output = await require_transform$1.transform(provider.transform, processedResponse.output, {
4964
5813
  vars,
4965
5814
  prompt
4966
5815
  });
4967
5816
  const providerTransformedOutput = processedResponse.output;
4968
5817
  const testTransform = test.options?.transform || test.options?.postprocess;
4969
- if (testTransform) processedResponse.output = await require_transform.transform(testTransform, processedResponse.output, {
5818
+ if (testTransform) processedResponse.output = await require_transform$1.transform(testTransform, processedResponse.output, {
4970
5819
  vars,
4971
5820
  prompt,
4972
5821
  ...response && response.metadata && { metadata: response.metadata }
@@ -5418,10 +6267,10 @@ async function prepareTestVariables(tests, testSuite) {
5418
6267
  async function applyInputTransform(testCase, inputTransformDefault) {
5419
6268
  const inputTransform = testCase.options?.transformVars || inputTransformDefault;
5420
6269
  if (!inputTransform) return;
5421
- const transformedVars = await require_transform.transform(inputTransform, testCase.vars, {
6270
+ const transformedVars = await require_transform$1.transform(inputTransform, testCase.vars, {
5422
6271
  prompt: {},
5423
6272
  uuid: crypto.randomUUID()
5424
- }, true, require_transform.TransformInputType.VARS);
6273
+ }, true, require_transform$1.TransformInputType.VARS);
5425
6274
  require_invariant.invariant(typeof transformedVars === "object", "Transform function did not return a valid object");
5426
6275
  testCase.vars = {
5427
6276
  ...testCase.vars,
@@ -5485,7 +6334,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
5485
6334
  const defaultProvider = defaultTest.provider;
5486
6335
  if (require_types.isApiProvider(defaultProvider)) return defaultProvider;
5487
6336
  if (typeof defaultProvider === "object" && defaultProvider.id) {
5488
- const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-DV3ax9e_.cjs"));
6337
+ const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BDVVIQM6.cjs")).then((n) => n.providers_exports);
5489
6338
  return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
5490
6339
  }
5491
6340
  return defaultProvider;
@@ -5645,7 +6494,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
5645
6494
  async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
5646
6495
  if (!require_logger.state.resume || !evalRecord.persisted) return;
5647
6496
  try {
5648
- const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-Dvc-iucu.cjs"));
6497
+ const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-CvtS8h8u.cjs")).then((n) => n.evalResult_exports);
5649
6498
  const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: require_logger.state.retryMode });
5650
6499
  const originalCount = runEvalOptions.length;
5651
6500
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -5659,14 +6508,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
5659
6508
  }
5660
6509
  }
5661
6510
  function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
5662
- const usesConversationVar = prompts.some((p) => p.raw.includes("_conversation"));
6511
+ const usesConversationVar = prompts.some(promptUsesConversationVariable);
5663
6512
  if (concurrency <= 1) return {
5664
6513
  concurrency,
5665
6514
  usesConversationVar
5666
6515
  };
5667
6516
  const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
5668
6517
  if (usesConversationVar) {
5669
- require_logger.logger.info(`Setting concurrency to 1 because the ${chalk.default.cyan("_conversation")} variable is used.`);
6518
+ require_logger.logger.info(`Setting concurrency to 1 because the ${chalk.default.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
5670
6519
  return {
5671
6520
  concurrency: 1,
5672
6521
  usesConversationVar
@@ -5896,8 +6745,9 @@ var Evaluator = class {
5896
6745
  };
5897
6746
  this.conversations = {};
5898
6747
  this.registers = {};
5899
- this.fileWriters = (Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : []).map((p) => new JsonlFileWriter(p));
5900
- this.rateLimitRegistry = require_providers.createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
6748
+ const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
6749
+ this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
6750
+ this.rateLimitRegistry = require_shared.createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
5901
6751
  this.rateLimitRegistry.on("ratelimit:hit", (data) => {
5902
6752
  require_logger.logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
5903
6753
  retryAfterMs: data.retryAfterMs,
@@ -5923,7 +6773,7 @@ var Evaluator = class {
5923
6773
  current: data.current
5924
6774
  });
5925
6775
  });
5926
- require_providers.redteamProviderManager.setRateLimitRegistry(this.rateLimitRegistry);
6776
+ require_shared.redteamProviderManager.setRateLimitRegistry(this.rateLimitRegistry);
5927
6777
  }
5928
6778
  /**
5929
6779
  * Updates metrics and stats after a comparison assertion (select-best or max-score).
@@ -6001,7 +6851,7 @@ var Evaluator = class {
6001
6851
  });
6002
6852
  }
6003
6853
  async runEvalStepAfterBeforeEach(evalStep, { deferGrading, onRowsReady, providerCallQueue, testSuite }) {
6004
- evalStep.test = (await require_providers.runExtensionHook(testSuite.extensions, "beforeEach", { test: evalStep.test })).test;
6854
+ evalStep.test = (await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "beforeEach", { test: evalStep.test })).test;
6005
6855
  const rows = await runEvalInternal({
6006
6856
  ...evalStep,
6007
6857
  deferGrading,
@@ -6016,6 +6866,25 @@ var Evaluator = class {
6016
6866
  this.trackCompletedRow(evalStep, row, context);
6017
6867
  context.numComplete++;
6018
6868
  const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
6869
+ if (context.testSuite.extensions?.length) try {
6870
+ const afterEachOut = await require_evaluatorHelpers.runExtensionHook(context.testSuite.extensions, "afterEach", {
6871
+ test: evalStep.test,
6872
+ result: {
6873
+ ...row,
6874
+ namedScores: { ...row.namedScores },
6875
+ metadata: { ...row.metadata },
6876
+ response: row.response ? {
6877
+ ...row.response,
6878
+ metadata: { ...row.response.metadata }
6879
+ } : row.response
6880
+ }
6881
+ });
6882
+ row.namedScores = require_evaluatorHelpers.filterFiniteScores(afterEachOut.result.namedScores);
6883
+ row.metadata = afterEachOut.result.metadata;
6884
+ if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
6885
+ } catch (error) {
6886
+ require_logger.logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
6887
+ }
6019
6888
  await this.persistEvalRow(row);
6020
6889
  if (this.abortIfTargetUnavailable(row, context)) break;
6021
6890
  const metrics = context.prompts[row.promptIdx].metrics;
@@ -6027,10 +6896,6 @@ var Evaluator = class {
6027
6896
  promptEvalCount,
6028
6897
  row
6029
6898
  });
6030
- await require_providers.runExtensionHook(context.testSuite.extensions, "afterEach", {
6031
- test: evalStep.test,
6032
- result: row
6033
- });
6034
6899
  context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
6035
6900
  }
6036
6901
  }
@@ -6104,9 +6969,8 @@ var Evaluator = class {
6104
6969
  context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics || createTimeoutMetrics(timeoutMs));
6105
6970
  }
6106
6971
  async executeEvalSteps({ checkAbort, ciProgressReporter, combinedAbortSignal, concurrentRunEvalOptions, evalStepIndexMap, globalTimeout, groupedRunEvalOptions, isEvalTimedOut, isWebUI, maxEvalTimeMs, processingContext, processedIndices, progressBarManager, prompts, serialRunEvalOptions, shouldGroupGradingByProvider }) {
6107
- let flushGroupedRows;
6108
6972
  try {
6109
- if (shouldGroupGradingByProvider) flushGroupedRows = await this.runGroupedEvalSteps({
6973
+ if (shouldGroupGradingByProvider) await this.runGroupedEvalSteps({
6110
6974
  checkAbort,
6111
6975
  evalStepIndexMap,
6112
6976
  groupedRunEvalOptions,
@@ -6138,7 +7002,6 @@ var Evaluator = class {
6138
7002
  cleanupProgressAfterError(progressBarManager, ciProgressReporter, err);
6139
7003
  throw err;
6140
7004
  }
6141
- await flushGroupedRows?.();
6142
7005
  if (isEvalTimedOut()) require_logger.logger.warn(`Evaluation stopped after reaching max duration (${maxEvalTimeMs}ms)`);
6143
7006
  else if (!processingContext.targetUnavailable) return this.saveInterruptedEval({
6144
7007
  ciProgressReporter,
@@ -6187,7 +7050,15 @@ var Evaluator = class {
6187
7050
  })) break;
6188
7051
  }
6189
7052
  } catch (error) {
6190
- await flushGroupedRows();
7053
+ const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
7054
+ try {
7055
+ await flushGroupedRows();
7056
+ } catch (flushError) {
7057
+ require_logger.logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
7058
+ error: flushError instanceof Error ? flushError.message : String(flushError),
7059
+ pendingRowCount
7060
+ });
7061
+ }
6191
7062
  throw error;
6192
7063
  }
6193
7064
  await flushGroupedRows();
@@ -6363,7 +7234,7 @@ var Evaluator = class {
6363
7234
  }
6364
7235
  const maxScoreAssertion = resultsToCompare[0].testCase.assert?.find((a) => a.type === "max-score");
6365
7236
  if (!maxScoreAssertion) return;
6366
- const maxScoreGradingResults = await require_graders.selectMaxScore(resultsToCompare.map((r) => r.response?.output || ""), resultsToCompare, maxScoreAssertion);
7237
+ const maxScoreGradingResults = await selectMaxScore(resultsToCompare.map((r) => r.response?.output || ""), resultsToCompare, maxScoreAssertion);
6367
7238
  updateComparisonReporterProgress({
6368
7239
  ciProgressReporter,
6369
7240
  compareCount,
@@ -6457,7 +7328,7 @@ var Evaluator = class {
6457
7328
  async runAfterAllExtensions(testSuite) {
6458
7329
  if (!testSuite.extensions?.length) return;
6459
7330
  const resultsForExtension = (await this.evalRecord.getResults()).map((result) => "toEvaluateResult" in result ? result.toEvaluateResult() : result);
6460
- await require_providers.runExtensionHook(testSuite.extensions, "afterAll", {
7331
+ await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "afterAll", {
6461
7332
  prompts: this.evalRecord.prompts,
6462
7333
  results: resultsForExtension,
6463
7334
  suite: testSuite,
@@ -6543,7 +7414,7 @@ var Evaluator = class {
6543
7414
  const rowsWithSelectBestAssertion = /* @__PURE__ */ new Set();
6544
7415
  const rowsWithMaxScoreAssertion = /* @__PURE__ */ new Set();
6545
7416
  ensureDefaultTestForExtensions(testSuite);
6546
- testSuite = (await require_providers.runExtensionHook(testSuite.extensions, "beforeAll", { suite: testSuite })).suite;
7417
+ testSuite = (await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "beforeAll", { suite: testSuite })).suite;
6547
7418
  if (!await maybeAddGeneratedPrompts(testSuite, options)) return this.evalRecord;
6548
7419
  prompts.push(...buildCompletedPrompts(testSuite, this.evalRecord));
6549
7420
  const promptIndexMap = buildPromptIndexMap(prompts);
@@ -6623,6 +7494,13 @@ var Evaluator = class {
6623
7494
  if (!this.options.silent) {
6624
7495
  if (serialRunEvalOptions.length > 0) require_logger.logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
6625
7496
  if (concurrentRunEvalOptions.length > 0) require_logger.logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
7497
+ logGroupedGradingStatus({
7498
+ concurrency,
7499
+ hasEvalStepTimeout,
7500
+ runEvalOptions,
7501
+ shouldGroupGradingByProvider,
7502
+ usesConversationVar
7503
+ });
6626
7504
  }
6627
7505
  if (this.options.showProgressBar && progressBarManager) {
6628
7506
  await progressBarManager.initialize(runEvalOptions, concurrency, 0);
@@ -6714,7 +7592,7 @@ var Evaluator = class {
6714
7592
  });
6715
7593
  }
6716
7594
  this.rateLimitRegistry?.dispose();
6717
- require_providers.redteamProviderManager.setRateLimitRegistry(void 0);
7595
+ require_shared.redteamProviderManager.setRateLimitRegistry(void 0);
6718
7596
  require_logger.state.maxConcurrency = void 0;
6719
7597
  }
6720
7598
  }
@@ -7622,7 +8500,7 @@ var Eval = class Eval {
7622
8500
  const evalInstance = new Eval(eval_.config, {
7623
8501
  id: eval_.id,
7624
8502
  createdAt: new Date(eval_.createdAt),
7625
- author: eval_.author || void 0,
8503
+ author: eval_.author,
7626
8504
  description: eval_.description || void 0,
7627
8505
  prompts: eval_.prompts || [],
7628
8506
  datasetId,
@@ -7645,7 +8523,7 @@ var Eval = class Eval {
7645
8523
  return (await require_tables.getDb().select().from(require_tables.evalsTable).limit(limit).orderBy((0, drizzle_orm.desc)(require_tables.evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
7646
8524
  id: e.id,
7647
8525
  createdAt: new Date(e.createdAt),
7648
- author: e.author || void 0,
8526
+ author: e.author,
7649
8527
  description: e.description || void 0,
7650
8528
  prompts: e.prompts || [],
7651
8529
  persisted: true
@@ -7660,7 +8538,7 @@ var Eval = class Eval {
7660
8538
  return (await require_tables.getDb().select().from(require_tables.evalsTable).orderBy((0, drizzle_orm.desc)(require_tables.evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
7661
8539
  id: e.id,
7662
8540
  createdAt: new Date(e.createdAt),
7663
- author: e.author || void 0,
8541
+ author: e.author,
7664
8542
  description: e.description || void 0,
7665
8543
  prompts: e.prompts || [],
7666
8544
  persisted: true
@@ -7675,7 +8553,7 @@ var Eval = class Eval {
7675
8553
  static async create(config, renderedPrompts, opts) {
7676
8554
  const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
7677
8555
  const evalId = opts?.id || createEvalId(createdAt);
7678
- const author = opts?.author || require_accounts.getUserEmail();
8556
+ const author = opts && "author" in opts ? opts.author ?? null : require_accounts.getAuthor();
7679
8557
  const db = require_tables.getDb();
7680
8558
  const datasetId = require_createHash.sha256(JSON.stringify(config.tests || []));
7681
8559
  db.transaction(() => {
@@ -7737,7 +8615,7 @@ var Eval = class Eval {
7737
8615
  });
7738
8616
  return new Eval(config, {
7739
8617
  id: evalId,
7740
- author: opts?.author,
8618
+ author,
7741
8619
  createdAt,
7742
8620
  persisted: true,
7743
8621
  runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions)
@@ -7747,7 +8625,7 @@ var Eval = class Eval {
7747
8625
  const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
7748
8626
  this.createdAt = createdAt.getTime();
7749
8627
  this.id = opts?.id || createEvalId(createdAt);
7750
- this.author = opts?.author;
8628
+ this.author = opts?.author ?? null;
7751
8629
  this.config = config;
7752
8630
  this.results = [];
7753
8631
  this.prompts = opts?.prompts || [];
@@ -8277,7 +9155,7 @@ var Eval = class Eval {
8277
9155
  newConfig.description = copyDescription;
8278
9156
  const newPrompts = structuredClone(this.prompts);
8279
9157
  const newVars = this.vars ? structuredClone(this.vars) : [];
8280
- const author = require_accounts.getUserEmail();
9158
+ const author = require_accounts.getAuthor();
8281
9159
  const db = require_tables.getDb();
8282
9160
  let copiedCount = 0;
8283
9161
  db.transaction(() => {
@@ -8434,47 +9312,11 @@ function filterPrompts(prompts, filterPromptsOption) {
8434
9312
  //#endregion
8435
9313
  //#region src/commands/eval/filterProviders.ts
8436
9314
  /**
8437
- * Checks if a value is a valid provider ID (non-empty string).
8438
- */
8439
- function isValidProviderId(id) {
8440
- return id !== null && id !== void 0 && typeof id === "string" && id !== "";
8441
- }
8442
- /**
8443
9315
  * Extracts the id and label from a raw provider config without instantiating it.
8444
9316
  * Handles all provider config formats: string, function, ProviderOptions, ProviderOptionsMap.
8445
9317
  */
8446
9318
  function getProviderIdAndLabel(provider, index) {
8447
- if (typeof provider === "string") return { id: provider };
8448
- if (typeof provider === "function") {
8449
- const label = provider.label;
8450
- return {
8451
- id: label ?? `custom-function-${index}`,
8452
- label
8453
- };
8454
- }
8455
- const providerId = provider.id;
8456
- if ("id" in provider && isValidProviderId(providerId)) return {
8457
- id: providerId,
8458
- label: provider.label
8459
- };
8460
- const keys = Object.keys(provider);
8461
- if (keys.length > 0) {
8462
- const id = keys[0];
8463
- const value = provider[id];
8464
- if (typeof value === "object" && value !== null) return {
8465
- id: value.id || id,
8466
- label: value.label
8467
- };
8468
- }
8469
- const label = provider.label;
8470
- if (isValidProviderId(label)) return {
8471
- id: label,
8472
- label
8473
- };
8474
- return {
8475
- id: `unknown-${index}`,
8476
- label
8477
- };
9319
+ return require_util.normalizeProviderRef(provider, { index });
8478
9320
  }
8479
9321
  /**
8480
9322
  * Filters raw provider configs BEFORE instantiation.
@@ -10227,10 +11069,10 @@ async function fetchRemoteGeneration(task, prompts) {
10227
11069
  const body = {
10228
11070
  task,
10229
11071
  prompts,
10230
- version: require_fetch.VERSION,
11072
+ version: require_version.VERSION,
10231
11073
  email: require_accounts.getUserEmail()
10232
11074
  };
10233
- const response = await require_cache.fetchWithCache(require_server.getRemoteGenerationUrl(), {
11075
+ const response = await require_cache.fetchWithCache(require_remoteGeneration.getRemoteGenerationUrl(), {
10234
11076
  method: "POST",
10235
11077
  headers: { "Content-Type": "application/json" },
10236
11078
  body: JSON.stringify(body)
@@ -10265,7 +11107,7 @@ function formatPrompts(prompts) {
10265
11107
  //#endregion
10266
11108
  //#region src/redteam/extraction/entities.ts
10267
11109
  async function extractEntities(provider, prompts) {
10268
- if (require_server.shouldGenerateRemote()) try {
11110
+ if (require_remoteGeneration.shouldGenerateRemote()) try {
10269
11111
  return await fetchRemoteGeneration("entities", prompts);
10270
11112
  } catch (error) {
10271
11113
  require_logger.logger.warn(`[Entity Extraction] Failed, returning 0 entities. Error using remote generation: ${error}`);
@@ -10312,7 +11154,7 @@ async function extractSystemPurpose(provider, prompts) {
10312
11154
  require_logger.logger.debug("[purpose] No meaningful prompts provided, returning default purpose");
10313
11155
  return DEFAULT_PURPOSE;
10314
11156
  }
10315
- if (!require_server.neverGenerateRemote()) try {
11157
+ if (!require_remoteGeneration.neverGenerateRemote()) try {
10316
11158
  return await fetchRemoteGeneration("purpose", prompts);
10317
11159
  } catch (error) {
10318
11160
  require_logger.logger.warn(`[purpose] Error using remote generation, returning empty string: ${error}`);
@@ -10584,7 +11426,7 @@ function createTestCase(injectVar, output, harmCategory) {
10584
11426
  vars: { [injectVar]: output.trim() },
10585
11427
  metadata: {
10586
11428
  harmCategory: harmCategoryLabel,
10587
- pluginId: require_providers.getShortPluginId(harmCategory)
11429
+ pluginId: require_util$2.getShortPluginId(harmCategory)
10588
11430
  },
10589
11431
  assert: getHarmfulAssertions(harmCategory)
10590
11432
  };
@@ -10707,25 +11549,45 @@ var AlignedHarmfulPlugin = class extends require_graders.RedteamPluginBase {
10707
11549
  getAssertions(_prompt) {
10708
11550
  return getHarmfulAssertions(this.harmCategory);
10709
11551
  }
10710
- promptsToTestCases(prompts) {
11552
+ async promptsToTestCases(prompts) {
10711
11553
  const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
10712
11554
  const harmCategoryLabel = require_types.HARM_PLUGINS[this.harmCategory] || this.harmCategory;
10713
- return prompts.map(({ __prompt }) => {
11555
+ const pluginId = require_util$2.getShortPluginId(this.harmCategory);
11556
+ return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
10714
11557
  const vars = { [this.injectVar]: __prompt };
10715
- if (hasMultipleInputs) try {
10716
- const parsed = JSON.parse(__prompt);
10717
- Object.assign(vars, require_providers.extractVariablesFromJson(parsed, this.config.inputs));
10718
- } catch {}
11558
+ let inputMaterialization;
11559
+ if (hasMultipleInputs) {
11560
+ let parsed;
11561
+ try {
11562
+ parsed = JSON.parse(__prompt);
11563
+ } catch (error) {
11564
+ require_logger.logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
11565
+ }
11566
+ if (parsed) try {
11567
+ const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
11568
+ materializationIndex,
11569
+ pluginId,
11570
+ provider: this.provider,
11571
+ purpose: this.purpose
11572
+ });
11573
+ Object.assign(vars, materializedVars.vars);
11574
+ inputMaterialization = materializedVars.metadata;
11575
+ } catch (error) {
11576
+ require_logger.logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
11577
+ throw error;
11578
+ }
11579
+ }
10719
11580
  return {
10720
11581
  vars,
10721
11582
  metadata: {
10722
11583
  harmCategory: harmCategoryLabel,
10723
- pluginId: require_providers.getShortPluginId(this.harmCategory),
10724
- pluginConfig: this.config
11584
+ pluginId,
11585
+ pluginConfig: this.config,
11586
+ ...inputMaterialization ? { inputMaterialization } : {}
10725
11587
  },
10726
11588
  assert: getHarmfulAssertions(this.harmCategory)
10727
11589
  };
10728
- });
11590
+ }));
10729
11591
  }
10730
11592
  };
10731
11593
  //#endregion
@@ -10734,25 +11596,42 @@ var AlignedHarmfulPlugin = class extends require_graders.RedteamPluginBase {
10734
11596
  * Extract content from <Prompt> tags and parse JSON if inputs are defined.
10735
11597
  * Returns the processed prompt and any additional vars extracted from JSON.
10736
11598
  */
10737
- function processPromptForInputs(prompt, _injectVar, inputs) {
11599
+ async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
10738
11600
  let processedPrompt = prompt.trim();
10739
11601
  const additionalVars = {};
10740
- const extractedPrompt = require_providers.extractPromptFromTags(processedPrompt);
11602
+ let additionalMetadata;
11603
+ const extractedPrompt = require_util$2.extractPromptFromTags(processedPrompt);
10741
11604
  if (extractedPrompt) processedPrompt = extractedPrompt;
10742
- if (inputs && Object.keys(inputs).length > 0) try {
10743
- const parsed = JSON.parse(processedPrompt);
10744
- Object.assign(additionalVars, require_providers.extractVariablesFromJson(parsed, inputs));
10745
- } catch {
10746
- require_logger.logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode");
11605
+ if (inputs && Object.keys(inputs).length > 0) {
11606
+ let parsed;
11607
+ try {
11608
+ parsed = JSON.parse(processedPrompt);
11609
+ } catch (error) {
11610
+ require_logger.logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
11611
+ }
11612
+ if (parsed) try {
11613
+ const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
11614
+ materializationIndex,
11615
+ pluginId: plugin,
11616
+ provider,
11617
+ purpose
11618
+ });
11619
+ Object.assign(additionalVars, materializedVars.vars);
11620
+ additionalMetadata = materializedVars.metadata;
11621
+ } catch (error) {
11622
+ require_logger.logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
11623
+ throw error;
11624
+ }
10747
11625
  }
10748
11626
  return {
10749
11627
  processedPrompt,
10750
- additionalVars
11628
+ additionalVars,
11629
+ additionalMetadata
10751
11630
  };
10752
11631
  }
10753
11632
  async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
10754
11633
  const maxHarmfulTests = require_logger.getEnvInt("PROMPTFOO_MAX_HARMFUL_TESTS_PER_REQUEST", 5);
10755
- const unalignedProvider = new require_providers.PromptfooHarmfulCompletionProvider({
11634
+ const unalignedProvider = new require_promptfoo.PromptfooHarmfulCompletionProvider({
10756
11635
  purpose,
10757
11636
  n: Math.min(n, maxHarmfulTests),
10758
11637
  harmCategory: plugin,
@@ -10768,15 +11647,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
10768
11647
  };
10769
11648
  const allPrompts = await require_graders.retryWithDeduplication(generatePrompts, n);
10770
11649
  const inputs = config?.inputs;
10771
- return require_graders.sampleArray(allPrompts, n).map((prompt) => {
10772
- const { processedPrompt, additionalVars } = processPromptForInputs(prompt, injectVar, inputs);
11650
+ return Promise.all(require_graders.sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
11651
+ const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
10773
11652
  const testCase = createTestCase(injectVar, processedPrompt, plugin);
10774
11653
  if (Object.keys(additionalVars).length > 0) testCase.vars = {
10775
11654
  ...testCase.vars,
10776
11655
  ...additionalVars
10777
11656
  };
11657
+ if (additionalMetadata) testCase.metadata = {
11658
+ ...testCase.metadata,
11659
+ inputMaterialization: additionalMetadata
11660
+ };
10778
11661
  return testCase;
10779
- });
11662
+ }));
10780
11663
  }
10781
11664
  //#endregion
10782
11665
  //#region src/redteam/plugins/teenSafety/graderExamples.ts
@@ -11094,9 +11977,9 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
11094
11977
  function computeModifiersFromConfig(config) {
11095
11978
  const modifiers = { ...config?.modifiers };
11096
11979
  if (config?.language && typeof config.language === "string") modifiers.language = config.language;
11097
- if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
11098
- const maxCharsModifier = require_providers.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
11099
- if (maxCharsModifier) modifiers[require_providers.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
11980
+ if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(require_inputVariables.buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
11981
+ const maxCharsModifier = require_promptLength.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
11982
+ if (maxCharsModifier) modifiers[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
11100
11983
  return modifiers;
11101
11984
  }
11102
11985
  function applyDefaultGraderExamples(key, config) {
@@ -11120,7 +12003,7 @@ function isValidMaxCharsPerMessage(limit) {
11120
12003
  }
11121
12004
  function getMaxCharsPerMessageFromConfig(config) {
11122
12005
  if (isValidMaxCharsPerMessage(config?.maxCharsPerMessage)) return config.maxCharsPerMessage;
11123
- const maxCharsModifier = (config?.modifiers)?.[require_providers.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY];
12006
+ const maxCharsModifier = (config?.modifiers)?.[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY];
11124
12007
  if (typeof maxCharsModifier !== "string") return;
11125
12008
  const match = /must be (\d+) characters or fewer\./.exec(maxCharsModifier);
11126
12009
  if (!match) return;
@@ -11179,9 +12062,10 @@ function dedupeTestCases(testCases) {
11179
12062
  return deduped;
11180
12063
  }
11181
12064
  function buildMaxCharsRetryInstructions(rejectedPromptLengths, limit) {
12065
+ const longestRejectedPromptText = rejectedPromptLengths.length > 0 ? `${Math.max(...rejectedPromptLengths)} characters` : "unknown length";
11182
12066
  return dedent.default`
11183
12067
  Your previous response included ${rejectedPromptLengths.length} generated prompt${rejectedPromptLengths.length === 1 ? "" : "s"} that exceeded the ${limit ?? "configured"}-character limit.
11184
- The longest rejected prompt was ${Math.max(...rejectedPromptLengths)} characters.
12068
+ The longest rejected prompt was ${longestRejectedPromptText}.
11185
12069
  Generate replacement prompts only, and keep every user message within the character limit.
11186
12070
  `.trim();
11187
12071
  }
@@ -11203,7 +12087,7 @@ function withMaxCharsRetries(pluginFactory) {
11203
12087
  const rejectedPromptLengths = [];
11204
12088
  let rejectedPromptLimit;
11205
12089
  for (const testCase of generatedTestCases) {
11206
- const violation = require_providers.getGeneratedPromptOverLimit(String(testCase.vars?.[params.injectVar] ?? ""), maxCharsPerMessage);
12090
+ const violation = require_promptLength.getGeneratedPromptOverLimit(String(testCase.vars?.[params.injectVar] ?? ""), maxCharsPerMessage);
11207
12091
  if (violation) {
11208
12092
  rejectedPromptLengths.push(violation.length);
11209
12093
  rejectedPromptLimit = violation.limit;
@@ -11220,16 +12104,16 @@ function withMaxCharsRetries(pluginFactory) {
11220
12104
  }
11221
12105
  async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
11222
12106
  require_invariant.invariant(!require_logger.getEnvBool("PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION"), "fetchRemoteTestCases should never be called when remote generation is disabled");
11223
- const remoteHealth = await checkRemoteHealth(require_server.getRemoteHealthUrl());
12107
+ const remoteHealth = await checkRemoteHealth(require_remoteGeneration.getRemoteHealthUrl());
11224
12108
  if (remoteHealth.status !== "OK") {
11225
12109
  require_logger.logger.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
11226
12110
  return [];
11227
12111
  }
11228
12112
  const { graderExamples, ...configForRemote } = config ?? {};
11229
- const maxCharsModifier = require_providers.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
12113
+ const maxCharsModifier = require_promptLength.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
11230
12114
  if (maxCharsModifier) configForRemote.modifiers = {
11231
12115
  ...configForRemote.modifiers ?? {},
11232
- [require_providers.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY]: maxCharsModifier
12116
+ [require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY]: maxCharsModifier
11233
12117
  };
11234
12118
  const body = JSON.stringify({
11235
12119
  config: configForRemote,
@@ -11238,11 +12122,11 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
11238
12122
  n,
11239
12123
  purpose,
11240
12124
  task: key,
11241
- version: require_fetch.VERSION,
12125
+ version: require_version.VERSION,
11242
12126
  email: require_accounts.getUserEmail()
11243
12127
  });
11244
12128
  try {
11245
- const { data, status, statusText } = await require_cache.fetchWithCache(require_server.getRemoteGenerationUrl(), {
12129
+ const { data, status, statusText } = await require_cache.fetchWithCache(require_remoteGeneration.getRemoteGenerationUrl(), {
11246
12130
  method: "POST",
11247
12131
  headers: { "Content-Type": "application/json" },
11248
12132
  body
@@ -11259,23 +12143,56 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
11259
12143
  return [];
11260
12144
  }
11261
12145
  }
12146
+ async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
12147
+ const inputs = config.inputs;
12148
+ if (!inputs || Object.keys(inputs).length === 0) return testCases;
12149
+ return Promise.all(testCases.map(async (testCase, materializationIndex) => {
12150
+ const inputVars = require_util$2.extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
12151
+ if (!inputVars) return testCase;
12152
+ const materializedVars = await require_inputVariables.materializeInputVariablesWithMetadata(inputVars, inputs, {
12153
+ materializationIndex,
12154
+ pluginId,
12155
+ provider,
12156
+ purpose
12157
+ });
12158
+ return {
12159
+ ...testCase,
12160
+ vars: {
12161
+ ...testCase.vars || {},
12162
+ ...materializedVars.vars
12163
+ },
12164
+ metadata: {
12165
+ ...testCase.metadata || {},
12166
+ ...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
12167
+ }
12168
+ };
12169
+ }));
12170
+ }
11262
12171
  function createPluginFactory(PluginClass, key, validate) {
11263
12172
  return {
11264
12173
  key,
11265
12174
  validate,
11266
12175
  action: async ({ provider, purpose, injectVar, n, delayMs, config }) => {
11267
12176
  const configWithDefaults = applyDefaultGraderExamples(key, config);
11268
- if (PluginClass.canGenerateRemote === false || !require_server.shouldGenerateRemote()) {
12177
+ if (PluginClass.canGenerateRemote === false || !require_remoteGeneration.shouldGenerateRemote()) {
11269
12178
  require_logger.logger.debug(`Using local redteam generation for ${key}`);
11270
12179
  return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
11271
12180
  }
11272
- const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
12181
+ const pluginId = require_util$2.getShortPluginId(key);
12182
+ const testCases = await materializeRemoteTestCaseInputs({
12183
+ config: configWithDefaults ?? {},
12184
+ injectVar,
12185
+ pluginId,
12186
+ provider,
12187
+ purpose,
12188
+ testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
12189
+ });
11273
12190
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
11274
12191
  return testCases.map((testCase) => ({
11275
12192
  ...testCase,
11276
12193
  metadata: {
11277
12194
  ...testCase.metadata,
11278
- pluginId: require_providers.getShortPluginId(key),
12195
+ pluginId,
11279
12196
  pluginConfig: {
11280
12197
  ...configWithDefaults,
11281
12198
  modifiers: computedModifiers
@@ -11331,8 +12248,8 @@ const pluginFactories = [
11331
12248
  ...unalignedHarmCategories.map((category) => ({
11332
12249
  key: category,
11333
12250
  action: async (params) => {
11334
- if (require_server.neverGenerateRemote()) {
11335
- require_logger.logger.error(`${category} plugin requires remote generation to be enabled`);
12251
+ if (require_remoteGeneration.neverGenerateRemote()) {
12252
+ require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
11336
12253
  return [];
11337
12254
  }
11338
12255
  const testCases = await getHarmfulTests(params, category);
@@ -11341,7 +12258,7 @@ const pluginFactories = [
11341
12258
  ...testCase,
11342
12259
  metadata: {
11343
12260
  ...testCase.metadata,
11344
- pluginId: require_providers.getShortPluginId(category),
12261
+ pluginId: require_util$2.getShortPluginId(category),
11345
12262
  pluginConfig: {
11346
12263
  ...params.config,
11347
12264
  modifiers: computedModifiers
@@ -11354,14 +12271,22 @@ const pluginFactories = [
11354
12271
  const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
11355
12272
  key: category,
11356
12273
  action: async (params) => {
11357
- if (require_server.shouldGenerateRemote()) {
11358
- const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
12274
+ if (require_remoteGeneration.shouldGenerateRemote()) {
12275
+ const pluginId = require_util$2.getShortPluginId(category);
12276
+ const testCases = await materializeRemoteTestCaseInputs({
12277
+ config: params.config ?? {},
12278
+ injectVar: params.injectVar,
12279
+ pluginId,
12280
+ provider: params.provider,
12281
+ purpose: params.purpose,
12282
+ testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
12283
+ });
11359
12284
  const computedModifiers = computeModifiersFromConfig(params.config);
11360
12285
  return testCases.map((testCase) => ({
11361
12286
  ...testCase,
11362
12287
  metadata: {
11363
12288
  ...testCase.metadata,
11364
- pluginId: require_providers.getShortPluginId(category),
12289
+ pluginId,
11365
12290
  pluginConfig: {
11366
12291
  ...params.config,
11367
12292
  modifiers: computedModifiers
@@ -11374,7 +12299,7 @@ const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
11374
12299
  ...testCase,
11375
12300
  metadata: {
11376
12301
  ...testCase.metadata,
11377
- pluginId: require_providers.getShortPluginId(category)
12302
+ pluginId: require_util$2.getShortPluginId(category)
11378
12303
  }
11379
12304
  }));
11380
12305
  }
@@ -11382,17 +12307,25 @@ const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
11382
12307
  const biasPlugins = require_types.BIAS_PLUGINS.map((category) => ({
11383
12308
  key: category,
11384
12309
  action: async (params) => {
11385
- if (require_server.neverGenerateRemote()) {
11386
- require_logger.logger.error(`${category} plugin requires remote generation to be enabled`);
12310
+ if (require_remoteGeneration.neverGenerateRemote()) {
12311
+ require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
11387
12312
  return [];
11388
12313
  }
11389
- const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
12314
+ const pluginId = require_util$2.getShortPluginId(category);
12315
+ const testCases = await materializeRemoteTestCaseInputs({
12316
+ config: params.config ?? {},
12317
+ injectVar: params.injectVar,
12318
+ pluginId,
12319
+ provider: params.provider,
12320
+ purpose: params.purpose,
12321
+ testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
12322
+ });
11390
12323
  const computedModifiers = computeModifiersFromConfig(params.config);
11391
12324
  return testCases.map((testCase) => ({
11392
12325
  ...testCase,
11393
12326
  metadata: {
11394
12327
  ...testCase.metadata,
11395
- pluginId: require_providers.getShortPluginId(category),
12328
+ pluginId,
11396
12329
  pluginConfig: {
11397
12330
  ...params.config,
11398
12331
  modifiers: computedModifiers
@@ -11405,19 +12338,27 @@ function createRemotePlugin(key, validate) {
11405
12338
  return {
11406
12339
  key,
11407
12340
  validate,
11408
- action: async ({ purpose, injectVar, n, config }) => {
12341
+ action: async ({ provider, purpose, injectVar, n, config }) => {
11409
12342
  const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
11410
- if (require_server.neverGenerateRemote()) {
11411
- require_logger.logger.error(`${key} plugin requires remote generation to be enabled`);
12343
+ if (require_remoteGeneration.neverGenerateRemote()) {
12344
+ require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
11412
12345
  return [];
11413
12346
  }
11414
- const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
12347
+ const pluginId = require_util$2.getShortPluginId(key);
12348
+ const testCases = await materializeRemoteTestCaseInputs({
12349
+ config: configWithDefaults ?? {},
12350
+ injectVar,
12351
+ pluginId,
12352
+ provider,
12353
+ purpose,
12354
+ testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
12355
+ });
11415
12356
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
11416
12357
  const testsWithMetadata = testCases.map((testCase) => ({
11417
12358
  ...testCase,
11418
12359
  metadata: {
11419
12360
  ...testCase.metadata,
11420
- pluginId: require_providers.getShortPluginId(key),
12361
+ pluginId,
11421
12362
  pluginConfig: {
11422
12363
  ...configWithDefaults,
11423
12364
  modifiers: computedModifiers
@@ -11487,6 +12428,37 @@ function getPolicyText(metadata) {
11487
12428
  return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
11488
12429
  }
11489
12430
  }
12431
+ async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
12432
+ const inputs = testCase.metadata?.pluginConfig?.inputs;
12433
+ const inputMaterialization = testCase.metadata?.inputMaterialization;
12434
+ if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
12435
+ inputMaterialization,
12436
+ vars: testCase.vars
12437
+ };
12438
+ try {
12439
+ const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
12440
+ materializationIndex,
12441
+ pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
12442
+ provider,
12443
+ purpose
12444
+ });
12445
+ return {
12446
+ inputMaterialization: materializedVars.metadata ? {
12447
+ ...inputMaterialization,
12448
+ ...materializedVars.metadata
12449
+ } : inputMaterialization,
12450
+ vars: {
12451
+ ...testCase.vars,
12452
+ ...materializedVars.vars
12453
+ }
12454
+ };
12455
+ } catch {
12456
+ return {
12457
+ inputMaterialization,
12458
+ vars: testCase.vars
12459
+ };
12460
+ }
12461
+ }
11490
12462
  /**
11491
12463
  * Gets the severity level for a plugin based on its ID and configuration.
11492
12464
  * @param pluginId - The ID of the plugin.
@@ -11495,7 +12467,7 @@ function getPolicyText(metadata) {
11495
12467
  */
11496
12468
  function getPluginSeverity(pluginId, pluginConfig) {
11497
12469
  if (pluginConfig?.severity) return pluginConfig.severity;
11498
- const shortId = require_providers.getShortPluginId(pluginId);
12470
+ const shortId = require_util$2.getShortPluginId(pluginId);
11499
12471
  return shortId in require_types.riskCategorySeverityMap ? require_types.riskCategorySeverityMap[shortId] : require_types.Severity.Low;
11500
12472
  }
11501
12473
  const POLICY_PREVIEW_MAX_LENGTH = 20;
@@ -11620,14 +12592,15 @@ function buildRedteamModifiers({ maxCharsPerMessage, pluginConfig, testGeneratio
11620
12592
  ...testGenerationInstructions ? { testGenerationInstructions } : {},
11621
12593
  ...pluginConfig?.modifiers ?? {}
11622
12594
  };
11623
- const maxCharsPerMessageModifier = require_providers.getMaxCharsPerMessageModifierValue(maxCharsPerMessage ?? pluginConfig?.maxCharsPerMessage);
11624
- if (maxCharsPerMessageModifier) modifiers[require_providers.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsPerMessageModifier;
12595
+ const maxCharsPerMessageModifier = require_promptLength.getMaxCharsPerMessageModifierValue(maxCharsPerMessage ?? pluginConfig?.maxCharsPerMessage);
12596
+ if (maxCharsPerMessageModifier) modifiers[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsPerMessageModifier;
11625
12597
  return modifiers;
11626
12598
  }
11627
12599
  const categories = {
11628
12600
  foundation: require_types.FOUNDATION_PLUGINS,
11629
12601
  harmful: Object.keys(require_types.HARM_PLUGINS),
11630
12602
  "coding-agent:core": require_types.CODING_AGENT_CORE_PLUGINS,
12603
+ "coding-agent:all": require_types.CODING_AGENT_PLUGINS,
11631
12604
  bias: require_types.BIAS_PLUGINS,
11632
12605
  pii: require_types.PII_PLUGINS,
11633
12606
  medical: require_types.MEDICAL_PLUGINS,
@@ -11657,7 +12630,7 @@ function getLanguageForTestCase(test) {
11657
12630
  function filterOversizedTestCases(testCases, injectVar, sourceLabel, maxCharsPerMessage) {
11658
12631
  return testCases.filter((testCase) => {
11659
12632
  const testCaseMaxCharsPerMessage = maxCharsPerMessage ?? (testCase.metadata?.strategyConfig)?.maxCharsPerMessage ?? (testCase.metadata?.pluginConfig)?.maxCharsPerMessage;
11660
- const violation = require_providers.getGeneratedPromptOverLimit(String(testCase.vars?.[injectVar] ?? ""), testCaseMaxCharsPerMessage);
12633
+ const violation = require_promptLength.getGeneratedPromptOverLimit(String(testCase.vars?.[injectVar] ?? ""), testCaseMaxCharsPerMessage);
11661
12634
  if (!violation) return true;
11662
12635
  require_logger.logger.warn(`[${sourceLabel}] Dropping generated test case that exceeds maxCharsPerMessage=${violation.limit} (${violation.length} chars)`);
11663
12636
  return false;
@@ -11722,18 +12695,18 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
11722
12695
  * @param injectVar - The variable to inject.
11723
12696
  * @returns An array of new test cases generated by strategies.
11724
12697
  */
11725
- async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
12698
+ async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
11726
12699
  const newTestCases = [];
11727
12700
  const strategyResults = {};
11728
12701
  for (const strategy of strategies) {
11729
12702
  require_logger.logger.debug(`Generating ${strategy.id} tests`);
11730
12703
  let strategyAction;
11731
- if (strategy.id.startsWith("file://")) strategyAction = (await require_providers.loadStrategy(strategy.id)).action;
12704
+ if (strategy.id.startsWith("file://")) strategyAction = (await require_strategies.loadStrategy(strategy.id)).action;
11732
12705
  else {
11733
- let builtinStrategy = require_providers.Strategies.find((s) => s.id === strategy.id);
12706
+ let builtinStrategy = require_strategies.Strategies.find((s) => s.id === strategy.id);
11734
12707
  if (!builtinStrategy && strategy.id.includes(":")) {
11735
12708
  const baseStrategyId = strategy.id.split(":")[0];
11736
- builtinStrategy = require_providers.Strategies.find((s) => s.id === baseStrategyId);
12709
+ builtinStrategy = require_strategies.Strategies.find((s) => s.id === baseStrategyId);
11737
12710
  }
11738
12711
  if (!builtinStrategy) {
11739
12712
  require_logger.logger.warn(`Strategy ${strategy.id} not registered, skipping`);
@@ -11743,7 +12716,7 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
11743
12716
  }
11744
12717
  const targetPlugins = strategy.config?.plugins;
11745
12718
  const applicableTestCases = testCases.filter((t) => {
11746
- if (!require_providers.pluginMatchesStrategyTargets(t, strategy.id, targetPlugins)) return false;
12719
+ if (!require_strategies.pluginMatchesStrategyTargets(t, strategy.id, targetPlugins)) return false;
11747
12720
  if (t.metadata?.retry === true) {
11748
12721
  require_logger.logger.debug(`Skipping ${strategy.id} for retry test (plugin: ${t.metadata?.pluginId}) - retry tests are not transformed`);
11749
12722
  return false;
@@ -11778,14 +12751,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
11778
12751
  }
11779
12752
  }
11780
12753
  resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
11781
- newTestCases.push(...resultTestCases.map((t) => {
11782
- const inputs = t?.metadata?.pluginConfig?.inputs;
11783
- let updatedVars = t.vars;
11784
- if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
11785
- const parsed = JSON.parse(String(t.vars[injectVar]));
11786
- updatedVars = { ...t.vars };
11787
- Object.assign(updatedVars, require_providers.extractVariablesFromJson(parsed, inputs));
11788
- } catch {}
12754
+ newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
12755
+ const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
11789
12756
  const strategyConfig = {
11790
12757
  ...strategy.config || {},
11791
12758
  ...maxCharsPerMessage ? { maxCharsPerMessage } : {},
@@ -11793,16 +12760,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
11793
12760
  };
11794
12761
  return {
11795
12762
  ...t,
11796
- vars: updatedVars,
12763
+ vars,
11797
12764
  metadata: {
11798
12765
  ...t?.metadata || {},
11799
12766
  ...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
11800
12767
  ...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
11801
12768
  ...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
12769
+ ...inputMaterialization && { inputMaterialization },
11802
12770
  ...Object.keys(strategyConfig).length > 0 && { strategyConfig }
11803
12771
  }
11804
12772
  };
11805
- }));
12773
+ })));
11806
12774
  const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
11807
12775
  const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
11808
12776
  const applyNumTestsCap = (calculatedRequested) => {
@@ -11960,10 +12928,10 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
11960
12928
  seen.add(key);
11961
12929
  return true;
11962
12930
  });
11963
- const needsGoalExtraction = strategies.some((s) => require_providers.Strategies.find((def) => def.id === s.id)?.requiresGoalExtraction);
11964
- await require_providers.validateStrategies(strategies);
12931
+ const needsGoalExtraction = strategies.some((s) => require_strategies.Strategies.find((def) => def.id === s.id)?.requiresGoalExtraction);
12932
+ await require_strategies.validateStrategies(strategies);
11965
12933
  await validateSharpDependency(strategies, plugins);
11966
- const redteamProvider = await require_providers.redteamProviderManager.getProvider({ provider });
12934
+ const redteamProvider = await require_shared.redteamProviderManager.getProvider({ provider });
11967
12935
  const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
11968
12936
  require_logger.logger.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
11969
12937
  const pluginLanguageConfig = p.config?.language ?? language;
@@ -12067,8 +13035,8 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12067
13035
  };
12068
13036
  require_logger.logger.debug("Validating plugins...");
12069
13037
  plugins = [...new Set(expandedPlugins)].filter(validatePlugin).sort();
12070
- if (require_server.shouldGenerateRemote()) {
12071
- const healthUrl = require_server.getRemoteHealthUrl();
13038
+ if (require_remoteGeneration.shouldGenerateRemote()) {
13039
+ const healthUrl = require_remoteGeneration.getRemoteHealthUrl();
12072
13040
  if (healthUrl) {
12073
13041
  require_logger.logger.debug(`Checking Promptfoo API health at ${healthUrl}...`);
12074
13042
  const healthResult = await checkRemoteHealth(healthUrl);
@@ -12163,7 +13131,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12163
13131
  const promptVar = testCase.vars?.[injectVar];
12164
13132
  const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
12165
13133
  const policy = getPolicyText(testCase.metadata);
12166
- const extractedGoal = await require_providers.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
13134
+ const extractedGoal = await require_util$2.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
12167
13135
  testCase.metadata.goal = extractedGoal;
12168
13136
  }
12169
13137
  }
@@ -12218,7 +13186,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12218
13186
  const promptVar = testCase.vars?.[injectVar];
12219
13187
  const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
12220
13188
  const policy = getPolicyText(testCase.metadata);
12221
- const extractedGoal = await require_providers.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
13189
+ const extractedGoal = await require_util$2.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
12222
13190
  testCase.metadata.goal = extractedGoal;
12223
13191
  }
12224
13192
  }
@@ -12257,7 +13225,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12257
13225
  targetIds,
12258
13226
  ...retryStrategy.config
12259
13227
  };
12260
- const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
13228
+ const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
12261
13229
  pluginTestCases.push(...retryTestCases);
12262
13230
  Object.assign(strategyResults, retryResults);
12263
13231
  if (showProgressBar) progressBar?.increment(retryTestCases.length);
@@ -12265,7 +13233,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12265
13233
  checkAbort();
12266
13234
  const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
12267
13235
  if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
12268
- const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
13236
+ const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
12269
13237
  Object.assign(strategyResults, otherStrategyResults);
12270
13238
  if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
12271
13239
  const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
@@ -12317,7 +13285,7 @@ function handleFailedPlugins(failedPlugins, strict) {
12317
13285
  }
12318
13286
  function getConfigHash(configPath) {
12319
13287
  const content = fs.readFileSync(configPath, "utf8");
12320
- return (0, crypto$1.createHash)("md5").update(`${require_fetch.VERSION}:${content}`).digest("hex");
13288
+ return (0, crypto$1.createHash)("md5").update(`${require_version.VERSION}:${content}`).digest("hex");
12321
13289
  }
12322
13290
  function createHeaderComments({ title, timestampLabel, author, cloudHost, testCasesCount, plugins, strategies, isUpdate = false }) {
12323
13291
  const sectionLabel = isUpdate ? "Changes:" : "Test Configuration:";
@@ -12392,7 +13360,7 @@ async function doGenerateRedteam(options) {
12392
13360
  redteamConfig = resolved.config.redteam;
12393
13361
  commandLineOptions = resolved.commandLineOptions;
12394
13362
  resolvedConfig = resolved.config;
12395
- await require_providers.checkCloudPermissions(resolved.config);
13363
+ await require_storage.checkCloudPermissions(resolved.config);
12396
13364
  if (redteamConfig && resolved.testSuite.tests && resolved.testSuite.tests.length > 0) require_logger.logger.warn(chalk.default.yellow(dedent.default`
12397
13365
  ⚠️ Warning: Found both 'tests' section and 'redteam' configuration in your config file.
12398
13366
 
@@ -12407,8 +13375,8 @@ async function doGenerateRedteam(options) {
12407
13375
  `));
12408
13376
  try {
12409
13377
  const providerId = require_providers.getProviderIds(resolved.config.providers)[0];
12410
- if (require_providers.isCloudProvider(providerId)) {
12411
- const overrides = await require_providers.getPluginSeverityOverridesFromCloud(require_providers.getCloudDatabaseId(providerId));
13378
+ if (require_storage.isCloudProvider(providerId)) {
13379
+ const overrides = await require_storage.getPluginSeverityOverridesFromCloud(require_storage.getCloudDatabaseId(providerId));
12412
13380
  if (overrides) {
12413
13381
  pluginSeverityOverrides = new Map(Object.entries(overrides.severities));
12414
13382
  pluginSeverityOverridesId = overrides.id;
@@ -12426,7 +13394,7 @@ async function doGenerateRedteam(options) {
12426
13394
  require_logger.logger.info(chalk.default.red(`\nCan't generate without configuration - run ${chalk.default.yellow.bold(promptfooCommand("redteam init"))} first`));
12427
13395
  return null;
12428
13396
  }
12429
- if (!require_server.neverGenerateRemote()) {
13397
+ if (!require_remoteGeneration.neverGenerateRemote()) {
12430
13398
  let hasValidEmail = false;
12431
13399
  while (!hasValidEmail) {
12432
13400
  const { emailNeedsValidation } = await require_accounts.promptForEmailUnverified();
@@ -12490,7 +13458,7 @@ async function doGenerateRedteam(options) {
12490
13458
  }
12491
13459
  const policyPluginsWithRefs = plugins.filter((plugin) => plugin.config?.policy && require_graders.isValidPolicyObject(plugin.config?.policy) && require_graders.determinePolicyTypeFromId(plugin.config.policy.id) === "reusable");
12492
13460
  if (policyPluginsWithRefs.length > 0) {
12493
- const teamId = (await require_providers.resolveTeamId()).id;
13461
+ const teamId = (await require_storage.resolveTeamId()).id;
12494
13462
  const policiesById = await require_graders.getCustomPolicies(policyPluginsWithRefs, teamId);
12495
13463
  for (const policyPlugin of policyPluginsWithRefs) {
12496
13464
  const policyId = policyPlugin.config.policy.id;
@@ -13062,7 +14030,7 @@ async function sendChunkedResults(evalRecord, url$5, options = {}) {
13062
14030
  const isVerbose = require_logger.isDebugEnabled();
13063
14031
  const { silent = false } = options;
13064
14032
  require_logger.logger.debug(`Starting chunked results upload to ${url$5}`);
13065
- await require_providers.checkCloudPermissions(evalRecord.config);
14033
+ await require_storage.checkCloudPermissions(evalRecord.config);
13066
14034
  const inlineBlobs = require_extractor.isBlobStorageEnabled() && require_logger.getEnvBool("PROMPTFOO_SHARE_INLINE_BLOBS", !require_fetch.cloudConfig.isEnabled());
13067
14035
  const inlineCache = inlineBlobs ? createBlobInlineCache() : null;
13068
14036
  let sampleResults = (await evalRecord.fetchResultsBatched(100).next()).value ?? [];
@@ -13158,6 +14126,10 @@ function stripAuthFromUrl(urlString) {
13158
14126
  }
13159
14127
  }
13160
14128
  async function handleEmailCollection(evalRecord) {
14129
+ if (evalRecord.author) {
14130
+ require_logger.logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
14131
+ return;
14132
+ }
13161
14133
  if (!process.stdout.isTTY || require_logger.isCI() || require_logger.getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
13162
14134
  let email = require_accounts.getUserEmail();
13163
14135
  if (!email) {
@@ -13200,7 +14172,7 @@ async function createShareableUrl(evalRecord, options = {}) {
13200
14172
  return null;
13201
14173
  }
13202
14174
  if (!silent) {
13203
- const orgContext = await require_providers.getOrgContext();
14175
+ const orgContext = await require_storage.getOrgContext();
13204
14176
  if (orgContext) {
13205
14177
  const teamSuffix = orgContext.teamName ? ` > ${orgContext.teamName}` : "";
13206
14178
  require_logger.logger.info(`${chalk.default.dim("Sharing to:")} ${chalk.default.cyan(orgContext.organizationName)}${teamSuffix}`);
@@ -13230,7 +14202,7 @@ function generateTable(evaluateTable, tableCellMaxLength = 250, maxRows = 25) {
13230
14202
  for (const row of evaluateTable.body.slice(0, maxRows)) table.push([...row.vars.map((v) => require_text.ellipsize(v, tableCellMaxLength)), ...row.outputs.map(({ pass, text, failureReason: failureType }) => {
13231
14203
  text = require_text.ellipsize(text, tableCellMaxLength);
13232
14204
  if (pass) return chalk.default.green("[PASS] ") + text;
13233
- else return chalk.default.red(failureType === require_types.ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.default.red.bold(c) : c).join("---");
14205
+ return chalk.default.red(failureType === require_types.ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.default.red.bold(c) : c).join("---");
13234
14206
  })]);
13235
14207
  return table.toString();
13236
14208
  }
@@ -13301,6 +14273,14 @@ function shouldShareResults(opts) {
13301
14273
  return require_fetch.cloudConfig.isEnabled() && sharing !== false;
13302
14274
  }
13303
14275
  //#endregion
14276
+ //#region src/commands/eval/redteamWarning.ts
14277
+ function warnIfRedteamConfigHasNoTests(config, testSuite) {
14278
+ if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) require_logger.logger.warn(chalk.default.yellow(dedent.default`
14279
+ Warning: Config file has a redteam section but no test cases.
14280
+ Did you mean to run ${chalk.default.bold("promptfoo redteam generate")} instead?
14281
+ `));
14282
+ }
14283
+ //#endregion
13304
14284
  //#region src/util/formatDuration.ts
13305
14285
  /**
13306
14286
  * Formats a duration in seconds into a human-readable string
@@ -13321,6 +14301,115 @@ function formatDuration(seconds) {
13321
14301
  }
13322
14302
  //#endregion
13323
14303
  //#region src/commands/eval/summary.ts
14304
+ function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
14305
+ if (wasAborted) {
14306
+ const idSuffix = writeToDatabase ? ` (ID: ${chalk.default.cyan(evalId)})` : "";
14307
+ return `${chalk.default.red("✗")} ${completionType} aborted${idSuffix}`;
14308
+ }
14309
+ if (writeToDatabase && shareableUrl) return `${chalk.default.green("✓")} ${completionType} complete: ${shareableUrl}`;
14310
+ if (writeToDatabase && activelySharing) return `${chalk.default.green("✓")} ${completionType} complete`;
14311
+ if (writeToDatabase) return `${chalk.default.green("✓")} ${completionType} complete (ID: ${chalk.default.cyan(evalId)})`;
14312
+ return `${chalk.default.green("✓")} ${completionType} complete`;
14313
+ }
14314
+ function getAbortSummaryLines(targetErrorStatus) {
14315
+ if (targetErrorStatus == null) return [];
14316
+ return [
14317
+ "",
14318
+ chalk.default.red.bold("Scan stopped: Target is unavailable and will not recover on retry."),
14319
+ chalk.default.red(` Target returned HTTP ${targetErrorStatus}`),
14320
+ "",
14321
+ chalk.default.yellow("Possible causes:"),
14322
+ chalk.default.yellow(" • Invalid API key or authentication (401/403)"),
14323
+ chalk.default.yellow(" • Target endpoint does not exist (404)"),
14324
+ chalk.default.yellow(" • Server does not support the request (501)"),
14325
+ "",
14326
+ chalk.default.cyan("To fix: Check your target configuration and credentials.")
14327
+ ];
14328
+ }
14329
+ function getGuidanceLines({ writeToDatabase, shareableUrl, wantsToShare, activelySharing, hasExplicitDisable, cloudEnabled }) {
14330
+ if (!writeToDatabase || shareableUrl || wantsToShare || activelySharing) return [];
14331
+ const lines = ["", `» View results: ${chalk.default.green.bold("promptfoo view")}`];
14332
+ if (!hasExplicitDisable) lines.push(cloudEnabled ? `» Create shareable URL: ${chalk.default.green.bold("promptfoo share")}` : `» Share with your team: ${chalk.default.green.bold("https://promptfoo.app")}`);
14333
+ lines.push(`» Feedback: ${chalk.default.green.bold("https://promptfoo.dev/feedback")}`);
14334
+ return lines;
14335
+ }
14336
+ function buildUsageDetails(usage, total) {
14337
+ const parts = [];
14338
+ if (usage.prompt && usage.prompt > 0) parts.push(`${usage.prompt.toLocaleString()} prompt`);
14339
+ if (usage.completion && usage.completion > 0) parts.push(`${usage.completion.toLocaleString()} completion`);
14340
+ if (usage.cached && usage.cached > 0) parts.push(usage.cached === total && parts.length === 0 ? "cached" : `${usage.cached.toLocaleString()} cached`);
14341
+ if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) parts.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
14342
+ return parts;
14343
+ }
14344
+ function getTokenUsageLines(tokenUsage, isRedteam, tracker) {
14345
+ const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
14346
+ const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
14347
+ if (!hasEvalTokens && !hasGradingTokens) return [];
14348
+ const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
14349
+ const evalTokens = {
14350
+ prompt: tokenUsage.prompt || 0,
14351
+ completion: tokenUsage.completion || 0,
14352
+ total: tokenUsage.total || combinedTotal,
14353
+ cached: tokenUsage.cached || 0,
14354
+ numRequests: tokenUsage.numRequests || 0,
14355
+ completionDetails: tokenUsage.completionDetails || {
14356
+ reasoning: 0,
14357
+ acceptedPrediction: 0,
14358
+ rejectedPrediction: 0
14359
+ }
14360
+ };
14361
+ const lines = [`${chalk.default.bold("Total Tokens:")} ${chalk.default.white.bold((evalTokens.total + (tokenUsage.assertions?.total || 0)).toLocaleString())}`];
14362
+ if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.default.gray("Probes:")} ${chalk.default.white(tokenUsage.numRequests.toLocaleString())}`);
14363
+ if (evalTokens.total > 0) {
14364
+ const evalParts = buildUsageDetails(evalTokens, evalTokens.total);
14365
+ lines.push(` ${chalk.default.gray("Eval:")} ${chalk.default.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
14366
+ }
14367
+ if (tokenUsage.assertions?.total && tokenUsage.assertions.total > 0) {
14368
+ const gradingParts = buildUsageDetails(tokenUsage.assertions, tokenUsage.assertions.total);
14369
+ lines.push(` ${chalk.default.gray("Grading:")} ${chalk.default.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
14370
+ }
14371
+ lines.push(...getProviderUsageLines(tracker));
14372
+ return lines;
14373
+ }
14374
+ function getProviderUsageLines(tracker) {
14375
+ const providerIds = tracker.getProviderIds();
14376
+ if (providerIds.length <= 1) return [];
14377
+ const sortedProviders = providerIds.map((id) => ({
14378
+ id,
14379
+ usage: tracker.getProviderUsage(id)
14380
+ })).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
14381
+ const lines = ["", chalk.default.bold("Providers:")];
14382
+ for (const { id, usage } of sortedProviders) {
14383
+ if ((usage.total || 0) === 0 && (usage.prompt || 0) + (usage.completion || 0) === 0) continue;
14384
+ const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
14385
+ const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
14386
+ const details = buildUsageDetails(usage, displayTotal);
14387
+ const requestInfo = `${usage.numRequests || 0} requests`;
14388
+ const separator = details.length > 0 ? "; " : "";
14389
+ lines.push(` ${chalk.default.gray(`${displayId}:`)} ${chalk.default.white(displayTotal.toLocaleString())} (${requestInfo}${separator}${details.join(", ")})`);
14390
+ }
14391
+ return lines;
14392
+ }
14393
+ function formatResultPercentage(count, totalTests) {
14394
+ const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
14395
+ return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
14396
+ }
14397
+ function formatResultLine(count, label, icon, iconColor, totalTests) {
14398
+ return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.default.white.bold(count.toLocaleString())} ${chalk.default.white(label)} ${chalk.default.gray(`(${formatResultPercentage(count, totalTests)})`)}`;
14399
+ }
14400
+ function getResultsLines({ successes, failures, errors, duration, maxConcurrency }) {
14401
+ const totalTests = successes + failures + errors;
14402
+ const errorLabel = errors === 1 ? "error" : "errors";
14403
+ return [
14404
+ "",
14405
+ chalk.default.bold("Results:"),
14406
+ formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.default.green, totalTests),
14407
+ formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.default.red, totalTests),
14408
+ formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.default.red, totalTests),
14409
+ chalk.default.gray(`Duration: ${formatDuration(duration)} (concurrency: ${maxConcurrency})`),
14410
+ ""
14411
+ ];
14412
+ }
13324
14413
  /**
13325
14414
  * Generate formatted evaluation summary output for CLI display.
13326
14415
  *
@@ -13359,115 +14448,28 @@ function formatDuration(seconds) {
13359
14448
  * ```
13360
14449
  */
13361
14450
  function generateEvalSummary(params) {
13362
- const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker, targetErrorStatus } = params;
13363
- const lines = [];
13364
- const completionType = isRedteam ? "Red team" : "Eval";
13365
- const wasAborted = targetErrorStatus != null;
13366
- let completionMessage;
13367
- if (wasAborted) {
13368
- completionMessage = `${chalk.default.red("✗")} ${completionType} aborted`;
13369
- if (writeToDatabase) completionMessage += ` (ID: ${chalk.default.cyan(evalId)})`;
13370
- } else if (writeToDatabase && shareableUrl) completionMessage = `${chalk.default.green("✓")} ${completionType} complete: ${shareableUrl}`;
13371
- else if (writeToDatabase && activelySharing) completionMessage = `${chalk.default.green("✓")} ${completionType} complete`;
13372
- else if (writeToDatabase) completionMessage = `${chalk.default.green("✓")} ${completionType} complete (ID: ${chalk.default.cyan(evalId)})`;
13373
- else completionMessage = `${chalk.default.green("✓")} ${completionType} complete`;
13374
- lines.push(completionMessage);
13375
- if (wasAborted && targetErrorStatus != null) {
13376
- lines.push("");
13377
- lines.push(chalk.default.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
13378
- lines.push(chalk.default.red(` Target returned HTTP ${targetErrorStatus}`));
13379
- lines.push("");
13380
- lines.push(chalk.default.yellow("Possible causes:"));
13381
- lines.push(chalk.default.yellow(" • Invalid API key or authentication (401/403)"));
13382
- lines.push(chalk.default.yellow(" • Target endpoint does not exist (404)"));
13383
- lines.push(chalk.default.yellow(" • Server does not support the request (501)"));
13384
- lines.push("");
13385
- lines.push(chalk.default.cyan("To fix: Check your target configuration and credentials."));
13386
- }
13387
- if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
13388
- lines.push("");
13389
- lines.push(`» View results: ${chalk.default.green.bold("promptfoo view")}`);
13390
- if (!hasExplicitDisable) if (cloudEnabled) lines.push(`» Create shareable URL: ${chalk.default.green.bold("promptfoo share")}`);
13391
- else lines.push(`» Share with your team: ${chalk.default.green.bold("https://promptfoo.app")}`);
13392
- lines.push(`» Feedback: ${chalk.default.green.bold("https://promptfoo.dev/feedback")}`);
13393
- }
13394
- lines.push("");
13395
- const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
13396
- const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
13397
- if (hasEvalTokens || hasGradingTokens) {
13398
- const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
13399
- const evalTokens = {
13400
- prompt: tokenUsage.prompt || 0,
13401
- completion: tokenUsage.completion || 0,
13402
- total: tokenUsage.total || combinedTotal,
13403
- cached: tokenUsage.cached || 0,
13404
- completionDetails: tokenUsage.completionDetails || {
13405
- reasoning: 0,
13406
- acceptedPrediction: 0,
13407
- rejectedPrediction: 0
13408
- }
13409
- };
13410
- const grandTotal = evalTokens.total + (tokenUsage.assertions?.total || 0);
13411
- lines.push(`${chalk.default.bold("Total Tokens:")} ${chalk.default.white.bold(grandTotal.toLocaleString())}`);
13412
- if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.default.gray("Probes:")} ${chalk.default.white(tokenUsage.numRequests.toLocaleString())}`);
13413
- if (evalTokens.total > 0) {
13414
- const evalParts = [];
13415
- if (evalTokens.prompt > 0) evalParts.push(`${evalTokens.prompt.toLocaleString()} prompt`);
13416
- if (evalTokens.completion > 0) evalParts.push(`${evalTokens.completion.toLocaleString()} completion`);
13417
- if (evalTokens.cached > 0) if (evalTokens.cached === evalTokens.total && evalParts.length === 0) evalParts.push("cached");
13418
- else evalParts.push(`${evalTokens.cached.toLocaleString()} cached`);
13419
- if (evalTokens.completionDetails?.reasoning && evalTokens.completionDetails.reasoning > 0) evalParts.push(`${evalTokens.completionDetails.reasoning.toLocaleString()} reasoning`);
13420
- lines.push(` ${chalk.default.gray("Eval:")} ${chalk.default.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
13421
- }
13422
- if (tokenUsage.assertions && tokenUsage.assertions.total && tokenUsage.assertions.total > 0) {
13423
- const gradingParts = [];
13424
- if (tokenUsage.assertions.prompt && tokenUsage.assertions.prompt > 0) gradingParts.push(`${tokenUsage.assertions.prompt.toLocaleString()} prompt`);
13425
- if (tokenUsage.assertions.completion && tokenUsage.assertions.completion > 0) gradingParts.push(`${tokenUsage.assertions.completion.toLocaleString()} completion`);
13426
- if (tokenUsage.assertions.cached && tokenUsage.assertions.cached > 0) if (tokenUsage.assertions.cached === tokenUsage.assertions.total && gradingParts.length === 0) gradingParts.push("cached");
13427
- else gradingParts.push(`${tokenUsage.assertions.cached.toLocaleString()} cached`);
13428
- if (tokenUsage.assertions.completionDetails?.reasoning && tokenUsage.assertions.completionDetails.reasoning > 0) gradingParts.push(`${tokenUsage.assertions.completionDetails.reasoning.toLocaleString()} reasoning`);
13429
- lines.push(` ${chalk.default.gray("Grading:")} ${chalk.default.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
13430
- }
13431
- const providerIds = tracker.getProviderIds();
13432
- if (providerIds.length > 1) {
13433
- lines.push("");
13434
- lines.push(chalk.default.bold("Providers:"));
13435
- const sortedProviders = providerIds.map((id) => ({
13436
- id,
13437
- usage: tracker.getProviderUsage(id)
13438
- })).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
13439
- for (const { id, usage } of sortedProviders) if ((usage.total || 0) > 0 || (usage.prompt || 0) + (usage.completion || 0) > 0) {
13440
- const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
13441
- const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
13442
- const details = [];
13443
- if (usage.prompt && usage.prompt > 0) details.push(`${usage.prompt.toLocaleString()} prompt`);
13444
- if (usage.completion && usage.completion > 0) details.push(`${usage.completion.toLocaleString()} completion`);
13445
- if (usage.cached && usage.cached > 0) if (usage.cached === displayTotal && details.length === 0) details.push("cached");
13446
- else details.push(`${usage.cached.toLocaleString()} cached`);
13447
- if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) details.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
13448
- const breakdown = ` (${`${usage.numRequests || 0} requests`}${details.length > 0 ? "; " : ""}${details.join(", ")})`;
13449
- lines.push(` ${chalk.default.gray(displayId + ":")} ${chalk.default.white(displayTotal.toLocaleString())}${breakdown}`);
13450
- }
13451
- }
13452
- }
13453
- lines.push("");
13454
- const totalTests = successes + failures + errors;
13455
- const formatResultPercentage = (count) => {
13456
- const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
13457
- return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
13458
- };
13459
- const formatResultLine = (count, label, icon, iconColor) => {
13460
- return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.default.white.bold(count.toLocaleString())} ${chalk.default.white(label)} ${chalk.default.gray(`(${formatResultPercentage(count)})`)}`;
13461
- };
13462
- const errorLabel = errors === 1 ? "error" : "errors";
13463
- lines.push(chalk.default.bold("Results:"));
13464
- lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.default.green));
13465
- lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.default.red));
13466
- lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.default.red));
13467
- const durationDisplay = formatDuration(duration);
13468
- lines.push(chalk.default.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
13469
- lines.push("");
13470
- return lines;
14451
+ return [
14452
+ getCompletionMessage({
14453
+ completionType: params.isRedteam ? "Red team" : "Eval",
14454
+ evalId: params.evalId,
14455
+ shareableUrl: params.shareableUrl,
14456
+ wasAborted: params.targetErrorStatus != null,
14457
+ writeToDatabase: params.writeToDatabase,
14458
+ activelySharing: params.activelySharing ?? false
14459
+ }),
14460
+ ...getAbortSummaryLines(params.targetErrorStatus),
14461
+ ...getGuidanceLines({
14462
+ writeToDatabase: params.writeToDatabase,
14463
+ shareableUrl: params.shareableUrl,
14464
+ wantsToShare: params.wantsToShare,
14465
+ activelySharing: params.activelySharing ?? false,
14466
+ hasExplicitDisable: params.hasExplicitDisable,
14467
+ cloudEnabled: params.cloudEnabled
14468
+ }),
14469
+ "",
14470
+ ...getTokenUsageLines(params.tokenUsage, params.isRedteam, params.tracker),
14471
+ ...getResultsLines(params)
14472
+ ];
13471
14473
  }
13472
14474
  //#endregion
13473
14475
  //#region src/commands/retry.ts
@@ -13625,7 +14627,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13625
14627
  const cloudConfigId = uuidConfigArgs[0];
13626
14628
  if (cmdObj.watch) throw new Error("--watch is not supported when using a cloud config UUID with -c. Use a local config file path for watch mode.");
13627
14629
  try {
13628
- defaultConfig = await require_providers.getEvalConfigFromCloud(cloudConfigId);
14630
+ defaultConfig = await require_storage.getEvalConfigFromCloud(cloudConfigId);
13629
14631
  } catch (error) {
13630
14632
  const reason = error instanceof Error ? error.message : String(error);
13631
14633
  throw new Error(`Failed to load cloud eval config "${cloudConfigId}". ${reason}. Cloud UUID inputs do not fall back to local file paths. Check authentication and that the UUID exists.`);
@@ -13719,14 +14721,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13719
14721
  require_logger.state.resume = true;
13720
14722
  require_logger.state.retryMode = true;
13721
14723
  } else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
13722
- if (!cmdObj.envPath && commandLineOptions?.envPath) {
14724
+ if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
13723
14725
  require_logger.logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
13724
14726
  require_util.setupEnv(commandLineOptions.envPath);
13725
14727
  }
13726
- if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) require_logger.logger.warn(chalk.default.yellow(dedent.default`
13727
- Warning: Config file has a redteam section but no test cases.
13728
- Did you mean to run ${chalk.default.bold("promptfoo redteam generate")} instead?
13729
- `));
14728
+ warnIfRedteamConfigHasNoTests(config, testSuite);
13730
14729
  if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
13731
14730
  const maybeUrl = config.providers[0]?.config?.url;
13732
14731
  if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) require_telemetry.telemetry.record("feature_used", { feature: "redteam_run_with_example" });
@@ -13774,7 +14773,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13774
14773
  };
13775
14774
  testSuite.tests = await filterTests(testSuite, filterOptions);
13776
14775
  }
13777
- if (!require_server.neverGenerateRemote() && config.redteam && config.redteam.plugins && config.redteam.plugins.length > 0 && testSuite.tests && testSuite.tests.length > 0) {
14776
+ if (!require_remoteGeneration.neverGenerateRemote() && config.redteam && config.redteam.plugins && config.redteam.plugins.length > 0 && testSuite.tests && testSuite.tests.length > 0) {
13778
14777
  let hasValidEmail = false;
13779
14778
  while (!hasValidEmail) {
13780
14779
  const { emailNeedsValidation } = await require_accounts.promptForEmailUnverified();
@@ -13792,7 +14791,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13792
14791
  process.exitCode = 1;
13793
14792
  return new Eval({}, { persisted: false });
13794
14793
  }
13795
- await require_providers.checkCloudPermissions(config);
14794
+ await require_storage.checkCloudPermissions(config);
13796
14795
  const options = {
13797
14796
  ...evaluateOptions,
13798
14797
  showProgressBar: require_logger.getLogLevel() === "debug" ? false : cmdObj.progressBar === void 0 ? evaluateOptions.showProgressBar === void 0 ? true : evaluateOptions.showProgressBar : cmdObj.progressBar !== false,
@@ -13834,7 +14833,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13834
14833
  ${zod.z.prettifyError(testSuiteSchema.error)}
13835
14834
 
13836
14835
  Please review your promptfooconfig.yaml configuration.`));
13837
- const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, { runtimeOptions: options }) : new Eval(config, { runtimeOptions: options });
14836
+ const author = require_accounts.getAuthor();
14837
+ const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
14838
+ author,
14839
+ runtimeOptions: options
14840
+ }) : new Eval(config, {
14841
+ author,
14842
+ runtimeOptions: options
14843
+ });
13838
14844
  const abortController = new AbortController();
13839
14845
  const previousAbortSignal = evaluateOptions.abortSignal;
13840
14846
  evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
@@ -13945,7 +14951,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13945
14951
  const paths = (Array.isArray(outputPath) ? outputPath : [outputPath]).filter((p) => typeof p === "string" && p.length > 0 && !p.endsWith(".jsonl"));
13946
14952
  const isRedteam = Boolean(config.redteam);
13947
14953
  const duration = Math.round((Date.now() - startTime) / 1e3);
13948
- const tracker = require_providers.TokenUsageTracker.getInstance();
14954
+ const tracker = require_shared.TokenUsageTracker.getInstance();
13949
14955
  const targetErrorStatus = await evalRecord.findTargetErrorStatus();
13950
14956
  const summaryLines = generateEvalSummary({
13951
14957
  evalId: evalRecord.id,
@@ -13975,7 +14981,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
13975
14981
  } else for (const line of summaryLines) require_logger.logger.info(line);
13976
14982
  let shareableUrl = null;
13977
14983
  if (sharePromise != null) {
13978
- const orgContext = await require_providers.getOrgContext();
14984
+ const orgContext = await require_storage.getOrgContext();
13979
14985
  const orgSuffix = orgContext ? ` to ${orgContext.organizationName}${orgContext.teamName ? ` > ${orgContext.teamName}` : ""}` : "";
13980
14986
  if (process.stdout.isTTY && !require_logger.isCI()) {
13981
14987
  const spinner = (0, ora.default)({
@@ -14150,7 +15156,7 @@ async function doRedteamRun(options) {
14150
15156
  redteamPath = path.join(configDir, "redteam.yaml");
14151
15157
  }
14152
15158
  try {
14153
- const healthUrl = require_server.getRemoteHealthUrl();
15159
+ const healthUrl = require_remoteGeneration.getRemoteHealthUrl();
14154
15160
  if (healthUrl) {
14155
15161
  require_logger.logger.debug(`Checking Promptfoo API health at ${healthUrl}...`);
14156
15162
  const healthResult = await checkRemoteHealth(healthUrl);
@@ -14236,65 +15242,175 @@ async function doRedteamRun(options) {
14236
15242
  return evalResult;
14237
15243
  }
14238
15244
  //#endregion
15245
+ //#region src/types/transform.ts
15246
+ /** Runtime type guard for `TransformFunction` values. */
15247
+ function isTransformFunction(value) {
15248
+ return typeof value === "function";
15249
+ }
15250
+ //#endregion
14239
15251
  //#region src/index.ts
15252
+ /**
15253
+ * Shallow-clone a test case so the caller can swap in resolved ApiProvider
15254
+ * instances on `options.provider` / `assert[].provider` without leaking those
15255
+ * mutations back to the input. The input may alias the unified config written
15256
+ * to the Eval record, and a live SDK client (e.g. Bedrock's BedrockRuntime,
15257
+ * Anthropic's client) holds circular references that break drizzle's JSON
15258
+ * serialization on `evalRecord.save()`. Fixes #8687.
15259
+ *
15260
+ * Detaches only `options` and `assert[]`. Other reference fields (`provider`,
15261
+ * `vars`, `metadata`, `providerOutput`) remain aliased — callers must reassign
15262
+ * those by reference rather than mutating in place. `assert-set` children are
15263
+ * not deep-cloned because the resolve loop skips `assert-set`; if that ever
15264
+ * changes, extend this helper.
15265
+ */
15266
+ function cloneTestForResolve(test) {
15267
+ const cloned = { ...test };
15268
+ if (test.options) cloned.options = { ...test.options };
15269
+ if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
15270
+ return cloned;
15271
+ }
15272
+ function toSerializableProviderRef(provider) {
15273
+ if (require_types.isApiProvider(provider)) return require_evalResult.sanitizeProvider(provider);
15274
+ if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
15275
+ return provider;
15276
+ }
15277
+ function isRecord(value) {
15278
+ return Boolean(value && typeof value === "object" && !Array.isArray(value));
15279
+ }
15280
+ function withSerializableProvider(record) {
15281
+ if (!require_types.isApiProvider(record.provider)) return record;
15282
+ return {
15283
+ ...record,
15284
+ provider: require_evalResult.sanitizeProvider(record.provider)
15285
+ };
15286
+ }
15287
+ /**
15288
+ * Function-valued transforms are first-class at runtime but are silently dropped
15289
+ * by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
15290
+ * a function reference, so replace every `transform`-like field with a
15291
+ * `[inline function]: name` marker. Non-function values pass through unchanged.
15292
+ *
15293
+ * `droppedRef.value` is flipped to `true` the first time a function is replaced
15294
+ * so the caller can emit a single warning instead of logging per field.
15295
+ */
15296
+ function replaceFunctionTransforms(record, droppedRef) {
15297
+ let result;
15298
+ for (const key of require_transform$1.TRANSFORM_KEYS) {
15299
+ const value = record[key];
15300
+ if (!isTransformFunction(value)) continue;
15301
+ if (!result) result = { ...record };
15302
+ result[key] = value.name ? `${require_transform$1.INLINE_FUNCTION_LABEL}: ${value.name}` : require_transform$1.INLINE_FUNCTION_LABEL;
15303
+ droppedRef.value = true;
15304
+ }
15305
+ return result ?? record;
15306
+ }
15307
+ function toSerializableAssertion(assertion, droppedRef) {
15308
+ if (!isRecord(assertion)) return assertion;
15309
+ let sanitizedAssertion = withSerializableProvider(assertion);
15310
+ sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
15311
+ if (Array.isArray(assertion.assert)) sanitizedAssertion = {
15312
+ ...sanitizedAssertion,
15313
+ assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
15314
+ };
15315
+ return sanitizedAssertion;
15316
+ }
15317
+ function toSerializableTestCase(test, droppedRef) {
15318
+ if (!isRecord(test)) return test;
15319
+ let sanitizedTest = withSerializableProvider(test);
15320
+ if (isRecord(test.options)) {
15321
+ let options = withSerializableProvider(test.options);
15322
+ options = replaceFunctionTransforms(options, droppedRef);
15323
+ if (options !== test.options) sanitizedTest = {
15324
+ ...sanitizedTest,
15325
+ options
15326
+ };
15327
+ }
15328
+ if (Array.isArray(test.assert)) sanitizedTest = {
15329
+ ...sanitizedTest,
15330
+ assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
15331
+ };
15332
+ return sanitizedTest;
15333
+ }
15334
+ function toSerializableScenario(scenario, droppedRef) {
15335
+ if (!isRecord(scenario)) return scenario;
15336
+ if (!Array.isArray(scenario.tests)) return scenario;
15337
+ return {
15338
+ ...scenario,
15339
+ tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
15340
+ };
15341
+ }
15342
+ function createSerializableUnifiedConfig(testSuite, prompts) {
15343
+ const droppedRef = { value: false };
15344
+ const config = {
15345
+ ...testSuite,
15346
+ providers: toSerializableProviderRef(testSuite.providers),
15347
+ defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
15348
+ tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
15349
+ scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
15350
+ prompts
15351
+ };
15352
+ if (droppedRef.value && testSuite.writeLatestResults) require_logger.logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
15353
+ return config;
15354
+ }
14240
15355
  async function evaluate(testSuite, options = {}) {
14241
- if (testSuite.writeLatestResults) await runDbMigrations();
14242
- const loadedProviders = await require_providers.loadApiProviders(testSuite.providers, { env: testSuite.env });
15356
+ const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
15357
+ if (testSuiteConfig.writeLatestResults) await runDbMigrations();
15358
+ const loadedProviders = await require_providers.loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
14243
15359
  const providerMap = {};
14244
15360
  for (const p of loadedProviders) {
14245
15361
  providerMap[p.id()] = p;
14246
15362
  if (p.label) providerMap[p.label] = p;
14247
15363
  }
14248
- let resolvedDefaultTest = testSuite.defaultTest;
14249
- if (typeof testSuite.defaultTest === "string" && testSuite.defaultTest.startsWith("file://")) resolvedDefaultTest = await require_util.maybeLoadFromExternalFile(testSuite.defaultTest);
15364
+ let resolvedDefaultTest = testSuiteConfig.defaultTest;
15365
+ if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await require_util.maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
14250
15366
  const constructedTestSuite = {
14251
- ...testSuite,
15367
+ ...testSuiteConfig,
14252
15368
  defaultTest: resolvedDefaultTest,
14253
- scenarios: testSuite.scenarios,
15369
+ scenarios: testSuiteConfig.scenarios,
14254
15370
  providers: loadedProviders,
14255
- tests: await readTests(testSuite.tests),
14256
- nunjucksFilters: await require_util.readFilters(testSuite.nunjucksFilters || {}),
14257
- prompts: await require_graders.processPrompts(testSuite.prompts)
14258
- };
14259
- if (typeof constructedTestSuite.defaultTest === "object") {
14260
- if (constructedTestSuite.defaultTest?.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
14261
- env: testSuite.env,
15371
+ tests: await readTests(testSuiteConfig.tests),
15372
+ nunjucksFilters: await require_util.readFilters(testSuiteConfig.nunjucksFilters || {}),
15373
+ prompts: await require_graders.processPrompts(testSuiteConfig.prompts)
15374
+ };
15375
+ if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
15376
+ constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
15377
+ if (constructedTestSuite.defaultTest.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
15378
+ env: testSuiteConfig.env,
14262
15379
  basePath: require_logger.state.basePath
14263
15380
  });
14264
- if (constructedTestSuite.defaultTest?.options?.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
14265
- env: testSuite.env,
15381
+ if (constructedTestSuite.defaultTest.options?.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
15382
+ env: testSuiteConfig.env,
14266
15383
  basePath: require_logger.state.basePath
14267
15384
  });
14268
15385
  }
14269
- for (const test of constructedTestSuite.tests || []) {
15386
+ constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
15387
+ for (const test of constructedTestSuite.tests) {
14270
15388
  if (test.options?.provider && !require_types.isApiProvider(test.options.provider)) test.options.provider = await require_providers.resolveProvider(test.options.provider, providerMap, {
14271
- env: testSuite.env,
15389
+ env: testSuiteConfig.env,
14272
15390
  basePath: require_logger.state.basePath
14273
15391
  });
14274
- if (test.assert) for (const assertion of test.assert) {
15392
+ for (const assertion of test.assert || []) {
14275
15393
  if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
14276
15394
  if (assertion.provider && !require_types.isApiProvider(assertion.provider)) assertion.provider = await require_providers.resolveProvider(assertion.provider, providerMap, {
14277
- env: testSuite.env,
15395
+ env: testSuiteConfig.env,
14278
15396
  basePath: require_logger.state.basePath
14279
15397
  });
14280
15398
  }
14281
15399
  }
14282
15400
  if (options.cache === false) require_cache.disableCache();
14283
- const parsedProviderPromptMap = require_graders.readProviderPromptMap(testSuite, constructedTestSuite.prompts);
14284
- const unifiedConfig = {
14285
- ...testSuite,
14286
- prompts: constructedTestSuite.prompts
14287
- };
14288
- const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
15401
+ const parsedProviderPromptMap = require_graders.readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
15402
+ const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
15403
+ const author = require_accounts.getAuthor(suiteAuthor);
15404
+ const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
14289
15405
  const ret = await evaluate$1({
14290
15406
  ...constructedTestSuite,
14291
15407
  providerPromptMap: parsedProviderPromptMap
14292
15408
  }, evalRecord, {
14293
15409
  eventSource: "library",
14294
- isRedteam: Boolean(testSuite.redteam),
15410
+ isRedteam: Boolean(testSuiteConfig.redteam),
14295
15411
  ...options
14296
15412
  });
14297
- if (testSuite.writeLatestResults && testSuite.sharing) if (isSharingEnabled(ret)) try {
15413
+ if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
14298
15414
  const shareableUrl = await createShareableUrl(ret, { silent: true });
14299
15415
  if (shareableUrl) {
14300
15416
  ret.shareableUrl = shareableUrl;
@@ -14305,9 +15421,9 @@ async function evaluate(testSuite, options = {}) {
14305
15421
  require_logger.logger.warn(`Failed to create shareable URL: ${error}`);
14306
15422
  }
14307
15423
  else require_logger.logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
14308
- if (testSuite.outputPath) {
14309
- if (typeof testSuite.outputPath === "string") await require_util.writeOutput(testSuite.outputPath, evalRecord, null);
14310
- else if (Array.isArray(testSuite.outputPath)) await require_util.writeMultipleOutputs(testSuite.outputPath, evalRecord, null);
15424
+ if (testSuiteConfig.outputPath) {
15425
+ if (typeof testSuiteConfig.outputPath === "string") await require_util.writeOutput(testSuiteConfig.outputPath, evalRecord, null);
15426
+ else if (Array.isArray(testSuiteConfig.outputPath)) await require_util.writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
14311
15427
  }
14312
15428
  return ret;
14313
15429
  }
@@ -14319,7 +15435,7 @@ const redteam = {
14319
15435
  },
14320
15436
  Graders: require_graders.GRADERS,
14321
15437
  Plugins,
14322
- Strategies: require_providers.Strategies,
15438
+ Strategies: require_strategies.Strategies,
14323
15439
  Base: {
14324
15440
  Plugin: require_graders.RedteamPluginBase,
14325
15441
  Grader: require_graders.RedteamGraderBase
@@ -14348,9 +15464,18 @@ exports.CompletedPromptSchema = require_types.CompletedPromptSchema;
14348
15464
  exports.CompletionTokenDetailsSchema = require_types.CompletionTokenDetailsSchema;
14349
15465
  exports.ConversationMessageSchema = require_types.ConversationMessageSchema;
14350
15466
  exports.DerivedMetricSchema = require_types.DerivedMetricSchema;
15467
+ exports.DocumentMediaInjectionPlacementSchema = require_types.DocumentMediaInjectionPlacementSchema;
15468
+ exports.DocumentMediaInjectionPlacementValues = require_types.DocumentMediaInjectionPlacementValues;
15469
+ exports.DocxInjectionPlacementSchema = require_types.DocxInjectionPlacementSchema;
15470
+ exports.DocxInjectionPlacementValues = require_types.DocxInjectionPlacementValues;
14351
15471
  exports.EvalResultsFilterMode = require_types.EvalResultsFilterMode;
14352
15472
  exports.EvaluateOptionsSchema = require_types.EvaluateOptionsSchema;
14353
15473
  exports.GradingConfigSchema = require_types.GradingConfigSchema;
15474
+ exports.InputConfigSchema = require_types.InputConfigSchema;
15475
+ exports.InputDefinitionObjectSchema = require_types.InputDefinitionObjectSchema;
15476
+ exports.InputDefinitionSchema = require_types.InputDefinitionSchema;
15477
+ exports.InputTypeSchema = require_types.InputTypeSchema;
15478
+ exports.InputTypeValues = require_types.InputTypeValues;
14354
15479
  exports.InputsSchema = require_types.InputsSchema;
14355
15480
  exports.NotPrefixedAssertionTypesSchema = require_types.NotPrefixedAssertionTypesSchema;
14356
15481
  exports.OutputConfigSchema = require_types.OutputConfigSchema;
@@ -14373,6 +15498,7 @@ exports.TestSuiteSchema = require_types.TestSuiteSchema;
14373
15498
  exports.UnifiedConfigSchema = require_types.UnifiedConfigSchema;
14374
15499
  exports.VarsSchema = require_types.VarsSchema;
14375
15500
  exports.assertions = assertions_default;
15501
+ exports.buildInputPromptDescription = require_types.buildInputPromptDescription;
14376
15502
  Object.defineProperty(exports, "cache", {
14377
15503
  enumerable: true,
14378
15504
  get: function() {
@@ -14382,12 +15508,17 @@ Object.defineProperty(exports, "cache", {
14382
15508
  exports.default = src_default;
14383
15509
  exports.evaluate = evaluate;
14384
15510
  exports.generateTable = generateTable;
15511
+ exports.getInputDescription = require_types.getInputDescription;
15512
+ exports.getInputType = require_types.getInputType;
14385
15513
  exports.guardrails = guardrails;
14386
15514
  exports.isApiProvider = require_types.isApiProvider;
14387
15515
  exports.isGradingResult = require_types.isGradingResult;
14388
15516
  exports.isProviderOptions = require_types.isProviderOptions;
14389
15517
  exports.isResultFailureReason = require_types.isResultFailureReason;
15518
+ exports.isTransformFunction = isTransformFunction;
14390
15519
  exports.loadApiProvider = require_providers.loadApiProvider;
15520
+ exports.normalizeInputDefinition = require_types.normalizeInputDefinition;
15521
+ exports.normalizeInputs = require_types.normalizeInputs;
14391
15522
  exports.redteam = redteam;
14392
15523
 
14393
15524
  //# sourceMappingURL=index.cjs.map