promptfoo 0.121.5 → 0.121.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (490) hide show
  1. package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
  2. package/dist/src/{accounts-CaLNYnf7.js → accounts-Ca7WIoPY.js} +12 -7
  3. package/dist/src/{accounts-CLJHCDDb.js → accounts-CjFnOPmb.js} +13 -8
  4. package/dist/src/{accounts-BIFntVWB.cjs → accounts-CmWzeD2d.cjs} +16 -10
  5. package/dist/src/{accounts-bnyHT7Ju.js → accounts-DanM1wq_.js} +12 -7
  6. package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-CJ0j3fBi.js} +2 -2
  7. package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-DDEGRV9v.js} +2 -2
  8. package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
  9. package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-TxUEMPYS.js} +2 -2
  10. package/dist/src/{agents-WULPVjbH.cjs → agents-B4sRuXg3.cjs} +7 -6
  11. package/dist/src/{agents-DhxWMCtH.js → agents-B8q7h_ek.js} +4 -4
  12. package/dist/src/{agents-BGqaTDnr.js → agents-CBgJvRkB.js} +20 -9
  13. package/dist/src/{agents-n6vPqV3i.js → agents-CYn2n3QP.js} +4 -4
  14. package/dist/src/{agents-BV9yFpXX.js → agents-D-vDNFx4.js} +20 -9
  15. package/dist/src/{agents-BYdMl1UE.js → agents-LrHuQqr1.js} +20 -9
  16. package/dist/src/{agents-emVcx3yh.js → agents-QGg76OF-.js} +2 -2
  17. package/dist/src/{agents-DiWmQYH9.cjs → agents-eHZ9nlgA.cjs} +21 -10
  18. package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-CJEbQ0o6.js} +6 -6
  19. package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-D5HXzZ0s.js} +6 -6
  20. package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
  21. package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-eYv3a_DK.js} +6 -6
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
  24. package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
  25. package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
  26. package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
  27. package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
  28. package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
  29. package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
  30. package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
  31. package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
  32. package/dist/src/app/index.html +6 -6
  33. package/dist/src/{audio-Da8U9IS5.js → audio-BqnRvcWG.js} +3 -3
  34. package/dist/src/{audio-BvpTOArF.js → audio-CPMtV1yR.js} +3 -3
  35. package/dist/src/{audio-CScmnmEB.js → audio-DyiebVB3.js} +3 -3
  36. package/dist/src/{audio-C0vDeS0j.cjs → audio-FnxbEnSE.cjs} +4 -4
  37. package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
  38. package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
  39. package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
  40. package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
  41. package/dist/src/{base-BTux96b1.js → base-CKLo890h.js} +4 -3
  42. package/dist/src/{base-BOMaNEes.js → base-Co80MMCi.js} +4 -3
  43. package/dist/src/{base-Tw6uhH8K.cjs → base-DGJW48uz.cjs} +5 -4
  44. package/dist/src/{base-dYsl2hmL.js → base-E9I8zXjz.js} +4 -3
  45. package/dist/src/bestOfN-B3wNzjSB.js +137 -0
  46. package/dist/src/bestOfN-BBsO41z4.js +136 -0
  47. package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
  48. package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
  49. package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
  50. package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
  51. package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
  52. package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
  53. package/dist/src/{cache-DGg-yTZG.cjs → cache-CPGUA4Yl.cjs} +135 -25
  54. package/dist/src/cache-Cf7b4pWE.js +3 -0
  55. package/dist/src/{cache-Bzttsk0X.js → cache-DIXbtkNO.js} +125 -10
  56. package/dist/src/{cache-BI5BY7ey.js → cache-DpPWrkTE.js} +127 -11
  57. package/dist/src/{cache-Cr-qWIbP.js → cache-roFAE0cI.js} +125 -10
  58. package/dist/src/{chat-DChSH_Es.js → chat-CUCorGiL.js} +9 -9
  59. package/dist/src/{chat-DH97tVV9.cjs → chat-DG1wG4w0.cjs} +6 -6
  60. package/dist/src/{chat-Cx_LkwvZ.js → chat-Dabu84Br.js} +11 -11
  61. package/dist/src/{chat-BLOdH60v.js → chat-DqUFcWI0.js} +11 -11
  62. package/dist/src/{chat-vYqqv1gP.cjs → chat-DxTDQ83C.cjs} +14 -13
  63. package/dist/src/{chat-DG2LkwLq.js → chat-GmlolEwo.js} +4 -4
  64. package/dist/src/{chat-aMQZw6R7.js → chat-TP8Qifkh.js} +4 -4
  65. package/dist/src/{chat-D9nudO9b.js → chat-iwaM5UTQ.js} +4 -4
  66. package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
  67. package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
  68. package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
  69. package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
  70. package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-BQNp_y-F.js} +209 -64
  71. package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-D5Jl0SDh.js} +210 -65
  72. package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-DH416NBD.cjs} +216 -70
  73. package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-x1XJ1-pU.js} +210 -65
  74. package/dist/src/{cloud-Da0bofJd.js → cloud-D3DiFqH6.js} +2 -2
  75. package/dist/src/cloud-p96PA4MH.js +3 -0
  76. package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-B6NVI3ax.js} +4 -4
  77. package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
  78. package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-RFSojyXG.js} +4 -4
  79. package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-r4tbYmWU.js} +4 -4
  80. package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-BCkLouto.js} +3 -3
  81. package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-BaZ4insB.js} +3 -3
  82. package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-CF-Vb-2Z.js} +3 -3
  83. package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
  84. package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-B8KHEiF4.js} +5 -5
  85. package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-CnrLBCeA.cjs} +12 -11
  86. package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-DIXZ230V.js} +4 -4
  87. package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-Dd22dC_N.js} +5 -5
  88. package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-B6Wah8Pa.js} +5 -5
  89. package/dist/src/codex-sdk-BGjVAk23.js +3 -0
  90. package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
  91. package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-CmQABzV3.js} +3 -3
  92. package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-D2d54RL8.js} +5 -5
  93. package/dist/src/{cometapi-B5ImDlSm.js → cometapi-Bu9B8NUY.js} +7 -7
  94. package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CtzNCHKu.js} +7 -7
  95. package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-DHCDlQUI.cjs} +8 -8
  96. package/dist/src/{cometapi-CC7hWxmX.js → cometapi-OBILPLlu.js} +7 -7
  97. package/dist/src/{completion-Vq_ad618.js → completion-CO2e1_62.js} +4 -4
  98. package/dist/src/{completion-DtQ72Bm3.cjs → completion-CSYfl2cd.cjs} +6 -6
  99. package/dist/src/{completion-2iuYVxwi.js → completion-DZNxcyfG.js} +5 -5
  100. package/dist/src/{completion-CrD6MQ93.js → completion-sNvCLTAP.js} +5 -5
  101. package/dist/src/constants-BjJV0cRr.js +6 -0
  102. package/dist/src/constants-DH5XYLKZ.js +7 -0
  103. package/dist/src/constants-DZGEFLsu.js +6 -0
  104. package/dist/src/constants-a2kYssQk.cjs +11 -0
  105. package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
  106. package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
  107. package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
  108. package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
  109. package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
  110. package/dist/src/crescendo-CU_Y2i-m.js +702 -0
  111. package/dist/src/crescendo-J1Xx4_zb.js +703 -0
  112. package/dist/src/crescendo-QiaSLW0d.js +701 -0
  113. package/dist/src/custom-BJfP00Bh.js +619 -0
  114. package/dist/src/custom-CZVn-1-r.js +620 -0
  115. package/dist/src/custom-Cqia7M0D.cjs +621 -0
  116. package/dist/src/custom-notggYVl.js +618 -0
  117. package/dist/src/{docker--3qzPa-6.js → docker-4D1eL6Gq.js} +5 -5
  118. package/dist/src/{docker-Dorv4_Dg.js → docker-BBv1WUDu.js} +5 -5
  119. package/dist/src/{docker-D3AY-5F5.cjs → docker-D06JUoe2.cjs} +6 -6
  120. package/dist/src/{docker-DCsCDvwM.js → docker-DdJQBxK9.js} +5 -5
  121. package/dist/src/{embedding-DNRvZwRN.js → embedding--UZVe4_7.js} +5 -5
  122. package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BbrwopfX.cjs} +6 -6
  123. package/dist/src/{embedding-ChS1ivFS.js → embedding-Bi3rxrZF.js} +5 -5
  124. package/dist/src/{embedding-D_bI4NDq.js → embedding-C251p1-8.js} +4 -4
  125. package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
  126. package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
  127. package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
  128. package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
  129. package/dist/src/{eval-DJ_4A-tr.js → eval-0VRANImH.js} +19 -19
  130. package/dist/src/{eval-BQPLBJbw.js → eval-DscR5iOM.js} +1 -1
  131. package/dist/src/{evalResult-pSvGWFMo.js → evalResult-2RRJvFyB.js} +18 -11
  132. package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-CvtS8h8u.cjs} +29 -11
  133. package/dist/src/evalResult-DqzsS6_W.js +3 -0
  134. package/dist/src/{evalResult-D6P5I5il.js → evalResult-eUkJv9Ko.js} +17 -10
  135. package/dist/src/evaluator-DNdJF1Gv.js +3 -0
  136. package/dist/src/{evaluator-D-UIbbYq.js → evaluator-DRoiYB2q.js} +258 -132
  137. package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
  138. package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
  139. package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
  140. package/dist/src/{extractor-YlZbUMsL.js → extractor-BR7XAzAL.js} +5 -5
  141. package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-BdxEtt3J.cjs} +6 -6
  142. package/dist/src/{extractor-DxyiFhPk.js → extractor-CIW3iN-b.js} +5 -5
  143. package/dist/src/{extractor-BM3jRERL.js → extractor-CxRtnaHl.js} +5 -5
  144. package/dist/src/{fetch-Y5qX_kST.js → fetch-BufrQtvR.js} +90 -26
  145. package/dist/src/{fetch-B6ch2nU2.js → fetch-DXUnXkVU.js} +86 -26
  146. package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Dw4XZHjj.cjs} +115 -32
  147. package/dist/src/{fetch-D9xxyC1p.js → fetch-It34O8Ur.js} +90 -26
  148. package/dist/src/fetch-_YgGd2qv.js +3 -0
  149. package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
  150. package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
  151. package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
  152. package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
  153. package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
  154. package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
  155. package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
  156. package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
  157. package/dist/src/goat-Ckd3q3AY.js +467 -0
  158. package/dist/src/goat-Qgurm-NP.js +466 -0
  159. package/dist/src/goat-ghadEDdy.js +465 -0
  160. package/dist/src/goat-una6pZGP.cjs +469 -0
  161. package/dist/src/graders-BDT7dif6.js +3 -0
  162. package/dist/src/{graders-CgPn32yp.js → graders-BGP99PdK.js} +1017 -84
  163. package/dist/src/{graders-BoUqsCEm.js → graders-BX0f2tvS.js} +1022 -84
  164. package/dist/src/{graders-CwrbifOo.js → graders-C0nXU_ZP.js} +1020 -82
  165. package/dist/src/{graders-Bw1wk_21.cjs → graders-ClrU2fnd.cjs} +1085 -128
  166. package/dist/src/hydra-BSNZZm2M.js +543 -0
  167. package/dist/src/hydra-BxdG4nkg.js +541 -0
  168. package/dist/src/hydra-DE4xWwyc.js +542 -0
  169. package/dist/src/hydra-DrJttnvw.cjs +542 -0
  170. package/dist/src/image-B4oBtu6J.js +443 -0
  171. package/dist/src/{image-Dr_3I3nK.js → image-BN-hjLL9.js} +3 -3
  172. package/dist/src/{image-BeWaInPF.js → image-B_fPIwdg.js} +3 -3
  173. package/dist/src/image-BvUAW344.js +442 -0
  174. package/dist/src/image-Cvjwx1uY.js +442 -0
  175. package/dist/src/{image-D10dNAav.cjs → image-DfVCGPbI.cjs} +4 -4
  176. package/dist/src/{image-qjO6FWPs.js → image-QzmydkiG.js} +3 -3
  177. package/dist/src/image-X0oY4350.cjs +465 -0
  178. package/dist/src/index.cjs +688 -313
  179. package/dist/src/index.d.cts +3152 -1617
  180. package/dist/src/index.d.ts +3151 -1616
  181. package/dist/src/index.js +582 -223
  182. package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
  183. package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
  184. package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
  185. package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
  186. package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
  187. package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
  188. package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
  189. package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
  190. package/dist/src/inputVariables-B0qUChbV.js +467 -0
  191. package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
  192. package/dist/src/inputVariables-DXFdi7AI.js +468 -0
  193. package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
  194. package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
  195. package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
  196. package/dist/src/iterative-CpU6i2As.js +490 -0
  197. package/dist/src/iterative-DJQEQpG3.js +491 -0
  198. package/dist/src/iterative-DQBuWM-j.cjs +493 -0
  199. package/dist/src/iterative-FTS4Bz67.js +492 -0
  200. package/dist/src/iterativeImage-BUABMVOA.js +413 -0
  201. package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
  202. package/dist/src/iterativeImage-BzUapOUi.js +414 -0
  203. package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
  204. package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
  205. package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
  206. package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
  207. package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
  208. package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
  209. package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
  210. package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
  211. package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
  212. package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-BadkINlJ.js} +24 -10
  213. package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-Bi_8sV-H.js} +23 -9
  214. package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-CkMljjdg.js} +24 -10
  215. package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
  216. package/dist/src/{litellm-DRc4qWfc.js → litellm-BKBo0jpC.js} +4 -4
  217. package/dist/src/{litellm-BLSiANhk.js → litellm-BXyn5kZK.js} +4 -4
  218. package/dist/src/{litellm-DQGo_juI.js → litellm-CNcfbCfa.js} +4 -4
  219. package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CtAr7bKG.cjs} +5 -5
  220. package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
  221. package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BMX1iEB6.js} +5 -5
  222. package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-CR5TSpp4.js} +5 -5
  223. package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
  224. package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-OEMmS1RB.js} +5 -5
  225. package/dist/src/main.js +704 -208
  226. package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
  227. package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
  228. package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
  229. package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
  230. package/dist/src/{messages-CewuNcNS.js → messages-BabO-cX8.js} +17 -9
  231. package/dist/src/{messages-BnsVHUnm.cjs → messages-DBPir0TQ.cjs} +24 -15
  232. package/dist/src/{messages-B9dSjrNf.js → messages-DGUlSNU7.js} +18 -10
  233. package/dist/src/{messages-CI69Lasb.js → messages-vsE_-Lv0.js} +18 -10
  234. package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
  235. package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
  236. package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
  237. package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
  238. package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
  239. package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
  240. package/dist/src/mischievousUser-frFYKxu6.js +47 -0
  241. package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
  242. package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-CNV5bMSk.cjs} +7 -7
  243. package/dist/src/{modelslab-zpz9JcK0.js → modelslab-Cogmu4mG.js} +6 -6
  244. package/dist/src/{modelslab-D73OnKSx.js → modelslab-Dzst7VTU.js} +6 -6
  245. package/dist/src/{modelslab-BCLOtfek.js → modelslab-EyDczZ5A.js} +6 -6
  246. package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-BGPNBOMS.js} +5 -5
  247. package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-B_5NKFu1.js} +5 -5
  248. package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-C4eUJGse.js} +5 -5
  249. package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
  250. package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-BNGmgfFz.js} +3 -3
  251. package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-ChPlh5na.js} +2 -2
  252. package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-CrV0iaY_.js} +3 -3
  253. package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
  254. package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
  255. package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
  256. package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
  257. package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-BIHlu_36.js} +9 -8
  258. package/dist/src/{openclaw-DObVgpjC.js → openclaw-CF7fMido.js} +9 -8
  259. package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-Dphc01BY.cjs} +17 -15
  260. package/dist/src/{openclaw-DA8U4DsD.js → openclaw-zIJAsz3P.js} +9 -8
  261. package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-B3vlPLsp.js} +38 -3
  262. package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-D05JSgMQ.js} +39 -4
  263. package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-DoY6GbWw.cjs} +45 -9
  264. package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-sRKYHGoI.js} +39 -4
  265. package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
  266. package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
  267. package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
  268. package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
  269. package/dist/src/packageParser--MWTSrPW.js +36 -0
  270. package/dist/src/packageParser-CgE-ziRo.js +35 -0
  271. package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
  272. package/dist/src/packageParser-hwwSGnAZ.js +35 -0
  273. package/dist/src/processShim-BBxt7LKO.js +95 -0
  274. package/dist/src/processShim-BcGzU8fY.js +94 -0
  275. package/dist/src/processShim-C_z3aRvF.js +94 -0
  276. package/dist/src/processShim-DSY9BV2T.cjs +98 -0
  277. package/dist/src/promptLength-0qIHyhA5.js +71 -0
  278. package/dist/src/promptLength-4X-Wd8PG.js +72 -0
  279. package/dist/src/promptLength-B9nZEfO6.js +71 -0
  280. package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
  281. package/dist/src/promptfoo-BDrfT30-.js +180 -0
  282. package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
  283. package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
  284. package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
  285. package/dist/src/prompts-BYMtqPCw.js +259 -0
  286. package/dist/src/prompts-C-bqE1Yp.js +260 -0
  287. package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
  288. package/dist/src/prompts-DHhQsANy.js +259 -0
  289. package/dist/src/prompts-D_QpZ2Dm.js +271 -0
  290. package/dist/src/prompts-hNvWBD3z.cjs +284 -0
  291. package/dist/src/prompts-huDVH2CI.js +270 -0
  292. package/dist/src/prompts-p78Hul5i.cjs +289 -0
  293. package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
  294. package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
  295. package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
  296. package/dist/src/{providers-DT-GtF2t.js → providers-B9KzWxAX.js} +739 -11919
  297. package/dist/src/{providers-DRrerKra.js → providers-BCCz6_IX.js} +813 -11944
  298. package/dist/src/{providers-eDShy16E.cjs → providers-BDVVIQM6.cjs} +787 -12132
  299. package/dist/src/{providers-Ctcc592x.js → providers-BYAn82cf.js} +1 -1
  300. package/dist/src/{providers-CJh7iriU.js → providers-DVYRZP4E.js} +746 -11866
  301. package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
  302. package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
  303. package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
  304. package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
  305. package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-BAp6iTZD.cjs} +4 -4
  306. package/dist/src/{quiverai-CPKhWgaT.js → quiverai-BvIhI_0l.js} +3 -3
  307. package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CdTWPe-A.js} +3 -3
  308. package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-Cv7rJKDz.js} +3 -3
  309. package/dist/src/registry-BUJrgjwv.js +124 -0
  310. package/dist/src/registry-DXm1t_x0.js +125 -0
  311. package/dist/src/registry-Dp5EqoXc.js +124 -0
  312. package/dist/src/registry-KCVF1CFC.cjs +124 -0
  313. package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-B1_XsKXU.js} +16 -147
  314. package/dist/src/{server-gyd6d4Hc.js → remoteGeneration-COpWcmWd.js} +15 -108
  315. package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
  316. package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
  317. package/dist/src/render-BNTrbmBw.cjs +384 -0
  318. package/dist/src/render-CSP99NLm.js +348 -0
  319. package/dist/src/render-DFfDeYUK.js +347 -0
  320. package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
  321. package/dist/src/render-_6ur1fhE.js +347 -0
  322. package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
  323. package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
  324. package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
  325. package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
  326. package/dist/src/{responses-CF-ayauu.cjs → responses-1UFFF9N_.cjs} +12 -11
  327. package/dist/src/{responses-B8haB-mD.js → responses-B3W2JvOQ.js} +9 -9
  328. package/dist/src/{responses-1ztiVYsx.js → responses-B6ktc3Ra.js} +7 -7
  329. package/dist/src/{responses-BiaBguAu.js → responses-URRzV8qE.js} +9 -9
  330. package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
  331. package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
  332. package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
  333. package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
  334. package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
  335. package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
  336. package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
  337. package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
  338. package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
  339. package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
  340. package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-CujrzP1a.js} +61 -50
  341. package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-DzffAqo_.cjs} +65 -53
  342. package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-vhtSV7JI.js} +61 -50
  343. package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-yr1QKeBs.js} +61 -50
  344. package/dist/src/{scanner-nOCWNIXa.js → scanner-DS0109SS.js} +6 -6
  345. package/dist/src/server/index.js +4147 -449
  346. package/dist/src/server-B8rqV126.cjs +126 -0
  347. package/dist/src/server-BaLytskk.js +3 -0
  348. package/dist/src/server-CMJD10J4.js +107 -0
  349. package/dist/src/server-Ddp8GNMp.js +146 -0
  350. package/dist/src/server-DhMHosWj.js +182 -0
  351. package/dist/src/shared-7pmVZLNO.js +1334 -0
  352. package/dist/src/shared-9WHQ1oNE.js +1335 -0
  353. package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
  354. package/dist/src/shared-D6IjElRI.js +1334 -0
  355. package/dist/src/shared-WkgnDkcg.cjs +1436 -0
  356. package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
  357. package/dist/src/simulatedUser-C9aQObBI.js +222 -0
  358. package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
  359. package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
  360. package/dist/src/simulatedUser-p3tACcmw.js +223 -0
  361. package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
  362. package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
  363. package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
  364. package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
  365. package/dist/src/storage-BU4qcnOb.js +875 -0
  366. package/dist/src/storage-CA-v9V2v.cjs +911 -0
  367. package/dist/src/storage-CD-GWAdx.js +822 -0
  368. package/dist/src/storage-QdU-SmvD.js +834 -0
  369. package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
  370. package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
  371. package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
  372. package/dist/src/store-IbiRIF3k.js +3 -0
  373. package/dist/src/strategies-7CS3Alao.cjs +2360 -0
  374. package/dist/src/strategies-CiSeroPH.js +2331 -0
  375. package/dist/src/strategies-DRJjGTIY.js +2333 -0
  376. package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
  377. package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
  378. package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
  379. package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
  380. package/dist/src/{telemetry-DWdGHvEf.js → telemetry-00ezXr_t.js} +4 -4
  381. package/dist/src/telemetry-ByPqDcKC.js +3 -0
  382. package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-CJ7FnCsc.cjs} +15 -9
  383. package/dist/src/{telemetry--iqaGyaS.js → telemetry-DmXYcJNV.js} +4 -4
  384. package/dist/src/{telemetry-CgdVGV8N.js → telemetry-DwX9XUN5.js} +4 -4
  385. package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
  386. package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
  387. package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
  388. package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
  389. package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
  390. package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
  391. package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
  392. package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
  393. package/dist/src/toolAttributes-COVgDrBG.js +87 -0
  394. package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
  395. package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
  396. package/dist/src/tracingOptions-Chi74lOD.js +219 -0
  397. package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
  398. package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
  399. package/dist/src/{transcription-84t4ALo2.js → transcription-B8uIgCYX.js} +5 -5
  400. package/dist/src/{transcription-Bm2emLmJ.js → transcription-CfU5loSq.js} +5 -5
  401. package/dist/src/{transcription-D7Q0vJsh.js → transcription-Dkd22_4K.js} +4 -4
  402. package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-mzuf18Mq.cjs} +9 -8
  403. package/dist/src/{transform-DtooZqYY.js → transform-BIMynQsA.js} +8 -8
  404. package/dist/src/transform-BnSTnFlp.js +187 -0
  405. package/dist/src/transform-BnSXWmU_2.cjs +221 -0
  406. package/dist/src/transform-CGt7Kt3y2.js +186 -0
  407. package/dist/src/transform-CrPGTsij.js +186 -0
  408. package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DhNkAUs8.cjs} +12 -11
  409. package/dist/src/{transform-_DpNB4qp.js → transform-DmvYBRll.js} +8 -8
  410. package/dist/src/{transform-B-b6Cq-q.js → transform-EtD4jAWi.js} +8 -8
  411. package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
  412. package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
  413. package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
  414. package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
  415. package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
  416. package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
  417. package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
  418. package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
  419. package/dist/src/{util-DFPeFkiV.js → util--WMgw7wM.js} +28 -8
  420. package/dist/src/{util-C-kmRosx.js → util-5WnCSb0h.js} +9 -7
  421. package/dist/src/{util-A5_ZsQUn.cjs → util-BSIuSLVK.cjs} +12 -9
  422. package/dist/src/{util-Dub0f_ej.js → util-Bx677_k2.js} +17 -10
  423. package/dist/src/util-CN8om2rz.cjs +386 -0
  424. package/dist/src/{util-DN0-b81k.js → util-CoQWM76y.js} +28 -8
  425. package/dist/src/util-DNl96nNs.js +327 -0
  426. package/dist/src/{util-BQOCAHQC.js → util-DURocbYR.js} +46 -11
  427. package/dist/src/util-Df8YMvS1.js +327 -0
  428. package/dist/src/{util-BVXcTwXu.js → util-DiQ3QvBB.js} +28 -8
  429. package/dist/src/{util-3pBZZb_H.js → util-I-Rf-KaD.js} +45 -10
  430. package/dist/src/{util-Dpmm_dAI.cjs → util-IYzs5Y04.cjs} +33 -7
  431. package/dist/src/{util-BlFVL0UF.js → util-LKTmNsMQ.js} +9 -7
  432. package/dist/src/{util-DvpHnLt0.cjs → util-SPsvFONY.cjs} +29 -21
  433. package/dist/src/{util-B9CNhyac.js → util-efByNxcr.js} +9 -7
  434. package/dist/src/util-kDURhgJW.js +328 -0
  435. package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
  436. package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
  437. package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
  438. package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
  439. package/dist/src/{version-CbuBKu2U.js → version-BK20a4sw.js} +2 -2
  440. package/dist/src/{version-D9zu9FWB.cjs → version-BWCSaByA.cjs} +2 -2
  441. package/dist/src/{version-CbpiUINz.js → version-eRkNuGv8.js} +2 -2
  442. package/dist/src/{version-0frU0UTr.js → version-lpHV_53E.js} +2 -2
  443. package/dist/tsconfig.tsbuildinfo +1 -1
  444. package/package.json +48 -22
  445. package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
  446. package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
  447. package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
  448. package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
  449. package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
  450. package/dist/src/cache-BRkhlH3k.cjs +0 -3
  451. package/dist/src/cache-BlC6aeJ0.js +0 -3
  452. package/dist/src/cloud-CoD5OacT.js +0 -3
  453. package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
  454. package/dist/src/codex-sdk-Danroptg.cjs +0 -2
  455. package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
  456. package/dist/src/evalResult-BBK58h2B.js +0 -3
  457. package/dist/src/evalResult-spPqh1G_.js +0 -2
  458. package/dist/src/evaluator-DgLKaZk8.js +0 -3
  459. package/dist/src/fetch-8viavNv8.js +0 -3
  460. package/dist/src/graders-C84JI-m5.js +0 -2
  461. package/dist/src/graders-CBbd0K0Q.cjs +0 -2
  462. package/dist/src/graders-CbQqpHSN.js +0 -3
  463. package/dist/src/graders-DS42d3ZG.js +0 -2
  464. package/dist/src/image-BmilRNqO.js +0 -258
  465. package/dist/src/image-CxJoa3aW.cjs +0 -280
  466. package/dist/src/image-DsGRlkh7.js +0 -257
  467. package/dist/src/image-a_SGUobh.js +0 -257
  468. package/dist/src/providers-BuyzKt7C.js +0 -2
  469. package/dist/src/providers-C7lNVBjX.cjs +0 -3
  470. package/dist/src/providers-CCE2COJi2.js +0 -2
  471. package/dist/src/render-7uNJ2V14.js +0 -135
  472. package/dist/src/render-DlscvAUJ.js +0 -135
  473. package/dist/src/render-eui5p5mL.js +0 -136
  474. package/dist/src/render-tG6ir9_g.cjs +0 -165
  475. package/dist/src/rubyUtils-4hjGxvju.js +0 -3
  476. package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
  477. package/dist/src/server-ByiF3qlg.js +0 -386
  478. package/dist/src/server-C0XKRNB_.cjs +0 -2
  479. package/dist/src/server-C_15p79-.js +0 -3
  480. package/dist/src/store-2OXm_eBY.js +0 -240
  481. package/dist/src/store-BELqNwvz.js +0 -3
  482. package/dist/src/store-uQZ4AjPe.cjs +0 -2
  483. package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
  484. package/dist/src/telemetry-ZdPZc0fm.js +0 -3
  485. package/dist/src/transform-BQt0BeAW.js +0 -3
  486. package/dist/src/transform-Bq5oqC0s.cjs +0 -2
  487. package/dist/src/transform-C9izGX54.cjs +0 -228
  488. package/dist/src/transform-CwbAZ84V.js +0 -216
  489. package/dist/src/transform-DzCF-wqV.js +0 -213
  490. package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/index.js CHANGED
@@ -1,34 +1,47 @@
1
1
  import { C as getEnvFloat, D as getMaxEvalTimeMs, E as getEvalTimeoutMs, O as isCI, S as getEnvBool, T as getEnvString, a as logger, b as summarizeEvaluateResultForLogging, g as getAjv, h as extractJsonObjects, k as state, m as extractFirstJsonObject, n as globalLogCallback, o as setLogCallback, r as isDebugEnabled, s as setLogLevel, t as getLogLevel, v as orderKeys, w as getEnvInt, y as safeJsonStringify } from "./logger-Ct2S6Yx-.js";
2
2
  import { t as invariant } from "./invariant-Ddh24eXh.js";
3
- import { A as TERMINAL_MAX_WIDTH, F as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, P as FILE_METADATA_KEY, S as cloudConfig, _ as parseChatPrompt, a as getCurrentTimestamp, j as getDefaultShareViewBaseUrl, m as isPromptfooSampleTarget, n as fetchWithRetries, o as sleep, r as fetchWithTimeout, t as fetchWithProxy, u as REQUEST_TIMEOUT_MS, x as CloudConfig } from "./fetch-D9xxyC1p.js";
4
- import { n as VERSION } from "./version-CbuBKu2U.js";
5
- import { $ as categoryAliases, A as isApiProvider, At as CompletionTokenDetailsSchema, B as StrategyConfigSchema, C as TestGeneratorConfigSchema, Ct as TELECOM_PLUGINS, D as VarsSchema, E as UnifiedConfigSchema, F as ProvidersSchema, G as DEFAULT_STRATEGIES, H as isUuid, I as ConversationMessageSchema, J as STRATEGY_COLLECTION_MAPPINGS, L as PartialGenerationError, M as RedteamConfigSchema, N as PromptSchema, O as isGradingResult, Q as Severity, R as PluginConfigSchema, S as TestCasesWithMetadataSchema, St as TEEN_SAFETY_PLUGINS, T as TestSuiteSchema, Tt as CODING_AGENT_CORE_PLUGINS, Y as getDefaultNFanout, Z as isFanoutStrategy, _ as ScenarioSchema, _t as PII_PLUGINS, a as AtomicTestCaseSchema, at as DATASET_EXEMPT_PLUGINS, b as TestCaseWithVarsFileSchema, bt as REMOTE_ONLY_PLUGIN_IDS, c as CompletedPromptSchema, ct as FOUNDATION_PLUGINS, d as EvaluateOptionsSchema, dt as LLAMA_GUARD_ENABLED_CATEGORIES, f as GradingConfigSchema, ft as LLAMA_GUARD_REPLICATE_PROVIDER, g as ResultFailureReason, gt as PHARMACY_PLUGINS, h as OutputFileExtension, ht as MULTI_INPUT_VAR, i as AssertionTypeSchema, it as CANARY_BREAKING_STRATEGY_IDS, j as isProviderOptions, jt as InputsSchema, k as isResultFailureReason, kt as BaseTokenUsageSchema, l as DerivedMetricSchema, lt as HARM_PLUGINS, m as OutputConfigSchema, mt as MULTI_INPUT_EXCLUDED_PLUGINS, n as AssertionSchema, nt as ALIASED_PLUGIN_MAPPINGS, o as BaseAssertionTypesSchema, ot as DEFAULT_PLUGINS, p as NotPrefixedAssertionTypesSchema, pt as MEDICAL_PLUGINS, q as STRATEGY_COLLECTIONS, r as AssertionSetSchema, rt as BIAS_PLUGINS, s as CommandLineOptionsSchema, st as FINANCIAL_PLUGINS, t as AssertionOrSetSchema, tt as riskCategorySeverityMap, u as EvalResultsFilterMode, ut as INSURANCE_PLUGINS, v as SpecialAssertionTypesSchema, vt as PLUGIN_CATEGORIES, w as TestSuiteConfigSchema, wt as UNALIGNED_PROVIDER_HARM_PLUGINS, x as TestCasesWithMetadataPromptSchema, y as TestCaseSchema, yt as REDTEAM_PROVIDER_HARM_PLUGINS, z as PolicyObjectSchema } from "./types-DNRZVOue.js";
6
- import { c as setUserEmail, i as getUserEmail, o as isLoggedIntoCloud, r as getAuthor, s as promptForEmailUnverified, t as checkEmailStatusAndMaybeExit } from "./accounts-CaLNYnf7.js";
7
- import { r as importModule, t as getDirectory } from "./esm-BRkfNsYs.js";
8
- import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-DlscvAUJ.js";
9
- import { t as providerRegistry } from "./providerRegistry-DoACwqhD.js";
10
- import { i as getRemoteGenerationUrl, l as shouldGenerateRemote, o as getRemoteHealthUrl, r as promptYesNo, s as neverGenerateRemote } from "./server-gyd6d4Hc.js";
11
- import { $ as getMaxCharsPerMessageModifierValue, B as collectFileMetadata, D as validateStrategies, E as loadStrategy, F as getSessionId, G as redteamProviderManager, H as runExtensionHook, I as getShortPluginId, J as createProviderRateLimitOptions, K as TokenUsageTracker, L as isBasicRefusal, N as extractPromptFromTags, O as pluginMatchesStrategyTargets, P as extractVariablesFromJson, Q as getGeneratedPromptOverLimit, T as Strategies, U as isPackagePath, V as renderPrompt, W as loadFromPackage, X as PromptfooHarmfulCompletionProvider, Z as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, a as resolveProviderConfigs, at as getOrgContext, ct as isCloudProvider, d as MCPProvider, et as throwIfTargetPromptExceedsMaxChars, g as AIStudioChatProvider, h as GoogleLiveProvider, i as resolveProvider, it as getEvalConfigFromCloud, j as extractGoalFromPrompt, k as checkExfilTracking, lt as resolveTeamId, m as VertexChatProvider, n as loadApiProvider, nt as checkCloudPermissions, ot as getPluginSeverityOverridesFromCloud, q as createRateLimitRegistry, r as loadApiProviders, rt as getCloudDatabaseId, t as getProviderIds } from "./providers-CJh7iriU.js";
12
- import { r as runPython } from "./pythonUtils-DMO68Jg7.js";
13
- import { i as isJavascriptFile } from "./fileExtensions-8CjoL7vB.js";
14
- import { A as readFilters, M as loadFunction, N as parseFileUrl, O as maybeLoadToolsFromExternalFile, T as maybeLoadFromExternalFile, _ as isProviderAllowed, a as setupEnv, b as normalizeProviderRef, c as filterRuntimeVars, d as checkProviderApiKeys, f as doesProviderRefMatch, g as isOpenAiProvider, h as isGoogleProvider, i as fetchCsvFromGoogleSheet, j as readOutput, l as getTestCaseDeduplicationKey, m as isAnthropicProvider, n as writeMultipleOutputs, o as deduplicateTestCases, p as getProviderDescription, r as writeOutput, s as extractRuntimeVars, t as printBorder, u as resultIsForTestCase, w as maybeLoadConfigFromExternalFile } from "./util-Dub0f_ej.js";
15
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-4c780gFd.js";
16
- import { t as getBlobByHash } from "./blobs-BW4U31ue.js";
17
- import { a as evalsTable, c as evalsToTagsTable, d as tagsTable, i as evalResultsTable, l as promptsTable, m as getDbSignalPath, o as evalsToDatasetsTable, p as getDb, r as datasetsTable, s as evalsToPromptsTable } from "./tables-WgdUZ8Ck.js";
18
- import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-BM3jRERL.js";
19
- import { c as isNonTransientHttpStatus, i as getCache, n as disableCache, o as withCacheNamespace, r as fetchWithCache, s as NON_TRANSIENT_HTTP_STATUSES, t as cache_exports } from "./cache-Bzttsk0X.js";
20
- import { t as OpenAiChatCompletionProvider } from "./chat-Cx_LkwvZ.js";
21
- import { h as validateFunctionCall } from "./transform-_DpNB4qp.js";
22
- import { l as validateFunctionCall$1 } from "./util-BlFVL0UF.js";
23
- import { i as getProcessShim, n as transform, t as TransformInputType } from "./transform-eGiUAv86.js";
24
- import { t as telemetry } from "./telemetry-CgdVGV8N.js";
25
- import { t as ellipsize } from "./text-DDQP0tuQ.js";
26
- import { t as getTraceStore } from "./store-BPkzEyFM.js";
27
- import { n as sha256, t as randomSequence } from "./createHash-Un4Q_huE.js";
28
- import { n as runRuby } from "./rubyUtils-BI0p46eZ.js";
29
- import { $ as SELECT_BEST_PROMPT, A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as DEFAULT_ANTHROPIC_MODEL, D as HarmbenchPlugin, Dt as withProviderCallExecutionContext, E as ImitationPlugin, Et as getGradingProvider, F as AegisPlugin, G as matchesLlmRubric, H as matchesClosedQa, I as RedteamGraderBase, J as doRemoteGrading, K as matchesPiScore, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as DEFAULT_WEB_SEARCH_PROMPT, R as getCustomPolicies, S as PlinyPlugin, St as getDefaultProviders, T as IntentPlugin, Tt as getAndCheckProvider, U as matchesFactuality, V as fetchHuggingFaceDataset, W as matchesGEval, X as readPrompts, Y as processPrompts, Z as readProviderPromptMap, _ as PoliticsPlugin, _t as tryParse, a as UnverifiableClaimsPlugin, at as CONTEXT_RECALL_ATTRIBUTED_TOKEN, b as isValidPolicyObject, bt as loadFromJavaScriptFile, c as ToolDiscoveryPlugin, ct as CONTEXT_RELEVANCE_BAD, d as TeenSafetyDangerousContentPlugin, dt as cosineSimilarity, et as SUGGEST_PROMPTS_SYSTEM_MESSAGE, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, ft as dotProduct, g as PromptExtractionPlugin, gt as splitIntoSentences, h as RbacPlugin, ht as normalizeMatcherTokenUsage, i as VLGuardPlugin, it as CONTEXT_RECALL, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, lt as loadRubricPrompt, m as ShellInjectionPlugin, mt as fail, n as getGraderById, nt as CONTEXT_FAITHFULNESS_LONGFORM, o as UnsafeBenchPlugin, ot as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, p as SqlInjectionPlugin, pt as euclideanDistance, q as matchesTrajectoryGoalSuccess, r as VLSUPlugin, rt as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, s as ToxicChatPlugin, st as CONTEXT_RELEVANCE, t as GRADERS, tt as ANSWER_RELEVANCY_GENERATE, u as TeenSafetyDangerousRoleplayPlugin, ut as renderLlmRubricPrompt, v as PolicyPlugin, vt as coerceString, w as OverreliancePlugin, wt as callProviderWithContext, x as makeInlinePolicyIdSync, xt as processFileReference, y as determinePolicyTypeFromId, yt as getFinalTest, z as retryWithDeduplication } from "./graders-BoUqsCEm.js";
30
- import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-o8S5huU2.js";
31
- import { t as EvalResult } from "./evalResult-pSvGWFMo.js";
3
+ import { $ as riskCategorySeverityMap, A as RedteamConfigSchema, At as DocumentMediaInjectionPlacementValues, B as isUuid, Bt as getInputDescription, C as TestGeneratorConfigSchema, Ct as CODING_AGENT_CORE_PLUGINS, D as VarsSchema, Dt as BaseTokenUsageSchema, E as UnifiedConfigSchema, F as PartialGenerationError, Ft as InputDefinitionSchema, G as STRATEGY_COLLECTIONS, Gt as isProviderOptions, Ht as normalizeInputDefinition, I as PluginConfigSchema, It as InputTypeSchema, K as STRATEGY_COLLECTION_MAPPINGS, L as PolicyObjectSchema, Lt as InputTypeValues, Mt as DocxInjectionPlacementValues, N as ProvidersSchema, Nt as InputConfigSchema, O as isGradingResult, Ot as CompletionTokenDetailsSchema, P as ConversationMessageSchema, Pt as InputDefinitionObjectSchema, R as StrategyConfigSchema, Rt as InputsSchema, S as TestCasesWithMetadataSchema, St as UNALIGNED_PROVIDER_HARM_PLUGINS, T as TestSuiteSchema, U as DEFAULT_STRATEGIES, Ut as normalizeInputs, Vt as getInputType, Wt as isApiProvider, X as Severity, Y as isFanoutStrategy, Z as categoryAliases, _ as ScenarioSchema, _t as REDTEAM_PROVIDER_HARM_PLUGINS, a as AtomicTestCaseSchema, at as FINANCIAL_PLUGINS, b as TestCaseWithVarsFileSchema, bt as TEEN_SAFETY_PLUGINS, c as CompletedPromptSchema, ct as INSURANCE_PLUGINS, d as EvaluateOptionsSchema, dt as MEDICAL_PLUGINS, et as ALIASED_PLUGIN_MAPPINGS, f as GradingConfigSchema, ft as MULTI_INPUT_EXCLUDED_PLUGINS, g as ResultFailureReason, gt as PLUGIN_CATEGORIES, h as OutputFileExtension, ht as PII_PLUGINS, i as AssertionTypeSchema, it as DEFAULT_PLUGINS, j as PromptSchema, jt as DocxInjectionPlacementSchema, k as isResultFailureReason, kt as DocumentMediaInjectionPlacementSchema, l as DerivedMetricSchema, lt as LLAMA_GUARD_ENABLED_CATEGORIES, m as OutputConfigSchema, mt as PHARMACY_PLUGINS, n as AssertionSchema, nt as CANARY_BREAKING_STRATEGY_IDS, o as BaseAssertionTypesSchema, ot as FOUNDATION_PLUGINS, p as NotPrefixedAssertionTypesSchema, pt as MULTI_INPUT_VAR, q as getDefaultNFanout, r as AssertionSetSchema, rt as DATASET_EXEMPT_PLUGINS, s as CommandLineOptionsSchema, st as HARM_PLUGINS, t as AssertionOrSetSchema, tt as BIAS_PLUGINS, u as EvalResultsFilterMode, ut as LLAMA_GUARD_REPLICATE_PROVIDER, v as SpecialAssertionTypesSchema, vt as REMOTE_ONLY_PLUGIN_IDS, w as TestSuiteConfigSchema, wt as CODING_AGENT_PLUGINS, x as TestCasesWithMetadataPromptSchema, xt as TELECOM_PLUGINS, y as TestCaseSchema, zt as buildInputPromptDescription } from "./types-D6glLbdF.js";
4
+ import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, L as FILE_METADATA_KEY, N as TERMINAL_MAX_WIDTH, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, T as cloudConfig, _ as isPromptfooSampleTarget, b as parseChatPrompt, c as getCurrentTimestamp, l as sleep, n as fetchWithRetries, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy, w as CloudConfig } from "./fetch-It34O8Ur.js";
5
+ import { n as VERSION } from "./version-lpHV_53E.js";
6
+ import { i as isJavascriptFile } from "./fileExtensions-CXRfY3Ss.js";
7
+ import { c as setUserEmail, i as getUserEmail, o as isLoggedIntoCloud, r as getAuthor, s as promptForEmailUnverified, t as checkEmailStatusAndMaybeExit } from "./accounts-Ca7WIoPY.js";
8
+ import { r as importModule, t as getDirectory } from "./esm-BTK1W7lG.js";
9
+ import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, r as analyzeTemplateReference, t as renderEnvOnlyInObject } from "./render-DFfDeYUK.js";
10
+ import { t as providerRegistry } from "./providerRegistry-DHcFiVWX.js";
11
+ import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-DsaSwmG2.js";
12
+ import { r as promptYesNo } from "./server-CMJD10J4.js";
13
+ import { a as getCloudDatabaseId, c as getPluginSeverityOverridesFromCloud, d as isCloudProvider, i as checkCloudPermissions, o as getEvalConfigFromCloud, p as resolveTeamId, s as getOrgContext } from "./storage-CD-GWAdx.js";
14
+ import { r as runPython } from "./pythonUtils-D0BYebvX.js";
15
+ import { A as readFilters, M as loadFunction, N as parseFileUrl, O as maybeLoadToolsFromExternalFile, T as maybeLoadFromExternalFile, _ as isProviderAllowed, a as setupEnv, b as normalizeProviderRef, c as filterRuntimeVars, d as checkProviderApiKeys, f as doesProviderRefMatch, g as isOpenAiProvider, h as isGoogleProvider, i as fetchCsvFromGoogleSheet, j as readOutput, l as getTestCaseDeduplicationKey, m as isAnthropicProvider, n as writeMultipleOutputs, o as deduplicateTestCases, p as getProviderDescription, r as writeOutput, s as extractRuntimeVars, t as printBorder, u as resultIsForTestCase, w as maybeLoadConfigFromExternalFile } from "./util-Bx677_k2.js";
16
+ import { n as sha256, t as randomSequence } from "./createHash-BtbSX3mj.js";
17
+ import { c as NON_TRANSIENT_HTTP_STATUSES, i as getCache, l as isNonTransientHttpStatus, n as disableCache, r as fetchWithCache, s as withCacheNamespace, t as cache_exports } from "./cache-DIXbtkNO.js";
18
+ import { t as OpenAiChatCompletionProvider } from "./chat-Dabu84Br.js";
19
+ import { h as validateFunctionCall } from "./transform-DmvYBRll.js";
20
+ import { l as validateFunctionCall$1 } from "./util-LKTmNsMQ.js";
21
+ import { _ as AIStudioChatProvider, a as resolveProvider, f as MCPProvider, g as GoogleLiveProvider, h as VertexChatProvider, n as loadApiProvider, o as resolveProviderConfigs, r as loadApiProviders, t as getProviderIds } from "./providers-DVYRZP4E.js";
22
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CmnQ0G2m.js";
23
+ import { t as ellipsize } from "./text-DwYK5EBn.js";
24
+ import { t as telemetry } from "./telemetry-DwX9XUN5.js";
25
+ import { a as evalsTable, c as evalsToTagsTable, d as tagsTable, i as evalResultsTable, l as promptsTable, m as getDbSignalPath, o as evalsToDatasetsTable, p as getDb, r as datasetsTable, s as evalsToPromptsTable } from "./tables-CRSXQ2Ke.js";
26
+ import { t as getBlobByHash } from "./blobs-Dwef1Ao1.js";
27
+ import { t as getProcessShim } from "./processShim-BcGzU8fY.js";
28
+ import { n as loadFromPackage, t as isPackagePath } from "./packageParser-CgE-ziRo.js";
29
+ import { n as runRuby } from "./rubyUtils-CnlW8AYb.js";
30
+ import { n as materializeInputVariablesWithMetadata, t as buildPromptInputDescriptions } from "./inputVariables-DUGMb9Ka.js";
31
+ import { a as extractPromptFromTags, c as isBasicRefusal, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, o as getSessionId, r as extractInputVarsFromPrompt, s as getShortPluginId } from "./util-DNl96nNs.js";
32
+ import { n as PromptfooHarmfulCompletionProvider } from "./promptfoo-Cm4hiy1Y.js";
33
+ import { $ as readProviderPromptMap, A as ExcessiveAgencyPlugin, At as withProviderCallExecutionContext, B as retryWithDeduplication, C as PlinyPlugin, Ct as processFileReference, D as ImitationPlugin, Dt as getAndCheckProvider, E as IntentPlugin, Et as callProviderWithContext, F as BeavertailsPlugin, G as matchesFactuality, H as fetchHuggingFaceDataset, I as AegisPlugin, J as matchesPiScore, K as matchesGEval, L as RedteamGraderBase, M as DebugAccessPlugin, N as CrossSessionLeakPlugin, O as HarmbenchPlugin, Ot as getGradingProvider, P as ContractPlugin, Q as readPrompts, R as RedteamPluginBase, S as makeInlinePolicyIdSync, St as loadFromJavaScriptFile, T as OverreliancePlugin, Tt as DEFAULT_ANTHROPIC_MODEL, U as isGraderFailure, V as sampleArray, W as matchesClosedQa, X as doRemoteGrading, Y as matchesTrajectoryGoalSuccess, Z as processPrompts, _ as PromptExtractionPlugin, _t as normalizeMatcherTokenUsage, a as VLGuardPlugin, at as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, b as determinePolicyTypeFromId, bt as coerceString, c as ToxicChatPlugin, ct as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, d as TeenSafetyDangerousRoleplayPlugin, dt as loadRubricPrompt, et as DEFAULT_WEB_SEARCH_PROMPT, f as TeenSafetyDangerousContentPlugin, ft as renderLlmRubricPrompt, g as RbacPlugin, gt as fail, h as ShellInjectionPlugin, ht as euclideanDistance, i as VLSUPlugin, it as CONTEXT_FAITHFULNESS_LONGFORM, j as DivergentRepetitionPlugin, k as HallucinationPlugin, kt as getProviderCallExecutionContext, l as ToolDiscoveryPlugin, lt as CONTEXT_RELEVANCE, m as SqlInjectionPlugin, mt as dotProduct, n as getGraderById, nt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, o as UnverifiableClaimsPlugin, ot as CONTEXT_RECALL, p as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, pt as cosineSimilarity, q as matchesLlmRubric, rt as ANSWER_RELEVANCY_GENERATE, s as UnsafeBenchPlugin, st as CONTEXT_RECALL_ATTRIBUTED_TOKEN, t as GRADERS, tt as SELECT_BEST_PROMPT, u as TeenSafetyHarmfulBodyIdealsPlugin, ut as CONTEXT_RELEVANCE_BAD, v as PoliticsPlugin, vt as splitIntoSentences, w as getPiiLeakTestsForCategory, wt as getDefaultProviders, x as isValidPolicyObject, xt as getFinalTest, y as PolicyPlugin, yt as tryParse, z as getCustomPolicies } from "./graders-BX0f2tvS.js";
34
+ import { f as redteamProviderManager, g as createProviderRateLimitOptions, h as createRateLimitRegistry, m as TokenUsageTracker } from "./shared-D6IjElRI.js";
35
+ import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-BGY69tk_.js";
36
+ import { a as getTransformLabel, i as getTransformErrorMessage, n as TRANSFORM_KEYS, o as transform, r as TransformInputType, t as INLINE_FUNCTION_LABEL } from "./transform-CGt7Kt3y2.js";
37
+ import { t as getTraceStore } from "./store-HpopRVzl.js";
38
+ import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-CxRtnaHl.js";
39
+ import { i as throwIfTargetPromptExceedsMaxChars, n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-B9nZEfO6.js";
40
+ import { n as checkExfilTracking } from "./indirectWebPwn-CbjUG0rh.js";
41
+ import { n as getFirstStringAttribute, r as getToolNameFromAttributes, t as TOOL_ARGUMENT_ATTRIBUTE_KEYS } from "./toolAttributes-DJ9ZEKXD.js";
42
+ import { i as filterFiniteScores, n as renderPrompt, r as runExtensionHook, t as collectFileMetadata } from "./evaluatorHelpers-DuqFFfq7.js";
43
+ import { r as sanitizeProvider, t as EvalResult } from "./evalResult-2RRJvFyB.js";
44
+ import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-CiSeroPH.js";
32
45
  import * as fs$2 from "fs";
33
46
  import fs, { createWriteStream } from "fs";
34
47
  import * as path$2 from "path";
@@ -51,14 +64,15 @@ import { DiagConsoleLogger, DiagLogLevel, diag, propagation } from "@opentelemet
51
64
  import readline from "readline";
52
65
  import { parse as parse$1 } from "csv-parse/sync";
53
66
  import { globSync } from "glob";
54
- import { and, desc, eq, inArray, sql } from "drizzle-orm";
55
67
  import { XMLParser } from "fast-xml-parser";
56
- import cliProgress from "cli-progress";
68
+ import { and, desc, eq, inArray, sql } from "drizzle-orm";
57
69
  import { URL } from "url";
58
70
  import { parse as parse$2 } from "parse5";
59
71
  import { distance } from "fastest-levenshtein";
72
+ import cliProgress from "cli-progress";
60
73
  import * as rouge from "js-rouge";
61
74
  import { isDeepStrictEqual } from "node:util";
75
+ import { LRUCache } from "lru-cache";
62
76
  import "debounce";
63
77
  import { ExportResultCode, W3CTraceContextPropagator } from "@opentelemetry/core";
64
78
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
@@ -429,17 +443,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
429
443
  contextValue = test.vars.context;
430
444
  }
431
445
  } else if (fallbackContext) contextValue = fallbackContext;
432
- if (assertion.contextTransform) try {
433
- const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
434
- const transformed = await transform(assertion.contextTransform, outputForTransform, {
435
- vars: test.vars,
436
- prompt: { label: prompt },
437
- ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
438
- });
439
- invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${assertion.contextTransform}`);
440
- contextValue = transformed;
441
- } catch (error) {
442
- throw new Error(`Failed to transform context using expression '${assertion.contextTransform}': ${error instanceof Error ? error.message : String(error)}`);
446
+ if (assertion.contextTransform) {
447
+ const getLabel = () => getTransformLabel(assertion.contextTransform);
448
+ try {
449
+ const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
450
+ const transformed = await transform(assertion.contextTransform, outputForTransform, {
451
+ vars: test.vars,
452
+ prompt: { label: prompt },
453
+ ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
454
+ });
455
+ invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
456
+ contextValue = transformed;
457
+ } catch (error) {
458
+ throw new Error(`Failed to transform context using expression '${getLabel()}': ${getTransformErrorMessage(error)}`);
459
+ }
443
460
  }
444
461
  invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
445
462
  return contextValue;
@@ -782,7 +799,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
782
799
  telemetry.record("feature_used", { feature: "tracing" });
783
800
  try {
784
801
  logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
785
- const { startOTLPReceiver } = await import("./otlpReceiver-CcdIikOu.js");
802
+ const { startOTLPReceiver } = await import("./otlpReceiver-BXjcRqAM.js");
786
803
  const port = testSuite.tracing.otlp.http.port || 4318;
787
804
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
788
805
  const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
@@ -806,7 +823,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
806
823
  async function stopOtlpReceiverIfNeeded() {
807
824
  if (otlpReceiverStarted) try {
808
825
  logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
809
- const { stopOTLPReceiver } = await import("./otlpReceiver-CcdIikOu.js");
826
+ const { stopOTLPReceiver } = await import("./otlpReceiver-BXjcRqAM.js");
810
827
  await stopOTLPReceiver();
811
828
  otlpReceiverStarted = false;
812
829
  logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -841,7 +858,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
841
858
  }
842
859
  if (!tracingEnabled) return null;
843
860
  logger.debug("[EvaluatorTracing] Importing trace store");
844
- const { getTraceStore } = await import("./store-BPkzEyFM.js").then((n) => n.n);
861
+ const { getTraceStore } = await import("./store-HpopRVzl.js").then((n) => n.n);
845
862
  const traceStore = getTraceStore();
846
863
  const traceId = generateTraceId();
847
864
  const spanId = generateSpanId();
@@ -1473,27 +1490,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
1473
1490
  };
1474
1491
  //#endregion
1475
1492
  //#region src/assertions/geval.ts
1476
- const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
1477
- invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
1493
+ const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
1494
+ invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
1478
1495
  const threshold = assertion.threshold ?? .7;
1479
1496
  if (Array.isArray(renderedValue)) {
1480
- const scores = [];
1481
- const reasons = [];
1482
- for (const value of renderedValue) {
1497
+ if (renderedValue.length === 0) return {
1498
+ assertion,
1499
+ pass: false,
1500
+ score: 0,
1501
+ reason: "G-Eval assertion requires at least one criterion string in the value array."
1502
+ };
1503
+ const responses = [];
1504
+ let failure;
1505
+ for (const [index, value] of renderedValue.entries()) {
1483
1506
  const resp = await matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
1484
- scores.push(resp.score);
1485
- reasons.push(resp.reason);
1507
+ responses.push(resp);
1508
+ if (isGraderFailure(resp)) {
1509
+ failure = {
1510
+ index,
1511
+ resp
1512
+ };
1513
+ break;
1514
+ }
1486
1515
  }
1487
- const scoresSum = scores.reduce((a, b) => a + b, 0);
1516
+ const tokensUsed = createEmptyTokenUsage();
1517
+ for (const r of responses) accumulateTokenUsage(tokensUsed, r.tokensUsed);
1518
+ if (failure) {
1519
+ const criterion = renderedValue[failure.index];
1520
+ return {
1521
+ assertion,
1522
+ pass: false,
1523
+ score: 0,
1524
+ reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
1525
+ tokensUsed,
1526
+ metadata: failure.resp.metadata
1527
+ };
1528
+ }
1529
+ const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
1530
+ const combinedReason = responses.map((r) => r.reason).join("\n\n");
1488
1531
  return {
1489
1532
  assertion,
1490
- pass: scoresSum / scores.length >= threshold,
1491
- score: scoresSum / scores.length,
1492
- reason: reasons.join("\n\n")
1533
+ pass: averageScore >= threshold !== inverse,
1534
+ score: inverse ? 1 - averageScore : averageScore,
1535
+ reason: combinedReason,
1536
+ tokensUsed
1493
1537
  };
1494
- } else return {
1538
+ }
1539
+ const resp = await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
1540
+ if (isGraderFailure(resp)) return {
1541
+ assertion,
1542
+ pass: false,
1543
+ score: 0,
1544
+ reason: resp.reason,
1545
+ tokensUsed: resp.tokensUsed,
1546
+ metadata: resp.metadata
1547
+ };
1548
+ const passed = resp.score >= threshold !== inverse;
1549
+ return {
1495
1550
  assertion,
1496
- ...await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext)
1551
+ ...resp,
1552
+ pass: passed,
1553
+ score: inverse ? 1 - resp.score : resp.score
1497
1554
  };
1498
1555
  };
1499
1556
  //#endregion
@@ -2496,45 +2553,6 @@ function matchesPattern(spanName, pattern) {
2496
2553
  }
2497
2554
  //#endregion
2498
2555
  //#region src/assertions/trajectoryUtils.ts
2499
- const TOOL_ATTRIBUTE_KEYS = [
2500
- "tool.name",
2501
- "tool_name",
2502
- "tool",
2503
- "function.name",
2504
- "function_name",
2505
- "gen_ai.tool.name",
2506
- "codex.mcp.tool",
2507
- "agent.tool",
2508
- "agent.tool_name",
2509
- "agent.toolName"
2510
- ];
2511
- const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
2512
- "tool.arguments",
2513
- "tool.args",
2514
- "tool.input",
2515
- "tool_arguments",
2516
- "tool_args",
2517
- "tool_input",
2518
- "function.arguments",
2519
- "function.args",
2520
- "function.input",
2521
- "function_arguments",
2522
- "function_args",
2523
- "gen_ai.tool.arguments",
2524
- "gen_ai.tool.args",
2525
- "gen_ai.tool.input",
2526
- "gen_ai.tool.call.arguments",
2527
- "gen_ai.tool.call.args",
2528
- "agent.tool.arguments",
2529
- "agent.tool.args",
2530
- "agent.tool.input",
2531
- "codex.mcp.arguments",
2532
- "codex.mcp.args",
2533
- "codex.mcp.input",
2534
- "arguments",
2535
- "args",
2536
- "input"
2537
- ];
2538
2556
  const COMMAND_ATTRIBUTE_KEYS = [
2539
2557
  "codex.command",
2540
2558
  "command",
@@ -2547,16 +2565,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
2547
2565
  "search_query"
2548
2566
  ];
2549
2567
  const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
2568
+ const COMMAND_TOOL_NAMES = new Set([
2569
+ "exec_command",
2570
+ "local_shell",
2571
+ "shell"
2572
+ ]);
2550
2573
  const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
2551
2574
  const MAX_JUDGE_SUMMARY_STEPS = 24;
2552
2575
  const JUDGE_SUMMARY_HEAD_STEPS = 12;
2553
2576
  const JUDGE_SUMMARY_TAIL_STEPS = 12;
2554
- function getStringAttribute(attributes, keys) {
2555
- for (const key of keys) {
2556
- const value = attributes[key];
2557
- if (typeof value === "string" && value.trim()) return value.trim();
2558
- }
2559
- }
2560
2577
  function normalizeStructuredAttribute(value) {
2561
2578
  if (value === void 0 || value === null) return;
2562
2579
  if (typeof value === "string") {
@@ -2588,9 +2605,12 @@ function getTrajectoryStepStatus(step) {
2588
2605
  function getCommandExecutable(command) {
2589
2606
  return command.trim().split(/\s+/)[0] || void 0;
2590
2607
  }
2608
+ function isCommandToolName(toolName) {
2609
+ return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
2610
+ }
2591
2611
  function extractToolName(span) {
2592
2612
  const attributes = span.attributes || {};
2593
- const directMatch = getStringAttribute(attributes, TOOL_ATTRIBUTE_KEYS);
2613
+ const directMatch = getToolNameFromAttributes(attributes);
2594
2614
  if (directMatch) return directMatch;
2595
2615
  for (const [key, value] of Object.entries(attributes)) {
2596
2616
  if (typeof value !== "string" || !value.trim()) continue;
@@ -2615,21 +2635,31 @@ function extractToolArgs(span) {
2615
2635
  if (value !== void 0) return value;
2616
2636
  }
2617
2637
  }
2618
- function extractCommand(span) {
2638
+ function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
2619
2639
  const attributes = span.attributes || {};
2620
- const directMatch = getStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2640
+ const directMatch = getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2621
2641
  if (directMatch) return directMatch;
2622
2642
  for (const [key, value] of Object.entries(attributes)) {
2623
2643
  if (typeof value !== "string" || !value.trim()) continue;
2624
2644
  if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
2625
2645
  }
2646
+ const toolArgs = getToolArgs();
2647
+ if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
2648
+ const args = toolArgs;
2649
+ const command = args.cmd ?? args.command;
2650
+ if (typeof command === "string" && command.trim()) return command.trim();
2651
+ if (Array.isArray(command)) {
2652
+ const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
2653
+ if (joined) return joined;
2654
+ }
2655
+ }
2626
2656
  if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
2627
2657
  }
2628
2658
  function extractSearchQuery(span) {
2629
2659
  const attributes = span.attributes || {};
2630
- const directMatch = getStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2660
+ const directMatch = getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2631
2661
  if (directMatch) return directMatch;
2632
- const genericQuery = getStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2662
+ const genericQuery = getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2633
2663
  if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
2634
2664
  if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
2635
2665
  }
@@ -2653,17 +2683,34 @@ function extractTrajectorySteps(trace) {
2653
2683
  return left.index - right.index;
2654
2684
  }).map(({ span }) => {
2655
2685
  const toolName = extractToolName(span);
2656
- const command = extractCommand(span);
2686
+ let toolArgs;
2687
+ let hasExtractedToolArgs = false;
2688
+ const getToolArgs = () => {
2689
+ if (!hasExtractedToolArgs) {
2690
+ toolArgs = extractToolArgs(span);
2691
+ hasExtractedToolArgs = true;
2692
+ }
2693
+ return toolArgs;
2694
+ };
2695
+ const command = extractCommand(span, toolName, getToolArgs);
2657
2696
  const searchQuery = extractSearchQuery(span);
2658
2697
  let type = "span";
2659
2698
  let name = span.name;
2660
2699
  const aliases = new Set([span.name]);
2661
2700
  let args;
2662
- if (toolName) {
2701
+ if (command && isCommandToolName(toolName)) {
2702
+ type = "command";
2703
+ name = command;
2704
+ aliases.add(command);
2705
+ args = getToolArgs();
2706
+ if (toolName) aliases.add(toolName);
2707
+ const executable = getCommandExecutable(command);
2708
+ if (executable) aliases.add(executable);
2709
+ } else if (toolName) {
2663
2710
  type = "tool";
2664
2711
  name = toolName;
2665
2712
  aliases.add(toolName);
2666
- args = extractToolArgs(span);
2713
+ args = getToolArgs();
2667
2714
  } else if (command) {
2668
2715
  type = "command";
2669
2716
  name = command;
@@ -3850,13 +3897,13 @@ function resolveSequenceValue(value) {
3850
3897
  }
3851
3898
  throw new Error("trajectory:tool-sequence assertion must have an array or object value");
3852
3899
  }
3853
- function isRecord(value) {
3900
+ function isRecord$1(value) {
3854
3901
  return typeof value === "object" && value !== null && !Array.isArray(value);
3855
3902
  }
3856
3903
  function matchesExpectedArgsPartial(actual, expected) {
3857
3904
  if (Array.isArray(expected)) return Array.isArray(actual) && actual.length === expected.length && expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
3858
- if (isRecord(expected)) {
3859
- if (!isRecord(actual)) return false;
3905
+ if (isRecord$1(expected)) {
3906
+ if (!isRecord$1(actual)) return false;
3860
3907
  return Object.entries(expected).every(([key, expectedValue]) => Object.prototype.hasOwnProperty.call(actual, key) && matchesExpectedArgsPartial(actual[key], expectedValue));
3861
3908
  }
3862
3909
  return isDeepStrictEqual(actual, expected);
@@ -4227,7 +4274,7 @@ async function loadTraceData(traceId) {
4227
4274
  let stableObservations = 0;
4228
4275
  let latestTrace = null;
4229
4276
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
4230
- latestTrace = await traceStore.getTrace(traceId);
4277
+ latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
4231
4278
  const spanCount = latestTrace?.spans?.length ?? 0;
4232
4279
  if (spanCount > 0) {
4233
4280
  stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
@@ -4280,7 +4327,7 @@ const ASSERTION_HANDLERS = {
4280
4327
  "llm-rubric": handleLlmRubric,
4281
4328
  meteor: async (params) => {
4282
4329
  try {
4283
- const { handleMeteorAssertion } = await import("./meteor-BKTM-7KS.js");
4330
+ const { handleMeteorAssertion } = await import("./meteor-Dce-_zGQ.js");
4284
4331
  return handleMeteorAssertion(params);
4285
4332
  } catch (error) {
4286
4333
  if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
@@ -4416,7 +4463,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
4416
4463
  };
4417
4464
  }
4418
4465
  else if (filePath.endsWith(".rb")) try {
4419
- const { runRuby } = await import("./rubyUtils-BI0p46eZ.js").then((n) => n.t);
4466
+ const { runRuby } = await import("./rubyUtils-CnlW8AYb.js").then((n) => n.t);
4420
4467
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
4421
4468
  logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
4422
4469
  } catch (error) {
@@ -4533,7 +4580,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
4533
4580
  logger.debug(`Failed to preload trace data for assertions: ${error}`);
4534
4581
  preloadedTraceData = null;
4535
4582
  }
4536
- await async.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
4583
+ const concurrency = getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
4584
+ await async.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
4537
4585
  if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
4538
4586
  const result = await runAssertion({
4539
4587
  prompt,
@@ -4660,7 +4708,8 @@ var CIProgressReporter = class {
4660
4708
  }
4661
4709
  updateTotalTests(newTotal) {
4662
4710
  this.totalTests = Math.max(newTotal, 1);
4663
- this.highestPercentageSeen = Math.floor(this.completedTests / this.totalTests * 100);
4711
+ const percentage = Math.floor(this.completedTests / this.totalTests * 100);
4712
+ this.highestPercentageSeen = percentage;
4664
4713
  }
4665
4714
  finish() {
4666
4715
  if (this.intervalId) {
@@ -4833,6 +4882,10 @@ function getDefaultOtelConfig() {
4833
4882
  }
4834
4883
  //#endregion
4835
4884
  //#region src/tracing/localSpanExporter.ts
4885
+ const MISSING_TRACE_RETRY_DELAY_MS = 50;
4886
+ function delay(ms) {
4887
+ return new Promise((resolve) => setTimeout(resolve, ms));
4888
+ }
4836
4889
  /**
4837
4890
  * A span exporter that writes spans to the local TraceStore (SQLite).
4838
4891
  * This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
@@ -4874,7 +4927,7 @@ var LocalSpanExporter = class {
4874
4927
  }
4875
4928
  let firstError;
4876
4929
  for (const [traceId, spanDataList] of spansByTrace) try {
4877
- const result = await traceStore.addSpans(traceId, spanDataList, { skipTraceCheck: false });
4930
+ const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
4878
4931
  if (result.stored) logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
4879
4932
  else logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
4880
4933
  } catch (error) {
@@ -4886,6 +4939,16 @@ var LocalSpanExporter = class {
4886
4939
  }
4887
4940
  return firstError;
4888
4941
  }
4942
+ async addSpansWithTraceRetry(traceStore, traceId, spans) {
4943
+ const options = {
4944
+ skipTraceCheck: false,
4945
+ warnIfMissingTrace: false
4946
+ };
4947
+ const result = await traceStore.addSpans(traceId, spans, options);
4948
+ if (result.stored) return result;
4949
+ await delay(MISSING_TRACE_RETRY_DELAY_MS);
4950
+ return traceStore.addSpans(traceId, spans, options);
4951
+ }
4889
4952
  /**
4890
4953
  * Convert an OTEL ReadableSpan to our SpanData format.
4891
4954
  */
@@ -5168,6 +5231,15 @@ function isPromptAllowed(prompt, allowedPrompts) {
5168
5231
  }
5169
5232
  //#endregion
5170
5233
  //#region src/evaluator.ts
5234
+ const CONVERSATION_VAR_NAME = "_conversation";
5235
+ const promptUsesConversationVariableCache = new LRUCache({ max: 1024 });
5236
+ function promptUsesConversationVariable(prompt) {
5237
+ const cached = promptUsesConversationVariableCache.get(prompt.raw);
5238
+ if (cached !== void 0) return cached;
5239
+ const { referenced, parsed } = analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
5240
+ if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
5241
+ return referenced;
5242
+ }
5171
5243
  /**
5172
5244
  * Manages a single progress bar for the evaluation
5173
5245
  */
@@ -5367,6 +5439,18 @@ function hasProviderGroupedAssertion(assertion) {
5367
5439
  function shouldDeferGradingForTest(test) {
5368
5440
  return Boolean(test.assert?.some(hasProviderGroupedAssertion));
5369
5441
  }
5442
+ function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
5443
+ if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
5444
+ if (shouldGroupGradingByProvider) {
5445
+ logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
5446
+ return;
5447
+ }
5448
+ if (concurrency !== 1) return;
5449
+ const reasons = [];
5450
+ if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
5451
+ if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
5452
+ if (reasons.length > 0) logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
5453
+ }
5370
5454
  function applyGradingResult(row, checkResult) {
5371
5455
  if (!checkResult.pass) {
5372
5456
  row.error = checkResult.reason;
@@ -5381,14 +5465,29 @@ function applyGradingResult(row, checkResult) {
5381
5465
  if (checkResult.tokensUsed) accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
5382
5466
  row.gradingResult = checkResult;
5383
5467
  }
5384
- function applyGradingError(row, error) {
5385
- const errorMessage = error instanceof Error ? error.stack ?? error.message : String(error);
5386
- logger.error("Assertion grading failed during eval", {
5387
- error: errorMessage,
5388
- promptIdx: row.promptIdx,
5389
- testIdx: row.testIdx
5390
- });
5391
- row.error = errorMessage;
5468
+ const ABORTED_GRADING_PREFIX = "Aborted: ";
5469
+ function isAbortShapedError(error) {
5470
+ return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
5471
+ }
5472
+ function applyGradingError(row, error, abortSignal) {
5473
+ const errorAsError = error instanceof Error ? error : void 0;
5474
+ if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
5475
+ const shortMessage = errorAsError?.message ?? String(error);
5476
+ logger.debug("Assertion grading aborted", {
5477
+ error: shortMessage,
5478
+ promptIdx: row.promptIdx,
5479
+ testIdx: row.testIdx
5480
+ });
5481
+ row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
5482
+ } else {
5483
+ const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
5484
+ logger.error("Assertion grading failed during eval", {
5485
+ error: fullMessage,
5486
+ promptIdx: row.promptIdx,
5487
+ testIdx: row.testIdx
5488
+ });
5489
+ row.error = fullMessage;
5490
+ }
5392
5491
  row.failureReason = ResultFailureReason.ERROR;
5393
5492
  row.success = false;
5394
5493
  row.score = 0;
@@ -5420,7 +5519,7 @@ function createRunEvalState({ provider, prompt, test }) {
5420
5519
  };
5421
5520
  }
5422
5521
  function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
5423
- const usesConversation = prompt.raw.includes("_conversation");
5522
+ const usesConversation = promptUsesConversationVariable(prompt);
5424
5523
  if (!getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
5425
5524
  }
5426
5525
  function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
@@ -5667,7 +5766,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
5667
5766
  assertScoringFunction: test.assertScoringFunction,
5668
5767
  traceId
5669
5768
  }).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
5670
- applyGradingError(ret, error);
5769
+ applyGradingError(ret, error, abortSignal);
5671
5770
  });
5672
5771
  deferredGradingPromises.set(ret, gradingPromise);
5673
5772
  return;
@@ -6214,7 +6313,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
6214
6313
  const defaultProvider = defaultTest.provider;
6215
6314
  if (isApiProvider(defaultProvider)) return defaultProvider;
6216
6315
  if (typeof defaultProvider === "object" && defaultProvider.id) {
6217
- const { loadApiProvider } = await import("./providers-CCE2COJi2.js");
6316
+ const { loadApiProvider } = await import("./providers-DVYRZP4E.js").then((n) => n.i);
6218
6317
  return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
6219
6318
  }
6220
6319
  return defaultProvider;
@@ -6374,7 +6473,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
6374
6473
  async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
6375
6474
  if (!state.resume || !evalRecord.persisted) return;
6376
6475
  try {
6377
- const { default: EvalResult } = await import("./evalResult-pSvGWFMo.js").then((n) => n.n);
6476
+ const { default: EvalResult } = await import("./evalResult-2RRJvFyB.js").then((n) => n.n);
6378
6477
  const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: state.retryMode });
6379
6478
  const originalCount = runEvalOptions.length;
6380
6479
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -6388,14 +6487,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
6388
6487
  }
6389
6488
  }
6390
6489
  function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
6391
- const usesConversationVar = prompts.some((p) => p.raw.includes("_conversation"));
6490
+ const usesConversationVar = prompts.some(promptUsesConversationVariable);
6392
6491
  if (concurrency <= 1) return {
6393
6492
  concurrency,
6394
6493
  usesConversationVar
6395
6494
  };
6396
6495
  const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
6397
6496
  if (usesConversationVar) {
6398
- logger.info(`Setting concurrency to 1 because the ${chalk.cyan("_conversation")} variable is used.`);
6497
+ logger.info(`Setting concurrency to 1 because the ${chalk.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
6399
6498
  return {
6400
6499
  concurrency: 1,
6401
6500
  usesConversationVar
@@ -6625,7 +6724,8 @@ var Evaluator = class {
6625
6724
  };
6626
6725
  this.conversations = {};
6627
6726
  this.registers = {};
6628
- this.fileWriters = (Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : []).map((p) => new JsonlFileWriter(p));
6727
+ const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
6728
+ this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
6629
6729
  this.rateLimitRegistry = createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
6630
6730
  this.rateLimitRegistry.on("ratelimit:hit", (data) => {
6631
6731
  logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
@@ -6745,6 +6845,25 @@ var Evaluator = class {
6745
6845
  this.trackCompletedRow(evalStep, row, context);
6746
6846
  context.numComplete++;
6747
6847
  const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
6848
+ if (context.testSuite.extensions?.length) try {
6849
+ const afterEachOut = await runExtensionHook(context.testSuite.extensions, "afterEach", {
6850
+ test: evalStep.test,
6851
+ result: {
6852
+ ...row,
6853
+ namedScores: { ...row.namedScores },
6854
+ metadata: { ...row.metadata },
6855
+ response: row.response ? {
6856
+ ...row.response,
6857
+ metadata: { ...row.response.metadata }
6858
+ } : row.response
6859
+ }
6860
+ });
6861
+ row.namedScores = filterFiniteScores(afterEachOut.result.namedScores);
6862
+ row.metadata = afterEachOut.result.metadata;
6863
+ if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
6864
+ } catch (error) {
6865
+ logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
6866
+ }
6748
6867
  await this.persistEvalRow(row);
6749
6868
  if (this.abortIfTargetUnavailable(row, context)) break;
6750
6869
  const metrics = context.prompts[row.promptIdx].metrics;
@@ -6756,10 +6875,6 @@ var Evaluator = class {
6756
6875
  promptEvalCount,
6757
6876
  row
6758
6877
  });
6759
- await runExtensionHook(context.testSuite.extensions, "afterEach", {
6760
- test: evalStep.test,
6761
- result: row
6762
- });
6763
6878
  context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
6764
6879
  }
6765
6880
  }
@@ -6914,7 +7029,15 @@ var Evaluator = class {
6914
7029
  })) break;
6915
7030
  }
6916
7031
  } catch (error) {
6917
- await flushGroupedRows();
7032
+ const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
7033
+ try {
7034
+ await flushGroupedRows();
7035
+ } catch (flushError) {
7036
+ logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
7037
+ error: flushError instanceof Error ? flushError.message : String(flushError),
7038
+ pendingRowCount
7039
+ });
7040
+ }
6918
7041
  throw error;
6919
7042
  }
6920
7043
  await flushGroupedRows();
@@ -7350,6 +7473,13 @@ var Evaluator = class {
7350
7473
  if (!this.options.silent) {
7351
7474
  if (serialRunEvalOptions.length > 0) logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
7352
7475
  if (concurrentRunEvalOptions.length > 0) logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
7476
+ logGroupedGradingStatus({
7477
+ concurrency,
7478
+ hasEvalStepTimeout,
7479
+ runEvalOptions,
7480
+ shouldGroupGradingByProvider,
7481
+ usesConversationVar
7482
+ });
7353
7483
  }
7354
7484
  if (this.options.showProgressBar && progressBarManager) {
7355
7485
  await progressBarManager.initialize(runEvalOptions, concurrency, 0);
@@ -8353,7 +8483,7 @@ var Eval = class Eval {
8353
8483
  const evalInstance = new Eval(eval_.config, {
8354
8484
  id: eval_.id,
8355
8485
  createdAt: new Date(eval_.createdAt),
8356
- author: eval_.author || void 0,
8486
+ author: eval_.author,
8357
8487
  description: eval_.description || void 0,
8358
8488
  prompts: eval_.prompts || [],
8359
8489
  datasetId,
@@ -8376,7 +8506,7 @@ var Eval = class Eval {
8376
8506
  return (await getDb().select().from(evalsTable).limit(limit).orderBy(desc(evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
8377
8507
  id: e.id,
8378
8508
  createdAt: new Date(e.createdAt),
8379
- author: e.author || void 0,
8509
+ author: e.author,
8380
8510
  description: e.description || void 0,
8381
8511
  prompts: e.prompts || [],
8382
8512
  persisted: true
@@ -8391,7 +8521,7 @@ var Eval = class Eval {
8391
8521
  return (await getDb().select().from(evalsTable).orderBy(desc(evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
8392
8522
  id: e.id,
8393
8523
  createdAt: new Date(e.createdAt),
8394
- author: e.author || void 0,
8524
+ author: e.author,
8395
8525
  description: e.description || void 0,
8396
8526
  prompts: e.prompts || [],
8397
8527
  persisted: true
@@ -8406,7 +8536,7 @@ var Eval = class Eval {
8406
8536
  static async create(config, renderedPrompts, opts) {
8407
8537
  const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
8408
8538
  const evalId = opts?.id || createEvalId(createdAt);
8409
- const author = opts?.author || getUserEmail();
8539
+ const author = opts && "author" in opts ? opts.author ?? null : getAuthor();
8410
8540
  const db = getDb();
8411
8541
  const datasetId = sha256(JSON.stringify(config.tests || []));
8412
8542
  db.transaction(() => {
@@ -8468,7 +8598,7 @@ var Eval = class Eval {
8468
8598
  });
8469
8599
  return new Eval(config, {
8470
8600
  id: evalId,
8471
- author: opts?.author,
8601
+ author,
8472
8602
  createdAt,
8473
8603
  persisted: true,
8474
8604
  runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions)
@@ -8478,7 +8608,7 @@ var Eval = class Eval {
8478
8608
  const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
8479
8609
  this.createdAt = createdAt.getTime();
8480
8610
  this.id = opts?.id || createEvalId(createdAt);
8481
- this.author = opts?.author;
8611
+ this.author = opts?.author ?? null;
8482
8612
  this.config = config;
8483
8613
  this.results = [];
8484
8614
  this.prompts = opts?.prompts || [];
@@ -9008,7 +9138,7 @@ var Eval = class Eval {
9008
9138
  newConfig.description = copyDescription;
9009
9139
  const newPrompts = structuredClone(this.prompts);
9010
9140
  const newVars = this.vars ? structuredClone(this.vars) : [];
9011
- const author = getUserEmail();
9141
+ const author = getAuthor();
9012
9142
  const db = getDb();
9013
9143
  let copiedCount = 0;
9014
9144
  db.transaction(() => {
@@ -11402,25 +11532,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
11402
11532
  getAssertions(_prompt) {
11403
11533
  return getHarmfulAssertions(this.harmCategory);
11404
11534
  }
11405
- promptsToTestCases(prompts) {
11535
+ async promptsToTestCases(prompts) {
11406
11536
  const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
11407
11537
  const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
11408
- return prompts.map(({ __prompt }) => {
11538
+ const pluginId = getShortPluginId(this.harmCategory);
11539
+ return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
11409
11540
  const vars = { [this.injectVar]: __prompt };
11410
- if (hasMultipleInputs) try {
11411
- const parsed = JSON.parse(__prompt);
11412
- Object.assign(vars, extractVariablesFromJson(parsed, this.config.inputs));
11413
- } catch {}
11541
+ let inputMaterialization;
11542
+ if (hasMultipleInputs) {
11543
+ let parsed;
11544
+ try {
11545
+ parsed = JSON.parse(__prompt);
11546
+ } catch (error) {
11547
+ logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
11548
+ }
11549
+ if (parsed) try {
11550
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
11551
+ materializationIndex,
11552
+ pluginId,
11553
+ provider: this.provider,
11554
+ purpose: this.purpose
11555
+ });
11556
+ Object.assign(vars, materializedVars.vars);
11557
+ inputMaterialization = materializedVars.metadata;
11558
+ } catch (error) {
11559
+ logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
11560
+ throw error;
11561
+ }
11562
+ }
11414
11563
  return {
11415
11564
  vars,
11416
11565
  metadata: {
11417
11566
  harmCategory: harmCategoryLabel,
11418
- pluginId: getShortPluginId(this.harmCategory),
11419
- pluginConfig: this.config
11567
+ pluginId,
11568
+ pluginConfig: this.config,
11569
+ ...inputMaterialization ? { inputMaterialization } : {}
11420
11570
  },
11421
11571
  assert: getHarmfulAssertions(this.harmCategory)
11422
11572
  };
11423
- });
11573
+ }));
11424
11574
  }
11425
11575
  };
11426
11576
  //#endregion
@@ -11429,20 +11579,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
11429
11579
  * Extract content from <Prompt> tags and parse JSON if inputs are defined.
11430
11580
  * Returns the processed prompt and any additional vars extracted from JSON.
11431
11581
  */
11432
- function processPromptForInputs(prompt, _injectVar, inputs) {
11582
+ async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
11433
11583
  let processedPrompt = prompt.trim();
11434
11584
  const additionalVars = {};
11585
+ let additionalMetadata;
11435
11586
  const extractedPrompt = extractPromptFromTags(processedPrompt);
11436
11587
  if (extractedPrompt) processedPrompt = extractedPrompt;
11437
- if (inputs && Object.keys(inputs).length > 0) try {
11438
- const parsed = JSON.parse(processedPrompt);
11439
- Object.assign(additionalVars, extractVariablesFromJson(parsed, inputs));
11440
- } catch {
11441
- logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode");
11588
+ if (inputs && Object.keys(inputs).length > 0) {
11589
+ let parsed;
11590
+ try {
11591
+ parsed = JSON.parse(processedPrompt);
11592
+ } catch (error) {
11593
+ logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
11594
+ }
11595
+ if (parsed) try {
11596
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
11597
+ materializationIndex,
11598
+ pluginId: plugin,
11599
+ provider,
11600
+ purpose
11601
+ });
11602
+ Object.assign(additionalVars, materializedVars.vars);
11603
+ additionalMetadata = materializedVars.metadata;
11604
+ } catch (error) {
11605
+ logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
11606
+ throw error;
11607
+ }
11442
11608
  }
11443
11609
  return {
11444
11610
  processedPrompt,
11445
- additionalVars
11611
+ additionalVars,
11612
+ additionalMetadata
11446
11613
  };
11447
11614
  }
11448
11615
  async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
@@ -11463,15 +11630,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
11463
11630
  };
11464
11631
  const allPrompts = await retryWithDeduplication(generatePrompts, n);
11465
11632
  const inputs = config?.inputs;
11466
- return sampleArray(allPrompts, n).map((prompt) => {
11467
- const { processedPrompt, additionalVars } = processPromptForInputs(prompt, injectVar, inputs);
11633
+ return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
11634
+ const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
11468
11635
  const testCase = createTestCase(injectVar, processedPrompt, plugin);
11469
11636
  if (Object.keys(additionalVars).length > 0) testCase.vars = {
11470
11637
  ...testCase.vars,
11471
11638
  ...additionalVars
11472
11639
  };
11640
+ if (additionalMetadata) testCase.metadata = {
11641
+ ...testCase.metadata,
11642
+ inputMaterialization: additionalMetadata
11643
+ };
11473
11644
  return testCase;
11474
- });
11645
+ }));
11475
11646
  }
11476
11647
  //#endregion
11477
11648
  //#region src/redteam/plugins/teenSafety/graderExamples.ts
@@ -11789,7 +11960,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
11789
11960
  function computeModifiersFromConfig(config) {
11790
11961
  const modifiers = { ...config?.modifiers };
11791
11962
  if (config?.language && typeof config.language === "string") modifiers.language = config.language;
11792
- if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
11963
+ if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
11793
11964
  const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
11794
11965
  if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
11795
11966
  return modifiers;
@@ -11955,6 +12126,31 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
11955
12126
  return [];
11956
12127
  }
11957
12128
  }
12129
+ async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
12130
+ const inputs = config.inputs;
12131
+ if (!inputs || Object.keys(inputs).length === 0) return testCases;
12132
+ return Promise.all(testCases.map(async (testCase, materializationIndex) => {
12133
+ const inputVars = extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
12134
+ if (!inputVars) return testCase;
12135
+ const materializedVars = await materializeInputVariablesWithMetadata(inputVars, inputs, {
12136
+ materializationIndex,
12137
+ pluginId,
12138
+ provider,
12139
+ purpose
12140
+ });
12141
+ return {
12142
+ ...testCase,
12143
+ vars: {
12144
+ ...testCase.vars || {},
12145
+ ...materializedVars.vars
12146
+ },
12147
+ metadata: {
12148
+ ...testCase.metadata || {},
12149
+ ...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
12150
+ }
12151
+ };
12152
+ }));
12153
+ }
11958
12154
  function createPluginFactory(PluginClass, key, validate) {
11959
12155
  return {
11960
12156
  key,
@@ -11965,13 +12161,21 @@ function createPluginFactory(PluginClass, key, validate) {
11965
12161
  logger.debug(`Using local redteam generation for ${key}`);
11966
12162
  return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
11967
12163
  }
11968
- const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
12164
+ const pluginId = getShortPluginId(key);
12165
+ const testCases = await materializeRemoteTestCaseInputs({
12166
+ config: configWithDefaults ?? {},
12167
+ injectVar,
12168
+ pluginId,
12169
+ provider,
12170
+ purpose,
12171
+ testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
12172
+ });
11969
12173
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
11970
12174
  return testCases.map((testCase) => ({
11971
12175
  ...testCase,
11972
12176
  metadata: {
11973
12177
  ...testCase.metadata,
11974
- pluginId: getShortPluginId(key),
12178
+ pluginId,
11975
12179
  pluginConfig: {
11976
12180
  ...configWithDefaults,
11977
12181
  modifiers: computedModifiers
@@ -12028,7 +12232,7 @@ const pluginFactories = [
12028
12232
  key: category,
12029
12233
  action: async (params) => {
12030
12234
  if (neverGenerateRemote()) {
12031
- logger.error(`${category} plugin requires remote generation to be enabled`);
12235
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
12032
12236
  return [];
12033
12237
  }
12034
12238
  const testCases = await getHarmfulTests(params, category);
@@ -12051,13 +12255,21 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
12051
12255
  key: category,
12052
12256
  action: async (params) => {
12053
12257
  if (shouldGenerateRemote()) {
12054
- const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
12258
+ const pluginId = getShortPluginId(category);
12259
+ const testCases = await materializeRemoteTestCaseInputs({
12260
+ config: params.config ?? {},
12261
+ injectVar: params.injectVar,
12262
+ pluginId,
12263
+ provider: params.provider,
12264
+ purpose: params.purpose,
12265
+ testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
12266
+ });
12055
12267
  const computedModifiers = computeModifiersFromConfig(params.config);
12056
12268
  return testCases.map((testCase) => ({
12057
12269
  ...testCase,
12058
12270
  metadata: {
12059
12271
  ...testCase.metadata,
12060
- pluginId: getShortPluginId(category),
12272
+ pluginId,
12061
12273
  pluginConfig: {
12062
12274
  ...params.config,
12063
12275
  modifiers: computedModifiers
@@ -12079,16 +12291,24 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
12079
12291
  key: category,
12080
12292
  action: async (params) => {
12081
12293
  if (neverGenerateRemote()) {
12082
- logger.error(`${category} plugin requires remote generation to be enabled`);
12294
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
12083
12295
  return [];
12084
12296
  }
12085
- const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
12297
+ const pluginId = getShortPluginId(category);
12298
+ const testCases = await materializeRemoteTestCaseInputs({
12299
+ config: params.config ?? {},
12300
+ injectVar: params.injectVar,
12301
+ pluginId,
12302
+ provider: params.provider,
12303
+ purpose: params.purpose,
12304
+ testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
12305
+ });
12086
12306
  const computedModifiers = computeModifiersFromConfig(params.config);
12087
12307
  return testCases.map((testCase) => ({
12088
12308
  ...testCase,
12089
12309
  metadata: {
12090
12310
  ...testCase.metadata,
12091
- pluginId: getShortPluginId(category),
12311
+ pluginId,
12092
12312
  pluginConfig: {
12093
12313
  ...params.config,
12094
12314
  modifiers: computedModifiers
@@ -12101,19 +12321,27 @@ function createRemotePlugin(key, validate) {
12101
12321
  return {
12102
12322
  key,
12103
12323
  validate,
12104
- action: async ({ purpose, injectVar, n, config }) => {
12324
+ action: async ({ provider, purpose, injectVar, n, config }) => {
12105
12325
  const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
12106
12326
  if (neverGenerateRemote()) {
12107
- logger.error(`${key} plugin requires remote generation to be enabled`);
12327
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
12108
12328
  return [];
12109
12329
  }
12110
- const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
12330
+ const pluginId = getShortPluginId(key);
12331
+ const testCases = await materializeRemoteTestCaseInputs({
12332
+ config: configWithDefaults ?? {},
12333
+ injectVar,
12334
+ pluginId,
12335
+ provider,
12336
+ purpose,
12337
+ testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
12338
+ });
12111
12339
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
12112
12340
  const testsWithMetadata = testCases.map((testCase) => ({
12113
12341
  ...testCase,
12114
12342
  metadata: {
12115
12343
  ...testCase.metadata,
12116
- pluginId: getShortPluginId(key),
12344
+ pluginId,
12117
12345
  pluginConfig: {
12118
12346
  ...configWithDefaults,
12119
12347
  modifiers: computedModifiers
@@ -12183,6 +12411,37 @@ function getPolicyText(metadata) {
12183
12411
  return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
12184
12412
  }
12185
12413
  }
12414
+ async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
12415
+ const inputs = testCase.metadata?.pluginConfig?.inputs;
12416
+ const inputMaterialization = testCase.metadata?.inputMaterialization;
12417
+ if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
12418
+ inputMaterialization,
12419
+ vars: testCase.vars
12420
+ };
12421
+ try {
12422
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
12423
+ materializationIndex,
12424
+ pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
12425
+ provider,
12426
+ purpose
12427
+ });
12428
+ return {
12429
+ inputMaterialization: materializedVars.metadata ? {
12430
+ ...inputMaterialization,
12431
+ ...materializedVars.metadata
12432
+ } : inputMaterialization,
12433
+ vars: {
12434
+ ...testCase.vars,
12435
+ ...materializedVars.vars
12436
+ }
12437
+ };
12438
+ } catch {
12439
+ return {
12440
+ inputMaterialization,
12441
+ vars: testCase.vars
12442
+ };
12443
+ }
12444
+ }
12186
12445
  /**
12187
12446
  * Gets the severity level for a plugin based on its ID and configuration.
12188
12447
  * @param pluginId - The ID of the plugin.
@@ -12324,6 +12583,7 @@ const categories = {
12324
12583
  foundation: FOUNDATION_PLUGINS,
12325
12584
  harmful: Object.keys(HARM_PLUGINS),
12326
12585
  "coding-agent:core": CODING_AGENT_CORE_PLUGINS,
12586
+ "coding-agent:all": CODING_AGENT_PLUGINS,
12327
12587
  bias: BIAS_PLUGINS,
12328
12588
  pii: PII_PLUGINS,
12329
12589
  medical: MEDICAL_PLUGINS,
@@ -12418,7 +12678,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
12418
12678
  * @param injectVar - The variable to inject.
12419
12679
  * @returns An array of new test cases generated by strategies.
12420
12680
  */
12421
- async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
12681
+ async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
12422
12682
  const newTestCases = [];
12423
12683
  const strategyResults = {};
12424
12684
  for (const strategy of strategies) {
@@ -12474,14 +12734,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
12474
12734
  }
12475
12735
  }
12476
12736
  resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
12477
- newTestCases.push(...resultTestCases.map((t) => {
12478
- const inputs = t?.metadata?.pluginConfig?.inputs;
12479
- let updatedVars = t.vars;
12480
- if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
12481
- const parsed = JSON.parse(String(t.vars[injectVar]));
12482
- updatedVars = { ...t.vars };
12483
- Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
12484
- } catch {}
12737
+ newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
12738
+ const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
12485
12739
  const strategyConfig = {
12486
12740
  ...strategy.config || {},
12487
12741
  ...maxCharsPerMessage ? { maxCharsPerMessage } : {},
@@ -12489,16 +12743,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
12489
12743
  };
12490
12744
  return {
12491
12745
  ...t,
12492
- vars: updatedVars,
12746
+ vars,
12493
12747
  metadata: {
12494
12748
  ...t?.metadata || {},
12495
12749
  ...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
12496
12750
  ...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
12497
12751
  ...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
12752
+ ...inputMaterialization && { inputMaterialization },
12498
12753
  ...Object.keys(strategyConfig).length > 0 && { strategyConfig }
12499
12754
  }
12500
12755
  };
12501
- }));
12756
+ })));
12502
12757
  const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
12503
12758
  const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
12504
12759
  const applyNumTestsCap = (calculatedRequested) => {
@@ -12953,7 +13208,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12953
13208
  targetIds,
12954
13209
  ...retryStrategy.config
12955
13210
  };
12956
- const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
13211
+ const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
12957
13212
  pluginTestCases.push(...retryTestCases);
12958
13213
  Object.assign(strategyResults, retryResults);
12959
13214
  if (showProgressBar) progressBar?.increment(retryTestCases.length);
@@ -12961,7 +13216,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12961
13216
  checkAbort();
12962
13217
  const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
12963
13218
  if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
12964
- const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
13219
+ const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
12965
13220
  Object.assign(strategyResults, otherStrategyResults);
12966
13221
  if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
12967
13222
  const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
@@ -13854,6 +14109,10 @@ function stripAuthFromUrl(urlString) {
13854
14109
  }
13855
14110
  }
13856
14111
  async function handleEmailCollection(evalRecord) {
14112
+ if (evalRecord.author) {
14113
+ logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
14114
+ return;
14115
+ }
13857
14116
  if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
13858
14117
  let email = getUserEmail();
13859
14118
  if (!email) {
@@ -13997,6 +14256,14 @@ function shouldShareResults(opts) {
13997
14256
  return cloudConfig.isEnabled() && sharing !== false;
13998
14257
  }
13999
14258
  //#endregion
14259
+ //#region src/commands/eval/redteamWarning.ts
14260
+ function warnIfRedteamConfigHasNoTests(config, testSuite) {
14261
+ if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
14262
+ Warning: Config file has a redteam section but no test cases.
14263
+ Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
14264
+ `));
14265
+ }
14266
+ //#endregion
14000
14267
  //#region src/util/formatDuration.ts
14001
14268
  /**
14002
14269
  * Formats a duration in seconds into a human-readable string
@@ -14437,14 +14704,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
14437
14704
  state.resume = true;
14438
14705
  state.retryMode = true;
14439
14706
  } else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
14440
- if (!cmdObj.envPath && commandLineOptions?.envPath) {
14707
+ if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
14441
14708
  logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
14442
14709
  setupEnv(commandLineOptions.envPath);
14443
14710
  }
14444
- if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
14445
- Warning: Config file has a redteam section but no test cases.
14446
- Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
14447
- `));
14711
+ warnIfRedteamConfigHasNoTests(config, testSuite);
14448
14712
  if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
14449
14713
  const maybeUrl = config.providers[0]?.config?.url;
14450
14714
  if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
@@ -14552,7 +14816,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
14552
14816
  ${z.prettifyError(testSuiteSchema.error)}
14553
14817
 
14554
14818
  Please review your promptfooconfig.yaml configuration.`));
14555
- const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, { runtimeOptions: options }) : new Eval(config, { runtimeOptions: options });
14819
+ const author = getAuthor();
14820
+ const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
14821
+ author,
14822
+ runtimeOptions: options
14823
+ }) : new Eval(config, {
14824
+ author,
14825
+ runtimeOptions: options
14826
+ });
14556
14827
  const abortController = new AbortController();
14557
14828
  const previousAbortSignal = evaluateOptions.abortSignal;
14558
14829
  evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
@@ -14954,6 +15225,12 @@ async function doRedteamRun(options) {
14954
15225
  return evalResult;
14955
15226
  }
14956
15227
  //#endregion
15228
+ //#region src/types/transform.ts
15229
+ /** Runtime type guard for `TransformFunction` values. */
15230
+ function isTransformFunction(value) {
15231
+ return typeof value === "function";
15232
+ }
15233
+ //#endregion
14957
15234
  //#region src/index.ts
14958
15235
  /**
14959
15236
  * Shallow-clone a test case so the caller can swap in resolved ApiProvider
@@ -14975,66 +15252,148 @@ function cloneTestForResolve(test) {
14975
15252
  if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
14976
15253
  return cloned;
14977
15254
  }
15255
+ function toSerializableProviderRef(provider) {
15256
+ if (isApiProvider(provider)) return sanitizeProvider(provider);
15257
+ if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
15258
+ return provider;
15259
+ }
15260
+ function isRecord(value) {
15261
+ return Boolean(value && typeof value === "object" && !Array.isArray(value));
15262
+ }
15263
+ function withSerializableProvider(record) {
15264
+ if (!isApiProvider(record.provider)) return record;
15265
+ return {
15266
+ ...record,
15267
+ provider: sanitizeProvider(record.provider)
15268
+ };
15269
+ }
15270
+ /**
15271
+ * Function-valued transforms are first-class at runtime but are silently dropped
15272
+ * by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
15273
+ * a function reference, so replace every `transform`-like field with a
15274
+ * `[inline function]: name` marker. Non-function values pass through unchanged.
15275
+ *
15276
+ * `droppedRef.value` is flipped to `true` the first time a function is replaced
15277
+ * so the caller can emit a single warning instead of logging per field.
15278
+ */
15279
+ function replaceFunctionTransforms(record, droppedRef) {
15280
+ let result;
15281
+ for (const key of TRANSFORM_KEYS) {
15282
+ const value = record[key];
15283
+ if (!isTransformFunction(value)) continue;
15284
+ if (!result) result = { ...record };
15285
+ result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
15286
+ droppedRef.value = true;
15287
+ }
15288
+ return result ?? record;
15289
+ }
15290
+ function toSerializableAssertion(assertion, droppedRef) {
15291
+ if (!isRecord(assertion)) return assertion;
15292
+ let sanitizedAssertion = withSerializableProvider(assertion);
15293
+ sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
15294
+ if (Array.isArray(assertion.assert)) sanitizedAssertion = {
15295
+ ...sanitizedAssertion,
15296
+ assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
15297
+ };
15298
+ return sanitizedAssertion;
15299
+ }
15300
+ function toSerializableTestCase(test, droppedRef) {
15301
+ if (!isRecord(test)) return test;
15302
+ let sanitizedTest = withSerializableProvider(test);
15303
+ if (isRecord(test.options)) {
15304
+ let options = withSerializableProvider(test.options);
15305
+ options = replaceFunctionTransforms(options, droppedRef);
15306
+ if (options !== test.options) sanitizedTest = {
15307
+ ...sanitizedTest,
15308
+ options
15309
+ };
15310
+ }
15311
+ if (Array.isArray(test.assert)) sanitizedTest = {
15312
+ ...sanitizedTest,
15313
+ assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
15314
+ };
15315
+ return sanitizedTest;
15316
+ }
15317
+ function toSerializableScenario(scenario, droppedRef) {
15318
+ if (!isRecord(scenario)) return scenario;
15319
+ if (!Array.isArray(scenario.tests)) return scenario;
15320
+ return {
15321
+ ...scenario,
15322
+ tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
15323
+ };
15324
+ }
15325
+ function createSerializableUnifiedConfig(testSuite, prompts) {
15326
+ const droppedRef = { value: false };
15327
+ const config = {
15328
+ ...testSuite,
15329
+ providers: toSerializableProviderRef(testSuite.providers),
15330
+ defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
15331
+ tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
15332
+ scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
15333
+ prompts
15334
+ };
15335
+ if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
15336
+ return config;
15337
+ }
14978
15338
  async function evaluate(testSuite, options = {}) {
14979
- if (testSuite.writeLatestResults) await runDbMigrations();
14980
- const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
15339
+ const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
15340
+ if (testSuiteConfig.writeLatestResults) await runDbMigrations();
15341
+ const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
14981
15342
  const providerMap = {};
14982
15343
  for (const p of loadedProviders) {
14983
15344
  providerMap[p.id()] = p;
14984
15345
  if (p.label) providerMap[p.label] = p;
14985
15346
  }
14986
- let resolvedDefaultTest = testSuite.defaultTest;
14987
- if (typeof testSuite.defaultTest === "string" && testSuite.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuite.defaultTest);
15347
+ let resolvedDefaultTest = testSuiteConfig.defaultTest;
15348
+ if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
14988
15349
  const constructedTestSuite = {
14989
- ...testSuite,
15350
+ ...testSuiteConfig,
14990
15351
  defaultTest: resolvedDefaultTest,
14991
- scenarios: testSuite.scenarios,
15352
+ scenarios: testSuiteConfig.scenarios,
14992
15353
  providers: loadedProviders,
14993
- tests: await readTests(testSuite.tests),
14994
- nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
14995
- prompts: await processPrompts(testSuite.prompts)
15354
+ tests: await readTests(testSuiteConfig.tests),
15355
+ nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
15356
+ prompts: await processPrompts(testSuiteConfig.prompts)
14996
15357
  };
14997
15358
  if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
14998
15359
  constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
14999
15360
  if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
15000
- env: testSuite.env,
15361
+ env: testSuiteConfig.env,
15001
15362
  basePath: state.basePath
15002
15363
  });
15003
15364
  if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
15004
- env: testSuite.env,
15365
+ env: testSuiteConfig.env,
15005
15366
  basePath: state.basePath
15006
15367
  });
15007
15368
  }
15008
15369
  constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
15009
15370
  for (const test of constructedTestSuite.tests) {
15010
15371
  if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
15011
- env: testSuite.env,
15372
+ env: testSuiteConfig.env,
15012
15373
  basePath: state.basePath
15013
15374
  });
15014
15375
  for (const assertion of test.assert || []) {
15015
15376
  if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
15016
15377
  if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
15017
- env: testSuite.env,
15378
+ env: testSuiteConfig.env,
15018
15379
  basePath: state.basePath
15019
15380
  });
15020
15381
  }
15021
15382
  }
15022
15383
  if (options.cache === false) disableCache();
15023
- const parsedProviderPromptMap = readProviderPromptMap(testSuite, constructedTestSuite.prompts);
15024
- const unifiedConfig = {
15025
- ...testSuite,
15026
- prompts: constructedTestSuite.prompts
15027
- };
15028
- const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
15384
+ const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
15385
+ const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
15386
+ const author = getAuthor(suiteAuthor);
15387
+ const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
15029
15388
  const ret = await evaluate$1({
15030
15389
  ...constructedTestSuite,
15031
15390
  providerPromptMap: parsedProviderPromptMap
15032
15391
  }, evalRecord, {
15033
15392
  eventSource: "library",
15034
- isRedteam: Boolean(testSuite.redteam),
15393
+ isRedteam: Boolean(testSuiteConfig.redteam),
15035
15394
  ...options
15036
15395
  });
15037
- if (testSuite.writeLatestResults && testSuite.sharing) if (isSharingEnabled(ret)) try {
15396
+ if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
15038
15397
  const shareableUrl = await createShareableUrl(ret, { silent: true });
15039
15398
  if (shareableUrl) {
15040
15399
  ret.shareableUrl = shareableUrl;
@@ -15045,9 +15404,9 @@ async function evaluate(testSuite, options = {}) {
15045
15404
  logger.warn(`Failed to create shareable URL: ${error}`);
15046
15405
  }
15047
15406
  else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
15048
- if (testSuite.outputPath) {
15049
- if (typeof testSuite.outputPath === "string") await writeOutput(testSuite.outputPath, evalRecord, null);
15050
- else if (Array.isArray(testSuite.outputPath)) await writeMultipleOutputs(testSuite.outputPath, evalRecord, null);
15407
+ if (testSuiteConfig.outputPath) {
15408
+ if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
15409
+ else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
15051
15410
  }
15052
15411
  return ret;
15053
15412
  }
@@ -15076,6 +15435,6 @@ var src_default = {
15076
15435
  redteam
15077
15436
  };
15078
15437
  //#endregion
15079
- export { AssertionOrSetSchema, AssertionSchema, AssertionSetSchema, AssertionTypeSchema, AtomicTestCaseSchema, BaseAssertionTypesSchema, BaseTokenUsageSchema, CommandLineOptionsSchema, CompletedPromptSchema, CompletionTokenDetailsSchema, ConversationMessageSchema, DerivedMetricSchema, EvalResultsFilterMode, EvaluateOptionsSchema, GradingConfigSchema, InputsSchema, NotPrefixedAssertionTypesSchema, OutputConfigSchema, OutputFileExtension, PartialGenerationError, PluginConfigSchema, PolicyObjectSchema, ProvidersSchema, ResultFailureReason, ScenarioSchema, SpecialAssertionTypesSchema, StrategyConfigSchema, TestCaseSchema, TestCaseWithVarsFileSchema, TestCasesWithMetadataPromptSchema, TestCasesWithMetadataSchema, TestGeneratorConfigSchema, TestSuiteConfigSchema, TestSuiteSchema, UnifiedConfigSchema, VarsSchema, assertions_default as assertions, cache_exports as cache, src_default as default, evaluate, generateTable, guardrails, isApiProvider, isGradingResult, isProviderOptions, isResultFailureReason, loadApiProvider, redteam };
15438
+ export { AssertionOrSetSchema, AssertionSchema, AssertionSetSchema, AssertionTypeSchema, AtomicTestCaseSchema, BaseAssertionTypesSchema, BaseTokenUsageSchema, CommandLineOptionsSchema, CompletedPromptSchema, CompletionTokenDetailsSchema, ConversationMessageSchema, DerivedMetricSchema, DocumentMediaInjectionPlacementSchema, DocumentMediaInjectionPlacementValues, DocxInjectionPlacementSchema, DocxInjectionPlacementValues, EvalResultsFilterMode, EvaluateOptionsSchema, GradingConfigSchema, InputConfigSchema, InputDefinitionObjectSchema, InputDefinitionSchema, InputTypeSchema, InputTypeValues, InputsSchema, NotPrefixedAssertionTypesSchema, OutputConfigSchema, OutputFileExtension, PartialGenerationError, PluginConfigSchema, PolicyObjectSchema, ProvidersSchema, ResultFailureReason, ScenarioSchema, SpecialAssertionTypesSchema, StrategyConfigSchema, TestCaseSchema, TestCaseWithVarsFileSchema, TestCasesWithMetadataPromptSchema, TestCasesWithMetadataSchema, TestGeneratorConfigSchema, TestSuiteConfigSchema, TestSuiteSchema, UnifiedConfigSchema, VarsSchema, assertions_default as assertions, buildInputPromptDescription, cache_exports as cache, src_default as default, evaluate, generateTable, getInputDescription, getInputType, guardrails, isApiProvider, isGradingResult, isProviderOptions, isResultFailureReason, isTransformFunction, loadApiProvider, normalizeInputDefinition, normalizeInputs, redteam };
15080
15439
 
15081
15440
  //# sourceMappingURL=index.js.map