promptfoo 0.121.5 → 0.121.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (490) hide show
  1. package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
  2. package/dist/src/{accounts-BIFntVWB.cjs → accounts-BVz5gHLK.cjs} +16 -10
  3. package/dist/src/{accounts-bnyHT7Ju.js → accounts-BWjqwsrf.js} +12 -7
  4. package/dist/src/{accounts-CLJHCDDb.js → accounts-D6IBfEE0.js} +13 -8
  5. package/dist/src/{accounts-CaLNYnf7.js → accounts-DAv_0iE7.js} +12 -7
  6. package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-BJKAkz2e.js} +2 -2
  7. package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-C-A92xhn.cjs} +8 -7
  8. package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-DmoS_S4B.js} +2 -2
  9. package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-GdToujHu.js} +2 -2
  10. package/dist/src/{agents-BGqaTDnr.js → agents-2C8NN6I1.js} +20 -9
  11. package/dist/src/{agents-n6vPqV3i.js → agents-BMAiSR2o.js} +4 -4
  12. package/dist/src/{agents-BYdMl1UE.js → agents-C-PGaxwj.js} +20 -9
  13. package/dist/src/{agents-BV9yFpXX.js → agents-C98cz5pl.js} +20 -9
  14. package/dist/src/{agents-WULPVjbH.cjs → agents-DB8Ub2Ld.cjs} +7 -6
  15. package/dist/src/{agents-DhxWMCtH.js → agents-Dwshy2H8.js} +4 -4
  16. package/dist/src/{agents-emVcx3yh.js → agents-mlKjx-cK.js} +2 -2
  17. package/dist/src/{agents-DiWmQYH9.cjs → agents-n2ej-c4H.cjs} +21 -10
  18. package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-Bi-laUlp.js} +6 -6
  19. package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-D0OFV4Vj.cjs} +7 -7
  20. package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-WyUK0wYy.js} +6 -6
  21. package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-v-63ZjEI.js} +6 -6
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-sCXUfaWo.js +1 -0
  24. package/dist/src/app/assets/index-BopgkZEh.js +388 -0
  25. package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
  26. package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
  27. package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
  28. package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
  29. package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
  30. package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
  31. package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
  32. package/dist/src/app/index.html +6 -6
  33. package/dist/src/{audio-CScmnmEB.js → audio-DnEIHWZR.js} +3 -3
  34. package/dist/src/{audio-Da8U9IS5.js → audio-heR0mu0n.js} +3 -3
  35. package/dist/src/{audio-BvpTOArF.js → audio-wafFO1wn.js} +3 -3
  36. package/dist/src/{audio-C0vDeS0j.cjs → audio-x44tsxIo.cjs} +4 -4
  37. package/dist/src/authoritativeMarkupInjection-2G3Be6oL.cjs +74 -0
  38. package/dist/src/authoritativeMarkupInjection-81PPynHR.js +75 -0
  39. package/dist/src/authoritativeMarkupInjection-D9O70HPi.js +74 -0
  40. package/dist/src/authoritativeMarkupInjection-PxSf3Rh8.js +74 -0
  41. package/dist/src/{base-dYsl2hmL.js → base-0Gzzue9Z.js} +4 -3
  42. package/dist/src/{base-BOMaNEes.js → base-Bp4c52YZ.js} +4 -3
  43. package/dist/src/{base-BTux96b1.js → base-BuNn-YBX.js} +4 -3
  44. package/dist/src/{base-Tw6uhH8K.cjs → base-BzdS8tod.cjs} +5 -4
  45. package/dist/src/bestOfN-BHiOjeaq.js +136 -0
  46. package/dist/src/bestOfN-CdkNPPbX.js +136 -0
  47. package/dist/src/bestOfN-ClUSOhN0.js +137 -0
  48. package/dist/src/bestOfN-DWwXEg1h.cjs +140 -0
  49. package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
  50. package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
  51. package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
  52. package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
  53. package/dist/src/cache-BKYi3kAA.js +3 -0
  54. package/dist/src/{cache-BI5BY7ey.js → cache-BR77mdIR.js} +127 -11
  55. package/dist/src/{cache-Cr-qWIbP.js → cache-CrioYnaa.js} +125 -10
  56. package/dist/src/{cache-Bzttsk0X.js → cache-DdriHsNX.js} +125 -10
  57. package/dist/src/{cache-DGg-yTZG.cjs → cache-h5MWOBZI.cjs} +135 -25
  58. package/dist/src/{chat-Cx_LkwvZ.js → chat-BBEnnpQk.js} +11 -11
  59. package/dist/src/{chat-DChSH_Es.js → chat-BSos6PvZ.js} +9 -9
  60. package/dist/src/{chat-aMQZw6R7.js → chat-Bnt7ieO0.js} +4 -4
  61. package/dist/src/{chat-vYqqv1gP.cjs → chat-DSyYuTYT.cjs} +14 -13
  62. package/dist/src/{chat-BLOdH60v.js → chat-DTdf-J5Q.js} +11 -11
  63. package/dist/src/{chat-DH97tVV9.cjs → chat-Dq3DomYU.cjs} +6 -6
  64. package/dist/src/{chat-DG2LkwLq.js → chat-g5QLeLOo.js} +4 -4
  65. package/dist/src/{chat-D9nudO9b.js → chat-mTTuUAYb.js} +4 -4
  66. package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
  67. package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
  68. package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
  69. package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
  70. package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-BGUac_kS.js} +213 -66
  71. package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-BP__YGfK.js} +213 -66
  72. package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-Ddgx5BIQ.js} +212 -65
  73. package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-n2XcEclh.cjs} +219 -71
  74. package/dist/src/cloud-DiWbUiVP.js +3 -0
  75. package/dist/src/{cloud-Da0bofJd.js → cloud-DqF5N1aJ.js} +2 -2
  76. package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-BGamMotN.js} +4 -4
  77. package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-BwW8W-w7.js} +4 -4
  78. package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-DCRGnsyL.cjs} +6 -6
  79. package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-sD26nP6V.js} +4 -4
  80. package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-2lnOT4qM.js} +3 -3
  81. package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-B1-8KNCt.js} +3 -3
  82. package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-CCC1DFrC.cjs} +5 -5
  83. package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-DOghiU6r.js} +3 -3
  84. package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-BWeWamEb.js} +19 -4
  85. package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-DyQB1P0p.js} +20 -5
  86. package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-R9u_G7W9.cjs} +27 -11
  87. package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-ZQRJSJjU.js} +20 -5
  88. package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-B62H0fe7.js} +16 -5
  89. package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-BRPUvJG8.cjs} +31 -12
  90. package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-Dio1zJBS.js} +18 -7
  91. package/dist/src/codex-sdk-DxukZs_K.js +3 -0
  92. package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-iEmW1eS2.js} +18 -7
  93. package/dist/src/{cometapi-B5ImDlSm.js → cometapi--hh7dESS.js} +7 -7
  94. package/dist/src/{cometapi-CC7hWxmX.js → cometapi-C7yWNGqt.js} +7 -7
  95. package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CSIi16a0.js} +7 -7
  96. package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-sZgBZtbU.cjs} +8 -8
  97. package/dist/src/{completion-DtQ72Bm3.cjs → completion-CWtqdn3z.cjs} +6 -6
  98. package/dist/src/{completion-Vq_ad618.js → completion-DT8cxo9T.js} +4 -4
  99. package/dist/src/{completion-2iuYVxwi.js → completion-DUScduXp.js} +5 -5
  100. package/dist/src/{completion-CrD6MQ93.js → completion-D_2IOAoS.js} +5 -5
  101. package/dist/src/constants-BjJV0cRr.js +6 -0
  102. package/dist/src/constants-DH5XYLKZ.js +7 -0
  103. package/dist/src/constants-DZGEFLsu.js +6 -0
  104. package/dist/src/constants-a2kYssQk.cjs +11 -0
  105. package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
  106. package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
  107. package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
  108. package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
  109. package/dist/src/crescendo-B41TwUHM.js +722 -0
  110. package/dist/src/crescendo-Bfic7KC4.js +724 -0
  111. package/dist/src/crescendo-DXFB7rHP.cjs +725 -0
  112. package/dist/src/crescendo-DvvYxMLA.js +723 -0
  113. package/dist/src/custom-BdzuqFTN.js +619 -0
  114. package/dist/src/custom-Cf5Q6r-P.js +618 -0
  115. package/dist/src/custom-DLgufezC.js +620 -0
  116. package/dist/src/custom-LEXHCRe3.cjs +621 -0
  117. package/dist/src/{docker--3qzPa-6.js → docker-BOSO_6hK.js} +5 -5
  118. package/dist/src/{docker-D3AY-5F5.cjs → docker-D0h2vFrc.cjs} +6 -6
  119. package/dist/src/{docker-DCsCDvwM.js → docker-D2TWGyTP.js} +5 -5
  120. package/dist/src/{docker-Dorv4_Dg.js → docker-Vj_4_cPg.js} +5 -5
  121. package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BQIApR18.cjs} +6 -6
  122. package/dist/src/{embedding-ChS1ivFS.js → embedding-CStK0TV6.js} +5 -5
  123. package/dist/src/{embedding-DNRvZwRN.js → embedding-CU78FMnw.js} +5 -5
  124. package/dist/src/{embedding-D_bI4NDq.js → embedding-CV8lmCnU.js} +4 -4
  125. package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
  126. package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
  127. package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
  128. package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
  129. package/dist/src/{eval-DJ_4A-tr.js → eval-B3BaNBbO.js} +19 -19
  130. package/dist/src/{eval-BQPLBJbw.js → eval-DfR9885C.js} +1 -1
  131. package/dist/src/evalResult-BtZSUgQv.js +3 -0
  132. package/dist/src/{evalResult-D6P5I5il.js → evalResult-CcSqNl_Y.js} +17 -10
  133. package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-DHXs-9TL.cjs} +29 -11
  134. package/dist/src/{evalResult-pSvGWFMo.js → evalResult-PGqEbasb.js} +18 -11
  135. package/dist/src/evaluator-B9LGbKI8.js +3 -0
  136. package/dist/src/{evaluator-D-UIbbYq.js → evaluator-n_dEb00o.js} +258 -132
  137. package/dist/src/evaluatorHelpers-CrRObe2z.js +510 -0
  138. package/dist/src/evaluatorHelpers-D06I9WFL.cjs +537 -0
  139. package/dist/src/evaluatorHelpers-D1_kwvyp.js +511 -0
  140. package/dist/src/{extractor-DxyiFhPk.js → extractor-BVkZtk4R.js} +5 -5
  141. package/dist/src/{extractor-BM3jRERL.js → extractor-D3Fv_Tdh.js} +5 -5
  142. package/dist/src/{extractor-YlZbUMsL.js → extractor-Jp53vs-6.js} +5 -5
  143. package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-bV_NOoaz.cjs} +6 -6
  144. package/dist/src/{fetch-Y5qX_kST.js → fetch-BSSAcMxf.js} +90 -26
  145. package/dist/src/fetch-BodQTrMU.js +3 -0
  146. package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Cfm4EuOB.cjs} +115 -32
  147. package/dist/src/{fetch-B6ch2nU2.js → fetch-Cpf1U1nO.js} +86 -26
  148. package/dist/src/{fetch-D9xxyC1p.js → fetch-Doks14zQ.js} +90 -26
  149. package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
  150. package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
  151. package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
  152. package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
  153. package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
  154. package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
  155. package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
  156. package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
  157. package/dist/src/goat-Ba7Gznzq.js +467 -0
  158. package/dist/src/goat-CJngS-WU.js +468 -0
  159. package/dist/src/goat-CwsbfQeu.js +466 -0
  160. package/dist/src/goat-DOMbozoX.cjs +470 -0
  161. package/dist/src/graders-B3D7kCcD.js +3 -0
  162. package/dist/src/{graders-CgPn32yp.js → graders-BQt1BaQe.js} +1019 -86
  163. package/dist/src/{graders-Bw1wk_21.cjs → graders-CBQ2s6gz.cjs} +1087 -130
  164. package/dist/src/{graders-CwrbifOo.js → graders-DaRU98zs.js} +1022 -84
  165. package/dist/src/{graders-BoUqsCEm.js → graders-KXzjnIim.js} +1024 -86
  166. package/dist/src/hydra-BU6GjYoQ.js +560 -0
  167. package/dist/src/hydra-BmXHxlyl.js +559 -0
  168. package/dist/src/hydra-CR0KyYDb.js +561 -0
  169. package/dist/src/hydra-RO9jBks7.cjs +560 -0
  170. package/dist/src/{image-BeWaInPF.js → image-BBmZdKO3.js} +3 -3
  171. package/dist/src/{image-qjO6FWPs.js → image-B_PFG7IG.js} +3 -3
  172. package/dist/src/image-BtODOZjh.js +442 -0
  173. package/dist/src/image-CYuNJIqd.js +442 -0
  174. package/dist/src/{image-D10dNAav.cjs → image-E00WFZkq.cjs} +4 -4
  175. package/dist/src/image-HK2Cfqb1.cjs +465 -0
  176. package/dist/src/{image-Dr_3I3nK.js → image-LGj8dTcr.js} +3 -3
  177. package/dist/src/image-YMKejC0r.js +443 -0
  178. package/dist/src/index.cjs +711 -339
  179. package/dist/src/index.d.cts +3159 -1611
  180. package/dist/src/index.d.ts +3158 -1610
  181. package/dist/src/index.js +605 -249
  182. package/dist/src/indirectWebPwn-B80dLlFC.js +260 -0
  183. package/dist/src/indirectWebPwn-BMTXXznx.js +386 -0
  184. package/dist/src/indirectWebPwn-BZFPV7Q9.js +385 -0
  185. package/dist/src/indirectWebPwn-BaEQEOIO.cjs +260 -0
  186. package/dist/src/indirectWebPwn-BzpyMnFS.js +259 -0
  187. package/dist/src/indirectWebPwn-CiWB-vVH.js +385 -0
  188. package/dist/src/indirectWebPwn-D7NA9Nsv.cjs +397 -0
  189. package/dist/src/indirectWebPwn-o_bEFMjP.js +259 -0
  190. package/dist/src/inputVariables-B0qUChbV.js +467 -0
  191. package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
  192. package/dist/src/inputVariables-DXFdi7AI.js +468 -0
  193. package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
  194. package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
  195. package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
  196. package/dist/src/iterative-B-l0syBV.js +501 -0
  197. package/dist/src/iterative-BACUeCCz.cjs +503 -0
  198. package/dist/src/iterative-CVwoExo8.js +502 -0
  199. package/dist/src/iterative-QDrGSyss.js +500 -0
  200. package/dist/src/iterativeImage-BQg2OwA6.js +413 -0
  201. package/dist/src/iterativeImage-Bwn0fM75.js +413 -0
  202. package/dist/src/iterativeImage-CcgVyASo.cjs +415 -0
  203. package/dist/src/iterativeImage-D_UbQXg4.js +414 -0
  204. package/dist/src/iterativeMeta-CIu-CHRS.js +405 -0
  205. package/dist/src/iterativeMeta-CN8CNjFA.js +406 -0
  206. package/dist/src/iterativeMeta-DN6BTjpq.js +404 -0
  207. package/dist/src/iterativeMeta-DlqY3BsS.cjs +405 -0
  208. package/dist/src/iterativeTree-CJ8a8G9T.js +820 -0
  209. package/dist/src/iterativeTree-DcuFXnjL.js +819 -0
  210. package/dist/src/iterativeTree-FrDDYAN0.js +818 -0
  211. package/dist/src/iterativeTree-LOgAi0nU.cjs +821 -0
  212. package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-CdbcGBZF.cjs} +25 -11
  213. package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-CjOXp6Lr.js} +23 -9
  214. package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-DxAq4n4z.js} +24 -10
  215. package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-o_wTLzrt.js} +24 -10
  216. package/dist/src/{litellm-DRc4qWfc.js → litellm-B2gHwya_.js} +4 -4
  217. package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CPpdlO7n.cjs} +5 -5
  218. package/dist/src/{litellm-BLSiANhk.js → litellm-CYfgxLrM.js} +4 -4
  219. package/dist/src/{litellm-DQGo_juI.js → litellm-ojWBKU3C.js} +4 -4
  220. package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
  221. package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-BCCO9XXG.cjs} +9 -8
  222. package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BPrdihAb.js} +5 -5
  223. package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-DP5N79lB.js} +5 -5
  224. package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-DTDyoAlM.js} +5 -5
  225. package/dist/src/main.js +727 -234
  226. package/dist/src/memoryPoisoning-B6N3us35.cjs +106 -0
  227. package/dist/src/memoryPoisoning-Bc_BK_k2.js +106 -0
  228. package/dist/src/memoryPoisoning-D375zwSX.js +107 -0
  229. package/dist/src/memoryPoisoning-DJA0YjJT.js +106 -0
  230. package/dist/src/{messages-CewuNcNS.js → messages-Bg29Nbit.js} +17 -9
  231. package/dist/src/{messages-BnsVHUnm.cjs → messages-BrZEnHsV.cjs} +24 -15
  232. package/dist/src/{messages-CI69Lasb.js → messages-CBulRaud.js} +18 -10
  233. package/dist/src/{messages-B9dSjrNf.js → messages-K9A8RxBM.js} +18 -10
  234. package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
  235. package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
  236. package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
  237. package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
  238. package/dist/src/mischievousUser-BDLwSGN0.js +46 -0
  239. package/dist/src/mischievousUser-Blx-OtT3.js +47 -0
  240. package/dist/src/mischievousUser-CHuTTvBg.js +46 -0
  241. package/dist/src/mischievousUser-CSUrH3fq.cjs +46 -0
  242. package/dist/src/{modelslab-zpz9JcK0.js → modelslab-BjEk7yCP.js} +6 -6
  243. package/dist/src/{modelslab-BCLOtfek.js → modelslab-CBCrdIBc.js} +6 -6
  244. package/dist/src/{modelslab-D73OnKSx.js → modelslab-CrGpXuhv.js} +6 -6
  245. package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-Cvh0klQx.cjs} +7 -7
  246. package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-CUDDZcSA.js} +5 -5
  247. package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-CyDESu5J.js} +5 -5
  248. package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-DH_Ksu6X.cjs} +9 -8
  249. package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-YQAqUYJY.js} +5 -5
  250. package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-BVFFB7JZ.js} +2 -2
  251. package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-Bg3cxMMu.cjs} +5 -4
  252. package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-BhmnRGyh.js} +3 -3
  253. package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-DxmK95c1.js} +3 -3
  254. package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
  255. package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
  256. package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
  257. package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-CHQaZi_-.cjs} +17 -15
  258. package/dist/src/{openclaw-DA8U4DsD.js → openclaw-CWOm2eOr.js} +9 -8
  259. package/dist/src/{openclaw-DObVgpjC.js → openclaw-Cdk9JKfY.js} +9 -8
  260. package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-DhF8rUpI.js} +9 -8
  261. package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-BRjiPV-g.js} +38 -3
  262. package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-CPFMw0ed.cjs} +45 -9
  263. package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-CaSOCsBA.js} +39 -4
  264. package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-CqkyG8De.js} +39 -4
  265. package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
  266. package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
  267. package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
  268. package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
  269. package/dist/src/packageParser--MWTSrPW.js +36 -0
  270. package/dist/src/packageParser-CgE-ziRo.js +35 -0
  271. package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
  272. package/dist/src/packageParser-hwwSGnAZ.js +35 -0
  273. package/dist/src/processShim-BBxt7LKO.js +95 -0
  274. package/dist/src/processShim-BcGzU8fY.js +94 -0
  275. package/dist/src/processShim-C_z3aRvF.js +94 -0
  276. package/dist/src/processShim-DSY9BV2T.cjs +98 -0
  277. package/dist/src/promptLength-0qIHyhA5.js +71 -0
  278. package/dist/src/promptLength-4X-Wd8PG.js +72 -0
  279. package/dist/src/promptLength-B9nZEfO6.js +71 -0
  280. package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
  281. package/dist/src/promptfoo-BU4_0J85.js +238 -0
  282. package/dist/src/promptfoo-CReYAtfb.js +237 -0
  283. package/dist/src/promptfoo-ClChwT74.cjs +292 -0
  284. package/dist/src/promptfoo-CqEpj6Sr.js +237 -0
  285. package/dist/src/prompts-BYMtqPCw.js +259 -0
  286. package/dist/src/prompts-C-bqE1Yp.js +260 -0
  287. package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
  288. package/dist/src/prompts-DHhQsANy.js +259 -0
  289. package/dist/src/prompts-D_QpZ2Dm.js +271 -0
  290. package/dist/src/prompts-hNvWBD3z.cjs +284 -0
  291. package/dist/src/prompts-huDVH2CI.js +270 -0
  292. package/dist/src/prompts-p78Hul5i.cjs +289 -0
  293. package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
  294. package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
  295. package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
  296. package/dist/src/{providers-DT-GtF2t.js → providers-B7TyByfj.js} +789 -11925
  297. package/dist/src/{providers-CJh7iriU.js → providers-BGc7tDtQ.js} +796 -11872
  298. package/dist/src/{providers-Ctcc592x.js → providers-CSOp-bCm.js} +1 -1
  299. package/dist/src/{providers-eDShy16E.cjs → providers-V6RBuieY.cjs} +837 -12138
  300. package/dist/src/{providers-DRrerKra.js → providers-iH3Sw1yo.js} +851 -11938
  301. package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
  302. package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
  303. package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
  304. package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
  305. package/dist/src/{quiverai-CPKhWgaT.js → quiverai-58BtRRet.js} +3 -3
  306. package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-C1dYu5MW.js} +3 -3
  307. package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CNMIpZQg.js} +3 -3
  308. package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-fmSfU43k.cjs} +4 -4
  309. package/dist/src/registry-6Jw6ebor.cjs +124 -0
  310. package/dist/src/registry-BTGk2ZkB.js +124 -0
  311. package/dist/src/registry-BTNqmP5o.js +125 -0
  312. package/dist/src/registry-NGnOG2xa.js +124 -0
  313. package/dist/src/{server-gyd6d4Hc.js → remoteGeneration--D6WjzUm.js} +15 -108
  314. package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-BpIYlb_O.cjs} +30 -119
  315. package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-CJC3E0aW.js} +15 -147
  316. package/dist/src/remoteGeneration-D6UjE2JT.js +218 -0
  317. package/dist/src/render-BNTrbmBw.cjs +384 -0
  318. package/dist/src/render-CSP99NLm.js +348 -0
  319. package/dist/src/render-DFfDeYUK.js +347 -0
  320. package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
  321. package/dist/src/render-_6ur1fhE.js +347 -0
  322. package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
  323. package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
  324. package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
  325. package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
  326. package/dist/src/{responses-1ztiVYsx.js → responses-5Gf5HNOi.js} +11 -7
  327. package/dist/src/{responses-BiaBguAu.js → responses-BVi7xIdv.js} +13 -9
  328. package/dist/src/{responses-CF-ayauu.cjs → responses-CUARGrhY.cjs} +16 -11
  329. package/dist/src/{responses-B8haB-mD.js → responses-CrmWv6iz.js} +13 -9
  330. package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
  331. package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
  332. package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
  333. package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
  334. package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
  335. package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
  336. package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
  337. package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
  338. package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
  339. package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
  340. package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-BeVTKfrv.cjs} +65 -53
  341. package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-BwbV5U7_.js} +61 -50
  342. package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-ClAZ6Qwc.js} +61 -50
  343. package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-DY2qrOpq.js} +61 -50
  344. package/dist/src/{scanner-nOCWNIXa.js → scanner-BS-iFIp3.js} +6 -6
  345. package/dist/src/server/index.js +4115 -420
  346. package/dist/src/server-BHOEL8p8.cjs +126 -0
  347. package/dist/src/server-CbiJppij.js +107 -0
  348. package/dist/src/server-D18AAlAc.js +3 -0
  349. package/dist/src/server-DLYjSFm2.js +182 -0
  350. package/dist/src/server-JQYD_Nws.js +146 -0
  351. package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
  352. package/dist/src/shared-CRq0gGVf.js +1334 -0
  353. package/dist/src/shared-CSIGeGLl.js +1334 -0
  354. package/dist/src/shared-DNvim54U.js +1335 -0
  355. package/dist/src/shared-aHWko3P1.cjs +1436 -0
  356. package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
  357. package/dist/src/simulatedUser-C7sOFSF6.js +222 -0
  358. package/dist/src/simulatedUser-DH_7RzEQ.js +222 -0
  359. package/dist/src/simulatedUser-lgMMmniD.js +223 -0
  360. package/dist/src/simulatedUser-mnCUS9Bm.cjs +227 -0
  361. package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
  362. package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
  363. package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
  364. package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
  365. package/dist/src/storage-95htjpLs.js +834 -0
  366. package/dist/src/storage-A4WnAeN3.cjs +911 -0
  367. package/dist/src/storage-B2Ql_oq4.js +822 -0
  368. package/dist/src/storage-EKVWZBNY.js +875 -0
  369. package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
  370. package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
  371. package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
  372. package/dist/src/store-IbiRIF3k.js +3 -0
  373. package/dist/src/strategies-CBI1brSy.js +2333 -0
  374. package/dist/src/strategies-CCcnEbFO.cjs +2360 -0
  375. package/dist/src/strategies-CD1gHeeQ.js +2331 -0
  376. package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
  377. package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
  378. package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
  379. package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
  380. package/dist/src/telemetry-BUm_krVX.js +3 -0
  381. package/dist/src/{telemetry--iqaGyaS.js → telemetry-BbpmrenM.js} +4 -4
  382. package/dist/src/{telemetry-CgdVGV8N.js → telemetry-C4bX-6Sr.js} +4 -4
  383. package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-C_ImlCOk.cjs} +15 -9
  384. package/dist/src/{telemetry-DWdGHvEf.js → telemetry-DOE567Wj.js} +4 -4
  385. package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
  386. package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
  387. package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
  388. package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
  389. package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
  390. package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
  391. package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
  392. package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
  393. package/dist/src/toolAttributes-COVgDrBG.js +87 -0
  394. package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
  395. package/dist/src/tracingOptions-BOP1FsRt.js +220 -0
  396. package/dist/src/tracingOptions-BvmDa_jH.js +219 -0
  397. package/dist/src/tracingOptions-DFBoE8O0.cjs +249 -0
  398. package/dist/src/tracingOptions-xhjOg2b1.js +221 -0
  399. package/dist/src/{transcription-D7Q0vJsh.js → transcription-BOXDoehT.js} +4 -4
  400. package/dist/src/{transcription-Bm2emLmJ.js → transcription-CpVdKc4P.js} +5 -5
  401. package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-D8kkPXg_.cjs} +9 -8
  402. package/dist/src/{transcription-84t4ALo2.js → transcription-Z94eV9LR.js} +5 -5
  403. package/dist/src/transform-BnSTnFlp.js +187 -0
  404. package/dist/src/transform-BnSXWmU_2.cjs +221 -0
  405. package/dist/src/{transform-DtooZqYY.js → transform-BufxPIQL.js} +8 -8
  406. package/dist/src/transform-CGt7Kt3y2.js +186 -0
  407. package/dist/src/transform-CrPGTsij.js +186 -0
  408. package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DJkt81VY.cjs} +12 -11
  409. package/dist/src/{transform-B-b6Cq-q.js → transform-DtTfiGoh.js} +8 -8
  410. package/dist/src/{transform-_DpNB4qp.js → transform-Wp6s_5QE.js} +8 -8
  411. package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
  412. package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
  413. package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
  414. package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
  415. package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
  416. package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
  417. package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
  418. package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
  419. package/dist/src/{util-Dpmm_dAI.cjs → util-BHCAeuXx.cjs} +33 -7
  420. package/dist/src/{util-DFPeFkiV.js → util-CMrHV35u.js} +28 -8
  421. package/dist/src/{util-DvpHnLt0.cjs → util-COnzevXE.cjs} +29 -21
  422. package/dist/src/{util-BlFVL0UF.js → util-CYev3d-r.js} +22 -7
  423. package/dist/src/util-DGqkTb2-.js +327 -0
  424. package/dist/src/util-DNtUsus_.cjs +386 -0
  425. package/dist/src/{util-Dub0f_ej.js → util-DRfqa4xz.js} +17 -10
  426. package/dist/src/{util-BVXcTwXu.js → util-DTq3jq2z.js} +28 -8
  427. package/dist/src/{util-C-kmRosx.js → util-DeY58psG.js} +22 -7
  428. package/dist/src/util-DpV6KT5i.js +327 -0
  429. package/dist/src/{util-3pBZZb_H.js → util-DwNJzqOV.js} +45 -10
  430. package/dist/src/util-ETfU_sS9.js +328 -0
  431. package/dist/src/{util-A5_ZsQUn.cjs → util-NYQvo1C7.cjs} +25 -9
  432. package/dist/src/{util-DN0-b81k.js → util-X4KQgyVD.js} +28 -8
  433. package/dist/src/{util-BQOCAHQC.js → util-jZRrXe1P.js} +46 -11
  434. package/dist/src/{util-B9CNhyac.js → util-o2Qg5rZv.js} +22 -7
  435. package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
  436. package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
  437. package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
  438. package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
  439. package/dist/src/{version-CbuBKu2U.js → version-CHR-EFec.js} +2 -2
  440. package/dist/src/{version-CbpiUINz.js → version-F0YDgb7J.js} +2 -2
  441. package/dist/src/{version-D9zu9FWB.cjs → version-VzUqOBZk.cjs} +2 -2
  442. package/dist/src/{version-0frU0UTr.js → version-qVEN5qCm.js} +2 -2
  443. package/dist/tsconfig.tsbuildinfo +1 -1
  444. package/package.json +49 -23
  445. package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
  446. package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
  447. package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
  448. package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
  449. package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
  450. package/dist/src/cache-BRkhlH3k.cjs +0 -3
  451. package/dist/src/cache-BlC6aeJ0.js +0 -3
  452. package/dist/src/cloud-CoD5OacT.js +0 -3
  453. package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
  454. package/dist/src/codex-sdk-Danroptg.cjs +0 -2
  455. package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
  456. package/dist/src/evalResult-BBK58h2B.js +0 -3
  457. package/dist/src/evalResult-spPqh1G_.js +0 -2
  458. package/dist/src/evaluator-DgLKaZk8.js +0 -3
  459. package/dist/src/fetch-8viavNv8.js +0 -3
  460. package/dist/src/graders-C84JI-m5.js +0 -2
  461. package/dist/src/graders-CBbd0K0Q.cjs +0 -2
  462. package/dist/src/graders-CbQqpHSN.js +0 -3
  463. package/dist/src/graders-DS42d3ZG.js +0 -2
  464. package/dist/src/image-BmilRNqO.js +0 -258
  465. package/dist/src/image-CxJoa3aW.cjs +0 -280
  466. package/dist/src/image-DsGRlkh7.js +0 -257
  467. package/dist/src/image-a_SGUobh.js +0 -257
  468. package/dist/src/providers-BuyzKt7C.js +0 -2
  469. package/dist/src/providers-C7lNVBjX.cjs +0 -3
  470. package/dist/src/providers-CCE2COJi2.js +0 -2
  471. package/dist/src/render-7uNJ2V14.js +0 -135
  472. package/dist/src/render-DlscvAUJ.js +0 -135
  473. package/dist/src/render-eui5p5mL.js +0 -136
  474. package/dist/src/render-tG6ir9_g.cjs +0 -165
  475. package/dist/src/rubyUtils-4hjGxvju.js +0 -3
  476. package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
  477. package/dist/src/server-ByiF3qlg.js +0 -386
  478. package/dist/src/server-C0XKRNB_.cjs +0 -2
  479. package/dist/src/server-C_15p79-.js +0 -3
  480. package/dist/src/store-2OXm_eBY.js +0 -240
  481. package/dist/src/store-BELqNwvz.js +0 -3
  482. package/dist/src/store-uQZ4AjPe.cjs +0 -2
  483. package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
  484. package/dist/src/telemetry-ZdPZc0fm.js +0 -3
  485. package/dist/src/transform-BQt0BeAW.js +0 -3
  486. package/dist/src/transform-Bq5oqC0s.cjs +0 -2
  487. package/dist/src/transform-C9izGX54.cjs +0 -228
  488. package/dist/src/transform-CwbAZ84V.js +0 -216
  489. package/dist/src/transform-DzCF-wqV.js +0 -213
  490. package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/index.js CHANGED
@@ -1,34 +1,47 @@
1
1
  import { C as getEnvFloat, D as getMaxEvalTimeMs, E as getEvalTimeoutMs, O as isCI, S as getEnvBool, T as getEnvString, a as logger, b as summarizeEvaluateResultForLogging, g as getAjv, h as extractJsonObjects, k as state, m as extractFirstJsonObject, n as globalLogCallback, o as setLogCallback, r as isDebugEnabled, s as setLogLevel, t as getLogLevel, v as orderKeys, w as getEnvInt, y as safeJsonStringify } from "./logger-Ct2S6Yx-.js";
2
2
  import { t as invariant } from "./invariant-Ddh24eXh.js";
3
- import { A as TERMINAL_MAX_WIDTH, F as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, P as FILE_METADATA_KEY, S as cloudConfig, _ as parseChatPrompt, a as getCurrentTimestamp, j as getDefaultShareViewBaseUrl, m as isPromptfooSampleTarget, n as fetchWithRetries, o as sleep, r as fetchWithTimeout, t as fetchWithProxy, u as REQUEST_TIMEOUT_MS, x as CloudConfig } from "./fetch-D9xxyC1p.js";
4
- import { n as VERSION } from "./version-CbuBKu2U.js";
5
- import { $ as categoryAliases, A as isApiProvider, At as CompletionTokenDetailsSchema, B as StrategyConfigSchema, C as TestGeneratorConfigSchema, Ct as TELECOM_PLUGINS, D as VarsSchema, E as UnifiedConfigSchema, F as ProvidersSchema, G as DEFAULT_STRATEGIES, H as isUuid, I as ConversationMessageSchema, J as STRATEGY_COLLECTION_MAPPINGS, L as PartialGenerationError, M as RedteamConfigSchema, N as PromptSchema, O as isGradingResult, Q as Severity, R as PluginConfigSchema, S as TestCasesWithMetadataSchema, St as TEEN_SAFETY_PLUGINS, T as TestSuiteSchema, Tt as CODING_AGENT_CORE_PLUGINS, Y as getDefaultNFanout, Z as isFanoutStrategy, _ as ScenarioSchema, _t as PII_PLUGINS, a as AtomicTestCaseSchema, at as DATASET_EXEMPT_PLUGINS, b as TestCaseWithVarsFileSchema, bt as REMOTE_ONLY_PLUGIN_IDS, c as CompletedPromptSchema, ct as FOUNDATION_PLUGINS, d as EvaluateOptionsSchema, dt as LLAMA_GUARD_ENABLED_CATEGORIES, f as GradingConfigSchema, ft as LLAMA_GUARD_REPLICATE_PROVIDER, g as ResultFailureReason, gt as PHARMACY_PLUGINS, h as OutputFileExtension, ht as MULTI_INPUT_VAR, i as AssertionTypeSchema, it as CANARY_BREAKING_STRATEGY_IDS, j as isProviderOptions, jt as InputsSchema, k as isResultFailureReason, kt as BaseTokenUsageSchema, l as DerivedMetricSchema, lt as HARM_PLUGINS, m as OutputConfigSchema, mt as MULTI_INPUT_EXCLUDED_PLUGINS, n as AssertionSchema, nt as ALIASED_PLUGIN_MAPPINGS, o as BaseAssertionTypesSchema, ot as DEFAULT_PLUGINS, p as NotPrefixedAssertionTypesSchema, pt as MEDICAL_PLUGINS, q as STRATEGY_COLLECTIONS, r as AssertionSetSchema, rt as BIAS_PLUGINS, s as CommandLineOptionsSchema, st as FINANCIAL_PLUGINS, t as AssertionOrSetSchema, tt as riskCategorySeverityMap, u as EvalResultsFilterMode, ut as INSURANCE_PLUGINS, v as SpecialAssertionTypesSchema, vt as PLUGIN_CATEGORIES, w as TestSuiteConfigSchema, wt as UNALIGNED_PROVIDER_HARM_PLUGINS, x as TestCasesWithMetadataPromptSchema, y as TestCaseSchema, yt as REDTEAM_PROVIDER_HARM_PLUGINS, z as PolicyObjectSchema } from "./types-DNRZVOue.js";
6
- import { c as setUserEmail, i as getUserEmail, o as isLoggedIntoCloud, r as getAuthor, s as promptForEmailUnverified, t as checkEmailStatusAndMaybeExit } from "./accounts-CaLNYnf7.js";
7
- import { r as importModule, t as getDirectory } from "./esm-BRkfNsYs.js";
8
- import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-DlscvAUJ.js";
9
- import { t as providerRegistry } from "./providerRegistry-DoACwqhD.js";
10
- import { i as getRemoteGenerationUrl, l as shouldGenerateRemote, o as getRemoteHealthUrl, r as promptYesNo, s as neverGenerateRemote } from "./server-gyd6d4Hc.js";
11
- import { $ as getMaxCharsPerMessageModifierValue, B as collectFileMetadata, D as validateStrategies, E as loadStrategy, F as getSessionId, G as redteamProviderManager, H as runExtensionHook, I as getShortPluginId, J as createProviderRateLimitOptions, K as TokenUsageTracker, L as isBasicRefusal, N as extractPromptFromTags, O as pluginMatchesStrategyTargets, P as extractVariablesFromJson, Q as getGeneratedPromptOverLimit, T as Strategies, U as isPackagePath, V as renderPrompt, W as loadFromPackage, X as PromptfooHarmfulCompletionProvider, Z as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, a as resolveProviderConfigs, at as getOrgContext, ct as isCloudProvider, d as MCPProvider, et as throwIfTargetPromptExceedsMaxChars, g as AIStudioChatProvider, h as GoogleLiveProvider, i as resolveProvider, it as getEvalConfigFromCloud, j as extractGoalFromPrompt, k as checkExfilTracking, lt as resolveTeamId, m as VertexChatProvider, n as loadApiProvider, nt as checkCloudPermissions, ot as getPluginSeverityOverridesFromCloud, q as createRateLimitRegistry, r as loadApiProviders, rt as getCloudDatabaseId, t as getProviderIds } from "./providers-CJh7iriU.js";
12
- import { r as runPython } from "./pythonUtils-DMO68Jg7.js";
13
- import { i as isJavascriptFile } from "./fileExtensions-8CjoL7vB.js";
14
- import { A as readFilters, M as loadFunction, N as parseFileUrl, O as maybeLoadToolsFromExternalFile, T as maybeLoadFromExternalFile, _ as isProviderAllowed, a as setupEnv, b as normalizeProviderRef, c as filterRuntimeVars, d as checkProviderApiKeys, f as doesProviderRefMatch, g as isOpenAiProvider, h as isGoogleProvider, i as fetchCsvFromGoogleSheet, j as readOutput, l as getTestCaseDeduplicationKey, m as isAnthropicProvider, n as writeMultipleOutputs, o as deduplicateTestCases, p as getProviderDescription, r as writeOutput, s as extractRuntimeVars, t as printBorder, u as resultIsForTestCase, w as maybeLoadConfigFromExternalFile } from "./util-Dub0f_ej.js";
15
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-4c780gFd.js";
16
- import { t as getBlobByHash } from "./blobs-BW4U31ue.js";
17
- import { a as evalsTable, c as evalsToTagsTable, d as tagsTable, i as evalResultsTable, l as promptsTable, m as getDbSignalPath, o as evalsToDatasetsTable, p as getDb, r as datasetsTable, s as evalsToPromptsTable } from "./tables-WgdUZ8Ck.js";
18
- import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-BM3jRERL.js";
19
- import { c as isNonTransientHttpStatus, i as getCache, n as disableCache, o as withCacheNamespace, r as fetchWithCache, s as NON_TRANSIENT_HTTP_STATUSES, t as cache_exports } from "./cache-Bzttsk0X.js";
20
- import { t as OpenAiChatCompletionProvider } from "./chat-Cx_LkwvZ.js";
21
- import { h as validateFunctionCall } from "./transform-_DpNB4qp.js";
22
- import { l as validateFunctionCall$1 } from "./util-BlFVL0UF.js";
23
- import { i as getProcessShim, n as transform, t as TransformInputType } from "./transform-eGiUAv86.js";
24
- import { t as telemetry } from "./telemetry-CgdVGV8N.js";
25
- import { t as ellipsize } from "./text-DDQP0tuQ.js";
26
- import { t as getTraceStore } from "./store-BPkzEyFM.js";
27
- import { n as sha256, t as randomSequence } from "./createHash-Un4Q_huE.js";
28
- import { n as runRuby } from "./rubyUtils-BI0p46eZ.js";
29
- import { $ as SELECT_BEST_PROMPT, A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as DEFAULT_ANTHROPIC_MODEL, D as HarmbenchPlugin, Dt as withProviderCallExecutionContext, E as ImitationPlugin, Et as getGradingProvider, F as AegisPlugin, G as matchesLlmRubric, H as matchesClosedQa, I as RedteamGraderBase, J as doRemoteGrading, K as matchesPiScore, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as DEFAULT_WEB_SEARCH_PROMPT, R as getCustomPolicies, S as PlinyPlugin, St as getDefaultProviders, T as IntentPlugin, Tt as getAndCheckProvider, U as matchesFactuality, V as fetchHuggingFaceDataset, W as matchesGEval, X as readPrompts, Y as processPrompts, Z as readProviderPromptMap, _ as PoliticsPlugin, _t as tryParse, a as UnverifiableClaimsPlugin, at as CONTEXT_RECALL_ATTRIBUTED_TOKEN, b as isValidPolicyObject, bt as loadFromJavaScriptFile, c as ToolDiscoveryPlugin, ct as CONTEXT_RELEVANCE_BAD, d as TeenSafetyDangerousContentPlugin, dt as cosineSimilarity, et as SUGGEST_PROMPTS_SYSTEM_MESSAGE, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, ft as dotProduct, g as PromptExtractionPlugin, gt as splitIntoSentences, h as RbacPlugin, ht as normalizeMatcherTokenUsage, i as VLGuardPlugin, it as CONTEXT_RECALL, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, lt as loadRubricPrompt, m as ShellInjectionPlugin, mt as fail, n as getGraderById, nt as CONTEXT_FAITHFULNESS_LONGFORM, o as UnsafeBenchPlugin, ot as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, p as SqlInjectionPlugin, pt as euclideanDistance, q as matchesTrajectoryGoalSuccess, r as VLSUPlugin, rt as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, s as ToxicChatPlugin, st as CONTEXT_RELEVANCE, t as GRADERS, tt as ANSWER_RELEVANCY_GENERATE, u as TeenSafetyDangerousRoleplayPlugin, ut as renderLlmRubricPrompt, v as PolicyPlugin, vt as coerceString, w as OverreliancePlugin, wt as callProviderWithContext, x as makeInlinePolicyIdSync, xt as processFileReference, y as determinePolicyTypeFromId, yt as getFinalTest, z as retryWithDeduplication } from "./graders-BoUqsCEm.js";
30
- import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-o8S5huU2.js";
31
- import { t as EvalResult } from "./evalResult-pSvGWFMo.js";
3
+ import { $ as riskCategorySeverityMap, A as RedteamConfigSchema, At as DocumentMediaInjectionPlacementValues, B as isUuid, Bt as getInputDescription, C as TestGeneratorConfigSchema, Ct as CODING_AGENT_CORE_PLUGINS, D as VarsSchema, Dt as BaseTokenUsageSchema, E as UnifiedConfigSchema, F as PartialGenerationError, Ft as InputDefinitionSchema, G as STRATEGY_COLLECTIONS, Gt as isProviderOptions, Ht as normalizeInputDefinition, I as PluginConfigSchema, It as InputTypeSchema, K as STRATEGY_COLLECTION_MAPPINGS, L as PolicyObjectSchema, Lt as InputTypeValues, Mt as DocxInjectionPlacementValues, N as ProvidersSchema, Nt as InputConfigSchema, O as isGradingResult, Ot as CompletionTokenDetailsSchema, P as ConversationMessageSchema, Pt as InputDefinitionObjectSchema, R as StrategyConfigSchema, Rt as InputsSchema, S as TestCasesWithMetadataSchema, St as UNALIGNED_PROVIDER_HARM_PLUGINS, T as TestSuiteSchema, U as DEFAULT_STRATEGIES, Ut as normalizeInputs, Vt as getInputType, Wt as isApiProvider, X as Severity, Y as isFanoutStrategy, Z as categoryAliases, _ as ScenarioSchema, _t as REDTEAM_PROVIDER_HARM_PLUGINS, a as AtomicTestCaseSchema, at as FINANCIAL_PLUGINS, b as TestCaseWithVarsFileSchema, bt as TEEN_SAFETY_PLUGINS, c as CompletedPromptSchema, ct as INSURANCE_PLUGINS, d as EvaluateOptionsSchema, dt as MEDICAL_PLUGINS, et as ALIASED_PLUGIN_MAPPINGS, f as GradingConfigSchema, ft as MULTI_INPUT_EXCLUDED_PLUGINS, g as ResultFailureReason, gt as PLUGIN_CATEGORIES, h as OutputFileExtension, ht as PII_PLUGINS, i as AssertionTypeSchema, it as DEFAULT_PLUGINS, j as PromptSchema, jt as DocxInjectionPlacementSchema, k as isResultFailureReason, kt as DocumentMediaInjectionPlacementSchema, l as DerivedMetricSchema, lt as LLAMA_GUARD_ENABLED_CATEGORIES, m as OutputConfigSchema, mt as PHARMACY_PLUGINS, n as AssertionSchema, nt as CANARY_BREAKING_STRATEGY_IDS, o as BaseAssertionTypesSchema, ot as FOUNDATION_PLUGINS, p as NotPrefixedAssertionTypesSchema, pt as MULTI_INPUT_VAR, q as getDefaultNFanout, r as AssertionSetSchema, rt as DATASET_EXEMPT_PLUGINS, s as CommandLineOptionsSchema, st as HARM_PLUGINS, t as AssertionOrSetSchema, tt as BIAS_PLUGINS, u as EvalResultsFilterMode, ut as LLAMA_GUARD_REPLICATE_PROVIDER, v as SpecialAssertionTypesSchema, vt as REMOTE_ONLY_PLUGIN_IDS, w as TestSuiteConfigSchema, wt as CODING_AGENT_PLUGINS, x as TestCasesWithMetadataPromptSchema, xt as TELECOM_PLUGINS, y as TestCaseSchema, zt as buildInputPromptDescription } from "./types-D6glLbdF.js";
4
+ import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, L as FILE_METADATA_KEY, N as TERMINAL_MAX_WIDTH, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, T as cloudConfig, _ as isPromptfooSampleTarget, b as parseChatPrompt, c as getCurrentTimestamp, l as sleep, n as fetchWithRetries, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy, w as CloudConfig } from "./fetch-Doks14zQ.js";
5
+ import { n as VERSION } from "./version-qVEN5qCm.js";
6
+ import { i as isJavascriptFile } from "./fileExtensions-CXRfY3Ss.js";
7
+ import { c as setUserEmail, i as getUserEmail, o as isLoggedIntoCloud, r as getAuthor, s as promptForEmailUnverified, t as checkEmailStatusAndMaybeExit } from "./accounts-DAv_0iE7.js";
8
+ import { r as importModule, t as getDirectory } from "./esm-BTK1W7lG.js";
9
+ import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, r as analyzeTemplateReference, t as renderEnvOnlyInObject } from "./render-DFfDeYUK.js";
10
+ import { t as providerRegistry } from "./providerRegistry-DHcFiVWX.js";
11
+ import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-CJC3E0aW.js";
12
+ import { r as promptYesNo } from "./server-CbiJppij.js";
13
+ import { a as getCloudDatabaseId, c as getPluginSeverityOverridesFromCloud, d as isCloudProvider, i as checkCloudPermissions, o as getEvalConfigFromCloud, p as resolveTeamId, s as getOrgContext } from "./storage-B2Ql_oq4.js";
14
+ import { r as runPython } from "./pythonUtils-D0BYebvX.js";
15
+ import { A as readFilters, M as loadFunction, N as parseFileUrl, O as maybeLoadToolsFromExternalFile, T as maybeLoadFromExternalFile, _ as isProviderAllowed, a as setupEnv, b as normalizeProviderRef, c as filterRuntimeVars, d as checkProviderApiKeys, f as doesProviderRefMatch, g as isOpenAiProvider, h as isGoogleProvider, i as fetchCsvFromGoogleSheet, j as readOutput, l as getTestCaseDeduplicationKey, m as isAnthropicProvider, n as writeMultipleOutputs, o as deduplicateTestCases, p as getProviderDescription, r as writeOutput, s as extractRuntimeVars, t as printBorder, u as resultIsForTestCase, w as maybeLoadConfigFromExternalFile } from "./util-DRfqa4xz.js";
16
+ import { n as sha256, t as randomSequence } from "./createHash-BtbSX3mj.js";
17
+ import { c as NON_TRANSIENT_HTTP_STATUSES, i as getCache, l as isNonTransientHttpStatus, n as disableCache, r as fetchWithCache, s as withCacheNamespace, t as cache_exports } from "./cache-DdriHsNX.js";
18
+ import { t as OpenAiChatCompletionProvider } from "./chat-BBEnnpQk.js";
19
+ import { h as validateFunctionCall } from "./transform-Wp6s_5QE.js";
20
+ import { l as validateFunctionCall$1 } from "./util-CYev3d-r.js";
21
+ import { _ as AIStudioChatProvider, a as resolveProvider, f as MCPProvider, g as GoogleLiveProvider, h as VertexChatProvider, n as loadApiProvider, o as resolveProviderConfigs, r as loadApiProviders, t as getProviderIds } from "./providers-BGc7tDtQ.js";
22
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CmnQ0G2m.js";
23
+ import { t as ellipsize } from "./text-DwYK5EBn.js";
24
+ import { t as telemetry } from "./telemetry-C4bX-6Sr.js";
25
+ import { a as evalsTable, c as evalsToTagsTable, d as tagsTable, i as evalResultsTable, l as promptsTable, m as getDbSignalPath, o as evalsToDatasetsTable, p as getDb, r as datasetsTable, s as evalsToPromptsTable } from "./tables-CRSXQ2Ke.js";
26
+ import { t as getBlobByHash } from "./blobs-Dwef1Ao1.js";
27
+ import { t as getProcessShim } from "./processShim-BcGzU8fY.js";
28
+ import { n as loadFromPackage, t as isPackagePath } from "./packageParser-CgE-ziRo.js";
29
+ import { n as runRuby } from "./rubyUtils-CnlW8AYb.js";
30
+ import { t as buildPromptInputDescriptions } from "./inputVariables-DUGMb9Ka.js";
31
+ import { a as extractPromptFromTags, c as isBasicRefusal, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, o as getSessionId, s as getShortPluginId } from "./util-DpV6KT5i.js";
32
+ import { a as assertRemoteMaterializationHandled, l as requiresRemoteMaterialization, n as PromptfooHarmfulCompletionProvider } from "./promptfoo-CqEpj6Sr.js";
33
+ import { $ as readProviderPromptMap, A as ExcessiveAgencyPlugin, At as withProviderCallExecutionContext, B as retryWithDeduplication, C as PlinyPlugin, Ct as processFileReference, D as ImitationPlugin, Dt as getAndCheckProvider, E as IntentPlugin, Et as callProviderWithContext, F as BeavertailsPlugin, G as matchesFactuality, H as fetchHuggingFaceDataset, I as AegisPlugin, J as matchesPiScore, K as matchesGEval, L as RedteamGraderBase, M as DebugAccessPlugin, N as CrossSessionLeakPlugin, O as HarmbenchPlugin, Ot as getGradingProvider, P as ContractPlugin, Q as readPrompts, R as RedteamPluginBase, S as makeInlinePolicyIdSync, St as loadFromJavaScriptFile, T as OverreliancePlugin, Tt as DEFAULT_ANTHROPIC_MODEL, U as isGraderFailure, V as sampleArray, W as matchesClosedQa, X as doRemoteGrading, Y as matchesTrajectoryGoalSuccess, Z as processPrompts, _ as PromptExtractionPlugin, _t as normalizeMatcherTokenUsage, a as VLGuardPlugin, at as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, b as determinePolicyTypeFromId, bt as coerceString, c as ToxicChatPlugin, ct as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, d as TeenSafetyDangerousRoleplayPlugin, dt as loadRubricPrompt, et as DEFAULT_WEB_SEARCH_PROMPT, f as TeenSafetyDangerousContentPlugin, ft as renderLlmRubricPrompt, g as RbacPlugin, gt as fail, h as ShellInjectionPlugin, ht as euclideanDistance, i as VLSUPlugin, it as CONTEXT_FAITHFULNESS_LONGFORM, j as DivergentRepetitionPlugin, k as HallucinationPlugin, kt as getProviderCallExecutionContext, l as ToolDiscoveryPlugin, lt as CONTEXT_RELEVANCE, m as SqlInjectionPlugin, mt as dotProduct, n as getGraderById, nt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, o as UnverifiableClaimsPlugin, ot as CONTEXT_RECALL, p as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, pt as cosineSimilarity, q as matchesLlmRubric, rt as ANSWER_RELEVANCY_GENERATE, s as UnsafeBenchPlugin, st as CONTEXT_RECALL_ATTRIBUTED_TOKEN, t as GRADERS, tt as SELECT_BEST_PROMPT, u as TeenSafetyHarmfulBodyIdealsPlugin, ut as CONTEXT_RELEVANCE_BAD, v as PoliticsPlugin, vt as splitIntoSentences, w as getPiiLeakTestsForCategory, wt as getDefaultProviders, x as isValidPolicyObject, xt as getFinalTest, y as PolicyPlugin, yt as tryParse, z as getCustomPolicies } from "./graders-KXzjnIim.js";
34
+ import { f as redteamProviderManager, g as createProviderRateLimitOptions, h as createRateLimitRegistry, m as TokenUsageTracker } from "./shared-CSIGeGLl.js";
35
+ import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-BGY69tk_.js";
36
+ import { a as getTransformLabel, i as getTransformErrorMessage, n as TRANSFORM_KEYS, o as transform, r as TransformInputType, t as INLINE_FUNCTION_LABEL } from "./transform-CGt7Kt3y2.js";
37
+ import { t as getTraceStore } from "./store-HpopRVzl.js";
38
+ import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-D3Fv_Tdh.js";
39
+ import { i as throwIfTargetPromptExceedsMaxChars, n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-B9nZEfO6.js";
40
+ import { n as checkExfilTracking } from "./indirectWebPwn-CiWB-vVH.js";
41
+ import { n as getFirstStringAttribute, r as getToolNameFromAttributes, t as TOOL_ARGUMENT_ATTRIBUTE_KEYS } from "./toolAttributes-DJ9ZEKXD.js";
42
+ import { i as filterFiniteScores, n as renderPrompt, r as runExtensionHook, t as collectFileMetadata } from "./evaluatorHelpers-CrRObe2z.js";
43
+ import { r as sanitizeProvider, t as EvalResult } from "./evalResult-PGqEbasb.js";
44
+ import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-CD1gHeeQ.js";
32
45
  import * as fs$2 from "fs";
33
46
  import fs, { createWriteStream } from "fs";
34
47
  import * as path$2 from "path";
@@ -51,14 +64,15 @@ import { DiagConsoleLogger, DiagLogLevel, diag, propagation } from "@opentelemet
51
64
  import readline from "readline";
52
65
  import { parse as parse$1 } from "csv-parse/sync";
53
66
  import { globSync } from "glob";
54
- import { and, desc, eq, inArray, sql } from "drizzle-orm";
55
67
  import { XMLParser } from "fast-xml-parser";
56
- import cliProgress from "cli-progress";
68
+ import { and, desc, eq, inArray, sql } from "drizzle-orm";
57
69
  import { URL } from "url";
58
70
  import { parse as parse$2 } from "parse5";
59
71
  import { distance } from "fastest-levenshtein";
72
+ import cliProgress from "cli-progress";
60
73
  import * as rouge from "js-rouge";
61
74
  import { isDeepStrictEqual } from "node:util";
75
+ import { LRUCache } from "lru-cache";
62
76
  import "debounce";
63
77
  import { ExportResultCode, W3CTraceContextPropagator } from "@opentelemetry/core";
64
78
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
@@ -429,17 +443,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
429
443
  contextValue = test.vars.context;
430
444
  }
431
445
  } else if (fallbackContext) contextValue = fallbackContext;
432
- if (assertion.contextTransform) try {
433
- const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
434
- const transformed = await transform(assertion.contextTransform, outputForTransform, {
435
- vars: test.vars,
436
- prompt: { label: prompt },
437
- ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
438
- });
439
- invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${assertion.contextTransform}`);
440
- contextValue = transformed;
441
- } catch (error) {
442
- throw new Error(`Failed to transform context using expression '${assertion.contextTransform}': ${error instanceof Error ? error.message : String(error)}`);
446
+ if (assertion.contextTransform) {
447
+ const getLabel = () => getTransformLabel(assertion.contextTransform);
448
+ try {
449
+ const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
450
+ const transformed = await transform(assertion.contextTransform, outputForTransform, {
451
+ vars: test.vars,
452
+ prompt: { label: prompt },
453
+ ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
454
+ });
455
+ invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
456
+ contextValue = transformed;
457
+ } catch (error) {
458
+ throw new Error(`Failed to transform context using expression '${getLabel()}': ${getTransformErrorMessage(error)}`);
459
+ }
443
460
  }
444
461
  invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
445
462
  return contextValue;
@@ -782,7 +799,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
782
799
  telemetry.record("feature_used", { feature: "tracing" });
783
800
  try {
784
801
  logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
785
- const { startOTLPReceiver } = await import("./otlpReceiver-CcdIikOu.js");
802
+ const { startOTLPReceiver } = await import("./otlpReceiver-BXjcRqAM.js");
786
803
  const port = testSuite.tracing.otlp.http.port || 4318;
787
804
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
788
805
  const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
@@ -806,7 +823,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
806
823
  async function stopOtlpReceiverIfNeeded() {
807
824
  if (otlpReceiverStarted) try {
808
825
  logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
809
- const { stopOTLPReceiver } = await import("./otlpReceiver-CcdIikOu.js");
826
+ const { stopOTLPReceiver } = await import("./otlpReceiver-BXjcRqAM.js");
810
827
  await stopOTLPReceiver();
811
828
  otlpReceiverStarted = false;
812
829
  logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -841,7 +858,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
841
858
  }
842
859
  if (!tracingEnabled) return null;
843
860
  logger.debug("[EvaluatorTracing] Importing trace store");
844
- const { getTraceStore } = await import("./store-BPkzEyFM.js").then((n) => n.n);
861
+ const { getTraceStore } = await import("./store-HpopRVzl.js").then((n) => n.n);
845
862
  const traceStore = getTraceStore();
846
863
  const traceId = generateTraceId();
847
864
  const spanId = generateSpanId();
@@ -1473,27 +1490,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
1473
1490
  };
1474
1491
  //#endregion
1475
1492
  //#region src/assertions/geval.ts
1476
- const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
1477
- invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
1493
+ const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
1494
+ invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
1478
1495
  const threshold = assertion.threshold ?? .7;
1479
1496
  if (Array.isArray(renderedValue)) {
1480
- const scores = [];
1481
- const reasons = [];
1482
- for (const value of renderedValue) {
1497
+ if (renderedValue.length === 0) return {
1498
+ assertion,
1499
+ pass: false,
1500
+ score: 0,
1501
+ reason: "G-Eval assertion requires at least one criterion string in the value array."
1502
+ };
1503
+ const responses = [];
1504
+ let failure;
1505
+ for (const [index, value] of renderedValue.entries()) {
1483
1506
  const resp = await matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
1484
- scores.push(resp.score);
1485
- reasons.push(resp.reason);
1507
+ responses.push(resp);
1508
+ if (isGraderFailure(resp)) {
1509
+ failure = {
1510
+ index,
1511
+ resp
1512
+ };
1513
+ break;
1514
+ }
1515
+ }
1516
+ const tokensUsed = createEmptyTokenUsage();
1517
+ for (const r of responses) accumulateTokenUsage(tokensUsed, r.tokensUsed);
1518
+ if (failure) {
1519
+ const criterion = renderedValue[failure.index];
1520
+ return {
1521
+ assertion,
1522
+ pass: false,
1523
+ score: 0,
1524
+ reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
1525
+ tokensUsed,
1526
+ metadata: failure.resp.metadata
1527
+ };
1486
1528
  }
1487
- const scoresSum = scores.reduce((a, b) => a + b, 0);
1529
+ const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
1530
+ const combinedReason = responses.map((r) => r.reason).join("\n\n");
1488
1531
  return {
1489
1532
  assertion,
1490
- pass: scoresSum / scores.length >= threshold,
1491
- score: scoresSum / scores.length,
1492
- reason: reasons.join("\n\n")
1533
+ pass: averageScore >= threshold !== inverse,
1534
+ score: inverse ? 1 - averageScore : averageScore,
1535
+ reason: combinedReason,
1536
+ tokensUsed
1493
1537
  };
1494
- } else return {
1538
+ }
1539
+ const resp = await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
1540
+ if (isGraderFailure(resp)) return {
1541
+ assertion,
1542
+ pass: false,
1543
+ score: 0,
1544
+ reason: resp.reason,
1545
+ tokensUsed: resp.tokensUsed,
1546
+ metadata: resp.metadata
1547
+ };
1548
+ const passed = resp.score >= threshold !== inverse;
1549
+ return {
1495
1550
  assertion,
1496
- ...await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext)
1551
+ ...resp,
1552
+ pass: passed,
1553
+ score: inverse ? 1 - resp.score : resp.score
1497
1554
  };
1498
1555
  };
1499
1556
  //#endregion
@@ -2496,45 +2553,6 @@ function matchesPattern(spanName, pattern) {
2496
2553
  }
2497
2554
  //#endregion
2498
2555
  //#region src/assertions/trajectoryUtils.ts
2499
- const TOOL_ATTRIBUTE_KEYS = [
2500
- "tool.name",
2501
- "tool_name",
2502
- "tool",
2503
- "function.name",
2504
- "function_name",
2505
- "gen_ai.tool.name",
2506
- "codex.mcp.tool",
2507
- "agent.tool",
2508
- "agent.tool_name",
2509
- "agent.toolName"
2510
- ];
2511
- const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
2512
- "tool.arguments",
2513
- "tool.args",
2514
- "tool.input",
2515
- "tool_arguments",
2516
- "tool_args",
2517
- "tool_input",
2518
- "function.arguments",
2519
- "function.args",
2520
- "function.input",
2521
- "function_arguments",
2522
- "function_args",
2523
- "gen_ai.tool.arguments",
2524
- "gen_ai.tool.args",
2525
- "gen_ai.tool.input",
2526
- "gen_ai.tool.call.arguments",
2527
- "gen_ai.tool.call.args",
2528
- "agent.tool.arguments",
2529
- "agent.tool.args",
2530
- "agent.tool.input",
2531
- "codex.mcp.arguments",
2532
- "codex.mcp.args",
2533
- "codex.mcp.input",
2534
- "arguments",
2535
- "args",
2536
- "input"
2537
- ];
2538
2556
  const COMMAND_ATTRIBUTE_KEYS = [
2539
2557
  "codex.command",
2540
2558
  "command",
@@ -2547,16 +2565,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
2547
2565
  "search_query"
2548
2566
  ];
2549
2567
  const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
2568
+ const COMMAND_TOOL_NAMES = new Set([
2569
+ "exec_command",
2570
+ "local_shell",
2571
+ "shell"
2572
+ ]);
2550
2573
  const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
2551
2574
  const MAX_JUDGE_SUMMARY_STEPS = 24;
2552
2575
  const JUDGE_SUMMARY_HEAD_STEPS = 12;
2553
2576
  const JUDGE_SUMMARY_TAIL_STEPS = 12;
2554
- function getStringAttribute(attributes, keys) {
2555
- for (const key of keys) {
2556
- const value = attributes[key];
2557
- if (typeof value === "string" && value.trim()) return value.trim();
2558
- }
2559
- }
2560
2577
  function normalizeStructuredAttribute(value) {
2561
2578
  if (value === void 0 || value === null) return;
2562
2579
  if (typeof value === "string") {
@@ -2588,9 +2605,12 @@ function getTrajectoryStepStatus(step) {
2588
2605
  function getCommandExecutable(command) {
2589
2606
  return command.trim().split(/\s+/)[0] || void 0;
2590
2607
  }
2608
+ function isCommandToolName(toolName) {
2609
+ return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
2610
+ }
2591
2611
  function extractToolName(span) {
2592
2612
  const attributes = span.attributes || {};
2593
- const directMatch = getStringAttribute(attributes, TOOL_ATTRIBUTE_KEYS);
2613
+ const directMatch = getToolNameFromAttributes(attributes);
2594
2614
  if (directMatch) return directMatch;
2595
2615
  for (const [key, value] of Object.entries(attributes)) {
2596
2616
  if (typeof value !== "string" || !value.trim()) continue;
@@ -2615,21 +2635,31 @@ function extractToolArgs(span) {
2615
2635
  if (value !== void 0) return value;
2616
2636
  }
2617
2637
  }
2618
- function extractCommand(span) {
2638
+ function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
2619
2639
  const attributes = span.attributes || {};
2620
- const directMatch = getStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2640
+ const directMatch = getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2621
2641
  if (directMatch) return directMatch;
2622
2642
  for (const [key, value] of Object.entries(attributes)) {
2623
2643
  if (typeof value !== "string" || !value.trim()) continue;
2624
2644
  if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
2625
2645
  }
2646
+ const toolArgs = getToolArgs();
2647
+ if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
2648
+ const args = toolArgs;
2649
+ const command = args.cmd ?? args.command;
2650
+ if (typeof command === "string" && command.trim()) return command.trim();
2651
+ if (Array.isArray(command)) {
2652
+ const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
2653
+ if (joined) return joined;
2654
+ }
2655
+ }
2626
2656
  if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
2627
2657
  }
2628
2658
  function extractSearchQuery(span) {
2629
2659
  const attributes = span.attributes || {};
2630
- const directMatch = getStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2660
+ const directMatch = getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2631
2661
  if (directMatch) return directMatch;
2632
- const genericQuery = getStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2662
+ const genericQuery = getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2633
2663
  if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
2634
2664
  if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
2635
2665
  }
@@ -2653,17 +2683,34 @@ function extractTrajectorySteps(trace) {
2653
2683
  return left.index - right.index;
2654
2684
  }).map(({ span }) => {
2655
2685
  const toolName = extractToolName(span);
2656
- const command = extractCommand(span);
2686
+ let toolArgs;
2687
+ let hasExtractedToolArgs = false;
2688
+ const getToolArgs = () => {
2689
+ if (!hasExtractedToolArgs) {
2690
+ toolArgs = extractToolArgs(span);
2691
+ hasExtractedToolArgs = true;
2692
+ }
2693
+ return toolArgs;
2694
+ };
2695
+ const command = extractCommand(span, toolName, getToolArgs);
2657
2696
  const searchQuery = extractSearchQuery(span);
2658
2697
  let type = "span";
2659
2698
  let name = span.name;
2660
2699
  const aliases = new Set([span.name]);
2661
2700
  let args;
2662
- if (toolName) {
2701
+ if (command && isCommandToolName(toolName)) {
2702
+ type = "command";
2703
+ name = command;
2704
+ aliases.add(command);
2705
+ args = getToolArgs();
2706
+ if (toolName) aliases.add(toolName);
2707
+ const executable = getCommandExecutable(command);
2708
+ if (executable) aliases.add(executable);
2709
+ } else if (toolName) {
2663
2710
  type = "tool";
2664
2711
  name = toolName;
2665
2712
  aliases.add(toolName);
2666
- args = extractToolArgs(span);
2713
+ args = getToolArgs();
2667
2714
  } else if (command) {
2668
2715
  type = "command";
2669
2716
  name = command;
@@ -3850,13 +3897,13 @@ function resolveSequenceValue(value) {
3850
3897
  }
3851
3898
  throw new Error("trajectory:tool-sequence assertion must have an array or object value");
3852
3899
  }
3853
- function isRecord(value) {
3900
+ function isRecord$1(value) {
3854
3901
  return typeof value === "object" && value !== null && !Array.isArray(value);
3855
3902
  }
3856
3903
  function matchesExpectedArgsPartial(actual, expected) {
3857
3904
  if (Array.isArray(expected)) return Array.isArray(actual) && actual.length === expected.length && expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
3858
- if (isRecord(expected)) {
3859
- if (!isRecord(actual)) return false;
3905
+ if (isRecord$1(expected)) {
3906
+ if (!isRecord$1(actual)) return false;
3860
3907
  return Object.entries(expected).every(([key, expectedValue]) => Object.prototype.hasOwnProperty.call(actual, key) && matchesExpectedArgsPartial(actual[key], expectedValue));
3861
3908
  }
3862
3909
  return isDeepStrictEqual(actual, expected);
@@ -4227,7 +4274,7 @@ async function loadTraceData(traceId) {
4227
4274
  let stableObservations = 0;
4228
4275
  let latestTrace = null;
4229
4276
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
4230
- latestTrace = await traceStore.getTrace(traceId);
4277
+ latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
4231
4278
  const spanCount = latestTrace?.spans?.length ?? 0;
4232
4279
  if (spanCount > 0) {
4233
4280
  stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
@@ -4280,7 +4327,7 @@ const ASSERTION_HANDLERS = {
4280
4327
  "llm-rubric": handleLlmRubric,
4281
4328
  meteor: async (params) => {
4282
4329
  try {
4283
- const { handleMeteorAssertion } = await import("./meteor-BKTM-7KS.js");
4330
+ const { handleMeteorAssertion } = await import("./meteor-Dce-_zGQ.js");
4284
4331
  return handleMeteorAssertion(params);
4285
4332
  } catch (error) {
4286
4333
  if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
@@ -4416,7 +4463,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
4416
4463
  };
4417
4464
  }
4418
4465
  else if (filePath.endsWith(".rb")) try {
4419
- const { runRuby } = await import("./rubyUtils-BI0p46eZ.js").then((n) => n.t);
4466
+ const { runRuby } = await import("./rubyUtils-CnlW8AYb.js").then((n) => n.t);
4420
4467
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
4421
4468
  logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
4422
4469
  } catch (error) {
@@ -4533,7 +4580,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
4533
4580
  logger.debug(`Failed to preload trace data for assertions: ${error}`);
4534
4581
  preloadedTraceData = null;
4535
4582
  }
4536
- await async.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
4583
+ const concurrency = getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
4584
+ await async.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
4537
4585
  if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
4538
4586
  const result = await runAssertion({
4539
4587
  prompt,
@@ -4660,7 +4708,8 @@ var CIProgressReporter = class {
4660
4708
  }
4661
4709
  updateTotalTests(newTotal) {
4662
4710
  this.totalTests = Math.max(newTotal, 1);
4663
- this.highestPercentageSeen = Math.floor(this.completedTests / this.totalTests * 100);
4711
+ const percentage = Math.floor(this.completedTests / this.totalTests * 100);
4712
+ this.highestPercentageSeen = percentage;
4664
4713
  }
4665
4714
  finish() {
4666
4715
  if (this.intervalId) {
@@ -4833,6 +4882,10 @@ function getDefaultOtelConfig() {
4833
4882
  }
4834
4883
  //#endregion
4835
4884
  //#region src/tracing/localSpanExporter.ts
4885
+ const MISSING_TRACE_RETRY_DELAY_MS = 50;
4886
+ function delay(ms) {
4887
+ return new Promise((resolve) => setTimeout(resolve, ms));
4888
+ }
4836
4889
  /**
4837
4890
  * A span exporter that writes spans to the local TraceStore (SQLite).
4838
4891
  * This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
@@ -4874,7 +4927,7 @@ var LocalSpanExporter = class {
4874
4927
  }
4875
4928
  let firstError;
4876
4929
  for (const [traceId, spanDataList] of spansByTrace) try {
4877
- const result = await traceStore.addSpans(traceId, spanDataList, { skipTraceCheck: false });
4930
+ const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
4878
4931
  if (result.stored) logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
4879
4932
  else logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
4880
4933
  } catch (error) {
@@ -4886,6 +4939,16 @@ var LocalSpanExporter = class {
4886
4939
  }
4887
4940
  return firstError;
4888
4941
  }
4942
+ async addSpansWithTraceRetry(traceStore, traceId, spans) {
4943
+ const options = {
4944
+ skipTraceCheck: false,
4945
+ warnIfMissingTrace: false
4946
+ };
4947
+ const result = await traceStore.addSpans(traceId, spans, options);
4948
+ if (result.stored) return result;
4949
+ await delay(MISSING_TRACE_RETRY_DELAY_MS);
4950
+ return traceStore.addSpans(traceId, spans, options);
4951
+ }
4889
4952
  /**
4890
4953
  * Convert an OTEL ReadableSpan to our SpanData format.
4891
4954
  */
@@ -5168,6 +5231,15 @@ function isPromptAllowed(prompt, allowedPrompts) {
5168
5231
  }
5169
5232
  //#endregion
5170
5233
  //#region src/evaluator.ts
5234
+ const CONVERSATION_VAR_NAME = "_conversation";
5235
+ const promptUsesConversationVariableCache = new LRUCache({ max: 1024 });
5236
+ function promptUsesConversationVariable(prompt) {
5237
+ const cached = promptUsesConversationVariableCache.get(prompt.raw);
5238
+ if (cached !== void 0) return cached;
5239
+ const { referenced, parsed } = analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
5240
+ if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
5241
+ return referenced;
5242
+ }
5171
5243
  /**
5172
5244
  * Manages a single progress bar for the evaluation
5173
5245
  */
@@ -5367,6 +5439,18 @@ function hasProviderGroupedAssertion(assertion) {
5367
5439
  function shouldDeferGradingForTest(test) {
5368
5440
  return Boolean(test.assert?.some(hasProviderGroupedAssertion));
5369
5441
  }
5442
+ function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
5443
+ if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
5444
+ if (shouldGroupGradingByProvider) {
5445
+ logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
5446
+ return;
5447
+ }
5448
+ if (concurrency !== 1) return;
5449
+ const reasons = [];
5450
+ if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
5451
+ if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
5452
+ if (reasons.length > 0) logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
5453
+ }
5370
5454
  function applyGradingResult(row, checkResult) {
5371
5455
  if (!checkResult.pass) {
5372
5456
  row.error = checkResult.reason;
@@ -5381,14 +5465,29 @@ function applyGradingResult(row, checkResult) {
5381
5465
  if (checkResult.tokensUsed) accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
5382
5466
  row.gradingResult = checkResult;
5383
5467
  }
5384
- function applyGradingError(row, error) {
5385
- const errorMessage = error instanceof Error ? error.stack ?? error.message : String(error);
5386
- logger.error("Assertion grading failed during eval", {
5387
- error: errorMessage,
5388
- promptIdx: row.promptIdx,
5389
- testIdx: row.testIdx
5390
- });
5391
- row.error = errorMessage;
5468
+ const ABORTED_GRADING_PREFIX = "Aborted: ";
5469
+ function isAbortShapedError(error) {
5470
+ return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
5471
+ }
5472
+ function applyGradingError(row, error, abortSignal) {
5473
+ const errorAsError = error instanceof Error ? error : void 0;
5474
+ if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
5475
+ const shortMessage = errorAsError?.message ?? String(error);
5476
+ logger.debug("Assertion grading aborted", {
5477
+ error: shortMessage,
5478
+ promptIdx: row.promptIdx,
5479
+ testIdx: row.testIdx
5480
+ });
5481
+ row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
5482
+ } else {
5483
+ const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
5484
+ logger.error("Assertion grading failed during eval", {
5485
+ error: fullMessage,
5486
+ promptIdx: row.promptIdx,
5487
+ testIdx: row.testIdx
5488
+ });
5489
+ row.error = fullMessage;
5490
+ }
5392
5491
  row.failureReason = ResultFailureReason.ERROR;
5393
5492
  row.success = false;
5394
5493
  row.score = 0;
@@ -5420,7 +5519,7 @@ function createRunEvalState({ provider, prompt, test }) {
5420
5519
  };
5421
5520
  }
5422
5521
  function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
5423
- const usesConversation = prompt.raw.includes("_conversation");
5522
+ const usesConversation = promptUsesConversationVariable(prompt);
5424
5523
  if (!getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
5425
5524
  }
5426
5525
  function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
@@ -5667,7 +5766,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
5667
5766
  assertScoringFunction: test.assertScoringFunction,
5668
5767
  traceId
5669
5768
  }).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
5670
- applyGradingError(ret, error);
5769
+ applyGradingError(ret, error, abortSignal);
5671
5770
  });
5672
5771
  deferredGradingPromises.set(ret, gradingPromise);
5673
5772
  return;
@@ -6214,7 +6313,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
6214
6313
  const defaultProvider = defaultTest.provider;
6215
6314
  if (isApiProvider(defaultProvider)) return defaultProvider;
6216
6315
  if (typeof defaultProvider === "object" && defaultProvider.id) {
6217
- const { loadApiProvider } = await import("./providers-CCE2COJi2.js");
6316
+ const { loadApiProvider } = await import("./providers-BGc7tDtQ.js").then((n) => n.i);
6218
6317
  return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
6219
6318
  }
6220
6319
  return defaultProvider;
@@ -6374,7 +6473,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
6374
6473
  async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
6375
6474
  if (!state.resume || !evalRecord.persisted) return;
6376
6475
  try {
6377
- const { default: EvalResult } = await import("./evalResult-pSvGWFMo.js").then((n) => n.n);
6476
+ const { default: EvalResult } = await import("./evalResult-PGqEbasb.js").then((n) => n.n);
6378
6477
  const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: state.retryMode });
6379
6478
  const originalCount = runEvalOptions.length;
6380
6479
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -6388,14 +6487,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
6388
6487
  }
6389
6488
  }
6390
6489
  function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
6391
- const usesConversationVar = prompts.some((p) => p.raw.includes("_conversation"));
6490
+ const usesConversationVar = prompts.some(promptUsesConversationVariable);
6392
6491
  if (concurrency <= 1) return {
6393
6492
  concurrency,
6394
6493
  usesConversationVar
6395
6494
  };
6396
6495
  const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
6397
6496
  if (usesConversationVar) {
6398
- logger.info(`Setting concurrency to 1 because the ${chalk.cyan("_conversation")} variable is used.`);
6497
+ logger.info(`Setting concurrency to 1 because the ${chalk.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
6399
6498
  return {
6400
6499
  concurrency: 1,
6401
6500
  usesConversationVar
@@ -6625,7 +6724,8 @@ var Evaluator = class {
6625
6724
  };
6626
6725
  this.conversations = {};
6627
6726
  this.registers = {};
6628
- this.fileWriters = (Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : []).map((p) => new JsonlFileWriter(p));
6727
+ const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
6728
+ this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
6629
6729
  this.rateLimitRegistry = createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
6630
6730
  this.rateLimitRegistry.on("ratelimit:hit", (data) => {
6631
6731
  logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
@@ -6745,6 +6845,25 @@ var Evaluator = class {
6745
6845
  this.trackCompletedRow(evalStep, row, context);
6746
6846
  context.numComplete++;
6747
6847
  const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
6848
+ if (context.testSuite.extensions?.length) try {
6849
+ const afterEachOut = await runExtensionHook(context.testSuite.extensions, "afterEach", {
6850
+ test: evalStep.test,
6851
+ result: {
6852
+ ...row,
6853
+ namedScores: { ...row.namedScores },
6854
+ metadata: { ...row.metadata },
6855
+ response: row.response ? {
6856
+ ...row.response,
6857
+ metadata: { ...row.response.metadata }
6858
+ } : row.response
6859
+ }
6860
+ });
6861
+ row.namedScores = filterFiniteScores(afterEachOut.result.namedScores);
6862
+ row.metadata = afterEachOut.result.metadata;
6863
+ if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
6864
+ } catch (error) {
6865
+ logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
6866
+ }
6748
6867
  await this.persistEvalRow(row);
6749
6868
  if (this.abortIfTargetUnavailable(row, context)) break;
6750
6869
  const metrics = context.prompts[row.promptIdx].metrics;
@@ -6756,10 +6875,6 @@ var Evaluator = class {
6756
6875
  promptEvalCount,
6757
6876
  row
6758
6877
  });
6759
- await runExtensionHook(context.testSuite.extensions, "afterEach", {
6760
- test: evalStep.test,
6761
- result: row
6762
- });
6763
6878
  context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
6764
6879
  }
6765
6880
  }
@@ -6914,7 +7029,15 @@ var Evaluator = class {
6914
7029
  })) break;
6915
7030
  }
6916
7031
  } catch (error) {
6917
- await flushGroupedRows();
7032
+ const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
7033
+ try {
7034
+ await flushGroupedRows();
7035
+ } catch (flushError) {
7036
+ logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
7037
+ error: flushError instanceof Error ? flushError.message : String(flushError),
7038
+ pendingRowCount
7039
+ });
7040
+ }
6918
7041
  throw error;
6919
7042
  }
6920
7043
  await flushGroupedRows();
@@ -7350,6 +7473,13 @@ var Evaluator = class {
7350
7473
  if (!this.options.silent) {
7351
7474
  if (serialRunEvalOptions.length > 0) logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
7352
7475
  if (concurrentRunEvalOptions.length > 0) logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
7476
+ logGroupedGradingStatus({
7477
+ concurrency,
7478
+ hasEvalStepTimeout,
7479
+ runEvalOptions,
7480
+ shouldGroupGradingByProvider,
7481
+ usesConversationVar
7482
+ });
7353
7483
  }
7354
7484
  if (this.options.showProgressBar && progressBarManager) {
7355
7485
  await progressBarManager.initialize(runEvalOptions, concurrency, 0);
@@ -8353,7 +8483,7 @@ var Eval = class Eval {
8353
8483
  const evalInstance = new Eval(eval_.config, {
8354
8484
  id: eval_.id,
8355
8485
  createdAt: new Date(eval_.createdAt),
8356
- author: eval_.author || void 0,
8486
+ author: eval_.author,
8357
8487
  description: eval_.description || void 0,
8358
8488
  prompts: eval_.prompts || [],
8359
8489
  datasetId,
@@ -8376,7 +8506,7 @@ var Eval = class Eval {
8376
8506
  return (await getDb().select().from(evalsTable).limit(limit).orderBy(desc(evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
8377
8507
  id: e.id,
8378
8508
  createdAt: new Date(e.createdAt),
8379
- author: e.author || void 0,
8509
+ author: e.author,
8380
8510
  description: e.description || void 0,
8381
8511
  prompts: e.prompts || [],
8382
8512
  persisted: true
@@ -8391,7 +8521,7 @@ var Eval = class Eval {
8391
8521
  return (await getDb().select().from(evalsTable).orderBy(desc(evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
8392
8522
  id: e.id,
8393
8523
  createdAt: new Date(e.createdAt),
8394
- author: e.author || void 0,
8524
+ author: e.author,
8395
8525
  description: e.description || void 0,
8396
8526
  prompts: e.prompts || [],
8397
8527
  persisted: true
@@ -8406,7 +8536,7 @@ var Eval = class Eval {
8406
8536
  static async create(config, renderedPrompts, opts) {
8407
8537
  const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
8408
8538
  const evalId = opts?.id || createEvalId(createdAt);
8409
- const author = opts?.author || getUserEmail();
8539
+ const author = opts && "author" in opts ? opts.author ?? null : getAuthor();
8410
8540
  const db = getDb();
8411
8541
  const datasetId = sha256(JSON.stringify(config.tests || []));
8412
8542
  db.transaction(() => {
@@ -8468,7 +8598,7 @@ var Eval = class Eval {
8468
8598
  });
8469
8599
  return new Eval(config, {
8470
8600
  id: evalId,
8471
- author: opts?.author,
8601
+ author,
8472
8602
  createdAt,
8473
8603
  persisted: true,
8474
8604
  runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions)
@@ -8478,7 +8608,7 @@ var Eval = class Eval {
8478
8608
  const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
8479
8609
  this.createdAt = createdAt.getTime();
8480
8610
  this.id = opts?.id || createEvalId(createdAt);
8481
- this.author = opts?.author;
8611
+ this.author = opts?.author ?? null;
8482
8612
  this.config = config;
8483
8613
  this.results = [];
8484
8614
  this.prompts = opts?.prompts || [];
@@ -9008,7 +9138,7 @@ var Eval = class Eval {
9008
9138
  newConfig.description = copyDescription;
9009
9139
  const newPrompts = structuredClone(this.prompts);
9010
9140
  const newVars = this.vars ? structuredClone(this.vars) : [];
9011
- const author = getUserEmail();
9141
+ const author = getAuthor();
9012
9142
  const db = getDb();
9013
9143
  let copiedCount = 0;
9014
9144
  db.transaction(() => {
@@ -11402,25 +11532,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
11402
11532
  getAssertions(_prompt) {
11403
11533
  return getHarmfulAssertions(this.harmCategory);
11404
11534
  }
11405
- promptsToTestCases(prompts) {
11535
+ async promptsToTestCases(prompts) {
11406
11536
  const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
11407
11537
  const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
11408
- return prompts.map(({ __prompt }) => {
11538
+ const pluginId = getShortPluginId(this.harmCategory);
11539
+ return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
11409
11540
  const vars = { [this.injectVar]: __prompt };
11410
- if (hasMultipleInputs) try {
11411
- const parsed = JSON.parse(__prompt);
11412
- Object.assign(vars, extractVariablesFromJson(parsed, this.config.inputs));
11413
- } catch {}
11541
+ let inputMaterialization;
11542
+ if (hasMultipleInputs) {
11543
+ let parsed;
11544
+ try {
11545
+ parsed = JSON.parse(__prompt);
11546
+ } catch (error) {
11547
+ logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
11548
+ }
11549
+ if (parsed) try {
11550
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
11551
+ materializationIndex,
11552
+ pluginId,
11553
+ provider: this.provider,
11554
+ purpose: this.purpose
11555
+ });
11556
+ Object.assign(vars, materializedVars.vars);
11557
+ inputMaterialization = materializedVars.metadata;
11558
+ } catch (error) {
11559
+ logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
11560
+ throw error;
11561
+ }
11562
+ }
11414
11563
  return {
11415
11564
  vars,
11416
11565
  metadata: {
11417
11566
  harmCategory: harmCategoryLabel,
11418
- pluginId: getShortPluginId(this.harmCategory),
11419
- pluginConfig: this.config
11567
+ pluginId,
11568
+ pluginConfig: this.config,
11569
+ ...inputMaterialization ? { inputMaterialization } : {}
11420
11570
  },
11421
11571
  assert: getHarmfulAssertions(this.harmCategory)
11422
11572
  };
11423
- });
11573
+ }));
11424
11574
  }
11425
11575
  };
11426
11576
  //#endregion
@@ -11429,20 +11579,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
11429
11579
  * Extract content from <Prompt> tags and parse JSON if inputs are defined.
11430
11580
  * Returns the processed prompt and any additional vars extracted from JSON.
11431
11581
  */
11432
- function processPromptForInputs(prompt, _injectVar, inputs) {
11582
+ async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
11433
11583
  let processedPrompt = prompt.trim();
11434
11584
  const additionalVars = {};
11585
+ let additionalMetadata;
11435
11586
  const extractedPrompt = extractPromptFromTags(processedPrompt);
11436
11587
  if (extractedPrompt) processedPrompt = extractedPrompt;
11437
- if (inputs && Object.keys(inputs).length > 0) try {
11438
- const parsed = JSON.parse(processedPrompt);
11439
- Object.assign(additionalVars, extractVariablesFromJson(parsed, inputs));
11440
- } catch {
11441
- logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode");
11588
+ if (inputs && Object.keys(inputs).length > 0) {
11589
+ let parsed;
11590
+ try {
11591
+ parsed = JSON.parse(processedPrompt);
11592
+ } catch (error) {
11593
+ logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
11594
+ }
11595
+ if (parsed) try {
11596
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
11597
+ materializationIndex,
11598
+ pluginId: plugin,
11599
+ provider,
11600
+ purpose
11601
+ });
11602
+ Object.assign(additionalVars, materializedVars.vars);
11603
+ additionalMetadata = materializedVars.metadata;
11604
+ } catch (error) {
11605
+ logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
11606
+ throw error;
11607
+ }
11442
11608
  }
11443
11609
  return {
11444
11610
  processedPrompt,
11445
- additionalVars
11611
+ additionalVars,
11612
+ additionalMetadata
11446
11613
  };
11447
11614
  }
11448
11615
  async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
@@ -11463,15 +11630,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
11463
11630
  };
11464
11631
  const allPrompts = await retryWithDeduplication(generatePrompts, n);
11465
11632
  const inputs = config?.inputs;
11466
- return sampleArray(allPrompts, n).map((prompt) => {
11467
- const { processedPrompt, additionalVars } = processPromptForInputs(prompt, injectVar, inputs);
11633
+ return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
11634
+ const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
11468
11635
  const testCase = createTestCase(injectVar, processedPrompt, plugin);
11469
11636
  if (Object.keys(additionalVars).length > 0) testCase.vars = {
11470
11637
  ...testCase.vars,
11471
11638
  ...additionalVars
11472
11639
  };
11640
+ if (additionalMetadata) testCase.metadata = {
11641
+ ...testCase.metadata,
11642
+ inputMaterialization: additionalMetadata
11643
+ };
11473
11644
  return testCase;
11474
- });
11645
+ }));
11475
11646
  }
11476
11647
  //#endregion
11477
11648
  //#region src/redteam/plugins/teenSafety/graderExamples.ts
@@ -11789,7 +11960,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
11789
11960
  function computeModifiersFromConfig(config) {
11790
11961
  const modifiers = { ...config?.modifiers };
11791
11962
  if (config?.language && typeof config.language === "string") modifiers.language = config.language;
11792
- if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
11963
+ if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
11793
11964
  const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
11794
11965
  if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
11795
11966
  return modifiers;
@@ -11947,6 +12118,7 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
11947
12118
  logger.error(`Error generating test cases for ${key}: ${statusText} ${JSON.stringify(data)}`);
11948
12119
  return [];
11949
12120
  }
12121
+ if (requiresRemoteMaterialization(config?.inputs)) assertRemoteMaterializationHandled(data, `Remote plugin generation for ${key}`);
11950
12122
  const ret = data.result;
11951
12123
  logger.debug(`Received remote generation for ${key}:\n${JSON.stringify(ret)}`);
11952
12124
  return ret;
@@ -11965,13 +12137,14 @@ function createPluginFactory(PluginClass, key, validate) {
11965
12137
  logger.debug(`Using local redteam generation for ${key}`);
11966
12138
  return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
11967
12139
  }
12140
+ const pluginId = getShortPluginId(key);
11968
12141
  const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
11969
12142
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
11970
12143
  return testCases.map((testCase) => ({
11971
12144
  ...testCase,
11972
12145
  metadata: {
11973
12146
  ...testCase.metadata,
11974
- pluginId: getShortPluginId(key),
12147
+ pluginId,
11975
12148
  pluginConfig: {
11976
12149
  ...configWithDefaults,
11977
12150
  modifiers: computedModifiers
@@ -12028,7 +12201,7 @@ const pluginFactories = [
12028
12201
  key: category,
12029
12202
  action: async (params) => {
12030
12203
  if (neverGenerateRemote()) {
12031
- logger.error(`${category} plugin requires remote generation to be enabled`);
12204
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
12032
12205
  return [];
12033
12206
  }
12034
12207
  const testCases = await getHarmfulTests(params, category);
@@ -12051,13 +12224,14 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
12051
12224
  key: category,
12052
12225
  action: async (params) => {
12053
12226
  if (shouldGenerateRemote()) {
12227
+ const pluginId = getShortPluginId(category);
12054
12228
  const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
12055
12229
  const computedModifiers = computeModifiersFromConfig(params.config);
12056
12230
  return testCases.map((testCase) => ({
12057
12231
  ...testCase,
12058
12232
  metadata: {
12059
12233
  ...testCase.metadata,
12060
- pluginId: getShortPluginId(category),
12234
+ pluginId,
12061
12235
  pluginConfig: {
12062
12236
  ...params.config,
12063
12237
  modifiers: computedModifiers
@@ -12079,16 +12253,17 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
12079
12253
  key: category,
12080
12254
  action: async (params) => {
12081
12255
  if (neverGenerateRemote()) {
12082
- logger.error(`${category} plugin requires remote generation to be enabled`);
12256
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
12083
12257
  return [];
12084
12258
  }
12259
+ const pluginId = getShortPluginId(category);
12085
12260
  const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
12086
12261
  const computedModifiers = computeModifiersFromConfig(params.config);
12087
12262
  return testCases.map((testCase) => ({
12088
12263
  ...testCase,
12089
12264
  metadata: {
12090
12265
  ...testCase.metadata,
12091
- pluginId: getShortPluginId(category),
12266
+ pluginId,
12092
12267
  pluginConfig: {
12093
12268
  ...params.config,
12094
12269
  modifiers: computedModifiers
@@ -12104,16 +12279,17 @@ function createRemotePlugin(key, validate) {
12104
12279
  action: async ({ purpose, injectVar, n, config }) => {
12105
12280
  const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
12106
12281
  if (neverGenerateRemote()) {
12107
- logger.error(`${key} plugin requires remote generation to be enabled`);
12282
+ logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
12108
12283
  return [];
12109
12284
  }
12285
+ const pluginId = getShortPluginId(key);
12110
12286
  const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
12111
12287
  const computedModifiers = computeModifiersFromConfig(configWithDefaults);
12112
12288
  const testsWithMetadata = testCases.map((testCase) => ({
12113
12289
  ...testCase,
12114
12290
  metadata: {
12115
12291
  ...testCase.metadata,
12116
- pluginId: getShortPluginId(key),
12292
+ pluginId,
12117
12293
  pluginConfig: {
12118
12294
  ...configWithDefaults,
12119
12295
  modifiers: computedModifiers
@@ -12174,6 +12350,15 @@ async function validateSharpDependency(strategies, plugins, checkSharp = isSharp
12174
12350
  }
12175
12351
  //#endregion
12176
12352
  //#region src/redteam/index.ts
12353
+ const MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY = "__promptfooMaterializedMultiInputPrompt";
12354
+ function getMaterializedMultiInputPromptSnapshot(metadata) {
12355
+ const snapshot = metadata?.[MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY];
12356
+ return typeof snapshot === "string" ? snapshot : void 0;
12357
+ }
12358
+ function getMaterializedMultiInputPromptMetadata(vars) {
12359
+ const prompt = vars?.[MULTI_INPUT_VAR];
12360
+ return typeof prompt === "string" ? { [MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY]: prompt } : void 0;
12361
+ }
12177
12362
  function getPolicyText(metadata) {
12178
12363
  if (!metadata || metadata.policy === void 0 || metadata.policy === null) return;
12179
12364
  const policyValue = metadata.policy;
@@ -12183,6 +12368,44 @@ function getPolicyText(metadata) {
12183
12368
  return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
12184
12369
  }
12185
12370
  }
12371
+ async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
12372
+ const inputs = testCase.metadata?.pluginConfig?.inputs;
12373
+ const inputMaterialization = testCase.metadata?.inputMaterialization;
12374
+ const materializedPromptSnapshot = getMaterializedMultiInputPromptSnapshot(testCase.metadata);
12375
+ const currentInjectVar = testCase.vars?.[injectVar];
12376
+ if (!inputs || Object.keys(inputs).length === 0 || !currentInjectVar) return {
12377
+ inputMaterialization,
12378
+ vars: testCase.vars
12379
+ };
12380
+ const promptChangedSinceMaterialization = typeof currentInjectVar === "string" && typeof materializedPromptSnapshot === "string" && currentInjectVar !== materializedPromptSnapshot;
12381
+ if (Boolean(inputMaterialization) && Object.keys(inputs).every((key) => Object.prototype.hasOwnProperty.call(testCase.vars ?? {}, key)) && !promptChangedSinceMaterialization) return {
12382
+ inputMaterialization,
12383
+ vars: testCase.vars
12384
+ };
12385
+ try {
12386
+ const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(currentInjectVar)), inputs, {
12387
+ materializationIndex,
12388
+ pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
12389
+ provider,
12390
+ purpose
12391
+ });
12392
+ return {
12393
+ inputMaterialization: materializedVars.metadata ? {
12394
+ ...inputMaterialization,
12395
+ ...materializedVars.metadata
12396
+ } : inputMaterialization,
12397
+ vars: {
12398
+ ...testCase.vars,
12399
+ ...materializedVars.vars
12400
+ }
12401
+ };
12402
+ } catch {
12403
+ return {
12404
+ inputMaterialization,
12405
+ vars: testCase.vars
12406
+ };
12407
+ }
12408
+ }
12186
12409
  /**
12187
12410
  * Gets the severity level for a plugin based on its ID and configuration.
12188
12411
  * @param pluginId - The ID of the plugin.
@@ -12324,6 +12547,7 @@ const categories = {
12324
12547
  foundation: FOUNDATION_PLUGINS,
12325
12548
  harmful: Object.keys(HARM_PLUGINS),
12326
12549
  "coding-agent:core": CODING_AGENT_CORE_PLUGINS,
12550
+ "coding-agent:all": CODING_AGENT_PLUGINS,
12327
12551
  bias: BIAS_PLUGINS,
12328
12552
  pii: PII_PLUGINS,
12329
12553
  medical: MEDICAL_PLUGINS,
@@ -12391,7 +12615,8 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
12391
12615
  ...test.metadata?.modifiers,
12392
12616
  ...languageToAdd
12393
12617
  },
12394
- ...languageToAdd
12618
+ ...languageToAdd,
12619
+ ...getMaterializedMultiInputPromptMetadata(test.vars)
12395
12620
  }
12396
12621
  };
12397
12622
  }
@@ -12418,7 +12643,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
12418
12643
  * @param injectVar - The variable to inject.
12419
12644
  * @returns An array of new test cases generated by strategies.
12420
12645
  */
12421
- async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
12646
+ async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
12422
12647
  const newTestCases = [];
12423
12648
  const strategyResults = {};
12424
12649
  for (const strategy of strategies) {
@@ -12474,14 +12699,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
12474
12699
  }
12475
12700
  }
12476
12701
  resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
12477
- newTestCases.push(...resultTestCases.map((t) => {
12478
- const inputs = t?.metadata?.pluginConfig?.inputs;
12479
- let updatedVars = t.vars;
12480
- if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
12481
- const parsed = JSON.parse(String(t.vars[injectVar]));
12482
- updatedVars = { ...t.vars };
12483
- Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
12484
- } catch {}
12702
+ newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
12703
+ const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
12485
12704
  const strategyConfig = {
12486
12705
  ...strategy.config || {},
12487
12706
  ...maxCharsPerMessage ? { maxCharsPerMessage } : {},
@@ -12489,16 +12708,18 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
12489
12708
  };
12490
12709
  return {
12491
12710
  ...t,
12492
- vars: updatedVars,
12711
+ vars,
12493
12712
  metadata: {
12494
12713
  ...t?.metadata || {},
12495
12714
  ...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
12496
12715
  ...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
12497
12716
  ...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
12498
- ...Object.keys(strategyConfig).length > 0 && { strategyConfig }
12717
+ ...inputMaterialization && { inputMaterialization },
12718
+ ...Object.keys(strategyConfig).length > 0 && { strategyConfig },
12719
+ ...getMaterializedMultiInputPromptMetadata(vars)
12499
12720
  }
12500
12721
  };
12501
- }));
12722
+ })));
12502
12723
  const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
12503
12724
  const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
12504
12725
  const applyNumTestsCap = (calculatedRequested) => {
@@ -12841,14 +13062,21 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12841
13062
  }
12842
13063
  });
12843
13064
  const languageResults = await Promise.allSettled(languagePromises);
12844
- for (const result of languageResults) if (result.status === "fulfilled") {
13065
+ for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
12845
13066
  const { lang, tests, requested, generated } = result.value;
12846
13067
  allPluginTests.push(...tests);
12847
13068
  resultsPerLanguage[lang || "default"] = {
12848
13069
  requested,
12849
13070
  generated
12850
13071
  };
12851
- } else logger.warn(`[Language Processing] Error generating tests for ${plugin.id}: ${result.reason}`);
13072
+ } else {
13073
+ const lang = languages[index];
13074
+ logger.warn(`[Language Processing] Error generating tests for ${plugin.id}: ${result.reason}`);
13075
+ resultsPerLanguage[lang || "default"] = {
13076
+ requested: plugin.numTests,
13077
+ generated: 0
13078
+ };
13079
+ }
12852
13080
  logger.debug(`[Language Processing] Total tests generated for ${plugin.id}: ${allPluginTests.length} (across ${languages.length} language(s))`);
12853
13081
  if (!Array.isArray(allPluginTests) || allPluginTests.length === 0) logger.warn(`Failed to generate tests for ${plugin.id}`);
12854
13082
  else {
@@ -12882,35 +13110,51 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12882
13110
  generated: allPluginTests.length
12883
13111
  };
12884
13112
  } else if (plugin.id.startsWith("file://")) try {
12885
- const customTests = await new CustomPlugin(redteamProvider, purpose, injectVar, plugin.id, resolvePluginConfigWithMaxChars(plugin.config, maxCharsPerMessage)).generateTests(plugin.numTests, delay);
12886
- const testCasesWithMetadata = filterOversizedTestCases(customTests.map((t) => {
12887
- const includePluginConfig = !(t.metadata && Object.hasOwn(t.metadata, "pluginConfig") && t.metadata.pluginConfig === void 0);
12888
- const pluginConfigWithMaxChars = {
13113
+ const languageConfig = plugin.config?.language ?? language;
13114
+ const languages = Array.isArray(languageConfig) ? languageConfig : languageConfig ? [languageConfig] : [void 0];
13115
+ const allCustomTests = [];
13116
+ const resultsPerLanguage = {};
13117
+ const languagePromises = languages.map(async (lang) => {
13118
+ const resolvedConfig = {
12889
13119
  ...resolvePluginConfigWithMaxChars(plugin.config, maxCharsPerMessage),
12890
- ...t.metadata?.pluginConfig ?? {}
13120
+ ...lang ? { language: lang } : {},
13121
+ ...hasMultipleInputs ? { inputs } : {}
12891
13122
  };
12892
- const modifiers = {
12893
- ...buildRedteamModifiers({
13123
+ const customPluginConfig = {
13124
+ ...resolvedConfig,
13125
+ modifiers: buildRedteamModifiers({
12894
13126
  maxCharsPerMessage,
12895
- pluginConfig: pluginConfigWithMaxChars,
13127
+ pluginConfig: resolvedConfig,
12896
13128
  testGenerationInstructions
12897
- }),
12898
- ...t.metadata?.modifiers
13129
+ })
12899
13130
  };
13131
+ const testCasesWithMetadata = filterOversizedTestCases((await new CustomPlugin(redteamProvider, purpose, injectVar, plugin.id, customPluginConfig).generateTests(plugin.numTests, delay)).map((t) => addLanguageToPluginMetadata(t, lang, plugin, maxCharsPerMessage, testGenerationInstructions)), injectVar, `Custom plugin ${plugin.id}`, maxCharsPerMessage);
12900
13132
  return {
12901
- ...t,
12902
- metadata: {
12903
- ...t.metadata || {},
12904
- pluginId: plugin.id,
12905
- ...includePluginConfig && { pluginConfig: pluginConfigWithMaxChars },
12906
- severity: plugin.severity || getPluginSeverity(plugin.id, resolvePluginConfig(plugin.config)),
12907
- modifiers
12908
- }
13133
+ lang,
13134
+ tests: testCasesWithMetadata,
13135
+ requested: plugin.numTests,
13136
+ generated: testCasesWithMetadata.length
12909
13137
  };
12910
- }), injectVar, `Custom plugin ${plugin.id}`, maxCharsPerMessage);
13138
+ });
13139
+ const languageResults = await Promise.allSettled(languagePromises);
13140
+ for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
13141
+ const { lang, tests, requested, generated } = result.value;
13142
+ allCustomTests.push(...tests);
13143
+ resultsPerLanguage[lang || "default"] = {
13144
+ requested,
13145
+ generated
13146
+ };
13147
+ } else {
13148
+ const lang = languages[index];
13149
+ logger.warn(`[Language Processing] Error generating tests for custom plugin ${plugin.id}: ${result.reason}`);
13150
+ resultsPerLanguage[lang || "default"] = {
13151
+ requested: plugin.numTests,
13152
+ generated: 0
13153
+ };
13154
+ }
12911
13155
  if (needsGoalExtraction) {
12912
- logger.debug(`Extracting goal for ${testCasesWithMetadata.length} custom tests from ${plugin.id}...`);
12913
- for (const testCase of testCasesWithMetadata) {
13156
+ logger.debug(`Extracting goal for ${allCustomTests.length} custom tests from ${plugin.id}...`);
13157
+ for (const testCase of allCustomTests) {
12914
13158
  const promptVar = testCase.vars?.[injectVar];
12915
13159
  const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
12916
13160
  const policy = getPolicyText(testCase.metadata);
@@ -12918,13 +13162,21 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12918
13162
  testCase.metadata.goal = extractedGoal;
12919
13163
  }
12920
13164
  }
12921
- testCases.push(...testCasesWithMetadata);
12922
- logger.debug(`Added ${customTests.length} custom test cases from ${plugin.id}`);
12923
- const displayId = getPluginDisplayId(plugin);
12924
- pluginResults[displayId] = {
12925
- requested: plugin.numTests,
12926
- generated: testCasesWithMetadata.length
13165
+ testCases.push(...allCustomTests);
13166
+ logger.debug(`Added ${allCustomTests.length} custom test cases from ${plugin.id}`);
13167
+ const baseDisplayId = getPluginDisplayId(plugin);
13168
+ if (languages.filter((lang) => lang !== void 0).length > 1) for (const [langKey, result] of Object.entries(resultsPerLanguage)) {
13169
+ const displayId = langKey === "en" ? baseDisplayId : `(${langKey}) ${baseDisplayId}`;
13170
+ pluginResults[displayId] = {
13171
+ requested: result.requested,
13172
+ generated: result.generated
13173
+ };
13174
+ }
13175
+ else pluginResults[baseDisplayId] = {
13176
+ requested: plugin.numTests * languages.length,
13177
+ generated: allCustomTests.length
12927
13178
  };
13179
+ progressBar?.increment(plugin.numTests * languages.length);
12928
13180
  } catch (e) {
12929
13181
  logger.error(`Error generating tests for custom plugin ${plugin.id}: ${e}`);
12930
13182
  const displayId = getPluginDisplayId(plugin);
@@ -12953,7 +13205,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12953
13205
  targetIds,
12954
13206
  ...retryStrategy.config
12955
13207
  };
12956
- const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
13208
+ const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
12957
13209
  pluginTestCases.push(...retryTestCases);
12958
13210
  Object.assign(strategyResults, retryResults);
12959
13211
  if (showProgressBar) progressBar?.increment(retryTestCases.length);
@@ -12961,7 +13213,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
12961
13213
  checkAbort();
12962
13214
  const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
12963
13215
  if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
12964
- const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
13216
+ const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
12965
13217
  Object.assign(strategyResults, otherStrategyResults);
12966
13218
  if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
12967
13219
  const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
@@ -13854,6 +14106,10 @@ function stripAuthFromUrl(urlString) {
13854
14106
  }
13855
14107
  }
13856
14108
  async function handleEmailCollection(evalRecord) {
14109
+ if (evalRecord.author) {
14110
+ logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
14111
+ return;
14112
+ }
13857
14113
  if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
13858
14114
  let email = getUserEmail();
13859
14115
  if (!email) {
@@ -13997,6 +14253,14 @@ function shouldShareResults(opts) {
13997
14253
  return cloudConfig.isEnabled() && sharing !== false;
13998
14254
  }
13999
14255
  //#endregion
14256
+ //#region src/commands/eval/redteamWarning.ts
14257
+ function warnIfRedteamConfigHasNoTests(config, testSuite) {
14258
+ if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
14259
+ Warning: Config file has a redteam section but no test cases.
14260
+ Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
14261
+ `));
14262
+ }
14263
+ //#endregion
14000
14264
  //#region src/util/formatDuration.ts
14001
14265
  /**
14002
14266
  * Formats a duration in seconds into a human-readable string
@@ -14437,14 +14701,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
14437
14701
  state.resume = true;
14438
14702
  state.retryMode = true;
14439
14703
  } else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
14440
- if (!cmdObj.envPath && commandLineOptions?.envPath) {
14704
+ if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
14441
14705
  logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
14442
14706
  setupEnv(commandLineOptions.envPath);
14443
14707
  }
14444
- if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
14445
- Warning: Config file has a redteam section but no test cases.
14446
- Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
14447
- `));
14708
+ warnIfRedteamConfigHasNoTests(config, testSuite);
14448
14709
  if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
14449
14710
  const maybeUrl = config.providers[0]?.config?.url;
14450
14711
  if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
@@ -14552,7 +14813,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
14552
14813
  ${z.prettifyError(testSuiteSchema.error)}
14553
14814
 
14554
14815
  Please review your promptfooconfig.yaml configuration.`));
14555
- const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, { runtimeOptions: options }) : new Eval(config, { runtimeOptions: options });
14816
+ const author = getAuthor();
14817
+ const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
14818
+ author,
14819
+ runtimeOptions: options
14820
+ }) : new Eval(config, {
14821
+ author,
14822
+ runtimeOptions: options
14823
+ });
14556
14824
  const abortController = new AbortController();
14557
14825
  const previousAbortSignal = evaluateOptions.abortSignal;
14558
14826
  evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
@@ -14954,6 +15222,12 @@ async function doRedteamRun(options) {
14954
15222
  return evalResult;
14955
15223
  }
14956
15224
  //#endregion
15225
+ //#region src/types/transform.ts
15226
+ /** Runtime type guard for `TransformFunction` values. */
15227
+ function isTransformFunction(value) {
15228
+ return typeof value === "function";
15229
+ }
15230
+ //#endregion
14957
15231
  //#region src/index.ts
14958
15232
  /**
14959
15233
  * Shallow-clone a test case so the caller can swap in resolved ApiProvider
@@ -14975,66 +15249,148 @@ function cloneTestForResolve(test) {
14975
15249
  if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
14976
15250
  return cloned;
14977
15251
  }
15252
+ function toSerializableProviderRef(provider) {
15253
+ if (isApiProvider(provider)) return sanitizeProvider(provider);
15254
+ if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
15255
+ return provider;
15256
+ }
15257
+ function isRecord(value) {
15258
+ return Boolean(value && typeof value === "object" && !Array.isArray(value));
15259
+ }
15260
+ function withSerializableProvider(record) {
15261
+ if (!isApiProvider(record.provider)) return record;
15262
+ return {
15263
+ ...record,
15264
+ provider: sanitizeProvider(record.provider)
15265
+ };
15266
+ }
15267
+ /**
15268
+ * Function-valued transforms are first-class at runtime but are silently dropped
15269
+ * by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
15270
+ * a function reference, so replace every `transform`-like field with a
15271
+ * `[inline function]: name` marker. Non-function values pass through unchanged.
15272
+ *
15273
+ * `droppedRef.value` is flipped to `true` the first time a function is replaced
15274
+ * so the caller can emit a single warning instead of logging per field.
15275
+ */
15276
+ function replaceFunctionTransforms(record, droppedRef) {
15277
+ let result;
15278
+ for (const key of TRANSFORM_KEYS) {
15279
+ const value = record[key];
15280
+ if (!isTransformFunction(value)) continue;
15281
+ if (!result) result = { ...record };
15282
+ result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
15283
+ droppedRef.value = true;
15284
+ }
15285
+ return result ?? record;
15286
+ }
15287
+ function toSerializableAssertion(assertion, droppedRef) {
15288
+ if (!isRecord(assertion)) return assertion;
15289
+ let sanitizedAssertion = withSerializableProvider(assertion);
15290
+ sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
15291
+ if (Array.isArray(assertion.assert)) sanitizedAssertion = {
15292
+ ...sanitizedAssertion,
15293
+ assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
15294
+ };
15295
+ return sanitizedAssertion;
15296
+ }
15297
+ function toSerializableTestCase(test, droppedRef) {
15298
+ if (!isRecord(test)) return test;
15299
+ let sanitizedTest = withSerializableProvider(test);
15300
+ if (isRecord(test.options)) {
15301
+ let options = withSerializableProvider(test.options);
15302
+ options = replaceFunctionTransforms(options, droppedRef);
15303
+ if (options !== test.options) sanitizedTest = {
15304
+ ...sanitizedTest,
15305
+ options
15306
+ };
15307
+ }
15308
+ if (Array.isArray(test.assert)) sanitizedTest = {
15309
+ ...sanitizedTest,
15310
+ assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
15311
+ };
15312
+ return sanitizedTest;
15313
+ }
15314
+ function toSerializableScenario(scenario, droppedRef) {
15315
+ if (!isRecord(scenario)) return scenario;
15316
+ if (!Array.isArray(scenario.tests)) return scenario;
15317
+ return {
15318
+ ...scenario,
15319
+ tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
15320
+ };
15321
+ }
15322
+ function createSerializableUnifiedConfig(testSuite, prompts) {
15323
+ const droppedRef = { value: false };
15324
+ const config = {
15325
+ ...testSuite,
15326
+ providers: toSerializableProviderRef(testSuite.providers),
15327
+ defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
15328
+ tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
15329
+ scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
15330
+ prompts
15331
+ };
15332
+ if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
15333
+ return config;
15334
+ }
14978
15335
  async function evaluate(testSuite, options = {}) {
14979
- if (testSuite.writeLatestResults) await runDbMigrations();
14980
- const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
15336
+ const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
15337
+ if (testSuiteConfig.writeLatestResults) await runDbMigrations();
15338
+ const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
14981
15339
  const providerMap = {};
14982
15340
  for (const p of loadedProviders) {
14983
15341
  providerMap[p.id()] = p;
14984
15342
  if (p.label) providerMap[p.label] = p;
14985
15343
  }
14986
- let resolvedDefaultTest = testSuite.defaultTest;
14987
- if (typeof testSuite.defaultTest === "string" && testSuite.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuite.defaultTest);
15344
+ let resolvedDefaultTest = testSuiteConfig.defaultTest;
15345
+ if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
14988
15346
  const constructedTestSuite = {
14989
- ...testSuite,
15347
+ ...testSuiteConfig,
14990
15348
  defaultTest: resolvedDefaultTest,
14991
- scenarios: testSuite.scenarios,
15349
+ scenarios: testSuiteConfig.scenarios,
14992
15350
  providers: loadedProviders,
14993
- tests: await readTests(testSuite.tests),
14994
- nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
14995
- prompts: await processPrompts(testSuite.prompts)
15351
+ tests: await readTests(testSuiteConfig.tests),
15352
+ nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
15353
+ prompts: await processPrompts(testSuiteConfig.prompts)
14996
15354
  };
14997
15355
  if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
14998
15356
  constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
14999
15357
  if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
15000
- env: testSuite.env,
15358
+ env: testSuiteConfig.env,
15001
15359
  basePath: state.basePath
15002
15360
  });
15003
15361
  if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
15004
- env: testSuite.env,
15362
+ env: testSuiteConfig.env,
15005
15363
  basePath: state.basePath
15006
15364
  });
15007
15365
  }
15008
15366
  constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
15009
15367
  for (const test of constructedTestSuite.tests) {
15010
15368
  if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
15011
- env: testSuite.env,
15369
+ env: testSuiteConfig.env,
15012
15370
  basePath: state.basePath
15013
15371
  });
15014
15372
  for (const assertion of test.assert || []) {
15015
15373
  if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
15016
15374
  if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
15017
- env: testSuite.env,
15375
+ env: testSuiteConfig.env,
15018
15376
  basePath: state.basePath
15019
15377
  });
15020
15378
  }
15021
15379
  }
15022
15380
  if (options.cache === false) disableCache();
15023
- const parsedProviderPromptMap = readProviderPromptMap(testSuite, constructedTestSuite.prompts);
15024
- const unifiedConfig = {
15025
- ...testSuite,
15026
- prompts: constructedTestSuite.prompts
15027
- };
15028
- const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
15381
+ const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
15382
+ const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
15383
+ const author = getAuthor(suiteAuthor);
15384
+ const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
15029
15385
  const ret = await evaluate$1({
15030
15386
  ...constructedTestSuite,
15031
15387
  providerPromptMap: parsedProviderPromptMap
15032
15388
  }, evalRecord, {
15033
15389
  eventSource: "library",
15034
- isRedteam: Boolean(testSuite.redteam),
15390
+ isRedteam: Boolean(testSuiteConfig.redteam),
15035
15391
  ...options
15036
15392
  });
15037
- if (testSuite.writeLatestResults && testSuite.sharing) if (isSharingEnabled(ret)) try {
15393
+ if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
15038
15394
  const shareableUrl = await createShareableUrl(ret, { silent: true });
15039
15395
  if (shareableUrl) {
15040
15396
  ret.shareableUrl = shareableUrl;
@@ -15045,9 +15401,9 @@ async function evaluate(testSuite, options = {}) {
15045
15401
  logger.warn(`Failed to create shareable URL: ${error}`);
15046
15402
  }
15047
15403
  else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
15048
- if (testSuite.outputPath) {
15049
- if (typeof testSuite.outputPath === "string") await writeOutput(testSuite.outputPath, evalRecord, null);
15050
- else if (Array.isArray(testSuite.outputPath)) await writeMultipleOutputs(testSuite.outputPath, evalRecord, null);
15404
+ if (testSuiteConfig.outputPath) {
15405
+ if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
15406
+ else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
15051
15407
  }
15052
15408
  return ret;
15053
15409
  }
@@ -15076,6 +15432,6 @@ var src_default = {
15076
15432
  redteam
15077
15433
  };
15078
15434
  //#endregion
15079
- export { AssertionOrSetSchema, AssertionSchema, AssertionSetSchema, AssertionTypeSchema, AtomicTestCaseSchema, BaseAssertionTypesSchema, BaseTokenUsageSchema, CommandLineOptionsSchema, CompletedPromptSchema, CompletionTokenDetailsSchema, ConversationMessageSchema, DerivedMetricSchema, EvalResultsFilterMode, EvaluateOptionsSchema, GradingConfigSchema, InputsSchema, NotPrefixedAssertionTypesSchema, OutputConfigSchema, OutputFileExtension, PartialGenerationError, PluginConfigSchema, PolicyObjectSchema, ProvidersSchema, ResultFailureReason, ScenarioSchema, SpecialAssertionTypesSchema, StrategyConfigSchema, TestCaseSchema, TestCaseWithVarsFileSchema, TestCasesWithMetadataPromptSchema, TestCasesWithMetadataSchema, TestGeneratorConfigSchema, TestSuiteConfigSchema, TestSuiteSchema, UnifiedConfigSchema, VarsSchema, assertions_default as assertions, cache_exports as cache, src_default as default, evaluate, generateTable, guardrails, isApiProvider, isGradingResult, isProviderOptions, isResultFailureReason, loadApiProvider, redteam };
15435
+ export { AssertionOrSetSchema, AssertionSchema, AssertionSetSchema, AssertionTypeSchema, AtomicTestCaseSchema, BaseAssertionTypesSchema, BaseTokenUsageSchema, CommandLineOptionsSchema, CompletedPromptSchema, CompletionTokenDetailsSchema, ConversationMessageSchema, DerivedMetricSchema, DocumentMediaInjectionPlacementSchema, DocumentMediaInjectionPlacementValues, DocxInjectionPlacementSchema, DocxInjectionPlacementValues, EvalResultsFilterMode, EvaluateOptionsSchema, GradingConfigSchema, InputConfigSchema, InputDefinitionObjectSchema, InputDefinitionSchema, InputTypeSchema, InputTypeValues, InputsSchema, NotPrefixedAssertionTypesSchema, OutputConfigSchema, OutputFileExtension, PartialGenerationError, PluginConfigSchema, PolicyObjectSchema, ProvidersSchema, ResultFailureReason, ScenarioSchema, SpecialAssertionTypesSchema, StrategyConfigSchema, TestCaseSchema, TestCaseWithVarsFileSchema, TestCasesWithMetadataPromptSchema, TestCasesWithMetadataSchema, TestGeneratorConfigSchema, TestSuiteConfigSchema, TestSuiteSchema, UnifiedConfigSchema, VarsSchema, assertions_default as assertions, buildInputPromptDescription, cache_exports as cache, src_default as default, evaluate, generateTable, getInputDescription, getInputType, guardrails, isApiProvider, isGradingResult, isProviderOptions, isResultFailureReason, isTransformFunction, loadApiProvider, normalizeInputDefinition, normalizeInputs, redteam };
15080
15436
 
15081
15437
  //# sourceMappingURL=index.js.map