promptfoo 0.121.5 → 0.121.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (490)
  1. package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
  2. package/dist/src/{accounts-BIFntVWB.cjs → accounts-BVz5gHLK.cjs} +16 -10
  3. package/dist/src/{accounts-bnyHT7Ju.js → accounts-BWjqwsrf.js} +12 -7
  4. package/dist/src/{accounts-CLJHCDDb.js → accounts-D6IBfEE0.js} +13 -8
  5. package/dist/src/{accounts-CaLNYnf7.js → accounts-DAv_0iE7.js} +12 -7
  6. package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-BJKAkz2e.js} +2 -2
  7. package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-C-A92xhn.cjs} +8 -7
  8. package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-DmoS_S4B.js} +2 -2
  9. package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-GdToujHu.js} +2 -2
  10. package/dist/src/{agents-BGqaTDnr.js → agents-2C8NN6I1.js} +20 -9
  11. package/dist/src/{agents-n6vPqV3i.js → agents-BMAiSR2o.js} +4 -4
  12. package/dist/src/{agents-BYdMl1UE.js → agents-C-PGaxwj.js} +20 -9
  13. package/dist/src/{agents-BV9yFpXX.js → agents-C98cz5pl.js} +20 -9
  14. package/dist/src/{agents-WULPVjbH.cjs → agents-DB8Ub2Ld.cjs} +7 -6
  15. package/dist/src/{agents-DhxWMCtH.js → agents-Dwshy2H8.js} +4 -4
  16. package/dist/src/{agents-emVcx3yh.js → agents-mlKjx-cK.js} +2 -2
  17. package/dist/src/{agents-DiWmQYH9.cjs → agents-n2ej-c4H.cjs} +21 -10
  18. package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-Bi-laUlp.js} +6 -6
  19. package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-D0OFV4Vj.cjs} +7 -7
  20. package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-WyUK0wYy.js} +6 -6
  21. package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-v-63ZjEI.js} +6 -6
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-sCXUfaWo.js +1 -0
  24. package/dist/src/app/assets/index-BopgkZEh.js +388 -0
  25. package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
  26. package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
  27. package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
  28. package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
  29. package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
  30. package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
  31. package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
  32. package/dist/src/app/index.html +6 -6
  33. package/dist/src/{audio-CScmnmEB.js → audio-DnEIHWZR.js} +3 -3
  34. package/dist/src/{audio-Da8U9IS5.js → audio-heR0mu0n.js} +3 -3
  35. package/dist/src/{audio-BvpTOArF.js → audio-wafFO1wn.js} +3 -3
  36. package/dist/src/{audio-C0vDeS0j.cjs → audio-x44tsxIo.cjs} +4 -4
  37. package/dist/src/authoritativeMarkupInjection-2G3Be6oL.cjs +74 -0
  38. package/dist/src/authoritativeMarkupInjection-81PPynHR.js +75 -0
  39. package/dist/src/authoritativeMarkupInjection-D9O70HPi.js +74 -0
  40. package/dist/src/authoritativeMarkupInjection-PxSf3Rh8.js +74 -0
  41. package/dist/src/{base-dYsl2hmL.js → base-0Gzzue9Z.js} +4 -3
  42. package/dist/src/{base-BOMaNEes.js → base-Bp4c52YZ.js} +4 -3
  43. package/dist/src/{base-BTux96b1.js → base-BuNn-YBX.js} +4 -3
  44. package/dist/src/{base-Tw6uhH8K.cjs → base-BzdS8tod.cjs} +5 -4
  45. package/dist/src/bestOfN-BHiOjeaq.js +136 -0
  46. package/dist/src/bestOfN-CdkNPPbX.js +136 -0
  47. package/dist/src/bestOfN-ClUSOhN0.js +137 -0
  48. package/dist/src/bestOfN-DWwXEg1h.cjs +140 -0
  49. package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
  50. package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
  51. package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
  52. package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
  53. package/dist/src/cache-BKYi3kAA.js +3 -0
  54. package/dist/src/{cache-BI5BY7ey.js → cache-BR77mdIR.js} +127 -11
  55. package/dist/src/{cache-Cr-qWIbP.js → cache-CrioYnaa.js} +125 -10
  56. package/dist/src/{cache-Bzttsk0X.js → cache-DdriHsNX.js} +125 -10
  57. package/dist/src/{cache-DGg-yTZG.cjs → cache-h5MWOBZI.cjs} +135 -25
  58. package/dist/src/{chat-Cx_LkwvZ.js → chat-BBEnnpQk.js} +11 -11
  59. package/dist/src/{chat-DChSH_Es.js → chat-BSos6PvZ.js} +9 -9
  60. package/dist/src/{chat-aMQZw6R7.js → chat-Bnt7ieO0.js} +4 -4
  61. package/dist/src/{chat-vYqqv1gP.cjs → chat-DSyYuTYT.cjs} +14 -13
  62. package/dist/src/{chat-BLOdH60v.js → chat-DTdf-J5Q.js} +11 -11
  63. package/dist/src/{chat-DH97tVV9.cjs → chat-Dq3DomYU.cjs} +6 -6
  64. package/dist/src/{chat-DG2LkwLq.js → chat-g5QLeLOo.js} +4 -4
  65. package/dist/src/{chat-D9nudO9b.js → chat-mTTuUAYb.js} +4 -4
  66. package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
  67. package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
  68. package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
  69. package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
  70. package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-BGUac_kS.js} +213 -66
  71. package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-BP__YGfK.js} +213 -66
  72. package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-Ddgx5BIQ.js} +212 -65
  73. package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-n2XcEclh.cjs} +219 -71
  74. package/dist/src/cloud-DiWbUiVP.js +3 -0
  75. package/dist/src/{cloud-Da0bofJd.js → cloud-DqF5N1aJ.js} +2 -2
  76. package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-BGamMotN.js} +4 -4
  77. package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-BwW8W-w7.js} +4 -4
  78. package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-DCRGnsyL.cjs} +6 -6
  79. package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-sD26nP6V.js} +4 -4
  80. package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-2lnOT4qM.js} +3 -3
  81. package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-B1-8KNCt.js} +3 -3
  82. package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-CCC1DFrC.cjs} +5 -5
  83. package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-DOghiU6r.js} +3 -3
  84. package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-BWeWamEb.js} +19 -4
  85. package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-DyQB1P0p.js} +20 -5
  86. package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-R9u_G7W9.cjs} +27 -11
  87. package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-ZQRJSJjU.js} +20 -5
  88. package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-B62H0fe7.js} +16 -5
  89. package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-BRPUvJG8.cjs} +31 -12
  90. package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-Dio1zJBS.js} +18 -7
  91. package/dist/src/codex-sdk-DxukZs_K.js +3 -0
  92. package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-iEmW1eS2.js} +18 -7
  93. package/dist/src/{cometapi-B5ImDlSm.js → cometapi--hh7dESS.js} +7 -7
  94. package/dist/src/{cometapi-CC7hWxmX.js → cometapi-C7yWNGqt.js} +7 -7
  95. package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CSIi16a0.js} +7 -7
  96. package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-sZgBZtbU.cjs} +8 -8
  97. package/dist/src/{completion-DtQ72Bm3.cjs → completion-CWtqdn3z.cjs} +6 -6
  98. package/dist/src/{completion-Vq_ad618.js → completion-DT8cxo9T.js} +4 -4
  99. package/dist/src/{completion-2iuYVxwi.js → completion-DUScduXp.js} +5 -5
  100. package/dist/src/{completion-CrD6MQ93.js → completion-D_2IOAoS.js} +5 -5
  101. package/dist/src/constants-BjJV0cRr.js +6 -0
  102. package/dist/src/constants-DH5XYLKZ.js +7 -0
  103. package/dist/src/constants-DZGEFLsu.js +6 -0
  104. package/dist/src/constants-a2kYssQk.cjs +11 -0
  105. package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
  106. package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
  107. package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
  108. package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
  109. package/dist/src/crescendo-B41TwUHM.js +722 -0
  110. package/dist/src/crescendo-Bfic7KC4.js +724 -0
  111. package/dist/src/crescendo-DXFB7rHP.cjs +725 -0
  112. package/dist/src/crescendo-DvvYxMLA.js +723 -0
  113. package/dist/src/custom-BdzuqFTN.js +619 -0
  114. package/dist/src/custom-Cf5Q6r-P.js +618 -0
  115. package/dist/src/custom-DLgufezC.js +620 -0
  116. package/dist/src/custom-LEXHCRe3.cjs +621 -0
  117. package/dist/src/{docker--3qzPa-6.js → docker-BOSO_6hK.js} +5 -5
  118. package/dist/src/{docker-D3AY-5F5.cjs → docker-D0h2vFrc.cjs} +6 -6
  119. package/dist/src/{docker-DCsCDvwM.js → docker-D2TWGyTP.js} +5 -5
  120. package/dist/src/{docker-Dorv4_Dg.js → docker-Vj_4_cPg.js} +5 -5
  121. package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BQIApR18.cjs} +6 -6
  122. package/dist/src/{embedding-ChS1ivFS.js → embedding-CStK0TV6.js} +5 -5
  123. package/dist/src/{embedding-DNRvZwRN.js → embedding-CU78FMnw.js} +5 -5
  124. package/dist/src/{embedding-D_bI4NDq.js → embedding-CV8lmCnU.js} +4 -4
  125. package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
  126. package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
  127. package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
  128. package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
  129. package/dist/src/{eval-DJ_4A-tr.js → eval-B3BaNBbO.js} +19 -19
  130. package/dist/src/{eval-BQPLBJbw.js → eval-DfR9885C.js} +1 -1
  131. package/dist/src/evalResult-BtZSUgQv.js +3 -0
  132. package/dist/src/{evalResult-D6P5I5il.js → evalResult-CcSqNl_Y.js} +17 -10
  133. package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-DHXs-9TL.cjs} +29 -11
  134. package/dist/src/{evalResult-pSvGWFMo.js → evalResult-PGqEbasb.js} +18 -11
  135. package/dist/src/evaluator-B9LGbKI8.js +3 -0
  136. package/dist/src/{evaluator-D-UIbbYq.js → evaluator-n_dEb00o.js} +258 -132
  137. package/dist/src/evaluatorHelpers-CrRObe2z.js +510 -0
  138. package/dist/src/evaluatorHelpers-D06I9WFL.cjs +537 -0
  139. package/dist/src/evaluatorHelpers-D1_kwvyp.js +511 -0
  140. package/dist/src/{extractor-DxyiFhPk.js → extractor-BVkZtk4R.js} +5 -5
  141. package/dist/src/{extractor-BM3jRERL.js → extractor-D3Fv_Tdh.js} +5 -5
  142. package/dist/src/{extractor-YlZbUMsL.js → extractor-Jp53vs-6.js} +5 -5
  143. package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-bV_NOoaz.cjs} +6 -6
  144. package/dist/src/{fetch-Y5qX_kST.js → fetch-BSSAcMxf.js} +90 -26
  145. package/dist/src/fetch-BodQTrMU.js +3 -0
  146. package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Cfm4EuOB.cjs} +115 -32
  147. package/dist/src/{fetch-B6ch2nU2.js → fetch-Cpf1U1nO.js} +86 -26
  148. package/dist/src/{fetch-D9xxyC1p.js → fetch-Doks14zQ.js} +90 -26
  149. package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
  150. package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
  151. package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
  152. package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
  153. package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
  154. package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
  155. package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
  156. package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
  157. package/dist/src/goat-Ba7Gznzq.js +467 -0
  158. package/dist/src/goat-CJngS-WU.js +468 -0
  159. package/dist/src/goat-CwsbfQeu.js +466 -0
  160. package/dist/src/goat-DOMbozoX.cjs +470 -0
  161. package/dist/src/graders-B3D7kCcD.js +3 -0
  162. package/dist/src/{graders-CgPn32yp.js → graders-BQt1BaQe.js} +1019 -86
  163. package/dist/src/{graders-Bw1wk_21.cjs → graders-CBQ2s6gz.cjs} +1087 -130
  164. package/dist/src/{graders-CwrbifOo.js → graders-DaRU98zs.js} +1022 -84
  165. package/dist/src/{graders-BoUqsCEm.js → graders-KXzjnIim.js} +1024 -86
  166. package/dist/src/hydra-BU6GjYoQ.js +560 -0
  167. package/dist/src/hydra-BmXHxlyl.js +559 -0
  168. package/dist/src/hydra-CR0KyYDb.js +561 -0
  169. package/dist/src/hydra-RO9jBks7.cjs +560 -0
  170. package/dist/src/{image-BeWaInPF.js → image-BBmZdKO3.js} +3 -3
  171. package/dist/src/{image-qjO6FWPs.js → image-B_PFG7IG.js} +3 -3
  172. package/dist/src/image-BtODOZjh.js +442 -0
  173. package/dist/src/image-CYuNJIqd.js +442 -0
  174. package/dist/src/{image-D10dNAav.cjs → image-E00WFZkq.cjs} +4 -4
  175. package/dist/src/image-HK2Cfqb1.cjs +465 -0
  176. package/dist/src/{image-Dr_3I3nK.js → image-LGj8dTcr.js} +3 -3
  177. package/dist/src/image-YMKejC0r.js +443 -0
  178. package/dist/src/index.cjs +711 -339
  179. package/dist/src/index.d.cts +3159 -1611
  180. package/dist/src/index.d.ts +3158 -1610
  181. package/dist/src/index.js +605 -249
  182. package/dist/src/indirectWebPwn-B80dLlFC.js +260 -0
  183. package/dist/src/indirectWebPwn-BMTXXznx.js +386 -0
  184. package/dist/src/indirectWebPwn-BZFPV7Q9.js +385 -0
  185. package/dist/src/indirectWebPwn-BaEQEOIO.cjs +260 -0
  186. package/dist/src/indirectWebPwn-BzpyMnFS.js +259 -0
  187. package/dist/src/indirectWebPwn-CiWB-vVH.js +385 -0
  188. package/dist/src/indirectWebPwn-D7NA9Nsv.cjs +397 -0
  189. package/dist/src/indirectWebPwn-o_bEFMjP.js +259 -0
  190. package/dist/src/inputVariables-B0qUChbV.js +467 -0
  191. package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
  192. package/dist/src/inputVariables-DXFdi7AI.js +468 -0
  193. package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
  194. package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
  195. package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
  196. package/dist/src/iterative-B-l0syBV.js +501 -0
  197. package/dist/src/iterative-BACUeCCz.cjs +503 -0
  198. package/dist/src/iterative-CVwoExo8.js +502 -0
  199. package/dist/src/iterative-QDrGSyss.js +500 -0
  200. package/dist/src/iterativeImage-BQg2OwA6.js +413 -0
  201. package/dist/src/iterativeImage-Bwn0fM75.js +413 -0
  202. package/dist/src/iterativeImage-CcgVyASo.cjs +415 -0
  203. package/dist/src/iterativeImage-D_UbQXg4.js +414 -0
  204. package/dist/src/iterativeMeta-CIu-CHRS.js +405 -0
  205. package/dist/src/iterativeMeta-CN8CNjFA.js +406 -0
  206. package/dist/src/iterativeMeta-DN6BTjpq.js +404 -0
  207. package/dist/src/iterativeMeta-DlqY3BsS.cjs +405 -0
  208. package/dist/src/iterativeTree-CJ8a8G9T.js +820 -0
  209. package/dist/src/iterativeTree-DcuFXnjL.js +819 -0
  210. package/dist/src/iterativeTree-FrDDYAN0.js +818 -0
  211. package/dist/src/iterativeTree-LOgAi0nU.cjs +821 -0
  212. package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-CdbcGBZF.cjs} +25 -11
  213. package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-CjOXp6Lr.js} +23 -9
  214. package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-DxAq4n4z.js} +24 -10
  215. package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-o_wTLzrt.js} +24 -10
  216. package/dist/src/{litellm-DRc4qWfc.js → litellm-B2gHwya_.js} +4 -4
  217. package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CPpdlO7n.cjs} +5 -5
  218. package/dist/src/{litellm-BLSiANhk.js → litellm-CYfgxLrM.js} +4 -4
  219. package/dist/src/{litellm-DQGo_juI.js → litellm-ojWBKU3C.js} +4 -4
  220. package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
  221. package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-BCCO9XXG.cjs} +9 -8
  222. package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BPrdihAb.js} +5 -5
  223. package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-DP5N79lB.js} +5 -5
  224. package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-DTDyoAlM.js} +5 -5
  225. package/dist/src/main.js +727 -234
  226. package/dist/src/memoryPoisoning-B6N3us35.cjs +106 -0
  227. package/dist/src/memoryPoisoning-Bc_BK_k2.js +106 -0
  228. package/dist/src/memoryPoisoning-D375zwSX.js +107 -0
  229. package/dist/src/memoryPoisoning-DJA0YjJT.js +106 -0
  230. package/dist/src/{messages-CewuNcNS.js → messages-Bg29Nbit.js} +17 -9
  231. package/dist/src/{messages-BnsVHUnm.cjs → messages-BrZEnHsV.cjs} +24 -15
  232. package/dist/src/{messages-CI69Lasb.js → messages-CBulRaud.js} +18 -10
  233. package/dist/src/{messages-B9dSjrNf.js → messages-K9A8RxBM.js} +18 -10
  234. package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
  235. package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
  236. package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
  237. package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
  238. package/dist/src/mischievousUser-BDLwSGN0.js +46 -0
  239. package/dist/src/mischievousUser-Blx-OtT3.js +47 -0
  240. package/dist/src/mischievousUser-CHuTTvBg.js +46 -0
  241. package/dist/src/mischievousUser-CSUrH3fq.cjs +46 -0
  242. package/dist/src/{modelslab-zpz9JcK0.js → modelslab-BjEk7yCP.js} +6 -6
  243. package/dist/src/{modelslab-BCLOtfek.js → modelslab-CBCrdIBc.js} +6 -6
  244. package/dist/src/{modelslab-D73OnKSx.js → modelslab-CrGpXuhv.js} +6 -6
  245. package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-Cvh0klQx.cjs} +7 -7
  246. package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-CUDDZcSA.js} +5 -5
  247. package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-CyDESu5J.js} +5 -5
  248. package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-DH_Ksu6X.cjs} +9 -8
  249. package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-YQAqUYJY.js} +5 -5
  250. package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-BVFFB7JZ.js} +2 -2
  251. package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-Bg3cxMMu.cjs} +5 -4
  252. package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-BhmnRGyh.js} +3 -3
  253. package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-DxmK95c1.js} +3 -3
  254. package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
  255. package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
  256. package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
  257. package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-CHQaZi_-.cjs} +17 -15
  258. package/dist/src/{openclaw-DA8U4DsD.js → openclaw-CWOm2eOr.js} +9 -8
  259. package/dist/src/{openclaw-DObVgpjC.js → openclaw-Cdk9JKfY.js} +9 -8
  260. package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-DhF8rUpI.js} +9 -8
  261. package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-BRjiPV-g.js} +38 -3
  262. package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-CPFMw0ed.cjs} +45 -9
  263. package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-CaSOCsBA.js} +39 -4
  264. package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-CqkyG8De.js} +39 -4
  265. package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
  266. package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
  267. package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
  268. package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
  269. package/dist/src/packageParser--MWTSrPW.js +36 -0
  270. package/dist/src/packageParser-CgE-ziRo.js +35 -0
  271. package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
  272. package/dist/src/packageParser-hwwSGnAZ.js +35 -0
  273. package/dist/src/processShim-BBxt7LKO.js +95 -0
  274. package/dist/src/processShim-BcGzU8fY.js +94 -0
  275. package/dist/src/processShim-C_z3aRvF.js +94 -0
  276. package/dist/src/processShim-DSY9BV2T.cjs +98 -0
  277. package/dist/src/promptLength-0qIHyhA5.js +71 -0
  278. package/dist/src/promptLength-4X-Wd8PG.js +72 -0
  279. package/dist/src/promptLength-B9nZEfO6.js +71 -0
  280. package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
  281. package/dist/src/promptfoo-BU4_0J85.js +238 -0
  282. package/dist/src/promptfoo-CReYAtfb.js +237 -0
  283. package/dist/src/promptfoo-ClChwT74.cjs +292 -0
  284. package/dist/src/promptfoo-CqEpj6Sr.js +237 -0
  285. package/dist/src/prompts-BYMtqPCw.js +259 -0
  286. package/dist/src/prompts-C-bqE1Yp.js +260 -0
  287. package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
  288. package/dist/src/prompts-DHhQsANy.js +259 -0
  289. package/dist/src/prompts-D_QpZ2Dm.js +271 -0
  290. package/dist/src/prompts-hNvWBD3z.cjs +284 -0
  291. package/dist/src/prompts-huDVH2CI.js +270 -0
  292. package/dist/src/prompts-p78Hul5i.cjs +289 -0
  293. package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
  294. package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
  295. package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
  296. package/dist/src/{providers-DT-GtF2t.js → providers-B7TyByfj.js} +789 -11925
  297. package/dist/src/{providers-CJh7iriU.js → providers-BGc7tDtQ.js} +796 -11872
  298. package/dist/src/{providers-Ctcc592x.js → providers-CSOp-bCm.js} +1 -1
  299. package/dist/src/{providers-eDShy16E.cjs → providers-V6RBuieY.cjs} +837 -12138
  300. package/dist/src/{providers-DRrerKra.js → providers-iH3Sw1yo.js} +851 -11938
  301. package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
  302. package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
  303. package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
  304. package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
  305. package/dist/src/{quiverai-CPKhWgaT.js → quiverai-58BtRRet.js} +3 -3
  306. package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-C1dYu5MW.js} +3 -3
  307. package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CNMIpZQg.js} +3 -3
  308. package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-fmSfU43k.cjs} +4 -4
  309. package/dist/src/registry-6Jw6ebor.cjs +124 -0
  310. package/dist/src/registry-BTGk2ZkB.js +124 -0
  311. package/dist/src/registry-BTNqmP5o.js +125 -0
  312. package/dist/src/registry-NGnOG2xa.js +124 -0
  313. package/dist/src/{server-gyd6d4Hc.js → remoteGeneration--D6WjzUm.js} +15 -108
  314. package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-BpIYlb_O.cjs} +30 -119
  315. package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-CJC3E0aW.js} +15 -147
  316. package/dist/src/remoteGeneration-D6UjE2JT.js +218 -0
  317. package/dist/src/render-BNTrbmBw.cjs +384 -0
  318. package/dist/src/render-CSP99NLm.js +348 -0
  319. package/dist/src/render-DFfDeYUK.js +347 -0
  320. package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
  321. package/dist/src/render-_6ur1fhE.js +347 -0
  322. package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
  323. package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
  324. package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
  325. package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
  326. package/dist/src/{responses-1ztiVYsx.js → responses-5Gf5HNOi.js} +11 -7
  327. package/dist/src/{responses-BiaBguAu.js → responses-BVi7xIdv.js} +13 -9
  328. package/dist/src/{responses-CF-ayauu.cjs → responses-CUARGrhY.cjs} +16 -11
  329. package/dist/src/{responses-B8haB-mD.js → responses-CrmWv6iz.js} +13 -9
  330. package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
  331. package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
  332. package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
  333. package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
  334. package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
  335. package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
  336. package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
  337. package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
  338. package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
  339. package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
  340. package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-BeVTKfrv.cjs} +65 -53
  341. package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-BwbV5U7_.js} +61 -50
  342. package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-ClAZ6Qwc.js} +61 -50
  343. package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-DY2qrOpq.js} +61 -50
  344. package/dist/src/{scanner-nOCWNIXa.js → scanner-BS-iFIp3.js} +6 -6
  345. package/dist/src/server/index.js +4115 -420
  346. package/dist/src/server-BHOEL8p8.cjs +126 -0
  347. package/dist/src/server-CbiJppij.js +107 -0
  348. package/dist/src/server-D18AAlAc.js +3 -0
  349. package/dist/src/server-DLYjSFm2.js +182 -0
  350. package/dist/src/server-JQYD_Nws.js +146 -0
  351. package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
  352. package/dist/src/shared-CRq0gGVf.js +1334 -0
  353. package/dist/src/shared-CSIGeGLl.js +1334 -0
  354. package/dist/src/shared-DNvim54U.js +1335 -0
  355. package/dist/src/shared-aHWko3P1.cjs +1436 -0
  356. package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
  357. package/dist/src/simulatedUser-C7sOFSF6.js +222 -0
  358. package/dist/src/simulatedUser-DH_7RzEQ.js +222 -0
  359. package/dist/src/simulatedUser-lgMMmniD.js +223 -0
  360. package/dist/src/simulatedUser-mnCUS9Bm.cjs +227 -0
  361. package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
  362. package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
  363. package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
  364. package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
  365. package/dist/src/storage-95htjpLs.js +834 -0
  366. package/dist/src/storage-A4WnAeN3.cjs +911 -0
  367. package/dist/src/storage-B2Ql_oq4.js +822 -0
  368. package/dist/src/storage-EKVWZBNY.js +875 -0
  369. package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
  370. package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
  371. package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
  372. package/dist/src/store-IbiRIF3k.js +3 -0
  373. package/dist/src/strategies-CBI1brSy.js +2333 -0
  374. package/dist/src/strategies-CCcnEbFO.cjs +2360 -0
  375. package/dist/src/strategies-CD1gHeeQ.js +2331 -0
  376. package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
  377. package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
  378. package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
  379. package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
  380. package/dist/src/telemetry-BUm_krVX.js +3 -0
  381. package/dist/src/{telemetry--iqaGyaS.js → telemetry-BbpmrenM.js} +4 -4
  382. package/dist/src/{telemetry-CgdVGV8N.js → telemetry-C4bX-6Sr.js} +4 -4
  383. package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-C_ImlCOk.cjs} +15 -9
  384. package/dist/src/{telemetry-DWdGHvEf.js → telemetry-DOE567Wj.js} +4 -4
  385. package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
  386. package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
  387. package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
  388. package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
  389. package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
  390. package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
  391. package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
  392. package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
  393. package/dist/src/toolAttributes-COVgDrBG.js +87 -0
  394. package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
  395. package/dist/src/tracingOptions-BOP1FsRt.js +220 -0
  396. package/dist/src/tracingOptions-BvmDa_jH.js +219 -0
  397. package/dist/src/tracingOptions-DFBoE8O0.cjs +249 -0
  398. package/dist/src/tracingOptions-xhjOg2b1.js +221 -0
  399. package/dist/src/{transcription-D7Q0vJsh.js → transcription-BOXDoehT.js} +4 -4
  400. package/dist/src/{transcription-Bm2emLmJ.js → transcription-CpVdKc4P.js} +5 -5
  401. package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-D8kkPXg_.cjs} +9 -8
  402. package/dist/src/{transcription-84t4ALo2.js → transcription-Z94eV9LR.js} +5 -5
  403. package/dist/src/transform-BnSTnFlp.js +187 -0
  404. package/dist/src/transform-BnSXWmU_2.cjs +221 -0
  405. package/dist/src/{transform-DtooZqYY.js → transform-BufxPIQL.js} +8 -8
  406. package/dist/src/transform-CGt7Kt3y2.js +186 -0
  407. package/dist/src/transform-CrPGTsij.js +186 -0
  408. package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DJkt81VY.cjs} +12 -11
  409. package/dist/src/{transform-B-b6Cq-q.js → transform-DtTfiGoh.js} +8 -8
  410. package/dist/src/{transform-_DpNB4qp.js → transform-Wp6s_5QE.js} +8 -8
  411. package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
  412. package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
  413. package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
  414. package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
  415. package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
  416. package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
  417. package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
  418. package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
  419. package/dist/src/{util-Dpmm_dAI.cjs → util-BHCAeuXx.cjs} +33 -7
  420. package/dist/src/{util-DFPeFkiV.js → util-CMrHV35u.js} +28 -8
  421. package/dist/src/{util-DvpHnLt0.cjs → util-COnzevXE.cjs} +29 -21
  422. package/dist/src/{util-BlFVL0UF.js → util-CYev3d-r.js} +22 -7
  423. package/dist/src/util-DGqkTb2-.js +327 -0
  424. package/dist/src/util-DNtUsus_.cjs +386 -0
  425. package/dist/src/{util-Dub0f_ej.js → util-DRfqa4xz.js} +17 -10
  426. package/dist/src/{util-BVXcTwXu.js → util-DTq3jq2z.js} +28 -8
  427. package/dist/src/{util-C-kmRosx.js → util-DeY58psG.js} +22 -7
  428. package/dist/src/util-DpV6KT5i.js +327 -0
  429. package/dist/src/{util-3pBZZb_H.js → util-DwNJzqOV.js} +45 -10
  430. package/dist/src/util-ETfU_sS9.js +328 -0
  431. package/dist/src/{util-A5_ZsQUn.cjs → util-NYQvo1C7.cjs} +25 -9
  432. package/dist/src/{util-DN0-b81k.js → util-X4KQgyVD.js} +28 -8
  433. package/dist/src/{util-BQOCAHQC.js → util-jZRrXe1P.js} +46 -11
  434. package/dist/src/{util-B9CNhyac.js → util-o2Qg5rZv.js} +22 -7
  435. package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
  436. package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
  437. package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
  438. package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
  439. package/dist/src/{version-CbuBKu2U.js → version-CHR-EFec.js} +2 -2
  440. package/dist/src/{version-CbpiUINz.js → version-F0YDgb7J.js} +2 -2
  441. package/dist/src/{version-D9zu9FWB.cjs → version-VzUqOBZk.cjs} +2 -2
  442. package/dist/src/{version-0frU0UTr.js → version-qVEN5qCm.js} +2 -2
  443. package/dist/tsconfig.tsbuildinfo +1 -1
  444. package/package.json +49 -23
  445. package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
  446. package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
  447. package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
  448. package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
  449. package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
  450. package/dist/src/cache-BRkhlH3k.cjs +0 -3
  451. package/dist/src/cache-BlC6aeJ0.js +0 -3
  452. package/dist/src/cloud-CoD5OacT.js +0 -3
  453. package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
  454. package/dist/src/codex-sdk-Danroptg.cjs +0 -2
  455. package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
  456. package/dist/src/evalResult-BBK58h2B.js +0 -3
  457. package/dist/src/evalResult-spPqh1G_.js +0 -2
  458. package/dist/src/evaluator-DgLKaZk8.js +0 -3
  459. package/dist/src/fetch-8viavNv8.js +0 -3
  460. package/dist/src/graders-C84JI-m5.js +0 -2
  461. package/dist/src/graders-CBbd0K0Q.cjs +0 -2
  462. package/dist/src/graders-CbQqpHSN.js +0 -3
  463. package/dist/src/graders-DS42d3ZG.js +0 -2
  464. package/dist/src/image-BmilRNqO.js +0 -258
  465. package/dist/src/image-CxJoa3aW.cjs +0 -280
  466. package/dist/src/image-DsGRlkh7.js +0 -257
  467. package/dist/src/image-a_SGUobh.js +0 -257
  468. package/dist/src/providers-BuyzKt7C.js +0 -2
  469. package/dist/src/providers-C7lNVBjX.cjs +0 -3
  470. package/dist/src/providers-CCE2COJi2.js +0 -2
  471. package/dist/src/render-7uNJ2V14.js +0 -135
  472. package/dist/src/render-DlscvAUJ.js +0 -135
  473. package/dist/src/render-eui5p5mL.js +0 -136
  474. package/dist/src/render-tG6ir9_g.cjs +0 -165
  475. package/dist/src/rubyUtils-4hjGxvju.js +0 -3
  476. package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
  477. package/dist/src/server-ByiF3qlg.js +0 -386
  478. package/dist/src/server-C0XKRNB_.cjs +0 -2
  479. package/dist/src/server-C_15p79-.js +0 -3
  480. package/dist/src/store-2OXm_eBY.js +0 -240
  481. package/dist/src/store-BELqNwvz.js +0 -3
  482. package/dist/src/store-uQZ4AjPe.cjs +0 -2
  483. package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
  484. package/dist/src/telemetry-ZdPZc0fm.js +0 -3
  485. package/dist/src/transform-BQt0BeAW.js +0 -3
  486. package/dist/src/transform-Bq5oqC0s.cjs +0 -2
  487. package/dist/src/transform-C9izGX54.cjs +0 -228
  488. package/dist/src/transform-CwbAZ84V.js +0 -216
  489. package/dist/src/transform-DzCF-wqV.js +0 -213
  490. package/dist/src/transform-eGiUAv86.js +0 -216
@@ -1,30 +1,39 @@
1
1
  #!/usr/bin/env node
2
2
  import { A as getMaxEvalTimeMs, D as getEnvInt, N as state, O as getEnvString, S as summarizeEvaluateResultForLogging, T as getEnvBool, _ as extractJsonObjects, c as setLogCallback, g as extractFirstJsonObject, j as isCI, k as getEvalTimeoutMs, r as globalLogCallback, s as logger, v as getAjv, x as safeJsonStringify } from "./logger-BbY6ypFL.js";
3
- import { P as FILE_METADATA_KEY, l as sleep, r as fetchWithRetries, v as isPromptfooSampleTarget, x as parseChatPrompt } from "./fetch-B6ch2nU2.js";
4
- import { n as VERSION } from "./version-CbpiUINz.js";
3
+ import { L as FILE_METADATA_KEY, f as sleep, r as fetchWithRetries, w as parseChatPrompt, x as isPromptfooSampleTarget } from "./fetch-Cpf1U1nO.js";
4
+ import { n as VERSION } from "./version-F0YDgb7J.js";
5
5
  import { t as invariant } from "./invariant-B2Rf6avk.js";
6
- import { r as telemetry } from "./telemetry-DWdGHvEf.js";
7
- import { at as MULTI_INPUT_VAR, d as isGradingResult, nt as LLAMA_GUARD_REPLICATE_PROVIDER, p as isApiProvider, s as ResultFailureReason } from "./types-BVH9hjgW.js";
8
- import { a as getNunjucksEngine, r as extractVariablesFromTemplate } from "./render-eui5p5mL.js";
9
- import { t as providerRegistry } from "./providerRegistry-PMsleEzs.js";
10
- import { c as promptYesNo, m as shouldGenerateRemote } from "./server-ByiF3qlg.js";
11
- import { $ as createRateLimitRegistry, F as checkExfilTracking, J as runExtensionHook, K as collectFileMetadata, Q as TokenUsageTracker, U as isBasicRefusal, V as getSessionId, X as loadFromPackage, Y as isPackagePath, Z as redteamProviderManager, _ as VertexChatProvider, et as createProviderRateLimitOptions, n as loadApiProvider, ot as throwIfTargetPromptExceedsMaxChars, q as renderPrompt, v as GoogleLiveProvider, y as AIStudioChatProvider } from "./providers-DT-GtF2t.js";
12
- import { n as isNonTransientHttpStatus } from "./errors-DFHe4L-n.js";
13
- import { c as withCacheNamespace, o as getCache } from "./cache-BI5BY7ey.js";
14
- import { i as isJavascriptFile } from "./fileExtensions-DysCsxNG.js";
15
- import { r as runPython } from "./pythonUtils-DNqbnRdx.js";
16
- import { L as loadFunction, N as maybeLoadToolsFromExternalFile, R as parseFileUrl, S as isProviderAllowed, b as isGoogleProvider, x as isOpenAiProvider, y as isAnthropicProvider } from "./util-BQOCAHQC.js";
17
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-2wIvAhB3.js";
18
- import { t as extractAndStoreBinaryData } from "./extractor-DxyiFhPk.js";
19
- import { t as OpenAiChatCompletionProvider } from "./chat-BLOdH60v.js";
20
- import { h as validateFunctionCall } from "./transform-B-b6Cq-q.js";
21
- import { l as validateFunctionCall$1 } from "./util-C-kmRosx.js";
22
- import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-DzCF-wqV.js";
23
- import { n as getTraceStore } from "./store-CPh25336.js";
24
- import { n as runRuby } from "./rubyUtils-CIQFnVz4.js";
25
- import { $ as SELECT_BEST_PROMPT, Ct as DEFAULT_ANTHROPIC_MODEL, Dt as withProviderCallExecutionContext, Et as getGradingProvider, G as matchesLlmRubric, H as matchesClosedQa, J as doRemoteGrading, K as matchesPiScore, Q as DEFAULT_WEB_SEARCH_PROMPT, St as getDefaultProviders, Tt as getAndCheckProvider, U as matchesFactuality, W as matchesGEval, _t as tryParse, at as CONTEXT_RECALL_ATTRIBUTED_TOKEN, bt as loadFromJavaScriptFile, ct as CONTEXT_RELEVANCE_BAD, dt as cosineSimilarity, et as SUGGEST_PROMPTS_SYSTEM_MESSAGE, ft as dotProduct, gt as splitIntoSentences, ht as normalizeMatcherTokenUsage, it as CONTEXT_RECALL, lt as loadRubricPrompt, mt as fail, n as getGraderById, nt as CONTEXT_FAITHFULNESS_LONGFORM, ot as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, pt as euclideanDistance, q as matchesTrajectoryGoalSuccess, rt as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, st as CONTEXT_RELEVANCE, tt as ANSWER_RELEVANCY_GENERATE, ut as renderLlmRubricPrompt, vt as coerceString, wt as callProviderWithContext, xt as processFileReference, yt as getFinalTest } from "./graders-CgPn32yp.js";
26
- import { i as generateIdFromPrompt } from "./utils-kt7lv30R.js";
27
- import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-DTtUuU3l.js";
6
+ import { r as telemetry } from "./telemetry-DOE567Wj.js";
7
+ import { at as MULTI_INPUT_VAR, d as isGradingResult, nt as LLAMA_GUARD_REPLICATE_PROVIDER, p as isApiProvider, s as ResultFailureReason } from "./types-BFevViUY.js";
8
+ import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
9
+ import { i as extractVariablesFromTemplate, o as getNunjucksEngine, r as analyzeTemplateReference } from "./render-CSP99NLm.js";
10
+ import { t as providerRegistry } from "./providerRegistry-ReCd0sFa.js";
11
+ import { l as shouldGenerateRemote } from "./remoteGeneration-D6UjE2JT.js";
12
+ import { c as promptYesNo } from "./server-DLYjSFm2.js";
13
+ import { n as isNonTransientHttpStatus } from "./errors-9PcUL8BC.js";
14
+ import { l as withCacheNamespace, o as getCache } from "./cache-BR77mdIR.js";
15
+ import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
16
+ import { B as parseFileUrl, C as isOpenAiProvider, F as maybeLoadToolsFromExternalFile, S as isGoogleProvider, w as isProviderAllowed, x as isAnthropicProvider, z as loadFunction } from "./util-jZRrXe1P.js";
17
+ import { t as OpenAiChatCompletionProvider } from "./chat-DTdf-J5Q.js";
18
+ import { h as validateFunctionCall } from "./transform-DtTfiGoh.js";
19
+ import { l as validateFunctionCall$1 } from "./util-DeY58psG.js";
20
+ import { _ as VertexChatProvider, n as loadApiProvider, v as GoogleLiveProvider, y as AIStudioChatProvider } from "./providers-B7TyByfj.js";
21
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
22
+ import { t as getProcessShim } from "./processShim-BBxt7LKO.js";
23
+ import { n as loadFromPackage, t as isPackagePath } from "./packageParser--MWTSrPW.js";
24
+ import { n as runRuby } from "./rubyUtils-CXlFM2rR.js";
25
+ import { c as isBasicRefusal, o as getSessionId } from "./util-ETfU_sS9.js";
26
+ import { $ as DEFAULT_WEB_SEARCH_PROMPT, Ct as getDefaultProviders, Dt as getGradingProvider, Et as getAndCheckProvider, G as matchesGEval, H as isGraderFailure, J as matchesTrajectoryGoalSuccess, K as matchesLlmRubric, Ot as getProviderCallExecutionContext, St as processFileReference, Tt as callProviderWithContext, U as matchesClosedQa, W as matchesFactuality, Y as doRemoteGrading, _t as splitIntoSentences, at as CONTEXT_RECALL, bt as getFinalTest, ct as CONTEXT_RELEVANCE, dt as renderLlmRubricPrompt, et as SELECT_BEST_PROMPT, ft as cosineSimilarity, gt as normalizeMatcherTokenUsage, ht as fail, it as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, kt as withProviderCallExecutionContext, lt as CONTEXT_RELEVANCE_BAD, mt as euclideanDistance, n as getGraderById, nt as ANSWER_RELEVANCY_GENERATE, ot as CONTEXT_RECALL_ATTRIBUTED_TOKEN, pt as dotProduct, q as matchesPiScore, rt as CONTEXT_FAITHFULNESS_LONGFORM, st as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, tt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, ut as loadRubricPrompt, vt as tryParse, wt as DEFAULT_ANTHROPIC_MODEL, xt as loadFromJavaScriptFile, yt as coerceString } from "./graders-BQt1BaQe.js";
27
+ import { f as redteamProviderManager, g as createProviderRateLimitOptions, h as createRateLimitRegistry, m as TokenUsageTracker } from "./shared-DNvim54U.js";
28
+ import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
29
+ import { a as getTransformLabel, i as getTransformErrorMessage, o as transform, r as TransformInputType } from "./transform-BnSTnFlp.js";
30
+ import { n as getTraceStore } from "./store-DKd5592Q.js";
31
+ import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-CSurUUyV.js";
32
+ import { t as extractAndStoreBinaryData } from "./extractor-BVkZtk4R.js";
33
+ import { i as throwIfTargetPromptExceedsMaxChars } from "./promptLength-4X-Wd8PG.js";
34
+ import { n as checkExfilTracking } from "./indirectWebPwn-BMTXXznx.js";
35
+ import { n as getFirstStringAttribute, r as getToolNameFromAttributes, t as TOOL_ARGUMENT_ATTRIBUTE_KEYS } from "./toolAttributes-COVgDrBG.js";
36
+ import { i as filterFiniteScores, n as renderPrompt, r as runExtensionHook, t as collectFileMetadata } from "./evaluatorHelpers-D1_kwvyp.js";
28
37
  import { AsyncResource } from "node:async_hooks";
29
38
  import chalk from "chalk";
30
39
  import fs, { createWriteStream } from "fs";
@@ -38,11 +47,12 @@ import readline from "readline";
38
47
  import { globSync } from "glob";
39
48
  import { XMLParser } from "fast-xml-parser";
40
49
  import async from "async";
41
- import cliProgress from "cli-progress";
42
50
  import { parse as parse$1 } from "parse5";
43
51
  import { distance } from "fastest-levenshtein";
52
+ import cliProgress from "cli-progress";
44
53
  import * as rouge from "js-rouge";
45
54
  import { isDeepStrictEqual } from "node:util";
55
+ import { LRUCache } from "lru-cache";
46
56
  import { ExportResultCode, W3CTraceContextPropagator } from "@opentelemetry/core";
47
57
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
48
58
  import { resourceFromAttributes } from "@opentelemetry/resources";
@@ -405,17 +415,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
405
415
  contextValue = test.vars.context;
406
416
  }
407
417
  } else if (fallbackContext) contextValue = fallbackContext;
408
- if (assertion.contextTransform) try {
409
- const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
410
- const transformed = await transform(assertion.contextTransform, outputForTransform, {
411
- vars: test.vars,
412
- prompt: { label: prompt },
413
- ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
414
- });
415
- invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${assertion.contextTransform}`);
416
- contextValue = transformed;
417
- } catch (error) {
418
- throw new Error(`Failed to transform context using expression '${assertion.contextTransform}': ${error instanceof Error ? error.message : String(error)}`);
418
+ if (assertion.contextTransform) {
419
+ const getLabel = () => getTransformLabel(assertion.contextTransform);
420
+ try {
421
+ const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
422
+ const transformed = await transform(assertion.contextTransform, outputForTransform, {
423
+ vars: test.vars,
424
+ prompt: { label: prompt },
425
+ ...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
426
+ });
427
+ invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
428
+ contextValue = transformed;
429
+ } catch (error) {
430
+ throw new Error(`Failed to transform context using expression '${getLabel()}': ${getTransformErrorMessage(error)}`);
431
+ }
419
432
  }
420
433
  invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
421
434
  return contextValue;
@@ -758,7 +771,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
758
771
  telemetry.record("feature_used", { feature: "tracing" });
759
772
  try {
760
773
  logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
761
- const { startOTLPReceiver } = await import("./otlpReceiver-UYMQx3sy.js");
774
+ const { startOTLPReceiver } = await import("./otlpReceiver--gTpSagc.js");
762
775
  const port = testSuite.tracing.otlp.http.port || 4318;
763
776
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
764
777
  const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
@@ -782,7 +795,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
782
795
  async function stopOtlpReceiverIfNeeded() {
783
796
  if (otlpReceiverStarted) try {
784
797
  logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
785
- const { stopOTLPReceiver } = await import("./otlpReceiver-UYMQx3sy.js");
798
+ const { stopOTLPReceiver } = await import("./otlpReceiver--gTpSagc.js");
786
799
  await stopOTLPReceiver();
787
800
  otlpReceiverStarted = false;
788
801
  logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -817,7 +830,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
817
830
  }
818
831
  if (!tracingEnabled) return null;
819
832
  logger.debug("[EvaluatorTracing] Importing trace store");
820
- const { getTraceStore } = await import("./store-BELqNwvz.js");
833
+ const { getTraceStore } = await import("./store-IbiRIF3k.js");
821
834
  const traceStore = getTraceStore();
822
835
  const traceId = generateTraceId();
823
836
  const spanId = generateSpanId();
@@ -1449,27 +1462,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
1449
1462
  };
1450
1463
  //#endregion
1451
1464
  //#region src/assertions/geval.ts
1452
- const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
1453
- invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
1465
+ const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
1466
+ invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
1454
1467
  const threshold = assertion.threshold ?? .7;
1455
1468
  if (Array.isArray(renderedValue)) {
1456
- const scores = [];
1457
- const reasons = [];
1458
- for (const value of renderedValue) {
1469
+ if (renderedValue.length === 0) return {
1470
+ assertion,
1471
+ pass: false,
1472
+ score: 0,
1473
+ reason: "G-Eval assertion requires at least one criterion string in the value array."
1474
+ };
1475
+ const responses = [];
1476
+ let failure;
1477
+ for (const [index, value] of renderedValue.entries()) {
1459
1478
  const resp = await matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
1460
- scores.push(resp.score);
1461
- reasons.push(resp.reason);
1479
+ responses.push(resp);
1480
+ if (isGraderFailure(resp)) {
1481
+ failure = {
1482
+ index,
1483
+ resp
1484
+ };
1485
+ break;
1486
+ }
1487
+ }
1488
+ const tokensUsed = createEmptyTokenUsage();
1489
+ for (const r of responses) accumulateTokenUsage(tokensUsed, r.tokensUsed);
1490
+ if (failure) {
1491
+ const criterion = renderedValue[failure.index];
1492
+ return {
1493
+ assertion,
1494
+ pass: false,
1495
+ score: 0,
1496
+ reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
1497
+ tokensUsed,
1498
+ metadata: failure.resp.metadata
1499
+ };
1462
1500
  }
1463
- const scoresSum = scores.reduce((a, b) => a + b, 0);
1501
+ const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
1502
+ const combinedReason = responses.map((r) => r.reason).join("\n\n");
1464
1503
  return {
1465
1504
  assertion,
1466
- pass: scoresSum / scores.length >= threshold,
1467
- score: scoresSum / scores.length,
1468
- reason: reasons.join("\n\n")
1505
+ pass: averageScore >= threshold !== inverse,
1506
+ score: inverse ? 1 - averageScore : averageScore,
1507
+ reason: combinedReason,
1508
+ tokensUsed
1469
1509
  };
1470
- } else return {
1510
+ }
1511
+ const resp = await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
1512
+ if (isGraderFailure(resp)) return {
1471
1513
  assertion,
1472
- ...await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext)
1514
+ pass: false,
1515
+ score: 0,
1516
+ reason: resp.reason,
1517
+ tokensUsed: resp.tokensUsed,
1518
+ metadata: resp.metadata
1519
+ };
1520
+ const passed = resp.score >= threshold !== inverse;
1521
+ return {
1522
+ assertion,
1523
+ ...resp,
1524
+ pass: passed,
1525
+ score: inverse ? 1 - resp.score : resp.score
1473
1526
  };
1474
1527
  };
1475
1528
  //#endregion
@@ -2430,45 +2483,6 @@ function matchesPattern(spanName, pattern) {
2430
2483
  }
2431
2484
  //#endregion
2432
2485
  //#region src/assertions/trajectoryUtils.ts
2433
- const TOOL_ATTRIBUTE_KEYS = [
2434
- "tool.name",
2435
- "tool_name",
2436
- "tool",
2437
- "function.name",
2438
- "function_name",
2439
- "gen_ai.tool.name",
2440
- "codex.mcp.tool",
2441
- "agent.tool",
2442
- "agent.tool_name",
2443
- "agent.toolName"
2444
- ];
2445
- const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
2446
- "tool.arguments",
2447
- "tool.args",
2448
- "tool.input",
2449
- "tool_arguments",
2450
- "tool_args",
2451
- "tool_input",
2452
- "function.arguments",
2453
- "function.args",
2454
- "function.input",
2455
- "function_arguments",
2456
- "function_args",
2457
- "gen_ai.tool.arguments",
2458
- "gen_ai.tool.args",
2459
- "gen_ai.tool.input",
2460
- "gen_ai.tool.call.arguments",
2461
- "gen_ai.tool.call.args",
2462
- "agent.tool.arguments",
2463
- "agent.tool.args",
2464
- "agent.tool.input",
2465
- "codex.mcp.arguments",
2466
- "codex.mcp.args",
2467
- "codex.mcp.input",
2468
- "arguments",
2469
- "args",
2470
- "input"
2471
- ];
2472
2486
  const COMMAND_ATTRIBUTE_KEYS = [
2473
2487
  "codex.command",
2474
2488
  "command",
@@ -2481,16 +2495,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
2481
2495
  "search_query"
2482
2496
  ];
2483
2497
  const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
2498
+ const COMMAND_TOOL_NAMES = new Set([
2499
+ "exec_command",
2500
+ "local_shell",
2501
+ "shell"
2502
+ ]);
2484
2503
  const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
2485
2504
  const MAX_JUDGE_SUMMARY_STEPS = 24;
2486
2505
  const JUDGE_SUMMARY_HEAD_STEPS = 12;
2487
2506
  const JUDGE_SUMMARY_TAIL_STEPS = 12;
2488
- function getStringAttribute(attributes, keys) {
2489
- for (const key of keys) {
2490
- const value = attributes[key];
2491
- if (typeof value === "string" && value.trim()) return value.trim();
2492
- }
2493
- }
2494
2507
  function normalizeStructuredAttribute(value) {
2495
2508
  if (value === void 0 || value === null) return;
2496
2509
  if (typeof value === "string") {
@@ -2522,9 +2535,12 @@ function getTrajectoryStepStatus(step) {
2522
2535
  function getCommandExecutable(command) {
2523
2536
  return command.trim().split(/\s+/)[0] || void 0;
2524
2537
  }
2538
+ function isCommandToolName(toolName) {
2539
+ return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
2540
+ }
2525
2541
  function extractToolName(span) {
2526
2542
  const attributes = span.attributes || {};
2527
- const directMatch = getStringAttribute(attributes, TOOL_ATTRIBUTE_KEYS);
2543
+ const directMatch = getToolNameFromAttributes(attributes);
2528
2544
  if (directMatch) return directMatch;
2529
2545
  for (const [key, value] of Object.entries(attributes)) {
2530
2546
  if (typeof value !== "string" || !value.trim()) continue;
@@ -2549,21 +2565,31 @@ function extractToolArgs(span) {
2549
2565
  if (value !== void 0) return value;
2550
2566
  }
2551
2567
  }
2552
- function extractCommand(span) {
2568
+ function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
2553
2569
  const attributes = span.attributes || {};
2554
- const directMatch = getStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2570
+ const directMatch = getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
2555
2571
  if (directMatch) return directMatch;
2556
2572
  for (const [key, value] of Object.entries(attributes)) {
2557
2573
  if (typeof value !== "string" || !value.trim()) continue;
2558
2574
  if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
2559
2575
  }
2576
+ const toolArgs = getToolArgs();
2577
+ if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
2578
+ const args = toolArgs;
2579
+ const command = args.cmd ?? args.command;
2580
+ if (typeof command === "string" && command.trim()) return command.trim();
2581
+ if (Array.isArray(command)) {
2582
+ const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
2583
+ if (joined) return joined;
2584
+ }
2585
+ }
2560
2586
  if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
2561
2587
  }
2562
2588
  function extractSearchQuery(span) {
2563
2589
  const attributes = span.attributes || {};
2564
- const directMatch = getStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2590
+ const directMatch = getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
2565
2591
  if (directMatch) return directMatch;
2566
- const genericQuery = getStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2592
+ const genericQuery = getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
2567
2593
  if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
2568
2594
  if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
2569
2595
  }
@@ -2587,17 +2613,34 @@ function extractTrajectorySteps(trace) {
2587
2613
  return left.index - right.index;
2588
2614
  }).map(({ span }) => {
2589
2615
  const toolName = extractToolName(span);
2590
- const command = extractCommand(span);
2616
+ let toolArgs;
2617
+ let hasExtractedToolArgs = false;
2618
+ const getToolArgs = () => {
2619
+ if (!hasExtractedToolArgs) {
2620
+ toolArgs = extractToolArgs(span);
2621
+ hasExtractedToolArgs = true;
2622
+ }
2623
+ return toolArgs;
2624
+ };
2625
+ const command = extractCommand(span, toolName, getToolArgs);
2591
2626
  const searchQuery = extractSearchQuery(span);
2592
2627
  let type = "span";
2593
2628
  let name = span.name;
2594
2629
  const aliases = new Set([span.name]);
2595
2630
  let args;
2596
- if (toolName) {
2631
+ if (command && isCommandToolName(toolName)) {
2632
+ type = "command";
2633
+ name = command;
2634
+ aliases.add(command);
2635
+ args = getToolArgs();
2636
+ if (toolName) aliases.add(toolName);
2637
+ const executable = getCommandExecutable(command);
2638
+ if (executable) aliases.add(executable);
2639
+ } else if (toolName) {
2597
2640
  type = "tool";
2598
2641
  name = toolName;
2599
2642
  aliases.add(toolName);
2600
- args = extractToolArgs(span);
2643
+ args = getToolArgs();
2601
2644
  } else if (command) {
2602
2645
  type = "command";
2603
2646
  name = command;
@@ -4161,7 +4204,7 @@ async function loadTraceData(traceId) {
4161
4204
  let stableObservations = 0;
4162
4205
  let latestTrace = null;
4163
4206
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
4164
- latestTrace = await traceStore.getTrace(traceId);
4207
+ latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
4165
4208
  const spanCount = latestTrace?.spans?.length ?? 0;
4166
4209
  if (spanCount > 0) {
4167
4210
  stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
@@ -4214,7 +4257,7 @@ const ASSERTION_HANDLERS = {
4214
4257
  "llm-rubric": handleLlmRubric,
4215
4258
  meteor: async (params) => {
4216
4259
  try {
4217
- const { handleMeteorAssertion } = await import("./meteor-CeGo0Lu2.js");
4260
+ const { handleMeteorAssertion } = await import("./meteor--TZYICTI.js");
4218
4261
  return handleMeteorAssertion(params);
4219
4262
  } catch (error) {
4220
4263
  if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
@@ -4350,7 +4393,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
4350
4393
  };
4351
4394
  }
4352
4395
  else if (filePath.endsWith(".rb")) try {
4353
- const { runRuby } = await import("./rubyUtils-4hjGxvju.js");
4396
+ const { runRuby } = await import("./rubyUtils-BYVlQ94c.js");
4354
4397
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
4355
4398
  logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
4356
4399
  } catch (error) {
@@ -4467,7 +4510,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
4467
4510
  logger.debug(`Failed to preload trace data for assertions: ${error}`);
4468
4511
  preloadedTraceData = null;
4469
4512
  }
4470
- await async.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
4513
+ const concurrency = getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
4514
+ await async.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
4471
4515
  if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
4472
4516
  const result = await runAssertion({
4473
4517
  prompt,
@@ -4614,7 +4658,8 @@ var CIProgressReporter = class {
4614
4658
  }
4615
4659
  updateTotalTests(newTotal) {
4616
4660
  this.totalTests = Math.max(newTotal, 1);
4617
- this.highestPercentageSeen = Math.floor(this.completedTests / this.totalTests * 100);
4661
+ const percentage = Math.floor(this.completedTests / this.totalTests * 100);
4662
+ this.highestPercentageSeen = percentage;
4618
4663
  }
4619
4664
  finish() {
4620
4665
  if (this.intervalId) {
@@ -4787,6 +4832,10 @@ function getDefaultOtelConfig() {
4787
4832
  }
4788
4833
  //#endregion
4789
4834
  //#region src/tracing/localSpanExporter.ts
4835
+ const MISSING_TRACE_RETRY_DELAY_MS = 50;
4836
+ function delay(ms) {
4837
+ return new Promise((resolve) => setTimeout(resolve, ms));
4838
+ }
4790
4839
  /**
4791
4840
  * A span exporter that writes spans to the local TraceStore (SQLite).
4792
4841
  * This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
@@ -4828,7 +4877,7 @@ var LocalSpanExporter = class {
4828
4877
  }
4829
4878
  let firstError;
4830
4879
  for (const [traceId, spanDataList] of spansByTrace) try {
4831
- const result = await traceStore.addSpans(traceId, spanDataList, { skipTraceCheck: false });
4880
+ const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
4832
4881
  if (result.stored) logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
4833
4882
  else logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
4834
4883
  } catch (error) {
@@ -4840,6 +4889,16 @@ var LocalSpanExporter = class {
4840
4889
  }
4841
4890
  return firstError;
4842
4891
  }
4892
+ async addSpansWithTraceRetry(traceStore, traceId, spans) {
4893
+ const options = {
4894
+ skipTraceCheck: false,
4895
+ warnIfMissingTrace: false
4896
+ };
4897
+ const result = await traceStore.addSpans(traceId, spans, options);
4898
+ if (result.stored) return result;
4899
+ await delay(MISSING_TRACE_RETRY_DELAY_MS);
4900
+ return traceStore.addSpans(traceId, spans, options);
4901
+ }
4843
4902
  /**
4844
4903
  * Convert an OTEL ReadableSpan to our SpanData format.
4845
4904
  */
@@ -5085,6 +5144,15 @@ function backfillNamedScoreWeights(accumulator) {
5085
5144
  }
5086
5145
  //#endregion
5087
5146
  //#region src/evaluator.ts
5147
+ const CONVERSATION_VAR_NAME = "_conversation";
5148
+ const promptUsesConversationVariableCache = new LRUCache({ max: 1024 });
5149
+ function promptUsesConversationVariable(prompt) {
5150
+ const cached = promptUsesConversationVariableCache.get(prompt.raw);
5151
+ if (cached !== void 0) return cached;
5152
+ const { referenced, parsed } = analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
5153
+ if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
5154
+ return referenced;
5155
+ }
5088
5156
  /**
5089
5157
  * Manages a single progress bar for the evaluation
5090
5158
  */
@@ -5284,6 +5352,18 @@ function hasProviderGroupedAssertion(assertion) {
5284
5352
  function shouldDeferGradingForTest(test) {
5285
5353
  return Boolean(test.assert?.some(hasProviderGroupedAssertion));
5286
5354
  }
5355
+ function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
5356
+ if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
5357
+ if (shouldGroupGradingByProvider) {
5358
+ logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
5359
+ return;
5360
+ }
5361
+ if (concurrency !== 1) return;
5362
+ const reasons = [];
5363
+ if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
5364
+ if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
5365
+ if (reasons.length > 0) logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
5366
+ }
5287
5367
  function applyGradingResult(row, checkResult) {
5288
5368
  if (!checkResult.pass) {
5289
5369
  row.error = checkResult.reason;
@@ -5298,14 +5378,29 @@ function applyGradingResult(row, checkResult) {
5298
5378
  if (checkResult.tokensUsed) accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
5299
5379
  row.gradingResult = checkResult;
5300
5380
  }
5301
- function applyGradingError(row, error) {
5302
- const errorMessage = error instanceof Error ? error.stack ?? error.message : String(error);
5303
- logger.error("Assertion grading failed during eval", {
5304
- error: errorMessage,
5305
- promptIdx: row.promptIdx,
5306
- testIdx: row.testIdx
5307
- });
5308
- row.error = errorMessage;
5381
+ const ABORTED_GRADING_PREFIX = "Aborted: ";
5382
+ function isAbortShapedError(error) {
5383
+ return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
5384
+ }
5385
+ function applyGradingError(row, error, abortSignal) {
5386
+ const errorAsError = error instanceof Error ? error : void 0;
5387
+ if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
5388
+ const shortMessage = errorAsError?.message ?? String(error);
5389
+ logger.debug("Assertion grading aborted", {
5390
+ error: shortMessage,
5391
+ promptIdx: row.promptIdx,
5392
+ testIdx: row.testIdx
5393
+ });
5394
+ row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
5395
+ } else {
5396
+ const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
5397
+ logger.error("Assertion grading failed during eval", {
5398
+ error: fullMessage,
5399
+ promptIdx: row.promptIdx,
5400
+ testIdx: row.testIdx
5401
+ });
5402
+ row.error = fullMessage;
5403
+ }
5309
5404
  row.failureReason = ResultFailureReason.ERROR;
5310
5405
  row.success = false;
5311
5406
  row.score = 0;
@@ -5337,7 +5432,7 @@ function createRunEvalState({ provider, prompt, test }) {
5337
5432
  };
5338
5433
  }
5339
5434
  function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
5340
- const usesConversation = prompt.raw.includes("_conversation");
5435
+ const usesConversation = promptUsesConversationVariable(prompt);
5341
5436
  if (!getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
5342
5437
  }
5343
5438
  function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
@@ -5584,7 +5679,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
5584
5679
  assertScoringFunction: test.assertScoringFunction,
5585
5680
  traceId
5586
5681
  }).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
5587
- applyGradingError(ret, error);
5682
+ applyGradingError(ret, error, abortSignal);
5588
5683
  });
5589
5684
  deferredGradingPromises.set(ret, gradingPromise);
5590
5685
  return;
@@ -6131,7 +6226,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
6131
6226
  const defaultProvider = defaultTest.provider;
6132
6227
  if (isApiProvider(defaultProvider)) return defaultProvider;
6133
6228
  if (typeof defaultProvider === "object" && defaultProvider.id) {
6134
- const { loadApiProvider } = await import("./providers-Ctcc592x.js");
6229
+ const { loadApiProvider } = await import("./providers-CSOp-bCm.js");
6135
6230
  return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
6136
6231
  }
6137
6232
  return defaultProvider;
@@ -6291,7 +6386,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
6291
6386
  async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
6292
6387
  if (!state.resume || !evalRecord.persisted) return;
6293
6388
  try {
6294
- const { default: EvalResult } = await import("./evalResult-BBK58h2B.js");
6389
+ const { default: EvalResult } = await import("./evalResult-BtZSUgQv.js");
6295
6390
  const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: state.retryMode });
6296
6391
  const originalCount = runEvalOptions.length;
6297
6392
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -6305,14 +6400,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
6305
6400
  }
6306
6401
  }
6307
6402
  function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
6308
- const usesConversationVar = prompts.some((p) => p.raw.includes("_conversation"));
6403
+ const usesConversationVar = prompts.some(promptUsesConversationVariable);
6309
6404
  if (concurrency <= 1) return {
6310
6405
  concurrency,
6311
6406
  usesConversationVar
6312
6407
  };
6313
6408
  const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
6314
6409
  if (usesConversationVar) {
6315
- logger.info(`Setting concurrency to 1 because the ${chalk.cyan("_conversation")} variable is used.`);
6410
+ logger.info(`Setting concurrency to 1 because the ${chalk.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
6316
6411
  return {
6317
6412
  concurrency: 1,
6318
6413
  usesConversationVar
@@ -6542,7 +6637,8 @@ var Evaluator = class {
6542
6637
  };
6543
6638
  this.conversations = {};
6544
6639
  this.registers = {};
6545
- this.fileWriters = (Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : []).map((p) => new JsonlFileWriter(p));
6640
+ const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
6641
+ this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
6546
6642
  this.rateLimitRegistry = createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
6547
6643
  this.rateLimitRegistry.on("ratelimit:hit", (data) => {
6548
6644
  logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
@@ -6662,6 +6758,25 @@ var Evaluator = class {
6662
6758
  this.trackCompletedRow(evalStep, row, context);
6663
6759
  context.numComplete++;
6664
6760
  const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
6761
+ if (context.testSuite.extensions?.length) try {
6762
+ const afterEachOut = await runExtensionHook(context.testSuite.extensions, "afterEach", {
6763
+ test: evalStep.test,
6764
+ result: {
6765
+ ...row,
6766
+ namedScores: { ...row.namedScores },
6767
+ metadata: { ...row.metadata },
6768
+ response: row.response ? {
6769
+ ...row.response,
6770
+ metadata: { ...row.response.metadata }
6771
+ } : row.response
6772
+ }
6773
+ });
6774
+ row.namedScores = filterFiniteScores(afterEachOut.result.namedScores);
6775
+ row.metadata = afterEachOut.result.metadata;
6776
+ if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
6777
+ } catch (error) {
6778
+ logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
6779
+ }
6665
6780
  await this.persistEvalRow(row);
6666
6781
  if (this.abortIfTargetUnavailable(row, context)) break;
6667
6782
  const metrics = context.prompts[row.promptIdx].metrics;
@@ -6673,10 +6788,6 @@ var Evaluator = class {
6673
6788
  promptEvalCount,
6674
6789
  row
6675
6790
  });
6676
- await runExtensionHook(context.testSuite.extensions, "afterEach", {
6677
- test: evalStep.test,
6678
- result: row
6679
- });
6680
6791
  context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
6681
6792
  }
6682
6793
  }
@@ -6831,7 +6942,15 @@ var Evaluator = class {
6831
6942
  })) break;
6832
6943
  }
6833
6944
  } catch (error) {
6834
- await flushGroupedRows();
6945
+ const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
6946
+ try {
6947
+ await flushGroupedRows();
6948
+ } catch (flushError) {
6949
+ logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
6950
+ error: flushError instanceof Error ? flushError.message : String(flushError),
6951
+ pendingRowCount
6952
+ });
6953
+ }
6835
6954
  throw error;
6836
6955
  }
6837
6956
  await flushGroupedRows();
@@ -7267,6 +7386,13 @@ var Evaluator = class {
7267
7386
  if (!this.options.silent) {
7268
7387
  if (serialRunEvalOptions.length > 0) logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
7269
7388
  if (concurrentRunEvalOptions.length > 0) logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
7389
+ logGroupedGradingStatus({
7390
+ concurrency,
7391
+ hasEvalStepTimeout,
7392
+ runEvalOptions,
7393
+ shouldGroupGradingByProvider,
7394
+ usesConversationVar
7395
+ });
7270
7396
  }
7271
7397
  if (this.options.showProgressBar && progressBarManager) {
7272
7398
  await progressBarManager.initialize(runEvalOptions, concurrency, 0);
@@ -7369,4 +7495,4 @@ function evaluate(testSuite, evalRecord, options) {
7369
7495
  //#endregion
7370
7496
  export { isAllowedPrompt as a, assertions_default as c, generateVarCombinations as i, readAssertions as l, evaluate as n, accumulateNamedMetric as o, formatVarsForDisplay as r, doesPromptRefMatch as s, ProgressBarManager as t, runAssertions as u };
7371
7497
 
7372
- //# sourceMappingURL=evaluator-D-UIbbYq.js.map
7498
+ //# sourceMappingURL=evaluator-n_dEb00o.js.map