promptfoo 0.121.1 → 0.121.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. package/README.md +2 -0
  2. package/dist/src/{accounts-xrUGFA6n.js → accounts-B2XmGjty.js} +5 -5
  3. package/dist/src/{accounts-Bx-x3bmW.cjs → accounts-BPyfpSeU.cjs} +5 -5
  4. package/dist/src/{accounts-CMqkzrVf.js → accounts-CFLK3mnD.js} +6 -6
  5. package/dist/src/{accounts-BgNJDBE6.js → accounts-Xatc0RYb.js} +5 -5
  6. package/dist/src/{agentic-utils-BKIN5PKu.js → agentic-utils-36epdqwB.js} +3 -3
  7. package/dist/src/{cometapi-DkXrKi5z.js → agentic-utils-D8yXo5Lm.js} +4 -61
  8. package/dist/src/{cometapi-vY6aDZgo.cjs → agentic-utils-DAVsChuB.cjs} +24 -62
  9. package/dist/src/agentic-utils-DIYAAYE7.js +153 -0
  10. package/dist/src/{agents-C-dDThPK.js → agents-BBVJCIYr.js} +226 -13
  11. package/dist/src/{agents-CErsqg5U.cjs → agents-BBWxKSM0.cjs} +7 -7
  12. package/dist/src/{agents-Dy2YpZpa.js → agents-Bqgfdokm.js} +227 -14
  13. package/dist/src/{agents-B0f4HICh.cjs → agents-CAYbM7qD.cjs} +226 -13
  14. package/dist/src/{agents-CVIn-Utx.js → agents-CLQ-P15P.js} +7 -7
  15. package/dist/src/{agents-DeH4Gu94.js → agents-CgBniSlI.js} +8 -8
  16. package/dist/src/{agents-CXknwsFX.js → agents-DSSTV4bv.js} +226 -13
  17. package/dist/src/{agents-aF4-T121.js → agents-wg3ohknq.js} +7 -7
  18. package/dist/src/{aimlapi-tg0Gkcvr.cjs → aimlapi-Bv8Fmc-b.cjs} +14 -14
  19. package/dist/src/{aimlapi-BNfTBexL.js → aimlapi-BwGC1TtS.js} +13 -13
  20. package/dist/src/{aimlapi-BAGZDo5G.js → aimlapi-DaC3qZ-o.js} +14 -14
  21. package/dist/src/{aimlapi-DHRKlBEA.js → aimlapi-MgSLdvy7.js} +13 -13
  22. package/dist/src/app/assets/index-B6l9CVVb.js +439 -0
  23. package/dist/src/app/assets/index-DyZ0Ep37.css +1 -0
  24. package/dist/src/app/assets/sync-CStkzc6u.js +4 -0
  25. package/dist/src/app/assets/vendor-charts-BnDWwBlI.js +36 -0
  26. package/dist/src/app/assets/vendor-markdown-Bz7N-ca6.js +29 -0
  27. package/dist/src/app/index.html +4 -4
  28. package/dist/src/{audio-tf_NBjlC.js → audio-Bn44pQxv.js} +4 -4
  29. package/dist/src/{audio-CHQ4r-RV.js → audio-DDA5WHdx.js} +4 -4
  30. package/dist/src/{audio-BWeaWovU.cjs → audio-DVFjQ67_.cjs} +4 -4
  31. package/dist/src/{audio-BRODU0UK.js → audio-DjU9GswO.js} +5 -5
  32. package/dist/src/{base-DBtwl2FR.cjs → base-BboXIF_0.cjs} +3 -3
  33. package/dist/src/{base-B4QJRyFS.js → base-CKjwebIH.js} +3 -3
  34. package/dist/src/{base-B0tcrnq_.js → base-CqzQ4K8j.js} +3 -3
  35. package/dist/src/{base-fEDN28WM.js → base-Cz2ZC_iA.js} +3 -3
  36. package/dist/src/{blobs-BAU-dXan.js → blobs-B1JriOyi.js} +3 -3
  37. package/dist/src/{blobs-qTYm-1PY.js → blobs-BUWmKWzo.js} +3 -3
  38. package/dist/src/{blobs-DvS-O6be.cjs → blobs-C6j0bvFz.cjs} +3 -3
  39. package/dist/src/{blobs-Bpg5rH6i.js → blobs-DXTl6J3H.js} +3 -3
  40. package/dist/src/{cache-COish3-W.cjs → cache-C5yFZ4gC.cjs} +75 -58
  41. package/dist/src/{cache-8XhNqPKW.js → cache-CaT5tPgo.js} +75 -58
  42. package/dist/src/cache-CyCanoMu.js +6 -0
  43. package/dist/src/{cache-CG0SlR1d.js → cache-DSqR6ezl.js} +75 -58
  44. package/dist/src/cache-Df_QFDNu.cjs +5 -0
  45. package/dist/src/{cache-D3eqDYGU.js → cache-HP0NP4k3.js} +75 -58
  46. package/dist/src/{chat-DHMH-N64.js → chat-B-52XYI1.js} +12 -12
  47. package/dist/src/{chat-BKm79wib.js → chat-B0iaWhoh.js} +16 -14
  48. package/dist/src/{chat-DxysjBvt.js → chat-BE0qTA8e.js} +13 -13
  49. package/dist/src/{chat-CRWNNq73.js → chat-BEwdgGEg.js} +16 -14
  50. package/dist/src/{chat-2K608PeQ.cjs → chat-BtIKkLKx.cjs} +13 -13
  51. package/dist/src/{chat-DaqekjFr.cjs → chat-CM8qWR3_.cjs} +17 -15
  52. package/dist/src/{chat-CM_kyI8B.js → chat-DK1U-eZ-.js} +12 -12
  53. package/dist/src/{chat-CznLWr_D.js → chat-pxmiVpWe.js} +16 -14
  54. package/dist/src/{chatkit-65VXf5SR.js → chatkit-BYGQlHlV.js} +4 -4
  55. package/dist/src/{chatkit-DKyPi1Gs.cjs → chatkit-Cx174XI3.cjs} +4 -4
  56. package/dist/src/{chatkit-BxFvW8KY.js → chatkit-_8eJqKcD.js} +4 -4
  57. package/dist/src/{chatkit-Be-Q-a9F.js → chatkit-a2D6mY6s.js} +4 -4
  58. package/dist/src/{claude-agent-sdk-CJH22shf.cjs → claude-agent-sdk-8ddRp1L2.cjs} +35 -17
  59. package/dist/src/{claude-agent-sdk-Dy5lT-Tx.js → claude-agent-sdk-Bq5EArsX.js} +33 -15
  60. package/dist/src/{claude-agent-sdk-BLTu0WBO.js → claude-agent-sdk-CMjh4LFH.js} +33 -15
  61. package/dist/src/{claude-agent-sdk-D6_k9FKA.js → claude-agent-sdk-HgbFioFw.js} +33 -15
  62. package/dist/src/cloud-DE3t1-ZI.js +4 -0
  63. package/dist/src/{cloud-Bc9526yV.js → cloud-z8KZpUoa.js} +3 -3
  64. package/dist/src/{cloudflare-ai-CWWJCRim.js → cloudflare-ai-BGyXlpXJ.js} +13 -13
  65. package/dist/src/{cloudflare-ai-C9r2sRhw.js → cloudflare-ai-Bbp26N0L.js} +13 -13
  66. package/dist/src/{cloudflare-ai-ClWSdor4.cjs → cloudflare-ai-C62x6MQG.cjs} +14 -14
  67. package/dist/src/{cloudflare-ai-ICsOuD-z.js → cloudflare-ai-DdKP9TKT.js} +14 -14
  68. package/dist/src/{cloudflare-gateway-D6xFc5pa.js → cloudflare-gateway-BwAaUgeW.js} +14 -14
  69. package/dist/src/{cloudflare-gateway-D6O7AlYb.js → cloudflare-gateway-D-e9i1Sn.js} +15 -15
  70. package/dist/src/{cloudflare-gateway-pXGHxJ47.js → cloudflare-gateway-DXhtXDRb.js} +15 -163
  71. package/dist/src/{cloudflare-gateway-C2_-KG5o.cjs → cloudflare-gateway-Dx36ftqF.cjs} +15 -15
  72. package/dist/src/{codex-sdk-DUwKWezN.js → codex-sdk-BQEw16R_.js} +180 -11
  73. package/dist/src/{codex-sdk-C6UMlxwV.js → codex-sdk-C_07GuVS.js} +180 -11
  74. package/dist/src/{codex-sdk-GGAw0qbD.js → codex-sdk-DE5G18dx.js} +180 -11
  75. package/dist/src/{codex-sdk-fAO0c3yA.cjs → codex-sdk-ZLKfDjqP.cjs} +181 -12
  76. package/dist/src/cometapi-BDyV-NNm.js +62 -0
  77. package/dist/src/cometapi-C3hOlM7-.cjs +62 -0
  78. package/dist/src/{cometapi-Bbjp5V4x.js → cometapi-hhL4TAh3.js} +14 -14
  79. package/dist/src/{cometapi-BasUi7-_.js → cometapi-sp7sJpBD.js} +15 -15
  80. package/dist/src/{completion-C_P3ypkJ.js → completion-BCimtq-h.js} +6 -6
  81. package/dist/src/{completion-6Mx_iXxK.js → completion-DCjv7RZ3.js} +6 -6
  82. package/dist/src/{completion-CDOouNzq.cjs → completion-DlXUhj5c.cjs} +6 -6
  83. package/dist/src/{completion-C5rtR_9P.js → completion-DoYy49ti.js} +6 -6
  84. package/dist/src/{createHash-CfZSc0b4.cjs → createHash-BYwImsYv.cjs} +2 -2
  85. package/dist/src/{docker-BwsKwxFs.cjs → docker-Cqj2-QVi.cjs} +14 -14
  86. package/dist/src/{docker-CZnqU1XV.js → docker-CxCkwMzc.js} +13 -13
  87. package/dist/src/{docker-DzxyDPIj.js → docker-DpguQj-w.js} +14 -14
  88. package/dist/src/{docker-5KcG-_86.js → docker-FeBni2dw.js} +13 -13
  89. package/dist/src/{esm-C03C-mv3.js → esm-7UIl0pPM.js} +2 -2
  90. package/dist/src/{esm-Cd1AjG1D.js → esm-CKWP3u_P.js} +3 -3
  91. package/dist/src/{esm-CnNt7sI4.cjs → esm-CipptfDu.cjs} +2 -2
  92. package/dist/src/{esm-CaIwzWR5.js → esm-SUNIX1x3.js} +3 -3
  93. package/dist/src/eval-7aEqoMs3.js +15 -0
  94. package/dist/src/{eval-DmFyWU7i.js → eval-BTqTn7lb.js} +10 -10
  95. package/dist/src/{evalResult-CDQiuUuf.js → evalResult-BkIhRdTe.js} +7 -7
  96. package/dist/src/evalResult-CYNHkk5A.js +12 -0
  97. package/dist/src/evalResult-CuvJeNiM.js +10 -0
  98. package/dist/src/{evalResult-CTG2AHOS.js → evalResult-DUDShQrm.js} +7 -7
  99. package/dist/src/{evalResult-Dap2CekP.cjs → evalResult-DpARzUCb.cjs} +7 -7
  100. package/dist/src/evalResult-tGdilrWt.cjs +10 -0
  101. package/dist/src/evaluator-BBUqRhz1.js +36 -0
  102. package/dist/src/{evaluator-DPFRbFIL.js → evaluator-BcvOGaam.js} +833 -79
  103. package/dist/src/{extractor-YMU_Gvt8.js → extractor-C8XwivI9.js} +6 -6
  104. package/dist/src/{extractor-CFG6bcWJ.js → extractor-CAZ2G3Kh.js} +6 -6
  105. package/dist/src/{extractor-DX36oYEv.cjs → extractor-DG3sSfXE.cjs} +6 -6
  106. package/dist/src/{extractor-M67RUtg6.js → extractor-D_wd8jxt.js} +6 -6
  107. package/dist/src/{fetch-4M3YRaqL.js → fetch-BiYv2BZc.js} +3 -3
  108. package/dist/src/{fetch-BxUk8odA.cjs → fetch-BnR9wSnm.cjs} +3 -3
  109. package/dist/src/{fetch-60Gzydls.js → fetch-CVAtKnI3.js} +3 -3
  110. package/dist/src/{fetch-BMv0O527.js → fetch-DoVRJZhJ.js} +4 -4
  111. package/dist/src/fetch-UWU706qb.js +5 -0
  112. package/dist/src/{genaiTracer-DN4dQywX.cjs → genaiTracer-BfxrvSUb.cjs} +2 -2
  113. package/dist/src/{graders-DOXycdlG.cjs → graders-BElhu9ZY.cjs} +126 -55
  114. package/dist/src/{graders-R9rYUM0d.js → graders-BXAJ0sbS.js} +120 -55
  115. package/dist/src/graders-BxfEguVY.js +32 -0
  116. package/dist/src/graders-CzVMbEnv.js +34 -0
  117. package/dist/src/{graders-CpdqD9PI.js → graders-DG7mhg-b.js} +120 -55
  118. package/dist/src/graders-DjCXfj0l.cjs +32 -0
  119. package/dist/src/{graders-CHO8EPM4.js → graders-RjHF8VfG.js} +120 -55
  120. package/dist/src/graders-kHzIWOKu.js +32 -0
  121. package/dist/src/{image-DTedmQPg.cjs → image--F58eEIn.cjs} +6 -6
  122. package/dist/src/{image-DJEvKveK.js → image-6WQXK8m8.js} +4 -4
  123. package/dist/src/{image-pAX56tPG.js → image-B8b6f36E.js} +6 -6
  124. package/dist/src/{image-BmEZqVmk.js → image-CoxZp9PZ.js} +6 -6
  125. package/dist/src/{image-gvmivTEe.js → image-DO0RYnjH.js} +5 -5
  126. package/dist/src/{image-CBBVXWuT.js → image-PoF6DN3x.js} +6 -6
  127. package/dist/src/{image-CDLQOcqT.cjs → image-fza3zuKs.cjs} +4 -4
  128. package/dist/src/{image-tL5hIOFh.js → image-xNbw5ph2.js} +4 -4
  129. package/dist/src/index.cjs +863 -110
  130. package/dist/src/index.d.cts +833 -60
  131. package/dist/src/index.d.ts +833 -60
  132. package/dist/src/index.js +860 -108
  133. package/dist/src/{interactiveCheck-BgLZUIt3.js → interactiveCheck-BnMYOjMu.js} +2 -2
  134. package/dist/src/{knowledgeBase-CoU-UQBg.js → knowledgeBase-Bi7CmDbx.js} +7 -7
  135. package/dist/src/{knowledgeBase-CLJybhnF.js → knowledgeBase-Ce3ofVan.js} +8 -8
  136. package/dist/src/{knowledgeBase-DjWPVqSb.js → knowledgeBase-DFRXPZl_.js} +7 -7
  137. package/dist/src/{knowledgeBase-wkxuRFhA.cjs → knowledgeBase-DqrLX8fy.cjs} +7 -7
  138. package/dist/src/{litellm-B9Hysuri.js → litellm-Bo2gQXpo.js} +16 -15
  139. package/dist/src/{litellm-ePxtr9F1.js → litellm-CKiAxnoM.js} +15 -14
  140. package/dist/src/{litellm-NYpQ8RQu.cjs → litellm-CnHI69aj.cjs} +16 -15
  141. package/dist/src/{litellm-CTfa0hqi.js → litellm-Tc294Jhj.js} +15 -14
  142. package/dist/src/{logger-KkObSCzq.js → logger-BcJBzSSA.js} +10 -14
  143. package/dist/src/{logger-DLcq4dWf.js → logger-BnkjG2jt.js} +10 -14
  144. package/dist/src/{logger-Cp1GPUjj.cjs → logger-D5iKBpu_.cjs} +27 -13
  145. package/dist/src/{logger-CT3IKMKA.js → logger-DO8_zM18.js} +10 -14
  146. package/dist/src/{luma-ray-BW9IRGIc.js → luma-ray-0ehMPt5N.js} +10 -10
  147. package/dist/src/{luma-ray-BE2mOt6N.js → luma-ray-C9q8rdQe.js} +9 -9
  148. package/dist/src/{luma-ray-Cm1KZBhs.js → luma-ray-DP0QA9qn.js} +9 -9
  149. package/dist/src/{luma-ray-B0GGNRc1.cjs → luma-ray-m9Ku2meV.cjs} +9 -9
  150. package/dist/src/main.js +69 -71
  151. package/dist/src/{messages-1x9atZmP.js → messages-DJNo37Ko.js} +14 -9
  152. package/dist/src/{messages-BLbWdsyt.js → messages-Dy9QecMs.js} +14 -9
  153. package/dist/src/{messages-1JrJs91T.cjs → messages-HJsyEh4o.cjs} +15 -10
  154. package/dist/src/{messages-D8EA0oDc.js → messages-biC_ex-p.js} +14 -9
  155. package/dist/src/{modelslab-C1OLRmVX.js → modelslab-B5J-ZM5c.js} +9 -9
  156. package/dist/src/{modelslab-CqXBy3U8.js → modelslab-BI458moT.js} +10 -10
  157. package/dist/src/{modelslab-X5-4LroM.js → modelslab-BTOT8FUO.js} +9 -9
  158. package/dist/src/{modelslab-DcOSFwKh.cjs → modelslab-IQbNg-r7.cjs} +9 -9
  159. package/dist/src/{nova-reel-DihqLeol.js → nova-reel-BZ9y-Y5s.js} +9 -9
  160. package/dist/src/{nova-reel-D9xfaMBs.cjs → nova-reel-CE5etkv9.cjs} +9 -9
  161. package/dist/src/{nova-reel-D2ZkOSyr.js → nova-reel-DEeQlnOJ.js} +10 -10
  162. package/dist/src/{nova-reel-BgS1ZWuK.js → nova-reel-Xw1SXLpg.js} +9 -9
  163. package/dist/src/{nova-sonic-Q3BOJeig.js → nova-sonic-DWswpN1E.js} +7 -7
  164. package/dist/src/{nova-sonic-DezhVUYT.js → nova-sonic-DXTLpi-r.js} +6 -6
  165. package/dist/src/{nova-sonic-DVu3mMIy.cjs → nova-sonic-N0yCm0vb.cjs} +6 -6
  166. package/dist/src/{nova-sonic-P-CdUMlV.js → nova-sonic-Ogqf-csn.js} +6 -6
  167. package/dist/src/{openai-DhbB7eWK.js → openai-BMcwgD5C.js} +2 -2
  168. package/dist/src/{openai-j-sE2O7r.js → openai-BcB5KlTk.js} +2 -2
  169. package/dist/src/{openai-Cuif0GEt.cjs → openai-CoxGAQwn.cjs} +2 -2
  170. package/dist/src/{openai-DElQ-fPX.js → openai-D6wITiVn.js} +2 -2
  171. package/dist/src/{openclaw-Bv1DINsX.js → openclaw-0Sv7AK3O.js} +172 -109
  172. package/dist/src/{openclaw-DAfWQn-o.cjs → openclaw-CXxbKgDH.cjs} +174 -110
  173. package/dist/src/{openclaw-BiSZPL7J.js → openclaw-D1FSCps-.js} +172 -109
  174. package/dist/src/{openclaw-D1D_ej1z.js → openclaw-D2ENvu7a.js} +173 -110
  175. package/dist/src/{opencode-sdk-D95s6SnR.js → opencode-sdk-C71Z0ehR.js} +13 -13
  176. package/dist/src/{opencode-sdk-DxUPkLT7.js → opencode-sdk-CHCs7dEb.js} +12 -12
  177. package/dist/src/{opencode-sdk-C7m-wRfI.js → opencode-sdk-DDxj4QqH.js} +12 -12
  178. package/dist/src/{opencode-sdk-CfaLN8PY.cjs → opencode-sdk-WWJhnbKr.cjs} +16 -16
  179. package/dist/src/{otlpReceiver-g3ByGaXs.js → otlpReceiver-C9KlUtxh.js} +6 -6
  180. package/dist/src/{otlpReceiver--AIRW_S4.js → otlpReceiver-CZL48YfC.js} +6 -6
  181. package/dist/src/{otlpReceiver-Bn5wGB1v.js → otlpReceiver-CavGAA6k.js} +6 -6
  182. package/dist/src/{otlpReceiver-Diec4cln.cjs → otlpReceiver-DHKqJlsz.cjs} +6 -6
  183. package/dist/src/{providerRegistry-B0RUOLI_.js → providerRegistry-B9lh-_tx.js} +2 -2
  184. package/dist/src/{providerRegistry-Civky8Ar.cjs → providerRegistry-BTDgfV5h.cjs} +2 -2
  185. package/dist/src/{providerRegistry-CD8MEar9.js → providerRegistry-BkzVH5Ba.js} +2 -2
  186. package/dist/src/{providerRegistry-DM8rZYol.js → providerRegistry-CUWki5mQ.js} +2 -2
  187. package/dist/src/providers-BSLEaIQG.js +32 -0
  188. package/dist/src/{providers-CFu-TZl-.cjs → providers-CScd1wN6.cjs} +733 -464
  189. package/dist/src/{providers-CFLy1_ji.js → providers-Ch6Mr0gn.js} +795 -526
  190. package/dist/src/{providers-BKRJTjBz.js → providers-Cn73d5sr.js} +795 -526
  191. package/dist/src/providers-D-FnDg8k.cjs +31 -0
  192. package/dist/src/providers-DEYiFVAo.js +30 -0
  193. package/dist/src/{providers-B3HvufyI.js → providers-DvddrgxL.js} +795 -526
  194. package/dist/src/providers-sS2WI8YD.js +30 -0
  195. package/dist/src/{pythonUtils-D6fwaDSg.js → pythonUtils-Bzwbgpbg.js} +3 -3
  196. package/dist/src/{pythonUtils-D5nxkQ0P.js → pythonUtils-Cpo0Ez1p.js} +3 -3
  197. package/dist/src/{pythonUtils-CTU3Y3lw.cjs → pythonUtils-dAVigVK-.cjs} +3 -3
  198. package/dist/src/{pythonUtils-C3py6GC1.js → pythonUtils-wIqk7zAf.js} +3 -3
  199. package/dist/src/{quiverai-CI6gYJVI.js → quiverai-BeofbLVc.js} +4 -4
  200. package/dist/src/{quiverai-MHSxbmmZ.js → quiverai-CCQn73lq.js} +5 -5
  201. package/dist/src/{quiverai-CLkWkyZc.cjs → quiverai-CcUhPIBg.cjs} +4 -4
  202. package/dist/src/{quiverai-C2jVwbH1.js → quiverai-DVSEqJiq.js} +4 -4
  203. package/dist/src/{render-Drod8m7K.js → render-BHl6QVq9.js} +3 -3
  204. package/dist/src/{responses-CGw0DCzh.js → responses-BKP_WYis.js} +16 -12
  205. package/dist/src/{responses-BKqJmhhc.js → responses-CQb1Tj69.js} +16 -12
  206. package/dist/src/{responses-jxdehPkC.js → responses-CgNyTPsY.js} +16 -12
  207. package/dist/src/{responses-tD4Bd4dc.cjs → responses-mo0KQDbu.cjs} +16 -12
  208. package/dist/src/rubyUtils-B1HXG4ej.cjs +4 -0
  209. package/dist/src/{rubyUtils-DhCAlxZr.cjs → rubyUtils-CGeUtCfW.cjs} +3 -3
  210. package/dist/src/{rubyUtils-Boc4HZzX.js → rubyUtils-CiVfln3g.js} +3 -3
  211. package/dist/src/{rubyUtils-BcuGX77l.js → rubyUtils-DECSbsfY.js} +3 -3
  212. package/dist/src/{rubyUtils-BUVePouc.js → rubyUtils-PgU-gHmx.js} +3 -3
  213. package/dist/src/rubyUtils-Rt6pKA96.js +5 -0
  214. package/dist/src/{sagemaker-BK4Zb993.js → sagemaker-CVv8W7so.js} +17 -17
  215. package/dist/src/{sagemaker-D2Q1c-sD.js → sagemaker-CqeASYE5.js} +17 -17
  216. package/dist/src/{sagemaker-BfiWTmvn.js → sagemaker-MUbD5V3v.js} +18 -18
  217. package/dist/src/{sagemaker-CcQHM1jV.cjs → sagemaker-jiw1wQa-.cjs} +17 -17
  218. package/dist/src/{scanner-J8CA3LsV.js → scanner-DVDeUz1r.js} +10 -10
  219. package/dist/src/server/index.js +864 -112
  220. package/dist/src/server-B0Xh1Gx-.js +7 -0
  221. package/dist/src/{server-B0PPuDw-.cjs → server-BtoCXeXI.cjs} +4 -4
  222. package/dist/src/{server-BC7XJFgr.js → server-CP9qKM40.js} +4 -4
  223. package/dist/src/{server-OAs3nBRT.js → server-Cns05F1j.js} +5 -5
  224. package/dist/src/server-DJTKu9IR.cjs +5 -0
  225. package/dist/src/{server-DbFphssR.js → server-DZ9MtCn0.js} +6 -6
  226. package/dist/src/{signal-BOTbd53Z.js → signal-C3ZTsUgi.js} +3 -3
  227. package/dist/src/{slack-DXMKtA-f.js → slack-2sdpGzbt.js} +2 -2
  228. package/dist/src/{slack-BmVAVGaK.cjs → slack-94iG3T0s.cjs} +2 -2
  229. package/dist/src/{slack-DCUPTzS2.js → slack-BR0HtO3K.js} +2 -2
  230. package/dist/src/{slack-DOdy_kyv.js → slack-DCEV-vWP.js} +2 -2
  231. package/dist/src/store-C5u6MgC8.js +6 -0
  232. package/dist/src/{store-BSc-TF2w.cjs → store-CLyU7AtI.cjs} +17 -5
  233. package/dist/src/store-CNHk-De4.cjs +5 -0
  234. package/dist/src/{store-DQLEjuEO.js → store-Cj258DgL.js} +17 -5
  235. package/dist/src/{store-D1tv90v3.js → store-P8OKm19S.js} +17 -5
  236. package/dist/src/{store-Ub2vaGJ1.js → store-VB0GP46K.js} +17 -5
  237. package/dist/src/{tables-xKANLRBD.js → tables-BEIFz2tM.js} +3 -3
  238. package/dist/src/{tables-C7K-XKWp.cjs → tables-BdZQEpRz.cjs} +3 -3
  239. package/dist/src/{tables-D36WTqKX.js → tables-DmzvLbeZ.js} +3 -3
  240. package/dist/src/{tables-5EvT_Bwn.js → tables-kC7R5kiK.js} +3 -3
  241. package/dist/src/{telemetry-C2YDkUQH.js → telemetry-BnH5VJAU.js} +4 -4
  242. package/dist/src/{telemetry-C15ziL8u.js → telemetry-BugWqKiu.js} +4 -4
  243. package/dist/src/{telemetry-DMb2Mpfm.js → telemetry-DPXLd7UE.js} +4 -4
  244. package/dist/src/telemetry-Yig0Tino.js +7 -0
  245. package/dist/src/telemetry-p8Pwqm1i.cjs +5 -0
  246. package/dist/src/{telemetry-CbrnxHp_.cjs → telemetry-re627Lre.cjs} +4 -4
  247. package/dist/src/{transcription-CL78qbOU.cjs → transcription-BvtsrzRG.cjs} +13 -13
  248. package/dist/src/{transcription-DAtxHhAM.js → transcription-CaMivnjG.js} +13 -13
  249. package/dist/src/{transcription-QHh3AH6Z.js → transcription-DOMMTu01.js} +14 -14
  250. package/dist/src/{transcription-LNZTNUUL.js → transcription-Hb3VnC4M.js} +13 -13
  251. package/dist/src/{transform-DOcQeLld.cjs → transform-0BwoBsvO.cjs} +19 -5
  252. package/dist/src/{transform-DGxXocjk.js → transform-B2-jIv68.js} +8 -6
  253. package/dist/src/{transform-DECvGmzp.js → transform-BqPkNPYm.js} +4 -4
  254. package/dist/src/{transform-aa6tmVpZ.js → transform-BzK09Q_9.js} +4 -4
  255. package/dist/src/transform-ChNIpHz7.js +6 -0
  256. package/dist/src/{transform-Cgi24fJ7.js → transform-DrleutM3.js} +8 -6
  257. package/dist/src/{transform-DGLazrMm.js → transform-DyDAwEpE.js} +8 -6
  258. package/dist/src/transform-PtQ6rAE3.cjs +5 -0
  259. package/dist/src/{transform-CzK1Q0zl.cjs → transform-ZrG2dvlo.cjs} +4 -4
  260. package/dist/src/{transform-DilY9wbS.js → transform-ljLYHEPh.js} +4 -4
  261. package/dist/src/{transformersAvailability-CEVM2GNQ.js → transformersAvailability-BGkzavwb.js} +1 -1
  262. package/dist/src/{transformersAvailability-CwayUSlh.cjs → transformersAvailability-DKoRtQLy.cjs} +1 -1
  263. package/dist/src/{types-CH3Ge2sE.js → types-CIhFeUC4.js} +45 -11
  264. package/dist/src/{types-CN_TZ2GJ.js → types-Cd3ygw8W.js} +45 -11
  265. package/dist/src/{types-LJ0r3wbR.cjs → types-D8cGDZbL.cjs} +46 -12
  266. package/dist/src/{types-CLKiCBW3.js → types-q8GXGF65.js} +45 -11
  267. package/dist/src/{util-CchiqXh_.cjs → util--9u9UVCt.cjs} +3 -3
  268. package/dist/src/{util-5cB-L7U3.js → util-BLvy9qfE.js} +7 -11
  269. package/dist/src/{util-YT5HPZaS.js → util-Bm3E9jpK.js} +7 -11
  270. package/dist/src/{util-6-GqIvzS.js → util-BtoGs5Cb.js} +18 -4
  271. package/dist/src/{util-Db0a0AFH.cjs → util-CFj4YKIn.cjs} +18 -4
  272. package/dist/src/{util-Dlz_Wvgm.js → util-CMMkIxfU.js} +7 -11
  273. package/dist/src/{util-Betm42rL.js → util-CgDCK4KI.js} +18 -4
  274. package/dist/src/{util-Yz-1aEhW.cjs → util-CuLo2pMR.cjs} +7 -11
  275. package/dist/src/{util-C-PPYSMq.js → util-DM2rTn_6.js} +18 -4
  276. package/dist/src/{util-B7T3SiBS.js → util-DMFeUvLz.js} +3 -3
  277. package/dist/src/{util-ZZH-3QZz.js → util-DbVG-yZU.js} +3 -3
  278. package/dist/src/{util-DaWTWKBK.js → util-vNmDL5DT.js} +3 -3
  279. package/dist/src/{utils-XiOAgly5.js → utils-CFxO9KGo.js} +2 -2
  280. package/dist/src/{utils-f2-Moju7.js → utils-DEuL4VNB.js} +2 -2
  281. package/dist/src/{utils-Cz9qXqII.cjs → utils-DKw8mrgr.cjs} +3 -3
  282. package/dist/src/{utils-dLokC-eR.js → utils-DOjD4dTC.js} +2 -2
  283. package/dist/tsconfig.tsbuildinfo +1 -1
  284. package/package.json +38 -38
  285. package/dist/src/app/assets/index-BFCZg7hQ.js +0 -439
  286. package/dist/src/app/assets/index-NCn4eVBv.css +0 -1
  287. package/dist/src/app/assets/sync-9qqYcY-B.js +0 -4
  288. package/dist/src/app/assets/vendor-charts-CCl15Imd.js +0 -36
  289. package/dist/src/app/assets/vendor-markdown-0tekx3KX.js +0 -29
  290. package/dist/src/cache-Bbn1Nyrd.cjs +0 -5
  291. package/dist/src/cache-BwsMSda7.js +0 -6
  292. package/dist/src/cloud-DmE0EwsY.js +0 -4
  293. package/dist/src/eval-17JizQIv.js +0 -15
  294. package/dist/src/evalResult-Cqj8pldJ.js +0 -12
  295. package/dist/src/evalResult-DvcJAWJU.cjs +0 -10
  296. package/dist/src/evalResult-Hftn-S_i.js +0 -10
  297. package/dist/src/evaluator-B2CFNt-P.js +0 -36
  298. package/dist/src/fetch-KV5kNASw.js +0 -5
  299. package/dist/src/graders-Bu0H9nXi.js +0 -32
  300. package/dist/src/graders-Cfhkvx-e.js +0 -34
  301. package/dist/src/graders-DClJVpGP.cjs +0 -32
  302. package/dist/src/graders-DcnJsrMO.js +0 -32
  303. package/dist/src/providers-C1rOSHiR.js +0 -32
  304. package/dist/src/providers-CxmDwEFf.cjs +0 -31
  305. package/dist/src/providers-Dodakqr0.js +0 -30
  306. package/dist/src/providers-GIQ2TcsA.js +0 -30
  307. package/dist/src/rubyUtils-BUHu6PhO.js +0 -5
  308. package/dist/src/rubyUtils-CP42kMvq.cjs +0 -4
  309. package/dist/src/server-B1vi21hA.js +0 -7
  310. package/dist/src/server-Cm9Kai_h.cjs +0 -5
  311. package/dist/src/store-BNmZ1KAz.cjs +0 -5
  312. package/dist/src/store-BltJg2cd.js +0 -6
  313. package/dist/src/telemetry-5BCRNBbe.cjs +0 -5
  314. package/dist/src/telemetry-D4W5hboe.js +0 -7
  315. package/dist/src/transform-DTGDnAzW.js +0 -6
  316. package/dist/src/transform-m3qNw4KP.cjs +0 -5
@@ -1,34 +1,35 @@
1
1
  #!/usr/bin/env node
2
- import { C as getEnvString, E as isCI, O as state, S as getEnvInt, T as getMaxEvalTimeMs, _ as summarizeEvaluateResultForLogging, b as getEnvBool, f as extractJsonObjects, g as safeJsonStringify, o as logger, p as getAjv, w as getEvalTimeoutMs } from "./logger-KkObSCzq.js";
3
- import { N as VERSION, P as FILE_METADATA_KEY, g as isPromptfooSampleTarget, l as sleep, r as fetchWithRetries, y as parseChatPrompt } from "./fetch-BMv0O527.js";
2
+ import { C as getEnvBool, D as getEvalTimeoutMs, E as getEnvString, O as getMaxEvalTimeMs, T as getEnvInt, b as summarizeEvaluateResultForLogging, c as setLogCallback, g as getAjv, h as extractJsonObjects, j as state, k as isCI, r as globalLogCallback, s as logger, y as safeJsonStringify } from "./logger-BcJBzSSA.js";
3
+ import { N as VERSION, P as FILE_METADATA_KEY, g as isPromptfooSampleTarget, l as sleep, r as fetchWithRetries, y as parseChatPrompt } from "./fetch-DoVRJZhJ.js";
4
4
  import { t as invariant } from "./invariant-BtWWVVhl.js";
5
- import { r as telemetry } from "./telemetry-C2YDkUQH.js";
6
- import { d as isGradingResult, p as isApiProvider, s as ResultFailureReason } from "./types-CH3Ge2sE.js";
7
- import { c as promptYesNo } from "./server-DbFphssR.js";
8
- import { A as renderPrompt, E as isBasicRefusal, F as TokenUsageTracker, G as VertexChatProvider, I as createRateLimitRegistry, K as AIStudioChatProvider, L as createProviderRateLimitOptions, M as isPackagePath, N as loadFromPackage, P as redteamProviderManager, j as runExtensionHook, k as collectFileMetadata, u as GoogleLiveProvider, v as checkExfilTracking, w as getSessionId } from "./providers-CFLy1_ji.js";
9
- import { o as getCache } from "./cache-CG0SlR1d.js";
5
+ import { r as telemetry } from "./telemetry-BnH5VJAU.js";
6
+ import { d as isGradingResult, p as isApiProvider, s as ResultFailureReason } from "./types-CIhFeUC4.js";
7
+ import { c as promptYesNo } from "./server-DZ9MtCn0.js";
8
+ import { A as renderPrompt, E as isBasicRefusal, F as TokenUsageTracker, G as VertexChatProvider, I as createRateLimitRegistry, K as AIStudioChatProvider, L as createProviderRateLimitOptions, M as isPackagePath, N as loadFromPackage, P as redteamProviderManager, j as runExtensionHook, k as collectFileMetadata, u as GoogleLiveProvider, v as checkExfilTracking, w as getSessionId } from "./providers-Ch6Mr0gn.js";
9
+ import { o as getCache } from "./cache-DSqR6ezl.js";
10
10
  import { n as isNonTransientHttpStatus } from "./errors-P6ll7XSJ.js";
11
11
  import { i as isJavascriptFile } from "./fileExtensions-Ds-foDzt.js";
12
- import { E as parseFileUrl, I as isAnthropicProvider, L as isGoogleProvider, R as isOpenAiProvider, T as loadFunction, g as maybeLoadToolsFromExternalFile, w as getNunjucksEngine, z as isProviderAllowed } from "./util-YT5HPZaS.js";
13
- import { r as runPython } from "./pythonUtils-C3py6GC1.js";
14
- import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-DilY9wbS.js";
15
- import { $ as matchesSearchRubric, B as getAndCheckProvider, G as matchesContextFaithfulness, H as matchesAnswerRelevance, J as matchesFactuality, K as matchesContextRecall, Q as matchesPiScore, R as callProviderWithContext, U as matchesClassification, V as loadRubricPrompt, W as matchesClosedQa, X as matchesLlmRubric, Y as matchesGEval, Z as matchesModeration, at as DefaultSuggestionsProvider, dt as getFinalTest, et as matchesSelectBest, ft as loadFromJavaScriptFile, it as getDefaultProviders, lt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, mt as resolveContext, n as getGraderById, nt as selectMaxScore, pt as processFileReference, q as matchesContextRelevance, tt as matchesSimilarity, ut as coerceString, z as fail } from "./graders-CHO8EPM4.js";
16
- import { i as generateIdFromPrompt } from "./utils-f2-Moju7.js";
17
- import { t as OpenAiChatCompletionProvider } from "./chat-CRWNNq73.js";
12
+ import { E as parseFileUrl, I as isAnthropicProvider, L as isGoogleProvider, R as isOpenAiProvider, T as loadFunction, g as maybeLoadToolsFromExternalFile, w as getNunjucksEngine, z as isProviderAllowed } from "./util-Bm3E9jpK.js";
13
+ import { r as runPython } from "./pythonUtils-wIqk7zAf.js";
14
+ import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-ljLYHEPh.js";
15
+ import { $ as matchesSearchRubric, B as getAndCheckProvider, G as matchesContextFaithfulness, H as matchesAnswerRelevance, J as matchesFactuality, K as matchesContextRecall, Q as matchesPiScore, R as callProviderWithContext, U as matchesClassification, V as loadRubricPrompt, W as matchesClosedQa, X as matchesLlmRubric, Y as matchesGEval, Z as matchesModeration, at as getDefaultProviders, dt as coerceString, et as matchesSelectBest, ft as getFinalTest, ht as resolveContext, mt as processFileReference, n as getGraderById, nt as matchesTrajectoryGoalSuccess, ot as DefaultSuggestionsProvider, pt as loadFromJavaScriptFile, q as matchesContextRelevance, rt as selectMaxScore, tt as matchesSimilarity, ut as SUGGEST_PROMPTS_SYSTEM_MESSAGE, z as fail } from "./graders-RjHF8VfG.js";
16
+ import { i as generateIdFromPrompt } from "./utils-DEuL4VNB.js";
17
+ import { t as OpenAiChatCompletionProvider } from "./chat-BEwdgGEg.js";
18
18
  import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-DflFMjS0.js";
19
- import { m as validateFunctionCall } from "./transform-Cgi24fJ7.js";
20
- import { l as validateFunctionCall$1 } from "./util-C-PPYSMq.js";
21
- import { t as extractAndStoreBinaryData } from "./extractor-CFG6bcWJ.js";
22
- import { n as getTraceStore } from "./store-D1tv90v3.js";
23
- import { t as providerRegistry } from "./providerRegistry-B0RUOLI_.js";
24
- import { n as runRuby } from "./rubyUtils-Boc4HZzX.js";
25
- import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-BOTbd53Z.js";
19
+ import { m as validateFunctionCall } from "./transform-DrleutM3.js";
20
+ import { l as validateFunctionCall$1 } from "./util-DM2rTn_6.js";
21
+ import { t as extractAndStoreBinaryData } from "./extractor-CAZ2G3Kh.js";
22
+ import { n as getTraceStore } from "./store-P8OKm19S.js";
23
+ import { t as providerRegistry } from "./providerRegistry-B9lh-_tx.js";
24
+ import { n as runRuby } from "./rubyUtils-CiVfln3g.js";
25
+ import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-C3ZTsUgi.js";
26
26
  import chalk from "chalk";
27
27
  import fs, { createWriteStream } from "fs";
28
28
  import path from "path";
29
29
  import os from "os";
30
30
  import yaml from "js-yaml";
31
31
  import util from "util";
32
+ import readline from "readline";
32
33
  import { randomBytes } from "crypto";
33
34
  import { globSync } from "glob";
34
35
  import { XMLParser } from "fast-xml-parser";
@@ -38,6 +39,7 @@ import cliProgress from "cli-progress";
38
39
  import { JSDOM } from "jsdom";
39
40
  import { distance } from "fastest-levenshtein";
40
41
  import * as rouge from "js-rouge";
42
+ import { isDeepStrictEqual } from "node:util";
41
43
  import { ExportResultCode, W3CTraceContextPropagator } from "@opentelemetry/core";
42
44
  import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
43
45
  import { resourceFromAttributes } from "@opentelemetry/resources";
@@ -256,7 +258,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
256
258
  telemetry.record("feature_used", { feature: "tracing" });
257
259
  try {
258
260
  logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
259
- const { startOTLPReceiver } = await import("./otlpReceiver-Bn5wGB1v.js");
261
+ const { startOTLPReceiver } = await import("./otlpReceiver-CavGAA6k.js");
260
262
  const port = testSuite.tracing.otlp.http.port || 4318;
261
263
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
262
264
  logger.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
@@ -279,7 +281,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
279
281
  async function stopOtlpReceiverIfNeeded() {
280
282
  if (otlpReceiverStarted) try {
281
283
  logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
282
- const { stopOTLPReceiver } = await import("./otlpReceiver-Bn5wGB1v.js");
284
+ const { stopOTLPReceiver } = await import("./otlpReceiver-CavGAA6k.js");
283
285
  await stopOTLPReceiver();
284
286
  otlpReceiverStarted = false;
285
287
  logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -314,7 +316,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
314
316
  }
315
317
  if (!tracingEnabled) return null;
316
318
  logger.debug("[EvaluatorTracing] Importing trace store");
317
- const { getTraceStore } = await import("./store-BltJg2cd.js");
319
+ const { getTraceStore } = await import("./store-C5u6MgC8.js");
318
320
  const traceStore = getTraceStore();
319
321
  const traceId = generateTraceId();
320
322
  const spanId = generateSpanId();
@@ -1347,7 +1349,7 @@ const handleJavascript = async ({ assertion, renderedValue, valueFromScript, ass
1347
1349
  pass = result !== inverse;
1348
1350
  score = pass ? 1 : 0;
1349
1351
  } else if (typeof result === "number") {
1350
- pass = assertion.threshold !== void 0 ? result >= assertion.threshold : result > 0;
1352
+ pass = assertion.threshold === void 0 ? result > 0 : result >= assertion.threshold;
1351
1353
  score = result;
1352
1354
  } else if (typeof result === "object") return result;
1353
1355
  else throw new Error("Custom function must return a boolean or number");
@@ -1380,7 +1382,7 @@ function handleIsJson({ outputString, renderedValue, inverse, valueFromScript, a
1380
1382
  } catch {
1381
1383
  pass = inverse;
1382
1384
  }
1383
- if (pass && renderedValue) {
1385
+ if (parsedJson !== void 0 && renderedValue) {
1384
1386
  let validate;
1385
1387
  if (typeof renderedValue === "string") if (renderedValue.startsWith("file://")) {
1386
1388
  const schema = valueFromScript;
@@ -1392,11 +1394,12 @@ function handleIsJson({ outputString, renderedValue, inverse, valueFromScript, a
1392
1394
  }
1393
1395
  else if (typeof renderedValue === "object") validate = getAjv().compile(renderedValue);
1394
1396
  else throw new Error("is-json assertion must have a string or object value");
1395
- pass = validate(parsedJson);
1397
+ const valid = validate(parsedJson);
1398
+ pass = inverse ? !valid : valid;
1396
1399
  if (!pass) return {
1397
1400
  pass,
1398
1401
  score: 0,
1399
- reason: `JSON does not conform to the provided schema. Errors: ${getAjv().errorsText(validate.errors)}`,
1402
+ reason: inverse ? "Output is JSON that conforms to the provided schema" : `JSON does not conform to the provided schema. Errors: ${getAjv().errorsText(validate.errors)}`,
1400
1403
  assertion
1401
1404
  };
1402
1405
  }
@@ -1423,9 +1426,12 @@ function handleContainsJson({ assertion, renderedValue, outputString, inverse, v
1423
1426
  }
1424
1427
  else if (typeof renderedValue === "object") validate = getAjv().compile(renderedValue);
1425
1428
  else throw new Error("contains-json assertion must have a string or object value");
1426
- pass = validate(jsonObject);
1427
- if (pass) break;
1428
- else errorMessage = `JSON does not conform to the provided schema. Errors: ${getAjv().errorsText(validate.errors)}`;
1429
+ const valid = validate(jsonObject);
1430
+ pass = inverse ? !valid : valid;
1431
+ if (valid) {
1432
+ if (inverse) errorMessage = "Output contains JSON conforming to the provided schema";
1433
+ break;
1434
+ } else errorMessage = `JSON does not conform to the provided schema. Errors: ${getAjv().errorsText(validate.errors)}`;
1429
1435
  }
1430
1436
  return {
1431
1437
  pass,
@@ -1567,7 +1573,7 @@ function handlePerplexity({ logProbs, assertion }) {
1567
1573
  if (!logProbs || logProbs.length === 0) throw new Error("Perplexity assertion does not support providers that do not return logProbs");
1568
1574
  const avgLogProb = logProbs.reduce((acc, logProb) => acc + logProb, 0) / logProbs.length;
1569
1575
  const perplexity = Math.exp(-avgLogProb);
1570
- const pass = assertion.threshold !== void 0 ? perplexity <= assertion.threshold : true;
1576
+ const pass = assertion.threshold === void 0 ? true : perplexity <= assertion.threshold;
1571
1577
  return {
1572
1578
  pass,
1573
1579
  score: pass ? 1 : 0,
@@ -1579,7 +1585,7 @@ function handlePerplexityScore({ logProbs, assertion }) {
1579
1585
  if (!logProbs || logProbs.length === 0) throw new Error("perplexity-score assertion does not support providers that do not return logProbs");
1580
1586
  const avgLogProb = logProbs.reduce((acc, logProb) => acc + logProb, 0) / logProbs.length;
1581
1587
  const perplexityNorm = 1 / (1 + Math.exp(-avgLogProb));
1582
- const pass = assertion.threshold !== void 0 ? perplexityNorm >= assertion.threshold : true;
1588
+ const pass = assertion.threshold === void 0 ? true : perplexityNorm >= assertion.threshold;
1583
1589
  return {
1584
1590
  pass,
1585
1591
  score: perplexityNorm,
@@ -1694,7 +1700,7 @@ ${isMultiline ? renderedValue.split("\n").map((line) => `${indentStyle}${line}`)
1694
1700
  } else {
1695
1701
  score = Number.parseFloat(String(result));
1696
1702
  if (Number.isNaN(score)) throw new Error(`Python assertion must return a boolean, number, or {pass, score, reason} object. Instead got:\n${result}`);
1697
- pass = assertion.threshold !== void 0 ? score >= assertion.threshold : score > 0;
1703
+ pass = assertion.threshold === void 0 ? score > 0 : score >= assertion.threshold;
1698
1704
  }
1699
1705
  } catch (err) {
1700
1706
  return {
@@ -1955,7 +1961,7 @@ end
1955
1961
  } else {
1956
1962
  score = Number.parseFloat(String(result));
1957
1963
  if (Number.isNaN(score)) throw new Error(`Ruby assertion must return a boolean, number, or {pass, score, reason} object. Instead got:\n${result}`);
1958
- pass = assertion.threshold !== void 0 ? score >= assertion.threshold : score > 0;
1964
+ pass = assertion.threshold === void 0 ? score > 0 : score >= assertion.threshold;
1959
1965
  }
1960
1966
  } catch (err) {
1961
1967
  return {
@@ -2026,6 +2032,127 @@ const handleSimilar = async ({ assertion, renderedValue, outputString, inverse,
2026
2032
  };
2027
2033
  };
2028
2034
  //#endregion
2035
+ //#region src/assertions/traceUtils.ts
2036
+ /**
2037
+ * Shared utilities for trace assertions
2038
+ */
2039
/**
 * Test a span name against a glob-style pattern, ignoring case.
 * `*` stands for any run of characters (possibly empty) and `?` for exactly
 * one character; every other regex metacharacter in the pattern is literal.
 *
 * @param spanName - The span name to test
 * @param pattern - The glob pattern to test against
 * @returns true when the whole span name matches the pattern
 */
function matchesPattern(spanName, pattern) {
  // Escape regex metacharacters first so only the glob wildcards stay special.
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
  const source = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
  const anchored = new RegExp(`^${source}$`, "i");
  return anchored.test(spanName);
}
2051
+ //#endregion
2052
+ //#region src/assertions/skill.ts
2053
/**
 * Read the skill calls recorded at `params.providerResponse.metadata.skillCalls`.
 *
 * @param params - Assertion params; the provider response may be absent
 * @returns The entries that are objects with a string `name`, or an empty
 *   array when the metadata value is not an array
 */
function getSkillCalls(params) {
  const candidates = params.providerResponse?.metadata?.skillCalls;
  if (!Array.isArray(candidates)) return [];
  const isSkillCall = (entry) => {
    if (!entry || typeof entry !== "object") return false;
    return typeof entry.name === "string";
  };
  return candidates.filter((entry) => isSkillCall(entry));
}
2058
/**
 * Decide whether a skill call satisfies a matcher.
 * A set `matcher.name` must equal the call name exactly; a set
 * `matcher.pattern` must match it via matchesPattern. Unset fields are ignored.
 *
 * @param skillCall - Object with a `name`
 * @param matcher - Object with optional `name` and `pattern`
 * @returns true when every set constraint holds
 */
function matchesSkill(skillCall, matcher) {
  const nameAccepted = !matcher.name || skillCall.name === matcher.name;
  if (!nameAccepted) return false;
  return !matcher.pattern || matchesPattern(skillCall.name, matcher.pattern);
}
2063
/**
 * Render a skill call as `name` or `name (source, path)`, leaving out
 * whichever of `source` / `path` is falsy.
 *
 * @param skillCall - Object with `name` and optional `source` / `path`
 * @returns The display label
 */
function formatSkillCall(skillCall) {
  const extras = [];
  for (const part of [skillCall.source, skillCall.path]) {
    if (part) extras.push(part);
  }
  const details = extras.join(", ");
  if (!details) return skillCall.name;
  return `${skillCall.name} (${details})`;
}
2067
/**
 * Normalize the value of a skill-used assertion.
 *
 * - A non-blank string, or a non-empty array of non-blank strings, becomes
 *   `{ kind: "list", matchers: [{ name }] }` with trimmed names.
 * - A plain object becomes `{ kind: "count", matcher: { max, min, name, pattern } }`.
 *
 * @param value - The rendered or raw assertion value
 * @returns The normalized list or count descriptor
 * @throws Error when the value has another shape, when an object has neither
 *   name nor pattern, when a present min/max is not a non-negative integer,
 *   or when max is below min
 */
function resolveSkillMatchers(value) {
  const unsupported = "skill-used assertion must have a string, string array, or object value";
  const trimmedOrUndefined = (text) => typeof text === "string" ? text.trim() : void 0;
  const isFilledString = (item) => typeof item === "string" && Boolean(item.trim());
  const assertCount = (field, count) => {
    // Number.isInteger already rejects NaN and the infinities.
    if (Number.isInteger(count) && count >= 0) return;
    throw new Error(`skill-used assertion object ${field} must be a finite non-negative integer`);
  };

  if (isFilledString(value)) {
    return { kind: "list", matchers: [{ name: value.trim() }] };
  }
  if (Array.isArray(value)) {
    if (value.length === 0 || !value.every((item) => isFilledString(item))) throw new Error(unsupported);
    return { kind: "list", matchers: value.map((item) => ({ name: item.trim() })) };
  }
  if (!value || typeof value !== "object") throw new Error(unsupported);

  const name = trimmedOrUndefined(value.name);
  const pattern = trimmedOrUndefined(value.pattern);
  if (!name && !pattern) throw new Error("skill-used assertion object must include a name or pattern property");
  // A bound is validated whenever its key is present, even if set to undefined.
  for (const field of ["min", "max"]) {
    if (field in value) assertCount(field, value[field]);
  }
  const min = typeof value.min === "number" ? value.min : void 0;
  const max = typeof value.max === "number" ? value.max : void 0;
  if (min !== void 0 && max !== void 0 && max < min) throw new Error("skill-used assertion object max must be greater than or equal to min");
  return {
    kind: "count",
    matcher: { max, min, name, pattern }
  };
}
2101
/**
 * Grade a list-form skill-used assertion.
 * Without `params.inverse`, every expected matcher must match at least one
 * skill call; with it, none may match.
 *
 * @param params - Assertion params (`inverse`, `assertion`)
 * @param skillCalls - Observed skill calls
 * @param actualSkills - Display labels of the observed calls
 * @param expected - `{ matchers }` list descriptor
 * @returns A grading result `{ pass, score, reason, assertion }`
 */
function handleListSkillAssertion(params, skillCalls, actualSkills, expected) {
  // Split the expected matchers into observed / unobserved in one pass.
  const matched = [];
  const missing = [];
  for (const matcher of expected.matchers) {
    const observed = skillCalls.some((skillCall) => matchesSkill(skillCall, matcher));
    (observed ? matched : missing).push(matcher);
  }
  const joinNames = (matchers) => matchers.map((matcher) => matcher.name).join(", ");
  const actualSummary = actualSkills.length > 0 ? actualSkills.join(", ") : "(none)";
  const forbidden = params.inverse;
  const pass = forbidden ? matched.length === 0 : missing.length === 0;

  let reason;
  if (forbidden && pass) reason = `Forbidden skill(s) were not used: ${joinNames(expected.matchers)}`;
  else if (forbidden) reason = `Forbidden skill(s) were used: ${joinNames(matched)}. Actual skills: ${actualSummary}`;
  else if (pass) reason = `Observed required skill(s): ${joinNames(expected.matchers)}. Actual skills: ${actualSummary}`;
  else reason = `Missing required skill(s): ${joinNames(missing)}. Actual skills: ${actualSummary}`;

  return {
    pass,
    score: pass ? 1 : 0,
    reason,
    assertion: params.assertion
  };
}
2118
/**
 * Grade a count-form skill-used assertion.
 * The lower bound defaults to 0 when only `max` is given and to 1 otherwise.
 * With `params.inverse`, the matcher may carry no bounds other than `max: 0`,
 * and the assertion passes only when no call matches.
 *
 * @param params - Assertion params (`inverse`, `assertion`)
 * @param skillCalls - Observed skill calls
 * @param actualSkills - Display labels of the observed calls
 * @param matcher - `{ name, pattern, min, max }` matcher
 * @returns A grading result `{ pass, score, reason, assertion }`
 * @throws Error when an inverse assertion carries unsupported count bounds
 */
function handleCountSkillAssertion(params, skillCalls, actualSkills, matcher) {
  const { min: requestedMin, max } = matcher;
  const minGiven = requestedMin !== void 0;
  const maxGiven = max !== void 0;
  const min = requestedMin ?? (maxGiven ? 0 : 1);
  const matches = skillCalls.filter((skillCall) => matchesSkill(skillCall, matcher));
  const count = matches.length;
  const label = matcher.pattern || matcher.name || "*";
  const toResult = (pass, reason) => ({
    pass,
    score: pass ? 1 : 0,
    reason,
    assertion: params.assertion
  });

  if (params.inverse) {
    const boundsAllowed = !minGiven && (!maxGiven || max === 0);
    if (!boundsAllowed) throw new Error("not-skill-used object assertions only support name/pattern with no count bounds, or max: 0");
    const actualSummary = actualSkills.length > 0 ? actualSkills.join(", ") : "(none)";
    if (count === 0) return toResult(true, `Forbidden skill "${label}" was not used. Actual skills: ${actualSummary}`);
    return toResult(false, `Forbidden skill "${label}" was used ${count} time(s). Matches: ${matches.map(formatSkillCall).join(", ")}`);
  }

  const withinUpperBound = max === void 0 || count <= max;
  const expectation = max === void 0 ? ` (expected at least ${min})` : ` (expected ${min}-${max})`;
  let reason = `Matched skill "${label}" ${count} time(s)${expectation}`;
  if (count > 0) reason += `. Matches: ${matches.map(formatSkillCall).join(", ")}`;
  return toResult(count >= min && withinUpperBound, reason);
}
2148
/**
 * Entry point for the skill-used assertion: collect the observed skill calls,
 * normalize the expected value (rendered value first, raw assertion value as
 * fallback) and dispatch to the list or count grader.
 *
 * @param params - Assertion params
 * @returns The grading result produced by the selected grader
 */
function handleSkillUsed(params) {
  const observedCalls = getSkillCalls(params);
  const observedLabels = observedCalls.map(formatSkillCall);
  const expectation = resolveSkillMatchers(params.renderedValue ?? params.assertion.value);
  return expectation.kind === "list"
    ? handleListSkillAssertion(params, observedCalls, observedLabels, expectation)
    : handleCountSkillAssertion(params, observedCalls, observedLabels, expectation.matcher);
}
2155
+ //#endregion
2029
2156
  //#region src/assertions/sql.ts
2030
2157
  const handleIsSql = async ({ assertion, renderedValue, outputString, inverse }) => {
2031
2158
  let pass = false;
@@ -2258,23 +2385,6 @@ const handleToolCallF1 = ({ assertion, output, renderedValue, inverse }) => {
2258
2385
  };
2259
2386
  };
2260
2387
  //#endregion
2261
- //#region src/assertions/traceUtils.ts
2262
- /**
2263
- * Shared utilities for trace assertions
2264
- */
2265
- /**
2266
- * Match a span name against a glob-like pattern.
2267
- * Supports * (any characters) and ? (single character) wildcards.
2268
- *
2269
- * @param spanName - The span name to match
2270
- * @param pattern - The glob pattern to match against
2271
- * @returns true if the span name matches the pattern
2272
- */
2273
- function matchesPattern(spanName, pattern) {
2274
- const regexPattern = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
2275
- return new RegExp(`^${regexPattern}$`, "i").test(spanName);
2276
- }
2277
- //#endregion
2278
2388
  //#region src/assertions/traceErrorSpans.ts
2279
2389
  function isErrorSpan(span) {
2280
2390
  if (span.statusCode && span.statusCode >= 400) return true;
@@ -2443,6 +2553,524 @@ const handleTraceSpanDuration = ({ assertion, assertionValueContext }) => {
2443
2553
  };
2444
2554
  };
2445
2555
  //#endregion
2556
+ //#region src/assertions/trajectoryUtils.ts
2557
// Span attribute keys checked, in this order, for a tool name.
const TOOL_ATTRIBUTE_KEYS = [
  "tool.name", "tool_name", "tool",
  "function.name", "function_name",
  "gen_ai.tool.name",
  "codex.mcp.tool",
  "agent.tool", "agent.tool_name", "agent.toolName"
];
// Span attribute keys checked, in this order, for tool arguments.
const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
  "tool.arguments", "tool.args", "tool.input",
  "tool_arguments", "tool_args", "tool_input",
  "function.arguments", "function.args", "function.input",
  "function_arguments", "function_args",
  "gen_ai.tool.arguments", "gen_ai.tool.args", "gen_ai.tool.input",
  "gen_ai.tool.call.arguments", "gen_ai.tool.call.args",
  "agent.tool.arguments", "agent.tool.args", "agent.tool.input",
  "codex.mcp.arguments", "codex.mcp.args", "codex.mcp.input",
  "arguments", "args", "input"
];
// Span attribute keys checked, in this order, for a command line.
const COMMAND_ATTRIBUTE_KEYS = ["codex.command", "command", "command.name", "command_name"];
// Span attribute keys that explicitly carry a search query.
const SEARCH_ATTRIBUTE_KEYS = ["codex.search.query", "search.query", "search_query"];
// Generic key that may hold a search query.
const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
// Matches search/find/lookup/retrieve/retrieval as a delimited word in a span name.
const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
// Judge summaries longer than the maximum keep only the head and tail steps.
const JUDGE_SUMMARY_HEAD_STEPS = 12;
const JUDGE_SUMMARY_TAIL_STEPS = 12;
const MAX_JUDGE_SUMMARY_STEPS = 24;
2612
/**
 * Return the first non-blank string attribute among `keys`, trimmed.
 *
 * @param attributes - Attribute map to read from
 * @param keys - Candidate keys in priority order
 * @returns The trimmed value, or undefined when no key holds a non-blank string
 */
function getStringAttribute(attributes, keys) {
  for (const key of keys) {
    const candidate = attributes[key];
    if (typeof candidate !== "string") continue;
    const trimmed = candidate.trim();
    if (trimmed) return trimmed;
  }
  return void 0;
}
2618
/**
 * Normalize an attribute value that may hold structured data.
 * Strings are trimmed and parsed as JSON when possible, otherwise returned
 * trimmed; numbers, booleans and objects pass through unchanged.
 *
 * @param value - Raw attribute value
 * @returns The normalized value, or undefined for null/undefined, blank
 *   strings and any other type
 */
function normalizeStructuredAttribute(value) {
  if (value === null) return void 0;
  switch (typeof value) {
    case "string": {
      const text = value.trim();
      if (text === "") return void 0;
      try {
        return JSON.parse(text);
      } catch {
        // Not JSON: keep the trimmed text itself.
        return text;
      }
    }
    case "number":
    case "boolean":
    case "object":
      return value;
    default:
      return void 0;
  }
}
2631
/**
 * Compare two optional status objects by `code` and `message`.
 *
 * @param left - Status object or undefined
 * @param right - Status object or undefined
 * @returns true when both fields are strictly equal on both sides
 */
function hasSameStatus(left, right) {
  if (left?.code !== right?.code) return false;
  return left?.message === right?.message;
}
2634
/**
 * Heuristically decide whether a span represents a search operation, from
 * its name or from any attribute key other than the bare `query`.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns true when the name or an attribute key looks search-related
 */
function isSearchLikeSpan(span) {
  const { name } = span;
  if (SEARCH_SPAN_NAME_PATTERN.test(name) || name.startsWith("search ")) return true;
  const searchKeyPattern = /(^|[._])(search|lookup|retriev(?:e|al))($|[._])/i;
  for (const key of Object.keys(span.attributes || {})) {
    if (key !== "query" && searchKeyPattern.test(key)) return true;
  }
  return false;
}
2639
/**
 * Build a status summary for a step whose status code is set and non-zero.
 *
 * @param step - Step with `statusCode` and optional `statusMessage`
 * @returns `{ code }` plus `message` when the message is truthy, or
 *   undefined when the code is undefined or 0
 */
function getTrajectoryStepStatus(step) {
  const { statusCode: code, statusMessage: message } = step;
  if (code === void 0 || code === 0) return void 0;
  const status = { code };
  if (message) status.message = message;
  return status;
}
2646
/**
 * Take the first whitespace-delimited token of a command line.
 *
 * @param command - Command string
 * @returns The first token, or undefined when the command is blank
 */
function getCommandExecutable(command) {
  const [executable] = command.trim().split(/\s+/);
  return executable || void 0;
}
2649
/**
 * Derive a tool name from a span.
 * Tried in order: the known tool-name attribute keys, any string attribute
 * whose key looks like a tool/function name, then the segment after the last
 * `/` of a span name starting with "mcp ".
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns The trimmed tool name, or undefined when none is found
 */
function extractToolName(span) {
  const attributes = span.attributes || {};
  const known = getStringAttribute(attributes, TOOL_ATTRIBUTE_KEYS);
  if (known) return known;

  const looksLikeToolKey = (key) => {
    if (/tool.?name|function.?name/i.test(key)) return true;
    // A delimited "tool" segment counts unless the key names a result/output.
    return /(^|[._])tool($|[._])/i.test(key) && !/result|output/i.test(key);
  };
  for (const key of Object.keys(attributes)) {
    const value = attributes[key];
    if (typeof value !== "string") continue;
    const trimmed = value.trim();
    if (trimmed && looksLikeToolKey(key)) return trimmed;
  }

  if (!span.name.startsWith("mcp ")) return void 0;
  const slashIndex = span.name.lastIndexOf("/");
  const hasSuffix = slashIndex !== -1 && slashIndex < span.name.length - 1;
  return hasSuffix ? span.name.slice(slashIndex + 1).trim() : void 0;
}
2663
/**
 * Derive tool arguments from a span's attributes.
 * The known argument keys are tried first; otherwise any key with a delimited
 * arguments/args/input segment is used, skipping keys that mention
 * result/output/error/status.
 *
 * @param span - Span with optional `attributes`
 * @returns The first normalized value found, or undefined
 */
function extractToolArgs(span) {
  const attributes = span.attributes || {};
  for (const key of TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
    const known = normalizeStructuredAttribute(attributes[key]);
    if (known !== void 0) return known;
  }
  const excludedKey = /result|output|error|status/i;
  const argumentKey = /(^|[._])(arguments|args|input)($|[._])/i;
  for (const key of Object.keys(attributes)) {
    if (excludedKey.test(key) || !argumentKey.test(key)) continue;
    const fallback = normalizeStructuredAttribute(attributes[key]);
    if (fallback !== void 0) return fallback;
  }
  return void 0;
}
2676
/**
 * Derive a command line from a span.
 * Tried in order: the known command attribute keys, any string attribute
 * whose key contains "command" (but not output/result), then the remainder
 * of a span name starting with "exec ".
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns The trimmed command, or undefined when none is found
 */
function extractCommand(span) {
  const attributes = span.attributes || {};
  const known = getStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
  if (known) return known;

  const fallback = Object.entries(attributes).find(([key, value]) => {
    if (typeof value !== "string" || !value.trim()) return false;
    return /command/i.test(key) && !/output|result/i.test(key);
  });
  if (fallback) return fallback[1].trim();

  const prefix = "exec ";
  return span.name.startsWith(prefix) ? span.name.slice(prefix.length).trim() : void 0;
}
2686
/**
 * Derive a search query from a span.
 * Tried in order: the explicit search-query attribute keys, the generic
 * `query` attribute (only when the span looks search-like), then the
 * remainder of a span name starting with "search ", minus one leading and
 * one trailing double quote.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns The query, or undefined when none is found
 */
function extractSearchQuery(span) {
  const attributes = span.attributes || {};
  const explicit = getStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
  if (explicit) return explicit;

  const generic = getStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
  if (generic && isSearchLikeSpan(span)) return generic;

  const prefix = "search ";
  if (!span.name.startsWith(prefix)) return void 0;
  return span.name.slice(prefix.length).replace(/^"|"$/g, "").trim();
}
2694
/**
 * Decide whether a span is a reasoning step, by its `codex.item.type`
 * attribute or by a name that is "reasoning" or starts with that word
 * followed by `_` or whitespace (case-insensitive).
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns true for reasoning spans
 */
function isReasoningSpan(span) {
  const itemType = (span.attributes || {})["codex.item.type"];
  if (itemType === "reasoning") return true;
  if (span.name === "reasoning") return true;
  return /^reasoning([_\s]|$)/i.test(span.name);
}
2698
/**
 * Decide whether a span is an agent message, by its `codex.item.type`
 * attribute or by the exact span names "agent response" / "send input".
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns true for message spans
 */
function isMessageSpan(span) {
  const itemType = (span.attributes || {})["codex.item.type"];
  if (itemType === "agent_message") return true;
  return ["agent response", "send input"].includes(span.name);
}
2702
/**
 * Convert a trace's spans into ordered trajectory steps.
 * Spans are sorted by start time, then end time (start time when the end is
 * missing), then original position. Each span is classified as tool, command,
 * search, reasoning, message or plain span, in that priority.
 *
 * @param trace - Trace whose `spans` may be missing
 * @returns One step per span, carrying the derived `type`, `name`, `aliases`,
 *   `args` (only when found) and the span's ids, times, status and attributes
 */
function extractTrajectorySteps(trace) {
  const compareChronologically = (left, right) => {
    const startDelta = left.span.startTime - right.span.startTime;
    if (startDelta !== 0) return startDelta;
    const leftEnd = left.span.endTime ?? left.span.startTime;
    const rightEnd = right.span.endTime ?? right.span.startTime;
    const endDelta = leftEnd - rightEnd;
    if (endDelta !== 0) return endDelta;
    // Fall back to the original position so ties keep their input order.
    return left.index - right.index;
  };

  const toStep = ({ span }) => {
    const toolName = extractToolName(span);
    const command = extractCommand(span);
    const searchQuery = extractSearchQuery(span);

    let kind;
    if (toolName) {
      kind = { type: "tool", name: toolName, extraAliases: [toolName], args: extractToolArgs(span) };
    } else if (command) {
      const executable = getCommandExecutable(command);
      kind = { type: "command", name: command, extraAliases: executable ? [command, executable] : [command] };
    } else if (searchQuery) {
      kind = { type: "search", name: searchQuery, extraAliases: [searchQuery] };
    } else if (isReasoningSpan(span)) {
      kind = { type: "reasoning", name: span.name, extraAliases: ["reasoning"] };
    } else if (isMessageSpan(span)) {
      kind = { type: "message", name: span.name, extraAliases: ["message"] };
    } else {
      kind = { type: "span", name: span.name, extraAliases: [] };
    }

    // The span name is always the first alias; the Set drops duplicates.
    const aliases = new Set([span.name, ...kind.extraAliases]);
    return {
      aliases: [...aliases],
      ...kind.args === void 0 ? {} : { args: kind.args },
      attributes: span.attributes || {},
      endTime: span.endTime,
      name: kind.name,
      spanId: span.spanId,
      spanName: span.name,
      startTime: span.startTime,
      statusCode: span.statusCode,
      statusMessage: span.statusMessage,
      type: kind.type
    };
  };

  const indexedSpans = [...trace.spans || []].map((span, index) => ({ span, index }));
  indexedSpans.sort(compareChronologically);
  return indexedSpans.map(toStep);
}
2759
/**
 * Normalize a trajectory matcher into object form.
 * A string becomes `{ pattern }`; an object is shallow-copied. `defaultType`
 * is applied only when the matcher has no truthy `type` of its own.
 *
 * @param matcher - Pattern string or matcher object
 * @param defaultType - Optional step type to fall back to
 * @returns A new matcher object
 */
function normalizeTrajectoryMatcher(matcher, defaultType) {
  if (typeof matcher === "string") {
    const fromPattern = { pattern: matcher };
    if (defaultType) fromPattern.type = defaultType;
    return fromPattern;
  }
  const normalized = { ...matcher };
  if (!matcher.type && defaultType) normalized.type = defaultType;
  return normalized;
}
2769
/**
 * Decide whether a trajectory step satisfies a matcher.
 * When the matcher has a type (single value or array), the step type must be
 * one of them. When it has a pattern (or, failing that, a name), at least one
 * step alias must match it as a glob; a matcher with neither matches any step
 * of an accepted type.
 *
 * @param step - Step with `type` and `aliases`
 * @param matcher - Pattern string or matcher object
 * @param defaultType - Optional type applied when the matcher has none
 * @returns true when the step satisfies the matcher
 */
function matchesTrajectoryStep(step, matcher, defaultType) {
  const normalized = normalizeTrajectoryMatcher(matcher, defaultType);
  if (normalized.type) {
    const acceptedTypes = Array.isArray(normalized.type) ? normalized.type : [normalized.type];
    if (!acceptedTypes.includes(step.type)) return false;
  }
  const glob = normalized.pattern || normalized.name;
  return !glob || step.aliases.some((alias) => matchesPattern(alias, glob));
}
2778
/**
 * Render a step as `type:name`.
 *
 * @param step - Step with `type` and `name`
 * @returns The display label
 */
function formatTrajectoryStep(step) {
  const { type, name } = step;
  return `${type}:${name}`;
}
2781
/**
 * Render tool arguments for display.
 *
 * @param args - Any value, possibly undefined
 * @returns "(none)" for undefined; otherwise the JSON serialization, falling
 *   back to `String(args)` when serialization throws or yields undefined
 */
function formatTrajectoryArgs(args) {
  if (args === void 0) return "(none)";
  let serialized;
  try {
    serialized = JSON.stringify(args);
  } catch {
    // Circular structures, BigInt and the like: use the String() fallback.
    serialized = void 0;
  }
  return serialized === void 0 ? String(args) : serialized;
}
2789
/**
 * Collapse runs of consecutive identical steps into a single entry.
 * Two neighbouring steps are identical when their `type`, `name`, `spanName`
 * and status agree; the surviving entry gets a `collapsedCount` holding the
 * length of the run.
 *
 * The returned entries are shallow copies, so the caller's step objects are
 * never modified.
 *
 * @param steps - Summary steps in trajectory order
 * @returns A new array of compacted step copies
 */
function compactJudgeTrajectorySteps(steps) {
  const compacted = [];
  for (const step of steps) {
    const previous = compacted[compacted.length - 1];
    const repeatsPrevious = Boolean(previous) && previous.type === step.type && previous.name === step.name && previous.spanName === step.spanName && hasSameStatus(previous.status, step.status);
    if (repeatsPrevious) {
      previous.collapsedCount = (previous.collapsedCount ?? 1) + 1;
      continue;
    }
    // Copy before storing: collapsedCount is written onto this entry later.
    compacted.push({ ...step });
  }
  return compacted;
}
2801
/**
 * Shorten a step list for the judge summary.
 * Lists of at most MAX_JUDGE_SUMMARY_STEPS entries are returned as-is; longer
 * lists keep the head and tail steps with an `{ omittedCount }` marker between.
 *
 * @param steps - Compacted summary steps
 * @returns The same array, or a new shortened array
 */
function truncateJudgeTrajectorySteps(steps) {
  const overflow = steps.length - MAX_JUDGE_SUMMARY_STEPS;
  if (overflow <= 0) return steps;
  const head = steps.slice(0, JUDGE_SUMMARY_HEAD_STEPS);
  const tail = steps.slice(-JUDGE_SUMMARY_TAIL_STEPS);
  return [...head, { omittedCount: overflow }, ...tail];
}
2809
/**
 * Produce a JSON summary of a trace's trajectory for a judge prompt.
 * Each step is reduced to its 1-based index, type and name, plus the span
 * name when it differs from the step name and a status when one exists;
 * the steps are then compacted and truncated.
 *
 * @param trace - Trace with `traceId` and spans
 * @returns A pretty-printed JSON string with `traceId`, `stepCount`,
 *   `compactedStepCount` and `steps`
 */
function summarizeTrajectoryForJudge(trace) {
  const rawSteps = extractTrajectorySteps(trace).map((step, position) => {
    const summary = {
      index: position + 1,
      type: step.type,
      name: step.name
    };
    if (step.spanName !== step.name) summary.spanName = step.spanName;
    const status = getTrajectoryStepStatus(step);
    if (status) summary.status = status;
    return summary;
  });
  const compactedSteps = compactJudgeTrajectorySteps(rawSteps);
  const payload = {
    traceId: trace.traceId,
    stepCount: rawSteps.length,
    compactedStepCount: compactedSteps.length,
    steps: truncateJudgeTrajectorySteps(compactedSteps)
  };
  return JSON.stringify(payload, null, 2);
}
2826
+ //#endregion
2827
+ //#region src/assertions/trajectory.ts
2828
/**
 * Fetch the trace attached to the assertion context.
 *
 * @param params - Assertion params with `assertionValueContext` and `baseType`
 * @returns The trace
 * @throws Error when the trace or its `spans` is missing
 */
function getTraceOrThrow(params) {
  const { trace } = params.assertionValueContext;
  if (trace && trace.spans) return trace;
  throw new Error(`No trace data available for ${params.baseType} assertion`);
}
2833
/**
 * Flip a pass value for inverse ("not-") assertions.
 *
 * @param pass - The un-inverted outcome
 * @param inverse - Whether the assertion is inverted
 * @returns `!pass` when inverse is truthy, otherwise `pass` unchanged
 */
function applyInverse(pass, inverse) {
  if (inverse) return !pass;
  return pass;
}
2836
/**
 * Join step labels for display.
 *
 * @param stepLabels - Array of labels
 * @returns The labels joined by ", ", or "(none)" for an empty array
 */
function formatStepList(stepLabels) {
  if (stepLabels.length > 0) return stepLabels.join(", ");
  return "(none)";
}
2839
/**
 * Ensure a matcher identifies its target by name or pattern.
 *
 * @param matcher - Normalized matcher object
 * @param assertionType - Assertion type used in the error message
 * @param index - Optional 0-based step position; omitted for single matchers
 * @throws Error when both `pattern` and `name` are falsy
 */
function requireNamedTrajectoryMatcher(matcher, assertionType, index) {
  const isNamed = Boolean(matcher.pattern || matcher.name);
  if (isNamed) return;
  let stepLabel = "object";
  if (index !== void 0) stepLabel = `step ${index + 1}`;
  throw new Error(`${assertionType} assertion ${stepLabel} must include a name or pattern property`);
}
2844
/**
 * Normalize the value of a trajectory:goal-success assertion.
 *
 * @param value - A non-blank string, or a plain object with a non-blank
 *   string `goal`
 * @returns `{ goal }` with the goal trimmed
 * @throws Error for any other value
 */
function resolveGoalSuccessValue(value) {
  let candidate;
  if (typeof value === "string") candidate = value;
  else if (value && typeof value === "object" && !Array.isArray(value)) candidate = value.goal;
  if (typeof candidate === "string" && candidate.trim()) return { goal: candidate.trim() };
  throw new Error("trajectory:goal-success assertion must have a string value or an object with a goal property");
}
2849
/**
 * Normalize the value of a trajectory:tool-used assertion.
 *
 * - A string or an array of strings becomes `{ kind: "list", matchers }`.
 * - A plain object becomes `{ kind: "count", matcher }`, where `min` / `max`
 *   are kept only when they are numbers.
 *
 * Every matcher defaults to the "tool" step type.
 *
 * @param value - The rendered or raw assertion value
 * @returns The normalized list or count descriptor
 * @throws Error when the value has another shape
 */
function resolveToolMatchers(value) {
  const unsupported = "trajectory:tool-used assertion must have a string, string array, or object value";
  const asToolMatcher = (item) => normalizeTrajectoryMatcher(item, "tool");
  const numberOrUndefined = (candidate) => typeof candidate === "number" ? candidate : void 0;

  if (typeof value === "string") {
    return { kind: "list", matchers: [asToolMatcher(value)] };
  }
  if (Array.isArray(value)) {
    if (!value.every((item) => typeof item === "string")) throw new Error(unsupported);
    return { kind: "list", matchers: value.map((item) => asToolMatcher(item)) };
  }
  if (!value || typeof value !== "object") throw new Error(unsupported);
  return {
    kind: "count",
    matcher: {
      ...asToolMatcher(value),
      max: numberOrUndefined(value.max),
      min: numberOrUndefined(value.min)
    }
  };
}
2868
/**
 * Grade a trajectory:tool-used assertion against the tool steps of the trace.
 *
 * List form (string / string array): every expected tool must match at least
 * one tool step, or none may when `params.inverse` is set.
 *
 * Count form (object): the number of matching tool steps must lie within
 * [min, max]. `min` defaults to 1, except when it is omitted and `max` is
 * below 1: then it defaults to 0, so that `{ name, max: 0 }` means "never
 * used" instead of the unsatisfiable range 1-0. The outcome is flipped when
 * `params.inverse` is set.
 *
 * @param params - Assertion params (`assertion`, `renderedValue`, `inverse`,
 *   `assertionValueContext`, `baseType`)
 * @returns A grading result `{ pass, score, reason, assertion }`
 * @throws Error when no trace is available, when the list form is empty, or
 *   when the count form has neither name nor pattern
 */
const handleTrajectoryToolUsed = (params) => {
  const steps = extractTrajectorySteps(getTraceOrThrow(params)).filter((step) => step.type === "tool");
  const expected = resolveToolMatchers(params.renderedValue ?? params.assertion.value);
  const labelOf = (matcher) => matcher.pattern || matcher.name || "*";
  const toResult = (pass, reason) => ({
    pass,
    score: pass ? 1 : 0,
    reason,
    assertion: params.assertion
  });

  if (expected.kind === "list") {
    if (expected.matchers.length === 0) throw new Error("trajectory:tool-used assertion requires at least one expected tool");
    const matched = [];
    const missing = [];
    for (const matcher of expected.matchers) {
      const observed = steps.some((step) => matchesTrajectoryStep(step, matcher));
      (observed ? matched : missing).push(matcher);
    }
    const pass = params.inverse ? matched.length === 0 : missing.length === 0;
    const actualTools = steps.map(formatTrajectoryStep);
    const expectedTools = expected.matchers.map(labelOf);
    let reason;
    if (params.inverse) reason = pass ? `Forbidden tool(s) were not used: ${expectedTools.join(", ")}` : `Forbidden tool(s) were used: ${matched.map(labelOf).join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
    else if (pass) reason = `Observed required tool(s): ${expectedTools.join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
    else reason = `Missing required tool(s): ${missing.map(labelOf).join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
    return toResult(pass, reason);
  }

  const matcher = expected.matcher;
  if (!matcher.pattern && !matcher.name) throw new Error("trajectory:tool-used assertion object must include a name or pattern property");
  const max = matcher.max;
  // With only an upper bound below 1, a default minimum of 1 could never be
  // met; use 0 there so the range stays satisfiable.
  const min = matcher.min ?? (max !== void 0 && max < 1 ? 0 : 1);
  const matchingSteps = steps.filter((step) => matchesTrajectoryStep(step, matcher));
  const count = matchingSteps.length;
  const basePass = count >= min && (max === void 0 || count <= max);
  const pass = applyInverse(basePass, params.inverse);
  const matcherLabel = labelOf(matcher);
  let reason = `Matched tool "${matcherLabel}" ${count} time(s)`;
  reason += max === void 0 ? ` (expected at least ${min})` : ` (expected ${min}-${max})`;
  if (matchingSteps.length > 0) reason += `. Matches: ${matchingSteps.map(formatTrajectoryStep).join(", ")}`;
  if (params.inverse) reason = basePass ? `Tool "${matcherLabel}" matched ${count} time(s), which violates the inverse assertion` : `Tool "${matcherLabel}" did not satisfy the forbidden match condition`;
  return toResult(pass, reason);
};
2910
/**
 * Normalize the value of a trajectory:tool-sequence assertion.
 *
 * @param value - An array of steps, or an object with optional `mode` and
 *   `steps`
 * @returns `{ mode, steps }`; mode falls back to "in_order" and steps to an
 *   empty array when falsy
 * @throws Error when the value is neither an array nor an object
 */
function resolveSequenceValue(value) {
  if (Array.isArray(value)) return { mode: "in_order", steps: value };
  if (!value || typeof value !== "object") throw new Error("trajectory:tool-sequence assertion must have an array or object value");
  const { mode, steps } = value;
  return {
    mode: mode || "in_order",
    steps: steps || []
  };
}
2924
/**
 * Check for a non-null, non-array object.
 *
 * @param value - Any value
 * @returns true when `value` is an object that is neither null nor an array
 */
function isRecord(value) {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
2927
/**
 * Partial deep match of actual tool arguments against expected ones.
 * Expected arrays must match element by element with equal length; expected
 * objects need only their own keys to be present and match in `actual`;
 * anything else is compared with isDeepStrictEqual.
 *
 * @param actual - Observed arguments
 * @param expected - Expected arguments, used as the pattern
 * @returns true when `actual` satisfies `expected`
 */
function matchesExpectedArgsPartial(actual, expected) {
  if (Array.isArray(expected)) {
    if (!Array.isArray(actual) || actual.length !== expected.length) return false;
    return expected.every((item, position) => matchesExpectedArgsPartial(actual[position], item));
  }
  if (!isRecord(expected)) return isDeepStrictEqual(actual, expected);
  if (!isRecord(actual)) return false;
  for (const [key, expectedValue] of Object.entries(expected)) {
    // Extra keys on `actual` are allowed; missing expected keys are not.
    if (!Object.prototype.hasOwnProperty.call(actual, key)) return false;
    if (!matchesExpectedArgsPartial(actual[key], expectedValue)) return false;
  }
  return true;
}
2935
/**
 * Compare actual tool arguments with expected ones.
 *
 * @param actual - Observed arguments
 * @param expected - Expected arguments
 * @param mode - "exact" for deep strict equality; any other value uses the
 *   partial match
 * @returns true when the arguments match under the given mode
 */
function matchesToolArgs(actual, expected, mode) {
  const compare = mode === "exact" ? isDeepStrictEqual : matchesExpectedArgsPartial;
  return compare(actual, expected);
}
2939
/**
 * Validate the `mode` of a trajectory:tool-args-match assertion.
 *
 * @param mode - undefined, "partial" or "exact"
 * @returns "partial" when mode is undefined, otherwise the mode itself
 * @throws Error for any other value
 */
function resolveToolArgsMatchMode(mode) {
  switch (mode) {
    case void 0:
      return "partial";
    case "partial":
    case "exact":
      return mode;
    default:
      throw new Error("trajectory:tool-args-match assertion mode must be \"partial\" or \"exact\"");
  }
}
2944
/**
 * Normalize the value of a trajectory:tool-args-match assertion.
 * Expected arguments come from the object's own `args` property when it has
 * one, otherwise from `arguments`.
 *
 * @param value - Plain object with name/pattern, args/arguments and
 *   optional mode
 * @returns `{ matcher, expectedArgs, mode }`
 * @throws Error when the value is not a plain object, names no tool, has no
 *   expected arguments, or carries an unsupported mode
 */
function resolveToolArgsMatchValue(value) {
  const isPlainObject = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isPlainObject) throw new Error("trajectory:tool-args-match assertion must have an object value");
  const matcher = normalizeTrajectoryMatcher(value, "tool");
  requireNamedTrajectoryMatcher(matcher, "trajectory:tool-args-match");
  const argsKey = Object.prototype.hasOwnProperty.call(value, "args") ? "args" : "arguments";
  const expectedArgs = value[argsKey];
  if (expectedArgs === void 0) throw new Error("trajectory:tool-args-match assertion must include an args or arguments property");
  const mode = resolveToolArgsMatchMode(value.mode);
  return { matcher, expectedArgs, mode };
}
2956
/**
 * Grade a trajectory:tool-sequence assertion against the trace's tool steps.
 * In "exact" mode the tool steps must correspond one-to-one with the expected
 * steps; in any other mode the expected steps must appear in order, with
 * other tool steps allowed in between. The outcome is flipped when
 * `params.inverse` is set.
 *
 * @param params - Assertion params (`assertion`, `renderedValue`, `inverse`,
 *   `assertionValueContext`, `baseType`)
 * @returns A grading result `{ pass, score, reason, assertion }`
 * @throws Error when no trace is available, when no expected step is given,
 *   or when an expected step has neither name nor pattern
 */
const handleTrajectoryToolSequence = (params) => {
  const toolSteps = extractTrajectorySteps(getTraceOrThrow(params)).filter((step) => step.type === "tool");
  const sequence = resolveSequenceValue(params.renderedValue ?? params.assertion.value);
  const expectedMatchers = sequence.steps.map((step, position) => {
    const matcher = normalizeTrajectoryMatcher(step, "tool");
    requireNamedTrajectoryMatcher(matcher, "trajectory:tool-sequence", position);
    return matcher;
  });
  if (expectedMatchers.length === 0) throw new Error("trajectory:tool-sequence assertion requires at least one expected step");

  const actualSummary = formatStepList(toolSteps.map(formatTrajectoryStep));
  let basePass;
  let reason;
  if (sequence.mode === "exact") {
    const sameLength = toolSteps.length === expectedMatchers.length;
    basePass = sameLength && expectedMatchers.every((matcher, position) => matchesTrajectoryStep(toolSteps[position], matcher));
    reason = basePass
      ? `Observed exact tool sequence: ${actualSummary}`
      : `Expected exact tool sequence of ${expectedMatchers.map((matcher) => matcher.pattern || matcher.name || "*").join(", ")}, but actual tools were ${actualSummary}`;
  } else {
    // Walk the tool steps once, advancing through the expected matchers.
    const matchedLabels = [];
    let cursor = 0;
    for (let i = 0; i < toolSteps.length && cursor < expectedMatchers.length; i += 1) {
      const step = toolSteps[i];
      if (!matchesTrajectoryStep(step, expectedMatchers[cursor])) continue;
      matchedLabels.push(formatTrajectoryStep(step));
      cursor += 1;
    }
    basePass = cursor === expectedMatchers.length;
    reason = basePass
      ? `Observed tool sequence in order: ${matchedLabels.join(", ")}. Actual tools: ${actualSummary}`
      : `Expected tool "${expectedMatchers[cursor]?.pattern || expectedMatchers[cursor]?.name || "*"}" was not observed in order. Actual tools: ${actualSummary}`;
  }

  if (params.inverse) reason = basePass ? `Forbidden tool sequence was observed. Actual tools: ${actualSummary}` : `Forbidden tool sequence was not observed`;
  const pass = applyInverse(basePass, params.inverse);
  return {
    pass,
    score: pass ? 1 : 0,
    reason,
    assertion: params.assertion
  };
};
2995
/**
 * Handler for the trajectory:tool-args-match assertion.
 *
 * Finds tool steps in the trace that match the configured matcher and passes
 * when at least one of them (among those with captured `args`) satisfies
 * matchesToolArgs for the expected arguments and mode.
 *
 * @param {object} params - Assertion parameters (`assertion`, `renderedValue`, `inverse`, plus whatever getTraceOrThrow reads).
 * @returns {{ pass: boolean, score: number, reason: string, assertion: object }} Grading result.
 */
const handleTrajectoryToolArgsMatch = (params) => {
  const trace = getTraceOrThrow(params);
  const toolSteps = extractTrajectorySteps(trace).filter((step) => step.type === "tool");
  const { matcher, expectedArgs, mode } = resolveToolArgsMatchValue(params.renderedValue ?? params.assertion.value);
  const matcherLabel = matcher.pattern || matcher.name || "*";
  const actualTools = toolSteps.map(formatTrajectoryStep);

  // Narrow progressively: steps for the tool -> steps that captured args -> first step whose args match.
  const matchingSteps = toolSteps.filter((step) => matchesTrajectoryStep(step, matcher));
  const stepsWithArgs = matchingSteps.filter((step) => step.args !== void 0);
  const matchedStep = stepsWithArgs.find((step) => matchesToolArgs(step.args, expectedArgs, mode));
  const basePass = matchedStep !== void 0;
  const pass = applyInverse(basePass, params.inverse);

  const expectedArgsLabel = formatTrajectoryArgs(expectedArgs);
  let observedArgsLabel = "(none)";
  if (stepsWithArgs.length > 0) {
    observedArgsLabel = stepsWithArgs.map((step) => formatTrajectoryArgs(step.args)).join(", ");
  }
  // Shared tail of the two "matched" messages; only called when matchedStep exists.
  const matchedDetail = () => `${formatTrajectoryStep(matchedStep)}. Args: ${formatTrajectoryArgs(matchedStep.args)}`;

  let reason;
  if (params.inverse) {
    if (basePass) {
      reason = `Forbidden argument match for tool "${matcherLabel}" was observed on ${matchedDetail()}`;
    } else if (matchingSteps.length === 0) {
      reason = `Forbidden argument match for tool "${matcherLabel}" was not observed because no tool call matched it`;
    } else {
      reason = `Forbidden argument match for tool "${matcherLabel}" was not observed. Observed args: ${observedArgsLabel}`;
    }
  } else if (basePass) {
    reason = `Tool "${matcherLabel}" matched expected arguments (${mode}) on ${matchedDetail()}`;
  } else if (matchingSteps.length === 0) {
    reason = `No tool call matched "${matcherLabel}". Actual tools: ${formatStepList(actualTools)}`;
  } else if (stepsWithArgs.length === 0) {
    reason = `Tool "${matcherLabel}" was observed but no arguments were captured. Actual tools: ${formatStepList(actualTools)}`;
  } else {
    reason = `No call to tool "${matcherLabel}" matched expected arguments (${mode}): ${expectedArgsLabel}. Observed args: ${observedArgsLabel}`;
  }

  return {
    pass,
    score: pass ? 1 : 0,
    reason,
    assertion: params.assertion
  };
};
3022
/**
 * Validates the value of a trajectory:step-count assertion.
 *
 * @param {unknown} value - Assertion value; must be a non-array object.
 * @returns {object} The matcher fields produced by normalizeTrajectoryMatcher,
 *   plus `max` and `min`, each `undefined` unless the supplied value is of type number.
 * @throws {Error} If `value` is not a plain object.
 */
function resolveStepCountValue(value) {
  const isObjectValue = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isObjectValue) {
    throw new Error("trajectory:step-count assertion must have an object value");
  }
  // Bounds of any non-number type (e.g. numeric strings) are dropped rather than coerced.
  const numberOrUndefined = (bound) => (typeof bound === "number" ? bound : void 0);
  const matcher = normalizeTrajectoryMatcher(value);
  return {
    ...matcher,
    max: numberOrUndefined(value.max),
    min: numberOrUndefined(value.min)
  };
}
3030
/**
 * Handler for the trajectory:step-count assertion.
 *
 * Counts the trajectory steps that match the configured matcher and checks
 * that the count lies within the inclusive `min`/`max` bounds (either bound
 * may be omitted, but not both).
 *
 * @param {object} params - Assertion parameters (`assertion`, `renderedValue`, `inverse`, plus whatever getTraceOrThrow reads).
 * @returns {{ pass: boolean, score: number, reason: string, assertion: object }} Grading result.
 * @throws {Error} If neither `min` nor `max` is a number.
 */
const handleTrajectoryStepCount = (params) => {
  const steps = extractTrajectorySteps(getTraceOrThrow(params));
  const matcher = resolveStepCountValue(params.renderedValue ?? params.assertion.value);
  const { min, max } = matcher;
  const hasMin = min !== void 0;
  const hasMax = max !== void 0;
  if (!hasMin && !hasMax) {
    throw new Error("trajectory:step-count assertion must include a min or max property");
  }

  const matchingSteps = steps.filter((step) => matchesTrajectoryStep(step, matcher));
  const count = matchingSteps.length;
  const aboveMin = !hasMin || count >= min;
  const belowMax = !hasMax || count <= max;
  const basePass = aboveMin && belowMax;
  const pass = applyInverse(basePass, params.inverse);

  // Describe which filters were applied so the reason explains what was counted.
  const filterParts = [];
  if (matcher.type) {
    const types = Array.isArray(matcher.type) ? matcher.type : [matcher.type];
    filterParts.push(`type=${types.join("|")}`);
  }
  const pattern = matcher.pattern || matcher.name;
  if (pattern) filterParts.push(`pattern=${pattern}`);

  let reason = `Matched ${count} trajectory step(s)`;
  if (filterParts.length > 0) reason += ` for ${filterParts.join(", ")}`;
  if (hasMin && hasMax) {
    reason += ` (expected ${min}-${max})`;
  } else if (hasMin) {
    reason += ` (expected at least ${min})`;
  } else if (hasMax) {
    reason += ` (expected at most ${max})`;
  }
  if (matchingSteps.length > 0) {
    reason += `. Matches: ${matchingSteps.map(formatTrajectoryStep).join(", ")}`;
  }
  // Inverted assertions discard the detailed reason in favour of a fixed sentence.
  if (params.inverse) {
    reason = basePass ? `Trajectory step count satisfied the forbidden range` : `Trajectory step count did not satisfy the forbidden range`;
  }

  return {
    pass,
    score: pass ? 1 : 0,
    reason,
    assertion: params.assertion
  };
};
3060
/**
 * Handler for the trajectory:goal-success assertion.
 *
 * Delegates grading to matchesTrajectoryGoalSuccess, passing the goal, a
 * summary of the trace, and the output string. For inverted assertions the
 * result is flipped and given a goal-specific reason.
 *
 * @param {object} params - Assertion parameters (`assertion`, `renderedValue`, `inverse`,
 *   `outputString`, `test`, `assertionValueContext`, `providerCallContext`).
 * @returns {Promise<object>} The grader result, or its inverted copy when `params.inverse` is set.
 */
const handleTrajectoryGoalSuccess = async (params) => {
  const trace = getTraceOrThrow(params);
  const { goal } = resolveGoalSuccessValue(params.renderedValue ?? params.assertion.value);
  const trajectorySummary = summarizeTrajectoryForJudge(trace);
  const result = await matchesTrajectoryGoalSuccess(
    goal,
    trajectorySummary,
    params.outputString,
    params.test.options,
    params.assertionValueContext.vars,
    params.assertion,
    params.providerCallContext
  );

  if (params.inverse) {
    const achieved = result.pass;
    return {
      ...result,
      assertion: params.assertion,
      pass: !achieved,
      score: achieved ? 0 : 1,
      reason: achieved ? `Agent unexpectedly achieved the goal: ${goal}` : `Agent did not achieve the forbidden goal: ${goal}`
    };
  }
  return result;
};
3073
+ //#endregion
2446
3074
  //#region src/assertions/webhook.ts
2447
3075
  async function handleWebhook({ assertion, renderedValue, test, prompt, output, inverse }) {
2448
3076
  invariant(renderedValue, "\"webhook\" assertion type must have a URL value");
@@ -2511,18 +3139,18 @@ const handleWordCount = ({ assertion, renderedValue, valueFromScript, outputStri
2511
3139
  if (pass) reason = "Assertion passed";
2512
3140
  else if (inverse) reason = `Expected word count to not be between ${min} and ${max}, but got ${wordCount}`;
2513
3141
  else reason = `Word count ${wordCount} is not between ${min} and ${max}`;
2514
- } else if (min !== void 0) {
2515
- const basePass = wordCount >= min;
2516
- pass = inverse ? !basePass : basePass;
2517
- if (pass) reason = "Assertion passed";
2518
- else if (inverse) reason = `Expected word count to be less than ${min}, but got ${wordCount}`;
2519
- else reason = `Word count ${wordCount} is less than minimum ${min}`;
2520
- } else {
3142
+ } else if (min === void 0) {
2521
3143
  const basePass = wordCount <= max;
2522
3144
  pass = inverse ? !basePass : basePass;
2523
3145
  if (pass) reason = "Assertion passed";
2524
3146
  else if (inverse) reason = `Expected word count to be greater than ${max}, but got ${wordCount}`;
2525
3147
  else reason = `Word count ${wordCount} is greater than maximum ${max}`;
3148
+ } else {
3149
+ const basePass = wordCount >= min;
3150
+ pass = inverse ? !basePass : basePass;
3151
+ if (pass) reason = "Assertion passed";
3152
+ else if (inverse) reason = `Expected word count to be less than ${min}, but got ${wordCount}`;
3153
+ else reason = `Word count ${wordCount} is less than minimum ${min}`;
2526
3154
  }
2527
3155
  } else {
2528
3156
  invariant(typeof value === "number" || typeof value === "string" && !Number.isNaN(Number(value)), "\"word-count\" assertion value must be a number or an object with min/max properties");
@@ -2617,6 +3245,12 @@ const handleIsXml = ({ assertion, renderedValue, outputString, inverse, baseType
2617
3245
  //#endregion
2618
3246
  //#region src/assertions/index.ts
2619
3247
  const ASSERTIONS_MAX_CONCURRENCY = getEnvInt("PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY", 3);
3248
// Trace polling defaults, used as fallbacks when the matching
// PROMPTFOO_TRACE_FETCH_* environment variables are read in loadTraceData.
const DEFAULT_TRACE_FETCH_MAX_ATTEMPTS = 6;
const DEFAULT_TRACE_FETCH_RETRY_DELAY_MS = 250;
const DEFAULT_TRACE_FETCH_STABLE_POLLS = 2;

// Upper limits that loadTraceData clamps the configured values to.
const MAX_TRACE_FETCH_MAX_ATTEMPTS = 30;
const MAX_TRACE_FETCH_RETRY_DELAY_MS = 5000;
const MAX_TRACE_FETCH_STABLE_POLLS = 10;
2620
3254
  const MODEL_GRADED_ASSERTION_TYPES = new Set([
2621
3255
  "answer-relevance",
2622
3256
  "context-faithfulness",
@@ -2626,8 +3260,57 @@ const MODEL_GRADED_ASSERTION_TYPES = new Set([
2626
3260
  "llm-rubric",
2627
3261
  "model-graded-closedqa",
2628
3262
  "model-graded-factuality",
2629
- "search-rubric"
3263
+ "search-rubric",
3264
+ "trajectory:goal-success"
2630
3265
  ]);
3266
// Base assertion types that assertionUsesTrace treats as trace consumers:
// the script-based types, the trace-* types, and the trajectory:* types.
const TRACE_AWARE_ASSERTION_TYPES = new Set([
  // Script assertions
  "javascript",
  "python",
  "ruby",
  // Span-level trace assertions
  "trace-error-spans",
  "trace-span-count",
  "trace-span-duration",
  // Trajectory assertions
  "trajectory:goal-success",
  "trajectory:step-count",
  "trajectory:tool-args-match",
  "trajectory:tool-sequence",
  "trajectory:tool-used"
]);
3279
/**
 * Reports whether an assertion is one of the trace-aware types.
 *
 * An "assert-set" counts when any of its nested assertions does; any other
 * assertion counts when its base type is in TRACE_AWARE_ASSERTION_TYPES.
 *
 * @param {object} assertion - Assertion config with a `type` (and `assert` for assert-sets).
 * @returns {boolean} True when the assertion, or a nested one, is trace-aware.
 */
function assertionUsesTrace(assertion) {
  return assertion.type === "assert-set"
    ? assertion.assert.some(assertionUsesTrace)
    : TRACE_AWARE_ASSERTION_TYPES.has(getAssertionBaseType(assertion));
}
3283
/**
 * Reports whether an assertion could need `context.trace` populated.
 *
 * Besides the trace-aware types, this includes any assertion whose string
 * value starts with "file://" or is accepted by isPackagePath, and any
 * assert-set containing such an assertion.
 *
 * @param {object} assertion - Assertion config with `type`, optional `value`, and `assert` for assert-sets.
 * @returns {boolean} True when trace context may be needed.
 */
function assertionMayNeedTraceContext(assertion) {
  if (assertionUsesTrace(assertion)) return true;
  if (assertion.type === "assert-set") {
    return assertion.assert.some(assertionMayNeedTraceContext);
  }
  const { value } = assertion;
  if (typeof value !== "string") return false;
  return value.startsWith("file://") || isPackagePath(value);
}
3288
/**
 * Reports whether any assertion in the list may need trace context.
 *
 * @param {object[] | null | undefined} assertions - Assertion configs; may be absent.
 * @returns {boolean} False for a missing list, otherwise whether any entry satisfies assertionMayNeedTraceContext.
 */
function hasTraceAwareAssertions(assertions) {
  if (assertions == null) return false;
  return Boolean(assertions.some(assertionMayNeedTraceContext));
}
3291
/**
 * Polls the trace store for a trace until its span count stops changing.
 *
 * The trace is returned as soon as a non-empty span count has been observed
 * on `stablePolls` consecutive polls, or on the final attempt if it has spans
 * by then. If no poll ever sees spans, the last fetched value is returned.
 *
 * Tunable via PROMPTFOO_TRACE_FETCH_MAX_ATTEMPTS, PROMPTFOO_TRACE_FETCH_RETRY_DELAY_MS
 * and PROMPTFOO_TRACE_FETCH_STABLE_POLLS, each clamped to a fixed range.
 *
 * @param {string} traceId - Identifier passed to `traceStore.getTrace`.
 * @returns {Promise<object | null | undefined>} Whatever the last `getTrace` call returned.
 */
async function loadTraceData(traceId) {
  const traceStore = getTraceStore();
  // Reads an integer setting and clamps it into [lower, upper].
  const readClamped = (envName, fallback, lower, upper) => Math.min(upper, Math.max(lower, getEnvInt(envName, fallback)));
  const maxAttempts = readClamped("PROMPTFOO_TRACE_FETCH_MAX_ATTEMPTS", DEFAULT_TRACE_FETCH_MAX_ATTEMPTS, 1, MAX_TRACE_FETCH_MAX_ATTEMPTS);
  const retryDelayMs = readClamped("PROMPTFOO_TRACE_FETCH_RETRY_DELAY_MS", DEFAULT_TRACE_FETCH_RETRY_DELAY_MS, 0, MAX_TRACE_FETCH_RETRY_DELAY_MS);
  const stablePolls = readClamped("PROMPTFOO_TRACE_FETCH_STABLE_POLLS", DEFAULT_TRACE_FETCH_STABLE_POLLS, 1, MAX_TRACE_FETCH_STABLE_POLLS);

  const finalAttempt = maxAttempts - 1;
  let previousSpanCount = -1;
  let stableStreak = 0;
  let trace = null;
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    trace = await traceStore.getTrace(traceId);
    const spanCount = trace?.spans?.length ?? 0;
    if (spanCount > 0) {
      // A changed count restarts the streak at 1 (this poll is the first observation of the new count).
      stableStreak = spanCount === previousSpanCount ? stableStreak + 1 : 1;
      if (stableStreak >= stablePolls || attempt === finalAttempt) return trace;
    } else {
      stableStreak = 0;
    }
    previousSpanCount = spanCount;
    // No point sleeping after the last poll.
    if (attempt < finalAttempt) await sleep(retryDelayMs);
  }
  return trace;
}
2631
3314
  const ASSERTION_HANDLERS = {
2632
3315
  "answer-relevance": handleAnswerRelevance,
2633
3316
  bleu: handleBleuScore,
@@ -2690,12 +3373,18 @@ const ASSERTION_HANDLERS = {
2690
3373
  ruby: handleRuby,
2691
3374
  "rouge-n": handleRougeScore,
2692
3375
  "search-rubric": handleSearchRubric,
3376
+ "skill-used": handleSkillUsed,
2693
3377
  similar: handleSimilar,
2694
3378
  "similar:cosine": handleSimilar,
2695
3379
  "similar:dot": handleSimilar,
2696
3380
  "similar:euclidean": handleSimilar,
2697
3381
  "starts-with": handleStartsWith,
2698
3382
  "tool-call-f1": handleToolCallF1,
3383
+ "trajectory:goal-success": handleTrajectoryGoalSuccess,
3384
+ "trajectory:tool-args-match": handleTrajectoryToolArgsMatch,
3385
+ "trajectory:step-count": handleTrajectoryStepCount,
3386
+ "trajectory:tool-sequence": handleTrajectoryToolSequence,
3387
+ "trajectory:tool-used": handleTrajectoryToolUsed,
2699
3388
  "trace-error-spans": handleTraceErrorSpans,
2700
3389
  "trace-span-count": handleTraceSpanCount,
2701
3390
  "trace-span-duration": handleTraceSpanDuration,
@@ -2738,7 +3427,7 @@ function isAssertionInverse(assertion) {
2738
3427
  function getAssertionBaseType(assertion) {
2739
3428
  return isAssertionInverse(assertion) ? assertion.type.slice(4) : assertion.type;
2740
3429
  }
2741
- async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs, providerResponse, traceId }) {
3430
+ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs, providerResponse, traceId, traceData }) {
2742
3431
  const resolvedVars = vars || test.vars || {};
2743
3432
  const { cost, logProbs, output: originalOutput } = providerResponse;
2744
3433
  let output = originalOutput;
@@ -2757,14 +3446,14 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
2757
3446
  providerResponse,
2758
3447
  ...assertion.config ? { config: structuredClone(assertion.config) } : {}
2759
3448
  };
2760
- if (traceId) try {
2761
- const traceData = await getTraceStore().getTrace(traceId);
2762
- if (traceData) context.trace = {
2763
- traceId: traceData.traceId,
2764
- evaluationId: traceData.evaluationId,
2765
- testCaseId: traceData.testCaseId,
2766
- metadata: traceData.metadata,
2767
- spans: traceData.spans || []
3449
+ if (traceId && assertionMayNeedTraceContext(assertion)) try {
3450
+ const resolvedTraceData = traceData === void 0 ? await loadTraceData(traceId) : traceData;
3451
+ if (resolvedTraceData) context.trace = {
3452
+ traceId: resolvedTraceData.traceId,
3453
+ evaluationId: resolvedTraceData.evaluationId,
3454
+ testCaseId: resolvedTraceData.testCaseId,
3455
+ metadata: resolvedTraceData.metadata,
3456
+ spans: resolvedTraceData.spans || []
2768
3457
  };
2769
3458
  } catch (error) {
2770
3459
  logger.debug(`Failed to fetch trace data for assertion: ${error}`);
@@ -2797,7 +3486,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
2797
3486
  };
2798
3487
  }
2799
3488
  else if (filePath.endsWith(".rb")) try {
2800
- const { runRuby } = await import("./rubyUtils-BUHu6PhO.js");
3489
+ const { runRuby } = await import("./rubyUtils-Rt6pKA96.js");
2801
3490
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
2802
3491
  logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
2803
3492
  } catch (error) {
@@ -2906,6 +3595,14 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
2906
3595
  index: i
2907
3596
  };
2908
3597
  }).flat();
3598
+ const shouldPreloadTrace = !!traceId && hasTraceAwareAssertions(asserts.map(({ assertion }) => assertion));
3599
+ let preloadedTraceData;
3600
+ if (shouldPreloadTrace && traceId) try {
3601
+ preloadedTraceData = await loadTraceData(traceId);
3602
+ } catch (error) {
3603
+ logger.debug(`Failed to preload trace data for assertions: ${error}`);
3604
+ preloadedTraceData = null;
3605
+ }
2909
3606
  await async.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
2910
3607
  if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
2911
3608
  const result = await runAssertion({
@@ -2917,7 +3614,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
2917
3614
  vars,
2918
3615
  latencyMs,
2919
3616
  assertIndex: index,
2920
- traceId
3617
+ traceId,
3618
+ traceData: preloadedTraceData
2921
3619
  });
2922
3620
  assertResult.addResult({
2923
3621
  index,
@@ -3083,7 +3781,7 @@ var CIProgressReporter = class {
3083
3781
  else {
3084
3782
  const eta = remaining / rate;
3085
3783
  if (eta > 1440) etaDisplay = ">24 hours";
3086
- else etaDisplay = `${Math.round(eta)} minute${Math.round(eta) !== 1 ? "s" : ""}`;
3784
+ else etaDisplay = `${Math.round(eta)} minute${Math.round(eta) === 1 ? "" : "s"}`;
3087
3785
  }
3088
3786
  const percentage = Math.floor(this.completedTests / this.totalTests * 100);
3089
3787
  logger.info(`[CI Progress] Evaluation running for ${this.formatElapsedTime(elapsed)} - Completed ${this.completedTests}/${this.totalTests} tests (${percentage}%)`);
@@ -3447,12 +4145,55 @@ var JsonlFileWriter = class {
3447
4145
  var ProgressBarManager = class {
3448
4146
  progressBar;
3449
4147
  isWebUI;
4148
+ originalLogCallback = null;
4149
+ installedLogCallback = null;
4150
+ pendingRender = null;
3450
4151
  totalCount = 0;
3451
4152
  completedCount = 0;
3452
4153
  concurrency = 1;
3453
4154
  constructor(isWebUI) {
3454
4155
  this.isWebUI = isWebUI;
3455
4156
  }
4157
+ clearProgressBarLine() {
4158
+ readline.cursorTo(process.stderr, 0);
4159
+ readline.clearLine(process.stderr, 0);
4160
+ }
4161
+ scheduleRender() {
4162
+ if (!this.progressBar || this.pendingRender) return;
4163
+ this.pendingRender = setImmediate(() => {
4164
+ this.pendingRender = null;
4165
+ this.progressBar?.render();
4166
+ });
4167
+ }
4168
+ handleLogMessage() {
4169
+ if (!this.progressBar) return;
4170
+ this.clearProgressBarLine();
4171
+ this.scheduleRender();
4172
+ }
4173
+ /**
4174
+ * Coordinate console logging with the progress bar to prevent visual corruption.
4175
+ */
4176
+ installLogInterceptor() {
4177
+ if (!this.progressBar || this.isWebUI || this.installedLogCallback) return;
4178
+ this.originalLogCallback = globalLogCallback;
4179
+ this.installedLogCallback = (message) => {
4180
+ this.originalLogCallback?.(message);
4181
+ this.handleLogMessage();
4182
+ };
4183
+ setLogCallback(this.installedLogCallback);
4184
+ }
4185
+ /**
4186
+ * Remove the log interceptor and restore original logger callback behavior.
4187
+ */
4188
+ removeLogInterceptor() {
4189
+ if (this.pendingRender) {
4190
+ clearImmediate(this.pendingRender);
4191
+ this.pendingRender = null;
4192
+ }
4193
+ if (this.installedLogCallback && globalLogCallback === this.installedLogCallback) setLogCallback(this.originalLogCallback);
4194
+ this.installedLogCallback = null;
4195
+ this.originalLogCallback = null;
4196
+ }
3456
4197
  /**
3457
4198
  * Initialize progress bar
3458
4199
  */
@@ -3472,7 +4213,8 @@ var ProgressBarManager = class {
3472
4213
  return `Evaluating [${bar}${spaces}] ${percentage}% | ${params.value}/${params.total}${errorsText} | ${payload.provider} ${payload.prompt} ${payload.vars}`;
3473
4214
  },
3474
4215
  hideCursor: true,
3475
- gracefulExit: true
4216
+ gracefulExit: true,
4217
+ stream: process.stderr
3476
4218
  }, cliProgress.Presets.shades_classic);
3477
4219
  this.progressBar.start(this.totalCount, 0, {
3478
4220
  provider: "",
@@ -3747,6 +4489,7 @@ async function runEval({ provider, prompt, test, testSuite, delay, nunjucksFilte
3747
4489
  const parts = traceContext.traceparent.split("-");
3748
4490
  if (parts.length >= 3) traceId = parts[1];
3749
4491
  }
4492
+ if (traceId && hasTraceAwareAssertions(test.assert)) await flushOtel();
3750
4493
  const checkResult = await runAssertions({
3751
4494
  prompt: renderedPrompt,
3752
4495
  provider,
@@ -4144,7 +4887,7 @@ var Evaluator = class {
4144
4887
  const defaultProvider = testSuite.defaultTest.provider;
4145
4888
  if (isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
4146
4889
  else if (typeof defaultProvider === "object" && defaultProvider.id) {
4147
- const { loadApiProvider } = await import("./providers-C1rOSHiR.js");
4890
+ const { loadApiProvider } = await import("./providers-BSLEaIQG.js");
4148
4891
  testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
4149
4892
  } else testCase.provider = defaultProvider;
4150
4893
  }
@@ -4228,7 +4971,7 @@ var Evaluator = class {
4228
4971
  if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
4229
4972
  }
4230
4973
  if (state.resume && this.evalRecord.persisted) try {
4231
- const { default: EvalResult } = await import("./evalResult-Cqj8pldJ.js");
4974
+ const { default: EvalResult } = await import("./evalResult-CYNHkk5A.js");
4232
4975
  const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: state.retryMode });
4233
4976
  const originalCount = runEvalOptions.length;
4234
4977
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -4428,7 +5171,7 @@ var Evaluator = class {
4428
5171
  if (isCI() && !isWebUI) {
4429
5172
  ciProgressReporter = new CIProgressReporter(runEvalOptions.length);
4430
5173
  ciProgressReporter.start();
4431
- } else if (this.options.showProgressBar && process.stdout.isTTY) progressBarManager = new ProgressBarManager(isWebUI);
5174
+ } else if (this.options.showProgressBar && process.stderr.isTTY) progressBarManager = new ProgressBarManager(isWebUI);
4432
5175
  this.options.progressCallback = (completed, total, index, evalStep, metrics) => {
4433
5176
  if (originalProgressCallback) originalProgressCallback(completed, total, index, evalStep, metrics);
4434
5177
  if (isWebUI) {
@@ -4449,7 +5192,10 @@ var Evaluator = class {
4449
5192
  if (serialRunEvalOptions.length > 0) logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
4450
5193
  if (concurrentRunEvalOptions.length > 0) logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
4451
5194
  }
4452
- if (this.options.showProgressBar && progressBarManager) await progressBarManager.initialize(runEvalOptions, concurrency, 0);
5195
+ if (this.options.showProgressBar && progressBarManager) {
5196
+ await progressBarManager.initialize(runEvalOptions, concurrency, 0);
5197
+ progressBarManager.installLogInterceptor();
5198
+ }
4453
5199
  try {
4454
5200
  if (serialRunEvalOptions.length > 0) for (const evalStep of serialRunEvalOptions) {
4455
5201
  checkAbort();
@@ -4475,7 +5221,10 @@ var Evaluator = class {
4475
5221
  else if (!targetUnavailable) {
4476
5222
  logger.info("Evaluation interrupted, saving progress...");
4477
5223
  if (globalTimeout) clearTimeout(globalTimeout);
4478
- if (progressBarManager) progressBarManager.stop();
5224
+ if (progressBarManager) {
5225
+ progressBarManager.removeLogInterceptor();
5226
+ progressBarManager.stop();
5227
+ }
4479
5228
  if (ciProgressReporter) ciProgressReporter.finish();
4480
5229
  this.evalRecord.setVars(Array.from(vars));
4481
5230
  await this.evalRecord.addPrompts(prompts);
@@ -4483,6 +5232,10 @@ var Evaluator = class {
4483
5232
  return this.evalRecord;
4484
5233
  }
4485
5234
  } else {
5235
+ if (progressBarManager) {
5236
+ progressBarManager.removeLogInterceptor();
5237
+ progressBarManager.stop();
5238
+ }
4486
5239
  if (ciProgressReporter) ciProgressReporter.error(`Evaluation failed: ${String(err)}`);
4487
5240
  throw err;
4488
5241
  }
@@ -4625,6 +5378,7 @@ var Evaluator = class {
4625
5378
  await this.evalRecord.addPrompts(prompts);
4626
5379
  try {
4627
5380
  if (progressBarManager) {
5381
+ progressBarManager.removeLogInterceptor();
4628
5382
  progressBarManager.complete();
4629
5383
  progressBarManager.stop();
4630
5384
  } else if (ciProgressReporter) ciProgressReporter.finish();
@@ -4790,6 +5544,6 @@ function evaluate(testSuite, evalRecord, options) {
4790
5544
  return new Evaluator(testSuite, evalRecord, options).evaluate();
4791
5545
  }
4792
5546
  //#endregion
4793
- export { runEval as a, readAssertions as c, isAllowedPrompt as i, renderMetricName as l, formatVarsForDisplay as n, doesPromptRefMatch as o, generateVarCombinations as r, assertions_default as s, evaluate as t, runAssertions as u };
5547
+ export { isAllowedPrompt as a, assertions_default as c, runAssertions as d, generateVarCombinations as i, readAssertions as l, evaluate as n, runEval as o, formatVarsForDisplay as r, doesPromptRefMatch as s, ProgressBarManager as t, renderMetricName as u };
4794
5548
 
4795
- //# sourceMappingURL=evaluator-DPFRbFIL.js.map
5549
+ //# sourceMappingURL=evaluator-BcvOGaam.js.map