promptfoo 0.121.1 → 0.121.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. package/README.md +2 -0
  2. package/dist/src/{accounts-xrUGFA6n.js → accounts-B2XmGjty.js} +5 -5
  3. package/dist/src/{accounts-Bx-x3bmW.cjs → accounts-BPyfpSeU.cjs} +5 -5
  4. package/dist/src/{accounts-CMqkzrVf.js → accounts-CFLK3mnD.js} +6 -6
  5. package/dist/src/{accounts-BgNJDBE6.js → accounts-Xatc0RYb.js} +5 -5
  6. package/dist/src/{agentic-utils-BKIN5PKu.js → agentic-utils-36epdqwB.js} +3 -3
  7. package/dist/src/{cometapi-DkXrKi5z.js → agentic-utils-D8yXo5Lm.js} +4 -61
  8. package/dist/src/{cometapi-vY6aDZgo.cjs → agentic-utils-DAVsChuB.cjs} +24 -62
  9. package/dist/src/agentic-utils-DIYAAYE7.js +153 -0
  10. package/dist/src/{agents-C-dDThPK.js → agents-BBVJCIYr.js} +226 -13
  11. package/dist/src/{agents-CErsqg5U.cjs → agents-BBWxKSM0.cjs} +7 -7
  12. package/dist/src/{agents-Dy2YpZpa.js → agents-Bqgfdokm.js} +227 -14
  13. package/dist/src/{agents-B0f4HICh.cjs → agents-CAYbM7qD.cjs} +226 -13
  14. package/dist/src/{agents-CVIn-Utx.js → agents-CLQ-P15P.js} +7 -7
  15. package/dist/src/{agents-DeH4Gu94.js → agents-CgBniSlI.js} +8 -8
  16. package/dist/src/{agents-CXknwsFX.js → agents-DSSTV4bv.js} +226 -13
  17. package/dist/src/{agents-aF4-T121.js → agents-wg3ohknq.js} +7 -7
  18. package/dist/src/{aimlapi-tg0Gkcvr.cjs → aimlapi-Bv8Fmc-b.cjs} +14 -14
  19. package/dist/src/{aimlapi-BNfTBexL.js → aimlapi-BwGC1TtS.js} +13 -13
  20. package/dist/src/{aimlapi-BAGZDo5G.js → aimlapi-DaC3qZ-o.js} +14 -14
  21. package/dist/src/{aimlapi-DHRKlBEA.js → aimlapi-MgSLdvy7.js} +13 -13
  22. package/dist/src/app/assets/index-B6l9CVVb.js +439 -0
  23. package/dist/src/app/assets/index-DyZ0Ep37.css +1 -0
  24. package/dist/src/app/assets/sync-CStkzc6u.js +4 -0
  25. package/dist/src/app/assets/vendor-charts-BnDWwBlI.js +36 -0
  26. package/dist/src/app/assets/vendor-markdown-Bz7N-ca6.js +29 -0
  27. package/dist/src/app/index.html +4 -4
  28. package/dist/src/{audio-tf_NBjlC.js → audio-Bn44pQxv.js} +4 -4
  29. package/dist/src/{audio-CHQ4r-RV.js → audio-DDA5WHdx.js} +4 -4
  30. package/dist/src/{audio-BWeaWovU.cjs → audio-DVFjQ67_.cjs} +4 -4
  31. package/dist/src/{audio-BRODU0UK.js → audio-DjU9GswO.js} +5 -5
  32. package/dist/src/{base-DBtwl2FR.cjs → base-BboXIF_0.cjs} +3 -3
  33. package/dist/src/{base-B4QJRyFS.js → base-CKjwebIH.js} +3 -3
  34. package/dist/src/{base-B0tcrnq_.js → base-CqzQ4K8j.js} +3 -3
  35. package/dist/src/{base-fEDN28WM.js → base-Cz2ZC_iA.js} +3 -3
  36. package/dist/src/{blobs-BAU-dXan.js → blobs-B1JriOyi.js} +3 -3
  37. package/dist/src/{blobs-qTYm-1PY.js → blobs-BUWmKWzo.js} +3 -3
  38. package/dist/src/{blobs-DvS-O6be.cjs → blobs-C6j0bvFz.cjs} +3 -3
  39. package/dist/src/{blobs-Bpg5rH6i.js → blobs-DXTl6J3H.js} +3 -3
  40. package/dist/src/{cache-COish3-W.cjs → cache-C5yFZ4gC.cjs} +75 -58
  41. package/dist/src/{cache-8XhNqPKW.js → cache-CaT5tPgo.js} +75 -58
  42. package/dist/src/cache-CyCanoMu.js +6 -0
  43. package/dist/src/{cache-CG0SlR1d.js → cache-DSqR6ezl.js} +75 -58
  44. package/dist/src/cache-Df_QFDNu.cjs +5 -0
  45. package/dist/src/{cache-D3eqDYGU.js → cache-HP0NP4k3.js} +75 -58
  46. package/dist/src/{chat-DHMH-N64.js → chat-B-52XYI1.js} +12 -12
  47. package/dist/src/{chat-BKm79wib.js → chat-B0iaWhoh.js} +16 -14
  48. package/dist/src/{chat-DxysjBvt.js → chat-BE0qTA8e.js} +13 -13
  49. package/dist/src/{chat-CRWNNq73.js → chat-BEwdgGEg.js} +16 -14
  50. package/dist/src/{chat-2K608PeQ.cjs → chat-BtIKkLKx.cjs} +13 -13
  51. package/dist/src/{chat-DaqekjFr.cjs → chat-CM8qWR3_.cjs} +17 -15
  52. package/dist/src/{chat-CM_kyI8B.js → chat-DK1U-eZ-.js} +12 -12
  53. package/dist/src/{chat-CznLWr_D.js → chat-pxmiVpWe.js} +16 -14
  54. package/dist/src/{chatkit-65VXf5SR.js → chatkit-BYGQlHlV.js} +4 -4
  55. package/dist/src/{chatkit-DKyPi1Gs.cjs → chatkit-Cx174XI3.cjs} +4 -4
  56. package/dist/src/{chatkit-BxFvW8KY.js → chatkit-_8eJqKcD.js} +4 -4
  57. package/dist/src/{chatkit-Be-Q-a9F.js → chatkit-a2D6mY6s.js} +4 -4
  58. package/dist/src/{claude-agent-sdk-CJH22shf.cjs → claude-agent-sdk-8ddRp1L2.cjs} +35 -17
  59. package/dist/src/{claude-agent-sdk-Dy5lT-Tx.js → claude-agent-sdk-Bq5EArsX.js} +33 -15
  60. package/dist/src/{claude-agent-sdk-BLTu0WBO.js → claude-agent-sdk-CMjh4LFH.js} +33 -15
  61. package/dist/src/{claude-agent-sdk-D6_k9FKA.js → claude-agent-sdk-HgbFioFw.js} +33 -15
  62. package/dist/src/cloud-DE3t1-ZI.js +4 -0
  63. package/dist/src/{cloud-Bc9526yV.js → cloud-z8KZpUoa.js} +3 -3
  64. package/dist/src/{cloudflare-ai-CWWJCRim.js → cloudflare-ai-BGyXlpXJ.js} +13 -13
  65. package/dist/src/{cloudflare-ai-C9r2sRhw.js → cloudflare-ai-Bbp26N0L.js} +13 -13
  66. package/dist/src/{cloudflare-ai-ClWSdor4.cjs → cloudflare-ai-C62x6MQG.cjs} +14 -14
  67. package/dist/src/{cloudflare-ai-ICsOuD-z.js → cloudflare-ai-DdKP9TKT.js} +14 -14
  68. package/dist/src/{cloudflare-gateway-D6xFc5pa.js → cloudflare-gateway-BwAaUgeW.js} +14 -14
  69. package/dist/src/{cloudflare-gateway-D6O7AlYb.js → cloudflare-gateway-D-e9i1Sn.js} +15 -15
  70. package/dist/src/{cloudflare-gateway-pXGHxJ47.js → cloudflare-gateway-DXhtXDRb.js} +15 -163
  71. package/dist/src/{cloudflare-gateway-C2_-KG5o.cjs → cloudflare-gateway-Dx36ftqF.cjs} +15 -15
  72. package/dist/src/{codex-sdk-DUwKWezN.js → codex-sdk-BQEw16R_.js} +180 -11
  73. package/dist/src/{codex-sdk-C6UMlxwV.js → codex-sdk-C_07GuVS.js} +180 -11
  74. package/dist/src/{codex-sdk-GGAw0qbD.js → codex-sdk-DE5G18dx.js} +180 -11
  75. package/dist/src/{codex-sdk-fAO0c3yA.cjs → codex-sdk-ZLKfDjqP.cjs} +181 -12
  76. package/dist/src/cometapi-BDyV-NNm.js +62 -0
  77. package/dist/src/cometapi-C3hOlM7-.cjs +62 -0
  78. package/dist/src/{cometapi-Bbjp5V4x.js → cometapi-hhL4TAh3.js} +14 -14
  79. package/dist/src/{cometapi-BasUi7-_.js → cometapi-sp7sJpBD.js} +15 -15
  80. package/dist/src/{completion-C_P3ypkJ.js → completion-BCimtq-h.js} +6 -6
  81. package/dist/src/{completion-6Mx_iXxK.js → completion-DCjv7RZ3.js} +6 -6
  82. package/dist/src/{completion-CDOouNzq.cjs → completion-DlXUhj5c.cjs} +6 -6
  83. package/dist/src/{completion-C5rtR_9P.js → completion-DoYy49ti.js} +6 -6
  84. package/dist/src/{createHash-CfZSc0b4.cjs → createHash-BYwImsYv.cjs} +2 -2
  85. package/dist/src/{docker-BwsKwxFs.cjs → docker-Cqj2-QVi.cjs} +14 -14
  86. package/dist/src/{docker-CZnqU1XV.js → docker-CxCkwMzc.js} +13 -13
  87. package/dist/src/{docker-DzxyDPIj.js → docker-DpguQj-w.js} +14 -14
  88. package/dist/src/{docker-5KcG-_86.js → docker-FeBni2dw.js} +13 -13
  89. package/dist/src/{esm-C03C-mv3.js → esm-7UIl0pPM.js} +2 -2
  90. package/dist/src/{esm-Cd1AjG1D.js → esm-CKWP3u_P.js} +3 -3
  91. package/dist/src/{esm-CnNt7sI4.cjs → esm-CipptfDu.cjs} +2 -2
  92. package/dist/src/{esm-CaIwzWR5.js → esm-SUNIX1x3.js} +3 -3
  93. package/dist/src/eval-7aEqoMs3.js +15 -0
  94. package/dist/src/{eval-DmFyWU7i.js → eval-BTqTn7lb.js} +10 -10
  95. package/dist/src/{evalResult-CDQiuUuf.js → evalResult-BkIhRdTe.js} +7 -7
  96. package/dist/src/evalResult-CYNHkk5A.js +12 -0
  97. package/dist/src/evalResult-CuvJeNiM.js +10 -0
  98. package/dist/src/{evalResult-CTG2AHOS.js → evalResult-DUDShQrm.js} +7 -7
  99. package/dist/src/{evalResult-Dap2CekP.cjs → evalResult-DpARzUCb.cjs} +7 -7
  100. package/dist/src/evalResult-tGdilrWt.cjs +10 -0
  101. package/dist/src/evaluator-BBUqRhz1.js +36 -0
  102. package/dist/src/{evaluator-DPFRbFIL.js → evaluator-BcvOGaam.js} +833 -79
  103. package/dist/src/{extractor-YMU_Gvt8.js → extractor-C8XwivI9.js} +6 -6
  104. package/dist/src/{extractor-CFG6bcWJ.js → extractor-CAZ2G3Kh.js} +6 -6
  105. package/dist/src/{extractor-DX36oYEv.cjs → extractor-DG3sSfXE.cjs} +6 -6
  106. package/dist/src/{extractor-M67RUtg6.js → extractor-D_wd8jxt.js} +6 -6
  107. package/dist/src/{fetch-4M3YRaqL.js → fetch-BiYv2BZc.js} +3 -3
  108. package/dist/src/{fetch-BxUk8odA.cjs → fetch-BnR9wSnm.cjs} +3 -3
  109. package/dist/src/{fetch-60Gzydls.js → fetch-CVAtKnI3.js} +3 -3
  110. package/dist/src/{fetch-BMv0O527.js → fetch-DoVRJZhJ.js} +4 -4
  111. package/dist/src/fetch-UWU706qb.js +5 -0
  112. package/dist/src/{genaiTracer-DN4dQywX.cjs → genaiTracer-BfxrvSUb.cjs} +2 -2
  113. package/dist/src/{graders-DOXycdlG.cjs → graders-BElhu9ZY.cjs} +126 -55
  114. package/dist/src/{graders-R9rYUM0d.js → graders-BXAJ0sbS.js} +120 -55
  115. package/dist/src/graders-BxfEguVY.js +32 -0
  116. package/dist/src/graders-CzVMbEnv.js +34 -0
  117. package/dist/src/{graders-CpdqD9PI.js → graders-DG7mhg-b.js} +120 -55
  118. package/dist/src/graders-DjCXfj0l.cjs +32 -0
  119. package/dist/src/{graders-CHO8EPM4.js → graders-RjHF8VfG.js} +120 -55
  120. package/dist/src/graders-kHzIWOKu.js +32 -0
  121. package/dist/src/{image-DTedmQPg.cjs → image--F58eEIn.cjs} +6 -6
  122. package/dist/src/{image-DJEvKveK.js → image-6WQXK8m8.js} +4 -4
  123. package/dist/src/{image-pAX56tPG.js → image-B8b6f36E.js} +6 -6
  124. package/dist/src/{image-BmEZqVmk.js → image-CoxZp9PZ.js} +6 -6
  125. package/dist/src/{image-gvmivTEe.js → image-DO0RYnjH.js} +5 -5
  126. package/dist/src/{image-CBBVXWuT.js → image-PoF6DN3x.js} +6 -6
  127. package/dist/src/{image-CDLQOcqT.cjs → image-fza3zuKs.cjs} +4 -4
  128. package/dist/src/{image-tL5hIOFh.js → image-xNbw5ph2.js} +4 -4
  129. package/dist/src/index.cjs +863 -110
  130. package/dist/src/index.d.cts +833 -60
  131. package/dist/src/index.d.ts +833 -60
  132. package/dist/src/index.js +860 -108
  133. package/dist/src/{interactiveCheck-BgLZUIt3.js → interactiveCheck-BnMYOjMu.js} +2 -2
  134. package/dist/src/{knowledgeBase-CoU-UQBg.js → knowledgeBase-Bi7CmDbx.js} +7 -7
  135. package/dist/src/{knowledgeBase-CLJybhnF.js → knowledgeBase-Ce3ofVan.js} +8 -8
  136. package/dist/src/{knowledgeBase-DjWPVqSb.js → knowledgeBase-DFRXPZl_.js} +7 -7
  137. package/dist/src/{knowledgeBase-wkxuRFhA.cjs → knowledgeBase-DqrLX8fy.cjs} +7 -7
  138. package/dist/src/{litellm-B9Hysuri.js → litellm-Bo2gQXpo.js} +16 -15
  139. package/dist/src/{litellm-ePxtr9F1.js → litellm-CKiAxnoM.js} +15 -14
  140. package/dist/src/{litellm-NYpQ8RQu.cjs → litellm-CnHI69aj.cjs} +16 -15
  141. package/dist/src/{litellm-CTfa0hqi.js → litellm-Tc294Jhj.js} +15 -14
  142. package/dist/src/{logger-KkObSCzq.js → logger-BcJBzSSA.js} +10 -14
  143. package/dist/src/{logger-DLcq4dWf.js → logger-BnkjG2jt.js} +10 -14
  144. package/dist/src/{logger-Cp1GPUjj.cjs → logger-D5iKBpu_.cjs} +27 -13
  145. package/dist/src/{logger-CT3IKMKA.js → logger-DO8_zM18.js} +10 -14
  146. package/dist/src/{luma-ray-BW9IRGIc.js → luma-ray-0ehMPt5N.js} +10 -10
  147. package/dist/src/{luma-ray-BE2mOt6N.js → luma-ray-C9q8rdQe.js} +9 -9
  148. package/dist/src/{luma-ray-Cm1KZBhs.js → luma-ray-DP0QA9qn.js} +9 -9
  149. package/dist/src/{luma-ray-B0GGNRc1.cjs → luma-ray-m9Ku2meV.cjs} +9 -9
  150. package/dist/src/main.js +69 -71
  151. package/dist/src/{messages-1x9atZmP.js → messages-DJNo37Ko.js} +14 -9
  152. package/dist/src/{messages-BLbWdsyt.js → messages-Dy9QecMs.js} +14 -9
  153. package/dist/src/{messages-1JrJs91T.cjs → messages-HJsyEh4o.cjs} +15 -10
  154. package/dist/src/{messages-D8EA0oDc.js → messages-biC_ex-p.js} +14 -9
  155. package/dist/src/{modelslab-C1OLRmVX.js → modelslab-B5J-ZM5c.js} +9 -9
  156. package/dist/src/{modelslab-CqXBy3U8.js → modelslab-BI458moT.js} +10 -10
  157. package/dist/src/{modelslab-X5-4LroM.js → modelslab-BTOT8FUO.js} +9 -9
  158. package/dist/src/{modelslab-DcOSFwKh.cjs → modelslab-IQbNg-r7.cjs} +9 -9
  159. package/dist/src/{nova-reel-DihqLeol.js → nova-reel-BZ9y-Y5s.js} +9 -9
  160. package/dist/src/{nova-reel-D9xfaMBs.cjs → nova-reel-CE5etkv9.cjs} +9 -9
  161. package/dist/src/{nova-reel-D2ZkOSyr.js → nova-reel-DEeQlnOJ.js} +10 -10
  162. package/dist/src/{nova-reel-BgS1ZWuK.js → nova-reel-Xw1SXLpg.js} +9 -9
  163. package/dist/src/{nova-sonic-Q3BOJeig.js → nova-sonic-DWswpN1E.js} +7 -7
  164. package/dist/src/{nova-sonic-DezhVUYT.js → nova-sonic-DXTLpi-r.js} +6 -6
  165. package/dist/src/{nova-sonic-DVu3mMIy.cjs → nova-sonic-N0yCm0vb.cjs} +6 -6
  166. package/dist/src/{nova-sonic-P-CdUMlV.js → nova-sonic-Ogqf-csn.js} +6 -6
  167. package/dist/src/{openai-DhbB7eWK.js → openai-BMcwgD5C.js} +2 -2
  168. package/dist/src/{openai-j-sE2O7r.js → openai-BcB5KlTk.js} +2 -2
  169. package/dist/src/{openai-Cuif0GEt.cjs → openai-CoxGAQwn.cjs} +2 -2
  170. package/dist/src/{openai-DElQ-fPX.js → openai-D6wITiVn.js} +2 -2
  171. package/dist/src/{openclaw-Bv1DINsX.js → openclaw-0Sv7AK3O.js} +172 -109
  172. package/dist/src/{openclaw-DAfWQn-o.cjs → openclaw-CXxbKgDH.cjs} +174 -110
  173. package/dist/src/{openclaw-BiSZPL7J.js → openclaw-D1FSCps-.js} +172 -109
  174. package/dist/src/{openclaw-D1D_ej1z.js → openclaw-D2ENvu7a.js} +173 -110
  175. package/dist/src/{opencode-sdk-D95s6SnR.js → opencode-sdk-C71Z0ehR.js} +13 -13
  176. package/dist/src/{opencode-sdk-DxUPkLT7.js → opencode-sdk-CHCs7dEb.js} +12 -12
  177. package/dist/src/{opencode-sdk-C7m-wRfI.js → opencode-sdk-DDxj4QqH.js} +12 -12
  178. package/dist/src/{opencode-sdk-CfaLN8PY.cjs → opencode-sdk-WWJhnbKr.cjs} +16 -16
  179. package/dist/src/{otlpReceiver-g3ByGaXs.js → otlpReceiver-C9KlUtxh.js} +6 -6
  180. package/dist/src/{otlpReceiver--AIRW_S4.js → otlpReceiver-CZL48YfC.js} +6 -6
  181. package/dist/src/{otlpReceiver-Bn5wGB1v.js → otlpReceiver-CavGAA6k.js} +6 -6
  182. package/dist/src/{otlpReceiver-Diec4cln.cjs → otlpReceiver-DHKqJlsz.cjs} +6 -6
  183. package/dist/src/{providerRegistry-B0RUOLI_.js → providerRegistry-B9lh-_tx.js} +2 -2
  184. package/dist/src/{providerRegistry-Civky8Ar.cjs → providerRegistry-BTDgfV5h.cjs} +2 -2
  185. package/dist/src/{providerRegistry-CD8MEar9.js → providerRegistry-BkzVH5Ba.js} +2 -2
  186. package/dist/src/{providerRegistry-DM8rZYol.js → providerRegistry-CUWki5mQ.js} +2 -2
  187. package/dist/src/providers-BSLEaIQG.js +32 -0
  188. package/dist/src/{providers-CFu-TZl-.cjs → providers-CScd1wN6.cjs} +733 -464
  189. package/dist/src/{providers-CFLy1_ji.js → providers-Ch6Mr0gn.js} +795 -526
  190. package/dist/src/{providers-BKRJTjBz.js → providers-Cn73d5sr.js} +795 -526
  191. package/dist/src/providers-D-FnDg8k.cjs +31 -0
  192. package/dist/src/providers-DEYiFVAo.js +30 -0
  193. package/dist/src/{providers-B3HvufyI.js → providers-DvddrgxL.js} +795 -526
  194. package/dist/src/providers-sS2WI8YD.js +30 -0
  195. package/dist/src/{pythonUtils-D6fwaDSg.js → pythonUtils-Bzwbgpbg.js} +3 -3
  196. package/dist/src/{pythonUtils-D5nxkQ0P.js → pythonUtils-Cpo0Ez1p.js} +3 -3
  197. package/dist/src/{pythonUtils-CTU3Y3lw.cjs → pythonUtils-dAVigVK-.cjs} +3 -3
  198. package/dist/src/{pythonUtils-C3py6GC1.js → pythonUtils-wIqk7zAf.js} +3 -3
  199. package/dist/src/{quiverai-CI6gYJVI.js → quiverai-BeofbLVc.js} +4 -4
  200. package/dist/src/{quiverai-MHSxbmmZ.js → quiverai-CCQn73lq.js} +5 -5
  201. package/dist/src/{quiverai-CLkWkyZc.cjs → quiverai-CcUhPIBg.cjs} +4 -4
  202. package/dist/src/{quiverai-C2jVwbH1.js → quiverai-DVSEqJiq.js} +4 -4
  203. package/dist/src/{render-Drod8m7K.js → render-BHl6QVq9.js} +3 -3
  204. package/dist/src/{responses-CGw0DCzh.js → responses-BKP_WYis.js} +16 -12
  205. package/dist/src/{responses-BKqJmhhc.js → responses-CQb1Tj69.js} +16 -12
  206. package/dist/src/{responses-jxdehPkC.js → responses-CgNyTPsY.js} +16 -12
  207. package/dist/src/{responses-tD4Bd4dc.cjs → responses-mo0KQDbu.cjs} +16 -12
  208. package/dist/src/rubyUtils-B1HXG4ej.cjs +4 -0
  209. package/dist/src/{rubyUtils-DhCAlxZr.cjs → rubyUtils-CGeUtCfW.cjs} +3 -3
  210. package/dist/src/{rubyUtils-Boc4HZzX.js → rubyUtils-CiVfln3g.js} +3 -3
  211. package/dist/src/{rubyUtils-BcuGX77l.js → rubyUtils-DECSbsfY.js} +3 -3
  212. package/dist/src/{rubyUtils-BUVePouc.js → rubyUtils-PgU-gHmx.js} +3 -3
  213. package/dist/src/rubyUtils-Rt6pKA96.js +5 -0
  214. package/dist/src/{sagemaker-BK4Zb993.js → sagemaker-CVv8W7so.js} +17 -17
  215. package/dist/src/{sagemaker-D2Q1c-sD.js → sagemaker-CqeASYE5.js} +17 -17
  216. package/dist/src/{sagemaker-BfiWTmvn.js → sagemaker-MUbD5V3v.js} +18 -18
  217. package/dist/src/{sagemaker-CcQHM1jV.cjs → sagemaker-jiw1wQa-.cjs} +17 -17
  218. package/dist/src/{scanner-J8CA3LsV.js → scanner-DVDeUz1r.js} +10 -10
  219. package/dist/src/server/index.js +864 -112
  220. package/dist/src/server-B0Xh1Gx-.js +7 -0
  221. package/dist/src/{server-B0PPuDw-.cjs → server-BtoCXeXI.cjs} +4 -4
  222. package/dist/src/{server-BC7XJFgr.js → server-CP9qKM40.js} +4 -4
  223. package/dist/src/{server-OAs3nBRT.js → server-Cns05F1j.js} +5 -5
  224. package/dist/src/server-DJTKu9IR.cjs +5 -0
  225. package/dist/src/{server-DbFphssR.js → server-DZ9MtCn0.js} +6 -6
  226. package/dist/src/{signal-BOTbd53Z.js → signal-C3ZTsUgi.js} +3 -3
  227. package/dist/src/{slack-DXMKtA-f.js → slack-2sdpGzbt.js} +2 -2
  228. package/dist/src/{slack-BmVAVGaK.cjs → slack-94iG3T0s.cjs} +2 -2
  229. package/dist/src/{slack-DCUPTzS2.js → slack-BR0HtO3K.js} +2 -2
  230. package/dist/src/{slack-DOdy_kyv.js → slack-DCEV-vWP.js} +2 -2
  231. package/dist/src/store-C5u6MgC8.js +6 -0
  232. package/dist/src/{store-BSc-TF2w.cjs → store-CLyU7AtI.cjs} +17 -5
  233. package/dist/src/store-CNHk-De4.cjs +5 -0
  234. package/dist/src/{store-DQLEjuEO.js → store-Cj258DgL.js} +17 -5
  235. package/dist/src/{store-D1tv90v3.js → store-P8OKm19S.js} +17 -5
  236. package/dist/src/{store-Ub2vaGJ1.js → store-VB0GP46K.js} +17 -5
  237. package/dist/src/{tables-xKANLRBD.js → tables-BEIFz2tM.js} +3 -3
  238. package/dist/src/{tables-C7K-XKWp.cjs → tables-BdZQEpRz.cjs} +3 -3
  239. package/dist/src/{tables-D36WTqKX.js → tables-DmzvLbeZ.js} +3 -3
  240. package/dist/src/{tables-5EvT_Bwn.js → tables-kC7R5kiK.js} +3 -3
  241. package/dist/src/{telemetry-C2YDkUQH.js → telemetry-BnH5VJAU.js} +4 -4
  242. package/dist/src/{telemetry-C15ziL8u.js → telemetry-BugWqKiu.js} +4 -4
  243. package/dist/src/{telemetry-DMb2Mpfm.js → telemetry-DPXLd7UE.js} +4 -4
  244. package/dist/src/telemetry-Yig0Tino.js +7 -0
  245. package/dist/src/telemetry-p8Pwqm1i.cjs +5 -0
  246. package/dist/src/{telemetry-CbrnxHp_.cjs → telemetry-re627Lre.cjs} +4 -4
  247. package/dist/src/{transcription-CL78qbOU.cjs → transcription-BvtsrzRG.cjs} +13 -13
  248. package/dist/src/{transcription-DAtxHhAM.js → transcription-CaMivnjG.js} +13 -13
  249. package/dist/src/{transcription-QHh3AH6Z.js → transcription-DOMMTu01.js} +14 -14
  250. package/dist/src/{transcription-LNZTNUUL.js → transcription-Hb3VnC4M.js} +13 -13
  251. package/dist/src/{transform-DOcQeLld.cjs → transform-0BwoBsvO.cjs} +19 -5
  252. package/dist/src/{transform-DGxXocjk.js → transform-B2-jIv68.js} +8 -6
  253. package/dist/src/{transform-DECvGmzp.js → transform-BqPkNPYm.js} +4 -4
  254. package/dist/src/{transform-aa6tmVpZ.js → transform-BzK09Q_9.js} +4 -4
  255. package/dist/src/transform-ChNIpHz7.js +6 -0
  256. package/dist/src/{transform-Cgi24fJ7.js → transform-DrleutM3.js} +8 -6
  257. package/dist/src/{transform-DGLazrMm.js → transform-DyDAwEpE.js} +8 -6
  258. package/dist/src/transform-PtQ6rAE3.cjs +5 -0
  259. package/dist/src/{transform-CzK1Q0zl.cjs → transform-ZrG2dvlo.cjs} +4 -4
  260. package/dist/src/{transform-DilY9wbS.js → transform-ljLYHEPh.js} +4 -4
  261. package/dist/src/{transformersAvailability-CEVM2GNQ.js → transformersAvailability-BGkzavwb.js} +1 -1
  262. package/dist/src/{transformersAvailability-CwayUSlh.cjs → transformersAvailability-DKoRtQLy.cjs} +1 -1
  263. package/dist/src/{types-CH3Ge2sE.js → types-CIhFeUC4.js} +45 -11
  264. package/dist/src/{types-CN_TZ2GJ.js → types-Cd3ygw8W.js} +45 -11
  265. package/dist/src/{types-LJ0r3wbR.cjs → types-D8cGDZbL.cjs} +46 -12
  266. package/dist/src/{types-CLKiCBW3.js → types-q8GXGF65.js} +45 -11
  267. package/dist/src/{util-CchiqXh_.cjs → util--9u9UVCt.cjs} +3 -3
  268. package/dist/src/{util-5cB-L7U3.js → util-BLvy9qfE.js} +7 -11
  269. package/dist/src/{util-YT5HPZaS.js → util-Bm3E9jpK.js} +7 -11
  270. package/dist/src/{util-6-GqIvzS.js → util-BtoGs5Cb.js} +18 -4
  271. package/dist/src/{util-Db0a0AFH.cjs → util-CFj4YKIn.cjs} +18 -4
  272. package/dist/src/{util-Dlz_Wvgm.js → util-CMMkIxfU.js} +7 -11
  273. package/dist/src/{util-Betm42rL.js → util-CgDCK4KI.js} +18 -4
  274. package/dist/src/{util-Yz-1aEhW.cjs → util-CuLo2pMR.cjs} +7 -11
  275. package/dist/src/{util-C-PPYSMq.js → util-DM2rTn_6.js} +18 -4
  276. package/dist/src/{util-B7T3SiBS.js → util-DMFeUvLz.js} +3 -3
  277. package/dist/src/{util-ZZH-3QZz.js → util-DbVG-yZU.js} +3 -3
  278. package/dist/src/{util-DaWTWKBK.js → util-vNmDL5DT.js} +3 -3
  279. package/dist/src/{utils-XiOAgly5.js → utils-CFxO9KGo.js} +2 -2
  280. package/dist/src/{utils-f2-Moju7.js → utils-DEuL4VNB.js} +2 -2
  281. package/dist/src/{utils-Cz9qXqII.cjs → utils-DKw8mrgr.cjs} +3 -3
  282. package/dist/src/{utils-dLokC-eR.js → utils-DOjD4dTC.js} +2 -2
  283. package/dist/tsconfig.tsbuildinfo +1 -1
  284. package/package.json +38 -38
  285. package/dist/src/app/assets/index-BFCZg7hQ.js +0 -439
  286. package/dist/src/app/assets/index-NCn4eVBv.css +0 -1
  287. package/dist/src/app/assets/sync-9qqYcY-B.js +0 -4
  288. package/dist/src/app/assets/vendor-charts-CCl15Imd.js +0 -36
  289. package/dist/src/app/assets/vendor-markdown-0tekx3KX.js +0 -29
  290. package/dist/src/cache-Bbn1Nyrd.cjs +0 -5
  291. package/dist/src/cache-BwsMSda7.js +0 -6
  292. package/dist/src/cloud-DmE0EwsY.js +0 -4
  293. package/dist/src/eval-17JizQIv.js +0 -15
  294. package/dist/src/evalResult-Cqj8pldJ.js +0 -12
  295. package/dist/src/evalResult-DvcJAWJU.cjs +0 -10
  296. package/dist/src/evalResult-Hftn-S_i.js +0 -10
  297. package/dist/src/evaluator-B2CFNt-P.js +0 -36
  298. package/dist/src/fetch-KV5kNASw.js +0 -5
  299. package/dist/src/graders-Bu0H9nXi.js +0 -32
  300. package/dist/src/graders-Cfhkvx-e.js +0 -34
  301. package/dist/src/graders-DClJVpGP.cjs +0 -32
  302. package/dist/src/graders-DcnJsrMO.js +0 -32
  303. package/dist/src/providers-C1rOSHiR.js +0 -32
  304. package/dist/src/providers-CxmDwEFf.cjs +0 -31
  305. package/dist/src/providers-Dodakqr0.js +0 -30
  306. package/dist/src/providers-GIQ2TcsA.js +0 -30
  307. package/dist/src/rubyUtils-BUHu6PhO.js +0 -5
  308. package/dist/src/rubyUtils-CP42kMvq.cjs +0 -4
  309. package/dist/src/server-B1vi21hA.js +0 -7
  310. package/dist/src/server-Cm9Kai_h.cjs +0 -5
  311. package/dist/src/store-BNmZ1KAz.cjs +0 -5
  312. package/dist/src/store-BltJg2cd.js +0 -6
  313. package/dist/src/telemetry-5BCRNBbe.cjs +0 -5
  314. package/dist/src/telemetry-D4W5hboe.js +0 -7
  315. package/dist/src/transform-DTGDnAzW.js +0 -6
  316. package/dist/src/transform-m3qNw4KP.cjs +0 -5
@@ -1,24 +1,24 @@
1
- import { C as isCI, _ as getEnvBool, b as getEnvString, i as logger, l as extractFirstJsonObject, m as safeJsonStringify, u as extractJsonObjects, w as state } from "./logger-DLcq4dWf.js";
2
- import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-4M3YRaqL.js";
1
+ import { C as getEnvString, D as state, E as isCI, _ as safeJsonStringify, a as logger, b as getEnvBool, f as extractFirstJsonObject, p as extractJsonObjects } from "./logger-BnkjG2jt.js";
2
+ import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-BiYv2BZc.js";
3
3
  import { t as invariant } from "./invariant-vgHWClmd.js";
4
- import { o as getUserEmail } from "./accounts-BgNJDBE6.js";
5
- import { c as getRemoteGenerationUrl, p as shouldGenerateRemote } from "./server-OAs3nBRT.js";
6
- import { r as importModule } from "./esm-CaIwzWR5.js";
7
- import { C as isValidReusablePolicyId, X as LLAMA_GUARD_REPLICATE_PROVIDER, k as MULTI_TURN_STRATEGIES, lt as PromptSchema, x as PolicyObjectSchema } from "./types-CN_TZ2GJ.js";
4
+ import { o as getUserEmail } from "./accounts-Xatc0RYb.js";
5
+ import { c as getRemoteGenerationUrl, p as shouldGenerateRemote } from "./server-Cns05F1j.js";
6
+ import { r as importModule } from "./esm-CKWP3u_P.js";
7
+ import { C as isValidReusablePolicyId, X as LLAMA_GUARD_REPLICATE_PROVIDER, k as MULTI_TURN_STRATEGIES, lt as PromptSchema, x as PolicyObjectSchema } from "./types-Cd3ygw8W.js";
8
8
  import { i as isJavascriptFile } from "./fileExtensions-LcDYkU4v.js";
9
9
  import { n as sha256 } from "./createHash-CTQmL3G2.js";
10
- import { i as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-dLokC-eR.js";
11
- import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-D3eqDYGU.js";
10
+ import { i as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-DOjD4dTC.js";
11
+ import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-HP0NP4k3.js";
12
12
  import { r as accumulateTokenUsage } from "./tokenUsageUtils-BDGe-iyI.js";
13
- import { $ as DefaultSynthesizeProvider$1, A as removePrefix, C as extractInputVarsFromPrompt, D as getShortPluginId, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, I as redteamProviderManager, K as DefaultGradingProvider$3, O as isBasicRefusal, Q as DefaultSuggestionsProvider$2, S as extractGoalFromPrompt, T as extractVariablesFromJson, U as MistralChatCompletionProvider, V as REDTEAM_MEMORY_POISONING_PLUGIN_ID, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, b as checkExfilTracking, ct as getPoliciesFromCloud, et as AzureModerationProvider, k as isEmptyResponse, n as loadApiProvider, nt as AzureChatCompletionProvider, o as getFileHashes, s as parseScriptParts, tt as AzureEmbeddingProvider, w as extractPromptFromTags, x as extractAllPromptsFromTags } from "./providers-B3HvufyI.js";
14
- import { r as runPython } from "./pythonUtils-D6fwaDSg.js";
15
- import { I as getNunjucksEngine, O as maybeLoadToolsFromExternalFile, P as extractVariablesFromTemplate, R as parseFileUrl, S as getNunjucksEngineForFilePath, T as maybeLoadFromExternalFile, k as parsePathOrGlob, w as maybeLoadConfigFromExternalFile } from "./util-5cB-L7U3.js";
16
- import { t as OpenAiChatCompletionProvider } from "./chat-BKm79wib.js";
17
- import { v as hasGoogleDefaultCredentials } from "./transform-DGxXocjk.js";
18
- import { t as AnthropicMessagesProvider } from "./messages-D8EA0oDc.js";
19
- import { t as OpenAiResponsesProvider } from "./responses-jxdehPkC.js";
20
- import { n as OpenAiEmbeddingProvider } from "./completion-6Mx_iXxK.js";
21
- import { n as transform } from "./transform-aa6tmVpZ.js";
13
+ import { $ as DefaultSynthesizeProvider$1, A as removePrefix, C as extractInputVarsFromPrompt, D as getShortPluginId, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, I as redteamProviderManager, K as DefaultGradingProvider$3, O as isBasicRefusal, Q as DefaultSuggestionsProvider$2, S as extractGoalFromPrompt, T as extractVariablesFromJson, U as MistralChatCompletionProvider, V as REDTEAM_MEMORY_POISONING_PLUGIN_ID, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, b as checkExfilTracking, ct as getPoliciesFromCloud, et as AzureModerationProvider, k as isEmptyResponse, n as loadApiProvider, nt as AzureChatCompletionProvider, o as getFileHashes, s as parseScriptParts, tt as AzureEmbeddingProvider, w as extractPromptFromTags, x as extractAllPromptsFromTags } from "./providers-DvddrgxL.js";
14
+ import { r as runPython } from "./pythonUtils-Bzwbgpbg.js";
15
+ import { I as getNunjucksEngine, O as maybeLoadToolsFromExternalFile, P as extractVariablesFromTemplate, R as parseFileUrl, S as getNunjucksEngineForFilePath, T as maybeLoadFromExternalFile, k as parsePathOrGlob, w as maybeLoadConfigFromExternalFile } from "./util-BLvy9qfE.js";
16
+ import { t as OpenAiChatCompletionProvider } from "./chat-B0iaWhoh.js";
17
+ import { b as hasGoogleDefaultCredentials } from "./transform-B2-jIv68.js";
18
+ import { t as AnthropicMessagesProvider } from "./messages-biC_ex-p.js";
19
+ import { t as OpenAiResponsesProvider } from "./responses-CgNyTPsY.js";
20
+ import { n as OpenAiEmbeddingProvider } from "./completion-BCimtq-h.js";
21
+ import { n as transform } from "./transform-BzK09Q_9.js";
22
22
  import * as fs$1 from "fs";
23
23
  import fs from "fs";
24
24
  import path from "path";
@@ -588,6 +588,37 @@ const DEFAULT_WEB_SEARCH_PROMPT = JSON.stringify([{
588
588
  role: "user",
589
589
  content: "<Output>\n{{ output }}\n</Output>\n<Rubric>\n{{ rubric }}\n</Rubric>"
590
590
  }]);
591
+ const TRAJECTORY_GOAL_SUCCESS_PROMPT = JSON.stringify([{
592
+ role: "system",
593
+ content: dedent`You are grading whether an AI agent successfully completed a goal based on its final output and a summarized execution trajectory. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
594
+
595
+ Judge end-to-end success, not stylistic perfection.
596
+ Use the trajectory as evidence for what the agent actually did.
597
+ Give partial credit when the agent made progress but did not fully achieve the goal.
598
+
599
+ Examples:
600
+
601
+ <Goal>Find the order status and tell the user whether it has shipped</Goal>
602
+ <Trajectory>{"stepCount":2,"steps":[{"index":1,"type":"tool","name":"search_orders"},{"index":2,"type":"message","name":"agent response"}]}</Trajectory>
603
+ <Output>Your order shipped yesterday and should arrive on Tuesday.</Output>
604
+ {"reason":"The agent used the order lookup tool and gave the user the shipping status, so the goal was achieved.","pass":true,"score":1.0}
605
+
606
+ <Goal>Find the order status and tell the user whether it has shipped</Goal>
607
+ <Trajectory>{"stepCount":1,"steps":[{"index":1,"type":"message","name":"agent response"}]}</Trajectory>
608
+ <Output>I cannot check your order right now.</Output>
609
+ {"reason":"The agent did not show evidence of checking the order and did not provide the requested status.","pass":false,"score":0.0}`
610
+ }, {
611
+ role: "user",
612
+ content: dedent`<Goal>
613
+ {{ goal }}
614
+ </Goal>
615
+ <Trajectory>
616
+ {{ trajectory }}
617
+ </Trajectory>
618
+ <Output>
619
+ {{ output }}
620
+ </Output>`
621
+ }]);
591
622
  //#endregion
592
623
  //#region src/prompts/processors/csv.ts
593
624
  /**
@@ -1578,45 +1609,31 @@ async function renderLlmRubricPrompt(rubricPrompt, context) {
1578
1609
  } catch {}
1579
1610
  return nunjucks.renderString(rubricPrompt, processedContext);
1580
1611
  }
1581
- async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
1582
- if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1583
- if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
1584
- ...await doRemoteGrading({
1585
- task: "llm-rubric",
1586
- rubric,
1587
- output: llmOutput,
1588
- vars: vars || {}
1589
- }),
1590
- assertion
1591
- };
1592
- const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading?.rubricPrompt, DEFAULT_GRADING_PROMPT), {
1593
- output: tryParse(llmOutput),
1594
- rubric,
1595
- ...vars || {}
1596
- });
1597
- const defaultProviders = await getDefaultProviders();
1598
- const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
1599
- const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, "llm-rubric check"), prompt, "llm-rubric", {
1600
- output: tryParse(llmOutput),
1601
- rubric,
1602
- ...vars || {}
1603
- }, providerCallContext);
1604
- if (resp.error || !resp.output) {
1605
- if (options?.throwOnError) throw new LlmRubricProviderError(resp.error || "No output");
1606
- return fail(resp.error || "No output", resp.tokenUsage);
1607
- }
1612
+ function parseJsonGradingResponse(label, resp) {
1608
1613
  let jsonObjects = [];
1609
1614
  if (typeof resp.output === "string") try {
1610
1615
  jsonObjects = extractJsonObjects(resp.output);
1611
- if (jsonObjects.length === 0) return fail("Could not extract JSON from llm-rubric response", resp.tokenUsage);
1616
+ if (jsonObjects.length === 0) return { failure: fail(`Could not extract JSON from ${label} response`, resp.tokenUsage) };
1612
1617
  } catch (err) {
1613
- return fail(`llm-rubric produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage);
1618
+ return { failure: fail(`${label} produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage) };
1614
1619
  }
1615
1620
  else if (typeof resp.output === "object") jsonObjects = [resp.output];
1616
- else return fail(`llm-rubric produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1617
- if (!Array.isArray(jsonObjects) || jsonObjects.length === 0) return fail(`llm-rubric produced malformed response - We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1621
+ else return { failure: fail(`${label} produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
1618
1622
  const parsed = jsonObjects[0];
1619
- if (typeof parsed !== "object" || parsed === null || parsed === void 0) return fail(`llm-rubric produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1623
+ if (typeof parsed !== "object" || parsed === null || parsed === void 0) return { failure: fail(`${label} produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
1624
+ return { parsed };
1625
+ }
1626
+ async function runJsonGradingPrompt({ assertion, checkName, defaultPrompt, grading, label, providerCallContext, throwOnError, vars }) {
1627
+ const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading.rubricPrompt, defaultPrompt), vars);
1628
+ const defaultProviders = await getDefaultProviders();
1629
+ const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
1630
+ const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, checkName), prompt, label, vars, providerCallContext);
1631
+ if (resp.error || !resp.output) {
1632
+ if (throwOnError) throw new Error(resp.error || "No output");
1633
+ return fail(resp.error || "No output", resp.tokenUsage);
1634
+ }
1635
+ const { parsed, failure } = parseJsonGradingResponse(label, resp);
1636
+ if (!parsed) return failure;
1620
1637
  let pass = parsed.pass ?? true;
1621
1638
  if (typeof pass !== "boolean") pass = /^(true|yes|pass|y)$/i.test(String(pass));
1622
1639
  let score = parsed.score;
@@ -1644,6 +1661,54 @@ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, opt
1644
1661
  metadata: { renderedGradingPrompt: prompt }
1645
1662
  };
1646
1663
  }
1664
+ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
1665
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1666
+ if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
1667
+ ...await doRemoteGrading({
1668
+ task: "llm-rubric",
1669
+ rubric,
1670
+ output: llmOutput,
1671
+ vars: vars || {}
1672
+ }),
1673
+ assertion
1674
+ };
1675
+ try {
1676
+ return await runJsonGradingPrompt({
1677
+ assertion,
1678
+ checkName: "llm-rubric check",
1679
+ defaultPrompt: DEFAULT_GRADING_PROMPT,
1680
+ grading,
1681
+ label: "llm-rubric",
1682
+ providerCallContext,
1683
+ throwOnError: options?.throwOnError,
1684
+ vars: {
1685
+ output: tryParse(llmOutput),
1686
+ rubric,
1687
+ ...vars || {}
1688
+ }
1689
+ });
1690
+ } catch (error) {
1691
+ if (options?.throwOnError) throw new LlmRubricProviderError(error.message || "No output");
1692
+ throw error;
1693
+ }
1694
+ }
1695
+ async function matchesTrajectoryGoalSuccess(goal, trajectory, llmOutput, grading, vars, assertion, providerCallContext) {
1696
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1697
+ return runJsonGradingPrompt({
1698
+ assertion,
1699
+ checkName: "trajectory:goal-success check",
1700
+ defaultPrompt: TRAJECTORY_GOAL_SUCCESS_PROMPT,
1701
+ grading,
1702
+ label: "trajectory:goal-success",
1703
+ providerCallContext,
1704
+ vars: {
1705
+ ...vars || {},
1706
+ goal,
1707
+ output: tryParse(llmOutput),
1708
+ trajectory
1709
+ }
1710
+ });
1711
+ }
1647
1712
  async function matchesPiScore(renderedValue, llmInput, llmOutput, assertion) {
1648
1713
  return {
1649
1714
  ...await doRemoteScoringWithPi({
@@ -2151,7 +2216,7 @@ async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
2151
2216
  let totalWeight = 0;
2152
2217
  relevantResults.forEach((componentResult) => {
2153
2218
  const assertionType = componentResult.assertion?.type || "unknown";
2154
- const weight = options.weights[assertionType] !== void 0 ? options.weights[assertionType] : 1;
2219
+ const weight = options.weights[assertionType] === void 0 ? 1 : options.weights[assertionType];
2155
2220
  const score = componentResult.score || 0;
2156
2221
  totalWeightedScore += score * weight;
2157
2222
  totalWeight += weight;
@@ -2401,9 +2466,9 @@ async function fetchHuggingFaceDataset(datasetPath, limit) {
2401
2466
  while (true) {
2402
2467
  const requestParams = new URLSearchParams(queryParams);
2403
2468
  requestParams.set("offset", offset.toString());
2404
- const remainingUserLimit = userLimit !== void 0 ? Math.max(userLimit - offset, 0) : void 0;
2405
- const remainingDatasetRows = totalRows !== void 0 ? Math.max(totalRows - offset, 0) : void 0;
2406
- const requestedLength = remainingUserLimit !== void 0 ? Math.min(pageSize, remainingUserLimit) : remainingDatasetRows !== void 0 ? Math.min(pageSize, remainingDatasetRows) : pageSize;
2469
+ const remainingUserLimit = userLimit === void 0 ? void 0 : Math.max(userLimit - offset, 0);
2470
+ const remainingDatasetRows = totalRows === void 0 ? void 0 : Math.max(totalRows - offset, 0);
2471
+ const requestedLength = remainingUserLimit === void 0 ? remainingDatasetRows === void 0 ? pageSize : Math.min(pageSize, remainingDatasetRows) : Math.min(pageSize, remainingUserLimit);
2407
2472
  if (requestedLength <= 0) {
2408
2473
  logger.debug(`[HF Dataset] No remaining rows to fetch for ${owner}/${repo} (offset ${offset})`);
2409
2474
  break;
@@ -13461,6 +13526,6 @@ function getGraderById(id) {
13461
13526
  return grader;
13462
13527
  }
13463
13528
  //#endregion
13464
- export { selectMaxScore as $, BeavertailsPlugin as A, matchesClassification as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, callProviderWithContext as F, matchesFactuality as G, matchesContextFaithfulness as H, fail as I, matchesModeration as J, matchesGEval as K, getAndCheckProvider as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, fetchHuggingFaceDataset as P, matchesSimilarity as Q, loadRubricPrompt as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesContextRecall as U, matchesClosedQa as V, matchesContextRelevance as W, matchesSearchRubric as X, matchesPiScore as Y, matchesSelectBest as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, coerceString as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, processFileReference as ct, RbacPlugin as d, retryWithDeduplication as dt, doRemoteGrading as et, PromptExtractionPlugin as f, sampleArray as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, VLGuardPlugin as i, SUGGEST_PROMPTS_SYSTEM_MESSAGE as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, resolveContext as lt, PolicyPlugin as m, DefaultSuggestionsProvider as mt, getGraderById as n, readPrompts as nt, UnsafeBenchPlugin as o, getFinalTest as ot, PoliticsPlugin as p, getDefaultProviders as pt, matchesLlmRubric as q, VLSUPlugin as r, readProviderPromptMap as rt, ToxicChatPlugin as s, loadFromJavaScriptFile as st, GRADERS as t, processPrompts as tt, ShellInjectionPlugin as u, getCustomPolicies as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, matchesAnswerRelevance as z };
13529
+ export { matchesTrajectoryGoalSuccess as $, BeavertailsPlugin as A, matchesClassification as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, callProviderWithContext as F, matchesFactuality as G, matchesContextFaithfulness as H, fail as I, matchesModeration as J, matchesGEval as K, getAndCheckProvider as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, fetchHuggingFaceDataset as P, matchesSimilarity as Q, loadRubricPrompt as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesContextRecall as U, matchesClosedQa as V, matchesContextRelevance as W, matchesSearchRubric as X, matchesPiScore as Y, matchesSelectBest as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, SUGGEST_PROMPTS_SYSTEM_MESSAGE as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, loadFromJavaScriptFile as ct, RbacPlugin as d, getCustomPolicies as dt, selectMaxScore as et, PromptExtractionPlugin as f, retryWithDeduplication as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, DefaultSuggestionsProvider as ht, VLGuardPlugin as i, readProviderPromptMap as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, processFileReference as lt, PolicyPlugin as m, getDefaultProviders as mt, getGraderById as n, processPrompts as nt, UnsafeBenchPlugin as o, coerceString as ot, PoliticsPlugin as p, sampleArray as pt, matchesLlmRubric as q, VLSUPlugin as r, readPrompts as rt, ToxicChatPlugin as s, getFinalTest as st, GRADERS as t, doRemoteGrading as tt, ShellInjectionPlugin as u, resolveContext as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, matchesAnswerRelevance as z };
13465
13530
 
13466
- //# sourceMappingURL=graders-R9rYUM0d.js.map
13531
+ //# sourceMappingURL=graders-BXAJ0sbS.js.map
@@ -0,0 +1,32 @@
1
+ import "./logger-BnkjG2jt.js";
2
+ import "./fetch-BiYv2BZc.js";
3
+ import "./accounts-Xatc0RYb.js";
4
+ import "./server-Cns05F1j.js";
5
+ import "./tables-BEIFz2tM.js";
6
+ import "./esm-CKWP3u_P.js";
7
+ import "./types-Cd3ygw8W.js";
8
+ import "./utils-DOjD4dTC.js";
9
+ import "./store-VB0GP46K.js";
10
+ import "./cache-HP0NP4k3.js";
11
+ import "./blobs-BUWmKWzo.js";
12
+ import "./extractor-C8XwivI9.js";
13
+ import "./providers-DvddrgxL.js";
14
+ import "./telemetry-DPXLd7UE.js";
15
+ import "./genaiTracer-70Z8BIuV.js";
16
+ import "./pythonUtils-Bzwbgpbg.js";
17
+ import "./util-BLvy9qfE.js";
18
+ import "./chat-B0iaWhoh.js";
19
+ import "./transform-B2-jIv68.js";
20
+ import "./messages-biC_ex-p.js";
21
+ import "./util-DbVG-yZU.js";
22
+ import { n as getGraderById } from "./graders-BXAJ0sbS.js";
23
+ import "./responses-CgNyTPsY.js";
24
+ import "./openai-D6wITiVn.js";
25
+ import "./util-BtoGs5Cb.js";
26
+ import "./completion-BCimtq-h.js";
27
+ import "./transform-BzK09Q_9.js";
28
+ import "./base-Cz2ZC_iA.js";
29
+ import "./image-B8b6f36E.js";
30
+ import "./providerRegistry-BkzVH5Ba.js";
31
+ import "./rubyUtils-DECSbsfY.js";
32
+ export { getGraderById };
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env node
2
+ import "./logger-BcJBzSSA.js";
3
+ import "./fetch-DoVRJZhJ.js";
4
+ import "./accounts-CFLK3mnD.js";
5
+ import "./cloud-z8KZpUoa.js";
6
+ import "./telemetry-BnH5VJAU.js";
7
+ import "./types-CIhFeUC4.js";
8
+ import "./server-DZ9MtCn0.js";
9
+ import "./providers-Ch6Mr0gn.js";
10
+ import "./cache-DSqR6ezl.js";
11
+ import "./util-Bm3E9jpK.js";
12
+ import "./esm-7UIl0pPM.js";
13
+ import "./pythonUtils-wIqk7zAf.js";
14
+ import "./transform-ljLYHEPh.js";
15
+ import { n as getGraderById } from "./graders-RjHF8VfG.js";
16
+ import "./utils-DEuL4VNB.js";
17
+ import "./genaiTracer-C1rxGO8Q.js";
18
+ import "./chat-BEwdgGEg.js";
19
+ import "./transform-DrleutM3.js";
20
+ import "./messages-DJNo37Ko.js";
21
+ import "./util-DMFeUvLz.js";
22
+ import "./responses-BKP_WYis.js";
23
+ import "./openai-BMcwgD5C.js";
24
+ import "./util-DM2rTn_6.js";
25
+ import "./completion-DoYy49ti.js";
26
+ import "./blobs-B1JriOyi.js";
27
+ import "./tables-DmzvLbeZ.js";
28
+ import "./extractor-CAZ2G3Kh.js";
29
+ import "./store-P8OKm19S.js";
30
+ import "./base-CKjwebIH.js";
31
+ import "./image-PoF6DN3x.js";
32
+ import "./providerRegistry-B9lh-_tx.js";
33
+ import "./rubyUtils-CiVfln3g.js";
34
+ export { getGraderById };
@@ -1,24 +1,24 @@
1
- import { C as isCI, _ as getEnvBool, b as getEnvString, i as logger, l as extractFirstJsonObject, m as safeJsonStringify, u as extractJsonObjects, w as state } from "./logger-CT3IKMKA.js";
1
+ import { C as getEnvString, D as state, E as isCI, _ as safeJsonStringify, a as logger, b as getEnvBool, f as extractFirstJsonObject, p as extractJsonObjects } from "./logger-DO8_zM18.js";
2
2
  import { t as invariant } from "./invariant-Ddh24eXh.js";
3
- import { r as importModule } from "./esm-Cd1AjG1D.js";
4
- import { r as runPython } from "./pythonUtils-D5nxkQ0P.js";
3
+ import { r as importModule } from "./esm-SUNIX1x3.js";
4
+ import { r as runPython } from "./pythonUtils-Cpo0Ez1p.js";
5
5
  import { i as isJavascriptFile } from "./fileExtensions-DnqA1y9x.js";
6
- import { n as transform } from "./transform-DECvGmzp.js";
7
- import { B as isValidReusablePolicyId, G as MULTI_TURN_STRATEGIES, R as PolicyObjectSchema, St as PromptSchema, ut as LLAMA_GUARD_REPLICATE_PROVIDER } from "./types-CLKiCBW3.js";
8
- import { _ as extractVariablesFromTemplate, a as getNunjucksEngineForFilePath, c as maybeLoadFromExternalFile, d as maybeLoadToolsFromExternalFile, f as parsePathOrGlob, s as maybeLoadConfigFromExternalFile, x as parseFileUrl, y as getNunjucksEngine } from "./util-Dlz_Wvgm.js";
9
- import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-60Gzydls.js";
10
- import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-8XhNqPKW.js";
11
- import { $ as DefaultSynthesizeProvider$1, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, K as DefaultGradingProvider$3, N as REDTEAM_MEMORY_POISONING_PLUGIN_ID, O as redteamProviderManager, Q as DefaultSuggestionsProvider$2, S as removePrefix, U as MistralChatCompletionProvider, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, _ as extractVariablesFromJson, b as isBasicRefusal, et as AzureModerationProvider, f as checkExfilTracking, g as extractPromptFromTags, h as extractInputVarsFromPrompt, it as parseScriptParts, m as extractGoalFromPrompt, n as loadApiProvider, nt as AzureChatCompletionProvider, p as extractAllPromptsFromTags, rt as getFileHashes, tt as AzureEmbeddingProvider, x as isEmptyResponse, y as getShortPluginId, z as getPoliciesFromCloud } from "./providers-BKRJTjBz.js";
12
- import { a as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-XiOAgly5.js";
6
+ import { n as transform } from "./transform-BqPkNPYm.js";
7
+ import { B as isValidReusablePolicyId, G as MULTI_TURN_STRATEGIES, R as PolicyObjectSchema, St as PromptSchema, ut as LLAMA_GUARD_REPLICATE_PROVIDER } from "./types-q8GXGF65.js";
8
+ import { _ as extractVariablesFromTemplate, a as getNunjucksEngineForFilePath, c as maybeLoadFromExternalFile, d as maybeLoadToolsFromExternalFile, f as parsePathOrGlob, s as maybeLoadConfigFromExternalFile, x as parseFileUrl, y as getNunjucksEngine } from "./util-CMMkIxfU.js";
9
+ import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-CVAtKnI3.js";
10
+ import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-CaT5tPgo.js";
11
+ import { $ as DefaultSynthesizeProvider$1, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, K as DefaultGradingProvider$3, N as REDTEAM_MEMORY_POISONING_PLUGIN_ID, O as redteamProviderManager, Q as DefaultSuggestionsProvider$2, S as removePrefix, U as MistralChatCompletionProvider, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, _ as extractVariablesFromJson, b as isBasicRefusal, et as AzureModerationProvider, f as checkExfilTracking, g as extractPromptFromTags, h as extractInputVarsFromPrompt, it as parseScriptParts, m as extractGoalFromPrompt, n as loadApiProvider, nt as AzureChatCompletionProvider, p as extractAllPromptsFromTags, rt as getFileHashes, tt as AzureEmbeddingProvider, x as isEmptyResponse, y as getShortPluginId, z as getPoliciesFromCloud } from "./providers-Cn73d5sr.js";
12
+ import { a as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-CFxO9KGo.js";
13
13
  import { n as sha256 } from "./createHash-DmPQkvBh.js";
14
- import { t as OpenAiChatCompletionProvider } from "./chat-CznLWr_D.js";
14
+ import { t as OpenAiChatCompletionProvider } from "./chat-pxmiVpWe.js";
15
15
  import { r as accumulateTokenUsage } from "./tokenUsageUtils-NYT-WKS6.js";
16
- import { v as hasGoogleDefaultCredentials } from "./transform-DGLazrMm.js";
17
- import { t as AnthropicMessagesProvider } from "./messages-BLbWdsyt.js";
18
- import { t as OpenAiResponsesProvider } from "./responses-BKqJmhhc.js";
19
- import { n as OpenAiEmbeddingProvider } from "./completion-C_P3ypkJ.js";
20
- import { i as getUserEmail } from "./accounts-xrUGFA6n.js";
21
- import { i as getRemoteGenerationUrl, l as shouldGenerateRemote } from "./server-BC7XJFgr.js";
16
+ import { b as hasGoogleDefaultCredentials } from "./transform-DyDAwEpE.js";
17
+ import { t as AnthropicMessagesProvider } from "./messages-Dy9QecMs.js";
18
+ import { t as OpenAiResponsesProvider } from "./responses-CQb1Tj69.js";
19
+ import { n as OpenAiEmbeddingProvider } from "./completion-DCjv7RZ3.js";
20
+ import { i as getUserEmail } from "./accounts-B2XmGjty.js";
21
+ import { i as getRemoteGenerationUrl, l as shouldGenerateRemote } from "./server-CP9qKM40.js";
22
22
  import * as fs$1 from "fs";
23
23
  import fs from "fs";
24
24
  import path from "path";
@@ -311,6 +311,37 @@ const DEFAULT_WEB_SEARCH_PROMPT = JSON.stringify([{
311
311
  role: "user",
312
312
  content: "<Output>\n{{ output }}\n</Output>\n<Rubric>\n{{ rubric }}\n</Rubric>"
313
313
  }]);
314
+ const TRAJECTORY_GOAL_SUCCESS_PROMPT = JSON.stringify([{
315
+ role: "system",
316
+ content: dedent`You are grading whether an AI agent successfully completed a goal based on its final output and a summarized execution trajectory. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
317
+
318
+ Judge end-to-end success, not stylistic perfection.
319
+ Use the trajectory as evidence for what the agent actually did.
320
+ Give partial credit when the agent made progress but did not fully achieve the goal.
321
+
322
+ Examples:
323
+
324
+ <Goal>Find the order status and tell the user whether it has shipped</Goal>
325
+ <Trajectory>{"stepCount":2,"steps":[{"index":1,"type":"tool","name":"search_orders"},{"index":2,"type":"message","name":"agent response"}]}</Trajectory>
326
+ <Output>Your order shipped yesterday and should arrive on Tuesday.</Output>
327
+ {"reason":"The agent used the order lookup tool and gave the user the shipping status, so the goal was achieved.","pass":true,"score":1.0}
328
+
329
+ <Goal>Find the order status and tell the user whether it has shipped</Goal>
330
+ <Trajectory>{"stepCount":1,"steps":[{"index":1,"type":"message","name":"agent response"}]}</Trajectory>
331
+ <Output>I cannot check your order right now.</Output>
332
+ {"reason":"The agent did not show evidence of checking the order and did not provide the requested status.","pass":false,"score":0.0}`
333
+ }, {
334
+ role: "user",
335
+ content: dedent`<Goal>
336
+ {{ goal }}
337
+ </Goal>
338
+ <Trajectory>
339
+ {{ trajectory }}
340
+ </Trajectory>
341
+ <Output>
342
+ {{ output }}
343
+ </Output>`
344
+ }]);
314
345
  //#endregion
315
346
  //#region src/prompts/processors/csv.ts
316
347
  /**
@@ -1518,45 +1549,31 @@ async function renderLlmRubricPrompt(rubricPrompt, context) {
1518
1549
  } catch {}
1519
1550
  return nunjucks.renderString(rubricPrompt, processedContext);
1520
1551
  }
1521
- async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
1522
- if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1523
- if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
1524
- ...await doRemoteGrading({
1525
- task: "llm-rubric",
1526
- rubric,
1527
- output: llmOutput,
1528
- vars: vars || {}
1529
- }),
1530
- assertion
1531
- };
1532
- const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading?.rubricPrompt, DEFAULT_GRADING_PROMPT), {
1533
- output: tryParse(llmOutput),
1534
- rubric,
1535
- ...vars || {}
1536
- });
1537
- const defaultProviders = await getDefaultProviders();
1538
- const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
1539
- const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, "llm-rubric check"), prompt, "llm-rubric", {
1540
- output: tryParse(llmOutput),
1541
- rubric,
1542
- ...vars || {}
1543
- }, providerCallContext);
1544
- if (resp.error || !resp.output) {
1545
- if (options?.throwOnError) throw new LlmRubricProviderError(resp.error || "No output");
1546
- return fail(resp.error || "No output", resp.tokenUsage);
1547
- }
1552
+ function parseJsonGradingResponse(label, resp) {
1548
1553
  let jsonObjects = [];
1549
1554
  if (typeof resp.output === "string") try {
1550
1555
  jsonObjects = extractJsonObjects(resp.output);
1551
- if (jsonObjects.length === 0) return fail("Could not extract JSON from llm-rubric response", resp.tokenUsage);
1556
+ if (jsonObjects.length === 0) return { failure: fail(`Could not extract JSON from ${label} response`, resp.tokenUsage) };
1552
1557
  } catch (err) {
1553
- return fail(`llm-rubric produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage);
1558
+ return { failure: fail(`${label} produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage) };
1554
1559
  }
1555
1560
  else if (typeof resp.output === "object") jsonObjects = [resp.output];
1556
- else return fail(`llm-rubric produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1557
- if (!Array.isArray(jsonObjects) || jsonObjects.length === 0) return fail(`llm-rubric produced malformed response - We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1561
+ else return { failure: fail(`${label} produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
1558
1562
  const parsed = jsonObjects[0];
1559
- if (typeof parsed !== "object" || parsed === null || parsed === void 0) return fail(`llm-rubric produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1563
+ if (typeof parsed !== "object" || parsed === null || parsed === void 0) return { failure: fail(`${label} produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
1564
+ return { parsed };
1565
+ }
1566
+ async function runJsonGradingPrompt({ assertion, checkName, defaultPrompt, grading, label, providerCallContext, throwOnError, vars }) {
1567
+ const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading.rubricPrompt, defaultPrompt), vars);
1568
+ const defaultProviders = await getDefaultProviders();
1569
+ const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
1570
+ const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, checkName), prompt, label, vars, providerCallContext);
1571
+ if (resp.error || !resp.output) {
1572
+ if (throwOnError) throw new Error(resp.error || "No output");
1573
+ return fail(resp.error || "No output", resp.tokenUsage);
1574
+ }
1575
+ const { parsed, failure } = parseJsonGradingResponse(label, resp);
1576
+ if (!parsed) return failure;
1560
1577
  let pass = parsed.pass ?? true;
1561
1578
  if (typeof pass !== "boolean") pass = /^(true|yes|pass|y)$/i.test(String(pass));
1562
1579
  let score = parsed.score;
@@ -1584,6 +1601,54 @@ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, opt
1584
1601
  metadata: { renderedGradingPrompt: prompt }
1585
1602
  };
1586
1603
  }
1604
+ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
1605
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1606
+ if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
1607
+ ...await doRemoteGrading({
1608
+ task: "llm-rubric",
1609
+ rubric,
1610
+ output: llmOutput,
1611
+ vars: vars || {}
1612
+ }),
1613
+ assertion
1614
+ };
1615
+ try {
1616
+ return await runJsonGradingPrompt({
1617
+ assertion,
1618
+ checkName: "llm-rubric check",
1619
+ defaultPrompt: DEFAULT_GRADING_PROMPT,
1620
+ grading,
1621
+ label: "llm-rubric",
1622
+ providerCallContext,
1623
+ throwOnError: options?.throwOnError,
1624
+ vars: {
1625
+ output: tryParse(llmOutput),
1626
+ rubric,
1627
+ ...vars || {}
1628
+ }
1629
+ });
1630
+ } catch (error) {
1631
+ if (options?.throwOnError) throw new LlmRubricProviderError(error.message || "No output");
1632
+ throw error;
1633
+ }
1634
+ }
1635
+ async function matchesTrajectoryGoalSuccess(goal, trajectory, llmOutput, grading, vars, assertion, providerCallContext) {
1636
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1637
+ return runJsonGradingPrompt({
1638
+ assertion,
1639
+ checkName: "trajectory:goal-success check",
1640
+ defaultPrompt: TRAJECTORY_GOAL_SUCCESS_PROMPT,
1641
+ grading,
1642
+ label: "trajectory:goal-success",
1643
+ providerCallContext,
1644
+ vars: {
1645
+ ...vars || {},
1646
+ goal,
1647
+ output: tryParse(llmOutput),
1648
+ trajectory
1649
+ }
1650
+ });
1651
+ }
1587
1652
  async function matchesPiScore(renderedValue, llmInput, llmOutput, assertion) {
1588
1653
  return {
1589
1654
  ...await doRemoteScoringWithPi({
@@ -2091,7 +2156,7 @@ async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
2091
2156
  let totalWeight = 0;
2092
2157
  relevantResults.forEach((componentResult) => {
2093
2158
  const assertionType = componentResult.assertion?.type || "unknown";
2094
- const weight = options.weights[assertionType] !== void 0 ? options.weights[assertionType] : 1;
2159
+ const weight = options.weights[assertionType] === void 0 ? 1 : options.weights[assertionType];
2095
2160
  const score = componentResult.score || 0;
2096
2161
  totalWeightedScore += score * weight;
2097
2162
  totalWeight += weight;
@@ -2341,9 +2406,9 @@ async function fetchHuggingFaceDataset(datasetPath, limit) {
2341
2406
  while (true) {
2342
2407
  const requestParams = new URLSearchParams(queryParams);
2343
2408
  requestParams.set("offset", offset.toString());
2344
- const remainingUserLimit = userLimit !== void 0 ? Math.max(userLimit - offset, 0) : void 0;
2345
- const remainingDatasetRows = totalRows !== void 0 ? Math.max(totalRows - offset, 0) : void 0;
2346
- const requestedLength = remainingUserLimit !== void 0 ? Math.min(pageSize, remainingUserLimit) : remainingDatasetRows !== void 0 ? Math.min(pageSize, remainingDatasetRows) : pageSize;
2409
+ const remainingUserLimit = userLimit === void 0 ? void 0 : Math.max(userLimit - offset, 0);
2410
+ const remainingDatasetRows = totalRows === void 0 ? void 0 : Math.max(totalRows - offset, 0);
2411
+ const requestedLength = remainingUserLimit === void 0 ? remainingDatasetRows === void 0 ? pageSize : Math.min(pageSize, remainingDatasetRows) : Math.min(pageSize, remainingUserLimit);
2347
2412
  if (requestedLength <= 0) {
2348
2413
  logger.debug(`[HF Dataset] No remaining rows to fetch for ${owner}/${repo} (offset ${offset})`);
2349
2414
  break;
@@ -13461,6 +13526,6 @@ function getGraderById(id) {
13461
13526
  return grader;
13462
13527
  }
13463
13528
  //#endregion
13464
- export { matchesSearchRubric as $, BeavertailsPlugin as A, getAndCheckProvider as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, retryWithDeduplication as F, matchesContextFaithfulness as G, matchesAnswerRelevance as H, sampleArray as I, matchesFactuality as J, matchesContextRecall as K, fetchHuggingFaceDataset as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, getCustomPolicies as P, matchesPiScore as Q, callProviderWithContext as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesClassification as U, loadRubricPrompt as V, matchesClosedQa as W, matchesLlmRubric as X, matchesGEval as Y, matchesModeration as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, processPrompts as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, SUGGEST_PROMPTS_SYSTEM_MESSAGE as ct, RbacPlugin as d, loadFromJavaScriptFile as dt, matchesSelectBest as et, PromptExtractionPlugin as f, processFileReference as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, VLGuardPlugin as i, DefaultSuggestionsProvider as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, coerceString as lt, PolicyPlugin as m, getGraderById as n, selectMaxScore as nt, UnsafeBenchPlugin as o, readPrompts as ot, PoliticsPlugin as p, resolveContext as pt, matchesContextRelevance as q, VLSUPlugin as r, getDefaultProviders as rt, ToxicChatPlugin as s, readProviderPromptMap as st, GRADERS as t, matchesSimilarity as tt, ShellInjectionPlugin as u, getFinalTest as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, fail as z };
13529
+ export { matchesSearchRubric as $, BeavertailsPlugin as A, getAndCheckProvider as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, retryWithDeduplication as F, matchesContextFaithfulness as G, matchesAnswerRelevance as H, sampleArray as I, matchesFactuality as J, matchesContextRecall as K, fetchHuggingFaceDataset as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, getCustomPolicies as P, matchesPiScore as Q, callProviderWithContext as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesClassification as U, loadRubricPrompt as V, matchesClosedQa as W, matchesLlmRubric as X, matchesGEval as Y, matchesModeration as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, DefaultSuggestionsProvider as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, readProviderPromptMap as ct, RbacPlugin as d, getFinalTest as dt, matchesSelectBest as et, PromptExtractionPlugin as f, loadFromJavaScriptFile as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, VLGuardPlugin as i, getDefaultProviders as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, SUGGEST_PROMPTS_SYSTEM_MESSAGE as lt, PolicyPlugin as m, resolveContext as mt, getGraderById as n, matchesTrajectoryGoalSuccess as nt, UnsafeBenchPlugin as o, processPrompts as ot, PoliticsPlugin as p, processFileReference as pt, matchesContextRelevance as q, VLSUPlugin as r, selectMaxScore as rt, ToxicChatPlugin as s, readPrompts as st, GRADERS as t, matchesSimilarity as tt, ShellInjectionPlugin as u, coerceString as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, fail as z };
13465
13530
 
13466
- //# sourceMappingURL=graders-CpdqD9PI.js.map
13531
+ //# sourceMappingURL=graders-DG7mhg-b.js.map
@@ -0,0 +1,32 @@
1
+ require("./logger-D5iKBpu_.cjs");
2
+ require("./esm-CipptfDu.cjs");
3
+ require("./pythonUtils-dAVigVK-.cjs");
4
+ require("./transform-ZrG2dvlo.cjs");
5
+ const require_graders = require("./graders-BElhu9ZY.cjs");
6
+ require("./types-D8cGDZbL.cjs");
7
+ require("./util-CuLo2pMR.cjs");
8
+ require("./fetch-BnR9wSnm.cjs");
9
+ require("./cache-C5yFZ4gC.cjs");
10
+ require("./providers-CScd1wN6.cjs");
11
+ require("./utils-DKw8mrgr.cjs");
12
+ require("./genaiTracer-BfxrvSUb.cjs");
13
+ require("./chat-CM8qWR3_.cjs");
14
+ require("./transform-0BwoBsvO.cjs");
15
+ require("./messages-HJsyEh4o.cjs");
16
+ require("./util--9u9UVCt.cjs");
17
+ require("./responses-mo0KQDbu.cjs");
18
+ require("./openai-CoxGAQwn.cjs");
19
+ require("./util-CFj4YKIn.cjs");
20
+ require("./completion-DlXUhj5c.cjs");
21
+ require("./accounts-BPyfpSeU.cjs");
22
+ require("./server-BtoCXeXI.cjs");
23
+ require("./blobs-C6j0bvFz.cjs");
24
+ require("./tables-BdZQEpRz.cjs");
25
+ require("./extractor-DG3sSfXE.cjs");
26
+ require("./telemetry-re627Lre.cjs");
27
+ require("./store-CLyU7AtI.cjs");
28
+ require("./base-BboXIF_0.cjs");
29
+ require("./image--F58eEIn.cjs");
30
+ require("./providerRegistry-BTDgfV5h.cjs");
31
+ require("./rubyUtils-CGeUtCfW.cjs");
32
+ exports.getGraderById = require_graders.getGraderById;