promptfoo 0.121.1 → 0.121.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. package/README.md +2 -0
  2. package/dist/src/{accounts-xrUGFA6n.js → accounts-B2XmGjty.js} +5 -5
  3. package/dist/src/{accounts-Bx-x3bmW.cjs → accounts-BPyfpSeU.cjs} +5 -5
  4. package/dist/src/{accounts-CMqkzrVf.js → accounts-CFLK3mnD.js} +6 -6
  5. package/dist/src/{accounts-BgNJDBE6.js → accounts-Xatc0RYb.js} +5 -5
  6. package/dist/src/{agentic-utils-BKIN5PKu.js → agentic-utils-36epdqwB.js} +3 -3
  7. package/dist/src/{cometapi-DkXrKi5z.js → agentic-utils-D8yXo5Lm.js} +4 -61
  8. package/dist/src/{cometapi-vY6aDZgo.cjs → agentic-utils-DAVsChuB.cjs} +24 -62
  9. package/dist/src/agentic-utils-DIYAAYE7.js +153 -0
  10. package/dist/src/{agents-C-dDThPK.js → agents-BBVJCIYr.js} +226 -13
  11. package/dist/src/{agents-CErsqg5U.cjs → agents-BBWxKSM0.cjs} +7 -7
  12. package/dist/src/{agents-Dy2YpZpa.js → agents-Bqgfdokm.js} +227 -14
  13. package/dist/src/{agents-B0f4HICh.cjs → agents-CAYbM7qD.cjs} +226 -13
  14. package/dist/src/{agents-CVIn-Utx.js → agents-CLQ-P15P.js} +7 -7
  15. package/dist/src/{agents-DeH4Gu94.js → agents-CgBniSlI.js} +8 -8
  16. package/dist/src/{agents-CXknwsFX.js → agents-DSSTV4bv.js} +226 -13
  17. package/dist/src/{agents-aF4-T121.js → agents-wg3ohknq.js} +7 -7
  18. package/dist/src/{aimlapi-tg0Gkcvr.cjs → aimlapi-Bv8Fmc-b.cjs} +14 -14
  19. package/dist/src/{aimlapi-BNfTBexL.js → aimlapi-BwGC1TtS.js} +13 -13
  20. package/dist/src/{aimlapi-BAGZDo5G.js → aimlapi-DaC3qZ-o.js} +14 -14
  21. package/dist/src/{aimlapi-DHRKlBEA.js → aimlapi-MgSLdvy7.js} +13 -13
  22. package/dist/src/app/assets/index-B6l9CVVb.js +439 -0
  23. package/dist/src/app/assets/index-DyZ0Ep37.css +1 -0
  24. package/dist/src/app/assets/sync-CStkzc6u.js +4 -0
  25. package/dist/src/app/assets/vendor-charts-BnDWwBlI.js +36 -0
  26. package/dist/src/app/assets/vendor-markdown-Bz7N-ca6.js +29 -0
  27. package/dist/src/app/index.html +4 -4
  28. package/dist/src/{audio-tf_NBjlC.js → audio-Bn44pQxv.js} +4 -4
  29. package/dist/src/{audio-CHQ4r-RV.js → audio-DDA5WHdx.js} +4 -4
  30. package/dist/src/{audio-BWeaWovU.cjs → audio-DVFjQ67_.cjs} +4 -4
  31. package/dist/src/{audio-BRODU0UK.js → audio-DjU9GswO.js} +5 -5
  32. package/dist/src/{base-DBtwl2FR.cjs → base-BboXIF_0.cjs} +3 -3
  33. package/dist/src/{base-B4QJRyFS.js → base-CKjwebIH.js} +3 -3
  34. package/dist/src/{base-B0tcrnq_.js → base-CqzQ4K8j.js} +3 -3
  35. package/dist/src/{base-fEDN28WM.js → base-Cz2ZC_iA.js} +3 -3
  36. package/dist/src/{blobs-BAU-dXan.js → blobs-B1JriOyi.js} +3 -3
  37. package/dist/src/{blobs-qTYm-1PY.js → blobs-BUWmKWzo.js} +3 -3
  38. package/dist/src/{blobs-DvS-O6be.cjs → blobs-C6j0bvFz.cjs} +3 -3
  39. package/dist/src/{blobs-Bpg5rH6i.js → blobs-DXTl6J3H.js} +3 -3
  40. package/dist/src/{cache-COish3-W.cjs → cache-C5yFZ4gC.cjs} +75 -58
  41. package/dist/src/{cache-8XhNqPKW.js → cache-CaT5tPgo.js} +75 -58
  42. package/dist/src/cache-CyCanoMu.js +6 -0
  43. package/dist/src/{cache-CG0SlR1d.js → cache-DSqR6ezl.js} +75 -58
  44. package/dist/src/cache-Df_QFDNu.cjs +5 -0
  45. package/dist/src/{cache-D3eqDYGU.js → cache-HP0NP4k3.js} +75 -58
  46. package/dist/src/{chat-DHMH-N64.js → chat-B-52XYI1.js} +12 -12
  47. package/dist/src/{chat-BKm79wib.js → chat-B0iaWhoh.js} +16 -14
  48. package/dist/src/{chat-DxysjBvt.js → chat-BE0qTA8e.js} +13 -13
  49. package/dist/src/{chat-CRWNNq73.js → chat-BEwdgGEg.js} +16 -14
  50. package/dist/src/{chat-2K608PeQ.cjs → chat-BtIKkLKx.cjs} +13 -13
  51. package/dist/src/{chat-DaqekjFr.cjs → chat-CM8qWR3_.cjs} +17 -15
  52. package/dist/src/{chat-CM_kyI8B.js → chat-DK1U-eZ-.js} +12 -12
  53. package/dist/src/{chat-CznLWr_D.js → chat-pxmiVpWe.js} +16 -14
  54. package/dist/src/{chatkit-65VXf5SR.js → chatkit-BYGQlHlV.js} +4 -4
  55. package/dist/src/{chatkit-DKyPi1Gs.cjs → chatkit-Cx174XI3.cjs} +4 -4
  56. package/dist/src/{chatkit-BxFvW8KY.js → chatkit-_8eJqKcD.js} +4 -4
  57. package/dist/src/{chatkit-Be-Q-a9F.js → chatkit-a2D6mY6s.js} +4 -4
  58. package/dist/src/{claude-agent-sdk-CJH22shf.cjs → claude-agent-sdk-8ddRp1L2.cjs} +35 -17
  59. package/dist/src/{claude-agent-sdk-Dy5lT-Tx.js → claude-agent-sdk-Bq5EArsX.js} +33 -15
  60. package/dist/src/{claude-agent-sdk-BLTu0WBO.js → claude-agent-sdk-CMjh4LFH.js} +33 -15
  61. package/dist/src/{claude-agent-sdk-D6_k9FKA.js → claude-agent-sdk-HgbFioFw.js} +33 -15
  62. package/dist/src/cloud-DE3t1-ZI.js +4 -0
  63. package/dist/src/{cloud-Bc9526yV.js → cloud-z8KZpUoa.js} +3 -3
  64. package/dist/src/{cloudflare-ai-CWWJCRim.js → cloudflare-ai-BGyXlpXJ.js} +13 -13
  65. package/dist/src/{cloudflare-ai-C9r2sRhw.js → cloudflare-ai-Bbp26N0L.js} +13 -13
  66. package/dist/src/{cloudflare-ai-ClWSdor4.cjs → cloudflare-ai-C62x6MQG.cjs} +14 -14
  67. package/dist/src/{cloudflare-ai-ICsOuD-z.js → cloudflare-ai-DdKP9TKT.js} +14 -14
  68. package/dist/src/{cloudflare-gateway-D6xFc5pa.js → cloudflare-gateway-BwAaUgeW.js} +14 -14
  69. package/dist/src/{cloudflare-gateway-D6O7AlYb.js → cloudflare-gateway-D-e9i1Sn.js} +15 -15
  70. package/dist/src/{cloudflare-gateway-pXGHxJ47.js → cloudflare-gateway-DXhtXDRb.js} +15 -163
  71. package/dist/src/{cloudflare-gateway-C2_-KG5o.cjs → cloudflare-gateway-Dx36ftqF.cjs} +15 -15
  72. package/dist/src/{codex-sdk-DUwKWezN.js → codex-sdk-BQEw16R_.js} +180 -11
  73. package/dist/src/{codex-sdk-C6UMlxwV.js → codex-sdk-C_07GuVS.js} +180 -11
  74. package/dist/src/{codex-sdk-GGAw0qbD.js → codex-sdk-DE5G18dx.js} +180 -11
  75. package/dist/src/{codex-sdk-fAO0c3yA.cjs → codex-sdk-ZLKfDjqP.cjs} +181 -12
  76. package/dist/src/cometapi-BDyV-NNm.js +62 -0
  77. package/dist/src/cometapi-C3hOlM7-.cjs +62 -0
  78. package/dist/src/{cometapi-Bbjp5V4x.js → cometapi-hhL4TAh3.js} +14 -14
  79. package/dist/src/{cometapi-BasUi7-_.js → cometapi-sp7sJpBD.js} +15 -15
  80. package/dist/src/{completion-C_P3ypkJ.js → completion-BCimtq-h.js} +6 -6
  81. package/dist/src/{completion-6Mx_iXxK.js → completion-DCjv7RZ3.js} +6 -6
  82. package/dist/src/{completion-CDOouNzq.cjs → completion-DlXUhj5c.cjs} +6 -6
  83. package/dist/src/{completion-C5rtR_9P.js → completion-DoYy49ti.js} +6 -6
  84. package/dist/src/{createHash-CfZSc0b4.cjs → createHash-BYwImsYv.cjs} +2 -2
  85. package/dist/src/{docker-BwsKwxFs.cjs → docker-Cqj2-QVi.cjs} +14 -14
  86. package/dist/src/{docker-CZnqU1XV.js → docker-CxCkwMzc.js} +13 -13
  87. package/dist/src/{docker-DzxyDPIj.js → docker-DpguQj-w.js} +14 -14
  88. package/dist/src/{docker-5KcG-_86.js → docker-FeBni2dw.js} +13 -13
  89. package/dist/src/{esm-C03C-mv3.js → esm-7UIl0pPM.js} +2 -2
  90. package/dist/src/{esm-Cd1AjG1D.js → esm-CKWP3u_P.js} +3 -3
  91. package/dist/src/{esm-CnNt7sI4.cjs → esm-CipptfDu.cjs} +2 -2
  92. package/dist/src/{esm-CaIwzWR5.js → esm-SUNIX1x3.js} +3 -3
  93. package/dist/src/eval-7aEqoMs3.js +15 -0
  94. package/dist/src/{eval-DmFyWU7i.js → eval-BTqTn7lb.js} +10 -10
  95. package/dist/src/{evalResult-CDQiuUuf.js → evalResult-BkIhRdTe.js} +7 -7
  96. package/dist/src/evalResult-CYNHkk5A.js +12 -0
  97. package/dist/src/evalResult-CuvJeNiM.js +10 -0
  98. package/dist/src/{evalResult-CTG2AHOS.js → evalResult-DUDShQrm.js} +7 -7
  99. package/dist/src/{evalResult-Dap2CekP.cjs → evalResult-DpARzUCb.cjs} +7 -7
  100. package/dist/src/evalResult-tGdilrWt.cjs +10 -0
  101. package/dist/src/evaluator-BBUqRhz1.js +36 -0
  102. package/dist/src/{evaluator-DPFRbFIL.js → evaluator-BcvOGaam.js} +833 -79
  103. package/dist/src/{extractor-YMU_Gvt8.js → extractor-C8XwivI9.js} +6 -6
  104. package/dist/src/{extractor-CFG6bcWJ.js → extractor-CAZ2G3Kh.js} +6 -6
  105. package/dist/src/{extractor-DX36oYEv.cjs → extractor-DG3sSfXE.cjs} +6 -6
  106. package/dist/src/{extractor-M67RUtg6.js → extractor-D_wd8jxt.js} +6 -6
  107. package/dist/src/{fetch-4M3YRaqL.js → fetch-BiYv2BZc.js} +3 -3
  108. package/dist/src/{fetch-BxUk8odA.cjs → fetch-BnR9wSnm.cjs} +3 -3
  109. package/dist/src/{fetch-60Gzydls.js → fetch-CVAtKnI3.js} +3 -3
  110. package/dist/src/{fetch-BMv0O527.js → fetch-DoVRJZhJ.js} +4 -4
  111. package/dist/src/fetch-UWU706qb.js +5 -0
  112. package/dist/src/{genaiTracer-DN4dQywX.cjs → genaiTracer-BfxrvSUb.cjs} +2 -2
  113. package/dist/src/{graders-DOXycdlG.cjs → graders-BElhu9ZY.cjs} +126 -55
  114. package/dist/src/{graders-R9rYUM0d.js → graders-BXAJ0sbS.js} +120 -55
  115. package/dist/src/graders-BxfEguVY.js +32 -0
  116. package/dist/src/graders-CzVMbEnv.js +34 -0
  117. package/dist/src/{graders-CpdqD9PI.js → graders-DG7mhg-b.js} +120 -55
  118. package/dist/src/graders-DjCXfj0l.cjs +32 -0
  119. package/dist/src/{graders-CHO8EPM4.js → graders-RjHF8VfG.js} +120 -55
  120. package/dist/src/graders-kHzIWOKu.js +32 -0
  121. package/dist/src/{image-DTedmQPg.cjs → image--F58eEIn.cjs} +6 -6
  122. package/dist/src/{image-DJEvKveK.js → image-6WQXK8m8.js} +4 -4
  123. package/dist/src/{image-pAX56tPG.js → image-B8b6f36E.js} +6 -6
  124. package/dist/src/{image-BmEZqVmk.js → image-CoxZp9PZ.js} +6 -6
  125. package/dist/src/{image-gvmivTEe.js → image-DO0RYnjH.js} +5 -5
  126. package/dist/src/{image-CBBVXWuT.js → image-PoF6DN3x.js} +6 -6
  127. package/dist/src/{image-CDLQOcqT.cjs → image-fza3zuKs.cjs} +4 -4
  128. package/dist/src/{image-tL5hIOFh.js → image-xNbw5ph2.js} +4 -4
  129. package/dist/src/index.cjs +863 -110
  130. package/dist/src/index.d.cts +833 -60
  131. package/dist/src/index.d.ts +833 -60
  132. package/dist/src/index.js +860 -108
  133. package/dist/src/{interactiveCheck-BgLZUIt3.js → interactiveCheck-BnMYOjMu.js} +2 -2
  134. package/dist/src/{knowledgeBase-CoU-UQBg.js → knowledgeBase-Bi7CmDbx.js} +7 -7
  135. package/dist/src/{knowledgeBase-CLJybhnF.js → knowledgeBase-Ce3ofVan.js} +8 -8
  136. package/dist/src/{knowledgeBase-DjWPVqSb.js → knowledgeBase-DFRXPZl_.js} +7 -7
  137. package/dist/src/{knowledgeBase-wkxuRFhA.cjs → knowledgeBase-DqrLX8fy.cjs} +7 -7
  138. package/dist/src/{litellm-B9Hysuri.js → litellm-Bo2gQXpo.js} +16 -15
  139. package/dist/src/{litellm-ePxtr9F1.js → litellm-CKiAxnoM.js} +15 -14
  140. package/dist/src/{litellm-NYpQ8RQu.cjs → litellm-CnHI69aj.cjs} +16 -15
  141. package/dist/src/{litellm-CTfa0hqi.js → litellm-Tc294Jhj.js} +15 -14
  142. package/dist/src/{logger-KkObSCzq.js → logger-BcJBzSSA.js} +10 -14
  143. package/dist/src/{logger-DLcq4dWf.js → logger-BnkjG2jt.js} +10 -14
  144. package/dist/src/{logger-Cp1GPUjj.cjs → logger-D5iKBpu_.cjs} +27 -13
  145. package/dist/src/{logger-CT3IKMKA.js → logger-DO8_zM18.js} +10 -14
  146. package/dist/src/{luma-ray-BW9IRGIc.js → luma-ray-0ehMPt5N.js} +10 -10
  147. package/dist/src/{luma-ray-BE2mOt6N.js → luma-ray-C9q8rdQe.js} +9 -9
  148. package/dist/src/{luma-ray-Cm1KZBhs.js → luma-ray-DP0QA9qn.js} +9 -9
  149. package/dist/src/{luma-ray-B0GGNRc1.cjs → luma-ray-m9Ku2meV.cjs} +9 -9
  150. package/dist/src/main.js +69 -71
  151. package/dist/src/{messages-1x9atZmP.js → messages-DJNo37Ko.js} +14 -9
  152. package/dist/src/{messages-BLbWdsyt.js → messages-Dy9QecMs.js} +14 -9
  153. package/dist/src/{messages-1JrJs91T.cjs → messages-HJsyEh4o.cjs} +15 -10
  154. package/dist/src/{messages-D8EA0oDc.js → messages-biC_ex-p.js} +14 -9
  155. package/dist/src/{modelslab-C1OLRmVX.js → modelslab-B5J-ZM5c.js} +9 -9
  156. package/dist/src/{modelslab-CqXBy3U8.js → modelslab-BI458moT.js} +10 -10
  157. package/dist/src/{modelslab-X5-4LroM.js → modelslab-BTOT8FUO.js} +9 -9
  158. package/dist/src/{modelslab-DcOSFwKh.cjs → modelslab-IQbNg-r7.cjs} +9 -9
  159. package/dist/src/{nova-reel-DihqLeol.js → nova-reel-BZ9y-Y5s.js} +9 -9
  160. package/dist/src/{nova-reel-D9xfaMBs.cjs → nova-reel-CE5etkv9.cjs} +9 -9
  161. package/dist/src/{nova-reel-D2ZkOSyr.js → nova-reel-DEeQlnOJ.js} +10 -10
  162. package/dist/src/{nova-reel-BgS1ZWuK.js → nova-reel-Xw1SXLpg.js} +9 -9
  163. package/dist/src/{nova-sonic-Q3BOJeig.js → nova-sonic-DWswpN1E.js} +7 -7
  164. package/dist/src/{nova-sonic-DezhVUYT.js → nova-sonic-DXTLpi-r.js} +6 -6
  165. package/dist/src/{nova-sonic-DVu3mMIy.cjs → nova-sonic-N0yCm0vb.cjs} +6 -6
  166. package/dist/src/{nova-sonic-P-CdUMlV.js → nova-sonic-Ogqf-csn.js} +6 -6
  167. package/dist/src/{openai-DhbB7eWK.js → openai-BMcwgD5C.js} +2 -2
  168. package/dist/src/{openai-j-sE2O7r.js → openai-BcB5KlTk.js} +2 -2
  169. package/dist/src/{openai-Cuif0GEt.cjs → openai-CoxGAQwn.cjs} +2 -2
  170. package/dist/src/{openai-DElQ-fPX.js → openai-D6wITiVn.js} +2 -2
  171. package/dist/src/{openclaw-Bv1DINsX.js → openclaw-0Sv7AK3O.js} +172 -109
  172. package/dist/src/{openclaw-DAfWQn-o.cjs → openclaw-CXxbKgDH.cjs} +174 -110
  173. package/dist/src/{openclaw-BiSZPL7J.js → openclaw-D1FSCps-.js} +172 -109
  174. package/dist/src/{openclaw-D1D_ej1z.js → openclaw-D2ENvu7a.js} +173 -110
  175. package/dist/src/{opencode-sdk-D95s6SnR.js → opencode-sdk-C71Z0ehR.js} +13 -13
  176. package/dist/src/{opencode-sdk-DxUPkLT7.js → opencode-sdk-CHCs7dEb.js} +12 -12
  177. package/dist/src/{opencode-sdk-C7m-wRfI.js → opencode-sdk-DDxj4QqH.js} +12 -12
  178. package/dist/src/{opencode-sdk-CfaLN8PY.cjs → opencode-sdk-WWJhnbKr.cjs} +16 -16
  179. package/dist/src/{otlpReceiver-g3ByGaXs.js → otlpReceiver-C9KlUtxh.js} +6 -6
  180. package/dist/src/{otlpReceiver--AIRW_S4.js → otlpReceiver-CZL48YfC.js} +6 -6
  181. package/dist/src/{otlpReceiver-Bn5wGB1v.js → otlpReceiver-CavGAA6k.js} +6 -6
  182. package/dist/src/{otlpReceiver-Diec4cln.cjs → otlpReceiver-DHKqJlsz.cjs} +6 -6
  183. package/dist/src/{providerRegistry-B0RUOLI_.js → providerRegistry-B9lh-_tx.js} +2 -2
  184. package/dist/src/{providerRegistry-Civky8Ar.cjs → providerRegistry-BTDgfV5h.cjs} +2 -2
  185. package/dist/src/{providerRegistry-CD8MEar9.js → providerRegistry-BkzVH5Ba.js} +2 -2
  186. package/dist/src/{providerRegistry-DM8rZYol.js → providerRegistry-CUWki5mQ.js} +2 -2
  187. package/dist/src/providers-BSLEaIQG.js +32 -0
  188. package/dist/src/{providers-CFu-TZl-.cjs → providers-CScd1wN6.cjs} +733 -464
  189. package/dist/src/{providers-CFLy1_ji.js → providers-Ch6Mr0gn.js} +795 -526
  190. package/dist/src/{providers-BKRJTjBz.js → providers-Cn73d5sr.js} +795 -526
  191. package/dist/src/providers-D-FnDg8k.cjs +31 -0
  192. package/dist/src/providers-DEYiFVAo.js +30 -0
  193. package/dist/src/{providers-B3HvufyI.js → providers-DvddrgxL.js} +795 -526
  194. package/dist/src/providers-sS2WI8YD.js +30 -0
  195. package/dist/src/{pythonUtils-D6fwaDSg.js → pythonUtils-Bzwbgpbg.js} +3 -3
  196. package/dist/src/{pythonUtils-D5nxkQ0P.js → pythonUtils-Cpo0Ez1p.js} +3 -3
  197. package/dist/src/{pythonUtils-CTU3Y3lw.cjs → pythonUtils-dAVigVK-.cjs} +3 -3
  198. package/dist/src/{pythonUtils-C3py6GC1.js → pythonUtils-wIqk7zAf.js} +3 -3
  199. package/dist/src/{quiverai-CI6gYJVI.js → quiverai-BeofbLVc.js} +4 -4
  200. package/dist/src/{quiverai-MHSxbmmZ.js → quiverai-CCQn73lq.js} +5 -5
  201. package/dist/src/{quiverai-CLkWkyZc.cjs → quiverai-CcUhPIBg.cjs} +4 -4
  202. package/dist/src/{quiverai-C2jVwbH1.js → quiverai-DVSEqJiq.js} +4 -4
  203. package/dist/src/{render-Drod8m7K.js → render-BHl6QVq9.js} +3 -3
  204. package/dist/src/{responses-CGw0DCzh.js → responses-BKP_WYis.js} +16 -12
  205. package/dist/src/{responses-BKqJmhhc.js → responses-CQb1Tj69.js} +16 -12
  206. package/dist/src/{responses-jxdehPkC.js → responses-CgNyTPsY.js} +16 -12
  207. package/dist/src/{responses-tD4Bd4dc.cjs → responses-mo0KQDbu.cjs} +16 -12
  208. package/dist/src/rubyUtils-B1HXG4ej.cjs +4 -0
  209. package/dist/src/{rubyUtils-DhCAlxZr.cjs → rubyUtils-CGeUtCfW.cjs} +3 -3
  210. package/dist/src/{rubyUtils-Boc4HZzX.js → rubyUtils-CiVfln3g.js} +3 -3
  211. package/dist/src/{rubyUtils-BcuGX77l.js → rubyUtils-DECSbsfY.js} +3 -3
  212. package/dist/src/{rubyUtils-BUVePouc.js → rubyUtils-PgU-gHmx.js} +3 -3
  213. package/dist/src/rubyUtils-Rt6pKA96.js +5 -0
  214. package/dist/src/{sagemaker-BK4Zb993.js → sagemaker-CVv8W7so.js} +17 -17
  215. package/dist/src/{sagemaker-D2Q1c-sD.js → sagemaker-CqeASYE5.js} +17 -17
  216. package/dist/src/{sagemaker-BfiWTmvn.js → sagemaker-MUbD5V3v.js} +18 -18
  217. package/dist/src/{sagemaker-CcQHM1jV.cjs → sagemaker-jiw1wQa-.cjs} +17 -17
  218. package/dist/src/{scanner-J8CA3LsV.js → scanner-DVDeUz1r.js} +10 -10
  219. package/dist/src/server/index.js +864 -112
  220. package/dist/src/server-B0Xh1Gx-.js +7 -0
  221. package/dist/src/{server-B0PPuDw-.cjs → server-BtoCXeXI.cjs} +4 -4
  222. package/dist/src/{server-BC7XJFgr.js → server-CP9qKM40.js} +4 -4
  223. package/dist/src/{server-OAs3nBRT.js → server-Cns05F1j.js} +5 -5
  224. package/dist/src/server-DJTKu9IR.cjs +5 -0
  225. package/dist/src/{server-DbFphssR.js → server-DZ9MtCn0.js} +6 -6
  226. package/dist/src/{signal-BOTbd53Z.js → signal-C3ZTsUgi.js} +3 -3
  227. package/dist/src/{slack-DXMKtA-f.js → slack-2sdpGzbt.js} +2 -2
  228. package/dist/src/{slack-BmVAVGaK.cjs → slack-94iG3T0s.cjs} +2 -2
  229. package/dist/src/{slack-DCUPTzS2.js → slack-BR0HtO3K.js} +2 -2
  230. package/dist/src/{slack-DOdy_kyv.js → slack-DCEV-vWP.js} +2 -2
  231. package/dist/src/store-C5u6MgC8.js +6 -0
  232. package/dist/src/{store-BSc-TF2w.cjs → store-CLyU7AtI.cjs} +17 -5
  233. package/dist/src/store-CNHk-De4.cjs +5 -0
  234. package/dist/src/{store-DQLEjuEO.js → store-Cj258DgL.js} +17 -5
  235. package/dist/src/{store-D1tv90v3.js → store-P8OKm19S.js} +17 -5
  236. package/dist/src/{store-Ub2vaGJ1.js → store-VB0GP46K.js} +17 -5
  237. package/dist/src/{tables-xKANLRBD.js → tables-BEIFz2tM.js} +3 -3
  238. package/dist/src/{tables-C7K-XKWp.cjs → tables-BdZQEpRz.cjs} +3 -3
  239. package/dist/src/{tables-D36WTqKX.js → tables-DmzvLbeZ.js} +3 -3
  240. package/dist/src/{tables-5EvT_Bwn.js → tables-kC7R5kiK.js} +3 -3
  241. package/dist/src/{telemetry-C2YDkUQH.js → telemetry-BnH5VJAU.js} +4 -4
  242. package/dist/src/{telemetry-C15ziL8u.js → telemetry-BugWqKiu.js} +4 -4
  243. package/dist/src/{telemetry-DMb2Mpfm.js → telemetry-DPXLd7UE.js} +4 -4
  244. package/dist/src/telemetry-Yig0Tino.js +7 -0
  245. package/dist/src/telemetry-p8Pwqm1i.cjs +5 -0
  246. package/dist/src/{telemetry-CbrnxHp_.cjs → telemetry-re627Lre.cjs} +4 -4
  247. package/dist/src/{transcription-CL78qbOU.cjs → transcription-BvtsrzRG.cjs} +13 -13
  248. package/dist/src/{transcription-DAtxHhAM.js → transcription-CaMivnjG.js} +13 -13
  249. package/dist/src/{transcription-QHh3AH6Z.js → transcription-DOMMTu01.js} +14 -14
  250. package/dist/src/{transcription-LNZTNUUL.js → transcription-Hb3VnC4M.js} +13 -13
  251. package/dist/src/{transform-DOcQeLld.cjs → transform-0BwoBsvO.cjs} +19 -5
  252. package/dist/src/{transform-DGxXocjk.js → transform-B2-jIv68.js} +8 -6
  253. package/dist/src/{transform-DECvGmzp.js → transform-BqPkNPYm.js} +4 -4
  254. package/dist/src/{transform-aa6tmVpZ.js → transform-BzK09Q_9.js} +4 -4
  255. package/dist/src/transform-ChNIpHz7.js +6 -0
  256. package/dist/src/{transform-Cgi24fJ7.js → transform-DrleutM3.js} +8 -6
  257. package/dist/src/{transform-DGLazrMm.js → transform-DyDAwEpE.js} +8 -6
  258. package/dist/src/transform-PtQ6rAE3.cjs +5 -0
  259. package/dist/src/{transform-CzK1Q0zl.cjs → transform-ZrG2dvlo.cjs} +4 -4
  260. package/dist/src/{transform-DilY9wbS.js → transform-ljLYHEPh.js} +4 -4
  261. package/dist/src/{transformersAvailability-CEVM2GNQ.js → transformersAvailability-BGkzavwb.js} +1 -1
  262. package/dist/src/{transformersAvailability-CwayUSlh.cjs → transformersAvailability-DKoRtQLy.cjs} +1 -1
  263. package/dist/src/{types-CH3Ge2sE.js → types-CIhFeUC4.js} +45 -11
  264. package/dist/src/{types-CN_TZ2GJ.js → types-Cd3ygw8W.js} +45 -11
  265. package/dist/src/{types-LJ0r3wbR.cjs → types-D8cGDZbL.cjs} +46 -12
  266. package/dist/src/{types-CLKiCBW3.js → types-q8GXGF65.js} +45 -11
  267. package/dist/src/{util-CchiqXh_.cjs → util--9u9UVCt.cjs} +3 -3
  268. package/dist/src/{util-5cB-L7U3.js → util-BLvy9qfE.js} +7 -11
  269. package/dist/src/{util-YT5HPZaS.js → util-Bm3E9jpK.js} +7 -11
  270. package/dist/src/{util-6-GqIvzS.js → util-BtoGs5Cb.js} +18 -4
  271. package/dist/src/{util-Db0a0AFH.cjs → util-CFj4YKIn.cjs} +18 -4
  272. package/dist/src/{util-Dlz_Wvgm.js → util-CMMkIxfU.js} +7 -11
  273. package/dist/src/{util-Betm42rL.js → util-CgDCK4KI.js} +18 -4
  274. package/dist/src/{util-Yz-1aEhW.cjs → util-CuLo2pMR.cjs} +7 -11
  275. package/dist/src/{util-C-PPYSMq.js → util-DM2rTn_6.js} +18 -4
  276. package/dist/src/{util-B7T3SiBS.js → util-DMFeUvLz.js} +3 -3
  277. package/dist/src/{util-ZZH-3QZz.js → util-DbVG-yZU.js} +3 -3
  278. package/dist/src/{util-DaWTWKBK.js → util-vNmDL5DT.js} +3 -3
  279. package/dist/src/{utils-XiOAgly5.js → utils-CFxO9KGo.js} +2 -2
  280. package/dist/src/{utils-f2-Moju7.js → utils-DEuL4VNB.js} +2 -2
  281. package/dist/src/{utils-Cz9qXqII.cjs → utils-DKw8mrgr.cjs} +3 -3
  282. package/dist/src/{utils-dLokC-eR.js → utils-DOjD4dTC.js} +2 -2
  283. package/dist/tsconfig.tsbuildinfo +1 -1
  284. package/package.json +38 -38
  285. package/dist/src/app/assets/index-BFCZg7hQ.js +0 -439
  286. package/dist/src/app/assets/index-NCn4eVBv.css +0 -1
  287. package/dist/src/app/assets/sync-9qqYcY-B.js +0 -4
  288. package/dist/src/app/assets/vendor-charts-CCl15Imd.js +0 -36
  289. package/dist/src/app/assets/vendor-markdown-0tekx3KX.js +0 -29
  290. package/dist/src/cache-Bbn1Nyrd.cjs +0 -5
  291. package/dist/src/cache-BwsMSda7.js +0 -6
  292. package/dist/src/cloud-DmE0EwsY.js +0 -4
  293. package/dist/src/eval-17JizQIv.js +0 -15
  294. package/dist/src/evalResult-Cqj8pldJ.js +0 -12
  295. package/dist/src/evalResult-DvcJAWJU.cjs +0 -10
  296. package/dist/src/evalResult-Hftn-S_i.js +0 -10
  297. package/dist/src/evaluator-B2CFNt-P.js +0 -36
  298. package/dist/src/fetch-KV5kNASw.js +0 -5
  299. package/dist/src/graders-Bu0H9nXi.js +0 -32
  300. package/dist/src/graders-Cfhkvx-e.js +0 -34
  301. package/dist/src/graders-DClJVpGP.cjs +0 -32
  302. package/dist/src/graders-DcnJsrMO.js +0 -32
  303. package/dist/src/providers-C1rOSHiR.js +0 -32
  304. package/dist/src/providers-CxmDwEFf.cjs +0 -31
  305. package/dist/src/providers-Dodakqr0.js +0 -30
  306. package/dist/src/providers-GIQ2TcsA.js +0 -30
  307. package/dist/src/rubyUtils-BUHu6PhO.js +0 -5
  308. package/dist/src/rubyUtils-CP42kMvq.cjs +0 -4
  309. package/dist/src/server-B1vi21hA.js +0 -7
  310. package/dist/src/server-Cm9Kai_h.cjs +0 -5
  311. package/dist/src/store-BNmZ1KAz.cjs +0 -5
  312. package/dist/src/store-BltJg2cd.js +0 -6
  313. package/dist/src/telemetry-5BCRNBbe.cjs +0 -5
  314. package/dist/src/telemetry-D4W5hboe.js +0 -7
  315. package/dist/src/transform-DTGDnAzW.js +0 -6
  316. package/dist/src/transform-m3qNw4KP.cjs +0 -5
@@ -1,25 +1,25 @@
1
1
  #!/usr/bin/env node
2
- import { C as getEnvString, E as isCI, O as state, b as getEnvBool, d as extractFirstJsonObject, f as extractJsonObjects, g as safeJsonStringify, o as logger } from "./logger-KkObSCzq.js";
3
- import { f as REQUEST_TIMEOUT_MS, i as fetchWithTimeout, l as sleep, n as fetchWithProxy } from "./fetch-BMv0O527.js";
2
+ import { C as getEnvBool, E as getEnvString, h as extractJsonObjects, j as state, k as isCI, m as extractFirstJsonObject, s as logger, y as safeJsonStringify } from "./logger-BcJBzSSA.js";
3
+ import { f as REQUEST_TIMEOUT_MS, i as fetchWithTimeout, l as sleep, n as fetchWithProxy } from "./fetch-DoVRJZhJ.js";
4
4
  import { t as invariant } from "./invariant-BtWWVVhl.js";
5
- import { o as getUserEmail } from "./accounts-CMqkzrVf.js";
6
- import { C as PolicyObjectSchema, M as MULTI_TURN_STRATEGIES, T as isValidReusablePolicyId, _ as PromptSchema, tt as LLAMA_GUARD_REPLICATE_PROVIDER } from "./types-CH3Ge2sE.js";
7
- import { l as getRemoteGenerationUrl, m as shouldGenerateRemote } from "./server-DbFphssR.js";
8
- import { $ as AzureEmbeddingProvider, B as OpenAiModerationProvider, C as extractVariablesFromJson, D as isEmptyResponse, E as isBasicRefusal, H as MistralEmbeddingProvider, J as DefaultGradingProvider$2, O as removePrefix, P as redteamProviderManager, Q as AzureModerationProvider, S as extractPromptFromTags, T as getShortPluginId, U as DefaultEmbeddingProvider$2, V as MistralChatCompletionProvider, W as DefaultGradingProvider$3, X as DefaultSuggestionsProvider$2, Y as DefaultLlmRubricProvider, Z as DefaultSynthesizeProvider$1, b as extractGoalFromPrompt, dt as getPoliciesFromCloud, et as AzureChatCompletionProvider, n as loadApiProvider, nt as parseScriptParts, q as DefaultGradingJsonProvider$2, tt as getFileHashes, v as checkExfilTracking, x as extractInputVarsFromPrompt, y as extractAllPromptsFromTags, z as REDTEAM_MEMORY_POISONING_PLUGIN_ID } from "./providers-CFLy1_ji.js";
9
- import { a as fetchWithCache, o as getCache, s as isCacheEnabled } from "./cache-CG0SlR1d.js";
5
+ import { o as getUserEmail } from "./accounts-CFLK3mnD.js";
6
+ import { C as PolicyObjectSchema, M as MULTI_TURN_STRATEGIES, T as isValidReusablePolicyId, _ as PromptSchema, tt as LLAMA_GUARD_REPLICATE_PROVIDER } from "./types-CIhFeUC4.js";
7
+ import { l as getRemoteGenerationUrl, m as shouldGenerateRemote } from "./server-DZ9MtCn0.js";
8
+ import { $ as AzureEmbeddingProvider, B as OpenAiModerationProvider, C as extractVariablesFromJson, D as isEmptyResponse, E as isBasicRefusal, H as MistralEmbeddingProvider, J as DefaultGradingProvider$2, O as removePrefix, P as redteamProviderManager, Q as AzureModerationProvider, S as extractPromptFromTags, T as getShortPluginId, U as DefaultEmbeddingProvider$2, V as MistralChatCompletionProvider, W as DefaultGradingProvider$3, X as DefaultSuggestionsProvider$2, Y as DefaultLlmRubricProvider, Z as DefaultSynthesizeProvider$1, b as extractGoalFromPrompt, dt as getPoliciesFromCloud, et as AzureChatCompletionProvider, n as loadApiProvider, nt as parseScriptParts, q as DefaultGradingJsonProvider$2, tt as getFileHashes, v as checkExfilTracking, x as extractInputVarsFromPrompt, y as extractAllPromptsFromTags, z as REDTEAM_MEMORY_POISONING_PLUGIN_ID } from "./providers-Ch6Mr0gn.js";
9
+ import { a as fetchWithCache, o as getCache, s as isCacheEnabled } from "./cache-DSqR6ezl.js";
10
10
  import { i as isJavascriptFile } from "./fileExtensions-Ds-foDzt.js";
11
- import { E as parseFileUrl, S as extractVariablesFromTemplate, _ as parsePathOrGlob, f as maybeLoadConfigFromExternalFile, g as maybeLoadToolsFromExternalFile, p as maybeLoadFromExternalFile, u as getNunjucksEngineForFilePath, w as getNunjucksEngine } from "./util-YT5HPZaS.js";
12
- import { r as importModule } from "./esm-C03C-mv3.js";
13
- import { r as runPython } from "./pythonUtils-C3py6GC1.js";
14
- import { n as transform } from "./transform-DilY9wbS.js";
15
- import { a as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-f2-Moju7.js";
11
+ import { E as parseFileUrl, S as extractVariablesFromTemplate, _ as parsePathOrGlob, f as maybeLoadConfigFromExternalFile, g as maybeLoadToolsFromExternalFile, p as maybeLoadFromExternalFile, u as getNunjucksEngineForFilePath, w as getNunjucksEngine } from "./util-Bm3E9jpK.js";
12
+ import { r as importModule } from "./esm-7UIl0pPM.js";
13
+ import { r as runPython } from "./pythonUtils-wIqk7zAf.js";
14
+ import { n as transform } from "./transform-ljLYHEPh.js";
15
+ import { a as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-DEuL4VNB.js";
16
16
  import { n as sha256 } from "./createHash-Da8fMwqB.js";
17
- import { t as OpenAiChatCompletionProvider } from "./chat-CRWNNq73.js";
17
+ import { t as OpenAiChatCompletionProvider } from "./chat-BEwdgGEg.js";
18
18
  import { r as accumulateTokenUsage } from "./tokenUsageUtils-DflFMjS0.js";
19
- import { v as hasGoogleDefaultCredentials } from "./transform-Cgi24fJ7.js";
20
- import { t as AnthropicMessagesProvider } from "./messages-1x9atZmP.js";
21
- import { t as OpenAiResponsesProvider } from "./responses-CGw0DCzh.js";
22
- import { n as OpenAiEmbeddingProvider } from "./completion-C5rtR_9P.js";
19
+ import { b as hasGoogleDefaultCredentials } from "./transform-DrleutM3.js";
20
+ import { t as AnthropicMessagesProvider } from "./messages-DJNo37Ko.js";
21
+ import { t as OpenAiResponsesProvider } from "./responses-BKP_WYis.js";
22
+ import { n as OpenAiEmbeddingProvider } from "./completion-DoYy49ti.js";
23
23
  import z$1 from "zod";
24
24
  import * as fs$2 from "fs";
25
25
  import fs from "fs";
@@ -312,6 +312,37 @@ const DEFAULT_WEB_SEARCH_PROMPT = JSON.stringify([{
312
312
  role: "user",
313
313
  content: "<Output>\n{{ output }}\n</Output>\n<Rubric>\n{{ rubric }}\n</Rubric>"
314
314
  }]);
315
+ const TRAJECTORY_GOAL_SUCCESS_PROMPT = JSON.stringify([{
316
+ role: "system",
317
+ content: dedent`You are grading whether an AI agent successfully completed a goal based on its final output and a summarized execution trajectory. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
318
+
319
+ Judge end-to-end success, not stylistic perfection.
320
+ Use the trajectory as evidence for what the agent actually did.
321
+ Give partial credit when the agent made progress but did not fully achieve the goal.
322
+
323
+ Examples:
324
+
325
+ <Goal>Find the order status and tell the user whether it has shipped</Goal>
326
+ <Trajectory>{"stepCount":2,"steps":[{"index":1,"type":"tool","name":"search_orders"},{"index":2,"type":"message","name":"agent response"}]}</Trajectory>
327
+ <Output>Your order shipped yesterday and should arrive on Tuesday.</Output>
328
+ {"reason":"The agent used the order lookup tool and gave the user the shipping status, so the goal was achieved.","pass":true,"score":1.0}
329
+
330
+ <Goal>Find the order status and tell the user whether it has shipped</Goal>
331
+ <Trajectory>{"stepCount":1,"steps":[{"index":1,"type":"message","name":"agent response"}]}</Trajectory>
332
+ <Output>I cannot check your order right now.</Output>
333
+ {"reason":"The agent did not show evidence of checking the order and did not provide the requested status.","pass":false,"score":0.0}`
334
+ }, {
335
+ role: "user",
336
+ content: dedent`<Goal>
337
+ {{ goal }}
338
+ </Goal>
339
+ <Trajectory>
340
+ {{ trajectory }}
341
+ </Trajectory>
342
+ <Output>
343
+ {{ output }}
344
+ </Output>`
345
+ }]);
315
346
  //#endregion
316
347
  //#region src/prompts/processors/csv.ts
317
348
  /**
@@ -1519,45 +1550,31 @@ async function renderLlmRubricPrompt(rubricPrompt, context) {
1519
1550
  } catch {}
1520
1551
  return nunjucks.renderString(rubricPrompt, processedContext);
1521
1552
  }
1522
- async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
1523
- if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1524
- if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
1525
- ...await doRemoteGrading({
1526
- task: "llm-rubric",
1527
- rubric,
1528
- output: llmOutput,
1529
- vars: vars || {}
1530
- }),
1531
- assertion
1532
- };
1533
- const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading?.rubricPrompt, DEFAULT_GRADING_PROMPT), {
1534
- output: tryParse(llmOutput),
1535
- rubric,
1536
- ...vars || {}
1537
- });
1538
- const defaultProviders = await getDefaultProviders();
1539
- const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
1540
- const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, "llm-rubric check"), prompt, "llm-rubric", {
1541
- output: tryParse(llmOutput),
1542
- rubric,
1543
- ...vars || {}
1544
- }, providerCallContext);
1545
- if (resp.error || !resp.output) {
1546
- if (options?.throwOnError) throw new LlmRubricProviderError(resp.error || "No output");
1547
- return fail(resp.error || "No output", resp.tokenUsage);
1548
- }
1553
+ function parseJsonGradingResponse(label, resp) {
1549
1554
  let jsonObjects = [];
1550
1555
  if (typeof resp.output === "string") try {
1551
1556
  jsonObjects = extractJsonObjects(resp.output);
1552
- if (jsonObjects.length === 0) return fail("Could not extract JSON from llm-rubric response", resp.tokenUsage);
1557
+ if (jsonObjects.length === 0) return { failure: fail(`Could not extract JSON from ${label} response`, resp.tokenUsage) };
1553
1558
  } catch (err) {
1554
- return fail(`llm-rubric produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage);
1559
+ return { failure: fail(`${label} produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage) };
1555
1560
  }
1556
1561
  else if (typeof resp.output === "object") jsonObjects = [resp.output];
1557
- else return fail(`llm-rubric produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1558
- if (!Array.isArray(jsonObjects) || jsonObjects.length === 0) return fail(`llm-rubric produced malformed response - We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1562
+ else return { failure: fail(`${label} produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
1559
1563
  const parsed = jsonObjects[0];
1560
- if (typeof parsed !== "object" || parsed === null || parsed === void 0) return fail(`llm-rubric produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
1564
+ if (typeof parsed !== "object" || parsed === null || parsed === void 0) return { failure: fail(`${label} produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
1565
+ return { parsed };
1566
+ }
1567
+ async function runJsonGradingPrompt({ assertion, checkName, defaultPrompt, grading, label, providerCallContext, throwOnError, vars }) {
1568
+ const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading.rubricPrompt, defaultPrompt), vars);
1569
+ const defaultProviders = await getDefaultProviders();
1570
+ const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
1571
+ const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, checkName), prompt, label, vars, providerCallContext);
1572
+ if (resp.error || !resp.output) {
1573
+ if (throwOnError) throw new Error(resp.error || "No output");
1574
+ return fail(resp.error || "No output", resp.tokenUsage);
1575
+ }
1576
+ const { parsed, failure } = parseJsonGradingResponse(label, resp);
1577
+ if (!parsed) return failure;
1561
1578
  let pass = parsed.pass ?? true;
1562
1579
  if (typeof pass !== "boolean") pass = /^(true|yes|pass|y)$/i.test(String(pass));
1563
1580
  let score = parsed.score;
@@ -1585,6 +1602,54 @@ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, opt
1585
1602
  metadata: { renderedGradingPrompt: prompt }
1586
1603
  };
1587
1604
  }
1605
+ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
1606
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1607
+ if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
1608
+ ...await doRemoteGrading({
1609
+ task: "llm-rubric",
1610
+ rubric,
1611
+ output: llmOutput,
1612
+ vars: vars || {}
1613
+ }),
1614
+ assertion
1615
+ };
1616
+ try {
1617
+ return await runJsonGradingPrompt({
1618
+ assertion,
1619
+ checkName: "llm-rubric check",
1620
+ defaultPrompt: DEFAULT_GRADING_PROMPT,
1621
+ grading,
1622
+ label: "llm-rubric",
1623
+ providerCallContext,
1624
+ throwOnError: options?.throwOnError,
1625
+ vars: {
1626
+ output: tryParse(llmOutput),
1627
+ rubric,
1628
+ ...vars || {}
1629
+ }
1630
+ });
1631
+ } catch (error) {
1632
+ if (options?.throwOnError) throw new LlmRubricProviderError(error.message || "No output");
1633
+ throw error;
1634
+ }
1635
+ }
1636
+ async function matchesTrajectoryGoalSuccess(goal, trajectory, llmOutput, grading, vars, assertion, providerCallContext) {
1637
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
1638
+ return runJsonGradingPrompt({
1639
+ assertion,
1640
+ checkName: "trajectory:goal-success check",
1641
+ defaultPrompt: TRAJECTORY_GOAL_SUCCESS_PROMPT,
1642
+ grading,
1643
+ label: "trajectory:goal-success",
1644
+ providerCallContext,
1645
+ vars: {
1646
+ ...vars || {},
1647
+ goal,
1648
+ output: tryParse(llmOutput),
1649
+ trajectory
1650
+ }
1651
+ });
1652
+ }
1588
1653
  async function matchesPiScore(renderedValue, llmInput, llmOutput, assertion) {
1589
1654
  return {
1590
1655
  ...await doRemoteScoringWithPi({
@@ -2092,7 +2157,7 @@ async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
2092
2157
  let totalWeight = 0;
2093
2158
  relevantResults.forEach((componentResult) => {
2094
2159
  const assertionType = componentResult.assertion?.type || "unknown";
2095
- const weight = options.weights[assertionType] !== void 0 ? options.weights[assertionType] : 1;
2160
+ const weight = options.weights[assertionType] === void 0 ? 1 : options.weights[assertionType];
2096
2161
  const score = componentResult.score || 0;
2097
2162
  totalWeightedScore += score * weight;
2098
2163
  totalWeight += weight;
@@ -2342,9 +2407,9 @@ async function fetchHuggingFaceDataset(datasetPath, limit) {
2342
2407
  while (true) {
2343
2408
  const requestParams = new URLSearchParams(queryParams);
2344
2409
  requestParams.set("offset", offset.toString());
2345
- const remainingUserLimit = userLimit !== void 0 ? Math.max(userLimit - offset, 0) : void 0;
2346
- const remainingDatasetRows = totalRows !== void 0 ? Math.max(totalRows - offset, 0) : void 0;
2347
- const requestedLength = remainingUserLimit !== void 0 ? Math.min(pageSize, remainingUserLimit) : remainingDatasetRows !== void 0 ? Math.min(pageSize, remainingDatasetRows) : pageSize;
2410
+ const remainingUserLimit = userLimit === void 0 ? void 0 : Math.max(userLimit - offset, 0);
2411
+ const remainingDatasetRows = totalRows === void 0 ? void 0 : Math.max(totalRows - offset, 0);
2412
+ const requestedLength = remainingUserLimit === void 0 ? remainingDatasetRows === void 0 ? pageSize : Math.min(pageSize, remainingDatasetRows) : Math.min(pageSize, remainingUserLimit);
2348
2413
  if (requestedLength <= 0) {
2349
2414
  logger.debug(`[HF Dataset] No remaining rows to fetch for ${owner}/${repo} (offset ${offset})`);
2350
2415
  break;
@@ -13462,6 +13527,6 @@ function getGraderById(id) {
13462
13527
  return grader;
13463
13528
  }
13464
13529
  //#endregion
13465
- export { matchesSearchRubric as $, BeavertailsPlugin as A, getAndCheckProvider as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, retryWithDeduplication as F, matchesContextFaithfulness as G, matchesAnswerRelevance as H, sampleArray as I, matchesFactuality as J, matchesContextRecall as K, fetchHuggingFaceDataset as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, getCustomPolicies as P, matchesPiScore as Q, callProviderWithContext as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesClassification as U, loadRubricPrompt as V, matchesClosedQa as W, matchesLlmRubric as X, matchesGEval as Y, matchesModeration as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, DefaultSuggestionsProvider as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, readProviderPromptMap as ct, RbacPlugin as d, getFinalTest as dt, matchesSelectBest as et, PromptExtractionPlugin as f, loadFromJavaScriptFile as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, VLGuardPlugin as i, getDefaultProviders as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, SUGGEST_PROMPTS_SYSTEM_MESSAGE as lt, PolicyPlugin as m, resolveContext as mt, getGraderById as n, selectMaxScore as nt, UnsafeBenchPlugin as o, processPrompts as ot, PoliticsPlugin as p, processFileReference as pt, matchesContextRelevance as q, VLSUPlugin as r, doRemoteGrading as rt, ToxicChatPlugin as s, readPrompts as st, GRADERS as t, matchesSimilarity as tt, ShellInjectionPlugin as u, coerceString as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, fail as z };
13530
+ export { matchesSearchRubric as $, BeavertailsPlugin as A, getAndCheckProvider as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, retryWithDeduplication as F, matchesContextFaithfulness as G, matchesAnswerRelevance as H, sampleArray as I, matchesFactuality as J, matchesContextRecall as K, fetchHuggingFaceDataset as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, getCustomPolicies as P, matchesPiScore as Q, callProviderWithContext as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesClassification as U, loadRubricPrompt as V, matchesClosedQa as W, matchesLlmRubric as X, matchesGEval as Y, matchesModeration as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, getDefaultProviders as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, readPrompts as ct, RbacPlugin as d, coerceString as dt, matchesSelectBest as et, PromptExtractionPlugin as f, getFinalTest as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, resolveContext as ht, VLGuardPlugin as i, doRemoteGrading as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, readProviderPromptMap as lt, PolicyPlugin as m, processFileReference as mt, getGraderById as n, matchesTrajectoryGoalSuccess as nt, UnsafeBenchPlugin as o, DefaultSuggestionsProvider as ot, PoliticsPlugin as p, loadFromJavaScriptFile as pt, matchesContextRelevance as q, VLSUPlugin as r, selectMaxScore as rt, ToxicChatPlugin as s, processPrompts as st, GRADERS as t, matchesSimilarity as tt, ShellInjectionPlugin as u, SUGGEST_PROMPTS_SYSTEM_MESSAGE as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, fail as z };
13466
13531
 
13467
- //# sourceMappingURL=graders-CHO8EPM4.js.map
13532
+ //# sourceMappingURL=graders-RjHF8VfG.js.map
@@ -0,0 +1,32 @@
1
+ import "./logger-DO8_zM18.js";
2
+ import "./esm-SUNIX1x3.js";
3
+ import "./pythonUtils-Cpo0Ez1p.js";
4
+ import "./transform-BqPkNPYm.js";
5
+ import { n as getGraderById } from "./graders-DG7mhg-b.js";
6
+ import "./types-q8GXGF65.js";
7
+ import "./util-CMMkIxfU.js";
8
+ import "./fetch-CVAtKnI3.js";
9
+ import "./cache-CaT5tPgo.js";
10
+ import "./providers-Cn73d5sr.js";
11
+ import "./utils-CFxO9KGo.js";
12
+ import "./genaiTracer-D3fD9dNV.js";
13
+ import "./chat-pxmiVpWe.js";
14
+ import "./transform-DyDAwEpE.js";
15
+ import "./messages-Dy9QecMs.js";
16
+ import "./util-vNmDL5DT.js";
17
+ import "./responses-CQb1Tj69.js";
18
+ import "./openai-BcB5KlTk.js";
19
+ import "./util-CgDCK4KI.js";
20
+ import "./completion-DCjv7RZ3.js";
21
+ import "./accounts-B2XmGjty.js";
22
+ import "./server-CP9qKM40.js";
23
+ import "./blobs-DXTl6J3H.js";
24
+ import "./tables-kC7R5kiK.js";
25
+ import "./extractor-D_wd8jxt.js";
26
+ import "./telemetry-BugWqKiu.js";
27
+ import "./store-Cj258DgL.js";
28
+ import "./base-CqzQ4K8j.js";
29
+ import "./image-CoxZp9PZ.js";
30
+ import "./providerRegistry-CUWki5mQ.js";
31
+ import "./rubyUtils-PgU-gHmx.js";
32
+ export { getGraderById };
@@ -1,8 +1,8 @@
1
- const require_logger = require("./logger-Cp1GPUjj.cjs");
2
- const require_fetch = require("./fetch-BxUk8odA.cjs");
3
- const require_cache = require("./cache-COish3-W.cjs");
4
- const require_openai = require("./openai-Cuif0GEt.cjs");
5
- const require_util = require("./util-Db0a0AFH.cjs");
1
+ const require_logger = require("./logger-D5iKBpu_.cjs");
2
+ const require_fetch = require("./fetch-BnR9wSnm.cjs");
3
+ const require_cache = require("./cache-C5yFZ4gC.cjs");
4
+ const require_openai = require("./openai-CoxGAQwn.cjs");
5
+ const require_util = require("./util-CFj4YKIn.cjs");
6
6
  const require_text = require("./text-CW1cyrwj.cjs");
7
7
  //#region src/providers/openai/image.ts
8
8
  const DALLE2_VALID_SIZES = [
@@ -277,4 +277,4 @@ Object.defineProperty(exports, "formatOutput", {
277
277
  }
278
278
  });
279
279
 
280
- //# sourceMappingURL=image-DTedmQPg.cjs.map
280
+ //# sourceMappingURL=image--F58eEIn.cjs.map
@@ -1,6 +1,6 @@
1
- import { b as getEnvString, i as logger } from "./logger-CT3IKMKA.js";
2
- import { p as REQUEST_TIMEOUT_MS } from "./fetch-60Gzydls.js";
3
- import { r as fetchWithCache } from "./cache-8XhNqPKW.js";
1
+ import { C as getEnvString, a as logger } from "./logger-BnkjG2jt.js";
2
+ import { p as REQUEST_TIMEOUT_MS } from "./fetch-BiYv2BZc.js";
3
+ import { r as fetchWithCache } from "./cache-HP0NP4k3.js";
4
4
  //#region src/providers/hyperbolic/image.ts
5
5
  const HYPERBOLIC_IMAGE_MODELS = [
6
6
  {
@@ -160,4 +160,4 @@ function createHyperbolicImageProvider(providerPath, options = {}) {
160
160
  //#endregion
161
161
  export { createHyperbolicImageProvider };
162
162
 
163
- //# sourceMappingURL=image-DJEvKveK.js.map
163
+ //# sourceMappingURL=image-6WQXK8m8.js.map
@@ -1,8 +1,8 @@
1
- import { i as logger } from "./logger-DLcq4dWf.js";
2
- import { p as REQUEST_TIMEOUT_MS } from "./fetch-4M3YRaqL.js";
3
- import { r as fetchWithCache } from "./cache-D3eqDYGU.js";
4
- import { t as OpenAiGenericProvider } from "./openai-j-sE2O7r.js";
5
- import { s as formatOpenAiError } from "./util-6-GqIvzS.js";
1
+ import { a as logger } from "./logger-BnkjG2jt.js";
2
+ import { p as REQUEST_TIMEOUT_MS } from "./fetch-BiYv2BZc.js";
3
+ import { r as fetchWithCache } from "./cache-HP0NP4k3.js";
4
+ import { t as OpenAiGenericProvider } from "./openai-D6wITiVn.js";
5
+ import { s as formatOpenAiError } from "./util-BtoGs5Cb.js";
6
6
  import { t as ellipsize } from "./text-TIv0QYnd.js";
7
7
  //#region src/providers/openai/image.ts
8
8
  const DALLE2_VALID_SIZES = [
@@ -254,4 +254,4 @@ var OpenAiImageProvider = class extends OpenAiGenericProvider {
254
254
  //#endregion
255
255
  export { formatOutput as i, buildStructuredImageOutputs as n, callOpenAiImageApi as r, OpenAiImageProvider as t };
256
256
 
257
- //# sourceMappingURL=image-pAX56tPG.js.map
257
+ //# sourceMappingURL=image-B8b6f36E.js.map
@@ -1,8 +1,8 @@
1
- import { i as logger } from "./logger-CT3IKMKA.js";
2
- import { p as REQUEST_TIMEOUT_MS } from "./fetch-60Gzydls.js";
3
- import { r as fetchWithCache } from "./cache-8XhNqPKW.js";
4
- import { t as OpenAiGenericProvider } from "./openai-DElQ-fPX.js";
5
- import { s as formatOpenAiError } from "./util-Betm42rL.js";
1
+ import { a as logger } from "./logger-DO8_zM18.js";
2
+ import { p as REQUEST_TIMEOUT_MS } from "./fetch-CVAtKnI3.js";
3
+ import { r as fetchWithCache } from "./cache-CaT5tPgo.js";
4
+ import { t as OpenAiGenericProvider } from "./openai-BcB5KlTk.js";
5
+ import { s as formatOpenAiError } from "./util-CgDCK4KI.js";
6
6
  import { t as ellipsize } from "./text-B_UCRPp2.js";
7
7
  //#region src/providers/openai/image.ts
8
8
  const DALLE2_VALID_SIZES = [
@@ -254,4 +254,4 @@ var OpenAiImageProvider = class extends OpenAiGenericProvider {
254
254
  //#endregion
255
255
  export { formatOutput as i, buildStructuredImageOutputs as n, callOpenAiImageApi as r, OpenAiImageProvider as t };
256
256
 
257
- //# sourceMappingURL=image-BmEZqVmk.js.map
257
+ //# sourceMappingURL=image-CoxZp9PZ.js.map
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env node
2
- import { C as getEnvString, o as logger } from "./logger-KkObSCzq.js";
3
- import { f as REQUEST_TIMEOUT_MS } from "./fetch-BMv0O527.js";
4
- import "./cloud-Bc9526yV.js";
5
- import { a as fetchWithCache } from "./cache-CG0SlR1d.js";
2
+ import { E as getEnvString, s as logger } from "./logger-BcJBzSSA.js";
3
+ import { f as REQUEST_TIMEOUT_MS } from "./fetch-DoVRJZhJ.js";
4
+ import "./cloud-z8KZpUoa.js";
5
+ import { a as fetchWithCache } from "./cache-DSqR6ezl.js";
6
6
  //#region src/providers/hyperbolic/image.ts
7
7
  const HYPERBOLIC_IMAGE_MODELS = [
8
8
  {
@@ -162,4 +162,4 @@ function createHyperbolicImageProvider(providerPath, options = {}) {
162
162
  //#endregion
163
163
  export { createHyperbolicImageProvider };
164
164
 
165
- //# sourceMappingURL=image-gvmivTEe.js.map
165
+ //# sourceMappingURL=image-DO0RYnjH.js.map
@@ -1,9 +1,9 @@
1
1
  #!/usr/bin/env node
2
- import { o as logger } from "./logger-KkObSCzq.js";
3
- import { f as REQUEST_TIMEOUT_MS } from "./fetch-BMv0O527.js";
4
- import { a as fetchWithCache } from "./cache-CG0SlR1d.js";
5
- import { t as OpenAiGenericProvider } from "./openai-DhbB7eWK.js";
6
- import { s as formatOpenAiError } from "./util-C-PPYSMq.js";
2
+ import { s as logger } from "./logger-BcJBzSSA.js";
3
+ import { f as REQUEST_TIMEOUT_MS } from "./fetch-DoVRJZhJ.js";
4
+ import { a as fetchWithCache } from "./cache-DSqR6ezl.js";
5
+ import { t as OpenAiGenericProvider } from "./openai-BMcwgD5C.js";
6
+ import { s as formatOpenAiError } from "./util-DM2rTn_6.js";
7
7
  import { t as ellipsize } from "./text-Db-Wt2u2.js";
8
8
  //#region src/providers/openai/image.ts
9
9
  const DALLE2_VALID_SIZES = [
@@ -255,4 +255,4 @@ var OpenAiImageProvider = class extends OpenAiGenericProvider {
255
255
  //#endregion
256
256
  export { formatOutput as i, buildStructuredImageOutputs as n, callOpenAiImageApi as r, OpenAiImageProvider as t };
257
257
 
258
- //# sourceMappingURL=image-CBBVXWuT.js.map
258
+ //# sourceMappingURL=image-PoF6DN3x.js.map
@@ -1,6 +1,6 @@
1
- const require_logger = require("./logger-Cp1GPUjj.cjs");
2
- const require_fetch = require("./fetch-BxUk8odA.cjs");
3
- const require_cache = require("./cache-COish3-W.cjs");
1
+ const require_logger = require("./logger-D5iKBpu_.cjs");
2
+ const require_fetch = require("./fetch-BnR9wSnm.cjs");
3
+ const require_cache = require("./cache-C5yFZ4gC.cjs");
4
4
  //#region src/providers/hyperbolic/image.ts
5
5
  const HYPERBOLIC_IMAGE_MODELS = [
6
6
  {
@@ -160,4 +160,4 @@ function createHyperbolicImageProvider(providerPath, options = {}) {
160
160
  //#endregion
161
161
  exports.createHyperbolicImageProvider = createHyperbolicImageProvider;
162
162
 
163
- //# sourceMappingURL=image-CDLQOcqT.cjs.map
163
+ //# sourceMappingURL=image-fza3zuKs.cjs.map
@@ -1,6 +1,6 @@
1
- import { b as getEnvString, i as logger } from "./logger-DLcq4dWf.js";
2
- import { p as REQUEST_TIMEOUT_MS } from "./fetch-4M3YRaqL.js";
3
- import { r as fetchWithCache } from "./cache-D3eqDYGU.js";
1
+ import { C as getEnvString, a as logger } from "./logger-DO8_zM18.js";
2
+ import { p as REQUEST_TIMEOUT_MS } from "./fetch-CVAtKnI3.js";
3
+ import { r as fetchWithCache } from "./cache-CaT5tPgo.js";
4
4
  //#region src/providers/hyperbolic/image.ts
5
5
  const HYPERBOLIC_IMAGE_MODELS = [
6
6
  {
@@ -160,4 +160,4 @@ function createHyperbolicImageProvider(providerPath, options = {}) {
160
160
  //#endregion
161
161
  export { createHyperbolicImageProvider };
162
162
 
163
- //# sourceMappingURL=image-tL5hIOFh.js.map
163
+ //# sourceMappingURL=image-xNbw5ph2.js.map