promptfoo 0.121.1 → 0.121.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/src/{accounts-xrUGFA6n.js → accounts-B2XmGjty.js} +5 -5
- package/dist/src/{accounts-Bx-x3bmW.cjs → accounts-BPyfpSeU.cjs} +5 -5
- package/dist/src/{accounts-CMqkzrVf.js → accounts-CFLK3mnD.js} +6 -6
- package/dist/src/{accounts-BgNJDBE6.js → accounts-Xatc0RYb.js} +5 -5
- package/dist/src/{agentic-utils-BKIN5PKu.js → agentic-utils-36epdqwB.js} +3 -3
- package/dist/src/{cometapi-DkXrKi5z.js → agentic-utils-D8yXo5Lm.js} +4 -61
- package/dist/src/{cometapi-vY6aDZgo.cjs → agentic-utils-DAVsChuB.cjs} +24 -62
- package/dist/src/agentic-utils-DIYAAYE7.js +153 -0
- package/dist/src/{agents-C-dDThPK.js → agents-BBVJCIYr.js} +226 -13
- package/dist/src/{agents-CErsqg5U.cjs → agents-BBWxKSM0.cjs} +7 -7
- package/dist/src/{agents-Dy2YpZpa.js → agents-Bqgfdokm.js} +227 -14
- package/dist/src/{agents-B0f4HICh.cjs → agents-CAYbM7qD.cjs} +226 -13
- package/dist/src/{agents-CVIn-Utx.js → agents-CLQ-P15P.js} +7 -7
- package/dist/src/{agents-DeH4Gu94.js → agents-CgBniSlI.js} +8 -8
- package/dist/src/{agents-CXknwsFX.js → agents-DSSTV4bv.js} +226 -13
- package/dist/src/{agents-aF4-T121.js → agents-wg3ohknq.js} +7 -7
- package/dist/src/{aimlapi-tg0Gkcvr.cjs → aimlapi-Bv8Fmc-b.cjs} +14 -14
- package/dist/src/{aimlapi-BNfTBexL.js → aimlapi-BwGC1TtS.js} +13 -13
- package/dist/src/{aimlapi-BAGZDo5G.js → aimlapi-DaC3qZ-o.js} +14 -14
- package/dist/src/{aimlapi-DHRKlBEA.js → aimlapi-MgSLdvy7.js} +13 -13
- package/dist/src/app/assets/index-B6l9CVVb.js +439 -0
- package/dist/src/app/assets/index-DyZ0Ep37.css +1 -0
- package/dist/src/app/assets/sync-CStkzc6u.js +4 -0
- package/dist/src/app/assets/vendor-charts-BnDWwBlI.js +36 -0
- package/dist/src/app/assets/vendor-markdown-Bz7N-ca6.js +29 -0
- package/dist/src/app/index.html +4 -4
- package/dist/src/{audio-tf_NBjlC.js → audio-Bn44pQxv.js} +4 -4
- package/dist/src/{audio-CHQ4r-RV.js → audio-DDA5WHdx.js} +4 -4
- package/dist/src/{audio-BWeaWovU.cjs → audio-DVFjQ67_.cjs} +4 -4
- package/dist/src/{audio-BRODU0UK.js → audio-DjU9GswO.js} +5 -5
- package/dist/src/{base-DBtwl2FR.cjs → base-BboXIF_0.cjs} +3 -3
- package/dist/src/{base-B4QJRyFS.js → base-CKjwebIH.js} +3 -3
- package/dist/src/{base-B0tcrnq_.js → base-CqzQ4K8j.js} +3 -3
- package/dist/src/{base-fEDN28WM.js → base-Cz2ZC_iA.js} +3 -3
- package/dist/src/{blobs-BAU-dXan.js → blobs-B1JriOyi.js} +3 -3
- package/dist/src/{blobs-qTYm-1PY.js → blobs-BUWmKWzo.js} +3 -3
- package/dist/src/{blobs-DvS-O6be.cjs → blobs-C6j0bvFz.cjs} +3 -3
- package/dist/src/{blobs-Bpg5rH6i.js → blobs-DXTl6J3H.js} +3 -3
- package/dist/src/{cache-COish3-W.cjs → cache-C5yFZ4gC.cjs} +75 -58
- package/dist/src/{cache-8XhNqPKW.js → cache-CaT5tPgo.js} +75 -58
- package/dist/src/cache-CyCanoMu.js +6 -0
- package/dist/src/{cache-CG0SlR1d.js → cache-DSqR6ezl.js} +75 -58
- package/dist/src/cache-Df_QFDNu.cjs +5 -0
- package/dist/src/{cache-D3eqDYGU.js → cache-HP0NP4k3.js} +75 -58
- package/dist/src/{chat-DHMH-N64.js → chat-B-52XYI1.js} +12 -12
- package/dist/src/{chat-BKm79wib.js → chat-B0iaWhoh.js} +16 -14
- package/dist/src/{chat-DxysjBvt.js → chat-BE0qTA8e.js} +13 -13
- package/dist/src/{chat-CRWNNq73.js → chat-BEwdgGEg.js} +16 -14
- package/dist/src/{chat-2K608PeQ.cjs → chat-BtIKkLKx.cjs} +13 -13
- package/dist/src/{chat-DaqekjFr.cjs → chat-CM8qWR3_.cjs} +17 -15
- package/dist/src/{chat-CM_kyI8B.js → chat-DK1U-eZ-.js} +12 -12
- package/dist/src/{chat-CznLWr_D.js → chat-pxmiVpWe.js} +16 -14
- package/dist/src/{chatkit-65VXf5SR.js → chatkit-BYGQlHlV.js} +4 -4
- package/dist/src/{chatkit-DKyPi1Gs.cjs → chatkit-Cx174XI3.cjs} +4 -4
- package/dist/src/{chatkit-BxFvW8KY.js → chatkit-_8eJqKcD.js} +4 -4
- package/dist/src/{chatkit-Be-Q-a9F.js → chatkit-a2D6mY6s.js} +4 -4
- package/dist/src/{claude-agent-sdk-CJH22shf.cjs → claude-agent-sdk-8ddRp1L2.cjs} +35 -17
- package/dist/src/{claude-agent-sdk-Dy5lT-Tx.js → claude-agent-sdk-Bq5EArsX.js} +33 -15
- package/dist/src/{claude-agent-sdk-BLTu0WBO.js → claude-agent-sdk-CMjh4LFH.js} +33 -15
- package/dist/src/{claude-agent-sdk-D6_k9FKA.js → claude-agent-sdk-HgbFioFw.js} +33 -15
- package/dist/src/cloud-DE3t1-ZI.js +4 -0
- package/dist/src/{cloud-Bc9526yV.js → cloud-z8KZpUoa.js} +3 -3
- package/dist/src/{cloudflare-ai-CWWJCRim.js → cloudflare-ai-BGyXlpXJ.js} +13 -13
- package/dist/src/{cloudflare-ai-C9r2sRhw.js → cloudflare-ai-Bbp26N0L.js} +13 -13
- package/dist/src/{cloudflare-ai-ClWSdor4.cjs → cloudflare-ai-C62x6MQG.cjs} +14 -14
- package/dist/src/{cloudflare-ai-ICsOuD-z.js → cloudflare-ai-DdKP9TKT.js} +14 -14
- package/dist/src/{cloudflare-gateway-D6xFc5pa.js → cloudflare-gateway-BwAaUgeW.js} +14 -14
- package/dist/src/{cloudflare-gateway-D6O7AlYb.js → cloudflare-gateway-D-e9i1Sn.js} +15 -15
- package/dist/src/{cloudflare-gateway-pXGHxJ47.js → cloudflare-gateway-DXhtXDRb.js} +15 -163
- package/dist/src/{cloudflare-gateway-C2_-KG5o.cjs → cloudflare-gateway-Dx36ftqF.cjs} +15 -15
- package/dist/src/{codex-sdk-DUwKWezN.js → codex-sdk-BQEw16R_.js} +180 -11
- package/dist/src/{codex-sdk-C6UMlxwV.js → codex-sdk-C_07GuVS.js} +180 -11
- package/dist/src/{codex-sdk-GGAw0qbD.js → codex-sdk-DE5G18dx.js} +180 -11
- package/dist/src/{codex-sdk-fAO0c3yA.cjs → codex-sdk-ZLKfDjqP.cjs} +181 -12
- package/dist/src/cometapi-BDyV-NNm.js +62 -0
- package/dist/src/cometapi-C3hOlM7-.cjs +62 -0
- package/dist/src/{cometapi-Bbjp5V4x.js → cometapi-hhL4TAh3.js} +14 -14
- package/dist/src/{cometapi-BasUi7-_.js → cometapi-sp7sJpBD.js} +15 -15
- package/dist/src/{completion-C_P3ypkJ.js → completion-BCimtq-h.js} +6 -6
- package/dist/src/{completion-6Mx_iXxK.js → completion-DCjv7RZ3.js} +6 -6
- package/dist/src/{completion-CDOouNzq.cjs → completion-DlXUhj5c.cjs} +6 -6
- package/dist/src/{completion-C5rtR_9P.js → completion-DoYy49ti.js} +6 -6
- package/dist/src/{createHash-CfZSc0b4.cjs → createHash-BYwImsYv.cjs} +2 -2
- package/dist/src/{docker-BwsKwxFs.cjs → docker-Cqj2-QVi.cjs} +14 -14
- package/dist/src/{docker-CZnqU1XV.js → docker-CxCkwMzc.js} +13 -13
- package/dist/src/{docker-DzxyDPIj.js → docker-DpguQj-w.js} +14 -14
- package/dist/src/{docker-5KcG-_86.js → docker-FeBni2dw.js} +13 -13
- package/dist/src/{esm-C03C-mv3.js → esm-7UIl0pPM.js} +2 -2
- package/dist/src/{esm-Cd1AjG1D.js → esm-CKWP3u_P.js} +3 -3
- package/dist/src/{esm-CnNt7sI4.cjs → esm-CipptfDu.cjs} +2 -2
- package/dist/src/{esm-CaIwzWR5.js → esm-SUNIX1x3.js} +3 -3
- package/dist/src/eval-7aEqoMs3.js +15 -0
- package/dist/src/{eval-DmFyWU7i.js → eval-BTqTn7lb.js} +10 -10
- package/dist/src/{evalResult-CDQiuUuf.js → evalResult-BkIhRdTe.js} +7 -7
- package/dist/src/evalResult-CYNHkk5A.js +12 -0
- package/dist/src/evalResult-CuvJeNiM.js +10 -0
- package/dist/src/{evalResult-CTG2AHOS.js → evalResult-DUDShQrm.js} +7 -7
- package/dist/src/{evalResult-Dap2CekP.cjs → evalResult-DpARzUCb.cjs} +7 -7
- package/dist/src/evalResult-tGdilrWt.cjs +10 -0
- package/dist/src/evaluator-BBUqRhz1.js +36 -0
- package/dist/src/{evaluator-DPFRbFIL.js → evaluator-BcvOGaam.js} +833 -79
- package/dist/src/{extractor-YMU_Gvt8.js → extractor-C8XwivI9.js} +6 -6
- package/dist/src/{extractor-CFG6bcWJ.js → extractor-CAZ2G3Kh.js} +6 -6
- package/dist/src/{extractor-DX36oYEv.cjs → extractor-DG3sSfXE.cjs} +6 -6
- package/dist/src/{extractor-M67RUtg6.js → extractor-D_wd8jxt.js} +6 -6
- package/dist/src/{fetch-4M3YRaqL.js → fetch-BiYv2BZc.js} +3 -3
- package/dist/src/{fetch-BxUk8odA.cjs → fetch-BnR9wSnm.cjs} +3 -3
- package/dist/src/{fetch-60Gzydls.js → fetch-CVAtKnI3.js} +3 -3
- package/dist/src/{fetch-BMv0O527.js → fetch-DoVRJZhJ.js} +4 -4
- package/dist/src/fetch-UWU706qb.js +5 -0
- package/dist/src/{genaiTracer-DN4dQywX.cjs → genaiTracer-BfxrvSUb.cjs} +2 -2
- package/dist/src/{graders-DOXycdlG.cjs → graders-BElhu9ZY.cjs} +126 -55
- package/dist/src/{graders-R9rYUM0d.js → graders-BXAJ0sbS.js} +120 -55
- package/dist/src/graders-BxfEguVY.js +32 -0
- package/dist/src/graders-CzVMbEnv.js +34 -0
- package/dist/src/{graders-CpdqD9PI.js → graders-DG7mhg-b.js} +120 -55
- package/dist/src/graders-DjCXfj0l.cjs +32 -0
- package/dist/src/{graders-CHO8EPM4.js → graders-RjHF8VfG.js} +120 -55
- package/dist/src/graders-kHzIWOKu.js +32 -0
- package/dist/src/{image-DTedmQPg.cjs → image--F58eEIn.cjs} +6 -6
- package/dist/src/{image-DJEvKveK.js → image-6WQXK8m8.js} +4 -4
- package/dist/src/{image-pAX56tPG.js → image-B8b6f36E.js} +6 -6
- package/dist/src/{image-BmEZqVmk.js → image-CoxZp9PZ.js} +6 -6
- package/dist/src/{image-gvmivTEe.js → image-DO0RYnjH.js} +5 -5
- package/dist/src/{image-CBBVXWuT.js → image-PoF6DN3x.js} +6 -6
- package/dist/src/{image-CDLQOcqT.cjs → image-fza3zuKs.cjs} +4 -4
- package/dist/src/{image-tL5hIOFh.js → image-xNbw5ph2.js} +4 -4
- package/dist/src/index.cjs +863 -110
- package/dist/src/index.d.cts +833 -60
- package/dist/src/index.d.ts +833 -60
- package/dist/src/index.js +860 -108
- package/dist/src/{interactiveCheck-BgLZUIt3.js → interactiveCheck-BnMYOjMu.js} +2 -2
- package/dist/src/{knowledgeBase-CoU-UQBg.js → knowledgeBase-Bi7CmDbx.js} +7 -7
- package/dist/src/{knowledgeBase-CLJybhnF.js → knowledgeBase-Ce3ofVan.js} +8 -8
- package/dist/src/{knowledgeBase-DjWPVqSb.js → knowledgeBase-DFRXPZl_.js} +7 -7
- package/dist/src/{knowledgeBase-wkxuRFhA.cjs → knowledgeBase-DqrLX8fy.cjs} +7 -7
- package/dist/src/{litellm-B9Hysuri.js → litellm-Bo2gQXpo.js} +16 -15
- package/dist/src/{litellm-ePxtr9F1.js → litellm-CKiAxnoM.js} +15 -14
- package/dist/src/{litellm-NYpQ8RQu.cjs → litellm-CnHI69aj.cjs} +16 -15
- package/dist/src/{litellm-CTfa0hqi.js → litellm-Tc294Jhj.js} +15 -14
- package/dist/src/{logger-KkObSCzq.js → logger-BcJBzSSA.js} +10 -14
- package/dist/src/{logger-DLcq4dWf.js → logger-BnkjG2jt.js} +10 -14
- package/dist/src/{logger-Cp1GPUjj.cjs → logger-D5iKBpu_.cjs} +27 -13
- package/dist/src/{logger-CT3IKMKA.js → logger-DO8_zM18.js} +10 -14
- package/dist/src/{luma-ray-BW9IRGIc.js → luma-ray-0ehMPt5N.js} +10 -10
- package/dist/src/{luma-ray-BE2mOt6N.js → luma-ray-C9q8rdQe.js} +9 -9
- package/dist/src/{luma-ray-Cm1KZBhs.js → luma-ray-DP0QA9qn.js} +9 -9
- package/dist/src/{luma-ray-B0GGNRc1.cjs → luma-ray-m9Ku2meV.cjs} +9 -9
- package/dist/src/main.js +69 -71
- package/dist/src/{messages-1x9atZmP.js → messages-DJNo37Ko.js} +14 -9
- package/dist/src/{messages-BLbWdsyt.js → messages-Dy9QecMs.js} +14 -9
- package/dist/src/{messages-1JrJs91T.cjs → messages-HJsyEh4o.cjs} +15 -10
- package/dist/src/{messages-D8EA0oDc.js → messages-biC_ex-p.js} +14 -9
- package/dist/src/{modelslab-C1OLRmVX.js → modelslab-B5J-ZM5c.js} +9 -9
- package/dist/src/{modelslab-CqXBy3U8.js → modelslab-BI458moT.js} +10 -10
- package/dist/src/{modelslab-X5-4LroM.js → modelslab-BTOT8FUO.js} +9 -9
- package/dist/src/{modelslab-DcOSFwKh.cjs → modelslab-IQbNg-r7.cjs} +9 -9
- package/dist/src/{nova-reel-DihqLeol.js → nova-reel-BZ9y-Y5s.js} +9 -9
- package/dist/src/{nova-reel-D9xfaMBs.cjs → nova-reel-CE5etkv9.cjs} +9 -9
- package/dist/src/{nova-reel-D2ZkOSyr.js → nova-reel-DEeQlnOJ.js} +10 -10
- package/dist/src/{nova-reel-BgS1ZWuK.js → nova-reel-Xw1SXLpg.js} +9 -9
- package/dist/src/{nova-sonic-Q3BOJeig.js → nova-sonic-DWswpN1E.js} +7 -7
- package/dist/src/{nova-sonic-DezhVUYT.js → nova-sonic-DXTLpi-r.js} +6 -6
- package/dist/src/{nova-sonic-DVu3mMIy.cjs → nova-sonic-N0yCm0vb.cjs} +6 -6
- package/dist/src/{nova-sonic-P-CdUMlV.js → nova-sonic-Ogqf-csn.js} +6 -6
- package/dist/src/{openai-DhbB7eWK.js → openai-BMcwgD5C.js} +2 -2
- package/dist/src/{openai-j-sE2O7r.js → openai-BcB5KlTk.js} +2 -2
- package/dist/src/{openai-Cuif0GEt.cjs → openai-CoxGAQwn.cjs} +2 -2
- package/dist/src/{openai-DElQ-fPX.js → openai-D6wITiVn.js} +2 -2
- package/dist/src/{openclaw-Bv1DINsX.js → openclaw-0Sv7AK3O.js} +172 -109
- package/dist/src/{openclaw-DAfWQn-o.cjs → openclaw-CXxbKgDH.cjs} +174 -110
- package/dist/src/{openclaw-BiSZPL7J.js → openclaw-D1FSCps-.js} +172 -109
- package/dist/src/{openclaw-D1D_ej1z.js → openclaw-D2ENvu7a.js} +173 -110
- package/dist/src/{opencode-sdk-D95s6SnR.js → opencode-sdk-C71Z0ehR.js} +13 -13
- package/dist/src/{opencode-sdk-DxUPkLT7.js → opencode-sdk-CHCs7dEb.js} +12 -12
- package/dist/src/{opencode-sdk-C7m-wRfI.js → opencode-sdk-DDxj4QqH.js} +12 -12
- package/dist/src/{opencode-sdk-CfaLN8PY.cjs → opencode-sdk-WWJhnbKr.cjs} +16 -16
- package/dist/src/{otlpReceiver-g3ByGaXs.js → otlpReceiver-C9KlUtxh.js} +6 -6
- package/dist/src/{otlpReceiver--AIRW_S4.js → otlpReceiver-CZL48YfC.js} +6 -6
- package/dist/src/{otlpReceiver-Bn5wGB1v.js → otlpReceiver-CavGAA6k.js} +6 -6
- package/dist/src/{otlpReceiver-Diec4cln.cjs → otlpReceiver-DHKqJlsz.cjs} +6 -6
- package/dist/src/{providerRegistry-B0RUOLI_.js → providerRegistry-B9lh-_tx.js} +2 -2
- package/dist/src/{providerRegistry-Civky8Ar.cjs → providerRegistry-BTDgfV5h.cjs} +2 -2
- package/dist/src/{providerRegistry-CD8MEar9.js → providerRegistry-BkzVH5Ba.js} +2 -2
- package/dist/src/{providerRegistry-DM8rZYol.js → providerRegistry-CUWki5mQ.js} +2 -2
- package/dist/src/providers-BSLEaIQG.js +32 -0
- package/dist/src/{providers-CFu-TZl-.cjs → providers-CScd1wN6.cjs} +733 -464
- package/dist/src/{providers-CFLy1_ji.js → providers-Ch6Mr0gn.js} +795 -526
- package/dist/src/{providers-BKRJTjBz.js → providers-Cn73d5sr.js} +795 -526
- package/dist/src/providers-D-FnDg8k.cjs +31 -0
- package/dist/src/providers-DEYiFVAo.js +30 -0
- package/dist/src/{providers-B3HvufyI.js → providers-DvddrgxL.js} +795 -526
- package/dist/src/providers-sS2WI8YD.js +30 -0
- package/dist/src/{pythonUtils-D6fwaDSg.js → pythonUtils-Bzwbgpbg.js} +3 -3
- package/dist/src/{pythonUtils-D5nxkQ0P.js → pythonUtils-Cpo0Ez1p.js} +3 -3
- package/dist/src/{pythonUtils-CTU3Y3lw.cjs → pythonUtils-dAVigVK-.cjs} +3 -3
- package/dist/src/{pythonUtils-C3py6GC1.js → pythonUtils-wIqk7zAf.js} +3 -3
- package/dist/src/{quiverai-CI6gYJVI.js → quiverai-BeofbLVc.js} +4 -4
- package/dist/src/{quiverai-MHSxbmmZ.js → quiverai-CCQn73lq.js} +5 -5
- package/dist/src/{quiverai-CLkWkyZc.cjs → quiverai-CcUhPIBg.cjs} +4 -4
- package/dist/src/{quiverai-C2jVwbH1.js → quiverai-DVSEqJiq.js} +4 -4
- package/dist/src/{render-Drod8m7K.js → render-BHl6QVq9.js} +3 -3
- package/dist/src/{responses-CGw0DCzh.js → responses-BKP_WYis.js} +16 -12
- package/dist/src/{responses-BKqJmhhc.js → responses-CQb1Tj69.js} +16 -12
- package/dist/src/{responses-jxdehPkC.js → responses-CgNyTPsY.js} +16 -12
- package/dist/src/{responses-tD4Bd4dc.cjs → responses-mo0KQDbu.cjs} +16 -12
- package/dist/src/rubyUtils-B1HXG4ej.cjs +4 -0
- package/dist/src/{rubyUtils-DhCAlxZr.cjs → rubyUtils-CGeUtCfW.cjs} +3 -3
- package/dist/src/{rubyUtils-Boc4HZzX.js → rubyUtils-CiVfln3g.js} +3 -3
- package/dist/src/{rubyUtils-BcuGX77l.js → rubyUtils-DECSbsfY.js} +3 -3
- package/dist/src/{rubyUtils-BUVePouc.js → rubyUtils-PgU-gHmx.js} +3 -3
- package/dist/src/rubyUtils-Rt6pKA96.js +5 -0
- package/dist/src/{sagemaker-BK4Zb993.js → sagemaker-CVv8W7so.js} +17 -17
- package/dist/src/{sagemaker-D2Q1c-sD.js → sagemaker-CqeASYE5.js} +17 -17
- package/dist/src/{sagemaker-BfiWTmvn.js → sagemaker-MUbD5V3v.js} +18 -18
- package/dist/src/{sagemaker-CcQHM1jV.cjs → sagemaker-jiw1wQa-.cjs} +17 -17
- package/dist/src/{scanner-J8CA3LsV.js → scanner-DVDeUz1r.js} +10 -10
- package/dist/src/server/index.js +864 -112
- package/dist/src/server-B0Xh1Gx-.js +7 -0
- package/dist/src/{server-B0PPuDw-.cjs → server-BtoCXeXI.cjs} +4 -4
- package/dist/src/{server-BC7XJFgr.js → server-CP9qKM40.js} +4 -4
- package/dist/src/{server-OAs3nBRT.js → server-Cns05F1j.js} +5 -5
- package/dist/src/server-DJTKu9IR.cjs +5 -0
- package/dist/src/{server-DbFphssR.js → server-DZ9MtCn0.js} +6 -6
- package/dist/src/{signal-BOTbd53Z.js → signal-C3ZTsUgi.js} +3 -3
- package/dist/src/{slack-DXMKtA-f.js → slack-2sdpGzbt.js} +2 -2
- package/dist/src/{slack-BmVAVGaK.cjs → slack-94iG3T0s.cjs} +2 -2
- package/dist/src/{slack-DCUPTzS2.js → slack-BR0HtO3K.js} +2 -2
- package/dist/src/{slack-DOdy_kyv.js → slack-DCEV-vWP.js} +2 -2
- package/dist/src/store-C5u6MgC8.js +6 -0
- package/dist/src/{store-BSc-TF2w.cjs → store-CLyU7AtI.cjs} +17 -5
- package/dist/src/store-CNHk-De4.cjs +5 -0
- package/dist/src/{store-DQLEjuEO.js → store-Cj258DgL.js} +17 -5
- package/dist/src/{store-D1tv90v3.js → store-P8OKm19S.js} +17 -5
- package/dist/src/{store-Ub2vaGJ1.js → store-VB0GP46K.js} +17 -5
- package/dist/src/{tables-xKANLRBD.js → tables-BEIFz2tM.js} +3 -3
- package/dist/src/{tables-C7K-XKWp.cjs → tables-BdZQEpRz.cjs} +3 -3
- package/dist/src/{tables-D36WTqKX.js → tables-DmzvLbeZ.js} +3 -3
- package/dist/src/{tables-5EvT_Bwn.js → tables-kC7R5kiK.js} +3 -3
- package/dist/src/{telemetry-C2YDkUQH.js → telemetry-BnH5VJAU.js} +4 -4
- package/dist/src/{telemetry-C15ziL8u.js → telemetry-BugWqKiu.js} +4 -4
- package/dist/src/{telemetry-DMb2Mpfm.js → telemetry-DPXLd7UE.js} +4 -4
- package/dist/src/telemetry-Yig0Tino.js +7 -0
- package/dist/src/telemetry-p8Pwqm1i.cjs +5 -0
- package/dist/src/{telemetry-CbrnxHp_.cjs → telemetry-re627Lre.cjs} +4 -4
- package/dist/src/{transcription-CL78qbOU.cjs → transcription-BvtsrzRG.cjs} +13 -13
- package/dist/src/{transcription-DAtxHhAM.js → transcription-CaMivnjG.js} +13 -13
- package/dist/src/{transcription-QHh3AH6Z.js → transcription-DOMMTu01.js} +14 -14
- package/dist/src/{transcription-LNZTNUUL.js → transcription-Hb3VnC4M.js} +13 -13
- package/dist/src/{transform-DOcQeLld.cjs → transform-0BwoBsvO.cjs} +19 -5
- package/dist/src/{transform-DGxXocjk.js → transform-B2-jIv68.js} +8 -6
- package/dist/src/{transform-DECvGmzp.js → transform-BqPkNPYm.js} +4 -4
- package/dist/src/{transform-aa6tmVpZ.js → transform-BzK09Q_9.js} +4 -4
- package/dist/src/transform-ChNIpHz7.js +6 -0
- package/dist/src/{transform-Cgi24fJ7.js → transform-DrleutM3.js} +8 -6
- package/dist/src/{transform-DGLazrMm.js → transform-DyDAwEpE.js} +8 -6
- package/dist/src/transform-PtQ6rAE3.cjs +5 -0
- package/dist/src/{transform-CzK1Q0zl.cjs → transform-ZrG2dvlo.cjs} +4 -4
- package/dist/src/{transform-DilY9wbS.js → transform-ljLYHEPh.js} +4 -4
- package/dist/src/{transformersAvailability-CEVM2GNQ.js → transformersAvailability-BGkzavwb.js} +1 -1
- package/dist/src/{transformersAvailability-CwayUSlh.cjs → transformersAvailability-DKoRtQLy.cjs} +1 -1
- package/dist/src/{types-CH3Ge2sE.js → types-CIhFeUC4.js} +45 -11
- package/dist/src/{types-CN_TZ2GJ.js → types-Cd3ygw8W.js} +45 -11
- package/dist/src/{types-LJ0r3wbR.cjs → types-D8cGDZbL.cjs} +46 -12
- package/dist/src/{types-CLKiCBW3.js → types-q8GXGF65.js} +45 -11
- package/dist/src/{util-CchiqXh_.cjs → util--9u9UVCt.cjs} +3 -3
- package/dist/src/{util-5cB-L7U3.js → util-BLvy9qfE.js} +7 -11
- package/dist/src/{util-YT5HPZaS.js → util-Bm3E9jpK.js} +7 -11
- package/dist/src/{util-6-GqIvzS.js → util-BtoGs5Cb.js} +18 -4
- package/dist/src/{util-Db0a0AFH.cjs → util-CFj4YKIn.cjs} +18 -4
- package/dist/src/{util-Dlz_Wvgm.js → util-CMMkIxfU.js} +7 -11
- package/dist/src/{util-Betm42rL.js → util-CgDCK4KI.js} +18 -4
- package/dist/src/{util-Yz-1aEhW.cjs → util-CuLo2pMR.cjs} +7 -11
- package/dist/src/{util-C-PPYSMq.js → util-DM2rTn_6.js} +18 -4
- package/dist/src/{util-B7T3SiBS.js → util-DMFeUvLz.js} +3 -3
- package/dist/src/{util-ZZH-3QZz.js → util-DbVG-yZU.js} +3 -3
- package/dist/src/{util-DaWTWKBK.js → util-vNmDL5DT.js} +3 -3
- package/dist/src/{utils-XiOAgly5.js → utils-CFxO9KGo.js} +2 -2
- package/dist/src/{utils-f2-Moju7.js → utils-DEuL4VNB.js} +2 -2
- package/dist/src/{utils-Cz9qXqII.cjs → utils-DKw8mrgr.cjs} +3 -3
- package/dist/src/{utils-dLokC-eR.js → utils-DOjD4dTC.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +38 -38
- package/dist/src/app/assets/index-BFCZg7hQ.js +0 -439
- package/dist/src/app/assets/index-NCn4eVBv.css +0 -1
- package/dist/src/app/assets/sync-9qqYcY-B.js +0 -4
- package/dist/src/app/assets/vendor-charts-CCl15Imd.js +0 -36
- package/dist/src/app/assets/vendor-markdown-0tekx3KX.js +0 -29
- package/dist/src/cache-Bbn1Nyrd.cjs +0 -5
- package/dist/src/cache-BwsMSda7.js +0 -6
- package/dist/src/cloud-DmE0EwsY.js +0 -4
- package/dist/src/eval-17JizQIv.js +0 -15
- package/dist/src/evalResult-Cqj8pldJ.js +0 -12
- package/dist/src/evalResult-DvcJAWJU.cjs +0 -10
- package/dist/src/evalResult-Hftn-S_i.js +0 -10
- package/dist/src/evaluator-B2CFNt-P.js +0 -36
- package/dist/src/fetch-KV5kNASw.js +0 -5
- package/dist/src/graders-Bu0H9nXi.js +0 -32
- package/dist/src/graders-Cfhkvx-e.js +0 -34
- package/dist/src/graders-DClJVpGP.cjs +0 -32
- package/dist/src/graders-DcnJsrMO.js +0 -32
- package/dist/src/providers-C1rOSHiR.js +0 -32
- package/dist/src/providers-CxmDwEFf.cjs +0 -31
- package/dist/src/providers-Dodakqr0.js +0 -30
- package/dist/src/providers-GIQ2TcsA.js +0 -30
- package/dist/src/rubyUtils-BUHu6PhO.js +0 -5
- package/dist/src/rubyUtils-CP42kMvq.cjs +0 -4
- package/dist/src/server-B1vi21hA.js +0 -7
- package/dist/src/server-Cm9Kai_h.cjs +0 -5
- package/dist/src/store-BNmZ1KAz.cjs +0 -5
- package/dist/src/store-BltJg2cd.js +0 -6
- package/dist/src/telemetry-5BCRNBbe.cjs +0 -5
- package/dist/src/telemetry-D4W5hboe.js +0 -7
- package/dist/src/transform-DTGDnAzW.js +0 -6
- package/dist/src/transform-m3qNw4KP.cjs +0 -5
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
import { C as
|
|
2
|
-
import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-
|
|
1
|
+
import { C as getEnvString, D as state, E as isCI, _ as safeJsonStringify, a as logger, b as getEnvBool, f as extractFirstJsonObject, p as extractJsonObjects } from "./logger-BnkjG2jt.js";
|
|
2
|
+
import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-BiYv2BZc.js";
|
|
3
3
|
import { t as invariant } from "./invariant-vgHWClmd.js";
|
|
4
|
-
import { o as getUserEmail } from "./accounts-
|
|
5
|
-
import { c as getRemoteGenerationUrl, p as shouldGenerateRemote } from "./server-
|
|
6
|
-
import { r as importModule } from "./esm-
|
|
7
|
-
import { C as isValidReusablePolicyId, X as LLAMA_GUARD_REPLICATE_PROVIDER, k as MULTI_TURN_STRATEGIES, lt as PromptSchema, x as PolicyObjectSchema } from "./types-
|
|
4
|
+
import { o as getUserEmail } from "./accounts-Xatc0RYb.js";
|
|
5
|
+
import { c as getRemoteGenerationUrl, p as shouldGenerateRemote } from "./server-Cns05F1j.js";
|
|
6
|
+
import { r as importModule } from "./esm-CKWP3u_P.js";
|
|
7
|
+
import { C as isValidReusablePolicyId, X as LLAMA_GUARD_REPLICATE_PROVIDER, k as MULTI_TURN_STRATEGIES, lt as PromptSchema, x as PolicyObjectSchema } from "./types-Cd3ygw8W.js";
|
|
8
8
|
import { i as isJavascriptFile } from "./fileExtensions-LcDYkU4v.js";
|
|
9
9
|
import { n as sha256 } from "./createHash-CTQmL3G2.js";
|
|
10
|
-
import { i as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-
|
|
11
|
-
import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-
|
|
10
|
+
import { i as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-DOjD4dTC.js";
|
|
11
|
+
import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-HP0NP4k3.js";
|
|
12
12
|
import { r as accumulateTokenUsage } from "./tokenUsageUtils-BDGe-iyI.js";
|
|
13
|
-
import { $ as DefaultSynthesizeProvider$1, A as removePrefix, C as extractInputVarsFromPrompt, D as getShortPluginId, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, I as redteamProviderManager, K as DefaultGradingProvider$3, O as isBasicRefusal, Q as DefaultSuggestionsProvider$2, S as extractGoalFromPrompt, T as extractVariablesFromJson, U as MistralChatCompletionProvider, V as REDTEAM_MEMORY_POISONING_PLUGIN_ID, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, b as checkExfilTracking, ct as getPoliciesFromCloud, et as AzureModerationProvider, k as isEmptyResponse, n as loadApiProvider, nt as AzureChatCompletionProvider, o as getFileHashes, s as parseScriptParts, tt as AzureEmbeddingProvider, w as extractPromptFromTags, x as extractAllPromptsFromTags } from "./providers-
|
|
14
|
-
import { r as runPython } from "./pythonUtils-
|
|
15
|
-
import { I as getNunjucksEngine, O as maybeLoadToolsFromExternalFile, P as extractVariablesFromTemplate, R as parseFileUrl, S as getNunjucksEngineForFilePath, T as maybeLoadFromExternalFile, k as parsePathOrGlob, w as maybeLoadConfigFromExternalFile } from "./util-
|
|
16
|
-
import { t as OpenAiChatCompletionProvider } from "./chat-
|
|
17
|
-
import {
|
|
18
|
-
import { t as AnthropicMessagesProvider } from "./messages-
|
|
19
|
-
import { t as OpenAiResponsesProvider } from "./responses-
|
|
20
|
-
import { n as OpenAiEmbeddingProvider } from "./completion-
|
|
21
|
-
import { n as transform } from "./transform-
|
|
13
|
+
import { $ as DefaultSynthesizeProvider$1, A as removePrefix, C as extractInputVarsFromPrompt, D as getShortPluginId, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, I as redteamProviderManager, K as DefaultGradingProvider$3, O as isBasicRefusal, Q as DefaultSuggestionsProvider$2, S as extractGoalFromPrompt, T as extractVariablesFromJson, U as MistralChatCompletionProvider, V as REDTEAM_MEMORY_POISONING_PLUGIN_ID, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, b as checkExfilTracking, ct as getPoliciesFromCloud, et as AzureModerationProvider, k as isEmptyResponse, n as loadApiProvider, nt as AzureChatCompletionProvider, o as getFileHashes, s as parseScriptParts, tt as AzureEmbeddingProvider, w as extractPromptFromTags, x as extractAllPromptsFromTags } from "./providers-DvddrgxL.js";
|
|
14
|
+
import { r as runPython } from "./pythonUtils-Bzwbgpbg.js";
|
|
15
|
+
import { I as getNunjucksEngine, O as maybeLoadToolsFromExternalFile, P as extractVariablesFromTemplate, R as parseFileUrl, S as getNunjucksEngineForFilePath, T as maybeLoadFromExternalFile, k as parsePathOrGlob, w as maybeLoadConfigFromExternalFile } from "./util-BLvy9qfE.js";
|
|
16
|
+
import { t as OpenAiChatCompletionProvider } from "./chat-B0iaWhoh.js";
|
|
17
|
+
import { b as hasGoogleDefaultCredentials } from "./transform-B2-jIv68.js";
|
|
18
|
+
import { t as AnthropicMessagesProvider } from "./messages-biC_ex-p.js";
|
|
19
|
+
import { t as OpenAiResponsesProvider } from "./responses-CgNyTPsY.js";
|
|
20
|
+
import { n as OpenAiEmbeddingProvider } from "./completion-BCimtq-h.js";
|
|
21
|
+
import { n as transform } from "./transform-BzK09Q_9.js";
|
|
22
22
|
import * as fs$1 from "fs";
|
|
23
23
|
import fs from "fs";
|
|
24
24
|
import path from "path";
|
|
@@ -588,6 +588,37 @@ const DEFAULT_WEB_SEARCH_PROMPT = JSON.stringify([{
|
|
|
588
588
|
role: "user",
|
|
589
589
|
content: "<Output>\n{{ output }}\n</Output>\n<Rubric>\n{{ rubric }}\n</Rubric>"
|
|
590
590
|
}]);
|
|
591
|
+
const TRAJECTORY_GOAL_SUCCESS_PROMPT = JSON.stringify([{
|
|
592
|
+
role: "system",
|
|
593
|
+
content: dedent`You are grading whether an AI agent successfully completed a goal based on its final output and a summarized execution trajectory. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
|
|
594
|
+
|
|
595
|
+
Judge end-to-end success, not stylistic perfection.
|
|
596
|
+
Use the trajectory as evidence for what the agent actually did.
|
|
597
|
+
Give partial credit when the agent made progress but did not fully achieve the goal.
|
|
598
|
+
|
|
599
|
+
Examples:
|
|
600
|
+
|
|
601
|
+
<Goal>Find the order status and tell the user whether it has shipped</Goal>
|
|
602
|
+
<Trajectory>{"stepCount":2,"steps":[{"index":1,"type":"tool","name":"search_orders"},{"index":2,"type":"message","name":"agent response"}]}</Trajectory>
|
|
603
|
+
<Output>Your order shipped yesterday and should arrive on Tuesday.</Output>
|
|
604
|
+
{"reason":"The agent used the order lookup tool and gave the user the shipping status, so the goal was achieved.","pass":true,"score":1.0}
|
|
605
|
+
|
|
606
|
+
<Goal>Find the order status and tell the user whether it has shipped</Goal>
|
|
607
|
+
<Trajectory>{"stepCount":1,"steps":[{"index":1,"type":"message","name":"agent response"}]}</Trajectory>
|
|
608
|
+
<Output>I cannot check your order right now.</Output>
|
|
609
|
+
{"reason":"The agent did not show evidence of checking the order and did not provide the requested status.","pass":false,"score":0.0}`
|
|
610
|
+
}, {
|
|
611
|
+
role: "user",
|
|
612
|
+
content: dedent`<Goal>
|
|
613
|
+
{{ goal }}
|
|
614
|
+
</Goal>
|
|
615
|
+
<Trajectory>
|
|
616
|
+
{{ trajectory }}
|
|
617
|
+
</Trajectory>
|
|
618
|
+
<Output>
|
|
619
|
+
{{ output }}
|
|
620
|
+
</Output>`
|
|
621
|
+
}]);
|
|
591
622
|
//#endregion
|
|
592
623
|
//#region src/prompts/processors/csv.ts
|
|
593
624
|
/**
|
|
@@ -1578,45 +1609,31 @@ async function renderLlmRubricPrompt(rubricPrompt, context) {
|
|
|
1578
1609
|
} catch {}
|
|
1579
1610
|
return nunjucks.renderString(rubricPrompt, processedContext);
|
|
1580
1611
|
}
|
|
1581
|
-
|
|
1582
|
-
if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
|
|
1583
|
-
if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
|
|
1584
|
-
...await doRemoteGrading({
|
|
1585
|
-
task: "llm-rubric",
|
|
1586
|
-
rubric,
|
|
1587
|
-
output: llmOutput,
|
|
1588
|
-
vars: vars || {}
|
|
1589
|
-
}),
|
|
1590
|
-
assertion
|
|
1591
|
-
};
|
|
1592
|
-
const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading?.rubricPrompt, DEFAULT_GRADING_PROMPT), {
|
|
1593
|
-
output: tryParse(llmOutput),
|
|
1594
|
-
rubric,
|
|
1595
|
-
...vars || {}
|
|
1596
|
-
});
|
|
1597
|
-
const defaultProviders = await getDefaultProviders();
|
|
1598
|
-
const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
|
|
1599
|
-
const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, "llm-rubric check"), prompt, "llm-rubric", {
|
|
1600
|
-
output: tryParse(llmOutput),
|
|
1601
|
-
rubric,
|
|
1602
|
-
...vars || {}
|
|
1603
|
-
}, providerCallContext);
|
|
1604
|
-
if (resp.error || !resp.output) {
|
|
1605
|
-
if (options?.throwOnError) throw new LlmRubricProviderError(resp.error || "No output");
|
|
1606
|
-
return fail(resp.error || "No output", resp.tokenUsage);
|
|
1607
|
-
}
|
|
1612
|
+
function parseJsonGradingResponse(label, resp) {
|
|
1608
1613
|
let jsonObjects = [];
|
|
1609
1614
|
if (typeof resp.output === "string") try {
|
|
1610
1615
|
jsonObjects = extractJsonObjects(resp.output);
|
|
1611
|
-
if (jsonObjects.length === 0) return fail(
|
|
1616
|
+
if (jsonObjects.length === 0) return { failure: fail(`Could not extract JSON from ${label} response`, resp.tokenUsage) };
|
|
1612
1617
|
} catch (err) {
|
|
1613
|
-
return fail(
|
|
1618
|
+
return { failure: fail(`${label} produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage) };
|
|
1614
1619
|
}
|
|
1615
1620
|
else if (typeof resp.output === "object") jsonObjects = [resp.output];
|
|
1616
|
-
else return fail(
|
|
1617
|
-
if (!Array.isArray(jsonObjects) || jsonObjects.length === 0) return fail(`llm-rubric produced malformed response - We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
|
|
1621
|
+
else return { failure: fail(`${label} produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
|
|
1618
1622
|
const parsed = jsonObjects[0];
|
|
1619
|
-
if (typeof parsed !== "object" || parsed === null || parsed === void 0) return fail(
|
|
1623
|
+
if (typeof parsed !== "object" || parsed === null || parsed === void 0) return { failure: fail(`${label} produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
|
|
1624
|
+
return { parsed };
|
|
1625
|
+
}
|
|
1626
|
+
async function runJsonGradingPrompt({ assertion, checkName, defaultPrompt, grading, label, providerCallContext, throwOnError, vars }) {
|
|
1627
|
+
const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading.rubricPrompt, defaultPrompt), vars);
|
|
1628
|
+
const defaultProviders = await getDefaultProviders();
|
|
1629
|
+
const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
|
|
1630
|
+
const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, checkName), prompt, label, vars, providerCallContext);
|
|
1631
|
+
if (resp.error || !resp.output) {
|
|
1632
|
+
if (throwOnError) throw new Error(resp.error || "No output");
|
|
1633
|
+
return fail(resp.error || "No output", resp.tokenUsage);
|
|
1634
|
+
}
|
|
1635
|
+
const { parsed, failure } = parseJsonGradingResponse(label, resp);
|
|
1636
|
+
if (!parsed) return failure;
|
|
1620
1637
|
let pass = parsed.pass ?? true;
|
|
1621
1638
|
if (typeof pass !== "boolean") pass = /^(true|yes|pass|y)$/i.test(String(pass));
|
|
1622
1639
|
let score = parsed.score;
|
|
@@ -1644,6 +1661,54 @@ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, opt
|
|
|
1644
1661
|
metadata: { renderedGradingPrompt: prompt }
|
|
1645
1662
|
};
|
|
1646
1663
|
}
|
|
1664
|
+
async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
|
|
1665
|
+
if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
|
|
1666
|
+
if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
|
|
1667
|
+
...await doRemoteGrading({
|
|
1668
|
+
task: "llm-rubric",
|
|
1669
|
+
rubric,
|
|
1670
|
+
output: llmOutput,
|
|
1671
|
+
vars: vars || {}
|
|
1672
|
+
}),
|
|
1673
|
+
assertion
|
|
1674
|
+
};
|
|
1675
|
+
try {
|
|
1676
|
+
return await runJsonGradingPrompt({
|
|
1677
|
+
assertion,
|
|
1678
|
+
checkName: "llm-rubric check",
|
|
1679
|
+
defaultPrompt: DEFAULT_GRADING_PROMPT,
|
|
1680
|
+
grading,
|
|
1681
|
+
label: "llm-rubric",
|
|
1682
|
+
providerCallContext,
|
|
1683
|
+
throwOnError: options?.throwOnError,
|
|
1684
|
+
vars: {
|
|
1685
|
+
output: tryParse(llmOutput),
|
|
1686
|
+
rubric,
|
|
1687
|
+
...vars || {}
|
|
1688
|
+
}
|
|
1689
|
+
});
|
|
1690
|
+
} catch (error) {
|
|
1691
|
+
if (options?.throwOnError) throw new LlmRubricProviderError(error.message || "No output");
|
|
1692
|
+
throw error;
|
|
1693
|
+
}
|
|
1694
|
+
}
|
|
1695
|
+
async function matchesTrajectoryGoalSuccess(goal, trajectory, llmOutput, grading, vars, assertion, providerCallContext) {
|
|
1696
|
+
if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
|
|
1697
|
+
return runJsonGradingPrompt({
|
|
1698
|
+
assertion,
|
|
1699
|
+
checkName: "trajectory:goal-success check",
|
|
1700
|
+
defaultPrompt: TRAJECTORY_GOAL_SUCCESS_PROMPT,
|
|
1701
|
+
grading,
|
|
1702
|
+
label: "trajectory:goal-success",
|
|
1703
|
+
providerCallContext,
|
|
1704
|
+
vars: {
|
|
1705
|
+
...vars || {},
|
|
1706
|
+
goal,
|
|
1707
|
+
output: tryParse(llmOutput),
|
|
1708
|
+
trajectory
|
|
1709
|
+
}
|
|
1710
|
+
});
|
|
1711
|
+
}
|
|
1647
1712
|
async function matchesPiScore(renderedValue, llmInput, llmOutput, assertion) {
|
|
1648
1713
|
return {
|
|
1649
1714
|
...await doRemoteScoringWithPi({
|
|
@@ -2151,7 +2216,7 @@ async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
|
|
|
2151
2216
|
let totalWeight = 0;
|
|
2152
2217
|
relevantResults.forEach((componentResult) => {
|
|
2153
2218
|
const assertionType = componentResult.assertion?.type || "unknown";
|
|
2154
|
-
const weight = options.weights[assertionType]
|
|
2219
|
+
const weight = options.weights[assertionType] === void 0 ? 1 : options.weights[assertionType];
|
|
2155
2220
|
const score = componentResult.score || 0;
|
|
2156
2221
|
totalWeightedScore += score * weight;
|
|
2157
2222
|
totalWeight += weight;
|
|
@@ -2401,9 +2466,9 @@ async function fetchHuggingFaceDataset(datasetPath, limit) {
|
|
|
2401
2466
|
while (true) {
|
|
2402
2467
|
const requestParams = new URLSearchParams(queryParams);
|
|
2403
2468
|
requestParams.set("offset", offset.toString());
|
|
2404
|
-
const remainingUserLimit = userLimit
|
|
2405
|
-
const remainingDatasetRows = totalRows
|
|
2406
|
-
const requestedLength = remainingUserLimit
|
|
2469
|
+
const remainingUserLimit = userLimit === void 0 ? void 0 : Math.max(userLimit - offset, 0);
|
|
2470
|
+
const remainingDatasetRows = totalRows === void 0 ? void 0 : Math.max(totalRows - offset, 0);
|
|
2471
|
+
const requestedLength = remainingUserLimit === void 0 ? remainingDatasetRows === void 0 ? pageSize : Math.min(pageSize, remainingDatasetRows) : Math.min(pageSize, remainingUserLimit);
|
|
2407
2472
|
if (requestedLength <= 0) {
|
|
2408
2473
|
logger.debug(`[HF Dataset] No remaining rows to fetch for ${owner}/${repo} (offset ${offset})`);
|
|
2409
2474
|
break;
|
|
@@ -13461,6 +13526,6 @@ function getGraderById(id) {
|
|
|
13461
13526
|
return grader;
|
|
13462
13527
|
}
|
|
13463
13528
|
//#endregion
|
|
13464
|
-
export {
|
|
13529
|
+
export { matchesTrajectoryGoalSuccess as $, BeavertailsPlugin as A, matchesClassification as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, callProviderWithContext as F, matchesFactuality as G, matchesContextFaithfulness as H, fail as I, matchesModeration as J, matchesGEval as K, getAndCheckProvider as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, fetchHuggingFaceDataset as P, matchesSimilarity as Q, loadRubricPrompt as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesContextRecall as U, matchesClosedQa as V, matchesContextRelevance as W, matchesSearchRubric as X, matchesPiScore as Y, matchesSelectBest as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, SUGGEST_PROMPTS_SYSTEM_MESSAGE as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, loadFromJavaScriptFile as ct, RbacPlugin as d, getCustomPolicies as dt, selectMaxScore as et, PromptExtractionPlugin as f, retryWithDeduplication as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, DefaultSuggestionsProvider as ht, VLGuardPlugin as i, readProviderPromptMap as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, processFileReference as lt, PolicyPlugin as m, getDefaultProviders as mt, getGraderById as n, processPrompts as nt, UnsafeBenchPlugin as o, coerceString as ot, PoliticsPlugin as p, sampleArray as pt, matchesLlmRubric as q, VLSUPlugin as r, readPrompts as rt, ToxicChatPlugin as s, getFinalTest as st, GRADERS as t, doRemoteGrading as tt, ShellInjectionPlugin as u, resolveContext as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, matchesAnswerRelevance as z };
|
|
13465
13530
|
|
|
13466
|
-
//# sourceMappingURL=graders-
|
|
13531
|
+
//# sourceMappingURL=graders-BXAJ0sbS.js.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import "./logger-BnkjG2jt.js";
|
|
2
|
+
import "./fetch-BiYv2BZc.js";
|
|
3
|
+
import "./accounts-Xatc0RYb.js";
|
|
4
|
+
import "./server-Cns05F1j.js";
|
|
5
|
+
import "./tables-BEIFz2tM.js";
|
|
6
|
+
import "./esm-CKWP3u_P.js";
|
|
7
|
+
import "./types-Cd3ygw8W.js";
|
|
8
|
+
import "./utils-DOjD4dTC.js";
|
|
9
|
+
import "./store-VB0GP46K.js";
|
|
10
|
+
import "./cache-HP0NP4k3.js";
|
|
11
|
+
import "./blobs-BUWmKWzo.js";
|
|
12
|
+
import "./extractor-C8XwivI9.js";
|
|
13
|
+
import "./providers-DvddrgxL.js";
|
|
14
|
+
import "./telemetry-DPXLd7UE.js";
|
|
15
|
+
import "./genaiTracer-70Z8BIuV.js";
|
|
16
|
+
import "./pythonUtils-Bzwbgpbg.js";
|
|
17
|
+
import "./util-BLvy9qfE.js";
|
|
18
|
+
import "./chat-B0iaWhoh.js";
|
|
19
|
+
import "./transform-B2-jIv68.js";
|
|
20
|
+
import "./messages-biC_ex-p.js";
|
|
21
|
+
import "./util-DbVG-yZU.js";
|
|
22
|
+
import { n as getGraderById } from "./graders-BXAJ0sbS.js";
|
|
23
|
+
import "./responses-CgNyTPsY.js";
|
|
24
|
+
import "./openai-D6wITiVn.js";
|
|
25
|
+
import "./util-BtoGs5Cb.js";
|
|
26
|
+
import "./completion-BCimtq-h.js";
|
|
27
|
+
import "./transform-BzK09Q_9.js";
|
|
28
|
+
import "./base-Cz2ZC_iA.js";
|
|
29
|
+
import "./image-B8b6f36E.js";
|
|
30
|
+
import "./providerRegistry-BkzVH5Ba.js";
|
|
31
|
+
import "./rubyUtils-DECSbsfY.js";
|
|
32
|
+
export { getGraderById };
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import "./logger-BcJBzSSA.js";
|
|
3
|
+
import "./fetch-DoVRJZhJ.js";
|
|
4
|
+
import "./accounts-CFLK3mnD.js";
|
|
5
|
+
import "./cloud-z8KZpUoa.js";
|
|
6
|
+
import "./telemetry-BnH5VJAU.js";
|
|
7
|
+
import "./types-CIhFeUC4.js";
|
|
8
|
+
import "./server-DZ9MtCn0.js";
|
|
9
|
+
import "./providers-Ch6Mr0gn.js";
|
|
10
|
+
import "./cache-DSqR6ezl.js";
|
|
11
|
+
import "./util-Bm3E9jpK.js";
|
|
12
|
+
import "./esm-7UIl0pPM.js";
|
|
13
|
+
import "./pythonUtils-wIqk7zAf.js";
|
|
14
|
+
import "./transform-ljLYHEPh.js";
|
|
15
|
+
import { n as getGraderById } from "./graders-RjHF8VfG.js";
|
|
16
|
+
import "./utils-DEuL4VNB.js";
|
|
17
|
+
import "./genaiTracer-C1rxGO8Q.js";
|
|
18
|
+
import "./chat-BEwdgGEg.js";
|
|
19
|
+
import "./transform-DrleutM3.js";
|
|
20
|
+
import "./messages-DJNo37Ko.js";
|
|
21
|
+
import "./util-DMFeUvLz.js";
|
|
22
|
+
import "./responses-BKP_WYis.js";
|
|
23
|
+
import "./openai-BMcwgD5C.js";
|
|
24
|
+
import "./util-DM2rTn_6.js";
|
|
25
|
+
import "./completion-DoYy49ti.js";
|
|
26
|
+
import "./blobs-B1JriOyi.js";
|
|
27
|
+
import "./tables-DmzvLbeZ.js";
|
|
28
|
+
import "./extractor-CAZ2G3Kh.js";
|
|
29
|
+
import "./store-P8OKm19S.js";
|
|
30
|
+
import "./base-CKjwebIH.js";
|
|
31
|
+
import "./image-PoF6DN3x.js";
|
|
32
|
+
import "./providerRegistry-B9lh-_tx.js";
|
|
33
|
+
import "./rubyUtils-CiVfln3g.js";
|
|
34
|
+
export { getGraderById };
|
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as getEnvString, D as state, E as isCI, _ as safeJsonStringify, a as logger, b as getEnvBool, f as extractFirstJsonObject, p as extractJsonObjects } from "./logger-DO8_zM18.js";
|
|
2
2
|
import { t as invariant } from "./invariant-Ddh24eXh.js";
|
|
3
|
-
import { r as importModule } from "./esm-
|
|
4
|
-
import { r as runPython } from "./pythonUtils-
|
|
3
|
+
import { r as importModule } from "./esm-SUNIX1x3.js";
|
|
4
|
+
import { r as runPython } from "./pythonUtils-Cpo0Ez1p.js";
|
|
5
5
|
import { i as isJavascriptFile } from "./fileExtensions-DnqA1y9x.js";
|
|
6
|
-
import { n as transform } from "./transform-
|
|
7
|
-
import { B as isValidReusablePolicyId, G as MULTI_TURN_STRATEGIES, R as PolicyObjectSchema, St as PromptSchema, ut as LLAMA_GUARD_REPLICATE_PROVIDER } from "./types-
|
|
8
|
-
import { _ as extractVariablesFromTemplate, a as getNunjucksEngineForFilePath, c as maybeLoadFromExternalFile, d as maybeLoadToolsFromExternalFile, f as parsePathOrGlob, s as maybeLoadConfigFromExternalFile, x as parseFileUrl, y as getNunjucksEngine } from "./util-
|
|
9
|
-
import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-
|
|
10
|
-
import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-
|
|
11
|
-
import { $ as DefaultSynthesizeProvider$1, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, K as DefaultGradingProvider$3, N as REDTEAM_MEMORY_POISONING_PLUGIN_ID, O as redteamProviderManager, Q as DefaultSuggestionsProvider$2, S as removePrefix, U as MistralChatCompletionProvider, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, _ as extractVariablesFromJson, b as isBasicRefusal, et as AzureModerationProvider, f as checkExfilTracking, g as extractPromptFromTags, h as extractInputVarsFromPrompt, it as parseScriptParts, m as extractGoalFromPrompt, n as loadApiProvider, nt as AzureChatCompletionProvider, p as extractAllPromptsFromTags, rt as getFileHashes, tt as AzureEmbeddingProvider, x as isEmptyResponse, y as getShortPluginId, z as getPoliciesFromCloud } from "./providers-
|
|
12
|
-
import { a as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-
|
|
6
|
+
import { n as transform } from "./transform-BqPkNPYm.js";
|
|
7
|
+
import { B as isValidReusablePolicyId, G as MULTI_TURN_STRATEGIES, R as PolicyObjectSchema, St as PromptSchema, ut as LLAMA_GUARD_REPLICATE_PROVIDER } from "./types-q8GXGF65.js";
|
|
8
|
+
import { _ as extractVariablesFromTemplate, a as getNunjucksEngineForFilePath, c as maybeLoadFromExternalFile, d as maybeLoadToolsFromExternalFile, f as parsePathOrGlob, s as maybeLoadConfigFromExternalFile, x as parseFileUrl, y as getNunjucksEngine } from "./util-CMMkIxfU.js";
|
|
9
|
+
import { d as sleep, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy } from "./fetch-CVAtKnI3.js";
|
|
10
|
+
import { a as isCacheEnabled, i as getCache, r as fetchWithCache } from "./cache-CaT5tPgo.js";
|
|
11
|
+
import { $ as DefaultSynthesizeProvider$1, G as DefaultEmbeddingProvider$2, H as OpenAiModerationProvider, K as DefaultGradingProvider$3, N as REDTEAM_MEMORY_POISONING_PLUGIN_ID, O as redteamProviderManager, Q as DefaultSuggestionsProvider$2, S as removePrefix, U as MistralChatCompletionProvider, W as MistralEmbeddingProvider, X as DefaultGradingProvider$2, Y as DefaultGradingJsonProvider$2, Z as DefaultLlmRubricProvider, _ as extractVariablesFromJson, b as isBasicRefusal, et as AzureModerationProvider, f as checkExfilTracking, g as extractPromptFromTags, h as extractInputVarsFromPrompt, it as parseScriptParts, m as extractGoalFromPrompt, n as loadApiProvider, nt as AzureChatCompletionProvider, p as extractAllPromptsFromTags, rt as getFileHashes, tt as AzureEmbeddingProvider, x as isEmptyResponse, y as getShortPluginId, z as getPoliciesFromCloud } from "./providers-Cn73d5sr.js";
|
|
12
|
+
import { a as PROMPT_DELIMITER, n as maybeFilePath, r as normalizeInput } from "./utils-CFxO9KGo.js";
|
|
13
13
|
import { n as sha256 } from "./createHash-DmPQkvBh.js";
|
|
14
|
-
import { t as OpenAiChatCompletionProvider } from "./chat-
|
|
14
|
+
import { t as OpenAiChatCompletionProvider } from "./chat-pxmiVpWe.js";
|
|
15
15
|
import { r as accumulateTokenUsage } from "./tokenUsageUtils-NYT-WKS6.js";
|
|
16
|
-
import {
|
|
17
|
-
import { t as AnthropicMessagesProvider } from "./messages-
|
|
18
|
-
import { t as OpenAiResponsesProvider } from "./responses-
|
|
19
|
-
import { n as OpenAiEmbeddingProvider } from "./completion-
|
|
20
|
-
import { i as getUserEmail } from "./accounts-
|
|
21
|
-
import { i as getRemoteGenerationUrl, l as shouldGenerateRemote } from "./server-
|
|
16
|
+
import { b as hasGoogleDefaultCredentials } from "./transform-DyDAwEpE.js";
|
|
17
|
+
import { t as AnthropicMessagesProvider } from "./messages-Dy9QecMs.js";
|
|
18
|
+
import { t as OpenAiResponsesProvider } from "./responses-CQb1Tj69.js";
|
|
19
|
+
import { n as OpenAiEmbeddingProvider } from "./completion-DCjv7RZ3.js";
|
|
20
|
+
import { i as getUserEmail } from "./accounts-B2XmGjty.js";
|
|
21
|
+
import { i as getRemoteGenerationUrl, l as shouldGenerateRemote } from "./server-CP9qKM40.js";
|
|
22
22
|
import * as fs$1 from "fs";
|
|
23
23
|
import fs from "fs";
|
|
24
24
|
import path from "path";
|
|
@@ -311,6 +311,37 @@ const DEFAULT_WEB_SEARCH_PROMPT = JSON.stringify([{
|
|
|
311
311
|
role: "user",
|
|
312
312
|
content: "<Output>\n{{ output }}\n</Output>\n<Rubric>\n{{ rubric }}\n</Rubric>"
|
|
313
313
|
}]);
|
|
314
|
+
const TRAJECTORY_GOAL_SUCCESS_PROMPT = JSON.stringify([{
|
|
315
|
+
role: "system",
|
|
316
|
+
content: dedent`You are grading whether an AI agent successfully completed a goal based on its final output and a summarized execution trajectory. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
|
|
317
|
+
|
|
318
|
+
Judge end-to-end success, not stylistic perfection.
|
|
319
|
+
Use the trajectory as evidence for what the agent actually did.
|
|
320
|
+
Give partial credit when the agent made progress but did not fully achieve the goal.
|
|
321
|
+
|
|
322
|
+
Examples:
|
|
323
|
+
|
|
324
|
+
<Goal>Find the order status and tell the user whether it has shipped</Goal>
|
|
325
|
+
<Trajectory>{"stepCount":2,"steps":[{"index":1,"type":"tool","name":"search_orders"},{"index":2,"type":"message","name":"agent response"}]}</Trajectory>
|
|
326
|
+
<Output>Your order shipped yesterday and should arrive on Tuesday.</Output>
|
|
327
|
+
{"reason":"The agent used the order lookup tool and gave the user the shipping status, so the goal was achieved.","pass":true,"score":1.0}
|
|
328
|
+
|
|
329
|
+
<Goal>Find the order status and tell the user whether it has shipped</Goal>
|
|
330
|
+
<Trajectory>{"stepCount":1,"steps":[{"index":1,"type":"message","name":"agent response"}]}</Trajectory>
|
|
331
|
+
<Output>I cannot check your order right now.</Output>
|
|
332
|
+
{"reason":"The agent did not show evidence of checking the order and did not provide the requested status.","pass":false,"score":0.0}`
|
|
333
|
+
}, {
|
|
334
|
+
role: "user",
|
|
335
|
+
content: dedent`<Goal>
|
|
336
|
+
{{ goal }}
|
|
337
|
+
</Goal>
|
|
338
|
+
<Trajectory>
|
|
339
|
+
{{ trajectory }}
|
|
340
|
+
</Trajectory>
|
|
341
|
+
<Output>
|
|
342
|
+
{{ output }}
|
|
343
|
+
</Output>`
|
|
344
|
+
}]);
|
|
314
345
|
//#endregion
|
|
315
346
|
//#region src/prompts/processors/csv.ts
|
|
316
347
|
/**
|
|
@@ -1518,45 +1549,31 @@ async function renderLlmRubricPrompt(rubricPrompt, context) {
|
|
|
1518
1549
|
} catch {}
|
|
1519
1550
|
return nunjucks.renderString(rubricPrompt, processedContext);
|
|
1520
1551
|
}
|
|
1521
|
-
|
|
1522
|
-
if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
|
|
1523
|
-
if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
|
|
1524
|
-
...await doRemoteGrading({
|
|
1525
|
-
task: "llm-rubric",
|
|
1526
|
-
rubric,
|
|
1527
|
-
output: llmOutput,
|
|
1528
|
-
vars: vars || {}
|
|
1529
|
-
}),
|
|
1530
|
-
assertion
|
|
1531
|
-
};
|
|
1532
|
-
const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading?.rubricPrompt, DEFAULT_GRADING_PROMPT), {
|
|
1533
|
-
output: tryParse(llmOutput),
|
|
1534
|
-
rubric,
|
|
1535
|
-
...vars || {}
|
|
1536
|
-
});
|
|
1537
|
-
const defaultProviders = await getDefaultProviders();
|
|
1538
|
-
const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
|
|
1539
|
-
const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, "llm-rubric check"), prompt, "llm-rubric", {
|
|
1540
|
-
output: tryParse(llmOutput),
|
|
1541
|
-
rubric,
|
|
1542
|
-
...vars || {}
|
|
1543
|
-
}, providerCallContext);
|
|
1544
|
-
if (resp.error || !resp.output) {
|
|
1545
|
-
if (options?.throwOnError) throw new LlmRubricProviderError(resp.error || "No output");
|
|
1546
|
-
return fail(resp.error || "No output", resp.tokenUsage);
|
|
1547
|
-
}
|
|
1552
|
+
function parseJsonGradingResponse(label, resp) {
|
|
1548
1553
|
let jsonObjects = [];
|
|
1549
1554
|
if (typeof resp.output === "string") try {
|
|
1550
1555
|
jsonObjects = extractJsonObjects(resp.output);
|
|
1551
|
-
if (jsonObjects.length === 0) return fail(
|
|
1556
|
+
if (jsonObjects.length === 0) return { failure: fail(`Could not extract JSON from ${label} response`, resp.tokenUsage) };
|
|
1552
1557
|
} catch (err) {
|
|
1553
|
-
return fail(
|
|
1558
|
+
return { failure: fail(`${label} produced malformed response: ${err}\n\n${resp.output}`, resp.tokenUsage) };
|
|
1554
1559
|
}
|
|
1555
1560
|
else if (typeof resp.output === "object") jsonObjects = [resp.output];
|
|
1556
|
-
else return fail(
|
|
1557
|
-
if (!Array.isArray(jsonObjects) || jsonObjects.length === 0) return fail(`llm-rubric produced malformed response - We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage);
|
|
1561
|
+
else return { failure: fail(`${label} produced malformed response - output must be string or object. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
|
|
1558
1562
|
const parsed = jsonObjects[0];
|
|
1559
|
-
if (typeof parsed !== "object" || parsed === null || parsed === void 0) return fail(
|
|
1563
|
+
if (typeof parsed !== "object" || parsed === null || parsed === void 0) return { failure: fail(`${label} produced malformed response. We were not able to parse the response as JSON. Output: ${JSON.stringify(resp.output)}`, resp.tokenUsage) };
|
|
1564
|
+
return { parsed };
|
|
1565
|
+
}
|
|
1566
|
+
async function runJsonGradingPrompt({ assertion, checkName, defaultPrompt, grading, label, providerCallContext, throwOnError, vars }) {
|
|
1567
|
+
const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading.rubricPrompt, defaultPrompt), vars);
|
|
1568
|
+
const defaultProviders = await getDefaultProviders();
|
|
1569
|
+
const defaultProvider = defaultProviders.llmRubricProvider || defaultProviders.gradingJsonProvider;
|
|
1570
|
+
const resp = await callProviderWithContext(await getAndCheckProvider("text", grading.provider, defaultProvider, checkName), prompt, label, vars, providerCallContext);
|
|
1571
|
+
if (resp.error || !resp.output) {
|
|
1572
|
+
if (throwOnError) throw new Error(resp.error || "No output");
|
|
1573
|
+
return fail(resp.error || "No output", resp.tokenUsage);
|
|
1574
|
+
}
|
|
1575
|
+
const { parsed, failure } = parseJsonGradingResponse(label, resp);
|
|
1576
|
+
if (!parsed) return failure;
|
|
1560
1577
|
let pass = parsed.pass ?? true;
|
|
1561
1578
|
if (typeof pass !== "boolean") pass = /^(true|yes|pass|y)$/i.test(String(pass));
|
|
1562
1579
|
let score = parsed.score;
|
|
@@ -1584,6 +1601,54 @@ async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, opt
|
|
|
1584
1601
|
metadata: { renderedGradingPrompt: prompt }
|
|
1585
1602
|
};
|
|
1586
1603
|
}
|
|
1604
|
+
async function matchesLlmRubric(rubric, llmOutput, grading, vars, assertion, options, providerCallContext) {
|
|
1605
|
+
if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
|
|
1606
|
+
if (!grading.rubricPrompt && !state.config?.redteam?.provider && state.config?.redteam && shouldGenerateRemote()) return {
|
|
1607
|
+
...await doRemoteGrading({
|
|
1608
|
+
task: "llm-rubric",
|
|
1609
|
+
rubric,
|
|
1610
|
+
output: llmOutput,
|
|
1611
|
+
vars: vars || {}
|
|
1612
|
+
}),
|
|
1613
|
+
assertion
|
|
1614
|
+
};
|
|
1615
|
+
try {
|
|
1616
|
+
return await runJsonGradingPrompt({
|
|
1617
|
+
assertion,
|
|
1618
|
+
checkName: "llm-rubric check",
|
|
1619
|
+
defaultPrompt: DEFAULT_GRADING_PROMPT,
|
|
1620
|
+
grading,
|
|
1621
|
+
label: "llm-rubric",
|
|
1622
|
+
providerCallContext,
|
|
1623
|
+
throwOnError: options?.throwOnError,
|
|
1624
|
+
vars: {
|
|
1625
|
+
output: tryParse(llmOutput),
|
|
1626
|
+
rubric,
|
|
1627
|
+
...vars || {}
|
|
1628
|
+
}
|
|
1629
|
+
});
|
|
1630
|
+
} catch (error) {
|
|
1631
|
+
if (options?.throwOnError) throw new LlmRubricProviderError(error.message || "No output");
|
|
1632
|
+
throw error;
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
async function matchesTrajectoryGoalSuccess(goal, trajectory, llmOutput, grading, vars, assertion, providerCallContext) {
|
|
1636
|
+
if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
|
|
1637
|
+
return runJsonGradingPrompt({
|
|
1638
|
+
assertion,
|
|
1639
|
+
checkName: "trajectory:goal-success check",
|
|
1640
|
+
defaultPrompt: TRAJECTORY_GOAL_SUCCESS_PROMPT,
|
|
1641
|
+
grading,
|
|
1642
|
+
label: "trajectory:goal-success",
|
|
1643
|
+
providerCallContext,
|
|
1644
|
+
vars: {
|
|
1645
|
+
...vars || {},
|
|
1646
|
+
goal,
|
|
1647
|
+
output: tryParse(llmOutput),
|
|
1648
|
+
trajectory
|
|
1649
|
+
}
|
|
1650
|
+
});
|
|
1651
|
+
}
|
|
1587
1652
|
async function matchesPiScore(renderedValue, llmInput, llmOutput, assertion) {
|
|
1588
1653
|
return {
|
|
1589
1654
|
...await doRemoteScoringWithPi({
|
|
@@ -2091,7 +2156,7 @@ async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
|
|
|
2091
2156
|
let totalWeight = 0;
|
|
2092
2157
|
relevantResults.forEach((componentResult) => {
|
|
2093
2158
|
const assertionType = componentResult.assertion?.type || "unknown";
|
|
2094
|
-
const weight = options.weights[assertionType]
|
|
2159
|
+
const weight = options.weights[assertionType] === void 0 ? 1 : options.weights[assertionType];
|
|
2095
2160
|
const score = componentResult.score || 0;
|
|
2096
2161
|
totalWeightedScore += score * weight;
|
|
2097
2162
|
totalWeight += weight;
|
|
@@ -2341,9 +2406,9 @@ async function fetchHuggingFaceDataset(datasetPath, limit) {
|
|
|
2341
2406
|
while (true) {
|
|
2342
2407
|
const requestParams = new URLSearchParams(queryParams);
|
|
2343
2408
|
requestParams.set("offset", offset.toString());
|
|
2344
|
-
const remainingUserLimit = userLimit
|
|
2345
|
-
const remainingDatasetRows = totalRows
|
|
2346
|
-
const requestedLength = remainingUserLimit
|
|
2409
|
+
const remainingUserLimit = userLimit === void 0 ? void 0 : Math.max(userLimit - offset, 0);
|
|
2410
|
+
const remainingDatasetRows = totalRows === void 0 ? void 0 : Math.max(totalRows - offset, 0);
|
|
2411
|
+
const requestedLength = remainingUserLimit === void 0 ? remainingDatasetRows === void 0 ? pageSize : Math.min(pageSize, remainingDatasetRows) : Math.min(pageSize, remainingUserLimit);
|
|
2347
2412
|
if (requestedLength <= 0) {
|
|
2348
2413
|
logger.debug(`[HF Dataset] No remaining rows to fetch for ${owner}/${repo} (offset ${offset})`);
|
|
2349
2414
|
break;
|
|
@@ -13461,6 +13526,6 @@ function getGraderById(id) {
|
|
|
13461
13526
|
return grader;
|
|
13462
13527
|
}
|
|
13463
13528
|
//#endregion
|
|
13464
|
-
export { matchesSearchRubric as $, BeavertailsPlugin as A, getAndCheckProvider as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, retryWithDeduplication as F, matchesContextFaithfulness as G, matchesAnswerRelevance as H, sampleArray as I, matchesFactuality as J, matchesContextRecall as K, fetchHuggingFaceDataset as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, getCustomPolicies as P, matchesPiScore as Q, callProviderWithContext as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesClassification as U, loadRubricPrompt as V, matchesClosedQa as W, matchesLlmRubric as X, matchesGEval as Y, matchesModeration as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a,
|
|
13529
|
+
export { matchesSearchRubric as $, BeavertailsPlugin as A, getAndCheckProvider as B, HarmbenchPlugin as C, DebugAccessPlugin as D, DivergentRepetitionPlugin as E, retryWithDeduplication as F, matchesContextFaithfulness as G, matchesAnswerRelevance as H, sampleArray as I, matchesFactuality as J, matchesContextRecall as K, fetchHuggingFaceDataset as L, RedteamGraderBase as M, RedteamPluginBase as N, CrossSessionLeakPlugin as O, getCustomPolicies as P, matchesPiScore as Q, callProviderWithContext as R, ImitationPlugin as S, ExcessiveAgencyPlugin as T, matchesClassification as U, loadRubricPrompt as V, matchesClosedQa as W, matchesLlmRubric as X, matchesGEval as Y, matchesModeration as Z, makeInlinePolicyIdSync as _, UnverifiableClaimsPlugin as a, DefaultSuggestionsProvider as at, OverreliancePlugin as b, ToolDiscoveryPlugin as c, readProviderPromptMap as ct, RbacPlugin as d, getFinalTest as dt, matchesSelectBest as et, PromptExtractionPlugin as f, loadFromJavaScriptFile as ft, isValidPolicyObject as g, determinePolicyTypeFromId as h, VLGuardPlugin as i, getDefaultProviders as it, AegisPlugin as j, ContractPlugin as k, SqlInjectionPlugin as l, SUGGEST_PROMPTS_SYSTEM_MESSAGE as lt, PolicyPlugin as m, resolveContext as mt, getGraderById as n, matchesTrajectoryGoalSuccess as nt, UnsafeBenchPlugin as o, processPrompts as ot, PoliticsPlugin as p, processFileReference as pt, matchesContextRelevance as q, VLSUPlugin as r, selectMaxScore as rt, ToxicChatPlugin as s, readPrompts as st, GRADERS as t, matchesSimilarity as tt, ShellInjectionPlugin as u, coerceString as ut, PlinyPlugin as v, HallucinationPlugin as w, IntentPlugin as x, getPiiLeakTestsForCategory as y, fail as z };
|
|
13465
13530
|
|
|
13466
|
-
//# sourceMappingURL=graders-
|
|
13531
|
+
//# sourceMappingURL=graders-DG7mhg-b.js.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require("./logger-D5iKBpu_.cjs");
|
|
2
|
+
require("./esm-CipptfDu.cjs");
|
|
3
|
+
require("./pythonUtils-dAVigVK-.cjs");
|
|
4
|
+
require("./transform-ZrG2dvlo.cjs");
|
|
5
|
+
const require_graders = require("./graders-BElhu9ZY.cjs");
|
|
6
|
+
require("./types-D8cGDZbL.cjs");
|
|
7
|
+
require("./util-CuLo2pMR.cjs");
|
|
8
|
+
require("./fetch-BnR9wSnm.cjs");
|
|
9
|
+
require("./cache-C5yFZ4gC.cjs");
|
|
10
|
+
require("./providers-CScd1wN6.cjs");
|
|
11
|
+
require("./utils-DKw8mrgr.cjs");
|
|
12
|
+
require("./genaiTracer-BfxrvSUb.cjs");
|
|
13
|
+
require("./chat-CM8qWR3_.cjs");
|
|
14
|
+
require("./transform-0BwoBsvO.cjs");
|
|
15
|
+
require("./messages-HJsyEh4o.cjs");
|
|
16
|
+
require("./util--9u9UVCt.cjs");
|
|
17
|
+
require("./responses-mo0KQDbu.cjs");
|
|
18
|
+
require("./openai-CoxGAQwn.cjs");
|
|
19
|
+
require("./util-CFj4YKIn.cjs");
|
|
20
|
+
require("./completion-DlXUhj5c.cjs");
|
|
21
|
+
require("./accounts-BPyfpSeU.cjs");
|
|
22
|
+
require("./server-BtoCXeXI.cjs");
|
|
23
|
+
require("./blobs-C6j0bvFz.cjs");
|
|
24
|
+
require("./tables-BdZQEpRz.cjs");
|
|
25
|
+
require("./extractor-DG3sSfXE.cjs");
|
|
26
|
+
require("./telemetry-re627Lre.cjs");
|
|
27
|
+
require("./store-CLyU7AtI.cjs");
|
|
28
|
+
require("./base-BboXIF_0.cjs");
|
|
29
|
+
require("./image--F58eEIn.cjs");
|
|
30
|
+
require("./providerRegistry-BTDgfV5h.cjs");
|
|
31
|
+
require("./rubyUtils-CGeUtCfW.cjs");
|
|
32
|
+
exports.getGraderById = require_graders.getGraderById;
|