promptfoo 0.121.2 → 0.121.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/src/{accounts-CiBLOnA7.js → accounts-B2XmGjty.js} +5 -5
- package/dist/src/{accounts-gtkH-5KX.cjs → accounts-BPyfpSeU.cjs} +5 -5
- package/dist/src/{accounts-Bm2D8Db9.js → accounts-CFLK3mnD.js} +6 -6
- package/dist/src/{accounts-B0pgC1oV.js → accounts-Xatc0RYb.js} +5 -5
- package/dist/src/{agentic-utils-DS1g3GLF.js → agentic-utils-36epdqwB.js} +3 -3
- package/dist/src/{cometapi-CUQq3H_a.js → agentic-utils-D8yXo5Lm.js} +4 -61
- package/dist/src/{cometapi-C4xSqeID.cjs → agentic-utils-DAVsChuB.cjs} +24 -62
- package/dist/src/agentic-utils-DIYAAYE7.js +153 -0
- package/dist/src/{agents-CBr9A01V.js → agents-BBVJCIYr.js} +226 -13
- package/dist/src/{agents-Di9DKPzn.cjs → agents-BBWxKSM0.cjs} +7 -7
- package/dist/src/{agents-DgF2zDag.js → agents-Bqgfdokm.js} +228 -13
- package/dist/src/{agents-DbRtpYxR.cjs → agents-CAYbM7qD.cjs} +226 -13
- package/dist/src/{agents-9qiOy0ho.js → agents-CLQ-P15P.js} +7 -7
- package/dist/src/{agents-cLXA8a_8.js → agents-CgBniSlI.js} +8 -8
- package/dist/src/{agents-D__IdAlg.js → agents-DSSTV4bv.js} +226 -15
- package/dist/src/{agents-CmvBq8LV.js → agents-wg3ohknq.js} +7 -7
- package/dist/src/{aimlapi-BvlNH0gr.cjs → aimlapi-Bv8Fmc-b.cjs} +14 -14
- package/dist/src/{aimlapi-DHJU_kcV.js → aimlapi-BwGC1TtS.js} +13 -13
- package/dist/src/{aimlapi-CnkC2HqE.js → aimlapi-DaC3qZ-o.js} +14 -14
- package/dist/src/{aimlapi-B4rcnZgv.js → aimlapi-MgSLdvy7.js} +13 -13
- package/dist/src/app/assets/index-B6l9CVVb.js +439 -0
- package/dist/src/app/assets/index-DyZ0Ep37.css +1 -0
- package/dist/src/app/assets/sync-CStkzc6u.js +4 -0
- package/dist/src/app/assets/vendor-markdown-Bz7N-ca6.js +29 -0
- package/dist/src/app/index.html +3 -3
- package/dist/src/{audio-Bkv46et0.js → audio-Bn44pQxv.js} +4 -4
- package/dist/src/{audio-ClI_AFre.js → audio-DDA5WHdx.js} +4 -4
- package/dist/src/{audio-CGMyULza.cjs → audio-DVFjQ67_.cjs} +4 -4
- package/dist/src/{audio-Dz3z7s3J.js → audio-DjU9GswO.js} +5 -5
- package/dist/src/{base-CGrhspbK.cjs → base-BboXIF_0.cjs} +3 -3
- package/dist/src/{base-Dy1V8--Z.js → base-CKjwebIH.js} +3 -3
- package/dist/src/{base-DLKtKMFh.js → base-CqzQ4K8j.js} +3 -3
- package/dist/src/{base-CpjcHe4e.js → base-Cz2ZC_iA.js} +3 -3
- package/dist/src/{blobs-CMHN0Qcz.js → blobs-B1JriOyi.js} +3 -3
- package/dist/src/{blobs-BDbfYdrJ.js → blobs-BUWmKWzo.js} +3 -3
- package/dist/src/{blobs-D23XLin-.cjs → blobs-C6j0bvFz.cjs} +3 -3
- package/dist/src/{blobs-CBO20krR.js → blobs-DXTl6J3H.js} +3 -3
- package/dist/src/{cache-Dh5WtQps.cjs → cache-C5yFZ4gC.cjs} +3 -3
- package/dist/src/{cache-C4Nxf52C.js → cache-CaT5tPgo.js} +3 -3
- package/dist/src/cache-CyCanoMu.js +6 -0
- package/dist/src/{cache-BVeDlD87.js → cache-DSqR6ezl.js} +3 -3
- package/dist/src/cache-Df_QFDNu.cjs +5 -0
- package/dist/src/{cache-i1P6crbO.js → cache-HP0NP4k3.js} +3 -3
- package/dist/src/{chat-CzkrVDfz.js → chat-B-52XYI1.js} +12 -12
- package/dist/src/{chat-DJIw17u0.js → chat-B0iaWhoh.js} +14 -14
- package/dist/src/{chat-qmatte1u.js → chat-BE0qTA8e.js} +13 -13
- package/dist/src/{chat-BiKyneZl.js → chat-BEwdgGEg.js} +14 -14
- package/dist/src/{chat-C1Qst7jL.cjs → chat-BtIKkLKx.cjs} +13 -13
- package/dist/src/{chat-CgF-J-Jj.cjs → chat-CM8qWR3_.cjs} +15 -15
- package/dist/src/{chat-C2jrdPMx.js → chat-DK1U-eZ-.js} +12 -12
- package/dist/src/{chat-DqxYYtWA.js → chat-pxmiVpWe.js} +14 -14
- package/dist/src/{chatkit-65VXf5SR.js → chatkit-BYGQlHlV.js} +4 -4
- package/dist/src/{chatkit-DKyPi1Gs.cjs → chatkit-Cx174XI3.cjs} +4 -4
- package/dist/src/{chatkit-BxFvW8KY.js → chatkit-_8eJqKcD.js} +4 -4
- package/dist/src/{chatkit-Be-Q-a9F.js → chatkit-a2D6mY6s.js} +4 -4
- package/dist/src/{claude-agent-sdk-D9Z5Pr9X.cjs → claude-agent-sdk-8ddRp1L2.cjs} +35 -17
- package/dist/src/{claude-agent-sdk-DfCoW0E6.js → claude-agent-sdk-Bq5EArsX.js} +33 -15
- package/dist/src/{claude-agent-sdk-Apiy0iaz.js → claude-agent-sdk-CMjh4LFH.js} +33 -15
- package/dist/src/{claude-agent-sdk-D2bJee9S.js → claude-agent-sdk-HgbFioFw.js} +33 -15
- package/dist/src/cloud-DE3t1-ZI.js +4 -0
- package/dist/src/{cloud-C0dlstV_.js → cloud-z8KZpUoa.js} +3 -3
- package/dist/src/{cloudflare-ai-g7PB6VHR.js → cloudflare-ai-BGyXlpXJ.js} +13 -13
- package/dist/src/{cloudflare-ai-8TDxHR0x.js → cloudflare-ai-Bbp26N0L.js} +13 -13
- package/dist/src/{cloudflare-ai-CknbZ5LJ.cjs → cloudflare-ai-C62x6MQG.cjs} +14 -14
- package/dist/src/{cloudflare-ai-BxAGvfju.js → cloudflare-ai-DdKP9TKT.js} +14 -14
- package/dist/src/{cloudflare-gateway-CP9QEWYS.js → cloudflare-gateway-BwAaUgeW.js} +14 -14
- package/dist/src/{cloudflare-gateway-B9HWA5wf.js → cloudflare-gateway-D-e9i1Sn.js} +15 -15
- package/dist/src/{cloudflare-gateway-CKDb4dJ8.js → cloudflare-gateway-DXhtXDRb.js} +15 -163
- package/dist/src/{cloudflare-gateway-BSnDmHYo.cjs → cloudflare-gateway-Dx36ftqF.cjs} +15 -15
- package/dist/src/{codex-sdk-DUwKWezN.js → codex-sdk-BQEw16R_.js} +180 -11
- package/dist/src/{codex-sdk-C6UMlxwV.js → codex-sdk-C_07GuVS.js} +180 -11
- package/dist/src/{codex-sdk-GGAw0qbD.js → codex-sdk-DE5G18dx.js} +180 -11
- package/dist/src/{codex-sdk-fAO0c3yA.cjs → codex-sdk-ZLKfDjqP.cjs} +181 -12
- package/dist/src/cometapi-BDyV-NNm.js +62 -0
- package/dist/src/cometapi-C3hOlM7-.cjs +62 -0
- package/dist/src/{cometapi-BL9yvj_f.js → cometapi-hhL4TAh3.js} +14 -14
- package/dist/src/{cometapi-DFNiKmSz.js → cometapi-sp7sJpBD.js} +15 -15
- package/dist/src/{completion-5MzrpJxT.js → completion-BCimtq-h.js} +6 -6
- package/dist/src/{completion-qRoZAYRB.js → completion-DCjv7RZ3.js} +6 -6
- package/dist/src/{completion-CM6oK8PS.cjs → completion-DlXUhj5c.cjs} +6 -6
- package/dist/src/{completion-DZ083F31.js → completion-DoYy49ti.js} +6 -6
- package/dist/src/{createHash-CfZSc0b4.cjs → createHash-BYwImsYv.cjs} +2 -2
- package/dist/src/{docker-DcF2pRrj.cjs → docker-Cqj2-QVi.cjs} +14 -14
- package/dist/src/{docker-Bb5dcxr8.js → docker-CxCkwMzc.js} +13 -13
- package/dist/src/{docker-BvfL2BrW.js → docker-DpguQj-w.js} +14 -14
- package/dist/src/{docker-ExVyLp0S.js → docker-FeBni2dw.js} +13 -13
- package/dist/src/{esm-C03C-mv3.js → esm-7UIl0pPM.js} +2 -2
- package/dist/src/{esm-Cd1AjG1D.js → esm-CKWP3u_P.js} +3 -3
- package/dist/src/{esm-CnNt7sI4.cjs → esm-CipptfDu.cjs} +2 -2
- package/dist/src/{esm-CaIwzWR5.js → esm-SUNIX1x3.js} +3 -3
- package/dist/src/eval-7aEqoMs3.js +15 -0
- package/dist/src/{eval-Dg2nG4v2.js → eval-BTqTn7lb.js} +10 -10
- package/dist/src/{evalResult-BDMqrapS.js → evalResult-BkIhRdTe.js} +7 -7
- package/dist/src/evalResult-CYNHkk5A.js +12 -0
- package/dist/src/evalResult-CuvJeNiM.js +10 -0
- package/dist/src/{evalResult-BBRNtX4I.js → evalResult-DUDShQrm.js} +7 -7
- package/dist/src/{evalResult-fuaI8HkH.cjs → evalResult-DpARzUCb.cjs} +7 -7
- package/dist/src/evalResult-tGdilrWt.cjs +10 -0
- package/dist/src/evaluator-BBUqRhz1.js +36 -0
- package/dist/src/{evaluator-BhoWwp5b.js → evaluator-BcvOGaam.js} +823 -73
- package/dist/src/{extractor-D25qpmGX.js → extractor-C8XwivI9.js} +6 -6
- package/dist/src/{extractor-DReVID0K.js → extractor-CAZ2G3Kh.js} +6 -6
- package/dist/src/{extractor-pYLLi3wS.cjs → extractor-DG3sSfXE.cjs} +6 -6
- package/dist/src/{extractor-C0EVHewb.js → extractor-D_wd8jxt.js} +6 -6
- package/dist/src/{fetch-HaqdX7U1.js → fetch-BiYv2BZc.js} +3 -3
- package/dist/src/{fetch-BPkYtG8K.cjs → fetch-BnR9wSnm.cjs} +3 -3
- package/dist/src/{fetch-Cwxnd8zz.js → fetch-CVAtKnI3.js} +3 -3
- package/dist/src/{fetch-Dxpd4_sr.js → fetch-DoVRJZhJ.js} +4 -4
- package/dist/src/fetch-UWU706qb.js +5 -0
- package/dist/src/{genaiTracer-DN4dQywX.cjs → genaiTracer-BfxrvSUb.cjs} +2 -2
- package/dist/src/{graders-DU49_J8Y.cjs → graders-BElhu9ZY.cjs} +126 -55
- package/dist/src/{graders-DP7KFFo-.js → graders-BXAJ0sbS.js} +120 -55
- package/dist/src/graders-BxfEguVY.js +32 -0
- package/dist/src/graders-CzVMbEnv.js +34 -0
- package/dist/src/{graders-BTeBGqjJ.js → graders-DG7mhg-b.js} +120 -55
- package/dist/src/graders-DjCXfj0l.cjs +32 -0
- package/dist/src/{graders-Bj_Odv7c.js → graders-RjHF8VfG.js} +120 -55
- package/dist/src/graders-kHzIWOKu.js +32 -0
- package/dist/src/{image-BLmROtN3.cjs → image--F58eEIn.cjs} +6 -6
- package/dist/src/{image-B0h9VEMc.js → image-6WQXK8m8.js} +4 -4
- package/dist/src/{image-Dpxa1Jt6.js → image-B8b6f36E.js} +6 -6
- package/dist/src/{image-CHfWvljl.js → image-CoxZp9PZ.js} +6 -6
- package/dist/src/{image-B02ogr_b.js → image-DO0RYnjH.js} +5 -5
- package/dist/src/{image-DS-o-0ph.js → image-PoF6DN3x.js} +6 -6
- package/dist/src/{image-C1madmKh.cjs → image-fza3zuKs.cjs} +4 -4
- package/dist/src/{image-Bb4vWQLM.js → image-xNbw5ph2.js} +4 -4
- package/dist/src/index.cjs +853 -104
- package/dist/src/index.d.cts +573 -60
- package/dist/src/index.d.ts +573 -60
- package/dist/src/index.js +850 -102
- package/dist/src/{interactiveCheck-BgLZUIt3.js → interactiveCheck-BnMYOjMu.js} +2 -2
- package/dist/src/{knowledgeBase-B3OoKIej.js → knowledgeBase-Bi7CmDbx.js} +7 -7
- package/dist/src/{knowledgeBase-CYTLHOt1.js → knowledgeBase-Ce3ofVan.js} +8 -8
- package/dist/src/{knowledgeBase-D33Ty2l6.js → knowledgeBase-DFRXPZl_.js} +7 -7
- package/dist/src/{knowledgeBase-DOO_BM9b.cjs → knowledgeBase-DqrLX8fy.cjs} +7 -7
- package/dist/src/{litellm-AaeZcZQF.js → litellm-Bo2gQXpo.js} +14 -14
- package/dist/src/{litellm-NbjknEh6.js → litellm-CKiAxnoM.js} +13 -13
- package/dist/src/{litellm-I_hbp_dc.cjs → litellm-CnHI69aj.cjs} +14 -14
- package/dist/src/{litellm-TrljxD9G.js → litellm-Tc294Jhj.js} +13 -13
- package/dist/src/{logger-KkObSCzq.js → logger-BcJBzSSA.js} +10 -14
- package/dist/src/{logger-DLcq4dWf.js → logger-BnkjG2jt.js} +10 -14
- package/dist/src/{logger-Cp1GPUjj.cjs → logger-D5iKBpu_.cjs} +27 -13
- package/dist/src/{logger-CT3IKMKA.js → logger-DO8_zM18.js} +10 -14
- package/dist/src/{luma-ray-BS2_tY8L.js → luma-ray-0ehMPt5N.js} +10 -10
- package/dist/src/{luma-ray-DDsjcgZZ.js → luma-ray-C9q8rdQe.js} +9 -9
- package/dist/src/{luma-ray-f6I2fft-.js → luma-ray-DP0QA9qn.js} +9 -9
- package/dist/src/{luma-ray-Due0n7di.cjs → luma-ray-m9Ku2meV.cjs} +9 -9
- package/dist/src/main.js +69 -71
- package/dist/src/{messages-D0lx5qK7.js → messages-DJNo37Ko.js} +14 -9
- package/dist/src/{messages-BS17jdMx.js → messages-Dy9QecMs.js} +14 -9
- package/dist/src/{messages-Bs1kC7P4.cjs → messages-HJsyEh4o.cjs} +15 -10
- package/dist/src/{messages-ZJk778GH.js → messages-biC_ex-p.js} +14 -9
- package/dist/src/{modelslab-DRb74SP4.js → modelslab-B5J-ZM5c.js} +9 -9
- package/dist/src/{modelslab-Bx9IrZfS.js → modelslab-BI458moT.js} +10 -10
- package/dist/src/{modelslab-Bmni6skY.js → modelslab-BTOT8FUO.js} +9 -9
- package/dist/src/{modelslab-CoUX6Jc_.cjs → modelslab-IQbNg-r7.cjs} +9 -9
- package/dist/src/{nova-reel-bgjxilYW.js → nova-reel-BZ9y-Y5s.js} +9 -9
- package/dist/src/{nova-reel-C_QM18Xn.cjs → nova-reel-CE5etkv9.cjs} +9 -9
- package/dist/src/{nova-reel-D_W1tjMH.js → nova-reel-DEeQlnOJ.js} +10 -10
- package/dist/src/{nova-reel-BfPq-0Yk.js → nova-reel-Xw1SXLpg.js} +9 -9
- package/dist/src/{nova-sonic-De1HW5fD.js → nova-sonic-DWswpN1E.js} +7 -7
- package/dist/src/{nova-sonic-CFb5GYhg.js → nova-sonic-DXTLpi-r.js} +6 -6
- package/dist/src/{nova-sonic-zfcljeRp.cjs → nova-sonic-N0yCm0vb.cjs} +6 -6
- package/dist/src/{nova-sonic-DIGQNR07.js → nova-sonic-Ogqf-csn.js} +6 -6
- package/dist/src/{openai-DhbB7eWK.js → openai-BMcwgD5C.js} +2 -2
- package/dist/src/{openai-j-sE2O7r.js → openai-BcB5KlTk.js} +2 -2
- package/dist/src/{openai-Cuif0GEt.cjs → openai-CoxGAQwn.cjs} +2 -2
- package/dist/src/{openai-DElQ-fPX.js → openai-D6wITiVn.js} +2 -2
- package/dist/src/{openclaw-tiVYRtr-.js → openclaw-0Sv7AK3O.js} +13 -13
- package/dist/src/{openclaw-CSugPYAr.cjs → openclaw-CXxbKgDH.cjs} +14 -14
- package/dist/src/{openclaw-DuvJKEW5.js → openclaw-D1FSCps-.js} +13 -13
- package/dist/src/{openclaw-DiSz3I5L.js → openclaw-D2ENvu7a.js} +14 -14
- package/dist/src/{opencode-sdk-0j6rTWNb.js → opencode-sdk-C71Z0ehR.js} +13 -13
- package/dist/src/{opencode-sdk-B3CWY9h_.js → opencode-sdk-CHCs7dEb.js} +12 -12
- package/dist/src/{opencode-sdk-C2y6UkP2.js → opencode-sdk-DDxj4QqH.js} +12 -12
- package/dist/src/{opencode-sdk-BL764Jdi.cjs → opencode-sdk-WWJhnbKr.cjs} +16 -16
- package/dist/src/{otlpReceiver-C99PPb48.js → otlpReceiver-C9KlUtxh.js} +6 -6
- package/dist/src/{otlpReceiver-CdNBdbsk.js → otlpReceiver-CZL48YfC.js} +6 -6
- package/dist/src/{otlpReceiver-D89fR-rC.js → otlpReceiver-CavGAA6k.js} +6 -6
- package/dist/src/{otlpReceiver-CGq6LspY.cjs → otlpReceiver-DHKqJlsz.cjs} +6 -6
- package/dist/src/{providerRegistry-B0RUOLI_.js → providerRegistry-B9lh-_tx.js} +2 -2
- package/dist/src/{providerRegistry-Civky8Ar.cjs → providerRegistry-BTDgfV5h.cjs} +2 -2
- package/dist/src/{providerRegistry-CD8MEar9.js → providerRegistry-BkzVH5Ba.js} +2 -2
- package/dist/src/{providerRegistry-DM8rZYol.js → providerRegistry-CUWki5mQ.js} +2 -2
- package/dist/src/providers-BSLEaIQG.js +32 -0
- package/dist/src/{providers-CgKOSgTR.cjs → providers-CScd1wN6.cjs} +733 -464
- package/dist/src/{providers-BlqUifFg.js → providers-Ch6Mr0gn.js} +795 -526
- package/dist/src/{providers-Dk_6ocUX.js → providers-Cn73d5sr.js} +795 -526
- package/dist/src/providers-D-FnDg8k.cjs +31 -0
- package/dist/src/providers-DEYiFVAo.js +30 -0
- package/dist/src/{providers-D8lF1sqW.js → providers-DvddrgxL.js} +795 -526
- package/dist/src/providers-sS2WI8YD.js +30 -0
- package/dist/src/{pythonUtils-D6fwaDSg.js → pythonUtils-Bzwbgpbg.js} +3 -3
- package/dist/src/{pythonUtils-D5nxkQ0P.js → pythonUtils-Cpo0Ez1p.js} +3 -3
- package/dist/src/{pythonUtils-CTU3Y3lw.cjs → pythonUtils-dAVigVK-.cjs} +3 -3
- package/dist/src/{pythonUtils-C3py6GC1.js → pythonUtils-wIqk7zAf.js} +3 -3
- package/dist/src/{quiverai-CIaELU_m.js → quiverai-BeofbLVc.js} +4 -4
- package/dist/src/{quiverai-uH-dcTIr.js → quiverai-CCQn73lq.js} +5 -5
- package/dist/src/{quiverai-PdShCPox.cjs → quiverai-CcUhPIBg.cjs} +4 -4
- package/dist/src/{quiverai-BbOUOn2L.js → quiverai-DVSEqJiq.js} +4 -4
- package/dist/src/{render-Drod8m7K.js → render-BHl6QVq9.js} +3 -3
- package/dist/src/{responses-WNGNYe3K.js → responses-BKP_WYis.js} +14 -10
- package/dist/src/{responses-DIR9Ud3j.js → responses-CQb1Tj69.js} +14 -10
- package/dist/src/{responses-CB2jwoAr.js → responses-CgNyTPsY.js} +14 -10
- package/dist/src/{responses-D8SBTL64.cjs → responses-mo0KQDbu.cjs} +14 -10
- package/dist/src/rubyUtils-B1HXG4ej.cjs +4 -0
- package/dist/src/{rubyUtils-DhCAlxZr.cjs → rubyUtils-CGeUtCfW.cjs} +3 -3
- package/dist/src/{rubyUtils-Boc4HZzX.js → rubyUtils-CiVfln3g.js} +3 -3
- package/dist/src/{rubyUtils-BcuGX77l.js → rubyUtils-DECSbsfY.js} +3 -3
- package/dist/src/{rubyUtils-BUVePouc.js → rubyUtils-PgU-gHmx.js} +3 -3
- package/dist/src/rubyUtils-Rt6pKA96.js +5 -0
- package/dist/src/{sagemaker-CNBxx5CJ.js → sagemaker-CVv8W7so.js} +17 -17
- package/dist/src/{sagemaker-CemTFp2h.js → sagemaker-CqeASYE5.js} +17 -17
- package/dist/src/{sagemaker-YSyBXQQh.js → sagemaker-MUbD5V3v.js} +18 -18
- package/dist/src/{sagemaker-Cl28mZU2.cjs → sagemaker-jiw1wQa-.cjs} +17 -17
- package/dist/src/{scanner-BsBlNXNn.js → scanner-DVDeUz1r.js} +10 -10
- package/dist/src/server/index.js +854 -106
- package/dist/src/server-B0Xh1Gx-.js +7 -0
- package/dist/src/{server-C_7Ax-hA.cjs → server-BtoCXeXI.cjs} +4 -4
- package/dist/src/{server-VWgWb00X.js → server-CP9qKM40.js} +4 -4
- package/dist/src/{server-CuxBbeSY.js → server-Cns05F1j.js} +5 -5
- package/dist/src/server-DJTKu9IR.cjs +5 -0
- package/dist/src/{server-CqzrVGpF.js → server-DZ9MtCn0.js} +6 -6
- package/dist/src/{signal-4U3mfRvL.js → signal-C3ZTsUgi.js} +3 -3
- package/dist/src/{slack-DOdy_kyv.js → slack-2sdpGzbt.js} +2 -2
- package/dist/src/{slack-BmVAVGaK.cjs → slack-94iG3T0s.cjs} +2 -2
- package/dist/src/{slack-DCUPTzS2.js → slack-BR0HtO3K.js} +2 -2
- package/dist/src/{slack-DXMKtA-f.js → slack-DCEV-vWP.js} +2 -2
- package/dist/src/store-C5u6MgC8.js +6 -0
- package/dist/src/{store-DLlFCC4h.cjs → store-CLyU7AtI.cjs} +17 -5
- package/dist/src/store-CNHk-De4.cjs +5 -0
- package/dist/src/{store-DXilxTl-.js → store-Cj258DgL.js} +17 -5
- package/dist/src/{store-Dim__MDd.js → store-P8OKm19S.js} +17 -5
- package/dist/src/{store-CXGFv4aR.js → store-VB0GP46K.js} +17 -5
- package/dist/src/{tables-DLJPUdUE.js → tables-BEIFz2tM.js} +3 -3
- package/dist/src/{tables-DPi7wKeM.cjs → tables-BdZQEpRz.cjs} +3 -3
- package/dist/src/{tables-gftXzE9I.js → tables-DmzvLbeZ.js} +3 -3
- package/dist/src/{tables-6YKwjN9-.js → tables-kC7R5kiK.js} +3 -3
- package/dist/src/{telemetry-CMrFgtPB.js → telemetry-BnH5VJAU.js} +4 -4
- package/dist/src/{telemetry-Dthj_BbD.js → telemetry-BugWqKiu.js} +4 -4
- package/dist/src/{telemetry-Cps3mIU-.js → telemetry-DPXLd7UE.js} +4 -4
- package/dist/src/telemetry-Yig0Tino.js +7 -0
- package/dist/src/telemetry-p8Pwqm1i.cjs +5 -0
- package/dist/src/{telemetry-DaX14Chu.cjs → telemetry-re627Lre.cjs} +4 -4
- package/dist/src/{transcription-NLVG9MT1.cjs → transcription-BvtsrzRG.cjs} +13 -13
- package/dist/src/{transcription-BNYURcXg.js → transcription-CaMivnjG.js} +13 -13
- package/dist/src/{transcription-s6A-bNrZ.js → transcription-DOMMTu01.js} +14 -14
- package/dist/src/{transcription-B_OdaHp7.js → transcription-Hb3VnC4M.js} +13 -13
- package/dist/src/{transform-DuHvhZpj.cjs → transform-0BwoBsvO.cjs} +19 -5
- package/dist/src/{transform-uAytVuyX.js → transform-B2-jIv68.js} +8 -6
- package/dist/src/{transform-DECvGmzp.js → transform-BqPkNPYm.js} +4 -4
- package/dist/src/{transform-aa6tmVpZ.js → transform-BzK09Q_9.js} +4 -4
- package/dist/src/transform-ChNIpHz7.js +6 -0
- package/dist/src/{transform-D5HsjduX.js → transform-DrleutM3.js} +8 -6
- package/dist/src/{transform-vNucnNr0.js → transform-DyDAwEpE.js} +8 -6
- package/dist/src/transform-PtQ6rAE3.cjs +5 -0
- package/dist/src/{transform-CzK1Q0zl.cjs → transform-ZrG2dvlo.cjs} +4 -4
- package/dist/src/{transform-DilY9wbS.js → transform-ljLYHEPh.js} +4 -4
- package/dist/src/{transformersAvailability-CEVM2GNQ.js → transformersAvailability-BGkzavwb.js} +1 -1
- package/dist/src/{transformersAvailability-CwayUSlh.cjs → transformersAvailability-DKoRtQLy.cjs} +1 -1
- package/dist/src/{types-Cbd8uOMq.js → types-CIhFeUC4.js} +7 -1
- package/dist/src/{types-CzW2QFyi.js → types-Cd3ygw8W.js} +7 -1
- package/dist/src/{types-C_7nyzr1.cjs → types-D8cGDZbL.cjs} +8 -2
- package/dist/src/{types-DmyIJ-sR.js → types-q8GXGF65.js} +7 -1
- package/dist/src/{util-DGNOS1db.cjs → util--9u9UVCt.cjs} +3 -3
- package/dist/src/{util-ZzmqNPlg.js → util-BLvy9qfE.js} +7 -7
- package/dist/src/{util-C1CeHl-P.js → util-Bm3E9jpK.js} +7 -7
- package/dist/src/{util-BV4XUC0n.js → util-BtoGs5Cb.js} +18 -4
- package/dist/src/{util-BzMcevZc.cjs → util-CFj4YKIn.cjs} +18 -4
- package/dist/src/{util-BRYkYPTd.js → util-CMMkIxfU.js} +7 -7
- package/dist/src/{util-Dnmk2mBQ.js → util-CgDCK4KI.js} +18 -4
- package/dist/src/{util-B9vlHIIh.cjs → util-CuLo2pMR.cjs} +7 -7
- package/dist/src/{util-CMy69ZgQ.js → util-DM2rTn_6.js} +18 -4
- package/dist/src/{util-B3xGByQh.js → util-DMFeUvLz.js} +3 -3
- package/dist/src/{util-BHGHw5G1.js → util-DbVG-yZU.js} +3 -3
- package/dist/src/{util-Bv6uGDfH.js → util-vNmDL5DT.js} +3 -3
- package/dist/src/{utils-XiOAgly5.js → utils-CFxO9KGo.js} +2 -2
- package/dist/src/{utils-f2-Moju7.js → utils-DEuL4VNB.js} +2 -2
- package/dist/src/{utils-Cz9qXqII.cjs → utils-DKw8mrgr.cjs} +3 -3
- package/dist/src/{utils-dLokC-eR.js → utils-DOjD4dTC.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +32 -32
- package/dist/src/app/assets/index-4LKxG2CG.js +0 -439
- package/dist/src/app/assets/index-C3zcsZFQ.css +0 -1
- package/dist/src/app/assets/sync-9qqYcY-B.js +0 -4
- package/dist/src/app/assets/vendor-markdown-0tekx3KX.js +0 -29
- package/dist/src/app/tsconfig.app.tsbuildinfo +0 -1
- package/dist/src/cache-CeUpFm3M.cjs +0 -5
- package/dist/src/cache-n-RCJ-hL.js +0 -6
- package/dist/src/cloud-BBh91EUK.js +0 -4
- package/dist/src/eval-B3r2CVXr.js +0 -15
- package/dist/src/evalResult-5xwYnECe.js +0 -12
- package/dist/src/evalResult-71lY93Kj.cjs +0 -10
- package/dist/src/evalResult-Dx5P5cIv.js +0 -10
- package/dist/src/evaluator-Jx6bRZV6.js +0 -36
- package/dist/src/fetch-BxNb_Lp3.js +0 -5
- package/dist/src/graders-B_pgMLS2.js +0 -34
- package/dist/src/graders-DErokPDO.cjs +0 -32
- package/dist/src/graders-DR_uNe54.js +0 -32
- package/dist/src/graders-w3176Wz-.js +0 -32
- package/dist/src/providers-B7V0njNs.js +0 -32
- package/dist/src/providers-BEwbhv0X.js +0 -30
- package/dist/src/providers-CH3C7zf7.js +0 -30
- package/dist/src/providers-zyB6k_38.cjs +0 -31
- package/dist/src/rubyUtils-BUHu6PhO.js +0 -5
- package/dist/src/rubyUtils-CP42kMvq.cjs +0 -4
- package/dist/src/server-DA4Cyrrq.js +0 -7
- package/dist/src/server-Dulb-4-K.cjs +0 -5
- package/dist/src/store-CXS-Q_91.js +0 -6
- package/dist/src/store-eYkaKMwq.cjs +0 -5
- package/dist/src/telemetry-BpMfhthR.cjs +0 -5
- package/dist/src/telemetry-Dw38hanS.js +0 -7
- package/dist/src/transform-DTGDnAzW.js +0 -6
- package/dist/src/transform-m3qNw4KP.cjs +0 -5
package/dist/src/index.cjs
CHANGED
|
@@ -2,43 +2,43 @@ Object.defineProperties(exports, {
|
|
|
2
2
|
__esModule: { value: true },
|
|
3
3
|
[Symbol.toStringTag]: { value: "Module" }
|
|
4
4
|
});
|
|
5
|
-
const require_logger = require("./logger-Cp1GPUjj.cjs");
|
|
5
|
+
const require_logger = require("./logger-D5iKBpu_.cjs");
|
|
6
6
|
const require_invariant = require("./invariant-kfQ8Bu82.cjs");
|
|
7
|
-
const require_esm = require("./esm-CnNt7sI4.cjs");
|
|
8
|
-
const require_pythonUtils = require("./pythonUtils-CTU3Y3lw.cjs");
|
|
7
|
+
const require_esm = require("./esm-CipptfDu.cjs");
|
|
8
|
+
const require_pythonUtils = require("./pythonUtils-dAVigVK-.cjs");
|
|
9
9
|
const require_fileExtensions = require("./fileExtensions-bYh77CN8.cjs");
|
|
10
|
-
const require_transform = require("./transform-CzK1Q0zl.cjs");
|
|
11
|
-
const require_graders = require("./graders-DU49_J8Y.cjs");
|
|
12
|
-
const require_types = require("./types-C_7nyzr1.cjs");
|
|
13
|
-
const require_util = require("./util-B9vlHIIh.cjs");
|
|
14
|
-
const require_fetch = require("./fetch-BPkYtG8K.cjs");
|
|
15
|
-
const require_cache = require("./cache-Dh5WtQps.cjs");
|
|
16
|
-
const require_providers = require("./providers-CgKOSgTR.cjs");
|
|
17
|
-
const require_utils = require("./utils-Cz9qXqII.cjs");
|
|
18
|
-
const require_createHash = require("./createHash-CfZSc0b4.cjs");
|
|
19
|
-
require("./genaiTracer-DN4dQywX.cjs");
|
|
20
|
-
const require_chat = require("./chat-CgF-J-Jj.cjs");
|
|
10
|
+
const require_transform = require("./transform-ZrG2dvlo.cjs");
|
|
11
|
+
const require_graders = require("./graders-BElhu9ZY.cjs");
|
|
12
|
+
const require_types = require("./types-D8cGDZbL.cjs");
|
|
13
|
+
const require_util = require("./util-CuLo2pMR.cjs");
|
|
14
|
+
const require_fetch = require("./fetch-BnR9wSnm.cjs");
|
|
15
|
+
const require_cache = require("./cache-C5yFZ4gC.cjs");
|
|
16
|
+
const require_providers = require("./providers-CScd1wN6.cjs");
|
|
17
|
+
const require_utils = require("./utils-DKw8mrgr.cjs");
|
|
18
|
+
const require_createHash = require("./createHash-BYwImsYv.cjs");
|
|
19
|
+
require("./genaiTracer-BfxrvSUb.cjs");
|
|
20
|
+
const require_chat = require("./chat-CM8qWR3_.cjs");
|
|
21
21
|
const require_tokenUsageUtils = require("./tokenUsageUtils-bVa1ga6f.cjs");
|
|
22
|
-
const require_transform$1 = require("./transform-DuHvhZpj.cjs");
|
|
23
|
-
require("./messages-Bs1kC7P4.cjs");
|
|
24
|
-
require("./util-DGNOS1db.cjs");
|
|
25
|
-
require("./responses-D8SBTL64.cjs");
|
|
26
|
-
require("./openai-Cuif0GEt.cjs");
|
|
27
|
-
const require_util$2 = require("./util-BzMcevZc.cjs");
|
|
28
|
-
require("./completion-CM6oK8PS.cjs");
|
|
29
|
-
const require_accounts = require("./accounts-gtkH-5KX.cjs");
|
|
30
|
-
const require_server = require("./server-C_7Ax-hA.cjs");
|
|
31
|
-
const require_blobs = require("./blobs-D23XLin-.cjs");
|
|
32
|
-
const require_tables = require("./tables-DPi7wKeM.cjs");
|
|
33
|
-
const require_extractor = require("./extractor-pYLLi3wS.cjs");
|
|
34
|
-
const require_telemetry = require("./telemetry-DaX14Chu.cjs");
|
|
22
|
+
const require_transform$1 = require("./transform-0BwoBsvO.cjs");
|
|
23
|
+
require("./messages-HJsyEh4o.cjs");
|
|
24
|
+
require("./util--9u9UVCt.cjs");
|
|
25
|
+
require("./responses-mo0KQDbu.cjs");
|
|
26
|
+
require("./openai-CoxGAQwn.cjs");
|
|
27
|
+
const require_util$2 = require("./util-CFj4YKIn.cjs");
|
|
28
|
+
require("./completion-DlXUhj5c.cjs");
|
|
29
|
+
const require_accounts = require("./accounts-BPyfpSeU.cjs");
|
|
30
|
+
const require_server = require("./server-BtoCXeXI.cjs");
|
|
31
|
+
const require_blobs = require("./blobs-C6j0bvFz.cjs");
|
|
32
|
+
const require_tables = require("./tables-BdZQEpRz.cjs");
|
|
33
|
+
const require_extractor = require("./extractor-DG3sSfXE.cjs");
|
|
34
|
+
const require_telemetry = require("./telemetry-re627Lre.cjs");
|
|
35
35
|
const require_text = require("./text-CW1cyrwj.cjs");
|
|
36
|
-
const require_store = require("./store-DLlFCC4h.cjs");
|
|
37
|
-
require("./base-CGrhspbK.cjs");
|
|
38
|
-
require("./image-BLmROtN3.cjs");
|
|
39
|
-
const require_providerRegistry = require("./providerRegistry-Civky8Ar.cjs");
|
|
40
|
-
const require_rubyUtils = require("./rubyUtils-DhCAlxZr.cjs");
|
|
41
|
-
const require_evalResult = require("./evalResult-fuaI8HkH.cjs");
|
|
36
|
+
const require_store = require("./store-CLyU7AtI.cjs");
|
|
37
|
+
require("./base-BboXIF_0.cjs");
|
|
38
|
+
require("./image--F58eEIn.cjs");
|
|
39
|
+
const require_providerRegistry = require("./providerRegistry-BTDgfV5h.cjs");
|
|
40
|
+
const require_rubyUtils = require("./rubyUtils-CGeUtCfW.cjs");
|
|
41
|
+
const require_evalResult = require("./evalResult-DpARzUCb.cjs");
|
|
42
42
|
let fs = require("fs");
|
|
43
43
|
fs = require_logger.__toESM(fs);
|
|
44
44
|
let path = require("path");
|
|
@@ -68,6 +68,8 @@ crypto$1 = require_logger.__toESM(crypto$1);
|
|
|
68
68
|
let _opentelemetry_api = require("@opentelemetry/api");
|
|
69
69
|
let _inquirer_input = require("@inquirer/input");
|
|
70
70
|
_inquirer_input = require_logger.__toESM(_inquirer_input);
|
|
71
|
+
let readline = require("readline");
|
|
72
|
+
readline = require_logger.__toESM(readline);
|
|
71
73
|
let drizzle_orm = require("drizzle-orm");
|
|
72
74
|
let cli_progress = require("cli-progress");
|
|
73
75
|
cli_progress = require_logger.__toESM(cli_progress);
|
|
@@ -75,6 +77,7 @@ let jsdom = require("jsdom");
|
|
|
75
77
|
let fastest_levenshtein = require("fastest-levenshtein");
|
|
76
78
|
let js_rouge = require("js-rouge");
|
|
77
79
|
js_rouge = require_logger.__toESM(js_rouge);
|
|
80
|
+
let node_util = require("node:util");
|
|
78
81
|
require("debounce");
|
|
79
82
|
let _opentelemetry_core = require("@opentelemetry/core");
|
|
80
83
|
let _opentelemetry_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
|
|
@@ -307,7 +310,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
307
310
|
require_telemetry.telemetry.record("feature_used", { feature: "tracing" });
|
|
308
311
|
try {
|
|
309
312
|
require_logger.logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
310
|
-
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CGq6LspY.cjs"));
|
|
313
|
+
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-DHKqJlsz.cjs"));
|
|
311
314
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
312
315
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
313
316
|
require_logger.logger.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
|
|
@@ -330,7 +333,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
330
333
|
async function stopOtlpReceiverIfNeeded() {
|
|
331
334
|
if (otlpReceiverStarted) try {
|
|
332
335
|
require_logger.logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
333
|
-
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CGq6LspY.cjs"));
|
|
336
|
+
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-DHKqJlsz.cjs"));
|
|
334
337
|
await stopOTLPReceiver();
|
|
335
338
|
otlpReceiverStarted = false;
|
|
336
339
|
require_logger.logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -365,7 +368,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
365
368
|
}
|
|
366
369
|
if (!tracingEnabled) return null;
|
|
367
370
|
require_logger.logger.debug("[EvaluatorTracing] Importing trace store");
|
|
368
|
-
const { getTraceStore } = await Promise.resolve().then(() => require("./store-eYkaKMwq.cjs"));
|
|
371
|
+
const { getTraceStore } = await Promise.resolve().then(() => require("./store-CNHk-De4.cjs"));
|
|
369
372
|
const traceStore = getTraceStore();
|
|
370
373
|
const traceId = generateTraceId();
|
|
371
374
|
const spanId = generateSpanId();
|
|
@@ -1398,7 +1401,7 @@ const handleJavascript = async ({ assertion, renderedValue, valueFromScript, ass
|
|
|
1398
1401
|
pass = result !== inverse;
|
|
1399
1402
|
score = pass ? 1 : 0;
|
|
1400
1403
|
} else if (typeof result === "number") {
|
|
1401
|
-
pass = assertion.threshold
|
|
1404
|
+
pass = assertion.threshold === void 0 ? result > 0 : result >= assertion.threshold;
|
|
1402
1405
|
score = result;
|
|
1403
1406
|
} else if (typeof result === "object") return result;
|
|
1404
1407
|
else throw new Error("Custom function must return a boolean or number");
|
|
@@ -1664,7 +1667,7 @@ function handlePerplexity({ logProbs, assertion }) {
|
|
|
1664
1667
|
if (!logProbs || logProbs.length === 0) throw new Error("Perplexity assertion does not support providers that do not return logProbs");
|
|
1665
1668
|
const avgLogProb = logProbs.reduce((acc, logProb) => acc + logProb, 0) / logProbs.length;
|
|
1666
1669
|
const perplexity = Math.exp(-avgLogProb);
|
|
1667
|
-
const pass = assertion.threshold
|
|
1670
|
+
const pass = assertion.threshold === void 0 ? true : perplexity <= assertion.threshold;
|
|
1668
1671
|
return {
|
|
1669
1672
|
pass,
|
|
1670
1673
|
score: pass ? 1 : 0,
|
|
@@ -1676,7 +1679,7 @@ function handlePerplexityScore({ logProbs, assertion }) {
|
|
|
1676
1679
|
if (!logProbs || logProbs.length === 0) throw new Error("perplexity-score assertion does not support providers that do not return logProbs");
|
|
1677
1680
|
const avgLogProb = logProbs.reduce((acc, logProb) => acc + logProb, 0) / logProbs.length;
|
|
1678
1681
|
const perplexityNorm = 1 / (1 + Math.exp(-avgLogProb));
|
|
1679
|
-
const pass = assertion.threshold
|
|
1682
|
+
const pass = assertion.threshold === void 0 ? true : perplexityNorm >= assertion.threshold;
|
|
1680
1683
|
return {
|
|
1681
1684
|
pass,
|
|
1682
1685
|
score: perplexityNorm,
|
|
@@ -1791,7 +1794,7 @@ ${isMultiline ? renderedValue.split("\n").map((line) => `${indentStyle}${line}`)
|
|
|
1791
1794
|
} else {
|
|
1792
1795
|
score = Number.parseFloat(String(result));
|
|
1793
1796
|
if (Number.isNaN(score)) throw new Error(`Python assertion must return a boolean, number, or {pass, score, reason} object. Instead got:\n${result}`);
|
|
1794
|
-
pass = assertion.threshold
|
|
1797
|
+
pass = assertion.threshold === void 0 ? score > 0 : score >= assertion.threshold;
|
|
1795
1798
|
}
|
|
1796
1799
|
} catch (err) {
|
|
1797
1800
|
return {
|
|
@@ -2052,7 +2055,7 @@ end
|
|
|
2052
2055
|
} else {
|
|
2053
2056
|
score = Number.parseFloat(String(result));
|
|
2054
2057
|
if (Number.isNaN(score)) throw new Error(`Ruby assertion must return a boolean, number, or {pass, score, reason} object. Instead got:\n${result}`);
|
|
2055
|
-
pass = assertion.threshold
|
|
2058
|
+
pass = assertion.threshold === void 0 ? score > 0 : score >= assertion.threshold;
|
|
2056
2059
|
}
|
|
2057
2060
|
} catch (err) {
|
|
2058
2061
|
return {
|
|
@@ -2123,6 +2126,127 @@ const handleSimilar = async ({ assertion, renderedValue, outputString, inverse,
|
|
|
2123
2126
|
};
|
|
2124
2127
|
};
|
|
2125
2128
|
//#endregion
|
|
2129
|
+
//#region src/assertions/traceUtils.ts
|
|
2130
|
+
/**
|
|
2131
|
+
* Shared utilities for trace assertions
|
|
2132
|
+
*/
|
|
2133
|
+
/**
 * Test a span name against a glob-style pattern, ignoring case.
 * `*` stands for any run of characters and `?` for a single character;
 * every other regex metacharacter in the pattern is matched literally.
 *
 * @param spanName - The span name to test
 * @param pattern - The glob pattern to test against
 * @returns true when the entire span name matches the pattern
 */
function matchesPattern(spanName, pattern) {
	// Escape regex metacharacters first so only the glob wildcards stay special.
	const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&");
	const withWildcards = escaped.replace(/\*/g, ".*").replace(/\?/g, ".");
	const anchored = new RegExp(`^${withWildcards}$`, "i");
	return anchored.test(spanName);
}
|
|
2145
|
+
//#endregion
|
|
2146
|
+
//#region src/assertions/skill.ts
|
|
2147
|
+
/**
 * Collect the skill calls recorded on a provider response.
 *
 * @param params - Assertion params; reads `providerResponse.metadata.skillCalls`
 * @returns the entries that are objects with a string `name`, or an empty
 *   array when the metadata field is missing or not an array
 */
function getSkillCalls(params) {
	const candidates = params.providerResponse?.metadata?.skillCalls;
	if (!Array.isArray(candidates)) return [];
	const skillCalls = [];
	for (const candidate of candidates) {
		const isObject = Boolean(candidate) && typeof candidate === "object";
		if (isObject && typeof candidate.name === "string") skillCalls.push(candidate);
	}
	return skillCalls;
}
|
|
2152
|
+
/**
 * Decide whether a skill call satisfies a matcher.
 *
 * @param skillCall - Recorded skill call; only `name` is inspected
 * @param matcher - Optional exact `name` and/or glob `pattern`; each one
 *   that is set must match
 * @returns true when every constraint present on the matcher is satisfied
 */
function matchesSkill(skillCall, matcher) {
	const { name, pattern } = matcher;
	if (name && name !== skillCall.name) return false;
	return !pattern || matchesPattern(skillCall.name, pattern);
}
|
|
2157
|
+
/**
 * Render a skill call for assertion messages.
 *
 * @param skillCall - Skill call with `name` and optional `source` / `path`
 * @returns `name (source, path)` using whichever details are truthy, or
 *   just the name when there are none
 */
function formatSkillCall(skillCall) {
	const parts = [];
	if (skillCall.source) parts.push(skillCall.source);
	if (skillCall.path) parts.push(skillCall.path);
	const details = parts.join(", ");
	if (!details) return skillCall.name;
	return `${skillCall.name} (${details})`;
}
|
|
2161
|
+
/**
 * Normalize the value of a skill-used assertion.
 *
 * Accepted shapes:
 *  - non-blank string            -> list with one exact-name matcher
 *  - non-empty array of such     -> list with one matcher per entry
 *  - object with name/pattern    -> count matcher with optional min/max
 *
 * @param value - Raw assertion value
 * @returns `{ kind: "list", matchers }` or `{ kind: "count", matcher }`
 * @throws Error when the value has an unsupported shape, lacks both name
 *   and pattern, or carries invalid / inconsistent count bounds
 */
function resolveSkillMatchers(value) {
	const trimmedOrUndefined = (text) => {
		if (typeof text !== "string") return void 0;
		return text.trim();
	};
	const assertValidCount = (field, count) => {
		const isValid = Number.isFinite(count) && Number.isInteger(count) && count >= 0;
		if (!isValid) throw new Error(`skill-used assertion object ${field} must be a finite non-negative integer`);
	};
	if (typeof value === "string") {
		const single = value.trim();
		if (single) {
			return {
				kind: "list",
				matchers: [{ name: single }]
			};
		}
	}
	if (Array.isArray(value) && value.length > 0) {
		const allNamed = value.every((item) => typeof item === "string" && item.trim());
		if (allNamed) {
			return {
				kind: "list",
				matchers: value.map((item) => ({ name: item.trim() }))
			};
		}
	}
	const isObjectValue = value && typeof value === "object" && !Array.isArray(value);
	if (!isObjectValue) throw new Error("skill-used assertion must have a string, string array, or object value");
	const name = trimmedOrUndefined(value.name);
	const pattern = trimmedOrUndefined(value.pattern);
	if (!name && !pattern) throw new Error("skill-used assertion object must include a name or pattern property");
	// A bound that is present (even as undefined) must be a valid count.
	if ("min" in value) assertValidCount("min", value.min);
	if ("max" in value) assertValidCount("max", value.max);
	const min = typeof value.min === "number" ? value.min : void 0;
	const max = typeof value.max === "number" ? value.max : void 0;
	if (min !== void 0 && max !== void 0 && max < min) throw new Error("skill-used assertion object max must be greater than or equal to min");
	return {
		kind: "count",
		matcher: {
			max,
			min,
			name,
			pattern
		}
	};
}
|
|
2195
|
+
/**
 * Grade a skill-used assertion given as a list of skill names.
 *
 * Without `inverse`, every listed skill must have been used; with
 * `inverse`, none of them may have been used.
 *
 * @param params - Assertion params; reads `inverse` and `assertion`
 * @param skillCalls - Skill calls observed on the response
 * @param actualSkills - Pre-formatted labels of the observed skill calls
 * @param expected - `{ matchers }` produced by resolveSkillMatchers
 * @returns grading result with pass, score (1 or 0), reason and assertion
 */
function handleListSkillAssertion(params, skillCalls, actualSkills, expected) {
	const matched = [];
	const missing = [];
	for (const matcher of expected.matchers) {
		const wasUsed = skillCalls.some((skillCall) => matchesSkill(skillCall, matcher));
		if (wasUsed) matched.push(matcher);
		else missing.push(matcher);
	}
	const namesOf = (matchers) => matchers.map((matcher) => matcher.name).join(", ");
	const pass = params.inverse ? matched.length === 0 : missing.length === 0;
	const expectedSkills = expected.matchers.map((matcher) => matcher.name);
	const actualSummary = actualSkills.length > 0 ? actualSkills.join(", ") : "(none)";
	let reason;
	if (params.inverse) {
		if (pass) reason = `Forbidden skill(s) were not used: ${expectedSkills.join(", ")}`;
		else reason = `Forbidden skill(s) were used: ${namesOf(matched)}. Actual skills: ${actualSummary}`;
	} else if (pass) {
		reason = `Observed required skill(s): ${expectedSkills.join(", ")}. Actual skills: ${actualSummary}`;
	} else {
		reason = `Missing required skill(s): ${namesOf(missing)}. Actual skills: ${actualSummary}`;
	}
	return {
		pass,
		score: pass ? 1 : 0,
		reason,
		assertion: params.assertion
	};
}
|
|
2212
|
+
/**
 * Grade a skill-used assertion given as an object matcher with count bounds.
 *
 * `min` defaults to 1, or to 0 when only `max` was supplied. In inverse
 * mode the matcher may carry no bounds other than `max: 0`, and the
 * assertion passes only when no skill call matched.
 *
 * @param params - Assertion params; reads `inverse` and `assertion`
 * @param skillCalls - Skill calls observed on the response
 * @param actualSkills - Pre-formatted labels of the observed skill calls
 * @param matcher - Matcher with optional name, pattern, min and max
 * @returns grading result with pass, score (1 or 0), reason and assertion
 * @throws Error when an inverse assertion carries unsupported count bounds
 */
function handleCountSkillAssertion(params, skillCalls, actualSkills, matcher) {
	const { max } = matcher;
	const minGiven = matcher.min !== void 0;
	const maxGiven = max !== void 0;
	const min = matcher.min ?? (maxGiven ? 0 : 1);
	const hits = skillCalls.filter((skillCall) => matchesSkill(skillCall, matcher));
	const count = hits.length;
	const matcherLabel = matcher.pattern || matcher.name || "*";
	const result = (pass, reason) => ({
		pass,
		score: pass ? 1 : 0,
		reason,
		assertion: params.assertion
	});
	if (params.inverse) {
		const hasUnsupportedBounds = minGiven || (maxGiven && max !== 0);
		if (hasUnsupportedBounds) throw new Error("not-skill-used object assertions only support name/pattern with no count bounds, or max: 0");
		const actualSummary = actualSkills.length > 0 ? actualSkills.join(", ") : "(none)";
		if (count === 0) return result(true, `Forbidden skill "${matcherLabel}" was not used. Actual skills: ${actualSummary}`);
		return result(false, `Forbidden skill "${matcherLabel}" was used ${count} time(s). Matches: ${hits.map(formatSkillCall).join(", ")}`);
	}
	const withinBounds = count >= min && (max === void 0 || count <= max);
	const expectation = max === void 0 ? ` (expected at least ${min})` : ` (expected ${min}-${max})`;
	let reason = `Matched skill "${matcherLabel}" ${count} time(s)${expectation}`;
	if (hits.length > 0) reason += `. Matches: ${hits.map(formatSkillCall).join(", ")}`;
	return result(withinBounds, reason);
}
|
|
2242
|
+
/**
 * Entry point for the skill-used assertion: resolves the expected matchers
 * from the rendered (or raw) assertion value and dispatches to the list or
 * count handler.
 *
 * @param params - Assertion params
 * @returns grading result from the selected handler
 */
function handleSkillUsed(params) {
	const skillCalls = getSkillCalls(params);
	const actualSkills = skillCalls.map(formatSkillCall);
	const rawValue = params.renderedValue ?? params.assertion.value;
	const expected = resolveSkillMatchers(rawValue);
	switch (expected.kind) {
		case "list":
			return handleListSkillAssertion(params, skillCalls, actualSkills, expected);
		default:
			return handleCountSkillAssertion(params, skillCalls, actualSkills, expected.matcher);
	}
}
|
|
2249
|
+
//#endregion
|
|
2126
2250
|
//#region src/assertions/sql.ts
|
|
2127
2251
|
const handleIsSql = async ({ assertion, renderedValue, outputString, inverse }) => {
|
|
2128
2252
|
let pass = false;
|
|
@@ -2355,23 +2479,6 @@ const handleToolCallF1 = ({ assertion, output, renderedValue, inverse }) => {
|
|
|
2355
2479
|
};
|
|
2356
2480
|
};
|
|
2357
2481
|
//#endregion
|
|
2358
|
-
//#region src/assertions/traceUtils.ts
|
|
2359
|
-
/**
|
|
2360
|
-
* Shared utilities for trace assertions
|
|
2361
|
-
*/
|
|
2362
|
-
/**
|
|
2363
|
-
* Match a span name against a glob-like pattern.
|
|
2364
|
-
* Supports * (any characters) and ? (single character) wildcards.
|
|
2365
|
-
*
|
|
2366
|
-
* @param spanName - The span name to match
|
|
2367
|
-
* @param pattern - The glob pattern to match against
|
|
2368
|
-
* @returns true if the span name matches the pattern
|
|
2369
|
-
*/
|
|
2370
|
-
function matchesPattern(spanName, pattern) {
|
|
2371
|
-
const regexPattern = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
|
|
2372
|
-
return new RegExp(`^${regexPattern}$`, "i").test(spanName);
|
|
2373
|
-
}
|
|
2374
|
-
//#endregion
|
|
2375
2482
|
//#region src/assertions/traceErrorSpans.ts
|
|
2376
2483
|
function isErrorSpan(span) {
|
|
2377
2484
|
if (span.statusCode && span.statusCode >= 400) return true;
|
|
@@ -2540,6 +2647,524 @@ const handleTraceSpanDuration = ({ assertion, assertionValueContext }) => {
|
|
|
2540
2647
|
};
|
|
2541
2648
|
};
|
|
2542
2649
|
//#endregion
|
|
2650
|
+
//#region src/assertions/trajectoryUtils.ts
|
|
2651
|
+
// Span attribute keys checked, in this order, for a tool name.
const TOOL_ATTRIBUTE_KEYS = [
	"tool.name",
	"tool_name",
	"tool",
	"function.name",
	"function_name",
	"gen_ai.tool.name",
	"codex.mcp.tool",
	"agent.tool",
	"agent.tool_name",
	"agent.toolName"
];
// Span attribute keys checked, in this order, for a tool call's arguments.
const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
	"tool.arguments",
	"tool.args",
	"tool.input",
	"tool_arguments",
	"tool_args",
	"tool_input",
	"function.arguments",
	"function.args",
	"function.input",
	"function_arguments",
	"function_args",
	"gen_ai.tool.arguments",
	"gen_ai.tool.args",
	"gen_ai.tool.input",
	"gen_ai.tool.call.arguments",
	"gen_ai.tool.call.args",
	"agent.tool.arguments",
	"agent.tool.args",
	"agent.tool.input",
	"codex.mcp.arguments",
	"codex.mcp.args",
	"codex.mcp.input",
	"arguments",
	"args",
	"input"
];
// Span attribute keys checked, in this order, for a command string.
const COMMAND_ATTRIBUTE_KEYS = [
	"codex.command",
	"command",
	"command.name",
	"command_name"
];
// Span attribute keys that explicitly carry a search query.
const SEARCH_ATTRIBUTE_KEYS = [
	"codex.search.query",
	"search.query",
	"search_query"
];
// The bare "query" key is only treated as a search query on search-like spans.
const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
// Matches span names containing search/find/lookup/retrieve/retrieval as a
// delimited word (delimiters: whitespace . _ : / -), case-insensitively.
const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
// Judge summaries longer than this are truncated to a head and a tail slice.
const MAX_JUDGE_SUMMARY_STEPS = 24;
// Number of leading steps kept when a judge summary is truncated.
const JUDGE_SUMMARY_HEAD_STEPS = 12;
// Number of trailing steps kept when a judge summary is truncated.
const JUDGE_SUMMARY_TAIL_STEPS = 12;
|
|
2706
|
+
/**
 * Return the first non-blank string attribute among the given keys.
 *
 * @param attributes - Span attribute bag
 * @param keys - Keys to probe, in priority order
 * @returns the trimmed value of the first key holding a non-blank string,
 *   or undefined when none does
 */
function getStringAttribute(attributes, keys) {
	for (const key of keys) {
		const candidate = attributes[key];
		if (typeof candidate !== "string") continue;
		const trimmed = candidate.trim();
		if (trimmed) return trimmed;
	}
	return void 0;
}
|
|
2712
|
+
/**
 * Normalize an attribute value that may hold structured data.
 *
 * Strings are trimmed and parsed as JSON when possible (falling back to the
 * trimmed text); numbers, booleans and objects pass through unchanged.
 *
 * @param value - Raw attribute value
 * @returns the normalized value, or undefined for null/undefined, blank
 *   strings and any other value type
 */
function normalizeStructuredAttribute(value) {
	if (value == null) return void 0;
	switch (typeof value) {
		case "string": {
			const text = value.trim();
			if (!text) return void 0;
			try {
				return JSON.parse(text);
			} catch {
				// Not JSON: keep the trimmed text as-is.
				return text;
			}
		}
		case "number":
		case "boolean":
		case "object":
			return value;
		default:
			return void 0;
	}
}
|
|
2725
|
+
/**
 * Compare two optional status objects by `code` and `message`.
 *
 * @param left - Status object or undefined/null
 * @param right - Status object or undefined/null
 * @returns true when both code and message are strictly equal
 */
function hasSameStatus(left, right) {
	if (left?.code !== right?.code) return false;
	return left?.message === right?.message;
}
|
|
2728
|
+
/**
 * Heuristically decide whether a span represents a search operation.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns true when the span name looks search-related, or when any
 *   attribute key other than the bare "query" contains search, lookup,
 *   retrieve or retrieval as a dot/underscore-delimited segment
 */
function isSearchLikeSpan(span) {
	const attributes = span.attributes || {};
	if (SEARCH_SPAN_NAME_PATTERN.test(span.name)) return true;
	if (span.name.startsWith("search ")) return true;
	for (const key of Object.keys(attributes)) {
		if (key === "query") continue;
		if (/(^|[._])(search|lookup|retriev(?:e|al))($|[._])/i.test(key)) return true;
	}
	return false;
}
|
|
2733
|
+
/**
 * Build the status summary for a trajectory step.
 *
 * @param step - Step with optional `statusCode` and `statusMessage`
 * @returns `{ code }` plus `message` when one is set, or undefined when
 *   the status code is missing or 0
 */
function getTrajectoryStepStatus(step) {
	const { statusCode, statusMessage } = step;
	if (statusCode === void 0 || statusCode === 0) return void 0;
	const status = { code: statusCode };
	if (statusMessage) status.message = statusMessage;
	return status;
}
|
|
2740
|
+
/**
 * Extract the first whitespace-delimited token of a command string.
 *
 * @param command - Full command line
 * @returns the leading token, or undefined when the command is blank
 */
function getCommandExecutable(command) {
	const [executable] = command.trim().split(/\s+/);
	return executable ? executable : void 0;
}
|
|
2743
|
+
/**
 * Derive the tool name represented by a span, if any.
 *
 * Lookup order: the well-known tool attribute keys; then any non-blank
 * string attribute whose key looks like a tool/function name or a `tool`
 * segment (excluding result/output keys); then, for span names starting
 * with "mcp ", the text after the last "/".
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns the trimmed tool name, or undefined when none is found
 */
function extractToolName(span) {
	const attributes = span.attributes || {};
	const knownKeyMatch = getStringAttribute(attributes, TOOL_ATTRIBUTE_KEYS);
	if (knownKeyMatch) return knownKeyMatch;
	for (const [key, value] of Object.entries(attributes)) {
		if (typeof value !== "string") continue;
		const trimmed = value.trim();
		if (!trimmed) continue;
		if (/tool.?name|function.?name/i.test(key)) return trimmed;
		const isToolSegment = /(^|[._])tool($|[._])/i.test(key);
		if (isToolSegment && !/result|output/i.test(key)) return trimmed;
	}
	if (!span.name.startsWith("mcp ")) return void 0;
	const slashIndex = span.name.lastIndexOf("/");
	const hasTextAfterSlash = slashIndex !== -1 && slashIndex < span.name.length - 1;
	return hasTextAfterSlash ? span.name.slice(slashIndex + 1).trim() : void 0;
}
|
|
2757
|
+
/**
 * Derive the arguments of a tool call from a span's attributes.
 *
 * The well-known argument keys are tried first, in order. Failing that,
 * any attribute whose key has an arguments/args/input segment is used,
 * skipping keys that mention result, output, error or status.
 *
 * @param span - Span with optional `attributes`
 * @returns the first normalized value found, or undefined
 */
function extractToolArgs(span) {
	const attributes = span.attributes || {};
	for (const key of TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
		const known = normalizeStructuredAttribute(attributes[key]);
		if (known !== void 0) return known;
	}
	for (const [key, rawValue] of Object.entries(attributes)) {
		const isOutcomeKey = /result|output|error|status/i.test(key);
		const isArgumentKey = /(^|[._])(arguments|args|input)($|[._])/i.test(key);
		if (isOutcomeKey || !isArgumentKey) continue;
		const inferred = normalizeStructuredAttribute(rawValue);
		if (inferred !== void 0) return inferred;
	}
	return void 0;
}
|
|
2770
|
+
/**
 * Derive the command represented by a span, if any.
 *
 * Lookup order: the well-known command attribute keys; then any non-blank
 * string attribute whose key mentions "command" but not output/result;
 * then, for span names starting with "exec ", the rest of the name.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns the trimmed command string, or undefined when none is found
 */
function extractCommand(span) {
	const attributes = span.attributes || {};
	const knownKeyMatch = getStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
	if (knownKeyMatch) return knownKeyMatch;
	for (const [key, value] of Object.entries(attributes)) {
		if (typeof value !== "string") continue;
		const trimmed = value.trim();
		if (!trimmed) continue;
		const isCommandKey = /command/i.test(key) && !/output|result/i.test(key);
		if (isCommandKey) return trimmed;
	}
	const execPrefix = "exec ";
	if (!span.name.startsWith(execPrefix)) return void 0;
	return span.name.slice(execPrefix.length).trim();
}
|
|
2780
|
+
/**
 * Derive the search query represented by a span, if any.
 *
 * Lookup order: the explicit search attribute keys; then the generic
 * "query" attribute, but only on search-like spans; then, for span names
 * starting with "search ", the rest of the name with one leading and one
 * trailing double quote removed.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns the query string, or undefined when none is found
 */
function extractSearchQuery(span) {
	const attributes = span.attributes || {};
	const explicitQuery = getStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
	if (explicitQuery) return explicitQuery;
	const genericQuery = getStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
	if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
	const searchPrefix = "search ";
	if (!span.name.startsWith(searchPrefix)) return void 0;
	const remainder = span.name.slice(searchPrefix.length);
	return remainder.replace(/^"|"$/g, "").trim();
}
|
|
2788
|
+
/**
 * Decide whether a span represents a reasoning step.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns true when `codex.item.type` is "reasoning", or when the name is
 *   "reasoning" (any case) optionally followed by "_" or whitespace and
 *   more text
 */
function isReasoningSpan(span) {
	const attributes = span.attributes || {};
	if (attributes["codex.item.type"] === "reasoning") return true;
	if (/^reasoning([_\s]|$)/i.test(span.name)) return true;
	return span.name === "reasoning";
}
|
|
2792
|
+
/**
 * Decide whether a span represents an agent message.
 *
 * @param span - Span with `name` and optional `attributes`
 * @returns true when `codex.item.type` is "agent_message", or when the
 *   name is exactly "agent response" or "send input"
 */
function isMessageSpan(span) {
	const attributes = span.attributes || {};
	if (attributes["codex.item.type"] === "agent_message") return true;
	switch (span.name) {
		case "agent response":
		case "send input":
			return true;
		default:
			return false;
	}
}
|
|
2796
|
+
/**
 * Convert a trace's spans into an ordered list of trajectory steps.
 *
 * Spans are ordered by start time, then end time (falling back to the
 * start time when missing), then original position. Each span is
 * classified as the first of tool, command, search, reasoning or message
 * that applies, and otherwise stays a plain "span".
 *
 * @param trace - Trace with an optional `spans` iterable
 * @returns one step per span: aliases, optional args, attributes, timing,
 *   status, display name, original span name and type
 */
function extractTrajectorySteps(trace) {
	const compareEntries = (left, right) => {
		const startDelta = left.span.startTime - right.span.startTime;
		if (startDelta !== 0) return startDelta;
		const leftEnd = left.span.endTime ?? left.span.startTime;
		const rightEnd = right.span.endTime ?? right.span.startTime;
		const endDelta = leftEnd - rightEnd;
		// The original index is the final tie-breaker.
		return endDelta !== 0 ? endDelta : left.index - right.index;
	};
	const toStep = ({ span }) => {
		const toolName = extractToolName(span);
		const command = extractCommand(span);
		const searchQuery = extractSearchQuery(span);
		const aliases = new Set([span.name]);
		let type = "span";
		let name = span.name;
		let args;
		if (toolName) {
			type = "tool";
			name = toolName;
			aliases.add(toolName);
			args = extractToolArgs(span);
		} else if (command) {
			type = "command";
			name = command;
			aliases.add(command);
			// The bare executable is also accepted as an alias.
			const executable = getCommandExecutable(command);
			if (executable) aliases.add(executable);
		} else if (searchQuery) {
			type = "search";
			name = searchQuery;
			aliases.add(searchQuery);
		} else if (isReasoningSpan(span)) {
			type = "reasoning";
			aliases.add("reasoning");
		} else if (isMessageSpan(span)) {
			type = "message";
			aliases.add("message");
		}
		return {
			aliases: [...aliases],
			...(args === void 0 ? {} : { args }),
			attributes: span.attributes || {},
			endTime: span.endTime,
			name,
			spanId: span.spanId,
			spanName: span.name,
			startTime: span.startTime,
			statusCode: span.statusCode,
			statusMessage: span.statusMessage,
			type
		};
	};
	const entries = [...(trace.spans || [])].map((span, index) => ({
		span,
		index
	}));
	entries.sort(compareEntries);
	return entries.map((entry) => toStep(entry));
}
|
|
2853
|
+
/**
 * Normalize a trajectory matcher into object form.
 *
 * @param matcher - A pattern string, or a matcher object
 * @param defaultType - Step type applied when the matcher has no truthy
 *   `type` of its own
 * @returns a new matcher object; the input is never mutated
 */
function normalizeTrajectoryMatcher(matcher, defaultType) {
	if (typeof matcher === "string") {
		const fromPattern = { pattern: matcher };
		if (defaultType) fromPattern.type = defaultType;
		return fromPattern;
	}
	const copy = { ...matcher };
	if (!matcher.type && defaultType) copy.type = defaultType;
	return copy;
}
|
|
2863
|
+
/**
 * Decide whether a trajectory step satisfies a matcher.
 *
 * @param step - Step with `type` and `aliases`
 * @param matcher - Pattern string or matcher object
 * @param defaultType - Type applied when the matcher does not set one
 * @returns false when a type constraint excludes the step; true when no
 *   pattern/name is given; otherwise whether any alias matches the
 *   pattern (or name) as a glob
 */
function matchesTrajectoryStep(step, matcher, defaultType) {
	const normalized = normalizeTrajectoryMatcher(matcher, defaultType);
	const { type } = normalized;
	if (type) {
		const allowedTypes = Array.isArray(type) ? type : [type];
		if (!allowedTypes.includes(step.type)) return false;
	}
	const glob = normalized.pattern || normalized.name;
	if (!glob) return true;
	for (const alias of step.aliases) {
		if (matchesPattern(alias, glob)) return true;
	}
	return false;
}
|
|
2872
|
+
/**
 * Render a trajectory step as `type:name` for assertion messages.
 *
 * @param step - Step with `type` and `name`
 * @returns the formatted label
 */
function formatTrajectoryStep(step) {
	const { type, name } = step;
	return `${type}:${name}`;
}
|
|
2875
|
+
/**
 * Render tool arguments for assertion messages.
 *
 * @param args - Captured arguments of any type
 * @returns "(none)" for undefined; otherwise the JSON serialization, or
 *   `String(args)` when JSON serialization throws or yields undefined
 */
function formatTrajectoryArgs(args) {
	if (args === void 0) return "(none)";
	let serialized;
	try {
		serialized = JSON.stringify(args);
	} catch {
		// Unserializable input (e.g. a cycle): use String() below instead.
		serialized = void 0;
	}
	return serialized !== void 0 ? serialized : String(args);
}
|
|
2883
|
+
/**
 * Collapse runs of consecutive identical steps in a judge summary.
 *
 * Two neighbouring steps are identical when type, name, spanName and
 * status all agree. The first step of a run is kept and its
 * `collapsedCount` is updated in place to the length of the run.
 *
 * @param steps - Summary steps in order
 * @returns a new array holding the kept step objects
 */
function compactJudgeTrajectorySteps(steps) {
	const compacted = [];
	for (const step of steps) {
		const last = compacted.length > 0 ? compacted[compacted.length - 1] : void 0;
		const repeatsLast = Boolean(last) && last.type === step.type && last.name === step.name && last.spanName === step.spanName && hasSameStatus(last.status, step.status);
		if (!repeatsLast) {
			compacted.push(step);
			continue;
		}
		// The first repeat turns an implicit count of 1 into 2.
		last.collapsedCount = (last.collapsedCount ?? 1) + 1;
	}
	return compacted;
}
|
|
2895
|
+
/**
 * Cap a judge summary at MAX_JUDGE_SUMMARY_STEPS steps.
 *
 * Longer lists become the leading JUDGE_SUMMARY_HEAD_STEPS steps, an
 * `{ omittedCount }` marker, and the trailing JUDGE_SUMMARY_TAIL_STEPS steps.
 *
 * @param steps - Compacted summary steps
 * @returns the same array when it is short enough, else the truncated copy
 */
function truncateJudgeTrajectorySteps(steps) {
	const overflow = steps.length - MAX_JUDGE_SUMMARY_STEPS;
	if (steps.length <= MAX_JUDGE_SUMMARY_STEPS) return steps;
	const head = steps.slice(0, JUDGE_SUMMARY_HEAD_STEPS);
	const tail = steps.slice(-JUDGE_SUMMARY_TAIL_STEPS);
	return [
		...head,
		{ omittedCount: overflow },
		...tail
	];
}
|
|
2903
|
+
/**
 * Produce the JSON trajectory summary handed to an LLM judge.
 *
 * Each step is reduced to its 1-based index, type and name, plus
 * `spanName` when it differs from the name and `status` when one exists;
 * the list is then compacted and truncated.
 *
 * @param trace - Trace with `traceId` and spans
 * @returns pretty-printed JSON with traceId, stepCount, compactedStepCount
 *   and steps
 */
function summarizeTrajectoryForJudge(trace) {
	const rawSteps = extractTrajectorySteps(trace).map((step, position) => {
		// Keys are added in a fixed order because it shows up in the JSON.
		const summary = {
			index: position + 1,
			type: step.type,
			name: step.name
		};
		if (step.spanName !== step.name) summary.spanName = step.spanName;
		const status = getTrajectoryStepStatus(step);
		if (status) summary.status = status;
		return summary;
	});
	const compactedSteps = compactJudgeTrajectorySteps(rawSteps);
	const payload = {
		traceId: trace.traceId,
		stepCount: rawSteps.length,
		compactedStepCount: compactedSteps.length,
		steps: truncateJudgeTrajectorySteps(compactedSteps)
	};
	return JSON.stringify(payload, null, 2);
}
|
|
2920
|
+
//#endregion
|
|
2921
|
+
//#region src/assertions/trajectory.ts
|
|
2922
|
+
/**
 * Fetch the trace attached to an assertion, failing when it is unusable.
 *
 * @param params - Assertion params; reads `assertionValueContext.trace`
 *   and `baseType`
 * @returns the trace object
 * @throws Error when the trace is missing or has no `spans`
 */
function getTraceOrThrow(params) {
	const { trace } = params.assertionValueContext;
	const hasSpans = Boolean(trace) && Boolean(trace.spans);
	if (hasSpans) return trace;
	throw new Error(`No trace data available for ${params.baseType} assertion`);
}
|
|
2927
|
+
/**
 * Flip a pass result when the assertion is negated.
 *
 * @param pass - Result of the non-negated check
 * @param inverse - Truthy when the assertion is negated
 * @returns `!pass` when inverse is truthy, otherwise `pass` unchanged
 */
function applyInverse(pass, inverse) {
	if (inverse) return !pass;
	return pass;
}
|
|
2930
|
+
/**
 * Join step labels for display.
 *
 * @param stepLabels - Array of formatted step labels
 * @returns the labels joined by ", ", or "(none)" for an empty array
 */
function formatStepList(stepLabels) {
	if (stepLabels.length > 0) return stepLabels.join(", ");
	return "(none)";
}
|
|
2933
|
+
/**
 * Ensure a trajectory matcher names what it should match.
 *
 * @param matcher - Normalized matcher
 * @param assertionType - Assertion type used as the error message prefix
 * @param index - Zero-based step index; omit for a standalone matcher
 * @throws Error when the matcher has neither a pattern nor a name
 */
function requireNamedTrajectoryMatcher(matcher, assertionType, index) {
	const isNamed = Boolean(matcher.pattern || matcher.name);
	if (isNamed) return;
	let stepLabel = "object";
	if (index !== void 0) stepLabel = `step ${index + 1}`;
	throw new Error(`${assertionType} assertion ${stepLabel} must include a name or pattern property`);
}
|
|
2938
|
+
/**
 * Normalize the value of a trajectory:goal-success assertion.
 *
 * @param value - A non-blank string, or an object with a non-blank string
 *   `goal` property
 * @returns `{ goal }` holding the trimmed goal text
 * @throws Error for any other value
 */
function resolveGoalSuccessValue(value) {
	const isObjectValue = Boolean(value) && typeof value === "object" && !Array.isArray(value);
	let goalText;
	if (typeof value === "string") goalText = value;
	else if (isObjectValue && typeof value.goal === "string") goalText = value.goal;
	const goal = goalText === void 0 ? "" : goalText.trim();
	if (!goal) throw new Error("trajectory:goal-success assertion must have a string value or an object with a goal property");
	return { goal };
}
|
|
2943
|
+
/**
 * Normalize the value of a trajectory:tool-used assertion.
 *
 * @param value - A string, an array of strings, or a matcher object with
 *   optional numeric `min` / `max`
 * @returns `{ kind: "list", matchers }` for string inputs, or
 *   `{ kind: "count", matcher }` for an object; matchers default to type "tool"
 * @throws Error for any other value
 */
function resolveToolMatchers(value) {
	const asToolMatcher = (item) => normalizeTrajectoryMatcher(item, "tool");
	const numberOrUndefined = (candidate) => (typeof candidate === "number" ? candidate : void 0);
	if (typeof value === "string") {
		return {
			kind: "list",
			matchers: [asToolMatcher(value)]
		};
	}
	if (Array.isArray(value)) {
		if (value.every((item) => typeof item === "string")) {
			return {
				kind: "list",
				matchers: value.map((item) => asToolMatcher(item))
			};
		}
	} else if (value && typeof value === "object") {
		return {
			kind: "count",
			matcher: {
				...asToolMatcher(value),
				max: numberOrUndefined(value.max),
				min: numberOrUndefined(value.min)
			}
		};
	}
	throw new Error("trajectory:tool-used assertion must have a string, string array, or object value");
}
|
|
2962
|
+
/**
 * Handler for the trajectory:tool-used assertion.
 *
 * List form: every listed tool must appear among the trace's tool steps
 * (or, inverted, none may appear). Object form: the number of matching
 * tool steps must be at least `min` (default 1) and at most `max` when
 * set; inverse mode flips that result.
 *
 * @param params - Assertion params; reads the trace, `renderedValue`,
 *   `assertion` and `inverse`
 * @returns grading result with pass, score (1 or 0), reason and assertion
 * @throws Error when trace data is missing, the list is empty, or an
 *   object matcher has neither a name nor a pattern
 */
const handleTrajectoryToolUsed = (params) => {
	const trace = getTraceOrThrow(params);
	const steps = extractTrajectorySteps(trace).filter((step) => step.type === "tool");
	const expected = resolveToolMatchers(params.renderedValue ?? params.assertion.value);
	const labelOf = (matcher) => matcher.pattern || matcher.name || "*";
	const toResult = (pass, reason) => ({
		pass,
		score: pass ? 1 : 0,
		reason,
		assertion: params.assertion
	});
	if (expected.kind === "list") {
		if (expected.matchers.length === 0) throw new Error("trajectory:tool-used assertion requires at least one expected tool");
		const matched = [];
		const missing = [];
		for (const matcher of expected.matchers) {
			const wasUsed = steps.some((step) => matchesTrajectoryStep(step, matcher));
			if (wasUsed) matched.push(matcher);
			else missing.push(matcher);
		}
		const pass = params.inverse ? matched.length === 0 : missing.length === 0;
		const actualTools = steps.map((step) => formatTrajectoryStep(step));
		const expectedTools = expected.matchers.map(labelOf);
		let reason;
		if (params.inverse) {
			if (pass) reason = `Forbidden tool(s) were not used: ${expectedTools.join(", ")}`;
			else reason = `Forbidden tool(s) were used: ${matched.map(labelOf).join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
		} else if (pass) {
			reason = `Observed required tool(s): ${expectedTools.join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
		} else {
			reason = `Missing required tool(s): ${missing.map(labelOf).join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
		}
		return toResult(pass, reason);
	}
	const { matcher } = expected;
	const min = matcher.min ?? 1;
	const { max } = matcher;
	if (!matcher.pattern && !matcher.name) throw new Error("trajectory:tool-used assertion object must include a name or pattern property");
	const matchingSteps = steps.filter((step) => matchesTrajectoryStep(step, matcher));
	const count = matchingSteps.length;
	const basePass = count >= min && (max === void 0 || count <= max);
	const matcherLabel = labelOf(matcher);
	let reason;
	if (params.inverse) {
		// Inverse mode uses its own wording instead of the count summary.
		reason = basePass ? `Tool "${matcherLabel}" matched ${count} time(s), which violates the inverse assertion` : `Tool "${matcherLabel}" did not satisfy the forbidden match condition`;
	} else {
		const expectation = max === void 0 ? ` (expected at least ${min})` : ` (expected ${min}-${max})`;
		reason = `Matched tool "${matcherLabel}" ${count} time(s)${expectation}`;
		if (count > 0) reason += `. Matches: ${matchingSteps.map((step) => formatTrajectoryStep(step)).join(", ")}`;
	}
	return toResult(applyInverse(basePass, params.inverse), reason);
};
|
|
3004
|
+
/**
 * Normalize the value of a trajectory:tool-sequence assertion.
 *
 * @param value - Either an array of step matchers, or an object with an
 *   optional `mode` and an optional `steps` array
 * @returns `{ mode, steps }`; mode defaults to "in_order" and steps to an
 *   empty array
 * @throws Error when the value is neither an array nor an object, or when
 *   `steps` is set to something truthy that is not an array
 */
function resolveSequenceValue(value) {
	if (Array.isArray(value)) {
		return {
			mode: "in_order",
			steps: value
		};
	}
	if (value && typeof value === "object") {
		const { mode, steps } = value;
		// Reject malformed steps here so the caller gets a descriptive error
		// instead of a "steps.map is not a function" TypeError later on.
		if (steps && !Array.isArray(steps)) throw new Error("trajectory:tool-sequence assertion steps must be an array");
		return {
			mode: mode || "in_order",
			steps: steps || []
		};
	}
	throw new Error("trajectory:tool-sequence assertion must have an array or object value");
}
|
|
3018
|
+
/**
 * Check whether a value is a non-null, non-array object.
 *
 * @param value - Any value
 * @returns true for objects other than null and arrays
 */
function isRecord(value) {
	if (value === null || Array.isArray(value)) return false;
	return typeof value === "object";
}
|
|
3021
|
+
/**
 * Partial (subset) comparison of tool arguments.
 *
 * Arrays must have the same length and match element by element. For
 * record objects, every expected key must be an own property of the
 * actual value and match recursively; extra actual keys are ignored.
 * Everything else is compared with deep strict equality.
 *
 * @param actual - Arguments observed on the tool call
 * @param expected - Arguments required by the assertion
 * @returns true when `actual` satisfies `expected`
 */
function matchesExpectedArgsPartial(actual, expected) {
	if (Array.isArray(expected)) {
		if (!Array.isArray(actual) || actual.length !== expected.length) return false;
		return expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
	}
	if (!isRecord(expected)) return node_util.isDeepStrictEqual(actual, expected);
	if (!isRecord(actual)) return false;
	for (const [key, expectedValue] of Object.entries(expected)) {
		if (!Object.prototype.hasOwnProperty.call(actual, key)) return false;
		if (!matchesExpectedArgsPartial(actual[key], expectedValue)) return false;
	}
	return true;
}
|
|
3029
|
+
/**
 * Compare observed tool arguments with the expected ones.
 *
 * @param actual - Arguments observed on the tool call
 * @param expected - Arguments required by the assertion
 * @param mode - "exact" for deep strict equality; any other value uses
 *   partial matching
 * @returns true when the arguments match under the given mode
 */
function matchesToolArgs(actual, expected, mode) {
	const compare = mode === "exact" ? node_util.isDeepStrictEqual : matchesExpectedArgsPartial;
	return compare(actual, expected);
}
|
|
3033
|
+
/**
 * Validate the `mode` option of a trajectory:tool-args-match assertion.
 *
 * @param mode - undefined, "partial" or "exact"
 * @returns the mode, defaulting to "partial" when undefined
 * @throws Error for any other value
 */
function resolveToolArgsMatchMode(mode) {
	switch (mode) {
		case void 0:
			return "partial";
		case "partial":
		case "exact":
			return mode;
		default:
			throw new Error("trajectory:tool-args-match assertion mode must be \"partial\" or \"exact\"");
	}
}
|
|
3038
|
+
/**
 * Normalize the value of a trajectory:tool-args-match assertion.
 *
 * @param value - Object with a name or pattern, an `args` (or
 *   `arguments`) property and an optional `mode`
 * @returns `{ matcher, expectedArgs, mode }`; the matcher defaults to type
 *   "tool" and an own `args` property takes precedence over `arguments`
 * @throws Error when the value is not an object, names no tool, has no
 *   expected arguments, or has an invalid mode
 */
function resolveToolArgsMatchValue(value) {
	const isObjectValue = Boolean(value) && typeof value === "object" && !Array.isArray(value);
	if (!isObjectValue) throw new Error("trajectory:tool-args-match assertion must have an object value");
	const matcher = normalizeTrajectoryMatcher(value, "tool");
	requireNamedTrajectoryMatcher(matcher, "trajectory:tool-args-match");
	const hasOwnArgs = Object.prototype.hasOwnProperty.call(value, "args");
	const expectedArgs = hasOwnArgs ? value.args : value.arguments;
	if (expectedArgs === void 0) throw new Error("trajectory:tool-args-match assertion must include an args or arguments property");
	// The mode is validated last so the errors above take precedence.
	const mode = resolveToolArgsMatchMode(value.mode);
	return {
		matcher,
		expectedArgs,
		mode
	};
}
|
|
3050
|
+
/**
 * Handler for the trajectory:tool-sequence assertion.
 *
 * In "exact" mode the trace's tool steps must match the expected steps one
 * for one. In any other mode the expected steps must occur in order, with
 * other tool steps allowed in between. Inverse mode flips the result.
 *
 * @param params - Assertion params; reads the trace, `renderedValue`,
 *   `assertion` and `inverse`
 * @returns grading result with pass, score (1 or 0), reason and assertion
 * @throws Error when trace data is missing, a step names no tool, or no
 *   steps are given
 */
const handleTrajectoryToolSequence = (params) => {
	const trace = getTraceOrThrow(params);
	const toolSteps = extractTrajectorySteps(trace).filter((step) => step.type === "tool");
	const value = resolveSequenceValue(params.renderedValue ?? params.assertion.value);
	const expectedMatchers = value.steps.map((step, index) => {
		const matcher = normalizeTrajectoryMatcher(step, "tool");
		requireNamedTrajectoryMatcher(matcher, "trajectory:tool-sequence", index);
		return matcher;
	});
	if (expectedMatchers.length === 0) throw new Error("trajectory:tool-sequence assertion requires at least one expected step");
	const actualTools = toolSteps.map((step) => formatTrajectoryStep(step));
	let basePass;
	let reason;
	if (value.mode === "exact") {
		const sameLength = toolSteps.length === expectedMatchers.length;
		basePass = sameLength && expectedMatchers.every((matcher, index) => matchesTrajectoryStep(toolSteps[index], matcher));
		if (basePass) {
			reason = `Observed exact tool sequence: ${formatStepList(actualTools)}`;
		} else {
			const expectedLabels = expectedMatchers.map((matcher) => matcher.pattern || matcher.name || "*");
			reason = `Expected exact tool sequence of ${expectedLabels.join(", ")}, but actual tools were ${formatStepList(actualTools)}`;
		}
	} else {
		// Walk the tool steps once, advancing through the expected matchers.
		const matchedSteps = [];
		let expectedIndex = 0;
		for (let stepIndex = 0; stepIndex < toolSteps.length && expectedIndex < expectedMatchers.length; stepIndex += 1) {
			const step = toolSteps[stepIndex];
			if (!matchesTrajectoryStep(step, expectedMatchers[expectedIndex])) continue;
			matchedSteps.push(formatTrajectoryStep(step));
			expectedIndex += 1;
		}
		basePass = expectedIndex === expectedMatchers.length;
		if (basePass) {
			reason = `Observed tool sequence in order: ${matchedSteps.join(", ")}. Actual tools: ${formatStepList(actualTools)}`;
		} else {
			const pending = expectedMatchers[expectedIndex];
			reason = `Expected tool "${pending?.pattern || pending?.name || "*"}" was not observed in order. Actual tools: ${formatStepList(actualTools)}`;
		}
	}
	if (params.inverse) {
		reason = basePass ? `Forbidden tool sequence was observed. Actual tools: ${formatStepList(actualTools)}` : `Forbidden tool sequence was not observed`;
	}
	const pass = applyInverse(basePass, params.inverse);
	return {
		pass,
		score: pass ? 1 : 0,
		reason,
		assertion: params.assertion
	};
};
|
|
3089
|
+
/**
 * Handler for the trajectory:tool-args-match assertion.
 *
 * Passes when at least one tool step that matches the matcher and has
 * captured arguments satisfies the expected arguments under the chosen
 * mode. Inverse mode flips the result.
 *
 * @param params - Assertion params; reads the trace, `renderedValue`,
 *   `assertion` and `inverse`
 * @returns grading result with pass, score (1 or 0), reason and assertion
 */
const handleTrajectoryToolArgsMatch = (params) => {
	const trace = getTraceOrThrow(params);
	const toolSteps = extractTrajectorySteps(trace).filter((step) => step.type === "tool");
	const { matcher, expectedArgs, mode } = resolveToolArgsMatchValue(params.renderedValue ?? params.assertion.value);
	const matcherLabel = matcher.pattern || matcher.name || "*";
	const actualTools = toolSteps.map((step) => formatTrajectoryStep(step));
	const matchingSteps = toolSteps.filter((step) => matchesTrajectoryStep(step, matcher));
	const stepsWithArgs = matchingSteps.filter((step) => step.args !== void 0);
	const matchedStep = stepsWithArgs.find((step) => matchesToolArgs(step.args, expectedArgs, mode));
	const basePass = matchedStep !== void 0;
	const pass = applyInverse(basePass, params.inverse);
	const expectedArgsLabel = formatTrajectoryArgs(expectedArgs);
	const observedArgsLabel = stepsWithArgs.length > 0 ? stepsWithArgs.map((step) => formatTrajectoryArgs(step.args)).join(", ") : "(none)";
	const noToolMatched = matchingSteps.length === 0;
	let reason;
	if (params.inverse) {
		if (basePass) {
			reason = `Forbidden argument match for tool "${matcherLabel}" was observed on ${formatTrajectoryStep(matchedStep)}. Args: ${formatTrajectoryArgs(matchedStep.args)}`;
		} else if (noToolMatched) {
			reason = `Forbidden argument match for tool "${matcherLabel}" was not observed because no tool call matched it`;
		} else {
			reason = `Forbidden argument match for tool "${matcherLabel}" was not observed. Observed args: ${observedArgsLabel}`;
		}
	} else if (basePass) {
		reason = `Tool "${matcherLabel}" matched expected arguments (${mode}) on ${formatTrajectoryStep(matchedStep)}. Args: ${formatTrajectoryArgs(matchedStep.args)}`;
	} else if (noToolMatched) {
		reason = `No tool call matched "${matcherLabel}". Actual tools: ${formatStepList(actualTools)}`;
	} else if (stepsWithArgs.length === 0) {
		reason = `Tool "${matcherLabel}" was observed but no arguments were captured. Actual tools: ${formatStepList(actualTools)}`;
	} else {
		reason = `No call to tool "${matcherLabel}" matched expected arguments (${mode}): ${expectedArgsLabel}. Observed args: ${observedArgsLabel}`;
	}
	return {
		pass,
		score: pass ? 1 : 0,
		reason,
		assertion: params.assertion
	};
};
|
|
3116
|
+
/**
 * Validate and normalize the value of a "trajectory:step-count" assertion.
 *
 * @param {object} value - Assertion value; must be a non-null, non-array object.
 * @returns {object} The fields produced by `normalizeTrajectoryMatcher(value)`
 *   plus `max` and `min`, each kept only when it is of type number
 *   (otherwise `undefined`).
 * @throws {Error} When `value` is falsy, not an object, or an array.
 */
function resolveStepCountValue(value) {
	const isObjectValue = Boolean(value) && typeof value === "object" && !Array.isArray(value);
	if (!isObjectValue) {
		throw new Error("trajectory:step-count assertion must have an object value");
	}
	// Bounds of any other type are treated as "not provided".
	const numericBound = (key) => (typeof value[key] === "number" ? value[key] : undefined);
	const normalizedMatcher = normalizeTrajectoryMatcher(value);
	return {
		...normalizedMatcher,
		max: numericBound("max"),
		min: numericBound("min")
	};
}
|
|
3124
|
+
/**
 * Assertion handler for "trajectory:step-count".
 *
 * Counts the trajectory steps accepted by the configured matcher and checks the
 * count against the inclusive `min` / `max` bounds. The outcome is flipped
 * through `applyInverse` when `params.inverse` is set.
 *
 * @param {object} params - Assertion parameters; reads `renderedValue`,
 *   `assertion` and `inverse` (the trace is obtained via `getTraceOrThrow`).
 * @returns {{pass: boolean, score: number, reason: string, assertion: object}}
 *   Grading result; `score` is 1 on pass and 0 otherwise.
 * @throws {Error} When neither `min` nor `max` is provided.
 */
const handleTrajectoryStepCount = (params) => {
	const allSteps = extractTrajectorySteps(getTraceOrThrow(params));
	const matcher = resolveStepCountValue(params.renderedValue ?? params.assertion.value);
	const lower = matcher.min;
	const upper = matcher.max;
	const hasLower = lower !== undefined;
	const hasUpper = upper !== undefined;
	if (!hasLower && !hasUpper) {
		throw new Error("trajectory:step-count assertion must include a min or max property");
	}

	const hits = allSteps.filter((step) => matchesTrajectoryStep(step, matcher));
	const count = hits.length;
	const basePass = (!hasLower || count >= lower) && (!hasUpper || count <= upper);
	const pass = applyInverse(basePass, params.inverse);

	// Describe which filters were active so the reason is self-explanatory.
	const filterParts = [];
	if (matcher.type) {
		const typeList = Array.isArray(matcher.type) ? matcher.type : [matcher.type];
		filterParts.push(`type=${typeList.join("|")}`);
	}
	const namePattern = matcher.pattern || matcher.name;
	if (namePattern) {
		filterParts.push(`pattern=${namePattern}`);
	}

	let expectation;
	if (hasLower && hasUpper) {
		expectation = ` (expected ${lower}-${upper})`;
	} else if (hasLower) {
		expectation = ` (expected at least ${lower})`;
	} else {
		// At least one bound exists (validated above), so this is the max-only case.
		expectation = ` (expected at most ${upper})`;
	}

	let reason = `Matched ${count} trajectory step(s)`;
	if (filterParts.length > 0) {
		reason += ` for ${filterParts.join(", ")}`;
	}
	reason += expectation;
	if (hits.length > 0) {
		reason += `. Matches: ${hits.map(formatTrajectoryStep).join(", ")}`;
	}
	// Inverted assertions replace the detailed message with a range verdict.
	if (params.inverse) {
		reason = basePass ? `Trajectory step count satisfied the forbidden range` : `Trajectory step count did not satisfy the forbidden range`;
	}

	const score = pass ? 1 : 0;
	return {
		pass,
		score,
		reason,
		assertion: params.assertion
	};
};
|
|
3154
|
+
/**
 * Assertion handler for "trajectory:goal-success".
 *
 * Delegates the verdict to `require_graders.matchesTrajectoryGoalSuccess`,
 * passing the goal, a summary of the trace and the grading context taken from
 * `params`. Without `params.inverse` the grader result is returned as-is; with
 * it, `pass`, `score` and `reason` are replaced by their negated counterparts.
 *
 * @param {object} params - Assertion parameters; reads `renderedValue`,
 *   `assertion`, `outputString`, `test.options`, `assertionValueContext.vars`,
 *   `providerCallContext` and `inverse`.
 * @returns {Promise<object>} The grading result.
 */
const handleTrajectoryGoalSuccess = async (params) => {
	const trace = getTraceOrThrow(params);
	const { goal } = resolveGoalSuccessValue(params.renderedValue ?? params.assertion.value);
	const verdict = await require_graders.matchesTrajectoryGoalSuccess(
		goal,
		summarizeTrajectoryForJudge(trace),
		params.outputString,
		params.test.options,
		params.assertionValueContext.vars,
		params.assertion,
		params.providerCallContext
	);

	if (params.inverse) {
		// Achieving a forbidden goal is the failure case.
		const goalAchieved = verdict.pass;
		return {
			...verdict,
			assertion: params.assertion,
			pass: !goalAchieved,
			score: goalAchieved ? 0 : 1,
			reason: goalAchieved ? `Agent unexpectedly achieved the goal: ${goal}` : `Agent did not achieve the forbidden goal: ${goal}`
		};
	}
	return verdict;
};
|
|
3167
|
+
//#endregion
|
|
2543
3168
|
//#region src/assertions/webhook.ts
|
|
2544
3169
|
async function handleWebhook({ assertion, renderedValue, test, prompt, output, inverse }) {
|
|
2545
3170
|
require_invariant.invariant(renderedValue, "\"webhook\" assertion type must have a URL value");
|
|
@@ -2608,18 +3233,18 @@ const handleWordCount = ({ assertion, renderedValue, valueFromScript, outputStri
|
|
|
2608
3233
|
if (pass) reason = "Assertion passed";
|
|
2609
3234
|
else if (inverse) reason = `Expected word count to not be between ${min} and ${max}, but got ${wordCount}`;
|
|
2610
3235
|
else reason = `Word count ${wordCount} is not between ${min} and ${max}`;
|
|
2611
|
-
} else if (min
|
|
2612
|
-
const basePass = wordCount >= min;
|
|
2613
|
-
pass = inverse ? !basePass : basePass;
|
|
2614
|
-
if (pass) reason = "Assertion passed";
|
|
2615
|
-
else if (inverse) reason = `Expected word count to be less than ${min}, but got ${wordCount}`;
|
|
2616
|
-
else reason = `Word count ${wordCount} is less than minimum ${min}`;
|
|
2617
|
-
} else {
|
|
3236
|
+
} else if (min === void 0) {
|
|
2618
3237
|
const basePass = wordCount <= max;
|
|
2619
3238
|
pass = inverse ? !basePass : basePass;
|
|
2620
3239
|
if (pass) reason = "Assertion passed";
|
|
2621
3240
|
else if (inverse) reason = `Expected word count to be greater than ${max}, but got ${wordCount}`;
|
|
2622
3241
|
else reason = `Word count ${wordCount} is greater than maximum ${max}`;
|
|
3242
|
+
} else {
|
|
3243
|
+
const basePass = wordCount >= min;
|
|
3244
|
+
pass = inverse ? !basePass : basePass;
|
|
3245
|
+
if (pass) reason = "Assertion passed";
|
|
3246
|
+
else if (inverse) reason = `Expected word count to be less than ${min}, but got ${wordCount}`;
|
|
3247
|
+
else reason = `Word count ${wordCount} is less than minimum ${min}`;
|
|
2623
3248
|
}
|
|
2624
3249
|
} else {
|
|
2625
3250
|
require_invariant.invariant(typeof value === "number" || typeof value === "string" && !Number.isNaN(Number(value)), "\"word-count\" assertion value must be a number or an object with min/max properties");
|
|
@@ -2714,6 +3339,12 @@ const handleIsXml = ({ assertion, renderedValue, outputString, inverse, baseType
|
|
|
2714
3339
|
//#endregion
|
|
2715
3340
|
//#region src/assertions/index.ts
|
|
2716
3341
|
// Concurrency limit for assertions, read from PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY
// with 3 passed to getEnvInt as the second (fallback) argument.
const ASSERTIONS_MAX_CONCURRENCY = require_logger.getEnvInt("PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY", 3);

// Trace polling: values handed to getEnvInt as the fallback argument.
const DEFAULT_TRACE_FETCH_MAX_ATTEMPTS = 6;
const DEFAULT_TRACE_FETCH_RETRY_DELAY_MS = 250;
const DEFAULT_TRACE_FETCH_STABLE_POLLS = 2;

// Trace polling: upper clamps applied to the configured values.
const MAX_TRACE_FETCH_MAX_ATTEMPTS = 30;
const MAX_TRACE_FETCH_RETRY_DELAY_MS = 5000;
const MAX_TRACE_FETCH_STABLE_POLLS = 10;
|
|
2717
3348
|
const MODEL_GRADED_ASSERTION_TYPES = new Set([
|
|
2718
3349
|
"answer-relevance",
|
|
2719
3350
|
"context-faithfulness",
|
|
@@ -2723,8 +3354,57 @@ const MODEL_GRADED_ASSERTION_TYPES = new Set([
|
|
|
2723
3354
|
"llm-rubric",
|
|
2724
3355
|
"model-graded-closedqa",
|
|
2725
3356
|
"model-graded-factuality",
|
|
2726
|
-
"search-rubric"
|
|
3357
|
+
"search-rubric",
|
|
3358
|
+
"trajectory:goal-success"
|
|
3359
|
+
]);
|
|
3360
|
+
// Base assertion types that are looked up by assertionUsesTrace to decide
// whether an assertion consumes trace data. Insertion order is kept stable.
const TRACE_AWARE_ASSERTION_TYPES = (() => {
	const scriptTypes = ["javascript", "python", "ruby"];
	const traceTypes = ["trace-error-spans", "trace-span-count", "trace-span-duration"];
	const trajectoryTypes = [
		"trajectory:goal-success",
		"trajectory:step-count",
		"trajectory:tool-args-match",
		"trajectory:tool-sequence",
		"trajectory:tool-used"
	];
	return new Set([...scriptTypes, ...traceTypes, ...trajectoryTypes]);
})();
|
|
3373
|
+
/**
 * Report whether an assertion (or any member of an "assert-set") has a base
 * type listed in TRACE_AWARE_ASSERTION_TYPES.
 *
 * @param {object} assertion - Assertion definition; "assert-set" entries are
 *   expected to carry their members in `assertion.assert`.
 * @returns {boolean} True when the assertion or one of its nested members is trace-aware.
 */
function assertionUsesTrace(assertion) {
	const isAssertSet = assertion.type === "assert-set";
	if (!isAssertSet) {
		return TRACE_AWARE_ASSERTION_TYPES.has(getAssertionBaseType(assertion));
	}
	// Sets are trace-aware as soon as any member is.
	return assertion.assert.some((member) => assertionUsesTrace(member));
}
|
|
3377
|
+
/**
 * Broader check than assertionUsesTrace: also treats an assertion as a
 * candidate for trace context when its string value starts with "file://" or
 * is accepted by `require_providers.isPackagePath`.
 *
 * @param {object} assertion - Assertion definition; "assert-set" entries are
 *   expected to carry their members in `assertion.assert`.
 * @returns {boolean} Whether trace context may be needed.
 */
function assertionMayNeedTraceContext(assertion) {
	if (assertionUsesTrace(assertion)) {
		return true;
	}
	if (assertion.type === "assert-set") {
		return assertion.assert.some((member) => assertionMayNeedTraceContext(member));
	}
	const { value } = assertion;
	if (typeof value !== "string") {
		return false;
	}
	// Externally loaded assertion values are treated as possible trace consumers.
	return value.startsWith("file://") || require_providers.isPackagePath(value);
}
|
|
3382
|
+
/**
 * Report whether any assertion in the list may need trace context.
 *
 * @param {Array<object>|null|undefined} assertions - Assertion list; may be absent.
 * @returns {boolean} False for a missing list, otherwise whether some entry
 *   satisfies assertionMayNeedTraceContext.
 */
function hasTraceAwareAssertions(assertions) {
	if (assertions === null || assertions === undefined) {
		return false;
	}
	return Boolean(assertions.some(assertionMayNeedTraceContext));
}
|
|
3385
|
+
/**
 * Poll the trace store for a trace until its span count looks settled.
 *
 * The trace is fetched up to `maxAttempts` times, sleeping `retryDelayMs`
 * between attempts. A non-empty trace is returned once the same span count has
 * been seen on `stablePolls` consecutive fetches, or on the final attempt.
 * All three settings come from environment integers and are clamped:
 * attempts to [1, MAX_TRACE_FETCH_MAX_ATTEMPTS], delay to
 * [0, MAX_TRACE_FETCH_RETRY_DELAY_MS], stable polls to
 * [1, MAX_TRACE_FETCH_STABLE_POLLS].
 *
 * @param {string} traceId - Identifier handed to `traceStore.getTrace`.
 * @returns {Promise<object|null>} The most recently fetched trace (which may
 *   have no spans), or null if nothing was fetched.
 */
async function loadTraceData(traceId) {
	const traceStore = require_store.getTraceStore();
	const clampedEnvInt = (envName, fallback, floor, ceiling) => Math.min(ceiling, Math.max(floor, require_logger.getEnvInt(envName, fallback)));
	const maxAttempts = clampedEnvInt("PROMPTFOO_TRACE_FETCH_MAX_ATTEMPTS", DEFAULT_TRACE_FETCH_MAX_ATTEMPTS, 1, MAX_TRACE_FETCH_MAX_ATTEMPTS);
	const retryDelayMs = clampedEnvInt("PROMPTFOO_TRACE_FETCH_RETRY_DELAY_MS", DEFAULT_TRACE_FETCH_RETRY_DELAY_MS, 0, MAX_TRACE_FETCH_RETRY_DELAY_MS);
	const stablePolls = clampedEnvInt("PROMPTFOO_TRACE_FETCH_STABLE_POLLS", DEFAULT_TRACE_FETCH_STABLE_POLLS, 1, MAX_TRACE_FETCH_STABLE_POLLS);

	let previousSpanCount = -1;
	let stableStreak = 0;
	let latestTrace = null;
	for (let attempt = 0; attempt < maxAttempts; attempt++) {
		const isFinalAttempt = attempt === maxAttempts - 1;
		latestTrace = await traceStore.getTrace(traceId);
		const spanCount = latestTrace?.spans?.length ?? 0;
		const hasSpans = spanCount > 0;

		// An unchanged, non-zero span count extends the streak; any change restarts it.
		if (!hasSpans) {
			stableStreak = 0;
		} else if (spanCount === previousSpanCount) {
			stableStreak += 1;
		} else {
			stableStreak = 1;
		}
		previousSpanCount = spanCount;

		if (hasSpans && (stableStreak >= stablePolls || isFinalAttempt)) {
			return latestTrace;
		}
		if (!isFinalAttempt) {
			await require_fetch.sleep(retryDelayMs);
		}
	}
	return latestTrace;
}
|
|
2728
3408
|
const ASSERTION_HANDLERS = {
|
|
2729
3409
|
"answer-relevance": handleAnswerRelevance,
|
|
2730
3410
|
bleu: handleBleuScore,
|
|
@@ -2787,12 +3467,18 @@ const ASSERTION_HANDLERS = {
|
|
|
2787
3467
|
ruby: handleRuby,
|
|
2788
3468
|
"rouge-n": handleRougeScore,
|
|
2789
3469
|
"search-rubric": handleSearchRubric,
|
|
3470
|
+
"skill-used": handleSkillUsed,
|
|
2790
3471
|
similar: handleSimilar,
|
|
2791
3472
|
"similar:cosine": handleSimilar,
|
|
2792
3473
|
"similar:dot": handleSimilar,
|
|
2793
3474
|
"similar:euclidean": handleSimilar,
|
|
2794
3475
|
"starts-with": handleStartsWith,
|
|
2795
3476
|
"tool-call-f1": handleToolCallF1,
|
|
3477
|
+
"trajectory:goal-success": handleTrajectoryGoalSuccess,
|
|
3478
|
+
"trajectory:tool-args-match": handleTrajectoryToolArgsMatch,
|
|
3479
|
+
"trajectory:step-count": handleTrajectoryStepCount,
|
|
3480
|
+
"trajectory:tool-sequence": handleTrajectoryToolSequence,
|
|
3481
|
+
"trajectory:tool-used": handleTrajectoryToolUsed,
|
|
2796
3482
|
"trace-error-spans": handleTraceErrorSpans,
|
|
2797
3483
|
"trace-span-count": handleTraceSpanCount,
|
|
2798
3484
|
"trace-span-duration": handleTraceSpanDuration,
|
|
@@ -2835,7 +3521,7 @@ function isAssertionInverse(assertion) {
|
|
|
2835
3521
|
/**
 * Return the assertion type with the inverse marker removed.
 *
 * @param {object} assertion - Assertion definition with a string `type`.
 * @returns {string} `assertion.type` without its first four characters when
 *   `isAssertionInverse` reports an inverse assertion (presumably a "not-"
 *   prefix; confirm against isAssertionInverse), otherwise the type unchanged.
 */
function getAssertionBaseType(assertion) {
	if (!isAssertionInverse(assertion)) {
		return assertion.type;
	}
	return assertion.type.slice(4);
}
|
|
2838
|
-
async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs, providerResponse, traceId }) {
|
|
3524
|
+
async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs, providerResponse, traceId, traceData }) {
|
|
2839
3525
|
const resolvedVars = vars || test.vars || {};
|
|
2840
3526
|
const { cost, logProbs, output: originalOutput } = providerResponse;
|
|
2841
3527
|
let output = originalOutput;
|
|
@@ -2854,14 +3540,14 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2854
3540
|
providerResponse,
|
|
2855
3541
|
...assertion.config ? { config: structuredClone(assertion.config) } : {}
|
|
2856
3542
|
};
|
|
2857
|
-
if (traceId) try {
|
|
2858
|
-
const
|
|
2859
|
-
if (
|
|
2860
|
-
traceId:
|
|
2861
|
-
evaluationId:
|
|
2862
|
-
testCaseId:
|
|
2863
|
-
metadata:
|
|
2864
|
-
spans:
|
|
3543
|
+
if (traceId && assertionMayNeedTraceContext(assertion)) try {
|
|
3544
|
+
const resolvedTraceData = traceData === void 0 ? await loadTraceData(traceId) : traceData;
|
|
3545
|
+
if (resolvedTraceData) context.trace = {
|
|
3546
|
+
traceId: resolvedTraceData.traceId,
|
|
3547
|
+
evaluationId: resolvedTraceData.evaluationId,
|
|
3548
|
+
testCaseId: resolvedTraceData.testCaseId,
|
|
3549
|
+
metadata: resolvedTraceData.metadata,
|
|
3550
|
+
spans: resolvedTraceData.spans || []
|
|
2865
3551
|
};
|
|
2866
3552
|
} catch (error) {
|
|
2867
3553
|
require_logger.logger.debug(`Failed to fetch trace data for assertion: ${error}`);
|
|
@@ -2894,7 +3580,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2894
3580
|
};
|
|
2895
3581
|
}
|
|
2896
3582
|
else if (filePath.endsWith(".rb")) try {
|
|
2897
|
-
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-
|
|
3583
|
+
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-B1HXG4ej.cjs"));
|
|
2898
3584
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
2899
3585
|
require_logger.logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
2900
3586
|
} catch (error) {
|
|
@@ -3003,6 +3689,14 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
|
|
|
3003
3689
|
index: i
|
|
3004
3690
|
};
|
|
3005
3691
|
}).flat();
|
|
3692
|
+
const shouldPreloadTrace = !!traceId && hasTraceAwareAssertions(asserts.map(({ assertion }) => assertion));
|
|
3693
|
+
let preloadedTraceData;
|
|
3694
|
+
if (shouldPreloadTrace && traceId) try {
|
|
3695
|
+
preloadedTraceData = await loadTraceData(traceId);
|
|
3696
|
+
} catch (error) {
|
|
3697
|
+
require_logger.logger.debug(`Failed to preload trace data for assertions: ${error}`);
|
|
3698
|
+
preloadedTraceData = null;
|
|
3699
|
+
}
|
|
3006
3700
|
await async.default.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
|
|
3007
3701
|
if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
|
|
3008
3702
|
const result = await runAssertion({
|
|
@@ -3014,7 +3708,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
|
|
|
3014
3708
|
vars,
|
|
3015
3709
|
latencyMs,
|
|
3016
3710
|
assertIndex: index,
|
|
3017
|
-
traceId
|
|
3711
|
+
traceId,
|
|
3712
|
+
traceData: preloadedTraceData
|
|
3018
3713
|
});
|
|
3019
3714
|
assertResult.addResult({
|
|
3020
3715
|
index,
|
|
@@ -3160,7 +3855,7 @@ var CIProgressReporter = class {
|
|
|
3160
3855
|
else {
|
|
3161
3856
|
const eta = remaining / rate;
|
|
3162
3857
|
if (eta > 1440) etaDisplay = ">24 hours";
|
|
3163
|
-
else etaDisplay = `${Math.round(eta)} minute${Math.round(eta)
|
|
3858
|
+
else etaDisplay = `${Math.round(eta)} minute${Math.round(eta) === 1 ? "" : "s"}`;
|
|
3164
3859
|
}
|
|
3165
3860
|
const percentage = Math.floor(this.completedTests / this.totalTests * 100);
|
|
3166
3861
|
require_logger.logger.info(`[CI Progress] Evaluation running for ${this.formatElapsedTime(elapsed)} - Completed ${this.completedTests}/${this.totalTests} tests (${percentage}%)`);
|
|
@@ -3561,12 +4256,55 @@ function isPromptAllowed(prompt, allowedPrompts) {
|
|
|
3561
4256
|
var ProgressBarManager = class {
|
|
3562
4257
|
progressBar;
|
|
3563
4258
|
isWebUI;
|
|
4259
|
+
originalLogCallback = null;
|
|
4260
|
+
installedLogCallback = null;
|
|
4261
|
+
pendingRender = null;
|
|
3564
4262
|
totalCount = 0;
|
|
3565
4263
|
completedCount = 0;
|
|
3566
4264
|
concurrency = 1;
|
|
3567
4265
|
constructor(isWebUI) {
|
|
3568
4266
|
this.isWebUI = isWebUI;
|
|
3569
4267
|
}
|
|
4268
|
+
clearProgressBarLine() {
|
|
4269
|
+
readline.default.cursorTo(process.stderr, 0);
|
|
4270
|
+
readline.default.clearLine(process.stderr, 0);
|
|
4271
|
+
}
|
|
4272
|
+
scheduleRender() {
|
|
4273
|
+
if (!this.progressBar || this.pendingRender) return;
|
|
4274
|
+
this.pendingRender = setImmediate(() => {
|
|
4275
|
+
this.pendingRender = null;
|
|
4276
|
+
this.progressBar?.render();
|
|
4277
|
+
});
|
|
4278
|
+
}
|
|
4279
|
+
handleLogMessage() {
|
|
4280
|
+
if (!this.progressBar) return;
|
|
4281
|
+
this.clearProgressBarLine();
|
|
4282
|
+
this.scheduleRender();
|
|
4283
|
+
}
|
|
4284
|
+
/**
|
|
4285
|
+
* Coordinate console logging with the progress bar to prevent visual corruption.
|
|
4286
|
+
*/
|
|
4287
|
+
installLogInterceptor() {
|
|
4288
|
+
if (!this.progressBar || this.isWebUI || this.installedLogCallback) return;
|
|
4289
|
+
this.originalLogCallback = require_logger.globalLogCallback;
|
|
4290
|
+
this.installedLogCallback = (message) => {
|
|
4291
|
+
this.originalLogCallback?.(message);
|
|
4292
|
+
this.handleLogMessage();
|
|
4293
|
+
};
|
|
4294
|
+
require_logger.setLogCallback(this.installedLogCallback);
|
|
4295
|
+
}
|
|
4296
|
+
/**
|
|
4297
|
+
* Remove the log interceptor and restore original logger callback behavior.
|
|
4298
|
+
*/
|
|
4299
|
+
removeLogInterceptor() {
|
|
4300
|
+
if (this.pendingRender) {
|
|
4301
|
+
clearImmediate(this.pendingRender);
|
|
4302
|
+
this.pendingRender = null;
|
|
4303
|
+
}
|
|
4304
|
+
if (this.installedLogCallback && require_logger.globalLogCallback === this.installedLogCallback) require_logger.setLogCallback(this.originalLogCallback);
|
|
4305
|
+
this.installedLogCallback = null;
|
|
4306
|
+
this.originalLogCallback = null;
|
|
4307
|
+
}
|
|
3570
4308
|
/**
|
|
3571
4309
|
* Initialize progress bar
|
|
3572
4310
|
*/
|
|
@@ -3586,7 +4324,8 @@ var ProgressBarManager = class {
|
|
|
3586
4324
|
return `Evaluating [${bar}${spaces}] ${percentage}% | ${params.value}/${params.total}${errorsText} | ${payload.provider} ${payload.prompt} ${payload.vars}`;
|
|
3587
4325
|
},
|
|
3588
4326
|
hideCursor: true,
|
|
3589
|
-
gracefulExit: true
|
|
4327
|
+
gracefulExit: true,
|
|
4328
|
+
stream: process.stderr
|
|
3590
4329
|
}, cli_progress.default.Presets.shades_classic);
|
|
3591
4330
|
this.progressBar.start(this.totalCount, 0, {
|
|
3592
4331
|
provider: "",
|
|
@@ -3861,6 +4600,7 @@ async function runEval({ provider, prompt, test, testSuite, delay, nunjucksFilte
|
|
|
3861
4600
|
const parts = traceContext.traceparent.split("-");
|
|
3862
4601
|
if (parts.length >= 3) traceId = parts[1];
|
|
3863
4602
|
}
|
|
4603
|
+
if (traceId && hasTraceAwareAssertions(test.assert)) await flushOtel();
|
|
3864
4604
|
const checkResult = await runAssertions({
|
|
3865
4605
|
prompt: renderedPrompt,
|
|
3866
4606
|
provider,
|
|
@@ -4258,7 +4998,7 @@ var Evaluator = class {
|
|
|
4258
4998
|
const defaultProvider = testSuite.defaultTest.provider;
|
|
4259
4999
|
if (require_types.isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
|
|
4260
5000
|
else if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
4261
|
-
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-
|
|
5001
|
+
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-D-FnDg8k.cjs"));
|
|
4262
5002
|
testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
4263
5003
|
} else testCase.provider = defaultProvider;
|
|
4264
5004
|
}
|
|
@@ -4342,7 +5082,7 @@ var Evaluator = class {
|
|
|
4342
5082
|
if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
|
|
4343
5083
|
}
|
|
4344
5084
|
if (require_logger.state.resume && this.evalRecord.persisted) try {
|
|
4345
|
-
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-
|
|
5085
|
+
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-tGdilrWt.cjs"));
|
|
4346
5086
|
const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: require_logger.state.retryMode });
|
|
4347
5087
|
const originalCount = runEvalOptions.length;
|
|
4348
5088
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -4542,7 +5282,7 @@ var Evaluator = class {
|
|
|
4542
5282
|
if (require_logger.isCI() && !isWebUI) {
|
|
4543
5283
|
ciProgressReporter = new CIProgressReporter(runEvalOptions.length);
|
|
4544
5284
|
ciProgressReporter.start();
|
|
4545
|
-
} else if (this.options.showProgressBar && process.
|
|
5285
|
+
} else if (this.options.showProgressBar && process.stderr.isTTY) progressBarManager = new ProgressBarManager(isWebUI);
|
|
4546
5286
|
this.options.progressCallback = (completed, total, index, evalStep, metrics) => {
|
|
4547
5287
|
if (originalProgressCallback) originalProgressCallback(completed, total, index, evalStep, metrics);
|
|
4548
5288
|
if (isWebUI) {
|
|
@@ -4563,7 +5303,10 @@ var Evaluator = class {
|
|
|
4563
5303
|
if (serialRunEvalOptions.length > 0) require_logger.logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
|
|
4564
5304
|
if (concurrentRunEvalOptions.length > 0) require_logger.logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
|
|
4565
5305
|
}
|
|
4566
|
-
if (this.options.showProgressBar && progressBarManager)
|
|
5306
|
+
if (this.options.showProgressBar && progressBarManager) {
|
|
5307
|
+
await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
5308
|
+
progressBarManager.installLogInterceptor();
|
|
5309
|
+
}
|
|
4567
5310
|
try {
|
|
4568
5311
|
if (serialRunEvalOptions.length > 0) for (const evalStep of serialRunEvalOptions) {
|
|
4569
5312
|
checkAbort();
|
|
@@ -4589,7 +5332,10 @@ var Evaluator = class {
|
|
|
4589
5332
|
else if (!targetUnavailable) {
|
|
4590
5333
|
require_logger.logger.info("Evaluation interrupted, saving progress...");
|
|
4591
5334
|
if (globalTimeout) clearTimeout(globalTimeout);
|
|
4592
|
-
if (progressBarManager)
|
|
5335
|
+
if (progressBarManager) {
|
|
5336
|
+
progressBarManager.removeLogInterceptor();
|
|
5337
|
+
progressBarManager.stop();
|
|
5338
|
+
}
|
|
4593
5339
|
if (ciProgressReporter) ciProgressReporter.finish();
|
|
4594
5340
|
this.evalRecord.setVars(Array.from(vars));
|
|
4595
5341
|
await this.evalRecord.addPrompts(prompts);
|
|
@@ -4597,6 +5343,10 @@ var Evaluator = class {
|
|
|
4597
5343
|
return this.evalRecord;
|
|
4598
5344
|
}
|
|
4599
5345
|
} else {
|
|
5346
|
+
if (progressBarManager) {
|
|
5347
|
+
progressBarManager.removeLogInterceptor();
|
|
5348
|
+
progressBarManager.stop();
|
|
5349
|
+
}
|
|
4600
5350
|
if (ciProgressReporter) ciProgressReporter.error(`Evaluation failed: ${String(err)}`);
|
|
4601
5351
|
throw err;
|
|
4602
5352
|
}
|
|
@@ -4739,6 +5489,7 @@ var Evaluator = class {
|
|
|
4739
5489
|
await this.evalRecord.addPrompts(prompts);
|
|
4740
5490
|
try {
|
|
4741
5491
|
if (progressBarManager) {
|
|
5492
|
+
progressBarManager.removeLogInterceptor();
|
|
4742
5493
|
progressBarManager.complete();
|
|
4743
5494
|
progressBarManager.stop();
|
|
4744
5495
|
} else if (ciProgressReporter) ciProgressReporter.finish();
|
|
@@ -7092,8 +7843,7 @@ function testCaseFromCsvRow(row) {
|
|
|
7092
7843
|
require_logger.logger.warn("The \"__metadata\" column requires a key, e.g. \"__metadata:category\". This column will be ignored.");
|
|
7093
7844
|
} else if (key.startsWith("__config:")) {
|
|
7094
7845
|
const configParts = key.slice(9).split(":");
|
|
7095
|
-
if (configParts.length
|
|
7096
|
-
else {
|
|
7846
|
+
if (configParts.length === 2) {
|
|
7097
7847
|
const [expectedKey, configKey] = configParts;
|
|
7098
7848
|
let targetIndex;
|
|
7099
7849
|
if (expectedKey === "__expected") targetIndex = 0;
|
|
@@ -7119,7 +7869,7 @@ function testCaseFromCsvRow(row) {
|
|
|
7119
7869
|
}
|
|
7120
7870
|
}
|
|
7121
7871
|
assertionConfigs[targetIndex][configKey] = parsedValue;
|
|
7122
|
-
}
|
|
7872
|
+
} else require_logger.logger.warn(`Invalid __config column format: "${key}". Expected format: __config:__expected:threshold or __config:__expected<N>:threshold`);
|
|
7123
7873
|
} else vars[key] = value;
|
|
7124
7874
|
}
|
|
7125
7875
|
for (let i = 0; i < asserts.length; i++) {
|
|
@@ -7248,14 +7998,14 @@ async function parseXlsxFile(filePath) {
|
|
|
7248
7998
|
const sheetName = typeof sheetOption === "number" ? sheetNames[sheetOption - 1] : sheetOption;
|
|
7249
7999
|
const rows = await readXlsxFile(actualFilePath, { sheet: sheetOption });
|
|
7250
8000
|
if (rows.length === 0) throw new Error(`Sheet "${sheetName}" is empty or contains no valid data rows`);
|
|
7251
|
-
const headers = rows[0].map((cell) => cell
|
|
8001
|
+
const headers = rows[0].map((cell) => cell == null ? "" : String(cell));
|
|
7252
8002
|
if (headers.length === 0 || headers.every((h) => h === "")) throw new Error(`Sheet "${sheetName}" has no valid column headers`);
|
|
7253
8003
|
if (rows.length === 1) throw new Error(`Sheet "${sheetName}" is empty or contains no valid data rows`);
|
|
7254
8004
|
const data = rows.slice(1).map((row) => {
|
|
7255
8005
|
const obj = {};
|
|
7256
8006
|
headers.forEach((header, index) => {
|
|
7257
8007
|
const cellValue = row[index];
|
|
7258
|
-
obj[header] = cellValue
|
|
8008
|
+
obj[header] = cellValue == null ? "" : String(cellValue);
|
|
7259
8009
|
});
|
|
7260
8010
|
return obj;
|
|
7261
8011
|
});
|
|
@@ -11202,20 +11952,19 @@ function generateEvalSummary(params) {
|
|
|
11202
11952
|
}
|
|
11203
11953
|
}
|
|
11204
11954
|
lines.push("");
|
|
11205
|
-
const
|
|
11206
|
-
|
|
11207
|
-
|
|
11208
|
-
|
|
11209
|
-
|
|
11210
|
-
|
|
11211
|
-
|
|
11212
|
-
}
|
|
11213
|
-
const passedPart = successes > 0 ? `${chalk.default.green("✓")} ${chalk.default.green.bold(successes.toLocaleString())} passed` : `${chalk.default.gray.bold(successes.toLocaleString())} passed`;
|
|
11214
|
-
const failedPart = failures > 0 ? `${chalk.default.red("✗")} ${chalk.default.red.bold(failures.toLocaleString())} failed` : `${chalk.default.gray.bold(failures.toLocaleString())} failed`;
|
|
11955
|
+
const totalTests = successes + failures + errors;
|
|
11956
|
+
const formatResultPercentage = (count) => {
|
|
11957
|
+
const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
|
|
11958
|
+
return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
|
|
11959
|
+
};
|
|
11960
|
+
const formatResultLine = (count, label, icon, iconColor) => {
|
|
11961
|
+
return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.default.white.bold(count.toLocaleString())} ${chalk.default.white(label)} ${chalk.default.gray(`(${formatResultPercentage(count)})`)}`;
|
|
11962
|
+
};
|
|
11215
11963
|
const errorLabel = errors === 1 ? "error" : "errors";
|
|
11216
|
-
|
|
11217
|
-
|
|
11218
|
-
|
|
11964
|
+
lines.push(chalk.default.bold("Results:"));
|
|
11965
|
+
lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.default.green));
|
|
11966
|
+
lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.default.red));
|
|
11967
|
+
lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.default.red));
|
|
11219
11968
|
const durationDisplay = formatDuration(duration);
|
|
11220
11969
|
lines.push(chalk.default.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
|
|
11221
11970
|
lines.push("");
|
|
@@ -11549,7 +12298,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11549
12298
|
await require_providers.checkCloudPermissions(config);
|
|
11550
12299
|
const options = {
|
|
11551
12300
|
...evaluateOptions,
|
|
11552
|
-
showProgressBar: require_logger.getLogLevel() === "debug" ? false : cmdObj.progressBar
|
|
12301
|
+
showProgressBar: require_logger.getLogLevel() === "debug" ? false : cmdObj.progressBar === void 0 ? evaluateOptions.showProgressBar === void 0 ? true : evaluateOptions.showProgressBar : cmdObj.progressBar !== false,
|
|
11553
12302
|
repeat,
|
|
11554
12303
|
delay: !Number.isNaN(delay) && delay > 0 ? delay : void 0,
|
|
11555
12304
|
maxConcurrency,
|
|
@@ -11933,7 +12682,7 @@ async function doRedteamRun(options) {
|
|
|
11933
12682
|
redteamConfig = await doGenerateRedteam({
|
|
11934
12683
|
...passThroughOptions,
|
|
11935
12684
|
...options.liveRedteamConfig?.commandLineOptions || {},
|
|
11936
|
-
...maxConcurrency
|
|
12685
|
+
...maxConcurrency === void 0 ? {} : { maxConcurrency },
|
|
11937
12686
|
config: configPath,
|
|
11938
12687
|
output: redteamPath,
|
|
11939
12688
|
force: options.force,
|