promptfoo 0.121.5 → 0.121.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-CaLNYnf7.js → accounts-Ca7WIoPY.js} +12 -7
- package/dist/src/{accounts-CLJHCDDb.js → accounts-CjFnOPmb.js} +13 -8
- package/dist/src/{accounts-BIFntVWB.cjs → accounts-CmWzeD2d.cjs} +16 -10
- package/dist/src/{accounts-bnyHT7Ju.js → accounts-DanM1wq_.js} +12 -7
- package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-CJ0j3fBi.js} +2 -2
- package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-DDEGRV9v.js} +2 -2
- package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
- package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-TxUEMPYS.js} +2 -2
- package/dist/src/{agents-WULPVjbH.cjs → agents-B4sRuXg3.cjs} +7 -6
- package/dist/src/{agents-DhxWMCtH.js → agents-B8q7h_ek.js} +4 -4
- package/dist/src/{agents-BGqaTDnr.js → agents-CBgJvRkB.js} +20 -9
- package/dist/src/{agents-n6vPqV3i.js → agents-CYn2n3QP.js} +4 -4
- package/dist/src/{agents-BV9yFpXX.js → agents-D-vDNFx4.js} +20 -9
- package/dist/src/{agents-BYdMl1UE.js → agents-LrHuQqr1.js} +20 -9
- package/dist/src/{agents-emVcx3yh.js → agents-QGg76OF-.js} +2 -2
- package/dist/src/{agents-DiWmQYH9.cjs → agents-eHZ9nlgA.cjs} +21 -10
- package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-CJEbQ0o6.js} +6 -6
- package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-D5HXzZ0s.js} +6 -6
- package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
- package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-eYv3a_DK.js} +6 -6
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
- package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/{audio-Da8U9IS5.js → audio-BqnRvcWG.js} +3 -3
- package/dist/src/{audio-BvpTOArF.js → audio-CPMtV1yR.js} +3 -3
- package/dist/src/{audio-CScmnmEB.js → audio-DyiebVB3.js} +3 -3
- package/dist/src/{audio-C0vDeS0j.cjs → audio-FnxbEnSE.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
- package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
- package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
- package/dist/src/{base-BTux96b1.js → base-CKLo890h.js} +4 -3
- package/dist/src/{base-BOMaNEes.js → base-Co80MMCi.js} +4 -3
- package/dist/src/{base-Tw6uhH8K.cjs → base-DGJW48uz.cjs} +5 -4
- package/dist/src/{base-dYsl2hmL.js → base-E9I8zXjz.js} +4 -3
- package/dist/src/bestOfN-B3wNzjSB.js +137 -0
- package/dist/src/bestOfN-BBsO41z4.js +136 -0
- package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
- package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
- package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
- package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
- package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/{cache-DGg-yTZG.cjs → cache-CPGUA4Yl.cjs} +135 -25
- package/dist/src/cache-Cf7b4pWE.js +3 -0
- package/dist/src/{cache-Bzttsk0X.js → cache-DIXbtkNO.js} +125 -10
- package/dist/src/{cache-BI5BY7ey.js → cache-DpPWrkTE.js} +127 -11
- package/dist/src/{cache-Cr-qWIbP.js → cache-roFAE0cI.js} +125 -10
- package/dist/src/{chat-DChSH_Es.js → chat-CUCorGiL.js} +9 -9
- package/dist/src/{chat-DH97tVV9.cjs → chat-DG1wG4w0.cjs} +6 -6
- package/dist/src/{chat-Cx_LkwvZ.js → chat-Dabu84Br.js} +11 -11
- package/dist/src/{chat-BLOdH60v.js → chat-DqUFcWI0.js} +11 -11
- package/dist/src/{chat-vYqqv1gP.cjs → chat-DxTDQ83C.cjs} +14 -13
- package/dist/src/{chat-DG2LkwLq.js → chat-GmlolEwo.js} +4 -4
- package/dist/src/{chat-aMQZw6R7.js → chat-TP8Qifkh.js} +4 -4
- package/dist/src/{chat-D9nudO9b.js → chat-iwaM5UTQ.js} +4 -4
- package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
- package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
- package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-BQNp_y-F.js} +209 -64
- package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-D5Jl0SDh.js} +210 -65
- package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-DH416NBD.cjs} +216 -70
- package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-x1XJ1-pU.js} +210 -65
- package/dist/src/{cloud-Da0bofJd.js → cloud-D3DiFqH6.js} +2 -2
- package/dist/src/cloud-p96PA4MH.js +3 -0
- package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-B6NVI3ax.js} +4 -4
- package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
- package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-RFSojyXG.js} +4 -4
- package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-r4tbYmWU.js} +4 -4
- package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-BCkLouto.js} +3 -3
- package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-BaZ4insB.js} +3 -3
- package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-CF-Vb-2Z.js} +3 -3
- package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
- package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-B8KHEiF4.js} +5 -5
- package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-CnrLBCeA.cjs} +12 -11
- package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-DIXZ230V.js} +4 -4
- package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-Dd22dC_N.js} +5 -5
- package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-B6Wah8Pa.js} +5 -5
- package/dist/src/codex-sdk-BGjVAk23.js +3 -0
- package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
- package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-CmQABzV3.js} +3 -3
- package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-D2d54RL8.js} +5 -5
- package/dist/src/{cometapi-B5ImDlSm.js → cometapi-Bu9B8NUY.js} +7 -7
- package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CtzNCHKu.js} +7 -7
- package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-DHCDlQUI.cjs} +8 -8
- package/dist/src/{cometapi-CC7hWxmX.js → cometapi-OBILPLlu.js} +7 -7
- package/dist/src/{completion-Vq_ad618.js → completion-CO2e1_62.js} +4 -4
- package/dist/src/{completion-DtQ72Bm3.cjs → completion-CSYfl2cd.cjs} +6 -6
- package/dist/src/{completion-2iuYVxwi.js → completion-DZNxcyfG.js} +5 -5
- package/dist/src/{completion-CrD6MQ93.js → completion-sNvCLTAP.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
- package/dist/src/crescendo-CU_Y2i-m.js +702 -0
- package/dist/src/crescendo-J1Xx4_zb.js +703 -0
- package/dist/src/crescendo-QiaSLW0d.js +701 -0
- package/dist/src/custom-BJfP00Bh.js +619 -0
- package/dist/src/custom-CZVn-1-r.js +620 -0
- package/dist/src/custom-Cqia7M0D.cjs +621 -0
- package/dist/src/custom-notggYVl.js +618 -0
- package/dist/src/{docker--3qzPa-6.js → docker-4D1eL6Gq.js} +5 -5
- package/dist/src/{docker-Dorv4_Dg.js → docker-BBv1WUDu.js} +5 -5
- package/dist/src/{docker-D3AY-5F5.cjs → docker-D06JUoe2.cjs} +6 -6
- package/dist/src/{docker-DCsCDvwM.js → docker-DdJQBxK9.js} +5 -5
- package/dist/src/{embedding-DNRvZwRN.js → embedding--UZVe4_7.js} +5 -5
- package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BbrwopfX.cjs} +6 -6
- package/dist/src/{embedding-ChS1ivFS.js → embedding-Bi3rxrZF.js} +5 -5
- package/dist/src/{embedding-D_bI4NDq.js → embedding-C251p1-8.js} +4 -4
- package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
- package/dist/src/{eval-DJ_4A-tr.js → eval-0VRANImH.js} +19 -19
- package/dist/src/{eval-BQPLBJbw.js → eval-DscR5iOM.js} +1 -1
- package/dist/src/{evalResult-pSvGWFMo.js → evalResult-2RRJvFyB.js} +18 -11
- package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-CvtS8h8u.cjs} +29 -11
- package/dist/src/evalResult-DqzsS6_W.js +3 -0
- package/dist/src/{evalResult-D6P5I5il.js → evalResult-eUkJv9Ko.js} +17 -10
- package/dist/src/evaluator-DNdJF1Gv.js +3 -0
- package/dist/src/{evaluator-D-UIbbYq.js → evaluator-DRoiYB2q.js} +258 -132
- package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
- package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
- package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
- package/dist/src/{extractor-YlZbUMsL.js → extractor-BR7XAzAL.js} +5 -5
- package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-BdxEtt3J.cjs} +6 -6
- package/dist/src/{extractor-DxyiFhPk.js → extractor-CIW3iN-b.js} +5 -5
- package/dist/src/{extractor-BM3jRERL.js → extractor-CxRtnaHl.js} +5 -5
- package/dist/src/{fetch-Y5qX_kST.js → fetch-BufrQtvR.js} +90 -26
- package/dist/src/{fetch-B6ch2nU2.js → fetch-DXUnXkVU.js} +86 -26
- package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Dw4XZHjj.cjs} +115 -32
- package/dist/src/{fetch-D9xxyC1p.js → fetch-It34O8Ur.js} +90 -26
- package/dist/src/fetch-_YgGd2qv.js +3 -0
- package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ckd3q3AY.js +467 -0
- package/dist/src/goat-Qgurm-NP.js +466 -0
- package/dist/src/goat-ghadEDdy.js +465 -0
- package/dist/src/goat-una6pZGP.cjs +469 -0
- package/dist/src/graders-BDT7dif6.js +3 -0
- package/dist/src/{graders-CgPn32yp.js → graders-BGP99PdK.js} +1017 -84
- package/dist/src/{graders-BoUqsCEm.js → graders-BX0f2tvS.js} +1022 -84
- package/dist/src/{graders-CwrbifOo.js → graders-C0nXU_ZP.js} +1020 -82
- package/dist/src/{graders-Bw1wk_21.cjs → graders-ClrU2fnd.cjs} +1085 -128
- package/dist/src/hydra-BSNZZm2M.js +543 -0
- package/dist/src/hydra-BxdG4nkg.js +541 -0
- package/dist/src/hydra-DE4xWwyc.js +542 -0
- package/dist/src/hydra-DrJttnvw.cjs +542 -0
- package/dist/src/image-B4oBtu6J.js +443 -0
- package/dist/src/{image-Dr_3I3nK.js → image-BN-hjLL9.js} +3 -3
- package/dist/src/{image-BeWaInPF.js → image-B_fPIwdg.js} +3 -3
- package/dist/src/image-BvUAW344.js +442 -0
- package/dist/src/image-Cvjwx1uY.js +442 -0
- package/dist/src/{image-D10dNAav.cjs → image-DfVCGPbI.cjs} +4 -4
- package/dist/src/{image-qjO6FWPs.js → image-QzmydkiG.js} +3 -3
- package/dist/src/image-X0oY4350.cjs +465 -0
- package/dist/src/index.cjs +688 -313
- package/dist/src/index.d.cts +3152 -1617
- package/dist/src/index.d.ts +3151 -1616
- package/dist/src/index.js +582 -223
- package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
- package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
- package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
- package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
- package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
- package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
- package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
- package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-CpU6i2As.js +490 -0
- package/dist/src/iterative-DJQEQpG3.js +491 -0
- package/dist/src/iterative-DQBuWM-j.cjs +493 -0
- package/dist/src/iterative-FTS4Bz67.js +492 -0
- package/dist/src/iterativeImage-BUABMVOA.js +413 -0
- package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
- package/dist/src/iterativeImage-BzUapOUi.js +414 -0
- package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
- package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
- package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
- package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
- package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
- package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
- package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
- package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
- package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
- package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-BadkINlJ.js} +24 -10
- package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-Bi_8sV-H.js} +23 -9
- package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-CkMljjdg.js} +24 -10
- package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
- package/dist/src/{litellm-DRc4qWfc.js → litellm-BKBo0jpC.js} +4 -4
- package/dist/src/{litellm-BLSiANhk.js → litellm-BXyn5kZK.js} +4 -4
- package/dist/src/{litellm-DQGo_juI.js → litellm-CNcfbCfa.js} +4 -4
- package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CtAr7bKG.cjs} +5 -5
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BMX1iEB6.js} +5 -5
- package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-CR5TSpp4.js} +5 -5
- package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
- package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-OEMmS1RB.js} +5 -5
- package/dist/src/main.js +704 -208
- package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
- package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
- package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
- package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
- package/dist/src/{messages-CewuNcNS.js → messages-BabO-cX8.js} +17 -9
- package/dist/src/{messages-BnsVHUnm.cjs → messages-DBPir0TQ.cjs} +24 -15
- package/dist/src/{messages-B9dSjrNf.js → messages-DGUlSNU7.js} +18 -10
- package/dist/src/{messages-CI69Lasb.js → messages-vsE_-Lv0.js} +18 -10
- package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
- package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
- package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
- package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
- package/dist/src/mischievousUser-frFYKxu6.js +47 -0
- package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
- package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-CNV5bMSk.cjs} +7 -7
- package/dist/src/{modelslab-zpz9JcK0.js → modelslab-Cogmu4mG.js} +6 -6
- package/dist/src/{modelslab-D73OnKSx.js → modelslab-Dzst7VTU.js} +6 -6
- package/dist/src/{modelslab-BCLOtfek.js → modelslab-EyDczZ5A.js} +6 -6
- package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-BGPNBOMS.js} +5 -5
- package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-B_5NKFu1.js} +5 -5
- package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-C4eUJGse.js} +5 -5
- package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
- package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-BNGmgfFz.js} +3 -3
- package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-ChPlh5na.js} +2 -2
- package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-CrV0iaY_.js} +3 -3
- package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
- package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
- package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-BIHlu_36.js} +9 -8
- package/dist/src/{openclaw-DObVgpjC.js → openclaw-CF7fMido.js} +9 -8
- package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-Dphc01BY.cjs} +17 -15
- package/dist/src/{openclaw-DA8U4DsD.js → openclaw-zIJAsz3P.js} +9 -8
- package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-B3vlPLsp.js} +38 -3
- package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-D05JSgMQ.js} +39 -4
- package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-DoY6GbWw.cjs} +45 -9
- package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-sRKYHGoI.js} +39 -4
- package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
- package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
- package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BDrfT30-.js +180 -0
- package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
- package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
- package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
- package/dist/src/{providers-DT-GtF2t.js → providers-B9KzWxAX.js} +739 -11919
- package/dist/src/{providers-DRrerKra.js → providers-BCCz6_IX.js} +813 -11944
- package/dist/src/{providers-eDShy16E.cjs → providers-BDVVIQM6.cjs} +787 -12132
- package/dist/src/{providers-Ctcc592x.js → providers-BYAn82cf.js} +1 -1
- package/dist/src/{providers-CJh7iriU.js → providers-DVYRZP4E.js} +746 -11866
- package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
- package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
- package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
- package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
- package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-BAp6iTZD.cjs} +4 -4
- package/dist/src/{quiverai-CPKhWgaT.js → quiverai-BvIhI_0l.js} +3 -3
- package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CdTWPe-A.js} +3 -3
- package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-Cv7rJKDz.js} +3 -3
- package/dist/src/registry-BUJrgjwv.js +124 -0
- package/dist/src/registry-DXm1t_x0.js +125 -0
- package/dist/src/registry-Dp5EqoXc.js +124 -0
- package/dist/src/registry-KCVF1CFC.cjs +124 -0
- package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-B1_XsKXU.js} +16 -147
- package/dist/src/{server-gyd6d4Hc.js → remoteGeneration-COpWcmWd.js} +15 -108
- package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
- package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-CF-ayauu.cjs → responses-1UFFF9N_.cjs} +12 -11
- package/dist/src/{responses-B8haB-mD.js → responses-B3W2JvOQ.js} +9 -9
- package/dist/src/{responses-1ztiVYsx.js → responses-B6ktc3Ra.js} +7 -7
- package/dist/src/{responses-BiaBguAu.js → responses-URRzV8qE.js} +9 -9
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
- package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
- package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
- package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-CujrzP1a.js} +61 -50
- package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-DzffAqo_.cjs} +65 -53
- package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-vhtSV7JI.js} +61 -50
- package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-yr1QKeBs.js} +61 -50
- package/dist/src/{scanner-nOCWNIXa.js → scanner-DS0109SS.js} +6 -6
- package/dist/src/server/index.js +4147 -449
- package/dist/src/server-B8rqV126.cjs +126 -0
- package/dist/src/server-BaLytskk.js +3 -0
- package/dist/src/server-CMJD10J4.js +107 -0
- package/dist/src/server-Ddp8GNMp.js +146 -0
- package/dist/src/server-DhMHosWj.js +182 -0
- package/dist/src/shared-7pmVZLNO.js +1334 -0
- package/dist/src/shared-9WHQ1oNE.js +1335 -0
- package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-D6IjElRI.js +1334 -0
- package/dist/src/shared-WkgnDkcg.cjs +1436 -0
- package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
- package/dist/src/simulatedUser-C9aQObBI.js +222 -0
- package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
- package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
- package/dist/src/simulatedUser-p3tACcmw.js +223 -0
- package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
- package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
- package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-BU4qcnOb.js +875 -0
- package/dist/src/storage-CA-v9V2v.cjs +911 -0
- package/dist/src/storage-CD-GWAdx.js +822 -0
- package/dist/src/storage-QdU-SmvD.js +834 -0
- package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
- package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-7CS3Alao.cjs +2360 -0
- package/dist/src/strategies-CiSeroPH.js +2331 -0
- package/dist/src/strategies-DRJjGTIY.js +2333 -0
- package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
- package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
- package/dist/src/{telemetry-DWdGHvEf.js → telemetry-00ezXr_t.js} +4 -4
- package/dist/src/telemetry-ByPqDcKC.js +3 -0
- package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-CJ7FnCsc.cjs} +15 -9
- package/dist/src/{telemetry--iqaGyaS.js → telemetry-DmXYcJNV.js} +4 -4
- package/dist/src/{telemetry-CgdVGV8N.js → telemetry-DwX9XUN5.js} +4 -4
- package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
- package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
- package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
- package/dist/src/tracingOptions-Chi74lOD.js +219 -0
- package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
- package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
- package/dist/src/{transcription-84t4ALo2.js → transcription-B8uIgCYX.js} +5 -5
- package/dist/src/{transcription-Bm2emLmJ.js → transcription-CfU5loSq.js} +5 -5
- package/dist/src/{transcription-D7Q0vJsh.js → transcription-Dkd22_4K.js} +4 -4
- package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-mzuf18Mq.cjs} +9 -8
- package/dist/src/{transform-DtooZqYY.js → transform-BIMynQsA.js} +8 -8
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DhNkAUs8.cjs} +12 -11
- package/dist/src/{transform-_DpNB4qp.js → transform-DmvYBRll.js} +8 -8
- package/dist/src/{transform-B-b6Cq-q.js → transform-EtD4jAWi.js} +8 -8
- package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
- package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
- package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
- package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
- package/dist/src/{util-DFPeFkiV.js → util--WMgw7wM.js} +28 -8
- package/dist/src/{util-C-kmRosx.js → util-5WnCSb0h.js} +9 -7
- package/dist/src/{util-A5_ZsQUn.cjs → util-BSIuSLVK.cjs} +12 -9
- package/dist/src/{util-Dub0f_ej.js → util-Bx677_k2.js} +17 -10
- package/dist/src/util-CN8om2rz.cjs +386 -0
- package/dist/src/{util-DN0-b81k.js → util-CoQWM76y.js} +28 -8
- package/dist/src/util-DNl96nNs.js +327 -0
- package/dist/src/{util-BQOCAHQC.js → util-DURocbYR.js} +46 -11
- package/dist/src/util-Df8YMvS1.js +327 -0
- package/dist/src/{util-BVXcTwXu.js → util-DiQ3QvBB.js} +28 -8
- package/dist/src/{util-3pBZZb_H.js → util-I-Rf-KaD.js} +45 -10
- package/dist/src/{util-Dpmm_dAI.cjs → util-IYzs5Y04.cjs} +33 -7
- package/dist/src/{util-BlFVL0UF.js → util-LKTmNsMQ.js} +9 -7
- package/dist/src/{util-DvpHnLt0.cjs → util-SPsvFONY.cjs} +29 -21
- package/dist/src/{util-B9CNhyac.js → util-efByNxcr.js} +9 -7
- package/dist/src/util-kDURhgJW.js +328 -0
- package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
- package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
- package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/{version-CbuBKu2U.js → version-BK20a4sw.js} +2 -2
- package/dist/src/{version-D9zu9FWB.cjs → version-BWCSaByA.cjs} +2 -2
- package/dist/src/{version-CbpiUINz.js → version-eRkNuGv8.js} +2 -2
- package/dist/src/{version-0frU0UTr.js → version-lpHV_53E.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +48 -22
- package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
- package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
- package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-BRkhlH3k.cjs +0 -3
- package/dist/src/cache-BlC6aeJ0.js +0 -3
- package/dist/src/cloud-CoD5OacT.js +0 -3
- package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
- package/dist/src/codex-sdk-Danroptg.cjs +0 -2
- package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
- package/dist/src/evalResult-BBK58h2B.js +0 -3
- package/dist/src/evalResult-spPqh1G_.js +0 -2
- package/dist/src/evaluator-DgLKaZk8.js +0 -3
- package/dist/src/fetch-8viavNv8.js +0 -3
- package/dist/src/graders-C84JI-m5.js +0 -2
- package/dist/src/graders-CBbd0K0Q.cjs +0 -2
- package/dist/src/graders-CbQqpHSN.js +0 -3
- package/dist/src/graders-DS42d3ZG.js +0 -2
- package/dist/src/image-BmilRNqO.js +0 -258
- package/dist/src/image-CxJoa3aW.cjs +0 -280
- package/dist/src/image-DsGRlkh7.js +0 -257
- package/dist/src/image-a_SGUobh.js +0 -257
- package/dist/src/providers-BuyzKt7C.js +0 -2
- package/dist/src/providers-C7lNVBjX.cjs +0 -3
- package/dist/src/providers-CCE2COJi2.js +0 -2
- package/dist/src/render-7uNJ2V14.js +0 -135
- package/dist/src/render-DlscvAUJ.js +0 -135
- package/dist/src/render-eui5p5mL.js +0 -136
- package/dist/src/render-tG6ir9_g.cjs +0 -165
- package/dist/src/rubyUtils-4hjGxvju.js +0 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
- package/dist/src/server-ByiF3qlg.js +0 -386
- package/dist/src/server-C0XKRNB_.cjs +0 -2
- package/dist/src/server-C_15p79-.js +0 -3
- package/dist/src/store-2OXm_eBY.js +0 -240
- package/dist/src/store-BELqNwvz.js +0 -3
- package/dist/src/store-uQZ4AjPe.cjs +0 -2
- package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
- package/dist/src/telemetry-ZdPZc0fm.js +0 -3
- package/dist/src/transform-BQt0BeAW.js +0 -3
- package/dist/src/transform-Bq5oqC0s.cjs +0 -2
- package/dist/src/transform-C9izGX54.cjs +0 -228
- package/dist/src/transform-CwbAZ84V.js +0 -216
- package/dist/src/transform-DzCF-wqV.js +0 -213
- package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/index.js
CHANGED
|
@@ -1,34 +1,47 @@
|
|
|
1
1
|
import { C as getEnvFloat, D as getMaxEvalTimeMs, E as getEvalTimeoutMs, O as isCI, S as getEnvBool, T as getEnvString, a as logger, b as summarizeEvaluateResultForLogging, g as getAjv, h as extractJsonObjects, k as state, m as extractFirstJsonObject, n as globalLogCallback, o as setLogCallback, r as isDebugEnabled, s as setLogLevel, t as getLogLevel, v as orderKeys, w as getEnvInt, y as safeJsonStringify } from "./logger-Ct2S6Yx-.js";
|
|
2
2
|
import { t as invariant } from "./invariant-Ddh24eXh.js";
|
|
3
|
-
import { A as
|
|
4
|
-
import { n as
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import { r as
|
|
8
|
-
import {
|
|
9
|
-
import { t as
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import { r as
|
|
13
|
-
import { i as
|
|
14
|
-
import {
|
|
15
|
-
import { a as
|
|
16
|
-
import { t as
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
21
|
-
import { h as
|
|
22
|
-
import {
|
|
23
|
-
import {
|
|
24
|
-
import { t as telemetry } from "./telemetry-
|
|
25
|
-
import {
|
|
26
|
-
import { t as
|
|
27
|
-
import {
|
|
28
|
-
import { n as
|
|
29
|
-
import {
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
3
|
+
import { $ as riskCategorySeverityMap, A as RedteamConfigSchema, At as DocumentMediaInjectionPlacementValues, B as isUuid, Bt as getInputDescription, C as TestGeneratorConfigSchema, Ct as CODING_AGENT_CORE_PLUGINS, D as VarsSchema, Dt as BaseTokenUsageSchema, E as UnifiedConfigSchema, F as PartialGenerationError, Ft as InputDefinitionSchema, G as STRATEGY_COLLECTIONS, Gt as isProviderOptions, Ht as normalizeInputDefinition, I as PluginConfigSchema, It as InputTypeSchema, K as STRATEGY_COLLECTION_MAPPINGS, L as PolicyObjectSchema, Lt as InputTypeValues, Mt as DocxInjectionPlacementValues, N as ProvidersSchema, Nt as InputConfigSchema, O as isGradingResult, Ot as CompletionTokenDetailsSchema, P as ConversationMessageSchema, Pt as InputDefinitionObjectSchema, R as StrategyConfigSchema, Rt as InputsSchema, S as TestCasesWithMetadataSchema, St as UNALIGNED_PROVIDER_HARM_PLUGINS, T as TestSuiteSchema, U as DEFAULT_STRATEGIES, Ut as normalizeInputs, Vt as getInputType, Wt as isApiProvider, X as Severity, Y as isFanoutStrategy, Z as categoryAliases, _ as ScenarioSchema, _t as REDTEAM_PROVIDER_HARM_PLUGINS, a as AtomicTestCaseSchema, at as FINANCIAL_PLUGINS, b as TestCaseWithVarsFileSchema, bt as TEEN_SAFETY_PLUGINS, c as CompletedPromptSchema, ct as INSURANCE_PLUGINS, d as EvaluateOptionsSchema, dt as MEDICAL_PLUGINS, et as ALIASED_PLUGIN_MAPPINGS, f as GradingConfigSchema, ft as MULTI_INPUT_EXCLUDED_PLUGINS, g as ResultFailureReason, gt as PLUGIN_CATEGORIES, h as OutputFileExtension, ht as PII_PLUGINS, i as AssertionTypeSchema, it as DEFAULT_PLUGINS, j as PromptSchema, jt as DocxInjectionPlacementSchema, k as isResultFailureReason, kt as DocumentMediaInjectionPlacementSchema, l as DerivedMetricSchema, lt as LLAMA_GUARD_ENABLED_CATEGORIES, m as OutputConfigSchema, mt as PHARMACY_PLUGINS, n as AssertionSchema, nt as CANARY_BREAKING_STRATEGY_IDS, o as BaseAssertionTypesSchema, ot as FOUNDATION_PLUGINS, p as NotPrefixedAssertionTypesSchema, pt as MULTI_INPUT_VAR, q as getDefaultNFanout, r as AssertionSetSchema, rt as DATASET_EXEMPT_PLUGINS, s as CommandLineOptionsSchema, st as HARM_PLUGINS, t as AssertionOrSetSchema, tt as BIAS_PLUGINS, u as EvalResultsFilterMode, ut as LLAMA_GUARD_REPLICATE_PROVIDER, v as SpecialAssertionTypesSchema, vt as REMOTE_ONLY_PLUGIN_IDS, w as TestSuiteConfigSchema, wt as CODING_AGENT_PLUGINS, x as TestCasesWithMetadataPromptSchema, xt as TELECOM_PLUGINS, y as TestCaseSchema, zt as buildInputPromptDescription } from "./types-D6glLbdF.js";
|
|
4
|
+
import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, L as FILE_METADATA_KEY, N as TERMINAL_MAX_WIDTH, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, T as cloudConfig, _ as isPromptfooSampleTarget, b as parseChatPrompt, c as getCurrentTimestamp, l as sleep, n as fetchWithRetries, p as REQUEST_TIMEOUT_MS, r as fetchWithTimeout, t as fetchWithProxy, w as CloudConfig } from "./fetch-It34O8Ur.js";
|
|
5
|
+
import { n as VERSION } from "./version-lpHV_53E.js";
|
|
6
|
+
import { i as isJavascriptFile } from "./fileExtensions-CXRfY3Ss.js";
|
|
7
|
+
import { c as setUserEmail, i as getUserEmail, o as isLoggedIntoCloud, r as getAuthor, s as promptForEmailUnverified, t as checkEmailStatusAndMaybeExit } from "./accounts-Ca7WIoPY.js";
|
|
8
|
+
import { r as importModule, t as getDirectory } from "./esm-BTK1W7lG.js";
|
|
9
|
+
import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, r as analyzeTemplateReference, t as renderEnvOnlyInObject } from "./render-DFfDeYUK.js";
|
|
10
|
+
import { t as providerRegistry } from "./providerRegistry-DHcFiVWX.js";
|
|
11
|
+
import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-DsaSwmG2.js";
|
|
12
|
+
import { r as promptYesNo } from "./server-CMJD10J4.js";
|
|
13
|
+
import { a as getCloudDatabaseId, c as getPluginSeverityOverridesFromCloud, d as isCloudProvider, i as checkCloudPermissions, o as getEvalConfigFromCloud, p as resolveTeamId, s as getOrgContext } from "./storage-CD-GWAdx.js";
|
|
14
|
+
import { r as runPython } from "./pythonUtils-D0BYebvX.js";
|
|
15
|
+
import { A as readFilters, M as loadFunction, N as parseFileUrl, O as maybeLoadToolsFromExternalFile, T as maybeLoadFromExternalFile, _ as isProviderAllowed, a as setupEnv, b as normalizeProviderRef, c as filterRuntimeVars, d as checkProviderApiKeys, f as doesProviderRefMatch, g as isOpenAiProvider, h as isGoogleProvider, i as fetchCsvFromGoogleSheet, j as readOutput, l as getTestCaseDeduplicationKey, m as isAnthropicProvider, n as writeMultipleOutputs, o as deduplicateTestCases, p as getProviderDescription, r as writeOutput, s as extractRuntimeVars, t as printBorder, u as resultIsForTestCase, w as maybeLoadConfigFromExternalFile } from "./util-Bx677_k2.js";
|
|
16
|
+
import { n as sha256, t as randomSequence } from "./createHash-BtbSX3mj.js";
|
|
17
|
+
import { c as NON_TRANSIENT_HTTP_STATUSES, i as getCache, l as isNonTransientHttpStatus, n as disableCache, r as fetchWithCache, s as withCacheNamespace, t as cache_exports } from "./cache-DIXbtkNO.js";
|
|
18
|
+
import { t as OpenAiChatCompletionProvider } from "./chat-Dabu84Br.js";
|
|
19
|
+
import { h as validateFunctionCall } from "./transform-DmvYBRll.js";
|
|
20
|
+
import { l as validateFunctionCall$1 } from "./util-LKTmNsMQ.js";
|
|
21
|
+
import { _ as AIStudioChatProvider, a as resolveProvider, f as MCPProvider, g as GoogleLiveProvider, h as VertexChatProvider, n as loadApiProvider, o as resolveProviderConfigs, r as loadApiProviders, t as getProviderIds } from "./providers-DVYRZP4E.js";
|
|
22
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CmnQ0G2m.js";
|
|
23
|
+
import { t as ellipsize } from "./text-DwYK5EBn.js";
|
|
24
|
+
import { t as telemetry } from "./telemetry-DwX9XUN5.js";
|
|
25
|
+
import { a as evalsTable, c as evalsToTagsTable, d as tagsTable, i as evalResultsTable, l as promptsTable, m as getDbSignalPath, o as evalsToDatasetsTable, p as getDb, r as datasetsTable, s as evalsToPromptsTable } from "./tables-CRSXQ2Ke.js";
|
|
26
|
+
import { t as getBlobByHash } from "./blobs-Dwef1Ao1.js";
|
|
27
|
+
import { t as getProcessShim } from "./processShim-BcGzU8fY.js";
|
|
28
|
+
import { n as loadFromPackage, t as isPackagePath } from "./packageParser-CgE-ziRo.js";
|
|
29
|
+
import { n as runRuby } from "./rubyUtils-CnlW8AYb.js";
|
|
30
|
+
import { n as materializeInputVariablesWithMetadata, t as buildPromptInputDescriptions } from "./inputVariables-DUGMb9Ka.js";
|
|
31
|
+
import { a as extractPromptFromTags, c as isBasicRefusal, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, o as getSessionId, r as extractInputVarsFromPrompt, s as getShortPluginId } from "./util-DNl96nNs.js";
|
|
32
|
+
import { n as PromptfooHarmfulCompletionProvider } from "./promptfoo-Cm4hiy1Y.js";
|
|
33
|
+
import { $ as readProviderPromptMap, A as ExcessiveAgencyPlugin, At as withProviderCallExecutionContext, B as retryWithDeduplication, C as PlinyPlugin, Ct as processFileReference, D as ImitationPlugin, Dt as getAndCheckProvider, E as IntentPlugin, Et as callProviderWithContext, F as BeavertailsPlugin, G as matchesFactuality, H as fetchHuggingFaceDataset, I as AegisPlugin, J as matchesPiScore, K as matchesGEval, L as RedteamGraderBase, M as DebugAccessPlugin, N as CrossSessionLeakPlugin, O as HarmbenchPlugin, Ot as getGradingProvider, P as ContractPlugin, Q as readPrompts, R as RedteamPluginBase, S as makeInlinePolicyIdSync, St as loadFromJavaScriptFile, T as OverreliancePlugin, Tt as DEFAULT_ANTHROPIC_MODEL, U as isGraderFailure, V as sampleArray, W as matchesClosedQa, X as doRemoteGrading, Y as matchesTrajectoryGoalSuccess, Z as processPrompts, _ as PromptExtractionPlugin, _t as normalizeMatcherTokenUsage, a as VLGuardPlugin, at as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, b as determinePolicyTypeFromId, bt as coerceString, c as ToxicChatPlugin, ct as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, d as TeenSafetyDangerousRoleplayPlugin, dt as loadRubricPrompt, et as DEFAULT_WEB_SEARCH_PROMPT, f as TeenSafetyDangerousContentPlugin, ft as renderLlmRubricPrompt, g as RbacPlugin, gt as fail, h as ShellInjectionPlugin, ht as euclideanDistance, i as VLSUPlugin, it as CONTEXT_FAITHFULNESS_LONGFORM, j as DivergentRepetitionPlugin, k as HallucinationPlugin, kt as getProviderCallExecutionContext, l as ToolDiscoveryPlugin, lt as CONTEXT_RELEVANCE, m as SqlInjectionPlugin, mt as dotProduct, n as getGraderById, nt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, o as UnverifiableClaimsPlugin, ot as CONTEXT_RECALL, p as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, pt as cosineSimilarity, q as matchesLlmRubric, rt as ANSWER_RELEVANCY_GENERATE, s as UnsafeBenchPlugin, st as CONTEXT_RECALL_ATTRIBUTED_TOKEN, t as GRADERS, tt as SELECT_BEST_PROMPT, u as TeenSafetyHarmfulBodyIdealsPlugin, ut as CONTEXT_RELEVANCE_BAD, v as PoliticsPlugin, vt as splitIntoSentences, w as getPiiLeakTestsForCategory, wt as getDefaultProviders, x as isValidPolicyObject, xt as getFinalTest, y as PolicyPlugin, yt as tryParse, z as getCustomPolicies } from "./graders-BX0f2tvS.js";
|
|
34
|
+
import { f as redteamProviderManager, g as createProviderRateLimitOptions, h as createRateLimitRegistry, m as TokenUsageTracker } from "./shared-D6IjElRI.js";
|
|
35
|
+
import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-BGY69tk_.js";
|
|
36
|
+
import { a as getTransformLabel, i as getTransformErrorMessage, n as TRANSFORM_KEYS, o as transform, r as TransformInputType, t as INLINE_FUNCTION_LABEL } from "./transform-CGt7Kt3y2.js";
|
|
37
|
+
import { t as getTraceStore } from "./store-HpopRVzl.js";
|
|
38
|
+
import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-CxRtnaHl.js";
|
|
39
|
+
import { i as throwIfTargetPromptExceedsMaxChars, n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-B9nZEfO6.js";
|
|
40
|
+
import { n as checkExfilTracking } from "./indirectWebPwn-CbjUG0rh.js";
|
|
41
|
+
import { n as getFirstStringAttribute, r as getToolNameFromAttributes, t as TOOL_ARGUMENT_ATTRIBUTE_KEYS } from "./toolAttributes-DJ9ZEKXD.js";
|
|
42
|
+
import { i as filterFiniteScores, n as renderPrompt, r as runExtensionHook, t as collectFileMetadata } from "./evaluatorHelpers-DuqFFfq7.js";
|
|
43
|
+
import { r as sanitizeProvider, t as EvalResult } from "./evalResult-2RRJvFyB.js";
|
|
44
|
+
import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-CiSeroPH.js";
|
|
32
45
|
import * as fs$2 from "fs";
|
|
33
46
|
import fs, { createWriteStream } from "fs";
|
|
34
47
|
import * as path$2 from "path";
|
|
@@ -51,14 +64,15 @@ import { DiagConsoleLogger, DiagLogLevel, diag, propagation } from "@opentelemet
|
|
|
51
64
|
import readline from "readline";
|
|
52
65
|
import { parse as parse$1 } from "csv-parse/sync";
|
|
53
66
|
import { globSync } from "glob";
|
|
54
|
-
import { and, desc, eq, inArray, sql } from "drizzle-orm";
|
|
55
67
|
import { XMLParser } from "fast-xml-parser";
|
|
56
|
-
import
|
|
68
|
+
import { and, desc, eq, inArray, sql } from "drizzle-orm";
|
|
57
69
|
import { URL } from "url";
|
|
58
70
|
import { parse as parse$2 } from "parse5";
|
|
59
71
|
import { distance } from "fastest-levenshtein";
|
|
72
|
+
import cliProgress from "cli-progress";
|
|
60
73
|
import * as rouge from "js-rouge";
|
|
61
74
|
import { isDeepStrictEqual } from "node:util";
|
|
75
|
+
import { LRUCache } from "lru-cache";
|
|
62
76
|
import "debounce";
|
|
63
77
|
import { ExportResultCode, W3CTraceContextPropagator } from "@opentelemetry/core";
|
|
64
78
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
@@ -429,17 +443,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
|
|
|
429
443
|
contextValue = test.vars.context;
|
|
430
444
|
}
|
|
431
445
|
} else if (fallbackContext) contextValue = fallbackContext;
|
|
432
|
-
if (assertion.contextTransform)
|
|
433
|
-
const
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
446
|
+
if (assertion.contextTransform) {
|
|
447
|
+
const getLabel = () => getTransformLabel(assertion.contextTransform);
|
|
448
|
+
try {
|
|
449
|
+
const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
|
|
450
|
+
const transformed = await transform(assertion.contextTransform, outputForTransform, {
|
|
451
|
+
vars: test.vars,
|
|
452
|
+
prompt: { label: prompt },
|
|
453
|
+
...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
|
|
454
|
+
});
|
|
455
|
+
invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
|
|
456
|
+
contextValue = transformed;
|
|
457
|
+
} catch (error) {
|
|
458
|
+
throw new Error(`Failed to transform context using expression '${getLabel()}': ${getTransformErrorMessage(error)}`);
|
|
459
|
+
}
|
|
443
460
|
}
|
|
444
461
|
invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
|
|
445
462
|
return contextValue;
|
|
@@ -782,7 +799,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
782
799
|
telemetry.record("feature_used", { feature: "tracing" });
|
|
783
800
|
try {
|
|
784
801
|
logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
785
|
-
const { startOTLPReceiver } = await import("./otlpReceiver-
|
|
802
|
+
const { startOTLPReceiver } = await import("./otlpReceiver-BXjcRqAM.js");
|
|
786
803
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
787
804
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
788
805
|
const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
|
|
@@ -806,7 +823,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
806
823
|
async function stopOtlpReceiverIfNeeded() {
|
|
807
824
|
if (otlpReceiverStarted) try {
|
|
808
825
|
logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
809
|
-
const { stopOTLPReceiver } = await import("./otlpReceiver-
|
|
826
|
+
const { stopOTLPReceiver } = await import("./otlpReceiver-BXjcRqAM.js");
|
|
810
827
|
await stopOTLPReceiver();
|
|
811
828
|
otlpReceiverStarted = false;
|
|
812
829
|
logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -841,7 +858,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
841
858
|
}
|
|
842
859
|
if (!tracingEnabled) return null;
|
|
843
860
|
logger.debug("[EvaluatorTracing] Importing trace store");
|
|
844
|
-
const { getTraceStore } = await import("./store-
|
|
861
|
+
const { getTraceStore } = await import("./store-HpopRVzl.js").then((n) => n.n);
|
|
845
862
|
const traceStore = getTraceStore();
|
|
846
863
|
const traceId = generateTraceId();
|
|
847
864
|
const spanId = generateSpanId();
|
|
@@ -1473,27 +1490,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
|
|
|
1473
1490
|
};
|
|
1474
1491
|
//#endregion
|
|
1475
1492
|
//#region src/assertions/geval.ts
|
|
1476
|
-
const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1477
|
-
invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
|
|
1493
|
+
const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1494
|
+
invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
|
|
1478
1495
|
const threshold = assertion.threshold ?? .7;
|
|
1479
1496
|
if (Array.isArray(renderedValue)) {
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1497
|
+
if (renderedValue.length === 0) return {
|
|
1498
|
+
assertion,
|
|
1499
|
+
pass: false,
|
|
1500
|
+
score: 0,
|
|
1501
|
+
reason: "G-Eval assertion requires at least one criterion string in the value array."
|
|
1502
|
+
};
|
|
1503
|
+
const responses = [];
|
|
1504
|
+
let failure;
|
|
1505
|
+
for (const [index, value] of renderedValue.entries()) {
|
|
1483
1506
|
const resp = await matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1484
|
-
|
|
1485
|
-
|
|
1507
|
+
responses.push(resp);
|
|
1508
|
+
if (isGraderFailure(resp)) {
|
|
1509
|
+
failure = {
|
|
1510
|
+
index,
|
|
1511
|
+
resp
|
|
1512
|
+
};
|
|
1513
|
+
break;
|
|
1514
|
+
}
|
|
1486
1515
|
}
|
|
1487
|
-
const
|
|
1516
|
+
const tokensUsed = createEmptyTokenUsage();
|
|
1517
|
+
for (const r of responses) accumulateTokenUsage(tokensUsed, r.tokensUsed);
|
|
1518
|
+
if (failure) {
|
|
1519
|
+
const criterion = renderedValue[failure.index];
|
|
1520
|
+
return {
|
|
1521
|
+
assertion,
|
|
1522
|
+
pass: false,
|
|
1523
|
+
score: 0,
|
|
1524
|
+
reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
|
|
1525
|
+
tokensUsed,
|
|
1526
|
+
metadata: failure.resp.metadata
|
|
1527
|
+
};
|
|
1528
|
+
}
|
|
1529
|
+
const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
|
|
1530
|
+
const combinedReason = responses.map((r) => r.reason).join("\n\n");
|
|
1488
1531
|
return {
|
|
1489
1532
|
assertion,
|
|
1490
|
-
pass:
|
|
1491
|
-
score:
|
|
1492
|
-
reason:
|
|
1533
|
+
pass: averageScore >= threshold !== inverse,
|
|
1534
|
+
score: inverse ? 1 - averageScore : averageScore,
|
|
1535
|
+
reason: combinedReason,
|
|
1536
|
+
tokensUsed
|
|
1493
1537
|
};
|
|
1494
|
-
}
|
|
1538
|
+
}
|
|
1539
|
+
const resp = await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1540
|
+
if (isGraderFailure(resp)) return {
|
|
1541
|
+
assertion,
|
|
1542
|
+
pass: false,
|
|
1543
|
+
score: 0,
|
|
1544
|
+
reason: resp.reason,
|
|
1545
|
+
tokensUsed: resp.tokensUsed,
|
|
1546
|
+
metadata: resp.metadata
|
|
1547
|
+
};
|
|
1548
|
+
const passed = resp.score >= threshold !== inverse;
|
|
1549
|
+
return {
|
|
1495
1550
|
assertion,
|
|
1496
|
-
...
|
|
1551
|
+
...resp,
|
|
1552
|
+
pass: passed,
|
|
1553
|
+
score: inverse ? 1 - resp.score : resp.score
|
|
1497
1554
|
};
|
|
1498
1555
|
};
|
|
1499
1556
|
//#endregion
|
|
@@ -2496,45 +2553,6 @@ function matchesPattern(spanName, pattern) {
|
|
|
2496
2553
|
}
|
|
2497
2554
|
//#endregion
|
|
2498
2555
|
//#region src/assertions/trajectoryUtils.ts
|
|
2499
|
-
const TOOL_ATTRIBUTE_KEYS = [
|
|
2500
|
-
"tool.name",
|
|
2501
|
-
"tool_name",
|
|
2502
|
-
"tool",
|
|
2503
|
-
"function.name",
|
|
2504
|
-
"function_name",
|
|
2505
|
-
"gen_ai.tool.name",
|
|
2506
|
-
"codex.mcp.tool",
|
|
2507
|
-
"agent.tool",
|
|
2508
|
-
"agent.tool_name",
|
|
2509
|
-
"agent.toolName"
|
|
2510
|
-
];
|
|
2511
|
-
const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
|
|
2512
|
-
"tool.arguments",
|
|
2513
|
-
"tool.args",
|
|
2514
|
-
"tool.input",
|
|
2515
|
-
"tool_arguments",
|
|
2516
|
-
"tool_args",
|
|
2517
|
-
"tool_input",
|
|
2518
|
-
"function.arguments",
|
|
2519
|
-
"function.args",
|
|
2520
|
-
"function.input",
|
|
2521
|
-
"function_arguments",
|
|
2522
|
-
"function_args",
|
|
2523
|
-
"gen_ai.tool.arguments",
|
|
2524
|
-
"gen_ai.tool.args",
|
|
2525
|
-
"gen_ai.tool.input",
|
|
2526
|
-
"gen_ai.tool.call.arguments",
|
|
2527
|
-
"gen_ai.tool.call.args",
|
|
2528
|
-
"agent.tool.arguments",
|
|
2529
|
-
"agent.tool.args",
|
|
2530
|
-
"agent.tool.input",
|
|
2531
|
-
"codex.mcp.arguments",
|
|
2532
|
-
"codex.mcp.args",
|
|
2533
|
-
"codex.mcp.input",
|
|
2534
|
-
"arguments",
|
|
2535
|
-
"args",
|
|
2536
|
-
"input"
|
|
2537
|
-
];
|
|
2538
2556
|
const COMMAND_ATTRIBUTE_KEYS = [
|
|
2539
2557
|
"codex.command",
|
|
2540
2558
|
"command",
|
|
@@ -2547,16 +2565,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
|
|
|
2547
2565
|
"search_query"
|
|
2548
2566
|
];
|
|
2549
2567
|
const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
|
|
2568
|
+
const COMMAND_TOOL_NAMES = new Set([
|
|
2569
|
+
"exec_command",
|
|
2570
|
+
"local_shell",
|
|
2571
|
+
"shell"
|
|
2572
|
+
]);
|
|
2550
2573
|
const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
|
|
2551
2574
|
const MAX_JUDGE_SUMMARY_STEPS = 24;
|
|
2552
2575
|
const JUDGE_SUMMARY_HEAD_STEPS = 12;
|
|
2553
2576
|
const JUDGE_SUMMARY_TAIL_STEPS = 12;
|
|
2554
|
-
function getStringAttribute(attributes, keys) {
|
|
2555
|
-
for (const key of keys) {
|
|
2556
|
-
const value = attributes[key];
|
|
2557
|
-
if (typeof value === "string" && value.trim()) return value.trim();
|
|
2558
|
-
}
|
|
2559
|
-
}
|
|
2560
2577
|
function normalizeStructuredAttribute(value) {
|
|
2561
2578
|
if (value === void 0 || value === null) return;
|
|
2562
2579
|
if (typeof value === "string") {
|
|
@@ -2588,9 +2605,12 @@ function getTrajectoryStepStatus(step) {
|
|
|
2588
2605
|
function getCommandExecutable(command) {
|
|
2589
2606
|
return command.trim().split(/\s+/)[0] || void 0;
|
|
2590
2607
|
}
|
|
2608
|
+
function isCommandToolName(toolName) {
|
|
2609
|
+
return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
|
|
2610
|
+
}
|
|
2591
2611
|
function extractToolName(span) {
|
|
2592
2612
|
const attributes = span.attributes || {};
|
|
2593
|
-
const directMatch =
|
|
2613
|
+
const directMatch = getToolNameFromAttributes(attributes);
|
|
2594
2614
|
if (directMatch) return directMatch;
|
|
2595
2615
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2596
2616
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
@@ -2615,21 +2635,31 @@ function extractToolArgs(span) {
|
|
|
2615
2635
|
if (value !== void 0) return value;
|
|
2616
2636
|
}
|
|
2617
2637
|
}
|
|
2618
|
-
function extractCommand(span) {
|
|
2638
|
+
function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
|
|
2619
2639
|
const attributes = span.attributes || {};
|
|
2620
|
-
const directMatch =
|
|
2640
|
+
const directMatch = getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
|
|
2621
2641
|
if (directMatch) return directMatch;
|
|
2622
2642
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2623
2643
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
2624
2644
|
if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
|
|
2625
2645
|
}
|
|
2646
|
+
const toolArgs = getToolArgs();
|
|
2647
|
+
if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
|
|
2648
|
+
const args = toolArgs;
|
|
2649
|
+
const command = args.cmd ?? args.command;
|
|
2650
|
+
if (typeof command === "string" && command.trim()) return command.trim();
|
|
2651
|
+
if (Array.isArray(command)) {
|
|
2652
|
+
const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
|
|
2653
|
+
if (joined) return joined;
|
|
2654
|
+
}
|
|
2655
|
+
}
|
|
2626
2656
|
if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
|
|
2627
2657
|
}
|
|
2628
2658
|
function extractSearchQuery(span) {
|
|
2629
2659
|
const attributes = span.attributes || {};
|
|
2630
|
-
const directMatch =
|
|
2660
|
+
const directMatch = getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
|
|
2631
2661
|
if (directMatch) return directMatch;
|
|
2632
|
-
const genericQuery =
|
|
2662
|
+
const genericQuery = getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
|
|
2633
2663
|
if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
|
|
2634
2664
|
if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
|
|
2635
2665
|
}
|
|
@@ -2653,17 +2683,34 @@ function extractTrajectorySteps(trace) {
|
|
|
2653
2683
|
return left.index - right.index;
|
|
2654
2684
|
}).map(({ span }) => {
|
|
2655
2685
|
const toolName = extractToolName(span);
|
|
2656
|
-
|
|
2686
|
+
let toolArgs;
|
|
2687
|
+
let hasExtractedToolArgs = false;
|
|
2688
|
+
const getToolArgs = () => {
|
|
2689
|
+
if (!hasExtractedToolArgs) {
|
|
2690
|
+
toolArgs = extractToolArgs(span);
|
|
2691
|
+
hasExtractedToolArgs = true;
|
|
2692
|
+
}
|
|
2693
|
+
return toolArgs;
|
|
2694
|
+
};
|
|
2695
|
+
const command = extractCommand(span, toolName, getToolArgs);
|
|
2657
2696
|
const searchQuery = extractSearchQuery(span);
|
|
2658
2697
|
let type = "span";
|
|
2659
2698
|
let name = span.name;
|
|
2660
2699
|
const aliases = new Set([span.name]);
|
|
2661
2700
|
let args;
|
|
2662
|
-
if (toolName) {
|
|
2701
|
+
if (command && isCommandToolName(toolName)) {
|
|
2702
|
+
type = "command";
|
|
2703
|
+
name = command;
|
|
2704
|
+
aliases.add(command);
|
|
2705
|
+
args = getToolArgs();
|
|
2706
|
+
if (toolName) aliases.add(toolName);
|
|
2707
|
+
const executable = getCommandExecutable(command);
|
|
2708
|
+
if (executable) aliases.add(executable);
|
|
2709
|
+
} else if (toolName) {
|
|
2663
2710
|
type = "tool";
|
|
2664
2711
|
name = toolName;
|
|
2665
2712
|
aliases.add(toolName);
|
|
2666
|
-
args =
|
|
2713
|
+
args = getToolArgs();
|
|
2667
2714
|
} else if (command) {
|
|
2668
2715
|
type = "command";
|
|
2669
2716
|
name = command;
|
|
@@ -3850,13 +3897,13 @@ function resolveSequenceValue(value) {
|
|
|
3850
3897
|
}
|
|
3851
3898
|
throw new Error("trajectory:tool-sequence assertion must have an array or object value");
|
|
3852
3899
|
}
|
|
3853
|
-
function isRecord(value) {
|
|
3900
|
+
function isRecord$1(value) {
|
|
3854
3901
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
3855
3902
|
}
|
|
3856
3903
|
function matchesExpectedArgsPartial(actual, expected) {
|
|
3857
3904
|
if (Array.isArray(expected)) return Array.isArray(actual) && actual.length === expected.length && expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
|
|
3858
|
-
if (isRecord(expected)) {
|
|
3859
|
-
if (!isRecord(actual)) return false;
|
|
3905
|
+
if (isRecord$1(expected)) {
|
|
3906
|
+
if (!isRecord$1(actual)) return false;
|
|
3860
3907
|
return Object.entries(expected).every(([key, expectedValue]) => Object.prototype.hasOwnProperty.call(actual, key) && matchesExpectedArgsPartial(actual[key], expectedValue));
|
|
3861
3908
|
}
|
|
3862
3909
|
return isDeepStrictEqual(actual, expected);
|
|
@@ -4227,7 +4274,7 @@ async function loadTraceData(traceId) {
|
|
|
4227
4274
|
let stableObservations = 0;
|
|
4228
4275
|
let latestTrace = null;
|
|
4229
4276
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
4230
|
-
latestTrace = await traceStore.getTrace(traceId);
|
|
4277
|
+
latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
|
|
4231
4278
|
const spanCount = latestTrace?.spans?.length ?? 0;
|
|
4232
4279
|
if (spanCount > 0) {
|
|
4233
4280
|
stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
|
|
@@ -4280,7 +4327,7 @@ const ASSERTION_HANDLERS = {
|
|
|
4280
4327
|
"llm-rubric": handleLlmRubric,
|
|
4281
4328
|
meteor: async (params) => {
|
|
4282
4329
|
try {
|
|
4283
|
-
const { handleMeteorAssertion } = await import("./meteor-
|
|
4330
|
+
const { handleMeteorAssertion } = await import("./meteor-Dce-_zGQ.js");
|
|
4284
4331
|
return handleMeteorAssertion(params);
|
|
4285
4332
|
} catch (error) {
|
|
4286
4333
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -4416,7 +4463,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
4416
4463
|
};
|
|
4417
4464
|
}
|
|
4418
4465
|
else if (filePath.endsWith(".rb")) try {
|
|
4419
|
-
const { runRuby } = await import("./rubyUtils-
|
|
4466
|
+
const { runRuby } = await import("./rubyUtils-CnlW8AYb.js").then((n) => n.t);
|
|
4420
4467
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
4421
4468
|
logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
4422
4469
|
} catch (error) {
|
|
@@ -4533,7 +4580,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
|
|
|
4533
4580
|
logger.debug(`Failed to preload trace data for assertions: ${error}`);
|
|
4534
4581
|
preloadedTraceData = null;
|
|
4535
4582
|
}
|
|
4536
|
-
|
|
4583
|
+
const concurrency = getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
|
|
4584
|
+
await async.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
|
|
4537
4585
|
if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
|
|
4538
4586
|
const result = await runAssertion({
|
|
4539
4587
|
prompt,
|
|
@@ -4660,7 +4708,8 @@ var CIProgressReporter = class {
|
|
|
4660
4708
|
}
|
|
4661
4709
|
updateTotalTests(newTotal) {
|
|
4662
4710
|
this.totalTests = Math.max(newTotal, 1);
|
|
4663
|
-
|
|
4711
|
+
const percentage = Math.floor(this.completedTests / this.totalTests * 100);
|
|
4712
|
+
this.highestPercentageSeen = percentage;
|
|
4664
4713
|
}
|
|
4665
4714
|
finish() {
|
|
4666
4715
|
if (this.intervalId) {
|
|
@@ -4833,6 +4882,10 @@ function getDefaultOtelConfig() {
|
|
|
4833
4882
|
}
|
|
4834
4883
|
//#endregion
|
|
4835
4884
|
//#region src/tracing/localSpanExporter.ts
|
|
4885
|
+
const MISSING_TRACE_RETRY_DELAY_MS = 50;
|
|
4886
|
+
function delay(ms) {
|
|
4887
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
4888
|
+
}
|
|
4836
4889
|
/**
|
|
4837
4890
|
* A span exporter that writes spans to the local TraceStore (SQLite).
|
|
4838
4891
|
* This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
|
|
@@ -4874,7 +4927,7 @@ var LocalSpanExporter = class {
|
|
|
4874
4927
|
}
|
|
4875
4928
|
let firstError;
|
|
4876
4929
|
for (const [traceId, spanDataList] of spansByTrace) try {
|
|
4877
|
-
const result = await
|
|
4930
|
+
const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
|
|
4878
4931
|
if (result.stored) logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
|
|
4879
4932
|
else logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
|
|
4880
4933
|
} catch (error) {
|
|
@@ -4886,6 +4939,16 @@ var LocalSpanExporter = class {
|
|
|
4886
4939
|
}
|
|
4887
4940
|
return firstError;
|
|
4888
4941
|
}
|
|
4942
|
+
async addSpansWithTraceRetry(traceStore, traceId, spans) {
|
|
4943
|
+
const options = {
|
|
4944
|
+
skipTraceCheck: false,
|
|
4945
|
+
warnIfMissingTrace: false
|
|
4946
|
+
};
|
|
4947
|
+
const result = await traceStore.addSpans(traceId, spans, options);
|
|
4948
|
+
if (result.stored) return result;
|
|
4949
|
+
await delay(MISSING_TRACE_RETRY_DELAY_MS);
|
|
4950
|
+
return traceStore.addSpans(traceId, spans, options);
|
|
4951
|
+
}
|
|
4889
4952
|
/**
|
|
4890
4953
|
* Convert an OTEL ReadableSpan to our SpanData format.
|
|
4891
4954
|
*/
|
|
@@ -5168,6 +5231,15 @@ function isPromptAllowed(prompt, allowedPrompts) {
|
|
|
5168
5231
|
}
|
|
5169
5232
|
//#endregion
|
|
5170
5233
|
//#region src/evaluator.ts
|
|
5234
|
+
const CONVERSATION_VAR_NAME = "_conversation";
|
|
5235
|
+
const promptUsesConversationVariableCache = new LRUCache({ max: 1024 });
|
|
5236
|
+
function promptUsesConversationVariable(prompt) {
|
|
5237
|
+
const cached = promptUsesConversationVariableCache.get(prompt.raw);
|
|
5238
|
+
if (cached !== void 0) return cached;
|
|
5239
|
+
const { referenced, parsed } = analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
|
|
5240
|
+
if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
|
|
5241
|
+
return referenced;
|
|
5242
|
+
}
|
|
5171
5243
|
/**
|
|
5172
5244
|
* Manages a single progress bar for the evaluation
|
|
5173
5245
|
*/
|
|
@@ -5367,6 +5439,18 @@ function hasProviderGroupedAssertion(assertion) {
|
|
|
5367
5439
|
function shouldDeferGradingForTest(test) {
|
|
5368
5440
|
return Boolean(test.assert?.some(hasProviderGroupedAssertion));
|
|
5369
5441
|
}
|
|
5442
|
+
function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
|
|
5443
|
+
if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
|
|
5444
|
+
if (shouldGroupGradingByProvider) {
|
|
5445
|
+
logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
|
|
5446
|
+
return;
|
|
5447
|
+
}
|
|
5448
|
+
if (concurrency !== 1) return;
|
|
5449
|
+
const reasons = [];
|
|
5450
|
+
if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
|
|
5451
|
+
if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
|
|
5452
|
+
if (reasons.length > 0) logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
|
|
5453
|
+
}
|
|
5370
5454
|
function applyGradingResult(row, checkResult) {
|
|
5371
5455
|
if (!checkResult.pass) {
|
|
5372
5456
|
row.error = checkResult.reason;
|
|
@@ -5381,14 +5465,29 @@ function applyGradingResult(row, checkResult) {
|
|
|
5381
5465
|
if (checkResult.tokensUsed) accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
|
|
5382
5466
|
row.gradingResult = checkResult;
|
|
5383
5467
|
}
|
|
5384
|
-
|
|
5385
|
-
|
|
5386
|
-
|
|
5387
|
-
|
|
5388
|
-
|
|
5389
|
-
|
|
5390
|
-
|
|
5391
|
-
|
|
5468
|
+
const ABORTED_GRADING_PREFIX = "Aborted: ";
|
|
5469
|
+
function isAbortShapedError(error) {
|
|
5470
|
+
return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
|
|
5471
|
+
}
|
|
5472
|
+
function applyGradingError(row, error, abortSignal) {
|
|
5473
|
+
const errorAsError = error instanceof Error ? error : void 0;
|
|
5474
|
+
if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
|
|
5475
|
+
const shortMessage = errorAsError?.message ?? String(error);
|
|
5476
|
+
logger.debug("Assertion grading aborted", {
|
|
5477
|
+
error: shortMessage,
|
|
5478
|
+
promptIdx: row.promptIdx,
|
|
5479
|
+
testIdx: row.testIdx
|
|
5480
|
+
});
|
|
5481
|
+
row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
|
|
5482
|
+
} else {
|
|
5483
|
+
const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
|
|
5484
|
+
logger.error("Assertion grading failed during eval", {
|
|
5485
|
+
error: fullMessage,
|
|
5486
|
+
promptIdx: row.promptIdx,
|
|
5487
|
+
testIdx: row.testIdx
|
|
5488
|
+
});
|
|
5489
|
+
row.error = fullMessage;
|
|
5490
|
+
}
|
|
5392
5491
|
row.failureReason = ResultFailureReason.ERROR;
|
|
5393
5492
|
row.success = false;
|
|
5394
5493
|
row.score = 0;
|
|
@@ -5420,7 +5519,7 @@ function createRunEvalState({ provider, prompt, test }) {
|
|
|
5420
5519
|
};
|
|
5421
5520
|
}
|
|
5422
5521
|
function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
|
|
5423
|
-
const usesConversation = prompt
|
|
5522
|
+
const usesConversation = promptUsesConversationVariable(prompt);
|
|
5424
5523
|
if (!getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
|
|
5425
5524
|
}
|
|
5426
5525
|
function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
|
|
@@ -5667,7 +5766,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
|
|
|
5667
5766
|
assertScoringFunction: test.assertScoringFunction,
|
|
5668
5767
|
traceId
|
|
5669
5768
|
}).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
|
|
5670
|
-
applyGradingError(ret, error);
|
|
5769
|
+
applyGradingError(ret, error, abortSignal);
|
|
5671
5770
|
});
|
|
5672
5771
|
deferredGradingPromises.set(ret, gradingPromise);
|
|
5673
5772
|
return;
|
|
@@ -6214,7 +6313,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
|
|
|
6214
6313
|
const defaultProvider = defaultTest.provider;
|
|
6215
6314
|
if (isApiProvider(defaultProvider)) return defaultProvider;
|
|
6216
6315
|
if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
6217
|
-
const { loadApiProvider } = await import("./providers-
|
|
6316
|
+
const { loadApiProvider } = await import("./providers-DVYRZP4E.js").then((n) => n.i);
|
|
6218
6317
|
return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
6219
6318
|
}
|
|
6220
6319
|
return defaultProvider;
|
|
@@ -6374,7 +6473,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
|
|
|
6374
6473
|
async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
6375
6474
|
if (!state.resume || !evalRecord.persisted) return;
|
|
6376
6475
|
try {
|
|
6377
|
-
const { default: EvalResult } = await import("./evalResult-
|
|
6476
|
+
const { default: EvalResult } = await import("./evalResult-2RRJvFyB.js").then((n) => n.n);
|
|
6378
6477
|
const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: state.retryMode });
|
|
6379
6478
|
const originalCount = runEvalOptions.length;
|
|
6380
6479
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -6388,14 +6487,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
|
6388
6487
|
}
|
|
6389
6488
|
}
|
|
6390
6489
|
function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
|
|
6391
|
-
const usesConversationVar = prompts.some(
|
|
6490
|
+
const usesConversationVar = prompts.some(promptUsesConversationVariable);
|
|
6392
6491
|
if (concurrency <= 1) return {
|
|
6393
6492
|
concurrency,
|
|
6394
6493
|
usesConversationVar
|
|
6395
6494
|
};
|
|
6396
6495
|
const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
|
|
6397
6496
|
if (usesConversationVar) {
|
|
6398
|
-
logger.info(`Setting concurrency to 1 because the ${chalk.cyan(
|
|
6497
|
+
logger.info(`Setting concurrency to 1 because the ${chalk.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
|
|
6399
6498
|
return {
|
|
6400
6499
|
concurrency: 1,
|
|
6401
6500
|
usesConversationVar
|
|
@@ -6625,7 +6724,8 @@ var Evaluator = class {
|
|
|
6625
6724
|
};
|
|
6626
6725
|
this.conversations = {};
|
|
6627
6726
|
this.registers = {};
|
|
6628
|
-
|
|
6727
|
+
const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
|
|
6728
|
+
this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
|
|
6629
6729
|
this.rateLimitRegistry = createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
|
|
6630
6730
|
this.rateLimitRegistry.on("ratelimit:hit", (data) => {
|
|
6631
6731
|
logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
|
|
@@ -6745,6 +6845,25 @@ var Evaluator = class {
|
|
|
6745
6845
|
this.trackCompletedRow(evalStep, row, context);
|
|
6746
6846
|
context.numComplete++;
|
|
6747
6847
|
const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
|
|
6848
|
+
if (context.testSuite.extensions?.length) try {
|
|
6849
|
+
const afterEachOut = await runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6850
|
+
test: evalStep.test,
|
|
6851
|
+
result: {
|
|
6852
|
+
...row,
|
|
6853
|
+
namedScores: { ...row.namedScores },
|
|
6854
|
+
metadata: { ...row.metadata },
|
|
6855
|
+
response: row.response ? {
|
|
6856
|
+
...row.response,
|
|
6857
|
+
metadata: { ...row.response.metadata }
|
|
6858
|
+
} : row.response
|
|
6859
|
+
}
|
|
6860
|
+
});
|
|
6861
|
+
row.namedScores = filterFiniteScores(afterEachOut.result.namedScores);
|
|
6862
|
+
row.metadata = afterEachOut.result.metadata;
|
|
6863
|
+
if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
|
|
6864
|
+
} catch (error) {
|
|
6865
|
+
logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
|
|
6866
|
+
}
|
|
6748
6867
|
await this.persistEvalRow(row);
|
|
6749
6868
|
if (this.abortIfTargetUnavailable(row, context)) break;
|
|
6750
6869
|
const metrics = context.prompts[row.promptIdx].metrics;
|
|
@@ -6756,10 +6875,6 @@ var Evaluator = class {
|
|
|
6756
6875
|
promptEvalCount,
|
|
6757
6876
|
row
|
|
6758
6877
|
});
|
|
6759
|
-
await runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6760
|
-
test: evalStep.test,
|
|
6761
|
-
result: row
|
|
6762
|
-
});
|
|
6763
6878
|
context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
|
|
6764
6879
|
}
|
|
6765
6880
|
}
|
|
@@ -6914,7 +7029,15 @@ var Evaluator = class {
|
|
|
6914
7029
|
})) break;
|
|
6915
7030
|
}
|
|
6916
7031
|
} catch (error) {
|
|
6917
|
-
|
|
7032
|
+
const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
|
|
7033
|
+
try {
|
|
7034
|
+
await flushGroupedRows();
|
|
7035
|
+
} catch (flushError) {
|
|
7036
|
+
logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
|
|
7037
|
+
error: flushError instanceof Error ? flushError.message : String(flushError),
|
|
7038
|
+
pendingRowCount
|
|
7039
|
+
});
|
|
7040
|
+
}
|
|
6918
7041
|
throw error;
|
|
6919
7042
|
}
|
|
6920
7043
|
await flushGroupedRows();
|
|
@@ -7350,6 +7473,13 @@ var Evaluator = class {
|
|
|
7350
7473
|
if (!this.options.silent) {
|
|
7351
7474
|
if (serialRunEvalOptions.length > 0) logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
|
|
7352
7475
|
if (concurrentRunEvalOptions.length > 0) logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
|
|
7476
|
+
logGroupedGradingStatus({
|
|
7477
|
+
concurrency,
|
|
7478
|
+
hasEvalStepTimeout,
|
|
7479
|
+
runEvalOptions,
|
|
7480
|
+
shouldGroupGradingByProvider,
|
|
7481
|
+
usesConversationVar
|
|
7482
|
+
});
|
|
7353
7483
|
}
|
|
7354
7484
|
if (this.options.showProgressBar && progressBarManager) {
|
|
7355
7485
|
await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
@@ -8353,7 +8483,7 @@ var Eval = class Eval {
|
|
|
8353
8483
|
const evalInstance = new Eval(eval_.config, {
|
|
8354
8484
|
id: eval_.id,
|
|
8355
8485
|
createdAt: new Date(eval_.createdAt),
|
|
8356
|
-
author: eval_.author
|
|
8486
|
+
author: eval_.author,
|
|
8357
8487
|
description: eval_.description || void 0,
|
|
8358
8488
|
prompts: eval_.prompts || [],
|
|
8359
8489
|
datasetId,
|
|
@@ -8376,7 +8506,7 @@ var Eval = class Eval {
|
|
|
8376
8506
|
return (await getDb().select().from(evalsTable).limit(limit).orderBy(desc(evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
|
|
8377
8507
|
id: e.id,
|
|
8378
8508
|
createdAt: new Date(e.createdAt),
|
|
8379
|
-
author: e.author
|
|
8509
|
+
author: e.author,
|
|
8380
8510
|
description: e.description || void 0,
|
|
8381
8511
|
prompts: e.prompts || [],
|
|
8382
8512
|
persisted: true
|
|
@@ -8391,7 +8521,7 @@ var Eval = class Eval {
|
|
|
8391
8521
|
return (await getDb().select().from(evalsTable).orderBy(desc(evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
|
|
8392
8522
|
id: e.id,
|
|
8393
8523
|
createdAt: new Date(e.createdAt),
|
|
8394
|
-
author: e.author
|
|
8524
|
+
author: e.author,
|
|
8395
8525
|
description: e.description || void 0,
|
|
8396
8526
|
prompts: e.prompts || [],
|
|
8397
8527
|
persisted: true
|
|
@@ -8406,7 +8536,7 @@ var Eval = class Eval {
|
|
|
8406
8536
|
static async create(config, renderedPrompts, opts) {
|
|
8407
8537
|
const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
|
|
8408
8538
|
const evalId = opts?.id || createEvalId(createdAt);
|
|
8409
|
-
const author = opts
|
|
8539
|
+
const author = opts && "author" in opts ? opts.author ?? null : getAuthor();
|
|
8410
8540
|
const db = getDb();
|
|
8411
8541
|
const datasetId = sha256(JSON.stringify(config.tests || []));
|
|
8412
8542
|
db.transaction(() => {
|
|
@@ -8468,7 +8598,7 @@ var Eval = class Eval {
|
|
|
8468
8598
|
});
|
|
8469
8599
|
return new Eval(config, {
|
|
8470
8600
|
id: evalId,
|
|
8471
|
-
author
|
|
8601
|
+
author,
|
|
8472
8602
|
createdAt,
|
|
8473
8603
|
persisted: true,
|
|
8474
8604
|
runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions)
|
|
@@ -8478,7 +8608,7 @@ var Eval = class Eval {
|
|
|
8478
8608
|
const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
|
|
8479
8609
|
this.createdAt = createdAt.getTime();
|
|
8480
8610
|
this.id = opts?.id || createEvalId(createdAt);
|
|
8481
|
-
this.author = opts?.author;
|
|
8611
|
+
this.author = opts?.author ?? null;
|
|
8482
8612
|
this.config = config;
|
|
8483
8613
|
this.results = [];
|
|
8484
8614
|
this.prompts = opts?.prompts || [];
|
|
@@ -9008,7 +9138,7 @@ var Eval = class Eval {
|
|
|
9008
9138
|
newConfig.description = copyDescription;
|
|
9009
9139
|
const newPrompts = structuredClone(this.prompts);
|
|
9010
9140
|
const newVars = this.vars ? structuredClone(this.vars) : [];
|
|
9011
|
-
const author =
|
|
9141
|
+
const author = getAuthor();
|
|
9012
9142
|
const db = getDb();
|
|
9013
9143
|
let copiedCount = 0;
|
|
9014
9144
|
db.transaction(() => {
|
|
@@ -11402,25 +11532,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
11402
11532
|
getAssertions(_prompt) {
|
|
11403
11533
|
return getHarmfulAssertions(this.harmCategory);
|
|
11404
11534
|
}
|
|
11405
|
-
promptsToTestCases(prompts) {
|
|
11535
|
+
async promptsToTestCases(prompts) {
|
|
11406
11536
|
const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
|
|
11407
11537
|
const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
|
|
11408
|
-
|
|
11538
|
+
const pluginId = getShortPluginId(this.harmCategory);
|
|
11539
|
+
return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
|
|
11409
11540
|
const vars = { [this.injectVar]: __prompt };
|
|
11410
|
-
|
|
11411
|
-
|
|
11412
|
-
|
|
11413
|
-
|
|
11541
|
+
let inputMaterialization;
|
|
11542
|
+
if (hasMultipleInputs) {
|
|
11543
|
+
let parsed;
|
|
11544
|
+
try {
|
|
11545
|
+
parsed = JSON.parse(__prompt);
|
|
11546
|
+
} catch (error) {
|
|
11547
|
+
logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
11548
|
+
}
|
|
11549
|
+
if (parsed) try {
|
|
11550
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
|
|
11551
|
+
materializationIndex,
|
|
11552
|
+
pluginId,
|
|
11553
|
+
provider: this.provider,
|
|
11554
|
+
purpose: this.purpose
|
|
11555
|
+
});
|
|
11556
|
+
Object.assign(vars, materializedVars.vars);
|
|
11557
|
+
inputMaterialization = materializedVars.metadata;
|
|
11558
|
+
} catch (error) {
|
|
11559
|
+
logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
|
|
11560
|
+
throw error;
|
|
11561
|
+
}
|
|
11562
|
+
}
|
|
11414
11563
|
return {
|
|
11415
11564
|
vars,
|
|
11416
11565
|
metadata: {
|
|
11417
11566
|
harmCategory: harmCategoryLabel,
|
|
11418
|
-
pluginId
|
|
11419
|
-
pluginConfig: this.config
|
|
11567
|
+
pluginId,
|
|
11568
|
+
pluginConfig: this.config,
|
|
11569
|
+
...inputMaterialization ? { inputMaterialization } : {}
|
|
11420
11570
|
},
|
|
11421
11571
|
assert: getHarmfulAssertions(this.harmCategory)
|
|
11422
11572
|
};
|
|
11423
|
-
});
|
|
11573
|
+
}));
|
|
11424
11574
|
}
|
|
11425
11575
|
};
|
|
11426
11576
|
//#endregion
|
|
@@ -11429,20 +11579,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
11429
11579
|
* Extract content from <Prompt> tags and parse JSON if inputs are defined.
|
|
11430
11580
|
* Returns the processed prompt and any additional vars extracted from JSON.
|
|
11431
11581
|
*/
|
|
11432
|
-
function processPromptForInputs(prompt,
|
|
11582
|
+
async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
|
|
11433
11583
|
let processedPrompt = prompt.trim();
|
|
11434
11584
|
const additionalVars = {};
|
|
11585
|
+
let additionalMetadata;
|
|
11435
11586
|
const extractedPrompt = extractPromptFromTags(processedPrompt);
|
|
11436
11587
|
if (extractedPrompt) processedPrompt = extractedPrompt;
|
|
11437
|
-
if (inputs && Object.keys(inputs).length > 0)
|
|
11438
|
-
|
|
11439
|
-
|
|
11440
|
-
|
|
11441
|
-
|
|
11588
|
+
if (inputs && Object.keys(inputs).length > 0) {
|
|
11589
|
+
let parsed;
|
|
11590
|
+
try {
|
|
11591
|
+
parsed = JSON.parse(processedPrompt);
|
|
11592
|
+
} catch (error) {
|
|
11593
|
+
logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
11594
|
+
}
|
|
11595
|
+
if (parsed) try {
|
|
11596
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
|
|
11597
|
+
materializationIndex,
|
|
11598
|
+
pluginId: plugin,
|
|
11599
|
+
provider,
|
|
11600
|
+
purpose
|
|
11601
|
+
});
|
|
11602
|
+
Object.assign(additionalVars, materializedVars.vars);
|
|
11603
|
+
additionalMetadata = materializedVars.metadata;
|
|
11604
|
+
} catch (error) {
|
|
11605
|
+
logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
|
|
11606
|
+
throw error;
|
|
11607
|
+
}
|
|
11442
11608
|
}
|
|
11443
11609
|
return {
|
|
11444
11610
|
processedPrompt,
|
|
11445
|
-
additionalVars
|
|
11611
|
+
additionalVars,
|
|
11612
|
+
additionalMetadata
|
|
11446
11613
|
};
|
|
11447
11614
|
}
|
|
11448
11615
|
async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
|
|
@@ -11463,15 +11630,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
11463
11630
|
};
|
|
11464
11631
|
const allPrompts = await retryWithDeduplication(generatePrompts, n);
|
|
11465
11632
|
const inputs = config?.inputs;
|
|
11466
|
-
return sampleArray(allPrompts, n).map((prompt) => {
|
|
11467
|
-
const { processedPrompt, additionalVars } = processPromptForInputs(prompt,
|
|
11633
|
+
return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
|
|
11634
|
+
const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
|
|
11468
11635
|
const testCase = createTestCase(injectVar, processedPrompt, plugin);
|
|
11469
11636
|
if (Object.keys(additionalVars).length > 0) testCase.vars = {
|
|
11470
11637
|
...testCase.vars,
|
|
11471
11638
|
...additionalVars
|
|
11472
11639
|
};
|
|
11640
|
+
if (additionalMetadata) testCase.metadata = {
|
|
11641
|
+
...testCase.metadata,
|
|
11642
|
+
inputMaterialization: additionalMetadata
|
|
11643
|
+
};
|
|
11473
11644
|
return testCase;
|
|
11474
|
-
});
|
|
11645
|
+
}));
|
|
11475
11646
|
}
|
|
11476
11647
|
//#endregion
|
|
11477
11648
|
//#region src/redteam/plugins/teenSafety/graderExamples.ts
|
|
@@ -11789,7 +11960,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
|
|
|
11789
11960
|
function computeModifiersFromConfig(config) {
|
|
11790
11961
|
const modifiers = { ...config?.modifiers };
|
|
11791
11962
|
if (config?.language && typeof config.language === "string") modifiers.language = config.language;
|
|
11792
|
-
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
11963
|
+
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
11793
11964
|
const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
11794
11965
|
if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
|
|
11795
11966
|
return modifiers;
|
|
@@ -11955,6 +12126,31 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
11955
12126
|
return [];
|
|
11956
12127
|
}
|
|
11957
12128
|
}
|
|
12129
|
+
async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
|
|
12130
|
+
const inputs = config.inputs;
|
|
12131
|
+
if (!inputs || Object.keys(inputs).length === 0) return testCases;
|
|
12132
|
+
return Promise.all(testCases.map(async (testCase, materializationIndex) => {
|
|
12133
|
+
const inputVars = extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
|
|
12134
|
+
if (!inputVars) return testCase;
|
|
12135
|
+
const materializedVars = await materializeInputVariablesWithMetadata(inputVars, inputs, {
|
|
12136
|
+
materializationIndex,
|
|
12137
|
+
pluginId,
|
|
12138
|
+
provider,
|
|
12139
|
+
purpose
|
|
12140
|
+
});
|
|
12141
|
+
return {
|
|
12142
|
+
...testCase,
|
|
12143
|
+
vars: {
|
|
12144
|
+
...testCase.vars || {},
|
|
12145
|
+
...materializedVars.vars
|
|
12146
|
+
},
|
|
12147
|
+
metadata: {
|
|
12148
|
+
...testCase.metadata || {},
|
|
12149
|
+
...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
|
|
12150
|
+
}
|
|
12151
|
+
};
|
|
12152
|
+
}));
|
|
12153
|
+
}
|
|
11958
12154
|
function createPluginFactory(PluginClass, key, validate) {
|
|
11959
12155
|
return {
|
|
11960
12156
|
key,
|
|
@@ -11965,13 +12161,21 @@ function createPluginFactory(PluginClass, key, validate) {
|
|
|
11965
12161
|
logger.debug(`Using local redteam generation for ${key}`);
|
|
11966
12162
|
return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
|
|
11967
12163
|
}
|
|
11968
|
-
const
|
|
12164
|
+
const pluginId = getShortPluginId(key);
|
|
12165
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12166
|
+
config: configWithDefaults ?? {},
|
|
12167
|
+
injectVar,
|
|
12168
|
+
pluginId,
|
|
12169
|
+
provider,
|
|
12170
|
+
purpose,
|
|
12171
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
12172
|
+
});
|
|
11969
12173
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
11970
12174
|
return testCases.map((testCase) => ({
|
|
11971
12175
|
...testCase,
|
|
11972
12176
|
metadata: {
|
|
11973
12177
|
...testCase.metadata,
|
|
11974
|
-
pluginId
|
|
12178
|
+
pluginId,
|
|
11975
12179
|
pluginConfig: {
|
|
11976
12180
|
...configWithDefaults,
|
|
11977
12181
|
modifiers: computedModifiers
|
|
@@ -12028,7 +12232,7 @@ const pluginFactories = [
|
|
|
12028
12232
|
key: category,
|
|
12029
12233
|
action: async (params) => {
|
|
12030
12234
|
if (neverGenerateRemote()) {
|
|
12031
|
-
logger.error(`${category} plugin
|
|
12235
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
12032
12236
|
return [];
|
|
12033
12237
|
}
|
|
12034
12238
|
const testCases = await getHarmfulTests(params, category);
|
|
@@ -12051,13 +12255,21 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
|
|
|
12051
12255
|
key: category,
|
|
12052
12256
|
action: async (params) => {
|
|
12053
12257
|
if (shouldGenerateRemote()) {
|
|
12054
|
-
const
|
|
12258
|
+
const pluginId = getShortPluginId(category);
|
|
12259
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12260
|
+
config: params.config ?? {},
|
|
12261
|
+
injectVar: params.injectVar,
|
|
12262
|
+
pluginId,
|
|
12263
|
+
provider: params.provider,
|
|
12264
|
+
purpose: params.purpose,
|
|
12265
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
12266
|
+
});
|
|
12055
12267
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
12056
12268
|
return testCases.map((testCase) => ({
|
|
12057
12269
|
...testCase,
|
|
12058
12270
|
metadata: {
|
|
12059
12271
|
...testCase.metadata,
|
|
12060
|
-
pluginId
|
|
12272
|
+
pluginId,
|
|
12061
12273
|
pluginConfig: {
|
|
12062
12274
|
...params.config,
|
|
12063
12275
|
modifiers: computedModifiers
|
|
@@ -12079,16 +12291,24 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
|
|
|
12079
12291
|
key: category,
|
|
12080
12292
|
action: async (params) => {
|
|
12081
12293
|
if (neverGenerateRemote()) {
|
|
12082
|
-
logger.error(`${category} plugin
|
|
12294
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
12083
12295
|
return [];
|
|
12084
12296
|
}
|
|
12085
|
-
const
|
|
12297
|
+
const pluginId = getShortPluginId(category);
|
|
12298
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12299
|
+
config: params.config ?? {},
|
|
12300
|
+
injectVar: params.injectVar,
|
|
12301
|
+
pluginId,
|
|
12302
|
+
provider: params.provider,
|
|
12303
|
+
purpose: params.purpose,
|
|
12304
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
12305
|
+
});
|
|
12086
12306
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
12087
12307
|
return testCases.map((testCase) => ({
|
|
12088
12308
|
...testCase,
|
|
12089
12309
|
metadata: {
|
|
12090
12310
|
...testCase.metadata,
|
|
12091
|
-
pluginId
|
|
12311
|
+
pluginId,
|
|
12092
12312
|
pluginConfig: {
|
|
12093
12313
|
...params.config,
|
|
12094
12314
|
modifiers: computedModifiers
|
|
@@ -12101,19 +12321,27 @@ function createRemotePlugin(key, validate) {
|
|
|
12101
12321
|
return {
|
|
12102
12322
|
key,
|
|
12103
12323
|
validate,
|
|
12104
|
-
action: async ({ purpose, injectVar, n, config }) => {
|
|
12324
|
+
action: async ({ provider, purpose, injectVar, n, config }) => {
|
|
12105
12325
|
const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
|
|
12106
12326
|
if (neverGenerateRemote()) {
|
|
12107
|
-
logger.error(`${key} plugin
|
|
12327
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
|
|
12108
12328
|
return [];
|
|
12109
12329
|
}
|
|
12110
|
-
const
|
|
12330
|
+
const pluginId = getShortPluginId(key);
|
|
12331
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12332
|
+
config: configWithDefaults ?? {},
|
|
12333
|
+
injectVar,
|
|
12334
|
+
pluginId,
|
|
12335
|
+
provider,
|
|
12336
|
+
purpose,
|
|
12337
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
12338
|
+
});
|
|
12111
12339
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
12112
12340
|
const testsWithMetadata = testCases.map((testCase) => ({
|
|
12113
12341
|
...testCase,
|
|
12114
12342
|
metadata: {
|
|
12115
12343
|
...testCase.metadata,
|
|
12116
|
-
pluginId
|
|
12344
|
+
pluginId,
|
|
12117
12345
|
pluginConfig: {
|
|
12118
12346
|
...configWithDefaults,
|
|
12119
12347
|
modifiers: computedModifiers
|
|
@@ -12183,6 +12411,37 @@ function getPolicyText(metadata) {
|
|
|
12183
12411
|
return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
|
|
12184
12412
|
}
|
|
12185
12413
|
}
|
|
12414
|
+
async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
|
|
12415
|
+
const inputs = testCase.metadata?.pluginConfig?.inputs;
|
|
12416
|
+
const inputMaterialization = testCase.metadata?.inputMaterialization;
|
|
12417
|
+
if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
|
|
12418
|
+
inputMaterialization,
|
|
12419
|
+
vars: testCase.vars
|
|
12420
|
+
};
|
|
12421
|
+
try {
|
|
12422
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
|
|
12423
|
+
materializationIndex,
|
|
12424
|
+
pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
|
|
12425
|
+
provider,
|
|
12426
|
+
purpose
|
|
12427
|
+
});
|
|
12428
|
+
return {
|
|
12429
|
+
inputMaterialization: materializedVars.metadata ? {
|
|
12430
|
+
...inputMaterialization,
|
|
12431
|
+
...materializedVars.metadata
|
|
12432
|
+
} : inputMaterialization,
|
|
12433
|
+
vars: {
|
|
12434
|
+
...testCase.vars,
|
|
12435
|
+
...materializedVars.vars
|
|
12436
|
+
}
|
|
12437
|
+
};
|
|
12438
|
+
} catch {
|
|
12439
|
+
return {
|
|
12440
|
+
inputMaterialization,
|
|
12441
|
+
vars: testCase.vars
|
|
12442
|
+
};
|
|
12443
|
+
}
|
|
12444
|
+
}
|
|
12186
12445
|
/**
|
|
12187
12446
|
* Gets the severity level for a plugin based on its ID and configuration.
|
|
12188
12447
|
* @param pluginId - The ID of the plugin.
|
|
@@ -12324,6 +12583,7 @@ const categories = {
|
|
|
12324
12583
|
foundation: FOUNDATION_PLUGINS,
|
|
12325
12584
|
harmful: Object.keys(HARM_PLUGINS),
|
|
12326
12585
|
"coding-agent:core": CODING_AGENT_CORE_PLUGINS,
|
|
12586
|
+
"coding-agent:all": CODING_AGENT_PLUGINS,
|
|
12327
12587
|
bias: BIAS_PLUGINS,
|
|
12328
12588
|
pii: PII_PLUGINS,
|
|
12329
12589
|
medical: MEDICAL_PLUGINS,
|
|
@@ -12418,7 +12678,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
12418
12678
|
* @param injectVar - The variable to inject.
|
|
12419
12679
|
* @returns An array of new test cases generated by strategies.
|
|
12420
12680
|
*/
|
|
12421
|
-
async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
12681
|
+
async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
12422
12682
|
const newTestCases = [];
|
|
12423
12683
|
const strategyResults = {};
|
|
12424
12684
|
for (const strategy of strategies) {
|
|
@@ -12474,14 +12734,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12474
12734
|
}
|
|
12475
12735
|
}
|
|
12476
12736
|
resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
|
|
12477
|
-
newTestCases.push(...resultTestCases.map((t) => {
|
|
12478
|
-
const
|
|
12479
|
-
let updatedVars = t.vars;
|
|
12480
|
-
if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
|
|
12481
|
-
const parsed = JSON.parse(String(t.vars[injectVar]));
|
|
12482
|
-
updatedVars = { ...t.vars };
|
|
12483
|
-
Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
|
|
12484
|
-
} catch {}
|
|
12737
|
+
newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
|
|
12738
|
+
const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
|
|
12485
12739
|
const strategyConfig = {
|
|
12486
12740
|
...strategy.config || {},
|
|
12487
12741
|
...maxCharsPerMessage ? { maxCharsPerMessage } : {},
|
|
@@ -12489,16 +12743,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12489
12743
|
};
|
|
12490
12744
|
return {
|
|
12491
12745
|
...t,
|
|
12492
|
-
vars
|
|
12746
|
+
vars,
|
|
12493
12747
|
metadata: {
|
|
12494
12748
|
...t?.metadata || {},
|
|
12495
12749
|
...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
|
|
12496
12750
|
...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
|
|
12497
12751
|
...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
|
|
12752
|
+
...inputMaterialization && { inputMaterialization },
|
|
12498
12753
|
...Object.keys(strategyConfig).length > 0 && { strategyConfig }
|
|
12499
12754
|
}
|
|
12500
12755
|
};
|
|
12501
|
-
}));
|
|
12756
|
+
})));
|
|
12502
12757
|
const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
|
|
12503
12758
|
const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
|
|
12504
12759
|
const applyNumTestsCap = (calculatedRequested) => {
|
|
@@ -12953,7 +13208,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12953
13208
|
targetIds,
|
|
12954
13209
|
...retryStrategy.config
|
|
12955
13210
|
};
|
|
12956
|
-
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
|
|
13211
|
+
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
|
|
12957
13212
|
pluginTestCases.push(...retryTestCases);
|
|
12958
13213
|
Object.assign(strategyResults, retryResults);
|
|
12959
13214
|
if (showProgressBar) progressBar?.increment(retryTestCases.length);
|
|
@@ -12961,7 +13216,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12961
13216
|
checkAbort();
|
|
12962
13217
|
const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
|
|
12963
13218
|
if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
|
|
12964
|
-
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
13219
|
+
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
12965
13220
|
Object.assign(strategyResults, otherStrategyResults);
|
|
12966
13221
|
if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
|
|
12967
13222
|
const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
|
|
@@ -13854,6 +14109,10 @@ function stripAuthFromUrl(urlString) {
|
|
|
13854
14109
|
}
|
|
13855
14110
|
}
|
|
13856
14111
|
async function handleEmailCollection(evalRecord) {
|
|
14112
|
+
if (evalRecord.author) {
|
|
14113
|
+
logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
|
|
14114
|
+
return;
|
|
14115
|
+
}
|
|
13857
14116
|
if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
|
|
13858
14117
|
let email = getUserEmail();
|
|
13859
14118
|
if (!email) {
|
|
@@ -13997,6 +14256,14 @@ function shouldShareResults(opts) {
|
|
|
13997
14256
|
return cloudConfig.isEnabled() && sharing !== false;
|
|
13998
14257
|
}
|
|
13999
14258
|
//#endregion
|
|
14259
|
+
//#region src/commands/eval/redteamWarning.ts
|
|
14260
|
+
function warnIfRedteamConfigHasNoTests(config, testSuite) {
|
|
14261
|
+
if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
|
|
14262
|
+
Warning: Config file has a redteam section but no test cases.
|
|
14263
|
+
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
14264
|
+
`));
|
|
14265
|
+
}
|
|
14266
|
+
//#endregion
|
|
14000
14267
|
//#region src/util/formatDuration.ts
|
|
14001
14268
|
/**
|
|
14002
14269
|
* Formats a duration in seconds into a human-readable string
|
|
@@ -14437,14 +14704,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14437
14704
|
state.resume = true;
|
|
14438
14705
|
state.retryMode = true;
|
|
14439
14706
|
} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
|
|
14440
|
-
if (!cmdObj.envPath && commandLineOptions?.envPath) {
|
|
14707
|
+
if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
|
|
14441
14708
|
logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
|
|
14442
14709
|
setupEnv(commandLineOptions.envPath);
|
|
14443
14710
|
}
|
|
14444
|
-
|
|
14445
|
-
Warning: Config file has a redteam section but no test cases.
|
|
14446
|
-
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
14447
|
-
`));
|
|
14711
|
+
warnIfRedteamConfigHasNoTests(config, testSuite);
|
|
14448
14712
|
if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
|
|
14449
14713
|
const maybeUrl = config.providers[0]?.config?.url;
|
|
14450
14714
|
if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
|
|
@@ -14552,7 +14816,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14552
14816
|
${z.prettifyError(testSuiteSchema.error)}
|
|
14553
14817
|
|
|
14554
14818
|
Please review your promptfooconfig.yaml configuration.`));
|
|
14555
|
-
const
|
|
14819
|
+
const author = getAuthor();
|
|
14820
|
+
const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
|
|
14821
|
+
author,
|
|
14822
|
+
runtimeOptions: options
|
|
14823
|
+
}) : new Eval(config, {
|
|
14824
|
+
author,
|
|
14825
|
+
runtimeOptions: options
|
|
14826
|
+
});
|
|
14556
14827
|
const abortController = new AbortController();
|
|
14557
14828
|
const previousAbortSignal = evaluateOptions.abortSignal;
|
|
14558
14829
|
evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
|
|
@@ -14954,6 +15225,12 @@ async function doRedteamRun(options) {
|
|
|
14954
15225
|
return evalResult;
|
|
14955
15226
|
}
|
|
14956
15227
|
//#endregion
|
|
15228
|
+
//#region src/types/transform.ts
|
|
15229
|
+
/** Runtime type guard for `TransformFunction` values. */
|
|
15230
|
+
function isTransformFunction(value) {
|
|
15231
|
+
return typeof value === "function";
|
|
15232
|
+
}
|
|
15233
|
+
//#endregion
|
|
14957
15234
|
//#region src/index.ts
|
|
14958
15235
|
/**
|
|
14959
15236
|
* Shallow-clone a test case so the caller can swap in resolved ApiProvider
|
|
@@ -14975,66 +15252,148 @@ function cloneTestForResolve(test) {
|
|
|
14975
15252
|
if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
|
|
14976
15253
|
return cloned;
|
|
14977
15254
|
}
|
|
15255
|
+
function toSerializableProviderRef(provider) {
|
|
15256
|
+
if (isApiProvider(provider)) return sanitizeProvider(provider);
|
|
15257
|
+
if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
|
|
15258
|
+
return provider;
|
|
15259
|
+
}
|
|
15260
|
+
function isRecord(value) {
|
|
15261
|
+
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
15262
|
+
}
|
|
15263
|
+
function withSerializableProvider(record) {
|
|
15264
|
+
if (!isApiProvider(record.provider)) return record;
|
|
15265
|
+
return {
|
|
15266
|
+
...record,
|
|
15267
|
+
provider: sanitizeProvider(record.provider)
|
|
15268
|
+
};
|
|
15269
|
+
}
|
|
15270
|
+
/**
|
|
15271
|
+
* Function-valued transforms are first-class at runtime but are silently dropped
|
|
15272
|
+
* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
|
|
15273
|
+
* a function reference, so replace every `transform`-like field with a
|
|
15274
|
+
* `[inline function]: name` marker. Non-function values pass through unchanged.
|
|
15275
|
+
*
|
|
15276
|
+
* `droppedRef.value` is flipped to `true` the first time a function is replaced
|
|
15277
|
+
* so the caller can emit a single warning instead of logging per field.
|
|
15278
|
+
*/
|
|
15279
|
+
function replaceFunctionTransforms(record, droppedRef) {
|
|
15280
|
+
let result;
|
|
15281
|
+
for (const key of TRANSFORM_KEYS) {
|
|
15282
|
+
const value = record[key];
|
|
15283
|
+
if (!isTransformFunction(value)) continue;
|
|
15284
|
+
if (!result) result = { ...record };
|
|
15285
|
+
result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
|
|
15286
|
+
droppedRef.value = true;
|
|
15287
|
+
}
|
|
15288
|
+
return result ?? record;
|
|
15289
|
+
}
|
|
15290
|
+
function toSerializableAssertion(assertion, droppedRef) {
|
|
15291
|
+
if (!isRecord(assertion)) return assertion;
|
|
15292
|
+
let sanitizedAssertion = withSerializableProvider(assertion);
|
|
15293
|
+
sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
|
|
15294
|
+
if (Array.isArray(assertion.assert)) sanitizedAssertion = {
|
|
15295
|
+
...sanitizedAssertion,
|
|
15296
|
+
assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
15297
|
+
};
|
|
15298
|
+
return sanitizedAssertion;
|
|
15299
|
+
}
|
|
15300
|
+
function toSerializableTestCase(test, droppedRef) {
|
|
15301
|
+
if (!isRecord(test)) return test;
|
|
15302
|
+
let sanitizedTest = withSerializableProvider(test);
|
|
15303
|
+
if (isRecord(test.options)) {
|
|
15304
|
+
let options = withSerializableProvider(test.options);
|
|
15305
|
+
options = replaceFunctionTransforms(options, droppedRef);
|
|
15306
|
+
if (options !== test.options) sanitizedTest = {
|
|
15307
|
+
...sanitizedTest,
|
|
15308
|
+
options
|
|
15309
|
+
};
|
|
15310
|
+
}
|
|
15311
|
+
if (Array.isArray(test.assert)) sanitizedTest = {
|
|
15312
|
+
...sanitizedTest,
|
|
15313
|
+
assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
15314
|
+
};
|
|
15315
|
+
return sanitizedTest;
|
|
15316
|
+
}
|
|
15317
|
+
function toSerializableScenario(scenario, droppedRef) {
|
|
15318
|
+
if (!isRecord(scenario)) return scenario;
|
|
15319
|
+
if (!Array.isArray(scenario.tests)) return scenario;
|
|
15320
|
+
return {
|
|
15321
|
+
...scenario,
|
|
15322
|
+
tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
|
|
15323
|
+
};
|
|
15324
|
+
}
|
|
15325
|
+
function createSerializableUnifiedConfig(testSuite, prompts) {
|
|
15326
|
+
const droppedRef = { value: false };
|
|
15327
|
+
const config = {
|
|
15328
|
+
...testSuite,
|
|
15329
|
+
providers: toSerializableProviderRef(testSuite.providers),
|
|
15330
|
+
defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
|
|
15331
|
+
tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
|
|
15332
|
+
scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
|
|
15333
|
+
prompts
|
|
15334
|
+
};
|
|
15335
|
+
if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
|
|
15336
|
+
return config;
|
|
15337
|
+
}
|
|
14978
15338
|
async function evaluate(testSuite, options = {}) {
|
|
14979
|
-
|
|
14980
|
-
|
|
15339
|
+
const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
|
|
15340
|
+
if (testSuiteConfig.writeLatestResults) await runDbMigrations();
|
|
15341
|
+
const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
|
|
14981
15342
|
const providerMap = {};
|
|
14982
15343
|
for (const p of loadedProviders) {
|
|
14983
15344
|
providerMap[p.id()] = p;
|
|
14984
15345
|
if (p.label) providerMap[p.label] = p;
|
|
14985
15346
|
}
|
|
14986
|
-
let resolvedDefaultTest =
|
|
14987
|
-
if (typeof
|
|
15347
|
+
let resolvedDefaultTest = testSuiteConfig.defaultTest;
|
|
15348
|
+
if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
|
|
14988
15349
|
const constructedTestSuite = {
|
|
14989
|
-
...
|
|
15350
|
+
...testSuiteConfig,
|
|
14990
15351
|
defaultTest: resolvedDefaultTest,
|
|
14991
|
-
scenarios:
|
|
15352
|
+
scenarios: testSuiteConfig.scenarios,
|
|
14992
15353
|
providers: loadedProviders,
|
|
14993
|
-
tests: await readTests(
|
|
14994
|
-
nunjucksFilters: await readFilters(
|
|
14995
|
-
prompts: await processPrompts(
|
|
15354
|
+
tests: await readTests(testSuiteConfig.tests),
|
|
15355
|
+
nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
|
|
15356
|
+
prompts: await processPrompts(testSuiteConfig.prompts)
|
|
14996
15357
|
};
|
|
14997
15358
|
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
14998
15359
|
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
14999
15360
|
if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
15000
|
-
env:
|
|
15361
|
+
env: testSuiteConfig.env,
|
|
15001
15362
|
basePath: state.basePath
|
|
15002
15363
|
});
|
|
15003
15364
|
if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
15004
|
-
env:
|
|
15365
|
+
env: testSuiteConfig.env,
|
|
15005
15366
|
basePath: state.basePath
|
|
15006
15367
|
});
|
|
15007
15368
|
}
|
|
15008
15369
|
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
15009
15370
|
for (const test of constructedTestSuite.tests) {
|
|
15010
15371
|
if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
|
|
15011
|
-
env:
|
|
15372
|
+
env: testSuiteConfig.env,
|
|
15012
15373
|
basePath: state.basePath
|
|
15013
15374
|
});
|
|
15014
15375
|
for (const assertion of test.assert || []) {
|
|
15015
15376
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
15016
15377
|
if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
|
|
15017
|
-
env:
|
|
15378
|
+
env: testSuiteConfig.env,
|
|
15018
15379
|
basePath: state.basePath
|
|
15019
15380
|
});
|
|
15020
15381
|
}
|
|
15021
15382
|
}
|
|
15022
15383
|
if (options.cache === false) disableCache();
|
|
15023
|
-
const parsedProviderPromptMap = readProviderPromptMap(
|
|
15024
|
-
const unifiedConfig =
|
|
15025
|
-
|
|
15026
|
-
|
|
15027
|
-
};
|
|
15028
|
-
const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
|
|
15384
|
+
const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
|
|
15385
|
+
const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
|
|
15386
|
+
const author = getAuthor(suiteAuthor);
|
|
15387
|
+
const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
|
|
15029
15388
|
const ret = await evaluate$1({
|
|
15030
15389
|
...constructedTestSuite,
|
|
15031
15390
|
providerPromptMap: parsedProviderPromptMap
|
|
15032
15391
|
}, evalRecord, {
|
|
15033
15392
|
eventSource: "library",
|
|
15034
|
-
isRedteam: Boolean(
|
|
15393
|
+
isRedteam: Boolean(testSuiteConfig.redteam),
|
|
15035
15394
|
...options
|
|
15036
15395
|
});
|
|
15037
|
-
if (
|
|
15396
|
+
if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
|
|
15038
15397
|
const shareableUrl = await createShareableUrl(ret, { silent: true });
|
|
15039
15398
|
if (shareableUrl) {
|
|
15040
15399
|
ret.shareableUrl = shareableUrl;
|
|
@@ -15045,9 +15404,9 @@ async function evaluate(testSuite, options = {}) {
|
|
|
15045
15404
|
logger.warn(`Failed to create shareable URL: ${error}`);
|
|
15046
15405
|
}
|
|
15047
15406
|
else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
|
|
15048
|
-
if (
|
|
15049
|
-
if (typeof
|
|
15050
|
-
else if (Array.isArray(
|
|
15407
|
+
if (testSuiteConfig.outputPath) {
|
|
15408
|
+
if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
|
|
15409
|
+
else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
|
|
15051
15410
|
}
|
|
15052
15411
|
return ret;
|
|
15053
15412
|
}
|
|
@@ -15076,6 +15435,6 @@ var src_default = {
|
|
|
15076
15435
|
redteam
|
|
15077
15436
|
};
|
|
15078
15437
|
//#endregion
|
|
15079
|
-
export { AssertionOrSetSchema, AssertionSchema, AssertionSetSchema, AssertionTypeSchema, AtomicTestCaseSchema, BaseAssertionTypesSchema, BaseTokenUsageSchema, CommandLineOptionsSchema, CompletedPromptSchema, CompletionTokenDetailsSchema, ConversationMessageSchema, DerivedMetricSchema, EvalResultsFilterMode, EvaluateOptionsSchema, GradingConfigSchema, InputsSchema, NotPrefixedAssertionTypesSchema, OutputConfigSchema, OutputFileExtension, PartialGenerationError, PluginConfigSchema, PolicyObjectSchema, ProvidersSchema, ResultFailureReason, ScenarioSchema, SpecialAssertionTypesSchema, StrategyConfigSchema, TestCaseSchema, TestCaseWithVarsFileSchema, TestCasesWithMetadataPromptSchema, TestCasesWithMetadataSchema, TestGeneratorConfigSchema, TestSuiteConfigSchema, TestSuiteSchema, UnifiedConfigSchema, VarsSchema, assertions_default as assertions, cache_exports as cache, src_default as default, evaluate, generateTable, guardrails, isApiProvider, isGradingResult, isProviderOptions, isResultFailureReason, loadApiProvider, redteam };
|
|
15438
|
+
export { AssertionOrSetSchema, AssertionSchema, AssertionSetSchema, AssertionTypeSchema, AtomicTestCaseSchema, BaseAssertionTypesSchema, BaseTokenUsageSchema, CommandLineOptionsSchema, CompletedPromptSchema, CompletionTokenDetailsSchema, ConversationMessageSchema, DerivedMetricSchema, DocumentMediaInjectionPlacementSchema, DocumentMediaInjectionPlacementValues, DocxInjectionPlacementSchema, DocxInjectionPlacementValues, EvalResultsFilterMode, EvaluateOptionsSchema, GradingConfigSchema, InputConfigSchema, InputDefinitionObjectSchema, InputDefinitionSchema, InputTypeSchema, InputTypeValues, InputsSchema, NotPrefixedAssertionTypesSchema, OutputConfigSchema, OutputFileExtension, PartialGenerationError, PluginConfigSchema, PolicyObjectSchema, ProvidersSchema, ResultFailureReason, ScenarioSchema, SpecialAssertionTypesSchema, StrategyConfigSchema, TestCaseSchema, TestCaseWithVarsFileSchema, TestCasesWithMetadataPromptSchema, TestCasesWithMetadataSchema, TestGeneratorConfigSchema, TestSuiteConfigSchema, TestSuiteSchema, UnifiedConfigSchema, VarsSchema, assertions_default as assertions, buildInputPromptDescription, cache_exports as cache, src_default as default, evaluate, generateTable, getInputDescription, getInputType, guardrails, isApiProvider, isGradingResult, isProviderOptions, isResultFailureReason, isTransformFunction, loadApiProvider, normalizeInputDefinition, normalizeInputs, redteam };
|
|
15080
15439
|
|
|
15081
15440
|
//# sourceMappingURL=index.js.map
|