promptfoo 0.121.5 → 0.121.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-CaLNYnf7.js → accounts-Ca7WIoPY.js} +12 -7
- package/dist/src/{accounts-CLJHCDDb.js → accounts-CjFnOPmb.js} +13 -8
- package/dist/src/{accounts-BIFntVWB.cjs → accounts-CmWzeD2d.cjs} +16 -10
- package/dist/src/{accounts-bnyHT7Ju.js → accounts-DanM1wq_.js} +12 -7
- package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-CJ0j3fBi.js} +2 -2
- package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-DDEGRV9v.js} +2 -2
- package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
- package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-TxUEMPYS.js} +2 -2
- package/dist/src/{agents-WULPVjbH.cjs → agents-B4sRuXg3.cjs} +7 -6
- package/dist/src/{agents-DhxWMCtH.js → agents-B8q7h_ek.js} +4 -4
- package/dist/src/{agents-BGqaTDnr.js → agents-CBgJvRkB.js} +20 -9
- package/dist/src/{agents-n6vPqV3i.js → agents-CYn2n3QP.js} +4 -4
- package/dist/src/{agents-BV9yFpXX.js → agents-D-vDNFx4.js} +20 -9
- package/dist/src/{agents-BYdMl1UE.js → agents-LrHuQqr1.js} +20 -9
- package/dist/src/{agents-emVcx3yh.js → agents-QGg76OF-.js} +2 -2
- package/dist/src/{agents-DiWmQYH9.cjs → agents-eHZ9nlgA.cjs} +21 -10
- package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-CJEbQ0o6.js} +6 -6
- package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-D5HXzZ0s.js} +6 -6
- package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
- package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-eYv3a_DK.js} +6 -6
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
- package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/{audio-Da8U9IS5.js → audio-BqnRvcWG.js} +3 -3
- package/dist/src/{audio-BvpTOArF.js → audio-CPMtV1yR.js} +3 -3
- package/dist/src/{audio-CScmnmEB.js → audio-DyiebVB3.js} +3 -3
- package/dist/src/{audio-C0vDeS0j.cjs → audio-FnxbEnSE.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
- package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
- package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
- package/dist/src/{base-BTux96b1.js → base-CKLo890h.js} +4 -3
- package/dist/src/{base-BOMaNEes.js → base-Co80MMCi.js} +4 -3
- package/dist/src/{base-Tw6uhH8K.cjs → base-DGJW48uz.cjs} +5 -4
- package/dist/src/{base-dYsl2hmL.js → base-E9I8zXjz.js} +4 -3
- package/dist/src/bestOfN-B3wNzjSB.js +137 -0
- package/dist/src/bestOfN-BBsO41z4.js +136 -0
- package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
- package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
- package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
- package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
- package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/{cache-DGg-yTZG.cjs → cache-CPGUA4Yl.cjs} +135 -25
- package/dist/src/cache-Cf7b4pWE.js +3 -0
- package/dist/src/{cache-Bzttsk0X.js → cache-DIXbtkNO.js} +125 -10
- package/dist/src/{cache-BI5BY7ey.js → cache-DpPWrkTE.js} +127 -11
- package/dist/src/{cache-Cr-qWIbP.js → cache-roFAE0cI.js} +125 -10
- package/dist/src/{chat-DChSH_Es.js → chat-CUCorGiL.js} +9 -9
- package/dist/src/{chat-DH97tVV9.cjs → chat-DG1wG4w0.cjs} +6 -6
- package/dist/src/{chat-Cx_LkwvZ.js → chat-Dabu84Br.js} +11 -11
- package/dist/src/{chat-BLOdH60v.js → chat-DqUFcWI0.js} +11 -11
- package/dist/src/{chat-vYqqv1gP.cjs → chat-DxTDQ83C.cjs} +14 -13
- package/dist/src/{chat-DG2LkwLq.js → chat-GmlolEwo.js} +4 -4
- package/dist/src/{chat-aMQZw6R7.js → chat-TP8Qifkh.js} +4 -4
- package/dist/src/{chat-D9nudO9b.js → chat-iwaM5UTQ.js} +4 -4
- package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
- package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
- package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-BQNp_y-F.js} +209 -64
- package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-D5Jl0SDh.js} +210 -65
- package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-DH416NBD.cjs} +216 -70
- package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-x1XJ1-pU.js} +210 -65
- package/dist/src/{cloud-Da0bofJd.js → cloud-D3DiFqH6.js} +2 -2
- package/dist/src/cloud-p96PA4MH.js +3 -0
- package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-B6NVI3ax.js} +4 -4
- package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
- package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-RFSojyXG.js} +4 -4
- package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-r4tbYmWU.js} +4 -4
- package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-BCkLouto.js} +3 -3
- package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-BaZ4insB.js} +3 -3
- package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-CF-Vb-2Z.js} +3 -3
- package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
- package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-B8KHEiF4.js} +5 -5
- package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-CnrLBCeA.cjs} +12 -11
- package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-DIXZ230V.js} +4 -4
- package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-Dd22dC_N.js} +5 -5
- package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-B6Wah8Pa.js} +5 -5
- package/dist/src/codex-sdk-BGjVAk23.js +3 -0
- package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
- package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-CmQABzV3.js} +3 -3
- package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-D2d54RL8.js} +5 -5
- package/dist/src/{cometapi-B5ImDlSm.js → cometapi-Bu9B8NUY.js} +7 -7
- package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CtzNCHKu.js} +7 -7
- package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-DHCDlQUI.cjs} +8 -8
- package/dist/src/{cometapi-CC7hWxmX.js → cometapi-OBILPLlu.js} +7 -7
- package/dist/src/{completion-Vq_ad618.js → completion-CO2e1_62.js} +4 -4
- package/dist/src/{completion-DtQ72Bm3.cjs → completion-CSYfl2cd.cjs} +6 -6
- package/dist/src/{completion-2iuYVxwi.js → completion-DZNxcyfG.js} +5 -5
- package/dist/src/{completion-CrD6MQ93.js → completion-sNvCLTAP.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
- package/dist/src/crescendo-CU_Y2i-m.js +702 -0
- package/dist/src/crescendo-J1Xx4_zb.js +703 -0
- package/dist/src/crescendo-QiaSLW0d.js +701 -0
- package/dist/src/custom-BJfP00Bh.js +619 -0
- package/dist/src/custom-CZVn-1-r.js +620 -0
- package/dist/src/custom-Cqia7M0D.cjs +621 -0
- package/dist/src/custom-notggYVl.js +618 -0
- package/dist/src/{docker--3qzPa-6.js → docker-4D1eL6Gq.js} +5 -5
- package/dist/src/{docker-Dorv4_Dg.js → docker-BBv1WUDu.js} +5 -5
- package/dist/src/{docker-D3AY-5F5.cjs → docker-D06JUoe2.cjs} +6 -6
- package/dist/src/{docker-DCsCDvwM.js → docker-DdJQBxK9.js} +5 -5
- package/dist/src/{embedding-DNRvZwRN.js → embedding--UZVe4_7.js} +5 -5
- package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BbrwopfX.cjs} +6 -6
- package/dist/src/{embedding-ChS1ivFS.js → embedding-Bi3rxrZF.js} +5 -5
- package/dist/src/{embedding-D_bI4NDq.js → embedding-C251p1-8.js} +4 -4
- package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
- package/dist/src/{eval-DJ_4A-tr.js → eval-0VRANImH.js} +19 -19
- package/dist/src/{eval-BQPLBJbw.js → eval-DscR5iOM.js} +1 -1
- package/dist/src/{evalResult-pSvGWFMo.js → evalResult-2RRJvFyB.js} +18 -11
- package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-CvtS8h8u.cjs} +29 -11
- package/dist/src/evalResult-DqzsS6_W.js +3 -0
- package/dist/src/{evalResult-D6P5I5il.js → evalResult-eUkJv9Ko.js} +17 -10
- package/dist/src/evaluator-DNdJF1Gv.js +3 -0
- package/dist/src/{evaluator-D-UIbbYq.js → evaluator-DRoiYB2q.js} +258 -132
- package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
- package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
- package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
- package/dist/src/{extractor-YlZbUMsL.js → extractor-BR7XAzAL.js} +5 -5
- package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-BdxEtt3J.cjs} +6 -6
- package/dist/src/{extractor-DxyiFhPk.js → extractor-CIW3iN-b.js} +5 -5
- package/dist/src/{extractor-BM3jRERL.js → extractor-CxRtnaHl.js} +5 -5
- package/dist/src/{fetch-Y5qX_kST.js → fetch-BufrQtvR.js} +90 -26
- package/dist/src/{fetch-B6ch2nU2.js → fetch-DXUnXkVU.js} +86 -26
- package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Dw4XZHjj.cjs} +115 -32
- package/dist/src/{fetch-D9xxyC1p.js → fetch-It34O8Ur.js} +90 -26
- package/dist/src/fetch-_YgGd2qv.js +3 -0
- package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ckd3q3AY.js +467 -0
- package/dist/src/goat-Qgurm-NP.js +466 -0
- package/dist/src/goat-ghadEDdy.js +465 -0
- package/dist/src/goat-una6pZGP.cjs +469 -0
- package/dist/src/graders-BDT7dif6.js +3 -0
- package/dist/src/{graders-CgPn32yp.js → graders-BGP99PdK.js} +1017 -84
- package/dist/src/{graders-BoUqsCEm.js → graders-BX0f2tvS.js} +1022 -84
- package/dist/src/{graders-CwrbifOo.js → graders-C0nXU_ZP.js} +1020 -82
- package/dist/src/{graders-Bw1wk_21.cjs → graders-ClrU2fnd.cjs} +1085 -128
- package/dist/src/hydra-BSNZZm2M.js +543 -0
- package/dist/src/hydra-BxdG4nkg.js +541 -0
- package/dist/src/hydra-DE4xWwyc.js +542 -0
- package/dist/src/hydra-DrJttnvw.cjs +542 -0
- package/dist/src/image-B4oBtu6J.js +443 -0
- package/dist/src/{image-Dr_3I3nK.js → image-BN-hjLL9.js} +3 -3
- package/dist/src/{image-BeWaInPF.js → image-B_fPIwdg.js} +3 -3
- package/dist/src/image-BvUAW344.js +442 -0
- package/dist/src/image-Cvjwx1uY.js +442 -0
- package/dist/src/{image-D10dNAav.cjs → image-DfVCGPbI.cjs} +4 -4
- package/dist/src/{image-qjO6FWPs.js → image-QzmydkiG.js} +3 -3
- package/dist/src/image-X0oY4350.cjs +465 -0
- package/dist/src/index.cjs +688 -313
- package/dist/src/index.d.cts +3152 -1617
- package/dist/src/index.d.ts +3151 -1616
- package/dist/src/index.js +582 -223
- package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
- package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
- package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
- package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
- package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
- package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
- package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
- package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-CpU6i2As.js +490 -0
- package/dist/src/iterative-DJQEQpG3.js +491 -0
- package/dist/src/iterative-DQBuWM-j.cjs +493 -0
- package/dist/src/iterative-FTS4Bz67.js +492 -0
- package/dist/src/iterativeImage-BUABMVOA.js +413 -0
- package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
- package/dist/src/iterativeImage-BzUapOUi.js +414 -0
- package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
- package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
- package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
- package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
- package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
- package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
- package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
- package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
- package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
- package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-BadkINlJ.js} +24 -10
- package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-Bi_8sV-H.js} +23 -9
- package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-CkMljjdg.js} +24 -10
- package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
- package/dist/src/{litellm-DRc4qWfc.js → litellm-BKBo0jpC.js} +4 -4
- package/dist/src/{litellm-BLSiANhk.js → litellm-BXyn5kZK.js} +4 -4
- package/dist/src/{litellm-DQGo_juI.js → litellm-CNcfbCfa.js} +4 -4
- package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CtAr7bKG.cjs} +5 -5
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BMX1iEB6.js} +5 -5
- package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-CR5TSpp4.js} +5 -5
- package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
- package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-OEMmS1RB.js} +5 -5
- package/dist/src/main.js +704 -208
- package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
- package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
- package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
- package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
- package/dist/src/{messages-CewuNcNS.js → messages-BabO-cX8.js} +17 -9
- package/dist/src/{messages-BnsVHUnm.cjs → messages-DBPir0TQ.cjs} +24 -15
- package/dist/src/{messages-B9dSjrNf.js → messages-DGUlSNU7.js} +18 -10
- package/dist/src/{messages-CI69Lasb.js → messages-vsE_-Lv0.js} +18 -10
- package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
- package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
- package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
- package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
- package/dist/src/mischievousUser-frFYKxu6.js +47 -0
- package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
- package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-CNV5bMSk.cjs} +7 -7
- package/dist/src/{modelslab-zpz9JcK0.js → modelslab-Cogmu4mG.js} +6 -6
- package/dist/src/{modelslab-D73OnKSx.js → modelslab-Dzst7VTU.js} +6 -6
- package/dist/src/{modelslab-BCLOtfek.js → modelslab-EyDczZ5A.js} +6 -6
- package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-BGPNBOMS.js} +5 -5
- package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-B_5NKFu1.js} +5 -5
- package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-C4eUJGse.js} +5 -5
- package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
- package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-BNGmgfFz.js} +3 -3
- package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-ChPlh5na.js} +2 -2
- package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-CrV0iaY_.js} +3 -3
- package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
- package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
- package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-BIHlu_36.js} +9 -8
- package/dist/src/{openclaw-DObVgpjC.js → openclaw-CF7fMido.js} +9 -8
- package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-Dphc01BY.cjs} +17 -15
- package/dist/src/{openclaw-DA8U4DsD.js → openclaw-zIJAsz3P.js} +9 -8
- package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-B3vlPLsp.js} +38 -3
- package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-D05JSgMQ.js} +39 -4
- package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-DoY6GbWw.cjs} +45 -9
- package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-sRKYHGoI.js} +39 -4
- package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
- package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
- package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BDrfT30-.js +180 -0
- package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
- package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
- package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
- package/dist/src/{providers-DT-GtF2t.js → providers-B9KzWxAX.js} +739 -11919
- package/dist/src/{providers-DRrerKra.js → providers-BCCz6_IX.js} +813 -11944
- package/dist/src/{providers-eDShy16E.cjs → providers-BDVVIQM6.cjs} +787 -12132
- package/dist/src/{providers-Ctcc592x.js → providers-BYAn82cf.js} +1 -1
- package/dist/src/{providers-CJh7iriU.js → providers-DVYRZP4E.js} +746 -11866
- package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
- package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
- package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
- package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
- package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-BAp6iTZD.cjs} +4 -4
- package/dist/src/{quiverai-CPKhWgaT.js → quiverai-BvIhI_0l.js} +3 -3
- package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CdTWPe-A.js} +3 -3
- package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-Cv7rJKDz.js} +3 -3
- package/dist/src/registry-BUJrgjwv.js +124 -0
- package/dist/src/registry-DXm1t_x0.js +125 -0
- package/dist/src/registry-Dp5EqoXc.js +124 -0
- package/dist/src/registry-KCVF1CFC.cjs +124 -0
- package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-B1_XsKXU.js} +16 -147
- package/dist/src/{server-gyd6d4Hc.js → remoteGeneration-COpWcmWd.js} +15 -108
- package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
- package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-CF-ayauu.cjs → responses-1UFFF9N_.cjs} +12 -11
- package/dist/src/{responses-B8haB-mD.js → responses-B3W2JvOQ.js} +9 -9
- package/dist/src/{responses-1ztiVYsx.js → responses-B6ktc3Ra.js} +7 -7
- package/dist/src/{responses-BiaBguAu.js → responses-URRzV8qE.js} +9 -9
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
- package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
- package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
- package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-CujrzP1a.js} +61 -50
- package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-DzffAqo_.cjs} +65 -53
- package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-vhtSV7JI.js} +61 -50
- package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-yr1QKeBs.js} +61 -50
- package/dist/src/{scanner-nOCWNIXa.js → scanner-DS0109SS.js} +6 -6
- package/dist/src/server/index.js +4147 -449
- package/dist/src/server-B8rqV126.cjs +126 -0
- package/dist/src/server-BaLytskk.js +3 -0
- package/dist/src/server-CMJD10J4.js +107 -0
- package/dist/src/server-Ddp8GNMp.js +146 -0
- package/dist/src/server-DhMHosWj.js +182 -0
- package/dist/src/shared-7pmVZLNO.js +1334 -0
- package/dist/src/shared-9WHQ1oNE.js +1335 -0
- package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-D6IjElRI.js +1334 -0
- package/dist/src/shared-WkgnDkcg.cjs +1436 -0
- package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
- package/dist/src/simulatedUser-C9aQObBI.js +222 -0
- package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
- package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
- package/dist/src/simulatedUser-p3tACcmw.js +223 -0
- package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
- package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
- package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-BU4qcnOb.js +875 -0
- package/dist/src/storage-CA-v9V2v.cjs +911 -0
- package/dist/src/storage-CD-GWAdx.js +822 -0
- package/dist/src/storage-QdU-SmvD.js +834 -0
- package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
- package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-7CS3Alao.cjs +2360 -0
- package/dist/src/strategies-CiSeroPH.js +2331 -0
- package/dist/src/strategies-DRJjGTIY.js +2333 -0
- package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
- package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
- package/dist/src/{telemetry-DWdGHvEf.js → telemetry-00ezXr_t.js} +4 -4
- package/dist/src/telemetry-ByPqDcKC.js +3 -0
- package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-CJ7FnCsc.cjs} +15 -9
- package/dist/src/{telemetry--iqaGyaS.js → telemetry-DmXYcJNV.js} +4 -4
- package/dist/src/{telemetry-CgdVGV8N.js → telemetry-DwX9XUN5.js} +4 -4
- package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
- package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
- package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
- package/dist/src/tracingOptions-Chi74lOD.js +219 -0
- package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
- package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
- package/dist/src/{transcription-84t4ALo2.js → transcription-B8uIgCYX.js} +5 -5
- package/dist/src/{transcription-Bm2emLmJ.js → transcription-CfU5loSq.js} +5 -5
- package/dist/src/{transcription-D7Q0vJsh.js → transcription-Dkd22_4K.js} +4 -4
- package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-mzuf18Mq.cjs} +9 -8
- package/dist/src/{transform-DtooZqYY.js → transform-BIMynQsA.js} +8 -8
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DhNkAUs8.cjs} +12 -11
- package/dist/src/{transform-_DpNB4qp.js → transform-DmvYBRll.js} +8 -8
- package/dist/src/{transform-B-b6Cq-q.js → transform-EtD4jAWi.js} +8 -8
- package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
- package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
- package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
- package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
- package/dist/src/{util-DFPeFkiV.js → util--WMgw7wM.js} +28 -8
- package/dist/src/{util-C-kmRosx.js → util-5WnCSb0h.js} +9 -7
- package/dist/src/{util-A5_ZsQUn.cjs → util-BSIuSLVK.cjs} +12 -9
- package/dist/src/{util-Dub0f_ej.js → util-Bx677_k2.js} +17 -10
- package/dist/src/util-CN8om2rz.cjs +386 -0
- package/dist/src/{util-DN0-b81k.js → util-CoQWM76y.js} +28 -8
- package/dist/src/util-DNl96nNs.js +327 -0
- package/dist/src/{util-BQOCAHQC.js → util-DURocbYR.js} +46 -11
- package/dist/src/util-Df8YMvS1.js +327 -0
- package/dist/src/{util-BVXcTwXu.js → util-DiQ3QvBB.js} +28 -8
- package/dist/src/{util-3pBZZb_H.js → util-I-Rf-KaD.js} +45 -10
- package/dist/src/{util-Dpmm_dAI.cjs → util-IYzs5Y04.cjs} +33 -7
- package/dist/src/{util-BlFVL0UF.js → util-LKTmNsMQ.js} +9 -7
- package/dist/src/{util-DvpHnLt0.cjs → util-SPsvFONY.cjs} +29 -21
- package/dist/src/{util-B9CNhyac.js → util-efByNxcr.js} +9 -7
- package/dist/src/util-kDURhgJW.js +328 -0
- package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
- package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
- package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/{version-CbuBKu2U.js → version-BK20a4sw.js} +2 -2
- package/dist/src/{version-D9zu9FWB.cjs → version-BWCSaByA.cjs} +2 -2
- package/dist/src/{version-CbpiUINz.js → version-eRkNuGv8.js} +2 -2
- package/dist/src/{version-0frU0UTr.js → version-lpHV_53E.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +48 -22
- package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
- package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
- package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-BRkhlH3k.cjs +0 -3
- package/dist/src/cache-BlC6aeJ0.js +0 -3
- package/dist/src/cloud-CoD5OacT.js +0 -3
- package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
- package/dist/src/codex-sdk-Danroptg.cjs +0 -2
- package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
- package/dist/src/evalResult-BBK58h2B.js +0 -3
- package/dist/src/evalResult-spPqh1G_.js +0 -2
- package/dist/src/evaluator-DgLKaZk8.js +0 -3
- package/dist/src/fetch-8viavNv8.js +0 -3
- package/dist/src/graders-C84JI-m5.js +0 -2
- package/dist/src/graders-CBbd0K0Q.cjs +0 -2
- package/dist/src/graders-CbQqpHSN.js +0 -3
- package/dist/src/graders-DS42d3ZG.js +0 -2
- package/dist/src/image-BmilRNqO.js +0 -258
- package/dist/src/image-CxJoa3aW.cjs +0 -280
- package/dist/src/image-DsGRlkh7.js +0 -257
- package/dist/src/image-a_SGUobh.js +0 -257
- package/dist/src/providers-BuyzKt7C.js +0 -2
- package/dist/src/providers-C7lNVBjX.cjs +0 -3
- package/dist/src/providers-CCE2COJi2.js +0 -2
- package/dist/src/render-7uNJ2V14.js +0 -135
- package/dist/src/render-DlscvAUJ.js +0 -135
- package/dist/src/render-eui5p5mL.js +0 -136
- package/dist/src/render-tG6ir9_g.cjs +0 -165
- package/dist/src/rubyUtils-4hjGxvju.js +0 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
- package/dist/src/server-ByiF3qlg.js +0 -386
- package/dist/src/server-C0XKRNB_.cjs +0 -2
- package/dist/src/server-C_15p79-.js +0 -3
- package/dist/src/store-2OXm_eBY.js +0 -240
- package/dist/src/store-BELqNwvz.js +0 -3
- package/dist/src/store-uQZ4AjPe.cjs +0 -2
- package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
- package/dist/src/telemetry-ZdPZc0fm.js +0 -3
- package/dist/src/transform-BQt0BeAW.js +0 -3
- package/dist/src/transform-Bq5oqC0s.cjs +0 -2
- package/dist/src/transform-C9izGX54.cjs +0 -228
- package/dist/src/transform-CwbAZ84V.js +0 -216
- package/dist/src/transform-DzCF-wqV.js +0 -213
- package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/main.js
CHANGED
|
@@ -1,35 +1,45 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
|
|
3
|
-
import {
|
|
4
|
-
import { n as VERSION } from "./version-
|
|
3
|
+
import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, M as TERMINAL_MAX_WIDTH, N as getDefaultPort, O as CLOUD_PROVIDER_PREFIX, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, _ as REQUEST_TIMEOUT_MS, f as sleep, i as fetchWithTimeout, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, x as isPromptfooSampleTarget } from "./fetch-DXUnXkVU.js";
|
|
4
|
+
import { n as VERSION } from "./version-eRkNuGv8.js";
|
|
5
5
|
import { t as invariant } from "./invariant-B2Rf6avk.js";
|
|
6
|
-
import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-
|
|
7
|
-
import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-
|
|
8
|
-
import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-
|
|
9
|
-
import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import { a as
|
|
13
|
-
import {
|
|
14
|
-
import { a as
|
|
15
|
-
import { i as
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import { n as
|
|
20
|
-
import { a as
|
|
21
|
-
import { n as
|
|
22
|
-
import { n as escapeRegExp, t as ellipsize } from "./text-
|
|
23
|
-
import { n as
|
|
24
|
-
import { n as
|
|
25
|
-
import {
|
|
26
|
-
import {
|
|
27
|
-
import {
|
|
28
|
-
import {
|
|
29
|
-
import {
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
32
|
-
import { n as
|
|
6
|
+
import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CjFnOPmb.js";
|
|
7
|
+
import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-D3DiFqH6.js";
|
|
8
|
+
import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-00ezXr_t.js";
|
|
9
|
+
import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, _t as CODING_AGENT_PLUGINS, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BFevViUY.js";
|
|
10
|
+
import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
|
|
11
|
+
import { r as importModule, t as getDirectory } from "./esm-Bexx2PFc.js";
|
|
12
|
+
import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, t as renderEnvOnlyInObject } from "./render-CSP99NLm.js";
|
|
13
|
+
import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-B1_XsKXU.js";
|
|
14
|
+
import { a as openAuthBrowser, i as checkServerRunning, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-DhMHosWj.js";
|
|
15
|
+
import { _ as makeRequest$1, c as getCloudDatabaseId, d as getOrgContext, f as getPluginSeverityOverridesFromCloud, g as isCloudProvider, h as getUserTeams, i as retrieveMedia, l as getConfigFromCloud, m as getProviderFromCloud, o as canCreateTargets, r as mediaExists, s as checkCloudPermissions, t as getMediaStorage, u as getEvalConfigFromCloud, v as resolveTeamFromIdentifier, y as resolveTeamId } from "./storage-BU4qcnOb.js";
|
|
16
|
+
import { n as sha256, t as randomSequence } from "./createHash-CgRvs4Fn.js";
|
|
17
|
+
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-DpPWrkTE.js";
|
|
18
|
+
import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
|
|
19
|
+
import { D as normalizeProviderRef, L as readFilters, M as maybeLoadFromExternalFile, R as readOutput, _ as resultIsForTestCase, a as ComparisonEvalNotFoundError, b as getProviderDescription, c as getEvalTableOutputPromptLocationsBySize, d as fetchCsvFromGoogleSheet, f as setupEnv, g as getTestCaseDeduplicationKey, h as filterRuntimeVars, i as writeOutput, j as maybeLoadConfigFromExternalFile, l as getEvalTablePromptStrippedPayload, m as extractRuntimeVars, n as createOutputMetadata, o as evalTableToJson, p as deduplicateTestCases, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as mergeComparisonTables, v as checkProviderApiKeys, y as doesProviderRefMatch } from "./util-DURocbYR.js";
|
|
20
|
+
import { a as resolveProviderConfigs, d as MCPProvider, f as HttpProvider, i as resolveProvider, m as createTransformResponse, n as loadApiProvider, p as createTransformRequest, r as loadApiProviders, t as getProviderIds } from "./providers-B9KzWxAX.js";
|
|
21
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
|
|
22
|
+
import { n as escapeRegExp, t as ellipsize } from "./text-Db-Wt2u2.js";
|
|
23
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DafUHOeh.js";
|
|
24
|
+
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DODuTK-a.js";
|
|
25
|
+
import { n as createPlaceholderInputValue, r as materializeInputVariablesWithMetadata, t as buildPromptInputDescriptions } from "./inputVariables-DXFdi7AI.js";
|
|
26
|
+
import { a as extractPromptFromTags, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, r as extractInputVarsFromPrompt, s as getShortPluginId } from "./util-kDURhgJW.js";
|
|
27
|
+
import { n as PromptfooHarmfulCompletionProvider } from "./promptfoo-Rjp-MeBb.js";
|
|
28
|
+
import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as getDefaultProviders, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as readProviderPromptMap, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, X as processPrompts, Y as doRemoteGrading, Z as readPrompts, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-BGP99PdK.js";
|
|
29
|
+
import { f as redteamProviderManager, m as TokenUsageTracker } from "./shared-9WHQ1oNE.js";
|
|
30
|
+
import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-DRoiYB2q.js";
|
|
31
|
+
import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
|
|
32
|
+
import { n as TRANSFORM_KEYS, t as INLINE_FUNCTION_LABEL } from "./transform-BnSTnFlp.js";
|
|
33
|
+
import { n as getTraceStore } from "./store-DKd5592Q.js";
|
|
34
|
+
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CSurUUyV.js";
|
|
35
|
+
import { n as isBlobStorageEnabled } from "./extractor-CIW3iN-b.js";
|
|
36
|
+
import { n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-4X-Wd8PG.js";
|
|
37
|
+
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-0VRANImH.js";
|
|
38
|
+
import { n as sanitizeProvider, t as EvalResult } from "./evalResult-eUkJv9Ko.js";
|
|
39
|
+
import { n as renderPrompt } from "./evaluatorHelpers-BsYP_muT.js";
|
|
40
|
+
import { t as formatDuration } from "./formatDuration-CMVNrYvE.js";
|
|
41
|
+
import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-DRJjGTIY.js";
|
|
42
|
+
import { n as shouldUseInkUI } from "./interactiveCheck-C4QlIuoR.js";
|
|
33
43
|
import { fileURLToPath } from "node:url";
|
|
34
44
|
import { Command, InvalidArgumentError } from "commander";
|
|
35
45
|
import { z } from "zod";
|
|
@@ -62,9 +72,9 @@ import process$1 from "process";
|
|
|
62
72
|
import $RefParser from "@apidevtools/json-schema-ref-parser";
|
|
63
73
|
import async from "async";
|
|
64
74
|
import { and, asc, count, desc, eq, inArray, isNotNull, like, or, sql } from "drizzle-orm";
|
|
65
|
-
import cliProgress from "cli-progress";
|
|
66
75
|
import { URL as URL$1 } from "url";
|
|
67
76
|
import editor from "@inquirer/editor";
|
|
77
|
+
import cliProgress from "cli-progress";
|
|
68
78
|
import debounce from "debounce";
|
|
69
79
|
import { LRUCache } from "lru-cache";
|
|
70
80
|
import chokidar from "chokidar";
|
|
@@ -101,7 +111,7 @@ function runCommand(program) {
|
|
|
101
111
|
hasGithubPr: !!cmdObj.githubPr,
|
|
102
112
|
hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
|
|
103
113
|
});
|
|
104
|
-
const { executeScan } = await import("./scanner-
|
|
114
|
+
const { executeScan } = await import("./scanner-DS0109SS.js");
|
|
105
115
|
await executeScan(repoPath, cmdObj);
|
|
106
116
|
});
|
|
107
117
|
}
|
|
@@ -2995,6 +3005,10 @@ function stripAuthFromUrl(urlString) {
|
|
|
2995
3005
|
}
|
|
2996
3006
|
}
|
|
2997
3007
|
async function handleEmailCollection(evalRecord) {
|
|
3008
|
+
if (evalRecord.author) {
|
|
3009
|
+
logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
|
|
3010
|
+
return;
|
|
3011
|
+
}
|
|
2998
3012
|
if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
|
|
2999
3013
|
let email = getUserEmail();
|
|
3000
3014
|
if (!email) {
|
|
@@ -3257,6 +3271,14 @@ function shouldShareResults(opts) {
|
|
|
3257
3271
|
return cloudConfig.isEnabled() && sharing !== false;
|
|
3258
3272
|
}
|
|
3259
3273
|
//#endregion
|
|
3274
|
+
//#region src/commands/eval/redteamWarning.ts
|
|
3275
|
+
function warnIfRedteamConfigHasNoTests(config, testSuite) {
|
|
3276
|
+
if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
|
|
3277
|
+
Warning: Config file has a redteam section but no test cases.
|
|
3278
|
+
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
3279
|
+
`));
|
|
3280
|
+
}
|
|
3281
|
+
//#endregion
|
|
3260
3282
|
//#region src/commands/eval/summary.ts
|
|
3261
3283
|
function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
|
|
3262
3284
|
if (wasAborted) {
|
|
@@ -3644,6 +3666,9 @@ function setupRetryCommand(program) {
|
|
|
3644
3666
|
}
|
|
3645
3667
|
//#endregion
|
|
3646
3668
|
//#region src/models/modelAudit.ts
|
|
3669
|
+
function getModelAuditSortColumn(sortField) {
|
|
3670
|
+
return modelAuditsTable[sortField];
|
|
3671
|
+
}
|
|
3647
3672
|
function createScanId(createdAt = /* @__PURE__ */ new Date()) {
|
|
3648
3673
|
return `scan-${randomSequence(3)}-${createdAt.toISOString().slice(0, 19)}`;
|
|
3649
3674
|
}
|
|
@@ -3684,8 +3709,10 @@ var ModelAudit = class ModelAudit {
|
|
|
3684
3709
|
this.issues = data.issues || data.results?.issues || null;
|
|
3685
3710
|
const issues = data.issues || data.results?.issues;
|
|
3686
3711
|
const resultsHasErrors = data.results?.has_errors ?? false;
|
|
3687
|
-
if (data.hasErrors === void 0)
|
|
3688
|
-
|
|
3712
|
+
if (data.hasErrors === void 0) {
|
|
3713
|
+
const hasActualErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
|
|
3714
|
+
this.hasErrors = hasActualErrors;
|
|
3715
|
+
} else this.hasErrors = data.hasErrors;
|
|
3689
3716
|
this.totalChecks = data.totalChecks;
|
|
3690
3717
|
this.passedChecks = data.passedChecks;
|
|
3691
3718
|
this.failedChecks = data.failedChecks;
|
|
@@ -3783,9 +3810,9 @@ var ModelAudit = class ModelAudit {
|
|
|
3783
3810
|
static async getMany(limit = 100, offset = 0, sortField = "createdAt", sortOrder = "desc", search) {
|
|
3784
3811
|
let query = getDb().select().from(modelAuditsTable);
|
|
3785
3812
|
if (search) query = query.where(or(like(modelAuditsTable.name, `%${search}%`), like(modelAuditsTable.modelPath, `%${search}%`), like(modelAuditsTable.id, `%${search}%`)));
|
|
3786
|
-
const sortColumn = sortField
|
|
3787
|
-
if (sortOrder === "asc") query = query.orderBy(asc(sortColumn));
|
|
3788
|
-
else query = query.orderBy(desc(sortColumn));
|
|
3813
|
+
const sortColumn = getModelAuditSortColumn(sortField);
|
|
3814
|
+
if (sortOrder === "asc") query = sortField === "id" ? query.orderBy(asc(sortColumn)) : query.orderBy(asc(sortColumn), asc(modelAuditsTable.id));
|
|
3815
|
+
else query = sortField === "id" ? query.orderBy(desc(sortColumn)) : query.orderBy(desc(sortColumn), desc(modelAuditsTable.id));
|
|
3789
3816
|
return (await query.limit(limit).offset(offset).all()).map((r) => new ModelAudit({
|
|
3790
3817
|
...r,
|
|
3791
3818
|
persisted: true
|
|
@@ -4144,14 +4171,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
4144
4171
|
state.resume = true;
|
|
4145
4172
|
state.retryMode = true;
|
|
4146
4173
|
} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
|
|
4147
|
-
if (!cmdObj.envPath && commandLineOptions?.envPath) {
|
|
4174
|
+
if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
|
|
4148
4175
|
logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
|
|
4149
4176
|
setupEnv(commandLineOptions.envPath);
|
|
4150
4177
|
}
|
|
4151
|
-
|
|
4152
|
-
Warning: Config file has a redteam section but no test cases.
|
|
4153
|
-
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
4154
|
-
`));
|
|
4178
|
+
warnIfRedteamConfigHasNoTests(config, testSuite);
|
|
4155
4179
|
if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
|
|
4156
4180
|
const maybeUrl = config.providers[0]?.config?.url;
|
|
4157
4181
|
if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
|
|
@@ -4259,7 +4283,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
4259
4283
|
${z.prettifyError(testSuiteSchema.error)}
|
|
4260
4284
|
|
|
4261
4285
|
Please review your promptfooconfig.yaml configuration.`));
|
|
4262
|
-
const
|
|
4286
|
+
const author = getAuthor();
|
|
4287
|
+
const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
|
|
4288
|
+
author,
|
|
4289
|
+
runtimeOptions: options
|
|
4290
|
+
}) : new Eval(config, {
|
|
4291
|
+
author,
|
|
4292
|
+
runtimeOptions: options
|
|
4293
|
+
});
|
|
4263
4294
|
const abortController = new AbortController();
|
|
4264
4295
|
const previousAbortSignal = evaluateOptions.abortSignal;
|
|
4265
4296
|
evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
|
|
@@ -6032,25 +6063,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
6032
6063
|
getAssertions(_prompt) {
|
|
6033
6064
|
return getHarmfulAssertions(this.harmCategory);
|
|
6034
6065
|
}
|
|
6035
|
-
promptsToTestCases(prompts) {
|
|
6066
|
+
async promptsToTestCases(prompts) {
|
|
6036
6067
|
const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
|
|
6037
6068
|
const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
|
|
6038
|
-
|
|
6069
|
+
const pluginId = getShortPluginId(this.harmCategory);
|
|
6070
|
+
return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
|
|
6039
6071
|
const vars = { [this.injectVar]: __prompt };
|
|
6040
|
-
|
|
6041
|
-
|
|
6042
|
-
|
|
6043
|
-
|
|
6072
|
+
let inputMaterialization;
|
|
6073
|
+
if (hasMultipleInputs) {
|
|
6074
|
+
let parsed;
|
|
6075
|
+
try {
|
|
6076
|
+
parsed = JSON.parse(__prompt);
|
|
6077
|
+
} catch (error) {
|
|
6078
|
+
logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
6079
|
+
}
|
|
6080
|
+
if (parsed) try {
|
|
6081
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
|
|
6082
|
+
materializationIndex,
|
|
6083
|
+
pluginId,
|
|
6084
|
+
provider: this.provider,
|
|
6085
|
+
purpose: this.purpose
|
|
6086
|
+
});
|
|
6087
|
+
Object.assign(vars, materializedVars.vars);
|
|
6088
|
+
inputMaterialization = materializedVars.metadata;
|
|
6089
|
+
} catch (error) {
|
|
6090
|
+
logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
|
|
6091
|
+
throw error;
|
|
6092
|
+
}
|
|
6093
|
+
}
|
|
6044
6094
|
return {
|
|
6045
6095
|
vars,
|
|
6046
6096
|
metadata: {
|
|
6047
6097
|
harmCategory: harmCategoryLabel,
|
|
6048
|
-
pluginId
|
|
6049
|
-
pluginConfig: this.config
|
|
6098
|
+
pluginId,
|
|
6099
|
+
pluginConfig: this.config,
|
|
6100
|
+
...inputMaterialization ? { inputMaterialization } : {}
|
|
6050
6101
|
},
|
|
6051
6102
|
assert: getHarmfulAssertions(this.harmCategory)
|
|
6052
6103
|
};
|
|
6053
|
-
});
|
|
6104
|
+
}));
|
|
6054
6105
|
}
|
|
6055
6106
|
};
|
|
6056
6107
|
//#endregion
|
|
@@ -6059,20 +6110,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
6059
6110
|
* Extract content from <Prompt> tags and parse JSON if inputs are defined.
|
|
6060
6111
|
* Returns the processed prompt and any additional vars extracted from JSON.
|
|
6061
6112
|
*/
|
|
6062
|
-
function processPromptForInputs(prompt,
|
|
6113
|
+
async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
|
|
6063
6114
|
let processedPrompt = prompt.trim();
|
|
6064
6115
|
const additionalVars = {};
|
|
6116
|
+
let additionalMetadata;
|
|
6065
6117
|
const extractedPrompt = extractPromptFromTags(processedPrompt);
|
|
6066
6118
|
if (extractedPrompt) processedPrompt = extractedPrompt;
|
|
6067
|
-
if (inputs && Object.keys(inputs).length > 0)
|
|
6068
|
-
|
|
6069
|
-
|
|
6070
|
-
|
|
6071
|
-
|
|
6119
|
+
if (inputs && Object.keys(inputs).length > 0) {
|
|
6120
|
+
let parsed;
|
|
6121
|
+
try {
|
|
6122
|
+
parsed = JSON.parse(processedPrompt);
|
|
6123
|
+
} catch (error) {
|
|
6124
|
+
logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
6125
|
+
}
|
|
6126
|
+
if (parsed) try {
|
|
6127
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
|
|
6128
|
+
materializationIndex,
|
|
6129
|
+
pluginId: plugin,
|
|
6130
|
+
provider,
|
|
6131
|
+
purpose
|
|
6132
|
+
});
|
|
6133
|
+
Object.assign(additionalVars, materializedVars.vars);
|
|
6134
|
+
additionalMetadata = materializedVars.metadata;
|
|
6135
|
+
} catch (error) {
|
|
6136
|
+
logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
|
|
6137
|
+
throw error;
|
|
6138
|
+
}
|
|
6072
6139
|
}
|
|
6073
6140
|
return {
|
|
6074
6141
|
processedPrompt,
|
|
6075
|
-
additionalVars
|
|
6142
|
+
additionalVars,
|
|
6143
|
+
additionalMetadata
|
|
6076
6144
|
};
|
|
6077
6145
|
}
|
|
6078
6146
|
async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
|
|
@@ -6093,15 +6161,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
6093
6161
|
};
|
|
6094
6162
|
const allPrompts = await retryWithDeduplication(generatePrompts, n);
|
|
6095
6163
|
const inputs = config?.inputs;
|
|
6096
|
-
return sampleArray(allPrompts, n).map((prompt) => {
|
|
6097
|
-
const { processedPrompt, additionalVars } = processPromptForInputs(prompt,
|
|
6164
|
+
return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
|
|
6165
|
+
const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
|
|
6098
6166
|
const testCase = createTestCase(injectVar, processedPrompt, plugin);
|
|
6099
6167
|
if (Object.keys(additionalVars).length > 0) testCase.vars = {
|
|
6100
6168
|
...testCase.vars,
|
|
6101
6169
|
...additionalVars
|
|
6102
6170
|
};
|
|
6171
|
+
if (additionalMetadata) testCase.metadata = {
|
|
6172
|
+
...testCase.metadata,
|
|
6173
|
+
inputMaterialization: additionalMetadata
|
|
6174
|
+
};
|
|
6103
6175
|
return testCase;
|
|
6104
|
-
});
|
|
6176
|
+
}));
|
|
6105
6177
|
}
|
|
6106
6178
|
//#endregion
|
|
6107
6179
|
//#region src/redteam/plugins/teenSafety/graderExamples.ts
|
|
@@ -6419,7 +6491,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
|
|
|
6419
6491
|
function computeModifiersFromConfig(config) {
|
|
6420
6492
|
const modifiers = { ...config?.modifiers };
|
|
6421
6493
|
if (config?.language && typeof config.language === "string") modifiers.language = config.language;
|
|
6422
|
-
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
6494
|
+
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
6423
6495
|
const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
6424
6496
|
if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
|
|
6425
6497
|
return modifiers;
|
|
@@ -6585,6 +6657,31 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
6585
6657
|
return [];
|
|
6586
6658
|
}
|
|
6587
6659
|
}
|
|
6660
|
+
async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
|
|
6661
|
+
const inputs = config.inputs;
|
|
6662
|
+
if (!inputs || Object.keys(inputs).length === 0) return testCases;
|
|
6663
|
+
return Promise.all(testCases.map(async (testCase, materializationIndex) => {
|
|
6664
|
+
const inputVars = extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
|
|
6665
|
+
if (!inputVars) return testCase;
|
|
6666
|
+
const materializedVars = await materializeInputVariablesWithMetadata(inputVars, inputs, {
|
|
6667
|
+
materializationIndex,
|
|
6668
|
+
pluginId,
|
|
6669
|
+
provider,
|
|
6670
|
+
purpose
|
|
6671
|
+
});
|
|
6672
|
+
return {
|
|
6673
|
+
...testCase,
|
|
6674
|
+
vars: {
|
|
6675
|
+
...testCase.vars || {},
|
|
6676
|
+
...materializedVars.vars
|
|
6677
|
+
},
|
|
6678
|
+
metadata: {
|
|
6679
|
+
...testCase.metadata || {},
|
|
6680
|
+
...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
|
|
6681
|
+
}
|
|
6682
|
+
};
|
|
6683
|
+
}));
|
|
6684
|
+
}
|
|
6588
6685
|
function createPluginFactory(PluginClass, key, validate) {
|
|
6589
6686
|
return {
|
|
6590
6687
|
key,
|
|
@@ -6595,13 +6692,21 @@ function createPluginFactory(PluginClass, key, validate) {
|
|
|
6595
6692
|
logger.debug(`Using local redteam generation for ${key}`);
|
|
6596
6693
|
return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
|
|
6597
6694
|
}
|
|
6598
|
-
const
|
|
6695
|
+
const pluginId = getShortPluginId(key);
|
|
6696
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6697
|
+
config: configWithDefaults ?? {},
|
|
6698
|
+
injectVar,
|
|
6699
|
+
pluginId,
|
|
6700
|
+
provider,
|
|
6701
|
+
purpose,
|
|
6702
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
6703
|
+
});
|
|
6599
6704
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
6600
6705
|
return testCases.map((testCase) => ({
|
|
6601
6706
|
...testCase,
|
|
6602
6707
|
metadata: {
|
|
6603
6708
|
...testCase.metadata,
|
|
6604
|
-
pluginId
|
|
6709
|
+
pluginId,
|
|
6605
6710
|
pluginConfig: {
|
|
6606
6711
|
...configWithDefaults,
|
|
6607
6712
|
modifiers: computedModifiers
|
|
@@ -6658,7 +6763,7 @@ const pluginFactories = [
|
|
|
6658
6763
|
key: category,
|
|
6659
6764
|
action: async (params) => {
|
|
6660
6765
|
if (neverGenerateRemote()) {
|
|
6661
|
-
logger.error(`${category} plugin
|
|
6766
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
6662
6767
|
return [];
|
|
6663
6768
|
}
|
|
6664
6769
|
const testCases = await getHarmfulTests(params, category);
|
|
@@ -6681,13 +6786,21 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
|
|
|
6681
6786
|
key: category,
|
|
6682
6787
|
action: async (params) => {
|
|
6683
6788
|
if (shouldGenerateRemote()) {
|
|
6684
|
-
const
|
|
6789
|
+
const pluginId = getShortPluginId(category);
|
|
6790
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6791
|
+
config: params.config ?? {},
|
|
6792
|
+
injectVar: params.injectVar,
|
|
6793
|
+
pluginId,
|
|
6794
|
+
provider: params.provider,
|
|
6795
|
+
purpose: params.purpose,
|
|
6796
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
6797
|
+
});
|
|
6685
6798
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
6686
6799
|
return testCases.map((testCase) => ({
|
|
6687
6800
|
...testCase,
|
|
6688
6801
|
metadata: {
|
|
6689
6802
|
...testCase.metadata,
|
|
6690
|
-
pluginId
|
|
6803
|
+
pluginId,
|
|
6691
6804
|
pluginConfig: {
|
|
6692
6805
|
...params.config,
|
|
6693
6806
|
modifiers: computedModifiers
|
|
@@ -6709,16 +6822,24 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
|
|
|
6709
6822
|
key: category,
|
|
6710
6823
|
action: async (params) => {
|
|
6711
6824
|
if (neverGenerateRemote()) {
|
|
6712
|
-
logger.error(`${category} plugin
|
|
6825
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
6713
6826
|
return [];
|
|
6714
6827
|
}
|
|
6715
|
-
const
|
|
6828
|
+
const pluginId = getShortPluginId(category);
|
|
6829
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6830
|
+
config: params.config ?? {},
|
|
6831
|
+
injectVar: params.injectVar,
|
|
6832
|
+
pluginId,
|
|
6833
|
+
provider: params.provider,
|
|
6834
|
+
purpose: params.purpose,
|
|
6835
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
6836
|
+
});
|
|
6716
6837
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
6717
6838
|
return testCases.map((testCase) => ({
|
|
6718
6839
|
...testCase,
|
|
6719
6840
|
metadata: {
|
|
6720
6841
|
...testCase.metadata,
|
|
6721
|
-
pluginId
|
|
6842
|
+
pluginId,
|
|
6722
6843
|
pluginConfig: {
|
|
6723
6844
|
...params.config,
|
|
6724
6845
|
modifiers: computedModifiers
|
|
@@ -6731,19 +6852,27 @@ function createRemotePlugin(key, validate) {
|
|
|
6731
6852
|
return {
|
|
6732
6853
|
key,
|
|
6733
6854
|
validate,
|
|
6734
|
-
action: async ({ purpose, injectVar, n, config }) => {
|
|
6855
|
+
action: async ({ provider, purpose, injectVar, n, config }) => {
|
|
6735
6856
|
const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
|
|
6736
6857
|
if (neverGenerateRemote()) {
|
|
6737
|
-
logger.error(`${key} plugin
|
|
6858
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
|
|
6738
6859
|
return [];
|
|
6739
6860
|
}
|
|
6740
|
-
const
|
|
6861
|
+
const pluginId = getShortPluginId(key);
|
|
6862
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6863
|
+
config: configWithDefaults ?? {},
|
|
6864
|
+
injectVar,
|
|
6865
|
+
pluginId,
|
|
6866
|
+
provider,
|
|
6867
|
+
purpose,
|
|
6868
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
6869
|
+
});
|
|
6741
6870
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
6742
6871
|
const testsWithMetadata = testCases.map((testCase) => ({
|
|
6743
6872
|
...testCase,
|
|
6744
6873
|
metadata: {
|
|
6745
6874
|
...testCase.metadata,
|
|
6746
|
-
pluginId
|
|
6875
|
+
pluginId,
|
|
6747
6876
|
pluginConfig: {
|
|
6748
6877
|
...configWithDefaults,
|
|
6749
6878
|
modifiers: computedModifiers
|
|
@@ -6813,6 +6942,37 @@ function getPolicyText(metadata) {
|
|
|
6813
6942
|
return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
|
|
6814
6943
|
}
|
|
6815
6944
|
}
|
|
6945
|
+
async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
|
|
6946
|
+
const inputs = testCase.metadata?.pluginConfig?.inputs;
|
|
6947
|
+
const inputMaterialization = testCase.metadata?.inputMaterialization;
|
|
6948
|
+
if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
|
|
6949
|
+
inputMaterialization,
|
|
6950
|
+
vars: testCase.vars
|
|
6951
|
+
};
|
|
6952
|
+
try {
|
|
6953
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
|
|
6954
|
+
materializationIndex,
|
|
6955
|
+
pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
|
|
6956
|
+
provider,
|
|
6957
|
+
purpose
|
|
6958
|
+
});
|
|
6959
|
+
return {
|
|
6960
|
+
inputMaterialization: materializedVars.metadata ? {
|
|
6961
|
+
...inputMaterialization,
|
|
6962
|
+
...materializedVars.metadata
|
|
6963
|
+
} : inputMaterialization,
|
|
6964
|
+
vars: {
|
|
6965
|
+
...testCase.vars,
|
|
6966
|
+
...materializedVars.vars
|
|
6967
|
+
}
|
|
6968
|
+
};
|
|
6969
|
+
} catch {
|
|
6970
|
+
return {
|
|
6971
|
+
inputMaterialization,
|
|
6972
|
+
vars: testCase.vars
|
|
6973
|
+
};
|
|
6974
|
+
}
|
|
6975
|
+
}
|
|
6816
6976
|
/**
|
|
6817
6977
|
* Gets the severity level for a plugin based on its ID and configuration.
|
|
6818
6978
|
* @param pluginId - The ID of the plugin.
|
|
@@ -6954,6 +7114,7 @@ const categories = {
|
|
|
6954
7114
|
foundation: FOUNDATION_PLUGINS,
|
|
6955
7115
|
harmful: Object.keys(HARM_PLUGINS),
|
|
6956
7116
|
"coding-agent:core": CODING_AGENT_CORE_PLUGINS,
|
|
7117
|
+
"coding-agent:all": CODING_AGENT_PLUGINS,
|
|
6957
7118
|
bias: BIAS_PLUGINS,
|
|
6958
7119
|
pii: PII_PLUGINS,
|
|
6959
7120
|
medical: MEDICAL_PLUGINS,
|
|
@@ -7048,7 +7209,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
7048
7209
|
* @param injectVar - The variable to inject.
|
|
7049
7210
|
* @returns An array of new test cases generated by strategies.
|
|
7050
7211
|
*/
|
|
7051
|
-
async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
7212
|
+
async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
7052
7213
|
const newTestCases = [];
|
|
7053
7214
|
const strategyResults = {};
|
|
7054
7215
|
for (const strategy of strategies) {
|
|
@@ -7104,14 +7265,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
7104
7265
|
}
|
|
7105
7266
|
}
|
|
7106
7267
|
resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
|
|
7107
|
-
newTestCases.push(...resultTestCases.map((t) => {
|
|
7108
|
-
const
|
|
7109
|
-
let updatedVars = t.vars;
|
|
7110
|
-
if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
|
|
7111
|
-
const parsed = JSON.parse(String(t.vars[injectVar]));
|
|
7112
|
-
updatedVars = { ...t.vars };
|
|
7113
|
-
Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
|
|
7114
|
-
} catch {}
|
|
7268
|
+
newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
|
|
7269
|
+
const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
|
|
7115
7270
|
const strategyConfig = {
|
|
7116
7271
|
...strategy.config || {},
|
|
7117
7272
|
...maxCharsPerMessage ? { maxCharsPerMessage } : {},
|
|
@@ -7119,16 +7274,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
7119
7274
|
};
|
|
7120
7275
|
return {
|
|
7121
7276
|
...t,
|
|
7122
|
-
vars
|
|
7277
|
+
vars,
|
|
7123
7278
|
metadata: {
|
|
7124
7279
|
...t?.metadata || {},
|
|
7125
7280
|
...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
|
|
7126
7281
|
...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
|
|
7127
7282
|
...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
|
|
7283
|
+
...inputMaterialization && { inputMaterialization },
|
|
7128
7284
|
...Object.keys(strategyConfig).length > 0 && { strategyConfig }
|
|
7129
7285
|
}
|
|
7130
7286
|
};
|
|
7131
|
-
}));
|
|
7287
|
+
})));
|
|
7132
7288
|
const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
|
|
7133
7289
|
const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
|
|
7134
7290
|
const applyNumTestsCap = (calculatedRequested) => {
|
|
@@ -7583,7 +7739,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7583
7739
|
targetIds,
|
|
7584
7740
|
...retryStrategy.config
|
|
7585
7741
|
};
|
|
7586
|
-
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
|
|
7742
|
+
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
|
|
7587
7743
|
pluginTestCases.push(...retryTestCases);
|
|
7588
7744
|
Object.assign(strategyResults, retryResults);
|
|
7589
7745
|
if (showProgressBar) progressBar?.increment(retryTestCases.length);
|
|
@@ -7591,7 +7747,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7591
7747
|
checkAbort();
|
|
7592
7748
|
const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
|
|
7593
7749
|
if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
|
|
7594
|
-
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
7750
|
+
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
7595
7751
|
Object.assign(strategyResults, otherStrategyResults);
|
|
7596
7752
|
if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
|
|
7597
7753
|
const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
|
|
@@ -8346,6 +8502,12 @@ async function doRedteamRun(options) {
|
|
|
8346
8502
|
return evalResult;
|
|
8347
8503
|
}
|
|
8348
8504
|
//#endregion
|
|
8505
|
+
//#region src/types/transform.ts
|
|
8506
|
+
/** Runtime type guard for `TransformFunction` values. */
|
|
8507
|
+
function isTransformFunction(value) {
|
|
8508
|
+
return typeof value === "function";
|
|
8509
|
+
}
|
|
8510
|
+
//#endregion
|
|
8349
8511
|
//#region src/index.ts
|
|
8350
8512
|
/**
|
|
8351
8513
|
* Shallow-clone a test case so the caller can swap in resolved ApiProvider
|
|
@@ -8367,66 +8529,148 @@ function cloneTestForResolve(test) {
|
|
|
8367
8529
|
if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
|
|
8368
8530
|
return cloned;
|
|
8369
8531
|
}
|
|
8532
|
+
function toSerializableProviderRef(provider) {
|
|
8533
|
+
if (isApiProvider(provider)) return sanitizeProvider(provider);
|
|
8534
|
+
if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
|
|
8535
|
+
return provider;
|
|
8536
|
+
}
|
|
8537
|
+
function isRecord(value) {
|
|
8538
|
+
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
8539
|
+
}
|
|
8540
|
+
function withSerializableProvider(record) {
|
|
8541
|
+
if (!isApiProvider(record.provider)) return record;
|
|
8542
|
+
return {
|
|
8543
|
+
...record,
|
|
8544
|
+
provider: sanitizeProvider(record.provider)
|
|
8545
|
+
};
|
|
8546
|
+
}
|
|
8547
|
+
/**
|
|
8548
|
+
* Function-valued transforms are first-class at runtime but are silently dropped
|
|
8549
|
+
* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
|
|
8550
|
+
* a function reference, so replace every `transform`-like field with a
|
|
8551
|
+
* `[inline function]: name` marker. Non-function values pass through unchanged.
|
|
8552
|
+
*
|
|
8553
|
+
* `droppedRef.value` is flipped to `true` the first time a function is replaced
|
|
8554
|
+
* so the caller can emit a single warning instead of logging per field.
|
|
8555
|
+
*/
|
|
8556
|
+
function replaceFunctionTransforms(record, droppedRef) {
|
|
8557
|
+
let result;
|
|
8558
|
+
for (const key of TRANSFORM_KEYS) {
|
|
8559
|
+
const value = record[key];
|
|
8560
|
+
if (!isTransformFunction(value)) continue;
|
|
8561
|
+
if (!result) result = { ...record };
|
|
8562
|
+
result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
|
|
8563
|
+
droppedRef.value = true;
|
|
8564
|
+
}
|
|
8565
|
+
return result ?? record;
|
|
8566
|
+
}
|
|
8567
|
+
function toSerializableAssertion(assertion, droppedRef) {
|
|
8568
|
+
if (!isRecord(assertion)) return assertion;
|
|
8569
|
+
let sanitizedAssertion = withSerializableProvider(assertion);
|
|
8570
|
+
sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
|
|
8571
|
+
if (Array.isArray(assertion.assert)) sanitizedAssertion = {
|
|
8572
|
+
...sanitizedAssertion,
|
|
8573
|
+
assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
8574
|
+
};
|
|
8575
|
+
return sanitizedAssertion;
|
|
8576
|
+
}
|
|
8577
|
+
function toSerializableTestCase(test, droppedRef) {
|
|
8578
|
+
if (!isRecord(test)) return test;
|
|
8579
|
+
let sanitizedTest = withSerializableProvider(test);
|
|
8580
|
+
if (isRecord(test.options)) {
|
|
8581
|
+
let options = withSerializableProvider(test.options);
|
|
8582
|
+
options = replaceFunctionTransforms(options, droppedRef);
|
|
8583
|
+
if (options !== test.options) sanitizedTest = {
|
|
8584
|
+
...sanitizedTest,
|
|
8585
|
+
options
|
|
8586
|
+
};
|
|
8587
|
+
}
|
|
8588
|
+
if (Array.isArray(test.assert)) sanitizedTest = {
|
|
8589
|
+
...sanitizedTest,
|
|
8590
|
+
assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
8591
|
+
};
|
|
8592
|
+
return sanitizedTest;
|
|
8593
|
+
}
|
|
8594
|
+
function toSerializableScenario(scenario, droppedRef) {
|
|
8595
|
+
if (!isRecord(scenario)) return scenario;
|
|
8596
|
+
if (!Array.isArray(scenario.tests)) return scenario;
|
|
8597
|
+
return {
|
|
8598
|
+
...scenario,
|
|
8599
|
+
tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
|
|
8600
|
+
};
|
|
8601
|
+
}
|
|
8602
|
+
function createSerializableUnifiedConfig(testSuite, prompts) {
|
|
8603
|
+
const droppedRef = { value: false };
|
|
8604
|
+
const config = {
|
|
8605
|
+
...testSuite,
|
|
8606
|
+
providers: toSerializableProviderRef(testSuite.providers),
|
|
8607
|
+
defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
|
|
8608
|
+
tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
|
|
8609
|
+
scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
|
|
8610
|
+
prompts
|
|
8611
|
+
};
|
|
8612
|
+
if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
|
|
8613
|
+
return config;
|
|
8614
|
+
}
|
|
8370
8615
|
async function evaluate(testSuite, options = {}) {
|
|
8371
|
-
|
|
8372
|
-
|
|
8616
|
+
const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
|
|
8617
|
+
if (testSuiteConfig.writeLatestResults) await runDbMigrations();
|
|
8618
|
+
const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
|
|
8373
8619
|
const providerMap = {};
|
|
8374
8620
|
for (const p of loadedProviders) {
|
|
8375
8621
|
providerMap[p.id()] = p;
|
|
8376
8622
|
if (p.label) providerMap[p.label] = p;
|
|
8377
8623
|
}
|
|
8378
|
-
let resolvedDefaultTest =
|
|
8379
|
-
if (typeof
|
|
8624
|
+
let resolvedDefaultTest = testSuiteConfig.defaultTest;
|
|
8625
|
+
if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
|
|
8380
8626
|
const constructedTestSuite = {
|
|
8381
|
-
...
|
|
8627
|
+
...testSuiteConfig,
|
|
8382
8628
|
defaultTest: resolvedDefaultTest,
|
|
8383
|
-
scenarios:
|
|
8629
|
+
scenarios: testSuiteConfig.scenarios,
|
|
8384
8630
|
providers: loadedProviders,
|
|
8385
|
-
tests: await readTests(
|
|
8386
|
-
nunjucksFilters: await readFilters(
|
|
8387
|
-
prompts: await processPrompts(
|
|
8631
|
+
tests: await readTests(testSuiteConfig.tests),
|
|
8632
|
+
nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
|
|
8633
|
+
prompts: await processPrompts(testSuiteConfig.prompts)
|
|
8388
8634
|
};
|
|
8389
8635
|
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
8390
8636
|
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
8391
8637
|
if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
8392
|
-
env:
|
|
8638
|
+
env: testSuiteConfig.env,
|
|
8393
8639
|
basePath: state.basePath
|
|
8394
8640
|
});
|
|
8395
8641
|
if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
8396
|
-
env:
|
|
8642
|
+
env: testSuiteConfig.env,
|
|
8397
8643
|
basePath: state.basePath
|
|
8398
8644
|
});
|
|
8399
8645
|
}
|
|
8400
8646
|
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
8401
8647
|
for (const test of constructedTestSuite.tests) {
|
|
8402
8648
|
if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
|
|
8403
|
-
env:
|
|
8649
|
+
env: testSuiteConfig.env,
|
|
8404
8650
|
basePath: state.basePath
|
|
8405
8651
|
});
|
|
8406
8652
|
for (const assertion of test.assert || []) {
|
|
8407
8653
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
8408
8654
|
if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
|
|
8409
|
-
env:
|
|
8655
|
+
env: testSuiteConfig.env,
|
|
8410
8656
|
basePath: state.basePath
|
|
8411
8657
|
});
|
|
8412
8658
|
}
|
|
8413
8659
|
}
|
|
8414
8660
|
if (options.cache === false) disableCache();
|
|
8415
|
-
const parsedProviderPromptMap = readProviderPromptMap(
|
|
8416
|
-
const unifiedConfig =
|
|
8417
|
-
|
|
8418
|
-
|
|
8419
|
-
};
|
|
8420
|
-
const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
|
|
8661
|
+
const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
|
|
8662
|
+
const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
|
|
8663
|
+
const author = getAuthor(suiteAuthor);
|
|
8664
|
+
const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
|
|
8421
8665
|
const ret = await evaluate$1({
|
|
8422
8666
|
...constructedTestSuite,
|
|
8423
8667
|
providerPromptMap: parsedProviderPromptMap
|
|
8424
8668
|
}, evalRecord, {
|
|
8425
8669
|
eventSource: "library",
|
|
8426
|
-
isRedteam: Boolean(
|
|
8670
|
+
isRedteam: Boolean(testSuiteConfig.redteam),
|
|
8427
8671
|
...options
|
|
8428
8672
|
});
|
|
8429
|
-
if (
|
|
8673
|
+
if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
|
|
8430
8674
|
const shareableUrl = await createShareableUrl(ret, { silent: true });
|
|
8431
8675
|
if (shareableUrl) {
|
|
8432
8676
|
ret.shareableUrl = shareableUrl;
|
|
@@ -8437,9 +8681,9 @@ async function evaluate(testSuite, options = {}) {
|
|
|
8437
8681
|
logger.warn(`Failed to create shareable URL: ${error}`);
|
|
8438
8682
|
}
|
|
8439
8683
|
else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
|
|
8440
|
-
if (
|
|
8441
|
-
if (typeof
|
|
8442
|
-
else if (Array.isArray(
|
|
8684
|
+
if (testSuiteConfig.outputPath) {
|
|
8685
|
+
if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
|
|
8686
|
+
else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
|
|
8443
8687
|
}
|
|
8444
8688
|
return ret;
|
|
8445
8689
|
}
|
|
@@ -8668,6 +8912,58 @@ function setDownloadHeaders(res, fileName, contentType) {
|
|
|
8668
8912
|
//#region src/server/routes/eval.ts
|
|
8669
8913
|
const evalRouter = Router();
|
|
8670
8914
|
const evalJobs = /* @__PURE__ */ new Map();
|
|
8915
|
+
function sendEvalTableResponse(res, evalId, responsePayload) {
|
|
8916
|
+
try {
|
|
8917
|
+
res.json(responsePayload);
|
|
8918
|
+
} catch (error) {
|
|
8919
|
+
if (!(error instanceof RangeError)) throw error;
|
|
8920
|
+
logger.warn("[GET /:id/table] Response too large, stripping per-cell prompts by size", { evalId });
|
|
8921
|
+
const promptLocations = getEvalTableOutputPromptLocationsBySize(responsePayload);
|
|
8922
|
+
if (promptLocations.length === 0) {
|
|
8923
|
+
logger.error("[GET /:id/table] Response too large and has no prompts to strip", { evalId });
|
|
8924
|
+
res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
|
|
8925
|
+
return;
|
|
8926
|
+
}
|
|
8927
|
+
const tryStringifyWithStrippedPrompts = (promptCountToStrip) => {
|
|
8928
|
+
const responseWithoutPrompts = getEvalTablePromptStrippedPayload(responsePayload, promptLocations, promptCountToStrip);
|
|
8929
|
+
try {
|
|
8930
|
+
const responseBody = JSON.stringify(responseWithoutPrompts);
|
|
8931
|
+
invariant(typeof responseBody === "string", "Eval table response must serialize to JSON");
|
|
8932
|
+
return responseBody;
|
|
8933
|
+
} catch (retryError) {
|
|
8934
|
+
if (!(retryError instanceof RangeError)) throw retryError;
|
|
8935
|
+
return null;
|
|
8936
|
+
}
|
|
8937
|
+
};
|
|
8938
|
+
let lowerBound = 0;
|
|
8939
|
+
let upperBound = 1;
|
|
8940
|
+
let responseBody = null;
|
|
8941
|
+
while (upperBound < promptLocations.length) {
|
|
8942
|
+
responseBody = tryStringifyWithStrippedPrompts(upperBound);
|
|
8943
|
+
if (responseBody) break;
|
|
8944
|
+
lowerBound = upperBound;
|
|
8945
|
+
upperBound *= 2;
|
|
8946
|
+
}
|
|
8947
|
+
if (!responseBody) {
|
|
8948
|
+
upperBound = promptLocations.length;
|
|
8949
|
+
responseBody = tryStringifyWithStrippedPrompts(upperBound);
|
|
8950
|
+
}
|
|
8951
|
+
if (responseBody) {
|
|
8952
|
+
while (upperBound - lowerBound > 1) {
|
|
8953
|
+
const midPoint = lowerBound + Math.floor((upperBound - lowerBound) / 2);
|
|
8954
|
+
const midpointResponseBody = tryStringifyWithStrippedPrompts(midPoint);
|
|
8955
|
+
if (midpointResponseBody) {
|
|
8956
|
+
upperBound = midPoint;
|
|
8957
|
+
responseBody = midpointResponseBody;
|
|
8958
|
+
} else lowerBound = midPoint;
|
|
8959
|
+
}
|
|
8960
|
+
res.type("json").send(responseBody);
|
|
8961
|
+
return;
|
|
8962
|
+
}
|
|
8963
|
+
logger.error("[GET /:id/table] Response still too large after stripping prompts", { evalId });
|
|
8964
|
+
res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
|
|
8965
|
+
}
|
|
8966
|
+
}
|
|
8671
8967
|
evalRouter.post("/job", (req, res) => {
|
|
8672
8968
|
const result = EvalSchemas.CreateJob.Request.safeParse(req.body);
|
|
8673
8969
|
if (!result.success) {
|
|
@@ -8708,9 +9004,10 @@ evalRouter.post("/job", (req, res) => {
|
|
|
8708
9004
|
job.evalId = evalResult.id;
|
|
8709
9005
|
console.log(`[${id}] Complete`);
|
|
8710
9006
|
}).catch((error) => {
|
|
8711
|
-
logger.error(
|
|
8712
|
-
|
|
8713
|
-
|
|
9007
|
+
logger.error("Failed to eval tests", {
|
|
9008
|
+
error,
|
|
9009
|
+
body: sanitizeObject(testSuite, { context: "request body" })
|
|
9010
|
+
});
|
|
8714
9011
|
const job = evalJobs.get(id);
|
|
8715
9012
|
invariant(job, "Job not found");
|
|
8716
9013
|
job.status = "error";
|
|
@@ -8907,7 +9204,7 @@ evalRouter.get("/:id/table", async (req, res) => {
|
|
|
8907
9204
|
evalId: id
|
|
8908
9205
|
});
|
|
8909
9206
|
}
|
|
8910
|
-
res
|
|
9207
|
+
sendEvalTableResponse(res, id, {
|
|
8911
9208
|
table: returnTable,
|
|
8912
9209
|
totalCount: table.totalCount,
|
|
8913
9210
|
filteredCount: table.filteredCount,
|
|
@@ -9073,43 +9370,53 @@ evalRouter.post("/:evalId/results/:id/rating", async (req, res) => {
|
|
|
9073
9370
|
res.status(400).json({ error: z.prettifyError(bodyResult.error) });
|
|
9074
9371
|
return;
|
|
9075
9372
|
}
|
|
9076
|
-
|
|
9077
|
-
|
|
9078
|
-
|
|
9079
|
-
|
|
9080
|
-
|
|
9081
|
-
|
|
9082
|
-
|
|
9083
|
-
|
|
9084
|
-
|
|
9085
|
-
|
|
9086
|
-
|
|
9087
|
-
|
|
9088
|
-
|
|
9089
|
-
|
|
9090
|
-
|
|
9091
|
-
|
|
9092
|
-
|
|
9093
|
-
|
|
9373
|
+
try {
|
|
9374
|
+
const { evalId, id } = paramsResult.data;
|
|
9375
|
+
const gradingResult = bodyResult.data;
|
|
9376
|
+
const result = await EvalResult.findById(id);
|
|
9377
|
+
if (!result || result.evalId !== evalId) {
|
|
9378
|
+
res.status(404).json({ error: "Result not found" });
|
|
9379
|
+
return;
|
|
9380
|
+
}
|
|
9381
|
+
const eval_ = await Eval.findById(evalId);
|
|
9382
|
+
if (!eval_) {
|
|
9383
|
+
res.status(404).json({ error: "Eval not found" });
|
|
9384
|
+
return;
|
|
9385
|
+
}
|
|
9386
|
+
const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
|
|
9387
|
+
const successChanged = result.success !== gradingResult.pass;
|
|
9388
|
+
const scoreChange = gradingResult.score - result.score;
|
|
9389
|
+
result.gradingResult = gradingResult;
|
|
9390
|
+
result.success = gradingResult.pass;
|
|
9391
|
+
result.score = gradingResult.score;
|
|
9392
|
+
const prompt = eval_.prompts[result.promptIdx];
|
|
9393
|
+
invariant(prompt, "Prompt not found");
|
|
9394
|
+
if (!prompt.metrics) {
|
|
9395
|
+
logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
|
|
9396
|
+
res.status(400).json({ error: "Prompt metrics not found" });
|
|
9397
|
+
return;
|
|
9398
|
+
}
|
|
9399
|
+
if (successChanged) if (result.success) {
|
|
9400
|
+
prompt.metrics.testPassCount += 1;
|
|
9401
|
+
prompt.metrics.testFailCount -= 1;
|
|
9402
|
+
prompt.metrics.assertPassCount += 1;
|
|
9403
|
+
prompt.metrics.score += scoreChange;
|
|
9404
|
+
if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
|
|
9405
|
+
} else {
|
|
9406
|
+
prompt.metrics.testPassCount -= 1;
|
|
9407
|
+
prompt.metrics.testFailCount += 1;
|
|
9408
|
+
prompt.metrics.assertFailCount += 1;
|
|
9409
|
+
prompt.metrics.score += scoreChange;
|
|
9410
|
+
if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
|
|
9411
|
+
}
|
|
9412
|
+
else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
|
|
9413
|
+
else prompt.metrics.assertFailCount += 1;
|
|
9414
|
+
await eval_.save();
|
|
9415
|
+
await result.save();
|
|
9416
|
+
res.json(result);
|
|
9417
|
+
} catch (error) {
|
|
9418
|
+
sendError(res, 500, "Failed to submit rating", error);
|
|
9094
9419
|
}
|
|
9095
|
-
if (successChanged) if (result.success) {
|
|
9096
|
-
prompt.metrics.testPassCount += 1;
|
|
9097
|
-
prompt.metrics.testFailCount -= 1;
|
|
9098
|
-
prompt.metrics.assertPassCount += 1;
|
|
9099
|
-
prompt.metrics.score += scoreChange;
|
|
9100
|
-
if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
|
|
9101
|
-
} else {
|
|
9102
|
-
prompt.metrics.testPassCount -= 1;
|
|
9103
|
-
prompt.metrics.testFailCount += 1;
|
|
9104
|
-
prompt.metrics.assertFailCount += 1;
|
|
9105
|
-
prompt.metrics.score += scoreChange;
|
|
9106
|
-
if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
|
|
9107
|
-
}
|
|
9108
|
-
else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
|
|
9109
|
-
else prompt.metrics.assertFailCount += 1;
|
|
9110
|
-
await eval_.save();
|
|
9111
|
-
await result.save();
|
|
9112
|
-
res.json(result);
|
|
9113
9420
|
});
|
|
9114
9421
|
evalRouter.post("/", async (req, res) => {
|
|
9115
9422
|
const bodyResult = EvalSchemas.Save.Request.safeParse(req.body);
|
|
@@ -9143,9 +9450,10 @@ evalRouter.post("/", async (req, res) => {
|
|
|
9143
9450
|
res.json(EvalSchemas.Save.Response.parse({ id: eval_.id }));
|
|
9144
9451
|
}
|
|
9145
9452
|
} catch (error) {
|
|
9146
|
-
logger.error(
|
|
9147
|
-
|
|
9148
|
-
|
|
9453
|
+
logger.error("Failed to write eval to database", {
|
|
9454
|
+
error,
|
|
9455
|
+
body: sanitizeObject(body, { context: "request body" })
|
|
9456
|
+
});
|
|
9149
9457
|
res.status(500).json({ error: "Failed to write eval to database" });
|
|
9150
9458
|
}
|
|
9151
9459
|
});
|
|
@@ -9363,7 +9671,7 @@ async function checkForUpdates() {
|
|
|
9363
9671
|
} catch {
|
|
9364
9672
|
return false;
|
|
9365
9673
|
}
|
|
9366
|
-
if (semverGt(latestVersion, "0.121.
|
|
9674
|
+
if (semverGt(latestVersion, "0.121.7")) {
|
|
9367
9675
|
const border = "=".repeat(TERMINAL_MAX_WIDTH);
|
|
9368
9676
|
logger.info(`\n${border}
|
|
9369
9677
|
${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
|
|
@@ -9522,6 +9830,9 @@ const ModelAuditCliOptionsSchema = z.object({
|
|
|
9522
9830
|
dryRun: z.boolean().optional(),
|
|
9523
9831
|
cache: z.boolean().optional(),
|
|
9524
9832
|
stream: z.boolean().optional(),
|
|
9833
|
+
scanners: z.array(z.string()).optional(),
|
|
9834
|
+
excludeScanner: z.array(z.string()).optional(),
|
|
9835
|
+
listScanners: z.boolean().optional(),
|
|
9525
9836
|
share: z.boolean().optional(),
|
|
9526
9837
|
noShare: z.boolean().optional()
|
|
9527
9838
|
});
|
|
@@ -9609,6 +9920,18 @@ const CLI_ARG_MAP = {
|
|
|
9609
9920
|
stream: {
|
|
9610
9921
|
flag: "--stream",
|
|
9611
9922
|
type: "boolean"
|
|
9923
|
+
},
|
|
9924
|
+
scanners: {
|
|
9925
|
+
flag: "--scanners",
|
|
9926
|
+
type: "array"
|
|
9927
|
+
},
|
|
9928
|
+
excludeScanner: {
|
|
9929
|
+
flag: "--exclude-scanner",
|
|
9930
|
+
type: "array"
|
|
9931
|
+
},
|
|
9932
|
+
listScanners: {
|
|
9933
|
+
flag: "--list-scanners",
|
|
9934
|
+
type: "boolean"
|
|
9612
9935
|
}
|
|
9613
9936
|
};
|
|
9614
9937
|
/**
|
|
@@ -9777,6 +10100,62 @@ function spawnModelAudit(args, options) {
|
|
|
9777
10100
|
});
|
|
9778
10101
|
});
|
|
9779
10102
|
}
|
|
10103
|
+
function collectRepeatableOption(value, previous = []) {
|
|
10104
|
+
return [...previous, value];
|
|
10105
|
+
}
|
|
10106
|
+
function hasScannerSelectionOptions(options) {
|
|
10107
|
+
return Boolean(options.scanners?.length || options.excludeScanner?.length);
|
|
10108
|
+
}
|
|
10109
|
+
function hasScannerSelectionValue(value) {
|
|
10110
|
+
if (Array.isArray(value)) return value.length > 0;
|
|
10111
|
+
return typeof value === "string" && value.trim().length > 0;
|
|
10112
|
+
}
|
|
10113
|
+
function hasPersistedScannerSelection(metadata) {
|
|
10114
|
+
const options = metadata?.options;
|
|
10115
|
+
if (!options || typeof options !== "object" || Array.isArray(options)) return false;
|
|
10116
|
+
return hasScannerSelectionValue(options.scanners) || hasScannerSelectionValue(options.excludeScanner);
|
|
10117
|
+
}
|
|
10118
|
+
/**
|
|
10119
|
+
* Parse CLI options through Zod, logging validation errors to the CLI.
|
|
10120
|
+
* Returns null when validation fails (and sets process.exitCode to 1).
|
|
10121
|
+
*/
|
|
10122
|
+
function buildCliArgs(paths, cliOptions) {
|
|
10123
|
+
try {
|
|
10124
|
+
return parseModelAuditArgs(paths, cliOptions);
|
|
10125
|
+
} catch (error) {
|
|
10126
|
+
if (error instanceof z.ZodError) {
|
|
10127
|
+
logger.error(`Invalid model audit options provided:\n${z.prettifyError(error)}`);
|
|
10128
|
+
process.exitCode = 1;
|
|
10129
|
+
return null;
|
|
10130
|
+
}
|
|
10131
|
+
throw error;
|
|
10132
|
+
}
|
|
10133
|
+
}
|
|
10134
|
+
/**
|
|
10135
|
+
* Run modelaudit with inherited stdio and propagate its exit code.
|
|
10136
|
+
*
|
|
10137
|
+
* `treatExitOneAsIssues=true` suppresses the error log for exit code 1, which
|
|
10138
|
+
* modelaudit uses to mean "scan completed, issues found" — callers that expect
|
|
10139
|
+
* findings (like the main scan flow) should set this, while list/help flows
|
|
10140
|
+
* (where a non-zero exit is always unexpected) should leave it false.
|
|
10141
|
+
*/
|
|
10142
|
+
async function runPassthroughModelAudit(args, env, treatExitOneAsIssues = false) {
|
|
10143
|
+
try {
|
|
10144
|
+
const spawnResult = await spawnModelAudit(args, {
|
|
10145
|
+
captureOutput: false,
|
|
10146
|
+
env
|
|
10147
|
+
});
|
|
10148
|
+
const isIssuesExit = treatExitOneAsIssues && spawnResult.code === 1;
|
|
10149
|
+
if (spawnResult.code !== null && spawnResult.code !== 0 && !isIssuesExit) logger.error(`Model scan process exited with code ${spawnResult.code}`);
|
|
10150
|
+
process.exitCode = spawnResult.code || 0;
|
|
10151
|
+
} catch (error) {
|
|
10152
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
10153
|
+
logger.error(`Failed to start modelaudit: ${message}`);
|
|
10154
|
+
logger.info("Make sure modelaudit is installed and available in your PATH.");
|
|
10155
|
+
logger.info("Install it using: pip install modelaudit");
|
|
10156
|
+
process.exitCode = 1;
|
|
10157
|
+
}
|
|
10158
|
+
}
|
|
9780
10159
|
/**
|
|
9781
10160
|
* Check for existing scan and determine if re-scan is needed.
|
|
9782
10161
|
* Returns the existing audit if found and re-scan should happen.
|
|
@@ -9799,6 +10178,20 @@ async function checkExistingScan(paths, options, currentScannerVersion) {
|
|
|
9799
10178
|
shouldSkip: false,
|
|
9800
10179
|
existingAudit: null
|
|
9801
10180
|
};
|
|
10181
|
+
if (hasScannerSelectionOptions(options)) {
|
|
10182
|
+
logger.debug("Re-scanning with scanner selection options");
|
|
10183
|
+
return {
|
|
10184
|
+
shouldSkip: false,
|
|
10185
|
+
existingAudit: existing
|
|
10186
|
+
};
|
|
10187
|
+
}
|
|
10188
|
+
if (hasPersistedScannerSelection(existing.metadata)) {
|
|
10189
|
+
logger.debug("Re-scanning because cached revision used scanner selection options");
|
|
10190
|
+
return {
|
|
10191
|
+
shouldSkip: false,
|
|
10192
|
+
existingAudit: existing
|
|
10193
|
+
};
|
|
10194
|
+
}
|
|
9802
10195
|
if (options.force) {
|
|
9803
10196
|
logger.debug(`Re-scanning (--force): ${modelId}`);
|
|
9804
10197
|
return {
|
|
@@ -9930,7 +10323,9 @@ async function saveAuditRecord(paths, results, options, currentScannerVersion, e
|
|
|
9930
10323
|
cache: options.cache,
|
|
9931
10324
|
quiet: options.quiet,
|
|
9932
10325
|
progress: options.progress,
|
|
9933
|
-
stream: options.stream
|
|
10326
|
+
stream: options.stream,
|
|
10327
|
+
scanners: options.scanners,
|
|
10328
|
+
excludeScanner: options.excludeScanner
|
|
9934
10329
|
}
|
|
9935
10330
|
};
|
|
9936
10331
|
if (existingAudit) {
|
|
@@ -10062,8 +10457,8 @@ async function processScanResultsFromStdout(spawnResult, paths, options, current
|
|
|
10062
10457
|
return processJsonResults(jsonOutput, spawnResult.code || 0, paths, options, currentScannerVersion, existingAudit);
|
|
10063
10458
|
}
|
|
10064
10459
|
function modelScanCommand(program) {
|
|
10065
|
-
program.command("scan-model").description("Scan model files for security and quality issues").argument("
|
|
10066
|
-
if (!paths || paths.length === 0) {
|
|
10460
|
+
program.command("scan-model").description("Scan model files for security and quality issues").argument("[paths...]", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("--scanners <scanner>", "Only run selected ModelAudit scanners (IDs/classes; comma-separated or repeated)", collectRepeatableOption).option("--exclude-scanner <scanner>", "Exclude a ModelAudit scanner from the active set (comma-separated or repeated)", collectRepeatableOption).option("--list-scanners", "List registered ModelAudit scanners and exit").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
|
|
10461
|
+
if (!options.listScanners && (!paths || paths.length === 0)) {
|
|
10067
10462
|
logger.error("No paths specified. Provide at least one model file or directory to scan.");
|
|
10068
10463
|
process.exitCode = 1;
|
|
10069
10464
|
return;
|
|
@@ -10079,6 +10474,21 @@ function modelScanCommand(program) {
|
|
|
10079
10474
|
}
|
|
10080
10475
|
await checkModelAuditUpdates();
|
|
10081
10476
|
if (currentScannerVersion) logger.debug(`Using modelaudit version: ${currentScannerVersion}`);
|
|
10477
|
+
const delegationEnv = {
|
|
10478
|
+
...process.env,
|
|
10479
|
+
PROMPTFOO_DELEGATED: "true"
|
|
10480
|
+
};
|
|
10481
|
+
if (options.listScanners) {
|
|
10482
|
+
const parsed = buildCliArgs(paths || [], {
|
|
10483
|
+
...options,
|
|
10484
|
+
format: options.format || "text",
|
|
10485
|
+
output: options.output,
|
|
10486
|
+
timeout: void 0
|
|
10487
|
+
});
|
|
10488
|
+
if (!parsed) return;
|
|
10489
|
+
await runPassthroughModelAudit(parsed.args, delegationEnv);
|
|
10490
|
+
return;
|
|
10491
|
+
}
|
|
10082
10492
|
const saveToDatabase = options.write === void 0 || options.write === true;
|
|
10083
10493
|
let existingAuditToUpdate = null;
|
|
10084
10494
|
if (saveToDatabase) {
|
|
@@ -10090,33 +10500,22 @@ function modelScanCommand(program) {
|
|
|
10090
10500
|
existingAuditToUpdate = existingAudit;
|
|
10091
10501
|
}
|
|
10092
10502
|
const outputFormat = saveToDatabase ? "json" : options.format || "text";
|
|
10093
|
-
const
|
|
10503
|
+
const parsed = buildCliArgs(paths, {
|
|
10094
10504
|
...options,
|
|
10095
10505
|
format: outputFormat,
|
|
10096
10506
|
output: options.output && !saveToDatabase ? options.output : void 0,
|
|
10097
10507
|
timeout: options.timeout ? parseInt(options.timeout, 10) : void 0
|
|
10098
|
-
};
|
|
10099
|
-
|
|
10100
|
-
|
|
10101
|
-
|
|
10102
|
-
|
|
10103
|
-
|
|
10104
|
-
|
|
10105
|
-
|
|
10106
|
-
logger.error("Invalid model audit options provided:");
|
|
10107
|
-
for (const err of error.issues) logger.error(` - ${err.path.join(".")}: ${err.message}`);
|
|
10108
|
-
process.exitCode = 1;
|
|
10109
|
-
return;
|
|
10110
|
-
}
|
|
10111
|
-
throw error;
|
|
10508
|
+
});
|
|
10509
|
+
if (!parsed) return;
|
|
10510
|
+
const args = parsed.args;
|
|
10511
|
+
if (parsed.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${parsed.unsupportedOptions.join(", ")}`);
|
|
10512
|
+
if (saveToDatabase || outputFormat === "text") logger.info(`Running model scan on: ${paths.join(", ")}`);
|
|
10513
|
+
if (!saveToDatabase) {
|
|
10514
|
+
await runPassthroughModelAudit(args, delegationEnv, true);
|
|
10515
|
+
return;
|
|
10112
10516
|
}
|
|
10113
|
-
logger.info(`Running model scan on: ${paths.join(", ")}`);
|
|
10114
|
-
const delegationEnv = {
|
|
10115
|
-
...process.env,
|
|
10116
|
-
PROMPTFOO_DELEGATED: "true"
|
|
10117
|
-
};
|
|
10118
10517
|
try {
|
|
10119
|
-
if (
|
|
10518
|
+
if (supportsCliUiWithOutput(currentScannerVersion)) {
|
|
10120
10519
|
const tempOutputPath = createTempOutputPath();
|
|
10121
10520
|
args.push("--output", tempOutputPath);
|
|
10122
10521
|
let cleanedUp = false;
|
|
@@ -10150,14 +10549,6 @@ function modelScanCommand(program) {
|
|
|
10150
10549
|
});
|
|
10151
10550
|
process.exitCode = await processScanResultsFromStdout(spawnResult, paths, options, currentScannerVersion, existingAuditToUpdate);
|
|
10152
10551
|
}
|
|
10153
|
-
else {
|
|
10154
|
-
const spawnResult = await spawnModelAudit(args, {
|
|
10155
|
-
captureOutput: false,
|
|
10156
|
-
env: delegationEnv
|
|
10157
|
-
});
|
|
10158
|
-
if (spawnResult.code !== null && spawnResult.code !== 0 && spawnResult.code !== 1) logger.error(`Model scan process exited with code ${spawnResult.code}`);
|
|
10159
|
-
process.exitCode = spawnResult.code || 0;
|
|
10160
|
-
}
|
|
10161
10552
|
} catch (error) {
|
|
10162
10553
|
const message = error instanceof Error ? error.message : String(error);
|
|
10163
10554
|
logger.error(`Failed to start modelaudit: ${message}`);
|
|
@@ -10174,6 +10565,14 @@ const CheckInstalledResponseSchema = z.object({
|
|
|
10174
10565
|
version: z.string().nullable(),
|
|
10175
10566
|
cwd: z.string()
|
|
10176
10567
|
});
|
|
10568
|
+
const ScannerInfoSchema = z.object({
|
|
10569
|
+
id: z.string(),
|
|
10570
|
+
class: z.string().optional().default(""),
|
|
10571
|
+
description: z.string().optional().default(""),
|
|
10572
|
+
extensions: z.array(z.string()).optional().default([]),
|
|
10573
|
+
dependencies: z.array(z.string()).optional().default([])
|
|
10574
|
+
}).passthrough();
|
|
10575
|
+
const ListScannersResponseSchema = z.object({ scanners: z.array(ScannerInfoSchema) }).passthrough();
|
|
10177
10576
|
const CheckPathRequestSchema = z.object({ path: z.string().trim().min(1, "No path provided") });
|
|
10178
10577
|
const CheckPathResponseSchema = z.union([z.object({
|
|
10179
10578
|
exists: z.literal(false),
|
|
@@ -10205,6 +10604,8 @@ const ScanRequestSchema = z.object({
|
|
|
10205
10604
|
sbom: z.string().optional(),
|
|
10206
10605
|
output: z.string().optional(),
|
|
10207
10606
|
maxSize: z.string().optional(),
|
|
10607
|
+
scanners: z.array(z.string()).optional(),
|
|
10608
|
+
excludeScanner: z.array(z.string()).optional(),
|
|
10208
10609
|
persist: z.boolean().optional(),
|
|
10209
10610
|
name: z.string().optional(),
|
|
10210
10611
|
author: z.string().optional()
|
|
@@ -10215,8 +10616,13 @@ const ListScansQuerySchema = z.object({
|
|
|
10215
10616
|
offset: z.coerce.number().int().min(0).optional().default(0),
|
|
10216
10617
|
sort: z.enum([
|
|
10217
10618
|
"createdAt",
|
|
10619
|
+
"failedChecks",
|
|
10620
|
+
"hasErrors",
|
|
10621
|
+
"id",
|
|
10622
|
+
"modelPath",
|
|
10218
10623
|
"name",
|
|
10219
|
-
"
|
|
10624
|
+
"passedChecks",
|
|
10625
|
+
"totalChecks"
|
|
10220
10626
|
]).optional().default("createdAt"),
|
|
10221
10627
|
order: z.enum(["asc", "desc"]).optional().default("desc"),
|
|
10222
10628
|
search: z.string().optional()
|
|
@@ -10255,6 +10661,7 @@ const DeleteScanResponseSchema = z.object({
|
|
|
10255
10661
|
});
|
|
10256
10662
|
const ModelAuditSchemas = {
|
|
10257
10663
|
CheckInstalled: { Response: CheckInstalledResponseSchema },
|
|
10664
|
+
ListScanners: { Response: ListScannersResponseSchema },
|
|
10258
10665
|
CheckPath: {
|
|
10259
10666
|
Request: CheckPathRequestSchema,
|
|
10260
10667
|
Response: CheckPathResponseSchema
|
|
@@ -10277,6 +10684,47 @@ const ModelAuditSchemas = {
|
|
|
10277
10684
|
//#endregion
|
|
10278
10685
|
//#region src/server/routes/modelAudit.ts
|
|
10279
10686
|
const modelAuditRouter = Router();
|
|
10687
|
+
const LIST_SCANNERS_ARGS = parseModelAuditArgs([], {
|
|
10688
|
+
listScanners: true,
|
|
10689
|
+
format: "json"
|
|
10690
|
+
}).args;
|
|
10691
|
+
function getModelAuditDelegationEnv() {
|
|
10692
|
+
return {
|
|
10693
|
+
...process.env,
|
|
10694
|
+
PROMPTFOO_DELEGATED: "true"
|
|
10695
|
+
};
|
|
10696
|
+
}
|
|
10697
|
+
function spawnModelAuditCapture(args, options = {}) {
|
|
10698
|
+
return new Promise((resolve, reject) => {
|
|
10699
|
+
const child = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
|
|
10700
|
+
let stdout = "";
|
|
10701
|
+
let stderr = "";
|
|
10702
|
+
const onAbort = () => {
|
|
10703
|
+
if (!child.killed) child.kill("SIGTERM");
|
|
10704
|
+
};
|
|
10705
|
+
if (options.signal?.aborted) onAbort();
|
|
10706
|
+
else options.signal?.addEventListener("abort", onAbort, { once: true });
|
|
10707
|
+
const cleanupAbort = () => options.signal?.removeEventListener("abort", onAbort);
|
|
10708
|
+
child.stdout?.on("data", (data) => {
|
|
10709
|
+
stdout += data.toString();
|
|
10710
|
+
});
|
|
10711
|
+
child.stderr?.on("data", (data) => {
|
|
10712
|
+
stderr += data.toString();
|
|
10713
|
+
});
|
|
10714
|
+
child.on("error", (error) => {
|
|
10715
|
+
cleanupAbort();
|
|
10716
|
+
reject(error);
|
|
10717
|
+
});
|
|
10718
|
+
child.on("close", (code) => {
|
|
10719
|
+
cleanupAbort();
|
|
10720
|
+
resolve({
|
|
10721
|
+
code,
|
|
10722
|
+
stdout,
|
|
10723
|
+
stderr
|
|
10724
|
+
});
|
|
10725
|
+
});
|
|
10726
|
+
});
|
|
10727
|
+
}
|
|
10280
10728
|
modelAuditRouter.get("/check-installed", async (_req, res) => {
|
|
10281
10729
|
try {
|
|
10282
10730
|
const { installed, version } = await checkModelAuditInstalled();
|
|
@@ -10293,6 +10741,34 @@ modelAuditRouter.get("/check-installed", async (_req, res) => {
|
|
|
10293
10741
|
}));
|
|
10294
10742
|
}
|
|
10295
10743
|
});
|
|
10744
|
+
modelAuditRouter.get("/scanners", async (req, res) => {
|
|
10745
|
+
const abortController = new AbortController();
|
|
10746
|
+
const onClientClose = () => abortController.abort();
|
|
10747
|
+
req.on("close", onClientClose);
|
|
10748
|
+
try {
|
|
10749
|
+
const { installed } = await checkModelAuditInstalled();
|
|
10750
|
+
if (!installed) {
|
|
10751
|
+
res.status(400).json({ error: "ModelAudit is not installed. Please install it using: pip install modelaudit" });
|
|
10752
|
+
return;
|
|
10753
|
+
}
|
|
10754
|
+
const { code, stdout, stderr } = await spawnModelAuditCapture(LIST_SCANNERS_ARGS, { signal: abortController.signal });
|
|
10755
|
+
if (abortController.signal.aborted) return;
|
|
10756
|
+
if (code !== null && code !== 0) {
|
|
10757
|
+
sendError(res, 500, "Failed to list ModelAudit scanners", {
|
|
10758
|
+
code,
|
|
10759
|
+
stderr
|
|
10760
|
+
});
|
|
10761
|
+
return;
|
|
10762
|
+
}
|
|
10763
|
+
const parsedOutput = JSON.parse(stdout);
|
|
10764
|
+
res.json(ModelAuditSchemas.ListScanners.Response.parse(parsedOutput));
|
|
10765
|
+
} catch (error) {
|
|
10766
|
+
if (abortController.signal.aborted) return;
|
|
10767
|
+
sendError(res, 500, "Failed to list ModelAudit scanners", error);
|
|
10768
|
+
} finally {
|
|
10769
|
+
req.removeListener("close", onClientClose);
|
|
10770
|
+
}
|
|
10771
|
+
});
|
|
10296
10772
|
modelAuditRouter.post("/check-path", async (req, res) => {
|
|
10297
10773
|
const bodyResult = ModelAuditSchemas.CheckPath.Request.safeParse(req.body);
|
|
10298
10774
|
if (!bodyResult.success) {
|
|
@@ -10363,11 +10839,12 @@ modelAuditRouter.post("/scan", async (req, res) => {
|
|
|
10363
10839
|
event: "model_scan",
|
|
10364
10840
|
pathCount: paths.length,
|
|
10365
10841
|
hasBlacklist: (options.blacklist?.length ?? 0) > 0,
|
|
10842
|
+
hasScannerSelection: Boolean(options.scanners?.length || options.excludeScanner?.length),
|
|
10366
10843
|
timeout: options.timeout ?? 0,
|
|
10367
10844
|
verbose: options.verbose ?? false,
|
|
10368
10845
|
persist
|
|
10369
10846
|
});
|
|
10370
|
-
const modelAudit = spawn("modelaudit", args);
|
|
10847
|
+
const modelAudit = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
|
|
10371
10848
|
let stdout = "";
|
|
10372
10849
|
let stderr = "";
|
|
10373
10850
|
let responded = false;
|
|
@@ -10553,9 +11030,20 @@ modelAuditRouter.post("/scan", async (req, res) => {
|
|
|
10553
11030
|
options: {
|
|
10554
11031
|
blacklist: options.blacklist,
|
|
10555
11032
|
timeout: options.timeout,
|
|
11033
|
+
maxSize: options.maxSize,
|
|
10556
11034
|
maxFileSize: options.maxFileSize,
|
|
10557
11035
|
maxTotalSize: options.maxTotalSize,
|
|
10558
|
-
verbose: options.verbose
|
|
11036
|
+
verbose: options.verbose,
|
|
11037
|
+
format: options.format,
|
|
11038
|
+
strict: options.strict,
|
|
11039
|
+
dryRun: options.dryRun,
|
|
11040
|
+
cache: options.cache,
|
|
11041
|
+
quiet: options.quiet,
|
|
11042
|
+
progress: options.progress,
|
|
11043
|
+
sbom: options.sbom,
|
|
11044
|
+
output: options.output,
|
|
11045
|
+
scanners: options.scanners,
|
|
11046
|
+
excludeScanner: options.excludeScanner
|
|
10559
11047
|
}
|
|
10560
11048
|
}
|
|
10561
11049
|
})).id;
|
|
@@ -11009,7 +11497,7 @@ function determineEffectiveSessionSource({ provider, sessionConfig }) {
|
|
|
11009
11497
|
async function testProviderConnectivity({ provider, prompt = "Hello World!", inputs }) {
|
|
11010
11498
|
const vars = {};
|
|
11011
11499
|
if (!provider?.config?.sessionParser) vars["sessionId"] = crypto.randomUUID();
|
|
11012
|
-
if (inputs && typeof inputs === "object") for (const [varName,
|
|
11500
|
+
if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) vars[varName] = createPlaceholderInputValue(varName, definition);
|
|
11013
11501
|
const testSuite = {
|
|
11014
11502
|
providers: [provider],
|
|
11015
11503
|
prompts: [{
|
|
@@ -11240,10 +11728,15 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11240
11728
|
sessionConfig
|
|
11241
11729
|
});
|
|
11242
11730
|
const initialSessionId = effectiveSessionSource === "server" ? void 0 : crypto.randomUUID();
|
|
11731
|
+
const materializeSessionPrompt = (prompt) => {
|
|
11732
|
+
if (!mainInputVariable) return prompt;
|
|
11733
|
+
const definition = inputs?.[mainInputVariable];
|
|
11734
|
+
return definition ? createPlaceholderInputValue(mainInputVariable, definition, prompt) : prompt;
|
|
11735
|
+
};
|
|
11243
11736
|
const inputVars = {};
|
|
11244
|
-
if (inputs && typeof inputs === "object") for (const [varName,
|
|
11737
|
+
if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) {
|
|
11245
11738
|
if (varName === mainInputVariable) continue;
|
|
11246
|
-
inputVars[varName] =
|
|
11739
|
+
inputVars[varName] = createPlaceholderInputValue(varName, definition);
|
|
11247
11740
|
}
|
|
11248
11741
|
const firstPrompt = "What can you help me with?";
|
|
11249
11742
|
const secondPrompt = "What was the last thing I asked you?";
|
|
@@ -11256,7 +11749,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11256
11749
|
vars: {
|
|
11257
11750
|
...initialSessionId ? { sessionId: initialSessionId } : {},
|
|
11258
11751
|
...inputVars,
|
|
11259
|
-
...mainInputVariable ? { [mainInputVariable]: firstPrompt } : {}
|
|
11752
|
+
...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(firstPrompt) } : {}
|
|
11260
11753
|
},
|
|
11261
11754
|
prompt: {
|
|
11262
11755
|
raw: firstPrompt,
|
|
@@ -11303,7 +11796,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11303
11796
|
vars: {
|
|
11304
11797
|
...extractedSessionId ? { sessionId: extractedSessionId } : {},
|
|
11305
11798
|
...inputVars,
|
|
11306
|
-
...mainInputVariable ? { [mainInputVariable]: secondPrompt } : {}
|
|
11799
|
+
...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(secondPrompt) } : {}
|
|
11307
11800
|
},
|
|
11308
11801
|
prompt: {
|
|
11309
11802
|
raw: secondPrompt,
|
|
@@ -11828,7 +12321,7 @@ providersRouter.post("/test-session", async (req, res) => {
|
|
|
11828
12321
|
const TestCaseGenerationSchema = z.object({
|
|
11829
12322
|
plugin: z.object({
|
|
11830
12323
|
id: z.string().refine((val) => ALL_PLUGINS.includes(val), { message: `Invalid plugin ID. Must be one of: ${ALL_PLUGINS.join(", ")}` }),
|
|
11831
|
-
config: PluginConfigSchema.optional().prefault({})
|
|
12324
|
+
config: PluginConfigSchema.catchall(z.unknown()).optional().prefault({})
|
|
11832
12325
|
}),
|
|
11833
12326
|
strategy: z.object({
|
|
11834
12327
|
id: z.string().refine((val) => ALL_STRATEGIES.includes(val), { message: `Invalid strategy ID. Must be one of: ${ALL_STRATEGIES.join(", ")}` }),
|
|
@@ -12405,11 +12898,11 @@ redteamRouter.post("/:taskId", async (req, res) => {
|
|
|
12405
12898
|
}
|
|
12406
12899
|
const { taskId } = paramsResult.data;
|
|
12407
12900
|
const cloudFunctionUrl = getRemoteGenerationUrl();
|
|
12408
|
-
logger.debug(`Received ${taskId} task request
|
|
12901
|
+
logger.debug(`Received ${taskId} task request`, {
|
|
12409
12902
|
method: req.method,
|
|
12410
12903
|
url: req.url,
|
|
12411
|
-
body:
|
|
12412
|
-
})
|
|
12904
|
+
body: sanitizeObject(bodyResult.data, { context: "request body" })
|
|
12905
|
+
});
|
|
12413
12906
|
try {
|
|
12414
12907
|
logger.debug(`Sending request to cloud function: ${cloudFunctionUrl}`);
|
|
12415
12908
|
const response = await fetchWithProxy(cloudFunctionUrl, {
|
|
@@ -12781,7 +13274,7 @@ router.get("/", async (_req, res) => {
|
|
|
12781
13274
|
};
|
|
12782
13275
|
} catch (error) {
|
|
12783
13276
|
logger.debug(`Failed to fetch latest version: ${error}`);
|
|
12784
|
-
latestVersion = versionCache.latestVersion ?? "0.121.
|
|
13277
|
+
latestVersion = versionCache.latestVersion ?? "0.121.7";
|
|
12785
13278
|
}
|
|
12786
13279
|
}
|
|
12787
13280
|
const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
|
|
@@ -12790,7 +13283,7 @@ router.get("/", async (_req, res) => {
|
|
|
12790
13283
|
selfHosted,
|
|
12791
13284
|
isNpx
|
|
12792
13285
|
});
|
|
12793
|
-
const resolvedLatestVersion = latestVersion ?? "0.121.
|
|
13286
|
+
const resolvedLatestVersion = latestVersion ?? "0.121.7";
|
|
12794
13287
|
const response = {
|
|
12795
13288
|
currentVersion: VERSION,
|
|
12796
13289
|
latestVersion: resolvedLatestVersion,
|
|
@@ -15641,8 +16134,8 @@ function shouldUseInkList() {
|
|
|
15641
16134
|
async function runInkList(options) {
|
|
15642
16135
|
const [React, { renderInteractive }, { ListApp }] = await Promise.all([
|
|
15643
16136
|
import("react"),
|
|
15644
|
-
import("./render-
|
|
15645
|
-
import("./ListApp-
|
|
16137
|
+
import("./render-DznWrxGO.js"),
|
|
16138
|
+
import("./ListApp-DLmM02JS.js")
|
|
15646
16139
|
]);
|
|
15647
16140
|
let result = { cancelled: false };
|
|
15648
16141
|
let resolveResult;
|
|
@@ -17950,8 +18443,8 @@ function registerRunEvaluationTool(server) {
|
|
|
17950
18443
|
}
|
|
17951
18444
|
filteredTestSuite.tests = filteredTests;
|
|
17952
18445
|
}
|
|
17953
|
-
const { evaluate } = await import("./evaluator-
|
|
17954
|
-
const evalRecord = await (await import("./eval-
|
|
18446
|
+
const { evaluate } = await import("./evaluator-DNdJF1Gv.js");
|
|
18447
|
+
const evalRecord = await (await import("./eval-DscR5iOM.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
|
|
17955
18448
|
logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
|
|
17956
18449
|
const startTime = Date.now();
|
|
17957
18450
|
const result = await evaluate(filteredTestSuite, evalRecord, {
|
|
@@ -18517,6 +19010,9 @@ function analyzeConfiguration(config) {
|
|
|
18517
19010
|
}
|
|
18518
19011
|
//#endregion
|
|
18519
19012
|
//#region src/commands/mcp/server.ts
|
|
19013
|
+
function setMcpTransport(transport) {
|
|
19014
|
+
Object.assign(process.env, { MCP_TRANSPORT: transport });
|
|
19015
|
+
}
|
|
18520
19016
|
/**
|
|
18521
19017
|
* Creates an MCP server with tools for interacting with promptfoo
|
|
18522
19018
|
*/
|
|
@@ -18552,7 +19048,7 @@ async function createMcpServer() {
|
|
|
18552
19048
|
*/
|
|
18553
19049
|
async function startHttpMcpServer(port) {
|
|
18554
19050
|
if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error(`Invalid port number: ${port}. Port must be an integer between 1 and 65535.`);
|
|
18555
|
-
|
|
19051
|
+
setMcpTransport("http");
|
|
18556
19052
|
const app = express();
|
|
18557
19053
|
app.use(express.json());
|
|
18558
19054
|
const mcpServer = await createMcpServer();
|
|
@@ -18609,7 +19105,7 @@ async function startHttpMcpServer(port) {
|
|
|
18609
19105
|
* Starts an MCP server with stdio transport
|
|
18610
19106
|
*/
|
|
18611
19107
|
async function startStdioMcpServer() {
|
|
18612
|
-
|
|
19108
|
+
setMcpTransport("stdio");
|
|
18613
19109
|
logger.transports.forEach((transport) => {
|
|
18614
19110
|
if (transport.constructor.name === "Console" || transport.name === "console") transport.silent = true;
|
|
18615
19111
|
});
|
|
@@ -19478,7 +19974,7 @@ function printErrorInformation(errorLogFile, debugLogFile) {
|
|
|
19478
19974
|
async function main() {
|
|
19479
19975
|
setupEnvFilesFromArgv();
|
|
19480
19976
|
initializeRunLogging();
|
|
19481
|
-
if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") process.env
|
|
19977
|
+
if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") Object.assign(process.env, { PROMPTFOO_DISABLE_UPDATE: "true" });
|
|
19482
19978
|
await checkForUpdates();
|
|
19483
19979
|
await runDbMigrations();
|
|
19484
19980
|
const { defaultConfig, defaultConfigPath } = await loadDefaultConfig();
|