promptfoo 0.121.4 → 0.121.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-DQkFNqE9.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-DhMYUUbu.js → accounts-Ca7WIoPY.js} +12 -7
- package/dist/src/{accounts-F9d_5sMC.js → accounts-CjFnOPmb.js} +14 -9
- package/dist/src/{accounts-Dy17bs4D.cjs → accounts-CmWzeD2d.cjs} +16 -10
- package/dist/src/{accounts-DdJ2pHMI.js → accounts-DanM1wq_.js} +13 -8
- package/dist/src/{agentic-utils-qFlm6zes.js → agentic-utils-CJ0j3fBi.js} +3 -3
- package/dist/src/{agentic-utils-w68v6_Dz.js → agentic-utils-DDEGRV9v.js} +3 -3
- package/dist/src/{agentic-utils-BpX5b23w.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
- package/dist/src/{agentic-utils-P172hM8B.js → agentic-utils-TxUEMPYS.js} +2 -2
- package/dist/src/{agents-BahDpe5G.cjs → agents-B4sRuXg3.cjs} +7 -6
- package/dist/src/{agents-pQeBEXMm.js → agents-B8q7h_ek.js} +5 -5
- package/dist/src/{agents-CgaMXvLM.js → agents-CBgJvRkB.js} +21 -10
- package/dist/src/{agents-C-R_jfzI.js → agents-CYn2n3QP.js} +4 -4
- package/dist/src/{agents-8FDnTriG.js → agents-D-vDNFx4.js} +21 -10
- package/dist/src/{agents-aYPQLf8W.js → agents-LrHuQqr1.js} +20 -9
- package/dist/src/{agents-DJ35I3Nt.js → agents-QGg76OF-.js} +5 -5
- package/dist/src/{agents-D7-HGxUj.cjs → agents-eHZ9nlgA.cjs} +21 -10
- package/dist/src/{aimlapi-sgYnkE54.js → aimlapi-CJEbQ0o6.js} +7 -7
- package/dist/src/{aimlapi-BD6J9oKt.js → aimlapi-D5HXzZ0s.js} +6 -6
- package/dist/src/{aimlapi-qcK4OT55.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
- package/dist/src/{aimlapi-BCq3MHeL.js → aimlapi-eYv3a_DK.js} +7 -7
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
- package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +31 -6
- package/dist/src/{audio-COrn8rM6.js → audio-BqnRvcWG.js} +3 -3
- package/dist/src/{audio-DcVKoInv.js → audio-CPMtV1yR.js} +4 -4
- package/dist/src/{audio-B7izf48x.js → audio-DyiebVB3.js} +4 -4
- package/dist/src/{audio-BQtNuYBj.cjs → audio-FnxbEnSE.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
- package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
- package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
- package/dist/src/{base-PYJvBE1i.js → base-CKLo890h.js} +4 -3
- package/dist/src/{base-fZ9wgg50.js → base-Co80MMCi.js} +5 -4
- package/dist/src/{base-D-670DX8.cjs → base-DGJW48uz.cjs} +5 -4
- package/dist/src/{base-yrI1Yal4.js → base-E9I8zXjz.js} +5 -4
- package/dist/src/bestOfN-B3wNzjSB.js +137 -0
- package/dist/src/bestOfN-BBsO41z4.js +136 -0
- package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
- package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
- package/dist/src/{blobs-D2FAd1Q5.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-BCZavS8s.js → blobs-CeFdPn_T.js} +3 -3
- package/dist/src/{blobs-BQWqnnvL.js → blobs-DODuTK-a.js} +3 -3
- package/dist/src/{blobs-C-F78Kfn.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/{cache-BIyPcp5v.cjs → cache-CPGUA4Yl.cjs} +135 -25
- package/dist/src/cache-Cf7b4pWE.js +3 -0
- package/dist/src/{cache-D5NZmMiT.js → cache-DIXbtkNO.js} +125 -10
- package/dist/src/{cache-mb7c8hbp.js → cache-DpPWrkTE.js} +128 -12
- package/dist/src/{cache-C4Xb-hNb.js → cache-roFAE0cI.js} +126 -11
- package/dist/src/{chat-I9izLm49.js → chat-CUCorGiL.js} +12 -12
- package/dist/src/{chat-BPXSW8Bv.cjs → chat-DG1wG4w0.cjs} +6 -6
- package/dist/src/{chat-BfPaS15_.js → chat-Dabu84Br.js} +12 -12
- package/dist/src/{chat-Dr3DUQ0D.js → chat-DqUFcWI0.js} +12 -12
- package/dist/src/{chat-CclRbxGf.cjs → chat-DxTDQ83C.cjs} +14 -13
- package/dist/src/{chat-MKxMnZJZ.js → chat-GmlolEwo.js} +4 -4
- package/dist/src/{chat-0bwXjVP0.js → chat-TP8Qifkh.js} +6 -6
- package/dist/src/{chat-mW0ORo8G.js → chat-iwaM5UTQ.js} +6 -6
- package/dist/src/{chatkit-zUIVoDos.js → chatkit-B6DWi70Q.js} +4 -4
- package/dist/src/{chatkit-BoWoSgXl.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-Cv6AhukM.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CJnHRRMM.js → chatkit-lb6FK02w.js} +4 -4
- package/dist/src/{claude-agent-sdk-Dtq_L-Sc.js → claude-agent-sdk-BQNp_y-F.js} +212 -67
- package/dist/src/{claude-agent-sdk-BQNuLaAK.js → claude-agent-sdk-D5Jl0SDh.js} +212 -67
- package/dist/src/{claude-agent-sdk-CPJo3dBQ.cjs → claude-agent-sdk-DH416NBD.cjs} +218 -72
- package/dist/src/{claude-agent-sdk-nfAIcxNf.js → claude-agent-sdk-x1XJ1-pU.js} +212 -67
- package/dist/src/{cloud-DQZ5sVjW.js → cloud-D3DiFqH6.js} +3 -3
- package/dist/src/cloud-p96PA4MH.js +3 -0
- package/dist/src/{cloudflare-ai-BIB567w6.js → cloudflare-ai-B6NVI3ax.js} +4 -4
- package/dist/src/{cloudflare-ai-Dl3N9OVD.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
- package/dist/src/{cloudflare-ai-DlKr0rY7.js → cloudflare-ai-RFSojyXG.js} +6 -6
- package/dist/src/{cloudflare-ai-DGLte7Py.js → cloudflare-ai-r4tbYmWU.js} +6 -6
- package/dist/src/{cloudflare-gateway-CiIZHU0Q.js → cloudflare-gateway-BCkLouto.js} +5 -5
- package/dist/src/{cloudflare-gateway-DI1HNP5F.js → cloudflare-gateway-BaZ4insB.js} +3 -3
- package/dist/src/{cloudflare-gateway-BDZrYydE.js → cloudflare-gateway-CF-Vb-2Z.js} +5 -5
- package/dist/src/{cloudflare-gateway-BYDp495F.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
- package/dist/src/codex-app-server-B8KHEiF4.js +1915 -0
- package/dist/src/codex-app-server-CnrLBCeA.cjs +1921 -0
- package/dist/src/codex-app-server-DIXZ230V.js +1915 -0
- package/dist/src/codex-app-server-Dd22dC_N.js +1916 -0
- package/dist/src/{codex-sdk-CpqiOqDO.js → codex-sdk-B6Wah8Pa.js} +6 -6
- package/dist/src/codex-sdk-BGjVAk23.js +3 -0
- package/dist/src/{codex-sdk-C2_M2pl_.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
- package/dist/src/{codex-sdk-Rtky3M4I.js → codex-sdk-CmQABzV3.js} +6 -6
- package/dist/src/{codex-sdk-CErXn7qh.js → codex-sdk-D2d54RL8.js} +5 -5
- package/dist/src/{cometapi-CtJ-mS8R.js → cometapi-Bu9B8NUY.js} +8 -8
- package/dist/src/{cometapi-DT-jlVCB.js → cometapi-CtzNCHKu.js} +7 -7
- package/dist/src/{cometapi-UVOryo4W.cjs → cometapi-DHCDlQUI.cjs} +8 -8
- package/dist/src/{cometapi-BUlt_ELa.js → cometapi-OBILPLlu.js} +8 -8
- package/dist/src/{completion-HUe8wDhZ.js → completion-CO2e1_62.js} +6 -6
- package/dist/src/{completion-BozdoXba.cjs → completion-CSYfl2cd.cjs} +6 -6
- package/dist/src/{completion-x0a_c2y1.js → completion-DZNxcyfG.js} +6 -6
- package/dist/src/{completion-Dnxn7E-j.js → completion-sNvCLTAP.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-CwDVU5xr.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-B7KvgoOD.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-ChI45QR1.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
- package/dist/src/crescendo-CU_Y2i-m.js +702 -0
- package/dist/src/crescendo-J1Xx4_zb.js +703 -0
- package/dist/src/crescendo-QiaSLW0d.js +701 -0
- package/dist/src/custom-BJfP00Bh.js +619 -0
- package/dist/src/custom-CZVn-1-r.js +620 -0
- package/dist/src/custom-Cqia7M0D.cjs +621 -0
- package/dist/src/custom-notggYVl.js +618 -0
- package/dist/src/{docker-DCgsveLD.js → docker-4D1eL6Gq.js} +6 -6
- package/dist/src/{docker-ClnmCf1Z.js → docker-BBv1WUDu.js} +5 -5
- package/dist/src/{docker-DS4_Osau.cjs → docker-D06JUoe2.cjs} +6 -6
- package/dist/src/{docker-CQmlA2NU.js → docker-DdJQBxK9.js} +6 -6
- package/dist/src/{embedding-D3xTseo7.js → embedding--UZVe4_7.js} +6 -6
- package/dist/src/{embedding-I45KG3o7.cjs → embedding-BbrwopfX.cjs} +6 -6
- package/dist/src/{embedding-nFbumxcv.js → embedding-Bi3rxrZF.js} +5 -5
- package/dist/src/{embedding-DD9wa3ae.js → embedding-C251p1-8.js} +6 -6
- package/dist/src/{errors-Cw810C93.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-Dh4dOLlt.js → esm-B6whoAcf.js} +2 -2
- package/dist/src/{esm-CtEPLdAj.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-C7PnfdF8.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-tVgYPY-f.js → esm-Bexx2PFc.js} +2 -2
- package/dist/src/{eval-u4UVafl6.js → eval-0VRANImH.js} +21 -21
- package/dist/src/{eval-CzJFfFO9.js → eval-DscR5iOM.js} +1 -1
- package/dist/src/{evalResult-Bgm9ZH31.js → evalResult-2RRJvFyB.js} +41 -16
- package/dist/src/{evalResult-KZqXl4XP.cjs → evalResult-CvtS8h8u.cjs} +51 -15
- package/dist/src/evalResult-DqzsS6_W.js +3 -0
- package/dist/src/{evalResult-D3hVYFis.js → evalResult-eUkJv9Ko.js} +40 -15
- package/dist/src/evaluator-DNdJF1Gv.js +3 -0
- package/dist/src/{evaluator-IvuDYSvQ.js → evaluator-DRoiYB2q.js} +1060 -187
- package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
- package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
- package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
- package/dist/src/{extractor-CAfTSraf.js → extractor-BR7XAzAL.js} +6 -6
- package/dist/src/{extractor-WVPOrH43.cjs → extractor-BdxEtt3J.cjs} +6 -6
- package/dist/src/{extractor-DNSeBVOJ.js → extractor-CIW3iN-b.js} +6 -6
- package/dist/src/{extractor-Dk6bRWkv.js → extractor-CxRtnaHl.js} +5 -5
- package/dist/src/{fetch-B0Z3Oe4k.js → fetch-BufrQtvR.js} +93 -40
- package/dist/src/{fetch-BEWnXrrG.js → fetch-DXUnXkVU.js} +89 -40
- package/dist/src/{fetch-CJU5ELPa.cjs → fetch-Dw4XZHjj.cjs} +330 -270
- package/dist/src/{fetch-Di00EQrc.js → fetch-It34O8Ur.js} +305 -252
- package/dist/src/fetch-_YgGd2qv.js +3 -0
- package/dist/src/{fileExtensions-bYh77CN8.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-DnqA1y9x.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-AWa2ZML4.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-DZzPsexs.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-yRuxj9-L.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-DWdZ28hY.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-XnrcgDCe.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-COYDi-tC.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ckd3q3AY.js +467 -0
- package/dist/src/goat-Qgurm-NP.js +466 -0
- package/dist/src/goat-ghadEDdy.js +465 -0
- package/dist/src/goat-una6pZGP.cjs +469 -0
- package/dist/src/graders-BDT7dif6.js +3 -0
- package/dist/src/{graders-eIHhRqoC.js → graders-BGP99PdK.js} +2416 -2224
- package/dist/src/{graders-Zy3x0zqX.js → graders-BX0f2tvS.js} +2423 -2226
- package/dist/src/{graders-pvbReLLn.js → graders-C0nXU_ZP.js} +1806 -1609
- package/dist/src/{graders--zknU_uk.cjs → graders-ClrU2fnd.cjs} +2219 -1949
- package/dist/src/hydra-BSNZZm2M.js +543 -0
- package/dist/src/hydra-BxdG4nkg.js +541 -0
- package/dist/src/hydra-DE4xWwyc.js +542 -0
- package/dist/src/hydra-DrJttnvw.cjs +542 -0
- package/dist/src/image-B4oBtu6J.js +443 -0
- package/dist/src/{image-dnoUgPrC.js → image-BN-hjLL9.js} +4 -4
- package/dist/src/{image-9302QVqR.js → image-B_fPIwdg.js} +3 -3
- package/dist/src/image-BvUAW344.js +442 -0
- package/dist/src/image-Cvjwx1uY.js +442 -0
- package/dist/src/{image-De2FBmYV.cjs → image-DfVCGPbI.cjs} +4 -4
- package/dist/src/{image-u7-rKnYU.js → image-QzmydkiG.js} +4 -4
- package/dist/src/image-X0oY4350.cjs +465 -0
- package/dist/src/index.cjs +1689 -558
- package/dist/src/index.d.cts +3270 -1624
- package/dist/src/index.d.ts +3270 -1624
- package/dist/src/index.js +1553 -438
- package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
- package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
- package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
- package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
- package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
- package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
- package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
- package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CLERUB0c.js → interactiveCheck-C4QlIuoR.js} +2 -2
- package/dist/src/{invariant-BtWWVVhl.js → invariant-B2Rf6avk.js} +1 -1
- package/dist/src/{invariant-vgHWClmd.js → invariant-DIYf9sP1.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-CpU6i2As.js +490 -0
- package/dist/src/iterative-DJQEQpG3.js +491 -0
- package/dist/src/iterative-DQBuWM-j.cjs +493 -0
- package/dist/src/iterative-FTS4Bz67.js +492 -0
- package/dist/src/iterativeImage-BUABMVOA.js +413 -0
- package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
- package/dist/src/iterativeImage-BzUapOUi.js +414 -0
- package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
- package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
- package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
- package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
- package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
- package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
- package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
- package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
- package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
- package/dist/src/{knowledgeBase-Dgc7CBWF.js → knowledgeBase-BadkINlJ.js} +24 -10
- package/dist/src/{knowledgeBase-RhFPGWDc.js → knowledgeBase-Bi_8sV-H.js} +25 -11
- package/dist/src/{knowledgeBase-lm9RXSAm.js → knowledgeBase-CkMljjdg.js} +25 -11
- package/dist/src/{knowledgeBase-Bpoe_nLu.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
- package/dist/src/{litellm-DRjpcSa7.js → litellm-BKBo0jpC.js} +5 -5
- package/dist/src/{litellm-C2kqjxqp.js → litellm-BXyn5kZK.js} +5 -5
- package/dist/src/{litellm-p37R1dzQ.js → litellm-CNcfbCfa.js} +4 -4
- package/dist/src/{litellm-CoyI4IAl.cjs → litellm-CtAr7bKG.cjs} +5 -5
- package/dist/src/{logger-DksKw1Qc.js → logger-BbY6ypFL.js} +2 -2
- package/dist/src/{logger-B88EkIn6.js → logger-KD8JjCRJ.js} +2 -2
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-KgTCXrZC.js → luma-ray-BMX1iEB6.js} +5 -5
- package/dist/src/{luma-ray-B863CmuZ.js → luma-ray-CR5TSpp4.js} +5 -5
- package/dist/src/{luma-ray-BxVKaW2a.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
- package/dist/src/{luma-ray-BTTLtqQ8.js → luma-ray-OEMmS1RB.js} +6 -6
- package/dist/src/main.js +909 -369
- package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
- package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
- package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
- package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
- package/dist/src/{messages-BTQz42fn.js → messages-BabO-cX8.js} +273 -17
- package/dist/src/{messages-811uVVW5.cjs → messages-DBPir0TQ.cjs} +278 -18
- package/dist/src/{messages-zWbkLLHz.js → messages-DGUlSNU7.js} +273 -17
- package/dist/src/{messages-MYTQ2TWp.js → messages-vsE_-Lv0.js} +273 -17
- package/dist/src/{meteor-DHdzY1Ss.js → meteor--TZYICTI.js} +2 -2
- package/dist/src/{meteor-Co1VQ1u5.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-CU5UAE-H.js → meteor-Cl_yd7rJ.js} +2 -2
- package/dist/src/{meteor-DuAFv6gF.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
- package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
- package/dist/src/mischievousUser-frFYKxu6.js +47 -0
- package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
- package/dist/src/{modelslab-Dk1JAtVo.cjs → modelslab-CNV5bMSk.cjs} +7 -7
- package/dist/src/{modelslab-D0erNWKe.js → modelslab-Cogmu4mG.js} +6 -6
- package/dist/src/{modelslab-DIq-6y7x.js → modelslab-Dzst7VTU.js} +6 -6
- package/dist/src/{modelslab-wu9yi5GE.js → modelslab-EyDczZ5A.js} +7 -7
- package/dist/src/{nova-reel-CCFRfeRb.js → nova-reel-BGPNBOMS.js} +6 -6
- package/dist/src/{nova-reel-DQrm74ng.js → nova-reel-B_5NKFu1.js} +5 -5
- package/dist/src/{nova-reel-gr11WG7f.js → nova-reel-C4eUJGse.js} +5 -5
- package/dist/src/{nova-reel-CrLXVKQf.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
- package/dist/src/{nova-sonic-BYdp-QLs.js → nova-sonic-BNGmgfFz.js} +4 -4
- package/dist/src/{nova-sonic-TDgrlTk7.js → nova-sonic-ChPlh5na.js} +4 -4
- package/dist/src/{nova-sonic-B_ZXcUJB.js → nova-sonic-CrV0iaY_.js} +3 -3
- package/dist/src/{nova-sonic-i5tUvXKn.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
- package/dist/src/{openai-DhVEmgeZ.js → openai-BMHD2Huo.js} +2 -2
- package/dist/src/{openai-URNyItar.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-Qsvz25mV.js → openai-CJrsh9n4.js} +2 -2
- package/dist/src/{openai-iYtrXzOX.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-CnQ363Wi.js → openclaw-BIHlu_36.js} +10 -8
- package/dist/src/{openclaw-CwzlQSQX.js → openclaw-CF7fMido.js} +9 -7
- package/dist/src/{openclaw-wX9rtfke.cjs → openclaw-Dphc01BY.cjs} +18 -15
- package/dist/src/{openclaw-CLWrW03k.js → openclaw-zIJAsz3P.js} +10 -8
- package/dist/src/{opencode-sdk-BUu5Nevv.js → opencode-sdk-B3vlPLsp.js} +40 -5
- package/dist/src/{opencode-sdk-BxD8vXp_.js → opencode-sdk-D05JSgMQ.js} +40 -5
- package/dist/src/{opencode-sdk-BZ2idgYA.cjs → opencode-sdk-DoY6GbWw.cjs} +46 -10
- package/dist/src/{opencode-sdk-GI2KaAXq.js → opencode-sdk-sRKYHGoI.js} +39 -4
- package/dist/src/{otlpReceiver-BntK801g.js → otlpReceiver--gTpSagc.js} +120 -4
- package/dist/src/{otlpReceiver-DmVulbhC.js → otlpReceiver-B2eaKC8C.js} +120 -4
- package/dist/src/{otlpReceiver-B2z58l4e.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-BfcVq2Nq.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BDrfT30-.js +180 -0
- package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
- package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
- package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-CPQ_CmVO.js → providerRegistry-1gB5vtzQ.js} +2 -2
- package/dist/src/{providerRegistry-CQMdTmHP.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-Bvh8mv85.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-CWoPjKFZ.js → providerRegistry-ReCd0sFa.js} +2 -2
- package/dist/src/{providers-BV_KMZje.js → providers-B9KzWxAX.js} +10558 -21587
- package/dist/src/{providers-DruaQfwu.js → providers-BCCz6_IX.js} +1228 -12196
- package/dist/src/{providers-1eKkXBKp.cjs → providers-BDVVIQM6.cjs} +10649 -21843
- package/dist/src/{providers-iUt5fbAN.js → providers-BYAn82cf.js} +1 -1
- package/dist/src/{providers-Domz_llv.js → providers-DVYRZP4E.js} +10589 -21570
- package/dist/src/{pythonUtils-Cldx7huE.js → pythonUtils-CLCgQ9tt.js} +3 -3
- package/dist/src/{pythonUtils-CnndUbW-.js → pythonUtils-CgYxeSmO.js} +3 -3
- package/dist/src/{pythonUtils-tAJvvpS-.cjs → pythonUtils-Cokhluq3.cjs} +8 -7
- package/dist/src/{pythonUtils-C2UQ30Rz.js → pythonUtils-D0BYebvX.js} +3 -3
- package/dist/src/{quiverai-DFotyafY.cjs → quiverai-BAp6iTZD.cjs} +4 -4
- package/dist/src/{quiverai-aPPvXOgn.js → quiverai-BvIhI_0l.js} +4 -4
- package/dist/src/{quiverai-DR0SnIQV.js → quiverai-CdTWPe-A.js} +3 -3
- package/dist/src/{quiverai-CtWi6x_g.js → quiverai-Cv7rJKDz.js} +4 -4
- package/dist/src/registry-BUJrgjwv.js +124 -0
- package/dist/src/registry-DXm1t_x0.js +125 -0
- package/dist/src/registry-Dp5EqoXc.js +124 -0
- package/dist/src/registry-KCVF1CFC.cjs +124 -0
- package/dist/src/{server-D6Il2Sob.js → remoteGeneration-B1_XsKXU.js} +16 -108
- package/dist/src/{server-BSB45Nt9.js → remoteGeneration-COpWcmWd.js} +15 -146
- package/dist/src/{server-Dx2TyCH2.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
- package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-CgVDrJmM.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-Bi9vBuW_.cjs → responses-1UFFF9N_.cjs} +51 -16
- package/dist/src/{responses-DL9m8CyY.js → responses-B3W2JvOQ.js} +49 -15
- package/dist/src/{responses--OsX2aYW.js → responses-B6ktc3Ra.js} +49 -15
- package/dist/src/{responses-C-flexAY.js → responses-URRzV8qE.js} +49 -15
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-DsGrTx8R.js → rubyUtils-CXlFM2rR.js} +3 -3
- package/dist/src/{rubyUtils-DVLeA2jg.js → rubyUtils-CnlW8AYb.js} +3 -3
- package/dist/src/{rubyUtils-B6eljPuh.cjs → rubyUtils-CqUWBZAt.cjs} +18 -27
- package/dist/src/{rubyUtils-CYSQEG4a.js → rubyUtils-DdGojpfv.js} +3 -3
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-BVkaG2-l.js → sagemaker-CujrzP1a.js} +62 -51
- package/dist/src/{sagemaker-XnfhheQv.cjs → sagemaker-DzffAqo_.cjs} +65 -53
- package/dist/src/{sagemaker-D67yzMzs.js → sagemaker-vhtSV7JI.js} +62 -51
- package/dist/src/{sagemaker-BveBvuxm.js → sagemaker-yr1QKeBs.js} +61 -50
- package/dist/src/{scanner-1DqWi1Ej.js → scanner-DS0109SS.js} +7 -7
- package/dist/src/server/index.js +5105 -605
- package/dist/src/server-B8rqV126.cjs +126 -0
- package/dist/src/server-BaLytskk.js +3 -0
- package/dist/src/server-CMJD10J4.js +107 -0
- package/dist/src/server-Ddp8GNMp.js +146 -0
- package/dist/src/server-DhMHosWj.js +182 -0
- package/dist/src/shared-7pmVZLNO.js +1334 -0
- package/dist/src/shared-9WHQ1oNE.js +1335 -0
- package/dist/src/{fileExtensions-BArZuxsI.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-D6IjElRI.js +1334 -0
- package/dist/src/shared-WkgnDkcg.cjs +1436 -0
- package/dist/src/{signal-CE5G3a7x.js → signal-CSurUUyV.js} +3 -3
- package/dist/src/simulatedUser-C9aQObBI.js +222 -0
- package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
- package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
- package/dist/src/simulatedUser-p3tACcmw.js +223 -0
- package/dist/src/{slack-DDUe-5MC.js → slack-Bapo-7_8.js} +2 -2
- package/dist/src/{slack-1Rhq0EoV.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-D5Wpy8LM.js → slack-DTEFhrMn.js} +2 -2
- package/dist/src/{slack-acRb0IqQ.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-BU4qcnOb.js +875 -0
- package/dist/src/storage-CA-v9V2v.cjs +911 -0
- package/dist/src/storage-CD-GWAdx.js +822 -0
- package/dist/src/storage-QdU-SmvD.js +834 -0
- package/dist/src/{store-DAAyxcy6.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CYEy5J2D.js → store-DKd5592Q.js} +51 -20
- package/dist/src/{store-M0b1WfYb.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-7CS3Alao.cjs +2360 -0
- package/dist/src/strategies-CiSeroPH.js +2331 -0
- package/dist/src/strategies-DRJjGTIY.js +2333 -0
- package/dist/src/{tables-DQ4WU5tX.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-CsWou1Bx.js → tables-CxjU7bBd.js} +3 -3
- package/dist/src/{tables-DUfh1F7Z.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-C4CH3zRr.js → tables-DafUHOeh.js} +3 -3
- package/dist/src/{telemetry-CQPez_Jp.js → telemetry-00ezXr_t.js} +5 -4
- package/dist/src/telemetry-ByPqDcKC.js +3 -0
- package/dist/src/{telemetry-Dsw_faFj.cjs → telemetry-CJ7FnCsc.cjs} +18 -11
- package/dist/src/{telemetry-dbaJ0E98.js → telemetry-DmXYcJNV.js} +5 -4
- package/dist/src/{telemetry-Dvqxv3YC.js → telemetry-DwX9XUN5.js} +4 -3
- package/dist/src/{text-KvuD2Iko.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-DHxdyQqT.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BVi-cLPJ.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-C-bmyHoE.js → tokenUsageUtils-BjVkdk18.js} +1 -1
- package/dist/src/{tokenUsageUtils-CXrvO-wA.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/tokenUsageUtils-CmnQ0G2m.js +142 -0
- package/dist/src/{tokenUsageUtils-Bb7DkZPz.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
- package/dist/src/tracingOptions-Chi74lOD.js +219 -0
- package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
- package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
- package/dist/src/{transcription-DuWDupG7.js → transcription-B8uIgCYX.js} +5 -5
- package/dist/src/{transcription-CJspiD2c.js → transcription-CfU5loSq.js} +6 -6
- package/dist/src/{transcription-V2HaAmy2.js → transcription-Dkd22_4K.js} +6 -6
- package/dist/src/{transcription-BvjmiYB1.cjs → transcription-mzuf18Mq.cjs} +9 -8
- package/dist/src/{transform-lQrDE1BQ.js → transform-BIMynQsA.js} +9 -9
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-CTeuTR3S.cjs → transform-DhNkAUs8.cjs} +13 -12
- package/dist/src/{transform-CG0ehZNG.js → transform-DmvYBRll.js} +9 -9
- package/dist/src/{transform-zDhMmzwX.js → transform-EtD4jAWi.js} +9 -9
- package/dist/src/{transformersAvailability-CcHusyhw.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-DLlROWhg.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-Cju9mHgR.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-Dm9JM6Vb.js → types-BFevViUY.js} +115 -19
- package/dist/src/{types-Bgh5SOn6.js → types-BJQBBPTP.js} +115 -19
- package/dist/src/{types-CeaeaZdP.cjs → types-CxJvaY2S.cjs} +357 -172
- package/dist/src/{types-BGQDAP8i.js → types-D6glLbdF.js} +271 -170
- package/dist/src/{util-BYvQUPp7.js → util--WMgw7wM.js} +28 -8
- package/dist/src/{util-C9J8ahRn.js → util-5WnCSb0h.js} +72 -48
- package/dist/src/{util-CN3SrLT4.cjs → util-BSIuSLVK.cjs} +74 -49
- package/dist/src/{util-C8e5uydV.js → util-Bx677_k2.js} +154 -147
- package/dist/src/util-CN8om2rz.cjs +386 -0
- package/dist/src/{util-DDs-7g6-.js → util-CoQWM76y.js} +28 -8
- package/dist/src/util-DNl96nNs.js +327 -0
- package/dist/src/{util-DxWpWjhc.js → util-DURocbYR.js} +667 -507
- package/dist/src/util-Df8YMvS1.js +327 -0
- package/dist/src/{util-DvU2Pw8c.js → util-DiQ3QvBB.js} +28 -8
- package/dist/src/{util-oGMLA7vc.js → util-I-Rf-KaD.js} +862 -577
- package/dist/src/{util-olYL5C6N.cjs → util-IYzs5Y04.cjs} +33 -7
- package/dist/src/{util-D9TisOyk.js → util-LKTmNsMQ.js} +71 -47
- package/dist/src/{util-Bxn8emtE.cjs → util-SPsvFONY.cjs} +738 -582
- package/dist/src/{util-D3q0WQ-0.js → util-efByNxcr.js} +72 -48
- package/dist/src/util-kDURhgJW.js +328 -0
- package/dist/src/{utils-DJfvjyMj.js → utils-B0lzitHZ.js} +3 -3
- package/dist/src/{utils-BLJKfv0y.js → utils-BFOh20Gb.js} +3 -3
- package/dist/src/{utils-hXtCYanr.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-B05gLxER.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/version-BK20a4sw.js +16 -0
- package/dist/src/version-BWCSaByA.cjs +27 -0
- package/dist/src/version-eRkNuGv8.js +17 -0
- package/dist/src/version-lpHV_53E.js +16 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +56 -28
- package/dist/src/app/assets/Report-CQYFezYu.js +0 -1
- package/dist/src/app/assets/index-BXGkeMwh.css +0 -1
- package/dist/src/app/assets/index-BzJt18Jz.js +0 -385
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-Cr9oLMUa.js +0 -3
- package/dist/src/cache-DbLsVWB2.cjs +0 -3
- package/dist/src/cloud-Hphvo8kr.js +0 -3
- package/dist/src/codex-sdk-BAmYE7qy.js +0 -3
- package/dist/src/codex-sdk-CWEnH70W.cjs +0 -2
- package/dist/src/evalResult-D8MT9p0s.js +0 -3
- package/dist/src/evalResult-DElBuddX.js +0 -2
- package/dist/src/evalResult-Dvc-iucu.cjs +0 -2
- package/dist/src/evaluator-CVessDWe.js +0 -3
- package/dist/src/fetch-C7bGKDlQ.js +0 -3
- package/dist/src/graders-BOAzQEUe.cjs +0 -2
- package/dist/src/graders-D4BTsZdG2.js +0 -3
- package/dist/src/graders-DOJK1XpV.js +0 -2
- package/dist/src/graders-NAv9LcBn.js +0 -2
- package/dist/src/image-B5Mv-Z3h.js +0 -257
- package/dist/src/image-DVz2RiMF.js +0 -258
- package/dist/src/image-qUpPvmNZ.js +0 -257
- package/dist/src/image-x6KqLQl4.cjs +0 -280
- package/dist/src/providers-Bp4S-FvO.js +0 -2
- package/dist/src/providers-DV3ax9e_.cjs +0 -3
- package/dist/src/providers-u9Enmfok.js +0 -2
- package/dist/src/render-CH-62LbA.js +0 -135
- package/dist/src/render-CMEpfLaO.js +0 -136
- package/dist/src/render-DHIZ6_k8.js +0 -135
- package/dist/src/render-DfQSFxGE.cjs +0 -165
- package/dist/src/rubyUtils-D1L2d3jb.js +0 -3
- package/dist/src/rubyUtils-DUbq4tff.cjs +0 -2
- package/dist/src/server-BNYztJkh.js +0 -385
- package/dist/src/server-DCtHUqlp.js +0 -3
- package/dist/src/server-DaA2eR26.cjs +0 -2
- package/dist/src/store-CWOSz6D_.cjs +0 -2
- package/dist/src/store-DCDBhv7B.js +0 -3
- package/dist/src/store-Dn9HUkdW.js +0 -240
- package/dist/src/telemetry-C1IqxcdW.js +0 -3
- package/dist/src/telemetry-C4ZEa_es.cjs +0 -2
- package/dist/src/transform-Bbg6A8Jk.js +0 -216
- package/dist/src/transform-CUnzlsbn.cjs +0 -228
- package/dist/src/transform-DYX1_Xnh.js +0 -216
- package/dist/src/transform-DgKlRr73.cjs +0 -2
- package/dist/src/transform-M6ITAESf.js +0 -3
- package/dist/src/transform-UN5UGu8U.js +0 -213
package/dist/src/main.js
CHANGED
|
@@ -1,34 +1,45 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { a as
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { r as
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import { a as
|
|
14
|
-
import { i as
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import { n as
|
|
20
|
-
import { a as
|
|
21
|
-
import { n as
|
|
22
|
-
import {
|
|
23
|
-
import { n as
|
|
24
|
-
import { n as
|
|
25
|
-
import { n as
|
|
26
|
-
import {
|
|
27
|
-
import { n as
|
|
28
|
-
import { a as
|
|
29
|
-
import {
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
2
|
+
import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
|
|
3
|
+
import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, M as TERMINAL_MAX_WIDTH, N as getDefaultPort, O as CLOUD_PROVIDER_PREFIX, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, _ as REQUEST_TIMEOUT_MS, f as sleep, i as fetchWithTimeout, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, x as isPromptfooSampleTarget } from "./fetch-DXUnXkVU.js";
|
|
4
|
+
import { n as VERSION } from "./version-eRkNuGv8.js";
|
|
5
|
+
import { t as invariant } from "./invariant-B2Rf6avk.js";
|
|
6
|
+
import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CjFnOPmb.js";
|
|
7
|
+
import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-D3DiFqH6.js";
|
|
8
|
+
import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-00ezXr_t.js";
|
|
9
|
+
import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, _t as CODING_AGENT_PLUGINS, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BFevViUY.js";
|
|
10
|
+
import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
|
|
11
|
+
import { r as importModule, t as getDirectory } from "./esm-Bexx2PFc.js";
|
|
12
|
+
import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, t as renderEnvOnlyInObject } from "./render-CSP99NLm.js";
|
|
13
|
+
import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-B1_XsKXU.js";
|
|
14
|
+
import { a as openAuthBrowser, i as checkServerRunning, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-DhMHosWj.js";
|
|
15
|
+
import { _ as makeRequest$1, c as getCloudDatabaseId, d as getOrgContext, f as getPluginSeverityOverridesFromCloud, g as isCloudProvider, h as getUserTeams, i as retrieveMedia, l as getConfigFromCloud, m as getProviderFromCloud, o as canCreateTargets, r as mediaExists, s as checkCloudPermissions, t as getMediaStorage, u as getEvalConfigFromCloud, v as resolveTeamFromIdentifier, y as resolveTeamId } from "./storage-BU4qcnOb.js";
|
|
16
|
+
import { n as sha256, t as randomSequence } from "./createHash-CgRvs4Fn.js";
|
|
17
|
+
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-DpPWrkTE.js";
|
|
18
|
+
import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
|
|
19
|
+
import { D as normalizeProviderRef, L as readFilters, M as maybeLoadFromExternalFile, R as readOutput, _ as resultIsForTestCase, a as ComparisonEvalNotFoundError, b as getProviderDescription, c as getEvalTableOutputPromptLocationsBySize, d as fetchCsvFromGoogleSheet, f as setupEnv, g as getTestCaseDeduplicationKey, h as filterRuntimeVars, i as writeOutput, j as maybeLoadConfigFromExternalFile, l as getEvalTablePromptStrippedPayload, m as extractRuntimeVars, n as createOutputMetadata, o as evalTableToJson, p as deduplicateTestCases, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as mergeComparisonTables, v as checkProviderApiKeys, y as doesProviderRefMatch } from "./util-DURocbYR.js";
|
|
20
|
+
import { a as resolveProviderConfigs, d as MCPProvider, f as HttpProvider, i as resolveProvider, m as createTransformResponse, n as loadApiProvider, p as createTransformRequest, r as loadApiProviders, t as getProviderIds } from "./providers-B9KzWxAX.js";
|
|
21
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
|
|
22
|
+
import { n as escapeRegExp, t as ellipsize } from "./text-Db-Wt2u2.js";
|
|
23
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DafUHOeh.js";
|
|
24
|
+
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DODuTK-a.js";
|
|
25
|
+
import { n as createPlaceholderInputValue, r as materializeInputVariablesWithMetadata, t as buildPromptInputDescriptions } from "./inputVariables-DXFdi7AI.js";
|
|
26
|
+
import { a as extractPromptFromTags, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, r as extractInputVarsFromPrompt, s as getShortPluginId } from "./util-kDURhgJW.js";
|
|
27
|
+
import { n as PromptfooHarmfulCompletionProvider } from "./promptfoo-Rjp-MeBb.js";
|
|
28
|
+
import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as getDefaultProviders, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as readProviderPromptMap, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, X as processPrompts, Y as doRemoteGrading, Z as readPrompts, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-BGP99PdK.js";
|
|
29
|
+
import { f as redteamProviderManager, m as TokenUsageTracker } from "./shared-9WHQ1oNE.js";
|
|
30
|
+
import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-DRoiYB2q.js";
|
|
31
|
+
import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
|
|
32
|
+
import { n as TRANSFORM_KEYS, t as INLINE_FUNCTION_LABEL } from "./transform-BnSTnFlp.js";
|
|
33
|
+
import { n as getTraceStore } from "./store-DKd5592Q.js";
|
|
34
|
+
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CSurUUyV.js";
|
|
35
|
+
import { n as isBlobStorageEnabled } from "./extractor-CIW3iN-b.js";
|
|
36
|
+
import { n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-4X-Wd8PG.js";
|
|
37
|
+
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-0VRANImH.js";
|
|
38
|
+
import { n as sanitizeProvider, t as EvalResult } from "./evalResult-eUkJv9Ko.js";
|
|
39
|
+
import { n as renderPrompt } from "./evaluatorHelpers-BsYP_muT.js";
|
|
40
|
+
import { t as formatDuration } from "./formatDuration-CMVNrYvE.js";
|
|
41
|
+
import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-DRJjGTIY.js";
|
|
42
|
+
import { n as shouldUseInkUI } from "./interactiveCheck-C4QlIuoR.js";
|
|
32
43
|
import { fileURLToPath } from "node:url";
|
|
33
44
|
import { Command, InvalidArgumentError } from "commander";
|
|
34
45
|
import { z } from "zod";
|
|
@@ -61,9 +72,9 @@ import process$1 from "process";
|
|
|
61
72
|
import $RefParser from "@apidevtools/json-schema-ref-parser";
|
|
62
73
|
import async from "async";
|
|
63
74
|
import { and, asc, count, desc, eq, inArray, isNotNull, like, or, sql } from "drizzle-orm";
|
|
64
|
-
import cliProgress from "cli-progress";
|
|
65
75
|
import { URL as URL$1 } from "url";
|
|
66
76
|
import editor from "@inquirer/editor";
|
|
77
|
+
import cliProgress from "cli-progress";
|
|
67
78
|
import debounce from "debounce";
|
|
68
79
|
import { LRUCache } from "lru-cache";
|
|
69
80
|
import chokidar from "chokidar";
|
|
@@ -100,7 +111,7 @@ function runCommand(program) {
|
|
|
100
111
|
hasGithubPr: !!cmdObj.githubPr,
|
|
101
112
|
hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
|
|
102
113
|
});
|
|
103
|
-
const { executeScan } = await import("./scanner-
|
|
114
|
+
const { executeScan } = await import("./scanner-DS0109SS.js");
|
|
104
115
|
await executeScan(repoPath, cmdObj);
|
|
105
116
|
});
|
|
106
117
|
}
|
|
@@ -572,47 +583,11 @@ function filterPrompts(prompts, filterPromptsOption) {
|
|
|
572
583
|
//#endregion
|
|
573
584
|
//#region src/commands/eval/filterProviders.ts
|
|
574
585
|
/**
|
|
575
|
-
* Checks if a value is a valid provider ID (non-empty string).
|
|
576
|
-
*/
|
|
577
|
-
function isValidProviderId(id) {
|
|
578
|
-
return id !== null && id !== void 0 && typeof id === "string" && id !== "";
|
|
579
|
-
}
|
|
580
|
-
/**
|
|
581
586
|
* Extracts the id and label from a raw provider config without instantiating it.
|
|
582
587
|
* Handles all provider config formats: string, function, ProviderOptions, ProviderOptionsMap.
|
|
583
588
|
*/
|
|
584
589
|
function getProviderIdAndLabel(provider, index) {
|
|
585
|
-
|
|
586
|
-
if (typeof provider === "function") {
|
|
587
|
-
const label = provider.label;
|
|
588
|
-
return {
|
|
589
|
-
id: label ?? `custom-function-${index}`,
|
|
590
|
-
label
|
|
591
|
-
};
|
|
592
|
-
}
|
|
593
|
-
const providerId = provider.id;
|
|
594
|
-
if ("id" in provider && isValidProviderId(providerId)) return {
|
|
595
|
-
id: providerId,
|
|
596
|
-
label: provider.label
|
|
597
|
-
};
|
|
598
|
-
const keys = Object.keys(provider);
|
|
599
|
-
if (keys.length > 0) {
|
|
600
|
-
const id = keys[0];
|
|
601
|
-
const value = provider[id];
|
|
602
|
-
if (typeof value === "object" && value !== null) return {
|
|
603
|
-
id: value.id || id,
|
|
604
|
-
label: value.label
|
|
605
|
-
};
|
|
606
|
-
}
|
|
607
|
-
const label = provider.label;
|
|
608
|
-
if (isValidProviderId(label)) return {
|
|
609
|
-
id: label,
|
|
610
|
-
label
|
|
611
|
-
};
|
|
612
|
-
return {
|
|
613
|
-
id: `unknown-${index}`,
|
|
614
|
-
label
|
|
615
|
-
};
|
|
590
|
+
return normalizeProviderRef(provider, { index });
|
|
616
591
|
}
|
|
617
592
|
/**
|
|
618
593
|
* Filters raw provider configs BEFORE instantiation.
|
|
@@ -3030,6 +3005,10 @@ function stripAuthFromUrl(urlString) {
|
|
|
3030
3005
|
}
|
|
3031
3006
|
}
|
|
3032
3007
|
async function handleEmailCollection(evalRecord) {
|
|
3008
|
+
if (evalRecord.author) {
|
|
3009
|
+
logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
|
|
3010
|
+
return;
|
|
3011
|
+
}
|
|
3033
3012
|
if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
|
|
3034
3013
|
let email = getUserEmail();
|
|
3035
3014
|
if (!email) {
|
|
@@ -3208,7 +3187,7 @@ function generateTable(evaluateTable, tableCellMaxLength = 250, maxRows = 25) {
|
|
|
3208
3187
|
for (const row of evaluateTable.body.slice(0, maxRows)) table.push([...row.vars.map((v) => ellipsize(v, tableCellMaxLength)), ...row.outputs.map(({ pass, text, failureReason: failureType }) => {
|
|
3209
3188
|
text = ellipsize(text, tableCellMaxLength);
|
|
3210
3189
|
if (pass) return chalk.green("[PASS] ") + text;
|
|
3211
|
-
|
|
3190
|
+
return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
|
|
3212
3191
|
})]);
|
|
3213
3192
|
return table.toString();
|
|
3214
3193
|
}
|
|
@@ -3292,7 +3271,124 @@ function shouldShareResults(opts) {
|
|
|
3292
3271
|
return cloudConfig.isEnabled() && sharing !== false;
|
|
3293
3272
|
}
|
|
3294
3273
|
//#endregion
|
|
3274
|
+
//#region src/commands/eval/redteamWarning.ts
|
|
3275
|
+
function warnIfRedteamConfigHasNoTests(config, testSuite) {
|
|
3276
|
+
if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
|
|
3277
|
+
Warning: Config file has a redteam section but no test cases.
|
|
3278
|
+
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
3279
|
+
`));
|
|
3280
|
+
}
|
|
3281
|
+
//#endregion
|
|
3295
3282
|
//#region src/commands/eval/summary.ts
|
|
3283
|
+
function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
|
|
3284
|
+
if (wasAborted) {
|
|
3285
|
+
const idSuffix = writeToDatabase ? ` (ID: ${chalk.cyan(evalId)})` : "";
|
|
3286
|
+
return `${chalk.red("✗")} ${completionType} aborted${idSuffix}`;
|
|
3287
|
+
}
|
|
3288
|
+
if (writeToDatabase && shareableUrl) return `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
|
|
3289
|
+
if (writeToDatabase && activelySharing) return `${chalk.green("✓")} ${completionType} complete`;
|
|
3290
|
+
if (writeToDatabase) return `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
|
|
3291
|
+
return `${chalk.green("✓")} ${completionType} complete`;
|
|
3292
|
+
}
|
|
3293
|
+
function getAbortSummaryLines(targetErrorStatus) {
|
|
3294
|
+
if (targetErrorStatus == null) return [];
|
|
3295
|
+
return [
|
|
3296
|
+
"",
|
|
3297
|
+
chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."),
|
|
3298
|
+
chalk.red(` Target returned HTTP ${targetErrorStatus}`),
|
|
3299
|
+
"",
|
|
3300
|
+
chalk.yellow("Possible causes:"),
|
|
3301
|
+
chalk.yellow(" • Invalid API key or authentication (401/403)"),
|
|
3302
|
+
chalk.yellow(" • Target endpoint does not exist (404)"),
|
|
3303
|
+
chalk.yellow(" • Server does not support the request (501)"),
|
|
3304
|
+
"",
|
|
3305
|
+
chalk.cyan("To fix: Check your target configuration and credentials.")
|
|
3306
|
+
];
|
|
3307
|
+
}
|
|
3308
|
+
function getGuidanceLines({ writeToDatabase, shareableUrl, wantsToShare, activelySharing, hasExplicitDisable, cloudEnabled }) {
|
|
3309
|
+
if (!writeToDatabase || shareableUrl || wantsToShare || activelySharing) return [];
|
|
3310
|
+
const lines = ["", `» View results: ${chalk.green.bold("promptfoo view")}`];
|
|
3311
|
+
if (!hasExplicitDisable) lines.push(cloudEnabled ? `» Create shareable URL: ${chalk.green.bold("promptfoo share")}` : `» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
|
|
3312
|
+
lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
|
|
3313
|
+
return lines;
|
|
3314
|
+
}
|
|
3315
|
+
function buildUsageDetails(usage, total) {
|
|
3316
|
+
const parts = [];
|
|
3317
|
+
if (usage.prompt && usage.prompt > 0) parts.push(`${usage.prompt.toLocaleString()} prompt`);
|
|
3318
|
+
if (usage.completion && usage.completion > 0) parts.push(`${usage.completion.toLocaleString()} completion`);
|
|
3319
|
+
if (usage.cached && usage.cached > 0) parts.push(usage.cached === total && parts.length === 0 ? "cached" : `${usage.cached.toLocaleString()} cached`);
|
|
3320
|
+
if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) parts.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3321
|
+
return parts;
|
|
3322
|
+
}
|
|
3323
|
+
function getTokenUsageLines(tokenUsage, isRedteam, tracker) {
|
|
3324
|
+
const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
|
|
3325
|
+
const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
|
|
3326
|
+
if (!hasEvalTokens && !hasGradingTokens) return [];
|
|
3327
|
+
const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
|
|
3328
|
+
const evalTokens = {
|
|
3329
|
+
prompt: tokenUsage.prompt || 0,
|
|
3330
|
+
completion: tokenUsage.completion || 0,
|
|
3331
|
+
total: tokenUsage.total || combinedTotal,
|
|
3332
|
+
cached: tokenUsage.cached || 0,
|
|
3333
|
+
numRequests: tokenUsage.numRequests || 0,
|
|
3334
|
+
completionDetails: tokenUsage.completionDetails || {
|
|
3335
|
+
reasoning: 0,
|
|
3336
|
+
acceptedPrediction: 0,
|
|
3337
|
+
rejectedPrediction: 0
|
|
3338
|
+
}
|
|
3339
|
+
};
|
|
3340
|
+
const lines = [`${chalk.bold("Total Tokens:")} ${chalk.white.bold((evalTokens.total + (tokenUsage.assertions?.total || 0)).toLocaleString())}`];
|
|
3341
|
+
if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
|
|
3342
|
+
if (evalTokens.total > 0) {
|
|
3343
|
+
const evalParts = buildUsageDetails(evalTokens, evalTokens.total);
|
|
3344
|
+
lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
|
|
3345
|
+
}
|
|
3346
|
+
if (tokenUsage.assertions?.total && tokenUsage.assertions.total > 0) {
|
|
3347
|
+
const gradingParts = buildUsageDetails(tokenUsage.assertions, tokenUsage.assertions.total);
|
|
3348
|
+
lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
|
|
3349
|
+
}
|
|
3350
|
+
lines.push(...getProviderUsageLines(tracker));
|
|
3351
|
+
return lines;
|
|
3352
|
+
}
|
|
3353
|
+
function getProviderUsageLines(tracker) {
|
|
3354
|
+
const providerIds = tracker.getProviderIds();
|
|
3355
|
+
if (providerIds.length <= 1) return [];
|
|
3356
|
+
const sortedProviders = providerIds.map((id) => ({
|
|
3357
|
+
id,
|
|
3358
|
+
usage: tracker.getProviderUsage(id)
|
|
3359
|
+
})).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
|
|
3360
|
+
const lines = ["", chalk.bold("Providers:")];
|
|
3361
|
+
for (const { id, usage } of sortedProviders) {
|
|
3362
|
+
if ((usage.total || 0) === 0 && (usage.prompt || 0) + (usage.completion || 0) === 0) continue;
|
|
3363
|
+
const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
|
|
3364
|
+
const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
|
|
3365
|
+
const details = buildUsageDetails(usage, displayTotal);
|
|
3366
|
+
const requestInfo = `${usage.numRequests || 0} requests`;
|
|
3367
|
+
const separator = details.length > 0 ? "; " : "";
|
|
3368
|
+
lines.push(` ${chalk.gray(`${displayId}:`)} ${chalk.white(displayTotal.toLocaleString())} (${requestInfo}${separator}${details.join(", ")})`);
|
|
3369
|
+
}
|
|
3370
|
+
return lines;
|
|
3371
|
+
}
|
|
3372
|
+
function formatResultPercentage(count, totalTests) {
|
|
3373
|
+
const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
|
|
3374
|
+
return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
|
|
3375
|
+
}
|
|
3376
|
+
function formatResultLine(count, label, icon, iconColor, totalTests) {
|
|
3377
|
+
return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count, totalTests)})`)}`;
|
|
3378
|
+
}
|
|
3379
|
+
function getResultsLines({ successes, failures, errors, duration, maxConcurrency }) {
|
|
3380
|
+
const totalTests = successes + failures + errors;
|
|
3381
|
+
const errorLabel = errors === 1 ? "error" : "errors";
|
|
3382
|
+
return [
|
|
3383
|
+
"",
|
|
3384
|
+
chalk.bold("Results:"),
|
|
3385
|
+
formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green, totalTests),
|
|
3386
|
+
formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red, totalTests),
|
|
3387
|
+
formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red, totalTests),
|
|
3388
|
+
chalk.gray(`Duration: ${formatDuration(duration)} (concurrency: ${maxConcurrency})`),
|
|
3389
|
+
""
|
|
3390
|
+
];
|
|
3391
|
+
}
|
|
3296
3392
|
/**
|
|
3297
3393
|
* Generate formatted evaluation summary output for CLI display.
|
|
3298
3394
|
*
|
|
@@ -3331,115 +3427,28 @@ function shouldShareResults(opts) {
|
|
|
3331
3427
|
* ```
|
|
3332
3428
|
*/
|
|
3333
3429
|
function generateEvalSummary(params) {
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
|
|
3344
|
-
|
|
3345
|
-
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
|
|
3356
|
-
lines.push("");
|
|
3357
|
-
lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
|
|
3358
|
-
}
|
|
3359
|
-
if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
|
|
3360
|
-
lines.push("");
|
|
3361
|
-
lines.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
|
|
3362
|
-
if (!hasExplicitDisable) if (cloudEnabled) lines.push(`» Create shareable URL: ${chalk.green.bold("promptfoo share")}`);
|
|
3363
|
-
else lines.push(`» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
|
|
3364
|
-
lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
|
|
3365
|
-
}
|
|
3366
|
-
lines.push("");
|
|
3367
|
-
const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
|
|
3368
|
-
const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
|
|
3369
|
-
if (hasEvalTokens || hasGradingTokens) {
|
|
3370
|
-
const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
|
|
3371
|
-
const evalTokens = {
|
|
3372
|
-
prompt: tokenUsage.prompt || 0,
|
|
3373
|
-
completion: tokenUsage.completion || 0,
|
|
3374
|
-
total: tokenUsage.total || combinedTotal,
|
|
3375
|
-
cached: tokenUsage.cached || 0,
|
|
3376
|
-
completionDetails: tokenUsage.completionDetails || {
|
|
3377
|
-
reasoning: 0,
|
|
3378
|
-
acceptedPrediction: 0,
|
|
3379
|
-
rejectedPrediction: 0
|
|
3380
|
-
}
|
|
3381
|
-
};
|
|
3382
|
-
const grandTotal = evalTokens.total + (tokenUsage.assertions?.total || 0);
|
|
3383
|
-
lines.push(`${chalk.bold("Total Tokens:")} ${chalk.white.bold(grandTotal.toLocaleString())}`);
|
|
3384
|
-
if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
|
|
3385
|
-
if (evalTokens.total > 0) {
|
|
3386
|
-
const evalParts = [];
|
|
3387
|
-
if (evalTokens.prompt > 0) evalParts.push(`${evalTokens.prompt.toLocaleString()} prompt`);
|
|
3388
|
-
if (evalTokens.completion > 0) evalParts.push(`${evalTokens.completion.toLocaleString()} completion`);
|
|
3389
|
-
if (evalTokens.cached > 0) if (evalTokens.cached === evalTokens.total && evalParts.length === 0) evalParts.push("cached");
|
|
3390
|
-
else evalParts.push(`${evalTokens.cached.toLocaleString()} cached`);
|
|
3391
|
-
if (evalTokens.completionDetails?.reasoning && evalTokens.completionDetails.reasoning > 0) evalParts.push(`${evalTokens.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3392
|
-
lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
|
|
3393
|
-
}
|
|
3394
|
-
if (tokenUsage.assertions && tokenUsage.assertions.total && tokenUsage.assertions.total > 0) {
|
|
3395
|
-
const gradingParts = [];
|
|
3396
|
-
if (tokenUsage.assertions.prompt && tokenUsage.assertions.prompt > 0) gradingParts.push(`${tokenUsage.assertions.prompt.toLocaleString()} prompt`);
|
|
3397
|
-
if (tokenUsage.assertions.completion && tokenUsage.assertions.completion > 0) gradingParts.push(`${tokenUsage.assertions.completion.toLocaleString()} completion`);
|
|
3398
|
-
if (tokenUsage.assertions.cached && tokenUsage.assertions.cached > 0) if (tokenUsage.assertions.cached === tokenUsage.assertions.total && gradingParts.length === 0) gradingParts.push("cached");
|
|
3399
|
-
else gradingParts.push(`${tokenUsage.assertions.cached.toLocaleString()} cached`);
|
|
3400
|
-
if (tokenUsage.assertions.completionDetails?.reasoning && tokenUsage.assertions.completionDetails.reasoning > 0) gradingParts.push(`${tokenUsage.assertions.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3401
|
-
lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
|
|
3402
|
-
}
|
|
3403
|
-
const providerIds = tracker.getProviderIds();
|
|
3404
|
-
if (providerIds.length > 1) {
|
|
3405
|
-
lines.push("");
|
|
3406
|
-
lines.push(chalk.bold("Providers:"));
|
|
3407
|
-
const sortedProviders = providerIds.map((id) => ({
|
|
3408
|
-
id,
|
|
3409
|
-
usage: tracker.getProviderUsage(id)
|
|
3410
|
-
})).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
|
|
3411
|
-
for (const { id, usage } of sortedProviders) if ((usage.total || 0) > 0 || (usage.prompt || 0) + (usage.completion || 0) > 0) {
|
|
3412
|
-
const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
|
|
3413
|
-
const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
|
|
3414
|
-
const details = [];
|
|
3415
|
-
if (usage.prompt && usage.prompt > 0) details.push(`${usage.prompt.toLocaleString()} prompt`);
|
|
3416
|
-
if (usage.completion && usage.completion > 0) details.push(`${usage.completion.toLocaleString()} completion`);
|
|
3417
|
-
if (usage.cached && usage.cached > 0) if (usage.cached === displayTotal && details.length === 0) details.push("cached");
|
|
3418
|
-
else details.push(`${usage.cached.toLocaleString()} cached`);
|
|
3419
|
-
if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) details.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3420
|
-
const breakdown = ` (${`${usage.numRequests || 0} requests`}${details.length > 0 ? "; " : ""}${details.join(", ")})`;
|
|
3421
|
-
lines.push(` ${chalk.gray(displayId + ":")} ${chalk.white(displayTotal.toLocaleString())}${breakdown}`);
|
|
3422
|
-
}
|
|
3423
|
-
}
|
|
3424
|
-
}
|
|
3425
|
-
lines.push("");
|
|
3426
|
-
const totalTests = successes + failures + errors;
|
|
3427
|
-
const formatResultPercentage = (count) => {
|
|
3428
|
-
const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
|
|
3429
|
-
return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
|
|
3430
|
-
};
|
|
3431
|
-
const formatResultLine = (count, label, icon, iconColor) => {
|
|
3432
|
-
return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count)})`)}`;
|
|
3433
|
-
};
|
|
3434
|
-
const errorLabel = errors === 1 ? "error" : "errors";
|
|
3435
|
-
lines.push(chalk.bold("Results:"));
|
|
3436
|
-
lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green));
|
|
3437
|
-
lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red));
|
|
3438
|
-
lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red));
|
|
3439
|
-
const durationDisplay = formatDuration(duration);
|
|
3440
|
-
lines.push(chalk.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
|
|
3441
|
-
lines.push("");
|
|
3442
|
-
return lines;
|
|
3430
|
+
return [
|
|
3431
|
+
getCompletionMessage({
|
|
3432
|
+
completionType: params.isRedteam ? "Red team" : "Eval",
|
|
3433
|
+
evalId: params.evalId,
|
|
3434
|
+
shareableUrl: params.shareableUrl,
|
|
3435
|
+
wasAborted: params.targetErrorStatus != null,
|
|
3436
|
+
writeToDatabase: params.writeToDatabase,
|
|
3437
|
+
activelySharing: params.activelySharing ?? false
|
|
3438
|
+
}),
|
|
3439
|
+
...getAbortSummaryLines(params.targetErrorStatus),
|
|
3440
|
+
...getGuidanceLines({
|
|
3441
|
+
writeToDatabase: params.writeToDatabase,
|
|
3442
|
+
shareableUrl: params.shareableUrl,
|
|
3443
|
+
wantsToShare: params.wantsToShare,
|
|
3444
|
+
activelySharing: params.activelySharing ?? false,
|
|
3445
|
+
hasExplicitDisable: params.hasExplicitDisable,
|
|
3446
|
+
cloudEnabled: params.cloudEnabled
|
|
3447
|
+
}),
|
|
3448
|
+
"",
|
|
3449
|
+
...getTokenUsageLines(params.tokenUsage, params.isRedteam, params.tracker),
|
|
3450
|
+
...getResultsLines(params)
|
|
3451
|
+
];
|
|
3443
3452
|
}
|
|
3444
3453
|
//#endregion
|
|
3445
3454
|
//#region src/commands/retry.ts
|
|
@@ -3657,6 +3666,9 @@ function setupRetryCommand(program) {
|
|
|
3657
3666
|
}
|
|
3658
3667
|
//#endregion
|
|
3659
3668
|
//#region src/models/modelAudit.ts
|
|
3669
|
+
function getModelAuditSortColumn(sortField) {
|
|
3670
|
+
return modelAuditsTable[sortField];
|
|
3671
|
+
}
|
|
3660
3672
|
function createScanId(createdAt = /* @__PURE__ */ new Date()) {
|
|
3661
3673
|
return `scan-${randomSequence(3)}-${createdAt.toISOString().slice(0, 19)}`;
|
|
3662
3674
|
}
|
|
@@ -3697,8 +3709,10 @@ var ModelAudit = class ModelAudit {
|
|
|
3697
3709
|
this.issues = data.issues || data.results?.issues || null;
|
|
3698
3710
|
const issues = data.issues || data.results?.issues;
|
|
3699
3711
|
const resultsHasErrors = data.results?.has_errors ?? false;
|
|
3700
|
-
if (data.hasErrors === void 0)
|
|
3701
|
-
|
|
3712
|
+
if (data.hasErrors === void 0) {
|
|
3713
|
+
const hasActualErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
|
|
3714
|
+
this.hasErrors = hasActualErrors;
|
|
3715
|
+
} else this.hasErrors = data.hasErrors;
|
|
3702
3716
|
this.totalChecks = data.totalChecks;
|
|
3703
3717
|
this.passedChecks = data.passedChecks;
|
|
3704
3718
|
this.failedChecks = data.failedChecks;
|
|
@@ -3796,9 +3810,9 @@ var ModelAudit = class ModelAudit {
|
|
|
3796
3810
|
static async getMany(limit = 100, offset = 0, sortField = "createdAt", sortOrder = "desc", search) {
|
|
3797
3811
|
let query = getDb().select().from(modelAuditsTable);
|
|
3798
3812
|
if (search) query = query.where(or(like(modelAuditsTable.name, `%${search}%`), like(modelAuditsTable.modelPath, `%${search}%`), like(modelAuditsTable.id, `%${search}%`)));
|
|
3799
|
-
const sortColumn = sortField
|
|
3800
|
-
if (sortOrder === "asc") query = query.orderBy(asc(sortColumn));
|
|
3801
|
-
else query = query.orderBy(desc(sortColumn));
|
|
3813
|
+
const sortColumn = getModelAuditSortColumn(sortField);
|
|
3814
|
+
if (sortOrder === "asc") query = sortField === "id" ? query.orderBy(asc(sortColumn)) : query.orderBy(asc(sortColumn), asc(modelAuditsTable.id));
|
|
3815
|
+
else query = sortField === "id" ? query.orderBy(desc(sortColumn)) : query.orderBy(desc(sortColumn), desc(modelAuditsTable.id));
|
|
3802
3816
|
return (await query.limit(limit).offset(offset).all()).map((r) => new ModelAudit({
|
|
3803
3817
|
...r,
|
|
3804
3818
|
persisted: true
|
|
@@ -4157,14 +4171,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
4157
4171
|
state.resume = true;
|
|
4158
4172
|
state.retryMode = true;
|
|
4159
4173
|
} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
|
|
4160
|
-
if (!cmdObj.envPath && commandLineOptions?.envPath) {
|
|
4174
|
+
if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
|
|
4161
4175
|
logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
|
|
4162
4176
|
setupEnv(commandLineOptions.envPath);
|
|
4163
4177
|
}
|
|
4164
|
-
|
|
4165
|
-
Warning: Config file has a redteam section but no test cases.
|
|
4166
|
-
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
4167
|
-
`));
|
|
4178
|
+
warnIfRedteamConfigHasNoTests(config, testSuite);
|
|
4168
4179
|
if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
|
|
4169
4180
|
const maybeUrl = config.providers[0]?.config?.url;
|
|
4170
4181
|
if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
|
|
@@ -4272,7 +4283,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
4272
4283
|
${z.prettifyError(testSuiteSchema.error)}
|
|
4273
4284
|
|
|
4274
4285
|
Please review your promptfooconfig.yaml configuration.`));
|
|
4275
|
-
const
|
|
4286
|
+
const author = getAuthor();
|
|
4287
|
+
const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
|
|
4288
|
+
author,
|
|
4289
|
+
runtimeOptions: options
|
|
4290
|
+
}) : new Eval(config, {
|
|
4291
|
+
author,
|
|
4292
|
+
runtimeOptions: options
|
|
4293
|
+
});
|
|
4276
4294
|
const abortController = new AbortController();
|
|
4277
4295
|
const previousAbortSignal = evaluateOptions.abortSignal;
|
|
4278
4296
|
evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
|
|
@@ -6045,25 +6063,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
6045
6063
|
getAssertions(_prompt) {
|
|
6046
6064
|
return getHarmfulAssertions(this.harmCategory);
|
|
6047
6065
|
}
|
|
6048
|
-
promptsToTestCases(prompts) {
|
|
6066
|
+
async promptsToTestCases(prompts) {
|
|
6049
6067
|
const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
|
|
6050
6068
|
const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
|
|
6051
|
-
|
|
6069
|
+
const pluginId = getShortPluginId(this.harmCategory);
|
|
6070
|
+
return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
|
|
6052
6071
|
const vars = { [this.injectVar]: __prompt };
|
|
6053
|
-
|
|
6054
|
-
|
|
6055
|
-
|
|
6056
|
-
|
|
6072
|
+
let inputMaterialization;
|
|
6073
|
+
if (hasMultipleInputs) {
|
|
6074
|
+
let parsed;
|
|
6075
|
+
try {
|
|
6076
|
+
parsed = JSON.parse(__prompt);
|
|
6077
|
+
} catch (error) {
|
|
6078
|
+
logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
6079
|
+
}
|
|
6080
|
+
if (parsed) try {
|
|
6081
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
|
|
6082
|
+
materializationIndex,
|
|
6083
|
+
pluginId,
|
|
6084
|
+
provider: this.provider,
|
|
6085
|
+
purpose: this.purpose
|
|
6086
|
+
});
|
|
6087
|
+
Object.assign(vars, materializedVars.vars);
|
|
6088
|
+
inputMaterialization = materializedVars.metadata;
|
|
6089
|
+
} catch (error) {
|
|
6090
|
+
logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
|
|
6091
|
+
throw error;
|
|
6092
|
+
}
|
|
6093
|
+
}
|
|
6057
6094
|
return {
|
|
6058
6095
|
vars,
|
|
6059
6096
|
metadata: {
|
|
6060
6097
|
harmCategory: harmCategoryLabel,
|
|
6061
|
-
pluginId
|
|
6062
|
-
pluginConfig: this.config
|
|
6098
|
+
pluginId,
|
|
6099
|
+
pluginConfig: this.config,
|
|
6100
|
+
...inputMaterialization ? { inputMaterialization } : {}
|
|
6063
6101
|
},
|
|
6064
6102
|
assert: getHarmfulAssertions(this.harmCategory)
|
|
6065
6103
|
};
|
|
6066
|
-
});
|
|
6104
|
+
}));
|
|
6067
6105
|
}
|
|
6068
6106
|
};
|
|
6069
6107
|
//#endregion
|
|
@@ -6072,20 +6110,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
6072
6110
|
* Extract content from <Prompt> tags and parse JSON if inputs are defined.
|
|
6073
6111
|
* Returns the processed prompt and any additional vars extracted from JSON.
|
|
6074
6112
|
*/
|
|
6075
|
-
function processPromptForInputs(prompt,
|
|
6113
|
+
async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
|
|
6076
6114
|
let processedPrompt = prompt.trim();
|
|
6077
6115
|
const additionalVars = {};
|
|
6116
|
+
let additionalMetadata;
|
|
6078
6117
|
const extractedPrompt = extractPromptFromTags(processedPrompt);
|
|
6079
6118
|
if (extractedPrompt) processedPrompt = extractedPrompt;
|
|
6080
|
-
if (inputs && Object.keys(inputs).length > 0)
|
|
6081
|
-
|
|
6082
|
-
|
|
6083
|
-
|
|
6084
|
-
|
|
6119
|
+
if (inputs && Object.keys(inputs).length > 0) {
|
|
6120
|
+
let parsed;
|
|
6121
|
+
try {
|
|
6122
|
+
parsed = JSON.parse(processedPrompt);
|
|
6123
|
+
} catch (error) {
|
|
6124
|
+
logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
6125
|
+
}
|
|
6126
|
+
if (parsed) try {
|
|
6127
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
|
|
6128
|
+
materializationIndex,
|
|
6129
|
+
pluginId: plugin,
|
|
6130
|
+
provider,
|
|
6131
|
+
purpose
|
|
6132
|
+
});
|
|
6133
|
+
Object.assign(additionalVars, materializedVars.vars);
|
|
6134
|
+
additionalMetadata = materializedVars.metadata;
|
|
6135
|
+
} catch (error) {
|
|
6136
|
+
logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
|
|
6137
|
+
throw error;
|
|
6138
|
+
}
|
|
6085
6139
|
}
|
|
6086
6140
|
return {
|
|
6087
6141
|
processedPrompt,
|
|
6088
|
-
additionalVars
|
|
6142
|
+
additionalVars,
|
|
6143
|
+
additionalMetadata
|
|
6089
6144
|
};
|
|
6090
6145
|
}
|
|
6091
6146
|
async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
|
|
@@ -6106,15 +6161,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
6106
6161
|
};
|
|
6107
6162
|
const allPrompts = await retryWithDeduplication(generatePrompts, n);
|
|
6108
6163
|
const inputs = config?.inputs;
|
|
6109
|
-
return sampleArray(allPrompts, n).map((prompt) => {
|
|
6110
|
-
const { processedPrompt, additionalVars } = processPromptForInputs(prompt,
|
|
6164
|
+
return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
|
|
6165
|
+
const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
|
|
6111
6166
|
const testCase = createTestCase(injectVar, processedPrompt, plugin);
|
|
6112
6167
|
if (Object.keys(additionalVars).length > 0) testCase.vars = {
|
|
6113
6168
|
...testCase.vars,
|
|
6114
6169
|
...additionalVars
|
|
6115
6170
|
};
|
|
6171
|
+
if (additionalMetadata) testCase.metadata = {
|
|
6172
|
+
...testCase.metadata,
|
|
6173
|
+
inputMaterialization: additionalMetadata
|
|
6174
|
+
};
|
|
6116
6175
|
return testCase;
|
|
6117
|
-
});
|
|
6176
|
+
}));
|
|
6118
6177
|
}
|
|
6119
6178
|
//#endregion
|
|
6120
6179
|
//#region src/redteam/plugins/teenSafety/graderExamples.ts
|
|
@@ -6432,7 +6491,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
|
|
|
6432
6491
|
function computeModifiersFromConfig(config) {
|
|
6433
6492
|
const modifiers = { ...config?.modifiers };
|
|
6434
6493
|
if (config?.language && typeof config.language === "string") modifiers.language = config.language;
|
|
6435
|
-
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
6494
|
+
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
6436
6495
|
const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
6437
6496
|
if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
|
|
6438
6497
|
return modifiers;
|
|
@@ -6517,9 +6576,10 @@ function dedupeTestCases(testCases) {
|
|
|
6517
6576
|
return deduped;
|
|
6518
6577
|
}
|
|
6519
6578
|
function buildMaxCharsRetryInstructions(rejectedPromptLengths, limit) {
|
|
6579
|
+
const longestRejectedPromptText = rejectedPromptLengths.length > 0 ? `${Math.max(...rejectedPromptLengths)} characters` : "unknown length";
|
|
6520
6580
|
return dedent`
|
|
6521
6581
|
Your previous response included ${rejectedPromptLengths.length} generated prompt${rejectedPromptLengths.length === 1 ? "" : "s"} that exceeded the ${limit ?? "configured"}-character limit.
|
|
6522
|
-
The longest rejected prompt was ${
|
|
6582
|
+
The longest rejected prompt was ${longestRejectedPromptText}.
|
|
6523
6583
|
Generate replacement prompts only, and keep every user message within the character limit.
|
|
6524
6584
|
`.trim();
|
|
6525
6585
|
}
|
|
@@ -6597,6 +6657,31 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
6597
6657
|
return [];
|
|
6598
6658
|
}
|
|
6599
6659
|
}
|
|
6660
|
+
async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
|
|
6661
|
+
const inputs = config.inputs;
|
|
6662
|
+
if (!inputs || Object.keys(inputs).length === 0) return testCases;
|
|
6663
|
+
return Promise.all(testCases.map(async (testCase, materializationIndex) => {
|
|
6664
|
+
const inputVars = extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
|
|
6665
|
+
if (!inputVars) return testCase;
|
|
6666
|
+
const materializedVars = await materializeInputVariablesWithMetadata(inputVars, inputs, {
|
|
6667
|
+
materializationIndex,
|
|
6668
|
+
pluginId,
|
|
6669
|
+
provider,
|
|
6670
|
+
purpose
|
|
6671
|
+
});
|
|
6672
|
+
return {
|
|
6673
|
+
...testCase,
|
|
6674
|
+
vars: {
|
|
6675
|
+
...testCase.vars || {},
|
|
6676
|
+
...materializedVars.vars
|
|
6677
|
+
},
|
|
6678
|
+
metadata: {
|
|
6679
|
+
...testCase.metadata || {},
|
|
6680
|
+
...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
|
|
6681
|
+
}
|
|
6682
|
+
};
|
|
6683
|
+
}));
|
|
6684
|
+
}
|
|
6600
6685
|
function createPluginFactory(PluginClass, key, validate) {
|
|
6601
6686
|
return {
|
|
6602
6687
|
key,
|
|
@@ -6607,13 +6692,21 @@ function createPluginFactory(PluginClass, key, validate) {
|
|
|
6607
6692
|
logger.debug(`Using local redteam generation for ${key}`);
|
|
6608
6693
|
return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
|
|
6609
6694
|
}
|
|
6610
|
-
const
|
|
6695
|
+
const pluginId = getShortPluginId(key);
|
|
6696
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6697
|
+
config: configWithDefaults ?? {},
|
|
6698
|
+
injectVar,
|
|
6699
|
+
pluginId,
|
|
6700
|
+
provider,
|
|
6701
|
+
purpose,
|
|
6702
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
6703
|
+
});
|
|
6611
6704
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
6612
6705
|
return testCases.map((testCase) => ({
|
|
6613
6706
|
...testCase,
|
|
6614
6707
|
metadata: {
|
|
6615
6708
|
...testCase.metadata,
|
|
6616
|
-
pluginId
|
|
6709
|
+
pluginId,
|
|
6617
6710
|
pluginConfig: {
|
|
6618
6711
|
...configWithDefaults,
|
|
6619
6712
|
modifiers: computedModifiers
|
|
@@ -6670,7 +6763,7 @@ const pluginFactories = [
|
|
|
6670
6763
|
key: category,
|
|
6671
6764
|
action: async (params) => {
|
|
6672
6765
|
if (neverGenerateRemote()) {
|
|
6673
|
-
logger.error(`${category} plugin
|
|
6766
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
6674
6767
|
return [];
|
|
6675
6768
|
}
|
|
6676
6769
|
const testCases = await getHarmfulTests(params, category);
|
|
@@ -6693,13 +6786,21 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
|
|
|
6693
6786
|
key: category,
|
|
6694
6787
|
action: async (params) => {
|
|
6695
6788
|
if (shouldGenerateRemote()) {
|
|
6696
|
-
const
|
|
6789
|
+
const pluginId = getShortPluginId(category);
|
|
6790
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6791
|
+
config: params.config ?? {},
|
|
6792
|
+
injectVar: params.injectVar,
|
|
6793
|
+
pluginId,
|
|
6794
|
+
provider: params.provider,
|
|
6795
|
+
purpose: params.purpose,
|
|
6796
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
6797
|
+
});
|
|
6697
6798
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
6698
6799
|
return testCases.map((testCase) => ({
|
|
6699
6800
|
...testCase,
|
|
6700
6801
|
metadata: {
|
|
6701
6802
|
...testCase.metadata,
|
|
6702
|
-
pluginId
|
|
6803
|
+
pluginId,
|
|
6703
6804
|
pluginConfig: {
|
|
6704
6805
|
...params.config,
|
|
6705
6806
|
modifiers: computedModifiers
|
|
@@ -6721,16 +6822,24 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
|
|
|
6721
6822
|
key: category,
|
|
6722
6823
|
action: async (params) => {
|
|
6723
6824
|
if (neverGenerateRemote()) {
|
|
6724
|
-
logger.error(`${category} plugin
|
|
6825
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
6725
6826
|
return [];
|
|
6726
6827
|
}
|
|
6727
|
-
const
|
|
6828
|
+
const pluginId = getShortPluginId(category);
|
|
6829
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6830
|
+
config: params.config ?? {},
|
|
6831
|
+
injectVar: params.injectVar,
|
|
6832
|
+
pluginId,
|
|
6833
|
+
provider: params.provider,
|
|
6834
|
+
purpose: params.purpose,
|
|
6835
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
6836
|
+
});
|
|
6728
6837
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
6729
6838
|
return testCases.map((testCase) => ({
|
|
6730
6839
|
...testCase,
|
|
6731
6840
|
metadata: {
|
|
6732
6841
|
...testCase.metadata,
|
|
6733
|
-
pluginId
|
|
6842
|
+
pluginId,
|
|
6734
6843
|
pluginConfig: {
|
|
6735
6844
|
...params.config,
|
|
6736
6845
|
modifiers: computedModifiers
|
|
@@ -6743,19 +6852,27 @@ function createRemotePlugin(key, validate) {
|
|
|
6743
6852
|
return {
|
|
6744
6853
|
key,
|
|
6745
6854
|
validate,
|
|
6746
|
-
action: async ({ purpose, injectVar, n, config }) => {
|
|
6855
|
+
action: async ({ provider, purpose, injectVar, n, config }) => {
|
|
6747
6856
|
const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
|
|
6748
6857
|
if (neverGenerateRemote()) {
|
|
6749
|
-
logger.error(`${key} plugin
|
|
6858
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
|
|
6750
6859
|
return [];
|
|
6751
6860
|
}
|
|
6752
|
-
const
|
|
6861
|
+
const pluginId = getShortPluginId(key);
|
|
6862
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
6863
|
+
config: configWithDefaults ?? {},
|
|
6864
|
+
injectVar,
|
|
6865
|
+
pluginId,
|
|
6866
|
+
provider,
|
|
6867
|
+
purpose,
|
|
6868
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
6869
|
+
});
|
|
6753
6870
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
6754
6871
|
const testsWithMetadata = testCases.map((testCase) => ({
|
|
6755
6872
|
...testCase,
|
|
6756
6873
|
metadata: {
|
|
6757
6874
|
...testCase.metadata,
|
|
6758
|
-
pluginId
|
|
6875
|
+
pluginId,
|
|
6759
6876
|
pluginConfig: {
|
|
6760
6877
|
...configWithDefaults,
|
|
6761
6878
|
modifiers: computedModifiers
|
|
@@ -6825,6 +6942,37 @@ function getPolicyText(metadata) {
|
|
|
6825
6942
|
return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
|
|
6826
6943
|
}
|
|
6827
6944
|
}
|
|
6945
|
+
async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
|
|
6946
|
+
const inputs = testCase.metadata?.pluginConfig?.inputs;
|
|
6947
|
+
const inputMaterialization = testCase.metadata?.inputMaterialization;
|
|
6948
|
+
if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
|
|
6949
|
+
inputMaterialization,
|
|
6950
|
+
vars: testCase.vars
|
|
6951
|
+
};
|
|
6952
|
+
try {
|
|
6953
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
|
|
6954
|
+
materializationIndex,
|
|
6955
|
+
pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
|
|
6956
|
+
provider,
|
|
6957
|
+
purpose
|
|
6958
|
+
});
|
|
6959
|
+
return {
|
|
6960
|
+
inputMaterialization: materializedVars.metadata ? {
|
|
6961
|
+
...inputMaterialization,
|
|
6962
|
+
...materializedVars.metadata
|
|
6963
|
+
} : inputMaterialization,
|
|
6964
|
+
vars: {
|
|
6965
|
+
...testCase.vars,
|
|
6966
|
+
...materializedVars.vars
|
|
6967
|
+
}
|
|
6968
|
+
};
|
|
6969
|
+
} catch {
|
|
6970
|
+
return {
|
|
6971
|
+
inputMaterialization,
|
|
6972
|
+
vars: testCase.vars
|
|
6973
|
+
};
|
|
6974
|
+
}
|
|
6975
|
+
}
|
|
6828
6976
|
/**
|
|
6829
6977
|
* Gets the severity level for a plugin based on its ID and configuration.
|
|
6830
6978
|
* @param pluginId - The ID of the plugin.
|
|
@@ -6966,6 +7114,7 @@ const categories = {
|
|
|
6966
7114
|
foundation: FOUNDATION_PLUGINS,
|
|
6967
7115
|
harmful: Object.keys(HARM_PLUGINS),
|
|
6968
7116
|
"coding-agent:core": CODING_AGENT_CORE_PLUGINS,
|
|
7117
|
+
"coding-agent:all": CODING_AGENT_PLUGINS,
|
|
6969
7118
|
bias: BIAS_PLUGINS,
|
|
6970
7119
|
pii: PII_PLUGINS,
|
|
6971
7120
|
medical: MEDICAL_PLUGINS,
|
|
@@ -7060,7 +7209,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
7060
7209
|
* @param injectVar - The variable to inject.
|
|
7061
7210
|
* @returns An array of new test cases generated by strategies.
|
|
7062
7211
|
*/
|
|
7063
|
-
async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
7212
|
+
async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
7064
7213
|
const newTestCases = [];
|
|
7065
7214
|
const strategyResults = {};
|
|
7066
7215
|
for (const strategy of strategies) {
|
|
@@ -7116,14 +7265,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
7116
7265
|
}
|
|
7117
7266
|
}
|
|
7118
7267
|
resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
|
|
7119
|
-
newTestCases.push(...resultTestCases.map((t) => {
|
|
7120
|
-
const
|
|
7121
|
-
let updatedVars = t.vars;
|
|
7122
|
-
if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
|
|
7123
|
-
const parsed = JSON.parse(String(t.vars[injectVar]));
|
|
7124
|
-
updatedVars = { ...t.vars };
|
|
7125
|
-
Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
|
|
7126
|
-
} catch {}
|
|
7268
|
+
newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
|
|
7269
|
+
const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
|
|
7127
7270
|
const strategyConfig = {
|
|
7128
7271
|
...strategy.config || {},
|
|
7129
7272
|
...maxCharsPerMessage ? { maxCharsPerMessage } : {},
|
|
@@ -7131,16 +7274,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
7131
7274
|
};
|
|
7132
7275
|
return {
|
|
7133
7276
|
...t,
|
|
7134
|
-
vars
|
|
7277
|
+
vars,
|
|
7135
7278
|
metadata: {
|
|
7136
7279
|
...t?.metadata || {},
|
|
7137
7280
|
...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
|
|
7138
7281
|
...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
|
|
7139
7282
|
...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
|
|
7283
|
+
...inputMaterialization && { inputMaterialization },
|
|
7140
7284
|
...Object.keys(strategyConfig).length > 0 && { strategyConfig }
|
|
7141
7285
|
}
|
|
7142
7286
|
};
|
|
7143
|
-
}));
|
|
7287
|
+
})));
|
|
7144
7288
|
const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
|
|
7145
7289
|
const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
|
|
7146
7290
|
const applyNumTestsCap = (calculatedRequested) => {
|
|
@@ -7595,7 +7739,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7595
7739
|
targetIds,
|
|
7596
7740
|
...retryStrategy.config
|
|
7597
7741
|
};
|
|
7598
|
-
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
|
|
7742
|
+
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
|
|
7599
7743
|
pluginTestCases.push(...retryTestCases);
|
|
7600
7744
|
Object.assign(strategyResults, retryResults);
|
|
7601
7745
|
if (showProgressBar) progressBar?.increment(retryTestCases.length);
|
|
@@ -7603,7 +7747,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7603
7747
|
checkAbort();
|
|
7604
7748
|
const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
|
|
7605
7749
|
if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
|
|
7606
|
-
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
7750
|
+
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
7607
7751
|
Object.assign(strategyResults, otherStrategyResults);
|
|
7608
7752
|
if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
|
|
7609
7753
|
const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
|
|
@@ -8358,65 +8502,175 @@ async function doRedteamRun(options) {
|
|
|
8358
8502
|
return evalResult;
|
|
8359
8503
|
}
|
|
8360
8504
|
//#endregion
|
|
8505
|
+
//#region src/types/transform.ts
|
|
8506
|
+
/** Runtime type guard for `TransformFunction` values. */
|
|
8507
|
+
function isTransformFunction(value) {
|
|
8508
|
+
return typeof value === "function";
|
|
8509
|
+
}
|
|
8510
|
+
//#endregion
|
|
8361
8511
|
//#region src/index.ts
|
|
8512
|
+
/**
|
|
8513
|
+
* Shallow-clone a test case so the caller can swap in resolved ApiProvider
|
|
8514
|
+
* instances on `options.provider` / `assert[].provider` without leaking those
|
|
8515
|
+
* mutations back to the input. The input may alias the unified config written
|
|
8516
|
+
* to the Eval record, and a live SDK client (e.g. Bedrock's BedrockRuntime,
|
|
8517
|
+
* Anthropic's client) holds circular references that break drizzle's JSON
|
|
8518
|
+
* serialization on `evalRecord.save()`. Fixes #8687.
|
|
8519
|
+
*
|
|
8520
|
+
* Detaches only `options` and `assert[]`. Other reference fields (`provider`,
|
|
8521
|
+
* `vars`, `metadata`, `providerOutput`) remain aliased — callers must reassign
|
|
8522
|
+
* those by reference rather than mutating in place. `assert-set` children are
|
|
8523
|
+
* not deep-cloned because the resolve loop skips `assert-set`; if that ever
|
|
8524
|
+
* changes, extend this helper.
|
|
8525
|
+
*/
|
|
8526
|
+
function cloneTestForResolve(test) {
|
|
8527
|
+
const cloned = { ...test };
|
|
8528
|
+
if (test.options) cloned.options = { ...test.options };
|
|
8529
|
+
if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
|
|
8530
|
+
return cloned;
|
|
8531
|
+
}
|
|
8532
|
+
function toSerializableProviderRef(provider) {
|
|
8533
|
+
if (isApiProvider(provider)) return sanitizeProvider(provider);
|
|
8534
|
+
if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
|
|
8535
|
+
return provider;
|
|
8536
|
+
}
|
|
8537
|
+
function isRecord(value) {
|
|
8538
|
+
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
8539
|
+
}
|
|
8540
|
+
function withSerializableProvider(record) {
|
|
8541
|
+
if (!isApiProvider(record.provider)) return record;
|
|
8542
|
+
return {
|
|
8543
|
+
...record,
|
|
8544
|
+
provider: sanitizeProvider(record.provider)
|
|
8545
|
+
};
|
|
8546
|
+
}
|
|
8547
|
+
/**
|
|
8548
|
+
* Function-valued transforms are first-class at runtime but are silently dropped
|
|
8549
|
+
* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
|
|
8550
|
+
* a function reference, so replace every `transform`-like field with a
|
|
8551
|
+
* `[inline function]: name` marker. Non-function values pass through unchanged.
|
|
8552
|
+
*
|
|
8553
|
+
* `droppedRef.value` is flipped to `true` the first time a function is replaced
|
|
8554
|
+
* so the caller can emit a single warning instead of logging per field.
|
|
8555
|
+
*/
|
|
8556
|
+
function replaceFunctionTransforms(record, droppedRef) {
|
|
8557
|
+
let result;
|
|
8558
|
+
for (const key of TRANSFORM_KEYS) {
|
|
8559
|
+
const value = record[key];
|
|
8560
|
+
if (!isTransformFunction(value)) continue;
|
|
8561
|
+
if (!result) result = { ...record };
|
|
8562
|
+
result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
|
|
8563
|
+
droppedRef.value = true;
|
|
8564
|
+
}
|
|
8565
|
+
return result ?? record;
|
|
8566
|
+
}
|
|
8567
|
+
function toSerializableAssertion(assertion, droppedRef) {
|
|
8568
|
+
if (!isRecord(assertion)) return assertion;
|
|
8569
|
+
let sanitizedAssertion = withSerializableProvider(assertion);
|
|
8570
|
+
sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
|
|
8571
|
+
if (Array.isArray(assertion.assert)) sanitizedAssertion = {
|
|
8572
|
+
...sanitizedAssertion,
|
|
8573
|
+
assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
8574
|
+
};
|
|
8575
|
+
return sanitizedAssertion;
|
|
8576
|
+
}
|
|
8577
|
+
function toSerializableTestCase(test, droppedRef) {
|
|
8578
|
+
if (!isRecord(test)) return test;
|
|
8579
|
+
let sanitizedTest = withSerializableProvider(test);
|
|
8580
|
+
if (isRecord(test.options)) {
|
|
8581
|
+
let options = withSerializableProvider(test.options);
|
|
8582
|
+
options = replaceFunctionTransforms(options, droppedRef);
|
|
8583
|
+
if (options !== test.options) sanitizedTest = {
|
|
8584
|
+
...sanitizedTest,
|
|
8585
|
+
options
|
|
8586
|
+
};
|
|
8587
|
+
}
|
|
8588
|
+
if (Array.isArray(test.assert)) sanitizedTest = {
|
|
8589
|
+
...sanitizedTest,
|
|
8590
|
+
assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
8591
|
+
};
|
|
8592
|
+
return sanitizedTest;
|
|
8593
|
+
}
|
|
8594
|
+
function toSerializableScenario(scenario, droppedRef) {
|
|
8595
|
+
if (!isRecord(scenario)) return scenario;
|
|
8596
|
+
if (!Array.isArray(scenario.tests)) return scenario;
|
|
8597
|
+
return {
|
|
8598
|
+
...scenario,
|
|
8599
|
+
tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
|
|
8600
|
+
};
|
|
8601
|
+
}
|
|
8602
|
+
function createSerializableUnifiedConfig(testSuite, prompts) {
|
|
8603
|
+
const droppedRef = { value: false };
|
|
8604
|
+
const config = {
|
|
8605
|
+
...testSuite,
|
|
8606
|
+
providers: toSerializableProviderRef(testSuite.providers),
|
|
8607
|
+
defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
|
|
8608
|
+
tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
|
|
8609
|
+
scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
|
|
8610
|
+
prompts
|
|
8611
|
+
};
|
|
8612
|
+
if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
|
|
8613
|
+
return config;
|
|
8614
|
+
}
|
|
8362
8615
|
async function evaluate(testSuite, options = {}) {
|
|
8363
|
-
|
|
8364
|
-
|
|
8616
|
+
const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
|
|
8617
|
+
if (testSuiteConfig.writeLatestResults) await runDbMigrations();
|
|
8618
|
+
const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
|
|
8365
8619
|
const providerMap = {};
|
|
8366
8620
|
for (const p of loadedProviders) {
|
|
8367
8621
|
providerMap[p.id()] = p;
|
|
8368
8622
|
if (p.label) providerMap[p.label] = p;
|
|
8369
8623
|
}
|
|
8370
|
-
let resolvedDefaultTest =
|
|
8371
|
-
if (typeof
|
|
8624
|
+
let resolvedDefaultTest = testSuiteConfig.defaultTest;
|
|
8625
|
+
if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
|
|
8372
8626
|
const constructedTestSuite = {
|
|
8373
|
-
...
|
|
8627
|
+
...testSuiteConfig,
|
|
8374
8628
|
defaultTest: resolvedDefaultTest,
|
|
8375
|
-
scenarios:
|
|
8629
|
+
scenarios: testSuiteConfig.scenarios,
|
|
8376
8630
|
providers: loadedProviders,
|
|
8377
|
-
tests: await readTests(
|
|
8378
|
-
nunjucksFilters: await readFilters(
|
|
8379
|
-
prompts: await processPrompts(
|
|
8631
|
+
tests: await readTests(testSuiteConfig.tests),
|
|
8632
|
+
nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
|
|
8633
|
+
prompts: await processPrompts(testSuiteConfig.prompts)
|
|
8380
8634
|
};
|
|
8381
|
-
if (typeof constructedTestSuite.defaultTest === "object") {
|
|
8382
|
-
|
|
8383
|
-
|
|
8635
|
+
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
8636
|
+
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
8637
|
+
if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
8638
|
+
env: testSuiteConfig.env,
|
|
8384
8639
|
basePath: state.basePath
|
|
8385
8640
|
});
|
|
8386
|
-
if (constructedTestSuite.defaultTest
|
|
8387
|
-
env:
|
|
8641
|
+
if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
8642
|
+
env: testSuiteConfig.env,
|
|
8388
8643
|
basePath: state.basePath
|
|
8389
8644
|
});
|
|
8390
8645
|
}
|
|
8391
|
-
|
|
8646
|
+
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
8647
|
+
for (const test of constructedTestSuite.tests) {
|
|
8392
8648
|
if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
|
|
8393
|
-
env:
|
|
8649
|
+
env: testSuiteConfig.env,
|
|
8394
8650
|
basePath: state.basePath
|
|
8395
8651
|
});
|
|
8396
|
-
|
|
8652
|
+
for (const assertion of test.assert || []) {
|
|
8397
8653
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
8398
8654
|
if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
|
|
8399
|
-
env:
|
|
8655
|
+
env: testSuiteConfig.env,
|
|
8400
8656
|
basePath: state.basePath
|
|
8401
8657
|
});
|
|
8402
8658
|
}
|
|
8403
8659
|
}
|
|
8404
8660
|
if (options.cache === false) disableCache();
|
|
8405
|
-
const parsedProviderPromptMap = readProviderPromptMap(
|
|
8406
|
-
const unifiedConfig =
|
|
8407
|
-
|
|
8408
|
-
|
|
8409
|
-
};
|
|
8410
|
-
const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
|
|
8661
|
+
const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
|
|
8662
|
+
const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
|
|
8663
|
+
const author = getAuthor(suiteAuthor);
|
|
8664
|
+
const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
|
|
8411
8665
|
const ret = await evaluate$1({
|
|
8412
8666
|
...constructedTestSuite,
|
|
8413
8667
|
providerPromptMap: parsedProviderPromptMap
|
|
8414
8668
|
}, evalRecord, {
|
|
8415
8669
|
eventSource: "library",
|
|
8416
|
-
isRedteam: Boolean(
|
|
8670
|
+
isRedteam: Boolean(testSuiteConfig.redteam),
|
|
8417
8671
|
...options
|
|
8418
8672
|
});
|
|
8419
|
-
if (
|
|
8673
|
+
if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
|
|
8420
8674
|
const shareableUrl = await createShareableUrl(ret, { silent: true });
|
|
8421
8675
|
if (shareableUrl) {
|
|
8422
8676
|
ret.shareableUrl = shareableUrl;
|
|
@@ -8427,9 +8681,9 @@ async function evaluate(testSuite, options = {}) {
|
|
|
8427
8681
|
logger.warn(`Failed to create shareable URL: ${error}`);
|
|
8428
8682
|
}
|
|
8429
8683
|
else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
|
|
8430
|
-
if (
|
|
8431
|
-
if (typeof
|
|
8432
|
-
else if (Array.isArray(
|
|
8684
|
+
if (testSuiteConfig.outputPath) {
|
|
8685
|
+
if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
|
|
8686
|
+
else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
|
|
8433
8687
|
}
|
|
8434
8688
|
return ret;
|
|
8435
8689
|
}
|
|
@@ -8658,6 +8912,58 @@ function setDownloadHeaders(res, fileName, contentType) {
|
|
|
8658
8912
|
//#region src/server/routes/eval.ts
|
|
8659
8913
|
const evalRouter = Router();
|
|
8660
8914
|
const evalJobs = /* @__PURE__ */ new Map();
|
|
8915
|
+
/**
 * Sends the eval table payload as JSON, falling back to a payload with some
 * per-cell prompts stripped when serialization throws a RangeError.
 *
 * Fallback strategy:
 *  1. Ask `getEvalTableOutputPromptLocationsBySize` for the strippable prompt locations.
 *  2. Grow the number of stripped prompts (1, 2, 4, ...) until serialization
 *     succeeds, finally trying "all of them".
 *  3. Bisect between the last failing count and the first succeeding count
 *     to find the smallest count that still serializes.
 * If nothing can be stripped, or even stripping everything fails, a 413 is returned.
 *
 * Errors other than RangeError are rethrown.
 *
 * @param res - Response object (`json`, `status`, `type` and `send` are used).
 * @param evalId - Eval identifier, used only as log context.
 * @param responsePayload - Full table payload to send.
 */
function sendEvalTableResponse(res, evalId, responsePayload) {
	try {
		res.json(responsePayload);
		return;
	} catch (error) {
		if (!(error instanceof RangeError)) throw error;
	}
	logger.warn("[GET /:id/table] Response too large, stripping per-cell prompts by size", { evalId });
	const promptLocations = getEvalTableOutputPromptLocationsBySize(responsePayload);
	const respondTooLarge = (logMessage) => {
		logger.error(logMessage, { evalId });
		res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
	};
	if (promptLocations.length === 0) {
		respondTooLarge("[GET /:id/table] Response too large and has no prompts to strip");
		return;
	}
	// Returns the serialized body, or null when it is still too large to stringify.
	const serializeStripped = (stripCount) => {
		const strippedPayload = getEvalTablePromptStrippedPayload(responsePayload, promptLocations, stripCount);
		try {
			const json = JSON.stringify(strippedPayload);
			invariant(typeof json === "string", "Eval table response must serialize to JSON");
			return json;
		} catch (retryError) {
			if (retryError instanceof RangeError) return null;
			throw retryError;
		}
	};
	// failingCount: largest strip count known to fail (0 = the unstripped payload).
	// workingCount: candidate strip count; once `body` is set it is known to succeed.
	let failingCount = 0;
	let workingCount = 1;
	let body = null;
	for (; workingCount < promptLocations.length; workingCount *= 2) {
		body = serializeStripped(workingCount);
		if (body) break;
		failingCount = workingCount;
	}
	if (!body) {
		workingCount = promptLocations.length;
		body = serializeStripped(workingCount);
	}
	if (!body) {
		respondTooLarge("[GET /:id/table] Response still too large after stripping prompts");
		return;
	}
	// Narrow down to the smallest strip count that still serializes.
	while (workingCount - failingCount > 1) {
		const probeCount = failingCount + Math.floor((workingCount - failingCount) / 2);
		const probeBody = serializeStripped(probeCount);
		if (probeBody) {
			workingCount = probeCount;
			body = probeBody;
		} else {
			failingCount = probeCount;
		}
	}
	res.type("json").send(body);
}
|
|
8661
8967
|
evalRouter.post("/job", (req, res) => {
|
|
8662
8968
|
const result = EvalSchemas.CreateJob.Request.safeParse(req.body);
|
|
8663
8969
|
if (!result.success) {
|
|
@@ -8698,9 +9004,10 @@ evalRouter.post("/job", (req, res) => {
|
|
|
8698
9004
|
job.evalId = evalResult.id;
|
|
8699
9005
|
console.log(`[${id}] Complete`);
|
|
8700
9006
|
}).catch((error) => {
|
|
8701
|
-
logger.error(
|
|
8702
|
-
|
|
8703
|
-
|
|
9007
|
+
logger.error("Failed to eval tests", {
|
|
9008
|
+
error,
|
|
9009
|
+
body: sanitizeObject(testSuite, { context: "request body" })
|
|
9010
|
+
});
|
|
8704
9011
|
const job = evalJobs.get(id);
|
|
8705
9012
|
invariant(job, "Job not found");
|
|
8706
9013
|
job.status = "error";
|
|
@@ -8897,7 +9204,7 @@ evalRouter.get("/:id/table", async (req, res) => {
|
|
|
8897
9204
|
evalId: id
|
|
8898
9205
|
});
|
|
8899
9206
|
}
|
|
8900
|
-
res
|
|
9207
|
+
sendEvalTableResponse(res, id, {
|
|
8901
9208
|
table: returnTable,
|
|
8902
9209
|
totalCount: table.totalCount,
|
|
8903
9210
|
filteredCount: table.filteredCount,
|
|
@@ -9063,43 +9370,53 @@ evalRouter.post("/:evalId/results/:id/rating", async (req, res) => {
|
|
|
9063
9370
|
res.status(400).json({ error: z.prettifyError(bodyResult.error) });
|
|
9064
9371
|
return;
|
|
9065
9372
|
}
|
|
9066
|
-
|
|
9067
|
-
|
|
9068
|
-
|
|
9069
|
-
|
|
9070
|
-
|
|
9071
|
-
|
|
9072
|
-
|
|
9073
|
-
|
|
9074
|
-
|
|
9075
|
-
|
|
9076
|
-
|
|
9077
|
-
|
|
9078
|
-
|
|
9079
|
-
|
|
9080
|
-
|
|
9081
|
-
|
|
9082
|
-
|
|
9083
|
-
|
|
9373
|
+
try {
|
|
9374
|
+
const { evalId, id } = paramsResult.data;
|
|
9375
|
+
const gradingResult = bodyResult.data;
|
|
9376
|
+
const result = await EvalResult.findById(id);
|
|
9377
|
+
if (!result || result.evalId !== evalId) {
|
|
9378
|
+
res.status(404).json({ error: "Result not found" });
|
|
9379
|
+
return;
|
|
9380
|
+
}
|
|
9381
|
+
const eval_ = await Eval.findById(evalId);
|
|
9382
|
+
if (!eval_) {
|
|
9383
|
+
res.status(404).json({ error: "Eval not found" });
|
|
9384
|
+
return;
|
|
9385
|
+
}
|
|
9386
|
+
const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
|
|
9387
|
+
const successChanged = result.success !== gradingResult.pass;
|
|
9388
|
+
const scoreChange = gradingResult.score - result.score;
|
|
9389
|
+
result.gradingResult = gradingResult;
|
|
9390
|
+
result.success = gradingResult.pass;
|
|
9391
|
+
result.score = gradingResult.score;
|
|
9392
|
+
const prompt = eval_.prompts[result.promptIdx];
|
|
9393
|
+
invariant(prompt, "Prompt not found");
|
|
9394
|
+
if (!prompt.metrics) {
|
|
9395
|
+
logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
|
|
9396
|
+
res.status(400).json({ error: "Prompt metrics not found" });
|
|
9397
|
+
return;
|
|
9398
|
+
}
|
|
9399
|
+
if (successChanged) if (result.success) {
|
|
9400
|
+
prompt.metrics.testPassCount += 1;
|
|
9401
|
+
prompt.metrics.testFailCount -= 1;
|
|
9402
|
+
prompt.metrics.assertPassCount += 1;
|
|
9403
|
+
prompt.metrics.score += scoreChange;
|
|
9404
|
+
if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
|
|
9405
|
+
} else {
|
|
9406
|
+
prompt.metrics.testPassCount -= 1;
|
|
9407
|
+
prompt.metrics.testFailCount += 1;
|
|
9408
|
+
prompt.metrics.assertFailCount += 1;
|
|
9409
|
+
prompt.metrics.score += scoreChange;
|
|
9410
|
+
if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
|
|
9411
|
+
}
|
|
9412
|
+
else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
|
|
9413
|
+
else prompt.metrics.assertFailCount += 1;
|
|
9414
|
+
await eval_.save();
|
|
9415
|
+
await result.save();
|
|
9416
|
+
res.json(result);
|
|
9417
|
+
} catch (error) {
|
|
9418
|
+
sendError(res, 500, "Failed to submit rating", error);
|
|
9084
9419
|
}
|
|
9085
|
-
if (successChanged) if (result.success) {
|
|
9086
|
-
prompt.metrics.testPassCount += 1;
|
|
9087
|
-
prompt.metrics.testFailCount -= 1;
|
|
9088
|
-
prompt.metrics.assertPassCount += 1;
|
|
9089
|
-
prompt.metrics.score += scoreChange;
|
|
9090
|
-
if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
|
|
9091
|
-
} else {
|
|
9092
|
-
prompt.metrics.testPassCount -= 1;
|
|
9093
|
-
prompt.metrics.testFailCount += 1;
|
|
9094
|
-
prompt.metrics.assertFailCount += 1;
|
|
9095
|
-
prompt.metrics.score += scoreChange;
|
|
9096
|
-
if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
|
|
9097
|
-
}
|
|
9098
|
-
else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
|
|
9099
|
-
else prompt.metrics.assertFailCount += 1;
|
|
9100
|
-
await eval_.save();
|
|
9101
|
-
await result.save();
|
|
9102
|
-
res.json(result);
|
|
9103
9420
|
});
|
|
9104
9421
|
evalRouter.post("/", async (req, res) => {
|
|
9105
9422
|
const bodyResult = EvalSchemas.Save.Request.safeParse(req.body);
|
|
@@ -9133,9 +9450,10 @@ evalRouter.post("/", async (req, res) => {
|
|
|
9133
9450
|
res.json(EvalSchemas.Save.Response.parse({ id: eval_.id }));
|
|
9134
9451
|
}
|
|
9135
9452
|
} catch (error) {
|
|
9136
|
-
logger.error(
|
|
9137
|
-
|
|
9138
|
-
|
|
9453
|
+
logger.error("Failed to write eval to database", {
|
|
9454
|
+
error,
|
|
9455
|
+
body: sanitizeObject(body, { context: "request body" })
|
|
9456
|
+
});
|
|
9139
9457
|
res.status(500).json({ error: "Failed to write eval to database" });
|
|
9140
9458
|
}
|
|
9141
9459
|
});
|
|
@@ -9353,7 +9671,7 @@ async function checkForUpdates() {
|
|
|
9353
9671
|
} catch {
|
|
9354
9672
|
return false;
|
|
9355
9673
|
}
|
|
9356
|
-
if (semverGt(latestVersion, "0.121.
|
|
9674
|
+
if (semverGt(latestVersion, "0.121.7")) {
|
|
9357
9675
|
const border = "=".repeat(TERMINAL_MAX_WIDTH);
|
|
9358
9676
|
logger.info(`\n${border}
|
|
9359
9677
|
${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
|
|
@@ -9512,6 +9830,9 @@ const ModelAuditCliOptionsSchema = z.object({
|
|
|
9512
9830
|
dryRun: z.boolean().optional(),
|
|
9513
9831
|
cache: z.boolean().optional(),
|
|
9514
9832
|
stream: z.boolean().optional(),
|
|
9833
|
+
scanners: z.array(z.string()).optional(),
|
|
9834
|
+
excludeScanner: z.array(z.string()).optional(),
|
|
9835
|
+
listScanners: z.boolean().optional(),
|
|
9515
9836
|
share: z.boolean().optional(),
|
|
9516
9837
|
noShare: z.boolean().optional()
|
|
9517
9838
|
});
|
|
@@ -9599,6 +9920,18 @@ const CLI_ARG_MAP = {
|
|
|
9599
9920
|
stream: {
|
|
9600
9921
|
flag: "--stream",
|
|
9601
9922
|
type: "boolean"
|
|
9923
|
+
},
|
|
9924
|
+
scanners: {
|
|
9925
|
+
flag: "--scanners",
|
|
9926
|
+
type: "array"
|
|
9927
|
+
},
|
|
9928
|
+
excludeScanner: {
|
|
9929
|
+
flag: "--exclude-scanner",
|
|
9930
|
+
type: "array"
|
|
9931
|
+
},
|
|
9932
|
+
listScanners: {
|
|
9933
|
+
flag: "--list-scanners",
|
|
9934
|
+
type: "boolean"
|
|
9602
9935
|
}
|
|
9603
9936
|
};
|
|
9604
9937
|
/**
|
|
@@ -9767,6 +10100,62 @@ function spawnModelAudit(args, options) {
|
|
|
9767
10100
|
});
|
|
9768
10101
|
});
|
|
9769
10102
|
}
|
|
10103
|
+
/**
 * Accumulator for a CLI option that may be given more than once: returns a
 * new array holding the previously collected values followed by `value`.
 * The `previous` array itself is not modified.
 *
 * @param value - Newly supplied option value.
 * @param previous - Values collected so far (defaults to an empty list).
 * @returns A fresh array ending with `value`.
 */
function collectRepeatableOption(value, previous = []) {
	const collected = Array.from(previous);
	collected.push(value);
	return collected;
}
|
|
10106
|
+
/**
 * Tells whether the given options carry a non-empty `scanners` or
 * `excludeScanner` value (judged by a truthy `length`).
 *
 * @param options - Options object to inspect.
 * @returns {boolean} True when either list has at least one entry.
 */
function hasScannerSelectionOptions(options) {
	const { scanners, excludeScanner } = options;
	if (scanners?.length) return true;
	return Boolean(excludeScanner?.length);
}
|
|
10109
|
+
/**
 * Tells whether a scanner-selection value is "set": either a non-empty
 * array or a string with non-whitespace content. Every other value counts
 * as unset.
 *
 * @param value - Value to test.
 * @returns {boolean} True for a non-empty array or a non-blank string.
 */
function hasScannerSelectionValue(value) {
	if (typeof value === "string") return value.trim() !== "";
	return Array.isArray(value) && value.length !== 0;
}
|
|
10113
|
+
/**
 * Tells whether stored audit metadata records a scanner selection, i.e.
 * `metadata.options` is a non-array object whose `scanners` or
 * `excludeScanner` passes `hasScannerSelectionValue`.
 *
 * @param metadata - Metadata object (may be null/undefined).
 * @returns {boolean} True when a scanner selection is present.
 */
function hasPersistedScannerSelection(metadata) {
	const persisted = metadata?.options;
	const isOptionsObject = Boolean(persisted) && typeof persisted === "object" && !Array.isArray(persisted);
	if (!isOptionsObject) return false;
	if (hasScannerSelectionValue(persisted.scanners)) return true;
	return hasScannerSelectionValue(persisted.excludeScanner);
}
|
|
10118
|
+
/**
 * Runs `parseModelAuditArgs` on the given paths and CLI options.
 *
 * A Zod validation failure is reported through the logger, sets
 * `process.exitCode` to 1 and yields `null`. Any other error is rethrown.
 *
 * @param paths - Paths forwarded to `parseModelAuditArgs`.
 * @param cliOptions - CLI options forwarded to `parseModelAuditArgs`.
 * @returns The value returned by `parseModelAuditArgs`, or `null` on validation failure.
 */
function buildCliArgs(paths, cliOptions) {
	let parsed;
	try {
		parsed = parseModelAuditArgs(paths, cliOptions);
	} catch (error) {
		const isValidationFailure = error instanceof z.ZodError;
		if (!isValidationFailure) throw error;
		logger.error(`Invalid model audit options provided:\n${z.prettifyError(error)}`);
		process.exitCode = 1;
		return null;
	}
	return parsed;
}
|
|
10134
|
+
/**
 * Spawns modelaudit without capturing its output and mirrors the child's
 * exit code onto `process.exitCode` (a null or zero code becomes 0).
 *
 * A non-zero exit is logged as an error, except that exit code 1 is not
 * logged when `treatExitOneAsIssues` is true. Per modelaudit's convention,
 * exit code 1 means "scan completed, issues found", so the main scan flow
 * passes true while list/help flows keep the default.
 *
 * If spawning throws, the failure and installation hints are logged and
 * `process.exitCode` is set to 1.
 *
 * @param args - Arguments for modelaudit.
 * @param env - Environment passed to the spawned process.
 * @param treatExitOneAsIssues - Suppress the error log for exit code 1.
 */
async function runPassthroughModelAudit(args, env, treatExitOneAsIssues = false) {
	try {
		const { code } = await spawnModelAudit(args, {
			captureOutput: false,
			env
		});
		const exitedNonZero = code !== null && code !== 0;
		const isExpectedFindingsExit = treatExitOneAsIssues && code === 1;
		if (exitedNonZero && !isExpectedFindingsExit) {
			logger.error(`Model scan process exited with code ${code}`);
		}
		process.exitCode = code || 0;
	} catch (error) {
		const reason = error instanceof Error ? error.message : String(error);
		logger.error(`Failed to start modelaudit: ${reason}`);
		logger.info("Make sure modelaudit is installed and available in your PATH.");
		logger.info("Install it using: pip install modelaudit");
		process.exitCode = 1;
	}
}
|
|
9770
10159
|
/**
|
|
9771
10160
|
* Check for existing scan and determine if re-scan is needed.
|
|
9772
10161
|
* Returns the existing audit if found and re-scan should happen.
|
|
@@ -9789,6 +10178,20 @@ async function checkExistingScan(paths, options, currentScannerVersion) {
|
|
|
9789
10178
|
shouldSkip: false,
|
|
9790
10179
|
existingAudit: null
|
|
9791
10180
|
};
|
|
10181
|
+
if (hasScannerSelectionOptions(options)) {
|
|
10182
|
+
logger.debug("Re-scanning with scanner selection options");
|
|
10183
|
+
return {
|
|
10184
|
+
shouldSkip: false,
|
|
10185
|
+
existingAudit: existing
|
|
10186
|
+
};
|
|
10187
|
+
}
|
|
10188
|
+
if (hasPersistedScannerSelection(existing.metadata)) {
|
|
10189
|
+
logger.debug("Re-scanning because cached revision used scanner selection options");
|
|
10190
|
+
return {
|
|
10191
|
+
shouldSkip: false,
|
|
10192
|
+
existingAudit: existing
|
|
10193
|
+
};
|
|
10194
|
+
}
|
|
9792
10195
|
if (options.force) {
|
|
9793
10196
|
logger.debug(`Re-scanning (--force): ${modelId}`);
|
|
9794
10197
|
return {
|
|
@@ -9920,7 +10323,9 @@ async function saveAuditRecord(paths, results, options, currentScannerVersion, e
|
|
|
9920
10323
|
cache: options.cache,
|
|
9921
10324
|
quiet: options.quiet,
|
|
9922
10325
|
progress: options.progress,
|
|
9923
|
-
stream: options.stream
|
|
10326
|
+
stream: options.stream,
|
|
10327
|
+
scanners: options.scanners,
|
|
10328
|
+
excludeScanner: options.excludeScanner
|
|
9924
10329
|
}
|
|
9925
10330
|
};
|
|
9926
10331
|
if (existingAudit) {
|
|
@@ -10052,8 +10457,8 @@ async function processScanResultsFromStdout(spawnResult, paths, options, current
|
|
|
10052
10457
|
return processJsonResults(jsonOutput, spawnResult.code || 0, paths, options, currentScannerVersion, existingAudit);
|
|
10053
10458
|
}
|
|
10054
10459
|
function modelScanCommand(program) {
|
|
10055
|
-
program.command("scan-model").description("Scan model files for security and quality issues").argument("
|
|
10056
|
-
if (!paths || paths.length === 0) {
|
|
10460
|
+
program.command("scan-model").description("Scan model files for security and quality issues").argument("[paths...]", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("--scanners <scanner>", "Only run selected ModelAudit scanners (IDs/classes; comma-separated or repeated)", collectRepeatableOption).option("--exclude-scanner <scanner>", "Exclude a ModelAudit scanner from the active set (comma-separated or repeated)", collectRepeatableOption).option("--list-scanners", "List registered ModelAudit scanners and exit").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
|
|
10461
|
+
if (!options.listScanners && (!paths || paths.length === 0)) {
|
|
10057
10462
|
logger.error("No paths specified. Provide at least one model file or directory to scan.");
|
|
10058
10463
|
process.exitCode = 1;
|
|
10059
10464
|
return;
|
|
@@ -10069,6 +10474,21 @@ function modelScanCommand(program) {
|
|
|
10069
10474
|
}
|
|
10070
10475
|
await checkModelAuditUpdates();
|
|
10071
10476
|
if (currentScannerVersion) logger.debug(`Using modelaudit version: ${currentScannerVersion}`);
|
|
10477
|
+
const delegationEnv = {
|
|
10478
|
+
...process.env,
|
|
10479
|
+
PROMPTFOO_DELEGATED: "true"
|
|
10480
|
+
};
|
|
10481
|
+
if (options.listScanners) {
|
|
10482
|
+
const parsed = buildCliArgs(paths || [], {
|
|
10483
|
+
...options,
|
|
10484
|
+
format: options.format || "text",
|
|
10485
|
+
output: options.output,
|
|
10486
|
+
timeout: void 0
|
|
10487
|
+
});
|
|
10488
|
+
if (!parsed) return;
|
|
10489
|
+
await runPassthroughModelAudit(parsed.args, delegationEnv);
|
|
10490
|
+
return;
|
|
10491
|
+
}
|
|
10072
10492
|
const saveToDatabase = options.write === void 0 || options.write === true;
|
|
10073
10493
|
let existingAuditToUpdate = null;
|
|
10074
10494
|
if (saveToDatabase) {
|
|
@@ -10080,33 +10500,22 @@ function modelScanCommand(program) {
|
|
|
10080
10500
|
existingAuditToUpdate = existingAudit;
|
|
10081
10501
|
}
|
|
10082
10502
|
const outputFormat = saveToDatabase ? "json" : options.format || "text";
|
|
10083
|
-
const
|
|
10503
|
+
const parsed = buildCliArgs(paths, {
|
|
10084
10504
|
...options,
|
|
10085
10505
|
format: outputFormat,
|
|
10086
10506
|
output: options.output && !saveToDatabase ? options.output : void 0,
|
|
10087
10507
|
timeout: options.timeout ? parseInt(options.timeout, 10) : void 0
|
|
10088
|
-
};
|
|
10089
|
-
|
|
10090
|
-
|
|
10091
|
-
|
|
10092
|
-
|
|
10093
|
-
|
|
10094
|
-
|
|
10095
|
-
|
|
10096
|
-
logger.error("Invalid model audit options provided:");
|
|
10097
|
-
for (const err of error.issues) logger.error(` - ${err.path.join(".")}: ${err.message}`);
|
|
10098
|
-
process.exitCode = 1;
|
|
10099
|
-
return;
|
|
10100
|
-
}
|
|
10101
|
-
throw error;
|
|
10508
|
+
});
|
|
10509
|
+
if (!parsed) return;
|
|
10510
|
+
const args = parsed.args;
|
|
10511
|
+
if (parsed.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${parsed.unsupportedOptions.join(", ")}`);
|
|
10512
|
+
if (saveToDatabase || outputFormat === "text") logger.info(`Running model scan on: ${paths.join(", ")}`);
|
|
10513
|
+
if (!saveToDatabase) {
|
|
10514
|
+
await runPassthroughModelAudit(args, delegationEnv, true);
|
|
10515
|
+
return;
|
|
10102
10516
|
}
|
|
10103
|
-
logger.info(`Running model scan on: ${paths.join(", ")}`);
|
|
10104
|
-
const delegationEnv = {
|
|
10105
|
-
...process.env,
|
|
10106
|
-
PROMPTFOO_DELEGATED: "true"
|
|
10107
|
-
};
|
|
10108
10517
|
try {
|
|
10109
|
-
if (
|
|
10518
|
+
if (supportsCliUiWithOutput(currentScannerVersion)) {
|
|
10110
10519
|
const tempOutputPath = createTempOutputPath();
|
|
10111
10520
|
args.push("--output", tempOutputPath);
|
|
10112
10521
|
let cleanedUp = false;
|
|
@@ -10140,14 +10549,6 @@ function modelScanCommand(program) {
|
|
|
10140
10549
|
});
|
|
10141
10550
|
process.exitCode = await processScanResultsFromStdout(spawnResult, paths, options, currentScannerVersion, existingAuditToUpdate);
|
|
10142
10551
|
}
|
|
10143
|
-
else {
|
|
10144
|
-
const spawnResult = await spawnModelAudit(args, {
|
|
10145
|
-
captureOutput: false,
|
|
10146
|
-
env: delegationEnv
|
|
10147
|
-
});
|
|
10148
|
-
if (spawnResult.code !== null && spawnResult.code !== 0 && spawnResult.code !== 1) logger.error(`Model scan process exited with code ${spawnResult.code}`);
|
|
10149
|
-
process.exitCode = spawnResult.code || 0;
|
|
10150
|
-
}
|
|
10151
10552
|
} catch (error) {
|
|
10152
10553
|
const message = error instanceof Error ? error.message : String(error);
|
|
10153
10554
|
logger.error(`Failed to start modelaudit: ${message}`);
|
|
@@ -10164,6 +10565,14 @@ const CheckInstalledResponseSchema = z.object({
|
|
|
10164
10565
|
version: z.string().nullable(),
|
|
10165
10566
|
cwd: z.string()
|
|
10166
10567
|
});
|
|
10568
|
+
/**
 * One scanner entry in the list-scanners response. Only `id` is required;
 * the remaining known fields are optional and default to an empty string or
 * empty array. `.passthrough()` is applied to the object schema.
 */
const ScannerInfoSchema = z
	.object({
		id: z.string(),
		class: z.string().optional().default(""),
		description: z.string().optional().default(""),
		extensions: z.array(z.string()).optional().default([]),
		dependencies: z.array(z.string()).optional().default([])
	})
	.passthrough();
/** Response body for the scanner listing: an object with a `scanners` array, also `.passthrough()`. */
const ListScannersResponseSchema = z
	.object({
		scanners: z.array(ScannerInfoSchema)
	})
	.passthrough();
|
|
10167
10576
|
const CheckPathRequestSchema = z.object({ path: z.string().trim().min(1, "No path provided") });
|
|
10168
10577
|
const CheckPathResponseSchema = z.union([z.object({
|
|
10169
10578
|
exists: z.literal(false),
|
|
@@ -10195,6 +10604,8 @@ const ScanRequestSchema = z.object({
|
|
|
10195
10604
|
sbom: z.string().optional(),
|
|
10196
10605
|
output: z.string().optional(),
|
|
10197
10606
|
maxSize: z.string().optional(),
|
|
10607
|
+
scanners: z.array(z.string()).optional(),
|
|
10608
|
+
excludeScanner: z.array(z.string()).optional(),
|
|
10198
10609
|
persist: z.boolean().optional(),
|
|
10199
10610
|
name: z.string().optional(),
|
|
10200
10611
|
author: z.string().optional()
|
|
@@ -10205,8 +10616,13 @@ const ListScansQuerySchema = z.object({
|
|
|
10205
10616
|
offset: z.coerce.number().int().min(0).optional().default(0),
|
|
10206
10617
|
sort: z.enum([
|
|
10207
10618
|
"createdAt",
|
|
10619
|
+
"failedChecks",
|
|
10620
|
+
"hasErrors",
|
|
10621
|
+
"id",
|
|
10622
|
+
"modelPath",
|
|
10208
10623
|
"name",
|
|
10209
|
-
"
|
|
10624
|
+
"passedChecks",
|
|
10625
|
+
"totalChecks"
|
|
10210
10626
|
]).optional().default("createdAt"),
|
|
10211
10627
|
order: z.enum(["asc", "desc"]).optional().default("desc"),
|
|
10212
10628
|
search: z.string().optional()
|
|
@@ -10245,6 +10661,7 @@ const DeleteScanResponseSchema = z.object({
|
|
|
10245
10661
|
});
|
|
10246
10662
|
const ModelAuditSchemas = {
|
|
10247
10663
|
CheckInstalled: { Response: CheckInstalledResponseSchema },
|
|
10664
|
+
ListScanners: { Response: ListScannersResponseSchema },
|
|
10248
10665
|
CheckPath: {
|
|
10249
10666
|
Request: CheckPathRequestSchema,
|
|
10250
10667
|
Response: CheckPathResponseSchema
|
|
@@ -10267,6 +10684,47 @@ const ModelAuditSchemas = {
|
|
|
10267
10684
|
//#endregion
|
|
10268
10685
|
//#region src/server/routes/modelAudit.ts
|
|
10269
10686
|
const modelAuditRouter = Router();
|
|
10687
|
+
/**
 * Argument list for the "list scanners" invocation with JSON output,
 * computed once at module load via `parseModelAuditArgs`.
 */
const { args: LIST_SCANNERS_ARGS } = parseModelAuditArgs([], {
	listScanners: true,
	format: "json"
});
|
|
10691
|
+
/**
 * Returns a fresh copy of `process.env` with `PROMPTFOO_DELEGATED` set to
 * the string "true". `process.env` itself is left unchanged.
 *
 * @returns A new environment object for spawned modelaudit processes.
 */
function getModelAuditDelegationEnv() {
	const delegatedEnv = { ...process.env };
	delegatedEnv.PROMPTFOO_DELEGATED = "true";
	return delegatedEnv;
}
|
|
10697
|
+
/**
 * Spawns `modelaudit` with the delegation environment and buffers everything
 * it writes to stdout and stderr.
 *
 * @param args - Command-line arguments for `modelaudit`.
 * @param options - Optional settings. When `options.signal` aborts (or is
 *   already aborted at call time) the child is sent SIGTERM.
 * @returns Promise resolving with `{ code, stdout, stderr }` when the child
 *   emits "close", and rejecting with the error if the child emits "error".
 */
function spawnModelAuditCapture(args, options = {}) {
	return new Promise((resolve, reject) => {
		const child = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
		let stdout = "";
		let stderr = "";
		// Decode at the stream level: converting each Buffer chunk on its own
		// corrupts a multi-byte UTF-8 character that straddles two chunks.
		child.stdout?.setEncoding("utf8");
		child.stderr?.setEncoding("utf8");
		const onAbort = () => {
			if (!child.killed) child.kill("SIGTERM");
		};
		if (options.signal?.aborted) onAbort();
		else options.signal?.addEventListener("abort", onAbort, { once: true });
		// Detach the abort listener once the child has settled either way.
		const cleanupAbort = () => options.signal?.removeEventListener("abort", onAbort);
		child.stdout?.on("data", (chunk) => {
			stdout += chunk;
		});
		child.stderr?.on("data", (chunk) => {
			stderr += chunk;
		});
		child.on("error", (error) => {
			cleanupAbort();
			reject(error);
		});
		child.on("close", (code) => {
			cleanupAbort();
			resolve({
				code,
				stdout,
				stderr
			});
		});
	});
}
|
|
10270
10728
|
modelAuditRouter.get("/check-installed", async (_req, res) => {
|
|
10271
10729
|
try {
|
|
10272
10730
|
const { installed, version } = await checkModelAuditInstalled();
|
|
@@ -10283,6 +10741,34 @@ modelAuditRouter.get("/check-installed", async (_req, res) => {
|
|
|
10283
10741
|
}));
|
|
10284
10742
|
}
|
|
10285
10743
|
});
|
|
10744
|
+
/**
 * GET /scanners — lists the scanners reported by the installed modelaudit.
 *
 * Responds 400 when modelaudit is not installed, 500 when the listing
 * process exits non-zero or its output cannot be parsed/validated, and
 * otherwise the validated listing as JSON. The child process is aborted
 * when the request emits "close"; nothing is sent once aborted.
 */
modelAuditRouter.get("/scanners", async (req, res) => {
	const abortController = new AbortController();
	const { signal } = abortController;
	const abortOnClientClose = () => abortController.abort();
	req.on("close", abortOnClientClose);
	try {
		const installStatus = await checkModelAuditInstalled();
		if (!installStatus.installed) {
			res.status(400).json({ error: "ModelAudit is not installed. Please install it using: pip install modelaudit" });
			return;
		}
		const listing = await spawnModelAuditCapture(LIST_SCANNERS_ARGS, { signal });
		if (signal.aborted) return;
		// A null exit code is not treated as a failure here; parsing below decides.
		const exitedWithFailure = listing.code !== null && listing.code !== 0;
		if (exitedWithFailure) {
			sendError(res, 500, "Failed to list ModelAudit scanners", {
				code: listing.code,
				stderr: listing.stderr
			});
			return;
		}
		const rawListing = JSON.parse(listing.stdout);
		const validatedListing = ModelAuditSchemas.ListScanners.Response.parse(rawListing);
		res.json(validatedListing);
	} catch (error) {
		if (signal.aborted) return;
		sendError(res, 500, "Failed to list ModelAudit scanners", error);
	} finally {
		req.removeListener("close", abortOnClientClose);
	}
});
|
|
10286
10772
|
modelAuditRouter.post("/check-path", async (req, res) => {
|
|
10287
10773
|
const bodyResult = ModelAuditSchemas.CheckPath.Request.safeParse(req.body);
|
|
10288
10774
|
if (!bodyResult.success) {
|
|
@@ -10353,11 +10839,12 @@ modelAuditRouter.post("/scan", async (req, res) => {
|
|
|
10353
10839
|
event: "model_scan",
|
|
10354
10840
|
pathCount: paths.length,
|
|
10355
10841
|
hasBlacklist: (options.blacklist?.length ?? 0) > 0,
|
|
10842
|
+
hasScannerSelection: Boolean(options.scanners?.length || options.excludeScanner?.length),
|
|
10356
10843
|
timeout: options.timeout ?? 0,
|
|
10357
10844
|
verbose: options.verbose ?? false,
|
|
10358
10845
|
persist
|
|
10359
10846
|
});
|
|
10360
|
-
const modelAudit = spawn("modelaudit", args);
|
|
10847
|
+
const modelAudit = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
|
|
10361
10848
|
let stdout = "";
|
|
10362
10849
|
let stderr = "";
|
|
10363
10850
|
let responded = false;
|
|
@@ -10543,9 +11030,20 @@ modelAuditRouter.post("/scan", async (req, res) => {
|
|
|
10543
11030
|
options: {
|
|
10544
11031
|
blacklist: options.blacklist,
|
|
10545
11032
|
timeout: options.timeout,
|
|
11033
|
+
maxSize: options.maxSize,
|
|
10546
11034
|
maxFileSize: options.maxFileSize,
|
|
10547
11035
|
maxTotalSize: options.maxTotalSize,
|
|
10548
|
-
verbose: options.verbose
|
|
11036
|
+
verbose: options.verbose,
|
|
11037
|
+
format: options.format,
|
|
11038
|
+
strict: options.strict,
|
|
11039
|
+
dryRun: options.dryRun,
|
|
11040
|
+
cache: options.cache,
|
|
11041
|
+
quiet: options.quiet,
|
|
11042
|
+
progress: options.progress,
|
|
11043
|
+
sbom: options.sbom,
|
|
11044
|
+
output: options.output,
|
|
11045
|
+
scanners: options.scanners,
|
|
11046
|
+
excludeScanner: options.excludeScanner
|
|
10549
11047
|
}
|
|
10550
11048
|
}
|
|
10551
11049
|
})).id;
|
|
@@ -10701,6 +11199,38 @@ function normalizeTargetPurposeDiscoveryResult(result) {
|
|
|
10701
11199
|
tools: cleanTools(result.tools)
|
|
10702
11200
|
};
|
|
10703
11201
|
}
|
|
11202
|
+
function extractStringField(value) {
|
|
11203
|
+
if (typeof value !== "string") return;
|
|
11204
|
+
return value.trim() || void 0;
|
|
11205
|
+
}
|
|
11206
|
+
async function getRemoteResponseErrorDetail(response) {
|
|
11207
|
+
const rawText = (await response.text()).trim();
|
|
11208
|
+
const fallback = rawText || response.statusText || "Unknown error";
|
|
11209
|
+
if (!rawText) return fallback;
|
|
11210
|
+
try {
|
|
11211
|
+
const parsed = JSON.parse(rawText);
|
|
11212
|
+
return extractStringField(parsed?.message) ?? extractStringField(parsed?.error) ?? fallback;
|
|
11213
|
+
} catch {
|
|
11214
|
+
return fallback;
|
|
11215
|
+
}
|
|
11216
|
+
}
|
|
11217
|
+
const REMOTE_ERROR_HINTS = {
|
|
11218
|
+
400: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
|
|
11219
|
+
401: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
|
|
11220
|
+
403: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
|
|
11221
|
+
404: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
|
|
11222
|
+
429: "You are being rate limited. Wait a moment and try again."
|
|
11223
|
+
};
|
|
11224
|
+
function getRemoteErrorHint(status) {
|
|
11225
|
+
if (REMOTE_ERROR_HINTS[status]) return REMOTE_ERROR_HINTS[status];
|
|
11226
|
+
if (status >= 500) return "The remote generation service may be temporarily unavailable. Retry in a few minutes or contact support if the issue persists.";
|
|
11227
|
+
}
|
|
11228
|
+
async function buildRemoteErrorFromResponse(response) {
|
|
11229
|
+
const detail = await getRemoteResponseErrorDetail(response);
|
|
11230
|
+
const hint = getRemoteErrorHint(response.status);
|
|
11231
|
+
const base = `Remote server returned HTTP ${response.status}: ${detail}`;
|
|
11232
|
+
return new Error(hint ? `${base}\n${hint}` : base);
|
|
11233
|
+
}
|
|
10704
11234
|
/**
|
|
10705
11235
|
* Queries Cloud for the purpose-discovery logic, sends each logic to the target,
|
|
10706
11236
|
* and summarizes the results.
|
|
@@ -10750,11 +11280,7 @@ async function doTargetPurposeDiscovery(target, prompt, showProgress = true) {
|
|
|
10750
11280
|
email: getUserEmail()
|
|
10751
11281
|
}))
|
|
10752
11282
|
});
|
|
10753
|
-
if (!response.ok)
|
|
10754
|
-
const error = await response.text();
|
|
10755
|
-
logger.error(`${LOG_PREFIX} Error getting the next question from remote server: ${error}`);
|
|
10756
|
-
continue;
|
|
10757
|
-
}
|
|
11283
|
+
if (!response.ok) throw await buildRemoteErrorFromResponse(response);
|
|
10758
11284
|
const responseData = await response.json();
|
|
10759
11285
|
const data = TargetPurposeDiscoveryTaskResponseSchema.parse(responseData);
|
|
10760
11286
|
logger.debug(`${LOG_PREFIX} Received response from remote server: ${JSON.stringify(data, null, 2)}`);
|
|
@@ -10971,7 +11497,7 @@ function determineEffectiveSessionSource({ provider, sessionConfig }) {
|
|
|
10971
11497
|
async function testProviderConnectivity({ provider, prompt = "Hello World!", inputs }) {
|
|
10972
11498
|
const vars = {};
|
|
10973
11499
|
if (!provider?.config?.sessionParser) vars["sessionId"] = crypto.randomUUID();
|
|
10974
|
-
if (inputs && typeof inputs === "object") for (const [varName,
|
|
11500
|
+
if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) vars[varName] = createPlaceholderInputValue(varName, definition);
|
|
10975
11501
|
const testSuite = {
|
|
10976
11502
|
providers: [provider],
|
|
10977
11503
|
prompts: [{
|
|
@@ -11202,10 +11728,15 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11202
11728
|
sessionConfig
|
|
11203
11729
|
});
|
|
11204
11730
|
const initialSessionId = effectiveSessionSource === "server" ? void 0 : crypto.randomUUID();
|
|
11731
|
+
const materializeSessionPrompt = (prompt) => {
|
|
11732
|
+
if (!mainInputVariable) return prompt;
|
|
11733
|
+
const definition = inputs?.[mainInputVariable];
|
|
11734
|
+
return definition ? createPlaceholderInputValue(mainInputVariable, definition, prompt) : prompt;
|
|
11735
|
+
};
|
|
11205
11736
|
const inputVars = {};
|
|
11206
|
-
if (inputs && typeof inputs === "object") for (const [varName,
|
|
11737
|
+
if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) {
|
|
11207
11738
|
if (varName === mainInputVariable) continue;
|
|
11208
|
-
inputVars[varName] =
|
|
11739
|
+
inputVars[varName] = createPlaceholderInputValue(varName, definition);
|
|
11209
11740
|
}
|
|
11210
11741
|
const firstPrompt = "What can you help me with?";
|
|
11211
11742
|
const secondPrompt = "What was the last thing I asked you?";
|
|
@@ -11218,7 +11749,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11218
11749
|
vars: {
|
|
11219
11750
|
...initialSessionId ? { sessionId: initialSessionId } : {},
|
|
11220
11751
|
...inputVars,
|
|
11221
|
-
...mainInputVariable ? { [mainInputVariable]: firstPrompt } : {}
|
|
11752
|
+
...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(firstPrompt) } : {}
|
|
11222
11753
|
},
|
|
11223
11754
|
prompt: {
|
|
11224
11755
|
raw: firstPrompt,
|
|
@@ -11265,7 +11796,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11265
11796
|
vars: {
|
|
11266
11797
|
...extractedSessionId ? { sessionId: extractedSessionId } : {},
|
|
11267
11798
|
...inputVars,
|
|
11268
|
-
...mainInputVariable ? { [mainInputVariable]: secondPrompt } : {}
|
|
11799
|
+
...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(secondPrompt) } : {}
|
|
11269
11800
|
},
|
|
11270
11801
|
prompt: {
|
|
11271
11802
|
raw: secondPrompt,
|
|
@@ -11790,7 +12321,7 @@ providersRouter.post("/test-session", async (req, res) => {
|
|
|
11790
12321
|
const TestCaseGenerationSchema = z.object({
|
|
11791
12322
|
plugin: z.object({
|
|
11792
12323
|
id: z.string().refine((val) => ALL_PLUGINS.includes(val), { message: `Invalid plugin ID. Must be one of: ${ALL_PLUGINS.join(", ")}` }),
|
|
11793
|
-
config: PluginConfigSchema.optional().prefault({})
|
|
12324
|
+
config: PluginConfigSchema.catchall(z.unknown()).optional().prefault({})
|
|
11794
12325
|
}),
|
|
11795
12326
|
strategy: z.object({
|
|
11796
12327
|
id: z.string().refine((val) => ALL_STRATEGIES.includes(val), { message: `Invalid strategy ID. Must be one of: ${ALL_STRATEGIES.join(", ")}` }),
|
|
@@ -12367,11 +12898,11 @@ redteamRouter.post("/:taskId", async (req, res) => {
|
|
|
12367
12898
|
}
|
|
12368
12899
|
const { taskId } = paramsResult.data;
|
|
12369
12900
|
const cloudFunctionUrl = getRemoteGenerationUrl();
|
|
12370
|
-
logger.debug(`Received ${taskId} task request
|
|
12901
|
+
logger.debug(`Received ${taskId} task request`, {
|
|
12371
12902
|
method: req.method,
|
|
12372
12903
|
url: req.url,
|
|
12373
|
-
body:
|
|
12374
|
-
})
|
|
12904
|
+
body: sanitizeObject(bodyResult.data, { context: "request body" })
|
|
12905
|
+
});
|
|
12375
12906
|
try {
|
|
12376
12907
|
logger.debug(`Sending request to cloud function: ${cloudFunctionUrl}`);
|
|
12377
12908
|
const response = await fetchWithProxy(cloudFunctionUrl, {
|
|
@@ -12743,7 +13274,7 @@ router.get("/", async (_req, res) => {
|
|
|
12743
13274
|
};
|
|
12744
13275
|
} catch (error) {
|
|
12745
13276
|
logger.debug(`Failed to fetch latest version: ${error}`);
|
|
12746
|
-
latestVersion = versionCache.latestVersion ?? "0.121.
|
|
13277
|
+
latestVersion = versionCache.latestVersion ?? "0.121.7";
|
|
12747
13278
|
}
|
|
12748
13279
|
}
|
|
12749
13280
|
const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
|
|
@@ -12752,7 +13283,7 @@ router.get("/", async (_req, res) => {
|
|
|
12752
13283
|
selfHosted,
|
|
12753
13284
|
isNpx
|
|
12754
13285
|
});
|
|
12755
|
-
const resolvedLatestVersion = latestVersion ?? "0.121.
|
|
13286
|
+
const resolvedLatestVersion = latestVersion ?? "0.121.7";
|
|
12756
13287
|
const response = {
|
|
12757
13288
|
currentVersion: VERSION,
|
|
12758
13289
|
latestVersion: resolvedLatestVersion,
|
|
@@ -15174,7 +15705,11 @@ const EXAMPLE_ALIASES = {
|
|
|
15174
15705
|
"google-adk-example": "integration-google-adk",
|
|
15175
15706
|
"google-sheets": "integration-google-sheets",
|
|
15176
15707
|
"gpt-4o-temperature-comparison": "compare-gpt-temperature",
|
|
15177
|
-
"gpt-4o-vs-4o-mini": "compare-gpt-
|
|
15708
|
+
"compare-gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
|
|
15709
|
+
"compare-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
|
|
15710
|
+
"compare-gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
|
|
15711
|
+
"gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
|
|
15712
|
+
"gpt-model-tiers": "compare-gpt-model-tiers",
|
|
15178
15713
|
"gpt-vs-claude-vs-gemini": "compare-gpt-vs-claude-vs-gemini",
|
|
15179
15714
|
"grok-4-political-bias": "redteam-grok-4-political-bias",
|
|
15180
15715
|
groq: "provider-groq",
|
|
@@ -15225,7 +15760,9 @@ const EXAMPLE_ALIASES = {
|
|
|
15225
15760
|
"node-package-typescript": "config-node-package-typescript",
|
|
15226
15761
|
nscale: "provider-nscale",
|
|
15227
15762
|
"nunjucks-custom-filters": "config-nunjucks-custom-filters",
|
|
15228
|
-
"
|
|
15763
|
+
"gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
|
|
15764
|
+
"gpt-model-tiers-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
|
|
15765
|
+
"openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
|
|
15229
15766
|
"openai-model-comparison": "compare-openai-models",
|
|
15230
15767
|
openclaw: "provider-openclaw",
|
|
15231
15768
|
"opencode-sdk": "provider-opencode-sdk/basic",
|
|
@@ -15597,8 +16134,8 @@ function shouldUseInkList() {
|
|
|
15597
16134
|
async function runInkList(options) {
|
|
15598
16135
|
const [React, { renderInteractive }, { ListApp }] = await Promise.all([
|
|
15599
16136
|
import("react"),
|
|
15600
|
-
import("./render-
|
|
15601
|
-
import("./ListApp-
|
|
16137
|
+
import("./render-DznWrxGO.js"),
|
|
16138
|
+
import("./ListApp-DLmM02JS.js")
|
|
15602
16139
|
]);
|
|
15603
16140
|
let result = { cancelled: false };
|
|
15604
16141
|
let resolveResult;
|
|
@@ -17906,8 +18443,8 @@ function registerRunEvaluationTool(server) {
|
|
|
17906
18443
|
}
|
|
17907
18444
|
filteredTestSuite.tests = filteredTests;
|
|
17908
18445
|
}
|
|
17909
|
-
const { evaluate } = await import("./evaluator-
|
|
17910
|
-
const evalRecord = await (await import("./eval-
|
|
18446
|
+
const { evaluate } = await import("./evaluator-DNdJF1Gv.js");
|
|
18447
|
+
const evalRecord = await (await import("./eval-DscR5iOM.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
|
|
17911
18448
|
logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
|
|
17912
18449
|
const startTime = Date.now();
|
|
17913
18450
|
const result = await evaluate(filteredTestSuite, evalRecord, {
|
|
@@ -18473,6 +19010,9 @@ function analyzeConfiguration(config) {
|
|
|
18473
19010
|
}
|
|
18474
19011
|
//#endregion
|
|
18475
19012
|
//#region src/commands/mcp/server.ts
|
|
19013
|
+
function setMcpTransport(transport) {
|
|
19014
|
+
Object.assign(process.env, { MCP_TRANSPORT: transport });
|
|
19015
|
+
}
|
|
18476
19016
|
/**
|
|
18477
19017
|
* Creates an MCP server with tools for interacting with promptfoo
|
|
18478
19018
|
*/
|
|
@@ -18508,7 +19048,7 @@ async function createMcpServer() {
|
|
|
18508
19048
|
*/
|
|
18509
19049
|
async function startHttpMcpServer(port) {
|
|
18510
19050
|
if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error(`Invalid port number: ${port}. Port must be an integer between 1 and 65535.`);
|
|
18511
|
-
|
|
19051
|
+
setMcpTransport("http");
|
|
18512
19052
|
const app = express();
|
|
18513
19053
|
app.use(express.json());
|
|
18514
19054
|
const mcpServer = await createMcpServer();
|
|
@@ -18565,7 +19105,7 @@ async function startHttpMcpServer(port) {
|
|
|
18565
19105
|
* Starts an MCP server with stdio transport
|
|
18566
19106
|
*/
|
|
18567
19107
|
async function startStdioMcpServer() {
|
|
18568
|
-
|
|
19108
|
+
setMcpTransport("stdio");
|
|
18569
19109
|
logger.transports.forEach((transport) => {
|
|
18570
19110
|
if (transport.constructor.name === "Console" || transport.name === "console") transport.silent = true;
|
|
18571
19111
|
});
|
|
@@ -19434,7 +19974,7 @@ function printErrorInformation(errorLogFile, debugLogFile) {
|
|
|
19434
19974
|
async function main() {
|
|
19435
19975
|
setupEnvFilesFromArgv();
|
|
19436
19976
|
initializeRunLogging();
|
|
19437
|
-
if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") process.env
|
|
19977
|
+
if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") Object.assign(process.env, { PROMPTFOO_DISABLE_UPDATE: "true" });
|
|
19438
19978
|
await checkForUpdates();
|
|
19439
19979
|
await runDbMigrations();
|
|
19440
19980
|
const { defaultConfig, defaultConfigPath } = await loadDefaultConfig();
|