promptfoo 0.121.5 → 0.121.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-BIFntVWB.cjs → accounts-BVz5gHLK.cjs} +16 -10
- package/dist/src/{accounts-bnyHT7Ju.js → accounts-BWjqwsrf.js} +12 -7
- package/dist/src/{accounts-CLJHCDDb.js → accounts-D6IBfEE0.js} +13 -8
- package/dist/src/{accounts-CaLNYnf7.js → accounts-DAv_0iE7.js} +12 -7
- package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-BJKAkz2e.js} +2 -2
- package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-C-A92xhn.cjs} +8 -7
- package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-DmoS_S4B.js} +2 -2
- package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-GdToujHu.js} +2 -2
- package/dist/src/{agents-BGqaTDnr.js → agents-2C8NN6I1.js} +20 -9
- package/dist/src/{agents-n6vPqV3i.js → agents-BMAiSR2o.js} +4 -4
- package/dist/src/{agents-BYdMl1UE.js → agents-C-PGaxwj.js} +20 -9
- package/dist/src/{agents-BV9yFpXX.js → agents-C98cz5pl.js} +20 -9
- package/dist/src/{agents-WULPVjbH.cjs → agents-DB8Ub2Ld.cjs} +7 -6
- package/dist/src/{agents-DhxWMCtH.js → agents-Dwshy2H8.js} +4 -4
- package/dist/src/{agents-emVcx3yh.js → agents-mlKjx-cK.js} +2 -2
- package/dist/src/{agents-DiWmQYH9.cjs → agents-n2ej-c4H.cjs} +21 -10
- package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-Bi-laUlp.js} +6 -6
- package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-D0OFV4Vj.cjs} +7 -7
- package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-WyUK0wYy.js} +6 -6
- package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-v-63ZjEI.js} +6 -6
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-sCXUfaWo.js +1 -0
- package/dist/src/app/assets/index-BopgkZEh.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/{audio-CScmnmEB.js → audio-DnEIHWZR.js} +3 -3
- package/dist/src/{audio-Da8U9IS5.js → audio-heR0mu0n.js} +3 -3
- package/dist/src/{audio-BvpTOArF.js → audio-wafFO1wn.js} +3 -3
- package/dist/src/{audio-C0vDeS0j.cjs → audio-x44tsxIo.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-2G3Be6oL.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-81PPynHR.js +75 -0
- package/dist/src/authoritativeMarkupInjection-D9O70HPi.js +74 -0
- package/dist/src/authoritativeMarkupInjection-PxSf3Rh8.js +74 -0
- package/dist/src/{base-dYsl2hmL.js → base-0Gzzue9Z.js} +4 -3
- package/dist/src/{base-BOMaNEes.js → base-Bp4c52YZ.js} +4 -3
- package/dist/src/{base-BTux96b1.js → base-BuNn-YBX.js} +4 -3
- package/dist/src/{base-Tw6uhH8K.cjs → base-BzdS8tod.cjs} +5 -4
- package/dist/src/bestOfN-BHiOjeaq.js +136 -0
- package/dist/src/bestOfN-CdkNPPbX.js +136 -0
- package/dist/src/bestOfN-ClUSOhN0.js +137 -0
- package/dist/src/bestOfN-DWwXEg1h.cjs +140 -0
- package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
- package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
- package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/cache-BKYi3kAA.js +3 -0
- package/dist/src/{cache-BI5BY7ey.js → cache-BR77mdIR.js} +127 -11
- package/dist/src/{cache-Cr-qWIbP.js → cache-CrioYnaa.js} +125 -10
- package/dist/src/{cache-Bzttsk0X.js → cache-DdriHsNX.js} +125 -10
- package/dist/src/{cache-DGg-yTZG.cjs → cache-h5MWOBZI.cjs} +135 -25
- package/dist/src/{chat-Cx_LkwvZ.js → chat-BBEnnpQk.js} +11 -11
- package/dist/src/{chat-DChSH_Es.js → chat-BSos6PvZ.js} +9 -9
- package/dist/src/{chat-aMQZw6R7.js → chat-Bnt7ieO0.js} +4 -4
- package/dist/src/{chat-vYqqv1gP.cjs → chat-DSyYuTYT.cjs} +14 -13
- package/dist/src/{chat-BLOdH60v.js → chat-DTdf-J5Q.js} +11 -11
- package/dist/src/{chat-DH97tVV9.cjs → chat-Dq3DomYU.cjs} +6 -6
- package/dist/src/{chat-DG2LkwLq.js → chat-g5QLeLOo.js} +4 -4
- package/dist/src/{chat-D9nudO9b.js → chat-mTTuUAYb.js} +4 -4
- package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
- package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
- package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-BGUac_kS.js} +213 -66
- package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-BP__YGfK.js} +213 -66
- package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-Ddgx5BIQ.js} +212 -65
- package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-n2XcEclh.cjs} +219 -71
- package/dist/src/cloud-DiWbUiVP.js +3 -0
- package/dist/src/{cloud-Da0bofJd.js → cloud-DqF5N1aJ.js} +2 -2
- package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-BGamMotN.js} +4 -4
- package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-BwW8W-w7.js} +4 -4
- package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-DCRGnsyL.cjs} +6 -6
- package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-sD26nP6V.js} +4 -4
- package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-2lnOT4qM.js} +3 -3
- package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-B1-8KNCt.js} +3 -3
- package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-CCC1DFrC.cjs} +5 -5
- package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-DOghiU6r.js} +3 -3
- package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-BWeWamEb.js} +19 -4
- package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-DyQB1P0p.js} +20 -5
- package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-R9u_G7W9.cjs} +27 -11
- package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-ZQRJSJjU.js} +20 -5
- package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-B62H0fe7.js} +16 -5
- package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-BRPUvJG8.cjs} +31 -12
- package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-Dio1zJBS.js} +18 -7
- package/dist/src/codex-sdk-DxukZs_K.js +3 -0
- package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-iEmW1eS2.js} +18 -7
- package/dist/src/{cometapi-B5ImDlSm.js → cometapi--hh7dESS.js} +7 -7
- package/dist/src/{cometapi-CC7hWxmX.js → cometapi-C7yWNGqt.js} +7 -7
- package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CSIi16a0.js} +7 -7
- package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-sZgBZtbU.cjs} +8 -8
- package/dist/src/{completion-DtQ72Bm3.cjs → completion-CWtqdn3z.cjs} +6 -6
- package/dist/src/{completion-Vq_ad618.js → completion-DT8cxo9T.js} +4 -4
- package/dist/src/{completion-2iuYVxwi.js → completion-DUScduXp.js} +5 -5
- package/dist/src/{completion-CrD6MQ93.js → completion-D_2IOAoS.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-B41TwUHM.js +722 -0
- package/dist/src/crescendo-Bfic7KC4.js +724 -0
- package/dist/src/crescendo-DXFB7rHP.cjs +725 -0
- package/dist/src/crescendo-DvvYxMLA.js +723 -0
- package/dist/src/custom-BdzuqFTN.js +619 -0
- package/dist/src/custom-Cf5Q6r-P.js +618 -0
- package/dist/src/custom-DLgufezC.js +620 -0
- package/dist/src/custom-LEXHCRe3.cjs +621 -0
- package/dist/src/{docker--3qzPa-6.js → docker-BOSO_6hK.js} +5 -5
- package/dist/src/{docker-D3AY-5F5.cjs → docker-D0h2vFrc.cjs} +6 -6
- package/dist/src/{docker-DCsCDvwM.js → docker-D2TWGyTP.js} +5 -5
- package/dist/src/{docker-Dorv4_Dg.js → docker-Vj_4_cPg.js} +5 -5
- package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BQIApR18.cjs} +6 -6
- package/dist/src/{embedding-ChS1ivFS.js → embedding-CStK0TV6.js} +5 -5
- package/dist/src/{embedding-DNRvZwRN.js → embedding-CU78FMnw.js} +5 -5
- package/dist/src/{embedding-D_bI4NDq.js → embedding-CV8lmCnU.js} +4 -4
- package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
- package/dist/src/{eval-DJ_4A-tr.js → eval-B3BaNBbO.js} +19 -19
- package/dist/src/{eval-BQPLBJbw.js → eval-DfR9885C.js} +1 -1
- package/dist/src/evalResult-BtZSUgQv.js +3 -0
- package/dist/src/{evalResult-D6P5I5il.js → evalResult-CcSqNl_Y.js} +17 -10
- package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-DHXs-9TL.cjs} +29 -11
- package/dist/src/{evalResult-pSvGWFMo.js → evalResult-PGqEbasb.js} +18 -11
- package/dist/src/evaluator-B9LGbKI8.js +3 -0
- package/dist/src/{evaluator-D-UIbbYq.js → evaluator-n_dEb00o.js} +258 -132
- package/dist/src/evaluatorHelpers-CrRObe2z.js +510 -0
- package/dist/src/evaluatorHelpers-D06I9WFL.cjs +537 -0
- package/dist/src/evaluatorHelpers-D1_kwvyp.js +511 -0
- package/dist/src/{extractor-DxyiFhPk.js → extractor-BVkZtk4R.js} +5 -5
- package/dist/src/{extractor-BM3jRERL.js → extractor-D3Fv_Tdh.js} +5 -5
- package/dist/src/{extractor-YlZbUMsL.js → extractor-Jp53vs-6.js} +5 -5
- package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-bV_NOoaz.cjs} +6 -6
- package/dist/src/{fetch-Y5qX_kST.js → fetch-BSSAcMxf.js} +90 -26
- package/dist/src/fetch-BodQTrMU.js +3 -0
- package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Cfm4EuOB.cjs} +115 -32
- package/dist/src/{fetch-B6ch2nU2.js → fetch-Cpf1U1nO.js} +86 -26
- package/dist/src/{fetch-D9xxyC1p.js → fetch-Doks14zQ.js} +90 -26
- package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ba7Gznzq.js +467 -0
- package/dist/src/goat-CJngS-WU.js +468 -0
- package/dist/src/goat-CwsbfQeu.js +466 -0
- package/dist/src/goat-DOMbozoX.cjs +470 -0
- package/dist/src/graders-B3D7kCcD.js +3 -0
- package/dist/src/{graders-CgPn32yp.js → graders-BQt1BaQe.js} +1019 -86
- package/dist/src/{graders-Bw1wk_21.cjs → graders-CBQ2s6gz.cjs} +1087 -130
- package/dist/src/{graders-CwrbifOo.js → graders-DaRU98zs.js} +1022 -84
- package/dist/src/{graders-BoUqsCEm.js → graders-KXzjnIim.js} +1024 -86
- package/dist/src/hydra-BU6GjYoQ.js +560 -0
- package/dist/src/hydra-BmXHxlyl.js +559 -0
- package/dist/src/hydra-CR0KyYDb.js +561 -0
- package/dist/src/hydra-RO9jBks7.cjs +560 -0
- package/dist/src/{image-BeWaInPF.js → image-BBmZdKO3.js} +3 -3
- package/dist/src/{image-qjO6FWPs.js → image-B_PFG7IG.js} +3 -3
- package/dist/src/image-BtODOZjh.js +442 -0
- package/dist/src/image-CYuNJIqd.js +442 -0
- package/dist/src/{image-D10dNAav.cjs → image-E00WFZkq.cjs} +4 -4
- package/dist/src/image-HK2Cfqb1.cjs +465 -0
- package/dist/src/{image-Dr_3I3nK.js → image-LGj8dTcr.js} +3 -3
- package/dist/src/image-YMKejC0r.js +443 -0
- package/dist/src/index.cjs +711 -339
- package/dist/src/index.d.cts +3159 -1611
- package/dist/src/index.d.ts +3158 -1610
- package/dist/src/index.js +605 -249
- package/dist/src/indirectWebPwn-B80dLlFC.js +260 -0
- package/dist/src/indirectWebPwn-BMTXXznx.js +386 -0
- package/dist/src/indirectWebPwn-BZFPV7Q9.js +385 -0
- package/dist/src/indirectWebPwn-BaEQEOIO.cjs +260 -0
- package/dist/src/indirectWebPwn-BzpyMnFS.js +259 -0
- package/dist/src/indirectWebPwn-CiWB-vVH.js +385 -0
- package/dist/src/indirectWebPwn-D7NA9Nsv.cjs +397 -0
- package/dist/src/indirectWebPwn-o_bEFMjP.js +259 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-B-l0syBV.js +501 -0
- package/dist/src/iterative-BACUeCCz.cjs +503 -0
- package/dist/src/iterative-CVwoExo8.js +502 -0
- package/dist/src/iterative-QDrGSyss.js +500 -0
- package/dist/src/iterativeImage-BQg2OwA6.js +413 -0
- package/dist/src/iterativeImage-Bwn0fM75.js +413 -0
- package/dist/src/iterativeImage-CcgVyASo.cjs +415 -0
- package/dist/src/iterativeImage-D_UbQXg4.js +414 -0
- package/dist/src/iterativeMeta-CIu-CHRS.js +405 -0
- package/dist/src/iterativeMeta-CN8CNjFA.js +406 -0
- package/dist/src/iterativeMeta-DN6BTjpq.js +404 -0
- package/dist/src/iterativeMeta-DlqY3BsS.cjs +405 -0
- package/dist/src/iterativeTree-CJ8a8G9T.js +820 -0
- package/dist/src/iterativeTree-DcuFXnjL.js +819 -0
- package/dist/src/iterativeTree-FrDDYAN0.js +818 -0
- package/dist/src/iterativeTree-LOgAi0nU.cjs +821 -0
- package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-CdbcGBZF.cjs} +25 -11
- package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-CjOXp6Lr.js} +23 -9
- package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-DxAq4n4z.js} +24 -10
- package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-o_wTLzrt.js} +24 -10
- package/dist/src/{litellm-DRc4qWfc.js → litellm-B2gHwya_.js} +4 -4
- package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CPpdlO7n.cjs} +5 -5
- package/dist/src/{litellm-BLSiANhk.js → litellm-CYfgxLrM.js} +4 -4
- package/dist/src/{litellm-DQGo_juI.js → litellm-ojWBKU3C.js} +4 -4
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-BCCO9XXG.cjs} +9 -8
- package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BPrdihAb.js} +5 -5
- package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-DP5N79lB.js} +5 -5
- package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-DTDyoAlM.js} +5 -5
- package/dist/src/main.js +727 -234
- package/dist/src/memoryPoisoning-B6N3us35.cjs +106 -0
- package/dist/src/memoryPoisoning-Bc_BK_k2.js +106 -0
- package/dist/src/memoryPoisoning-D375zwSX.js +107 -0
- package/dist/src/memoryPoisoning-DJA0YjJT.js +106 -0
- package/dist/src/{messages-CewuNcNS.js → messages-Bg29Nbit.js} +17 -9
- package/dist/src/{messages-BnsVHUnm.cjs → messages-BrZEnHsV.cjs} +24 -15
- package/dist/src/{messages-CI69Lasb.js → messages-CBulRaud.js} +18 -10
- package/dist/src/{messages-B9dSjrNf.js → messages-K9A8RxBM.js} +18 -10
- package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
- package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
- package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-BDLwSGN0.js +46 -0
- package/dist/src/mischievousUser-Blx-OtT3.js +47 -0
- package/dist/src/mischievousUser-CHuTTvBg.js +46 -0
- package/dist/src/mischievousUser-CSUrH3fq.cjs +46 -0
- package/dist/src/{modelslab-zpz9JcK0.js → modelslab-BjEk7yCP.js} +6 -6
- package/dist/src/{modelslab-BCLOtfek.js → modelslab-CBCrdIBc.js} +6 -6
- package/dist/src/{modelslab-D73OnKSx.js → modelslab-CrGpXuhv.js} +6 -6
- package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-Cvh0klQx.cjs} +7 -7
- package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-CUDDZcSA.js} +5 -5
- package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-CyDESu5J.js} +5 -5
- package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-DH_Ksu6X.cjs} +9 -8
- package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-YQAqUYJY.js} +5 -5
- package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-BVFFB7JZ.js} +2 -2
- package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-Bg3cxMMu.cjs} +5 -4
- package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-BhmnRGyh.js} +3 -3
- package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-DxmK95c1.js} +3 -3
- package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
- package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-CHQaZi_-.cjs} +17 -15
- package/dist/src/{openclaw-DA8U4DsD.js → openclaw-CWOm2eOr.js} +9 -8
- package/dist/src/{openclaw-DObVgpjC.js → openclaw-Cdk9JKfY.js} +9 -8
- package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-DhF8rUpI.js} +9 -8
- package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-BRjiPV-g.js} +38 -3
- package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-CPFMw0ed.cjs} +45 -9
- package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-CaSOCsBA.js} +39 -4
- package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-CqkyG8De.js} +39 -4
- package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
- package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
- package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BU4_0J85.js +238 -0
- package/dist/src/promptfoo-CReYAtfb.js +237 -0
- package/dist/src/promptfoo-ClChwT74.cjs +292 -0
- package/dist/src/promptfoo-CqEpj6Sr.js +237 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
- package/dist/src/{providers-DT-GtF2t.js → providers-B7TyByfj.js} +789 -11925
- package/dist/src/{providers-CJh7iriU.js → providers-BGc7tDtQ.js} +796 -11872
- package/dist/src/{providers-Ctcc592x.js → providers-CSOp-bCm.js} +1 -1
- package/dist/src/{providers-eDShy16E.cjs → providers-V6RBuieY.cjs} +837 -12138
- package/dist/src/{providers-DRrerKra.js → providers-iH3Sw1yo.js} +851 -11938
- package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
- package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
- package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
- package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
- package/dist/src/{quiverai-CPKhWgaT.js → quiverai-58BtRRet.js} +3 -3
- package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-C1dYu5MW.js} +3 -3
- package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CNMIpZQg.js} +3 -3
- package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-fmSfU43k.cjs} +4 -4
- package/dist/src/registry-6Jw6ebor.cjs +124 -0
- package/dist/src/registry-BTGk2ZkB.js +124 -0
- package/dist/src/registry-BTNqmP5o.js +125 -0
- package/dist/src/registry-NGnOG2xa.js +124 -0
- package/dist/src/{server-gyd6d4Hc.js → remoteGeneration--D6WjzUm.js} +15 -108
- package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-BpIYlb_O.cjs} +30 -119
- package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-CJC3E0aW.js} +15 -147
- package/dist/src/remoteGeneration-D6UjE2JT.js +218 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-1ztiVYsx.js → responses-5Gf5HNOi.js} +11 -7
- package/dist/src/{responses-BiaBguAu.js → responses-BVi7xIdv.js} +13 -9
- package/dist/src/{responses-CF-ayauu.cjs → responses-CUARGrhY.cjs} +16 -11
- package/dist/src/{responses-B8haB-mD.js → responses-CrmWv6iz.js} +13 -9
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
- package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
- package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
- package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-BeVTKfrv.cjs} +65 -53
- package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-BwbV5U7_.js} +61 -50
- package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-ClAZ6Qwc.js} +61 -50
- package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-DY2qrOpq.js} +61 -50
- package/dist/src/{scanner-nOCWNIXa.js → scanner-BS-iFIp3.js} +6 -6
- package/dist/src/server/index.js +4115 -420
- package/dist/src/server-BHOEL8p8.cjs +126 -0
- package/dist/src/server-CbiJppij.js +107 -0
- package/dist/src/server-D18AAlAc.js +3 -0
- package/dist/src/server-DLYjSFm2.js +182 -0
- package/dist/src/server-JQYD_Nws.js +146 -0
- package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-CRq0gGVf.js +1334 -0
- package/dist/src/shared-CSIGeGLl.js +1334 -0
- package/dist/src/shared-DNvim54U.js +1335 -0
- package/dist/src/shared-aHWko3P1.cjs +1436 -0
- package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
- package/dist/src/simulatedUser-C7sOFSF6.js +222 -0
- package/dist/src/simulatedUser-DH_7RzEQ.js +222 -0
- package/dist/src/simulatedUser-lgMMmniD.js +223 -0
- package/dist/src/simulatedUser-mnCUS9Bm.cjs +227 -0
- package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
- package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
- package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-95htjpLs.js +834 -0
- package/dist/src/storage-A4WnAeN3.cjs +911 -0
- package/dist/src/storage-B2Ql_oq4.js +822 -0
- package/dist/src/storage-EKVWZBNY.js +875 -0
- package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
- package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-CBI1brSy.js +2333 -0
- package/dist/src/strategies-CCcnEbFO.cjs +2360 -0
- package/dist/src/strategies-CD1gHeeQ.js +2331 -0
- package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
- package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
- package/dist/src/telemetry-BUm_krVX.js +3 -0
- package/dist/src/{telemetry--iqaGyaS.js → telemetry-BbpmrenM.js} +4 -4
- package/dist/src/{telemetry-CgdVGV8N.js → telemetry-C4bX-6Sr.js} +4 -4
- package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-C_ImlCOk.cjs} +15 -9
- package/dist/src/{telemetry-DWdGHvEf.js → telemetry-DOE567Wj.js} +4 -4
- package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
- package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
- package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BOP1FsRt.js +220 -0
- package/dist/src/tracingOptions-BvmDa_jH.js +219 -0
- package/dist/src/tracingOptions-DFBoE8O0.cjs +249 -0
- package/dist/src/tracingOptions-xhjOg2b1.js +221 -0
- package/dist/src/{transcription-D7Q0vJsh.js → transcription-BOXDoehT.js} +4 -4
- package/dist/src/{transcription-Bm2emLmJ.js → transcription-CpVdKc4P.js} +5 -5
- package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-D8kkPXg_.cjs} +9 -8
- package/dist/src/{transcription-84t4ALo2.js → transcription-Z94eV9LR.js} +5 -5
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/{transform-DtooZqYY.js → transform-BufxPIQL.js} +8 -8
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DJkt81VY.cjs} +12 -11
- package/dist/src/{transform-B-b6Cq-q.js → transform-DtTfiGoh.js} +8 -8
- package/dist/src/{transform-_DpNB4qp.js → transform-Wp6s_5QE.js} +8 -8
- package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
- package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
- package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
- package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
- package/dist/src/{util-Dpmm_dAI.cjs → util-BHCAeuXx.cjs} +33 -7
- package/dist/src/{util-DFPeFkiV.js → util-CMrHV35u.js} +28 -8
- package/dist/src/{util-DvpHnLt0.cjs → util-COnzevXE.cjs} +29 -21
- package/dist/src/{util-BlFVL0UF.js → util-CYev3d-r.js} +22 -7
- package/dist/src/util-DGqkTb2-.js +327 -0
- package/dist/src/util-DNtUsus_.cjs +386 -0
- package/dist/src/{util-Dub0f_ej.js → util-DRfqa4xz.js} +17 -10
- package/dist/src/{util-BVXcTwXu.js → util-DTq3jq2z.js} +28 -8
- package/dist/src/{util-C-kmRosx.js → util-DeY58psG.js} +22 -7
- package/dist/src/util-DpV6KT5i.js +327 -0
- package/dist/src/{util-3pBZZb_H.js → util-DwNJzqOV.js} +45 -10
- package/dist/src/util-ETfU_sS9.js +328 -0
- package/dist/src/{util-A5_ZsQUn.cjs → util-NYQvo1C7.cjs} +25 -9
- package/dist/src/{util-DN0-b81k.js → util-X4KQgyVD.js} +28 -8
- package/dist/src/{util-BQOCAHQC.js → util-jZRrXe1P.js} +46 -11
- package/dist/src/{util-B9CNhyac.js → util-o2Qg5rZv.js} +22 -7
- package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
- package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
- package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/{version-CbuBKu2U.js → version-CHR-EFec.js} +2 -2
- package/dist/src/{version-CbpiUINz.js → version-F0YDgb7J.js} +2 -2
- package/dist/src/{version-D9zu9FWB.cjs → version-VzUqOBZk.cjs} +2 -2
- package/dist/src/{version-0frU0UTr.js → version-qVEN5qCm.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +49 -23
- package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
- package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
- package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-BRkhlH3k.cjs +0 -3
- package/dist/src/cache-BlC6aeJ0.js +0 -3
- package/dist/src/cloud-CoD5OacT.js +0 -3
- package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
- package/dist/src/codex-sdk-Danroptg.cjs +0 -2
- package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
- package/dist/src/evalResult-BBK58h2B.js +0 -3
- package/dist/src/evalResult-spPqh1G_.js +0 -2
- package/dist/src/evaluator-DgLKaZk8.js +0 -3
- package/dist/src/fetch-8viavNv8.js +0 -3
- package/dist/src/graders-C84JI-m5.js +0 -2
- package/dist/src/graders-CBbd0K0Q.cjs +0 -2
- package/dist/src/graders-CbQqpHSN.js +0 -3
- package/dist/src/graders-DS42d3ZG.js +0 -2
- package/dist/src/image-BmilRNqO.js +0 -258
- package/dist/src/image-CxJoa3aW.cjs +0 -280
- package/dist/src/image-DsGRlkh7.js +0 -257
- package/dist/src/image-a_SGUobh.js +0 -257
- package/dist/src/providers-BuyzKt7C.js +0 -2
- package/dist/src/providers-C7lNVBjX.cjs +0 -3
- package/dist/src/providers-CCE2COJi2.js +0 -2
- package/dist/src/render-7uNJ2V14.js +0 -135
- package/dist/src/render-DlscvAUJ.js +0 -135
- package/dist/src/render-eui5p5mL.js +0 -136
- package/dist/src/render-tG6ir9_g.cjs +0 -165
- package/dist/src/rubyUtils-4hjGxvju.js +0 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
- package/dist/src/server-ByiF3qlg.js +0 -386
- package/dist/src/server-C0XKRNB_.cjs +0 -2
- package/dist/src/server-C_15p79-.js +0 -3
- package/dist/src/store-2OXm_eBY.js +0 -240
- package/dist/src/store-BELqNwvz.js +0 -3
- package/dist/src/store-uQZ4AjPe.cjs +0 -2
- package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
- package/dist/src/telemetry-ZdPZc0fm.js +0 -3
- package/dist/src/transform-BQt0BeAW.js +0 -3
- package/dist/src/transform-Bq5oqC0s.cjs +0 -2
- package/dist/src/transform-C9izGX54.cjs +0 -228
- package/dist/src/transform-CwbAZ84V.js +0 -216
- package/dist/src/transform-DzCF-wqV.js +0 -213
- package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/main.js
CHANGED
|
@@ -1,35 +1,45 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
|
|
3
|
-
import {
|
|
4
|
-
import { n as VERSION } from "./version-
|
|
3
|
+
import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, M as TERMINAL_MAX_WIDTH, N as getDefaultPort, O as CLOUD_PROVIDER_PREFIX, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, _ as REQUEST_TIMEOUT_MS, f as sleep, i as fetchWithTimeout, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, x as isPromptfooSampleTarget } from "./fetch-Cpf1U1nO.js";
|
|
4
|
+
import { n as VERSION } from "./version-F0YDgb7J.js";
|
|
5
5
|
import { t as invariant } from "./invariant-B2Rf6avk.js";
|
|
6
|
-
import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-
|
|
7
|
-
import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-
|
|
8
|
-
import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-
|
|
9
|
-
import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import { a as
|
|
13
|
-
import {
|
|
14
|
-
import { a as
|
|
15
|
-
import { i as
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import { n as
|
|
20
|
-
import { a as
|
|
21
|
-
import { n as
|
|
22
|
-
import { n as escapeRegExp, t as ellipsize } from "./text-
|
|
23
|
-
import { n as
|
|
24
|
-
import { n as
|
|
25
|
-
import {
|
|
26
|
-
import {
|
|
27
|
-
import {
|
|
28
|
-
import {
|
|
29
|
-
import {
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
32
|
-
import { n as
|
|
6
|
+
import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-D6IBfEE0.js";
|
|
7
|
+
import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-DqF5N1aJ.js";
|
|
8
|
+
import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-DOE567Wj.js";
|
|
9
|
+
import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, _t as CODING_AGENT_PLUGINS, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BFevViUY.js";
|
|
10
|
+
import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
|
|
11
|
+
import { r as importModule, t as getDirectory } from "./esm-Bexx2PFc.js";
|
|
12
|
+
import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, t as renderEnvOnlyInObject } from "./render-CSP99NLm.js";
|
|
13
|
+
import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-D6UjE2JT.js";
|
|
14
|
+
import { a as openAuthBrowser, i as checkServerRunning, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-DLYjSFm2.js";
|
|
15
|
+
import { _ as makeRequest$1, c as getCloudDatabaseId, d as getOrgContext, f as getPluginSeverityOverridesFromCloud, g as isCloudProvider, h as getUserTeams, i as retrieveMedia, l as getConfigFromCloud, m as getProviderFromCloud, o as canCreateTargets, r as mediaExists, s as checkCloudPermissions, t as getMediaStorage, u as getEvalConfigFromCloud, v as resolveTeamFromIdentifier, y as resolveTeamId } from "./storage-EKVWZBNY.js";
|
|
16
|
+
import { n as sha256, t as randomSequence } from "./createHash-CgRvs4Fn.js";
|
|
17
|
+
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-BR77mdIR.js";
|
|
18
|
+
import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
|
|
19
|
+
import { D as normalizeProviderRef, L as readFilters, M as maybeLoadFromExternalFile, R as readOutput, _ as resultIsForTestCase, a as ComparisonEvalNotFoundError, b as getProviderDescription, c as getEvalTableOutputPromptLocationsBySize, d as fetchCsvFromGoogleSheet, f as setupEnv, g as getTestCaseDeduplicationKey, h as filterRuntimeVars, i as writeOutput, j as maybeLoadConfigFromExternalFile, l as getEvalTablePromptStrippedPayload, m as extractRuntimeVars, n as createOutputMetadata, o as evalTableToJson, p as deduplicateTestCases, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as mergeComparisonTables, v as checkProviderApiKeys, y as doesProviderRefMatch } from "./util-jZRrXe1P.js";
|
|
20
|
+
import { a as resolveProviderConfigs, d as MCPProvider, f as HttpProvider, i as resolveProvider, m as createTransformResponse, n as loadApiProvider, p as createTransformRequest, r as loadApiProviders, t as getProviderIds } from "./providers-B7TyByfj.js";
|
|
21
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
|
|
22
|
+
import { n as escapeRegExp, t as ellipsize } from "./text-Db-Wt2u2.js";
|
|
23
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DafUHOeh.js";
|
|
24
|
+
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DODuTK-a.js";
|
|
25
|
+
import { n as createPlaceholderInputValue, t as buildPromptInputDescriptions } from "./inputVariables-DXFdi7AI.js";
|
|
26
|
+
import { a as extractPromptFromTags, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, s as getShortPluginId } from "./util-ETfU_sS9.js";
|
|
27
|
+
import { a as assertRemoteMaterializationHandled, l as requiresRemoteMaterialization, n as PromptfooHarmfulCompletionProvider } from "./promptfoo-BU4_0J85.js";
|
|
28
|
+
import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as getDefaultProviders, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as readProviderPromptMap, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, X as processPrompts, Y as doRemoteGrading, Z as readPrompts, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-BQt1BaQe.js";
|
|
29
|
+
import { f as redteamProviderManager, m as TokenUsageTracker } from "./shared-DNvim54U.js";
|
|
30
|
+
import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-n_dEb00o.js";
|
|
31
|
+
import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
|
|
32
|
+
import { n as TRANSFORM_KEYS, t as INLINE_FUNCTION_LABEL } from "./transform-BnSTnFlp.js";
|
|
33
|
+
import { n as getTraceStore } from "./store-DKd5592Q.js";
|
|
34
|
+
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CSurUUyV.js";
|
|
35
|
+
import { n as isBlobStorageEnabled } from "./extractor-BVkZtk4R.js";
|
|
36
|
+
import { n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-4X-Wd8PG.js";
|
|
37
|
+
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-B3BaNBbO.js";
|
|
38
|
+
import { n as sanitizeProvider, t as EvalResult } from "./evalResult-CcSqNl_Y.js";
|
|
39
|
+
import { n as renderPrompt } from "./evaluatorHelpers-D1_kwvyp.js";
|
|
40
|
+
import { t as formatDuration } from "./formatDuration-CMVNrYvE.js";
|
|
41
|
+
import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-CBI1brSy.js";
|
|
42
|
+
import { n as shouldUseInkUI } from "./interactiveCheck-C4QlIuoR.js";
|
|
33
43
|
import { fileURLToPath } from "node:url";
|
|
34
44
|
import { Command, InvalidArgumentError } from "commander";
|
|
35
45
|
import { z } from "zod";
|
|
@@ -62,9 +72,9 @@ import process$1 from "process";
|
|
|
62
72
|
import $RefParser from "@apidevtools/json-schema-ref-parser";
|
|
63
73
|
import async from "async";
|
|
64
74
|
import { and, asc, count, desc, eq, inArray, isNotNull, like, or, sql } from "drizzle-orm";
|
|
65
|
-
import cliProgress from "cli-progress";
|
|
66
75
|
import { URL as URL$1 } from "url";
|
|
67
76
|
import editor from "@inquirer/editor";
|
|
77
|
+
import cliProgress from "cli-progress";
|
|
68
78
|
import debounce from "debounce";
|
|
69
79
|
import { LRUCache } from "lru-cache";
|
|
70
80
|
import chokidar from "chokidar";
|
|
@@ -101,7 +111,7 @@ function runCommand(program) {
|
|
|
101
111
|
hasGithubPr: !!cmdObj.githubPr,
|
|
102
112
|
hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
|
|
103
113
|
});
|
|
104
|
-
const { executeScan } = await import("./scanner-
|
|
114
|
+
const { executeScan } = await import("./scanner-BS-iFIp3.js");
|
|
105
115
|
await executeScan(repoPath, cmdObj);
|
|
106
116
|
});
|
|
107
117
|
}
|
|
@@ -2995,6 +3005,10 @@ function stripAuthFromUrl(urlString) {
|
|
|
2995
3005
|
}
|
|
2996
3006
|
}
|
|
2997
3007
|
async function handleEmailCollection(evalRecord) {
|
|
3008
|
+
if (evalRecord.author) {
|
|
3009
|
+
logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
|
|
3010
|
+
return;
|
|
3011
|
+
}
|
|
2998
3012
|
if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
|
|
2999
3013
|
let email = getUserEmail();
|
|
3000
3014
|
if (!email) {
|
|
@@ -3257,6 +3271,14 @@ function shouldShareResults(opts) {
|
|
|
3257
3271
|
return cloudConfig.isEnabled() && sharing !== false;
|
|
3258
3272
|
}
|
|
3259
3273
|
//#endregion
|
|
3274
|
+
//#region src/commands/eval/redteamWarning.ts
|
|
3275
|
+
function warnIfRedteamConfigHasNoTests(config, testSuite) {
|
|
3276
|
+
if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
|
|
3277
|
+
Warning: Config file has a redteam section but no test cases.
|
|
3278
|
+
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
3279
|
+
`));
|
|
3280
|
+
}
|
|
3281
|
+
//#endregion
|
|
3260
3282
|
//#region src/commands/eval/summary.ts
|
|
3261
3283
|
function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
|
|
3262
3284
|
if (wasAborted) {
|
|
@@ -3644,6 +3666,9 @@ function setupRetryCommand(program) {
|
|
|
3644
3666
|
}
|
|
3645
3667
|
//#endregion
|
|
3646
3668
|
//#region src/models/modelAudit.ts
|
|
3669
|
+
function getModelAuditSortColumn(sortField) {
|
|
3670
|
+
return modelAuditsTable[sortField];
|
|
3671
|
+
}
|
|
3647
3672
|
function createScanId(createdAt = /* @__PURE__ */ new Date()) {
|
|
3648
3673
|
return `scan-${randomSequence(3)}-${createdAt.toISOString().slice(0, 19)}`;
|
|
3649
3674
|
}
|
|
@@ -3684,8 +3709,10 @@ var ModelAudit = class ModelAudit {
|
|
|
3684
3709
|
this.issues = data.issues || data.results?.issues || null;
|
|
3685
3710
|
const issues = data.issues || data.results?.issues;
|
|
3686
3711
|
const resultsHasErrors = data.results?.has_errors ?? false;
|
|
3687
|
-
if (data.hasErrors === void 0)
|
|
3688
|
-
|
|
3712
|
+
if (data.hasErrors === void 0) {
|
|
3713
|
+
const hasActualErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
|
|
3714
|
+
this.hasErrors = hasActualErrors;
|
|
3715
|
+
} else this.hasErrors = data.hasErrors;
|
|
3689
3716
|
this.totalChecks = data.totalChecks;
|
|
3690
3717
|
this.passedChecks = data.passedChecks;
|
|
3691
3718
|
this.failedChecks = data.failedChecks;
|
|
@@ -3783,9 +3810,9 @@ var ModelAudit = class ModelAudit {
|
|
|
3783
3810
|
static async getMany(limit = 100, offset = 0, sortField = "createdAt", sortOrder = "desc", search) {
|
|
3784
3811
|
let query = getDb().select().from(modelAuditsTable);
|
|
3785
3812
|
if (search) query = query.where(or(like(modelAuditsTable.name, `%${search}%`), like(modelAuditsTable.modelPath, `%${search}%`), like(modelAuditsTable.id, `%${search}%`)));
|
|
3786
|
-
const sortColumn = sortField
|
|
3787
|
-
if (sortOrder === "asc") query = query.orderBy(asc(sortColumn));
|
|
3788
|
-
else query = query.orderBy(desc(sortColumn));
|
|
3813
|
+
const sortColumn = getModelAuditSortColumn(sortField);
|
|
3814
|
+
if (sortOrder === "asc") query = sortField === "id" ? query.orderBy(asc(sortColumn)) : query.orderBy(asc(sortColumn), asc(modelAuditsTable.id));
|
|
3815
|
+
else query = sortField === "id" ? query.orderBy(desc(sortColumn)) : query.orderBy(desc(sortColumn), desc(modelAuditsTable.id));
|
|
3789
3816
|
return (await query.limit(limit).offset(offset).all()).map((r) => new ModelAudit({
|
|
3790
3817
|
...r,
|
|
3791
3818
|
persisted: true
|
|
@@ -4144,14 +4171,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
4144
4171
|
state.resume = true;
|
|
4145
4172
|
state.retryMode = true;
|
|
4146
4173
|
} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
|
|
4147
|
-
if (!cmdObj.envPath && commandLineOptions?.envPath) {
|
|
4174
|
+
if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
|
|
4148
4175
|
logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
|
|
4149
4176
|
setupEnv(commandLineOptions.envPath);
|
|
4150
4177
|
}
|
|
4151
|
-
|
|
4152
|
-
Warning: Config file has a redteam section but no test cases.
|
|
4153
|
-
Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
|
|
4154
|
-
`));
|
|
4178
|
+
warnIfRedteamConfigHasNoTests(config, testSuite);
|
|
4155
4179
|
if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
|
|
4156
4180
|
const maybeUrl = config.providers[0]?.config?.url;
|
|
4157
4181
|
if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
|
|
@@ -4259,7 +4283,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
4259
4283
|
${z.prettifyError(testSuiteSchema.error)}
|
|
4260
4284
|
|
|
4261
4285
|
Please review your promptfooconfig.yaml configuration.`));
|
|
4262
|
-
const
|
|
4286
|
+
const author = getAuthor();
|
|
4287
|
+
const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
|
|
4288
|
+
author,
|
|
4289
|
+
runtimeOptions: options
|
|
4290
|
+
}) : new Eval(config, {
|
|
4291
|
+
author,
|
|
4292
|
+
runtimeOptions: options
|
|
4293
|
+
});
|
|
4263
4294
|
const abortController = new AbortController();
|
|
4264
4295
|
const previousAbortSignal = evaluateOptions.abortSignal;
|
|
4265
4296
|
evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
|
|
@@ -6032,25 +6063,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
6032
6063
|
getAssertions(_prompt) {
|
|
6033
6064
|
return getHarmfulAssertions(this.harmCategory);
|
|
6034
6065
|
}
|
|
6035
|
-
promptsToTestCases(prompts) {
|
|
6066
|
+
async promptsToTestCases(prompts) {
|
|
6036
6067
|
const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
|
|
6037
6068
|
const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
|
|
6038
|
-
|
|
6069
|
+
const pluginId = getShortPluginId(this.harmCategory);
|
|
6070
|
+
return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
|
|
6039
6071
|
const vars = { [this.injectVar]: __prompt };
|
|
6040
|
-
|
|
6041
|
-
|
|
6042
|
-
|
|
6043
|
-
|
|
6072
|
+
let inputMaterialization;
|
|
6073
|
+
if (hasMultipleInputs) {
|
|
6074
|
+
let parsed;
|
|
6075
|
+
try {
|
|
6076
|
+
parsed = JSON.parse(__prompt);
|
|
6077
|
+
} catch (error) {
|
|
6078
|
+
logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
6079
|
+
}
|
|
6080
|
+
if (parsed) try {
|
|
6081
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
|
|
6082
|
+
materializationIndex,
|
|
6083
|
+
pluginId,
|
|
6084
|
+
provider: this.provider,
|
|
6085
|
+
purpose: this.purpose
|
|
6086
|
+
});
|
|
6087
|
+
Object.assign(vars, materializedVars.vars);
|
|
6088
|
+
inputMaterialization = materializedVars.metadata;
|
|
6089
|
+
} catch (error) {
|
|
6090
|
+
logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
|
|
6091
|
+
throw error;
|
|
6092
|
+
}
|
|
6093
|
+
}
|
|
6044
6094
|
return {
|
|
6045
6095
|
vars,
|
|
6046
6096
|
metadata: {
|
|
6047
6097
|
harmCategory: harmCategoryLabel,
|
|
6048
|
-
pluginId
|
|
6049
|
-
pluginConfig: this.config
|
|
6098
|
+
pluginId,
|
|
6099
|
+
pluginConfig: this.config,
|
|
6100
|
+
...inputMaterialization ? { inputMaterialization } : {}
|
|
6050
6101
|
},
|
|
6051
6102
|
assert: getHarmfulAssertions(this.harmCategory)
|
|
6052
6103
|
};
|
|
6053
|
-
});
|
|
6104
|
+
}));
|
|
6054
6105
|
}
|
|
6055
6106
|
};
|
|
6056
6107
|
//#endregion
|
|
@@ -6059,20 +6110,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
|
|
|
6059
6110
|
* Extract content from <Prompt> tags and parse JSON if inputs are defined.
|
|
6060
6111
|
* Returns the processed prompt and any additional vars extracted from JSON.
|
|
6061
6112
|
*/
|
|
6062
|
-
function processPromptForInputs(prompt,
|
|
6113
|
+
async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
|
|
6063
6114
|
let processedPrompt = prompt.trim();
|
|
6064
6115
|
const additionalVars = {};
|
|
6116
|
+
let additionalMetadata;
|
|
6065
6117
|
const extractedPrompt = extractPromptFromTags(processedPrompt);
|
|
6066
6118
|
if (extractedPrompt) processedPrompt = extractedPrompt;
|
|
6067
|
-
if (inputs && Object.keys(inputs).length > 0)
|
|
6068
|
-
|
|
6069
|
-
|
|
6070
|
-
|
|
6071
|
-
|
|
6119
|
+
if (inputs && Object.keys(inputs).length > 0) {
|
|
6120
|
+
let parsed;
|
|
6121
|
+
try {
|
|
6122
|
+
parsed = JSON.parse(processedPrompt);
|
|
6123
|
+
} catch (error) {
|
|
6124
|
+
logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
6125
|
+
}
|
|
6126
|
+
if (parsed) try {
|
|
6127
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
|
|
6128
|
+
materializationIndex,
|
|
6129
|
+
pluginId: plugin,
|
|
6130
|
+
provider,
|
|
6131
|
+
purpose
|
|
6132
|
+
});
|
|
6133
|
+
Object.assign(additionalVars, materializedVars.vars);
|
|
6134
|
+
additionalMetadata = materializedVars.metadata;
|
|
6135
|
+
} catch (error) {
|
|
6136
|
+
logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
|
|
6137
|
+
throw error;
|
|
6138
|
+
}
|
|
6072
6139
|
}
|
|
6073
6140
|
return {
|
|
6074
6141
|
processedPrompt,
|
|
6075
|
-
additionalVars
|
|
6142
|
+
additionalVars,
|
|
6143
|
+
additionalMetadata
|
|
6076
6144
|
};
|
|
6077
6145
|
}
|
|
6078
6146
|
async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
|
|
@@ -6093,15 +6161,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
6093
6161
|
};
|
|
6094
6162
|
const allPrompts = await retryWithDeduplication(generatePrompts, n);
|
|
6095
6163
|
const inputs = config?.inputs;
|
|
6096
|
-
return sampleArray(allPrompts, n).map((prompt) => {
|
|
6097
|
-
const { processedPrompt, additionalVars } = processPromptForInputs(prompt,
|
|
6164
|
+
return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
|
|
6165
|
+
const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
|
|
6098
6166
|
const testCase = createTestCase(injectVar, processedPrompt, plugin);
|
|
6099
6167
|
if (Object.keys(additionalVars).length > 0) testCase.vars = {
|
|
6100
6168
|
...testCase.vars,
|
|
6101
6169
|
...additionalVars
|
|
6102
6170
|
};
|
|
6171
|
+
if (additionalMetadata) testCase.metadata = {
|
|
6172
|
+
...testCase.metadata,
|
|
6173
|
+
inputMaterialization: additionalMetadata
|
|
6174
|
+
};
|
|
6103
6175
|
return testCase;
|
|
6104
|
-
});
|
|
6176
|
+
}));
|
|
6105
6177
|
}
|
|
6106
6178
|
//#endregion
|
|
6107
6179
|
//#region src/redteam/plugins/teenSafety/graderExamples.ts
|
|
@@ -6419,7 +6491,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
|
|
|
6419
6491
|
function computeModifiersFromConfig(config) {
|
|
6420
6492
|
const modifiers = { ...config?.modifiers };
|
|
6421
6493
|
if (config?.language && typeof config.language === "string") modifiers.language = config.language;
|
|
6422
|
-
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
6494
|
+
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
6423
6495
|
const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
6424
6496
|
if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
|
|
6425
6497
|
return modifiers;
|
|
@@ -6577,6 +6649,7 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
6577
6649
|
logger.error(`Error generating test cases for ${key}: ${statusText} ${JSON.stringify(data)}`);
|
|
6578
6650
|
return [];
|
|
6579
6651
|
}
|
|
6652
|
+
if (requiresRemoteMaterialization(config?.inputs)) assertRemoteMaterializationHandled(data, `Remote plugin generation for ${key}`);
|
|
6580
6653
|
const ret = data.result;
|
|
6581
6654
|
logger.debug(`Received remote generation for ${key}:\n${JSON.stringify(ret)}`);
|
|
6582
6655
|
return ret;
|
|
@@ -6595,13 +6668,14 @@ function createPluginFactory(PluginClass, key, validate) {
|
|
|
6595
6668
|
logger.debug(`Using local redteam generation for ${key}`);
|
|
6596
6669
|
return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
|
|
6597
6670
|
}
|
|
6671
|
+
const pluginId = getShortPluginId(key);
|
|
6598
6672
|
const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
|
|
6599
6673
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
6600
6674
|
return testCases.map((testCase) => ({
|
|
6601
6675
|
...testCase,
|
|
6602
6676
|
metadata: {
|
|
6603
6677
|
...testCase.metadata,
|
|
6604
|
-
pluginId
|
|
6678
|
+
pluginId,
|
|
6605
6679
|
pluginConfig: {
|
|
6606
6680
|
...configWithDefaults,
|
|
6607
6681
|
modifiers: computedModifiers
|
|
@@ -6658,7 +6732,7 @@ const pluginFactories = [
|
|
|
6658
6732
|
key: category,
|
|
6659
6733
|
action: async (params) => {
|
|
6660
6734
|
if (neverGenerateRemote()) {
|
|
6661
|
-
logger.error(`${category} plugin
|
|
6735
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
6662
6736
|
return [];
|
|
6663
6737
|
}
|
|
6664
6738
|
const testCases = await getHarmfulTests(params, category);
|
|
@@ -6681,13 +6755,14 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
|
|
|
6681
6755
|
key: category,
|
|
6682
6756
|
action: async (params) => {
|
|
6683
6757
|
if (shouldGenerateRemote()) {
|
|
6758
|
+
const pluginId = getShortPluginId(category);
|
|
6684
6759
|
const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
|
|
6685
6760
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
6686
6761
|
return testCases.map((testCase) => ({
|
|
6687
6762
|
...testCase,
|
|
6688
6763
|
metadata: {
|
|
6689
6764
|
...testCase.metadata,
|
|
6690
|
-
pluginId
|
|
6765
|
+
pluginId,
|
|
6691
6766
|
pluginConfig: {
|
|
6692
6767
|
...params.config,
|
|
6693
6768
|
modifiers: computedModifiers
|
|
@@ -6709,16 +6784,17 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
|
|
|
6709
6784
|
key: category,
|
|
6710
6785
|
action: async (params) => {
|
|
6711
6786
|
if (neverGenerateRemote()) {
|
|
6712
|
-
logger.error(`${category} plugin
|
|
6787
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
6713
6788
|
return [];
|
|
6714
6789
|
}
|
|
6790
|
+
const pluginId = getShortPluginId(category);
|
|
6715
6791
|
const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
|
|
6716
6792
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
6717
6793
|
return testCases.map((testCase) => ({
|
|
6718
6794
|
...testCase,
|
|
6719
6795
|
metadata: {
|
|
6720
6796
|
...testCase.metadata,
|
|
6721
|
-
pluginId
|
|
6797
|
+
pluginId,
|
|
6722
6798
|
pluginConfig: {
|
|
6723
6799
|
...params.config,
|
|
6724
6800
|
modifiers: computedModifiers
|
|
@@ -6734,16 +6810,17 @@ function createRemotePlugin(key, validate) {
|
|
|
6734
6810
|
action: async ({ purpose, injectVar, n, config }) => {
|
|
6735
6811
|
const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
|
|
6736
6812
|
if (neverGenerateRemote()) {
|
|
6737
|
-
logger.error(`${key} plugin
|
|
6813
|
+
logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
|
|
6738
6814
|
return [];
|
|
6739
6815
|
}
|
|
6816
|
+
const pluginId = getShortPluginId(key);
|
|
6740
6817
|
const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
|
|
6741
6818
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
6742
6819
|
const testsWithMetadata = testCases.map((testCase) => ({
|
|
6743
6820
|
...testCase,
|
|
6744
6821
|
metadata: {
|
|
6745
6822
|
...testCase.metadata,
|
|
6746
|
-
pluginId
|
|
6823
|
+
pluginId,
|
|
6747
6824
|
pluginConfig: {
|
|
6748
6825
|
...configWithDefaults,
|
|
6749
6826
|
modifiers: computedModifiers
|
|
@@ -6804,6 +6881,15 @@ async function validateSharpDependency(strategies, plugins, checkSharp = isSharp
|
|
|
6804
6881
|
}
|
|
6805
6882
|
//#endregion
|
|
6806
6883
|
//#region src/redteam/index.ts
|
|
6884
|
+
const MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY = "__promptfooMaterializedMultiInputPrompt";
|
|
6885
|
+
function getMaterializedMultiInputPromptSnapshot(metadata) {
|
|
6886
|
+
const snapshot = metadata?.[MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY];
|
|
6887
|
+
return typeof snapshot === "string" ? snapshot : void 0;
|
|
6888
|
+
}
|
|
6889
|
+
function getMaterializedMultiInputPromptMetadata(vars) {
|
|
6890
|
+
const prompt = vars?.[MULTI_INPUT_VAR];
|
|
6891
|
+
return typeof prompt === "string" ? { [MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY]: prompt } : void 0;
|
|
6892
|
+
}
|
|
6807
6893
|
function getPolicyText(metadata) {
|
|
6808
6894
|
if (!metadata || metadata.policy === void 0 || metadata.policy === null) return;
|
|
6809
6895
|
const policyValue = metadata.policy;
|
|
@@ -6813,6 +6899,44 @@ function getPolicyText(metadata) {
|
|
|
6813
6899
|
return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
|
|
6814
6900
|
}
|
|
6815
6901
|
}
|
|
6902
|
+
async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
|
|
6903
|
+
const inputs = testCase.metadata?.pluginConfig?.inputs;
|
|
6904
|
+
const inputMaterialization = testCase.metadata?.inputMaterialization;
|
|
6905
|
+
const materializedPromptSnapshot = getMaterializedMultiInputPromptSnapshot(testCase.metadata);
|
|
6906
|
+
const currentInjectVar = testCase.vars?.[injectVar];
|
|
6907
|
+
if (!inputs || Object.keys(inputs).length === 0 || !currentInjectVar) return {
|
|
6908
|
+
inputMaterialization,
|
|
6909
|
+
vars: testCase.vars
|
|
6910
|
+
};
|
|
6911
|
+
const promptChangedSinceMaterialization = typeof currentInjectVar === "string" && typeof materializedPromptSnapshot === "string" && currentInjectVar !== materializedPromptSnapshot;
|
|
6912
|
+
if (Boolean(inputMaterialization) && Object.keys(inputs).every((key) => Object.prototype.hasOwnProperty.call(testCase.vars ?? {}, key)) && !promptChangedSinceMaterialization) return {
|
|
6913
|
+
inputMaterialization,
|
|
6914
|
+
vars: testCase.vars
|
|
6915
|
+
};
|
|
6916
|
+
try {
|
|
6917
|
+
const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(currentInjectVar)), inputs, {
|
|
6918
|
+
materializationIndex,
|
|
6919
|
+
pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
|
|
6920
|
+
provider,
|
|
6921
|
+
purpose
|
|
6922
|
+
});
|
|
6923
|
+
return {
|
|
6924
|
+
inputMaterialization: materializedVars.metadata ? {
|
|
6925
|
+
...inputMaterialization,
|
|
6926
|
+
...materializedVars.metadata
|
|
6927
|
+
} : inputMaterialization,
|
|
6928
|
+
vars: {
|
|
6929
|
+
...testCase.vars,
|
|
6930
|
+
...materializedVars.vars
|
|
6931
|
+
}
|
|
6932
|
+
};
|
|
6933
|
+
} catch {
|
|
6934
|
+
return {
|
|
6935
|
+
inputMaterialization,
|
|
6936
|
+
vars: testCase.vars
|
|
6937
|
+
};
|
|
6938
|
+
}
|
|
6939
|
+
}
|
|
6816
6940
|
/**
|
|
6817
6941
|
* Gets the severity level for a plugin based on its ID and configuration.
|
|
6818
6942
|
* @param pluginId - The ID of the plugin.
|
|
@@ -6954,6 +7078,7 @@ const categories = {
|
|
|
6954
7078
|
foundation: FOUNDATION_PLUGINS,
|
|
6955
7079
|
harmful: Object.keys(HARM_PLUGINS),
|
|
6956
7080
|
"coding-agent:core": CODING_AGENT_CORE_PLUGINS,
|
|
7081
|
+
"coding-agent:all": CODING_AGENT_PLUGINS,
|
|
6957
7082
|
bias: BIAS_PLUGINS,
|
|
6958
7083
|
pii: PII_PLUGINS,
|
|
6959
7084
|
medical: MEDICAL_PLUGINS,
|
|
@@ -7021,7 +7146,8 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
7021
7146
|
...test.metadata?.modifiers,
|
|
7022
7147
|
...languageToAdd
|
|
7023
7148
|
},
|
|
7024
|
-
...languageToAdd
|
|
7149
|
+
...languageToAdd,
|
|
7150
|
+
...getMaterializedMultiInputPromptMetadata(test.vars)
|
|
7025
7151
|
}
|
|
7026
7152
|
};
|
|
7027
7153
|
}
|
|
@@ -7048,7 +7174,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
7048
7174
|
* @param injectVar - The variable to inject.
|
|
7049
7175
|
* @returns An array of new test cases generated by strategies.
|
|
7050
7176
|
*/
|
|
7051
|
-
async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
7177
|
+
async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
7052
7178
|
const newTestCases = [];
|
|
7053
7179
|
const strategyResults = {};
|
|
7054
7180
|
for (const strategy of strategies) {
|
|
@@ -7104,14 +7230,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
7104
7230
|
}
|
|
7105
7231
|
}
|
|
7106
7232
|
resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
|
|
7107
|
-
newTestCases.push(...resultTestCases.map((t) => {
|
|
7108
|
-
const
|
|
7109
|
-
let updatedVars = t.vars;
|
|
7110
|
-
if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
|
|
7111
|
-
const parsed = JSON.parse(String(t.vars[injectVar]));
|
|
7112
|
-
updatedVars = { ...t.vars };
|
|
7113
|
-
Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
|
|
7114
|
-
} catch {}
|
|
7233
|
+
newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
|
|
7234
|
+
const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
|
|
7115
7235
|
const strategyConfig = {
|
|
7116
7236
|
...strategy.config || {},
|
|
7117
7237
|
...maxCharsPerMessage ? { maxCharsPerMessage } : {},
|
|
@@ -7119,16 +7239,18 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
7119
7239
|
};
|
|
7120
7240
|
return {
|
|
7121
7241
|
...t,
|
|
7122
|
-
vars
|
|
7242
|
+
vars,
|
|
7123
7243
|
metadata: {
|
|
7124
7244
|
...t?.metadata || {},
|
|
7125
7245
|
...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
|
|
7126
7246
|
...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
|
|
7127
7247
|
...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
|
|
7128
|
-
...
|
|
7248
|
+
...inputMaterialization && { inputMaterialization },
|
|
7249
|
+
...Object.keys(strategyConfig).length > 0 && { strategyConfig },
|
|
7250
|
+
...getMaterializedMultiInputPromptMetadata(vars)
|
|
7129
7251
|
}
|
|
7130
7252
|
};
|
|
7131
|
-
}));
|
|
7253
|
+
})));
|
|
7132
7254
|
const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
|
|
7133
7255
|
const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
|
|
7134
7256
|
const applyNumTestsCap = (calculatedRequested) => {
|
|
@@ -7471,14 +7593,21 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7471
7593
|
}
|
|
7472
7594
|
});
|
|
7473
7595
|
const languageResults = await Promise.allSettled(languagePromises);
|
|
7474
|
-
for (const result of languageResults) if (result.status === "fulfilled") {
|
|
7596
|
+
for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
|
|
7475
7597
|
const { lang, tests, requested, generated } = result.value;
|
|
7476
7598
|
allPluginTests.push(...tests);
|
|
7477
7599
|
resultsPerLanguage[lang || "default"] = {
|
|
7478
7600
|
requested,
|
|
7479
7601
|
generated
|
|
7480
7602
|
};
|
|
7481
|
-
} else
|
|
7603
|
+
} else {
|
|
7604
|
+
const lang = languages[index];
|
|
7605
|
+
logger.warn(`[Language Processing] Error generating tests for ${plugin.id}: ${result.reason}`);
|
|
7606
|
+
resultsPerLanguage[lang || "default"] = {
|
|
7607
|
+
requested: plugin.numTests,
|
|
7608
|
+
generated: 0
|
|
7609
|
+
};
|
|
7610
|
+
}
|
|
7482
7611
|
logger.debug(`[Language Processing] Total tests generated for ${plugin.id}: ${allPluginTests.length} (across ${languages.length} language(s))`);
|
|
7483
7612
|
if (!Array.isArray(allPluginTests) || allPluginTests.length === 0) logger.warn(`Failed to generate tests for ${plugin.id}`);
|
|
7484
7613
|
else {
|
|
@@ -7512,35 +7641,51 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7512
7641
|
generated: allPluginTests.length
|
|
7513
7642
|
};
|
|
7514
7643
|
} else if (plugin.id.startsWith("file://")) try {
|
|
7515
|
-
const
|
|
7516
|
-
const
|
|
7517
|
-
|
|
7518
|
-
|
|
7644
|
+
const languageConfig = plugin.config?.language ?? language;
|
|
7645
|
+
const languages = Array.isArray(languageConfig) ? languageConfig : languageConfig ? [languageConfig] : [void 0];
|
|
7646
|
+
const allCustomTests = [];
|
|
7647
|
+
const resultsPerLanguage = {};
|
|
7648
|
+
const languagePromises = languages.map(async (lang) => {
|
|
7649
|
+
const resolvedConfig = {
|
|
7519
7650
|
...resolvePluginConfigWithMaxChars(plugin.config, maxCharsPerMessage),
|
|
7520
|
-
...
|
|
7651
|
+
...lang ? { language: lang } : {},
|
|
7652
|
+
...hasMultipleInputs ? { inputs } : {}
|
|
7521
7653
|
};
|
|
7522
|
-
const
|
|
7523
|
-
...
|
|
7654
|
+
const customPluginConfig = {
|
|
7655
|
+
...resolvedConfig,
|
|
7656
|
+
modifiers: buildRedteamModifiers({
|
|
7524
7657
|
maxCharsPerMessage,
|
|
7525
|
-
pluginConfig:
|
|
7658
|
+
pluginConfig: resolvedConfig,
|
|
7526
7659
|
testGenerationInstructions
|
|
7527
|
-
})
|
|
7528
|
-
...t.metadata?.modifiers
|
|
7660
|
+
})
|
|
7529
7661
|
};
|
|
7662
|
+
const testCasesWithMetadata = filterOversizedTestCases((await new CustomPlugin(redteamProvider, purpose, injectVar, plugin.id, customPluginConfig).generateTests(plugin.numTests, delay)).map((t) => addLanguageToPluginMetadata(t, lang, plugin, maxCharsPerMessage, testGenerationInstructions)), injectVar, `Custom plugin ${plugin.id}`, maxCharsPerMessage);
|
|
7530
7663
|
return {
|
|
7531
|
-
|
|
7532
|
-
|
|
7533
|
-
|
|
7534
|
-
|
|
7535
|
-
...includePluginConfig && { pluginConfig: pluginConfigWithMaxChars },
|
|
7536
|
-
severity: plugin.severity || getPluginSeverity(plugin.id, resolvePluginConfig(plugin.config)),
|
|
7537
|
-
modifiers
|
|
7538
|
-
}
|
|
7664
|
+
lang,
|
|
7665
|
+
tests: testCasesWithMetadata,
|
|
7666
|
+
requested: plugin.numTests,
|
|
7667
|
+
generated: testCasesWithMetadata.length
|
|
7539
7668
|
};
|
|
7540
|
-
})
|
|
7669
|
+
});
|
|
7670
|
+
const languageResults = await Promise.allSettled(languagePromises);
|
|
7671
|
+
for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
|
|
7672
|
+
const { lang, tests, requested, generated } = result.value;
|
|
7673
|
+
allCustomTests.push(...tests);
|
|
7674
|
+
resultsPerLanguage[lang || "default"] = {
|
|
7675
|
+
requested,
|
|
7676
|
+
generated
|
|
7677
|
+
};
|
|
7678
|
+
} else {
|
|
7679
|
+
const lang = languages[index];
|
|
7680
|
+
logger.warn(`[Language Processing] Error generating tests for custom plugin ${plugin.id}: ${result.reason}`);
|
|
7681
|
+
resultsPerLanguage[lang || "default"] = {
|
|
7682
|
+
requested: plugin.numTests,
|
|
7683
|
+
generated: 0
|
|
7684
|
+
};
|
|
7685
|
+
}
|
|
7541
7686
|
if (needsGoalExtraction) {
|
|
7542
|
-
logger.debug(`Extracting goal for ${
|
|
7543
|
-
for (const testCase of
|
|
7687
|
+
logger.debug(`Extracting goal for ${allCustomTests.length} custom tests from ${plugin.id}...`);
|
|
7688
|
+
for (const testCase of allCustomTests) {
|
|
7544
7689
|
const promptVar = testCase.vars?.[injectVar];
|
|
7545
7690
|
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
7546
7691
|
const policy = getPolicyText(testCase.metadata);
|
|
@@ -7548,13 +7693,21 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7548
7693
|
testCase.metadata.goal = extractedGoal;
|
|
7549
7694
|
}
|
|
7550
7695
|
}
|
|
7551
|
-
testCases.push(...
|
|
7552
|
-
logger.debug(`Added ${
|
|
7553
|
-
const
|
|
7554
|
-
|
|
7555
|
-
|
|
7556
|
-
|
|
7696
|
+
testCases.push(...allCustomTests);
|
|
7697
|
+
logger.debug(`Added ${allCustomTests.length} custom test cases from ${plugin.id}`);
|
|
7698
|
+
const baseDisplayId = getPluginDisplayId(plugin);
|
|
7699
|
+
if (languages.filter((lang) => lang !== void 0).length > 1) for (const [langKey, result] of Object.entries(resultsPerLanguage)) {
|
|
7700
|
+
const displayId = langKey === "en" ? baseDisplayId : `(${langKey}) ${baseDisplayId}`;
|
|
7701
|
+
pluginResults[displayId] = {
|
|
7702
|
+
requested: result.requested,
|
|
7703
|
+
generated: result.generated
|
|
7704
|
+
};
|
|
7705
|
+
}
|
|
7706
|
+
else pluginResults[baseDisplayId] = {
|
|
7707
|
+
requested: plugin.numTests * languages.length,
|
|
7708
|
+
generated: allCustomTests.length
|
|
7557
7709
|
};
|
|
7710
|
+
progressBar?.increment(plugin.numTests * languages.length);
|
|
7558
7711
|
} catch (e) {
|
|
7559
7712
|
logger.error(`Error generating tests for custom plugin ${plugin.id}: ${e}`);
|
|
7560
7713
|
const displayId = getPluginDisplayId(plugin);
|
|
@@ -7583,7 +7736,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7583
7736
|
targetIds,
|
|
7584
7737
|
...retryStrategy.config
|
|
7585
7738
|
};
|
|
7586
|
-
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
|
|
7739
|
+
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
|
|
7587
7740
|
pluginTestCases.push(...retryTestCases);
|
|
7588
7741
|
Object.assign(strategyResults, retryResults);
|
|
7589
7742
|
if (showProgressBar) progressBar?.increment(retryTestCases.length);
|
|
@@ -7591,7 +7744,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
|
|
|
7591
7744
|
checkAbort();
|
|
7592
7745
|
const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
|
|
7593
7746
|
if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
|
|
7594
|
-
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
7747
|
+
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
7595
7748
|
Object.assign(strategyResults, otherStrategyResults);
|
|
7596
7749
|
if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
|
|
7597
7750
|
const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
|
|
@@ -8346,6 +8499,12 @@ async function doRedteamRun(options) {
|
|
|
8346
8499
|
return evalResult;
|
|
8347
8500
|
}
|
|
8348
8501
|
//#endregion
|
|
8502
|
+
//#region src/types/transform.ts
|
|
8503
|
+
/** Runtime type guard for `TransformFunction` values. */
|
|
8504
|
+
function isTransformFunction(value) {
|
|
8505
|
+
return typeof value === "function";
|
|
8506
|
+
}
|
|
8507
|
+
//#endregion
|
|
8349
8508
|
//#region src/index.ts
|
|
8350
8509
|
/**
|
|
8351
8510
|
* Shallow-clone a test case so the caller can swap in resolved ApiProvider
|
|
@@ -8367,66 +8526,148 @@ function cloneTestForResolve(test) {
|
|
|
8367
8526
|
if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
|
|
8368
8527
|
return cloned;
|
|
8369
8528
|
}
|
|
8529
|
+
function toSerializableProviderRef(provider) {
|
|
8530
|
+
if (isApiProvider(provider)) return sanitizeProvider(provider);
|
|
8531
|
+
if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
|
|
8532
|
+
return provider;
|
|
8533
|
+
}
|
|
8534
|
+
function isRecord(value) {
|
|
8535
|
+
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
8536
|
+
}
|
|
8537
|
+
function withSerializableProvider(record) {
|
|
8538
|
+
if (!isApiProvider(record.provider)) return record;
|
|
8539
|
+
return {
|
|
8540
|
+
...record,
|
|
8541
|
+
provider: sanitizeProvider(record.provider)
|
|
8542
|
+
};
|
|
8543
|
+
}
|
|
8544
|
+
/**
|
|
8545
|
+
* Function-valued transforms are first-class at runtime but are silently dropped
|
|
8546
|
+
* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
|
|
8547
|
+
* a function reference, so replace every `transform`-like field with a
|
|
8548
|
+
* `[inline function]: name` marker. Non-function values pass through unchanged.
|
|
8549
|
+
*
|
|
8550
|
+
* `droppedRef.value` is flipped to `true` the first time a function is replaced
|
|
8551
|
+
* so the caller can emit a single warning instead of logging per field.
|
|
8552
|
+
*/
|
|
8553
|
+
function replaceFunctionTransforms(record, droppedRef) {
|
|
8554
|
+
let result;
|
|
8555
|
+
for (const key of TRANSFORM_KEYS) {
|
|
8556
|
+
const value = record[key];
|
|
8557
|
+
if (!isTransformFunction(value)) continue;
|
|
8558
|
+
if (!result) result = { ...record };
|
|
8559
|
+
result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
|
|
8560
|
+
droppedRef.value = true;
|
|
8561
|
+
}
|
|
8562
|
+
return result ?? record;
|
|
8563
|
+
}
|
|
8564
|
+
function toSerializableAssertion(assertion, droppedRef) {
|
|
8565
|
+
if (!isRecord(assertion)) return assertion;
|
|
8566
|
+
let sanitizedAssertion = withSerializableProvider(assertion);
|
|
8567
|
+
sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
|
|
8568
|
+
if (Array.isArray(assertion.assert)) sanitizedAssertion = {
|
|
8569
|
+
...sanitizedAssertion,
|
|
8570
|
+
assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
8571
|
+
};
|
|
8572
|
+
return sanitizedAssertion;
|
|
8573
|
+
}
|
|
8574
|
+
function toSerializableTestCase(test, droppedRef) {
|
|
8575
|
+
if (!isRecord(test)) return test;
|
|
8576
|
+
let sanitizedTest = withSerializableProvider(test);
|
|
8577
|
+
if (isRecord(test.options)) {
|
|
8578
|
+
let options = withSerializableProvider(test.options);
|
|
8579
|
+
options = replaceFunctionTransforms(options, droppedRef);
|
|
8580
|
+
if (options !== test.options) sanitizedTest = {
|
|
8581
|
+
...sanitizedTest,
|
|
8582
|
+
options
|
|
8583
|
+
};
|
|
8584
|
+
}
|
|
8585
|
+
if (Array.isArray(test.assert)) sanitizedTest = {
|
|
8586
|
+
...sanitizedTest,
|
|
8587
|
+
assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
8588
|
+
};
|
|
8589
|
+
return sanitizedTest;
|
|
8590
|
+
}
|
|
8591
|
+
function toSerializableScenario(scenario, droppedRef) {
|
|
8592
|
+
if (!isRecord(scenario)) return scenario;
|
|
8593
|
+
if (!Array.isArray(scenario.tests)) return scenario;
|
|
8594
|
+
return {
|
|
8595
|
+
...scenario,
|
|
8596
|
+
tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
|
|
8597
|
+
};
|
|
8598
|
+
}
|
|
8599
|
+
function createSerializableUnifiedConfig(testSuite, prompts) {
|
|
8600
|
+
const droppedRef = { value: false };
|
|
8601
|
+
const config = {
|
|
8602
|
+
...testSuite,
|
|
8603
|
+
providers: toSerializableProviderRef(testSuite.providers),
|
|
8604
|
+
defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
|
|
8605
|
+
tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
|
|
8606
|
+
scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
|
|
8607
|
+
prompts
|
|
8608
|
+
};
|
|
8609
|
+
if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
|
|
8610
|
+
return config;
|
|
8611
|
+
}
|
|
8370
8612
|
async function evaluate(testSuite, options = {}) {
|
|
8371
|
-
|
|
8372
|
-
|
|
8613
|
+
const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
|
|
8614
|
+
if (testSuiteConfig.writeLatestResults) await runDbMigrations();
|
|
8615
|
+
const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
|
|
8373
8616
|
const providerMap = {};
|
|
8374
8617
|
for (const p of loadedProviders) {
|
|
8375
8618
|
providerMap[p.id()] = p;
|
|
8376
8619
|
if (p.label) providerMap[p.label] = p;
|
|
8377
8620
|
}
|
|
8378
|
-
let resolvedDefaultTest =
|
|
8379
|
-
if (typeof
|
|
8621
|
+
let resolvedDefaultTest = testSuiteConfig.defaultTest;
|
|
8622
|
+
if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
|
|
8380
8623
|
const constructedTestSuite = {
|
|
8381
|
-
...
|
|
8624
|
+
...testSuiteConfig,
|
|
8382
8625
|
defaultTest: resolvedDefaultTest,
|
|
8383
|
-
scenarios:
|
|
8626
|
+
scenarios: testSuiteConfig.scenarios,
|
|
8384
8627
|
providers: loadedProviders,
|
|
8385
|
-
tests: await readTests(
|
|
8386
|
-
nunjucksFilters: await readFilters(
|
|
8387
|
-
prompts: await processPrompts(
|
|
8628
|
+
tests: await readTests(testSuiteConfig.tests),
|
|
8629
|
+
nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
|
|
8630
|
+
prompts: await processPrompts(testSuiteConfig.prompts)
|
|
8388
8631
|
};
|
|
8389
8632
|
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
8390
8633
|
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
8391
8634
|
if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
8392
|
-
env:
|
|
8635
|
+
env: testSuiteConfig.env,
|
|
8393
8636
|
basePath: state.basePath
|
|
8394
8637
|
});
|
|
8395
8638
|
if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
8396
|
-
env:
|
|
8639
|
+
env: testSuiteConfig.env,
|
|
8397
8640
|
basePath: state.basePath
|
|
8398
8641
|
});
|
|
8399
8642
|
}
|
|
8400
8643
|
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
8401
8644
|
for (const test of constructedTestSuite.tests) {
|
|
8402
8645
|
if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
|
|
8403
|
-
env:
|
|
8646
|
+
env: testSuiteConfig.env,
|
|
8404
8647
|
basePath: state.basePath
|
|
8405
8648
|
});
|
|
8406
8649
|
for (const assertion of test.assert || []) {
|
|
8407
8650
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
8408
8651
|
if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
|
|
8409
|
-
env:
|
|
8652
|
+
env: testSuiteConfig.env,
|
|
8410
8653
|
basePath: state.basePath
|
|
8411
8654
|
});
|
|
8412
8655
|
}
|
|
8413
8656
|
}
|
|
8414
8657
|
if (options.cache === false) disableCache();
|
|
8415
|
-
const parsedProviderPromptMap = readProviderPromptMap(
|
|
8416
|
-
const unifiedConfig =
|
|
8417
|
-
|
|
8418
|
-
|
|
8419
|
-
};
|
|
8420
|
-
const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
|
|
8658
|
+
const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
|
|
8659
|
+
const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
|
|
8660
|
+
const author = getAuthor(suiteAuthor);
|
|
8661
|
+
const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
|
|
8421
8662
|
const ret = await evaluate$1({
|
|
8422
8663
|
...constructedTestSuite,
|
|
8423
8664
|
providerPromptMap: parsedProviderPromptMap
|
|
8424
8665
|
}, evalRecord, {
|
|
8425
8666
|
eventSource: "library",
|
|
8426
|
-
isRedteam: Boolean(
|
|
8667
|
+
isRedteam: Boolean(testSuiteConfig.redteam),
|
|
8427
8668
|
...options
|
|
8428
8669
|
});
|
|
8429
|
-
if (
|
|
8670
|
+
if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
|
|
8430
8671
|
const shareableUrl = await createShareableUrl(ret, { silent: true });
|
|
8431
8672
|
if (shareableUrl) {
|
|
8432
8673
|
ret.shareableUrl = shareableUrl;
|
|
@@ -8437,9 +8678,9 @@ async function evaluate(testSuite, options = {}) {
|
|
|
8437
8678
|
logger.warn(`Failed to create shareable URL: ${error}`);
|
|
8438
8679
|
}
|
|
8439
8680
|
else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
|
|
8440
|
-
if (
|
|
8441
|
-
if (typeof
|
|
8442
|
-
else if (Array.isArray(
|
|
8681
|
+
if (testSuiteConfig.outputPath) {
|
|
8682
|
+
if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
|
|
8683
|
+
else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
|
|
8443
8684
|
}
|
|
8444
8685
|
return ret;
|
|
8445
8686
|
}
|
|
@@ -8668,6 +8909,58 @@ function setDownloadHeaders(res, fileName, contentType) {
|
|
|
8668
8909
|
//#region src/server/routes/eval.ts
|
|
8669
8910
|
const evalRouter = Router();
|
|
8670
8911
|
const evalJobs = /* @__PURE__ */ new Map();
|
|
8912
|
+
function sendEvalTableResponse(res, evalId, responsePayload) {
|
|
8913
|
+
try {
|
|
8914
|
+
res.json(responsePayload);
|
|
8915
|
+
} catch (error) {
|
|
8916
|
+
if (!(error instanceof RangeError)) throw error;
|
|
8917
|
+
logger.warn("[GET /:id/table] Response too large, stripping per-cell prompts by size", { evalId });
|
|
8918
|
+
const promptLocations = getEvalTableOutputPromptLocationsBySize(responsePayload);
|
|
8919
|
+
if (promptLocations.length === 0) {
|
|
8920
|
+
logger.error("[GET /:id/table] Response too large and has no prompts to strip", { evalId });
|
|
8921
|
+
res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
|
|
8922
|
+
return;
|
|
8923
|
+
}
|
|
8924
|
+
const tryStringifyWithStrippedPrompts = (promptCountToStrip) => {
|
|
8925
|
+
const responseWithoutPrompts = getEvalTablePromptStrippedPayload(responsePayload, promptLocations, promptCountToStrip);
|
|
8926
|
+
try {
|
|
8927
|
+
const responseBody = JSON.stringify(responseWithoutPrompts);
|
|
8928
|
+
invariant(typeof responseBody === "string", "Eval table response must serialize to JSON");
|
|
8929
|
+
return responseBody;
|
|
8930
|
+
} catch (retryError) {
|
|
8931
|
+
if (!(retryError instanceof RangeError)) throw retryError;
|
|
8932
|
+
return null;
|
|
8933
|
+
}
|
|
8934
|
+
};
|
|
8935
|
+
let lowerBound = 0;
|
|
8936
|
+
let upperBound = 1;
|
|
8937
|
+
let responseBody = null;
|
|
8938
|
+
while (upperBound < promptLocations.length) {
|
|
8939
|
+
responseBody = tryStringifyWithStrippedPrompts(upperBound);
|
|
8940
|
+
if (responseBody) break;
|
|
8941
|
+
lowerBound = upperBound;
|
|
8942
|
+
upperBound *= 2;
|
|
8943
|
+
}
|
|
8944
|
+
if (!responseBody) {
|
|
8945
|
+
upperBound = promptLocations.length;
|
|
8946
|
+
responseBody = tryStringifyWithStrippedPrompts(upperBound);
|
|
8947
|
+
}
|
|
8948
|
+
if (responseBody) {
|
|
8949
|
+
while (upperBound - lowerBound > 1) {
|
|
8950
|
+
const midPoint = lowerBound + Math.floor((upperBound - lowerBound) / 2);
|
|
8951
|
+
const midpointResponseBody = tryStringifyWithStrippedPrompts(midPoint);
|
|
8952
|
+
if (midpointResponseBody) {
|
|
8953
|
+
upperBound = midPoint;
|
|
8954
|
+
responseBody = midpointResponseBody;
|
|
8955
|
+
} else lowerBound = midPoint;
|
|
8956
|
+
}
|
|
8957
|
+
res.type("json").send(responseBody);
|
|
8958
|
+
return;
|
|
8959
|
+
}
|
|
8960
|
+
logger.error("[GET /:id/table] Response still too large after stripping prompts", { evalId });
|
|
8961
|
+
res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
|
|
8962
|
+
}
|
|
8963
|
+
}
|
|
8671
8964
|
evalRouter.post("/job", (req, res) => {
|
|
8672
8965
|
const result = EvalSchemas.CreateJob.Request.safeParse(req.body);
|
|
8673
8966
|
if (!result.success) {
|
|
@@ -8708,9 +9001,10 @@ evalRouter.post("/job", (req, res) => {
|
|
|
8708
9001
|
job.evalId = evalResult.id;
|
|
8709
9002
|
console.log(`[${id}] Complete`);
|
|
8710
9003
|
}).catch((error) => {
|
|
8711
|
-
logger.error(
|
|
8712
|
-
|
|
8713
|
-
|
|
9004
|
+
logger.error("Failed to eval tests", {
|
|
9005
|
+
error,
|
|
9006
|
+
body: sanitizeObject(testSuite, { context: "request body" })
|
|
9007
|
+
});
|
|
8714
9008
|
const job = evalJobs.get(id);
|
|
8715
9009
|
invariant(job, "Job not found");
|
|
8716
9010
|
job.status = "error";
|
|
@@ -8907,7 +9201,7 @@ evalRouter.get("/:id/table", async (req, res) => {
|
|
|
8907
9201
|
evalId: id
|
|
8908
9202
|
});
|
|
8909
9203
|
}
|
|
8910
|
-
res
|
|
9204
|
+
sendEvalTableResponse(res, id, {
|
|
8911
9205
|
table: returnTable,
|
|
8912
9206
|
totalCount: table.totalCount,
|
|
8913
9207
|
filteredCount: table.filteredCount,
|
|
@@ -9073,43 +9367,53 @@ evalRouter.post("/:evalId/results/:id/rating", async (req, res) => {
|
|
|
9073
9367
|
res.status(400).json({ error: z.prettifyError(bodyResult.error) });
|
|
9074
9368
|
return;
|
|
9075
9369
|
}
|
|
9076
|
-
|
|
9077
|
-
|
|
9078
|
-
|
|
9079
|
-
|
|
9080
|
-
|
|
9081
|
-
|
|
9082
|
-
|
|
9083
|
-
|
|
9084
|
-
|
|
9085
|
-
|
|
9086
|
-
|
|
9087
|
-
|
|
9088
|
-
|
|
9089
|
-
|
|
9090
|
-
|
|
9091
|
-
|
|
9092
|
-
|
|
9093
|
-
|
|
9370
|
+
try {
|
|
9371
|
+
const { evalId, id } = paramsResult.data;
|
|
9372
|
+
const gradingResult = bodyResult.data;
|
|
9373
|
+
const result = await EvalResult.findById(id);
|
|
9374
|
+
if (!result || result.evalId !== evalId) {
|
|
9375
|
+
res.status(404).json({ error: "Result not found" });
|
|
9376
|
+
return;
|
|
9377
|
+
}
|
|
9378
|
+
const eval_ = await Eval.findById(evalId);
|
|
9379
|
+
if (!eval_) {
|
|
9380
|
+
res.status(404).json({ error: "Eval not found" });
|
|
9381
|
+
return;
|
|
9382
|
+
}
|
|
9383
|
+
const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
|
|
9384
|
+
const successChanged = result.success !== gradingResult.pass;
|
|
9385
|
+
const scoreChange = gradingResult.score - result.score;
|
|
9386
|
+
result.gradingResult = gradingResult;
|
|
9387
|
+
result.success = gradingResult.pass;
|
|
9388
|
+
result.score = gradingResult.score;
|
|
9389
|
+
const prompt = eval_.prompts[result.promptIdx];
|
|
9390
|
+
invariant(prompt, "Prompt not found");
|
|
9391
|
+
if (!prompt.metrics) {
|
|
9392
|
+
logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
|
|
9393
|
+
res.status(400).json({ error: "Prompt metrics not found" });
|
|
9394
|
+
return;
|
|
9395
|
+
}
|
|
9396
|
+
if (successChanged) if (result.success) {
|
|
9397
|
+
prompt.metrics.testPassCount += 1;
|
|
9398
|
+
prompt.metrics.testFailCount -= 1;
|
|
9399
|
+
prompt.metrics.assertPassCount += 1;
|
|
9400
|
+
prompt.metrics.score += scoreChange;
|
|
9401
|
+
if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
|
|
9402
|
+
} else {
|
|
9403
|
+
prompt.metrics.testPassCount -= 1;
|
|
9404
|
+
prompt.metrics.testFailCount += 1;
|
|
9405
|
+
prompt.metrics.assertFailCount += 1;
|
|
9406
|
+
prompt.metrics.score += scoreChange;
|
|
9407
|
+
if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
|
|
9408
|
+
}
|
|
9409
|
+
else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
|
|
9410
|
+
else prompt.metrics.assertFailCount += 1;
|
|
9411
|
+
await eval_.save();
|
|
9412
|
+
await result.save();
|
|
9413
|
+
res.json(result);
|
|
9414
|
+
} catch (error) {
|
|
9415
|
+
sendError(res, 500, "Failed to submit rating", error);
|
|
9094
9416
|
}
|
|
9095
|
-
if (successChanged) if (result.success) {
|
|
9096
|
-
prompt.metrics.testPassCount += 1;
|
|
9097
|
-
prompt.metrics.testFailCount -= 1;
|
|
9098
|
-
prompt.metrics.assertPassCount += 1;
|
|
9099
|
-
prompt.metrics.score += scoreChange;
|
|
9100
|
-
if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
|
|
9101
|
-
} else {
|
|
9102
|
-
prompt.metrics.testPassCount -= 1;
|
|
9103
|
-
prompt.metrics.testFailCount += 1;
|
|
9104
|
-
prompt.metrics.assertFailCount += 1;
|
|
9105
|
-
prompt.metrics.score += scoreChange;
|
|
9106
|
-
if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
|
|
9107
|
-
}
|
|
9108
|
-
else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
|
|
9109
|
-
else prompt.metrics.assertFailCount += 1;
|
|
9110
|
-
await eval_.save();
|
|
9111
|
-
await result.save();
|
|
9112
|
-
res.json(result);
|
|
9113
9417
|
});
|
|
9114
9418
|
evalRouter.post("/", async (req, res) => {
|
|
9115
9419
|
const bodyResult = EvalSchemas.Save.Request.safeParse(req.body);
|
|
@@ -9143,9 +9447,10 @@ evalRouter.post("/", async (req, res) => {
|
|
|
9143
9447
|
res.json(EvalSchemas.Save.Response.parse({ id: eval_.id }));
|
|
9144
9448
|
}
|
|
9145
9449
|
} catch (error) {
|
|
9146
|
-
logger.error(
|
|
9147
|
-
|
|
9148
|
-
|
|
9450
|
+
logger.error("Failed to write eval to database", {
|
|
9451
|
+
error,
|
|
9452
|
+
body: sanitizeObject(body, { context: "request body" })
|
|
9453
|
+
});
|
|
9149
9454
|
res.status(500).json({ error: "Failed to write eval to database" });
|
|
9150
9455
|
}
|
|
9151
9456
|
});
|
|
@@ -9363,7 +9668,7 @@ async function checkForUpdates() {
|
|
|
9363
9668
|
} catch {
|
|
9364
9669
|
return false;
|
|
9365
9670
|
}
|
|
9366
|
-
if (semverGt(latestVersion, "0.121.
|
|
9671
|
+
if (semverGt(latestVersion, "0.121.8")) {
|
|
9367
9672
|
const border = "=".repeat(TERMINAL_MAX_WIDTH);
|
|
9368
9673
|
logger.info(`\n${border}
|
|
9369
9674
|
${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
|
|
@@ -9522,6 +9827,9 @@ const ModelAuditCliOptionsSchema = z.object({
|
|
|
9522
9827
|
dryRun: z.boolean().optional(),
|
|
9523
9828
|
cache: z.boolean().optional(),
|
|
9524
9829
|
stream: z.boolean().optional(),
|
|
9830
|
+
scanners: z.array(z.string()).optional(),
|
|
9831
|
+
excludeScanner: z.array(z.string()).optional(),
|
|
9832
|
+
listScanners: z.boolean().optional(),
|
|
9525
9833
|
share: z.boolean().optional(),
|
|
9526
9834
|
noShare: z.boolean().optional()
|
|
9527
9835
|
});
|
|
@@ -9609,6 +9917,18 @@ const CLI_ARG_MAP = {
|
|
|
9609
9917
|
stream: {
|
|
9610
9918
|
flag: "--stream",
|
|
9611
9919
|
type: "boolean"
|
|
9920
|
+
},
|
|
9921
|
+
scanners: {
|
|
9922
|
+
flag: "--scanners",
|
|
9923
|
+
type: "array"
|
|
9924
|
+
},
|
|
9925
|
+
excludeScanner: {
|
|
9926
|
+
flag: "--exclude-scanner",
|
|
9927
|
+
type: "array"
|
|
9928
|
+
},
|
|
9929
|
+
listScanners: {
|
|
9930
|
+
flag: "--list-scanners",
|
|
9931
|
+
type: "boolean"
|
|
9612
9932
|
}
|
|
9613
9933
|
};
|
|
9614
9934
|
/**
|
|
@@ -9777,6 +10097,62 @@ function spawnModelAudit(args, options) {
|
|
|
9777
10097
|
});
|
|
9778
10098
|
});
|
|
9779
10099
|
}
|
|
10100
|
+
function collectRepeatableOption(value, previous = []) {
|
|
10101
|
+
return [...previous, value];
|
|
10102
|
+
}
|
|
10103
|
+
function hasScannerSelectionOptions(options) {
|
|
10104
|
+
return Boolean(options.scanners?.length || options.excludeScanner?.length);
|
|
10105
|
+
}
|
|
10106
|
+
function hasScannerSelectionValue(value) {
|
|
10107
|
+
if (Array.isArray(value)) return value.length > 0;
|
|
10108
|
+
return typeof value === "string" && value.trim().length > 0;
|
|
10109
|
+
}
|
|
10110
|
+
function hasPersistedScannerSelection(metadata) {
|
|
10111
|
+
const options = metadata?.options;
|
|
10112
|
+
if (!options || typeof options !== "object" || Array.isArray(options)) return false;
|
|
10113
|
+
return hasScannerSelectionValue(options.scanners) || hasScannerSelectionValue(options.excludeScanner);
|
|
10114
|
+
}
|
|
10115
|
+
/**
|
|
10116
|
+
* Parse CLI options through Zod, logging validation errors to the CLI.
|
|
10117
|
+
* Returns null when validation fails (and sets process.exitCode to 1).
|
|
10118
|
+
*/
|
|
10119
|
+
function buildCliArgs(paths, cliOptions) {
|
|
10120
|
+
try {
|
|
10121
|
+
return parseModelAuditArgs(paths, cliOptions);
|
|
10122
|
+
} catch (error) {
|
|
10123
|
+
if (error instanceof z.ZodError) {
|
|
10124
|
+
logger.error(`Invalid model audit options provided:\n${z.prettifyError(error)}`);
|
|
10125
|
+
process.exitCode = 1;
|
|
10126
|
+
return null;
|
|
10127
|
+
}
|
|
10128
|
+
throw error;
|
|
10129
|
+
}
|
|
10130
|
+
}
|
|
10131
|
+
/**
|
|
10132
|
+
* Run modelaudit with inherited stdio and propagate its exit code.
|
|
10133
|
+
*
|
|
10134
|
+
* `treatExitOneAsIssues=true` suppresses the error log for exit code 1, which
|
|
10135
|
+
* modelaudit uses to mean "scan completed, issues found" — callers that expect
|
|
10136
|
+
* findings (like the main scan flow) should set this, while list/help flows
|
|
10137
|
+
* (where a non-zero exit is always unexpected) should leave it false.
|
|
10138
|
+
*/
|
|
10139
|
+
async function runPassthroughModelAudit(args, env, treatExitOneAsIssues = false) {
|
|
10140
|
+
try {
|
|
10141
|
+
const spawnResult = await spawnModelAudit(args, {
|
|
10142
|
+
captureOutput: false,
|
|
10143
|
+
env
|
|
10144
|
+
});
|
|
10145
|
+
const isIssuesExit = treatExitOneAsIssues && spawnResult.code === 1;
|
|
10146
|
+
if (spawnResult.code !== null && spawnResult.code !== 0 && !isIssuesExit) logger.error(`Model scan process exited with code ${spawnResult.code}`);
|
|
10147
|
+
process.exitCode = spawnResult.code || 0;
|
|
10148
|
+
} catch (error) {
|
|
10149
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
10150
|
+
logger.error(`Failed to start modelaudit: ${message}`);
|
|
10151
|
+
logger.info("Make sure modelaudit is installed and available in your PATH.");
|
|
10152
|
+
logger.info("Install it using: pip install modelaudit");
|
|
10153
|
+
process.exitCode = 1;
|
|
10154
|
+
}
|
|
10155
|
+
}
|
|
9780
10156
|
/**
|
|
9781
10157
|
* Check for existing scan and determine if re-scan is needed.
|
|
9782
10158
|
* Returns the existing audit if found and re-scan should happen.
|
|
@@ -9799,6 +10175,20 @@ async function checkExistingScan(paths, options, currentScannerVersion) {
|
|
|
9799
10175
|
shouldSkip: false,
|
|
9800
10176
|
existingAudit: null
|
|
9801
10177
|
};
|
|
10178
|
+
if (hasScannerSelectionOptions(options)) {
|
|
10179
|
+
logger.debug("Re-scanning with scanner selection options");
|
|
10180
|
+
return {
|
|
10181
|
+
shouldSkip: false,
|
|
10182
|
+
existingAudit: existing
|
|
10183
|
+
};
|
|
10184
|
+
}
|
|
10185
|
+
if (hasPersistedScannerSelection(existing.metadata)) {
|
|
10186
|
+
logger.debug("Re-scanning because cached revision used scanner selection options");
|
|
10187
|
+
return {
|
|
10188
|
+
shouldSkip: false,
|
|
10189
|
+
existingAudit: existing
|
|
10190
|
+
};
|
|
10191
|
+
}
|
|
9802
10192
|
if (options.force) {
|
|
9803
10193
|
logger.debug(`Re-scanning (--force): ${modelId}`);
|
|
9804
10194
|
return {
|
|
@@ -9930,7 +10320,9 @@ async function saveAuditRecord(paths, results, options, currentScannerVersion, e
|
|
|
9930
10320
|
cache: options.cache,
|
|
9931
10321
|
quiet: options.quiet,
|
|
9932
10322
|
progress: options.progress,
|
|
9933
|
-
stream: options.stream
|
|
10323
|
+
stream: options.stream,
|
|
10324
|
+
scanners: options.scanners,
|
|
10325
|
+
excludeScanner: options.excludeScanner
|
|
9934
10326
|
}
|
|
9935
10327
|
};
|
|
9936
10328
|
if (existingAudit) {
|
|
@@ -10062,8 +10454,8 @@ async function processScanResultsFromStdout(spawnResult, paths, options, current
|
|
|
10062
10454
|
return processJsonResults(jsonOutput, spawnResult.code || 0, paths, options, currentScannerVersion, existingAudit);
|
|
10063
10455
|
}
|
|
10064
10456
|
function modelScanCommand(program) {
|
|
10065
|
-
program.command("scan-model").description("Scan model files for security and quality issues").argument("
|
|
10066
|
-
if (!paths || paths.length === 0) {
|
|
10457
|
+
program.command("scan-model").description("Scan model files for security and quality issues").argument("[paths...]", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("--scanners <scanner>", "Only run selected ModelAudit scanners (IDs/classes; comma-separated or repeated)", collectRepeatableOption).option("--exclude-scanner <scanner>", "Exclude a ModelAudit scanner from the active set (comma-separated or repeated)", collectRepeatableOption).option("--list-scanners", "List registered ModelAudit scanners and exit").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
|
|
10458
|
+
if (!options.listScanners && (!paths || paths.length === 0)) {
|
|
10067
10459
|
logger.error("No paths specified. Provide at least one model file or directory to scan.");
|
|
10068
10460
|
process.exitCode = 1;
|
|
10069
10461
|
return;
|
|
@@ -10079,6 +10471,21 @@ function modelScanCommand(program) {
|
|
|
10079
10471
|
}
|
|
10080
10472
|
await checkModelAuditUpdates();
|
|
10081
10473
|
if (currentScannerVersion) logger.debug(`Using modelaudit version: ${currentScannerVersion}`);
|
|
10474
|
+
const delegationEnv = {
|
|
10475
|
+
...process.env,
|
|
10476
|
+
PROMPTFOO_DELEGATED: "true"
|
|
10477
|
+
};
|
|
10478
|
+
if (options.listScanners) {
|
|
10479
|
+
const parsed = buildCliArgs(paths || [], {
|
|
10480
|
+
...options,
|
|
10481
|
+
format: options.format || "text",
|
|
10482
|
+
output: options.output,
|
|
10483
|
+
timeout: void 0
|
|
10484
|
+
});
|
|
10485
|
+
if (!parsed) return;
|
|
10486
|
+
await runPassthroughModelAudit(parsed.args, delegationEnv);
|
|
10487
|
+
return;
|
|
10488
|
+
}
|
|
10082
10489
|
const saveToDatabase = options.write === void 0 || options.write === true;
|
|
10083
10490
|
let existingAuditToUpdate = null;
|
|
10084
10491
|
if (saveToDatabase) {
|
|
@@ -10090,33 +10497,22 @@ function modelScanCommand(program) {
|
|
|
10090
10497
|
existingAuditToUpdate = existingAudit;
|
|
10091
10498
|
}
|
|
10092
10499
|
const outputFormat = saveToDatabase ? "json" : options.format || "text";
|
|
10093
|
-
const
|
|
10500
|
+
const parsed = buildCliArgs(paths, {
|
|
10094
10501
|
...options,
|
|
10095
10502
|
format: outputFormat,
|
|
10096
10503
|
output: options.output && !saveToDatabase ? options.output : void 0,
|
|
10097
10504
|
timeout: options.timeout ? parseInt(options.timeout, 10) : void 0
|
|
10098
|
-
};
|
|
10099
|
-
|
|
10100
|
-
|
|
10101
|
-
|
|
10102
|
-
|
|
10103
|
-
|
|
10104
|
-
|
|
10105
|
-
|
|
10106
|
-
logger.error("Invalid model audit options provided:");
|
|
10107
|
-
for (const err of error.issues) logger.error(` - ${err.path.join(".")}: ${err.message}`);
|
|
10108
|
-
process.exitCode = 1;
|
|
10109
|
-
return;
|
|
10110
|
-
}
|
|
10111
|
-
throw error;
|
|
10505
|
+
});
|
|
10506
|
+
if (!parsed) return;
|
|
10507
|
+
const args = parsed.args;
|
|
10508
|
+
if (parsed.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${parsed.unsupportedOptions.join(", ")}`);
|
|
10509
|
+
if (saveToDatabase || outputFormat === "text") logger.info(`Running model scan on: ${paths.join(", ")}`);
|
|
10510
|
+
if (!saveToDatabase) {
|
|
10511
|
+
await runPassthroughModelAudit(args, delegationEnv, true);
|
|
10512
|
+
return;
|
|
10112
10513
|
}
|
|
10113
|
-
logger.info(`Running model scan on: ${paths.join(", ")}`);
|
|
10114
|
-
const delegationEnv = {
|
|
10115
|
-
...process.env,
|
|
10116
|
-
PROMPTFOO_DELEGATED: "true"
|
|
10117
|
-
};
|
|
10118
10514
|
try {
|
|
10119
|
-
if (
|
|
10515
|
+
if (supportsCliUiWithOutput(currentScannerVersion)) {
|
|
10120
10516
|
const tempOutputPath = createTempOutputPath();
|
|
10121
10517
|
args.push("--output", tempOutputPath);
|
|
10122
10518
|
let cleanedUp = false;
|
|
@@ -10150,14 +10546,6 @@ function modelScanCommand(program) {
|
|
|
10150
10546
|
});
|
|
10151
10547
|
process.exitCode = await processScanResultsFromStdout(spawnResult, paths, options, currentScannerVersion, existingAuditToUpdate);
|
|
10152
10548
|
}
|
|
10153
|
-
else {
|
|
10154
|
-
const spawnResult = await spawnModelAudit(args, {
|
|
10155
|
-
captureOutput: false,
|
|
10156
|
-
env: delegationEnv
|
|
10157
|
-
});
|
|
10158
|
-
if (spawnResult.code !== null && spawnResult.code !== 0 && spawnResult.code !== 1) logger.error(`Model scan process exited with code ${spawnResult.code}`);
|
|
10159
|
-
process.exitCode = spawnResult.code || 0;
|
|
10160
|
-
}
|
|
10161
10549
|
} catch (error) {
|
|
10162
10550
|
const message = error instanceof Error ? error.message : String(error);
|
|
10163
10551
|
logger.error(`Failed to start modelaudit: ${message}`);
|
|
@@ -10174,6 +10562,14 @@ const CheckInstalledResponseSchema = z.object({
|
|
|
10174
10562
|
version: z.string().nullable(),
|
|
10175
10563
|
cwd: z.string()
|
|
10176
10564
|
});
|
|
10565
|
+
const ScannerInfoSchema = z.object({
|
|
10566
|
+
id: z.string(),
|
|
10567
|
+
class: z.string().optional().default(""),
|
|
10568
|
+
description: z.string().optional().default(""),
|
|
10569
|
+
extensions: z.array(z.string()).optional().default([]),
|
|
10570
|
+
dependencies: z.array(z.string()).optional().default([])
|
|
10571
|
+
}).passthrough();
|
|
10572
|
+
const ListScannersResponseSchema = z.object({ scanners: z.array(ScannerInfoSchema) }).passthrough();
|
|
10177
10573
|
const CheckPathRequestSchema = z.object({ path: z.string().trim().min(1, "No path provided") });
|
|
10178
10574
|
const CheckPathResponseSchema = z.union([z.object({
|
|
10179
10575
|
exists: z.literal(false),
|
|
@@ -10205,6 +10601,8 @@ const ScanRequestSchema = z.object({
|
|
|
10205
10601
|
sbom: z.string().optional(),
|
|
10206
10602
|
output: z.string().optional(),
|
|
10207
10603
|
maxSize: z.string().optional(),
|
|
10604
|
+
scanners: z.array(z.string()).optional(),
|
|
10605
|
+
excludeScanner: z.array(z.string()).optional(),
|
|
10208
10606
|
persist: z.boolean().optional(),
|
|
10209
10607
|
name: z.string().optional(),
|
|
10210
10608
|
author: z.string().optional()
|
|
@@ -10215,8 +10613,13 @@ const ListScansQuerySchema = z.object({
|
|
|
10215
10613
|
offset: z.coerce.number().int().min(0).optional().default(0),
|
|
10216
10614
|
sort: z.enum([
|
|
10217
10615
|
"createdAt",
|
|
10616
|
+
"failedChecks",
|
|
10617
|
+
"hasErrors",
|
|
10618
|
+
"id",
|
|
10619
|
+
"modelPath",
|
|
10218
10620
|
"name",
|
|
10219
|
-
"
|
|
10621
|
+
"passedChecks",
|
|
10622
|
+
"totalChecks"
|
|
10220
10623
|
]).optional().default("createdAt"),
|
|
10221
10624
|
order: z.enum(["asc", "desc"]).optional().default("desc"),
|
|
10222
10625
|
search: z.string().optional()
|
|
@@ -10255,6 +10658,7 @@ const DeleteScanResponseSchema = z.object({
|
|
|
10255
10658
|
});
|
|
10256
10659
|
const ModelAuditSchemas = {
|
|
10257
10660
|
CheckInstalled: { Response: CheckInstalledResponseSchema },
|
|
10661
|
+
ListScanners: { Response: ListScannersResponseSchema },
|
|
10258
10662
|
CheckPath: {
|
|
10259
10663
|
Request: CheckPathRequestSchema,
|
|
10260
10664
|
Response: CheckPathResponseSchema
|
|
@@ -10277,6 +10681,47 @@ const ModelAuditSchemas = {
|
|
|
10277
10681
|
//#endregion
|
|
10278
10682
|
//#region src/server/routes/modelAudit.ts
|
|
10279
10683
|
const modelAuditRouter = Router();
|
|
10684
|
+
const LIST_SCANNERS_ARGS = parseModelAuditArgs([], {
|
|
10685
|
+
listScanners: true,
|
|
10686
|
+
format: "json"
|
|
10687
|
+
}).args;
|
|
10688
|
+
function getModelAuditDelegationEnv() {
|
|
10689
|
+
return {
|
|
10690
|
+
...process.env,
|
|
10691
|
+
PROMPTFOO_DELEGATED: "true"
|
|
10692
|
+
};
|
|
10693
|
+
}
|
|
10694
|
+
function spawnModelAuditCapture(args, options = {}) {
|
|
10695
|
+
return new Promise((resolve, reject) => {
|
|
10696
|
+
const child = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
|
|
10697
|
+
let stdout = "";
|
|
10698
|
+
let stderr = "";
|
|
10699
|
+
const onAbort = () => {
|
|
10700
|
+
if (!child.killed) child.kill("SIGTERM");
|
|
10701
|
+
};
|
|
10702
|
+
if (options.signal?.aborted) onAbort();
|
|
10703
|
+
else options.signal?.addEventListener("abort", onAbort, { once: true });
|
|
10704
|
+
const cleanupAbort = () => options.signal?.removeEventListener("abort", onAbort);
|
|
10705
|
+
child.stdout?.on("data", (data) => {
|
|
10706
|
+
stdout += data.toString();
|
|
10707
|
+
});
|
|
10708
|
+
child.stderr?.on("data", (data) => {
|
|
10709
|
+
stderr += data.toString();
|
|
10710
|
+
});
|
|
10711
|
+
child.on("error", (error) => {
|
|
10712
|
+
cleanupAbort();
|
|
10713
|
+
reject(error);
|
|
10714
|
+
});
|
|
10715
|
+
child.on("close", (code) => {
|
|
10716
|
+
cleanupAbort();
|
|
10717
|
+
resolve({
|
|
10718
|
+
code,
|
|
10719
|
+
stdout,
|
|
10720
|
+
stderr
|
|
10721
|
+
});
|
|
10722
|
+
});
|
|
10723
|
+
});
|
|
10724
|
+
}
|
|
10280
10725
|
modelAuditRouter.get("/check-installed", async (_req, res) => {
|
|
10281
10726
|
try {
|
|
10282
10727
|
const { installed, version } = await checkModelAuditInstalled();
|
|
@@ -10293,6 +10738,34 @@ modelAuditRouter.get("/check-installed", async (_req, res) => {
|
|
|
10293
10738
|
}));
|
|
10294
10739
|
}
|
|
10295
10740
|
});
|
|
10741
|
+
modelAuditRouter.get("/scanners", async (req, res) => {
|
|
10742
|
+
const abortController = new AbortController();
|
|
10743
|
+
const onClientClose = () => abortController.abort();
|
|
10744
|
+
req.on("close", onClientClose);
|
|
10745
|
+
try {
|
|
10746
|
+
const { installed } = await checkModelAuditInstalled();
|
|
10747
|
+
if (!installed) {
|
|
10748
|
+
res.status(400).json({ error: "ModelAudit is not installed. Please install it using: pip install modelaudit" });
|
|
10749
|
+
return;
|
|
10750
|
+
}
|
|
10751
|
+
const { code, stdout, stderr } = await spawnModelAuditCapture(LIST_SCANNERS_ARGS, { signal: abortController.signal });
|
|
10752
|
+
if (abortController.signal.aborted) return;
|
|
10753
|
+
if (code !== null && code !== 0) {
|
|
10754
|
+
sendError(res, 500, "Failed to list ModelAudit scanners", {
|
|
10755
|
+
code,
|
|
10756
|
+
stderr
|
|
10757
|
+
});
|
|
10758
|
+
return;
|
|
10759
|
+
}
|
|
10760
|
+
const parsedOutput = JSON.parse(stdout);
|
|
10761
|
+
res.json(ModelAuditSchemas.ListScanners.Response.parse(parsedOutput));
|
|
10762
|
+
} catch (error) {
|
|
10763
|
+
if (abortController.signal.aborted) return;
|
|
10764
|
+
sendError(res, 500, "Failed to list ModelAudit scanners", error);
|
|
10765
|
+
} finally {
|
|
10766
|
+
req.removeListener("close", onClientClose);
|
|
10767
|
+
}
|
|
10768
|
+
});
|
|
10296
10769
|
modelAuditRouter.post("/check-path", async (req, res) => {
|
|
10297
10770
|
const bodyResult = ModelAuditSchemas.CheckPath.Request.safeParse(req.body);
|
|
10298
10771
|
if (!bodyResult.success) {
|
|
@@ -10363,11 +10836,12 @@ modelAuditRouter.post("/scan", async (req, res) => {
|
|
|
10363
10836
|
event: "model_scan",
|
|
10364
10837
|
pathCount: paths.length,
|
|
10365
10838
|
hasBlacklist: (options.blacklist?.length ?? 0) > 0,
|
|
10839
|
+
hasScannerSelection: Boolean(options.scanners?.length || options.excludeScanner?.length),
|
|
10366
10840
|
timeout: options.timeout ?? 0,
|
|
10367
10841
|
verbose: options.verbose ?? false,
|
|
10368
10842
|
persist
|
|
10369
10843
|
});
|
|
10370
|
-
const modelAudit = spawn("modelaudit", args);
|
|
10844
|
+
const modelAudit = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
|
|
10371
10845
|
let stdout = "";
|
|
10372
10846
|
let stderr = "";
|
|
10373
10847
|
let responded = false;
|
|
@@ -10553,9 +11027,20 @@ modelAuditRouter.post("/scan", async (req, res) => {
|
|
|
10553
11027
|
options: {
|
|
10554
11028
|
blacklist: options.blacklist,
|
|
10555
11029
|
timeout: options.timeout,
|
|
11030
|
+
maxSize: options.maxSize,
|
|
10556
11031
|
maxFileSize: options.maxFileSize,
|
|
10557
11032
|
maxTotalSize: options.maxTotalSize,
|
|
10558
|
-
verbose: options.verbose
|
|
11033
|
+
verbose: options.verbose,
|
|
11034
|
+
format: options.format,
|
|
11035
|
+
strict: options.strict,
|
|
11036
|
+
dryRun: options.dryRun,
|
|
11037
|
+
cache: options.cache,
|
|
11038
|
+
quiet: options.quiet,
|
|
11039
|
+
progress: options.progress,
|
|
11040
|
+
sbom: options.sbom,
|
|
11041
|
+
output: options.output,
|
|
11042
|
+
scanners: options.scanners,
|
|
11043
|
+
excludeScanner: options.excludeScanner
|
|
10559
11044
|
}
|
|
10560
11045
|
}
|
|
10561
11046
|
})).id;
|
|
@@ -11009,7 +11494,7 @@ function determineEffectiveSessionSource({ provider, sessionConfig }) {
|
|
|
11009
11494
|
async function testProviderConnectivity({ provider, prompt = "Hello World!", inputs }) {
|
|
11010
11495
|
const vars = {};
|
|
11011
11496
|
if (!provider?.config?.sessionParser) vars["sessionId"] = crypto.randomUUID();
|
|
11012
|
-
if (inputs && typeof inputs === "object") for (const [varName,
|
|
11497
|
+
if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) vars[varName] = createPlaceholderInputValue(varName, definition);
|
|
11013
11498
|
const testSuite = {
|
|
11014
11499
|
providers: [provider],
|
|
11015
11500
|
prompts: [{
|
|
@@ -11240,10 +11725,15 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11240
11725
|
sessionConfig
|
|
11241
11726
|
});
|
|
11242
11727
|
const initialSessionId = effectiveSessionSource === "server" ? void 0 : crypto.randomUUID();
|
|
11728
|
+
const materializeSessionPrompt = (prompt) => {
|
|
11729
|
+
if (!mainInputVariable) return prompt;
|
|
11730
|
+
const definition = inputs?.[mainInputVariable];
|
|
11731
|
+
return definition ? createPlaceholderInputValue(mainInputVariable, definition, prompt) : prompt;
|
|
11732
|
+
};
|
|
11243
11733
|
const inputVars = {};
|
|
11244
|
-
if (inputs && typeof inputs === "object") for (const [varName,
|
|
11734
|
+
if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) {
|
|
11245
11735
|
if (varName === mainInputVariable) continue;
|
|
11246
|
-
inputVars[varName] =
|
|
11736
|
+
inputVars[varName] = createPlaceholderInputValue(varName, definition);
|
|
11247
11737
|
}
|
|
11248
11738
|
const firstPrompt = "What can you help me with?";
|
|
11249
11739
|
const secondPrompt = "What was the last thing I asked you?";
|
|
@@ -11256,7 +11746,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11256
11746
|
vars: {
|
|
11257
11747
|
...initialSessionId ? { sessionId: initialSessionId } : {},
|
|
11258
11748
|
...inputVars,
|
|
11259
|
-
...mainInputVariable ? { [mainInputVariable]: firstPrompt } : {}
|
|
11749
|
+
...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(firstPrompt) } : {}
|
|
11260
11750
|
},
|
|
11261
11751
|
prompt: {
|
|
11262
11752
|
raw: firstPrompt,
|
|
@@ -11303,7 +11793,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
|
|
|
11303
11793
|
vars: {
|
|
11304
11794
|
...extractedSessionId ? { sessionId: extractedSessionId } : {},
|
|
11305
11795
|
...inputVars,
|
|
11306
|
-
...mainInputVariable ? { [mainInputVariable]: secondPrompt } : {}
|
|
11796
|
+
...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(secondPrompt) } : {}
|
|
11307
11797
|
},
|
|
11308
11798
|
prompt: {
|
|
11309
11799
|
raw: secondPrompt,
|
|
@@ -11828,7 +12318,7 @@ providersRouter.post("/test-session", async (req, res) => {
|
|
|
11828
12318
|
const TestCaseGenerationSchema = z.object({
|
|
11829
12319
|
plugin: z.object({
|
|
11830
12320
|
id: z.string().refine((val) => ALL_PLUGINS.includes(val), { message: `Invalid plugin ID. Must be one of: ${ALL_PLUGINS.join(", ")}` }),
|
|
11831
|
-
config: PluginConfigSchema.optional().prefault({})
|
|
12321
|
+
config: PluginConfigSchema.catchall(z.unknown()).optional().prefault({})
|
|
11832
12322
|
}),
|
|
11833
12323
|
strategy: z.object({
|
|
11834
12324
|
id: z.string().refine((val) => ALL_STRATEGIES.includes(val), { message: `Invalid strategy ID. Must be one of: ${ALL_STRATEGIES.join(", ")}` }),
|
|
@@ -12405,11 +12895,11 @@ redteamRouter.post("/:taskId", async (req, res) => {
|
|
|
12405
12895
|
}
|
|
12406
12896
|
const { taskId } = paramsResult.data;
|
|
12407
12897
|
const cloudFunctionUrl = getRemoteGenerationUrl();
|
|
12408
|
-
logger.debug(`Received ${taskId} task request
|
|
12898
|
+
logger.debug(`Received ${taskId} task request`, {
|
|
12409
12899
|
method: req.method,
|
|
12410
12900
|
url: req.url,
|
|
12411
|
-
body:
|
|
12412
|
-
})
|
|
12901
|
+
body: sanitizeObject(bodyResult.data, { context: "request body" })
|
|
12902
|
+
});
|
|
12413
12903
|
try {
|
|
12414
12904
|
logger.debug(`Sending request to cloud function: ${cloudFunctionUrl}`);
|
|
12415
12905
|
const response = await fetchWithProxy(cloudFunctionUrl, {
|
|
@@ -12781,7 +13271,7 @@ router.get("/", async (_req, res) => {
|
|
|
12781
13271
|
};
|
|
12782
13272
|
} catch (error) {
|
|
12783
13273
|
logger.debug(`Failed to fetch latest version: ${error}`);
|
|
12784
|
-
latestVersion = versionCache.latestVersion ?? "0.121.
|
|
13274
|
+
latestVersion = versionCache.latestVersion ?? "0.121.8";
|
|
12785
13275
|
}
|
|
12786
13276
|
}
|
|
12787
13277
|
const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
|
|
@@ -12790,7 +13280,7 @@ router.get("/", async (_req, res) => {
|
|
|
12790
13280
|
selfHosted,
|
|
12791
13281
|
isNpx
|
|
12792
13282
|
});
|
|
12793
|
-
const resolvedLatestVersion = latestVersion ?? "0.121.
|
|
13283
|
+
const resolvedLatestVersion = latestVersion ?? "0.121.8";
|
|
12794
13284
|
const response = {
|
|
12795
13285
|
currentVersion: VERSION,
|
|
12796
13286
|
latestVersion: resolvedLatestVersion,
|
|
@@ -15641,8 +16131,8 @@ function shouldUseInkList() {
|
|
|
15641
16131
|
async function runInkList(options) {
|
|
15642
16132
|
const [React, { renderInteractive }, { ListApp }] = await Promise.all([
|
|
15643
16133
|
import("react"),
|
|
15644
|
-
import("./render-
|
|
15645
|
-
import("./ListApp-
|
|
16134
|
+
import("./render-DznWrxGO.js"),
|
|
16135
|
+
import("./ListApp-DLmM02JS.js")
|
|
15646
16136
|
]);
|
|
15647
16137
|
let result = { cancelled: false };
|
|
15648
16138
|
let resolveResult;
|
|
@@ -17950,8 +18440,8 @@ function registerRunEvaluationTool(server) {
|
|
|
17950
18440
|
}
|
|
17951
18441
|
filteredTestSuite.tests = filteredTests;
|
|
17952
18442
|
}
|
|
17953
|
-
const { evaluate } = await import("./evaluator-
|
|
17954
|
-
const evalRecord = await (await import("./eval-
|
|
18443
|
+
const { evaluate } = await import("./evaluator-B9LGbKI8.js");
|
|
18444
|
+
const evalRecord = await (await import("./eval-DfR9885C.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
|
|
17955
18445
|
logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
|
|
17956
18446
|
const startTime = Date.now();
|
|
17957
18447
|
const result = await evaluate(filteredTestSuite, evalRecord, {
|
|
@@ -18517,6 +19007,9 @@ function analyzeConfiguration(config) {
|
|
|
18517
19007
|
}
|
|
18518
19008
|
//#endregion
|
|
18519
19009
|
//#region src/commands/mcp/server.ts
|
|
19010
|
+
function setMcpTransport(transport) {
|
|
19011
|
+
Object.assign(process.env, { MCP_TRANSPORT: transport });
|
|
19012
|
+
}
|
|
18520
19013
|
/**
|
|
18521
19014
|
* Creates an MCP server with tools for interacting with promptfoo
|
|
18522
19015
|
*/
|
|
@@ -18552,7 +19045,7 @@ async function createMcpServer() {
|
|
|
18552
19045
|
*/
|
|
18553
19046
|
async function startHttpMcpServer(port) {
|
|
18554
19047
|
if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error(`Invalid port number: ${port}. Port must be an integer between 1 and 65535.`);
|
|
18555
|
-
|
|
19048
|
+
setMcpTransport("http");
|
|
18556
19049
|
const app = express();
|
|
18557
19050
|
app.use(express.json());
|
|
18558
19051
|
const mcpServer = await createMcpServer();
|
|
@@ -18609,7 +19102,7 @@ async function startHttpMcpServer(port) {
|
|
|
18609
19102
|
* Starts an MCP server with stdio transport
|
|
18610
19103
|
*/
|
|
18611
19104
|
async function startStdioMcpServer() {
|
|
18612
|
-
|
|
19105
|
+
setMcpTransport("stdio");
|
|
18613
19106
|
logger.transports.forEach((transport) => {
|
|
18614
19107
|
if (transport.constructor.name === "Console" || transport.name === "console") transport.silent = true;
|
|
18615
19108
|
});
|
|
@@ -19478,7 +19971,7 @@ function printErrorInformation(errorLogFile, debugLogFile) {
|
|
|
19478
19971
|
async function main() {
|
|
19479
19972
|
setupEnvFilesFromArgv();
|
|
19480
19973
|
initializeRunLogging();
|
|
19481
|
-
if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") process.env
|
|
19974
|
+
if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") Object.assign(process.env, { PROMPTFOO_DISABLE_UPDATE: "true" });
|
|
19482
19975
|
await checkForUpdates();
|
|
19483
19976
|
await runDbMigrations();
|
|
19484
19977
|
const { defaultConfig, defaultConfigPath } = await loadDefaultConfig();
|