promptfoo 0.121.5 → 0.121.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-BIFntVWB.cjs → accounts-BVz5gHLK.cjs} +16 -10
- package/dist/src/{accounts-bnyHT7Ju.js → accounts-BWjqwsrf.js} +12 -7
- package/dist/src/{accounts-CLJHCDDb.js → accounts-D6IBfEE0.js} +13 -8
- package/dist/src/{accounts-CaLNYnf7.js → accounts-DAv_0iE7.js} +12 -7
- package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-BJKAkz2e.js} +2 -2
- package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-C-A92xhn.cjs} +8 -7
- package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-DmoS_S4B.js} +2 -2
- package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-GdToujHu.js} +2 -2
- package/dist/src/{agents-BGqaTDnr.js → agents-2C8NN6I1.js} +20 -9
- package/dist/src/{agents-n6vPqV3i.js → agents-BMAiSR2o.js} +4 -4
- package/dist/src/{agents-BYdMl1UE.js → agents-C-PGaxwj.js} +20 -9
- package/dist/src/{agents-BV9yFpXX.js → agents-C98cz5pl.js} +20 -9
- package/dist/src/{agents-WULPVjbH.cjs → agents-DB8Ub2Ld.cjs} +7 -6
- package/dist/src/{agents-DhxWMCtH.js → agents-Dwshy2H8.js} +4 -4
- package/dist/src/{agents-emVcx3yh.js → agents-mlKjx-cK.js} +2 -2
- package/dist/src/{agents-DiWmQYH9.cjs → agents-n2ej-c4H.cjs} +21 -10
- package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-Bi-laUlp.js} +6 -6
- package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-D0OFV4Vj.cjs} +7 -7
- package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-WyUK0wYy.js} +6 -6
- package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-v-63ZjEI.js} +6 -6
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-sCXUfaWo.js +1 -0
- package/dist/src/app/assets/index-BopgkZEh.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/{audio-CScmnmEB.js → audio-DnEIHWZR.js} +3 -3
- package/dist/src/{audio-Da8U9IS5.js → audio-heR0mu0n.js} +3 -3
- package/dist/src/{audio-BvpTOArF.js → audio-wafFO1wn.js} +3 -3
- package/dist/src/{audio-C0vDeS0j.cjs → audio-x44tsxIo.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-2G3Be6oL.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-81PPynHR.js +75 -0
- package/dist/src/authoritativeMarkupInjection-D9O70HPi.js +74 -0
- package/dist/src/authoritativeMarkupInjection-PxSf3Rh8.js +74 -0
- package/dist/src/{base-dYsl2hmL.js → base-0Gzzue9Z.js} +4 -3
- package/dist/src/{base-BOMaNEes.js → base-Bp4c52YZ.js} +4 -3
- package/dist/src/{base-BTux96b1.js → base-BuNn-YBX.js} +4 -3
- package/dist/src/{base-Tw6uhH8K.cjs → base-BzdS8tod.cjs} +5 -4
- package/dist/src/bestOfN-BHiOjeaq.js +136 -0
- package/dist/src/bestOfN-CdkNPPbX.js +136 -0
- package/dist/src/bestOfN-ClUSOhN0.js +137 -0
- package/dist/src/bestOfN-DWwXEg1h.cjs +140 -0
- package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
- package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
- package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/cache-BKYi3kAA.js +3 -0
- package/dist/src/{cache-BI5BY7ey.js → cache-BR77mdIR.js} +127 -11
- package/dist/src/{cache-Cr-qWIbP.js → cache-CrioYnaa.js} +125 -10
- package/dist/src/{cache-Bzttsk0X.js → cache-DdriHsNX.js} +125 -10
- package/dist/src/{cache-DGg-yTZG.cjs → cache-h5MWOBZI.cjs} +135 -25
- package/dist/src/{chat-Cx_LkwvZ.js → chat-BBEnnpQk.js} +11 -11
- package/dist/src/{chat-DChSH_Es.js → chat-BSos6PvZ.js} +9 -9
- package/dist/src/{chat-aMQZw6R7.js → chat-Bnt7ieO0.js} +4 -4
- package/dist/src/{chat-vYqqv1gP.cjs → chat-DSyYuTYT.cjs} +14 -13
- package/dist/src/{chat-BLOdH60v.js → chat-DTdf-J5Q.js} +11 -11
- package/dist/src/{chat-DH97tVV9.cjs → chat-Dq3DomYU.cjs} +6 -6
- package/dist/src/{chat-DG2LkwLq.js → chat-g5QLeLOo.js} +4 -4
- package/dist/src/{chat-D9nudO9b.js → chat-mTTuUAYb.js} +4 -4
- package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
- package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
- package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-BGUac_kS.js} +213 -66
- package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-BP__YGfK.js} +213 -66
- package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-Ddgx5BIQ.js} +212 -65
- package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-n2XcEclh.cjs} +219 -71
- package/dist/src/cloud-DiWbUiVP.js +3 -0
- package/dist/src/{cloud-Da0bofJd.js → cloud-DqF5N1aJ.js} +2 -2
- package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-BGamMotN.js} +4 -4
- package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-BwW8W-w7.js} +4 -4
- package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-DCRGnsyL.cjs} +6 -6
- package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-sD26nP6V.js} +4 -4
- package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-2lnOT4qM.js} +3 -3
- package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-B1-8KNCt.js} +3 -3
- package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-CCC1DFrC.cjs} +5 -5
- package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-DOghiU6r.js} +3 -3
- package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-BWeWamEb.js} +19 -4
- package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-DyQB1P0p.js} +20 -5
- package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-R9u_G7W9.cjs} +27 -11
- package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-ZQRJSJjU.js} +20 -5
- package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-B62H0fe7.js} +16 -5
- package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-BRPUvJG8.cjs} +31 -12
- package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-Dio1zJBS.js} +18 -7
- package/dist/src/codex-sdk-DxukZs_K.js +3 -0
- package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-iEmW1eS2.js} +18 -7
- package/dist/src/{cometapi-B5ImDlSm.js → cometapi--hh7dESS.js} +7 -7
- package/dist/src/{cometapi-CC7hWxmX.js → cometapi-C7yWNGqt.js} +7 -7
- package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CSIi16a0.js} +7 -7
- package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-sZgBZtbU.cjs} +8 -8
- package/dist/src/{completion-DtQ72Bm3.cjs → completion-CWtqdn3z.cjs} +6 -6
- package/dist/src/{completion-Vq_ad618.js → completion-DT8cxo9T.js} +4 -4
- package/dist/src/{completion-2iuYVxwi.js → completion-DUScduXp.js} +5 -5
- package/dist/src/{completion-CrD6MQ93.js → completion-D_2IOAoS.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-B41TwUHM.js +722 -0
- package/dist/src/crescendo-Bfic7KC4.js +724 -0
- package/dist/src/crescendo-DXFB7rHP.cjs +725 -0
- package/dist/src/crescendo-DvvYxMLA.js +723 -0
- package/dist/src/custom-BdzuqFTN.js +619 -0
- package/dist/src/custom-Cf5Q6r-P.js +618 -0
- package/dist/src/custom-DLgufezC.js +620 -0
- package/dist/src/custom-LEXHCRe3.cjs +621 -0
- package/dist/src/{docker--3qzPa-6.js → docker-BOSO_6hK.js} +5 -5
- package/dist/src/{docker-D3AY-5F5.cjs → docker-D0h2vFrc.cjs} +6 -6
- package/dist/src/{docker-DCsCDvwM.js → docker-D2TWGyTP.js} +5 -5
- package/dist/src/{docker-Dorv4_Dg.js → docker-Vj_4_cPg.js} +5 -5
- package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BQIApR18.cjs} +6 -6
- package/dist/src/{embedding-ChS1ivFS.js → embedding-CStK0TV6.js} +5 -5
- package/dist/src/{embedding-DNRvZwRN.js → embedding-CU78FMnw.js} +5 -5
- package/dist/src/{embedding-D_bI4NDq.js → embedding-CV8lmCnU.js} +4 -4
- package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
- package/dist/src/{eval-DJ_4A-tr.js → eval-B3BaNBbO.js} +19 -19
- package/dist/src/{eval-BQPLBJbw.js → eval-DfR9885C.js} +1 -1
- package/dist/src/evalResult-BtZSUgQv.js +3 -0
- package/dist/src/{evalResult-D6P5I5il.js → evalResult-CcSqNl_Y.js} +17 -10
- package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-DHXs-9TL.cjs} +29 -11
- package/dist/src/{evalResult-pSvGWFMo.js → evalResult-PGqEbasb.js} +18 -11
- package/dist/src/evaluator-B9LGbKI8.js +3 -0
- package/dist/src/{evaluator-D-UIbbYq.js → evaluator-n_dEb00o.js} +258 -132
- package/dist/src/evaluatorHelpers-CrRObe2z.js +510 -0
- package/dist/src/evaluatorHelpers-D06I9WFL.cjs +537 -0
- package/dist/src/evaluatorHelpers-D1_kwvyp.js +511 -0
- package/dist/src/{extractor-DxyiFhPk.js → extractor-BVkZtk4R.js} +5 -5
- package/dist/src/{extractor-BM3jRERL.js → extractor-D3Fv_Tdh.js} +5 -5
- package/dist/src/{extractor-YlZbUMsL.js → extractor-Jp53vs-6.js} +5 -5
- package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-bV_NOoaz.cjs} +6 -6
- package/dist/src/{fetch-Y5qX_kST.js → fetch-BSSAcMxf.js} +90 -26
- package/dist/src/fetch-BodQTrMU.js +3 -0
- package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Cfm4EuOB.cjs} +115 -32
- package/dist/src/{fetch-B6ch2nU2.js → fetch-Cpf1U1nO.js} +86 -26
- package/dist/src/{fetch-D9xxyC1p.js → fetch-Doks14zQ.js} +90 -26
- package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ba7Gznzq.js +467 -0
- package/dist/src/goat-CJngS-WU.js +468 -0
- package/dist/src/goat-CwsbfQeu.js +466 -0
- package/dist/src/goat-DOMbozoX.cjs +470 -0
- package/dist/src/graders-B3D7kCcD.js +3 -0
- package/dist/src/{graders-CgPn32yp.js → graders-BQt1BaQe.js} +1019 -86
- package/dist/src/{graders-Bw1wk_21.cjs → graders-CBQ2s6gz.cjs} +1087 -130
- package/dist/src/{graders-CwrbifOo.js → graders-DaRU98zs.js} +1022 -84
- package/dist/src/{graders-BoUqsCEm.js → graders-KXzjnIim.js} +1024 -86
- package/dist/src/hydra-BU6GjYoQ.js +560 -0
- package/dist/src/hydra-BmXHxlyl.js +559 -0
- package/dist/src/hydra-CR0KyYDb.js +561 -0
- package/dist/src/hydra-RO9jBks7.cjs +560 -0
- package/dist/src/{image-BeWaInPF.js → image-BBmZdKO3.js} +3 -3
- package/dist/src/{image-qjO6FWPs.js → image-B_PFG7IG.js} +3 -3
- package/dist/src/image-BtODOZjh.js +442 -0
- package/dist/src/image-CYuNJIqd.js +442 -0
- package/dist/src/{image-D10dNAav.cjs → image-E00WFZkq.cjs} +4 -4
- package/dist/src/image-HK2Cfqb1.cjs +465 -0
- package/dist/src/{image-Dr_3I3nK.js → image-LGj8dTcr.js} +3 -3
- package/dist/src/image-YMKejC0r.js +443 -0
- package/dist/src/index.cjs +711 -339
- package/dist/src/index.d.cts +3159 -1611
- package/dist/src/index.d.ts +3158 -1610
- package/dist/src/index.js +605 -249
- package/dist/src/indirectWebPwn-B80dLlFC.js +260 -0
- package/dist/src/indirectWebPwn-BMTXXznx.js +386 -0
- package/dist/src/indirectWebPwn-BZFPV7Q9.js +385 -0
- package/dist/src/indirectWebPwn-BaEQEOIO.cjs +260 -0
- package/dist/src/indirectWebPwn-BzpyMnFS.js +259 -0
- package/dist/src/indirectWebPwn-CiWB-vVH.js +385 -0
- package/dist/src/indirectWebPwn-D7NA9Nsv.cjs +397 -0
- package/dist/src/indirectWebPwn-o_bEFMjP.js +259 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-B-l0syBV.js +501 -0
- package/dist/src/iterative-BACUeCCz.cjs +503 -0
- package/dist/src/iterative-CVwoExo8.js +502 -0
- package/dist/src/iterative-QDrGSyss.js +500 -0
- package/dist/src/iterativeImage-BQg2OwA6.js +413 -0
- package/dist/src/iterativeImage-Bwn0fM75.js +413 -0
- package/dist/src/iterativeImage-CcgVyASo.cjs +415 -0
- package/dist/src/iterativeImage-D_UbQXg4.js +414 -0
- package/dist/src/iterativeMeta-CIu-CHRS.js +405 -0
- package/dist/src/iterativeMeta-CN8CNjFA.js +406 -0
- package/dist/src/iterativeMeta-DN6BTjpq.js +404 -0
- package/dist/src/iterativeMeta-DlqY3BsS.cjs +405 -0
- package/dist/src/iterativeTree-CJ8a8G9T.js +820 -0
- package/dist/src/iterativeTree-DcuFXnjL.js +819 -0
- package/dist/src/iterativeTree-FrDDYAN0.js +818 -0
- package/dist/src/iterativeTree-LOgAi0nU.cjs +821 -0
- package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-CdbcGBZF.cjs} +25 -11
- package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-CjOXp6Lr.js} +23 -9
- package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-DxAq4n4z.js} +24 -10
- package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-o_wTLzrt.js} +24 -10
- package/dist/src/{litellm-DRc4qWfc.js → litellm-B2gHwya_.js} +4 -4
- package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CPpdlO7n.cjs} +5 -5
- package/dist/src/{litellm-BLSiANhk.js → litellm-CYfgxLrM.js} +4 -4
- package/dist/src/{litellm-DQGo_juI.js → litellm-ojWBKU3C.js} +4 -4
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-BCCO9XXG.cjs} +9 -8
- package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BPrdihAb.js} +5 -5
- package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-DP5N79lB.js} +5 -5
- package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-DTDyoAlM.js} +5 -5
- package/dist/src/main.js +727 -234
- package/dist/src/memoryPoisoning-B6N3us35.cjs +106 -0
- package/dist/src/memoryPoisoning-Bc_BK_k2.js +106 -0
- package/dist/src/memoryPoisoning-D375zwSX.js +107 -0
- package/dist/src/memoryPoisoning-DJA0YjJT.js +106 -0
- package/dist/src/{messages-CewuNcNS.js → messages-Bg29Nbit.js} +17 -9
- package/dist/src/{messages-BnsVHUnm.cjs → messages-BrZEnHsV.cjs} +24 -15
- package/dist/src/{messages-CI69Lasb.js → messages-CBulRaud.js} +18 -10
- package/dist/src/{messages-B9dSjrNf.js → messages-K9A8RxBM.js} +18 -10
- package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
- package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
- package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-BDLwSGN0.js +46 -0
- package/dist/src/mischievousUser-Blx-OtT3.js +47 -0
- package/dist/src/mischievousUser-CHuTTvBg.js +46 -0
- package/dist/src/mischievousUser-CSUrH3fq.cjs +46 -0
- package/dist/src/{modelslab-zpz9JcK0.js → modelslab-BjEk7yCP.js} +6 -6
- package/dist/src/{modelslab-BCLOtfek.js → modelslab-CBCrdIBc.js} +6 -6
- package/dist/src/{modelslab-D73OnKSx.js → modelslab-CrGpXuhv.js} +6 -6
- package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-Cvh0klQx.cjs} +7 -7
- package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-CUDDZcSA.js} +5 -5
- package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-CyDESu5J.js} +5 -5
- package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-DH_Ksu6X.cjs} +9 -8
- package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-YQAqUYJY.js} +5 -5
- package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-BVFFB7JZ.js} +2 -2
- package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-Bg3cxMMu.cjs} +5 -4
- package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-BhmnRGyh.js} +3 -3
- package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-DxmK95c1.js} +3 -3
- package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
- package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-CHQaZi_-.cjs} +17 -15
- package/dist/src/{openclaw-DA8U4DsD.js → openclaw-CWOm2eOr.js} +9 -8
- package/dist/src/{openclaw-DObVgpjC.js → openclaw-Cdk9JKfY.js} +9 -8
- package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-DhF8rUpI.js} +9 -8
- package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-BRjiPV-g.js} +38 -3
- package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-CPFMw0ed.cjs} +45 -9
- package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-CaSOCsBA.js} +39 -4
- package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-CqkyG8De.js} +39 -4
- package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
- package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
- package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BU4_0J85.js +238 -0
- package/dist/src/promptfoo-CReYAtfb.js +237 -0
- package/dist/src/promptfoo-ClChwT74.cjs +292 -0
- package/dist/src/promptfoo-CqEpj6Sr.js +237 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
- package/dist/src/{providers-DT-GtF2t.js → providers-B7TyByfj.js} +789 -11925
- package/dist/src/{providers-CJh7iriU.js → providers-BGc7tDtQ.js} +796 -11872
- package/dist/src/{providers-Ctcc592x.js → providers-CSOp-bCm.js} +1 -1
- package/dist/src/{providers-eDShy16E.cjs → providers-V6RBuieY.cjs} +837 -12138
- package/dist/src/{providers-DRrerKra.js → providers-iH3Sw1yo.js} +851 -11938
- package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
- package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
- package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
- package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
- package/dist/src/{quiverai-CPKhWgaT.js → quiverai-58BtRRet.js} +3 -3
- package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-C1dYu5MW.js} +3 -3
- package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CNMIpZQg.js} +3 -3
- package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-fmSfU43k.cjs} +4 -4
- package/dist/src/registry-6Jw6ebor.cjs +124 -0
- package/dist/src/registry-BTGk2ZkB.js +124 -0
- package/dist/src/registry-BTNqmP5o.js +125 -0
- package/dist/src/registry-NGnOG2xa.js +124 -0
- package/dist/src/{server-gyd6d4Hc.js → remoteGeneration--D6WjzUm.js} +15 -108
- package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-BpIYlb_O.cjs} +30 -119
- package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-CJC3E0aW.js} +15 -147
- package/dist/src/remoteGeneration-D6UjE2JT.js +218 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-1ztiVYsx.js → responses-5Gf5HNOi.js} +11 -7
- package/dist/src/{responses-BiaBguAu.js → responses-BVi7xIdv.js} +13 -9
- package/dist/src/{responses-CF-ayauu.cjs → responses-CUARGrhY.cjs} +16 -11
- package/dist/src/{responses-B8haB-mD.js → responses-CrmWv6iz.js} +13 -9
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
- package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
- package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
- package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-BeVTKfrv.cjs} +65 -53
- package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-BwbV5U7_.js} +61 -50
- package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-ClAZ6Qwc.js} +61 -50
- package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-DY2qrOpq.js} +61 -50
- package/dist/src/{scanner-nOCWNIXa.js → scanner-BS-iFIp3.js} +6 -6
- package/dist/src/server/index.js +4115 -420
- package/dist/src/server-BHOEL8p8.cjs +126 -0
- package/dist/src/server-CbiJppij.js +107 -0
- package/dist/src/server-D18AAlAc.js +3 -0
- package/dist/src/server-DLYjSFm2.js +182 -0
- package/dist/src/server-JQYD_Nws.js +146 -0
- package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-CRq0gGVf.js +1334 -0
- package/dist/src/shared-CSIGeGLl.js +1334 -0
- package/dist/src/shared-DNvim54U.js +1335 -0
- package/dist/src/shared-aHWko3P1.cjs +1436 -0
- package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
- package/dist/src/simulatedUser-C7sOFSF6.js +222 -0
- package/dist/src/simulatedUser-DH_7RzEQ.js +222 -0
- package/dist/src/simulatedUser-lgMMmniD.js +223 -0
- package/dist/src/simulatedUser-mnCUS9Bm.cjs +227 -0
- package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
- package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
- package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-95htjpLs.js +834 -0
- package/dist/src/storage-A4WnAeN3.cjs +911 -0
- package/dist/src/storage-B2Ql_oq4.js +822 -0
- package/dist/src/storage-EKVWZBNY.js +875 -0
- package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
- package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-CBI1brSy.js +2333 -0
- package/dist/src/strategies-CCcnEbFO.cjs +2360 -0
- package/dist/src/strategies-CD1gHeeQ.js +2331 -0
- package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
- package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
- package/dist/src/telemetry-BUm_krVX.js +3 -0
- package/dist/src/{telemetry--iqaGyaS.js → telemetry-BbpmrenM.js} +4 -4
- package/dist/src/{telemetry-CgdVGV8N.js → telemetry-C4bX-6Sr.js} +4 -4
- package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-C_ImlCOk.cjs} +15 -9
- package/dist/src/{telemetry-DWdGHvEf.js → telemetry-DOE567Wj.js} +4 -4
- package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
- package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
- package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BOP1FsRt.js +220 -0
- package/dist/src/tracingOptions-BvmDa_jH.js +219 -0
- package/dist/src/tracingOptions-DFBoE8O0.cjs +249 -0
- package/dist/src/tracingOptions-xhjOg2b1.js +221 -0
- package/dist/src/{transcription-D7Q0vJsh.js → transcription-BOXDoehT.js} +4 -4
- package/dist/src/{transcription-Bm2emLmJ.js → transcription-CpVdKc4P.js} +5 -5
- package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-D8kkPXg_.cjs} +9 -8
- package/dist/src/{transcription-84t4ALo2.js → transcription-Z94eV9LR.js} +5 -5
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/{transform-DtooZqYY.js → transform-BufxPIQL.js} +8 -8
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DJkt81VY.cjs} +12 -11
- package/dist/src/{transform-B-b6Cq-q.js → transform-DtTfiGoh.js} +8 -8
- package/dist/src/{transform-_DpNB4qp.js → transform-Wp6s_5QE.js} +8 -8
- package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
- package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
- package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
- package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
- package/dist/src/{util-Dpmm_dAI.cjs → util-BHCAeuXx.cjs} +33 -7
- package/dist/src/{util-DFPeFkiV.js → util-CMrHV35u.js} +28 -8
- package/dist/src/{util-DvpHnLt0.cjs → util-COnzevXE.cjs} +29 -21
- package/dist/src/{util-BlFVL0UF.js → util-CYev3d-r.js} +22 -7
- package/dist/src/util-DGqkTb2-.js +327 -0
- package/dist/src/util-DNtUsus_.cjs +386 -0
- package/dist/src/{util-Dub0f_ej.js → util-DRfqa4xz.js} +17 -10
- package/dist/src/{util-BVXcTwXu.js → util-DTq3jq2z.js} +28 -8
- package/dist/src/{util-C-kmRosx.js → util-DeY58psG.js} +22 -7
- package/dist/src/util-DpV6KT5i.js +327 -0
- package/dist/src/{util-3pBZZb_H.js → util-DwNJzqOV.js} +45 -10
- package/dist/src/util-ETfU_sS9.js +328 -0
- package/dist/src/{util-A5_ZsQUn.cjs → util-NYQvo1C7.cjs} +25 -9
- package/dist/src/{util-DN0-b81k.js → util-X4KQgyVD.js} +28 -8
- package/dist/src/{util-BQOCAHQC.js → util-jZRrXe1P.js} +46 -11
- package/dist/src/{util-B9CNhyac.js → util-o2Qg5rZv.js} +22 -7
- package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
- package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
- package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/{version-CbuBKu2U.js → version-CHR-EFec.js} +2 -2
- package/dist/src/{version-CbpiUINz.js → version-F0YDgb7J.js} +2 -2
- package/dist/src/{version-D9zu9FWB.cjs → version-VzUqOBZk.cjs} +2 -2
- package/dist/src/{version-0frU0UTr.js → version-qVEN5qCm.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +49 -23
- package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
- package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
- package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-BRkhlH3k.cjs +0 -3
- package/dist/src/cache-BlC6aeJ0.js +0 -3
- package/dist/src/cloud-CoD5OacT.js +0 -3
- package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
- package/dist/src/codex-sdk-Danroptg.cjs +0 -2
- package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
- package/dist/src/evalResult-BBK58h2B.js +0 -3
- package/dist/src/evalResult-spPqh1G_.js +0 -2
- package/dist/src/evaluator-DgLKaZk8.js +0 -3
- package/dist/src/fetch-8viavNv8.js +0 -3
- package/dist/src/graders-C84JI-m5.js +0 -2
- package/dist/src/graders-CBbd0K0Q.cjs +0 -2
- package/dist/src/graders-CbQqpHSN.js +0 -3
- package/dist/src/graders-DS42d3ZG.js +0 -2
- package/dist/src/image-BmilRNqO.js +0 -258
- package/dist/src/image-CxJoa3aW.cjs +0 -280
- package/dist/src/image-DsGRlkh7.js +0 -257
- package/dist/src/image-a_SGUobh.js +0 -257
- package/dist/src/providers-BuyzKt7C.js +0 -2
- package/dist/src/providers-C7lNVBjX.cjs +0 -3
- package/dist/src/providers-CCE2COJi2.js +0 -2
- package/dist/src/render-7uNJ2V14.js +0 -135
- package/dist/src/render-DlscvAUJ.js +0 -135
- package/dist/src/render-eui5p5mL.js +0 -136
- package/dist/src/render-tG6ir9_g.cjs +0 -165
- package/dist/src/rubyUtils-4hjGxvju.js +0 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
- package/dist/src/server-ByiF3qlg.js +0 -386
- package/dist/src/server-C0XKRNB_.cjs +0 -2
- package/dist/src/server-C_15p79-.js +0 -3
- package/dist/src/store-2OXm_eBY.js +0 -240
- package/dist/src/store-BELqNwvz.js +0 -3
- package/dist/src/store-uQZ4AjPe.cjs +0 -2
- package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
- package/dist/src/telemetry-ZdPZc0fm.js +0 -3
- package/dist/src/transform-BQt0BeAW.js +0 -3
- package/dist/src/transform-Bq5oqC0s.cjs +0 -2
- package/dist/src/transform-C9izGX54.cjs +0 -228
- package/dist/src/transform-CwbAZ84V.js +0 -216
- package/dist/src/transform-DzCF-wqV.js +0 -213
- package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/index.cjs
CHANGED
|
@@ -2,78 +2,93 @@ Object.defineProperties(exports, {
|
|
|
2
2
|
__esModule: { value: true },
|
|
3
3
|
[Symbol.toStringTag]: { value: "Module" }
|
|
4
4
|
});
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const
|
|
13
|
-
const
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
const
|
|
25
|
-
const
|
|
26
|
-
const
|
|
27
|
-
const
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
const
|
|
32
|
-
const
|
|
33
|
-
const
|
|
34
|
-
const
|
|
35
|
-
const
|
|
5
|
+
const require_rolldown_runtime = require("./rolldown-runtime-D_mwlA32.cjs");
|
|
6
|
+
const require_logger = require("./logger-cfNpzI4o.cjs");
|
|
7
|
+
const require_invariant = require("./invariant-QtnLD03y.cjs");
|
|
8
|
+
const require_types = require("./types-CxJvaY2S.cjs");
|
|
9
|
+
const require_fetch = require("./fetch-Cfm4EuOB.cjs");
|
|
10
|
+
const require_version = require("./version-VzUqOBZk.cjs");
|
|
11
|
+
const require_fileExtensions = require("./fileExtensions-BhdwzYaD.cjs");
|
|
12
|
+
const require_accounts = require("./accounts-BVz5gHLK.cjs");
|
|
13
|
+
const require_esm = require("./esm-BIKakvNa.cjs");
|
|
14
|
+
const require_render = require("./render-BNTrbmBw.cjs");
|
|
15
|
+
const require_providerRegistry = require("./providerRegistry-CZO_w7ue.cjs");
|
|
16
|
+
const require_remoteGeneration = require("./remoteGeneration-BpIYlb_O.cjs");
|
|
17
|
+
const require_server = require("./server-BHOEL8p8.cjs");
|
|
18
|
+
const require_storage = require("./storage-A4WnAeN3.cjs");
|
|
19
|
+
const require_pythonUtils = require("./pythonUtils-Cokhluq3.cjs");
|
|
20
|
+
const require_util = require("./util-COnzevXE.cjs");
|
|
21
|
+
const require_createHash = require("./createHash-CSiqnK5P.cjs");
|
|
22
|
+
const require_cache = require("./cache-h5MWOBZI.cjs");
|
|
23
|
+
const require_chat = require("./chat-DSyYuTYT.cjs");
|
|
24
|
+
const require_transform = require("./transform-DJkt81VY.cjs");
|
|
25
|
+
const require_util$1 = require("./util-NYQvo1C7.cjs");
|
|
26
|
+
const require_providers = require("./providers-V6RBuieY.cjs");
|
|
27
|
+
const require_tokenUsageUtils = require("./tokenUsageUtils-_B-P8IAi.cjs");
|
|
28
|
+
const require_text = require("./text-nywWsRBM.cjs");
|
|
29
|
+
const require_telemetry = require("./telemetry-C_ImlCOk.cjs");
|
|
30
|
+
const require_tables = require("./tables-DBIJU0WE.cjs");
|
|
31
|
+
const require_blobs = require("./blobs-B0977K1O.cjs");
|
|
32
|
+
const require_processShim = require("./processShim-DSY9BV2T.cjs");
|
|
33
|
+
const require_packageParser = require("./packageParser-QoCS1FMl.cjs");
|
|
34
|
+
const require_rubyUtils = require("./rubyUtils-CqUWBZAt.cjs");
|
|
35
|
+
const require_inputVariables = require("./inputVariables-Dq9W-Z3a.cjs");
|
|
36
|
+
const require_util$2 = require("./util-DNtUsus_.cjs");
|
|
37
|
+
const require_promptfoo = require("./promptfoo-ClChwT74.cjs");
|
|
38
|
+
const require_graders = require("./graders-CBQ2s6gz.cjs");
|
|
39
|
+
const require_shared = require("./shared-aHWko3P1.cjs");
|
|
40
|
+
const require_utils = require("./utils-Ve6kuJsa.cjs");
|
|
41
|
+
const require_transform$1 = require("./transform-BnSXWmU_2.cjs");
|
|
42
|
+
const require_store = require("./store-B2NDDooM.cjs");
|
|
43
|
+
const require_extractor = require("./extractor-bV_NOoaz.cjs");
|
|
44
|
+
const require_promptLength = require("./promptLength-BbBbDHNj.cjs");
|
|
45
|
+
const require_indirectWebPwn = require("./indirectWebPwn-D7NA9Nsv.cjs");
|
|
46
|
+
const require_toolAttributes = require("./toolAttributes-BAjwcBf0.cjs");
|
|
47
|
+
const require_evaluatorHelpers = require("./evaluatorHelpers-D06I9WFL.cjs");
|
|
48
|
+
const require_evalResult = require("./evalResult-DHXs-9TL.cjs");
|
|
49
|
+
const require_strategies = require("./strategies-CCcnEbFO.cjs");
|
|
36
50
|
let fs = require("fs");
|
|
37
|
-
fs =
|
|
51
|
+
fs = require_rolldown_runtime.__toESM(fs, 1);
|
|
38
52
|
let path = require("path");
|
|
39
|
-
path =
|
|
53
|
+
path = require_rolldown_runtime.__toESM(path, 1);
|
|
40
54
|
let async = require("async");
|
|
41
|
-
async =
|
|
55
|
+
async = require_rolldown_runtime.__toESM(async, 1);
|
|
42
56
|
let js_yaml = require("js-yaml");
|
|
43
|
-
js_yaml =
|
|
57
|
+
js_yaml = require_rolldown_runtime.__toESM(js_yaml, 1);
|
|
44
58
|
let node_async_hooks = require("node:async_hooks");
|
|
45
59
|
let chalk = require("chalk");
|
|
46
|
-
chalk =
|
|
60
|
+
chalk = require_rolldown_runtime.__toESM(chalk, 1);
|
|
47
61
|
let os = require("os");
|
|
48
|
-
os =
|
|
62
|
+
os = require_rolldown_runtime.__toESM(os, 1);
|
|
49
63
|
let dedent = require("dedent");
|
|
50
|
-
dedent =
|
|
64
|
+
dedent = require_rolldown_runtime.__toESM(dedent, 1);
|
|
51
65
|
let zod = require("zod");
|
|
52
66
|
let fs_promises = require("fs/promises");
|
|
53
|
-
fs_promises =
|
|
67
|
+
fs_promises = require_rolldown_runtime.__toESM(fs_promises, 1);
|
|
54
68
|
let util = require("util");
|
|
55
|
-
util =
|
|
69
|
+
util = require_rolldown_runtime.__toESM(util, 1);
|
|
56
70
|
let _inquirer_input = require("@inquirer/input");
|
|
57
|
-
_inquirer_input =
|
|
71
|
+
_inquirer_input = require_rolldown_runtime.__toESM(_inquirer_input, 1);
|
|
58
72
|
require("node:path");
|
|
59
73
|
require("node:url");
|
|
60
74
|
let crypto$1 = require("crypto");
|
|
61
|
-
crypto$1 =
|
|
75
|
+
crypto$1 = require_rolldown_runtime.__toESM(crypto$1, 1);
|
|
62
76
|
let _opentelemetry_api = require("@opentelemetry/api");
|
|
63
77
|
let readline = require("readline");
|
|
64
|
-
readline =
|
|
78
|
+
readline = require_rolldown_runtime.__toESM(readline, 1);
|
|
65
79
|
let csv_parse_sync = require("csv-parse/sync");
|
|
66
80
|
let glob = require("glob");
|
|
67
|
-
let drizzle_orm = require("drizzle-orm");
|
|
68
81
|
let fast_xml_parser = require("fast-xml-parser");
|
|
69
|
-
let
|
|
70
|
-
cli_progress = require_logger.__toESM(cli_progress);
|
|
82
|
+
let drizzle_orm = require("drizzle-orm");
|
|
71
83
|
let url = require("url");
|
|
72
84
|
let parse5 = require("parse5");
|
|
73
85
|
let fastest_levenshtein = require("fastest-levenshtein");
|
|
86
|
+
let cli_progress = require("cli-progress");
|
|
87
|
+
cli_progress = require_rolldown_runtime.__toESM(cli_progress, 1);
|
|
74
88
|
let js_rouge = require("js-rouge");
|
|
75
|
-
js_rouge =
|
|
89
|
+
js_rouge = require_rolldown_runtime.__toESM(js_rouge, 1);
|
|
76
90
|
let node_util = require("node:util");
|
|
91
|
+
let lru_cache = require("lru-cache");
|
|
77
92
|
require("debounce");
|
|
78
93
|
let _opentelemetry_core = require("@opentelemetry/core");
|
|
79
94
|
let _opentelemetry_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
|
|
@@ -82,15 +97,15 @@ let _opentelemetry_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
|
|
|
82
97
|
let _opentelemetry_semantic_conventions = require("@opentelemetry/semantic-conventions");
|
|
83
98
|
let drizzle_orm_better_sqlite3_migrator = require("drizzle-orm/better-sqlite3/migrator");
|
|
84
99
|
let process$1 = require("process");
|
|
85
|
-
process$1 =
|
|
100
|
+
process$1 = require_rolldown_runtime.__toESM(process$1, 1);
|
|
86
101
|
let _apidevtools_json_schema_ref_parser = require("@apidevtools/json-schema-ref-parser");
|
|
87
|
-
_apidevtools_json_schema_ref_parser =
|
|
102
|
+
_apidevtools_json_schema_ref_parser = require_rolldown_runtime.__toESM(_apidevtools_json_schema_ref_parser, 1);
|
|
88
103
|
let cli_table3 = require("cli-table3");
|
|
89
|
-
cli_table3 =
|
|
104
|
+
cli_table3 = require_rolldown_runtime.__toESM(cli_table3, 1);
|
|
90
105
|
let chokidar = require("chokidar");
|
|
91
|
-
chokidar =
|
|
106
|
+
chokidar = require_rolldown_runtime.__toESM(chokidar, 1);
|
|
92
107
|
let ora = require("ora");
|
|
93
|
-
ora =
|
|
108
|
+
ora = require_rolldown_runtime.__toESM(ora, 1);
|
|
94
109
|
require("@inquirer/confirm");
|
|
95
110
|
//#region src/external/matchers/conversationRelevancyTemplate.ts
|
|
96
111
|
var ConversationRelevancyTemplate = class {
|
|
@@ -449,17 +464,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
|
|
|
449
464
|
contextValue = test.vars.context;
|
|
450
465
|
}
|
|
451
466
|
} else if (fallbackContext) contextValue = fallbackContext;
|
|
452
|
-
if (assertion.contextTransform)
|
|
453
|
-
const
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
467
|
+
if (assertion.contextTransform) {
|
|
468
|
+
const getLabel = () => require_transform$1.getTransformLabel(assertion.contextTransform);
|
|
469
|
+
try {
|
|
470
|
+
const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
|
|
471
|
+
const transformed = await require_transform$1.transform(assertion.contextTransform, outputForTransform, {
|
|
472
|
+
vars: test.vars,
|
|
473
|
+
prompt: { label: prompt },
|
|
474
|
+
...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
|
|
475
|
+
});
|
|
476
|
+
require_invariant.invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
|
|
477
|
+
contextValue = transformed;
|
|
478
|
+
} catch (error) {
|
|
479
|
+
throw new Error(`Failed to transform context using expression '${getLabel()}': ${require_transform$1.getTransformErrorMessage(error)}`);
|
|
480
|
+
}
|
|
463
481
|
}
|
|
464
482
|
require_invariant.invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
|
|
465
483
|
return contextValue;
|
|
@@ -740,7 +758,7 @@ async function calculateProviderSimilarity(finalProvider, expected, output, metr
|
|
|
740
758
|
return calculateSimilarityScore(expectedEmbedding.embedding, outputEmbedding.embedding, metric, tokensUsed);
|
|
741
759
|
}
|
|
742
760
|
async function matchesSimilarity(expected, output, threshold, inverse = false, grading, metric = "cosine") {
|
|
743
|
-
if (metric === "cosine" && require_logger.state.config?.redteam &&
|
|
761
|
+
if (metric === "cosine" && require_logger.state.config?.redteam && require_remoteGeneration.shouldGenerateRemote({ requireEmbeddingProvider: true })) try {
|
|
744
762
|
return await require_graders.doRemoteGrading({
|
|
745
763
|
task: "similar",
|
|
746
764
|
expected,
|
|
@@ -802,7 +820,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
802
820
|
require_telemetry.telemetry.record("feature_used", { feature: "tracing" });
|
|
803
821
|
try {
|
|
804
822
|
require_logger.logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
805
|
-
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
823
|
+
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CvJdBGSc.cjs"));
|
|
806
824
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
807
825
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
808
826
|
const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
|
|
@@ -826,7 +844,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
826
844
|
async function stopOtlpReceiverIfNeeded() {
|
|
827
845
|
if (otlpReceiverStarted) try {
|
|
828
846
|
require_logger.logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
829
|
-
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
847
|
+
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CvJdBGSc.cjs"));
|
|
830
848
|
await stopOTLPReceiver();
|
|
831
849
|
otlpReceiverStarted = false;
|
|
832
850
|
require_logger.logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -861,7 +879,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
861
879
|
}
|
|
862
880
|
if (!tracingEnabled) return null;
|
|
863
881
|
require_logger.logger.debug("[EvaluatorTracing] Importing trace store");
|
|
864
|
-
const { getTraceStore } = await Promise.resolve().then(() => require("./store-
|
|
882
|
+
const { getTraceStore } = await Promise.resolve().then(() => require("./store-B2NDDooM.cjs")).then((n) => n.store_exports);
|
|
865
883
|
const traceStore = getTraceStore();
|
|
866
884
|
const traceId = generateTraceId();
|
|
867
885
|
const spanId = generateSpanId();
|
|
@@ -1493,27 +1511,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
|
|
|
1493
1511
|
};
|
|
1494
1512
|
//#endregion
|
|
1495
1513
|
//#region src/assertions/geval.ts
|
|
1496
|
-
const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1497
|
-
require_invariant.invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
|
|
1514
|
+
const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1515
|
+
require_invariant.invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
|
|
1498
1516
|
const threshold = assertion.threshold ?? .7;
|
|
1499
1517
|
if (Array.isArray(renderedValue)) {
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1518
|
+
if (renderedValue.length === 0) return {
|
|
1519
|
+
assertion,
|
|
1520
|
+
pass: false,
|
|
1521
|
+
score: 0,
|
|
1522
|
+
reason: "G-Eval assertion requires at least one criterion string in the value array."
|
|
1523
|
+
};
|
|
1524
|
+
const responses = [];
|
|
1525
|
+
let failure;
|
|
1526
|
+
for (const [index, value] of renderedValue.entries()) {
|
|
1503
1527
|
const resp = await require_graders.matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1504
|
-
|
|
1505
|
-
|
|
1528
|
+
responses.push(resp);
|
|
1529
|
+
if (require_graders.isGraderFailure(resp)) {
|
|
1530
|
+
failure = {
|
|
1531
|
+
index,
|
|
1532
|
+
resp
|
|
1533
|
+
};
|
|
1534
|
+
break;
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
const tokensUsed = require_tokenUsageUtils.createEmptyTokenUsage();
|
|
1538
|
+
for (const r of responses) require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, r.tokensUsed);
|
|
1539
|
+
if (failure) {
|
|
1540
|
+
const criterion = renderedValue[failure.index];
|
|
1541
|
+
return {
|
|
1542
|
+
assertion,
|
|
1543
|
+
pass: false,
|
|
1544
|
+
score: 0,
|
|
1545
|
+
reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
|
|
1546
|
+
tokensUsed,
|
|
1547
|
+
metadata: failure.resp.metadata
|
|
1548
|
+
};
|
|
1506
1549
|
}
|
|
1507
|
-
const
|
|
1550
|
+
const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
|
|
1551
|
+
const combinedReason = responses.map((r) => r.reason).join("\n\n");
|
|
1508
1552
|
return {
|
|
1509
1553
|
assertion,
|
|
1510
|
-
pass:
|
|
1511
|
-
score:
|
|
1512
|
-
reason:
|
|
1554
|
+
pass: averageScore >= threshold !== inverse,
|
|
1555
|
+
score: inverse ? 1 - averageScore : averageScore,
|
|
1556
|
+
reason: combinedReason,
|
|
1557
|
+
tokensUsed
|
|
1513
1558
|
};
|
|
1514
|
-
}
|
|
1559
|
+
}
|
|
1560
|
+
const resp = await require_graders.matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1561
|
+
if (require_graders.isGraderFailure(resp)) return {
|
|
1562
|
+
assertion,
|
|
1563
|
+
pass: false,
|
|
1564
|
+
score: 0,
|
|
1565
|
+
reason: resp.reason,
|
|
1566
|
+
tokensUsed: resp.tokensUsed,
|
|
1567
|
+
metadata: resp.metadata
|
|
1568
|
+
};
|
|
1569
|
+
const passed = resp.score >= threshold !== inverse;
|
|
1570
|
+
return {
|
|
1515
1571
|
assertion,
|
|
1516
|
-
...
|
|
1572
|
+
...resp,
|
|
1573
|
+
pass: passed,
|
|
1574
|
+
score: inverse ? 1 - resp.score : resp.score
|
|
1517
1575
|
};
|
|
1518
1576
|
};
|
|
1519
1577
|
//#endregion
|
|
@@ -2024,7 +2082,7 @@ const handleJavascript = async ({ assertion, renderedValue, valueFromScript, ass
|
|
|
2024
2082
|
let result;
|
|
2025
2083
|
if (typeof valueFromScript === "undefined") {
|
|
2026
2084
|
const functionBody = renderedValue.includes("\n") ? renderedValue : buildFunctionBody(renderedValue);
|
|
2027
|
-
result = await validateResult(new Function("output", "context", "process", functionBody)(output, assertionValueContext,
|
|
2085
|
+
result = await validateResult(new Function("output", "context", "process", functionBody)(output, assertionValueContext, require_processShim.getProcessShim()));
|
|
2028
2086
|
} else {
|
|
2029
2087
|
require_invariant.invariant(typeof valueFromScript === "boolean" || typeof valueFromScript === "number" || typeof valueFromScript === "object", `Javascript assertion script must return a boolean, number, or object (${assertion.value})`);
|
|
2030
2088
|
result = await validateResult(valueFromScript);
|
|
@@ -2516,45 +2574,6 @@ function matchesPattern(spanName, pattern) {
|
|
|
2516
2574
|
}
|
|
2517
2575
|
//#endregion
|
|
2518
2576
|
//#region src/assertions/trajectoryUtils.ts
|
|
2519
|
-
const TOOL_ATTRIBUTE_KEYS = [
|
|
2520
|
-
"tool.name",
|
|
2521
|
-
"tool_name",
|
|
2522
|
-
"tool",
|
|
2523
|
-
"function.name",
|
|
2524
|
-
"function_name",
|
|
2525
|
-
"gen_ai.tool.name",
|
|
2526
|
-
"codex.mcp.tool",
|
|
2527
|
-
"agent.tool",
|
|
2528
|
-
"agent.tool_name",
|
|
2529
|
-
"agent.toolName"
|
|
2530
|
-
];
|
|
2531
|
-
const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
|
|
2532
|
-
"tool.arguments",
|
|
2533
|
-
"tool.args",
|
|
2534
|
-
"tool.input",
|
|
2535
|
-
"tool_arguments",
|
|
2536
|
-
"tool_args",
|
|
2537
|
-
"tool_input",
|
|
2538
|
-
"function.arguments",
|
|
2539
|
-
"function.args",
|
|
2540
|
-
"function.input",
|
|
2541
|
-
"function_arguments",
|
|
2542
|
-
"function_args",
|
|
2543
|
-
"gen_ai.tool.arguments",
|
|
2544
|
-
"gen_ai.tool.args",
|
|
2545
|
-
"gen_ai.tool.input",
|
|
2546
|
-
"gen_ai.tool.call.arguments",
|
|
2547
|
-
"gen_ai.tool.call.args",
|
|
2548
|
-
"agent.tool.arguments",
|
|
2549
|
-
"agent.tool.args",
|
|
2550
|
-
"agent.tool.input",
|
|
2551
|
-
"codex.mcp.arguments",
|
|
2552
|
-
"codex.mcp.args",
|
|
2553
|
-
"codex.mcp.input",
|
|
2554
|
-
"arguments",
|
|
2555
|
-
"args",
|
|
2556
|
-
"input"
|
|
2557
|
-
];
|
|
2558
2577
|
const COMMAND_ATTRIBUTE_KEYS = [
|
|
2559
2578
|
"codex.command",
|
|
2560
2579
|
"command",
|
|
@@ -2567,16 +2586,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
|
|
|
2567
2586
|
"search_query"
|
|
2568
2587
|
];
|
|
2569
2588
|
const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
|
|
2589
|
+
const COMMAND_TOOL_NAMES = new Set([
|
|
2590
|
+
"exec_command",
|
|
2591
|
+
"local_shell",
|
|
2592
|
+
"shell"
|
|
2593
|
+
]);
|
|
2570
2594
|
const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
|
|
2571
2595
|
const MAX_JUDGE_SUMMARY_STEPS = 24;
|
|
2572
2596
|
const JUDGE_SUMMARY_HEAD_STEPS = 12;
|
|
2573
2597
|
const JUDGE_SUMMARY_TAIL_STEPS = 12;
|
|
2574
|
-
function getStringAttribute(attributes, keys) {
|
|
2575
|
-
for (const key of keys) {
|
|
2576
|
-
const value = attributes[key];
|
|
2577
|
-
if (typeof value === "string" && value.trim()) return value.trim();
|
|
2578
|
-
}
|
|
2579
|
-
}
|
|
2580
2598
|
function normalizeStructuredAttribute(value) {
|
|
2581
2599
|
if (value === void 0 || value === null) return;
|
|
2582
2600
|
if (typeof value === "string") {
|
|
@@ -2608,9 +2626,12 @@ function getTrajectoryStepStatus(step) {
|
|
|
2608
2626
|
function getCommandExecutable(command) {
|
|
2609
2627
|
return command.trim().split(/\s+/)[0] || void 0;
|
|
2610
2628
|
}
|
|
2629
|
+
function isCommandToolName(toolName) {
|
|
2630
|
+
return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
|
|
2631
|
+
}
|
|
2611
2632
|
function extractToolName(span) {
|
|
2612
2633
|
const attributes = span.attributes || {};
|
|
2613
|
-
const directMatch =
|
|
2634
|
+
const directMatch = require_toolAttributes.getToolNameFromAttributes(attributes);
|
|
2614
2635
|
if (directMatch) return directMatch;
|
|
2615
2636
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2616
2637
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
@@ -2624,7 +2645,7 @@ function extractToolName(span) {
|
|
|
2624
2645
|
}
|
|
2625
2646
|
function extractToolArgs(span) {
|
|
2626
2647
|
const attributes = span.attributes || {};
|
|
2627
|
-
for (const key of TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
|
|
2648
|
+
for (const key of require_toolAttributes.TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
|
|
2628
2649
|
const value = normalizeStructuredAttribute(attributes[key]);
|
|
2629
2650
|
if (value !== void 0) return value;
|
|
2630
2651
|
}
|
|
@@ -2635,21 +2656,31 @@ function extractToolArgs(span) {
|
|
|
2635
2656
|
if (value !== void 0) return value;
|
|
2636
2657
|
}
|
|
2637
2658
|
}
|
|
2638
|
-
function extractCommand(span) {
|
|
2659
|
+
function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
|
|
2639
2660
|
const attributes = span.attributes || {};
|
|
2640
|
-
const directMatch =
|
|
2661
|
+
const directMatch = require_toolAttributes.getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
|
|
2641
2662
|
if (directMatch) return directMatch;
|
|
2642
2663
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2643
2664
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
2644
2665
|
if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
|
|
2645
2666
|
}
|
|
2667
|
+
const toolArgs = getToolArgs();
|
|
2668
|
+
if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
|
|
2669
|
+
const args = toolArgs;
|
|
2670
|
+
const command = args.cmd ?? args.command;
|
|
2671
|
+
if (typeof command === "string" && command.trim()) return command.trim();
|
|
2672
|
+
if (Array.isArray(command)) {
|
|
2673
|
+
const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
|
|
2674
|
+
if (joined) return joined;
|
|
2675
|
+
}
|
|
2676
|
+
}
|
|
2646
2677
|
if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
|
|
2647
2678
|
}
|
|
2648
2679
|
function extractSearchQuery(span) {
|
|
2649
2680
|
const attributes = span.attributes || {};
|
|
2650
|
-
const directMatch =
|
|
2681
|
+
const directMatch = require_toolAttributes.getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
|
|
2651
2682
|
if (directMatch) return directMatch;
|
|
2652
|
-
const genericQuery =
|
|
2683
|
+
const genericQuery = require_toolAttributes.getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
|
|
2653
2684
|
if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
|
|
2654
2685
|
if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
|
|
2655
2686
|
}
|
|
@@ -2673,17 +2704,34 @@ function extractTrajectorySteps(trace) {
|
|
|
2673
2704
|
return left.index - right.index;
|
|
2674
2705
|
}).map(({ span }) => {
|
|
2675
2706
|
const toolName = extractToolName(span);
|
|
2676
|
-
|
|
2707
|
+
let toolArgs;
|
|
2708
|
+
let hasExtractedToolArgs = false;
|
|
2709
|
+
const getToolArgs = () => {
|
|
2710
|
+
if (!hasExtractedToolArgs) {
|
|
2711
|
+
toolArgs = extractToolArgs(span);
|
|
2712
|
+
hasExtractedToolArgs = true;
|
|
2713
|
+
}
|
|
2714
|
+
return toolArgs;
|
|
2715
|
+
};
|
|
2716
|
+
const command = extractCommand(span, toolName, getToolArgs);
|
|
2677
2717
|
const searchQuery = extractSearchQuery(span);
|
|
2678
2718
|
let type = "span";
|
|
2679
2719
|
let name = span.name;
|
|
2680
2720
|
const aliases = new Set([span.name]);
|
|
2681
2721
|
let args;
|
|
2682
|
-
if (toolName) {
|
|
2722
|
+
if (command && isCommandToolName(toolName)) {
|
|
2723
|
+
type = "command";
|
|
2724
|
+
name = command;
|
|
2725
|
+
aliases.add(command);
|
|
2726
|
+
args = getToolArgs();
|
|
2727
|
+
if (toolName) aliases.add(toolName);
|
|
2728
|
+
const executable = getCommandExecutable(command);
|
|
2729
|
+
if (executable) aliases.add(executable);
|
|
2730
|
+
} else if (toolName) {
|
|
2683
2731
|
type = "tool";
|
|
2684
2732
|
name = toolName;
|
|
2685
2733
|
aliases.add(toolName);
|
|
2686
|
-
args =
|
|
2734
|
+
args = getToolArgs();
|
|
2687
2735
|
} else if (command) {
|
|
2688
2736
|
type = "command";
|
|
2689
2737
|
name = command;
|
|
@@ -2859,7 +2907,7 @@ const handleRedteam = async ({ assertion, baseType, test, prompt, outputString,
|
|
|
2859
2907
|
if (match) evalId = match[1];
|
|
2860
2908
|
}
|
|
2861
2909
|
}
|
|
2862
|
-
const tracking = await
|
|
2910
|
+
const tracking = await require_indirectWebPwn.checkExfilTracking(webPageUuid, evalId);
|
|
2863
2911
|
if (tracking) gradingContext = {
|
|
2864
2912
|
...gradingContext,
|
|
2865
2913
|
wasExfiltrated: tracking.wasExfiltrated,
|
|
@@ -2925,7 +2973,7 @@ function handleIsRefusal(params) {
|
|
|
2925
2973
|
assertion
|
|
2926
2974
|
};
|
|
2927
2975
|
}
|
|
2928
|
-
const pass =
|
|
2976
|
+
const pass = require_util$2.isBasicRefusal(output) !== inverse;
|
|
2929
2977
|
return {
|
|
2930
2978
|
pass,
|
|
2931
2979
|
score: pass ? 1 : 0,
|
|
@@ -3870,13 +3918,13 @@ function resolveSequenceValue(value) {
|
|
|
3870
3918
|
}
|
|
3871
3919
|
throw new Error("trajectory:tool-sequence assertion must have an array or object value");
|
|
3872
3920
|
}
|
|
3873
|
-
function isRecord(value) {
|
|
3921
|
+
function isRecord$1(value) {
|
|
3874
3922
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
3875
3923
|
}
|
|
3876
3924
|
function matchesExpectedArgsPartial(actual, expected) {
|
|
3877
3925
|
if (Array.isArray(expected)) return Array.isArray(actual) && actual.length === expected.length && expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
|
|
3878
|
-
if (isRecord(expected)) {
|
|
3879
|
-
if (!isRecord(actual)) return false;
|
|
3926
|
+
if (isRecord$1(expected)) {
|
|
3927
|
+
if (!isRecord$1(actual)) return false;
|
|
3880
3928
|
return Object.entries(expected).every(([key, expectedValue]) => Object.prototype.hasOwnProperty.call(actual, key) && matchesExpectedArgsPartial(actual[key], expectedValue));
|
|
3881
3929
|
}
|
|
3882
3930
|
return (0, node_util.isDeepStrictEqual)(actual, expected);
|
|
@@ -4233,7 +4281,7 @@ function assertionMayNeedTraceContext(assertion) {
|
|
|
4233
4281
|
if (assertionUsesTrace(assertion)) return true;
|
|
4234
4282
|
if (assertion.type === "assert-set") return assertion.assert.some(assertionMayNeedTraceContext);
|
|
4235
4283
|
if (assertion.type.startsWith("promptfoo:redteam:coding-agent:")) return true;
|
|
4236
|
-
return typeof assertion.value === "string" ? assertion.value.startsWith("file://") ||
|
|
4284
|
+
return typeof assertion.value === "string" ? assertion.value.startsWith("file://") || require_packageParser.isPackagePath(assertion.value) : false;
|
|
4237
4285
|
}
|
|
4238
4286
|
function hasTraceAwareAssertions(assertions) {
|
|
4239
4287
|
return Boolean(assertions?.some(assertionMayNeedTraceContext));
|
|
@@ -4247,7 +4295,7 @@ async function loadTraceData(traceId) {
|
|
|
4247
4295
|
let stableObservations = 0;
|
|
4248
4296
|
let latestTrace = null;
|
|
4249
4297
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
4250
|
-
latestTrace = await traceStore.getTrace(traceId);
|
|
4298
|
+
latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
|
|
4251
4299
|
const spanCount = latestTrace?.spans?.length ?? 0;
|
|
4252
4300
|
if (spanCount > 0) {
|
|
4253
4301
|
stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
|
|
@@ -4300,7 +4348,7 @@ const ASSERTION_HANDLERS = {
|
|
|
4300
4348
|
"llm-rubric": handleLlmRubric,
|
|
4301
4349
|
meteor: async (params) => {
|
|
4302
4350
|
try {
|
|
4303
|
-
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-
|
|
4351
|
+
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-CR226f7Z.cjs"));
|
|
4304
4352
|
return handleMeteorAssertion(params);
|
|
4305
4353
|
} catch (error) {
|
|
4306
4354
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -4436,7 +4484,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
4436
4484
|
};
|
|
4437
4485
|
}
|
|
4438
4486
|
else if (filePath.endsWith(".rb")) try {
|
|
4439
|
-
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-
|
|
4487
|
+
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-CqUWBZAt.cjs")).then((n) => n.rubyUtils_exports);
|
|
4440
4488
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
4441
4489
|
require_logger.logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
4442
4490
|
} catch (error) {
|
|
@@ -4448,9 +4496,9 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
4448
4496
|
};
|
|
4449
4497
|
}
|
|
4450
4498
|
else renderedValue = require_graders.processFileReference(renderedValue);
|
|
4451
|
-
} else if (
|
|
4499
|
+
} else if (require_packageParser.isPackagePath(renderedValue)) {
|
|
4452
4500
|
const basePath = require_logger.state.basePath || "";
|
|
4453
|
-
const requiredModule = await
|
|
4501
|
+
const requiredModule = await require_packageParser.loadFromPackage(renderedValue, basePath);
|
|
4454
4502
|
if (typeof requiredModule !== "function") throw new Error(`Assertion malformed: ${renderedValue} must be a function. Received: ${typeof requiredModule}`);
|
|
4455
4503
|
valueFromScript = await Promise.resolve(requiredModule(output, context));
|
|
4456
4504
|
} else renderedValue = nunjucks.renderString(renderedValue, resolvedVars);
|
|
@@ -4553,7 +4601,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
|
|
|
4553
4601
|
require_logger.logger.debug(`Failed to preload trace data for assertions: ${error}`);
|
|
4554
4602
|
preloadedTraceData = null;
|
|
4555
4603
|
}
|
|
4556
|
-
|
|
4604
|
+
const concurrency = require_graders.getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
|
|
4605
|
+
await async.default.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
|
|
4557
4606
|
if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
|
|
4558
4607
|
const result = await runAssertion({
|
|
4559
4608
|
prompt,
|
|
@@ -4680,7 +4729,8 @@ var CIProgressReporter = class {
|
|
|
4680
4729
|
}
|
|
4681
4730
|
updateTotalTests(newTotal) {
|
|
4682
4731
|
this.totalTests = Math.max(newTotal, 1);
|
|
4683
|
-
|
|
4732
|
+
const percentage = Math.floor(this.completedTests / this.totalTests * 100);
|
|
4733
|
+
this.highestPercentageSeen = percentage;
|
|
4684
4734
|
}
|
|
4685
4735
|
finish() {
|
|
4686
4736
|
if (this.intervalId) {
|
|
@@ -4853,6 +4903,10 @@ function getDefaultOtelConfig() {
|
|
|
4853
4903
|
}
|
|
4854
4904
|
//#endregion
|
|
4855
4905
|
//#region src/tracing/localSpanExporter.ts
|
|
4906
|
+
const MISSING_TRACE_RETRY_DELAY_MS = 50;
|
|
4907
|
+
function delay(ms) {
|
|
4908
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
4909
|
+
}
|
|
4856
4910
|
/**
|
|
4857
4911
|
* A span exporter that writes spans to the local TraceStore (SQLite).
|
|
4858
4912
|
* This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
|
|
@@ -4894,7 +4948,7 @@ var LocalSpanExporter = class {
|
|
|
4894
4948
|
}
|
|
4895
4949
|
let firstError;
|
|
4896
4950
|
for (const [traceId, spanDataList] of spansByTrace) try {
|
|
4897
|
-
const result = await
|
|
4951
|
+
const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
|
|
4898
4952
|
if (result.stored) require_logger.logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
|
|
4899
4953
|
else require_logger.logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
|
|
4900
4954
|
} catch (error) {
|
|
@@ -4906,6 +4960,16 @@ var LocalSpanExporter = class {
|
|
|
4906
4960
|
}
|
|
4907
4961
|
return firstError;
|
|
4908
4962
|
}
|
|
4963
|
+
async addSpansWithTraceRetry(traceStore, traceId, spans) {
|
|
4964
|
+
const options = {
|
|
4965
|
+
skipTraceCheck: false,
|
|
4966
|
+
warnIfMissingTrace: false
|
|
4967
|
+
};
|
|
4968
|
+
const result = await traceStore.addSpans(traceId, spans, options);
|
|
4969
|
+
if (result.stored) return result;
|
|
4970
|
+
await delay(MISSING_TRACE_RETRY_DELAY_MS);
|
|
4971
|
+
return traceStore.addSpans(traceId, spans, options);
|
|
4972
|
+
}
|
|
4909
4973
|
/**
|
|
4910
4974
|
* Convert an OTEL ReadableSpan to our SpanData format.
|
|
4911
4975
|
*/
|
|
@@ -5188,6 +5252,15 @@ function isPromptAllowed(prompt, allowedPrompts) {
|
|
|
5188
5252
|
}
|
|
5189
5253
|
//#endregion
|
|
5190
5254
|
//#region src/evaluator.ts
|
|
5255
|
+
const CONVERSATION_VAR_NAME = "_conversation";
|
|
5256
|
+
const promptUsesConversationVariableCache = new lru_cache.LRUCache({ max: 1024 });
|
|
5257
|
+
function promptUsesConversationVariable(prompt) {
|
|
5258
|
+
const cached = promptUsesConversationVariableCache.get(prompt.raw);
|
|
5259
|
+
if (cached !== void 0) return cached;
|
|
5260
|
+
const { referenced, parsed } = require_render.analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
|
|
5261
|
+
if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
|
|
5262
|
+
return referenced;
|
|
5263
|
+
}
|
|
5191
5264
|
/**
|
|
5192
5265
|
* Manages a single progress bar for the evaluation
|
|
5193
5266
|
*/
|
|
@@ -5387,6 +5460,18 @@ function hasProviderGroupedAssertion(assertion) {
|
|
|
5387
5460
|
function shouldDeferGradingForTest(test) {
|
|
5388
5461
|
return Boolean(test.assert?.some(hasProviderGroupedAssertion));
|
|
5389
5462
|
}
|
|
5463
|
+
function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
|
|
5464
|
+
if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
|
|
5465
|
+
if (shouldGroupGradingByProvider) {
|
|
5466
|
+
require_logger.logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
|
|
5467
|
+
return;
|
|
5468
|
+
}
|
|
5469
|
+
if (concurrency !== 1) return;
|
|
5470
|
+
const reasons = [];
|
|
5471
|
+
if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
|
|
5472
|
+
if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
|
|
5473
|
+
if (reasons.length > 0) require_logger.logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
|
|
5474
|
+
}
|
|
5390
5475
|
function applyGradingResult(row, checkResult) {
|
|
5391
5476
|
if (!checkResult.pass) {
|
|
5392
5477
|
row.error = checkResult.reason;
|
|
@@ -5401,14 +5486,29 @@ function applyGradingResult(row, checkResult) {
|
|
|
5401
5486
|
if (checkResult.tokensUsed) require_tokenUsageUtils.accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
|
|
5402
5487
|
row.gradingResult = checkResult;
|
|
5403
5488
|
}
|
|
5404
|
-
|
|
5405
|
-
|
|
5406
|
-
|
|
5407
|
-
|
|
5408
|
-
|
|
5409
|
-
|
|
5410
|
-
|
|
5411
|
-
|
|
5489
|
+
const ABORTED_GRADING_PREFIX = "Aborted: ";
|
|
5490
|
+
function isAbortShapedError(error) {
|
|
5491
|
+
return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
|
|
5492
|
+
}
|
|
5493
|
+
function applyGradingError(row, error, abortSignal) {
|
|
5494
|
+
const errorAsError = error instanceof Error ? error : void 0;
|
|
5495
|
+
if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
|
|
5496
|
+
const shortMessage = errorAsError?.message ?? String(error);
|
|
5497
|
+
require_logger.logger.debug("Assertion grading aborted", {
|
|
5498
|
+
error: shortMessage,
|
|
5499
|
+
promptIdx: row.promptIdx,
|
|
5500
|
+
testIdx: row.testIdx
|
|
5501
|
+
});
|
|
5502
|
+
row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
|
|
5503
|
+
} else {
|
|
5504
|
+
const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
|
|
5505
|
+
require_logger.logger.error("Assertion grading failed during eval", {
|
|
5506
|
+
error: fullMessage,
|
|
5507
|
+
promptIdx: row.promptIdx,
|
|
5508
|
+
testIdx: row.testIdx
|
|
5509
|
+
});
|
|
5510
|
+
row.error = fullMessage;
|
|
5511
|
+
}
|
|
5412
5512
|
row.failureReason = require_types.ResultFailureReason.ERROR;
|
|
5413
5513
|
row.success = false;
|
|
5414
5514
|
row.score = 0;
|
|
@@ -5420,7 +5520,7 @@ function getNonTransientTargetStatus(row) {
|
|
|
5420
5520
|
}
|
|
5421
5521
|
function createRunEvalState({ provider, prompt, test }) {
|
|
5422
5522
|
const vars = structuredClone(test.vars || {});
|
|
5423
|
-
const fileMetadata =
|
|
5523
|
+
const fileMetadata = require_evaluatorHelpers.collectFileMetadata(vars);
|
|
5424
5524
|
const conversationKey = `${provider.label || provider.id()}:${prompt.id}${test.metadata?.conversationId ? `:${test.metadata.conversationId}` : ""}`;
|
|
5425
5525
|
const setup = createRunEvalSetup({
|
|
5426
5526
|
provider,
|
|
@@ -5440,7 +5540,7 @@ function createRunEvalState({ provider, prompt, test }) {
|
|
|
5440
5540
|
};
|
|
5441
5541
|
}
|
|
5442
5542
|
function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
|
|
5443
|
-
const usesConversation = prompt
|
|
5543
|
+
const usesConversation = promptUsesConversationVariable(prompt);
|
|
5444
5544
|
if (!require_logger.getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
|
|
5445
5545
|
}
|
|
5446
5546
|
function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
|
|
@@ -5459,8 +5559,8 @@ function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
|
|
|
5459
5559
|
};
|
|
5460
5560
|
}
|
|
5461
5561
|
async function renderRunEvalPrompt({ filters, isRedteam, provider, promptForRender, test, testSuite, vars }) {
|
|
5462
|
-
const renderedPrompt = await
|
|
5463
|
-
if (isRedteam)
|
|
5562
|
+
const renderedPrompt = await require_evaluatorHelpers.renderPrompt(promptForRender, vars, filters, provider, shouldSkipRedteamInjectVar(test, testSuite, isRedteam) ? [getRedteamInjectVar(test, promptForRender, testSuite)] : void 0);
|
|
5563
|
+
if (isRedteam) require_promptLength.throwIfTargetPromptExceedsMaxChars(renderedPrompt, testSuite?.redteam?.maxCharsPerMessage);
|
|
5464
5564
|
const setup = createRunEvalSetup({
|
|
5465
5565
|
provider,
|
|
5466
5566
|
prompt: promptForRender,
|
|
@@ -5527,7 +5627,7 @@ async function callActiveProvider({ abortSignal, evalId, filters, promptForRende
|
|
|
5527
5627
|
});
|
|
5528
5628
|
const callApiOptions = abortSignal ? { abortSignal } : void 0;
|
|
5529
5629
|
const callApi = () => activeProvider.callApi(renderedPrompt, callApiContext, callApiOptions);
|
|
5530
|
-
const response = rateLimitRegistry ? await rateLimitRegistry.execute(activeProvider, callApi,
|
|
5630
|
+
const response = rateLimitRegistry ? await rateLimitRegistry.execute(activeProvider, callApi, require_shared.createProviderRateLimitOptions()) : await callApi();
|
|
5531
5631
|
require_logger.logger.debug(`Provider response properties: ${Object.keys(response).join(", ")}`);
|
|
5532
5632
|
require_logger.logger.debug(`Provider response cached property explicitly: ${response.cached}`);
|
|
5533
5633
|
return response;
|
|
@@ -5605,7 +5705,7 @@ function createEvaluateResult({ fileMetadata, latencyMs, prompt, promptIdx, rend
|
|
|
5605
5705
|
};
|
|
5606
5706
|
if (!ret.metadata?.sessionIds && !ret.metadata?.sessionId) {
|
|
5607
5707
|
ret.metadata ??= {};
|
|
5608
|
-
ret.metadata.sessionId =
|
|
5708
|
+
ret.metadata.sessionId = require_util$2.getSessionId(response, { vars });
|
|
5609
5709
|
}
|
|
5610
5710
|
return ret;
|
|
5611
5711
|
}
|
|
@@ -5613,7 +5713,7 @@ function trackProviderUsage(provider, response) {
|
|
|
5613
5713
|
if (!response.tokenUsage) return;
|
|
5614
5714
|
const providerId = provider.id();
|
|
5615
5715
|
const trackingId = provider.constructor?.name ? `${providerId} (${provider.constructor.name})` : providerId;
|
|
5616
|
-
|
|
5716
|
+
require_shared.TokenUsageTracker.getInstance().trackUsage(trackingId, response.tokenUsage);
|
|
5617
5717
|
}
|
|
5618
5718
|
async function applyRunEvalResponseOutcome({ abortSignal, deferGrading, evalId, isRedteam, latencyMs, prompt, promptIdx, provider, providerCallQueue, rateLimitRegistry, renderedPrompt, response, ret, test, testIdx, traceContext, vars }) {
|
|
5619
5719
|
if (response.error) {
|
|
@@ -5687,7 +5787,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
|
|
|
5687
5787
|
assertScoringFunction: test.assertScoringFunction,
|
|
5688
5788
|
traceId
|
|
5689
5789
|
}).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
|
|
5690
|
-
applyGradingError(ret, error);
|
|
5790
|
+
applyGradingError(ret, error, abortSignal);
|
|
5691
5791
|
});
|
|
5692
5792
|
deferredGradingPromises.set(ret, gradingPromise);
|
|
5693
5793
|
return;
|
|
@@ -6234,7 +6334,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
|
|
|
6234
6334
|
const defaultProvider = defaultTest.provider;
|
|
6235
6335
|
if (require_types.isApiProvider(defaultProvider)) return defaultProvider;
|
|
6236
6336
|
if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
6237
|
-
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-
|
|
6337
|
+
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-V6RBuieY.cjs")).then((n) => n.providers_exports);
|
|
6238
6338
|
return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
6239
6339
|
}
|
|
6240
6340
|
return defaultProvider;
|
|
@@ -6394,7 +6494,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
|
|
|
6394
6494
|
async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
6395
6495
|
if (!require_logger.state.resume || !evalRecord.persisted) return;
|
|
6396
6496
|
try {
|
|
6397
|
-
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-
|
|
6497
|
+
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-DHXs-9TL.cjs")).then((n) => n.evalResult_exports);
|
|
6398
6498
|
const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: require_logger.state.retryMode });
|
|
6399
6499
|
const originalCount = runEvalOptions.length;
|
|
6400
6500
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -6408,14 +6508,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
|
6408
6508
|
}
|
|
6409
6509
|
}
|
|
6410
6510
|
function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
|
|
6411
|
-
const usesConversationVar = prompts.some(
|
|
6511
|
+
const usesConversationVar = prompts.some(promptUsesConversationVariable);
|
|
6412
6512
|
if (concurrency <= 1) return {
|
|
6413
6513
|
concurrency,
|
|
6414
6514
|
usesConversationVar
|
|
6415
6515
|
};
|
|
6416
6516
|
const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
|
|
6417
6517
|
if (usesConversationVar) {
|
|
6418
|
-
require_logger.logger.info(`Setting concurrency to 1 because the ${chalk.default.cyan(
|
|
6518
|
+
require_logger.logger.info(`Setting concurrency to 1 because the ${chalk.default.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
|
|
6419
6519
|
return {
|
|
6420
6520
|
concurrency: 1,
|
|
6421
6521
|
usesConversationVar
|
|
@@ -6645,8 +6745,9 @@ var Evaluator = class {
|
|
|
6645
6745
|
};
|
|
6646
6746
|
this.conversations = {};
|
|
6647
6747
|
this.registers = {};
|
|
6648
|
-
|
|
6649
|
-
this.
|
|
6748
|
+
const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
|
|
6749
|
+
this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
|
|
6750
|
+
this.rateLimitRegistry = require_shared.createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
|
|
6650
6751
|
this.rateLimitRegistry.on("ratelimit:hit", (data) => {
|
|
6651
6752
|
require_logger.logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
|
|
6652
6753
|
retryAfterMs: data.retryAfterMs,
|
|
@@ -6672,7 +6773,7 @@ var Evaluator = class {
|
|
|
6672
6773
|
current: data.current
|
|
6673
6774
|
});
|
|
6674
6775
|
});
|
|
6675
|
-
|
|
6776
|
+
require_shared.redteamProviderManager.setRateLimitRegistry(this.rateLimitRegistry);
|
|
6676
6777
|
}
|
|
6677
6778
|
/**
|
|
6678
6779
|
* Updates metrics and stats after a comparison assertion (select-best or max-score).
|
|
@@ -6750,7 +6851,7 @@ var Evaluator = class {
|
|
|
6750
6851
|
});
|
|
6751
6852
|
}
|
|
6752
6853
|
async runEvalStepAfterBeforeEach(evalStep, { deferGrading, onRowsReady, providerCallQueue, testSuite }) {
|
|
6753
|
-
evalStep.test = (await
|
|
6854
|
+
evalStep.test = (await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "beforeEach", { test: evalStep.test })).test;
|
|
6754
6855
|
const rows = await runEvalInternal({
|
|
6755
6856
|
...evalStep,
|
|
6756
6857
|
deferGrading,
|
|
@@ -6765,6 +6866,25 @@ var Evaluator = class {
|
|
|
6765
6866
|
this.trackCompletedRow(evalStep, row, context);
|
|
6766
6867
|
context.numComplete++;
|
|
6767
6868
|
const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
|
|
6869
|
+
if (context.testSuite.extensions?.length) try {
|
|
6870
|
+
const afterEachOut = await require_evaluatorHelpers.runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6871
|
+
test: evalStep.test,
|
|
6872
|
+
result: {
|
|
6873
|
+
...row,
|
|
6874
|
+
namedScores: { ...row.namedScores },
|
|
6875
|
+
metadata: { ...row.metadata },
|
|
6876
|
+
response: row.response ? {
|
|
6877
|
+
...row.response,
|
|
6878
|
+
metadata: { ...row.response.metadata }
|
|
6879
|
+
} : row.response
|
|
6880
|
+
}
|
|
6881
|
+
});
|
|
6882
|
+
row.namedScores = require_evaluatorHelpers.filterFiniteScores(afterEachOut.result.namedScores);
|
|
6883
|
+
row.metadata = afterEachOut.result.metadata;
|
|
6884
|
+
if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
|
|
6885
|
+
} catch (error) {
|
|
6886
|
+
require_logger.logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
|
|
6887
|
+
}
|
|
6768
6888
|
await this.persistEvalRow(row);
|
|
6769
6889
|
if (this.abortIfTargetUnavailable(row, context)) break;
|
|
6770
6890
|
const metrics = context.prompts[row.promptIdx].metrics;
|
|
@@ -6776,10 +6896,6 @@ var Evaluator = class {
|
|
|
6776
6896
|
promptEvalCount,
|
|
6777
6897
|
row
|
|
6778
6898
|
});
|
|
6779
|
-
await require_providers.runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6780
|
-
test: evalStep.test,
|
|
6781
|
-
result: row
|
|
6782
|
-
});
|
|
6783
6899
|
context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
|
|
6784
6900
|
}
|
|
6785
6901
|
}
|
|
@@ -6934,7 +7050,15 @@ var Evaluator = class {
|
|
|
6934
7050
|
})) break;
|
|
6935
7051
|
}
|
|
6936
7052
|
} catch (error) {
|
|
6937
|
-
|
|
7053
|
+
const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
|
|
7054
|
+
try {
|
|
7055
|
+
await flushGroupedRows();
|
|
7056
|
+
} catch (flushError) {
|
|
7057
|
+
require_logger.logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
|
|
7058
|
+
error: flushError instanceof Error ? flushError.message : String(flushError),
|
|
7059
|
+
pendingRowCount
|
|
7060
|
+
});
|
|
7061
|
+
}
|
|
6938
7062
|
throw error;
|
|
6939
7063
|
}
|
|
6940
7064
|
await flushGroupedRows();
|
|
@@ -7204,7 +7328,7 @@ var Evaluator = class {
|
|
|
7204
7328
|
async runAfterAllExtensions(testSuite) {
|
|
7205
7329
|
if (!testSuite.extensions?.length) return;
|
|
7206
7330
|
const resultsForExtension = (await this.evalRecord.getResults()).map((result) => "toEvaluateResult" in result ? result.toEvaluateResult() : result);
|
|
7207
|
-
await
|
|
7331
|
+
await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "afterAll", {
|
|
7208
7332
|
prompts: this.evalRecord.prompts,
|
|
7209
7333
|
results: resultsForExtension,
|
|
7210
7334
|
suite: testSuite,
|
|
@@ -7290,7 +7414,7 @@ var Evaluator = class {
|
|
|
7290
7414
|
const rowsWithSelectBestAssertion = /* @__PURE__ */ new Set();
|
|
7291
7415
|
const rowsWithMaxScoreAssertion = /* @__PURE__ */ new Set();
|
|
7292
7416
|
ensureDefaultTestForExtensions(testSuite);
|
|
7293
|
-
testSuite = (await
|
|
7417
|
+
testSuite = (await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "beforeAll", { suite: testSuite })).suite;
|
|
7294
7418
|
if (!await maybeAddGeneratedPrompts(testSuite, options)) return this.evalRecord;
|
|
7295
7419
|
prompts.push(...buildCompletedPrompts(testSuite, this.evalRecord));
|
|
7296
7420
|
const promptIndexMap = buildPromptIndexMap(prompts);
|
|
@@ -7370,6 +7494,13 @@ var Evaluator = class {
|
|
|
7370
7494
|
if (!this.options.silent) {
|
|
7371
7495
|
if (serialRunEvalOptions.length > 0) require_logger.logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
|
|
7372
7496
|
if (concurrentRunEvalOptions.length > 0) require_logger.logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
|
|
7497
|
+
logGroupedGradingStatus({
|
|
7498
|
+
concurrency,
|
|
7499
|
+
hasEvalStepTimeout,
|
|
7500
|
+
runEvalOptions,
|
|
7501
|
+
shouldGroupGradingByProvider,
|
|
7502
|
+
usesConversationVar
|
|
7503
|
+
});
|
|
7373
7504
|
}
|
|
7374
7505
|
if (this.options.showProgressBar && progressBarManager) {
|
|
7375
7506
|
await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
@@ -7461,7 +7592,7 @@ var Evaluator = class {
|
|
|
7461
7592
|
});
|
|
7462
7593
|
}
|
|
7463
7594
|
this.rateLimitRegistry?.dispose();
|
|
7464
|
-
|
|
7595
|
+
require_shared.redteamProviderManager.setRateLimitRegistry(void 0);
|
|
7465
7596
|
require_logger.state.maxConcurrency = void 0;
|
|
7466
7597
|
}
|
|
7467
7598
|
}
|
|
@@ -8369,7 +8500,7 @@ var Eval = class Eval {
|
|
|
8369
8500
|
const evalInstance = new Eval(eval_.config, {
|
|
8370
8501
|
id: eval_.id,
|
|
8371
8502
|
createdAt: new Date(eval_.createdAt),
|
|
8372
|
-
author: eval_.author
|
|
8503
|
+
author: eval_.author,
|
|
8373
8504
|
description: eval_.description || void 0,
|
|
8374
8505
|
prompts: eval_.prompts || [],
|
|
8375
8506
|
datasetId,
|
|
@@ -8392,7 +8523,7 @@ var Eval = class Eval {
|
|
|
8392
8523
|
return (await require_tables.getDb().select().from(require_tables.evalsTable).limit(limit).orderBy((0, drizzle_orm.desc)(require_tables.evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
|
|
8393
8524
|
id: e.id,
|
|
8394
8525
|
createdAt: new Date(e.createdAt),
|
|
8395
|
-
author: e.author
|
|
8526
|
+
author: e.author,
|
|
8396
8527
|
description: e.description || void 0,
|
|
8397
8528
|
prompts: e.prompts || [],
|
|
8398
8529
|
persisted: true
|
|
@@ -8407,7 +8538,7 @@ var Eval = class Eval {
|
|
|
8407
8538
|
return (await require_tables.getDb().select().from(require_tables.evalsTable).orderBy((0, drizzle_orm.desc)(require_tables.evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
|
|
8408
8539
|
id: e.id,
|
|
8409
8540
|
createdAt: new Date(e.createdAt),
|
|
8410
|
-
author: e.author
|
|
8541
|
+
author: e.author,
|
|
8411
8542
|
description: e.description || void 0,
|
|
8412
8543
|
prompts: e.prompts || [],
|
|
8413
8544
|
persisted: true
|
|
@@ -8422,7 +8553,7 @@ var Eval = class Eval {
|
|
|
8422
8553
|
static async create(config, renderedPrompts, opts) {
|
|
8423
8554
|
const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
|
|
8424
8555
|
const evalId = opts?.id || createEvalId(createdAt);
|
|
8425
|
-
const author = opts
|
|
8556
|
+
const author = opts && "author" in opts ? opts.author ?? null : require_accounts.getAuthor();
|
|
8426
8557
|
const db = require_tables.getDb();
|
|
8427
8558
|
const datasetId = require_createHash.sha256(JSON.stringify(config.tests || []));
|
|
8428
8559
|
db.transaction(() => {
|
|
@@ -8484,7 +8615,7 @@ var Eval = class Eval {
|
|
|
8484
8615
|
});
|
|
8485
8616
|
return new Eval(config, {
|
|
8486
8617
|
id: evalId,
|
|
8487
|
-
author
|
|
8618
|
+
author,
|
|
8488
8619
|
createdAt,
|
|
8489
8620
|
persisted: true,
|
|
8490
8621
|
runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions)
|
|
@@ -8494,7 +8625,7 @@ var Eval = class Eval {
|
|
|
8494
8625
|
const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
|
|
8495
8626
|
this.createdAt = createdAt.getTime();
|
|
8496
8627
|
this.id = opts?.id || createEvalId(createdAt);
|
|
8497
|
-
this.author = opts?.author;
|
|
8628
|
+
this.author = opts?.author ?? null;
|
|
8498
8629
|
this.config = config;
|
|
8499
8630
|
this.results = [];
|
|
8500
8631
|
this.prompts = opts?.prompts || [];
|
|
@@ -9024,7 +9155,7 @@ var Eval = class Eval {
|
|
|
9024
9155
|
newConfig.description = copyDescription;
|
|
9025
9156
|
const newPrompts = structuredClone(this.prompts);
|
|
9026
9157
|
const newVars = this.vars ? structuredClone(this.vars) : [];
|
|
9027
|
-
const author = require_accounts.
|
|
9158
|
+
const author = require_accounts.getAuthor();
|
|
9028
9159
|
const db = require_tables.getDb();
|
|
9029
9160
|
let copiedCount = 0;
|
|
9030
9161
|
db.transaction(() => {
|
|
@@ -10941,7 +11072,7 @@ async function fetchRemoteGeneration(task, prompts) {
|
|
|
10941
11072
|
version: require_version.VERSION,
|
|
10942
11073
|
email: require_accounts.getUserEmail()
|
|
10943
11074
|
};
|
|
10944
|
-
const response = await require_cache.fetchWithCache(
|
|
11075
|
+
const response = await require_cache.fetchWithCache(require_remoteGeneration.getRemoteGenerationUrl(), {
|
|
10945
11076
|
method: "POST",
|
|
10946
11077
|
headers: { "Content-Type": "application/json" },
|
|
10947
11078
|
body: JSON.stringify(body)
|
|
@@ -10976,7 +11107,7 @@ function formatPrompts(prompts) {
|
|
|
10976
11107
|
//#endregion
|
|
10977
11108
|
//#region src/redteam/extraction/entities.ts
|
|
10978
11109
|
async function extractEntities(provider, prompts) {
|
|
10979
|
-
if (
|
|
11110
|
+
if (require_remoteGeneration.shouldGenerateRemote()) try {
|
|
10980
11111
|
return await fetchRemoteGeneration("entities", prompts);
|
|
10981
11112
|
} catch (error) {
|
|
10982
11113
|
require_logger.logger.warn(`[Entity Extraction] Failed, returning 0 entities. Error using remote generation: ${error}`);
|
|
@@ -11023,7 +11154,7 @@ async function extractSystemPurpose(provider, prompts) {
|
|
|
11023
11154
|
require_logger.logger.debug("[purpose] No meaningful prompts provided, returning default purpose");
|
|
11024
11155
|
return DEFAULT_PURPOSE;
|
|
11025
11156
|
}
|
|
11026
|
-
if (!
|
|
11157
|
+
if (!require_remoteGeneration.neverGenerateRemote()) try {
|
|
11027
11158
|
return await fetchRemoteGeneration("purpose", prompts);
|
|
11028
11159
|
} catch (error) {
|
|
11029
11160
|
require_logger.logger.warn(`[purpose] Error using remote generation, returning empty string: ${error}`);
|
|
@@ -11295,7 +11426,7 @@ function createTestCase(injectVar, output, harmCategory) {
|
|
|
11295
11426
|
vars: { [injectVar]: output.trim() },
|
|
11296
11427
|
metadata: {
|
|
11297
11428
|
harmCategory: harmCategoryLabel,
|
|
11298
|
-
pluginId:
|
|
11429
|
+
pluginId: require_util$2.getShortPluginId(harmCategory)
|
|
11299
11430
|
},
|
|
11300
11431
|
assert: getHarmfulAssertions(harmCategory)
|
|
11301
11432
|
};
|
|
@@ -11418,25 +11549,45 @@ var AlignedHarmfulPlugin = class extends require_graders.RedteamPluginBase {
|
|
|
11418
11549
|
getAssertions(_prompt) {
|
|
11419
11550
|
return getHarmfulAssertions(this.harmCategory);
|
|
11420
11551
|
}
|
|
11421
|
-
promptsToTestCases(prompts) {
|
|
11552
|
+
async promptsToTestCases(prompts) {
|
|
11422
11553
|
const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
|
|
11423
11554
|
const harmCategoryLabel = require_types.HARM_PLUGINS[this.harmCategory] || this.harmCategory;
|
|
11424
|
-
|
|
11555
|
+
const pluginId = require_util$2.getShortPluginId(this.harmCategory);
|
|
11556
|
+
return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
|
|
11425
11557
|
const vars = { [this.injectVar]: __prompt };
|
|
11426
|
-
|
|
11427
|
-
|
|
11428
|
-
|
|
11429
|
-
|
|
11558
|
+
let inputMaterialization;
|
|
11559
|
+
if (hasMultipleInputs) {
|
|
11560
|
+
let parsed;
|
|
11561
|
+
try {
|
|
11562
|
+
parsed = JSON.parse(__prompt);
|
|
11563
|
+
} catch (error) {
|
|
11564
|
+
require_logger.logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
11565
|
+
}
|
|
11566
|
+
if (parsed) try {
|
|
11567
|
+
const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
|
|
11568
|
+
materializationIndex,
|
|
11569
|
+
pluginId,
|
|
11570
|
+
provider: this.provider,
|
|
11571
|
+
purpose: this.purpose
|
|
11572
|
+
});
|
|
11573
|
+
Object.assign(vars, materializedVars.vars);
|
|
11574
|
+
inputMaterialization = materializedVars.metadata;
|
|
11575
|
+
} catch (error) {
|
|
11576
|
+
require_logger.logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
|
|
11577
|
+
throw error;
|
|
11578
|
+
}
|
|
11579
|
+
}
|
|
11430
11580
|
return {
|
|
11431
11581
|
vars,
|
|
11432
11582
|
metadata: {
|
|
11433
11583
|
harmCategory: harmCategoryLabel,
|
|
11434
|
-
pluginId
|
|
11435
|
-
pluginConfig: this.config
|
|
11584
|
+
pluginId,
|
|
11585
|
+
pluginConfig: this.config,
|
|
11586
|
+
...inputMaterialization ? { inputMaterialization } : {}
|
|
11436
11587
|
},
|
|
11437
11588
|
assert: getHarmfulAssertions(this.harmCategory)
|
|
11438
11589
|
};
|
|
11439
|
-
});
|
|
11590
|
+
}));
|
|
11440
11591
|
}
|
|
11441
11592
|
};
|
|
11442
11593
|
//#endregion
|
|
@@ -11445,25 +11596,42 @@ var AlignedHarmfulPlugin = class extends require_graders.RedteamPluginBase {
|
|
|
11445
11596
|
* Extract content from <Prompt> tags and parse JSON if inputs are defined.
|
|
11446
11597
|
* Returns the processed prompt and any additional vars extracted from JSON.
|
|
11447
11598
|
*/
|
|
11448
|
-
function processPromptForInputs(prompt,
|
|
11599
|
+
async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
|
|
11449
11600
|
let processedPrompt = prompt.trim();
|
|
11450
11601
|
const additionalVars = {};
|
|
11451
|
-
|
|
11602
|
+
let additionalMetadata;
|
|
11603
|
+
const extractedPrompt = require_util$2.extractPromptFromTags(processedPrompt);
|
|
11452
11604
|
if (extractedPrompt) processedPrompt = extractedPrompt;
|
|
11453
|
-
if (inputs && Object.keys(inputs).length > 0)
|
|
11454
|
-
|
|
11455
|
-
|
|
11456
|
-
|
|
11457
|
-
|
|
11605
|
+
if (inputs && Object.keys(inputs).length > 0) {
|
|
11606
|
+
let parsed;
|
|
11607
|
+
try {
|
|
11608
|
+
parsed = JSON.parse(processedPrompt);
|
|
11609
|
+
} catch (error) {
|
|
11610
|
+
require_logger.logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
11611
|
+
}
|
|
11612
|
+
if (parsed) try {
|
|
11613
|
+
const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
|
|
11614
|
+
materializationIndex,
|
|
11615
|
+
pluginId: plugin,
|
|
11616
|
+
provider,
|
|
11617
|
+
purpose
|
|
11618
|
+
});
|
|
11619
|
+
Object.assign(additionalVars, materializedVars.vars);
|
|
11620
|
+
additionalMetadata = materializedVars.metadata;
|
|
11621
|
+
} catch (error) {
|
|
11622
|
+
require_logger.logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
|
|
11623
|
+
throw error;
|
|
11624
|
+
}
|
|
11458
11625
|
}
|
|
11459
11626
|
return {
|
|
11460
11627
|
processedPrompt,
|
|
11461
|
-
additionalVars
|
|
11628
|
+
additionalVars,
|
|
11629
|
+
additionalMetadata
|
|
11462
11630
|
};
|
|
11463
11631
|
}
|
|
11464
11632
|
async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
|
|
11465
11633
|
const maxHarmfulTests = require_logger.getEnvInt("PROMPTFOO_MAX_HARMFUL_TESTS_PER_REQUEST", 5);
|
|
11466
|
-
const unalignedProvider = new
|
|
11634
|
+
const unalignedProvider = new require_promptfoo.PromptfooHarmfulCompletionProvider({
|
|
11467
11635
|
purpose,
|
|
11468
11636
|
n: Math.min(n, maxHarmfulTests),
|
|
11469
11637
|
harmCategory: plugin,
|
|
@@ -11479,15 +11647,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
11479
11647
|
};
|
|
11480
11648
|
const allPrompts = await require_graders.retryWithDeduplication(generatePrompts, n);
|
|
11481
11649
|
const inputs = config?.inputs;
|
|
11482
|
-
return require_graders.sampleArray(allPrompts, n).map((prompt) => {
|
|
11483
|
-
const { processedPrompt, additionalVars } = processPromptForInputs(prompt,
|
|
11650
|
+
return Promise.all(require_graders.sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
|
|
11651
|
+
const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
|
|
11484
11652
|
const testCase = createTestCase(injectVar, processedPrompt, plugin);
|
|
11485
11653
|
if (Object.keys(additionalVars).length > 0) testCase.vars = {
|
|
11486
11654
|
...testCase.vars,
|
|
11487
11655
|
...additionalVars
|
|
11488
11656
|
};
|
|
11657
|
+
if (additionalMetadata) testCase.metadata = {
|
|
11658
|
+
...testCase.metadata,
|
|
11659
|
+
inputMaterialization: additionalMetadata
|
|
11660
|
+
};
|
|
11489
11661
|
return testCase;
|
|
11490
|
-
});
|
|
11662
|
+
}));
|
|
11491
11663
|
}
|
|
11492
11664
|
//#endregion
|
|
11493
11665
|
//#region src/redteam/plugins/teenSafety/graderExamples.ts
|
|
@@ -11805,9 +11977,9 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
|
|
|
11805
11977
|
function computeModifiersFromConfig(config) {
|
|
11806
11978
|
const modifiers = { ...config?.modifiers };
|
|
11807
11979
|
if (config?.language && typeof config.language === "string") modifiers.language = config.language;
|
|
11808
|
-
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
11809
|
-
const maxCharsModifier =
|
|
11810
|
-
if (maxCharsModifier) modifiers[
|
|
11980
|
+
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(require_inputVariables.buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
11981
|
+
const maxCharsModifier = require_promptLength.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
11982
|
+
if (maxCharsModifier) modifiers[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
|
|
11811
11983
|
return modifiers;
|
|
11812
11984
|
}
|
|
11813
11985
|
function applyDefaultGraderExamples(key, config) {
|
|
@@ -11831,7 +12003,7 @@ function isValidMaxCharsPerMessage(limit) {
|
|
|
11831
12003
|
}
|
|
11832
12004
|
function getMaxCharsPerMessageFromConfig(config) {
|
|
11833
12005
|
if (isValidMaxCharsPerMessage(config?.maxCharsPerMessage)) return config.maxCharsPerMessage;
|
|
11834
|
-
const maxCharsModifier = (config?.modifiers)?.[
|
|
12006
|
+
const maxCharsModifier = (config?.modifiers)?.[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY];
|
|
11835
12007
|
if (typeof maxCharsModifier !== "string") return;
|
|
11836
12008
|
const match = /must be (\d+) characters or fewer\./.exec(maxCharsModifier);
|
|
11837
12009
|
if (!match) return;
|
|
@@ -11915,7 +12087,7 @@ function withMaxCharsRetries(pluginFactory) {
|
|
|
11915
12087
|
const rejectedPromptLengths = [];
|
|
11916
12088
|
let rejectedPromptLimit;
|
|
11917
12089
|
for (const testCase of generatedTestCases) {
|
|
11918
|
-
const violation =
|
|
12090
|
+
const violation = require_promptLength.getGeneratedPromptOverLimit(String(testCase.vars?.[params.injectVar] ?? ""), maxCharsPerMessage);
|
|
11919
12091
|
if (violation) {
|
|
11920
12092
|
rejectedPromptLengths.push(violation.length);
|
|
11921
12093
|
rejectedPromptLimit = violation.limit;
|
|
@@ -11932,16 +12104,16 @@ function withMaxCharsRetries(pluginFactory) {
|
|
|
11932
12104
|
}
|
|
11933
12105
|
async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
11934
12106
|
require_invariant.invariant(!require_logger.getEnvBool("PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION"), "fetchRemoteTestCases should never be called when remote generation is disabled");
|
|
11935
|
-
const remoteHealth = await checkRemoteHealth(
|
|
12107
|
+
const remoteHealth = await checkRemoteHealth(require_remoteGeneration.getRemoteHealthUrl());
|
|
11936
12108
|
if (remoteHealth.status !== "OK") {
|
|
11937
12109
|
require_logger.logger.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
|
|
11938
12110
|
return [];
|
|
11939
12111
|
}
|
|
11940
12112
|
const { graderExamples, ...configForRemote } = config ?? {};
|
|
11941
|
-
const maxCharsModifier =
|
|
12113
|
+
const maxCharsModifier = require_promptLength.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
11942
12114
|
if (maxCharsModifier) configForRemote.modifiers = {
|
|
11943
12115
|
...configForRemote.modifiers ?? {},
|
|
11944
|
-
[
|
|
12116
|
+
[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY]: maxCharsModifier
|
|
11945
12117
|
};
|
|
11946
12118
|
const body = JSON.stringify({
|
|
11947
12119
|
config: configForRemote,
|
|
@@ -11954,7 +12126,7 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
11954
12126
|
email: require_accounts.getUserEmail()
|
|
11955
12127
|
});
|
|
11956
12128
|
try {
|
|
11957
|
-
const { data, status, statusText } = await require_cache.fetchWithCache(
|
|
12129
|
+
const { data, status, statusText } = await require_cache.fetchWithCache(require_remoteGeneration.getRemoteGenerationUrl(), {
|
|
11958
12130
|
method: "POST",
|
|
11959
12131
|
headers: { "Content-Type": "application/json" },
|
|
11960
12132
|
body
|
|
@@ -11963,6 +12135,7 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
11963
12135
|
require_logger.logger.error(`Error generating test cases for ${key}: ${statusText} ${JSON.stringify(data)}`);
|
|
11964
12136
|
return [];
|
|
11965
12137
|
}
|
|
12138
|
+
if (require_promptfoo.requiresRemoteMaterialization(config?.inputs)) require_promptfoo.assertRemoteMaterializationHandled(data, `Remote plugin generation for ${key}`);
|
|
11966
12139
|
const ret = data.result;
|
|
11967
12140
|
require_logger.logger.debug(`Received remote generation for ${key}:\n${JSON.stringify(ret)}`);
|
|
11968
12141
|
return ret;
|
|
@@ -11977,17 +12150,18 @@ function createPluginFactory(PluginClass, key, validate) {
|
|
|
11977
12150
|
validate,
|
|
11978
12151
|
action: async ({ provider, purpose, injectVar, n, delayMs, config }) => {
|
|
11979
12152
|
const configWithDefaults = applyDefaultGraderExamples(key, config);
|
|
11980
|
-
if (PluginClass.canGenerateRemote === false || !
|
|
12153
|
+
if (PluginClass.canGenerateRemote === false || !require_remoteGeneration.shouldGenerateRemote()) {
|
|
11981
12154
|
require_logger.logger.debug(`Using local redteam generation for ${key}`);
|
|
11982
12155
|
return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
|
|
11983
12156
|
}
|
|
12157
|
+
const pluginId = require_util$2.getShortPluginId(key);
|
|
11984
12158
|
const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
|
|
11985
12159
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
11986
12160
|
return testCases.map((testCase) => ({
|
|
11987
12161
|
...testCase,
|
|
11988
12162
|
metadata: {
|
|
11989
12163
|
...testCase.metadata,
|
|
11990
|
-
pluginId
|
|
12164
|
+
pluginId,
|
|
11991
12165
|
pluginConfig: {
|
|
11992
12166
|
...configWithDefaults,
|
|
11993
12167
|
modifiers: computedModifiers
|
|
@@ -12043,8 +12217,8 @@ const pluginFactories = [
|
|
|
12043
12217
|
...unalignedHarmCategories.map((category) => ({
|
|
12044
12218
|
key: category,
|
|
12045
12219
|
action: async (params) => {
|
|
12046
|
-
if (
|
|
12047
|
-
require_logger.logger.error(`${category} plugin
|
|
12220
|
+
if (require_remoteGeneration.neverGenerateRemote()) {
|
|
12221
|
+
require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
12048
12222
|
return [];
|
|
12049
12223
|
}
|
|
12050
12224
|
const testCases = await getHarmfulTests(params, category);
|
|
@@ -12053,7 +12227,7 @@ const pluginFactories = [
|
|
|
12053
12227
|
...testCase,
|
|
12054
12228
|
metadata: {
|
|
12055
12229
|
...testCase.metadata,
|
|
12056
|
-
pluginId:
|
|
12230
|
+
pluginId: require_util$2.getShortPluginId(category),
|
|
12057
12231
|
pluginConfig: {
|
|
12058
12232
|
...params.config,
|
|
12059
12233
|
modifiers: computedModifiers
|
|
@@ -12066,14 +12240,15 @@ const pluginFactories = [
|
|
|
12066
12240
|
const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
|
|
12067
12241
|
key: category,
|
|
12068
12242
|
action: async (params) => {
|
|
12069
|
-
if (
|
|
12243
|
+
if (require_remoteGeneration.shouldGenerateRemote()) {
|
|
12244
|
+
const pluginId = require_util$2.getShortPluginId(category);
|
|
12070
12245
|
const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
|
|
12071
12246
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
12072
12247
|
return testCases.map((testCase) => ({
|
|
12073
12248
|
...testCase,
|
|
12074
12249
|
metadata: {
|
|
12075
12250
|
...testCase.metadata,
|
|
12076
|
-
pluginId
|
|
12251
|
+
pluginId,
|
|
12077
12252
|
pluginConfig: {
|
|
12078
12253
|
...params.config,
|
|
12079
12254
|
modifiers: computedModifiers
|
|
@@ -12086,7 +12261,7 @@ const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
|
|
|
12086
12261
|
...testCase,
|
|
12087
12262
|
metadata: {
|
|
12088
12263
|
...testCase.metadata,
|
|
12089
|
-
pluginId:
|
|
12264
|
+
pluginId: require_util$2.getShortPluginId(category)
|
|
12090
12265
|
}
|
|
12091
12266
|
}));
|
|
12092
12267
|
}
|
|
@@ -12094,17 +12269,18 @@ const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
|
|
|
12094
12269
|
const biasPlugins = require_types.BIAS_PLUGINS.map((category) => ({
|
|
12095
12270
|
key: category,
|
|
12096
12271
|
action: async (params) => {
|
|
12097
|
-
if (
|
|
12098
|
-
require_logger.logger.error(`${category} plugin
|
|
12272
|
+
if (require_remoteGeneration.neverGenerateRemote()) {
|
|
12273
|
+
require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
12099
12274
|
return [];
|
|
12100
12275
|
}
|
|
12276
|
+
const pluginId = require_util$2.getShortPluginId(category);
|
|
12101
12277
|
const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
|
|
12102
12278
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
12103
12279
|
return testCases.map((testCase) => ({
|
|
12104
12280
|
...testCase,
|
|
12105
12281
|
metadata: {
|
|
12106
12282
|
...testCase.metadata,
|
|
12107
|
-
pluginId
|
|
12283
|
+
pluginId,
|
|
12108
12284
|
pluginConfig: {
|
|
12109
12285
|
...params.config,
|
|
12110
12286
|
modifiers: computedModifiers
|
|
@@ -12119,17 +12295,18 @@ function createRemotePlugin(key, validate) {
|
|
|
12119
12295
|
validate,
|
|
12120
12296
|
action: async ({ purpose, injectVar, n, config }) => {
|
|
12121
12297
|
const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
|
|
12122
|
-
if (
|
|
12123
|
-
require_logger.logger.error(`${key} plugin
|
|
12298
|
+
if (require_remoteGeneration.neverGenerateRemote()) {
|
|
12299
|
+
require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
|
|
12124
12300
|
return [];
|
|
12125
12301
|
}
|
|
12302
|
+
const pluginId = require_util$2.getShortPluginId(key);
|
|
12126
12303
|
const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
|
|
12127
12304
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
12128
12305
|
const testsWithMetadata = testCases.map((testCase) => ({
|
|
12129
12306
|
...testCase,
|
|
12130
12307
|
metadata: {
|
|
12131
12308
|
...testCase.metadata,
|
|
12132
|
-
pluginId
|
|
12309
|
+
pluginId,
|
|
12133
12310
|
pluginConfig: {
|
|
12134
12311
|
...configWithDefaults,
|
|
12135
12312
|
modifiers: computedModifiers
|
|
@@ -12190,6 +12367,15 @@ async function validateSharpDependency(strategies, plugins, checkSharp = isSharp
|
|
|
12190
12367
|
}
|
|
12191
12368
|
//#endregion
|
|
12192
12369
|
//#region src/redteam/index.ts
|
|
12370
|
+
const MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY = "__promptfooMaterializedMultiInputPrompt";
|
|
12371
|
+
function getMaterializedMultiInputPromptSnapshot(metadata) {
|
|
12372
|
+
const snapshot = metadata?.[MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY];
|
|
12373
|
+
return typeof snapshot === "string" ? snapshot : void 0;
|
|
12374
|
+
}
|
|
12375
|
+
function getMaterializedMultiInputPromptMetadata(vars) {
|
|
12376
|
+
const prompt = vars?.[require_types.MULTI_INPUT_VAR];
|
|
12377
|
+
return typeof prompt === "string" ? { [MATERIALIZED_MULTI_INPUT_PROMPT_METADATA_KEY]: prompt } : void 0;
|
|
12378
|
+
}
|
|
12193
12379
|
function getPolicyText(metadata) {
|
|
12194
12380
|
if (!metadata || metadata.policy === void 0 || metadata.policy === null) return;
|
|
12195
12381
|
const policyValue = metadata.policy;
|
|
@@ -12199,6 +12385,44 @@ function getPolicyText(metadata) {
|
|
|
12199
12385
|
return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
|
|
12200
12386
|
}
|
|
12201
12387
|
}
|
|
12388
|
+
async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
|
|
12389
|
+
const inputs = testCase.metadata?.pluginConfig?.inputs;
|
|
12390
|
+
const inputMaterialization = testCase.metadata?.inputMaterialization;
|
|
12391
|
+
const materializedPromptSnapshot = getMaterializedMultiInputPromptSnapshot(testCase.metadata);
|
|
12392
|
+
const currentInjectVar = testCase.vars?.[injectVar];
|
|
12393
|
+
if (!inputs || Object.keys(inputs).length === 0 || !currentInjectVar) return {
|
|
12394
|
+
inputMaterialization,
|
|
12395
|
+
vars: testCase.vars
|
|
12396
|
+
};
|
|
12397
|
+
const promptChangedSinceMaterialization = typeof currentInjectVar === "string" && typeof materializedPromptSnapshot === "string" && currentInjectVar !== materializedPromptSnapshot;
|
|
12398
|
+
if (Boolean(inputMaterialization) && Object.keys(inputs).every((key) => Object.prototype.hasOwnProperty.call(testCase.vars ?? {}, key)) && !promptChangedSinceMaterialization) return {
|
|
12399
|
+
inputMaterialization,
|
|
12400
|
+
vars: testCase.vars
|
|
12401
|
+
};
|
|
12402
|
+
try {
|
|
12403
|
+
const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(currentInjectVar)), inputs, {
|
|
12404
|
+
materializationIndex,
|
|
12405
|
+
pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
|
|
12406
|
+
provider,
|
|
12407
|
+
purpose
|
|
12408
|
+
});
|
|
12409
|
+
return {
|
|
12410
|
+
inputMaterialization: materializedVars.metadata ? {
|
|
12411
|
+
...inputMaterialization,
|
|
12412
|
+
...materializedVars.metadata
|
|
12413
|
+
} : inputMaterialization,
|
|
12414
|
+
vars: {
|
|
12415
|
+
...testCase.vars,
|
|
12416
|
+
...materializedVars.vars
|
|
12417
|
+
}
|
|
12418
|
+
};
|
|
12419
|
+
} catch {
|
|
12420
|
+
return {
|
|
12421
|
+
inputMaterialization,
|
|
12422
|
+
vars: testCase.vars
|
|
12423
|
+
};
|
|
12424
|
+
}
|
|
12425
|
+
}
|
|
12202
12426
|
/**
|
|
12203
12427
|
* Gets the severity level for a plugin based on its ID and configuration.
|
|
12204
12428
|
* @param pluginId - The ID of the plugin.
|
|
@@ -12207,7 +12431,7 @@ function getPolicyText(metadata) {
|
|
|
12207
12431
|
*/
|
|
12208
12432
|
function getPluginSeverity(pluginId, pluginConfig) {
|
|
12209
12433
|
if (pluginConfig?.severity) return pluginConfig.severity;
|
|
12210
|
-
const shortId =
|
|
12434
|
+
const shortId = require_util$2.getShortPluginId(pluginId);
|
|
12211
12435
|
return shortId in require_types.riskCategorySeverityMap ? require_types.riskCategorySeverityMap[shortId] : require_types.Severity.Low;
|
|
12212
12436
|
}
|
|
12213
12437
|
const POLICY_PREVIEW_MAX_LENGTH = 20;
|
|
@@ -12332,14 +12556,15 @@ function buildRedteamModifiers({ maxCharsPerMessage, pluginConfig, testGeneratio
|
|
|
12332
12556
|
...testGenerationInstructions ? { testGenerationInstructions } : {},
|
|
12333
12557
|
...pluginConfig?.modifiers ?? {}
|
|
12334
12558
|
};
|
|
12335
|
-
const maxCharsPerMessageModifier =
|
|
12336
|
-
if (maxCharsPerMessageModifier) modifiers[
|
|
12559
|
+
const maxCharsPerMessageModifier = require_promptLength.getMaxCharsPerMessageModifierValue(maxCharsPerMessage ?? pluginConfig?.maxCharsPerMessage);
|
|
12560
|
+
if (maxCharsPerMessageModifier) modifiers[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsPerMessageModifier;
|
|
12337
12561
|
return modifiers;
|
|
12338
12562
|
}
|
|
12339
12563
|
const categories = {
|
|
12340
12564
|
foundation: require_types.FOUNDATION_PLUGINS,
|
|
12341
12565
|
harmful: Object.keys(require_types.HARM_PLUGINS),
|
|
12342
12566
|
"coding-agent:core": require_types.CODING_AGENT_CORE_PLUGINS,
|
|
12567
|
+
"coding-agent:all": require_types.CODING_AGENT_PLUGINS,
|
|
12343
12568
|
bias: require_types.BIAS_PLUGINS,
|
|
12344
12569
|
pii: require_types.PII_PLUGINS,
|
|
12345
12570
|
medical: require_types.MEDICAL_PLUGINS,
|
|
@@ -12369,7 +12594,7 @@ function getLanguageForTestCase(test) {
|
|
|
12369
12594
|
function filterOversizedTestCases(testCases, injectVar, sourceLabel, maxCharsPerMessage) {
|
|
12370
12595
|
return testCases.filter((testCase) => {
|
|
12371
12596
|
const testCaseMaxCharsPerMessage = maxCharsPerMessage ?? (testCase.metadata?.strategyConfig)?.maxCharsPerMessage ?? (testCase.metadata?.pluginConfig)?.maxCharsPerMessage;
|
|
12372
|
-
const violation =
|
|
12597
|
+
const violation = require_promptLength.getGeneratedPromptOverLimit(String(testCase.vars?.[injectVar] ?? ""), testCaseMaxCharsPerMessage);
|
|
12373
12598
|
if (!violation) return true;
|
|
12374
12599
|
require_logger.logger.warn(`[${sourceLabel}] Dropping generated test case that exceeds maxCharsPerMessage=${violation.limit} (${violation.length} chars)`);
|
|
12375
12600
|
return false;
|
|
@@ -12407,7 +12632,8 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
12407
12632
|
...test.metadata?.modifiers,
|
|
12408
12633
|
...languageToAdd
|
|
12409
12634
|
},
|
|
12410
|
-
...languageToAdd
|
|
12635
|
+
...languageToAdd,
|
|
12636
|
+
...getMaterializedMultiInputPromptMetadata(test.vars)
|
|
12411
12637
|
}
|
|
12412
12638
|
};
|
|
12413
12639
|
}
|
|
@@ -12434,18 +12660,18 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
12434
12660
|
* @param injectVar - The variable to inject.
|
|
12435
12661
|
* @returns An array of new test cases generated by strategies.
|
|
12436
12662
|
*/
|
|
12437
|
-
async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
12663
|
+
async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
12438
12664
|
const newTestCases = [];
|
|
12439
12665
|
const strategyResults = {};
|
|
12440
12666
|
for (const strategy of strategies) {
|
|
12441
12667
|
require_logger.logger.debug(`Generating ${strategy.id} tests`);
|
|
12442
12668
|
let strategyAction;
|
|
12443
|
-
if (strategy.id.startsWith("file://")) strategyAction = (await
|
|
12669
|
+
if (strategy.id.startsWith("file://")) strategyAction = (await require_strategies.loadStrategy(strategy.id)).action;
|
|
12444
12670
|
else {
|
|
12445
|
-
let builtinStrategy =
|
|
12671
|
+
let builtinStrategy = require_strategies.Strategies.find((s) => s.id === strategy.id);
|
|
12446
12672
|
if (!builtinStrategy && strategy.id.includes(":")) {
|
|
12447
12673
|
const baseStrategyId = strategy.id.split(":")[0];
|
|
12448
|
-
builtinStrategy =
|
|
12674
|
+
builtinStrategy = require_strategies.Strategies.find((s) => s.id === baseStrategyId);
|
|
12449
12675
|
}
|
|
12450
12676
|
if (!builtinStrategy) {
|
|
12451
12677
|
require_logger.logger.warn(`Strategy ${strategy.id} not registered, skipping`);
|
|
@@ -12455,7 +12681,7 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12455
12681
|
}
|
|
12456
12682
|
const targetPlugins = strategy.config?.plugins;
|
|
12457
12683
|
const applicableTestCases = testCases.filter((t) => {
|
|
12458
|
-
if (!
|
|
12684
|
+
if (!require_strategies.pluginMatchesStrategyTargets(t, strategy.id, targetPlugins)) return false;
|
|
12459
12685
|
if (t.metadata?.retry === true) {
|
|
12460
12686
|
require_logger.logger.debug(`Skipping ${strategy.id} for retry test (plugin: ${t.metadata?.pluginId}) - retry tests are not transformed`);
|
|
12461
12687
|
return false;
|
|
@@ -12490,14 +12716,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12490
12716
|
}
|
|
12491
12717
|
}
|
|
12492
12718
|
resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
|
|
12493
|
-
newTestCases.push(...resultTestCases.map((t) => {
|
|
12494
|
-
const
|
|
12495
|
-
let updatedVars = t.vars;
|
|
12496
|
-
if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
|
|
12497
|
-
const parsed = JSON.parse(String(t.vars[injectVar]));
|
|
12498
|
-
updatedVars = { ...t.vars };
|
|
12499
|
-
Object.assign(updatedVars, require_providers.extractVariablesFromJson(parsed, inputs));
|
|
12500
|
-
} catch {}
|
|
12719
|
+
newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
|
|
12720
|
+
const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
|
|
12501
12721
|
const strategyConfig = {
|
|
12502
12722
|
...strategy.config || {},
|
|
12503
12723
|
...maxCharsPerMessage ? { maxCharsPerMessage } : {},
|
|
@@ -12505,16 +12725,18 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12505
12725
|
};
|
|
12506
12726
|
return {
|
|
12507
12727
|
...t,
|
|
12508
|
-
vars
|
|
12728
|
+
vars,
|
|
12509
12729
|
metadata: {
|
|
12510
12730
|
...t?.metadata || {},
|
|
12511
12731
|
...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
|
|
12512
12732
|
...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
|
|
12513
12733
|
...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
|
|
12514
|
-
...
|
|
12734
|
+
...inputMaterialization && { inputMaterialization },
|
|
12735
|
+
...Object.keys(strategyConfig).length > 0 && { strategyConfig },
|
|
12736
|
+
...getMaterializedMultiInputPromptMetadata(vars)
|
|
12515
12737
|
}
|
|
12516
12738
|
};
|
|
12517
|
-
}));
|
|
12739
|
+
})));
|
|
12518
12740
|
const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
|
|
12519
12741
|
const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
|
|
12520
12742
|
const applyNumTestsCap = (calculatedRequested) => {
|
|
@@ -12672,10 +12894,10 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12672
12894
|
seen.add(key);
|
|
12673
12895
|
return true;
|
|
12674
12896
|
});
|
|
12675
|
-
const needsGoalExtraction = strategies.some((s) =>
|
|
12676
|
-
await
|
|
12897
|
+
const needsGoalExtraction = strategies.some((s) => require_strategies.Strategies.find((def) => def.id === s.id)?.requiresGoalExtraction);
|
|
12898
|
+
await require_strategies.validateStrategies(strategies);
|
|
12677
12899
|
await validateSharpDependency(strategies, plugins);
|
|
12678
|
-
const redteamProvider = await
|
|
12900
|
+
const redteamProvider = await require_shared.redteamProviderManager.getProvider({ provider });
|
|
12679
12901
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
12680
12902
|
require_logger.logger.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
|
|
12681
12903
|
const pluginLanguageConfig = p.config?.language ?? language;
|
|
@@ -12779,8 +13001,8 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12779
13001
|
};
|
|
12780
13002
|
require_logger.logger.debug("Validating plugins...");
|
|
12781
13003
|
plugins = [...new Set(expandedPlugins)].filter(validatePlugin).sort();
|
|
12782
|
-
if (
|
|
12783
|
-
const healthUrl =
|
|
13004
|
+
if (require_remoteGeneration.shouldGenerateRemote()) {
|
|
13005
|
+
const healthUrl = require_remoteGeneration.getRemoteHealthUrl();
|
|
12784
13006
|
if (healthUrl) {
|
|
12785
13007
|
require_logger.logger.debug(`Checking Promptfoo API health at ${healthUrl}...`);
|
|
12786
13008
|
const healthResult = await checkRemoteHealth(healthUrl);
|
|
@@ -12857,14 +13079,21 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12857
13079
|
}
|
|
12858
13080
|
});
|
|
12859
13081
|
const languageResults = await Promise.allSettled(languagePromises);
|
|
12860
|
-
for (const result of languageResults) if (result.status === "fulfilled") {
|
|
13082
|
+
for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
|
|
12861
13083
|
const { lang, tests, requested, generated } = result.value;
|
|
12862
13084
|
allPluginTests.push(...tests);
|
|
12863
13085
|
resultsPerLanguage[lang || "default"] = {
|
|
12864
13086
|
requested,
|
|
12865
13087
|
generated
|
|
12866
13088
|
};
|
|
12867
|
-
} else
|
|
13089
|
+
} else {
|
|
13090
|
+
const lang = languages[index];
|
|
13091
|
+
require_logger.logger.warn(`[Language Processing] Error generating tests for ${plugin.id}: ${result.reason}`);
|
|
13092
|
+
resultsPerLanguage[lang || "default"] = {
|
|
13093
|
+
requested: plugin.numTests,
|
|
13094
|
+
generated: 0
|
|
13095
|
+
};
|
|
13096
|
+
}
|
|
12868
13097
|
require_logger.logger.debug(`[Language Processing] Total tests generated for ${plugin.id}: ${allPluginTests.length} (across ${languages.length} language(s))`);
|
|
12869
13098
|
if (!Array.isArray(allPluginTests) || allPluginTests.length === 0) require_logger.logger.warn(`Failed to generate tests for ${plugin.id}`);
|
|
12870
13099
|
else {
|
|
@@ -12875,7 +13104,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12875
13104
|
const promptVar = testCase.vars?.[injectVar];
|
|
12876
13105
|
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
12877
13106
|
const policy = getPolicyText(testCase.metadata);
|
|
12878
|
-
const extractedGoal = await
|
|
13107
|
+
const extractedGoal = await require_util$2.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
12879
13108
|
testCase.metadata.goal = extractedGoal;
|
|
12880
13109
|
}
|
|
12881
13110
|
}
|
|
@@ -12898,49 +13127,73 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12898
13127
|
generated: allPluginTests.length
|
|
12899
13128
|
};
|
|
12900
13129
|
} else if (plugin.id.startsWith("file://")) try {
|
|
12901
|
-
const
|
|
12902
|
-
const
|
|
12903
|
-
|
|
12904
|
-
|
|
13130
|
+
const languageConfig = plugin.config?.language ?? language;
|
|
13131
|
+
const languages = Array.isArray(languageConfig) ? languageConfig : languageConfig ? [languageConfig] : [void 0];
|
|
13132
|
+
const allCustomTests = [];
|
|
13133
|
+
const resultsPerLanguage = {};
|
|
13134
|
+
const languagePromises = languages.map(async (lang) => {
|
|
13135
|
+
const resolvedConfig = {
|
|
12905
13136
|
...resolvePluginConfigWithMaxChars(plugin.config, maxCharsPerMessage),
|
|
12906
|
-
...
|
|
13137
|
+
...lang ? { language: lang } : {},
|
|
13138
|
+
...hasMultipleInputs ? { inputs } : {}
|
|
12907
13139
|
};
|
|
12908
|
-
const
|
|
12909
|
-
...
|
|
13140
|
+
const customPluginConfig = {
|
|
13141
|
+
...resolvedConfig,
|
|
13142
|
+
modifiers: buildRedteamModifiers({
|
|
12910
13143
|
maxCharsPerMessage,
|
|
12911
|
-
pluginConfig:
|
|
13144
|
+
pluginConfig: resolvedConfig,
|
|
12912
13145
|
testGenerationInstructions
|
|
12913
|
-
})
|
|
12914
|
-
...t.metadata?.modifiers
|
|
13146
|
+
})
|
|
12915
13147
|
};
|
|
13148
|
+
const testCasesWithMetadata = filterOversizedTestCases((await new CustomPlugin(redteamProvider, purpose, injectVar, plugin.id, customPluginConfig).generateTests(plugin.numTests, delay)).map((t) => addLanguageToPluginMetadata(t, lang, plugin, maxCharsPerMessage, testGenerationInstructions)), injectVar, `Custom plugin ${plugin.id}`, maxCharsPerMessage);
|
|
12916
13149
|
return {
|
|
12917
|
-
|
|
12918
|
-
|
|
12919
|
-
|
|
12920
|
-
|
|
12921
|
-
|
|
12922
|
-
|
|
12923
|
-
|
|
12924
|
-
|
|
13150
|
+
lang,
|
|
13151
|
+
tests: testCasesWithMetadata,
|
|
13152
|
+
requested: plugin.numTests,
|
|
13153
|
+
generated: testCasesWithMetadata.length
|
|
13154
|
+
};
|
|
13155
|
+
});
|
|
13156
|
+
const languageResults = await Promise.allSettled(languagePromises);
|
|
13157
|
+
for (const [index, result] of languageResults.entries()) if (result.status === "fulfilled") {
|
|
13158
|
+
const { lang, tests, requested, generated } = result.value;
|
|
13159
|
+
allCustomTests.push(...tests);
|
|
13160
|
+
resultsPerLanguage[lang || "default"] = {
|
|
13161
|
+
requested,
|
|
13162
|
+
generated
|
|
12925
13163
|
};
|
|
12926
|
-
}
|
|
13164
|
+
} else {
|
|
13165
|
+
const lang = languages[index];
|
|
13166
|
+
require_logger.logger.warn(`[Language Processing] Error generating tests for custom plugin ${plugin.id}: ${result.reason}`);
|
|
13167
|
+
resultsPerLanguage[lang || "default"] = {
|
|
13168
|
+
requested: plugin.numTests,
|
|
13169
|
+
generated: 0
|
|
13170
|
+
};
|
|
13171
|
+
}
|
|
12927
13172
|
if (needsGoalExtraction) {
|
|
12928
|
-
require_logger.logger.debug(`Extracting goal for ${
|
|
12929
|
-
for (const testCase of
|
|
13173
|
+
require_logger.logger.debug(`Extracting goal for ${allCustomTests.length} custom tests from ${plugin.id}...`);
|
|
13174
|
+
for (const testCase of allCustomTests) {
|
|
12930
13175
|
const promptVar = testCase.vars?.[injectVar];
|
|
12931
13176
|
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
12932
13177
|
const policy = getPolicyText(testCase.metadata);
|
|
12933
|
-
const extractedGoal = await
|
|
13178
|
+
const extractedGoal = await require_util$2.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
12934
13179
|
testCase.metadata.goal = extractedGoal;
|
|
12935
13180
|
}
|
|
12936
13181
|
}
|
|
12937
|
-
testCases.push(...
|
|
12938
|
-
require_logger.logger.debug(`Added ${
|
|
12939
|
-
const
|
|
12940
|
-
|
|
12941
|
-
|
|
12942
|
-
|
|
13182
|
+
testCases.push(...allCustomTests);
|
|
13183
|
+
require_logger.logger.debug(`Added ${allCustomTests.length} custom test cases from ${plugin.id}`);
|
|
13184
|
+
const baseDisplayId = getPluginDisplayId(plugin);
|
|
13185
|
+
if (languages.filter((lang) => lang !== void 0).length > 1) for (const [langKey, result] of Object.entries(resultsPerLanguage)) {
|
|
13186
|
+
const displayId = langKey === "en" ? baseDisplayId : `(${langKey}) ${baseDisplayId}`;
|
|
13187
|
+
pluginResults[displayId] = {
|
|
13188
|
+
requested: result.requested,
|
|
13189
|
+
generated: result.generated
|
|
13190
|
+
};
|
|
13191
|
+
}
|
|
13192
|
+
else pluginResults[baseDisplayId] = {
|
|
13193
|
+
requested: plugin.numTests * languages.length,
|
|
13194
|
+
generated: allCustomTests.length
|
|
12943
13195
|
};
|
|
13196
|
+
progressBar?.increment(plugin.numTests * languages.length);
|
|
12944
13197
|
} catch (e) {
|
|
12945
13198
|
require_logger.logger.error(`Error generating tests for custom plugin ${plugin.id}: ${e}`);
|
|
12946
13199
|
const displayId = getPluginDisplayId(plugin);
|
|
@@ -12969,7 +13222,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12969
13222
|
targetIds,
|
|
12970
13223
|
...retryStrategy.config
|
|
12971
13224
|
};
|
|
12972
|
-
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
|
|
13225
|
+
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
|
|
12973
13226
|
pluginTestCases.push(...retryTestCases);
|
|
12974
13227
|
Object.assign(strategyResults, retryResults);
|
|
12975
13228
|
if (showProgressBar) progressBar?.increment(retryTestCases.length);
|
|
@@ -12977,7 +13230,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12977
13230
|
checkAbort();
|
|
12978
13231
|
const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
|
|
12979
13232
|
if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
|
|
12980
|
-
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
13233
|
+
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
12981
13234
|
Object.assign(strategyResults, otherStrategyResults);
|
|
12982
13235
|
if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
|
|
12983
13236
|
const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
|
|
@@ -13104,7 +13357,7 @@ async function doGenerateRedteam(options) {
|
|
|
13104
13357
|
redteamConfig = resolved.config.redteam;
|
|
13105
13358
|
commandLineOptions = resolved.commandLineOptions;
|
|
13106
13359
|
resolvedConfig = resolved.config;
|
|
13107
|
-
await
|
|
13360
|
+
await require_storage.checkCloudPermissions(resolved.config);
|
|
13108
13361
|
if (redteamConfig && resolved.testSuite.tests && resolved.testSuite.tests.length > 0) require_logger.logger.warn(chalk.default.yellow(dedent.default`
|
|
13109
13362
|
⚠️ Warning: Found both 'tests' section and 'redteam' configuration in your config file.
|
|
13110
13363
|
|
|
@@ -13119,8 +13372,8 @@ async function doGenerateRedteam(options) {
|
|
|
13119
13372
|
`));
|
|
13120
13373
|
try {
|
|
13121
13374
|
const providerId = require_providers.getProviderIds(resolved.config.providers)[0];
|
|
13122
|
-
if (
|
|
13123
|
-
const overrides = await
|
|
13375
|
+
if (require_storage.isCloudProvider(providerId)) {
|
|
13376
|
+
const overrides = await require_storage.getPluginSeverityOverridesFromCloud(require_storage.getCloudDatabaseId(providerId));
|
|
13124
13377
|
if (overrides) {
|
|
13125
13378
|
pluginSeverityOverrides = new Map(Object.entries(overrides.severities));
|
|
13126
13379
|
pluginSeverityOverridesId = overrides.id;
|
|
@@ -13138,7 +13391,7 @@ async function doGenerateRedteam(options) {
|
|
|
13138
13391
|
require_logger.logger.info(chalk.default.red(`\nCan't generate without configuration - run ${chalk.default.yellow.bold(promptfooCommand("redteam init"))} first`));
|
|
13139
13392
|
return null;
|
|
13140
13393
|
}
|
|
13141
|
-
if (!
|
|
13394
|
+
if (!require_remoteGeneration.neverGenerateRemote()) {
|
|
13142
13395
|
let hasValidEmail = false;
|
|
13143
13396
|
while (!hasValidEmail) {
|
|
13144
13397
|
const { emailNeedsValidation } = await require_accounts.promptForEmailUnverified();
|
|
@@ -13202,7 +13455,7 @@ async function doGenerateRedteam(options) {
|
|
|
13202
13455
|
}
|
|
13203
13456
|
const policyPluginsWithRefs = plugins.filter((plugin) => plugin.config?.policy && require_graders.isValidPolicyObject(plugin.config?.policy) && require_graders.determinePolicyTypeFromId(plugin.config.policy.id) === "reusable");
|
|
13204
13457
|
if (policyPluginsWithRefs.length > 0) {
|
|
13205
|
-
const teamId = (await
|
|
13458
|
+
const teamId = (await require_storage.resolveTeamId()).id;
|
|
13206
13459
|
const policiesById = await require_graders.getCustomPolicies(policyPluginsWithRefs, teamId);
|
|
13207
13460
|
for (const policyPlugin of policyPluginsWithRefs) {
|
|
13208
13461
|
const policyId = policyPlugin.config.policy.id;
|
|
@@ -13774,7 +14027,7 @@ async function sendChunkedResults(evalRecord, url$5, options = {}) {
|
|
|
13774
14027
|
const isVerbose = require_logger.isDebugEnabled();
|
|
13775
14028
|
const { silent = false } = options;
|
|
13776
14029
|
require_logger.logger.debug(`Starting chunked results upload to ${url$5}`);
|
|
13777
|
-
await
|
|
14030
|
+
await require_storage.checkCloudPermissions(evalRecord.config);
|
|
13778
14031
|
const inlineBlobs = require_extractor.isBlobStorageEnabled() && require_logger.getEnvBool("PROMPTFOO_SHARE_INLINE_BLOBS", !require_fetch.cloudConfig.isEnabled());
|
|
13779
14032
|
const inlineCache = inlineBlobs ? createBlobInlineCache() : null;
|
|
13780
14033
|
let sampleResults = (await evalRecord.fetchResultsBatched(100).next()).value ?? [];
|
|
@@ -13870,6 +14123,10 @@ function stripAuthFromUrl(urlString) {
|
|
|
13870
14123
|
}
|
|
13871
14124
|
}
|
|
13872
14125
|
async function handleEmailCollection(evalRecord) {
|
|
14126
|
+
if (evalRecord.author) {
|
|
14127
|
+
require_logger.logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
|
|
14128
|
+
return;
|
|
14129
|
+
}
|
|
13873
14130
|
if (!process.stdout.isTTY || require_logger.isCI() || require_logger.getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
|
|
13874
14131
|
let email = require_accounts.getUserEmail();
|
|
13875
14132
|
if (!email) {
|
|
@@ -13912,7 +14169,7 @@ async function createShareableUrl(evalRecord, options = {}) {
|
|
|
13912
14169
|
return null;
|
|
13913
14170
|
}
|
|
13914
14171
|
if (!silent) {
|
|
13915
|
-
const orgContext = await
|
|
14172
|
+
const orgContext = await require_storage.getOrgContext();
|
|
13916
14173
|
if (orgContext) {
|
|
13917
14174
|
const teamSuffix = orgContext.teamName ? ` > ${orgContext.teamName}` : "";
|
|
13918
14175
|
require_logger.logger.info(`${chalk.default.dim("Sharing to:")} ${chalk.default.cyan(orgContext.organizationName)}${teamSuffix}`);
|
|
@@ -14013,6 +14270,14 @@ function shouldShareResults(opts) {
|
|
|
14013
14270
|
return require_fetch.cloudConfig.isEnabled() && sharing !== false;
|
|
14014
14271
|
}
|
|
14015
14272
|
//#endregion
|
|
14273
|
+
//#region src/commands/eval/redteamWarning.ts
|
|
14274
|
+
function warnIfRedteamConfigHasNoTests(config, testSuite) {
|
|
14275
|
+
if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) require_logger.logger.warn(chalk.default.yellow(dedent.default`
|
|
14276
|
+
Warning: Config file has a redteam section but no test cases.
|
|
14277
|
+
Did you mean to run ${chalk.default.bold("promptfoo redteam generate")} instead?
|
|
14278
|
+
`));
|
|
14279
|
+
}
|
|
14280
|
+
//#endregion
|
|
14016
14281
|
//#region src/util/formatDuration.ts
|
|
14017
14282
|
/**
|
|
14018
14283
|
* Formats a duration in seconds into a human-readable string
|
|
@@ -14359,7 +14624,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14359
14624
|
const cloudConfigId = uuidConfigArgs[0];
|
|
14360
14625
|
if (cmdObj.watch) throw new Error("--watch is not supported when using a cloud config UUID with -c. Use a local config file path for watch mode.");
|
|
14361
14626
|
try {
|
|
14362
|
-
defaultConfig = await
|
|
14627
|
+
defaultConfig = await require_storage.getEvalConfigFromCloud(cloudConfigId);
|
|
14363
14628
|
} catch (error) {
|
|
14364
14629
|
const reason = error instanceof Error ? error.message : String(error);
|
|
14365
14630
|
throw new Error(`Failed to load cloud eval config "${cloudConfigId}". ${reason}. Cloud UUID inputs do not fall back to local file paths. Check authentication and that the UUID exists.`);
|
|
@@ -14453,14 +14718,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14453
14718
|
require_logger.state.resume = true;
|
|
14454
14719
|
require_logger.state.retryMode = true;
|
|
14455
14720
|
} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
|
|
14456
|
-
if (!cmdObj.envPath && commandLineOptions?.envPath) {
|
|
14721
|
+
if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
|
|
14457
14722
|
require_logger.logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
|
|
14458
14723
|
require_util.setupEnv(commandLineOptions.envPath);
|
|
14459
14724
|
}
|
|
14460
|
-
|
|
14461
|
-
Warning: Config file has a redteam section but no test cases.
|
|
14462
|
-
Did you mean to run ${chalk.default.bold("promptfoo redteam generate")} instead?
|
|
14463
|
-
`));
|
|
14725
|
+
warnIfRedteamConfigHasNoTests(config, testSuite);
|
|
14464
14726
|
if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
|
|
14465
14727
|
const maybeUrl = config.providers[0]?.config?.url;
|
|
14466
14728
|
if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) require_telemetry.telemetry.record("feature_used", { feature: "redteam_run_with_example" });
|
|
@@ -14508,7 +14770,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14508
14770
|
};
|
|
14509
14771
|
testSuite.tests = await filterTests(testSuite, filterOptions);
|
|
14510
14772
|
}
|
|
14511
|
-
if (!
|
|
14773
|
+
if (!require_remoteGeneration.neverGenerateRemote() && config.redteam && config.redteam.plugins && config.redteam.plugins.length > 0 && testSuite.tests && testSuite.tests.length > 0) {
|
|
14512
14774
|
let hasValidEmail = false;
|
|
14513
14775
|
while (!hasValidEmail) {
|
|
14514
14776
|
const { emailNeedsValidation } = await require_accounts.promptForEmailUnverified();
|
|
@@ -14526,7 +14788,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14526
14788
|
process.exitCode = 1;
|
|
14527
14789
|
return new Eval({}, { persisted: false });
|
|
14528
14790
|
}
|
|
14529
|
-
await
|
|
14791
|
+
await require_storage.checkCloudPermissions(config);
|
|
14530
14792
|
const options = {
|
|
14531
14793
|
...evaluateOptions,
|
|
14532
14794
|
showProgressBar: require_logger.getLogLevel() === "debug" ? false : cmdObj.progressBar === void 0 ? evaluateOptions.showProgressBar === void 0 ? true : evaluateOptions.showProgressBar : cmdObj.progressBar !== false,
|
|
@@ -14568,7 +14830,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14568
14830
|
${zod.z.prettifyError(testSuiteSchema.error)}
|
|
14569
14831
|
|
|
14570
14832
|
Please review your promptfooconfig.yaml configuration.`));
|
|
14571
|
-
const
|
|
14833
|
+
const author = require_accounts.getAuthor();
|
|
14834
|
+
const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
|
|
14835
|
+
author,
|
|
14836
|
+
runtimeOptions: options
|
|
14837
|
+
}) : new Eval(config, {
|
|
14838
|
+
author,
|
|
14839
|
+
runtimeOptions: options
|
|
14840
|
+
});
|
|
14572
14841
|
const abortController = new AbortController();
|
|
14573
14842
|
const previousAbortSignal = evaluateOptions.abortSignal;
|
|
14574
14843
|
evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
|
|
@@ -14679,7 +14948,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14679
14948
|
const paths = (Array.isArray(outputPath) ? outputPath : [outputPath]).filter((p) => typeof p === "string" && p.length > 0 && !p.endsWith(".jsonl"));
|
|
14680
14949
|
const isRedteam = Boolean(config.redteam);
|
|
14681
14950
|
const duration = Math.round((Date.now() - startTime) / 1e3);
|
|
14682
|
-
const tracker =
|
|
14951
|
+
const tracker = require_shared.TokenUsageTracker.getInstance();
|
|
14683
14952
|
const targetErrorStatus = await evalRecord.findTargetErrorStatus();
|
|
14684
14953
|
const summaryLines = generateEvalSummary({
|
|
14685
14954
|
evalId: evalRecord.id,
|
|
@@ -14709,7 +14978,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14709
14978
|
} else for (const line of summaryLines) require_logger.logger.info(line);
|
|
14710
14979
|
let shareableUrl = null;
|
|
14711
14980
|
if (sharePromise != null) {
|
|
14712
|
-
const orgContext = await
|
|
14981
|
+
const orgContext = await require_storage.getOrgContext();
|
|
14713
14982
|
const orgSuffix = orgContext ? ` to ${orgContext.organizationName}${orgContext.teamName ? ` > ${orgContext.teamName}` : ""}` : "";
|
|
14714
14983
|
if (process.stdout.isTTY && !require_logger.isCI()) {
|
|
14715
14984
|
const spinner = (0, ora.default)({
|
|
@@ -14884,7 +15153,7 @@ async function doRedteamRun(options) {
|
|
|
14884
15153
|
redteamPath = path.join(configDir, "redteam.yaml");
|
|
14885
15154
|
}
|
|
14886
15155
|
try {
|
|
14887
|
-
const healthUrl =
|
|
15156
|
+
const healthUrl = require_remoteGeneration.getRemoteHealthUrl();
|
|
14888
15157
|
if (healthUrl) {
|
|
14889
15158
|
require_logger.logger.debug(`Checking Promptfoo API health at ${healthUrl}...`);
|
|
14890
15159
|
const healthResult = await checkRemoteHealth(healthUrl);
|
|
@@ -14970,6 +15239,12 @@ async function doRedteamRun(options) {
|
|
|
14970
15239
|
return evalResult;
|
|
14971
15240
|
}
|
|
14972
15241
|
//#endregion
|
|
15242
|
+
//#region src/types/transform.ts
|
|
15243
|
+
/** Runtime type guard for `TransformFunction` values. */
|
|
15244
|
+
function isTransformFunction(value) {
|
|
15245
|
+
return typeof value === "function";
|
|
15246
|
+
}
|
|
15247
|
+
//#endregion
|
|
14973
15248
|
//#region src/index.ts
|
|
14974
15249
|
/**
|
|
14975
15250
|
* Shallow-clone a test case so the caller can swap in resolved ApiProvider
|
|
@@ -14991,66 +15266,148 @@ function cloneTestForResolve(test) {
|
|
|
14991
15266
|
if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
|
|
14992
15267
|
return cloned;
|
|
14993
15268
|
}
|
|
15269
|
+
function toSerializableProviderRef(provider) {
|
|
15270
|
+
if (require_types.isApiProvider(provider)) return require_evalResult.sanitizeProvider(provider);
|
|
15271
|
+
if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
|
|
15272
|
+
return provider;
|
|
15273
|
+
}
|
|
15274
|
+
function isRecord(value) {
|
|
15275
|
+
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
15276
|
+
}
|
|
15277
|
+
function withSerializableProvider(record) {
|
|
15278
|
+
if (!require_types.isApiProvider(record.provider)) return record;
|
|
15279
|
+
return {
|
|
15280
|
+
...record,
|
|
15281
|
+
provider: require_evalResult.sanitizeProvider(record.provider)
|
|
15282
|
+
};
|
|
15283
|
+
}
|
|
15284
|
+
/**
|
|
15285
|
+
* Function-valued transforms are first-class at runtime but are silently dropped
|
|
15286
|
+
* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
|
|
15287
|
+
* a function reference, so replace every `transform`-like field with a
|
|
15288
|
+
* `[inline function]: name` marker. Non-function values pass through unchanged.
|
|
15289
|
+
*
|
|
15290
|
+
* `droppedRef.value` is flipped to `true` the first time a function is replaced
|
|
15291
|
+
* so the caller can emit a single warning instead of logging per field.
|
|
15292
|
+
*/
|
|
15293
|
+
function replaceFunctionTransforms(record, droppedRef) {
|
|
15294
|
+
let result;
|
|
15295
|
+
for (const key of require_transform$1.TRANSFORM_KEYS) {
|
|
15296
|
+
const value = record[key];
|
|
15297
|
+
if (!isTransformFunction(value)) continue;
|
|
15298
|
+
if (!result) result = { ...record };
|
|
15299
|
+
result[key] = value.name ? `${require_transform$1.INLINE_FUNCTION_LABEL}: ${value.name}` : require_transform$1.INLINE_FUNCTION_LABEL;
|
|
15300
|
+
droppedRef.value = true;
|
|
15301
|
+
}
|
|
15302
|
+
return result ?? record;
|
|
15303
|
+
}
|
|
15304
|
+
function toSerializableAssertion(assertion, droppedRef) {
|
|
15305
|
+
if (!isRecord(assertion)) return assertion;
|
|
15306
|
+
let sanitizedAssertion = withSerializableProvider(assertion);
|
|
15307
|
+
sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
|
|
15308
|
+
if (Array.isArray(assertion.assert)) sanitizedAssertion = {
|
|
15309
|
+
...sanitizedAssertion,
|
|
15310
|
+
assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
15311
|
+
};
|
|
15312
|
+
return sanitizedAssertion;
|
|
15313
|
+
}
|
|
15314
|
+
function toSerializableTestCase(test, droppedRef) {
|
|
15315
|
+
if (!isRecord(test)) return test;
|
|
15316
|
+
let sanitizedTest = withSerializableProvider(test);
|
|
15317
|
+
if (isRecord(test.options)) {
|
|
15318
|
+
let options = withSerializableProvider(test.options);
|
|
15319
|
+
options = replaceFunctionTransforms(options, droppedRef);
|
|
15320
|
+
if (options !== test.options) sanitizedTest = {
|
|
15321
|
+
...sanitizedTest,
|
|
15322
|
+
options
|
|
15323
|
+
};
|
|
15324
|
+
}
|
|
15325
|
+
if (Array.isArray(test.assert)) sanitizedTest = {
|
|
15326
|
+
...sanitizedTest,
|
|
15327
|
+
assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
15328
|
+
};
|
|
15329
|
+
return sanitizedTest;
|
|
15330
|
+
}
|
|
15331
|
+
function toSerializableScenario(scenario, droppedRef) {
|
|
15332
|
+
if (!isRecord(scenario)) return scenario;
|
|
15333
|
+
if (!Array.isArray(scenario.tests)) return scenario;
|
|
15334
|
+
return {
|
|
15335
|
+
...scenario,
|
|
15336
|
+
tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
|
|
15337
|
+
};
|
|
15338
|
+
}
|
|
15339
|
+
function createSerializableUnifiedConfig(testSuite, prompts) {
|
|
15340
|
+
const droppedRef = { value: false };
|
|
15341
|
+
const config = {
|
|
15342
|
+
...testSuite,
|
|
15343
|
+
providers: toSerializableProviderRef(testSuite.providers),
|
|
15344
|
+
defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
|
|
15345
|
+
tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
|
|
15346
|
+
scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
|
|
15347
|
+
prompts
|
|
15348
|
+
};
|
|
15349
|
+
if (droppedRef.value && testSuite.writeLatestResults) require_logger.logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
|
|
15350
|
+
return config;
|
|
15351
|
+
}
|
|
14994
15352
|
async function evaluate(testSuite, options = {}) {
|
|
14995
|
-
|
|
14996
|
-
|
|
15353
|
+
const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
|
|
15354
|
+
if (testSuiteConfig.writeLatestResults) await runDbMigrations();
|
|
15355
|
+
const loadedProviders = await require_providers.loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
|
|
14997
15356
|
const providerMap = {};
|
|
14998
15357
|
for (const p of loadedProviders) {
|
|
14999
15358
|
providerMap[p.id()] = p;
|
|
15000
15359
|
if (p.label) providerMap[p.label] = p;
|
|
15001
15360
|
}
|
|
15002
|
-
let resolvedDefaultTest =
|
|
15003
|
-
if (typeof
|
|
15361
|
+
let resolvedDefaultTest = testSuiteConfig.defaultTest;
|
|
15362
|
+
if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await require_util.maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
|
|
15004
15363
|
const constructedTestSuite = {
|
|
15005
|
-
...
|
|
15364
|
+
...testSuiteConfig,
|
|
15006
15365
|
defaultTest: resolvedDefaultTest,
|
|
15007
|
-
scenarios:
|
|
15366
|
+
scenarios: testSuiteConfig.scenarios,
|
|
15008
15367
|
providers: loadedProviders,
|
|
15009
|
-
tests: await readTests(
|
|
15010
|
-
nunjucksFilters: await require_util.readFilters(
|
|
15011
|
-
prompts: await require_graders.processPrompts(
|
|
15368
|
+
tests: await readTests(testSuiteConfig.tests),
|
|
15369
|
+
nunjucksFilters: await require_util.readFilters(testSuiteConfig.nunjucksFilters || {}),
|
|
15370
|
+
prompts: await require_graders.processPrompts(testSuiteConfig.prompts)
|
|
15012
15371
|
};
|
|
15013
15372
|
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
15014
15373
|
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
15015
15374
|
if (constructedTestSuite.defaultTest.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
15016
|
-
env:
|
|
15375
|
+
env: testSuiteConfig.env,
|
|
15017
15376
|
basePath: require_logger.state.basePath
|
|
15018
15377
|
});
|
|
15019
15378
|
if (constructedTestSuite.defaultTest.options?.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
15020
|
-
env:
|
|
15379
|
+
env: testSuiteConfig.env,
|
|
15021
15380
|
basePath: require_logger.state.basePath
|
|
15022
15381
|
});
|
|
15023
15382
|
}
|
|
15024
15383
|
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
15025
15384
|
for (const test of constructedTestSuite.tests) {
|
|
15026
15385
|
if (test.options?.provider && !require_types.isApiProvider(test.options.provider)) test.options.provider = await require_providers.resolveProvider(test.options.provider, providerMap, {
|
|
15027
|
-
env:
|
|
15386
|
+
env: testSuiteConfig.env,
|
|
15028
15387
|
basePath: require_logger.state.basePath
|
|
15029
15388
|
});
|
|
15030
15389
|
for (const assertion of test.assert || []) {
|
|
15031
15390
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
15032
15391
|
if (assertion.provider && !require_types.isApiProvider(assertion.provider)) assertion.provider = await require_providers.resolveProvider(assertion.provider, providerMap, {
|
|
15033
|
-
env:
|
|
15392
|
+
env: testSuiteConfig.env,
|
|
15034
15393
|
basePath: require_logger.state.basePath
|
|
15035
15394
|
});
|
|
15036
15395
|
}
|
|
15037
15396
|
}
|
|
15038
15397
|
if (options.cache === false) require_cache.disableCache();
|
|
15039
|
-
const parsedProviderPromptMap = require_graders.readProviderPromptMap(
|
|
15040
|
-
const unifiedConfig =
|
|
15041
|
-
|
|
15042
|
-
|
|
15043
|
-
};
|
|
15044
|
-
const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
|
|
15398
|
+
const parsedProviderPromptMap = require_graders.readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
|
|
15399
|
+
const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
|
|
15400
|
+
const author = require_accounts.getAuthor(suiteAuthor);
|
|
15401
|
+
const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
|
|
15045
15402
|
const ret = await evaluate$1({
|
|
15046
15403
|
...constructedTestSuite,
|
|
15047
15404
|
providerPromptMap: parsedProviderPromptMap
|
|
15048
15405
|
}, evalRecord, {
|
|
15049
15406
|
eventSource: "library",
|
|
15050
|
-
isRedteam: Boolean(
|
|
15407
|
+
isRedteam: Boolean(testSuiteConfig.redteam),
|
|
15051
15408
|
...options
|
|
15052
15409
|
});
|
|
15053
|
-
if (
|
|
15410
|
+
if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
|
|
15054
15411
|
const shareableUrl = await createShareableUrl(ret, { silent: true });
|
|
15055
15412
|
if (shareableUrl) {
|
|
15056
15413
|
ret.shareableUrl = shareableUrl;
|
|
@@ -15061,9 +15418,9 @@ async function evaluate(testSuite, options = {}) {
|
|
|
15061
15418
|
require_logger.logger.warn(`Failed to create shareable URL: ${error}`);
|
|
15062
15419
|
}
|
|
15063
15420
|
else require_logger.logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
|
|
15064
|
-
if (
|
|
15065
|
-
if (typeof
|
|
15066
|
-
else if (Array.isArray(
|
|
15421
|
+
if (testSuiteConfig.outputPath) {
|
|
15422
|
+
if (typeof testSuiteConfig.outputPath === "string") await require_util.writeOutput(testSuiteConfig.outputPath, evalRecord, null);
|
|
15423
|
+
else if (Array.isArray(testSuiteConfig.outputPath)) await require_util.writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
|
|
15067
15424
|
}
|
|
15068
15425
|
return ret;
|
|
15069
15426
|
}
|
|
@@ -15075,7 +15432,7 @@ const redteam = {
|
|
|
15075
15432
|
},
|
|
15076
15433
|
Graders: require_graders.GRADERS,
|
|
15077
15434
|
Plugins,
|
|
15078
|
-
Strategies:
|
|
15435
|
+
Strategies: require_strategies.Strategies,
|
|
15079
15436
|
Base: {
|
|
15080
15437
|
Plugin: require_graders.RedteamPluginBase,
|
|
15081
15438
|
Grader: require_graders.RedteamGraderBase
|
|
@@ -15104,9 +15461,18 @@ exports.CompletedPromptSchema = require_types.CompletedPromptSchema;
|
|
|
15104
15461
|
exports.CompletionTokenDetailsSchema = require_types.CompletionTokenDetailsSchema;
|
|
15105
15462
|
exports.ConversationMessageSchema = require_types.ConversationMessageSchema;
|
|
15106
15463
|
exports.DerivedMetricSchema = require_types.DerivedMetricSchema;
|
|
15464
|
+
exports.DocumentMediaInjectionPlacementSchema = require_types.DocumentMediaInjectionPlacementSchema;
|
|
15465
|
+
exports.DocumentMediaInjectionPlacementValues = require_types.DocumentMediaInjectionPlacementValues;
|
|
15466
|
+
exports.DocxInjectionPlacementSchema = require_types.DocxInjectionPlacementSchema;
|
|
15467
|
+
exports.DocxInjectionPlacementValues = require_types.DocxInjectionPlacementValues;
|
|
15107
15468
|
exports.EvalResultsFilterMode = require_types.EvalResultsFilterMode;
|
|
15108
15469
|
exports.EvaluateOptionsSchema = require_types.EvaluateOptionsSchema;
|
|
15109
15470
|
exports.GradingConfigSchema = require_types.GradingConfigSchema;
|
|
15471
|
+
exports.InputConfigSchema = require_types.InputConfigSchema;
|
|
15472
|
+
exports.InputDefinitionObjectSchema = require_types.InputDefinitionObjectSchema;
|
|
15473
|
+
exports.InputDefinitionSchema = require_types.InputDefinitionSchema;
|
|
15474
|
+
exports.InputTypeSchema = require_types.InputTypeSchema;
|
|
15475
|
+
exports.InputTypeValues = require_types.InputTypeValues;
|
|
15110
15476
|
exports.InputsSchema = require_types.InputsSchema;
|
|
15111
15477
|
exports.NotPrefixedAssertionTypesSchema = require_types.NotPrefixedAssertionTypesSchema;
|
|
15112
15478
|
exports.OutputConfigSchema = require_types.OutputConfigSchema;
|
|
@@ -15129,6 +15495,7 @@ exports.TestSuiteSchema = require_types.TestSuiteSchema;
|
|
|
15129
15495
|
exports.UnifiedConfigSchema = require_types.UnifiedConfigSchema;
|
|
15130
15496
|
exports.VarsSchema = require_types.VarsSchema;
|
|
15131
15497
|
exports.assertions = assertions_default;
|
|
15498
|
+
exports.buildInputPromptDescription = require_types.buildInputPromptDescription;
|
|
15132
15499
|
Object.defineProperty(exports, "cache", {
|
|
15133
15500
|
enumerable: true,
|
|
15134
15501
|
get: function() {
|
|
@@ -15138,12 +15505,17 @@ Object.defineProperty(exports, "cache", {
|
|
|
15138
15505
|
exports.default = src_default;
|
|
15139
15506
|
exports.evaluate = evaluate;
|
|
15140
15507
|
exports.generateTable = generateTable;
|
|
15508
|
+
exports.getInputDescription = require_types.getInputDescription;
|
|
15509
|
+
exports.getInputType = require_types.getInputType;
|
|
15141
15510
|
exports.guardrails = guardrails;
|
|
15142
15511
|
exports.isApiProvider = require_types.isApiProvider;
|
|
15143
15512
|
exports.isGradingResult = require_types.isGradingResult;
|
|
15144
15513
|
exports.isProviderOptions = require_types.isProviderOptions;
|
|
15145
15514
|
exports.isResultFailureReason = require_types.isResultFailureReason;
|
|
15515
|
+
exports.isTransformFunction = isTransformFunction;
|
|
15146
15516
|
exports.loadApiProvider = require_providers.loadApiProvider;
|
|
15517
|
+
exports.normalizeInputDefinition = require_types.normalizeInputDefinition;
|
|
15518
|
+
exports.normalizeInputs = require_types.normalizeInputs;
|
|
15147
15519
|
exports.redteam = redteam;
|
|
15148
15520
|
|
|
15149
15521
|
//# sourceMappingURL=index.cjs.map
|