promptfoo 0.121.5 → 0.121.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-CaLNYnf7.js → accounts-Ca7WIoPY.js} +12 -7
- package/dist/src/{accounts-CLJHCDDb.js → accounts-CjFnOPmb.js} +13 -8
- package/dist/src/{accounts-BIFntVWB.cjs → accounts-CmWzeD2d.cjs} +16 -10
- package/dist/src/{accounts-bnyHT7Ju.js → accounts-DanM1wq_.js} +12 -7
- package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-CJ0j3fBi.js} +2 -2
- package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-DDEGRV9v.js} +2 -2
- package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-DvPWSUpb.cjs} +8 -7
- package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-TxUEMPYS.js} +2 -2
- package/dist/src/{agents-WULPVjbH.cjs → agents-B4sRuXg3.cjs} +7 -6
- package/dist/src/{agents-DhxWMCtH.js → agents-B8q7h_ek.js} +4 -4
- package/dist/src/{agents-BGqaTDnr.js → agents-CBgJvRkB.js} +20 -9
- package/dist/src/{agents-n6vPqV3i.js → agents-CYn2n3QP.js} +4 -4
- package/dist/src/{agents-BV9yFpXX.js → agents-D-vDNFx4.js} +20 -9
- package/dist/src/{agents-BYdMl1UE.js → agents-LrHuQqr1.js} +20 -9
- package/dist/src/{agents-emVcx3yh.js → agents-QGg76OF-.js} +2 -2
- package/dist/src/{agents-DiWmQYH9.cjs → agents-eHZ9nlgA.cjs} +21 -10
- package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-CJEbQ0o6.js} +6 -6
- package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-D5HXzZ0s.js} +6 -6
- package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-T6HGNxNe.cjs} +7 -7
- package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-eYv3a_DK.js} +6 -6
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-BNHJKN35.js +1 -0
- package/dist/src/app/assets/index-BnT6P6sF.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/{audio-Da8U9IS5.js → audio-BqnRvcWG.js} +3 -3
- package/dist/src/{audio-BvpTOArF.js → audio-CPMtV1yR.js} +3 -3
- package/dist/src/{audio-CScmnmEB.js → audio-DyiebVB3.js} +3 -3
- package/dist/src/{audio-C0vDeS0j.cjs → audio-FnxbEnSE.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-BZIywVjG.js +74 -0
- package/dist/src/authoritativeMarkupInjection-DyAXAsSr.js +75 -0
- package/dist/src/authoritativeMarkupInjection-F2gBw0lN.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-QEQmFS83.js +74 -0
- package/dist/src/{base-BTux96b1.js → base-CKLo890h.js} +4 -3
- package/dist/src/{base-BOMaNEes.js → base-Co80MMCi.js} +4 -3
- package/dist/src/{base-Tw6uhH8K.cjs → base-DGJW48uz.cjs} +5 -4
- package/dist/src/{base-dYsl2hmL.js → base-E9I8zXjz.js} +4 -3
- package/dist/src/bestOfN-B3wNzjSB.js +137 -0
- package/dist/src/bestOfN-BBsO41z4.js +136 -0
- package/dist/src/bestOfN-CAwmg5UL.cjs +140 -0
- package/dist/src/bestOfN-_kTi8Bxe.js +136 -0
- package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
- package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
- package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/{cache-DGg-yTZG.cjs → cache-CPGUA4Yl.cjs} +135 -25
- package/dist/src/cache-Cf7b4pWE.js +3 -0
- package/dist/src/{cache-Bzttsk0X.js → cache-DIXbtkNO.js} +125 -10
- package/dist/src/{cache-BI5BY7ey.js → cache-DpPWrkTE.js} +127 -11
- package/dist/src/{cache-Cr-qWIbP.js → cache-roFAE0cI.js} +125 -10
- package/dist/src/{chat-DChSH_Es.js → chat-CUCorGiL.js} +9 -9
- package/dist/src/{chat-DH97tVV9.cjs → chat-DG1wG4w0.cjs} +6 -6
- package/dist/src/{chat-Cx_LkwvZ.js → chat-Dabu84Br.js} +11 -11
- package/dist/src/{chat-BLOdH60v.js → chat-DqUFcWI0.js} +11 -11
- package/dist/src/{chat-vYqqv1gP.cjs → chat-DxTDQ83C.cjs} +14 -13
- package/dist/src/{chat-DG2LkwLq.js → chat-GmlolEwo.js} +4 -4
- package/dist/src/{chat-aMQZw6R7.js → chat-TP8Qifkh.js} +4 -4
- package/dist/src/{chat-D9nudO9b.js → chat-iwaM5UTQ.js} +4 -4
- package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
- package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
- package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-BQNp_y-F.js} +209 -64
- package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-D5Jl0SDh.js} +210 -65
- package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-DH416NBD.cjs} +216 -70
- package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-x1XJ1-pU.js} +210 -65
- package/dist/src/{cloud-Da0bofJd.js → cloud-D3DiFqH6.js} +2 -2
- package/dist/src/cloud-p96PA4MH.js +3 -0
- package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-B6NVI3ax.js} +4 -4
- package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-CEAW-xQa.cjs} +6 -6
- package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-RFSojyXG.js} +4 -4
- package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-r4tbYmWU.js} +4 -4
- package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-BCkLouto.js} +3 -3
- package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-BaZ4insB.js} +3 -3
- package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-CF-Vb-2Z.js} +3 -3
- package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-TJMLBj6I.cjs} +5 -5
- package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-B8KHEiF4.js} +5 -5
- package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-CnrLBCeA.cjs} +12 -11
- package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-DIXZ230V.js} +4 -4
- package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-Dd22dC_N.js} +5 -5
- package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-B6Wah8Pa.js} +5 -5
- package/dist/src/codex-sdk-BGjVAk23.js +3 -0
- package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-CFF6gUyi.cjs} +18 -10
- package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-CmQABzV3.js} +3 -3
- package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-D2d54RL8.js} +5 -5
- package/dist/src/{cometapi-B5ImDlSm.js → cometapi-Bu9B8NUY.js} +7 -7
- package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CtzNCHKu.js} +7 -7
- package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-DHCDlQUI.cjs} +8 -8
- package/dist/src/{cometapi-CC7hWxmX.js → cometapi-OBILPLlu.js} +7 -7
- package/dist/src/{completion-Vq_ad618.js → completion-CO2e1_62.js} +4 -4
- package/dist/src/{completion-DtQ72Bm3.cjs → completion-CSYfl2cd.cjs} +6 -6
- package/dist/src/{completion-2iuYVxwi.js → completion-DZNxcyfG.js} +5 -5
- package/dist/src/{completion-CrD6MQ93.js → completion-sNvCLTAP.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-BXEJK_bi.cjs +704 -0
- package/dist/src/crescendo-CU_Y2i-m.js +702 -0
- package/dist/src/crescendo-J1Xx4_zb.js +703 -0
- package/dist/src/crescendo-QiaSLW0d.js +701 -0
- package/dist/src/custom-BJfP00Bh.js +619 -0
- package/dist/src/custom-CZVn-1-r.js +620 -0
- package/dist/src/custom-Cqia7M0D.cjs +621 -0
- package/dist/src/custom-notggYVl.js +618 -0
- package/dist/src/{docker--3qzPa-6.js → docker-4D1eL6Gq.js} +5 -5
- package/dist/src/{docker-Dorv4_Dg.js → docker-BBv1WUDu.js} +5 -5
- package/dist/src/{docker-D3AY-5F5.cjs → docker-D06JUoe2.cjs} +6 -6
- package/dist/src/{docker-DCsCDvwM.js → docker-DdJQBxK9.js} +5 -5
- package/dist/src/{embedding-DNRvZwRN.js → embedding--UZVe4_7.js} +5 -5
- package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BbrwopfX.cjs} +6 -6
- package/dist/src/{embedding-ChS1ivFS.js → embedding-Bi3rxrZF.js} +5 -5
- package/dist/src/{embedding-D_bI4NDq.js → embedding-C251p1-8.js} +4 -4
- package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
- package/dist/src/{eval-DJ_4A-tr.js → eval-0VRANImH.js} +19 -19
- package/dist/src/{eval-BQPLBJbw.js → eval-DscR5iOM.js} +1 -1
- package/dist/src/{evalResult-pSvGWFMo.js → evalResult-2RRJvFyB.js} +18 -11
- package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-CvtS8h8u.cjs} +29 -11
- package/dist/src/evalResult-DqzsS6_W.js +3 -0
- package/dist/src/{evalResult-D6P5I5il.js → evalResult-eUkJv9Ko.js} +17 -10
- package/dist/src/evaluator-DNdJF1Gv.js +3 -0
- package/dist/src/{evaluator-D-UIbbYq.js → evaluator-DRoiYB2q.js} +258 -132
- package/dist/src/evaluatorHelpers-BsYP_muT.js +511 -0
- package/dist/src/evaluatorHelpers-CRqTvSux.cjs +537 -0
- package/dist/src/evaluatorHelpers-DuqFFfq7.js +510 -0
- package/dist/src/{extractor-YlZbUMsL.js → extractor-BR7XAzAL.js} +5 -5
- package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-BdxEtt3J.cjs} +6 -6
- package/dist/src/{extractor-DxyiFhPk.js → extractor-CIW3iN-b.js} +5 -5
- package/dist/src/{extractor-BM3jRERL.js → extractor-CxRtnaHl.js} +5 -5
- package/dist/src/{fetch-Y5qX_kST.js → fetch-BufrQtvR.js} +90 -26
- package/dist/src/{fetch-B6ch2nU2.js → fetch-DXUnXkVU.js} +86 -26
- package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Dw4XZHjj.cjs} +115 -32
- package/dist/src/{fetch-D9xxyC1p.js → fetch-It34O8Ur.js} +90 -26
- package/dist/src/fetch-_YgGd2qv.js +3 -0
- package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ckd3q3AY.js +467 -0
- package/dist/src/goat-Qgurm-NP.js +466 -0
- package/dist/src/goat-ghadEDdy.js +465 -0
- package/dist/src/goat-una6pZGP.cjs +469 -0
- package/dist/src/graders-BDT7dif6.js +3 -0
- package/dist/src/{graders-CgPn32yp.js → graders-BGP99PdK.js} +1017 -84
- package/dist/src/{graders-BoUqsCEm.js → graders-BX0f2tvS.js} +1022 -84
- package/dist/src/{graders-CwrbifOo.js → graders-C0nXU_ZP.js} +1020 -82
- package/dist/src/{graders-Bw1wk_21.cjs → graders-ClrU2fnd.cjs} +1085 -128
- package/dist/src/hydra-BSNZZm2M.js +543 -0
- package/dist/src/hydra-BxdG4nkg.js +541 -0
- package/dist/src/hydra-DE4xWwyc.js +542 -0
- package/dist/src/hydra-DrJttnvw.cjs +542 -0
- package/dist/src/image-B4oBtu6J.js +443 -0
- package/dist/src/{image-Dr_3I3nK.js → image-BN-hjLL9.js} +3 -3
- package/dist/src/{image-BeWaInPF.js → image-B_fPIwdg.js} +3 -3
- package/dist/src/image-BvUAW344.js +442 -0
- package/dist/src/image-Cvjwx1uY.js +442 -0
- package/dist/src/{image-D10dNAav.cjs → image-DfVCGPbI.cjs} +4 -4
- package/dist/src/{image-qjO6FWPs.js → image-QzmydkiG.js} +3 -3
- package/dist/src/image-X0oY4350.cjs +465 -0
- package/dist/src/index.cjs +688 -313
- package/dist/src/index.d.cts +3152 -1617
- package/dist/src/index.d.ts +3151 -1616
- package/dist/src/index.js +582 -223
- package/dist/src/indirectWebPwn-02ZIghCS.js +259 -0
- package/dist/src/indirectWebPwn-BJ22AbQa.cjs +397 -0
- package/dist/src/indirectWebPwn-CbjUG0rh.js +385 -0
- package/dist/src/indirectWebPwn-CfQJt3gk.cjs +260 -0
- package/dist/src/indirectWebPwn-DBQhOjoD.js +260 -0
- package/dist/src/indirectWebPwn-OsXnKejv.js +259 -0
- package/dist/src/indirectWebPwn-tNx9OZ35.js +385 -0
- package/dist/src/indirectWebPwn-uyWdHx04.js +386 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-CpU6i2As.js +490 -0
- package/dist/src/iterative-DJQEQpG3.js +491 -0
- package/dist/src/iterative-DQBuWM-j.cjs +493 -0
- package/dist/src/iterative-FTS4Bz67.js +492 -0
- package/dist/src/iterativeImage-BUABMVOA.js +413 -0
- package/dist/src/iterativeImage-ByFWkxax.cjs +415 -0
- package/dist/src/iterativeImage-BzUapOUi.js +414 -0
- package/dist/src/iterativeImage-Doz8mgxF.js +413 -0
- package/dist/src/iterativeMeta-B3YiAOc8.js +386 -0
- package/dist/src/iterativeMeta-C7APE_P1.js +385 -0
- package/dist/src/iterativeMeta-CSS8M6Ds.cjs +385 -0
- package/dist/src/iterativeMeta-DgoQ7bLh.js +384 -0
- package/dist/src/iterativeTree-B5zxBBSW.js +769 -0
- package/dist/src/iterativeTree-CNyIk0Yn.js +768 -0
- package/dist/src/iterativeTree-CPMF10ve.cjs +771 -0
- package/dist/src/iterativeTree-DvZ7GBwt.js +770 -0
- package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-BadkINlJ.js} +24 -10
- package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-Bi_8sV-H.js} +23 -9
- package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-CkMljjdg.js} +24 -10
- package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-DUh34xba.cjs} +25 -11
- package/dist/src/{litellm-DRc4qWfc.js → litellm-BKBo0jpC.js} +4 -4
- package/dist/src/{litellm-BLSiANhk.js → litellm-BXyn5kZK.js} +4 -4
- package/dist/src/{litellm-DQGo_juI.js → litellm-CNcfbCfa.js} +4 -4
- package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CtAr7bKG.cjs} +5 -5
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BMX1iEB6.js} +5 -5
- package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-CR5TSpp4.js} +5 -5
- package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-D3FUc2K3.cjs} +9 -8
- package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-OEMmS1RB.js} +5 -5
- package/dist/src/main.js +704 -208
- package/dist/src/memoryPoisoning-CM83NWYl.js +107 -0
- package/dist/src/memoryPoisoning-D8h9gXJF.js +106 -0
- package/dist/src/memoryPoisoning-Dp-btinn.cjs +106 -0
- package/dist/src/memoryPoisoning-cLuCoTuJ.js +106 -0
- package/dist/src/{messages-CewuNcNS.js → messages-BabO-cX8.js} +17 -9
- package/dist/src/{messages-BnsVHUnm.cjs → messages-DBPir0TQ.cjs} +24 -15
- package/dist/src/{messages-B9dSjrNf.js → messages-DGUlSNU7.js} +18 -10
- package/dist/src/{messages-CI69Lasb.js → messages-vsE_-Lv0.js} +18 -10
- package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
- package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
- package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-0l8GD7Dp.js +46 -0
- package/dist/src/mischievousUser-BUOP9W5r.js +46 -0
- package/dist/src/mischievousUser-frFYKxu6.js +47 -0
- package/dist/src/mischievousUser-olGgHIVR.cjs +46 -0
- package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-CNV5bMSk.cjs} +7 -7
- package/dist/src/{modelslab-zpz9JcK0.js → modelslab-Cogmu4mG.js} +6 -6
- package/dist/src/{modelslab-D73OnKSx.js → modelslab-Dzst7VTU.js} +6 -6
- package/dist/src/{modelslab-BCLOtfek.js → modelslab-EyDczZ5A.js} +6 -6
- package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-BGPNBOMS.js} +5 -5
- package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-B_5NKFu1.js} +5 -5
- package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-C4eUJGse.js} +5 -5
- package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-CjJRxI1X.cjs} +9 -8
- package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-BNGmgfFz.js} +3 -3
- package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-ChPlh5na.js} +2 -2
- package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-CrV0iaY_.js} +3 -3
- package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-DuOG9Aun.cjs} +5 -4
- package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
- package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-BIHlu_36.js} +9 -8
- package/dist/src/{openclaw-DObVgpjC.js → openclaw-CF7fMido.js} +9 -8
- package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-Dphc01BY.cjs} +17 -15
- package/dist/src/{openclaw-DA8U4DsD.js → openclaw-zIJAsz3P.js} +9 -8
- package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-B3vlPLsp.js} +38 -3
- package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-D05JSgMQ.js} +39 -4
- package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-DoY6GbWw.cjs} +45 -9
- package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-sRKYHGoI.js} +39 -4
- package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
- package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
- package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BDrfT30-.js +180 -0
- package/dist/src/promptfoo-Cm4hiy1Y.js +180 -0
- package/dist/src/promptfoo-Rjp-MeBb.js +181 -0
- package/dist/src/promptfoo-b-baRMj-.cjs +205 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
- package/dist/src/{providers-DT-GtF2t.js → providers-B9KzWxAX.js} +739 -11919
- package/dist/src/{providers-DRrerKra.js → providers-BCCz6_IX.js} +813 -11944
- package/dist/src/{providers-eDShy16E.cjs → providers-BDVVIQM6.cjs} +787 -12132
- package/dist/src/{providers-Ctcc592x.js → providers-BYAn82cf.js} +1 -1
- package/dist/src/{providers-CJh7iriU.js → providers-DVYRZP4E.js} +746 -11866
- package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
- package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
- package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
- package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
- package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-BAp6iTZD.cjs} +4 -4
- package/dist/src/{quiverai-CPKhWgaT.js → quiverai-BvIhI_0l.js} +3 -3
- package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CdTWPe-A.js} +3 -3
- package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-Cv7rJKDz.js} +3 -3
- package/dist/src/registry-BUJrgjwv.js +124 -0
- package/dist/src/registry-DXm1t_x0.js +125 -0
- package/dist/src/registry-Dp5EqoXc.js +124 -0
- package/dist/src/registry-KCVF1CFC.cjs +124 -0
- package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-B1_XsKXU.js} +16 -147
- package/dist/src/{server-gyd6d4Hc.js → remoteGeneration-COpWcmWd.js} +15 -108
- package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-DS9N3pgB.cjs} +30 -119
- package/dist/src/remoteGeneration-DsaSwmG2.js +217 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-CF-ayauu.cjs → responses-1UFFF9N_.cjs} +12 -11
- package/dist/src/{responses-B8haB-mD.js → responses-B3W2JvOQ.js} +9 -9
- package/dist/src/{responses-1ztiVYsx.js → responses-B6ktc3Ra.js} +7 -7
- package/dist/src/{responses-BiaBguAu.js → responses-URRzV8qE.js} +9 -9
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
- package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
- package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
- package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-CujrzP1a.js} +61 -50
- package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-DzffAqo_.cjs} +65 -53
- package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-vhtSV7JI.js} +61 -50
- package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-yr1QKeBs.js} +61 -50
- package/dist/src/{scanner-nOCWNIXa.js → scanner-DS0109SS.js} +6 -6
- package/dist/src/server/index.js +4147 -449
- package/dist/src/server-B8rqV126.cjs +126 -0
- package/dist/src/server-BaLytskk.js +3 -0
- package/dist/src/server-CMJD10J4.js +107 -0
- package/dist/src/server-Ddp8GNMp.js +146 -0
- package/dist/src/server-DhMHosWj.js +182 -0
- package/dist/src/shared-7pmVZLNO.js +1334 -0
- package/dist/src/shared-9WHQ1oNE.js +1335 -0
- package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-D6IjElRI.js +1334 -0
- package/dist/src/shared-WkgnDkcg.cjs +1436 -0
- package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
- package/dist/src/simulatedUser-C9aQObBI.js +222 -0
- package/dist/src/simulatedUser-Cu601Dd4.cjs +227 -0
- package/dist/src/simulatedUser-U_qAHnuB.js +222 -0
- package/dist/src/simulatedUser-p3tACcmw.js +223 -0
- package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
- package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
- package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-BU4qcnOb.js +875 -0
- package/dist/src/storage-CA-v9V2v.cjs +911 -0
- package/dist/src/storage-CD-GWAdx.js +822 -0
- package/dist/src/storage-QdU-SmvD.js +834 -0
- package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
- package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-7CS3Alao.cjs +2360 -0
- package/dist/src/strategies-CiSeroPH.js +2331 -0
- package/dist/src/strategies-DRJjGTIY.js +2333 -0
- package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
- package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
- package/dist/src/{telemetry-DWdGHvEf.js → telemetry-00ezXr_t.js} +4 -4
- package/dist/src/telemetry-ByPqDcKC.js +3 -0
- package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-CJ7FnCsc.cjs} +15 -9
- package/dist/src/{telemetry--iqaGyaS.js → telemetry-DmXYcJNV.js} +4 -4
- package/dist/src/{telemetry-CgdVGV8N.js → telemetry-DwX9XUN5.js} +4 -4
- package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
- package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
- package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BnwKCkSB.js +221 -0
- package/dist/src/tracingOptions-Chi74lOD.js +219 -0
- package/dist/src/tracingOptions-DrbSFaKy.cjs +249 -0
- package/dist/src/tracingOptions-ji2OuXbT.js +220 -0
- package/dist/src/{transcription-84t4ALo2.js → transcription-B8uIgCYX.js} +5 -5
- package/dist/src/{transcription-Bm2emLmJ.js → transcription-CfU5loSq.js} +5 -5
- package/dist/src/{transcription-D7Q0vJsh.js → transcription-Dkd22_4K.js} +4 -4
- package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-mzuf18Mq.cjs} +9 -8
- package/dist/src/{transform-DtooZqYY.js → transform-BIMynQsA.js} +8 -8
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DhNkAUs8.cjs} +12 -11
- package/dist/src/{transform-_DpNB4qp.js → transform-DmvYBRll.js} +8 -8
- package/dist/src/{transform-B-b6Cq-q.js → transform-EtD4jAWi.js} +8 -8
- package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
- package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
- package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
- package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
- package/dist/src/{util-DFPeFkiV.js → util--WMgw7wM.js} +28 -8
- package/dist/src/{util-C-kmRosx.js → util-5WnCSb0h.js} +9 -7
- package/dist/src/{util-A5_ZsQUn.cjs → util-BSIuSLVK.cjs} +12 -9
- package/dist/src/{util-Dub0f_ej.js → util-Bx677_k2.js} +17 -10
- package/dist/src/util-CN8om2rz.cjs +386 -0
- package/dist/src/{util-DN0-b81k.js → util-CoQWM76y.js} +28 -8
- package/dist/src/util-DNl96nNs.js +327 -0
- package/dist/src/{util-BQOCAHQC.js → util-DURocbYR.js} +46 -11
- package/dist/src/util-Df8YMvS1.js +327 -0
- package/dist/src/{util-BVXcTwXu.js → util-DiQ3QvBB.js} +28 -8
- package/dist/src/{util-3pBZZb_H.js → util-I-Rf-KaD.js} +45 -10
- package/dist/src/{util-Dpmm_dAI.cjs → util-IYzs5Y04.cjs} +33 -7
- package/dist/src/{util-BlFVL0UF.js → util-LKTmNsMQ.js} +9 -7
- package/dist/src/{util-DvpHnLt0.cjs → util-SPsvFONY.cjs} +29 -21
- package/dist/src/{util-B9CNhyac.js → util-efByNxcr.js} +9 -7
- package/dist/src/util-kDURhgJW.js +328 -0
- package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
- package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
- package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/{version-CbuBKu2U.js → version-BK20a4sw.js} +2 -2
- package/dist/src/{version-D9zu9FWB.cjs → version-BWCSaByA.cjs} +2 -2
- package/dist/src/{version-CbpiUINz.js → version-eRkNuGv8.js} +2 -2
- package/dist/src/{version-0frU0UTr.js → version-lpHV_53E.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +48 -22
- package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
- package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
- package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-BRkhlH3k.cjs +0 -3
- package/dist/src/cache-BlC6aeJ0.js +0 -3
- package/dist/src/cloud-CoD5OacT.js +0 -3
- package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
- package/dist/src/codex-sdk-Danroptg.cjs +0 -2
- package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
- package/dist/src/evalResult-BBK58h2B.js +0 -3
- package/dist/src/evalResult-spPqh1G_.js +0 -2
- package/dist/src/evaluator-DgLKaZk8.js +0 -3
- package/dist/src/fetch-8viavNv8.js +0 -3
- package/dist/src/graders-C84JI-m5.js +0 -2
- package/dist/src/graders-CBbd0K0Q.cjs +0 -2
- package/dist/src/graders-CbQqpHSN.js +0 -3
- package/dist/src/graders-DS42d3ZG.js +0 -2
- package/dist/src/image-BmilRNqO.js +0 -258
- package/dist/src/image-CxJoa3aW.cjs +0 -280
- package/dist/src/image-DsGRlkh7.js +0 -257
- package/dist/src/image-a_SGUobh.js +0 -257
- package/dist/src/providers-BuyzKt7C.js +0 -2
- package/dist/src/providers-C7lNVBjX.cjs +0 -3
- package/dist/src/providers-CCE2COJi2.js +0 -2
- package/dist/src/render-7uNJ2V14.js +0 -135
- package/dist/src/render-DlscvAUJ.js +0 -135
- package/dist/src/render-eui5p5mL.js +0 -136
- package/dist/src/render-tG6ir9_g.cjs +0 -165
- package/dist/src/rubyUtils-4hjGxvju.js +0 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
- package/dist/src/server-ByiF3qlg.js +0 -386
- package/dist/src/server-C0XKRNB_.cjs +0 -2
- package/dist/src/server-C_15p79-.js +0 -3
- package/dist/src/store-2OXm_eBY.js +0 -240
- package/dist/src/store-BELqNwvz.js +0 -3
- package/dist/src/store-uQZ4AjPe.cjs +0 -2
- package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
- package/dist/src/telemetry-ZdPZc0fm.js +0 -3
- package/dist/src/transform-BQt0BeAW.js +0 -3
- package/dist/src/transform-Bq5oqC0s.cjs +0 -2
- package/dist/src/transform-C9izGX54.cjs +0 -228
- package/dist/src/transform-CwbAZ84V.js +0 -216
- package/dist/src/transform-DzCF-wqV.js +0 -213
- package/dist/src/transform-eGiUAv86.js +0 -216
package/dist/src/index.cjs
CHANGED
|
@@ -2,78 +2,93 @@ Object.defineProperties(exports, {
|
|
|
2
2
|
__esModule: { value: true },
|
|
3
3
|
[Symbol.toStringTag]: { value: "Module" }
|
|
4
4
|
});
|
|
5
|
-
const
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const
|
|
13
|
-
const
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
const
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
const
|
|
25
|
-
const
|
|
26
|
-
const
|
|
27
|
-
const
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
const
|
|
32
|
-
const
|
|
33
|
-
const
|
|
34
|
-
const
|
|
35
|
-
const
|
|
5
|
+
const require_rolldown_runtime = require("./rolldown-runtime-D_mwlA32.cjs");
|
|
6
|
+
const require_logger = require("./logger-cfNpzI4o.cjs");
|
|
7
|
+
const require_invariant = require("./invariant-QtnLD03y.cjs");
|
|
8
|
+
const require_types = require("./types-CxJvaY2S.cjs");
|
|
9
|
+
const require_fetch = require("./fetch-Dw4XZHjj.cjs");
|
|
10
|
+
const require_version = require("./version-BWCSaByA.cjs");
|
|
11
|
+
const require_fileExtensions = require("./fileExtensions-BhdwzYaD.cjs");
|
|
12
|
+
const require_accounts = require("./accounts-CmWzeD2d.cjs");
|
|
13
|
+
const require_esm = require("./esm-BIKakvNa.cjs");
|
|
14
|
+
const require_render = require("./render-BNTrbmBw.cjs");
|
|
15
|
+
const require_providerRegistry = require("./providerRegistry-CZO_w7ue.cjs");
|
|
16
|
+
const require_remoteGeneration = require("./remoteGeneration-DS9N3pgB.cjs");
|
|
17
|
+
const require_server = require("./server-B8rqV126.cjs");
|
|
18
|
+
const require_storage = require("./storage-CA-v9V2v.cjs");
|
|
19
|
+
const require_pythonUtils = require("./pythonUtils-Cokhluq3.cjs");
|
|
20
|
+
const require_util = require("./util-SPsvFONY.cjs");
|
|
21
|
+
const require_createHash = require("./createHash-CSiqnK5P.cjs");
|
|
22
|
+
const require_cache = require("./cache-CPGUA4Yl.cjs");
|
|
23
|
+
const require_chat = require("./chat-DxTDQ83C.cjs");
|
|
24
|
+
const require_transform = require("./transform-DhNkAUs8.cjs");
|
|
25
|
+
const require_util$1 = require("./util-BSIuSLVK.cjs");
|
|
26
|
+
const require_providers = require("./providers-BDVVIQM6.cjs");
|
|
27
|
+
const require_tokenUsageUtils = require("./tokenUsageUtils-_B-P8IAi.cjs");
|
|
28
|
+
const require_text = require("./text-nywWsRBM.cjs");
|
|
29
|
+
const require_telemetry = require("./telemetry-CJ7FnCsc.cjs");
|
|
30
|
+
const require_tables = require("./tables-DBIJU0WE.cjs");
|
|
31
|
+
const require_blobs = require("./blobs-B0977K1O.cjs");
|
|
32
|
+
const require_processShim = require("./processShim-DSY9BV2T.cjs");
|
|
33
|
+
const require_packageParser = require("./packageParser-QoCS1FMl.cjs");
|
|
34
|
+
const require_rubyUtils = require("./rubyUtils-CqUWBZAt.cjs");
|
|
35
|
+
const require_inputVariables = require("./inputVariables-Dq9W-Z3a.cjs");
|
|
36
|
+
const require_util$2 = require("./util-CN8om2rz.cjs");
|
|
37
|
+
const require_promptfoo = require("./promptfoo-b-baRMj-.cjs");
|
|
38
|
+
const require_graders = require("./graders-ClrU2fnd.cjs");
|
|
39
|
+
const require_shared = require("./shared-WkgnDkcg.cjs");
|
|
40
|
+
const require_utils = require("./utils-Ve6kuJsa.cjs");
|
|
41
|
+
const require_transform$1 = require("./transform-BnSXWmU_2.cjs");
|
|
42
|
+
const require_store = require("./store-B2NDDooM.cjs");
|
|
43
|
+
const require_extractor = require("./extractor-BdxEtt3J.cjs");
|
|
44
|
+
const require_promptLength = require("./promptLength-BbBbDHNj.cjs");
|
|
45
|
+
const require_indirectWebPwn = require("./indirectWebPwn-BJ22AbQa.cjs");
|
|
46
|
+
const require_toolAttributes = require("./toolAttributes-BAjwcBf0.cjs");
|
|
47
|
+
const require_evaluatorHelpers = require("./evaluatorHelpers-CRqTvSux.cjs");
|
|
48
|
+
const require_evalResult = require("./evalResult-CvtS8h8u.cjs");
|
|
49
|
+
const require_strategies = require("./strategies-7CS3Alao.cjs");
|
|
36
50
|
let fs = require("fs");
|
|
37
|
-
fs =
|
|
51
|
+
fs = require_rolldown_runtime.__toESM(fs, 1);
|
|
38
52
|
let path = require("path");
|
|
39
|
-
path =
|
|
53
|
+
path = require_rolldown_runtime.__toESM(path, 1);
|
|
40
54
|
let async = require("async");
|
|
41
|
-
async =
|
|
55
|
+
async = require_rolldown_runtime.__toESM(async, 1);
|
|
42
56
|
let js_yaml = require("js-yaml");
|
|
43
|
-
js_yaml =
|
|
57
|
+
js_yaml = require_rolldown_runtime.__toESM(js_yaml, 1);
|
|
44
58
|
let node_async_hooks = require("node:async_hooks");
|
|
45
59
|
let chalk = require("chalk");
|
|
46
|
-
chalk =
|
|
60
|
+
chalk = require_rolldown_runtime.__toESM(chalk, 1);
|
|
47
61
|
let os = require("os");
|
|
48
|
-
os =
|
|
62
|
+
os = require_rolldown_runtime.__toESM(os, 1);
|
|
49
63
|
let dedent = require("dedent");
|
|
50
|
-
dedent =
|
|
64
|
+
dedent = require_rolldown_runtime.__toESM(dedent, 1);
|
|
51
65
|
let zod = require("zod");
|
|
52
66
|
let fs_promises = require("fs/promises");
|
|
53
|
-
fs_promises =
|
|
67
|
+
fs_promises = require_rolldown_runtime.__toESM(fs_promises, 1);
|
|
54
68
|
let util = require("util");
|
|
55
|
-
util =
|
|
69
|
+
util = require_rolldown_runtime.__toESM(util, 1);
|
|
56
70
|
let _inquirer_input = require("@inquirer/input");
|
|
57
|
-
_inquirer_input =
|
|
71
|
+
_inquirer_input = require_rolldown_runtime.__toESM(_inquirer_input, 1);
|
|
58
72
|
require("node:path");
|
|
59
73
|
require("node:url");
|
|
60
74
|
let crypto$1 = require("crypto");
|
|
61
|
-
crypto$1 =
|
|
75
|
+
crypto$1 = require_rolldown_runtime.__toESM(crypto$1, 1);
|
|
62
76
|
let _opentelemetry_api = require("@opentelemetry/api");
|
|
63
77
|
let readline = require("readline");
|
|
64
|
-
readline =
|
|
78
|
+
readline = require_rolldown_runtime.__toESM(readline, 1);
|
|
65
79
|
let csv_parse_sync = require("csv-parse/sync");
|
|
66
80
|
let glob = require("glob");
|
|
67
|
-
let drizzle_orm = require("drizzle-orm");
|
|
68
81
|
let fast_xml_parser = require("fast-xml-parser");
|
|
69
|
-
let
|
|
70
|
-
cli_progress = require_logger.__toESM(cli_progress);
|
|
82
|
+
let drizzle_orm = require("drizzle-orm");
|
|
71
83
|
let url = require("url");
|
|
72
84
|
let parse5 = require("parse5");
|
|
73
85
|
let fastest_levenshtein = require("fastest-levenshtein");
|
|
86
|
+
let cli_progress = require("cli-progress");
|
|
87
|
+
cli_progress = require_rolldown_runtime.__toESM(cli_progress, 1);
|
|
74
88
|
let js_rouge = require("js-rouge");
|
|
75
|
-
js_rouge =
|
|
89
|
+
js_rouge = require_rolldown_runtime.__toESM(js_rouge, 1);
|
|
76
90
|
let node_util = require("node:util");
|
|
91
|
+
let lru_cache = require("lru-cache");
|
|
77
92
|
require("debounce");
|
|
78
93
|
let _opentelemetry_core = require("@opentelemetry/core");
|
|
79
94
|
let _opentelemetry_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otlp-http");
|
|
@@ -82,15 +97,15 @@ let _opentelemetry_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
|
|
|
82
97
|
let _opentelemetry_semantic_conventions = require("@opentelemetry/semantic-conventions");
|
|
83
98
|
let drizzle_orm_better_sqlite3_migrator = require("drizzle-orm/better-sqlite3/migrator");
|
|
84
99
|
let process$1 = require("process");
|
|
85
|
-
process$1 =
|
|
100
|
+
process$1 = require_rolldown_runtime.__toESM(process$1, 1);
|
|
86
101
|
let _apidevtools_json_schema_ref_parser = require("@apidevtools/json-schema-ref-parser");
|
|
87
|
-
_apidevtools_json_schema_ref_parser =
|
|
102
|
+
_apidevtools_json_schema_ref_parser = require_rolldown_runtime.__toESM(_apidevtools_json_schema_ref_parser, 1);
|
|
88
103
|
let cli_table3 = require("cli-table3");
|
|
89
|
-
cli_table3 =
|
|
104
|
+
cli_table3 = require_rolldown_runtime.__toESM(cli_table3, 1);
|
|
90
105
|
let chokidar = require("chokidar");
|
|
91
|
-
chokidar =
|
|
106
|
+
chokidar = require_rolldown_runtime.__toESM(chokidar, 1);
|
|
92
107
|
let ora = require("ora");
|
|
93
|
-
ora =
|
|
108
|
+
ora = require_rolldown_runtime.__toESM(ora, 1);
|
|
94
109
|
require("@inquirer/confirm");
|
|
95
110
|
//#region src/external/matchers/conversationRelevancyTemplate.ts
|
|
96
111
|
var ConversationRelevancyTemplate = class {
|
|
@@ -449,17 +464,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
|
|
|
449
464
|
contextValue = test.vars.context;
|
|
450
465
|
}
|
|
451
466
|
} else if (fallbackContext) contextValue = fallbackContext;
|
|
452
|
-
if (assertion.contextTransform)
|
|
453
|
-
const
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
467
|
+
if (assertion.contextTransform) {
|
|
468
|
+
const getLabel = () => require_transform$1.getTransformLabel(assertion.contextTransform);
|
|
469
|
+
try {
|
|
470
|
+
const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
|
|
471
|
+
const transformed = await require_transform$1.transform(assertion.contextTransform, outputForTransform, {
|
|
472
|
+
vars: test.vars,
|
|
473
|
+
prompt: { label: prompt },
|
|
474
|
+
...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
|
|
475
|
+
});
|
|
476
|
+
require_invariant.invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
|
|
477
|
+
contextValue = transformed;
|
|
478
|
+
} catch (error) {
|
|
479
|
+
throw new Error(`Failed to transform context using expression '${getLabel()}': ${require_transform$1.getTransformErrorMessage(error)}`);
|
|
480
|
+
}
|
|
463
481
|
}
|
|
464
482
|
require_invariant.invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
|
|
465
483
|
return contextValue;
|
|
@@ -740,7 +758,7 @@ async function calculateProviderSimilarity(finalProvider, expected, output, metr
|
|
|
740
758
|
return calculateSimilarityScore(expectedEmbedding.embedding, outputEmbedding.embedding, metric, tokensUsed);
|
|
741
759
|
}
|
|
742
760
|
async function matchesSimilarity(expected, output, threshold, inverse = false, grading, metric = "cosine") {
|
|
743
|
-
if (metric === "cosine" && require_logger.state.config?.redteam &&
|
|
761
|
+
if (metric === "cosine" && require_logger.state.config?.redteam && require_remoteGeneration.shouldGenerateRemote({ requireEmbeddingProvider: true })) try {
|
|
744
762
|
return await require_graders.doRemoteGrading({
|
|
745
763
|
task: "similar",
|
|
746
764
|
expected,
|
|
@@ -802,7 +820,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
802
820
|
require_telemetry.telemetry.record("feature_used", { feature: "tracing" });
|
|
803
821
|
try {
|
|
804
822
|
require_logger.logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
805
|
-
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
823
|
+
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CvJdBGSc.cjs"));
|
|
806
824
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
807
825
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
808
826
|
const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
|
|
@@ -826,7 +844,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
826
844
|
async function stopOtlpReceiverIfNeeded() {
|
|
827
845
|
if (otlpReceiverStarted) try {
|
|
828
846
|
require_logger.logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
829
|
-
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
847
|
+
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-CvJdBGSc.cjs"));
|
|
830
848
|
await stopOTLPReceiver();
|
|
831
849
|
otlpReceiverStarted = false;
|
|
832
850
|
require_logger.logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -861,7 +879,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
861
879
|
}
|
|
862
880
|
if (!tracingEnabled) return null;
|
|
863
881
|
require_logger.logger.debug("[EvaluatorTracing] Importing trace store");
|
|
864
|
-
const { getTraceStore } = await Promise.resolve().then(() => require("./store-
|
|
882
|
+
const { getTraceStore } = await Promise.resolve().then(() => require("./store-B2NDDooM.cjs")).then((n) => n.store_exports);
|
|
865
883
|
const traceStore = getTraceStore();
|
|
866
884
|
const traceId = generateTraceId();
|
|
867
885
|
const spanId = generateSpanId();
|
|
@@ -1493,27 +1511,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
|
|
|
1493
1511
|
};
|
|
1494
1512
|
//#endregion
|
|
1495
1513
|
//#region src/assertions/geval.ts
|
|
1496
|
-
const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1497
|
-
require_invariant.invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
|
|
1514
|
+
const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1515
|
+
require_invariant.invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
|
|
1498
1516
|
const threshold = assertion.threshold ?? .7;
|
|
1499
1517
|
if (Array.isArray(renderedValue)) {
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1518
|
+
if (renderedValue.length === 0) return {
|
|
1519
|
+
assertion,
|
|
1520
|
+
pass: false,
|
|
1521
|
+
score: 0,
|
|
1522
|
+
reason: "G-Eval assertion requires at least one criterion string in the value array."
|
|
1523
|
+
};
|
|
1524
|
+
const responses = [];
|
|
1525
|
+
let failure;
|
|
1526
|
+
for (const [index, value] of renderedValue.entries()) {
|
|
1503
1527
|
const resp = await require_graders.matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1504
|
-
|
|
1505
|
-
|
|
1528
|
+
responses.push(resp);
|
|
1529
|
+
if (require_graders.isGraderFailure(resp)) {
|
|
1530
|
+
failure = {
|
|
1531
|
+
index,
|
|
1532
|
+
resp
|
|
1533
|
+
};
|
|
1534
|
+
break;
|
|
1535
|
+
}
|
|
1506
1536
|
}
|
|
1507
|
-
const
|
|
1537
|
+
const tokensUsed = require_tokenUsageUtils.createEmptyTokenUsage();
|
|
1538
|
+
for (const r of responses) require_tokenUsageUtils.accumulateTokenUsage(tokensUsed, r.tokensUsed);
|
|
1539
|
+
if (failure) {
|
|
1540
|
+
const criterion = renderedValue[failure.index];
|
|
1541
|
+
return {
|
|
1542
|
+
assertion,
|
|
1543
|
+
pass: false,
|
|
1544
|
+
score: 0,
|
|
1545
|
+
reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
|
|
1546
|
+
tokensUsed,
|
|
1547
|
+
metadata: failure.resp.metadata
|
|
1548
|
+
};
|
|
1549
|
+
}
|
|
1550
|
+
const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
|
|
1551
|
+
const combinedReason = responses.map((r) => r.reason).join("\n\n");
|
|
1508
1552
|
return {
|
|
1509
1553
|
assertion,
|
|
1510
|
-
pass:
|
|
1511
|
-
score:
|
|
1512
|
-
reason:
|
|
1554
|
+
pass: averageScore >= threshold !== inverse,
|
|
1555
|
+
score: inverse ? 1 - averageScore : averageScore,
|
|
1556
|
+
reason: combinedReason,
|
|
1557
|
+
tokensUsed
|
|
1513
1558
|
};
|
|
1514
|
-
}
|
|
1559
|
+
}
|
|
1560
|
+
const resp = await require_graders.matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1561
|
+
if (require_graders.isGraderFailure(resp)) return {
|
|
1562
|
+
assertion,
|
|
1563
|
+
pass: false,
|
|
1564
|
+
score: 0,
|
|
1565
|
+
reason: resp.reason,
|
|
1566
|
+
tokensUsed: resp.tokensUsed,
|
|
1567
|
+
metadata: resp.metadata
|
|
1568
|
+
};
|
|
1569
|
+
const passed = resp.score >= threshold !== inverse;
|
|
1570
|
+
return {
|
|
1515
1571
|
assertion,
|
|
1516
|
-
...
|
|
1572
|
+
...resp,
|
|
1573
|
+
pass: passed,
|
|
1574
|
+
score: inverse ? 1 - resp.score : resp.score
|
|
1517
1575
|
};
|
|
1518
1576
|
};
|
|
1519
1577
|
//#endregion
|
|
@@ -2024,7 +2082,7 @@ const handleJavascript = async ({ assertion, renderedValue, valueFromScript, ass
|
|
|
2024
2082
|
let result;
|
|
2025
2083
|
if (typeof valueFromScript === "undefined") {
|
|
2026
2084
|
const functionBody = renderedValue.includes("\n") ? renderedValue : buildFunctionBody(renderedValue);
|
|
2027
|
-
result = await validateResult(new Function("output", "context", "process", functionBody)(output, assertionValueContext,
|
|
2085
|
+
result = await validateResult(new Function("output", "context", "process", functionBody)(output, assertionValueContext, require_processShim.getProcessShim()));
|
|
2028
2086
|
} else {
|
|
2029
2087
|
require_invariant.invariant(typeof valueFromScript === "boolean" || typeof valueFromScript === "number" || typeof valueFromScript === "object", `Javascript assertion script must return a boolean, number, or object (${assertion.value})`);
|
|
2030
2088
|
result = await validateResult(valueFromScript);
|
|
@@ -2516,45 +2574,6 @@ function matchesPattern(spanName, pattern) {
|
|
|
2516
2574
|
}
|
|
2517
2575
|
//#endregion
|
|
2518
2576
|
//#region src/assertions/trajectoryUtils.ts
|
|
2519
|
-
const TOOL_ATTRIBUTE_KEYS = [
|
|
2520
|
-
"tool.name",
|
|
2521
|
-
"tool_name",
|
|
2522
|
-
"tool",
|
|
2523
|
-
"function.name",
|
|
2524
|
-
"function_name",
|
|
2525
|
-
"gen_ai.tool.name",
|
|
2526
|
-
"codex.mcp.tool",
|
|
2527
|
-
"agent.tool",
|
|
2528
|
-
"agent.tool_name",
|
|
2529
|
-
"agent.toolName"
|
|
2530
|
-
];
|
|
2531
|
-
const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
|
|
2532
|
-
"tool.arguments",
|
|
2533
|
-
"tool.args",
|
|
2534
|
-
"tool.input",
|
|
2535
|
-
"tool_arguments",
|
|
2536
|
-
"tool_args",
|
|
2537
|
-
"tool_input",
|
|
2538
|
-
"function.arguments",
|
|
2539
|
-
"function.args",
|
|
2540
|
-
"function.input",
|
|
2541
|
-
"function_arguments",
|
|
2542
|
-
"function_args",
|
|
2543
|
-
"gen_ai.tool.arguments",
|
|
2544
|
-
"gen_ai.tool.args",
|
|
2545
|
-
"gen_ai.tool.input",
|
|
2546
|
-
"gen_ai.tool.call.arguments",
|
|
2547
|
-
"gen_ai.tool.call.args",
|
|
2548
|
-
"agent.tool.arguments",
|
|
2549
|
-
"agent.tool.args",
|
|
2550
|
-
"agent.tool.input",
|
|
2551
|
-
"codex.mcp.arguments",
|
|
2552
|
-
"codex.mcp.args",
|
|
2553
|
-
"codex.mcp.input",
|
|
2554
|
-
"arguments",
|
|
2555
|
-
"args",
|
|
2556
|
-
"input"
|
|
2557
|
-
];
|
|
2558
2577
|
const COMMAND_ATTRIBUTE_KEYS = [
|
|
2559
2578
|
"codex.command",
|
|
2560
2579
|
"command",
|
|
@@ -2567,16 +2586,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
|
|
|
2567
2586
|
"search_query"
|
|
2568
2587
|
];
|
|
2569
2588
|
const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
|
|
2589
|
+
const COMMAND_TOOL_NAMES = new Set([
|
|
2590
|
+
"exec_command",
|
|
2591
|
+
"local_shell",
|
|
2592
|
+
"shell"
|
|
2593
|
+
]);
|
|
2570
2594
|
const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
|
|
2571
2595
|
const MAX_JUDGE_SUMMARY_STEPS = 24;
|
|
2572
2596
|
const JUDGE_SUMMARY_HEAD_STEPS = 12;
|
|
2573
2597
|
const JUDGE_SUMMARY_TAIL_STEPS = 12;
|
|
2574
|
-
function getStringAttribute(attributes, keys) {
|
|
2575
|
-
for (const key of keys) {
|
|
2576
|
-
const value = attributes[key];
|
|
2577
|
-
if (typeof value === "string" && value.trim()) return value.trim();
|
|
2578
|
-
}
|
|
2579
|
-
}
|
|
2580
2598
|
function normalizeStructuredAttribute(value) {
|
|
2581
2599
|
if (value === void 0 || value === null) return;
|
|
2582
2600
|
if (typeof value === "string") {
|
|
@@ -2608,9 +2626,12 @@ function getTrajectoryStepStatus(step) {
|
|
|
2608
2626
|
function getCommandExecutable(command) {
|
|
2609
2627
|
return command.trim().split(/\s+/)[0] || void 0;
|
|
2610
2628
|
}
|
|
2629
|
+
function isCommandToolName(toolName) {
|
|
2630
|
+
return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
|
|
2631
|
+
}
|
|
2611
2632
|
function extractToolName(span) {
|
|
2612
2633
|
const attributes = span.attributes || {};
|
|
2613
|
-
const directMatch =
|
|
2634
|
+
const directMatch = require_toolAttributes.getToolNameFromAttributes(attributes);
|
|
2614
2635
|
if (directMatch) return directMatch;
|
|
2615
2636
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2616
2637
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
@@ -2624,7 +2645,7 @@ function extractToolName(span) {
|
|
|
2624
2645
|
}
|
|
2625
2646
|
function extractToolArgs(span) {
|
|
2626
2647
|
const attributes = span.attributes || {};
|
|
2627
|
-
for (const key of TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
|
|
2648
|
+
for (const key of require_toolAttributes.TOOL_ARGUMENT_ATTRIBUTE_KEYS) {
|
|
2628
2649
|
const value = normalizeStructuredAttribute(attributes[key]);
|
|
2629
2650
|
if (value !== void 0) return value;
|
|
2630
2651
|
}
|
|
@@ -2635,21 +2656,31 @@ function extractToolArgs(span) {
|
|
|
2635
2656
|
if (value !== void 0) return value;
|
|
2636
2657
|
}
|
|
2637
2658
|
}
|
|
2638
|
-
function extractCommand(span) {
|
|
2659
|
+
function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
|
|
2639
2660
|
const attributes = span.attributes || {};
|
|
2640
|
-
const directMatch =
|
|
2661
|
+
const directMatch = require_toolAttributes.getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
|
|
2641
2662
|
if (directMatch) return directMatch;
|
|
2642
2663
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2643
2664
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
2644
2665
|
if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
|
|
2645
2666
|
}
|
|
2667
|
+
const toolArgs = getToolArgs();
|
|
2668
|
+
if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
|
|
2669
|
+
const args = toolArgs;
|
|
2670
|
+
const command = args.cmd ?? args.command;
|
|
2671
|
+
if (typeof command === "string" && command.trim()) return command.trim();
|
|
2672
|
+
if (Array.isArray(command)) {
|
|
2673
|
+
const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
|
|
2674
|
+
if (joined) return joined;
|
|
2675
|
+
}
|
|
2676
|
+
}
|
|
2646
2677
|
if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
|
|
2647
2678
|
}
|
|
2648
2679
|
function extractSearchQuery(span) {
|
|
2649
2680
|
const attributes = span.attributes || {};
|
|
2650
|
-
const directMatch =
|
|
2681
|
+
const directMatch = require_toolAttributes.getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
|
|
2651
2682
|
if (directMatch) return directMatch;
|
|
2652
|
-
const genericQuery =
|
|
2683
|
+
const genericQuery = require_toolAttributes.getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
|
|
2653
2684
|
if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
|
|
2654
2685
|
if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
|
|
2655
2686
|
}
|
|
@@ -2673,17 +2704,34 @@ function extractTrajectorySteps(trace) {
|
|
|
2673
2704
|
return left.index - right.index;
|
|
2674
2705
|
}).map(({ span }) => {
|
|
2675
2706
|
const toolName = extractToolName(span);
|
|
2676
|
-
|
|
2707
|
+
let toolArgs;
|
|
2708
|
+
let hasExtractedToolArgs = false;
|
|
2709
|
+
const getToolArgs = () => {
|
|
2710
|
+
if (!hasExtractedToolArgs) {
|
|
2711
|
+
toolArgs = extractToolArgs(span);
|
|
2712
|
+
hasExtractedToolArgs = true;
|
|
2713
|
+
}
|
|
2714
|
+
return toolArgs;
|
|
2715
|
+
};
|
|
2716
|
+
const command = extractCommand(span, toolName, getToolArgs);
|
|
2677
2717
|
const searchQuery = extractSearchQuery(span);
|
|
2678
2718
|
let type = "span";
|
|
2679
2719
|
let name = span.name;
|
|
2680
2720
|
const aliases = new Set([span.name]);
|
|
2681
2721
|
let args;
|
|
2682
|
-
if (toolName) {
|
|
2722
|
+
if (command && isCommandToolName(toolName)) {
|
|
2723
|
+
type = "command";
|
|
2724
|
+
name = command;
|
|
2725
|
+
aliases.add(command);
|
|
2726
|
+
args = getToolArgs();
|
|
2727
|
+
if (toolName) aliases.add(toolName);
|
|
2728
|
+
const executable = getCommandExecutable(command);
|
|
2729
|
+
if (executable) aliases.add(executable);
|
|
2730
|
+
} else if (toolName) {
|
|
2683
2731
|
type = "tool";
|
|
2684
2732
|
name = toolName;
|
|
2685
2733
|
aliases.add(toolName);
|
|
2686
|
-
args =
|
|
2734
|
+
args = getToolArgs();
|
|
2687
2735
|
} else if (command) {
|
|
2688
2736
|
type = "command";
|
|
2689
2737
|
name = command;
|
|
@@ -2859,7 +2907,7 @@ const handleRedteam = async ({ assertion, baseType, test, prompt, outputString,
|
|
|
2859
2907
|
if (match) evalId = match[1];
|
|
2860
2908
|
}
|
|
2861
2909
|
}
|
|
2862
|
-
const tracking = await
|
|
2910
|
+
const tracking = await require_indirectWebPwn.checkExfilTracking(webPageUuid, evalId);
|
|
2863
2911
|
if (tracking) gradingContext = {
|
|
2864
2912
|
...gradingContext,
|
|
2865
2913
|
wasExfiltrated: tracking.wasExfiltrated,
|
|
@@ -2925,7 +2973,7 @@ function handleIsRefusal(params) {
|
|
|
2925
2973
|
assertion
|
|
2926
2974
|
};
|
|
2927
2975
|
}
|
|
2928
|
-
const pass =
|
|
2976
|
+
const pass = require_util$2.isBasicRefusal(output) !== inverse;
|
|
2929
2977
|
return {
|
|
2930
2978
|
pass,
|
|
2931
2979
|
score: pass ? 1 : 0,
|
|
@@ -3870,13 +3918,13 @@ function resolveSequenceValue(value) {
|
|
|
3870
3918
|
}
|
|
3871
3919
|
throw new Error("trajectory:tool-sequence assertion must have an array or object value");
|
|
3872
3920
|
}
|
|
3873
|
-
function isRecord(value) {
|
|
3921
|
+
function isRecord$1(value) {
|
|
3874
3922
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
3875
3923
|
}
|
|
3876
3924
|
function matchesExpectedArgsPartial(actual, expected) {
|
|
3877
3925
|
if (Array.isArray(expected)) return Array.isArray(actual) && actual.length === expected.length && expected.every((item, index) => matchesExpectedArgsPartial(actual[index], item));
|
|
3878
|
-
if (isRecord(expected)) {
|
|
3879
|
-
if (!isRecord(actual)) return false;
|
|
3926
|
+
if (isRecord$1(expected)) {
|
|
3927
|
+
if (!isRecord$1(actual)) return false;
|
|
3880
3928
|
return Object.entries(expected).every(([key, expectedValue]) => Object.prototype.hasOwnProperty.call(actual, key) && matchesExpectedArgsPartial(actual[key], expectedValue));
|
|
3881
3929
|
}
|
|
3882
3930
|
return (0, node_util.isDeepStrictEqual)(actual, expected);
|
|
@@ -4233,7 +4281,7 @@ function assertionMayNeedTraceContext(assertion) {
|
|
|
4233
4281
|
if (assertionUsesTrace(assertion)) return true;
|
|
4234
4282
|
if (assertion.type === "assert-set") return assertion.assert.some(assertionMayNeedTraceContext);
|
|
4235
4283
|
if (assertion.type.startsWith("promptfoo:redteam:coding-agent:")) return true;
|
|
4236
|
-
return typeof assertion.value === "string" ? assertion.value.startsWith("file://") ||
|
|
4284
|
+
return typeof assertion.value === "string" ? assertion.value.startsWith("file://") || require_packageParser.isPackagePath(assertion.value) : false;
|
|
4237
4285
|
}
|
|
4238
4286
|
function hasTraceAwareAssertions(assertions) {
|
|
4239
4287
|
return Boolean(assertions?.some(assertionMayNeedTraceContext));
|
|
@@ -4247,7 +4295,7 @@ async function loadTraceData(traceId) {
|
|
|
4247
4295
|
let stableObservations = 0;
|
|
4248
4296
|
let latestTrace = null;
|
|
4249
4297
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
4250
|
-
latestTrace = await traceStore.getTrace(traceId);
|
|
4298
|
+
latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
|
|
4251
4299
|
const spanCount = latestTrace?.spans?.length ?? 0;
|
|
4252
4300
|
if (spanCount > 0) {
|
|
4253
4301
|
stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
|
|
@@ -4300,7 +4348,7 @@ const ASSERTION_HANDLERS = {
|
|
|
4300
4348
|
"llm-rubric": handleLlmRubric,
|
|
4301
4349
|
meteor: async (params) => {
|
|
4302
4350
|
try {
|
|
4303
|
-
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-
|
|
4351
|
+
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-CR226f7Z.cjs"));
|
|
4304
4352
|
return handleMeteorAssertion(params);
|
|
4305
4353
|
} catch (error) {
|
|
4306
4354
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -4436,7 +4484,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
4436
4484
|
};
|
|
4437
4485
|
}
|
|
4438
4486
|
else if (filePath.endsWith(".rb")) try {
|
|
4439
|
-
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-
|
|
4487
|
+
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-CqUWBZAt.cjs")).then((n) => n.rubyUtils_exports);
|
|
4440
4488
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
4441
4489
|
require_logger.logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
4442
4490
|
} catch (error) {
|
|
@@ -4448,9 +4496,9 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
4448
4496
|
};
|
|
4449
4497
|
}
|
|
4450
4498
|
else renderedValue = require_graders.processFileReference(renderedValue);
|
|
4451
|
-
} else if (
|
|
4499
|
+
} else if (require_packageParser.isPackagePath(renderedValue)) {
|
|
4452
4500
|
const basePath = require_logger.state.basePath || "";
|
|
4453
|
-
const requiredModule = await
|
|
4501
|
+
const requiredModule = await require_packageParser.loadFromPackage(renderedValue, basePath);
|
|
4454
4502
|
if (typeof requiredModule !== "function") throw new Error(`Assertion malformed: ${renderedValue} must be a function. Received: ${typeof requiredModule}`);
|
|
4455
4503
|
valueFromScript = await Promise.resolve(requiredModule(output, context));
|
|
4456
4504
|
} else renderedValue = nunjucks.renderString(renderedValue, resolvedVars);
|
|
@@ -4553,7 +4601,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
|
|
|
4553
4601
|
require_logger.logger.debug(`Failed to preload trace data for assertions: ${error}`);
|
|
4554
4602
|
preloadedTraceData = null;
|
|
4555
4603
|
}
|
|
4556
|
-
|
|
4604
|
+
const concurrency = require_graders.getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
|
|
4605
|
+
await async.default.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
|
|
4557
4606
|
if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
|
|
4558
4607
|
const result = await runAssertion({
|
|
4559
4608
|
prompt,
|
|
@@ -4680,7 +4729,8 @@ var CIProgressReporter = class {
|
|
|
4680
4729
|
}
|
|
4681
4730
|
updateTotalTests(newTotal) {
|
|
4682
4731
|
this.totalTests = Math.max(newTotal, 1);
|
|
4683
|
-
|
|
4732
|
+
const percentage = Math.floor(this.completedTests / this.totalTests * 100);
|
|
4733
|
+
this.highestPercentageSeen = percentage;
|
|
4684
4734
|
}
|
|
4685
4735
|
finish() {
|
|
4686
4736
|
if (this.intervalId) {
|
|
@@ -4853,6 +4903,10 @@ function getDefaultOtelConfig() {
|
|
|
4853
4903
|
}
|
|
4854
4904
|
//#endregion
|
|
4855
4905
|
//#region src/tracing/localSpanExporter.ts
|
|
4906
|
+
const MISSING_TRACE_RETRY_DELAY_MS = 50;
|
|
4907
|
+
function delay(ms) {
|
|
4908
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
4909
|
+
}
|
|
4856
4910
|
/**
|
|
4857
4911
|
* A span exporter that writes spans to the local TraceStore (SQLite).
|
|
4858
4912
|
* This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
|
|
@@ -4894,7 +4948,7 @@ var LocalSpanExporter = class {
|
|
|
4894
4948
|
}
|
|
4895
4949
|
let firstError;
|
|
4896
4950
|
for (const [traceId, spanDataList] of spansByTrace) try {
|
|
4897
|
-
const result = await
|
|
4951
|
+
const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
|
|
4898
4952
|
if (result.stored) require_logger.logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
|
|
4899
4953
|
else require_logger.logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
|
|
4900
4954
|
} catch (error) {
|
|
@@ -4906,6 +4960,16 @@ var LocalSpanExporter = class {
|
|
|
4906
4960
|
}
|
|
4907
4961
|
return firstError;
|
|
4908
4962
|
}
|
|
4963
|
+
async addSpansWithTraceRetry(traceStore, traceId, spans) {
|
|
4964
|
+
const options = {
|
|
4965
|
+
skipTraceCheck: false,
|
|
4966
|
+
warnIfMissingTrace: false
|
|
4967
|
+
};
|
|
4968
|
+
const result = await traceStore.addSpans(traceId, spans, options);
|
|
4969
|
+
if (result.stored) return result;
|
|
4970
|
+
await delay(MISSING_TRACE_RETRY_DELAY_MS);
|
|
4971
|
+
return traceStore.addSpans(traceId, spans, options);
|
|
4972
|
+
}
|
|
4909
4973
|
/**
|
|
4910
4974
|
* Convert an OTEL ReadableSpan to our SpanData format.
|
|
4911
4975
|
*/
|
|
@@ -5188,6 +5252,15 @@ function isPromptAllowed(prompt, allowedPrompts) {
|
|
|
5188
5252
|
}
|
|
5189
5253
|
//#endregion
|
|
5190
5254
|
//#region src/evaluator.ts
|
|
5255
|
+
const CONVERSATION_VAR_NAME = "_conversation";
|
|
5256
|
+
const promptUsesConversationVariableCache = new lru_cache.LRUCache({ max: 1024 });
|
|
5257
|
+
function promptUsesConversationVariable(prompt) {
|
|
5258
|
+
const cached = promptUsesConversationVariableCache.get(prompt.raw);
|
|
5259
|
+
if (cached !== void 0) return cached;
|
|
5260
|
+
const { referenced, parsed } = require_render.analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
|
|
5261
|
+
if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
|
|
5262
|
+
return referenced;
|
|
5263
|
+
}
|
|
5191
5264
|
/**
|
|
5192
5265
|
* Manages a single progress bar for the evaluation
|
|
5193
5266
|
*/
|
|
@@ -5387,6 +5460,18 @@ function hasProviderGroupedAssertion(assertion) {
|
|
|
5387
5460
|
function shouldDeferGradingForTest(test) {
|
|
5388
5461
|
return Boolean(test.assert?.some(hasProviderGroupedAssertion));
|
|
5389
5462
|
}
|
|
5463
|
+
function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
|
|
5464
|
+
if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
|
|
5465
|
+
if (shouldGroupGradingByProvider) {
|
|
5466
|
+
require_logger.logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
|
|
5467
|
+
return;
|
|
5468
|
+
}
|
|
5469
|
+
if (concurrency !== 1) return;
|
|
5470
|
+
const reasons = [];
|
|
5471
|
+
if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
|
|
5472
|
+
if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
|
|
5473
|
+
if (reasons.length > 0) require_logger.logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
|
|
5474
|
+
}
|
|
5390
5475
|
function applyGradingResult(row, checkResult) {
|
|
5391
5476
|
if (!checkResult.pass) {
|
|
5392
5477
|
row.error = checkResult.reason;
|
|
@@ -5401,14 +5486,29 @@ function applyGradingResult(row, checkResult) {
|
|
|
5401
5486
|
if (checkResult.tokensUsed) require_tokenUsageUtils.accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
|
|
5402
5487
|
row.gradingResult = checkResult;
|
|
5403
5488
|
}
|
|
5404
|
-
|
|
5405
|
-
|
|
5406
|
-
|
|
5407
|
-
|
|
5408
|
-
|
|
5409
|
-
|
|
5410
|
-
|
|
5411
|
-
|
|
5489
|
+
const ABORTED_GRADING_PREFIX = "Aborted: ";
|
|
5490
|
+
function isAbortShapedError(error) {
|
|
5491
|
+
return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
|
|
5492
|
+
}
|
|
5493
|
+
function applyGradingError(row, error, abortSignal) {
|
|
5494
|
+
const errorAsError = error instanceof Error ? error : void 0;
|
|
5495
|
+
if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
|
|
5496
|
+
const shortMessage = errorAsError?.message ?? String(error);
|
|
5497
|
+
require_logger.logger.debug("Assertion grading aborted", {
|
|
5498
|
+
error: shortMessage,
|
|
5499
|
+
promptIdx: row.promptIdx,
|
|
5500
|
+
testIdx: row.testIdx
|
|
5501
|
+
});
|
|
5502
|
+
row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
|
|
5503
|
+
} else {
|
|
5504
|
+
const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
|
|
5505
|
+
require_logger.logger.error("Assertion grading failed during eval", {
|
|
5506
|
+
error: fullMessage,
|
|
5507
|
+
promptIdx: row.promptIdx,
|
|
5508
|
+
testIdx: row.testIdx
|
|
5509
|
+
});
|
|
5510
|
+
row.error = fullMessage;
|
|
5511
|
+
}
|
|
5412
5512
|
row.failureReason = require_types.ResultFailureReason.ERROR;
|
|
5413
5513
|
row.success = false;
|
|
5414
5514
|
row.score = 0;
|
|
@@ -5420,7 +5520,7 @@ function getNonTransientTargetStatus(row) {
|
|
|
5420
5520
|
}
|
|
5421
5521
|
function createRunEvalState({ provider, prompt, test }) {
|
|
5422
5522
|
const vars = structuredClone(test.vars || {});
|
|
5423
|
-
const fileMetadata =
|
|
5523
|
+
const fileMetadata = require_evaluatorHelpers.collectFileMetadata(vars);
|
|
5424
5524
|
const conversationKey = `${provider.label || provider.id()}:${prompt.id}${test.metadata?.conversationId ? `:${test.metadata.conversationId}` : ""}`;
|
|
5425
5525
|
const setup = createRunEvalSetup({
|
|
5426
5526
|
provider,
|
|
@@ -5440,7 +5540,7 @@ function createRunEvalState({ provider, prompt, test }) {
|
|
|
5440
5540
|
};
|
|
5441
5541
|
}
|
|
5442
5542
|
function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
|
|
5443
|
-
const usesConversation = prompt
|
|
5543
|
+
const usesConversation = promptUsesConversationVariable(prompt);
|
|
5444
5544
|
if (!require_logger.getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
|
|
5445
5545
|
}
|
|
5446
5546
|
function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
|
|
@@ -5459,8 +5559,8 @@ function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
|
|
|
5459
5559
|
};
|
|
5460
5560
|
}
|
|
5461
5561
|
async function renderRunEvalPrompt({ filters, isRedteam, provider, promptForRender, test, testSuite, vars }) {
|
|
5462
|
-
const renderedPrompt = await
|
|
5463
|
-
if (isRedteam)
|
|
5562
|
+
const renderedPrompt = await require_evaluatorHelpers.renderPrompt(promptForRender, vars, filters, provider, shouldSkipRedteamInjectVar(test, testSuite, isRedteam) ? [getRedteamInjectVar(test, promptForRender, testSuite)] : void 0);
|
|
5563
|
+
if (isRedteam) require_promptLength.throwIfTargetPromptExceedsMaxChars(renderedPrompt, testSuite?.redteam?.maxCharsPerMessage);
|
|
5464
5564
|
const setup = createRunEvalSetup({
|
|
5465
5565
|
provider,
|
|
5466
5566
|
prompt: promptForRender,
|
|
@@ -5527,7 +5627,7 @@ async function callActiveProvider({ abortSignal, evalId, filters, promptForRende
|
|
|
5527
5627
|
});
|
|
5528
5628
|
const callApiOptions = abortSignal ? { abortSignal } : void 0;
|
|
5529
5629
|
const callApi = () => activeProvider.callApi(renderedPrompt, callApiContext, callApiOptions);
|
|
5530
|
-
const response = rateLimitRegistry ? await rateLimitRegistry.execute(activeProvider, callApi,
|
|
5630
|
+
const response = rateLimitRegistry ? await rateLimitRegistry.execute(activeProvider, callApi, require_shared.createProviderRateLimitOptions()) : await callApi();
|
|
5531
5631
|
require_logger.logger.debug(`Provider response properties: ${Object.keys(response).join(", ")}`);
|
|
5532
5632
|
require_logger.logger.debug(`Provider response cached property explicitly: ${response.cached}`);
|
|
5533
5633
|
return response;
|
|
@@ -5605,7 +5705,7 @@ function createEvaluateResult({ fileMetadata, latencyMs, prompt, promptIdx, rend
|
|
|
5605
5705
|
};
|
|
5606
5706
|
if (!ret.metadata?.sessionIds && !ret.metadata?.sessionId) {
|
|
5607
5707
|
ret.metadata ??= {};
|
|
5608
|
-
ret.metadata.sessionId =
|
|
5708
|
+
ret.metadata.sessionId = require_util$2.getSessionId(response, { vars });
|
|
5609
5709
|
}
|
|
5610
5710
|
return ret;
|
|
5611
5711
|
}
|
|
@@ -5613,7 +5713,7 @@ function trackProviderUsage(provider, response) {
|
|
|
5613
5713
|
if (!response.tokenUsage) return;
|
|
5614
5714
|
const providerId = provider.id();
|
|
5615
5715
|
const trackingId = provider.constructor?.name ? `${providerId} (${provider.constructor.name})` : providerId;
|
|
5616
|
-
|
|
5716
|
+
require_shared.TokenUsageTracker.getInstance().trackUsage(trackingId, response.tokenUsage);
|
|
5617
5717
|
}
|
|
5618
5718
|
async function applyRunEvalResponseOutcome({ abortSignal, deferGrading, evalId, isRedteam, latencyMs, prompt, promptIdx, provider, providerCallQueue, rateLimitRegistry, renderedPrompt, response, ret, test, testIdx, traceContext, vars }) {
|
|
5619
5719
|
if (response.error) {
|
|
@@ -5687,7 +5787,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
|
|
|
5687
5787
|
assertScoringFunction: test.assertScoringFunction,
|
|
5688
5788
|
traceId
|
|
5689
5789
|
}).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
|
|
5690
|
-
applyGradingError(ret, error);
|
|
5790
|
+
applyGradingError(ret, error, abortSignal);
|
|
5691
5791
|
});
|
|
5692
5792
|
deferredGradingPromises.set(ret, gradingPromise);
|
|
5693
5793
|
return;
|
|
@@ -6234,7 +6334,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
|
|
|
6234
6334
|
const defaultProvider = defaultTest.provider;
|
|
6235
6335
|
if (require_types.isApiProvider(defaultProvider)) return defaultProvider;
|
|
6236
6336
|
if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
6237
|
-
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-
|
|
6337
|
+
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BDVVIQM6.cjs")).then((n) => n.providers_exports);
|
|
6238
6338
|
return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
6239
6339
|
}
|
|
6240
6340
|
return defaultProvider;
|
|
@@ -6394,7 +6494,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
|
|
|
6394
6494
|
async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
6395
6495
|
if (!require_logger.state.resume || !evalRecord.persisted) return;
|
|
6396
6496
|
try {
|
|
6397
|
-
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-
|
|
6497
|
+
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-CvtS8h8u.cjs")).then((n) => n.evalResult_exports);
|
|
6398
6498
|
const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: require_logger.state.retryMode });
|
|
6399
6499
|
const originalCount = runEvalOptions.length;
|
|
6400
6500
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -6408,14 +6508,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
|
6408
6508
|
}
|
|
6409
6509
|
}
|
|
6410
6510
|
function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
|
|
6411
|
-
const usesConversationVar = prompts.some(
|
|
6511
|
+
const usesConversationVar = prompts.some(promptUsesConversationVariable);
|
|
6412
6512
|
if (concurrency <= 1) return {
|
|
6413
6513
|
concurrency,
|
|
6414
6514
|
usesConversationVar
|
|
6415
6515
|
};
|
|
6416
6516
|
const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
|
|
6417
6517
|
if (usesConversationVar) {
|
|
6418
|
-
require_logger.logger.info(`Setting concurrency to 1 because the ${chalk.default.cyan(
|
|
6518
|
+
require_logger.logger.info(`Setting concurrency to 1 because the ${chalk.default.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
|
|
6419
6519
|
return {
|
|
6420
6520
|
concurrency: 1,
|
|
6421
6521
|
usesConversationVar
|
|
@@ -6645,8 +6745,9 @@ var Evaluator = class {
|
|
|
6645
6745
|
};
|
|
6646
6746
|
this.conversations = {};
|
|
6647
6747
|
this.registers = {};
|
|
6648
|
-
|
|
6649
|
-
this.
|
|
6748
|
+
const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
|
|
6749
|
+
this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
|
|
6750
|
+
this.rateLimitRegistry = require_shared.createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
|
|
6650
6751
|
this.rateLimitRegistry.on("ratelimit:hit", (data) => {
|
|
6651
6752
|
require_logger.logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
|
|
6652
6753
|
retryAfterMs: data.retryAfterMs,
|
|
@@ -6672,7 +6773,7 @@ var Evaluator = class {
|
|
|
6672
6773
|
current: data.current
|
|
6673
6774
|
});
|
|
6674
6775
|
});
|
|
6675
|
-
|
|
6776
|
+
require_shared.redteamProviderManager.setRateLimitRegistry(this.rateLimitRegistry);
|
|
6676
6777
|
}
|
|
6677
6778
|
/**
|
|
6678
6779
|
* Updates metrics and stats after a comparison assertion (select-best or max-score).
|
|
@@ -6750,7 +6851,7 @@ var Evaluator = class {
|
|
|
6750
6851
|
});
|
|
6751
6852
|
}
|
|
6752
6853
|
async runEvalStepAfterBeforeEach(evalStep, { deferGrading, onRowsReady, providerCallQueue, testSuite }) {
|
|
6753
|
-
evalStep.test = (await
|
|
6854
|
+
evalStep.test = (await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "beforeEach", { test: evalStep.test })).test;
|
|
6754
6855
|
const rows = await runEvalInternal({
|
|
6755
6856
|
...evalStep,
|
|
6756
6857
|
deferGrading,
|
|
@@ -6765,6 +6866,25 @@ var Evaluator = class {
|
|
|
6765
6866
|
this.trackCompletedRow(evalStep, row, context);
|
|
6766
6867
|
context.numComplete++;
|
|
6767
6868
|
const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
|
|
6869
|
+
if (context.testSuite.extensions?.length) try {
|
|
6870
|
+
const afterEachOut = await require_evaluatorHelpers.runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6871
|
+
test: evalStep.test,
|
|
6872
|
+
result: {
|
|
6873
|
+
...row,
|
|
6874
|
+
namedScores: { ...row.namedScores },
|
|
6875
|
+
metadata: { ...row.metadata },
|
|
6876
|
+
response: row.response ? {
|
|
6877
|
+
...row.response,
|
|
6878
|
+
metadata: { ...row.response.metadata }
|
|
6879
|
+
} : row.response
|
|
6880
|
+
}
|
|
6881
|
+
});
|
|
6882
|
+
row.namedScores = require_evaluatorHelpers.filterFiniteScores(afterEachOut.result.namedScores);
|
|
6883
|
+
row.metadata = afterEachOut.result.metadata;
|
|
6884
|
+
if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
|
|
6885
|
+
} catch (error) {
|
|
6886
|
+
require_logger.logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
|
|
6887
|
+
}
|
|
6768
6888
|
await this.persistEvalRow(row);
|
|
6769
6889
|
if (this.abortIfTargetUnavailable(row, context)) break;
|
|
6770
6890
|
const metrics = context.prompts[row.promptIdx].metrics;
|
|
@@ -6776,10 +6896,6 @@ var Evaluator = class {
|
|
|
6776
6896
|
promptEvalCount,
|
|
6777
6897
|
row
|
|
6778
6898
|
});
|
|
6779
|
-
await require_providers.runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6780
|
-
test: evalStep.test,
|
|
6781
|
-
result: row
|
|
6782
|
-
});
|
|
6783
6899
|
context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
|
|
6784
6900
|
}
|
|
6785
6901
|
}
|
|
@@ -6934,7 +7050,15 @@ var Evaluator = class {
|
|
|
6934
7050
|
})) break;
|
|
6935
7051
|
}
|
|
6936
7052
|
} catch (error) {
|
|
6937
|
-
|
|
7053
|
+
const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
|
|
7054
|
+
try {
|
|
7055
|
+
await flushGroupedRows();
|
|
7056
|
+
} catch (flushError) {
|
|
7057
|
+
require_logger.logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
|
|
7058
|
+
error: flushError instanceof Error ? flushError.message : String(flushError),
|
|
7059
|
+
pendingRowCount
|
|
7060
|
+
});
|
|
7061
|
+
}
|
|
6938
7062
|
throw error;
|
|
6939
7063
|
}
|
|
6940
7064
|
await flushGroupedRows();
|
|
@@ -7204,7 +7328,7 @@ var Evaluator = class {
|
|
|
7204
7328
|
async runAfterAllExtensions(testSuite) {
|
|
7205
7329
|
if (!testSuite.extensions?.length) return;
|
|
7206
7330
|
const resultsForExtension = (await this.evalRecord.getResults()).map((result) => "toEvaluateResult" in result ? result.toEvaluateResult() : result);
|
|
7207
|
-
await
|
|
7331
|
+
await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "afterAll", {
|
|
7208
7332
|
prompts: this.evalRecord.prompts,
|
|
7209
7333
|
results: resultsForExtension,
|
|
7210
7334
|
suite: testSuite,
|
|
@@ -7290,7 +7414,7 @@ var Evaluator = class {
|
|
|
7290
7414
|
const rowsWithSelectBestAssertion = /* @__PURE__ */ new Set();
|
|
7291
7415
|
const rowsWithMaxScoreAssertion = /* @__PURE__ */ new Set();
|
|
7292
7416
|
ensureDefaultTestForExtensions(testSuite);
|
|
7293
|
-
testSuite = (await
|
|
7417
|
+
testSuite = (await require_evaluatorHelpers.runExtensionHook(testSuite.extensions, "beforeAll", { suite: testSuite })).suite;
|
|
7294
7418
|
if (!await maybeAddGeneratedPrompts(testSuite, options)) return this.evalRecord;
|
|
7295
7419
|
prompts.push(...buildCompletedPrompts(testSuite, this.evalRecord));
|
|
7296
7420
|
const promptIndexMap = buildPromptIndexMap(prompts);
|
|
@@ -7370,6 +7494,13 @@ var Evaluator = class {
|
|
|
7370
7494
|
if (!this.options.silent) {
|
|
7371
7495
|
if (serialRunEvalOptions.length > 0) require_logger.logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
|
|
7372
7496
|
if (concurrentRunEvalOptions.length > 0) require_logger.logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
|
|
7497
|
+
logGroupedGradingStatus({
|
|
7498
|
+
concurrency,
|
|
7499
|
+
hasEvalStepTimeout,
|
|
7500
|
+
runEvalOptions,
|
|
7501
|
+
shouldGroupGradingByProvider,
|
|
7502
|
+
usesConversationVar
|
|
7503
|
+
});
|
|
7373
7504
|
}
|
|
7374
7505
|
if (this.options.showProgressBar && progressBarManager) {
|
|
7375
7506
|
await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
@@ -7461,7 +7592,7 @@ var Evaluator = class {
|
|
|
7461
7592
|
});
|
|
7462
7593
|
}
|
|
7463
7594
|
this.rateLimitRegistry?.dispose();
|
|
7464
|
-
|
|
7595
|
+
require_shared.redteamProviderManager.setRateLimitRegistry(void 0);
|
|
7465
7596
|
require_logger.state.maxConcurrency = void 0;
|
|
7466
7597
|
}
|
|
7467
7598
|
}
|
|
@@ -8369,7 +8500,7 @@ var Eval = class Eval {
|
|
|
8369
8500
|
const evalInstance = new Eval(eval_.config, {
|
|
8370
8501
|
id: eval_.id,
|
|
8371
8502
|
createdAt: new Date(eval_.createdAt),
|
|
8372
|
-
author: eval_.author
|
|
8503
|
+
author: eval_.author,
|
|
8373
8504
|
description: eval_.description || void 0,
|
|
8374
8505
|
prompts: eval_.prompts || [],
|
|
8375
8506
|
datasetId,
|
|
@@ -8392,7 +8523,7 @@ var Eval = class Eval {
|
|
|
8392
8523
|
return (await require_tables.getDb().select().from(require_tables.evalsTable).limit(limit).orderBy((0, drizzle_orm.desc)(require_tables.evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
|
|
8393
8524
|
id: e.id,
|
|
8394
8525
|
createdAt: new Date(e.createdAt),
|
|
8395
|
-
author: e.author
|
|
8526
|
+
author: e.author,
|
|
8396
8527
|
description: e.description || void 0,
|
|
8397
8528
|
prompts: e.prompts || [],
|
|
8398
8529
|
persisted: true
|
|
@@ -8407,7 +8538,7 @@ var Eval = class Eval {
|
|
|
8407
8538
|
return (await require_tables.getDb().select().from(require_tables.evalsTable).orderBy((0, drizzle_orm.desc)(require_tables.evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
|
|
8408
8539
|
id: e.id,
|
|
8409
8540
|
createdAt: new Date(e.createdAt),
|
|
8410
|
-
author: e.author
|
|
8541
|
+
author: e.author,
|
|
8411
8542
|
description: e.description || void 0,
|
|
8412
8543
|
prompts: e.prompts || [],
|
|
8413
8544
|
persisted: true
|
|
@@ -8422,7 +8553,7 @@ var Eval = class Eval {
|
|
|
8422
8553
|
static async create(config, renderedPrompts, opts) {
|
|
8423
8554
|
const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
|
|
8424
8555
|
const evalId = opts?.id || createEvalId(createdAt);
|
|
8425
|
-
const author = opts
|
|
8556
|
+
const author = opts && "author" in opts ? opts.author ?? null : require_accounts.getAuthor();
|
|
8426
8557
|
const db = require_tables.getDb();
|
|
8427
8558
|
const datasetId = require_createHash.sha256(JSON.stringify(config.tests || []));
|
|
8428
8559
|
db.transaction(() => {
|
|
@@ -8484,7 +8615,7 @@ var Eval = class Eval {
|
|
|
8484
8615
|
});
|
|
8485
8616
|
return new Eval(config, {
|
|
8486
8617
|
id: evalId,
|
|
8487
|
-
author
|
|
8618
|
+
author,
|
|
8488
8619
|
createdAt,
|
|
8489
8620
|
persisted: true,
|
|
8490
8621
|
runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions)
|
|
@@ -8494,7 +8625,7 @@ var Eval = class Eval {
|
|
|
8494
8625
|
const createdAt = opts?.createdAt || /* @__PURE__ */ new Date();
|
|
8495
8626
|
this.createdAt = createdAt.getTime();
|
|
8496
8627
|
this.id = opts?.id || createEvalId(createdAt);
|
|
8497
|
-
this.author = opts?.author;
|
|
8628
|
+
this.author = opts?.author ?? null;
|
|
8498
8629
|
this.config = config;
|
|
8499
8630
|
this.results = [];
|
|
8500
8631
|
this.prompts = opts?.prompts || [];
|
|
@@ -9024,7 +9155,7 @@ var Eval = class Eval {
|
|
|
9024
9155
|
newConfig.description = copyDescription;
|
|
9025
9156
|
const newPrompts = structuredClone(this.prompts);
|
|
9026
9157
|
const newVars = this.vars ? structuredClone(this.vars) : [];
|
|
9027
|
-
const author = require_accounts.
|
|
9158
|
+
const author = require_accounts.getAuthor();
|
|
9028
9159
|
const db = require_tables.getDb();
|
|
9029
9160
|
let copiedCount = 0;
|
|
9030
9161
|
db.transaction(() => {
|
|
@@ -10941,7 +11072,7 @@ async function fetchRemoteGeneration(task, prompts) {
|
|
|
10941
11072
|
version: require_version.VERSION,
|
|
10942
11073
|
email: require_accounts.getUserEmail()
|
|
10943
11074
|
};
|
|
10944
|
-
const response = await require_cache.fetchWithCache(
|
|
11075
|
+
const response = await require_cache.fetchWithCache(require_remoteGeneration.getRemoteGenerationUrl(), {
|
|
10945
11076
|
method: "POST",
|
|
10946
11077
|
headers: { "Content-Type": "application/json" },
|
|
10947
11078
|
body: JSON.stringify(body)
|
|
@@ -10976,7 +11107,7 @@ function formatPrompts(prompts) {
|
|
|
10976
11107
|
//#endregion
|
|
10977
11108
|
//#region src/redteam/extraction/entities.ts
|
|
10978
11109
|
async function extractEntities(provider, prompts) {
|
|
10979
|
-
if (
|
|
11110
|
+
if (require_remoteGeneration.shouldGenerateRemote()) try {
|
|
10980
11111
|
return await fetchRemoteGeneration("entities", prompts);
|
|
10981
11112
|
} catch (error) {
|
|
10982
11113
|
require_logger.logger.warn(`[Entity Extraction] Failed, returning 0 entities. Error using remote generation: ${error}`);
|
|
@@ -11023,7 +11154,7 @@ async function extractSystemPurpose(provider, prompts) {
|
|
|
11023
11154
|
require_logger.logger.debug("[purpose] No meaningful prompts provided, returning default purpose");
|
|
11024
11155
|
return DEFAULT_PURPOSE;
|
|
11025
11156
|
}
|
|
11026
|
-
if (!
|
|
11157
|
+
if (!require_remoteGeneration.neverGenerateRemote()) try {
|
|
11027
11158
|
return await fetchRemoteGeneration("purpose", prompts);
|
|
11028
11159
|
} catch (error) {
|
|
11029
11160
|
require_logger.logger.warn(`[purpose] Error using remote generation, returning empty string: ${error}`);
|
|
@@ -11295,7 +11426,7 @@ function createTestCase(injectVar, output, harmCategory) {
|
|
|
11295
11426
|
vars: { [injectVar]: output.trim() },
|
|
11296
11427
|
metadata: {
|
|
11297
11428
|
harmCategory: harmCategoryLabel,
|
|
11298
|
-
pluginId:
|
|
11429
|
+
pluginId: require_util$2.getShortPluginId(harmCategory)
|
|
11299
11430
|
},
|
|
11300
11431
|
assert: getHarmfulAssertions(harmCategory)
|
|
11301
11432
|
};
|
|
@@ -11418,25 +11549,45 @@ var AlignedHarmfulPlugin = class extends require_graders.RedteamPluginBase {
|
|
|
11418
11549
|
getAssertions(_prompt) {
|
|
11419
11550
|
return getHarmfulAssertions(this.harmCategory);
|
|
11420
11551
|
}
|
|
11421
|
-
promptsToTestCases(prompts) {
|
|
11552
|
+
async promptsToTestCases(prompts) {
|
|
11422
11553
|
const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
|
|
11423
11554
|
const harmCategoryLabel = require_types.HARM_PLUGINS[this.harmCategory] || this.harmCategory;
|
|
11424
|
-
|
|
11555
|
+
const pluginId = require_util$2.getShortPluginId(this.harmCategory);
|
|
11556
|
+
return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
|
|
11425
11557
|
const vars = { [this.injectVar]: __prompt };
|
|
11426
|
-
|
|
11427
|
-
|
|
11428
|
-
|
|
11429
|
-
|
|
11558
|
+
let inputMaterialization;
|
|
11559
|
+
if (hasMultipleInputs) {
|
|
11560
|
+
let parsed;
|
|
11561
|
+
try {
|
|
11562
|
+
parsed = JSON.parse(__prompt);
|
|
11563
|
+
} catch (error) {
|
|
11564
|
+
require_logger.logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
11565
|
+
}
|
|
11566
|
+
if (parsed) try {
|
|
11567
|
+
const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
|
|
11568
|
+
materializationIndex,
|
|
11569
|
+
pluginId,
|
|
11570
|
+
provider: this.provider,
|
|
11571
|
+
purpose: this.purpose
|
|
11572
|
+
});
|
|
11573
|
+
Object.assign(vars, materializedVars.vars);
|
|
11574
|
+
inputMaterialization = materializedVars.metadata;
|
|
11575
|
+
} catch (error) {
|
|
11576
|
+
require_logger.logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
|
|
11577
|
+
throw error;
|
|
11578
|
+
}
|
|
11579
|
+
}
|
|
11430
11580
|
return {
|
|
11431
11581
|
vars,
|
|
11432
11582
|
metadata: {
|
|
11433
11583
|
harmCategory: harmCategoryLabel,
|
|
11434
|
-
pluginId
|
|
11435
|
-
pluginConfig: this.config
|
|
11584
|
+
pluginId,
|
|
11585
|
+
pluginConfig: this.config,
|
|
11586
|
+
...inputMaterialization ? { inputMaterialization } : {}
|
|
11436
11587
|
},
|
|
11437
11588
|
assert: getHarmfulAssertions(this.harmCategory)
|
|
11438
11589
|
};
|
|
11439
|
-
});
|
|
11590
|
+
}));
|
|
11440
11591
|
}
|
|
11441
11592
|
};
|
|
11442
11593
|
//#endregion
|
|
@@ -11445,25 +11596,42 @@ var AlignedHarmfulPlugin = class extends require_graders.RedteamPluginBase {
|
|
|
11445
11596
|
* Extract content from <Prompt> tags and parse JSON if inputs are defined.
|
|
11446
11597
|
* Returns the processed prompt and any additional vars extracted from JSON.
|
|
11447
11598
|
*/
|
|
11448
|
-
function processPromptForInputs(prompt,
|
|
11599
|
+
async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
|
|
11449
11600
|
let processedPrompt = prompt.trim();
|
|
11450
11601
|
const additionalVars = {};
|
|
11451
|
-
|
|
11602
|
+
let additionalMetadata;
|
|
11603
|
+
const extractedPrompt = require_util$2.extractPromptFromTags(processedPrompt);
|
|
11452
11604
|
if (extractedPrompt) processedPrompt = extractedPrompt;
|
|
11453
|
-
if (inputs && Object.keys(inputs).length > 0)
|
|
11454
|
-
|
|
11455
|
-
|
|
11456
|
-
|
|
11457
|
-
|
|
11605
|
+
if (inputs && Object.keys(inputs).length > 0) {
|
|
11606
|
+
let parsed;
|
|
11607
|
+
try {
|
|
11608
|
+
parsed = JSON.parse(processedPrompt);
|
|
11609
|
+
} catch (error) {
|
|
11610
|
+
require_logger.logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
|
|
11611
|
+
}
|
|
11612
|
+
if (parsed) try {
|
|
11613
|
+
const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
|
|
11614
|
+
materializationIndex,
|
|
11615
|
+
pluginId: plugin,
|
|
11616
|
+
provider,
|
|
11617
|
+
purpose
|
|
11618
|
+
});
|
|
11619
|
+
Object.assign(additionalVars, materializedVars.vars);
|
|
11620
|
+
additionalMetadata = materializedVars.metadata;
|
|
11621
|
+
} catch (error) {
|
|
11622
|
+
require_logger.logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
|
|
11623
|
+
throw error;
|
|
11624
|
+
}
|
|
11458
11625
|
}
|
|
11459
11626
|
return {
|
|
11460
11627
|
processedPrompt,
|
|
11461
|
-
additionalVars
|
|
11628
|
+
additionalVars,
|
|
11629
|
+
additionalMetadata
|
|
11462
11630
|
};
|
|
11463
11631
|
}
|
|
11464
11632
|
async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
|
|
11465
11633
|
const maxHarmfulTests = require_logger.getEnvInt("PROMPTFOO_MAX_HARMFUL_TESTS_PER_REQUEST", 5);
|
|
11466
|
-
const unalignedProvider = new
|
|
11634
|
+
const unalignedProvider = new require_promptfoo.PromptfooHarmfulCompletionProvider({
|
|
11467
11635
|
purpose,
|
|
11468
11636
|
n: Math.min(n, maxHarmfulTests),
|
|
11469
11637
|
harmCategory: plugin,
|
|
@@ -11479,15 +11647,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
11479
11647
|
};
|
|
11480
11648
|
const allPrompts = await require_graders.retryWithDeduplication(generatePrompts, n);
|
|
11481
11649
|
const inputs = config?.inputs;
|
|
11482
|
-
return require_graders.sampleArray(allPrompts, n).map((prompt) => {
|
|
11483
|
-
const { processedPrompt, additionalVars } = processPromptForInputs(prompt,
|
|
11650
|
+
return Promise.all(require_graders.sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
|
|
11651
|
+
const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
|
|
11484
11652
|
const testCase = createTestCase(injectVar, processedPrompt, plugin);
|
|
11485
11653
|
if (Object.keys(additionalVars).length > 0) testCase.vars = {
|
|
11486
11654
|
...testCase.vars,
|
|
11487
11655
|
...additionalVars
|
|
11488
11656
|
};
|
|
11657
|
+
if (additionalMetadata) testCase.metadata = {
|
|
11658
|
+
...testCase.metadata,
|
|
11659
|
+
inputMaterialization: additionalMetadata
|
|
11660
|
+
};
|
|
11489
11661
|
return testCase;
|
|
11490
|
-
});
|
|
11662
|
+
}));
|
|
11491
11663
|
}
|
|
11492
11664
|
//#endregion
|
|
11493
11665
|
//#region src/redteam/plugins/teenSafety/graderExamples.ts
|
|
@@ -11805,9 +11977,9 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
|
|
|
11805
11977
|
function computeModifiersFromConfig(config) {
|
|
11806
11978
|
const modifiers = { ...config?.modifiers };
|
|
11807
11979
|
if (config?.language && typeof config.language === "string") modifiers.language = config.language;
|
|
11808
|
-
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
11809
|
-
const maxCharsModifier =
|
|
11810
|
-
if (maxCharsModifier) modifiers[
|
|
11980
|
+
if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(require_inputVariables.buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
|
|
11981
|
+
const maxCharsModifier = require_promptLength.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
11982
|
+
if (maxCharsModifier) modifiers[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
|
|
11811
11983
|
return modifiers;
|
|
11812
11984
|
}
|
|
11813
11985
|
function applyDefaultGraderExamples(key, config) {
|
|
@@ -11831,7 +12003,7 @@ function isValidMaxCharsPerMessage(limit) {
|
|
|
11831
12003
|
}
|
|
11832
12004
|
function getMaxCharsPerMessageFromConfig(config) {
|
|
11833
12005
|
if (isValidMaxCharsPerMessage(config?.maxCharsPerMessage)) return config.maxCharsPerMessage;
|
|
11834
|
-
const maxCharsModifier = (config?.modifiers)?.[
|
|
12006
|
+
const maxCharsModifier = (config?.modifiers)?.[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY];
|
|
11835
12007
|
if (typeof maxCharsModifier !== "string") return;
|
|
11836
12008
|
const match = /must be (\d+) characters or fewer\./.exec(maxCharsModifier);
|
|
11837
12009
|
if (!match) return;
|
|
@@ -11915,7 +12087,7 @@ function withMaxCharsRetries(pluginFactory) {
|
|
|
11915
12087
|
const rejectedPromptLengths = [];
|
|
11916
12088
|
let rejectedPromptLimit;
|
|
11917
12089
|
for (const testCase of generatedTestCases) {
|
|
11918
|
-
const violation =
|
|
12090
|
+
const violation = require_promptLength.getGeneratedPromptOverLimit(String(testCase.vars?.[params.injectVar] ?? ""), maxCharsPerMessage);
|
|
11919
12091
|
if (violation) {
|
|
11920
12092
|
rejectedPromptLengths.push(violation.length);
|
|
11921
12093
|
rejectedPromptLimit = violation.limit;
|
|
@@ -11932,16 +12104,16 @@ function withMaxCharsRetries(pluginFactory) {
|
|
|
11932
12104
|
}
|
|
11933
12105
|
async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
11934
12106
|
require_invariant.invariant(!require_logger.getEnvBool("PROMPTFOO_DISABLE_REDTEAM_REMOTE_GENERATION"), "fetchRemoteTestCases should never be called when remote generation is disabled");
|
|
11935
|
-
const remoteHealth = await checkRemoteHealth(
|
|
12107
|
+
const remoteHealth = await checkRemoteHealth(require_remoteGeneration.getRemoteHealthUrl());
|
|
11936
12108
|
if (remoteHealth.status !== "OK") {
|
|
11937
12109
|
require_logger.logger.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
|
|
11938
12110
|
return [];
|
|
11939
12111
|
}
|
|
11940
12112
|
const { graderExamples, ...configForRemote } = config ?? {};
|
|
11941
|
-
const maxCharsModifier =
|
|
12113
|
+
const maxCharsModifier = require_promptLength.getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
|
|
11942
12114
|
if (maxCharsModifier) configForRemote.modifiers = {
|
|
11943
12115
|
...configForRemote.modifiers ?? {},
|
|
11944
|
-
[
|
|
12116
|
+
[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY]: maxCharsModifier
|
|
11945
12117
|
};
|
|
11946
12118
|
const body = JSON.stringify({
|
|
11947
12119
|
config: configForRemote,
|
|
@@ -11954,7 +12126,7 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
11954
12126
|
email: require_accounts.getUserEmail()
|
|
11955
12127
|
});
|
|
11956
12128
|
try {
|
|
11957
|
-
const { data, status, statusText } = await require_cache.fetchWithCache(
|
|
12129
|
+
const { data, status, statusText } = await require_cache.fetchWithCache(require_remoteGeneration.getRemoteGenerationUrl(), {
|
|
11958
12130
|
method: "POST",
|
|
11959
12131
|
headers: { "Content-Type": "application/json" },
|
|
11960
12132
|
body
|
|
@@ -11971,23 +12143,56 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
11971
12143
|
return [];
|
|
11972
12144
|
}
|
|
11973
12145
|
}
|
|
12146
|
+
async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
|
|
12147
|
+
const inputs = config.inputs;
|
|
12148
|
+
if (!inputs || Object.keys(inputs).length === 0) return testCases;
|
|
12149
|
+
return Promise.all(testCases.map(async (testCase, materializationIndex) => {
|
|
12150
|
+
const inputVars = require_util$2.extractInputVarsFromPrompt(String(testCase.vars?.[injectVar] ?? ""), inputs);
|
|
12151
|
+
if (!inputVars) return testCase;
|
|
12152
|
+
const materializedVars = await require_inputVariables.materializeInputVariablesWithMetadata(inputVars, inputs, {
|
|
12153
|
+
materializationIndex,
|
|
12154
|
+
pluginId,
|
|
12155
|
+
provider,
|
|
12156
|
+
purpose
|
|
12157
|
+
});
|
|
12158
|
+
return {
|
|
12159
|
+
...testCase,
|
|
12160
|
+
vars: {
|
|
12161
|
+
...testCase.vars || {},
|
|
12162
|
+
...materializedVars.vars
|
|
12163
|
+
},
|
|
12164
|
+
metadata: {
|
|
12165
|
+
...testCase.metadata || {},
|
|
12166
|
+
...materializedVars.metadata ? { inputMaterialization: materializedVars.metadata } : {}
|
|
12167
|
+
}
|
|
12168
|
+
};
|
|
12169
|
+
}));
|
|
12170
|
+
}
|
|
11974
12171
|
function createPluginFactory(PluginClass, key, validate) {
|
|
11975
12172
|
return {
|
|
11976
12173
|
key,
|
|
11977
12174
|
validate,
|
|
11978
12175
|
action: async ({ provider, purpose, injectVar, n, delayMs, config }) => {
|
|
11979
12176
|
const configWithDefaults = applyDefaultGraderExamples(key, config);
|
|
11980
|
-
if (PluginClass.canGenerateRemote === false || !
|
|
12177
|
+
if (PluginClass.canGenerateRemote === false || !require_remoteGeneration.shouldGenerateRemote()) {
|
|
11981
12178
|
require_logger.logger.debug(`Using local redteam generation for ${key}`);
|
|
11982
12179
|
return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
|
|
11983
12180
|
}
|
|
11984
|
-
const
|
|
12181
|
+
const pluginId = require_util$2.getShortPluginId(key);
|
|
12182
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12183
|
+
config: configWithDefaults ?? {},
|
|
12184
|
+
injectVar,
|
|
12185
|
+
pluginId,
|
|
12186
|
+
provider,
|
|
12187
|
+
purpose,
|
|
12188
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
12189
|
+
});
|
|
11985
12190
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
11986
12191
|
return testCases.map((testCase) => ({
|
|
11987
12192
|
...testCase,
|
|
11988
12193
|
metadata: {
|
|
11989
12194
|
...testCase.metadata,
|
|
11990
|
-
pluginId
|
|
12195
|
+
pluginId,
|
|
11991
12196
|
pluginConfig: {
|
|
11992
12197
|
...configWithDefaults,
|
|
11993
12198
|
modifiers: computedModifiers
|
|
@@ -12043,8 +12248,8 @@ const pluginFactories = [
|
|
|
12043
12248
|
...unalignedHarmCategories.map((category) => ({
|
|
12044
12249
|
key: category,
|
|
12045
12250
|
action: async (params) => {
|
|
12046
|
-
if (
|
|
12047
|
-
require_logger.logger.error(`${category} plugin
|
|
12251
|
+
if (require_remoteGeneration.neverGenerateRemote()) {
|
|
12252
|
+
require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
12048
12253
|
return [];
|
|
12049
12254
|
}
|
|
12050
12255
|
const testCases = await getHarmfulTests(params, category);
|
|
@@ -12053,7 +12258,7 @@ const pluginFactories = [
|
|
|
12053
12258
|
...testCase,
|
|
12054
12259
|
metadata: {
|
|
12055
12260
|
...testCase.metadata,
|
|
12056
|
-
pluginId:
|
|
12261
|
+
pluginId: require_util$2.getShortPluginId(category),
|
|
12057
12262
|
pluginConfig: {
|
|
12058
12263
|
...params.config,
|
|
12059
12264
|
modifiers: computedModifiers
|
|
@@ -12066,14 +12271,22 @@ const pluginFactories = [
|
|
|
12066
12271
|
const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
|
|
12067
12272
|
key: category,
|
|
12068
12273
|
action: async (params) => {
|
|
12069
|
-
if (
|
|
12070
|
-
const
|
|
12274
|
+
if (require_remoteGeneration.shouldGenerateRemote()) {
|
|
12275
|
+
const pluginId = require_util$2.getShortPluginId(category);
|
|
12276
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12277
|
+
config: params.config ?? {},
|
|
12278
|
+
injectVar: params.injectVar,
|
|
12279
|
+
pluginId,
|
|
12280
|
+
provider: params.provider,
|
|
12281
|
+
purpose: params.purpose,
|
|
12282
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
12283
|
+
});
|
|
12071
12284
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
12072
12285
|
return testCases.map((testCase) => ({
|
|
12073
12286
|
...testCase,
|
|
12074
12287
|
metadata: {
|
|
12075
12288
|
...testCase.metadata,
|
|
12076
|
-
pluginId
|
|
12289
|
+
pluginId,
|
|
12077
12290
|
pluginConfig: {
|
|
12078
12291
|
...params.config,
|
|
12079
12292
|
modifiers: computedModifiers
|
|
@@ -12086,7 +12299,7 @@ const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
|
|
|
12086
12299
|
...testCase,
|
|
12087
12300
|
metadata: {
|
|
12088
12301
|
...testCase.metadata,
|
|
12089
|
-
pluginId:
|
|
12302
|
+
pluginId: require_util$2.getShortPluginId(category)
|
|
12090
12303
|
}
|
|
12091
12304
|
}));
|
|
12092
12305
|
}
|
|
@@ -12094,17 +12307,25 @@ const piiPlugins = require_types.PII_PLUGINS.map((category) => ({
|
|
|
12094
12307
|
const biasPlugins = require_types.BIAS_PLUGINS.map((category) => ({
|
|
12095
12308
|
key: category,
|
|
12096
12309
|
action: async (params) => {
|
|
12097
|
-
if (
|
|
12098
|
-
require_logger.logger.error(`${category} plugin
|
|
12310
|
+
if (require_remoteGeneration.neverGenerateRemote()) {
|
|
12311
|
+
require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
|
|
12099
12312
|
return [];
|
|
12100
12313
|
}
|
|
12101
|
-
const
|
|
12314
|
+
const pluginId = require_util$2.getShortPluginId(category);
|
|
12315
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12316
|
+
config: params.config ?? {},
|
|
12317
|
+
injectVar: params.injectVar,
|
|
12318
|
+
pluginId,
|
|
12319
|
+
provider: params.provider,
|
|
12320
|
+
purpose: params.purpose,
|
|
12321
|
+
testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
|
|
12322
|
+
});
|
|
12102
12323
|
const computedModifiers = computeModifiersFromConfig(params.config);
|
|
12103
12324
|
return testCases.map((testCase) => ({
|
|
12104
12325
|
...testCase,
|
|
12105
12326
|
metadata: {
|
|
12106
12327
|
...testCase.metadata,
|
|
12107
|
-
pluginId
|
|
12328
|
+
pluginId,
|
|
12108
12329
|
pluginConfig: {
|
|
12109
12330
|
...params.config,
|
|
12110
12331
|
modifiers: computedModifiers
|
|
@@ -12117,19 +12338,27 @@ function createRemotePlugin(key, validate) {
|
|
|
12117
12338
|
return {
|
|
12118
12339
|
key,
|
|
12119
12340
|
validate,
|
|
12120
|
-
action: async ({ purpose, injectVar, n, config }) => {
|
|
12341
|
+
action: async ({ provider, purpose, injectVar, n, config }) => {
|
|
12121
12342
|
const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
|
|
12122
|
-
if (
|
|
12123
|
-
require_logger.logger.error(`${key} plugin
|
|
12343
|
+
if (require_remoteGeneration.neverGenerateRemote()) {
|
|
12344
|
+
require_logger.logger.error(require_remoteGeneration.getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
|
|
12124
12345
|
return [];
|
|
12125
12346
|
}
|
|
12126
|
-
const
|
|
12347
|
+
const pluginId = require_util$2.getShortPluginId(key);
|
|
12348
|
+
const testCases = await materializeRemoteTestCaseInputs({
|
|
12349
|
+
config: configWithDefaults ?? {},
|
|
12350
|
+
injectVar,
|
|
12351
|
+
pluginId,
|
|
12352
|
+
provider,
|
|
12353
|
+
purpose,
|
|
12354
|
+
testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
|
|
12355
|
+
});
|
|
12127
12356
|
const computedModifiers = computeModifiersFromConfig(configWithDefaults);
|
|
12128
12357
|
const testsWithMetadata = testCases.map((testCase) => ({
|
|
12129
12358
|
...testCase,
|
|
12130
12359
|
metadata: {
|
|
12131
12360
|
...testCase.metadata,
|
|
12132
|
-
pluginId
|
|
12361
|
+
pluginId,
|
|
12133
12362
|
pluginConfig: {
|
|
12134
12363
|
...configWithDefaults,
|
|
12135
12364
|
modifiers: computedModifiers
|
|
@@ -12199,6 +12428,37 @@ function getPolicyText(metadata) {
|
|
|
12199
12428
|
return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
|
|
12200
12429
|
}
|
|
12201
12430
|
}
|
|
12431
|
+
async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
|
|
12432
|
+
const inputs = testCase.metadata?.pluginConfig?.inputs;
|
|
12433
|
+
const inputMaterialization = testCase.metadata?.inputMaterialization;
|
|
12434
|
+
if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
|
|
12435
|
+
inputMaterialization,
|
|
12436
|
+
vars: testCase.vars
|
|
12437
|
+
};
|
|
12438
|
+
try {
|
|
12439
|
+
const materializedVars = await require_util$2.extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
|
|
12440
|
+
materializationIndex,
|
|
12441
|
+
pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
|
|
12442
|
+
provider,
|
|
12443
|
+
purpose
|
|
12444
|
+
});
|
|
12445
|
+
return {
|
|
12446
|
+
inputMaterialization: materializedVars.metadata ? {
|
|
12447
|
+
...inputMaterialization,
|
|
12448
|
+
...materializedVars.metadata
|
|
12449
|
+
} : inputMaterialization,
|
|
12450
|
+
vars: {
|
|
12451
|
+
...testCase.vars,
|
|
12452
|
+
...materializedVars.vars
|
|
12453
|
+
}
|
|
12454
|
+
};
|
|
12455
|
+
} catch {
|
|
12456
|
+
return {
|
|
12457
|
+
inputMaterialization,
|
|
12458
|
+
vars: testCase.vars
|
|
12459
|
+
};
|
|
12460
|
+
}
|
|
12461
|
+
}
|
|
12202
12462
|
/**
|
|
12203
12463
|
* Gets the severity level for a plugin based on its ID and configuration.
|
|
12204
12464
|
* @param pluginId - The ID of the plugin.
|
|
@@ -12207,7 +12467,7 @@ function getPolicyText(metadata) {
|
|
|
12207
12467
|
*/
|
|
12208
12468
|
function getPluginSeverity(pluginId, pluginConfig) {
|
|
12209
12469
|
if (pluginConfig?.severity) return pluginConfig.severity;
|
|
12210
|
-
const shortId =
|
|
12470
|
+
const shortId = require_util$2.getShortPluginId(pluginId);
|
|
12211
12471
|
return shortId in require_types.riskCategorySeverityMap ? require_types.riskCategorySeverityMap[shortId] : require_types.Severity.Low;
|
|
12212
12472
|
}
|
|
12213
12473
|
const POLICY_PREVIEW_MAX_LENGTH = 20;
|
|
@@ -12332,14 +12592,15 @@ function buildRedteamModifiers({ maxCharsPerMessage, pluginConfig, testGeneratio
|
|
|
12332
12592
|
...testGenerationInstructions ? { testGenerationInstructions } : {},
|
|
12333
12593
|
...pluginConfig?.modifiers ?? {}
|
|
12334
12594
|
};
|
|
12335
|
-
const maxCharsPerMessageModifier =
|
|
12336
|
-
if (maxCharsPerMessageModifier) modifiers[
|
|
12595
|
+
const maxCharsPerMessageModifier = require_promptLength.getMaxCharsPerMessageModifierValue(maxCharsPerMessage ?? pluginConfig?.maxCharsPerMessage);
|
|
12596
|
+
if (maxCharsPerMessageModifier) modifiers[require_promptLength.MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsPerMessageModifier;
|
|
12337
12597
|
return modifiers;
|
|
12338
12598
|
}
|
|
12339
12599
|
const categories = {
|
|
12340
12600
|
foundation: require_types.FOUNDATION_PLUGINS,
|
|
12341
12601
|
harmful: Object.keys(require_types.HARM_PLUGINS),
|
|
12342
12602
|
"coding-agent:core": require_types.CODING_AGENT_CORE_PLUGINS,
|
|
12603
|
+
"coding-agent:all": require_types.CODING_AGENT_PLUGINS,
|
|
12343
12604
|
bias: require_types.BIAS_PLUGINS,
|
|
12344
12605
|
pii: require_types.PII_PLUGINS,
|
|
12345
12606
|
medical: require_types.MEDICAL_PLUGINS,
|
|
@@ -12369,7 +12630,7 @@ function getLanguageForTestCase(test) {
|
|
|
12369
12630
|
function filterOversizedTestCases(testCases, injectVar, sourceLabel, maxCharsPerMessage) {
|
|
12370
12631
|
return testCases.filter((testCase) => {
|
|
12371
12632
|
const testCaseMaxCharsPerMessage = maxCharsPerMessage ?? (testCase.metadata?.strategyConfig)?.maxCharsPerMessage ?? (testCase.metadata?.pluginConfig)?.maxCharsPerMessage;
|
|
12372
|
-
const violation =
|
|
12633
|
+
const violation = require_promptLength.getGeneratedPromptOverLimit(String(testCase.vars?.[injectVar] ?? ""), testCaseMaxCharsPerMessage);
|
|
12373
12634
|
if (!violation) return true;
|
|
12374
12635
|
require_logger.logger.warn(`[${sourceLabel}] Dropping generated test case that exceeds maxCharsPerMessage=${violation.limit} (${violation.length} chars)`);
|
|
12375
12636
|
return false;
|
|
@@ -12434,18 +12695,18 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
|
|
|
12434
12695
|
* @param injectVar - The variable to inject.
|
|
12435
12696
|
* @returns An array of new test cases generated by strategies.
|
|
12436
12697
|
*/
|
|
12437
|
-
async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
12698
|
+
async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
|
|
12438
12699
|
const newTestCases = [];
|
|
12439
12700
|
const strategyResults = {};
|
|
12440
12701
|
for (const strategy of strategies) {
|
|
12441
12702
|
require_logger.logger.debug(`Generating ${strategy.id} tests`);
|
|
12442
12703
|
let strategyAction;
|
|
12443
|
-
if (strategy.id.startsWith("file://")) strategyAction = (await
|
|
12704
|
+
if (strategy.id.startsWith("file://")) strategyAction = (await require_strategies.loadStrategy(strategy.id)).action;
|
|
12444
12705
|
else {
|
|
12445
|
-
let builtinStrategy =
|
|
12706
|
+
let builtinStrategy = require_strategies.Strategies.find((s) => s.id === strategy.id);
|
|
12446
12707
|
if (!builtinStrategy && strategy.id.includes(":")) {
|
|
12447
12708
|
const baseStrategyId = strategy.id.split(":")[0];
|
|
12448
|
-
builtinStrategy =
|
|
12709
|
+
builtinStrategy = require_strategies.Strategies.find((s) => s.id === baseStrategyId);
|
|
12449
12710
|
}
|
|
12450
12711
|
if (!builtinStrategy) {
|
|
12451
12712
|
require_logger.logger.warn(`Strategy ${strategy.id} not registered, skipping`);
|
|
@@ -12455,7 +12716,7 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12455
12716
|
}
|
|
12456
12717
|
const targetPlugins = strategy.config?.plugins;
|
|
12457
12718
|
const applicableTestCases = testCases.filter((t) => {
|
|
12458
|
-
if (!
|
|
12719
|
+
if (!require_strategies.pluginMatchesStrategyTargets(t, strategy.id, targetPlugins)) return false;
|
|
12459
12720
|
if (t.metadata?.retry === true) {
|
|
12460
12721
|
require_logger.logger.debug(`Skipping ${strategy.id} for retry test (plugin: ${t.metadata?.pluginId}) - retry tests are not transformed`);
|
|
12461
12722
|
return false;
|
|
@@ -12490,14 +12751,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12490
12751
|
}
|
|
12491
12752
|
}
|
|
12492
12753
|
resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
|
|
12493
|
-
newTestCases.push(...resultTestCases.map((t) => {
|
|
12494
|
-
const
|
|
12495
|
-
let updatedVars = t.vars;
|
|
12496
|
-
if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
|
|
12497
|
-
const parsed = JSON.parse(String(t.vars[injectVar]));
|
|
12498
|
-
updatedVars = { ...t.vars };
|
|
12499
|
-
Object.assign(updatedVars, require_providers.extractVariablesFromJson(parsed, inputs));
|
|
12500
|
-
} catch {}
|
|
12754
|
+
newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
|
|
12755
|
+
const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
|
|
12501
12756
|
const strategyConfig = {
|
|
12502
12757
|
...strategy.config || {},
|
|
12503
12758
|
...maxCharsPerMessage ? { maxCharsPerMessage } : {},
|
|
@@ -12505,16 +12760,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
|
|
|
12505
12760
|
};
|
|
12506
12761
|
return {
|
|
12507
12762
|
...t,
|
|
12508
|
-
vars
|
|
12763
|
+
vars,
|
|
12509
12764
|
metadata: {
|
|
12510
12765
|
...t?.metadata || {},
|
|
12511
12766
|
...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
|
|
12512
12767
|
...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
|
|
12513
12768
|
...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
|
|
12769
|
+
...inputMaterialization && { inputMaterialization },
|
|
12514
12770
|
...Object.keys(strategyConfig).length > 0 && { strategyConfig }
|
|
12515
12771
|
}
|
|
12516
12772
|
};
|
|
12517
|
-
}));
|
|
12773
|
+
})));
|
|
12518
12774
|
const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
|
|
12519
12775
|
const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
|
|
12520
12776
|
const applyNumTestsCap = (calculatedRequested) => {
|
|
@@ -12672,10 +12928,10 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12672
12928
|
seen.add(key);
|
|
12673
12929
|
return true;
|
|
12674
12930
|
});
|
|
12675
|
-
const needsGoalExtraction = strategies.some((s) =>
|
|
12676
|
-
await
|
|
12931
|
+
const needsGoalExtraction = strategies.some((s) => require_strategies.Strategies.find((def) => def.id === s.id)?.requiresGoalExtraction);
|
|
12932
|
+
await require_strategies.validateStrategies(strategies);
|
|
12677
12933
|
await validateSharpDependency(strategies, plugins);
|
|
12678
|
-
const redteamProvider = await
|
|
12934
|
+
const redteamProvider = await require_shared.redteamProviderManager.getProvider({ provider });
|
|
12679
12935
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
12680
12936
|
require_logger.logger.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
|
|
12681
12937
|
const pluginLanguageConfig = p.config?.language ?? language;
|
|
@@ -12779,8 +13035,8 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12779
13035
|
};
|
|
12780
13036
|
require_logger.logger.debug("Validating plugins...");
|
|
12781
13037
|
plugins = [...new Set(expandedPlugins)].filter(validatePlugin).sort();
|
|
12782
|
-
if (
|
|
12783
|
-
const healthUrl =
|
|
13038
|
+
if (require_remoteGeneration.shouldGenerateRemote()) {
|
|
13039
|
+
const healthUrl = require_remoteGeneration.getRemoteHealthUrl();
|
|
12784
13040
|
if (healthUrl) {
|
|
12785
13041
|
require_logger.logger.debug(`Checking Promptfoo API health at ${healthUrl}...`);
|
|
12786
13042
|
const healthResult = await checkRemoteHealth(healthUrl);
|
|
@@ -12875,7 +13131,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12875
13131
|
const promptVar = testCase.vars?.[injectVar];
|
|
12876
13132
|
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
12877
13133
|
const policy = getPolicyText(testCase.metadata);
|
|
12878
|
-
const extractedGoal = await
|
|
13134
|
+
const extractedGoal = await require_util$2.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
12879
13135
|
testCase.metadata.goal = extractedGoal;
|
|
12880
13136
|
}
|
|
12881
13137
|
}
|
|
@@ -12930,7 +13186,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12930
13186
|
const promptVar = testCase.vars?.[injectVar];
|
|
12931
13187
|
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
12932
13188
|
const policy = getPolicyText(testCase.metadata);
|
|
12933
|
-
const extractedGoal = await
|
|
13189
|
+
const extractedGoal = await require_util$2.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
12934
13190
|
testCase.metadata.goal = extractedGoal;
|
|
12935
13191
|
}
|
|
12936
13192
|
}
|
|
@@ -12969,7 +13225,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12969
13225
|
targetIds,
|
|
12970
13226
|
...retryStrategy.config
|
|
12971
13227
|
};
|
|
12972
|
-
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
|
|
13228
|
+
const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
|
|
12973
13229
|
pluginTestCases.push(...retryTestCases);
|
|
12974
13230
|
Object.assign(strategyResults, retryResults);
|
|
12975
13231
|
if (showProgressBar) progressBar?.increment(retryTestCases.length);
|
|
@@ -12977,7 +13233,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
12977
13233
|
checkAbort();
|
|
12978
13234
|
const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
|
|
12979
13235
|
if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
|
|
12980
|
-
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
13236
|
+
const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
|
|
12981
13237
|
Object.assign(strategyResults, otherStrategyResults);
|
|
12982
13238
|
if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
|
|
12983
13239
|
const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
|
|
@@ -13104,7 +13360,7 @@ async function doGenerateRedteam(options) {
|
|
|
13104
13360
|
redteamConfig = resolved.config.redteam;
|
|
13105
13361
|
commandLineOptions = resolved.commandLineOptions;
|
|
13106
13362
|
resolvedConfig = resolved.config;
|
|
13107
|
-
await
|
|
13363
|
+
await require_storage.checkCloudPermissions(resolved.config);
|
|
13108
13364
|
if (redteamConfig && resolved.testSuite.tests && resolved.testSuite.tests.length > 0) require_logger.logger.warn(chalk.default.yellow(dedent.default`
|
|
13109
13365
|
⚠️ Warning: Found both 'tests' section and 'redteam' configuration in your config file.
|
|
13110
13366
|
|
|
@@ -13119,8 +13375,8 @@ async function doGenerateRedteam(options) {
|
|
|
13119
13375
|
`));
|
|
13120
13376
|
try {
|
|
13121
13377
|
const providerId = require_providers.getProviderIds(resolved.config.providers)[0];
|
|
13122
|
-
if (
|
|
13123
|
-
const overrides = await
|
|
13378
|
+
if (require_storage.isCloudProvider(providerId)) {
|
|
13379
|
+
const overrides = await require_storage.getPluginSeverityOverridesFromCloud(require_storage.getCloudDatabaseId(providerId));
|
|
13124
13380
|
if (overrides) {
|
|
13125
13381
|
pluginSeverityOverrides = new Map(Object.entries(overrides.severities));
|
|
13126
13382
|
pluginSeverityOverridesId = overrides.id;
|
|
@@ -13138,7 +13394,7 @@ async function doGenerateRedteam(options) {
|
|
|
13138
13394
|
require_logger.logger.info(chalk.default.red(`\nCan't generate without configuration - run ${chalk.default.yellow.bold(promptfooCommand("redteam init"))} first`));
|
|
13139
13395
|
return null;
|
|
13140
13396
|
}
|
|
13141
|
-
if (!
|
|
13397
|
+
if (!require_remoteGeneration.neverGenerateRemote()) {
|
|
13142
13398
|
let hasValidEmail = false;
|
|
13143
13399
|
while (!hasValidEmail) {
|
|
13144
13400
|
const { emailNeedsValidation } = await require_accounts.promptForEmailUnverified();
|
|
@@ -13202,7 +13458,7 @@ async function doGenerateRedteam(options) {
|
|
|
13202
13458
|
}
|
|
13203
13459
|
const policyPluginsWithRefs = plugins.filter((plugin) => plugin.config?.policy && require_graders.isValidPolicyObject(plugin.config?.policy) && require_graders.determinePolicyTypeFromId(plugin.config.policy.id) === "reusable");
|
|
13204
13460
|
if (policyPluginsWithRefs.length > 0) {
|
|
13205
|
-
const teamId = (await
|
|
13461
|
+
const teamId = (await require_storage.resolveTeamId()).id;
|
|
13206
13462
|
const policiesById = await require_graders.getCustomPolicies(policyPluginsWithRefs, teamId);
|
|
13207
13463
|
for (const policyPlugin of policyPluginsWithRefs) {
|
|
13208
13464
|
const policyId = policyPlugin.config.policy.id;
|
|
@@ -13774,7 +14030,7 @@ async function sendChunkedResults(evalRecord, url$5, options = {}) {
|
|
|
13774
14030
|
const isVerbose = require_logger.isDebugEnabled();
|
|
13775
14031
|
const { silent = false } = options;
|
|
13776
14032
|
require_logger.logger.debug(`Starting chunked results upload to ${url$5}`);
|
|
13777
|
-
await
|
|
14033
|
+
await require_storage.checkCloudPermissions(evalRecord.config);
|
|
13778
14034
|
const inlineBlobs = require_extractor.isBlobStorageEnabled() && require_logger.getEnvBool("PROMPTFOO_SHARE_INLINE_BLOBS", !require_fetch.cloudConfig.isEnabled());
|
|
13779
14035
|
const inlineCache = inlineBlobs ? createBlobInlineCache() : null;
|
|
13780
14036
|
let sampleResults = (await evalRecord.fetchResultsBatched(100).next()).value ?? [];
|
|
@@ -13870,6 +14126,10 @@ function stripAuthFromUrl(urlString) {
|
|
|
13870
14126
|
}
|
|
13871
14127
|
}
|
|
13872
14128
|
async function handleEmailCollection(evalRecord) {
|
|
14129
|
+
if (evalRecord.author) {
|
|
14130
|
+
require_logger.logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
|
|
14131
|
+
return;
|
|
14132
|
+
}
|
|
13873
14133
|
if (!process.stdout.isTTY || require_logger.isCI() || require_logger.getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
|
|
13874
14134
|
let email = require_accounts.getUserEmail();
|
|
13875
14135
|
if (!email) {
|
|
@@ -13912,7 +14172,7 @@ async function createShareableUrl(evalRecord, options = {}) {
|
|
|
13912
14172
|
return null;
|
|
13913
14173
|
}
|
|
13914
14174
|
if (!silent) {
|
|
13915
|
-
const orgContext = await
|
|
14175
|
+
const orgContext = await require_storage.getOrgContext();
|
|
13916
14176
|
if (orgContext) {
|
|
13917
14177
|
const teamSuffix = orgContext.teamName ? ` > ${orgContext.teamName}` : "";
|
|
13918
14178
|
require_logger.logger.info(`${chalk.default.dim("Sharing to:")} ${chalk.default.cyan(orgContext.organizationName)}${teamSuffix}`);
|
|
@@ -14013,6 +14273,14 @@ function shouldShareResults(opts) {
|
|
|
14013
14273
|
return require_fetch.cloudConfig.isEnabled() && sharing !== false;
|
|
14014
14274
|
}
|
|
14015
14275
|
//#endregion
|
|
14276
|
+
//#region src/commands/eval/redteamWarning.ts
|
|
14277
|
+
function warnIfRedteamConfigHasNoTests(config, testSuite) {
|
|
14278
|
+
if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) require_logger.logger.warn(chalk.default.yellow(dedent.default`
|
|
14279
|
+
Warning: Config file has a redteam section but no test cases.
|
|
14280
|
+
Did you mean to run ${chalk.default.bold("promptfoo redteam generate")} instead?
|
|
14281
|
+
`));
|
|
14282
|
+
}
|
|
14283
|
+
//#endregion
|
|
14016
14284
|
//#region src/util/formatDuration.ts
|
|
14017
14285
|
/**
|
|
14018
14286
|
* Formats a duration in seconds into a human-readable string
|
|
@@ -14359,7 +14627,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14359
14627
|
const cloudConfigId = uuidConfigArgs[0];
|
|
14360
14628
|
if (cmdObj.watch) throw new Error("--watch is not supported when using a cloud config UUID with -c. Use a local config file path for watch mode.");
|
|
14361
14629
|
try {
|
|
14362
|
-
defaultConfig = await
|
|
14630
|
+
defaultConfig = await require_storage.getEvalConfigFromCloud(cloudConfigId);
|
|
14363
14631
|
} catch (error) {
|
|
14364
14632
|
const reason = error instanceof Error ? error.message : String(error);
|
|
14365
14633
|
throw new Error(`Failed to load cloud eval config "${cloudConfigId}". ${reason}. Cloud UUID inputs do not fall back to local file paths. Check authentication and that the UUID exists.`);
|
|
@@ -14453,14 +14721,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14453
14721
|
require_logger.state.resume = true;
|
|
14454
14722
|
require_logger.state.retryMode = true;
|
|
14455
14723
|
} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
|
|
14456
|
-
if (!cmdObj.envPath && commandLineOptions?.envPath) {
|
|
14724
|
+
if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
|
|
14457
14725
|
require_logger.logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
|
|
14458
14726
|
require_util.setupEnv(commandLineOptions.envPath);
|
|
14459
14727
|
}
|
|
14460
|
-
|
|
14461
|
-
Warning: Config file has a redteam section but no test cases.
|
|
14462
|
-
Did you mean to run ${chalk.default.bold("promptfoo redteam generate")} instead?
|
|
14463
|
-
`));
|
|
14728
|
+
warnIfRedteamConfigHasNoTests(config, testSuite);
|
|
14464
14729
|
if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
|
|
14465
14730
|
const maybeUrl = config.providers[0]?.config?.url;
|
|
14466
14731
|
if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) require_telemetry.telemetry.record("feature_used", { feature: "redteam_run_with_example" });
|
|
@@ -14508,7 +14773,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14508
14773
|
};
|
|
14509
14774
|
testSuite.tests = await filterTests(testSuite, filterOptions);
|
|
14510
14775
|
}
|
|
14511
|
-
if (!
|
|
14776
|
+
if (!require_remoteGeneration.neverGenerateRemote() && config.redteam && config.redteam.plugins && config.redteam.plugins.length > 0 && testSuite.tests && testSuite.tests.length > 0) {
|
|
14512
14777
|
let hasValidEmail = false;
|
|
14513
14778
|
while (!hasValidEmail) {
|
|
14514
14779
|
const { emailNeedsValidation } = await require_accounts.promptForEmailUnverified();
|
|
@@ -14526,7 +14791,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14526
14791
|
process.exitCode = 1;
|
|
14527
14792
|
return new Eval({}, { persisted: false });
|
|
14528
14793
|
}
|
|
14529
|
-
await
|
|
14794
|
+
await require_storage.checkCloudPermissions(config);
|
|
14530
14795
|
const options = {
|
|
14531
14796
|
...evaluateOptions,
|
|
14532
14797
|
showProgressBar: require_logger.getLogLevel() === "debug" ? false : cmdObj.progressBar === void 0 ? evaluateOptions.showProgressBar === void 0 ? true : evaluateOptions.showProgressBar : cmdObj.progressBar !== false,
|
|
@@ -14568,7 +14833,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14568
14833
|
${zod.z.prettifyError(testSuiteSchema.error)}
|
|
14569
14834
|
|
|
14570
14835
|
Please review your promptfooconfig.yaml configuration.`));
|
|
14571
|
-
const
|
|
14836
|
+
const author = require_accounts.getAuthor();
|
|
14837
|
+
const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
|
|
14838
|
+
author,
|
|
14839
|
+
runtimeOptions: options
|
|
14840
|
+
}) : new Eval(config, {
|
|
14841
|
+
author,
|
|
14842
|
+
runtimeOptions: options
|
|
14843
|
+
});
|
|
14572
14844
|
const abortController = new AbortController();
|
|
14573
14845
|
const previousAbortSignal = evaluateOptions.abortSignal;
|
|
14574
14846
|
evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
|
|
@@ -14679,7 +14951,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14679
14951
|
const paths = (Array.isArray(outputPath) ? outputPath : [outputPath]).filter((p) => typeof p === "string" && p.length > 0 && !p.endsWith(".jsonl"));
|
|
14680
14952
|
const isRedteam = Boolean(config.redteam);
|
|
14681
14953
|
const duration = Math.round((Date.now() - startTime) / 1e3);
|
|
14682
|
-
const tracker =
|
|
14954
|
+
const tracker = require_shared.TokenUsageTracker.getInstance();
|
|
14683
14955
|
const targetErrorStatus = await evalRecord.findTargetErrorStatus();
|
|
14684
14956
|
const summaryLines = generateEvalSummary({
|
|
14685
14957
|
evalId: evalRecord.id,
|
|
@@ -14709,7 +14981,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
14709
14981
|
} else for (const line of summaryLines) require_logger.logger.info(line);
|
|
14710
14982
|
let shareableUrl = null;
|
|
14711
14983
|
if (sharePromise != null) {
|
|
14712
|
-
const orgContext = await
|
|
14984
|
+
const orgContext = await require_storage.getOrgContext();
|
|
14713
14985
|
const orgSuffix = orgContext ? ` to ${orgContext.organizationName}${orgContext.teamName ? ` > ${orgContext.teamName}` : ""}` : "";
|
|
14714
14986
|
if (process.stdout.isTTY && !require_logger.isCI()) {
|
|
14715
14987
|
const spinner = (0, ora.default)({
|
|
@@ -14884,7 +15156,7 @@ async function doRedteamRun(options) {
|
|
|
14884
15156
|
redteamPath = path.join(configDir, "redteam.yaml");
|
|
14885
15157
|
}
|
|
14886
15158
|
try {
|
|
14887
|
-
const healthUrl =
|
|
15159
|
+
const healthUrl = require_remoteGeneration.getRemoteHealthUrl();
|
|
14888
15160
|
if (healthUrl) {
|
|
14889
15161
|
require_logger.logger.debug(`Checking Promptfoo API health at ${healthUrl}...`);
|
|
14890
15162
|
const healthResult = await checkRemoteHealth(healthUrl);
|
|
@@ -14970,6 +15242,12 @@ async function doRedteamRun(options) {
|
|
|
14970
15242
|
return evalResult;
|
|
14971
15243
|
}
|
|
14972
15244
|
//#endregion
|
|
15245
|
+
//#region src/types/transform.ts
|
|
15246
|
+
/** Runtime type guard for `TransformFunction` values. */
|
|
15247
|
+
function isTransformFunction(value) {
|
|
15248
|
+
return typeof value === "function";
|
|
15249
|
+
}
|
|
15250
|
+
//#endregion
|
|
14973
15251
|
//#region src/index.ts
|
|
14974
15252
|
/**
|
|
14975
15253
|
* Shallow-clone a test case so the caller can swap in resolved ApiProvider
|
|
@@ -14991,66 +15269,148 @@ function cloneTestForResolve(test) {
|
|
|
14991
15269
|
if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion }));
|
|
14992
15270
|
return cloned;
|
|
14993
15271
|
}
|
|
15272
|
+
function toSerializableProviderRef(provider) {
|
|
15273
|
+
if (require_types.isApiProvider(provider)) return require_evalResult.sanitizeProvider(provider);
|
|
15274
|
+
if (Array.isArray(provider)) return provider.map(toSerializableProviderRef);
|
|
15275
|
+
return provider;
|
|
15276
|
+
}
|
|
15277
|
+
function isRecord(value) {
|
|
15278
|
+
return Boolean(value && typeof value === "object" && !Array.isArray(value));
|
|
15279
|
+
}
|
|
15280
|
+
function withSerializableProvider(record) {
|
|
15281
|
+
if (!require_types.isApiProvider(record.provider)) return record;
|
|
15282
|
+
return {
|
|
15283
|
+
...record,
|
|
15284
|
+
provider: require_evalResult.sanitizeProvider(record.provider)
|
|
15285
|
+
};
|
|
15286
|
+
}
|
|
15287
|
+
/**
|
|
15288
|
+
* Function-valued transforms are first-class at runtime but are silently dropped
|
|
15289
|
+
* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
|
|
15290
|
+
* a function reference, so replace every `transform`-like field with a
|
|
15291
|
+
* `[inline function]: name` marker. Non-function values pass through unchanged.
|
|
15292
|
+
*
|
|
15293
|
+
* `droppedRef.value` is flipped to `true` the first time a function is replaced
|
|
15294
|
+
* so the caller can emit a single warning instead of logging per field.
|
|
15295
|
+
*/
|
|
15296
|
+
function replaceFunctionTransforms(record, droppedRef) {
|
|
15297
|
+
let result;
|
|
15298
|
+
for (const key of require_transform$1.TRANSFORM_KEYS) {
|
|
15299
|
+
const value = record[key];
|
|
15300
|
+
if (!isTransformFunction(value)) continue;
|
|
15301
|
+
if (!result) result = { ...record };
|
|
15302
|
+
result[key] = value.name ? `${require_transform$1.INLINE_FUNCTION_LABEL}: ${value.name}` : require_transform$1.INLINE_FUNCTION_LABEL;
|
|
15303
|
+
droppedRef.value = true;
|
|
15304
|
+
}
|
|
15305
|
+
return result ?? record;
|
|
15306
|
+
}
|
|
15307
|
+
function toSerializableAssertion(assertion, droppedRef) {
|
|
15308
|
+
if (!isRecord(assertion)) return assertion;
|
|
15309
|
+
let sanitizedAssertion = withSerializableProvider(assertion);
|
|
15310
|
+
sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
|
|
15311
|
+
if (Array.isArray(assertion.assert)) sanitizedAssertion = {
|
|
15312
|
+
...sanitizedAssertion,
|
|
15313
|
+
assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
15314
|
+
};
|
|
15315
|
+
return sanitizedAssertion;
|
|
15316
|
+
}
|
|
15317
|
+
function toSerializableTestCase(test, droppedRef) {
|
|
15318
|
+
if (!isRecord(test)) return test;
|
|
15319
|
+
let sanitizedTest = withSerializableProvider(test);
|
|
15320
|
+
if (isRecord(test.options)) {
|
|
15321
|
+
let options = withSerializableProvider(test.options);
|
|
15322
|
+
options = replaceFunctionTransforms(options, droppedRef);
|
|
15323
|
+
if (options !== test.options) sanitizedTest = {
|
|
15324
|
+
...sanitizedTest,
|
|
15325
|
+
options
|
|
15326
|
+
};
|
|
15327
|
+
}
|
|
15328
|
+
if (Array.isArray(test.assert)) sanitizedTest = {
|
|
15329
|
+
...sanitizedTest,
|
|
15330
|
+
assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
|
|
15331
|
+
};
|
|
15332
|
+
return sanitizedTest;
|
|
15333
|
+
}
|
|
15334
|
+
function toSerializableScenario(scenario, droppedRef) {
|
|
15335
|
+
if (!isRecord(scenario)) return scenario;
|
|
15336
|
+
if (!Array.isArray(scenario.tests)) return scenario;
|
|
15337
|
+
return {
|
|
15338
|
+
...scenario,
|
|
15339
|
+
tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
|
|
15340
|
+
};
|
|
15341
|
+
}
|
|
15342
|
+
function createSerializableUnifiedConfig(testSuite, prompts) {
|
|
15343
|
+
const droppedRef = { value: false };
|
|
15344
|
+
const config = {
|
|
15345
|
+
...testSuite,
|
|
15346
|
+
providers: toSerializableProviderRef(testSuite.providers),
|
|
15347
|
+
defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
|
|
15348
|
+
tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests,
|
|
15349
|
+
scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios,
|
|
15350
|
+
prompts
|
|
15351
|
+
};
|
|
15352
|
+
if (droppedRef.value && testSuite.writeLatestResults) require_logger.logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
|
|
15353
|
+
return config;
|
|
15354
|
+
}
|
|
14994
15355
|
async function evaluate(testSuite, options = {}) {
|
|
14995
|
-
|
|
14996
|
-
|
|
15356
|
+
const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
|
|
15357
|
+
if (testSuiteConfig.writeLatestResults) await runDbMigrations();
|
|
15358
|
+
const loadedProviders = await require_providers.loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
|
|
14997
15359
|
const providerMap = {};
|
|
14998
15360
|
for (const p of loadedProviders) {
|
|
14999
15361
|
providerMap[p.id()] = p;
|
|
15000
15362
|
if (p.label) providerMap[p.label] = p;
|
|
15001
15363
|
}
|
|
15002
|
-
let resolvedDefaultTest =
|
|
15003
|
-
if (typeof
|
|
15364
|
+
let resolvedDefaultTest = testSuiteConfig.defaultTest;
|
|
15365
|
+
if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await require_util.maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
|
|
15004
15366
|
const constructedTestSuite = {
|
|
15005
|
-
...
|
|
15367
|
+
...testSuiteConfig,
|
|
15006
15368
|
defaultTest: resolvedDefaultTest,
|
|
15007
|
-
scenarios:
|
|
15369
|
+
scenarios: testSuiteConfig.scenarios,
|
|
15008
15370
|
providers: loadedProviders,
|
|
15009
|
-
tests: await readTests(
|
|
15010
|
-
nunjucksFilters: await require_util.readFilters(
|
|
15011
|
-
prompts: await require_graders.processPrompts(
|
|
15371
|
+
tests: await readTests(testSuiteConfig.tests),
|
|
15372
|
+
nunjucksFilters: await require_util.readFilters(testSuiteConfig.nunjucksFilters || {}),
|
|
15373
|
+
prompts: await require_graders.processPrompts(testSuiteConfig.prompts)
|
|
15012
15374
|
};
|
|
15013
15375
|
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
15014
15376
|
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
15015
15377
|
if (constructedTestSuite.defaultTest.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
15016
|
-
env:
|
|
15378
|
+
env: testSuiteConfig.env,
|
|
15017
15379
|
basePath: require_logger.state.basePath
|
|
15018
15380
|
});
|
|
15019
15381
|
if (constructedTestSuite.defaultTest.options?.provider && !require_types.isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await require_providers.resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
15020
|
-
env:
|
|
15382
|
+
env: testSuiteConfig.env,
|
|
15021
15383
|
basePath: require_logger.state.basePath
|
|
15022
15384
|
});
|
|
15023
15385
|
}
|
|
15024
15386
|
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
15025
15387
|
for (const test of constructedTestSuite.tests) {
|
|
15026
15388
|
if (test.options?.provider && !require_types.isApiProvider(test.options.provider)) test.options.provider = await require_providers.resolveProvider(test.options.provider, providerMap, {
|
|
15027
|
-
env:
|
|
15389
|
+
env: testSuiteConfig.env,
|
|
15028
15390
|
basePath: require_logger.state.basePath
|
|
15029
15391
|
});
|
|
15030
15392
|
for (const assertion of test.assert || []) {
|
|
15031
15393
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
15032
15394
|
if (assertion.provider && !require_types.isApiProvider(assertion.provider)) assertion.provider = await require_providers.resolveProvider(assertion.provider, providerMap, {
|
|
15033
|
-
env:
|
|
15395
|
+
env: testSuiteConfig.env,
|
|
15034
15396
|
basePath: require_logger.state.basePath
|
|
15035
15397
|
});
|
|
15036
15398
|
}
|
|
15037
15399
|
}
|
|
15038
15400
|
if (options.cache === false) require_cache.disableCache();
|
|
15039
|
-
const parsedProviderPromptMap = require_graders.readProviderPromptMap(
|
|
15040
|
-
const unifiedConfig =
|
|
15041
|
-
|
|
15042
|
-
|
|
15043
|
-
};
|
|
15044
|
-
const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
|
|
15401
|
+
const parsedProviderPromptMap = require_graders.readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
|
|
15402
|
+
const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
|
|
15403
|
+
const author = require_accounts.getAuthor(suiteAuthor);
|
|
15404
|
+
const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
|
|
15045
15405
|
const ret = await evaluate$1({
|
|
15046
15406
|
...constructedTestSuite,
|
|
15047
15407
|
providerPromptMap: parsedProviderPromptMap
|
|
15048
15408
|
}, evalRecord, {
|
|
15049
15409
|
eventSource: "library",
|
|
15050
|
-
isRedteam: Boolean(
|
|
15410
|
+
isRedteam: Boolean(testSuiteConfig.redteam),
|
|
15051
15411
|
...options
|
|
15052
15412
|
});
|
|
15053
|
-
if (
|
|
15413
|
+
if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
|
|
15054
15414
|
const shareableUrl = await createShareableUrl(ret, { silent: true });
|
|
15055
15415
|
if (shareableUrl) {
|
|
15056
15416
|
ret.shareableUrl = shareableUrl;
|
|
@@ -15061,9 +15421,9 @@ async function evaluate(testSuite, options = {}) {
|
|
|
15061
15421
|
require_logger.logger.warn(`Failed to create shareable URL: ${error}`);
|
|
15062
15422
|
}
|
|
15063
15423
|
else require_logger.logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
|
|
15064
|
-
if (
|
|
15065
|
-
if (typeof
|
|
15066
|
-
else if (Array.isArray(
|
|
15424
|
+
if (testSuiteConfig.outputPath) {
|
|
15425
|
+
if (typeof testSuiteConfig.outputPath === "string") await require_util.writeOutput(testSuiteConfig.outputPath, evalRecord, null);
|
|
15426
|
+
else if (Array.isArray(testSuiteConfig.outputPath)) await require_util.writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
|
|
15067
15427
|
}
|
|
15068
15428
|
return ret;
|
|
15069
15429
|
}
|
|
@@ -15075,7 +15435,7 @@ const redteam = {
|
|
|
15075
15435
|
},
|
|
15076
15436
|
Graders: require_graders.GRADERS,
|
|
15077
15437
|
Plugins,
|
|
15078
|
-
Strategies:
|
|
15438
|
+
Strategies: require_strategies.Strategies,
|
|
15079
15439
|
Base: {
|
|
15080
15440
|
Plugin: require_graders.RedteamPluginBase,
|
|
15081
15441
|
Grader: require_graders.RedteamGraderBase
|
|
@@ -15104,9 +15464,18 @@ exports.CompletedPromptSchema = require_types.CompletedPromptSchema;
|
|
|
15104
15464
|
exports.CompletionTokenDetailsSchema = require_types.CompletionTokenDetailsSchema;
|
|
15105
15465
|
exports.ConversationMessageSchema = require_types.ConversationMessageSchema;
|
|
15106
15466
|
exports.DerivedMetricSchema = require_types.DerivedMetricSchema;
|
|
15467
|
+
exports.DocumentMediaInjectionPlacementSchema = require_types.DocumentMediaInjectionPlacementSchema;
|
|
15468
|
+
exports.DocumentMediaInjectionPlacementValues = require_types.DocumentMediaInjectionPlacementValues;
|
|
15469
|
+
exports.DocxInjectionPlacementSchema = require_types.DocxInjectionPlacementSchema;
|
|
15470
|
+
exports.DocxInjectionPlacementValues = require_types.DocxInjectionPlacementValues;
|
|
15107
15471
|
exports.EvalResultsFilterMode = require_types.EvalResultsFilterMode;
|
|
15108
15472
|
exports.EvaluateOptionsSchema = require_types.EvaluateOptionsSchema;
|
|
15109
15473
|
exports.GradingConfigSchema = require_types.GradingConfigSchema;
|
|
15474
|
+
exports.InputConfigSchema = require_types.InputConfigSchema;
|
|
15475
|
+
exports.InputDefinitionObjectSchema = require_types.InputDefinitionObjectSchema;
|
|
15476
|
+
exports.InputDefinitionSchema = require_types.InputDefinitionSchema;
|
|
15477
|
+
exports.InputTypeSchema = require_types.InputTypeSchema;
|
|
15478
|
+
exports.InputTypeValues = require_types.InputTypeValues;
|
|
15110
15479
|
exports.InputsSchema = require_types.InputsSchema;
|
|
15111
15480
|
exports.NotPrefixedAssertionTypesSchema = require_types.NotPrefixedAssertionTypesSchema;
|
|
15112
15481
|
exports.OutputConfigSchema = require_types.OutputConfigSchema;
|
|
@@ -15129,6 +15498,7 @@ exports.TestSuiteSchema = require_types.TestSuiteSchema;
|
|
|
15129
15498
|
exports.UnifiedConfigSchema = require_types.UnifiedConfigSchema;
|
|
15130
15499
|
exports.VarsSchema = require_types.VarsSchema;
|
|
15131
15500
|
exports.assertions = assertions_default;
|
|
15501
|
+
exports.buildInputPromptDescription = require_types.buildInputPromptDescription;
|
|
15132
15502
|
Object.defineProperty(exports, "cache", {
|
|
15133
15503
|
enumerable: true,
|
|
15134
15504
|
get: function() {
|
|
@@ -15138,12 +15508,17 @@ Object.defineProperty(exports, "cache", {
|
|
|
15138
15508
|
exports.default = src_default;
|
|
15139
15509
|
exports.evaluate = evaluate;
|
|
15140
15510
|
exports.generateTable = generateTable;
|
|
15511
|
+
exports.getInputDescription = require_types.getInputDescription;
|
|
15512
|
+
exports.getInputType = require_types.getInputType;
|
|
15141
15513
|
exports.guardrails = guardrails;
|
|
15142
15514
|
exports.isApiProvider = require_types.isApiProvider;
|
|
15143
15515
|
exports.isGradingResult = require_types.isGradingResult;
|
|
15144
15516
|
exports.isProviderOptions = require_types.isProviderOptions;
|
|
15145
15517
|
exports.isResultFailureReason = require_types.isResultFailureReason;
|
|
15518
|
+
exports.isTransformFunction = isTransformFunction;
|
|
15146
15519
|
exports.loadApiProvider = require_providers.loadApiProvider;
|
|
15520
|
+
exports.normalizeInputDefinition = require_types.normalizeInputDefinition;
|
|
15521
|
+
exports.normalizeInputs = require_types.normalizeInputs;
|
|
15147
15522
|
exports.redteam = redteam;
|
|
15148
15523
|
|
|
15149
15524
|
//# sourceMappingURL=index.cjs.map
|