promptfoo 0.121.4 → 0.121.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-DQkFNqE9.js → ListApp-BRUsT43Y.js} +1 -1
- package/dist/src/{accounts-Dy17bs4D.cjs → accounts-BIFntVWB.cjs} +4 -4
- package/dist/src/{accounts-F9d_5sMC.js → accounts-CLJHCDDb.js} +6 -6
- package/dist/src/{accounts-DhMYUUbu.js → accounts-CaLNYnf7.js} +4 -4
- package/dist/src/{accounts-DdJ2pHMI.js → accounts-bnyHT7Ju.js} +5 -5
- package/dist/src/{agentic-utils-w68v6_Dz.js → agentic-utils-B5krlibj.js} +3 -3
- package/dist/src/{agentic-utils-P172hM8B.js → agentic-utils-Ba67xmgs.js} +2 -2
- package/dist/src/{agentic-utils-qFlm6zes.js → agentic-utils-BclbiXiq.js} +3 -3
- package/dist/src/{agentic-utils-BpX5b23w.cjs → agentic-utils-D2x0wGhB.cjs} +2 -2
- package/dist/src/{agents-CgaMXvLM.js → agents-BGqaTDnr.js} +5 -5
- package/dist/src/{agents-8FDnTriG.js → agents-BV9yFpXX.js} +5 -5
- package/dist/src/{agents-aYPQLf8W.js → agents-BYdMl1UE.js} +4 -4
- package/dist/src/{agents-pQeBEXMm.js → agents-DhxWMCtH.js} +5 -5
- package/dist/src/{agents-D7-HGxUj.cjs → agents-DiWmQYH9.cjs} +4 -4
- package/dist/src/{agents-BahDpe5G.cjs → agents-WULPVjbH.cjs} +4 -4
- package/dist/src/{agents-DJ35I3Nt.js → agents-emVcx3yh.js} +5 -5
- package/dist/src/{agents-C-R_jfzI.js → agents-n6vPqV3i.js} +4 -4
- package/dist/src/{aimlapi-BCq3MHeL.js → aimlapi-BxqK9HF_.js} +7 -7
- package/dist/src/{aimlapi-qcK4OT55.cjs → aimlapi-BzLjZI_m.cjs} +6 -6
- package/dist/src/{aimlapi-BD6J9oKt.js → aimlapi-DR4pgeiC.js} +6 -6
- package/dist/src/{aimlapi-sgYnkE54.js → aimlapi-uPGp0Zdo.js} +7 -7
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-vjzrbgce.js +1 -0
- package/dist/src/app/assets/index-B3NQ8HTd.js +385 -0
- package/dist/src/app/assets/{index-BXGkeMwh.css → index-Cli2yAXv.css} +1 -1
- package/dist/src/app/index.html +27 -2
- package/dist/src/{audio-DcVKoInv.js → audio-BvpTOArF.js} +4 -4
- package/dist/src/{audio-BQtNuYBj.cjs → audio-C0vDeS0j.cjs} +3 -3
- package/dist/src/{audio-B7izf48x.js → audio-CScmnmEB.js} +4 -4
- package/dist/src/{audio-COrn8rM6.js → audio-Da8U9IS5.js} +3 -3
- package/dist/src/{base-fZ9wgg50.js → base-BOMaNEes.js} +3 -3
- package/dist/src/{base-PYJvBE1i.js → base-BTux96b1.js} +2 -2
- package/dist/src/{base-D-670DX8.cjs → base-Tw6uhH8K.cjs} +2 -2
- package/dist/src/{base-yrI1Yal4.js → base-dYsl2hmL.js} +3 -3
- package/dist/src/{blobs-D2FAd1Q5.cjs → blobs-B95F_7vE.cjs} +2 -2
- package/dist/src/{blobs-C-F78Kfn.js → blobs-BW4U31ue.js} +2 -2
- package/dist/src/{blobs-BCZavS8s.js → blobs-D_gg8nbm.js} +3 -3
- package/dist/src/{blobs-BQWqnnvL.js → blobs-DjLby-uP.js} +3 -3
- package/dist/src/{cache-mb7c8hbp.js → cache-BI5BY7ey.js} +4 -4
- package/dist/src/{cache-DbLsVWB2.cjs → cache-BRkhlH3k.cjs} +1 -1
- package/dist/src/cache-BlC6aeJ0.js +3 -0
- package/dist/src/{cache-D5NZmMiT.js → cache-Bzttsk0X.js} +2 -2
- package/dist/src/{cache-C4Xb-hNb.js → cache-Cr-qWIbP.js} +3 -3
- package/dist/src/{cache-BIyPcp5v.cjs → cache-DGg-yTZG.cjs} +2 -2
- package/dist/src/{chat-Dr3DUQ0D.js → chat-BLOdH60v.js} +12 -12
- package/dist/src/{chat-BfPaS15_.js → chat-Cx_LkwvZ.js} +12 -12
- package/dist/src/{chat-mW0ORo8G.js → chat-D9nudO9b.js} +4 -4
- package/dist/src/{chat-I9izLm49.js → chat-DChSH_Es.js} +12 -12
- package/dist/src/{chat-MKxMnZJZ.js → chat-DG2LkwLq.js} +2 -2
- package/dist/src/{chat-BPXSW8Bv.cjs → chat-DH97tVV9.cjs} +2 -2
- package/dist/src/{chat-0bwXjVP0.js → chat-aMQZw6R7.js} +4 -4
- package/dist/src/{chat-CclRbxGf.cjs → chat-vYqqv1gP.cjs} +11 -11
- package/dist/src/{chatkit-zUIVoDos.js → chatkit-B8X34dQc.js} +4 -4
- package/dist/src/{chatkit-Cv6AhukM.js → chatkit-BXu42Qwt.js} +3 -3
- package/dist/src/{chatkit-CJnHRRMM.js → chatkit-CbMRoeYw.js} +4 -4
- package/dist/src/{chatkit-BoWoSgXl.cjs → chatkit-D44VyUyB.cjs} +3 -3
- package/dist/src/{claude-agent-sdk-CPJo3dBQ.cjs → claude-agent-sdk-BRq0bbIK.cjs} +8 -8
- package/dist/src/{claude-agent-sdk-BQNuLaAK.js → claude-agent-sdk-BjriSVRZ.js} +7 -7
- package/dist/src/{claude-agent-sdk-Dtq_L-Sc.js → claude-agent-sdk-BzNZeZ0N.js} +7 -7
- package/dist/src/{claude-agent-sdk-nfAIcxNf.js → claude-agent-sdk-DYv_AJ8u.js} +7 -7
- package/dist/src/cloud-CoD5OacT.js +3 -0
- package/dist/src/{cloud-DQZ5sVjW.js → cloud-Da0bofJd.js} +3 -3
- package/dist/src/{cloudflare-ai-BIB567w6.js → cloudflare-ai-CXC4b1EU.js} +4 -4
- package/dist/src/{cloudflare-ai-DlKr0rY7.js → cloudflare-ai-CyBoIs1Q.js} +6 -6
- package/dist/src/{cloudflare-ai-DGLte7Py.js → cloudflare-ai-DGOwgexC.js} +6 -6
- package/dist/src/{cloudflare-ai-Dl3N9OVD.cjs → cloudflare-ai-DJv5qnyb.cjs} +4 -4
- package/dist/src/{cloudflare-gateway-BDZrYydE.js → cloudflare-gateway-1sAoOyft.js} +5 -5
- package/dist/src/{cloudflare-gateway-CiIZHU0Q.js → cloudflare-gateway-D-dnkzCF.js} +5 -5
- package/dist/src/{cloudflare-gateway-BYDp495F.cjs → cloudflare-gateway-DKVjkDav.cjs} +3 -3
- package/dist/src/{cloudflare-gateway-DI1HNP5F.js → cloudflare-gateway-TJkVrZlB.js} +3 -3
- package/dist/src/codex-app-server-CCLjqCh9.js +1915 -0
- package/dist/src/codex-app-server-CCe0TiDc.js +1915 -0
- package/dist/src/codex-app-server-CPW1LFwh.js +1916 -0
- package/dist/src/codex-app-server-VMRnjZ68.cjs +1920 -0
- package/dist/src/codex-sdk-1jm_qPHf.js +3 -0
- package/dist/src/{codex-sdk-C2_M2pl_.cjs → codex-sdk-Bd8UbO9q.cjs} +5 -5
- package/dist/src/{codex-sdk-CpqiOqDO.js → codex-sdk-BgEFQ70r.js} +6 -6
- package/dist/src/{codex-sdk-Rtky3M4I.js → codex-sdk-Bzb_TqX9.js} +6 -6
- package/dist/src/{codex-sdk-CWEnH70W.cjs → codex-sdk-Danroptg.cjs} +1 -1
- package/dist/src/{codex-sdk-CErXn7qh.js → codex-sdk-DfvDTN33.js} +5 -5
- package/dist/src/{cometapi-CtJ-mS8R.js → cometapi-B5ImDlSm.js} +8 -8
- package/dist/src/{cometapi-UVOryo4W.cjs → cometapi-BgAkuYCw.cjs} +7 -7
- package/dist/src/{cometapi-BUlt_ELa.js → cometapi-CC7hWxmX.js} +8 -8
- package/dist/src/{cometapi-DT-jlVCB.js → cometapi-CCbpHkuF.js} +7 -7
- package/dist/src/{completion-x0a_c2y1.js → completion-2iuYVxwi.js} +6 -6
- package/dist/src/{completion-Dnxn7E-j.js → completion-CrD6MQ93.js} +5 -5
- package/dist/src/{completion-BozdoXba.cjs → completion-DtQ72Bm3.cjs} +5 -5
- package/dist/src/{completion-HUe8wDhZ.js → completion-Vq_ad618.js} +6 -6
- package/dist/src/{createHash-ChI45QR1.js → createHash-DPpsZgFF.js} +1 -1
- package/dist/src/{createHash-CwDVU5xr.js → createHash-Un4Q_huE.js} +1 -1
- package/dist/src/{createHash-B7KvgoOD.cjs → createHash-VvBIc-AW.cjs} +1 -1
- package/dist/src/{docker-DCgsveLD.js → docker--3qzPa-6.js} +6 -6
- package/dist/src/{docker-DS4_Osau.cjs → docker-D3AY-5F5.cjs} +5 -5
- package/dist/src/{docker-CQmlA2NU.js → docker-DCsCDvwM.js} +6 -6
- package/dist/src/{docker-ClnmCf1Z.js → docker-Dorv4_Dg.js} +5 -5
- package/dist/src/{embedding-I45KG3o7.cjs → embedding-BXhN5lCH.cjs} +5 -5
- package/dist/src/{embedding-nFbumxcv.js → embedding-ChS1ivFS.js} +5 -5
- package/dist/src/{embedding-D3xTseo7.js → embedding-DNRvZwRN.js} +6 -6
- package/dist/src/{embedding-DD9wa3ae.js → embedding-D_bI4NDq.js} +6 -6
- package/dist/src/{errors-Cw810C93.js → errors-DFHe4L-n.js} +1 -1
- package/dist/src/{esm-Dh4dOLlt.js → esm-B6whoAcf.js} +2 -2
- package/dist/src/{esm-C7PnfdF8.js → esm-BRkfNsYs.js} +1 -1
- package/dist/src/{esm-tVgYPY-f.js → esm-BX8fwlAO.js} +2 -2
- package/dist/src/{esm-CtEPLdAj.cjs → esm-B_rGuPTo.cjs} +1 -1
- package/dist/src/{eval-CzJFfFO9.js → eval-BQPLBJbw.js} +1 -1
- package/dist/src/{eval-u4UVafl6.js → eval-DJ_4A-tr.js} +14 -14
- package/dist/src/evalResult-BBJAHAtw.cjs +2 -0
- package/dist/src/evalResult-BBK58h2B.js +3 -0
- package/dist/src/{evalResult-KZqXl4XP.cjs → evalResult-Cx-8OWkb.cjs} +28 -10
- package/dist/src/{evalResult-D3hVYFis.js → evalResult-D6P5I5il.js} +29 -11
- package/dist/src/{evalResult-Bgm9ZH31.js → evalResult-pSvGWFMo.js} +29 -11
- package/dist/src/{evaluator-IvuDYSvQ.js → evaluator-D-UIbbYq.js} +845 -98
- package/dist/src/evaluator-DgLKaZk8.js +3 -0
- package/dist/src/{extractor-Dk6bRWkv.js → extractor-BM3jRERL.js} +5 -5
- package/dist/src/{extractor-WVPOrH43.cjs → extractor-Dxr2J_wK.cjs} +5 -5
- package/dist/src/{extractor-DNSeBVOJ.js → extractor-DxyiFhPk.js} +6 -6
- package/dist/src/{extractor-CAfTSraf.js → extractor-YlZbUMsL.js} +6 -6
- package/dist/src/fetch-8viavNv8.js +3 -0
- package/dist/src/{fetch-BEWnXrrG.js → fetch-B6ch2nU2.js} +9 -20
- package/dist/src/{fetch-Di00EQrc.js → fetch-D9xxyC1p.js} +221 -232
- package/dist/src/{fetch-CJU5ELPa.cjs → fetch-NuqXW1Xb.cjs} +221 -244
- package/dist/src/{fetch-B0Z3Oe4k.js → fetch-Y5qX_kST.js} +8 -19
- package/dist/src/{fileExtensions-BArZuxsI.js → fileExtensions-8CjoL7vB.js} +1 -1
- package/dist/src/{fileExtensions-DnqA1y9x.js → fileExtensions-BGh-W-HT.js} +1 -1
- package/dist/src/{fileExtensions-bYh77CN8.cjs → fileExtensions-D9h-8Wxg.cjs} +1 -1
- package/dist/src/{fileExtensions-AWa2ZML4.js → fileExtensions-DysCsxNG.js} +1 -1
- package/dist/src/{formatDuration-DZzPsexs.js → formatDuration-Ch4A7G3o.js} +1 -1
- package/dist/src/{genaiTracer-yRuxj9-L.cjs → genaiTracer-BokHC-MW.cjs} +1 -1
- package/dist/src/{genaiTracer-DWdZ28hY.js → genaiTracer-C3ZPQU60.js} +1 -1
- package/dist/src/{genaiTracer-XnrcgDCe.js → genaiTracer-CFny3gOy.js} +1 -1
- package/dist/src/{genaiTracer-COYDi-tC.js → genaiTracer-DxODqT9e.js} +1 -1
- package/dist/src/{graders-Zy3x0zqX.js → graders-BoUqsCEm.js} +1303 -2044
- package/dist/src/{graders--zknU_uk.cjs → graders-Bw1wk_21.cjs} +1553 -2240
- package/dist/src/graders-C84JI-m5.js +2 -0
- package/dist/src/graders-CBbd0K0Q.cjs +2 -0
- package/dist/src/graders-CbQqpHSN.js +3 -0
- package/dist/src/{graders-eIHhRqoC.js → graders-CgPn32yp.js} +1300 -2041
- package/dist/src/{graders-pvbReLLn.js → graders-CwrbifOo.js} +747 -1488
- package/dist/src/graders-DS42d3ZG.js +2 -0
- package/dist/src/{image-9302QVqR.js → image-BeWaInPF.js} +3 -3
- package/dist/src/{image-DVz2RiMF.js → image-BmilRNqO.js} +7 -7
- package/dist/src/{image-x6KqLQl4.cjs → image-CxJoa3aW.cjs} +6 -6
- package/dist/src/{image-De2FBmYV.cjs → image-D10dNAav.cjs} +3 -3
- package/dist/src/{image-dnoUgPrC.js → image-Dr_3I3nK.js} +4 -4
- package/dist/src/{image-B5Mv-Z3h.js → image-DsGRlkh7.js} +7 -7
- package/dist/src/{image-qUpPvmNZ.js → image-a_SGUobh.js} +6 -6
- package/dist/src/{image-u7-rKnYU.js → image-qjO6FWPs.js} +4 -4
- package/dist/src/index.cjs +1052 -296
- package/dist/src/index.d.cts +124 -13
- package/dist/src/index.d.ts +125 -14
- package/dist/src/index.js +1018 -262
- package/dist/src/{interactiveCheck-CLERUB0c.js → interactiveCheck-CCICw2cy.js} +2 -2
- package/dist/src/{invariant-BtWWVVhl.js → invariant-B2Rf6avk.js} +1 -1
- package/dist/src/{invariant-vgHWClmd.js → invariant-DIYf9sP1.js} +1 -1
- package/dist/src/{knowledgeBase-RhFPGWDc.js → knowledgeBase-BBETc5-S.js} +6 -6
- package/dist/src/{knowledgeBase-Bpoe_nLu.cjs → knowledgeBase-C8qOo26M.cjs} +5 -5
- package/dist/src/{knowledgeBase-lm9RXSAm.js → knowledgeBase-CzAi2rUI.js} +6 -6
- package/dist/src/{knowledgeBase-Dgc7CBWF.js → knowledgeBase-Dr3Kib7F.js} +5 -5
- package/dist/src/{litellm-C2kqjxqp.js → litellm-BLSiANhk.js} +5 -5
- package/dist/src/{litellm-CoyI4IAl.cjs → litellm-CaUmV7Mk.cjs} +4 -4
- package/dist/src/{litellm-p37R1dzQ.js → litellm-DQGo_juI.js} +4 -4
- package/dist/src/{litellm-DRjpcSa7.js → litellm-DRc4qWfc.js} +5 -5
- package/dist/src/{logger-DksKw1Qc.js → logger-BbY6ypFL.js} +2 -2
- package/dist/src/{logger-B88EkIn6.js → logger-KD8JjCRJ.js} +2 -2
- package/dist/src/{luma-ray-KgTCXrZC.js → luma-ray-B-tNZzqW.js} +6 -6
- package/dist/src/{luma-ray-B863CmuZ.js → luma-ray-CtS3OlGq.js} +5 -5
- package/dist/src/{luma-ray-BTTLtqQ8.js → luma-ray-PJJgUjOc.js} +6 -6
- package/dist/src/{luma-ray-BxVKaW2a.cjs → luma-ray-if-Ml4R9.cjs} +5 -5
- package/dist/src/main.js +242 -198
- package/dist/src/{messages-zWbkLLHz.js → messages-B9dSjrNf.js} +264 -16
- package/dist/src/{messages-811uVVW5.cjs → messages-BnsVHUnm.cjs} +266 -15
- package/dist/src/{messages-MYTQ2TWp.js → messages-CI69Lasb.js} +264 -16
- package/dist/src/{messages-BTQz42fn.js → messages-CewuNcNS.js} +264 -16
- package/dist/src/{meteor-Co1VQ1u5.cjs → meteor-BBGcGeCa.cjs} +1 -1
- package/dist/src/{meteor-DuAFv6gF.js → meteor-BKTM-7KS.js} +1 -1
- package/dist/src/{meteor-DHdzY1Ss.js → meteor-CeGo0Lu2.js} +2 -2
- package/dist/src/{meteor-CU5UAE-H.js → meteor-Wc_aUVvu.js} +2 -2
- package/dist/src/{modelslab-wu9yi5GE.js → modelslab-BCLOtfek.js} +7 -7
- package/dist/src/{modelslab-Dk1JAtVo.cjs → modelslab-BkapYJhh.cjs} +6 -6
- package/dist/src/{modelslab-DIq-6y7x.js → modelslab-D73OnKSx.js} +6 -6
- package/dist/src/{modelslab-D0erNWKe.js → modelslab-zpz9JcK0.js} +7 -7
- package/dist/src/{nova-reel-CCFRfeRb.js → nova-reel-B8F_TK5w.js} +6 -6
- package/dist/src/{nova-reel-DQrm74ng.js → nova-reel-Bx0NFV2f.js} +5 -5
- package/dist/src/{nova-reel-gr11WG7f.js → nova-reel-CNGJTLtG.js} +6 -6
- package/dist/src/{nova-reel-CrLXVKQf.cjs → nova-reel-DkT7tnoB.cjs} +5 -5
- package/dist/src/{nova-sonic-BYdp-QLs.js → nova-sonic-BaXRN1cr.js} +4 -4
- package/dist/src/{nova-sonic-TDgrlTk7.js → nova-sonic-BeTRaFOh.js} +4 -4
- package/dist/src/{nova-sonic-B_ZXcUJB.js → nova-sonic-CL7Zqv0G.js} +3 -3
- package/dist/src/{nova-sonic-i5tUvXKn.cjs → nova-sonic-YT426juD.cjs} +3 -3
- package/dist/src/{openai-DhVEmgeZ.js → openai-BMHD2Huo.js} +2 -2
- package/dist/src/{openai-Qsvz25mV.js → openai-BT-JvDse.js} +2 -2
- package/dist/src/{openai-URNyItar.cjs → openai-Cy1XLs0c.cjs} +1 -1
- package/dist/src/{openai-iYtrXzOX.js → openai-D4fxGvRx.js} +1 -1
- package/dist/src/{openclaw-CwzlQSQX.js → openclaw-Bq7RVR3k.js} +7 -6
- package/dist/src/{openclaw-CLWrW03k.js → openclaw-DA8U4DsD.js} +8 -7
- package/dist/src/{openclaw-CnQ363Wi.js → openclaw-DObVgpjC.js} +8 -7
- package/dist/src/{openclaw-wX9rtfke.cjs → openclaw-DUBZP3GL.cjs} +8 -7
- package/dist/src/{opencode-sdk-BUu5Nevv.js → opencode-sdk-BB40Wir1.js} +4 -4
- package/dist/src/{opencode-sdk-GI2KaAXq.js → opencode-sdk-BM1UAIv1.js} +3 -3
- package/dist/src/{opencode-sdk-BZ2idgYA.cjs → opencode-sdk-CeqiOcOU.cjs} +4 -4
- package/dist/src/{opencode-sdk-BxD8vXp_.js → opencode-sdk-ChdK7F7z.js} +4 -4
- package/dist/src/{otlpReceiver-DmVulbhC.js → otlpReceiver-C6thJRXi.js} +4 -4
- package/dist/src/{otlpReceiver-B2z58l4e.js → otlpReceiver-CcdIikOu.js} +3 -3
- package/dist/src/{otlpReceiver-BfcVq2Nq.cjs → otlpReceiver-DNSQj6bf.cjs} +3 -3
- package/dist/src/{otlpReceiver-BntK801g.js → otlpReceiver-UYMQx3sy.js} +4 -4
- package/dist/src/{providerRegistry-CPQ_CmVO.js → providerRegistry-1gB5vtzQ.js} +2 -2
- package/dist/src/{providerRegistry-CQMdTmHP.cjs → providerRegistry-BESeALrr.cjs} +1 -1
- package/dist/src/{providerRegistry-Bvh8mv85.js → providerRegistry-DoACwqhD.js} +1 -1
- package/dist/src/{providerRegistry-CWoPjKFZ.js → providerRegistry-PMsleEzs.js} +2 -2
- package/dist/src/{providers-Bp4S-FvO.js → providers-BuyzKt7C.js} +1 -1
- package/dist/src/{providers-DV3ax9e_.cjs → providers-C7lNVBjX.cjs} +1 -1
- package/dist/src/{providers-u9Enmfok.js → providers-CCE2COJi2.js} +1 -1
- package/dist/src/{providers-DruaQfwu.js → providers-CJh7iriU.js} +18103 -17952
- package/dist/src/{providers-iUt5fbAN.js → providers-Ctcc592x.js} +1 -1
- package/dist/src/{providers-Domz_llv.js → providers-DRrerKra.js} +432 -281
- package/dist/src/{providers-BV_KMZje.js → providers-DT-GtF2t.js} +19094 -18943
- package/dist/src/{providers-1eKkXBKp.cjs → providers-eDShy16E.cjs} +17946 -17795
- package/dist/src/{pythonUtils-Cldx7huE.js → pythonUtils-C4tltmIn.js} +3 -3
- package/dist/src/{pythonUtils-tAJvvpS-.cjs → pythonUtils-CoLaCwNY.cjs} +3 -3
- package/dist/src/{pythonUtils-C2UQ30Rz.js → pythonUtils-DMO68Jg7.js} +3 -3
- package/dist/src/{pythonUtils-CnndUbW-.js → pythonUtils-DNqbnRdx.js} +3 -3
- package/dist/src/{quiverai-DR0SnIQV.js → quiverai-BSS9a7wV.js} +3 -3
- package/dist/src/{quiverai-CtWi6x_g.js → quiverai-Bk1KrvL6.js} +4 -4
- package/dist/src/{quiverai-DFotyafY.cjs → quiverai-Bpx6MZ7T.cjs} +3 -3
- package/dist/src/{quiverai-aPPvXOgn.js → quiverai-CPKhWgaT.js} +4 -4
- package/dist/src/{render-DHIZ6_k8.js → render-7uNJ2V14.js} +2 -2
- package/dist/src/{render-CH-62LbA.js → render-DlscvAUJ.js} +1 -1
- package/dist/src/{render-CMEpfLaO.js → render-eui5p5mL.js} +2 -2
- package/dist/src/{render-CgVDrJmM.js → render-nj-UaPdn.js} +2 -2
- package/dist/src/{render-DfQSFxGE.cjs → render-tG6ir9_g.cjs} +1 -1
- package/dist/src/{responses--OsX2aYW.js → responses-1ztiVYsx.js} +49 -15
- package/dist/src/{responses-DL9m8CyY.js → responses-B8haB-mD.js} +49 -15
- package/dist/src/{responses-C-flexAY.js → responses-BiaBguAu.js} +49 -15
- package/dist/src/{responses-Bi9vBuW_.cjs → responses-CF-ayauu.cjs} +48 -14
- package/dist/src/rubyUtils-4hjGxvju.js +3 -0
- package/dist/src/{rubyUtils-DVLeA2jg.js → rubyUtils-BI0p46eZ.js} +3 -3
- package/dist/src/{rubyUtils-DsGrTx8R.js → rubyUtils-CIQFnVz4.js} +3 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +2 -0
- package/dist/src/{rubyUtils-CYSQEG4a.js → rubyUtils-DGnoCYL2.js} +3 -3
- package/dist/src/{rubyUtils-B6eljPuh.cjs → rubyUtils-DoifqkiA.cjs} +4 -3
- package/dist/src/{sagemaker-BveBvuxm.js → sagemaker-BDLeW29y.js} +12 -12
- package/dist/src/{sagemaker-D67yzMzs.js → sagemaker-C5T60MKf.js} +13 -13
- package/dist/src/{sagemaker-BVkaG2-l.js → sagemaker-ClS_NB07.js} +13 -13
- package/dist/src/{sagemaker-XnfhheQv.cjs → sagemaker-ljtY12VM.cjs} +12 -12
- package/dist/src/{scanner-1DqWi1Ej.js → scanner-nOCWNIXa.js} +7 -7
- package/dist/src/server/index.js +1067 -265
- package/dist/src/{server-Dx2TyCH2.cjs → server-BEECpeGG.cjs} +5 -5
- package/dist/src/{server-BNYztJkh.js → server-ByiF3qlg.js} +9 -8
- package/dist/src/{server-BSB45Nt9.js → server-ByxbqAcQ.js} +8 -7
- package/dist/src/{server-DaA2eR26.cjs → server-C0XKRNB_.cjs} +1 -1
- package/dist/src/server-C_15p79-.js +3 -0
- package/dist/src/{server-D6Il2Sob.js → server-gyd6d4Hc.js} +5 -5
- package/dist/src/{signal-CE5G3a7x.js → signal-DTtUuU3l.js} +3 -3
- package/dist/src/{slack-acRb0IqQ.js → slack-4zZX1OKP.js} +1 -1
- package/dist/src/{slack-1Rhq0EoV.cjs → slack-BLlsDpfG.cjs} +1 -1
- package/dist/src/{slack-D5Wpy8LM.js → slack-BPYLQLgb.js} +2 -2
- package/dist/src/{slack-DDUe-5MC.js → slack-Bamy_7te.js} +2 -2
- package/dist/src/{store-DAAyxcy6.cjs → store-2K0kDi80.cjs} +2 -2
- package/dist/src/{store-Dn9HUkdW.js → store-2OXm_eBY.js} +3 -3
- package/dist/src/store-BELqNwvz.js +3 -0
- package/dist/src/{store-M0b1WfYb.js → store-BPkzEyFM.js} +2 -2
- package/dist/src/{store-CYEy5J2D.js → store-CPh25336.js} +3 -3
- package/dist/src/store-uQZ4AjPe.cjs +2 -0
- package/dist/src/{tables-CsWou1Bx.js → tables-BMSOS2Gg.js} +3 -3
- package/dist/src/{tables-DUfh1F7Z.cjs → tables-CXbaZ9y1.cjs} +2 -2
- package/dist/src/{tables-C4CH3zRr.js → tables-NlvH23ky.js} +3 -3
- package/dist/src/{tables-DQ4WU5tX.js → tables-WgdUZ8Ck.js} +2 -2
- package/dist/src/{telemetry-dbaJ0E98.js → telemetry--iqaGyaS.js} +5 -4
- package/dist/src/{telemetry-Dsw_faFj.cjs → telemetry-CEQxGnMZ.cjs} +7 -6
- package/dist/src/{telemetry-Dvqxv3YC.js → telemetry-CgdVGV8N.js} +4 -3
- package/dist/src/{telemetry-CQPez_Jp.js → telemetry-DWdGHvEf.js} +5 -4
- package/dist/src/telemetry-DjNoC_n3.cjs +2 -0
- package/dist/src/telemetry-ZdPZc0fm.js +3 -0
- package/dist/src/{text-BVi-cLPJ.cjs → text-BiNME7QG.cjs} +1 -1
- package/dist/src/{text-KvuD2Iko.js → text-D4lz-Jg_.js} +1 -1
- package/dist/src/{text-DHxdyQqT.js → text-DDQP0tuQ.js} +1 -1
- package/dist/src/{text-CZr46tp_.js → text-NWvfMfkF.js} +1 -1
- package/dist/src/{tokenUsageUtils-CXrvO-wA.js → tokenUsageUtils-2wIvAhB3.js} +1 -1
- package/dist/src/{tokenUsageUtils-C-bmyHoE.js → tokenUsageUtils-4c780gFd.js} +1 -1
- package/dist/src/tokenUsageUtils-BjVkdk18.js +142 -0
- package/dist/src/{tokenUsageUtils-Bb7DkZPz.cjs → tokenUsageUtils-C9odhsbW.cjs} +1 -1
- package/dist/src/{transcription-DuWDupG7.js → transcription-84t4ALo2.js} +5 -5
- package/dist/src/{transcription-CJspiD2c.js → transcription-Bm2emLmJ.js} +6 -6
- package/dist/src/{transcription-BvjmiYB1.cjs → transcription-CZ4LG5hQ.cjs} +5 -5
- package/dist/src/{transcription-V2HaAmy2.js → transcription-D7Q0vJsh.js} +6 -6
- package/dist/src/{transform-zDhMmzwX.js → transform-B-b6Cq-q.js} +5 -5
- package/dist/src/transform-BQt0BeAW.js +3 -0
- package/dist/src/{transform-DgKlRr73.cjs → transform-Bq5oqC0s.cjs} +1 -1
- package/dist/src/{transform-CUnzlsbn.cjs → transform-C9izGX54.cjs} +4 -4
- package/dist/src/{transform-DYX1_Xnh.js → transform-CwbAZ84V.js} +5 -5
- package/dist/src/{transform-CTeuTR3S.cjs → transform-Dg4LcO1Y.cjs} +6 -6
- package/dist/src/{transform-CG0ehZNG.js → transform-DtooZqYY.js} +6 -6
- package/dist/src/{transform-UN5UGu8U.js → transform-DzCF-wqV.js} +5 -5
- package/dist/src/{transform-lQrDE1BQ.js → transform-_DpNB4qp.js} +5 -5
- package/dist/src/{transform-Bbg6A8Jk.js → transform-eGiUAv86.js} +4 -4
- package/dist/src/{transformersAvailability-Cju9mHgR.cjs → transformersAvailability-B22swDxr.cjs} +1 -1
- package/dist/src/{transformersAvailability-CcHusyhw.js → transformersAvailability-lvCCvuPT.js} +1 -1
- package/dist/src/{transformersAvailability-DLlROWhg.js → transformersAvailability-rJGPccjr.js} +1 -1
- package/dist/src/{types-Bgh5SOn6.js → types-BDjGOq4E.js} +4 -2
- package/dist/src/{types-Dm9JM6Vb.js → types-BVH9hjgW.js} +4 -2
- package/dist/src/{types-CeaeaZdP.cjs → types-CgG2rKiW.cjs} +151 -149
- package/dist/src/{types-BGQDAP8i.js → types-DNRZVOue.js} +152 -150
- package/dist/src/{util-C8e5uydV.js → util-3pBZZb_H.js} +142 -17
- package/dist/src/{util-CN3SrLT4.cjs → util-A5_ZsQUn.cjs} +65 -43
- package/dist/src/{util-D3q0WQ-0.js → util-B9CNhyac.js} +66 -44
- package/dist/src/{util-DxWpWjhc.js → util-BQOCAHQC.js} +700 -575
- package/dist/src/{util-BYvQUPp7.js → util-BVXcTwXu.js} +3 -3
- package/dist/src/{util-D9TisOyk.js → util-BlFVL0UF.js} +65 -43
- package/dist/src/{util-C9J8ahRn.js → util-C-kmRosx.js} +66 -44
- package/dist/src/{util-DvU2Pw8c.js → util-DFPeFkiV.js} +3 -3
- package/dist/src/{util-DDs-7g6-.js → util-DN0-b81k.js} +3 -3
- package/dist/src/{util-olYL5C6N.cjs → util-Dpmm_dAI.cjs} +3 -3
- package/dist/src/{util-oGMLA7vc.js → util-Dub0f_ej.js} +700 -575
- package/dist/src/{util-Bxn8emtE.cjs → util-DvpHnLt0.cjs} +718 -570
- package/dist/src/{utils-DJfvjyMj.js → utils-BUMN8orw.js} +3 -3
- package/dist/src/{utils-B05gLxER.cjs → utils-DkVeShIB.cjs} +2 -2
- package/dist/src/{utils-BLJKfv0y.js → utils-kt7lv30R.js} +3 -3
- package/dist/src/{utils-hXtCYanr.js → utils-o8S5huU2.js} +2 -2
- package/dist/src/version-0frU0UTr.js +16 -0
- package/dist/src/version-CbpiUINz.js +17 -0
- package/dist/src/version-CbuBKu2U.js +16 -0
- package/dist/src/version-D9zu9FWB.cjs +27 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +22 -20
- package/dist/src/app/assets/Report-CQYFezYu.js +0 -1
- package/dist/src/app/assets/index-BzJt18Jz.js +0 -385
- package/dist/src/cache-Cr9oLMUa.js +0 -3
- package/dist/src/cloud-Hphvo8kr.js +0 -3
- package/dist/src/codex-sdk-BAmYE7qy.js +0 -3
- package/dist/src/evalResult-D8MT9p0s.js +0 -3
- package/dist/src/evalResult-Dvc-iucu.cjs +0 -2
- package/dist/src/evaluator-CVessDWe.js +0 -3
- package/dist/src/fetch-C7bGKDlQ.js +0 -3
- package/dist/src/graders-BOAzQEUe.cjs +0 -2
- package/dist/src/graders-D4BTsZdG2.js +0 -3
- package/dist/src/graders-DOJK1XpV.js +0 -2
- package/dist/src/graders-NAv9LcBn.js +0 -2
- package/dist/src/rubyUtils-D1L2d3jb.js +0 -3
- package/dist/src/rubyUtils-DUbq4tff.cjs +0 -2
- package/dist/src/server-DCtHUqlp.js +0 -3
- package/dist/src/store-CWOSz6D_.cjs +0 -2
- package/dist/src/store-DCDBhv7B.js +0 -3
- package/dist/src/telemetry-C1IqxcdW.js +0 -3
- package/dist/src/telemetry-C4ZEa_es.cjs +0 -2
- package/dist/src/transform-M6ITAESf.js +0 -3
- /package/dist/src/{evalResult-DElBuddX.js → evalResult-spPqh1G_.js} +0 -0
package/dist/src/main.js
CHANGED
|
@@ -1,34 +1,35 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-
|
|
3
|
-
import { A as getDefaultPort, F as
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { a as
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { r as
|
|
10
|
-
import {
|
|
11
|
-
import { a as
|
|
12
|
-
import {
|
|
13
|
-
import { a as
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
-
import { r as runPython } from "./pythonUtils-
|
|
17
|
-
import { A as
|
|
18
|
-
import { i as
|
|
19
|
-
import { n as
|
|
20
|
-
import { a as
|
|
21
|
-
import { n as
|
|
22
|
-
import {
|
|
23
|
-
import { n as
|
|
24
|
-
import { n as
|
|
25
|
-
import {
|
|
26
|
-
import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-
|
|
27
|
-
import {
|
|
28
|
-
import {
|
|
29
|
-
import { t as
|
|
30
|
-
import { t as
|
|
31
|
-
import {
|
|
2
|
+
import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
|
|
3
|
+
import { A as getDefaultPort, F as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, T as CLOUD_PROVIDER_PREFIX, i as fetchWithTimeout, j as getDefaultShareViewBaseUrl, k as TERMINAL_MAX_WIDTH, l as sleep, m as REQUEST_TIMEOUT_MS, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, v as isPromptfooSampleTarget } from "./fetch-B6ch2nU2.js";
|
|
4
|
+
import { n as VERSION } from "./version-CbpiUINz.js";
|
|
5
|
+
import { t as invariant } from "./invariant-B2Rf6avk.js";
|
|
6
|
+
import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CLJHCDDb.js";
|
|
7
|
+
import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-Da0bofJd.js";
|
|
8
|
+
import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-DWdGHvEf.js";
|
|
9
|
+
import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BVH9hjgW.js";
|
|
10
|
+
import { r as importModule, t as getDirectory } from "./esm-BX8fwlAO.js";
|
|
11
|
+
import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-eui5p5mL.js";
|
|
12
|
+
import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-ByiF3qlg.js";
|
|
13
|
+
import { A as validateStrategies, B as extractVariablesFromJson, H as getShortPluginId, L as extractGoalFromPrompt, M as mediaExists, N as retrieveMedia, O as Strategies, P as pluginMatchesStrategyTargets, Q as TokenUsageTracker, Z as redteamProviderManager, _t as getUserTeams, a as resolveProviderConfigs, at as getMaxCharsPerMessageModifierValue, bt as resolveTeamFromIdentifier, ct as canCreateTargets, d as MCPProvider, dt as getConfigFromCloud, f as HttpProvider, ft as getEvalConfigFromCloud, gt as getProviderFromCloud, i as resolveProvider, it as getGeneratedPromptOverLimit, j as getMediaStorage, k as loadStrategy, lt as checkCloudPermissions, m as createTransformResponse, mt as getPluginSeverityOverridesFromCloud, n as loadApiProvider, nt as PromptfooHarmfulCompletionProvider, p as createTransformRequest, pt as getOrgContext, q as renderPrompt, r as loadApiProviders, rt as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, t as getProviderIds, ut as getCloudDatabaseId, vt as isCloudProvider, xt as resolveTeamId, yt as makeRequest$1, z as extractPromptFromTags } from "./providers-DT-GtF2t.js";
|
|
14
|
+
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-BI5BY7ey.js";
|
|
15
|
+
import { i as isJavascriptFile } from "./fileExtensions-DysCsxNG.js";
|
|
16
|
+
import { r as runPython } from "./pythonUtils-DNqbnRdx.js";
|
|
17
|
+
import { A as maybeLoadFromExternalFile, F as readFilters, I as readOutput, T as normalizeProviderRef, _ as doesProviderRefMatch, a as ComparisonEvalNotFoundError, c as mergeComparisonTables, d as deduplicateTestCases, f as extractRuntimeVars, g as checkProviderApiKeys, h as resultIsForTestCase, i as writeOutput, k as maybeLoadConfigFromExternalFile, l as fetchCsvFromGoogleSheet, m as getTestCaseDeduplicationKey, n as createOutputMetadata, o as evalTableToJson, p as filterRuntimeVars, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as setupEnv, v as getProviderDescription } from "./util-BQOCAHQC.js";
|
|
18
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-2wIvAhB3.js";
|
|
19
|
+
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DjLby-uP.js";
|
|
20
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-NlvH23ky.js";
|
|
21
|
+
import { n as isBlobStorageEnabled } from "./extractor-DxyiFhPk.js";
|
|
22
|
+
import { n as escapeRegExp, t as ellipsize } from "./text-D4lz-Jg_.js";
|
|
23
|
+
import { n as getTraceStore } from "./store-CPh25336.js";
|
|
24
|
+
import { n as sha256, t as randomSequence } from "./createHash-DPpsZgFF.js";
|
|
25
|
+
import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, J as doRemoteGrading, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, R as getCustomPolicies, S as PlinyPlugin, St as getDefaultProviders, T as IntentPlugin, V as fetchHuggingFaceDataset, X as readPrompts, Y as processPrompts, Z as readProviderPromptMap, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-CgPn32yp.js";
|
|
26
|
+
import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-D-UIbbYq.js";
|
|
27
|
+
import { i as generateIdFromPrompt } from "./utils-kt7lv30R.js";
|
|
28
|
+
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-DTtUuU3l.js";
|
|
29
|
+
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-DJ_4A-tr.js";
|
|
30
|
+
import { t as EvalResult } from "./evalResult-D6P5I5il.js";
|
|
31
|
+
import { t as formatDuration } from "./formatDuration-Ch4A7G3o.js";
|
|
32
|
+
import { n as shouldUseInkUI } from "./interactiveCheck-CCICw2cy.js";
|
|
32
33
|
import { fileURLToPath } from "node:url";
|
|
33
34
|
import { Command, InvalidArgumentError } from "commander";
|
|
34
35
|
import { z } from "zod";
|
|
@@ -100,7 +101,7 @@ function runCommand(program) {
|
|
|
100
101
|
hasGithubPr: !!cmdObj.githubPr,
|
|
101
102
|
hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
|
|
102
103
|
});
|
|
103
|
-
const { executeScan } = await import("./scanner-
|
|
104
|
+
const { executeScan } = await import("./scanner-nOCWNIXa.js");
|
|
104
105
|
await executeScan(repoPath, cmdObj);
|
|
105
106
|
});
|
|
106
107
|
}
|
|
@@ -572,47 +573,11 @@ function filterPrompts(prompts, filterPromptsOption) {
|
|
|
572
573
|
//#endregion
|
|
573
574
|
//#region src/commands/eval/filterProviders.ts
|
|
574
575
|
/**
|
|
575
|
-
* Checks if a value is a valid provider ID (non-empty string).
|
|
576
|
-
*/
|
|
577
|
-
function isValidProviderId(id) {
|
|
578
|
-
return id !== null && id !== void 0 && typeof id === "string" && id !== "";
|
|
579
|
-
}
|
|
580
|
-
/**
|
|
581
576
|
* Extracts the id and label from a raw provider config without instantiating it.
|
|
582
577
|
* Handles all provider config formats: string, function, ProviderOptions, ProviderOptionsMap.
|
|
583
578
|
*/
|
|
584
579
|
function getProviderIdAndLabel(provider, index) {
|
|
585
|
-
|
|
586
|
-
if (typeof provider === "function") {
|
|
587
|
-
const label = provider.label;
|
|
588
|
-
return {
|
|
589
|
-
id: label ?? `custom-function-${index}`,
|
|
590
|
-
label
|
|
591
|
-
};
|
|
592
|
-
}
|
|
593
|
-
const providerId = provider.id;
|
|
594
|
-
if ("id" in provider && isValidProviderId(providerId)) return {
|
|
595
|
-
id: providerId,
|
|
596
|
-
label: provider.label
|
|
597
|
-
};
|
|
598
|
-
const keys = Object.keys(provider);
|
|
599
|
-
if (keys.length > 0) {
|
|
600
|
-
const id = keys[0];
|
|
601
|
-
const value = provider[id];
|
|
602
|
-
if (typeof value === "object" && value !== null) return {
|
|
603
|
-
id: value.id || id,
|
|
604
|
-
label: value.label
|
|
605
|
-
};
|
|
606
|
-
}
|
|
607
|
-
const label = provider.label;
|
|
608
|
-
if (isValidProviderId(label)) return {
|
|
609
|
-
id: label,
|
|
610
|
-
label
|
|
611
|
-
};
|
|
612
|
-
return {
|
|
613
|
-
id: `unknown-${index}`,
|
|
614
|
-
label
|
|
615
|
-
};
|
|
580
|
+
return normalizeProviderRef(provider, { index });
|
|
616
581
|
}
|
|
617
582
|
/**
|
|
618
583
|
* Filters raw provider configs BEFORE instantiation.
|
|
@@ -3208,7 +3173,7 @@ function generateTable(evaluateTable, tableCellMaxLength = 250, maxRows = 25) {
|
|
|
3208
3173
|
for (const row of evaluateTable.body.slice(0, maxRows)) table.push([...row.vars.map((v) => ellipsize(v, tableCellMaxLength)), ...row.outputs.map(({ pass, text, failureReason: failureType }) => {
|
|
3209
3174
|
text = ellipsize(text, tableCellMaxLength);
|
|
3210
3175
|
if (pass) return chalk.green("[PASS] ") + text;
|
|
3211
|
-
|
|
3176
|
+
return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
|
|
3212
3177
|
})]);
|
|
3213
3178
|
return table.toString();
|
|
3214
3179
|
}
|
|
@@ -3293,6 +3258,115 @@ function shouldShareResults(opts) {
|
|
|
3293
3258
|
}
|
|
3294
3259
|
//#endregion
|
|
3295
3260
|
//#region src/commands/eval/summary.ts
|
|
3261
|
+
/**
 * Builds the single headline that opens the CLI summary.
 *
 * @param {object} args
 * @param {string} args.completionType - Label interpolated at the start of the headline.
 * @param {string} args.evalId - Eval identifier; only shown when results were persisted.
 * @param {string} [args.shareableUrl] - When set (and results were persisted) it is shown instead of the ID.
 * @param {boolean} args.wasAborted - Produces the "aborted" headline when true.
 * @param {boolean} args.writeToDatabase - Whether results were written to the database.
 * @param {boolean} args.activelySharing - Suppresses the ID while no URL is available yet.
 * @returns {string} The colorized headline.
 */
function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
  // Built lazily so the ID is only colorized on the paths that actually print it.
  const formatId = () => ` (ID: ${chalk.cyan(evalId)})`;

  if (wasAborted) {
    return `${chalk.red("✗")} ${completionType} aborted${writeToDatabase ? formatId() : ""}`;
  }

  const headline = `${chalk.green("✓")} ${completionType} complete`;
  if (!writeToDatabase) return headline;
  if (shareableUrl) return `${headline}: ${shareableUrl}`;
  if (activelySharing) return headline;
  return `${headline}${formatId()}`;
}
|
|
3271
|
+
/**
 * Produces the explanatory lines printed when a scan was stopped because of
 * a target error status.
 *
 * @param {number|null|undefined} targetErrorStatus - HTTP status reported for the target; nullish means "not aborted".
 * @returns {string[]} Colorized lines, or an empty array when there is no status.
 */
function getAbortSummaryLines(targetErrorStatus) {
  if (targetErrorStatus === null || targetErrorStatus === void 0) return [];

  const possibleCauses = [
    " • Invalid API key or authentication (401/403)",
    " • Target endpoint does not exist (404)",
    " • Server does not support the request (501)"
  ];

  const lines = [""];
  lines.push(chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
  lines.push(chalk.red(` Target returned HTTP ${targetErrorStatus}`));
  lines.push("");
  lines.push(chalk.yellow("Possible causes:"));
  for (const cause of possibleCauses) lines.push(chalk.yellow(cause));
  lines.push("");
  lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
  return lines;
}
|
|
3286
|
+
/**
 * Produces the "what to do next" hints shown after a run.
 *
 * Nothing is returned when results were not persisted, or when sharing is
 * already done, requested, or in progress.
 *
 * @param {object} args
 * @param {boolean} args.writeToDatabase - Whether results were persisted.
 * @param {string} [args.shareableUrl] - Already-available share URL, if any.
 * @param {boolean} [args.wantsToShare] - Whether sharing was requested.
 * @param {boolean} [args.activelySharing] - Whether sharing is in progress.
 * @param {boolean} [args.hasExplicitDisable] - When true the sharing hint is omitted.
 * @param {boolean} [args.cloudEnabled] - Selects which sharing hint is shown.
 * @returns {string[]} Hint lines (possibly empty).
 */
function getGuidanceLines({ writeToDatabase, shareableUrl, wantsToShare, activelySharing, hasExplicitDisable, cloudEnabled }) {
  const sharingHandled = Boolean(shareableUrl || wantsToShare || activelySharing);
  if (!writeToDatabase || sharingHandled) return [];

  const hints = [""];
  hints.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
  if (!hasExplicitDisable) {
    if (cloudEnabled) {
      hints.push(`» Create shareable URL: ${chalk.green.bold("promptfoo share")}`);
    } else {
      hints.push(`» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
    }
  }
  hints.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
  return hints;
}
|
|
3293
|
+
/**
 * Lists the non-zero token categories of a usage record as display strings,
 * in the order prompt, completion, cached, reasoning.
 *
 * @param {object} usage - Record with optional `prompt`, `completion`, `cached`
 *   and `completionDetails.reasoning` counts.
 * @param {number} total - Total the record is displayed with; when the cached
 *   count equals it and nothing else was listed, the entry is the bare word "cached".
 * @returns {string[]} One entry per category with a positive count.
 */
function buildUsageDetails(usage, total) {
  const isPositive = (value) => Boolean(value) && value > 0;
  const details = [];

  for (const field of ["prompt", "completion"]) {
    if (isPositive(usage[field])) details.push(`${usage[field].toLocaleString()} ${field}`);
  }

  if (isPositive(usage.cached)) {
    const everythingCached = usage.cached === total && details.length === 0;
    details.push(everythingCached ? "cached" : `${usage.cached.toLocaleString()} cached`);
  }

  const reasoning = usage.completionDetails?.reasoning;
  if (isPositive(reasoning)) details.push(`${reasoning.toLocaleString()} reasoning`);

  return details;
}
|
|
3301
|
+
/**
 * Produces the token-usage section of the CLI summary.
 *
 * Returns an empty array when neither eval nor grading tokens were recorded.
 * Otherwise the result is a "Total Tokens" line, an optional "Probes" line
 * (only when `isRedteam` is set and a request count exists), "Eval" and
 * "Grading" lines for the totals that are positive, followed by whatever
 * `getProviderUsageLines(tracker)` returns.
 *
 * The per-category breakdown in parentheses is only appended when there is
 * something to list, so a usage record that carries just a total renders as
 * "Eval: 100" rather than "Eval: 100 ()".
 *
 * @param {object} tokenUsage - Aggregate usage with optional `prompt`,
 *   `completion`, `total`, `cached`, `numRequests`, `completionDetails` and
 *   `assertions` (grading usage) fields.
 * @param {boolean} isRedteam - Enables the "Probes" line.
 * @param {object} tracker - Passed through to `getProviderUsageLines`.
 * @returns {string[]} Colorized lines (possibly empty).
 */
function getTokenUsageLines(tokenUsage, isRedteam, tracker) {
  const promptTokens = tokenUsage.prompt || 0;
  const completionTokens = tokenUsage.completion || 0;
  const combinedTotal = promptTokens + completionTokens;
  const gradingUsage = tokenUsage.assertions;
  const gradingTotal = gradingUsage?.total || 0;

  const hasEvalTokens = (tokenUsage.total || 0) > 0 || combinedTotal > 0;
  if (!hasEvalTokens && gradingTotal <= 0) return [];

  // Fall back to prompt + completion when no explicit total was recorded.
  const evalTokens = {
    prompt: promptTokens,
    completion: completionTokens,
    total: tokenUsage.total || combinedTotal,
    cached: tokenUsage.cached || 0,
    completionDetails: tokenUsage.completionDetails
  };

  // Avoid a dangling " ()" when no category has a positive count.
  const formatBreakdown = (parts) => (parts.length > 0 ? ` (${parts.join(", ")})` : "");

  const grandTotal = evalTokens.total + gradingTotal;
  const lines = [`${chalk.bold("Total Tokens:")} ${chalk.white.bold(grandTotal.toLocaleString())}`];

  if (isRedteam && tokenUsage.numRequests) {
    lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
  }
  if (evalTokens.total > 0) {
    const breakdown = formatBreakdown(buildUsageDetails(evalTokens, evalTokens.total));
    lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())}${breakdown}`);
  }
  if (gradingTotal > 0) {
    const breakdown = formatBreakdown(buildUsageDetails(gradingUsage, gradingTotal));
    lines.push(` ${chalk.gray("Grading:")} ${chalk.white(gradingTotal.toLocaleString())}${breakdown}`);
  }

  lines.push(...getProviderUsageLines(tracker));
  return lines;
}
|
|
3331
|
+
/**
 * Produces the per-provider usage section of the CLI summary.
 *
 * The section is only emitted when the tracker knows more than one provider
 * and at least one of them has non-zero usage; otherwise an empty array is
 * returned so no dangling "Providers:" header is printed. Rows are ordered by
 * the same total that is displayed (explicit `total`, else prompt +
 * completion), largest first.
 *
 * @param {object} tracker - Object exposing `getProviderIds()` and
 *   `getProviderUsage(id)`.
 * @returns {string[]} A blank line, the header and one row per provider, or `[]`.
 */
function getProviderUsageLines(tracker) {
  const providerIds = tracker.getProviderIds();
  if (providerIds.length <= 1) return [];

  const rows = providerIds
    .map((id) => ({ id, usage: tracker.getProviderUsage(id) }))
    .filter(({ usage }) => usage != null)
    .map(({ id, usage }) => ({
      id,
      usage,
      displayTotal: usage.total || (usage.prompt || 0) + (usage.completion || 0)
    }))
    // A zero display total means no total, prompt or completion tokens were recorded.
    .filter(({ displayTotal }) => displayTotal !== 0)
    .sort((a, b) => b.displayTotal - a.displayTotal)
    .map(({ id, usage, displayTotal }) => {
      // Show only the part of the id that precedes the first " (".
      const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
      const details = buildUsageDetails(usage, displayTotal);
      const requestInfo = `${usage.numRequests || 0} requests`;
      const separator = details.length > 0 ? "; " : "";
      return ` ${chalk.gray(`${displayId}:`)} ${chalk.white(displayTotal.toLocaleString())} (${requestInfo}${separator}${details.join(", ")})`;
    });

  if (rows.length === 0) return [];
  return ["", chalk.bold("Providers:"), ...rows];
}
|
|
3350
|
+
/**
 * Formats `count` as a percentage of `totalTests`.
 *
 * Exactly 0% and 100% are printed without decimals; every other value uses
 * two decimal places. A total of zero yields "0%".
 *
 * @param {number} count - Number of results in the category.
 * @param {number} totalTests - Number of results overall.
 * @returns {string} Percentage text including the "%" sign.
 */
function formatResultPercentage(count, totalTests) {
  let percentage = 0;
  if (totalTests !== 0) percentage = count / totalTests * 100;

  const fractionDigits = percentage === 0 || percentage === 100 ? 0 : 2;
  return `${percentage.toFixed(fractionDigits)}%`;
}
|
|
3354
|
+
/**
 * Formats one row of the results section: optional colored icon, count,
 * label and the count's share of all tests.
 *
 * @param {number} count - Number of results in the category.
 * @param {string} label - Category label.
 * @param {string|undefined} icon - Icon to show; omitted when falsy.
 * @param {Function} iconColor - Colorizer applied to the icon.
 * @param {number} totalTests - Total used for the percentage.
 * @returns {string} The formatted row.
 */
function formatResultLine(count, label, icon, iconColor, totalTests) {
  const iconPrefix = icon ? `${iconColor(icon)} ` : "";
  const countText = chalk.white.bold(count.toLocaleString());
  const labelText = chalk.white(label);
  const shareText = chalk.gray(`(${formatResultPercentage(count, totalTests)})`);
  return ` ${iconPrefix}${countText} ${labelText} ${shareText}`;
}
|
|
3357
|
+
/**
 * Produces the results section of the CLI summary: a header, one row each for
 * passed / failed / errored tests, and a duration line.
 *
 * @param {object} args
 * @param {number} args.successes - Number of passing results.
 * @param {number} args.failures - Number of failing results.
 * @param {number} args.errors - Number of errored results.
 * @param {number} args.duration - Run duration, handed to `formatDuration`.
 * @param {number} args.maxConcurrency - Concurrency shown next to the duration.
 * @returns {string[]} Colorized lines, starting and ending with a blank line.
 */
function getResultsLines({ successes, failures, errors, duration, maxConcurrency }) {
  const totalTests = successes + failures + errors;

  // [count, label, icon, icon color]; the icon is only shown for non-zero counts.
  const categories = [
    [successes, "passed", "✓", chalk.green],
    [failures, "failed", "✗", chalk.red],
    [errors, errors === 1 ? "error" : "errors", "✗", chalk.red]
  ];

  const lines = ["", chalk.bold("Results:")];
  for (const [count, label, icon, color] of categories) {
    lines.push(formatResultLine(count, label, count > 0 ? icon : void 0, color, totalTests));
  }
  lines.push(chalk.gray(`Duration: ${formatDuration(duration)} (concurrency: ${maxConcurrency})`));
  lines.push("");
  return lines;
}
|
|
3296
3370
|
/**
|
|
3297
3371
|
* Generate formatted evaluation summary output for CLI display.
|
|
3298
3372
|
*
|
|
@@ -3331,115 +3405,28 @@ function shouldShareResults(opts) {
|
|
|
3331
3405
|
* ```
|
|
3332
3406
|
*/
|
|
3333
3407
|
function generateEvalSummary(params) {
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3341
|
-
|
|
3342
|
-
|
|
3343
|
-
|
|
3344
|
-
|
|
3345
|
-
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3350
|
-
|
|
3351
|
-
|
|
3352
|
-
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
|
|
3356
|
-
lines.push("");
|
|
3357
|
-
lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
|
|
3358
|
-
}
|
|
3359
|
-
if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
|
|
3360
|
-
lines.push("");
|
|
3361
|
-
lines.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
|
|
3362
|
-
if (!hasExplicitDisable) if (cloudEnabled) lines.push(`» Create shareable URL: ${chalk.green.bold("promptfoo share")}`);
|
|
3363
|
-
else lines.push(`» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
|
|
3364
|
-
lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
|
|
3365
|
-
}
|
|
3366
|
-
lines.push("");
|
|
3367
|
-
const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
|
|
3368
|
-
const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
|
|
3369
|
-
if (hasEvalTokens || hasGradingTokens) {
|
|
3370
|
-
const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
|
|
3371
|
-
const evalTokens = {
|
|
3372
|
-
prompt: tokenUsage.prompt || 0,
|
|
3373
|
-
completion: tokenUsage.completion || 0,
|
|
3374
|
-
total: tokenUsage.total || combinedTotal,
|
|
3375
|
-
cached: tokenUsage.cached || 0,
|
|
3376
|
-
completionDetails: tokenUsage.completionDetails || {
|
|
3377
|
-
reasoning: 0,
|
|
3378
|
-
acceptedPrediction: 0,
|
|
3379
|
-
rejectedPrediction: 0
|
|
3380
|
-
}
|
|
3381
|
-
};
|
|
3382
|
-
const grandTotal = evalTokens.total + (tokenUsage.assertions?.total || 0);
|
|
3383
|
-
lines.push(`${chalk.bold("Total Tokens:")} ${chalk.white.bold(grandTotal.toLocaleString())}`);
|
|
3384
|
-
if (isRedteam && tokenUsage.numRequests) lines.push(` ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
|
|
3385
|
-
if (evalTokens.total > 0) {
|
|
3386
|
-
const evalParts = [];
|
|
3387
|
-
if (evalTokens.prompt > 0) evalParts.push(`${evalTokens.prompt.toLocaleString()} prompt`);
|
|
3388
|
-
if (evalTokens.completion > 0) evalParts.push(`${evalTokens.completion.toLocaleString()} completion`);
|
|
3389
|
-
if (evalTokens.cached > 0) if (evalTokens.cached === evalTokens.total && evalParts.length === 0) evalParts.push("cached");
|
|
3390
|
-
else evalParts.push(`${evalTokens.cached.toLocaleString()} cached`);
|
|
3391
|
-
if (evalTokens.completionDetails?.reasoning && evalTokens.completionDetails.reasoning > 0) evalParts.push(`${evalTokens.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3392
|
-
lines.push(` ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
|
|
3393
|
-
}
|
|
3394
|
-
if (tokenUsage.assertions && tokenUsage.assertions.total && tokenUsage.assertions.total > 0) {
|
|
3395
|
-
const gradingParts = [];
|
|
3396
|
-
if (tokenUsage.assertions.prompt && tokenUsage.assertions.prompt > 0) gradingParts.push(`${tokenUsage.assertions.prompt.toLocaleString()} prompt`);
|
|
3397
|
-
if (tokenUsage.assertions.completion && tokenUsage.assertions.completion > 0) gradingParts.push(`${tokenUsage.assertions.completion.toLocaleString()} completion`);
|
|
3398
|
-
if (tokenUsage.assertions.cached && tokenUsage.assertions.cached > 0) if (tokenUsage.assertions.cached === tokenUsage.assertions.total && gradingParts.length === 0) gradingParts.push("cached");
|
|
3399
|
-
else gradingParts.push(`${tokenUsage.assertions.cached.toLocaleString()} cached`);
|
|
3400
|
-
if (tokenUsage.assertions.completionDetails?.reasoning && tokenUsage.assertions.completionDetails.reasoning > 0) gradingParts.push(`${tokenUsage.assertions.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3401
|
-
lines.push(` ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
|
|
3402
|
-
}
|
|
3403
|
-
const providerIds = tracker.getProviderIds();
|
|
3404
|
-
if (providerIds.length > 1) {
|
|
3405
|
-
lines.push("");
|
|
3406
|
-
lines.push(chalk.bold("Providers:"));
|
|
3407
|
-
const sortedProviders = providerIds.map((id) => ({
|
|
3408
|
-
id,
|
|
3409
|
-
usage: tracker.getProviderUsage(id)
|
|
3410
|
-
})).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
|
|
3411
|
-
for (const { id, usage } of sortedProviders) if ((usage.total || 0) > 0 || (usage.prompt || 0) + (usage.completion || 0) > 0) {
|
|
3412
|
-
const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
|
|
3413
|
-
const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
|
|
3414
|
-
const details = [];
|
|
3415
|
-
if (usage.prompt && usage.prompt > 0) details.push(`${usage.prompt.toLocaleString()} prompt`);
|
|
3416
|
-
if (usage.completion && usage.completion > 0) details.push(`${usage.completion.toLocaleString()} completion`);
|
|
3417
|
-
if (usage.cached && usage.cached > 0) if (usage.cached === displayTotal && details.length === 0) details.push("cached");
|
|
3418
|
-
else details.push(`${usage.cached.toLocaleString()} cached`);
|
|
3419
|
-
if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) details.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
|
|
3420
|
-
const breakdown = ` (${`${usage.numRequests || 0} requests`}${details.length > 0 ? "; " : ""}${details.join(", ")})`;
|
|
3421
|
-
lines.push(` ${chalk.gray(displayId + ":")} ${chalk.white(displayTotal.toLocaleString())}${breakdown}`);
|
|
3422
|
-
}
|
|
3423
|
-
}
|
|
3424
|
-
}
|
|
3425
|
-
lines.push("");
|
|
3426
|
-
const totalTests = successes + failures + errors;
|
|
3427
|
-
const formatResultPercentage = (count) => {
|
|
3428
|
-
const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
|
|
3429
|
-
return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
|
|
3430
|
-
};
|
|
3431
|
-
const formatResultLine = (count, label, icon, iconColor) => {
|
|
3432
|
-
return ` ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count)})`)}`;
|
|
3433
|
-
};
|
|
3434
|
-
const errorLabel = errors === 1 ? "error" : "errors";
|
|
3435
|
-
lines.push(chalk.bold("Results:"));
|
|
3436
|
-
lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green));
|
|
3437
|
-
lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red));
|
|
3438
|
-
lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red));
|
|
3439
|
-
const durationDisplay = formatDuration(duration);
|
|
3440
|
-
lines.push(chalk.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
|
|
3441
|
-
lines.push("");
|
|
3442
|
-
return lines;
|
|
3408
|
+
return [
|
|
3409
|
+
getCompletionMessage({
|
|
3410
|
+
completionType: params.isRedteam ? "Red team" : "Eval",
|
|
3411
|
+
evalId: params.evalId,
|
|
3412
|
+
shareableUrl: params.shareableUrl,
|
|
3413
|
+
wasAborted: params.targetErrorStatus != null,
|
|
3414
|
+
writeToDatabase: params.writeToDatabase,
|
|
3415
|
+
activelySharing: params.activelySharing ?? false
|
|
3416
|
+
}),
|
|
3417
|
+
...getAbortSummaryLines(params.targetErrorStatus),
|
|
3418
|
+
...getGuidanceLines({
|
|
3419
|
+
writeToDatabase: params.writeToDatabase,
|
|
3420
|
+
shareableUrl: params.shareableUrl,
|
|
3421
|
+
wantsToShare: params.wantsToShare,
|
|
3422
|
+
activelySharing: params.activelySharing ?? false,
|
|
3423
|
+
hasExplicitDisable: params.hasExplicitDisable,
|
|
3424
|
+
cloudEnabled: params.cloudEnabled
|
|
3425
|
+
}),
|
|
3426
|
+
"",
|
|
3427
|
+
...getTokenUsageLines(params.tokenUsage, params.isRedteam, params.tracker),
|
|
3428
|
+
...getResultsLines(params)
|
|
3429
|
+
];
|
|
3443
3430
|
}
|
|
3444
3431
|
//#endregion
|
|
3445
3432
|
//#region src/commands/retry.ts
|
|
@@ -6517,9 +6504,10 @@ function dedupeTestCases(testCases) {
|
|
|
6517
6504
|
return deduped;
|
|
6518
6505
|
}
|
|
6519
6506
|
function buildMaxCharsRetryInstructions(rejectedPromptLengths, limit) {
|
|
6507
|
+
const longestRejectedPromptText = rejectedPromptLengths.length > 0 ? `${Math.max(...rejectedPromptLengths)} characters` : "unknown length";
|
|
6520
6508
|
return dedent`
|
|
6521
6509
|
Your previous response included ${rejectedPromptLengths.length} generated prompt${rejectedPromptLengths.length === 1 ? "" : "s"} that exceeded the ${limit ?? "configured"}-character limit.
|
|
6522
|
-
The longest rejected prompt was ${
|
|
6510
|
+
The longest rejected prompt was ${longestRejectedPromptText}.
|
|
6523
6511
|
Generate replacement prompts only, and keep every user message within the character limit.
|
|
6524
6512
|
`.trim();
|
|
6525
6513
|
}
|
|
@@ -8359,6 +8347,26 @@ async function doRedteamRun(options) {
|
|
|
8359
8347
|
}
|
|
8360
8348
|
//#endregion
|
|
8361
8349
|
//#region src/index.ts
|
|
8350
|
+
/**
 * Returns a shallow copy of a test case whose `options` object and `assert`
 * entries are themselves fresh shallow copies.
 *
 * This lets the caller replace `options.provider` / `assert[].provider` with
 * resolved ApiProvider instances without those writes reaching the input.
 * The input may alias the unified config stored on the Eval record, and a
 * live SDK client (e.g. Bedrock's BedrockRuntime, Anthropic's client) carries
 * circular references that break drizzle's JSON serialization on
 * `evalRecord.save()`. Fixes #8687.
 *
 * Only `options` and `assert[]` are detached. Every other reference field
 * (`provider`, `vars`, `metadata`, `providerOutput`) is still shared with the
 * input, so callers must reassign those rather than mutate them in place.
 * Children of `assert-set` assertions are not deep-cloned because the resolve
 * loop skips `assert-set`; extend this helper if that ever changes.
 */
function cloneTestForResolve(test) {
  const { options, assert: assertions } = test;
  return {
    ...test,
    ...(options ? { options: { ...options } } : {}),
    ...(assertions ? { assert: assertions.map((assertion) => ({ ...assertion })) } : {})
  };
}
|
|
8362
8370
|
async function evaluate(testSuite, options = {}) {
|
|
8363
8371
|
if (testSuite.writeLatestResults) await runDbMigrations();
|
|
8364
8372
|
const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
|
|
@@ -8378,22 +8386,24 @@ async function evaluate(testSuite, options = {}) {
|
|
|
8378
8386
|
nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
|
|
8379
8387
|
prompts: await processPrompts(testSuite.prompts)
|
|
8380
8388
|
};
|
|
8381
|
-
if (typeof constructedTestSuite.defaultTest === "object") {
|
|
8382
|
-
|
|
8389
|
+
if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
|
|
8390
|
+
constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
|
|
8391
|
+
if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
|
|
8383
8392
|
env: testSuite.env,
|
|
8384
8393
|
basePath: state.basePath
|
|
8385
8394
|
});
|
|
8386
|
-
if (constructedTestSuite.defaultTest
|
|
8395
|
+
if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
|
|
8387
8396
|
env: testSuite.env,
|
|
8388
8397
|
basePath: state.basePath
|
|
8389
8398
|
});
|
|
8390
8399
|
}
|
|
8391
|
-
|
|
8400
|
+
constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
|
|
8401
|
+
for (const test of constructedTestSuite.tests) {
|
|
8392
8402
|
if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
|
|
8393
8403
|
env: testSuite.env,
|
|
8394
8404
|
basePath: state.basePath
|
|
8395
8405
|
});
|
|
8396
|
-
|
|
8406
|
+
for (const assertion of test.assert || []) {
|
|
8397
8407
|
if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
|
|
8398
8408
|
if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
|
|
8399
8409
|
env: testSuite.env,
|
|
@@ -9353,7 +9363,7 @@ async function checkForUpdates() {
|
|
|
9353
9363
|
} catch {
|
|
9354
9364
|
return false;
|
|
9355
9365
|
}
|
|
9356
|
-
if (semverGt(latestVersion, "0.121.
|
|
9366
|
+
if (semverGt(latestVersion, "0.121.5")) {
|
|
9357
9367
|
const border = "=".repeat(TERMINAL_MAX_WIDTH);
|
|
9358
9368
|
logger.info(`\n${border}
|
|
9359
9369
|
${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
|
|
@@ -10701,6 +10711,38 @@ function normalizeTargetPurposeDiscoveryResult(result) {
|
|
|
10701
10711
|
tools: cleanTools(result.tools)
|
|
10702
10712
|
};
|
|
10703
10713
|
}
|
|
10714
|
+
/**
 * Returns the trimmed text of `value` when it is a string with non-whitespace
 * content; otherwise returns `undefined`.
 *
 * @param {unknown} value - Candidate field value.
 * @returns {string|undefined} Trimmed string, or `undefined`.
 */
function extractStringField(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? void 0 : trimmed;
}
|
|
10718
|
+
/**
 * Extracts a human-readable error detail from a failed HTTP response.
 *
 * Resolution order:
 *   1. For a JSON body: a non-empty string `message`, then a string `error`,
 *      then a nested `error.message`.
 *   2. The raw (trimmed) body text.
 *   3. `response.statusText`.
 *   4. The literal "Unknown error".
 *
 * This function never rejects: it runs while an error is already being
 * reported, so a body that cannot be read or parsed degrades to the next
 * fallback instead of replacing the HTTP error with an unrelated one.
 *
 * @param {object} response - Object exposing an async `text()` method and a
 *   `statusText` property.
 * @returns {Promise<string>} The best available detail text.
 */
async function getRemoteResponseErrorDetail(response) {
  let rawText = "";
  try {
    rawText = (await response.text()).trim();
  } catch {
    // Body could not be read; fall through to statusText below.
  }

  const fallback = rawText || response.statusText || "Unknown error";
  if (!rawText) return fallback;

  try {
    const parsed = JSON.parse(rawText);
    return extractStringField(parsed?.message)
      ?? extractStringField(parsed?.error)
      // Payloads shaped like { "error": { "message": "..." } }.
      ?? extractStringField(parsed?.error?.message)
      ?? fallback;
  } catch {
    // Not JSON: the raw text is the most useful detail available.
    return fallback;
  }
}
|
|
10729
|
+
/**
 * Remediation hints keyed by HTTP status code. Statuses that call for the
 * same action share one message.
 */
const REMOTE_ERROR_HINTS = (() => {
  const outdatedClient = "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.";
  const notAuthorized = "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.";
  return {
    400: outdatedClient,
    401: notAuthorized,
    403: notAuthorized,
    404: outdatedClient,
    429: "You are being rate limited. Wait a moment and try again."
  };
})();

/**
 * Looks up a remediation hint for an HTTP status.
 *
 * @param {number} status - HTTP status code of the failed response.
 * @returns {string|undefined} The table entry for `status`; otherwise a
 *   generic "service unavailable" hint for statuses >= 500; otherwise `undefined`.
 */
function getRemoteErrorHint(status) {
  const knownHint = REMOTE_ERROR_HINTS[status];
  if (knownHint) return knownHint;
  return status >= 500
    ? "The remote generation service may be temporarily unavailable. Retry in a few minutes or contact support if the issue persists."
    : void 0;
}
|
|
10740
|
+
/**
 * Builds the Error describing a failed remote response.
 *
 * The message is "Remote server returned HTTP <status>: <detail>", followed
 * on a new line by a remediation hint when one exists for the status.
 *
 * @param {object} response - Failed response; `status` is read from it and the
 *   whole object is passed to `getRemoteResponseErrorDetail`.
 * @returns {Promise<Error>} The constructed (not thrown) error.
 */
async function buildRemoteErrorFromResponse(response) {
  const detail = await getRemoteResponseErrorDetail(response);
  const messageLines = [`Remote server returned HTTP ${response.status}: ${detail}`];

  const hint = getRemoteErrorHint(response.status);
  if (hint) messageLines.push(hint);

  return new Error(messageLines.join("\n"));
}
|
|
10704
10746
|
/**
|
|
10705
10747
|
* Queries Cloud for the purpose-discovery logic, sends each logic to the target,
|
|
10706
10748
|
* and summarizes the results.
|
|
@@ -10750,11 +10792,7 @@ async function doTargetPurposeDiscovery(target, prompt, showProgress = true) {
|
|
|
10750
10792
|
email: getUserEmail()
|
|
10751
10793
|
}))
|
|
10752
10794
|
});
|
|
10753
|
-
if (!response.ok)
|
|
10754
|
-
const error = await response.text();
|
|
10755
|
-
logger.error(`${LOG_PREFIX} Error getting the next question from remote server: ${error}`);
|
|
10756
|
-
continue;
|
|
10757
|
-
}
|
|
10795
|
+
if (!response.ok) throw await buildRemoteErrorFromResponse(response);
|
|
10758
10796
|
const responseData = await response.json();
|
|
10759
10797
|
const data = TargetPurposeDiscoveryTaskResponseSchema.parse(responseData);
|
|
10760
10798
|
logger.debug(`${LOG_PREFIX} Received response from remote server: ${JSON.stringify(data, null, 2)}`);
|
|
@@ -12743,7 +12781,7 @@ router.get("/", async (_req, res) => {
|
|
|
12743
12781
|
};
|
|
12744
12782
|
} catch (error) {
|
|
12745
12783
|
logger.debug(`Failed to fetch latest version: ${error}`);
|
|
12746
|
-
latestVersion = versionCache.latestVersion ?? "0.121.
|
|
12784
|
+
latestVersion = versionCache.latestVersion ?? "0.121.5";
|
|
12747
12785
|
}
|
|
12748
12786
|
}
|
|
12749
12787
|
const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
|
|
@@ -12752,7 +12790,7 @@ router.get("/", async (_req, res) => {
|
|
|
12752
12790
|
selfHosted,
|
|
12753
12791
|
isNpx
|
|
12754
12792
|
});
|
|
12755
|
-
const resolvedLatestVersion = latestVersion ?? "0.121.
|
|
12793
|
+
const resolvedLatestVersion = latestVersion ?? "0.121.5";
|
|
12756
12794
|
const response = {
|
|
12757
12795
|
currentVersion: VERSION,
|
|
12758
12796
|
latestVersion: resolvedLatestVersion,
|
|
@@ -15174,7 +15212,11 @@ const EXAMPLE_ALIASES = {
|
|
|
15174
15212
|
"google-adk-example": "integration-google-adk",
|
|
15175
15213
|
"google-sheets": "integration-google-sheets",
|
|
15176
15214
|
"gpt-4o-temperature-comparison": "compare-gpt-temperature",
|
|
15177
|
-
"gpt-4o-vs-4o-mini": "compare-gpt-
|
|
15215
|
+
"compare-gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
|
|
15216
|
+
"compare-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
|
|
15217
|
+
"compare-gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
|
|
15218
|
+
"gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
|
|
15219
|
+
"gpt-model-tiers": "compare-gpt-model-tiers",
|
|
15178
15220
|
"gpt-vs-claude-vs-gemini": "compare-gpt-vs-claude-vs-gemini",
|
|
15179
15221
|
"grok-4-political-bias": "redteam-grok-4-political-bias",
|
|
15180
15222
|
groq: "provider-groq",
|
|
@@ -15225,7 +15267,9 @@ const EXAMPLE_ALIASES = {
|
|
|
15225
15267
|
"node-package-typescript": "config-node-package-typescript",
|
|
15226
15268
|
nscale: "provider-nscale",
|
|
15227
15269
|
"nunjucks-custom-filters": "config-nunjucks-custom-filters",
|
|
15228
|
-
"
|
|
15270
|
+
"gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
|
|
15271
|
+
"gpt-model-tiers-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
|
|
15272
|
+
"openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
|
|
15229
15273
|
"openai-model-comparison": "compare-openai-models",
|
|
15230
15274
|
openclaw: "provider-openclaw",
|
|
15231
15275
|
"opencode-sdk": "provider-opencode-sdk/basic",
|
|
@@ -15597,8 +15641,8 @@ function shouldUseInkList() {
|
|
|
15597
15641
|
async function runInkList(options) {
|
|
15598
15642
|
const [React, { renderInteractive }, { ListApp }] = await Promise.all([
|
|
15599
15643
|
import("react"),
|
|
15600
|
-
import("./render-
|
|
15601
|
-
import("./ListApp-
|
|
15644
|
+
import("./render-nj-UaPdn.js"),
|
|
15645
|
+
import("./ListApp-BRUsT43Y.js")
|
|
15602
15646
|
]);
|
|
15603
15647
|
let result = { cancelled: false };
|
|
15604
15648
|
let resolveResult;
|
|
@@ -17906,8 +17950,8 @@ function registerRunEvaluationTool(server) {
|
|
|
17906
17950
|
}
|
|
17907
17951
|
filteredTestSuite.tests = filteredTests;
|
|
17908
17952
|
}
|
|
17909
|
-
const { evaluate } = await import("./evaluator-
|
|
17910
|
-
const evalRecord = await (await import("./eval-
|
|
17953
|
+
const { evaluate } = await import("./evaluator-DgLKaZk8.js");
|
|
17954
|
+
const evalRecord = await (await import("./eval-BQPLBJbw.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
|
|
17911
17955
|
logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
|
|
17912
17956
|
const startTime = Date.now();
|
|
17913
17957
|
const result = await evaluate(filteredTestSuite, evalRecord, {
|