promptfoo 0.120.24 → 0.120.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -22
- package/dist/src/{ListApp-55OAHww3.js → ListApp-D3DG0F8h.js} +1 -1
- package/dist/src/{accounts-BwVGYllP.cjs → accounts-BzEY8H3v.cjs} +5 -5
- package/dist/src/{accounts-BOlMHrvn.js → accounts-DHHiXsy6.js} +6 -6
- package/dist/src/{accounts-DxR9x_En.js → accounts-R3gfCR_g.js} +5 -5
- package/dist/src/{agentic-utils-t9-1raty.js → agentic-utils-D6_gzOUF.js} +3 -3
- package/dist/src/{agents-DMpQ860j.cjs → agents-C7BiinFI.cjs} +7 -7
- package/dist/src/{agents-9Z7pO8yx.js → agents-Cao4i7AX.js} +8 -8
- package/dist/src/{agents-DjExVR3v.js → agents-Cnph5GLD.js} +2 -2
- package/dist/src/{agents-CI0Ad69d.cjs → agents-CwM7re15.cjs} +5 -5
- package/dist/src/{agents-BwI-Tm5m.js → agents-DETIQHqF.js} +7 -7
- package/dist/src/{agents-CRAmwjo-.js → agents-DYIT-hQy.js} +6 -6
- package/dist/src/{agents-BbIpDhgp.js → agents-GiUianme.js} +5 -5
- package/dist/src/{agents-yL5DzIKY.js → agents-v4cW_ZgC.js} +2 -2
- package/dist/src/{aimlapi-xHHqwNg8.js → aimlapi-CMJpKK-B.js} +13 -13
- package/dist/src/{aimlapi-BM9P45Bp.js → aimlapi-DMF6v_vb.js} +15 -15
- package/dist/src/{aimlapi-DKo7IxJg.cjs → aimlapi-DoGLcQW_.cjs} +14 -14
- package/dist/src/{aimlapi-ivzDkqbs.js → aimlapi-DtSf1ykJ.js} +2 -2
- package/dist/src/app/assets/index-BOgkICuY.css +1 -0
- package/dist/src/app/assets/index-CSgqn_Vd.js +428 -0
- package/dist/src/app/assets/{vendor-charts-DnwydkkY.js → vendor-charts-CYyo8R8v.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DjKAU15k.js → vendor-markdown-DSmzq4Jh.js} +1 -1
- package/dist/src/app/assets/vendor-react-AtKqiNEf.js +4 -0
- package/dist/src/app/assets/{vendor-syntax-Dc86c4Bu.js → vendor-syntax-D06x6TQF.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-B3QGL6Nl.cjs → audio-BWjyvHn9.cjs} +4 -4
- package/dist/src/{audio-C-1oBiVZ.js → audio-BrJBFN2b.js} +4 -4
- package/dist/src/{audio-DnfDVjhL.js → audio-DUH4q0Xq.js} +5 -5
- package/dist/src/{audio-DQWHfAr8.js → audio-U580w8jM.js} +2 -2
- package/dist/src/{base-DHam07ip.js → base-BaXmtXYp.js} +3 -3
- package/dist/src/{base-BjsgxJp9.js → base-Dtp8b4_N.js} +3 -3
- package/dist/src/{base-Bso8ccGu.cjs → base-f71xxWai.cjs} +3 -3
- package/dist/src/{blobs-DOdc-E8L.cjs → blobs-673H0jCl.cjs} +3 -3
- package/dist/src/{blobs-Dcm272OH.js → blobs-C9J2mVgo.js} +3 -3
- package/dist/src/{blobs-MlVQyoSc.js → blobs-kt8v3UyH.js} +3 -3
- package/dist/src/{cache-KNsTJLKP.cjs → cache-7xULbvt3.cjs} +35 -8
- package/dist/src/{cache-BMl3AIo5.js → cache-BLLayYEN.js} +24 -9
- package/dist/src/{cache-B0ptF7sO.js → cache-BUPcq0Ad.js} +3 -3
- package/dist/src/cache-CVfRb-HD.cjs +6 -0
- package/dist/src/{cache-DPztwHxt.js → cache-O4EuX2JV.js} +4 -4
- package/dist/src/{cache-DHm8Pc8l.js → cache-mIszOnuz.js} +5 -22
- package/dist/src/{chat-DOQIJx7O.cjs → chat-C4zqjObh.cjs} +10 -10
- package/dist/src/{chat-DDGU7Tik.js → chat-C8Ei4f87.js} +9 -9
- package/dist/src/{chat-BnzIuoTQ.js → chat-CgyGj2hC.js} +12 -12
- package/dist/src/{chat-BRMJOuCv.js → chat-Cpz3O-Xl.js} +14 -14
- package/dist/src/{chat-CVOyOBYk.js → chat-DIywASPG.js} +11 -11
- package/dist/src/{chat-CrnXMbvh.cjs → chat-Fl6TZJRS.cjs} +13 -13
- package/dist/src/{chat-D8GcWK9l.js → chat-XPN9YHhr.js} +2 -2
- package/dist/src/{chatkit-IWIuhxFE.js → chatkit-DEls11hE.js} +4 -4
- package/dist/src/{chatkit-DlOZ-eNb.js → chatkit-DIrJX8xk.js} +4 -4
- package/dist/src/{chatkit-CcktkleS.js → chatkit-Dpxrq4eD.js} +2 -2
- package/dist/src/{chatkit-yeVtURUi.cjs → chatkit-e25Ziu17.cjs} +4 -4
- package/dist/src/{claude-agent-sdk-BiMgSH9g.js → claude-agent-sdk-6-xTaLwM.js} +48 -19
- package/dist/src/{claude-agent-sdk-Yj9Hq0cH.cjs → claude-agent-sdk-BzxF6NIJ.cjs} +48 -19
- package/dist/src/{claude-agent-sdk-rVqlUIJl.js → claude-agent-sdk-CmkTnKGH.js} +49 -20
- package/dist/src/{claude-agent-sdk-yid1kGsL.js → claude-agent-sdk-rXCBLK_o.js} +40 -11
- package/dist/src/{cloud-ByJVRSQI.js → cloud-BMbRVJFw.js} +3 -3
- package/dist/src/cloud-CZ4hytdm.js +5 -0
- package/dist/src/{cloudflare-ai-DmJZcvVx.cjs → cloudflare-ai-BAQ0u_dg.cjs} +14 -14
- package/dist/src/{cloudflare-ai-CWKYPCCF.js → cloudflare-ai-CUg4BTcj.js} +13 -13
- package/dist/src/{cloudflare-ai-goDJ5bwr.js → cloudflare-ai-CobxMTR4.js} +15 -15
- package/dist/src/{cloudflare-ai-CdKv38f6.js → cloudflare-ai-Z9X219gp.js} +2 -2
- package/dist/src/{cloudflare-gateway-BW8hcdB0.cjs → cloudflare-gateway-C0sgfr_z.cjs} +15 -15
- package/dist/src/{cloudflare-gateway-CDCOS1Os.js → cloudflare-gateway-D2_yi-Fh.js} +16 -16
- package/dist/src/{cloudflare-gateway-Dz_HCMGY.js → cloudflare-gateway-Djf3F3_H.js} +2 -2
- package/dist/src/{cloudflare-gateway-gzmTsXcd.js → cloudflare-gateway-_itGuXry.js} +14 -14
- package/dist/src/{codex-sdk-BlvhxMr0.js → codex-sdk-BASDNkIl.js} +90 -34
- package/dist/src/{codex-sdk-BC5qbCjh.js → codex-sdk-dSnGdgIp.js} +92 -36
- package/dist/src/{codex-sdk-CBpm53B8.js → codex-sdk-ibXwdglL.js} +91 -35
- package/dist/src/{codex-sdk-Cf5_CQch.cjs → codex-sdk-wTEpMM_X.cjs} +92 -36
- package/dist/src/{cometapi-DoaO5SLq.js → cometapi-B01btbfb.js} +16 -16
- package/dist/src/{cometapi-BUKGSUCw.js → cometapi-ChAaRjg5.js} +14 -14
- package/dist/src/{cometapi-C6BSw9k3.js → cometapi-DHUAH6nK.js} +2 -2
- package/dist/src/{cometapi-B3TzpkNI.cjs → cometapi-JbvOJSCO.cjs} +15 -15
- package/dist/src/{completion-BLe7SUA4.js → completion-BBJ6zmG3.js} +6 -6
- package/dist/src/{completion-1RAiLcwi.js → completion-D9_MDlnd.js} +6 -6
- package/dist/src/{completion-D6BTVt6e.cjs → completion-DDyL3Cb2.cjs} +6 -6
- package/dist/src/{docker-CkOzlD1W.js → docker-BuButc4D.js} +15 -15
- package/dist/src/{docker-BWsSX8Pn.cjs → docker-Ckw-j7Rr.cjs} +14 -14
- package/dist/src/{docker-CMf5SGLq.js → docker-JAAubMw3.js} +13 -13
- package/dist/src/{docker-C0AzMsuf.js → docker-vnOg96gi.js} +2 -2
- package/dist/src/errors-DnGCbnx8.js +36 -0
- package/dist/src/{esm-gWVPXn3r.cjs → esm-BQkx5roy.cjs} +2 -2
- package/dist/src/{esm-Lh6czAFT.js → esm-CYhseqj4.js} +2 -2
- package/dist/src/{esm-D40XsWlR.js → esm-rDtG_2rg.js} +2 -2
- package/dist/src/eval-CKHWqG9f.js +16 -0
- package/dist/src/{eval-WaSg64w3.js → eval-CYrbG57o.js} +106 -29
- package/dist/src/{evalResult-CGrGU_cG.js → evalResult-6JaUIStC.js} +7 -7
- package/dist/src/{evalResult-C_UM63nn.js → evalResult-COsVttMA.js} +7 -7
- package/dist/src/evalResult-CxTP-LMm.cjs +11 -0
- package/dist/src/evalResult-CzLURDcP.js +13 -0
- package/dist/src/{evalResult-BU4huold.cjs → evalResult-DlRfu_Rq.cjs} +7 -7
- package/dist/src/evalResult-DyttNQ_G.js +11 -0
- package/dist/src/evaluator-0PvfeBYh.js +38 -0
- package/dist/src/{evaluator-gmF3Vddp.js → evaluator-3EJCMTs0.js} +75 -49
- package/dist/src/{extractor-D-RM2m6A.cjs → extractor-DbhlYEeo.cjs} +23 -6
- package/dist/src/{extractor-Da8IxCeS.js → extractor-Hs7la_19.js} +23 -6
- package/dist/src/{extractor-BA90EtLq.js → extractor-LSYjrhK0.js} +23 -6
- package/dist/src/{fetch-BIfcW-Fd.js → fetch-18MuNu9i.js} +21 -7
- package/dist/src/{fetch-CW9GabAi.js → fetch-Bi0o-fdp.js} +2 -2
- package/dist/src/fetch-CMptBDVg.cjs +4 -0
- package/dist/src/fetch-DAZkv3gV.js +6 -0
- package/dist/src/{fetch-Mb7SbeM-.js → fetch-SRsE6Ctl.js} +21 -7
- package/dist/src/{fetch-MvWxCS5U.cjs → fetch-ZMn_oemb.cjs} +21 -7
- package/dist/src/{fileExtensions-CzcPkiKZ.js → fileExtensions-BpuMmaFL.js} +1 -1
- package/dist/src/{formatDuration-CCiZ-wBY.js → formatDuration-Doo0xq-z.js} +1 -1
- package/dist/src/{genaiTracer-BQ8AmajD.cjs → genaiTracer-CQlpZkrp.cjs} +2 -2
- package/dist/src/{genaiTracer-foKtbVa2.js → genaiTracer-CqNnnXrE.js} +1 -1
- package/dist/src/graders-BCytzXrb.js +34 -0
- package/dist/src/{graders-CwtelA9k.js → graders-BaMCwIKp.js} +212 -91
- package/dist/src/graders-CGZQShfJ.cjs +33 -0
- package/dist/src/{graders-2dabIkXs.cjs → graders-DzUUnUjC.cjs} +212 -91
- package/dist/src/{graders-BscQ6O1Q.js → graders-QsALpIdy.js} +211 -90
- package/dist/src/graders-spkuVC-E.js +36 -0
- package/dist/src/{image-Dv0o-Ab7.js → image-BXt_7u0v.js} +4 -4
- package/dist/src/{image-B6TV9l0v.js → image-BiEVdpdP.js} +7 -7
- package/dist/src/{image-DS9hlgSc.cjs → image-C3BjJUAU.cjs} +7 -7
- package/dist/src/{image-_jKUeeh9.js → image-COCWy5dX.js} +2 -2
- package/dist/src/{image-DqElR73O.cjs → image-D10zEe1f.cjs} +4 -4
- package/dist/src/{image-BVEVjrwl.js → image-DB4sHxdJ.js} +5 -5
- package/dist/src/{image-C1i3NoZo.js → image-mhAGP07h.js} +7 -7
- package/dist/src/index.cjs +305 -112
- package/dist/src/index.d.cts +137 -3
- package/dist/src/index.d.ts +137 -3
- package/dist/src/index.js +304 -111
- package/dist/src/{interactiveCheck-CjK2R4rB.js → interactiveCheck-DU-MAhp5.js} +2 -2
- package/dist/src/{knowledgeBase-K5fJN2kC.cjs → knowledgeBase-Bnb00xKs.cjs} +8 -8
- package/dist/src/{knowledgeBase-D-xthvn0.js → knowledgeBase-CMvMlLZR.js} +9 -9
- package/dist/src/{knowledgeBase-DJZHeJqg.js → knowledgeBase-DotRBzUE.js} +2 -2
- package/dist/src/{knowledgeBase-BSs2ldSR.js → knowledgeBase-XJQ0Qyez.js} +8 -8
- package/dist/src/{litellm-kPhaZkzz.js → litellm-BECdjOTx.js} +3 -3
- package/dist/src/{litellm-uRLukY7s.cjs → litellm-BrnZhMcL.cjs} +15 -15
- package/dist/src/{litellm-DH_rVIOd.js → litellm-CHrRmPAe.js} +16 -16
- package/dist/src/{litellm-DoXGHlpD.js → litellm-CrLJrPIm.js} +14 -14
- package/dist/src/{logger-CMp-NS-e.cjs → logger-BdZ-IqBc.cjs} +5 -2
- package/dist/src/{logger-Bzi5o47S.js → logger-BotXmWKW.js} +5 -2
- package/dist/src/{logger-q5I8CByj.js → logger-w8Ozp0Td.js} +5 -2
- package/dist/src/{luma-ray-B3GiVpuq.js → luma-ray-BOeX-h0M.js} +10 -10
- package/dist/src/{luma-ray-CPISsLu-.js → luma-ray-C-w6EsJm.js} +2 -2
- package/dist/src/{luma-ray-BrWrKIIB.cjs → luma-ray-C0RkI3lt.cjs} +10 -10
- package/dist/src/{luma-ray-gyI_GEy2.js → luma-ray-DgKLS0BF.js} +11 -11
- package/dist/src/main.js +3260 -2537
- package/dist/src/{messages-CJgEjRQF.js → messages-CDZYGNlS.js} +13 -10
- package/dist/src/{messages-BMkwrfmt.js → messages-D61tPFQo.js} +14 -11
- package/dist/src/{messages-B5HsO_dM.cjs → messages-DXV3Qh8_.cjs} +14 -11
- package/dist/src/{meteor-CBciquOS.js → meteor-Dj8cTkU_.js} +1 -1
- package/dist/src/{meteor-0cLf9TIn.js → meteor-P2rUE-Uz.js} +1 -1
- package/dist/src/{meteor-BQ6Ws9k2.js → meteor-SLNTgmXm.js} +2 -2
- package/dist/src/{meteor-DRuEfzuQ.cjs → meteor-odmwVbyG.cjs} +1 -1
- package/dist/src/{nova-reel-B5SOonY2.js → nova-reel-Bk5npr2q.js} +11 -11
- package/dist/src/{nova-reel-CT9ZuhJ3.js → nova-reel-C2LFfVTf.js} +2 -2
- package/dist/src/{nova-reel-ByTx85ed.cjs → nova-reel-D9FXq3Mt.cjs} +10 -10
- package/dist/src/{nova-reel-ChM7WaHR.js → nova-reel-DtCjbD5O.js} +10 -10
- package/dist/src/{nova-sonic-D4M3kKYu.js → nova-sonic-BXRfQyF-.js} +8 -8
- package/dist/src/{nova-sonic-C-H3eDvL.cjs → nova-sonic-BoRSY_U6.cjs} +7 -7
- package/dist/src/{nova-sonic-DgifpOKF.js → nova-sonic-CgaWLDM1.js} +7 -7
- package/dist/src/{nova-sonic-BqP59oOu.js → nova-sonic-D_qERM-K.js} +2 -2
- package/dist/src/{openai-BjpdxIOG.cjs → openai-Bigwjgo1.cjs} +2 -2
- package/dist/src/{openai-Cv9pEKxp.js → openai-CT5fwbve.js} +2 -2
- package/dist/src/{openai-CitF-gEN.js → openai-Dz3surb_.js} +2 -2
- package/dist/src/openclaw-B6XY2kUf.js +526 -0
- package/dist/src/openclaw-CpPrXwf6.js +524 -0
- package/dist/src/openclaw-DDSfq5fp.cjs +528 -0
- package/dist/src/openclaw-dHLcXUWZ.js +511 -0
- package/dist/src/{opencode-sdk-Dakn4QMp.js → opencode-sdk-CImWVqy9.js} +7 -7
- package/dist/src/{opencode-sdk-BcLvVMV-.js → opencode-sdk-CuCztr4P.js} +6 -6
- package/dist/src/{opencode-sdk-z7KKOCdB.js → opencode-sdk-DhcfRbBH.js} +3 -3
- package/dist/src/{opencode-sdk-Cqszt4br.cjs → opencode-sdk-mqF-Oj3f.cjs} +6 -6
- package/dist/src/{otlpReceiver-BvmMgacx.cjs → otlpReceiver-B6Xo4KZM.cjs} +6 -6
- package/dist/src/{otlpReceiver-DRNetlJH.js → otlpReceiver-BO0rbDzh.js} +6 -6
- package/dist/src/{otlpReceiver-Bhj_vnzl.js → otlpReceiver-Dg817agV.js} +6 -6
- package/dist/src/{otlpReceiver-Cpnk-Hjf.js → otlpReceiver-DmRb0NBj.js} +2 -2
- package/dist/src/{providerRegistry-D32Lt9vp.js → providerRegistry-Xf0qdqGQ.js} +2 -2
- package/dist/src/{providerRegistry-HGQd2MF6.cjs → providerRegistry-lc7a7utN.cjs} +2 -2
- package/dist/src/{providerRegistry-BdKWcUa8.js → providerRegistry-wCWd7sKQ.js} +2 -2
- package/dist/src/providers-BMZZmPBJ.cjs +32 -0
- package/dist/src/{providers-BWoVY_Wz.cjs → providers-BNKVY53V.cjs} +294 -782
- package/dist/src/{providers-Co_FGgH8.js → providers-BiNq_Iyc.js} +281 -769
- package/dist/src/{providers-CUCHJCHD.js → providers-BlEhY5mi.js} +286 -779
- package/dist/src/providers-CQQrNaJk.js +32 -0
- package/dist/src/providers-Ck8HyrC-.js +34 -0
- package/dist/src/{pythonUtils-rOCm9w_5.cjs → pythonUtils-DZ6EbdY4.cjs} +3 -3
- package/dist/src/{pythonUtils-B9JA-gsC.js → pythonUtils-r1uBuA0n.js} +3 -3
- package/dist/src/{pythonUtils-eNq6Wsfr.js → pythonUtils-vMlk9Qp5.js} +3 -3
- package/dist/src/{quiverai-BpWtOEQZ.cjs → quiverai-BNfIwKCO.cjs} +13 -13
- package/dist/src/{quiverai-Cj-PUa3p.js → quiverai-BQigKdIH.js} +14 -14
- package/dist/src/{quiverai-BN8OVvDE.js → quiverai-Bfy2WnE2.js} +12 -12
- package/dist/src/{quiverai-D5MSsd2c.js → quiverai-CedIP0PJ.js} +2 -2
- package/dist/src/{render-D2710HbA.js → render-CAZvKKkB.js} +4 -4
- package/dist/src/responses-DLLjADw5.js +653 -0
- package/dist/src/responses-TsdODUpm.js +654 -0
- package/dist/src/responses-zOtKtnY_.cjs +671 -0
- package/dist/src/{rubyUtils-Dn6MGcsk.js → rubyUtils-BtjlqyXt.js} +3 -3
- package/dist/src/{rubyUtils-LBsk3zIm.js → rubyUtils-Cs35SDYa.js} +3 -3
- package/dist/src/rubyUtils-D7--T12C.js +6 -0
- package/dist/src/{rubyUtils-rnCVDgH-.cjs → rubyUtils-DCVaJ3mc.cjs} +3 -3
- package/dist/src/rubyUtils-DRRiMFV2.js +5 -0
- package/dist/src/rubyUtils-vb8OYFC-.cjs +5 -0
- package/dist/src/{sagemaker-C8MeZIkH.js → sagemaker-BcgLu0U4.js} +18 -18
- package/dist/src/{sagemaker-4ukMNSN0.js → sagemaker-CLdUAv5z.js} +17 -17
- package/dist/src/{sagemaker-DyVHy2BW.js → sagemaker-Du4LIR97.js} +2 -2
- package/dist/src/{sagemaker-DB3Eojau.cjs → sagemaker-DwNnEVYt.cjs} +17 -17
- package/dist/src/{scanner-C28XVEq2.js → scanner-Dyw21Wg_.js} +12 -12
- package/dist/src/server/index.js +1818 -605
- package/dist/src/server-BUbS0Qfh.js +6 -0
- package/dist/src/{server-BF3HkMhe.js → server-CbMTRQkg.js} +8 -6
- package/dist/src/{server-CSbLW-UI.cjs → server-CgUQ25qW.cjs} +8 -6
- package/dist/src/{server-mycZbUH8.js → server-DWmZLfCy.js} +10 -7
- package/dist/src/server-XpGXFHkS.cjs +6 -0
- package/dist/src/server-gfOx5Zrk.js +8 -0
- package/dist/src/{signal-DM_SPNmi.js → signal-Bl32q42d.js} +3 -3
- package/dist/src/{slack-Ed1yyt_j.js → slack-BfdBx2tO.js} +2 -2
- package/dist/src/{slack-D1F9Y7CH.cjs → slack-BtMkB6xP.cjs} +2 -2
- package/dist/src/{slack-DkAF58Tr.js → slack-DPqj42Ts.js} +2 -2
- package/dist/src/{slack-BK312SXM.js → slack-OZYxoVON.js} +2 -2
- package/dist/src/{store-CT_e5OZX.js → store-2ocbYY9D.js} +3 -3
- package/dist/src/store-5u2yriTV.js +7 -0
- package/dist/src/{store-BpumNYCl.cjs → store-BqwfFEyF.cjs} +3 -3
- package/dist/src/{store-BFLqwuc_.js → store-D4gdn9ih.js} +3 -3
- package/dist/src/store-D_lq_8oQ.js +6 -0
- package/dist/src/store-m5KT6Ly7.cjs +6 -0
- package/dist/src/{tables-Dansasnu.cjs → tables-B9E1kRp-.cjs} +3 -3
- package/dist/src/{tables-BjaApSAB.js → tables-C7TT2XVn.js} +3 -3
- package/dist/src/{tables-Dwexr5Z6.js → tables-D-NSwNIb.js} +3 -3
- package/dist/src/telemetry-5RHFoCJh.js +6 -0
- package/dist/src/{telemetry-MVkZQxt9.js → telemetry-BXyVqyAg.js} +5 -4
- package/dist/src/{telemetry-DFDFKdnr.cjs → telemetry-D0_yFdtU.cjs} +5 -4
- package/dist/src/{telemetry-Ds5Nn81l.js → telemetry-DZ_7PaVq.js} +5 -4
- package/dist/src/telemetry-Do8wMnA-.js +8 -0
- package/dist/src/telemetry-LojxPoFq.cjs +6 -0
- package/dist/src/{text-PYISqVm1.cjs → text-DF2hMKdg.cjs} +1 -1
- package/dist/src/{text-DuYSUYPB.js → text-DgMr_tiM.js} +1 -1
- package/dist/src/{text-Dx0GJOCN.js → text-Dm78AVGG.js} +1 -1
- package/dist/src/{tokenUsageUtils-BtZd3sP7.cjs → tokenUsageUtils-CXhxVj72.cjs} +9 -4
- package/dist/src/{tokenUsageUtils-DoinwgKF.js → tokenUsageUtils-DmZSD2eU.js} +9 -4
- package/dist/src/{tokenUsageUtils-cFdLMERB.js → tokenUsageUtils-FZd5O_4A.js} +9 -4
- package/dist/src/{transcription-Cp19m_Mt.js → transcription-C-M81iDA.js} +2 -2
- package/dist/src/{transcription-CLRpAg07.js → transcription-CYuY5sFO.js} +10 -10
- package/dist/src/{transcription-D8ifIKOv.js → transcription-Ch7S-LWw.js} +11 -11
- package/dist/src/{transcription-CXXFEVM_.cjs → transcription-FNIz3YOe.cjs} +10 -10
- package/dist/src/transform-8eGmaH-7.js +7 -0
- package/dist/src/transform-BRVvWaG4.cjs +6 -0
- package/dist/src/{transform-Bn-lgBE2.js → transform-CoP2bJ7P.js} +44 -5
- package/dist/src/{transform-BFPYuBaW.js → transform-CqTFr7KR.js} +5 -5
- package/dist/src/{transform-DvQWeBSR.js → transform-D8dILpfZ.js} +4 -4
- package/dist/src/{transform-BUrxadlA.js → transform-DMaxQwDx.js} +44 -5
- package/dist/src/transform-GybT0X0u.js +8 -0
- package/dist/src/{transform-DshYLyBq.cjs → transform-Kd6u-oNm.cjs} +4 -4
- package/dist/src/{transform-Bw1IstDE.cjs → transform-ivxEY4f7.cjs} +55 -4
- package/dist/src/{transformersAvailability-BaoWHpu1.cjs → transformersAvailability-Bkep3ka7.cjs} +1 -1
- package/dist/src/{transformersAvailability-Dhh45n5P.js → transformersAvailability-DEU2naS1.js} +1 -1
- package/dist/src/{transformersAvailability-DtpwoeFC.js → transformersAvailability-DkAWaK5B.js} +1 -1
- package/dist/src/{transformersAvailability-O2YaCv9Z.js → transformersAvailability-DwmezkVe.js} +1 -1
- package/dist/src/{types-B-XUqfNs.cjs → types-BIfttHrT.cjs} +22 -2
- package/dist/src/{types-BsU_PxR3.js → types-DMVjYLpx.js} +16 -2
- package/dist/src/{types-CgrxBFgm.js → types-t52w-XsS.js} +19 -3
- package/dist/src/{util-M2MoNCUR.js → util-BSh4a_Q8.js} +7 -7
- package/dist/src/{util-DTJWKLkl.cjs → util-C08Kns6-.cjs} +18 -4
- package/dist/src/{util-DHBpsbZE.js → util-CUEt0Vum.js} +18 -4
- package/dist/src/{util-Bnw6EyZN.js → util-Cl0zfT3V.js} +18 -4
- package/dist/src/{util-C7A-PlKK.cjs → util-DUYOvxAy.cjs} +6 -6
- package/dist/src/{util-WYC3rB_p.js → util-DiCePfDu.js} +6 -6
- package/dist/src/{util-CV99ps44.cjs → util-DkFTvieG.cjs} +17 -5
- package/dist/src/{util-CfepsNVK.js → util-mJ58qbbw.js} +17 -5
- package/dist/src/{util-BZaMVBMq.js → util-vjscpUzy.js} +17 -5
- package/dist/src/{utils-BryuD3vq.js → utils-CVzb4YiI.js} +3 -3
- package/dist/src/{utils-Dezi3MBH.cjs → utils-DFaZa6Rf.cjs} +3 -3
- package/dist/src/{utils-GiyI2K4P.js → utils-JaY9veb5.js} +3 -3
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +75 -77
- package/dist/src/app/assets/index-07dbAcWK.js +0 -427
- package/dist/src/app/assets/index-DnEcCQgk.css +0 -1
- package/dist/src/app/assets/vendor-react-DHoaNLma.js +0 -4
- package/dist/src/cache-BujOdYlc.cjs +0 -6
- package/dist/src/cloud-BHYz0XkH.js +0 -5
- package/dist/src/eval-0IFf8hON.js +0 -16
- package/dist/src/evalResult-BCCv6FXj.js +0 -13
- package/dist/src/evalResult-C8iVgVvr.js +0 -11
- package/dist/src/evalResult-CqFeWKYx.cjs +0 -11
- package/dist/src/evaluator-DwS5NAEj.js +0 -37
- package/dist/src/fetch-BH9KSaUC.cjs +0 -4
- package/dist/src/fetch-BL6kqunF.js +0 -6
- package/dist/src/graders-BK3LuhJ8.cjs +0 -32
- package/dist/src/graders-Bpjtip-E.js +0 -35
- package/dist/src/graders-KIf6Uej3.js +0 -33
- package/dist/src/providers-41mSodR_.js +0 -31
- package/dist/src/providers-CiZCpIxz.cjs +0 -31
- package/dist/src/providers-G531909f.js +0 -33
- package/dist/src/rubyUtils-BLd6EE1u.cjs +0 -5
- package/dist/src/rubyUtils-CVELPvUH.js +0 -6
- package/dist/src/rubyUtils-_t9Gmf7U.js +0 -5
- package/dist/src/server-Bc4T4XDt.cjs +0 -6
- package/dist/src/server-CMyxJ7ct.js +0 -6
- package/dist/src/server-D9FuxNYE.js +0 -8
- package/dist/src/store-CrMinjmN.cjs +0 -6
- package/dist/src/store-EaWZOoxz.js +0 -7
- package/dist/src/store-KXWnHB15.js +0 -6
- package/dist/src/telemetry-BXD2mCyr.cjs +0 -6
- package/dist/src/telemetry-CYiBm56v.js +0 -8
- package/dist/src/telemetry-lICYKIMB.js +0 -6
- package/dist/src/transform-DZI2t8-9.js +0 -8
- package/dist/src/transform-DZUV3qpO.js +0 -7
- package/dist/src/transform-XcotjzZX.cjs +0 -6
package/dist/src/index.js
CHANGED
|
@@ -1,39 +1,40 @@
|
|
|
1
1
|
import "./chunk-FhC4c-0y.js";
|
|
2
|
-
import { C as isCI, S as getMaxEvalTimeMs, _ as getEnvBool, a as setLogCallback, b as getEnvString, d as getAjv, h as summarizeEvaluateResultForLogging, i as logger_default, m as safeJsonStringify, n as isDebugEnabled, o as setLogLevel, p as orderKeys, t as getLogLevel, u as extractJsonObjects, v as getEnvFloat, w as cliState_default, x as getEvalTimeoutMs, y as getEnvInt } from "./logger-
|
|
2
|
+
import { C as isCI, S as getMaxEvalTimeMs, _ as getEnvBool, a as setLogCallback, b as getEnvString, d as getAjv, h as summarizeEvaluateResultForLogging, i as logger_default, m as safeJsonStringify, n as isDebugEnabled, o as setLogLevel, p as orderKeys, t as getLogLevel, u as extractJsonObjects, v as getEnvFloat, w as cliState_default, x as getEvalTimeoutMs, y as getEnvInt } from "./logger-w8Ozp0Td.js";
|
|
3
3
|
import { t as invariant } from "./invariant-1pAf2CD1.js";
|
|
4
|
-
import { r as importModule, t as getDirectory } from "./esm-
|
|
5
|
-
import { r as runPython } from "./pythonUtils-
|
|
4
|
+
import { r as importModule, t as getDirectory } from "./esm-rDtG_2rg.js";
|
|
5
|
+
import { r as runPython } from "./pythonUtils-vMlk9Qp5.js";
|
|
6
6
|
import { i as isJavascriptFile } from "./fileExtensions-ePDqouxn.js";
|
|
7
|
-
import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-
|
|
8
|
-
import { $ as matchesSearchRubric, A as BeavertailsPlugin, B as getAndCheckProvider, C as HarmbenchPlugin, D as DebugAccessPlugin, E as DivergentRepetitionPlugin, F as retryWithDeduplication, G as matchesContextFaithfulness, H as matchesAnswerRelevance, I as sampleArray, J as matchesFactuality, K as matchesContextRecall, L as fetchHuggingFaceDataset, M as RedteamGraderBase, N as RedteamPluginBase, O as CrossSessionLeakPlugin, P as getCustomPolicies, Q as matchesPiScore, R as callProviderWithContext, S as ImitationPlugin, T as ExcessiveAgencyPlugin, U as matchesClassification, V as loadRubricPrompt, W as matchesClosedQa, X as matchesLlmRubric, Y as matchesGEval, Z as matchesModeration, _ as makeInlinePolicyIdSync, a as UnverifiableClaimsPlugin, at as processPrompts, b as OverreliancePlugin, c as ToolDiscoveryPlugin, ct as SUGGEST_PROMPTS_SYSTEM_MESSAGE, d as RbacPlugin, dt as loadFromJavaScriptFile, et as matchesSelectBest, f as PromptExtractionPlugin, ft as processFileReference, g as isValidPolicyObject, h as determinePolicyTypeFromId, i as VLGuardPlugin, it as DefaultSuggestionsProvider, j as AegisPlugin, k as ContractPlugin, l as SqlInjectionPlugin, lt as coerceString, m as PolicyPlugin, n as getGraderById, nt as selectMaxScore, o as UnsafeBenchPlugin, ot as readPrompts, p as PoliticsPlugin, pt as resolveContext, q as matchesContextRelevance, r as VLSUPlugin, rt as getDefaultProviders, s as ToxicChatPlugin, st as readProviderPromptMap, t as GRADERS, tt as matchesSimilarity, u as ShellInjectionPlugin, ut as getFinalTest, v as PlinyPlugin, w as HallucinationPlugin, x as IntentPlugin, y as getPiiLeakTestsForCategory, z as fail } from "./graders-
|
|
9
|
-
import {
|
|
10
|
-
import { A as isAnthropicProvider, C as deduplicateTestCases, D as resultIsForTestCase, E as getTestCaseDeduplicationKey, M as isOpenAiProvider, N as isProviderAllowed, O as doesProviderRefMatch, S as setupEnv, T as filterRuntimeVars, b as loadFunction, c as maybeLoadFromExternalFile, d as maybeLoadToolsFromExternalFile, h as renderEnvOnlyInObject, i as fetchCsvFromGoogleSheet, j as isGoogleProvider, k as getProviderDescription, m as readOutput, n as writeMultipleOutputs, p as readFilters, r as writeOutput, s as maybeLoadConfigFromExternalFile, t as printBorder, v as extractVariablesFromTemplates, w as extractRuntimeVars, x as parseFileUrl, y as getNunjucksEngine } from "./util-
|
|
11
|
-
import { F as getShareViewBaseUrl, L as VERSION, M as TERMINAL_MAX_WIDTH, N as getDefaultShareViewBaseUrl, O as DEFAULT_MAX_CONCURRENCY, P as getShareApiBaseUrl, R as FILE_METADATA_KEY, S as parseChatPrompt, c as cloudConfig, f as getCurrentTimestamp, h as REQUEST_TIMEOUT_MS, k as DEFAULT_QUERY_LIMIT, n as fetchWithRetries, p as sleep, r as fetchWithTimeout, s as CloudConfig, t as fetchWithProxy, y as isPromptfooSampleTarget, z as HUMAN_ASSERTION_TYPE } from "./fetch-
|
|
12
|
-
import { a as fetchWithCache, o as getCache, r as disableCache, t as cache_exports } from "./cache-
|
|
13
|
-
import { A as createRateLimitRegistry, B as
|
|
14
|
-
import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-
|
|
15
|
-
import { a as sha256, i as randomSequence, n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-
|
|
7
|
+
import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-D8dILpfZ.js";
|
|
8
|
+
import { $ as matchesSearchRubric, A as BeavertailsPlugin, B as getAndCheckProvider, C as HarmbenchPlugin, D as DebugAccessPlugin, E as DivergentRepetitionPlugin, F as retryWithDeduplication, G as matchesContextFaithfulness, H as matchesAnswerRelevance, I as sampleArray, J as matchesFactuality, K as matchesContextRecall, L as fetchHuggingFaceDataset, M as RedteamGraderBase, N as RedteamPluginBase, O as CrossSessionLeakPlugin, P as getCustomPolicies, Q as matchesPiScore, R as callProviderWithContext, S as ImitationPlugin, T as ExcessiveAgencyPlugin, U as matchesClassification, V as loadRubricPrompt, W as matchesClosedQa, X as matchesLlmRubric, Y as matchesGEval, Z as matchesModeration, _ as makeInlinePolicyIdSync, a as UnverifiableClaimsPlugin, at as processPrompts, b as OverreliancePlugin, c as ToolDiscoveryPlugin, ct as SUGGEST_PROMPTS_SYSTEM_MESSAGE, d as RbacPlugin, dt as loadFromJavaScriptFile, et as matchesSelectBest, f as PromptExtractionPlugin, ft as processFileReference, g as isValidPolicyObject, h as determinePolicyTypeFromId, i as VLGuardPlugin, it as DefaultSuggestionsProvider, j as AegisPlugin, k as ContractPlugin, l as SqlInjectionPlugin, lt as coerceString, m as PolicyPlugin, n as getGraderById, nt as selectMaxScore, o as UnsafeBenchPlugin, ot as readPrompts, p as PoliticsPlugin, pt as resolveContext, q as matchesContextRelevance, r as VLSUPlugin, rt as getDefaultProviders, s as ToxicChatPlugin, st as readProviderPromptMap, t as GRADERS, tt as matchesSimilarity, u as ShellInjectionPlugin, ut as getFinalTest, v as PlinyPlugin, w as HallucinationPlugin, x as IntentPlugin, y as getPiiLeakTestsForCategory, z as fail } from "./graders-QsALpIdy.js";
|
|
9
|
+
import { A as isApiProvider, C as TestGeneratorConfigSchema, Ct as BaseTokenUsageSchema, D as VarsSchema, E as UnifiedConfigSchema, F as ConversationMessageSchema, I as PartialGenerationError, J as getDefaultNFanout, K as STRATEGY_COLLECTIONS, L as PluginConfigSchema, M as RedteamConfigSchema, O as isGradingResult, P as ProvidersSchema, Q as categoryAliases, R as PolicyObjectSchema, S as TestCasesWithMetadataSchema, St as PromptSchema, T as TestSuiteSchema, Tt as InputsSchema, V as isUuid, W as DEFAULT_STRATEGIES, X as isFanoutStrategy, Z as Severity, _ as ScenarioSchema, _t as REDTEAM_PROVIDER_HARM_PLUGINS, a as AtomicTestCaseSchema, at as FINANCIAL_PLUGINS, b as TestCaseWithVarsFileSchema, bt as TELECOM_PLUGINS, c as CompletedPromptSchema, ct as INSURANCE_PLUGINS, d as EvaluateOptionsSchema, dt as MEDICAL_PLUGINS, et as riskCategorySeverityMap, f as GradingConfigSchema, ft as MULTI_INPUT_EXCLUDED_PLUGINS, g as ResultFailureReason, gt as PLUGIN_CATEGORIES, h as OutputFileExtension, ht as PII_PLUGINS, i as AssertionTypeSchema, it as DEFAULT_PLUGINS, j as isProviderOptions, k as isResultFailureReason, l as DerivedMetricSchema, lt as LLAMA_GUARD_ENABLED_CATEGORIES, m as OutputConfigSchema, mt as PHARMACY_PLUGINS, n as AssertionSchema, nt as BIAS_PLUGINS, o as BaseAssertionTypesSchema, ot as FOUNDATION_PLUGINS, p as NotPrefixedAssertionTypesSchema, pt as MULTI_INPUT_VAR, q as STRATEGY_COLLECTION_MAPPINGS, r as AssertionSetSchema, rt as DATASET_EXEMPT_PLUGINS, s as CommandLineOptionsSchema, st as HARM_PLUGINS, t as AssertionOrSetSchema, tt as ALIASED_PLUGIN_MAPPINGS, u as EvalResultsFilterMode, ut as LLAMA_GUARD_REPLICATE_PROVIDER, v as SpecialAssertionTypesSchema, vt as REMOTE_ONLY_PLUGIN_IDS, w as TestSuiteConfigSchema, wt as CompletionTokenDetailsSchema, x as TestCasesWithMetadataPromptSchema, xt as UNALIGNED_PROVIDER_HARM_PLUGINS, y as TestCaseSchema, z as StrategyConfigSchema } from "./types-DMVjYLpx.js";
|
|
10
|
+
import { A as isAnthropicProvider, C as deduplicateTestCases, D as resultIsForTestCase, E as getTestCaseDeduplicationKey, M as isOpenAiProvider, N as isProviderAllowed, O as doesProviderRefMatch, S as setupEnv, T as filterRuntimeVars, b as loadFunction, c as maybeLoadFromExternalFile, d as maybeLoadToolsFromExternalFile, h as renderEnvOnlyInObject, i as fetchCsvFromGoogleSheet, j as isGoogleProvider, k as getProviderDescription, m as readOutput, n as writeMultipleOutputs, p as readFilters, r as writeOutput, s as maybeLoadConfigFromExternalFile, t as printBorder, v as extractVariablesFromTemplates, w as extractRuntimeVars, x as parseFileUrl, y as getNunjucksEngine } from "./util-DiCePfDu.js";
|
|
11
|
+
import { F as getShareViewBaseUrl, L as VERSION, M as TERMINAL_MAX_WIDTH, N as getDefaultShareViewBaseUrl, O as DEFAULT_MAX_CONCURRENCY, P as getShareApiBaseUrl, R as FILE_METADATA_KEY, S as parseChatPrompt, c as cloudConfig, f as getCurrentTimestamp, h as REQUEST_TIMEOUT_MS, k as DEFAULT_QUERY_LIMIT, n as fetchWithRetries, p as sleep, r as fetchWithTimeout, s as CloudConfig, t as fetchWithProxy, y as isPromptfooSampleTarget, z as HUMAN_ASSERTION_TYPE } from "./fetch-18MuNu9i.js";
|
|
12
|
+
import { a as fetchWithCache, c as NON_TRANSIENT_HTTP_STATUSES, l as isNonTransientHttpStatus, o as getCache, r as disableCache, t as cache_exports } from "./cache-BLLayYEN.js";
|
|
13
|
+
import { A as createRateLimitRegistry, B as isCloudProvider, C as collectFileMetadata, D as loadFromPackage, E as isPackagePath, F as getCloudDatabaseId, I as getEvalConfigFromCloud, J as AIStudioChatProvider, L as getOrgContext, M as PromptfooHarmfulCompletionProvider, O as redteamProviderManager, P as checkCloudPermissions, R as getPluginSeverityOverridesFromCloud, T as runExtensionHook, V as resolveTeamId, _ as extractVariablesFromJson, a as resolveProviderConfigs, b as isBasicRefusal, c as Strategies, d as pluginMatchesStrategyTargets, f as checkExfilTracking, g as extractPromptFromTags, i as resolveProvider, j as createProviderRateLimitOptions, k as TokenUsageTracker, l as loadStrategy, m as extractGoalFromPrompt, n as loadApiProvider, o as MCPProvider, q as VertexChatProvider, r as loadApiProviders, s as GoogleLiveProvider, t as getProviderIds, u as validateStrategies, v as getSessionId, w as renderPrompt, y as getShortPluginId } from "./providers-BiNq_Iyc.js";
|
|
14
|
+
import { i as generateIdFromPrompt, t as hashPrompt } from "./utils-CVzb4YiI.js";
|
|
15
|
+
import { a as sha256, i as randomSequence, n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-Hs7la_19.js";
|
|
16
16
|
import "./genaiTracer-Ce19n68P.js";
|
|
17
|
-
import { t as OpenAiChatCompletionProvider } from "./chat-
|
|
18
|
-
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-
|
|
19
|
-
import {
|
|
20
|
-
import "./messages-
|
|
21
|
-
import "./util-
|
|
22
|
-
import "./
|
|
23
|
-
import
|
|
24
|
-
import "./
|
|
25
|
-
import
|
|
26
|
-
import {
|
|
27
|
-
import {
|
|
28
|
-
import {
|
|
29
|
-
import {
|
|
30
|
-
import {
|
|
31
|
-
import {
|
|
32
|
-
import "./
|
|
33
|
-
import "./
|
|
34
|
-
import
|
|
35
|
-
import {
|
|
36
|
-
import {
|
|
17
|
+
import { t as OpenAiChatCompletionProvider } from "./chat-C8Ei4f87.js";
|
|
18
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-FZd5O_4A.js";
|
|
19
|
+
import { m as validateFunctionCall } from "./transform-DMaxQwDx.js";
|
|
20
|
+
import "./messages-CDZYGNlS.js";
|
|
21
|
+
import "./util-mJ58qbbw.js";
|
|
22
|
+
import "./responses-DLLjADw5.js";
|
|
23
|
+
import "./openai-Dz3surb_.js";
|
|
24
|
+
import { l as validateFunctionCall$1 } from "./util-Cl0zfT3V.js";
|
|
25
|
+
import "./completion-D9_MDlnd.js";
|
|
26
|
+
import { c as setUserEmail, i as getUserEmail, l as EMAIL_OK_STATUS, r as getAuthor, s as promptForEmailUnverified, t as checkEmailStatusAndMaybeExit } from "./accounts-R3gfCR_g.js";
|
|
27
|
+
import { i as getRemoteGenerationUrl, l as shouldGenerateRemote, o as getRemoteHealthUrl, r as promptYesNo, s as neverGenerateRemote } from "./server-CbMTRQkg.js";
|
|
28
|
+
import { t as getBlobByHash } from "./blobs-C9J2mVgo.js";
|
|
29
|
+
import { a as evalsTable, c as evalsToTagsTable, d as tagsTable, i as evalResultsTable, l as promptsTable, m as getDbSignalPath, o as evalsToDatasetsTable, p as getDb, r as datasetsTable, s as evalsToPromptsTable } from "./tables-D-NSwNIb.js";
|
|
30
|
+
import { n as telemetry_default } from "./telemetry-DZ_7PaVq.js";
|
|
31
|
+
import { t as ellipsize } from "./text-Dm78AVGG.js";
|
|
32
|
+
import { n as getTraceStore } from "./store-2ocbYY9D.js";
|
|
33
|
+
import "./base-Dtp8b4_N.js";
|
|
34
|
+
import "./image-BiEVdpdP.js";
|
|
35
|
+
import { t as providerRegistry } from "./providerRegistry-wCWd7sKQ.js";
|
|
36
|
+
import { n as runRuby } from "./rubyUtils-Cs35SDYa.js";
|
|
37
|
+
import { t as EvalResult } from "./evalResult-COsVttMA.js";
|
|
37
38
|
import * as fs$1 from "fs";
|
|
38
39
|
import fs, { createWriteStream } from "fs";
|
|
39
40
|
import * as path$2 from "path";
|
|
@@ -291,7 +292,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
291
292
|
telemetry_default.record("feature_used", { feature: "tracing" });
|
|
292
293
|
try {
|
|
293
294
|
logger_default.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
294
|
-
const { startOTLPReceiver } = await import("./otlpReceiver-
|
|
295
|
+
const { startOTLPReceiver } = await import("./otlpReceiver-BO0rbDzh.js");
|
|
295
296
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
296
297
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
297
298
|
logger_default.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
|
|
@@ -314,7 +315,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
314
315
|
async function stopOtlpReceiverIfNeeded() {
|
|
315
316
|
if (otlpReceiverStarted) try {
|
|
316
317
|
logger_default.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
317
|
-
const { stopOTLPReceiver } = await import("./otlpReceiver-
|
|
318
|
+
const { stopOTLPReceiver } = await import("./otlpReceiver-BO0rbDzh.js");
|
|
318
319
|
await stopOTLPReceiver();
|
|
319
320
|
otlpReceiverStarted = false;
|
|
320
321
|
logger_default.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -349,7 +350,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
349
350
|
}
|
|
350
351
|
if (!tracingEnabled) return null;
|
|
351
352
|
logger_default.debug("[EvaluatorTracing] Importing trace store");
|
|
352
|
-
const { getTraceStore } = await import("./store-
|
|
353
|
+
const { getTraceStore } = await import("./store-D_lq_8oQ.js");
|
|
353
354
|
const traceStore = getTraceStore();
|
|
354
355
|
const traceId = generateTraceId();
|
|
355
356
|
const spanId = generateSpanId();
|
|
@@ -2760,7 +2761,7 @@ const ASSERTION_HANDLERS = {
|
|
|
2760
2761
|
"llm-rubric": handleLlmRubric,
|
|
2761
2762
|
meteor: async (params) => {
|
|
2762
2763
|
try {
|
|
2763
|
-
const { handleMeteorAssertion } = await import("./meteor-
|
|
2764
|
+
const { handleMeteorAssertion } = await import("./meteor-Dj8cTkU_.js");
|
|
2764
2765
|
return handleMeteorAssertion(params);
|
|
2765
2766
|
} catch (error) {
|
|
2766
2767
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -2870,9 +2871,9 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2870
2871
|
let filePath = fileRef;
|
|
2871
2872
|
let functionName;
|
|
2872
2873
|
if (fileRef.includes(":")) {
|
|
2873
|
-
const
|
|
2874
|
-
filePath =
|
|
2875
|
-
functionName =
|
|
2874
|
+
const colonIndex = fileRef.indexOf(":");
|
|
2875
|
+
filePath = fileRef.slice(0, colonIndex);
|
|
2876
|
+
functionName = fileRef.slice(colonIndex + 1);
|
|
2876
2877
|
}
|
|
2877
2878
|
filePath = path.resolve(basePath, filePath);
|
|
2878
2879
|
if (isJavascriptFile(filePath)) {
|
|
@@ -2890,7 +2891,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2890
2891
|
};
|
|
2891
2892
|
}
|
|
2892
2893
|
else if (filePath.endsWith(".rb")) try {
|
|
2893
|
-
const { runRuby } = await import("./rubyUtils-
|
|
2894
|
+
const { runRuby } = await import("./rubyUtils-DRRiMFV2.js");
|
|
2894
2895
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
2895
2896
|
logger_default.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
2896
2897
|
} catch (error) {
|
|
@@ -3898,7 +3899,7 @@ async function runEval({ provider, prompt, test, testSuite, delay, nunjucksFilte
|
|
|
3898
3899
|
promptIdx,
|
|
3899
3900
|
testIdx
|
|
3900
3901
|
});
|
|
3901
|
-
logger_default.error("Provider call failed during eval", logContext);
|
|
3902
|
+
if (!(err instanceof Error && err.name === "AbortError")) logger_default.error("Provider call failed during eval", logContext);
|
|
3902
3903
|
return [{
|
|
3903
3904
|
...setup,
|
|
3904
3905
|
error: errorWithStack,
|
|
@@ -4079,11 +4080,17 @@ var Evaluator = class {
|
|
|
4079
4080
|
let globalTimeout;
|
|
4080
4081
|
let globalAbortController;
|
|
4081
4082
|
const processedIndices = /* @__PURE__ */ new Set();
|
|
4083
|
+
let targetUnavailable = false;
|
|
4084
|
+
let targetErrorStatus;
|
|
4085
|
+
const targetErrorAbortController = new AbortController();
|
|
4082
4086
|
let ciProgressReporter = null;
|
|
4083
4087
|
let progressBarManager = null;
|
|
4088
|
+
let providerAbortSignal = options.abortSignal;
|
|
4089
|
+
let combinedAbortSignal = options.abortSignal ? AbortSignal.any([options.abortSignal, targetErrorAbortController.signal]) : targetErrorAbortController.signal;
|
|
4084
4090
|
if (maxEvalTimeMs > 0) {
|
|
4085
4091
|
globalAbortController = new AbortController();
|
|
4086
|
-
|
|
4092
|
+
providerAbortSignal = providerAbortSignal ? AbortSignal.any([providerAbortSignal, globalAbortController.signal]) : globalAbortController.signal;
|
|
4093
|
+
combinedAbortSignal = AbortSignal.any([combinedAbortSignal, globalAbortController.signal]);
|
|
4087
4094
|
globalTimeout = setTimeout(() => {
|
|
4088
4095
|
evalTimedOut = true;
|
|
4089
4096
|
globalAbortController?.abort();
|
|
@@ -4091,7 +4098,7 @@ var Evaluator = class {
|
|
|
4091
4098
|
}
|
|
4092
4099
|
const vars = /* @__PURE__ */ new Set();
|
|
4093
4100
|
const checkAbort = () => {
|
|
4094
|
-
if (
|
|
4101
|
+
if (combinedAbortSignal.aborted) throw new Error("Operation cancelled");
|
|
4095
4102
|
};
|
|
4096
4103
|
if (!options.silent) logger_default.info(`Starting evaluation ${this.evalRecord.id}`);
|
|
4097
4104
|
checkAbort();
|
|
@@ -4252,7 +4259,7 @@ var Evaluator = class {
|
|
|
4252
4259
|
const defaultProvider = testSuite.defaultTest.provider;
|
|
4253
4260
|
if (isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
|
|
4254
4261
|
else if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
4255
|
-
const { loadApiProvider } = await import("./providers-
|
|
4262
|
+
const { loadApiProvider } = await import("./providers-CQQrNaJk.js");
|
|
4256
4263
|
testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
4257
4264
|
} else testCase.provider = defaultProvider;
|
|
4258
4265
|
}
|
|
@@ -4322,7 +4329,7 @@ var Evaluator = class {
|
|
|
4322
4329
|
registers: this.registers,
|
|
4323
4330
|
isRedteam: testSuite.redteam != null,
|
|
4324
4331
|
concurrency,
|
|
4325
|
-
abortSignal:
|
|
4332
|
+
abortSignal: providerAbortSignal,
|
|
4326
4333
|
evalId: this.evalRecord.id,
|
|
4327
4334
|
rateLimitRegistry: this.rateLimitRegistry
|
|
4328
4335
|
});
|
|
@@ -4336,7 +4343,7 @@ var Evaluator = class {
|
|
|
4336
4343
|
if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
|
|
4337
4344
|
}
|
|
4338
4345
|
if (cliState_default.resume && this.evalRecord.persisted) try {
|
|
4339
|
-
const { default: EvalResult } = await import("./evalResult-
|
|
4346
|
+
const { default: EvalResult } = await import("./evalResult-DyttNQ_G.js");
|
|
4340
4347
|
const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: cliState_default.retryMode });
|
|
4341
4348
|
const originalCount = runEvalOptions.length;
|
|
4342
4349
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -4389,6 +4396,14 @@ var Evaluator = class {
|
|
|
4389
4396
|
logger_default.error(`Error saving result: ${error} ${safeJsonStringify(resultSummary)}`);
|
|
4390
4397
|
}
|
|
4391
4398
|
for (const writer of this.fileWriters) await writer.write(row);
|
|
4399
|
+
const httpStatus = row.response?.metadata?.http?.status;
|
|
4400
|
+
if (typeof httpStatus === "number" && isNonTransientHttpStatus(httpStatus)) {
|
|
4401
|
+
targetUnavailable = true;
|
|
4402
|
+
targetErrorStatus = httpStatus;
|
|
4403
|
+
logger_default.error(`Target returned HTTP ${httpStatus}. Aborting scan - this error will not resolve on retry.`);
|
|
4404
|
+
targetErrorAbortController.abort();
|
|
4405
|
+
break;
|
|
4406
|
+
}
|
|
4392
4407
|
const { promptIdx } = row;
|
|
4393
4408
|
const metrics = prompts[promptIdx].metrics;
|
|
4394
4409
|
invariant(metrics, "Expected prompt.metrics to be set");
|
|
@@ -4552,6 +4567,7 @@ var Evaluator = class {
|
|
|
4552
4567
|
if (this.options.showProgressBar && progressBarManager) await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
4553
4568
|
try {
|
|
4554
4569
|
if (serialRunEvalOptions.length > 0) for (const evalStep of serialRunEvalOptions) {
|
|
4570
|
+
checkAbort();
|
|
4555
4571
|
if (isWebUI) {
|
|
4556
4572
|
const provider = evalStep.provider.label || evalStep.provider.id();
|
|
4557
4573
|
const vars = formatVarsForDisplay(evalStep.test.vars || {}, 50);
|
|
@@ -4569,22 +4585,32 @@ var Evaluator = class {
|
|
|
4569
4585
|
await this.evalRecord.addPrompts(prompts);
|
|
4570
4586
|
});
|
|
4571
4587
|
} catch (err) {
|
|
4572
|
-
if (
|
|
4573
|
-
|
|
4574
|
-
|
|
4575
|
-
|
|
4576
|
-
|
|
4577
|
-
|
|
4578
|
-
|
|
4579
|
-
|
|
4580
|
-
|
|
4581
|
-
|
|
4582
|
-
|
|
4583
|
-
|
|
4588
|
+
if (combinedAbortSignal.aborted) {
|
|
4589
|
+
if (evalTimedOut) logger_default.warn(`Evaluation stopped after reaching max duration (${maxEvalTimeMs}ms)`);
|
|
4590
|
+
else if (!targetUnavailable) {
|
|
4591
|
+
logger_default.info("Evaluation interrupted, saving progress...");
|
|
4592
|
+
if (globalTimeout) clearTimeout(globalTimeout);
|
|
4593
|
+
if (progressBarManager) progressBarManager.stop();
|
|
4594
|
+
if (ciProgressReporter) ciProgressReporter.finish();
|
|
4595
|
+
this.evalRecord.setVars(Array.from(vars));
|
|
4596
|
+
await this.evalRecord.addPrompts(prompts);
|
|
4597
|
+
updateSignalFile(this.evalRecord.id);
|
|
4598
|
+
return this.evalRecord;
|
|
4599
|
+
}
|
|
4600
|
+
} else {
|
|
4584
4601
|
if (ciProgressReporter) ciProgressReporter.error(`Evaluation failed: ${String(err)}`);
|
|
4585
4602
|
throw err;
|
|
4586
4603
|
}
|
|
4587
4604
|
}
|
|
4605
|
+
if (targetUnavailable) {
|
|
4606
|
+
if (globalTimeout) clearTimeout(globalTimeout);
|
|
4607
|
+
if (progressBarManager) progressBarManager.stop();
|
|
4608
|
+
if (ciProgressReporter) ciProgressReporter.error(`Target unavailable (HTTP ${targetErrorStatus})`);
|
|
4609
|
+
this.evalRecord.setVars(Array.from(vars));
|
|
4610
|
+
await this.evalRecord.addPrompts(prompts);
|
|
4611
|
+
updateSignalFile(this.evalRecord.id);
|
|
4612
|
+
return this.evalRecord;
|
|
4613
|
+
}
|
|
4588
4614
|
const compareRowsCount = rowsWithSelectBestAssertion.size + rowsWithMaxScoreAssertion.size;
|
|
4589
4615
|
if (progressBarManager) {
|
|
4590
4616
|
if (compareRowsCount > 0) progressBarManager.updateTotalCount(compareRowsCount);
|
|
@@ -5295,10 +5321,17 @@ function convertResultsToTable(eval_) {
|
|
|
5295
5321
|
if (keyToUpdate) result.vars[keyToUpdate] = actualPrompt;
|
|
5296
5322
|
}
|
|
5297
5323
|
}
|
|
5298
|
-
if (
|
|
5299
|
-
|
|
5300
|
-
|
|
5301
|
-
|
|
5324
|
+
if (!result.vars?.sessionId) {
|
|
5325
|
+
const metadataSessionIds = result.metadata?.sessionIds;
|
|
5326
|
+
if (Array.isArray(metadataSessionIds) && metadataSessionIds.length > 0) {
|
|
5327
|
+
result.vars = result.vars || {};
|
|
5328
|
+
result.vars.sessionId = metadataSessionIds.filter((id) => id != null && id !== "").map(String).join("\n");
|
|
5329
|
+
varsForHeader.add("sessionId");
|
|
5330
|
+
} else if (result.metadata?.sessionId) {
|
|
5331
|
+
result.vars = result.vars || {};
|
|
5332
|
+
result.vars.sessionId = result.metadata.sessionId;
|
|
5333
|
+
varsForHeader.add("sessionId");
|
|
5334
|
+
}
|
|
5302
5335
|
}
|
|
5303
5336
|
const transformDisplayVars = result.response?.metadata?.transformDisplayVars;
|
|
5304
5337
|
if (transformDisplayVars) {
|
|
@@ -5354,7 +5387,12 @@ function convertResultsToTable(eval_) {
|
|
|
5354
5387
|
model: result.response.video.model,
|
|
5355
5388
|
aspectRatio: result.response.video.aspectRatio,
|
|
5356
5389
|
resolution: result.response.video.resolution
|
|
5357
|
-
} : void 0
|
|
5390
|
+
} : void 0,
|
|
5391
|
+
images: result.response?.images?.map((img) => ({
|
|
5392
|
+
data: img.data,
|
|
5393
|
+
blobRef: img.blobRef,
|
|
5394
|
+
mimeType: img.mimeType
|
|
5395
|
+
}))
|
|
5358
5396
|
};
|
|
5359
5397
|
invariant(result.promptId, "Prompt ID is required");
|
|
5360
5398
|
row.testIdx = result.testIdx;
|
|
@@ -5420,7 +5458,12 @@ function convertEvalResultToTableCell(result) {
|
|
|
5420
5458
|
model: result.response.video.model,
|
|
5421
5459
|
aspectRatio: result.response.video.aspectRatio,
|
|
5422
5460
|
resolution: result.response.video.resolution
|
|
5423
|
-
} : void 0
|
|
5461
|
+
} : void 0,
|
|
5462
|
+
images: result.response?.images?.map((img) => ({
|
|
5463
|
+
data: img.data,
|
|
5464
|
+
blobRef: img.blobRef,
|
|
5465
|
+
mimeType: img.mimeType
|
|
5466
|
+
}))
|
|
5424
5467
|
};
|
|
5425
5468
|
}
|
|
5426
5469
|
function convertTestResultsToTableRow(results, varsForHeader) {
|
|
@@ -5429,10 +5472,13 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
5429
5472
|
outputs: [],
|
|
5430
5473
|
vars: Object.values(varsForHeader).map((varName) => {
|
|
5431
5474
|
if (varName === "sessionId") {
|
|
5432
|
-
const
|
|
5433
|
-
|
|
5434
|
-
|
|
5435
|
-
return
|
|
5475
|
+
const sessionIdFromVars = results[0].testCase.vars?.sessionId;
|
|
5476
|
+
if (sessionIdFromVars != null && sessionIdFromVars !== "") return typeof sessionIdFromVars === "string" ? sessionIdFromVars : JSON.stringify(sessionIdFromVars);
|
|
5477
|
+
const metadataSessionIds = results[0].metadata?.sessionIds;
|
|
5478
|
+
if (Array.isArray(metadataSessionIds) && metadataSessionIds.length > 0) return metadataSessionIds.filter((id) => id != null && id !== "").map(String).join("\n");
|
|
5479
|
+
const metadataSessionId = results[0].metadata?.sessionId;
|
|
5480
|
+
if (metadataSessionId != null) return typeof metadataSessionId === "string" ? metadataSessionId : JSON.stringify(metadataSessionId);
|
|
5481
|
+
return "";
|
|
5436
5482
|
}
|
|
5437
5483
|
const varValue = results[0].testCase.vars?.[varName] ?? "";
|
|
5438
5484
|
if (typeof varValue === "string") return varValue;
|
|
@@ -5712,7 +5758,13 @@ var Eval = class Eval {
|
|
|
5712
5758
|
_resultsLoaded = false;
|
|
5713
5759
|
runtimeOptions;
|
|
5714
5760
|
_shared = false;
|
|
5761
|
+
/** Total wall-clock duration. For redteam evals: generationDurationMs + evaluationDurationMs.
|
|
5762
|
+
* For non-redteam evals: equals evaluationDurationMs (generation phase is N/A). */
|
|
5715
5763
|
durationMs;
|
|
5764
|
+
/** Time spent generating adversarial test cases (redteam only). */
|
|
5765
|
+
generationDurationMs;
|
|
5766
|
+
/** Time spent running the evaluation phase. */
|
|
5767
|
+
evaluationDurationMs;
|
|
5716
5768
|
/**
|
|
5717
5769
|
* The shareable URL for this evaluation, if it has been shared.
|
|
5718
5770
|
* Set by the evaluate() function when sharing is enabled.
|
|
@@ -5731,8 +5783,11 @@ var Eval = class Eval {
|
|
|
5731
5783
|
const eval_ = evalData[0];
|
|
5732
5784
|
const datasetId = datasetResults[0]?.datasetId;
|
|
5733
5785
|
const resultsObj = eval_.results;
|
|
5734
|
-
const
|
|
5735
|
-
const
|
|
5786
|
+
const validateDuration = (raw) => typeof raw === "number" && Number.isFinite(raw) && raw >= 0 ? raw : void 0;
|
|
5787
|
+
const rawDurationMs = validateDuration(resultsObj?.["durationMs"]);
|
|
5788
|
+
const generationDurationMs = validateDuration(resultsObj?.["generationDurationMs"]);
|
|
5789
|
+
const evaluationDurationMs = validateDuration(resultsObj?.["evaluationDurationMs"]);
|
|
5790
|
+
const durationMs = rawDurationMs ?? (generationDurationMs != null || evaluationDurationMs != null ? (generationDurationMs ?? 0) + (evaluationDurationMs ?? 0) : void 0);
|
|
5736
5791
|
const evalInstance = new Eval(eval_.config, {
|
|
5737
5792
|
id: eval_.id,
|
|
5738
5793
|
createdAt: new Date(eval_.createdAt),
|
|
@@ -5743,7 +5798,9 @@ var Eval = class Eval {
|
|
|
5743
5798
|
persisted: true,
|
|
5744
5799
|
vars: eval_.vars || [],
|
|
5745
5800
|
runtimeOptions: eval_.runtimeOptions ?? void 0,
|
|
5746
|
-
durationMs
|
|
5801
|
+
durationMs,
|
|
5802
|
+
generationDurationMs,
|
|
5803
|
+
evaluationDurationMs
|
|
5747
5804
|
});
|
|
5748
5805
|
if (eval_.results && "table" in eval_.results) evalInstance.oldResults = eval_.results;
|
|
5749
5806
|
if (!eval_.vars || eval_.vars.length === 0) {
|
|
@@ -5868,6 +5925,8 @@ var Eval = class Eval {
|
|
|
5868
5925
|
this.vars = opts?.vars || [];
|
|
5869
5926
|
this.runtimeOptions = opts?.runtimeOptions;
|
|
5870
5927
|
this.durationMs = opts?.durationMs;
|
|
5928
|
+
this.generationDurationMs = opts?.generationDurationMs;
|
|
5929
|
+
this.evaluationDurationMs = opts?.evaluationDurationMs;
|
|
5871
5930
|
}
|
|
5872
5931
|
version() {
|
|
5873
5932
|
/**
|
|
@@ -5898,7 +5957,13 @@ var Eval = class Eval {
|
|
|
5898
5957
|
if (this.useOldResults()) {
|
|
5899
5958
|
invariant(this.oldResults, "Old results not found");
|
|
5900
5959
|
updateObj.results = this.oldResults;
|
|
5901
|
-
} else if (this.durationMs !== void 0
|
|
5960
|
+
} else if (this.durationMs !== void 0 || this.generationDurationMs !== void 0 || this.evaluationDurationMs !== void 0) {
|
|
5961
|
+
let expr = sql`CASE WHEN json_valid(${evalsTable.results}) AND json_type(${evalsTable.results}) = 'object' THEN ${evalsTable.results} ELSE '{}' END`;
|
|
5962
|
+
if (this.durationMs !== void 0) expr = sql`json_set(${expr}, '$.durationMs', ${this.durationMs})`;
|
|
5963
|
+
if (this.generationDurationMs !== void 0) expr = sql`json_set(${expr}, '$.generationDurationMs', ${this.generationDurationMs})`;
|
|
5964
|
+
if (this.evaluationDurationMs !== void 0) expr = sql`json_set(${expr}, '$.evaluationDurationMs', ${this.evaluationDurationMs})`;
|
|
5965
|
+
updateObj.results = expr;
|
|
5966
|
+
}
|
|
5902
5967
|
db.update(evalsTable).set(updateObj).where(eq(evalsTable.id, this.id)).run();
|
|
5903
5968
|
this.persisted = true;
|
|
5904
5969
|
}
|
|
@@ -5908,8 +5973,17 @@ var Eval = class Eval {
|
|
|
5908
5973
|
addVar(varName) {
|
|
5909
5974
|
this.vars.push(varName);
|
|
5910
5975
|
}
|
|
5976
|
+
/** Sets the evaluation phase duration and recomputes the total. Called by the evaluator. */
|
|
5911
5977
|
setDurationMs(durationMs) {
|
|
5912
|
-
|
|
5978
|
+
if (!Number.isFinite(durationMs) || durationMs < 0) return;
|
|
5979
|
+
this.evaluationDurationMs = durationMs;
|
|
5980
|
+
this.durationMs = (this.generationDurationMs ?? 0) + durationMs;
|
|
5981
|
+
}
|
|
5982
|
+
/** Sets the generation phase duration and recomputes the total. Called by doRedteamRun. */
|
|
5983
|
+
setGenerationDurationMs(durationMs) {
|
|
5984
|
+
if (!Number.isFinite(durationMs) || durationMs < 0) return;
|
|
5985
|
+
this.generationDurationMs = durationMs;
|
|
5986
|
+
this.durationMs = durationMs + (this.evaluationDurationMs ?? 0);
|
|
5913
5987
|
}
|
|
5914
5988
|
getPrompts() {
|
|
5915
5989
|
if (this.useOldResults()) {
|
|
@@ -5947,6 +6021,27 @@ var Eval = class Eval {
|
|
|
5947
6021
|
async getTotalResultRowCount() {
|
|
5948
6022
|
return getTotalResultRowCount(this.id);
|
|
5949
6023
|
}
|
|
6024
|
+
/**
|
|
6025
|
+
* Find a non-transient HTTP error status from evaluation results.
|
|
6026
|
+
* Returns the first non-transient status (401, 403, 404, 500, 501) found, or undefined.
|
|
6027
|
+
*
|
|
6028
|
+
* For persisted evals: Uses efficient O(1) database query with LIMIT 1.
|
|
6029
|
+
* For non-persisted evals: Falls back to scanning in-memory results.
|
|
6030
|
+
*/
|
|
6031
|
+
async findTargetErrorStatus() {
|
|
6032
|
+
const scanInMemory = () => {
|
|
6033
|
+
for (const result of this.results) {
|
|
6034
|
+
const status = result.response?.metadata?.http?.status;
|
|
6035
|
+
if (typeof status === "number" && isNonTransientHttpStatus(status)) return status;
|
|
6036
|
+
}
|
|
6037
|
+
};
|
|
6038
|
+
if (!this.persisted) return scanInMemory();
|
|
6039
|
+
try {
|
|
6040
|
+
return getDb().select({ httpStatus: sql`CAST(json_extract(${evalResultsTable.response}, '$.metadata.http.status') AS INTEGER)` }).from(evalResultsTable).where(and(eq(evalResultsTable.evalId, this.id), sql`json_extract(${evalResultsTable.response}, '$.metadata.http.status') IN (${sql.join(NON_TRANSIENT_HTTP_STATUSES.map((s) => sql`${s}`), sql`, `)})`)).limit(1).get()?.httpStatus ?? void 0;
|
|
6041
|
+
} catch {
|
|
6042
|
+
return scanInMemory();
|
|
6043
|
+
}
|
|
6044
|
+
}
|
|
5950
6045
|
async fetchResultsByTestIdx(testIdx) {
|
|
5951
6046
|
return await EvalResult.findManyByEvalId(this.id, { testIdx });
|
|
5952
6047
|
}
|
|
@@ -6169,7 +6264,12 @@ var Eval = class Eval {
|
|
|
6169
6264
|
};
|
|
6170
6265
|
}
|
|
6171
6266
|
const allResults = await EvalResult.findManyByEvalIdAndTestIndices(this.id, testIndices);
|
|
6172
|
-
if (allResults.some((result) =>
|
|
6267
|
+
if (allResults.some((result) => {
|
|
6268
|
+
const hasSessionIds = Array.isArray(result.metadata?.sessionIds) && result.metadata.sessionIds.length > 0;
|
|
6269
|
+
const hasSessionId = Boolean(result.metadata?.sessionId);
|
|
6270
|
+
const notInVars = !result.testCase?.vars?.sessionId;
|
|
6271
|
+
return (hasSessionIds || hasSessionId) && notInVars;
|
|
6272
|
+
}) && !vars.includes("sessionId")) {
|
|
6173
6273
|
vars.push("sessionId");
|
|
6174
6274
|
vars.sort();
|
|
6175
6275
|
}
|
|
@@ -6230,7 +6330,9 @@ var Eval = class Eval {
|
|
|
6230
6330
|
failures: 0,
|
|
6231
6331
|
errors: 0,
|
|
6232
6332
|
tokenUsage: createEmptyTokenUsage(),
|
|
6233
|
-
durationMs: this.durationMs
|
|
6333
|
+
durationMs: this.durationMs,
|
|
6334
|
+
generationDurationMs: this.generationDurationMs,
|
|
6335
|
+
evaluationDurationMs: this.evaluationDurationMs
|
|
6234
6336
|
};
|
|
6235
6337
|
for (const prompt of this.prompts) {
|
|
6236
6338
|
stats.successes += prompt.metrics?.testPassCount ?? 0;
|
|
@@ -7528,6 +7630,24 @@ function validateTestProviderReferences(tests, providers, defaultTest, scenarios
|
|
|
7528
7630
|
});
|
|
7529
7631
|
}
|
|
7530
7632
|
|
|
7633
|
+
//#endregion
|
|
7634
|
+
//#region src/util/config/extensions.ts
|
|
7635
|
+
/**
 * Config file extensions that promptfoo recognises, listed by how often they are used.
 * The order is significant: loaders walk the list in sequence and stop at the first hit.
 */
const DEFAULT_CONFIG_EXTENSIONS = ["yaml", "yml", "json", "cjs", "cts", "js", "mjs", "mts", "ts"];
|
|
7650
|
+
|
|
7531
7651
|
//#endregion
|
|
7532
7652
|
//#region src/util/config/load.ts
|
|
7533
7653
|
/**
|
|
@@ -7536,6 +7656,34 @@ function validateTestProviderReferences(tests, providers, defaultTest, scenarios
|
|
|
7536
7656
|
function isTestCaseWithVars(test) {
	// Primitives, functions and null can never carry a `vars` key.
	if (test === null || typeof test !== "object") return false;
	// `in` also sees inherited keys and keys whose value is undefined.
	return "vars" in test;
}
|
|
7659
|
+
/**
 * Reconciles `--providers` CLI tokens with the providers declared in a config file,
 * so that a token naming a configured provider keeps that provider's config options
 * (e.g. num_ctx, temperature) instead of being created fresh.
 *
 * Each token is resolved with the first rule that matches:
 * 1. a config provider whose id equals the token
 * 2. a config provider whose label equals the token
 * 3. a config provider whose id ends with `:<token>` (e.g. CLI `llama3.1:8b`
 *    matches config `ollama:llama3.1:8b`); the earliest config entry wins
 * 4. otherwise the raw token is kept, so a fresh provider can be created from it
 *
 * When `configProviders` is not an array, `cliProviders` is returned untouched.
 */
function resolveCliProvidersWithConfig(cliProviders, configProviders) {
	if (!Array.isArray(configProviders)) return cliProviders;
	const candidates = configProviders.map((cp, i) => ({
		provider: cp,
		...getProviderIdAndLabel(cp, i)
	}));
	// Ordered from most to least specific; the first rule that yields a hit decides.
	const rules = [
		(token) => (entry) => entry.id === token,
		(token) => (entry) => entry.label === token,
		(token) => (entry) => entry.id.endsWith(":" + token)
	];
	return cliProviders.map((token) => {
		for (const makePredicate of rules) {
			const hit = candidates.find(makePredicate(token));
			if (hit) return hit.provider;
		}
		return token;
	});
}
|
|
7539
7687
|
async function dereferenceConfig(rawConfig) {
|
|
7540
7688
|
if (getEnvBool("PROMPTFOO_DISABLE_REF_PARSER")) return rawConfig;
|
|
7541
7689
|
const extractFunctionParameters = (functions) => {
|
|
@@ -7672,7 +7820,7 @@ async function combineConfigs(configPaths) {
|
|
|
7672
7820
|
const configs = [];
|
|
7673
7821
|
for (const configPath of configPaths) {
|
|
7674
7822
|
const globPaths = globSync(path$2.resolve(process$1.cwd(), configPath), { windowsPathsNoEscape: true });
|
|
7675
|
-
if (globPaths.length === 0) throw new Error(`No configuration file found at ${configPath}. Run "
|
|
7823
|
+
if (globPaths.length === 0) throw new Error(`No configuration file found at ${configPath}. Run "${promptfooCommand("init")}" to create one or pass --config path/to/promptfooconfig.yaml.`);
|
|
7676
7824
|
for (const globPath of globPaths) {
|
|
7677
7825
|
const config = await readConfig(globPath);
|
|
7678
7826
|
configs.push(config);
|
|
@@ -7885,7 +8033,7 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7885
8033
|
tags: fileConfig.tags || defaultConfig.tags,
|
|
7886
8034
|
description: cmdObj.description || fileConfig.description || defaultConfig.description,
|
|
7887
8035
|
prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts || [],
|
|
7888
|
-
providers:
|
|
8036
|
+
providers: fileConfig.providers || defaultConfig.providers || [],
|
|
7889
8037
|
tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests || [],
|
|
7890
8038
|
scenarios: fileConfig.scenarios || defaultConfig.scenarios,
|
|
7891
8039
|
env: fileConfig.env || defaultConfig.env,
|
|
@@ -7900,11 +8048,14 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7900
8048
|
evaluateOptions: fileConfig.evaluateOptions || defaultConfig.evaluateOptions
|
|
7901
8049
|
};
|
|
7902
8050
|
const hasPrompts = [config.prompts].flat().filter(Boolean).length > 0;
|
|
7903
|
-
const hasProviders = [config.providers].flat().filter(Boolean).length > 0;
|
|
8051
|
+
const hasProviders = cmdObj.providers && cmdObj.providers.length > 0 || [config.providers].flat().filter(Boolean).length > 0;
|
|
7904
8052
|
if (!Boolean(configPaths) && !hasPrompts && !hasProviders && !isCI()) {
|
|
8053
|
+
const extList = DEFAULT_CONFIG_EXTENSIONS.join(", ");
|
|
7905
8054
|
logger_default.warn(dedent`
|
|
7906
8055
|
${chalk.yellow.bold("⚠️ No promptfooconfig found")}
|
|
7907
8056
|
|
|
8057
|
+
${chalk.white(`Searched in ${chalk.bold(process$1.cwd())} for promptfooconfig.{${extList}}`)}
|
|
8058
|
+
|
|
7908
8059
|
${chalk.white("Try running with:")}
|
|
7909
8060
|
|
|
7910
8061
|
${chalk.cyan(`${promptfooCommand("")} eval -c ${chalk.bold("path/to/promptfooconfig.yaml")}`)}
|
|
@@ -7925,8 +8076,9 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7925
8076
|
}
|
|
7926
8077
|
invariant(Array.isArray(config.providers), "providers must be an array");
|
|
7927
8078
|
const resolvedProviderConfigs = resolveProviderConfigs(config.providers, { basePath });
|
|
8079
|
+
const cliFilteredProviderConfigs = (cmdObj.providers ? resolveCliProvidersWithConfig(cmdObj.providers, resolvedProviderConfigs) : resolvedProviderConfigs) ?? [];
|
|
7928
8080
|
const filterOption = cmdObj.filterProviders || cmdObj.filterTargets;
|
|
7929
|
-
const filteredProviderConfigs = filterProviderConfigs(
|
|
8081
|
+
const filteredProviderConfigs = filterProviderConfigs(cliFilteredProviderConfigs, filterOption);
|
|
7930
8082
|
if (filterOption && Array.isArray(filteredProviderConfigs) && filteredProviderConfigs.length === 0) logger_default.warn(`No providers matched the filter "${filterOption}". Check your --filter-providers/--filter-targets value.`);
|
|
7931
8083
|
let parsedPrompts = await readPrompts(config.prompts, cmdObj.prompts ? void 0 : basePath);
|
|
7932
8084
|
if (cmdObj.filterPrompts) {
|
|
@@ -7962,7 +8114,7 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7962
8114
|
}
|
|
7963
8115
|
const parsedProviderPromptMap = readProviderPromptMap({ providers: filteredProviderConfigs }, parsedPrompts);
|
|
7964
8116
|
if (parsedPrompts.length === 0) {
|
|
7965
|
-
logger_default.error("No prompts found");
|
|
8117
|
+
logger_default.error("No prompts found. Add a `prompts:` entry to your config or pass --prompts path/to/prompt.txt.");
|
|
7966
8118
|
process$1.exit(1);
|
|
7967
8119
|
}
|
|
7968
8120
|
const defaultTest = {
|
|
@@ -9570,6 +9722,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9570
9722
|
seen.add(key);
|
|
9571
9723
|
return true;
|
|
9572
9724
|
});
|
|
9725
|
+
const needsGoalExtraction = strategies.some((s) => Strategies.find((def) => def.id === s.id)?.requiresGoalExtraction);
|
|
9573
9726
|
await validateStrategies(strategies);
|
|
9574
9727
|
await validateSharpDependency(strategies, plugins);
|
|
9575
9728
|
const redteamProvider = await redteamProviderManager.getProvider({ provider });
|
|
@@ -9761,13 +9914,15 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9761
9914
|
if (!Array.isArray(allPluginTests) || allPluginTests.length === 0) logger_default.warn(`Failed to generate tests for ${plugin.id}`);
|
|
9762
9915
|
else {
|
|
9763
9916
|
const testCasesWithMetadata = allPluginTests;
|
|
9764
|
-
|
|
9765
|
-
|
|
9766
|
-
const
|
|
9767
|
-
|
|
9768
|
-
|
|
9769
|
-
|
|
9770
|
-
|
|
9917
|
+
if (needsGoalExtraction) {
|
|
9918
|
+
logger_default.debug(`Extracting goal for ${testCasesWithMetadata.length} tests from ${plugin.id}...`);
|
|
9919
|
+
for (const testCase of testCasesWithMetadata) {
|
|
9920
|
+
const promptVar = testCase.vars?.[injectVar];
|
|
9921
|
+
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
9922
|
+
const policy = getPolicyText(testCase.metadata);
|
|
9923
|
+
const extractedGoal = await extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
9924
|
+
testCase.metadata.goal = extractedGoal;
|
|
9925
|
+
}
|
|
9771
9926
|
}
|
|
9772
9927
|
testCases.push(...testCasesWithMetadata);
|
|
9773
9928
|
}
|
|
@@ -9802,13 +9957,15 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9802
9957
|
...t.metadata || {}
|
|
9803
9958
|
}
|
|
9804
9959
|
}));
|
|
9805
|
-
|
|
9806
|
-
|
|
9807
|
-
const
|
|
9808
|
-
|
|
9809
|
-
|
|
9810
|
-
|
|
9811
|
-
|
|
9960
|
+
if (needsGoalExtraction) {
|
|
9961
|
+
logger_default.debug(`Extracting goal for ${testCasesWithMetadata.length} custom tests from ${plugin.id}...`);
|
|
9962
|
+
for (const testCase of testCasesWithMetadata) {
|
|
9963
|
+
const promptVar = testCase.vars?.[injectVar];
|
|
9964
|
+
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
9965
|
+
const policy = getPolicyText(testCase.metadata);
|
|
9966
|
+
const extractedGoal = await extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
9967
|
+
testCase.metadata.goal = extractedGoal;
|
|
9968
|
+
}
|
|
9812
9969
|
}
|
|
9813
9970
|
testCases.push(...testCasesWithMetadata);
|
|
9814
9971
|
logger_default.debug(`Added ${customTests.length} custom test cases from ${plugin.id}`);
|
|
@@ -10831,17 +10988,7 @@ async function loadDefaultConfig(dir, configName = "promptfooconfig") {
|
|
|
10831
10988
|
if (configCache.has(cacheKey)) return configCache.get(cacheKey);
|
|
10832
10989
|
let defaultConfig = {};
|
|
10833
10990
|
let defaultConfigPath;
|
|
10834
|
-
for (const ext of
|
|
10835
|
-
"yaml",
|
|
10836
|
-
"yml",
|
|
10837
|
-
"json",
|
|
10838
|
-
"cjs",
|
|
10839
|
-
"cts",
|
|
10840
|
-
"js",
|
|
10841
|
-
"mjs",
|
|
10842
|
-
"mts",
|
|
10843
|
-
"ts"
|
|
10844
|
-
]) {
|
|
10991
|
+
for (const ext of DEFAULT_CONFIG_EXTENSIONS) {
|
|
10845
10992
|
const configPath = path.join(dir, `${configName}.${ext}`);
|
|
10846
10993
|
const maybeConfig = await maybeReadConfig(configPath);
|
|
10847
10994
|
if (maybeConfig) {
|
|
@@ -10947,11 +11094,31 @@ function formatDuration(seconds) {
|
|
|
10947
11094
|
* ```
|
|
10948
11095
|
*/
|
|
10949
11096
|
function generateEvalSummary(params) {
|
|
10950
|
-
const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker } = params;
|
|
11097
|
+
const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker, targetErrorStatus } = params;
|
|
10951
11098
|
const lines = [];
|
|
10952
11099
|
const completionType = isRedteam ? "Red team" : "Eval";
|
|
10953
|
-
const
|
|
11100
|
+
const wasAborted = targetErrorStatus != null;
|
|
11101
|
+
let completionMessage;
|
|
11102
|
+
if (wasAborted) {
|
|
11103
|
+
completionMessage = `${chalk.red("✗")} ${completionType} aborted`;
|
|
11104
|
+
if (writeToDatabase) completionMessage += ` (ID: ${chalk.cyan(evalId)})`;
|
|
11105
|
+
} else if (writeToDatabase && shareableUrl) completionMessage = `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
|
|
11106
|
+
else if (writeToDatabase && activelySharing) completionMessage = `${chalk.green("✓")} ${completionType} complete`;
|
|
11107
|
+
else if (writeToDatabase) completionMessage = `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
|
|
11108
|
+
else completionMessage = `${chalk.green("✓")} ${completionType} complete`;
|
|
10954
11109
|
lines.push(completionMessage);
|
|
11110
|
+
if (wasAborted && targetErrorStatus != null) {
|
|
11111
|
+
lines.push("");
|
|
11112
|
+
lines.push(chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
|
|
11113
|
+
lines.push(chalk.red(` Target returned HTTP ${targetErrorStatus}`));
|
|
11114
|
+
lines.push("");
|
|
11115
|
+
lines.push(chalk.yellow("Possible causes:"));
|
|
11116
|
+
lines.push(chalk.yellow(" • Invalid API key or authentication (401/403)"));
|
|
11117
|
+
lines.push(chalk.yellow(" • Target endpoint does not exist (404)"));
|
|
11118
|
+
lines.push(chalk.yellow(" • Server does not support the request (501)"));
|
|
11119
|
+
lines.push("");
|
|
11120
|
+
lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
|
|
11121
|
+
}
|
|
10955
11122
|
if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
|
|
10956
11123
|
lines.push("");
|
|
10957
11124
|
lines.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
|
|
@@ -11194,6 +11361,21 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11194
11361
|
let testSuite = void 0;
|
|
11195
11362
|
let _basePath = void 0;
|
|
11196
11363
|
let commandLineOptions = void 0;
|
|
11364
|
+
const configArgs = Array.isArray(cmdObj.config) ? cmdObj.config : typeof cmdObj.config === "string" ? [cmdObj.config] : [];
|
|
11365
|
+
const uuidConfigArgs = configArgs.filter((configArg) => isUuid(configArg));
|
|
11366
|
+
if (configArgs.length > 1 && uuidConfigArgs.length > 0) throw new Error("Cloud config UUID mode supports exactly one -c value. Use: promptfoo eval -c <cloud-config-uuid>");
|
|
11367
|
+
if (configArgs.length === 1 && uuidConfigArgs.length === 1) {
|
|
11368
|
+
const cloudConfigId = uuidConfigArgs[0];
|
|
11369
|
+
if (cmdObj.watch) throw new Error("--watch is not supported when using a cloud config UUID with -c. Use a local config file path for watch mode.");
|
|
11370
|
+
try {
|
|
11371
|
+
defaultConfig = await getEvalConfigFromCloud(cloudConfigId);
|
|
11372
|
+
} catch (error) {
|
|
11373
|
+
const reason = error instanceof Error ? error.message : String(error);
|
|
11374
|
+
throw new Error(`Failed to load cloud eval config "${cloudConfigId}". ${reason}. Cloud UUID inputs do not fall back to local file paths. Check authentication and that the UUID exists.`);
|
|
11375
|
+
}
|
|
11376
|
+
cmdObj.config = void 0;
|
|
11377
|
+
defaultConfigPath = void 0;
|
|
11378
|
+
}
|
|
11197
11379
|
const runEvaluation = async (initialization) => {
|
|
11198
11380
|
const startTime = Date.now();
|
|
11199
11381
|
telemetry_default.record("command_used", {
|
|
@@ -11217,7 +11399,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11217
11399
|
...defaultConfig,
|
|
11218
11400
|
...dirConfig
|
|
11219
11401
|
};
|
|
11220
|
-
} else logger_default.warn(`No configuration file found in directory: ${configPath}
|
|
11402
|
+
} else logger_default.warn(`No configuration file found in directory: ${configPath}. Looked for promptfooconfig.{${DEFAULT_CONFIG_EXTENSIONS.join(",")}}. Run "${promptfooCommand("init")}" or pass --config path/to/promptfooconfig.yaml.`);
|
|
11221
11403
|
}
|
|
11222
11404
|
}
|
|
11223
11405
|
const resumeRaw = cmdObj.resume;
|
|
@@ -11497,6 +11679,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11497
11679
|
const isRedteam = Boolean(config.redteam);
|
|
11498
11680
|
const duration = Math.round((Date.now() - startTime) / 1e3);
|
|
11499
11681
|
const tracker = TokenUsageTracker.getInstance();
|
|
11682
|
+
const targetErrorStatus = await evalRecord.findTargetErrorStatus();
|
|
11500
11683
|
const summaryLines = generateEvalSummary({
|
|
11501
11684
|
evalId: evalRecord.id,
|
|
11502
11685
|
isRedteam,
|
|
@@ -11512,7 +11695,8 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11512
11695
|
errors,
|
|
11513
11696
|
duration,
|
|
11514
11697
|
maxConcurrency,
|
|
11515
|
-
tracker
|
|
11698
|
+
tracker,
|
|
11699
|
+
targetErrorStatus
|
|
11516
11700
|
});
|
|
11517
11701
|
if (cmdObj.write && wantsToShare && !canShareEval) {
|
|
11518
11702
|
logger_default.info(summaryLines[0]);
|
|
@@ -11567,7 +11751,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11567
11751
|
if (initialization) {
|
|
11568
11752
|
const configPaths = (cmdObj.config || [defaultConfigPath]).filter(Boolean);
|
|
11569
11753
|
if (!configPaths.length) {
|
|
11570
|
-
logger_default.error(
|
|
11754
|
+
logger_default.error(`Could not locate config file(s) to watch. Pass --config path/to/promptfooconfig.yaml or run from a directory containing promptfooconfig.{${DEFAULT_CONFIG_EXTENSIONS.join(",")}}.`);
|
|
11571
11755
|
process.exitCode = 1;
|
|
11572
11756
|
return ret;
|
|
11573
11757
|
}
|
|
@@ -11725,6 +11909,7 @@ async function doRedteamRun(options) {
|
|
|
11725
11909
|
logger_default.info("Generating test cases...");
|
|
11726
11910
|
const { maxConcurrency, ...passThroughOptions } = options;
|
|
11727
11911
|
let redteamConfig;
|
|
11912
|
+
const generationStartTime = Date.now();
|
|
11728
11913
|
try {
|
|
11729
11914
|
redteamConfig = await doGenerateRedteam({
|
|
11730
11915
|
...passThroughOptions,
|
|
@@ -11748,6 +11933,7 @@ async function doRedteamRun(options) {
|
|
|
11748
11933
|
}
|
|
11749
11934
|
throw error;
|
|
11750
11935
|
}
|
|
11936
|
+
const generationDurationMs = Date.now() - generationStartTime;
|
|
11751
11937
|
if (!redteamConfig || !fs$1.existsSync(redteamPath)) {
|
|
11752
11938
|
logger_default.info("No test cases generated. Skipping scan.");
|
|
11753
11939
|
if (verboseToggleCleanup) verboseToggleCleanup();
|
|
@@ -11770,7 +11956,14 @@ async function doRedteamRun(options) {
|
|
|
11770
11956
|
abortSignal: options.abortSignal,
|
|
11771
11957
|
progressCallback: options.progressCallback
|
|
11772
11958
|
});
|
|
11773
|
-
|
|
11959
|
+
if (evalResult && generationDurationMs >= 0) {
|
|
11960
|
+
evalResult.setGenerationDurationMs(generationDurationMs);
|
|
11961
|
+
if (evalResult.persisted) await evalResult.save();
|
|
11962
|
+
const totalMs = evalResult.durationMs ?? 0;
|
|
11963
|
+
const evalMs = evalResult.evaluationDurationMs ?? 0;
|
|
11964
|
+
logger_default.info(chalk.gray(`Total scan time: ${formatDuration(totalMs / 1e3)} (generation: ${formatDuration(generationDurationMs / 1e3)}, evaluation: ${formatDuration(evalMs / 1e3)})`));
|
|
11965
|
+
}
|
|
11966
|
+
if (evalResult ? await evalResult.findTargetErrorStatus() != null : false) {} else logger_default.info(chalk.green("\nRed team scan complete!"));
|
|
11774
11967
|
if (!evalResult?.shared) if (options.liveRedteamConfig) logger_default.info(chalk.blue(`To view the results, click the ${chalk.bold("View Report")} button or run ${chalk.bold(promptfooCommand("redteam report"))} on the command line.`));
|
|
11775
11968
|
else logger_default.info(chalk.blue(`To view the results, run ${chalk.bold(promptfooCommand("redteam report"))}`));
|
|
11776
11969
|
setLogCallback(null);
|