promptfoo 0.120.24 → 0.120.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -22
- package/dist/src/{ListApp-55OAHww3.js → ListApp-D3DG0F8h.js} +1 -1
- package/dist/src/{accounts-BwVGYllP.cjs → accounts-BzEY8H3v.cjs} +5 -5
- package/dist/src/{accounts-BOlMHrvn.js → accounts-DHHiXsy6.js} +6 -6
- package/dist/src/{accounts-DxR9x_En.js → accounts-R3gfCR_g.js} +5 -5
- package/dist/src/{agentic-utils-t9-1raty.js → agentic-utils-D6_gzOUF.js} +3 -3
- package/dist/src/{agents-DMpQ860j.cjs → agents-C7BiinFI.cjs} +7 -7
- package/dist/src/{agents-9Z7pO8yx.js → agents-Cao4i7AX.js} +8 -8
- package/dist/src/{agents-DjExVR3v.js → agents-Cnph5GLD.js} +2 -2
- package/dist/src/{agents-CI0Ad69d.cjs → agents-CwM7re15.cjs} +5 -5
- package/dist/src/{agents-BwI-Tm5m.js → agents-DETIQHqF.js} +7 -7
- package/dist/src/{agents-CRAmwjo-.js → agents-DYIT-hQy.js} +6 -6
- package/dist/src/{agents-BbIpDhgp.js → agents-GiUianme.js} +5 -5
- package/dist/src/{agents-yL5DzIKY.js → agents-v4cW_ZgC.js} +2 -2
- package/dist/src/{aimlapi-xHHqwNg8.js → aimlapi-CMJpKK-B.js} +13 -13
- package/dist/src/{aimlapi-BM9P45Bp.js → aimlapi-DMF6v_vb.js} +15 -15
- package/dist/src/{aimlapi-DKo7IxJg.cjs → aimlapi-DoGLcQW_.cjs} +14 -14
- package/dist/src/{aimlapi-ivzDkqbs.js → aimlapi-DtSf1ykJ.js} +2 -2
- package/dist/src/app/assets/index-BOgkICuY.css +1 -0
- package/dist/src/app/assets/index-CSgqn_Vd.js +428 -0
- package/dist/src/app/assets/{vendor-charts-DnwydkkY.js → vendor-charts-CYyo8R8v.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DjKAU15k.js → vendor-markdown-DSmzq4Jh.js} +1 -1
- package/dist/src/app/assets/vendor-react-AtKqiNEf.js +4 -0
- package/dist/src/app/assets/{vendor-syntax-Dc86c4Bu.js → vendor-syntax-D06x6TQF.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-B3QGL6Nl.cjs → audio-BWjyvHn9.cjs} +4 -4
- package/dist/src/{audio-C-1oBiVZ.js → audio-BrJBFN2b.js} +4 -4
- package/dist/src/{audio-DnfDVjhL.js → audio-DUH4q0Xq.js} +5 -5
- package/dist/src/{audio-DQWHfAr8.js → audio-U580w8jM.js} +2 -2
- package/dist/src/{base-DHam07ip.js → base-BaXmtXYp.js} +3 -3
- package/dist/src/{base-BjsgxJp9.js → base-Dtp8b4_N.js} +3 -3
- package/dist/src/{base-Bso8ccGu.cjs → base-f71xxWai.cjs} +3 -3
- package/dist/src/{blobs-DOdc-E8L.cjs → blobs-673H0jCl.cjs} +3 -3
- package/dist/src/{blobs-Dcm272OH.js → blobs-C9J2mVgo.js} +3 -3
- package/dist/src/{blobs-MlVQyoSc.js → blobs-kt8v3UyH.js} +3 -3
- package/dist/src/{cache-KNsTJLKP.cjs → cache-7xULbvt3.cjs} +35 -8
- package/dist/src/{cache-BMl3AIo5.js → cache-BLLayYEN.js} +24 -9
- package/dist/src/{cache-B0ptF7sO.js → cache-BUPcq0Ad.js} +3 -3
- package/dist/src/cache-CVfRb-HD.cjs +6 -0
- package/dist/src/{cache-DPztwHxt.js → cache-O4EuX2JV.js} +4 -4
- package/dist/src/{cache-DHm8Pc8l.js → cache-mIszOnuz.js} +5 -22
- package/dist/src/{chat-DOQIJx7O.cjs → chat-C4zqjObh.cjs} +10 -10
- package/dist/src/{chat-DDGU7Tik.js → chat-C8Ei4f87.js} +9 -9
- package/dist/src/{chat-BnzIuoTQ.js → chat-CgyGj2hC.js} +12 -12
- package/dist/src/{chat-BRMJOuCv.js → chat-Cpz3O-Xl.js} +14 -14
- package/dist/src/{chat-CVOyOBYk.js → chat-DIywASPG.js} +11 -11
- package/dist/src/{chat-CrnXMbvh.cjs → chat-Fl6TZJRS.cjs} +13 -13
- package/dist/src/{chat-D8GcWK9l.js → chat-XPN9YHhr.js} +2 -2
- package/dist/src/{chatkit-IWIuhxFE.js → chatkit-DEls11hE.js} +4 -4
- package/dist/src/{chatkit-DlOZ-eNb.js → chatkit-DIrJX8xk.js} +4 -4
- package/dist/src/{chatkit-CcktkleS.js → chatkit-Dpxrq4eD.js} +2 -2
- package/dist/src/{chatkit-yeVtURUi.cjs → chatkit-e25Ziu17.cjs} +4 -4
- package/dist/src/{claude-agent-sdk-BiMgSH9g.js → claude-agent-sdk-6-xTaLwM.js} +48 -19
- package/dist/src/{claude-agent-sdk-Yj9Hq0cH.cjs → claude-agent-sdk-BzxF6NIJ.cjs} +48 -19
- package/dist/src/{claude-agent-sdk-rVqlUIJl.js → claude-agent-sdk-CmkTnKGH.js} +49 -20
- package/dist/src/{claude-agent-sdk-yid1kGsL.js → claude-agent-sdk-rXCBLK_o.js} +40 -11
- package/dist/src/{cloud-ByJVRSQI.js → cloud-BMbRVJFw.js} +3 -3
- package/dist/src/cloud-CZ4hytdm.js +5 -0
- package/dist/src/{cloudflare-ai-DmJZcvVx.cjs → cloudflare-ai-BAQ0u_dg.cjs} +14 -14
- package/dist/src/{cloudflare-ai-CWKYPCCF.js → cloudflare-ai-CUg4BTcj.js} +13 -13
- package/dist/src/{cloudflare-ai-goDJ5bwr.js → cloudflare-ai-CobxMTR4.js} +15 -15
- package/dist/src/{cloudflare-ai-CdKv38f6.js → cloudflare-ai-Z9X219gp.js} +2 -2
- package/dist/src/{cloudflare-gateway-BW8hcdB0.cjs → cloudflare-gateway-C0sgfr_z.cjs} +15 -15
- package/dist/src/{cloudflare-gateway-CDCOS1Os.js → cloudflare-gateway-D2_yi-Fh.js} +16 -16
- package/dist/src/{cloudflare-gateway-Dz_HCMGY.js → cloudflare-gateway-Djf3F3_H.js} +2 -2
- package/dist/src/{cloudflare-gateway-gzmTsXcd.js → cloudflare-gateway-_itGuXry.js} +14 -14
- package/dist/src/{codex-sdk-BlvhxMr0.js → codex-sdk-BASDNkIl.js} +90 -34
- package/dist/src/{codex-sdk-BC5qbCjh.js → codex-sdk-dSnGdgIp.js} +92 -36
- package/dist/src/{codex-sdk-CBpm53B8.js → codex-sdk-ibXwdglL.js} +91 -35
- package/dist/src/{codex-sdk-Cf5_CQch.cjs → codex-sdk-wTEpMM_X.cjs} +92 -36
- package/dist/src/{cometapi-DoaO5SLq.js → cometapi-B01btbfb.js} +16 -16
- package/dist/src/{cometapi-BUKGSUCw.js → cometapi-ChAaRjg5.js} +14 -14
- package/dist/src/{cometapi-C6BSw9k3.js → cometapi-DHUAH6nK.js} +2 -2
- package/dist/src/{cometapi-B3TzpkNI.cjs → cometapi-JbvOJSCO.cjs} +15 -15
- package/dist/src/{completion-BLe7SUA4.js → completion-BBJ6zmG3.js} +6 -6
- package/dist/src/{completion-1RAiLcwi.js → completion-D9_MDlnd.js} +6 -6
- package/dist/src/{completion-D6BTVt6e.cjs → completion-DDyL3Cb2.cjs} +6 -6
- package/dist/src/{docker-CkOzlD1W.js → docker-BuButc4D.js} +15 -15
- package/dist/src/{docker-BWsSX8Pn.cjs → docker-Ckw-j7Rr.cjs} +14 -14
- package/dist/src/{docker-CMf5SGLq.js → docker-JAAubMw3.js} +13 -13
- package/dist/src/{docker-C0AzMsuf.js → docker-vnOg96gi.js} +2 -2
- package/dist/src/errors-DnGCbnx8.js +36 -0
- package/dist/src/{esm-gWVPXn3r.cjs → esm-BQkx5roy.cjs} +2 -2
- package/dist/src/{esm-Lh6czAFT.js → esm-CYhseqj4.js} +2 -2
- package/dist/src/{esm-D40XsWlR.js → esm-rDtG_2rg.js} +2 -2
- package/dist/src/eval-CKHWqG9f.js +16 -0
- package/dist/src/{eval-WaSg64w3.js → eval-CYrbG57o.js} +106 -29
- package/dist/src/{evalResult-CGrGU_cG.js → evalResult-6JaUIStC.js} +7 -7
- package/dist/src/{evalResult-C_UM63nn.js → evalResult-COsVttMA.js} +7 -7
- package/dist/src/evalResult-CxTP-LMm.cjs +11 -0
- package/dist/src/evalResult-CzLURDcP.js +13 -0
- package/dist/src/{evalResult-BU4huold.cjs → evalResult-DlRfu_Rq.cjs} +7 -7
- package/dist/src/evalResult-DyttNQ_G.js +11 -0
- package/dist/src/evaluator-0PvfeBYh.js +38 -0
- package/dist/src/{evaluator-gmF3Vddp.js → evaluator-3EJCMTs0.js} +75 -49
- package/dist/src/{extractor-D-RM2m6A.cjs → extractor-DbhlYEeo.cjs} +23 -6
- package/dist/src/{extractor-Da8IxCeS.js → extractor-Hs7la_19.js} +23 -6
- package/dist/src/{extractor-BA90EtLq.js → extractor-LSYjrhK0.js} +23 -6
- package/dist/src/{fetch-BIfcW-Fd.js → fetch-18MuNu9i.js} +21 -7
- package/dist/src/{fetch-CW9GabAi.js → fetch-Bi0o-fdp.js} +2 -2
- package/dist/src/fetch-CMptBDVg.cjs +4 -0
- package/dist/src/fetch-DAZkv3gV.js +6 -0
- package/dist/src/{fetch-Mb7SbeM-.js → fetch-SRsE6Ctl.js} +21 -7
- package/dist/src/{fetch-MvWxCS5U.cjs → fetch-ZMn_oemb.cjs} +21 -7
- package/dist/src/{fileExtensions-CzcPkiKZ.js → fileExtensions-BpuMmaFL.js} +1 -1
- package/dist/src/{formatDuration-CCiZ-wBY.js → formatDuration-Doo0xq-z.js} +1 -1
- package/dist/src/{genaiTracer-BQ8AmajD.cjs → genaiTracer-CQlpZkrp.cjs} +2 -2
- package/dist/src/{genaiTracer-foKtbVa2.js → genaiTracer-CqNnnXrE.js} +1 -1
- package/dist/src/graders-BCytzXrb.js +34 -0
- package/dist/src/{graders-CwtelA9k.js → graders-BaMCwIKp.js} +212 -91
- package/dist/src/graders-CGZQShfJ.cjs +33 -0
- package/dist/src/{graders-2dabIkXs.cjs → graders-DzUUnUjC.cjs} +212 -91
- package/dist/src/{graders-BscQ6O1Q.js → graders-QsALpIdy.js} +211 -90
- package/dist/src/graders-spkuVC-E.js +36 -0
- package/dist/src/{image-Dv0o-Ab7.js → image-BXt_7u0v.js} +4 -4
- package/dist/src/{image-B6TV9l0v.js → image-BiEVdpdP.js} +7 -7
- package/dist/src/{image-DS9hlgSc.cjs → image-C3BjJUAU.cjs} +7 -7
- package/dist/src/{image-_jKUeeh9.js → image-COCWy5dX.js} +2 -2
- package/dist/src/{image-DqElR73O.cjs → image-D10zEe1f.cjs} +4 -4
- package/dist/src/{image-BVEVjrwl.js → image-DB4sHxdJ.js} +5 -5
- package/dist/src/{image-C1i3NoZo.js → image-mhAGP07h.js} +7 -7
- package/dist/src/index.cjs +305 -112
- package/dist/src/index.d.cts +137 -3
- package/dist/src/index.d.ts +137 -3
- package/dist/src/index.js +304 -111
- package/dist/src/{interactiveCheck-CjK2R4rB.js → interactiveCheck-DU-MAhp5.js} +2 -2
- package/dist/src/{knowledgeBase-K5fJN2kC.cjs → knowledgeBase-Bnb00xKs.cjs} +8 -8
- package/dist/src/{knowledgeBase-D-xthvn0.js → knowledgeBase-CMvMlLZR.js} +9 -9
- package/dist/src/{knowledgeBase-DJZHeJqg.js → knowledgeBase-DotRBzUE.js} +2 -2
- package/dist/src/{knowledgeBase-BSs2ldSR.js → knowledgeBase-XJQ0Qyez.js} +8 -8
- package/dist/src/{litellm-kPhaZkzz.js → litellm-BECdjOTx.js} +3 -3
- package/dist/src/{litellm-uRLukY7s.cjs → litellm-BrnZhMcL.cjs} +15 -15
- package/dist/src/{litellm-DH_rVIOd.js → litellm-CHrRmPAe.js} +16 -16
- package/dist/src/{litellm-DoXGHlpD.js → litellm-CrLJrPIm.js} +14 -14
- package/dist/src/{logger-CMp-NS-e.cjs → logger-BdZ-IqBc.cjs} +5 -2
- package/dist/src/{logger-Bzi5o47S.js → logger-BotXmWKW.js} +5 -2
- package/dist/src/{logger-q5I8CByj.js → logger-w8Ozp0Td.js} +5 -2
- package/dist/src/{luma-ray-B3GiVpuq.js → luma-ray-BOeX-h0M.js} +10 -10
- package/dist/src/{luma-ray-CPISsLu-.js → luma-ray-C-w6EsJm.js} +2 -2
- package/dist/src/{luma-ray-BrWrKIIB.cjs → luma-ray-C0RkI3lt.cjs} +10 -10
- package/dist/src/{luma-ray-gyI_GEy2.js → luma-ray-DgKLS0BF.js} +11 -11
- package/dist/src/main.js +3260 -2537
- package/dist/src/{messages-CJgEjRQF.js → messages-CDZYGNlS.js} +13 -10
- package/dist/src/{messages-BMkwrfmt.js → messages-D61tPFQo.js} +14 -11
- package/dist/src/{messages-B5HsO_dM.cjs → messages-DXV3Qh8_.cjs} +14 -11
- package/dist/src/{meteor-CBciquOS.js → meteor-Dj8cTkU_.js} +1 -1
- package/dist/src/{meteor-0cLf9TIn.js → meteor-P2rUE-Uz.js} +1 -1
- package/dist/src/{meteor-BQ6Ws9k2.js → meteor-SLNTgmXm.js} +2 -2
- package/dist/src/{meteor-DRuEfzuQ.cjs → meteor-odmwVbyG.cjs} +1 -1
- package/dist/src/{nova-reel-B5SOonY2.js → nova-reel-Bk5npr2q.js} +11 -11
- package/dist/src/{nova-reel-CT9ZuhJ3.js → nova-reel-C2LFfVTf.js} +2 -2
- package/dist/src/{nova-reel-ByTx85ed.cjs → nova-reel-D9FXq3Mt.cjs} +10 -10
- package/dist/src/{nova-reel-ChM7WaHR.js → nova-reel-DtCjbD5O.js} +10 -10
- package/dist/src/{nova-sonic-D4M3kKYu.js → nova-sonic-BXRfQyF-.js} +8 -8
- package/dist/src/{nova-sonic-C-H3eDvL.cjs → nova-sonic-BoRSY_U6.cjs} +7 -7
- package/dist/src/{nova-sonic-DgifpOKF.js → nova-sonic-CgaWLDM1.js} +7 -7
- package/dist/src/{nova-sonic-BqP59oOu.js → nova-sonic-D_qERM-K.js} +2 -2
- package/dist/src/{openai-BjpdxIOG.cjs → openai-Bigwjgo1.cjs} +2 -2
- package/dist/src/{openai-Cv9pEKxp.js → openai-CT5fwbve.js} +2 -2
- package/dist/src/{openai-CitF-gEN.js → openai-Dz3surb_.js} +2 -2
- package/dist/src/openclaw-B6XY2kUf.js +526 -0
- package/dist/src/openclaw-CpPrXwf6.js +524 -0
- package/dist/src/openclaw-DDSfq5fp.cjs +528 -0
- package/dist/src/openclaw-dHLcXUWZ.js +511 -0
- package/dist/src/{opencode-sdk-Dakn4QMp.js → opencode-sdk-CImWVqy9.js} +7 -7
- package/dist/src/{opencode-sdk-BcLvVMV-.js → opencode-sdk-CuCztr4P.js} +6 -6
- package/dist/src/{opencode-sdk-z7KKOCdB.js → opencode-sdk-DhcfRbBH.js} +3 -3
- package/dist/src/{opencode-sdk-Cqszt4br.cjs → opencode-sdk-mqF-Oj3f.cjs} +6 -6
- package/dist/src/{otlpReceiver-BvmMgacx.cjs → otlpReceiver-B6Xo4KZM.cjs} +6 -6
- package/dist/src/{otlpReceiver-DRNetlJH.js → otlpReceiver-BO0rbDzh.js} +6 -6
- package/dist/src/{otlpReceiver-Bhj_vnzl.js → otlpReceiver-Dg817agV.js} +6 -6
- package/dist/src/{otlpReceiver-Cpnk-Hjf.js → otlpReceiver-DmRb0NBj.js} +2 -2
- package/dist/src/{providerRegistry-D32Lt9vp.js → providerRegistry-Xf0qdqGQ.js} +2 -2
- package/dist/src/{providerRegistry-HGQd2MF6.cjs → providerRegistry-lc7a7utN.cjs} +2 -2
- package/dist/src/{providerRegistry-BdKWcUa8.js → providerRegistry-wCWd7sKQ.js} +2 -2
- package/dist/src/providers-BMZZmPBJ.cjs +32 -0
- package/dist/src/{providers-BWoVY_Wz.cjs → providers-BNKVY53V.cjs} +294 -782
- package/dist/src/{providers-Co_FGgH8.js → providers-BiNq_Iyc.js} +281 -769
- package/dist/src/{providers-CUCHJCHD.js → providers-BlEhY5mi.js} +286 -779
- package/dist/src/providers-CQQrNaJk.js +32 -0
- package/dist/src/providers-Ck8HyrC-.js +34 -0
- package/dist/src/{pythonUtils-rOCm9w_5.cjs → pythonUtils-DZ6EbdY4.cjs} +3 -3
- package/dist/src/{pythonUtils-B9JA-gsC.js → pythonUtils-r1uBuA0n.js} +3 -3
- package/dist/src/{pythonUtils-eNq6Wsfr.js → pythonUtils-vMlk9Qp5.js} +3 -3
- package/dist/src/{quiverai-BpWtOEQZ.cjs → quiverai-BNfIwKCO.cjs} +13 -13
- package/dist/src/{quiverai-Cj-PUa3p.js → quiverai-BQigKdIH.js} +14 -14
- package/dist/src/{quiverai-BN8OVvDE.js → quiverai-Bfy2WnE2.js} +12 -12
- package/dist/src/{quiverai-D5MSsd2c.js → quiverai-CedIP0PJ.js} +2 -2
- package/dist/src/{render-D2710HbA.js → render-CAZvKKkB.js} +4 -4
- package/dist/src/responses-DLLjADw5.js +653 -0
- package/dist/src/responses-TsdODUpm.js +654 -0
- package/dist/src/responses-zOtKtnY_.cjs +671 -0
- package/dist/src/{rubyUtils-Dn6MGcsk.js → rubyUtils-BtjlqyXt.js} +3 -3
- package/dist/src/{rubyUtils-LBsk3zIm.js → rubyUtils-Cs35SDYa.js} +3 -3
- package/dist/src/rubyUtils-D7--T12C.js +6 -0
- package/dist/src/{rubyUtils-rnCVDgH-.cjs → rubyUtils-DCVaJ3mc.cjs} +3 -3
- package/dist/src/rubyUtils-DRRiMFV2.js +5 -0
- package/dist/src/rubyUtils-vb8OYFC-.cjs +5 -0
- package/dist/src/{sagemaker-C8MeZIkH.js → sagemaker-BcgLu0U4.js} +18 -18
- package/dist/src/{sagemaker-4ukMNSN0.js → sagemaker-CLdUAv5z.js} +17 -17
- package/dist/src/{sagemaker-DyVHy2BW.js → sagemaker-Du4LIR97.js} +2 -2
- package/dist/src/{sagemaker-DB3Eojau.cjs → sagemaker-DwNnEVYt.cjs} +17 -17
- package/dist/src/{scanner-C28XVEq2.js → scanner-Dyw21Wg_.js} +12 -12
- package/dist/src/server/index.js +1818 -605
- package/dist/src/server-BUbS0Qfh.js +6 -0
- package/dist/src/{server-BF3HkMhe.js → server-CbMTRQkg.js} +8 -6
- package/dist/src/{server-CSbLW-UI.cjs → server-CgUQ25qW.cjs} +8 -6
- package/dist/src/{server-mycZbUH8.js → server-DWmZLfCy.js} +10 -7
- package/dist/src/server-XpGXFHkS.cjs +6 -0
- package/dist/src/server-gfOx5Zrk.js +8 -0
- package/dist/src/{signal-DM_SPNmi.js → signal-Bl32q42d.js} +3 -3
- package/dist/src/{slack-Ed1yyt_j.js → slack-BfdBx2tO.js} +2 -2
- package/dist/src/{slack-D1F9Y7CH.cjs → slack-BtMkB6xP.cjs} +2 -2
- package/dist/src/{slack-DkAF58Tr.js → slack-DPqj42Ts.js} +2 -2
- package/dist/src/{slack-BK312SXM.js → slack-OZYxoVON.js} +2 -2
- package/dist/src/{store-CT_e5OZX.js → store-2ocbYY9D.js} +3 -3
- package/dist/src/store-5u2yriTV.js +7 -0
- package/dist/src/{store-BpumNYCl.cjs → store-BqwfFEyF.cjs} +3 -3
- package/dist/src/{store-BFLqwuc_.js → store-D4gdn9ih.js} +3 -3
- package/dist/src/store-D_lq_8oQ.js +6 -0
- package/dist/src/store-m5KT6Ly7.cjs +6 -0
- package/dist/src/{tables-Dansasnu.cjs → tables-B9E1kRp-.cjs} +3 -3
- package/dist/src/{tables-BjaApSAB.js → tables-C7TT2XVn.js} +3 -3
- package/dist/src/{tables-Dwexr5Z6.js → tables-D-NSwNIb.js} +3 -3
- package/dist/src/telemetry-5RHFoCJh.js +6 -0
- package/dist/src/{telemetry-MVkZQxt9.js → telemetry-BXyVqyAg.js} +5 -4
- package/dist/src/{telemetry-DFDFKdnr.cjs → telemetry-D0_yFdtU.cjs} +5 -4
- package/dist/src/{telemetry-Ds5Nn81l.js → telemetry-DZ_7PaVq.js} +5 -4
- package/dist/src/telemetry-Do8wMnA-.js +8 -0
- package/dist/src/telemetry-LojxPoFq.cjs +6 -0
- package/dist/src/{text-PYISqVm1.cjs → text-DF2hMKdg.cjs} +1 -1
- package/dist/src/{text-DuYSUYPB.js → text-DgMr_tiM.js} +1 -1
- package/dist/src/{text-Dx0GJOCN.js → text-Dm78AVGG.js} +1 -1
- package/dist/src/{tokenUsageUtils-BtZd3sP7.cjs → tokenUsageUtils-CXhxVj72.cjs} +9 -4
- package/dist/src/{tokenUsageUtils-DoinwgKF.js → tokenUsageUtils-DmZSD2eU.js} +9 -4
- package/dist/src/{tokenUsageUtils-cFdLMERB.js → tokenUsageUtils-FZd5O_4A.js} +9 -4
- package/dist/src/{transcription-Cp19m_Mt.js → transcription-C-M81iDA.js} +2 -2
- package/dist/src/{transcription-CLRpAg07.js → transcription-CYuY5sFO.js} +10 -10
- package/dist/src/{transcription-D8ifIKOv.js → transcription-Ch7S-LWw.js} +11 -11
- package/dist/src/{transcription-CXXFEVM_.cjs → transcription-FNIz3YOe.cjs} +10 -10
- package/dist/src/transform-8eGmaH-7.js +7 -0
- package/dist/src/transform-BRVvWaG4.cjs +6 -0
- package/dist/src/{transform-Bn-lgBE2.js → transform-CoP2bJ7P.js} +44 -5
- package/dist/src/{transform-BFPYuBaW.js → transform-CqTFr7KR.js} +5 -5
- package/dist/src/{transform-DvQWeBSR.js → transform-D8dILpfZ.js} +4 -4
- package/dist/src/{transform-BUrxadlA.js → transform-DMaxQwDx.js} +44 -5
- package/dist/src/transform-GybT0X0u.js +8 -0
- package/dist/src/{transform-DshYLyBq.cjs → transform-Kd6u-oNm.cjs} +4 -4
- package/dist/src/{transform-Bw1IstDE.cjs → transform-ivxEY4f7.cjs} +55 -4
- package/dist/src/{transformersAvailability-BaoWHpu1.cjs → transformersAvailability-Bkep3ka7.cjs} +1 -1
- package/dist/src/{transformersAvailability-Dhh45n5P.js → transformersAvailability-DEU2naS1.js} +1 -1
- package/dist/src/{transformersAvailability-DtpwoeFC.js → transformersAvailability-DkAWaK5B.js} +1 -1
- package/dist/src/{transformersAvailability-O2YaCv9Z.js → transformersAvailability-DwmezkVe.js} +1 -1
- package/dist/src/{types-B-XUqfNs.cjs → types-BIfttHrT.cjs} +22 -2
- package/dist/src/{types-BsU_PxR3.js → types-DMVjYLpx.js} +16 -2
- package/dist/src/{types-CgrxBFgm.js → types-t52w-XsS.js} +19 -3
- package/dist/src/{util-M2MoNCUR.js → util-BSh4a_Q8.js} +7 -7
- package/dist/src/{util-DTJWKLkl.cjs → util-C08Kns6-.cjs} +18 -4
- package/dist/src/{util-DHBpsbZE.js → util-CUEt0Vum.js} +18 -4
- package/dist/src/{util-Bnw6EyZN.js → util-Cl0zfT3V.js} +18 -4
- package/dist/src/{util-C7A-PlKK.cjs → util-DUYOvxAy.cjs} +6 -6
- package/dist/src/{util-WYC3rB_p.js → util-DiCePfDu.js} +6 -6
- package/dist/src/{util-CV99ps44.cjs → util-DkFTvieG.cjs} +17 -5
- package/dist/src/{util-CfepsNVK.js → util-mJ58qbbw.js} +17 -5
- package/dist/src/{util-BZaMVBMq.js → util-vjscpUzy.js} +17 -5
- package/dist/src/{utils-BryuD3vq.js → utils-CVzb4YiI.js} +3 -3
- package/dist/src/{utils-Dezi3MBH.cjs → utils-DFaZa6Rf.cjs} +3 -3
- package/dist/src/{utils-GiyI2K4P.js → utils-JaY9veb5.js} +3 -3
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +75 -77
- package/dist/src/app/assets/index-07dbAcWK.js +0 -427
- package/dist/src/app/assets/index-DnEcCQgk.css +0 -1
- package/dist/src/app/assets/vendor-react-DHoaNLma.js +0 -4
- package/dist/src/cache-BujOdYlc.cjs +0 -6
- package/dist/src/cloud-BHYz0XkH.js +0 -5
- package/dist/src/eval-0IFf8hON.js +0 -16
- package/dist/src/evalResult-BCCv6FXj.js +0 -13
- package/dist/src/evalResult-C8iVgVvr.js +0 -11
- package/dist/src/evalResult-CqFeWKYx.cjs +0 -11
- package/dist/src/evaluator-DwS5NAEj.js +0 -37
- package/dist/src/fetch-BH9KSaUC.cjs +0 -4
- package/dist/src/fetch-BL6kqunF.js +0 -6
- package/dist/src/graders-BK3LuhJ8.cjs +0 -32
- package/dist/src/graders-Bpjtip-E.js +0 -35
- package/dist/src/graders-KIf6Uej3.js +0 -33
- package/dist/src/providers-41mSodR_.js +0 -31
- package/dist/src/providers-CiZCpIxz.cjs +0 -31
- package/dist/src/providers-G531909f.js +0 -33
- package/dist/src/rubyUtils-BLd6EE1u.cjs +0 -5
- package/dist/src/rubyUtils-CVELPvUH.js +0 -6
- package/dist/src/rubyUtils-_t9Gmf7U.js +0 -5
- package/dist/src/server-Bc4T4XDt.cjs +0 -6
- package/dist/src/server-CMyxJ7ct.js +0 -6
- package/dist/src/server-D9FuxNYE.js +0 -8
- package/dist/src/store-CrMinjmN.cjs +0 -6
- package/dist/src/store-EaWZOoxz.js +0 -7
- package/dist/src/store-KXWnHB15.js +0 -6
- package/dist/src/telemetry-BXD2mCyr.cjs +0 -6
- package/dist/src/telemetry-CYiBm56v.js +0 -8
- package/dist/src/telemetry-lICYKIMB.js +0 -6
- package/dist/src/transform-DZI2t8-9.js +0 -8
- package/dist/src/transform-DZUV3qpO.js +0 -7
- package/dist/src/transform-XcotjzZX.cjs +0 -6
package/dist/src/index.cjs
CHANGED
|
@@ -1,39 +1,40 @@
|
|
|
1
1
|
Object.defineProperty(exports, '__esModule', { value: true });
|
|
2
|
-
const require_logger = require('./logger-
|
|
2
|
+
const require_logger = require('./logger-BdZ-IqBc.cjs');
|
|
3
3
|
const require_invariant = require('./invariant-CKcJAQ6M.cjs');
|
|
4
|
-
const require_esm = require('./esm-
|
|
5
|
-
const require_pythonUtils = require('./pythonUtils-
|
|
4
|
+
const require_esm = require('./esm-BQkx5roy.cjs');
|
|
5
|
+
const require_pythonUtils = require('./pythonUtils-DZ6EbdY4.cjs');
|
|
6
6
|
const require_fileExtensions = require('./fileExtensions-DkJYkWUy.cjs');
|
|
7
|
-
const require_transform = require('./transform-
|
|
8
|
-
const require_graders = require('./graders-
|
|
9
|
-
const require_types = require('./types-
|
|
10
|
-
const require_util = require('./util-
|
|
11
|
-
const require_fetch = require('./fetch-
|
|
12
|
-
const require_cache = require('./cache-
|
|
13
|
-
const require_providers = require('./providers-
|
|
14
|
-
const require_utils = require('./utils-
|
|
15
|
-
const require_extractor = require('./extractor-
|
|
16
|
-
require('./genaiTracer-
|
|
17
|
-
const require_chat = require('./chat-
|
|
18
|
-
const require_tokenUsageUtils = require('./tokenUsageUtils-
|
|
19
|
-
const require_transform$1 = require('./transform-
|
|
20
|
-
require('./messages-
|
|
21
|
-
require('./util-
|
|
22
|
-
require('./
|
|
23
|
-
|
|
24
|
-
require('./
|
|
25
|
-
|
|
26
|
-
const
|
|
27
|
-
const
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
const
|
|
32
|
-
require('./
|
|
33
|
-
require('./
|
|
34
|
-
|
|
35
|
-
const
|
|
36
|
-
const
|
|
7
|
+
const require_transform = require('./transform-Kd6u-oNm.cjs');
|
|
8
|
+
const require_graders = require('./graders-DzUUnUjC.cjs');
|
|
9
|
+
const require_types = require('./types-BIfttHrT.cjs');
|
|
10
|
+
const require_util = require('./util-DUYOvxAy.cjs');
|
|
11
|
+
const require_fetch = require('./fetch-ZMn_oemb.cjs');
|
|
12
|
+
const require_cache = require('./cache-7xULbvt3.cjs');
|
|
13
|
+
const require_providers = require('./providers-BNKVY53V.cjs');
|
|
14
|
+
const require_utils = require('./utils-DFaZa6Rf.cjs');
|
|
15
|
+
const require_extractor = require('./extractor-DbhlYEeo.cjs');
|
|
16
|
+
require('./genaiTracer-CQlpZkrp.cjs');
|
|
17
|
+
const require_chat = require('./chat-C4zqjObh.cjs');
|
|
18
|
+
const require_tokenUsageUtils = require('./tokenUsageUtils-CXhxVj72.cjs');
|
|
19
|
+
const require_transform$1 = require('./transform-ivxEY4f7.cjs');
|
|
20
|
+
require('./messages-DXV3Qh8_.cjs');
|
|
21
|
+
require('./util-DkFTvieG.cjs');
|
|
22
|
+
require('./responses-zOtKtnY_.cjs');
|
|
23
|
+
require('./openai-Bigwjgo1.cjs');
|
|
24
|
+
const require_util$2 = require('./util-C08Kns6-.cjs');
|
|
25
|
+
require('./completion-DDyL3Cb2.cjs');
|
|
26
|
+
const require_accounts = require('./accounts-BzEY8H3v.cjs');
|
|
27
|
+
const require_server = require('./server-CgUQ25qW.cjs');
|
|
28
|
+
const require_blobs = require('./blobs-673H0jCl.cjs');
|
|
29
|
+
const require_tables = require('./tables-B9E1kRp-.cjs');
|
|
30
|
+
const require_telemetry = require('./telemetry-D0_yFdtU.cjs');
|
|
31
|
+
const require_text = require('./text-DF2hMKdg.cjs');
|
|
32
|
+
const require_store = require('./store-BqwfFEyF.cjs');
|
|
33
|
+
require('./base-f71xxWai.cjs');
|
|
34
|
+
require('./image-C3BjJUAU.cjs');
|
|
35
|
+
const require_providerRegistry = require('./providerRegistry-lc7a7utN.cjs');
|
|
36
|
+
const require_rubyUtils = require('./rubyUtils-DCVaJ3mc.cjs');
|
|
37
|
+
const require_evalResult = require('./evalResult-DlRfu_Rq.cjs');
|
|
37
38
|
let fs = require("fs");
|
|
38
39
|
fs = require_logger.__toESM(fs);
|
|
39
40
|
let path = require("path");
|
|
@@ -306,7 +307,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
306
307
|
require_telemetry.telemetry_default.record("feature_used", { feature: "tracing" });
|
|
307
308
|
try {
|
|
308
309
|
require_logger.logger_default.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
309
|
-
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
310
|
+
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-B6Xo4KZM.cjs"));
|
|
310
311
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
311
312
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
312
313
|
require_logger.logger_default.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
|
|
@@ -329,7 +330,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
329
330
|
async function stopOtlpReceiverIfNeeded() {
|
|
330
331
|
if (otlpReceiverStarted) try {
|
|
331
332
|
require_logger.logger_default.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
332
|
-
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
333
|
+
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-B6Xo4KZM.cjs"));
|
|
333
334
|
await stopOTLPReceiver();
|
|
334
335
|
otlpReceiverStarted = false;
|
|
335
336
|
require_logger.logger_default.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -364,7 +365,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
364
365
|
}
|
|
365
366
|
if (!tracingEnabled) return null;
|
|
366
367
|
require_logger.logger_default.debug("[EvaluatorTracing] Importing trace store");
|
|
367
|
-
const { getTraceStore } = await Promise.resolve().then(() => require("./store-
|
|
368
|
+
const { getTraceStore } = await Promise.resolve().then(() => require("./store-m5KT6Ly7.cjs"));
|
|
368
369
|
const traceStore = getTraceStore();
|
|
369
370
|
const traceId = generateTraceId();
|
|
370
371
|
const spanId = generateSpanId();
|
|
@@ -2775,7 +2776,7 @@ const ASSERTION_HANDLERS = {
|
|
|
2775
2776
|
"llm-rubric": handleLlmRubric,
|
|
2776
2777
|
meteor: async (params) => {
|
|
2777
2778
|
try {
|
|
2778
|
-
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-
|
|
2779
|
+
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-odmwVbyG.cjs"));
|
|
2779
2780
|
return handleMeteorAssertion(params);
|
|
2780
2781
|
} catch (error) {
|
|
2781
2782
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -2885,9 +2886,9 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2885
2886
|
let filePath = fileRef;
|
|
2886
2887
|
let functionName;
|
|
2887
2888
|
if (fileRef.includes(":")) {
|
|
2888
|
-
const
|
|
2889
|
-
filePath =
|
|
2890
|
-
functionName =
|
|
2889
|
+
const colonIndex = fileRef.indexOf(":");
|
|
2890
|
+
filePath = fileRef.slice(0, colonIndex);
|
|
2891
|
+
functionName = fileRef.slice(colonIndex + 1);
|
|
2891
2892
|
}
|
|
2892
2893
|
filePath = path.default.resolve(basePath, filePath);
|
|
2893
2894
|
if (require_fileExtensions.isJavascriptFile(filePath)) {
|
|
@@ -2905,7 +2906,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2905
2906
|
};
|
|
2906
2907
|
}
|
|
2907
2908
|
else if (filePath.endsWith(".rb")) try {
|
|
2908
|
-
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-
|
|
2909
|
+
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-vb8OYFC-.cjs"));
|
|
2909
2910
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
2910
2911
|
require_logger.logger_default.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
2911
2912
|
} catch (error) {
|
|
@@ -3913,7 +3914,7 @@ async function runEval({ provider, prompt, test, testSuite, delay, nunjucksFilte
|
|
|
3913
3914
|
promptIdx,
|
|
3914
3915
|
testIdx
|
|
3915
3916
|
});
|
|
3916
|
-
require_logger.logger_default.error("Provider call failed during eval", logContext);
|
|
3917
|
+
if (!(err instanceof Error && err.name === "AbortError")) require_logger.logger_default.error("Provider call failed during eval", logContext);
|
|
3917
3918
|
return [{
|
|
3918
3919
|
...setup,
|
|
3919
3920
|
error: errorWithStack,
|
|
@@ -4094,11 +4095,17 @@ var Evaluator = class {
|
|
|
4094
4095
|
let globalTimeout;
|
|
4095
4096
|
let globalAbortController;
|
|
4096
4097
|
const processedIndices = /* @__PURE__ */ new Set();
|
|
4098
|
+
let targetUnavailable = false;
|
|
4099
|
+
let targetErrorStatus;
|
|
4100
|
+
const targetErrorAbortController = new AbortController();
|
|
4097
4101
|
let ciProgressReporter = null;
|
|
4098
4102
|
let progressBarManager = null;
|
|
4103
|
+
let providerAbortSignal = options.abortSignal;
|
|
4104
|
+
let combinedAbortSignal = options.abortSignal ? AbortSignal.any([options.abortSignal, targetErrorAbortController.signal]) : targetErrorAbortController.signal;
|
|
4099
4105
|
if (maxEvalTimeMs > 0) {
|
|
4100
4106
|
globalAbortController = new AbortController();
|
|
4101
|
-
|
|
4107
|
+
providerAbortSignal = providerAbortSignal ? AbortSignal.any([providerAbortSignal, globalAbortController.signal]) : globalAbortController.signal;
|
|
4108
|
+
combinedAbortSignal = AbortSignal.any([combinedAbortSignal, globalAbortController.signal]);
|
|
4102
4109
|
globalTimeout = setTimeout(() => {
|
|
4103
4110
|
evalTimedOut = true;
|
|
4104
4111
|
globalAbortController?.abort();
|
|
@@ -4106,7 +4113,7 @@ var Evaluator = class {
|
|
|
4106
4113
|
}
|
|
4107
4114
|
const vars = /* @__PURE__ */ new Set();
|
|
4108
4115
|
const checkAbort = () => {
|
|
4109
|
-
if (
|
|
4116
|
+
if (combinedAbortSignal.aborted) throw new Error("Operation cancelled");
|
|
4110
4117
|
};
|
|
4111
4118
|
if (!options.silent) require_logger.logger_default.info(`Starting evaluation ${this.evalRecord.id}`);
|
|
4112
4119
|
checkAbort();
|
|
@@ -4267,7 +4274,7 @@ var Evaluator = class {
|
|
|
4267
4274
|
const defaultProvider = testSuite.defaultTest.provider;
|
|
4268
4275
|
if (require_types.isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
|
|
4269
4276
|
else if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
4270
|
-
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-
|
|
4277
|
+
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BMZZmPBJ.cjs"));
|
|
4271
4278
|
testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
4272
4279
|
} else testCase.provider = defaultProvider;
|
|
4273
4280
|
}
|
|
@@ -4337,7 +4344,7 @@ var Evaluator = class {
|
|
|
4337
4344
|
registers: this.registers,
|
|
4338
4345
|
isRedteam: testSuite.redteam != null,
|
|
4339
4346
|
concurrency,
|
|
4340
|
-
abortSignal:
|
|
4347
|
+
abortSignal: providerAbortSignal,
|
|
4341
4348
|
evalId: this.evalRecord.id,
|
|
4342
4349
|
rateLimitRegistry: this.rateLimitRegistry
|
|
4343
4350
|
});
|
|
@@ -4351,7 +4358,7 @@ var Evaluator = class {
|
|
|
4351
4358
|
if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
|
|
4352
4359
|
}
|
|
4353
4360
|
if (require_logger.cliState_default.resume && this.evalRecord.persisted) try {
|
|
4354
|
-
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-
|
|
4361
|
+
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-CxTP-LMm.cjs"));
|
|
4355
4362
|
const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: require_logger.cliState_default.retryMode });
|
|
4356
4363
|
const originalCount = runEvalOptions.length;
|
|
4357
4364
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -4404,6 +4411,14 @@ var Evaluator = class {
|
|
|
4404
4411
|
require_logger.logger_default.error(`Error saving result: ${error} ${require_logger.safeJsonStringify(resultSummary)}`);
|
|
4405
4412
|
}
|
|
4406
4413
|
for (const writer of this.fileWriters) await writer.write(row);
|
|
4414
|
+
const httpStatus = row.response?.metadata?.http?.status;
|
|
4415
|
+
if (typeof httpStatus === "number" && require_cache.isNonTransientHttpStatus(httpStatus)) {
|
|
4416
|
+
targetUnavailable = true;
|
|
4417
|
+
targetErrorStatus = httpStatus;
|
|
4418
|
+
require_logger.logger_default.error(`Target returned HTTP ${httpStatus}. Aborting scan - this error will not resolve on retry.`);
|
|
4419
|
+
targetErrorAbortController.abort();
|
|
4420
|
+
break;
|
|
4421
|
+
}
|
|
4407
4422
|
const { promptIdx } = row;
|
|
4408
4423
|
const metrics = prompts[promptIdx].metrics;
|
|
4409
4424
|
require_invariant.invariant(metrics, "Expected prompt.metrics to be set");
|
|
@@ -4567,6 +4582,7 @@ var Evaluator = class {
|
|
|
4567
4582
|
if (this.options.showProgressBar && progressBarManager) await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
4568
4583
|
try {
|
|
4569
4584
|
if (serialRunEvalOptions.length > 0) for (const evalStep of serialRunEvalOptions) {
|
|
4585
|
+
checkAbort();
|
|
4570
4586
|
if (isWebUI) {
|
|
4571
4587
|
const provider = evalStep.provider.label || evalStep.provider.id();
|
|
4572
4588
|
const vars = formatVarsForDisplay(evalStep.test.vars || {}, 50);
|
|
@@ -4584,22 +4600,32 @@ var Evaluator = class {
|
|
|
4584
4600
|
await this.evalRecord.addPrompts(prompts);
|
|
4585
4601
|
});
|
|
4586
4602
|
} catch (err) {
|
|
4587
|
-
if (
|
|
4588
|
-
|
|
4589
|
-
|
|
4590
|
-
|
|
4591
|
-
|
|
4592
|
-
|
|
4593
|
-
|
|
4594
|
-
|
|
4595
|
-
|
|
4596
|
-
|
|
4597
|
-
|
|
4598
|
-
|
|
4603
|
+
if (combinedAbortSignal.aborted) {
|
|
4604
|
+
if (evalTimedOut) require_logger.logger_default.warn(`Evaluation stopped after reaching max duration (${maxEvalTimeMs}ms)`);
|
|
4605
|
+
else if (!targetUnavailable) {
|
|
4606
|
+
require_logger.logger_default.info("Evaluation interrupted, saving progress...");
|
|
4607
|
+
if (globalTimeout) clearTimeout(globalTimeout);
|
|
4608
|
+
if (progressBarManager) progressBarManager.stop();
|
|
4609
|
+
if (ciProgressReporter) ciProgressReporter.finish();
|
|
4610
|
+
this.evalRecord.setVars(Array.from(vars));
|
|
4611
|
+
await this.evalRecord.addPrompts(prompts);
|
|
4612
|
+
updateSignalFile(this.evalRecord.id);
|
|
4613
|
+
return this.evalRecord;
|
|
4614
|
+
}
|
|
4615
|
+
} else {
|
|
4599
4616
|
if (ciProgressReporter) ciProgressReporter.error(`Evaluation failed: ${String(err)}`);
|
|
4600
4617
|
throw err;
|
|
4601
4618
|
}
|
|
4602
4619
|
}
|
|
4620
|
+
if (targetUnavailable) {
|
|
4621
|
+
if (globalTimeout) clearTimeout(globalTimeout);
|
|
4622
|
+
if (progressBarManager) progressBarManager.stop();
|
|
4623
|
+
if (ciProgressReporter) ciProgressReporter.error(`Target unavailable (HTTP ${targetErrorStatus})`);
|
|
4624
|
+
this.evalRecord.setVars(Array.from(vars));
|
|
4625
|
+
await this.evalRecord.addPrompts(prompts);
|
|
4626
|
+
updateSignalFile(this.evalRecord.id);
|
|
4627
|
+
return this.evalRecord;
|
|
4628
|
+
}
|
|
4603
4629
|
const compareRowsCount = rowsWithSelectBestAssertion.size + rowsWithMaxScoreAssertion.size;
|
|
4604
4630
|
if (progressBarManager) {
|
|
4605
4631
|
if (compareRowsCount > 0) progressBarManager.updateTotalCount(compareRowsCount);
|
|
@@ -5306,10 +5332,17 @@ function convertResultsToTable(eval_) {
|
|
|
5306
5332
|
if (keyToUpdate) result.vars[keyToUpdate] = actualPrompt;
|
|
5307
5333
|
}
|
|
5308
5334
|
}
|
|
5309
|
-
if (
|
|
5310
|
-
|
|
5311
|
-
|
|
5312
|
-
|
|
5335
|
+
if (!result.vars?.sessionId) {
|
|
5336
|
+
const metadataSessionIds = result.metadata?.sessionIds;
|
|
5337
|
+
if (Array.isArray(metadataSessionIds) && metadataSessionIds.length > 0) {
|
|
5338
|
+
result.vars = result.vars || {};
|
|
5339
|
+
result.vars.sessionId = metadataSessionIds.filter((id) => id != null && id !== "").map(String).join("\n");
|
|
5340
|
+
varsForHeader.add("sessionId");
|
|
5341
|
+
} else if (result.metadata?.sessionId) {
|
|
5342
|
+
result.vars = result.vars || {};
|
|
5343
|
+
result.vars.sessionId = result.metadata.sessionId;
|
|
5344
|
+
varsForHeader.add("sessionId");
|
|
5345
|
+
}
|
|
5313
5346
|
}
|
|
5314
5347
|
const transformDisplayVars = result.response?.metadata?.transformDisplayVars;
|
|
5315
5348
|
if (transformDisplayVars) {
|
|
@@ -5365,7 +5398,12 @@ function convertResultsToTable(eval_) {
|
|
|
5365
5398
|
model: result.response.video.model,
|
|
5366
5399
|
aspectRatio: result.response.video.aspectRatio,
|
|
5367
5400
|
resolution: result.response.video.resolution
|
|
5368
|
-
} : void 0
|
|
5401
|
+
} : void 0,
|
|
5402
|
+
images: result.response?.images?.map((img) => ({
|
|
5403
|
+
data: img.data,
|
|
5404
|
+
blobRef: img.blobRef,
|
|
5405
|
+
mimeType: img.mimeType
|
|
5406
|
+
}))
|
|
5369
5407
|
};
|
|
5370
5408
|
require_invariant.invariant(result.promptId, "Prompt ID is required");
|
|
5371
5409
|
row.testIdx = result.testIdx;
|
|
@@ -5431,7 +5469,12 @@ function convertEvalResultToTableCell(result) {
|
|
|
5431
5469
|
model: result.response.video.model,
|
|
5432
5470
|
aspectRatio: result.response.video.aspectRatio,
|
|
5433
5471
|
resolution: result.response.video.resolution
|
|
5434
|
-
} : void 0
|
|
5472
|
+
} : void 0,
|
|
5473
|
+
images: result.response?.images?.map((img) => ({
|
|
5474
|
+
data: img.data,
|
|
5475
|
+
blobRef: img.blobRef,
|
|
5476
|
+
mimeType: img.mimeType
|
|
5477
|
+
}))
|
|
5435
5478
|
};
|
|
5436
5479
|
}
|
|
5437
5480
|
function convertTestResultsToTableRow(results, varsForHeader) {
|
|
@@ -5440,10 +5483,13 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
5440
5483
|
outputs: [],
|
|
5441
5484
|
vars: Object.values(varsForHeader).map((varName) => {
|
|
5442
5485
|
if (varName === "sessionId") {
|
|
5443
|
-
const
|
|
5444
|
-
|
|
5445
|
-
|
|
5446
|
-
return
|
|
5486
|
+
const sessionIdFromVars = results[0].testCase.vars?.sessionId;
|
|
5487
|
+
if (sessionIdFromVars != null && sessionIdFromVars !== "") return typeof sessionIdFromVars === "string" ? sessionIdFromVars : JSON.stringify(sessionIdFromVars);
|
|
5488
|
+
const metadataSessionIds = results[0].metadata?.sessionIds;
|
|
5489
|
+
if (Array.isArray(metadataSessionIds) && metadataSessionIds.length > 0) return metadataSessionIds.filter((id) => id != null && id !== "").map(String).join("\n");
|
|
5490
|
+
const metadataSessionId = results[0].metadata?.sessionId;
|
|
5491
|
+
if (metadataSessionId != null) return typeof metadataSessionId === "string" ? metadataSessionId : JSON.stringify(metadataSessionId);
|
|
5492
|
+
return "";
|
|
5447
5493
|
}
|
|
5448
5494
|
const varValue = results[0].testCase.vars?.[varName] ?? "";
|
|
5449
5495
|
if (typeof varValue === "string") return varValue;
|
|
@@ -5723,7 +5769,13 @@ var Eval = class Eval {
|
|
|
5723
5769
|
_resultsLoaded = false;
|
|
5724
5770
|
runtimeOptions;
|
|
5725
5771
|
_shared = false;
|
|
5772
|
+
/** Total wall-clock duration. For redteam evals: generationDurationMs + evaluationDurationMs.
|
|
5773
|
+
* For non-redteam evals: equals evaluationDurationMs (generation phase is N/A). */
|
|
5726
5774
|
durationMs;
|
|
5775
|
+
/** Time spent generating adversarial test cases (redteam only). */
|
|
5776
|
+
generationDurationMs;
|
|
5777
|
+
/** Time spent running the evaluation phase. */
|
|
5778
|
+
evaluationDurationMs;
|
|
5727
5779
|
/**
|
|
5728
5780
|
* The shareable URL for this evaluation, if it has been shared.
|
|
5729
5781
|
* Set by the evaluate() function when sharing is enabled.
|
|
@@ -5742,8 +5794,11 @@ var Eval = class Eval {
|
|
|
5742
5794
|
const eval_ = evalData[0];
|
|
5743
5795
|
const datasetId = datasetResults[0]?.datasetId;
|
|
5744
5796
|
const resultsObj = eval_.results;
|
|
5745
|
-
const
|
|
5746
|
-
const
|
|
5797
|
+
const validateDuration = (raw) => typeof raw === "number" && Number.isFinite(raw) && raw >= 0 ? raw : void 0;
|
|
5798
|
+
const rawDurationMs = validateDuration(resultsObj?.["durationMs"]);
|
|
5799
|
+
const generationDurationMs = validateDuration(resultsObj?.["generationDurationMs"]);
|
|
5800
|
+
const evaluationDurationMs = validateDuration(resultsObj?.["evaluationDurationMs"]);
|
|
5801
|
+
const durationMs = rawDurationMs ?? (generationDurationMs != null || evaluationDurationMs != null ? (generationDurationMs ?? 0) + (evaluationDurationMs ?? 0) : void 0);
|
|
5747
5802
|
const evalInstance = new Eval(eval_.config, {
|
|
5748
5803
|
id: eval_.id,
|
|
5749
5804
|
createdAt: new Date(eval_.createdAt),
|
|
@@ -5754,7 +5809,9 @@ var Eval = class Eval {
|
|
|
5754
5809
|
persisted: true,
|
|
5755
5810
|
vars: eval_.vars || [],
|
|
5756
5811
|
runtimeOptions: eval_.runtimeOptions ?? void 0,
|
|
5757
|
-
durationMs
|
|
5812
|
+
durationMs,
|
|
5813
|
+
generationDurationMs,
|
|
5814
|
+
evaluationDurationMs
|
|
5758
5815
|
});
|
|
5759
5816
|
if (eval_.results && "table" in eval_.results) evalInstance.oldResults = eval_.results;
|
|
5760
5817
|
if (!eval_.vars || eval_.vars.length === 0) {
|
|
@@ -5879,6 +5936,8 @@ var Eval = class Eval {
|
|
|
5879
5936
|
this.vars = opts?.vars || [];
|
|
5880
5937
|
this.runtimeOptions = opts?.runtimeOptions;
|
|
5881
5938
|
this.durationMs = opts?.durationMs;
|
|
5939
|
+
this.generationDurationMs = opts?.generationDurationMs;
|
|
5940
|
+
this.evaluationDurationMs = opts?.evaluationDurationMs;
|
|
5882
5941
|
}
|
|
5883
5942
|
version() {
|
|
5884
5943
|
/**
|
|
@@ -5909,7 +5968,13 @@ var Eval = class Eval {
|
|
|
5909
5968
|
if (this.useOldResults()) {
|
|
5910
5969
|
require_invariant.invariant(this.oldResults, "Old results not found");
|
|
5911
5970
|
updateObj.results = this.oldResults;
|
|
5912
|
-
} else if (this.durationMs !== void 0
|
|
5971
|
+
} else if (this.durationMs !== void 0 || this.generationDurationMs !== void 0 || this.evaluationDurationMs !== void 0) {
|
|
5972
|
+
let expr = drizzle_orm.sql`CASE WHEN json_valid(${require_tables.evalsTable.results}) AND json_type(${require_tables.evalsTable.results}) = 'object' THEN ${require_tables.evalsTable.results} ELSE '{}' END`;
|
|
5973
|
+
if (this.durationMs !== void 0) expr = drizzle_orm.sql`json_set(${expr}, '$.durationMs', ${this.durationMs})`;
|
|
5974
|
+
if (this.generationDurationMs !== void 0) expr = drizzle_orm.sql`json_set(${expr}, '$.generationDurationMs', ${this.generationDurationMs})`;
|
|
5975
|
+
if (this.evaluationDurationMs !== void 0) expr = drizzle_orm.sql`json_set(${expr}, '$.evaluationDurationMs', ${this.evaluationDurationMs})`;
|
|
5976
|
+
updateObj.results = expr;
|
|
5977
|
+
}
|
|
5913
5978
|
db.update(require_tables.evalsTable).set(updateObj).where((0, drizzle_orm.eq)(require_tables.evalsTable.id, this.id)).run();
|
|
5914
5979
|
this.persisted = true;
|
|
5915
5980
|
}
|
|
@@ -5919,8 +5984,17 @@ var Eval = class Eval {
|
|
|
5919
5984
|
addVar(varName) {
|
|
5920
5985
|
this.vars.push(varName);
|
|
5921
5986
|
}
|
|
5987
|
+
/** Sets the evaluation phase duration and recomputes the total. Called by the evaluator. */
|
|
5922
5988
|
setDurationMs(durationMs) {
|
|
5923
|
-
|
|
5989
|
+
if (!Number.isFinite(durationMs) || durationMs < 0) return;
|
|
5990
|
+
this.evaluationDurationMs = durationMs;
|
|
5991
|
+
this.durationMs = (this.generationDurationMs ?? 0) + durationMs;
|
|
5992
|
+
}
|
|
5993
|
+
/** Sets the generation phase duration and recomputes the total. Called by doRedteamRun. */
|
|
5994
|
+
setGenerationDurationMs(durationMs) {
|
|
5995
|
+
if (!Number.isFinite(durationMs) || durationMs < 0) return;
|
|
5996
|
+
this.generationDurationMs = durationMs;
|
|
5997
|
+
this.durationMs = durationMs + (this.evaluationDurationMs ?? 0);
|
|
5924
5998
|
}
|
|
5925
5999
|
getPrompts() {
|
|
5926
6000
|
if (this.useOldResults()) {
|
|
@@ -5958,6 +6032,27 @@ var Eval = class Eval {
|
|
|
5958
6032
|
async getTotalResultRowCount() {
|
|
5959
6033
|
return getTotalResultRowCount(this.id);
|
|
5960
6034
|
}
|
|
6035
|
+
/**
|
|
6036
|
+
* Find a non-transient HTTP error status from evaluation results.
|
|
6037
|
+
* Returns the first non-transient status (401, 403, 404, 500, 501) found, or undefined.
|
|
6038
|
+
*
|
|
6039
|
+
* For persisted evals: Uses efficient O(1) database query with LIMIT 1.
|
|
6040
|
+
* For non-persisted evals: Falls back to scanning in-memory results.
|
|
6041
|
+
*/
|
|
6042
|
+
async findTargetErrorStatus() {
|
|
6043
|
+
const scanInMemory = () => {
|
|
6044
|
+
for (const result of this.results) {
|
|
6045
|
+
const status = result.response?.metadata?.http?.status;
|
|
6046
|
+
if (typeof status === "number" && require_cache.isNonTransientHttpStatus(status)) return status;
|
|
6047
|
+
}
|
|
6048
|
+
};
|
|
6049
|
+
if (!this.persisted) return scanInMemory();
|
|
6050
|
+
try {
|
|
6051
|
+
return require_tables.getDb().select({ httpStatus: drizzle_orm.sql`CAST(json_extract(${require_tables.evalResultsTable.response}, '$.metadata.http.status') AS INTEGER)` }).from(require_tables.evalResultsTable).where((0, drizzle_orm.and)((0, drizzle_orm.eq)(require_tables.evalResultsTable.evalId, this.id), drizzle_orm.sql`json_extract(${require_tables.evalResultsTable.response}, '$.metadata.http.status') IN (${drizzle_orm.sql.join(require_cache.NON_TRANSIENT_HTTP_STATUSES.map((s) => drizzle_orm.sql`${s}`), drizzle_orm.sql`, `)})`)).limit(1).get()?.httpStatus ?? void 0;
|
|
6052
|
+
} catch {
|
|
6053
|
+
return scanInMemory();
|
|
6054
|
+
}
|
|
6055
|
+
}
|
|
5961
6056
|
async fetchResultsByTestIdx(testIdx) {
|
|
5962
6057
|
return await require_evalResult.EvalResult.findManyByEvalId(this.id, { testIdx });
|
|
5963
6058
|
}
|
|
@@ -6180,7 +6275,12 @@ var Eval = class Eval {
|
|
|
6180
6275
|
};
|
|
6181
6276
|
}
|
|
6182
6277
|
const allResults = await require_evalResult.EvalResult.findManyByEvalIdAndTestIndices(this.id, testIndices);
|
|
6183
|
-
if (allResults.some((result) =>
|
|
6278
|
+
if (allResults.some((result) => {
|
|
6279
|
+
const hasSessionIds = Array.isArray(result.metadata?.sessionIds) && result.metadata.sessionIds.length > 0;
|
|
6280
|
+
const hasSessionId = Boolean(result.metadata?.sessionId);
|
|
6281
|
+
const notInVars = !result.testCase?.vars?.sessionId;
|
|
6282
|
+
return (hasSessionIds || hasSessionId) && notInVars;
|
|
6283
|
+
}) && !vars.includes("sessionId")) {
|
|
6184
6284
|
vars.push("sessionId");
|
|
6185
6285
|
vars.sort();
|
|
6186
6286
|
}
|
|
@@ -6241,7 +6341,9 @@ var Eval = class Eval {
|
|
|
6241
6341
|
failures: 0,
|
|
6242
6342
|
errors: 0,
|
|
6243
6343
|
tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
|
|
6244
|
-
durationMs: this.durationMs
|
|
6344
|
+
durationMs: this.durationMs,
|
|
6345
|
+
generationDurationMs: this.generationDurationMs,
|
|
6346
|
+
evaluationDurationMs: this.evaluationDurationMs
|
|
6245
6347
|
};
|
|
6246
6348
|
for (const prompt of this.prompts) {
|
|
6247
6349
|
stats.successes += prompt.metrics?.testPassCount ?? 0;
|
|
@@ -7539,6 +7641,24 @@ function validateTestProviderReferences(tests, providers, defaultTest, scenarios
|
|
|
7539
7641
|
});
|
|
7540
7642
|
}
|
|
7541
7643
|
|
|
7644
|
+
//#endregion
|
|
7645
|
+
//#region src/util/config/extensions.ts
|
|
7646
|
+
/**
|
|
7647
|
+
* Supported config file extensions, sorted by frequency of use.
|
|
7648
|
+
* Order matters: loaders try each in sequence and stop at the first match.
|
|
7649
|
+
*/
|
|
7650
|
+
const DEFAULT_CONFIG_EXTENSIONS = [
|
|
7651
|
+
"yaml",
|
|
7652
|
+
"yml",
|
|
7653
|
+
"json",
|
|
7654
|
+
"cjs",
|
|
7655
|
+
"cts",
|
|
7656
|
+
"js",
|
|
7657
|
+
"mjs",
|
|
7658
|
+
"mts",
|
|
7659
|
+
"ts"
|
|
7660
|
+
];
|
|
7661
|
+
|
|
7542
7662
|
//#endregion
|
|
7543
7663
|
//#region src/util/config/load.ts
|
|
7544
7664
|
/**
|
|
@@ -7547,6 +7667,34 @@ function validateTestProviderReferences(tests, providers, defaultTest, scenarios
|
|
|
7547
7667
|
function isTestCaseWithVars(test) {
|
|
7548
7668
|
return typeof test === "object" && test !== null && "vars" in test;
|
|
7549
7669
|
}
|
|
7670
|
+
/**
|
|
7671
|
+
* When --providers is used alongside a config file that has providers defined,
|
|
7672
|
+
* maps each CLI provider token to a matching config provider (preserving its config
|
|
7673
|
+
* options like num_ctx, temperature). Unmatched tokens are kept as bare strings.
|
|
7674
|
+
*
|
|
7675
|
+
* Matching priority per token:
|
|
7676
|
+
* 1. Exact match on provider id
|
|
7677
|
+
* 2. Exact match on provider label
|
|
7678
|
+
* 3. Provider-prefix match: config id ends with `:cliProvider` (e.g. CLI `llama3.1:8b`
|
|
7679
|
+
* matches config `ollama:llama3.1:8b`). First match wins if multiple configs share a suffix.
|
|
7680
|
+
* 4. No match: keep raw CLI string for fresh provider creation
|
|
7681
|
+
*/
|
|
7682
|
+
function resolveCliProvidersWithConfig(cliProviders, configProviders) {
|
|
7683
|
+
if (!configProviders || !Array.isArray(configProviders)) return cliProviders;
|
|
7684
|
+
const indexed = configProviders.map((cp, i) => ({
|
|
7685
|
+
provider: cp,
|
|
7686
|
+
...getProviderIdAndLabel(cp, i)
|
|
7687
|
+
}));
|
|
7688
|
+
return cliProviders.map((cliProvider) => {
|
|
7689
|
+
const exactId = indexed.find((entry) => entry.id === cliProvider);
|
|
7690
|
+
if (exactId) return exactId.provider;
|
|
7691
|
+
const exactLabel = indexed.find((entry) => entry.label === cliProvider);
|
|
7692
|
+
if (exactLabel) return exactLabel.provider;
|
|
7693
|
+
const prefixMatch = indexed.find((entry) => entry.id.endsWith(":" + cliProvider));
|
|
7694
|
+
if (prefixMatch) return prefixMatch.provider;
|
|
7695
|
+
return cliProvider;
|
|
7696
|
+
});
|
|
7697
|
+
}
|
|
7550
7698
|
async function dereferenceConfig(rawConfig) {
|
|
7551
7699
|
if (require_logger.getEnvBool("PROMPTFOO_DISABLE_REF_PARSER")) return rawConfig;
|
|
7552
7700
|
const extractFunctionParameters = (functions) => {
|
|
@@ -7683,7 +7831,7 @@ async function combineConfigs(configPaths) {
|
|
|
7683
7831
|
const configs = [];
|
|
7684
7832
|
for (const configPath of configPaths) {
|
|
7685
7833
|
const globPaths = (0, glob.globSync)(path.resolve(process$1.default.cwd(), configPath), { windowsPathsNoEscape: true });
|
|
7686
|
-
if (globPaths.length === 0) throw new Error(`No configuration file found at ${configPath}. Run "
|
|
7834
|
+
if (globPaths.length === 0) throw new Error(`No configuration file found at ${configPath}. Run "${promptfooCommand("init")}" to create one or pass --config path/to/promptfooconfig.yaml.`);
|
|
7687
7835
|
for (const globPath of globPaths) {
|
|
7688
7836
|
const config = await readConfig(globPath);
|
|
7689
7837
|
configs.push(config);
|
|
@@ -7896,7 +8044,7 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7896
8044
|
tags: fileConfig.tags || defaultConfig.tags,
|
|
7897
8045
|
description: cmdObj.description || fileConfig.description || defaultConfig.description,
|
|
7898
8046
|
prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts || [],
|
|
7899
|
-
providers:
|
|
8047
|
+
providers: fileConfig.providers || defaultConfig.providers || [],
|
|
7900
8048
|
tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests || [],
|
|
7901
8049
|
scenarios: fileConfig.scenarios || defaultConfig.scenarios,
|
|
7902
8050
|
env: fileConfig.env || defaultConfig.env,
|
|
@@ -7911,11 +8059,14 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7911
8059
|
evaluateOptions: fileConfig.evaluateOptions || defaultConfig.evaluateOptions
|
|
7912
8060
|
};
|
|
7913
8061
|
const hasPrompts = [config.prompts].flat().filter(Boolean).length > 0;
|
|
7914
|
-
const hasProviders = [config.providers].flat().filter(Boolean).length > 0;
|
|
8062
|
+
const hasProviders = cmdObj.providers && cmdObj.providers.length > 0 || [config.providers].flat().filter(Boolean).length > 0;
|
|
7915
8063
|
if (!Boolean(configPaths) && !hasPrompts && !hasProviders && !require_logger.isCI()) {
|
|
8064
|
+
const extList = DEFAULT_CONFIG_EXTENSIONS.join(", ");
|
|
7916
8065
|
require_logger.logger_default.warn(dedent.default`
|
|
7917
8066
|
${chalk.default.yellow.bold("⚠️ No promptfooconfig found")}
|
|
7918
8067
|
|
|
8068
|
+
${chalk.default.white(`Searched in ${chalk.default.bold(process$1.default.cwd())} for promptfooconfig.{${extList}}`)}
|
|
8069
|
+
|
|
7919
8070
|
${chalk.default.white("Try running with:")}
|
|
7920
8071
|
|
|
7921
8072
|
${chalk.default.cyan(`${promptfooCommand("")} eval -c ${chalk.default.bold("path/to/promptfooconfig.yaml")}`)}
|
|
@@ -7936,8 +8087,9 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7936
8087
|
}
|
|
7937
8088
|
require_invariant.invariant(Array.isArray(config.providers), "providers must be an array");
|
|
7938
8089
|
const resolvedProviderConfigs = require_providers.resolveProviderConfigs(config.providers, { basePath });
|
|
8090
|
+
const cliFilteredProviderConfigs = (cmdObj.providers ? resolveCliProvidersWithConfig(cmdObj.providers, resolvedProviderConfigs) : resolvedProviderConfigs) ?? [];
|
|
7939
8091
|
const filterOption = cmdObj.filterProviders || cmdObj.filterTargets;
|
|
7940
|
-
const filteredProviderConfigs = filterProviderConfigs(
|
|
8092
|
+
const filteredProviderConfigs = filterProviderConfigs(cliFilteredProviderConfigs, filterOption);
|
|
7941
8093
|
if (filterOption && Array.isArray(filteredProviderConfigs) && filteredProviderConfigs.length === 0) require_logger.logger_default.warn(`No providers matched the filter "${filterOption}". Check your --filter-providers/--filter-targets value.`);
|
|
7942
8094
|
let parsedPrompts = await require_graders.readPrompts(config.prompts, cmdObj.prompts ? void 0 : basePath);
|
|
7943
8095
|
if (cmdObj.filterPrompts) {
|
|
@@ -7973,7 +8125,7 @@ async function resolveConfigs(cmdObj, _defaultConfig, type) {
|
|
|
7973
8125
|
}
|
|
7974
8126
|
const parsedProviderPromptMap = require_graders.readProviderPromptMap({ providers: filteredProviderConfigs }, parsedPrompts);
|
|
7975
8127
|
if (parsedPrompts.length === 0) {
|
|
7976
|
-
require_logger.logger_default.error("No prompts found");
|
|
8128
|
+
require_logger.logger_default.error("No prompts found. Add a `prompts:` entry to your config or pass --prompts path/to/prompt.txt.");
|
|
7977
8129
|
process$1.default.exit(1);
|
|
7978
8130
|
}
|
|
7979
8131
|
const defaultTest = {
|
|
@@ -9581,6 +9733,7 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9581
9733
|
seen.add(key);
|
|
9582
9734
|
return true;
|
|
9583
9735
|
});
|
|
9736
|
+
const needsGoalExtraction = strategies.some((s) => require_providers.Strategies.find((def) => def.id === s.id)?.requiresGoalExtraction);
|
|
9584
9737
|
await require_providers.validateStrategies(strategies);
|
|
9585
9738
|
await validateSharpDependency(strategies, plugins);
|
|
9586
9739
|
const redteamProvider = await require_providers.redteamProviderManager.getProvider({ provider });
|
|
@@ -9772,13 +9925,15 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9772
9925
|
if (!Array.isArray(allPluginTests) || allPluginTests.length === 0) require_logger.logger_default.warn(`Failed to generate tests for ${plugin.id}`);
|
|
9773
9926
|
else {
|
|
9774
9927
|
const testCasesWithMetadata = allPluginTests;
|
|
9775
|
-
|
|
9776
|
-
|
|
9777
|
-
const
|
|
9778
|
-
|
|
9779
|
-
|
|
9780
|
-
|
|
9781
|
-
|
|
9928
|
+
if (needsGoalExtraction) {
|
|
9929
|
+
require_logger.logger_default.debug(`Extracting goal for ${testCasesWithMetadata.length} tests from ${plugin.id}...`);
|
|
9930
|
+
for (const testCase of testCasesWithMetadata) {
|
|
9931
|
+
const promptVar = testCase.vars?.[injectVar];
|
|
9932
|
+
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
9933
|
+
const policy = getPolicyText(testCase.metadata);
|
|
9934
|
+
const extractedGoal = await require_providers.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
9935
|
+
testCase.metadata.goal = extractedGoal;
|
|
9936
|
+
}
|
|
9782
9937
|
}
|
|
9783
9938
|
testCases.push(...testCasesWithMetadata);
|
|
9784
9939
|
}
|
|
@@ -9813,13 +9968,15 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9813
9968
|
...t.metadata || {}
|
|
9814
9969
|
}
|
|
9815
9970
|
}));
|
|
9816
|
-
|
|
9817
|
-
|
|
9818
|
-
const
|
|
9819
|
-
|
|
9820
|
-
|
|
9821
|
-
|
|
9822
|
-
|
|
9971
|
+
if (needsGoalExtraction) {
|
|
9972
|
+
require_logger.logger_default.debug(`Extracting goal for ${testCasesWithMetadata.length} custom tests from ${plugin.id}...`);
|
|
9973
|
+
for (const testCase of testCasesWithMetadata) {
|
|
9974
|
+
const promptVar = testCase.vars?.[injectVar];
|
|
9975
|
+
const prompt = Array.isArray(promptVar) ? promptVar[0] : String(promptVar);
|
|
9976
|
+
const policy = getPolicyText(testCase.metadata);
|
|
9977
|
+
const extractedGoal = await require_providers.extractGoalFromPrompt(prompt, purpose, plugin.id, policy);
|
|
9978
|
+
testCase.metadata.goal = extractedGoal;
|
|
9979
|
+
}
|
|
9823
9980
|
}
|
|
9824
9981
|
testCases.push(...testCasesWithMetadata);
|
|
9825
9982
|
require_logger.logger_default.debug(`Added ${customTests.length} custom test cases from ${plugin.id}`);
|
|
@@ -10842,17 +10999,7 @@ async function loadDefaultConfig(dir, configName = "promptfooconfig") {
|
|
|
10842
10999
|
if (configCache.has(cacheKey)) return configCache.get(cacheKey);
|
|
10843
11000
|
let defaultConfig = {};
|
|
10844
11001
|
let defaultConfigPath;
|
|
10845
|
-
for (const ext of
|
|
10846
|
-
"yaml",
|
|
10847
|
-
"yml",
|
|
10848
|
-
"json",
|
|
10849
|
-
"cjs",
|
|
10850
|
-
"cts",
|
|
10851
|
-
"js",
|
|
10852
|
-
"mjs",
|
|
10853
|
-
"mts",
|
|
10854
|
-
"ts"
|
|
10855
|
-
]) {
|
|
11002
|
+
for (const ext of DEFAULT_CONFIG_EXTENSIONS) {
|
|
10856
11003
|
const configPath = path.default.join(dir, `${configName}.${ext}`);
|
|
10857
11004
|
const maybeConfig = await maybeReadConfig(configPath);
|
|
10858
11005
|
if (maybeConfig) {
|
|
@@ -10958,11 +11105,31 @@ function formatDuration(seconds) {
|
|
|
10958
11105
|
* ```
|
|
10959
11106
|
*/
|
|
10960
11107
|
function generateEvalSummary(params) {
|
|
10961
|
-
const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker } = params;
|
|
11108
|
+
const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker, targetErrorStatus } = params;
|
|
10962
11109
|
const lines = [];
|
|
10963
11110
|
const completionType = isRedteam ? "Red team" : "Eval";
|
|
10964
|
-
const
|
|
11111
|
+
const wasAborted = targetErrorStatus != null;
|
|
11112
|
+
let completionMessage;
|
|
11113
|
+
if (wasAborted) {
|
|
11114
|
+
completionMessage = `${chalk.default.red("✗")} ${completionType} aborted`;
|
|
11115
|
+
if (writeToDatabase) completionMessage += ` (ID: ${chalk.default.cyan(evalId)})`;
|
|
11116
|
+
} else if (writeToDatabase && shareableUrl) completionMessage = `${chalk.default.green("✓")} ${completionType} complete: ${shareableUrl}`;
|
|
11117
|
+
else if (writeToDatabase && activelySharing) completionMessage = `${chalk.default.green("✓")} ${completionType} complete`;
|
|
11118
|
+
else if (writeToDatabase) completionMessage = `${chalk.default.green("✓")} ${completionType} complete (ID: ${chalk.default.cyan(evalId)})`;
|
|
11119
|
+
else completionMessage = `${chalk.default.green("✓")} ${completionType} complete`;
|
|
10965
11120
|
lines.push(completionMessage);
|
|
11121
|
+
if (wasAborted && targetErrorStatus != null) {
|
|
11122
|
+
lines.push("");
|
|
11123
|
+
lines.push(chalk.default.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
|
|
11124
|
+
lines.push(chalk.default.red(` Target returned HTTP ${targetErrorStatus}`));
|
|
11125
|
+
lines.push("");
|
|
11126
|
+
lines.push(chalk.default.yellow("Possible causes:"));
|
|
11127
|
+
lines.push(chalk.default.yellow(" • Invalid API key or authentication (401/403)"));
|
|
11128
|
+
lines.push(chalk.default.yellow(" • Target endpoint does not exist (404)"));
|
|
11129
|
+
lines.push(chalk.default.yellow(" • Server does not support the request (501)"));
|
|
11130
|
+
lines.push("");
|
|
11131
|
+
lines.push(chalk.default.cyan("To fix: Check your target configuration and credentials."));
|
|
11132
|
+
}
|
|
10966
11133
|
if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
|
|
10967
11134
|
lines.push("");
|
|
10968
11135
|
lines.push(`» View results: ${chalk.default.green.bold("promptfoo view")}`);
|
|
@@ -11205,6 +11372,21 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11205
11372
|
let testSuite = void 0;
|
|
11206
11373
|
let _basePath = void 0;
|
|
11207
11374
|
let commandLineOptions = void 0;
|
|
11375
|
+
const configArgs = Array.isArray(cmdObj.config) ? cmdObj.config : typeof cmdObj.config === "string" ? [cmdObj.config] : [];
|
|
11376
|
+
const uuidConfigArgs = configArgs.filter((configArg) => require_types.isUuid(configArg));
|
|
11377
|
+
if (configArgs.length > 1 && uuidConfigArgs.length > 0) throw new Error("Cloud config UUID mode supports exactly one -c value. Use: promptfoo eval -c <cloud-config-uuid>");
|
|
11378
|
+
if (configArgs.length === 1 && uuidConfigArgs.length === 1) {
|
|
11379
|
+
const cloudConfigId = uuidConfigArgs[0];
|
|
11380
|
+
if (cmdObj.watch) throw new Error("--watch is not supported when using a cloud config UUID with -c. Use a local config file path for watch mode.");
|
|
11381
|
+
try {
|
|
11382
|
+
defaultConfig = await require_providers.getEvalConfigFromCloud(cloudConfigId);
|
|
11383
|
+
} catch (error) {
|
|
11384
|
+
const reason = error instanceof Error ? error.message : String(error);
|
|
11385
|
+
throw new Error(`Failed to load cloud eval config "${cloudConfigId}". ${reason}. Cloud UUID inputs do not fall back to local file paths. Check authentication and that the UUID exists.`);
|
|
11386
|
+
}
|
|
11387
|
+
cmdObj.config = void 0;
|
|
11388
|
+
defaultConfigPath = void 0;
|
|
11389
|
+
}
|
|
11208
11390
|
const runEvaluation = async (initialization) => {
|
|
11209
11391
|
const startTime = Date.now();
|
|
11210
11392
|
require_telemetry.telemetry_default.record("command_used", {
|
|
@@ -11228,7 +11410,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11228
11410
|
...defaultConfig,
|
|
11229
11411
|
...dirConfig
|
|
11230
11412
|
};
|
|
11231
|
-
} else require_logger.logger_default.warn(`No configuration file found in directory: ${configPath}
|
|
11413
|
+
} else require_logger.logger_default.warn(`No configuration file found in directory: ${configPath}. Looked for promptfooconfig.{${DEFAULT_CONFIG_EXTENSIONS.join(",")}}. Run "${promptfooCommand("init")}" or pass --config path/to/promptfooconfig.yaml.`);
|
|
11232
11414
|
}
|
|
11233
11415
|
}
|
|
11234
11416
|
const resumeRaw = cmdObj.resume;
|
|
@@ -11508,6 +11690,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11508
11690
|
const isRedteam = Boolean(config.redteam);
|
|
11509
11691
|
const duration = Math.round((Date.now() - startTime) / 1e3);
|
|
11510
11692
|
const tracker = require_providers.TokenUsageTracker.getInstance();
|
|
11693
|
+
const targetErrorStatus = await evalRecord.findTargetErrorStatus();
|
|
11511
11694
|
const summaryLines = generateEvalSummary({
|
|
11512
11695
|
evalId: evalRecord.id,
|
|
11513
11696
|
isRedteam,
|
|
@@ -11523,7 +11706,8 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11523
11706
|
errors,
|
|
11524
11707
|
duration,
|
|
11525
11708
|
maxConcurrency,
|
|
11526
|
-
tracker
|
|
11709
|
+
tracker,
|
|
11710
|
+
targetErrorStatus
|
|
11527
11711
|
});
|
|
11528
11712
|
if (cmdObj.write && wantsToShare && !canShareEval) {
|
|
11529
11713
|
require_logger.logger_default.info(summaryLines[0]);
|
|
@@ -11578,7 +11762,7 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
|
|
|
11578
11762
|
if (initialization) {
|
|
11579
11763
|
const configPaths = (cmdObj.config || [defaultConfigPath]).filter(Boolean);
|
|
11580
11764
|
if (!configPaths.length) {
|
|
11581
|
-
require_logger.logger_default.error(
|
|
11765
|
+
require_logger.logger_default.error(`Could not locate config file(s) to watch. Pass --config path/to/promptfooconfig.yaml or run from a directory containing promptfooconfig.{${DEFAULT_CONFIG_EXTENSIONS.join(",")}}.`);
|
|
11582
11766
|
process.exitCode = 1;
|
|
11583
11767
|
return ret;
|
|
11584
11768
|
}
|
|
@@ -11736,6 +11920,7 @@ async function doRedteamRun(options) {
|
|
|
11736
11920
|
require_logger.logger_default.info("Generating test cases...");
|
|
11737
11921
|
const { maxConcurrency, ...passThroughOptions } = options;
|
|
11738
11922
|
let redteamConfig;
|
|
11923
|
+
const generationStartTime = Date.now();
|
|
11739
11924
|
try {
|
|
11740
11925
|
redteamConfig = await doGenerateRedteam({
|
|
11741
11926
|
...passThroughOptions,
|
|
@@ -11759,6 +11944,7 @@ async function doRedteamRun(options) {
|
|
|
11759
11944
|
}
|
|
11760
11945
|
throw error;
|
|
11761
11946
|
}
|
|
11947
|
+
const generationDurationMs = Date.now() - generationStartTime;
|
|
11762
11948
|
if (!redteamConfig || !fs.existsSync(redteamPath)) {
|
|
11763
11949
|
require_logger.logger_default.info("No test cases generated. Skipping scan.");
|
|
11764
11950
|
if (verboseToggleCleanup) verboseToggleCleanup();
|
|
@@ -11781,7 +11967,14 @@ async function doRedteamRun(options) {
|
|
|
11781
11967
|
abortSignal: options.abortSignal,
|
|
11782
11968
|
progressCallback: options.progressCallback
|
|
11783
11969
|
});
|
|
11784
|
-
|
|
11970
|
+
if (evalResult && generationDurationMs >= 0) {
|
|
11971
|
+
evalResult.setGenerationDurationMs(generationDurationMs);
|
|
11972
|
+
if (evalResult.persisted) await evalResult.save();
|
|
11973
|
+
const totalMs = evalResult.durationMs ?? 0;
|
|
11974
|
+
const evalMs = evalResult.evaluationDurationMs ?? 0;
|
|
11975
|
+
require_logger.logger_default.info(chalk.default.gray(`Total scan time: ${formatDuration(totalMs / 1e3)} (generation: ${formatDuration(generationDurationMs / 1e3)}, evaluation: ${formatDuration(evalMs / 1e3)})`));
|
|
11976
|
+
}
|
|
11977
|
+
if (evalResult ? await evalResult.findTargetErrorStatus() != null : false) {} else require_logger.logger_default.info(chalk.default.green("\nRed team scan complete!"));
|
|
11785
11978
|
if (!evalResult?.shared) if (options.liveRedteamConfig) require_logger.logger_default.info(chalk.default.blue(`To view the results, click the ${chalk.default.bold("View Report")} button or run ${chalk.default.bold(promptfooCommand("redteam report"))} on the command line.`));
|
|
11786
11979
|
else require_logger.logger_default.info(chalk.default.blue(`To view the results, run ${chalk.default.bold(promptfooCommand("redteam report"))}`));
|
|
11787
11980
|
require_logger.setLogCallback(null);
|