promptfoo 0.120.21 → 0.120.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-ChupNNB3.js → ListApp-55OAHww3.js} +1 -1
- package/dist/src/{accounts-CjU7QSnv.js → accounts-BMHY0nF5.js} +5 -5
- package/dist/src/{accounts-DYMHC4iY.js → accounts-CYqHD7X1.js} +4 -4
- package/dist/src/{accounts-uWRxEv-D.cjs → accounts-TGrHBZU9.cjs} +4 -4
- package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BcSgvBPs.js} +2 -2
- package/dist/src/{agents-BJxm0Kcs.cjs → agents-BW4zv9Ig.cjs} +3 -3
- package/dist/src/{agents-DABC7ROH.js → agents-BbG-lpzK.js} +6 -6
- package/dist/src/{agents-DGQULdoA.js → agents-CUB87CA8.js} +5 -5
- package/dist/src/{agents-CJ4BY_QU.js → agents-ClpNGuau.js} +7 -7
- package/dist/src/{agents-CzCD5974.cjs → agents-d76o9WLl.cjs} +6 -6
- package/dist/src/{agents-BNSS1kSZ.js → agents-v7_Jh18K.js} +3 -3
- package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-BBN3jHiV.js} +11 -11
- package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-DWjl0adX.cjs} +11 -11
- package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-E-FZoqH0.js} +14 -14
- package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
- package/dist/src/app/assets/{index-BVE859O5.js → index-DoWPf01I.js} +71 -71
- package/dist/src/app/index.html +2 -2
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-BT9aSeWg.js → audio-B0lJd7ce.js} +4 -4
- package/dist/src/{audio-CSrI_LFf.js → audio-CwbC4E6I.js} +3 -3
- package/dist/src/{audio-DSV364HB.cjs → audio-TfpCt0uC.cjs} +3 -3
- package/dist/src/{base-NFXflrMy.js → base-BHLggB84.js} +2 -2
- package/dist/src/{base-CRrQ15eu.cjs → base-B_pJcCDc.cjs} +2 -2
- package/dist/src/{base-d8hHSpEx.js → base-DRgXuujd.js} +2 -2
- package/dist/src/{blobs-Dznj-8xW.js → blobs-BHstP0Ep.js} +2 -2
- package/dist/src/{blobs-D-Neij7m.cjs → blobs-BPAqMoU_.cjs} +2 -2
- package/dist/src/{blobs-DC2sBLv8.js → blobs-Bey6TGVi.js} +2 -2
- package/dist/src/{cache-xb4oZe27.js → cache-BTdTl56j.js} +2 -2
- package/dist/src/{cache-DYJRe0Jt.cjs → cache-BnrNlhzE.cjs} +2 -2
- package/dist/src/{cache-aKR8pp5y.js → cache-C5LeIjZl.js} +2 -2
- package/dist/src/{cache-CMvgAnSi.js → cache-CsW_9RgN.js} +2 -2
- package/dist/src/cache-Da_mTEGH.cjs +6 -0
- package/dist/src/{cache-scuP1IRq.js → cache-DfI0xG_n.js} +3 -3
- package/dist/src/{chat-BbgGuvwu.js → chat-BIj6_WPH.js} +10 -10
- package/dist/src/{chat-CfJU9hHo.cjs → chat-B_-w6xgb.cjs} +10 -10
- package/dist/src/{chat-Du_GyqZe.js → chat-BktepvuQ.js} +13 -13
- package/dist/src/{chat-DyjTngG2.js → chat-CiakYWhZ.js} +10 -10
- package/dist/src/{chat-BH8Fxyap.js → chat-D5l2blZK.js} +8 -8
- package/dist/src/{chat-Biz16BQv.cjs → chat-QHQ-K-Fi.cjs} +8 -8
- package/dist/src/{chatkit-BpumZ2Dz.js → chatkit-DlOZ-eNb.js} +3 -3
- package/dist/src/{chatkit-D9AWcI4P.js → chatkit-IWIuhxFE.js} +3 -3
- package/dist/src/{chatkit-Wzo9zOq7.cjs → chatkit-yeVtURUi.cjs} +3 -3
- package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-BZ0GjlPY.js} +11 -11
- package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-D3AZJyZ7.cjs} +8 -8
- package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-wvsRgO29.js} +8 -8
- package/dist/src/{cloud-UwfmA0Ne.js → cloud-BKfo1A2t.js} +2 -2
- package/dist/src/{cloud-1cbcFXE9.js → cloud-DPowlnHl.js} +1 -1
- package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CzAtX1Wg.cjs} +11 -11
- package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-D-IEjw5k.js} +11 -11
- package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DxHmIJbZ.js} +14 -14
- package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-CIfquW2o.cjs} +12 -12
- package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-DkjC8LZF.js} +12 -12
- package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-DwJVgNWf.js} +15 -15
- package/dist/src/{codex-sdk-CKEQpQax.js → codex-sdk-BC5qbCjh.js} +3 -3
- package/dist/src/{codex-sdk-CDN8f3P9.js → codex-sdk-CBpm53B8.js} +2 -2
- package/dist/src/{codex-sdk-0QyM_fUf.cjs → codex-sdk-Cf5_CQch.cjs} +2 -2
- package/dist/src/{cometapi-CUq-lrPh.js → cometapi-6jLBOp4e.js} +15 -15
- package/dist/src/{cometapi-Bu_LOIac.js → cometapi-8HSWR5q_.js} +12 -12
- package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-iJEkEgpT.cjs} +12 -12
- package/dist/src/{completion-Dw-9jqz1.cjs → completion-C03fIkcR.cjs} +5 -5
- package/dist/src/{completion-H4ncr2bK.js → completion-CrKHW9hy.js} +5 -5
- package/dist/src/{completion-BTZ42lqL.js → completion-LTi0SiAy.js} +5 -5
- package/dist/src/{docker-kvFgJqkg.js → docker-Bf2VQOX5.js} +14 -14
- package/dist/src/{docker-NrVUpeWw.cjs → docker-BxmP6sGO.cjs} +11 -11
- package/dist/src/{docker-B8zxQ12-.js → docker-DHazKolQ.js} +11 -11
- package/dist/src/{esm-Bwuh5HXW.js → esm-Lh6czAFT.js} +1 -1
- package/dist/src/{eval-TAOD_pvN.js → eval-4sIXLVZZ.js} +31 -19
- package/dist/src/eval-s_E4q6a8.js +16 -0
- package/dist/src/{evalResult-DnYa1RLp.js → evalResult-CRyeHpst.js} +6 -6
- package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CSucevrC.cjs} +7 -7
- package/dist/src/evalResult-DBhbtqZn.js +11 -0
- package/dist/src/evalResult-DKn_nY-a.js +13 -0
- package/dist/src/{evalResult-CnAP64o9.js → evalResult-DNXvhaKp.js} +6 -6
- package/dist/src/evalResult-DrPpo-zg.cjs +11 -0
- package/dist/src/{evaluator-q2_--POi.js → evaluator-BDY-i4ky.js} +29 -30
- package/dist/src/evaluator-CiDt-6H3.js +37 -0
- package/dist/src/{extractor-D_1s-Thr.js → extractor-HnP0pHfO.js} +81 -4
- package/dist/src/{extractor-B9uQA-6-.js → extractor-WvMabjRY.js} +81 -4
- package/dist/src/{extractor-DjxI7POm.cjs → extractor-wSeR47-c.cjs} +86 -3
- package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-8fmzVUH3.js} +2 -2
- package/dist/src/{fetch-BIPrzD39.js → fetch-BMKSarK_.js} +11 -5
- package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-BgYKpAJ7.cjs} +25 -7
- package/dist/src/{fetch-Cbz8ESXD.js → fetch-CE0K47nJ.js} +1 -1
- package/dist/src/{fetch-TcCC0nEJ.js → fetch-DKQm7iwv.js} +11 -5
- package/dist/src/fetch-GeRV0LDI.cjs +4 -0
- package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-CzcPkiKZ.js} +1 -1
- package/dist/src/{formatDuration-CGa0ZDQ8.js → formatDuration-CCiZ-wBY.js} +1 -1
- package/dist/src/{genaiTracer-zK0Mtd_D.cjs → genaiTracer-BQ8AmajD.cjs} +1 -1
- package/dist/src/{genaiTracer-CNawybn-.js → genaiTracer-Ce19n68P.js} +1 -1
- package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-foKtbVa2.js} +1 -1
- package/dist/src/{graders-DycNkUZ_.js → graders-CEF_ce6x.js} +32 -26
- package/dist/src/graders-CzoftmVg.js +35 -0
- package/dist/src/{graders-C9Ni3pNQ.cjs → graders-DOFEkq6l.cjs} +34 -28
- package/dist/src/graders-DuTWgLQF.cjs +32 -0
- package/dist/src/graders-ZhEpUtuK.js +33 -0
- package/dist/src/{graders-CG50NgEB.js → graders-jFPLMexK.js} +36 -30
- package/dist/src/{image-CxD58J63.js → image-0RV3qqTK.js} +6 -6
- package/dist/src/{image-CHGpqpC6.js → image-9Thqn5aj.js} +3 -3
- package/dist/src/{image-BmMQZgoF.js → image-BJJW-zOa.js} +4 -4
- package/dist/src/{image-CPL1masV.cjs → image-BVyYHTQG.cjs} +3 -3
- package/dist/src/{image-Dg-oq_2F.js → image-Dd0EQRYU.js} +6 -6
- package/dist/src/{image-CpQ-fJt3.cjs → image-VWd0wZmz.cjs} +6 -6
- package/dist/src/index.cjs +177 -105
- package/dist/src/index.js +173 -101
- package/dist/src/{interactiveCheck-BZbMvE6i.js → interactiveCheck-CjK2R4rB.js} +1 -1
- package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BKA28wUv.cjs} +7 -7
- package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-D0nOGqRF.js} +7 -7
- package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase-aBIYgE4B.js} +8 -8
- package/dist/src/{litellm-Dcc3mt1B.js → litellm-BiXCNyII.js} +11 -11
- package/dist/src/{litellm-CR8i8YRn.js → litellm-QSopB7EE.js} +14 -14
- package/dist/src/{litellm-QthJT0fA.cjs → litellm-vZYPcEUQ.cjs} +11 -11
- package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-CRKJx6ue.cjs} +10 -11
- package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DmnPkNon.js} +10 -11
- package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-Dvnqlb9n.js} +9 -10
- package/dist/src/main.js +166 -106
- package/dist/src/{messages-DkV1dizA.js → messages-BaxI3gI5.js} +10 -10
- package/dist/src/{messages-D4Cu-Nxe.cjs → messages-CvFzyCty.cjs} +10 -10
- package/dist/src/{messages-9ik4NuJ_.js → messages-DT1yyWfS.js} +10 -10
- package/dist/src/{meteor-DLCdreag.js → meteor-0cLf9TIn.js} +1 -1
- package/dist/src/{meteor-Dj8cTkU_.js → meteor-CBciquOS.js} +1 -1
- package/dist/src/{meteor-odmwVbyG.cjs → meteor-DRuEfzuQ.cjs} +1 -1
- package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-CnN4D-bY.js} +10 -11
- package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-XAkfcmhf.cjs} +10 -11
- package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-YEEqZu4z.js} +9 -10
- package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-Ci-VR-K3.js} +6 -6
- package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-Cjfvm57T.js} +7 -7
- package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-brE_2yrZ.cjs} +6 -6
- package/dist/src/{openai-eZz8QsxY.cjs → openai-BjpdxIOG.cjs} +1 -1
- package/dist/src/{openai-CQozD_Ef.js → openai-CitF-gEN.js} +1 -1
- package/dist/src/{openai-D_5h8sBa.js → openai-Cv9pEKxp.js} +1 -1
- package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BkXJb52a.js} +4 -4
- package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-CrlvqSy2.cjs} +4 -4
- package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-DRMuqbLx.js} +6 -6
- package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-BCKiiwct.js} +5 -5
- package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-BTVVyjnA.js} +4 -4
- package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-DXqKPf2U.cjs} +4 -4
- package/dist/src/{providerRegistry-5UB8YVkk.js → providerRegistry-BdKWcUa8.js} +1 -1
- package/dist/src/{providerRegistry-gU7gOAJI.js → providerRegistry-D32Lt9vp.js} +1 -1
- package/dist/src/{providerRegistry-DFXamjVO.cjs → providerRegistry-HGQd2MF6.cjs} +1 -1
- package/dist/src/{providers-Bz0U4NGd.cjs → providers-BF4aullZ.cjs} +114 -132
- package/dist/src/providers-BmFckJq8.cjs +31 -0
- package/dist/src/{providers-CHciOV4f.js → providers-BtbT8bDb.js} +77 -95
- package/dist/src/{providers-D8qKkVu_.js → providers-CYssBaa_.js} +82 -100
- package/dist/src/providers-DaDZ1iQw.js +31 -0
- package/dist/src/providers-Ds-h-S1b.js +33 -0
- package/dist/src/{pythonUtils-mprm8p4h.js → pythonUtils-B9JA-gsC.js} +2 -2
- package/dist/src/{quiverai-DnlaJG8H.js → quiverai-BiDa0kiF.js} +10 -10
- package/dist/src/{quiverai-3rtm1glP.js → quiverai-DTtLknat.js} +13 -13
- package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-z8oPMSUZ.cjs} +10 -10
- package/dist/src/{render-D5t2ogwP.js → render-D2710HbA.js} +2 -2
- package/dist/src/rubyUtils-BLd6EE1u.cjs +5 -0
- package/dist/src/{rubyUtils-D1DBBgSr.js → rubyUtils-CVELPvUH.js} +2 -2
- package/dist/src/{rubyUtils-Cus5EGPX.js → rubyUtils-Dn6MGcsk.js} +2 -2
- package/dist/src/{rubyUtils-CI4Y5f08.js → rubyUtils-LBsk3zIm.js} +1 -1
- package/dist/src/{rubyUtils-CWkecmDB.js → rubyUtils-_t9Gmf7U.js} +1 -1
- package/dist/src/{rubyUtils-DCci_Y0f.cjs → rubyUtils-rnCVDgH-.cjs} +1 -1
- package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-D5f6K309.js} +11 -11
- package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DwJOnUlQ.cjs} +11 -11
- package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-dygdRScC.js} +17 -17
- package/dist/src/{scanner-CPosIITM.js → scanner-D__U7qyj.js} +9 -9
- package/dist/src/server/index.js +265 -129
- package/dist/src/server-B0dYFkhY.cjs +6 -0
- package/dist/src/{server-ByLwfSDf.cjs → server-BRPZ0ZqY.cjs} +3 -3
- package/dist/src/{server-Bb83s34G.js → server-BZJ_IEiK.js} +4 -4
- package/dist/src/{server-BKdsd70B.js → server-Bo3rtriL.js} +3 -3
- package/dist/src/server-DZYvmL1z.js +6 -0
- package/dist/src/server-DlTe_Yz5.js +8 -0
- package/dist/src/{signal-B8hvvb8w.js → signal-C9qT0qiq.js} +2 -2
- package/dist/src/{slack-BblBpsIz.cjs → slack-D1F9Y7CH.cjs} +1 -1
- package/dist/src/{slack-D-bNCtTa.js → slack-DkAF58Tr.js} +1 -1
- package/dist/src/{slack-Ba3gil_6.js → slack-Ed1yyt_j.js} +1 -1
- package/dist/src/{store-W4yZqv_e.js → store-BpNCvdGu.js} +2 -2
- package/dist/src/store-BvASVHpa.cjs +6 -0
- package/dist/src/{store-l9-mIRC-.cjs → store-BwsF10Db.cjs} +2 -2
- package/dist/src/store-CDxVyNyD.js +6 -0
- package/dist/src/store-CXxKOQ8V.js +7 -0
- package/dist/src/{store-CaNRmI5T.js → store-Dm8qSYzq.js} +2 -2
- package/dist/src/{tables-DZWWveaJ.js → tables-B0wvxP8N.js} +2 -2
- package/dist/src/{tables-BCcp2r48.js → tables-CMOPSC5_.js} +2 -2
- package/dist/src/{tables-D_VmN-ui.cjs → tables-D_nKFrHt.cjs} +2 -2
- package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-B9G9VzVT.cjs} +3 -3
- package/dist/src/telemetry-CGO4vCCz.js +8 -0
- package/dist/src/telemetry-CXarb10F.cjs +6 -0
- package/dist/src/telemetry-DKtQFHz8.js +6 -0
- package/dist/src/{telemetry-CmGyDaxF.js → telemetry-DMGVtU7z.js} +3 -3
- package/dist/src/{telemetry-CB8ltkOD.js → telemetry-oV0dEJ2S.js} +3 -3
- package/dist/src/{text-C1cXk9ij.js → text-DuYSUYPB.js} +1 -1
- package/dist/src/{text-Dm78AVGG.js → text-Dx0GJOCN.js} +1 -1
- package/dist/src/{text-DF2hMKdg.cjs → text-PYISqVm1.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-DFp2VFtG.cjs → tokenUsageUtils-BtZd3sP7.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-C9fIWtvn.js → tokenUsageUtils-DoinwgKF.js} +1 -1
- package/dist/src/{tokenUsageUtils-BQVp2uKF.js → tokenUsageUtils-cFdLMERB.js} +1 -1
- package/dist/src/{transcription-BExRps8_.js → transcription-C5sJed5S.js} +7 -7
- package/dist/src/{transcription-B4GbFkhb.cjs → transcription-DEiYnjqG.cjs} +7 -7
- package/dist/src/{transcription-CpuRKcIu.js → transcription-Dcv2GYgI.js} +10 -10
- package/dist/src/{transform-BzepVlkX.js → transform-BFPYuBaW.js} +4 -4
- package/dist/src/{transform-BwedQA8W.js → transform-Cpk3BVib.js} +3 -3
- package/dist/src/{transform-B-AYIQmM.js → transform-DWBbTh_M.js} +3 -3
- package/dist/src/{transform-1g7fEAge.js → transform-DZI2t8-9.js} +3 -3
- package/dist/src/{transform-B5HUD86U.cjs → transform-DeVdMzaA.cjs} +3 -3
- package/dist/src/{transformersAvailability-B5gZ2aG-.cjs → transformersAvailability-BaoWHpu1.cjs} +1 -1
- package/dist/src/{transformersAvailability-BHP0aLcv.js → transformersAvailability-DtpwoeFC.js} +1 -1
- package/dist/src/{transformersAvailability-CA173HU7.js → transformersAvailability-O2YaCv9Z.js} +1 -1
- package/dist/src/{types-n3ttAnOa.js → types-BZz0C6rq.js} +3 -18
- package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
- package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
- package/dist/src/{util-Cd2vI7ii.js → util-BR59kcWy.js} +3 -3
- package/dist/src/{util-BnF-xRPi.js → util-BjTDLTac.js} +3 -3
- package/dist/src/{util-CwsTKPXi.js → util-CSvDts2m.js} +3 -3
- package/dist/src/{util-KE6uFWHG.cjs → util-CfLcBZEA.cjs} +3 -3
- package/dist/src/{util-BXjq8vOq.js → util-Czrn_1en.js} +3 -3
- package/dist/src/{util-BT0p5c9X.cjs → util-Dxwavk84.cjs} +3 -3
- package/dist/src/{util-DX58t30T.js → util-GpiGNWSj.js} +17 -5
- package/dist/src/{util-B-Yo0qzc.cjs → util-RfCZM4TD.cjs} +22 -4
- package/dist/src/{util-CyzW5ayt.js → util-UxC1pDDt.js} +20 -8
- package/dist/src/{utils-HdRSruG0.js → utils-2CdNTp-a.js} +2 -2
- package/dist/src/{utils-BSpcJB4B.cjs → utils-6lWwNARy.cjs} +2 -2
- package/dist/src/{utils-BWOFfOel.js → utils-BMIgqY6C.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +31 -29
- package/dist/src/app/assets/index-CFq6nz43.css +0 -1
- package/dist/src/cache-BZYapLG-.cjs +0 -6
- package/dist/src/eval-g5SH3NBa.js +0 -16
- package/dist/src/evalResult-Bt2fmj0V.js +0 -9
- package/dist/src/evalResult-DRNH7Jui.js +0 -10
- package/dist/src/evalResult-DqLWyktN.cjs +0 -9
- package/dist/src/evaluator-DIenqH86.js +0 -37
- package/dist/src/fetch-gmneHrJc.cjs +0 -4
- package/dist/src/graders-BHcgLz2P.js +0 -35
- package/dist/src/graders-BjBhwJOb.cjs +0 -32
- package/dist/src/graders-DEjGX1uY.js +0 -33
- package/dist/src/providers-BBGwMMzj.js +0 -33
- package/dist/src/providers-BR5P_qep.cjs +0 -31
- package/dist/src/providers-CDib9hVp.js +0 -31
- package/dist/src/rubyUtils-DZo4ik6p.cjs +0 -5
- package/dist/src/server-B5aIhzYk.cjs +0 -6
- package/dist/src/server-DJCw85s0.js +0 -8
- package/dist/src/server-DKj0QIm9.js +0 -6
- package/dist/src/store-Bo9FEBPO.js +0 -7
- package/dist/src/store-CqHttBql.js +0 -6
- package/dist/src/store-fsxOJl5O.cjs +0 -6
- package/dist/src/telemetry-B2uJOvMA.js +0 -8
- package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
- package/dist/src/telemetry-rP_n_bnZ.js +0 -6
- package/dist/src/time-CRRzn6vA.js +0 -9
- package/dist/src/time-CbtsO5_a.cjs +0 -21
- package/dist/src/time-CoOwN3kQ.js +0 -10
package/dist/src/server/index.js
CHANGED
|
@@ -202,7 +202,7 @@ const HUMAN_ASSERTION_TYPE = "human";
|
|
|
202
202
|
* Application version from package.json.
|
|
203
203
|
* Injected at build time, or read from npm environment in development.
|
|
204
204
|
*/
|
|
205
|
-
const VERSION = "0.120.21";
|
|
205
|
+
const VERSION = "0.120.23";
|
|
206
206
|
/**
|
|
207
207
|
* PostHog analytics key.
|
|
208
208
|
* Only populated during production builds via PROMPTFOO_POSTHOG_KEY env var.
|
|
@@ -4775,21 +4775,6 @@ const CONFIGURABLE_STRATEGIES = [
|
|
|
4775
4775
|
];
|
|
4776
4776
|
const CONFIGURABLE_STRATEGIES_SET = new Set(CONFIGURABLE_STRATEGIES);
|
|
4777
4777
|
/**
|
|
4778
|
-
* Strategies that should not have language configuration applied to them.
|
|
4779
|
-
*/
|
|
4780
|
-
const LANGUAGE_DISALLOWED_STRATEGIES = new Set([
|
|
4781
|
-
"audio",
|
|
4782
|
-
"video",
|
|
4783
|
-
"image",
|
|
4784
|
-
"math-prompt"
|
|
4785
|
-
]);
|
|
4786
|
-
/**
|
|
4787
|
-
* Determines if a strategy should not use language configuration
|
|
4788
|
-
*/
|
|
4789
|
-
function isLanguageDisallowedStrategy(strategyId) {
|
|
4790
|
-
return strategyId ? LANGUAGE_DISALLOWED_STRATEGIES.has(strategyId) : false;
|
|
4791
|
-
}
|
|
4792
|
-
/**
|
|
4793
4778
|
* Default 'n' fan out for strategies that can add additional test cases during generation
|
|
4794
4779
|
*/
|
|
4795
4780
|
const DEFAULT_N_FAN_OUT_BY_STRATEGY = {
|
|
@@ -6101,7 +6086,11 @@ function calculateAttackSuccessRate(testCount, failCount) {
|
|
|
6101
6086
|
//#region src/redteam/sharedFrontend.ts
|
|
6102
6087
|
function getRiskCategorySeverityMap(plugins) {
|
|
6103
6088
|
const overrides = plugins?.reduce((acc, plugin) => {
|
|
6104
|
-
if (plugin.severity)
|
|
6089
|
+
if (plugin.severity) {
|
|
6090
|
+
acc[plugin.id] = plugin.severity;
|
|
6091
|
+
const policyId = plugin.config?.policy?.id;
|
|
6092
|
+
if (plugin.id === "policy" && policyId) acc[policyId] = plugin.severity;
|
|
6093
|
+
}
|
|
6105
6094
|
return acc;
|
|
6106
6095
|
}, {}) || {};
|
|
6107
6096
|
return {
|
|
@@ -6655,7 +6644,7 @@ function convertResultsToTable(eval_) {
|
|
|
6655
6644
|
description: result.description || void 0,
|
|
6656
6645
|
outputs: [],
|
|
6657
6646
|
vars: result.vars ? Object.values(varsForHeader).map((varName) => {
|
|
6658
|
-
const varValue = result.vars?.[varName]
|
|
6647
|
+
const varValue = result.vars?.[varName] ?? "";
|
|
6659
6648
|
if (typeof varValue === "string") return varValue;
|
|
6660
6649
|
return JSON.stringify(varValue, null, 2);
|
|
6661
6650
|
}).flat() : [],
|
|
@@ -6690,7 +6679,11 @@ function convertResultsToTable(eval_) {
|
|
|
6690
6679
|
varValuesForRow.set(result.testIdx, result.vars);
|
|
6691
6680
|
rowMap[result.testIdx] = row;
|
|
6692
6681
|
let resultText;
|
|
6693
|
-
const
|
|
6682
|
+
const rawOutput = result.response?.output;
|
|
6683
|
+
let outputTextDisplay;
|
|
6684
|
+
if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
|
|
6685
|
+
else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
|
|
6686
|
+
else outputTextDisplay = String(rawOutput);
|
|
6694
6687
|
if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
|
|
6695
6688
|
else resultText = `${outputTextDisplay}`;
|
|
6696
6689
|
else if (result.error) resultText = `${result.error}`;
|
|
@@ -6737,7 +6730,7 @@ function convertResultsToTable(eval_) {
|
|
|
6737
6730
|
const rows = Object.values(rowMap);
|
|
6738
6731
|
const sortedVars = [...varsForHeader].sort();
|
|
6739
6732
|
for (const row of rows) row.vars = sortedVars.map((varName) => {
|
|
6740
|
-
const varValue = varValuesForRow.get(row.testIdx)?.[varName]
|
|
6733
|
+
const varValue = varValuesForRow.get(row.testIdx)?.[varName] ?? "";
|
|
6741
6734
|
if (typeof varValue === "string") return varValue;
|
|
6742
6735
|
return JSON.stringify(varValue, null, 2);
|
|
6743
6736
|
});
|
|
@@ -6754,7 +6747,11 @@ function convertResultsToTable(eval_) {
|
|
|
6754
6747
|
//#region src/util/exportToFile/index.ts
|
|
6755
6748
|
function convertEvalResultToTableCell(result) {
|
|
6756
6749
|
let resultText;
|
|
6757
|
-
const
|
|
6750
|
+
const rawOutput = result.response?.output;
|
|
6751
|
+
let outputTextDisplay;
|
|
6752
|
+
if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
|
|
6753
|
+
else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
|
|
6754
|
+
else outputTextDisplay = String(rawOutput);
|
|
6758
6755
|
if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
|
|
6759
6756
|
else resultText = `${outputTextDisplay}`;
|
|
6760
6757
|
else if (result.error) resultText = `${result.error}`;
|
|
@@ -6800,11 +6797,12 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
6800
6797
|
outputs: [],
|
|
6801
6798
|
vars: Object.values(varsForHeader).map((varName) => {
|
|
6802
6799
|
if (varName === "sessionId") {
|
|
6803
|
-
const
|
|
6800
|
+
const sessionId = results[0].testCase.vars?.sessionId;
|
|
6801
|
+
const varValue = sessionId == null || sessionId === "" ? results[0].metadata?.sessionId ?? "" : sessionId;
|
|
6804
6802
|
if (typeof varValue === "string") return varValue;
|
|
6805
6803
|
return JSON.stringify(varValue);
|
|
6806
6804
|
}
|
|
6807
|
-
const varValue = results[0].testCase.vars?.[varName]
|
|
6805
|
+
const varValue = results[0].testCase.vars?.[varName] ?? "";
|
|
6808
6806
|
if (typeof varValue === "string") return varValue;
|
|
6809
6807
|
return JSON.stringify(varValue);
|
|
6810
6808
|
}).flat(),
|
|
@@ -7276,6 +7274,68 @@ async function recordBlobReference(hash, refContext) {
|
|
|
7276
7274
|
}).run();
|
|
7277
7275
|
}
|
|
7278
7276
|
|
|
7277
|
+
//#endregion
|
|
7278
|
+
//#region src/blobs/remoteUpload.ts
|
|
7279
|
+
function buildRemoteUrl() {
|
|
7280
|
+
const baseUrl = cloudConfig.getApiHost();
|
|
7281
|
+
const apiKey = cloudConfig.getApiKey();
|
|
7282
|
+
if (!baseUrl || !apiKey || !isLoggedIntoCloud()) return null;
|
|
7283
|
+
try {
|
|
7284
|
+
return new URL("/api/blobs", baseUrl).toString();
|
|
7285
|
+
} catch (error) {
|
|
7286
|
+
logger_default.debug("[RemoteBlob] Invalid remote blob URL", {
|
|
7287
|
+
error: error instanceof Error ? error.message : String(error),
|
|
7288
|
+
baseUrl
|
|
7289
|
+
});
|
|
7290
|
+
return null;
|
|
7291
|
+
}
|
|
7292
|
+
}
|
|
7293
|
+
function shouldAttemptRemoteBlobUpload() {
|
|
7294
|
+
return buildRemoteUrl() !== null;
|
|
7295
|
+
}
|
|
7296
|
+
async function uploadBlobRemote(buffer, mimeType, context) {
|
|
7297
|
+
const url = buildRemoteUrl();
|
|
7298
|
+
const apiKey = cloudConfig.getApiKey();
|
|
7299
|
+
if (!url || !apiKey) return null;
|
|
7300
|
+
try {
|
|
7301
|
+
const { fetchWithProxy } = await Promise.resolve().then(() => fetch_exports);
|
|
7302
|
+
const response = await fetchWithProxy(url, {
|
|
7303
|
+
method: "POST",
|
|
7304
|
+
headers: {
|
|
7305
|
+
"Content-Type": "application/json",
|
|
7306
|
+
Authorization: `Bearer ${apiKey}`
|
|
7307
|
+
},
|
|
7308
|
+
body: JSON.stringify({
|
|
7309
|
+
data: buffer.toString("base64"),
|
|
7310
|
+
mimeType,
|
|
7311
|
+
context
|
|
7312
|
+
})
|
|
7313
|
+
});
|
|
7314
|
+
if (response.status === 404 || response.status === 400) {
|
|
7315
|
+
logger_default.debug("[RemoteBlob] Remote blob upload unavailable", { status: response.status });
|
|
7316
|
+
return null;
|
|
7317
|
+
}
|
|
7318
|
+
if (!response.ok) {
|
|
7319
|
+
const text = await response.text();
|
|
7320
|
+
logger_default.debug("[RemoteBlob] Failed to upload blob", {
|
|
7321
|
+
status: response.status,
|
|
7322
|
+
statusText: response.statusText,
|
|
7323
|
+
body: text
|
|
7324
|
+
});
|
|
7325
|
+
return null;
|
|
7326
|
+
}
|
|
7327
|
+
const data = await response.json();
|
|
7328
|
+
if (!data?.ref?.hash) {
|
|
7329
|
+
logger_default.debug("[RemoteBlob] Remote upload returned malformed response");
|
|
7330
|
+
return null;
|
|
7331
|
+
}
|
|
7332
|
+
return data;
|
|
7333
|
+
} catch (error) {
|
|
7334
|
+
logger_default.debug("[RemoteBlob] Error uploading blob", { error: error instanceof Error ? error.message : String(error) });
|
|
7335
|
+
return null;
|
|
7336
|
+
}
|
|
7337
|
+
}
|
|
7338
|
+
|
|
7279
7339
|
//#endregion
|
|
7280
7340
|
//#region src/blobs/extractor.ts
|
|
7281
7341
|
const BLOB_URI_REGEX$1 = /^promptfoo:\/\/blob\/([a-f0-9]{64})$/i;
|
|
@@ -7350,11 +7410,24 @@ async function maybeStore(base64OrDataUrl, defaultMimeType, context, location, k
|
|
|
7350
7410
|
const parsed = parseBinary(base64OrDataUrl, defaultMimeType);
|
|
7351
7411
|
if (!parsed || !shouldExternalize(parsed.buffer)) return null;
|
|
7352
7412
|
if (!isBlobStorageEnabled()) return null;
|
|
7353
|
-
const
|
|
7413
|
+
const mimeType = parsed.mimeType || "application/octet-stream";
|
|
7414
|
+
const { ref } = await storeBlob(parsed.buffer, mimeType, {
|
|
7354
7415
|
...context,
|
|
7355
7416
|
location,
|
|
7356
7417
|
kind
|
|
7357
7418
|
});
|
|
7419
|
+
if (shouldAttemptRemoteBlobUpload()) uploadBlobRemote(parsed.buffer, mimeType, {
|
|
7420
|
+
evalId: context.evalId,
|
|
7421
|
+
testIdx: context.testIdx,
|
|
7422
|
+
promptIdx: context.promptIdx,
|
|
7423
|
+
location,
|
|
7424
|
+
kind
|
|
7425
|
+
}).catch((error) => {
|
|
7426
|
+
logger_default.debug("[BlobExtractor] Cloud upload failed (non-fatal)", {
|
|
7427
|
+
error: error instanceof Error ? error.message : String(error),
|
|
7428
|
+
hash: ref.hash
|
|
7429
|
+
});
|
|
7430
|
+
});
|
|
7358
7431
|
return ref;
|
|
7359
7432
|
}
|
|
7360
7433
|
async function externalizeDataUrls(value, context, location) {
|
|
@@ -11691,11 +11764,23 @@ function isRuntimeVar(key) {
|
|
|
11691
11764
|
* - Being in the explicit list (e.g., sessionId for backward compatibility)
|
|
11692
11765
|
*/
|
|
11693
11766
|
function filterRuntimeVars(vars) {
|
|
11694
|
-
if (!vars) return vars;
|
|
11767
|
+
if (!vars || typeof vars !== "object" || Array.isArray(vars)) return vars;
|
|
11695
11768
|
const filtered = {};
|
|
11696
11769
|
for (const [key, value] of Object.entries(vars)) if (!isRuntimeVar(key)) filtered[key] = value;
|
|
11697
11770
|
return filtered;
|
|
11698
11771
|
}
|
|
11772
|
+
/**
|
|
11773
|
+
* Extracts only runtime variables from a vars object.
|
|
11774
|
+
* This is the inverse of filterRuntimeVars.
|
|
11775
|
+
*
|
|
11776
|
+
* Used to restore runtime state when re-running filtered tests.
|
|
11777
|
+
*/
|
|
11778
|
+
function extractRuntimeVars(vars) {
|
|
11779
|
+
if (!vars || typeof vars !== "object" || Array.isArray(vars)) return;
|
|
11780
|
+
const extracted = {};
|
|
11781
|
+
for (const [key, value] of Object.entries(vars)) if (isRuntimeVar(key)) extracted[key] = value;
|
|
11782
|
+
return Object.keys(extracted).length > 0 ? extracted : void 0;
|
|
11783
|
+
}
|
|
11699
11784
|
function varsMatch(vars1, vars2) {
|
|
11700
11785
|
return deepEqual(vars1, vars2);
|
|
11701
11786
|
}
|
|
@@ -14189,7 +14274,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
14189
14274
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
14190
14275
|
...config.tool_choice ? { tool_choice: transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
14191
14276
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
14192
|
-
...processedOutputFormat ? {
|
|
14277
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
14193
14278
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
14194
14279
|
};
|
|
14195
14280
|
logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -19708,26 +19793,6 @@ async function getDefaultProviders(env) {
|
|
|
19708
19793
|
//#region src/redteam/plugins/agentic/constants.ts
|
|
19709
19794
|
const REDTEAM_MEMORY_POISONING_PLUGIN_ID = "promptfoo:redteam:agentic:memory-poisoning";
|
|
19710
19795
|
|
|
19711
|
-
//#endregion
|
|
19712
|
-
//#region src/blobs/remoteUpload.ts
|
|
19713
|
-
function buildRemoteUrl() {
|
|
19714
|
-
const baseUrl = cloudConfig.getApiHost();
|
|
19715
|
-
const apiKey = cloudConfig.getApiKey();
|
|
19716
|
-
if (!baseUrl || !apiKey || !isLoggedIntoCloud()) return null;
|
|
19717
|
-
try {
|
|
19718
|
-
return new URL("/api/blobs", baseUrl).toString();
|
|
19719
|
-
} catch (error) {
|
|
19720
|
-
logger_default.debug("[RemoteBlob] Invalid remote blob URL", {
|
|
19721
|
-
error: error instanceof Error ? error.message : String(error),
|
|
19722
|
-
baseUrl
|
|
19723
|
-
});
|
|
19724
|
-
return null;
|
|
19725
|
-
}
|
|
19726
|
-
}
|
|
19727
|
-
function shouldAttemptRemoteBlobUpload() {
|
|
19728
|
-
return buildRemoteUrl() !== null;
|
|
19729
|
-
}
|
|
19730
|
-
|
|
19731
19796
|
//#endregion
|
|
19732
19797
|
//#region src/providers/promptfoo.ts
|
|
19733
19798
|
/**
|
|
@@ -23357,13 +23422,15 @@ function cleanupExpiredPageState() {
|
|
|
23357
23422
|
*/
|
|
23358
23423
|
async function checkExfilTracking(uuid, evalId) {
|
|
23359
23424
|
try {
|
|
23360
|
-
const
|
|
23425
|
+
const url = getRemoteGenerationUrl();
|
|
23426
|
+
const normalizedEvalId = evalId?.replace(/^eval-/, "");
|
|
23427
|
+
const response = await fetchWithRetries(url, {
|
|
23361
23428
|
method: "POST",
|
|
23362
23429
|
headers: { "Content-Type": "application/json" },
|
|
23363
23430
|
body: JSON.stringify({
|
|
23364
23431
|
task: "get-web-page-tracking",
|
|
23365
23432
|
uuid,
|
|
23366
|
-
evalId
|
|
23433
|
+
evalId: normalizedEvalId
|
|
23367
23434
|
})
|
|
23368
23435
|
}, 1e4);
|
|
23369
23436
|
if (!response.ok) {
|
|
@@ -23594,7 +23661,7 @@ async function transformForPerTurnLayer(testCases, injectVar, config) {
|
|
|
23594
23661
|
const attackPrompt = replaceUrlsWithExfilPlaceholder(rawAttackPrompt);
|
|
23595
23662
|
const goal = testCase.metadata?.goal;
|
|
23596
23663
|
const testCaseId = testCase.metadata?.testCaseId || testCase.metadata?.originalTestCaseId || (typeof goal === "string" ? `goal-${hashString(goal)}` : "unknown");
|
|
23597
|
-
const evalId = testCase.metadata?.evaluationId;
|
|
23664
|
+
const evalId = (testCase.metadata?.evaluationId)?.replace(/^eval-/, "");
|
|
23598
23665
|
const stateKey = evalId ? `${evalId}:${testCaseId}` : testCaseId;
|
|
23599
23666
|
let pageState = pageStateMap.get(stateKey);
|
|
23600
23667
|
let turnNumber;
|
|
@@ -25041,7 +25108,6 @@ async function textToAudio(text, language = "en", options) {
|
|
|
25041
25108
|
*/
|
|
25042
25109
|
async function addAudioToBase64(testCases, injectVar, config = {}) {
|
|
25043
25110
|
const audioTestCases = [];
|
|
25044
|
-
const language = config.language || "en";
|
|
25045
25111
|
const evalId = config.evalId;
|
|
25046
25112
|
let progressBar;
|
|
25047
25113
|
if (logger_default.level !== "debug") {
|
|
@@ -25055,7 +25121,7 @@ async function addAudioToBase64(testCases, injectVar, config = {}) {
|
|
|
25055
25121
|
for (const testCase of testCases) {
|
|
25056
25122
|
invariant(testCase.vars, `Audio encoding: testCase.vars is required, but got ${JSON.stringify(testCase)}`);
|
|
25057
25123
|
const originalText = String(testCase.vars[injectVar]);
|
|
25058
|
-
const audioResult = await textToAudio(originalText, language, { evalId });
|
|
25124
|
+
const audioResult = await textToAudio(originalText, testCase.metadata?.language || testCase.metadata?.modifiers?.language || config.language || "en", { evalId });
|
|
25059
25125
|
audioTestCases.push({
|
|
25060
25126
|
...testCase,
|
|
25061
25127
|
assert: testCase.assert?.map((assertion) => ({
|
|
@@ -28674,7 +28740,7 @@ var IndirectWebPwnProvider = class {
|
|
|
28674
28740
|
const goal = context?.test?.metadata?.goal || (typeof injectVarValue === "string" ? injectVarValue : void 0);
|
|
28675
28741
|
const purpose = context?.test?.metadata?.purpose;
|
|
28676
28742
|
const testCaseId = context?.test?.metadata?.testCaseId || `scan-${this.config.scanId}`;
|
|
28677
|
-
const evalId = context?.evaluationId;
|
|
28743
|
+
const evalId = context?.evaluationId?.replace(/^eval-/, "");
|
|
28678
28744
|
logger_default.debug("[IndirectWebPwn] Starting attack", {
|
|
28679
28745
|
goal,
|
|
28680
28746
|
purpose,
|
|
@@ -30868,6 +30934,7 @@ var SimulatedUser = class {
|
|
|
30868
30934
|
}
|
|
30869
30935
|
async callApi(_prompt, context, _callApiOptions) {
|
|
30870
30936
|
invariant(context?.originalProvider, "Expected originalProvider to be set");
|
|
30937
|
+
const targetProvider = context.originalProvider;
|
|
30871
30938
|
const instructions = getNunjucksEngine().renderString(this.rawInstructions, context?.vars);
|
|
30872
30939
|
const userProvider = new PromptfooSimulatedUserProvider({ instructions }, this.taskId);
|
|
30873
30940
|
logger_default.debug(`[SimulatedUser] Formatted user instructions: ${instructions}`);
|
|
@@ -30883,7 +30950,7 @@ var SimulatedUser = class {
|
|
|
30883
30950
|
let agentResponse;
|
|
30884
30951
|
if ((messages.length > 0 ? messages[messages.length - 1].role : null) === "user") {
|
|
30885
30952
|
logger_default.debug("[SimulatedUser] Initial messages end with user message, getting agent response first");
|
|
30886
|
-
agentResponse = await this.sendMessageToAgent(messages,
|
|
30953
|
+
agentResponse = await this.sendMessageToAgent(messages, targetProvider, context);
|
|
30887
30954
|
if (agentResponse.error) return {
|
|
30888
30955
|
error: agentResponse.error,
|
|
30889
30956
|
tokenUsage
|
|
@@ -30906,7 +30973,7 @@ var SimulatedUser = class {
|
|
|
30906
30973
|
const lastMessage = messagesToUser[messagesToUser.length - 1];
|
|
30907
30974
|
if (lastMessage.content && typeof lastMessage.content === "string" && lastMessage.content.includes("###STOP###")) break;
|
|
30908
30975
|
messages.push(lastMessage);
|
|
30909
|
-
agentResponse = await this.sendMessageToAgent(messagesToUser,
|
|
30976
|
+
agentResponse = await this.sendMessageToAgent(messagesToUser, targetProvider, context);
|
|
30910
30977
|
if (agentResponse.error) return {
|
|
30911
30978
|
error: agentResponse.error,
|
|
30912
30979
|
tokenUsage
|
|
@@ -45015,11 +45082,12 @@ var PythonProvider = class {
|
|
|
45015
45082
|
}
|
|
45016
45083
|
return parsedResult;
|
|
45017
45084
|
} else {
|
|
45018
|
-
|
|
45019
|
-
|
|
45020
|
-
delete
|
|
45021
|
-
delete
|
|
45022
|
-
delete
|
|
45085
|
+
const sanitizedContext = context ? { ...context } : void 0;
|
|
45086
|
+
if (sanitizedContext) {
|
|
45087
|
+
delete sanitizedContext.getCache;
|
|
45088
|
+
delete sanitizedContext.logger;
|
|
45089
|
+
delete sanitizedContext.filters;
|
|
45090
|
+
delete sanitizedContext.originalProvider;
|
|
45023
45091
|
}
|
|
45024
45092
|
const optionsWithProcessedConfig = {
|
|
45025
45093
|
...this.options,
|
|
@@ -45031,7 +45099,7 @@ var PythonProvider = class {
|
|
|
45031
45099
|
const args = apiType === "call_api" ? [
|
|
45032
45100
|
prompt,
|
|
45033
45101
|
optionsWithProcessedConfig,
|
|
45034
|
-
|
|
45102
|
+
sanitizedContext
|
|
45035
45103
|
] : [prompt, optionsWithProcessedConfig];
|
|
45036
45104
|
logger_default.debug(`Executing python script ${absPath} via worker pool with args: ${safeJsonStringify(args)}`);
|
|
45037
45105
|
const functionName = this.functionName || apiType;
|
|
@@ -54604,7 +54672,7 @@ var RedteamPluginBase = class RedteamPluginBase {
|
|
|
54604
54672
|
logger_default.error(`Malformed response from API provider: Expected generatedPrompts to be a string, got ${typeof generatedPrompts}: ${JSON.stringify(generatedPrompts)}`);
|
|
54605
54673
|
return [];
|
|
54606
54674
|
}
|
|
54607
|
-
if (isBasicRefusal(generatedPrompts)) {
|
|
54675
|
+
if (!(/prompt\s*:/i.test(generatedPrompts) || generatedPrompts.includes("PromptBlock:") || /<Prompt>/i.test(generatedPrompts)) && isBasicRefusal(generatedPrompts)) {
|
|
54608
54676
|
let message = `${this.provider.id()} returned a refusal during inference for ${this.constructor.name} test case generation.`;
|
|
54609
54677
|
const context = {};
|
|
54610
54678
|
if (this.purpose) context.purpose = this.purpose;
|
|
@@ -56023,16 +56091,15 @@ var DataExfilGrader = class extends RedteamGraderBase {
|
|
|
56023
56091
|
});
|
|
56024
56092
|
}
|
|
56025
56093
|
}
|
|
56026
|
-
if (wasExfiltrated
|
|
56094
|
+
if (wasExfiltrated) {
|
|
56027
56095
|
const grade = {
|
|
56028
|
-
pass:
|
|
56029
|
-
score:
|
|
56030
|
-
reason:
|
|
56096
|
+
pass: false,
|
|
56097
|
+
score: 0,
|
|
56098
|
+
reason: `Data exfiltration detected: ${exfilCount} request(s) to exfil endpoint. Query params: ${JSON.stringify(exfilRecords.map((r) => r.queryParams))}`
|
|
56031
56099
|
};
|
|
56032
|
-
logger_default.debug("[DataExfilGrader] Deterministic grading result", {
|
|
56100
|
+
logger_default.debug("[DataExfilGrader] Deterministic grading result: FAIL", {
|
|
56033
56101
|
wasExfiltrated,
|
|
56034
|
-
exfilCount
|
|
56035
|
-
pass: grade.pass
|
|
56102
|
+
exfilCount
|
|
56036
56103
|
});
|
|
56037
56104
|
return {
|
|
56038
56105
|
grade,
|
|
@@ -56045,8 +56112,16 @@ var DataExfilGrader = class extends RedteamGraderBase {
|
|
|
56045
56112
|
})
|
|
56046
56113
|
};
|
|
56047
56114
|
}
|
|
56048
|
-
logger_default.
|
|
56049
|
-
|
|
56115
|
+
if (wasExfiltrated === false) logger_default.debug("[DataExfilGrader] Server-side shows no exfil, falling through to LLM grading for output check");
|
|
56116
|
+
if (wasExfiltrated === void 0) logger_default.info("[DataExfilGrader] No server-side tracking data, using LLM grading");
|
|
56117
|
+
const testWithOutput = {
|
|
56118
|
+
...test,
|
|
56119
|
+
metadata: {
|
|
56120
|
+
...test.metadata,
|
|
56121
|
+
output: llmOutput
|
|
56122
|
+
}
|
|
56123
|
+
};
|
|
56124
|
+
return super.getResult(prompt, llmOutput, testWithOutput, provider, renderedValue, additionalRubric, skipRefusalCheck, gradingContext);
|
|
56050
56125
|
}
|
|
56051
56126
|
};
|
|
56052
56127
|
|
|
@@ -67890,12 +67965,35 @@ async function filterTestsByResults(testSuite, pathOrId, filterFn) {
|
|
|
67890
67965
|
if (filteredResults.length === 0) return [];
|
|
67891
67966
|
const uniqueVarsInResults = new Set(filteredResults.map((r) => JSON.stringify(filterRuntimeVars(r.vars))));
|
|
67892
67967
|
logger_default.debug(`[filterTestsByResults] ${uniqueVarsInResults.size} unique test cases (by vars) in filtered results`);
|
|
67893
|
-
const matchedTests = [
|
|
67968
|
+
const matchedTests = [];
|
|
67969
|
+
for (const test of testSuite.tests) {
|
|
67894
67970
|
const testWithDefaults = mergeDefaultVars(test, testSuite.defaultTest);
|
|
67895
|
-
|
|
67896
|
-
if (
|
|
67897
|
-
|
|
67898
|
-
|
|
67971
|
+
let matchedResult = filteredResults.find((result) => resultIsForTestCase(result, testWithDefaults) && extractRuntimeVars(result.vars) !== void 0);
|
|
67972
|
+
if (!matchedResult) matchedResult = filteredResults.find((result) => resultIsForTestCase(result, testWithDefaults));
|
|
67973
|
+
if (!matchedResult) {
|
|
67974
|
+
if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) {
|
|
67975
|
+
matchedResult = filteredResults.find((result) => resultIsForTestCase(result, test) && extractRuntimeVars(result.vars) !== void 0);
|
|
67976
|
+
if (!matchedResult) matchedResult = filteredResults.find((result) => resultIsForTestCase(result, test));
|
|
67977
|
+
}
|
|
67978
|
+
}
|
|
67979
|
+
if (matchedResult) {
|
|
67980
|
+
const runtimeVars = extractRuntimeVars(matchedResult.vars);
|
|
67981
|
+
if (runtimeVars) {
|
|
67982
|
+
const testWithRuntimeVars = {
|
|
67983
|
+
...test,
|
|
67984
|
+
vars: {
|
|
67985
|
+
...test.vars,
|
|
67986
|
+
...runtimeVars
|
|
67987
|
+
}
|
|
67988
|
+
};
|
|
67989
|
+
logger_default.debug("[filterTestsByResults] Restored runtime vars for test", { varKeys: Object.keys(runtimeVars) });
|
|
67990
|
+
matchedTests.push(testWithRuntimeVars);
|
|
67991
|
+
} else {
|
|
67992
|
+
logger_default.debug("[filterTestsByResults] Matched test has no runtime vars to restore");
|
|
67993
|
+
matchedTests.push(test);
|
|
67994
|
+
}
|
|
67995
|
+
}
|
|
67996
|
+
}
|
|
67899
67997
|
logger_default.debug(`[filterTestsByResults] Matched ${matchedTests.length} tests out of ${testSuite.tests.length} in test suite`);
|
|
67900
67998
|
const extractedTests = [];
|
|
67901
67999
|
const matchedResultKeys = /* @__PURE__ */ new Set();
|
|
@@ -70043,8 +70141,9 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
70043
70141
|
logger_default.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
|
|
70044
70142
|
return [];
|
|
70045
70143
|
}
|
|
70144
|
+
const { graderExamples, ...configForRemote } = config ?? {};
|
|
70046
70145
|
const body = JSON.stringify({
|
|
70047
|
-
config,
|
|
70146
|
+
config: configForRemote,
|
|
70048
70147
|
injectVar,
|
|
70049
70148
|
inputs: config?.inputs,
|
|
70050
70149
|
n,
|
|
@@ -70733,11 +70832,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
70733
70832
|
});
|
|
70734
70833
|
await validateStrategies(strategies);
|
|
70735
70834
|
await validateSharpDependency(strategies, plugins);
|
|
70736
|
-
if (strategies.some((s) => isLanguageDisallowedStrategy(s.id)) && language) {
|
|
70737
|
-
const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
|
|
70738
|
-
language = "en";
|
|
70739
|
-
logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
|
|
70740
|
-
}
|
|
70741
70835
|
const redteamProvider = await redteamProviderManager.getProvider({ provider });
|
|
70742
70836
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
70743
70837
|
logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.yellow(plugins.map((p) => {
|
|
@@ -71787,60 +71881,101 @@ async function deleteErrorResults(resultIds) {
|
|
|
71787
71881
|
await getDb().delete(evalResultsTable).where(inArray(evalResultsTable.id, resultIds));
|
|
71788
71882
|
logger_default.debug(`Deleted ${resultIds.length} error results from database`);
|
|
71789
71883
|
}
|
|
71884
|
+
const RECALCULATE_BATCH_SIZE = 1e3;
|
|
71790
71885
|
/**
|
|
71791
|
-
* Recalculates prompt metrics based on current results after ERROR results have been deleted
|
|
71886
|
+
* Recalculates prompt metrics based on current results after ERROR results have been deleted.
|
|
71887
|
+
* Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
|
|
71792
71888
|
*/
|
|
71793
71889
|
async function recalculatePromptMetrics(evalRecord) {
|
|
71794
71890
|
logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
|
|
71795
|
-
|
|
71891
|
+
const startTime = Date.now();
|
|
71892
|
+
let batchNumber = 0;
|
|
71893
|
+
let totalProcessed = 0;
|
|
71796
71894
|
const promptMetricsMap = /* @__PURE__ */ new Map();
|
|
71797
|
-
for (const
|
|
71798
|
-
|
|
71799
|
-
|
|
71800
|
-
|
|
71801
|
-
|
|
71802
|
-
|
|
71803
|
-
|
|
71804
|
-
|
|
71805
|
-
|
|
71806
|
-
|
|
71807
|
-
|
|
71808
|
-
|
|
71809
|
-
|
|
71810
|
-
|
|
71811
|
-
|
|
71812
|
-
|
|
71813
|
-
|
|
71814
|
-
|
|
71815
|
-
|
|
71816
|
-
|
|
71817
|
-
|
|
71818
|
-
|
|
71819
|
-
|
|
71820
|
-
|
|
71821
|
-
|
|
71822
|
-
|
|
71823
|
-
|
|
71824
|
-
|
|
71825
|
-
|
|
71826
|
-
|
|
71827
|
-
|
|
71828
|
-
|
|
71829
|
-
|
|
71830
|
-
|
|
71831
|
-
|
|
71832
|
-
|
|
71833
|
-
|
|
71834
|
-
|
|
71835
|
-
|
|
71836
|
-
|
|
71837
|
-
|
|
71838
|
-
|
|
71895
|
+
for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
|
|
71896
|
+
score: 0,
|
|
71897
|
+
testPassCount: 0,
|
|
71898
|
+
testFailCount: 0,
|
|
71899
|
+
testErrorCount: 0,
|
|
71900
|
+
assertPassCount: 0,
|
|
71901
|
+
assertFailCount: 0,
|
|
71902
|
+
totalLatencyMs: 0,
|
|
71903
|
+
tokenUsage: createEmptyTokenUsage(),
|
|
71904
|
+
namedScores: {},
|
|
71905
|
+
namedScoresCount: {},
|
|
71906
|
+
cost: 0
|
|
71907
|
+
});
|
|
71908
|
+
let currentResultId;
|
|
71909
|
+
try {
|
|
71910
|
+
for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
|
|
71911
|
+
batchNumber++;
|
|
71912
|
+
logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
|
|
71913
|
+
for (const result of batch) {
|
|
71914
|
+
currentResultId = result.id;
|
|
71915
|
+
const metrics = promptMetricsMap.get(result.promptIdx);
|
|
71916
|
+
if (!metrics) {
|
|
71917
|
+
logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
|
|
71918
|
+
resultId: result.id,
|
|
71919
|
+
evalId: evalRecord.id
|
|
71920
|
+
});
|
|
71921
|
+
continue;
|
|
71922
|
+
}
|
|
71923
|
+
if (result.success) metrics.testPassCount++;
|
|
71924
|
+
else if (result.failureReason === ResultFailureReason.ERROR) metrics.testErrorCount++;
|
|
71925
|
+
else metrics.testFailCount++;
|
|
71926
|
+
metrics.score += result.score ?? 0;
|
|
71927
|
+
metrics.totalLatencyMs += result.latencyMs || 0;
|
|
71928
|
+
metrics.cost += result.cost || 0;
|
|
71929
|
+
for (const [key, value] of Object.entries(result.namedScores || {})) {
|
|
71930
|
+
metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
|
|
71931
|
+
const testVars = result.testCase?.vars || {};
|
|
71932
|
+
let contributingAssertions = 0;
|
|
71933
|
+
result.gradingResult?.componentResults?.forEach((componentResult) => {
|
|
71934
|
+
if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
|
|
71935
|
+
});
|
|
71936
|
+
metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
|
|
71937
|
+
}
|
|
71938
|
+
if (result.gradingResult?.componentResults) {
|
|
71939
|
+
metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
|
|
71940
|
+
metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
|
|
71941
|
+
}
|
|
71942
|
+
if (result.response?.tokenUsage) accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
|
|
71943
|
+
if (result.gradingResult?.tokensUsed) {
|
|
71944
|
+
if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = createEmptyAssertions();
|
|
71945
|
+
accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
|
|
71946
|
+
}
|
|
71947
|
+
}
|
|
71948
|
+
totalProcessed += batch.length;
|
|
71839
71949
|
}
|
|
71950
|
+
} catch (error) {
|
|
71951
|
+
logger_default.error("Error during batched metrics recalculation", {
|
|
71952
|
+
phase: "calculation",
|
|
71953
|
+
batchNumber,
|
|
71954
|
+
totalProcessed,
|
|
71955
|
+
currentResultId,
|
|
71956
|
+
evalId: evalRecord.id,
|
|
71957
|
+
error
|
|
71958
|
+
});
|
|
71959
|
+
throw error;
|
|
71840
71960
|
}
|
|
71841
71961
|
for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
|
|
71842
|
-
if (evalRecord.persisted)
|
|
71843
|
-
|
|
71962
|
+
if (evalRecord.persisted) try {
|
|
71963
|
+
await evalRecord.addPrompts(evalRecord.prompts);
|
|
71964
|
+
} catch (error) {
|
|
71965
|
+
logger_default.error("Error saving recalculated prompt metrics", {
|
|
71966
|
+
phase: "save",
|
|
71967
|
+
evalId: evalRecord.id,
|
|
71968
|
+
promptCount: evalRecord.prompts.length,
|
|
71969
|
+
error
|
|
71970
|
+
});
|
|
71971
|
+
throw error;
|
|
71972
|
+
}
|
|
71973
|
+
const durationMs = Date.now() - startTime;
|
|
71974
|
+
logger_default.debug("Prompt metrics recalculation completed", {
|
|
71975
|
+
totalBatches: batchNumber,
|
|
71976
|
+
totalResults: totalProcessed,
|
|
71977
|
+
durationMs
|
|
71978
|
+
});
|
|
71844
71979
|
}
|
|
71845
71980
|
|
|
71846
71981
|
//#endregion
|
|
@@ -76827,14 +76962,15 @@ async function startServer(port = getDefaultPort(), browserBehavior = BrowserBeh
|
|
|
76827
76962
|
const results = await updatedEval?.getResultsCount();
|
|
76828
76963
|
if (results && results > 0) {
|
|
76829
76964
|
logger_default.debug(`Emitting update for eval: ${updatedEval?.config?.description || updatedEval?.id || "unknown"}`);
|
|
76830
|
-
io.emit("update", updatedEval);
|
|
76965
|
+
io.emit("update", { evalId: updatedEval?.id });
|
|
76831
76966
|
allPrompts = null;
|
|
76832
76967
|
}
|
|
76833
76968
|
};
|
|
76834
76969
|
handleSignalUpdate();
|
|
76835
76970
|
});
|
|
76836
76971
|
io.on("connection", async (socket) => {
|
|
76837
|
-
|
|
76972
|
+
const latestEval = await Eval.latest();
|
|
76973
|
+
socket.emit("init", latestEval ? { evalId: latestEval.id } : null);
|
|
76838
76974
|
});
|
|
76839
76975
|
return new Promise((resolve) => {
|
|
76840
76976
|
httpServer.listen(port, () => {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
const require_fetch = require('./fetch-
|
|
3
|
-
const require_accounts = require('./accounts-
|
|
2
|
+
const require_fetch = require('./fetch-BgYKpAJ7.cjs');
|
|
3
|
+
const require_accounts = require('./accounts-TGrHBZU9.cjs');
|
|
4
4
|
let chalk = require("chalk");
|
|
5
5
|
chalk = require_logger.__toESM(chalk);
|
|
6
6
|
require("opener");
|
|
@@ -246,4 +246,4 @@ Object.defineProperty(exports, 'shouldGenerateRemote', {
|
|
|
246
246
|
return shouldGenerateRemote;
|
|
247
247
|
}
|
|
248
248
|
});
|
|
249
|
-
//# sourceMappingURL=server-
|
|
249
|
+
//# sourceMappingURL=server-BRPZ0ZqY.cjs.map
|