promptfoo 0.120.21 → 0.120.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-ChupNNB3.js → ListApp-55OAHww3.js} +1 -1
- package/dist/src/{accounts-CjU7QSnv.js → accounts-BMHY0nF5.js} +5 -5
- package/dist/src/{accounts-DYMHC4iY.js → accounts-CYqHD7X1.js} +4 -4
- package/dist/src/{accounts-uWRxEv-D.cjs → accounts-TGrHBZU9.cjs} +4 -4
- package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BcSgvBPs.js} +2 -2
- package/dist/src/{agents-BJxm0Kcs.cjs → agents-BW4zv9Ig.cjs} +3 -3
- package/dist/src/{agents-DABC7ROH.js → agents-BbG-lpzK.js} +6 -6
- package/dist/src/{agents-DGQULdoA.js → agents-CUB87CA8.js} +5 -5
- package/dist/src/{agents-CJ4BY_QU.js → agents-ClpNGuau.js} +7 -7
- package/dist/src/{agents-CzCD5974.cjs → agents-d76o9WLl.cjs} +6 -6
- package/dist/src/{agents-BNSS1kSZ.js → agents-v7_Jh18K.js} +3 -3
- package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-BBN3jHiV.js} +11 -11
- package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-DWjl0adX.cjs} +11 -11
- package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-E-FZoqH0.js} +14 -14
- package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
- package/dist/src/app/assets/{index-BVE859O5.js → index-DoWPf01I.js} +71 -71
- package/dist/src/app/index.html +2 -2
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-BT9aSeWg.js → audio-B0lJd7ce.js} +4 -4
- package/dist/src/{audio-CSrI_LFf.js → audio-CwbC4E6I.js} +3 -3
- package/dist/src/{audio-DSV364HB.cjs → audio-TfpCt0uC.cjs} +3 -3
- package/dist/src/{base-NFXflrMy.js → base-BHLggB84.js} +2 -2
- package/dist/src/{base-CRrQ15eu.cjs → base-B_pJcCDc.cjs} +2 -2
- package/dist/src/{base-d8hHSpEx.js → base-DRgXuujd.js} +2 -2
- package/dist/src/{blobs-Dznj-8xW.js → blobs-BHstP0Ep.js} +2 -2
- package/dist/src/{blobs-D-Neij7m.cjs → blobs-BPAqMoU_.cjs} +2 -2
- package/dist/src/{blobs-DC2sBLv8.js → blobs-Bey6TGVi.js} +2 -2
- package/dist/src/{cache-xb4oZe27.js → cache-BTdTl56j.js} +2 -2
- package/dist/src/{cache-DYJRe0Jt.cjs → cache-BnrNlhzE.cjs} +2 -2
- package/dist/src/{cache-aKR8pp5y.js → cache-C5LeIjZl.js} +2 -2
- package/dist/src/{cache-CMvgAnSi.js → cache-CsW_9RgN.js} +2 -2
- package/dist/src/cache-Da_mTEGH.cjs +6 -0
- package/dist/src/{cache-scuP1IRq.js → cache-DfI0xG_n.js} +3 -3
- package/dist/src/{chat-BbgGuvwu.js → chat-BIj6_WPH.js} +10 -10
- package/dist/src/{chat-CfJU9hHo.cjs → chat-B_-w6xgb.cjs} +10 -10
- package/dist/src/{chat-Du_GyqZe.js → chat-BktepvuQ.js} +13 -13
- package/dist/src/{chat-DyjTngG2.js → chat-CiakYWhZ.js} +10 -10
- package/dist/src/{chat-BH8Fxyap.js → chat-D5l2blZK.js} +8 -8
- package/dist/src/{chat-Biz16BQv.cjs → chat-QHQ-K-Fi.cjs} +8 -8
- package/dist/src/{chatkit-BpumZ2Dz.js → chatkit-DlOZ-eNb.js} +3 -3
- package/dist/src/{chatkit-D9AWcI4P.js → chatkit-IWIuhxFE.js} +3 -3
- package/dist/src/{chatkit-Wzo9zOq7.cjs → chatkit-yeVtURUi.cjs} +3 -3
- package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-BZ0GjlPY.js} +11 -11
- package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-D3AZJyZ7.cjs} +8 -8
- package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-wvsRgO29.js} +8 -8
- package/dist/src/{cloud-UwfmA0Ne.js → cloud-BKfo1A2t.js} +2 -2
- package/dist/src/{cloud-1cbcFXE9.js → cloud-DPowlnHl.js} +1 -1
- package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CzAtX1Wg.cjs} +11 -11
- package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-D-IEjw5k.js} +11 -11
- package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DxHmIJbZ.js} +14 -14
- package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-CIfquW2o.cjs} +12 -12
- package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-DkjC8LZF.js} +12 -12
- package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-DwJVgNWf.js} +15 -15
- package/dist/src/{codex-sdk-CKEQpQax.js → codex-sdk-BC5qbCjh.js} +3 -3
- package/dist/src/{codex-sdk-CDN8f3P9.js → codex-sdk-CBpm53B8.js} +2 -2
- package/dist/src/{codex-sdk-0QyM_fUf.cjs → codex-sdk-Cf5_CQch.cjs} +2 -2
- package/dist/src/{cometapi-CUq-lrPh.js → cometapi-6jLBOp4e.js} +15 -15
- package/dist/src/{cometapi-Bu_LOIac.js → cometapi-8HSWR5q_.js} +12 -12
- package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-iJEkEgpT.cjs} +12 -12
- package/dist/src/{completion-Dw-9jqz1.cjs → completion-C03fIkcR.cjs} +5 -5
- package/dist/src/{completion-H4ncr2bK.js → completion-CrKHW9hy.js} +5 -5
- package/dist/src/{completion-BTZ42lqL.js → completion-LTi0SiAy.js} +5 -5
- package/dist/src/{docker-kvFgJqkg.js → docker-Bf2VQOX5.js} +14 -14
- package/dist/src/{docker-NrVUpeWw.cjs → docker-BxmP6sGO.cjs} +11 -11
- package/dist/src/{docker-B8zxQ12-.js → docker-DHazKolQ.js} +11 -11
- package/dist/src/{esm-Bwuh5HXW.js → esm-Lh6czAFT.js} +1 -1
- package/dist/src/{eval-TAOD_pvN.js → eval-4sIXLVZZ.js} +31 -19
- package/dist/src/eval-s_E4q6a8.js +16 -0
- package/dist/src/{evalResult-DnYa1RLp.js → evalResult-CRyeHpst.js} +6 -6
- package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CSucevrC.cjs} +7 -7
- package/dist/src/evalResult-DBhbtqZn.js +11 -0
- package/dist/src/evalResult-DKn_nY-a.js +13 -0
- package/dist/src/{evalResult-CnAP64o9.js → evalResult-DNXvhaKp.js} +6 -6
- package/dist/src/evalResult-DrPpo-zg.cjs +11 -0
- package/dist/src/{evaluator-q2_--POi.js → evaluator-BDY-i4ky.js} +29 -30
- package/dist/src/evaluator-CiDt-6H3.js +37 -0
- package/dist/src/{extractor-D_1s-Thr.js → extractor-HnP0pHfO.js} +81 -4
- package/dist/src/{extractor-B9uQA-6-.js → extractor-WvMabjRY.js} +81 -4
- package/dist/src/{extractor-DjxI7POm.cjs → extractor-wSeR47-c.cjs} +86 -3
- package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-8fmzVUH3.js} +2 -2
- package/dist/src/{fetch-BIPrzD39.js → fetch-BMKSarK_.js} +11 -5
- package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-BgYKpAJ7.cjs} +25 -7
- package/dist/src/{fetch-Cbz8ESXD.js → fetch-CE0K47nJ.js} +1 -1
- package/dist/src/{fetch-TcCC0nEJ.js → fetch-DKQm7iwv.js} +11 -5
- package/dist/src/fetch-GeRV0LDI.cjs +4 -0
- package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-CzcPkiKZ.js} +1 -1
- package/dist/src/{formatDuration-CGa0ZDQ8.js → formatDuration-CCiZ-wBY.js} +1 -1
- package/dist/src/{genaiTracer-zK0Mtd_D.cjs → genaiTracer-BQ8AmajD.cjs} +1 -1
- package/dist/src/{genaiTracer-CNawybn-.js → genaiTracer-Ce19n68P.js} +1 -1
- package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-foKtbVa2.js} +1 -1
- package/dist/src/{graders-DycNkUZ_.js → graders-CEF_ce6x.js} +32 -26
- package/dist/src/graders-CzoftmVg.js +35 -0
- package/dist/src/{graders-C9Ni3pNQ.cjs → graders-DOFEkq6l.cjs} +34 -28
- package/dist/src/graders-DuTWgLQF.cjs +32 -0
- package/dist/src/graders-ZhEpUtuK.js +33 -0
- package/dist/src/{graders-CG50NgEB.js → graders-jFPLMexK.js} +36 -30
- package/dist/src/{image-CxD58J63.js → image-0RV3qqTK.js} +6 -6
- package/dist/src/{image-CHGpqpC6.js → image-9Thqn5aj.js} +3 -3
- package/dist/src/{image-BmMQZgoF.js → image-BJJW-zOa.js} +4 -4
- package/dist/src/{image-CPL1masV.cjs → image-BVyYHTQG.cjs} +3 -3
- package/dist/src/{image-Dg-oq_2F.js → image-Dd0EQRYU.js} +6 -6
- package/dist/src/{image-CpQ-fJt3.cjs → image-VWd0wZmz.cjs} +6 -6
- package/dist/src/index.cjs +177 -105
- package/dist/src/index.js +173 -101
- package/dist/src/{interactiveCheck-BZbMvE6i.js → interactiveCheck-CjK2R4rB.js} +1 -1
- package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BKA28wUv.cjs} +7 -7
- package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-D0nOGqRF.js} +7 -7
- package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase-aBIYgE4B.js} +8 -8
- package/dist/src/{litellm-Dcc3mt1B.js → litellm-BiXCNyII.js} +11 -11
- package/dist/src/{litellm-CR8i8YRn.js → litellm-QSopB7EE.js} +14 -14
- package/dist/src/{litellm-QthJT0fA.cjs → litellm-vZYPcEUQ.cjs} +11 -11
- package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-CRKJx6ue.cjs} +10 -11
- package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DmnPkNon.js} +10 -11
- package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-Dvnqlb9n.js} +9 -10
- package/dist/src/main.js +166 -106
- package/dist/src/{messages-DkV1dizA.js → messages-BaxI3gI5.js} +10 -10
- package/dist/src/{messages-D4Cu-Nxe.cjs → messages-CvFzyCty.cjs} +10 -10
- package/dist/src/{messages-9ik4NuJ_.js → messages-DT1yyWfS.js} +10 -10
- package/dist/src/{meteor-DLCdreag.js → meteor-0cLf9TIn.js} +1 -1
- package/dist/src/{meteor-Dj8cTkU_.js → meteor-CBciquOS.js} +1 -1
- package/dist/src/{meteor-odmwVbyG.cjs → meteor-DRuEfzuQ.cjs} +1 -1
- package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-CnN4D-bY.js} +10 -11
- package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-XAkfcmhf.cjs} +10 -11
- package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-YEEqZu4z.js} +9 -10
- package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-Ci-VR-K3.js} +6 -6
- package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-Cjfvm57T.js} +7 -7
- package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-brE_2yrZ.cjs} +6 -6
- package/dist/src/{openai-eZz8QsxY.cjs → openai-BjpdxIOG.cjs} +1 -1
- package/dist/src/{openai-CQozD_Ef.js → openai-CitF-gEN.js} +1 -1
- package/dist/src/{openai-D_5h8sBa.js → openai-Cv9pEKxp.js} +1 -1
- package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BkXJb52a.js} +4 -4
- package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-CrlvqSy2.cjs} +4 -4
- package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-DRMuqbLx.js} +6 -6
- package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-BCKiiwct.js} +5 -5
- package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-BTVVyjnA.js} +4 -4
- package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-DXqKPf2U.cjs} +4 -4
- package/dist/src/{providerRegistry-5UB8YVkk.js → providerRegistry-BdKWcUa8.js} +1 -1
- package/dist/src/{providerRegistry-gU7gOAJI.js → providerRegistry-D32Lt9vp.js} +1 -1
- package/dist/src/{providerRegistry-DFXamjVO.cjs → providerRegistry-HGQd2MF6.cjs} +1 -1
- package/dist/src/{providers-Bz0U4NGd.cjs → providers-BF4aullZ.cjs} +114 -132
- package/dist/src/providers-BmFckJq8.cjs +31 -0
- package/dist/src/{providers-CHciOV4f.js → providers-BtbT8bDb.js} +77 -95
- package/dist/src/{providers-D8qKkVu_.js → providers-CYssBaa_.js} +82 -100
- package/dist/src/providers-DaDZ1iQw.js +31 -0
- package/dist/src/providers-Ds-h-S1b.js +33 -0
- package/dist/src/{pythonUtils-mprm8p4h.js → pythonUtils-B9JA-gsC.js} +2 -2
- package/dist/src/{quiverai-DnlaJG8H.js → quiverai-BiDa0kiF.js} +10 -10
- package/dist/src/{quiverai-3rtm1glP.js → quiverai-DTtLknat.js} +13 -13
- package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-z8oPMSUZ.cjs} +10 -10
- package/dist/src/{render-D5t2ogwP.js → render-D2710HbA.js} +2 -2
- package/dist/src/rubyUtils-BLd6EE1u.cjs +5 -0
- package/dist/src/{rubyUtils-D1DBBgSr.js → rubyUtils-CVELPvUH.js} +2 -2
- package/dist/src/{rubyUtils-Cus5EGPX.js → rubyUtils-Dn6MGcsk.js} +2 -2
- package/dist/src/{rubyUtils-CI4Y5f08.js → rubyUtils-LBsk3zIm.js} +1 -1
- package/dist/src/{rubyUtils-CWkecmDB.js → rubyUtils-_t9Gmf7U.js} +1 -1
- package/dist/src/{rubyUtils-DCci_Y0f.cjs → rubyUtils-rnCVDgH-.cjs} +1 -1
- package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-D5f6K309.js} +11 -11
- package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DwJOnUlQ.cjs} +11 -11
- package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-dygdRScC.js} +17 -17
- package/dist/src/{scanner-CPosIITM.js → scanner-D__U7qyj.js} +9 -9
- package/dist/src/server/index.js +265 -129
- package/dist/src/server-B0dYFkhY.cjs +6 -0
- package/dist/src/{server-ByLwfSDf.cjs → server-BRPZ0ZqY.cjs} +3 -3
- package/dist/src/{server-Bb83s34G.js → server-BZJ_IEiK.js} +4 -4
- package/dist/src/{server-BKdsd70B.js → server-Bo3rtriL.js} +3 -3
- package/dist/src/server-DZYvmL1z.js +6 -0
- package/dist/src/server-DlTe_Yz5.js +8 -0
- package/dist/src/{signal-B8hvvb8w.js → signal-C9qT0qiq.js} +2 -2
- package/dist/src/{slack-BblBpsIz.cjs → slack-D1F9Y7CH.cjs} +1 -1
- package/dist/src/{slack-D-bNCtTa.js → slack-DkAF58Tr.js} +1 -1
- package/dist/src/{slack-Ba3gil_6.js → slack-Ed1yyt_j.js} +1 -1
- package/dist/src/{store-W4yZqv_e.js → store-BpNCvdGu.js} +2 -2
- package/dist/src/store-BvASVHpa.cjs +6 -0
- package/dist/src/{store-l9-mIRC-.cjs → store-BwsF10Db.cjs} +2 -2
- package/dist/src/store-CDxVyNyD.js +6 -0
- package/dist/src/store-CXxKOQ8V.js +7 -0
- package/dist/src/{store-CaNRmI5T.js → store-Dm8qSYzq.js} +2 -2
- package/dist/src/{tables-DZWWveaJ.js → tables-B0wvxP8N.js} +2 -2
- package/dist/src/{tables-BCcp2r48.js → tables-CMOPSC5_.js} +2 -2
- package/dist/src/{tables-D_VmN-ui.cjs → tables-D_nKFrHt.cjs} +2 -2
- package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-B9G9VzVT.cjs} +3 -3
- package/dist/src/telemetry-CGO4vCCz.js +8 -0
- package/dist/src/telemetry-CXarb10F.cjs +6 -0
- package/dist/src/telemetry-DKtQFHz8.js +6 -0
- package/dist/src/{telemetry-CmGyDaxF.js → telemetry-DMGVtU7z.js} +3 -3
- package/dist/src/{telemetry-CB8ltkOD.js → telemetry-oV0dEJ2S.js} +3 -3
- package/dist/src/{text-C1cXk9ij.js → text-DuYSUYPB.js} +1 -1
- package/dist/src/{text-Dm78AVGG.js → text-Dx0GJOCN.js} +1 -1
- package/dist/src/{text-DF2hMKdg.cjs → text-PYISqVm1.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-DFp2VFtG.cjs → tokenUsageUtils-BtZd3sP7.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-C9fIWtvn.js → tokenUsageUtils-DoinwgKF.js} +1 -1
- package/dist/src/{tokenUsageUtils-BQVp2uKF.js → tokenUsageUtils-cFdLMERB.js} +1 -1
- package/dist/src/{transcription-BExRps8_.js → transcription-C5sJed5S.js} +7 -7
- package/dist/src/{transcription-B4GbFkhb.cjs → transcription-DEiYnjqG.cjs} +7 -7
- package/dist/src/{transcription-CpuRKcIu.js → transcription-Dcv2GYgI.js} +10 -10
- package/dist/src/{transform-BzepVlkX.js → transform-BFPYuBaW.js} +4 -4
- package/dist/src/{transform-BwedQA8W.js → transform-Cpk3BVib.js} +3 -3
- package/dist/src/{transform-B-AYIQmM.js → transform-DWBbTh_M.js} +3 -3
- package/dist/src/{transform-1g7fEAge.js → transform-DZI2t8-9.js} +3 -3
- package/dist/src/{transform-B5HUD86U.cjs → transform-DeVdMzaA.cjs} +3 -3
- package/dist/src/{transformersAvailability-B5gZ2aG-.cjs → transformersAvailability-BaoWHpu1.cjs} +1 -1
- package/dist/src/{transformersAvailability-BHP0aLcv.js → transformersAvailability-DtpwoeFC.js} +1 -1
- package/dist/src/{transformersAvailability-CA173HU7.js → transformersAvailability-O2YaCv9Z.js} +1 -1
- package/dist/src/{types-n3ttAnOa.js → types-BZz0C6rq.js} +3 -18
- package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
- package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
- package/dist/src/{util-Cd2vI7ii.js → util-BR59kcWy.js} +3 -3
- package/dist/src/{util-BnF-xRPi.js → util-BjTDLTac.js} +3 -3
- package/dist/src/{util-CwsTKPXi.js → util-CSvDts2m.js} +3 -3
- package/dist/src/{util-KE6uFWHG.cjs → util-CfLcBZEA.cjs} +3 -3
- package/dist/src/{util-BXjq8vOq.js → util-Czrn_1en.js} +3 -3
- package/dist/src/{util-BT0p5c9X.cjs → util-Dxwavk84.cjs} +3 -3
- package/dist/src/{util-DX58t30T.js → util-GpiGNWSj.js} +17 -5
- package/dist/src/{util-B-Yo0qzc.cjs → util-RfCZM4TD.cjs} +22 -4
- package/dist/src/{util-CyzW5ayt.js → util-UxC1pDDt.js} +20 -8
- package/dist/src/{utils-HdRSruG0.js → utils-2CdNTp-a.js} +2 -2
- package/dist/src/{utils-BSpcJB4B.cjs → utils-6lWwNARy.cjs} +2 -2
- package/dist/src/{utils-BWOFfOel.js → utils-BMIgqY6C.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +31 -29
- package/dist/src/app/assets/index-CFq6nz43.css +0 -1
- package/dist/src/cache-BZYapLG-.cjs +0 -6
- package/dist/src/eval-g5SH3NBa.js +0 -16
- package/dist/src/evalResult-Bt2fmj0V.js +0 -9
- package/dist/src/evalResult-DRNH7Jui.js +0 -10
- package/dist/src/evalResult-DqLWyktN.cjs +0 -9
- package/dist/src/evaluator-DIenqH86.js +0 -37
- package/dist/src/fetch-gmneHrJc.cjs +0 -4
- package/dist/src/graders-BHcgLz2P.js +0 -35
- package/dist/src/graders-BjBhwJOb.cjs +0 -32
- package/dist/src/graders-DEjGX1uY.js +0 -33
- package/dist/src/providers-BBGwMMzj.js +0 -33
- package/dist/src/providers-BR5P_qep.cjs +0 -31
- package/dist/src/providers-CDib9hVp.js +0 -31
- package/dist/src/rubyUtils-DZo4ik6p.cjs +0 -5
- package/dist/src/server-B5aIhzYk.cjs +0 -6
- package/dist/src/server-DJCw85s0.js +0 -8
- package/dist/src/server-DKj0QIm9.js +0 -6
- package/dist/src/store-Bo9FEBPO.js +0 -7
- package/dist/src/store-CqHttBql.js +0 -6
- package/dist/src/store-fsxOJl5O.cjs +0 -6
- package/dist/src/telemetry-B2uJOvMA.js +0 -8
- package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
- package/dist/src/telemetry-rP_n_bnZ.js +0 -6
- package/dist/src/time-CRRzn6vA.js +0 -9
- package/dist/src/time-CbtsO5_a.cjs +0 -21
- package/dist/src/time-CoOwN3kQ.js +0 -10
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
require('./types-
|
|
3
|
-
require('./fetch-
|
|
4
|
-
|
|
5
|
-
require('./
|
|
6
|
-
|
|
7
|
-
require('./
|
|
8
|
-
require('./
|
|
9
|
-
const
|
|
10
|
-
const require_base = require('./base-CRrQ15eu.cjs');
|
|
2
|
+
require('./types-Cixl-wRO.cjs');
|
|
3
|
+
const require_fetch = require('./fetch-BgYKpAJ7.cjs');
|
|
4
|
+
require('./accounts-TGrHBZU9.cjs');
|
|
5
|
+
const require_blobs = require('./blobs-BPAqMoU_.cjs');
|
|
6
|
+
require('./tables-D_nKFrHt.cjs');
|
|
7
|
+
require('./telemetry-B9G9VzVT.cjs');
|
|
8
|
+
const require_text = require('./text-PYISqVm1.cjs');
|
|
9
|
+
const require_base = require('./base-B_pJcCDc.cjs');
|
|
11
10
|
let fs = require("fs");
|
|
12
11
|
fs = require_logger.__toESM(fs);
|
|
13
12
|
let path = require("path");
|
|
@@ -178,7 +177,7 @@ var LumaRayVideoProvider = class extends require_base.AwsBedrockGenericProvider
|
|
|
178
177
|
outputDataConfig: invocation.outputDataConfig
|
|
179
178
|
} };
|
|
180
179
|
if (invocation.status === "Failed") return { error: `Video generation failed: ${invocation.failureMessage}` };
|
|
181
|
-
await
|
|
180
|
+
await require_fetch.sleep(pollIntervalMs);
|
|
182
181
|
}
|
|
183
182
|
return { error: `Video generation timed out after ${maxPollTimeMs / 1e3} seconds` };
|
|
184
183
|
} catch (err) {
|
|
@@ -318,4 +317,4 @@ var LumaRayVideoProvider = class extends require_base.AwsBedrockGenericProvider
|
|
|
318
317
|
|
|
319
318
|
//#endregion
|
|
320
319
|
exports.LumaRayVideoProvider = LumaRayVideoProvider;
|
|
321
|
-
//# sourceMappingURL=luma-ray-
|
|
320
|
+
//# sourceMappingURL=luma-ray-CRKJx6ue.cjs.map
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { o as logger_default } from "./logger-Bzi5o47S.js";
|
|
3
|
-
import "./fetch-
|
|
4
|
-
import "./accounts-
|
|
5
|
-
import
|
|
6
|
-
import "./
|
|
7
|
-
import "./
|
|
8
|
-
import "./
|
|
9
|
-
import
|
|
10
|
-
import "./
|
|
11
|
-
import { t as
|
|
12
|
-
import { t as AwsBedrockGenericProvider } from "./base-d8hHSpEx.js";
|
|
3
|
+
import { c as sleep } from "./fetch-DKQm7iwv.js";
|
|
4
|
+
import "./accounts-BMHY0nF5.js";
|
|
5
|
+
import "./cloud-BKfo1A2t.js";
|
|
6
|
+
import "./telemetry-oV0dEJ2S.js";
|
|
7
|
+
import "./types-BZz0C6rq.js";
|
|
8
|
+
import { i as storeBlob } from "./blobs-Bey6TGVi.js";
|
|
9
|
+
import "./tables-B0wvxP8N.js";
|
|
10
|
+
import { t as ellipsize } from "./text-DuYSUYPB.js";
|
|
11
|
+
import { t as AwsBedrockGenericProvider } from "./base-DRgXuujd.js";
|
|
13
12
|
import * as fs$1 from "fs";
|
|
14
13
|
import * as path$1 from "path";
|
|
15
14
|
|
|
@@ -318,4 +317,4 @@ var LumaRayVideoProvider = class extends AwsBedrockGenericProvider {
|
|
|
318
317
|
|
|
319
318
|
//#endregion
|
|
320
319
|
export { LumaRayVideoProvider };
|
|
321
|
-
//# sourceMappingURL=luma-ray-
|
|
320
|
+
//# sourceMappingURL=luma-ray-DmnPkNon.js.map
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import { i as logger_default } from "./logger-q5I8CByj.js";
|
|
2
|
-
import "./types-
|
|
3
|
-
import "./fetch-
|
|
4
|
-
import
|
|
5
|
-
import "./
|
|
6
|
-
import
|
|
7
|
-
import "./
|
|
8
|
-
import "./
|
|
9
|
-
import { t as
|
|
10
|
-
import { t as AwsBedrockGenericProvider } from "./base-NFXflrMy.js";
|
|
2
|
+
import "./types-l7snZgzv.js";
|
|
3
|
+
import { p as sleep } from "./fetch-BMKSarK_.js";
|
|
4
|
+
import "./accounts-CYqHD7X1.js";
|
|
5
|
+
import { r as storeBlob } from "./blobs-BHstP0Ep.js";
|
|
6
|
+
import "./tables-CMOPSC5_.js";
|
|
7
|
+
import "./telemetry-DMGVtU7z.js";
|
|
8
|
+
import { t as ellipsize } from "./text-Dx0GJOCN.js";
|
|
9
|
+
import { t as AwsBedrockGenericProvider } from "./base-BHLggB84.js";
|
|
11
10
|
import * as fs$1 from "fs";
|
|
12
11
|
import * as path$1 from "path";
|
|
13
12
|
|
|
@@ -316,4 +315,4 @@ var LumaRayVideoProvider = class extends AwsBedrockGenericProvider {
|
|
|
316
315
|
|
|
317
316
|
//#endregion
|
|
318
317
|
export { LumaRayVideoProvider };
|
|
319
|
-
//# sourceMappingURL=luma-ray-
|
|
318
|
+
//# sourceMappingURL=luma-ray-Dvnqlb9n.js.map
|
package/dist/src/main.js
CHANGED
|
@@ -1,47 +1,46 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import "./chunk-DHDDz29n.js";
|
|
3
3
|
import { C as getEnvString, D as isNonInteractive, E as isCI, O as cliState_default, S as getEnvInt, b as getEnvBool, c as setLogLevel, d as extractFirstJsonObject, f as extractJsonObjects, h as orderKeys, i as isDebugEnabled, l as sanitizeObject, n as getLogLevel, o as logger_default, r as initializeRunLogging, s as setLogCallback, t as closeLogger, v as getConfigDirectoryPath, x as getEnvFloat, y as setConfigDirectoryPath } from "./logger-Bzi5o47S.js";
|
|
4
|
-
import {
|
|
4
|
+
import { A as getShareApiBaseUrl, C as DEFAULT_MAX_CONCURRENCY, D as TERMINAL_MAX_WIDTH, F as HUMAN_ASSERTION_TYPE, N as VERSION, O as getDefaultPort, c as sleep, j as getShareViewBaseUrl, k as getDefaultShareViewBaseUrl, m as isPromptfooSampleTarget, n as fetchWithRetries, r as fetchWithTimeout, t as fetchWithProxy, u as REQUEST_TIMEOUT_MS, w as DEFAULT_QUERY_LIMIT, x as CLOUD_PROVIDER_PREFIX } from "./fetch-DKQm7iwv.js";
|
|
5
5
|
import { t as invariant } from "./invariant-DT20jrBd.js";
|
|
6
|
-
import { a as getAuthor, d as EMAIL_OK_STATUS, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { n as
|
|
10
|
-
import {
|
|
11
|
-
import { a as
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import { i as
|
|
15
|
-
import {
|
|
16
|
-
import { r as
|
|
17
|
-
import
|
|
18
|
-
import "./
|
|
19
|
-
import {
|
|
20
|
-
import { i as
|
|
21
|
-
import
|
|
22
|
-
import "./
|
|
23
|
-
import "./
|
|
24
|
-
import
|
|
25
|
-
import "./
|
|
26
|
-
import "./
|
|
27
|
-
import "./
|
|
28
|
-
import "./
|
|
29
|
-
import "./
|
|
30
|
-
import "./
|
|
31
|
-
import { n as
|
|
32
|
-
import {
|
|
33
|
-
import { n as
|
|
34
|
-
import
|
|
35
|
-
import "./
|
|
36
|
-
import "./
|
|
37
|
-
import "./
|
|
38
|
-
import "./
|
|
39
|
-
import {
|
|
40
|
-
import { n as
|
|
41
|
-
import {
|
|
42
|
-
import { t as
|
|
43
|
-
import {
|
|
44
|
-
import { n as shouldUseInkUI } from "./interactiveCheck-BZbMvE6i.js";
|
|
6
|
+
import { a as getAuthor, d as EMAIL_OK_STATUS, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-BMHY0nF5.js";
|
|
7
|
+
import { a as readGlobalConfig, i as cloudConfig, r as CloudConfig, s as writeGlobalConfigPartial } from "./cloud-BKfo1A2t.js";
|
|
8
|
+
import { n as TelemetryEventSchema, r as telemetry_default } from "./telemetry-oV0dEJ2S.js";
|
|
9
|
+
import { $ as HARM_PLUGINS, B as Severity, D as ADDITIONAL_STRATEGIES, E as isUuid, F as STRATEGY_COLLECTION_MAPPINGS, G as ALIASED_PLUGIN_MAPPINGS, I as getDefaultNFanout, J as BIAS_PLUGINS, K as ADDITIONAL_PLUGINS, M as DEFAULT_STRATEGIES, P as STRATEGY_COLLECTIONS, Q as FOUNDATION_PLUGINS, R as isFanoutStrategy, S as PluginConfigSchema, U as riskCategorySeverityMap, V as categoryAliases, W as subCategoryDescriptions, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, h as RedteamConfigSchema, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_MULTI_TURN_MAX_TURNS, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as UNALIGNED_PROVIDER_HARM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TELECOM_PLUGINS, q as ALL_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as isMultiTurnStrategy } from "./types-BZz0C6rq.js";
|
|
10
|
+
import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-BZJ_IEiK.js";
|
|
11
|
+
import { A as renderPrompt, C as extractVariablesFromJson, F as TokenUsageTracker, P as redteamProviderManager, R as PromptfooHarmfulCompletionProvider, S as extractPromptFromTags, T as getShortPluginId, _ as pluginMatchesStrategyTargets, _t as resolveTeamId, a as resolveProviderConfigs, at as checkCloudPermissions, b as extractGoalFromPrompt, c as createTransformRequest, ct as getDefaultTeam, d as Strategies, f as loadStrategy, ft as getProviderFromCloud, g as retrieveMedia, gt as resolveTeamFromIdentifier, h as mediaExists, ht as makeRequest$1, i as resolveProvider, it as canCreateTargets, l as createTransformResponse, lt as getOrgContext, m as getMediaStorage, mt as isCloudProvider, n as loadApiProvider, o as MCPProvider, ot as getCloudDatabaseId, p as validateStrategies, pt as getUserTeams, r as loadApiProviders, s as HttpProvider, st as getConfigFromCloud, t as getProviderIds, ut as getPluginSeverityOverridesFromCloud } from "./providers-CYssBaa_.js";
|
|
12
|
+
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-BTdTl56j.js";
|
|
13
|
+
import { i as isJavascriptFile } from "./fileExtensions-CzcPkiKZ.js";
|
|
14
|
+
import { A as filterRuntimeVars, C as extractVariablesFromTemplates, D as setupEnv, M as resultIsForTestCase, N as doesProviderRefMatch, O as deduplicateTestCases, P as getProviderDescription, S as extractVariablesFromTemplate, a as ComparisonEvalNotFoundError, b as renderEnvOnlyInObject, c as mergeComparisonTables, f as maybeLoadConfigFromExternalFile, i as writeOutput, j as getTestCaseDeduplicationKey, k as extractRuntimeVars, l as fetchCsvFromGoogleSheet, n as createOutputMetadata, o as evalTableToJson, p as maybeLoadFromExternalFile, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, v as readFilters, w as getNunjucksEngine, y as readOutput } from "./util-UxC1pDDt.js";
|
|
15
|
+
import { r as importModule, t as getDirectory } from "./esm-Lh6czAFT.js";
|
|
16
|
+
import { r as runPython } from "./pythonUtils-B9JA-gsC.js";
|
|
17
|
+
import "./transform-BFPYuBaW.js";
|
|
18
|
+
import { A as BeavertailsPlugin, C as HarmbenchPlugin, D as DebugAccessPlugin, E as DivergentRepetitionPlugin, F as retryWithDeduplication, I as sampleArray, L as fetchHuggingFaceDataset, M as RedteamGraderBase, N as RedteamPluginBase, O as CrossSessionLeakPlugin, P as getCustomPolicies, S as ImitationPlugin, T as ExcessiveAgencyPlugin, _ as makeInlinePolicyIdSync, a as UnverifiableClaimsPlugin, b as OverreliancePlugin, c as ToolDiscoveryPlugin, ct as readProviderPromptMap, d as RbacPlugin, f as PromptExtractionPlugin, g as isValidPolicyObject, h as determinePolicyTypeFromId, i as VLGuardPlugin, it as getDefaultProviders, j as AegisPlugin, k as ContractPlugin, l as SqlInjectionPlugin, m as PolicyPlugin, o as UnsafeBenchPlugin, ot as processPrompts, p as PoliticsPlugin, r as VLSUPlugin, rt as doRemoteGrading, s as ToxicChatPlugin, st as readPrompts, t as GRADERS, u as ShellInjectionPlugin, v as PlinyPlugin, w as HallucinationPlugin, x as IntentPlugin, y as getPiiLeakTestsForCategory } from "./graders-jFPLMexK.js";
|
|
19
|
+
import { i as generateIdFromPrompt } from "./utils-2CdNTp-a.js";
|
|
20
|
+
import { a as sha256, i as randomSequence, n as isBlobStorageEnabled } from "./extractor-HnP0pHfO.js";
|
|
21
|
+
import "./genaiTracer-foKtbVa2.js";
|
|
22
|
+
import "./chat-BIj6_WPH.js";
|
|
23
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-DoinwgKF.js";
|
|
24
|
+
import "./transform-DWBbTh_M.js";
|
|
25
|
+
import "./messages-DT1yyWfS.js";
|
|
26
|
+
import "./util-BR59kcWy.js";
|
|
27
|
+
import "./openai-Cv9pEKxp.js";
|
|
28
|
+
import "./util-CSvDts2m.js";
|
|
29
|
+
import "./completion-LTi0SiAy.js";
|
|
30
|
+
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-Bey6TGVi.js";
|
|
31
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-B0wvxP8N.js";
|
|
32
|
+
import { n as escapeRegExp, t as ellipsize } from "./text-DuYSUYPB.js";
|
|
33
|
+
import { n as getTraceStore } from "./store-Dm8qSYzq.js";
|
|
34
|
+
import "./base-DRgXuujd.js";
|
|
35
|
+
import "./image-0RV3qqTK.js";
|
|
36
|
+
import "./providerRegistry-D32Lt9vp.js";
|
|
37
|
+
import "./rubyUtils-Dn6MGcsk.js";
|
|
38
|
+
import { c as readAssertions, l as renderMetricName, o as doesPromptRefMatch, s as assertions_default, t as evaluate$1, u as runAssertions } from "./evaluator-BDY-i4ky.js";
|
|
39
|
+
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-C9qT0qiq.js";
|
|
40
|
+
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-4sIXLVZZ.js";
|
|
41
|
+
import { t as EvalResult } from "./evalResult-CRyeHpst.js";
|
|
42
|
+
import { t as formatDuration } from "./formatDuration-CCiZ-wBY.js";
|
|
43
|
+
import { n as shouldUseInkUI } from "./interactiveCheck-CjK2R4rB.js";
|
|
45
44
|
import { fileURLToPath } from "node:url";
|
|
46
45
|
import fs, { realpathSync } from "node:fs";
|
|
47
46
|
import path, { resolve as resolve$1 } from "node:path";
|
|
@@ -114,7 +113,7 @@ function runCommand(program) {
|
|
|
114
113
|
hasGithubPr: !!cmdObj.githubPr,
|
|
115
114
|
hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
|
|
116
115
|
});
|
|
117
|
-
const { executeScan } = await import("./scanner-
|
|
116
|
+
const { executeScan } = await import("./scanner-D__U7qyj.js");
|
|
118
117
|
await executeScan(repoPath, cmdObj);
|
|
119
118
|
});
|
|
120
119
|
}
|
|
@@ -638,12 +637,35 @@ async function filterTestsByResults(testSuite, pathOrId, filterFn) {
|
|
|
638
637
|
if (filteredResults.length === 0) return [];
|
|
639
638
|
const uniqueVarsInResults = new Set(filteredResults.map((r) => JSON.stringify(filterRuntimeVars(r.vars))));
|
|
640
639
|
logger_default.debug(`[filterTestsByResults] ${uniqueVarsInResults.size} unique test cases (by vars) in filtered results`);
|
|
641
|
-
const matchedTests = [
|
|
640
|
+
const matchedTests = [];
|
|
641
|
+
for (const test of testSuite.tests) {
|
|
642
642
|
const testWithDefaults = mergeDefaultVars(test, testSuite.defaultTest);
|
|
643
|
-
|
|
644
|
-
if (
|
|
645
|
-
|
|
646
|
-
|
|
643
|
+
let matchedResult = filteredResults.find((result) => resultIsForTestCase(result, testWithDefaults) && extractRuntimeVars(result.vars) !== void 0);
|
|
644
|
+
if (!matchedResult) matchedResult = filteredResults.find((result) => resultIsForTestCase(result, testWithDefaults));
|
|
645
|
+
if (!matchedResult) {
|
|
646
|
+
if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) {
|
|
647
|
+
matchedResult = filteredResults.find((result) => resultIsForTestCase(result, test) && extractRuntimeVars(result.vars) !== void 0);
|
|
648
|
+
if (!matchedResult) matchedResult = filteredResults.find((result) => resultIsForTestCase(result, test));
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
if (matchedResult) {
|
|
652
|
+
const runtimeVars = extractRuntimeVars(matchedResult.vars);
|
|
653
|
+
if (runtimeVars) {
|
|
654
|
+
const testWithRuntimeVars = {
|
|
655
|
+
...test,
|
|
656
|
+
vars: {
|
|
657
|
+
...test.vars,
|
|
658
|
+
...runtimeVars
|
|
659
|
+
}
|
|
660
|
+
};
|
|
661
|
+
logger_default.debug("[filterTestsByResults] Restored runtime vars for test", { varKeys: Object.keys(runtimeVars) });
|
|
662
|
+
matchedTests.push(testWithRuntimeVars);
|
|
663
|
+
} else {
|
|
664
|
+
logger_default.debug("[filterTestsByResults] Matched test has no runtime vars to restore");
|
|
665
|
+
matchedTests.push(test);
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
}
|
|
647
669
|
logger_default.debug(`[filterTestsByResults] Matched ${matchedTests.length} tests out of ${testSuite.tests.length} in test suite`);
|
|
648
670
|
const extractedTests = [];
|
|
649
671
|
const matchedResultKeys = /* @__PURE__ */ new Set();
|
|
@@ -3289,60 +3311,101 @@ async function deleteErrorResults(resultIds) {
|
|
|
3289
3311
|
await getDb().delete(evalResultsTable).where(inArray(evalResultsTable.id, resultIds));
|
|
3290
3312
|
logger_default.debug(`Deleted ${resultIds.length} error results from database`);
|
|
3291
3313
|
}
|
|
3314
|
+
const RECALCULATE_BATCH_SIZE = 1e3;
|
|
3292
3315
|
/**
|
|
3293
|
-
* Recalculates prompt metrics based on current results after ERROR results have been deleted
|
|
3316
|
+
* Recalculates prompt metrics based on current results after ERROR results have been deleted.
|
|
3317
|
+
* Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
|
|
3294
3318
|
*/
|
|
3295
3319
|
async function recalculatePromptMetrics(evalRecord) {
|
|
3296
3320
|
logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
|
|
3297
|
-
|
|
3321
|
+
const startTime = Date.now();
|
|
3322
|
+
let batchNumber = 0;
|
|
3323
|
+
let totalProcessed = 0;
|
|
3298
3324
|
const promptMetricsMap = /* @__PURE__ */ new Map();
|
|
3299
|
-
for (const
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3316
|
-
|
|
3317
|
-
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3325
|
+
for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
|
|
3326
|
+
score: 0,
|
|
3327
|
+
testPassCount: 0,
|
|
3328
|
+
testFailCount: 0,
|
|
3329
|
+
testErrorCount: 0,
|
|
3330
|
+
assertPassCount: 0,
|
|
3331
|
+
assertFailCount: 0,
|
|
3332
|
+
totalLatencyMs: 0,
|
|
3333
|
+
tokenUsage: createEmptyTokenUsage(),
|
|
3334
|
+
namedScores: {},
|
|
3335
|
+
namedScoresCount: {},
|
|
3336
|
+
cost: 0
|
|
3337
|
+
});
|
|
3338
|
+
let currentResultId;
|
|
3339
|
+
try {
|
|
3340
|
+
for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
|
|
3341
|
+
batchNumber++;
|
|
3342
|
+
logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
|
|
3343
|
+
for (const result of batch) {
|
|
3344
|
+
currentResultId = result.id;
|
|
3345
|
+
const metrics = promptMetricsMap.get(result.promptIdx);
|
|
3346
|
+
if (!metrics) {
|
|
3347
|
+
logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
|
|
3348
|
+
resultId: result.id,
|
|
3349
|
+
evalId: evalRecord.id
|
|
3350
|
+
});
|
|
3351
|
+
continue;
|
|
3352
|
+
}
|
|
3353
|
+
if (result.success) metrics.testPassCount++;
|
|
3354
|
+
else if (result.failureReason === ResultFailureReason.ERROR) metrics.testErrorCount++;
|
|
3355
|
+
else metrics.testFailCount++;
|
|
3356
|
+
metrics.score += result.score ?? 0;
|
|
3357
|
+
metrics.totalLatencyMs += result.latencyMs || 0;
|
|
3358
|
+
metrics.cost += result.cost || 0;
|
|
3359
|
+
for (const [key, value] of Object.entries(result.namedScores || {})) {
|
|
3360
|
+
metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
|
|
3361
|
+
const testVars = result.testCase?.vars || {};
|
|
3362
|
+
let contributingAssertions = 0;
|
|
3363
|
+
result.gradingResult?.componentResults?.forEach((componentResult) => {
|
|
3364
|
+
if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
|
|
3365
|
+
});
|
|
3366
|
+
metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
|
|
3367
|
+
}
|
|
3368
|
+
if (result.gradingResult?.componentResults) {
|
|
3369
|
+
metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
|
|
3370
|
+
metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
|
|
3371
|
+
}
|
|
3372
|
+
if (result.response?.tokenUsage) accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
|
|
3373
|
+
if (result.gradingResult?.tokensUsed) {
|
|
3374
|
+
if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = createEmptyAssertions();
|
|
3375
|
+
accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
|
|
3376
|
+
}
|
|
3377
|
+
}
|
|
3378
|
+
totalProcessed += batch.length;
|
|
3341
3379
|
}
|
|
3380
|
+
} catch (error) {
|
|
3381
|
+
logger_default.error("Error during batched metrics recalculation", {
|
|
3382
|
+
phase: "calculation",
|
|
3383
|
+
batchNumber,
|
|
3384
|
+
totalProcessed,
|
|
3385
|
+
currentResultId,
|
|
3386
|
+
evalId: evalRecord.id,
|
|
3387
|
+
error
|
|
3388
|
+
});
|
|
3389
|
+
throw error;
|
|
3342
3390
|
}
|
|
3343
3391
|
for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
|
|
3344
|
-
if (evalRecord.persisted)
|
|
3345
|
-
|
|
3392
|
+
if (evalRecord.persisted) try {
|
|
3393
|
+
await evalRecord.addPrompts(evalRecord.prompts);
|
|
3394
|
+
} catch (error) {
|
|
3395
|
+
logger_default.error("Error saving recalculated prompt metrics", {
|
|
3396
|
+
phase: "save",
|
|
3397
|
+
evalId: evalRecord.id,
|
|
3398
|
+
promptCount: evalRecord.prompts.length,
|
|
3399
|
+
error
|
|
3400
|
+
});
|
|
3401
|
+
throw error;
|
|
3402
|
+
}
|
|
3403
|
+
const durationMs = Date.now() - startTime;
|
|
3404
|
+
logger_default.debug("Prompt metrics recalculation completed", {
|
|
3405
|
+
totalBatches: batchNumber,
|
|
3406
|
+
totalResults: totalProcessed,
|
|
3407
|
+
durationMs
|
|
3408
|
+
});
|
|
3346
3409
|
}
|
|
3347
3410
|
/**
|
|
3348
3411
|
* Main retry function
|
|
@@ -5104,7 +5167,7 @@ async function doGenerateAssertions(options) {
|
|
|
5104
5167
|
let testSuite;
|
|
5105
5168
|
const configPath = options.config || options.defaultConfigPath;
|
|
5106
5169
|
if (configPath) testSuite = (await resolveConfigs({ config: [configPath] }, options.defaultConfig, "AssertionGeneration")).testSuite;
|
|
5107
|
-
else throw new Error(
|
|
5170
|
+
else throw new Error(`Could not find a config file. Pass --config path/to/promptfooconfig.yaml or run "${promptfooCommand("init")}" to create one.`);
|
|
5108
5171
|
const startTime = Date.now();
|
|
5109
5172
|
telemetry_default.record("command_used", {
|
|
5110
5173
|
name: "generate_assertions - started",
|
|
@@ -5292,7 +5355,7 @@ async function doGenerateDataset(options) {
|
|
|
5292
5355
|
let testSuite;
|
|
5293
5356
|
const configPath = options.config || options.defaultConfigPath;
|
|
5294
5357
|
if (configPath) testSuite = (await resolveConfigs({ config: [configPath] }, options.defaultConfig, "DatasetGeneration")).testSuite;
|
|
5295
|
-
else throw new Error(
|
|
5358
|
+
else throw new Error(`Could not find a config file. Pass --config path/to/promptfooconfig.yaml or run "${promptfooCommand("init")}" to create one.`);
|
|
5296
5359
|
const startTime = Date.now();
|
|
5297
5360
|
telemetry_default.record("command_used", {
|
|
5298
5361
|
name: "generate_dataset - started",
|
|
@@ -6502,8 +6565,9 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
6502
6565
|
logger_default.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
|
|
6503
6566
|
return [];
|
|
6504
6567
|
}
|
|
6568
|
+
const { graderExamples, ...configForRemote } = config ?? {};
|
|
6505
6569
|
const body = JSON.stringify({
|
|
6506
|
-
config,
|
|
6570
|
+
config: configForRemote,
|
|
6507
6571
|
injectVar,
|
|
6508
6572
|
inputs: config?.inputs,
|
|
6509
6573
|
n,
|
|
@@ -7192,11 +7256,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
7192
7256
|
});
|
|
7193
7257
|
await validateStrategies(strategies);
|
|
7194
7258
|
await validateSharpDependency(strategies, plugins);
|
|
7195
|
-
if (strategies.some((s) => isLanguageDisallowedStrategy(s.id)) && language) {
|
|
7196
|
-
const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
|
|
7197
|
-
language = "en";
|
|
7198
|
-
logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
|
|
7199
|
-
}
|
|
7200
7259
|
const redteamProvider = await redteamProviderManager.getProvider({ provider });
|
|
7201
7260
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
7202
7261
|
logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.yellow(plugins.map((p) => {
|
|
@@ -13072,14 +13131,15 @@ async function startServer(port = getDefaultPort(), browserBehavior = BrowserBeh
|
|
|
13072
13131
|
const results = await updatedEval?.getResultsCount();
|
|
13073
13132
|
if (results && results > 0) {
|
|
13074
13133
|
logger_default.debug(`Emitting update for eval: ${updatedEval?.config?.description || updatedEval?.id || "unknown"}`);
|
|
13075
|
-
io.emit("update", updatedEval);
|
|
13134
|
+
io.emit("update", { evalId: updatedEval?.id });
|
|
13076
13135
|
allPrompts = null;
|
|
13077
13136
|
}
|
|
13078
13137
|
};
|
|
13079
13138
|
handleSignalUpdate();
|
|
13080
13139
|
});
|
|
13081
13140
|
io.on("connection", async (socket) => {
|
|
13082
|
-
|
|
13141
|
+
const latestEval = await Eval.latest();
|
|
13142
|
+
socket.emit("init", latestEval ? { evalId: latestEval.id } : null);
|
|
13083
13143
|
});
|
|
13084
13144
|
return new Promise((resolve) => {
|
|
13085
13145
|
httpServer.listen(port, () => {
|
|
@@ -14337,8 +14397,8 @@ function shouldUseInkList() {
|
|
|
14337
14397
|
async function runInkList(options) {
|
|
14338
14398
|
const [React, { renderInteractive }, { ListApp }] = await Promise.all([
|
|
14339
14399
|
import("react"),
|
|
14340
|
-
import("./render-
|
|
14341
|
-
import("./ListApp-
|
|
14400
|
+
import("./render-D2710HbA.js"),
|
|
14401
|
+
import("./ListApp-55OAHww3.js")
|
|
14342
14402
|
]);
|
|
14343
14403
|
let result = { cancelled: false };
|
|
14344
14404
|
let resolveResult;
|
|
@@ -16641,8 +16701,8 @@ function registerRunEvaluationTool(server) {
|
|
|
16641
16701
|
if (filteredProviders.length === 0) return createToolResponse("run_evaluation", false, void 0, `No providers matched filter: ${filters.join(", ")}. Available providers: ${providers.map((p) => typeof p.id === "function" ? p.id() : p.id).join(", ")}`);
|
|
16642
16702
|
filteredTestSuite.providers = filteredProviders;
|
|
16643
16703
|
}
|
|
16644
|
-
const { evaluate } = await import("./evaluator-
|
|
16645
|
-
const evalRecord = await (await import("./eval-
|
|
16704
|
+
const { evaluate } = await import("./evaluator-CiDt-6H3.js");
|
|
16705
|
+
const evalRecord = await (await import("./eval-s_E4q6a8.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
|
|
16646
16706
|
logger_default.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
|
|
16647
16707
|
const startTime = Date.now();
|
|
16648
16708
|
const result = await evaluate(filteredTestSuite, evalRecord, {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { b as getEnvString, i as logger_default, v as getEnvFloat, y as getEnvInt } from "./logger-q5I8CByj.js";
|
|
2
|
-
import { d as maybeLoadToolsFromExternalFile, u as maybeLoadResponseFormatFromExternalFile } from "./util-
|
|
3
|
-
import {
|
|
4
|
-
import { o as getCache, s as isCacheEnabled } from "./cache-
|
|
5
|
-
import { n as withGenAISpan } from "./genaiTracer-
|
|
6
|
-
import { i as normalizeFinishReason, n as MCPClient } from "./chat-
|
|
7
|
-
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-
|
|
8
|
-
import { n as transformMCPToolsToAnthropic } from "./transform-
|
|
9
|
-
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-
|
|
2
|
+
import { d as maybeLoadToolsFromExternalFile, u as maybeLoadResponseFormatFromExternalFile } from "./util-GpiGNWSj.js";
|
|
3
|
+
import { T as transformTools, w as transformToolChoice } from "./fetch-BMKSarK_.js";
|
|
4
|
+
import { o as getCache, s as isCacheEnabled } from "./cache-CsW_9RgN.js";
|
|
5
|
+
import { n as withGenAISpan } from "./genaiTracer-Ce19n68P.js";
|
|
6
|
+
import { i as normalizeFinishReason, n as MCPClient } from "./chat-D5l2blZK.js";
|
|
7
|
+
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-cFdLMERB.js";
|
|
8
|
+
import { n as transformMCPToolsToAnthropic } from "./transform-Cpk3BVib.js";
|
|
9
|
+
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-BjTDLTac.js";
|
|
10
10
|
import Anthropic, { APIError } from "@anthropic-ai/sdk";
|
|
11
11
|
|
|
12
12
|
//#region src/providers/anthropic/generic.ts
|
|
@@ -137,7 +137,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
137
137
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
138
138
|
...config.tool_choice ? { tool_choice: transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
139
139
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
140
|
-
...processedOutputFormat ? {
|
|
140
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
141
141
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
142
142
|
};
|
|
143
143
|
logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -233,4 +233,4 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
233
233
|
|
|
234
234
|
//#endregion
|
|
235
235
|
export { AnthropicGenericProvider as n, AnthropicMessagesProvider as t };
|
|
236
|
-
//# sourceMappingURL=messages-
|
|
236
|
+
//# sourceMappingURL=messages-BaxI3gI5.js.map
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
const require_util = require('./util-
|
|
3
|
-
const require_fetch = require('./fetch-
|
|
4
|
-
const require_cache = require('./cache-
|
|
5
|
-
const require_genaiTracer = require('./genaiTracer-
|
|
6
|
-
const require_chat = require('./chat-
|
|
7
|
-
const require_tokenUsageUtils = require('./tokenUsageUtils-
|
|
8
|
-
const require_transform = require('./transform-
|
|
9
|
-
const require_util$1 = require('./util-
|
|
2
|
+
const require_util = require('./util-RfCZM4TD.cjs');
|
|
3
|
+
const require_fetch = require('./fetch-BgYKpAJ7.cjs');
|
|
4
|
+
const require_cache = require('./cache-BnrNlhzE.cjs');
|
|
5
|
+
const require_genaiTracer = require('./genaiTracer-BQ8AmajD.cjs');
|
|
6
|
+
const require_chat = require('./chat-QHQ-K-Fi.cjs');
|
|
7
|
+
const require_tokenUsageUtils = require('./tokenUsageUtils-BtZd3sP7.cjs');
|
|
8
|
+
const require_transform = require('./transform-DeVdMzaA.cjs');
|
|
9
|
+
const require_util$1 = require('./util-CfLcBZEA.cjs');
|
|
10
10
|
let _anthropic_ai_sdk = require("@anthropic-ai/sdk");
|
|
11
11
|
_anthropic_ai_sdk = require_logger.__toESM(_anthropic_ai_sdk);
|
|
12
12
|
|
|
@@ -138,7 +138,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
138
138
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
139
139
|
...config.tool_choice ? { tool_choice: require_fetch.transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
140
140
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
141
|
-
...processedOutputFormat ? {
|
|
141
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
142
142
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
143
143
|
};
|
|
144
144
|
require_logger.logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -245,4 +245,4 @@ Object.defineProperty(exports, 'AnthropicMessagesProvider', {
|
|
|
245
245
|
return AnthropicMessagesProvider;
|
|
246
246
|
}
|
|
247
247
|
});
|
|
248
|
-
//# sourceMappingURL=messages-
|
|
248
|
+
//# sourceMappingURL=messages-CvFzyCty.cjs.map
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { C as getEnvString, S as getEnvInt, o as logger_default, x as getEnvFloat } from "./logger-Bzi5o47S.js";
|
|
3
|
-
import {
|
|
4
|
-
import { o as getCache, s as isCacheEnabled } from "./cache-
|
|
5
|
-
import { g as maybeLoadToolsFromExternalFile, h as maybeLoadResponseFormatFromExternalFile } from "./util-
|
|
6
|
-
import { n as withGenAISpan } from "./genaiTracer-
|
|
7
|
-
import { i as normalizeFinishReason, n as MCPClient } from "./chat-
|
|
8
|
-
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-
|
|
9
|
-
import { n as transformMCPToolsToAnthropic } from "./transform-
|
|
10
|
-
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-
|
|
3
|
+
import { b as transformTools, y as transformToolChoice } from "./fetch-DKQm7iwv.js";
|
|
4
|
+
import { o as getCache, s as isCacheEnabled } from "./cache-BTdTl56j.js";
|
|
5
|
+
import { g as maybeLoadToolsFromExternalFile, h as maybeLoadResponseFormatFromExternalFile } from "./util-UxC1pDDt.js";
|
|
6
|
+
import { n as withGenAISpan } from "./genaiTracer-foKtbVa2.js";
|
|
7
|
+
import { i as normalizeFinishReason, n as MCPClient } from "./chat-BIj6_WPH.js";
|
|
8
|
+
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-DoinwgKF.js";
|
|
9
|
+
import { n as transformMCPToolsToAnthropic } from "./transform-DWBbTh_M.js";
|
|
10
|
+
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-BR59kcWy.js";
|
|
11
11
|
import Anthropic, { APIError } from "@anthropic-ai/sdk";
|
|
12
12
|
|
|
13
13
|
//#region src/providers/anthropic/generic.ts
|
|
@@ -138,7 +138,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
138
138
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
139
139
|
...config.tool_choice ? { tool_choice: transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
140
140
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
141
|
-
...processedOutputFormat ? {
|
|
141
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
142
142
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
143
143
|
};
|
|
144
144
|
logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -234,4 +234,4 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
234
234
|
|
|
235
235
|
//#endregion
|
|
236
236
|
export { AnthropicGenericProvider as n, AnthropicMessagesProvider as t };
|
|
237
|
-
//# sourceMappingURL=messages-
|
|
237
|
+
//# sourceMappingURL=messages-DT1yyWfS.js.map
|