promptfoo 0.120.21 → 0.120.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-ChupNNB3.js → ListApp-55OAHww3.js} +1 -1
- package/dist/src/{accounts-CjU7QSnv.js → accounts-BMHY0nF5.js} +5 -5
- package/dist/src/{accounts-DYMHC4iY.js → accounts-CYqHD7X1.js} +4 -4
- package/dist/src/{accounts-uWRxEv-D.cjs → accounts-TGrHBZU9.cjs} +4 -4
- package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BcSgvBPs.js} +2 -2
- package/dist/src/{agents-BJxm0Kcs.cjs → agents-BW4zv9Ig.cjs} +3 -3
- package/dist/src/{agents-DABC7ROH.js → agents-BbG-lpzK.js} +6 -6
- package/dist/src/{agents-DGQULdoA.js → agents-CUB87CA8.js} +5 -5
- package/dist/src/{agents-CJ4BY_QU.js → agents-ClpNGuau.js} +7 -7
- package/dist/src/{agents-CzCD5974.cjs → agents-d76o9WLl.cjs} +6 -6
- package/dist/src/{agents-BNSS1kSZ.js → agents-v7_Jh18K.js} +3 -3
- package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-BBN3jHiV.js} +11 -11
- package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-DWjl0adX.cjs} +11 -11
- package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-E-FZoqH0.js} +14 -14
- package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
- package/dist/src/app/assets/{index-BVE859O5.js → index-DoWPf01I.js} +71 -71
- package/dist/src/app/index.html +2 -2
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-BT9aSeWg.js → audio-B0lJd7ce.js} +4 -4
- package/dist/src/{audio-CSrI_LFf.js → audio-CwbC4E6I.js} +3 -3
- package/dist/src/{audio-DSV364HB.cjs → audio-TfpCt0uC.cjs} +3 -3
- package/dist/src/{base-NFXflrMy.js → base-BHLggB84.js} +2 -2
- package/dist/src/{base-CRrQ15eu.cjs → base-B_pJcCDc.cjs} +2 -2
- package/dist/src/{base-d8hHSpEx.js → base-DRgXuujd.js} +2 -2
- package/dist/src/{blobs-Dznj-8xW.js → blobs-BHstP0Ep.js} +2 -2
- package/dist/src/{blobs-D-Neij7m.cjs → blobs-BPAqMoU_.cjs} +2 -2
- package/dist/src/{blobs-DC2sBLv8.js → blobs-Bey6TGVi.js} +2 -2
- package/dist/src/{cache-xb4oZe27.js → cache-BTdTl56j.js} +2 -2
- package/dist/src/{cache-DYJRe0Jt.cjs → cache-BnrNlhzE.cjs} +2 -2
- package/dist/src/{cache-aKR8pp5y.js → cache-C5LeIjZl.js} +2 -2
- package/dist/src/{cache-CMvgAnSi.js → cache-CsW_9RgN.js} +2 -2
- package/dist/src/cache-Da_mTEGH.cjs +6 -0
- package/dist/src/{cache-scuP1IRq.js → cache-DfI0xG_n.js} +3 -3
- package/dist/src/{chat-BbgGuvwu.js → chat-BIj6_WPH.js} +10 -10
- package/dist/src/{chat-CfJU9hHo.cjs → chat-B_-w6xgb.cjs} +10 -10
- package/dist/src/{chat-Du_GyqZe.js → chat-BktepvuQ.js} +13 -13
- package/dist/src/{chat-DyjTngG2.js → chat-CiakYWhZ.js} +10 -10
- package/dist/src/{chat-BH8Fxyap.js → chat-D5l2blZK.js} +8 -8
- package/dist/src/{chat-Biz16BQv.cjs → chat-QHQ-K-Fi.cjs} +8 -8
- package/dist/src/{chatkit-BpumZ2Dz.js → chatkit-DlOZ-eNb.js} +3 -3
- package/dist/src/{chatkit-D9AWcI4P.js → chatkit-IWIuhxFE.js} +3 -3
- package/dist/src/{chatkit-Wzo9zOq7.cjs → chatkit-yeVtURUi.cjs} +3 -3
- package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-BZ0GjlPY.js} +11 -11
- package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-D3AZJyZ7.cjs} +8 -8
- package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-wvsRgO29.js} +8 -8
- package/dist/src/{cloud-UwfmA0Ne.js → cloud-BKfo1A2t.js} +2 -2
- package/dist/src/{cloud-1cbcFXE9.js → cloud-DPowlnHl.js} +1 -1
- package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CzAtX1Wg.cjs} +11 -11
- package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-D-IEjw5k.js} +11 -11
- package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DxHmIJbZ.js} +14 -14
- package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-CIfquW2o.cjs} +12 -12
- package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-DkjC8LZF.js} +12 -12
- package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-DwJVgNWf.js} +15 -15
- package/dist/src/{codex-sdk-CKEQpQax.js → codex-sdk-BC5qbCjh.js} +3 -3
- package/dist/src/{codex-sdk-CDN8f3P9.js → codex-sdk-CBpm53B8.js} +2 -2
- package/dist/src/{codex-sdk-0QyM_fUf.cjs → codex-sdk-Cf5_CQch.cjs} +2 -2
- package/dist/src/{cometapi-CUq-lrPh.js → cometapi-6jLBOp4e.js} +15 -15
- package/dist/src/{cometapi-Bu_LOIac.js → cometapi-8HSWR5q_.js} +12 -12
- package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-iJEkEgpT.cjs} +12 -12
- package/dist/src/{completion-Dw-9jqz1.cjs → completion-C03fIkcR.cjs} +5 -5
- package/dist/src/{completion-H4ncr2bK.js → completion-CrKHW9hy.js} +5 -5
- package/dist/src/{completion-BTZ42lqL.js → completion-LTi0SiAy.js} +5 -5
- package/dist/src/{docker-kvFgJqkg.js → docker-Bf2VQOX5.js} +14 -14
- package/dist/src/{docker-NrVUpeWw.cjs → docker-BxmP6sGO.cjs} +11 -11
- package/dist/src/{docker-B8zxQ12-.js → docker-DHazKolQ.js} +11 -11
- package/dist/src/{esm-Bwuh5HXW.js → esm-Lh6czAFT.js} +1 -1
- package/dist/src/{eval-TAOD_pvN.js → eval-4sIXLVZZ.js} +31 -19
- package/dist/src/eval-s_E4q6a8.js +16 -0
- package/dist/src/{evalResult-DnYa1RLp.js → evalResult-CRyeHpst.js} +6 -6
- package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CSucevrC.cjs} +7 -7
- package/dist/src/evalResult-DBhbtqZn.js +11 -0
- package/dist/src/evalResult-DKn_nY-a.js +13 -0
- package/dist/src/{evalResult-CnAP64o9.js → evalResult-DNXvhaKp.js} +6 -6
- package/dist/src/evalResult-DrPpo-zg.cjs +11 -0
- package/dist/src/{evaluator-q2_--POi.js → evaluator-BDY-i4ky.js} +29 -30
- package/dist/src/evaluator-CiDt-6H3.js +37 -0
- package/dist/src/{extractor-D_1s-Thr.js → extractor-HnP0pHfO.js} +81 -4
- package/dist/src/{extractor-B9uQA-6-.js → extractor-WvMabjRY.js} +81 -4
- package/dist/src/{extractor-DjxI7POm.cjs → extractor-wSeR47-c.cjs} +86 -3
- package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-8fmzVUH3.js} +2 -2
- package/dist/src/{fetch-BIPrzD39.js → fetch-BMKSarK_.js} +11 -5
- package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-BgYKpAJ7.cjs} +25 -7
- package/dist/src/{fetch-Cbz8ESXD.js → fetch-CE0K47nJ.js} +1 -1
- package/dist/src/{fetch-TcCC0nEJ.js → fetch-DKQm7iwv.js} +11 -5
- package/dist/src/fetch-GeRV0LDI.cjs +4 -0
- package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-CzcPkiKZ.js} +1 -1
- package/dist/src/{formatDuration-CGa0ZDQ8.js → formatDuration-CCiZ-wBY.js} +1 -1
- package/dist/src/{genaiTracer-zK0Mtd_D.cjs → genaiTracer-BQ8AmajD.cjs} +1 -1
- package/dist/src/{genaiTracer-CNawybn-.js → genaiTracer-Ce19n68P.js} +1 -1
- package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-foKtbVa2.js} +1 -1
- package/dist/src/{graders-DycNkUZ_.js → graders-CEF_ce6x.js} +32 -26
- package/dist/src/graders-CzoftmVg.js +35 -0
- package/dist/src/{graders-C9Ni3pNQ.cjs → graders-DOFEkq6l.cjs} +34 -28
- package/dist/src/graders-DuTWgLQF.cjs +32 -0
- package/dist/src/graders-ZhEpUtuK.js +33 -0
- package/dist/src/{graders-CG50NgEB.js → graders-jFPLMexK.js} +36 -30
- package/dist/src/{image-CxD58J63.js → image-0RV3qqTK.js} +6 -6
- package/dist/src/{image-CHGpqpC6.js → image-9Thqn5aj.js} +3 -3
- package/dist/src/{image-BmMQZgoF.js → image-BJJW-zOa.js} +4 -4
- package/dist/src/{image-CPL1masV.cjs → image-BVyYHTQG.cjs} +3 -3
- package/dist/src/{image-Dg-oq_2F.js → image-Dd0EQRYU.js} +6 -6
- package/dist/src/{image-CpQ-fJt3.cjs → image-VWd0wZmz.cjs} +6 -6
- package/dist/src/index.cjs +177 -105
- package/dist/src/index.js +173 -101
- package/dist/src/{interactiveCheck-BZbMvE6i.js → interactiveCheck-CjK2R4rB.js} +1 -1
- package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BKA28wUv.cjs} +7 -7
- package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-D0nOGqRF.js} +7 -7
- package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase-aBIYgE4B.js} +8 -8
- package/dist/src/{litellm-Dcc3mt1B.js → litellm-BiXCNyII.js} +11 -11
- package/dist/src/{litellm-CR8i8YRn.js → litellm-QSopB7EE.js} +14 -14
- package/dist/src/{litellm-QthJT0fA.cjs → litellm-vZYPcEUQ.cjs} +11 -11
- package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-CRKJx6ue.cjs} +10 -11
- package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DmnPkNon.js} +10 -11
- package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-Dvnqlb9n.js} +9 -10
- package/dist/src/main.js +166 -106
- package/dist/src/{messages-DkV1dizA.js → messages-BaxI3gI5.js} +10 -10
- package/dist/src/{messages-D4Cu-Nxe.cjs → messages-CvFzyCty.cjs} +10 -10
- package/dist/src/{messages-9ik4NuJ_.js → messages-DT1yyWfS.js} +10 -10
- package/dist/src/{meteor-DLCdreag.js → meteor-0cLf9TIn.js} +1 -1
- package/dist/src/{meteor-Dj8cTkU_.js → meteor-CBciquOS.js} +1 -1
- package/dist/src/{meteor-odmwVbyG.cjs → meteor-DRuEfzuQ.cjs} +1 -1
- package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-CnN4D-bY.js} +10 -11
- package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-XAkfcmhf.cjs} +10 -11
- package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-YEEqZu4z.js} +9 -10
- package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-Ci-VR-K3.js} +6 -6
- package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-Cjfvm57T.js} +7 -7
- package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-brE_2yrZ.cjs} +6 -6
- package/dist/src/{openai-eZz8QsxY.cjs → openai-BjpdxIOG.cjs} +1 -1
- package/dist/src/{openai-CQozD_Ef.js → openai-CitF-gEN.js} +1 -1
- package/dist/src/{openai-D_5h8sBa.js → openai-Cv9pEKxp.js} +1 -1
- package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BkXJb52a.js} +4 -4
- package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-CrlvqSy2.cjs} +4 -4
- package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-DRMuqbLx.js} +6 -6
- package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-BCKiiwct.js} +5 -5
- package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-BTVVyjnA.js} +4 -4
- package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-DXqKPf2U.cjs} +4 -4
- package/dist/src/{providerRegistry-5UB8YVkk.js → providerRegistry-BdKWcUa8.js} +1 -1
- package/dist/src/{providerRegistry-gU7gOAJI.js → providerRegistry-D32Lt9vp.js} +1 -1
- package/dist/src/{providerRegistry-DFXamjVO.cjs → providerRegistry-HGQd2MF6.cjs} +1 -1
- package/dist/src/{providers-Bz0U4NGd.cjs → providers-BF4aullZ.cjs} +114 -132
- package/dist/src/providers-BmFckJq8.cjs +31 -0
- package/dist/src/{providers-CHciOV4f.js → providers-BtbT8bDb.js} +77 -95
- package/dist/src/{providers-D8qKkVu_.js → providers-CYssBaa_.js} +82 -100
- package/dist/src/providers-DaDZ1iQw.js +31 -0
- package/dist/src/providers-Ds-h-S1b.js +33 -0
- package/dist/src/{pythonUtils-mprm8p4h.js → pythonUtils-B9JA-gsC.js} +2 -2
- package/dist/src/{quiverai-DnlaJG8H.js → quiverai-BiDa0kiF.js} +10 -10
- package/dist/src/{quiverai-3rtm1glP.js → quiverai-DTtLknat.js} +13 -13
- package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-z8oPMSUZ.cjs} +10 -10
- package/dist/src/{render-D5t2ogwP.js → render-D2710HbA.js} +2 -2
- package/dist/src/rubyUtils-BLd6EE1u.cjs +5 -0
- package/dist/src/{rubyUtils-D1DBBgSr.js → rubyUtils-CVELPvUH.js} +2 -2
- package/dist/src/{rubyUtils-Cus5EGPX.js → rubyUtils-Dn6MGcsk.js} +2 -2
- package/dist/src/{rubyUtils-CI4Y5f08.js → rubyUtils-LBsk3zIm.js} +1 -1
- package/dist/src/{rubyUtils-CWkecmDB.js → rubyUtils-_t9Gmf7U.js} +1 -1
- package/dist/src/{rubyUtils-DCci_Y0f.cjs → rubyUtils-rnCVDgH-.cjs} +1 -1
- package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-D5f6K309.js} +11 -11
- package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DwJOnUlQ.cjs} +11 -11
- package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-dygdRScC.js} +17 -17
- package/dist/src/{scanner-CPosIITM.js → scanner-D__U7qyj.js} +9 -9
- package/dist/src/server/index.js +265 -129
- package/dist/src/server-B0dYFkhY.cjs +6 -0
- package/dist/src/{server-ByLwfSDf.cjs → server-BRPZ0ZqY.cjs} +3 -3
- package/dist/src/{server-Bb83s34G.js → server-BZJ_IEiK.js} +4 -4
- package/dist/src/{server-BKdsd70B.js → server-Bo3rtriL.js} +3 -3
- package/dist/src/server-DZYvmL1z.js +6 -0
- package/dist/src/server-DlTe_Yz5.js +8 -0
- package/dist/src/{signal-B8hvvb8w.js → signal-C9qT0qiq.js} +2 -2
- package/dist/src/{slack-BblBpsIz.cjs → slack-D1F9Y7CH.cjs} +1 -1
- package/dist/src/{slack-D-bNCtTa.js → slack-DkAF58Tr.js} +1 -1
- package/dist/src/{slack-Ba3gil_6.js → slack-Ed1yyt_j.js} +1 -1
- package/dist/src/{store-W4yZqv_e.js → store-BpNCvdGu.js} +2 -2
- package/dist/src/store-BvASVHpa.cjs +6 -0
- package/dist/src/{store-l9-mIRC-.cjs → store-BwsF10Db.cjs} +2 -2
- package/dist/src/store-CDxVyNyD.js +6 -0
- package/dist/src/store-CXxKOQ8V.js +7 -0
- package/dist/src/{store-CaNRmI5T.js → store-Dm8qSYzq.js} +2 -2
- package/dist/src/{tables-DZWWveaJ.js → tables-B0wvxP8N.js} +2 -2
- package/dist/src/{tables-BCcp2r48.js → tables-CMOPSC5_.js} +2 -2
- package/dist/src/{tables-D_VmN-ui.cjs → tables-D_nKFrHt.cjs} +2 -2
- package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-B9G9VzVT.cjs} +3 -3
- package/dist/src/telemetry-CGO4vCCz.js +8 -0
- package/dist/src/telemetry-CXarb10F.cjs +6 -0
- package/dist/src/telemetry-DKtQFHz8.js +6 -0
- package/dist/src/{telemetry-CmGyDaxF.js → telemetry-DMGVtU7z.js} +3 -3
- package/dist/src/{telemetry-CB8ltkOD.js → telemetry-oV0dEJ2S.js} +3 -3
- package/dist/src/{text-C1cXk9ij.js → text-DuYSUYPB.js} +1 -1
- package/dist/src/{text-Dm78AVGG.js → text-Dx0GJOCN.js} +1 -1
- package/dist/src/{text-DF2hMKdg.cjs → text-PYISqVm1.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-DFp2VFtG.cjs → tokenUsageUtils-BtZd3sP7.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-C9fIWtvn.js → tokenUsageUtils-DoinwgKF.js} +1 -1
- package/dist/src/{tokenUsageUtils-BQVp2uKF.js → tokenUsageUtils-cFdLMERB.js} +1 -1
- package/dist/src/{transcription-BExRps8_.js → transcription-C5sJed5S.js} +7 -7
- package/dist/src/{transcription-B4GbFkhb.cjs → transcription-DEiYnjqG.cjs} +7 -7
- package/dist/src/{transcription-CpuRKcIu.js → transcription-Dcv2GYgI.js} +10 -10
- package/dist/src/{transform-BzepVlkX.js → transform-BFPYuBaW.js} +4 -4
- package/dist/src/{transform-BwedQA8W.js → transform-Cpk3BVib.js} +3 -3
- package/dist/src/{transform-B-AYIQmM.js → transform-DWBbTh_M.js} +3 -3
- package/dist/src/{transform-1g7fEAge.js → transform-DZI2t8-9.js} +3 -3
- package/dist/src/{transform-B5HUD86U.cjs → transform-DeVdMzaA.cjs} +3 -3
- package/dist/src/{transformersAvailability-B5gZ2aG-.cjs → transformersAvailability-BaoWHpu1.cjs} +1 -1
- package/dist/src/{transformersAvailability-BHP0aLcv.js → transformersAvailability-DtpwoeFC.js} +1 -1
- package/dist/src/{transformersAvailability-CA173HU7.js → transformersAvailability-O2YaCv9Z.js} +1 -1
- package/dist/src/{types-n3ttAnOa.js → types-BZz0C6rq.js} +3 -18
- package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
- package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
- package/dist/src/{util-Cd2vI7ii.js → util-BR59kcWy.js} +3 -3
- package/dist/src/{util-BnF-xRPi.js → util-BjTDLTac.js} +3 -3
- package/dist/src/{util-CwsTKPXi.js → util-CSvDts2m.js} +3 -3
- package/dist/src/{util-KE6uFWHG.cjs → util-CfLcBZEA.cjs} +3 -3
- package/dist/src/{util-BXjq8vOq.js → util-Czrn_1en.js} +3 -3
- package/dist/src/{util-BT0p5c9X.cjs → util-Dxwavk84.cjs} +3 -3
- package/dist/src/{util-DX58t30T.js → util-GpiGNWSj.js} +17 -5
- package/dist/src/{util-B-Yo0qzc.cjs → util-RfCZM4TD.cjs} +22 -4
- package/dist/src/{util-CyzW5ayt.js → util-UxC1pDDt.js} +20 -8
- package/dist/src/{utils-HdRSruG0.js → utils-2CdNTp-a.js} +2 -2
- package/dist/src/{utils-BSpcJB4B.cjs → utils-6lWwNARy.cjs} +2 -2
- package/dist/src/{utils-BWOFfOel.js → utils-BMIgqY6C.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +31 -29
- package/dist/src/app/assets/index-CFq6nz43.css +0 -1
- package/dist/src/cache-BZYapLG-.cjs +0 -6
- package/dist/src/eval-g5SH3NBa.js +0 -16
- package/dist/src/evalResult-Bt2fmj0V.js +0 -9
- package/dist/src/evalResult-DRNH7Jui.js +0 -10
- package/dist/src/evalResult-DqLWyktN.cjs +0 -9
- package/dist/src/evaluator-DIenqH86.js +0 -37
- package/dist/src/fetch-gmneHrJc.cjs +0 -4
- package/dist/src/graders-BHcgLz2P.js +0 -35
- package/dist/src/graders-BjBhwJOb.cjs +0 -32
- package/dist/src/graders-DEjGX1uY.js +0 -33
- package/dist/src/providers-BBGwMMzj.js +0 -33
- package/dist/src/providers-BR5P_qep.cjs +0 -31
- package/dist/src/providers-CDib9hVp.js +0 -31
- package/dist/src/rubyUtils-DZo4ik6p.cjs +0 -5
- package/dist/src/server-B5aIhzYk.cjs +0 -6
- package/dist/src/server-DJCw85s0.js +0 -8
- package/dist/src/server-DKj0QIm9.js +0 -6
- package/dist/src/store-Bo9FEBPO.js +0 -7
- package/dist/src/store-CqHttBql.js +0 -6
- package/dist/src/store-fsxOJl5O.cjs +0 -6
- package/dist/src/telemetry-B2uJOvMA.js +0 -8
- package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
- package/dist/src/telemetry-rP_n_bnZ.js +0 -6
- package/dist/src/time-CRRzn6vA.js +0 -9
- package/dist/src/time-CbtsO5_a.cjs +0 -21
- package/dist/src/time-CoOwN3kQ.js +0 -10
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
const require_fetch = require('./fetch-
|
|
3
|
-
const require_cache = require('./cache-
|
|
4
|
-
const require_openai = require('./openai-
|
|
5
|
-
const require_util = require('./util-
|
|
6
|
-
const require_text = require('./text-
|
|
2
|
+
const require_fetch = require('./fetch-BgYKpAJ7.cjs');
|
|
3
|
+
const require_cache = require('./cache-BnrNlhzE.cjs');
|
|
4
|
+
const require_openai = require('./openai-BjpdxIOG.cjs');
|
|
5
|
+
const require_util = require('./util-Dxwavk84.cjs');
|
|
6
|
+
const require_text = require('./text-PYISqVm1.cjs');
|
|
7
7
|
|
|
8
8
|
//#region src/providers/openai/image.ts
|
|
9
9
|
const DALLE2_VALID_SIZES = [
|
|
@@ -232,4 +232,4 @@ Object.defineProperty(exports, 'formatOutput', {
|
|
|
232
232
|
return formatOutput;
|
|
233
233
|
}
|
|
234
234
|
});
|
|
235
|
-
//# sourceMappingURL=image-
|
|
235
|
+
//# sourceMappingURL=image-VWd0wZmz.cjs.map
|
package/dist/src/index.cjs
CHANGED
|
@@ -5,36 +5,35 @@ const require_esm = require('./esm-gWVPXn3r.cjs');
|
|
|
5
5
|
const require_pythonUtils = require('./pythonUtils-rOCm9w_5.cjs');
|
|
6
6
|
const require_fileExtensions = require('./fileExtensions-DkJYkWUy.cjs');
|
|
7
7
|
const require_transform = require('./transform-DshYLyBq.cjs');
|
|
8
|
-
const require_graders = require('./graders-
|
|
9
|
-
const require_types = require('./types-
|
|
10
|
-
const require_util = require('./util-
|
|
11
|
-
const require_fetch = require('./fetch-
|
|
12
|
-
const
|
|
13
|
-
const
|
|
14
|
-
const
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
require('./
|
|
18
|
-
const
|
|
19
|
-
const
|
|
20
|
-
|
|
21
|
-
require('./
|
|
22
|
-
require('./
|
|
23
|
-
require('./
|
|
24
|
-
|
|
25
|
-
require('./
|
|
26
|
-
const
|
|
27
|
-
const
|
|
28
|
-
const
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
require('./
|
|
34
|
-
require('./
|
|
35
|
-
const
|
|
36
|
-
const
|
|
37
|
-
const require_evalResult = require('./evalResult-CFSvp3Zh.cjs');
|
|
8
|
+
const require_graders = require('./graders-DOFEkq6l.cjs');
|
|
9
|
+
const require_types = require('./types-Cixl-wRO.cjs');
|
|
10
|
+
const require_util = require('./util-RfCZM4TD.cjs');
|
|
11
|
+
const require_fetch = require('./fetch-BgYKpAJ7.cjs');
|
|
12
|
+
const require_cache = require('./cache-BnrNlhzE.cjs');
|
|
13
|
+
const require_providers = require('./providers-BF4aullZ.cjs');
|
|
14
|
+
const require_utils = require('./utils-6lWwNARy.cjs');
|
|
15
|
+
const require_extractor = require('./extractor-wSeR47-c.cjs');
|
|
16
|
+
require('./genaiTracer-BQ8AmajD.cjs');
|
|
17
|
+
const require_chat = require('./chat-QHQ-K-Fi.cjs');
|
|
18
|
+
const require_tokenUsageUtils = require('./tokenUsageUtils-BtZd3sP7.cjs');
|
|
19
|
+
const require_transform$1 = require('./transform-DeVdMzaA.cjs');
|
|
20
|
+
require('./messages-CvFzyCty.cjs');
|
|
21
|
+
require('./util-CfLcBZEA.cjs');
|
|
22
|
+
require('./openai-BjpdxIOG.cjs');
|
|
23
|
+
const require_util$2 = require('./util-Dxwavk84.cjs');
|
|
24
|
+
require('./completion-C03fIkcR.cjs');
|
|
25
|
+
const require_accounts = require('./accounts-TGrHBZU9.cjs');
|
|
26
|
+
const require_server = require('./server-BRPZ0ZqY.cjs');
|
|
27
|
+
const require_blobs = require('./blobs-BPAqMoU_.cjs');
|
|
28
|
+
const require_tables = require('./tables-D_nKFrHt.cjs');
|
|
29
|
+
const require_telemetry = require('./telemetry-B9G9VzVT.cjs');
|
|
30
|
+
const require_text = require('./text-PYISqVm1.cjs');
|
|
31
|
+
const require_store = require('./store-BwsF10Db.cjs');
|
|
32
|
+
require('./base-B_pJcCDc.cjs');
|
|
33
|
+
require('./image-VWd0wZmz.cjs');
|
|
34
|
+
const require_providerRegistry = require('./providerRegistry-HGQd2MF6.cjs');
|
|
35
|
+
const require_rubyUtils = require('./rubyUtils-rnCVDgH-.cjs');
|
|
36
|
+
const require_evalResult = require('./evalResult-CSucevrC.cjs');
|
|
38
37
|
let fs = require("fs");
|
|
39
38
|
fs = require_logger.__toESM(fs);
|
|
40
39
|
let path = require("path");
|
|
@@ -307,7 +306,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
307
306
|
require_telemetry.telemetry_default.record("feature_used", { feature: "tracing" });
|
|
308
307
|
try {
|
|
309
308
|
require_logger.logger_default.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
310
|
-
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
309
|
+
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-DXqKPf2U.cjs"));
|
|
311
310
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
312
311
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
313
312
|
require_logger.logger_default.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
|
|
@@ -330,7 +329,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
330
329
|
async function stopOtlpReceiverIfNeeded() {
|
|
331
330
|
if (otlpReceiverStarted) try {
|
|
332
331
|
require_logger.logger_default.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
333
|
-
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-
|
|
332
|
+
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-DXqKPf2U.cjs"));
|
|
334
333
|
await stopOTLPReceiver();
|
|
335
334
|
otlpReceiverStarted = false;
|
|
336
335
|
require_logger.logger_default.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -365,7 +364,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
365
364
|
}
|
|
366
365
|
if (!tracingEnabled) return null;
|
|
367
366
|
require_logger.logger_default.debug("[EvaluatorTracing] Importing trace store");
|
|
368
|
-
const { getTraceStore } = await Promise.resolve().then(() => require("./store-
|
|
367
|
+
const { getTraceStore } = await Promise.resolve().then(() => require("./store-BvASVHpa.cjs"));
|
|
369
368
|
const traceStore = getTraceStore();
|
|
370
369
|
const traceId = generateTraceId();
|
|
371
370
|
const spanId = generateSpanId();
|
|
@@ -2776,7 +2775,7 @@ const ASSERTION_HANDLERS = {
|
|
|
2776
2775
|
"llm-rubric": handleLlmRubric,
|
|
2777
2776
|
meteor: async (params) => {
|
|
2778
2777
|
try {
|
|
2779
|
-
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-
|
|
2778
|
+
const { handleMeteorAssertion } = await Promise.resolve().then(() => require("./meteor-DRuEfzuQ.cjs"));
|
|
2780
2779
|
return handleMeteorAssertion(params);
|
|
2781
2780
|
} catch (error) {
|
|
2782
2781
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -2906,7 +2905,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
2906
2905
|
};
|
|
2907
2906
|
}
|
|
2908
2907
|
else if (filePath.endsWith(".rb")) try {
|
|
2909
|
-
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-
|
|
2908
|
+
const { runRuby } = await Promise.resolve().then(() => require("./rubyUtils-BLd6EE1u.cjs"));
|
|
2910
2909
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
2911
2910
|
require_logger.logger_default.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
2912
2911
|
} catch (error) {
|
|
@@ -3810,7 +3809,7 @@ async function runEval({ provider, prompt, test, testSuite, delay, nunjucksFilte
|
|
|
3810
3809
|
require_logger.logger_default.debug(`Evaluator checking cached flag: response.cached = ${Boolean(response.cached)}, provider.delay = ${provider.delay}`);
|
|
3811
3810
|
if (!response.cached && provider.delay > 0) {
|
|
3812
3811
|
require_logger.logger_default.debug(`Sleeping for ${provider.delay}ms`);
|
|
3813
|
-
await
|
|
3812
|
+
await require_fetch.sleep(provider.delay);
|
|
3814
3813
|
} else if (response.cached) require_logger.logger_default.debug(`Skipping delay because response is cached`);
|
|
3815
3814
|
const ret = {
|
|
3816
3815
|
...setup,
|
|
@@ -4266,7 +4265,7 @@ var Evaluator = class {
|
|
|
4266
4265
|
const defaultProvider = testSuite.defaultTest.provider;
|
|
4267
4266
|
if (require_types.isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
|
|
4268
4267
|
else if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
4269
|
-
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-
|
|
4268
|
+
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BmFckJq8.cjs"));
|
|
4270
4269
|
testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
4271
4270
|
} else testCase.provider = defaultProvider;
|
|
4272
4271
|
}
|
|
@@ -4341,7 +4340,7 @@ var Evaluator = class {
|
|
|
4341
4340
|
if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
|
|
4342
4341
|
}
|
|
4343
4342
|
if (require_logger.cliState_default.resume && this.evalRecord.persisted) try {
|
|
4344
|
-
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-
|
|
4343
|
+
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-DrPpo-zg.cjs"));
|
|
4345
4344
|
const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: require_logger.cliState_default.retryMode });
|
|
4346
4345
|
const originalCount = runEvalOptions.length;
|
|
4347
4346
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -4857,7 +4856,7 @@ var Evaluator = class {
|
|
|
4857
4856
|
}
|
|
4858
4857
|
if (isOtlpReceiverStarted()) {
|
|
4859
4858
|
require_logger.logger_default.debug("[Evaluator] Waiting for span exports to complete...");
|
|
4860
|
-
await
|
|
4859
|
+
await require_fetch.sleep(3e3);
|
|
4861
4860
|
}
|
|
4862
4861
|
await stopOtlpReceiverIfNeeded();
|
|
4863
4862
|
await require_providerRegistry.providerRegistry.shutdownAll();
|
|
@@ -4984,7 +4983,11 @@ async function runDbMigrations() {
|
|
|
4984
4983
|
//#region src/redteam/sharedFrontend.ts
|
|
4985
4984
|
function getRiskCategorySeverityMap(plugins) {
|
|
4986
4985
|
const overrides = plugins?.reduce((acc, plugin) => {
|
|
4987
|
-
if (plugin.severity)
|
|
4986
|
+
if (plugin.severity) {
|
|
4987
|
+
acc[plugin.id] = plugin.severity;
|
|
4988
|
+
const policyId = plugin.config?.policy?.id;
|
|
4989
|
+
if (plugin.id === "policy" && policyId) acc[policyId] = plugin.severity;
|
|
4990
|
+
}
|
|
4988
4991
|
return acc;
|
|
4989
4992
|
}, {}) || {};
|
|
4990
4993
|
return {
|
|
@@ -5273,7 +5276,7 @@ function convertResultsToTable(eval_) {
|
|
|
5273
5276
|
description: result.description || void 0,
|
|
5274
5277
|
outputs: [],
|
|
5275
5278
|
vars: result.vars ? Object.values(varsForHeader).map((varName) => {
|
|
5276
|
-
const varValue = result.vars?.[varName]
|
|
5279
|
+
const varValue = result.vars?.[varName] ?? "";
|
|
5277
5280
|
if (typeof varValue === "string") return varValue;
|
|
5278
5281
|
return JSON.stringify(varValue, null, 2);
|
|
5279
5282
|
}).flat() : [],
|
|
@@ -5308,7 +5311,11 @@ function convertResultsToTable(eval_) {
|
|
|
5308
5311
|
varValuesForRow.set(result.testIdx, result.vars);
|
|
5309
5312
|
rowMap[result.testIdx] = row;
|
|
5310
5313
|
let resultText;
|
|
5311
|
-
const
|
|
5314
|
+
const rawOutput = result.response?.output;
|
|
5315
|
+
let outputTextDisplay;
|
|
5316
|
+
if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
|
|
5317
|
+
else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
|
|
5318
|
+
else outputTextDisplay = String(rawOutput);
|
|
5312
5319
|
if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
|
|
5313
5320
|
else resultText = `${outputTextDisplay}`;
|
|
5314
5321
|
else if (result.error) resultText = `${result.error}`;
|
|
@@ -5355,7 +5362,7 @@ function convertResultsToTable(eval_) {
|
|
|
5355
5362
|
const rows = Object.values(rowMap);
|
|
5356
5363
|
const sortedVars = [...varsForHeader].sort();
|
|
5357
5364
|
for (const row of rows) row.vars = sortedVars.map((varName) => {
|
|
5358
|
-
const varValue = varValuesForRow.get(row.testIdx)?.[varName]
|
|
5365
|
+
const varValue = varValuesForRow.get(row.testIdx)?.[varName] ?? "";
|
|
5359
5366
|
if (typeof varValue === "string") return varValue;
|
|
5360
5367
|
return JSON.stringify(varValue, null, 2);
|
|
5361
5368
|
});
|
|
@@ -5372,7 +5379,11 @@ function convertResultsToTable(eval_) {
|
|
|
5372
5379
|
//#region src/util/exportToFile/index.ts
|
|
5373
5380
|
function convertEvalResultToTableCell(result) {
|
|
5374
5381
|
let resultText;
|
|
5375
|
-
const
|
|
5382
|
+
const rawOutput = result.response?.output;
|
|
5383
|
+
let outputTextDisplay;
|
|
5384
|
+
if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
|
|
5385
|
+
else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
|
|
5386
|
+
else outputTextDisplay = String(rawOutput);
|
|
5376
5387
|
if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
|
|
5377
5388
|
else resultText = `${outputTextDisplay}`;
|
|
5378
5389
|
else if (result.error) resultText = `${result.error}`;
|
|
@@ -5418,11 +5429,12 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
5418
5429
|
outputs: [],
|
|
5419
5430
|
vars: Object.values(varsForHeader).map((varName) => {
|
|
5420
5431
|
if (varName === "sessionId") {
|
|
5421
|
-
const
|
|
5432
|
+
const sessionId = results[0].testCase.vars?.sessionId;
|
|
5433
|
+
const varValue = sessionId == null || sessionId === "" ? results[0].metadata?.sessionId ?? "" : sessionId;
|
|
5422
5434
|
if (typeof varValue === "string") return varValue;
|
|
5423
5435
|
return JSON.stringify(varValue);
|
|
5424
5436
|
}
|
|
5425
|
-
const varValue = results[0].testCase.vars?.[varName]
|
|
5437
|
+
const varValue = results[0].testCase.vars?.[varName] ?? "";
|
|
5426
5438
|
if (typeof varValue === "string") return varValue;
|
|
5427
5439
|
return JSON.stringify(varValue);
|
|
5428
5440
|
}).flat(),
|
|
@@ -5879,7 +5891,7 @@ var Eval = class Eval {
|
|
|
5879
5891
|
prompts: this.prompts,
|
|
5880
5892
|
description: this.config.description,
|
|
5881
5893
|
author: this.author,
|
|
5882
|
-
updatedAt:
|
|
5894
|
+
updatedAt: require_fetch.getCurrentTimestamp(),
|
|
5883
5895
|
vars: Array.from(this.vars),
|
|
5884
5896
|
runtimeOptions: sanitizeRuntimeOptions(this.runtimeOptions)
|
|
5885
5897
|
};
|
|
@@ -6596,12 +6608,35 @@ async function filterTestsByResults(testSuite, pathOrId, filterFn) {
|
|
|
6596
6608
|
if (filteredResults.length === 0) return [];
|
|
6597
6609
|
const uniqueVarsInResults = new Set(filteredResults.map((r) => JSON.stringify(require_util.filterRuntimeVars(r.vars))));
|
|
6598
6610
|
require_logger.logger_default.debug(`[filterTestsByResults] ${uniqueVarsInResults.size} unique test cases (by vars) in filtered results`);
|
|
6599
|
-
const matchedTests = [
|
|
6611
|
+
const matchedTests = [];
|
|
6612
|
+
for (const test of testSuite.tests) {
|
|
6600
6613
|
const testWithDefaults = mergeDefaultVars(test, testSuite.defaultTest);
|
|
6601
|
-
|
|
6602
|
-
if (
|
|
6603
|
-
|
|
6604
|
-
|
|
6614
|
+
let matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, testWithDefaults) && require_util.extractRuntimeVars(result.vars) !== void 0);
|
|
6615
|
+
if (!matchedResult) matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, testWithDefaults));
|
|
6616
|
+
if (!matchedResult) {
|
|
6617
|
+
if (testSuite.defaultTest && typeof testSuite.defaultTest !== "string" && testSuite.defaultTest.vars && Object.keys(testSuite.defaultTest.vars).length > 0) {
|
|
6618
|
+
matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, test) && require_util.extractRuntimeVars(result.vars) !== void 0);
|
|
6619
|
+
if (!matchedResult) matchedResult = filteredResults.find((result) => require_util.resultIsForTestCase(result, test));
|
|
6620
|
+
}
|
|
6621
|
+
}
|
|
6622
|
+
if (matchedResult) {
|
|
6623
|
+
const runtimeVars = require_util.extractRuntimeVars(matchedResult.vars);
|
|
6624
|
+
if (runtimeVars) {
|
|
6625
|
+
const testWithRuntimeVars = {
|
|
6626
|
+
...test,
|
|
6627
|
+
vars: {
|
|
6628
|
+
...test.vars,
|
|
6629
|
+
...runtimeVars
|
|
6630
|
+
}
|
|
6631
|
+
};
|
|
6632
|
+
require_logger.logger_default.debug("[filterTestsByResults] Restored runtime vars for test", { varKeys: Object.keys(runtimeVars) });
|
|
6633
|
+
matchedTests.push(testWithRuntimeVars);
|
|
6634
|
+
} else {
|
|
6635
|
+
require_logger.logger_default.debug("[filterTestsByResults] Matched test has no runtime vars to restore");
|
|
6636
|
+
matchedTests.push(test);
|
|
6637
|
+
}
|
|
6638
|
+
}
|
|
6639
|
+
}
|
|
6605
6640
|
require_logger.logger_default.debug(`[filterTestsByResults] Matched ${matchedTests.length} tests out of ${testSuite.tests.length} in test suite`);
|
|
6606
6641
|
const extractedTests = [];
|
|
6607
6642
|
const matchedResultKeys = /* @__PURE__ */ new Set();
|
|
@@ -8651,7 +8686,7 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
|
|
|
8651
8686
|
const generatePrompts = async () => {
|
|
8652
8687
|
const result = await unalignedProvider.callApi("");
|
|
8653
8688
|
if (result.output) {
|
|
8654
|
-
if (delayMs > 0) await
|
|
8689
|
+
if (delayMs > 0) await require_fetch.sleep(delayMs);
|
|
8655
8690
|
return result.output;
|
|
8656
8691
|
}
|
|
8657
8692
|
return [];
|
|
@@ -8816,8 +8851,9 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
|
|
|
8816
8851
|
require_logger.logger_default.error(`Error generating test cases for ${key}: ${remoteHealth.message}`);
|
|
8817
8852
|
return [];
|
|
8818
8853
|
}
|
|
8854
|
+
const { graderExamples, ...configForRemote } = config ?? {};
|
|
8819
8855
|
const body = JSON.stringify({
|
|
8820
|
-
config,
|
|
8856
|
+
config: configForRemote,
|
|
8821
8857
|
injectVar,
|
|
8822
8858
|
inputs: config?.inputs,
|
|
8823
8859
|
n,
|
|
@@ -9506,11 +9542,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9506
9542
|
});
|
|
9507
9543
|
await require_providers.validateStrategies(strategies);
|
|
9508
9544
|
await validateSharpDependency(strategies, plugins);
|
|
9509
|
-
if (strategies.some((s) => require_types.isLanguageDisallowedStrategy(s.id)) && language) {
|
|
9510
|
-
const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
|
|
9511
|
-
language = "en";
|
|
9512
|
-
require_logger.logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
|
|
9513
|
-
}
|
|
9514
9545
|
const redteamProvider = await require_providers.redteamProviderManager.getProvider({ provider });
|
|
9515
9546
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
9516
9547
|
require_logger.logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
|
|
@@ -10995,60 +11026,101 @@ async function deleteErrorResults(resultIds) {
|
|
|
10995
11026
|
await require_tables.getDb().delete(require_tables.evalResultsTable).where((0, drizzle_orm.inArray)(require_tables.evalResultsTable.id, resultIds));
|
|
10996
11027
|
require_logger.logger_default.debug(`Deleted ${resultIds.length} error results from database`);
|
|
10997
11028
|
}
|
|
11029
|
+
const RECALCULATE_BATCH_SIZE = 1e3;
|
|
10998
11030
|
/**
|
|
10999
|
-
* Recalculates prompt metrics based on current results after ERROR results have been deleted
|
|
11031
|
+
* Recalculates prompt metrics based on current results after ERROR results have been deleted.
|
|
11032
|
+
* Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
|
|
11000
11033
|
*/
|
|
11001
11034
|
async function recalculatePromptMetrics(evalRecord) {
|
|
11002
11035
|
require_logger.logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
|
|
11003
|
-
|
|
11036
|
+
const startTime = Date.now();
|
|
11037
|
+
let batchNumber = 0;
|
|
11038
|
+
let totalProcessed = 0;
|
|
11004
11039
|
const promptMetricsMap = /* @__PURE__ */ new Map();
|
|
11005
|
-
for (const
|
|
11006
|
-
|
|
11007
|
-
|
|
11008
|
-
|
|
11009
|
-
|
|
11010
|
-
|
|
11011
|
-
|
|
11012
|
-
|
|
11013
|
-
|
|
11014
|
-
|
|
11015
|
-
|
|
11016
|
-
|
|
11017
|
-
|
|
11018
|
-
|
|
11019
|
-
|
|
11020
|
-
|
|
11021
|
-
|
|
11022
|
-
|
|
11023
|
-
|
|
11024
|
-
|
|
11025
|
-
|
|
11026
|
-
|
|
11027
|
-
|
|
11028
|
-
|
|
11029
|
-
|
|
11030
|
-
|
|
11031
|
-
|
|
11032
|
-
|
|
11033
|
-
|
|
11034
|
-
|
|
11035
|
-
|
|
11036
|
-
|
|
11037
|
-
|
|
11038
|
-
|
|
11039
|
-
|
|
11040
|
-
|
|
11041
|
-
|
|
11042
|
-
|
|
11043
|
-
|
|
11044
|
-
|
|
11045
|
-
|
|
11046
|
-
|
|
11040
|
+
for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
|
|
11041
|
+
score: 0,
|
|
11042
|
+
testPassCount: 0,
|
|
11043
|
+
testFailCount: 0,
|
|
11044
|
+
testErrorCount: 0,
|
|
11045
|
+
assertPassCount: 0,
|
|
11046
|
+
assertFailCount: 0,
|
|
11047
|
+
totalLatencyMs: 0,
|
|
11048
|
+
tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
|
|
11049
|
+
namedScores: {},
|
|
11050
|
+
namedScoresCount: {},
|
|
11051
|
+
cost: 0
|
|
11052
|
+
});
|
|
11053
|
+
let currentResultId;
|
|
11054
|
+
try {
|
|
11055
|
+
for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
|
|
11056
|
+
batchNumber++;
|
|
11057
|
+
require_logger.logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
|
|
11058
|
+
for (const result of batch) {
|
|
11059
|
+
currentResultId = result.id;
|
|
11060
|
+
const metrics = promptMetricsMap.get(result.promptIdx);
|
|
11061
|
+
if (!metrics) {
|
|
11062
|
+
require_logger.logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
|
|
11063
|
+
resultId: result.id,
|
|
11064
|
+
evalId: evalRecord.id
|
|
11065
|
+
});
|
|
11066
|
+
continue;
|
|
11067
|
+
}
|
|
11068
|
+
if (result.success) metrics.testPassCount++;
|
|
11069
|
+
else if (result.failureReason === require_types.ResultFailureReason.ERROR) metrics.testErrorCount++;
|
|
11070
|
+
else metrics.testFailCount++;
|
|
11071
|
+
metrics.score += result.score ?? 0;
|
|
11072
|
+
metrics.totalLatencyMs += result.latencyMs || 0;
|
|
11073
|
+
metrics.cost += result.cost || 0;
|
|
11074
|
+
for (const [key, value] of Object.entries(result.namedScores || {})) {
|
|
11075
|
+
metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
|
|
11076
|
+
const testVars = result.testCase?.vars || {};
|
|
11077
|
+
let contributingAssertions = 0;
|
|
11078
|
+
result.gradingResult?.componentResults?.forEach((componentResult) => {
|
|
11079
|
+
if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
|
|
11080
|
+
});
|
|
11081
|
+
metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
|
|
11082
|
+
}
|
|
11083
|
+
if (result.gradingResult?.componentResults) {
|
|
11084
|
+
metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
|
|
11085
|
+
metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
|
|
11086
|
+
}
|
|
11087
|
+
if (result.response?.tokenUsage) require_tokenUsageUtils.accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
|
|
11088
|
+
if (result.gradingResult?.tokensUsed) {
|
|
11089
|
+
if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = require_tokenUsageUtils.createEmptyAssertions();
|
|
11090
|
+
require_tokenUsageUtils.accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
|
|
11091
|
+
}
|
|
11092
|
+
}
|
|
11093
|
+
totalProcessed += batch.length;
|
|
11047
11094
|
}
|
|
11095
|
+
} catch (error) {
|
|
11096
|
+
require_logger.logger_default.error("Error during batched metrics recalculation", {
|
|
11097
|
+
phase: "calculation",
|
|
11098
|
+
batchNumber,
|
|
11099
|
+
totalProcessed,
|
|
11100
|
+
currentResultId,
|
|
11101
|
+
evalId: evalRecord.id,
|
|
11102
|
+
error
|
|
11103
|
+
});
|
|
11104
|
+
throw error;
|
|
11048
11105
|
}
|
|
11049
11106
|
for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
|
|
11050
|
-
if (evalRecord.persisted)
|
|
11051
|
-
|
|
11107
|
+
if (evalRecord.persisted) try {
|
|
11108
|
+
await evalRecord.addPrompts(evalRecord.prompts);
|
|
11109
|
+
} catch (error) {
|
|
11110
|
+
require_logger.logger_default.error("Error saving recalculated prompt metrics", {
|
|
11111
|
+
phase: "save",
|
|
11112
|
+
evalId: evalRecord.id,
|
|
11113
|
+
promptCount: evalRecord.prompts.length,
|
|
11114
|
+
error
|
|
11115
|
+
});
|
|
11116
|
+
throw error;
|
|
11117
|
+
}
|
|
11118
|
+
const durationMs = Date.now() - startTime;
|
|
11119
|
+
require_logger.logger_default.debug("Prompt metrics recalculation completed", {
|
|
11120
|
+
totalBatches: batchNumber,
|
|
11121
|
+
totalResults: totalProcessed,
|
|
11122
|
+
durationMs
|
|
11123
|
+
});
|
|
11052
11124
|
}
|
|
11053
11125
|
|
|
11054
11126
|
//#endregion
|