promptfoo 0.120.21 → 0.120.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{accounts-uWRxEv-D.cjs → accounts-BGa3oLcq.cjs} +4 -4
- package/dist/src/{accounts-DYMHC4iY.js → accounts-BR9WiyfR.js} +4 -4
- package/dist/src/{accounts-CjU7QSnv.js → accounts-Ckp-8szu.js} +5 -5
- package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BE3QlHKy.js} +2 -2
- package/dist/src/{agents-CzCD5974.cjs → agents-BhbWppc2.cjs} +6 -6
- package/dist/src/{agents-DGQULdoA.js → agents-BtgWQypw.js} +3 -3
- package/dist/src/{agents-CJ4BY_QU.js → agents-BxHVTa7v.js} +7 -7
- package/dist/src/{agents-DABC7ROH.js → agents-CMv_PdKa.js} +6 -6
- package/dist/src/{agents-BNSS1kSZ.js → agents-CTESpLg_.js} +2 -2
- package/dist/src/{agents-BJxm0Kcs.cjs → agents-CnjTsnFv.cjs} +2 -2
- package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-BDfWTIuB.js} +10 -10
- package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-B_JaNK7C.cjs} +9 -9
- package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-DzmOPVaB.js} +9 -9
- package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
- package/dist/src/app/assets/{index-BVE859O5.js → index-gBlK8Ooo.js} +74 -74
- package/dist/src/app/index.html +2 -2
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-BT9aSeWg.js → audio-CLv1koN0.js} +4 -4
- package/dist/src/{audio-CSrI_LFf.js → audio-DCfFE8hp.js} +3 -3
- package/dist/src/{audio-DSV364HB.cjs → audio-UH7JqpeT.cjs} +3 -3
- package/dist/src/{base-d8hHSpEx.js → base-BMtzQZDN.js} +2 -2
- package/dist/src/{base-NFXflrMy.js → base-DGXF05vt.js} +2 -2
- package/dist/src/{base-CRrQ15eu.cjs → base-oCiJ630X.cjs} +2 -2
- package/dist/src/{blobs-Dznj-8xW.js → blobs-CNXTPz_3.js} +2 -2
- package/dist/src/{blobs-DC2sBLv8.js → blobs-eYQ1PJ-z.js} +2 -2
- package/dist/src/{blobs-D-Neij7m.cjs → blobs-h2nW1G8J.cjs} +2 -2
- package/dist/src/cache-BbrUrAzE.cjs +6 -0
- package/dist/src/{cache-aKR8pp5y.js → cache-CG_VUeq6.js} +2 -2
- package/dist/src/{cache-CMvgAnSi.js → cache-CkRahBA6.js} +2 -2
- package/dist/src/{cache-scuP1IRq.js → cache-CuMs-Ct_.js} +3 -3
- package/dist/src/{cache-DYJRe0Jt.cjs → cache-PWdK7nbt.cjs} +2 -2
- package/dist/src/{cache-xb4oZe27.js → cache-_erPRF2X.js} +2 -2
- package/dist/src/{chat-DyjTngG2.js → chat-4dCIDs0X.js} +8 -8
- package/dist/src/{chat-BbgGuvwu.js → chat-Bvvgb30H.js} +6 -6
- package/dist/src/{chat-CfJU9hHo.cjs → chat-CPZqlTJM.cjs} +8 -8
- package/dist/src/{chat-BH8Fxyap.js → chat-Cz8uiybK.js} +6 -6
- package/dist/src/{chat-Biz16BQv.cjs → chat-DKNIBnnw.cjs} +6 -6
- package/dist/src/{chat-Du_GyqZe.js → chat-Noh-ZAEA.js} +9 -9
- package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-C_1rEeQS.js} +8 -8
- package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-Cf9ilftQ.cjs} +8 -8
- package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-Duik96Pj.js} +9 -9
- package/dist/src/{cloud-UwfmA0Ne.js → cloud-CZHXB86j.js} +2 -2
- package/dist/src/{cloud-1cbcFXE9.js → cloud-Ckel3stH.js} +1 -1
- package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-8EaAKyYe.js} +9 -9
- package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CCHnFeAX.cjs} +9 -9
- package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DM055RzH.js} +10 -10
- package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-BMUSnpcR.js} +11 -11
- package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-Bcw05eOu.js} +10 -10
- package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-DNxNiVNW.cjs} +10 -10
- package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-Bc95q8BF.cjs} +10 -10
- package/dist/src/{cometapi-CUq-lrPh.js → cometapi-CHlpzlEb.js} +11 -11
- package/dist/src/{cometapi-Bu_LOIac.js → cometapi-DWXoEmta.js} +10 -10
- package/dist/src/{completion-BTZ42lqL.js → completion-2IaHHyEe.js} +4 -4
- package/dist/src/{completion-H4ncr2bK.js → completion-D25ktKRj.js} +4 -4
- package/dist/src/{completion-Dw-9jqz1.cjs → completion-Xj-aJd1b.cjs} +4 -4
- package/dist/src/{docker-kvFgJqkg.js → docker-CJbB16zr.js} +10 -10
- package/dist/src/{docker-B8zxQ12-.js → docker-Cc26CxVP.js} +9 -9
- package/dist/src/{docker-NrVUpeWw.cjs → docker-jL9w9fA6.cjs} +9 -9
- package/dist/src/eval-D4RlVjjL.js +16 -0
- package/dist/src/{eval-TAOD_pvN.js → eval-nWuy1AG2.js} +25 -16
- package/dist/src/evalResult-BeU2oOzP.cjs +9 -0
- package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CPVLU2Np.cjs} +5 -5
- package/dist/src/evalResult-Cf2g8jAB.js +10 -0
- package/dist/src/{evalResult-CnAP64o9.js → evalResult-ChJlD0Kq.js} +5 -5
- package/dist/src/{evalResult-DnYa1RLp.js → evalResult-DIIXLdjz.js} +5 -5
- package/dist/src/evalResult-XDigzbtJ.js +9 -0
- package/dist/src/evaluator-8HYi1Fph.js +37 -0
- package/dist/src/{evaluator-q2_--POi.js → evaluator-GYXFbA_Y.js} +21 -21
- package/dist/src/{extractor-DjxI7POm.cjs → extractor-6TFoLBb3.cjs} +2 -2
- package/dist/src/{extractor-D_1s-Thr.js → extractor-BEtCUN8X.js} +2 -2
- package/dist/src/{extractor-B9uQA-6-.js → extractor-CjCWhyEf.js} +2 -2
- package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-24MJ7lmX.js} +2 -2
- package/dist/src/fetch-BNRxuf6y.cjs +4 -0
- package/dist/src/{fetch-BIPrzD39.js → fetch-Cn7G4hKv.js} +3 -3
- package/dist/src/{fetch-TcCC0nEJ.js → fetch-CruohpfF.js} +3 -3
- package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-E3xbPP9w.cjs} +3 -3
- package/dist/src/{fetch-Cbz8ESXD.js → fetch-YrlVkoyU.js} +1 -1
- package/dist/src/{graders-C9Ni3pNQ.cjs → graders-B6ThhLLt.cjs} +14 -14
- package/dist/src/{graders-CG50NgEB.js → graders-DcCxY5P5.js} +14 -14
- package/dist/src/{graders-DycNkUZ_.js → graders-DmOCxaH5.js} +14 -14
- package/dist/src/graders-R44KsWy_.js +35 -0
- package/dist/src/graders-femSFbBL.js +33 -0
- package/dist/src/graders-q--EyLqs.cjs +32 -0
- package/dist/src/{image-CPL1masV.cjs → image-C2ceOyBf.cjs} +3 -3
- package/dist/src/{image-CxD58J63.js → image-CKdRHN-J.js} +4 -4
- package/dist/src/{image-Dg-oq_2F.js → image-CQjDlt1F.js} +4 -4
- package/dist/src/{image-CpQ-fJt3.cjs → image-CVIQmm2j.cjs} +4 -4
- package/dist/src/{image-BmMQZgoF.js → image-KltLhC3X.js} +4 -4
- package/dist/src/{image-CHGpqpC6.js → image-KxTS2kF1.js} +3 -3
- package/dist/src/index.cjs +130 -85
- package/dist/src/index.js +130 -85
- package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase--WN82LQi.js} +7 -7
- package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BqWUVrpf.cjs} +6 -6
- package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-Cj6dsMqo.js} +6 -6
- package/dist/src/{litellm-CR8i8YRn.js → litellm-Cr4cLT5U.js} +10 -10
- package/dist/src/{litellm-Dcc3mt1B.js → litellm-LBVMdo0D.js} +9 -9
- package/dist/src/{litellm-QthJT0fA.cjs → litellm-fFlC5dSL.cjs} +9 -9
- package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DIn2tYU5.js} +9 -9
- package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-DX0-i4oF.js} +8 -8
- package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-Di9Jozxk.cjs} +8 -8
- package/dist/src/main.js +122 -85
- package/dist/src/{messages-DkV1dizA.js → messages-CrKRl5Pk.js} +8 -8
- package/dist/src/{messages-D4Cu-Nxe.cjs → messages-DR9VV_jC.cjs} +8 -8
- package/dist/src/{messages-9ik4NuJ_.js → messages-HEhixC8X.js} +8 -8
- package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-B0gtacpW.js} +8 -8
- package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-Bq0thILx.js} +9 -9
- package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-DGvKrmSN.cjs} +8 -8
- package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-CjysF4hQ.js} +5 -5
- package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-D5qOssIP.js} +6 -6
- package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-a0F7idWe.cjs} +5 -5
- package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BJQlARp2.js} +4 -4
- package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-BsA1jKss.js} +5 -5
- package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-DLgVrMGX.cjs} +4 -4
- package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-14NR1X1K.js} +4 -4
- package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-BGH8dMy4.cjs} +4 -4
- package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-K8eh39vC.js} +4 -4
- package/dist/src/providers-BGK6Bn3A.cjs +31 -0
- package/dist/src/providers-BMWePlwX.js +31 -0
- package/dist/src/{providers-CHciOV4f.js → providers-C5LI9356.js} +51 -52
- package/dist/src/{providers-D8qKkVu_.js → providers-DbetV5MB.js} +52 -53
- package/dist/src/providers-EXgGEohn.js +33 -0
- package/dist/src/{providers-Bz0U4NGd.cjs → providers-lmV8J0jR.cjs} +51 -52
- package/dist/src/{quiverai-DnlaJG8H.js → quiverai-3Uc6za3c.js} +8 -8
- package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-QLaorfoo.cjs} +8 -8
- package/dist/src/{quiverai-3rtm1glP.js → quiverai-h9NdOMN2.js} +9 -9
- package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-BghZ9VSg.js} +12 -12
- package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DIBiJ2A1.cjs} +11 -11
- package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-Dd4VJGu5.js} +11 -11
- package/dist/src/{scanner-CPosIITM.js → scanner-CMWzx1QH.js} +6 -6
- package/dist/src/server/index.js +108 -78
- package/dist/src/server-BrgtL_8J.js +8 -0
- package/dist/src/server-CJPqXqVd.js +6 -0
- package/dist/src/{server-BKdsd70B.js → server-DHV0r-FB.js} +3 -3
- package/dist/src/server-DZlHxFYI.cjs +6 -0
- package/dist/src/{server-Bb83s34G.js → server-JDwxLoXT.js} +4 -4
- package/dist/src/{server-ByLwfSDf.cjs → server-PndEy2Nj.cjs} +3 -3
- package/dist/src/{signal-B8hvvb8w.js → signal-V7dUBr64.js} +2 -2
- package/dist/src/{store-W4yZqv_e.js → store-BSsuF2Jj.js} +2 -2
- package/dist/src/store-BppHaTbi.cjs +6 -0
- package/dist/src/{store-CaNRmI5T.js → store-C7WtvRII.js} +2 -2
- package/dist/src/store-DyZAxX01.js +7 -0
- package/dist/src/{store-l9-mIRC-.cjs → store-j9IVHCUH.cjs} +2 -2
- package/dist/src/store-jXGyn3ME.js +6 -0
- package/dist/src/{tables-BCcp2r48.js → tables-0EDD8777.js} +2 -2
- package/dist/src/{tables-D_VmN-ui.cjs → tables-B5ootZ1x.cjs} +2 -2
- package/dist/src/{tables-DZWWveaJ.js → tables-DchWyfYe.js} +2 -2
- package/dist/src/{telemetry-CB8ltkOD.js → telemetry-Ar0--jvU.js} +3 -3
- package/dist/src/telemetry-D186sF_u.js +6 -0
- package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-DMNm_KFn.cjs} +3 -3
- package/dist/src/telemetry-DPa6sGTP.js +8 -0
- package/dist/src/{telemetry-CmGyDaxF.js → telemetry-J9g1ySRA.js} +3 -3
- package/dist/src/telemetry-XUPEBnrQ.cjs +6 -0
- package/dist/src/{transcription-BExRps8_.js → transcription-BhomPVy1.js} +6 -6
- package/dist/src/{transcription-CpuRKcIu.js → transcription-Dijlv9vZ.js} +7 -7
- package/dist/src/{transcription-B4GbFkhb.cjs → transcription-hPpt68W9.cjs} +6 -6
- package/dist/src/{transform-B5HUD86U.cjs → transform-Bow1252l.cjs} +3 -3
- package/dist/src/{transform-B-AYIQmM.js → transform-DFkyOjQ7.js} +3 -3
- package/dist/src/{transform-BwedQA8W.js → transform-yRqaT_HG.js} +3 -3
- package/dist/src/{types-n3ttAnOa.js → types-BjfZC_QB.js} +2 -17
- package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
- package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
- package/dist/src/{util-DX58t30T.js → util-07lmo96G.js} +3 -3
- package/dist/src/{util-Cd2vI7ii.js → util-4LvhctH4.js} +3 -3
- package/dist/src/{util-BnF-xRPi.js → util-7G7p_iRM.js} +3 -3
- package/dist/src/{util-KE6uFWHG.cjs → util-9SFCdUzk.cjs} +3 -3
- package/dist/src/{util-BXjq8vOq.js → util-CD-FiF5u.js} +3 -3
- package/dist/src/{util-B-Yo0qzc.cjs → util-CxWghxGa.cjs} +3 -3
- package/dist/src/{util-CyzW5ayt.js → util-DhvhJDlT.js} +3 -3
- package/dist/src/{util-BT0p5c9X.cjs → util-Dqg4mr2c.cjs} +3 -3
- package/dist/src/{util-CwsTKPXi.js → util-ImmJ_Dxe.js} +3 -3
- package/dist/src/{utils-BWOFfOel.js → utils-CF4fxllU.js} +2 -2
- package/dist/src/{utils-HdRSruG0.js → utils-D_6Jp7Sl.js} +2 -2
- package/dist/src/{utils-BSpcJB4B.cjs → utils-u1jU5_Iq.cjs} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +10 -8
- package/dist/src/app/assets/index-CFq6nz43.css +0 -1
- package/dist/src/cache-BZYapLG-.cjs +0 -6
- package/dist/src/eval-g5SH3NBa.js +0 -16
- package/dist/src/evalResult-Bt2fmj0V.js +0 -9
- package/dist/src/evalResult-DRNH7Jui.js +0 -10
- package/dist/src/evalResult-DqLWyktN.cjs +0 -9
- package/dist/src/evaluator-DIenqH86.js +0 -37
- package/dist/src/fetch-gmneHrJc.cjs +0 -4
- package/dist/src/graders-BHcgLz2P.js +0 -35
- package/dist/src/graders-BjBhwJOb.cjs +0 -32
- package/dist/src/graders-DEjGX1uY.js +0 -33
- package/dist/src/providers-BBGwMMzj.js +0 -33
- package/dist/src/providers-BR5P_qep.cjs +0 -31
- package/dist/src/providers-CDib9hVp.js +0 -31
- package/dist/src/server-B5aIhzYk.cjs +0 -6
- package/dist/src/server-DJCw85s0.js +0 -8
- package/dist/src/server-DKj0QIm9.js +0 -6
- package/dist/src/store-Bo9FEBPO.js +0 -7
- package/dist/src/store-CqHttBql.js +0 -6
- package/dist/src/store-fsxOJl5O.cjs +0 -6
- package/dist/src/telemetry-B2uJOvMA.js +0 -8
- package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
- package/dist/src/telemetry-rP_n_bnZ.js +0 -6
package/dist/src/index.cjs
CHANGED
|
@@ -5,36 +5,36 @@ const require_esm = require('./esm-gWVPXn3r.cjs');
|
|
|
5
5
|
const require_pythonUtils = require('./pythonUtils-rOCm9w_5.cjs');
|
|
6
6
|
const require_fileExtensions = require('./fileExtensions-DkJYkWUy.cjs');
|
|
7
7
|
const require_transform = require('./transform-DshYLyBq.cjs');
|
|
8
|
-
const require_graders = require('./graders-C9Ni3pNQ.cjs');
|
|
9
|
-
const require_types = require('./types-DAmPdu4_.cjs');
|
|
10
|
-
const require_util = require('./util-B-Yo0qzc.cjs');
|
|
11
|
-
const require_fetch = require('./fetch-MdGxb6Bv.cjs');
|
|
8
|
+
const require_graders = require('./graders-B6ThhLLt.cjs');
|
|
9
|
+
const require_types = require('./types-Cixl-wRO.cjs');
|
|
10
|
+
const require_util = require('./util-CxWghxGa.cjs');
|
|
11
|
+
const require_fetch = require('./fetch-E3xbPP9w.cjs');
|
|
12
12
|
const require_time = require('./time-CbtsO5_a.cjs');
|
|
13
|
-
const require_cache = require('./cache-DYJRe0Jt.cjs');
|
|
14
|
-
const require_providers = require('./providers-Bz0U4NGd.cjs');
|
|
15
|
-
const require_utils = require('./utils-BSpcJB4B.cjs');
|
|
16
|
-
const require_extractor = require('./extractor-DjxI7POm.cjs');
|
|
13
|
+
const require_cache = require('./cache-PWdK7nbt.cjs');
|
|
14
|
+
const require_providers = require('./providers-lmV8J0jR.cjs');
|
|
15
|
+
const require_utils = require('./utils-u1jU5_Iq.cjs');
|
|
16
|
+
const require_extractor = require('./extractor-6TFoLBb3.cjs');
|
|
17
17
|
require('./genaiTracer-zK0Mtd_D.cjs');
|
|
18
|
-
const require_chat = require('./chat-Biz16BQv.cjs');
|
|
18
|
+
const require_chat = require('./chat-DKNIBnnw.cjs');
|
|
19
19
|
const require_tokenUsageUtils = require('./tokenUsageUtils-DFp2VFtG.cjs');
|
|
20
|
-
const require_transform$1 = require('./transform-B5HUD86U.cjs');
|
|
21
|
-
require('./messages-D4Cu-Nxe.cjs');
|
|
22
|
-
require('./util-KE6uFWHG.cjs');
|
|
20
|
+
const require_transform$1 = require('./transform-Bow1252l.cjs');
|
|
21
|
+
require('./messages-DR9VV_jC.cjs');
|
|
22
|
+
require('./util-9SFCdUzk.cjs');
|
|
23
23
|
require('./openai-eZz8QsxY.cjs');
|
|
24
|
-
const require_util$2 = require('./util-BT0p5c9X.cjs');
|
|
25
|
-
require('./completion-Dw-9jqz1.cjs');
|
|
26
|
-
const require_accounts = require('./accounts-uWRxEv-D.cjs');
|
|
27
|
-
const require_server = require('./server-ByLwfSDf.cjs');
|
|
28
|
-
const require_blobs = require('./blobs-D-Neij7m.cjs');
|
|
29
|
-
const require_tables = require('./tables-D_VmN-ui.cjs');
|
|
30
|
-
const require_telemetry = require('./telemetry-DrSTNTH-.cjs');
|
|
24
|
+
const require_util$2 = require('./util-Dqg4mr2c.cjs');
|
|
25
|
+
require('./completion-Xj-aJd1b.cjs');
|
|
26
|
+
const require_accounts = require('./accounts-BGa3oLcq.cjs');
|
|
27
|
+
const require_server = require('./server-PndEy2Nj.cjs');
|
|
28
|
+
const require_blobs = require('./blobs-h2nW1G8J.cjs');
|
|
29
|
+
const require_tables = require('./tables-B5ootZ1x.cjs');
|
|
30
|
+
const require_telemetry = require('./telemetry-DMNm_KFn.cjs');
|
|
31
31
|
const require_text = require('./text-DF2hMKdg.cjs');
|
|
32
|
-
const require_store = require('./store-l9-mIRC-.cjs');
|
|
33
|
-
require('./base-CRrQ15eu.cjs');
|
|
34
|
-
require('./image-CpQ-fJt3.cjs');
|
|
32
|
+
const require_store = require('./store-j9IVHCUH.cjs');
|
|
33
|
+
require('./base-oCiJ630X.cjs');
|
|
34
|
+
require('./image-CVIQmm2j.cjs');
|
|
35
35
|
const require_providerRegistry = require('./providerRegistry-DFXamjVO.cjs');
|
|
36
36
|
const require_rubyUtils = require('./rubyUtils-DCci_Y0f.cjs');
|
|
37
|
-
const require_evalResult = require('./evalResult-CFSvp3Zh.cjs');
|
|
37
|
+
const require_evalResult = require('./evalResult-CPVLU2Np.cjs');
|
|
38
38
|
let fs = require("fs");
|
|
39
39
|
fs = require_logger.__toESM(fs);
|
|
40
40
|
let path = require("path");
|
|
@@ -307,7 +307,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
307
307
|
require_telemetry.telemetry_default.record("feature_used", { feature: "tracing" });
|
|
308
308
|
try {
|
|
309
309
|
require_logger.logger_default.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
310
|
-
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-COmdVimi.cjs"));
|
|
310
|
+
const { startOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-BGH8dMy4.cjs"));
|
|
311
311
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
312
312
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
313
313
|
require_logger.logger_default.debug(`[EvaluatorTracing] Starting OTLP receiver on ${host}:${port}`);
|
|
@@ -330,7 +330,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
330
330
|
async function stopOtlpReceiverIfNeeded() {
|
|
331
331
|
if (otlpReceiverStarted) try {
|
|
332
332
|
require_logger.logger_default.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
333
|
-
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-COmdVimi.cjs"));
|
|
333
|
+
const { stopOTLPReceiver } = await Promise.resolve().then(() => require("./otlpReceiver-BGH8dMy4.cjs"));
|
|
334
334
|
await stopOTLPReceiver();
|
|
335
335
|
otlpReceiverStarted = false;
|
|
336
336
|
require_logger.logger_default.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -365,7 +365,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
365
365
|
}
|
|
366
366
|
if (!tracingEnabled) return null;
|
|
367
367
|
require_logger.logger_default.debug("[EvaluatorTracing] Importing trace store");
|
|
368
|
-
const { getTraceStore } = await Promise.resolve().then(() => require("./store-fsxOJl5O.cjs"));
|
|
368
|
+
const { getTraceStore } = await Promise.resolve().then(() => require("./store-BppHaTbi.cjs"));
|
|
369
369
|
const traceStore = getTraceStore();
|
|
370
370
|
const traceId = generateTraceId();
|
|
371
371
|
const spanId = generateSpanId();
|
|
@@ -4266,7 +4266,7 @@ var Evaluator = class {
|
|
|
4266
4266
|
const defaultProvider = testSuite.defaultTest.provider;
|
|
4267
4267
|
if (require_types.isApiProvider(defaultProvider)) testCase.provider = defaultProvider;
|
|
4268
4268
|
else if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
4269
|
-
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BR5P_qep.cjs"));
|
|
4269
|
+
const { loadApiProvider } = await Promise.resolve().then(() => require("./providers-BGK6Bn3A.cjs"));
|
|
4270
4270
|
testCase.provider = await loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
4271
4271
|
} else testCase.provider = defaultProvider;
|
|
4272
4272
|
}
|
|
@@ -4341,7 +4341,7 @@ var Evaluator = class {
|
|
|
4341
4341
|
if (evalOption.test.assert?.some((a) => a.type === "max-score")) rowsWithMaxScoreAssertion.add(evalOption.testIdx);
|
|
4342
4342
|
}
|
|
4343
4343
|
if (require_logger.cliState_default.resume && this.evalRecord.persisted) try {
|
|
4344
|
-
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-DqLWyktN.cjs"));
|
|
4344
|
+
const { default: EvalResult } = await Promise.resolve().then(() => require("./evalResult-BeU2oOzP.cjs"));
|
|
4345
4345
|
const completedPairs = await EvalResult.getCompletedIndexPairs(this.evalRecord.id, { excludeErrors: require_logger.cliState_default.retryMode });
|
|
4346
4346
|
const originalCount = runEvalOptions.length;
|
|
4347
4347
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -5273,7 +5273,7 @@ function convertResultsToTable(eval_) {
|
|
|
5273
5273
|
description: result.description || void 0,
|
|
5274
5274
|
outputs: [],
|
|
5275
5275
|
vars: result.vars ? Object.values(varsForHeader).map((varName) => {
|
|
5276
|
-
const varValue = result.vars?.[varName]
|
|
5276
|
+
const varValue = result.vars?.[varName] ?? "";
|
|
5277
5277
|
if (typeof varValue === "string") return varValue;
|
|
5278
5278
|
return JSON.stringify(varValue, null, 2);
|
|
5279
5279
|
}).flat() : [],
|
|
@@ -5308,7 +5308,11 @@ function convertResultsToTable(eval_) {
|
|
|
5308
5308
|
varValuesForRow.set(result.testIdx, result.vars);
|
|
5309
5309
|
rowMap[result.testIdx] = row;
|
|
5310
5310
|
let resultText;
|
|
5311
|
-
const
|
|
5311
|
+
const rawOutput = result.response?.output;
|
|
5312
|
+
let outputTextDisplay;
|
|
5313
|
+
if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
|
|
5314
|
+
else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
|
|
5315
|
+
else outputTextDisplay = String(rawOutput);
|
|
5312
5316
|
if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
|
|
5313
5317
|
else resultText = `${outputTextDisplay}`;
|
|
5314
5318
|
else if (result.error) resultText = `${result.error}`;
|
|
@@ -5355,7 +5359,7 @@ function convertResultsToTable(eval_) {
|
|
|
5355
5359
|
const rows = Object.values(rowMap);
|
|
5356
5360
|
const sortedVars = [...varsForHeader].sort();
|
|
5357
5361
|
for (const row of rows) row.vars = sortedVars.map((varName) => {
|
|
5358
|
-
const varValue = varValuesForRow.get(row.testIdx)?.[varName]
|
|
5362
|
+
const varValue = varValuesForRow.get(row.testIdx)?.[varName] ?? "";
|
|
5359
5363
|
if (typeof varValue === "string") return varValue;
|
|
5360
5364
|
return JSON.stringify(varValue, null, 2);
|
|
5361
5365
|
});
|
|
@@ -5372,7 +5376,11 @@ function convertResultsToTable(eval_) {
|
|
|
5372
5376
|
//#region src/util/exportToFile/index.ts
|
|
5373
5377
|
function convertEvalResultToTableCell(result) {
|
|
5374
5378
|
let resultText;
|
|
5375
|
-
const
|
|
5379
|
+
const rawOutput = result.response?.output;
|
|
5380
|
+
let outputTextDisplay;
|
|
5381
|
+
if (rawOutput !== null && typeof rawOutput === "object") outputTextDisplay = JSON.stringify(rawOutput);
|
|
5382
|
+
else if (rawOutput == null || rawOutput === "") outputTextDisplay = result.error || "";
|
|
5383
|
+
else outputTextDisplay = String(rawOutput);
|
|
5376
5384
|
if (result.testCase.assert) if (result.success) resultText = `${outputTextDisplay || result.error || ""}`;
|
|
5377
5385
|
else resultText = `${outputTextDisplay}`;
|
|
5378
5386
|
else if (result.error) resultText = `${result.error}`;
|
|
@@ -5418,11 +5426,12 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
5418
5426
|
outputs: [],
|
|
5419
5427
|
vars: Object.values(varsForHeader).map((varName) => {
|
|
5420
5428
|
if (varName === "sessionId") {
|
|
5421
|
-
const
|
|
5429
|
+
const sessionId = results[0].testCase.vars?.sessionId;
|
|
5430
|
+
const varValue = sessionId == null || sessionId === "" ? results[0].metadata?.sessionId ?? "" : sessionId;
|
|
5422
5431
|
if (typeof varValue === "string") return varValue;
|
|
5423
5432
|
return JSON.stringify(varValue);
|
|
5424
5433
|
}
|
|
5425
|
-
const varValue = results[0].testCase.vars?.[varName]
|
|
5434
|
+
const varValue = results[0].testCase.vars?.[varName] ?? "";
|
|
5426
5435
|
if (typeof varValue === "string") return varValue;
|
|
5427
5436
|
return JSON.stringify(varValue);
|
|
5428
5437
|
}).flat(),
|
|
@@ -9506,11 +9515,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
9506
9515
|
});
|
|
9507
9516
|
await require_providers.validateStrategies(strategies);
|
|
9508
9517
|
await validateSharpDependency(strategies, plugins);
|
|
9509
|
-
if (strategies.some((s) => require_types.isLanguageDisallowedStrategy(s.id)) && language) {
|
|
9510
|
-
const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
|
|
9511
|
-
language = "en";
|
|
9512
|
-
require_logger.logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
|
|
9513
|
-
}
|
|
9514
9518
|
const redteamProvider = await require_providers.redteamProviderManager.getProvider({ provider });
|
|
9515
9519
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
9516
9520
|
require_logger.logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.default.yellow(plugins.map((p) => {
|
|
@@ -10995,60 +10999,101 @@ async function deleteErrorResults(resultIds) {
|
|
|
10995
10999
|
await require_tables.getDb().delete(require_tables.evalResultsTable).where((0, drizzle_orm.inArray)(require_tables.evalResultsTable.id, resultIds));
|
|
10996
11000
|
require_logger.logger_default.debug(`Deleted ${resultIds.length} error results from database`);
|
|
10997
11001
|
}
|
|
11002
|
+
const RECALCULATE_BATCH_SIZE = 1e3;
|
|
10998
11003
|
/**
|
|
10999
|
-
* Recalculates prompt metrics based on current results after ERROR results have been deleted
|
|
11004
|
+
* Recalculates prompt metrics based on current results after ERROR results have been deleted.
|
|
11005
|
+
* Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
|
|
11000
11006
|
*/
|
|
11001
11007
|
async function recalculatePromptMetrics(evalRecord) {
|
|
11002
11008
|
require_logger.logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
|
|
11003
|
-
|
|
11009
|
+
const startTime = Date.now();
|
|
11010
|
+
let batchNumber = 0;
|
|
11011
|
+
let totalProcessed = 0;
|
|
11004
11012
|
const promptMetricsMap = /* @__PURE__ */ new Map();
|
|
11005
|
-
for (const
|
|
11006
|
-
|
|
11007
|
-
|
|
11008
|
-
|
|
11009
|
-
|
|
11010
|
-
|
|
11011
|
-
|
|
11012
|
-
|
|
11013
|
-
|
|
11014
|
-
|
|
11015
|
-
|
|
11016
|
-
|
|
11017
|
-
|
|
11018
|
-
|
|
11019
|
-
|
|
11020
|
-
|
|
11021
|
-
|
|
11022
|
-
|
|
11023
|
-
|
|
11024
|
-
|
|
11025
|
-
|
|
11026
|
-
|
|
11027
|
-
|
|
11028
|
-
|
|
11029
|
-
|
|
11030
|
-
|
|
11031
|
-
|
|
11032
|
-
|
|
11033
|
-
|
|
11034
|
-
|
|
11035
|
-
|
|
11036
|
-
|
|
11037
|
-
|
|
11038
|
-
|
|
11039
|
-
|
|
11040
|
-
|
|
11041
|
-
|
|
11042
|
-
|
|
11043
|
-
|
|
11044
|
-
|
|
11045
|
-
|
|
11046
|
-
|
|
11013
|
+
for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
|
|
11014
|
+
score: 0,
|
|
11015
|
+
testPassCount: 0,
|
|
11016
|
+
testFailCount: 0,
|
|
11017
|
+
testErrorCount: 0,
|
|
11018
|
+
assertPassCount: 0,
|
|
11019
|
+
assertFailCount: 0,
|
|
11020
|
+
totalLatencyMs: 0,
|
|
11021
|
+
tokenUsage: require_tokenUsageUtils.createEmptyTokenUsage(),
|
|
11022
|
+
namedScores: {},
|
|
11023
|
+
namedScoresCount: {},
|
|
11024
|
+
cost: 0
|
|
11025
|
+
});
|
|
11026
|
+
let currentResultId;
|
|
11027
|
+
try {
|
|
11028
|
+
for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
|
|
11029
|
+
batchNumber++;
|
|
11030
|
+
require_logger.logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
|
|
11031
|
+
for (const result of batch) {
|
|
11032
|
+
currentResultId = result.id;
|
|
11033
|
+
const metrics = promptMetricsMap.get(result.promptIdx);
|
|
11034
|
+
if (!metrics) {
|
|
11035
|
+
require_logger.logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
|
|
11036
|
+
resultId: result.id,
|
|
11037
|
+
evalId: evalRecord.id
|
|
11038
|
+
});
|
|
11039
|
+
continue;
|
|
11040
|
+
}
|
|
11041
|
+
if (result.success) metrics.testPassCount++;
|
|
11042
|
+
else if (result.failureReason === require_types.ResultFailureReason.ERROR) metrics.testErrorCount++;
|
|
11043
|
+
else metrics.testFailCount++;
|
|
11044
|
+
metrics.score += result.score ?? 0;
|
|
11045
|
+
metrics.totalLatencyMs += result.latencyMs || 0;
|
|
11046
|
+
metrics.cost += result.cost || 0;
|
|
11047
|
+
for (const [key, value] of Object.entries(result.namedScores || {})) {
|
|
11048
|
+
metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
|
|
11049
|
+
const testVars = result.testCase?.vars || {};
|
|
11050
|
+
let contributingAssertions = 0;
|
|
11051
|
+
result.gradingResult?.componentResults?.forEach((componentResult) => {
|
|
11052
|
+
if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
|
|
11053
|
+
});
|
|
11054
|
+
metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
|
|
11055
|
+
}
|
|
11056
|
+
if (result.gradingResult?.componentResults) {
|
|
11057
|
+
metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
|
|
11058
|
+
metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
|
|
11059
|
+
}
|
|
11060
|
+
if (result.response?.tokenUsage) require_tokenUsageUtils.accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
|
|
11061
|
+
if (result.gradingResult?.tokensUsed) {
|
|
11062
|
+
if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = require_tokenUsageUtils.createEmptyAssertions();
|
|
11063
|
+
require_tokenUsageUtils.accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
|
|
11064
|
+
}
|
|
11065
|
+
}
|
|
11066
|
+
totalProcessed += batch.length;
|
|
11047
11067
|
}
|
|
11068
|
+
} catch (error) {
|
|
11069
|
+
require_logger.logger_default.error("Error during batched metrics recalculation", {
|
|
11070
|
+
phase: "calculation",
|
|
11071
|
+
batchNumber,
|
|
11072
|
+
totalProcessed,
|
|
11073
|
+
currentResultId,
|
|
11074
|
+
evalId: evalRecord.id,
|
|
11075
|
+
error
|
|
11076
|
+
});
|
|
11077
|
+
throw error;
|
|
11048
11078
|
}
|
|
11049
11079
|
for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
|
|
11050
|
-
if (evalRecord.persisted)
|
|
11051
|
-
|
|
11080
|
+
if (evalRecord.persisted) try {
|
|
11081
|
+
await evalRecord.addPrompts(evalRecord.prompts);
|
|
11082
|
+
} catch (error) {
|
|
11083
|
+
require_logger.logger_default.error("Error saving recalculated prompt metrics", {
|
|
11084
|
+
phase: "save",
|
|
11085
|
+
evalId: evalRecord.id,
|
|
11086
|
+
promptCount: evalRecord.prompts.length,
|
|
11087
|
+
error
|
|
11088
|
+
});
|
|
11089
|
+
throw error;
|
|
11090
|
+
}
|
|
11091
|
+
const durationMs = Date.now() - startTime;
|
|
11092
|
+
require_logger.logger_default.debug("Prompt metrics recalculation completed", {
|
|
11093
|
+
totalBatches: batchNumber,
|
|
11094
|
+
totalResults: totalProcessed,
|
|
11095
|
+
durationMs
|
|
11096
|
+
});
|
|
11052
11097
|
}
|
|
11053
11098
|
|
|
11054
11099
|
//#endregion
|