promptfoo 0.120.21 → 0.120.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{accounts-uWRxEv-D.cjs → accounts-BGa3oLcq.cjs} +4 -4
- package/dist/src/{accounts-DYMHC4iY.js → accounts-BR9WiyfR.js} +4 -4
- package/dist/src/{accounts-CjU7QSnv.js → accounts-Ckp-8szu.js} +5 -5
- package/dist/src/{agentic-utils-BMbQVVd4.js → agentic-utils-BE3QlHKy.js} +2 -2
- package/dist/src/{agents-CzCD5974.cjs → agents-BhbWppc2.cjs} +6 -6
- package/dist/src/{agents-DGQULdoA.js → agents-BtgWQypw.js} +3 -3
- package/dist/src/{agents-CJ4BY_QU.js → agents-BxHVTa7v.js} +7 -7
- package/dist/src/{agents-DABC7ROH.js → agents-CMv_PdKa.js} +6 -6
- package/dist/src/{agents-BNSS1kSZ.js → agents-CTESpLg_.js} +2 -2
- package/dist/src/{agents-BJxm0Kcs.cjs → agents-CnjTsnFv.cjs} +2 -2
- package/dist/src/{aimlapi-B2qJXUsC.js → aimlapi-BDfWTIuB.js} +10 -10
- package/dist/src/{aimlapi-ABHUqmBg.cjs → aimlapi-B_JaNK7C.cjs} +9 -9
- package/dist/src/{aimlapi-CnBWjM7x.js → aimlapi-DzmOPVaB.js} +9 -9
- package/dist/src/app/assets/index-CbpQqvt4.css +1 -0
- package/dist/src/app/assets/{index-BVE859O5.js → index-gBlK8Ooo.js} +74 -74
- package/dist/src/app/index.html +2 -2
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-BT9aSeWg.js → audio-CLv1koN0.js} +4 -4
- package/dist/src/{audio-CSrI_LFf.js → audio-DCfFE8hp.js} +3 -3
- package/dist/src/{audio-DSV364HB.cjs → audio-UH7JqpeT.cjs} +3 -3
- package/dist/src/{base-d8hHSpEx.js → base-BMtzQZDN.js} +2 -2
- package/dist/src/{base-NFXflrMy.js → base-DGXF05vt.js} +2 -2
- package/dist/src/{base-CRrQ15eu.cjs → base-oCiJ630X.cjs} +2 -2
- package/dist/src/{blobs-Dznj-8xW.js → blobs-CNXTPz_3.js} +2 -2
- package/dist/src/{blobs-DC2sBLv8.js → blobs-eYQ1PJ-z.js} +2 -2
- package/dist/src/{blobs-D-Neij7m.cjs → blobs-h2nW1G8J.cjs} +2 -2
- package/dist/src/cache-BbrUrAzE.cjs +6 -0
- package/dist/src/{cache-aKR8pp5y.js → cache-CG_VUeq6.js} +2 -2
- package/dist/src/{cache-CMvgAnSi.js → cache-CkRahBA6.js} +2 -2
- package/dist/src/{cache-scuP1IRq.js → cache-CuMs-Ct_.js} +3 -3
- package/dist/src/{cache-DYJRe0Jt.cjs → cache-PWdK7nbt.cjs} +2 -2
- package/dist/src/{cache-xb4oZe27.js → cache-_erPRF2X.js} +2 -2
- package/dist/src/{chat-DyjTngG2.js → chat-4dCIDs0X.js} +8 -8
- package/dist/src/{chat-BbgGuvwu.js → chat-Bvvgb30H.js} +6 -6
- package/dist/src/{chat-CfJU9hHo.cjs → chat-CPZqlTJM.cjs} +8 -8
- package/dist/src/{chat-BH8Fxyap.js → chat-Cz8uiybK.js} +6 -6
- package/dist/src/{chat-Biz16BQv.cjs → chat-DKNIBnnw.cjs} +6 -6
- package/dist/src/{chat-Du_GyqZe.js → chat-Noh-ZAEA.js} +9 -9
- package/dist/src/{claude-agent-sdk-CdRG6b7t.js → claude-agent-sdk-C_1rEeQS.js} +8 -8
- package/dist/src/{claude-agent-sdk-BW-YEYQi.cjs → claude-agent-sdk-Cf9ilftQ.cjs} +8 -8
- package/dist/src/{claude-agent-sdk-CL5NzJEu.js → claude-agent-sdk-Duik96Pj.js} +9 -9
- package/dist/src/{cloud-UwfmA0Ne.js → cloud-CZHXB86j.js} +2 -2
- package/dist/src/{cloud-1cbcFXE9.js → cloud-Ckel3stH.js} +1 -1
- package/dist/src/{cloudflare-ai-BB77BUMD.js → cloudflare-ai-8EaAKyYe.js} +9 -9
- package/dist/src/{cloudflare-ai-D2Wlbus6.cjs → cloudflare-ai-CCHnFeAX.cjs} +9 -9
- package/dist/src/{cloudflare-ai-DXTxK3ou.js → cloudflare-ai-DM055RzH.js} +10 -10
- package/dist/src/{cloudflare-gateway-DeZRwitI.js → cloudflare-gateway-BMUSnpcR.js} +11 -11
- package/dist/src/{cloudflare-gateway-Tn2OEgYQ.js → cloudflare-gateway-Bcw05eOu.js} +10 -10
- package/dist/src/{cloudflare-gateway-D5IjGRjZ.cjs → cloudflare-gateway-DNxNiVNW.cjs} +10 -10
- package/dist/src/{cometapi-RP8h5O2f.cjs → cometapi-Bc95q8BF.cjs} +10 -10
- package/dist/src/{cometapi-CUq-lrPh.js → cometapi-CHlpzlEb.js} +11 -11
- package/dist/src/{cometapi-Bu_LOIac.js → cometapi-DWXoEmta.js} +10 -10
- package/dist/src/{completion-BTZ42lqL.js → completion-2IaHHyEe.js} +4 -4
- package/dist/src/{completion-H4ncr2bK.js → completion-D25ktKRj.js} +4 -4
- package/dist/src/{completion-Dw-9jqz1.cjs → completion-Xj-aJd1b.cjs} +4 -4
- package/dist/src/{docker-kvFgJqkg.js → docker-CJbB16zr.js} +10 -10
- package/dist/src/{docker-B8zxQ12-.js → docker-Cc26CxVP.js} +9 -9
- package/dist/src/{docker-NrVUpeWw.cjs → docker-jL9w9fA6.cjs} +9 -9
- package/dist/src/eval-D4RlVjjL.js +16 -0
- package/dist/src/{eval-TAOD_pvN.js → eval-nWuy1AG2.js} +25 -16
- package/dist/src/evalResult-BeU2oOzP.cjs +9 -0
- package/dist/src/{evalResult-CFSvp3Zh.cjs → evalResult-CPVLU2Np.cjs} +5 -5
- package/dist/src/evalResult-Cf2g8jAB.js +10 -0
- package/dist/src/{evalResult-CnAP64o9.js → evalResult-ChJlD0Kq.js} +5 -5
- package/dist/src/{evalResult-DnYa1RLp.js → evalResult-DIIXLdjz.js} +5 -5
- package/dist/src/evalResult-XDigzbtJ.js +9 -0
- package/dist/src/evaluator-8HYi1Fph.js +37 -0
- package/dist/src/{evaluator-q2_--POi.js → evaluator-GYXFbA_Y.js} +21 -21
- package/dist/src/{extractor-DjxI7POm.cjs → extractor-6TFoLBb3.cjs} +2 -2
- package/dist/src/{extractor-D_1s-Thr.js → extractor-BEtCUN8X.js} +2 -2
- package/dist/src/{extractor-B9uQA-6-.js → extractor-CjCWhyEf.js} +2 -2
- package/dist/src/{fetch-Cb-qQ7Hm.js → fetch-24MJ7lmX.js} +2 -2
- package/dist/src/fetch-BNRxuf6y.cjs +4 -0
- package/dist/src/{fetch-BIPrzD39.js → fetch-Cn7G4hKv.js} +3 -3
- package/dist/src/{fetch-TcCC0nEJ.js → fetch-CruohpfF.js} +3 -3
- package/dist/src/{fetch-MdGxb6Bv.cjs → fetch-E3xbPP9w.cjs} +3 -3
- package/dist/src/{fetch-Cbz8ESXD.js → fetch-YrlVkoyU.js} +1 -1
- package/dist/src/{graders-C9Ni3pNQ.cjs → graders-B6ThhLLt.cjs} +14 -14
- package/dist/src/{graders-CG50NgEB.js → graders-DcCxY5P5.js} +14 -14
- package/dist/src/{graders-DycNkUZ_.js → graders-DmOCxaH5.js} +14 -14
- package/dist/src/graders-R44KsWy_.js +35 -0
- package/dist/src/graders-femSFbBL.js +33 -0
- package/dist/src/graders-q--EyLqs.cjs +32 -0
- package/dist/src/{image-CPL1masV.cjs → image-C2ceOyBf.cjs} +3 -3
- package/dist/src/{image-CxD58J63.js → image-CKdRHN-J.js} +4 -4
- package/dist/src/{image-Dg-oq_2F.js → image-CQjDlt1F.js} +4 -4
- package/dist/src/{image-CpQ-fJt3.cjs → image-CVIQmm2j.cjs} +4 -4
- package/dist/src/{image-BmMQZgoF.js → image-KltLhC3X.js} +4 -4
- package/dist/src/{image-CHGpqpC6.js → image-KxTS2kF1.js} +3 -3
- package/dist/src/index.cjs +130 -85
- package/dist/src/index.js +130 -85
- package/dist/src/{knowledgeBase-Csr92Wc4.js → knowledgeBase--WN82LQi.js} +7 -7
- package/dist/src/{knowledgeBase-Dy1Ln178.cjs → knowledgeBase-BqWUVrpf.cjs} +6 -6
- package/dist/src/{knowledgeBase-BXvPbeXF.js → knowledgeBase-Cj6dsMqo.js} +6 -6
- package/dist/src/{litellm-CR8i8YRn.js → litellm-Cr4cLT5U.js} +10 -10
- package/dist/src/{litellm-Dcc3mt1B.js → litellm-LBVMdo0D.js} +9 -9
- package/dist/src/{litellm-QthJT0fA.cjs → litellm-fFlC5dSL.cjs} +9 -9
- package/dist/src/{luma-ray-vv5ggMvl.js → luma-ray-DIn2tYU5.js} +9 -9
- package/dist/src/{luma-ray-DbAhu9zC.js → luma-ray-DX0-i4oF.js} +8 -8
- package/dist/src/{luma-ray-DpAL4igs.cjs → luma-ray-Di9Jozxk.cjs} +8 -8
- package/dist/src/main.js +122 -85
- package/dist/src/{messages-DkV1dizA.js → messages-CrKRl5Pk.js} +8 -8
- package/dist/src/{messages-D4Cu-Nxe.cjs → messages-DR9VV_jC.cjs} +8 -8
- package/dist/src/{messages-9ik4NuJ_.js → messages-HEhixC8X.js} +8 -8
- package/dist/src/{nova-reel-mMroVw7W.js → nova-reel-B0gtacpW.js} +8 -8
- package/dist/src/{nova-reel-B1c4YdE8.js → nova-reel-Bq0thILx.js} +9 -9
- package/dist/src/{nova-reel-Bs903nib.cjs → nova-reel-DGvKrmSN.cjs} +8 -8
- package/dist/src/{nova-sonic-BLgglDkA.js → nova-sonic-CjysF4hQ.js} +5 -5
- package/dist/src/{nova-sonic-rZe65fKE.js → nova-sonic-D5qOssIP.js} +6 -6
- package/dist/src/{nova-sonic-BylXGNj-.cjs → nova-sonic-a0F7idWe.cjs} +5 -5
- package/dist/src/{opencode-sdk-G-dBq9xe.js → opencode-sdk-BJQlARp2.js} +4 -4
- package/dist/src/{opencode-sdk-lf_9VEhj.js → opencode-sdk-BsA1jKss.js} +5 -5
- package/dist/src/{opencode-sdk-B3o0jNxx.cjs → opencode-sdk-DLgVrMGX.cjs} +4 -4
- package/dist/src/{otlpReceiver-CO1TzzW1.js → otlpReceiver-14NR1X1K.js} +4 -4
- package/dist/src/{otlpReceiver-COmdVimi.cjs → otlpReceiver-BGH8dMy4.cjs} +4 -4
- package/dist/src/{otlpReceiver-CBZNmyZi.js → otlpReceiver-K8eh39vC.js} +4 -4
- package/dist/src/providers-BGK6Bn3A.cjs +31 -0
- package/dist/src/providers-BMWePlwX.js +31 -0
- package/dist/src/{providers-CHciOV4f.js → providers-C5LI9356.js} +51 -52
- package/dist/src/{providers-D8qKkVu_.js → providers-DbetV5MB.js} +52 -53
- package/dist/src/providers-EXgGEohn.js +33 -0
- package/dist/src/{providers-Bz0U4NGd.cjs → providers-lmV8J0jR.cjs} +51 -52
- package/dist/src/{quiverai-DnlaJG8H.js → quiverai-3Uc6za3c.js} +8 -8
- package/dist/src/{quiverai-BIveJ-Ql.cjs → quiverai-QLaorfoo.cjs} +8 -8
- package/dist/src/{quiverai-3rtm1glP.js → quiverai-h9NdOMN2.js} +9 -9
- package/dist/src/{sagemaker-B4hvJgkV.js → sagemaker-BghZ9VSg.js} +12 -12
- package/dist/src/{sagemaker-BsttQmAf.cjs → sagemaker-DIBiJ2A1.cjs} +11 -11
- package/dist/src/{sagemaker-BvwRP8TL.js → sagemaker-Dd4VJGu5.js} +11 -11
- package/dist/src/{scanner-CPosIITM.js → scanner-CMWzx1QH.js} +6 -6
- package/dist/src/server/index.js +108 -78
- package/dist/src/server-BrgtL_8J.js +8 -0
- package/dist/src/server-CJPqXqVd.js +6 -0
- package/dist/src/{server-BKdsd70B.js → server-DHV0r-FB.js} +3 -3
- package/dist/src/server-DZlHxFYI.cjs +6 -0
- package/dist/src/{server-Bb83s34G.js → server-JDwxLoXT.js} +4 -4
- package/dist/src/{server-ByLwfSDf.cjs → server-PndEy2Nj.cjs} +3 -3
- package/dist/src/{signal-B8hvvb8w.js → signal-V7dUBr64.js} +2 -2
- package/dist/src/{store-W4yZqv_e.js → store-BSsuF2Jj.js} +2 -2
- package/dist/src/store-BppHaTbi.cjs +6 -0
- package/dist/src/{store-CaNRmI5T.js → store-C7WtvRII.js} +2 -2
- package/dist/src/store-DyZAxX01.js +7 -0
- package/dist/src/{store-l9-mIRC-.cjs → store-j9IVHCUH.cjs} +2 -2
- package/dist/src/store-jXGyn3ME.js +6 -0
- package/dist/src/{tables-BCcp2r48.js → tables-0EDD8777.js} +2 -2
- package/dist/src/{tables-D_VmN-ui.cjs → tables-B5ootZ1x.cjs} +2 -2
- package/dist/src/{tables-DZWWveaJ.js → tables-DchWyfYe.js} +2 -2
- package/dist/src/{telemetry-CB8ltkOD.js → telemetry-Ar0--jvU.js} +3 -3
- package/dist/src/telemetry-D186sF_u.js +6 -0
- package/dist/src/{telemetry-DrSTNTH-.cjs → telemetry-DMNm_KFn.cjs} +3 -3
- package/dist/src/telemetry-DPa6sGTP.js +8 -0
- package/dist/src/{telemetry-CmGyDaxF.js → telemetry-J9g1ySRA.js} +3 -3
- package/dist/src/telemetry-XUPEBnrQ.cjs +6 -0
- package/dist/src/{transcription-BExRps8_.js → transcription-BhomPVy1.js} +6 -6
- package/dist/src/{transcription-CpuRKcIu.js → transcription-Dijlv9vZ.js} +7 -7
- package/dist/src/{transcription-B4GbFkhb.cjs → transcription-hPpt68W9.cjs} +6 -6
- package/dist/src/{transform-B5HUD86U.cjs → transform-Bow1252l.cjs} +3 -3
- package/dist/src/{transform-B-AYIQmM.js → transform-DFkyOjQ7.js} +3 -3
- package/dist/src/{transform-BwedQA8W.js → transform-yRqaT_HG.js} +3 -3
- package/dist/src/{types-n3ttAnOa.js → types-BjfZC_QB.js} +2 -17
- package/dist/src/{types-DAmPdu4_.cjs → types-Cixl-wRO.cjs} +1 -22
- package/dist/src/{types-BlspLqbN.js → types-l7snZgzv.js} +2 -17
- package/dist/src/{util-DX58t30T.js → util-07lmo96G.js} +3 -3
- package/dist/src/{util-Cd2vI7ii.js → util-4LvhctH4.js} +3 -3
- package/dist/src/{util-BnF-xRPi.js → util-7G7p_iRM.js} +3 -3
- package/dist/src/{util-KE6uFWHG.cjs → util-9SFCdUzk.cjs} +3 -3
- package/dist/src/{util-BXjq8vOq.js → util-CD-FiF5u.js} +3 -3
- package/dist/src/{util-B-Yo0qzc.cjs → util-CxWghxGa.cjs} +3 -3
- package/dist/src/{util-CyzW5ayt.js → util-DhvhJDlT.js} +3 -3
- package/dist/src/{util-BT0p5c9X.cjs → util-Dqg4mr2c.cjs} +3 -3
- package/dist/src/{util-CwsTKPXi.js → util-ImmJ_Dxe.js} +3 -3
- package/dist/src/{utils-BWOFfOel.js → utils-CF4fxllU.js} +2 -2
- package/dist/src/{utils-HdRSruG0.js → utils-D_6Jp7Sl.js} +2 -2
- package/dist/src/{utils-BSpcJB4B.cjs → utils-u1jU5_Iq.cjs} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +10 -8
- package/dist/src/app/assets/index-CFq6nz43.css +0 -1
- package/dist/src/cache-BZYapLG-.cjs +0 -6
- package/dist/src/eval-g5SH3NBa.js +0 -16
- package/dist/src/evalResult-Bt2fmj0V.js +0 -9
- package/dist/src/evalResult-DRNH7Jui.js +0 -10
- package/dist/src/evalResult-DqLWyktN.cjs +0 -9
- package/dist/src/evaluator-DIenqH86.js +0 -37
- package/dist/src/fetch-gmneHrJc.cjs +0 -4
- package/dist/src/graders-BHcgLz2P.js +0 -35
- package/dist/src/graders-BjBhwJOb.cjs +0 -32
- package/dist/src/graders-DEjGX1uY.js +0 -33
- package/dist/src/providers-BBGwMMzj.js +0 -33
- package/dist/src/providers-BR5P_qep.cjs +0 -31
- package/dist/src/providers-CDib9hVp.js +0 -31
- package/dist/src/server-B5aIhzYk.cjs +0 -6
- package/dist/src/server-DJCw85s0.js +0 -8
- package/dist/src/server-DKj0QIm9.js +0 -6
- package/dist/src/store-Bo9FEBPO.js +0 -7
- package/dist/src/store-CqHttBql.js +0 -6
- package/dist/src/store-fsxOJl5O.cjs +0 -6
- package/dist/src/telemetry-B2uJOvMA.js +0 -8
- package/dist/src/telemetry-TZ0v6oGo.cjs +0 -6
- package/dist/src/telemetry-rP_n_bnZ.js +0 -6
package/dist/src/main.js
CHANGED
|
@@ -1,45 +1,45 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import "./chunk-DHDDz29n.js";
|
|
3
3
|
import { C as getEnvString, D as isNonInteractive, E as isCI, O as cliState_default, S as getEnvInt, b as getEnvBool, c as setLogLevel, d as extractFirstJsonObject, f as extractJsonObjects, h as orderKeys, i as isDebugEnabled, l as sanitizeObject, n as getLogLevel, o as logger_default, r as initializeRunLogging, s as setLogCallback, t as closeLogger, v as getConfigDirectoryPath, x as getEnvFloat, y as setConfigDirectoryPath } from "./logger-Bzi5o47S.js";
|
|
4
|
-
import { D as getDefaultShareViewBaseUrl, E as getDefaultPort, N as HUMAN_ASSERTION_TYPE, O as getShareApiBaseUrl, S as DEFAULT_QUERY_LIMIT, T as TERMINAL_MAX_WIDTH, c as REQUEST_TIMEOUT_MS, f as isPromptfooSampleTarget, j as VERSION, k as getShareViewBaseUrl, n as fetchWithRetries, r as fetchWithTimeout, t as fetchWithProxy, x as DEFAULT_MAX_CONCURRENCY, y as CLOUD_PROVIDER_PREFIX } from "./fetch-
|
|
4
|
+
import { D as getDefaultShareViewBaseUrl, E as getDefaultPort, N as HUMAN_ASSERTION_TYPE, O as getShareApiBaseUrl, S as DEFAULT_QUERY_LIMIT, T as TERMINAL_MAX_WIDTH, c as REQUEST_TIMEOUT_MS, f as isPromptfooSampleTarget, j as VERSION, k as getShareViewBaseUrl, n as fetchWithRetries, r as fetchWithTimeout, t as fetchWithProxy, x as DEFAULT_MAX_CONCURRENCY, y as CLOUD_PROVIDER_PREFIX } from "./fetch-CruohpfF.js";
|
|
5
5
|
import { t as invariant } from "./invariant-DT20jrBd.js";
|
|
6
|
-
import { a as getAuthor, d as EMAIL_OK_STATUS, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-
|
|
6
|
+
import { a as getAuthor, d as EMAIL_OK_STATUS, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-Ckp-8szu.js";
|
|
7
7
|
import { n as sleep } from "./time-CoOwN3kQ.js";
|
|
8
|
-
import { a as readGlobalConfig, i as cloudConfig, r as CloudConfig, s as writeGlobalConfigPartial } from "./cloud-
|
|
9
|
-
import { n as TelemetryEventSchema, r as telemetry_default } from "./telemetry-
|
|
10
|
-
import { $ as
|
|
11
|
-
import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-
|
|
12
|
-
import { A as renderPrompt, C as extractVariablesFromJson, F as TokenUsageTracker, P as redteamProviderManager, R as PromptfooHarmfulCompletionProvider, S as extractPromptFromTags, T as getShortPluginId, _ as pluginMatchesStrategyTargets, _t as resolveTeamId, a as resolveProviderConfigs, at as checkCloudPermissions, b as extractGoalFromPrompt, c as createTransformRequest, ct as getDefaultTeam, d as Strategies, f as loadStrategy, ft as getProviderFromCloud, g as retrieveMedia, gt as resolveTeamFromIdentifier, h as mediaExists, ht as makeRequest$1, i as resolveProvider, it as canCreateTargets, l as createTransformResponse, lt as getOrgContext, m as getMediaStorage, mt as isCloudProvider, n as loadApiProvider, o as MCPProvider, ot as getCloudDatabaseId, p as validateStrategies, pt as getUserTeams, r as loadApiProviders, s as HttpProvider, st as getConfigFromCloud, t as getProviderIds, ut as getPluginSeverityOverridesFromCloud } from "./providers-
|
|
13
|
-
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-
|
|
8
|
+
import { a as readGlobalConfig, i as cloudConfig, r as CloudConfig, s as writeGlobalConfigPartial } from "./cloud-CZHXB86j.js";
|
|
9
|
+
import { n as TelemetryEventSchema, r as telemetry_default } from "./telemetry-Ar0--jvU.js";
|
|
10
|
+
import { $ as HARM_PLUGINS, B as Severity, D as ADDITIONAL_STRATEGIES, E as isUuid, F as STRATEGY_COLLECTION_MAPPINGS, G as ALIASED_PLUGIN_MAPPINGS, I as getDefaultNFanout, J as BIAS_PLUGINS, K as ADDITIONAL_PLUGINS, M as DEFAULT_STRATEGIES, P as STRATEGY_COLLECTIONS, Q as FOUNDATION_PLUGINS, R as isFanoutStrategy, S as PluginConfigSchema, U as riskCategorySeverityMap, V as categoryAliases, W as subCategoryDescriptions, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, h as RedteamConfigSchema, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_MULTI_TURN_MAX_TURNS, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as UNALIGNED_PROVIDER_HARM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TELECOM_PLUGINS, q as ALL_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as isMultiTurnStrategy } from "./types-BjfZC_QB.js";
|
|
11
|
+
import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-JDwxLoXT.js";
|
|
12
|
+
import { A as renderPrompt, C as extractVariablesFromJson, F as TokenUsageTracker, P as redteamProviderManager, R as PromptfooHarmfulCompletionProvider, S as extractPromptFromTags, T as getShortPluginId, _ as pluginMatchesStrategyTargets, _t as resolveTeamId, a as resolveProviderConfigs, at as checkCloudPermissions, b as extractGoalFromPrompt, c as createTransformRequest, ct as getDefaultTeam, d as Strategies, f as loadStrategy, ft as getProviderFromCloud, g as retrieveMedia, gt as resolveTeamFromIdentifier, h as mediaExists, ht as makeRequest$1, i as resolveProvider, it as canCreateTargets, l as createTransformResponse, lt as getOrgContext, m as getMediaStorage, mt as isCloudProvider, n as loadApiProvider, o as MCPProvider, ot as getCloudDatabaseId, p as validateStrategies, pt as getUserTeams, r as loadApiProviders, s as HttpProvider, st as getConfigFromCloud, t as getProviderIds, ut as getPluginSeverityOverridesFromCloud } from "./providers-DbetV5MB.js";
|
|
13
|
+
import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-_erPRF2X.js";
|
|
14
14
|
import { i as isJavascriptFile } from "./fileExtensions-BpuMmaFL.js";
|
|
15
|
-
import { A as getTestCaseDeduplicationKey, C as extractVariablesFromTemplates, D as setupEnv, M as doesProviderRefMatch, N as getProviderDescription, O as deduplicateTestCases, S as extractVariablesFromTemplate, a as ComparisonEvalNotFoundError, b as renderEnvOnlyInObject, c as mergeComparisonTables, f as maybeLoadConfigFromExternalFile, i as writeOutput, j as resultIsForTestCase, k as filterRuntimeVars, l as fetchCsvFromGoogleSheet, n as createOutputMetadata, o as evalTableToJson, p as maybeLoadFromExternalFile, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, v as readFilters, w as getNunjucksEngine, y as readOutput } from "./util-
|
|
15
|
+
import { A as getTestCaseDeduplicationKey, C as extractVariablesFromTemplates, D as setupEnv, M as doesProviderRefMatch, N as getProviderDescription, O as deduplicateTestCases, S as extractVariablesFromTemplate, a as ComparisonEvalNotFoundError, b as renderEnvOnlyInObject, c as mergeComparisonTables, f as maybeLoadConfigFromExternalFile, i as writeOutput, j as resultIsForTestCase, k as filterRuntimeVars, l as fetchCsvFromGoogleSheet, n as createOutputMetadata, o as evalTableToJson, p as maybeLoadFromExternalFile, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, v as readFilters, w as getNunjucksEngine, y as readOutput } from "./util-DhvhJDlT.js";
|
|
16
16
|
import { r as importModule, t as getDirectory } from "./esm-Bwuh5HXW.js";
|
|
17
17
|
import { r as runPython } from "./pythonUtils-mprm8p4h.js";
|
|
18
18
|
import "./transform-BzepVlkX.js";
|
|
19
|
-
import { A as BeavertailsPlugin, C as HarmbenchPlugin, D as DebugAccessPlugin, E as DivergentRepetitionPlugin, F as retryWithDeduplication, I as sampleArray, L as fetchHuggingFaceDataset, M as RedteamGraderBase, N as RedteamPluginBase, O as CrossSessionLeakPlugin, P as getCustomPolicies, S as ImitationPlugin, T as ExcessiveAgencyPlugin, _ as makeInlinePolicyIdSync, a as UnverifiableClaimsPlugin, b as OverreliancePlugin, c as ToolDiscoveryPlugin, ct as readProviderPromptMap, d as RbacPlugin, f as PromptExtractionPlugin, g as isValidPolicyObject, h as determinePolicyTypeFromId, i as VLGuardPlugin, it as getDefaultProviders, j as AegisPlugin, k as ContractPlugin, l as SqlInjectionPlugin, m as PolicyPlugin, o as UnsafeBenchPlugin, ot as processPrompts, p as PoliticsPlugin, r as VLSUPlugin, rt as doRemoteGrading, s as ToxicChatPlugin, st as readPrompts, t as GRADERS, u as ShellInjectionPlugin, v as PlinyPlugin, w as HallucinationPlugin, x as IntentPlugin, y as getPiiLeakTestsForCategory } from "./graders-
|
|
20
|
-
import { i as generateIdFromPrompt } from "./utils-
|
|
21
|
-
import { i as sha256, n as isBlobStorageEnabled, r as randomSequence } from "./extractor-
|
|
19
|
+
import { A as BeavertailsPlugin, C as HarmbenchPlugin, D as DebugAccessPlugin, E as DivergentRepetitionPlugin, F as retryWithDeduplication, I as sampleArray, L as fetchHuggingFaceDataset, M as RedteamGraderBase, N as RedteamPluginBase, O as CrossSessionLeakPlugin, P as getCustomPolicies, S as ImitationPlugin, T as ExcessiveAgencyPlugin, _ as makeInlinePolicyIdSync, a as UnverifiableClaimsPlugin, b as OverreliancePlugin, c as ToolDiscoveryPlugin, ct as readProviderPromptMap, d as RbacPlugin, f as PromptExtractionPlugin, g as isValidPolicyObject, h as determinePolicyTypeFromId, i as VLGuardPlugin, it as getDefaultProviders, j as AegisPlugin, k as ContractPlugin, l as SqlInjectionPlugin, m as PolicyPlugin, o as UnsafeBenchPlugin, ot as processPrompts, p as PoliticsPlugin, r as VLSUPlugin, rt as doRemoteGrading, s as ToxicChatPlugin, st as readPrompts, t as GRADERS, u as ShellInjectionPlugin, v as PlinyPlugin, w as HallucinationPlugin, x as IntentPlugin, y as getPiiLeakTestsForCategory } from "./graders-DcCxY5P5.js";
|
|
20
|
+
import { i as generateIdFromPrompt } from "./utils-D_6Jp7Sl.js";
|
|
21
|
+
import { i as sha256, n as isBlobStorageEnabled, r as randomSequence } from "./extractor-BEtCUN8X.js";
|
|
22
22
|
import "./genaiTracer-CqNnnXrE.js";
|
|
23
|
-
import "./chat-
|
|
23
|
+
import "./chat-Bvvgb30H.js";
|
|
24
24
|
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-C9fIWtvn.js";
|
|
25
|
-
import "./transform-
|
|
26
|
-
import "./messages-
|
|
27
|
-
import "./util-
|
|
25
|
+
import "./transform-DFkyOjQ7.js";
|
|
26
|
+
import "./messages-HEhixC8X.js";
|
|
27
|
+
import "./util-4LvhctH4.js";
|
|
28
28
|
import "./openai-D_5h8sBa.js";
|
|
29
|
-
import "./util-
|
|
30
|
-
import "./completion-
|
|
31
|
-
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-
|
|
32
|
-
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-
|
|
29
|
+
import "./util-ImmJ_Dxe.js";
|
|
30
|
+
import "./completion-2IaHHyEe.js";
|
|
31
|
+
import { n as getBlobUrl, t as getBlobByHash } from "./blobs-eYQ1PJ-z.js";
|
|
32
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DchWyfYe.js";
|
|
33
33
|
import { n as escapeRegExp, t as ellipsize } from "./text-C1cXk9ij.js";
|
|
34
|
-
import { n as getTraceStore } from "./store-
|
|
35
|
-
import "./base-
|
|
36
|
-
import "./image-
|
|
34
|
+
import { n as getTraceStore } from "./store-C7WtvRII.js";
|
|
35
|
+
import "./base-BMtzQZDN.js";
|
|
36
|
+
import "./image-CKdRHN-J.js";
|
|
37
37
|
import "./providerRegistry-gU7gOAJI.js";
|
|
38
38
|
import "./rubyUtils-Cus5EGPX.js";
|
|
39
|
-
import { c as readAssertions, l as renderMetricName, o as doesPromptRefMatch, s as assertions_default, t as evaluate$1, u as runAssertions } from "./evaluator-
|
|
40
|
-
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-
|
|
41
|
-
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-
|
|
42
|
-
import { t as EvalResult } from "./evalResult-
|
|
39
|
+
import { c as readAssertions, l as renderMetricName, o as doesPromptRefMatch, s as assertions_default, t as evaluate$1, u as runAssertions } from "./evaluator-GYXFbA_Y.js";
|
|
40
|
+
import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-V7dUBr64.js";
|
|
41
|
+
import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-nWuy1AG2.js";
|
|
42
|
+
import { t as EvalResult } from "./evalResult-DIIXLdjz.js";
|
|
43
43
|
import { t as formatDuration } from "./formatDuration-CGa0ZDQ8.js";
|
|
44
44
|
import { n as shouldUseInkUI } from "./interactiveCheck-BZbMvE6i.js";
|
|
45
45
|
import { fileURLToPath } from "node:url";
|
|
@@ -114,7 +114,7 @@ function runCommand(program) {
|
|
|
114
114
|
hasGithubPr: !!cmdObj.githubPr,
|
|
115
115
|
hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
|
|
116
116
|
});
|
|
117
|
-
const { executeScan } = await import("./scanner-
|
|
117
|
+
const { executeScan } = await import("./scanner-CMWzx1QH.js");
|
|
118
118
|
await executeScan(repoPath, cmdObj);
|
|
119
119
|
});
|
|
120
120
|
}
|
|
@@ -3289,60 +3289,101 @@ async function deleteErrorResults(resultIds) {
|
|
|
3289
3289
|
await getDb().delete(evalResultsTable).where(inArray(evalResultsTable.id, resultIds));
|
|
3290
3290
|
logger_default.debug(`Deleted ${resultIds.length} error results from database`);
|
|
3291
3291
|
}
|
|
3292
|
+
const RECALCULATE_BATCH_SIZE = 1e3;
|
|
3292
3293
|
/**
|
|
3293
|
-
* Recalculates prompt metrics based on current results after ERROR results have been deleted
|
|
3294
|
+
* Recalculates prompt metrics based on current results after ERROR results have been deleted.
|
|
3295
|
+
* Uses streaming batched iteration to avoid OOM with large evaluations (40K+ results).
|
|
3294
3296
|
*/
|
|
3295
3297
|
async function recalculatePromptMetrics(evalRecord) {
|
|
3296
3298
|
logger_default.debug("Recalculating prompt metrics after deleting ERROR results");
|
|
3297
|
-
|
|
3299
|
+
const startTime = Date.now();
|
|
3300
|
+
let batchNumber = 0;
|
|
3301
|
+
let totalProcessed = 0;
|
|
3298
3302
|
const promptMetricsMap = /* @__PURE__ */ new Map();
|
|
3299
|
-
for (const
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
|
|
3310
|
-
|
|
3311
|
-
|
|
3312
|
-
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3316
|
-
|
|
3317
|
-
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
|
|
3332
|
-
|
|
3333
|
-
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
|
|
3303
|
+
for (const [promptIdx] of evalRecord.prompts.entries()) promptMetricsMap.set(promptIdx, {
|
|
3304
|
+
score: 0,
|
|
3305
|
+
testPassCount: 0,
|
|
3306
|
+
testFailCount: 0,
|
|
3307
|
+
testErrorCount: 0,
|
|
3308
|
+
assertPassCount: 0,
|
|
3309
|
+
assertFailCount: 0,
|
|
3310
|
+
totalLatencyMs: 0,
|
|
3311
|
+
tokenUsage: createEmptyTokenUsage(),
|
|
3312
|
+
namedScores: {},
|
|
3313
|
+
namedScoresCount: {},
|
|
3314
|
+
cost: 0
|
|
3315
|
+
});
|
|
3316
|
+
let currentResultId;
|
|
3317
|
+
try {
|
|
3318
|
+
for await (const batch of evalRecord.fetchResultsBatched(RECALCULATE_BATCH_SIZE)) {
|
|
3319
|
+
batchNumber++;
|
|
3320
|
+
logger_default.debug(`Processing batch ${batchNumber} with ${batch.length} results`);
|
|
3321
|
+
for (const result of batch) {
|
|
3322
|
+
currentResultId = result.id;
|
|
3323
|
+
const metrics = promptMetricsMap.get(result.promptIdx);
|
|
3324
|
+
if (!metrics) {
|
|
3325
|
+
logger_default.debug(`Skipping result with invalid promptIdx: ${result.promptIdx}`, {
|
|
3326
|
+
resultId: result.id,
|
|
3327
|
+
evalId: evalRecord.id
|
|
3328
|
+
});
|
|
3329
|
+
continue;
|
|
3330
|
+
}
|
|
3331
|
+
if (result.success) metrics.testPassCount++;
|
|
3332
|
+
else if (result.failureReason === ResultFailureReason.ERROR) metrics.testErrorCount++;
|
|
3333
|
+
else metrics.testFailCount++;
|
|
3334
|
+
metrics.score += result.score ?? 0;
|
|
3335
|
+
metrics.totalLatencyMs += result.latencyMs || 0;
|
|
3336
|
+
metrics.cost += result.cost || 0;
|
|
3337
|
+
for (const [key, value] of Object.entries(result.namedScores || {})) {
|
|
3338
|
+
metrics.namedScores[key] = (metrics.namedScores[key] || 0) + value;
|
|
3339
|
+
const testVars = result.testCase?.vars || {};
|
|
3340
|
+
let contributingAssertions = 0;
|
|
3341
|
+
result.gradingResult?.componentResults?.forEach((componentResult) => {
|
|
3342
|
+
if (renderMetricName(componentResult.assertion?.metric, testVars) === key) contributingAssertions++;
|
|
3343
|
+
});
|
|
3344
|
+
metrics.namedScoresCount[key] = (metrics.namedScoresCount[key] || 0) + (contributingAssertions || 1);
|
|
3345
|
+
}
|
|
3346
|
+
if (result.gradingResult?.componentResults) {
|
|
3347
|
+
metrics.assertPassCount += result.gradingResult.componentResults.filter((r) => r.pass).length;
|
|
3348
|
+
metrics.assertFailCount += result.gradingResult.componentResults.filter((r) => !r.pass).length;
|
|
3349
|
+
}
|
|
3350
|
+
if (result.response?.tokenUsage) accumulateResponseTokenUsage(metrics.tokenUsage, { tokenUsage: result.response.tokenUsage });
|
|
3351
|
+
if (result.gradingResult?.tokensUsed) {
|
|
3352
|
+
if (!metrics.tokenUsage.assertions) metrics.tokenUsage.assertions = createEmptyAssertions();
|
|
3353
|
+
accumulateAssertionTokenUsage(metrics.tokenUsage.assertions, result.gradingResult.tokensUsed);
|
|
3354
|
+
}
|
|
3355
|
+
}
|
|
3356
|
+
totalProcessed += batch.length;
|
|
3341
3357
|
}
|
|
3358
|
+
} catch (error) {
|
|
3359
|
+
logger_default.error("Error during batched metrics recalculation", {
|
|
3360
|
+
phase: "calculation",
|
|
3361
|
+
batchNumber,
|
|
3362
|
+
totalProcessed,
|
|
3363
|
+
currentResultId,
|
|
3364
|
+
evalId: evalRecord.id,
|
|
3365
|
+
error
|
|
3366
|
+
});
|
|
3367
|
+
throw error;
|
|
3342
3368
|
}
|
|
3343
3369
|
for (const [promptIdx, newMetrics] of promptMetricsMap.entries()) if (promptIdx < evalRecord.prompts.length) evalRecord.prompts[promptIdx].metrics = newMetrics;
|
|
3344
|
-
if (evalRecord.persisted)
|
|
3345
|
-
|
|
3370
|
+
if (evalRecord.persisted) try {
|
|
3371
|
+
await evalRecord.addPrompts(evalRecord.prompts);
|
|
3372
|
+
} catch (error) {
|
|
3373
|
+
logger_default.error("Error saving recalculated prompt metrics", {
|
|
3374
|
+
phase: "save",
|
|
3375
|
+
evalId: evalRecord.id,
|
|
3376
|
+
promptCount: evalRecord.prompts.length,
|
|
3377
|
+
error
|
|
3378
|
+
});
|
|
3379
|
+
throw error;
|
|
3380
|
+
}
|
|
3381
|
+
const durationMs = Date.now() - startTime;
|
|
3382
|
+
logger_default.debug("Prompt metrics recalculation completed", {
|
|
3383
|
+
totalBatches: batchNumber,
|
|
3384
|
+
totalResults: totalProcessed,
|
|
3385
|
+
durationMs
|
|
3386
|
+
});
|
|
3346
3387
|
}
|
|
3347
3388
|
/**
|
|
3348
3389
|
* Main retry function
|
|
@@ -5104,7 +5145,7 @@ async function doGenerateAssertions(options) {
|
|
|
5104
5145
|
let testSuite;
|
|
5105
5146
|
const configPath = options.config || options.defaultConfigPath;
|
|
5106
5147
|
if (configPath) testSuite = (await resolveConfigs({ config: [configPath] }, options.defaultConfig, "AssertionGeneration")).testSuite;
|
|
5107
|
-
else throw new Error(
|
|
5148
|
+
else throw new Error(`Could not find a config file. Pass --config path/to/promptfooconfig.yaml or run "${promptfooCommand("init")}" to create one.`);
|
|
5108
5149
|
const startTime = Date.now();
|
|
5109
5150
|
telemetry_default.record("command_used", {
|
|
5110
5151
|
name: "generate_assertions - started",
|
|
@@ -5292,7 +5333,7 @@ async function doGenerateDataset(options) {
|
|
|
5292
5333
|
let testSuite;
|
|
5293
5334
|
const configPath = options.config || options.defaultConfigPath;
|
|
5294
5335
|
if (configPath) testSuite = (await resolveConfigs({ config: [configPath] }, options.defaultConfig, "DatasetGeneration")).testSuite;
|
|
5295
|
-
else throw new Error(
|
|
5336
|
+
else throw new Error(`Could not find a config file. Pass --config path/to/promptfooconfig.yaml or run "${promptfooCommand("init")}" to create one.`);
|
|
5296
5337
|
const startTime = Date.now();
|
|
5297
5338
|
telemetry_default.record("command_used", {
|
|
5298
5339
|
name: "generate_dataset - started",
|
|
@@ -7192,11 +7233,6 @@ async function synthesize({ abortSignal, delay, entities: entitiesOverride, inje
|
|
|
7192
7233
|
});
|
|
7193
7234
|
await validateStrategies(strategies);
|
|
7194
7235
|
await validateSharpDependency(strategies, plugins);
|
|
7195
|
-
if (strategies.some((s) => isLanguageDisallowedStrategy(s.id)) && language) {
|
|
7196
|
-
const originalLanguage = Array.isArray(language) ? language.join(", ") : language;
|
|
7197
|
-
language = "en";
|
|
7198
|
-
logger_default.info(`[Language Override] Detected language-disallowed strategy (audio/video/image/layer/math-prompt). Forcing language to 'en' (was: ${originalLanguage})`);
|
|
7199
|
-
}
|
|
7200
7236
|
const redteamProvider = await redteamProviderManager.getProvider({ provider });
|
|
7201
7237
|
const { effectiveStrategyCount, includeBasicTests, totalPluginTests, totalTests } = calculateTotalTests(plugins, strategies, language);
|
|
7202
7238
|
logger_default.info(`Synthesizing test cases for ${prompts.length} ${prompts.length === 1 ? "prompt" : "prompts"}...\nUsing plugins:\n\n${chalk.yellow(plugins.map((p) => {
|
|
@@ -13072,14 +13108,15 @@ async function startServer(port = getDefaultPort(), browserBehavior = BrowserBeh
|
|
|
13072
13108
|
const results = await updatedEval?.getResultsCount();
|
|
13073
13109
|
if (results && results > 0) {
|
|
13074
13110
|
logger_default.debug(`Emitting update for eval: ${updatedEval?.config?.description || updatedEval?.id || "unknown"}`);
|
|
13075
|
-
io.emit("update", updatedEval);
|
|
13111
|
+
io.emit("update", { evalId: updatedEval?.id });
|
|
13076
13112
|
allPrompts = null;
|
|
13077
13113
|
}
|
|
13078
13114
|
};
|
|
13079
13115
|
handleSignalUpdate();
|
|
13080
13116
|
});
|
|
13081
13117
|
io.on("connection", async (socket) => {
|
|
13082
|
-
|
|
13118
|
+
const latestEval = await Eval.latest();
|
|
13119
|
+
socket.emit("init", latestEval ? { evalId: latestEval.id } : null);
|
|
13083
13120
|
});
|
|
13084
13121
|
return new Promise((resolve) => {
|
|
13085
13122
|
httpServer.listen(port, () => {
|
|
@@ -16641,8 +16678,8 @@ function registerRunEvaluationTool(server) {
|
|
|
16641
16678
|
if (filteredProviders.length === 0) return createToolResponse("run_evaluation", false, void 0, `No providers matched filter: ${filters.join(", ")}. Available providers: ${providers.map((p) => typeof p.id === "function" ? p.id() : p.id).join(", ")}`);
|
|
16642
16679
|
filteredTestSuite.providers = filteredProviders;
|
|
16643
16680
|
}
|
|
16644
|
-
const { evaluate } = await import("./evaluator-
|
|
16645
|
-
const evalRecord = await (await import("./eval-
|
|
16681
|
+
const { evaluate } = await import("./evaluator-8HYi1Fph.js");
|
|
16682
|
+
const evalRecord = await (await import("./eval-D4RlVjjL.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
|
|
16646
16683
|
logger_default.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
|
|
16647
16684
|
const startTime = Date.now();
|
|
16648
16685
|
const result = await evaluate(filteredTestSuite, evalRecord, {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { b as getEnvString, i as logger_default, v as getEnvFloat, y as getEnvInt } from "./logger-q5I8CByj.js";
|
|
2
|
-
import { d as maybeLoadToolsFromExternalFile, u as maybeLoadResponseFormatFromExternalFile } from "./util-
|
|
3
|
-
import { C as transformTools, S as transformToolChoice } from "./fetch-
|
|
4
|
-
import { o as getCache, s as isCacheEnabled } from "./cache-
|
|
2
|
+
import { d as maybeLoadToolsFromExternalFile, u as maybeLoadResponseFormatFromExternalFile } from "./util-07lmo96G.js";
|
|
3
|
+
import { C as transformTools, S as transformToolChoice } from "./fetch-Cn7G4hKv.js";
|
|
4
|
+
import { o as getCache, s as isCacheEnabled } from "./cache-CkRahBA6.js";
|
|
5
5
|
import { n as withGenAISpan } from "./genaiTracer-CNawybn-.js";
|
|
6
|
-
import { i as normalizeFinishReason, n as MCPClient } from "./chat-
|
|
6
|
+
import { i as normalizeFinishReason, n as MCPClient } from "./chat-Cz8uiybK.js";
|
|
7
7
|
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-BQVp2uKF.js";
|
|
8
|
-
import { n as transformMCPToolsToAnthropic } from "./transform-
|
|
9
|
-
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-
|
|
8
|
+
import { n as transformMCPToolsToAnthropic } from "./transform-yRqaT_HG.js";
|
|
9
|
+
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-7G7p_iRM.js";
|
|
10
10
|
import Anthropic, { APIError } from "@anthropic-ai/sdk";
|
|
11
11
|
|
|
12
12
|
//#region src/providers/anthropic/generic.ts
|
|
@@ -137,7 +137,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
137
137
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
138
138
|
...config.tool_choice ? { tool_choice: transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
139
139
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
140
|
-
...processedOutputFormat ? {
|
|
140
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
141
141
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
142
142
|
};
|
|
143
143
|
logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -233,4 +233,4 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
233
233
|
|
|
234
234
|
//#endregion
|
|
235
235
|
export { AnthropicGenericProvider as n, AnthropicMessagesProvider as t };
|
|
236
|
-
//# sourceMappingURL=messages-
|
|
236
|
+
//# sourceMappingURL=messages-CrKRl5Pk.js.map
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
const require_util = require('./util-
|
|
3
|
-
const require_fetch = require('./fetch-
|
|
4
|
-
const require_cache = require('./cache-
|
|
2
|
+
const require_util = require('./util-CxWghxGa.cjs');
|
|
3
|
+
const require_fetch = require('./fetch-E3xbPP9w.cjs');
|
|
4
|
+
const require_cache = require('./cache-PWdK7nbt.cjs');
|
|
5
5
|
const require_genaiTracer = require('./genaiTracer-zK0Mtd_D.cjs');
|
|
6
|
-
const require_chat = require('./chat-
|
|
6
|
+
const require_chat = require('./chat-DKNIBnnw.cjs');
|
|
7
7
|
const require_tokenUsageUtils = require('./tokenUsageUtils-DFp2VFtG.cjs');
|
|
8
|
-
const require_transform = require('./transform-
|
|
9
|
-
const require_util$1 = require('./util-
|
|
8
|
+
const require_transform = require('./transform-Bow1252l.cjs');
|
|
9
|
+
const require_util$1 = require('./util-9SFCdUzk.cjs');
|
|
10
10
|
let _anthropic_ai_sdk = require("@anthropic-ai/sdk");
|
|
11
11
|
_anthropic_ai_sdk = require_logger.__toESM(_anthropic_ai_sdk);
|
|
12
12
|
|
|
@@ -138,7 +138,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
138
138
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
139
139
|
...config.tool_choice ? { tool_choice: require_fetch.transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
140
140
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
141
|
-
...processedOutputFormat ? {
|
|
141
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
142
142
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
143
143
|
};
|
|
144
144
|
require_logger.logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -245,4 +245,4 @@ Object.defineProperty(exports, 'AnthropicMessagesProvider', {
|
|
|
245
245
|
return AnthropicMessagesProvider;
|
|
246
246
|
}
|
|
247
247
|
});
|
|
248
|
-
//# sourceMappingURL=messages-
|
|
248
|
+
//# sourceMappingURL=messages-DR9VV_jC.cjs.map
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { C as getEnvString, S as getEnvInt, o as logger_default, x as getEnvFloat } from "./logger-Bzi5o47S.js";
|
|
3
|
-
import { _ as transformToolChoice, v as transformTools } from "./fetch-
|
|
4
|
-
import { o as getCache, s as isCacheEnabled } from "./cache-
|
|
5
|
-
import { g as maybeLoadToolsFromExternalFile, h as maybeLoadResponseFormatFromExternalFile } from "./util-
|
|
3
|
+
import { _ as transformToolChoice, v as transformTools } from "./fetch-CruohpfF.js";
|
|
4
|
+
import { o as getCache, s as isCacheEnabled } from "./cache-_erPRF2X.js";
|
|
5
|
+
import { g as maybeLoadToolsFromExternalFile, h as maybeLoadResponseFormatFromExternalFile } from "./util-DhvhJDlT.js";
|
|
6
6
|
import { n as withGenAISpan } from "./genaiTracer-CqNnnXrE.js";
|
|
7
|
-
import { i as normalizeFinishReason, n as MCPClient } from "./chat-
|
|
7
|
+
import { i as normalizeFinishReason, n as MCPClient } from "./chat-Bvvgb30H.js";
|
|
8
8
|
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-C9fIWtvn.js";
|
|
9
|
-
import { n as transformMCPToolsToAnthropic } from "./transform-
|
|
10
|
-
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-
|
|
9
|
+
import { n as transformMCPToolsToAnthropic } from "./transform-DFkyOjQ7.js";
|
|
10
|
+
import { a as parseMessages, i as outputFromMessage, n as calculateAnthropicCost, o as processAnthropicTools, r as getTokenUsage, t as ANTHROPIC_MODELS } from "./util-4LvhctH4.js";
|
|
11
11
|
import Anthropic, { APIError } from "@anthropic-ai/sdk";
|
|
12
12
|
|
|
13
13
|
//#region src/providers/anthropic/generic.ts
|
|
@@ -138,7 +138,7 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
138
138
|
...allTools.length > 0 ? { tools: allTools } : {},
|
|
139
139
|
...config.tool_choice ? { tool_choice: transformToolChoice(config.tool_choice, "anthropic") } : {},
|
|
140
140
|
...config.thinking || thinking ? { thinking: config.thinking || thinking } : {},
|
|
141
|
-
...processedOutputFormat ? {
|
|
141
|
+
...processedOutputFormat ? { output_config: { format: processedOutputFormat } } : {},
|
|
142
142
|
...typeof config?.extra_body === "object" && config.extra_body ? config.extra_body : {}
|
|
143
143
|
};
|
|
144
144
|
logger_default.debug("Calling Anthropic Messages API", { params });
|
|
@@ -234,4 +234,4 @@ var AnthropicMessagesProvider = class AnthropicMessagesProvider extends Anthropi
|
|
|
234
234
|
|
|
235
235
|
//#endregion
|
|
236
236
|
export { AnthropicGenericProvider as n, AnthropicMessagesProvider as t };
|
|
237
|
-
//# sourceMappingURL=messages-
|
|
237
|
+
//# sourceMappingURL=messages-HEhixC8X.js.map
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import { i as logger_default } from "./logger-q5I8CByj.js";
|
|
2
|
-
import "./types-
|
|
3
|
-
import "./fetch-
|
|
2
|
+
import "./types-l7snZgzv.js";
|
|
3
|
+
import "./fetch-Cn7G4hKv.js";
|
|
4
4
|
import { n as sleep } from "./time-CRRzn6vA.js";
|
|
5
|
-
import "./accounts-
|
|
6
|
-
import { r as storeBlob } from "./blobs-
|
|
7
|
-
import "./tables-
|
|
8
|
-
import "./telemetry-
|
|
5
|
+
import "./accounts-BR9WiyfR.js";
|
|
6
|
+
import { r as storeBlob } from "./blobs-CNXTPz_3.js";
|
|
7
|
+
import "./tables-0EDD8777.js";
|
|
8
|
+
import "./telemetry-J9g1ySRA.js";
|
|
9
9
|
import { t as ellipsize } from "./text-Dm78AVGG.js";
|
|
10
|
-
import { t as AwsBedrockGenericProvider } from "./base-
|
|
10
|
+
import { t as AwsBedrockGenericProvider } from "./base-DGXF05vt.js";
|
|
11
11
|
import * as fs$1 from "fs";
|
|
12
12
|
import * as path$1 from "path";
|
|
13
13
|
|
|
@@ -279,4 +279,4 @@ var NovaReelVideoProvider = class extends AwsBedrockGenericProvider {
|
|
|
279
279
|
|
|
280
280
|
//#endregion
|
|
281
281
|
export { NovaReelVideoProvider };
|
|
282
|
-
//# sourceMappingURL=nova-reel-
|
|
282
|
+
//# sourceMappingURL=nova-reel-B0gtacpW.js.map
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { o as logger_default } from "./logger-Bzi5o47S.js";
|
|
3
|
-
import "./fetch-
|
|
4
|
-
import "./accounts-
|
|
3
|
+
import "./fetch-CruohpfF.js";
|
|
4
|
+
import "./accounts-Ckp-8szu.js";
|
|
5
5
|
import { n as sleep } from "./time-CoOwN3kQ.js";
|
|
6
|
-
import "./cloud-
|
|
7
|
-
import "./telemetry-
|
|
8
|
-
import "./types-
|
|
9
|
-
import { i as storeBlob } from "./blobs-
|
|
10
|
-
import "./tables-
|
|
6
|
+
import "./cloud-CZHXB86j.js";
|
|
7
|
+
import "./telemetry-Ar0--jvU.js";
|
|
8
|
+
import "./types-BjfZC_QB.js";
|
|
9
|
+
import { i as storeBlob } from "./blobs-eYQ1PJ-z.js";
|
|
10
|
+
import "./tables-DchWyfYe.js";
|
|
11
11
|
import { t as ellipsize } from "./text-C1cXk9ij.js";
|
|
12
|
-
import { t as AwsBedrockGenericProvider } from "./base-
|
|
12
|
+
import { t as AwsBedrockGenericProvider } from "./base-BMtzQZDN.js";
|
|
13
13
|
import * as fs$1 from "fs";
|
|
14
14
|
import * as path$1 from "path";
|
|
15
15
|
|
|
@@ -281,4 +281,4 @@ var NovaReelVideoProvider = class extends AwsBedrockGenericProvider {
|
|
|
281
281
|
|
|
282
282
|
//#endregion
|
|
283
283
|
export { NovaReelVideoProvider };
|
|
284
|
-
//# sourceMappingURL=nova-reel-
|
|
284
|
+
//# sourceMappingURL=nova-reel-Bq0thILx.js.map
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
require('./types-
|
|
3
|
-
require('./fetch-
|
|
2
|
+
require('./types-Cixl-wRO.cjs');
|
|
3
|
+
require('./fetch-E3xbPP9w.cjs');
|
|
4
4
|
const require_time = require('./time-CbtsO5_a.cjs');
|
|
5
|
-
require('./accounts-
|
|
6
|
-
const require_blobs = require('./blobs-
|
|
7
|
-
require('./tables-
|
|
8
|
-
require('./telemetry-
|
|
5
|
+
require('./accounts-BGa3oLcq.cjs');
|
|
6
|
+
const require_blobs = require('./blobs-h2nW1G8J.cjs');
|
|
7
|
+
require('./tables-B5ootZ1x.cjs');
|
|
8
|
+
require('./telemetry-DMNm_KFn.cjs');
|
|
9
9
|
const require_text = require('./text-DF2hMKdg.cjs');
|
|
10
|
-
const require_base = require('./base-
|
|
10
|
+
const require_base = require('./base-oCiJ630X.cjs');
|
|
11
11
|
let fs = require("fs");
|
|
12
12
|
fs = require_logger.__toESM(fs);
|
|
13
13
|
let path = require("path");
|
|
@@ -281,4 +281,4 @@ var NovaReelVideoProvider = class extends require_base.AwsBedrockGenericProvider
|
|
|
281
281
|
|
|
282
282
|
//#endregion
|
|
283
283
|
exports.NovaReelVideoProvider = NovaReelVideoProvider;
|
|
284
|
-
//# sourceMappingURL=nova-reel-
|
|
284
|
+
//# sourceMappingURL=nova-reel-DGvKrmSN.cjs.map
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { i as logger_default } from "./logger-q5I8CByj.js";
|
|
2
|
-
import "./fetch-
|
|
2
|
+
import "./fetch-Cn7G4hKv.js";
|
|
3
3
|
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-BQVp2uKF.js";
|
|
4
|
-
import "./accounts-
|
|
5
|
-
import "./telemetry-
|
|
6
|
-
import { t as AwsBedrockGenericProvider } from "./base-
|
|
4
|
+
import "./accounts-BR9WiyfR.js";
|
|
5
|
+
import "./telemetry-J9g1ySRA.js";
|
|
6
|
+
import { t as AwsBedrockGenericProvider } from "./base-DGXF05vt.js";
|
|
7
7
|
import { Buffer } from "node:buffer";
|
|
8
8
|
import { Subject, firstValueFrom } from "rxjs";
|
|
9
9
|
import { take } from "rxjs/operators";
|
|
@@ -364,4 +364,4 @@ var NovaSonicProvider = class extends AwsBedrockGenericProvider {
|
|
|
364
364
|
|
|
365
365
|
//#endregion
|
|
366
366
|
export { NovaSonicProvider };
|
|
367
|
-
//# sourceMappingURL=nova-sonic-
|
|
367
|
+
//# sourceMappingURL=nova-sonic-CjysF4hQ.js.map
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { o as logger_default } from "./logger-Bzi5o47S.js";
|
|
3
|
-
import "./fetch-
|
|
4
|
-
import "./accounts-
|
|
5
|
-
import "./cloud-
|
|
6
|
-
import "./telemetry-
|
|
3
|
+
import "./fetch-CruohpfF.js";
|
|
4
|
+
import "./accounts-Ckp-8szu.js";
|
|
5
|
+
import "./cloud-CZHXB86j.js";
|
|
6
|
+
import "./telemetry-Ar0--jvU.js";
|
|
7
7
|
import { a as createEmptyTokenUsage } from "./tokenUsageUtils-C9fIWtvn.js";
|
|
8
|
-
import { t as AwsBedrockGenericProvider } from "./base-
|
|
8
|
+
import { t as AwsBedrockGenericProvider } from "./base-BMtzQZDN.js";
|
|
9
9
|
import { Buffer } from "node:buffer";
|
|
10
10
|
import { Subject, firstValueFrom } from "rxjs";
|
|
11
11
|
import { take } from "rxjs/operators";
|
|
@@ -366,4 +366,4 @@ var NovaSonicProvider = class extends AwsBedrockGenericProvider {
|
|
|
366
366
|
|
|
367
367
|
//#endregion
|
|
368
368
|
export { NovaSonicProvider };
|
|
369
|
-
//# sourceMappingURL=nova-sonic-
|
|
369
|
+
//# sourceMappingURL=nova-sonic-D5qOssIP.js.map
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
const require_logger = require('./logger-CMp-NS-e.cjs');
|
|
2
|
-
require('./fetch-
|
|
2
|
+
require('./fetch-E3xbPP9w.cjs');
|
|
3
3
|
const require_tokenUsageUtils = require('./tokenUsageUtils-DFp2VFtG.cjs');
|
|
4
|
-
require('./accounts-
|
|
5
|
-
require('./telemetry-
|
|
6
|
-
const require_base = require('./base-
|
|
4
|
+
require('./accounts-BGa3oLcq.cjs');
|
|
5
|
+
require('./telemetry-DMNm_KFn.cjs');
|
|
6
|
+
const require_base = require('./base-oCiJ630X.cjs');
|
|
7
7
|
let node_buffer = require("node:buffer");
|
|
8
8
|
let rxjs = require("rxjs");
|
|
9
9
|
let rxjs_operators = require("rxjs/operators");
|
|
@@ -364,4 +364,4 @@ var NovaSonicProvider = class extends require_base.AwsBedrockGenericProvider {
|
|
|
364
364
|
|
|
365
365
|
//#endregion
|
|
366
366
|
exports.NovaSonicProvider = NovaSonicProvider;
|
|
367
|
-
//# sourceMappingURL=nova-sonic-
|
|
367
|
+
//# sourceMappingURL=nova-sonic-a0F7idWe.cjs.map
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import "./chunk-FhC4c-0y.js";
|
|
2
2
|
import { b as getEnvString, i as logger_default, t as getLogLevel, w as cliState_default } from "./logger-q5I8CByj.js";
|
|
3
3
|
import { r as importModule } from "./esm-D40XsWlR.js";
|
|
4
|
-
import "./fetch-
|
|
5
|
-
import "./cache-
|
|
6
|
-
import { n as getCachedResponse, r as initializeAgenticCache, t as cacheResponse } from "./cometapi-
|
|
4
|
+
import "./fetch-Cn7G4hKv.js";
|
|
5
|
+
import "./cache-CkRahBA6.js";
|
|
6
|
+
import { n as getCachedResponse, r as initializeAgenticCache, t as cacheResponse } from "./cometapi-DWXoEmta.js";
|
|
7
7
|
import { createRequire } from "node:module";
|
|
8
8
|
import fs from "fs";
|
|
9
9
|
import path from "path";
|
|
@@ -375,4 +375,4 @@ var OpenCodeSDKProvider = class {
|
|
|
375
375
|
|
|
376
376
|
//#endregion
|
|
377
377
|
export { OpenCodeSDKProvider };
|
|
378
|
-
//# sourceMappingURL=opencode-sdk-
|
|
378
|
+
//# sourceMappingURL=opencode-sdk-BJQlARp2.js.map
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import "./chunk-DHDDz29n.js";
|
|
3
3
|
import { C as getEnvString, O as cliState_default, n as getLogLevel, o as logger_default } from "./logger-Bzi5o47S.js";
|
|
4
|
-
import "./fetch-
|
|
5
|
-
import "./cloud-
|
|
6
|
-
import "./cache-
|
|
4
|
+
import "./fetch-CruohpfF.js";
|
|
5
|
+
import "./cloud-CZHXB86j.js";
|
|
6
|
+
import "./cache-_erPRF2X.js";
|
|
7
7
|
import { r as importModule } from "./esm-Bwuh5HXW.js";
|
|
8
|
-
import { n as getCachedResponse, r as initializeAgenticCache, t as cacheResponse } from "./agentic-utils-
|
|
8
|
+
import { n as getCachedResponse, r as initializeAgenticCache, t as cacheResponse } from "./agentic-utils-BE3QlHKy.js";
|
|
9
9
|
import { createRequire } from "node:module";
|
|
10
10
|
import fs from "fs";
|
|
11
11
|
import path from "path";
|
|
@@ -377,4 +377,4 @@ var OpenCodeSDKProvider = class {
|
|
|
377
377
|
|
|
378
378
|
//#endregion
|
|
379
379
|
export { OpenCodeSDKProvider };
|
|
380
|
-
//# sourceMappingURL=opencode-sdk-
|
|
380
|
+
//# sourceMappingURL=opencode-sdk-BsA1jKss.js.map
|