promptfoo 0.120.26 → 0.121.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/drizzle/0023_wooden_mandrill.sql +2 -0
- package/dist/drizzle/meta/0023_snapshot.json +1496 -0
- package/dist/drizzle/meta/_journal.json +7 -0
- package/dist/src/{ListApp-D3DG0F8h.js → ListApp-Du7YVwj5.js} +2 -4
- package/dist/src/accounts-BgNJDBE6.js +206 -0
- package/dist/src/{accounts-BzEY8H3v.cjs → accounts-Bx-x3bmW.cjs} +99 -80
- package/dist/src/{accounts-DHHiXsy6.js → accounts-CMqkzrVf.js} +61 -36
- package/dist/src/{accounts-R3gfCR_g.js → accounts-xrUGFA6n.js} +60 -35
- package/dist/src/{agentic-utils-D6_gzOUF.js → agentic-utils-BKIN5PKu.js} +9 -10
- package/dist/src/{agents-CwM7re15.cjs → agents-B0f4HICh.cjs} +37 -40
- package/dist/src/{agents-Cnph5GLD.js → agents-C-dDThPK.js} +37 -37
- package/dist/src/{agents-C7BiinFI.cjs → agents-CErsqg5U.cjs} +19 -27
- package/dist/src/{agents-v4cW_ZgC.js → agents-CVIn-Utx.js} +19 -22
- package/dist/src/{agents-GiUianme.js → agents-CXknwsFX.js} +37 -40
- package/dist/src/{agents-DETIQHqF.js → agents-DeH4Gu94.js} +21 -28
- package/dist/src/{agents-DYIT-hQy.js → agents-Dy2YpZpa.js} +38 -41
- package/dist/src/{agents-Cao4i7AX.js → agents-aF4-T121.js} +19 -30
- package/dist/src/{aimlapi-DMF6v_vb.js → aimlapi-BAGZDo5G.js} +16 -18
- package/dist/src/{aimlapi-CMJpKK-B.js → aimlapi-BNfTBexL.js} +15 -17
- package/dist/src/{aimlapi-DtSf1ykJ.js → aimlapi-DHRKlBEA.js} +15 -4
- package/dist/src/{aimlapi-DoGLcQW_.cjs → aimlapi-tg0Gkcvr.cjs} +15 -16
- package/dist/src/app/assets/index-BFCZg7hQ.js +439 -0
- package/dist/src/app/assets/index-NCn4eVBv.css +1 -0
- package/dist/src/app/assets/{vendor-charts-CYyo8R8v.js → vendor-charts-CCl15Imd.js} +1 -1
- package/dist/src/app/assets/{vendor-markdown-DSmzq4Jh.js → vendor-markdown-0tekx3KX.js} +1 -1
- package/dist/src/app/index.html +4 -34
- package/dist/src/{audio-DUH4q0Xq.js → audio-BRODU0UK.js} +7 -9
- package/dist/src/{audio-BWjyvHn9.cjs → audio-BWeaWovU.cjs} +6 -7
- package/dist/src/{audio-U580w8jM.js → audio-CHQ4r-RV.js} +6 -5
- package/dist/src/{audio-BrJBFN2b.js → audio-tf_NBjlC.js} +6 -8
- package/dist/src/base-B0tcrnq_.js +193 -0
- package/dist/src/base-B4QJRyFS.js +194 -0
- package/dist/src/base-DBtwl2FR.cjs +222 -0
- package/dist/src/base-fEDN28WM.js +193 -0
- package/dist/src/{blobs-kt8v3UyH.js → blobs-BAU-dXan.js} +9 -12
- package/dist/src/{blobs-C9J2mVgo.js → blobs-Bpg5rH6i.js} +9 -12
- package/dist/src/{blobs-673H0jCl.cjs → blobs-DvS-O6be.cjs} +34 -37
- package/dist/src/blobs-qTYm-1PY.js +236 -0
- package/dist/src/{cache-BLLayYEN.js → cache-8XhNqPKW.js} +64 -67
- package/dist/src/cache-Bbn1Nyrd.cjs +5 -0
- package/dist/src/cache-BwsMSda7.js +6 -0
- package/dist/src/{cache-mIszOnuz.js → cache-CG0SlR1d.js} +64 -66
- package/dist/src/{cache-7xULbvt3.cjs → cache-COish3-W.cjs} +114 -117
- package/dist/src/cache-D3eqDYGU.js +739 -0
- package/dist/src/{chat-Fl6TZJRS.cjs → chat-2K608PeQ.cjs} +20 -21
- package/dist/src/chat-BKm79wib.js +764 -0
- package/dist/src/{chat-XPN9YHhr.js → chat-CM_kyI8B.js} +20 -9
- package/dist/src/{chat-DIywASPG.js → chat-CRWNNq73.js} +49 -49
- package/dist/src/{chat-C8Ei4f87.js → chat-CznLWr_D.js} +49 -49
- package/dist/src/{chat-CgyGj2hC.js → chat-DHMH-N64.js} +20 -22
- package/dist/src/{chat-C4zqjObh.cjs → chat-DaqekjFr.cjs} +69 -69
- package/dist/src/{chat-Cpz3O-Xl.js → chat-DxysjBvt.js} +21 -23
- package/dist/src/{chatkit-Dpxrq4eD.js → chatkit-65VXf5SR.js} +58 -58
- package/dist/src/{chatkit-DIrJX8xk.js → chatkit-Be-Q-a9F.js} +58 -60
- package/dist/src/{chatkit-DEls11hE.js → chatkit-BxFvW8KY.js} +58 -60
- package/dist/src/{chatkit-e25Ziu17.cjs → chatkit-DKyPi1Gs.cjs} +58 -60
- package/dist/src/chunk-DEq-mXcV.js +15 -0
- package/dist/src/chunk-DRamLcfz.js +16 -0
- package/dist/src/{claude-agent-sdk-6-xTaLwM.js → claude-agent-sdk-BLTu0WBO.js} +45 -31
- package/dist/src/{claude-agent-sdk-BzxF6NIJ.cjs → claude-agent-sdk-CJH22shf.cjs} +44 -29
- package/dist/src/{claude-agent-sdk-CmkTnKGH.js → claude-agent-sdk-D6_k9FKA.js} +45 -33
- package/dist/src/{claude-agent-sdk-rXCBLK_o.js → claude-agent-sdk-Dy5lT-Tx.js} +46 -21
- package/dist/src/{cloud-BMbRVJFw.js → cloud-Bc9526yV.js} +32 -12
- package/dist/src/cloud-DmE0EwsY.js +4 -0
- package/dist/src/{cloudflare-ai-CUg4BTcj.js → cloudflare-ai-C9r2sRhw.js} +16 -18
- package/dist/src/{cloudflare-ai-Z9X219gp.js → cloudflare-ai-CWWJCRim.js} +16 -4
- package/dist/src/{cloudflare-ai-BAQ0u_dg.cjs → cloudflare-ai-ClWSdor4.cjs} +16 -17
- package/dist/src/{cloudflare-ai-CobxMTR4.js → cloudflare-ai-ICsOuD-z.js} +17 -19
- package/dist/src/{cloudflare-gateway-C0sgfr_z.cjs → cloudflare-gateway-C2_-KG5o.cjs} +21 -22
- package/dist/src/{cloudflare-gateway-_itGuXry.js → cloudflare-gateway-D6O7AlYb.js} +23 -23
- package/dist/src/{cloudflare-gateway-D2_yi-Fh.js → cloudflare-gateway-D6xFc5pa.js} +21 -25
- package/dist/src/{cloudflare-gateway-Djf3F3_H.js → cloudflare-gateway-pXGHxJ47.js} +26 -14
- package/dist/src/{codex-sdk-ibXwdglL.js → codex-sdk-C6UMlxwV.js} +49 -32
- package/dist/src/{codex-sdk-BASDNkIl.js → codex-sdk-DUwKWezN.js} +49 -30
- package/dist/src/{codex-sdk-dSnGdgIp.js → codex-sdk-GGAw0qbD.js} +49 -32
- package/dist/src/{codex-sdk-wTEpMM_X.cjs → codex-sdk-fAO0c3yA.cjs} +49 -32
- package/dist/src/{cometapi-B01btbfb.js → cometapi-BasUi7-_.js} +17 -19
- package/dist/src/{cometapi-DHUAH6nK.js → cometapi-Bbjp5V4x.js} +16 -4
- package/dist/src/{cometapi-ChAaRjg5.js → cometapi-DkXrKi5z.js} +21 -24
- package/dist/src/{cometapi-JbvOJSCO.cjs → cometapi-vY6aDZgo.cjs} +21 -22
- package/dist/src/{completion-D9_MDlnd.js → completion-6Mx_iXxK.js} +11 -13
- package/dist/src/{completion-BBJ6zmG3.js → completion-C5rtR_9P.js} +11 -13
- package/dist/src/{completion-DDyL3Cb2.cjs → completion-CDOouNzq.cjs} +21 -23
- package/dist/src/completion-C_P3ypkJ.js +120 -0
- package/dist/src/createHash-CTQmL3G2.js +15 -0
- package/dist/src/createHash-CfZSc0b4.cjs +27 -0
- package/dist/src/createHash-Da8fMwqB.js +16 -0
- package/dist/src/createHash-DmPQkvBh.js +15 -0
- package/dist/src/{docker-JAAubMw3.js → docker-5KcG-_86.js} +18 -20
- package/dist/src/{docker-Ckw-j7Rr.cjs → docker-BwsKwxFs.cjs} +18 -19
- package/dist/src/{docker-vnOg96gi.js → docker-CZnqU1XV.js} +18 -7
- package/dist/src/{docker-BuButc4D.js → docker-DzxyDPIj.js} +19 -21
- package/dist/src/entrypoint.js +2 -3
- package/dist/src/{errors-DnGCbnx8.js → errors-P6ll7XSJ.js} +2 -2
- package/dist/src/{esm-CYhseqj4.js → esm-C03C-mv3.js} +17 -20
- package/dist/src/{esm-rDtG_2rg.js → esm-CaIwzWR5.js} +18 -21
- package/dist/src/esm-Cd1AjG1D.js +379 -0
- package/dist/src/{esm-BQkx5roy.cjs → esm-CnNt7sI4.cjs} +47 -49
- package/dist/src/eval-17JizQIv.js +15 -0
- package/dist/src/{eval-CYrbG57o.js → eval-DmFyWU7i.js} +49 -55
- package/dist/src/{evalResult-COsVttMA.js → evalResult-CDQiuUuf.js} +16 -12
- package/dist/src/{evalResult-6JaUIStC.js → evalResult-CTG2AHOS.js} +10 -11
- package/dist/src/evalResult-Cqj8pldJ.js +12 -0
- package/dist/src/{evalResult-DlRfu_Rq.cjs → evalResult-Dap2CekP.cjs} +20 -21
- package/dist/src/evalResult-DvcJAWJU.cjs +10 -0
- package/dist/src/evalResult-Hftn-S_i.js +10 -0
- package/dist/src/evaluator-B2CFNt-P.js +36 -0
- package/dist/src/{evaluator-3EJCMTs0.js → evaluator-DPFRbFIL.js} +210 -232
- package/dist/src/{extractor-LSYjrhK0.js → extractor-CFG6bcWJ.js} +23 -38
- package/dist/src/{extractor-DbhlYEeo.cjs → extractor-DX36oYEv.cjs} +37 -64
- package/dist/src/{extractor-Hs7la_19.js → extractor-M67RUtg6.js} +23 -38
- package/dist/src/extractor-YMU_Gvt8.js +374 -0
- package/dist/src/fetch-4M3YRaqL.js +780 -0
- package/dist/src/{fetch-18MuNu9i.js → fetch-60Gzydls.js} +60 -46
- package/dist/src/{fetch-SRsE6Ctl.js → fetch-BMv0O527.js} +41 -35
- package/dist/src/{fetch-ZMn_oemb.cjs → fetch-BxUk8odA.cjs} +268 -279
- package/dist/src/fetch-KV5kNASw.js +5 -0
- package/dist/src/{fileExtensions-ePDqouxn.js → fileExtensions-DnqA1y9x.js} +2 -2
- package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-Ds-foDzt.js} +2 -2
- package/dist/src/fileExtensions-LcDYkU4v.js +85 -0
- package/dist/src/{fileExtensions-DkJYkWUy.cjs → fileExtensions-bYh77CN8.cjs} +27 -28
- package/dist/src/{formatDuration-Doo0xq-z.js → formatDuration-DgBVMN65.js} +2 -2
- package/dist/src/{genaiTracer-Ce19n68P.js → genaiTracer-70Z8BIuV.js} +2 -3
- package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-C1rxGO8Q.js} +2 -3
- package/dist/src/genaiTracer-D3fD9dNV.js +256 -0
- package/dist/src/{genaiTracer-CQlpZkrp.cjs → genaiTracer-DN4dQywX.cjs} +13 -14
- package/dist/src/graders-Bu0H9nXi.js +32 -0
- package/dist/src/{graders-BaMCwIKp.js → graders-CHO8EPM4.js} +385 -417
- package/dist/src/graders-Cfhkvx-e.js +34 -0
- package/dist/src/{graders-QsALpIdy.js → graders-CpdqD9PI.js} +385 -417
- package/dist/src/graders-DClJVpGP.cjs +32 -0
- package/dist/src/{graders-DzUUnUjC.cjs → graders-DOXycdlG.cjs} +721 -753
- package/dist/src/graders-DcnJsrMO.js +32 -0
- package/dist/src/graders-R9rYUM0d.js +13466 -0
- package/dist/src/{image-BiEVdpdP.js → image-BmEZqVmk.js} +57 -18
- package/dist/src/{image-mhAGP07h.js → image-CBBVXWuT.js} +57 -18
- package/dist/src/{image-D10zEe1f.cjs → image-CDLQOcqT.cjs} +6 -7
- package/dist/src/{image-COCWy5dX.js → image-DJEvKveK.js} +6 -5
- package/dist/src/{image-C3BjJUAU.cjs → image-DTedmQPg.cjs} +77 -32
- package/dist/src/{image-DB4sHxdJ.js → image-gvmivTEe.js} +7 -9
- package/dist/src/image-pAX56tPG.js +257 -0
- package/dist/src/{image-BXt_7u0v.js → image-tL5hIOFh.js} +6 -8
- package/dist/src/index.cjs +696 -693
- package/dist/src/index.d.cts +113 -10
- package/dist/src/index.d.ts +113 -6
- package/dist/src/index.js +657 -658
- package/dist/src/{interactiveCheck-DU-MAhp5.js → interactiveCheck-BgLZUIt3.js} +7 -8
- package/dist/src/{invariant-DT20jrBd.js → invariant-BtWWVVhl.js} +2 -2
- package/dist/src/{invariant-1pAf2CD1.js → invariant-Ddh24eXh.js} +2 -2
- package/dist/src/{invariant-CKcJAQ6M.cjs → invariant-kfQ8Bu82.cjs} +7 -8
- package/dist/src/invariant-vgHWClmd.js +25 -0
- package/dist/src/{knowledgeBase-DotRBzUE.js → knowledgeBase-CLJybhnF.js} +19 -34
- package/dist/src/{knowledgeBase-XJQ0Qyez.js → knowledgeBase-CoU-UQBg.js} +17 -41
- package/dist/src/{knowledgeBase-CMvMlLZR.js → knowledgeBase-DjWPVqSb.js} +17 -43
- package/dist/src/{knowledgeBase-Bnb00xKs.cjs → knowledgeBase-wkxuRFhA.cjs} +17 -40
- package/dist/src/{litellm-CHrRmPAe.js → litellm-B9Hysuri.js} +16 -18
- package/dist/src/{litellm-CrLJrPIm.js → litellm-CTfa0hqi.js} +15 -17
- package/dist/src/{litellm-BrnZhMcL.cjs → litellm-NYpQ8RQu.cjs} +15 -16
- package/dist/src/{litellm-BECdjOTx.js → litellm-ePxtr9F1.js} +15 -4
- package/dist/src/{logger-w8Ozp0Td.js → logger-CT3IKMKA.js} +24 -41
- package/dist/src/{logger-BdZ-IqBc.cjs → logger-Cp1GPUjj.cjs} +166 -192
- package/dist/src/logger-DLcq4dWf.js +713 -0
- package/dist/src/{logger-BotXmWKW.js → logger-KkObSCzq.js} +27 -43
- package/dist/src/{luma-ray-C0RkI3lt.cjs → luma-ray-B0GGNRc1.cjs} +20 -21
- package/dist/src/{luma-ray-C-w6EsJm.js → luma-ray-BE2mOt6N.js} +20 -13
- package/dist/src/{luma-ray-BOeX-h0M.js → luma-ray-BW9IRGIc.js} +22 -21
- package/dist/src/{luma-ray-DgKLS0BF.js → luma-ray-Cm1KZBhs.js} +20 -23
- package/dist/src/main.js +1985 -2055
- package/dist/src/{messages-DXV3Qh8_.cjs → messages-1JrJs91T.cjs} +35 -34
- package/dist/src/{messages-D61tPFQo.js → messages-1x9atZmP.js} +25 -24
- package/dist/src/{messages-CDZYGNlS.js → messages-BLbWdsyt.js} +25 -24
- package/dist/src/messages-D8EA0oDc.js +240 -0
- package/dist/src/{meteor-P2rUE-Uz.js → meteor-44VjEACX.js} +3 -4
- package/dist/src/{meteor-SLNTgmXm.js → meteor-D-SotUw9.js} +3 -4
- package/dist/src/{meteor-odmwVbyG.cjs → meteor-DLZZ3osF.cjs} +3 -4
- package/dist/src/{meteor-Dj8cTkU_.js → meteor-DUiCJRC-.js} +3 -4
- package/dist/src/modelslab-C1OLRmVX.js +166 -0
- package/dist/src/modelslab-CqXBy3U8.js +168 -0
- package/dist/src/modelslab-DcOSFwKh.cjs +166 -0
- package/dist/src/modelslab-X5-4LroM.js +166 -0
- package/dist/src/{nova-reel-C2LFfVTf.js → nova-reel-BgS1ZWuK.js} +20 -13
- package/dist/src/{nova-reel-DtCjbD5O.js → nova-reel-D2ZkOSyr.js} +22 -21
- package/dist/src/{nova-reel-D9FXq3Mt.cjs → nova-reel-D9xfaMBs.cjs} +20 -21
- package/dist/src/{nova-reel-Bk5npr2q.js → nova-reel-DihqLeol.js} +20 -23
- package/dist/src/{nova-sonic-BoRSY_U6.cjs → nova-sonic-DVu3mMIy.cjs} +30 -31
- package/dist/src/{nova-sonic-D_qERM-K.js → nova-sonic-DezhVUYT.js} +30 -26
- package/dist/src/{nova-sonic-CgaWLDM1.js → nova-sonic-P-CdUMlV.js} +30 -31
- package/dist/src/{nova-sonic-BXRfQyF-.js → nova-sonic-Q3BOJeig.js} +31 -32
- package/dist/src/{openai-Bigwjgo1.cjs → openai-Cuif0GEt.cjs} +8 -9
- package/dist/src/{openai-Dz3surb_.js → openai-DElQ-fPX.js} +3 -4
- package/dist/src/{openai-CT5fwbve.js → openai-DhbB7eWK.js} +3 -4
- package/dist/src/openai-j-sE2O7r.js +44 -0
- package/dist/src/{openclaw-dHLcXUWZ.js → openclaw-BiSZPL7J.js} +20 -14
- package/dist/src/{openclaw-CpPrXwf6.js → openclaw-Bv1DINsX.js} +20 -27
- package/dist/src/{openclaw-B6XY2kUf.js → openclaw-D1D_ej1z.js} +21 -28
- package/dist/src/{openclaw-DDSfq5fp.cjs → openclaw-DAfWQn-o.cjs} +33 -39
- package/dist/src/opencode-sdk-C7m-wRfI.js +560 -0
- package/dist/src/opencode-sdk-CfaLN8PY.cjs +564 -0
- package/dist/src/opencode-sdk-D95s6SnR.js +562 -0
- package/dist/src/opencode-sdk-DxUPkLT7.js +560 -0
- package/dist/src/{otlpReceiver-DmRb0NBj.js → otlpReceiver--AIRW_S4.js} +53 -51
- package/dist/src/{otlpReceiver-Dg817agV.js → otlpReceiver-Bn5wGB1v.js} +53 -55
- package/dist/src/{otlpReceiver-B6Xo4KZM.cjs → otlpReceiver-Diec4cln.cjs} +53 -55
- package/dist/src/{otlpReceiver-BO0rbDzh.js → otlpReceiver-g3ByGaXs.js} +53 -55
- package/dist/src/{providerRegistry-Xf0qdqGQ.js → providerRegistry-B0RUOLI_.js} +7 -8
- package/dist/src/{providerRegistry-wCWd7sKQ.js → providerRegistry-CD8MEar9.js} +7 -8
- package/dist/src/{providerRegistry-lc7a7utN.cjs → providerRegistry-Civky8Ar.cjs} +12 -13
- package/dist/src/providerRegistry-DM8rZYol.js +45 -0
- package/dist/src/providers-B3HvufyI.js +33246 -0
- package/dist/src/{providers-BiNq_Iyc.js → providers-BKRJTjBz.js} +1743 -1795
- package/dist/src/providers-C1rOSHiR.js +32 -0
- package/dist/src/{providers-BlEhY5mi.js → providers-CFLy1_ji.js} +1750 -1802
- package/dist/src/{providers-BNKVY53V.cjs → providers-CFu-TZl-.cjs} +2111 -2163
- package/dist/src/providers-CxmDwEFf.cjs +31 -0
- package/dist/src/providers-Dodakqr0.js +30 -0
- package/dist/src/providers-GIQ2TcsA.js +30 -0
- package/dist/src/{pythonUtils-r1uBuA0n.js → pythonUtils-C3py6GC1.js} +18 -19
- package/dist/src/{pythonUtils-DZ6EbdY4.cjs → pythonUtils-CTU3Y3lw.cjs} +42 -43
- package/dist/src/{pythonUtils-vMlk9Qp5.js → pythonUtils-D5nxkQ0P.js} +18 -19
- package/dist/src/pythonUtils-D6fwaDSg.js +249 -0
- package/dist/src/quiverai-C2jVwbH1.js +213 -0
- package/dist/src/quiverai-CI6gYJVI.js +213 -0
- package/dist/src/quiverai-CLkWkyZc.cjs +213 -0
- package/dist/src/quiverai-MHSxbmmZ.js +215 -0
- package/dist/src/{render-CAZvKKkB.js → render-Drod8m7K.js} +4 -5
- package/dist/src/{responses-DLLjADw5.js → responses-BKqJmhhc.js} +34 -27
- package/dist/src/{responses-TsdODUpm.js → responses-CGw0DCzh.js} +34 -27
- package/dist/src/responses-jxdehPkC.js +660 -0
- package/dist/src/{responses-zOtKtnY_.cjs → responses-tD4Bd4dc.cjs} +49 -42
- package/dist/src/rubyUtils-BUHu6PhO.js +5 -0
- package/dist/src/{rubyUtils-Cs35SDYa.js → rubyUtils-BUVePouc.js} +27 -20
- package/dist/src/rubyUtils-BcuGX77l.js +222 -0
- package/dist/src/{rubyUtils-BtjlqyXt.js → rubyUtils-Boc4HZzX.js} +18 -19
- package/dist/src/rubyUtils-CP42kMvq.cjs +4 -0
- package/dist/src/{rubyUtils-DCVaJ3mc.cjs → rubyUtils-DhCAlxZr.cjs} +48 -50
- package/dist/src/{sagemaker-Du4LIR97.js → sagemaker-BK4Zb993.js} +75 -70
- package/dist/src/{sagemaker-CLdUAv5z.js → sagemaker-BfiWTmvn.js} +77 -77
- package/dist/src/{sagemaker-DwNnEVYt.cjs → sagemaker-CcQHM1jV.cjs} +75 -76
- package/dist/src/{sagemaker-BcgLu0U4.js → sagemaker-D2Q1c-sD.js} +75 -79
- package/dist/src/{scanner-Dyw21Wg_.js → scanner-J8CA3LsV.js} +149 -122
- package/dist/src/server/index.js +5620 -67302
- package/dist/src/{server-CgUQ25qW.cjs → server-B0PPuDw-.cjs} +57 -67
- package/dist/src/server-B1vi21hA.js +7 -0
- package/dist/src/{server-CbMTRQkg.js → server-BC7XJFgr.js} +19 -24
- package/dist/src/server-Cm9Kai_h.cjs +5 -0
- package/dist/src/{server-DWmZLfCy.js → server-DbFphssR.js} +26 -29
- package/dist/src/server-OAs3nBRT.js +229 -0
- package/dist/src/{signal-Bl32q42d.js → signal-BOTbd53Z.js} +9 -11
- package/dist/src/{slack-BtMkB6xP.cjs → slack-BmVAVGaK.cjs} +7 -8
- package/dist/src/{slack-OZYxoVON.js → slack-DCUPTzS2.js} +8 -8
- package/dist/src/{slack-DPqj42Ts.js → slack-DOdy_kyv.js} +7 -8
- package/dist/src/{slack-BfdBx2tO.js → slack-DXMKtA-f.js} +7 -9
- package/dist/src/store-BNmZ1KAz.cjs +5 -0
- package/dist/src/{store-BqwfFEyF.cjs → store-BSc-TF2w.cjs} +44 -45
- package/dist/src/store-BltJg2cd.js +6 -0
- package/dist/src/{store-D4gdn9ih.js → store-D1tv90v3.js} +34 -35
- package/dist/src/{store-2ocbYY9D.js → store-DQLEjuEO.js} +40 -36
- package/dist/src/store-Ub2vaGJ1.js +228 -0
- package/dist/src/{tables-D-NSwNIb.js → tables-5EvT_Bwn.js} +23 -23
- package/dist/src/{tables-B9E1kRp-.cjs → tables-C7K-XKWp.cjs} +93 -93
- package/dist/src/{tables-C7TT2XVn.js → tables-D36WTqKX.js} +25 -25
- package/dist/src/tables-xKANLRBD.js +288 -0
- package/dist/src/telemetry-5BCRNBbe.cjs +5 -0
- package/dist/src/{telemetry-DZ_7PaVq.js → telemetry-C15ziL8u.js} +17 -14
- package/dist/src/{telemetry-BXyVqyAg.js → telemetry-C2YDkUQH.js} +11 -13
- package/dist/src/{telemetry-D0_yFdtU.cjs → telemetry-CbrnxHp_.cjs} +21 -24
- package/dist/src/telemetry-D4W5hboe.js +7 -0
- package/dist/src/telemetry-DMb2Mpfm.js +171 -0
- package/dist/src/{text-Dm78AVGG.js → text-B_UCRPp2.js} +2 -2
- package/dist/src/{text-DF2hMKdg.cjs → text-CW1cyrwj.cjs} +12 -13
- package/dist/src/{text-DgMr_tiM.js → text-Db-Wt2u2.js} +2 -2
- package/dist/src/text-TIv0QYnd.js +22 -0
- package/dist/src/{tokenUsageUtils-FZd5O_4A.js → tokenUsageUtils-BDGe-iyI.js} +2 -2
- package/dist/src/{tokenUsageUtils-DmZSD2eU.js → tokenUsageUtils-DflFMjS0.js} +2 -2
- package/dist/src/tokenUsageUtils-NYT-WKS6.js +138 -0
- package/dist/src/{tokenUsageUtils-CXhxVj72.cjs → tokenUsageUtils-bVa1ga6f.cjs} +32 -33
- package/dist/src/{transcription-FNIz3YOe.cjs → transcription-CL78qbOU.cjs} +14 -15
- package/dist/src/{transcription-C-M81iDA.js → transcription-DAtxHhAM.js} +14 -7
- package/dist/src/{transcription-CYuY5sFO.js → transcription-LNZTNUUL.js} +14 -16
- package/dist/src/{transcription-Ch7S-LWw.js → transcription-QHh3AH6Z.js} +15 -17
- package/dist/src/{transform-CoP2bJ7P.js → transform-Cgi24fJ7.js} +94 -66
- package/dist/src/{transform-Kd6u-oNm.cjs → transform-CzK1Q0zl.cjs} +24 -26
- package/dist/src/{transform-D8dILpfZ.js → transform-DECvGmzp.js} +15 -13
- package/dist/src/{transform-DMaxQwDx.js → transform-DGLazrMm.js} +94 -66
- package/dist/src/transform-DGxXocjk.js +1506 -0
- package/dist/src/{transform-ivxEY4f7.cjs → transform-DOcQeLld.cjs} +234 -206
- package/dist/src/transform-DTGDnAzW.js +6 -0
- package/dist/src/{transform-CqTFr7KR.js → transform-DilY9wbS.js} +10 -12
- package/dist/src/transform-aa6tmVpZ.js +216 -0
- package/dist/src/transform-m3qNw4KP.cjs +5 -0
- package/dist/src/{transformersAvailability-DEU2naS1.js → transformersAvailability-CEVM2GNQ.js} +2 -2
- package/dist/src/{transformersAvailability-Bkep3ka7.cjs → transformersAvailability-CwayUSlh.cjs} +2 -3
- package/dist/src/{transformersAvailability-DwmezkVe.js → transformersAvailability-D6c6ROpT.js} +2 -2
- package/dist/src/{types-t52w-XsS.js → types-CH3Ge2sE.js} +103 -92
- package/dist/src/{types-DMVjYLpx.js → types-CLKiCBW3.js} +98 -91
- package/dist/src/types-CN_TZ2GJ.js +3260 -0
- package/dist/src/{types-BIfttHrT.cjs → types-LJ0r3wbR.cjs} +573 -566
- package/dist/src/util-5cB-L7U3.js +1430 -0
- package/dist/src/util-6-GqIvzS.js +599 -0
- package/dist/src/{util-vjscpUzy.js → util-B7T3SiBS.js} +5 -6
- package/dist/src/{util-Cl0zfT3V.js → util-Betm42rL.js} +44 -17
- package/dist/src/{util-CUEt0Vum.js → util-C-PPYSMq.js} +44 -17
- package/dist/src/{util-DkFTvieG.cjs → util-CchiqXh_.cjs} +35 -36
- package/dist/src/{util-mJ58qbbw.js → util-DaWTWKBK.js} +5 -6
- package/dist/src/{util-C08Kns6-.cjs → util-Db0a0AFH.cjs} +89 -62
- package/dist/src/{util-DiCePfDu.js → util-Dlz_Wvgm.js} +102 -53
- package/dist/src/{util-BSh4a_Q8.js → util-YT5HPZaS.js} +102 -53
- package/dist/src/{util-DUYOvxAy.cjs → util-Yz-1aEhW.cjs} +274 -219
- package/dist/src/util-ZZH-3QZz.js +293 -0
- package/dist/src/{utils-DFaZa6Rf.cjs → utils-Cz9qXqII.cjs} +32 -35
- package/dist/src/{utils-CVzb4YiI.js → utils-XiOAgly5.js} +4 -7
- package/dist/src/utils-dLokC-eR.js +94 -0
- package/dist/src/{utils-JaY9veb5.js → utils-f2-Moju7.js} +4 -7
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +59 -53
- package/dist/src/app/assets/index-BOgkICuY.css +0 -1
- package/dist/src/app/assets/index-CSgqn_Vd.js +0 -428
- package/dist/src/app/tsconfig.app.tsbuildinfo +0 -1
- package/dist/src/base-BaXmtXYp.js +0 -107
- package/dist/src/base-Dtp8b4_N.js +0 -106
- package/dist/src/base-f71xxWai.cjs +0 -111
- package/dist/src/cache-BUPcq0Ad.js +0 -6
- package/dist/src/cache-CVfRb-HD.cjs +0 -6
- package/dist/src/cache-O4EuX2JV.js +0 -8
- package/dist/src/chunk-DHDDz29n.js +0 -22
- package/dist/src/chunk-FhC4c-0y.js +0 -21
- package/dist/src/cloud-CZ4hytdm.js +0 -5
- package/dist/src/eval-CKHWqG9f.js +0 -16
- package/dist/src/evalResult-CxTP-LMm.cjs +0 -11
- package/dist/src/evalResult-CzLURDcP.js +0 -13
- package/dist/src/evalResult-DyttNQ_G.js +0 -11
- package/dist/src/evaluator-0PvfeBYh.js +0 -38
- package/dist/src/fetch-Bi0o-fdp.js +0 -4
- package/dist/src/fetch-CMptBDVg.cjs +0 -4
- package/dist/src/fetch-DAZkv3gV.js +0 -6
- package/dist/src/graders-BCytzXrb.js +0 -34
- package/dist/src/graders-CGZQShfJ.cjs +0 -33
- package/dist/src/graders-spkuVC-E.js +0 -36
- package/dist/src/opencode-sdk-CImWVqy9.js +0 -382
- package/dist/src/opencode-sdk-CuCztr4P.js +0 -380
- package/dist/src/opencode-sdk-DhcfRbBH.js +0 -376
- package/dist/src/opencode-sdk-mqF-Oj3f.cjs +0 -383
- package/dist/src/providers-BMZZmPBJ.cjs +0 -32
- package/dist/src/providers-CQQrNaJk.js +0 -32
- package/dist/src/providers-Ck8HyrC-.js +0 -34
- package/dist/src/quiverai-BNfIwKCO.cjs +0 -54
- package/dist/src/quiverai-BQigKdIH.js +0 -57
- package/dist/src/quiverai-Bfy2WnE2.js +0 -55
- package/dist/src/quiverai-CedIP0PJ.js +0 -43
- package/dist/src/rubyUtils-D7--T12C.js +0 -6
- package/dist/src/rubyUtils-DRRiMFV2.js +0 -5
- package/dist/src/rubyUtils-vb8OYFC-.cjs +0 -5
- package/dist/src/server-BUbS0Qfh.js +0 -6
- package/dist/src/server-XpGXFHkS.cjs +0 -6
- package/dist/src/server-gfOx5Zrk.js +0 -8
- package/dist/src/store-5u2yriTV.js +0 -7
- package/dist/src/store-D_lq_8oQ.js +0 -6
- package/dist/src/store-m5KT6Ly7.cjs +0 -6
- package/dist/src/telemetry-5RHFoCJh.js +0 -6
- package/dist/src/telemetry-Do8wMnA-.js +0 -8
- package/dist/src/telemetry-LojxPoFq.cjs +0 -6
- package/dist/src/transform-8eGmaH-7.js +0 -7
- package/dist/src/transform-BRVvWaG4.cjs +0 -6
- package/dist/src/transform-GybT0X0u.js +0 -8
- package/dist/src/transformersAvailability-DkAWaK5B.js +0 -35
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { b as getEnvBool, o as
|
|
3
|
-
import {
|
|
4
|
-
import { t as invariant } from "./invariant-
|
|
5
|
-
import { o as getUserEmail } from "./accounts-
|
|
6
|
-
import {
|
|
7
|
-
import { n as isNonTransientHttpStatus, t as NON_TRANSIENT_HTTP_STATUSES } from "./errors-
|
|
8
|
-
import { t as hashPrompt } from "./utils-
|
|
9
|
-
import {
|
|
10
|
-
import { a as createEmptyTokenUsage, r as accumulateTokenUsage } from "./tokenUsageUtils-
|
|
11
|
-
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, i as datasetsTable, l as evalsToTagsTable, o as evalsTable, p as tagsTable, s as evalsToDatasetsTable } from "./tables-
|
|
12
|
-
import { n as getTraceStore } from "./store-
|
|
13
|
-
import { i as getActualPrompt, r as updateSignalFile } from "./signal-
|
|
14
|
-
import { t as EvalResult } from "./evalResult-
|
|
2
|
+
import { b as getEnvBool, o as logger } from "./logger-KkObSCzq.js";
|
|
3
|
+
import { F as HUMAN_ASSERTION_TYPE, c as getCurrentTimestamp } from "./fetch-BMv0O527.js";
|
|
4
|
+
import { t as invariant } from "./invariant-BtWWVVhl.js";
|
|
5
|
+
import { o as getUserEmail } from "./accounts-CMqkzrVf.js";
|
|
6
|
+
import { H as riskCategorySeverityMap, s as ResultFailureReason, st as PLUGIN_CATEGORIES } from "./types-CH3Ge2sE.js";
|
|
7
|
+
import { n as isNonTransientHttpStatus, t as NON_TRANSIENT_HTTP_STATUSES } from "./errors-P6ll7XSJ.js";
|
|
8
|
+
import { t as hashPrompt } from "./utils-f2-Moju7.js";
|
|
9
|
+
import { n as sha256, t as randomSequence } from "./createHash-Da8fMwqB.js";
|
|
10
|
+
import { a as createEmptyTokenUsage, r as accumulateTokenUsage } from "./tokenUsageUtils-DflFMjS0.js";
|
|
11
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, i as datasetsTable, l as evalsToTagsTable, o as evalsTable, p as tagsTable, s as evalsToDatasetsTable } from "./tables-D36WTqKX.js";
|
|
12
|
+
import { n as getTraceStore } from "./store-D1tv90v3.js";
|
|
13
|
+
import { i as getActualPrompt, r as updateSignalFile } from "./signal-BOTbd53Z.js";
|
|
14
|
+
import { t as EvalResult } from "./evalResult-CTG2AHOS.js";
|
|
15
15
|
import { and, desc, eq, sql } from "drizzle-orm";
|
|
16
|
-
|
|
17
16
|
//#region src/redteam/metrics.ts
|
|
18
17
|
/**
|
|
19
18
|
* The Attack Success Rate (ASR) is the number of tests which failed grading divided by the
|
|
@@ -25,7 +24,6 @@ import { and, desc, eq, sql } from "drizzle-orm";
|
|
|
25
24
|
function calculateAttackSuccessRate(testCount, failCount) {
|
|
26
25
|
return testCount > 0 ? failCount / testCount * 100 : 0;
|
|
27
26
|
}
|
|
28
|
-
|
|
29
27
|
//#endregion
|
|
30
28
|
//#region src/redteam/sharedFrontend.ts
|
|
31
29
|
function getRiskCategorySeverityMap(plugins) {
|
|
@@ -42,7 +40,6 @@ function getRiskCategorySeverityMap(plugins) {
|
|
|
42
40
|
...overrides
|
|
43
41
|
};
|
|
44
42
|
}
|
|
45
|
-
|
|
46
43
|
//#endregion
|
|
47
44
|
//#region src/util/calculateFilteredMetrics.ts
|
|
48
45
|
/**
|
|
@@ -96,12 +93,12 @@ async function calculateFilteredMetrics(opts) {
|
|
|
96
93
|
try {
|
|
97
94
|
const countResult = await getResultCount(whereSql);
|
|
98
95
|
if (countResult > MAX_RESULTS_FOR_METRICS) {
|
|
99
|
-
|
|
96
|
+
logger.warn(`Filtered result count ${countResult} exceeds limit ${MAX_RESULTS_FOR_METRICS}`, { evalId: opts.evalId });
|
|
100
97
|
throw new Error(`Result count ${countResult} exceeds maximum ${MAX_RESULTS_FOR_METRICS}`);
|
|
101
98
|
}
|
|
102
99
|
return await calculateWithOptimizedQuery(opts);
|
|
103
100
|
} catch (error) {
|
|
104
|
-
|
|
101
|
+
logger.error("Failed to calculate filtered metrics with optimized query", { error });
|
|
105
102
|
return createEmptyMetricsArray(numPrompts);
|
|
106
103
|
}
|
|
107
104
|
}
|
|
@@ -154,7 +151,7 @@ async function calculateWithOptimizedQuery(opts) {
|
|
|
154
151
|
for (const row of basicResults) {
|
|
155
152
|
const idx = row.prompt_idx;
|
|
156
153
|
if (idx < 0 || idx >= numPrompts) {
|
|
157
|
-
|
|
154
|
+
logger.warn(`Invalid prompt_idx ${idx}, expected 0-${numPrompts - 1}`);
|
|
158
155
|
continue;
|
|
159
156
|
}
|
|
160
157
|
metrics[idx] = {
|
|
@@ -179,7 +176,7 @@ async function calculateWithOptimizedQuery(opts) {
|
|
|
179
176
|
}
|
|
180
177
|
await aggregateNamedScores(metrics, whereSql);
|
|
181
178
|
await aggregateAssertions(metrics, whereSql);
|
|
182
|
-
|
|
179
|
+
logger.debug("Filtered metrics calculated", {
|
|
183
180
|
numPrompts,
|
|
184
181
|
metricsCount: basicResults.length
|
|
185
182
|
});
|
|
@@ -300,7 +297,6 @@ function createEmptyMetricsArray(numPrompts) {
|
|
|
300
297
|
cost: 0
|
|
301
298
|
}));
|
|
302
299
|
}
|
|
303
|
-
|
|
304
300
|
//#endregion
|
|
305
301
|
//#region src/util/convertEvalResultsToTable.ts
|
|
306
302
|
/**
|
|
@@ -433,7 +429,6 @@ function convertResultsToTable(eval_) {
|
|
|
433
429
|
body: rows
|
|
434
430
|
};
|
|
435
431
|
}
|
|
436
|
-
|
|
437
432
|
//#endregion
|
|
438
433
|
//#region src/util/exportToFile/index.ts
|
|
439
434
|
function convertEvalResultToTableCell(result) {
|
|
@@ -511,7 +506,6 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
511
506
|
for (const result of results) row.outputs[result.promptIdx] = convertEvalResultToTableCell(result);
|
|
512
507
|
return row;
|
|
513
508
|
}
|
|
514
|
-
|
|
515
509
|
//#endregion
|
|
516
510
|
//#region src/models/evalPerformance.ts
|
|
517
511
|
const distinctCountCache = /* @__PURE__ */ new Map();
|
|
@@ -528,7 +522,7 @@ async function getCachedResultsCount(evalId) {
|
|
|
528
522
|
const cacheKey = `distinct:${evalId}`;
|
|
529
523
|
const cached = distinctCountCache.get(cacheKey);
|
|
530
524
|
if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
|
|
531
|
-
|
|
525
|
+
logger.debug(`Using cached distinct count for eval ${evalId}: ${cached.count}`);
|
|
532
526
|
return cached.count;
|
|
533
527
|
}
|
|
534
528
|
const db = getDb();
|
|
@@ -536,7 +530,7 @@ async function getCachedResultsCount(evalId) {
|
|
|
536
530
|
const result = db.select({ count: sql`COUNT(DISTINCT test_idx)` }).from(evalResultsTable).where(sql`eval_id = ${evalId}`).all();
|
|
537
531
|
const count = Number(result[0]?.count ?? 0);
|
|
538
532
|
const duration = Date.now() - start;
|
|
539
|
-
|
|
533
|
+
logger.debug(`Distinct count query for eval ${evalId}: ${count} in ${duration}ms`);
|
|
540
534
|
distinctCountCache.set(cacheKey, {
|
|
541
535
|
count,
|
|
542
536
|
timestamp: Date.now()
|
|
@@ -554,7 +548,7 @@ async function getTotalResultRowCount(evalId) {
|
|
|
554
548
|
const cacheKey = `total:${evalId}`;
|
|
555
549
|
const cached = totalRowCountCache.get(cacheKey);
|
|
556
550
|
if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
|
|
557
|
-
|
|
551
|
+
logger.debug(`Using cached total row count for eval ${evalId}: ${cached.count}`);
|
|
558
552
|
return cached.count;
|
|
559
553
|
}
|
|
560
554
|
const db = getDb();
|
|
@@ -562,7 +556,7 @@ async function getTotalResultRowCount(evalId) {
|
|
|
562
556
|
const result = db.select({ count: sql`COUNT(*)` }).from(evalResultsTable).where(sql`eval_id = ${evalId}`).all();
|
|
563
557
|
const count = Number(result[0]?.count ?? 0);
|
|
564
558
|
const duration = Date.now() - start;
|
|
565
|
-
|
|
559
|
+
logger.debug(`Total row count query for eval ${evalId}: ${count} in ${duration}ms`);
|
|
566
560
|
totalRowCountCache.set(cacheKey, {
|
|
567
561
|
count,
|
|
568
562
|
timestamp: Date.now()
|
|
@@ -595,7 +589,7 @@ async function queryTestIndicesOptimized(evalId, opts) {
|
|
|
595
589
|
`;
|
|
596
590
|
const countResult = db.all(countQuery);
|
|
597
591
|
const filteredCount = Number(countResult[0]?.count ?? 0);
|
|
598
|
-
|
|
592
|
+
logger.debug(`Optimized count query took ${Date.now() - countStart}ms`);
|
|
599
593
|
const idxStart = Date.now();
|
|
600
594
|
const idxQuery = sql`
|
|
601
595
|
SELECT DISTINCT test_idx
|
|
@@ -606,13 +600,12 @@ async function queryTestIndicesOptimized(evalId, opts) {
|
|
|
606
600
|
OFFSET ${offset}
|
|
607
601
|
`;
|
|
608
602
|
const testIndices = db.all(idxQuery).map((row) => row.test_idx);
|
|
609
|
-
|
|
603
|
+
logger.debug(`Optimized index query took ${Date.now() - idxStart}ms`);
|
|
610
604
|
return {
|
|
611
605
|
testIndices,
|
|
612
606
|
filteredCount
|
|
613
607
|
};
|
|
614
608
|
}
|
|
615
|
-
|
|
616
609
|
//#endregion
|
|
617
610
|
//#region src/models/eval.ts
|
|
618
611
|
/**
|
|
@@ -707,7 +700,7 @@ var EvalQueries = class {
|
|
|
707
700
|
try {
|
|
708
701
|
db.update(evalsTable).set({ vars }).where(eq(evalsTable.id, evalId)).run();
|
|
709
702
|
} catch (e) {
|
|
710
|
-
|
|
703
|
+
logger.error(`Error setting vars: ${vars} for eval ${evalId}: ${e}`);
|
|
711
704
|
}
|
|
712
705
|
}
|
|
713
706
|
static async getMetadataKeysFromEval(evalId, comparisonEvalIds = []) {
|
|
@@ -728,7 +721,7 @@ var EvalQueries = class {
|
|
|
728
721
|
`;
|
|
729
722
|
return (await db.all(query)).map((r) => r.key);
|
|
730
723
|
} catch (error) {
|
|
731
|
-
|
|
724
|
+
logger.error(`Error fetching metadata keys for eval ${evalId} and comparisons [${comparisonEvalIds.join(", ")}]: ${error}`);
|
|
732
725
|
return [];
|
|
733
726
|
}
|
|
734
727
|
}
|
|
@@ -759,7 +752,7 @@ var EvalQueries = class {
|
|
|
759
752
|
const values = db.all(query).map(({ value }) => String(value).trim()).filter((value) => value.length > 0);
|
|
760
753
|
return Array.from(new Set(values));
|
|
761
754
|
} catch (error) {
|
|
762
|
-
|
|
755
|
+
logger.error(`Error fetching metadata values for eval ${evalId} and key ${trimmedKey}: ${error instanceof Error ? error.message : String(error)}`);
|
|
763
756
|
return [];
|
|
764
757
|
}
|
|
765
758
|
}
|
|
@@ -831,7 +824,7 @@ var Eval = class Eval {
|
|
|
831
824
|
}
|
|
832
825
|
return evalInstance;
|
|
833
826
|
}
|
|
834
|
-
static async getMany(limit =
|
|
827
|
+
static async getMany(limit = 100) {
|
|
835
828
|
return (await getDb().select().from(evalsTable).limit(limit).orderBy(desc(evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
|
|
836
829
|
id: e.id,
|
|
837
830
|
createdAt: new Date(e.createdAt),
|
|
@@ -846,7 +839,7 @@ var Eval = class Eval {
|
|
|
846
839
|
* @param offset - Number of evals to skip
|
|
847
840
|
* @param limit - Maximum number of evals to return
|
|
848
841
|
*/
|
|
849
|
-
static async getPaginated(offset = 0, limit =
|
|
842
|
+
static async getPaginated(offset = 0, limit = 100) {
|
|
850
843
|
return (await getDb().select().from(evalsTable).orderBy(desc(evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
|
|
851
844
|
id: e.id,
|
|
852
845
|
createdAt: new Date(e.createdAt),
|
|
@@ -878,7 +871,8 @@ var Eval = class Eval {
|
|
|
878
871
|
results: {},
|
|
879
872
|
vars: opts?.vars || [],
|
|
880
873
|
runtimeOptions: sanitizeRuntimeOptions(opts?.runtimeOptions),
|
|
881
|
-
prompts: opts?.completedPrompts || []
|
|
874
|
+
prompts: opts?.completedPrompts || [],
|
|
875
|
+
isRedteam: Boolean(config.redteam)
|
|
882
876
|
}).run();
|
|
883
877
|
for (const prompt of renderedPrompts) {
|
|
884
878
|
const label = prompt.label || prompt.display || prompt.raw;
|
|
@@ -891,7 +885,7 @@ var Eval = class Eval {
|
|
|
891
885
|
evalId,
|
|
892
886
|
promptId
|
|
893
887
|
}).onConflictDoNothing().run();
|
|
894
|
-
|
|
888
|
+
logger.debug(`Inserting prompt ${promptId}`);
|
|
895
889
|
}
|
|
896
890
|
if (opts?.results && opts.results.length > 0) {
|
|
897
891
|
const res = db.insert(evalResultsTable).values(opts.results?.map((r) => ({
|
|
@@ -899,7 +893,7 @@ var Eval = class Eval {
|
|
|
899
893
|
evalId,
|
|
900
894
|
id: crypto.randomUUID()
|
|
901
895
|
}))).run();
|
|
902
|
-
|
|
896
|
+
logger.debug(`Inserted ${res.changes} eval results`);
|
|
903
897
|
}
|
|
904
898
|
db.insert(datasetsTable).values({
|
|
905
899
|
id: datasetId,
|
|
@@ -909,7 +903,7 @@ var Eval = class Eval {
|
|
|
909
903
|
evalId,
|
|
910
904
|
datasetId
|
|
911
905
|
}).onConflictDoNothing().run();
|
|
912
|
-
|
|
906
|
+
logger.debug(`Inserting dataset ${datasetId}`);
|
|
913
907
|
if (config.tags) for (const [tagKey, tagValue] of Object.entries(config.tags)) {
|
|
914
908
|
const tagId = sha256(`${tagKey}:${tagValue}`);
|
|
915
909
|
db.insert(tagsTable).values({
|
|
@@ -921,7 +915,7 @@ var Eval = class Eval {
|
|
|
921
915
|
evalId,
|
|
922
916
|
tagId
|
|
923
917
|
}).onConflictDoNothing().run();
|
|
924
|
-
|
|
918
|
+
logger.debug(`Inserting tag ${tagId}`);
|
|
925
919
|
}
|
|
926
920
|
});
|
|
927
921
|
return new Eval(config, {
|
|
@@ -1102,7 +1096,7 @@ var Eval = class Eval {
|
|
|
1102
1096
|
if (type === "metric") {
|
|
1103
1097
|
const metricKey = field || value;
|
|
1104
1098
|
if (!metricKey) {
|
|
1105
|
-
|
|
1099
|
+
logger.warn("Invalid metric filter: missing field and value", { filter });
|
|
1106
1100
|
return;
|
|
1107
1101
|
}
|
|
1108
1102
|
const jsonPath = buildSafeJsonPath(metricKey);
|
|
@@ -1116,7 +1110,7 @@ var Eval = class Eval {
|
|
|
1116
1110
|
else if (operator === "lt") condition = sql`CAST(json_extract(named_scores, ${jsonPath}) AS REAL) < ${numericValue}`;
|
|
1117
1111
|
else if (operator === "lte") condition = sql`CAST(json_extract(named_scores, ${jsonPath}) AS REAL) <= ${numericValue}`;
|
|
1118
1112
|
} else {
|
|
1119
|
-
|
|
1113
|
+
logger.warn("Invalid numeric value in metric filter", {
|
|
1120
1114
|
metricKey,
|
|
1121
1115
|
value,
|
|
1122
1116
|
numericValue,
|
|
@@ -1194,7 +1188,7 @@ var Eval = class Eval {
|
|
|
1194
1188
|
const countStart = Date.now();
|
|
1195
1189
|
const countResult = await db.get(filteredCountQuery);
|
|
1196
1190
|
const countEnd = Date.now();
|
|
1197
|
-
|
|
1191
|
+
logger.debug(`Count query took ${countEnd - countStart}ms`);
|
|
1198
1192
|
const filteredCount = countResult?.count || 0;
|
|
1199
1193
|
const idxQuery = sql`
|
|
1200
1194
|
SELECT DISTINCT test_idx
|
|
@@ -1207,7 +1201,7 @@ var Eval = class Eval {
|
|
|
1207
1201
|
const idxStart = Date.now();
|
|
1208
1202
|
const rows = await db.all(idxQuery);
|
|
1209
1203
|
const idxEnd = Date.now();
|
|
1210
|
-
|
|
1204
|
+
logger.debug(`Index query took ${idxEnd - idxStart}ms`);
|
|
1211
1205
|
return {
|
|
1212
1206
|
testIndices: rows.map((row) => row.test_idx),
|
|
1213
1207
|
filteredCount
|
|
@@ -1243,7 +1237,7 @@ var Eval = class Eval {
|
|
|
1243
1237
|
const hasComplexFilters = opts.filters && opts.filters.length > 0;
|
|
1244
1238
|
let queryResult;
|
|
1245
1239
|
if (hasComplexFilters) {
|
|
1246
|
-
|
|
1240
|
+
logger.debug("Using original query for complex filters");
|
|
1247
1241
|
queryResult = await this.queryTestIndices({
|
|
1248
1242
|
offset: opts.offset,
|
|
1249
1243
|
limit: opts.limit,
|
|
@@ -1252,7 +1246,7 @@ var Eval = class Eval {
|
|
|
1252
1246
|
filters: opts.filters
|
|
1253
1247
|
});
|
|
1254
1248
|
} else {
|
|
1255
|
-
|
|
1249
|
+
logger.debug("Using optimized query for table page");
|
|
1256
1250
|
queryResult = await queryTestIndicesOptimized(this.id, {
|
|
1257
1251
|
offset: opts.offset,
|
|
1258
1252
|
limit: opts.limit,
|
|
@@ -1267,12 +1261,12 @@ var Eval = class Eval {
|
|
|
1267
1261
|
const varsStart = Date.now();
|
|
1268
1262
|
const vars = Array.from(this.vars);
|
|
1269
1263
|
const varsEnd = Date.now();
|
|
1270
|
-
|
|
1264
|
+
logger.debug(`Vars query took ${varsEnd - varsStart}ms`);
|
|
1271
1265
|
const body = [];
|
|
1272
1266
|
const bodyStart = Date.now();
|
|
1273
1267
|
if (testIndices.length === 0) {
|
|
1274
1268
|
const bodyEnd = Date.now();
|
|
1275
|
-
|
|
1269
|
+
logger.debug(`Body query took ${bodyEnd - bodyStart}ms`);
|
|
1276
1270
|
return {
|
|
1277
1271
|
head: {
|
|
1278
1272
|
prompts: this.prompts,
|
|
@@ -1304,7 +1298,7 @@ var Eval = class Eval {
|
|
|
1304
1298
|
if (results.length > 0) body.push(convertTestResultsToTableRow(results, vars));
|
|
1305
1299
|
}
|
|
1306
1300
|
const bodyEnd = Date.now();
|
|
1307
|
-
|
|
1301
|
+
logger.debug(`Body query took ${bodyEnd - bodyStart}ms`);
|
|
1308
1302
|
return {
|
|
1309
1303
|
head: {
|
|
1310
1304
|
prompts: this.prompts,
|
|
@@ -1417,7 +1411,7 @@ var Eval = class Eval {
|
|
|
1417
1411
|
})
|
|
1418
1412
|
}));
|
|
1419
1413
|
} catch (error) {
|
|
1420
|
-
|
|
1414
|
+
logger.debug(`Failed to fetch traces for eval ${this.id}: ${error}`);
|
|
1421
1415
|
return [];
|
|
1422
1416
|
}
|
|
1423
1417
|
}
|
|
@@ -1454,7 +1448,7 @@ var Eval = class Eval {
|
|
|
1454
1448
|
const newEvalId = createEvalId(/* @__PURE__ */ new Date());
|
|
1455
1449
|
const copyDescription = description || `${this.description || "Evaluation"} (Copy)`;
|
|
1456
1450
|
const testCount = distinctTestCount ?? await this.getResultsCount();
|
|
1457
|
-
|
|
1451
|
+
logger.info("Starting eval copy", {
|
|
1458
1452
|
sourceEvalId: this.id,
|
|
1459
1453
|
targetEvalId: newEvalId,
|
|
1460
1454
|
distinctTestCount: testCount
|
|
@@ -1517,7 +1511,7 @@ var Eval = class Eval {
|
|
|
1517
1511
|
db.insert(evalResultsTable).values(copiedResults).run();
|
|
1518
1512
|
copiedCount += batch.length;
|
|
1519
1513
|
offset += BATCH_SIZE;
|
|
1520
|
-
|
|
1514
|
+
logger.debug("Copied batch of eval results", {
|
|
1521
1515
|
sourceEvalId: this.id,
|
|
1522
1516
|
targetEvalId: newEvalId,
|
|
1523
1517
|
batchSize: batch.length,
|
|
@@ -1526,7 +1520,7 @@ var Eval = class Eval {
|
|
|
1526
1520
|
});
|
|
1527
1521
|
}
|
|
1528
1522
|
});
|
|
1529
|
-
|
|
1523
|
+
logger.info("Eval copy completed successfully", {
|
|
1530
1524
|
sourceEvalId: this.id,
|
|
1531
1525
|
targetEvalId: newEvalId,
|
|
1532
1526
|
rowsCopied: copiedCount,
|
|
@@ -1623,7 +1617,7 @@ async function getEvalSummaries(datasetId, type, includeProviders = false) {
|
|
|
1623
1617
|
};
|
|
1624
1618
|
});
|
|
1625
1619
|
}
|
|
1626
|
-
|
|
1627
1620
|
//#endregion
|
|
1628
1621
|
export { createEvalId as a, combineFilterConditions as i, EvalQueries as n, escapeJsonPathKey as o, buildSafeJsonPath as r, getEvalSummaries as s, Eval as t };
|
|
1629
|
-
|
|
1622
|
+
|
|
1623
|
+
//# sourceMappingURL=eval-DmFyWU7i.js.map
|
|
@@ -1,12 +1,16 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { i as evalResultsTable, p as getDb } from "./tables-
|
|
1
|
+
import { t as __exportAll } from "./chunk-DEq-mXcV.js";
|
|
2
|
+
import { _ as getEnvBool, i as logger, m as safeJsonStringify } from "./logger-CT3IKMKA.js";
|
|
3
|
+
import { A as isApiProvider, g as ResultFailureReason, j as isProviderOptions, k as isResultFailureReason } from "./types-CLKiCBW3.js";
|
|
4
|
+
import { u as getCurrentTimestamp } from "./fetch-60Gzydls.js";
|
|
5
|
+
import { t as hashPrompt } from "./utils-XiOAgly5.js";
|
|
6
|
+
import { i as evalResultsTable, p as getDb } from "./tables-5EvT_Bwn.js";
|
|
7
|
+
import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-M67RUtg6.js";
|
|
7
8
|
import { and, eq, gte, inArray, lt, ne } from "drizzle-orm";
|
|
8
|
-
|
|
9
9
|
//#region src/models/evalResult.ts
|
|
10
|
+
var evalResult_exports = /* @__PURE__ */ __exportAll({
|
|
11
|
+
default: () => EvalResult,
|
|
12
|
+
sanitizeProvider: () => sanitizeProvider
|
|
13
|
+
});
|
|
10
14
|
function sanitizeProvider(provider) {
|
|
11
15
|
try {
|
|
12
16
|
if (isApiProvider(provider)) return {
|
|
@@ -44,7 +48,7 @@ function sanitizeForDb(obj) {
|
|
|
44
48
|
try {
|
|
45
49
|
const serialized = safeJsonStringify(obj);
|
|
46
50
|
if (serialized === void 0) {
|
|
47
|
-
|
|
51
|
+
logger.debug("sanitizeForDb: Failed to serialize object, using fallback", {
|
|
48
52
|
valueType: typeof obj,
|
|
49
53
|
isArray: Array.isArray(obj)
|
|
50
54
|
});
|
|
@@ -52,7 +56,7 @@ function sanitizeForDb(obj) {
|
|
|
52
56
|
}
|
|
53
57
|
return JSON.parse(serialized);
|
|
54
58
|
} catch (error) {
|
|
55
|
-
|
|
59
|
+
logger.debug("sanitizeForDb: Parse error, using fallback", { error });
|
|
56
60
|
return Array.isArray(obj) ? [] : null;
|
|
57
61
|
}
|
|
58
62
|
}
|
|
@@ -285,7 +289,7 @@ var EvalResult = class EvalResult {
|
|
|
285
289
|
};
|
|
286
290
|
}
|
|
287
291
|
};
|
|
288
|
-
|
|
289
292
|
//#endregion
|
|
290
|
-
export {
|
|
291
|
-
|
|
293
|
+
export { evalResult_exports as n, EvalResult as t };
|
|
294
|
+
|
|
295
|
+
//# sourceMappingURL=evalResult-CDQiuUuf.js.map
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { b as getEnvBool, g as safeJsonStringify, o as
|
|
3
|
-
import { c as getCurrentTimestamp } from "./fetch-
|
|
4
|
-
import { f as isResultFailureReason, m as isProviderOptions, p as isApiProvider, s as ResultFailureReason } from "./types-
|
|
5
|
-
import { t as hashPrompt } from "./utils-
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
2
|
+
import { b as getEnvBool, g as safeJsonStringify, o as logger } from "./logger-KkObSCzq.js";
|
|
3
|
+
import { c as getCurrentTimestamp } from "./fetch-BMv0O527.js";
|
|
4
|
+
import { f as isResultFailureReason, m as isProviderOptions, p as isApiProvider, s as ResultFailureReason } from "./types-CH3Ge2sE.js";
|
|
5
|
+
import { t as hashPrompt } from "./utils-f2-Moju7.js";
|
|
6
|
+
import { a as evalResultsTable, g as getDb } from "./tables-D36WTqKX.js";
|
|
7
|
+
import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-CFG6bcWJ.js";
|
|
8
8
|
import { and, eq, gte, inArray, lt, ne } from "drizzle-orm";
|
|
9
|
-
|
|
10
9
|
//#region src/models/evalResult.ts
|
|
11
10
|
function sanitizeProvider(provider) {
|
|
12
11
|
try {
|
|
@@ -45,7 +44,7 @@ function sanitizeForDb(obj) {
|
|
|
45
44
|
try {
|
|
46
45
|
const serialized = safeJsonStringify(obj);
|
|
47
46
|
if (serialized === void 0) {
|
|
48
|
-
|
|
47
|
+
logger.debug("sanitizeForDb: Failed to serialize object, using fallback", {
|
|
49
48
|
valueType: typeof obj,
|
|
50
49
|
isArray: Array.isArray(obj)
|
|
51
50
|
});
|
|
@@ -53,7 +52,7 @@ function sanitizeForDb(obj) {
|
|
|
53
52
|
}
|
|
54
53
|
return JSON.parse(serialized);
|
|
55
54
|
} catch (error) {
|
|
56
|
-
|
|
55
|
+
logger.debug("sanitizeForDb: Parse error, using fallback", { error });
|
|
57
56
|
return Array.isArray(obj) ? [] : null;
|
|
58
57
|
}
|
|
59
58
|
}
|
|
@@ -286,7 +285,7 @@ var EvalResult = class EvalResult {
|
|
|
286
285
|
};
|
|
287
286
|
}
|
|
288
287
|
};
|
|
289
|
-
|
|
290
288
|
//#endregion
|
|
291
289
|
export { sanitizeProvider as n, EvalResult as t };
|
|
292
|
-
|
|
290
|
+
|
|
291
|
+
//# sourceMappingURL=evalResult-CTG2AHOS.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import "./logger-KkObSCzq.js";
|
|
3
|
+
import "./fetch-BMv0O527.js";
|
|
4
|
+
import "./accounts-CMqkzrVf.js";
|
|
5
|
+
import "./cloud-Bc9526yV.js";
|
|
6
|
+
import "./types-CH3Ge2sE.js";
|
|
7
|
+
import "./utils-f2-Moju7.js";
|
|
8
|
+
import "./blobs-BAU-dXan.js";
|
|
9
|
+
import "./tables-D36WTqKX.js";
|
|
10
|
+
import "./extractor-CFG6bcWJ.js";
|
|
11
|
+
import { t as EvalResult } from "./evalResult-CTG2AHOS.js";
|
|
12
|
+
export { EvalResult as default };
|
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
const require_logger = require(
|
|
2
|
-
const require_types = require(
|
|
3
|
-
const require_fetch = require(
|
|
4
|
-
const require_utils = require(
|
|
5
|
-
const
|
|
6
|
-
const
|
|
1
|
+
const require_logger = require("./logger-Cp1GPUjj.cjs");
|
|
2
|
+
const require_types = require("./types-LJ0r3wbR.cjs");
|
|
3
|
+
const require_fetch = require("./fetch-BxUk8odA.cjs");
|
|
4
|
+
const require_utils = require("./utils-Cz9qXqII.cjs");
|
|
5
|
+
const require_tables = require("./tables-C7K-XKWp.cjs");
|
|
6
|
+
const require_extractor = require("./extractor-DX36oYEv.cjs");
|
|
7
7
|
let drizzle_orm = require("drizzle-orm");
|
|
8
|
-
|
|
9
8
|
//#region src/models/evalResult.ts
|
|
10
9
|
function sanitizeProvider(provider) {
|
|
11
10
|
try {
|
|
@@ -44,7 +43,7 @@ function sanitizeForDb(obj) {
|
|
|
44
43
|
try {
|
|
45
44
|
const serialized = require_logger.safeJsonStringify(obj);
|
|
46
45
|
if (serialized === void 0) {
|
|
47
|
-
require_logger.
|
|
46
|
+
require_logger.logger.debug("sanitizeForDb: Failed to serialize object, using fallback", {
|
|
48
47
|
valueType: typeof obj,
|
|
49
48
|
isArray: Array.isArray(obj)
|
|
50
49
|
});
|
|
@@ -52,7 +51,7 @@ function sanitizeForDb(obj) {
|
|
|
52
51
|
}
|
|
53
52
|
return JSON.parse(serialized);
|
|
54
53
|
} catch (error) {
|
|
55
|
-
require_logger.
|
|
54
|
+
require_logger.logger.debug("sanitizeForDb: Parse error, using fallback", { error });
|
|
56
55
|
return Array.isArray(obj) ? [] : null;
|
|
57
56
|
}
|
|
58
57
|
}
|
|
@@ -285,18 +284,18 @@ var EvalResult = class EvalResult {
|
|
|
285
284
|
};
|
|
286
285
|
}
|
|
287
286
|
};
|
|
288
|
-
|
|
289
287
|
//#endregion
|
|
290
|
-
Object.defineProperty(exports,
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
288
|
+
Object.defineProperty(exports, "EvalResult", {
|
|
289
|
+
enumerable: true,
|
|
290
|
+
get: function() {
|
|
291
|
+
return EvalResult;
|
|
292
|
+
}
|
|
295
293
|
});
|
|
296
|
-
Object.defineProperty(exports,
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
294
|
+
Object.defineProperty(exports, "sanitizeProvider", {
|
|
295
|
+
enumerable: true,
|
|
296
|
+
get: function() {
|
|
297
|
+
return sanitizeProvider;
|
|
298
|
+
}
|
|
301
299
|
});
|
|
302
|
-
|
|
300
|
+
|
|
301
|
+
//# sourceMappingURL=evalResult-Dap2CekP.cjs.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
require("./logger-Cp1GPUjj.cjs");
|
|
2
|
+
require("./types-LJ0r3wbR.cjs");
|
|
3
|
+
require("./fetch-BxUk8odA.cjs");
|
|
4
|
+
require("./utils-Cz9qXqII.cjs");
|
|
5
|
+
require("./accounts-Bx-x3bmW.cjs");
|
|
6
|
+
require("./blobs-DvS-O6be.cjs");
|
|
7
|
+
require("./tables-C7K-XKWp.cjs");
|
|
8
|
+
require("./extractor-DX36oYEv.cjs");
|
|
9
|
+
const require_evalResult = require("./evalResult-Dap2CekP.cjs");
|
|
10
|
+
exports.default = require_evalResult.EvalResult;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import "./logger-DLcq4dWf.js";
|
|
2
|
+
import "./fetch-4M3YRaqL.js";
|
|
3
|
+
import "./accounts-BgNJDBE6.js";
|
|
4
|
+
import "./tables-xKANLRBD.js";
|
|
5
|
+
import "./types-CN_TZ2GJ.js";
|
|
6
|
+
import "./utils-dLokC-eR.js";
|
|
7
|
+
import "./blobs-qTYm-1PY.js";
|
|
8
|
+
import "./extractor-YMU_Gvt8.js";
|
|
9
|
+
import { t as EvalResult } from "./server/index.js";
|
|
10
|
+
export { EvalResult as default };
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import "./logger-KkObSCzq.js";
|
|
3
|
+
import "./fetch-BMv0O527.js";
|
|
4
|
+
import "./accounts-CMqkzrVf.js";
|
|
5
|
+
import "./cloud-Bc9526yV.js";
|
|
6
|
+
import "./telemetry-C2YDkUQH.js";
|
|
7
|
+
import "./types-CH3Ge2sE.js";
|
|
8
|
+
import "./server-DbFphssR.js";
|
|
9
|
+
import "./providers-CFLy1_ji.js";
|
|
10
|
+
import "./cache-CG0SlR1d.js";
|
|
11
|
+
import "./util-YT5HPZaS.js";
|
|
12
|
+
import "./esm-C03C-mv3.js";
|
|
13
|
+
import "./pythonUtils-C3py6GC1.js";
|
|
14
|
+
import "./transform-DilY9wbS.js";
|
|
15
|
+
import "./graders-CHO8EPM4.js";
|
|
16
|
+
import "./utils-f2-Moju7.js";
|
|
17
|
+
import "./genaiTracer-C1rxGO8Q.js";
|
|
18
|
+
import "./chat-CRWNNq73.js";
|
|
19
|
+
import "./transform-Cgi24fJ7.js";
|
|
20
|
+
import "./messages-1x9atZmP.js";
|
|
21
|
+
import "./util-B7T3SiBS.js";
|
|
22
|
+
import "./responses-CGw0DCzh.js";
|
|
23
|
+
import "./openai-DhbB7eWK.js";
|
|
24
|
+
import "./util-C-PPYSMq.js";
|
|
25
|
+
import "./completion-C5rtR_9P.js";
|
|
26
|
+
import "./blobs-BAU-dXan.js";
|
|
27
|
+
import "./tables-D36WTqKX.js";
|
|
28
|
+
import "./extractor-CFG6bcWJ.js";
|
|
29
|
+
import "./store-D1tv90v3.js";
|
|
30
|
+
import "./base-B4QJRyFS.js";
|
|
31
|
+
import "./image-CBBVXWuT.js";
|
|
32
|
+
import "./providerRegistry-B0RUOLI_.js";
|
|
33
|
+
import "./rubyUtils-Boc4HZzX.js";
|
|
34
|
+
import { t as evaluate } from "./evaluator-DPFRbFIL.js";
|
|
35
|
+
import "./signal-BOTbd53Z.js";
|
|
36
|
+
export { evaluate };
|