promptfoo 0.120.27 → 0.121.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/src/{ListApp-8WOe2nT6.js → ListApp-Du7YVwj5.js} +2 -4
- package/dist/src/accounts-B0pgC1oV.js +206 -0
- package/dist/src/{accounts-DVINui-2.js → accounts-Bm2D8Db9.js} +39 -34
- package/dist/src/{accounts-CPDRAMND.js → accounts-CiBLOnA7.js} +38 -33
- package/dist/src/{accounts-Fl2J3_Fu.cjs → accounts-gtkH-5KX.cjs} +77 -78
- package/dist/src/{agentic-utils-D922n6mm.js → agentic-utils-DS1g3GLF.js} +9 -10
- package/dist/src/{agents-BcsN_BgB.js → agents-9qiOy0ho.js} +16 -12
- package/dist/src/{agents-BXLmVsxR.js → agents-CBr9A01V.js} +37 -37
- package/dist/src/{agents-pMfppv9Z.js → agents-CmvBq8LV.js} +16 -18
- package/dist/src/{agents-hqgSV-3o.js → agents-D__IdAlg.js} +39 -40
- package/dist/src/{agents-BO2n8Z0d.cjs → agents-DbRtpYxR.cjs} +37 -40
- package/dist/src/{agents-BdUTAwi-.js → agents-DgF2zDag.js} +37 -42
- package/dist/src/{agents-DgJf2-ez.cjs → agents-Di9DKPzn.cjs} +16 -17
- package/dist/src/{agents-DNvSH78i.js → agents-cLXA8a_8.js} +17 -19
- package/dist/src/{aimlapi-DtgPI0nE.js → aimlapi-B4rcnZgv.js} +15 -17
- package/dist/src/{aimlapi-BE_Tg9Fl.cjs → aimlapi-BvlNH0gr.cjs} +15 -16
- package/dist/src/{aimlapi-DOib86oE.js → aimlapi-CnkC2HqE.js} +16 -18
- package/dist/src/{aimlapi-DTPACCB1.js → aimlapi-DHJU_kcV.js} +15 -4
- package/dist/src/app/assets/index-4LKxG2CG.js +439 -0
- package/dist/src/app/assets/{index-NCn4eVBv.css → index-C3zcsZFQ.css} +1 -1
- package/dist/src/app/assets/vendor-charts-BnDWwBlI.js +36 -0
- package/dist/src/app/index.html +3 -3
- package/dist/src/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/{audio-BnRUGAm_.js → audio-Bkv46et0.js} +6 -5
- package/dist/src/{audio-Cwo68yZS.cjs → audio-CGMyULza.cjs} +6 -7
- package/dist/src/{audio-MSRki4JU.js → audio-ClI_AFre.js} +6 -8
- package/dist/src/{audio-BRYU0BFo.js → audio-Dz3z7s3J.js} +7 -9
- package/dist/src/{base-pGVmXNl4.cjs → base-CGrhspbK.cjs} +36 -38
- package/dist/src/{base-h961VXYk.js → base-CpjcHe4e.js} +11 -13
- package/dist/src/base-DLKtKMFh.js +193 -0
- package/dist/src/{base-XB2tDJrB.js → base-Dy1V8--Z.js} +11 -13
- package/dist/src/blobs-BDbfYdrJ.js +236 -0
- package/dist/src/{blobs-CR5C4Ihh.js → blobs-CBO20krR.js} +9 -12
- package/dist/src/{blobs-BM_e6hCa.js → blobs-CMHN0Qcz.js} +9 -12
- package/dist/src/{blobs-B-KQAFhX.cjs → blobs-D23XLin-.cjs} +34 -37
- package/dist/src/{cache-jsiwsAJv.js → cache-BVeDlD87.js} +132 -117
- package/dist/src/{cache-CIpsoBZR.js → cache-C4Nxf52C.js} +132 -118
- package/dist/src/cache-CeUpFm3M.cjs +5 -0
- package/dist/src/{cache-BTVYfbka.cjs → cache-Dh5WtQps.cjs} +182 -168
- package/dist/src/cache-i1P6crbO.js +756 -0
- package/dist/src/cache-n-RCJ-hL.js +6 -0
- package/dist/src/{chat-BcPjZXIp.js → chat-BiKyneZl.js} +45 -46
- package/dist/src/{chat-D31K7C4u.cjs → chat-C1Qst7jL.cjs} +20 -21
- package/dist/src/{chat-B84t99NW.js → chat-C2jrdPMx.js} +20 -9
- package/dist/src/{chat-BE44YOc6.cjs → chat-CgF-J-Jj.cjs} +65 -66
- package/dist/src/{chat-DwWifjxi.js → chat-CzkrVDfz.js} +20 -22
- package/dist/src/chat-DJIw17u0.js +766 -0
- package/dist/src/{chat-CcUCysjU.js → chat-DqxYYtWA.js} +45 -46
- package/dist/src/{chat-DZM2GUHO.js → chat-qmatte1u.js} +21 -23
- package/dist/src/{chatkit-D67HS_0b.js → chatkit-65VXf5SR.js} +58 -58
- package/dist/src/{chatkit-DAB_qfzI.js → chatkit-Be-Q-a9F.js} +58 -60
- package/dist/src/{chatkit-Biqb_wsD.js → chatkit-BxFvW8KY.js} +58 -60
- package/dist/src/{chatkit-PGG4ZYIn.cjs → chatkit-DKyPi1Gs.cjs} +58 -60
- package/dist/src/chunk-DEq-mXcV.js +15 -0
- package/dist/src/chunk-DRamLcfz.js +16 -0
- package/dist/src/{claude-agent-sdk-SVM6AdBu.js → claude-agent-sdk-Apiy0iaz.js} +31 -31
- package/dist/src/{claude-agent-sdk-C-IOTPfo.js → claude-agent-sdk-D2bJee9S.js} +31 -29
- package/dist/src/{claude-agent-sdk-C9SiaQub.cjs → claude-agent-sdk-D9Z5Pr9X.cjs} +31 -28
- package/dist/src/{claude-agent-sdk-CiluSyW1.js → claude-agent-sdk-DfCoW0E6.js} +33 -20
- package/dist/src/cloud-BBh91EUK.js +4 -0
- package/dist/src/{cloud-CZ-q9Ier.js → cloud-C0dlstV_.js} +7 -9
- package/dist/src/{cloudflare-ai-BahKHyhh.js → cloudflare-ai-8TDxHR0x.js} +16 -18
- package/dist/src/{cloudflare-ai-v_qZD6_q.js → cloudflare-ai-BxAGvfju.js} +17 -19
- package/dist/src/{cloudflare-ai-Dfahv5SY.cjs → cloudflare-ai-CknbZ5LJ.cjs} +16 -17
- package/dist/src/{cloudflare-ai-Dxyt50Nl.js → cloudflare-ai-g7PB6VHR.js} +16 -4
- package/dist/src/{cloudflare-gateway-Bi_FpOFy.js → cloudflare-gateway-B9HWA5wf.js} +23 -23
- package/dist/src/{cloudflare-gateway-BPWoZIzJ.cjs → cloudflare-gateway-BSnDmHYo.cjs} +21 -22
- package/dist/src/{cloudflare-gateway-C0guUNwk.js → cloudflare-gateway-CKDb4dJ8.js} +26 -14
- package/dist/src/{cloudflare-gateway-btS7h1OZ.js → cloudflare-gateway-CP9QEWYS.js} +21 -25
- package/dist/src/{codex-sdk-DSxAnbfT.js → codex-sdk-C6UMlxwV.js} +28 -29
- package/dist/src/{codex-sdk-IYVi9fuM.js → codex-sdk-DUwKWezN.js} +28 -27
- package/dist/src/{codex-sdk-DulY0ZRq.js → codex-sdk-GGAw0qbD.js} +28 -29
- package/dist/src/{codex-sdk-DFKMtAyf.cjs → codex-sdk-fAO0c3yA.cjs} +28 -29
- package/dist/src/{cometapi-DzrR3SR_.js → cometapi-BL9yvj_f.js} +16 -4
- package/dist/src/{cometapi-DIO64tf4.cjs → cometapi-C4xSqeID.cjs} +21 -22
- package/dist/src/{cometapi-C9EEpJzT.js → cometapi-CUQq3H_a.js} +21 -24
- package/dist/src/{cometapi-DkNBMk0G.js → cometapi-DFNiKmSz.js} +17 -19
- package/dist/src/{completion-CG29bfKX.js → completion-5MzrpJxT.js} +11 -13
- package/dist/src/{completion-CCRT4kX1.cjs → completion-CM6oK8PS.cjs} +21 -23
- package/dist/src/{completion-Bgf1VJoq.js → completion-DZ083F31.js} +11 -13
- package/dist/src/completion-qRoZAYRB.js +120 -0
- package/dist/src/{createHash-Dw_iLu31.js → createHash-CTQmL3G2.js} +2 -3
- package/dist/src/{createHash-CYQy4YeL.cjs → createHash-CfZSc0b4.cjs} +13 -14
- package/dist/src/{createHash-CJcfskIZ.js → createHash-Da8fMwqB.js} +2 -3
- package/dist/src/createHash-DmPQkvBh.js +15 -0
- package/dist/src/{docker-D-ayp2FW.js → docker-Bb5dcxr8.js} +18 -20
- package/dist/src/{docker-B81N0t4e.js → docker-BvfL2BrW.js} +19 -21
- package/dist/src/{docker-DNcLR4Ig.cjs → docker-DcF2pRrj.cjs} +18 -19
- package/dist/src/{docker-egERKxCF.js → docker-ExVyLp0S.js} +18 -7
- package/dist/src/entrypoint.js +2 -3
- package/dist/src/{errors-DnGCbnx8.js → errors-P6ll7XSJ.js} +2 -2
- package/dist/src/{esm-B9dPm_BF.js → esm-C03C-mv3.js} +17 -20
- package/dist/src/{esm-D2pZ87fL.js → esm-CaIwzWR5.js} +18 -21
- package/dist/src/esm-Cd1AjG1D.js +379 -0
- package/dist/src/{esm-Ct-Joyue.cjs → esm-CnNt7sI4.cjs} +47 -49
- package/dist/src/eval-B3r2CVXr.js +15 -0
- package/dist/src/{eval-C-Nr6wX_.js → eval-Dg2nG4v2.js} +47 -54
- package/dist/src/evalResult-5xwYnECe.js +12 -0
- package/dist/src/evalResult-71lY93Kj.cjs +10 -0
- package/dist/src/{evalResult-DXMWJ3sx.js → evalResult-BBRNtX4I.js} +10 -11
- package/dist/src/{evalResult-4BzI2tmj.js → evalResult-BDMqrapS.js} +16 -12
- package/dist/src/evalResult-Dx5P5cIv.js +10 -0
- package/dist/src/{evalResult-CX8wQecI.cjs → evalResult-fuaI8HkH.cjs} +20 -21
- package/dist/src/{evaluator-8aGyV12L.js → evaluator-BhoWwp5b.js} +211 -235
- package/dist/src/evaluator-Jx6bRZV6.js +36 -0
- package/dist/src/{extractor-V5x_m1i0.js → extractor-C0EVHewb.js} +22 -24
- package/dist/src/extractor-D25qpmGX.js +374 -0
- package/dist/src/{extractor-CD5yKL-G.js → extractor-DReVID0K.js} +22 -24
- package/dist/src/{extractor-C031XmTA.cjs → extractor-pYLLi3wS.cjs} +37 -39
- package/dist/src/{fetch-BmbD-v1L.cjs → fetch-BPkYtG8K.cjs} +244 -277
- package/dist/src/fetch-BxNb_Lp3.js +5 -0
- package/dist/src/{fetch-D3OHf-lV.js → fetch-Cwxnd8zz.js} +36 -44
- package/dist/src/{fetch-CXZI9RRr.js → fetch-Dxpd4_sr.js} +23 -35
- package/dist/src/fetch-HaqdX7U1.js +780 -0
- package/dist/src/{fileExtensions-ePDqouxn.js → fileExtensions-DnqA1y9x.js} +2 -2
- package/dist/src/{fileExtensions-BpuMmaFL.js → fileExtensions-Ds-foDzt.js} +2 -2
- package/dist/src/fileExtensions-LcDYkU4v.js +85 -0
- package/dist/src/{fileExtensions-DkJYkWUy.cjs → fileExtensions-bYh77CN8.cjs} +27 -28
- package/dist/src/{formatDuration-CdevI3An.js → formatDuration-DgBVMN65.js} +2 -2
- package/dist/src/{genaiTracer-Ce19n68P.js → genaiTracer-70Z8BIuV.js} +2 -3
- package/dist/src/{genaiTracer-CqNnnXrE.js → genaiTracer-C1rxGO8Q.js} +2 -3
- package/dist/src/genaiTracer-D3fD9dNV.js +256 -0
- package/dist/src/{genaiTracer-Dres3qrN.cjs → genaiTracer-DN4dQywX.cjs} +13 -14
- package/dist/src/{graders--1y2u9HO.js → graders-BTeBGqjJ.js} +349 -397
- package/dist/src/graders-B_pgMLS2.js +34 -0
- package/dist/src/{graders-DTeBrzWp.js → graders-Bj_Odv7c.js} +349 -397
- package/dist/src/graders-DErokPDO.cjs +32 -0
- package/dist/src/graders-DP7KFFo-.js +13466 -0
- package/dist/src/graders-DR_uNe54.js +32 -0
- package/dist/src/{graders-DohM2dir.cjs → graders-DU49_J8Y.cjs} +684 -732
- package/dist/src/graders-w3176Wz-.js +32 -0
- package/dist/src/{image-B0U4Hqll.js → image-B02ogr_b.js} +7 -9
- package/dist/src/{image-DmE-niFE.js → image-B0h9VEMc.js} +6 -5
- package/dist/src/{image-CuKHuccK.cjs → image-BLmROtN3.cjs} +29 -30
- package/dist/src/{image-DNEIf_aI.js → image-Bb4vWQLM.js} +6 -8
- package/dist/src/{image-DpKl2F15.cjs → image-C1madmKh.cjs} +6 -7
- package/dist/src/{image-C3wHC9_h.js → image-CHfWvljl.js} +9 -10
- package/dist/src/{image-O1u4bCFg.js → image-DS-o-0ph.js} +9 -10
- package/dist/src/image-Dpxa1Jt6.js +257 -0
- package/dist/src/index.cjs +615 -695
- package/dist/src/index.d.cts +271 -7
- package/dist/src/index.d.ts +271 -3
- package/dist/src/index.js +580 -664
- package/dist/src/{interactiveCheck-Bxj1Swex.js → interactiveCheck-BgLZUIt3.js} +7 -8
- package/dist/src/{invariant-DT20jrBd.js → invariant-BtWWVVhl.js} +2 -2
- package/dist/src/{invariant-1pAf2CD1.js → invariant-Ddh24eXh.js} +2 -2
- package/dist/src/{invariant-CKcJAQ6M.cjs → invariant-kfQ8Bu82.cjs} +7 -8
- package/dist/src/invariant-vgHWClmd.js +25 -0
- package/dist/src/{knowledgeBase-CEzQobWX.js → knowledgeBase-B3OoKIej.js} +14 -9
- package/dist/src/{knowledgeBase-Be_zyW4L.js → knowledgeBase-CYTLHOt1.js} +16 -16
- package/dist/src/{knowledgeBase-BZ41IFwq.js → knowledgeBase-D33Ty2l6.js} +14 -18
- package/dist/src/{knowledgeBase-D-5BMXlr.cjs → knowledgeBase-DOO_BM9b.cjs} +14 -15
- package/dist/src/{litellm-DnbRJ2if.js → litellm-AaeZcZQF.js} +18 -19
- package/dist/src/{litellm-hUSNM_M2.cjs → litellm-I_hbp_dc.cjs} +17 -17
- package/dist/src/{litellm-CRDqPhNI.js → litellm-NbjknEh6.js} +17 -18
- package/dist/src/{litellm-9vR8zpfU.js → litellm-TrljxD9G.js} +17 -5
- package/dist/src/{logger-CG1uZPbQ.js → logger-CT3IKMKA.js} +10 -29
- package/dist/src/{logger-B7sBeGa0.cjs → logger-Cp1GPUjj.cjs} +152 -180
- package/dist/src/logger-DLcq4dWf.js +713 -0
- package/dist/src/{logger-LSBxlt7a.js → logger-KkObSCzq.js} +13 -31
- package/dist/src/{luma-ray-4blv9iZ2.js → luma-ray-BS2_tY8L.js} +22 -21
- package/dist/src/{luma-ray-drvgdpP9.js → luma-ray-DDsjcgZZ.js} +20 -13
- package/dist/src/{luma-ray-Hm3d6VJE.cjs → luma-ray-Due0n7di.cjs} +20 -21
- package/dist/src/{luma-ray-B2__8lYH.js → luma-ray-f6I2fft-.js} +20 -23
- package/dist/src/main.js +1170 -1321
- package/dist/src/{messages-Uee41Mj5.js → messages-BS17jdMx.js} +22 -24
- package/dist/src/{messages-XhiwCbi4.cjs → messages-Bs1kC7P4.cjs} +32 -34
- package/dist/src/{messages-CGPPidQr.js → messages-D0lx5qK7.js} +22 -24
- package/dist/src/messages-ZJk778GH.js +240 -0
- package/dist/src/{meteor-BYykdXrV.js → meteor-44VjEACX.js} +3 -4
- package/dist/src/{meteor-CsopaHrH.js → meteor-D-SotUw9.js} +3 -4
- package/dist/src/{meteor-e-E-2vVl.cjs → meteor-DLZZ3osF.cjs} +3 -4
- package/dist/src/{meteor-C8lGP6P4.js → meteor-DUiCJRC-.js} +3 -4
- package/dist/src/{modelslab-yKz-ZNB4.js → modelslab-Bmni6skY.js} +17 -10
- package/dist/src/{modelslab-E9gO-bYd.js → modelslab-Bx9IrZfS.js} +18 -20
- package/dist/src/{modelslab-lUVW0cmB.cjs → modelslab-CoUX6Jc_.cjs} +17 -18
- package/dist/src/{modelslab-ClBkr8_9.js → modelslab-DRb74SP4.js} +17 -19
- package/dist/src/{nova-reel-Dk8jNpId.js → nova-reel-BfPq-0Yk.js} +20 -13
- package/dist/src/{nova-reel-D8CuO6QH.cjs → nova-reel-C_QM18Xn.cjs} +20 -21
- package/dist/src/{nova-reel-u2eF2Cxm.js → nova-reel-D_W1tjMH.js} +22 -21
- package/dist/src/{nova-reel-P9bwvtYX.js → nova-reel-bgjxilYW.js} +20 -23
- package/dist/src/{nova-sonic-CK2rAiKi.js → nova-sonic-CFb5GYhg.js} +30 -26
- package/dist/src/{nova-sonic-BaqWlkds.js → nova-sonic-DIGQNR07.js} +30 -31
- package/dist/src/{nova-sonic-yZapPLv7.js → nova-sonic-De1HW5fD.js} +31 -32
- package/dist/src/{nova-sonic-Ds1C-dpm.cjs → nova-sonic-zfcljeRp.cjs} +30 -31
- package/dist/src/{openai-DUFopMrH.cjs → openai-Cuif0GEt.cjs} +8 -9
- package/dist/src/{openai-PblZ3jUE.js → openai-DElQ-fPX.js} +3 -4
- package/dist/src/{openai-CcN1B8Sb.js → openai-DhbB7eWK.js} +3 -4
- package/dist/src/openai-j-sE2O7r.js +44 -0
- package/dist/src/{openclaw-B6qqDr_u.cjs → openclaw-CSugPYAr.cjs} +188 -130
- package/dist/src/{openclaw-A-3_loM7.js → openclaw-DiSz3I5L.js} +180 -109
- package/dist/src/{openclaw-a3lylB-V.js → openclaw-DuvJKEW5.js} +178 -124
- package/dist/src/{openclaw-COn6QzDi.js → openclaw-tiVYRtr-.js} +178 -122
- package/dist/src/opencode-sdk-0j6rTWNb.js +562 -0
- package/dist/src/opencode-sdk-B3CWY9h_.js +560 -0
- package/dist/src/opencode-sdk-BL764Jdi.cjs +564 -0
- package/dist/src/opencode-sdk-C2y6UkP2.js +560 -0
- package/dist/src/{otlpReceiver-oyf5wLGC.js → otlpReceiver-C99PPb48.js} +53 -51
- package/dist/src/{otlpReceiver-lXsYVbpj.cjs → otlpReceiver-CGq6LspY.cjs} +53 -55
- package/dist/src/{otlpReceiver-94URx7UW.js → otlpReceiver-CdNBdbsk.js} +53 -55
- package/dist/src/{otlpReceiver-BmmTiMjA.js → otlpReceiver-D89fR-rC.js} +53 -55
- package/dist/src/{providerRegistry-Cq_JK_CJ.js → providerRegistry-B0RUOLI_.js} +7 -8
- package/dist/src/{providerRegistry-DSSHjMKf.js → providerRegistry-CD8MEar9.js} +7 -8
- package/dist/src/{providerRegistry-CvHEVJad.cjs → providerRegistry-Civky8Ar.cjs} +12 -13
- package/dist/src/providerRegistry-DM8rZYol.js +45 -0
- package/dist/src/providers-B7V0njNs.js +32 -0
- package/dist/src/providers-BEwbhv0X.js +30 -0
- package/dist/src/{providers-Iil64vk9.js → providers-BlqUifFg.js} +1543 -1676
- package/dist/src/providers-CH3C7zf7.js +30 -0
- package/dist/src/{providers-DHbjzW2e.cjs → providers-CgKOSgTR.cjs} +1896 -2029
- package/dist/src/providers-D8lF1sqW.js +33246 -0
- package/dist/src/{providers-BnFpbY_s.js → providers-Dk_6ocUX.js} +1536 -1669
- package/dist/src/providers-zyB6k_38.cjs +31 -0
- package/dist/src/{pythonUtils-CcT5LH1M.js → pythonUtils-C3py6GC1.js} +18 -19
- package/dist/src/{pythonUtils-DBbuI3QJ.cjs → pythonUtils-CTU3Y3lw.cjs} +42 -43
- package/dist/src/{pythonUtils-hZ8LeQLv.js → pythonUtils-D5nxkQ0P.js} +18 -19
- package/dist/src/pythonUtils-D6fwaDSg.js +249 -0
- package/dist/src/{quiverai-BuI0tE39.js → quiverai-BbOUOn2L.js} +8 -7
- package/dist/src/{quiverai-DCGSZt4U.js → quiverai-CIaELU_m.js} +8 -10
- package/dist/src/{quiverai-DiMVJQDz.cjs → quiverai-PdShCPox.cjs} +8 -9
- package/dist/src/{quiverai-fQNkExW4.js → quiverai-uH-dcTIr.js} +9 -11
- package/dist/src/{render-Dj1smHEb.js → render-Drod8m7K.js} +4 -5
- package/dist/src/responses-CB2jwoAr.js +660 -0
- package/dist/src/{responses-ghR3IOfy.cjs → responses-D8SBTL64.cjs} +39 -42
- package/dist/src/{responses-DOAFFENS.js → responses-DIR9Ud3j.js} +24 -27
- package/dist/src/{responses-CxzoQoBe.js → responses-WNGNYe3K.js} +24 -27
- package/dist/src/rubyUtils-BUHu6PhO.js +5 -0
- package/dist/src/{rubyUtils-CwbGmgYN.js → rubyUtils-BUVePouc.js} +27 -20
- package/dist/src/rubyUtils-BcuGX77l.js +222 -0
- package/dist/src/{rubyUtils-DudlFZed.js → rubyUtils-Boc4HZzX.js} +18 -19
- package/dist/src/rubyUtils-CP42kMvq.cjs +4 -0
- package/dist/src/{rubyUtils-C8MhKGHb.cjs → rubyUtils-DhCAlxZr.cjs} +48 -50
- package/dist/src/{sagemaker-gmskuyre.js → sagemaker-CNBxx5CJ.js} +75 -70
- package/dist/src/{sagemaker-CcxhlOAR.js → sagemaker-CemTFp2h.js} +75 -79
- package/dist/src/{sagemaker-77zbJ2Q2.cjs → sagemaker-Cl28mZU2.cjs} +75 -76
- package/dist/src/{sagemaker-DuM71dVU.js → sagemaker-YSyBXQQh.js} +77 -77
- package/dist/src/{scanner-DJYiSXQj.js → scanner-BsBlNXNn.js} +100 -121
- package/dist/src/server/index.js +5520 -67427
- package/dist/src/{server-B5v33lvE.cjs → server-C_7Ax-hA.cjs} +57 -67
- package/dist/src/{server-BJ4m4f1D.js → server-CqzrVGpF.js} +26 -29
- package/dist/src/server-CuxBbeSY.js +229 -0
- package/dist/src/server-DA4Cyrrq.js +7 -0
- package/dist/src/server-Dulb-4-K.cjs +5 -0
- package/dist/src/{server-RV_i_YX5.js → server-VWgWb00X.js} +19 -24
- package/dist/src/{signal-BW33JuId.js → signal-4U3mfRvL.js} +9 -11
- package/dist/src/{slack-DEURelTy.cjs → slack-BmVAVGaK.cjs} +7 -8
- package/dist/src/{slack-BQYeW9L3.js → slack-DCUPTzS2.js} +8 -8
- package/dist/src/{slack-BB6yuZzp.js → slack-DOdy_kyv.js} +7 -8
- package/dist/src/{slack-2pRrhhgJ.js → slack-DXMKtA-f.js} +7 -9
- package/dist/src/store-CXGFv4aR.js +228 -0
- package/dist/src/store-CXS-Q_91.js +6 -0
- package/dist/src/{store-D7CgQzAR.cjs → store-DLlFCC4h.cjs} +44 -45
- package/dist/src/{store-DJNsD1iC.js → store-DXilxTl-.js} +40 -36
- package/dist/src/{store-s3SftUwF.js → store-Dim__MDd.js} +34 -35
- package/dist/src/store-eYkaKMwq.cjs +5 -0
- package/dist/src/{tables-DfTsNN7X.js → tables-6YKwjN9-.js} +19 -21
- package/dist/src/tables-DLJPUdUE.js +288 -0
- package/dist/src/{tables-BKTmd6u7.cjs → tables-DPi7wKeM.cjs} +89 -91
- package/dist/src/{tables-DMegD0Xf.js → tables-gftXzE9I.js} +21 -23
- package/dist/src/telemetry-BpMfhthR.cjs +5 -0
- package/dist/src/{telemetry--WAdAfVi.js → telemetry-CMrFgtPB.js} +11 -13
- package/dist/src/telemetry-Cps3mIU-.js +171 -0
- package/dist/src/{telemetry-DQgVBCAb.cjs → telemetry-DaX14Chu.cjs} +21 -24
- package/dist/src/{telemetry-BedSm-bZ.js → telemetry-Dthj_BbD.js} +17 -14
- package/dist/src/telemetry-Dw38hanS.js +7 -0
- package/dist/src/{text-oiSbwSOI.js → text-B_UCRPp2.js} +2 -2
- package/dist/src/{text-oKzCBnK6.cjs → text-CW1cyrwj.cjs} +12 -13
- package/dist/src/{text-B_IrO4GZ.js → text-Db-Wt2u2.js} +2 -2
- package/dist/src/text-TIv0QYnd.js +22 -0
- package/dist/src/{tokenUsageUtils-FZd5O_4A.js → tokenUsageUtils-BDGe-iyI.js} +2 -2
- package/dist/src/{tokenUsageUtils-DmZSD2eU.js → tokenUsageUtils-DflFMjS0.js} +2 -2
- package/dist/src/tokenUsageUtils-NYT-WKS6.js +138 -0
- package/dist/src/{tokenUsageUtils-CXhxVj72.cjs → tokenUsageUtils-bVa1ga6f.cjs} +32 -33
- package/dist/src/{transcription-mYS9vd5v.js → transcription-BNYURcXg.js} +14 -7
- package/dist/src/{transcription-X2-B4vkX.js → transcription-B_OdaHp7.js} +14 -16
- package/dist/src/{transcription-BO1AHegO.cjs → transcription-NLVG9MT1.cjs} +14 -15
- package/dist/src/{transcription-lzBLiTFJ.js → transcription-s6A-bNrZ.js} +15 -17
- package/dist/src/{transform-B1Hi5lWS.cjs → transform-CzK1Q0zl.cjs} +24 -26
- package/dist/src/{transform-DeGlxb0D.js → transform-D5HsjduX.js} +39 -47
- package/dist/src/{transform-CYDILYDe.js → transform-DECvGmzp.js} +15 -13
- package/dist/src/transform-DTGDnAzW.js +6 -0
- package/dist/src/{transform-BEgStbHK.js → transform-DilY9wbS.js} +10 -12
- package/dist/src/{transform-D5PjiWiZ.cjs → transform-DuHvhZpj.cjs} +179 -187
- package/dist/src/transform-aa6tmVpZ.js +216 -0
- package/dist/src/transform-m3qNw4KP.cjs +5 -0
- package/dist/src/transform-uAytVuyX.js +1506 -0
- package/dist/src/{transform-Dfl89yi4.js → transform-vNucnNr0.js} +39 -47
- package/dist/src/{transformersAvailability-SZnTS3pJ.js → transformersAvailability-CEVM2GNQ.js} +2 -2
- package/dist/src/{transformersAvailability-D-glmEy7.cjs → transformersAvailability-CwayUSlh.cjs} +2 -3
- package/dist/src/{transformersAvailability-CjeFXhuJ.js → transformersAvailability-D6c6ROpT.js} +2 -2
- package/dist/src/{types-DWNf48sT.cjs → types-C_7nyzr1.cjs} +538 -574
- package/dist/src/{types-CXQduE9o.js → types-Cbd8uOMq.js} +68 -100
- package/dist/src/types-CzW2QFyi.js +3288 -0
- package/dist/src/{types-C5hEkb-x.js → types-DmyIJ-sR.js} +63 -99
- package/dist/src/{util-CoQjmE3u.js → util-B3xGByQh.js} +4 -5
- package/dist/src/{util-aLhtl3fe.cjs → util-B9vlHIIh.cjs} +208 -223
- package/dist/src/{util-Du96oyYS.js → util-BHGHw5G1.js} +4 -5
- package/dist/src/{util-DQ984syk.js → util-BRYkYPTd.js} +36 -51
- package/dist/src/{util-D9eLdGfa.js → util-BV4XUC0n.js} +5 -6
- package/dist/src/util-Bv6uGDfH.js +293 -0
- package/dist/src/{util-1wWM599Z.cjs → util-BzMcevZc.cjs} +50 -51
- package/dist/src/{util-_h4pVqrz.js → util-C1CeHl-P.js} +36 -51
- package/dist/src/{util-Bm_-UMD_.js → util-CMy69ZgQ.js} +5 -6
- package/dist/src/{util-CyUdMzV0.cjs → util-DGNOS1db.cjs} +34 -35
- package/dist/src/util-Dnmk2mBQ.js +599 -0
- package/dist/src/util-ZzmqNPlg.js +1426 -0
- package/dist/src/{utils-BjLy-Q72.cjs → utils-Cz9qXqII.cjs} +29 -32
- package/dist/src/{utils-CFMn2yHW.js → utils-XiOAgly5.js} +4 -7
- package/dist/src/utils-dLokC-eR.js +94 -0
- package/dist/src/{utils-DvWMzuMx.js → utils-f2-Moju7.js} +4 -7
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +38 -38
- package/dist/src/app/assets/index-B2D0bCSI.js +0 -439
- package/dist/src/app/assets/vendor-charts-CCl15Imd.js +0 -36
- package/dist/src/cache-ChPcurj7.js +0 -6
- package/dist/src/cache-VVu_W-yg.js +0 -8
- package/dist/src/cache-YLNCFEM2.cjs +0 -6
- package/dist/src/chunk-DHDDz29n.js +0 -22
- package/dist/src/chunk-FhC4c-0y.js +0 -21
- package/dist/src/cloud-BndfXy4H.js +0 -5
- package/dist/src/eval-BhHvMY82.js +0 -17
- package/dist/src/evalResult-Dq2gFNQY.js +0 -12
- package/dist/src/evalResult-nmcP5VKH.cjs +0 -12
- package/dist/src/evalResult-trqZjVYh.js +0 -14
- package/dist/src/evaluator-CnfPstzT.js +0 -39
- package/dist/src/fetch-IDPDue6F.cjs +0 -4
- package/dist/src/fetch-hKJ-It8q.js +0 -6
- package/dist/src/fetch-ouKnrWK-.js +0 -4
- package/dist/src/graders-CQn7WUsd.cjs +0 -34
- package/dist/src/graders-DC6QAbpW.js +0 -35
- package/dist/src/graders-DUWz3Y7j.js +0 -37
- package/dist/src/opencode-sdk-4bL9n-Gk.js +0 -382
- package/dist/src/opencode-sdk-BfC2zWcR.js +0 -376
- package/dist/src/opencode-sdk-DMJyuwMg.js +0 -380
- package/dist/src/opencode-sdk-Da-9adza.cjs +0 -383
- package/dist/src/providers-CsXB2Ix-.js +0 -35
- package/dist/src/providers-DO8ltjLC.js +0 -33
- package/dist/src/providers-Dtq-xnXd.cjs +0 -33
- package/dist/src/rubyUtils-BUbcND2f.js +0 -6
- package/dist/src/rubyUtils-Cr55X_KE.js +0 -5
- package/dist/src/rubyUtils-DlIiqoYo.cjs +0 -5
- package/dist/src/server-C2eQH4Gu.js +0 -6
- package/dist/src/server-CXWycu7H.cjs +0 -6
- package/dist/src/server-Q6OGlxxT.js +0 -8
- package/dist/src/store-B3EDO9Q3.js +0 -7
- package/dist/src/store-Dl9F8aw5.js +0 -6
- package/dist/src/store-SnrGrlt9.cjs +0 -6
- package/dist/src/telemetry-BGhiPZtl.js +0 -8
- package/dist/src/telemetry-CFfiYan6.cjs +0 -6
- package/dist/src/telemetry-DHzEduxX.js +0 -6
- package/dist/src/transform-C1x1ZlMQ.cjs +0 -6
- package/dist/src/transform-DYHjFmQu.js +0 -8
- package/dist/src/transform-rmwJT5JQ.js +0 -7
- package/dist/src/transformersAvailability-eJooj0gX.js +0 -35
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { b as getEnvBool, o as
|
|
3
|
-
import {
|
|
4
|
-
import { t as invariant } from "./invariant-
|
|
5
|
-
import { o as getUserEmail } from "./accounts-
|
|
6
|
-
import {
|
|
7
|
-
import { n as isNonTransientHttpStatus, t as NON_TRANSIENT_HTTP_STATUSES } from "./errors-
|
|
8
|
-
import { t as hashPrompt } from "./utils-
|
|
9
|
-
import { n as sha256, t as randomSequence } from "./createHash-
|
|
10
|
-
import { a as createEmptyTokenUsage, r as accumulateTokenUsage } from "./tokenUsageUtils-
|
|
11
|
-
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, i as datasetsTable, l as evalsToTagsTable, o as evalsTable, p as tagsTable, s as evalsToDatasetsTable } from "./tables-
|
|
12
|
-
import { n as getTraceStore } from "./store-
|
|
13
|
-
import { i as getActualPrompt, r as updateSignalFile } from "./signal-
|
|
14
|
-
import { t as EvalResult } from "./evalResult-
|
|
2
|
+
import { b as getEnvBool, o as logger } from "./logger-KkObSCzq.js";
|
|
3
|
+
import { F as HUMAN_ASSERTION_TYPE, c as getCurrentTimestamp } from "./fetch-Dxpd4_sr.js";
|
|
4
|
+
import { t as invariant } from "./invariant-BtWWVVhl.js";
|
|
5
|
+
import { o as getUserEmail } from "./accounts-Bm2D8Db9.js";
|
|
6
|
+
import { H as riskCategorySeverityMap, s as ResultFailureReason, st as PLUGIN_CATEGORIES } from "./types-Cbd8uOMq.js";
|
|
7
|
+
import { n as isNonTransientHttpStatus, t as NON_TRANSIENT_HTTP_STATUSES } from "./errors-P6ll7XSJ.js";
|
|
8
|
+
import { t as hashPrompt } from "./utils-f2-Moju7.js";
|
|
9
|
+
import { n as sha256, t as randomSequence } from "./createHash-Da8fMwqB.js";
|
|
10
|
+
import { a as createEmptyTokenUsage, r as accumulateTokenUsage } from "./tokenUsageUtils-DflFMjS0.js";
|
|
11
|
+
import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, i as datasetsTable, l as evalsToTagsTable, o as evalsTable, p as tagsTable, s as evalsToDatasetsTable } from "./tables-gftXzE9I.js";
|
|
12
|
+
import { n as getTraceStore } from "./store-Dim__MDd.js";
|
|
13
|
+
import { i as getActualPrompt, r as updateSignalFile } from "./signal-4U3mfRvL.js";
|
|
14
|
+
import { t as EvalResult } from "./evalResult-BBRNtX4I.js";
|
|
15
15
|
import { and, desc, eq, sql } from "drizzle-orm";
|
|
16
|
-
|
|
17
16
|
//#region src/redteam/metrics.ts
|
|
18
17
|
/**
|
|
19
18
|
* The Attack Success Rate (ASR) is the number of tests which failed grading divided by the
|
|
@@ -25,7 +24,6 @@ import { and, desc, eq, sql } from "drizzle-orm";
|
|
|
25
24
|
function calculateAttackSuccessRate(testCount, failCount) {
|
|
26
25
|
return testCount > 0 ? failCount / testCount * 100 : 0;
|
|
27
26
|
}
|
|
28
|
-
|
|
29
27
|
//#endregion
|
|
30
28
|
//#region src/redteam/sharedFrontend.ts
|
|
31
29
|
function getRiskCategorySeverityMap(plugins) {
|
|
@@ -42,7 +40,6 @@ function getRiskCategorySeverityMap(plugins) {
|
|
|
42
40
|
...overrides
|
|
43
41
|
};
|
|
44
42
|
}
|
|
45
|
-
|
|
46
43
|
//#endregion
|
|
47
44
|
//#region src/util/calculateFilteredMetrics.ts
|
|
48
45
|
/**
|
|
@@ -96,12 +93,12 @@ async function calculateFilteredMetrics(opts) {
|
|
|
96
93
|
try {
|
|
97
94
|
const countResult = await getResultCount(whereSql);
|
|
98
95
|
if (countResult > MAX_RESULTS_FOR_METRICS) {
|
|
99
|
-
|
|
96
|
+
logger.warn(`Filtered result count ${countResult} exceeds limit ${MAX_RESULTS_FOR_METRICS}`, { evalId: opts.evalId });
|
|
100
97
|
throw new Error(`Result count ${countResult} exceeds maximum ${MAX_RESULTS_FOR_METRICS}`);
|
|
101
98
|
}
|
|
102
99
|
return await calculateWithOptimizedQuery(opts);
|
|
103
100
|
} catch (error) {
|
|
104
|
-
|
|
101
|
+
logger.error("Failed to calculate filtered metrics with optimized query", { error });
|
|
105
102
|
return createEmptyMetricsArray(numPrompts);
|
|
106
103
|
}
|
|
107
104
|
}
|
|
@@ -154,7 +151,7 @@ async function calculateWithOptimizedQuery(opts) {
|
|
|
154
151
|
for (const row of basicResults) {
|
|
155
152
|
const idx = row.prompt_idx;
|
|
156
153
|
if (idx < 0 || idx >= numPrompts) {
|
|
157
|
-
|
|
154
|
+
logger.warn(`Invalid prompt_idx ${idx}, expected 0-${numPrompts - 1}`);
|
|
158
155
|
continue;
|
|
159
156
|
}
|
|
160
157
|
metrics[idx] = {
|
|
@@ -179,7 +176,7 @@ async function calculateWithOptimizedQuery(opts) {
|
|
|
179
176
|
}
|
|
180
177
|
await aggregateNamedScores(metrics, whereSql);
|
|
181
178
|
await aggregateAssertions(metrics, whereSql);
|
|
182
|
-
|
|
179
|
+
logger.debug("Filtered metrics calculated", {
|
|
183
180
|
numPrompts,
|
|
184
181
|
metricsCount: basicResults.length
|
|
185
182
|
});
|
|
@@ -300,7 +297,6 @@ function createEmptyMetricsArray(numPrompts) {
|
|
|
300
297
|
cost: 0
|
|
301
298
|
}));
|
|
302
299
|
}
|
|
303
|
-
|
|
304
300
|
//#endregion
|
|
305
301
|
//#region src/util/convertEvalResultsToTable.ts
|
|
306
302
|
/**
|
|
@@ -433,7 +429,6 @@ function convertResultsToTable(eval_) {
|
|
|
433
429
|
body: rows
|
|
434
430
|
};
|
|
435
431
|
}
|
|
436
|
-
|
|
437
432
|
//#endregion
|
|
438
433
|
//#region src/util/exportToFile/index.ts
|
|
439
434
|
function convertEvalResultToTableCell(result) {
|
|
@@ -511,7 +506,6 @@ function convertTestResultsToTableRow(results, varsForHeader) {
|
|
|
511
506
|
for (const result of results) row.outputs[result.promptIdx] = convertEvalResultToTableCell(result);
|
|
512
507
|
return row;
|
|
513
508
|
}
|
|
514
|
-
|
|
515
509
|
//#endregion
|
|
516
510
|
//#region src/models/evalPerformance.ts
|
|
517
511
|
const distinctCountCache = /* @__PURE__ */ new Map();
|
|
@@ -528,7 +522,7 @@ async function getCachedResultsCount(evalId) {
|
|
|
528
522
|
const cacheKey = `distinct:${evalId}`;
|
|
529
523
|
const cached = distinctCountCache.get(cacheKey);
|
|
530
524
|
if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
|
|
531
|
-
|
|
525
|
+
logger.debug(`Using cached distinct count for eval ${evalId}: ${cached.count}`);
|
|
532
526
|
return cached.count;
|
|
533
527
|
}
|
|
534
528
|
const db = getDb();
|
|
@@ -536,7 +530,7 @@ async function getCachedResultsCount(evalId) {
|
|
|
536
530
|
const result = db.select({ count: sql`COUNT(DISTINCT test_idx)` }).from(evalResultsTable).where(sql`eval_id = ${evalId}`).all();
|
|
537
531
|
const count = Number(result[0]?.count ?? 0);
|
|
538
532
|
const duration = Date.now() - start;
|
|
539
|
-
|
|
533
|
+
logger.debug(`Distinct count query for eval ${evalId}: ${count} in ${duration}ms`);
|
|
540
534
|
distinctCountCache.set(cacheKey, {
|
|
541
535
|
count,
|
|
542
536
|
timestamp: Date.now()
|
|
@@ -554,7 +548,7 @@ async function getTotalResultRowCount(evalId) {
|
|
|
554
548
|
const cacheKey = `total:${evalId}`;
|
|
555
549
|
const cached = totalRowCountCache.get(cacheKey);
|
|
556
550
|
if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
|
|
557
|
-
|
|
551
|
+
logger.debug(`Using cached total row count for eval ${evalId}: ${cached.count}`);
|
|
558
552
|
return cached.count;
|
|
559
553
|
}
|
|
560
554
|
const db = getDb();
|
|
@@ -562,7 +556,7 @@ async function getTotalResultRowCount(evalId) {
|
|
|
562
556
|
const result = db.select({ count: sql`COUNT(*)` }).from(evalResultsTable).where(sql`eval_id = ${evalId}`).all();
|
|
563
557
|
const count = Number(result[0]?.count ?? 0);
|
|
564
558
|
const duration = Date.now() - start;
|
|
565
|
-
|
|
559
|
+
logger.debug(`Total row count query for eval ${evalId}: ${count} in ${duration}ms`);
|
|
566
560
|
totalRowCountCache.set(cacheKey, {
|
|
567
561
|
count,
|
|
568
562
|
timestamp: Date.now()
|
|
@@ -595,7 +589,7 @@ async function queryTestIndicesOptimized(evalId, opts) {
|
|
|
595
589
|
`;
|
|
596
590
|
const countResult = db.all(countQuery);
|
|
597
591
|
const filteredCount = Number(countResult[0]?.count ?? 0);
|
|
598
|
-
|
|
592
|
+
logger.debug(`Optimized count query took ${Date.now() - countStart}ms`);
|
|
599
593
|
const idxStart = Date.now();
|
|
600
594
|
const idxQuery = sql`
|
|
601
595
|
SELECT DISTINCT test_idx
|
|
@@ -606,13 +600,12 @@ async function queryTestIndicesOptimized(evalId, opts) {
|
|
|
606
600
|
OFFSET ${offset}
|
|
607
601
|
`;
|
|
608
602
|
const testIndices = db.all(idxQuery).map((row) => row.test_idx);
|
|
609
|
-
|
|
603
|
+
logger.debug(`Optimized index query took ${Date.now() - idxStart}ms`);
|
|
610
604
|
return {
|
|
611
605
|
testIndices,
|
|
612
606
|
filteredCount
|
|
613
607
|
};
|
|
614
608
|
}
|
|
615
|
-
|
|
616
609
|
//#endregion
|
|
617
610
|
//#region src/models/eval.ts
|
|
618
611
|
/**
|
|
@@ -707,7 +700,7 @@ var EvalQueries = class {
|
|
|
707
700
|
try {
|
|
708
701
|
db.update(evalsTable).set({ vars }).where(eq(evalsTable.id, evalId)).run();
|
|
709
702
|
} catch (e) {
|
|
710
|
-
|
|
703
|
+
logger.error(`Error setting vars: ${vars} for eval ${evalId}: ${e}`);
|
|
711
704
|
}
|
|
712
705
|
}
|
|
713
706
|
static async getMetadataKeysFromEval(evalId, comparisonEvalIds = []) {
|
|
@@ -728,7 +721,7 @@ var EvalQueries = class {
|
|
|
728
721
|
`;
|
|
729
722
|
return (await db.all(query)).map((r) => r.key);
|
|
730
723
|
} catch (error) {
|
|
731
|
-
|
|
724
|
+
logger.error(`Error fetching metadata keys for eval ${evalId} and comparisons [${comparisonEvalIds.join(", ")}]: ${error}`);
|
|
732
725
|
return [];
|
|
733
726
|
}
|
|
734
727
|
}
|
|
@@ -759,7 +752,7 @@ var EvalQueries = class {
|
|
|
759
752
|
const values = db.all(query).map(({ value }) => String(value).trim()).filter((value) => value.length > 0);
|
|
760
753
|
return Array.from(new Set(values));
|
|
761
754
|
} catch (error) {
|
|
762
|
-
|
|
755
|
+
logger.error(`Error fetching metadata values for eval ${evalId} and key ${trimmedKey}: ${error instanceof Error ? error.message : String(error)}`);
|
|
763
756
|
return [];
|
|
764
757
|
}
|
|
765
758
|
}
|
|
@@ -831,7 +824,7 @@ var Eval = class Eval {
|
|
|
831
824
|
}
|
|
832
825
|
return evalInstance;
|
|
833
826
|
}
|
|
834
|
-
static async getMany(limit =
|
|
827
|
+
static async getMany(limit = 100) {
|
|
835
828
|
return (await getDb().select().from(evalsTable).limit(limit).orderBy(desc(evalsTable.createdAt)).all()).map((e) => new Eval(e.config, {
|
|
836
829
|
id: e.id,
|
|
837
830
|
createdAt: new Date(e.createdAt),
|
|
@@ -846,7 +839,7 @@ var Eval = class Eval {
|
|
|
846
839
|
* @param offset - Number of evals to skip
|
|
847
840
|
* @param limit - Maximum number of evals to return
|
|
848
841
|
*/
|
|
849
|
-
static async getPaginated(offset = 0, limit =
|
|
842
|
+
static async getPaginated(offset = 0, limit = 100) {
|
|
850
843
|
return (await getDb().select().from(evalsTable).orderBy(desc(evalsTable.createdAt)).limit(limit).offset(offset).all()).map((e) => new Eval(e.config, {
|
|
851
844
|
id: e.id,
|
|
852
845
|
createdAt: new Date(e.createdAt),
|
|
@@ -892,7 +885,7 @@ var Eval = class Eval {
|
|
|
892
885
|
evalId,
|
|
893
886
|
promptId
|
|
894
887
|
}).onConflictDoNothing().run();
|
|
895
|
-
|
|
888
|
+
logger.debug(`Inserting prompt ${promptId}`);
|
|
896
889
|
}
|
|
897
890
|
if (opts?.results && opts.results.length > 0) {
|
|
898
891
|
const res = db.insert(evalResultsTable).values(opts.results?.map((r) => ({
|
|
@@ -900,7 +893,7 @@ var Eval = class Eval {
|
|
|
900
893
|
evalId,
|
|
901
894
|
id: crypto.randomUUID()
|
|
902
895
|
}))).run();
|
|
903
|
-
|
|
896
|
+
logger.debug(`Inserted ${res.changes} eval results`);
|
|
904
897
|
}
|
|
905
898
|
db.insert(datasetsTable).values({
|
|
906
899
|
id: datasetId,
|
|
@@ -910,7 +903,7 @@ var Eval = class Eval {
|
|
|
910
903
|
evalId,
|
|
911
904
|
datasetId
|
|
912
905
|
}).onConflictDoNothing().run();
|
|
913
|
-
|
|
906
|
+
logger.debug(`Inserting dataset ${datasetId}`);
|
|
914
907
|
if (config.tags) for (const [tagKey, tagValue] of Object.entries(config.tags)) {
|
|
915
908
|
const tagId = sha256(`${tagKey}:${tagValue}`);
|
|
916
909
|
db.insert(tagsTable).values({
|
|
@@ -922,7 +915,7 @@ var Eval = class Eval {
|
|
|
922
915
|
evalId,
|
|
923
916
|
tagId
|
|
924
917
|
}).onConflictDoNothing().run();
|
|
925
|
-
|
|
918
|
+
logger.debug(`Inserting tag ${tagId}`);
|
|
926
919
|
}
|
|
927
920
|
});
|
|
928
921
|
return new Eval(config, {
|
|
@@ -1103,7 +1096,7 @@ var Eval = class Eval {
|
|
|
1103
1096
|
if (type === "metric") {
|
|
1104
1097
|
const metricKey = field || value;
|
|
1105
1098
|
if (!metricKey) {
|
|
1106
|
-
|
|
1099
|
+
logger.warn("Invalid metric filter: missing field and value", { filter });
|
|
1107
1100
|
return;
|
|
1108
1101
|
}
|
|
1109
1102
|
const jsonPath = buildSafeJsonPath(metricKey);
|
|
@@ -1117,7 +1110,7 @@ var Eval = class Eval {
|
|
|
1117
1110
|
else if (operator === "lt") condition = sql`CAST(json_extract(named_scores, ${jsonPath}) AS REAL) < ${numericValue}`;
|
|
1118
1111
|
else if (operator === "lte") condition = sql`CAST(json_extract(named_scores, ${jsonPath}) AS REAL) <= ${numericValue}`;
|
|
1119
1112
|
} else {
|
|
1120
|
-
|
|
1113
|
+
logger.warn("Invalid numeric value in metric filter", {
|
|
1121
1114
|
metricKey,
|
|
1122
1115
|
value,
|
|
1123
1116
|
numericValue,
|
|
@@ -1195,7 +1188,7 @@ var Eval = class Eval {
|
|
|
1195
1188
|
const countStart = Date.now();
|
|
1196
1189
|
const countResult = await db.get(filteredCountQuery);
|
|
1197
1190
|
const countEnd = Date.now();
|
|
1198
|
-
|
|
1191
|
+
logger.debug(`Count query took ${countEnd - countStart}ms`);
|
|
1199
1192
|
const filteredCount = countResult?.count || 0;
|
|
1200
1193
|
const idxQuery = sql`
|
|
1201
1194
|
SELECT DISTINCT test_idx
|
|
@@ -1208,7 +1201,7 @@ var Eval = class Eval {
|
|
|
1208
1201
|
const idxStart = Date.now();
|
|
1209
1202
|
const rows = await db.all(idxQuery);
|
|
1210
1203
|
const idxEnd = Date.now();
|
|
1211
|
-
|
|
1204
|
+
logger.debug(`Index query took ${idxEnd - idxStart}ms`);
|
|
1212
1205
|
return {
|
|
1213
1206
|
testIndices: rows.map((row) => row.test_idx),
|
|
1214
1207
|
filteredCount
|
|
@@ -1244,7 +1237,7 @@ var Eval = class Eval {
|
|
|
1244
1237
|
const hasComplexFilters = opts.filters && opts.filters.length > 0;
|
|
1245
1238
|
let queryResult;
|
|
1246
1239
|
if (hasComplexFilters) {
|
|
1247
|
-
|
|
1240
|
+
logger.debug("Using original query for complex filters");
|
|
1248
1241
|
queryResult = await this.queryTestIndices({
|
|
1249
1242
|
offset: opts.offset,
|
|
1250
1243
|
limit: opts.limit,
|
|
@@ -1253,7 +1246,7 @@ var Eval = class Eval {
|
|
|
1253
1246
|
filters: opts.filters
|
|
1254
1247
|
});
|
|
1255
1248
|
} else {
|
|
1256
|
-
|
|
1249
|
+
logger.debug("Using optimized query for table page");
|
|
1257
1250
|
queryResult = await queryTestIndicesOptimized(this.id, {
|
|
1258
1251
|
offset: opts.offset,
|
|
1259
1252
|
limit: opts.limit,
|
|
@@ -1268,12 +1261,12 @@ var Eval = class Eval {
|
|
|
1268
1261
|
const varsStart = Date.now();
|
|
1269
1262
|
const vars = Array.from(this.vars);
|
|
1270
1263
|
const varsEnd = Date.now();
|
|
1271
|
-
|
|
1264
|
+
logger.debug(`Vars query took ${varsEnd - varsStart}ms`);
|
|
1272
1265
|
const body = [];
|
|
1273
1266
|
const bodyStart = Date.now();
|
|
1274
1267
|
if (testIndices.length === 0) {
|
|
1275
1268
|
const bodyEnd = Date.now();
|
|
1276
|
-
|
|
1269
|
+
logger.debug(`Body query took ${bodyEnd - bodyStart}ms`);
|
|
1277
1270
|
return {
|
|
1278
1271
|
head: {
|
|
1279
1272
|
prompts: this.prompts,
|
|
@@ -1305,7 +1298,7 @@ var Eval = class Eval {
|
|
|
1305
1298
|
if (results.length > 0) body.push(convertTestResultsToTableRow(results, vars));
|
|
1306
1299
|
}
|
|
1307
1300
|
const bodyEnd = Date.now();
|
|
1308
|
-
|
|
1301
|
+
logger.debug(`Body query took ${bodyEnd - bodyStart}ms`);
|
|
1309
1302
|
return {
|
|
1310
1303
|
head: {
|
|
1311
1304
|
prompts: this.prompts,
|
|
@@ -1418,7 +1411,7 @@ var Eval = class Eval {
|
|
|
1418
1411
|
})
|
|
1419
1412
|
}));
|
|
1420
1413
|
} catch (error) {
|
|
1421
|
-
|
|
1414
|
+
logger.debug(`Failed to fetch traces for eval ${this.id}: ${error}`);
|
|
1422
1415
|
return [];
|
|
1423
1416
|
}
|
|
1424
1417
|
}
|
|
@@ -1455,7 +1448,7 @@ var Eval = class Eval {
|
|
|
1455
1448
|
const newEvalId = createEvalId(/* @__PURE__ */ new Date());
|
|
1456
1449
|
const copyDescription = description || `${this.description || "Evaluation"} (Copy)`;
|
|
1457
1450
|
const testCount = distinctTestCount ?? await this.getResultsCount();
|
|
1458
|
-
|
|
1451
|
+
logger.info("Starting eval copy", {
|
|
1459
1452
|
sourceEvalId: this.id,
|
|
1460
1453
|
targetEvalId: newEvalId,
|
|
1461
1454
|
distinctTestCount: testCount
|
|
@@ -1518,7 +1511,7 @@ var Eval = class Eval {
|
|
|
1518
1511
|
db.insert(evalResultsTable).values(copiedResults).run();
|
|
1519
1512
|
copiedCount += batch.length;
|
|
1520
1513
|
offset += BATCH_SIZE;
|
|
1521
|
-
|
|
1514
|
+
logger.debug("Copied batch of eval results", {
|
|
1522
1515
|
sourceEvalId: this.id,
|
|
1523
1516
|
targetEvalId: newEvalId,
|
|
1524
1517
|
batchSize: batch.length,
|
|
@@ -1527,7 +1520,7 @@ var Eval = class Eval {
|
|
|
1527
1520
|
});
|
|
1528
1521
|
}
|
|
1529
1522
|
});
|
|
1530
|
-
|
|
1523
|
+
logger.info("Eval copy completed successfully", {
|
|
1531
1524
|
sourceEvalId: this.id,
|
|
1532
1525
|
targetEvalId: newEvalId,
|
|
1533
1526
|
rowsCopied: copiedCount,
|
|
@@ -1624,7 +1617,7 @@ async function getEvalSummaries(datasetId, type, includeProviders = false) {
|
|
|
1624
1617
|
};
|
|
1625
1618
|
});
|
|
1626
1619
|
}
|
|
1627
|
-
|
|
1628
1620
|
//#endregion
|
|
1629
1621
|
export { createEvalId as a, combineFilterConditions as i, EvalQueries as n, escapeJsonPathKey as o, buildSafeJsonPath as r, getEvalSummaries as s, Eval as t };
|
|
1630
|
-
|
|
1622
|
+
|
|
1623
|
+
//# sourceMappingURL=eval-Dg2nG4v2.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import "./logger-KkObSCzq.js";
|
|
3
|
+
import "./fetch-Dxpd4_sr.js";
|
|
4
|
+
import "./accounts-Bm2D8Db9.js";
|
|
5
|
+
import "./cloud-C0dlstV_.js";
|
|
6
|
+
import "./types-Cbd8uOMq.js";
|
|
7
|
+
import "./utils-f2-Moju7.js";
|
|
8
|
+
import "./blobs-CMHN0Qcz.js";
|
|
9
|
+
import "./tables-gftXzE9I.js";
|
|
10
|
+
import "./extractor-DReVID0K.js";
|
|
11
|
+
import { t as EvalResult } from "./evalResult-BBRNtX4I.js";
|
|
12
|
+
export { EvalResult as default };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
require("./logger-Cp1GPUjj.cjs");
|
|
2
|
+
require("./types-C_7nyzr1.cjs");
|
|
3
|
+
require("./fetch-BPkYtG8K.cjs");
|
|
4
|
+
require("./utils-Cz9qXqII.cjs");
|
|
5
|
+
require("./accounts-gtkH-5KX.cjs");
|
|
6
|
+
require("./blobs-D23XLin-.cjs");
|
|
7
|
+
require("./tables-DPi7wKeM.cjs");
|
|
8
|
+
require("./extractor-pYLLi3wS.cjs");
|
|
9
|
+
const require_evalResult = require("./evalResult-fuaI8HkH.cjs");
|
|
10
|
+
exports.default = require_evalResult.EvalResult;
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { b as getEnvBool, g as safeJsonStringify, o as
|
|
3
|
-
import { c as getCurrentTimestamp } from "./fetch-
|
|
4
|
-
import { f as isResultFailureReason, m as isProviderOptions, p as isApiProvider, s as ResultFailureReason } from "./types-
|
|
5
|
-
import { t as hashPrompt } from "./utils-
|
|
6
|
-
import { a as evalResultsTable, g as getDb } from "./tables-
|
|
7
|
-
import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-
|
|
2
|
+
import { b as getEnvBool, g as safeJsonStringify, o as logger } from "./logger-KkObSCzq.js";
|
|
3
|
+
import { c as getCurrentTimestamp } from "./fetch-Dxpd4_sr.js";
|
|
4
|
+
import { f as isResultFailureReason, m as isProviderOptions, p as isApiProvider, s as ResultFailureReason } from "./types-Cbd8uOMq.js";
|
|
5
|
+
import { t as hashPrompt } from "./utils-f2-Moju7.js";
|
|
6
|
+
import { a as evalResultsTable, g as getDb } from "./tables-gftXzE9I.js";
|
|
7
|
+
import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-DReVID0K.js";
|
|
8
8
|
import { and, eq, gte, inArray, lt, ne } from "drizzle-orm";
|
|
9
|
-
|
|
10
9
|
//#region src/models/evalResult.ts
|
|
11
10
|
function sanitizeProvider(provider) {
|
|
12
11
|
try {
|
|
@@ -45,7 +44,7 @@ function sanitizeForDb(obj) {
|
|
|
45
44
|
try {
|
|
46
45
|
const serialized = safeJsonStringify(obj);
|
|
47
46
|
if (serialized === void 0) {
|
|
48
|
-
|
|
47
|
+
logger.debug("sanitizeForDb: Failed to serialize object, using fallback", {
|
|
49
48
|
valueType: typeof obj,
|
|
50
49
|
isArray: Array.isArray(obj)
|
|
51
50
|
});
|
|
@@ -53,7 +52,7 @@ function sanitizeForDb(obj) {
|
|
|
53
52
|
}
|
|
54
53
|
return JSON.parse(serialized);
|
|
55
54
|
} catch (error) {
|
|
56
|
-
|
|
55
|
+
logger.debug("sanitizeForDb: Parse error, using fallback", { error });
|
|
57
56
|
return Array.isArray(obj) ? [] : null;
|
|
58
57
|
}
|
|
59
58
|
}
|
|
@@ -286,7 +285,7 @@ var EvalResult = class EvalResult {
|
|
|
286
285
|
};
|
|
287
286
|
}
|
|
288
287
|
};
|
|
289
|
-
|
|
290
288
|
//#endregion
|
|
291
289
|
export { sanitizeProvider as n, EvalResult as t };
|
|
292
|
-
|
|
290
|
+
|
|
291
|
+
//# sourceMappingURL=evalResult-BBRNtX4I.js.map
|
|
@@ -1,12 +1,16 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
1
|
+
import { t as __exportAll } from "./chunk-DEq-mXcV.js";
|
|
2
|
+
import { _ as getEnvBool, i as logger, m as safeJsonStringify } from "./logger-CT3IKMKA.js";
|
|
3
|
+
import { A as isApiProvider, g as ResultFailureReason, j as isProviderOptions, k as isResultFailureReason } from "./types-DmyIJ-sR.js";
|
|
4
|
+
import { u as getCurrentTimestamp } from "./fetch-Cwxnd8zz.js";
|
|
5
|
+
import { t as hashPrompt } from "./utils-XiOAgly5.js";
|
|
6
|
+
import { i as evalResultsTable, p as getDb } from "./tables-6YKwjN9-.js";
|
|
7
|
+
import { n as isBlobStorageEnabled, t as extractAndStoreBinaryData } from "./extractor-C0EVHewb.js";
|
|
7
8
|
import { and, eq, gte, inArray, lt, ne } from "drizzle-orm";
|
|
8
|
-
|
|
9
9
|
//#region src/models/evalResult.ts
|
|
10
|
+
var evalResult_exports = /* @__PURE__ */ __exportAll({
|
|
11
|
+
default: () => EvalResult,
|
|
12
|
+
sanitizeProvider: () => sanitizeProvider
|
|
13
|
+
});
|
|
10
14
|
function sanitizeProvider(provider) {
|
|
11
15
|
try {
|
|
12
16
|
if (isApiProvider(provider)) return {
|
|
@@ -44,7 +48,7 @@ function sanitizeForDb(obj) {
|
|
|
44
48
|
try {
|
|
45
49
|
const serialized = safeJsonStringify(obj);
|
|
46
50
|
if (serialized === void 0) {
|
|
47
|
-
|
|
51
|
+
logger.debug("sanitizeForDb: Failed to serialize object, using fallback", {
|
|
48
52
|
valueType: typeof obj,
|
|
49
53
|
isArray: Array.isArray(obj)
|
|
50
54
|
});
|
|
@@ -52,7 +56,7 @@ function sanitizeForDb(obj) {
|
|
|
52
56
|
}
|
|
53
57
|
return JSON.parse(serialized);
|
|
54
58
|
} catch (error) {
|
|
55
|
-
|
|
59
|
+
logger.debug("sanitizeForDb: Parse error, using fallback", { error });
|
|
56
60
|
return Array.isArray(obj) ? [] : null;
|
|
57
61
|
}
|
|
58
62
|
}
|
|
@@ -285,7 +289,7 @@ var EvalResult = class EvalResult {
|
|
|
285
289
|
};
|
|
286
290
|
}
|
|
287
291
|
};
|
|
288
|
-
|
|
289
292
|
//#endregion
|
|
290
|
-
export {
|
|
291
|
-
|
|
293
|
+
export { evalResult_exports as n, EvalResult as t };
|
|
294
|
+
|
|
295
|
+
//# sourceMappingURL=evalResult-BDMqrapS.js.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import "./logger-DLcq4dWf.js";
|
|
2
|
+
import "./fetch-HaqdX7U1.js";
|
|
3
|
+
import "./accounts-B0pgC1oV.js";
|
|
4
|
+
import "./tables-DLJPUdUE.js";
|
|
5
|
+
import "./types-CzW2QFyi.js";
|
|
6
|
+
import "./utils-dLokC-eR.js";
|
|
7
|
+
import "./blobs-BDbfYdrJ.js";
|
|
8
|
+
import "./extractor-D25qpmGX.js";
|
|
9
|
+
import { t as EvalResult } from "./server/index.js";
|
|
10
|
+
export { EvalResult as default };
|
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
const require_logger = require(
|
|
2
|
-
const require_types = require(
|
|
3
|
-
const require_fetch = require(
|
|
4
|
-
const require_utils = require(
|
|
5
|
-
const require_tables = require(
|
|
6
|
-
const require_extractor = require(
|
|
1
|
+
const require_logger = require("./logger-Cp1GPUjj.cjs");
|
|
2
|
+
const require_types = require("./types-C_7nyzr1.cjs");
|
|
3
|
+
const require_fetch = require("./fetch-BPkYtG8K.cjs");
|
|
4
|
+
const require_utils = require("./utils-Cz9qXqII.cjs");
|
|
5
|
+
const require_tables = require("./tables-DPi7wKeM.cjs");
|
|
6
|
+
const require_extractor = require("./extractor-pYLLi3wS.cjs");
|
|
7
7
|
let drizzle_orm = require("drizzle-orm");
|
|
8
|
-
|
|
9
8
|
//#region src/models/evalResult.ts
|
|
10
9
|
function sanitizeProvider(provider) {
|
|
11
10
|
try {
|
|
@@ -44,7 +43,7 @@ function sanitizeForDb(obj) {
|
|
|
44
43
|
try {
|
|
45
44
|
const serialized = require_logger.safeJsonStringify(obj);
|
|
46
45
|
if (serialized === void 0) {
|
|
47
|
-
require_logger.
|
|
46
|
+
require_logger.logger.debug("sanitizeForDb: Failed to serialize object, using fallback", {
|
|
48
47
|
valueType: typeof obj,
|
|
49
48
|
isArray: Array.isArray(obj)
|
|
50
49
|
});
|
|
@@ -52,7 +51,7 @@ function sanitizeForDb(obj) {
|
|
|
52
51
|
}
|
|
53
52
|
return JSON.parse(serialized);
|
|
54
53
|
} catch (error) {
|
|
55
|
-
require_logger.
|
|
54
|
+
require_logger.logger.debug("sanitizeForDb: Parse error, using fallback", { error });
|
|
56
55
|
return Array.isArray(obj) ? [] : null;
|
|
57
56
|
}
|
|
58
57
|
}
|
|
@@ -285,18 +284,18 @@ var EvalResult = class EvalResult {
|
|
|
285
284
|
};
|
|
286
285
|
}
|
|
287
286
|
};
|
|
288
|
-
|
|
289
287
|
//#endregion
|
|
290
|
-
Object.defineProperty(exports,
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
288
|
+
Object.defineProperty(exports, "EvalResult", {
|
|
289
|
+
enumerable: true,
|
|
290
|
+
get: function() {
|
|
291
|
+
return EvalResult;
|
|
292
|
+
}
|
|
295
293
|
});
|
|
296
|
-
Object.defineProperty(exports,
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
294
|
+
Object.defineProperty(exports, "sanitizeProvider", {
|
|
295
|
+
enumerable: true,
|
|
296
|
+
get: function() {
|
|
297
|
+
return sanitizeProvider;
|
|
298
|
+
}
|
|
301
299
|
});
|
|
302
|
-
|
|
300
|
+
|
|
301
|
+
//# sourceMappingURL=evalResult-fuaI8HkH.cjs.map
|