promptfoo 0.121.5 → 0.121.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/{ListApp-BRUsT43Y.js → ListApp-DLmM02JS.js} +1 -1
- package/dist/src/{accounts-BIFntVWB.cjs → accounts-BVz5gHLK.cjs} +16 -10
- package/dist/src/{accounts-bnyHT7Ju.js → accounts-BWjqwsrf.js} +12 -7
- package/dist/src/{accounts-CLJHCDDb.js → accounts-D6IBfEE0.js} +13 -8
- package/dist/src/{accounts-CaLNYnf7.js → accounts-DAv_0iE7.js} +12 -7
- package/dist/src/{agentic-utils-B5krlibj.js → agentic-utils-BJKAkz2e.js} +2 -2
- package/dist/src/{agentic-utils-D2x0wGhB.cjs → agentic-utils-C-A92xhn.cjs} +8 -7
- package/dist/src/{agentic-utils-Ba67xmgs.js → agentic-utils-DmoS_S4B.js} +2 -2
- package/dist/src/{agentic-utils-BclbiXiq.js → agentic-utils-GdToujHu.js} +2 -2
- package/dist/src/{agents-BGqaTDnr.js → agents-2C8NN6I1.js} +20 -9
- package/dist/src/{agents-n6vPqV3i.js → agents-BMAiSR2o.js} +4 -4
- package/dist/src/{agents-BYdMl1UE.js → agents-C-PGaxwj.js} +20 -9
- package/dist/src/{agents-BV9yFpXX.js → agents-C98cz5pl.js} +20 -9
- package/dist/src/{agents-WULPVjbH.cjs → agents-DB8Ub2Ld.cjs} +7 -6
- package/dist/src/{agents-DhxWMCtH.js → agents-Dwshy2H8.js} +4 -4
- package/dist/src/{agents-emVcx3yh.js → agents-mlKjx-cK.js} +2 -2
- package/dist/src/{agents-DiWmQYH9.cjs → agents-n2ej-c4H.cjs} +21 -10
- package/dist/src/{aimlapi-DR4pgeiC.js → aimlapi-Bi-laUlp.js} +6 -6
- package/dist/src/{aimlapi-BzLjZI_m.cjs → aimlapi-D0OFV4Vj.cjs} +7 -7
- package/dist/src/{aimlapi-BxqK9HF_.js → aimlapi-WyUK0wYy.js} +6 -6
- package/dist/src/{aimlapi-uPGp0Zdo.js → aimlapi-v-63ZjEI.js} +6 -6
- package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
- package/dist/src/app/assets/Report-sCXUfaWo.js +1 -0
- package/dist/src/app/assets/index-BopgkZEh.js +388 -0
- package/dist/src/app/assets/index-yhM8y1PP.css +1 -0
- package/dist/src/app/assets/{scroll-timeline-D9IT_e8Z.js → scroll-timeline-RpeTwOvs.js} +1 -1
- package/dist/src/app/assets/sync-5gq6fmG4.js +4 -0
- package/dist/src/app/assets/vendor-charts-BL9OMNU7.js +36 -0
- package/dist/src/app/assets/{vendor-markdown-Ch00wnNI.js → vendor-markdown-BYsQqn7Z.js} +10 -10
- package/dist/src/app/assets/{vendor-react-CVvmk1UB.js → vendor-react-CqWgVW6T.js} +2 -2
- package/dist/src/app/assets/{vendor-utils-BnEYbx2Q.js → vendor-utils-BHPO71pu.js} +1 -1
- package/dist/src/app/index.html +6 -6
- package/dist/src/{audio-CScmnmEB.js → audio-DnEIHWZR.js} +3 -3
- package/dist/src/{audio-Da8U9IS5.js → audio-heR0mu0n.js} +3 -3
- package/dist/src/{audio-BvpTOArF.js → audio-wafFO1wn.js} +3 -3
- package/dist/src/{audio-C0vDeS0j.cjs → audio-x44tsxIo.cjs} +4 -4
- package/dist/src/authoritativeMarkupInjection-2G3Be6oL.cjs +74 -0
- package/dist/src/authoritativeMarkupInjection-81PPynHR.js +75 -0
- package/dist/src/authoritativeMarkupInjection-D9O70HPi.js +74 -0
- package/dist/src/authoritativeMarkupInjection-PxSf3Rh8.js +74 -0
- package/dist/src/{base-dYsl2hmL.js → base-0Gzzue9Z.js} +4 -3
- package/dist/src/{base-BOMaNEes.js → base-Bp4c52YZ.js} +4 -3
- package/dist/src/{base-BTux96b1.js → base-BuNn-YBX.js} +4 -3
- package/dist/src/{base-Tw6uhH8K.cjs → base-BzdS8tod.cjs} +5 -4
- package/dist/src/bestOfN-BHiOjeaq.js +136 -0
- package/dist/src/bestOfN-CdkNPPbX.js +136 -0
- package/dist/src/bestOfN-ClUSOhN0.js +137 -0
- package/dist/src/bestOfN-DWwXEg1h.cjs +140 -0
- package/dist/src/{blobs-B95F_7vE.cjs → blobs-B0977K1O.cjs} +7 -6
- package/dist/src/{blobs-D_gg8nbm.js → blobs-CeFdPn_T.js} +2 -2
- package/dist/src/{blobs-DjLby-uP.js → blobs-DODuTK-a.js} +2 -2
- package/dist/src/{blobs-BW4U31ue.js → blobs-Dwef1Ao1.js} +2 -2
- package/dist/src/cache-BKYi3kAA.js +3 -0
- package/dist/src/{cache-BI5BY7ey.js → cache-BR77mdIR.js} +127 -11
- package/dist/src/{cache-Cr-qWIbP.js → cache-CrioYnaa.js} +125 -10
- package/dist/src/{cache-Bzttsk0X.js → cache-DdriHsNX.js} +125 -10
- package/dist/src/{cache-DGg-yTZG.cjs → cache-h5MWOBZI.cjs} +135 -25
- package/dist/src/{chat-Cx_LkwvZ.js → chat-BBEnnpQk.js} +11 -11
- package/dist/src/{chat-DChSH_Es.js → chat-BSos6PvZ.js} +9 -9
- package/dist/src/{chat-aMQZw6R7.js → chat-Bnt7ieO0.js} +4 -4
- package/dist/src/{chat-vYqqv1gP.cjs → chat-DSyYuTYT.cjs} +14 -13
- package/dist/src/{chat-BLOdH60v.js → chat-DTdf-J5Q.js} +11 -11
- package/dist/src/{chat-DH97tVV9.cjs → chat-Dq3DomYU.cjs} +6 -6
- package/dist/src/{chat-DG2LkwLq.js → chat-g5QLeLOo.js} +4 -4
- package/dist/src/{chat-D9nudO9b.js → chat-mTTuUAYb.js} +4 -4
- package/dist/src/{chatkit-B8X34dQc.js → chatkit-B6DWi70Q.js} +3 -3
- package/dist/src/{chatkit-D44VyUyB.cjs → chatkit-BYveR48_.cjs} +6 -5
- package/dist/src/{chatkit-BXu42Qwt.js → chatkit-fARZwEfV.js} +3 -3
- package/dist/src/{chatkit-CbMRoeYw.js → chatkit-lb6FK02w.js} +1 -1
- package/dist/src/{claude-agent-sdk-DYv_AJ8u.js → claude-agent-sdk-BGUac_kS.js} +213 -66
- package/dist/src/{claude-agent-sdk-BjriSVRZ.js → claude-agent-sdk-BP__YGfK.js} +213 -66
- package/dist/src/{claude-agent-sdk-BzNZeZ0N.js → claude-agent-sdk-Ddgx5BIQ.js} +212 -65
- package/dist/src/{claude-agent-sdk-BRq0bbIK.cjs → claude-agent-sdk-n2XcEclh.cjs} +219 -71
- package/dist/src/cloud-DiWbUiVP.js +3 -0
- package/dist/src/{cloud-Da0bofJd.js → cloud-DqF5N1aJ.js} +2 -2
- package/dist/src/{cloudflare-ai-CXC4b1EU.js → cloudflare-ai-BGamMotN.js} +4 -4
- package/dist/src/{cloudflare-ai-DGOwgexC.js → cloudflare-ai-BwW8W-w7.js} +4 -4
- package/dist/src/{cloudflare-ai-DJv5qnyb.cjs → cloudflare-ai-DCRGnsyL.cjs} +6 -6
- package/dist/src/{cloudflare-ai-CyBoIs1Q.js → cloudflare-ai-sD26nP6V.js} +4 -4
- package/dist/src/{cloudflare-gateway-TJkVrZlB.js → cloudflare-gateway-2lnOT4qM.js} +3 -3
- package/dist/src/{cloudflare-gateway-D-dnkzCF.js → cloudflare-gateway-B1-8KNCt.js} +3 -3
- package/dist/src/{cloudflare-gateway-DKVjkDav.cjs → cloudflare-gateway-CCC1DFrC.cjs} +5 -5
- package/dist/src/{cloudflare-gateway-1sAoOyft.js → cloudflare-gateway-DOghiU6r.js} +3 -3
- package/dist/src/{codex-app-server-CCLjqCh9.js → codex-app-server-BWeWamEb.js} +19 -4
- package/dist/src/{codex-app-server-CCe0TiDc.js → codex-app-server-DyQB1P0p.js} +20 -5
- package/dist/src/{codex-app-server-VMRnjZ68.cjs → codex-app-server-R9u_G7W9.cjs} +27 -11
- package/dist/src/{codex-app-server-CPW1LFwh.js → codex-app-server-ZQRJSJjU.js} +20 -5
- package/dist/src/{codex-sdk-Bzb_TqX9.js → codex-sdk-B62H0fe7.js} +16 -5
- package/dist/src/{codex-sdk-Bd8UbO9q.cjs → codex-sdk-BRPUvJG8.cjs} +31 -12
- package/dist/src/{codex-sdk-BgEFQ70r.js → codex-sdk-Dio1zJBS.js} +18 -7
- package/dist/src/codex-sdk-DxukZs_K.js +3 -0
- package/dist/src/{codex-sdk-DfvDTN33.js → codex-sdk-iEmW1eS2.js} +18 -7
- package/dist/src/{cometapi-B5ImDlSm.js → cometapi--hh7dESS.js} +7 -7
- package/dist/src/{cometapi-CC7hWxmX.js → cometapi-C7yWNGqt.js} +7 -7
- package/dist/src/{cometapi-CCbpHkuF.js → cometapi-CSIi16a0.js} +7 -7
- package/dist/src/{cometapi-BgAkuYCw.cjs → cometapi-sZgBZtbU.cjs} +8 -8
- package/dist/src/{completion-DtQ72Bm3.cjs → completion-CWtqdn3z.cjs} +6 -6
- package/dist/src/{completion-Vq_ad618.js → completion-DT8cxo9T.js} +4 -4
- package/dist/src/{completion-2iuYVxwi.js → completion-DUScduXp.js} +5 -5
- package/dist/src/{completion-CrD6MQ93.js → completion-D_2IOAoS.js} +5 -5
- package/dist/src/constants-BjJV0cRr.js +6 -0
- package/dist/src/constants-DH5XYLKZ.js +7 -0
- package/dist/src/constants-DZGEFLsu.js +6 -0
- package/dist/src/constants-a2kYssQk.cjs +11 -0
- package/dist/src/{createHash-4gFQpDDv.js → createHash-BtbSX3mj.js} +1 -1
- package/dist/src/{createHash-Un4Q_huE.js → createHash-CGVzWdjj.js} +1 -1
- package/dist/src/{createHash-VvBIc-AW.cjs → createHash-CSiqnK5P.cjs} +2 -2
- package/dist/src/{createHash-DPpsZgFF.js → createHash-CgRvs4Fn.js} +1 -1
- package/dist/src/crescendo-B41TwUHM.js +722 -0
- package/dist/src/crescendo-Bfic7KC4.js +724 -0
- package/dist/src/crescendo-DXFB7rHP.cjs +725 -0
- package/dist/src/crescendo-DvvYxMLA.js +723 -0
- package/dist/src/custom-BdzuqFTN.js +619 -0
- package/dist/src/custom-Cf5Q6r-P.js +618 -0
- package/dist/src/custom-DLgufezC.js +620 -0
- package/dist/src/custom-LEXHCRe3.cjs +621 -0
- package/dist/src/{docker--3qzPa-6.js → docker-BOSO_6hK.js} +5 -5
- package/dist/src/{docker-D3AY-5F5.cjs → docker-D0h2vFrc.cjs} +6 -6
- package/dist/src/{docker-DCsCDvwM.js → docker-D2TWGyTP.js} +5 -5
- package/dist/src/{docker-Dorv4_Dg.js → docker-Vj_4_cPg.js} +5 -5
- package/dist/src/{embedding-BXhN5lCH.cjs → embedding-BQIApR18.cjs} +6 -6
- package/dist/src/{embedding-ChS1ivFS.js → embedding-CStK0TV6.js} +5 -5
- package/dist/src/{embedding-DNRvZwRN.js → embedding-CU78FMnw.js} +5 -5
- package/dist/src/{embedding-D_bI4NDq.js → embedding-CV8lmCnU.js} +4 -4
- package/dist/src/{errors-DFHe4L-n.js → errors-9PcUL8BC.js} +1 -1
- package/dist/src/{esm-B_rGuPTo.cjs → esm-BIKakvNa.cjs} +8 -7
- package/dist/src/{esm-BRkfNsYs.js → esm-BTK1W7lG.js} +1 -1
- package/dist/src/{esm-BX8fwlAO.js → esm-Bexx2PFc.js} +1 -1
- package/dist/src/{eval-DJ_4A-tr.js → eval-B3BaNBbO.js} +19 -19
- package/dist/src/{eval-BQPLBJbw.js → eval-DfR9885C.js} +1 -1
- package/dist/src/evalResult-BtZSUgQv.js +3 -0
- package/dist/src/{evalResult-D6P5I5il.js → evalResult-CcSqNl_Y.js} +17 -10
- package/dist/src/{evalResult-Cx-8OWkb.cjs → evalResult-DHXs-9TL.cjs} +29 -11
- package/dist/src/{evalResult-pSvGWFMo.js → evalResult-PGqEbasb.js} +18 -11
- package/dist/src/evaluator-B9LGbKI8.js +3 -0
- package/dist/src/{evaluator-D-UIbbYq.js → evaluator-n_dEb00o.js} +258 -132
- package/dist/src/evaluatorHelpers-CrRObe2z.js +510 -0
- package/dist/src/evaluatorHelpers-D06I9WFL.cjs +537 -0
- package/dist/src/evaluatorHelpers-D1_kwvyp.js +511 -0
- package/dist/src/{extractor-DxyiFhPk.js → extractor-BVkZtk4R.js} +5 -5
- package/dist/src/{extractor-BM3jRERL.js → extractor-D3Fv_Tdh.js} +5 -5
- package/dist/src/{extractor-YlZbUMsL.js → extractor-Jp53vs-6.js} +5 -5
- package/dist/src/{extractor-Dxr2J_wK.cjs → extractor-bV_NOoaz.cjs} +6 -6
- package/dist/src/{fetch-Y5qX_kST.js → fetch-BSSAcMxf.js} +90 -26
- package/dist/src/fetch-BodQTrMU.js +3 -0
- package/dist/src/{fetch-NuqXW1Xb.cjs → fetch-Cfm4EuOB.cjs} +115 -32
- package/dist/src/{fetch-B6ch2nU2.js → fetch-Cpf1U1nO.js} +86 -26
- package/dist/src/{fetch-D9xxyC1p.js → fetch-Doks14zQ.js} +90 -26
- package/dist/src/{fileExtensions-D9h-8Wxg.cjs → fileExtensions-BhdwzYaD.cjs} +24 -1
- package/dist/src/{fileExtensions-BGh-W-HT.js → fileExtensions-CXRfY3Ss.js} +12 -2
- package/dist/src/{fileExtensions-DysCsxNG.js → fileExtensions-D4GCJ67J.js} +12 -2
- package/dist/src/{formatDuration-Ch4A7G3o.js → formatDuration-CMVNrYvE.js} +1 -1
- package/dist/src/{genaiTracer-BokHC-MW.cjs → genaiTracer-14nugQQx.cjs} +14 -2
- package/dist/src/{genaiTracer-C3ZPQU60.js → genaiTracer-BPVvltoW.js} +2 -2
- package/dist/src/{genaiTracer-DxODqT9e.js → genaiTracer-D18lYzhB.js} +2 -2
- package/dist/src/{genaiTracer-CFny3gOy.js → genaiTracer-jJKYsnjc.js} +2 -2
- package/dist/src/goat-Ba7Gznzq.js +467 -0
- package/dist/src/goat-CJngS-WU.js +468 -0
- package/dist/src/goat-CwsbfQeu.js +466 -0
- package/dist/src/goat-DOMbozoX.cjs +470 -0
- package/dist/src/graders-B3D7kCcD.js +3 -0
- package/dist/src/{graders-CgPn32yp.js → graders-BQt1BaQe.js} +1019 -86
- package/dist/src/{graders-Bw1wk_21.cjs → graders-CBQ2s6gz.cjs} +1087 -130
- package/dist/src/{graders-CwrbifOo.js → graders-DaRU98zs.js} +1022 -84
- package/dist/src/{graders-BoUqsCEm.js → graders-KXzjnIim.js} +1024 -86
- package/dist/src/hydra-BU6GjYoQ.js +560 -0
- package/dist/src/hydra-BmXHxlyl.js +559 -0
- package/dist/src/hydra-CR0KyYDb.js +561 -0
- package/dist/src/hydra-RO9jBks7.cjs +560 -0
- package/dist/src/{image-BeWaInPF.js → image-BBmZdKO3.js} +3 -3
- package/dist/src/{image-qjO6FWPs.js → image-B_PFG7IG.js} +3 -3
- package/dist/src/image-BtODOZjh.js +442 -0
- package/dist/src/image-CYuNJIqd.js +442 -0
- package/dist/src/{image-D10dNAav.cjs → image-E00WFZkq.cjs} +4 -4
- package/dist/src/image-HK2Cfqb1.cjs +465 -0
- package/dist/src/{image-Dr_3I3nK.js → image-LGj8dTcr.js} +3 -3
- package/dist/src/image-YMKejC0r.js +443 -0
- package/dist/src/index.cjs +711 -339
- package/dist/src/index.d.cts +3159 -1611
- package/dist/src/index.d.ts +3158 -1610
- package/dist/src/index.js +605 -249
- package/dist/src/indirectWebPwn-B80dLlFC.js +260 -0
- package/dist/src/indirectWebPwn-BMTXXznx.js +386 -0
- package/dist/src/indirectWebPwn-BZFPV7Q9.js +385 -0
- package/dist/src/indirectWebPwn-BaEQEOIO.cjs +260 -0
- package/dist/src/indirectWebPwn-BzpyMnFS.js +259 -0
- package/dist/src/indirectWebPwn-CiWB-vVH.js +385 -0
- package/dist/src/indirectWebPwn-D7NA9Nsv.cjs +397 -0
- package/dist/src/indirectWebPwn-o_bEFMjP.js +259 -0
- package/dist/src/inputVariables-B0qUChbV.js +467 -0
- package/dist/src/inputVariables-DUGMb9Ka.js +464 -0
- package/dist/src/inputVariables-DXFdi7AI.js +468 -0
- package/dist/src/inputVariables-Dq9W-Z3a.cjs +475 -0
- package/dist/src/{interactiveCheck-CCICw2cy.js → interactiveCheck-C4QlIuoR.js} +1 -1
- package/dist/src/{invariant-kfQ8Bu82.cjs → invariant-QtnLD03y.cjs} +1 -1
- package/dist/src/iterative-B-l0syBV.js +501 -0
- package/dist/src/iterative-BACUeCCz.cjs +503 -0
- package/dist/src/iterative-CVwoExo8.js +502 -0
- package/dist/src/iterative-QDrGSyss.js +500 -0
- package/dist/src/iterativeImage-BQg2OwA6.js +413 -0
- package/dist/src/iterativeImage-Bwn0fM75.js +413 -0
- package/dist/src/iterativeImage-CcgVyASo.cjs +415 -0
- package/dist/src/iterativeImage-D_UbQXg4.js +414 -0
- package/dist/src/iterativeMeta-CIu-CHRS.js +405 -0
- package/dist/src/iterativeMeta-CN8CNjFA.js +406 -0
- package/dist/src/iterativeMeta-DN6BTjpq.js +404 -0
- package/dist/src/iterativeMeta-DlqY3BsS.cjs +405 -0
- package/dist/src/iterativeTree-CJ8a8G9T.js +820 -0
- package/dist/src/iterativeTree-DcuFXnjL.js +819 -0
- package/dist/src/iterativeTree-FrDDYAN0.js +818 -0
- package/dist/src/iterativeTree-LOgAi0nU.cjs +821 -0
- package/dist/src/{knowledgeBase-C8qOo26M.cjs → knowledgeBase-CdbcGBZF.cjs} +25 -11
- package/dist/src/{knowledgeBase-BBETc5-S.js → knowledgeBase-CjOXp6Lr.js} +23 -9
- package/dist/src/{knowledgeBase-Dr3Kib7F.js → knowledgeBase-DxAq4n4z.js} +24 -10
- package/dist/src/{knowledgeBase-CzAi2rUI.js → knowledgeBase-o_wTLzrt.js} +24 -10
- package/dist/src/{litellm-DRc4qWfc.js → litellm-B2gHwya_.js} +4 -4
- package/dist/src/{litellm-CaUmV7Mk.cjs → litellm-CPpdlO7n.cjs} +5 -5
- package/dist/src/{litellm-BLSiANhk.js → litellm-CYfgxLrM.js} +4 -4
- package/dist/src/{litellm-DQGo_juI.js → litellm-ojWBKU3C.js} +4 -4
- package/dist/src/{logger-COuQb2xB.cjs → logger-cfNpzI4o.cjs} +13 -55
- package/dist/src/{luma-ray-if-Ml4R9.cjs → luma-ray-BCCO9XXG.cjs} +9 -8
- package/dist/src/{luma-ray-B-tNZzqW.js → luma-ray-BPrdihAb.js} +5 -5
- package/dist/src/{luma-ray-CtS3OlGq.js → luma-ray-DP5N79lB.js} +5 -5
- package/dist/src/{luma-ray-PJJgUjOc.js → luma-ray-DTDyoAlM.js} +5 -5
- package/dist/src/main.js +727 -234
- package/dist/src/memoryPoisoning-B6N3us35.cjs +106 -0
- package/dist/src/memoryPoisoning-Bc_BK_k2.js +106 -0
- package/dist/src/memoryPoisoning-D375zwSX.js +107 -0
- package/dist/src/memoryPoisoning-DJA0YjJT.js +106 -0
- package/dist/src/{messages-CewuNcNS.js → messages-Bg29Nbit.js} +17 -9
- package/dist/src/{messages-BnsVHUnm.cjs → messages-BrZEnHsV.cjs} +24 -15
- package/dist/src/{messages-CI69Lasb.js → messages-CBulRaud.js} +18 -10
- package/dist/src/{messages-B9dSjrNf.js → messages-K9A8RxBM.js} +18 -10
- package/dist/src/{meteor-CeGo0Lu2.js → meteor--TZYICTI.js} +1 -1
- package/dist/src/{meteor-BBGcGeCa.cjs → meteor-CR226f7Z.cjs} +2 -2
- package/dist/src/{meteor-Wc_aUVvu.js → meteor-Cl_yd7rJ.js} +1 -1
- package/dist/src/{meteor-BKTM-7KS.js → meteor-Dce-_zGQ.js} +1 -1
- package/dist/src/mischievousUser-BDLwSGN0.js +46 -0
- package/dist/src/mischievousUser-Blx-OtT3.js +47 -0
- package/dist/src/mischievousUser-CHuTTvBg.js +46 -0
- package/dist/src/mischievousUser-CSUrH3fq.cjs +46 -0
- package/dist/src/{modelslab-zpz9JcK0.js → modelslab-BjEk7yCP.js} +6 -6
- package/dist/src/{modelslab-BCLOtfek.js → modelslab-CBCrdIBc.js} +6 -6
- package/dist/src/{modelslab-D73OnKSx.js → modelslab-CrGpXuhv.js} +6 -6
- package/dist/src/{modelslab-BkapYJhh.cjs → modelslab-Cvh0klQx.cjs} +7 -7
- package/dist/src/{nova-reel-B8F_TK5w.js → nova-reel-CUDDZcSA.js} +5 -5
- package/dist/src/{nova-reel-Bx0NFV2f.js → nova-reel-CyDESu5J.js} +5 -5
- package/dist/src/{nova-reel-DkT7tnoB.cjs → nova-reel-DH_Ksu6X.cjs} +9 -8
- package/dist/src/{nova-reel-CNGJTLtG.js → nova-reel-YQAqUYJY.js} +5 -5
- package/dist/src/{nova-sonic-BeTRaFOh.js → nova-sonic-BVFFB7JZ.js} +2 -2
- package/dist/src/{nova-sonic-YT426juD.cjs → nova-sonic-Bg3cxMMu.cjs} +5 -4
- package/dist/src/{nova-sonic-CL7Zqv0G.js → nova-sonic-BhmnRGyh.js} +3 -3
- package/dist/src/{nova-sonic-BaXRN1cr.js → nova-sonic-DxmK95c1.js} +3 -3
- package/dist/src/{openai-Cy1XLs0c.cjs → openai-C3uXv8wS.cjs} +2 -2
- package/dist/src/{openai-BT-JvDse.js → openai-CJrsh9n4.js} +1 -1
- package/dist/src/{openai-D4fxGvRx.js → openai-zgwBb4Ff.js} +1 -1
- package/dist/src/{openclaw-DUBZP3GL.cjs → openclaw-CHQaZi_-.cjs} +17 -15
- package/dist/src/{openclaw-DA8U4DsD.js → openclaw-CWOm2eOr.js} +9 -8
- package/dist/src/{openclaw-DObVgpjC.js → openclaw-Cdk9JKfY.js} +9 -8
- package/dist/src/{openclaw-Bq7RVR3k.js → openclaw-DhF8rUpI.js} +9 -8
- package/dist/src/{opencode-sdk-BB40Wir1.js → opencode-sdk-BRjiPV-g.js} +38 -3
- package/dist/src/{opencode-sdk-CeqiOcOU.cjs → opencode-sdk-CPFMw0ed.cjs} +45 -9
- package/dist/src/{opencode-sdk-BM1UAIv1.js → opencode-sdk-CaSOCsBA.js} +39 -4
- package/dist/src/{opencode-sdk-ChdK7F7z.js → opencode-sdk-CqkyG8De.js} +39 -4
- package/dist/src/{otlpReceiver-UYMQx3sy.js → otlpReceiver--gTpSagc.js} +119 -3
- package/dist/src/{otlpReceiver-C6thJRXi.js → otlpReceiver-B2eaKC8C.js} +118 -2
- package/dist/src/{otlpReceiver-CcdIikOu.js → otlpReceiver-BXjcRqAM.js} +119 -3
- package/dist/src/{otlpReceiver-DNSQj6bf.cjs → otlpReceiver-CvJdBGSc.cjs} +125 -7
- package/dist/src/packageParser--MWTSrPW.js +36 -0
- package/dist/src/packageParser-CgE-ziRo.js +35 -0
- package/dist/src/packageParser-QoCS1FMl.cjs +54 -0
- package/dist/src/packageParser-hwwSGnAZ.js +35 -0
- package/dist/src/processShim-BBxt7LKO.js +95 -0
- package/dist/src/processShim-BcGzU8fY.js +94 -0
- package/dist/src/processShim-C_z3aRvF.js +94 -0
- package/dist/src/processShim-DSY9BV2T.cjs +98 -0
- package/dist/src/promptLength-0qIHyhA5.js +71 -0
- package/dist/src/promptLength-4X-Wd8PG.js +72 -0
- package/dist/src/promptLength-B9nZEfO6.js +71 -0
- package/dist/src/promptLength-BbBbDHNj.cjs +94 -0
- package/dist/src/promptfoo-BU4_0J85.js +238 -0
- package/dist/src/promptfoo-CReYAtfb.js +237 -0
- package/dist/src/promptfoo-ClChwT74.cjs +292 -0
- package/dist/src/promptfoo-CqEpj6Sr.js +237 -0
- package/dist/src/prompts-BYMtqPCw.js +259 -0
- package/dist/src/prompts-C-bqE1Yp.js +260 -0
- package/dist/src/prompts-Cp_Qx5Ml.js +270 -0
- package/dist/src/prompts-DHhQsANy.js +259 -0
- package/dist/src/prompts-D_QpZ2Dm.js +271 -0
- package/dist/src/prompts-hNvWBD3z.cjs +284 -0
- package/dist/src/prompts-huDVH2CI.js +270 -0
- package/dist/src/prompts-p78Hul5i.cjs +289 -0
- package/dist/src/{providerRegistry-BESeALrr.cjs → providerRegistry-CZO_w7ue.cjs} +2 -2
- package/dist/src/{providerRegistry-DoACwqhD.js → providerRegistry-DHcFiVWX.js} +1 -1
- package/dist/src/{providerRegistry-PMsleEzs.js → providerRegistry-ReCd0sFa.js} +1 -1
- package/dist/src/{providers-DT-GtF2t.js → providers-B7TyByfj.js} +789 -11925
- package/dist/src/{providers-CJh7iriU.js → providers-BGc7tDtQ.js} +796 -11872
- package/dist/src/{providers-Ctcc592x.js → providers-CSOp-bCm.js} +1 -1
- package/dist/src/{providers-eDShy16E.cjs → providers-V6RBuieY.cjs} +837 -12138
- package/dist/src/{providers-DRrerKra.js → providers-iH3Sw1yo.js} +851 -11938
- package/dist/src/{pythonUtils-C4tltmIn.js → pythonUtils-CLCgQ9tt.js} +1 -1
- package/dist/src/{pythonUtils-DNqbnRdx.js → pythonUtils-CgYxeSmO.js} +2 -2
- package/dist/src/{pythonUtils-CoLaCwNY.cjs → pythonUtils-Cokhluq3.cjs} +7 -6
- package/dist/src/{pythonUtils-DMO68Jg7.js → pythonUtils-D0BYebvX.js} +2 -2
- package/dist/src/{quiverai-CPKhWgaT.js → quiverai-58BtRRet.js} +3 -3
- package/dist/src/{quiverai-Bk1KrvL6.js → quiverai-C1dYu5MW.js} +3 -3
- package/dist/src/{quiverai-BSS9a7wV.js → quiverai-CNMIpZQg.js} +3 -3
- package/dist/src/{quiverai-Bpx6MZ7T.cjs → quiverai-fmSfU43k.cjs} +4 -4
- package/dist/src/registry-6Jw6ebor.cjs +124 -0
- package/dist/src/registry-BTGk2ZkB.js +124 -0
- package/dist/src/registry-BTNqmP5o.js +125 -0
- package/dist/src/registry-NGnOG2xa.js +124 -0
- package/dist/src/{server-gyd6d4Hc.js → remoteGeneration--D6WjzUm.js} +15 -108
- package/dist/src/{server-BEECpeGG.cjs → remoteGeneration-BpIYlb_O.cjs} +30 -119
- package/dist/src/{server-ByxbqAcQ.js → remoteGeneration-CJC3E0aW.js} +15 -147
- package/dist/src/remoteGeneration-D6UjE2JT.js +218 -0
- package/dist/src/render-BNTrbmBw.cjs +384 -0
- package/dist/src/render-CSP99NLm.js +348 -0
- package/dist/src/render-DFfDeYUK.js +347 -0
- package/dist/src/{render-nj-UaPdn.js → render-DznWrxGO.js} +2 -2
- package/dist/src/render-_6ur1fhE.js +347 -0
- package/dist/src/resourceAttributes-D1jP3kL5.js +17 -0
- package/dist/src/resourceAttributes-DQbBB--2.js +16 -0
- package/dist/src/resourceAttributes-ephgOvdR.cjs +27 -0
- package/dist/src/resourceAttributes-v6-I67fn.js +16 -0
- package/dist/src/{responses-1ztiVYsx.js → responses-5Gf5HNOi.js} +11 -7
- package/dist/src/{responses-BiaBguAu.js → responses-BVi7xIdv.js} +13 -9
- package/dist/src/{responses-CF-ayauu.cjs → responses-CUARGrhY.cjs} +16 -11
- package/dist/src/{responses-B8haB-mD.js → responses-CrmWv6iz.js} +13 -9
- package/dist/src/rolldown-runtime-D_mwlA32.cjs +43 -0
- package/dist/src/rubyUtils-BYVlQ94c.js +3 -0
- package/dist/src/{rubyUtils-CIQFnVz4.js → rubyUtils-CXlFM2rR.js} +2 -2
- package/dist/src/{rubyUtils-BI0p46eZ.js → rubyUtils-CnlW8AYb.js} +2 -2
- package/dist/src/{rubyUtils-DoifqkiA.cjs → rubyUtils-CqUWBZAt.cjs} +16 -26
- package/dist/src/{rubyUtils-DGnoCYL2.js → rubyUtils-DdGojpfv.js} +1 -1
- package/dist/src/runtimeTransform-BJOpL9Yc.js +142 -0
- package/dist/src/runtimeTransform-Dgh_D7DU.js +143 -0
- package/dist/src/runtimeTransform-DigbjU1r.js +142 -0
- package/dist/src/runtimeTransform-ON3YYILw.cjs +147 -0
- package/dist/src/{sagemaker-ljtY12VM.cjs → sagemaker-BeVTKfrv.cjs} +65 -53
- package/dist/src/{sagemaker-BDLeW29y.js → sagemaker-BwbV5U7_.js} +61 -50
- package/dist/src/{sagemaker-C5T60MKf.js → sagemaker-ClAZ6Qwc.js} +61 -50
- package/dist/src/{sagemaker-ClS_NB07.js → sagemaker-DY2qrOpq.js} +61 -50
- package/dist/src/{scanner-nOCWNIXa.js → scanner-BS-iFIp3.js} +6 -6
- package/dist/src/server/index.js +4115 -420
- package/dist/src/server-BHOEL8p8.cjs +126 -0
- package/dist/src/server-CbiJppij.js +107 -0
- package/dist/src/server-D18AAlAc.js +3 -0
- package/dist/src/server-DLYjSFm2.js +182 -0
- package/dist/src/server-JQYD_Nws.js +146 -0
- package/dist/src/{fileExtensions-8CjoL7vB.js → shared-BoG7qLMv.js} +12 -2
- package/dist/src/shared-CRq0gGVf.js +1334 -0
- package/dist/src/shared-CSIGeGLl.js +1334 -0
- package/dist/src/shared-DNvim54U.js +1335 -0
- package/dist/src/shared-aHWko3P1.cjs +1436 -0
- package/dist/src/{signal-DTtUuU3l.js → signal-CSurUUyV.js} +2 -2
- package/dist/src/simulatedUser-C7sOFSF6.js +222 -0
- package/dist/src/simulatedUser-DH_7RzEQ.js +222 -0
- package/dist/src/simulatedUser-lgMMmniD.js +223 -0
- package/dist/src/simulatedUser-mnCUS9Bm.cjs +227 -0
- package/dist/src/{slack-Bamy_7te.js → slack-Bapo-7_8.js} +1 -1
- package/dist/src/{slack-BLlsDpfG.cjs → slack-DMC1QVEg.cjs} +3 -2
- package/dist/src/{slack-BPYLQLgb.js → slack-DTEFhrMn.js} +1 -1
- package/dist/src/{slack-4zZX1OKP.js → slack-k-_CP84Q.js} +1 -1
- package/dist/src/storage-95htjpLs.js +834 -0
- package/dist/src/storage-A4WnAeN3.cjs +911 -0
- package/dist/src/storage-B2Ql_oq4.js +822 -0
- package/dist/src/storage-EKVWZBNY.js +875 -0
- package/dist/src/{store-2K0kDi80.cjs → store-B2NDDooM.cjs} +60 -24
- package/dist/src/{store-CPh25336.js → store-DKd5592Q.js} +50 -19
- package/dist/src/{store-BPkzEyFM.js → store-HpopRVzl.js} +50 -19
- package/dist/src/store-IbiRIF3k.js +3 -0
- package/dist/src/strategies-CBI1brSy.js +2333 -0
- package/dist/src/strategies-CCcnEbFO.cjs +2360 -0
- package/dist/src/strategies-CD1gHeeQ.js +2331 -0
- package/dist/src/{tables-WgdUZ8Ck.js → tables-CRSXQ2Ke.js} +2 -2
- package/dist/src/{tables-BMSOS2Gg.js → tables-CxjU7bBd.js} +2 -2
- package/dist/src/{tables-CXbaZ9y1.cjs → tables-DBIJU0WE.cjs} +6 -5
- package/dist/src/{tables-NlvH23ky.js → tables-DafUHOeh.js} +2 -2
- package/dist/src/telemetry-BUm_krVX.js +3 -0
- package/dist/src/{telemetry--iqaGyaS.js → telemetry-BbpmrenM.js} +4 -4
- package/dist/src/{telemetry-CgdVGV8N.js → telemetry-C4bX-6Sr.js} +4 -4
- package/dist/src/{telemetry-CEQxGnMZ.cjs → telemetry-C_ImlCOk.cjs} +15 -9
- package/dist/src/{telemetry-DWdGHvEf.js → telemetry-DOE567Wj.js} +4 -4
- package/dist/src/{text-DDQP0tuQ.js → text-CZr46tp_.js} +1 -1
- package/dist/src/{text-D4lz-Jg_.js → text-Db-Wt2u2.js} +1 -1
- package/dist/src/{text-NWvfMfkF.js → text-DwYK5EBn.js} +1 -1
- package/dist/src/{text-BiNME7QG.cjs → text-nywWsRBM.cjs} +1 -1
- package/dist/src/{tokenUsageUtils-2wIvAhB3.js → tokenUsageUtils-CDet74yk.js} +1 -1
- package/dist/src/{tokenUsageUtils-4c780gFd.js → tokenUsageUtils-CmnQ0G2m.js} +1 -1
- package/dist/src/{tokenUsageUtils-C9odhsbW.cjs → tokenUsageUtils-_B-P8IAi.cjs} +1 -1
- package/dist/src/toolAttributes-BAjwcBf0.cjs +103 -0
- package/dist/src/toolAttributes-COVgDrBG.js +87 -0
- package/dist/src/toolAttributes-DJ9ZEKXD.js +86 -0
- package/dist/src/tracingOptions-BOP1FsRt.js +220 -0
- package/dist/src/tracingOptions-BvmDa_jH.js +219 -0
- package/dist/src/tracingOptions-DFBoE8O0.cjs +249 -0
- package/dist/src/tracingOptions-xhjOg2b1.js +221 -0
- package/dist/src/{transcription-D7Q0vJsh.js → transcription-BOXDoehT.js} +4 -4
- package/dist/src/{transcription-Bm2emLmJ.js → transcription-CpVdKc4P.js} +5 -5
- package/dist/src/{transcription-CZ4LG5hQ.cjs → transcription-D8kkPXg_.cjs} +9 -8
- package/dist/src/{transcription-84t4ALo2.js → transcription-Z94eV9LR.js} +5 -5
- package/dist/src/transform-BnSTnFlp.js +187 -0
- package/dist/src/transform-BnSXWmU_2.cjs +221 -0
- package/dist/src/{transform-DtooZqYY.js → transform-BufxPIQL.js} +8 -8
- package/dist/src/transform-CGt7Kt3y2.js +186 -0
- package/dist/src/transform-CrPGTsij.js +186 -0
- package/dist/src/{transform-Dg4LcO1Y.cjs → transform-DJkt81VY.cjs} +12 -11
- package/dist/src/{transform-B-b6Cq-q.js → transform-DtTfiGoh.js} +8 -8
- package/dist/src/{transform-_DpNB4qp.js → transform-Wp6s_5QE.js} +8 -8
- package/dist/src/{transformersAvailability-lvCCvuPT.js → transformersAvailability-0ThtPved.js} +1 -1
- package/dist/src/transformersAvailability-BYydDE5U.js +35 -0
- package/dist/src/{transformersAvailability-rJGPccjr.js → transformersAvailability-BvyU9vDD.js} +1 -1
- package/dist/src/{transformersAvailability-B22swDxr.cjs → transformersAvailability-BytPvKUW.cjs} +1 -1
- package/dist/src/{types-BVH9hjgW.js → types-BFevViUY.js} +113 -19
- package/dist/src/{types-BDjGOq4E.js → types-BJQBBPTP.js} +113 -19
- package/dist/src/{types-CgG2rKiW.cjs → types-CxJvaY2S.cjs} +211 -28
- package/dist/src/{types-DNRZVOue.js → types-D6glLbdF.js} +125 -26
- package/dist/src/{util-Dpmm_dAI.cjs → util-BHCAeuXx.cjs} +33 -7
- package/dist/src/{util-DFPeFkiV.js → util-CMrHV35u.js} +28 -8
- package/dist/src/{util-DvpHnLt0.cjs → util-COnzevXE.cjs} +29 -21
- package/dist/src/{util-BlFVL0UF.js → util-CYev3d-r.js} +22 -7
- package/dist/src/util-DGqkTb2-.js +327 -0
- package/dist/src/util-DNtUsus_.cjs +386 -0
- package/dist/src/{util-Dub0f_ej.js → util-DRfqa4xz.js} +17 -10
- package/dist/src/{util-BVXcTwXu.js → util-DTq3jq2z.js} +28 -8
- package/dist/src/{util-C-kmRosx.js → util-DeY58psG.js} +22 -7
- package/dist/src/util-DpV6KT5i.js +327 -0
- package/dist/src/{util-3pBZZb_H.js → util-DwNJzqOV.js} +45 -10
- package/dist/src/util-ETfU_sS9.js +328 -0
- package/dist/src/{util-A5_ZsQUn.cjs → util-NYQvo1C7.cjs} +25 -9
- package/dist/src/{util-DN0-b81k.js → util-X4KQgyVD.js} +28 -8
- package/dist/src/{util-BQOCAHQC.js → util-jZRrXe1P.js} +46 -11
- package/dist/src/{util-B9CNhyac.js → util-o2Qg5rZv.js} +22 -7
- package/dist/src/{utils-BUMN8orw.js → utils-B0lzitHZ.js} +2 -2
- package/dist/src/{utils-kt7lv30R.js → utils-BFOh20Gb.js} +2 -2
- package/dist/src/{utils-o8S5huU2.js → utils-BGY69tk_.js} +2 -2
- package/dist/src/{utils-DkVeShIB.cjs → utils-Ve6kuJsa.cjs} +3 -3
- package/dist/src/{version-CbuBKu2U.js → version-CHR-EFec.js} +2 -2
- package/dist/src/{version-CbpiUINz.js → version-F0YDgb7J.js} +2 -2
- package/dist/src/{version-D9zu9FWB.cjs → version-VzUqOBZk.cjs} +2 -2
- package/dist/src/{version-0frU0UTr.js → version-qVEN5qCm.js} +2 -2
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +49 -23
- package/dist/src/app/assets/Report-vjzrbgce.js +0 -1
- package/dist/src/app/assets/index-B3NQ8HTd.js +0 -385
- package/dist/src/app/assets/index-Cli2yAXv.css +0 -1
- package/dist/src/app/assets/sync-IjzpWrOE.js +0 -4
- package/dist/src/app/assets/vendor-charts-BNdH8TCw.js +0 -36
- package/dist/src/cache-BRkhlH3k.cjs +0 -3
- package/dist/src/cache-BlC6aeJ0.js +0 -3
- package/dist/src/cloud-CoD5OacT.js +0 -3
- package/dist/src/codex-sdk-1jm_qPHf.js +0 -3
- package/dist/src/codex-sdk-Danroptg.cjs +0 -2
- package/dist/src/evalResult-BBJAHAtw.cjs +0 -2
- package/dist/src/evalResult-BBK58h2B.js +0 -3
- package/dist/src/evalResult-spPqh1G_.js +0 -2
- package/dist/src/evaluator-DgLKaZk8.js +0 -3
- package/dist/src/fetch-8viavNv8.js +0 -3
- package/dist/src/graders-C84JI-m5.js +0 -2
- package/dist/src/graders-CBbd0K0Q.cjs +0 -2
- package/dist/src/graders-CbQqpHSN.js +0 -3
- package/dist/src/graders-DS42d3ZG.js +0 -2
- package/dist/src/image-BmilRNqO.js +0 -258
- package/dist/src/image-CxJoa3aW.cjs +0 -280
- package/dist/src/image-DsGRlkh7.js +0 -257
- package/dist/src/image-a_SGUobh.js +0 -257
- package/dist/src/providers-BuyzKt7C.js +0 -2
- package/dist/src/providers-C7lNVBjX.cjs +0 -3
- package/dist/src/providers-CCE2COJi2.js +0 -2
- package/dist/src/render-7uNJ2V14.js +0 -135
- package/dist/src/render-DlscvAUJ.js +0 -135
- package/dist/src/render-eui5p5mL.js +0 -136
- package/dist/src/render-tG6ir9_g.cjs +0 -165
- package/dist/src/rubyUtils-4hjGxvju.js +0 -3
- package/dist/src/rubyUtils-CO-tuszQ.cjs +0 -2
- package/dist/src/server-ByiF3qlg.js +0 -386
- package/dist/src/server-C0XKRNB_.cjs +0 -2
- package/dist/src/server-C_15p79-.js +0 -3
- package/dist/src/store-2OXm_eBY.js +0 -240
- package/dist/src/store-BELqNwvz.js +0 -3
- package/dist/src/store-uQZ4AjPe.cjs +0 -2
- package/dist/src/telemetry-DjNoC_n3.cjs +0 -2
- package/dist/src/telemetry-ZdPZc0fm.js +0 -3
- package/dist/src/transform-BQt0BeAW.js +0 -3
- package/dist/src/transform-Bq5oqC0s.cjs +0 -2
- package/dist/src/transform-C9izGX54.cjs +0 -228
- package/dist/src/transform-CwbAZ84V.js +0 -216
- package/dist/src/transform-DzCF-wqV.js +0 -213
- package/dist/src/transform-eGiUAv86.js +0 -216
|
@@ -1,30 +1,39 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { A as getMaxEvalTimeMs, D as getEnvInt, N as state, O as getEnvString, S as summarizeEvaluateResultForLogging, T as getEnvBool, _ as extractJsonObjects, c as setLogCallback, g as extractFirstJsonObject, j as isCI, k as getEvalTimeoutMs, r as globalLogCallback, s as logger, v as getAjv, x as safeJsonStringify } from "./logger-BbY6ypFL.js";
|
|
3
|
-
import {
|
|
4
|
-
import { n as VERSION } from "./version-
|
|
3
|
+
import { L as FILE_METADATA_KEY, f as sleep, r as fetchWithRetries, w as parseChatPrompt, x as isPromptfooSampleTarget } from "./fetch-Cpf1U1nO.js";
|
|
4
|
+
import { n as VERSION } from "./version-F0YDgb7J.js";
|
|
5
5
|
import { t as invariant } from "./invariant-B2Rf6avk.js";
|
|
6
|
-
import { r as telemetry } from "./telemetry-
|
|
7
|
-
import { at as MULTI_INPUT_VAR, d as isGradingResult, nt as LLAMA_GUARD_REPLICATE_PROVIDER, p as isApiProvider, s as ResultFailureReason } from "./types-
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import { r as runPython } from "./pythonUtils-
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
18
|
-
import {
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
21
|
-
import {
|
|
22
|
-
import {
|
|
23
|
-
import { n as
|
|
24
|
-
import { n as runRuby } from "./rubyUtils-
|
|
25
|
-
import {
|
|
26
|
-
import {
|
|
27
|
-
import {
|
|
6
|
+
import { r as telemetry } from "./telemetry-DOE567Wj.js";
|
|
7
|
+
import { at as MULTI_INPUT_VAR, d as isGradingResult, nt as LLAMA_GUARD_REPLICATE_PROVIDER, p as isApiProvider, s as ResultFailureReason } from "./types-BFevViUY.js";
|
|
8
|
+
import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
|
|
9
|
+
import { i as extractVariablesFromTemplate, o as getNunjucksEngine, r as analyzeTemplateReference } from "./render-CSP99NLm.js";
|
|
10
|
+
import { t as providerRegistry } from "./providerRegistry-ReCd0sFa.js";
|
|
11
|
+
import { l as shouldGenerateRemote } from "./remoteGeneration-D6UjE2JT.js";
|
|
12
|
+
import { c as promptYesNo } from "./server-DLYjSFm2.js";
|
|
13
|
+
import { n as isNonTransientHttpStatus } from "./errors-9PcUL8BC.js";
|
|
14
|
+
import { l as withCacheNamespace, o as getCache } from "./cache-BR77mdIR.js";
|
|
15
|
+
import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
|
|
16
|
+
import { B as parseFileUrl, C as isOpenAiProvider, F as maybeLoadToolsFromExternalFile, S as isGoogleProvider, w as isProviderAllowed, x as isAnthropicProvider, z as loadFunction } from "./util-jZRrXe1P.js";
|
|
17
|
+
import { t as OpenAiChatCompletionProvider } from "./chat-DTdf-J5Q.js";
|
|
18
|
+
import { h as validateFunctionCall } from "./transform-DtTfiGoh.js";
|
|
19
|
+
import { l as validateFunctionCall$1 } from "./util-DeY58psG.js";
|
|
20
|
+
import { _ as VertexChatProvider, n as loadApiProvider, v as GoogleLiveProvider, y as AIStudioChatProvider } from "./providers-B7TyByfj.js";
|
|
21
|
+
import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
|
|
22
|
+
import { t as getProcessShim } from "./processShim-BBxt7LKO.js";
|
|
23
|
+
import { n as loadFromPackage, t as isPackagePath } from "./packageParser--MWTSrPW.js";
|
|
24
|
+
import { n as runRuby } from "./rubyUtils-CXlFM2rR.js";
|
|
25
|
+
import { c as isBasicRefusal, o as getSessionId } from "./util-ETfU_sS9.js";
|
|
26
|
+
import { $ as DEFAULT_WEB_SEARCH_PROMPT, Ct as getDefaultProviders, Dt as getGradingProvider, Et as getAndCheckProvider, G as matchesGEval, H as isGraderFailure, J as matchesTrajectoryGoalSuccess, K as matchesLlmRubric, Ot as getProviderCallExecutionContext, St as processFileReference, Tt as callProviderWithContext, U as matchesClosedQa, W as matchesFactuality, Y as doRemoteGrading, _t as splitIntoSentences, at as CONTEXT_RECALL, bt as getFinalTest, ct as CONTEXT_RELEVANCE, dt as renderLlmRubricPrompt, et as SELECT_BEST_PROMPT, ft as cosineSimilarity, gt as normalizeMatcherTokenUsage, ht as fail, it as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, kt as withProviderCallExecutionContext, lt as CONTEXT_RELEVANCE_BAD, mt as euclideanDistance, n as getGraderById, nt as ANSWER_RELEVANCY_GENERATE, ot as CONTEXT_RECALL_ATTRIBUTED_TOKEN, pt as dotProduct, q as matchesPiScore, rt as CONTEXT_FAITHFULNESS_LONGFORM, st as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, tt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, ut as loadRubricPrompt, vt as tryParse, wt as DEFAULT_ANTHROPIC_MODEL, xt as loadFromJavaScriptFile, yt as coerceString } from "./graders-BQt1BaQe.js";
|
|
27
|
+
import { f as redteamProviderManager, g as createProviderRateLimitOptions, h as createRateLimitRegistry, m as TokenUsageTracker } from "./shared-DNvim54U.js";
|
|
28
|
+
import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
|
|
29
|
+
import { a as getTransformLabel, i as getTransformErrorMessage, o as transform, r as TransformInputType } from "./transform-BnSTnFlp.js";
|
|
30
|
+
import { n as getTraceStore } from "./store-DKd5592Q.js";
|
|
31
|
+
import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-CSurUUyV.js";
|
|
32
|
+
import { t as extractAndStoreBinaryData } from "./extractor-BVkZtk4R.js";
|
|
33
|
+
import { i as throwIfTargetPromptExceedsMaxChars } from "./promptLength-4X-Wd8PG.js";
|
|
34
|
+
import { n as checkExfilTracking } from "./indirectWebPwn-BMTXXznx.js";
|
|
35
|
+
import { n as getFirstStringAttribute, r as getToolNameFromAttributes, t as TOOL_ARGUMENT_ATTRIBUTE_KEYS } from "./toolAttributes-COVgDrBG.js";
|
|
36
|
+
import { i as filterFiniteScores, n as renderPrompt, r as runExtensionHook, t as collectFileMetadata } from "./evaluatorHelpers-D1_kwvyp.js";
|
|
28
37
|
import { AsyncResource } from "node:async_hooks";
|
|
29
38
|
import chalk from "chalk";
|
|
30
39
|
import fs, { createWriteStream } from "fs";
|
|
@@ -38,11 +47,12 @@ import readline from "readline";
|
|
|
38
47
|
import { globSync } from "glob";
|
|
39
48
|
import { XMLParser } from "fast-xml-parser";
|
|
40
49
|
import async from "async";
|
|
41
|
-
import cliProgress from "cli-progress";
|
|
42
50
|
import { parse as parse$1 } from "parse5";
|
|
43
51
|
import { distance } from "fastest-levenshtein";
|
|
52
|
+
import cliProgress from "cli-progress";
|
|
44
53
|
import * as rouge from "js-rouge";
|
|
45
54
|
import { isDeepStrictEqual } from "node:util";
|
|
55
|
+
import { LRUCache } from "lru-cache";
|
|
46
56
|
import { ExportResultCode, W3CTraceContextPropagator } from "@opentelemetry/core";
|
|
47
57
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
48
58
|
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
@@ -405,17 +415,20 @@ async function resolveContext(assertion, test, output, prompt, fallbackContext,
|
|
|
405
415
|
contextValue = test.vars.context;
|
|
406
416
|
}
|
|
407
417
|
} else if (fallbackContext) contextValue = fallbackContext;
|
|
408
|
-
if (assertion.contextTransform)
|
|
409
|
-
const
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
418
|
+
if (assertion.contextTransform) {
|
|
419
|
+
const getLabel = () => getTransformLabel(assertion.contextTransform);
|
|
420
|
+
try {
|
|
421
|
+
const outputForTransform = providerResponse?.providerTransformedOutput ?? output;
|
|
422
|
+
const transformed = await transform(assertion.contextTransform, outputForTransform, {
|
|
423
|
+
vars: test.vars,
|
|
424
|
+
prompt: { label: prompt },
|
|
425
|
+
...providerResponse && providerResponse.metadata && { metadata: providerResponse.metadata }
|
|
426
|
+
});
|
|
427
|
+
invariant(typeof transformed === "string" || Array.isArray(transformed) && transformed.every((item) => typeof item === "string"), () => `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${getLabel()}`);
|
|
428
|
+
contextValue = transformed;
|
|
429
|
+
} catch (error) {
|
|
430
|
+
throw new Error(`Failed to transform context using expression '${getLabel()}': ${getTransformErrorMessage(error)}`);
|
|
431
|
+
}
|
|
419
432
|
}
|
|
420
433
|
invariant(typeof contextValue === "string" && contextValue.length > 0 || Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0), "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
|
|
421
434
|
return contextValue;
|
|
@@ -758,7 +771,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
758
771
|
telemetry.record("feature_used", { feature: "tracing" });
|
|
759
772
|
try {
|
|
760
773
|
logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
|
|
761
|
-
const { startOTLPReceiver } = await import("./otlpReceiver
|
|
774
|
+
const { startOTLPReceiver } = await import("./otlpReceiver--gTpSagc.js");
|
|
762
775
|
const port = testSuite.tracing.otlp.http.port || 4318;
|
|
763
776
|
const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
|
|
764
777
|
const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
|
|
@@ -782,7 +795,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
|
|
|
782
795
|
async function stopOtlpReceiverIfNeeded() {
|
|
783
796
|
if (otlpReceiverStarted) try {
|
|
784
797
|
logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
|
|
785
|
-
const { stopOTLPReceiver } = await import("./otlpReceiver
|
|
798
|
+
const { stopOTLPReceiver } = await import("./otlpReceiver--gTpSagc.js");
|
|
786
799
|
await stopOTLPReceiver();
|
|
787
800
|
otlpReceiverStarted = false;
|
|
788
801
|
logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
|
|
@@ -817,7 +830,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
|
|
|
817
830
|
}
|
|
818
831
|
if (!tracingEnabled) return null;
|
|
819
832
|
logger.debug("[EvaluatorTracing] Importing trace store");
|
|
820
|
-
const { getTraceStore } = await import("./store-
|
|
833
|
+
const { getTraceStore } = await import("./store-IbiRIF3k.js");
|
|
821
834
|
const traceStore = getTraceStore();
|
|
822
835
|
const traceId = generateTraceId();
|
|
823
836
|
const spanId = generateSpanId();
|
|
@@ -1449,27 +1462,67 @@ const handleIsValidFunctionCall = ({ assertion, output, provider, test }) => {
|
|
|
1449
1462
|
};
|
|
1450
1463
|
//#endregion
|
|
1451
1464
|
//#region src/assertions/geval.ts
|
|
1452
|
-
const handleGEval = async ({ assertion, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1453
|
-
invariant(typeof renderedValue === "string" || Array.isArray(renderedValue), "G-Eval assertion type must have a string or array of strings value");
|
|
1465
|
+
const handleGEval = async ({ assertion, inverse, renderedValue, prompt, outputString, test, providerCallContext }) => {
|
|
1466
|
+
invariant(typeof renderedValue === "string" || Array.isArray(renderedValue) && renderedValue.every((value) => typeof value === "string"), "G-Eval assertion type must have a string or array of strings value");
|
|
1454
1467
|
const threshold = assertion.threshold ?? .7;
|
|
1455
1468
|
if (Array.isArray(renderedValue)) {
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1469
|
+
if (renderedValue.length === 0) return {
|
|
1470
|
+
assertion,
|
|
1471
|
+
pass: false,
|
|
1472
|
+
score: 0,
|
|
1473
|
+
reason: "G-Eval assertion requires at least one criterion string in the value array."
|
|
1474
|
+
};
|
|
1475
|
+
const responses = [];
|
|
1476
|
+
let failure;
|
|
1477
|
+
for (const [index, value] of renderedValue.entries()) {
|
|
1459
1478
|
const resp = await matchesGEval(value, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1460
|
-
|
|
1461
|
-
|
|
1479
|
+
responses.push(resp);
|
|
1480
|
+
if (isGraderFailure(resp)) {
|
|
1481
|
+
failure = {
|
|
1482
|
+
index,
|
|
1483
|
+
resp
|
|
1484
|
+
};
|
|
1485
|
+
break;
|
|
1486
|
+
}
|
|
1487
|
+
}
|
|
1488
|
+
const tokensUsed = createEmptyTokenUsage();
|
|
1489
|
+
for (const r of responses) accumulateTokenUsage(tokensUsed, r.tokensUsed);
|
|
1490
|
+
if (failure) {
|
|
1491
|
+
const criterion = renderedValue[failure.index];
|
|
1492
|
+
return {
|
|
1493
|
+
assertion,
|
|
1494
|
+
pass: false,
|
|
1495
|
+
score: 0,
|
|
1496
|
+
reason: `G-Eval criterion ${failure.index + 1}/${renderedValue.length} (${JSON.stringify(criterion)}) failed: ${failure.resp.reason}`,
|
|
1497
|
+
tokensUsed,
|
|
1498
|
+
metadata: failure.resp.metadata
|
|
1499
|
+
};
|
|
1462
1500
|
}
|
|
1463
|
-
const
|
|
1501
|
+
const averageScore = responses.reduce((acc, r) => acc + r.score, 0) / responses.length;
|
|
1502
|
+
const combinedReason = responses.map((r) => r.reason).join("\n\n");
|
|
1464
1503
|
return {
|
|
1465
1504
|
assertion,
|
|
1466
|
-
pass:
|
|
1467
|
-
score:
|
|
1468
|
-
reason:
|
|
1505
|
+
pass: averageScore >= threshold !== inverse,
|
|
1506
|
+
score: inverse ? 1 - averageScore : averageScore,
|
|
1507
|
+
reason: combinedReason,
|
|
1508
|
+
tokensUsed
|
|
1469
1509
|
};
|
|
1470
|
-
}
|
|
1510
|
+
}
|
|
1511
|
+
const resp = await matchesGEval(renderedValue, prompt || "", outputString, threshold, test.options, providerCallContext);
|
|
1512
|
+
if (isGraderFailure(resp)) return {
|
|
1471
1513
|
assertion,
|
|
1472
|
-
|
|
1514
|
+
pass: false,
|
|
1515
|
+
score: 0,
|
|
1516
|
+
reason: resp.reason,
|
|
1517
|
+
tokensUsed: resp.tokensUsed,
|
|
1518
|
+
metadata: resp.metadata
|
|
1519
|
+
};
|
|
1520
|
+
const passed = resp.score >= threshold !== inverse;
|
|
1521
|
+
return {
|
|
1522
|
+
assertion,
|
|
1523
|
+
...resp,
|
|
1524
|
+
pass: passed,
|
|
1525
|
+
score: inverse ? 1 - resp.score : resp.score
|
|
1473
1526
|
};
|
|
1474
1527
|
};
|
|
1475
1528
|
//#endregion
|
|
@@ -2430,45 +2483,6 @@ function matchesPattern(spanName, pattern) {
|
|
|
2430
2483
|
}
|
|
2431
2484
|
//#endregion
|
|
2432
2485
|
//#region src/assertions/trajectoryUtils.ts
|
|
2433
|
-
const TOOL_ATTRIBUTE_KEYS = [
|
|
2434
|
-
"tool.name",
|
|
2435
|
-
"tool_name",
|
|
2436
|
-
"tool",
|
|
2437
|
-
"function.name",
|
|
2438
|
-
"function_name",
|
|
2439
|
-
"gen_ai.tool.name",
|
|
2440
|
-
"codex.mcp.tool",
|
|
2441
|
-
"agent.tool",
|
|
2442
|
-
"agent.tool_name",
|
|
2443
|
-
"agent.toolName"
|
|
2444
|
-
];
|
|
2445
|
-
const TOOL_ARGUMENT_ATTRIBUTE_KEYS = [
|
|
2446
|
-
"tool.arguments",
|
|
2447
|
-
"tool.args",
|
|
2448
|
-
"tool.input",
|
|
2449
|
-
"tool_arguments",
|
|
2450
|
-
"tool_args",
|
|
2451
|
-
"tool_input",
|
|
2452
|
-
"function.arguments",
|
|
2453
|
-
"function.args",
|
|
2454
|
-
"function.input",
|
|
2455
|
-
"function_arguments",
|
|
2456
|
-
"function_args",
|
|
2457
|
-
"gen_ai.tool.arguments",
|
|
2458
|
-
"gen_ai.tool.args",
|
|
2459
|
-
"gen_ai.tool.input",
|
|
2460
|
-
"gen_ai.tool.call.arguments",
|
|
2461
|
-
"gen_ai.tool.call.args",
|
|
2462
|
-
"agent.tool.arguments",
|
|
2463
|
-
"agent.tool.args",
|
|
2464
|
-
"agent.tool.input",
|
|
2465
|
-
"codex.mcp.arguments",
|
|
2466
|
-
"codex.mcp.args",
|
|
2467
|
-
"codex.mcp.input",
|
|
2468
|
-
"arguments",
|
|
2469
|
-
"args",
|
|
2470
|
-
"input"
|
|
2471
|
-
];
|
|
2472
2486
|
const COMMAND_ATTRIBUTE_KEYS = [
|
|
2473
2487
|
"codex.command",
|
|
2474
2488
|
"command",
|
|
@@ -2481,16 +2495,15 @@ const SEARCH_ATTRIBUTE_KEYS = [
|
|
|
2481
2495
|
"search_query"
|
|
2482
2496
|
];
|
|
2483
2497
|
const GENERIC_QUERY_ATTRIBUTE_KEYS = ["query"];
|
|
2498
|
+
const COMMAND_TOOL_NAMES = new Set([
|
|
2499
|
+
"exec_command",
|
|
2500
|
+
"local_shell",
|
|
2501
|
+
"shell"
|
|
2502
|
+
]);
|
|
2484
2503
|
const SEARCH_SPAN_NAME_PATTERN = /(^|[\s._:/-])(search|find|lookup|retriev(?:e|al))($|[\s._:/-])/i;
|
|
2485
2504
|
const MAX_JUDGE_SUMMARY_STEPS = 24;
|
|
2486
2505
|
const JUDGE_SUMMARY_HEAD_STEPS = 12;
|
|
2487
2506
|
const JUDGE_SUMMARY_TAIL_STEPS = 12;
|
|
2488
|
-
function getStringAttribute(attributes, keys) {
|
|
2489
|
-
for (const key of keys) {
|
|
2490
|
-
const value = attributes[key];
|
|
2491
|
-
if (typeof value === "string" && value.trim()) return value.trim();
|
|
2492
|
-
}
|
|
2493
|
-
}
|
|
2494
2507
|
function normalizeStructuredAttribute(value) {
|
|
2495
2508
|
if (value === void 0 || value === null) return;
|
|
2496
2509
|
if (typeof value === "string") {
|
|
@@ -2522,9 +2535,12 @@ function getTrajectoryStepStatus(step) {
|
|
|
2522
2535
|
function getCommandExecutable(command) {
|
|
2523
2536
|
return command.trim().split(/\s+/)[0] || void 0;
|
|
2524
2537
|
}
|
|
2538
|
+
function isCommandToolName(toolName) {
|
|
2539
|
+
return !!toolName && COMMAND_TOOL_NAMES.has(toolName.trim().toLowerCase());
|
|
2540
|
+
}
|
|
2525
2541
|
function extractToolName(span) {
|
|
2526
2542
|
const attributes = span.attributes || {};
|
|
2527
|
-
const directMatch =
|
|
2543
|
+
const directMatch = getToolNameFromAttributes(attributes);
|
|
2528
2544
|
if (directMatch) return directMatch;
|
|
2529
2545
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2530
2546
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
@@ -2549,21 +2565,31 @@ function extractToolArgs(span) {
|
|
|
2549
2565
|
if (value !== void 0) return value;
|
|
2550
2566
|
}
|
|
2551
2567
|
}
|
|
2552
|
-
function extractCommand(span) {
|
|
2568
|
+
function extractCommand(span, toolName = extractToolName(span), getToolArgs = () => extractToolArgs(span)) {
|
|
2553
2569
|
const attributes = span.attributes || {};
|
|
2554
|
-
const directMatch =
|
|
2570
|
+
const directMatch = getFirstStringAttribute(attributes, COMMAND_ATTRIBUTE_KEYS);
|
|
2555
2571
|
if (directMatch) return directMatch;
|
|
2556
2572
|
for (const [key, value] of Object.entries(attributes)) {
|
|
2557
2573
|
if (typeof value !== "string" || !value.trim()) continue;
|
|
2558
2574
|
if (/command/i.test(key) && !/output|result/i.test(key)) return value.trim();
|
|
2559
2575
|
}
|
|
2576
|
+
const toolArgs = getToolArgs();
|
|
2577
|
+
if (isCommandToolName(toolName) && toolArgs && typeof toolArgs === "object") {
|
|
2578
|
+
const args = toolArgs;
|
|
2579
|
+
const command = args.cmd ?? args.command;
|
|
2580
|
+
if (typeof command === "string" && command.trim()) return command.trim();
|
|
2581
|
+
if (Array.isArray(command)) {
|
|
2582
|
+
const joined = command.map((part) => String(part).trim()).filter(Boolean).join(" ");
|
|
2583
|
+
if (joined) return joined;
|
|
2584
|
+
}
|
|
2585
|
+
}
|
|
2560
2586
|
if (span.name.startsWith("exec ")) return span.name.slice(5).trim();
|
|
2561
2587
|
}
|
|
2562
2588
|
function extractSearchQuery(span) {
|
|
2563
2589
|
const attributes = span.attributes || {};
|
|
2564
|
-
const directMatch =
|
|
2590
|
+
const directMatch = getFirstStringAttribute(attributes, SEARCH_ATTRIBUTE_KEYS);
|
|
2565
2591
|
if (directMatch) return directMatch;
|
|
2566
|
-
const genericQuery =
|
|
2592
|
+
const genericQuery = getFirstStringAttribute(attributes, GENERIC_QUERY_ATTRIBUTE_KEYS);
|
|
2567
2593
|
if (genericQuery && isSearchLikeSpan(span)) return genericQuery;
|
|
2568
2594
|
if (span.name.startsWith("search ")) return span.name.slice(7).replace(/^"|"$/g, "").trim();
|
|
2569
2595
|
}
|
|
@@ -2587,17 +2613,34 @@ function extractTrajectorySteps(trace) {
|
|
|
2587
2613
|
return left.index - right.index;
|
|
2588
2614
|
}).map(({ span }) => {
|
|
2589
2615
|
const toolName = extractToolName(span);
|
|
2590
|
-
|
|
2616
|
+
let toolArgs;
|
|
2617
|
+
let hasExtractedToolArgs = false;
|
|
2618
|
+
const getToolArgs = () => {
|
|
2619
|
+
if (!hasExtractedToolArgs) {
|
|
2620
|
+
toolArgs = extractToolArgs(span);
|
|
2621
|
+
hasExtractedToolArgs = true;
|
|
2622
|
+
}
|
|
2623
|
+
return toolArgs;
|
|
2624
|
+
};
|
|
2625
|
+
const command = extractCommand(span, toolName, getToolArgs);
|
|
2591
2626
|
const searchQuery = extractSearchQuery(span);
|
|
2592
2627
|
let type = "span";
|
|
2593
2628
|
let name = span.name;
|
|
2594
2629
|
const aliases = new Set([span.name]);
|
|
2595
2630
|
let args;
|
|
2596
|
-
if (toolName) {
|
|
2631
|
+
if (command && isCommandToolName(toolName)) {
|
|
2632
|
+
type = "command";
|
|
2633
|
+
name = command;
|
|
2634
|
+
aliases.add(command);
|
|
2635
|
+
args = getToolArgs();
|
|
2636
|
+
if (toolName) aliases.add(toolName);
|
|
2637
|
+
const executable = getCommandExecutable(command);
|
|
2638
|
+
if (executable) aliases.add(executable);
|
|
2639
|
+
} else if (toolName) {
|
|
2597
2640
|
type = "tool";
|
|
2598
2641
|
name = toolName;
|
|
2599
2642
|
aliases.add(toolName);
|
|
2600
|
-
args =
|
|
2643
|
+
args = getToolArgs();
|
|
2601
2644
|
} else if (command) {
|
|
2602
2645
|
type = "command";
|
|
2603
2646
|
name = command;
|
|
@@ -4161,7 +4204,7 @@ async function loadTraceData(traceId) {
|
|
|
4161
4204
|
let stableObservations = 0;
|
|
4162
4205
|
let latestTrace = null;
|
|
4163
4206
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
4164
|
-
latestTrace = await traceStore.getTrace(traceId);
|
|
4207
|
+
latestTrace = await traceStore.getTrace(traceId, { sanitizeAttributes: false });
|
|
4165
4208
|
const spanCount = latestTrace?.spans?.length ?? 0;
|
|
4166
4209
|
if (spanCount > 0) {
|
|
4167
4210
|
stableObservations = spanCount === lastSpanCount ? stableObservations + 1 : 1;
|
|
@@ -4214,7 +4257,7 @@ const ASSERTION_HANDLERS = {
|
|
|
4214
4257
|
"llm-rubric": handleLlmRubric,
|
|
4215
4258
|
meteor: async (params) => {
|
|
4216
4259
|
try {
|
|
4217
|
-
const { handleMeteorAssertion } = await import("./meteor
|
|
4260
|
+
const { handleMeteorAssertion } = await import("./meteor--TZYICTI.js");
|
|
4218
4261
|
return handleMeteorAssertion(params);
|
|
4219
4262
|
} catch (error) {
|
|
4220
4263
|
if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
|
|
@@ -4350,7 +4393,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
|
|
|
4350
4393
|
};
|
|
4351
4394
|
}
|
|
4352
4395
|
else if (filePath.endsWith(".rb")) try {
|
|
4353
|
-
const { runRuby } = await import("./rubyUtils-
|
|
4396
|
+
const { runRuby } = await import("./rubyUtils-BYVlQ94c.js");
|
|
4354
4397
|
valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
|
|
4355
4398
|
logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
|
|
4356
4399
|
} catch (error) {
|
|
@@ -4467,7 +4510,8 @@ async function runAssertions({ assertScoringFunction, latencyMs, prompt, provide
|
|
|
4467
4510
|
logger.debug(`Failed to preload trace data for assertions: ${error}`);
|
|
4468
4511
|
preloadedTraceData = null;
|
|
4469
4512
|
}
|
|
4470
|
-
|
|
4513
|
+
const concurrency = getProviderCallExecutionContext()?.providerCallQueue ? 1 : ASSERTIONS_MAX_CONCURRENCY;
|
|
4514
|
+
await async.forEachOfLimit(asserts, concurrency, async ({ assertion, assertResult, index }) => {
|
|
4471
4515
|
if (assertion.type.startsWith("select-") || assertion.type === "max-score") return;
|
|
4472
4516
|
const result = await runAssertion({
|
|
4473
4517
|
prompt,
|
|
@@ -4614,7 +4658,8 @@ var CIProgressReporter = class {
|
|
|
4614
4658
|
}
|
|
4615
4659
|
updateTotalTests(newTotal) {
|
|
4616
4660
|
this.totalTests = Math.max(newTotal, 1);
|
|
4617
|
-
|
|
4661
|
+
const percentage = Math.floor(this.completedTests / this.totalTests * 100);
|
|
4662
|
+
this.highestPercentageSeen = percentage;
|
|
4618
4663
|
}
|
|
4619
4664
|
finish() {
|
|
4620
4665
|
if (this.intervalId) {
|
|
@@ -4787,6 +4832,10 @@ function getDefaultOtelConfig() {
|
|
|
4787
4832
|
}
|
|
4788
4833
|
//#endregion
|
|
4789
4834
|
//#region src/tracing/localSpanExporter.ts
|
|
4835
|
+
const MISSING_TRACE_RETRY_DELAY_MS = 50;
|
|
4836
|
+
function delay(ms) {
|
|
4837
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
4838
|
+
}
|
|
4790
4839
|
/**
|
|
4791
4840
|
* A span exporter that writes spans to the local TraceStore (SQLite).
|
|
4792
4841
|
* This allows OTEL spans to be stored locally for analysis in the promptfoo UI.
|
|
@@ -4828,7 +4877,7 @@ var LocalSpanExporter = class {
|
|
|
4828
4877
|
}
|
|
4829
4878
|
let firstError;
|
|
4830
4879
|
for (const [traceId, spanDataList] of spansByTrace) try {
|
|
4831
|
-
const result = await
|
|
4880
|
+
const result = await this.addSpansWithTraceRetry(traceStore, traceId, spanDataList);
|
|
4832
4881
|
if (result.stored) logger.debug(`[LocalSpanExporter] Added ${spanDataList.length} spans to trace ${traceId}`);
|
|
4833
4882
|
else logger.debug(`[LocalSpanExporter] Skipping ${spanDataList.length} spans for orphan trace ${traceId}: ${result.reason}`);
|
|
4834
4883
|
} catch (error) {
|
|
@@ -4840,6 +4889,16 @@ var LocalSpanExporter = class {
|
|
|
4840
4889
|
}
|
|
4841
4890
|
return firstError;
|
|
4842
4891
|
}
|
|
4892
|
+
async addSpansWithTraceRetry(traceStore, traceId, spans) {
|
|
4893
|
+
const options = {
|
|
4894
|
+
skipTraceCheck: false,
|
|
4895
|
+
warnIfMissingTrace: false
|
|
4896
|
+
};
|
|
4897
|
+
const result = await traceStore.addSpans(traceId, spans, options);
|
|
4898
|
+
if (result.stored) return result;
|
|
4899
|
+
await delay(MISSING_TRACE_RETRY_DELAY_MS);
|
|
4900
|
+
return traceStore.addSpans(traceId, spans, options);
|
|
4901
|
+
}
|
|
4843
4902
|
/**
|
|
4844
4903
|
* Convert an OTEL ReadableSpan to our SpanData format.
|
|
4845
4904
|
*/
|
|
@@ -5085,6 +5144,15 @@ function backfillNamedScoreWeights(accumulator) {
|
|
|
5085
5144
|
}
|
|
5086
5145
|
//#endregion
|
|
5087
5146
|
//#region src/evaluator.ts
|
|
5147
|
+
const CONVERSATION_VAR_NAME = "_conversation";
|
|
5148
|
+
const promptUsesConversationVariableCache = new LRUCache({ max: 1024 });
|
|
5149
|
+
function promptUsesConversationVariable(prompt) {
|
|
5150
|
+
const cached = promptUsesConversationVariableCache.get(prompt.raw);
|
|
5151
|
+
if (cached !== void 0) return cached;
|
|
5152
|
+
const { referenced, parsed } = analyzeTemplateReference(prompt.raw, CONVERSATION_VAR_NAME);
|
|
5153
|
+
if (parsed) promptUsesConversationVariableCache.set(prompt.raw, referenced);
|
|
5154
|
+
return referenced;
|
|
5155
|
+
}
|
|
5088
5156
|
/**
|
|
5089
5157
|
* Manages a single progress bar for the evaluation
|
|
5090
5158
|
*/
|
|
@@ -5284,6 +5352,18 @@ function hasProviderGroupedAssertion(assertion) {
|
|
|
5284
5352
|
function shouldDeferGradingForTest(test) {
|
|
5285
5353
|
return Boolean(test.assert?.some(hasProviderGroupedAssertion));
|
|
5286
5354
|
}
|
|
5355
|
+
function logGroupedGradingStatus({ concurrency, hasEvalStepTimeout, runEvalOptions, shouldGroupGradingByProvider, usesConversationVar }) {
|
|
5356
|
+
if (!runEvalOptions.some(({ test }) => shouldDeferGradingForTest(test))) return;
|
|
5357
|
+
if (shouldGroupGradingByProvider) {
|
|
5358
|
+
logger.info("Grouping model-graded assertions by provider to minimize local-model reload overhead.");
|
|
5359
|
+
return;
|
|
5360
|
+
}
|
|
5361
|
+
if (concurrency !== 1) return;
|
|
5362
|
+
const reasons = [];
|
|
5363
|
+
if (hasEvalStepTimeout) reasons.push("per-eval-step timeout is configured");
|
|
5364
|
+
if (usesConversationVar) reasons.push("conversation variables require per-row ordering");
|
|
5365
|
+
if (reasons.length > 0) logger.info(`Serial grading grouping disabled because ${reasons.join(" and ")}; model-graded judges may reload between rows.`);
|
|
5366
|
+
}
|
|
5287
5367
|
function applyGradingResult(row, checkResult) {
|
|
5288
5368
|
if (!checkResult.pass) {
|
|
5289
5369
|
row.error = checkResult.reason;
|
|
@@ -5298,14 +5378,29 @@ function applyGradingResult(row, checkResult) {
|
|
|
5298
5378
|
if (checkResult.tokensUsed) accumulateAssertionTokenUsage(row.tokenUsage.assertions, checkResult.tokensUsed);
|
|
5299
5379
|
row.gradingResult = checkResult;
|
|
5300
5380
|
}
|
|
5301
|
-
|
|
5302
|
-
|
|
5303
|
-
|
|
5304
|
-
|
|
5305
|
-
|
|
5306
|
-
|
|
5307
|
-
|
|
5308
|
-
|
|
5381
|
+
const ABORTED_GRADING_PREFIX = "Aborted: ";
|
|
5382
|
+
function isAbortShapedError(error) {
|
|
5383
|
+
return error instanceof Error && (error.name === "AbortError" || error.name === "AbortException");
|
|
5384
|
+
}
|
|
5385
|
+
function applyGradingError(row, error, abortSignal) {
|
|
5386
|
+
const errorAsError = error instanceof Error ? error : void 0;
|
|
5387
|
+
if (Boolean(abortSignal?.aborted) && isAbortShapedError(error)) {
|
|
5388
|
+
const shortMessage = errorAsError?.message ?? String(error);
|
|
5389
|
+
logger.debug("Assertion grading aborted", {
|
|
5390
|
+
error: shortMessage,
|
|
5391
|
+
promptIdx: row.promptIdx,
|
|
5392
|
+
testIdx: row.testIdx
|
|
5393
|
+
});
|
|
5394
|
+
row.error = `${ABORTED_GRADING_PREFIX}${shortMessage}`;
|
|
5395
|
+
} else {
|
|
5396
|
+
const fullMessage = errorAsError ? errorAsError.stack ?? errorAsError.message : String(error);
|
|
5397
|
+
logger.error("Assertion grading failed during eval", {
|
|
5398
|
+
error: fullMessage,
|
|
5399
|
+
promptIdx: row.promptIdx,
|
|
5400
|
+
testIdx: row.testIdx
|
|
5401
|
+
});
|
|
5402
|
+
row.error = fullMessage;
|
|
5403
|
+
}
|
|
5309
5404
|
row.failureReason = ResultFailureReason.ERROR;
|
|
5310
5405
|
row.success = false;
|
|
5311
5406
|
row.score = 0;
|
|
@@ -5337,7 +5432,7 @@ function createRunEvalState({ provider, prompt, test }) {
|
|
|
5337
5432
|
};
|
|
5338
5433
|
}
|
|
5339
5434
|
function attachConversationVar({ conversations, conversationKey, prompt, test, vars }) {
|
|
5340
|
-
const usesConversation = prompt
|
|
5435
|
+
const usesConversation = promptUsesConversationVariable(prompt);
|
|
5341
5436
|
if (!getEnvBool("PROMPTFOO_DISABLE_CONVERSATION_VAR") && !test.options?.disableConversationVar && usesConversation) vars._conversation = conversations?.[conversationKey] || [];
|
|
5342
5437
|
}
|
|
5343
5438
|
function createRunEvalSetup({ provider, prompt, promptConfig, vars }) {
|
|
@@ -5584,7 +5679,7 @@ async function gradeRunEvalResponse({ abortSignal, deferGrading, evalId, latency
|
|
|
5584
5679
|
assertScoringFunction: test.assertScoringFunction,
|
|
5585
5680
|
traceId
|
|
5586
5681
|
}).then((checkResult) => applyGradingResult(ret, checkResult))).catch((error) => {
|
|
5587
|
-
applyGradingError(ret, error);
|
|
5682
|
+
applyGradingError(ret, error, abortSignal);
|
|
5588
5683
|
});
|
|
5589
5684
|
deferredGradingPromises.set(ret, gradingPromise);
|
|
5590
5685
|
return;
|
|
@@ -6131,7 +6226,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
|
|
|
6131
6226
|
const defaultProvider = defaultTest.provider;
|
|
6132
6227
|
if (isApiProvider(defaultProvider)) return defaultProvider;
|
|
6133
6228
|
if (typeof defaultProvider === "object" && defaultProvider.id) {
|
|
6134
|
-
const { loadApiProvider } = await import("./providers-
|
|
6229
|
+
const { loadApiProvider } = await import("./providers-CSOp-bCm.js");
|
|
6135
6230
|
return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
|
|
6136
6231
|
}
|
|
6137
6232
|
return defaultProvider;
|
|
@@ -6291,7 +6386,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
|
|
|
6291
6386
|
async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
6292
6387
|
if (!state.resume || !evalRecord.persisted) return;
|
|
6293
6388
|
try {
|
|
6294
|
-
const { default: EvalResult } = await import("./evalResult-
|
|
6389
|
+
const { default: EvalResult } = await import("./evalResult-BtZSUgQv.js");
|
|
6295
6390
|
const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: state.retryMode });
|
|
6296
6391
|
const originalCount = runEvalOptions.length;
|
|
6297
6392
|
for (let i = runEvalOptions.length - 1; i >= 0; i--) {
|
|
@@ -6305,14 +6400,14 @@ async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
|
|
|
6305
6400
|
}
|
|
6306
6401
|
}
|
|
6307
6402
|
function adjustConcurrencyForSerialFeatures({ concurrency, prompts, tests }) {
|
|
6308
|
-
const usesConversationVar = prompts.some(
|
|
6403
|
+
const usesConversationVar = prompts.some(promptUsesConversationVariable);
|
|
6309
6404
|
if (concurrency <= 1) return {
|
|
6310
6405
|
concurrency,
|
|
6311
6406
|
usesConversationVar
|
|
6312
6407
|
};
|
|
6313
6408
|
const usesStoreOutputAs = tests.some((t) => t.options?.storeOutputAs);
|
|
6314
6409
|
if (usesConversationVar) {
|
|
6315
|
-
logger.info(`Setting concurrency to 1 because the ${chalk.cyan(
|
|
6410
|
+
logger.info(`Setting concurrency to 1 because the ${chalk.cyan(CONVERSATION_VAR_NAME)} variable is used.`);
|
|
6316
6411
|
return {
|
|
6317
6412
|
concurrency: 1,
|
|
6318
6413
|
usesConversationVar
|
|
@@ -6542,7 +6637,8 @@ var Evaluator = class {
|
|
|
6542
6637
|
};
|
|
6543
6638
|
this.conversations = {};
|
|
6544
6639
|
this.registers = {};
|
|
6545
|
-
|
|
6640
|
+
const jsonlFiles = Array.isArray(evalRecord.config.outputPath) ? evalRecord.config.outputPath.filter((p) => p.endsWith(".jsonl")) : evalRecord.config.outputPath?.endsWith(".jsonl") ? [evalRecord.config.outputPath] : [];
|
|
6641
|
+
this.fileWriters = jsonlFiles.map((p) => new JsonlFileWriter(p));
|
|
6546
6642
|
this.rateLimitRegistry = createRateLimitRegistry({ maxConcurrency: options.maxConcurrency || 4 });
|
|
6547
6643
|
this.rateLimitRegistry.on("ratelimit:hit", (data) => {
|
|
6548
6644
|
logger.debug(`[Scheduler] Rate limit hit for ${data.rateLimitKey}`, {
|
|
@@ -6662,6 +6758,25 @@ var Evaluator = class {
|
|
|
6662
6758
|
this.trackCompletedRow(evalStep, row, context);
|
|
6663
6759
|
context.numComplete++;
|
|
6664
6760
|
const promptEvalCount = reservePromptEvalCount(context, row.promptIdx);
|
|
6761
|
+
if (context.testSuite.extensions?.length) try {
|
|
6762
|
+
const afterEachOut = await runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6763
|
+
test: evalStep.test,
|
|
6764
|
+
result: {
|
|
6765
|
+
...row,
|
|
6766
|
+
namedScores: { ...row.namedScores },
|
|
6767
|
+
metadata: { ...row.metadata },
|
|
6768
|
+
response: row.response ? {
|
|
6769
|
+
...row.response,
|
|
6770
|
+
metadata: { ...row.response.metadata }
|
|
6771
|
+
} : row.response
|
|
6772
|
+
}
|
|
6773
|
+
});
|
|
6774
|
+
row.namedScores = filterFiniteScores(afterEachOut.result.namedScores);
|
|
6775
|
+
row.metadata = afterEachOut.result.metadata;
|
|
6776
|
+
if (row.response && afterEachOut.result.response) row.response.metadata = afterEachOut.result.response.metadata;
|
|
6777
|
+
} catch (error) {
|
|
6778
|
+
logger.error(`afterEach extension hook failed, persisting row without hook modifications`, { error });
|
|
6779
|
+
}
|
|
6665
6780
|
await this.persistEvalRow(row);
|
|
6666
6781
|
if (this.abortIfTargetUnavailable(row, context)) break;
|
|
6667
6782
|
const metrics = context.prompts[row.promptIdx].metrics;
|
|
@@ -6673,10 +6788,6 @@ var Evaluator = class {
|
|
|
6673
6788
|
promptEvalCount,
|
|
6674
6789
|
row
|
|
6675
6790
|
});
|
|
6676
|
-
await runExtensionHook(context.testSuite.extensions, "afterEach", {
|
|
6677
|
-
test: evalStep.test,
|
|
6678
|
-
result: row
|
|
6679
|
-
});
|
|
6680
6791
|
context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics);
|
|
6681
6792
|
}
|
|
6682
6793
|
}
|
|
@@ -6831,7 +6942,15 @@ var Evaluator = class {
|
|
|
6831
6942
|
})) break;
|
|
6832
6943
|
}
|
|
6833
6944
|
} catch (error) {
|
|
6834
|
-
|
|
6945
|
+
const pendingRowCount = groupedRows.reduce((sum, entry) => sum + entry.rows.length, 0);
|
|
6946
|
+
try {
|
|
6947
|
+
await flushGroupedRows();
|
|
6948
|
+
} catch (flushError) {
|
|
6949
|
+
logger.warn("Failed to flush grouped rows after error; target outputs may be lost", {
|
|
6950
|
+
error: flushError instanceof Error ? flushError.message : String(flushError),
|
|
6951
|
+
pendingRowCount
|
|
6952
|
+
});
|
|
6953
|
+
}
|
|
6835
6954
|
throw error;
|
|
6836
6955
|
}
|
|
6837
6956
|
await flushGroupedRows();
|
|
@@ -7267,6 +7386,13 @@ var Evaluator = class {
|
|
|
7267
7386
|
if (!this.options.silent) {
|
|
7268
7387
|
if (serialRunEvalOptions.length > 0) logger.info(`Running ${serialRunEvalOptions.length} test cases serially...`);
|
|
7269
7388
|
if (concurrentRunEvalOptions.length > 0) logger.info(`Running ${concurrentRunEvalOptions.length} test cases (up to ${concurrency} at a time)...`);
|
|
7389
|
+
logGroupedGradingStatus({
|
|
7390
|
+
concurrency,
|
|
7391
|
+
hasEvalStepTimeout,
|
|
7392
|
+
runEvalOptions,
|
|
7393
|
+
shouldGroupGradingByProvider,
|
|
7394
|
+
usesConversationVar
|
|
7395
|
+
});
|
|
7270
7396
|
}
|
|
7271
7397
|
if (this.options.showProgressBar && progressBarManager) {
|
|
7272
7398
|
await progressBarManager.initialize(runEvalOptions, concurrency, 0);
|
|
@@ -7369,4 +7495,4 @@ function evaluate(testSuite, evalRecord, options) {
|
|
|
7369
7495
|
//#endregion
|
|
7370
7496
|
export { isAllowedPrompt as a, assertions_default as c, generateVarCombinations as i, readAssertions as l, evaluate as n, accumulateNamedMetric as o, formatVarsForDisplay as r, doesPromptRefMatch as s, ProgressBarManager as t, runAssertions as u };
|
|
7371
7497
|
|
|
7372
|
-
//# sourceMappingURL=evaluator-
|
|
7498
|
+
//# sourceMappingURL=evaluator-n_dEb00o.js.map
|