promptfoo 0.121.4 → 0.121.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. package/dist/src/{ListApp-DQkFNqE9.js → ListApp-BRUsT43Y.js} +1 -1
  2. package/dist/src/{accounts-Dy17bs4D.cjs → accounts-BIFntVWB.cjs} +4 -4
  3. package/dist/src/{accounts-F9d_5sMC.js → accounts-CLJHCDDb.js} +6 -6
  4. package/dist/src/{accounts-DhMYUUbu.js → accounts-CaLNYnf7.js} +4 -4
  5. package/dist/src/{accounts-DdJ2pHMI.js → accounts-bnyHT7Ju.js} +5 -5
  6. package/dist/src/{agentic-utils-w68v6_Dz.js → agentic-utils-B5krlibj.js} +3 -3
  7. package/dist/src/{agentic-utils-P172hM8B.js → agentic-utils-Ba67xmgs.js} +2 -2
  8. package/dist/src/{agentic-utils-qFlm6zes.js → agentic-utils-BclbiXiq.js} +3 -3
  9. package/dist/src/{agentic-utils-BpX5b23w.cjs → agentic-utils-D2x0wGhB.cjs} +2 -2
  10. package/dist/src/{agents-CgaMXvLM.js → agents-BGqaTDnr.js} +5 -5
  11. package/dist/src/{agents-8FDnTriG.js → agents-BV9yFpXX.js} +5 -5
  12. package/dist/src/{agents-aYPQLf8W.js → agents-BYdMl1UE.js} +4 -4
  13. package/dist/src/{agents-pQeBEXMm.js → agents-DhxWMCtH.js} +5 -5
  14. package/dist/src/{agents-D7-HGxUj.cjs → agents-DiWmQYH9.cjs} +4 -4
  15. package/dist/src/{agents-BahDpe5G.cjs → agents-WULPVjbH.cjs} +4 -4
  16. package/dist/src/{agents-DJ35I3Nt.js → agents-emVcx3yh.js} +5 -5
  17. package/dist/src/{agents-C-R_jfzI.js → agents-n6vPqV3i.js} +4 -4
  18. package/dist/src/{aimlapi-BCq3MHeL.js → aimlapi-BxqK9HF_.js} +7 -7
  19. package/dist/src/{aimlapi-qcK4OT55.cjs → aimlapi-BzLjZI_m.cjs} +6 -6
  20. package/dist/src/{aimlapi-BD6J9oKt.js → aimlapi-DR4pgeiC.js} +6 -6
  21. package/dist/src/{aimlapi-sgYnkE54.js → aimlapi-uPGp0Zdo.js} +7 -7
  22. package/dist/src/app/app/tsconfig.app.tsbuildinfo +1 -1
  23. package/dist/src/app/assets/Report-vjzrbgce.js +1 -0
  24. package/dist/src/app/assets/index-B3NQ8HTd.js +385 -0
  25. package/dist/src/app/assets/{index-BXGkeMwh.css → index-Cli2yAXv.css} +1 -1
  26. package/dist/src/app/index.html +27 -2
  27. package/dist/src/{audio-DcVKoInv.js → audio-BvpTOArF.js} +4 -4
  28. package/dist/src/{audio-BQtNuYBj.cjs → audio-C0vDeS0j.cjs} +3 -3
  29. package/dist/src/{audio-B7izf48x.js → audio-CScmnmEB.js} +4 -4
  30. package/dist/src/{audio-COrn8rM6.js → audio-Da8U9IS5.js} +3 -3
  31. package/dist/src/{base-fZ9wgg50.js → base-BOMaNEes.js} +3 -3
  32. package/dist/src/{base-PYJvBE1i.js → base-BTux96b1.js} +2 -2
  33. package/dist/src/{base-D-670DX8.cjs → base-Tw6uhH8K.cjs} +2 -2
  34. package/dist/src/{base-yrI1Yal4.js → base-dYsl2hmL.js} +3 -3
  35. package/dist/src/{blobs-D2FAd1Q5.cjs → blobs-B95F_7vE.cjs} +2 -2
  36. package/dist/src/{blobs-C-F78Kfn.js → blobs-BW4U31ue.js} +2 -2
  37. package/dist/src/{blobs-BCZavS8s.js → blobs-D_gg8nbm.js} +3 -3
  38. package/dist/src/{blobs-BQWqnnvL.js → blobs-DjLby-uP.js} +3 -3
  39. package/dist/src/{cache-mb7c8hbp.js → cache-BI5BY7ey.js} +4 -4
  40. package/dist/src/{cache-DbLsVWB2.cjs → cache-BRkhlH3k.cjs} +1 -1
  41. package/dist/src/cache-BlC6aeJ0.js +3 -0
  42. package/dist/src/{cache-D5NZmMiT.js → cache-Bzttsk0X.js} +2 -2
  43. package/dist/src/{cache-C4Xb-hNb.js → cache-Cr-qWIbP.js} +3 -3
  44. package/dist/src/{cache-BIyPcp5v.cjs → cache-DGg-yTZG.cjs} +2 -2
  45. package/dist/src/{chat-Dr3DUQ0D.js → chat-BLOdH60v.js} +12 -12
  46. package/dist/src/{chat-BfPaS15_.js → chat-Cx_LkwvZ.js} +12 -12
  47. package/dist/src/{chat-mW0ORo8G.js → chat-D9nudO9b.js} +4 -4
  48. package/dist/src/{chat-I9izLm49.js → chat-DChSH_Es.js} +12 -12
  49. package/dist/src/{chat-MKxMnZJZ.js → chat-DG2LkwLq.js} +2 -2
  50. package/dist/src/{chat-BPXSW8Bv.cjs → chat-DH97tVV9.cjs} +2 -2
  51. package/dist/src/{chat-0bwXjVP0.js → chat-aMQZw6R7.js} +4 -4
  52. package/dist/src/{chat-CclRbxGf.cjs → chat-vYqqv1gP.cjs} +11 -11
  53. package/dist/src/{chatkit-zUIVoDos.js → chatkit-B8X34dQc.js} +4 -4
  54. package/dist/src/{chatkit-Cv6AhukM.js → chatkit-BXu42Qwt.js} +3 -3
  55. package/dist/src/{chatkit-CJnHRRMM.js → chatkit-CbMRoeYw.js} +4 -4
  56. package/dist/src/{chatkit-BoWoSgXl.cjs → chatkit-D44VyUyB.cjs} +3 -3
  57. package/dist/src/{claude-agent-sdk-CPJo3dBQ.cjs → claude-agent-sdk-BRq0bbIK.cjs} +8 -8
  58. package/dist/src/{claude-agent-sdk-BQNuLaAK.js → claude-agent-sdk-BjriSVRZ.js} +7 -7
  59. package/dist/src/{claude-agent-sdk-Dtq_L-Sc.js → claude-agent-sdk-BzNZeZ0N.js} +7 -7
  60. package/dist/src/{claude-agent-sdk-nfAIcxNf.js → claude-agent-sdk-DYv_AJ8u.js} +7 -7
  61. package/dist/src/cloud-CoD5OacT.js +3 -0
  62. package/dist/src/{cloud-DQZ5sVjW.js → cloud-Da0bofJd.js} +3 -3
  63. package/dist/src/{cloudflare-ai-BIB567w6.js → cloudflare-ai-CXC4b1EU.js} +4 -4
  64. package/dist/src/{cloudflare-ai-DlKr0rY7.js → cloudflare-ai-CyBoIs1Q.js} +6 -6
  65. package/dist/src/{cloudflare-ai-DGLte7Py.js → cloudflare-ai-DGOwgexC.js} +6 -6
  66. package/dist/src/{cloudflare-ai-Dl3N9OVD.cjs → cloudflare-ai-DJv5qnyb.cjs} +4 -4
  67. package/dist/src/{cloudflare-gateway-BDZrYydE.js → cloudflare-gateway-1sAoOyft.js} +5 -5
  68. package/dist/src/{cloudflare-gateway-CiIZHU0Q.js → cloudflare-gateway-D-dnkzCF.js} +5 -5
  69. package/dist/src/{cloudflare-gateway-BYDp495F.cjs → cloudflare-gateway-DKVjkDav.cjs} +3 -3
  70. package/dist/src/{cloudflare-gateway-DI1HNP5F.js → cloudflare-gateway-TJkVrZlB.js} +3 -3
  71. package/dist/src/codex-app-server-CCLjqCh9.js +1915 -0
  72. package/dist/src/codex-app-server-CCe0TiDc.js +1915 -0
  73. package/dist/src/codex-app-server-CPW1LFwh.js +1916 -0
  74. package/dist/src/codex-app-server-VMRnjZ68.cjs +1920 -0
  75. package/dist/src/codex-sdk-1jm_qPHf.js +3 -0
  76. package/dist/src/{codex-sdk-C2_M2pl_.cjs → codex-sdk-Bd8UbO9q.cjs} +5 -5
  77. package/dist/src/{codex-sdk-CpqiOqDO.js → codex-sdk-BgEFQ70r.js} +6 -6
  78. package/dist/src/{codex-sdk-Rtky3M4I.js → codex-sdk-Bzb_TqX9.js} +6 -6
  79. package/dist/src/{codex-sdk-CWEnH70W.cjs → codex-sdk-Danroptg.cjs} +1 -1
  80. package/dist/src/{codex-sdk-CErXn7qh.js → codex-sdk-DfvDTN33.js} +5 -5
  81. package/dist/src/{cometapi-CtJ-mS8R.js → cometapi-B5ImDlSm.js} +8 -8
  82. package/dist/src/{cometapi-UVOryo4W.cjs → cometapi-BgAkuYCw.cjs} +7 -7
  83. package/dist/src/{cometapi-BUlt_ELa.js → cometapi-CC7hWxmX.js} +8 -8
  84. package/dist/src/{cometapi-DT-jlVCB.js → cometapi-CCbpHkuF.js} +7 -7
  85. package/dist/src/{completion-x0a_c2y1.js → completion-2iuYVxwi.js} +6 -6
  86. package/dist/src/{completion-Dnxn7E-j.js → completion-CrD6MQ93.js} +5 -5
  87. package/dist/src/{completion-BozdoXba.cjs → completion-DtQ72Bm3.cjs} +5 -5
  88. package/dist/src/{completion-HUe8wDhZ.js → completion-Vq_ad618.js} +6 -6
  89. package/dist/src/{createHash-ChI45QR1.js → createHash-DPpsZgFF.js} +1 -1
  90. package/dist/src/{createHash-CwDVU5xr.js → createHash-Un4Q_huE.js} +1 -1
  91. package/dist/src/{createHash-B7KvgoOD.cjs → createHash-VvBIc-AW.cjs} +1 -1
  92. package/dist/src/{docker-DCgsveLD.js → docker--3qzPa-6.js} +6 -6
  93. package/dist/src/{docker-DS4_Osau.cjs → docker-D3AY-5F5.cjs} +5 -5
  94. package/dist/src/{docker-CQmlA2NU.js → docker-DCsCDvwM.js} +6 -6
  95. package/dist/src/{docker-ClnmCf1Z.js → docker-Dorv4_Dg.js} +5 -5
  96. package/dist/src/{embedding-I45KG3o7.cjs → embedding-BXhN5lCH.cjs} +5 -5
  97. package/dist/src/{embedding-nFbumxcv.js → embedding-ChS1ivFS.js} +5 -5
  98. package/dist/src/{embedding-D3xTseo7.js → embedding-DNRvZwRN.js} +6 -6
  99. package/dist/src/{embedding-DD9wa3ae.js → embedding-D_bI4NDq.js} +6 -6
  100. package/dist/src/{errors-Cw810C93.js → errors-DFHe4L-n.js} +1 -1
  101. package/dist/src/{esm-Dh4dOLlt.js → esm-B6whoAcf.js} +2 -2
  102. package/dist/src/{esm-C7PnfdF8.js → esm-BRkfNsYs.js} +1 -1
  103. package/dist/src/{esm-tVgYPY-f.js → esm-BX8fwlAO.js} +2 -2
  104. package/dist/src/{esm-CtEPLdAj.cjs → esm-B_rGuPTo.cjs} +1 -1
  105. package/dist/src/{eval-CzJFfFO9.js → eval-BQPLBJbw.js} +1 -1
  106. package/dist/src/{eval-u4UVafl6.js → eval-DJ_4A-tr.js} +14 -14
  107. package/dist/src/evalResult-BBJAHAtw.cjs +2 -0
  108. package/dist/src/evalResult-BBK58h2B.js +3 -0
  109. package/dist/src/{evalResult-KZqXl4XP.cjs → evalResult-Cx-8OWkb.cjs} +28 -10
  110. package/dist/src/{evalResult-D3hVYFis.js → evalResult-D6P5I5il.js} +29 -11
  111. package/dist/src/{evalResult-Bgm9ZH31.js → evalResult-pSvGWFMo.js} +29 -11
  112. package/dist/src/{evaluator-IvuDYSvQ.js → evaluator-D-UIbbYq.js} +845 -98
  113. package/dist/src/evaluator-DgLKaZk8.js +3 -0
  114. package/dist/src/{extractor-Dk6bRWkv.js → extractor-BM3jRERL.js} +5 -5
  115. package/dist/src/{extractor-WVPOrH43.cjs → extractor-Dxr2J_wK.cjs} +5 -5
  116. package/dist/src/{extractor-DNSeBVOJ.js → extractor-DxyiFhPk.js} +6 -6
  117. package/dist/src/{extractor-CAfTSraf.js → extractor-YlZbUMsL.js} +6 -6
  118. package/dist/src/fetch-8viavNv8.js +3 -0
  119. package/dist/src/{fetch-BEWnXrrG.js → fetch-B6ch2nU2.js} +9 -20
  120. package/dist/src/{fetch-Di00EQrc.js → fetch-D9xxyC1p.js} +221 -232
  121. package/dist/src/{fetch-CJU5ELPa.cjs → fetch-NuqXW1Xb.cjs} +221 -244
  122. package/dist/src/{fetch-B0Z3Oe4k.js → fetch-Y5qX_kST.js} +8 -19
  123. package/dist/src/{fileExtensions-BArZuxsI.js → fileExtensions-8CjoL7vB.js} +1 -1
  124. package/dist/src/{fileExtensions-DnqA1y9x.js → fileExtensions-BGh-W-HT.js} +1 -1
  125. package/dist/src/{fileExtensions-bYh77CN8.cjs → fileExtensions-D9h-8Wxg.cjs} +1 -1
  126. package/dist/src/{fileExtensions-AWa2ZML4.js → fileExtensions-DysCsxNG.js} +1 -1
  127. package/dist/src/{formatDuration-DZzPsexs.js → formatDuration-Ch4A7G3o.js} +1 -1
  128. package/dist/src/{genaiTracer-yRuxj9-L.cjs → genaiTracer-BokHC-MW.cjs} +1 -1
  129. package/dist/src/{genaiTracer-DWdZ28hY.js → genaiTracer-C3ZPQU60.js} +1 -1
  130. package/dist/src/{genaiTracer-XnrcgDCe.js → genaiTracer-CFny3gOy.js} +1 -1
  131. package/dist/src/{genaiTracer-COYDi-tC.js → genaiTracer-DxODqT9e.js} +1 -1
  132. package/dist/src/{graders-Zy3x0zqX.js → graders-BoUqsCEm.js} +1303 -2044
  133. package/dist/src/{graders--zknU_uk.cjs → graders-Bw1wk_21.cjs} +1553 -2240
  134. package/dist/src/graders-C84JI-m5.js +2 -0
  135. package/dist/src/graders-CBbd0K0Q.cjs +2 -0
  136. package/dist/src/graders-CbQqpHSN.js +3 -0
  137. package/dist/src/{graders-eIHhRqoC.js → graders-CgPn32yp.js} +1300 -2041
  138. package/dist/src/{graders-pvbReLLn.js → graders-CwrbifOo.js} +747 -1488
  139. package/dist/src/graders-DS42d3ZG.js +2 -0
  140. package/dist/src/{image-9302QVqR.js → image-BeWaInPF.js} +3 -3
  141. package/dist/src/{image-DVz2RiMF.js → image-BmilRNqO.js} +7 -7
  142. package/dist/src/{image-x6KqLQl4.cjs → image-CxJoa3aW.cjs} +6 -6
  143. package/dist/src/{image-De2FBmYV.cjs → image-D10dNAav.cjs} +3 -3
  144. package/dist/src/{image-dnoUgPrC.js → image-Dr_3I3nK.js} +4 -4
  145. package/dist/src/{image-B5Mv-Z3h.js → image-DsGRlkh7.js} +7 -7
  146. package/dist/src/{image-qUpPvmNZ.js → image-a_SGUobh.js} +6 -6
  147. package/dist/src/{image-u7-rKnYU.js → image-qjO6FWPs.js} +4 -4
  148. package/dist/src/index.cjs +1052 -296
  149. package/dist/src/index.d.cts +124 -13
  150. package/dist/src/index.d.ts +125 -14
  151. package/dist/src/index.js +1018 -262
  152. package/dist/src/{interactiveCheck-CLERUB0c.js → interactiveCheck-CCICw2cy.js} +2 -2
  153. package/dist/src/{invariant-BtWWVVhl.js → invariant-B2Rf6avk.js} +1 -1
  154. package/dist/src/{invariant-vgHWClmd.js → invariant-DIYf9sP1.js} +1 -1
  155. package/dist/src/{knowledgeBase-RhFPGWDc.js → knowledgeBase-BBETc5-S.js} +6 -6
  156. package/dist/src/{knowledgeBase-Bpoe_nLu.cjs → knowledgeBase-C8qOo26M.cjs} +5 -5
  157. package/dist/src/{knowledgeBase-lm9RXSAm.js → knowledgeBase-CzAi2rUI.js} +6 -6
  158. package/dist/src/{knowledgeBase-Dgc7CBWF.js → knowledgeBase-Dr3Kib7F.js} +5 -5
  159. package/dist/src/{litellm-C2kqjxqp.js → litellm-BLSiANhk.js} +5 -5
  160. package/dist/src/{litellm-CoyI4IAl.cjs → litellm-CaUmV7Mk.cjs} +4 -4
  161. package/dist/src/{litellm-p37R1dzQ.js → litellm-DQGo_juI.js} +4 -4
  162. package/dist/src/{litellm-DRjpcSa7.js → litellm-DRc4qWfc.js} +5 -5
  163. package/dist/src/{logger-DksKw1Qc.js → logger-BbY6ypFL.js} +2 -2
  164. package/dist/src/{logger-B88EkIn6.js → logger-KD8JjCRJ.js} +2 -2
  165. package/dist/src/{luma-ray-KgTCXrZC.js → luma-ray-B-tNZzqW.js} +6 -6
  166. package/dist/src/{luma-ray-B863CmuZ.js → luma-ray-CtS3OlGq.js} +5 -5
  167. package/dist/src/{luma-ray-BTTLtqQ8.js → luma-ray-PJJgUjOc.js} +6 -6
  168. package/dist/src/{luma-ray-BxVKaW2a.cjs → luma-ray-if-Ml4R9.cjs} +5 -5
  169. package/dist/src/main.js +242 -198
  170. package/dist/src/{messages-zWbkLLHz.js → messages-B9dSjrNf.js} +264 -16
  171. package/dist/src/{messages-811uVVW5.cjs → messages-BnsVHUnm.cjs} +266 -15
  172. package/dist/src/{messages-MYTQ2TWp.js → messages-CI69Lasb.js} +264 -16
  173. package/dist/src/{messages-BTQz42fn.js → messages-CewuNcNS.js} +264 -16
  174. package/dist/src/{meteor-Co1VQ1u5.cjs → meteor-BBGcGeCa.cjs} +1 -1
  175. package/dist/src/{meteor-DuAFv6gF.js → meteor-BKTM-7KS.js} +1 -1
  176. package/dist/src/{meteor-DHdzY1Ss.js → meteor-CeGo0Lu2.js} +2 -2
  177. package/dist/src/{meteor-CU5UAE-H.js → meteor-Wc_aUVvu.js} +2 -2
  178. package/dist/src/{modelslab-wu9yi5GE.js → modelslab-BCLOtfek.js} +7 -7
  179. package/dist/src/{modelslab-Dk1JAtVo.cjs → modelslab-BkapYJhh.cjs} +6 -6
  180. package/dist/src/{modelslab-DIq-6y7x.js → modelslab-D73OnKSx.js} +6 -6
  181. package/dist/src/{modelslab-D0erNWKe.js → modelslab-zpz9JcK0.js} +7 -7
  182. package/dist/src/{nova-reel-CCFRfeRb.js → nova-reel-B8F_TK5w.js} +6 -6
  183. package/dist/src/{nova-reel-DQrm74ng.js → nova-reel-Bx0NFV2f.js} +5 -5
  184. package/dist/src/{nova-reel-gr11WG7f.js → nova-reel-CNGJTLtG.js} +6 -6
  185. package/dist/src/{nova-reel-CrLXVKQf.cjs → nova-reel-DkT7tnoB.cjs} +5 -5
  186. package/dist/src/{nova-sonic-BYdp-QLs.js → nova-sonic-BaXRN1cr.js} +4 -4
  187. package/dist/src/{nova-sonic-TDgrlTk7.js → nova-sonic-BeTRaFOh.js} +4 -4
  188. package/dist/src/{nova-sonic-B_ZXcUJB.js → nova-sonic-CL7Zqv0G.js} +3 -3
  189. package/dist/src/{nova-sonic-i5tUvXKn.cjs → nova-sonic-YT426juD.cjs} +3 -3
  190. package/dist/src/{openai-DhVEmgeZ.js → openai-BMHD2Huo.js} +2 -2
  191. package/dist/src/{openai-Qsvz25mV.js → openai-BT-JvDse.js} +2 -2
  192. package/dist/src/{openai-URNyItar.cjs → openai-Cy1XLs0c.cjs} +1 -1
  193. package/dist/src/{openai-iYtrXzOX.js → openai-D4fxGvRx.js} +1 -1
  194. package/dist/src/{openclaw-CwzlQSQX.js → openclaw-Bq7RVR3k.js} +7 -6
  195. package/dist/src/{openclaw-CLWrW03k.js → openclaw-DA8U4DsD.js} +8 -7
  196. package/dist/src/{openclaw-CnQ363Wi.js → openclaw-DObVgpjC.js} +8 -7
  197. package/dist/src/{openclaw-wX9rtfke.cjs → openclaw-DUBZP3GL.cjs} +8 -7
  198. package/dist/src/{opencode-sdk-BUu5Nevv.js → opencode-sdk-BB40Wir1.js} +4 -4
  199. package/dist/src/{opencode-sdk-GI2KaAXq.js → opencode-sdk-BM1UAIv1.js} +3 -3
  200. package/dist/src/{opencode-sdk-BZ2idgYA.cjs → opencode-sdk-CeqiOcOU.cjs} +4 -4
  201. package/dist/src/{opencode-sdk-BxD8vXp_.js → opencode-sdk-ChdK7F7z.js} +4 -4
  202. package/dist/src/{otlpReceiver-DmVulbhC.js → otlpReceiver-C6thJRXi.js} +4 -4
  203. package/dist/src/{otlpReceiver-B2z58l4e.js → otlpReceiver-CcdIikOu.js} +3 -3
  204. package/dist/src/{otlpReceiver-BfcVq2Nq.cjs → otlpReceiver-DNSQj6bf.cjs} +3 -3
  205. package/dist/src/{otlpReceiver-BntK801g.js → otlpReceiver-UYMQx3sy.js} +4 -4
  206. package/dist/src/{providerRegistry-CPQ_CmVO.js → providerRegistry-1gB5vtzQ.js} +2 -2
  207. package/dist/src/{providerRegistry-CQMdTmHP.cjs → providerRegistry-BESeALrr.cjs} +1 -1
  208. package/dist/src/{providerRegistry-Bvh8mv85.js → providerRegistry-DoACwqhD.js} +1 -1
  209. package/dist/src/{providerRegistry-CWoPjKFZ.js → providerRegistry-PMsleEzs.js} +2 -2
  210. package/dist/src/{providers-Bp4S-FvO.js → providers-BuyzKt7C.js} +1 -1
  211. package/dist/src/{providers-DV3ax9e_.cjs → providers-C7lNVBjX.cjs} +1 -1
  212. package/dist/src/{providers-u9Enmfok.js → providers-CCE2COJi2.js} +1 -1
  213. package/dist/src/{providers-DruaQfwu.js → providers-CJh7iriU.js} +18103 -17952
  214. package/dist/src/{providers-iUt5fbAN.js → providers-Ctcc592x.js} +1 -1
  215. package/dist/src/{providers-Domz_llv.js → providers-DRrerKra.js} +432 -281
  216. package/dist/src/{providers-BV_KMZje.js → providers-DT-GtF2t.js} +19094 -18943
  217. package/dist/src/{providers-1eKkXBKp.cjs → providers-eDShy16E.cjs} +17946 -17795
  218. package/dist/src/{pythonUtils-Cldx7huE.js → pythonUtils-C4tltmIn.js} +3 -3
  219. package/dist/src/{pythonUtils-tAJvvpS-.cjs → pythonUtils-CoLaCwNY.cjs} +3 -3
  220. package/dist/src/{pythonUtils-C2UQ30Rz.js → pythonUtils-DMO68Jg7.js} +3 -3
  221. package/dist/src/{pythonUtils-CnndUbW-.js → pythonUtils-DNqbnRdx.js} +3 -3
  222. package/dist/src/{quiverai-DR0SnIQV.js → quiverai-BSS9a7wV.js} +3 -3
  223. package/dist/src/{quiverai-CtWi6x_g.js → quiverai-Bk1KrvL6.js} +4 -4
  224. package/dist/src/{quiverai-DFotyafY.cjs → quiverai-Bpx6MZ7T.cjs} +3 -3
  225. package/dist/src/{quiverai-aPPvXOgn.js → quiverai-CPKhWgaT.js} +4 -4
  226. package/dist/src/{render-DHIZ6_k8.js → render-7uNJ2V14.js} +2 -2
  227. package/dist/src/{render-CH-62LbA.js → render-DlscvAUJ.js} +1 -1
  228. package/dist/src/{render-CMEpfLaO.js → render-eui5p5mL.js} +2 -2
  229. package/dist/src/{render-CgVDrJmM.js → render-nj-UaPdn.js} +2 -2
  230. package/dist/src/{render-DfQSFxGE.cjs → render-tG6ir9_g.cjs} +1 -1
  231. package/dist/src/{responses--OsX2aYW.js → responses-1ztiVYsx.js} +49 -15
  232. package/dist/src/{responses-DL9m8CyY.js → responses-B8haB-mD.js} +49 -15
  233. package/dist/src/{responses-C-flexAY.js → responses-BiaBguAu.js} +49 -15
  234. package/dist/src/{responses-Bi9vBuW_.cjs → responses-CF-ayauu.cjs} +48 -14
  235. package/dist/src/rubyUtils-4hjGxvju.js +3 -0
  236. package/dist/src/{rubyUtils-DVLeA2jg.js → rubyUtils-BI0p46eZ.js} +3 -3
  237. package/dist/src/{rubyUtils-DsGrTx8R.js → rubyUtils-CIQFnVz4.js} +3 -3
  238. package/dist/src/rubyUtils-CO-tuszQ.cjs +2 -0
  239. package/dist/src/{rubyUtils-CYSQEG4a.js → rubyUtils-DGnoCYL2.js} +3 -3
  240. package/dist/src/{rubyUtils-B6eljPuh.cjs → rubyUtils-DoifqkiA.cjs} +4 -3
  241. package/dist/src/{sagemaker-BveBvuxm.js → sagemaker-BDLeW29y.js} +12 -12
  242. package/dist/src/{sagemaker-D67yzMzs.js → sagemaker-C5T60MKf.js} +13 -13
  243. package/dist/src/{sagemaker-BVkaG2-l.js → sagemaker-ClS_NB07.js} +13 -13
  244. package/dist/src/{sagemaker-XnfhheQv.cjs → sagemaker-ljtY12VM.cjs} +12 -12
  245. package/dist/src/{scanner-1DqWi1Ej.js → scanner-nOCWNIXa.js} +7 -7
  246. package/dist/src/server/index.js +1067 -265
  247. package/dist/src/{server-Dx2TyCH2.cjs → server-BEECpeGG.cjs} +5 -5
  248. package/dist/src/{server-BNYztJkh.js → server-ByiF3qlg.js} +9 -8
  249. package/dist/src/{server-BSB45Nt9.js → server-ByxbqAcQ.js} +8 -7
  250. package/dist/src/{server-DaA2eR26.cjs → server-C0XKRNB_.cjs} +1 -1
  251. package/dist/src/server-C_15p79-.js +3 -0
  252. package/dist/src/{server-D6Il2Sob.js → server-gyd6d4Hc.js} +5 -5
  253. package/dist/src/{signal-CE5G3a7x.js → signal-DTtUuU3l.js} +3 -3
  254. package/dist/src/{slack-acRb0IqQ.js → slack-4zZX1OKP.js} +1 -1
  255. package/dist/src/{slack-1Rhq0EoV.cjs → slack-BLlsDpfG.cjs} +1 -1
  256. package/dist/src/{slack-D5Wpy8LM.js → slack-BPYLQLgb.js} +2 -2
  257. package/dist/src/{slack-DDUe-5MC.js → slack-Bamy_7te.js} +2 -2
  258. package/dist/src/{store-DAAyxcy6.cjs → store-2K0kDi80.cjs} +2 -2
  259. package/dist/src/{store-Dn9HUkdW.js → store-2OXm_eBY.js} +3 -3
  260. package/dist/src/store-BELqNwvz.js +3 -0
  261. package/dist/src/{store-M0b1WfYb.js → store-BPkzEyFM.js} +2 -2
  262. package/dist/src/{store-CYEy5J2D.js → store-CPh25336.js} +3 -3
  263. package/dist/src/store-uQZ4AjPe.cjs +2 -0
  264. package/dist/src/{tables-CsWou1Bx.js → tables-BMSOS2Gg.js} +3 -3
  265. package/dist/src/{tables-DUfh1F7Z.cjs → tables-CXbaZ9y1.cjs} +2 -2
  266. package/dist/src/{tables-C4CH3zRr.js → tables-NlvH23ky.js} +3 -3
  267. package/dist/src/{tables-DQ4WU5tX.js → tables-WgdUZ8Ck.js} +2 -2
  268. package/dist/src/{telemetry-dbaJ0E98.js → telemetry--iqaGyaS.js} +5 -4
  269. package/dist/src/{telemetry-Dsw_faFj.cjs → telemetry-CEQxGnMZ.cjs} +7 -6
  270. package/dist/src/{telemetry-Dvqxv3YC.js → telemetry-CgdVGV8N.js} +4 -3
  271. package/dist/src/{telemetry-CQPez_Jp.js → telemetry-DWdGHvEf.js} +5 -4
  272. package/dist/src/telemetry-DjNoC_n3.cjs +2 -0
  273. package/dist/src/telemetry-ZdPZc0fm.js +3 -0
  274. package/dist/src/{text-BVi-cLPJ.cjs → text-BiNME7QG.cjs} +1 -1
  275. package/dist/src/{text-KvuD2Iko.js → text-D4lz-Jg_.js} +1 -1
  276. package/dist/src/{text-DHxdyQqT.js → text-DDQP0tuQ.js} +1 -1
  277. package/dist/src/{text-CZr46tp_.js → text-NWvfMfkF.js} +1 -1
  278. package/dist/src/{tokenUsageUtils-CXrvO-wA.js → tokenUsageUtils-2wIvAhB3.js} +1 -1
  279. package/dist/src/{tokenUsageUtils-C-bmyHoE.js → tokenUsageUtils-4c780gFd.js} +1 -1
  280. package/dist/src/tokenUsageUtils-BjVkdk18.js +142 -0
  281. package/dist/src/{tokenUsageUtils-Bb7DkZPz.cjs → tokenUsageUtils-C9odhsbW.cjs} +1 -1
  282. package/dist/src/{transcription-DuWDupG7.js → transcription-84t4ALo2.js} +5 -5
  283. package/dist/src/{transcription-CJspiD2c.js → transcription-Bm2emLmJ.js} +6 -6
  284. package/dist/src/{transcription-BvjmiYB1.cjs → transcription-CZ4LG5hQ.cjs} +5 -5
  285. package/dist/src/{transcription-V2HaAmy2.js → transcription-D7Q0vJsh.js} +6 -6
  286. package/dist/src/{transform-zDhMmzwX.js → transform-B-b6Cq-q.js} +5 -5
  287. package/dist/src/transform-BQt0BeAW.js +3 -0
  288. package/dist/src/{transform-DgKlRr73.cjs → transform-Bq5oqC0s.cjs} +1 -1
  289. package/dist/src/{transform-CUnzlsbn.cjs → transform-C9izGX54.cjs} +4 -4
  290. package/dist/src/{transform-DYX1_Xnh.js → transform-CwbAZ84V.js} +5 -5
  291. package/dist/src/{transform-CTeuTR3S.cjs → transform-Dg4LcO1Y.cjs} +6 -6
  292. package/dist/src/{transform-CG0ehZNG.js → transform-DtooZqYY.js} +6 -6
  293. package/dist/src/{transform-UN5UGu8U.js → transform-DzCF-wqV.js} +5 -5
  294. package/dist/src/{transform-lQrDE1BQ.js → transform-_DpNB4qp.js} +5 -5
  295. package/dist/src/{transform-Bbg6A8Jk.js → transform-eGiUAv86.js} +4 -4
  296. package/dist/src/{transformersAvailability-Cju9mHgR.cjs → transformersAvailability-B22swDxr.cjs} +1 -1
  297. package/dist/src/{transformersAvailability-CcHusyhw.js → transformersAvailability-lvCCvuPT.js} +1 -1
  298. package/dist/src/{transformersAvailability-DLlROWhg.js → transformersAvailability-rJGPccjr.js} +1 -1
  299. package/dist/src/{types-Bgh5SOn6.js → types-BDjGOq4E.js} +4 -2
  300. package/dist/src/{types-Dm9JM6Vb.js → types-BVH9hjgW.js} +4 -2
  301. package/dist/src/{types-CeaeaZdP.cjs → types-CgG2rKiW.cjs} +151 -149
  302. package/dist/src/{types-BGQDAP8i.js → types-DNRZVOue.js} +152 -150
  303. package/dist/src/{util-C8e5uydV.js → util-3pBZZb_H.js} +142 -17
  304. package/dist/src/{util-CN3SrLT4.cjs → util-A5_ZsQUn.cjs} +65 -43
  305. package/dist/src/{util-D3q0WQ-0.js → util-B9CNhyac.js} +66 -44
  306. package/dist/src/{util-DxWpWjhc.js → util-BQOCAHQC.js} +700 -575
  307. package/dist/src/{util-BYvQUPp7.js → util-BVXcTwXu.js} +3 -3
  308. package/dist/src/{util-D9TisOyk.js → util-BlFVL0UF.js} +65 -43
  309. package/dist/src/{util-C9J8ahRn.js → util-C-kmRosx.js} +66 -44
  310. package/dist/src/{util-DvU2Pw8c.js → util-DFPeFkiV.js} +3 -3
  311. package/dist/src/{util-DDs-7g6-.js → util-DN0-b81k.js} +3 -3
  312. package/dist/src/{util-olYL5C6N.cjs → util-Dpmm_dAI.cjs} +3 -3
  313. package/dist/src/{util-oGMLA7vc.js → util-Dub0f_ej.js} +700 -575
  314. package/dist/src/{util-Bxn8emtE.cjs → util-DvpHnLt0.cjs} +718 -570
  315. package/dist/src/{utils-DJfvjyMj.js → utils-BUMN8orw.js} +3 -3
  316. package/dist/src/{utils-B05gLxER.cjs → utils-DkVeShIB.cjs} +2 -2
  317. package/dist/src/{utils-BLJKfv0y.js → utils-kt7lv30R.js} +3 -3
  318. package/dist/src/{utils-hXtCYanr.js → utils-o8S5huU2.js} +2 -2
  319. package/dist/src/version-0frU0UTr.js +16 -0
  320. package/dist/src/version-CbpiUINz.js +17 -0
  321. package/dist/src/version-CbuBKu2U.js +16 -0
  322. package/dist/src/version-D9zu9FWB.cjs +27 -0
  323. package/dist/tsconfig.tsbuildinfo +1 -1
  324. package/package.json +22 -20
  325. package/dist/src/app/assets/Report-CQYFezYu.js +0 -1
  326. package/dist/src/app/assets/index-BzJt18Jz.js +0 -385
  327. package/dist/src/cache-Cr9oLMUa.js +0 -3
  328. package/dist/src/cloud-Hphvo8kr.js +0 -3
  329. package/dist/src/codex-sdk-BAmYE7qy.js +0 -3
  330. package/dist/src/evalResult-D8MT9p0s.js +0 -3
  331. package/dist/src/evalResult-Dvc-iucu.cjs +0 -2
  332. package/dist/src/evaluator-CVessDWe.js +0 -3
  333. package/dist/src/fetch-C7bGKDlQ.js +0 -3
  334. package/dist/src/graders-BOAzQEUe.cjs +0 -2
  335. package/dist/src/graders-D4BTsZdG2.js +0 -3
  336. package/dist/src/graders-DOJK1XpV.js +0 -2
  337. package/dist/src/graders-NAv9LcBn.js +0 -2
  338. package/dist/src/rubyUtils-D1L2d3jb.js +0 -3
  339. package/dist/src/rubyUtils-DUbq4tff.cjs +0 -2
  340. package/dist/src/server-DCtHUqlp.js +0 -3
  341. package/dist/src/store-CWOSz6D_.cjs +0 -2
  342. package/dist/src/store-DCDBhv7B.js +0 -3
  343. package/dist/src/telemetry-C1IqxcdW.js +0 -3
  344. package/dist/src/telemetry-C4ZEa_es.cjs +0 -2
  345. package/dist/src/transform-M6ITAESf.js +0 -3
  346. /package/dist/src/{evalResult-DElBuddX.js → evalResult-spPqh1G_.js} +0 -0
@@ -1,29 +1,30 @@
1
1
  #!/usr/bin/env node
2
- import { A as getMaxEvalTimeMs, D as getEnvInt, N as state, O as getEnvString, S as summarizeEvaluateResultForLogging, T as getEnvBool, _ as extractJsonObjects, c as setLogCallback, j as isCI, k as getEvalTimeoutMs, r as globalLogCallback, s as logger, v as getAjv, x as safeJsonStringify } from "./logger-DksKw1Qc.js";
3
- import { F as VERSION, I as FILE_METADATA_KEY, l as sleep, r as fetchWithRetries, v as isPromptfooSampleTarget, x as parseChatPrompt } from "./fetch-BEWnXrrG.js";
4
- import { t as invariant } from "./invariant-BtWWVVhl.js";
5
- import { r as telemetry } from "./telemetry-CQPez_Jp.js";
6
- import { at as MULTI_INPUT_VAR, d as isGradingResult, p as isApiProvider, s as ResultFailureReason } from "./types-Dm9JM6Vb.js";
7
- import { a as getNunjucksEngine, r as extractVariablesFromTemplate } from "./render-CMEpfLaO.js";
8
- import { t as providerRegistry } from "./providerRegistry-CWoPjKFZ.js";
9
- import { c as promptYesNo } from "./server-BNYztJkh.js";
10
- import { A as renderPrompt, E as isBasicRefusal, F as TokenUsageTracker, I as createRateLimitRegistry, L as createProviderRateLimitOptions, M as isPackagePath, N as loadFromPackage, P as redteamProviderManager, U as throwIfTargetPromptExceedsMaxChars, X as VertexChatProvider, Z as AIStudioChatProvider, j as runExtensionHook, k as collectFileMetadata, u as GoogleLiveProvider, v as checkExfilTracking, w as getSessionId } from "./providers-BV_KMZje.js";
11
- import { n as isNonTransientHttpStatus } from "./errors-Cw810C93.js";
12
- import { c as withCacheNamespace, o as getCache } from "./cache-mb7c8hbp.js";
13
- import { i as isJavascriptFile } from "./fileExtensions-AWa2ZML4.js";
14
- import { M as isGoogleProvider, N as isOpenAiProvider, P as isProviderAllowed, b as loadFunction, g as maybeLoadToolsFromExternalFile, j as isAnthropicProvider, x as parseFileUrl } from "./util-DxWpWjhc.js";
15
- import { r as runPython } from "./pythonUtils-CnndUbW-.js";
16
- import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-UN5UGu8U.js";
17
- import { $ as matchesGEval, G as loadRubricPrompt, H as callProviderWithContext, J as matchesClosedQa, K as matchesAnswerRelevance, Q as matchesFactuality, U as fail, W as getAndCheckProvider, X as matchesContextRecall, Y as matchesContextFaithfulness, Z as matchesContextRelevance, _t as loadFromJavaScriptFile, at as matchesSimilarity, ct as withProviderCallExecutionContext, et as matchesLlmRubric, gt as getFinalTest, ht as coerceString, it as matchesSelectBest, mt as SUGGEST_PROMPTS_SYSTEM_MESSAGE, n as getGraderById, nt as matchesPiScore, ot as matchesTrajectoryGoalSuccess, q as matchesClassification, rt as matchesSearchRubric, st as selectMaxScore, tt as matchesModeration, ut as getDefaultProviders, vt as processFileReference, yt as resolveContext } from "./graders-eIHhRqoC.js";
18
- import { i as generateIdFromPrompt } from "./utils-BLJKfv0y.js";
19
- import { t as OpenAiChatCompletionProvider } from "./chat-Dr3DUQ0D.js";
20
- import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CXrvO-wA.js";
21
- import { h as validateFunctionCall } from "./transform-zDhMmzwX.js";
22
- import { l as validateFunctionCall$1 } from "./util-C9J8ahRn.js";
23
- import { t as extractAndStoreBinaryData } from "./extractor-DNSeBVOJ.js";
24
- import { n as getTraceStore } from "./store-CYEy5J2D.js";
25
- import { n as runRuby } from "./rubyUtils-DsGrTx8R.js";
26
- import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-CE5G3a7x.js";
2
+ import { A as getMaxEvalTimeMs, D as getEnvInt, N as state, O as getEnvString, S as summarizeEvaluateResultForLogging, T as getEnvBool, _ as extractJsonObjects, c as setLogCallback, g as extractFirstJsonObject, j as isCI, k as getEvalTimeoutMs, r as globalLogCallback, s as logger, v as getAjv, x as safeJsonStringify } from "./logger-BbY6ypFL.js";
3
+ import { P as FILE_METADATA_KEY, l as sleep, r as fetchWithRetries, v as isPromptfooSampleTarget, x as parseChatPrompt } from "./fetch-B6ch2nU2.js";
4
+ import { n as VERSION } from "./version-CbpiUINz.js";
5
+ import { t as invariant } from "./invariant-B2Rf6avk.js";
6
+ import { r as telemetry } from "./telemetry-DWdGHvEf.js";
7
+ import { at as MULTI_INPUT_VAR, d as isGradingResult, nt as LLAMA_GUARD_REPLICATE_PROVIDER, p as isApiProvider, s as ResultFailureReason } from "./types-BVH9hjgW.js";
8
+ import { a as getNunjucksEngine, r as extractVariablesFromTemplate } from "./render-eui5p5mL.js";
9
+ import { t as providerRegistry } from "./providerRegistry-PMsleEzs.js";
10
+ import { c as promptYesNo, m as shouldGenerateRemote } from "./server-ByiF3qlg.js";
11
+ import { $ as createRateLimitRegistry, F as checkExfilTracking, J as runExtensionHook, K as collectFileMetadata, Q as TokenUsageTracker, U as isBasicRefusal, V as getSessionId, X as loadFromPackage, Y as isPackagePath, Z as redteamProviderManager, _ as VertexChatProvider, et as createProviderRateLimitOptions, n as loadApiProvider, ot as throwIfTargetPromptExceedsMaxChars, q as renderPrompt, v as GoogleLiveProvider, y as AIStudioChatProvider } from "./providers-DT-GtF2t.js";
12
+ import { n as isNonTransientHttpStatus } from "./errors-DFHe4L-n.js";
13
+ import { c as withCacheNamespace, o as getCache } from "./cache-BI5BY7ey.js";
14
+ import { i as isJavascriptFile } from "./fileExtensions-DysCsxNG.js";
15
+ import { r as runPython } from "./pythonUtils-DNqbnRdx.js";
16
+ import { L as loadFunction, N as maybeLoadToolsFromExternalFile, R as parseFileUrl, S as isProviderAllowed, b as isGoogleProvider, x as isOpenAiProvider, y as isAnthropicProvider } from "./util-BQOCAHQC.js";
17
+ import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, o as normalizeTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-2wIvAhB3.js";
18
+ import { t as extractAndStoreBinaryData } from "./extractor-DxyiFhPk.js";
19
+ import { t as OpenAiChatCompletionProvider } from "./chat-BLOdH60v.js";
20
+ import { h as validateFunctionCall } from "./transform-B-b6Cq-q.js";
21
+ import { l as validateFunctionCall$1 } from "./util-C-kmRosx.js";
22
+ import { n as transform, r as getProcessShim, t as TransformInputType } from "./transform-DzCF-wqV.js";
23
+ import { n as getTraceStore } from "./store-CPh25336.js";
24
+ import { n as runRuby } from "./rubyUtils-CIQFnVz4.js";
25
+ import { $ as SELECT_BEST_PROMPT, Ct as DEFAULT_ANTHROPIC_MODEL, Dt as withProviderCallExecutionContext, Et as getGradingProvider, G as matchesLlmRubric, H as matchesClosedQa, J as doRemoteGrading, K as matchesPiScore, Q as DEFAULT_WEB_SEARCH_PROMPT, St as getDefaultProviders, Tt as getAndCheckProvider, U as matchesFactuality, W as matchesGEval, _t as tryParse, at as CONTEXT_RECALL_ATTRIBUTED_TOKEN, bt as loadFromJavaScriptFile, ct as CONTEXT_RELEVANCE_BAD, dt as cosineSimilarity, et as SUGGEST_PROMPTS_SYSTEM_MESSAGE, ft as dotProduct, gt as splitIntoSentences, ht as normalizeMatcherTokenUsage, it as CONTEXT_RECALL, lt as loadRubricPrompt, mt as fail, n as getGraderById, nt as CONTEXT_FAITHFULNESS_LONGFORM, ot as CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN, pt as euclideanDistance, q as matchesTrajectoryGoalSuccess, rt as CONTEXT_FAITHFULNESS_NLI_STATEMENTS, st as CONTEXT_RELEVANCE, tt as ANSWER_RELEVANCY_GENERATE, ut as renderLlmRubricPrompt, vt as coerceString, wt as callProviderWithContext, xt as processFileReference, yt as getFinalTest } from "./graders-CgPn32yp.js";
26
+ import { i as generateIdFromPrompt } from "./utils-kt7lv30R.js";
27
+ import { a as getActualPromptWithFallback, r as updateSignalFile } from "./signal-DTtUuU3l.js";
27
28
  import { AsyncResource } from "node:async_hooks";
28
29
  import chalk from "chalk";
29
30
  import fs, { createWriteStream } from "fs";
@@ -38,7 +39,7 @@ import { globSync } from "glob";
38
39
  import { XMLParser } from "fast-xml-parser";
39
40
  import async from "async";
40
41
  import cliProgress from "cli-progress";
41
- import { JSDOM } from "jsdom";
42
+ import { parse as parse$1 } from "parse5";
42
43
  import { distance } from "fastest-levenshtein";
43
44
  import * as rouge from "js-rouge";
44
45
  import { isDeepStrictEqual } from "node:util";
@@ -218,6 +219,502 @@ const handleConversationRelevance = async ({ assertion, outputString, prompt, pr
218
219
  };
219
220
  };
220
221
  //#endregion
222
+ //#region src/matchers/classification.ts
223
/**
 * Grades `output` with a classification provider.
 *
 * @param expected Label whose score is checked. When undefined, the highest score among all returned labels is used.
 * @param output Text handed to the provider's `callClassificationApi`.
 * @param threshold Minimum score needed to pass; compared with a `Number.EPSILON` tolerance.
 * @param grading Optional grading config; only `grading.provider` is read here.
 * @returns An object with `pass`, `score` and `reason`, or the result of `fail(...)` when the
 * provider response has no `classification`.
 */
async function matchesClassification(expected, output, threshold, grading) {
  const classifier = await getAndCheckProvider("classification", grading?.provider, null, "classification check");
  const resp = await classifier.callClassificationApi(output);
  if (!resp.classification) {
    return fail(resp.error || "Unknown error fetching classification");
  }

  const matchAnyLabel = expected === undefined;
  let score;
  if (matchAnyLabel) {
    const allScores = Object.values(resp.classification);
    if (allScores.length === 0) {
      return {
        pass: false,
        score: 0,
        reason: "No classification scores returned"
      };
    }
    score = Math.max(...allScores);
  } else {
    // A label that is absent from the response is treated as a score of 0.
    score = resp.classification[expected] || 0;
  }

  const pass = score >= threshold - Number.EPSILON;
  let reason;
  if (pass) {
    reason = matchAnyLabel ? `Maximum classification score ${score.toFixed(2)} >= ${threshold}` : `Classification ${expected} has score ${score.toFixed(2)} >= ${threshold}`;
  } else {
    reason = matchAnyLabel ? `Maximum classification score ${score.toFixed(2)} < ${threshold}` : `Classification ${expected} has score ${score.toFixed(2)} < ${threshold}`;
  }
  return {
    pass,
    score,
    reason
  };
}
258
+ //#endregion
259
+ //#region src/matchers/comparison.ts
260
/**
 * Asks a grading provider to pick the best of several outputs for `criteria`.
 *
 * The first run of digits in the provider's reply is read as the index of the
 * winning output. Returns one grading result per output, in the same order.
 *
 * @param criteria Selection criteria, passed to the rubric prompt and echoed in the reasons.
 * @param outputs Candidate outputs (at least two).
 * @param grading Optional grading config (`provider`, `rubricPrompt`).
 * @param vars Extra variables merged into the prompt variables.
 * @param providerCallContext Forwarded unchanged to `callProviderWithContext`.
 * @returns An array with the same length as `outputs`.
 */
async function matchesSelectBest(criteria, outputs, grading, vars, providerCallContext) {
  invariant(outputs.length >= 2, "select-best assertion must have at least two outputs to compare between");

  // Built fresh for each use so the rendered prompt and the call context get separate objects.
  const buildPromptVars = () => ({
    criteria,
    outputs: outputs.map((candidate) => tryParse(candidate)),
    ...(vars || {})
  });

  const defaultProviders = await getDefaultProviders();
  const judge = await getAndCheckProvider("text", grading?.provider, defaultProviders.gradingProvider, "select-best check");
  const rubricPrompt = await loadRubricPrompt(grading?.rubricPrompt, SELECT_BEST_PROMPT);
  const promptText = await renderLlmRubricPrompt(rubricPrompt, buildPromptVars());
  const resp = await callProviderWithContext(judge, promptText, "select-best", buildPromptVars(), providerCallContext);

  // Every output receives its own failure result when the judge cannot be used.
  const failEveryOutput = (reason) => Array.from({ length: outputs.length }, () => fail(reason, resp.tokenUsage));

  if (resp.error || !resp.output) {
    return failEveryOutput(resp.error || "No output");
  }
  invariant(typeof resp.output === "string", "select-best produced malformed response");

  const digits = resp.output.trim().match(/\d+/);
  const verdict = digits ? Number.parseInt(digits[0], 10) : NaN;
  const verdictInRange = !Number.isNaN(verdict) && verdict >= 0 && verdict < outputs.length;
  if (!verdictInRange) {
    return failEveryOutput(`Invalid select-best verdict: ${verdict}`);
  }

  const tokensUsed = normalizeMatcherTokenUsage(resp.tokenUsage);
  return outputs.map((_output, index) => {
    if (index !== verdict) {
      return {
        pass: false,
        score: 0,
        reason: `Output not selected: ${criteria}`,
        tokensUsed
      };
    }
    return {
      pass: true,
      score: 1,
      reason: `Output selected as the best: ${criteria}`,
      tokensUsed
    };
  });
}
292
/**
 * Selects the output whose other assertions produced the highest aggregate score.
 *
 * For every output, the component results of its grading result are aggregated
 * (ignoring `max-score` and `select-best` components) as either a weighted
 * average (default) or a weighted sum (`method: "sum"`). The first output with
 * the highest aggregate wins, provided it reaches the optional threshold.
 *
 * @param outputs Candidate outputs (at least two).
 * @param resultsWithGradingResults One entry per output; `gradingResult.componentResults` is read.
 * @param assertion The max-score assertion; `assertion.value` may hold `method`, `weights`, `threshold`.
 * @returns One result per output with `pass`, `score` (1 for the winner, else 0), `reason` and `namedScores`.
 * @throws Error when an output has no component results to aggregate.
 */
async function selectMaxScore(outputs, resultsWithGradingResults, assertion) {
  invariant(outputs.length >= 2, "max-score assertion must have at least two outputs to compare between");
  const value = assertion.value || {};
  const settings = typeof value === "object" ? value : {};
  const options = {
    method: settings.method ?? "average",
    // An explicit null/undefined `weights` (e.g. an empty YAML key) falls back to
    // "no weights" instead of crashing when it is indexed below.
    weights: settings.weights ?? {},
    threshold: settings.threshold
  };

  const scores = resultsWithGradingResults.map((result, index) => {
    const componentResults = result.gradingResult?.componentResults || [];
    const relevantResults = componentResults.filter((r) => r.assertion && r.assertion.type !== "max-score" && r.assertion.type !== "select-best");
    if (relevantResults.length === 0) {
      throw new Error("max-score requires at least one other assertion (besides max-score or select-best) to aggregate scores from");
    }

    let totalWeightedScore = 0;
    let totalWeight = 0;
    for (const componentResult of relevantResults) {
      const assertionType = componentResult.assertion?.type || "unknown";
      // Assertion types without a configured weight count with weight 1.
      const configuredWeight = options.weights[assertionType];
      const weight = configuredWeight === undefined ? 1 : configuredWeight;
      totalWeightedScore += (componentResult.score || 0) * weight;
      totalWeight += weight;
    }

    let aggregateScore;
    if (options.method === "sum") {
      aggregateScore = totalWeightedScore;
    } else {
      aggregateScore = totalWeight > 0 ? totalWeightedScore / totalWeight : 0;
    }
    return {
      index,
      score: aggregateScore,
      componentCount: relevantResults.length,
      totalWeight
    };
  });

  // Strict comparison keeps the first output on ties.
  let maxScore = -Infinity;
  let winnerIndex = 0;
  for (let i = 0; i < scores.length; i++) {
    if (scores[i].score > maxScore) {
      maxScore = scores[i].score;
      winnerIndex = i;
    }
  }

  const meetsThreshold = options.threshold === undefined || maxScore >= options.threshold;
  return scores.map(({ index, score, componentCount, totalWeight }) => {
    const isWinner = index === winnerIndex && meetsThreshold;
    let reason;
    if (isWinner) {
      reason = `Selected as highest scoring output (score: ${score.toFixed(3)})`;
    } else if (score === maxScore && !meetsThreshold) {
      reason = `Not selected - score ${score.toFixed(3)} below threshold ${options.threshold}`;
    } else {
      reason = `Not selected (score: ${score.toFixed(3)}, max: ${maxScore.toFixed(3)})`;
    }
    return {
      pass: isWinner,
      score: isWinner ? 1 : 0,
      reason,
      namedScores: {
        maxScore: score,
        assertionCount: componentCount,
        totalWeight
      }
    };
  });
}
343
+ //#endregion
344
+ //#region src/matchers/moderation.ts
345
/**
 * Runs a moderation provider over an assistant response.
 *
 * Passes when there is nothing to moderate, when the provider reports no flags,
 * or when none of the reported flags belong to `categories`. An empty
 * `categories` list means every flag is relevant.
 *
 * @param params `userPrompt`, `assistantResponse` and optional `categories` (flag codes to consider).
 * @param grading Optional grading config; only `grading.provider` is read here.
 * @returns An object with `pass`, `score` (1 or 0) and `reason`.
 */
async function matchesModeration({ userPrompt, assistantResponse, categories = [] }, grading) {
  const passed = (reason) => ({
    pass: true,
    score: 1,
    reason
  });
  const failed = (reason) => ({
    pass: false,
    score: 0,
    reason
  });

  if (!assistantResponse) {
    return passed("No output to moderate");
  }

  const defaultProviders = await getDefaultProviders();
  // Without an OpenAI key but with a Replicate credential, the Llama Guard
  // provider on Replicate is used as the default instead.
  const preferReplicate = !getEnvString("OPENAI_API_KEY") && (getEnvString("REPLICATE_API_KEY") || getEnvString("REPLICATE_API_TOKEN"));
  let defaultModerationProvider;
  if (preferReplicate) {
    defaultModerationProvider = await loadApiProvider(LLAMA_GUARD_REPLICATE_PROVIDER);
  } else {
    defaultModerationProvider = defaultProviders.moderationProvider;
  }

  const moderationProvider = await getAndCheckProvider("moderation", grading?.provider, defaultModerationProvider, "moderation check");
  invariant(moderationProvider, "Moderation provider must be defined");

  const resp = await moderationProvider.callModerationApi(userPrompt, assistantResponse);
  if (resp.error) {
    return failed(`Moderation API error: ${resp.error}`);
  }

  const { flags } = resp;
  if (!flags || flags.length === 0) {
    return passed("No moderation flags detected");
  }

  const relevantFlags = categories.length === 0 ? flags : flags.filter((flag) => categories.includes(flag.code));
  if (relevantFlags.length > 0) {
    return failed(`Moderation flags detected: ${relevantFlags.map((flag) => flag.description).join(", ")}`);
  }
  return passed("No relevant moderation flags detected");
}
379
+ //#endregion
380
+ //#region src/assertions/contextUtils.ts
381
/**
 * Determines the context used by context-based assertions.
 *
 * Resolution order:
 *  1. `test.vars.context`, when truthy (a string or an array of strings);
 *     otherwise `fallbackContext`, when truthy.
 *  2. If `assertion.contextTransform` is set, its result replaces whatever
 *     step 1 produced.
 *
 * @param assertion - The assertion configuration (`contextTransform` is read)
 * @param test - The test case (`vars` is read)
 * @param output - The provider output, used as transform input unless the provider response carries `providerTransformedOutput`
 * @param prompt - The prompt text, exposed to the transform as `prompt.label`
 * @param fallbackContext - Optional context used when `test.vars.context` is not set
 * @param providerResponse - Optional provider response; `providerTransformedOutput` and `metadata` are read
 * @returns A non-empty string or a non-empty array of non-empty strings
 * @throws Error if the transform fails or returns something other than a string / string array;
 * an invariant failure if no usable context results
 */
async function resolveContext(assertion, test, output, prompt, fallbackContext, providerResponse) {
  let contextValue;
  const declaredContext = test.vars?.context;
  if (declaredContext) {
    if (typeof declaredContext === "string") {
      contextValue = declaredContext;
    } else if (Array.isArray(declaredContext)) {
      const badIndex = declaredContext.findIndex((entry) => typeof entry !== "string");
      if (badIndex !== -1) {
        invariant(false, `Invalid context: expected an array of strings, but found ${typeof declaredContext[badIndex]} at index ${badIndex}`);
      }
      contextValue = declaredContext;
    }
  } else if (fallbackContext) {
    contextValue = fallbackContext;
  }

  if (assertion.contextTransform) {
    try {
      const transformInput = providerResponse?.providerTransformedOutput ?? output;
      const transformContext = {
        vars: test.vars,
        prompt: { label: prompt }
      };
      if (providerResponse && providerResponse.metadata) {
        transformContext.metadata = providerResponse.metadata;
      }
      const transformed = await transform(assertion.contextTransform, transformInput, transformContext);
      const isStringArray = Array.isArray(transformed) && transformed.every((item) => typeof item === "string");
      invariant(typeof transformed === "string" || isStringArray, `contextTransform must return a string or array of strings. Got ${typeof transformed}. Check your transform expression: ${assertion.contextTransform}`);
      contextValue = transformed;
    } catch (error) {
      throw new Error(`Failed to transform context using expression '${assertion.contextTransform}': ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  const isNonEmptyString = typeof contextValue === "string" && contextValue.length > 0;
  const isNonEmptyStringArray = Array.isArray(contextValue) && contextValue.length > 0 && contextValue.every((item) => typeof item === "string" && item.length > 0);
  invariant(isNonEmptyString || isNonEmptyStringArray, "Context is required for context-based assertions. Provide either a \"context\" variable (string or array of strings) in your test case or use \"contextTransform\" to extract context from the provider response.");
  return contextValue;
}
423
/**
 * Flattens a context value into the single string used in prompts.
 * An array of chunks is joined with a blank line between chunks; any other
 * value is returned unchanged.
 */
function serializeContext(context) {
  if (!Array.isArray(context)) {
    return context;
  }
  return context.join("\n\n");
}
430
+ //#endregion
431
+ //#region src/matchers/rag.ts
432
/**
 * Scores how relevant `output` is to `input`.
 *
 * A text provider is prompted three times with the answer, and each reply is
 * kept as a candidate question. The score is the mean cosine similarity
 * between the embedding of `input` and the embedding of each candidate.
 *
 * @param input Original query; embedded once.
 * @param output Answer under test; passed through `tryParse` before prompting.
 * @param threshold Minimum mean similarity to pass (with a `Number.EPSILON` tolerance).
 * @param grading Optional grading config (`provider`, `rubricPrompt`).
 * @param providerCallContext Forwarded unchanged to `callProviderWithContext`.
 * @returns A result with `pass`, `score`, `reason`, `tokensUsed` and `metadata`,
 * or the result of `fail(...)` on the first provider error.
 */
async function matchesAnswerRelevance(input, output, threshold, grading, providerCallContext) {
  const QUESTION_SAMPLES = 3;
  const defaultProviders = await getDefaultProviders();
  const embedder = await getAndCheckProvider("embedding", grading?.provider, defaultProviders.embeddingProvider, "answer relevancy check");
  const questionGenerator = await getAndCheckProvider("text", grading?.provider, defaultProviders.gradingProvider, "answer relevancy check");
  const tokensUsed = normalizeMatcherTokenUsage(undefined);
  const rubricPrompt = await loadRubricPrompt(grading?.rubricPrompt, ANSWER_RELEVANCY_GENERATE);
  const answer = tryParse(output);
  const promptText = await renderLlmRubricPrompt(rubricPrompt, { answer });

  // The generator is called sequentially; the first failure aborts the check.
  const candidateQuestions = [];
  while (candidateQuestions.length < QUESTION_SAMPLES) {
    const generated = await callProviderWithContext(questionGenerator, promptText, "answer-relevance", { answer }, providerCallContext);
    accumulateTokenUsage(tokensUsed, generated.tokenUsage);
    if (generated.error || !generated.output) {
      return fail(generated.error || "No output", tokensUsed);
    }
    invariant(typeof generated.output === "string", "answer relevancy check produced malformed response");
    candidateQuestions.push(generated.output);
  }

  invariant(typeof embedder.callEmbeddingApi === "function", `Provider ${embedder.id()} must implement callEmbeddingApi for similarity check`);
  const inputEmbeddingResp = await embedder.callEmbeddingApi(input);
  accumulateTokenUsage(tokensUsed, inputEmbeddingResp.tokenUsage);
  if (inputEmbeddingResp.error || !inputEmbeddingResp.embedding) {
    return fail(inputEmbeddingResp.error || "No embedding", tokensUsed);
  }
  const inputEmbedding = inputEmbeddingResp.embedding;

  const questionsWithScores = [];
  for (const question of candidateQuestions) {
    const questionEmbeddingResp = await embedder.callEmbeddingApi(question);
    accumulateTokenUsage(tokensUsed, questionEmbeddingResp.tokenUsage);
    if (questionEmbeddingResp.error || !questionEmbeddingResp.embedding) {
      return fail(questionEmbeddingResp.error || "No embedding", tokensUsed);
    }
    questionsWithScores.push({
      question,
      similarity: cosineSimilarity(inputEmbedding, questionEmbeddingResp.embedding)
    });
  }

  const similarity = questionsWithScores.reduce((sum, entry) => sum + entry.similarity, 0) / questionsWithScores.length;
  const pass = similarity >= threshold - Number.EPSILON;
  const reason = pass ? `Relevance ${similarity.toFixed(2)} is greater than threshold ${threshold}` : `Relevance ${similarity.toFixed(2)} is less than threshold ${threshold}`;
  return {
    pass,
    score: similarity,
    reason,
    tokensUsed,
    metadata: {
      generatedQuestions: questionsWithScores,
      averageSimilarity: similarity,
      threshold
    }
  };
}
490
/**
 * Scores how much of the ground truth is attributable to the context.
 *
 * A grading provider is asked to classify ground-truth sentences. Lines of its
 * reply containing the attributed / not-attributed token (case-insensitive)
 * are counted, and the score is the attributed fraction of those lines
 * (0 when there are none).
 *
 * @param context Context string or array of chunks; serialized with `serializeContext`.
 * @param groundTruth Expected answer supplied to the rubric prompt.
 * @param threshold Minimum recall to pass (with a `Number.EPSILON` tolerance).
 * @param grading Optional grading config (`provider`, `rubricPrompt`).
 * @param vars Extra variables merged into the prompt variables.
 * @param providerCallContext Forwarded unchanged to `callProviderWithContext`.
 * @returns A result with `pass`, `score`, `reason`, `tokensUsed` and `metadata`.
 */
async function matchesContextRecall(context, groundTruth, threshold, grading, vars, providerCallContext) {
  const defaultProviders = await getDefaultProviders();
  const textProvider = await getAndCheckProvider("text", grading?.provider, defaultProviders.gradingProvider, "context recall check");
  const contextString = serializeContext(context);
  const buildPromptVars = () => ({
    context: contextString,
    groundTruth,
    ...(vars || {})
  });
  const rubricPrompt = await loadRubricPrompt(grading?.rubricPrompt, CONTEXT_RECALL);
  const promptText = await renderLlmRubricPrompt(rubricPrompt, buildPromptVars());
  const resp = await callProviderWithContext(textProvider, promptText, "context-recall", buildPromptVars(), providerCallContext);
  if (resp.error || !resp.output) {
    return fail(resp.error || "No output", resp.tokenUsage);
  }
  invariant(typeof resp.output === "string", "context-recall produced malformed response");

  const attributedToken = CONTEXT_RECALL_ATTRIBUTED_TOKEN.toLowerCase();
  const notAttributedToken = CONTEXT_RECALL_NOT_ATTRIBUTED_TOKEN.toLowerCase();
  const verdictLines = splitIntoSentences(resp.output).filter((line) => {
    const lowered = line.toLowerCase();
    return lowered.includes(attributedToken) || lowered.includes(notAttributedToken);
  });

  const sentenceAttributions = verdictLines.map((line) => {
    const lowered = line.toLowerCase();
    // The not-attributed token is checked first so that a line containing it is
    // never counted as attributed.
    const attributed = !lowered.includes(notAttributedToken) && lowered.includes(attributedToken);
    // Prefer the text after a leading "N." enumeration; otherwise take the text up to the first period.
    const numbered = line.match(/^\d+\.\s*([^\.]+\.)/);
    return {
      sentence: numbered ? numbered[1].trim() : line.split(".")[0].trim(),
      attributed
    };
  });
  const attributedCount = sentenceAttributions.filter((entry) => entry.attributed).length;

  const score = verdictLines.length > 0 ? attributedCount / verdictLines.length : 0;
  const pass = score >= threshold - Number.EPSILON;
  const metadata = {
    sentenceAttributions,
    totalSentences: verdictLines.length,
    attributedSentences: attributedCount,
    score
  };
  return {
    pass,
    score,
    reason: pass ? `Recall ${score.toFixed(2)} is >= ${threshold}` : `Recall ${score.toFixed(2)} is < ${threshold}`,
    tokensUsed: normalizeMatcherTokenUsage(resp.tokenUsage),
    metadata
  };
}
539
/**
 * Scores how much of the context is relevant to the question.
 *
 * A grading provider is asked to extract the relevant sentences. The score is
 * the number of distinct extracted sentences (capped at the number of context
 * units) divided by the number of context units. Context units are the
 * non-blank chunks of an array context, or the sentences of a string context.
 * If the reply contains the `CONTEXT_RELEVANCE_BAD` marker the score is 0.
 *
 * @param question Query supplied to the rubric prompt.
 * @param context Context string or array of chunks.
 * @param threshold Minimum relevance to pass (with a `Number.EPSILON` tolerance).
 * @param grading Optional grading config (`provider`, `rubricPrompt`).
 * @param providerCallContext Forwarded unchanged to `callProviderWithContext`.
 * @returns A result with `pass`, `score`, `reason`, `tokensUsed` and `metadata`.
 */
async function matchesContextRelevance(question, context, threshold, grading, providerCallContext) {
  const defaultProviders = await getDefaultProviders();
  const textProvider = await getAndCheckProvider("text", grading?.provider, defaultProviders.gradingProvider, "context relevance check");
  const contextString = serializeContext(context);
  const buildPromptVars = () => ({
    context: contextString,
    query: question
  });
  const rubricPrompt = await loadRubricPrompt(grading?.rubricPrompt, CONTEXT_RELEVANCE);
  const promptText = await renderLlmRubricPrompt(rubricPrompt, buildPromptVars());
  const resp = await callProviderWithContext(textProvider, promptText, "context-relevance", buildPromptVars(), providerCallContext);
  if (resp.error || !resp.output) {
    return fail(resp.error || "No output", resp.tokenUsage);
  }
  invariant(typeof resp.output === "string", "context-relevance produced malformed response");

  let contextUnits;
  if (Array.isArray(context)) {
    contextUnits = context.filter((chunk) => chunk.trim().length > 0);
  } else {
    contextUnits = splitIntoSentences(context);
  }
  const totalContextUnits = contextUnits.length;

  const extractedSentences = splitIntoSentences(resp.output);
  const insufficientInformation = resp.output.includes(CONTEXT_RELEVANCE_BAD);
  const relevantSentences = insufficientInformation ? [] : [...new Set(extractedSentences)];
  const numerator = insufficientInformation ? 0 : Math.min(relevantSentences.length, totalContextUnits);

  const score = totalContextUnits > 0 ? numerator / totalContextUnits : 0;
  const pass = score >= threshold - Number.EPSILON;
  const metadata = {
    extractedSentences: relevantSentences,
    totalContextUnits,
    totalContextSentences: totalContextUnits,
    contextUnits,
    relevantSentenceCount: numerator,
    insufficientInformation,
    score
  };
  return {
    pass,
    score,
    reason: pass ? `Context relevance ${score.toFixed(2)} is >= ${threshold}` : `Context relevance ${score.toFixed(2)} is < ${threshold}`,
    tokensUsed: normalizeMatcherTokenUsage(resp.tokenUsage),
    metadata
  };
}
582
/**
 * Scores how faithful `output` is to `context`, using two provider calls.
 *
 *  1. "longform": the provider turns the question/answer into statements.
 *  2. "nli": the provider judges each statement against the context.
 *
 * The score is `1 - unsupported / statements`, clamped to [0, 1]. Verdicts are
 * read either from the text after "final verdict for each statement in order:"
 * (period-separated, a verdict without "yes" counts as unsupported) or, when
 * that marker is absent, by counting "verdict: no" / "verdict: yes" occurrences.
 *
 * @param query Question supplied to the longform prompt.
 * @param output Answer under test; passed through `tryParse`.
 * @param context Context string or array of chunks; serialized with `serializeContext`.
 * @param threshold Minimum faithfulness to pass (with a `Number.EPSILON` tolerance).
 * @param grading Optional grading config; `rubricPrompt`, when given, must be an array of
 * [longform, nli] prompts (strings or objects with `content`).
 * @param vars Extra variables merged into the prompt variables.
 * @param providerCallContext Forwarded unchanged to `callProviderWithContext`.
 * @returns A result with `pass`, `score`, `reason` and `tokensUsed`.
 */
async function matchesContextFaithfulness(query, output, context, threshold, grading, vars, providerCallContext) {
  const defaultProviders = await getDefaultProviders();
  const textProvider = await getAndCheckProvider("text", grading?.provider, defaultProviders.gradingProvider, "faithfulness check");
  const tokensUsed = normalizeMatcherTokenUsage(undefined);
  if (grading?.rubricPrompt) {
    invariant(Array.isArray(grading.rubricPrompt), "rubricPrompt must be an array");
  }

  // A rubric entry may be a plain string or an object carrying `content`.
  const rawPromptAt = (index) => {
    const entry = grading?.rubricPrompt?.[index];
    return typeof entry === "string" ? entry : entry?.content;
  };
  const rawLongformPrompt = rawPromptAt(0);
  const rawNliPrompt = rawPromptAt(1);
  const longformPrompt = await loadRubricPrompt(rawLongformPrompt, CONTEXT_FAITHFULNESS_LONGFORM);
  const nliPrompt = await loadRubricPrompt(rawNliPrompt, CONTEXT_FAITHFULNESS_NLI_STATEMENTS);

  // Step 1: derive statements from the answer.
  const buildLongformVars = () => ({
    question: query,
    answer: tryParse(output),
    ...(vars || {})
  });
  const longformText = await renderLlmRubricPrompt(longformPrompt, buildLongformVars());
  const longformResp = await callProviderWithContext(textProvider, longformText, "context-faithfulness-longform", buildLongformVars(), providerCallContext);
  accumulateTokenUsage(tokensUsed, longformResp.tokenUsage);
  if (longformResp.error || !longformResp.output) {
    return fail(longformResp.error || "No output", tokensUsed);
  }
  invariant(typeof longformResp.output === "string", "context-faithfulness produced malformed response");

  // Step 2: judge the statements against the context.
  const contextString = serializeContext(context);
  const statements = splitIntoSentences(longformResp.output);
  const buildNliVars = () => ({
    context: contextString,
    statements,
    ...(vars || {})
  });
  const nliText = await renderLlmRubricPrompt(nliPrompt, buildNliVars());
  const nliResp = await callProviderWithContext(textProvider, nliText, "context-faithfulness-nli", buildNliVars(), providerCallContext);
  accumulateTokenUsage(tokensUsed, nliResp.tokenUsage);
  if (nliResp.error || !nliResp.output) {
    return fail(nliResp.error || "No output", tokensUsed);
  }
  invariant(typeof nliResp.output === "string", "context-faithfulness produced malformed response");

  const verdictMarker = "Final verdict for each statement in order:".toLowerCase();
  const verdictText = nliResp.output.toLowerCase().trim();
  let score = 0;
  if (statements.length > 0) {
    const markerAt = verdictText.indexOf(verdictMarker);
    if (markerAt !== -1) {
      const parsedVerdicts = verdictText
        .slice(markerAt + verdictMarker.length)
        .split(".")
        .filter((answer) => answer.trim() !== "");
      if (parsedVerdicts.length > 0) {
        const unsupported = parsedVerdicts.filter((answer) => !answer.includes("yes")).length;
        score = 1 - unsupported / statements.length;
      }
    } else {
      const noVerdictCount = verdictText.split("verdict: no").length - 1;
      const yesVerdictCount = verdictText.split("verdict: yes").length - 1;
      if (noVerdictCount + yesVerdictCount > 0) {
        score = 1 - noVerdictCount / statements.length;
      }
    }
  }
  score = Math.min(1, Math.max(0, score));

  const pass = score >= threshold - Number.EPSILON;
  return {
    pass,
    score,
    reason: pass ? `Faithfulness ${score.toFixed(2)} is >= ${threshold}` : `Faithfulness ${score.toFixed(2)} is < ${threshold}`,
    tokensUsed
  };
}
639
+ //#endregion
640
+ //#region src/matchers/similarity.ts
641
/**
 * Computes the similarity (or distance) between two embeddings for `metric`.
 *
 * @param expectedEmbedding Embedding of the expected text.
 * @param outputEmbedding Embedding of the actual output.
 * @param metric One of "cosine", "dot_product" or "euclidean".
 * @param tokensUsed Token usage forwarded to `fail(...)` for an unsupported metric.
 * @returns The value computed by the matching helper, or the result of `fail(...)`
 * when the metric is not supported.
 */
function calculateSimilarityScore(expectedEmbedding, outputEmbedding, metric, tokensUsed) {
  if (metric === "cosine") {
    return cosineSimilarity(expectedEmbedding, outputEmbedding);
  }
  if (metric === "dot_product") {
    return dotProduct(expectedEmbedding, outputEmbedding);
  }
  if (metric === "euclidean") {
    return euclideanDistance(expectedEmbedding, outputEmbedding);
  }
  return fail(`Unsupported metric: ${metric}`, tokensUsed);
}
649
/**
 * Converts a raw similarity (or euclidean distance) into a grading result.
 *
 * For "euclidean" the value is a distance: it passes when it is at most the
 * threshold, and the score is `1 / (1 + distance)`. For every other metric the
 * value passes when it is at least the threshold and is used as the score.
 * With `inverse` set, the pass condition flips and the score becomes `1 - score`.
 * All comparisons allow a `Number.EPSILON` tolerance.
 *
 * @param similarity Similarity value, or distance when `metric` is "euclidean".
 * @param threshold Threshold to compare against.
 * @param inverse Whether the assertion is negated.
 * @param metric Metric name; only "euclidean" is treated specially.
 * @param tokensUsed Token usage attached to the result as-is.
 * @returns An object with `pass`, `score`, `reason` and `tokensUsed`.
 */
function buildSimilarityResult(similarity, threshold, inverse, metric, tokensUsed) {
  if (metric !== "euclidean") {
    const atLeastReason = `Similarity ${similarity.toFixed(2)} is greater than or equal to threshold ${threshold}`;
    const belowReason = `Similarity ${similarity.toFixed(2)} is less than threshold ${threshold}`;
    let pass;
    let score;
    let reason;
    if (inverse) {
      pass = similarity <= threshold + Number.EPSILON;
      score = 1 - similarity;
      reason = pass ? belowReason : atLeastReason;
    } else {
      pass = similarity >= threshold - Number.EPSILON;
      score = similarity;
      reason = pass ? atLeastReason : belowReason;
    }
    return {
      pass,
      score,
      reason,
      tokensUsed
    };
  }

  const distance = similarity;
  // Maps a distance in [0, inf) onto a score in (0, 1]; smaller distance gives a higher score.
  const closeness = 1 / (1 + distance);
  const withinReason = `Distance ${distance.toFixed(2)} is less than or equal to threshold ${threshold}`;
  const beyondReason = `Distance ${distance.toFixed(2)} is greater than threshold ${threshold}`;
  let pass;
  let score;
  let reason;
  if (inverse) {
    pass = distance >= threshold - Number.EPSILON;
    score = 1 - closeness;
    reason = pass ? beyondReason : withinReason;
  } else {
    pass = distance <= threshold + Number.EPSILON;
    score = closeness;
    reason = pass ? withinReason : beyondReason;
  }
  return {
    pass,
    score,
    reason,
    tokensUsed
  };
}
675
/**
 * Obtains a similarity value for `expected` vs `output` from a provider.
 *
 * For the "cosine" metric a provider exposing `callSimilarityApi` is asked
 * directly. Otherwise both texts are embedded with `callEmbeddingApi` (the two
 * calls are started together) and the metric is computed locally.
 * Token usage from every call is accumulated into `tokensUsed`.
 *
 * @param finalProvider Provider implementing `callSimilarityApi` and/or `callEmbeddingApi`.
 * @param expected Expected text.
 * @param output Actual output text.
 * @param metric Similarity metric name.
 * @param tokensUsed Token usage accumulator, updated in place via `accumulateTokenUsage`.
 * @returns A number on success, otherwise the result of `fail(...)`.
 * @throws Error when the provider implements neither API.
 */
async function calculateProviderSimilarity(finalProvider, expected, output, metric, tokensUsed) {
  if (metric === "cosine" && "callSimilarityApi" in finalProvider) {
    const directResp = await finalProvider.callSimilarityApi(expected, output);
    accumulateTokenUsage(tokensUsed, directResp.tokenUsage);
    if (directResp.error) {
      return fail(directResp.error, tokensUsed);
    }
    if (directResp.similarity == null) {
      return fail("Unknown error fetching similarity", tokensUsed);
    }
    if (!Number.isFinite(directResp.similarity)) {
      return fail(`Invalid similarity score: ${directResp.similarity}`, tokensUsed);
    }
    return directResp.similarity;
  }

  const embed = "callEmbeddingApi" in finalProvider ? finalProvider.callEmbeddingApi : undefined;
  if (typeof embed !== "function") {
    if ("callSimilarityApi" in finalProvider) {
      return fail(`Provider ${finalProvider.id()} only supports cosine similarity via callSimilarityApi`, tokensUsed);
    }
    throw new Error("Provider must implement callSimilarityApi or callEmbeddingApi");
  }

  // `.call` keeps the provider as `this` for the detached method reference.
  const pendingEmbeddings = [expected, output].map((text) => embed.call(finalProvider, text));
  const [expectedResp, outputResp] = await Promise.all(pendingEmbeddings);

  const embeddingUsage = normalizeMatcherTokenUsage(undefined);
  accumulateTokenUsage(embeddingUsage, expectedResp.tokenUsage);
  accumulateTokenUsage(embeddingUsage, outputResp.tokenUsage);
  accumulateTokenUsage(tokensUsed, embeddingUsage);

  if (expectedResp.error || outputResp.error) {
    return fail(expectedResp.error || outputResp.error || "Unknown error fetching embeddings", tokensUsed);
  }
  if (!expectedResp.embedding || !outputResp.embedding) {
    return fail("Embedding not found", tokensUsed);
  }
  return calculateSimilarityScore(expectedResp.embedding, outputResp.embedding, metric, tokensUsed);
}
698
/**
 * Checks whether `output` is similar enough to `expected`.
 *
 * For the cosine metric, when a redteam config is present and
 * `shouldGenerateRemote(...)` is truthy, grading is delegated to
 * `doRemoteGrading`. Otherwise an embedding provider is resolved and the
 * similarity is computed through `calculateProviderSimilarity`.
 *
 * @param expected Expected text.
 * @param output Actual output text.
 * @param threshold Threshold for the chosen metric.
 * @param inverse Whether the assertion is negated (default false).
 * @param grading Optional grading config; only `grading.provider` is read here.
 * @param metric Similarity metric (default "cosine").
 * @returns A grading result; failures are returned via `fail(...)` rather than thrown
 * for remote grading errors.
 */
async function matchesSimilarity(expected, output, threshold, inverse = false, grading, metric = "cosine") {
  const useRemoteGrading = metric === "cosine" && state.config?.redteam && shouldGenerateRemote({ requireEmbeddingProvider: true });
  if (useRemoteGrading) {
    try {
      return await doRemoteGrading({
        task: "similar",
        expected,
        output,
        threshold,
        inverse
      });
    } catch (error) {
      return fail(`Could not perform remote grading: ${error}`);
    }
  }

  const defaultProviders = await getDefaultProviders();
  const embeddingProvider = await getAndCheckProvider("embedding", grading?.provider, defaultProviders.embeddingProvider, "similarity check");
  const tokensUsed = normalizeMatcherTokenUsage(undefined);
  const similarity = await calculateProviderSimilarity(embeddingProvider, expected, output, metric, tokensUsed);
  // A non-number here is an already-built failure result; pass it through.
  if (typeof similarity !== "number") {
    return similarity;
  }
  return buildSimilarityResult(similarity, threshold, inverse, metric, tokensUsed);
}
717
+ //#endregion
221
718
  //#region src/tracing/evaluatorTracing.ts
222
719
  let otlpReceiverStarted = false;
223
720
  const DEFAULT_OTLP_ACCEPT_FORMATS = ["json", "protobuf"];
@@ -261,7 +758,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
261
758
  telemetry.record("feature_used", { feature: "tracing" });
262
759
  try {
263
760
  logger.debug("[EvaluatorTracing] Tracing configuration detected, starting OTLP receiver");
264
- const { startOTLPReceiver } = await import("./otlpReceiver-BntK801g.js");
761
+ const { startOTLPReceiver } = await import("./otlpReceiver-UYMQx3sy.js");
265
762
  const port = testSuite.tracing.otlp.http.port || 4318;
266
763
  const host = testSuite.tracing.otlp.http.host || "127.0.0.1";
267
764
  const acceptFormats = normalizeOtlpAcceptFormats(testSuite.tracing.otlp.http.acceptFormats);
@@ -285,7 +782,7 @@ async function startOtlpReceiverIfNeeded(testSuite) {
285
782
  async function stopOtlpReceiverIfNeeded() {
286
783
  if (otlpReceiverStarted) try {
287
784
  logger.debug("[EvaluatorTracing] Stopping OTLP receiver");
288
- const { stopOTLPReceiver } = await import("./otlpReceiver-BntK801g.js");
785
+ const { stopOTLPReceiver } = await import("./otlpReceiver-UYMQx3sy.js");
289
786
  await stopOTLPReceiver();
290
787
  otlpReceiverStarted = false;
291
788
  logger.info("[EvaluatorTracing] OTLP receiver stopped successfully");
@@ -320,7 +817,7 @@ async function generateTraceContextIfNeeded(test, evaluateOptions, testIdx, prom
320
817
  }
321
818
  if (!tracingEnabled) return null;
322
819
  logger.debug("[EvaluatorTracing] Importing trace store");
323
- const { getTraceStore } = await import("./store-DCDBhv7B.js");
820
+ const { getTraceStore } = await import("./store-BELqNwvz.js");
324
821
  const traceStore = getTraceStore();
325
822
  const traceId = generateTraceId();
326
823
  const spanId = generateSpanId();
@@ -630,38 +1127,84 @@ async function handleClassifier({ assertion, renderedValue, outputString, test,
630
1127
  }
631
1128
  //#endregion
632
1129
  //#region src/assertions/contains.ts
1130
/**
 * Moves past any mix of whitespace and commas starting at `startIndex`.
 *
 * Repeated commas are consumed together, so they never yield empty fields.
 *
 * @returns The index of the first character that is neither whitespace nor a
 * comma, or an index at/after the end of `value`.
 */
function skipWhitespaceAndCommas(value, startIndex) {
  let cursor = skipWhitespace(value, startIndex);
  while (value[cursor] === ",") {
    cursor = skipWhitespace(value, cursor + 1);
  }
  return cursor;
}
1145
/**
 * Moves past whitespace only, starting at `startIndex`; commas are left for the caller.
 *
 * @returns The index of the first non-whitespace character at or after
 * `startIndex`, or an index at/after the end of `value`.
 */
function skipWhitespace(value, startIndex) {
  const WHITESPACE = /\s/;
  let cursor = startIndex;
  for (; cursor < value.length; cursor += 1) {
    if (!WHITESPACE.test(value[cursor])) {
      break;
    }
  }
  return cursor;
}
1153
/**
 * Reads a double-quoted field whose opening quote sits at `startIndex`.
 *
 * Inside the quotes, `\"` and `\\` are unescaped, and a doubled quote (`""`)
 * yields a single quote character. A field without a closing quote is rejected
 * via `invariant`.
 *
 * @returns `{ field, nextIndex }` where `nextIndex` points just past the closing quote.
 */
function parseQuotedField(value, startIndex) {
  let cursor = startIndex + 1;
  let field = "";
  let terminated = false;
  while (cursor < value.length && !terminated) {
    const current = value[cursor];
    const following = cursor + 1 < value.length ? value[cursor + 1] : undefined;
    if (current === "\\" && (following === "\"" || following === "\\")) {
      field += following;
      cursor += 2;
    } else if (current === "\"" && following === "\"") {
      field += "\"";
      cursor += 2;
    } else if (current === "\"") {
      cursor += 1;
      terminated = true;
    } else {
      field += current;
      cursor += 1;
    }
  }
  invariant(terminated, "Unterminated quoted field in contains assertion value");
  return {
    field,
    nextIndex: cursor
  };
}
1183
/**
 * Reads an unquoted field that runs from `startIndex` to the next comma (or the
 * end of `value`) and trims whitespace from both ends.
 *
 * @returns `{ field, nextIndex }` where `nextIndex` points at the comma, or at
 * the end of `value` when no comma follows.
 */
function parseUnquotedField(value, startIndex) {
  const commaAt = value.indexOf(",", startIndex);
  // With no comma ahead the field runs to the end; a start past the end yields an empty field in place.
  const end = commaAt === -1 ? Math.max(startIndex, value.length) : commaAt;
  return {
    field: value.substring(startIndex, end).trim(),
    nextIndex: end
  };
}
1194
/**
 * Splits a comma-separated assertion value into its fields.
 *
 * Fields may be double-quoted, in which case commas inside the quotes are kept.
 * Whitespace around delimiters and repeated commas are skipped. After a quoted
 * field only whitespace may precede the next comma or the end of the input;
 * anything else is rejected via `invariant`.
 *
 * @returns The parsed fields, in input order.
 */
function parseCommaSeparatedValues(value) {
  const fields = [];
  let cursor = skipWhitespaceAndCommas(value, 0);
  while (cursor < value.length) {
    if (value[cursor] === "\"") {
      const { field, nextIndex } = parseQuotedField(value, cursor);
      fields.push(field);
      cursor = skipWhitespace(value, nextIndex);
      invariant(cursor >= value.length || value[cursor] === ",", "Expected comma after quoted field in contains assertion value");
    } else {
      const { field, nextIndex } = parseUnquotedField(value, cursor);
      fields.push(field);
      cursor = nextIndex;
    }
    cursor = skipWhitespaceAndCommas(value, cursor);
  }
  return fields;
}
@@ -1066,6 +1609,43 @@ const handleGuardrails = async ({ assertion, inverse, providerResponse }) => {
1066
1609
  };
1067
1610
  //#endregion
1068
1611
  //#region src/assertions/html.ts
1612
+ const LITERAL_WRAPPER_PATTERNS = {
1613
+ html: /<html(?=[\s>/])/,
1614
+ head: /<head(?=[\s>/])/,
1615
+ body: /<body(?=[\s>/])/
1616
+ };
1617
+ function isWrapperTagName(tagName) {
1618
+ return tagName === "html" || tagName === "head" || tagName === "body";
1619
+ }
1620
+ function isTextNode(node) {
1621
+ return node.nodeName === "#text";
1622
+ }
1623
+ function isElementNode(node) {
1624
+ return "tagName" in node;
1625
+ }
1626
+ function hasSourceCodeLocation(element) {
1627
+ return "sourceCodeLocation" in element && element.sourceCodeLocation !== null && element.sourceCodeLocation !== void 0;
1628
+ }
1629
+ function getChildNodes(node) {
1630
+ return "childNodes" in node ? node.childNodes : [];
1631
+ }
1632
+ function findFirstElement(root, predicate) {
1633
+ const stack = [root];
1634
+ while (stack.length > 0) {
1635
+ const current = stack.pop();
1636
+ if (isElementNode(current) && predicate(current)) return current;
1637
+ const children = getChildNodes(current);
1638
+ for (let i = children.length - 1; i >= 0; i--) stack.push(children[i]);
1639
+ }
1640
+ }
1641
+ function hasTopLevelText(parentNode) {
1642
+ return parentNode.childNodes.some((node) => isTextNode(node) && Boolean(node.value.trim()));
1643
+ }
1644
+ function isUserProvidedElement(element, inputLowercase) {
1645
+ const tagName = element.tagName.toLowerCase();
1646
+ if (isWrapperTagName(tagName)) return LITERAL_WRAPPER_PATTERNS[tagName].test(inputLowercase) && hasSourceCodeLocation(element);
1647
+ return VALID_HTML_ELEMENTS.has(tagName) || tagName.includes("-");
1648
+ }
1069
1649
  const HTML_PATTERNS = {
1070
1650
  openingTag: /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?>/,
1071
1651
  closingTag: /<\/[a-zA-Z][a-zA-Z0-9-]*\s*>/,
@@ -1221,37 +1801,21 @@ function validateHtml(htmlString) {
1221
1801
  isValid: false,
1222
1802
  reason: "Output appears to be XML, not HTML"
1223
1803
  };
1224
- try {
1225
- const { document } = new JSDOM(trimmed, { contentType: "text/html" }).window;
1226
- if (document.body && !trimmed.toLowerCase().includes("<body")) {
1227
- if (Array.from(document.body.childNodes).some((node) => node.nodeType === 3 && node.textContent?.trim())) return {
1228
- isValid: false,
1229
- reason: "Output must be wrapped in HTML tags"
1230
- };
1231
- }
1232
- const allElements = document.querySelectorAll("*");
1233
- if (!Array.from(allElements).find((element) => {
1234
- const tagName = element.tagName.toLowerCase();
1235
- if ([
1236
- "html",
1237
- "head",
1238
- "body"
1239
- ].includes(tagName) && !trimmed.toLowerCase().includes(`<${tagName}`)) return false;
1240
- return VALID_HTML_ELEMENTS.has(tagName) || tagName.includes("-");
1241
- })) return {
1242
- isValid: false,
1243
- reason: "Output does not contain recognized HTML elements"
1244
- };
1245
- return {
1246
- isValid: true,
1247
- reason: "Output is valid HTML"
1248
- };
1249
- } catch (error) {
1250
- return {
1251
- isValid: false,
1252
- reason: `HTML parsing failed: ${error instanceof Error ? error.message : "Unknown error"}`
1253
- };
1254
- }
1804
+ const document = parse$1(trimmed, { sourceCodeLocationInfo: true });
1805
+ const inputLowercase = trimmed.toLowerCase();
1806
+ const body = findFirstElement(document, (element) => element.tagName === "body");
1807
+ if (!(body !== void 0 && LITERAL_WRAPPER_PATTERNS.body.test(inputLowercase) && hasSourceCodeLocation(body)) && body && hasTopLevelText(body)) return {
1808
+ isValid: false,
1809
+ reason: "Output must be wrapped in HTML tags"
1810
+ };
1811
+ if (!findFirstElement(document, (element) => isUserProvidedElement(element, inputLowercase))) return {
1812
+ isValid: false,
1813
+ reason: "Output does not contain recognized HTML elements"
1814
+ };
1815
+ return {
1816
+ isValid: true,
1817
+ reason: "Output is valid HTML"
1818
+ };
1255
1819
  }
1256
1820
  const handleContainsHtml = ({ assertion, outputString, inverse }) => {
1257
1821
  const pass = containsHtml(outputString) !== inverse;
@@ -2314,11 +2878,10 @@ function handleRougeScore({ baseType, assertion, renderedValue, outputString, in
2314
2878
  const rougeMethod = rouge[baseType[baseType.length - 1]];
2315
2879
  const score = rougeMethod(outputString, renderedValue, {});
2316
2880
  const threshold = assertion.threshold ?? .75;
2317
- const pass = score >= threshold != inverse;
2318
2881
  return {
2319
- pass,
2882
+ pass: score >= threshold !== inverse,
2320
2883
  score: inverse ? 1 - score : score,
2321
- reason: pass ? `${baseType.toUpperCase()} score ${score.toFixed(2)} is greater than or equal to threshold ${threshold}` : `${baseType.toUpperCase()} score ${score.toFixed(2)} is less than threshold ${threshold}`,
2884
+ reason: `${baseType.toUpperCase()} score ${score.toFixed(2)} is ${score >= threshold ? "greater than or equal to" : "less than"} threshold ${threshold}`,
2322
2885
  assertion
2323
2886
  };
2324
2887
  }
@@ -2380,6 +2943,192 @@ const handleRuby = async ({ assertion, renderedValue, valueFromScript, assertion
2380
2943
  }
2381
2944
  };
2382
2945
  //#endregion
2946
+ //#region src/providers/webSearchUtils.ts
2947
+ function hasTool(provider, predicate) {
2948
+ return Array.isArray(provider.config?.tools) && provider.config.tools.some(predicate);
2949
+ }
2950
+ function getProviderId(provider) {
2951
+ if (typeof provider.id !== "function") return null;
2952
+ try {
2953
+ return provider.id();
2954
+ } catch (err) {
2955
+ logger.debug(`Failed to read provider id: ${err}`);
2956
+ return null;
2957
+ }
2958
+ }
2959
+ function isOpenAiResponsesProvider(provider, id) {
2960
+ return id.includes("openai:responses") || provider.constructor?.name === "OpenAiResponsesProvider";
2961
+ }
2962
+ /**
2963
+ * Check if a provider has web search capabilities
2964
+ * @param provider The provider to check
2965
+ * @returns true if the provider supports web search
2966
+ */
2967
+ function hasWebSearchCapability(provider) {
2968
+ if (!provider) return false;
2969
+ const id = getProviderId(provider);
2970
+ if (!id) return false;
2971
+ if (id.includes("perplexity")) return true;
2972
+ if ((id.includes("google") || id.includes("gemini") || id.includes("vertex")) && hasTool(provider, (t) => t.googleSearch !== void 0)) return true;
2973
+ if (id.includes("xai") && provider.config?.search_parameters?.mode === "on") return true;
2974
+ if (isOpenAiResponsesProvider(provider, id) && hasTool(provider, (t) => t.type === "web_search_preview")) return true;
2975
+ if (id.startsWith("openai:codex") && (provider.config?.web_search_mode === "live" || provider.config?.web_search_mode === "cached" || provider.config?.web_search_enabled === true)) return true;
2976
+ if (id.includes("anthropic") && hasTool(provider, (t) => t.type === "web_search_20250305")) return true;
2977
+ return false;
2978
+ }
2979
+ /**
2980
+ * Load a provider with web search capabilities.
2981
+ * Tries multiple providers in order of preference until one succeeds.
2982
+ * Uses the latest and most capable models from each provider with specific checkpoint IDs.
2983
+ *
2984
+ * @param preferAnthropic Whether to try Anthropic first (true) or OpenAI first (false)
2985
+ * @returns A provider with web search capabilities or null
2986
+ */
2987
+ async function loadWebSearchProvider(preferAnthropic = false) {
2988
+ const loadAnthropicWebSearch = async () => {
2989
+ try {
2990
+ return await loadApiProvider("anthropic:messages:claude-opus-4-6", { options: { config: { tools: [{
2991
+ type: "web_search_20250305",
2992
+ name: "web_search",
2993
+ max_uses: 5
2994
+ }] } } });
2995
+ } catch (err) {
2996
+ logger.debug(`Failed to load Anthropic web search provider: ${err}`);
2997
+ return null;
2998
+ }
2999
+ };
3000
+ const loadOpenAIWebSearch = async () => {
3001
+ try {
3002
+ return await loadApiProvider("openai:responses:gpt-5.4-2026-03-05", { options: { config: { tools: [{ type: "web_search_preview" }] } } });
3003
+ } catch (err) {
3004
+ logger.debug(`Failed to load OpenAI web search provider: ${err}`);
3005
+ return null;
3006
+ }
3007
+ };
3008
+ const loadPerplexity = async () => {
3009
+ try {
3010
+ return await loadApiProvider("perplexity:sonar-pro");
3011
+ } catch (err) {
3012
+ logger.debug(`Failed to load Perplexity provider: ${err}`);
3013
+ return null;
3014
+ }
3015
+ };
3016
+ const loadGoogleWebSearch = async () => {
3017
+ try {
3018
+ return await loadApiProvider("google:gemini-3-pro-preview", { options: { config: { tools: [{ googleSearch: {} }] } } });
3019
+ } catch (err) {
3020
+ logger.debug(`Failed to load Google web search provider: ${err}`);
3021
+ return null;
3022
+ }
3023
+ };
3024
+ const loadVertexWebSearch = async () => {
3025
+ try {
3026
+ return await loadApiProvider("vertex:gemini-3-pro-preview", { options: { config: { tools: [{ googleSearch: {} }] } } });
3027
+ } catch (err) {
3028
+ logger.debug(`Failed to load Vertex web search provider: ${err}`);
3029
+ return null;
3030
+ }
3031
+ };
3032
+ const loadXaiWebSearch = async () => {
3033
+ try {
3034
+ return await loadApiProvider("xai:grok-4-1-fast-reasoning", { options: { config: { search_parameters: { mode: "on" } } } });
3035
+ } catch (err) {
3036
+ logger.debug(`Failed to load xAI web search provider: ${err}`);
3037
+ return null;
3038
+ }
3039
+ };
3040
+ const providers = preferAnthropic ? [
3041
+ loadAnthropicWebSearch,
3042
+ loadOpenAIWebSearch,
3043
+ loadPerplexity,
3044
+ loadGoogleWebSearch,
3045
+ loadVertexWebSearch,
3046
+ loadXaiWebSearch
3047
+ ] : [
3048
+ loadOpenAIWebSearch,
3049
+ loadAnthropicWebSearch,
3050
+ loadPerplexity,
3051
+ loadGoogleWebSearch,
3052
+ loadVertexWebSearch,
3053
+ loadXaiWebSearch
3054
+ ];
3055
+ for (const getProvider of providers) {
3056
+ const provider = await getProvider();
3057
+ if (provider && hasWebSearchCapability(provider)) {
3058
+ logger.info(`Using ${getProviderId(provider) ?? "loaded provider"} as web search provider`);
3059
+ return provider;
3060
+ }
3061
+ if (provider) logger.debug(`Loaded provider ${getProviderId(provider) ?? "unknown"} does not support web search`);
3062
+ }
3063
+ return null;
3064
+ }
3065
+ //#endregion
3066
+ //#region src/matchers/search.ts
3067
+ async function matchesSearchRubric(rubric, llmOutput, grading, vars, assertion, _provider, providerCallContext) {
3068
+ if (!grading) throw new Error("Cannot grade output without grading config. Specify --grader option or grading config.");
3069
+ const defaultProviders = await getDefaultProviders();
3070
+ const defaultSearchProviders = [
3071
+ defaultProviders.webSearchProvider,
3072
+ defaultProviders.llmRubricProvider,
3073
+ defaultProviders.gradingProvider
3074
+ ];
3075
+ let searchProvider = (grading.provider ? await getGradingProvider("text", grading.provider, null) : null) || defaultSearchProviders.find((provider) => Boolean(provider));
3076
+ if (!hasWebSearchCapability(searchProvider)) {
3077
+ const webSearchDefault = defaultSearchProviders.find((provider) => hasWebSearchCapability(provider));
3078
+ if (webSearchDefault) searchProvider = webSearchDefault;
3079
+ }
3080
+ if (!hasWebSearchCapability(searchProvider)) {
3081
+ const webSearchProvider = await loadWebSearchProvider(true);
3082
+ if (webSearchProvider) searchProvider = webSearchProvider;
3083
+ }
3084
+ if (!searchProvider || !hasWebSearchCapability(searchProvider)) throw new Error(`search-rubric assertion requires a grading provider with web search capabilities. Use --grader with a web search provider (e.g., anthropic:messages:${DEFAULT_ANTHROPIC_MODEL}, openai:responses:o4-mini with tools configured, perplexity:sonar) or configure one in defaultTest.options.provider`);
3085
+ const prompt = await renderLlmRubricPrompt(await loadRubricPrompt(grading?.rubricPrompt, DEFAULT_WEB_SEARCH_PROMPT), {
3086
+ output: tryParse(llmOutput),
3087
+ rubric,
3088
+ ...vars || {}
3089
+ });
3090
+ const resp = await callProviderWithContext(searchProvider, prompt, "search-rubric", {
3091
+ output: tryParse(llmOutput),
3092
+ rubric,
3093
+ ...vars || {}
3094
+ }, providerCallContext);
3095
+ if (resp.error || !resp.output) return {
3096
+ pass: false,
3097
+ score: 0,
3098
+ reason: `Search rubric evaluation failed: ${resp.error || "No output"}`,
3099
+ tokensUsed: resp.tokenUsage,
3100
+ assertion
3101
+ };
3102
+ try {
3103
+ const result = extractFirstJsonObject(String(resp.output));
3104
+ let pass = result.pass ?? false;
3105
+ const score = typeof result.score === "number" ? result.score : pass ? 1 : 0;
3106
+ if (assertion?.threshold !== void 0) pass = pass && score >= assertion.threshold;
3107
+ return {
3108
+ pass,
3109
+ score,
3110
+ reason: result.reason || "No reason provided",
3111
+ tokensUsed: resp.tokenUsage,
3112
+ assertion,
3113
+ metadata: {
3114
+ searchResults: result.searchResults || [],
3115
+ searchProvider: searchProvider.id()
3116
+ }
3117
+ };
3118
+ } catch (err) {
3119
+ logger.warn(`[search-rubric] Could not parse structured JSON from provider response, falling back to substring matching: ${err.message}`);
3120
+ const outputLower = String(resp.output).toLowerCase();
3121
+ const pass = outputLower.includes("\"pass\":true") || outputLower.includes("\"pass\": true");
3122
+ return {
3123
+ pass,
3124
+ score: pass ? 1 : 0,
3125
+ reason: resp.output,
3126
+ tokensUsed: resp.tokenUsage,
3127
+ assertion
3128
+ };
3129
+ }
3130
+ }
3131
+ //#endregion
2383
3132
  //#region src/assertions/searchRubric.ts
2384
3133
  async function handleSearchRubric({ assertion, baseType: _baseType, inverse, provider, providerCallContext, renderedValue, test, providerResponse }) {
2385
3134
  if (renderedValue == null) throw new Error("search-rubric assertion type must have a string value");
@@ -3465,7 +4214,7 @@ const ASSERTION_HANDLERS = {
3465
4214
  "llm-rubric": handleLlmRubric,
3466
4215
  meteor: async (params) => {
3467
4216
  try {
3468
- const { handleMeteorAssertion } = await import("./meteor-DHdzY1Ss.js");
4217
+ const { handleMeteorAssertion } = await import("./meteor-CeGo0Lu2.js");
3469
4218
  return handleMeteorAssertion(params);
3470
4219
  } catch (error) {
3471
4220
  if (error instanceof Error && (error.message.includes("Cannot find module") || error.message.includes("natural\" package is required"))) return {
@@ -3601,7 +4350,7 @@ async function runAssertion({ prompt, provider, assertion, test, vars, latencyMs
3601
4350
  };
3602
4351
  }
3603
4352
  else if (filePath.endsWith(".rb")) try {
3604
- const { runRuby } = await import("./rubyUtils-D1L2d3jb.js");
4353
+ const { runRuby } = await import("./rubyUtils-4hjGxvju.js");
3605
4354
  valueFromScript = await runRuby(filePath, functionName || "get_assert", [output, context]);
3606
4355
  logger.debug(`Ruby script ${filePath} output: ${valueFromScript}`);
3607
4356
  } catch (error) {
@@ -5382,7 +6131,7 @@ async function resolveDefaultTestProvider(defaultTest, testCase) {
5382
6131
  const defaultProvider = defaultTest.provider;
5383
6132
  if (isApiProvider(defaultProvider)) return defaultProvider;
5384
6133
  if (typeof defaultProvider === "object" && defaultProvider.id) {
5385
- const { loadApiProvider } = await import("./providers-iUt5fbAN.js");
6134
+ const { loadApiProvider } = await import("./providers-Ctcc592x.js");
5386
6135
  return loadApiProvider(typeof defaultProvider.id === "function" ? defaultProvider.id() : defaultProvider.id, { options: defaultProvider });
5387
6136
  }
5388
6137
  return defaultProvider;
@@ -5542,7 +6291,7 @@ function buildRepeatCacheContextByTestIdx(runEvalOptions) {
5542
6291
  async function filterCompletedResumeSteps(runEvalOptions, evalRecord) {
5543
6292
  if (!state.resume || !evalRecord.persisted) return;
5544
6293
  try {
5545
- const { default: EvalResult } = await import("./evalResult-D8MT9p0s.js");
6294
+ const { default: EvalResult } = await import("./evalResult-BBK58h2B.js");
5546
6295
  const completedPairs = await EvalResult.getCompletedIndexPairs(evalRecord.id, { excludeErrors: state.retryMode });
5547
6296
  const originalCount = runEvalOptions.length;
5548
6297
  for (let i = runEvalOptions.length - 1; i >= 0; i--) {
@@ -6001,9 +6750,8 @@ var Evaluator = class {
6001
6750
  context.options.progressCallback?.(context.numComplete, context.runEvalOptionsLength, index, evalStep, metrics || createTimeoutMetrics(timeoutMs));
6002
6751
  }
6003
6752
  async executeEvalSteps({ checkAbort, ciProgressReporter, combinedAbortSignal, concurrentRunEvalOptions, evalStepIndexMap, globalTimeout, groupedRunEvalOptions, isEvalTimedOut, isWebUI, maxEvalTimeMs, processingContext, processedIndices, progressBarManager, prompts, serialRunEvalOptions, shouldGroupGradingByProvider }) {
6004
- let flushGroupedRows;
6005
6753
  try {
6006
- if (shouldGroupGradingByProvider) flushGroupedRows = await this.runGroupedEvalSteps({
6754
+ if (shouldGroupGradingByProvider) await this.runGroupedEvalSteps({
6007
6755
  checkAbort,
6008
6756
  evalStepIndexMap,
6009
6757
  groupedRunEvalOptions,
@@ -6035,7 +6783,6 @@ var Evaluator = class {
6035
6783
  cleanupProgressAfterError(progressBarManager, ciProgressReporter, err);
6036
6784
  throw err;
6037
6785
  }
6038
- await flushGroupedRows?.();
6039
6786
  if (isEvalTimedOut()) logger.warn(`Evaluation stopped after reaching max duration (${maxEvalTimeMs}ms)`);
6040
6787
  else if (!processingContext.targetUnavailable) return this.saveInterruptedEval({
6041
6788
  ciProgressReporter,
@@ -6622,4 +7369,4 @@ function evaluate(testSuite, evalRecord, options) {
6622
7369
  //#endregion
6623
7370
  export { isAllowedPrompt as a, assertions_default as c, generateVarCombinations as i, readAssertions as l, evaluate as n, accumulateNamedMetric as o, formatVarsForDisplay as r, doesPromptRefMatch as s, ProgressBarManager as t, runAssertions as u };
6624
7371
 
6625
- //# sourceMappingURL=evaluator-IvuDYSvQ.js.map
7372
+ //# sourceMappingURL=evaluator-D-UIbbYq.js.map