promptfoo 0.95.0 → 0.96.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (350)
  1. package/README.md +3 -1
  2. package/dist/package.json +19 -17
  3. package/dist/src/app/assets/{index-zXzgAsKj.js → index-CL29fyye.js} +185 -185
  4. package/dist/src/app/assets/{index.es-Jztl1qad.js → index.es-CsYHA4xg.js} +1 -1
  5. package/dist/src/app/assets/{sync-BJBBGzPI.js → sync-B9AjROwZ.js} +1 -1
  6. package/dist/src/app/index.html +1 -1
  7. package/dist/src/assertions/answerRelevance.d.ts +3 -0
  8. package/dist/src/assertions/answerRelevance.d.ts.map +1 -0
  9. package/dist/src/assertions/answerRelevance.js +19 -0
  10. package/dist/src/assertions/answerRelevance.js.map +1 -0
  11. package/dist/src/assertions/bleu.d.ts +23 -0
  12. package/dist/src/assertions/bleu.d.ts.map +1 -0
  13. package/dist/src/assertions/bleu.js +132 -0
  14. package/dist/src/assertions/bleu.js.map +1 -0
  15. package/dist/src/assertions/classifier.d.ts +3 -0
  16. package/dist/src/assertions/classifier.d.ts.map +1 -0
  17. package/dist/src/assertions/classifier.js +22 -0
  18. package/dist/src/assertions/classifier.js.map +1 -0
  19. package/dist/src/assertions/contains.d.ts +8 -0
  20. package/dist/src/assertions/contains.d.ts.map +1 -0
  21. package/dist/src/assertions/contains.js +110 -0
  22. package/dist/src/assertions/contains.js.map +1 -0
  23. package/dist/src/assertions/contextFaithfulness.d.ts +3 -0
  24. package/dist/src/assertions/contextFaithfulness.d.ts.map +1 -0
  25. package/dist/src/assertions/contextFaithfulness.js +19 -0
  26. package/dist/src/assertions/contextFaithfulness.js.map +1 -0
  27. package/dist/src/assertions/contextRecall.d.ts +3 -0
  28. package/dist/src/assertions/contextRecall.d.ts.map +1 -0
  29. package/dist/src/assertions/contextRecall.js +18 -0
  30. package/dist/src/assertions/contextRecall.js.map +1 -0
  31. package/dist/src/assertions/contextRelevance.d.ts +3 -0
  32. package/dist/src/assertions/contextRelevance.d.ts.map +1 -0
  33. package/dist/src/assertions/contextRelevance.js +19 -0
  34. package/dist/src/assertions/contextRelevance.js.map +1 -0
  35. package/dist/src/assertions/cost.d.ts +3 -0
  36. package/dist/src/assertions/cost.d.ts.map +1 -0
  37. package/dist/src/assertions/cost.js +22 -0
  38. package/dist/src/assertions/cost.js.map +1 -0
  39. package/dist/src/assertions/equals.d.ts +4 -0
  40. package/dist/src/assertions/equals.d.ts.map +1 -0
  41. package/dist/src/assertions/equals.js +32 -0
  42. package/dist/src/assertions/equals.js.map +1 -0
  43. package/dist/src/assertions/factuality.d.ts +3 -0
  44. package/dist/src/assertions/factuality.d.ts.map +1 -0
  45. package/dist/src/assertions/factuality.js +25 -0
  46. package/dist/src/assertions/factuality.js.map +1 -0
  47. package/dist/src/assertions/index.d.ts +1 -12
  48. package/dist/src/assertions/index.d.ts.map +1 -1
  49. package/dist/src/assertions/index.js +94 -987
  50. package/dist/src/assertions/index.js.map +1 -1
  51. package/dist/src/assertions/javascript.d.ts +4 -0
  52. package/dist/src/assertions/javascript.d.ts.map +1 -0
  53. package/dist/src/assertions/javascript.js +94 -0
  54. package/dist/src/assertions/javascript.js.map +1 -0
  55. package/dist/src/assertions/json.d.ts +7 -0
  56. package/dist/src/assertions/json.d.ts.map +1 -0
  57. package/dist/src/assertions/json.js +121 -0
  58. package/dist/src/assertions/json.js.map +1 -0
  59. package/dist/src/assertions/latency.d.ts +3 -0
  60. package/dist/src/assertions/latency.d.ts.map +1 -0
  61. package/dist/src/assertions/latency.js +22 -0
  62. package/dist/src/assertions/latency.js.map +1 -0
  63. package/dist/src/assertions/levenshtein.d.ts +3 -0
  64. package/dist/src/assertions/levenshtein.d.ts.map +1 -0
  65. package/dist/src/assertions/levenshtein.js +22 -0
  66. package/dist/src/assertions/levenshtein.js.map +1 -0
  67. package/dist/src/assertions/llmRubric.d.ts +3 -0
  68. package/dist/src/assertions/llmRubric.d.ts.map +1 -0
  69. package/dist/src/assertions/llmRubric.js +22 -0
  70. package/dist/src/assertions/llmRubric.js.map +1 -0
  71. package/dist/src/assertions/modelGradedClosedQa.d.ts +3 -0
  72. package/dist/src/assertions/modelGradedClosedQa.d.ts.map +1 -0
  73. package/dist/src/assertions/modelGradedClosedQa.js +25 -0
  74. package/dist/src/assertions/modelGradedClosedQa.js.map +1 -0
  75. package/dist/src/assertions/moderation.d.ts +3 -0
  76. package/dist/src/assertions/moderation.d.ts.map +1 -0
  77. package/dist/src/assertions/moderation.js +41 -0
  78. package/dist/src/assertions/moderation.js.map +1 -0
  79. package/dist/src/assertions/openai.d.ts +5 -0
  80. package/dist/src/assertions/openai.d.ts.map +1 -0
  81. package/dist/src/assertions/openai.js +68 -0
  82. package/dist/src/assertions/openai.js.map +1 -0
  83. package/dist/src/assertions/perplexity.d.ts +4 -0
  84. package/dist/src/assertions/perplexity.d.ts.map +1 -0
  85. package/dist/src/assertions/perplexity.js +40 -0
  86. package/dist/src/assertions/perplexity.js.map +1 -0
  87. package/dist/src/assertions/python.d.ts +4 -0
  88. package/dist/src/assertions/python.d.ts.map +1 -0
  89. package/dist/src/assertions/python.js +107 -0
  90. package/dist/src/assertions/python.js.map +1 -0
  91. package/dist/src/assertions/redteam.d.ts +3 -0
  92. package/dist/src/assertions/redteam.d.ts.map +1 -0
  93. package/dist/src/assertions/redteam.js +29 -0
  94. package/dist/src/assertions/redteam.js.map +1 -0
  95. package/dist/src/assertions/regex.d.ts +3 -0
  96. package/dist/src/assertions/regex.d.ts.map +1 -0
  97. package/dist/src/assertions/regex.js +34 -0
  98. package/dist/src/assertions/regex.js.map +1 -0
  99. package/dist/src/assertions/rouge.d.ts +3 -0
  100. package/dist/src/assertions/rouge.d.ts.map +1 -0
  101. package/dist/src/assertions/rouge.js +47 -0
  102. package/dist/src/assertions/rouge.js.map +1 -0
  103. package/dist/src/assertions/similar.d.ts +3 -0
  104. package/dist/src/assertions/similar.d.ts.map +1 -0
  105. package/dist/src/assertions/similar.js +40 -0
  106. package/dist/src/assertions/similar.js.map +1 -0
  107. package/dist/src/assertions/sql.d.ts +4 -0
  108. package/dist/src/assertions/sql.d.ts.map +1 -0
  109. package/dist/src/assertions/sql.js +98 -0
  110. package/dist/src/assertions/sql.js.map +1 -0
  111. package/dist/src/assertions/startsWith.d.ts +3 -0
  112. package/dist/src/assertions/startsWith.d.ts.map +1 -0
  113. package/dist/src/assertions/startsWith.js +22 -0
  114. package/dist/src/assertions/startsWith.js.map +1 -0
  115. package/dist/src/assertions/utils.d.ts +1 -0
  116. package/dist/src/assertions/utils.d.ts.map +1 -1
  117. package/dist/src/assertions/utils.js +7 -0
  118. package/dist/src/assertions/utils.js.map +1 -1
  119. package/dist/src/assertions/webhook.d.ts +3 -0
  120. package/dist/src/assertions/webhook.d.ts.map +1 -0
  121. package/dist/src/assertions/webhook.js +55 -0
  122. package/dist/src/assertions/webhook.js.map +1 -0
  123. package/dist/src/assertions/xml.d.ts +11 -0
  124. package/dist/src/assertions/xml.d.ts.map +1 -0
  125. package/dist/src/assertions/xml.js +86 -0
  126. package/dist/src/assertions/xml.js.map +1 -0
  127. package/dist/src/commands/eval/filterTests.d.ts +1 -0
  128. package/dist/src/commands/eval/filterTests.d.ts.map +1 -1
  129. package/dist/src/commands/eval/filterTests.js +14 -1
  130. package/dist/src/commands/eval/filterTests.js.map +1 -1
  131. package/dist/src/commands/eval.d.ts.map +1 -1
  132. package/dist/src/commands/eval.js +14 -3
  133. package/dist/src/commands/eval.js.map +1 -1
  134. package/dist/src/commands/import.d.ts.map +1 -1
  135. package/dist/src/commands/import.js +1 -0
  136. package/dist/src/commands/import.js.map +1 -1
  137. package/dist/src/commands/list.d.ts.map +1 -1
  138. package/dist/src/commands/list.js +15 -0
  139. package/dist/src/commands/list.js.map +1 -1
  140. package/dist/src/csv.d.ts.map +1 -1
  141. package/dist/src/csv.js +9 -4
  142. package/dist/src/csv.js.map +1 -1
  143. package/dist/src/database/tables.d.ts +189 -1
  144. package/dist/src/database/tables.d.ts.map +1 -1
  145. package/dist/src/envars.d.ts +9 -0
  146. package/dist/src/envars.d.ts.map +1 -1
  147. package/dist/src/envars.js.map +1 -1
  148. package/dist/src/evaluator.js +2 -2
  149. package/dist/src/evaluator.js.map +1 -1
  150. package/dist/src/evaluatorHelpers.d.ts.map +1 -1
  151. package/dist/src/evaluatorHelpers.js +4 -0
  152. package/dist/src/evaluatorHelpers.js.map +1 -1
  153. package/dist/src/fetch.d.ts.map +1 -1
  154. package/dist/src/fetch.js +8 -1
  155. package/dist/src/fetch.js.map +1 -1
  156. package/dist/src/globalConfig/accounts.d.ts +1 -0
  157. package/dist/src/globalConfig/accounts.d.ts.map +1 -1
  158. package/dist/src/globalConfig/accounts.js +24 -0
  159. package/dist/src/globalConfig/accounts.js.map +1 -1
  160. package/dist/src/index.d.ts +74 -68
  161. package/dist/src/index.d.ts.map +1 -1
  162. package/dist/src/onboarding.js +8 -8
  163. package/dist/src/onboarding.js.map +1 -1
  164. package/dist/src/providers/adaline.gateway.d.ts.map +1 -1
  165. package/dist/src/providers/adaline.gateway.js +4 -4
  166. package/dist/src/providers/adaline.gateway.js.map +1 -1
  167. package/dist/src/providers/anthropic.d.ts.map +1 -1
  168. package/dist/src/providers/anthropic.js +31 -0
  169. package/dist/src/providers/anthropic.js.map +1 -1
  170. package/dist/src/providers/{azureopenai.d.ts → azure.d.ts} +13 -13
  171. package/dist/src/providers/azure.d.ts.map +1 -0
  172. package/dist/src/providers/{azureopenai.js → azure.js} +64 -42
  173. package/dist/src/providers/azure.js.map +1 -0
  174. package/dist/src/providers/{azureopenaiUtil.d.ts → azureUtil.d.ts} +1 -1
  175. package/dist/src/providers/azureUtil.d.ts.map +1 -0
  176. package/dist/src/providers/{azureopenaiUtil.js → azureUtil.js} +3 -3
  177. package/dist/src/providers/azureUtil.js.map +1 -0
  178. package/dist/src/providers/bedrock.d.ts +5 -3
  179. package/dist/src/providers/bedrock.d.ts.map +1 -1
  180. package/dist/src/providers/bedrock.js +38 -9
  181. package/dist/src/providers/bedrock.js.map +1 -1
  182. package/dist/src/providers/defaults.d.ts.map +1 -1
  183. package/dist/src/providers/defaults.js +36 -0
  184. package/dist/src/providers/defaults.js.map +1 -1
  185. package/dist/src/providers/portkey.d.ts +3 -2
  186. package/dist/src/providers/portkey.d.ts.map +1 -1
  187. package/dist/src/providers/portkey.js +17 -10
  188. package/dist/src/providers/portkey.js.map +1 -1
  189. package/dist/src/providers/promptfoo.js +2 -2
  190. package/dist/src/providers/promptfoo.js.map +1 -1
  191. package/dist/src/providers.d.ts +9 -5
  192. package/dist/src/providers.d.ts.map +1 -1
  193. package/dist/src/providers.js +16 -11
  194. package/dist/src/providers.js.map +1 -1
  195. package/dist/src/redteam/commands/generate.d.ts.map +1 -1
  196. package/dist/src/redteam/commands/generate.js +5 -1
  197. package/dist/src/redteam/commands/generate.js.map +1 -1
  198. package/dist/src/redteam/commands/init.d.ts +11 -0
  199. package/dist/src/redteam/commands/init.d.ts.map +1 -1
  200. package/dist/src/redteam/commands/init.js +47 -12
  201. package/dist/src/redteam/commands/init.js.map +1 -1
  202. package/dist/src/redteam/commands/poison.d.ts +3 -0
  203. package/dist/src/redteam/commands/poison.d.ts.map +1 -0
  204. package/dist/src/redteam/commands/poison.js +165 -0
  205. package/dist/src/redteam/commands/poison.js.map +1 -0
  206. package/dist/src/redteam/commands/run.d.ts.map +1 -1
  207. package/dist/src/redteam/commands/run.js +2 -0
  208. package/dist/src/redteam/commands/run.js.map +1 -1
  209. package/dist/src/redteam/constants.d.ts +6 -5
  210. package/dist/src/redteam/constants.d.ts.map +1 -1
  211. package/dist/src/redteam/constants.js +54 -37
  212. package/dist/src/redteam/constants.js.map +1 -1
  213. package/dist/src/redteam/extraction/util.js +1 -1
  214. package/dist/src/redteam/extraction/util.js.map +1 -1
  215. package/dist/src/redteam/graders.d.ts +34 -32
  216. package/dist/src/redteam/graders.d.ts.map +1 -1
  217. package/dist/src/redteam/graders.js +34 -33
  218. package/dist/src/redteam/graders.js.map +1 -1
  219. package/dist/src/redteam/index.d.ts.map +1 -1
  220. package/dist/src/redteam/index.js +9 -2
  221. package/dist/src/redteam/index.js.map +1 -1
  222. package/dist/src/redteam/plugins/bfla.d.ts.map +1 -1
  223. package/dist/src/redteam/plugins/bfla.js +5 -3
  224. package/dist/src/redteam/plugins/bfla.js.map +1 -1
  225. package/dist/src/redteam/plugins/bola.js +3 -3
  226. package/dist/src/redteam/plugins/index.d.ts.map +1 -1
  227. package/dist/src/redteam/plugins/index.js +3 -2
  228. package/dist/src/redteam/plugins/index.js.map +1 -1
  229. package/dist/src/redteam/plugins/intent.d.ts +22 -0
  230. package/dist/src/redteam/plugins/intent.d.ts.map +1 -0
  231. package/dist/src/redteam/plugins/intent.js +100 -0
  232. package/dist/src/redteam/plugins/intent.js.map +1 -0
  233. package/dist/src/redteam/plugins/pii.d.ts.map +1 -1
  234. package/dist/src/redteam/plugins/pii.js +21 -14
  235. package/dist/src/redteam/plugins/pii.js.map +1 -1
  236. package/dist/src/redteam/plugins/rbac.d.ts.map +1 -1
  237. package/dist/src/redteam/plugins/rbac.js +5 -4
  238. package/dist/src/redteam/plugins/rbac.js.map +1 -1
  239. package/dist/src/redteam/plugins/sqlInjection.d.ts.map +1 -1
  240. package/dist/src/redteam/plugins/sqlInjection.js +13 -4
  241. package/dist/src/redteam/plugins/sqlInjection.js.map +1 -1
  242. package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
  243. package/dist/src/redteam/providers/crescendo/index.js +6 -4
  244. package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
  245. package/dist/src/redteam/providers/goat.d.ts +2 -0
  246. package/dist/src/redteam/providers/goat.d.ts.map +1 -1
  247. package/dist/src/redteam/providers/goat.js +10 -3
  248. package/dist/src/redteam/providers/goat.js.map +1 -1
  249. package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
  250. package/dist/src/redteam/providers/iterative.js +4 -2
  251. package/dist/src/redteam/providers/iterative.js.map +1 -1
  252. package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
  253. package/dist/src/redteam/providers/iterativeImage.js +4 -2
  254. package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
  255. package/dist/src/redteam/providers/iterativeTree.d.ts +3 -1
  256. package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
  257. package/dist/src/redteam/providers/iterativeTree.js +5 -3
  258. package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
  259. package/dist/src/redteam/providers/shared.d.ts +2 -2
  260. package/dist/src/redteam/providers/shared.d.ts.map +1 -1
  261. package/dist/src/redteam/providers/shared.js +2 -2
  262. package/dist/src/redteam/providers/shared.js.map +1 -1
  263. package/dist/src/redteam/strategies/mathPrompt.js +1 -1
  264. package/dist/src/redteam/strategies/mathPrompt.js.map +1 -1
  265. package/dist/src/redteam/strategies/multilingual.js +1 -1
  266. package/dist/src/redteam/strategies/multilingual.js.map +1 -1
  267. package/dist/src/redteam/types.d.ts +1 -0
  268. package/dist/src/redteam/types.d.ts.map +1 -1
  269. package/dist/src/remoteGrading.js +1 -1
  270. package/dist/src/remoteGrading.js.map +1 -1
  271. package/dist/src/telemetry.d.ts +2 -2
  272. package/dist/src/types/index.d.ts +4737 -12
  273. package/dist/src/types/index.d.ts.map +1 -1
  274. package/dist/src/types/index.js +23 -26
  275. package/dist/src/types/index.js.map +1 -1
  276. package/dist/src/types/providers.d.ts +12 -0
  277. package/dist/src/types/providers.d.ts.map +1 -1
  278. package/dist/src/types/providers.js.map +1 -1
  279. package/dist/src/util/config/load.d.ts +1 -1
  280. package/dist/src/util/config/load.d.ts.map +1 -1
  281. package/dist/src/util/config/load.js +15 -2
  282. package/dist/src/util/config/load.js.map +1 -1
  283. package/dist/src/util/index.d.ts +34 -0
  284. package/dist/src/util/index.d.ts.map +1 -1
  285. package/dist/src/validators/providers.d.ts +391 -0
  286. package/dist/src/validators/providers.d.ts.map +1 -1
  287. package/dist/src/validators/providers.js +18 -0
  288. package/dist/src/validators/providers.js.map +1 -1
  289. package/dist/src/validators/redteam.d.ts +136 -0
  290. package/dist/src/validators/redteam.d.ts.map +1 -1
  291. package/dist/test/assertions/bleu.test.d.ts +2 -0
  292. package/dist/test/assertions/bleu.test.d.ts.map +1 -0
  293. package/dist/test/assertions/bleu.test.js +162 -0
  294. package/dist/test/assertions/bleu.test.js.map +1 -0
  295. package/dist/test/assertions/index.test.js +276 -297
  296. package/dist/test/assertions/index.test.js.map +1 -1
  297. package/dist/test/assertions/json.test.d.ts +2 -0
  298. package/dist/test/assertions/json.test.d.ts.map +1 -0
  299. package/dist/test/assertions/json.test.js +36 -0
  300. package/dist/test/assertions/json.test.js.map +1 -0
  301. package/dist/test/assertions/sql.test.d.ts +2 -0
  302. package/dist/test/assertions/sql.test.d.ts.map +1 -0
  303. package/dist/test/assertions/sql.test.js +280 -0
  304. package/dist/test/assertions/sql.test.js.map +1 -0
  305. package/dist/test/commands/eval/filterTests.test.js +30 -0
  306. package/dist/test/commands/eval/filterTests.test.js.map +1 -1
  307. package/dist/test/factories/evalFactory.d.ts +155 -1
  308. package/dist/test/factories/evalFactory.d.ts.map +1 -1
  309. package/dist/test/fetch.test.js +17 -0
  310. package/dist/test/fetch.test.js.map +1 -1
  311. package/dist/test/onboarding.test.js +126 -1
  312. package/dist/test/onboarding.test.js.map +1 -1
  313. package/dist/test/providers/anthropic.test.js +120 -0
  314. package/dist/test/providers/anthropic.test.js.map +1 -1
  315. package/dist/test/providers/azure.test.js +22 -25
  316. package/dist/test/providers/azure.test.js.map +1 -1
  317. package/dist/test/providers/bedrock.test.js +178 -55
  318. package/dist/test/providers/bedrock.test.js.map +1 -1
  319. package/dist/test/providers/index.test.js +7 -7
  320. package/dist/test/providers/index.test.js.map +1 -1
  321. package/dist/test/providers/portkey.test.d.ts +2 -0
  322. package/dist/test/providers/portkey.test.d.ts.map +1 -0
  323. package/dist/test/providers/portkey.test.js +46 -0
  324. package/dist/test/providers/portkey.test.js.map +1 -0
  325. package/dist/test/redteam/commands/init.test.d.ts +2 -0
  326. package/dist/test/redteam/commands/init.test.d.ts.map +1 -0
  327. package/dist/test/redteam/commands/init.test.js +109 -0
  328. package/dist/test/redteam/commands/init.test.js.map +1 -0
  329. package/dist/test/redteam/plugins/pluginDocumentation.test.js +4 -1
  330. package/dist/test/redteam/plugins/pluginDocumentation.test.js.map +1 -1
  331. package/dist/test/redteam/providers/goat.test.js +1 -1
  332. package/dist/test/redteam/providers/goat.test.js.map +1 -1
  333. package/dist/test/redteam/providers/iterativeTree.test.js +8 -3
  334. package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
  335. package/dist/test/util/config/load.test.js +44 -1
  336. package/dist/test/util/config/load.test.js.map +1 -1
  337. package/dist/tsconfig.tsbuildinfo +1 -1
  338. package/package.json +19 -17
  339. package/dist/src/providers/azureopenai.d.ts.map +0 -1
  340. package/dist/src/providers/azureopenai.js.map +0 -1
  341. package/dist/src/providers/azureopenaiUtil.d.ts.map +0 -1
  342. package/dist/src/providers/azureopenaiUtil.js.map +0 -1
  343. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240617.json +0 -10
  344. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240618.json +0 -10
  345. package/dist/src/redteam/eval/harmful/llm_rubric-20240723.json +0 -10
  346. package/dist/src/redteam/eval/harmful/llm_rubric-20240724.json +0 -10
  347. package/dist/test/is-sql-tests/node-sql-parser.test.d.ts +0 -2
  348. package/dist/test/is-sql-tests/node-sql-parser.test.d.ts.map +0 -1
  349. package/dist/test/is-sql-tests/node-sql-parser.test.js +0 -179
  350. package/dist/test/is-sql-tests/node-sql-parser.test.js.map +0 -1
@@ -1,231 +1,79 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || function (mod) {
19
- if (mod && mod.__esModule) return mod;
20
- var result = {};
21
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
- __setModuleDefault(result, mod);
23
- return result;
24
- };
25
2
  var __importDefault = (this && this.__importDefault) || function (mod) {
26
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
4
  };
28
5
  Object.defineProperty(exports, "__esModule", { value: true });
29
6
  exports.MODEL_GRADED_ASSERTION_TYPES = void 0;
30
- exports.createAjv = createAjv;
31
- exports.validateXml = validateXml;
32
- exports.containsXml = containsXml;
33
- exports.isSql = isSql;
34
7
  exports.runAssertion = runAssertion;
35
8
  exports.runAssertions = runAssertions;
36
9
  exports.runCompareAssertion = runCompareAssertion;
37
10
  exports.readAssertions = readAssertions;
38
- const ajv_1 = __importDefault(require("ajv"));
39
- const ajv_formats_1 = __importDefault(require("ajv-formats"));
40
11
  const async_1 = __importDefault(require("async"));
41
- const fast_xml_parser_1 = require("fast-xml-parser");
42
- const fastest_levenshtein_1 = require("fastest-levenshtein");
43
12
  const fs_1 = __importDefault(require("fs"));
44
- const rouge = __importStar(require("js-rouge"));
45
13
  const js_yaml_1 = __importDefault(require("js-yaml"));
46
- const node_util_1 = __importDefault(require("node:util"));
47
14
  const path_1 = __importDefault(require("path"));
48
15
  const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
49
16
  const cliState_1 = __importDefault(require("../cliState"));
50
17
  const envars_1 = require("../envars");
51
18
  const esm_1 = require("../esm");
52
- const fetch_1 = require("../fetch");
53
19
  const logger_1 = __importDefault(require("../logger"));
54
20
  const matchers_1 = require("../matchers");
55
- const openaiUtil_1 = require("../providers/openaiUtil");
56
21
  const packageParser_1 = require("../providers/packageParser");
57
- const shared_1 = require("../providers/shared");
58
22
  const pythonUtils_1 = require("../python/pythonUtils");
59
- const wrapper_1 = require("../python/wrapper");
60
- const graders_1 = require("../redteam/graders");
61
23
  const telemetry_1 = __importDefault(require("../telemetry"));
62
- const types_1 = require("../types");
63
24
  const file_1 = require("../util/file");
64
- const json_1 = require("../util/json");
65
25
  const templates_1 = require("../util/templates");
66
26
  const transform_1 = require("../util/transform");
67
27
  const AssertionsResult_1 = require("./AssertionsResult");
28
+ const answerRelevance_1 = require("./answerRelevance");
29
+ const bleu_1 = require("./bleu");
30
+ const classifier_1 = require("./classifier");
31
+ const contains_1 = require("./contains");
32
+ const contextFaithfulness_1 = require("./contextFaithfulness");
33
+ const contextRecall_1 = require("./contextRecall");
34
+ const contextRelevance_1 = require("./contextRelevance");
35
+ const cost_1 = require("./cost");
36
+ const equals_1 = require("./equals");
37
+ const factuality_1 = require("./factuality");
38
+ const javascript_1 = require("./javascript");
39
+ const json_1 = require("./json");
40
+ const latency_1 = require("./latency");
41
+ const levenshtein_1 = require("./levenshtein");
42
+ const llmRubric_1 = require("./llmRubric");
43
+ const modelGradedClosedQa_1 = require("./modelGradedClosedQa");
44
+ const moderation_1 = require("./moderation");
45
+ const openai_1 = require("./openai");
46
+ const perplexity_1 = require("./perplexity");
47
+ const python_1 = require("./python");
48
+ const redteam_1 = require("./redteam");
49
+ const regex_1 = require("./regex");
50
+ const rouge_1 = require("./rouge");
51
+ const similar_1 = require("./similar");
52
+ const sql_1 = require("./sql");
53
+ const startsWith_1 = require("./startsWith");
68
54
  const utils_1 = require("./utils");
55
+ const webhook_1 = require("./webhook");
56
+ const xml_1 = require("./xml");
69
57
  const ASSERTIONS_MAX_CONCURRENCY = (0, envars_1.getEnvInt)('PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY', 3);
70
58
  exports.MODEL_GRADED_ASSERTION_TYPES = new Set([
71
59
  'answer-relevance',
72
60
  'context-faithfulness',
73
61
  'context-recall',
74
62
  'context-relevance',
63
+ 'factuality',
75
64
  'llm-rubric',
76
65
  'model-graded-closedqa',
77
- 'factuality',
78
66
  'model-graded-factuality',
79
67
  ]);
80
- function createAjv() {
81
- const ajvOptions = {
82
- strictSchema: !(0, envars_1.getEnvBool)('PROMPTFOO_DISABLE_AJV_STRICT_MODE'),
83
- };
84
- const ajv = new ajv_1.default(ajvOptions);
85
- (0, ajv_formats_1.default)(ajv);
86
- return ajv;
87
- }
88
- const ajv = createAjv();
89
68
  const nunjucks = (0, templates_1.getNunjucksEngine)();
90
- function coerceString(value) {
91
- if (typeof value === 'string') {
92
- return value;
93
- }
94
- return JSON.stringify(value);
95
- }
96
- function handleRougeScore(baseType, assertion, expected, output, inverted) {
97
- const fnName = baseType[baseType.length - 1];
98
- const rougeMethod = rouge[fnName];
99
- const score = rougeMethod(output, expected, {});
100
- const pass = score >= (assertion.threshold || 0.75) != inverted;
101
- return {
102
- pass,
103
- score: inverted ? 1 - score : score,
104
- reason: pass
105
- ? `${baseType.toUpperCase()} score ${score.toFixed(2)} is greater than or equal to threshold ${assertion.threshold || 0.75}`
106
- : `${baseType.toUpperCase()} score ${score.toFixed(2)} is less than threshold ${assertion.threshold || 0.75}`,
107
- assertion,
108
- };
109
- }
110
- function validateXml(xmlString, requiredElements) {
111
- if (!xmlString.startsWith('<')) {
112
- return { isValid: false, reason: 'XML is missing opening tag' };
113
- }
114
- const parser = new fast_xml_parser_1.XMLParser({
115
- allowBooleanAttributes: true,
116
- ignoreAttributes: false,
117
- parseAttributeValue: true,
118
- parseTagValue: true,
119
- });
120
- try {
121
- const parsedXml = parser.parse(xmlString);
122
- if (requiredElements && requiredElements.length > 0) {
123
- const missingElements = requiredElements.filter((element) => {
124
- const path = element.split('.');
125
- let current = parsedXml;
126
- for (const key of path) {
127
- if (current[key] === undefined) {
128
- return true;
129
- }
130
- current = current[key];
131
- }
132
- return false;
133
- });
134
- if (missingElements.length > 0) {
135
- return {
136
- isValid: false,
137
- reason: `XML is missing required elements: ${missingElements.join(', ')}`,
138
- };
139
- }
140
- }
141
- return { isValid: true, reason: 'XML is valid and contains all required elements' };
142
- }
143
- catch (err) {
144
- return { isValid: false, reason: `XML parsing failed: ${err.message}` };
145
- }
146
- }
147
- function containsXml(outputString, requiredElements) {
148
- const xmlRegex = /<\?xml.*?>[\s\S]*<\/[^>]+>|\S*<[^>]+>[\s\S]*<\/[^>]+>/;
149
- const xmlMatches = outputString.match(xmlRegex);
150
- if (!xmlMatches) {
151
- return { isValid: false, reason: 'No XML content found in the output' };
152
- }
153
- for (const xmlMatch of xmlMatches) {
154
- const { isValid, reason } = validateXml(xmlMatch, requiredElements);
155
- if (isValid) {
156
- return { isValid: true, reason };
157
- }
158
- }
159
- return { isValid: false, reason: 'No valid XML content found matching the requirements' };
160
- }
161
- async function isSql(outputString, renderedValue, inverse, assertion) {
162
- let pass = false;
163
- let databaseType = 'MySQL';
164
- let whiteTableList;
165
- let whiteColumnList;
166
- if (renderedValue && typeof renderedValue === 'object') {
167
- const value = renderedValue;
168
- databaseType = value.databaseType || 'MySQL';
169
- whiteTableList = value.allowedTables;
170
- whiteColumnList = value.allowedColumns;
171
- }
172
- if (renderedValue && typeof renderedValue !== 'object') {
173
- throw new Error('is-sql assertion must have a object value.');
174
- }
175
- const { Parser: SqlParser } = await Promise.resolve().then(() => __importStar(require('node-sql-parser'))).catch(() => {
176
- throw new Error('node-sql-parser is not installed. Please install it first');
177
- });
178
- const sqlParser = new SqlParser();
179
- const opt = { database: databaseType };
180
- const failureReasons = [];
181
- try {
182
- sqlParser.astify(outputString, opt);
183
- pass = !inverse;
184
- }
185
- catch {
186
- pass = inverse;
187
- failureReasons.push(`SQL statement does not conform to the provided ${databaseType} database syntax.`);
188
- }
189
- if (whiteTableList) {
190
- opt.type = 'table';
191
- try {
192
- sqlParser.whiteListCheck(outputString, whiteTableList, opt);
193
- }
194
- catch (err) {
195
- pass = inverse;
196
- const error = err;
197
- failureReasons.push(`SQL validation failed: ${error.message}.`);
198
- }
199
- }
200
- if (whiteColumnList) {
201
- opt.type = 'column';
202
- try {
203
- sqlParser.whiteListCheck(outputString, whiteColumnList, opt);
204
- }
205
- catch (err) {
206
- pass = inverse;
207
- const error = err;
208
- failureReasons.push(`SQL validation failed: ${error.message}.`);
209
- }
210
- }
211
- if (inverse && pass === false && failureReasons.length === 0) {
212
- failureReasons.push('The output SQL statement is valid');
213
- }
214
- return {
215
- pass,
216
- score: pass ? 1 : 0,
217
- reason: pass ? 'Assertion passed' : failureReasons.join(' '),
218
- assertion,
219
- };
220
- }
221
69
  async function runAssertion({ prompt, provider, assertion, test, latencyMs, providerResponse, }) {
222
70
  const { cost, logProbs, output: originalOutput } = providerResponse;
223
71
  let output = originalOutput;
224
- let pass = false;
225
- let score = 0.0;
226
72
  (0, tiny_invariant_1.default)(assertion.type, `Assertion must have a type: ${JSON.stringify(assertion)}`);
227
73
  const inverse = assertion.type.startsWith('not-');
228
- const baseType = inverse ? assertion.type.slice(4) : assertion.type;
74
+ const baseType = inverse
75
+ ? assertion.type.slice(4)
76
+ : assertion.type;
229
77
  telemetry_1.default.record('assertion_used', {
230
78
  type: baseType,
231
79
  });
@@ -235,7 +83,6 @@ async function runAssertion({ prompt, provider, assertion, test, latencyMs, prov
235
83
  prompt: { label: prompt },
236
84
  });
237
85
  }
238
- const outputString = coerceString(output);
239
86
  const context = {
240
87
  prompt,
241
88
  vars: test.vars || {},
@@ -309,810 +156,70 @@ async function runAssertion({ prompt, provider, assertion, test, latencyMs, prov
309
156
  return v;
310
157
  });
311
158
  }
312
- // Transform test
313
- test = (0, utils_1.getFinalTest)(test, assertion);
314
- if (baseType === 'equals') {
315
- if (typeof renderedValue === 'object') {
316
- pass = node_util_1.default.isDeepStrictEqual(renderedValue, JSON.parse(outputString)) !== inverse;
317
- renderedValue = JSON.stringify(renderedValue);
318
- }
319
- else {
320
- pass = (renderedValue == outputString) !== inverse;
321
- }
322
- return {
323
- pass,
324
- score: pass ? 1 : 0,
325
- reason: pass
326
- ? 'Assertion passed'
327
- : `Expected output "${outputString}" to ${inverse ? 'not ' : ''}equal "${renderedValue}"`,
328
- assertion,
329
- };
330
- }
331
- if (baseType === 'is-json') {
332
- let parsedJson;
333
- try {
334
- parsedJson = JSON.parse(outputString);
335
- pass = !inverse;
336
- }
337
- catch {
338
- pass = inverse;
339
- }
340
- if (pass && renderedValue) {
341
- let validate;
342
- if (typeof renderedValue === 'string') {
343
- if (renderedValue.startsWith('file://')) {
344
- // Reference the JSON schema from external file
345
- const schema = valueFromScript;
346
- (0, tiny_invariant_1.default)(schema, 'is-json references a file that does not export a JSON schema');
347
- validate = ajv.compile(schema);
348
- }
349
- else {
350
- const scheme = js_yaml_1.default.load(renderedValue);
351
- validate = ajv.compile(scheme);
352
- }
353
- }
354
- else if (typeof renderedValue === 'object') {
355
- // Value is JSON schema
356
- validate = ajv.compile(renderedValue);
357
- }
358
- else {
359
- throw new Error('is-json assertion must have a string or object value');
360
- }
361
- pass = validate(parsedJson);
362
- if (!pass) {
363
- return {
364
- pass,
365
- score: 0,
366
- reason: `JSON does not conform to the provided schema. Errors: ${ajv.errorsText(validate.errors)}`,
367
- assertion,
368
- };
369
- }
370
- }
371
- return {
372
- pass,
373
- score: pass ? 1 : 0,
374
- reason: pass ? 'Assertion passed' : 'Expected output to be valid JSON',
375
- assertion,
376
- };
377
- }
378
- if (baseType === 'is-xml' || baseType === 'contains-xml') {
379
- let requiredElements;
380
- if (typeof renderedValue === 'string') {
381
- requiredElements = renderedValue.split(',').map((el) => el.trim());
382
- }
383
- else if (Array.isArray(renderedValue) && renderedValue.length > 0) {
384
- requiredElements = renderedValue.map((el) => el.toString());
385
- }
386
- else if (typeof renderedValue === 'object' && Object.keys(renderedValue).length > 0) {
387
- if ('requiredElements' in renderedValue && Array.isArray(renderedValue.requiredElements)) {
388
- requiredElements = renderedValue.requiredElements.map((el) => el.toString());
389
- }
390
- else {
391
- throw new Error('xml assertion must contain a string, array value, or no value');
392
- }
393
- }
394
- const result = (baseType === 'is-xml' ? validateXml : containsXml)(outputString, requiredElements);
395
- pass = result.isValid !== inverse;
396
- return {
397
- pass,
398
- score: pass ? 1 : 0,
399
- reason: pass ? 'Assertion passed' : result.reason,
400
- assertion,
401
- };
402
- }
403
- if (baseType === 'is-sql') {
404
- return isSql(outputString, renderedValue, inverse, assertion);
405
- }
406
- if (baseType === 'contains-sql') {
407
- const match = outputString.match(/```(?:sql)?([^`]+)```/);
408
- if (match) {
409
- const sqlCode = match[1].trim();
410
- return isSql(sqlCode, renderedValue, inverse, assertion);
411
- }
412
- else {
413
- return isSql(outputString, renderedValue, inverse, assertion);
414
- }
415
- }
416
- if (baseType === 'contains') {
417
- (0, tiny_invariant_1.default)(renderedValue, '"contains" assertion type must have a string or number value');
418
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string' || typeof renderedValue === 'number', '"contains" assertion type must have a string or number value');
419
- pass = outputString.includes(String(renderedValue)) !== inverse;
420
- return {
421
- pass,
422
- score: pass ? 1 : 0,
423
- reason: pass
424
- ? 'Assertion passed'
425
- : `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
426
- assertion,
427
- };
428
- }
429
- if (baseType === 'contains-any') {
430
- (0, tiny_invariant_1.default)(renderedValue, '"contains-any" assertion type must have a value');
431
- if (typeof renderedValue === 'string') {
432
- renderedValue = renderedValue.split(',').map((v) => v.trim());
433
- }
434
- (0, tiny_invariant_1.default)(Array.isArray(renderedValue), '"contains-any" assertion type must have an array value');
435
- pass = renderedValue.some((value) => outputString.includes(String(value))) !== inverse;
436
- return {
437
- pass,
438
- score: pass ? 1 : 0,
439
- reason: pass
440
- ? 'Assertion passed'
441
- : `Expected output to ${inverse ? 'not ' : ''}contain one of "${renderedValue.join(', ')}"`,
442
- assertion,
443
- };
444
- }
445
- if (baseType === 'icontains-any') {
446
- (0, tiny_invariant_1.default)(renderedValue, '"icontains-any" assertion type must have a value');
447
- if (typeof renderedValue === 'string') {
448
- renderedValue = renderedValue.split(',').map((v) => v.trim());
449
- }
450
- (0, tiny_invariant_1.default)(Array.isArray(renderedValue), '"icontains-any" assertion type must have an array value');
451
- pass =
452
- renderedValue.some((value) => outputString.toLowerCase().includes(String(value).toLowerCase())) !== inverse;
453
- return {
454
- pass,
455
- score: pass ? 1 : 0,
456
- reason: pass
457
- ? 'Assertion passed'
458
- : `Expected output to ${inverse ? 'not ' : ''}contain one of "${renderedValue.join(', ')}"`,
459
- assertion,
460
- };
461
- }
462
- if (baseType === 'contains-all') {
463
- (0, tiny_invariant_1.default)(renderedValue, '"contains-all" assertion type must have a value');
464
- if (typeof renderedValue === 'string') {
465
- renderedValue = renderedValue.split(',').map((v) => v.trim());
466
- }
467
- (0, tiny_invariant_1.default)(Array.isArray(renderedValue), '"contains-all" assertion type must have an array value');
468
- const missingStrings = renderedValue.filter((value) => !outputString.includes(String(value)));
469
- pass = (missingStrings.length === 0) !== inverse;
470
- return {
471
- pass,
472
- score: pass ? 1 : 0,
473
- reason: pass
474
- ? 'Assertion passed'
475
- : `Expected output to ${inverse ? 'not ' : ''}contain all of [${renderedValue.join(', ')}]. Missing: [${missingStrings.join(', ')}]`,
476
- assertion,
477
- };
478
- }
479
- if (baseType === 'icontains-all') {
480
- (0, tiny_invariant_1.default)(renderedValue, '"icontains-all" assertion type must have a value');
481
- if (typeof renderedValue === 'string') {
482
- renderedValue = renderedValue.split(',').map((v) => v.trim());
483
- }
484
- (0, tiny_invariant_1.default)(Array.isArray(renderedValue), '"icontains-all" assertion type must have an array value');
485
- const missingStrings = renderedValue.filter((value) => !outputString.toLowerCase().includes(String(value).toLowerCase()));
486
- pass = (missingStrings.length === 0) !== inverse;
487
- return {
488
- pass,
489
- score: pass ? 1 : 0,
490
- reason: pass
491
- ? 'Assertion passed'
492
- : `Expected output to ${inverse ? 'not ' : ''}contain all of [${renderedValue.join(', ')}]. Missing: [${missingStrings.join(', ')}]`,
493
- assertion,
494
- };
495
- }
496
- if (baseType === 'regex') {
497
- (0, tiny_invariant_1.default)(renderedValue, '"regex" assertion type must have a string value');
498
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', '"regex" assertion type must have a string value');
499
- const regex = new RegExp(renderedValue);
500
- pass = regex.test(outputString) !== inverse;
501
- return {
502
- pass,
503
- score: pass ? 1 : 0,
504
- reason: pass
505
- ? 'Assertion passed'
506
- : `Expected output to ${inverse ? 'not ' : ''}match regex "${renderedValue}"`,
507
- assertion,
508
- };
509
- }
510
- if (baseType === 'icontains') {
511
- (0, tiny_invariant_1.default)(renderedValue, '"icontains" assertion type must have a string or number value');
512
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string' || typeof renderedValue === 'number', '"icontains" assertion type must have a string or number value');
513
- pass = outputString.toLowerCase().includes(String(renderedValue).toLowerCase()) !== inverse;
514
- return {
515
- pass,
516
- score: pass ? 1 : 0,
517
- reason: pass
518
- ? 'Assertion passed'
519
- : `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
520
- assertion,
521
- };
522
- }
523
- if (baseType === 'starts-with') {
524
- (0, tiny_invariant_1.default)(renderedValue, '"starts-with" assertion type must have a string value');
525
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', '"starts-with" assertion type must have a string value');
526
- pass = outputString.startsWith(String(renderedValue)) !== inverse;
527
- return {
528
- pass,
529
- score: pass ? 1 : 0,
530
- reason: pass
531
- ? 'Assertion passed'
532
- : `Expected output to ${inverse ? 'not ' : ''}start with "${renderedValue}"`,
533
- assertion,
534
- };
535
- }
536
- if (baseType === 'contains-json') {
537
- let errorMessage = 'Expected output to contain valid JSON';
538
- const jsonObjects = (0, json_1.extractJsonObjects)(outputString);
539
- pass = inverse ? jsonObjects.length === 0 : jsonObjects.length > 0;
540
- for (const jsonObject of jsonObjects) {
541
- if (renderedValue) {
542
- let validate;
543
- if (typeof renderedValue === 'string') {
544
- if (renderedValue.startsWith('file://')) {
545
- // Reference the JSON schema from external file
546
- const schema = valueFromScript;
547
- (0, tiny_invariant_1.default)(schema, 'contains-json references a file that does not export a JSON schema');
548
- validate = ajv.compile(schema);
549
- }
550
- else {
551
- const scheme = js_yaml_1.default.load(renderedValue);
552
- validate = ajv.compile(scheme);
553
- }
554
- }
555
- else if (typeof renderedValue === 'object') {
556
- // Value is JSON schema
557
- validate = ajv.compile(renderedValue);
558
- }
559
- else {
560
- throw new Error('contains-json assertion must have a string or object value');
561
- }
562
- pass = validate(jsonObject);
563
- if (pass) {
564
- break;
565
- }
566
- else {
567
- errorMessage = `JSON does not conform to the provided schema. Errors: ${ajv.errorsText(validate.errors)}`;
568
- }
569
- }
570
- }
571
- return {
572
- pass,
573
- score: pass ? 1 : 0,
574
- reason: pass ? 'Assertion passed' : errorMessage,
575
- assertion,
576
- };
577
- }
578
- if (baseType === 'is-valid-openai-tools-call') {
579
- const toolsOutput = output;
580
- if (!Array.isArray(toolsOutput) ||
581
- toolsOutput.length === 0 ||
582
- typeof toolsOutput[0].function.name !== 'string' ||
583
- typeof toolsOutput[0].function.arguments !== 'string') {
584
- return {
585
- pass: false,
586
- score: 0,
587
- reason: `OpenAI did not return a valid-looking tools response: ${JSON.stringify(toolsOutput)}`,
588
- assertion,
589
- };
590
- }
591
- try {
592
- toolsOutput.forEach((toolOutput) => (0, openaiUtil_1.validateFunctionCall)(toolOutput.function, provider.config.tools?.map((tool) => tool.function), test.vars));
593
- return {
594
- pass: true,
595
- score: 1,
596
- reason: 'Assertion passed',
597
- assertion,
598
- };
599
- }
600
- catch (err) {
601
- return {
602
- pass: false,
603
- score: 0,
604
- reason: err.message,
605
- assertion,
606
- };
607
- }
608
- }
609
- if (baseType === 'is-valid-openai-function-call') {
610
- const functionOutput = output;
611
- if (typeof functionOutput !== 'object' ||
612
- typeof functionOutput.name !== 'string' ||
613
- typeof functionOutput.arguments !== 'string') {
614
- return {
615
- pass: false,
616
- score: 0,
617
- reason: `OpenAI did not return a valid-looking function call: ${JSON.stringify(functionOutput)}`,
618
- assertion,
619
- };
620
- }
621
- try {
622
- (0, openaiUtil_1.validateFunctionCall)(functionOutput, provider.config.functions, test.vars);
623
- return {
624
- pass: true,
625
- score: 1,
626
- reason: 'Assertion passed',
627
- assertion,
628
- };
629
- }
630
- catch (err) {
631
- return {
632
- pass: false,
633
- score: 0,
634
- reason: err.message,
635
- assertion,
636
- };
637
- }
638
- }
639
- if (baseType === 'javascript') {
640
- try {
641
- const validateResult = async (result) => {
642
- result = await Promise.resolve(result);
643
- if (typeof result === 'boolean' || typeof result === 'number' || (0, types_1.isGradingResult)(result)) {
644
- return result;
645
- }
646
- else {
647
- throw new Error(`Custom function must return a boolean, number, or GradingResult object. Got type ${typeof result}: ${JSON.stringify(result)}`);
648
- }
649
- };
650
- if (typeof assertion.value === 'function') {
651
- let ret = assertion.value(outputString, context);
652
- ret = await validateResult(ret);
653
- if (!ret.assertion) {
654
- // Populate the assertion object if the custom function didn't return it.
655
- const functionString = assertion.value.toString();
656
- ret.assertion = {
657
- type: 'javascript',
658
- value: functionString.length > 50 ? functionString.slice(0, 50) + '...' : functionString,
659
- };
660
- }
661
- return ret;
662
- }
663
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', 'javascript assertion must have a string value');
664
- /**
665
- * Removes trailing newline from the rendered value.
666
- * This is necessary for handling multi-line string literals in YAML
667
- * that are defined on a single line in the YAML file.
668
- *
669
- * @example
670
- * value: |
671
- * output === 'true'
672
- */
673
- renderedValue = renderedValue.trimEnd();
674
- let result;
675
- if (typeof valueFromScript === 'undefined') {
676
- const functionBody = renderedValue.includes('\n')
677
- ? renderedValue
678
- : `return ${renderedValue}`;
679
- const customFunction = new Function('output', 'context', functionBody);
680
- result = await validateResult(customFunction(output, context));
681
- }
682
- else {
683
- (0, tiny_invariant_1.default)(typeof valueFromScript === 'boolean' ||
684
- typeof valueFromScript === 'number' ||
685
- typeof valueFromScript === 'object', `Javascript assertion script must return a boolean, number, or object (${assertion.value})`);
686
- result = await validateResult(valueFromScript);
687
- }
688
- if (typeof result === 'boolean') {
689
- pass = result !== inverse;
690
- score = pass ? 1 : 0;
691
- }
692
- else if (typeof result === 'number') {
693
- pass = assertion.threshold ? result >= assertion.threshold : result > 0;
694
- score = result;
695
- }
696
- else if (typeof result === 'object') {
697
- return result;
698
- }
699
- else {
700
- throw new Error('Custom function must return a boolean or number');
701
- }
702
- }
703
- catch (err) {
704
- return {
705
- pass: false,
706
- score: 0,
707
- reason: `Custom function threw error: ${err.message}
708
- Stack Trace: ${err.stack}
709
- ${renderedValue}`,
710
- assertion,
711
- };
712
- }
713
- return {
714
- pass,
715
- score,
716
- reason: pass
717
- ? 'Assertion passed'
718
- : `Custom function returned ${inverse ? 'true' : 'false'}
719
- ${renderedValue}`,
720
- assertion,
721
- };
722
- }
723
- if (baseType === 'python') {
724
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', 'python assertion must have a string value');
725
- try {
726
- let result;
727
- if (typeof valueFromScript === 'undefined') {
728
- const isMultiline = renderedValue.includes('\n');
729
- let indentStyle = ' ';
730
- if (isMultiline) {
731
- // Detect the indentation style of the first indented line
732
- const match = renderedValue.match(/^(?!\s*$)\s+/m);
733
- if (match) {
734
- indentStyle = match[0];
735
- }
736
- }
737
- const pythonScript = `import json
738
-
739
- def main(output, context):
740
- ${isMultiline
741
- ? renderedValue
742
- .split('\n')
743
- .map((line) => `${indentStyle}${line}`)
744
- .join('\n')
745
- : ` return ${renderedValue}`}
746
- `;
747
- result = await (0, wrapper_1.runPythonCode)(pythonScript, 'main', [output, context]);
748
- }
749
- else {
750
- result = valueFromScript;
751
- }
752
- if ((typeof result === 'boolean' && result) ||
753
- (typeof result === 'string' && result.toLowerCase() === 'true')) {
754
- pass = true;
755
- score = 1.0;
756
- }
757
- else if ((typeof result === 'boolean' && !result) ||
758
- (typeof result === 'string' && result.toLowerCase() === 'false')) {
759
- pass = false;
760
- score = 0.0;
761
- }
762
- else if (typeof result === 'string' && result.startsWith('{')) {
763
- let parsed;
764
- try {
765
- parsed = JSON.parse(result);
766
- }
767
- catch (err) {
768
- throw new Error(`Invalid JSON: ${err} when parsing result: ${result}`);
769
- }
770
- if (!(0, types_1.isGradingResult)(parsed)) {
771
- throw new Error(`Python assertion must return a boolean, number, or {pass, score, reason} object. Got instead: ${result}`);
772
- }
773
- return parsed;
774
- }
775
- else if (typeof result === 'object') {
776
- if (!(0, types_1.isGradingResult)(result)) {
777
- throw new Error(`Python assertion must return a boolean, number, or {pass, score, reason} object. Got instead:\n${JSON.stringify(result, null, 2)}`);
778
- }
779
- const pythonGradingResult = result;
780
- if (assertion.threshold && pythonGradingResult.score < assertion.threshold) {
781
- pythonGradingResult.pass = false;
782
- pythonGradingResult.reason = `Python score ${pythonGradingResult.score} is less than threshold ${assertion.threshold}`;
783
- }
784
- return {
785
- ...pythonGradingResult,
786
- assertion,
787
- };
788
- }
789
- else {
790
- score = Number.parseFloat(String(result));
791
- pass = assertion.threshold ? score >= assertion.threshold : score > 0;
792
- if (Number.isNaN(score)) {
793
- throw new Error(`Python assertion must return a boolean, number, or {pass, score, reason} object. Instead got:\n${result}`);
794
- }
795
- if (typeof assertion.threshold !== 'undefined' && score < assertion.threshold) {
796
- pass = false;
797
- }
798
- }
799
- }
800
- catch (err) {
801
- return {
802
- pass: false,
803
- score: 0,
804
- reason: `Python code execution failed: ${err.message}`,
805
- assertion,
806
- };
807
- }
808
- return {
809
- pass,
810
- score,
811
- reason: pass
812
- ? 'Assertion passed'
813
- : `Python code returned ${pass ? 'true' : 'false'}\n${assertion.value}`,
814
- assertion,
815
- };
816
- }
817
- if (baseType === 'similar') {
818
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string' || Array.isArray(renderedValue), 'Similarity assertion type must have a string or array of strings value');
819
- if (Array.isArray(renderedValue)) {
820
- let minScore = Infinity;
821
- for (const value of renderedValue) {
822
- const result = await (0, matchers_1.matchesSimilarity)(value, outputString, assertion.threshold || 0.75, inverse, test.options);
823
- if (result.pass) {
824
- return {
825
- assertion,
826
- ...result,
827
- };
828
- }
829
- if (result.score < minScore) {
830
- minScore = result.score;
831
- }
832
- }
833
- return {
834
- assertion,
835
- pass: false,
836
- score: minScore,
837
- reason: `None of the provided values met the similarity threshold`,
838
- };
839
- }
840
- else {
841
- return {
842
- assertion,
843
- ...(await (0, matchers_1.matchesSimilarity)(renderedValue, outputString, assertion.threshold || 0.75, inverse, test.options)),
844
- };
845
- }
846
- }
847
- if (baseType === 'llm-rubric') {
848
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string' || typeof renderedValue === 'undefined', '"llm-rubric" assertion type must have a string value');
849
- if (test.options?.rubricPrompt && typeof test.options.rubricPrompt === 'object') {
850
- test.options.rubricPrompt = JSON.stringify(test.options.rubricPrompt);
851
- }
852
- // Update the assertion value. This allows the web view to display the prompt.
853
- assertion.value = assertion.value || test.options?.rubricPrompt;
854
- return {
855
- assertion,
856
- ...(await (0, matchers_1.matchesLlmRubric)(renderedValue || '', outputString, test.options, test.vars)),
857
- };
858
- }
859
- if (baseType === 'model-graded-factuality' || baseType === 'factuality') {
860
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', 'factuality assertion type must have a string value');
861
- (0, tiny_invariant_1.default)(prompt, 'factuality assertion type must have a prompt');
862
- if (test.options?.rubricPrompt) {
863
- // Substitute vars in prompt
864
- (0, tiny_invariant_1.default)(typeof test.options.rubricPrompt === 'string', 'rubricPrompt must be a string');
865
- test.options.rubricPrompt = nunjucks.renderString(test.options.rubricPrompt, test.vars || {});
866
- }
867
- return {
868
- assertion,
869
- ...(await (0, matchers_1.matchesFactuality)(prompt, renderedValue, outputString, test.options, test.vars)),
870
- };
871
- }
872
- if (baseType === 'model-graded-closedqa') {
873
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', 'model-graded-closedqa assertion type must have a string value');
874
- (0, tiny_invariant_1.default)(prompt, 'model-graded-closedqa assertion type must have a prompt');
875
- if (test.options?.rubricPrompt) {
876
- // Substitute vars in prompt
877
- (0, tiny_invariant_1.default)(typeof test.options.rubricPrompt === 'string', 'rubricPrompt must be a string');
878
- test.options.rubricPrompt = nunjucks.renderString(test.options.rubricPrompt, test.vars || {});
879
- }
880
- return {
881
- assertion,
882
- ...(await (0, matchers_1.matchesClosedQa)(prompt, renderedValue, outputString, test.options, test.vars)),
883
- };
884
- }
885
- if (baseType === 'answer-relevance') {
886
- (0, tiny_invariant_1.default)(typeof output === 'string', 'answer-relevance assertion type must evaluate a string output');
887
- (0, tiny_invariant_1.default)(prompt, 'answer-relevance assertion type must have a prompt');
888
- const input = typeof test.vars?.query === 'string' ? test.vars.query : prompt;
889
- return {
890
- assertion,
891
- ...(await (0, matchers_1.matchesAnswerRelevance)(input, output, assertion.threshold || 0, test.options)),
892
- };
893
- }
894
- if (baseType === 'context-recall') {
895
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', 'context-recall assertion type must have a string value');
896
- (0, tiny_invariant_1.default)(prompt, 'context-recall assertion type must have a prompt');
897
- return {
898
- assertion,
899
- ...(await (0, matchers_1.matchesContextRecall)(typeof test.vars?.context === 'string' ? test.vars.context : prompt, renderedValue, assertion.threshold || 0, test.options, test.vars)),
900
- };
901
- }
902
- if (baseType === 'context-relevance') {
903
- (0, tiny_invariant_1.default)(test.vars, 'context-relevance assertion type must have a vars object');
904
- (0, tiny_invariant_1.default)(typeof test.vars.query === 'string', 'context-relevance assertion type must have a query var');
905
- (0, tiny_invariant_1.default)(typeof test.vars.context === 'string', 'context-relevance assertion type must have a context var');
906
- return {
907
- assertion,
908
- ...(await (0, matchers_1.matchesContextRelevance)(test.vars.query, test.vars.context, assertion.threshold || 0, test.options)),
909
- };
910
- }
911
- if (baseType === 'context-faithfulness') {
912
- (0, tiny_invariant_1.default)(test.vars, 'context-faithfulness assertion type must have a vars object');
913
- (0, tiny_invariant_1.default)(typeof test.vars.query === 'string', 'context-faithfulness assertion type must have a query var');
914
- (0, tiny_invariant_1.default)(typeof test.vars.context === 'string', 'context-faithfulness assertion type must have a context var');
915
- (0, tiny_invariant_1.default)(typeof output === 'string', 'context-faithfulness assertion type must have a string output');
916
- return {
917
- assertion,
918
- ...(await (0, matchers_1.matchesContextFaithfulness)(test.vars.query, output, test.vars.context, assertion.threshold || 0, test.options)),
919
- };
920
- }
921
- if (baseType === 'moderation') {
922
- // Some redteam techniques override the actual prompt that is used, so we need to assess that prompt for moderation.
923
- const promptToModerate = providerResponse.metadata?.redteamFinalPrompt || prompt;
924
- const outputString = typeof output === 'string' ? output : JSON.stringify(output);
925
- (0, tiny_invariant_1.default)(promptToModerate, 'moderation assertion type must have a prompt');
926
- (0, tiny_invariant_1.default)(!assertion.value ||
927
- (Array.isArray(assertion.value) && typeof assertion.value[0] === 'string'), 'moderation assertion value must be a string array if set');
928
- if (promptToModerate[0] === '[' || promptToModerate[0] === '{') {
929
- // Try to extract the last user message from OpenAI-style prompts.
930
- try {
931
- const parsedPrompt = (0, shared_1.parseChatPrompt)(promptToModerate, null);
932
- if (parsedPrompt && parsedPrompt.length > 0) {
933
- prompt = parsedPrompt[parsedPrompt.length - 1].content;
934
- }
935
- }
936
- catch {
937
- // Ignore error
938
- }
939
- }
940
- const moderationResult = await (0, matchers_1.matchesModeration)({
941
- userPrompt: promptToModerate,
942
- assistantResponse: outputString,
943
- categories: Array.isArray(assertion.value) ? assertion.value : [],
944
- }, test.options);
945
- pass = moderationResult.pass;
946
- return {
947
- pass,
948
- score: moderationResult.score,
949
- reason: moderationResult.reason,
950
- assertion,
951
- };
952
- }
953
- if (baseType === 'webhook') {
954
- (0, tiny_invariant_1.default)(renderedValue, '"webhook" assertion type must have a URL value');
955
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', '"webhook" assertion type must have a URL value');
956
- try {
957
- const context = {
958
- prompt,
959
- vars: test.vars || {},
960
- };
961
- const response = await (0, fetch_1.fetchWithRetries)(renderedValue, {
962
- method: 'POST',
963
- headers: {
964
- 'Content-Type': 'application/json',
965
- },
966
- body: JSON.stringify({ output, context }),
967
- }, (0, envars_1.getEnvInt)('WEBHOOK_TIMEOUT', 5000));
968
- if (!response.ok) {
969
- throw new Error(`Webhook response status: ${response.status}`);
970
- }
971
- const jsonResponse = await response.json();
972
- pass = jsonResponse.pass !== inverse;
973
- score =
974
- typeof jsonResponse.score === 'undefined'
975
- ? pass
976
- ? 1
977
- : 0
978
- : inverse
979
- ? 1 - jsonResponse.score
980
- : jsonResponse.score;
981
- const reason = jsonResponse.reason ||
982
- (pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`);
983
- return {
984
- pass,
985
- score,
986
- reason,
987
- assertion,
988
- };
989
- }
990
- catch (err) {
991
- return {
992
- pass: false,
993
- score: 0,
994
- reason: `Webhook error: ${err.message}`,
995
- assertion,
996
- };
997
- }
998
- }
999
- if (baseType === 'rouge-n') {
1000
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', '"rouge" assertion type must be a string value');
1001
- return handleRougeScore(baseType, assertion, renderedValue, outputString, inverse);
1002
- }
1003
- if (baseType === 'levenshtein') {
1004
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string', '"levenshtein" assertion type must have a string value');
1005
- const levDistance = (0, fastest_levenshtein_1.distance)(outputString, renderedValue);
1006
- pass = levDistance <= (assertion.threshold || 5);
1007
- return {
1008
- pass,
1009
- score: pass ? 1 : 0,
1010
- reason: pass
1011
- ? 'Assertion passed'
1012
- : `Levenshtein distance ${levDistance} is greater than threshold ${assertion.threshold || 5}`,
1013
- assertion,
1014
- };
1015
- }
1016
- if (baseType === 'classifier') {
1017
- (0, tiny_invariant_1.default)(typeof renderedValue === 'string' || typeof renderedValue === 'undefined', '"classifier" assertion type must have a string value or be undefined');
1018
- // Assertion provider overrides test provider
1019
- const classificationResult = await (0, matchers_1.matchesClassification)(renderedValue, outputString, assertion.threshold ?? 1, test.options);
1020
- if (inverse) {
1021
- classificationResult.pass = !classificationResult.pass;
1022
- classificationResult.score = 1 - classificationResult.score;
1023
- }
1024
- return {
1025
- assertion,
1026
- ...classificationResult,
1027
- };
1028
- }
1029
- if (baseType === 'latency') {
1030
- if (!assertion.threshold) {
1031
- throw new Error('Latency assertion must have a threshold in milliseconds');
1032
- }
1033
- if (latencyMs === undefined) {
1034
- throw new Error('Latency assertion does not support cached results. Rerun the eval with --no-cache');
1035
- }
1036
- pass = latencyMs <= assertion.threshold;
1037
- return {
1038
- pass,
1039
- score: pass ? 1 : 0,
1040
- reason: pass
1041
- ? 'Assertion passed'
1042
- : `Latency ${latencyMs}ms is greater than threshold ${assertion.threshold}ms`,
1043
- assertion,
1044
- };
1045
- }
1046
- if (baseType === 'perplexity') {
1047
- if (!logProbs || logProbs.length === 0) {
1048
- throw new Error('Perplexity assertion does not support providers that do not return logProbs');
1049
- }
1050
- const sumLogProbs = logProbs.reduce((acc, logProb) => acc + logProb, 0);
1051
- const avgLogProb = sumLogProbs / logProbs.length;
1052
- const perplexity = Math.exp(-avgLogProb);
1053
- pass = assertion.threshold ? perplexity <= assertion.threshold : true;
1054
- return {
1055
- pass,
1056
- score: pass ? 1 : 0,
1057
- reason: pass
1058
- ? 'Assertion passed'
1059
- : `Perplexity ${perplexity.toFixed(2)} is greater than threshold ${assertion.threshold}`,
1060
- assertion,
1061
- };
1062
- }
1063
- if (baseType === 'perplexity-score') {
1064
- if (!logProbs || logProbs.length === 0) {
1065
- throw new Error('perplexity-score assertion does not support providers that do not return logProbs');
1066
- }
1067
- const sumLogProbs = logProbs.reduce((acc, logProb) => acc + logProb, 0);
1068
- const avgLogProb = sumLogProbs / logProbs.length;
1069
- const perplexity = Math.exp(-avgLogProb);
1070
- const perplexityNorm = 1 / (1 + perplexity);
1071
- pass = assertion.threshold ? perplexityNorm >= assertion.threshold : true;
1072
- return {
1073
- pass,
1074
- score: perplexityNorm,
1075
- reason: pass
1076
- ? 'Assertion passed'
1077
- : `Perplexity score ${perplexityNorm.toFixed(2)} is less than threshold ${assertion.threshold}`,
1078
- assertion,
1079
- };
1080
- }
1081
- if (baseType === 'cost') {
1082
- if (!assertion.threshold) {
1083
- throw new Error('Cost assertion must have a threshold');
1084
- }
1085
- if (typeof cost === 'undefined') {
1086
- throw new Error('Cost assertion does not support providers that do not return cost');
1087
- }
1088
- pass = cost <= assertion.threshold;
1089
- return {
1090
- pass,
1091
- score: pass ? 1 : 0,
1092
- reason: pass
1093
- ? 'Assertion passed'
1094
- : `Cost ${cost.toPrecision(2)} is greater than threshold ${assertion.threshold}`,
1095
- assertion,
1096
- };
159
+ const assertionParams = {
160
+ assertion,
161
+ baseType,
162
+ context,
163
+ cost,
164
+ inverse,
165
+ latencyMs,
166
+ logProbs,
167
+ output,
168
+ outputString: (0, utils_1.coerceString)(output),
169
+ prompt,
170
+ provider,
171
+ providerResponse,
172
+ renderedValue,
173
+ test: (0, utils_1.getFinalTest)(test, assertion),
174
+ valueFromScript,
175
+ };
176
+ // Map assertion types to their handler functions
177
+ const assertionHandlers = {
178
+ 'answer-relevance': answerRelevance_1.handleAnswerRelevance,
179
+ bleu: bleu_1.handleBleuScore,
180
+ classifier: classifier_1.handleClassifier,
181
+ contains: contains_1.handleContains,
182
+ 'contains-all': contains_1.handleContainsAll,
183
+ 'contains-any': contains_1.handleContainsAny,
184
+ 'contains-json': json_1.handleContainsJson,
185
+ 'contains-sql': sql_1.handleContainsSql,
186
+ 'contains-xml': xml_1.handleIsXml,
187
+ 'context-faithfulness': contextFaithfulness_1.handleContextFaithfulness,
188
+ 'context-recall': contextRecall_1.handleContextRecall,
189
+ 'context-relevance': contextRelevance_1.handleContextRelevance,
190
+ cost: cost_1.handleCost,
191
+ equals: equals_1.handleEquals,
192
+ factuality: factuality_1.handleFactuality,
193
+ icontains: contains_1.handleIContains,
194
+ 'icontains-all': contains_1.handleIContainsAll,
195
+ 'icontains-any': contains_1.handleIContainsAny,
196
+ 'is-json': json_1.handleIsJson,
197
+ 'is-sql': sql_1.handleIsSql,
198
+ 'is-valid-openai-function-call': openai_1.handleIsValidOpenAiFunctionCall,
199
+ 'is-valid-openai-tools-call': openai_1.handleIsValidOpenAiToolsCall,
200
+ 'is-xml': xml_1.handleIsXml,
201
+ javascript: javascript_1.handleJavascript,
202
+ latency: latency_1.handleLatency,
203
+ levenshtein: levenshtein_1.handleLevenshtein,
204
+ 'llm-rubric': llmRubric_1.handleLlmRubric,
205
+ 'model-graded-closedqa': modelGradedClosedQa_1.handleModelGradedClosedQa,
206
+ 'model-graded-factuality': factuality_1.handleFactuality,
207
+ moderation: moderation_1.handleModeration,
208
+ perplexity: perplexity_1.handlePerplexity,
209
+ 'perplexity-score': perplexity_1.handlePerplexityScore,
210
+ python: python_1.handlePython,
211
+ regex: regex_1.handleRegex,
212
+ 'rouge-n': rouge_1.handleRougeScore,
213
+ similar: similar_1.handleSimilar,
214
+ 'starts-with': startsWith_1.handleStartsWith,
215
+ webhook: webhook_1.handleWebhook,
216
+ };
217
+ const handler = assertionHandlers[baseType];
218
+ if (handler) {
219
+ return handler(assertionParams);
1097
220
  }
1098
221
  if (baseType.startsWith('promptfoo:redteam:')) {
1099
- const grader = (0, graders_1.getGraderById)(baseType);
1100
- (0, tiny_invariant_1.default)(grader, `Unknown promptfoo grader: ${baseType}`);
1101
- (0, tiny_invariant_1.default)(prompt, `Promptfoo grader ${baseType} must have a prompt`);
1102
- const { grade, rubric, suggestions } = await grader.getResult(prompt, outputString, test, provider, renderedValue);
1103
- return {
1104
- assertion: {
1105
- ...assertion,
1106
- value: rubric,
1107
- },
1108
- ...grade,
1109
- suggestions,
1110
- metadata: {
1111
- // Pass through all test metadata for redteam
1112
- ...test.metadata,
1113
- ...grade.metadata,
1114
- },
1115
- };
222
+ return (0, redteam_1.handleRedteam)(assertionParams);
1116
223
  }
1117
224
  throw new Error('Unknown assertion type: ' + assertion.type);
1118
225
  }