promptfoo 0.66.0 → 0.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. package/README.md +10 -10
  2. package/dist/package.json +6 -4
  3. package/dist/src/assertions/validateAssertions.d.ts.map +1 -1
  4. package/dist/src/assertions/validateAssertions.js +11 -11
  5. package/dist/src/assertions/validateAssertions.js.map +1 -1
  6. package/dist/src/assertions.d.ts +6 -4
  7. package/dist/src/assertions.d.ts.map +1 -1
  8. package/dist/src/assertions.js +138 -165
  9. package/dist/src/assertions.js.map +1 -1
  10. package/dist/src/cache.d.ts.map +1 -1
  11. package/dist/src/cache.js +3 -3
  12. package/dist/src/cache.js.map +1 -1
  13. package/dist/src/checkNodeVersion.d.ts +6 -0
  14. package/dist/src/checkNodeVersion.d.ts.map +1 -0
  15. package/dist/src/checkNodeVersion.js +67 -0
  16. package/dist/src/checkNodeVersion.js.map +1 -0
  17. package/dist/src/commands/config.d.ts.map +1 -1
  18. package/dist/src/commands/config.js +13 -2
  19. package/dist/src/commands/config.js.map +1 -1
  20. package/dist/src/commands/delete.d.ts.map +1 -1
  21. package/dist/src/commands/delete.js +11 -11
  22. package/dist/src/commands/delete.js.map +1 -1
  23. package/dist/src/commands/export.d.ts.map +1 -1
  24. package/dist/src/commands/export.js +1 -1
  25. package/dist/src/commands/export.js.map +1 -1
  26. package/dist/src/commands/import.js +1 -1
  27. package/dist/src/commands/import.js.map +1 -1
  28. package/dist/src/commands/list.d.ts.map +1 -1
  29. package/dist/src/commands/list.js +2 -2
  30. package/dist/src/commands/list.js.map +1 -1
  31. package/dist/src/commands/show.d.ts.map +1 -1
  32. package/dist/src/commands/show.js +66 -66
  33. package/dist/src/commands/show.js.map +1 -1
  34. package/dist/src/constants.d.ts +1 -0
  35. package/dist/src/constants.d.ts.map +1 -1
  36. package/dist/src/constants.js +3 -1
  37. package/dist/src/constants.js.map +1 -1
  38. package/dist/src/csv.d.ts +1 -1
  39. package/dist/src/csv.d.ts.map +1 -1
  40. package/dist/src/csv.js +50 -51
  41. package/dist/src/csv.js.map +1 -1
  42. package/dist/src/database.d.ts +56 -56
  43. package/dist/src/database.d.ts.map +1 -1
  44. package/dist/src/database.js +34 -34
  45. package/dist/src/database.js.map +1 -1
  46. package/dist/src/esm.d.ts.map +1 -1
  47. package/dist/src/esm.js +1 -1
  48. package/dist/src/esm.js.map +1 -1
  49. package/dist/src/evaluator.d.ts +18 -0
  50. package/dist/src/evaluator.d.ts.map +1 -1
  51. package/dist/src/evaluator.js +41 -23
  52. package/dist/src/evaluator.js.map +1 -1
  53. package/dist/src/feedback.d.ts +1 -1
  54. package/dist/src/feedback.d.ts.map +1 -1
  55. package/dist/src/feedback.js +21 -21
  56. package/dist/src/feedback.js.map +1 -1
  57. package/dist/src/fetch.d.ts.map +1 -1
  58. package/dist/src/fetch.js.map +1 -1
  59. package/dist/src/globalConfig.d.ts +0 -4
  60. package/dist/src/globalConfig.d.ts.map +1 -1
  61. package/dist/src/globalConfig.js +5 -5
  62. package/dist/src/globalConfig.js.map +1 -1
  63. package/dist/src/googleSheets.d.ts +1 -1
  64. package/dist/src/googleSheets.d.ts.map +1 -1
  65. package/dist/src/googleSheets.js +16 -16
  66. package/dist/src/googleSheets.js.map +1 -1
  67. package/dist/src/index.d.ts +3 -1
  68. package/dist/src/index.d.ts.map +1 -1
  69. package/dist/src/index.js +4 -4
  70. package/dist/src/index.js.map +1 -1
  71. package/dist/src/integrations/langfuse.js +1 -1
  72. package/dist/src/integrations/langfuse.js.map +1 -1
  73. package/dist/src/main.js +28 -26
  74. package/dist/src/main.js.map +1 -1
  75. package/dist/src/matchers.d.ts.map +1 -1
  76. package/dist/src/matchers.js +8 -7
  77. package/dist/src/matchers.js.map +1 -1
  78. package/dist/src/migrate.d.ts.map +1 -1
  79. package/dist/src/migrate.js +2 -2
  80. package/dist/src/migrate.js.map +1 -1
  81. package/dist/src/onboarding.d.ts.map +1 -1
  82. package/dist/src/onboarding.js +2 -2
  83. package/dist/src/onboarding.js.map +1 -1
  84. package/dist/src/prompts/constants.d.ts +3 -0
  85. package/dist/src/prompts/constants.d.ts.map +1 -0
  86. package/dist/src/prompts/constants.js +16 -0
  87. package/dist/src/prompts/constants.js.map +1 -0
  88. package/dist/src/prompts/external/ragas.d.ts.map +1 -0
  89. package/dist/src/prompts/external/ragas.js.map +1 -0
  90. package/dist/src/{prompts.d.ts → prompts/grading.d.ts} +7 -4
  91. package/dist/src/prompts/grading.d.ts.map +1 -0
  92. package/dist/src/prompts/grading.js +119 -0
  93. package/dist/src/prompts/grading.js.map +1 -0
  94. package/dist/src/prompts/index.d.ts +25 -0
  95. package/dist/src/prompts/index.d.ts.map +1 -0
  96. package/dist/src/prompts/index.js +143 -0
  97. package/dist/src/prompts/index.js.map +1 -0
  98. package/dist/src/prompts/processors/javascript.d.ts +9 -0
  99. package/dist/src/prompts/processors/javascript.d.ts.map +1 -0
  100. package/dist/src/prompts/processors/javascript.js +22 -0
  101. package/dist/src/prompts/processors/javascript.js.map +1 -0
  102. package/dist/src/prompts/processors/json.d.ts +12 -0
  103. package/dist/src/prompts/processors/json.d.ts.map +1 -0
  104. package/dist/src/prompts/processors/json.js +48 -0
  105. package/dist/src/prompts/processors/json.js.map +1 -0
  106. package/dist/src/prompts/processors/jsonl.d.ts +9 -0
  107. package/dist/src/prompts/processors/jsonl.d.ts.map +1 -0
  108. package/dist/src/prompts/processors/jsonl.js +48 -0
  109. package/dist/src/prompts/processors/jsonl.js.map +1 -0
  110. package/dist/src/prompts/processors/python.d.ts +31 -0
  111. package/dist/src/prompts/processors/python.d.ts.map +1 -0
  112. package/dist/src/prompts/processors/python.js +96 -0
  113. package/dist/src/prompts/processors/python.js.map +1 -0
  114. package/dist/src/prompts/processors/string.d.ts +8 -0
  115. package/dist/src/prompts/processors/string.d.ts.map +1 -0
  116. package/dist/src/prompts/processors/string.js +23 -0
  117. package/dist/src/prompts/processors/string.js.map +1 -0
  118. package/dist/src/prompts/processors/text.d.ts +9 -0
  119. package/dist/src/prompts/processors/text.d.ts.map +1 -0
  120. package/dist/src/prompts/processors/text.js +47 -0
  121. package/dist/src/prompts/processors/text.js.map +1 -0
  122. package/dist/src/prompts/processors/yaml.d.ts +13 -0
  123. package/dist/src/prompts/processors/yaml.d.ts.map +1 -0
  124. package/dist/src/prompts/processors/yaml.js +55 -0
  125. package/dist/src/prompts/processors/yaml.js.map +1 -0
  126. package/dist/src/prompts/utils.d.ts +29 -0
  127. package/dist/src/prompts/utils.d.ts.map +1 -0
  128. package/dist/src/prompts/utils.js +143 -0
  129. package/dist/src/prompts/utils.js.map +1 -0
  130. package/dist/src/providers/anthropic.d.ts +1 -1
  131. package/dist/src/providers/anthropic.d.ts.map +1 -1
  132. package/dist/src/providers/anthropic.js +68 -67
  133. package/dist/src/providers/anthropic.js.map +1 -1
  134. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  135. package/dist/src/providers/azureopenai.js +1 -1
  136. package/dist/src/providers/azureopenai.js.map +1 -1
  137. package/dist/src/providers/azureopenaiUtil.d.ts.map +1 -1
  138. package/dist/src/providers/azureopenaiUtil.js +2 -2
  139. package/dist/src/providers/azureopenaiUtil.js.map +1 -1
  140. package/dist/src/providers/bam.d.ts.map +1 -1
  141. package/dist/src/providers/bam.js +1 -1
  142. package/dist/src/providers/bam.js.map +1 -1
  143. package/dist/src/providers/bedrock.d.ts +24 -0
  144. package/dist/src/providers/bedrock.d.ts.map +1 -1
  145. package/dist/src/providers/bedrock.js +146 -46
  146. package/dist/src/providers/bedrock.js.map +1 -1
  147. package/dist/src/providers/cloudflare-ai.d.ts +1 -1
  148. package/dist/src/providers/cloudflare-ai.d.ts.map +1 -1
  149. package/dist/src/providers/cloudflare-ai.js +1 -1
  150. package/dist/src/providers/cloudflare-ai.js.map +1 -1
  151. package/dist/src/providers/cohere.d.ts.map +1 -1
  152. package/dist/src/providers/cohere.js.map +1 -1
  153. package/dist/src/providers/defaults.d.ts +1 -1
  154. package/dist/src/providers/defaults.d.ts.map +1 -1
  155. package/dist/src/providers/defaults.js +2 -2
  156. package/dist/src/providers/defaults.js.map +1 -1
  157. package/dist/src/providers/http.d.ts.map +1 -1
  158. package/dist/src/providers/http.js +12 -12
  159. package/dist/src/providers/http.js.map +1 -1
  160. package/dist/src/providers/huggingface.d.ts +1 -1
  161. package/dist/src/providers/huggingface.d.ts.map +1 -1
  162. package/dist/src/providers/huggingface.js +1 -1
  163. package/dist/src/providers/huggingface.js.map +1 -1
  164. package/dist/src/providers/llama.d.ts.map +1 -1
  165. package/dist/src/providers/llama.js.map +1 -1
  166. package/dist/src/providers/localai.d.ts.map +1 -1
  167. package/dist/src/providers/localai.js +1 -1
  168. package/dist/src/providers/localai.js.map +1 -1
  169. package/dist/src/providers/mistral.d.ts.map +1 -1
  170. package/dist/src/providers/mistral.js +55 -54
  171. package/dist/src/providers/mistral.js.map +1 -1
  172. package/dist/src/providers/ollama.d.ts.map +1 -1
  173. package/dist/src/providers/ollama.js +1 -1
  174. package/dist/src/providers/ollama.js.map +1 -1
  175. package/dist/src/providers/openai.d.ts +1 -1
  176. package/dist/src/providers/openai.d.ts.map +1 -1
  177. package/dist/src/providers/openai.js +124 -118
  178. package/dist/src/providers/openai.js.map +1 -1
  179. package/dist/src/providers/palm.d.ts.map +1 -1
  180. package/dist/src/providers/palm.js +1 -1
  181. package/dist/src/providers/palm.js.map +1 -1
  182. package/dist/src/providers/portkey.d.ts +1 -1
  183. package/dist/src/providers/portkey.d.ts.map +1 -1
  184. package/dist/src/providers/portkey.js.map +1 -1
  185. package/dist/src/providers/promptfoo.d.ts.map +1 -1
  186. package/dist/src/providers/promptfoo.js.map +1 -1
  187. package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
  188. package/dist/src/providers/pythonCompletion.js +2 -2
  189. package/dist/src/providers/pythonCompletion.js.map +1 -1
  190. package/dist/src/providers/replicate.d.ts +17 -1
  191. package/dist/src/providers/replicate.d.ts.map +1 -1
  192. package/dist/src/providers/replicate.js +65 -3
  193. package/dist/src/providers/replicate.js.map +1 -1
  194. package/dist/src/providers/scriptCompletion.d.ts.map +1 -1
  195. package/dist/src/providers/scriptCompletion.js +1 -1
  196. package/dist/src/providers/scriptCompletion.js.map +1 -1
  197. package/dist/src/providers/vertex.d.ts +43 -32
  198. package/dist/src/providers/vertex.d.ts.map +1 -1
  199. package/dist/src/providers/vertex.js +60 -3
  200. package/dist/src/providers/vertex.js.map +1 -1
  201. package/dist/src/providers/voyage.d.ts.map +1 -1
  202. package/dist/src/providers/voyage.js.map +1 -1
  203. package/dist/src/providers/webhook.d.ts.map +1 -1
  204. package/dist/src/providers/webhook.js +1 -1
  205. package/dist/src/providers/webhook.js.map +1 -1
  206. package/dist/src/providers.d.ts +7 -7
  207. package/dist/src/providers.d.ts.map +1 -1
  208. package/dist/src/providers.js +82 -65
  209. package/dist/src/providers.js.map +1 -1
  210. package/dist/src/python/wrapper.d.ts.map +1 -1
  211. package/dist/src/python/wrapper.js +1 -1
  212. package/dist/src/python/wrapper.js.map +1 -1
  213. package/dist/src/redteam/getHijackingTests.d.ts.map +1 -1
  214. package/dist/src/redteam/getHijackingTests.js.map +1 -1
  215. package/dist/src/redteam/index.d.ts +1 -1
  216. package/dist/src/redteam/index.d.ts.map +1 -1
  217. package/dist/src/redteam/index.js +38 -38
  218. package/dist/src/redteam/index.js.map +1 -1
  219. package/dist/src/redteam/iterative.d.ts +3 -0
  220. package/dist/src/redteam/iterative.d.ts.map +1 -1
  221. package/dist/src/redteam/iterative.js +24 -14
  222. package/dist/src/redteam/iterative.js.map +1 -1
  223. package/dist/src/redteam/iterativeImage.d.ts +12 -0
  224. package/dist/src/redteam/iterativeImage.d.ts.map +1 -0
  225. package/dist/src/redteam/iterativeImage.js +227 -0
  226. package/dist/src/redteam/iterativeImage.js.map +1 -0
  227. package/dist/src/share.d.ts.map +1 -1
  228. package/dist/src/share.js +1 -1
  229. package/dist/src/share.js.map +1 -1
  230. package/dist/src/suggestions.d.ts.map +1 -1
  231. package/dist/src/suggestions.js.map +1 -1
  232. package/dist/src/table.d.ts.map +1 -1
  233. package/dist/src/table.js +4 -5
  234. package/dist/src/table.js.map +1 -1
  235. package/dist/src/telemetry.d.ts.map +1 -1
  236. package/dist/src/telemetry.js +1 -1
  237. package/dist/src/telemetry.js.map +1 -1
  238. package/dist/src/testCases.d.ts +1 -1
  239. package/dist/src/testCases.d.ts.map +1 -1
  240. package/dist/src/testCases.js +24 -15
  241. package/dist/src/testCases.js.map +1 -1
  242. package/dist/src/types.d.ts +5 -2
  243. package/dist/src/types.d.ts.map +1 -1
  244. package/dist/src/types.js +5 -5
  245. package/dist/src/types.js.map +1 -1
  246. package/dist/src/updates.js +4 -3
  247. package/dist/src/updates.js.map +1 -1
  248. package/dist/src/util.d.ts +18 -17
  249. package/dist/src/util.d.ts.map +1 -1
  250. package/dist/src/util.js +157 -126
  251. package/dist/src/util.js.map +1 -1
  252. package/dist/src/web/nextui/404/index.html +1 -1
  253. package/dist/src/web/nextui/404.html +1 -1
  254. package/dist/src/web/nextui/_next/static/chunks/2-e4ac60fba7a205e9.js +1 -0
  255. package/dist/src/web/nextui/_next/static/chunks/897-1955b232a2148365.js +32 -0
  256. package/dist/src/web/nextui/_next/static/chunks/app/auth/login/{page-c4a2650ac3a0ecd9.js → page-d932a73274f0f175.js} +1 -1
  257. package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/{page-dd18caf3100d8d0e.js → page-7a8f35189f8bc5b8.js} +1 -1
  258. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-8b6fc67a6c47c793.js +1 -0
  259. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-35bb69e87d17a291.js → page-dff9258a62cdf49e.js} +1 -1
  260. package/dist/src/web/nextui/_next/static/chunks/app/eval/{page-aef3aed32af8d4d7.js → page-7955455d29645096.js} +1 -1
  261. package/dist/src/web/nextui/_next/static/chunks/app/layout-45eacc3320f78daa.js +1 -0
  262. package/dist/src/web/nextui/_next/static/chunks/app/progress/{page-00b5c3308a81af12.js → page-948dc7bcbf53cecf.js} +1 -1
  263. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-3b66a4c23899e662.js +1 -0
  264. package/dist/src/web/nextui/_next/static/chunks/app/report/page-7869eb9950cab8de.js +1 -0
  265. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-2b2257cb43136762.js +1 -0
  266. package/dist/src/web/nextui/_next/static/chunks/{webpack-2fa22c6070dd15bc.js → webpack-525f81bed20c14b6.js} +1 -1
  267. package/dist/src/web/nextui/_next/static/css/036bf4af64e53e86.css +1 -0
  268. package/dist/src/web/nextui/_next/static/css/255fe4bf8eb4c6e9.css +1 -0
  269. package/dist/src/web/nextui/_next/static/css/dab5d695b3657d59.css +1 -0
  270. package/dist/src/web/nextui/_next/static/css/e141e895af3747c6.css +1 -0
  271. package/dist/src/web/nextui/_next/static/css/edcd6f0b6c902fde.css +1 -0
  272. package/dist/src/web/nextui/auth/login/index.html +1 -1
  273. package/dist/src/web/nextui/auth/login/index.txt +6 -6
  274. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  275. package/dist/src/web/nextui/auth/signup/index.txt +6 -6
  276. package/dist/src/web/nextui/datasets/index.html +1 -1
  277. package/dist/src/web/nextui/datasets/index.txt +6 -6
  278. package/dist/src/web/nextui/eval/index.html +1 -1
  279. package/dist/src/web/nextui/eval/index.txt +8 -8
  280. package/dist/src/web/nextui/index.html +1 -1
  281. package/dist/src/web/nextui/index.txt +5 -5
  282. package/dist/src/web/nextui/progress/index.html +1 -1
  283. package/dist/src/web/nextui/progress/index.txt +6 -6
  284. package/dist/src/web/nextui/prompts/index.html +1 -1
  285. package/dist/src/web/nextui/prompts/index.txt +6 -6
  286. package/dist/src/web/nextui/report/index.html +1 -1
  287. package/dist/src/web/nextui/report/index.txt +8 -8
  288. package/dist/src/web/nextui/setup/index.html +2 -2
  289. package/dist/src/web/nextui/setup/index.txt +9 -9
  290. package/dist/src/web/server.d.ts.map +1 -1
  291. package/dist/src/web/server.js +10 -10
  292. package/dist/src/web/server.js.map +1 -1
  293. package/package.json +6 -4
  294. package/dist/src/external/ragas.d.ts.map +0 -1
  295. package/dist/src/external/ragas.js.map +0 -1
  296. package/dist/src/prompts.d.ts.map +0 -1
  297. package/dist/src/prompts.js +0 -391
  298. package/dist/src/prompts.js.map +0 -1
  299. package/dist/src/web/nextui/_next/static/chunks/2-60ab1c881a240da6.js +0 -1
  300. package/dist/src/web/nextui/_next/static/chunks/94-c07f30271fa4d8e4.js +0 -32
  301. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-9426b519d4be1fdb.js +0 -1
  302. package/dist/src/web/nextui/_next/static/chunks/app/layout-dfda5ed5ef745c2d.js +0 -1
  303. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-ee610cffca4b965b.js +0 -1
  304. package/dist/src/web/nextui/_next/static/chunks/app/report/page-1b97ddc1b365a121.js +0 -1
  305. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-e1c49ea6fe7c04c5.js +0 -1
  306. package/dist/src/web/nextui/_next/static/css/16c1dd82fc87c9d7.css +0 -1
  307. package/dist/src/web/nextui/_next/static/css/451beaa5570cb9d3.css +0 -1
  308. package/dist/src/web/nextui/_next/static/css/51a17e8edcdfdbb2.css +0 -1
  309. package/dist/src/web/nextui/_next/static/css/51f7d6933894a4f8.css +0 -1
  310. package/dist/src/web/nextui/_next/static/css/e9f25719d0b14939.css +0 -1
  311. /package/dist/src/{external → prompts/external}/ragas.d.ts +0 -0
  312. /package/dist/src/{external → prompts/external}/ragas.js +0 -0
  313. /package/dist/src/web/nextui/_next/static/{lMO8mRWL6KkcjtN4Giq14 → 82qlai1jFeoFILGwnRAJx}/_buildManifest.js +0 -0
  314. /package/dist/src/web/nextui/_next/static/{lMO8mRWL6KkcjtN4Giq14 → 82qlai1jFeoFILGwnRAJx}/_ssgManifest.js +0 -0
package/README.md CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  [![npm](https://img.shields.io/npm/v/promptfoo)](https://npmjs.com/package/promptfoo)
4
4
  [![npm](https://img.shields.io/npm/dm/promptfoo)](https://npmjs.com/package/promptfoo)
5
- [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/typpo/promptfoo/main.yml)](https://github.com/typpo/promptfoo/actions/workflows/main.yml)
6
- ![MIT license](https://img.shields.io/github/license/typpo/promptfoo)
5
+ [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/typpo/promptfoo/main.yml)](https://github.com/promptfoo/promptfoo/actions/workflows/main.yml)
6
+ ![MIT license](https://img.shields.io/github/license/promptfoo/promptfoo)
7
7
  [![Discord](https://dcbadge.vercel.app/api/server/gHPS9jjfbs?style=flat&compact=true)](https://discord.gg/gHPS9jjfbs)
8
8
 
9
9
  `promptfoo` is a tool for testing and evaluating LLM apps.
@@ -30,7 +30,7 @@ promptfoo produces matrix views that let you quickly evaluate outputs across man
30
30
 
31
31
  It works on the command line too:
32
32
 
33
- ![Prompt evaluation](https://github.com/typpo/promptfoo/assets/310310/480e1114-d049-40b9-bd5f-f81c15060284)
33
+ ![Prompt evaluation](https://github.com/promptfoo/promptfoo/assets/310310/480e1114-d049-40b9-bd5f-f81c15060284)
34
34
 
35
35
  ## Why choose promptfoo?
36
36
 
@@ -52,7 +52,7 @@ As you explore modifications to the prompt, use `promptfoo eval` to rate all out
52
52
 
53
53
  As you collect more examples and establish a user feedback loop, continue to build the pool of test cases.
54
54
 
55
- <img width="772" alt="LLM ops" src="https://github.com/typpo/promptfoo/assets/310310/cf0461a7-2832-4362-9fbb-4ebd911d06ff">
55
+ <img width="772" alt="LLM ops" src="https://github.com/promptfoo/promptfoo/assets/310310/cf0461a7-2832-4362-9fbb-4ebd911d06ff">
56
56
 
57
57
  ## Usage
58
58
 
@@ -161,7 +161,7 @@ providers: [openai:gpt-3.5-turbo]
161
161
  tests: tests.csv
162
162
  ```
163
163
 
164
- See [example CSV](https://github.com/typpo/promptfoo/blob/main/examples/simple-test/tests.csv).
164
+ See [example CSV](https://github.com/promptfoo/promptfoo/blob/main/examples/simple-test/tests.csv).
165
165
 
166
166
  ### Command-line
167
167
 
@@ -169,7 +169,7 @@ If you're looking to customize your usage, you have a wide set of parameters at
169
169
 
170
170
  | Option | Description |
171
171
  | ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
172
- | `-p, --prompts <paths...>` | Paths to [prompt files](https://www.promptfoo.dev/docs/configuration/parameters#prompt-files), directory, or glob |
172
+ | `-p, --prompts <paths...>` | Paths to [prompt files](https://www.promptfoo.dev/docs/configuration/parameters#prompts), directory, or glob |
173
173
  | `-r, --providers <name or path...>` | One of: openai:chat, openai:completion, openai:model-name, localai:chat:model-name, localai:completion:model-name. See [API providers][providers-docs] |
174
174
  | `-o, --output <path>` | Path to [output file](https://www.promptfoo.dev/docs/configuration/parameters#output-file) (csv, json, yaml, html) |
175
175
  | `--tests <path>` | Path to [external test file](https://www.promptfoo.dev/docs/configurationexpected-outputsassertions#load-an-external-tests-file) |
@@ -190,7 +190,7 @@ npx promptfoo view
190
190
 
191
191
  #### Prompt quality
192
192
 
193
- In [this example](https://github.com/typpo/promptfoo/tree/main/examples/assistant-cli), we evaluate whether adding adjectives to the personality of an assistant bot affects the responses:
193
+ In [this example](https://github.com/promptfoo/promptfoo/tree/main/examples/assistant-cli), we evaluate whether adding adjectives to the personality of an assistant bot affects the responses:
194
194
 
195
195
  ```
196
196
  npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo -t tests.csv
@@ -204,13 +204,13 @@ npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo -t tests.csv
204
204
 
205
205
  This command will evaluate the prompts in `prompts.txt`, substituting the variable values from `vars.csv`, and output results in your terminal.
206
206
 
207
- You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/typpo/promptfoo/blob/main/examples/simple-cli/output.json), YAML, or an HTML file:
207
+ You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/promptfoo/promptfoo/blob/main/examples/simple-cli/output.json), YAML, or an HTML file:
208
208
 
209
209
  ![Table output](https://user-images.githubusercontent.com/310310/235483444-4ddb832d-e103-4b9c-a862-b0d6cc11cdc0.png)
210
210
 
211
211
  #### Model quality
212
212
 
213
- In the [next example](https://github.com/typpo/promptfoo/tree/main/examples/gpt-3.5-vs-4), we evaluate the difference between GPT 3 and GPT 4 outputs for a given prompt:
213
+ In the [next example](https://github.com/promptfoo/promptfoo/tree/main/examples/gpt-3.5-vs-4), we evaluate the difference between GPT 3 and GPT 4 outputs for a given prompt:
214
214
 
215
215
  ```
216
216
  npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo openai:gpt-4 -o output.html
@@ -302,7 +302,7 @@ const results = await promptfoo.evaluate({
302
302
 
303
303
  This code imports the `promptfoo` library, defines the evaluation options, and then calls the `evaluate` function with these options.
304
304
 
305
- See the full example [here](https://github.com/typpo/promptfoo/tree/main/examples/simple-import), which includes an example results object.
305
+ See the full example [here](https://github.com/promptfoo/promptfoo/tree/main/examples/simple-import), which includes an example results object.
306
306
 
307
307
  ## Configuration
308
308
 
package/dist/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.66.0",
5
+ "version": "0.68.0",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "repository": "promptfoo/promptfoo",
@@ -23,7 +23,7 @@
23
23
  "dist"
24
24
  ],
25
25
  "engines": {
26
- "node": ">=18"
26
+ "node": ">=18.0.0"
27
27
  },
28
28
  "bin": {
29
29
  "promptfoo": "dist/src/main.js"
@@ -49,12 +49,12 @@
49
49
  "prepublishOnly": "npm run build:clean && npm run build"
50
50
  },
51
51
  "peerDependencies": {
52
- "@aws-sdk/client-bedrock-runtime": "^3.458.0",
52
+ "@aws-sdk/client-bedrock-runtime": "^3.602.0",
53
53
  "@azure/identity": "^4.0.0",
54
54
  "@azure/openai-assistants": "^1.0.0-beta.5",
55
55
  "@ibm-generative-ai/node-sdk": "^2.0.6",
56
+ "@smithy/node-http-handler": "^3.1.1",
56
57
  "google-auth-library": "^9.7.0",
57
- "googleapis": "^134.0.0",
58
58
  "langfuse": "^3.7.0",
59
59
  "node-sql-parser": "^5.2.0"
60
60
  },
@@ -65,6 +65,7 @@
65
65
  "@swc/cli": "^0.3.12",
66
66
  "@swc/core": "^1.6.1",
67
67
  "@swc/jest": "^0.2.36",
68
+ "@trivago/prettier-plugin-sort-imports": "^4.3.0",
68
69
  "@types/async": "^3.2.24",
69
70
  "@types/better-sqlite3": "^7.6.10",
70
71
  "@types/cache-manager": "^4.0.6",
@@ -103,6 +104,7 @@
103
104
  "dependencies": {
104
105
  "@anthropic-ai/sdk": "^0.24.0",
105
106
  "@apidevtools/json-schema-ref-parser": "^11.6.4",
107
+ "@googleapis/sheets": "^8.0.0",
106
108
  "ajv": "^8.16.0",
107
109
  "ajv-formats": "^2.1.1",
108
110
  "async": "^3.2.5",
@@ -1 +1 @@
1
- {"version":3,"file":"validateAssertions.d.ts","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEpC,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;CAMhD;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,EAAE,CAAC,CAAC,EAAE,QAU/F"}
1
+ {"version":3,"file":"validateAssertions.d.ts","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEpC,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;CAMhD;AAgBD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,EAAE,CAAC,CAAC,EAAE,QAU/F"}
@@ -9,6 +9,17 @@ class AssertValiationError extends Error {
9
9
  }
10
10
  }
11
11
  exports.AssertValiationError = AssertValiationError;
12
+ function validateAssertSet(assertion, test) {
13
+ if (!('assert' in assertion)) {
14
+ throw new AssertValiationError('assert-set must have an `assert` property', test);
15
+ }
16
+ if (!Array.isArray(assertion.assert)) {
17
+ throw new AssertValiationError('assert-set `assert` must be an array of assertions', test);
18
+ }
19
+ if (assertion.assert.some((assertion) => assertion.type === 'assert-set')) {
20
+ throw new AssertValiationError('assert-set must not have child assert-sets', test);
21
+ }
22
+ }
12
23
  function validateAssertions(tests) {
13
24
  for (const test of tests) {
14
25
  if (test.assert) {
@@ -21,15 +32,4 @@ function validateAssertions(tests) {
21
32
  }
22
33
  }
23
34
  exports.validateAssertions = validateAssertions;
24
- function validateAssertSet(assertion, test) {
25
- if (!('assert' in assertion)) {
26
- throw new AssertValiationError('assert-set must have an `assert` property', test);
27
- }
28
- if (!Array.isArray(assertion.assert)) {
29
- throw new AssertValiationError('assert-set `assert` must be an array of assertions', test);
30
- }
31
- if (assertion.assert.some((assertion) => assertion.type === 'assert-set')) {
32
- throw new AssertValiationError('assert-set must not have child assert-sets', test);
33
- }
34
- }
35
35
  //# sourceMappingURL=validateAssertions.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"validateAssertions.js","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":";;;AAEA,MAAa,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe,EAAE,QAAkB;QAC7C,MAAM,mBAAmB,GAAG,QAAQ,CAAC,WAAW,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAE7E,KAAK,CAAC,GAAG,OAAO,SAAS,mBAAmB,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAPD,oDAOC;AAED,SAAgB,kBAAkB,CAAC,KAA6D;IAC9F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBACpC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAVD,gDAUC;AAED,SAAS,iBAAiB,CAAC,SAAiB,EAAE,IAAc;IAC1D,IAAI,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,oBAAoB,CAAC,2CAA2C,EAAE,IAAI,CAAC,CAAC;IACpF,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;QACrC,MAAM,IAAI,oBAAoB,CAAC,oDAAoD,EAAE,IAAI,CAAC,CAAC;IAC7F,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,YAAY,CAAC,EAAE,CAAC;QAC1E,MAAM,IAAI,oBAAoB,CAAC,4CAA4C,EAAE,IAAI,CAAC,CAAC;IACrF,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"validateAssertions.js","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":";;;AAEA,MAAa,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe,EAAE,QAAkB;QAC7C,MAAM,mBAAmB,GAAG,QAAQ,CAAC,WAAW,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAE7E,KAAK,CAAC,GAAG,OAAO,SAAS,mBAAmB,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAPD,oDAOC;AAED,SAAS,iBAAiB,CAAC,SAAiB,EAAE,IAAc;IAC1D,IAAI,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,oBAAoB,CAAC,2CAA2C,EAAE,IAAI,CAAC,CAAC;IACpF,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;QACrC,MAAM,IAAI,oBAAoB,CAAC,oDAAoD,EAAE,IAAI,CAAC,CAAC;IAC7F,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,YAAY,CAAC,EAAE,CAAC;QAC1E,MAAM,IAAI,oBAAoB,CAAC,4CAA4C,EAAE,IAAI,CAAC,CAAC;IACrF,CAAC;AACH,CAAC;AAED,SAAgB,kBAAkB,CAAC,KAA6D;IAC9F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBACpC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAVD,gDAUC"}
@@ -1,29 +1,31 @@
1
1
  import { matchesSimilarity, matchesLlmRubric, matchesFactuality, matchesClosedQa, matchesClassification, matchesAnswerRelevance, matchesContextRecall, matchesContextRelevance, matchesContextFaithfulness, matchesSelectBest, matchesModeration } from './matchers';
2
2
  import { type ApiProvider, type Assertion, type AssertionType, type AtomicTestCase, type GradingResult, AssertionValue } from './types';
3
3
  export declare const MODEL_GRADED_ASSERTION_TYPES: Set<AssertionType>;
4
- export declare function runAssertions({ prompt, provider, test, output, latencyMs, logProbs, cost, }: {
4
+ export declare function isSql(outputString: string, renderedValue: AssertionValue | undefined, inverse: boolean, assertion: Assertion): Promise<GradingResult>;
5
+ export declare function runAssertion({ prompt, provider, assertion, test, output, latencyMs, logProbs, cost, }: {
5
6
  prompt?: string;
6
7
  provider?: ApiProvider;
8
+ assertion: Assertion;
7
9
  test: AtomicTestCase;
8
10
  output: string | object;
9
11
  latencyMs?: number;
10
12
  logProbs?: number[];
11
13
  cost?: number;
12
14
  }): Promise<GradingResult>;
13
- export declare function runAssertion({ prompt, provider, assertion, test, output, latencyMs, logProbs, cost, }: {
15
+ export declare function runAssertions({ prompt, provider, test, output, latencyMs, logProbs, cost, }: {
14
16
  prompt?: string;
15
17
  provider?: ApiProvider;
16
- assertion: Assertion;
17
18
  test: AtomicTestCase;
18
19
  output: string | object;
19
20
  latencyMs?: number;
20
21
  logProbs?: number[];
21
22
  cost?: number;
22
23
  }): Promise<GradingResult>;
23
- export declare function isSql(outputString: string, renderedValue: AssertionValue | undefined, inverse: boolean, assertion: Assertion): Promise<GradingResult>;
24
24
  export declare function runCompareAssertion(test: AtomicTestCase, assertion: Assertion, outputs: string[]): Promise<GradingResult[]>;
25
25
  export declare function readAssertions(filePath: string): Promise<Assertion[]>;
26
26
  declare const _default: {
27
+ runAssertion: typeof runAssertion;
28
+ runAssertions: typeof runAssertions;
27
29
  matchesSimilarity: typeof matchesSimilarity;
28
30
  matchesClassification: typeof matchesClassification;
29
31
  matchesLlmRubric: typeof matchesLlmRubric;
@@ -1 +1 @@
1
- {"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/assertions.ts"],"names":[],"mappings":"AAkBA,OAAO,EACL,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,aAAa,EAGlB,cAAc,EACf,MAAM,SAAS,CAAC;AASjB,eAAO,MAAM,4BAA4B,oBASvC,CAAC;AAqDH,wBAAsB,aAAa,CAAC,EAClC,MAAM,EACN,QAAQ,EACR,IAAI,EACJ,MAAM,EACN,SAAS,EACT,QAAQ,EACR,IAAI,GACL,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,IAAI,EAAE,cAAc,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,OAAO,CAAC,aAAa,CAAC,CAoFzB;AAED,wBAAsB,YAAY,CAAC,EACjC,MAAM,EACN,QAAQ,EACR,SAAS,EACT,IAAI,EACJ,MAAM,EACN,SAAS,EACT,QAAQ,EACR,IAAI,GACL,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,SAAS,EAAE,SAAS,CAAC;IACrB,IAAI,EAAE,cAAc,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,OAAO,CAAC,aAAa,CAAC,CAqhCzB;AAgCD,wBAAsB,KAAK,CACzB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,cAAc,GAAG,SAAS,EACzC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,aAAa,CAAC,CA2ExB;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,cAAc,EACpB,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAe1B;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAU3E;;;;;;;;;;;;;;AAGD,wBAYE"}
1
+ {"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/assertions.ts"],"names":[],"mappings":"AAiBA,OAAO,EACL,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,aAAa,EAGlB,cAAc,EACf,MAAM,SAAS,CAAC;AAOjB,eAAO,MAAM,4BAA4B,oBASvC,CAAC;AAqDH,wBAAsB,KAAK,CACzB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,cAAc,GAAG,SAAS,EACzC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,aAAa,CAAC,CA2ExB;AAED,wBAAsB,YAAY,CAAC,EACjC,MAAM,EACN,QAAQ,EACR,SAAS,EACT,IAAI,EACJ,MAAM,EACN,SAAS,EACT,QAAQ,EACR,IAAI,GACL,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,SAAS,EAAE,SAAS,CAAC;IACrB,IAAI,EAAE,cAAc,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,OAAO,CAAC,aAAa,CAAC,CAqhCzB;AAED,wBAAsB,aAAa,CAAC,EAClC,MAAM,EACN,QAAQ,EACR,IAAI,EACJ,MAAM,EACN,SAAS,EACT,QAAQ,EACR,IAAI,GACL,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,IAAI,EAAE,cAAc,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,OAAO,CAAC,aAAa,CAAC,CAoFzB;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,cAAc,EACpB,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAe1B;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAU3E;;;;;;;;;;;;;;;;AAGD,wBAcE"}
@@ -26,30 +26,30 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
26
26
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
- exports.readAssertions = exports.runCompareAssertion = exports.isSql = exports.runAssertion = exports.runAssertions = exports.MODEL_GRADED_ASSERTION_TYPES = void 0;
29
+ exports.readAssertions = exports.runCompareAssertion = exports.runAssertions = exports.runAssertion = exports.isSql = exports.MODEL_GRADED_ASSERTION_TYPES = void 0;
30
+ const ajv_1 = __importDefault(require("ajv"));
31
+ const ajv_formats_1 = __importDefault(require("ajv-formats"));
32
+ const async_1 = __importDefault(require("async"));
33
+ const fastest_levenshtein_1 = require("fastest-levenshtein");
30
34
  const fs_1 = __importDefault(require("fs"));
31
- const path_1 = __importDefault(require("path"));
35
+ const js_yaml_1 = __importDefault(require("js-yaml"));
32
36
  const node_util_1 = __importDefault(require("node:util"));
33
- const async_1 = __importDefault(require("async"));
37
+ const path_1 = __importDefault(require("path"));
38
+ const rfdc_1 = __importDefault(require("rfdc"));
34
39
  const rouge_1 = __importDefault(require("rouge"));
35
40
  const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
36
- const js_yaml_1 = __importDefault(require("js-yaml"));
37
- const ajv_1 = __importDefault(require("ajv"));
38
- const ajv_formats_1 = __importDefault(require("ajv-formats"));
39
- const rfdc_1 = __importDefault(require("rfdc"));
40
- const fastest_levenshtein_1 = require("fastest-levenshtein");
41
+ const AssertionsResult_1 = require("./assertions/AssertionsResult");
41
42
  const cliState_1 = __importDefault(require("./cliState"));
42
- const telemetry_1 = __importDefault(require("./telemetry"));
43
- const logger_1 = __importDefault(require("./logger"));
43
+ const esm_1 = require("./esm");
44
44
  const fetch_1 = require("./fetch");
45
- const util_1 = require("./util");
45
+ const logger_1 = __importDefault(require("./logger"));
46
46
  const matchers_1 = require("./matchers");
47
47
  const openaiUtil_1 = require("./providers/openaiUtil");
48
+ const shared_1 = require("./providers/shared");
48
49
  const wrapper_1 = require("./python/wrapper");
49
- const esm_1 = require("./esm");
50
+ const telemetry_1 = __importDefault(require("./telemetry"));
50
51
  const types_1 = require("./types");
51
- const AssertionsResult_1 = require("./assertions/AssertionsResult");
52
- const shared_1 = require("./providers/shared");
52
+ const util_1 = require("./util");
53
53
  const ASSERTIONS_MAX_CONCURRENCY = process.env.PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY
54
54
  ? parseInt(process.env.PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY, 10)
55
55
  : 3;
@@ -96,71 +96,68 @@ function handleRougeScore(baseType, assertion, expected, output, inverted) {
96
96
  assertion,
97
97
  };
98
98
  }
99
- async function runAssertions({ prompt, provider, test, output, latencyMs, logProbs, cost, }) {
100
- if (!test.assert || test.assert.length < 1) {
101
- return AssertionsResult_1.AssertionsResult.noAssertsResult();
99
+ async function isSql(outputString, renderedValue, inverse, assertion) {
100
+ let pass = false;
101
+ let parsedSql;
102
+ let databaseType = 'MySQL';
103
+ let whiteTableList;
104
+ let whiteColumnList;
105
+ if (renderedValue && typeof renderedValue === 'object') {
106
+ const value = renderedValue;
107
+ databaseType = value.database || 'MySQL';
108
+ whiteTableList = value.allowedTables;
109
+ whiteColumnList = value.allowedColumns;
102
110
  }
103
- const mainAssertResult = new AssertionsResult_1.AssertionsResult({
104
- threshold: test.threshold,
111
+ if (renderedValue && typeof renderedValue !== 'object') {
112
+ throw new Error('is-sql assertion must have a object value.');
113
+ }
114
+ const { Parser: SqlParser } = await Promise.resolve().then(() => __importStar(require('node-sql-parser'))).catch(() => {
115
+ throw new Error('node-sql-parser is not installed. Please install it first');
105
116
  });
106
- const subAssertResults = [];
107
- const asserts = test.assert
108
- .map((assertion, i) => {
109
- if (assertion.type === 'assert-set') {
110
- const subAssertResult = new AssertionsResult_1.AssertionsResult({
111
- threshold: assertion.threshold,
112
- parentAssertionSet: {
113
- assertionSet: assertion,
114
- index: i,
115
- },
116
- });
117
- subAssertResults.push(subAssertResult);
118
- return assertion.assert.map((subAssert, j) => {
119
- return {
120
- assertion: subAssert,
121
- assertResult: subAssertResult,
122
- index: j,
123
- };
124
- });
117
+ const sqlParser = new SqlParser();
118
+ const opt = { database: databaseType };
119
+ const failureReasons = [];
120
+ try {
121
+ parsedSql = sqlParser.astify(outputString, opt);
122
+ pass = !inverse;
123
+ }
124
+ catch (err) {
125
+ pass = inverse;
126
+ failureReasons.push(`SQL statement does not conform to the provided ${databaseType} database syntax.`);
127
+ }
128
+ if (whiteTableList) {
129
+ opt.type = 'table';
130
+ try {
131
+ sqlParser.whiteListCheck(outputString, whiteTableList, opt);
125
132
  }
126
- return { assertion, assertResult: mainAssertResult, index: i };
127
- })
128
- .flat();
129
- await async_1.default.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
130
- if (assertion.type.startsWith('select-')) {
131
- // Select-type assertions are handled separately because they depend on multiple outputs.
132
- return;
133
+ catch (err) {
134
+ pass = inverse;
135
+ const error = err;
136
+ failureReasons.push(`SQL validation failed: ${error.message}.`);
133
137
  }
134
- const result = await runAssertion({
135
- prompt,
136
- provider,
137
- assertion,
138
- test,
139
- output,
140
- latencyMs,
141
- logProbs,
142
- cost,
143
- });
144
- assertResult.addResult({
145
- index,
146
- result,
147
- metric: assertion.metric,
148
- weight: assertion.weight,
149
- });
150
- });
151
- subAssertResults.forEach((subAssertResult) => {
152
- const result = subAssertResult.testResult();
153
- const { index, assertionSet: { metric, weight }, } = subAssertResult.parentAssertionSet;
154
- mainAssertResult.addResult({
155
- index,
156
- result,
157
- metric,
158
- weight,
159
- });
160
- });
161
- return mainAssertResult.testResult();
138
+ }
139
+ if (whiteColumnList) {
140
+ opt.type = 'column';
141
+ try {
142
+ sqlParser.whiteListCheck(outputString, whiteColumnList, opt);
143
+ }
144
+ catch (err) {
145
+ pass = inverse;
146
+ const error = err;
147
+ failureReasons.push(`SQL validation failed: ${error.message}.`);
148
+ }
149
+ }
150
+ if (inverse && pass === false && failureReasons.length === 0) {
151
+ failureReasons.push('The output SQL statement is valid');
152
+ }
153
+ return {
154
+ pass,
155
+ score: pass ? 1 : 0,
156
+ reason: pass ? 'Assertion passed' : failureReasons.join(' '),
157
+ assertion,
158
+ };
162
159
  }
163
- exports.runAssertions = runAssertions;
160
+ exports.isSql = isSql;
164
161
  async function runAssertion({ prompt, provider, assertion, test, output, latencyMs, logProbs, cost, }) {
165
162
  let pass = false;
166
163
  let score = 0.0;
@@ -441,10 +438,10 @@ async function runAssertion({ prompt, provider, assertion, test, output, latency
441
438
  }
442
439
  if (baseType === 'contains-json') {
443
440
  let errorMessage = 'Expected output to contain valid JSON';
444
- const jsonOutputs = containsJSON(outputString);
445
- for (const jsonMatch of jsonOutputs) {
446
- pass = jsonMatch !== inverse;
447
- if (pass && renderedValue) {
441
+ const jsonObjects = (0, util_1.extractJsonObjects)(outputString);
442
+ pass = inverse ? jsonObjects.length === 0 : jsonObjects.length > 0;
443
+ for (const jsonObject of jsonObjects) {
444
+ if (renderedValue) {
448
445
  let validate;
449
446
  if (typeof renderedValue === 'string') {
450
447
  if (renderedValue.startsWith('file://')) {
@@ -465,7 +462,7 @@ async function runAssertion({ prompt, provider, assertion, test, output, latency
465
462
  else {
466
463
  throw new Error('contains-json assertion must have a string or object value');
467
464
  }
468
- pass = validate(jsonMatch);
465
+ pass = validate(jsonObject);
469
466
  if (pass) {
470
467
  break;
471
468
  }
@@ -996,97 +993,71 @@ ${isMultiline
996
993
  throw new Error('Unknown assertion type: ' + assertion.type);
997
994
  }
998
995
  exports.runAssertion = runAssertion;
999
- function containsJSON(str) {
1000
- // This will extract all json objects from a string
1001
- const jsonObjects = [];
1002
- let openBracket = str.indexOf('{');
1003
- let closeBracket = str.indexOf('}', openBracket);
1004
- // Iterate over the string until we find a valid JSON-like pattern
1005
- // Iterate over all trailing } until the contents parse as json
1006
- while (openBracket !== -1) {
1007
- const jsonStr = str.slice(openBracket, closeBracket + 1);
1008
- try {
1009
- jsonObjects.push(JSON.parse(jsonStr));
1010
- // This is a valid JSON object, so start looking for
1011
- // an opening bracket after the last closing bracket
1012
- openBracket = str.indexOf('{', closeBracket + 1);
1013
- closeBracket = str.indexOf('}', openBracket);
1014
- }
1015
- catch (err) {
1016
- // Not a valid object, move on to the next closing bracket
1017
- closeBracket = str.indexOf('}', closeBracket + 1);
1018
- while (closeBracket === -1) {
1019
- // No closing brackets made a valid json object, so
1020
- // start looking with the next opening bracket
1021
- openBracket = str.indexOf('{', openBracket + 1);
1022
- closeBracket = str.indexOf('}', openBracket);
1023
- }
1024
- }
1025
- }
1026
- return jsonObjects;
1027
- }
1028
- async function isSql(outputString, renderedValue, inverse, assertion) {
1029
- let pass = false;
1030
- let parsedSql;
1031
- let databaseType = 'MySQL';
1032
- let whiteTableList;
1033
- let whiteColumnList;
1034
- if (renderedValue && typeof renderedValue === 'object') {
1035
- const value = renderedValue;
1036
- databaseType = value.database || 'MySQL';
1037
- whiteTableList = value.allowedTables;
1038
- whiteColumnList = value.allowedColumns;
1039
- }
1040
- if (renderedValue && typeof renderedValue !== 'object') {
1041
- throw new Error('is-sql assertion must have a object value.');
996
+ async function runAssertions({ prompt, provider, test, output, latencyMs, logProbs, cost, }) {
997
+ if (!test.assert || test.assert.length < 1) {
998
+ return AssertionsResult_1.AssertionsResult.noAssertsResult();
1042
999
  }
1043
- const { Parser: SqlParser } = await Promise.resolve().then(() => __importStar(require('node-sql-parser'))).catch(() => {
1044
- throw new Error('node-sql-parser is not installed. Please install it first');
1000
+ const mainAssertResult = new AssertionsResult_1.AssertionsResult({
1001
+ threshold: test.threshold,
1045
1002
  });
1046
- const sqlParser = new SqlParser();
1047
- const opt = { database: databaseType };
1048
- const failureReasons = [];
1049
- try {
1050
- parsedSql = sqlParser.astify(outputString, opt);
1051
- pass = !inverse;
1052
- }
1053
- catch (err) {
1054
- pass = inverse;
1055
- failureReasons.push(`SQL statement does not conform to the provided ${databaseType} database syntax.`);
1056
- }
1057
- if (whiteTableList) {
1058
- opt.type = 'table';
1059
- try {
1060
- sqlParser.whiteListCheck(outputString, whiteTableList, opt);
1061
- }
1062
- catch (err) {
1063
- pass = inverse;
1064
- const error = err;
1065
- failureReasons.push(`SQL validation failed: ${error.message}.`);
1066
- }
1067
- }
1068
- if (whiteColumnList) {
1069
- opt.type = 'column';
1070
- try {
1071
- sqlParser.whiteListCheck(outputString, whiteColumnList, opt);
1003
+ const subAssertResults = [];
1004
+ const asserts = test.assert
1005
+ .map((assertion, i) => {
1006
+ if (assertion.type === 'assert-set') {
1007
+ const subAssertResult = new AssertionsResult_1.AssertionsResult({
1008
+ threshold: assertion.threshold,
1009
+ parentAssertionSet: {
1010
+ assertionSet: assertion,
1011
+ index: i,
1012
+ },
1013
+ });
1014
+ subAssertResults.push(subAssertResult);
1015
+ return assertion.assert.map((subAssert, j) => {
1016
+ return {
1017
+ assertion: subAssert,
1018
+ assertResult: subAssertResult,
1019
+ index: j,
1020
+ };
1021
+ });
1072
1022
  }
1073
- catch (err) {
1074
- pass = inverse;
1075
- const error = err;
1076
- failureReasons.push(`SQL validation failed: ${error.message}.`);
1023
+ return { assertion, assertResult: mainAssertResult, index: i };
1024
+ })
1025
+ .flat();
1026
+ await async_1.default.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
1027
+ if (assertion.type.startsWith('select-')) {
1028
+ // Select-type assertions are handled separately because they depend on multiple outputs.
1029
+ return;
1077
1030
  }
1078
- }
1079
- if (inverse && pass === false && failureReasons.length === 0) {
1080
- failureReasons.push('The output SQL statement is valid');
1081
- }
1082
- return {
1083
- pass,
1084
- score: pass ? 1 : 0,
1085
- reason: pass ? 'Assertion passed' : failureReasons.join(' '),
1086
- assertion,
1087
- };
1031
+ const result = await runAssertion({
1032
+ prompt,
1033
+ provider,
1034
+ assertion,
1035
+ test,
1036
+ output,
1037
+ latencyMs,
1038
+ logProbs,
1039
+ cost,
1040
+ });
1041
+ assertResult.addResult({
1042
+ index,
1043
+ result,
1044
+ metric: assertion.metric,
1045
+ weight: assertion.weight,
1046
+ });
1047
+ });
1048
+ subAssertResults.forEach((subAssertResult) => {
1049
+ const result = subAssertResult.testResult();
1050
+ const { index, assertionSet: { metric, weight }, } = subAssertResult.parentAssertionSet;
1051
+ mainAssertResult.addResult({
1052
+ index,
1053
+ result,
1054
+ metric,
1055
+ weight,
1056
+ });
1057
+ });
1058
+ return mainAssertResult.testResult();
1088
1059
  }
1089
- exports.isSql = isSql;
1060
+ exports.runAssertions = runAssertions;
1090
1061
  async function runCompareAssertion(test, assertion, outputs) {
1091
1062
  (0, tiny_invariant_1.default)(typeof assertion.value === 'string', 'select-best must have a string value');
1092
1063
  test.options = test.options || {};
@@ -1114,6 +1085,8 @@ async function readAssertions(filePath) {
1114
1085
  exports.readAssertions = readAssertions;
1115
1086
  // These exports are used by the node.js package (index.ts)
1116
1087
  exports.default = {
1088
+ runAssertion,
1089
+ runAssertions,
1117
1090
  matchesSimilarity: matchers_1.matchesSimilarity,
1118
1091
  matchesClassification: matchers_1.matchesClassification,
1119
1092
  matchesLlmRubric: matchers_1.matchesLlmRubric,