promptfoo 0.70.1 → 0.71.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/README.md +2 -0
  2. package/dist/drizzle/meta/0000_snapshot.json +16 -17
  3. package/dist/drizzle/meta/0001_snapshot.json +16 -17
  4. package/dist/drizzle/meta/0002_snapshot.json +16 -17
  5. package/dist/package.json +13 -10
  6. package/dist/src/assertions.d.ts +8 -0
  7. package/dist/src/assertions.d.ts.map +1 -1
  8. package/dist/src/assertions.js +81 -1
  9. package/dist/src/assertions.js.map +1 -1
  10. package/dist/src/commands/cache.d.ts +3 -0
  11. package/dist/src/commands/cache.d.ts.map +1 -0
  12. package/dist/src/commands/cache.js +31 -0
  13. package/dist/src/commands/cache.js.map +1 -0
  14. package/dist/src/commands/feedback.d.ts +3 -0
  15. package/dist/src/commands/feedback.d.ts.map +1 -0
  16. package/dist/src/commands/feedback.js +13 -0
  17. package/dist/src/commands/feedback.js.map +1 -0
  18. package/dist/src/commands/generate/dataset.d.ts +4 -0
  19. package/dist/src/commands/generate/dataset.d.ts.map +1 -0
  20. package/dist/src/commands/generate/dataset.js +119 -0
  21. package/dist/src/commands/generate/dataset.js.map +1 -0
  22. package/dist/src/commands/{generate.d.ts → generate/redteam.d.ts} +5 -4
  23. package/dist/src/commands/generate/redteam.d.ts.map +1 -0
  24. package/dist/src/commands/{generate.js → generate/redteam.js} +18 -90
  25. package/dist/src/commands/generate/redteam.js.map +1 -0
  26. package/dist/src/commands/init.d.ts +3 -0
  27. package/dist/src/commands/init.d.ts.map +1 -0
  28. package/dist/src/commands/init.js +27 -0
  29. package/dist/src/commands/init.js.map +1 -0
  30. package/dist/src/commands/redteam.d.ts.map +1 -1
  31. package/dist/src/commands/redteam.js +3 -2
  32. package/dist/src/commands/redteam.js.map +1 -1
  33. package/dist/src/commands/share.d.ts +3 -0
  34. package/dist/src/commands/share.d.ts.map +1 -0
  35. package/dist/src/commands/share.js +55 -0
  36. package/dist/src/commands/share.js.map +1 -0
  37. package/dist/src/commands/version.d.ts +3 -0
  38. package/dist/src/commands/version.d.ts.map +1 -0
  39. package/dist/src/commands/version.js +18 -0
  40. package/dist/src/commands/version.js.map +1 -0
  41. package/dist/src/commands/view.d.ts +3 -0
  42. package/dist/src/commands/view.d.ts.map +1 -0
  43. package/dist/src/commands/view.js +39 -0
  44. package/dist/src/commands/view.js.map +1 -0
  45. package/dist/src/config.js +1 -1
  46. package/dist/src/config.js.map +1 -1
  47. package/dist/src/database.d.ts +64 -0
  48. package/dist/src/database.d.ts.map +1 -1
  49. package/dist/src/evaluator.d.ts.map +1 -1
  50. package/dist/src/evaluator.js +2 -1
  51. package/dist/src/evaluator.js.map +1 -1
  52. package/dist/src/main.d.ts +5 -1
  53. package/dist/src/main.d.ts.map +1 -1
  54. package/dist/src/main.js +29 -130
  55. package/dist/src/main.js.map +1 -1
  56. package/dist/src/matchers.d.ts.map +1 -1
  57. package/dist/src/matchers.js +2 -1
  58. package/dist/src/matchers.js.map +1 -1
  59. package/dist/src/onboarding.js +2 -2
  60. package/dist/src/onboarding.js.map +1 -1
  61. package/dist/src/providers/anthropic.d.ts.map +1 -1
  62. package/dist/src/providers/anthropic.js +3 -3
  63. package/dist/src/providers/anthropic.js.map +1 -1
  64. package/dist/src/providers/defaults.d.ts +13 -23
  65. package/dist/src/providers/defaults.d.ts.map +1 -1
  66. package/dist/src/providers/defaults.js +13 -7
  67. package/dist/src/providers/defaults.js.map +1 -1
  68. package/dist/src/providers/http.d.ts.map +1 -1
  69. package/dist/src/providers/http.js +9 -4
  70. package/dist/src/providers/http.js.map +1 -1
  71. package/dist/src/providers/vertex.js +2 -2
  72. package/dist/src/providers/vertex.js.map +1 -1
  73. package/dist/src/providers.d.ts.map +1 -1
  74. package/dist/src/providers.js +2 -3
  75. package/dist/src/providers.js.map +1 -1
  76. package/dist/src/redteam/constants.d.ts +1 -0
  77. package/dist/src/redteam/constants.d.ts.map +1 -1
  78. package/dist/src/redteam/constants.js +2 -1
  79. package/dist/src/redteam/constants.js.map +1 -1
  80. package/dist/src/redteam/index.d.ts +6 -2
  81. package/dist/src/redteam/index.d.ts.map +1 -1
  82. package/dist/src/redteam/index.js +93 -64
  83. package/dist/src/redteam/index.js.map +1 -1
  84. package/dist/src/redteam/methods/injections.d.ts +3 -0
  85. package/dist/src/redteam/methods/injections.d.ts.map +1 -0
  86. package/dist/src/redteam/methods/injections.js +61 -0
  87. package/dist/src/redteam/methods/injections.js.map +1 -0
  88. package/dist/src/redteam/methods/iterative.d.ts +3 -0
  89. package/dist/src/redteam/methods/iterative.d.ts.map +1 -0
  90. package/dist/src/redteam/methods/iterative.js +35 -0
  91. package/dist/src/redteam/methods/iterative.js.map +1 -0
  92. package/dist/src/redteam/plugins/base.d.ts +5 -4
  93. package/dist/src/redteam/plugins/base.d.ts.map +1 -1
  94. package/dist/src/redteam/plugins/base.js +7 -5
  95. package/dist/src/redteam/plugins/base.js.map +1 -1
  96. package/dist/src/redteam/plugins/competitors.d.ts +2 -5
  97. package/dist/src/redteam/plugins/competitors.d.ts.map +1 -1
  98. package/dist/src/redteam/plugins/competitors.js +9 -7
  99. package/dist/src/redteam/plugins/competitors.js.map +1 -1
  100. package/dist/src/redteam/plugins/contracts.d.ts +2 -5
  101. package/dist/src/redteam/plugins/contracts.d.ts.map +1 -1
  102. package/dist/src/redteam/plugins/contracts.js +9 -7
  103. package/dist/src/redteam/plugins/contracts.js.map +1 -1
  104. package/dist/src/redteam/plugins/excessiveAgency.d.ts +2 -5
  105. package/dist/src/redteam/plugins/excessiveAgency.d.ts.map +1 -1
  106. package/dist/src/redteam/plugins/excessiveAgency.js +9 -7
  107. package/dist/src/redteam/plugins/excessiveAgency.js.map +1 -1
  108. package/dist/src/redteam/plugins/hallucination.d.ts +2 -5
  109. package/dist/src/redteam/plugins/hallucination.d.ts.map +1 -1
  110. package/dist/src/redteam/plugins/hallucination.js +9 -7
  111. package/dist/src/redteam/plugins/hallucination.js.map +1 -1
  112. package/dist/src/redteam/plugins/harmful.d.ts +50 -33
  113. package/dist/src/redteam/plugins/harmful.d.ts.map +1 -1
  114. package/dist/src/redteam/plugins/harmful.js +159 -99
  115. package/dist/src/redteam/plugins/harmful.js.map +1 -1
  116. package/dist/src/redteam/plugins/hijacking.d.ts +2 -5
  117. package/dist/src/redteam/plugins/hijacking.d.ts.map +1 -1
  118. package/dist/src/redteam/plugins/hijacking.js +9 -7
  119. package/dist/src/redteam/plugins/hijacking.js.map +1 -1
  120. package/dist/src/redteam/plugins/overreliance.d.ts +2 -5
  121. package/dist/src/redteam/plugins/overreliance.d.ts.map +1 -1
  122. package/dist/src/redteam/plugins/overreliance.js +36 -7
  123. package/dist/src/redteam/plugins/overreliance.js.map +1 -1
  124. package/dist/src/redteam/plugins/pii.d.ts +2 -1
  125. package/dist/src/redteam/plugins/pii.d.ts.map +1 -1
  126. package/dist/src/redteam/plugins/pii.js +9 -7
  127. package/dist/src/redteam/plugins/pii.js.map +1 -1
  128. package/dist/src/redteam/plugins/politics.d.ts +2 -5
  129. package/dist/src/redteam/plugins/politics.d.ts.map +1 -1
  130. package/dist/src/redteam/plugins/politics.js +9 -7
  131. package/dist/src/redteam/plugins/politics.js.map +1 -1
  132. package/dist/src/redteam/{iterative.d.ts → providers/iterative.d.ts} +1 -1
  133. package/dist/src/redteam/providers/iterative.d.ts.map +1 -0
  134. package/dist/src/redteam/{iterative.js → providers/iterative.js} +3 -3
  135. package/dist/src/redteam/providers/iterative.js.map +1 -0
  136. package/dist/src/redteam/{iterativeImage.d.ts → providers/iterativeImage.d.ts} +1 -1
  137. package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -0
  138. package/dist/src/redteam/{iterativeImage.js → providers/iterativeImage.js} +4 -4
  139. package/dist/src/redteam/providers/iterativeImage.js.map +1 -0
  140. package/dist/src/redteam/purpose.d.ts +3 -0
  141. package/dist/src/redteam/purpose.d.ts.map +1 -0
  142. package/dist/src/redteam/purpose.js +30 -0
  143. package/dist/src/redteam/purpose.js.map +1 -0
  144. package/dist/src/testCases.d.ts +4 -3
  145. package/dist/src/testCases.d.ts.map +1 -1
  146. package/dist/src/testCases.js +46 -44
  147. package/dist/src/testCases.js.map +1 -1
  148. package/dist/src/types.d.ts +10 -1
  149. package/dist/src/types.d.ts.map +1 -1
  150. package/dist/src/types.js +2 -0
  151. package/dist/src/types.js.map +1 -1
  152. package/dist/src/{util.d.ts → util/index.d.ts} +4 -10
  153. package/dist/src/util/index.d.ts.map +1 -0
  154. package/dist/src/{util.js → util/index.js} +29 -31
  155. package/dist/src/util/index.js.map +1 -0
  156. package/dist/src/util/templates.d.ts +18 -0
  157. package/dist/src/util/templates.d.ts.map +1 -0
  158. package/dist/src/util/templates.js +66 -0
  159. package/dist/src/util/templates.js.map +1 -0
  160. package/dist/src/web/nextui/404/index.html +1 -1
  161. package/dist/src/web/nextui/404.html +1 -1
  162. package/dist/src/web/nextui/_next/static/chunks/{106-598707cae3f2a1bd.js → 106-40e3e363c957c313.js} +2 -2
  163. package/dist/src/web/nextui/_next/static/chunks/163-e65e0f7f442a0c72.js +6 -0
  164. package/dist/src/web/nextui/_next/static/chunks/{915-b1361d86b62c2cfe.js → 180-46db19289d856800.js} +1 -1
  165. package/dist/src/web/nextui/_next/static/chunks/{215-7bd08e1ce7028897.js → 215-f97c02cbf00861f3.js} +1 -1
  166. package/dist/src/web/nextui/_next/static/chunks/{258-8a7aa9a347c287ea.js → 258-9bad5f8846b93785.js} +2 -2
  167. package/dist/src/web/nextui/_next/static/chunks/{304-37065d54ff218f46.js → 304-9d57a2251034b801.js} +1 -1
  168. package/dist/src/web/nextui/_next/static/chunks/{322-ac32773931b7a8aa.js → 322-4a0fc9b0508f47e8.js} +1 -1
  169. package/dist/src/web/nextui/_next/static/chunks/{339-f38253669d469118.js → 339-db5bd05e24dde905.js} +1 -1
  170. package/dist/src/web/nextui/_next/static/chunks/{374-d40e4777b00cf7a7.js → 374-d40afe599198abd3.js} +1 -1
  171. package/dist/src/web/nextui/_next/static/chunks/{378-c23e2864527ac3a0.js → 378-e22c8aeb8e031fad.js} +1 -1
  172. package/dist/src/web/nextui/_next/static/chunks/{998-d370cd9f4473ff31.js → 531-efd0c50e833e81f1.js} +1 -1
  173. package/dist/src/web/nextui/_next/static/chunks/540-ab57db4fdf86cc8a.js +97 -0
  174. package/dist/src/web/nextui/_next/static/chunks/611-d73a08bebf1d6f3c.js +1 -0
  175. package/dist/src/web/nextui/_next/static/chunks/{620-1e779bd81b43dafb.js → 620-7a5a7bf6e1fe49e5.js} +1 -1
  176. package/dist/src/web/nextui/_next/static/chunks/{770-28c6d673e9540d80.js → 770-8ec8b39ce6f615c4.js} +2 -2
  177. package/dist/src/web/nextui/_next/static/chunks/858-9393537662061ee7.js +1 -0
  178. package/dist/src/web/nextui/_next/static/chunks/873-177b84c58d904b5a.js +1 -0
  179. package/dist/src/web/nextui/_next/static/chunks/{995-81cea2f3d08297f2.js → 995-803c74b81e7bf6cd.js} +1 -1
  180. package/dist/src/web/nextui/_next/static/chunks/app/auth/login/{page-6fcc9431205718c7.js → page-3bd92b90dc19429b.js} +1 -1
  181. package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/{page-8caf49a834d34420.js → page-557ff1f8c491e68a.js} +1 -1
  182. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-3c5a944373865122.js → page-7bc08c50ac222ddd.js} +1 -1
  183. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-b26513300f6f0273.js +1 -0
  184. package/dist/src/web/nextui/_next/static/chunks/app/{layout-2038906de6c19565.js → layout-1b9c6b87155ccc6f.js} +1 -1
  185. package/dist/src/web/nextui/_next/static/chunks/app/progress/{page-73442c531d579c51.js → page-e8235b15bd64fb03.js} +1 -1
  186. package/dist/src/web/nextui/_next/static/chunks/app/report/{page-be00cf77531ce9cb.js → page-6716acff9825da98.js} +1 -1
  187. package/dist/src/web/nextui/_next/static/chunks/app/setup/{page-26cb5d2478fdbd34.js → page-59d712c1f56ecac6.js} +1 -1
  188. package/dist/src/web/nextui/auth/login/index.html +1 -1
  189. package/dist/src/web/nextui/auth/login/index.txt +3 -3
  190. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  191. package/dist/src/web/nextui/auth/signup/index.txt +3 -3
  192. package/dist/src/web/nextui/datasets/index.html +1 -1
  193. package/dist/src/web/nextui/datasets/index.txt +3 -3
  194. package/dist/src/web/nextui/eval/index.html +1 -1
  195. package/dist/src/web/nextui/eval/index.txt +3 -3
  196. package/dist/src/web/nextui/index.html +1 -1
  197. package/dist/src/web/nextui/index.txt +2 -2
  198. package/dist/src/web/nextui/progress/index.html +1 -1
  199. package/dist/src/web/nextui/progress/index.txt +3 -3
  200. package/dist/src/web/nextui/prompts/index.html +1 -1
  201. package/dist/src/web/nextui/prompts/index.txt +3 -3
  202. package/dist/src/web/nextui/report/index.html +1 -1
  203. package/dist/src/web/nextui/report/index.txt +3 -3
  204. package/dist/src/web/nextui/setup/index.html +1 -1
  205. package/dist/src/web/nextui/setup/index.txt +3 -3
  206. package/dist/src/web/server.js +1 -1
  207. package/dist/src/web/server.js.map +1 -1
  208. package/package.json +13 -10
  209. package/dist/src/commands/generate.d.ts.map +0 -1
  210. package/dist/src/commands/generate.js.map +0 -1
  211. package/dist/src/redteam/iterative.d.ts.map +0 -1
  212. package/dist/src/redteam/iterative.js.map +0 -1
  213. package/dist/src/redteam/iterativeImage.d.ts.map +0 -1
  214. package/dist/src/redteam/iterativeImage.js.map +0 -1
  215. package/dist/src/util.d.ts.map +0 -1
  216. package/dist/src/util.js.map +0 -1
  217. package/dist/src/web/nextui/_next/static/chunks/2-671ad31c05d2c976.js +0 -1
  218. package/dist/src/web/nextui/_next/static/chunks/540-c94eb8fa0d9db136.js +0 -97
  219. package/dist/src/web/nextui/_next/static/chunks/858-5d3a3678769b7e36.js +0 -1
  220. package/dist/src/web/nextui/_next/static/chunks/873-e1436a5f57769f17.js +0 -1
  221. package/dist/src/web/nextui/_next/static/chunks/954-c35d4864ecbacd62.js +0 -6
  222. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-73e894c39cc191f1.js +0 -1
  223. /package/dist/src/web/nextui/_next/static/{ENNANMoEha-uMGFo0DvzO → FqU2doca_2rkP7LW_2ZLN}/_buildManifest.js +0 -0
  224. /package/dist/src/web/nextui/_next/static/{ENNANMoEha-uMGFo0DvzO → FqU2doca_2rkP7LW_2ZLN}/_ssgManifest.js +0 -0
package/README.md CHANGED
@@ -122,6 +122,8 @@ Deterministic eval metrics
122
122
  | `contains-json` | output contains valid json (optional json schema validation) |
123
123
  | `is-sql` | output is valid sql |
124
124
  | `contains-sql` | output contains valid sql |
125
+ | `is-xml` | output is valid xml |
126
+ | `contains-xml` | output contains valid xml |
125
127
  | `javascript` | provided Javascript function validates the output |
126
128
  | `python` | provided Python function validates the output |
127
129
  | `webhook` | provided webhook returns `{pass: true}` |
@@ -1,8 +1,6 @@
1
1
  {
2
- "version": "5",
2
+ "version": "6",
3
3
  "dialect": "sqlite",
4
- "id": "8b53403f-5b6f-436a-862e-9fd17a52204e",
5
- "prevId": "00000000-0000-0000-0000-000000000000",
6
4
  "tables": {
7
5
  "datasets": {
8
6
  "name": "datasets",
@@ -103,28 +101,28 @@
103
101
  "evals_to_datasets_eval_id_evals_id_fk": {
104
102
  "name": "evals_to_datasets_eval_id_evals_id_fk",
105
103
  "tableFrom": "evals_to_datasets",
106
- "tableTo": "evals",
107
104
  "columnsFrom": [
108
105
  "eval_id"
109
106
  ],
107
+ "tableTo": "evals",
110
108
  "columnsTo": [
111
109
  "id"
112
110
  ],
113
- "onDelete": "no action",
114
- "onUpdate": "no action"
111
+ "onUpdate": "no action",
112
+ "onDelete": "no action"
115
113
  },
116
114
  "evals_to_datasets_dataset_id_datasets_id_fk": {
117
115
  "name": "evals_to_datasets_dataset_id_datasets_id_fk",
118
116
  "tableFrom": "evals_to_datasets",
119
- "tableTo": "datasets",
120
117
  "columnsFrom": [
121
118
  "dataset_id"
122
119
  ],
120
+ "tableTo": "datasets",
123
121
  "columnsTo": [
124
122
  "id"
125
123
  ],
126
- "onDelete": "no action",
127
- "onUpdate": "no action"
124
+ "onUpdate": "no action",
125
+ "onDelete": "no action"
128
126
  }
129
127
  },
130
128
  "compositePrimaryKeys": {
@@ -161,28 +159,28 @@
161
159
  "evals_to_prompts_eval_id_evals_id_fk": {
162
160
  "name": "evals_to_prompts_eval_id_evals_id_fk",
163
161
  "tableFrom": "evals_to_prompts",
164
- "tableTo": "evals",
165
162
  "columnsFrom": [
166
163
  "eval_id"
167
164
  ],
165
+ "tableTo": "evals",
168
166
  "columnsTo": [
169
167
  "id"
170
168
  ],
171
- "onDelete": "no action",
172
- "onUpdate": "no action"
169
+ "onUpdate": "no action",
170
+ "onDelete": "no action"
173
171
  },
174
172
  "evals_to_prompts_prompt_id_prompts_id_fk": {
175
173
  "name": "evals_to_prompts_prompt_id_prompts_id_fk",
176
174
  "tableFrom": "evals_to_prompts",
177
- "tableTo": "prompts",
178
175
  "columnsFrom": [
179
176
  "prompt_id"
180
177
  ],
178
+ "tableTo": "prompts",
181
179
  "columnsTo": [
182
180
  "id"
183
181
  ],
184
- "onDelete": "no action",
185
- "onUpdate": "no action"
182
+ "onUpdate": "no action",
183
+ "onDelete": "no action"
186
184
  }
187
185
  },
188
186
  "compositePrimaryKeys": {
@@ -237,8 +235,9 @@
237
235
  },
238
236
  "enums": {},
239
237
  "_meta": {
240
- "schemas": {},
241
238
  "tables": {},
242
239
  "columns": {}
243
- }
240
+ },
241
+ "id": "8b53403f-5b6f-436a-862e-9fd17a52204e",
242
+ "prevId": "00000000-0000-0000-0000-000000000000"
244
243
  }
@@ -1,8 +1,6 @@
1
1
  {
2
- "version": "5",
2
+ "version": "6",
3
3
  "dialect": "sqlite",
4
- "id": "476c887f-edcb-4b29-b054-50a8383cc6c2",
5
- "prevId": "8b53403f-5b6f-436a-862e-9fd17a52204e",
6
4
  "tables": {
7
5
  "datasets": {
8
6
  "name": "datasets",
@@ -103,28 +101,28 @@
103
101
  "evals_to_datasets_eval_id_evals_id_fk": {
104
102
  "name": "evals_to_datasets_eval_id_evals_id_fk",
105
103
  "tableFrom": "evals_to_datasets",
106
- "tableTo": "evals",
107
104
  "columnsFrom": [
108
105
  "eval_id"
109
106
  ],
107
+ "tableTo": "evals",
110
108
  "columnsTo": [
111
109
  "id"
112
110
  ],
113
- "onDelete": "no action",
114
- "onUpdate": "no action"
111
+ "onUpdate": "no action",
112
+ "onDelete": "no action"
115
113
  },
116
114
  "evals_to_datasets_dataset_id_datasets_id_fk": {
117
115
  "name": "evals_to_datasets_dataset_id_datasets_id_fk",
118
116
  "tableFrom": "evals_to_datasets",
119
- "tableTo": "datasets",
120
117
  "columnsFrom": [
121
118
  "dataset_id"
122
119
  ],
120
+ "tableTo": "datasets",
123
121
  "columnsTo": [
124
122
  "id"
125
123
  ],
126
- "onDelete": "no action",
127
- "onUpdate": "no action"
124
+ "onUpdate": "no action",
125
+ "onDelete": "no action"
128
126
  }
129
127
  },
130
128
  "compositePrimaryKeys": {
@@ -161,28 +159,28 @@
161
159
  "evals_to_prompts_eval_id_evals_id_fk": {
162
160
  "name": "evals_to_prompts_eval_id_evals_id_fk",
163
161
  "tableFrom": "evals_to_prompts",
164
- "tableTo": "evals",
165
162
  "columnsFrom": [
166
163
  "eval_id"
167
164
  ],
165
+ "tableTo": "evals",
168
166
  "columnsTo": [
169
167
  "id"
170
168
  ],
171
- "onDelete": "no action",
172
- "onUpdate": "no action"
169
+ "onUpdate": "no action",
170
+ "onDelete": "no action"
173
171
  },
174
172
  "evals_to_prompts_prompt_id_prompts_id_fk": {
175
173
  "name": "evals_to_prompts_prompt_id_prompts_id_fk",
176
174
  "tableFrom": "evals_to_prompts",
177
- "tableTo": "prompts",
178
175
  "columnsFrom": [
179
176
  "prompt_id"
180
177
  ],
178
+ "tableTo": "prompts",
181
179
  "columnsTo": [
182
180
  "id"
183
181
  ],
184
- "onDelete": "no action",
185
- "onUpdate": "no action"
182
+ "onUpdate": "no action",
183
+ "onDelete": "no action"
186
184
  }
187
185
  },
188
186
  "compositePrimaryKeys": {
@@ -230,8 +228,9 @@
230
228
  },
231
229
  "enums": {},
232
230
  "_meta": {
233
- "schemas": {},
234
231
  "tables": {},
235
232
  "columns": {}
236
- }
233
+ },
234
+ "id": "476c887f-edcb-4b29-b054-50a8383cc6c2",
235
+ "prevId": "8b53403f-5b6f-436a-862e-9fd17a52204e"
237
236
  }
@@ -1,8 +1,6 @@
1
1
  {
2
- "version": "5",
2
+ "version": "6",
3
3
  "dialect": "sqlite",
4
- "id": "6325e41a-40cd-4db1-9e99-df139a264a14",
5
- "prevId": "476c887f-edcb-4b29-b054-50a8383cc6c2",
6
4
  "tables": {
7
5
  "datasets": {
8
6
  "name": "datasets",
@@ -110,28 +108,28 @@
110
108
  "evals_to_datasets_eval_id_evals_id_fk": {
111
109
  "name": "evals_to_datasets_eval_id_evals_id_fk",
112
110
  "tableFrom": "evals_to_datasets",
113
- "tableTo": "evals",
114
111
  "columnsFrom": [
115
112
  "eval_id"
116
113
  ],
114
+ "tableTo": "evals",
117
115
  "columnsTo": [
118
116
  "id"
119
117
  ],
120
- "onDelete": "no action",
121
- "onUpdate": "no action"
118
+ "onUpdate": "no action",
119
+ "onDelete": "no action"
122
120
  },
123
121
  "evals_to_datasets_dataset_id_datasets_id_fk": {
124
122
  "name": "evals_to_datasets_dataset_id_datasets_id_fk",
125
123
  "tableFrom": "evals_to_datasets",
126
- "tableTo": "datasets",
127
124
  "columnsFrom": [
128
125
  "dataset_id"
129
126
  ],
127
+ "tableTo": "datasets",
130
128
  "columnsTo": [
131
129
  "id"
132
130
  ],
133
- "onDelete": "no action",
134
- "onUpdate": "no action"
131
+ "onUpdate": "no action",
132
+ "onDelete": "no action"
135
133
  }
136
134
  },
137
135
  "compositePrimaryKeys": {
@@ -168,28 +166,28 @@
168
166
  "evals_to_prompts_eval_id_evals_id_fk": {
169
167
  "name": "evals_to_prompts_eval_id_evals_id_fk",
170
168
  "tableFrom": "evals_to_prompts",
171
- "tableTo": "evals",
172
169
  "columnsFrom": [
173
170
  "eval_id"
174
171
  ],
172
+ "tableTo": "evals",
175
173
  "columnsTo": [
176
174
  "id"
177
175
  ],
178
- "onDelete": "no action",
179
- "onUpdate": "no action"
176
+ "onUpdate": "no action",
177
+ "onDelete": "no action"
180
178
  },
181
179
  "evals_to_prompts_prompt_id_prompts_id_fk": {
182
180
  "name": "evals_to_prompts_prompt_id_prompts_id_fk",
183
181
  "tableFrom": "evals_to_prompts",
184
- "tableTo": "prompts",
185
182
  "columnsFrom": [
186
183
  "prompt_id"
187
184
  ],
185
+ "tableTo": "prompts",
188
186
  "columnsTo": [
189
187
  "id"
190
188
  ],
191
- "onDelete": "no action",
192
- "onUpdate": "no action"
189
+ "onUpdate": "no action",
190
+ "onDelete": "no action"
193
191
  }
194
192
  },
195
193
  "compositePrimaryKeys": {
@@ -237,8 +235,9 @@
237
235
  },
238
236
  "enums": {},
239
237
  "_meta": {
240
- "schemas": {},
241
238
  "tables": {},
242
239
  "columns": {}
243
- }
240
+ },
241
+ "id": "6325e41a-40cd-4db1-9e99-df139a264a14",
242
+ "prevId": "476c887f-edcb-4b29-b054-50a8383cc6c2"
244
243
  }
package/dist/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.70.1",
5
+ "version": "0.71.1",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "repository": {
@@ -62,9 +62,10 @@
62
62
  "node-sql-parser": "^5.2.0"
63
63
  },
64
64
  "devDependencies": {
65
- "@aws-sdk/client-bedrock-runtime": "^3.609.0",
65
+ "@aws-sdk/client-bedrock-runtime": "^3.614.0",
66
66
  "@azure/identity": "^4.3.0",
67
- "@eslint/js": "^9.6.0",
67
+ "@eslint/js": "^9.7.0",
68
+ "@libsql/client": "^0.7.0",
68
69
  "@swc/cli": "^0.4.0",
69
70
  "@swc/core": "^1.6.13",
70
71
  "@swc/jest": "^0.2.36",
@@ -90,7 +91,8 @@
90
91
  "@types/uuid": "^10.0.0",
91
92
  "check-dependency-version-consistency": "^4.1.0",
92
93
  "cloudflare": "^3.4.0",
93
- "drizzle-kit": "^0.20.13",
94
+ "drizzle-kit": "^0.23.0",
95
+ "esbuild": "^0.23.0",
94
96
  "eslint": "^8.57.0",
95
97
  "eslint-plugin-jest": "^28.6.0",
96
98
  "eslint-plugin-unused-imports": "^3.2.0",
@@ -98,17 +100,17 @@
98
100
  "jest-watch-typeahead": "^2.2.2",
99
101
  "next": "13.4.13",
100
102
  "nock": "^13.5.4",
101
- "prettier": "^3.3.2",
103
+ "prettier": "^3.3.3",
102
104
  "shx": "^0.3.4",
103
105
  "ts-node": "^10.9.2",
104
106
  "typescript": "^5.5.3",
105
- "typescript-eslint": "^7.15.0"
107
+ "typescript-eslint": "^7.16.0"
106
108
  },
107
109
  "dependencies": {
108
110
  "@anthropic-ai/sdk": "^0.24.3",
109
111
  "@apidevtools/json-schema-ref-parser": "^11.6.4",
110
112
  "@googleapis/sheets": "^9.0.0",
111
- "ajv": "^8.16.0",
113
+ "ajv": "^8.17.1",
112
114
  "ajv-formats": "^2.1.1",
113
115
  "async": "^3.2.5",
114
116
  "better-sqlite3": "^11.1.2",
@@ -126,9 +128,10 @@
126
128
  "debounce": "^1.2.1",
127
129
  "dedent": "^1.5.3",
128
130
  "dotenv": "^16.4.5",
129
- "drizzle-orm": "^0.29.3",
131
+ "drizzle-orm": "^0.32.0",
130
132
  "express": "^4.19.2",
131
133
  "fast-deep-equal": "^3.1.3",
134
+ "fast-xml-parser": "^4.4.0",
132
135
  "fastest-levenshtein": "^1.0.16",
133
136
  "glob": "^10.4.3",
134
137
  "inquirer": "^8.2.6",
@@ -136,7 +139,7 @@
136
139
  "mathjs": "^13.0.2",
137
140
  "node-fetch": "^2.6.7",
138
141
  "nunjucks": "^3.2.4",
139
- "openai": "^4.52.3",
142
+ "openai": "^4.52.7",
140
143
  "opener": "^1.5.2",
141
144
  "proxy-agent": "^6.4.0",
142
145
  "python-shell": "^5.0.0",
@@ -147,7 +150,7 @@
147
150
  "socket.io": "^4.7.5",
148
151
  "tiny-invariant": "^1.3.3",
149
152
  "uuid": "^10.0.0",
150
- "winston": "^3.13.0",
153
+ "winston": "^3.13.1",
151
154
  "zod": "^3.23.8"
152
155
  }
153
156
  }
@@ -1,6 +1,14 @@
1
1
  import { matchesSimilarity, matchesLlmRubric, matchesFactuality, matchesClosedQa, matchesClassification, matchesAnswerRelevance, matchesContextRecall, matchesContextRelevance, matchesContextFaithfulness, matchesSelectBest, matchesModeration } from './matchers';
2
2
  import { type ApiProvider, type Assertion, type AssertionType, type AtomicTestCase, type GradingResult, AssertionValue, ProviderResponse } from './types';
3
3
  export declare const MODEL_GRADED_ASSERTION_TYPES: Set<AssertionType>;
4
+ export declare function validateXml(xmlString: string, requiredElements?: string[]): {
5
+ isValid: boolean;
6
+ reason: string;
7
+ };
8
+ export declare function containsXml(outputString: string, requiredElements?: string[]): {
9
+ isValid: boolean;
10
+ reason: string;
11
+ };
4
12
  export declare function isSql(outputString: string, renderedValue: AssertionValue | undefined, inverse: boolean, assertion: Assertion): Promise<GradingResult>;
5
13
  export declare function runAssertion({ prompt, provider, assertion, test, latencyMs, providerResponse, }: {
6
14
  prompt?: string;
@@ -1 +1 @@
1
- {"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/assertions.ts"],"names":[],"mappings":"AAiBA,OAAO,EACL,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,aAAa,EAGlB,cAAc,EACd,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAOjB,eAAO,MAAM,4BAA4B,oBASvC,CAAC;AAqDH,wBAAsB,KAAK,CACzB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,cAAc,GAAG,SAAS,EACzC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,aAAa,CAAC,CA2ExB;AAED,wBAAsB,YAAY,CAAC,EACjC,MAAM,EACN,QAAQ,EACR,SAAS,EACT,IAAI,EACJ,SAAS,EACT,gBAAgB,GACjB,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,SAAS,EAAE,SAAS,CAAC;IACrB,IAAI,EAAE,cAAc,CAAC;IACrB,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,aAAa,CAAC,CAuhCzB;AAED,wBAAsB,aAAa,CAAC,EAClC,MAAM,EACN,QAAQ,EACR,gBAAgB,EAChB,IAAI,EACJ,SAAS,GACV,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,IAAI,EAAE,cAAc,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,aAAa,CAAC,CAmFzB;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,cAAc,EACpB,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAa1B;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAU3E;;;;;;;;;;;;;;;;AAGD,wBAcE"}
1
+ {"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/assertions.ts"],"names":[],"mappings":"AAkBA,OAAO,EACL,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,aAAa,EAGlB,cAAc,EACd,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAQjB,eAAO,MAAM,4BAA4B,oBASvC,CAAC;AAqDH,wBAAgB,WAAW,CACzB,SAAS,EAAE,MAAM,EACjB,gBAAgB,CAAC,EAAE,MAAM,EAAE,GAC1B;IAAE,OAAO,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAsCtC;AAED,wBAAgB,WAAW,CACzB,YAAY,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,MAAM,EAAE,GAC1B;IAAE,OAAO,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAgBtC;AAED,wBAAsB,KAAK,CACzB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,cAAc,GAAG,SAAS,EACzC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,aAAa,CAAC,CA2ExB;AAED,wBAAsB,YAAY,CAAC,EACjC,MAAM,EACN,QAAQ,EACR,SAAS,EACT,IAAI,EACJ,SAAS,EACT,gBAAgB,GACjB,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,SAAS,EAAE,SAAS,CAAC;IACrB,IAAI,EAAE,cAAc,CAAC;IACrB,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,aAAa,CAAC,CAkjCzB;AAED,wBAAsB,aAAa,CAAC,EAClC,MAAM,EACN,QAAQ,EACR,gBAAgB,EAChB,IAAI,EACJ,SAAS,GACV,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,IAAI,EAAE,cAAc,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,GAAG,OAAO,CAAC,aAAa,CAAC,CAmFzB;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,cAAc,EACpB,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAa1B;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAU3E;;;;;;;;;;;;;;;;AAGD,wBAcE"}
@@ -27,6 +27,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
29
  exports.MODEL_GRADED_ASSERTION_TYPES = void 0;
30
+ exports.validateXml = validateXml;
31
+ exports.containsXml = containsXml;
30
32
  exports.isSql = isSql;
31
33
  exports.runAssertion = runAssertion;
32
34
  exports.runAssertions = runAssertions;
@@ -35,6 +37,7 @@ exports.readAssertions = readAssertions;
35
37
  const ajv_1 = __importDefault(require("ajv"));
36
38
  const ajv_formats_1 = __importDefault(require("ajv-formats"));
37
39
  const async_1 = __importDefault(require("async"));
40
+ const fast_xml_parser_1 = require("fast-xml-parser");
38
41
  const fastest_levenshtein_1 = require("fastest-levenshtein");
39
42
  const fs_1 = __importDefault(require("fs"));
40
43
  const js_yaml_1 = __importDefault(require("js-yaml"));
@@ -55,6 +58,7 @@ const wrapper_1 = require("./python/wrapper");
55
58
  const telemetry_1 = __importDefault(require("./telemetry"));
56
59
  const types_1 = require("./types");
57
60
  const util_1 = require("./util");
61
+ const templates_1 = require("./util/templates");
58
62
  const ASSERTIONS_MAX_CONCURRENCY = process.env.PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY
59
63
  ? parseInt(process.env.PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY, 10)
60
64
  : 3;
@@ -70,7 +74,7 @@ exports.MODEL_GRADED_ASSERTION_TYPES = new Set([
70
74
  ]);
71
75
  const ajv = new ajv_1.default();
72
76
  (0, ajv_formats_1.default)(ajv);
73
- const nunjucks = (0, util_1.getNunjucksEngine)();
77
+ const nunjucks = (0, templates_1.getNunjucksEngine)();
74
78
  const clone = (0, rfdc_1.default)();
75
79
  function getFinalTest(test, assertion) {
76
80
  // Deep copy
@@ -101,6 +105,57 @@ function handleRougeScore(baseType, assertion, expected, output, inverted) {
101
105
  assertion,
102
106
  };
103
107
  }
108
+ function validateXml(xmlString, requiredElements) {
109
+ if (!xmlString.startsWith('<')) {
110
+ return { isValid: false, reason: 'XML is missing opening tag' };
111
+ }
112
+ const parser = new fast_xml_parser_1.XMLParser({
113
+ allowBooleanAttributes: true,
114
+ ignoreAttributes: false,
115
+ parseAttributeValue: true,
116
+ parseTagValue: true,
117
+ });
118
+ try {
119
+ const parsedXml = parser.parse(xmlString);
120
+ if (requiredElements && requiredElements.length > 0) {
121
+ const missingElements = requiredElements.filter((element) => {
122
+ const path = element.split('.');
123
+ let current = parsedXml;
124
+ for (const key of path) {
125
+ if (current[key] === undefined) {
126
+ return true;
127
+ }
128
+ current = current[key];
129
+ }
130
+ return false;
131
+ });
132
+ if (missingElements.length > 0) {
133
+ return {
134
+ isValid: false,
135
+ reason: `XML is missing required elements: ${missingElements.join(', ')}`,
136
+ };
137
+ }
138
+ }
139
+ return { isValid: true, reason: 'XML is valid and contains all required elements' };
140
+ }
141
+ catch (err) {
142
+ return { isValid: false, reason: `XML parsing failed: ${err.message}` };
143
+ }
144
+ }
145
+ function containsXml(outputString, requiredElements) {
146
+ const xmlRegex = /<\?xml.*?>[\s\S]*<\/[^>]+>|\S*<[^>]+>[\s\S]*<\/[^>]+>/;
147
+ const xmlMatches = outputString.match(xmlRegex);
148
+ if (!xmlMatches) {
149
+ return { isValid: false, reason: 'No XML content found in the output' };
150
+ }
151
+ for (const xmlMatch of xmlMatches) {
152
+ const { isValid, reason } = validateXml(xmlMatch, requiredElements);
153
+ if (isValid) {
154
+ return { isValid: true, reason: reason };
155
+ }
156
+ }
157
+ return { isValid: false, reason: 'No valid XML content found matching the requirements' };
158
+ }
104
159
  async function isSql(outputString, renderedValue, inverse, assertion) {
105
160
  let pass = false;
106
161
  let parsedSql;
@@ -310,6 +365,31 @@ async function runAssertion({ prompt, provider, assertion, test, latencyMs, prov
310
365
  assertion,
311
366
  };
312
367
  }
368
+ if (baseType === 'is-xml' || baseType === 'contains-xml') {
369
+ let requiredElements;
370
+ if (typeof renderedValue === 'string') {
371
+ requiredElements = renderedValue.split(',').map((el) => el.trim());
372
+ }
373
+ else if (Array.isArray(renderedValue) && renderedValue.length > 0) {
374
+ requiredElements = renderedValue.map((el) => el.toString());
375
+ }
376
+ else if (typeof renderedValue === 'object' && Object.keys(renderedValue).length > 0) {
377
+ if ('requiredElements' in renderedValue && Array.isArray(renderedValue.requiredElements)) {
378
+ requiredElements = renderedValue.requiredElements.map((el) => el.toString());
379
+ }
380
+ else {
381
+ throw new Error('xml assertion must contain a string, array value, or no value');
382
+ }
383
+ }
384
+ const result = (baseType === 'is-xml' ? validateXml : containsXml)(outputString, requiredElements);
385
+ pass = result.isValid !== inverse;
386
+ return {
387
+ pass,
388
+ score: pass ? 1 : 0,
389
+ reason: pass ? 'Assertion passed' : result.reason,
390
+ assertion,
391
+ };
392
+ }
313
393
  if (baseType === 'is-sql') {
314
394
  return isSql(outputString, renderedValue, inverse, assertion);
315
395
  }