promptfoo 0.91.2 → 0.92.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/dist/drizzle/0006_harsh_caretaker.sql +42 -0
  2. package/dist/drizzle/0007_cloudy_wong.sql +1 -0
  3. package/dist/drizzle/meta/0006_snapshot.json +721 -0
  4. package/dist/drizzle/meta/0007_snapshot.json +723 -0
  5. package/dist/drizzle/meta/_journal.json +14 -0
  6. package/dist/package.json +10 -8
  7. package/dist/src/app/assets/{index-Bc-q9rGp.js → index-CMDD1oSm.js} +233 -231
  8. package/dist/src/app/assets/{index.es-b3UhzAjj.js → index.es-D8cSwMq4.js} +1 -1
  9. package/dist/src/app/assets/{sync-D-OjEwME.js → sync-DJZvzYiS.js} +1 -1
  10. package/dist/src/app/index.html +1 -1
  11. package/dist/src/assertions.js +2 -2
  12. package/dist/src/assertions.js.map +1 -1
  13. package/dist/src/commands/cache.d.ts.map +1 -1
  14. package/dist/src/commands/cache.js +0 -2
  15. package/dist/src/commands/cache.js.map +1 -1
  16. package/dist/src/commands/eval.d.ts.map +1 -1
  17. package/dist/src/commands/eval.js +19 -16
  18. package/dist/src/commands/eval.js.map +1 -1
  19. package/dist/src/commands/export.d.ts.map +1 -1
  20. package/dist/src/commands/export.js +8 -31
  21. package/dist/src/commands/export.js.map +1 -1
  22. package/dist/src/commands/import.d.ts.map +1 -1
  23. package/dist/src/commands/import.js +52 -13
  24. package/dist/src/commands/import.js.map +1 -1
  25. package/dist/src/commands/list.d.ts.map +1 -1
  26. package/dist/src/commands/list.js +35 -7
  27. package/dist/src/commands/list.js.map +1 -1
  28. package/dist/src/commands/share.d.ts +2 -2
  29. package/dist/src/commands/share.d.ts.map +1 -1
  30. package/dist/src/commands/share.js +12 -13
  31. package/dist/src/commands/share.js.map +1 -1
  32. package/dist/src/commands/show.d.ts.map +1 -1
  33. package/dist/src/commands/show.js +10 -6
  34. package/dist/src/commands/show.js.map +1 -1
  35. package/dist/src/constants.d.ts +1 -0
  36. package/dist/src/constants.d.ts.map +1 -1
  37. package/dist/src/constants.js +2 -1
  38. package/dist/src/constants.js.map +1 -1
  39. package/dist/src/database/index.js +1 -1
  40. package/dist/src/database/index.js.map +1 -1
  41. package/dist/src/database/tables.d.ts +602 -4
  42. package/dist/src/database/tables.d.ts.map +1 -1
  43. package/dist/src/database/tables.js +67 -8
  44. package/dist/src/database/tables.js.map +1 -1
  45. package/dist/src/database/types.d.ts +3 -3
  46. package/dist/src/database/types.d.ts.map +1 -1
  47. package/dist/src/evaluator.d.ts +3 -2
  48. package/dist/src/evaluator.d.ts.map +1 -1
  49. package/dist/src/evaluator.js +75 -104
  50. package/dist/src/evaluator.js.map +1 -1
  51. package/dist/src/evaluatorHelpers.d.ts.map +1 -1
  52. package/dist/src/evaluatorHelpers.js +2 -1
  53. package/dist/src/evaluatorHelpers.js.map +1 -1
  54. package/dist/src/index.d.ts +4 -1
  55. package/dist/src/index.d.ts.map +1 -1
  56. package/dist/src/index.js +12 -9
  57. package/dist/src/index.js.map +1 -1
  58. package/dist/src/models/eval.d.ts +95 -0
  59. package/dist/src/models/eval.d.ts.map +1 -0
  60. package/dist/src/models/eval.js +390 -0
  61. package/dist/src/models/eval.js.map +1 -0
  62. package/dist/src/models/evalResult.d.ts +50 -0
  63. package/dist/src/models/evalResult.d.ts.map +1 -0
  64. package/dist/src/models/evalResult.js +122 -0
  65. package/dist/src/models/evalResult.js.map +1 -0
  66. package/dist/src/models/provider.d.ts +9 -0
  67. package/dist/src/models/provider.d.ts.map +1 -0
  68. package/dist/src/models/provider.js +47 -0
  69. package/dist/src/models/provider.js.map +1 -0
  70. package/dist/src/prompts/index.d.ts.map +1 -1
  71. package/dist/src/prompts/index.js +2 -1
  72. package/dist/src/prompts/index.js.map +1 -1
  73. package/dist/src/prompts/utils.d.ts +1 -0
  74. package/dist/src/prompts/utils.d.ts.map +1 -1
  75. package/dist/src/prompts/utils.js +7 -0
  76. package/dist/src/prompts/utils.js.map +1 -1
  77. package/dist/src/providers/http.js +2 -2
  78. package/dist/src/providers/http.js.map +1 -1
  79. package/dist/src/providers.js +5 -5
  80. package/dist/src/providers.js.map +1 -1
  81. package/dist/src/redteam/constants.d.ts +1 -1
  82. package/dist/src/redteam/constants.d.ts.map +1 -1
  83. package/dist/src/redteam/constants.js +7 -5
  84. package/dist/src/redteam/constants.js.map +1 -1
  85. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240617.json +10 -0
  86. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240618.json +10 -0
  87. package/dist/src/redteam/eval/harmful/llm_rubric-20240723.json +10 -0
  88. package/dist/src/redteam/eval/harmful/llm_rubric-20240724.json +10 -0
  89. package/dist/src/redteam/graders.d.ts +2 -0
  90. package/dist/src/redteam/graders.d.ts.map +1 -1
  91. package/dist/src/redteam/graders.js +2 -0
  92. package/dist/src/redteam/graders.js.map +1 -1
  93. package/dist/src/redteam/plugins/index.d.ts.map +1 -1
  94. package/dist/src/redteam/plugins/index.js +1 -0
  95. package/dist/src/redteam/plugins/index.js.map +1 -1
  96. package/dist/src/redteam/plugins/religion.d.ts +6 -0
  97. package/dist/src/redteam/plugins/religion.d.ts.map +1 -0
  98. package/dist/src/redteam/plugins/religion.js +14 -0
  99. package/dist/src/redteam/plugins/religion.js.map +1 -0
  100. package/dist/src/server/routes/evalRoutes.d.ts +1 -0
  101. package/dist/src/server/routes/evalRoutes.d.ts.map +1 -0
  102. package/dist/src/server/routes/evalRoutes.js +2 -0
  103. package/dist/src/server/routes/evalRoutes.js.map +1 -0
  104. package/dist/src/server/server.d.ts +1 -0
  105. package/dist/src/server/server.d.ts.map +1 -1
  106. package/dist/src/server/server.js +70 -31
  107. package/dist/src/server/server.js.map +1 -1
  108. package/dist/src/share.d.ts +2 -2
  109. package/dist/src/share.d.ts.map +1 -1
  110. package/dist/src/share.js +93 -34
  111. package/dist/src/share.js.map +1 -1
  112. package/dist/src/table.d.ts +2 -2
  113. package/dist/src/table.d.ts.map +1 -1
  114. package/dist/src/table.js +3 -3
  115. package/dist/src/table.js.map +1 -1
  116. package/dist/src/types/index.d.ts +163 -11
  117. package/dist/src/types/index.d.ts.map +1 -1
  118. package/dist/src/types/index.js +21 -1
  119. package/dist/src/types/index.js.map +1 -1
  120. package/dist/src/util/config/load.d.ts.map +1 -1
  121. package/dist/src/util/config/load.js +2 -1
  122. package/dist/src/util/config/load.js.map +1 -1
  123. package/dist/src/util/config/manage.d.ts.map +1 -1
  124. package/dist/src/util/config/manage.js.map +1 -1
  125. package/dist/src/util/convertEvalResultsToTable.d.ts +16 -0
  126. package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -0
  127. package/dist/src/util/convertEvalResultsToTable.js +137 -0
  128. package/dist/src/util/convertEvalResultsToTable.js.map +1 -0
  129. package/dist/src/util/createHash.d.ts +1 -0
  130. package/dist/src/util/createHash.d.ts.map +1 -1
  131. package/dist/src/util/createHash.js +9 -0
  132. package/dist/src/util/createHash.js.map +1 -1
  133. package/dist/src/util/file.d.ts +8 -0
  134. package/dist/src/util/file.d.ts.map +1 -0
  135. package/dist/src/util/file.js +13 -0
  136. package/dist/src/util/file.js.map +1 -0
  137. package/dist/src/util/index.d.ts +9 -14
  138. package/dist/src/util/index.d.ts.map +1 -1
  139. package/dist/src/util/index.js +87 -223
  140. package/dist/src/util/index.js.map +1 -1
  141. package/dist/src/util/time.d.ts +2 -0
  142. package/dist/src/util/time.d.ts.map +1 -0
  143. package/dist/src/util/time.js +7 -0
  144. package/dist/src/util/time.js.map +1 -0
  145. package/dist/src/util/transform.js +2 -2
  146. package/dist/src/util/transform.js.map +1 -1
  147. package/dist/src/validators/providers.d.ts +6 -0
  148. package/dist/src/validators/providers.d.ts.map +1 -1
  149. package/dist/src/validators/providers.js +1 -0
  150. package/dist/src/validators/providers.js.map +1 -1
  151. package/dist/src/validators/redteam.d.ts +6 -0
  152. package/dist/src/validators/redteam.d.ts.map +1 -1
  153. package/dist/test/commands/eval/filterFailingTests.test.js +24 -2
  154. package/dist/test/commands/eval/filterFailingTests.test.js.map +1 -1
  155. package/dist/test/evaluator.test.js +153 -74
  156. package/dist/test/evaluator.test.js.map +1 -1
  157. package/dist/test/factories/data/eval/database_records.d.ts +142 -0
  158. package/dist/test/factories/data/eval/database_records.d.ts.map +1 -0
  159. package/dist/test/factories/data/eval/database_records.js +251 -0
  160. package/dist/test/factories/data/eval/database_records.js.map +1 -0
  161. package/dist/test/factories/evalFactory.d.ts +768 -0
  162. package/dist/test/factories/evalFactory.d.ts.map +1 -0
  163. package/dist/test/factories/evalFactory.js +121 -0
  164. package/dist/test/factories/evalFactory.js.map +1 -0
  165. package/dist/test/factories/index.d.ts +1 -0
  166. package/dist/test/factories/index.d.ts.map +1 -0
  167. package/dist/test/factories/index.js +2 -0
  168. package/dist/test/factories/index.js.map +1 -0
  169. package/dist/test/index.test.js +17 -33
  170. package/dist/test/index.test.js.map +1 -1
  171. package/dist/test/models/eval.test.d.ts +2 -0
  172. package/dist/test/models/eval.test.d.ts.map +1 -0
  173. package/dist/test/models/eval.test.js +34 -0
  174. package/dist/test/models/eval.test.js.map +1 -0
  175. package/dist/test/providers.test.js +3 -3
  176. package/dist/test/providers.test.js.map +1 -1
  177. package/dist/test/server/share.test.d.ts +2 -0
  178. package/dist/test/server/share.test.d.ts.map +1 -0
  179. package/dist/test/server/share.test.js +36 -0
  180. package/dist/test/server/share.test.js.map +1 -0
  181. package/dist/test/server/v3evalToShare.json +507 -0
  182. package/dist/test/server/v4evalToShare.json +421 -0
  183. package/dist/test/types.test.js +58 -0
  184. package/dist/test/types.test.js.map +1 -1
  185. package/dist/test/util.file.test.d.ts +2 -0
  186. package/dist/test/util.file.test.d.ts.map +1 -0
  187. package/dist/test/util.file.test.js +32 -0
  188. package/dist/test/util.file.test.js.map +1 -0
  189. package/dist/test/util.listPrevious.test.d.ts +2 -0
  190. package/dist/test/util.listPrevious.test.d.ts.map +1 -0
  191. package/dist/test/util.listPrevious.test.js +37 -0
  192. package/dist/test/util.listPrevious.test.js.map +1 -0
  193. package/dist/test/util.test.js +38 -311
  194. package/dist/test/util.test.js.map +1 -1
  195. package/dist/tsconfig.tsbuildinfo +1 -0
  196. package/package.json +10 -8
@@ -0,0 +1,768 @@
1
+ import Eval from '../../src/models/eval';
2
+ export default class EvalFactory {
3
+ static create(): Promise<Eval>;
4
+ static createOldResult(): Promise<{
5
+ id: string;
6
+ config: Partial<{
7
+ prompts: string | Record<string, string> | (string | {
8
+ raw: string;
9
+ label: string;
10
+ function?: ((args_0: {
11
+ vars: Record<string, any>;
12
+ provider?: import("../../src").ApiProvider | undefined;
13
+ }, ...args: unknown[]) => Promise<any>) | undefined;
14
+ id?: string | undefined;
15
+ config?: any;
16
+ display?: string | undefined;
17
+ } | {
18
+ id: string;
19
+ raw?: string | undefined;
20
+ label?: string | undefined;
21
+ })[];
22
+ providers: string | (((args_0: string, args_1: {
23
+ vars: Record<string, string | {}>;
24
+ prompt: {
25
+ raw: string;
26
+ label: string;
27
+ function?: ((args_0: {
28
+ vars: Record<string, any>;
29
+ provider?: import("../../src").ApiProvider | undefined;
30
+ }, ...args: unknown[]) => Promise<any>) | undefined;
31
+ id?: string | undefined;
32
+ config?: any;
33
+ display?: string | undefined;
34
+ };
35
+ fetchWithCache?: any;
36
+ filters?: Record<string, (args_0: any, ...args: any[]) => string> | undefined;
37
+ getCache?: any;
38
+ logger?: any;
39
+ originalProvider?: any;
40
+ } | undefined, args_2: {
41
+ includeLogProbs?: boolean | undefined;
42
+ } | undefined, ...args: unknown[]) => Promise<import("../../src").ProviderResponse>) & {
43
+ label?: string | undefined;
44
+ }) | (string | {
45
+ id?: string | undefined;
46
+ config?: any;
47
+ label?: string | undefined;
48
+ prompts?: string[] | undefined;
49
+ transform?: string | undefined;
50
+ delay?: number | undefined;
51
+ env?: {
52
+ AI21_API_BASE_URL?: string | undefined;
53
+ AI21_API_KEY?: string | undefined;
54
+ ANTHROPIC_API_KEY?: string | undefined;
55
+ AWS_BEDROCK_REGION?: string | undefined;
56
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
57
+ AZURE_OPENAI_API_HOST?: string | undefined;
58
+ AZURE_OPENAI_API_KEY?: string | undefined;
59
+ AZURE_OPENAI_BASE_URL?: string | undefined;
60
+ BAM_API_HOST?: string | undefined;
61
+ BAM_API_KEY?: string | undefined;
62
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
63
+ CLOUDFLARE_API_KEY?: string | undefined;
64
+ COHERE_API_KEY?: string | undefined;
65
+ GOOGLE_API_HOST?: string | undefined;
66
+ GOOGLE_API_KEY?: string | undefined;
67
+ GROQ_API_KEY?: string | undefined;
68
+ LOCALAI_BASE_URL?: string | undefined;
69
+ MISTRAL_API_BASE_URL?: string | undefined;
70
+ MISTRAL_API_HOST?: string | undefined;
71
+ MISTRAL_API_KEY?: string | undefined;
72
+ OPENAI_API_BASE_URL?: string | undefined;
73
+ OPENAI_API_HOST?: string | undefined;
74
+ OPENAI_API_KEY?: string | undefined;
75
+ OPENAI_BASE_URL?: string | undefined;
76
+ OPENAI_ORGANIZATION?: string | undefined;
77
+ PALM_API_HOST?: string | undefined;
78
+ PALM_API_KEY?: string | undefined;
79
+ REPLICATE_API_KEY?: string | undefined;
80
+ REPLICATE_API_TOKEN?: string | undefined;
81
+ VERTEX_API_HOST?: string | undefined;
82
+ VERTEX_API_KEY?: string | undefined;
83
+ VERTEX_PROJECT_ID?: string | undefined;
84
+ VERTEX_PUBLISHER?: string | undefined;
85
+ VERTEX_REGION?: string | undefined;
86
+ } | undefined;
87
+ } | (((args_0: string, args_1: {
88
+ vars: Record<string, string | {}>;
89
+ prompt: {
90
+ raw: string;
91
+ label: string;
92
+ function?: ((args_0: {
93
+ vars: Record<string, any>;
94
+ provider?: import("../../src").ApiProvider | undefined;
95
+ }, ...args: unknown[]) => Promise<any>) | undefined;
96
+ id?: string | undefined;
97
+ config?: any;
98
+ display?: string | undefined;
99
+ };
100
+ fetchWithCache?: any;
101
+ filters?: Record<string, (args_0: any, ...args: any[]) => string> | undefined;
102
+ getCache?: any;
103
+ logger?: any;
104
+ originalProvider?: any;
105
+ } | undefined, args_2: {
106
+ includeLogProbs?: boolean | undefined;
107
+ } | undefined, ...args: unknown[]) => Promise<import("../../src").ProviderResponse>) & {
108
+ label?: string | undefined;
109
+ }) | Record<string, {
110
+ id?: string | undefined;
111
+ config?: any;
112
+ label?: string | undefined;
113
+ prompts?: string[] | undefined;
114
+ transform?: string | undefined;
115
+ delay?: number | undefined;
116
+ env?: {
117
+ AI21_API_BASE_URL?: string | undefined;
118
+ AI21_API_KEY?: string | undefined;
119
+ ANTHROPIC_API_KEY?: string | undefined;
120
+ AWS_BEDROCK_REGION?: string | undefined;
121
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
122
+ AZURE_OPENAI_API_HOST?: string | undefined;
123
+ AZURE_OPENAI_API_KEY?: string | undefined;
124
+ AZURE_OPENAI_BASE_URL?: string | undefined;
125
+ BAM_API_HOST?: string | undefined;
126
+ BAM_API_KEY?: string | undefined;
127
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
128
+ CLOUDFLARE_API_KEY?: string | undefined;
129
+ COHERE_API_KEY?: string | undefined;
130
+ GOOGLE_API_HOST?: string | undefined;
131
+ GOOGLE_API_KEY?: string | undefined;
132
+ GROQ_API_KEY?: string | undefined;
133
+ LOCALAI_BASE_URL?: string | undefined;
134
+ MISTRAL_API_BASE_URL?: string | undefined;
135
+ MISTRAL_API_HOST?: string | undefined;
136
+ MISTRAL_API_KEY?: string | undefined;
137
+ OPENAI_API_BASE_URL?: string | undefined;
138
+ OPENAI_API_HOST?: string | undefined;
139
+ OPENAI_API_KEY?: string | undefined;
140
+ OPENAI_BASE_URL?: string | undefined;
141
+ OPENAI_ORGANIZATION?: string | undefined;
142
+ PALM_API_HOST?: string | undefined;
143
+ PALM_API_KEY?: string | undefined;
144
+ REPLICATE_API_KEY?: string | undefined;
145
+ REPLICATE_API_TOKEN?: string | undefined;
146
+ VERTEX_API_HOST?: string | undefined;
147
+ VERTEX_API_KEY?: string | undefined;
148
+ VERTEX_PROJECT_ID?: string | undefined;
149
+ VERTEX_PUBLISHER?: string | undefined;
150
+ VERTEX_REGION?: string | undefined;
151
+ } | undefined;
152
+ }>)[];
153
+ env?: {
154
+ AI21_API_BASE_URL?: string | undefined;
155
+ AI21_API_KEY?: string | undefined;
156
+ ANTHROPIC_API_KEY?: string | undefined;
157
+ AWS_BEDROCK_REGION?: string | undefined;
158
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
159
+ AZURE_OPENAI_API_HOST?: string | undefined;
160
+ AZURE_OPENAI_API_KEY?: string | undefined;
161
+ AZURE_OPENAI_BASE_URL?: string | undefined;
162
+ BAM_API_HOST?: string | undefined;
163
+ BAM_API_KEY?: string | undefined;
164
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
165
+ CLOUDFLARE_API_KEY?: string | undefined;
166
+ COHERE_API_KEY?: string | undefined;
167
+ GOOGLE_API_HOST?: string | undefined;
168
+ GOOGLE_API_KEY?: string | undefined;
169
+ GROQ_API_KEY?: string | undefined;
170
+ LOCALAI_BASE_URL?: string | undefined;
171
+ MISTRAL_API_BASE_URL?: string | undefined;
172
+ MISTRAL_API_HOST?: string | undefined;
173
+ MISTRAL_API_KEY?: string | undefined;
174
+ OPENAI_API_BASE_URL?: string | undefined;
175
+ OPENAI_API_HOST?: string | undefined;
176
+ OPENAI_API_KEY?: string | undefined;
177
+ OPENAI_BASE_URL?: string | undefined;
178
+ OPENAI_ORGANIZATION?: string | undefined;
179
+ PALM_API_HOST?: string | undefined;
180
+ PALM_API_KEY?: string | undefined;
181
+ REPLICATE_API_KEY?: string | undefined;
182
+ REPLICATE_API_TOKEN?: string | undefined;
183
+ VERTEX_API_HOST?: string | undefined;
184
+ VERTEX_API_KEY?: string | undefined;
185
+ VERTEX_PROJECT_ID?: string | undefined;
186
+ VERTEX_PUBLISHER?: string | undefined;
187
+ VERTEX_REGION?: string | undefined;
188
+ } | undefined;
189
+ metadata?: Record<string, any> | undefined;
190
+ description?: string | undefined;
191
+ tests?: string | (string | {
192
+ options?: ({
193
+ prefix?: string | undefined;
194
+ suffix?: string | undefined;
195
+ } & {
196
+ transform?: string | undefined;
197
+ postprocess?: string | undefined;
198
+ transformVars?: string | undefined;
199
+ storeOutputAs?: string | undefined;
200
+ } & {
201
+ provider?: any;
202
+ rubricPrompt?: string | string[] | {
203
+ role: string;
204
+ content: string;
205
+ }[] | undefined;
206
+ factuality?: {
207
+ subset?: number | undefined;
208
+ superset?: number | undefined;
209
+ agree?: number | undefined;
210
+ disagree?: number | undefined;
211
+ differButFactual?: number | undefined;
212
+ } | undefined;
213
+ } & {
214
+ disableVarExpansion?: boolean | undefined;
215
+ disableConversationVar?: boolean | undefined;
216
+ runSerially?: boolean | undefined;
217
+ }) | undefined;
218
+ vars?: Record<string, string | any[] | string[] | {}> | undefined;
219
+ provider?: string | {
220
+ id?: string | undefined;
221
+ config?: any;
222
+ label?: string | undefined;
223
+ prompts?: string[] | undefined;
224
+ transform?: string | undefined;
225
+ delay?: number | undefined;
226
+ env?: {
227
+ AI21_API_BASE_URL?: string | undefined;
228
+ AI21_API_KEY?: string | undefined;
229
+ ANTHROPIC_API_KEY?: string | undefined;
230
+ AWS_BEDROCK_REGION?: string | undefined;
231
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
232
+ AZURE_OPENAI_API_HOST?: string | undefined;
233
+ AZURE_OPENAI_API_KEY?: string | undefined;
234
+ AZURE_OPENAI_BASE_URL?: string | undefined;
235
+ BAM_API_HOST?: string | undefined;
236
+ BAM_API_KEY?: string | undefined;
237
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
238
+ CLOUDFLARE_API_KEY?: string | undefined;
239
+ COHERE_API_KEY?: string | undefined;
240
+ GOOGLE_API_HOST?: string | undefined;
241
+ GOOGLE_API_KEY?: string | undefined;
242
+ GROQ_API_KEY?: string | undefined;
243
+ LOCALAI_BASE_URL?: string | undefined;
244
+ MISTRAL_API_BASE_URL?: string | undefined;
245
+ MISTRAL_API_HOST?: string | undefined;
246
+ MISTRAL_API_KEY?: string | undefined;
247
+ OPENAI_API_BASE_URL?: string | undefined;
248
+ OPENAI_API_HOST?: string | undefined;
249
+ OPENAI_API_KEY?: string | undefined;
250
+ OPENAI_BASE_URL?: string | undefined;
251
+ OPENAI_ORGANIZATION?: string | undefined;
252
+ PALM_API_HOST?: string | undefined;
253
+ PALM_API_KEY?: string | undefined;
254
+ REPLICATE_API_KEY?: string | undefined;
255
+ REPLICATE_API_TOKEN?: string | undefined;
256
+ VERTEX_API_HOST?: string | undefined;
257
+ VERTEX_API_KEY?: string | undefined;
258
+ VERTEX_PROJECT_ID?: string | undefined;
259
+ VERTEX_PUBLISHER?: string | undefined;
260
+ VERTEX_REGION?: string | undefined;
261
+ } | undefined;
262
+ } | {
263
+ callApi: import("../../src").CallApiFunction;
264
+ id: (...args: unknown[]) => string;
265
+ config?: any;
266
+ label?: string | undefined;
267
+ transform?: string | undefined;
268
+ delay?: number | undefined;
269
+ callEmbeddingApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderEmbeddingResponse>) | undefined;
270
+ callClassificationApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderClassificationResponse>) | undefined;
271
+ } | undefined;
272
+ metadata?: Record<string, any> | undefined;
273
+ description?: string | undefined;
274
+ providerOutput?: string | {} | undefined;
275
+ assert?: ({
276
+ type: import("../../src").AssertionType;
277
+ config?: Record<string, any> | undefined;
278
+ value?: import("../../src").AssertionValue | undefined;
279
+ provider?: any;
280
+ transform?: string | undefined;
281
+ rubricPrompt?: string | string[] | {
282
+ role: string;
283
+ content: string;
284
+ }[] | undefined;
285
+ threshold?: number | undefined;
286
+ weight?: number | undefined;
287
+ metric?: string | undefined;
288
+ } | {
289
+ type: "assert-set";
290
+ assert: {
291
+ type: import("../../src").AssertionType;
292
+ config?: Record<string, any> | undefined;
293
+ value?: import("../../src").AssertionValue | undefined;
294
+ provider?: any;
295
+ transform?: string | undefined;
296
+ rubricPrompt?: string | string[] | {
297
+ role: string;
298
+ content: string;
299
+ }[] | undefined;
300
+ threshold?: number | undefined;
301
+ weight?: number | undefined;
302
+ metric?: string | undefined;
303
+ }[];
304
+ threshold?: number | undefined;
305
+ weight?: number | undefined;
306
+ metric?: string | undefined;
307
+ })[] | undefined;
308
+ threshold?: number | undefined;
309
+ })[] | undefined;
310
+ redteam?: import("../../src").RedteamFileConfig | undefined;
311
+ tags?: Record<string, string> | undefined;
312
+ scenarios?: (string | {
313
+ config: {
314
+ options?: ({
315
+ prefix?: string | undefined;
316
+ suffix?: string | undefined;
317
+ } & {
318
+ transform?: string | undefined;
319
+ postprocess?: string | undefined;
320
+ transformVars?: string | undefined;
321
+ storeOutputAs?: string | undefined;
322
+ } & {
323
+ provider?: any;
324
+ rubricPrompt?: string | string[] | {
325
+ role: string;
326
+ content: string;
327
+ }[] | undefined;
328
+ factuality?: {
329
+ subset?: number | undefined;
330
+ superset?: number | undefined;
331
+ agree?: number | undefined;
332
+ disagree?: number | undefined;
333
+ differButFactual?: number | undefined;
334
+ } | undefined;
335
+ } & {
336
+ disableVarExpansion?: boolean | undefined;
337
+ disableConversationVar?: boolean | undefined;
338
+ runSerially?: boolean | undefined;
339
+ }) | undefined;
340
+ vars?: Record<string, string | any[] | string[] | {}> | undefined;
341
+ provider?: string | {
342
+ id?: string | undefined;
343
+ config?: any;
344
+ label?: string | undefined;
345
+ prompts?: string[] | undefined;
346
+ transform?: string | undefined;
347
+ delay?: number | undefined;
348
+ env?: {
349
+ AI21_API_BASE_URL?: string | undefined;
350
+ AI21_API_KEY?: string | undefined;
351
+ ANTHROPIC_API_KEY?: string | undefined;
352
+ AWS_BEDROCK_REGION?: string | undefined;
353
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
354
+ AZURE_OPENAI_API_HOST?: string | undefined;
355
+ AZURE_OPENAI_API_KEY?: string | undefined;
356
+ AZURE_OPENAI_BASE_URL?: string | undefined;
357
+ BAM_API_HOST?: string | undefined;
358
+ BAM_API_KEY?: string | undefined;
359
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
360
+ CLOUDFLARE_API_KEY?: string | undefined;
361
+ COHERE_API_KEY?: string | undefined;
362
+ GOOGLE_API_HOST?: string | undefined;
363
+ GOOGLE_API_KEY?: string | undefined;
364
+ GROQ_API_KEY?: string | undefined;
365
+ LOCALAI_BASE_URL?: string | undefined;
366
+ MISTRAL_API_BASE_URL?: string | undefined;
367
+ MISTRAL_API_HOST?: string | undefined;
368
+ MISTRAL_API_KEY?: string | undefined;
369
+ OPENAI_API_BASE_URL?: string | undefined;
370
+ OPENAI_API_HOST?: string | undefined;
371
+ OPENAI_API_KEY?: string | undefined;
372
+ OPENAI_BASE_URL?: string | undefined;
373
+ OPENAI_ORGANIZATION?: string | undefined;
374
+ PALM_API_HOST?: string | undefined;
375
+ PALM_API_KEY?: string | undefined;
376
+ REPLICATE_API_KEY?: string | undefined;
377
+ REPLICATE_API_TOKEN?: string | undefined;
378
+ VERTEX_API_HOST?: string | undefined;
379
+ VERTEX_API_KEY?: string | undefined;
380
+ VERTEX_PROJECT_ID?: string | undefined;
381
+ VERTEX_PUBLISHER?: string | undefined;
382
+ VERTEX_REGION?: string | undefined;
383
+ } | undefined;
384
+ } | {
385
+ callApi: import("../../src").CallApiFunction;
386
+ id: (...args: unknown[]) => string;
387
+ config?: any;
388
+ label?: string | undefined;
389
+ transform?: string | undefined;
390
+ delay?: number | undefined;
391
+ callEmbeddingApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderEmbeddingResponse>) | undefined;
392
+ callClassificationApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderClassificationResponse>) | undefined;
393
+ } | undefined;
394
+ metadata?: Record<string, any> | undefined;
395
+ description?: string | undefined;
396
+ providerOutput?: string | {} | undefined;
397
+ assert?: ({
398
+ type: import("../../src").AssertionType;
399
+ config?: Record<string, any> | undefined;
400
+ value?: import("../../src").AssertionValue | undefined;
401
+ provider?: any;
402
+ transform?: string | undefined;
403
+ rubricPrompt?: string | string[] | {
404
+ role: string;
405
+ content: string;
406
+ }[] | undefined;
407
+ threshold?: number | undefined;
408
+ weight?: number | undefined;
409
+ metric?: string | undefined;
410
+ } | {
411
+ type: "assert-set";
412
+ assert: {
413
+ type: import("../../src").AssertionType;
414
+ config?: Record<string, any> | undefined;
415
+ value?: import("../../src").AssertionValue | undefined;
416
+ provider?: any;
417
+ transform?: string | undefined;
418
+ rubricPrompt?: string | string[] | {
419
+ role: string;
420
+ content: string;
421
+ }[] | undefined;
422
+ threshold?: number | undefined;
423
+ weight?: number | undefined;
424
+ metric?: string | undefined;
425
+ }[];
426
+ threshold?: number | undefined;
427
+ weight?: number | undefined;
428
+ metric?: string | undefined;
429
+ })[] | undefined;
430
+ threshold?: number | undefined;
431
+ }[];
432
+ tests: {
433
+ options?: ({
434
+ prefix?: string | undefined;
435
+ suffix?: string | undefined;
436
+ } & {
437
+ transform?: string | undefined;
438
+ postprocess?: string | undefined;
439
+ transformVars?: string | undefined;
440
+ storeOutputAs?: string | undefined;
441
+ } & {
442
+ provider?: any;
443
+ rubricPrompt?: string | string[] | {
444
+ role: string;
445
+ content: string;
446
+ }[] | undefined;
447
+ factuality?: {
448
+ subset?: number | undefined;
449
+ superset?: number | undefined;
450
+ agree?: number | undefined;
451
+ disagree?: number | undefined;
452
+ differButFactual?: number | undefined;
453
+ } | undefined;
454
+ } & {
455
+ disableVarExpansion?: boolean | undefined;
456
+ disableConversationVar?: boolean | undefined;
457
+ runSerially?: boolean | undefined;
458
+ }) | undefined;
459
+ vars?: Record<string, string | any[] | string[] | {}> | undefined;
460
+ provider?: string | {
461
+ id?: string | undefined;
462
+ config?: any;
463
+ label?: string | undefined;
464
+ prompts?: string[] | undefined;
465
+ transform?: string | undefined;
466
+ delay?: number | undefined;
467
+ env?: {
468
+ AI21_API_BASE_URL?: string | undefined;
469
+ AI21_API_KEY?: string | undefined;
470
+ ANTHROPIC_API_KEY?: string | undefined;
471
+ AWS_BEDROCK_REGION?: string | undefined;
472
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
473
+ AZURE_OPENAI_API_HOST?: string | undefined;
474
+ AZURE_OPENAI_API_KEY?: string | undefined;
475
+ AZURE_OPENAI_BASE_URL?: string | undefined;
476
+ BAM_API_HOST?: string | undefined;
477
+ BAM_API_KEY?: string | undefined;
478
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
479
+ CLOUDFLARE_API_KEY?: string | undefined;
480
+ COHERE_API_KEY?: string | undefined;
481
+ GOOGLE_API_HOST?: string | undefined;
482
+ GOOGLE_API_KEY?: string | undefined;
483
+ GROQ_API_KEY?: string | undefined;
484
+ LOCALAI_BASE_URL?: string | undefined;
485
+ MISTRAL_API_BASE_URL?: string | undefined;
486
+ MISTRAL_API_HOST?: string | undefined;
487
+ MISTRAL_API_KEY?: string | undefined;
488
+ OPENAI_API_BASE_URL?: string | undefined;
489
+ OPENAI_API_HOST?: string | undefined;
490
+ OPENAI_API_KEY?: string | undefined;
491
+ OPENAI_BASE_URL?: string | undefined;
492
+ OPENAI_ORGANIZATION?: string | undefined;
493
+ PALM_API_HOST?: string | undefined;
494
+ PALM_API_KEY?: string | undefined;
495
+ REPLICATE_API_KEY?: string | undefined;
496
+ REPLICATE_API_TOKEN?: string | undefined;
497
+ VERTEX_API_HOST?: string | undefined;
498
+ VERTEX_API_KEY?: string | undefined;
499
+ VERTEX_PROJECT_ID?: string | undefined;
500
+ VERTEX_PUBLISHER?: string | undefined;
501
+ VERTEX_REGION?: string | undefined;
502
+ } | undefined;
503
+ } | {
504
+ callApi: import("../../src").CallApiFunction;
505
+ id: (...args: unknown[]) => string;
506
+ config?: any;
507
+ label?: string | undefined;
508
+ transform?: string | undefined;
509
+ delay?: number | undefined;
510
+ callEmbeddingApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderEmbeddingResponse>) | undefined;
511
+ callClassificationApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderClassificationResponse>) | undefined;
512
+ } | undefined;
513
+ metadata?: Record<string, any> | undefined;
514
+ description?: string | undefined;
515
+ providerOutput?: string | {} | undefined;
516
+ assert?: ({
517
+ type: import("../../src").AssertionType;
518
+ config?: Record<string, any> | undefined;
519
+ value?: import("../../src").AssertionValue | undefined;
520
+ provider?: any;
521
+ transform?: string | undefined;
522
+ rubricPrompt?: string | string[] | {
523
+ role: string;
524
+ content: string;
525
+ }[] | undefined;
526
+ threshold?: number | undefined;
527
+ weight?: number | undefined;
528
+ metric?: string | undefined;
529
+ } | {
530
+ type: "assert-set";
531
+ assert: {
532
+ type: import("../../src").AssertionType;
533
+ config?: Record<string, any> | undefined;
534
+ value?: import("../../src").AssertionValue | undefined;
535
+ provider?: any;
536
+ transform?: string | undefined;
537
+ rubricPrompt?: string | string[] | {
538
+ role: string;
539
+ content: string;
540
+ }[] | undefined;
541
+ threshold?: number | undefined;
542
+ weight?: number | undefined;
543
+ metric?: string | undefined;
544
+ }[];
545
+ threshold?: number | undefined;
546
+ weight?: number | undefined;
547
+ metric?: string | undefined;
548
+ })[] | undefined;
549
+ threshold?: number | undefined;
550
+ }[];
551
+ description?: string | undefined;
552
+ })[] | undefined;
553
+ defaultTest?: {
554
+ options?: ({
555
+ prefix?: string | undefined;
556
+ suffix?: string | undefined;
557
+ } & {
558
+ transform?: string | undefined;
559
+ postprocess?: string | undefined;
560
+ transformVars?: string | undefined;
561
+ storeOutputAs?: string | undefined;
562
+ } & {
563
+ provider?: any;
564
+ rubricPrompt?: string | string[] | {
565
+ role: string;
566
+ content: string;
567
+ }[] | undefined;
568
+ factuality?: {
569
+ subset?: number | undefined;
570
+ superset?: number | undefined;
571
+ agree?: number | undefined;
572
+ disagree?: number | undefined;
573
+ differButFactual?: number | undefined;
574
+ } | undefined;
575
+ } & {
576
+ disableVarExpansion?: boolean | undefined;
577
+ disableConversationVar?: boolean | undefined;
578
+ runSerially?: boolean | undefined;
579
+ }) | undefined;
580
+ vars?: Record<string, string | any[] | string[] | {}> | undefined;
581
+ provider?: string | {
582
+ id?: string | undefined;
583
+ config?: any;
584
+ label?: string | undefined;
585
+ prompts?: string[] | undefined;
586
+ transform?: string | undefined;
587
+ delay?: number | undefined;
588
+ env?: {
589
+ AI21_API_BASE_URL?: string | undefined;
590
+ AI21_API_KEY?: string | undefined;
591
+ ANTHROPIC_API_KEY?: string | undefined;
592
+ AWS_BEDROCK_REGION?: string | undefined;
593
+ AZURE_OPENAI_API_BASE_URL?: string | undefined;
594
+ AZURE_OPENAI_API_HOST?: string | undefined;
595
+ AZURE_OPENAI_API_KEY?: string | undefined;
596
+ AZURE_OPENAI_BASE_URL?: string | undefined;
597
+ BAM_API_HOST?: string | undefined;
598
+ BAM_API_KEY?: string | undefined;
599
+ CLOUDFLARE_ACCOUNT_ID?: string | undefined;
600
+ CLOUDFLARE_API_KEY?: string | undefined;
601
+ COHERE_API_KEY?: string | undefined;
602
+ GOOGLE_API_HOST?: string | undefined;
603
+ GOOGLE_API_KEY?: string | undefined;
604
+ GROQ_API_KEY?: string | undefined;
605
+ LOCALAI_BASE_URL?: string | undefined;
606
+ MISTRAL_API_BASE_URL?: string | undefined;
607
+ MISTRAL_API_HOST?: string | undefined;
608
+ MISTRAL_API_KEY?: string | undefined;
609
+ OPENAI_API_BASE_URL?: string | undefined;
610
+ OPENAI_API_HOST?: string | undefined;
611
+ OPENAI_API_KEY?: string | undefined;
612
+ OPENAI_BASE_URL?: string | undefined;
613
+ OPENAI_ORGANIZATION?: string | undefined;
614
+ PALM_API_HOST?: string | undefined;
615
+ PALM_API_KEY?: string | undefined;
616
+ REPLICATE_API_KEY?: string | undefined;
617
+ REPLICATE_API_TOKEN?: string | undefined;
618
+ VERTEX_API_HOST?: string | undefined;
619
+ VERTEX_API_KEY?: string | undefined;
620
+ VERTEX_PROJECT_ID?: string | undefined;
621
+ VERTEX_PUBLISHER?: string | undefined;
622
+ VERTEX_REGION?: string | undefined;
623
+ } | undefined;
624
+ } | {
625
+ callApi: import("../../src").CallApiFunction;
626
+ id: (...args: unknown[]) => string;
627
+ config?: any;
628
+ label?: string | undefined;
629
+ transform?: string | undefined;
630
+ delay?: number | undefined;
631
+ callEmbeddingApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderEmbeddingResponse>) | undefined;
632
+ callClassificationApi?: ((args_0: string, ...args: unknown[]) => Promise<import("../../src").ProviderClassificationResponse>) | undefined;
633
+ } | undefined;
634
+ metadata?: Record<string, any> | undefined;
635
+ providerOutput?: string | {} | undefined;
636
+ assert?: ({
637
+ type: import("../../src").AssertionType;
638
+ config?: Record<string, any> | undefined;
639
+ value?: import("../../src").AssertionValue | undefined;
640
+ provider?: any;
641
+ transform?: string | undefined;
642
+ rubricPrompt?: string | string[] | {
643
+ role: string;
644
+ content: string;
645
+ }[] | undefined;
646
+ threshold?: number | undefined;
647
+ weight?: number | undefined;
648
+ metric?: string | undefined;
649
+ } | {
650
+ type: "assert-set";
651
+ assert: {
652
+ type: import("../../src").AssertionType;
653
+ config?: Record<string, any> | undefined;
654
+ value?: import("../../src").AssertionValue | undefined;
655
+ provider?: any;
656
+ transform?: string | undefined;
657
+ rubricPrompt?: string | string[] | {
658
+ role: string;
659
+ content: string;
660
+ }[] | undefined;
661
+ threshold?: number | undefined;
662
+ weight?: number | undefined;
663
+ metric?: string | undefined;
664
+ }[];
665
+ threshold?: number | undefined;
666
+ weight?: number | undefined;
667
+ metric?: string | undefined;
668
+ })[] | undefined;
669
+ threshold?: number | undefined;
670
+ } | undefined;
671
+ nunjucksFilters?: Record<string, string> | undefined;
672
+ derivedMetrics?: {
673
+ value: string | ((args_0: Record<string, number>, args_1: import("../../src").RunEvalOptions, ...args: unknown[]) => number);
674
+ name: string;
675
+ }[] | undefined;
676
+ extensions?: string[] | undefined;
677
+ outputPath?: string | string[] | undefined;
678
+ sharing?: boolean | {
679
+ apiBaseUrl?: string | undefined;
680
+ appBaseUrl?: string | undefined;
681
+ } | undefined;
682
+ writeLatestResults?: boolean | undefined;
683
+ evaluateOptions?: {
684
+ cache?: boolean | undefined;
685
+ maxConcurrency?: number | undefined;
686
+ delay?: number | undefined;
687
+ repeat?: number | undefined;
688
+ generateSuggestions?: boolean | undefined;
689
+ eventSource?: string | undefined;
690
+ interactiveProviders?: boolean | undefined;
691
+ progressCallback?: ((args_0: number, args_1: number, args_2: number, args_3: import("../../src").RunEvalOptions) => void) | undefined;
692
+ showProgressBar?: boolean | undefined;
693
+ } | undefined;
694
+ commandLineOptions?: {
695
+ cache?: boolean | undefined;
696
+ config?: string[] | undefined;
697
+ maxConcurrency?: number | undefined;
698
+ output?: string[] | undefined;
699
+ write?: boolean | undefined;
700
+ vars?: string | undefined;
701
+ prompts?: string[] | undefined;
702
+ delay?: number | undefined;
703
+ description?: string | undefined;
704
+ providers?: string[] | undefined;
705
+ repeat?: number | undefined;
706
+ tests?: string | undefined;
707
+ assertions?: string | undefined;
708
+ modelOutputs?: string | undefined;
709
+ verbose?: boolean | undefined;
710
+ grader?: string | undefined;
711
+ tableCellMaxLength?: number | undefined;
712
+ table?: boolean | undefined;
713
+ share?: boolean | undefined;
714
+ progressBar?: boolean | undefined;
715
+ watch?: boolean | undefined;
716
+ filterFailing?: string | undefined;
717
+ filterFirstN?: number | undefined;
718
+ filterPattern?: string | undefined;
719
+ filterProviders?: string | undefined;
720
+ var?: Record<string, string> | undefined;
721
+ generateSuggestions?: boolean | undefined;
722
+ promptPrefix?: string | undefined;
723
+ promptSuffix?: string | undefined;
724
+ envPath?: string | undefined;
725
+ } | undefined;
726
+ }>;
727
+ prompts: {
728
+ provider: string;
729
+ raw: string;
730
+ label: string;
731
+ function?: ((args_0: {
732
+ vars: Record<string, any>;
733
+ provider?: import("../../src").ApiProvider | undefined;
734
+ }, ...args: unknown[]) => Promise<any>) | undefined;
735
+ id?: string | undefined;
736
+ config?: any;
737
+ display?: string | undefined;
738
+ metrics?: {
739
+ cost: number;
740
+ tokenUsage: {
741
+ cached?: number | undefined;
742
+ completion?: number | undefined;
743
+ prompt?: number | undefined;
744
+ total?: number | undefined;
745
+ };
746
+ score: number;
747
+ testPassCount: number;
748
+ testFailCount: number;
749
+ assertPassCount: number;
750
+ assertFailCount: number;
751
+ totalLatencyMs: number;
752
+ namedScores: Record<string, number>;
753
+ namedScoresCount: Record<string, number>;
754
+ redteam?: {
755
+ pluginPassCount: Record<string, number>;
756
+ pluginFailCount: Record<string, number>;
757
+ strategyPassCount: Record<string, number>;
758
+ strategyFailCount: Record<string, number>;
759
+ } | undefined;
760
+ } | undefined;
761
+ }[] | null;
762
+ description: string | null;
763
+ createdAt: number;
764
+ author: string | null;
765
+ results: object | import("../../src").EvaluateSummaryV2;
766
+ }>;
767
+ }
768
+ //# sourceMappingURL=evalFactory.d.ts.map