promptfoo 0.17.8 → 0.17.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,8 +5,8 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-0c6f887d.js"></script>
9
- <link rel="stylesheet" href="/assets/index-f9b230d1.css">
8
+ <script type="module" crossorigin src="/assets/index-8388d689.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-d2b6a160.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.17.8",
5
+ "version": "0.17.9",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
package/src/evaluator.ts CHANGED
@@ -111,13 +111,19 @@ class Evaluator {
111
111
  vars,
112
112
  };
113
113
 
114
+ let latencyMs = 0;
114
115
  try {
116
+ const startTime = Date.now();
115
117
  const response = await provider.callApi(renderedPrompt);
118
+ const endTime = Date.now();
119
+ latencyMs = endTime - startTime;
120
+
116
121
  const ret: EvaluateResult = {
117
122
  ...setup,
118
123
  response,
119
124
  success: false,
120
125
  score: 0,
126
+ latencyMs,
121
127
  };
122
128
  if (response.error) {
123
129
  ret.error = response.error;
@@ -177,6 +183,7 @@ class Evaluator {
177
183
  error: String(err) + '\n\n' + (err as Error).stack,
178
184
  success: false,
179
185
  score: 0,
186
+ latencyMs,
180
187
  };
181
188
  }
182
189
  }
@@ -426,6 +433,8 @@ class Evaluator {
426
433
  score: row.score,
427
434
  text: resultText,
428
435
  prompt: row.prompt.raw,
436
+ latencyMs: row.latencyMs,
437
+ tokenUsage: row.response?.tokenUsage,
429
438
  };
430
439
  },
431
440
  );
package/src/types.ts CHANGED
@@ -96,6 +96,7 @@ export interface EvaluateResult {
96
96
  error?: string;
97
97
  success: boolean;
98
98
  score: number;
99
+ latencyMs: number;
99
100
  }
100
101
 
101
102
  export interface EvaluateTableOutput {
@@ -103,6 +104,8 @@ export interface EvaluateTableOutput {
103
104
  score: number;
104
105
  text: string;
105
106
  prompt: string;
107
+ latencyMs: number;
108
+ tokenUsage?: Partial<TokenUsage>;
106
109
  }
107
110
 
108
111
  export interface EvaluateTable {
@@ -185,6 +188,9 @@ export interface TestCase {
185
188
  // Key-value pairs to substitute in the prompt
186
189
  vars?: Record<string, string | string[] | object>;
187
190
 
191
+ // Optional filepath or glob pattern to load vars from
192
+ loadVars?: string | string[];
193
+
188
194
  // Optional list of automatic checks to run on the LLM output
189
195
  assert?: Assertion[];
190
196
 
@@ -235,7 +241,7 @@ export interface TestSuiteConfig {
235
241
  prompts: string | string[];
236
242
 
237
243
  // Path to a test file, OR list of test file paths/globs, OR list of LLM prompt variations (aka "test case")
238
- tests: string | TestCase[];
244
+ tests: string | string[] | TestCase[];
239
245
 
240
246
  // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
241
247
  defaultTest?: Omit<TestCase, 'description'>;
package/src/util.ts CHANGED
@@ -224,7 +224,31 @@ export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
224
224
  return csvData;
225
225
  }
226
226
 
227
- export async function readVars(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
227
+ export async function readVarsFiles(
228
+ pathOrGlobs: string | string[],
229
+ basePath: string = '',
230
+ ): Promise<Record<string, string | string[] | object>> {
231
+ if (typeof pathOrGlobs === 'string') {
232
+ pathOrGlobs = [pathOrGlobs];
233
+ }
234
+
235
+ const ret: Record<string, string | string[] | object> = {};
236
+ for (const pathOrGlob of pathOrGlobs) {
237
+ const resolvedPath = path.resolve(basePath, pathOrGlob);
238
+ const paths = globSync(resolvedPath);
239
+
240
+ for (const p of paths) {
241
+ const yamlData = yaml.load(fs.readFileSync(p, 'utf-8'));
242
+ Object.assign(ret, yamlData);
243
+ }
244
+ }
245
+
246
+ return ret;
247
+ }
248
+
249
+ export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
250
+ // This function is confusingly named - it reads a CSV, JSON, or YAML file of
251
+ // TESTS or test equivalents.
228
252
  const resolvedVarsPath = path.resolve(basePath, varsPath);
229
253
  const fileExtension = parsePath(varsPath).ext.slice(1);
230
254
  let rows: CsvRow[] = [];
@@ -246,25 +270,53 @@ export async function readVars(varsPath: string, basePath: string = ''): Promise
246
270
  }
247
271
 
248
272
  export async function readTests(
249
- tests: string | TestCase[] | undefined,
273
+ tests: string | string[] | TestCase[] | undefined,
250
274
  basePath: string = '',
251
275
  ): Promise<TestCase[]> {
252
- if (!tests) {
253
- return [];
254
- }
276
+ const ret: TestCase[] = [];
277
+
278
+ const loadTestsFromGlob = async (loadTestsGlob: string) => {
279
+ const resolvedPath = path.resolve(basePath, loadTestsGlob);
280
+ const testFiles = globSync(resolvedPath);
281
+ for (const testFile of testFiles) {
282
+ const testFileContent = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
283
+ for (const testCase of testFileContent) {
284
+ if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
285
+ const testcaseBasePath = path.dirname(testFile);
286
+ testCase.vars = await readVarsFiles(testCase.vars, testcaseBasePath);
287
+ }
288
+ }
289
+ ret.push(...testFileContent);
290
+ }
291
+ };
255
292
 
256
293
  if (typeof tests === 'string') {
257
- // It's a filepath, load from CSV
258
- const vars = await readVars(tests, basePath);
259
- return vars.map((row, idx) => {
260
- const test = testCaseFromCsvRow(row);
261
- test.description = `Row #${idx + 1}`;
262
- return test;
263
- });
294
+ if (tests.endsWith('yaml') || tests.endsWith('yml')) {
295
+ // Load testcase config from yaml
296
+ await loadTestsFromGlob(tests);
297
+ } else {
298
+ // Legacy load CSV
299
+ const vars = await readTestsFile(tests, basePath);
300
+ return vars.map((row, idx) => {
301
+ const test = testCaseFromCsvRow(row);
302
+ test.description = `Row #${idx + 1}`;
303
+ return test;
304
+ });
305
+ }
306
+ } else if (Array.isArray(tests)) {
307
+ for (const maybeTestsGlob of tests) {
308
+ if (typeof maybeTestsGlob === 'string') {
309
+ // Assume it's a filepath
310
+ await loadTestsFromGlob(maybeTestsGlob);
311
+ } else {
312
+ // Assume it's a full test case
313
+ ret.push(maybeTestsGlob);
314
+ }
315
+ }
264
316
  }
265
317
 
266
318
  // Some validation of the shape of tests
267
- for (const test of tests) {
319
+ for (const test of ret) {
268
320
  if (!test.assert && !test.vars) {
269
321
  throw new Error(
270
322
  `Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
@@ -276,7 +328,7 @@ export async function readTests(
276
328
  }
277
329
  }
278
330
 
279
- return tests;
331
+ return ret;
280
332
  }
281
333
 
282
334
  export function writeOutput(
@@ -374,7 +426,10 @@ export function getLatestResultsPath(): string {
374
426
 
375
427
  export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
376
428
  const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
377
- const timestamp = new Date().toISOString();
429
+
430
+ // Replace colons with hyphens (colons are not allowed in Windows filenames).
431
+ const timestamp = new Date().toISOString().replace(/:/g, '-');
432
+
378
433
  const newResultsPath = path.join(resultsDirectory, `eval-${timestamp}.json`);
379
434
  const latestResultsPath = getLatestResultsPath();
380
435
  try {