promptfoo 0.17.8 → 0.17.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/package.json +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +8 -0
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/types.d.ts +5 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +3 -2
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +65 -16
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/{index-0c6f887d.js → index-8388d689.js} +1 -1
- package/dist/src/web/client/assets/{index-f9b230d1.css → index-d2b6a160.css} +1 -1
- package/dist/src/web/client/index.html +2 -2
- package/package.json +1 -1
- package/src/evaluator.ts +9 -0
- package/src/types.ts +7 -1
- package/src/util.ts +70 -15
- package/src/web/client/package-lock.json +5726 -0
- package/src/web/client/src/ResultsTable.css +11 -1
- package/src/web/client/src/ResultsTable.tsx +10 -0
- package/src/web/client/src/ResultsView.tsx +7 -1
- package/src/web/client/src/types.ts +4 -0
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
7
7
|
<title>promptfoo web viewer</title>
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-0c6f887d.js"></script>
|
|
9
|
-
<link rel="stylesheet" href="/assets/index-f9b230d1.css">
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-8388d689.js"></script>
|
|
9
|
+
<link rel="stylesheet" href="/assets/index-d2b6a160.css">
|
|
10
10
|
</head>
|
|
11
11
|
<body>
|
|
12
12
|
<div id="root"></div>
|
package/package.json
CHANGED
package/src/evaluator.ts
CHANGED
|
@@ -111,13 +111,19 @@ class Evaluator {
|
|
|
111
111
|
vars,
|
|
112
112
|
};
|
|
113
113
|
|
|
114
|
+
let latencyMs = 0;
|
|
114
115
|
try {
|
|
116
|
+
const startTime = Date.now();
|
|
115
117
|
const response = await provider.callApi(renderedPrompt);
|
|
118
|
+
const endTime = Date.now();
|
|
119
|
+
latencyMs = endTime - startTime;
|
|
120
|
+
|
|
116
121
|
const ret: EvaluateResult = {
|
|
117
122
|
...setup,
|
|
118
123
|
response,
|
|
119
124
|
success: false,
|
|
120
125
|
score: 0,
|
|
126
|
+
latencyMs,
|
|
121
127
|
};
|
|
122
128
|
if (response.error) {
|
|
123
129
|
ret.error = response.error;
|
|
@@ -177,6 +183,7 @@ class Evaluator {
|
|
|
177
183
|
error: String(err) + '\n\n' + (err as Error).stack,
|
|
178
184
|
success: false,
|
|
179
185
|
score: 0,
|
|
186
|
+
latencyMs,
|
|
180
187
|
};
|
|
181
188
|
}
|
|
182
189
|
}
|
|
@@ -426,6 +433,8 @@ class Evaluator {
|
|
|
426
433
|
score: row.score,
|
|
427
434
|
text: resultText,
|
|
428
435
|
prompt: row.prompt.raw,
|
|
436
|
+
latencyMs: row.latencyMs,
|
|
437
|
+
tokenUsage: row.response?.tokenUsage,
|
|
429
438
|
};
|
|
430
439
|
},
|
|
431
440
|
);
|
package/src/types.ts
CHANGED
|
@@ -96,6 +96,7 @@ export interface EvaluateResult {
|
|
|
96
96
|
error?: string;
|
|
97
97
|
success: boolean;
|
|
98
98
|
score: number;
|
|
99
|
+
latencyMs: number;
|
|
99
100
|
}
|
|
100
101
|
|
|
101
102
|
export interface EvaluateTableOutput {
|
|
@@ -103,6 +104,8 @@ export interface EvaluateTableOutput {
|
|
|
103
104
|
score: number;
|
|
104
105
|
text: string;
|
|
105
106
|
prompt: string;
|
|
107
|
+
latencyMs: number;
|
|
108
|
+
tokenUsage?: Partial<TokenUsage>;
|
|
106
109
|
}
|
|
107
110
|
|
|
108
111
|
export interface EvaluateTable {
|
|
@@ -185,6 +188,9 @@ export interface TestCase {
|
|
|
185
188
|
// Key-value pairs to substitute in the prompt
|
|
186
189
|
vars?: Record<string, string | string[] | object>;
|
|
187
190
|
|
|
191
|
+
// Optional filepath or glob pattern to load vars from
|
|
192
|
+
loadVars?: string | string[];
|
|
193
|
+
|
|
188
194
|
// Optional list of automatic checks to run on the LLM output
|
|
189
195
|
assert?: Assertion[];
|
|
190
196
|
|
|
@@ -235,7 +241,7 @@ export interface TestSuiteConfig {
|
|
|
235
241
|
prompts: string | string[];
|
|
236
242
|
|
|
237
243
|
// Path to a test file, OR list of LLM prompt variations (aka "test case")
|
|
238
|
-
tests: string | TestCase[];
|
|
244
|
+
tests: string | string[] | TestCase[];
|
|
239
245
|
|
|
240
246
|
// Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
|
|
241
247
|
defaultTest?: Omit<TestCase, 'description'>;
|
package/src/util.ts
CHANGED
|
@@ -224,7 +224,31 @@ export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
|
|
|
224
224
|
return csvData;
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
-
export async function
|
|
227
|
+
export async function readVarsFiles(
|
|
228
|
+
pathOrGlobs: string | string[],
|
|
229
|
+
basePath: string = '',
|
|
230
|
+
): Promise<Record<string, string | string[] | object>> {
|
|
231
|
+
if (typeof pathOrGlobs === 'string') {
|
|
232
|
+
pathOrGlobs = [pathOrGlobs];
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
const ret: Record<string, string | string[] | object> = {};
|
|
236
|
+
for (const pathOrGlob of pathOrGlobs) {
|
|
237
|
+
const resolvedPath = path.resolve(basePath, pathOrGlob);
|
|
238
|
+
const paths = globSync(resolvedPath);
|
|
239
|
+
|
|
240
|
+
for (const p of paths) {
|
|
241
|
+
const yamlData = yaml.load(fs.readFileSync(p, 'utf-8'));
|
|
242
|
+
Object.assign(ret, yamlData);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
return ret;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
|
|
250
|
+
// This function is confusingly named - it reads a CSV, JSON, or YAML file of
|
|
251
|
+
// TESTS or test equivalents.
|
|
228
252
|
const resolvedVarsPath = path.resolve(basePath, varsPath);
|
|
229
253
|
const fileExtension = parsePath(varsPath).ext.slice(1);
|
|
230
254
|
let rows: CsvRow[] = [];
|
|
@@ -246,25 +270,53 @@ export async function readVars(varsPath: string, basePath: string = ''): Promise
|
|
|
246
270
|
}
|
|
247
271
|
|
|
248
272
|
export async function readTests(
|
|
249
|
-
tests: string | TestCase[] | undefined,
|
|
273
|
+
tests: string | string[] | TestCase[] | undefined,
|
|
250
274
|
basePath: string = '',
|
|
251
275
|
): Promise<TestCase[]> {
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
276
|
+
const ret: TestCase[] = [];
|
|
277
|
+
|
|
278
|
+
const loadTestsFromGlob = async (loadTestsGlob: string) => {
|
|
279
|
+
const resolvedPath = path.resolve(basePath, loadTestsGlob);
|
|
280
|
+
const testFiles = globSync(resolvedPath);
|
|
281
|
+
for (const testFile of testFiles) {
|
|
282
|
+
const testFileContent = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
|
|
283
|
+
for (const testCase of testFileContent) {
|
|
284
|
+
if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
|
|
285
|
+
const testcaseBasePath = path.dirname(testFile);
|
|
286
|
+
testCase.vars = await readVarsFiles(testCase.vars, testcaseBasePath);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
ret.push(...testFileContent);
|
|
290
|
+
}
|
|
291
|
+
};
|
|
255
292
|
|
|
256
293
|
if (typeof tests === 'string') {
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
294
|
+
if (tests.endsWith('yaml') || tests.endsWith('yml')) {
|
|
295
|
+
// Load testcase config from yaml
|
|
296
|
+
await loadTestsFromGlob(tests);
|
|
297
|
+
} else {
|
|
298
|
+
// Legacy load CSV
|
|
299
|
+
const vars = await readTestsFile(tests, basePath);
|
|
300
|
+
return vars.map((row, idx) => {
|
|
301
|
+
const test = testCaseFromCsvRow(row);
|
|
302
|
+
test.description = `Row #${idx + 1}`;
|
|
303
|
+
return test;
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
} else if (Array.isArray(tests)) {
|
|
307
|
+
for (const maybeTestsGlob of tests) {
|
|
308
|
+
if (typeof maybeTestsGlob === 'string') {
|
|
309
|
+
// Assume it's a filepath
|
|
310
|
+
await loadTestsFromGlob(maybeTestsGlob);
|
|
311
|
+
} else {
|
|
312
|
+
// Assume it's a full test case
|
|
313
|
+
ret.push(maybeTestsGlob);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
264
316
|
}
|
|
265
317
|
|
|
266
318
|
// Some validation of the shape of tests
|
|
267
|
-
for (const test of
|
|
319
|
+
for (const test of ret) {
|
|
268
320
|
if (!test.assert && !test.vars) {
|
|
269
321
|
throw new Error(
|
|
270
322
|
`Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
|
|
@@ -276,7 +328,7 @@ export async function readTests(
|
|
|
276
328
|
}
|
|
277
329
|
}
|
|
278
330
|
|
|
279
|
-
return
|
|
331
|
+
return ret;
|
|
280
332
|
}
|
|
281
333
|
|
|
282
334
|
export function writeOutput(
|
|
@@ -374,7 +426,10 @@ export function getLatestResultsPath(): string {
|
|
|
374
426
|
|
|
375
427
|
export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
|
|
376
428
|
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
|
|
377
|
-
|
|
429
|
+
|
|
430
|
+
// Replace colons with hyphens (Windows compatibility).
|
|
431
|
+
const timestamp = new Date().toISOString().replace(/:/g, '-');
|
|
432
|
+
|
|
378
433
|
const newResultsPath = path.join(resultsDirectory, `eval-${timestamp}.json`);
|
|
379
434
|
const latestResultsPath = getLatestResultsPath();
|
|
380
435
|
try {
|