promptfoo 0.17.7 → 0.17.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/package.json +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +31 -6
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/main.js +2 -0
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +4 -0
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +15 -0
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/openai.d.ts +4 -0
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +21 -2
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +2 -1
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/shared.d.ts.map +1 -1
- package/dist/src/providers/shared.js.map +1 -1
- package/dist/src/types.d.ts +9 -2
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +10 -3
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +125 -40
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/{index-13198388.js → index-8388d689.js} +25 -25
- package/dist/src/web/client/assets/{index-f9b230d1.css → index-d2b6a160.css} +1 -1
- package/dist/src/web/client/index.html +2 -2
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +26 -3
- package/dist/src/web/server.js.map +1 -1
- package/package.json +1 -1
- package/src/evaluator.ts +37 -6
- package/src/main.ts +3 -0
- package/src/providers/azureopenai.ts +24 -0
- package/src/providers/openai.ts +32 -3
- package/src/providers/replicate.ts +7 -3
- package/src/providers/shared.ts +3 -1
- package/src/types.ts +12 -2
- package/src/util.ts +140 -42
- package/src/web/client/src/App.tsx +24 -1
- package/src/web/client/src/ResultsTable.css +11 -1
- package/src/web/client/src/ResultsTable.tsx +10 -0
- package/src/web/client/src/ResultsView.tsx +48 -3
- package/src/web/client/src/types.ts +4 -0
- package/src/web/server.ts +33 -10
package/src/util.ts
CHANGED
|
@@ -29,7 +29,10 @@ import type {
|
|
|
29
29
|
TestSuite,
|
|
30
30
|
} from './types';
|
|
31
31
|
|
|
32
|
-
export function readProviderPromptMap(
|
|
32
|
+
export function readProviderPromptMap(
|
|
33
|
+
config: Partial<UnifiedConfig>,
|
|
34
|
+
parsedPrompts: Prompt[],
|
|
35
|
+
): TestSuite['providerPromptMap'] {
|
|
33
36
|
const ret: Record<string, string[]> = {};
|
|
34
37
|
|
|
35
38
|
if (!config.providers) {
|
|
@@ -221,7 +224,31 @@ export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
|
|
|
221
224
|
return csvData;
|
|
222
225
|
}
|
|
223
226
|
|
|
224
|
-
export async function
|
|
227
|
+
export async function readVarsFiles(
|
|
228
|
+
pathOrGlobs: string | string[],
|
|
229
|
+
basePath: string = '',
|
|
230
|
+
): Promise<Record<string, string | string[] | object>> {
|
|
231
|
+
if (typeof pathOrGlobs === 'string') {
|
|
232
|
+
pathOrGlobs = [pathOrGlobs];
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
const ret: Record<string, string | string[] | object> = {};
|
|
236
|
+
for (const pathOrGlob of pathOrGlobs) {
|
|
237
|
+
const resolvedPath = path.resolve(basePath, pathOrGlob);
|
|
238
|
+
const paths = globSync(resolvedPath);
|
|
239
|
+
|
|
240
|
+
for (const p of paths) {
|
|
241
|
+
const yamlData = yaml.load(fs.readFileSync(p, 'utf-8'));
|
|
242
|
+
Object.assign(ret, yamlData);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
return ret;
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
|
|
250
|
+
// This function is confusingly named - it reads a CSV, JSON, or YAML file of
|
|
251
|
+
// TESTS or test equivalents.
|
|
225
252
|
const resolvedVarsPath = path.resolve(basePath, varsPath);
|
|
226
253
|
const fileExtension = parsePath(varsPath).ext.slice(1);
|
|
227
254
|
let rows: CsvRow[] = [];
|
|
@@ -243,25 +270,53 @@ export async function readVars(varsPath: string, basePath: string = ''): Promise
|
|
|
243
270
|
}
|
|
244
271
|
|
|
245
272
|
export async function readTests(
|
|
246
|
-
tests: string | TestCase[] | undefined,
|
|
273
|
+
tests: string | string[] | TestCase[] | undefined,
|
|
247
274
|
basePath: string = '',
|
|
248
275
|
): Promise<TestCase[]> {
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
276
|
+
const ret: TestCase[] = [];
|
|
277
|
+
|
|
278
|
+
const loadTestsFromGlob = async (loadTestsGlob: string) => {
|
|
279
|
+
const resolvedPath = path.resolve(basePath, loadTestsGlob);
|
|
280
|
+
const testFiles = globSync(resolvedPath);
|
|
281
|
+
for (const testFile of testFiles) {
|
|
282
|
+
const testFileContent = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
|
|
283
|
+
for (const testCase of testFileContent) {
|
|
284
|
+
if (typeof testCase.vars === 'string' || Array.isArray(testCase.vars)) {
|
|
285
|
+
const testcaseBasePath = path.dirname(testFile);
|
|
286
|
+
testCase.vars = await readVarsFiles(testCase.vars, testcaseBasePath);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
ret.push(...testFileContent);
|
|
290
|
+
}
|
|
291
|
+
};
|
|
252
292
|
|
|
253
293
|
if (typeof tests === 'string') {
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
294
|
+
if (tests.endsWith('yaml') || tests.endsWith('yml')) {
|
|
295
|
+
// Load testcase config from yaml
|
|
296
|
+
await loadTestsFromGlob(tests);
|
|
297
|
+
} else {
|
|
298
|
+
// Legacy load CSV
|
|
299
|
+
const vars = await readTestsFile(tests, basePath);
|
|
300
|
+
return vars.map((row, idx) => {
|
|
301
|
+
const test = testCaseFromCsvRow(row);
|
|
302
|
+
test.description = `Row #${idx + 1}`;
|
|
303
|
+
return test;
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
} else if (Array.isArray(tests)) {
|
|
307
|
+
for (const maybeTestsGlob of tests) {
|
|
308
|
+
if (typeof maybeTestsGlob === 'string') {
|
|
309
|
+
// Assume it's a filepath
|
|
310
|
+
await loadTestsFromGlob(maybeTestsGlob);
|
|
311
|
+
} else {
|
|
312
|
+
// Assume it's a full test case
|
|
313
|
+
ret.push(maybeTestsGlob);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
261
316
|
}
|
|
262
317
|
|
|
263
318
|
// Some validation of the shape of tests
|
|
264
|
-
for (const test of
|
|
319
|
+
for (const test of ret) {
|
|
265
320
|
if (!test.assert && !test.vars) {
|
|
266
321
|
throw new Error(
|
|
267
322
|
`Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
|
|
@@ -273,7 +328,7 @@ export async function readTests(
|
|
|
273
328
|
}
|
|
274
329
|
}
|
|
275
330
|
|
|
276
|
-
return
|
|
331
|
+
return ret;
|
|
277
332
|
}
|
|
278
333
|
|
|
279
334
|
export function writeOutput(
|
|
@@ -313,28 +368,31 @@ export function writeOutput(
|
|
|
313
368
|
}
|
|
314
369
|
}
|
|
315
370
|
|
|
316
|
-
export
|
|
371
|
+
export function fetchWithTimeout(
|
|
317
372
|
url: RequestInfo,
|
|
318
373
|
options: RequestInit = {},
|
|
319
374
|
timeout: number,
|
|
320
375
|
): Promise<Response> {
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
376
|
+
return new Promise((resolve, reject) => {
|
|
377
|
+
const controller = new AbortController();
|
|
378
|
+
const { signal } = controller;
|
|
379
|
+
options.signal = signal;
|
|
380
|
+
|
|
381
|
+
const timeoutId = setTimeout(() => {
|
|
382
|
+
controller.abort();
|
|
383
|
+
reject(new Error(`Request timed out after ${timeout} ms`));
|
|
384
|
+
}, timeout);
|
|
385
|
+
|
|
386
|
+
fetch(url, options)
|
|
387
|
+
.then((response) => {
|
|
388
|
+
clearTimeout(timeoutId);
|
|
389
|
+
resolve(response);
|
|
390
|
+
})
|
|
391
|
+
.catch((error) => {
|
|
392
|
+
clearTimeout(timeoutId);
|
|
393
|
+
reject(error);
|
|
394
|
+
});
|
|
395
|
+
});
|
|
338
396
|
}
|
|
339
397
|
|
|
340
398
|
export async function fetchWithRetries(
|
|
@@ -356,6 +414,8 @@ export async function fetchWithRetries(
|
|
|
356
414
|
throw new Error(`Request failed after ${retries} retries: ${(lastError as Error).message}`);
|
|
357
415
|
}
|
|
358
416
|
|
|
417
|
+
const RESULT_HISTORY_LENGTH = 50;
|
|
418
|
+
|
|
359
419
|
export function getConfigDirectoryPath(): string {
|
|
360
420
|
return path.join(os.homedir(), '.promptfoo');
|
|
361
421
|
}
|
|
@@ -365,11 +425,17 @@ export function getLatestResultsPath(): string {
|
|
|
365
425
|
}
|
|
366
426
|
|
|
367
427
|
export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
|
|
428
|
+
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
|
|
429
|
+
|
|
430
|
+
// Replace colons with hyphens (colons are not allowed in Windows filenames).
|
|
431
|
+
const timestamp = new Date().toISOString().replace(/:/g, '-');
|
|
432
|
+
|
|
433
|
+
const newResultsPath = path.join(resultsDirectory, `eval-${timestamp}.json`);
|
|
368
434
|
const latestResultsPath = getLatestResultsPath();
|
|
369
435
|
try {
|
|
370
|
-
fs.mkdirSync(
|
|
436
|
+
fs.mkdirSync(resultsDirectory, { recursive: true });
|
|
371
437
|
fs.writeFileSync(
|
|
372
|
-
|
|
438
|
+
newResultsPath,
|
|
373
439
|
JSON.stringify(
|
|
374
440
|
{
|
|
375
441
|
version: 1,
|
|
@@ -380,8 +446,45 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
|
|
|
380
446
|
2,
|
|
381
447
|
),
|
|
382
448
|
);
|
|
449
|
+
if (fs.existsSync(latestResultsPath)) {
|
|
450
|
+
fs.unlinkSync(latestResultsPath);
|
|
451
|
+
}
|
|
452
|
+
fs.symlinkSync(newResultsPath, latestResultsPath);
|
|
453
|
+
cleanupOldResults();
|
|
383
454
|
} catch (err) {
|
|
384
|
-
logger.error(`Failed to write latest results to ${
|
|
455
|
+
logger.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
export function listPreviousResults(): string[] {
|
|
460
|
+
const directory = path.join(getConfigDirectoryPath(), 'output');
|
|
461
|
+
const files = fs.readdirSync(directory);
|
|
462
|
+
const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
|
|
463
|
+
const sortedFiles = resultsFiles.sort((a, b) => {
|
|
464
|
+
const statA = fs.statSync(path.join(directory, a));
|
|
465
|
+
const statB = fs.statSync(path.join(directory, b));
|
|
466
|
+
return statB.birthtime.getTime() - statA.birthtime.getTime(); // sort in descending order
|
|
467
|
+
});
|
|
468
|
+
return sortedFiles;
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
export function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
|
|
472
|
+
const sortedFiles = listPreviousResults();
|
|
473
|
+
for (let i = 0; i < sortedFiles.length - remaining; i++) {
|
|
474
|
+
fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFiles[i]));
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
export function readResult(
|
|
479
|
+
name: string,
|
|
480
|
+
): { results: EvaluateSummary; config: Partial<UnifiedConfig> } | undefined {
|
|
481
|
+
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
|
|
482
|
+
const resultsPath = path.join(resultsDirectory, name);
|
|
483
|
+
try {
|
|
484
|
+
const results = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
|
|
485
|
+
return results;
|
|
486
|
+
} catch (err) {
|
|
487
|
+
logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
|
|
385
488
|
}
|
|
386
489
|
}
|
|
387
490
|
|
|
@@ -389,12 +492,7 @@ export function readLatestResults():
|
|
|
389
492
|
| { results: EvaluateSummary; config: Partial<UnifiedConfig> }
|
|
390
493
|
| undefined {
|
|
391
494
|
const latestResultsPath = getLatestResultsPath();
|
|
392
|
-
|
|
393
|
-
const latestResults = JSON.parse(fs.readFileSync(latestResultsPath, 'utf-8'));
|
|
394
|
-
return latestResults;
|
|
395
|
-
} catch (err) {
|
|
396
|
-
logger.error(`Failed to read latest results from ${latestResultsPath}:\n${err}`);
|
|
397
|
-
}
|
|
495
|
+
return readResult(latestResultsPath);
|
|
398
496
|
}
|
|
399
497
|
|
|
400
498
|
export function cosineSimilarity(vecA: number[], vecB: number[]) {
|
|
@@ -14,6 +14,7 @@ function App() {
|
|
|
14
14
|
const { table, setTable, setConfig } = useStore();
|
|
15
15
|
const [loaded, setLoaded] = React.useState<boolean>(false);
|
|
16
16
|
const loadedFromApi = React.useRef(false);
|
|
17
|
+
const [recentFiles, setRecentFiles] = React.useState<string[]>([]);
|
|
17
18
|
|
|
18
19
|
const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
|
|
19
20
|
const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
|
|
@@ -43,6 +44,22 @@ function App() {
|
|
|
43
44
|
}
|
|
44
45
|
}, [prefersDarkMode]);
|
|
45
46
|
|
|
47
|
+
const fetchRecentFiles = async () => {
|
|
48
|
+
if (!window.location.href.includes('localhost')) {
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
const resp = await fetch(`http://localhost:15500/results`);
|
|
52
|
+
const body = await resp.json();
|
|
53
|
+
setRecentFiles(body.data);
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
const handleRecentFileSelection = async (file: string) => {
|
|
57
|
+
const resp = await fetch(`http://localhost:15500/results/${file}`);
|
|
58
|
+
const body = await resp.json();
|
|
59
|
+
setTable(body.data.results.table);
|
|
60
|
+
setConfig(body.data.config);
|
|
61
|
+
};
|
|
62
|
+
|
|
46
63
|
React.useEffect(() => {
|
|
47
64
|
const fetchEvalData = async (id: string) => {
|
|
48
65
|
if (loadedFromApi.current) {
|
|
@@ -72,12 +89,14 @@ function App() {
|
|
|
72
89
|
setLoaded(true);
|
|
73
90
|
setTable(data.results.table);
|
|
74
91
|
setConfig(data.config);
|
|
92
|
+
fetchRecentFiles();
|
|
75
93
|
});
|
|
76
94
|
|
|
77
95
|
socket.on('update', (data) => {
|
|
78
96
|
console.log('Received data update', data);
|
|
79
97
|
setTable(data.results.table);
|
|
80
98
|
setConfig(data.config);
|
|
99
|
+
fetchRecentFiles();
|
|
81
100
|
});
|
|
82
101
|
}
|
|
83
102
|
|
|
@@ -89,7 +108,11 @@ function App() {
|
|
|
89
108
|
return (
|
|
90
109
|
<ThemeProvider theme={theme}>
|
|
91
110
|
<NavBar darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
|
|
92
|
-
{loaded && table ?
|
|
111
|
+
{loaded && table ? (
|
|
112
|
+
<ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
|
|
113
|
+
) : (
|
|
114
|
+
<div>Loading...</div>
|
|
115
|
+
)}
|
|
93
116
|
</ThemeProvider>
|
|
94
117
|
);
|
|
95
118
|
}
|
|
@@ -83,7 +83,17 @@ tr .cell-actions {
|
|
|
83
83
|
font-size: 1.75rem;
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
tr
|
|
86
|
+
tr .cell-detail {
|
|
87
|
+
visibility: hidden;
|
|
88
|
+
position: absolute;
|
|
89
|
+
bottom: 0.25rem;
|
|
90
|
+
margin-top: 1rem;
|
|
91
|
+
font-size: 0.75rem;
|
|
92
|
+
color: #888;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
tr:hover .cell-actions,
|
|
96
|
+
tr:hover .cell-detail {
|
|
87
97
|
visibility: visible;
|
|
88
98
|
}
|
|
89
99
|
|
|
@@ -135,6 +135,16 @@ function EvalOutputCell({
|
|
|
135
135
|
)}{' '}
|
|
136
136
|
<TruncatedText text={text} maxLength={maxTextLength} />
|
|
137
137
|
</div>
|
|
138
|
+
<div className="cell-detail">
|
|
139
|
+
{output.tokenUsage?.cached ? (
|
|
140
|
+
<span>{output.tokenUsage.cached} tokens (cached)</span>
|
|
141
|
+
) : (
|
|
142
|
+
<>
|
|
143
|
+
{output.tokenUsage?.total && <span>{output.tokenUsage.total} tokens</span>} |{' '}
|
|
144
|
+
<span>{output.latencyMs} ms</span>
|
|
145
|
+
</>
|
|
146
|
+
)}
|
|
147
|
+
</div>
|
|
138
148
|
<div className="cell-actions">
|
|
139
149
|
{output.prompt && (
|
|
140
150
|
<>
|
|
@@ -37,7 +37,32 @@ const ResponsiveStack = styled(Stack)(({ theme }) => ({
|
|
|
37
37
|
},
|
|
38
38
|
}));
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
function filenameToDate(filename: string) {
|
|
41
|
+
const dateString = filename.slice('eval-'.length, filename.length - '.json'.length);
|
|
42
|
+
|
|
43
|
+
// Replace hyphens with colons where necessary (Windows compatibility).
|
|
44
|
+
const dateParts = dateString.split('T');
|
|
45
|
+
const timePart = dateParts[1].replace(/-/g, ':');
|
|
46
|
+
const formattedDateString = `${dateParts[0]}T${timePart}`;
|
|
47
|
+
|
|
48
|
+
const date = new Date(formattedDateString);
|
|
49
|
+
return date.toLocaleDateString('en-US', {
|
|
50
|
+
year: 'numeric',
|
|
51
|
+
month: 'long',
|
|
52
|
+
day: 'numeric',
|
|
53
|
+
hour: '2-digit',
|
|
54
|
+
minute: '2-digit',
|
|
55
|
+
second: '2-digit',
|
|
56
|
+
timeZoneName: 'short',
|
|
57
|
+
});
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
interface ResultsViewProps {
|
|
61
|
+
recentFiles: string[];
|
|
62
|
+
onRecentFileSelected: (file: string) => void;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export default function ResultsView({ recentFiles, onRecentFileSelected }: ResultsViewProps) {
|
|
41
66
|
const { table, config } = useStore();
|
|
42
67
|
const [maxTextLength, setMaxTextLength] = React.useState(250);
|
|
43
68
|
const [columnVisibility, setColumnVisibility] = React.useState<VisibilityState>({});
|
|
@@ -148,10 +173,30 @@ export default function ResultsView() {
|
|
|
148
173
|
return (
|
|
149
174
|
<div>
|
|
150
175
|
<Paper py="md">
|
|
151
|
-
<ResponsiveStack direction="row" spacing={
|
|
176
|
+
<ResponsiveStack direction="row" spacing={4} alignItems="center">
|
|
177
|
+
<Box>
|
|
178
|
+
{recentFiles && recentFiles.length > 0 && (
|
|
179
|
+
<FormControl sx={{ m: 1, minWidth: 200 }} size="small">
|
|
180
|
+
<InputLabel>View run</InputLabel>
|
|
181
|
+
<Select
|
|
182
|
+
key={recentFiles.join(',')}
|
|
183
|
+
className="recent-files"
|
|
184
|
+
label="Previous runs"
|
|
185
|
+
defaultValue={recentFiles[0]}
|
|
186
|
+
onChange={(e: SelectChangeEvent) => onRecentFileSelected(e.target.value)}
|
|
187
|
+
>
|
|
188
|
+
{recentFiles.map((file) => (
|
|
189
|
+
<MenuItem key={file} value={file}>
|
|
190
|
+
{filenameToDate(file)}
|
|
191
|
+
</MenuItem>
|
|
192
|
+
))}
|
|
193
|
+
</Select>
|
|
194
|
+
</FormControl>
|
|
195
|
+
)}
|
|
196
|
+
</Box>
|
|
152
197
|
<Box>
|
|
153
198
|
<FormControl sx={{ m: 1, minWidth: 200 }} size="small">
|
|
154
|
-
<InputLabel id="visible-columns-label">
|
|
199
|
+
<InputLabel id="visible-columns-label">Show columns</InputLabel>
|
|
155
200
|
<Select
|
|
156
201
|
labelId="visible-columns-label"
|
|
157
202
|
id="visible-columns"
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { TokenUsage } from '../../../types';
|
|
2
|
+
|
|
1
3
|
type Prompt = {
|
|
2
4
|
display: string;
|
|
3
5
|
raw: string;
|
|
@@ -13,6 +15,8 @@ export type EvalRowOutput = {
|
|
|
13
15
|
score: number;
|
|
14
16
|
text: string | object;
|
|
15
17
|
prompt: string;
|
|
18
|
+
latencyMs: number;
|
|
19
|
+
tokenUsage?: Partial<TokenUsage>;
|
|
16
20
|
};
|
|
17
21
|
|
|
18
22
|
export type EvalRow = {
|
package/src/web/server.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
1
|
+
import fs, { Stats } from 'fs';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import readline from 'node:readline';
|
|
4
4
|
import http from 'node:http';
|
|
@@ -11,7 +11,7 @@ import { Server as SocketIOServer } from 'socket.io';
|
|
|
11
11
|
|
|
12
12
|
import logger from '../logger';
|
|
13
13
|
import { getDirectory } from '../esm';
|
|
14
|
-
import { getLatestResultsPath } from '../util';
|
|
14
|
+
import { getLatestResultsPath, listPreviousResults, readResult } from '../util';
|
|
15
15
|
|
|
16
16
|
export function init(port = 15500) {
|
|
17
17
|
const app = express();
|
|
@@ -40,14 +40,37 @@ export function init(port = 15500) {
|
|
|
40
40
|
socket.emit('init', readLatestJson());
|
|
41
41
|
|
|
42
42
|
// Watch for changes to latest.json and emit the update event
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
43
|
+
const watcher = debounce((curr: Stats, prev: Stats) => {
|
|
44
|
+
if (curr.mtime !== prev.mtime) {
|
|
45
|
+
socket.emit('update', readLatestJson());
|
|
46
|
+
}
|
|
47
|
+
}, 250);
|
|
48
|
+
fs.watchFile(latestJsonPath, watcher);
|
|
49
|
+
|
|
50
|
+
// Stop watching the file when the socket connection is closed
|
|
51
|
+
socket.on('disconnect', () => {
|
|
52
|
+
fs.unwatchFile(latestJsonPath, watcher);
|
|
53
|
+
});
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
app.get('/results', (req, res) => {
|
|
57
|
+
const previousResults = listPreviousResults();
|
|
58
|
+
res.json({ data: previousResults });
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
app.get('/results/:filename', (req, res) => {
|
|
62
|
+
const filename = req.params.filename;
|
|
63
|
+
const safeFilename = path.basename(filename);
|
|
64
|
+
if (safeFilename !== filename || !listPreviousResults().includes(safeFilename)) {
|
|
65
|
+
res.status(400).send('Invalid filename');
|
|
66
|
+
return;
|
|
67
|
+
}
|
|
68
|
+
const result = readResult(safeFilename);
|
|
69
|
+
if (!result) {
|
|
70
|
+
res.status(404).send('Result not found');
|
|
71
|
+
return;
|
|
72
|
+
}
|
|
73
|
+
res.json({ data: result });
|
|
51
74
|
});
|
|
52
75
|
|
|
53
76
|
httpServer.listen(port, () => {
|