promptfoo 0.47.0 → 0.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/drizzle/0000_lush_hellion.sql +36 -0
- package/dist/drizzle/0001_wide_calypso.sql +3 -0
- package/dist/drizzle/meta/0000_snapshot.json +244 -0
- package/dist/drizzle/meta/0001_snapshot.json +237 -0
- package/dist/drizzle/meta/_journal.json +20 -0
- package/dist/package.json +10 -3
- package/dist/src/__mocks__/database.d.ts +5 -0
- package/dist/src/__mocks__/database.d.ts.map +1 -0
- package/dist/src/__mocks__/database.js +27 -0
- package/dist/src/__mocks__/database.js.map +1 -0
- package/dist/src/commands/list.d.ts.map +1 -1
- package/dist/src/commands/list.js +4 -5
- package/dist/src/commands/list.js.map +1 -1
- package/dist/src/commands/show.d.ts +1 -1
- package/dist/src/commands/show.d.ts.map +1 -1
- package/dist/src/commands/show.js +7 -7
- package/dist/src/commands/show.js.map +1 -1
- package/dist/src/csv.d.ts +1 -1
- package/dist/src/csv.d.ts.map +1 -1
- package/dist/src/csv.js +5 -0
- package/dist/src/csv.js.map +1 -1
- package/dist/src/database.d.ts +238 -0
- package/dist/src/database.d.ts.map +1 -0
- package/dist/src/database.js +141 -0
- package/dist/src/database.js.map +1 -0
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +8 -6
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +2 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/main.js +6 -4
- package/dist/src/main.js.map +1 -1
- package/dist/src/migrate.d.ts +5 -0
- package/dist/src/migrate.d.ts.map +1 -0
- package/dist/src/migrate.js +50 -0
- package/dist/src/migrate.js.map +1 -0
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +3 -0
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +2 -0
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +4 -2
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +2 -1
- package/dist/src/providers.js.map +1 -1
- package/dist/src/types.d.ts +5 -6
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +46 -19
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +363 -121
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/952-ede6b209625d42a2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-ad55f89d622ef8e7.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-01ab4878803b7068.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-9c163111247d8da5.js +1 -0
- package/dist/src/web/nextui/api/results +1 -1
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +1 -1
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +1 -1
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +2 -2
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +2 -2
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +1 -1
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +2 -2
- package/dist/src/web/nextui/setup/index.html +1 -1
- package/dist/src/web/nextui/setup/index.txt +2 -2
- package/dist/src/web/server.d.ts +1 -1
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +25 -43
- package/dist/src/web/server.js.map +1 -1
- package/package.json +10 -3
- package/dist/src/web/nextui/_next/static/chunks/952-1367984f076e3060.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-44ab188f3b846712.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-0bf3409d6a6bfa22.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-83c7e62787113081.js +0 -1
- /package/dist/src/web/nextui/_next/static/{kWF8sUISiIgB0hKr0muJH → 8yxA5JzS0wXTxJptFRKTo}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{kWF8sUISiIgB0hKr0muJH → 8yxA5JzS0wXTxJptFRKTo}/_ssgManifest.js +0 -0
package/dist/src/util.js
CHANGED
|
@@ -26,7 +26,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
26
26
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
27
27
|
};
|
|
28
28
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
-
exports.transformOutput = exports.printBorder = exports.getNunjucksEngine = exports.readFilters = exports.getEvalsWithPredicate = exports.getEvalFromHash = exports.getEvals = exports.getDatasetFromHash = exports.getPromptFromHash = exports.getTestCasesWithPredicate = exports.getTestCases = exports.getPromptsWithPredicate = exports.getPrompts = exports.sha256 = exports.getPromptsForTestCasesHash = exports.getPromptsForTestCases = exports.readLatestResults = exports.updateResult = exports.readResult = exports.dateToFilename = exports.filenameToDate = exports.
|
|
29
|
+
exports.transformOutput = exports.printBorder = exports.getNunjucksEngine = exports.readFilters = exports.getEvalsWithPredicate = exports.getEvalFromHash = exports.getEvals = exports.getDatasetFromHash = exports.getPromptFromHash = exports.getTestCasesWithPredicate = exports.getTestCases = exports.getPromptsWithPredicate = exports.getPrompts = exports.sha256 = exports.getPromptsForTestCasesHash = exports.getPromptsForTestCases = exports.readLatestResults = exports.updateResult = exports.readResult_fileSystem = exports.readResult = exports.dateToFilename = exports.filenameToDate = exports.cleanupOldFileResults = exports.migrateResultsFromFileSystemToDatabase = exports.listPreviousResults_fileSystem = exports.listPreviousResultFilenames_fileSystem = exports.listPreviousResults = exports.writeResultsToDatabase = exports.getLatestResultsPath = exports.setConfigDirectoryPath = exports.getConfigDirectoryPath = exports.writeOutput = exports.writeMultipleOutputs = exports.readConfigs = exports.readConfig = exports.dereferenceConfig = exports.maybeReadConfig = exports.maybeRecordFirstRun = exports.readGlobalConfig = exports.resetGlobalConfig = void 0;
|
|
30
30
|
const fs = __importStar(require("fs"));
|
|
31
31
|
const path = __importStar(require("path"));
|
|
32
32
|
const os = __importStar(require("os"));
|
|
@@ -37,9 +37,12 @@ const nunjucks_1 = __importDefault(require("nunjucks"));
|
|
|
37
37
|
const js_yaml_1 = __importDefault(require("js-yaml"));
|
|
38
38
|
const sync_1 = require("csv-stringify/sync");
|
|
39
39
|
const glob_1 = require("glob");
|
|
40
|
+
const drizzle_orm_1 = require("drizzle-orm");
|
|
40
41
|
const logger_1 = __importDefault(require("./logger"));
|
|
41
42
|
const esm_1 = require("./esm");
|
|
42
43
|
const testCases_1 = require("./testCases");
|
|
44
|
+
const database_1 = require("./database");
|
|
45
|
+
const migrate_1 = require("./migrate");
|
|
43
46
|
let globalConfigCache = null;
|
|
44
47
|
function resetGlobalConfig() {
|
|
45
48
|
globalConfigCache = null;
|
|
@@ -356,44 +359,114 @@ function setConfigDirectoryPath(newPath) {
|
|
|
356
359
|
configDirectoryPath = newPath;
|
|
357
360
|
}
|
|
358
361
|
exports.setConfigDirectoryPath = setConfigDirectoryPath;
|
|
362
|
+
/**
|
|
363
|
+
* TODO(ian): Remove this
|
|
364
|
+
* @deprecated Use readLatestResults directly instead.
|
|
365
|
+
*/
|
|
359
366
|
function getLatestResultsPath() {
|
|
360
367
|
return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
|
|
361
368
|
}
|
|
362
369
|
exports.getLatestResultsPath = getLatestResultsPath;
|
|
363
|
-
function
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
const
|
|
367
|
-
const
|
|
368
|
-
|
|
370
|
+
async function writeResultsToDatabase(results, config, createdAt) {
|
|
371
|
+
createdAt = createdAt || new Date();
|
|
372
|
+
const evalId = `eval-${createdAt.toISOString().slice(0, 19)}`;
|
|
373
|
+
const db = (0, database_1.getDb)();
|
|
374
|
+
const promises = [];
|
|
375
|
+
promises.push(db
|
|
376
|
+
.insert(database_1.evals)
|
|
377
|
+
.values({
|
|
378
|
+
id: evalId,
|
|
379
|
+
createdAt: createdAt.getTime(),
|
|
380
|
+
description: config.description,
|
|
381
|
+
config,
|
|
382
|
+
results,
|
|
383
|
+
})
|
|
384
|
+
.onConflictDoNothing()
|
|
385
|
+
.run());
|
|
386
|
+
logger_1.default.debug(`Inserting eval ${evalId}`);
|
|
387
|
+
// Record prompt relation
|
|
388
|
+
for (const prompt of results.table.head.prompts) {
|
|
389
|
+
const promptId = sha256(prompt.display);
|
|
390
|
+
promises.push(db
|
|
391
|
+
.insert(database_1.prompts)
|
|
392
|
+
.values({
|
|
393
|
+
id: promptId,
|
|
394
|
+
prompt: prompt.display,
|
|
395
|
+
})
|
|
396
|
+
.onConflictDoNothing()
|
|
397
|
+
.run());
|
|
398
|
+
promises.push(db
|
|
399
|
+
.insert(database_1.evalsToPrompts)
|
|
400
|
+
.values({
|
|
401
|
+
evalId,
|
|
402
|
+
promptId,
|
|
403
|
+
})
|
|
404
|
+
.onConflictDoNothing()
|
|
405
|
+
.run());
|
|
406
|
+
logger_1.default.debug(`Inserting prompt ${promptId}`);
|
|
407
|
+
}
|
|
408
|
+
// Record dataset relation
|
|
409
|
+
const datasetId = sha256(JSON.stringify(config.tests || []));
|
|
410
|
+
promises.push(db
|
|
411
|
+
.insert(database_1.datasets)
|
|
412
|
+
.values({
|
|
413
|
+
id: datasetId,
|
|
414
|
+
tests: config.tests,
|
|
415
|
+
})
|
|
416
|
+
.onConflictDoNothing()
|
|
417
|
+
.run());
|
|
418
|
+
promises.push(db
|
|
419
|
+
.insert(database_1.evalsToDatasets)
|
|
420
|
+
.values({
|
|
421
|
+
evalId,
|
|
422
|
+
datasetId,
|
|
423
|
+
})
|
|
424
|
+
.onConflictDoNothing()
|
|
425
|
+
.run());
|
|
426
|
+
logger_1.default.debug(`Inserting dataset ${datasetId}`);
|
|
427
|
+
logger_1.default.debug(`Awaiting ${promises.length} promises to database...`);
|
|
428
|
+
await Promise.all(promises);
|
|
429
|
+
// "touch" db signal path
|
|
430
|
+
const filePath = (0, database_1.getDbSignalPath)();
|
|
369
431
|
try {
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
version: 2,
|
|
373
|
-
createdAt: new Date().toISOString(),
|
|
374
|
-
config,
|
|
375
|
-
results,
|
|
376
|
-
};
|
|
377
|
-
fs.writeFileSync(newResultsPath, JSON.stringify(resultsFileData, null, 2));
|
|
378
|
-
// Use copy instead of symlink to avoid issues with Windows permissions.
|
|
379
|
-
try {
|
|
380
|
-
// Backwards compatibility: delete old symlink.
|
|
381
|
-
fs.unlinkSync(latestResultsPath);
|
|
382
|
-
}
|
|
383
|
-
catch { }
|
|
384
|
-
fs.copyFileSync(newResultsPath, latestResultsPath);
|
|
385
|
-
cleanupOldResults();
|
|
386
|
-
return filename;
|
|
432
|
+
const now = new Date();
|
|
433
|
+
fs.utimesSync(filePath, now, now);
|
|
387
434
|
}
|
|
388
435
|
catch (err) {
|
|
389
|
-
|
|
390
|
-
return null;
|
|
436
|
+
fs.closeSync(fs.openSync(filePath, 'w'));
|
|
391
437
|
}
|
|
438
|
+
return evalId;
|
|
392
439
|
}
|
|
393
|
-
exports.
|
|
394
|
-
|
|
395
|
-
|
|
440
|
+
exports.writeResultsToDatabase = writeResultsToDatabase;
|
|
441
|
+
/**
|
|
442
|
+
*
|
|
443
|
+
* @returns Last 100 evals in descending order.
|
|
444
|
+
*/
|
|
445
|
+
function listPreviousResults() {
|
|
446
|
+
const db = (0, database_1.getDb)();
|
|
447
|
+
const results = db
|
|
448
|
+
.select({
|
|
449
|
+
name: database_1.evals.id,
|
|
450
|
+
description: database_1.evals.description,
|
|
451
|
+
})
|
|
452
|
+
.from(database_1.evals)
|
|
453
|
+
.orderBy((0, drizzle_orm_1.desc)(database_1.evals.createdAt))
|
|
454
|
+
.limit(100)
|
|
455
|
+
.all();
|
|
456
|
+
return results.map((result) => ({
|
|
457
|
+
evalId: result.name,
|
|
458
|
+
description: result.description,
|
|
459
|
+
}));
|
|
460
|
+
}
|
|
461
|
+
exports.listPreviousResults = listPreviousResults;
|
|
462
|
+
/**
|
|
463
|
+
* @deprecated Used only for migration to sqlite
|
|
464
|
+
*/
|
|
465
|
+
function listPreviousResultFilenames_fileSystem() {
|
|
396
466
|
const directory = path.join(getConfigDirectoryPath(), 'output');
|
|
467
|
+
if (!fs.existsSync(directory)) {
|
|
468
|
+
return [];
|
|
469
|
+
}
|
|
397
470
|
const files = fs.readdirSync(directory);
|
|
398
471
|
const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
|
|
399
472
|
return resultsFiles.sort((a, b) => {
|
|
@@ -402,10 +475,17 @@ function listPreviousResultFilenames() {
|
|
|
402
475
|
return statA.birthtime.getTime() - statB.birthtime.getTime(); // sort in ascending order
|
|
403
476
|
});
|
|
404
477
|
}
|
|
405
|
-
exports.
|
|
406
|
-
|
|
478
|
+
exports.listPreviousResultFilenames_fileSystem = listPreviousResultFilenames_fileSystem;
|
|
479
|
+
const resultsCache = {};
|
|
480
|
+
/**
|
|
481
|
+
* @deprecated Used only for migration to sqlite
|
|
482
|
+
*/
|
|
483
|
+
function listPreviousResults_fileSystem() {
|
|
407
484
|
const directory = path.join(getConfigDirectoryPath(), 'output');
|
|
408
|
-
|
|
485
|
+
if (!fs.existsSync(directory)) {
|
|
486
|
+
return [];
|
|
487
|
+
}
|
|
488
|
+
const sortedFiles = listPreviousResultFilenames_fileSystem();
|
|
409
489
|
return sortedFiles.map((fileName) => {
|
|
410
490
|
if (!resultsCache[fileName]) {
|
|
411
491
|
try {
|
|
@@ -423,15 +503,71 @@ function listPreviousResults() {
|
|
|
423
503
|
};
|
|
424
504
|
});
|
|
425
505
|
}
|
|
426
|
-
exports.
|
|
506
|
+
exports.listPreviousResults_fileSystem = listPreviousResults_fileSystem;
|
|
507
|
+
let attemptedMigration = false;
|
|
508
|
+
async function migrateResultsFromFileSystemToDatabase() {
|
|
509
|
+
if (attemptedMigration) {
|
|
510
|
+
// TODO(ian): Record this bit in the database.
|
|
511
|
+
return;
|
|
512
|
+
}
|
|
513
|
+
// First run db migrations
|
|
514
|
+
logger_1.default.debug('Running db migrations...');
|
|
515
|
+
await (0, migrate_1.runDbMigrations)();
|
|
516
|
+
const fileNames = listPreviousResultFilenames_fileSystem();
|
|
517
|
+
if (fileNames.length === 0) {
|
|
518
|
+
return;
|
|
519
|
+
}
|
|
520
|
+
logger_1.default.info(`🔁 Migrating ${fileNames.length} flat files to local database.`);
|
|
521
|
+
logger_1.default.info('This is a one-time operation and may take a minute...');
|
|
522
|
+
attemptedMigration = true;
|
|
523
|
+
const outputDir = path.join(getConfigDirectoryPath(), 'output');
|
|
524
|
+
const backupDir = `${outputDir}-backup-${new Date()
|
|
525
|
+
.toISOString()
|
|
526
|
+
.slice(0, 10)
|
|
527
|
+
.replace(/-/g, '')}`;
|
|
528
|
+
try {
|
|
529
|
+
fs.cpSync(outputDir, backupDir, { recursive: true });
|
|
530
|
+
logger_1.default.info(`Backup of output directory created at ${backupDir}`);
|
|
531
|
+
}
|
|
532
|
+
catch (backupError) {
|
|
533
|
+
logger_1.default.error(`Failed to create backup of output directory: ${backupError}`);
|
|
534
|
+
return;
|
|
535
|
+
}
|
|
536
|
+
logger_1.default.info('Moving files into database...');
|
|
537
|
+
const migrationPromises = fileNames.map(async (fileName) => {
|
|
538
|
+
const fileData = readResult_fileSystem(fileName);
|
|
539
|
+
if (fileData) {
|
|
540
|
+
await writeResultsToDatabase(fileData.result.results, fileData.result.config, filenameToDate(fileName));
|
|
541
|
+
logger_1.default.debug(`Migrated ${fileName} to database.`);
|
|
542
|
+
try {
|
|
543
|
+
fs.unlinkSync(path.join(outputDir, fileName));
|
|
544
|
+
}
|
|
545
|
+
catch (err) {
|
|
546
|
+
logger_1.default.warn(`Failed to delete ${fileName} after migration: ${err}`);
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
else {
|
|
550
|
+
logger_1.default.warn(`Failed to migrate result ${fileName} due to read error.`);
|
|
551
|
+
}
|
|
552
|
+
});
|
|
553
|
+
await Promise.all(migrationPromises);
|
|
554
|
+
try {
|
|
555
|
+
fs.unlinkSync(getLatestResultsPath());
|
|
556
|
+
}
|
|
557
|
+
catch (err) {
|
|
558
|
+
logger_1.default.warn(`Failed to delete latest.json: ${err}`);
|
|
559
|
+
}
|
|
560
|
+
logger_1.default.info('Migration complete. Please restart your web server if it is running.');
|
|
561
|
+
}
|
|
562
|
+
exports.migrateResultsFromFileSystemToDatabase = migrateResultsFromFileSystemToDatabase;
|
|
427
563
|
const RESULT_HISTORY_LENGTH = parseInt(process.env.RESULT_HISTORY_LENGTH || '', 10) || 100;
|
|
428
|
-
function
|
|
429
|
-
const sortedFilenames =
|
|
564
|
+
function cleanupOldFileResults(remaining = RESULT_HISTORY_LENGTH) {
|
|
565
|
+
const sortedFilenames = listPreviousResultFilenames_fileSystem();
|
|
430
566
|
for (let i = 0; i < sortedFilenames.length - remaining; i++) {
|
|
431
567
|
fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFilenames[i]));
|
|
432
568
|
}
|
|
433
569
|
}
|
|
434
|
-
exports.
|
|
570
|
+
exports.cleanupOldFileResults = cleanupOldFileResults;
|
|
435
571
|
function filenameToDate(filename) {
|
|
436
572
|
const dateString = filename.slice('eval-'.length, filename.length - '.json'.length);
|
|
437
573
|
// Replace hyphens with colons where necessary (Windows compatibility).
|
|
@@ -439,27 +575,67 @@ function filenameToDate(filename) {
|
|
|
439
575
|
const timePart = dateParts[1].replace(/-/g, ':');
|
|
440
576
|
const formattedDateString = `${dateParts[0]}T${timePart}`;
|
|
441
577
|
const date = new Date(formattedDateString);
|
|
578
|
+
return date;
|
|
579
|
+
/*
|
|
442
580
|
return date.toLocaleDateString('en-US', {
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
581
|
+
year: 'numeric',
|
|
582
|
+
month: 'long',
|
|
583
|
+
day: 'numeric',
|
|
584
|
+
hour: '2-digit',
|
|
585
|
+
minute: '2-digit',
|
|
586
|
+
second: '2-digit',
|
|
587
|
+
timeZoneName: 'short',
|
|
450
588
|
});
|
|
589
|
+
*/
|
|
451
590
|
}
|
|
452
591
|
exports.filenameToDate = filenameToDate;
|
|
453
592
|
function dateToFilename(date) {
|
|
454
593
|
return `eval-${date.toISOString().replace(/:/g, '-')}.json`;
|
|
455
594
|
}
|
|
456
595
|
exports.dateToFilename = dateToFilename;
|
|
457
|
-
function readResult(
|
|
596
|
+
async function readResult(id) {
|
|
597
|
+
const db = (0, database_1.getDb)();
|
|
598
|
+
try {
|
|
599
|
+
const evalResult = await db
|
|
600
|
+
.select({
|
|
601
|
+
id: database_1.evals.id,
|
|
602
|
+
createdAt: database_1.evals.createdAt,
|
|
603
|
+
results: database_1.evals.results,
|
|
604
|
+
config: database_1.evals.config,
|
|
605
|
+
})
|
|
606
|
+
.from(database_1.evals)
|
|
607
|
+
.where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
|
|
608
|
+
.execute();
|
|
609
|
+
if (evalResult.length === 0) {
|
|
610
|
+
return undefined;
|
|
611
|
+
}
|
|
612
|
+
const { id: resultId, createdAt, results, config } = evalResult[0];
|
|
613
|
+
const result = {
|
|
614
|
+
version: 3,
|
|
615
|
+
createdAt: new Date(createdAt).toISOString().slice(0, 10),
|
|
616
|
+
results,
|
|
617
|
+
config,
|
|
618
|
+
};
|
|
619
|
+
return {
|
|
620
|
+
id: resultId,
|
|
621
|
+
result,
|
|
622
|
+
createdAt: new Date(createdAt),
|
|
623
|
+
};
|
|
624
|
+
}
|
|
625
|
+
catch (err) {
|
|
626
|
+
logger_1.default.error(`Failed to read result with ID ${id} from database:\n${err}`);
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
exports.readResult = readResult;
|
|
630
|
+
/**
|
|
631
|
+
* @deprecated Used only for migration to sqlite
|
|
632
|
+
*/
|
|
633
|
+
function readResult_fileSystem(name) {
|
|
458
634
|
const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
|
|
459
635
|
const resultsPath = path.join(resultsDirectory, name);
|
|
460
636
|
try {
|
|
461
637
|
const result = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
|
|
462
|
-
const createdAt =
|
|
638
|
+
const createdAt = filenameToDate(name);
|
|
463
639
|
return {
|
|
464
640
|
id: sha256(JSON.stringify(result.config)),
|
|
465
641
|
result,
|
|
@@ -470,35 +646,70 @@ function readResult(name) {
|
|
|
470
646
|
logger_1.default.error(`Failed to read results from ${resultsPath}:\n${err}`);
|
|
471
647
|
}
|
|
472
648
|
}
|
|
473
|
-
exports.
|
|
474
|
-
function updateResult(
|
|
475
|
-
const
|
|
476
|
-
const safeFilename = path.basename(filename);
|
|
477
|
-
const resultsPath = path.join(resultsDirectory, safeFilename);
|
|
649
|
+
exports.readResult_fileSystem = readResult_fileSystem;
|
|
650
|
+
async function updateResult(id, newConfig, newTable) {
|
|
651
|
+
const db = (0, database_1.getDb)();
|
|
478
652
|
try {
|
|
479
|
-
|
|
653
|
+
// Fetch the existing eval data from the database
|
|
654
|
+
const existingEval = await db
|
|
655
|
+
.select({
|
|
656
|
+
config: database_1.evals.config,
|
|
657
|
+
results: database_1.evals.results,
|
|
658
|
+
})
|
|
659
|
+
.from(database_1.evals)
|
|
660
|
+
.where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
|
|
661
|
+
.limit(1)
|
|
662
|
+
.all();
|
|
663
|
+
if (existingEval.length === 0) {
|
|
664
|
+
logger_1.default.error(`Eval with ID ${id} not found.`);
|
|
665
|
+
return;
|
|
666
|
+
}
|
|
667
|
+
const evalData = existingEval[0];
|
|
480
668
|
if (newConfig) {
|
|
481
669
|
evalData.config = newConfig;
|
|
482
670
|
}
|
|
483
671
|
if (newTable) {
|
|
484
672
|
evalData.results.table = newTable;
|
|
485
673
|
}
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
674
|
+
await db
|
|
675
|
+
.update(database_1.evals)
|
|
676
|
+
.set({
|
|
677
|
+
description: evalData.config.description,
|
|
678
|
+
config: evalData.config,
|
|
679
|
+
results: evalData.results,
|
|
680
|
+
})
|
|
681
|
+
.where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
|
|
682
|
+
.run();
|
|
683
|
+
logger_1.default.info(`Updated eval with ID ${id}`);
|
|
494
684
|
}
|
|
495
685
|
catch (err) {
|
|
496
|
-
logger_1.default.error(`Failed to update eval
|
|
686
|
+
logger_1.default.error(`Failed to update eval with ID ${id}:\n${err}`);
|
|
497
687
|
}
|
|
498
688
|
}
|
|
499
689
|
exports.updateResult = updateResult;
|
|
500
|
-
function readLatestResults() {
|
|
501
|
-
|
|
690
|
+
async function readLatestResults() {
|
|
691
|
+
const db = (0, database_1.getDb)();
|
|
692
|
+
const latestResults = await db
|
|
693
|
+
.select({
|
|
694
|
+
id: database_1.evals.id,
|
|
695
|
+
createdAt: database_1.evals.createdAt,
|
|
696
|
+
description: database_1.evals.description,
|
|
697
|
+
results: database_1.evals.results,
|
|
698
|
+
config: database_1.evals.config,
|
|
699
|
+
})
|
|
700
|
+
.from(database_1.evals)
|
|
701
|
+
.orderBy((0, drizzle_orm_1.desc)(database_1.evals.createdAt))
|
|
702
|
+
.limit(1);
|
|
703
|
+
if (!latestResults || latestResults.length === 0) {
|
|
704
|
+
return undefined;
|
|
705
|
+
}
|
|
706
|
+
const latestResult = latestResults[0];
|
|
707
|
+
return {
|
|
708
|
+
version: 3,
|
|
709
|
+
createdAt: new Date(latestResult.createdAt).toISOString(),
|
|
710
|
+
results: latestResult.results,
|
|
711
|
+
config: latestResult.config,
|
|
712
|
+
};
|
|
502
713
|
}
|
|
503
714
|
exports.readLatestResults = readLatestResults;
|
|
504
715
|
function getPromptsForTestCases(testCases) {
|
|
@@ -523,26 +734,39 @@ function getPrompts() {
|
|
|
523
734
|
return getPromptsWithPredicate(() => true);
|
|
524
735
|
}
|
|
525
736
|
exports.getPrompts = getPrompts;
|
|
526
|
-
function getPromptsWithPredicate(predicate) {
|
|
527
|
-
|
|
737
|
+
async function getPromptsWithPredicate(predicate) {
|
|
738
|
+
// TODO(ian): Make this use a proper database query
|
|
739
|
+
const db = (0, database_1.getDb)();
|
|
740
|
+
const evals_ = await db
|
|
741
|
+
.select({
|
|
742
|
+
id: database_1.evals.id,
|
|
743
|
+
createdAt: database_1.evals.createdAt,
|
|
744
|
+
results: database_1.evals.results,
|
|
745
|
+
config: database_1.evals.config,
|
|
746
|
+
})
|
|
747
|
+
.from(database_1.evals)
|
|
748
|
+
.limit(100)
|
|
749
|
+
.all();
|
|
528
750
|
const groupedPrompts = {};
|
|
529
|
-
for (const
|
|
530
|
-
const
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
751
|
+
for (const eval_ of evals_) {
|
|
752
|
+
const createdAt = new Date(eval_.createdAt).toISOString();
|
|
753
|
+
const resultWrapper = {
|
|
754
|
+
version: 3,
|
|
755
|
+
createdAt,
|
|
756
|
+
results: eval_.results,
|
|
757
|
+
config: eval_.config,
|
|
758
|
+
};
|
|
759
|
+
if (predicate(resultWrapper)) {
|
|
760
|
+
for (const prompt of resultWrapper.results.table.head.prompts) {
|
|
538
761
|
const promptId = sha256(prompt.raw);
|
|
539
|
-
const datasetId =
|
|
762
|
+
const datasetId = resultWrapper.config.tests
|
|
763
|
+
? sha256(JSON.stringify(resultWrapper.config.tests))
|
|
764
|
+
: '-';
|
|
540
765
|
if (promptId in groupedPrompts) {
|
|
541
766
|
groupedPrompts[promptId].recentEvalDate = new Date(Math.max(groupedPrompts[promptId].recentEvalDate.getTime(), new Date(createdAt).getTime()));
|
|
542
767
|
groupedPrompts[promptId].count += 1;
|
|
543
768
|
groupedPrompts[promptId].evals.push({
|
|
544
|
-
id:
|
|
545
|
-
filePath: fileName,
|
|
769
|
+
id: eval_.id,
|
|
546
770
|
datasetId,
|
|
547
771
|
metrics: prompt.metrics,
|
|
548
772
|
});
|
|
@@ -553,12 +777,10 @@ function getPromptsWithPredicate(predicate) {
|
|
|
553
777
|
id: promptId,
|
|
554
778
|
prompt,
|
|
555
779
|
recentEvalDate: new Date(createdAt),
|
|
556
|
-
recentEvalId:
|
|
557
|
-
recentEvalFilepath: fileName,
|
|
780
|
+
recentEvalId: eval_.id,
|
|
558
781
|
evals: [
|
|
559
782
|
{
|
|
560
|
-
id:
|
|
561
|
-
filePath: fileName,
|
|
783
|
+
id: eval_.id,
|
|
562
784
|
datasetId,
|
|
563
785
|
metrics: prompt.metrics,
|
|
564
786
|
},
|
|
@@ -571,31 +793,42 @@ function getPromptsWithPredicate(predicate) {
|
|
|
571
793
|
return Object.values(groupedPrompts);
|
|
572
794
|
}
|
|
573
795
|
exports.getPromptsWithPredicate = getPromptsWithPredicate;
|
|
574
|
-
function getTestCases() {
|
|
796
|
+
async function getTestCases() {
|
|
575
797
|
return getTestCasesWithPredicate(() => true);
|
|
576
798
|
}
|
|
577
799
|
exports.getTestCases = getTestCases;
|
|
578
|
-
function getTestCasesWithPredicate(predicate) {
|
|
579
|
-
const
|
|
800
|
+
async function getTestCasesWithPredicate(predicate) {
|
|
801
|
+
const db = (0, database_1.getDb)();
|
|
802
|
+
const evals_ = await db
|
|
803
|
+
.select({
|
|
804
|
+
id: database_1.evals.id,
|
|
805
|
+
createdAt: database_1.evals.createdAt,
|
|
806
|
+
results: database_1.evals.results,
|
|
807
|
+
config: database_1.evals.config,
|
|
808
|
+
})
|
|
809
|
+
.from(database_1.evals)
|
|
810
|
+
.limit(100)
|
|
811
|
+
.all();
|
|
580
812
|
const groupedTestCases = {};
|
|
581
|
-
for (const
|
|
582
|
-
const
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
813
|
+
for (const eval_ of evals_) {
|
|
814
|
+
const createdAt = new Date(eval_.createdAt).toISOString();
|
|
815
|
+
const resultWrapper = {
|
|
816
|
+
version: 3,
|
|
817
|
+
createdAt,
|
|
818
|
+
results: eval_.results,
|
|
819
|
+
config: eval_.config,
|
|
820
|
+
};
|
|
821
|
+
const testCases = resultWrapper.config.tests;
|
|
822
|
+
if (testCases && predicate(resultWrapper)) {
|
|
823
|
+
const evalId = eval_.id;
|
|
590
824
|
const datasetId = sha256(JSON.stringify(testCases));
|
|
591
825
|
if (datasetId in groupedTestCases) {
|
|
592
|
-
groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(),
|
|
826
|
+
groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), eval_.createdAt));
|
|
593
827
|
groupedTestCases[datasetId].count += 1;
|
|
594
|
-
const newPrompts =
|
|
828
|
+
const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
|
|
595
829
|
id: sha256(prompt.raw),
|
|
596
830
|
prompt,
|
|
597
831
|
evalId,
|
|
598
|
-
evalFilepath: fileName,
|
|
599
832
|
}));
|
|
600
833
|
const promptsById = {};
|
|
601
834
|
for (const prompt of groupedTestCases[datasetId].prompts.concat(newPrompts)) {
|
|
@@ -606,11 +839,10 @@ function getTestCasesWithPredicate(predicate) {
|
|
|
606
839
|
groupedTestCases[datasetId].prompts = Object.values(promptsById);
|
|
607
840
|
}
|
|
608
841
|
else {
|
|
609
|
-
const newPrompts =
|
|
610
|
-
id:
|
|
842
|
+
const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
|
|
843
|
+
id: sha256(prompt.raw),
|
|
611
844
|
prompt,
|
|
612
845
|
evalId,
|
|
613
|
-
evalFilepath: fileName,
|
|
614
846
|
}));
|
|
615
847
|
const promptsById = {};
|
|
616
848
|
for (const prompt of newPrompts) {
|
|
@@ -624,7 +856,6 @@ function getTestCasesWithPredicate(predicate) {
|
|
|
624
856
|
testCases,
|
|
625
857
|
recentEvalDate: new Date(createdAt),
|
|
626
858
|
recentEvalId: evalId,
|
|
627
|
-
recentEvalFilepath: fileName,
|
|
628
859
|
prompts: Object.values(promptsById),
|
|
629
860
|
};
|
|
630
861
|
}
|
|
@@ -633,8 +864,8 @@ function getTestCasesWithPredicate(predicate) {
|
|
|
633
864
|
return Object.values(groupedTestCases);
|
|
634
865
|
}
|
|
635
866
|
exports.getTestCasesWithPredicate = getTestCasesWithPredicate;
|
|
636
|
-
function getPromptFromHash(hash) {
|
|
637
|
-
const prompts = getPrompts();
|
|
867
|
+
async function getPromptFromHash(hash) {
|
|
868
|
+
const prompts = await getPrompts();
|
|
638
869
|
for (const prompt of prompts) {
|
|
639
870
|
if (prompt.id.startsWith(hash)) {
|
|
640
871
|
return prompt;
|
|
@@ -643,8 +874,8 @@ function getPromptFromHash(hash) {
|
|
|
643
874
|
return undefined;
|
|
644
875
|
}
|
|
645
876
|
exports.getPromptFromHash = getPromptFromHash;
|
|
646
|
-
function getDatasetFromHash(hash) {
|
|
647
|
-
const datasets = getTestCases();
|
|
877
|
+
async function getDatasetFromHash(hash) {
|
|
878
|
+
const datasets = await getTestCases();
|
|
648
879
|
for (const dataset of datasets) {
|
|
649
880
|
if (dataset.id.startsWith(hash)) {
|
|
650
881
|
return dataset;
|
|
@@ -653,13 +884,13 @@ function getDatasetFromHash(hash) {
|
|
|
653
884
|
return undefined;
|
|
654
885
|
}
|
|
655
886
|
exports.getDatasetFromHash = getDatasetFromHash;
|
|
656
|
-
function getEvals() {
|
|
887
|
+
async function getEvals() {
|
|
657
888
|
return getEvalsWithPredicate(() => true);
|
|
658
889
|
}
|
|
659
890
|
exports.getEvals = getEvals;
|
|
660
|
-
function getEvalFromHash(hash) {
|
|
661
|
-
const
|
|
662
|
-
for (const eval_ of
|
|
891
|
+
async function getEvalFromHash(hash) {
|
|
892
|
+
const evals_ = await getEvals();
|
|
893
|
+
for (const eval_ of evals_) {
|
|
663
894
|
if (eval_.id.startsWith(hash)) {
|
|
664
895
|
return eval_;
|
|
665
896
|
}
|
|
@@ -667,23 +898,34 @@ function getEvalFromHash(hash) {
|
|
|
667
898
|
return undefined;
|
|
668
899
|
}
|
|
669
900
|
exports.getEvalFromHash = getEvalFromHash;
|
|
670
|
-
function getEvalsWithPredicate(predicate) {
|
|
901
|
+
async function getEvalsWithPredicate(predicate) {
|
|
902
|
+
const db = (0, database_1.getDb)();
|
|
903
|
+
const evals_ = await db
|
|
904
|
+
.select({
|
|
905
|
+
id: database_1.evals.id,
|
|
906
|
+
createdAt: database_1.evals.createdAt,
|
|
907
|
+
results: database_1.evals.results,
|
|
908
|
+
config: database_1.evals.config,
|
|
909
|
+
})
|
|
910
|
+
.from(database_1.evals)
|
|
911
|
+
.limit(100)
|
|
912
|
+
.all();
|
|
671
913
|
const ret = [];
|
|
672
|
-
const
|
|
673
|
-
|
|
674
|
-
const
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
914
|
+
for (const eval_ of evals_) {
|
|
915
|
+
const createdAt = new Date(eval_.createdAt).toISOString();
|
|
916
|
+
const resultWrapper = {
|
|
917
|
+
version: 3,
|
|
918
|
+
createdAt: createdAt,
|
|
919
|
+
results: eval_.results,
|
|
920
|
+
config: eval_.config,
|
|
921
|
+
};
|
|
922
|
+
if (predicate(resultWrapper)) {
|
|
923
|
+
const evalId = eval_.id;
|
|
681
924
|
ret.push({
|
|
682
925
|
id: evalId,
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
results: result.results,
|
|
926
|
+
date: new Date(eval_.createdAt),
|
|
927
|
+
config: eval_.config,
|
|
928
|
+
results: eval_.results,
|
|
687
929
|
});
|
|
688
930
|
}
|
|
689
931
|
}
|