promptfoo 0.47.0 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.md +2 -0
  2. package/dist/drizzle/0000_lush_hellion.sql +36 -0
  3. package/dist/drizzle/0001_wide_calypso.sql +3 -0
  4. package/dist/drizzle/meta/0000_snapshot.json +244 -0
  5. package/dist/drizzle/meta/0001_snapshot.json +237 -0
  6. package/dist/drizzle/meta/_journal.json +20 -0
  7. package/dist/package.json +10 -3
  8. package/dist/src/__mocks__/database.d.ts +5 -0
  9. package/dist/src/__mocks__/database.d.ts.map +1 -0
  10. package/dist/src/__mocks__/database.js +27 -0
  11. package/dist/src/__mocks__/database.js.map +1 -0
  12. package/dist/src/commands/list.d.ts.map +1 -1
  13. package/dist/src/commands/list.js +4 -5
  14. package/dist/src/commands/list.js.map +1 -1
  15. package/dist/src/commands/show.d.ts +1 -1
  16. package/dist/src/commands/show.d.ts.map +1 -1
  17. package/dist/src/commands/show.js +7 -7
  18. package/dist/src/commands/show.js.map +1 -1
  19. package/dist/src/csv.d.ts +1 -1
  20. package/dist/src/csv.d.ts.map +1 -1
  21. package/dist/src/csv.js +5 -0
  22. package/dist/src/csv.js.map +1 -1
  23. package/dist/src/database.d.ts +238 -0
  24. package/dist/src/database.d.ts.map +1 -0
  25. package/dist/src/database.js +141 -0
  26. package/dist/src/database.js.map +1 -0
  27. package/dist/src/evaluator.d.ts.map +1 -1
  28. package/dist/src/evaluator.js +8 -6
  29. package/dist/src/evaluator.js.map +1 -1
  30. package/dist/src/index.d.ts.map +1 -1
  31. package/dist/src/index.js +2 -1
  32. package/dist/src/index.js.map +1 -1
  33. package/dist/src/main.js +6 -4
  34. package/dist/src/main.js.map +1 -1
  35. package/dist/src/migrate.d.ts +5 -0
  36. package/dist/src/migrate.d.ts.map +1 -0
  37. package/dist/src/migrate.js +50 -0
  38. package/dist/src/migrate.js.map +1 -0
  39. package/dist/src/prompts.d.ts.map +1 -1
  40. package/dist/src/prompts.js +3 -0
  41. package/dist/src/prompts.js.map +1 -1
  42. package/dist/src/providers/azureopenai.d.ts +2 -0
  43. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  44. package/dist/src/providers/azureopenai.js +4 -2
  45. package/dist/src/providers/azureopenai.js.map +1 -1
  46. package/dist/src/providers.d.ts.map +1 -1
  47. package/dist/src/providers.js +2 -1
  48. package/dist/src/providers.js.map +1 -1
  49. package/dist/src/types.d.ts +5 -6
  50. package/dist/src/types.d.ts.map +1 -1
  51. package/dist/src/util.d.ts +46 -19
  52. package/dist/src/util.d.ts.map +1 -1
  53. package/dist/src/util.js +363 -121
  54. package/dist/src/util.js.map +1 -1
  55. package/dist/src/web/nextui/404/index.html +1 -1
  56. package/dist/src/web/nextui/404.html +1 -1
  57. package/dist/src/web/nextui/_next/static/chunks/952-ede6b209625d42a2.js +1 -0
  58. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-ad55f89d622ef8e7.js +1 -0
  59. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-01ab4878803b7068.js +1 -0
  60. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-9c163111247d8da5.js +1 -0
  61. package/dist/src/web/nextui/api/results +1 -1
  62. package/dist/src/web/nextui/auth/login/index.html +1 -1
  63. package/dist/src/web/nextui/auth/login/index.txt +1 -1
  64. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  65. package/dist/src/web/nextui/auth/signup/index.txt +1 -1
  66. package/dist/src/web/nextui/datasets/index.html +1 -1
  67. package/dist/src/web/nextui/datasets/index.txt +2 -2
  68. package/dist/src/web/nextui/eval/index.html +1 -1
  69. package/dist/src/web/nextui/eval/index.txt +2 -2
  70. package/dist/src/web/nextui/index.html +1 -1
  71. package/dist/src/web/nextui/index.txt +1 -1
  72. package/dist/src/web/nextui/prompts/index.html +1 -1
  73. package/dist/src/web/nextui/prompts/index.txt +2 -2
  74. package/dist/src/web/nextui/setup/index.html +1 -1
  75. package/dist/src/web/nextui/setup/index.txt +2 -2
  76. package/dist/src/web/server.d.ts +1 -1
  77. package/dist/src/web/server.d.ts.map +1 -1
  78. package/dist/src/web/server.js +25 -43
  79. package/dist/src/web/server.js.map +1 -1
  80. package/package.json +10 -3
  81. package/dist/src/web/nextui/_next/static/chunks/952-1367984f076e3060.js +0 -1
  82. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-44ab188f3b846712.js +0 -1
  83. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-0bf3409d6a6bfa22.js +0 -1
  84. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-83c7e62787113081.js +0 -1
  85. /package/dist/src/web/nextui/_next/static/{kWF8sUISiIgB0hKr0muJH → 8yxA5JzS0wXTxJptFRKTo}/_buildManifest.js +0 -0
  86. /package/dist/src/web/nextui/_next/static/{kWF8sUISiIgB0hKr0muJH → 8yxA5JzS0wXTxJptFRKTo}/_ssgManifest.js +0 -0
package/dist/src/util.js CHANGED
@@ -26,7 +26,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
26
26
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
- exports.transformOutput = exports.printBorder = exports.getNunjucksEngine = exports.readFilters = exports.getEvalsWithPredicate = exports.getEvalFromHash = exports.getEvals = exports.getDatasetFromHash = exports.getPromptFromHash = exports.getTestCasesWithPredicate = exports.getTestCases = exports.getPromptsWithPredicate = exports.getPrompts = exports.sha256 = exports.getPromptsForTestCasesHash = exports.getPromptsForTestCases = exports.readLatestResults = exports.updateResult = exports.readResult = exports.dateToFilename = exports.filenameToDate = exports.cleanupOldResults = exports.listPreviousResults = exports.listPreviousResultFilenames = exports.writeLatestResults = exports.getLatestResultsPath = exports.setConfigDirectoryPath = exports.getConfigDirectoryPath = exports.writeOutput = exports.writeMultipleOutputs = exports.readConfigs = exports.readConfig = exports.dereferenceConfig = exports.maybeReadConfig = exports.maybeRecordFirstRun = exports.readGlobalConfig = exports.resetGlobalConfig = void 0;
29
+ exports.transformOutput = exports.printBorder = exports.getNunjucksEngine = exports.readFilters = exports.getEvalsWithPredicate = exports.getEvalFromHash = exports.getEvals = exports.getDatasetFromHash = exports.getPromptFromHash = exports.getTestCasesWithPredicate = exports.getTestCases = exports.getPromptsWithPredicate = exports.getPrompts = exports.sha256 = exports.getPromptsForTestCasesHash = exports.getPromptsForTestCases = exports.readLatestResults = exports.updateResult = exports.readResult_fileSystem = exports.readResult = exports.dateToFilename = exports.filenameToDate = exports.cleanupOldFileResults = exports.migrateResultsFromFileSystemToDatabase = exports.listPreviousResults_fileSystem = exports.listPreviousResultFilenames_fileSystem = exports.listPreviousResults = exports.writeResultsToDatabase = exports.getLatestResultsPath = exports.setConfigDirectoryPath = exports.getConfigDirectoryPath = exports.writeOutput = exports.writeMultipleOutputs = exports.readConfigs = exports.readConfig = exports.dereferenceConfig = exports.maybeReadConfig = exports.maybeRecordFirstRun = exports.readGlobalConfig = exports.resetGlobalConfig = void 0;
30
30
  const fs = __importStar(require("fs"));
31
31
  const path = __importStar(require("path"));
32
32
  const os = __importStar(require("os"));
@@ -37,9 +37,12 @@ const nunjucks_1 = __importDefault(require("nunjucks"));
37
37
  const js_yaml_1 = __importDefault(require("js-yaml"));
38
38
  const sync_1 = require("csv-stringify/sync");
39
39
  const glob_1 = require("glob");
40
+ const drizzle_orm_1 = require("drizzle-orm");
40
41
  const logger_1 = __importDefault(require("./logger"));
41
42
  const esm_1 = require("./esm");
42
43
  const testCases_1 = require("./testCases");
44
+ const database_1 = require("./database");
45
+ const migrate_1 = require("./migrate");
43
46
  let globalConfigCache = null;
44
47
  function resetGlobalConfig() {
45
48
  globalConfigCache = null;
@@ -356,44 +359,114 @@ function setConfigDirectoryPath(newPath) {
356
359
  configDirectoryPath = newPath;
357
360
  }
358
361
  exports.setConfigDirectoryPath = setConfigDirectoryPath;
362
+ /**
363
+ * TODO(ian): Remove this
364
+ * @deprecated Use readLatestResults directly instead.
365
+ */
359
366
  function getLatestResultsPath() {
360
367
  return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
361
368
  }
362
369
  exports.getLatestResultsPath = getLatestResultsPath;
363
- function writeLatestResults(results, config) {
364
- const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
365
- // Replace hyphens with colons (Windows compatibility).
366
- const filename = dateToFilename(new Date());
367
- const newResultsPath = path.join(resultsDirectory, filename);
368
- const latestResultsPath = getLatestResultsPath();
370
+ async function writeResultsToDatabase(results, config, createdAt) {
371
+ createdAt = createdAt || new Date();
372
+ const evalId = `eval-${createdAt.toISOString().slice(0, 19)}`;
373
+ const db = (0, database_1.getDb)();
374
+ const promises = [];
375
+ promises.push(db
376
+ .insert(database_1.evals)
377
+ .values({
378
+ id: evalId,
379
+ createdAt: createdAt.getTime(),
380
+ description: config.description,
381
+ config,
382
+ results,
383
+ })
384
+ .onConflictDoNothing()
385
+ .run());
386
+ logger_1.default.debug(`Inserting eval ${evalId}`);
387
+ // Record prompt relation
388
+ for (const prompt of results.table.head.prompts) {
389
+ const promptId = sha256(prompt.display);
390
+ promises.push(db
391
+ .insert(database_1.prompts)
392
+ .values({
393
+ id: promptId,
394
+ prompt: prompt.display,
395
+ })
396
+ .onConflictDoNothing()
397
+ .run());
398
+ promises.push(db
399
+ .insert(database_1.evalsToPrompts)
400
+ .values({
401
+ evalId,
402
+ promptId,
403
+ })
404
+ .onConflictDoNothing()
405
+ .run());
406
+ logger_1.default.debug(`Inserting prompt ${promptId}`);
407
+ }
408
+ // Record dataset relation
409
+ const datasetId = sha256(JSON.stringify(config.tests || []));
410
+ promises.push(db
411
+ .insert(database_1.datasets)
412
+ .values({
413
+ id: datasetId,
414
+ tests: config.tests,
415
+ })
416
+ .onConflictDoNothing()
417
+ .run());
418
+ promises.push(db
419
+ .insert(database_1.evalsToDatasets)
420
+ .values({
421
+ evalId,
422
+ datasetId,
423
+ })
424
+ .onConflictDoNothing()
425
+ .run());
426
+ logger_1.default.debug(`Inserting dataset ${datasetId}`);
427
+ logger_1.default.debug(`Awaiting ${promises.length} promises to database...`);
428
+ await Promise.all(promises);
429
+ // "touch" db signal path
430
+ const filePath = (0, database_1.getDbSignalPath)();
369
431
  try {
370
- fs.mkdirSync(resultsDirectory, { recursive: true });
371
- const resultsFileData = {
372
- version: 2,
373
- createdAt: new Date().toISOString(),
374
- config,
375
- results,
376
- };
377
- fs.writeFileSync(newResultsPath, JSON.stringify(resultsFileData, null, 2));
378
- // Use copy instead of symlink to avoid issues with Windows permissions.
379
- try {
380
- // Backwards compatibility: delete old symlink.
381
- fs.unlinkSync(latestResultsPath);
382
- }
383
- catch { }
384
- fs.copyFileSync(newResultsPath, latestResultsPath);
385
- cleanupOldResults();
386
- return filename;
432
+ const now = new Date();
433
+ fs.utimesSync(filePath, now, now);
387
434
  }
388
435
  catch (err) {
389
- logger_1.default.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
390
- return null;
436
+ fs.closeSync(fs.openSync(filePath, 'w'));
391
437
  }
438
+ return evalId;
392
439
  }
393
- exports.writeLatestResults = writeLatestResults;
394
- const resultsCache = {};
395
- function listPreviousResultFilenames() {
440
+ exports.writeResultsToDatabase = writeResultsToDatabase;
441
+ /**
442
+ *
443
+ * @returns Last 100 evals in descending order.
444
+ */
445
+ function listPreviousResults() {
446
+ const db = (0, database_1.getDb)();
447
+ const results = db
448
+ .select({
449
+ name: database_1.evals.id,
450
+ description: database_1.evals.description,
451
+ })
452
+ .from(database_1.evals)
453
+ .orderBy((0, drizzle_orm_1.desc)(database_1.evals.createdAt))
454
+ .limit(100)
455
+ .all();
456
+ return results.map((result) => ({
457
+ evalId: result.name,
458
+ description: result.description,
459
+ }));
460
+ }
461
+ exports.listPreviousResults = listPreviousResults;
462
+ /**
463
+ * @deprecated Used only for migration to sqlite
464
+ */
465
+ function listPreviousResultFilenames_fileSystem() {
396
466
  const directory = path.join(getConfigDirectoryPath(), 'output');
467
+ if (!fs.existsSync(directory)) {
468
+ return [];
469
+ }
397
470
  const files = fs.readdirSync(directory);
398
471
  const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
399
472
  return resultsFiles.sort((a, b) => {
@@ -402,10 +475,17 @@ function listPreviousResultFilenames() {
402
475
  return statA.birthtime.getTime() - statB.birthtime.getTime(); // sort in ascending order
403
476
  });
404
477
  }
405
- exports.listPreviousResultFilenames = listPreviousResultFilenames;
406
- function listPreviousResults() {
478
+ exports.listPreviousResultFilenames_fileSystem = listPreviousResultFilenames_fileSystem;
479
+ const resultsCache = {};
480
+ /**
481
+ * @deprecated Used only for migration to sqlite
482
+ */
483
+ function listPreviousResults_fileSystem() {
407
484
  const directory = path.join(getConfigDirectoryPath(), 'output');
408
- const sortedFiles = listPreviousResultFilenames();
485
+ if (!fs.existsSync(directory)) {
486
+ return [];
487
+ }
488
+ const sortedFiles = listPreviousResultFilenames_fileSystem();
409
489
  return sortedFiles.map((fileName) => {
410
490
  if (!resultsCache[fileName]) {
411
491
  try {
@@ -423,15 +503,71 @@ function listPreviousResults() {
423
503
  };
424
504
  });
425
505
  }
426
- exports.listPreviousResults = listPreviousResults;
506
+ exports.listPreviousResults_fileSystem = listPreviousResults_fileSystem;
507
+ let attemptedMigration = false;
508
+ async function migrateResultsFromFileSystemToDatabase() {
509
+ if (attemptedMigration) {
510
+ // TODO(ian): Record this bit in the database.
511
+ return;
512
+ }
513
+ // First run db migrations
514
+ logger_1.default.debug('Running db migrations...');
515
+ await (0, migrate_1.runDbMigrations)();
516
+ const fileNames = listPreviousResultFilenames_fileSystem();
517
+ if (fileNames.length === 0) {
518
+ return;
519
+ }
520
+ logger_1.default.info(`🔁 Migrating ${fileNames.length} flat files to local database.`);
521
+ logger_1.default.info('This is a one-time operation and may take a minute...');
522
+ attemptedMigration = true;
523
+ const outputDir = path.join(getConfigDirectoryPath(), 'output');
524
+ const backupDir = `${outputDir}-backup-${new Date()
525
+ .toISOString()
526
+ .slice(0, 10)
527
+ .replace(/-/g, '')}`;
528
+ try {
529
+ fs.cpSync(outputDir, backupDir, { recursive: true });
530
+ logger_1.default.info(`Backup of output directory created at ${backupDir}`);
531
+ }
532
+ catch (backupError) {
533
+ logger_1.default.error(`Failed to create backup of output directory: ${backupError}`);
534
+ return;
535
+ }
536
+ logger_1.default.info('Moving files into database...');
537
+ const migrationPromises = fileNames.map(async (fileName) => {
538
+ const fileData = readResult_fileSystem(fileName);
539
+ if (fileData) {
540
+ await writeResultsToDatabase(fileData.result.results, fileData.result.config, filenameToDate(fileName));
541
+ logger_1.default.debug(`Migrated ${fileName} to database.`);
542
+ try {
543
+ fs.unlinkSync(path.join(outputDir, fileName));
544
+ }
545
+ catch (err) {
546
+ logger_1.default.warn(`Failed to delete ${fileName} after migration: ${err}`);
547
+ }
548
+ }
549
+ else {
550
+ logger_1.default.warn(`Failed to migrate result ${fileName} due to read error.`);
551
+ }
552
+ });
553
+ await Promise.all(migrationPromises);
554
+ try {
555
+ fs.unlinkSync(getLatestResultsPath());
556
+ }
557
+ catch (err) {
558
+ logger_1.default.warn(`Failed to delete latest.json: ${err}`);
559
+ }
560
+ logger_1.default.info('Migration complete. Please restart your web server if it is running.');
561
+ }
562
+ exports.migrateResultsFromFileSystemToDatabase = migrateResultsFromFileSystemToDatabase;
427
563
  const RESULT_HISTORY_LENGTH = parseInt(process.env.RESULT_HISTORY_LENGTH || '', 10) || 100;
428
- function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
429
- const sortedFilenames = listPreviousResultFilenames();
564
+ function cleanupOldFileResults(remaining = RESULT_HISTORY_LENGTH) {
565
+ const sortedFilenames = listPreviousResultFilenames_fileSystem();
430
566
  for (let i = 0; i < sortedFilenames.length - remaining; i++) {
431
567
  fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFilenames[i]));
432
568
  }
433
569
  }
434
- exports.cleanupOldResults = cleanupOldResults;
570
+ exports.cleanupOldFileResults = cleanupOldFileResults;
435
571
  function filenameToDate(filename) {
436
572
  const dateString = filename.slice('eval-'.length, filename.length - '.json'.length);
437
573
  // Replace hyphens with colons where necessary (Windows compatibility).
@@ -439,27 +575,67 @@ function filenameToDate(filename) {
439
575
  const timePart = dateParts[1].replace(/-/g, ':');
440
576
  const formattedDateString = `${dateParts[0]}T${timePart}`;
441
577
  const date = new Date(formattedDateString);
578
+ return date;
579
+ /*
442
580
  return date.toLocaleDateString('en-US', {
443
- year: 'numeric',
444
- month: 'long',
445
- day: 'numeric',
446
- hour: '2-digit',
447
- minute: '2-digit',
448
- second: '2-digit',
449
- timeZoneName: 'short',
581
+ year: 'numeric',
582
+ month: 'long',
583
+ day: 'numeric',
584
+ hour: '2-digit',
585
+ minute: '2-digit',
586
+ second: '2-digit',
587
+ timeZoneName: 'short',
450
588
  });
589
+ */
451
590
  }
452
591
  exports.filenameToDate = filenameToDate;
453
592
  function dateToFilename(date) {
454
593
  return `eval-${date.toISOString().replace(/:/g, '-')}.json`;
455
594
  }
456
595
  exports.dateToFilename = dateToFilename;
457
- function readResult(name) {
596
+ async function readResult(id) {
597
+ const db = (0, database_1.getDb)();
598
+ try {
599
+ const evalResult = await db
600
+ .select({
601
+ id: database_1.evals.id,
602
+ createdAt: database_1.evals.createdAt,
603
+ results: database_1.evals.results,
604
+ config: database_1.evals.config,
605
+ })
606
+ .from(database_1.evals)
607
+ .where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
608
+ .execute();
609
+ if (evalResult.length === 0) {
610
+ return undefined;
611
+ }
612
+ const { id: resultId, createdAt, results, config } = evalResult[0];
613
+ const result = {
614
+ version: 3,
615
+ createdAt: new Date(createdAt).toISOString().slice(0, 10),
616
+ results,
617
+ config,
618
+ };
619
+ return {
620
+ id: resultId,
621
+ result,
622
+ createdAt: new Date(createdAt),
623
+ };
624
+ }
625
+ catch (err) {
626
+ logger_1.default.error(`Failed to read result with ID ${id} from database:\n${err}`);
627
+ }
628
+ }
629
+ exports.readResult = readResult;
630
+ /**
631
+ * @deprecated Used only for migration to sqlite
632
+ */
633
+ function readResult_fileSystem(name) {
458
634
  const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
459
635
  const resultsPath = path.join(resultsDirectory, name);
460
636
  try {
461
637
  const result = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
462
- const createdAt = new Date(filenameToDate(name));
638
+ const createdAt = filenameToDate(name);
463
639
  return {
464
640
  id: sha256(JSON.stringify(result.config)),
465
641
  result,
@@ -470,35 +646,70 @@ function readResult(name) {
470
646
  logger_1.default.error(`Failed to read results from ${resultsPath}:\n${err}`);
471
647
  }
472
648
  }
473
- exports.readResult = readResult;
474
- function updateResult(filename, newConfig, newTable) {
475
- const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
476
- const safeFilename = path.basename(filename);
477
- const resultsPath = path.join(resultsDirectory, safeFilename);
649
+ exports.readResult_fileSystem = readResult_fileSystem;
650
+ async function updateResult(id, newConfig, newTable) {
651
+ const db = (0, database_1.getDb)();
478
652
  try {
479
- const evalData = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
653
+ // Fetch the existing eval data from the database
654
+ const existingEval = await db
655
+ .select({
656
+ config: database_1.evals.config,
657
+ results: database_1.evals.results,
658
+ })
659
+ .from(database_1.evals)
660
+ .where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
661
+ .limit(1)
662
+ .all();
663
+ if (existingEval.length === 0) {
664
+ logger_1.default.error(`Eval with ID ${id} not found.`);
665
+ return;
666
+ }
667
+ const evalData = existingEval[0];
480
668
  if (newConfig) {
481
669
  evalData.config = newConfig;
482
670
  }
483
671
  if (newTable) {
484
672
  evalData.results.table = newTable;
485
673
  }
486
- resultsCache[safeFilename] = evalData;
487
- fs.writeFileSync(resultsPath, JSON.stringify(evalData, null, 2));
488
- logger_1.default.info(`Updated eval at ${resultsPath}`);
489
- const resultFilenames = listPreviousResultFilenames();
490
- if (filename === resultFilenames[resultFilenames.length - 1]) {
491
- // Overwite latest.json too
492
- fs.copyFileSync(resultsPath, getLatestResultsPath());
493
- }
674
+ await db
675
+ .update(database_1.evals)
676
+ .set({
677
+ description: evalData.config.description,
678
+ config: evalData.config,
679
+ results: evalData.results,
680
+ })
681
+ .where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
682
+ .run();
683
+ logger_1.default.info(`Updated eval with ID ${id}`);
494
684
  }
495
685
  catch (err) {
496
- logger_1.default.error(`Failed to update eval at ${resultsPath}:\n${err}`);
686
+ logger_1.default.error(`Failed to update eval with ID ${id}:\n${err}`);
497
687
  }
498
688
  }
499
689
  exports.updateResult = updateResult;
500
- function readLatestResults() {
501
- return JSON.parse(fs.readFileSync(getLatestResultsPath(), 'utf-8'));
690
+ async function readLatestResults() {
691
+ const db = (0, database_1.getDb)();
692
+ const latestResults = await db
693
+ .select({
694
+ id: database_1.evals.id,
695
+ createdAt: database_1.evals.createdAt,
696
+ description: database_1.evals.description,
697
+ results: database_1.evals.results,
698
+ config: database_1.evals.config,
699
+ })
700
+ .from(database_1.evals)
701
+ .orderBy((0, drizzle_orm_1.desc)(database_1.evals.createdAt))
702
+ .limit(1);
703
+ if (!latestResults || latestResults.length === 0) {
704
+ return undefined;
705
+ }
706
+ const latestResult = latestResults[0];
707
+ return {
708
+ version: 3,
709
+ createdAt: new Date(latestResult.createdAt).toISOString(),
710
+ results: latestResult.results,
711
+ config: latestResult.config,
712
+ };
502
713
  }
503
714
  exports.readLatestResults = readLatestResults;
504
715
  function getPromptsForTestCases(testCases) {
@@ -523,26 +734,39 @@ function getPrompts() {
523
734
  return getPromptsWithPredicate(() => true);
524
735
  }
525
736
  exports.getPrompts = getPrompts;
526
- function getPromptsWithPredicate(predicate) {
527
- const resultFilenames = listPreviousResultFilenames();
737
+ async function getPromptsWithPredicate(predicate) {
738
+ // TODO(ian): Make this use a proper database query
739
+ const db = (0, database_1.getDb)();
740
+ const evals_ = await db
741
+ .select({
742
+ id: database_1.evals.id,
743
+ createdAt: database_1.evals.createdAt,
744
+ results: database_1.evals.results,
745
+ config: database_1.evals.config,
746
+ })
747
+ .from(database_1.evals)
748
+ .limit(100)
749
+ .all();
528
750
  const groupedPrompts = {};
529
- for (const fileName of resultFilenames) {
530
- const file = readResult(fileName);
531
- if (!file) {
532
- continue;
533
- }
534
- const { result, createdAt } = file;
535
- if (result && predicate(result)) {
536
- for (const prompt of result.results.table.head.prompts) {
537
- const evalId = sha256(JSON.stringify(result.config));
751
+ for (const eval_ of evals_) {
752
+ const createdAt = new Date(eval_.createdAt).toISOString();
753
+ const resultWrapper = {
754
+ version: 3,
755
+ createdAt,
756
+ results: eval_.results,
757
+ config: eval_.config,
758
+ };
759
+ if (predicate(resultWrapper)) {
760
+ for (const prompt of resultWrapper.results.table.head.prompts) {
538
761
  const promptId = sha256(prompt.raw);
539
- const datasetId = result.config.tests ? sha256(JSON.stringify(result.config.tests)) : '-';
762
+ const datasetId = resultWrapper.config.tests
763
+ ? sha256(JSON.stringify(resultWrapper.config.tests))
764
+ : '-';
540
765
  if (promptId in groupedPrompts) {
541
766
  groupedPrompts[promptId].recentEvalDate = new Date(Math.max(groupedPrompts[promptId].recentEvalDate.getTime(), new Date(createdAt).getTime()));
542
767
  groupedPrompts[promptId].count += 1;
543
768
  groupedPrompts[promptId].evals.push({
544
- id: evalId,
545
- filePath: fileName,
769
+ id: eval_.id,
546
770
  datasetId,
547
771
  metrics: prompt.metrics,
548
772
  });
@@ -553,12 +777,10 @@ function getPromptsWithPredicate(predicate) {
553
777
  id: promptId,
554
778
  prompt,
555
779
  recentEvalDate: new Date(createdAt),
556
- recentEvalId: evalId,
557
- recentEvalFilepath: fileName,
780
+ recentEvalId: eval_.id,
558
781
  evals: [
559
782
  {
560
- id: evalId,
561
- filePath: fileName,
783
+ id: eval_.id,
562
784
  datasetId,
563
785
  metrics: prompt.metrics,
564
786
  },
@@ -571,31 +793,42 @@ function getPromptsWithPredicate(predicate) {
571
793
  return Object.values(groupedPrompts);
572
794
  }
573
795
  exports.getPromptsWithPredicate = getPromptsWithPredicate;
574
- function getTestCases() {
796
+ async function getTestCases() {
575
797
  return getTestCasesWithPredicate(() => true);
576
798
  }
577
799
  exports.getTestCases = getTestCases;
578
- function getTestCasesWithPredicate(predicate) {
579
- const resultFilenames = listPreviousResultFilenames();
800
+ async function getTestCasesWithPredicate(predicate) {
801
+ const db = (0, database_1.getDb)();
802
+ const evals_ = await db
803
+ .select({
804
+ id: database_1.evals.id,
805
+ createdAt: database_1.evals.createdAt,
806
+ results: database_1.evals.results,
807
+ config: database_1.evals.config,
808
+ })
809
+ .from(database_1.evals)
810
+ .limit(100)
811
+ .all();
580
812
  const groupedTestCases = {};
581
- for (const fileName of resultFilenames) {
582
- const file = readResult(fileName);
583
- if (!file) {
584
- continue;
585
- }
586
- const { result, createdAt } = file;
587
- const testCases = result?.config?.tests;
588
- if (testCases && predicate(result)) {
589
- const evalId = sha256(JSON.stringify(result.config));
813
+ for (const eval_ of evals_) {
814
+ const createdAt = new Date(eval_.createdAt).toISOString();
815
+ const resultWrapper = {
816
+ version: 3,
817
+ createdAt,
818
+ results: eval_.results,
819
+ config: eval_.config,
820
+ };
821
+ const testCases = resultWrapper.config.tests;
822
+ if (testCases && predicate(resultWrapper)) {
823
+ const evalId = eval_.id;
590
824
  const datasetId = sha256(JSON.stringify(testCases));
591
825
  if (datasetId in groupedTestCases) {
592
- groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), new Date(createdAt).getTime()));
826
+ groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), eval_.createdAt));
593
827
  groupedTestCases[datasetId].count += 1;
594
- const newPrompts = result.results.table.head.prompts.map((prompt) => ({
828
+ const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
595
829
  id: sha256(prompt.raw),
596
830
  prompt,
597
831
  evalId,
598
- evalFilepath: fileName,
599
832
  }));
600
833
  const promptsById = {};
601
834
  for (const prompt of groupedTestCases[datasetId].prompts.concat(newPrompts)) {
@@ -606,11 +839,10 @@ function getTestCasesWithPredicate(predicate) {
606
839
  groupedTestCases[datasetId].prompts = Object.values(promptsById);
607
840
  }
608
841
  else {
609
- const newPrompts = result.results.table.head.prompts.map((prompt) => ({
610
- id: (0, crypto_1.createHash)('sha256').update(prompt.raw).digest('hex'),
842
+ const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
843
+ id: sha256(prompt.raw),
611
844
  prompt,
612
845
  evalId,
613
- evalFilepath: fileName,
614
846
  }));
615
847
  const promptsById = {};
616
848
  for (const prompt of newPrompts) {
@@ -624,7 +856,6 @@ function getTestCasesWithPredicate(predicate) {
624
856
  testCases,
625
857
  recentEvalDate: new Date(createdAt),
626
858
  recentEvalId: evalId,
627
- recentEvalFilepath: fileName,
628
859
  prompts: Object.values(promptsById),
629
860
  };
630
861
  }
@@ -633,8 +864,8 @@ function getTestCasesWithPredicate(predicate) {
633
864
  return Object.values(groupedTestCases);
634
865
  }
635
866
  exports.getTestCasesWithPredicate = getTestCasesWithPredicate;
636
- function getPromptFromHash(hash) {
637
- const prompts = getPrompts();
867
+ async function getPromptFromHash(hash) {
868
+ const prompts = await getPrompts();
638
869
  for (const prompt of prompts) {
639
870
  if (prompt.id.startsWith(hash)) {
640
871
  return prompt;
@@ -643,8 +874,8 @@ function getPromptFromHash(hash) {
643
874
  return undefined;
644
875
  }
645
876
  exports.getPromptFromHash = getPromptFromHash;
646
- function getDatasetFromHash(hash) {
647
- const datasets = getTestCases();
877
+ async function getDatasetFromHash(hash) {
878
+ const datasets = await getTestCases();
648
879
  for (const dataset of datasets) {
649
880
  if (dataset.id.startsWith(hash)) {
650
881
  return dataset;
@@ -653,13 +884,13 @@ function getDatasetFromHash(hash) {
653
884
  return undefined;
654
885
  }
655
886
  exports.getDatasetFromHash = getDatasetFromHash;
656
- function getEvals() {
887
+ async function getEvals() {
657
888
  return getEvalsWithPredicate(() => true);
658
889
  }
659
890
  exports.getEvals = getEvals;
660
- function getEvalFromHash(hash) {
661
- const evals = getEvals();
662
- for (const eval_ of evals) {
891
+ async function getEvalFromHash(hash) {
892
+ const evals_ = await getEvals();
893
+ for (const eval_ of evals_) {
663
894
  if (eval_.id.startsWith(hash)) {
664
895
  return eval_;
665
896
  }
@@ -667,23 +898,34 @@ function getEvalFromHash(hash) {
667
898
  return undefined;
668
899
  }
669
900
  exports.getEvalFromHash = getEvalFromHash;
670
- function getEvalsWithPredicate(predicate) {
901
+ async function getEvalsWithPredicate(predicate) {
902
+ const db = (0, database_1.getDb)();
903
+ const evals_ = await db
904
+ .select({
905
+ id: database_1.evals.id,
906
+ createdAt: database_1.evals.createdAt,
907
+ results: database_1.evals.results,
908
+ config: database_1.evals.config,
909
+ })
910
+ .from(database_1.evals)
911
+ .limit(100)
912
+ .all();
671
913
  const ret = [];
672
- const resultsFilenames = listPreviousResultFilenames();
673
- for (const fileName of resultsFilenames) {
674
- const file = readResult(fileName);
675
- if (!file) {
676
- continue;
677
- }
678
- const { result, createdAt } = file;
679
- if (result && predicate(result)) {
680
- const evalId = sha256(fileName + ':' + JSON.stringify(result.config));
914
+ for (const eval_ of evals_) {
915
+ const createdAt = new Date(eval_.createdAt).toISOString();
916
+ const resultWrapper = {
917
+ version: 3,
918
+ createdAt: createdAt,
919
+ results: eval_.results,
920
+ config: eval_.config,
921
+ };
922
+ if (predicate(resultWrapper)) {
923
+ const evalId = eval_.id;
681
924
  ret.push({
682
925
  id: evalId,
683
- filePath: fileName,
684
- date: createdAt,
685
- config: result.config,
686
- results: result.results,
926
+ date: new Date(eval_.createdAt),
927
+ config: eval_.config,
928
+ results: eval_.results,
687
929
  });
688
930
  }
689
931
  }