promptfoo 0.46.0 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/README.md +2 -0
  2. package/dist/drizzle/0000_lush_hellion.sql +36 -0
  3. package/dist/drizzle/0001_wide_calypso.sql +3 -0
  4. package/dist/drizzle/meta/0000_snapshot.json +244 -0
  5. package/dist/drizzle/meta/0001_snapshot.json +237 -0
  6. package/dist/drizzle/meta/_journal.json +20 -0
  7. package/dist/package.json +10 -3
  8. package/dist/src/__mocks__/database.d.ts +5 -0
  9. package/dist/src/__mocks__/database.d.ts.map +1 -0
  10. package/dist/src/__mocks__/database.js +27 -0
  11. package/dist/src/__mocks__/database.js.map +1 -0
  12. package/dist/src/assertions.d.ts.map +1 -1
  13. package/dist/src/assertions.js +51 -42
  14. package/dist/src/assertions.js.map +1 -1
  15. package/dist/src/commands/list.d.ts.map +1 -1
  16. package/dist/src/commands/list.js +4 -5
  17. package/dist/src/commands/list.js.map +1 -1
  18. package/dist/src/commands/show.d.ts +1 -1
  19. package/dist/src/commands/show.d.ts.map +1 -1
  20. package/dist/src/commands/show.js +7 -7
  21. package/dist/src/commands/show.js.map +1 -1
  22. package/dist/src/csv.d.ts +1 -1
  23. package/dist/src/csv.d.ts.map +1 -1
  24. package/dist/src/csv.js +5 -0
  25. package/dist/src/csv.js.map +1 -1
  26. package/dist/src/database.d.ts +238 -0
  27. package/dist/src/database.d.ts.map +1 -0
  28. package/dist/src/database.js +141 -0
  29. package/dist/src/database.js.map +1 -0
  30. package/dist/src/evaluator.d.ts.map +1 -1
  31. package/dist/src/evaluator.js +8 -6
  32. package/dist/src/evaluator.js.map +1 -1
  33. package/dist/src/index.d.ts.map +1 -1
  34. package/dist/src/index.js +2 -1
  35. package/dist/src/index.js.map +1 -1
  36. package/dist/src/main.js +6 -4
  37. package/dist/src/main.js.map +1 -1
  38. package/dist/src/migrate.d.ts +5 -0
  39. package/dist/src/migrate.d.ts.map +1 -0
  40. package/dist/src/migrate.js +50 -0
  41. package/dist/src/migrate.js.map +1 -0
  42. package/dist/src/prompts.d.ts.map +1 -1
  43. package/dist/src/prompts.js +3 -0
  44. package/dist/src/prompts.js.map +1 -1
  45. package/dist/src/providers/anthropic.d.ts.map +1 -1
  46. package/dist/src/providers/anthropic.js +37 -1
  47. package/dist/src/providers/anthropic.js.map +1 -1
  48. package/dist/src/providers/azureopenai.d.ts +16 -0
  49. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  50. package/dist/src/providers/azureopenai.js +6 -2
  51. package/dist/src/providers/azureopenai.js.map +1 -1
  52. package/dist/src/providers/replicate.d.ts.map +1 -1
  53. package/dist/src/providers/replicate.js +16 -1
  54. package/dist/src/providers/replicate.js.map +1 -1
  55. package/dist/src/providers.d.ts.map +1 -1
  56. package/dist/src/providers.js +2 -1
  57. package/dist/src/providers.js.map +1 -1
  58. package/dist/src/python/wrapper.d.ts +9 -1
  59. package/dist/src/python/wrapper.d.ts.map +1 -1
  60. package/dist/src/python/wrapper.js +32 -4
  61. package/dist/src/python/wrapper.js.map +1 -1
  62. package/dist/src/types.d.ts +5 -6
  63. package/dist/src/types.d.ts.map +1 -1
  64. package/dist/src/util.d.ts +52 -19
  65. package/dist/src/util.d.ts.map +1 -1
  66. package/dist/src/util.js +378 -125
  67. package/dist/src/util.js.map +1 -1
  68. package/dist/src/web/nextui/404/index.html +1 -1
  69. package/dist/src/web/nextui/404.html +1 -1
  70. package/dist/src/web/nextui/_next/static/chunks/952-ede6b209625d42a2.js +1 -0
  71. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-ad55f89d622ef8e7.js +1 -0
  72. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-01ab4878803b7068.js +1 -0
  73. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-9c163111247d8da5.js +1 -0
  74. package/dist/src/web/nextui/api/results +1 -1
  75. package/dist/src/web/nextui/auth/login/index.html +1 -1
  76. package/dist/src/web/nextui/auth/login/index.txt +3 -3
  77. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  78. package/dist/src/web/nextui/auth/signup/index.txt +3 -3
  79. package/dist/src/web/nextui/datasets/index.html +1 -1
  80. package/dist/src/web/nextui/datasets/index.txt +3 -3
  81. package/dist/src/web/nextui/eval/index.html +1 -1
  82. package/dist/src/web/nextui/eval/index.txt +3 -3
  83. package/dist/src/web/nextui/index.html +1 -1
  84. package/dist/src/web/nextui/index.txt +2 -2
  85. package/dist/src/web/nextui/prompts/index.html +1 -1
  86. package/dist/src/web/nextui/prompts/index.txt +3 -3
  87. package/dist/src/web/nextui/setup/index.html +1 -1
  88. package/dist/src/web/nextui/setup/index.txt +3 -3
  89. package/dist/src/web/server.d.ts +1 -1
  90. package/dist/src/web/server.d.ts.map +1 -1
  91. package/dist/src/web/server.js +25 -43
  92. package/dist/src/web/server.js.map +1 -1
  93. package/package.json +10 -3
  94. package/dist/src/web/nextui/_next/static/chunks/952-1367984f076e3060.js +0 -1
  95. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-44ab188f3b846712.js +0 -1
  96. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-0bf3409d6a6bfa22.js +0 -1
  97. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-83c7e62787113081.js +0 -1
  98. /package/dist/src/web/nextui/_next/static/{Np8tRhZUzimy-v_hu8F8W → 8yxA5JzS0wXTxJptFRKTo}/_buildManifest.js +0 -0
  99. /package/dist/src/web/nextui/_next/static/{Np8tRhZUzimy-v_hu8F8W → 8yxA5JzS0wXTxJptFRKTo}/_ssgManifest.js +0 -0
  100. /package/dist/src/web/nextui/_next/static/chunks/{82-ca0360e473d81167.js → 82-6e8c9ebc91ff932b.js} +0 -0
package/dist/src/util.js CHANGED
@@ -26,7 +26,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
26
26
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
- exports.transformOutput = exports.printBorder = exports.getNunjucksEngine = exports.readFilters = exports.getEvalsWithPredicate = exports.getEvalFromHash = exports.getEvals = exports.getDatasetFromHash = exports.getPromptFromHash = exports.getTestCasesWithPredicate = exports.getTestCases = exports.getPromptsWithPredicate = exports.getPrompts = exports.sha256 = exports.getPromptsForTestCasesHash = exports.getPromptsForTestCases = exports.readLatestResults = exports.updateResult = exports.readResult = exports.dateToFilename = exports.filenameToDate = exports.cleanupOldResults = exports.listPreviousResults = exports.listPreviousResultFilenames = exports.writeLatestResults = exports.getLatestResultsPath = exports.setConfigDirectoryPath = exports.getConfigDirectoryPath = exports.writeOutput = exports.writeMultipleOutputs = exports.readConfigs = exports.readConfig = exports.dereferenceConfig = exports.maybeReadConfig = exports.maybeRecordFirstRun = exports.readGlobalConfig = exports.resetGlobalConfig = void 0;
29
+ exports.transformOutput = exports.printBorder = exports.getNunjucksEngine = exports.readFilters = exports.getEvalsWithPredicate = exports.getEvalFromHash = exports.getEvals = exports.getDatasetFromHash = exports.getPromptFromHash = exports.getTestCasesWithPredicate = exports.getTestCases = exports.getPromptsWithPredicate = exports.getPrompts = exports.sha256 = exports.getPromptsForTestCasesHash = exports.getPromptsForTestCases = exports.readLatestResults = exports.updateResult = exports.readResult_fileSystem = exports.readResult = exports.dateToFilename = exports.filenameToDate = exports.cleanupOldFileResults = exports.migrateResultsFromFileSystemToDatabase = exports.listPreviousResults_fileSystem = exports.listPreviousResultFilenames_fileSystem = exports.listPreviousResults = exports.writeResultsToDatabase = exports.getLatestResultsPath = exports.setConfigDirectoryPath = exports.getConfigDirectoryPath = exports.writeOutput = exports.writeMultipleOutputs = exports.readConfigs = exports.readConfig = exports.dereferenceConfig = exports.maybeReadConfig = exports.maybeRecordFirstRun = exports.readGlobalConfig = exports.resetGlobalConfig = void 0;
30
30
  const fs = __importStar(require("fs"));
31
31
  const path = __importStar(require("path"));
32
32
  const os = __importStar(require("os"));
@@ -37,9 +37,12 @@ const nunjucks_1 = __importDefault(require("nunjucks"));
37
37
  const js_yaml_1 = __importDefault(require("js-yaml"));
38
38
  const sync_1 = require("csv-stringify/sync");
39
39
  const glob_1 = require("glob");
40
+ const drizzle_orm_1 = require("drizzle-orm");
40
41
  const logger_1 = __importDefault(require("./logger"));
41
42
  const esm_1 = require("./esm");
42
43
  const testCases_1 = require("./testCases");
44
+ const database_1 = require("./database");
45
+ const migrate_1 = require("./migrate");
43
46
  let globalConfigCache = null;
44
47
  function resetGlobalConfig() {
45
48
  globalConfigCache = null;
@@ -182,6 +185,12 @@ async function readConfig(configPath) {
182
185
  }
183
186
  }
184
187
  exports.readConfig = readConfig;
188
+ /**
189
+ * Reads multiple configuration files and combines them into a single UnifiedConfig.
190
+ *
191
+ * @param {string[]} configPaths - An array of paths to configuration files. Supports glob patterns.
192
+ * @returns {Promise<UnifiedConfig>} A promise that resolves to a unified configuration object.
193
+ */
185
194
  async function readConfigs(configPaths) {
186
195
  const configs = [];
187
196
  for (const configPath of configPaths) {
@@ -230,16 +239,18 @@ async function readConfigs(configPaths) {
230
239
  }
231
240
  return relativePath;
232
241
  };
242
+ const seenPrompts = new Set();
233
243
  configs.forEach((config, idx) => {
234
244
  if (typeof config.prompts === 'string') {
235
245
  (0, tiny_invariant_1.default)(Array.isArray(prompts), 'Cannot mix string and map-type prompts');
236
- config.prompts = makeAbsolute(configPaths[idx], config.prompts);
237
- prompts.push(config.prompts);
246
+ const absolutePrompt = makeAbsolute(configPaths[idx], config.prompts);
247
+ seenPrompts.add(absolutePrompt);
238
248
  }
239
249
  else if (Array.isArray(config.prompts)) {
240
250
  (0, tiny_invariant_1.default)(Array.isArray(prompts), 'Cannot mix configs with map and array-type prompts');
241
- config.prompts = config.prompts.map((prompt) => makeAbsolute(configPaths[idx], prompt));
242
- prompts.push(...config.prompts);
251
+ config.prompts
252
+ .map((prompt) => makeAbsolute(configPaths[idx], prompt))
253
+ .forEach((prompt) => seenPrompts.add(prompt));
243
254
  }
244
255
  else {
245
256
  // Object format such as { 'prompts/prompt1.txt': 'foo', 'prompts/prompt2.txt': 'bar' }
@@ -247,6 +258,9 @@ async function readConfigs(configPaths) {
247
258
  prompts = { ...prompts, ...config.prompts };
248
259
  }
249
260
  });
261
+ if (Array.isArray(prompts)) {
262
+ prompts.push(...Array.from(seenPrompts));
263
+ }
250
264
  // Combine all configs into a single UnifiedConfig
251
265
  const combinedConfig = {
252
266
  description: configs.map((config) => config.description).join(', '),
@@ -345,44 +359,114 @@ function setConfigDirectoryPath(newPath) {
345
359
  configDirectoryPath = newPath;
346
360
  }
347
361
  exports.setConfigDirectoryPath = setConfigDirectoryPath;
362
+ /**
363
+ * TODO(ian): Remove this
364
+ * @deprecated Use readLatestResults directly instead.
365
+ */
348
366
  function getLatestResultsPath() {
349
367
  return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
350
368
  }
351
369
  exports.getLatestResultsPath = getLatestResultsPath;
352
- function writeLatestResults(results, config) {
353
- const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
354
- // Replace hyphens with colons (Windows compatibility).
355
- const filename = dateToFilename(new Date());
356
- const newResultsPath = path.join(resultsDirectory, filename);
357
- const latestResultsPath = getLatestResultsPath();
370
+ async function writeResultsToDatabase(results, config, createdAt) {
371
+ createdAt = createdAt || new Date();
372
+ const evalId = `eval-${createdAt.toISOString().slice(0, 19)}`;
373
+ const db = (0, database_1.getDb)();
374
+ const promises = [];
375
+ promises.push(db
376
+ .insert(database_1.evals)
377
+ .values({
378
+ id: evalId,
379
+ createdAt: createdAt.getTime(),
380
+ description: config.description,
381
+ config,
382
+ results,
383
+ })
384
+ .onConflictDoNothing()
385
+ .run());
386
+ logger_1.default.debug(`Inserting eval ${evalId}`);
387
+ // Record prompt relation
388
+ for (const prompt of results.table.head.prompts) {
389
+ const promptId = sha256(prompt.display);
390
+ promises.push(db
391
+ .insert(database_1.prompts)
392
+ .values({
393
+ id: promptId,
394
+ prompt: prompt.display,
395
+ })
396
+ .onConflictDoNothing()
397
+ .run());
398
+ promises.push(db
399
+ .insert(database_1.evalsToPrompts)
400
+ .values({
401
+ evalId,
402
+ promptId,
403
+ })
404
+ .onConflictDoNothing()
405
+ .run());
406
+ logger_1.default.debug(`Inserting prompt ${promptId}`);
407
+ }
408
+ // Record dataset relation
409
+ const datasetId = sha256(JSON.stringify(config.tests || []));
410
+ promises.push(db
411
+ .insert(database_1.datasets)
412
+ .values({
413
+ id: datasetId,
414
+ tests: config.tests,
415
+ })
416
+ .onConflictDoNothing()
417
+ .run());
418
+ promises.push(db
419
+ .insert(database_1.evalsToDatasets)
420
+ .values({
421
+ evalId,
422
+ datasetId,
423
+ })
424
+ .onConflictDoNothing()
425
+ .run());
426
+ logger_1.default.debug(`Inserting dataset ${datasetId}`);
427
+ logger_1.default.debug(`Awaiting ${promises.length} promises to database...`);
428
+ await Promise.all(promises);
429
+ // "touch" db signal path
430
+ const filePath = (0, database_1.getDbSignalPath)();
358
431
  try {
359
- fs.mkdirSync(resultsDirectory, { recursive: true });
360
- const resultsFileData = {
361
- version: 2,
362
- createdAt: new Date().toISOString(),
363
- config,
364
- results,
365
- };
366
- fs.writeFileSync(newResultsPath, JSON.stringify(resultsFileData, null, 2));
367
- // Use copy instead of symlink to avoid issues with Windows permissions.
368
- try {
369
- // Backwards compatibility: delete old symlink.
370
- fs.unlinkSync(latestResultsPath);
371
- }
372
- catch { }
373
- fs.copyFileSync(newResultsPath, latestResultsPath);
374
- cleanupOldResults();
375
- return filename;
432
+ const now = new Date();
433
+ fs.utimesSync(filePath, now, now);
376
434
  }
377
435
  catch (err) {
378
- logger_1.default.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
379
- return null;
436
+ fs.closeSync(fs.openSync(filePath, 'w'));
380
437
  }
438
+ return evalId;
381
439
  }
382
- exports.writeLatestResults = writeLatestResults;
383
- const resultsCache = {};
384
- function listPreviousResultFilenames() {
440
+ exports.writeResultsToDatabase = writeResultsToDatabase;
441
+ /**
442
+ *
443
+ * @returns Last 100 evals in descending order.
444
+ */
445
+ function listPreviousResults() {
446
+ const db = (0, database_1.getDb)();
447
+ const results = db
448
+ .select({
449
+ name: database_1.evals.id,
450
+ description: database_1.evals.description,
451
+ })
452
+ .from(database_1.evals)
453
+ .orderBy((0, drizzle_orm_1.desc)(database_1.evals.createdAt))
454
+ .limit(100)
455
+ .all();
456
+ return results.map((result) => ({
457
+ evalId: result.name,
458
+ description: result.description,
459
+ }));
460
+ }
461
+ exports.listPreviousResults = listPreviousResults;
462
+ /**
463
+ * @deprecated Used only for migration to sqlite
464
+ */
465
+ function listPreviousResultFilenames_fileSystem() {
385
466
  const directory = path.join(getConfigDirectoryPath(), 'output');
467
+ if (!fs.existsSync(directory)) {
468
+ return [];
469
+ }
386
470
  const files = fs.readdirSync(directory);
387
471
  const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
388
472
  return resultsFiles.sort((a, b) => {
@@ -391,10 +475,17 @@ function listPreviousResultFilenames() {
391
475
  return statA.birthtime.getTime() - statB.birthtime.getTime(); // sort in ascending order
392
476
  });
393
477
  }
394
- exports.listPreviousResultFilenames = listPreviousResultFilenames;
395
- function listPreviousResults() {
478
+ exports.listPreviousResultFilenames_fileSystem = listPreviousResultFilenames_fileSystem;
479
+ const resultsCache = {};
480
+ /**
481
+ * @deprecated Used only for migration to sqlite
482
+ */
483
+ function listPreviousResults_fileSystem() {
396
484
  const directory = path.join(getConfigDirectoryPath(), 'output');
397
- const sortedFiles = listPreviousResultFilenames();
485
+ if (!fs.existsSync(directory)) {
486
+ return [];
487
+ }
488
+ const sortedFiles = listPreviousResultFilenames_fileSystem();
398
489
  return sortedFiles.map((fileName) => {
399
490
  if (!resultsCache[fileName]) {
400
491
  try {
@@ -412,15 +503,71 @@ function listPreviousResults() {
412
503
  };
413
504
  });
414
505
  }
415
- exports.listPreviousResults = listPreviousResults;
506
+ exports.listPreviousResults_fileSystem = listPreviousResults_fileSystem;
507
+ let attemptedMigration = false;
508
+ async function migrateResultsFromFileSystemToDatabase() {
509
+ if (attemptedMigration) {
510
+ // TODO(ian): Record this bit in the database.
511
+ return;
512
+ }
513
+ // First run db migrations
514
+ logger_1.default.debug('Running db migrations...');
515
+ await (0, migrate_1.runDbMigrations)();
516
+ const fileNames = listPreviousResultFilenames_fileSystem();
517
+ if (fileNames.length === 0) {
518
+ return;
519
+ }
520
+ logger_1.default.info(`🔁 Migrating ${fileNames.length} flat files to local database.`);
521
+ logger_1.default.info('This is a one-time operation and may take a minute...');
522
+ attemptedMigration = true;
523
+ const outputDir = path.join(getConfigDirectoryPath(), 'output');
524
+ const backupDir = `${outputDir}-backup-${new Date()
525
+ .toISOString()
526
+ .slice(0, 10)
527
+ .replace(/-/g, '')}`;
528
+ try {
529
+ fs.cpSync(outputDir, backupDir, { recursive: true });
530
+ logger_1.default.info(`Backup of output directory created at ${backupDir}`);
531
+ }
532
+ catch (backupError) {
533
+ logger_1.default.error(`Failed to create backup of output directory: ${backupError}`);
534
+ return;
535
+ }
536
+ logger_1.default.info('Moving files into database...');
537
+ const migrationPromises = fileNames.map(async (fileName) => {
538
+ const fileData = readResult_fileSystem(fileName);
539
+ if (fileData) {
540
+ await writeResultsToDatabase(fileData.result.results, fileData.result.config, filenameToDate(fileName));
541
+ logger_1.default.debug(`Migrated ${fileName} to database.`);
542
+ try {
543
+ fs.unlinkSync(path.join(outputDir, fileName));
544
+ }
545
+ catch (err) {
546
+ logger_1.default.warn(`Failed to delete ${fileName} after migration: ${err}`);
547
+ }
548
+ }
549
+ else {
550
+ logger_1.default.warn(`Failed to migrate result ${fileName} due to read error.`);
551
+ }
552
+ });
553
+ await Promise.all(migrationPromises);
554
+ try {
555
+ fs.unlinkSync(getLatestResultsPath());
556
+ }
557
+ catch (err) {
558
+ logger_1.default.warn(`Failed to delete latest.json: ${err}`);
559
+ }
560
+ logger_1.default.info('Migration complete. Please restart your web server if it is running.');
561
+ }
562
+ exports.migrateResultsFromFileSystemToDatabase = migrateResultsFromFileSystemToDatabase;
416
563
  const RESULT_HISTORY_LENGTH = parseInt(process.env.RESULT_HISTORY_LENGTH || '', 10) || 100;
417
- function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
418
- const sortedFilenames = listPreviousResultFilenames();
564
+ function cleanupOldFileResults(remaining = RESULT_HISTORY_LENGTH) {
565
+ const sortedFilenames = listPreviousResultFilenames_fileSystem();
419
566
  for (let i = 0; i < sortedFilenames.length - remaining; i++) {
420
567
  fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFilenames[i]));
421
568
  }
422
569
  }
423
- exports.cleanupOldResults = cleanupOldResults;
570
+ exports.cleanupOldFileResults = cleanupOldFileResults;
424
571
  function filenameToDate(filename) {
425
572
  const dateString = filename.slice('eval-'.length, filename.length - '.json'.length);
426
573
  // Replace hyphens with colons where necessary (Windows compatibility).
@@ -428,27 +575,67 @@ function filenameToDate(filename) {
428
575
  const timePart = dateParts[1].replace(/-/g, ':');
429
576
  const formattedDateString = `${dateParts[0]}T${timePart}`;
430
577
  const date = new Date(formattedDateString);
578
+ return date;
579
+ /*
431
580
  return date.toLocaleDateString('en-US', {
432
- year: 'numeric',
433
- month: 'long',
434
- day: 'numeric',
435
- hour: '2-digit',
436
- minute: '2-digit',
437
- second: '2-digit',
438
- timeZoneName: 'short',
581
+ year: 'numeric',
582
+ month: 'long',
583
+ day: 'numeric',
584
+ hour: '2-digit',
585
+ minute: '2-digit',
586
+ second: '2-digit',
587
+ timeZoneName: 'short',
439
588
  });
589
+ */
440
590
  }
441
591
  exports.filenameToDate = filenameToDate;
442
592
  function dateToFilename(date) {
443
593
  return `eval-${date.toISOString().replace(/:/g, '-')}.json`;
444
594
  }
445
595
  exports.dateToFilename = dateToFilename;
446
- function readResult(name) {
596
+ async function readResult(id) {
597
+ const db = (0, database_1.getDb)();
598
+ try {
599
+ const evalResult = await db
600
+ .select({
601
+ id: database_1.evals.id,
602
+ createdAt: database_1.evals.createdAt,
603
+ results: database_1.evals.results,
604
+ config: database_1.evals.config,
605
+ })
606
+ .from(database_1.evals)
607
+ .where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
608
+ .execute();
609
+ if (evalResult.length === 0) {
610
+ return undefined;
611
+ }
612
+ const { id: resultId, createdAt, results, config } = evalResult[0];
613
+ const result = {
614
+ version: 3,
615
+ createdAt: new Date(createdAt).toISOString().slice(0, 10),
616
+ results,
617
+ config,
618
+ };
619
+ return {
620
+ id: resultId,
621
+ result,
622
+ createdAt: new Date(createdAt),
623
+ };
624
+ }
625
+ catch (err) {
626
+ logger_1.default.error(`Failed to read result with ID ${id} from database:\n${err}`);
627
+ }
628
+ }
629
+ exports.readResult = readResult;
630
+ /**
631
+ * @deprecated Used only for migration to sqlite
632
+ */
633
+ function readResult_fileSystem(name) {
447
634
  const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
448
635
  const resultsPath = path.join(resultsDirectory, name);
449
636
  try {
450
637
  const result = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
451
- const createdAt = new Date(filenameToDate(name));
638
+ const createdAt = filenameToDate(name);
452
639
  return {
453
640
  id: sha256(JSON.stringify(result.config)),
454
641
  result,
@@ -459,35 +646,70 @@ function readResult(name) {
459
646
  logger_1.default.error(`Failed to read results from ${resultsPath}:\n${err}`);
460
647
  }
461
648
  }
462
- exports.readResult = readResult;
463
- function updateResult(filename, newConfig, newTable) {
464
- const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
465
- const safeFilename = path.basename(filename);
466
- const resultsPath = path.join(resultsDirectory, safeFilename);
649
+ exports.readResult_fileSystem = readResult_fileSystem;
650
+ async function updateResult(id, newConfig, newTable) {
651
+ const db = (0, database_1.getDb)();
467
652
  try {
468
- const evalData = JSON.parse(fs.readFileSync(resultsPath, 'utf-8'));
653
+ // Fetch the existing eval data from the database
654
+ const existingEval = await db
655
+ .select({
656
+ config: database_1.evals.config,
657
+ results: database_1.evals.results,
658
+ })
659
+ .from(database_1.evals)
660
+ .where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
661
+ .limit(1)
662
+ .all();
663
+ if (existingEval.length === 0) {
664
+ logger_1.default.error(`Eval with ID ${id} not found.`);
665
+ return;
666
+ }
667
+ const evalData = existingEval[0];
469
668
  if (newConfig) {
470
669
  evalData.config = newConfig;
471
670
  }
472
671
  if (newTable) {
473
672
  evalData.results.table = newTable;
474
673
  }
475
- resultsCache[safeFilename] = evalData;
476
- fs.writeFileSync(resultsPath, JSON.stringify(evalData, null, 2));
477
- logger_1.default.info(`Updated eval at ${resultsPath}`);
478
- const resultFilenames = listPreviousResultFilenames();
479
- if (filename === resultFilenames[resultFilenames.length - 1]) {
480
- // Overwite latest.json too
481
- fs.copyFileSync(resultsPath, getLatestResultsPath());
482
- }
674
+ await db
675
+ .update(database_1.evals)
676
+ .set({
677
+ description: evalData.config.description,
678
+ config: evalData.config,
679
+ results: evalData.results,
680
+ })
681
+ .where((0, drizzle_orm_1.eq)(database_1.evals.id, id))
682
+ .run();
683
+ logger_1.default.info(`Updated eval with ID ${id}`);
483
684
  }
484
685
  catch (err) {
485
- logger_1.default.error(`Failed to update eval at ${resultsPath}:\n${err}`);
686
+ logger_1.default.error(`Failed to update eval with ID ${id}:\n${err}`);
486
687
  }
487
688
  }
488
689
  exports.updateResult = updateResult;
489
- function readLatestResults() {
490
- return JSON.parse(fs.readFileSync(getLatestResultsPath(), 'utf-8'));
690
+ async function readLatestResults() {
691
+ const db = (0, database_1.getDb)();
692
+ const latestResults = await db
693
+ .select({
694
+ id: database_1.evals.id,
695
+ createdAt: database_1.evals.createdAt,
696
+ description: database_1.evals.description,
697
+ results: database_1.evals.results,
698
+ config: database_1.evals.config,
699
+ })
700
+ .from(database_1.evals)
701
+ .orderBy((0, drizzle_orm_1.desc)(database_1.evals.createdAt))
702
+ .limit(1);
703
+ if (!latestResults || latestResults.length === 0) {
704
+ return undefined;
705
+ }
706
+ const latestResult = latestResults[0];
707
+ return {
708
+ version: 3,
709
+ createdAt: new Date(latestResult.createdAt).toISOString(),
710
+ results: latestResult.results,
711
+ config: latestResult.config,
712
+ };
491
713
  }
492
714
  exports.readLatestResults = readLatestResults;
493
715
  function getPromptsForTestCases(testCases) {
@@ -512,26 +734,39 @@ function getPrompts() {
512
734
  return getPromptsWithPredicate(() => true);
513
735
  }
514
736
  exports.getPrompts = getPrompts;
515
- function getPromptsWithPredicate(predicate) {
516
- const resultFilenames = listPreviousResultFilenames();
737
+ async function getPromptsWithPredicate(predicate) {
738
+ // TODO(ian): Make this use a proper database query
739
+ const db = (0, database_1.getDb)();
740
+ const evals_ = await db
741
+ .select({
742
+ id: database_1.evals.id,
743
+ createdAt: database_1.evals.createdAt,
744
+ results: database_1.evals.results,
745
+ config: database_1.evals.config,
746
+ })
747
+ .from(database_1.evals)
748
+ .limit(100)
749
+ .all();
517
750
  const groupedPrompts = {};
518
- for (const fileName of resultFilenames) {
519
- const file = readResult(fileName);
520
- if (!file) {
521
- continue;
522
- }
523
- const { result, createdAt } = file;
524
- if (result && predicate(result)) {
525
- for (const prompt of result.results.table.head.prompts) {
526
- const evalId = sha256(JSON.stringify(result.config));
751
+ for (const eval_ of evals_) {
752
+ const createdAt = new Date(eval_.createdAt).toISOString();
753
+ const resultWrapper = {
754
+ version: 3,
755
+ createdAt,
756
+ results: eval_.results,
757
+ config: eval_.config,
758
+ };
759
+ if (predicate(resultWrapper)) {
760
+ for (const prompt of resultWrapper.results.table.head.prompts) {
527
761
  const promptId = sha256(prompt.raw);
528
- const datasetId = result.config.tests ? sha256(JSON.stringify(result.config.tests)) : '-';
762
+ const datasetId = resultWrapper.config.tests
763
+ ? sha256(JSON.stringify(resultWrapper.config.tests))
764
+ : '-';
529
765
  if (promptId in groupedPrompts) {
530
766
  groupedPrompts[promptId].recentEvalDate = new Date(Math.max(groupedPrompts[promptId].recentEvalDate.getTime(), new Date(createdAt).getTime()));
531
767
  groupedPrompts[promptId].count += 1;
532
768
  groupedPrompts[promptId].evals.push({
533
- id: evalId,
534
- filePath: fileName,
769
+ id: eval_.id,
535
770
  datasetId,
536
771
  metrics: prompt.metrics,
537
772
  });
@@ -542,12 +777,10 @@ function getPromptsWithPredicate(predicate) {
542
777
  id: promptId,
543
778
  prompt,
544
779
  recentEvalDate: new Date(createdAt),
545
- recentEvalId: evalId,
546
- recentEvalFilepath: fileName,
780
+ recentEvalId: eval_.id,
547
781
  evals: [
548
782
  {
549
- id: evalId,
550
- filePath: fileName,
783
+ id: eval_.id,
551
784
  datasetId,
552
785
  metrics: prompt.metrics,
553
786
  },
@@ -560,31 +793,42 @@ function getPromptsWithPredicate(predicate) {
560
793
  return Object.values(groupedPrompts);
561
794
  }
562
795
  exports.getPromptsWithPredicate = getPromptsWithPredicate;
563
- function getTestCases() {
796
+ async function getTestCases() {
564
797
  return getTestCasesWithPredicate(() => true);
565
798
  }
566
799
  exports.getTestCases = getTestCases;
567
- function getTestCasesWithPredicate(predicate) {
568
- const resultFilenames = listPreviousResultFilenames();
800
+ async function getTestCasesWithPredicate(predicate) {
801
+ const db = (0, database_1.getDb)();
802
+ const evals_ = await db
803
+ .select({
804
+ id: database_1.evals.id,
805
+ createdAt: database_1.evals.createdAt,
806
+ results: database_1.evals.results,
807
+ config: database_1.evals.config,
808
+ })
809
+ .from(database_1.evals)
810
+ .limit(100)
811
+ .all();
569
812
  const groupedTestCases = {};
570
- for (const fileName of resultFilenames) {
571
- const file = readResult(fileName);
572
- if (!file) {
573
- continue;
574
- }
575
- const { result, createdAt } = file;
576
- const testCases = result?.config?.tests;
577
- if (testCases && predicate(result)) {
578
- const evalId = sha256(JSON.stringify(result.config));
813
+ for (const eval_ of evals_) {
814
+ const createdAt = new Date(eval_.createdAt).toISOString();
815
+ const resultWrapper = {
816
+ version: 3,
817
+ createdAt,
818
+ results: eval_.results,
819
+ config: eval_.config,
820
+ };
821
+ const testCases = resultWrapper.config.tests;
822
+ if (testCases && predicate(resultWrapper)) {
823
+ const evalId = eval_.id;
579
824
  const datasetId = sha256(JSON.stringify(testCases));
580
825
  if (datasetId in groupedTestCases) {
581
- groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), new Date(createdAt).getTime()));
826
+ groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), eval_.createdAt));
582
827
  groupedTestCases[datasetId].count += 1;
583
- const newPrompts = result.results.table.head.prompts.map((prompt) => ({
828
+ const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
584
829
  id: sha256(prompt.raw),
585
830
  prompt,
586
831
  evalId,
587
- evalFilepath: fileName,
588
832
  }));
589
833
  const promptsById = {};
590
834
  for (const prompt of groupedTestCases[datasetId].prompts.concat(newPrompts)) {
@@ -595,11 +839,10 @@ function getTestCasesWithPredicate(predicate) {
595
839
  groupedTestCases[datasetId].prompts = Object.values(promptsById);
596
840
  }
597
841
  else {
598
- const newPrompts = result.results.table.head.prompts.map((prompt) => ({
599
- id: (0, crypto_1.createHash)('sha256').update(prompt.raw).digest('hex'),
842
+ const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
843
+ id: sha256(prompt.raw),
600
844
  prompt,
601
845
  evalId,
602
- evalFilepath: fileName,
603
846
  }));
604
847
  const promptsById = {};
605
848
  for (const prompt of newPrompts) {
@@ -613,7 +856,6 @@ function getTestCasesWithPredicate(predicate) {
613
856
  testCases,
614
857
  recentEvalDate: new Date(createdAt),
615
858
  recentEvalId: evalId,
616
- recentEvalFilepath: fileName,
617
859
  prompts: Object.values(promptsById),
618
860
  };
619
861
  }
@@ -622,8 +864,8 @@ function getTestCasesWithPredicate(predicate) {
622
864
  return Object.values(groupedTestCases);
623
865
  }
624
866
  exports.getTestCasesWithPredicate = getTestCasesWithPredicate;
625
- function getPromptFromHash(hash) {
626
- const prompts = getPrompts();
867
+ async function getPromptFromHash(hash) {
868
+ const prompts = await getPrompts();
627
869
  for (const prompt of prompts) {
628
870
  if (prompt.id.startsWith(hash)) {
629
871
  return prompt;
@@ -632,8 +874,8 @@ function getPromptFromHash(hash) {
632
874
  return undefined;
633
875
  }
634
876
  exports.getPromptFromHash = getPromptFromHash;
635
- function getDatasetFromHash(hash) {
636
- const datasets = getTestCases();
877
+ async function getDatasetFromHash(hash) {
878
+ const datasets = await getTestCases();
637
879
  for (const dataset of datasets) {
638
880
  if (dataset.id.startsWith(hash)) {
639
881
  return dataset;
@@ -642,13 +884,13 @@ function getDatasetFromHash(hash) {
642
884
  return undefined;
643
885
  }
644
886
  exports.getDatasetFromHash = getDatasetFromHash;
645
- function getEvals() {
887
+ async function getEvals() {
646
888
  return getEvalsWithPredicate(() => true);
647
889
  }
648
890
  exports.getEvals = getEvals;
649
- function getEvalFromHash(hash) {
650
- const evals = getEvals();
651
- for (const eval_ of evals) {
891
+ async function getEvalFromHash(hash) {
892
+ const evals_ = await getEvals();
893
+ for (const eval_ of evals_) {
652
894
  if (eval_.id.startsWith(hash)) {
653
895
  return eval_;
654
896
  }
@@ -656,23 +898,34 @@ function getEvalFromHash(hash) {
656
898
  return undefined;
657
899
  }
658
900
  exports.getEvalFromHash = getEvalFromHash;
659
- function getEvalsWithPredicate(predicate) {
901
+ async function getEvalsWithPredicate(predicate) {
902
+ const db = (0, database_1.getDb)();
903
+ const evals_ = await db
904
+ .select({
905
+ id: database_1.evals.id,
906
+ createdAt: database_1.evals.createdAt,
907
+ results: database_1.evals.results,
908
+ config: database_1.evals.config,
909
+ })
910
+ .from(database_1.evals)
911
+ .limit(100)
912
+ .all();
660
913
  const ret = [];
661
- const resultsFilenames = listPreviousResultFilenames();
662
- for (const fileName of resultsFilenames) {
663
- const file = readResult(fileName);
664
- if (!file) {
665
- continue;
666
- }
667
- const { result, createdAt } = file;
668
- if (result && predicate(result)) {
669
- const evalId = sha256(fileName + ':' + JSON.stringify(result.config));
914
+ for (const eval_ of evals_) {
915
+ const createdAt = new Date(eval_.createdAt).toISOString();
916
+ const resultWrapper = {
917
+ version: 3,
918
+ createdAt: createdAt,
919
+ results: eval_.results,
920
+ config: eval_.config,
921
+ };
922
+ if (predicate(resultWrapper)) {
923
+ const evalId = eval_.id;
670
924
  ret.push({
671
925
  id: evalId,
672
- filePath: fileName,
673
- date: createdAt,
674
- config: result.config,
675
- results: result.results,
926
+ date: new Date(eval_.createdAt),
927
+ config: eval_.config,
928
+ results: eval_.results,
676
929
  });
677
930
  }
678
931
  }