promptfoo 0.91.2 → 0.92.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. package/dist/drizzle/0006_harsh_caretaker.sql +42 -0
  2. package/dist/drizzle/0007_cloudy_wong.sql +1 -0
  3. package/dist/drizzle/meta/0006_snapshot.json +721 -0
  4. package/dist/drizzle/meta/0007_snapshot.json +723 -0
  5. package/dist/drizzle/meta/_journal.json +14 -0
  6. package/dist/package.json +10 -8
  7. package/dist/src/app/assets/{index-Bc-q9rGp.js → index-CMDD1oSm.js} +233 -231
  8. package/dist/src/app/assets/{index.es-b3UhzAjj.js → index.es-D8cSwMq4.js} +1 -1
  9. package/dist/src/app/assets/{sync-D-OjEwME.js → sync-DJZvzYiS.js} +1 -1
  10. package/dist/src/app/index.html +1 -1
  11. package/dist/src/assertions.js +2 -2
  12. package/dist/src/assertions.js.map +1 -1
  13. package/dist/src/commands/cache.d.ts.map +1 -1
  14. package/dist/src/commands/cache.js +0 -2
  15. package/dist/src/commands/cache.js.map +1 -1
  16. package/dist/src/commands/eval.d.ts.map +1 -1
  17. package/dist/src/commands/eval.js +19 -16
  18. package/dist/src/commands/eval.js.map +1 -1
  19. package/dist/src/commands/export.d.ts.map +1 -1
  20. package/dist/src/commands/export.js +8 -31
  21. package/dist/src/commands/export.js.map +1 -1
  22. package/dist/src/commands/import.d.ts.map +1 -1
  23. package/dist/src/commands/import.js +52 -13
  24. package/dist/src/commands/import.js.map +1 -1
  25. package/dist/src/commands/list.d.ts.map +1 -1
  26. package/dist/src/commands/list.js +35 -7
  27. package/dist/src/commands/list.js.map +1 -1
  28. package/dist/src/commands/share.d.ts +2 -2
  29. package/dist/src/commands/share.d.ts.map +1 -1
  30. package/dist/src/commands/share.js +12 -13
  31. package/dist/src/commands/share.js.map +1 -1
  32. package/dist/src/commands/show.d.ts.map +1 -1
  33. package/dist/src/commands/show.js +10 -6
  34. package/dist/src/commands/show.js.map +1 -1
  35. package/dist/src/constants.d.ts +1 -0
  36. package/dist/src/constants.d.ts.map +1 -1
  37. package/dist/src/constants.js +2 -1
  38. package/dist/src/constants.js.map +1 -1
  39. package/dist/src/database/index.js +1 -1
  40. package/dist/src/database/index.js.map +1 -1
  41. package/dist/src/database/tables.d.ts +602 -4
  42. package/dist/src/database/tables.d.ts.map +1 -1
  43. package/dist/src/database/tables.js +67 -8
  44. package/dist/src/database/tables.js.map +1 -1
  45. package/dist/src/database/types.d.ts +3 -3
  46. package/dist/src/database/types.d.ts.map +1 -1
  47. package/dist/src/evaluator.d.ts +3 -2
  48. package/dist/src/evaluator.d.ts.map +1 -1
  49. package/dist/src/evaluator.js +75 -104
  50. package/dist/src/evaluator.js.map +1 -1
  51. package/dist/src/evaluatorHelpers.d.ts.map +1 -1
  52. package/dist/src/evaluatorHelpers.js +2 -1
  53. package/dist/src/evaluatorHelpers.js.map +1 -1
  54. package/dist/src/index.d.ts +4 -1
  55. package/dist/src/index.d.ts.map +1 -1
  56. package/dist/src/index.js +12 -9
  57. package/dist/src/index.js.map +1 -1
  58. package/dist/src/models/eval.d.ts +95 -0
  59. package/dist/src/models/eval.d.ts.map +1 -0
  60. package/dist/src/models/eval.js +390 -0
  61. package/dist/src/models/eval.js.map +1 -0
  62. package/dist/src/models/evalResult.d.ts +50 -0
  63. package/dist/src/models/evalResult.d.ts.map +1 -0
  64. package/dist/src/models/evalResult.js +122 -0
  65. package/dist/src/models/evalResult.js.map +1 -0
  66. package/dist/src/models/provider.d.ts +9 -0
  67. package/dist/src/models/provider.d.ts.map +1 -0
  68. package/dist/src/models/provider.js +47 -0
  69. package/dist/src/models/provider.js.map +1 -0
  70. package/dist/src/prompts/index.d.ts.map +1 -1
  71. package/dist/src/prompts/index.js +2 -1
  72. package/dist/src/prompts/index.js.map +1 -1
  73. package/dist/src/prompts/utils.d.ts +1 -0
  74. package/dist/src/prompts/utils.d.ts.map +1 -1
  75. package/dist/src/prompts/utils.js +7 -0
  76. package/dist/src/prompts/utils.js.map +1 -1
  77. package/dist/src/providers/http.js +2 -2
  78. package/dist/src/providers/http.js.map +1 -1
  79. package/dist/src/providers.js +5 -5
  80. package/dist/src/providers.js.map +1 -1
  81. package/dist/src/redteam/constants.d.ts +1 -1
  82. package/dist/src/redteam/constants.d.ts.map +1 -1
  83. package/dist/src/redteam/constants.js +7 -5
  84. package/dist/src/redteam/constants.js.map +1 -1
  85. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240617.json +10 -0
  86. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240618.json +10 -0
  87. package/dist/src/redteam/eval/harmful/llm_rubric-20240723.json +10 -0
  88. package/dist/src/redteam/eval/harmful/llm_rubric-20240724.json +10 -0
  89. package/dist/src/redteam/graders.d.ts +2 -0
  90. package/dist/src/redteam/graders.d.ts.map +1 -1
  91. package/dist/src/redteam/graders.js +2 -0
  92. package/dist/src/redteam/graders.js.map +1 -1
  93. package/dist/src/redteam/plugins/index.d.ts.map +1 -1
  94. package/dist/src/redteam/plugins/index.js +1 -0
  95. package/dist/src/redteam/plugins/index.js.map +1 -1
  96. package/dist/src/redteam/plugins/religion.d.ts +6 -0
  97. package/dist/src/redteam/plugins/religion.d.ts.map +1 -0
  98. package/dist/src/redteam/plugins/religion.js +14 -0
  99. package/dist/src/redteam/plugins/religion.js.map +1 -0
  100. package/dist/src/server/routes/evalRoutes.d.ts +1 -0
  101. package/dist/src/server/routes/evalRoutes.d.ts.map +1 -0
  102. package/dist/src/server/routes/evalRoutes.js +2 -0
  103. package/dist/src/server/routes/evalRoutes.js.map +1 -0
  104. package/dist/src/server/server.d.ts +1 -0
  105. package/dist/src/server/server.d.ts.map +1 -1
  106. package/dist/src/server/server.js +70 -31
  107. package/dist/src/server/server.js.map +1 -1
  108. package/dist/src/share.d.ts +2 -2
  109. package/dist/src/share.d.ts.map +1 -1
  110. package/dist/src/share.js +93 -34
  111. package/dist/src/share.js.map +1 -1
  112. package/dist/src/table.d.ts +2 -2
  113. package/dist/src/table.d.ts.map +1 -1
  114. package/dist/src/table.js +3 -3
  115. package/dist/src/table.js.map +1 -1
  116. package/dist/src/types/index.d.ts +163 -11
  117. package/dist/src/types/index.d.ts.map +1 -1
  118. package/dist/src/types/index.js +21 -1
  119. package/dist/src/types/index.js.map +1 -1
  120. package/dist/src/util/config/load.d.ts.map +1 -1
  121. package/dist/src/util/config/load.js +2 -1
  122. package/dist/src/util/config/load.js.map +1 -1
  123. package/dist/src/util/config/manage.d.ts.map +1 -1
  124. package/dist/src/util/config/manage.js.map +1 -1
  125. package/dist/src/util/convertEvalResultsToTable.d.ts +16 -0
  126. package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -0
  127. package/dist/src/util/convertEvalResultsToTable.js +137 -0
  128. package/dist/src/util/convertEvalResultsToTable.js.map +1 -0
  129. package/dist/src/util/createHash.d.ts +1 -0
  130. package/dist/src/util/createHash.d.ts.map +1 -1
  131. package/dist/src/util/createHash.js +9 -0
  132. package/dist/src/util/createHash.js.map +1 -1
  133. package/dist/src/util/file.d.ts +8 -0
  134. package/dist/src/util/file.d.ts.map +1 -0
  135. package/dist/src/util/file.js +13 -0
  136. package/dist/src/util/file.js.map +1 -0
  137. package/dist/src/util/index.d.ts +9 -14
  138. package/dist/src/util/index.d.ts.map +1 -1
  139. package/dist/src/util/index.js +87 -223
  140. package/dist/src/util/index.js.map +1 -1
  141. package/dist/src/util/time.d.ts +2 -0
  142. package/dist/src/util/time.d.ts.map +1 -0
  143. package/dist/src/util/time.js +7 -0
  144. package/dist/src/util/time.js.map +1 -0
  145. package/dist/src/util/transform.js +2 -2
  146. package/dist/src/util/transform.js.map +1 -1
  147. package/dist/src/validators/providers.d.ts +6 -0
  148. package/dist/src/validators/providers.d.ts.map +1 -1
  149. package/dist/src/validators/providers.js +1 -0
  150. package/dist/src/validators/providers.js.map +1 -1
  151. package/dist/src/validators/redteam.d.ts +6 -0
  152. package/dist/src/validators/redteam.d.ts.map +1 -1
  153. package/dist/test/commands/eval/filterFailingTests.test.js +24 -2
  154. package/dist/test/commands/eval/filterFailingTests.test.js.map +1 -1
  155. package/dist/test/evaluator.test.js +153 -74
  156. package/dist/test/evaluator.test.js.map +1 -1
  157. package/dist/test/factories/data/eval/database_records.d.ts +142 -0
  158. package/dist/test/factories/data/eval/database_records.d.ts.map +1 -0
  159. package/dist/test/factories/data/eval/database_records.js +251 -0
  160. package/dist/test/factories/data/eval/database_records.js.map +1 -0
  161. package/dist/test/factories/evalFactory.d.ts +768 -0
  162. package/dist/test/factories/evalFactory.d.ts.map +1 -0
  163. package/dist/test/factories/evalFactory.js +121 -0
  164. package/dist/test/factories/evalFactory.js.map +1 -0
  165. package/dist/test/factories/index.d.ts +1 -0
  166. package/dist/test/factories/index.d.ts.map +1 -0
  167. package/dist/test/factories/index.js +2 -0
  168. package/dist/test/factories/index.js.map +1 -0
  169. package/dist/test/index.test.js +17 -33
  170. package/dist/test/index.test.js.map +1 -1
  171. package/dist/test/models/eval.test.d.ts +2 -0
  172. package/dist/test/models/eval.test.d.ts.map +1 -0
  173. package/dist/test/models/eval.test.js +34 -0
  174. package/dist/test/models/eval.test.js.map +1 -0
  175. package/dist/test/providers.test.js +3 -3
  176. package/dist/test/providers.test.js.map +1 -1
  177. package/dist/test/server/share.test.d.ts +2 -0
  178. package/dist/test/server/share.test.d.ts.map +1 -0
  179. package/dist/test/server/share.test.js +36 -0
  180. package/dist/test/server/share.test.js.map +1 -0
  181. package/dist/test/server/v3evalToShare.json +507 -0
  182. package/dist/test/server/v4evalToShare.json +421 -0
  183. package/dist/test/types.test.js +58 -0
  184. package/dist/test/types.test.js.map +1 -1
  185. package/dist/test/util.file.test.d.ts +2 -0
  186. package/dist/test/util.file.test.d.ts.map +1 -0
  187. package/dist/test/util.file.test.js +32 -0
  188. package/dist/test/util.file.test.js.map +1 -0
  189. package/dist/test/util.listPrevious.test.d.ts +2 -0
  190. package/dist/test/util.listPrevious.test.d.ts.map +1 -0
  191. package/dist/test/util.listPrevious.test.js +37 -0
  192. package/dist/test/util.listPrevious.test.js.map +1 -0
  193. package/dist/test/util.test.js +38 -311
  194. package/dist/test/util.test.js.map +1 -1
  195. package/dist/tsconfig.tsbuildinfo +1 -0
  196. package/package.json +10 -8
@@ -1,25 +1,19 @@
1
- import { type EvalWithMetadata, type EvaluateResult, type EvaluateSummary, type EvaluateTable, type NunjucksFilterMap, type PromptWithMetadata, type ResultsFile, type TestCase, type TestCasesWithMetadata, type UnifiedConfig, type OutputFile, type CompletedPrompt, type ResultLightweight } from '../types';
2
- /**
3
- * Checks if a file is a JavaScript or TypeScript file based on its extension.
4
- *
5
- * @param filePath - The path of the file to check.
6
- * @returns True if the file has a JavaScript or TypeScript extension, false otherwise.
7
- */
8
- export declare function isJavascriptFile(filePath: string): boolean;
9
- export declare function writeOutput(outputPath: string, evalId: string | null, results: EvaluateSummary, config: Partial<UnifiedConfig>, shareableUrl: string | null): Promise<void>;
10
- export declare function writeMultipleOutputs(outputPaths: string[], evalId: string | null, results: EvaluateSummary, config: Partial<UnifiedConfig>, shareableUrl: string | null): Promise<void>;
1
+ import Eval from '../models/eval';
2
+ import { type EvalWithMetadata, type EvaluateResult, type EvaluateTable, type NunjucksFilterMap, type PromptWithMetadata, type ResultsFile, type TestCase, type TestCasesWithMetadata, type UnifiedConfig, type OutputFile, type CompletedPrompt, type ResultLightweight, type EvaluateSummaryV2 } from '../types';
3
+ export declare function writeOutput(outputPath: string, evalRecord: Eval, shareableUrl: string | null): Promise<void>;
4
+ export declare function writeMultipleOutputs(outputPaths: string[], evalRecord: Eval, shareableUrl: string | null): Promise<void>;
11
5
  export declare function readOutput(outputPath: string): Promise<OutputFile>;
12
6
  /**
13
7
  * TODO(ian): Remove this
14
8
  * @deprecated Use readLatestResults directly instead.
15
9
  */
16
10
  export declare function getLatestResultsPath(): string;
17
- export declare function writeResultsToDatabase(results: EvaluateSummary, config: Partial<UnifiedConfig>, createdAt?: Date): Promise<string>;
11
+ export declare function writeResultsToDatabase(results: EvaluateSummaryV2, config: Partial<UnifiedConfig>, createdAt?: Date): Promise<string>;
18
12
  /**
19
13
  *
20
14
  * @returns Last n evals in descending order.
21
15
  */
22
- export declare function listPreviousResults(limit?: number, filterDescription?: string, datasetId?: string): ResultLightweight[];
16
+ export declare function listPreviousResults(limit?: number, filterDescription?: string, datasetId?: string): Promise<ResultLightweight[]>;
23
17
  /**
24
18
  * @deprecated Used only for migration to sqlite
25
19
  */
@@ -42,7 +36,6 @@ export declare function readResult_fileSystem(name: string): {
42
36
  createdAt: Date;
43
37
  } | undefined;
44
38
  export declare function migrateResultsFromFileSystemToDatabase(): Promise<void>;
45
- export declare function cleanupOldFileResults(remaining?: number): void;
46
39
  export declare function readResult(id: string): Promise<{
47
40
  id: string;
48
41
  result: ResultsFile;
@@ -170,6 +163,7 @@ export declare function getTestCases(limit?: number): Promise<{
170
163
  } | {
171
164
  callApi: import("../types").CallApiFunction;
172
165
  id: (...args: unknown[]) => string;
166
+ config?: any;
173
167
  label?: string | undefined;
174
168
  transform?: string | undefined;
175
169
  delay?: number | undefined;
@@ -334,6 +328,7 @@ export declare function getDatasetFromHash(hash: string): Promise<{
334
328
  } | {
335
329
  callApi: import("../types").CallApiFunction;
336
330
  id: (...args: unknown[]) => string;
331
+ config?: any;
337
332
  label?: string | undefined;
338
333
  transform?: string | undefined;
339
334
  delay?: number | undefined;
@@ -412,7 +407,7 @@ export declare function getStandaloneEvals({ limit, tag, description, }?: {
412
407
  value: string;
413
408
  };
414
409
  description?: string;
415
- }): StandaloneEval[];
410
+ }): Promise<StandaloneEval[]>;
416
411
  export declare function providerToIdentifier(provider: TestCase['provider']): string | undefined;
417
412
  export declare function varsMatch(vars1: Record<string, string | string[] | object> | undefined, vars2: Record<string, string | string[] | object> | undefined): boolean;
418
413
  export declare function resultIsForTestCase(result: EvaluateResult, testCase: TestCase): boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/util/index.ts"],"names":[],"mappings":"AA+BA,OAAO,EACL,KAAK,gBAAgB,EACrB,KAAK,cAAc,EACnB,KAAK,eAAe,EACpB,KAAK,aAAa,EAElB,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,KAAK,WAAW,EAChB,KAAK,QAAQ,EACb,KAAK,qBAAqB,EAE1B,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,eAAe,EAEpB,KAAK,iBAAiB,EAIvB,MAAM,UAAU,CAAC;AAOlB;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAE1D;AAuBD,wBAAsB,WAAW,CAC/B,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,GAAG,IAAI,EACrB,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,YAAY,EAAE,MAAM,GAAG,IAAI,iBAgE5B;AAED,wBAAsB,oBAAoB,CACxC,WAAW,EAAE,MAAM,EAAE,EACrB,MAAM,EAAE,MAAM,GAAG,IAAI,EACrB,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,YAAY,EAAE,MAAM,GAAG,IAAI,iBAK5B;AAED,wBAAsB,UAAU,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CASxE;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,CAE7C;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,SAAS,CAAC,EAAE,IAAI,GACf,OAAO,CAAC,MAAM,CAAC,CA4HjB;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CACjC,KAAK,GAAE,MAA4B,EACnC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,iBAAiB,EAAE,CAmCrB;AAED;;GAEG;AACH,wBAAgB,sCAAsC,IAAI,MAAM,EAAE,CAYjE;AAID;;GAEG;AACH,wBAAgB,8BAA8B,IAAI;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,EAAE,CAqB7F;AAED,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,QAqB9C;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,IAAI,UAExC;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,GACX;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,WAAW,CAAC;IAAC,SAAS,EAAE,IAAI,CAAA;CAAE,GAAG,SAAS,CAgBlE;AAID,wBAAsB,sCAAsC,kBA0D3D;AAID,wBAAgB,qBAAqB,CAAC,SAAS,SAAwB,QAKtE;AAED,wBAAsB,UAAU,CAC9B,EAAE,EAAE,MAAM,GACT,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,WAAW,CAAC;IAAC,SAAS,EAAE,IAAI,CAAA;CAAE,GAAG,SAAS,CAAC,CAsC3E;AAED,wBAAsB,YAAY,CAChC,EAAE,EAAE,MAAM,EACV,SAAS,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,EAClC,QAAQ,CAAC,EAAE,aAAa,GACvB,OAAO,CAAC,IAAI,CAAC,CAyCf;AAED,wBAAsB,aAAa,CAAC,iBAAiB,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,GAAG,SAAS,CAAC,CAgChG;AAED,wBAAsB,uBAAuB,CAC3C,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,EAC3C,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAkE/B;AAED,wBAAgB,0BAA0B,CACxC,eAAe,EAAE,MAAM,EACvB,KAAK,GAAE,MAA4B,iCAOpC;AAED,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,QAAQ,EAAE,iCAI3D;AAED,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,EAC3C,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAwElC;AAED,wBAAgB,UAAU,CAAC,KAAK,GAAE,MAA4B,iCAE7D;AAED,wBAAsB,YAAY,CAAC,KAAK,GAAE,MAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAErE;AAED,wBAAsB,iBAAiB,CAAC,IAAI,EAAE,MAAM,2CAQnD;AAED,wBAAsB,kBAAkB,CAAC,IAAI,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;eAQpD;AAED,wBAAsB,qBAAqB,CACzC,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,EAC3C,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAwC7B;AAED,wBAAsB,QAAQ,CAAC,KAAK,GAAE,MAA4B,+BAEjE;AAED,wBAAsB,aAAa,CAAC,IAAI,EAAE,MAAM,yCAQ/C;AAED,wBAAsB,UAAU,CAAC,MAAM,EAAE,MAAM,iBAa9C;AAED;;;;GAIG;AACH,wBAAsB,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC,CAQpD;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAC/B,QAAQ,GAAE,MAAW,GACpB,OAAO,CAAC,iBAAiB,CAAC,CAa5B;AAED,wBAAgB,WAAW,SAG1B;AAED,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,QAOnD;AAED,MAAM,MAAM,cAAc,GAAG,eAAe,GAAG;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAElB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC,CAAC;AAIF,wBAAgB,kBAAkB,CAAC,EACjC,KAA2B,EAC3B,GAAG,EACH,WAAW,GACZ,GAAE;IACD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjB,GAAG,cAAc,EAAE,CA+ExB;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,CAAC,UAAU,CAAC,GAAG,MAAM,GAAG,SAAS,CASvF;AAED,wBAAgB,SAAS,CACvB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,SAAS,EAC7D,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,SAAS,WAG9D;AAED,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAMvF;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAsBvF;AAED;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB;IACD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB,CAoCA;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,IAAI,OA+BhG"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/util/index.ts"],"names":[],"mappings":"AAgCA,OAAO,IAA+C,MAAM,gBAAgB,CAAC;AAE7E,OAAO,EACL,KAAK,gBAAgB,EACrB,KAAK,cAAc,EACnB,KAAK,aAAa,EAElB,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,KAAK,WAAW,EAChB,KAAK,QAAQ,EACb,KAAK,qBAAqB,EAE1B,KAAK,aAAa,EAClB,KAAK,UAAU,EACf,KAAK,eAAe,EAEpB,KAAK,iBAAiB,EAItB,KAAK,iBAAiB,EACvB,MAAM,UAAU,CAAC;AA6BlB,wBAAsB,WAAW,CAC/B,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,IAAI,EAChB,YAAY,EAAE,MAAM,GAAG,IAAI,iBAuF5B;AAED,wBAAsB,oBAAoB,CACxC,WAAW,EAAE,MAAM,EAAE,EACrB,UAAU,EAAE,IAAI,EAChB,YAAY,EAAE,MAAM,GAAG,IAAI,iBAK5B;AAED,wBAAsB,UAAU,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CASxE;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,CAE7C;AAED,wBAAsB,sBAAsB,CAC1C,OAAO,EAAE,iBAAiB,EAC1B,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,SAAS,GAAE,IAAiB,GAC3B,OAAO,CAAC,MAAM,CAAC,CA8HjB;AAED;;;GAGG;AACH,wBAAsB,mBAAmB,CACvC,KAAK,GAAE,MAA4B,EACnC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,OAAO,CAAC,iBAAiB,EAAE,CAAC,CAqC9B;AAED;;GAEG;AACH,wBAAgB,sCAAsC,IAAI,MAAM,EAAE,CAYjE;AAID;;GAEG;AACH,wBAAgB,8BAA8B,IAAI;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,EAAE,CAqB7F;AAED,wBAAgB,cAAc,CAAC,QAAQ,EAAE,MAAM,QAqB9C;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,IAAI,UAExC;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,MAAM,GACX;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,WAAW,CAAC;IAAC,SAAS,EAAE,IAAI,CAAA;CAAE,GAAG,SAAS,CAgBlE;AAED,wBAAsB,sCAAsC,kBAI3D;AAED,wBAAsB,UAAU,CAC9B,EAAE,EAAE,MAAM,GACT,OAAO,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,WAAW,CAAC;IAAC,SAAS,EAAE,IAAI,CAAA;CAAE,GAAG,SAAS,CAAC,CAY3E;AAED,wBAAsB,YAAY,CAChC,EAAE,EAAE,MAAM,EACV,SAAS,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,EAClC,QAAQ,CAAC,EAAE,aAAa,GACvB,OAAO,CAAC,IAAI,CAAC,CAuBf;AAED,wBAAsB,aAAa,CAAC,iBAAiB,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,GAAG,SAAS,CAAC,CAGhG;AAED,wBAAsB,uBAAuB,CAC3C,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,EAC3C,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,kBAAkB,EAAE,CAAC,CAiD/B;AAED,wBAAgB,0BAA0B,CACxC,eAAe,EAAE,MAAM,EACvB,KAAK,GAAE,MAA4B,iCAOpC;AAED,wBAAgB,sBAAsB,CAAC,SAAS,EAAE,QAAQ,EAAE,iCAI3D;AAED,wBAAsB,yBAAyB,CAC7C,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,EAC3C,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAuDlC;AAED,wBAAgB,UAAU,CAAC,KAAK,GAAE,MAA4B,iCAE7D;AAED,wBAAsB,YAAY,CAAC,KAAK,GAAE,MAA4B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAErE;AAED,wBAAsB,iBAAiB,CAAC,IAAI,EAAE,MAAM,2CAQnD;AAED,wBAAsB,kBAAkB,CAAC,IAAI,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;eAQpD;AAED,wBAAsB,qBAAqB,CACzC,SAAS,EAAE,CAAC,MAAM,EAAE,WAAW,KAAK,OAAO,EAC3C,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,gBAAgB,EAAE,CAAC,CA0C7B;AAED,wBAAsB,QAAQ,CAAC,KAAK,GAAE,MAA4B,+BAEjE;AAED,wBAAsB,aAAa,CAAC,IAAI,EAAE,MAAM,yCAQ/C;AAED,wBAAsB,UAAU,CAAC,MAAM,EAAE,MAAM,iBAe9C;AAED;;;;GAIG;AACH,wBAAsB,cAAc,IAAI,OAAO,CAAC,IAAI,CAAC,CAQpD;AAED,wBAAsB,WAAW,CAC/B,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAC/B,QAAQ,GAAE,MAAW,GACpB,OAAO,CAAC,iBAAiB,CAAC,CAa5B;AAED,wBAAgB,WAAW,SAG1B;AAED,wBAAgB,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,QAOnD;AAED,MAAM,MAAM,cAAc,GAAG,eAAe,GAAG;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAElB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACzC,CAAC;AAIF,wBAAsB,kBAAkB,CAAC,EACvC,KAA2B,EAC3B,GAAG,EACH,WAAW,GACZ,GAAE;IACD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,GAAG,CAAC,EAAE;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CA0FjC;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,QAAQ,CAAC,UAAU,CAAC,GAAG,MAAM,GAAG,SAAS,CASvF;AAED,wBAAgB,SAAS,CACvB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,SAAS,EAC7D,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC,GAAG,SAAS,WAG9D;AAED,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAMvF;AAED,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAsBvF;AAED;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB;IACD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,OAAO,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB,CAoCA;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,IAAI,OA+BhG"}
@@ -26,7 +26,6 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
26
26
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
- exports.isJavascriptFile = isJavascriptFile;
30
29
  exports.writeOutput = writeOutput;
31
30
  exports.writeMultipleOutputs = writeMultipleOutputs;
32
31
  exports.readOutput = readOutput;
@@ -39,7 +38,6 @@ exports.filenameToDate = filenameToDate;
39
38
  exports.dateToFilename = dateToFilename;
40
39
  exports.readResult_fileSystem = readResult_fileSystem;
41
40
  exports.migrateResultsFromFileSystemToDatabase = migrateResultsFromFileSystemToDatabase;
42
- exports.cleanupOldFileResults = cleanupOldFileResults;
43
41
  exports.readResult = readResult;
44
42
  exports.updateResult = updateResult;
45
43
  exports.getLatestEval = getLatestEval;
@@ -88,21 +86,14 @@ const accounts_1 = require("../globalConfig/accounts");
88
86
  const googleSheets_1 = require("../googleSheets");
89
87
  const logger_1 = __importDefault(require("../logger"));
90
88
  const migrate_1 = require("../migrate");
89
+ const eval_1 = __importStar(require("../models/eval"));
91
90
  const prompt_1 = require("../models/prompt");
92
91
  const types_1 = require("../types");
93
92
  const manage_1 = require("./config/manage");
94
93
  const createHash_1 = require("./createHash");
94
+ const file_1 = require("./file");
95
95
  const templates_1 = require("./templates");
96
96
  const DEFAULT_QUERY_LIMIT = 100;
97
- /**
98
- * Checks if a file is a JavaScript or TypeScript file based on its extension.
99
- *
100
- * @param filePath - The path of the file to check.
101
- * @returns True if the file has a JavaScript or TypeScript extension, false otherwise.
102
- */
103
- function isJavascriptFile(filePath) {
104
- return /\.(js|cjs|mjs|ts|cts|mts)$/.test(filePath);
105
- }
106
97
  const outputToSimpleString = (output) => {
107
98
  const passFailText = output.pass ? '[PASS]' : '[FAIL]';
108
99
  const namedScoresText = Object.entries(output.namedScores)
@@ -122,14 +113,16 @@ const outputToSimpleString = (output) => {
122
113
  ${gradingResultText}
123
114
  `.trim();
124
115
  };
125
- async function writeOutput(outputPath, evalId, results, config, shareableUrl) {
116
+ async function writeOutput(outputPath, evalRecord, shareableUrl) {
117
+ const table = await evalRecord.getTable();
118
+ (0, tiny_invariant_1.default)(table, 'Table is required');
126
119
  if (outputPath.match(/^https:\/\/docs\.google\.com\/spreadsheets\//)) {
127
- const rows = results.table.body.map((row) => {
120
+ const rows = table.body.map((row) => {
128
121
  const csvRow = {};
129
- results.table.head.vars.forEach((varName, index) => {
122
+ table.head.vars.forEach((varName, index) => {
130
123
  csvRow[varName] = row.vars[index];
131
124
  });
132
- results.table.head.prompts.forEach((prompt, index) => {
125
+ table.head.prompts.forEach((prompt, index) => {
133
126
  csvRow[prompt.label] = outputToSimpleString(row.outputs[index]);
134
127
  });
135
128
  return csvRow;
@@ -148,38 +141,51 @@ async function writeOutput(outputPath, evalId, results, config, shareableUrl) {
148
141
  if (outputExtension === 'csv') {
149
142
  const csvOutput = (0, sync_1.stringify)([
150
143
  [
151
- ...results.table.head.vars,
152
- ...results.table.head.prompts.map((prompt) => `[${prompt.provider}] ${prompt.label}`),
144
+ ...table.head.vars,
145
+ ...table.head.prompts.map((prompt) => `[${prompt.provider}] ${prompt.label}`),
153
146
  ],
154
- ...results.table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]),
147
+ ...table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]),
155
148
  ]);
156
149
  fs.writeFileSync(outputPath, csvOutput);
157
150
  }
158
151
  else if (outputExtension === 'json') {
159
- fs.writeFileSync(outputPath, JSON.stringify({ evalId, results, config, shareableUrl }, null, 2));
152
+ const summary = await evalRecord.toEvaluateSummary();
153
+ fs.writeFileSync(outputPath, JSON.stringify({
154
+ evalId: evalRecord.id,
155
+ results: summary,
156
+ config: evalRecord.config,
157
+ shareableUrl,
158
+ }, null, 2));
160
159
  }
161
160
  else if (outputExtension === 'yaml' || outputExtension === 'yml' || outputExtension === 'txt') {
162
- fs.writeFileSync(outputPath, js_yaml_1.default.dump({ results, config, shareableUrl }));
161
+ const summary = await evalRecord.toEvaluateSummary();
162
+ fs.writeFileSync(outputPath, js_yaml_1.default.dump({
163
+ evalId: evalRecord.id,
164
+ results: summary,
165
+ config: evalRecord.config,
166
+ shareableUrl,
167
+ }));
163
168
  }
164
169
  else if (outputExtension === 'html') {
170
+ const summary = await evalRecord.toEvaluateSummary();
165
171
  const template = fs.readFileSync(`${(0, esm_1.getDirectory)()}/tableOutput.html`, 'utf-8');
166
- const table = [
172
+ const htmlTable = [
167
173
  [
168
- ...results.table.head.vars,
169
- ...results.table.head.prompts.map((prompt) => `[${prompt.provider}] ${prompt.label}`),
174
+ ...table.head.vars,
175
+ ...table.head.prompts.map((prompt) => `[${prompt.provider}] ${prompt.label}`),
170
176
  ],
171
- ...results.table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]),
177
+ ...table.body.map((row) => [...row.vars, ...row.outputs.map(outputToSimpleString)]),
172
178
  ];
173
179
  const htmlOutput = (0, templates_1.getNunjucksEngine)().renderString(template, {
174
- config,
175
- table,
176
- results: results.results,
180
+ config: evalRecord.config,
181
+ table: htmlTable,
182
+ results: summary,
177
183
  });
178
184
  fs.writeFileSync(outputPath, htmlOutput);
179
185
  }
180
186
  }
181
- async function writeMultipleOutputs(outputPaths, evalId, results, config, shareableUrl) {
182
- await Promise.all(outputPaths.map((outputPath) => writeOutput(outputPath, evalId, results, config, shareableUrl)));
187
+ async function writeMultipleOutputs(outputPaths, evalRecord, shareableUrl) {
188
+ await Promise.all(outputPaths.map((outputPath) => writeOutput(outputPath, evalRecord, shareableUrl)));
183
189
  }
184
190
  async function readOutput(outputPath) {
185
191
  const ext = path.parse(outputPath).ext.slice(1);
@@ -197,9 +203,9 @@ async function readOutput(outputPath) {
197
203
  function getLatestResultsPath() {
198
204
  return path.join((0, manage_1.getConfigDirectoryPath)(), 'output', 'latest.json');
199
205
  }
200
- async function writeResultsToDatabase(results, config, createdAt) {
206
+ async function writeResultsToDatabase(results, config, createdAt = new Date()) {
201
207
  createdAt = createdAt || (results.timestamp ? new Date(results.timestamp) : new Date());
202
- const evalId = `eval-${createdAt.toISOString().slice(0, 19)}`;
208
+ const evalId = (0, eval_1.createEvalId)(createdAt);
203
209
  const db = (0, database_1.getDb)();
204
210
  const promises = [];
205
211
  promises.push(db
@@ -216,9 +222,10 @@ async function writeResultsToDatabase(results, config, createdAt) {
216
222
  .run());
217
223
  logger_1.default.debug(`Inserting eval ${evalId}`);
218
224
  // Record prompt relation
225
+ (0, tiny_invariant_1.default)(results.table, 'Table is required');
219
226
  for (const prompt of results.table.head.prompts) {
220
227
  const label = prompt.label || prompt.display || prompt.raw;
221
- const promptId = prompt.id || (0, prompt_1.generateIdFromPrompt)(prompt);
228
+ const promptId = (0, prompt_1.generateIdFromPrompt)(prompt);
222
229
  promises.push(db
223
230
  .insert(tables_1.prompts)
224
231
  .values({
@@ -297,7 +304,7 @@ async function writeResultsToDatabase(results, config, createdAt) {
297
304
  *
298
305
  * @returns Last n evals in descending order.
299
306
  */
300
- function listPreviousResults(limit = DEFAULT_QUERY_LIMIT, filterDescription, datasetId) {
307
+ async function listPreviousResults(limit = DEFAULT_QUERY_LIMIT, filterDescription, datasetId) {
301
308
  const db = (0, database_1.getDb)();
302
309
  const startTime = performance.now();
303
310
  const query = db
@@ -310,7 +317,7 @@ function listPreviousResults(limit = DEFAULT_QUERY_LIMIT, filterDescription, dat
310
317
  })
311
318
  .from(tables_1.evals)
312
319
  .leftJoin(tables_1.evalsToDatasets, (0, drizzle_orm_1.eq)(tables_1.evals.id, tables_1.evalsToDatasets.evalId))
313
- .where((0, drizzle_orm_1.and)(datasetId ? (0, drizzle_orm_1.eq)(tables_1.evalsToDatasets.datasetId, datasetId) : undefined, filterDescription ? (0, drizzle_orm_1.like)(tables_1.evals.description, `%${filterDescription}%`) : undefined));
320
+ .where((0, drizzle_orm_1.and)(datasetId ? (0, drizzle_orm_1.eq)(tables_1.evalsToDatasets.datasetId, datasetId) : undefined, filterDescription ? (0, drizzle_orm_1.like)(tables_1.evals.description, `%${filterDescription}%`) : undefined, (0, drizzle_orm_1.not)((0, drizzle_orm_1.eq)(tables_1.evals.results, {}))));
314
321
  const results = query.orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt)).limit(limit).all();
315
322
  const mappedResults = results.map((result) => ({
316
323
  evalId: result.evalId,
@@ -321,8 +328,10 @@ function listPreviousResults(limit = DEFAULT_QUERY_LIMIT, filterDescription, dat
321
328
  }));
322
329
  const endTime = performance.now();
323
330
  const executionTime = endTime - startTime;
331
+ const evalResults = await (0, eval_1.getSummaryofLatestEvals)(undefined, filterDescription, datasetId);
324
332
  logger_1.default.debug(`listPreviousResults execution time: ${executionTime.toFixed(2)}ms`);
325
- return mappedResults;
333
+ const combinedResults = [...evalResults, ...mappedResults];
334
+ return combinedResults;
326
335
  }
327
336
  /**
328
337
  * @deprecated Used only for migration to sqlite
@@ -409,100 +418,19 @@ function readResult_fileSystem(name) {
409
418
  logger_1.default.error(`Failed to read results from ${resultsPath}:\n${err}`);
410
419
  }
411
420
  }
412
- let attemptedMigration = false;
413
421
  async function migrateResultsFromFileSystemToDatabase() {
414
- if (attemptedMigration) {
415
- // TODO(ian): Record this bit in the database.
416
- return;
417
- }
418
422
  // First run db migrations
419
423
  logger_1.default.debug('Running db migrations...');
420
424
  await (0, migrate_1.runDbMigrations)();
421
- const fileNames = listPreviousResultFilenames_fileSystem();
422
- if (fileNames.length === 0) {
423
- return;
424
- }
425
- logger_1.default.info(`🔁 Migrating ${fileNames.length} flat files to local database.`);
426
- logger_1.default.info('This is a one-time operation and may take a minute...');
427
- attemptedMigration = true;
428
- const outputDir = path.join((0, manage_1.getConfigDirectoryPath)(true /* createIfNotExists */), 'output');
429
- const backupDir = `${outputDir}-backup-${new Date()
430
- .toISOString()
431
- .slice(0, 10)
432
- .replace(/-/g, '')}`;
433
- try {
434
- fs.cpSync(outputDir, backupDir, { recursive: true });
435
- logger_1.default.info(`Backup of output directory created at ${backupDir}`);
436
- }
437
- catch (backupError) {
438
- logger_1.default.error(`Failed to create backup of output directory: ${backupError}`);
439
- return;
440
- }
441
- logger_1.default.info('Moving files into database...');
442
- const migrationPromises = fileNames.map(async (fileName) => {
443
- const fileData = readResult_fileSystem(fileName);
444
- if (fileData) {
445
- await writeResultsToDatabase(fileData.result.results, fileData.result.config, filenameToDate(fileName));
446
- logger_1.default.debug(`Migrated ${fileName} to database.`);
447
- try {
448
- fs.unlinkSync(path.join(outputDir, fileName));
449
- }
450
- catch (err) {
451
- logger_1.default.warn(`Failed to delete ${fileName} after migration: ${err}`);
452
- }
453
- }
454
- else {
455
- logger_1.default.warn(`Failed to migrate result ${fileName} due to read error.`);
456
- }
457
- });
458
- await Promise.all(migrationPromises);
459
- try {
460
- fs.unlinkSync(getLatestResultsPath());
461
- }
462
- catch (err) {
463
- logger_1.default.warn(`Failed to delete latest.json: ${err}`);
464
- }
465
- logger_1.default.info('Migration complete. Please restart your web server if it is running.');
466
- }
467
- const RESULT_HISTORY_LENGTH = (0, envars_1.getEnvInt)('RESULT_HISTORY_LENGTH', DEFAULT_QUERY_LIMIT);
468
- function cleanupOldFileResults(remaining = RESULT_HISTORY_LENGTH) {
469
- const sortedFilenames = listPreviousResultFilenames_fileSystem();
470
- for (let i = 0; i < sortedFilenames.length - remaining; i++) {
471
- fs.unlinkSync(path.join((0, manage_1.getConfigDirectoryPath)(), 'output', sortedFilenames[i]));
472
- }
473
425
  }
474
426
  async function readResult(id) {
475
- const db = (0, database_1.getDb)();
476
427
  try {
477
- const evalResult = await db
478
- .select({
479
- id: tables_1.evals.id,
480
- createdAt: tables_1.evals.createdAt,
481
- author: tables_1.evals.author,
482
- results: tables_1.evals.results,
483
- config: tables_1.evals.config,
484
- datasetId: tables_1.evalsToDatasets.datasetId,
485
- })
486
- .from(tables_1.evals)
487
- .leftJoin(tables_1.evalsToDatasets, (0, drizzle_orm_1.eq)(tables_1.evals.id, tables_1.evalsToDatasets.evalId))
488
- .where((0, drizzle_orm_1.eq)(tables_1.evals.id, id))
489
- .execute();
490
- if (evalResult.length === 0) {
491
- return undefined;
492
- }
493
- const { id: resultId, createdAt, results, config, author, datasetId } = evalResult[0];
494
- const result = {
495
- version: 3,
496
- createdAt: new Date(createdAt).toISOString().slice(0, 10),
497
- author,
498
- results,
499
- config,
500
- datasetId,
501
- };
428
+ const eval_ = await eval_1.default.findById(id);
429
+ (0, tiny_invariant_1.default)(eval_, `Eval with ID ${id} not found.`);
502
430
  return {
503
- id: resultId,
504
- result,
505
- createdAt: new Date(createdAt),
431
+ id,
432
+ result: await eval_.toResultsFile(),
433
+ createdAt: new Date(eval_.createdAt),
506
434
  };
507
435
  }
508
436
  catch (err) {
@@ -510,38 +438,20 @@ async function readResult(id) {
510
438
  }
511
439
  }
512
440
  async function updateResult(id, newConfig, newTable) {
513
- const db = (0, database_1.getDb)();
514
441
  try {
515
442
  // Fetch the existing eval data from the database
516
- const existingEval = await db
517
- .select({
518
- config: tables_1.evals.config,
519
- results: tables_1.evals.results,
520
- })
521
- .from(tables_1.evals)
522
- .where((0, drizzle_orm_1.eq)(tables_1.evals.id, id))
523
- .limit(1)
524
- .all();
525
- if (existingEval.length === 0) {
443
+ const existingEval = await eval_1.default.findById(id);
444
+ if (!existingEval) {
526
445
  logger_1.default.error(`Eval with ID ${id} not found.`);
527
446
  return;
528
447
  }
529
- const evalData = existingEval[0];
530
448
  if (newConfig) {
531
- evalData.config = newConfig;
449
+ existingEval.config = newConfig;
532
450
  }
533
451
  if (newTable) {
534
- evalData.results.table = newTable;
452
+ existingEval.setTable(newTable);
535
453
  }
536
- await db
537
- .update(tables_1.evals)
538
- .set({
539
- description: evalData.config.description,
540
- config: evalData.config,
541
- results: evalData.results,
542
- })
543
- .where((0, drizzle_orm_1.eq)(tables_1.evals.id, id))
544
- .run();
454
+ await existingEval.save();
545
455
  logger_1.default.info(`Updated eval with ID ${id}`);
546
456
  }
547
457
  catch (err) {
@@ -549,61 +459,18 @@ async function updateResult(id, newConfig, newTable) {
549
459
  }
550
460
  }
551
461
  async function getLatestEval(filterDescription) {
552
- const db = (0, database_1.getDb)();
553
- let latestResults = await db
554
- .select({
555
- id: tables_1.evals.id,
556
- createdAt: tables_1.evals.createdAt,
557
- author: tables_1.evals.author,
558
- description: tables_1.evals.description,
559
- results: tables_1.evals.results,
560
- config: tables_1.evals.config,
561
- })
562
- .from(tables_1.evals)
563
- .orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt))
564
- .limit(1);
565
- if (filterDescription) {
566
- const regex = new RegExp(filterDescription, 'i');
567
- latestResults = latestResults.filter((result) => regex.test(result.description || ''));
568
- }
569
- if (!latestResults.length) {
570
- return undefined;
571
- }
572
- const latestResult = latestResults[0];
573
- return {
574
- version: 3,
575
- createdAt: new Date(latestResult.createdAt).toISOString(),
576
- author: latestResult.author,
577
- results: latestResult.results,
578
- config: latestResult.config,
579
- };
462
+ const eval_ = await eval_1.default.latest();
463
+ return await eval_?.toResultsFile();
580
464
  }
581
465
  async function getPromptsWithPredicate(predicate, limit) {
582
466
  // TODO(ian): Make this use a proper database query
583
- const db = (0, database_1.getDb)();
584
- const evals_ = await db
585
- .select({
586
- id: tables_1.evals.id,
587
- createdAt: tables_1.evals.createdAt,
588
- author: tables_1.evals.author,
589
- results: tables_1.evals.results,
590
- config: tables_1.evals.config,
591
- })
592
- .from(tables_1.evals)
593
- .limit(limit)
594
- .all();
467
+ const evals_ = await eval_1.default.getMany(limit);
595
468
  const groupedPrompts = {};
596
469
  for (const eval_ of evals_) {
597
470
  const createdAt = new Date(eval_.createdAt).toISOString();
598
- const resultWrapper = {
599
- version: 3,
600
- createdAt,
601
- author: eval_.author,
602
- results: eval_.results,
603
- config: eval_.config,
604
- };
471
+ const resultWrapper = await eval_.toResultsFile();
605
472
  if (predicate(resultWrapper)) {
606
- for (const prompt of resultWrapper.results.table.head.prompts) {
473
+ for (const prompt of eval_.getPrompts()) {
607
474
  const promptId = (0, createHash_1.sha256)(prompt.raw);
608
475
  const datasetId = resultWrapper.config.tests
609
476
  ? (0, createHash_1.sha256)(JSON.stringify(resultWrapper.config.tests))
@@ -651,29 +518,11 @@ function getPromptsForTestCases(testCases) {
651
518
  return getPromptsForTestCasesHash(testCasesSha256);
652
519
  }
653
520
  async function getTestCasesWithPredicate(predicate, limit) {
654
- const db = (0, database_1.getDb)();
655
- const evals_ = await db
656
- .select({
657
- id: tables_1.evals.id,
658
- createdAt: tables_1.evals.createdAt,
659
- author: tables_1.evals.author,
660
- results: tables_1.evals.results,
661
- config: tables_1.evals.config,
662
- })
663
- .from(tables_1.evals)
664
- .orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt))
665
- .limit(limit)
666
- .all();
521
+ const evals_ = await eval_1.default.getMany(limit);
667
522
  const groupedTestCases = {};
668
523
  for (const eval_ of evals_) {
669
524
  const createdAt = new Date(eval_.createdAt).toISOString();
670
- const resultWrapper = {
671
- version: 3,
672
- createdAt,
673
- author: eval_.author,
674
- results: eval_.results,
675
- config: eval_.config,
676
- };
525
+ const resultWrapper = await eval_.toResultsFile();
677
526
  const testCases = resultWrapper.config.tests;
678
527
  if (testCases && predicate(resultWrapper)) {
679
528
  const evalId = eval_.id;
@@ -681,7 +530,7 @@ async function getTestCasesWithPredicate(predicate, limit) {
681
530
  if (datasetId in groupedTestCases) {
682
531
  groupedTestCases[datasetId].recentEvalDate = new Date(Math.max(groupedTestCases[datasetId].recentEvalDate.getTime(), eval_.createdAt));
683
532
  groupedTestCases[datasetId].count += 1;
684
- const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
533
+ const newPrompts = eval_.getPrompts().map((prompt) => ({
685
534
  id: (0, createHash_1.sha256)(prompt.raw),
686
535
  prompt,
687
536
  evalId,
@@ -695,7 +544,7 @@ async function getTestCasesWithPredicate(predicate, limit) {
695
544
  groupedTestCases[datasetId].prompts = Object.values(promptsById);
696
545
  }
697
546
  else {
698
- const newPrompts = resultWrapper.results.table.head.prompts.map((prompt) => ({
547
+ const newPrompts = eval_.getPrompts().map((prompt) => ({
699
548
  id: (0, createHash_1.sha256)(prompt.raw),
700
549
  prompt,
701
550
  evalId,
@@ -765,6 +614,7 @@ async function getEvalsWithPredicate(predicate, limit) {
765
614
  version: 3,
766
615
  createdAt,
767
616
  author: eval_.author,
617
+ // @ts-ignore
768
618
  results: eval_.results,
769
619
  config: eval_.config,
770
620
  };
@@ -774,6 +624,7 @@ async function getEvalsWithPredicate(predicate, limit) {
774
624
  id: evalId,
775
625
  date: new Date(eval_.createdAt),
776
626
  config: eval_.config,
627
+ // @ts-ignore
777
628
  results: eval_.results,
778
629
  description: eval_.description || undefined,
779
630
  });
@@ -799,6 +650,9 @@ async function deleteEval(evalId) {
799
650
  // We need to clean up foreign keys first. We don't have onDelete: 'cascade' set on all these relationships.
800
651
  await db.delete(tables_1.evalsToPrompts).where((0, drizzle_orm_1.eq)(tables_1.evalsToPrompts.evalId, evalId)).run();
801
652
  await db.delete(tables_1.evalsToDatasets).where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasets.evalId, evalId)).run();
653
+ await db.delete(tables_1.evalsToTags).where((0, drizzle_orm_1.eq)(tables_1.evalsToTags.evalId, evalId)).run();
654
+ await db.delete(tables_1.evalResultsTable).where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.evalId, evalId)).run();
655
+ await db.delete(tables_1.evalsToProviders).where((0, drizzle_orm_1.eq)(tables_1.evalsToProviders.evalId, evalId)).run();
802
656
  // Finally, delete the eval record
803
657
  const deletedIds = await db.delete(tables_1.evals).where((0, drizzle_orm_1.eq)(tables_1.evals.id, evalId)).run();
804
658
  if (deletedIds.changes === 0) {
@@ -848,7 +702,7 @@ function setupEnv(envPath) {
848
702
  }
849
703
  }
850
704
  const standaloneEvalCache = new node_cache_1.default({ stdTTL: 60 * 60 * 2 }); // Cache for 2 hours
851
- function getStandaloneEvals({ limit = DEFAULT_QUERY_LIMIT, tag, description, } = {}) {
705
+ async function getStandaloneEvals({ limit = DEFAULT_QUERY_LIMIT, tag, description, } = {}) {
852
706
  const cacheKey = `standalone_evals_${limit}_${tag?.key}_${tag?.value}`;
853
707
  const cachedResult = standaloneEvalCache.get(cacheKey);
854
708
  if (cachedResult) {
@@ -876,16 +730,26 @@ function getStandaloneEvals({ limit = DEFAULT_QUERY_LIMIT, tag, description, } =
876
730
  .orderBy((0, drizzle_orm_1.desc)(tables_1.evals.createdAt))
877
731
  .limit(limit)
878
732
  .all();
879
- const standaloneEvals = results.flatMap((result) => {
880
- const { description, createdAt, evalId, promptId, datasetId, results: { table }, isRedteam, } = result;
881
- return table.head.prompts.map((col, index) => {
733
+ const standaloneEvals = (await Promise.all(results.map(async (result) => {
734
+ const { description, createdAt, evalId, promptId, datasetId,
735
+ // @ts-ignore
736
+ isRedteam, } = result;
737
+ const eval_ = await eval_1.default.findById(evalId);
738
+ (0, tiny_invariant_1.default)(eval_, `Eval with ID ${evalId} not found`);
739
+ const table = (await eval_.getTable()) || { body: [] };
740
+ // @ts-ignore
741
+ return eval_.getPrompts().map((col, index) => {
882
742
  // Compute some stats
883
- const pluginCounts = table.body.reduce((acc, row) => {
743
+ const pluginCounts = table.body.reduce(
744
+ // @ts-ignore
745
+ (acc, row) => {
884
746
  const pluginId = row.test.metadata?.pluginId;
885
747
  if (pluginId) {
886
748
  const isPass = row.outputs[index].pass;
887
- acc.pluginPassCount[pluginId] = (acc.pluginPassCount[pluginId] || 0) + (isPass ? 1 : 0);
888
- acc.pluginFailCount[pluginId] = (acc.pluginFailCount[pluginId] || 0) + (isPass ? 0 : 1);
749
+ acc.pluginPassCount[pluginId] =
750
+ (acc.pluginPassCount[pluginId] || 0) + (isPass ? 1 : 0);
751
+ acc.pluginFailCount[pluginId] =
752
+ (acc.pluginFailCount[pluginId] || 0) + (isPass ? 0 : 1);
889
753
  }
890
754
  return acc;
891
755
  }, { pluginPassCount: {}, pluginFailCount: {} });
@@ -900,7 +764,7 @@ function getStandaloneEvals({ limit = DEFAULT_QUERY_LIMIT, tag, description, } =
900
764
  ...col,
901
765
  };
902
766
  });
903
- });
767
+ }))).flat();
904
768
  standaloneEvalCache.set(cacheKey, standaloneEvals);
905
769
  return standaloneEvals;
906
770
  }
@@ -975,7 +839,7 @@ function parsePathOrGlob(basePath, promptPath) {
975
839
  let functionName;
976
840
  if (filename.includes(':')) {
977
841
  const splits = filename.split(':');
978
- if (splits[0] && (isJavascriptFile(splits[0]) || splits[0].endsWith('.py'))) {
842
+ if (splits[0] && ((0, file_1.isJavascriptFile)(splits[0]) || splits[0].endsWith('.py'))) {
979
843
  [filename, functionName] = splits;
980
844
  }
981
845
  }