@m4trix/evals 0.11.0 → 0.13.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -279,11 +279,17 @@ function toEvaluatorOption(item) {
279
279
  };
280
280
  }
281
281
  async function loadRunnerData(runner) {
282
- const [datasets, evaluators] = await Promise.all([
282
+ const [datasets, evaluators, diskSnapshots] = await Promise.all([
283
283
  runner.collectDatasets(),
284
- runner.collectEvaluators()
284
+ runner.collectEvaluators(),
285
+ runner.loadRunSnapshotsFromArtifacts()
285
286
  ]);
286
- const snapshots = runner.getAllRunSnapshots();
287
+ const memSnapshots = runner.getAllRunSnapshots();
288
+ const seen = new Set(memSnapshots.map((s) => s.runId));
289
+ const fromDisk = diskSnapshots.filter((s) => !seen.has(s.runId));
290
+ const snapshots = [...memSnapshots, ...fromDisk].sort(
291
+ (a, b) => b.queuedAt - a.queuedAt
292
+ );
287
293
  if (datasets.length === 0 && evaluators.length === 0) {
288
294
  return loadMockData();
289
295
  }
@@ -325,15 +331,23 @@ var TestCase = class _TestCase {
325
331
  this._config = config;
326
332
  }
327
333
  static describe(config) {
334
+ const reruns = config.reruns ?? 1;
335
+ if (reruns < 1 || !Number.isInteger(reruns)) {
336
+ throw new Error(`TestCase reruns must be a positive integer, got ${reruns}`);
337
+ }
328
338
  return new _TestCase({
329
339
  name: config.name,
330
340
  tags: config.tags,
341
+ reruns,
331
342
  inputSchema: config.inputSchema,
332
343
  input: config.input,
333
344
  outputSchema: config.outputSchema,
334
345
  output: config.output
335
346
  });
336
347
  }
348
+ getReruns() {
349
+ return this._config.reruns;
350
+ }
337
351
  getName() {
338
352
  return this._config.name;
339
353
  }
@@ -507,6 +521,7 @@ var Metric = {
507
521
  const def = {
508
522
  id: config.id,
509
523
  name: config.name,
524
+ aggregate: config.aggregate,
510
525
  format: config.format,
511
526
  make: (data) => ({ id: config.id, data })
512
527
  };
@@ -526,6 +541,7 @@ var Score = {
526
541
  id: config.id,
527
542
  name: config.name,
528
543
  displayStrategy: config.displayStrategy,
544
+ aggregate: config.aggregate,
529
545
  format: config.format,
530
546
  make: (data, options) => {
531
547
  const passed = options?.definePassed !== void 0 ? options.definePassed(data) : void 0;
@@ -544,23 +560,62 @@ function getScoreById(id) {
544
560
  return registry2.get(id);
545
561
  }
546
562
 
563
+ // src/evals/aggregators.ts
564
+ function aggregateAverage(values) {
565
+ if (values.length === 0) {
566
+ return { value: 0 };
567
+ }
568
+ const sum = values.reduce((s, v) => s + v.value, 0);
569
+ return { value: sum / values.length };
570
+ }
571
+ function aggregateAll(values) {
572
+ return { passed: values.length > 0 && values.every((v) => v.passed) };
573
+ }
574
+ function aggregateTokenCountSum(values) {
575
+ const initial = {
576
+ input: 0,
577
+ output: 0,
578
+ inputCached: 0,
579
+ outputCached: 0
580
+ };
581
+ return values.reduce(
582
+ (acc, v) => ({
583
+ input: acc.input + (v.input ?? 0),
584
+ output: acc.output + (v.output ?? 0),
585
+ inputCached: acc.inputCached + (v.inputCached ?? 0),
586
+ outputCached: acc.outputCached + (v.outputCached ?? 0)
587
+ }),
588
+ initial
589
+ );
590
+ }
591
+ function aggregateLatencyAverage(values) {
592
+ if (values.length === 0) {
593
+ return { ms: 0 };
594
+ }
595
+ const sum = values.reduce((s, v) => s + v.ms, 0);
596
+ return { ms: sum / values.length };
597
+ }
598
+
547
599
  // src/evals/metrics/standard.ts
548
600
  var tokenCountMetric = Metric.of({
549
601
  id: "token-count",
550
602
  name: "Tokens",
551
- format: (data) => {
603
+ aggregate: aggregateTokenCountSum,
604
+ format: (data, options) => {
552
605
  const input = data.input ?? 0;
553
606
  const output = data.output ?? 0;
554
607
  const inputCached = data.inputCached ?? 0;
555
608
  const outputCached = data.outputCached ?? 0;
556
609
  const cached = inputCached + outputCached;
557
- return `in:${input} out:${output} cached:${cached}`;
610
+ const base = `in:${input} out:${output} cached:${cached}`;
611
+ return options?.isAggregated ? `Total: ${base}` : base;
558
612
  }
559
613
  });
560
614
  var latencyMetric = Metric.of({
561
615
  id: "latency",
562
616
  name: "Latency",
563
- format: (data) => `${data.ms}ms`
617
+ aggregate: aggregateLatencyAverage,
618
+ format: (data, options) => options?.isAggregated ? `Avg: ${data.ms}ms` : `${data.ms}ms`
564
619
  });
565
620
 
566
621
  // src/evals/scores/standard.ts
@@ -568,13 +623,15 @@ var percentScore = Score.of({
568
623
  id: "percent",
569
624
  name: "Score",
570
625
  displayStrategy: "bar",
571
- format: (data) => data.value.toFixed(2)
626
+ format: (data, options) => options?.isAggregated ? `Avg: ${data.value.toFixed(2)}` : data.value.toFixed(2),
627
+ aggregate: aggregateAverage
572
628
  });
573
629
  var binaryScore = Score.of({
574
630
  id: "binary",
575
631
  name: "Result",
576
632
  displayStrategy: "passFail",
577
- format: (data) => data.passed ? "PASSED" : "NOT PASSED"
633
+ format: (data, options) => options?.isAggregated ? data.passed ? "All: PASSED" : "Some: FAILED" : data.passed ? "PASSED" : "NOT PASSED",
634
+ aggregate: aggregateAll
578
635
  });
579
636
  function createDiffLogEntry(expected, actual, options) {
580
637
  const diff = jsonDiff.diffString(expected, actual, { color: false });
@@ -615,7 +672,8 @@ var defaultRunnerConfig = {
615
672
  ],
616
673
  excludeDirectories: ["node_modules", "dist", ".next", ".git", ".pnpm-store"]
617
674
  },
618
- artifactDirectory: ".eval-results"
675
+ artifactDirectory: ".eval-results",
676
+ maxConcurrency: 1
619
677
  };
620
678
  function toRunnerConfigOverrides(config) {
621
679
  if (!config) {
@@ -648,6 +706,9 @@ function toRunnerConfigOverrides(config) {
648
706
  if (config.artifactDirectory !== void 0) {
649
707
  overrides.artifactDirectory = config.artifactDirectory;
650
708
  }
709
+ if (config.maxConcurrency !== void 0) {
710
+ overrides.maxConcurrency = config.maxConcurrency;
711
+ }
651
712
  if (Object.keys(discovery).length > 0) {
652
713
  overrides.discovery = discovery;
653
714
  }
@@ -921,6 +982,105 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
921
982
  `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
922
983
  );
923
984
  }
985
+ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, completedRef, passedRef, failedRef) {
986
+ return effect.Effect.gen(function* () {
987
+ const reruns = typeof testCaseItem.testCase.getReruns === "function" ? testCaseItem.testCase.getReruns() : 1;
988
+ const rerunPassed = [];
989
+ for (let r = 0; r < reruns; r++) {
990
+ const started = Date.now();
991
+ const evaluatorScores = [];
992
+ let testCaseError;
993
+ const output = readOutput(testCaseItem.testCase);
994
+ for (const { id: evaluatorId, evaluator } of task.evaluators) {
995
+ const evaluateFn = evaluator.getEvaluateFn();
996
+ if (!evaluateFn) {
997
+ continue;
998
+ }
999
+ try {
1000
+ const logs = [];
1001
+ const logDiff = (expected, actual, options) => {
1002
+ logs.push(createDiffLogEntry(expected, actual, options));
1003
+ };
1004
+ const ctx = yield* effect.Effect.promise(
1005
+ () => Promise.resolve(evaluator.resolveContext())
1006
+ );
1007
+ const result = yield* effect.Effect.promise(
1008
+ () => Promise.resolve(
1009
+ evaluateFn({
1010
+ input: testCaseItem.testCase.getInput(),
1011
+ ctx,
1012
+ output,
1013
+ logDiff
1014
+ })
1015
+ )
1016
+ );
1017
+ const { scores, metrics } = normalizeResult(result);
1018
+ const passed2 = computeEvaluatorPassed(evaluator, result, scores);
1019
+ evaluatorScores.push({
1020
+ evaluatorId,
1021
+ scores,
1022
+ passed: passed2,
1023
+ metrics,
1024
+ logs: logs.length > 0 ? logs : void 0
1025
+ });
1026
+ } catch (error) {
1027
+ testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
1028
+ evaluatorScores.push({
1029
+ evaluatorId,
1030
+ scores: [],
1031
+ passed: false
1032
+ });
1033
+ }
1034
+ }
1035
+ const rerunPassedThis = evaluatorScores.every((s) => s.passed);
1036
+ rerunPassed.push(rerunPassedThis);
1037
+ const completedEvaluations = yield* effect.Ref.modify(completedRef, (n) => [
1038
+ n + 1,
1039
+ n + 1
1040
+ ]);
1041
+ const progressEvent = {
1042
+ type: "TestCaseProgress",
1043
+ runId: task.runId,
1044
+ testCaseId: testCaseItem.id,
1045
+ testCaseName: testCaseItem.testCase.getName(),
1046
+ completedTestCases: completedEvaluations,
1047
+ totalTestCases: totalEvaluations,
1048
+ rerunIndex: r + 1,
1049
+ rerunTotal: reruns,
1050
+ passed: rerunPassedThis,
1051
+ durationMs: Date.now() - started,
1052
+ evaluatorScores,
1053
+ output,
1054
+ errorMessage: testCaseError
1055
+ };
1056
+ updateSnapshot(task.runId, (snapshot) => ({
1057
+ ...snapshot,
1058
+ completedTestCases: completedEvaluations
1059
+ }));
1060
+ yield* publishEvent(progressEvent);
1061
+ yield* effect.Queue.offer(persistenceQueue, {
1062
+ runId: task.runId,
1063
+ artifactPath: task.snapshot.artifactPath,
1064
+ payload: progressEvent
1065
+ });
1066
+ }
1067
+ const testCasePassed = rerunPassed.every(Boolean);
1068
+ if (testCasePassed) {
1069
+ yield* effect.Ref.update(passedRef, (n) => n + 1);
1070
+ } else {
1071
+ yield* effect.Ref.update(failedRef, (n) => n + 1);
1072
+ }
1073
+ const [passed, failed] = yield* effect.Effect.all([
1074
+ effect.Ref.get(passedRef),
1075
+ effect.Ref.get(failedRef)
1076
+ ]);
1077
+ updateSnapshot(task.runId, (snapshot) => ({
1078
+ ...snapshot,
1079
+ passedTestCases: passed,
1080
+ failedTestCases: failed
1081
+ }));
1082
+ });
1083
+ }
924
1084
  var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => effect.Effect.gen(function* () {
925
1085
  const startedAt = Date.now();
926
1086
  updateSnapshot(task.runId, (snapshot) => ({
@@ -933,104 +1093,51 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
933
1093
  runId: task.runId,
934
1094
  startedAt
935
1095
  });
936
- let completedTestCases = 0;
937
- let passedTestCases = 0;
938
- let failedTestCases = 0;
939
- for (const testCaseItem of task.testCases) {
940
- const started = Date.now();
941
- const evaluatorScores = [];
942
- let testCaseError;
943
- const output = readOutput(testCaseItem.testCase);
944
- for (const { id: evaluatorId, evaluator } of task.evaluators) {
945
- const evaluateFn = evaluator.getEvaluateFn();
946
- if (!evaluateFn) {
947
- continue;
948
- }
949
- try {
950
- const logs = [];
951
- const logDiff = (expected, actual, options) => {
952
- logs.push(createDiffLogEntry(expected, actual, options));
953
- };
954
- const ctx = yield* effect.Effect.promise(
955
- () => Promise.resolve(evaluator.resolveContext())
956
- );
957
- const result = yield* effect.Effect.promise(
958
- () => Promise.resolve(
959
- evaluateFn({
960
- input: testCaseItem.testCase.getInput(),
961
- ctx,
962
- output,
963
- logDiff
964
- })
965
- )
966
- );
967
- const { scores, metrics } = normalizeResult(result);
968
- const passed = computeEvaluatorPassed(evaluator, result, scores);
969
- evaluatorScores.push({
970
- evaluatorId,
971
- scores,
972
- passed,
973
- metrics,
974
- logs: logs.length > 0 ? logs : void 0
975
- });
976
- } catch (error) {
977
- testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
978
- evaluatorScores.push({
979
- evaluatorId,
980
- scores: [],
981
- passed: false
982
- });
983
- }
984
- }
985
- const testCasePassed = evaluatorScores.every((s) => s.passed);
986
- completedTestCases += 1;
987
- if (testCasePassed) {
988
- passedTestCases += 1;
989
- } else {
990
- failedTestCases += 1;
991
- }
992
- const progressEvent = {
993
- type: "TestCaseProgress",
994
- runId: task.runId,
995
- testCaseId: testCaseItem.id,
996
- testCaseName: testCaseItem.testCase.getName(),
997
- completedTestCases,
998
- totalTestCases: task.testCases.length,
999
- passed: testCasePassed,
1000
- durationMs: Date.now() - started,
1001
- evaluatorScores,
1002
- output,
1003
- errorMessage: testCaseError
1004
- };
1005
- updateSnapshot(task.runId, (snapshot) => ({
1006
- ...snapshot,
1007
- completedTestCases,
1008
- passedTestCases,
1009
- failedTestCases
1010
- }));
1011
- yield* publishEvent(progressEvent);
1012
- yield* effect.Queue.offer(persistenceQueue, {
1013
- runId: task.runId,
1014
- artifactPath: task.snapshot.artifactPath,
1015
- payload: progressEvent
1016
- });
1017
- }
1096
+ const totalEvaluations = task.testCases.reduce(
1097
+ (sum, tc) => sum + (typeof tc.testCase.getReruns === "function" ? tc.testCase.getReruns() : 1),
1098
+ 0
1099
+ );
1100
+ const maxConcurrency = Math.max(1, task.maxConcurrency ?? 1);
1101
+ const completedRef = yield* effect.Ref.make(0);
1102
+ const passedRef = yield* effect.Ref.make(0);
1103
+ const failedRef = yield* effect.Ref.make(0);
1104
+ const processTestCase = (testCaseItem) => processOneTestCase(
1105
+ task,
1106
+ testCaseItem,
1107
+ totalEvaluations,
1108
+ publishEvent,
1109
+ persistenceQueue,
1110
+ updateSnapshot,
1111
+ completedRef,
1112
+ passedRef,
1113
+ failedRef
1114
+ );
1115
+ yield* effect.Effect.forEach(
1116
+ task.testCases,
1117
+ processTestCase,
1118
+ maxConcurrency > 1 ? { concurrency: maxConcurrency } : void 0
1119
+ );
1120
+ const [completedEvaluations, passedUniqueTestCases, failedUniqueTestCases] = yield* effect.Effect.all([
1121
+ effect.Ref.get(completedRef),
1122
+ effect.Ref.get(passedRef),
1123
+ effect.Ref.get(failedRef)
1124
+ ]);
1018
1125
  const finishedAt = Date.now();
1019
1126
  const completedEvent = {
1020
1127
  type: "RunCompleted",
1021
1128
  runId: task.runId,
1022
1129
  finishedAt,
1023
- passedTestCases,
1024
- failedTestCases,
1130
+ passedTestCases: passedUniqueTestCases,
1131
+ failedTestCases: failedUniqueTestCases,
1025
1132
  totalTestCases: task.testCases.length,
1026
1133
  artifactPath: task.snapshot.artifactPath
1027
1134
  };
1028
1135
  updateSnapshot(task.runId, (snapshot) => ({
1029
1136
  ...snapshot,
1030
1137
  status: "completed",
1031
- completedTestCases,
1032
- passedTestCases,
1033
- failedTestCases,
1138
+ completedTestCases: completedEvaluations,
1139
+ passedTestCases: passedUniqueTestCases,
1140
+ failedTestCases: failedUniqueTestCases,
1034
1141
  finishedAt
1035
1142
  }));
1036
1143
  yield* publishEvent(completedEvent);
@@ -1045,6 +1152,126 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
1045
1152
  artifactPath: task.snapshot.artifactPath
1046
1153
  });
1047
1154
  });
1155
+ async function loadRunSnapshotsFromArtifacts(config) {
1156
+ const baseDir = path.resolve(config.artifactDirectory);
1157
+ let entries;
1158
+ try {
1159
+ entries = await promises.readdir(baseDir);
1160
+ } catch {
1161
+ return [];
1162
+ }
1163
+ const jsonlFiles = entries.filter((name) => name.endsWith(".jsonl"));
1164
+ const snapshots = [];
1165
+ for (const fileName of jsonlFiles) {
1166
+ const filePath = path.join(baseDir, fileName);
1167
+ try {
1168
+ const snapshot = await parseArtifactToSnapshot(filePath, config);
1169
+ if (snapshot) {
1170
+ snapshots.push(snapshot);
1171
+ }
1172
+ } catch {
1173
+ }
1174
+ }
1175
+ return snapshots.sort((a, b) => b.queuedAt - a.queuedAt);
1176
+ }
1177
+ async function parseArtifactToSnapshot(filePath, _config) {
1178
+ const content = await promises.readFile(filePath, "utf8");
1179
+ const lines = content.split("\n").filter((line) => line.trim().length > 0);
1180
+ if (lines.length === 0) {
1181
+ return null;
1182
+ }
1183
+ let runQueued = null;
1184
+ let runCompleted = null;
1185
+ let runFailed = null;
1186
+ let runStarted = null;
1187
+ for (const line of lines) {
1188
+ try {
1189
+ const event = JSON.parse(line);
1190
+ const type = event.type;
1191
+ if (type === "RunQueued") {
1192
+ runQueued = {
1193
+ runId: event.runId,
1194
+ datasetId: event.datasetId,
1195
+ datasetName: event.datasetName,
1196
+ evaluatorIds: event.evaluatorIds,
1197
+ totalTestCases: event.totalTestCases ?? 0,
1198
+ artifactPath: event.artifactPath ?? filePath,
1199
+ ts: event.ts
1200
+ };
1201
+ }
1202
+ if (type === "RunStarted") {
1203
+ runStarted = { startedAt: event.startedAt };
1204
+ }
1205
+ if (type === "RunCompleted") {
1206
+ runCompleted = {
1207
+ passedTestCases: event.passedTestCases,
1208
+ failedTestCases: event.failedTestCases,
1209
+ totalTestCases: event.totalTestCases,
1210
+ finishedAt: event.finishedAt
1211
+ };
1212
+ }
1213
+ if (type === "RunFailed") {
1214
+ runFailed = {
1215
+ finishedAt: event.finishedAt,
1216
+ errorMessage: event.errorMessage
1217
+ };
1218
+ }
1219
+ } catch {
1220
+ }
1221
+ }
1222
+ if (!runQueued) {
1223
+ return null;
1224
+ }
1225
+ const artifactPath = filePath;
1226
+ const status = runFailed ? "failed" : runCompleted ? "completed" : runStarted ? "running" : "queued";
1227
+ const progress = aggregateTestCaseProgress(lines);
1228
+ const completedTestCases = runCompleted ? runQueued.totalTestCases : progress.completedTestCases;
1229
+ const passedTestCases = runCompleted?.passedTestCases ?? progress.passedTestCases;
1230
+ const failedTestCases = runCompleted?.failedTestCases ?? progress.failedTestCases;
1231
+ return {
1232
+ runId: runQueued.runId,
1233
+ datasetId: runQueued.datasetId,
1234
+ datasetName: runQueued.datasetName,
1235
+ evaluatorIds: runQueued.evaluatorIds,
1236
+ queuedAt: runQueued.ts ?? 0,
1237
+ startedAt: runStarted?.startedAt,
1238
+ finishedAt: runCompleted?.finishedAt ?? runFailed?.finishedAt,
1239
+ totalTestCases: runQueued.totalTestCases,
1240
+ completedTestCases,
1241
+ passedTestCases,
1242
+ failedTestCases,
1243
+ status,
1244
+ artifactPath,
1245
+ errorMessage: runFailed?.errorMessage
1246
+ };
1247
+ }
1248
+ function aggregateTestCaseProgress(lines) {
1249
+ let completedTestCases = 0;
1250
+ const testCasePassedBy = /* @__PURE__ */ new Map();
1251
+ for (const line of lines) {
1252
+ try {
1253
+ const event = JSON.parse(line);
1254
+ if (event.type === "TestCaseProgress") {
1255
+ const ev = event;
1256
+ completedTestCases = ev.completedTestCases ?? completedTestCases;
1257
+ const id = ev.testCaseId;
1258
+ const current = testCasePassedBy.get(id);
1259
+ testCasePassedBy.set(id, current === void 0 ? ev.passed : current && ev.passed);
1260
+ }
1261
+ } catch {
1262
+ }
1263
+ }
1264
+ let passedTestCases = 0;
1265
+ let failedTestCases = 0;
1266
+ for (const passed of testCasePassedBy.values()) {
1267
+ if (passed) {
1268
+ passedTestCases += 1;
1269
+ } else {
1270
+ failedTestCases += 1;
1271
+ }
1272
+ }
1273
+ return { completedTestCases, passedTestCases, failedTestCases };
1274
+ }
1048
1275
  async function appendJsonLine(artifactPath, payload) {
1049
1276
  await promises.mkdir(path.dirname(artifactPath), { recursive: true });
1050
1277
  await promises.appendFile(artifactPath, `${JSON.stringify(payload)}
@@ -1237,6 +1464,10 @@ var EffectRunner = class {
1237
1464
  throw new Error("No evaluators selected for run");
1238
1465
  }
1239
1466
  const selectedTestCases = await this.collectDatasetTestCases(request.datasetId);
1467
+ const totalEvaluations = selectedTestCases.reduce(
1468
+ (sum, tc) => sum + (typeof tc.testCase.getReruns === "function" ? tc.testCase.getReruns() : 1),
1469
+ 0
1470
+ );
1240
1471
  const runId = `run-${crypto.randomUUID()}`;
1241
1472
  const artifactPath = createArtifactPath(
1242
1473
  this.config.artifactDirectory,
@@ -1249,7 +1480,7 @@ var EffectRunner = class {
1249
1480
  datasetName: dataset.dataset.getName(),
1250
1481
  evaluatorIds: selectedEvaluators.map((item) => item.id),
1251
1482
  queuedAt: Date.now(),
1252
- totalTestCases: selectedTestCases.length,
1483
+ totalTestCases: totalEvaluations,
1253
1484
  completedTestCases: 0,
1254
1485
  passedTestCases: 0,
1255
1486
  failedTestCases: 0,
@@ -1263,7 +1494,7 @@ var EffectRunner = class {
1263
1494
  datasetId: request.datasetId,
1264
1495
  datasetName: dataset.dataset.getName(),
1265
1496
  evaluatorIds: selectedEvaluators.map((item) => item.id),
1266
- totalTestCases: selectedTestCases.length,
1497
+ totalTestCases: totalEvaluations,
1267
1498
  artifactPath
1268
1499
  };
1269
1500
  await effect.Effect.runPromise(this.publishEvent(queuedEvent));
@@ -1274,6 +1505,7 @@ var EffectRunner = class {
1274
1505
  payload: queuedEvent
1275
1506
  })
1276
1507
  );
1508
+ const maxConcurrency = request.concurrency ?? this.config.maxConcurrency ?? 1;
1277
1509
  await effect.Effect.runPromise(
1278
1510
  effect.Queue.offer(this.runQueue, {
1279
1511
  runId,
@@ -1281,7 +1513,8 @@ var EffectRunner = class {
1281
1513
  dataset: dataset.dataset,
1282
1514
  evaluators: selectedEvaluators,
1283
1515
  testCases: selectedTestCases,
1284
- snapshot
1516
+ snapshot,
1517
+ maxConcurrency
1285
1518
  })
1286
1519
  );
1287
1520
  return snapshot;
@@ -1301,6 +1534,9 @@ var EffectRunner = class {
1301
1534
  (a, b) => b.queuedAt - a.queuedAt
1302
1535
  );
1303
1536
  }
1537
+ async loadRunSnapshotsFromArtifacts() {
1538
+ return loadRunSnapshotsFromArtifacts(this.config);
1539
+ }
1304
1540
  async shutdown() {
1305
1541
  await effect.Effect.runPromise(effect.Fiber.interrupt(this.schedulerFiber));
1306
1542
  await effect.Effect.runPromise(effect.Fiber.interrupt(this.persistenceFiber));