@m4trix/evals 0.20.0 → 0.21.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1128,13 +1128,27 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
1128
1128
  `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
1129
1129
  );
1130
1130
  }
1131
- function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, completedRef, passedRef, failedRef) {
1131
+ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef) {
1132
1132
  return effect.Effect.gen(function* () {
1133
1133
  const reruns = typeof testCaseItem.testCase.getReruns === "function" ? testCaseItem.testCase.getReruns() : 1;
1134
1134
  const rerunPassed = [];
1135
1135
  for (let r = 0; r < reruns; r++) {
1136
1136
  const evaluatorRunId = `run-${crypto.randomUUID()}`;
1137
1137
  const started = Date.now();
1138
+ const startedEvaluations = yield* effect.Ref.modify(startedRef, (n) => [
1139
+ n + 1,
1140
+ n + 1
1141
+ ]);
1142
+ yield* publishEvent({
1143
+ type: "TestCaseStarted",
1144
+ runId: task.runId,
1145
+ testCaseId: testCaseItem.id,
1146
+ testCaseName: testCaseItem.testCase.getName(),
1147
+ startedTestCases: startedEvaluations,
1148
+ totalTestCases: totalEvaluations,
1149
+ rerunIndex: r + 1,
1150
+ rerunTotal: reruns
1151
+ });
1138
1152
  const evaluatorScores = [];
1139
1153
  let testCaseError;
1140
1154
  const output = readOutput(testCaseItem.testCase);
@@ -1280,6 +1294,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
1280
1294
  );
1281
1295
  const maxConcurrency = Math.max(1, task.maxConcurrency ?? 1);
1282
1296
  const completedRef = yield* effect.Ref.make(0);
1297
+ const startedRef = yield* effect.Ref.make(0);
1283
1298
  const passedRef = yield* effect.Ref.make(0);
1284
1299
  const failedRef = yield* effect.Ref.make(0);
1285
1300
  const processTestCase = (testCaseItem) => processOneTestCase(
@@ -1289,6 +1304,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
1289
1304
  publishEvent,
1290
1305
  persistenceQueue,
1291
1306
  updateSnapshot,
1307
+ startedRef,
1292
1308
  completedRef,
1293
1309
  passedRef,
1294
1310
  failedRef