@m4trix/evals 0.20.0 → 0.21.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -1358,13 +1358,27 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
1358
1358
  `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
1359
1359
  );
1360
1360
  }
1361
- function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, completedRef, passedRef, failedRef) {
1361
+ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef) {
1362
1362
  return effect.Effect.gen(function* () {
1363
1363
  const reruns = typeof testCaseItem.testCase.getReruns === "function" ? testCaseItem.testCase.getReruns() : 1;
1364
1364
  const rerunPassed = [];
1365
1365
  for (let r = 0; r < reruns; r++) {
1366
1366
  const evaluatorRunId = `run-${crypto.randomUUID()}`;
1367
1367
  const started = Date.now();
1368
+ const startedEvaluations = yield* effect.Ref.modify(startedRef, (n) => [
1369
+ n + 1,
1370
+ n + 1
1371
+ ]);
1372
+ yield* publishEvent({
1373
+ type: "TestCaseStarted",
1374
+ runId: task.runId,
1375
+ testCaseId: testCaseItem.id,
1376
+ testCaseName: testCaseItem.testCase.getName(),
1377
+ startedTestCases: startedEvaluations,
1378
+ totalTestCases: totalEvaluations,
1379
+ rerunIndex: r + 1,
1380
+ rerunTotal: reruns
1381
+ });
1368
1382
  const evaluatorScores = [];
1369
1383
  let testCaseError;
1370
1384
  const output = readOutput(testCaseItem.testCase);
@@ -1510,6 +1524,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
1510
1524
  );
1511
1525
  const maxConcurrency = Math.max(1, task.maxConcurrency ?? 1);
1512
1526
  const completedRef = yield* effect.Ref.make(0);
1527
+ const startedRef = yield* effect.Ref.make(0);
1513
1528
  const passedRef = yield* effect.Ref.make(0);
1514
1529
  const failedRef = yield* effect.Ref.make(0);
1515
1530
  const processTestCase = (testCaseItem) => processOneTestCase(
@@ -1519,6 +1534,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
1519
1534
  publishEvent,
1520
1535
  persistenceQueue,
1521
1536
  updateSnapshot,
1537
+ startedRef,
1522
1538
  completedRef,
1523
1539
  passedRef,
1524
1540
  failedRef