@m4trix/evals 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +352 -184
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +350 -185
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +294 -155
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +294 -156
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +296 -155
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.js +294 -156
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/cli.js
CHANGED
|
@@ -6,12 +6,13 @@ import { jsx, jsxs, Fragment } from 'react/jsx-runtime';
|
|
|
6
6
|
import { resolve, relative, join, dirname } from 'path';
|
|
7
7
|
import { LineGraph } from '@pppp606/ink-chart';
|
|
8
8
|
import { randomUUID } from 'crypto';
|
|
9
|
-
import { Effect, PubSub, Queue,
|
|
9
|
+
import { Effect, PubSub, Queue, Ref, Fiber } from 'effect';
|
|
10
10
|
import { existsSync } from 'fs';
|
|
11
11
|
import * as jitiModule from 'jiti';
|
|
12
12
|
import { readdir, readFile, mkdir, appendFile } from 'fs/promises';
|
|
13
13
|
import { pathToFileURL } from 'url';
|
|
14
|
-
import {
|
|
14
|
+
import { diffLines } from 'diff';
|
|
15
|
+
import stringify from 'fast-json-stable-stringify';
|
|
15
16
|
|
|
16
17
|
var SEP = " ";
|
|
17
18
|
var ARROW = "\u203A";
|
|
@@ -978,10 +979,102 @@ async function collectTestCasesFromFiles(config) {
|
|
|
978
979
|
);
|
|
979
980
|
return found.flat();
|
|
980
981
|
}
|
|
982
|
+
/**
 * Normalizes a value before it is serialized for diffing.
 *
 * Walks the value recursively and applies the requested options at every
 * depth, including inside arrays:
 *  - `options.sort`: arrays are reordered by the stable JSON form of their
 *    processed elements (compared with `localeCompare`).
 *  - `options.excludeKeys`: an array of keys, or a comma-separated string
 *    (entries are trimmed), dropped from every plain object.
 *  - `options.precision`: numbers are rounded via `toFixed(precision)`.
 *
 * The input is never mutated; arrays and objects are copied.
 *
 * @param {*} value - Value to normalize.
 * @param {{sort?: boolean, excludeKeys?: string[]|string, precision?: number}} [options]
 * @returns {*} A normalized copy of `value`.
 */
function preprocessForDiff(value, options) {
  const rawExcludeKeys = options?.excludeKeys;
  // Parse the exclusion list once instead of on every recursive call.
  let excluded = null;
  if (rawExcludeKeys) {
    excluded = new Set(
      Array.isArray(rawExcludeKeys) ? rawExcludeKeys : rawExcludeKeys.split(",").map((k) => k.trim())
    );
  }
  const precision = options?.precision;
  const sortArrays = Boolean(options?.sort);

  const walk = (node) => {
    if (Array.isArray(node)) {
      // Always descend into arrays so excludeKeys/precision also apply to
      // nested elements when sorting is disabled.
      return sortArrays ? sortProcessed(node) : node.map((item) => walk(item));
    }
    if (node !== null && typeof node === "object") {
      const out = {};
      for (const [k, v] of Object.entries(node)) {
        if (excluded !== null && excluded.has(k)) {
          continue;
        }
        out[k] = walk(v);
      }
      return out;
    }
    if (typeof node === "number" && precision !== void 0) {
      return Number(node.toFixed(precision));
    }
    return node;
  };

  const sortProcessed = (items) => {
    // Cache the processed form and its sort key per element so the comparator
    // does not re-process and re-serialize on every comparison.
    const cache = new Map();
    const prepared = (item) => {
      let entry = cache.get(item);
      if (entry === void 0) {
        const processed = walk(item);
        entry = { processed, key: stringify(processed) };
        cache.set(item, entry);
      }
      return entry;
    };
    return [...items].sort((a, b) => prepared(a).key.localeCompare(prepared(b).key)).map((item) => prepared(item).processed);
  };

  return walk(value);
}
|
|
1012
|
+
/**
 * Serializes a value as 2-space indented JSON, going through the stable
 * `stringify` first and then re-formatting its output.
 *
 * Always returns a string so the result can be handed to a text differ:
 *  - if `stringify` yields no string (expected for values without a JSON
 *    form such as `undefined` — NOTE(review): confirm against the stringify
 *    implementation in use), `String(value)` is returned;
 *  - if the serialized text cannot be re-parsed, it is returned unformatted.
 *
 * @param {*} value - Value to serialize.
 * @returns {string} Pretty-printed JSON, or a plain string fallback.
 */
function toPrettyJson(value) {
  const str = stringify(value);
  if (typeof str !== "string") {
    // Previously this path returned `undefined` (JSON.parse(undefined) threw
    // and the catch returned the non-string), which breaks string diffing.
    return String(value);
  }
  try {
    return JSON.stringify(JSON.parse(str), null, 2);
  } catch {
    return str;
  }
}
|
|
1021
|
+
/**
 * Renders diff parts as text, one output line per source line.
 *
 * Lines from added parts are prefixed with "+ ", lines from removed parts
 * with "- ", and unchanged lines get no prefix. The empty segment produced
 * by a trailing newline in a part's value is dropped.
 *
 * @param {Iterable<{value: string, added?: boolean, removed?: boolean}>} parts
 * @returns {string} The rendered lines joined with "\n".
 */
function formatDiffParts(parts) {
  const renderPart = (part) => {
    let marker = "";
    if (part.added) {
      marker = "+ ";
    } else if (part.removed) {
      marker = "- ";
    }
    const segments = part.value.split("\n");
    // Only the final segment is dropped when empty; inner blank lines stay.
    if (segments[segments.length - 1] === "") {
      segments.pop();
    }
    return segments.map((segment) => marker + segment);
  };
  return Array.from(parts, renderPart).flat().join("\n");
}
|
|
981
1035
|
/**
 * Produces a line-based textual diff between `expected` and `actual`.
 *
 * Both values are first normalized with `preprocessForDiff(…, diffOptions)`.
 *  - `diffOptions.keysOnly`: only the key structure (see `extractKeys`) is
 *    compared. The structure is serialized with the same stable key ordering
 *    as the value diff, so a mere reordering of keys is not reported.
 *  - Otherwise the pretty-printed values are compared; an empty string is
 *    returned when they are identical.
 *  - `diffOptions.outputNewOnly`: in the value diff, only added parts are
 *    rendered.
 *
 * @param {*} expected - Reference value.
 * @param {*} actual - Value to compare against the reference.
 * @param {object} [diffOptions] - Options forwarded to `preprocessForDiff`,
 *   plus `keysOnly` and `outputNewOnly`.
 * @returns {string} Diff text as rendered by `formatDiffParts`.
 */
function createDiffString(expected, actual, diffOptions) {
  const expectedProcessed = preprocessForDiff(expected, diffOptions);
  const actualProcessed = preprocessForDiff(actual, diffOptions);
  if (diffOptions?.keysOnly) {
    // toPrettyJson (stable key order) instead of plain JSON.stringify keeps
    // this branch consistent with the value branch below.
    const expectedKeys = toPrettyJson(extractKeys(expectedProcessed));
    const actualKeys = toPrettyJson(extractKeys(actualProcessed));
    return formatDiffParts(diffLines(expectedKeys, actualKeys));
  }
  const expectedStr = toPrettyJson(expectedProcessed);
  const actualStr = toPrettyJson(actualProcessed);
  if (expectedStr === actualStr) {
    return "";
  }
  const parts = diffLines(expectedStr, actualStr);
  if (diffOptions?.outputNewOnly) {
    return formatDiffParts(parts.filter((p) => p.added === true));
  }
  return formatDiffParts(parts);
}
|
|
1066
|
+
/**
 * Reduces a value to its key structure.
 *
 * Primitives and `null` become the placeholder "\xB7"; arrays are mapped
 * element by element; objects keep their own enumerable keys with each value
 * reduced recursively.
 *
 * @param {*} value - Value to reduce.
 * @returns {*} A structure of the same shape with leaves replaced.
 */
function extractKeys(value) {
  const isContainer = typeof value === "object" && value !== null;
  if (!isContainer) {
    return "\xB7";
  }
  if (Array.isArray(value)) {
    return value.map((item) => extractKeys(item));
  }
  const shape = {};
  for (const key of Object.keys(value)) {
    shape[key] = extractKeys(value[key]);
  }
  return shape;
}
|
|
986
1079
|
function formatLogMessage(msg) {
|
|
987
1080
|
if (typeof msg === "string")
|
|
@@ -1323,6 +1416,20 @@ function readOutput(testCase) {
|
|
|
1323
1416
|
}
|
|
1324
1417
|
return candidate.getOutput();
|
|
1325
1418
|
}
|
|
1419
|
+
/**
 * Expands test cases into one evaluation unit per rerun.
 *
 * The rerun count comes from `testCase.getReruns()` when that method exists
 * and defaults to 1 otherwise. Each unit carries the originating item, a
 * 1-based `rerunIndex`, and the `rerunTotal` for that test case.
 *
 * @param {Iterable<{testCase: object}>} testCases
 * @returns {Array<{testCaseItem: object, rerunIndex: number, rerunTotal: number}>}
 */
function buildEvaluationUnits(testCases) {
  const expand = (testCaseItem) => {
    let rerunTotal = 1;
    if (typeof testCaseItem.testCase.getReruns === "function") {
      rerunTotal = testCaseItem.testCase.getReruns();
    }
    const expanded = [];
    // `rerunIndex - 1 < rerunTotal` keeps the iteration count identical to a
    // 0-based `r < rerunTotal` loop for every rerunTotal value.
    for (let rerunIndex = 1; rerunIndex - 1 < rerunTotal; rerunIndex += 1) {
      expanded.push({ testCaseItem, rerunIndex, rerunTotal });
    }
    return expanded;
  };
  return Array.from(testCases).flatMap((item) => expand(item));
}
|
|
1326
1433
|
/**
 * Returns the current time as an ISO-8601 string with every ":" and "."
 * replaced by "-", for use inside file names.
 *
 * @returns {string} e.g. "2024-01-02T03-04-05-678Z".
 */
function nowIsoForFile() {
  const isoTimestamp = new Date().toISOString();
  return isoTimestamp.split(/[:.]/).join("-");
}
|
|
@@ -1332,157 +1439,171 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
|
|
|
1332
1439
|
`${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
|
|
1333
1440
|
);
|
|
1334
1441
|
}
|
|
1335
|
-
function
|
|
1442
|
+
/**
 * Builds the Effect that runs one evaluation unit (one rerun of one test
 * case) against every evaluator of the task.
 *
 * Steps performed by the returned Effect:
 *  1. Increments `startedRef` and publishes a "TestCaseStarted" event.
 *  2. Runs each evaluator that exposes an evaluate function, collecting a
 *     score entry (and any logs) per evaluator. An evaluator that rejects,
 *     throws, or returns an `Error` is recorded as failed; it does not abort
 *     the remaining evaluators or the run.
 *  3. Increments `completedRef`, updates the snapshot's `completedTestCases`,
 *     publishes a "TestCaseProgress" event and offers it to
 *     `persistenceQueue`.
 *  4. Records this rerun's outcome in `testCaseResultsRef`; once the number of
 *     recorded reruns equals `rerunTotal`, increments `passedRef` (all reruns
 *     passed) or `failedRef` and writes both counters to the snapshot.
 *
 * @param {object} task - Run task (`runId`, `triggerId`, `datasetId`,
 *   `evaluators`, `snapshot.artifactPath` are read here).
 * @param {{testCaseItem: object, rerunIndex: number, rerunTotal: number}} unit
 * @param {number} totalEvaluations - Reported as `totalTestCases` in events.
 * @param {Function} publishEvent - Returns an Effect that publishes an event.
 * @param {*} persistenceQueue - Queue receiving progress payloads.
 * @param {Function} updateSnapshot - Returns an Effect updating a run snapshot.
 * @param {*} startedRef - Ref counting started evaluations.
 * @param {*} completedRef - Ref counting completed evaluations.
 * @param {*} passedRef - Ref counting fully passed test cases.
 * @param {*} failedRef - Ref counting failed test cases.
 * @param {*} testCaseResultsRef - Ref holding a Map of per-test-case rerun results.
 * @returns {*} An Effect performing the evaluation.
 */
function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
  const { testCaseItem, rerunIndex, rerunTotal } = unit;
  return Effect.gen(function* () {
    const evaluatorRunId = `run-${randomUUID()}`;
    const started = Date.now();
    const startedEvaluations = yield* Ref.modify(startedRef, (n) => [n + 1, n + 1]);
    yield* publishEvent({
      type: "TestCaseStarted",
      runId: task.runId,
      testCaseId: testCaseItem.id,
      testCaseName: testCaseItem.testCase.getName(),
      startedTestCases: startedEvaluations,
      totalTestCases: totalEvaluations,
      rerunIndex,
      rerunTotal
    });
    const evaluatorScores = [];
    let testCaseError;
    const output = readOutput(testCaseItem.testCase);
    for (const { id: evaluatorId, evaluator } of task.evaluators) {
      const evaluateFn = evaluator.getEvaluateFn();
      if (!evaluateFn) {
        continue;
      }
      const logs = [];
      const logDiff = (expected, actual, options) => {
        logs.push(createDiffLogEntry(expected, actual, options));
      };
      const log = (message, options) => {
        logs.push(createLogEntry(message, options));
      };
      const createError = (message, options) => {
        const entry = createLogEntry(message, options);
        const error = message instanceof Error ? message : new Error(entry.message);
        error[evaluatorErrorLogEntryKey] = entry;
        return error;
      };
      // Records a failed evaluator: logs the error (preferring the log entry
      // attached by createError), remembers its message, pushes a failed score.
      const recordFailure = (error) => {
        if (error instanceof Error) {
          const taggedEntry = error[evaluatorErrorLogEntryKey];
          logs.push(taggedEntry ?? createLogEntry(error));
        }
        testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
        evaluatorScores.push({
          evaluatorId,
          scores: [],
          passed: false,
          logs: logs.length > 0 ? logs : void 0
        });
      };
      // Settle the promise inside the thunk. A rejection reaching
      // Effect.promise is treated as a defect and is not thrown back into this
      // generator, so a surrounding try/catch would never observe it.
      const settled = yield* Effect.promise(
        () => Promise.resolve().then(() => evaluator.resolveContext()).then(
          (ctx) => evaluateFn({
            input: testCaseItem.testCase.getInput(),
            ctx,
            output,
            meta: {
              triggerId: task.triggerId,
              runId: evaluatorRunId,
              datasetId: task.datasetId
            },
            logDiff,
            log,
            createError
          })
        ).then(
          (value) => ({ ok: true, value }),
          (error) => ({ ok: false, error })
        )
      );
      if (!settled.ok) {
        recordFailure(settled.error);
        continue;
      }
      const result = settled.value;
      if (result instanceof Error) {
        recordFailure(result);
        continue;
      }
      try {
        // Synchronous code: exceptions here are ordinary JS throws.
        const { scores, metrics } = normalizeResult(result);
        const passed = computeEvaluatorPassed(evaluator, result, scores);
        evaluatorScores.push({
          evaluatorId,
          scores,
          passed,
          metrics,
          logs: logs.length > 0 ? logs : void 0
        });
      } catch (error) {
        recordFailure(error);
      }
    }
    const rerunPassedThis = evaluatorScores.every((s) => s.passed);
    const completedEvaluations = yield* Ref.modify(completedRef, (n) => [n + 1, n + 1]);
    const progressEvent = {
      type: "TestCaseProgress",
      runId: task.runId,
      testCaseId: testCaseItem.id,
      testCaseName: testCaseItem.testCase.getName(),
      completedTestCases: completedEvaluations,
      totalTestCases: totalEvaluations,
      rerunIndex,
      rerunTotal,
      passed: rerunPassedThis,
      durationMs: Date.now() - started,
      evaluatorScores,
      output,
      errorMessage: testCaseError
    };
    yield* updateSnapshot(task.runId, (snapshot) => ({
      ...snapshot,
      completedTestCases: completedEvaluations
    }));
    yield* publishEvent(progressEvent);
    yield* Queue.offer(persistenceQueue, {
      runId: task.runId,
      artifactPath: task.snapshot.artifactPath,
      payload: progressEvent
    });
    // Yields null until the last rerun of this test case has been recorded,
    // then the overall outcome (true only if every rerun passed).
    const testCaseCompleted = yield* Ref.modify(
      testCaseResultsRef,
      (map) => {
        const key = testCaseItem.id;
        const existing = map.get(key) ?? { completedCount: 0, results: [] };
        const newResults = [...existing.results, rerunPassedThis];
        const newCompletedCount = existing.completedCount + 1;
        const isLast = newCompletedCount === rerunTotal;
        const newMap = new Map(map);
        newMap.set(key, {
          completedCount: newCompletedCount,
          results: newResults
        });
        const outcome = isLast ? newResults.every(Boolean) : null;
        return [outcome, newMap];
      }
    );
    if (testCaseCompleted !== null) {
      if (testCaseCompleted) {
        yield* Ref.update(passedRef, (n) => n + 1);
      } else {
        yield* Ref.update(failedRef, (n) => n + 1);
      }
      const [passed, failed] = yield* Effect.all([
        Ref.get(passedRef),
        Ref.get(failedRef)
      ]);
      yield* updateSnapshot(task.runId, (snapshot) => ({
        ...snapshot,
        passedTestCases: passed,
        failedTestCases: failed
      }));
    }
  });
}
|
|
1483
1604
|
var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => Effect.gen(function* () {
|
|
1484
1605
|
const startedAt = Date.now();
|
|
1485
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1606
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1486
1607
|
...snapshot,
|
|
1487
1608
|
status: "running",
|
|
1488
1609
|
startedAt
|
|
@@ -1501,9 +1622,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1501
1622
|
const startedRef = yield* Ref.make(0);
|
|
1502
1623
|
const passedRef = yield* Ref.make(0);
|
|
1503
1624
|
const failedRef = yield* Ref.make(0);
|
|
1504
|
-
const
|
|
1625
|
+
const testCaseResultsRef = yield* Ref.make(
|
|
1626
|
+
/* @__PURE__ */ new Map()
|
|
1627
|
+
);
|
|
1628
|
+
const evaluationUnits = buildEvaluationUnits(task.testCases);
|
|
1629
|
+
const processEvaluation = (unit) => processOneEvaluation(
|
|
1505
1630
|
task,
|
|
1506
|
-
|
|
1631
|
+
unit,
|
|
1507
1632
|
totalEvaluations,
|
|
1508
1633
|
publishEvent,
|
|
1509
1634
|
persistenceQueue,
|
|
@@ -1511,11 +1636,12 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1511
1636
|
startedRef,
|
|
1512
1637
|
completedRef,
|
|
1513
1638
|
passedRef,
|
|
1514
|
-
failedRef
|
|
1639
|
+
failedRef,
|
|
1640
|
+
testCaseResultsRef
|
|
1515
1641
|
);
|
|
1516
1642
|
yield* Effect.forEach(
|
|
1517
|
-
|
|
1518
|
-
|
|
1643
|
+
evaluationUnits,
|
|
1644
|
+
processEvaluation,
|
|
1519
1645
|
maxConcurrency > 1 ? { concurrency: maxConcurrency } : void 0
|
|
1520
1646
|
);
|
|
1521
1647
|
const [completedEvaluations, passedUniqueTestCases, failedUniqueTestCases] = yield* Effect.all([
|
|
@@ -1533,7 +1659,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1533
1659
|
totalTestCases: task.testCases.length,
|
|
1534
1660
|
artifactPath: task.snapshot.artifactPath
|
|
1535
1661
|
};
|
|
1536
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1662
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1537
1663
|
...snapshot,
|
|
1538
1664
|
status: "completed",
|
|
1539
1665
|
completedTestCases: completedEvaluations,
|
|
@@ -1816,7 +1942,9 @@ var EffectRunner = class {
|
|
|
1816
1942
|
this.persistenceQueue = Effect.runSync(
|
|
1817
1943
|
Queue.unbounded()
|
|
1818
1944
|
);
|
|
1819
|
-
this.
|
|
1945
|
+
this.snapshotsRef = Effect.runSync(
|
|
1946
|
+
Ref.make(/* @__PURE__ */ new Map())
|
|
1947
|
+
);
|
|
1820
1948
|
this.listeners = /* @__PURE__ */ new Set();
|
|
1821
1949
|
this.datasetsById = /* @__PURE__ */ new Map();
|
|
1822
1950
|
this.evaluatorsById = /* @__PURE__ */ new Map();
|
|
@@ -1919,7 +2047,13 @@ var EffectRunner = class {
|
|
|
1919
2047
|
status: "queued",
|
|
1920
2048
|
artifactPath
|
|
1921
2049
|
};
|
|
1922
|
-
|
|
2050
|
+
await Effect.runPromise(
|
|
2051
|
+
Ref.update(this.snapshotsRef, (map) => {
|
|
2052
|
+
const next = new Map(map);
|
|
2053
|
+
next.set(runId, snapshot);
|
|
2054
|
+
return next;
|
|
2055
|
+
})
|
|
2056
|
+
);
|
|
1923
2057
|
const queuedEvent = {
|
|
1924
2058
|
type: "RunQueued",
|
|
1925
2059
|
runId,
|
|
@@ -1960,12 +2094,12 @@ var EffectRunner = class {
|
|
|
1960
2094
|
};
|
|
1961
2095
|
}
|
|
1962
2096
|
getRunSnapshot(runId) {
|
|
1963
|
-
return this.
|
|
2097
|
+
return Effect.runSync(Ref.get(this.snapshotsRef)).get(runId);
|
|
1964
2098
|
}
|
|
1965
2099
|
getAllRunSnapshots() {
|
|
1966
|
-
return Array.from(
|
|
1967
|
-
(
|
|
1968
|
-
);
|
|
2100
|
+
return Array.from(
|
|
2101
|
+
Effect.runSync(Ref.get(this.snapshotsRef)).values()
|
|
2102
|
+
).sort((a, b) => b.queuedAt - a.queuedAt);
|
|
1969
2103
|
}
|
|
1970
2104
|
async loadRunSnapshotsFromArtifacts() {
|
|
1971
2105
|
return loadRunSnapshotsFromArtifacts(this.config);
|
|
@@ -1994,11 +2128,15 @@ var EffectRunner = class {
|
|
|
1994
2128
|
);
|
|
1995
2129
|
}
|
|
1996
2130
|
updateSnapshot(runId, updater) {
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2131
|
+
return Ref.modify(this.snapshotsRef, (map) => {
|
|
2132
|
+
const existing = map.get(runId);
|
|
2133
|
+
if (!existing) {
|
|
2134
|
+
return [void 0, map];
|
|
2135
|
+
}
|
|
2136
|
+
const next = new Map(map);
|
|
2137
|
+
next.set(runId, updater(existing));
|
|
2138
|
+
return [void 0, next];
|
|
2139
|
+
}).pipe(Effect.asVoid);
|
|
2002
2140
|
}
|
|
2003
2141
|
publishEvent(event) {
|
|
2004
2142
|
return Effect.sync(() => {
|