@m4trix/evals 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +352 -184
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +350 -185
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +294 -155
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +294 -156
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +296 -155
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.js +294 -156
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/cli.cjs
CHANGED
|
@@ -13,7 +13,8 @@ var fs = require('fs');
|
|
|
13
13
|
var jitiModule = require('jiti');
|
|
14
14
|
var promises = require('fs/promises');
|
|
15
15
|
var url = require('url');
|
|
16
|
-
var
|
|
16
|
+
var diff = require('diff');
|
|
17
|
+
var stringify = require('fast-json-stable-stringify');
|
|
17
18
|
|
|
18
19
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
19
20
|
/**
 * Bundler interop helper: returns `e` unchanged when it is truthy and flagged
 * as an ES module (`__esModule`), otherwise wraps it as `{ default: e }`.
 */
function _interopDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
@@ -38,6 +39,7 @@ function _interopNamespace(e) {
|
|
|
38
39
|
|
|
39
40
|
var React2__default = /*#__PURE__*/_interopDefault(React2);
|
|
40
41
|
var jitiModule__namespace = /*#__PURE__*/_interopNamespace(jitiModule);
|
|
42
|
+
var stringify__default = /*#__PURE__*/_interopDefault(stringify);
|
|
41
43
|
|
|
42
44
|
var SEP = " ";
|
|
43
45
|
var ARROW = "\u203A";
|
|
@@ -1004,10 +1006,102 @@ async function collectTestCasesFromFiles(config) {
|
|
|
1004
1006
|
);
|
|
1005
1007
|
return found.flat();
|
|
1006
1008
|
}
|
|
1009
|
+
/**
 * Normalizes a value before it is rendered for diffing.
 *
 * Supported options (all optional):
 * - `sort`: arrays are ordered by the stable-stringified form of their
 *   (already normalized) elements.
 * - `excludeKeys`: array of key names, or a comma-separated string of key
 *   names, dropped from every object at every depth.
 * - `precision`: numbers are rounded via `toFixed(precision)`.
 *
 * Arrays are always traversed, so `excludeKeys` and `precision` also apply to
 * objects and numbers nested inside arrays when `sort` is not requested.
 *
 * @param {*} value - Value to normalize; it is not mutated.
 * @param {{sort?: boolean, excludeKeys?: string[]|string, precision?: number}} [options]
 * @returns {*} A normalized copy (primitives are returned as-is).
 */
function preprocessForDiff(value, options) {
  const rawKeys = options?.excludeKeys;
  // Parse the exclusion list once per call instead of once per visited object.
  const excluded = new Set(
    !rawKeys
      ? []
      : Array.isArray(rawKeys)
        ? rawKeys
        : String(rawKeys).split(",").map((k) => k.trim())
  );
  const sortArrays = Boolean(options?.sort);
  const precision = options?.precision;

  const walk = (node) => {
    if (Array.isArray(node)) {
      const items = node.map(walk);
      if (!sortArrays) {
        return items;
      }
      // Compute each sort key once; String() guards against the stringifier
      // returning undefined (e.g. for undefined elements).
      return items
        .map((item) => ({ item, key: String(stringify__default.default(item)) }))
        .sort((a, b) => a.key.localeCompare(b.key))
        .map((entry) => entry.item);
    }
    if (node !== null && typeof node === "object") {
      // Object.fromEntries defines own properties, so a "__proto__" key is
      // kept as data instead of replacing the result's prototype.
      return Object.fromEntries(
        Object.entries(node)
          .filter(([k]) => !excluded.has(k))
          .map(([k, v]) => [k, walk(v)])
      );
    }
    if (typeof node === "number" && precision !== void 0) {
      return Number(node.toFixed(precision));
    }
    return node;
  };

  return walk(value);
}
|
|
1039
|
+
/**
 * Renders a value as 2-space-indented JSON, going through the stable
 * stringifier first and then re-parsing its output for pretty-printing.
 *
 * Always returns a string: when the stringifier yields no string (it does so
 * for `undefined`, functions and symbols), the value's `String()` form is
 * returned so callers can still feed the result to a line differ.
 *
 * @param {*} value
 * @returns {string}
 */
function toPrettyJson(value) {
  const compact = stringify__default.default(value);
  if (typeof compact !== "string") {
    return String(value);
  }
  try {
    return JSON.stringify(JSON.parse(compact), null, 2);
  } catch {
    // Fall back to the compact form if it cannot be re-parsed.
    return compact;
  }
}
|
|
1048
|
+
/**
 * Flattens diff parts into a single newline-joined string.
 *
 * Each part's `value` is split into lines; lines of added parts get a "+ "
 * prefix, lines of removed parts a "- " prefix, and other lines no prefix.
 * A trailing empty segment (from a value ending in a newline) is dropped.
 *
 * @param {Array<{value: string, added?: boolean, removed?: boolean}>} parts
 * @returns {string}
 */
function formatDiffParts(parts) {
  const rendered = parts.flatMap((part) => {
    let marker = "";
    if (part.added) {
      marker = "+ ";
    } else if (part.removed) {
      marker = "- ";
    }
    const segments = part.value.split("\n");
    // split() always yields at least one segment; only the final one is
    // discarded when empty.
    if (segments[segments.length - 1] === "") {
      segments.pop();
    }
    return segments.map((segment) => marker + segment);
  });
  return rendered.join("\n");
}
|
|
1007
1062
|
/**
 * Produces a line-based textual diff between `expected` and `actual`.
 *
 * Both values are first normalized with `preprocessForDiff(…, diffOptions)`.
 * - `diffOptions.keysOnly`: diffs only the key structure (via `extractKeys`).
 * - otherwise: diffs the pretty-printed JSON; returns "" when both renderings
 *   are identical, and keeps only added parts when `diffOptions.outputNewOnly`
 *   is set.
 *
 * NOTE(review): the keysOnly branch has no equal-input short-circuit, so it
 * returns the unprefixed key listing rather than "" — confirm this is intended.
 *
 * @param {*} expected
 * @param {*} actual
 * @param {object} [diffOptions]
 * @returns {string}
 */
function createDiffString(expected, actual, diffOptions) {
  const expectedProcessed = preprocessForDiff(expected, diffOptions);
  const actualProcessed = preprocessForDiff(actual, diffOptions);

  if (diffOptions?.keysOnly) {
    const renderKeys = (processed) => JSON.stringify(extractKeys(processed), null, 2);
    const keyChanges = diff.diffLines(
      renderKeys(expectedProcessed),
      renderKeys(actualProcessed)
    );
    return formatDiffParts(keyChanges);
  }

  const expectedStr = toPrettyJson(expectedProcessed);
  const actualStr = toPrettyJson(actualProcessed);
  if (expectedStr === actualStr) {
    return "";
  }

  const changes = diff.diffLines(expectedStr, actualStr);
  const visible = diffOptions?.outputNewOnly
    ? changes.filter((change) => change.added === true)
    : changes;
  return formatDiffParts(visible);
}
|
|
1093
|
+
/**
 * Reduces a value to its key structure: every non-object leaf (including
 * `null`) becomes the placeholder "·", arrays are mapped element-wise, and
 * objects keep their own enumerable keys with recursively reduced values.
 *
 * Result objects are built with `Object.fromEntries`, so an own key named
 * "__proto__" is kept as a regular property instead of replacing the
 * result's prototype and vanishing from the output.
 *
 * @param {*} value
 * @returns {*} "·", an array, or a plain object mirroring the input's shape.
 */
function extractKeys(value) {
  if (value === null || typeof value !== "object") {
    return "\xB7";
  }
  if (Array.isArray(value)) {
    return value.map((item) => extractKeys(item));
  }
  return Object.fromEntries(
    Object.entries(value).map(([k, v]) => [k, extractKeys(v)])
  );
}
|
|
1012
1106
|
function formatLogMessage(msg) {
|
|
1013
1107
|
if (typeof msg === "string")
|
|
@@ -1349,6 +1443,20 @@ function readOutput(testCase) {
|
|
|
1349
1443
|
}
|
|
1350
1444
|
return candidate.getOutput();
|
|
1351
1445
|
}
|
|
1446
|
+
/**
 * Expands test cases into one evaluation unit per rerun.
 *
 * The rerun count comes from `testCase.getReruns()` when that method exists
 * and defaults to 1 otherwise. Each unit carries the originating item, its
 * 1-based `rerunIndex`, and the `rerunTotal` for that test case.
 *
 * @param {Array<{testCase: object}>} testCases
 * @returns {Array<{testCaseItem: object, rerunIndex: number, rerunTotal: number}>}
 */
function buildEvaluationUnits(testCases) {
  return testCases.flatMap((testCaseItem) => {
    const { testCase } = testCaseItem;
    const rerunTotal = typeof testCase.getReruns === "function" ? testCase.getReruns() : 1;
    const unitsForCase = [];
    let r = 0;
    while (r < rerunTotal) {
      r += 1;
      unitsForCase.push({ testCaseItem, rerunIndex: r, rerunTotal });
    }
    return unitsForCase;
  });
}
|
|
1352
1460
|
/**
 * Returns the current time as an ISO-8601 string with every ":" and "."
 * replaced by "-".
 *
 * @returns {string}
 */
function nowIsoForFile() {
  const isoNow = (/* @__PURE__ */ new Date()).toISOString();
  return isoNow.replace(/[:.]/g, "-");
}
|
|
@@ -1358,157 +1466,171 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
|
|
|
1358
1466
|
`${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
|
|
1359
1467
|
);
|
|
1360
1468
|
}
|
|
1361
|
-
function
|
|
1469
|
+
/**
 * Builds the Effect that evaluates one unit (a test case at a given rerun
 * index) with every evaluator of `task`.
 *
 * The effect, in order:
 *  1. increments `startedRef` and publishes a "TestCaseStarted" event;
 *  2. runs each evaluator that exposes an evaluate function, collecting one
 *     score entry per evaluator (a returned Error, a thrown error or a
 *     rejected promise is recorded as a failed entry, never propagated);
 *  3. increments `completedRef`, updates the snapshot, publishes a
 *     "TestCaseProgress" event and offers it to `persistenceQueue`;
 *  4. records this rerun's outcome in `testCaseResultsRef`; once all
 *     `rerunTotal` reruns of the test case are in, bumps `passedRef` or
 *     `failedRef` (pass requires every rerun to pass) and mirrors both
 *     counters into the snapshot.
 *
 * @param task - Run task; reads `runId`, `triggerId`, `datasetId`,
 *   `evaluators` and `snapshot.artifactPath`.
 * @param unit - `{ testCaseItem, rerunIndex, rerunTotal }`.
 * @param totalEvaluations - Reported as `totalTestCases` in emitted events.
 * @param publishEvent - Returns an effect that publishes an event.
 * @param persistenceQueue - Queue receiving `{ runId, artifactPath, payload }`.
 * @param updateSnapshot - `(runId, updater)` returning an effect.
 * @param startedRef - Ref counter of started evaluations.
 * @param completedRef - Ref counter of completed evaluations.
 * @param passedRef - Ref counter of test cases whose reruns all passed.
 * @param failedRef - Ref counter of test cases with a failing rerun.
 * @param testCaseResultsRef - Ref of a Map keyed by test case id holding
 *   `{ completedCount, results }`.
 */
function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
  const { testCaseItem, rerunIndex, rerunTotal } = unit;
  // Runs a possibly-async thunk and always resolves to a tagged outcome.
  // A try/catch inside Effect.gen only sees synchronous throws from the
  // generator body, not failures of yielded effects, so rejections are turned
  // into values here and rethrown synchronously by the caller.
  const settle = (thunk) => effect.Effect.promise(
    () => Promise.resolve().then(thunk).then(
      (value) => ({ ok: true, value }),
      (error) => ({ ok: false, error })
    )
  );
  return effect.Effect.gen(function* () {
    const evaluatorRunId = `run-${crypto.randomUUID()}`;
    const started = Date.now();
    const startedEvaluations = yield* effect.Ref.modify(startedRef, (n) => [
      n + 1,
      n + 1
    ]);
    yield* publishEvent({
      type: "TestCaseStarted",
      runId: task.runId,
      testCaseId: testCaseItem.id,
      testCaseName: testCaseItem.testCase.getName(),
      startedTestCases: startedEvaluations,
      totalTestCases: totalEvaluations,
      rerunIndex,
      rerunTotal
    });
    const evaluatorScores = [];
    let testCaseError;
    const output = readOutput(testCaseItem.testCase);
    for (const { id: evaluatorId, evaluator } of task.evaluators) {
      const evaluateFn = evaluator.getEvaluateFn();
      if (!evaluateFn) {
        continue;
      }
      // Per-evaluator log buffer, filled through the helpers handed to evaluateFn.
      const logs = [];
      const logDiff = (expected, actual, options) => {
        logs.push(createDiffLogEntry(expected, actual, options));
      };
      const log = (message, options) => {
        logs.push(createLogEntry(message, options));
      };
      const createError = (message, options) => {
        const entry = createLogEntry(message, options);
        const error = message instanceof Error ? message : new Error(entry.message);
        // Tag the error so its pre-built log entry is reused when it surfaces.
        error[evaluatorErrorLogEntryKey] = entry;
        return error;
      };
      try {
        const ctxOutcome = yield* settle(() => evaluator.resolveContext());
        if (!ctxOutcome.ok) {
          throw ctxOutcome.error;
        }
        const ctx = ctxOutcome.value;
        const evalOutcome = yield* settle(
          () => evaluateFn({
            input: testCaseItem.testCase.getInput(),
            ctx,
            output,
            meta: {
              triggerId: task.triggerId,
              runId: evaluatorRunId,
              datasetId: task.datasetId
            },
            logDiff,
            log,
            createError
          })
        );
        if (!evalOutcome.ok) {
          throw evalOutcome.error;
        }
        const result = evalOutcome.value;
        if (result instanceof Error) {
          // The evaluator returned (rather than threw) an error.
          const evaluatorError = result;
          const taggedEntry = evaluatorError[evaluatorErrorLogEntryKey];
          logs.push(taggedEntry ?? createLogEntry(result));
          testCaseError = result.message;
          evaluatorScores.push({
            evaluatorId,
            scores: [],
            passed: false,
            logs: logs.length > 0 ? logs : void 0
          });
          continue;
        }
        const { scores, metrics } = normalizeResult(result);
        const passed = computeEvaluatorPassed(evaluator, result, scores);
        evaluatorScores.push({
          evaluatorId,
          scores,
          passed,
          metrics,
          logs: logs.length > 0 ? logs : void 0
        });
      } catch (error) {
        if (error instanceof Error) {
          const taggedEntry = error[evaluatorErrorLogEntryKey];
          logs.push(taggedEntry ?? createLogEntry(error));
        }
        testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
        evaluatorScores.push({
          evaluatorId,
          scores: [],
          passed: false,
          logs: logs.length > 0 ? logs : void 0
        });
      }
    }
    const rerunPassedThis = evaluatorScores.every((s) => s.passed);
    const completedEvaluations = yield* effect.Ref.modify(completedRef, (n) => [
      n + 1,
      n + 1
    ]);
    const progressEvent = {
      type: "TestCaseProgress",
      runId: task.runId,
      testCaseId: testCaseItem.id,
      testCaseName: testCaseItem.testCase.getName(),
      completedTestCases: completedEvaluations,
      totalTestCases: totalEvaluations,
      rerunIndex,
      rerunTotal,
      passed: rerunPassedThis,
      durationMs: Date.now() - started,
      evaluatorScores,
      output,
      errorMessage: testCaseError
    };
    yield* updateSnapshot(task.runId, (snapshot) => ({
      ...snapshot,
      completedTestCases: completedEvaluations
    }));
    yield* publishEvent(progressEvent);
    yield* effect.Queue.offer(persistenceQueue, {
      runId: task.runId,
      artifactPath: task.snapshot.artifactPath,
      payload: progressEvent
    });
    // Record this rerun; the modify yields the test case's overall outcome
    // once its last rerun has completed, and null before that.
    const testCaseCompleted = yield* effect.Ref.modify(
      testCaseResultsRef,
      (map) => {
        const key = testCaseItem.id;
        const existing = map.get(key) ?? { completedCount: 0, results: [] };
        const newResults = [...existing.results, rerunPassedThis];
        const newCompletedCount = existing.completedCount + 1;
        const isLast = newCompletedCount === rerunTotal;
        const newMap = new Map(map);
        newMap.set(key, {
          completedCount: newCompletedCount,
          results: newResults
        });
        const outcome = isLast ? newResults.every(Boolean) : null;
        return [outcome, newMap];
      }
    );
    if (testCaseCompleted !== null) {
      if (testCaseCompleted) {
        yield* effect.Ref.update(passedRef, (n) => n + 1);
      } else {
        yield* effect.Ref.update(failedRef, (n) => n + 1);
      }
      const [passed, failed] = yield* effect.Effect.all([
        effect.Ref.get(passedRef),
        effect.Ref.get(failedRef)
      ]);
      yield* updateSnapshot(task.runId, (snapshot) => ({
        ...snapshot,
        passedTestCases: passed,
        failedTestCases: failed
      }));
    }
  });
}
|
|
1509
1631
|
var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => effect.Effect.gen(function* () {
|
|
1510
1632
|
const startedAt = Date.now();
|
|
1511
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1633
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1512
1634
|
...snapshot,
|
|
1513
1635
|
status: "running",
|
|
1514
1636
|
startedAt
|
|
@@ -1527,9 +1649,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
1527
1649
|
const startedRef = yield* effect.Ref.make(0);
|
|
1528
1650
|
const passedRef = yield* effect.Ref.make(0);
|
|
1529
1651
|
const failedRef = yield* effect.Ref.make(0);
|
|
1530
|
-
const
|
|
1652
|
+
const testCaseResultsRef = yield* effect.Ref.make(
|
|
1653
|
+
/* @__PURE__ */ new Map()
|
|
1654
|
+
);
|
|
1655
|
+
const evaluationUnits = buildEvaluationUnits(task.testCases);
|
|
1656
|
+
const processEvaluation = (unit) => processOneEvaluation(
|
|
1531
1657
|
task,
|
|
1532
|
-
|
|
1658
|
+
unit,
|
|
1533
1659
|
totalEvaluations,
|
|
1534
1660
|
publishEvent,
|
|
1535
1661
|
persistenceQueue,
|
|
@@ -1537,11 +1663,12 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
1537
1663
|
startedRef,
|
|
1538
1664
|
completedRef,
|
|
1539
1665
|
passedRef,
|
|
1540
|
-
failedRef
|
|
1666
|
+
failedRef,
|
|
1667
|
+
testCaseResultsRef
|
|
1541
1668
|
);
|
|
1542
1669
|
yield* effect.Effect.forEach(
|
|
1543
|
-
|
|
1544
|
-
|
|
1670
|
+
evaluationUnits,
|
|
1671
|
+
processEvaluation,
|
|
1545
1672
|
maxConcurrency > 1 ? { concurrency: maxConcurrency } : void 0
|
|
1546
1673
|
);
|
|
1547
1674
|
const [completedEvaluations, passedUniqueTestCases, failedUniqueTestCases] = yield* effect.Effect.all([
|
|
@@ -1559,7 +1686,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
1559
1686
|
totalTestCases: task.testCases.length,
|
|
1560
1687
|
artifactPath: task.snapshot.artifactPath
|
|
1561
1688
|
};
|
|
1562
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1689
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1563
1690
|
...snapshot,
|
|
1564
1691
|
status: "completed",
|
|
1565
1692
|
completedTestCases: completedEvaluations,
|
|
@@ -1842,7 +1969,9 @@ var EffectRunner = class {
|
|
|
1842
1969
|
this.persistenceQueue = effect.Effect.runSync(
|
|
1843
1970
|
effect.Queue.unbounded()
|
|
1844
1971
|
);
|
|
1845
|
-
this.
|
|
1972
|
+
this.snapshotsRef = effect.Effect.runSync(
|
|
1973
|
+
effect.Ref.make(/* @__PURE__ */ new Map())
|
|
1974
|
+
);
|
|
1846
1975
|
this.listeners = /* @__PURE__ */ new Set();
|
|
1847
1976
|
this.datasetsById = /* @__PURE__ */ new Map();
|
|
1848
1977
|
this.evaluatorsById = /* @__PURE__ */ new Map();
|
|
@@ -1945,7 +2074,13 @@ var EffectRunner = class {
|
|
|
1945
2074
|
status: "queued",
|
|
1946
2075
|
artifactPath
|
|
1947
2076
|
};
|
|
1948
|
-
|
|
2077
|
+
await effect.Effect.runPromise(
|
|
2078
|
+
effect.Ref.update(this.snapshotsRef, (map) => {
|
|
2079
|
+
const next = new Map(map);
|
|
2080
|
+
next.set(runId, snapshot);
|
|
2081
|
+
return next;
|
|
2082
|
+
})
|
|
2083
|
+
);
|
|
1949
2084
|
const queuedEvent = {
|
|
1950
2085
|
type: "RunQueued",
|
|
1951
2086
|
runId,
|
|
@@ -1986,12 +2121,12 @@ var EffectRunner = class {
|
|
|
1986
2121
|
};
|
|
1987
2122
|
}
|
|
1988
2123
|
getRunSnapshot(runId) {
|
|
1989
|
-
return this.
|
|
2124
|
+
return effect.Effect.runSync(effect.Ref.get(this.snapshotsRef)).get(runId);
|
|
1990
2125
|
}
|
|
1991
2126
|
getAllRunSnapshots() {
|
|
1992
|
-
return Array.from(
|
|
1993
|
-
(
|
|
1994
|
-
);
|
|
2127
|
+
return Array.from(
|
|
2128
|
+
effect.Effect.runSync(effect.Ref.get(this.snapshotsRef)).values()
|
|
2129
|
+
).sort((a, b) => b.queuedAt - a.queuedAt);
|
|
1995
2130
|
}
|
|
1996
2131
|
async loadRunSnapshotsFromArtifacts() {
|
|
1997
2132
|
return loadRunSnapshotsFromArtifacts(this.config);
|
|
@@ -2020,11 +2155,15 @@ var EffectRunner = class {
|
|
|
2020
2155
|
);
|
|
2021
2156
|
}
|
|
2022
2157
|
updateSnapshot(runId, updater) {
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2158
|
+
return effect.Ref.modify(this.snapshotsRef, (map) => {
|
|
2159
|
+
const existing = map.get(runId);
|
|
2160
|
+
if (!existing) {
|
|
2161
|
+
return [void 0, map];
|
|
2162
|
+
}
|
|
2163
|
+
const next = new Map(map);
|
|
2164
|
+
next.set(runId, updater(existing));
|
|
2165
|
+
return [void 0, next];
|
|
2166
|
+
}).pipe(effect.Effect.asVoid);
|
|
2028
2167
|
}
|
|
2029
2168
|
publishEvent(event) {
|
|
2030
2169
|
return effect.Effect.sync(() => {
|