@m4trix/evals 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +352 -184
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +350 -185
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +294 -155
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +294 -156
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +296 -155
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.js +294 -156
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.cjs
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var effect = require('effect');
|
|
4
|
-
var
|
|
4
|
+
var diff = require('diff');
|
|
5
|
+
var stringify = require('fast-json-stable-stringify');
|
|
5
6
|
var crypto = require('crypto');
|
|
6
7
|
var fs = require('fs');
|
|
7
8
|
var path = require('path');
|
|
@@ -10,6 +11,8 @@ var promises = require('fs/promises');
|
|
|
10
11
|
var url = require('url');
|
|
11
12
|
|
|
12
13
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
14
|
+
function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
15
|
+
|
|
13
16
|
function _interopNamespace(e) {
|
|
14
17
|
if (e && e.__esModule) return e;
|
|
15
18
|
var n = Object.create(null);
|
|
@@ -28,6 +31,7 @@ function _interopNamespace(e) {
|
|
|
28
31
|
return Object.freeze(n);
|
|
29
32
|
}
|
|
30
33
|
|
|
34
|
+
var stringify__default = /*#__PURE__*/_interopDefault(stringify);
|
|
31
35
|
var jitiModule__namespace = /*#__PURE__*/_interopNamespace(jitiModule);
|
|
32
36
|
|
|
33
37
|
// src/cli/data.mock.json
|
|
@@ -730,10 +734,102 @@ var binaryScore = Score.of({
|
|
|
730
734
|
},
|
|
731
735
|
aggregateValues: Score.aggregate.all
|
|
732
736
|
});
|
|
737
|
+
/**
 * Normalizes a value before it is serialized and compared by the diff helpers.
 *
 * Recognized options (all optional):
 *  - `sort`: when truthy, every array (at any depth) is ordered by the
 *    serialized form of its already-normalized elements.
 *  - `excludeKeys`: an array of key names, or a comma-separated string of
 *    names (whitespace around each name is trimmed), removed from every
 *    object at any depth.
 *  - `precision`: number of fraction digits numbers are rounded to (`toFixed`).
 *
 * Arrays and objects are always rebuilt as fresh copies; the input is never
 * mutated. Objects are rebuilt from their own enumerable string-keyed
 * properties only.
 *
 * @param {*} value - Value to normalize.
 * @param {{sort?: boolean, excludeKeys?: string[]|string, precision?: number}} [options]
 * @returns {*} The normalized copy (primitives are returned as-is or rounded).
 * @throws {RangeError} If `precision` is outside the range `toFixed` accepts.
 */
function preprocessForDiff(value, options) {
  // Parse the exclusion list once instead of once per visited object.
  const rawKeys = options?.excludeKeys;
  let excluded = [];
  if (Array.isArray(rawKeys)) {
    excluded = rawKeys;
  } else if (typeof rawKeys === "string" && rawKeys !== "") {
    excluded = rawKeys.split(",").map((k) => k.trim());
  }
  return preprocessNodeForDiff(value, options, new Set(excluded));
}

/** Recursive worker for preprocessForDiff; `excludedKeys` is a Set of key names to drop. */
function preprocessNodeForDiff(node, options, excludedKeys) {
  if (Array.isArray(node)) {
    // Always recurse into arrays so excludeKeys/precision also reach elements
    // when sorting is disabled.
    const items = node.map((item) => preprocessNodeForDiff(item, options, excludedKeys));
    if (!options?.sort) {
      return items;
    }
    // Serialize each element once up front; the comparator then only compares
    // cached strings. String() guards against the serializer returning
    // undefined (e.g. for functions), which would break localeCompare.
    const sortKeys = new Map();
    for (const item of items) {
      if (!sortKeys.has(item)) {
        sortKeys.set(item, String(stringify__default.default(item)));
      }
    }
    // `items` is a fresh array, so sorting in place does not touch the input.
    return items.sort((a, b) => sortKeys.get(a).localeCompare(sortKeys.get(b)));
  }
  if (node !== null && typeof node === "object") {
    // Object.fromEntries defines own properties, so a key named "__proto__"
    // is kept as data instead of replacing the copy's prototype.
    return Object.fromEntries(
      Object.entries(node)
        .filter(([key]) => !excludedKeys.has(key))
        .map(([key, child]) => [key, preprocessNodeForDiff(child, options, excludedKeys)])
    );
  }
  if (typeof node === "number" && options?.precision !== undefined) {
    return Number(node.toFixed(options.precision));
  }
  return node;
}
|
|
767
|
+
/**
 * Serializes a value to indented (2-space) JSON text for line-based diffing.
 *
 * The value is first serialized with `stringify__default.default` and the
 * result is re-parsed and pretty-printed with `JSON.stringify`, so the output
 * keeps whatever key order the first serializer produced.
 *
 * @param {*} value - Value to serialize.
 * @returns {string} Always a string: pretty JSON when possible, otherwise the
 *   compact serialization, or the text "undefined" when the serializer
 *   produces no output (e.g. for `undefined` or a function).
 */
function toPrettyJson(value) {
  const compact = stringify__default.default(value);
  // The serializer returns undefined for non-serializable input; callers pass
  // the result to a line diff, which needs a real string.
  if (typeof compact !== "string") {
    return String(compact);
  }
  try {
    return JSON.stringify(JSON.parse(compact), null, 2);
  } catch {
    // Best effort: fall back to the compact form if it cannot be re-parsed.
    return compact;
  }
}
|
|
776
|
+
/**
 * Renders diff parts as text, one output line per line of each part's value.
 *
 * Lines from added parts are prefixed with "+ ", lines from removed parts
 * with "- ", and all other lines are emitted without a prefix. The empty
 * segment produced by a trailing newline in a part's value is dropped.
 *
 * @param {Iterable<{value: string, added?: boolean, removed?: boolean}>} parts
 * @returns {string} The rendered lines joined with "\n".
 */
function formatDiffParts(parts) {
  const rendered = [];
  for (const { added, removed, value } of parts) {
    let marker = "";
    if (added) {
      marker = "+ ";
    } else if (removed) {
      marker = "- ";
    }
    const segments = value.split("\n");
    // Only the final segment is dropped when empty; interior blank lines stay.
    if (segments[segments.length - 1] === "") {
      segments.pop();
    }
    for (const segment of segments) {
      rendered.push(marker + segment);
    }
  }
  return rendered.join("\n");
}
|
|
733
790
|
/**
 * Builds a line-based textual diff between an expected and an actual value.
 *
 * Both values are normalized with `preprocessForDiff` first. Options read here
 * (in addition to those consumed by `preprocessForDiff`):
 *  - `keysOnly`: compare only the key structure (via `extractKeys`) instead of
 *    the values.
 *  - `outputNewOnly`: keep only the added parts of the diff. This option is
 *    not applied in `keysOnly` mode.
 *
 * @param {*} expected - Reference value.
 * @param {*} actual - Value to compare against the reference.
 * @param {object} [diffOptions] - Diff options, see above.
 * @returns {string} The formatted diff, or "" when both sides serialize
 *   identically (in either mode).
 */
function createDiffString(expected, actual, diffOptions) {
  const expectedProcessed = preprocessForDiff(expected, diffOptions);
  const actualProcessed = preprocessForDiff(actual, diffOptions);

  if (diffOptions?.keysOnly) {
    const expectedKeys = JSON.stringify(extractKeys(expectedProcessed), null, 2);
    const actualKeys = JSON.stringify(extractKeys(actualProcessed), null, 2);
    // Same contract as the value diff below: no difference means "".
    if (expectedKeys === actualKeys) {
      return "";
    }
    return formatDiffParts(diff.diffLines(expectedKeys, actualKeys));
  }

  const expectedStr = toPrettyJson(expectedProcessed);
  const actualStr = toPrettyJson(actualProcessed);
  if (expectedStr === actualStr) {
    return "";
  }

  const parts = diff.diffLines(expectedStr, actualStr);
  if (diffOptions?.outputNewOnly) {
    return formatDiffParts(parts.filter((part) => part.added === true));
  }
  return formatDiffParts(parts);
}
|
|
821
|
+
/**
 * Reduces a value to its key structure for key-only diffs.
 *
 * Every non-object leaf (including `null`) becomes the placeholder "\xB7",
 * arrays are mapped element by element, and objects keep their own enumerable
 * keys with each value reduced recursively.
 *
 * Keys are emitted in sorted order so that two objects with the same keys in
 * a different insertion order serialize identically and do not show up as a
 * difference.
 *
 * @param {*} value - Value to reduce.
 * @returns {*} The placeholder string, an array, or a plain object.
 */
function extractKeys(value) {
  if (value === null || typeof value !== "object") {
    return "\xB7";
  }
  if (Array.isArray(value)) {
    return value.map((item) => extractKeys(item));
  }
  // Object.fromEntries defines own properties, so a key named "__proto__" is
  // kept as data instead of replacing the result's prototype.
  return Object.fromEntries(
    Object.keys(value)
      .sort()
      .map((key) => [key, extractKeys(value[key])])
  );
}
|
|
738
834
|
function formatLogMessage(msg) {
|
|
739
835
|
if (typeof msg === "string")
|
|
@@ -1119,6 +1215,20 @@ function readOutput(testCase) {
|
|
|
1119
1215
|
}
|
|
1120
1216
|
return candidate.getOutput();
|
|
1121
1217
|
}
|
|
1218
|
+
/**
 * Expands test cases into one evaluation unit per rerun.
 *
 * For each item, the rerun count is read from `item.testCase.getReruns()` when
 * that method exists and defaults to 1 otherwise. Each unit records the
 * originating item, its 1-based rerun index, and the rerun count.
 *
 * @param {Iterable<{testCase: object}>} testCases - Items to expand.
 * @returns {Array<{testCaseItem: object, rerunIndex: number, rerunTotal: number}>}
 *   Units in input order, reruns of the same item adjacent.
 */
function buildEvaluationUnits(testCases) {
  const expanded = [];
  for (const testCaseItem of testCases) {
    const { testCase } = testCaseItem;
    let rerunTotal = 1;
    if (typeof testCase.getReruns === "function") {
      rerunTotal = testCase.getReruns();
    }
    let attempt = 0;
    while (attempt < rerunTotal) {
      attempt += 1;
      expanded.push({ testCaseItem, rerunIndex: attempt, rerunTotal });
    }
  }
  return expanded;
}
|
|
1122
1232
|
function nowIsoForFile() {
|
|
1123
1233
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
1124
1234
|
}
|
|
@@ -1128,157 +1238,171 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
|
|
|
1128
1238
|
`${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
|
|
1129
1239
|
);
|
|
1130
1240
|
}
|
|
1131
|
-
function
|
|
1241
|
+
function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
|
|
1242
|
+
const { testCaseItem, rerunIndex, rerunTotal } = unit;
|
|
1132
1243
|
return effect.Effect.gen(function* () {
|
|
1133
|
-
const
|
|
1134
|
-
const
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
const taggedEntry = evaluatorError[evaluatorErrorLogEntryKey];
|
|
1197
|
-
logs.push(taggedEntry ?? createLogEntry(result));
|
|
1198
|
-
testCaseError = result.message;
|
|
1199
|
-
evaluatorScores.push({
|
|
1200
|
-
evaluatorId,
|
|
1201
|
-
scores: [],
|
|
1202
|
-
passed: false,
|
|
1203
|
-
logs: logs.length > 0 ? logs : void 0
|
|
1204
|
-
});
|
|
1205
|
-
continue;
|
|
1206
|
-
}
|
|
1207
|
-
const { scores, metrics } = normalizeResult(result);
|
|
1208
|
-
const passed2 = computeEvaluatorPassed(evaluator, result, scores);
|
|
1209
|
-
evaluatorScores.push({
|
|
1210
|
-
evaluatorId,
|
|
1211
|
-
scores,
|
|
1212
|
-
passed: passed2,
|
|
1213
|
-
metrics,
|
|
1214
|
-
logs: logs.length > 0 ? logs : void 0
|
|
1215
|
-
});
|
|
1216
|
-
} catch (error) {
|
|
1217
|
-
if (error instanceof Error) {
|
|
1218
|
-
const taggedEntry = error[evaluatorErrorLogEntryKey];
|
|
1219
|
-
logs.push(taggedEntry ?? createLogEntry(error));
|
|
1220
|
-
}
|
|
1221
|
-
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
1244
|
+
const evaluatorRunId = `run-${crypto.randomUUID()}`;
|
|
1245
|
+
const started = Date.now();
|
|
1246
|
+
const startedEvaluations = yield* effect.Ref.modify(startedRef, (n) => [
|
|
1247
|
+
n + 1,
|
|
1248
|
+
n + 1
|
|
1249
|
+
]);
|
|
1250
|
+
yield* publishEvent({
|
|
1251
|
+
type: "TestCaseStarted",
|
|
1252
|
+
runId: task.runId,
|
|
1253
|
+
testCaseId: testCaseItem.id,
|
|
1254
|
+
testCaseName: testCaseItem.testCase.getName(),
|
|
1255
|
+
startedTestCases: startedEvaluations,
|
|
1256
|
+
totalTestCases: totalEvaluations,
|
|
1257
|
+
rerunIndex,
|
|
1258
|
+
rerunTotal
|
|
1259
|
+
});
|
|
1260
|
+
const evaluatorScores = [];
|
|
1261
|
+
let testCaseError;
|
|
1262
|
+
const output = readOutput(testCaseItem.testCase);
|
|
1263
|
+
for (const { id: evaluatorId, evaluator } of task.evaluators) {
|
|
1264
|
+
const evaluateFn = evaluator.getEvaluateFn();
|
|
1265
|
+
if (!evaluateFn) {
|
|
1266
|
+
continue;
|
|
1267
|
+
}
|
|
1268
|
+
const logs = [];
|
|
1269
|
+
const logDiff = (expected, actual, options) => {
|
|
1270
|
+
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1271
|
+
};
|
|
1272
|
+
const log = (message, options) => {
|
|
1273
|
+
logs.push(createLogEntry(message, options));
|
|
1274
|
+
};
|
|
1275
|
+
const createError = (message, options) => {
|
|
1276
|
+
const entry = createLogEntry(message, options);
|
|
1277
|
+
const error = message instanceof Error ? message : new Error(entry.message);
|
|
1278
|
+
error[evaluatorErrorLogEntryKey] = entry;
|
|
1279
|
+
return error;
|
|
1280
|
+
};
|
|
1281
|
+
try {
|
|
1282
|
+
const ctx = yield* effect.Effect.promise(
|
|
1283
|
+
() => Promise.resolve(evaluator.resolveContext())
|
|
1284
|
+
);
|
|
1285
|
+
const result = yield* effect.Effect.promise(
|
|
1286
|
+
() => Promise.resolve().then(
|
|
1287
|
+
() => evaluateFn({
|
|
1288
|
+
input: testCaseItem.testCase.getInput(),
|
|
1289
|
+
ctx,
|
|
1290
|
+
output,
|
|
1291
|
+
meta: {
|
|
1292
|
+
triggerId: task.triggerId,
|
|
1293
|
+
runId: evaluatorRunId,
|
|
1294
|
+
datasetId: task.datasetId
|
|
1295
|
+
},
|
|
1296
|
+
logDiff,
|
|
1297
|
+
log,
|
|
1298
|
+
createError
|
|
1299
|
+
})
|
|
1300
|
+
)
|
|
1301
|
+
);
|
|
1302
|
+
if (result instanceof Error) {
|
|
1303
|
+
const evaluatorError = result;
|
|
1304
|
+
const taggedEntry = evaluatorError[evaluatorErrorLogEntryKey];
|
|
1305
|
+
logs.push(taggedEntry ?? createLogEntry(result));
|
|
1306
|
+
testCaseError = result.message;
|
|
1222
1307
|
evaluatorScores.push({
|
|
1223
1308
|
evaluatorId,
|
|
1224
1309
|
scores: [],
|
|
1225
1310
|
passed: false,
|
|
1226
1311
|
logs: logs.length > 0 ? logs : void 0
|
|
1227
1312
|
});
|
|
1313
|
+
continue;
|
|
1228
1314
|
}
|
|
1315
|
+
const { scores, metrics } = normalizeResult(result);
|
|
1316
|
+
const passed = computeEvaluatorPassed(evaluator, result, scores);
|
|
1317
|
+
evaluatorScores.push({
|
|
1318
|
+
evaluatorId,
|
|
1319
|
+
scores,
|
|
1320
|
+
passed,
|
|
1321
|
+
metrics,
|
|
1322
|
+
logs: logs.length > 0 ? logs : void 0
|
|
1323
|
+
});
|
|
1324
|
+
} catch (error) {
|
|
1325
|
+
if (error instanceof Error) {
|
|
1326
|
+
const taggedEntry = error[evaluatorErrorLogEntryKey];
|
|
1327
|
+
logs.push(taggedEntry ?? createLogEntry(error));
|
|
1328
|
+
}
|
|
1329
|
+
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
1330
|
+
evaluatorScores.push({
|
|
1331
|
+
evaluatorId,
|
|
1332
|
+
scores: [],
|
|
1333
|
+
passed: false,
|
|
1334
|
+
logs: logs.length > 0 ? logs : void 0
|
|
1335
|
+
});
|
|
1229
1336
|
}
|
|
1230
|
-
const rerunPassedThis = evaluatorScores.every((s) => s.passed);
|
|
1231
|
-
rerunPassed.push(rerunPassedThis);
|
|
1232
|
-
const completedEvaluations = yield* effect.Ref.modify(completedRef, (n) => [
|
|
1233
|
-
n + 1,
|
|
1234
|
-
n + 1
|
|
1235
|
-
]);
|
|
1236
|
-
const progressEvent = {
|
|
1237
|
-
type: "TestCaseProgress",
|
|
1238
|
-
runId: task.runId,
|
|
1239
|
-
testCaseId: testCaseItem.id,
|
|
1240
|
-
testCaseName: testCaseItem.testCase.getName(),
|
|
1241
|
-
completedTestCases: completedEvaluations,
|
|
1242
|
-
totalTestCases: totalEvaluations,
|
|
1243
|
-
rerunIndex: r + 1,
|
|
1244
|
-
rerunTotal: reruns,
|
|
1245
|
-
passed: rerunPassedThis,
|
|
1246
|
-
durationMs: Date.now() - started,
|
|
1247
|
-
evaluatorScores,
|
|
1248
|
-
output,
|
|
1249
|
-
errorMessage: testCaseError
|
|
1250
|
-
};
|
|
1251
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1252
|
-
...snapshot,
|
|
1253
|
-
completedTestCases: completedEvaluations
|
|
1254
|
-
}));
|
|
1255
|
-
yield* publishEvent(progressEvent);
|
|
1256
|
-
yield* effect.Queue.offer(persistenceQueue, {
|
|
1257
|
-
runId: task.runId,
|
|
1258
|
-
artifactPath: task.snapshot.artifactPath,
|
|
1259
|
-
payload: progressEvent
|
|
1260
|
-
});
|
|
1261
|
-
}
|
|
1262
|
-
const testCasePassed = rerunPassed.every(Boolean);
|
|
1263
|
-
if (testCasePassed) {
|
|
1264
|
-
yield* effect.Ref.update(passedRef, (n) => n + 1);
|
|
1265
|
-
} else {
|
|
1266
|
-
yield* effect.Ref.update(failedRef, (n) => n + 1);
|
|
1267
1337
|
}
|
|
1268
|
-
const
|
|
1269
|
-
|
|
1270
|
-
|
|
1338
|
+
const rerunPassedThis = evaluatorScores.every((s) => s.passed);
|
|
1339
|
+
const completedEvaluations = yield* effect.Ref.modify(completedRef, (n) => [
|
|
1340
|
+
n + 1,
|
|
1341
|
+
n + 1
|
|
1271
1342
|
]);
|
|
1272
|
-
|
|
1343
|
+
const progressEvent = {
|
|
1344
|
+
type: "TestCaseProgress",
|
|
1345
|
+
runId: task.runId,
|
|
1346
|
+
testCaseId: testCaseItem.id,
|
|
1347
|
+
testCaseName: testCaseItem.testCase.getName(),
|
|
1348
|
+
completedTestCases: completedEvaluations,
|
|
1349
|
+
totalTestCases: totalEvaluations,
|
|
1350
|
+
rerunIndex,
|
|
1351
|
+
rerunTotal,
|
|
1352
|
+
passed: rerunPassedThis,
|
|
1353
|
+
durationMs: Date.now() - started,
|
|
1354
|
+
evaluatorScores,
|
|
1355
|
+
output,
|
|
1356
|
+
errorMessage: testCaseError
|
|
1357
|
+
};
|
|
1358
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1273
1359
|
...snapshot,
|
|
1274
|
-
|
|
1275
|
-
failedTestCases: failed
|
|
1360
|
+
completedTestCases: completedEvaluations
|
|
1276
1361
|
}));
|
|
1362
|
+
yield* publishEvent(progressEvent);
|
|
1363
|
+
yield* effect.Queue.offer(persistenceQueue, {
|
|
1364
|
+
runId: task.runId,
|
|
1365
|
+
artifactPath: task.snapshot.artifactPath,
|
|
1366
|
+
payload: progressEvent
|
|
1367
|
+
});
|
|
1368
|
+
const testCaseCompleted = yield* effect.Ref.modify(
|
|
1369
|
+
testCaseResultsRef,
|
|
1370
|
+
(map) => {
|
|
1371
|
+
const key = testCaseItem.id;
|
|
1372
|
+
const existing = map.get(key) ?? { completedCount: 0, results: [] };
|
|
1373
|
+
const newResults = [...existing.results, rerunPassedThis];
|
|
1374
|
+
const newCompletedCount = existing.completedCount + 1;
|
|
1375
|
+
const isLast = newCompletedCount === rerunTotal;
|
|
1376
|
+
const newMap = new Map(map);
|
|
1377
|
+
newMap.set(key, {
|
|
1378
|
+
completedCount: newCompletedCount,
|
|
1379
|
+
results: newResults
|
|
1380
|
+
});
|
|
1381
|
+
const outcome = isLast ? newResults.every(Boolean) : null;
|
|
1382
|
+
return [outcome, newMap];
|
|
1383
|
+
}
|
|
1384
|
+
);
|
|
1385
|
+
if (testCaseCompleted !== null) {
|
|
1386
|
+
if (testCaseCompleted) {
|
|
1387
|
+
yield* effect.Ref.update(passedRef, (n) => n + 1);
|
|
1388
|
+
} else {
|
|
1389
|
+
yield* effect.Ref.update(failedRef, (n) => n + 1);
|
|
1390
|
+
}
|
|
1391
|
+
const [passed, failed] = yield* effect.Effect.all([
|
|
1392
|
+
effect.Ref.get(passedRef),
|
|
1393
|
+
effect.Ref.get(failedRef)
|
|
1394
|
+
]);
|
|
1395
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1396
|
+
...snapshot,
|
|
1397
|
+
passedTestCases: passed,
|
|
1398
|
+
failedTestCases: failed
|
|
1399
|
+
}));
|
|
1400
|
+
}
|
|
1277
1401
|
});
|
|
1278
1402
|
}
|
|
1279
1403
|
var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => effect.Effect.gen(function* () {
|
|
1280
1404
|
const startedAt = Date.now();
|
|
1281
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1405
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1282
1406
|
...snapshot,
|
|
1283
1407
|
status: "running",
|
|
1284
1408
|
startedAt
|
|
@@ -1297,9 +1421,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
1297
1421
|
const startedRef = yield* effect.Ref.make(0);
|
|
1298
1422
|
const passedRef = yield* effect.Ref.make(0);
|
|
1299
1423
|
const failedRef = yield* effect.Ref.make(0);
|
|
1300
|
-
const
|
|
1424
|
+
const testCaseResultsRef = yield* effect.Ref.make(
|
|
1425
|
+
/* @__PURE__ */ new Map()
|
|
1426
|
+
);
|
|
1427
|
+
const evaluationUnits = buildEvaluationUnits(task.testCases);
|
|
1428
|
+
const processEvaluation = (unit) => processOneEvaluation(
|
|
1301
1429
|
task,
|
|
1302
|
-
|
|
1430
|
+
unit,
|
|
1303
1431
|
totalEvaluations,
|
|
1304
1432
|
publishEvent,
|
|
1305
1433
|
persistenceQueue,
|
|
@@ -1307,11 +1435,12 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
1307
1435
|
startedRef,
|
|
1308
1436
|
completedRef,
|
|
1309
1437
|
passedRef,
|
|
1310
|
-
failedRef
|
|
1438
|
+
failedRef,
|
|
1439
|
+
testCaseResultsRef
|
|
1311
1440
|
);
|
|
1312
1441
|
yield* effect.Effect.forEach(
|
|
1313
|
-
|
|
1314
|
-
|
|
1442
|
+
evaluationUnits,
|
|
1443
|
+
processEvaluation,
|
|
1315
1444
|
maxConcurrency > 1 ? { concurrency: maxConcurrency } : void 0
|
|
1316
1445
|
);
|
|
1317
1446
|
const [completedEvaluations, passedUniqueTestCases, failedUniqueTestCases] = yield* effect.Effect.all([
|
|
@@ -1329,7 +1458,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
1329
1458
|
totalTestCases: task.testCases.length,
|
|
1330
1459
|
artifactPath: task.snapshot.artifactPath
|
|
1331
1460
|
};
|
|
1332
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1461
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1333
1462
|
...snapshot,
|
|
1334
1463
|
status: "completed",
|
|
1335
1464
|
completedTestCases: completedEvaluations,
|
|
@@ -1582,7 +1711,9 @@ var EffectRunner = class {
|
|
|
1582
1711
|
this.persistenceQueue = effect.Effect.runSync(
|
|
1583
1712
|
effect.Queue.unbounded()
|
|
1584
1713
|
);
|
|
1585
|
-
this.
|
|
1714
|
+
this.snapshotsRef = effect.Effect.runSync(
|
|
1715
|
+
effect.Ref.make(/* @__PURE__ */ new Map())
|
|
1716
|
+
);
|
|
1586
1717
|
this.listeners = /* @__PURE__ */ new Set();
|
|
1587
1718
|
this.datasetsById = /* @__PURE__ */ new Map();
|
|
1588
1719
|
this.evaluatorsById = /* @__PURE__ */ new Map();
|
|
@@ -1685,7 +1816,13 @@ var EffectRunner = class {
|
|
|
1685
1816
|
status: "queued",
|
|
1686
1817
|
artifactPath
|
|
1687
1818
|
};
|
|
1688
|
-
|
|
1819
|
+
await effect.Effect.runPromise(
|
|
1820
|
+
effect.Ref.update(this.snapshotsRef, (map) => {
|
|
1821
|
+
const next = new Map(map);
|
|
1822
|
+
next.set(runId, snapshot);
|
|
1823
|
+
return next;
|
|
1824
|
+
})
|
|
1825
|
+
);
|
|
1689
1826
|
const queuedEvent = {
|
|
1690
1827
|
type: "RunQueued",
|
|
1691
1828
|
runId,
|
|
@@ -1726,12 +1863,12 @@ var EffectRunner = class {
|
|
|
1726
1863
|
};
|
|
1727
1864
|
}
|
|
1728
1865
|
getRunSnapshot(runId) {
|
|
1729
|
-
return this.
|
|
1866
|
+
return effect.Effect.runSync(effect.Ref.get(this.snapshotsRef)).get(runId);
|
|
1730
1867
|
}
|
|
1731
1868
|
getAllRunSnapshots() {
|
|
1732
|
-
return Array.from(
|
|
1733
|
-
(
|
|
1734
|
-
);
|
|
1869
|
+
return Array.from(
|
|
1870
|
+
effect.Effect.runSync(effect.Ref.get(this.snapshotsRef)).values()
|
|
1871
|
+
).sort((a, b) => b.queuedAt - a.queuedAt);
|
|
1735
1872
|
}
|
|
1736
1873
|
async loadRunSnapshotsFromArtifacts() {
|
|
1737
1874
|
return loadRunSnapshotsFromArtifacts(this.config);
|
|
@@ -1760,11 +1897,15 @@ var EffectRunner = class {
|
|
|
1760
1897
|
);
|
|
1761
1898
|
}
|
|
1762
1899
|
updateSnapshot(runId, updater) {
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1900
|
+
return effect.Ref.modify(this.snapshotsRef, (map) => {
|
|
1901
|
+
const existing = map.get(runId);
|
|
1902
|
+
if (!existing) {
|
|
1903
|
+
return [void 0, map];
|
|
1904
|
+
}
|
|
1905
|
+
const next = new Map(map);
|
|
1906
|
+
next.set(runId, updater(existing));
|
|
1907
|
+
return [void 0, next];
|
|
1908
|
+
}).pipe(effect.Effect.asVoid);
|
|
1768
1909
|
}
|
|
1769
1910
|
publishEvent(event) {
|
|
1770
1911
|
return effect.Effect.sync(() => {
|