@m4trix/evals 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +352 -184
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +350 -185
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +294 -155
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +294 -156
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +296 -155
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -2
- package/dist/index.js +294 -156
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -169,10 +169,9 @@ declare class Dataset {
|
|
|
169
169
|
|
|
170
170
|
/**
|
|
171
171
|
* Options for customizing JSON diff output. Passed to logDiff, createDiffLogEntry, and printJsonDiff.
|
|
172
|
-
* @see https://www.npmjs.com/package/json-diff
|
|
173
172
|
*/
|
|
174
173
|
interface JsonDiffOptions {
|
|
175
|
-
/** Include equal sections of the document, not just deltas */
|
|
174
|
+
/** Include equal sections of the document, not just deltas (always true with current implementation) */
|
|
176
175
|
full?: boolean;
|
|
177
176
|
/** Sort primitive values in arrays before comparing */
|
|
178
177
|
sort?: boolean;
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { Effect, PubSub, Queue,
|
|
1
|
+
import { Effect, PubSub, Queue, Ref, Fiber } from 'effect';
|
|
2
2
|
export { Schema as S } from 'effect';
|
|
3
|
-
import {
|
|
3
|
+
import { diffLines } from 'diff';
|
|
4
|
+
import stringify from 'fast-json-stable-stringify';
|
|
4
5
|
import { randomUUID } from 'crypto';
|
|
5
6
|
import { existsSync } from 'fs';
|
|
6
7
|
import { resolve as resolve$1, relative, join, dirname } from 'path';
|
|
@@ -708,10 +709,102 @@ var binaryScore = Score.of({
|
|
|
708
709
|
},
|
|
709
710
|
aggregateValues: Score.aggregate.all
|
|
710
711
|
});
|
|
712
|
+
/**
 * Normalises a JSON-like value before it is serialised and diffed.
 *
 * The value is walked recursively and a processed copy is returned:
 * - arrays: every element is processed; when `options.sort` is set the
 *   processed elements are additionally ordered by their serialised form;
 * - plain objects: every property is processed, except those whose key is
 *   listed in `options.excludeKeys`, which are dropped;
 * - numbers: rounded to `options.precision` decimals when it is provided;
 * - anything else is returned unchanged.
 *
 * @param {*} value - Value to normalise.
 * @param {object} [options] - Diff options.
 * @param {boolean} [options.sort] - Sort array elements before comparing.
 * @param {string[]|string} [options.excludeKeys] - Keys to drop, either as an
 *   array or as a comma-separated string (entries are trimmed).
 * @param {number} [options.precision] - Number of decimals kept for numbers.
 * @returns {*} The normalised copy (primitives are returned as-is).
 */
function preprocessForDiff(value, options) {
  if (Array.isArray(value)) {
    // Always descend into arrays: previously arrays were only visited when
    // `sort` was enabled, so `excludeKeys` / `precision` were silently ignored
    // for anything nested inside an array.
    const processed = value.map((item) => preprocessForDiff(item, options));
    if (!options?.sort) {
      return processed;
    }
    // Serialise each element once instead of once per comparison. The
    // serialiser may yield `undefined` (e.g. for `undefined` elements); fall
    // back to "" so the comparator never calls a method on `undefined`.
    return processed
      .map((item) => ({ item, sortKey: stringify(item) ?? "" }))
      .sort((a, b) => a.sortKey.localeCompare(b.sortKey))
      .map(({ item }) => item);
  }
  if (value !== null && typeof value === "object") {
    let excluded = null;
    if (options?.excludeKeys) {
      const keys = Array.isArray(options.excludeKeys)
        ? options.excludeKeys
        : options.excludeKeys.split(",").map((k) => k.trim());
      excluded = new Set(keys);
    }
    const result = {};
    for (const [k, v] of Object.entries(value)) {
      if (excluded?.has(k)) {
        continue;
      }
      result[k] = preprocessForDiff(v, options);
    }
    return result;
  }
  if (typeof value === "number" && options?.precision !== void 0) {
    return Number(value.toFixed(options.precision));
  }
  return value;
}
|
|
742
|
+
/**
 * Renders a value as indented (2-space) JSON text for line-based diffing.
 *
 * The value is first serialised with `stringify` and then re-parsed and
 * pretty-printed, so the indented output follows the key order produced by
 * `stringify`.
 *
 * @param {*} value - Value to render.
 * @returns {string} Pretty JSON text. When the value has no JSON
 *   representation (the serialiser returns a non-string, e.g. for `undefined`
 *   or a function) the literal text "undefined" is returned, so callers always
 *   receive a string.
 */
function toPrettyJson(value) {
  const str = stringify(value);
  if (typeof str !== "string") {
    // Previously this fell into the catch below and returned `undefined`,
    // handing a non-string to the line differ.
    return "undefined";
  }
  try {
    return JSON.stringify(JSON.parse(str), null, 2);
  } catch {
    // Serialised text that cannot be re-parsed is returned untouched.
    return str;
  }
}
|
|
751
|
+
/**
 * Turns a list of diff parts into a single printable string.
 *
 * Each line of an added part is prefixed with "+ ", each line of a removed
 * part with "- ", and lines of unchanged parts are emitted without a prefix.
 * The empty segment produced by a part's trailing newline is dropped.
 *
 * @param {Iterable<{value: string, added?: boolean, removed?: boolean}>} parts
 *   Diff parts to format.
 * @returns {string} The formatted lines joined with "\n".
 */
function formatDiffParts(parts) {
  const output = [];
  for (const { added, removed, value } of parts) {
    let marker = "";
    if (added) {
      marker = "+ ";
    } else if (removed) {
      marker = "- ";
    }
    const segments = value.split("\n");
    // A part ending in "\n" yields one trailing empty segment; discard it.
    if (segments[segments.length - 1] === "") {
      segments.pop();
    }
    for (const segment of segments) {
      output.push(marker + segment);
    }
  }
  return output.join("\n");
}
|
|
711
765
|
function createDiffString(expected, actual, diffOptions) {
|
|
712
|
-
const
|
|
713
|
-
const
|
|
714
|
-
|
|
766
|
+
const expectedProcessed = preprocessForDiff(expected, diffOptions);
|
|
767
|
+
const actualProcessed = preprocessForDiff(actual, diffOptions);
|
|
768
|
+
if (diffOptions?.keysOnly) {
|
|
769
|
+
const expectedKeys = JSON.stringify(
|
|
770
|
+
extractKeys(expectedProcessed),
|
|
771
|
+
null,
|
|
772
|
+
2
|
|
773
|
+
);
|
|
774
|
+
const actualKeys = JSON.stringify(
|
|
775
|
+
extractKeys(actualProcessed),
|
|
776
|
+
null,
|
|
777
|
+
2
|
|
778
|
+
);
|
|
779
|
+
const parts2 = diffLines(expectedKeys, actualKeys);
|
|
780
|
+
return formatDiffParts(parts2);
|
|
781
|
+
}
|
|
782
|
+
const expectedStr = toPrettyJson(expectedProcessed);
|
|
783
|
+
const actualStr = toPrettyJson(actualProcessed);
|
|
784
|
+
if (expectedStr === actualStr) {
|
|
785
|
+
return "";
|
|
786
|
+
}
|
|
787
|
+
const parts = diffLines(expectedStr, actualStr);
|
|
788
|
+
if (diffOptions?.outputNewOnly) {
|
|
789
|
+
const filtered = parts.filter(
|
|
790
|
+
(p) => p.added === true
|
|
791
|
+
);
|
|
792
|
+
return formatDiffParts(filtered);
|
|
793
|
+
}
|
|
794
|
+
return formatDiffParts(parts);
|
|
795
|
+
}
|
|
796
|
+
/**
 * Reduces a value to its key structure, discarding leaf values.
 *
 * Arrays and objects keep their shape; every other value (including `null`)
 * is replaced by the placeholder "\xB7".
 *
 * @param {*} value - Value to reduce.
 * @returns {*} The placeholder string, or an array/object of the same shape
 *   whose leaves are placeholders.
 */
function extractKeys(value) {
  const isContainer = typeof value === "object" && value !== null;
  if (!isContainer) {
    return "\xB7";
  }
  if (Array.isArray(value)) {
    return value.map((element) => extractKeys(element));
  }
  const shape = {};
  for (const key of Object.keys(value)) {
    shape[key] = extractKeys(value[key]);
  }
  return shape;
}
|
|
716
809
|
function formatLogMessage(msg) {
|
|
717
810
|
if (typeof msg === "string")
|
|
@@ -1097,6 +1190,20 @@ function readOutput(testCase) {
|
|
|
1097
1190
|
}
|
|
1098
1191
|
return candidate.getOutput();
|
|
1099
1192
|
}
|
|
1193
|
+
/**
 * Expands test cases into one evaluation unit per rerun.
 *
 * For each item the rerun count is taken from `testCase.getReruns()` when that
 * method exists, and is 1 otherwise. Units are emitted in input order, with a
 * 1-based `rerunIndex`.
 *
 * @param {Iterable<{testCase: object}>} testCases - Items to expand.
 * @returns {Array<{testCaseItem: object, rerunIndex: number, rerunTotal: number}>}
 *   The flattened list of evaluation units.
 */
function buildEvaluationUnits(testCases) {
  const expanded = [];
  for (const testCaseItem of testCases) {
    const { testCase } = testCaseItem;
    let rerunTotal = 1;
    if (typeof testCase.getReruns === "function") {
      rerunTotal = testCase.getReruns();
    }
    // Count from 0 and bump before pushing so the stored index is 1-based.
    let rerunIndex = 0;
    while (rerunIndex < rerunTotal) {
      rerunIndex += 1;
      expanded.push({ testCaseItem, rerunIndex, rerunTotal });
    }
  }
  return expanded;
}
|
|
1100
1207
|
function nowIsoForFile() {
|
|
1101
1208
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
1102
1209
|
}
|
|
@@ -1106,157 +1213,171 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
|
|
|
1106
1213
|
`${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
|
|
1107
1214
|
);
|
|
1108
1215
|
}
|
|
1109
|
-
function
|
|
1216
|
+
function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
|
|
1217
|
+
const { testCaseItem, rerunIndex, rerunTotal } = unit;
|
|
1110
1218
|
return Effect.gen(function* () {
|
|
1111
|
-
const
|
|
1112
|
-
const
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
const taggedEntry = evaluatorError[evaluatorErrorLogEntryKey];
|
|
1175
|
-
logs.push(taggedEntry ?? createLogEntry(result));
|
|
1176
|
-
testCaseError = result.message;
|
|
1177
|
-
evaluatorScores.push({
|
|
1178
|
-
evaluatorId,
|
|
1179
|
-
scores: [],
|
|
1180
|
-
passed: false,
|
|
1181
|
-
logs: logs.length > 0 ? logs : void 0
|
|
1182
|
-
});
|
|
1183
|
-
continue;
|
|
1184
|
-
}
|
|
1185
|
-
const { scores, metrics } = normalizeResult(result);
|
|
1186
|
-
const passed2 = computeEvaluatorPassed(evaluator, result, scores);
|
|
1187
|
-
evaluatorScores.push({
|
|
1188
|
-
evaluatorId,
|
|
1189
|
-
scores,
|
|
1190
|
-
passed: passed2,
|
|
1191
|
-
metrics,
|
|
1192
|
-
logs: logs.length > 0 ? logs : void 0
|
|
1193
|
-
});
|
|
1194
|
-
} catch (error) {
|
|
1195
|
-
if (error instanceof Error) {
|
|
1196
|
-
const taggedEntry = error[evaluatorErrorLogEntryKey];
|
|
1197
|
-
logs.push(taggedEntry ?? createLogEntry(error));
|
|
1198
|
-
}
|
|
1199
|
-
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
1219
|
+
const evaluatorRunId = `run-${randomUUID()}`;
|
|
1220
|
+
const started = Date.now();
|
|
1221
|
+
const startedEvaluations = yield* Ref.modify(startedRef, (n) => [
|
|
1222
|
+
n + 1,
|
|
1223
|
+
n + 1
|
|
1224
|
+
]);
|
|
1225
|
+
yield* publishEvent({
|
|
1226
|
+
type: "TestCaseStarted",
|
|
1227
|
+
runId: task.runId,
|
|
1228
|
+
testCaseId: testCaseItem.id,
|
|
1229
|
+
testCaseName: testCaseItem.testCase.getName(),
|
|
1230
|
+
startedTestCases: startedEvaluations,
|
|
1231
|
+
totalTestCases: totalEvaluations,
|
|
1232
|
+
rerunIndex,
|
|
1233
|
+
rerunTotal
|
|
1234
|
+
});
|
|
1235
|
+
const evaluatorScores = [];
|
|
1236
|
+
let testCaseError;
|
|
1237
|
+
const output = readOutput(testCaseItem.testCase);
|
|
1238
|
+
for (const { id: evaluatorId, evaluator } of task.evaluators) {
|
|
1239
|
+
const evaluateFn = evaluator.getEvaluateFn();
|
|
1240
|
+
if (!evaluateFn) {
|
|
1241
|
+
continue;
|
|
1242
|
+
}
|
|
1243
|
+
const logs = [];
|
|
1244
|
+
const logDiff = (expected, actual, options) => {
|
|
1245
|
+
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1246
|
+
};
|
|
1247
|
+
const log = (message, options) => {
|
|
1248
|
+
logs.push(createLogEntry(message, options));
|
|
1249
|
+
};
|
|
1250
|
+
const createError = (message, options) => {
|
|
1251
|
+
const entry = createLogEntry(message, options);
|
|
1252
|
+
const error = message instanceof Error ? message : new Error(entry.message);
|
|
1253
|
+
error[evaluatorErrorLogEntryKey] = entry;
|
|
1254
|
+
return error;
|
|
1255
|
+
};
|
|
1256
|
+
try {
|
|
1257
|
+
const ctx = yield* Effect.promise(
|
|
1258
|
+
() => Promise.resolve(evaluator.resolveContext())
|
|
1259
|
+
);
|
|
1260
|
+
const result = yield* Effect.promise(
|
|
1261
|
+
() => Promise.resolve().then(
|
|
1262
|
+
() => evaluateFn({
|
|
1263
|
+
input: testCaseItem.testCase.getInput(),
|
|
1264
|
+
ctx,
|
|
1265
|
+
output,
|
|
1266
|
+
meta: {
|
|
1267
|
+
triggerId: task.triggerId,
|
|
1268
|
+
runId: evaluatorRunId,
|
|
1269
|
+
datasetId: task.datasetId
|
|
1270
|
+
},
|
|
1271
|
+
logDiff,
|
|
1272
|
+
log,
|
|
1273
|
+
createError
|
|
1274
|
+
})
|
|
1275
|
+
)
|
|
1276
|
+
);
|
|
1277
|
+
if (result instanceof Error) {
|
|
1278
|
+
const evaluatorError = result;
|
|
1279
|
+
const taggedEntry = evaluatorError[evaluatorErrorLogEntryKey];
|
|
1280
|
+
logs.push(taggedEntry ?? createLogEntry(result));
|
|
1281
|
+
testCaseError = result.message;
|
|
1200
1282
|
evaluatorScores.push({
|
|
1201
1283
|
evaluatorId,
|
|
1202
1284
|
scores: [],
|
|
1203
1285
|
passed: false,
|
|
1204
1286
|
logs: logs.length > 0 ? logs : void 0
|
|
1205
1287
|
});
|
|
1288
|
+
continue;
|
|
1289
|
+
}
|
|
1290
|
+
const { scores, metrics } = normalizeResult(result);
|
|
1291
|
+
const passed = computeEvaluatorPassed(evaluator, result, scores);
|
|
1292
|
+
evaluatorScores.push({
|
|
1293
|
+
evaluatorId,
|
|
1294
|
+
scores,
|
|
1295
|
+
passed,
|
|
1296
|
+
metrics,
|
|
1297
|
+
logs: logs.length > 0 ? logs : void 0
|
|
1298
|
+
});
|
|
1299
|
+
} catch (error) {
|
|
1300
|
+
if (error instanceof Error) {
|
|
1301
|
+
const taggedEntry = error[evaluatorErrorLogEntryKey];
|
|
1302
|
+
logs.push(taggedEntry ?? createLogEntry(error));
|
|
1206
1303
|
}
|
|
1304
|
+
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
1305
|
+
evaluatorScores.push({
|
|
1306
|
+
evaluatorId,
|
|
1307
|
+
scores: [],
|
|
1308
|
+
passed: false,
|
|
1309
|
+
logs: logs.length > 0 ? logs : void 0
|
|
1310
|
+
});
|
|
1207
1311
|
}
|
|
1208
|
-
const rerunPassedThis = evaluatorScores.every((s) => s.passed);
|
|
1209
|
-
rerunPassed.push(rerunPassedThis);
|
|
1210
|
-
const completedEvaluations = yield* Ref.modify(completedRef, (n) => [
|
|
1211
|
-
n + 1,
|
|
1212
|
-
n + 1
|
|
1213
|
-
]);
|
|
1214
|
-
const progressEvent = {
|
|
1215
|
-
type: "TestCaseProgress",
|
|
1216
|
-
runId: task.runId,
|
|
1217
|
-
testCaseId: testCaseItem.id,
|
|
1218
|
-
testCaseName: testCaseItem.testCase.getName(),
|
|
1219
|
-
completedTestCases: completedEvaluations,
|
|
1220
|
-
totalTestCases: totalEvaluations,
|
|
1221
|
-
rerunIndex: r + 1,
|
|
1222
|
-
rerunTotal: reruns,
|
|
1223
|
-
passed: rerunPassedThis,
|
|
1224
|
-
durationMs: Date.now() - started,
|
|
1225
|
-
evaluatorScores,
|
|
1226
|
-
output,
|
|
1227
|
-
errorMessage: testCaseError
|
|
1228
|
-
};
|
|
1229
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1230
|
-
...snapshot,
|
|
1231
|
-
completedTestCases: completedEvaluations
|
|
1232
|
-
}));
|
|
1233
|
-
yield* publishEvent(progressEvent);
|
|
1234
|
-
yield* Queue.offer(persistenceQueue, {
|
|
1235
|
-
runId: task.runId,
|
|
1236
|
-
artifactPath: task.snapshot.artifactPath,
|
|
1237
|
-
payload: progressEvent
|
|
1238
|
-
});
|
|
1239
|
-
}
|
|
1240
|
-
const testCasePassed = rerunPassed.every(Boolean);
|
|
1241
|
-
if (testCasePassed) {
|
|
1242
|
-
yield* Ref.update(passedRef, (n) => n + 1);
|
|
1243
|
-
} else {
|
|
1244
|
-
yield* Ref.update(failedRef, (n) => n + 1);
|
|
1245
1312
|
}
|
|
1246
|
-
const
|
|
1247
|
-
|
|
1248
|
-
|
|
1313
|
+
const rerunPassedThis = evaluatorScores.every((s) => s.passed);
|
|
1314
|
+
const completedEvaluations = yield* Ref.modify(completedRef, (n) => [
|
|
1315
|
+
n + 1,
|
|
1316
|
+
n + 1
|
|
1249
1317
|
]);
|
|
1250
|
-
|
|
1318
|
+
const progressEvent = {
|
|
1319
|
+
type: "TestCaseProgress",
|
|
1320
|
+
runId: task.runId,
|
|
1321
|
+
testCaseId: testCaseItem.id,
|
|
1322
|
+
testCaseName: testCaseItem.testCase.getName(),
|
|
1323
|
+
completedTestCases: completedEvaluations,
|
|
1324
|
+
totalTestCases: totalEvaluations,
|
|
1325
|
+
rerunIndex,
|
|
1326
|
+
rerunTotal,
|
|
1327
|
+
passed: rerunPassedThis,
|
|
1328
|
+
durationMs: Date.now() - started,
|
|
1329
|
+
evaluatorScores,
|
|
1330
|
+
output,
|
|
1331
|
+
errorMessage: testCaseError
|
|
1332
|
+
};
|
|
1333
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1251
1334
|
...snapshot,
|
|
1252
|
-
|
|
1253
|
-
failedTestCases: failed
|
|
1335
|
+
completedTestCases: completedEvaluations
|
|
1254
1336
|
}));
|
|
1337
|
+
yield* publishEvent(progressEvent);
|
|
1338
|
+
yield* Queue.offer(persistenceQueue, {
|
|
1339
|
+
runId: task.runId,
|
|
1340
|
+
artifactPath: task.snapshot.artifactPath,
|
|
1341
|
+
payload: progressEvent
|
|
1342
|
+
});
|
|
1343
|
+
const testCaseCompleted = yield* Ref.modify(
|
|
1344
|
+
testCaseResultsRef,
|
|
1345
|
+
(map) => {
|
|
1346
|
+
const key = testCaseItem.id;
|
|
1347
|
+
const existing = map.get(key) ?? { completedCount: 0, results: [] };
|
|
1348
|
+
const newResults = [...existing.results, rerunPassedThis];
|
|
1349
|
+
const newCompletedCount = existing.completedCount + 1;
|
|
1350
|
+
const isLast = newCompletedCount === rerunTotal;
|
|
1351
|
+
const newMap = new Map(map);
|
|
1352
|
+
newMap.set(key, {
|
|
1353
|
+
completedCount: newCompletedCount,
|
|
1354
|
+
results: newResults
|
|
1355
|
+
});
|
|
1356
|
+
const outcome = isLast ? newResults.every(Boolean) : null;
|
|
1357
|
+
return [outcome, newMap];
|
|
1358
|
+
}
|
|
1359
|
+
);
|
|
1360
|
+
if (testCaseCompleted !== null) {
|
|
1361
|
+
if (testCaseCompleted) {
|
|
1362
|
+
yield* Ref.update(passedRef, (n) => n + 1);
|
|
1363
|
+
} else {
|
|
1364
|
+
yield* Ref.update(failedRef, (n) => n + 1);
|
|
1365
|
+
}
|
|
1366
|
+
const [passed, failed] = yield* Effect.all([
|
|
1367
|
+
Ref.get(passedRef),
|
|
1368
|
+
Ref.get(failedRef)
|
|
1369
|
+
]);
|
|
1370
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1371
|
+
...snapshot,
|
|
1372
|
+
passedTestCases: passed,
|
|
1373
|
+
failedTestCases: failed
|
|
1374
|
+
}));
|
|
1375
|
+
}
|
|
1255
1376
|
});
|
|
1256
1377
|
}
|
|
1257
1378
|
var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => Effect.gen(function* () {
|
|
1258
1379
|
const startedAt = Date.now();
|
|
1259
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1380
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1260
1381
|
...snapshot,
|
|
1261
1382
|
status: "running",
|
|
1262
1383
|
startedAt
|
|
@@ -1275,9 +1396,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1275
1396
|
const startedRef = yield* Ref.make(0);
|
|
1276
1397
|
const passedRef = yield* Ref.make(0);
|
|
1277
1398
|
const failedRef = yield* Ref.make(0);
|
|
1278
|
-
const
|
|
1399
|
+
const testCaseResultsRef = yield* Ref.make(
|
|
1400
|
+
/* @__PURE__ */ new Map()
|
|
1401
|
+
);
|
|
1402
|
+
const evaluationUnits = buildEvaluationUnits(task.testCases);
|
|
1403
|
+
const processEvaluation = (unit) => processOneEvaluation(
|
|
1279
1404
|
task,
|
|
1280
|
-
|
|
1405
|
+
unit,
|
|
1281
1406
|
totalEvaluations,
|
|
1282
1407
|
publishEvent,
|
|
1283
1408
|
persistenceQueue,
|
|
@@ -1285,11 +1410,12 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1285
1410
|
startedRef,
|
|
1286
1411
|
completedRef,
|
|
1287
1412
|
passedRef,
|
|
1288
|
-
failedRef
|
|
1413
|
+
failedRef,
|
|
1414
|
+
testCaseResultsRef
|
|
1289
1415
|
);
|
|
1290
1416
|
yield* Effect.forEach(
|
|
1291
|
-
|
|
1292
|
-
|
|
1417
|
+
evaluationUnits,
|
|
1418
|
+
processEvaluation,
|
|
1293
1419
|
maxConcurrency > 1 ? { concurrency: maxConcurrency } : void 0
|
|
1294
1420
|
);
|
|
1295
1421
|
const [completedEvaluations, passedUniqueTestCases, failedUniqueTestCases] = yield* Effect.all([
|
|
@@ -1307,7 +1433,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1307
1433
|
totalTestCases: task.testCases.length,
|
|
1308
1434
|
artifactPath: task.snapshot.artifactPath
|
|
1309
1435
|
};
|
|
1310
|
-
updateSnapshot(task.runId, (snapshot) => ({
|
|
1436
|
+
yield* updateSnapshot(task.runId, (snapshot) => ({
|
|
1311
1437
|
...snapshot,
|
|
1312
1438
|
status: "completed",
|
|
1313
1439
|
completedTestCases: completedEvaluations,
|
|
@@ -1560,7 +1686,9 @@ var EffectRunner = class {
|
|
|
1560
1686
|
this.persistenceQueue = Effect.runSync(
|
|
1561
1687
|
Queue.unbounded()
|
|
1562
1688
|
);
|
|
1563
|
-
this.
|
|
1689
|
+
this.snapshotsRef = Effect.runSync(
|
|
1690
|
+
Ref.make(/* @__PURE__ */ new Map())
|
|
1691
|
+
);
|
|
1564
1692
|
this.listeners = /* @__PURE__ */ new Set();
|
|
1565
1693
|
this.datasetsById = /* @__PURE__ */ new Map();
|
|
1566
1694
|
this.evaluatorsById = /* @__PURE__ */ new Map();
|
|
@@ -1663,7 +1791,13 @@ var EffectRunner = class {
|
|
|
1663
1791
|
status: "queued",
|
|
1664
1792
|
artifactPath
|
|
1665
1793
|
};
|
|
1666
|
-
|
|
1794
|
+
await Effect.runPromise(
|
|
1795
|
+
Ref.update(this.snapshotsRef, (map) => {
|
|
1796
|
+
const next = new Map(map);
|
|
1797
|
+
next.set(runId, snapshot);
|
|
1798
|
+
return next;
|
|
1799
|
+
})
|
|
1800
|
+
);
|
|
1667
1801
|
const queuedEvent = {
|
|
1668
1802
|
type: "RunQueued",
|
|
1669
1803
|
runId,
|
|
@@ -1704,12 +1838,12 @@ var EffectRunner = class {
|
|
|
1704
1838
|
};
|
|
1705
1839
|
}
|
|
1706
1840
|
getRunSnapshot(runId) {
|
|
1707
|
-
return this.
|
|
1841
|
+
return Effect.runSync(Ref.get(this.snapshotsRef)).get(runId);
|
|
1708
1842
|
}
|
|
1709
1843
|
getAllRunSnapshots() {
|
|
1710
|
-
return Array.from(
|
|
1711
|
-
(
|
|
1712
|
-
);
|
|
1844
|
+
return Array.from(
|
|
1845
|
+
Effect.runSync(Ref.get(this.snapshotsRef)).values()
|
|
1846
|
+
).sort((a, b) => b.queuedAt - a.queuedAt);
|
|
1713
1847
|
}
|
|
1714
1848
|
async loadRunSnapshotsFromArtifacts() {
|
|
1715
1849
|
return loadRunSnapshotsFromArtifacts(this.config);
|
|
@@ -1738,11 +1872,15 @@ var EffectRunner = class {
|
|
|
1738
1872
|
);
|
|
1739
1873
|
}
|
|
1740
1874
|
updateSnapshot(runId, updater) {
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1875
|
+
return Ref.modify(this.snapshotsRef, (map) => {
|
|
1876
|
+
const existing = map.get(runId);
|
|
1877
|
+
if (!existing) {
|
|
1878
|
+
return [void 0, map];
|
|
1879
|
+
}
|
|
1880
|
+
const next = new Map(map);
|
|
1881
|
+
next.set(runId, updater(existing));
|
|
1882
|
+
return [void 0, next];
|
|
1883
|
+
}).pipe(Effect.asVoid);
|
|
1746
1884
|
}
|
|
1747
1885
|
publishEvent(event) {
|
|
1748
1886
|
return Effect.sync(() => {
|