@m4trix/evals 0.29.0 → 0.31.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +19 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +19 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +9 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +9 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +7 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +15 -0
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli-simple.js
CHANGED
|
@@ -986,6 +986,8 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
986
986
|
output,
|
|
987
987
|
meta: {
|
|
988
988
|
triggerId: task.triggerId,
|
|
989
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
990
|
+
triggeredAt: new Date(task.triggerTimestamp).toISOString(),
|
|
989
991
|
runId: evaluatorRunId,
|
|
990
992
|
datasetName: task.dataset.getDisplayLabel(),
|
|
991
993
|
testCaseId: testCaseItem.id,
|
|
@@ -1466,6 +1468,7 @@ var EffectRunner = class {
|
|
|
1466
1468
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
1467
1469
|
const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
1468
1470
|
const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
|
|
1471
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
1469
1472
|
const snapshots = [];
|
|
1470
1473
|
for (const job of request.jobs) {
|
|
1471
1474
|
snapshots.push(
|
|
@@ -1473,6 +1476,7 @@ var EffectRunner = class {
|
|
|
1473
1476
|
datasetId: job.datasetId,
|
|
1474
1477
|
evaluatorIds: job.evaluatorIds,
|
|
1475
1478
|
triggerId,
|
|
1479
|
+
triggerTimestamp,
|
|
1476
1480
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
1477
1481
|
globalEvaluationSemaphore: sem,
|
|
1478
1482
|
runConfigName: job.runConfigName,
|
|
@@ -1510,6 +1514,7 @@ var EffectRunner = class {
|
|
|
1510
1514
|
datasetId: request.datasetId,
|
|
1511
1515
|
evaluatorIds: request.evaluatorIds,
|
|
1512
1516
|
triggerId: request.triggerId,
|
|
1517
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
1513
1518
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
1514
1519
|
repetitions: request.repetitions,
|
|
1515
1520
|
runConfigName,
|
|
@@ -1537,6 +1542,7 @@ var EffectRunner = class {
|
|
|
1537
1542
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
1538
1543
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
1539
1544
|
const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
|
|
1545
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
1540
1546
|
const runId = `run-${randomUUID()}`;
|
|
1541
1547
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
1542
1548
|
const snapshot = {
|
|
@@ -1580,6 +1586,7 @@ var EffectRunner = class {
|
|
|
1580
1586
|
Queue.offer(this.runQueue, {
|
|
1581
1587
|
runId,
|
|
1582
1588
|
triggerId,
|
|
1589
|
+
triggerTimestamp,
|
|
1583
1590
|
datasetId: params.datasetId,
|
|
1584
1591
|
dataset: dataset.dataset,
|
|
1585
1592
|
evaluators: selectedEvaluators,
|
|
@@ -2012,6 +2019,7 @@ function RunView({
|
|
|
2012
2019
|
runConfigNames,
|
|
2013
2020
|
concurrency,
|
|
2014
2021
|
experimentName,
|
|
2022
|
+
triggerTimestamp,
|
|
2015
2023
|
onComplete
|
|
2016
2024
|
}) {
|
|
2017
2025
|
const [phase, setPhase] = useState("loading");
|
|
@@ -2182,7 +2190,8 @@ function RunView({
|
|
|
2182
2190
|
const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
|
|
2183
2191
|
jobs,
|
|
2184
2192
|
globalConcurrency: concurrency,
|
|
2185
|
-
experimentName
|
|
2193
|
+
experimentName,
|
|
2194
|
+
triggerTimestamp
|
|
2186
2195
|
});
|
|
2187
2196
|
for (let i = 0; i < snapshots.length; i += 1) {
|
|
2188
2197
|
const snap = snapshots[i];
|
|
@@ -2239,7 +2248,7 @@ function RunView({
|
|
|
2239
2248
|
setPhase("completed");
|
|
2240
2249
|
const exitCode = failedTestCases > 0 ? 1 : 0;
|
|
2241
2250
|
setTimeout(() => onComplete(void 0, exitCode), 200);
|
|
2242
|
-
}, [runner, runConfigNames, concurrency, experimentName, onComplete]);
|
|
2251
|
+
}, [runner, runConfigNames, concurrency, experimentName, triggerTimestamp, onComplete]);
|
|
2243
2252
|
useEffect(() => {
|
|
2244
2253
|
void runEval();
|
|
2245
2254
|
}, [runEval]);
|
|
@@ -2727,7 +2736,7 @@ function formatEvaluatorScoreLine(name, scores, passed, metrics, options) {
|
|
|
2727
2736
|
}
|
|
2728
2737
|
return lines;
|
|
2729
2738
|
}
|
|
2730
|
-
async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName) {
|
|
2739
|
+
async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
|
|
2731
2740
|
const jobs = await runner.expandRunConfigNamesToJobs(runConfigNames);
|
|
2732
2741
|
if (jobs.length === 0) {
|
|
2733
2742
|
throw new Error("No jobs expanded from RunConfigs.");
|
|
@@ -2937,7 +2946,8 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
|
|
|
2937
2946
|
const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
|
|
2938
2947
|
jobs,
|
|
2939
2948
|
globalConcurrency: concurrency,
|
|
2940
|
-
experimentName
|
|
2949
|
+
experimentName,
|
|
2950
|
+
triggerTimestamp
|
|
2941
2951
|
});
|
|
2942
2952
|
for (let i = 0; i < snapshots.length; i += 1) {
|
|
2943
2953
|
const snap = snapshots[i];
|
|
@@ -3037,7 +3047,7 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
|
|
|
3037
3047
|
}
|
|
3038
3048
|
return failedTestCasesTotal > 0 ? 1 : 0;
|
|
3039
3049
|
}
|
|
3040
|
-
async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName) {
|
|
3050
|
+
async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
|
|
3041
3051
|
return new Promise((resolve5, reject) => {
|
|
3042
3052
|
const app = render(
|
|
3043
3053
|
React.createElement(RunView, {
|
|
@@ -3045,6 +3055,7 @@ async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, e
|
|
|
3045
3055
|
runConfigNames,
|
|
3046
3056
|
concurrency,
|
|
3047
3057
|
experimentName,
|
|
3058
|
+
triggerTimestamp,
|
|
3048
3059
|
onComplete: (err, exitCode) => {
|
|
3049
3060
|
app.unmount();
|
|
3050
3061
|
if (err) {
|
|
@@ -3102,11 +3113,13 @@ async function main() {
|
|
|
3102
3113
|
try {
|
|
3103
3114
|
if (args.command === "run") {
|
|
3104
3115
|
const concurrency = args.concurrency ?? getDefaultConcurrency();
|
|
3116
|
+
const triggerTimestamp = Date.now();
|
|
3105
3117
|
const exitCode = await (useInk ? runSimpleEvalRunConfigsInk : runSimpleEvalRunConfigsPlain)(
|
|
3106
3118
|
runner,
|
|
3107
3119
|
args.runConfigNames,
|
|
3108
3120
|
concurrency,
|
|
3109
|
-
args.experimentName
|
|
3121
|
+
args.experimentName,
|
|
3122
|
+
triggerTimestamp
|
|
3110
3123
|
);
|
|
3111
3124
|
if (args.ci && exitCode !== 0) {
|
|
3112
3125
|
process.exit(1);
|