@m4trix/evals 0.29.0 → 0.30.0
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +18 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +18 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +8 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +8 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +6 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +13 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli-simple.js
CHANGED
|
@@ -986,6 +986,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
986
986
|
output,
|
|
987
987
|
meta: {
|
|
988
988
|
triggerId: task.triggerId,
|
|
989
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
989
990
|
runId: evaluatorRunId,
|
|
990
991
|
datasetName: task.dataset.getDisplayLabel(),
|
|
991
992
|
testCaseId: testCaseItem.id,
|
|
@@ -1466,6 +1467,7 @@ var EffectRunner = class {
|
|
|
1466
1467
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
1467
1468
|
const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
1468
1469
|
const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
|
|
1470
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
1469
1471
|
const snapshots = [];
|
|
1470
1472
|
for (const job of request.jobs) {
|
|
1471
1473
|
snapshots.push(
|
|
@@ -1473,6 +1475,7 @@ var EffectRunner = class {
|
|
|
1473
1475
|
datasetId: job.datasetId,
|
|
1474
1476
|
evaluatorIds: job.evaluatorIds,
|
|
1475
1477
|
triggerId,
|
|
1478
|
+
triggerTimestamp,
|
|
1476
1479
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
1477
1480
|
globalEvaluationSemaphore: sem,
|
|
1478
1481
|
runConfigName: job.runConfigName,
|
|
@@ -1510,6 +1513,7 @@ var EffectRunner = class {
|
|
|
1510
1513
|
datasetId: request.datasetId,
|
|
1511
1514
|
evaluatorIds: request.evaluatorIds,
|
|
1512
1515
|
triggerId: request.triggerId,
|
|
1516
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
1513
1517
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
1514
1518
|
repetitions: request.repetitions,
|
|
1515
1519
|
runConfigName,
|
|
@@ -1537,6 +1541,7 @@ var EffectRunner = class {
|
|
|
1537
1541
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
1538
1542
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
1539
1543
|
const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
|
|
1544
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
1540
1545
|
const runId = `run-${randomUUID()}`;
|
|
1541
1546
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
1542
1547
|
const snapshot = {
|
|
@@ -1580,6 +1585,7 @@ var EffectRunner = class {
|
|
|
1580
1585
|
Queue.offer(this.runQueue, {
|
|
1581
1586
|
runId,
|
|
1582
1587
|
triggerId,
|
|
1588
|
+
triggerTimestamp,
|
|
1583
1589
|
datasetId: params.datasetId,
|
|
1584
1590
|
dataset: dataset.dataset,
|
|
1585
1591
|
evaluators: selectedEvaluators,
|
|
@@ -2012,6 +2018,7 @@ function RunView({
|
|
|
2012
2018
|
runConfigNames,
|
|
2013
2019
|
concurrency,
|
|
2014
2020
|
experimentName,
|
|
2021
|
+
triggerTimestamp,
|
|
2015
2022
|
onComplete
|
|
2016
2023
|
}) {
|
|
2017
2024
|
const [phase, setPhase] = useState("loading");
|
|
@@ -2182,7 +2189,8 @@ function RunView({
|
|
|
2182
2189
|
const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
|
|
2183
2190
|
jobs,
|
|
2184
2191
|
globalConcurrency: concurrency,
|
|
2185
|
-
experimentName
|
|
2192
|
+
experimentName,
|
|
2193
|
+
triggerTimestamp
|
|
2186
2194
|
});
|
|
2187
2195
|
for (let i = 0; i < snapshots.length; i += 1) {
|
|
2188
2196
|
const snap = snapshots[i];
|
|
@@ -2239,7 +2247,7 @@ function RunView({
|
|
|
2239
2247
|
setPhase("completed");
|
|
2240
2248
|
const exitCode = failedTestCases > 0 ? 1 : 0;
|
|
2241
2249
|
setTimeout(() => onComplete(void 0, exitCode), 200);
|
|
2242
|
-
}, [runner, runConfigNames, concurrency, experimentName, onComplete]);
|
|
2250
|
+
}, [runner, runConfigNames, concurrency, experimentName, triggerTimestamp, onComplete]);
|
|
2243
2251
|
useEffect(() => {
|
|
2244
2252
|
void runEval();
|
|
2245
2253
|
}, [runEval]);
|
|
@@ -2727,7 +2735,7 @@ function formatEvaluatorScoreLine(name, scores, passed, metrics, options) {
|
|
|
2727
2735
|
}
|
|
2728
2736
|
return lines;
|
|
2729
2737
|
}
|
|
2730
|
-
async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName) {
|
|
2738
|
+
async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
|
|
2731
2739
|
const jobs = await runner.expandRunConfigNamesToJobs(runConfigNames);
|
|
2732
2740
|
if (jobs.length === 0) {
|
|
2733
2741
|
throw new Error("No jobs expanded from RunConfigs.");
|
|
@@ -2937,7 +2945,8 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
|
|
|
2937
2945
|
const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
|
|
2938
2946
|
jobs,
|
|
2939
2947
|
globalConcurrency: concurrency,
|
|
2940
|
-
experimentName
|
|
2948
|
+
experimentName,
|
|
2949
|
+
triggerTimestamp
|
|
2941
2950
|
});
|
|
2942
2951
|
for (let i = 0; i < snapshots.length; i += 1) {
|
|
2943
2952
|
const snap = snapshots[i];
|
|
@@ -3037,7 +3046,7 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
|
|
|
3037
3046
|
}
|
|
3038
3047
|
return failedTestCasesTotal > 0 ? 1 : 0;
|
|
3039
3048
|
}
|
|
3040
|
-
async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName) {
|
|
3049
|
+
async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
|
|
3041
3050
|
return new Promise((resolve5, reject) => {
|
|
3042
3051
|
const app = render(
|
|
3043
3052
|
React.createElement(RunView, {
|
|
@@ -3045,6 +3054,7 @@ async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, e
|
|
|
3045
3054
|
runConfigNames,
|
|
3046
3055
|
concurrency,
|
|
3047
3056
|
experimentName,
|
|
3057
|
+
triggerTimestamp,
|
|
3048
3058
|
onComplete: (err, exitCode) => {
|
|
3049
3059
|
app.unmount();
|
|
3050
3060
|
if (err) {
|
|
@@ -3102,11 +3112,13 @@ async function main() {
|
|
|
3102
3112
|
try {
|
|
3103
3113
|
if (args.command === "run") {
|
|
3104
3114
|
const concurrency = args.concurrency ?? getDefaultConcurrency();
|
|
3115
|
+
const triggerTimestamp = Date.now();
|
|
3105
3116
|
const exitCode = await (useInk ? runSimpleEvalRunConfigsInk : runSimpleEvalRunConfigsPlain)(
|
|
3106
3117
|
runner,
|
|
3107
3118
|
args.runConfigNames,
|
|
3108
3119
|
concurrency,
|
|
3109
|
-
args.experimentName
|
|
3120
|
+
args.experimentName,
|
|
3121
|
+
triggerTimestamp
|
|
3110
3122
|
);
|
|
3111
3123
|
if (args.ci && exitCode !== 0) {
|
|
3112
3124
|
process.exit(1);
|