@m4trix/evals 0.29.0 → 0.31.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -986,6 +986,8 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
986
986
  output,
987
987
  meta: {
988
988
  triggerId: task.triggerId,
989
+ triggerTimestamp: task.triggerTimestamp,
990
+ triggeredAt: new Date(task.triggerTimestamp).toISOString(),
989
991
  runId: evaluatorRunId,
990
992
  datasetName: task.dataset.getDisplayLabel(),
991
993
  testCaseId: testCaseItem.id,
@@ -1466,6 +1468,7 @@ var EffectRunner = class {
1466
1468
  const globalConcurrency = Math.max(1, request.globalConcurrency);
1467
1469
  const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
1468
1470
  const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
1471
+ const triggerTimestamp = request.triggerTimestamp ?? Date.now();
1469
1472
  const snapshots = [];
1470
1473
  for (const job of request.jobs) {
1471
1474
  snapshots.push(
@@ -1473,6 +1476,7 @@ var EffectRunner = class {
1473
1476
  datasetId: job.datasetId,
1474
1477
  evaluatorIds: job.evaluatorIds,
1475
1478
  triggerId,
1479
+ triggerTimestamp,
1476
1480
  maxConcurrency: this.config.maxConcurrency ?? 1,
1477
1481
  globalEvaluationSemaphore: sem,
1478
1482
  runConfigName: job.runConfigName,
@@ -1510,6 +1514,7 @@ var EffectRunner = class {
1510
1514
  datasetId: request.datasetId,
1511
1515
  evaluatorIds: request.evaluatorIds,
1512
1516
  triggerId: request.triggerId,
1517
+ triggerTimestamp: request.triggerTimestamp ?? Date.now(),
1513
1518
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
1514
1519
  repetitions: request.repetitions,
1515
1520
  runConfigName,
@@ -1537,6 +1542,7 @@ var EffectRunner = class {
1537
1542
  const totalEvaluations = selectedTestCases.length * repetitions;
1538
1543
  const runConfigTags = [...params.runConfigTags ?? []];
1539
1544
  const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
1545
+ const triggerTimestamp = params.triggerTimestamp ?? Date.now();
1540
1546
  const runId = `run-${randomUUID()}`;
1541
1547
  const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
1542
1548
  const snapshot = {
@@ -1580,6 +1586,7 @@ var EffectRunner = class {
1580
1586
  Queue.offer(this.runQueue, {
1581
1587
  runId,
1582
1588
  triggerId,
1589
+ triggerTimestamp,
1583
1590
  datasetId: params.datasetId,
1584
1591
  dataset: dataset.dataset,
1585
1592
  evaluators: selectedEvaluators,
@@ -2012,6 +2019,7 @@ function RunView({
2012
2019
  runConfigNames,
2013
2020
  concurrency,
2014
2021
  experimentName,
2022
+ triggerTimestamp,
2015
2023
  onComplete
2016
2024
  }) {
2017
2025
  const [phase, setPhase] = useState("loading");
@@ -2182,7 +2190,8 @@ function RunView({
2182
2190
  const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
2183
2191
  jobs,
2184
2192
  globalConcurrency: concurrency,
2185
- experimentName
2193
+ experimentName,
2194
+ triggerTimestamp
2186
2195
  });
2187
2196
  for (let i = 0; i < snapshots.length; i += 1) {
2188
2197
  const snap = snapshots[i];
@@ -2239,7 +2248,7 @@ function RunView({
2239
2248
  setPhase("completed");
2240
2249
  const exitCode = failedTestCases > 0 ? 1 : 0;
2241
2250
  setTimeout(() => onComplete(void 0, exitCode), 200);
2242
- }, [runner, runConfigNames, concurrency, experimentName, onComplete]);
2251
+ }, [runner, runConfigNames, concurrency, experimentName, triggerTimestamp, onComplete]);
2243
2252
  useEffect(() => {
2244
2253
  void runEval();
2245
2254
  }, [runEval]);
@@ -2727,7 +2736,7 @@ function formatEvaluatorScoreLine(name, scores, passed, metrics, options) {
2727
2736
  }
2728
2737
  return lines;
2729
2738
  }
2730
- async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName) {
2739
+ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
2731
2740
  const jobs = await runner.expandRunConfigNamesToJobs(runConfigNames);
2732
2741
  if (jobs.length === 0) {
2733
2742
  throw new Error("No jobs expanded from RunConfigs.");
@@ -2937,7 +2946,8 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
2937
2946
  const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
2938
2947
  jobs,
2939
2948
  globalConcurrency: concurrency,
2940
- experimentName
2949
+ experimentName,
2950
+ triggerTimestamp
2941
2951
  });
2942
2952
  for (let i = 0; i < snapshots.length; i += 1) {
2943
2953
  const snap = snapshots[i];
@@ -3037,7 +3047,7 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
3037
3047
  }
3038
3048
  return failedTestCasesTotal > 0 ? 1 : 0;
3039
3049
  }
3040
- async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName) {
3050
+ async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
3041
3051
  return new Promise((resolve5, reject) => {
3042
3052
  const app = render(
3043
3053
  React.createElement(RunView, {
@@ -3045,6 +3055,7 @@ async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, e
3045
3055
  runConfigNames,
3046
3056
  concurrency,
3047
3057
  experimentName,
3058
+ triggerTimestamp,
3048
3059
  onComplete: (err, exitCode) => {
3049
3060
  app.unmount();
3050
3061
  if (err) {
@@ -3102,11 +3113,13 @@ async function main() {
3102
3113
  try {
3103
3114
  if (args.command === "run") {
3104
3115
  const concurrency = args.concurrency ?? getDefaultConcurrency();
3116
+ const triggerTimestamp = Date.now();
3105
3117
  const exitCode = await (useInk ? runSimpleEvalRunConfigsInk : runSimpleEvalRunConfigsPlain)(
3106
3118
  runner,
3107
3119
  args.runConfigNames,
3108
3120
  concurrency,
3109
- args.experimentName
3121
+ args.experimentName,
3122
+ triggerTimestamp
3110
3123
  );
3111
3124
  if (args.ci && exitCode !== 0) {
3112
3125
  process.exit(1);