@m4trix/evals 0.29.0 → 0.30.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -986,6 +986,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
986
986
  output,
987
987
  meta: {
988
988
  triggerId: task.triggerId,
989
+ triggerTimestamp: task.triggerTimestamp,
989
990
  runId: evaluatorRunId,
990
991
  datasetName: task.dataset.getDisplayLabel(),
991
992
  testCaseId: testCaseItem.id,
@@ -1466,6 +1467,7 @@ var EffectRunner = class {
1466
1467
  const globalConcurrency = Math.max(1, request.globalConcurrency);
1467
1468
  const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
1468
1469
  const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
1470
+ const triggerTimestamp = request.triggerTimestamp ?? Date.now();
1469
1471
  const snapshots = [];
1470
1472
  for (const job of request.jobs) {
1471
1473
  snapshots.push(
@@ -1473,6 +1475,7 @@ var EffectRunner = class {
1473
1475
  datasetId: job.datasetId,
1474
1476
  evaluatorIds: job.evaluatorIds,
1475
1477
  triggerId,
1478
+ triggerTimestamp,
1476
1479
  maxConcurrency: this.config.maxConcurrency ?? 1,
1477
1480
  globalEvaluationSemaphore: sem,
1478
1481
  runConfigName: job.runConfigName,
@@ -1510,6 +1513,7 @@ var EffectRunner = class {
1510
1513
  datasetId: request.datasetId,
1511
1514
  evaluatorIds: request.evaluatorIds,
1512
1515
  triggerId: request.triggerId,
1516
+ triggerTimestamp: request.triggerTimestamp ?? Date.now(),
1513
1517
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
1514
1518
  repetitions: request.repetitions,
1515
1519
  runConfigName,
@@ -1537,6 +1541,7 @@ var EffectRunner = class {
1537
1541
  const totalEvaluations = selectedTestCases.length * repetitions;
1538
1542
  const runConfigTags = [...params.runConfigTags ?? []];
1539
1543
  const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
1544
+ const triggerTimestamp = params.triggerTimestamp ?? Date.now();
1540
1545
  const runId = `run-${randomUUID()}`;
1541
1546
  const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
1542
1547
  const snapshot = {
@@ -1580,6 +1585,7 @@ var EffectRunner = class {
1580
1585
  Queue.offer(this.runQueue, {
1581
1586
  runId,
1582
1587
  triggerId,
1588
+ triggerTimestamp,
1583
1589
  datasetId: params.datasetId,
1584
1590
  dataset: dataset.dataset,
1585
1591
  evaluators: selectedEvaluators,
@@ -2012,6 +2018,7 @@ function RunView({
2012
2018
  runConfigNames,
2013
2019
  concurrency,
2014
2020
  experimentName,
2021
+ triggerTimestamp,
2015
2022
  onComplete
2016
2023
  }) {
2017
2024
  const [phase, setPhase] = useState("loading");
@@ -2182,7 +2189,8 @@ function RunView({
2182
2189
  const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
2183
2190
  jobs,
2184
2191
  globalConcurrency: concurrency,
2185
- experimentName
2192
+ experimentName,
2193
+ triggerTimestamp
2186
2194
  });
2187
2195
  for (let i = 0; i < snapshots.length; i += 1) {
2188
2196
  const snap = snapshots[i];
@@ -2239,7 +2247,7 @@ function RunView({
2239
2247
  setPhase("completed");
2240
2248
  const exitCode = failedTestCases > 0 ? 1 : 0;
2241
2249
  setTimeout(() => onComplete(void 0, exitCode), 200);
2242
- }, [runner, runConfigNames, concurrency, experimentName, onComplete]);
2250
+ }, [runner, runConfigNames, concurrency, experimentName, triggerTimestamp, onComplete]);
2243
2251
  useEffect(() => {
2244
2252
  void runEval();
2245
2253
  }, [runEval]);
@@ -2727,7 +2735,7 @@ function formatEvaluatorScoreLine(name, scores, passed, metrics, options) {
2727
2735
  }
2728
2736
  return lines;
2729
2737
  }
2730
- async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName) {
2738
+ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
2731
2739
  const jobs = await runner.expandRunConfigNamesToJobs(runConfigNames);
2732
2740
  if (jobs.length === 0) {
2733
2741
  throw new Error("No jobs expanded from RunConfigs.");
@@ -2937,7 +2945,8 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
2937
2945
  const snapshots = await runner.runDatasetJobsWithSharedConcurrency({
2938
2946
  jobs,
2939
2947
  globalConcurrency: concurrency,
2940
- experimentName
2948
+ experimentName,
2949
+ triggerTimestamp
2941
2950
  });
2942
2951
  for (let i = 0; i < snapshots.length; i += 1) {
2943
2952
  const snap = snapshots[i];
@@ -3037,7 +3046,7 @@ async function runSimpleEvalRunConfigsPlain(runner, runConfigNames, concurrency,
3037
3046
  }
3038
3047
  return failedTestCasesTotal > 0 ? 1 : 0;
3039
3048
  }
3040
- async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName) {
3049
+ async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, experimentName, triggerTimestamp) {
3041
3050
  return new Promise((resolve5, reject) => {
3042
3051
  const app = render(
3043
3052
  React.createElement(RunView, {
@@ -3045,6 +3054,7 @@ async function runSimpleEvalRunConfigsInk(runner, runConfigNames, concurrency, e
3045
3054
  runConfigNames,
3046
3055
  concurrency,
3047
3056
  experimentName,
3057
+ triggerTimestamp,
3048
3058
  onComplete: (err, exitCode) => {
3049
3059
  app.unmount();
3050
3060
  if (err) {
@@ -3102,11 +3112,13 @@ async function main() {
3102
3112
  try {
3103
3113
  if (args.command === "run") {
3104
3114
  const concurrency = args.concurrency ?? getDefaultConcurrency();
3115
+ const triggerTimestamp = Date.now();
3105
3116
  const exitCode = await (useInk ? runSimpleEvalRunConfigsInk : runSimpleEvalRunConfigsPlain)(
3106
3117
  runner,
3107
3118
  args.runConfigNames,
3108
3119
  concurrency,
3109
- args.experimentName
3120
+ args.experimentName,
3121
+ triggerTimestamp
3110
3122
  );
3111
3123
  if (args.ci && exitCode !== 0) {
3112
3124
  process.exit(1);