@m4trix/evals 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -274,6 +274,11 @@ interface EvalMiddleware<TCtx> {
274
274
  interface EvaluateMeta {
275
275
  /** Identifier of the trigger that started the run (for example, a CLI invocation). */
276
276
  triggerId: string;
277
+ /**
278
+ * Milliseconds since Unix epoch when the run was triggered (e.g. `Date.now()` at CLI start, or when
279
+ * `runDatasetWith` / `runDatasetJobsWithSharedConcurrency` was invoked). Shared across all jobs in a batch.
280
+ */
281
+ triggerTimestamp: number;
277
282
  /**
278
283
  * Identifier of the current test-case execution shared across all evaluators
279
284
  * for this specific test-case run.
@@ -593,6 +598,10 @@ interface RunDatasetRequest {
593
598
  * When omitted, the runner generates one in the format `trg-[uuid]`.
594
599
  */
595
600
  triggerId?: string;
601
+ /**
602
+ * When the run was triggered (`Date.now()` ms); defaults to now. Forwarded as `meta.triggerTimestamp`.
603
+ */
604
+ triggerTimestamp?: number;
596
605
  datasetId: string;
597
606
  evaluatorIds: ReadonlyArray<string>;
598
607
  /** RunConfig name surfaced on evaluator `meta` (from the job or `PROGRAMMATIC_RUN_CONFIG`). */
@@ -697,6 +706,10 @@ interface RunDatasetJobsWithSharedConcurrencyRequest {
697
706
  jobs: ReadonlyArray<RunDatasetJob>;
698
707
  globalConcurrency: number;
699
708
  triggerId?: string;
709
+ /**
710
+ * When the batch was triggered (`Date.now()` ms); defaults to now. CLI sets this once at command start.
711
+ */
712
+ triggerTimestamp?: number;
700
713
  /** Applied to every job in this batch (e.g. CLI `--experiment`). */
701
714
  experimentName?: string;
702
715
  }
package/dist/index.js CHANGED
@@ -1566,6 +1566,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1566
1566
  output,
1567
1567
  meta: {
1568
1568
  triggerId: task.triggerId,
1569
+ triggerTimestamp: task.triggerTimestamp,
1569
1570
  runId: evaluatorRunId,
1570
1571
  datasetName: task.dataset.getDisplayLabel(),
1571
1572
  testCaseId: testCaseItem.id,
@@ -2046,6 +2047,7 @@ var EffectRunner = class {
2046
2047
  const globalConcurrency = Math.max(1, request.globalConcurrency);
2047
2048
  const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
2048
2049
  const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
2050
+ const triggerTimestamp = request.triggerTimestamp ?? Date.now();
2049
2051
  const snapshots = [];
2050
2052
  for (const job of request.jobs) {
2051
2053
  snapshots.push(
@@ -2053,6 +2055,7 @@ var EffectRunner = class {
2053
2055
  datasetId: job.datasetId,
2054
2056
  evaluatorIds: job.evaluatorIds,
2055
2057
  triggerId,
2058
+ triggerTimestamp,
2056
2059
  maxConcurrency: this.config.maxConcurrency ?? 1,
2057
2060
  globalEvaluationSemaphore: sem,
2058
2061
  runConfigName: job.runConfigName,
@@ -2090,6 +2093,7 @@ var EffectRunner = class {
2090
2093
  datasetId: request.datasetId,
2091
2094
  evaluatorIds: request.evaluatorIds,
2092
2095
  triggerId: request.triggerId,
2096
+ triggerTimestamp: request.triggerTimestamp ?? Date.now(),
2093
2097
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
2094
2098
  repetitions: request.repetitions,
2095
2099
  runConfigName,
@@ -2117,6 +2121,7 @@ var EffectRunner = class {
2117
2121
  const totalEvaluations = selectedTestCases.length * repetitions;
2118
2122
  const runConfigTags = [...params.runConfigTags ?? []];
2119
2123
  const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
2124
+ const triggerTimestamp = params.triggerTimestamp ?? Date.now();
2120
2125
  const runId = `run-${randomUUID()}`;
2121
2126
  const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
2122
2127
  const snapshot = {
@@ -2160,6 +2165,7 @@ var EffectRunner = class {
2160
2165
  Queue.offer(this.runQueue, {
2161
2166
  runId,
2162
2167
  triggerId,
2168
+ triggerTimestamp,
2163
2169
  datasetId: params.datasetId,
2164
2170
  dataset: dataset.dataset,
2165
2171
  evaluators: selectedEvaluators,