@m4trix/evals 0.28.0 → 0.30.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -274,6 +274,11 @@ interface EvalMiddleware<TCtx> {
274
274
  interface EvaluateMeta {
275
275
  /** Identifier of the trigger that started the run (for example, a CLI invocation). */
276
276
  triggerId: string;
277
+ /**
278
+ * Milliseconds since Unix epoch when the run was triggered (e.g. `Date.now()` at CLI start, or when
279
+ * `runDatasetWith` / `runDatasetJobsWithSharedConcurrency` was invoked). Shared across all jobs in a batch.
280
+ */
281
+ triggerTimestamp: number;
277
282
  /**
278
283
  * Identifier of the current test-case execution shared across all evaluators
279
284
  * for this specific test-case run.
@@ -281,6 +286,10 @@ interface EvaluateMeta {
281
286
  runId: string;
282
287
  /** Display label for the dataset (`Dataset.getDisplayLabel()`, i.e. `displayName ?? name`). */
283
288
  datasetName: string;
289
+ /** Discovery id for the current test case (same as runner events’ `testCaseId`). */
290
+ testCaseId: string;
291
+ /** Display label for the test case (`TestCase.getDisplayLabel()`, i.e. `displayName ?? name`). */
292
+ testCaseName: string;
284
293
  /** Canonical `RunConfig` name (or `programmatic` for API/TUI-only runs). */
285
294
  runConfigName: string;
286
295
  /**
@@ -589,6 +598,10 @@ interface RunDatasetRequest {
589
598
  * When omitted, the runner generates one in the format `trg-[uuid]`.
590
599
  */
591
600
  triggerId?: string;
601
+ /**
602
+ * When the run was triggered (`Date.now()` ms); defaults to now. Forwarded as `meta.triggerTimestamp`.
603
+ */
604
+ triggerTimestamp?: number;
592
605
  datasetId: string;
593
606
  evaluatorIds: ReadonlyArray<string>;
594
607
  /** RunConfig name surfaced on evaluator `meta` (from the job or `PROGRAMMATIC_RUN_CONFIG`). */
@@ -693,6 +706,10 @@ interface RunDatasetJobsWithSharedConcurrencyRequest {
693
706
  jobs: ReadonlyArray<RunDatasetJob>;
694
707
  globalConcurrency: number;
695
708
  triggerId?: string;
709
+ /**
710
+ * When the batch was triggered (`Date.now()` ms); defaults to now. CLI sets this once at command start.
711
+ */
712
+ triggerTimestamp?: number;
696
713
  /** Applied to every job in this batch (e.g. CLI `--experiment`). */
697
714
  experimentName?: string;
698
715
  }
package/dist/index.js CHANGED
@@ -1566,8 +1566,11 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1566
1566
  output,
1567
1567
  meta: {
1568
1568
  triggerId: task.triggerId,
1569
+ triggerTimestamp: task.triggerTimestamp,
1569
1570
  runId: evaluatorRunId,
1570
1571
  datasetName: task.dataset.getDisplayLabel(),
1572
+ testCaseId: testCaseItem.id,
1573
+ testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
1571
1574
  repetitionId,
1572
1575
  repetitionIndex,
1573
1576
  repetitionCount,
@@ -2044,6 +2047,7 @@ var EffectRunner = class {
2044
2047
  const globalConcurrency = Math.max(1, request.globalConcurrency);
2045
2048
  const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
2046
2049
  const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
2050
+ const triggerTimestamp = request.triggerTimestamp ?? Date.now();
2047
2051
  const snapshots = [];
2048
2052
  for (const job of request.jobs) {
2049
2053
  snapshots.push(
@@ -2051,6 +2055,7 @@ var EffectRunner = class {
2051
2055
  datasetId: job.datasetId,
2052
2056
  evaluatorIds: job.evaluatorIds,
2053
2057
  triggerId,
2058
+ triggerTimestamp,
2054
2059
  maxConcurrency: this.config.maxConcurrency ?? 1,
2055
2060
  globalEvaluationSemaphore: sem,
2056
2061
  runConfigName: job.runConfigName,
@@ -2088,6 +2093,7 @@ var EffectRunner = class {
2088
2093
  datasetId: request.datasetId,
2089
2094
  evaluatorIds: request.evaluatorIds,
2090
2095
  triggerId: request.triggerId,
2096
+ triggerTimestamp: request.triggerTimestamp ?? Date.now(),
2091
2097
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
2092
2098
  repetitions: request.repetitions,
2093
2099
  runConfigName,
@@ -2115,6 +2121,7 @@ var EffectRunner = class {
2115
2121
  const totalEvaluations = selectedTestCases.length * repetitions;
2116
2122
  const runConfigTags = [...params.runConfigTags ?? []];
2117
2123
  const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
2124
+ const triggerTimestamp = params.triggerTimestamp ?? Date.now();
2118
2125
  const runId = `run-${randomUUID()}`;
2119
2126
  const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
2120
2127
  const snapshot = {
@@ -2158,6 +2165,7 @@ var EffectRunner = class {
2158
2165
  Queue.offer(this.runQueue, {
2159
2166
  runId,
2160
2167
  triggerId,
2168
+ triggerTimestamp,
2161
2169
  datasetId: params.datasetId,
2162
2170
  dataset: dataset.dataset,
2163
2171
  evaluators: selectedEvaluators,