npm - @m4trix/evals - Versions diffs - 0.28.0 → 0.30.0 - Mend

@m4trix/evals 0.28.0 → 0.30.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/index.d.ts CHANGED Viewed

@@ -274,6 +274,11 @@ interface EvalMiddleware<TCtx> {
 interface EvaluateMeta {
     /** Identifier of the trigger that started the run (for example, a CLI invocation). */
     triggerId: string;
+    /**
+     * Milliseconds since Unix epoch when the run was triggered (e.g. `Date.now()` at CLI start, or when
+     * `runDatasetWith` / `runDatasetJobsWithSharedConcurrency` was invoked). Shared across all jobs in a batch.
+     */
+    triggerTimestamp: number;
     /**
      * Identifier of the current test-case execution shared across all evaluators
      * for this specific test-case run.
@@ -281,6 +286,10 @@ interface EvaluateMeta {
     runId: string;
     /** Display label for the dataset (`Dataset.getDisplayLabel()`, i.e. `displayName ?? name`). */
     datasetName: string;
+    /** Discovery id for the current test case (same as runner events’ `testCaseId`). */
+    testCaseId: string;
+    /** Display label for the test case (`TestCase.getDisplayLabel()`, i.e. `displayName ?? name`). */
+    testCaseName: string;
     /** Canonical `RunConfig` name (or `programmatic` for API/TUI-only runs). */
     runConfigName: string;
     /**
@@ -589,6 +598,10 @@ interface RunDatasetRequest {
      * When omitted, the runner generates one in the format `trg-[uuid]`.
      */
     triggerId?: string;
+    /**
+     * When the run was triggered (`Date.now()` ms); defaults to now. Forwarded as `meta.triggerTimestamp`.
+     */
+    triggerTimestamp?: number;
     datasetId: string;
     evaluatorIds: ReadonlyArray<string>;
     /** RunConfig name surfaced on evaluator `meta` (from the job or `PROGRAMMATIC_RUN_CONFIG`). */
@@ -693,6 +706,10 @@ interface RunDatasetJobsWithSharedConcurrencyRequest {
     jobs: ReadonlyArray<RunDatasetJob>;
     globalConcurrency: number;
     triggerId?: string;
+    /**
+     * When the batch was triggered (`Date.now()` ms); defaults to now. CLI sets this once at command start.
+     */
+    triggerTimestamp?: number;
     /** Applied to every job in this batch (e.g. CLI `--experiment`). */
     experimentName?: string;
 }

package/dist/index.js CHANGED Viewed

@@ -1566,8 +1566,11 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
               output,
               meta: {
                 triggerId: task.triggerId,
+                triggerTimestamp: task.triggerTimestamp,
                 runId: evaluatorRunId,
                 datasetName: task.dataset.getDisplayLabel(),
+                testCaseId: testCaseItem.id,
+                testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
                 repetitionId,
                 repetitionIndex,
                 repetitionCount,
@@ -2044,6 +2047,7 @@ var EffectRunner = class {
     const globalConcurrency = Math.max(1, request.globalConcurrency);
     const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
     const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
+    const triggerTimestamp = request.triggerTimestamp ?? Date.now();
     const snapshots = [];
     for (const job of request.jobs) {
       snapshots.push(
@@ -2051,6 +2055,7 @@ var EffectRunner = class {
           datasetId: job.datasetId,
           evaluatorIds: job.evaluatorIds,
           triggerId,
+          triggerTimestamp,
           maxConcurrency: this.config.maxConcurrency ?? 1,
           globalEvaluationSemaphore: sem,
           runConfigName: job.runConfigName,
@@ -2088,6 +2093,7 @@ var EffectRunner = class {
       datasetId: request.datasetId,
       evaluatorIds: request.evaluatorIds,
       triggerId: request.triggerId,
+      triggerTimestamp: request.triggerTimestamp ?? Date.now(),
       maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
       repetitions: request.repetitions,
       runConfigName,
@@ -2115,6 +2121,7 @@ var EffectRunner = class {
     const totalEvaluations = selectedTestCases.length * repetitions;
     const runConfigTags = [...params.runConfigTags ?? []];
     const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
+    const triggerTimestamp = params.triggerTimestamp ?? Date.now();
     const runId = `run-${randomUUID()}`;
     const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
     const snapshot = {
@@ -2158,6 +2165,7 @@ var EffectRunner = class {
       Queue.offer(this.runQueue, {
         runId,
         triggerId,
+        triggerTimestamp,
         datasetId: params.datasetId,
         dataset: dataset.dataset,
         evaluators: selectedEvaluators,