@m4trix/evals 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1704,8 +1704,11 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1704
1704
  output,
1705
1705
  meta: {
1706
1706
  triggerId: task.triggerId,
1707
+ triggerTimestamp: task.triggerTimestamp,
1707
1708
  runId: evaluatorRunId,
1708
1709
  datasetName: task.dataset.getDisplayLabel(),
1710
+ testCaseId: testCaseItem.id,
1711
+ testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
1709
1712
  repetitionId,
1710
1713
  repetitionIndex,
1711
1714
  repetitionCount,
@@ -2182,6 +2185,7 @@ var EffectRunner = class {
2182
2185
  const globalConcurrency = Math.max(1, request.globalConcurrency);
2183
2186
  const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
2184
2187
  const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
2188
+ const triggerTimestamp = request.triggerTimestamp ?? Date.now();
2185
2189
  const snapshots = [];
2186
2190
  for (const job of request.jobs) {
2187
2191
  snapshots.push(
@@ -2189,6 +2193,7 @@ var EffectRunner = class {
2189
2193
  datasetId: job.datasetId,
2190
2194
  evaluatorIds: job.evaluatorIds,
2191
2195
  triggerId,
2196
+ triggerTimestamp,
2192
2197
  maxConcurrency: this.config.maxConcurrency ?? 1,
2193
2198
  globalEvaluationSemaphore: sem,
2194
2199
  runConfigName: job.runConfigName,
@@ -2226,6 +2231,7 @@ var EffectRunner = class {
2226
2231
  datasetId: request.datasetId,
2227
2232
  evaluatorIds: request.evaluatorIds,
2228
2233
  triggerId: request.triggerId,
2234
+ triggerTimestamp: request.triggerTimestamp ?? Date.now(),
2229
2235
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
2230
2236
  repetitions: request.repetitions,
2231
2237
  runConfigName,
@@ -2253,6 +2259,7 @@ var EffectRunner = class {
2253
2259
  const totalEvaluations = selectedTestCases.length * repetitions;
2254
2260
  const runConfigTags = [...params.runConfigTags ?? []];
2255
2261
  const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
2262
+ const triggerTimestamp = params.triggerTimestamp ?? Date.now();
2256
2263
  const runId = `run-${randomUUID()}`;
2257
2264
  const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
2258
2265
  const snapshot = {
@@ -2296,6 +2303,7 @@ var EffectRunner = class {
2296
2303
  Queue.offer(this.runQueue, {
2297
2304
  runId,
2298
2305
  triggerId,
2306
+ triggerTimestamp,
2299
2307
  datasetId: params.datasetId,
2300
2308
  dataset: dataset.dataset,
2301
2309
  evaluators: selectedEvaluators,
@@ -3074,7 +3082,8 @@ function EvalsCliApp({ data, args, runner }) {
3074
3082
  void runner.runDatasetWith({
3075
3083
  datasetId: selectedDataset.id,
3076
3084
  evaluatorIds: clampedState.selectedEvaluatorIds,
3077
- ...PROGRAMMATIC_RUN_CONFIG
3085
+ ...PROGRAMMATIC_RUN_CONFIG,
3086
+ triggerTimestamp: Date.now()
3078
3087
  }).then((snapshot) => {
3079
3088
  setRuntimeMessage(
3080
3089
  `Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`