@m4trix/evals 0.28.0 → 0.30.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -1731,8 +1731,11 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1731
1731
  output,
1732
1732
  meta: {
1733
1733
  triggerId: task.triggerId,
1734
+ triggerTimestamp: task.triggerTimestamp,
1734
1735
  runId: evaluatorRunId,
1735
1736
  datasetName: task.dataset.getDisplayLabel(),
1737
+ testCaseId: testCaseItem.id,
1738
+ testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
1736
1739
  repetitionId,
1737
1740
  repetitionIndex,
1738
1741
  repetitionCount,
@@ -2209,6 +2212,7 @@ var EffectRunner = class {
2209
2212
  const globalConcurrency = Math.max(1, request.globalConcurrency);
2210
2213
  const sem = effect.Effect.unsafeMakeSemaphore(globalConcurrency);
2211
2214
  const triggerId = request.triggerId ?? `trg-${crypto.randomUUID()}`;
2215
+ const triggerTimestamp = request.triggerTimestamp ?? Date.now();
2212
2216
  const snapshots = [];
2213
2217
  for (const job of request.jobs) {
2214
2218
  snapshots.push(
@@ -2216,6 +2220,7 @@ var EffectRunner = class {
2216
2220
  datasetId: job.datasetId,
2217
2221
  evaluatorIds: job.evaluatorIds,
2218
2222
  triggerId,
2223
+ triggerTimestamp,
2219
2224
  maxConcurrency: this.config.maxConcurrency ?? 1,
2220
2225
  globalEvaluationSemaphore: sem,
2221
2226
  runConfigName: job.runConfigName,
@@ -2253,6 +2258,7 @@ var EffectRunner = class {
2253
2258
  datasetId: request.datasetId,
2254
2259
  evaluatorIds: request.evaluatorIds,
2255
2260
  triggerId: request.triggerId,
2261
+ triggerTimestamp: request.triggerTimestamp ?? Date.now(),
2256
2262
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
2257
2263
  repetitions: request.repetitions,
2258
2264
  runConfigName,
@@ -2280,6 +2286,7 @@ var EffectRunner = class {
2280
2286
  const totalEvaluations = selectedTestCases.length * repetitions;
2281
2287
  const runConfigTags = [...params.runConfigTags ?? []];
2282
2288
  const triggerId = params.triggerId ?? `trg-${crypto.randomUUID()}`;
2289
+ const triggerTimestamp = params.triggerTimestamp ?? Date.now();
2283
2290
  const runId = `run-${crypto.randomUUID()}`;
2284
2291
  const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
2285
2292
  const snapshot = {
@@ -2323,6 +2330,7 @@ var EffectRunner = class {
2323
2330
  effect.Queue.offer(this.runQueue, {
2324
2331
  runId,
2325
2332
  triggerId,
2333
+ triggerTimestamp,
2326
2334
  datasetId: params.datasetId,
2327
2335
  dataset: dataset.dataset,
2328
2336
  evaluators: selectedEvaluators,
@@ -3101,7 +3109,8 @@ function EvalsCliApp({ data, args, runner }) {
3101
3109
  void runner.runDatasetWith({
3102
3110
  datasetId: selectedDataset.id,
3103
3111
  evaluatorIds: clampedState.selectedEvaluatorIds,
3104
- ...PROGRAMMATIC_RUN_CONFIG
3112
+ ...PROGRAMMATIC_RUN_CONFIG,
3113
+ triggerTimestamp: Date.now()
3105
3114
  }).then((snapshot) => {
3106
3115
  setRuntimeMessage(
3107
3116
  `Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`