@m4trix/evals 0.27.0 → 0.29.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -841,7 +841,7 @@ var RunConfig = class _RunConfig {
841
841
  getDisplayLabel() {
842
842
  return this._displayName ?? this._name;
843
843
  }
844
- /** Tags from `RunConfig.define({ tags })`; surfaced as `runConfigTags` on evaluator callbacks. */
844
+ /** Tags from `RunConfig.define({ tags })`; surfaced as `meta.runConfigTags` on evaluator callbacks. */
845
845
  getTags() {
846
846
  return [...this._tags];
847
847
  }
@@ -1014,10 +1014,11 @@ var TestCase = class _TestCase {
1014
1014
  static describe(config) {
1015
1015
  const name = validateTestCaseName(config.name, "TestCase.describe");
1016
1016
  const displayName = normalizeOptionalDisplayName(config.displayName);
1017
+ const tags = config.tags !== void 0 ? [...config.tags] : [];
1017
1018
  return new _TestCase({
1018
1019
  name,
1019
1020
  displayName,
1020
- tags: config.tags,
1021
+ tags,
1021
1022
  inputSchema: config.inputSchema,
1022
1023
  input: config.input,
1023
1024
  outputSchema: config.outputSchema,
@@ -1034,7 +1035,7 @@ var TestCase = class _TestCase {
1034
1035
  return this._config.displayName ?? this._config.name;
1035
1036
  }
1036
1037
  getTags() {
1037
- return this._config.tags;
1038
+ return [...this._config.tags];
1038
1039
  }
1039
1040
  getInputSchema() {
1040
1041
  return this._config.inputSchema;
@@ -1592,14 +1593,17 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1592
1593
  triggerId: task.triggerId,
1593
1594
  runId: evaluatorRunId,
1594
1595
  datasetName: task.dataset.getDisplayLabel(),
1596
+ testCaseId: testCaseItem.id,
1597
+ testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
1595
1598
  repetitionId,
1596
1599
  repetitionIndex,
1597
1600
  repetitionCount,
1598
- runConfigName: task.runConfigName
1601
+ runConfigName: task.runConfigName,
1602
+ ...task.experimentName !== void 0 && task.experimentName !== "" ? { experimentName: task.experimentName } : {},
1603
+ testCaseTags: getTestCaseTagList(testCaseItem.testCase),
1604
+ runConfigTags: task.runConfigTags,
1605
+ evaluatorTags: getEvaluatorTagList(evaluator)
1599
1606
  },
1600
- testCaseTags: getTestCaseTagList(testCaseItem.testCase),
1601
- runConfigTags: task.runConfigTags,
1602
- evaluatorTags: getEvaluatorTagList(evaluator),
1603
1607
  logDiff,
1604
1608
  log,
1605
1609
  createError
@@ -2078,7 +2082,8 @@ var EffectRunner = class {
2078
2082
  globalEvaluationSemaphore: sem,
2079
2083
  runConfigName: job.runConfigName,
2080
2084
  runConfigTags: job.runConfigTags,
2081
- repetitions: job.repetitions
2085
+ repetitions: job.repetitions,
2086
+ experimentName: request.experimentName
2082
2087
  })
2083
2088
  );
2084
2089
  }
@@ -2113,7 +2118,8 @@ var EffectRunner = class {
2113
2118
  maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
2114
2119
  repetitions: request.repetitions,
2115
2120
  runConfigName,
2116
- runConfigTags: request.runConfigTags
2121
+ runConfigTags: request.runConfigTags,
2122
+ experimentName: request.experimentName
2117
2123
  });
2118
2124
  }
2119
2125
  async startDatasetRun(params) {
@@ -2188,7 +2194,8 @@ var EffectRunner = class {
2188
2194
  globalEvaluationSemaphore: params.globalEvaluationSemaphore,
2189
2195
  runConfigName: params.runConfigName,
2190
2196
  runConfigTags,
2191
- repetitions
2197
+ repetitions,
2198
+ experimentName: params.experimentName
2192
2199
  })
2193
2200
  );
2194
2201
  return snapshot;