@m4trix/evals 0.26.0 → 0.27.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -280,6 +280,7 @@ function makeEntityIdSchema(brand, label) {
280
280
  var RunConfigNameSchema = makeEntityIdSchema("RunConfigName", "RunConfig name");
281
281
  makeEntityIdSchema("EvaluatorName", "Evaluator name");
282
282
  makeEntityIdSchema("TestCaseName", "Test case name");
283
+ makeEntityIdSchema("DatasetName", "Dataset name");
283
284
  function validateWithSchema(schema, raw, context) {
284
285
  const trimmed = raw.trim();
285
286
  const decode = effect.Schema.decodeUnknownEither(
@@ -295,6 +296,14 @@ function validateRunConfigName(raw, context) {
295
296
  return validateWithSchema(RunConfigNameSchema, raw, context);
296
297
  }
297
298
 
299
+ // src/evals/dataset.ts
300
+ function getDatasetDisplayLabel(dataset) {
301
+ if (typeof dataset.getDisplayLabel === "function") {
302
+ return dataset.getDisplayLabel();
303
+ }
304
+ return typeof dataset.getName === "function" ? dataset.getName() : "";
305
+ }
306
+
298
307
  // src/evals/evaluator.ts
299
308
  function getEvaluatorDisplayLabel(evaluator) {
300
309
  if (typeof evaluator.getDisplayLabel === "function") {
@@ -555,7 +564,7 @@ function toEvalDataset(item, snapshots) {
555
564
  const runs = snapshots.filter((snapshot) => snapshot.datasetId === item.id).sort((a, b) => b.queuedAt - a.queuedAt).map(toEvalRun);
556
565
  return {
557
566
  id: item.id,
558
- name: item.dataset.getName(),
567
+ name: getDatasetDisplayLabel(item.dataset),
559
568
  overview: `Discovered from ${item.filePath}`,
560
569
  runs
561
570
  };
@@ -1723,7 +1732,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1723
1732
  meta: {
1724
1733
  triggerId: task.triggerId,
1725
1734
  runId: evaluatorRunId,
1726
- datasetId: task.datasetId,
1735
+ datasetName: task.dataset.getDisplayLabel(),
1727
1736
  repetitionId,
1728
1737
  repetitionIndex,
1729
1738
  repetitionCount,
@@ -2138,7 +2147,7 @@ var EffectRunner = class {
2138
2147
  );
2139
2148
  if (!dsCollected) {
2140
2149
  throw new Error(
2141
- `RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getName()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
2150
+ `RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getDisplayLabel()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
2142
2151
  );
2143
2152
  }
2144
2153
  let evaluatorIds;
@@ -2273,7 +2282,7 @@ var EffectRunner = class {
2273
2282
  const snapshot = {
2274
2283
  runId,
2275
2284
  datasetId: params.datasetId,
2276
- datasetName: dataset.dataset.getName(),
2285
+ datasetName: dataset.dataset.getDisplayLabel(),
2277
2286
  evaluatorIds: selectedEvaluators.map((item) => item.id),
2278
2287
  queuedAt: Date.now(),
2279
2288
  totalTestCases: totalEvaluations,
@@ -2294,7 +2303,7 @@ var EffectRunner = class {
2294
2303
  type: "RunQueued",
2295
2304
  runId,
2296
2305
  datasetId: params.datasetId,
2297
- datasetName: dataset.dataset.getName(),
2306
+ datasetName: dataset.dataset.getDisplayLabel(),
2298
2307
  evaluatorIds: selectedEvaluators.map((item) => item.id),
2299
2308
  totalTestCases: totalEvaluations,
2300
2309
  artifactPath