@m4trix/evals 0.26.0 → 0.27.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -253,6 +253,7 @@ function makeEntityIdSchema(brand, label) {
253
253
  var RunConfigNameSchema = makeEntityIdSchema("RunConfigName", "RunConfig name");
254
254
  makeEntityIdSchema("EvaluatorName", "Evaluator name");
255
255
  makeEntityIdSchema("TestCaseName", "Test case name");
256
+ makeEntityIdSchema("DatasetName", "Dataset name");
256
257
  function validateWithSchema(schema, raw, context) {
257
258
  const trimmed = raw.trim();
258
259
  const decode = Schema.decodeUnknownEither(
@@ -268,6 +269,14 @@ function validateRunConfigName(raw, context) {
268
269
  return validateWithSchema(RunConfigNameSchema, raw, context);
269
270
  }
270
271
 
272
+ // src/evals/dataset.ts
273
+ function getDatasetDisplayLabel(dataset) {
274
+ if (typeof dataset.getDisplayLabel === "function") {
275
+ return dataset.getDisplayLabel();
276
+ }
277
+ return typeof dataset.getName === "function" ? dataset.getName() : "";
278
+ }
279
+
271
280
  // src/evals/evaluator.ts
272
281
  function getEvaluatorDisplayLabel(evaluator) {
273
282
  if (typeof evaluator.getDisplayLabel === "function") {
@@ -528,7 +537,7 @@ function toEvalDataset(item, snapshots) {
528
537
  const runs = snapshots.filter((snapshot) => snapshot.datasetId === item.id).sort((a, b) => b.queuedAt - a.queuedAt).map(toEvalRun);
529
538
  return {
530
539
  id: item.id,
531
- name: item.dataset.getName(),
540
+ name: getDatasetDisplayLabel(item.dataset),
532
541
  overview: `Discovered from ${item.filePath}`,
533
542
  runs
534
543
  };
@@ -1696,7 +1705,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1696
1705
  meta: {
1697
1706
  triggerId: task.triggerId,
1698
1707
  runId: evaluatorRunId,
1699
- datasetId: task.datasetId,
1708
+ datasetName: task.dataset.getDisplayLabel(),
1700
1709
  repetitionId,
1701
1710
  repetitionIndex,
1702
1711
  repetitionCount,
@@ -2111,7 +2120,7 @@ var EffectRunner = class {
2111
2120
  );
2112
2121
  if (!dsCollected) {
2113
2122
  throw new Error(
2114
- `RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getName()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
2123
+ `RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getDisplayLabel()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
2115
2124
  );
2116
2125
  }
2117
2126
  let evaluatorIds;
@@ -2246,7 +2255,7 @@ var EffectRunner = class {
2246
2255
  const snapshot = {
2247
2256
  runId,
2248
2257
  datasetId: params.datasetId,
2249
- datasetName: dataset.dataset.getName(),
2258
+ datasetName: dataset.dataset.getDisplayLabel(),
2250
2259
  evaluatorIds: selectedEvaluators.map((item) => item.id),
2251
2260
  queuedAt: Date.now(),
2252
2261
  totalTestCases: totalEvaluations,
@@ -2267,7 +2276,7 @@ var EffectRunner = class {
2267
2276
  type: "RunQueued",
2268
2277
  runId,
2269
2278
  datasetId: params.datasetId,
2270
- datasetName: dataset.dataset.getName(),
2279
+ datasetName: dataset.dataset.getDisplayLabel(),
2271
2280
  evaluatorIds: selectedEvaluators.map((item) => item.id),
2272
2281
  totalTestCases: totalEvaluations,
2273
2282
  artifactPath