@m4trix/evals 0.26.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ function makeEntityIdSchema(brand, label) {
29
29
  var RunConfigNameSchema = makeEntityIdSchema("RunConfigName", "RunConfig name");
30
30
  makeEntityIdSchema("EvaluatorName", "Evaluator name");
31
31
  makeEntityIdSchema("TestCaseName", "Test case name");
32
+ makeEntityIdSchema("DatasetName", "Dataset name");
32
33
  function validateWithSchema(schema, raw, context) {
33
34
  const trimmed = raw.trim();
34
35
  const decode = Schema.decodeUnknownEither(
@@ -585,6 +586,14 @@ function getTestCaseTagList(testCase) {
585
586
  return typeof testCase.getTags === "function" ? [...testCase.getTags()] : [];
586
587
  }
587
588
 
589
+ // src/evals/dataset.ts
590
/**
 * Resolve a label for a dataset-like object.
 *
 * Lookup order:
 *   1. `dataset.getDisplayLabel()` when that member is a function;
 *   2. `dataset.getName()` when that member is a function;
 *   3. the empty string otherwise.
 *
 * @param {object | null | undefined} dataset - Object that may expose
 *   `getDisplayLabel` and/or `getName` methods.
 * @returns {*} The value returned by the selected accessor, or `""` when the
 *   dataset is nullish or exposes neither accessor.
 */
function getDatasetDisplayLabel(dataset) {
  // A nullish dataset is handled like an object with no accessors, so callers
  // that only need a label for display/logging get "" instead of a TypeError.
  if (dataset == null) {
    return "";
  }
  if (typeof dataset.getDisplayLabel === "function") {
    return dataset.getDisplayLabel();
  }
  return typeof dataset.getName === "function" ? dataset.getName() : "";
}
596
+
588
597
  // src/evals/metric.ts
589
598
  var registry = /* @__PURE__ */ new Map();
590
599
  var Metric = {
@@ -978,7 +987,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
978
987
  meta: {
979
988
  triggerId: task.triggerId,
980
989
  runId: evaluatorRunId,
981
- datasetId: task.datasetId,
990
+ datasetName: task.dataset.getDisplayLabel(),
982
991
  repetitionId,
983
992
  repetitionIndex,
984
993
  repetitionCount,
@@ -1393,7 +1402,7 @@ var EffectRunner = class {
1393
1402
  );
1394
1403
  if (!dsCollected) {
1395
1404
  throw new Error(
1396
- `RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getName()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
1405
+ `RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getDisplayLabel()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
1397
1406
  );
1398
1407
  }
1399
1408
  let evaluatorIds;
@@ -1528,7 +1537,7 @@ var EffectRunner = class {
1528
1537
  const snapshot = {
1529
1538
  runId,
1530
1539
  datasetId: params.datasetId,
1531
- datasetName: dataset.dataset.getName(),
1540
+ datasetName: dataset.dataset.getDisplayLabel(),
1532
1541
  evaluatorIds: selectedEvaluators.map((item) => item.id),
1533
1542
  queuedAt: Date.now(),
1534
1543
  totalTestCases: totalEvaluations,
@@ -1549,7 +1558,7 @@ var EffectRunner = class {
1549
1558
  type: "RunQueued",
1550
1559
  runId,
1551
1560
  datasetId: params.datasetId,
1552
- datasetName: dataset.dataset.getName(),
1561
+ datasetName: dataset.dataset.getDisplayLabel(),
1553
1562
  evaluatorIds: selectedEvaluators.map((item) => item.id),
1554
1563
  totalTestCases: totalEvaluations,
1555
1564
  artifactPath
@@ -1703,7 +1712,7 @@ function getSimpleCliUsage() {
1703
1712
  return [
1704
1713
  "Usage:",
1705
1714
  " eval-agents-simple run --run-config <name> [--run-config <name> ...] [--concurrency N] [--ci]",
1706
- " eval-agents-simple generate --dataset <datasetName>",
1715
+ " eval-agents-simple generate --dataset <datasetId>",
1707
1716
  "",
1708
1717
  "Options:",
1709
1718
  " --ci With run: exit with code 1 if any test case fails.",
@@ -1771,7 +1780,7 @@ function GenerateView({
1771
1780
  if (!cancelled) {
1772
1781
  setResult({
1773
1782
  count: payload.length,
1774
- datasetName: dataset.dataset.getName(),
1783
+ datasetName: getDatasetDisplayLabel(dataset.dataset),
1775
1784
  outputPath
1776
1785
  });
1777
1786
  setTimeout(() => onComplete(), 200);
@@ -1832,7 +1841,7 @@ async function generateDatasetJsonCommandPlain(runner, datasetName) {
1832
1841
  const outputPath = createOutputPath(absoluteDatasetPath);
1833
1842
  await writeFile(outputPath, `${JSON.stringify(payload, null, 2)}
1834
1843
  `, "utf8");
1835
- console.log(`Generated ${payload.length} test cases for dataset "${dataset.dataset.getName()}".`);
1844
+ console.log(`Generated ${payload.length} test cases for dataset "${getDatasetDisplayLabel(dataset.dataset)}".`);
1836
1845
  console.log(`Wrote ${outputPath}`);
1837
1846
  }
1838
1847
  async function generateDatasetJsonCommandInk(runner, datasetName) {
@@ -3083,7 +3092,7 @@ async function main() {
3083
3092
  }
3084
3093
  const genDataset = args.datasetName;
3085
3094
  if (!genDataset) {
3086
- console.error("Missing required --dataset <datasetName> argument.");
3095
+ console.error("Missing required --dataset <datasetId> argument.");
3087
3096
  printUsageAndExit(1);
3088
3097
  }
3089
3098
  await (useInk ? generateDatasetJsonCommandInk : generateDatasetJsonCommandPlain)(