@m4trix/evals 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/cli-simple.cjs +17 -8
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +17 -8
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +14 -5
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +14 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +93 -69
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +30 -10
- package/dist/index.js +91 -70
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -253,6 +253,7 @@ function makeEntityIdSchema(brand, label) {
|
|
|
253
253
|
var RunConfigNameSchema = makeEntityIdSchema("RunConfigName", "RunConfig name");
|
|
254
254
|
makeEntityIdSchema("EvaluatorName", "Evaluator name");
|
|
255
255
|
makeEntityIdSchema("TestCaseName", "Test case name");
|
|
256
|
+
makeEntityIdSchema("DatasetName", "Dataset name");
|
|
256
257
|
function validateWithSchema(schema, raw, context) {
|
|
257
258
|
const trimmed = raw.trim();
|
|
258
259
|
const decode = Schema.decodeUnknownEither(
|
|
@@ -268,6 +269,14 @@ function validateRunConfigName(raw, context) {
|
|
|
268
269
|
return validateWithSchema(RunConfigNameSchema, raw, context);
|
|
269
270
|
}
|
|
270
271
|
|
|
272
|
+
// src/evals/dataset.ts
|
|
273
|
+
function getDatasetDisplayLabel(dataset) {
|
|
274
|
+
if (typeof dataset.getDisplayLabel === "function") {
|
|
275
|
+
return dataset.getDisplayLabel();
|
|
276
|
+
}
|
|
277
|
+
return typeof dataset.getName === "function" ? dataset.getName() : "";
|
|
278
|
+
}
|
|
279
|
+
|
|
271
280
|
// src/evals/evaluator.ts
|
|
272
281
|
function getEvaluatorDisplayLabel(evaluator) {
|
|
273
282
|
if (typeof evaluator.getDisplayLabel === "function") {
|
|
@@ -528,7 +537,7 @@ function toEvalDataset(item, snapshots) {
|
|
|
528
537
|
const runs = snapshots.filter((snapshot) => snapshot.datasetId === item.id).sort((a, b) => b.queuedAt - a.queuedAt).map(toEvalRun);
|
|
529
538
|
return {
|
|
530
539
|
id: item.id,
|
|
531
|
-
name: item.dataset
|
|
540
|
+
name: getDatasetDisplayLabel(item.dataset),
|
|
532
541
|
overview: `Discovered from ${item.filePath}`,
|
|
533
542
|
runs
|
|
534
543
|
};
|
|
@@ -1696,7 +1705,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1696
1705
|
meta: {
|
|
1697
1706
|
triggerId: task.triggerId,
|
|
1698
1707
|
runId: evaluatorRunId,
|
|
1699
|
-
|
|
1708
|
+
datasetName: task.dataset.getDisplayLabel(),
|
|
1700
1709
|
repetitionId,
|
|
1701
1710
|
repetitionIndex,
|
|
1702
1711
|
repetitionCount,
|
|
@@ -2111,7 +2120,7 @@ var EffectRunner = class {
|
|
|
2111
2120
|
);
|
|
2112
2121
|
if (!dsCollected) {
|
|
2113
2122
|
throw new Error(
|
|
2114
|
-
`RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.
|
|
2123
|
+
`RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getDisplayLabel()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
|
|
2115
2124
|
);
|
|
2116
2125
|
}
|
|
2117
2126
|
let evaluatorIds;
|
|
@@ -2246,7 +2255,7 @@ var EffectRunner = class {
|
|
|
2246
2255
|
const snapshot = {
|
|
2247
2256
|
runId,
|
|
2248
2257
|
datasetId: params.datasetId,
|
|
2249
|
-
datasetName: dataset.dataset.
|
|
2258
|
+
datasetName: dataset.dataset.getDisplayLabel(),
|
|
2250
2259
|
evaluatorIds: selectedEvaluators.map((item) => item.id),
|
|
2251
2260
|
queuedAt: Date.now(),
|
|
2252
2261
|
totalTestCases: totalEvaluations,
|
|
@@ -2267,7 +2276,7 @@ var EffectRunner = class {
|
|
|
2267
2276
|
type: "RunQueued",
|
|
2268
2277
|
runId,
|
|
2269
2278
|
datasetId: params.datasetId,
|
|
2270
|
-
datasetName: dataset.dataset.
|
|
2279
|
+
datasetName: dataset.dataset.getDisplayLabel(),
|
|
2271
2280
|
evaluatorIds: selectedEvaluators.map((item) => item.id),
|
|
2272
2281
|
totalTestCases: totalEvaluations,
|
|
2273
2282
|
artifactPath
|