@m4trix/evals 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/cli-simple.cjs +17 -8
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +17 -8
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +14 -5
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +14 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +93 -69
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +30 -10
- package/dist/index.js +91 -70
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.cjs
CHANGED
|
@@ -280,6 +280,7 @@ function makeEntityIdSchema(brand, label) {
|
|
|
280
280
|
var RunConfigNameSchema = makeEntityIdSchema("RunConfigName", "RunConfig name");
|
|
281
281
|
makeEntityIdSchema("EvaluatorName", "Evaluator name");
|
|
282
282
|
makeEntityIdSchema("TestCaseName", "Test case name");
|
|
283
|
+
makeEntityIdSchema("DatasetName", "Dataset name");
|
|
283
284
|
function validateWithSchema(schema, raw, context) {
|
|
284
285
|
const trimmed = raw.trim();
|
|
285
286
|
const decode = effect.Schema.decodeUnknownEither(
|
|
@@ -295,6 +296,14 @@ function validateRunConfigName(raw, context) {
|
|
|
295
296
|
return validateWithSchema(RunConfigNameSchema, raw, context);
|
|
296
297
|
}
|
|
297
298
|
|
|
299
|
+
// src/evals/dataset.ts
|
|
300
|
+
function getDatasetDisplayLabel(dataset) {
|
|
301
|
+
if (typeof dataset.getDisplayLabel === "function") {
|
|
302
|
+
return dataset.getDisplayLabel();
|
|
303
|
+
}
|
|
304
|
+
return typeof dataset.getName === "function" ? dataset.getName() : "";
|
|
305
|
+
}
|
|
306
|
+
|
|
298
307
|
// src/evals/evaluator.ts
|
|
299
308
|
function getEvaluatorDisplayLabel(evaluator) {
|
|
300
309
|
if (typeof evaluator.getDisplayLabel === "function") {
|
|
@@ -555,7 +564,7 @@ function toEvalDataset(item, snapshots) {
|
|
|
555
564
|
const runs = snapshots.filter((snapshot) => snapshot.datasetId === item.id).sort((a, b) => b.queuedAt - a.queuedAt).map(toEvalRun);
|
|
556
565
|
return {
|
|
557
566
|
id: item.id,
|
|
558
|
-
name: item.dataset
|
|
567
|
+
name: getDatasetDisplayLabel(item.dataset),
|
|
559
568
|
overview: `Discovered from ${item.filePath}`,
|
|
560
569
|
runs
|
|
561
570
|
};
|
|
@@ -1723,7 +1732,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1723
1732
|
meta: {
|
|
1724
1733
|
triggerId: task.triggerId,
|
|
1725
1734
|
runId: evaluatorRunId,
|
|
1726
|
-
|
|
1735
|
+
datasetName: task.dataset.getDisplayLabel(),
|
|
1727
1736
|
repetitionId,
|
|
1728
1737
|
repetitionIndex,
|
|
1729
1738
|
repetitionCount,
|
|
@@ -2138,7 +2147,7 @@ var EffectRunner = class {
|
|
|
2138
2147
|
);
|
|
2139
2148
|
if (!dsCollected) {
|
|
2140
2149
|
throw new Error(
|
|
2141
|
-
`RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.
|
|
2150
|
+
`RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getDisplayLabel()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
|
|
2142
2151
|
);
|
|
2143
2152
|
}
|
|
2144
2153
|
let evaluatorIds;
|
|
@@ -2273,7 +2282,7 @@ var EffectRunner = class {
|
|
|
2273
2282
|
const snapshot = {
|
|
2274
2283
|
runId,
|
|
2275
2284
|
datasetId: params.datasetId,
|
|
2276
|
-
datasetName: dataset.dataset.
|
|
2285
|
+
datasetName: dataset.dataset.getDisplayLabel(),
|
|
2277
2286
|
evaluatorIds: selectedEvaluators.map((item) => item.id),
|
|
2278
2287
|
queuedAt: Date.now(),
|
|
2279
2288
|
totalTestCases: totalEvaluations,
|
|
@@ -2294,7 +2303,7 @@ var EffectRunner = class {
|
|
|
2294
2303
|
type: "RunQueued",
|
|
2295
2304
|
runId,
|
|
2296
2305
|
datasetId: params.datasetId,
|
|
2297
|
-
datasetName: dataset.dataset.
|
|
2306
|
+
datasetName: dataset.dataset.getDisplayLabel(),
|
|
2298
2307
|
evaluatorIds: selectedEvaluators.map((item) => item.id),
|
|
2299
2308
|
totalTestCases: totalEvaluations,
|
|
2300
2309
|
artifactPath
|