@m4trix/evals 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/cli-simple.cjs +17 -8
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +17 -8
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +14 -5
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +14 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +93 -69
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +30 -10
- package/dist/index.js +91 -70
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -123,19 +123,14 @@ declare function defineConfig<TConfig extends ConfigType>(factory: M4trixEvalCon
|
|
|
123
123
|
declare const defaultRunnerConfig: RunnerConfig;
|
|
124
124
|
declare function withRunnerConfig(overrides?: RunnerConfigOverrides): RunnerConfig;
|
|
125
125
|
|
|
126
|
-
/** Matches a tag by exact string equality or regex test */
|
|
127
|
-
type TagMatcher = string | RegExp;
|
|
128
|
-
/** Matches a file path by glob string or regex test */
|
|
129
|
-
type PathMatcher = string | RegExp;
|
|
130
|
-
|
|
131
126
|
type InputOrBuilder<T> = T | (() => T);
|
|
132
127
|
interface TestCaseDescribeConfig<TI extends Schema.Schema.Any, TO extends Schema.Schema.Any = Schema.Schema<unknown>> {
|
|
133
128
|
/**
|
|
134
|
-
* Stable id (letters, digits, `_`, `-`).
|
|
129
|
+
* Stable id (letters, digits, `_`, `-`); used in discovery and matching.
|
|
135
130
|
* For an unrestricted UI label, set {@link displayName}.
|
|
136
131
|
*/
|
|
137
132
|
name: string;
|
|
138
|
-
/** Optional human-readable label for CLI/TUI (any characters). */
|
|
133
|
+
/** Optional human-readable label for CLI/TUI and evaluator args (any characters). */
|
|
139
134
|
displayName?: string;
|
|
140
135
|
tags: string[];
|
|
141
136
|
inputSchema: TI;
|
|
@@ -166,8 +161,19 @@ declare function getTestCaseTagList(testCase: {
|
|
|
166
161
|
getTags?: () => ReadonlyArray<string>;
|
|
167
162
|
}): string[];
|
|
168
163
|
|
|
164
|
+
/** Matches a tag by exact string equality or regex test */
|
|
165
|
+
type TagMatcher = string | RegExp;
|
|
166
|
+
/** Matches a file path by glob string or regex test */
|
|
167
|
+
type PathMatcher = string | RegExp;
|
|
168
|
+
|
|
169
169
|
interface DatasetDefineConfig {
|
|
170
|
+
/**
|
|
171
|
+
* Stable id (letters, digits, `_`, `-`); used for discovery ids and `resolveDatasetByName`.
|
|
172
|
+
* For an unrestricted UI label, set {@link displayName}.
|
|
173
|
+
*/
|
|
170
174
|
name: string;
|
|
175
|
+
/** Optional human-readable label for CLI/TUI (any characters). */
|
|
176
|
+
displayName?: string;
|
|
171
177
|
includedTags?: TagMatcher[];
|
|
172
178
|
excludedTags?: TagMatcher[];
|
|
173
179
|
includedPaths?: PathMatcher[];
|
|
@@ -177,13 +183,22 @@ declare class Dataset {
|
|
|
177
183
|
private readonly _config;
|
|
178
184
|
private constructor();
|
|
179
185
|
static define(config: DatasetDefineConfig): Dataset;
|
|
186
|
+
/** Canonical dataset id (same rules as `RunConfig` / `TestCase` `name`). */
|
|
180
187
|
getName(): string;
|
|
188
|
+
getDisplayName(): string | undefined;
|
|
189
|
+
/** Label for CLI/TUI and evaluator `meta.datasetName`: {@link getDisplayName} if set, otherwise {@link getName}. */
|
|
190
|
+
getDisplayLabel(): string;
|
|
181
191
|
getIncludedTags(): ReadonlyArray<TagMatcher>;
|
|
182
192
|
getExcludedTags(): ReadonlyArray<TagMatcher>;
|
|
183
193
|
getIncludedPaths(): ReadonlyArray<PathMatcher>;
|
|
184
194
|
getExcludedPaths(): ReadonlyArray<PathMatcher>;
|
|
185
195
|
matchesTestCase(testCase: TestCase<unknown>, filePath: string): boolean;
|
|
186
196
|
}
|
|
197
|
+
/** CLI / runner: display label for a dataset-shaped object (supports discovery duck-types). */
|
|
198
|
+
declare function getDatasetDisplayLabel(dataset: {
|
|
199
|
+
getDisplayLabel?: () => string;
|
|
200
|
+
getName?: () => string;
|
|
201
|
+
}): string;
|
|
187
202
|
|
|
188
203
|
/**
|
|
189
204
|
* Options for customizing JSON diff output. Passed to logDiff, createDiffLogEntry, and printJsonDiff.
|
|
@@ -259,8 +274,8 @@ interface EvaluateMeta {
|
|
|
259
274
|
* for this specific test-case run.
|
|
260
275
|
*/
|
|
261
276
|
runId: string;
|
|
262
|
-
/**
|
|
263
|
-
|
|
277
|
+
/** Display label for the dataset (`Dataset.getDisplayLabel()`, i.e. `displayName ?? name`). */
|
|
278
|
+
datasetName: string;
|
|
264
279
|
/** Canonical `RunConfig` name (or `programmatic` for API/TUI-only runs). */
|
|
265
280
|
runConfigName: string;
|
|
266
281
|
/**
|
|
@@ -384,12 +399,16 @@ declare const RunConfigNameSchema: Schema.brand<Schema.filter<Schema.filter<Sche
|
|
|
384
399
|
declare const EvaluatorNameSchema: Schema.brand<Schema.filter<Schema.filter<Schema.filter<typeof Schema.String>>>, "EvaluatorName">;
|
|
385
400
|
/** Branded id for `TestCase.describe({ name })` (decode with {@link TestCaseNameSchema}). */
|
|
386
401
|
declare const TestCaseNameSchema: Schema.brand<Schema.filter<Schema.filter<Schema.filter<typeof Schema.String>>>, "TestCaseName">;
|
|
402
|
+
/** Branded id for `Dataset.define({ name })` (decode with {@link DatasetNameSchema}). */
|
|
403
|
+
declare const DatasetNameSchema: Schema.brand<Schema.filter<Schema.filter<Schema.filter<typeof Schema.String>>>, "DatasetName">;
|
|
387
404
|
type RunConfigName = Schema.Schema.Type<typeof RunConfigNameSchema>;
|
|
388
405
|
type EvaluatorName = Schema.Schema.Type<typeof EvaluatorNameSchema>;
|
|
389
406
|
type TestCaseName = Schema.Schema.Type<typeof TestCaseNameSchema>;
|
|
407
|
+
type DatasetName = Schema.Schema.Type<typeof DatasetNameSchema>;
|
|
390
408
|
declare function validateRunConfigName(raw: string, context: string): RunConfigName;
|
|
391
409
|
declare function validateEvaluatorName(raw: string, context: string): EvaluatorName;
|
|
392
410
|
declare function validateTestCaseName(raw: string, context: string): TestCaseName;
|
|
411
|
+
declare function validateDatasetName(raw: string, context: string): DatasetName;
|
|
393
412
|
/** Optional UI label: trim; empty after trim becomes undefined. */
|
|
394
413
|
declare function normalizeOptionalDisplayName(raw: string | undefined): string | undefined;
|
|
395
414
|
|
|
@@ -660,6 +679,7 @@ interface RunnerApi {
|
|
|
660
679
|
collectDatasets(): Promise<ReadonlyArray<CollectedDataset>>;
|
|
661
680
|
collectEvaluators(): Promise<ReadonlyArray<CollectedEvaluator>>;
|
|
662
681
|
collectRunConfigs(): Promise<ReadonlyArray<CollectedRunConfig>>;
|
|
682
|
+
/** Resolves a dataset by canonical **`Dataset` `name`** (id), case-insensitive. */
|
|
663
683
|
resolveDatasetByName(name: string): Promise<CollectedDataset | undefined>;
|
|
664
684
|
resolveEvaluatorsByNamePattern(pattern: string): Promise<ReadonlyArray<CollectedEvaluator>>;
|
|
665
685
|
/**
|
|
@@ -732,4 +752,4 @@ declare class TagSet {
|
|
|
732
752
|
static define<const T extends readonly string[]>(tags: T): TagSetMembers<T>;
|
|
733
753
|
}
|
|
734
754
|
|
|
735
|
-
export { BinaryScoreData, CliState, CollectedDataset, CollectedEvaluator, CollectedRunConfig, CollectedTestCase, ConfigType, CreateDiffLogEntryOptions, Dataset, DeltaScoreData, DiffLogEntry, EvalDataset, EvalMiddleware, EvalRun, EvalsData, EvaluateArgs, EvaluateMeta, Evaluator, EvaluatorLogEntry, EvaluatorName, EvaluatorNameSchema, EvaluatorOption, FormatMetricOptions, FormatScoreOptions, JsonDiffOptions, LatencyData, LogEntry, M4trixEvalConfig, M4trixEvalConfigDiscovery, Metric, MetricDef, MetricItem, PROGRAMMATIC_RUN_CONFIG, PathMatcher, PercentScoreData, PrintJsonDiffOptions, RunConfig, RunConfigDefineConfig, RunConfigName, RunConfigNameSchema, RunConfigRow, RunConfigRowEvaluators, RunConfigRowPattern, RunDatasetJob, RunDatasetJobsWithSharedConcurrencyRequest, RunDatasetRequest, RunSnapshot, RunnerApi, RunnerConfig, RunnerConfigOverrides, RunnerDiscoveryConfig, RunnerEvent, Score, ScoreDef, ScoreDisplayStrategy, ScoreItem, SearchTestCasesQuery, StartupArgs, TagMatcher, TagSet, TagSetMembers, TestCase, TestCaseName, TestCaseNameSchema, TokenCountData, ViewLevel, binaryScore, createLogEntry, createRunner, defaultRunnerConfig, defineConfig, deltaScore, formatScoreData, getEvaluatorDisplayLabel, getEvaluatorTagList, getLogLines, getMetricById, getScoreById, getTestCaseDisplayLabel, getTestCaseTagList, latencyMetric, loadMockData, loadRunnerData, normalizeOptionalDisplayName, parseStartupArgs, percentScore, printJsonDiff, tokenCountMetric, validateEvaluatorName, validateRunConfigName, validateTestCaseName, withRunnerConfig };
|
|
755
|
+
export { BinaryScoreData, CliState, CollectedDataset, CollectedEvaluator, CollectedRunConfig, CollectedTestCase, ConfigType, CreateDiffLogEntryOptions, Dataset, DatasetDefineConfig, DatasetName, DatasetNameSchema, DeltaScoreData, DiffLogEntry, EvalDataset, EvalMiddleware, EvalRun, EvalsData, EvaluateArgs, EvaluateMeta, Evaluator, EvaluatorLogEntry, EvaluatorName, EvaluatorNameSchema, EvaluatorOption, FormatMetricOptions, FormatScoreOptions, JsonDiffOptions, LatencyData, LogEntry, M4trixEvalConfig, M4trixEvalConfigDiscovery, Metric, MetricDef, MetricItem, PROGRAMMATIC_RUN_CONFIG, PathMatcher, PercentScoreData, PrintJsonDiffOptions, RunConfig, RunConfigDefineConfig, RunConfigName, RunConfigNameSchema, RunConfigRow, RunConfigRowEvaluators, RunConfigRowPattern, RunDatasetJob, RunDatasetJobsWithSharedConcurrencyRequest, RunDatasetRequest, RunSnapshot, RunnerApi, RunnerConfig, RunnerConfigOverrides, RunnerDiscoveryConfig, RunnerEvent, Score, ScoreDef, ScoreDisplayStrategy, ScoreItem, SearchTestCasesQuery, StartupArgs, TagMatcher, TagSet, TagSetMembers, TestCase, TestCaseName, TestCaseNameSchema, TokenCountData, ViewLevel, binaryScore, createLogEntry, createRunner, defaultRunnerConfig, defineConfig, deltaScore, formatScoreData, getDatasetDisplayLabel, getEvaluatorDisplayLabel, getEvaluatorTagList, getLogLines, getMetricById, getScoreById, getTestCaseDisplayLabel, getTestCaseTagList, latencyMetric, loadMockData, loadRunnerData, normalizeOptionalDisplayName, parseStartupArgs, percentScore, printJsonDiff, tokenCountMetric, validateDatasetName, validateEvaluatorName, validateRunConfigName, validateTestCaseName, withRunnerConfig };
|
package/dist/index.js
CHANGED
|
@@ -26,6 +26,7 @@ function makeEntityIdSchema(brand, label) {
|
|
|
26
26
|
var RunConfigNameSchema = makeEntityIdSchema("RunConfigName", "RunConfig name");
|
|
27
27
|
var EvaluatorNameSchema = makeEntityIdSchema("EvaluatorName", "Evaluator name");
|
|
28
28
|
var TestCaseNameSchema = makeEntityIdSchema("TestCaseName", "Test case name");
|
|
29
|
+
var DatasetNameSchema = makeEntityIdSchema("DatasetName", "Dataset name");
|
|
29
30
|
function validateWithSchema(schema, raw, context) {
|
|
30
31
|
const trimmed = raw.trim();
|
|
31
32
|
const decode = Schema.decodeUnknownEither(
|
|
@@ -46,6 +47,9 @@ function validateEvaluatorName(raw, context) {
|
|
|
46
47
|
function validateTestCaseName(raw, context) {
|
|
47
48
|
return validateWithSchema(TestCaseNameSchema, raw, context);
|
|
48
49
|
}
|
|
50
|
+
function validateDatasetName(raw, context) {
|
|
51
|
+
return validateWithSchema(DatasetNameSchema, raw, context);
|
|
52
|
+
}
|
|
49
53
|
function normalizeOptionalDisplayName(raw) {
|
|
50
54
|
if (raw === void 0) {
|
|
51
55
|
return void 0;
|
|
@@ -54,6 +58,87 @@ function normalizeOptionalDisplayName(raw) {
|
|
|
54
58
|
return t.length === 0 ? void 0 : t;
|
|
55
59
|
}
|
|
56
60
|
|
|
61
|
+
// src/evals/dataset.ts
|
|
62
|
+
function matchesAny(value, matchers) {
|
|
63
|
+
return matchers.some(
|
|
64
|
+
(matcher) => typeof matcher === "string" ? value === matcher : matcher.test(value)
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
function matchesAnyPath(filePath, matchers) {
|
|
68
|
+
return matchers.some((matcher) => {
|
|
69
|
+
if (typeof matcher === "string") {
|
|
70
|
+
return simpleGlobMatch(matcher, filePath);
|
|
71
|
+
}
|
|
72
|
+
return matcher.test(filePath);
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
function simpleGlobMatch(pattern, value) {
|
|
76
|
+
const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\?/g, "[^/]").replace(/\*\*\//g, "(?:.*/)?").replace(/\*\*/g, ".*").replace(/\*/g, "[^/]*");
|
|
77
|
+
return new RegExp(`^${escaped}$`).test(value);
|
|
78
|
+
}
|
|
79
|
+
var Dataset = class _Dataset {
|
|
80
|
+
constructor(config) {
|
|
81
|
+
this._config = config;
|
|
82
|
+
}
|
|
83
|
+
static define(config) {
|
|
84
|
+
const name = validateDatasetName(config.name, "Dataset.define");
|
|
85
|
+
const displayName = normalizeOptionalDisplayName(config.displayName);
|
|
86
|
+
return new _Dataset({
|
|
87
|
+
name,
|
|
88
|
+
displayName,
|
|
89
|
+
includedTags: config.includedTags ?? [],
|
|
90
|
+
excludedTags: config.excludedTags ?? [],
|
|
91
|
+
includedPaths: config.includedPaths ?? [],
|
|
92
|
+
excludedPaths: config.excludedPaths ?? []
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
/** Canonical dataset id (same rules as `RunConfig` / `TestCase` `name`). */
|
|
96
|
+
getName() {
|
|
97
|
+
return this._config.name;
|
|
98
|
+
}
|
|
99
|
+
getDisplayName() {
|
|
100
|
+
return this._config.displayName;
|
|
101
|
+
}
|
|
102
|
+
/** Label for CLI/TUI and evaluator `meta.datasetName`: {@link getDisplayName} if set, otherwise {@link getName}. */
|
|
103
|
+
getDisplayLabel() {
|
|
104
|
+
return this._config.displayName ?? this._config.name;
|
|
105
|
+
}
|
|
106
|
+
getIncludedTags() {
|
|
107
|
+
return this._config.includedTags;
|
|
108
|
+
}
|
|
109
|
+
getExcludedTags() {
|
|
110
|
+
return this._config.excludedTags;
|
|
111
|
+
}
|
|
112
|
+
getIncludedPaths() {
|
|
113
|
+
return this._config.includedPaths;
|
|
114
|
+
}
|
|
115
|
+
getExcludedPaths() {
|
|
116
|
+
return this._config.excludedPaths;
|
|
117
|
+
}
|
|
118
|
+
matchesTestCase(testCase, filePath) {
|
|
119
|
+
const tags = testCase.getTags();
|
|
120
|
+
if (this._config.excludedTags.length > 0) {
|
|
121
|
+
if (tags.some((tag) => matchesAny(tag, this._config.excludedTags))) {
|
|
122
|
+
return false;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
if (this._config.excludedPaths.length > 0) {
|
|
126
|
+
if (matchesAnyPath(filePath, this._config.excludedPaths)) {
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
const tagMatch = this._config.includedTags.length === 0 || tags.some((tag) => matchesAny(tag, this._config.includedTags));
|
|
131
|
+
const pathMatch = this._config.includedPaths.length === 0 || matchesAnyPath(filePath, this._config.includedPaths);
|
|
132
|
+
return tagMatch && pathMatch;
|
|
133
|
+
}
|
|
134
|
+
};
|
|
135
|
+
function getDatasetDisplayLabel(dataset) {
|
|
136
|
+
if (typeof dataset.getDisplayLabel === "function") {
|
|
137
|
+
return dataset.getDisplayLabel();
|
|
138
|
+
}
|
|
139
|
+
return typeof dataset.getName === "function" ? dataset.getName() : "";
|
|
140
|
+
}
|
|
141
|
+
|
|
57
142
|
// src/evals/evaluator.ts
|
|
58
143
|
var Evaluator = class _Evaluator {
|
|
59
144
|
constructor(config) {
|
|
@@ -413,7 +498,7 @@ function toEvalDataset(item, snapshots) {
|
|
|
413
498
|
const runs = snapshots.filter((snapshot) => snapshot.datasetId === item.id).sort((a, b) => b.queuedAt - a.queuedAt).map(toEvalRun);
|
|
414
499
|
return {
|
|
415
500
|
id: item.id,
|
|
416
|
-
name: item.dataset.getName(),
|
|
501
|
+
name: getDatasetDisplayLabel(item.dataset),
|
|
417
502
|
overview: `Discovered from ${item.filePath}`,
|
|
418
503
|
runs
|
|
419
504
|
};
|
|
@@ -466,70 +551,6 @@ function parseStartupArgs(argv) {
|
|
|
466
551
|
}
|
|
467
552
|
return args;
|
|
468
553
|
}
|
|
469
|
-
|
|
470
|
-
// src/evals/dataset.ts
|
|
471
|
-
function matchesAny(value, matchers) {
|
|
472
|
-
return matchers.some(
|
|
473
|
-
(matcher) => typeof matcher === "string" ? value === matcher : matcher.test(value)
|
|
474
|
-
);
|
|
475
|
-
}
|
|
476
|
-
function matchesAnyPath(filePath, matchers) {
|
|
477
|
-
return matchers.some((matcher) => {
|
|
478
|
-
if (typeof matcher === "string") {
|
|
479
|
-
return simpleGlobMatch(matcher, filePath);
|
|
480
|
-
}
|
|
481
|
-
return matcher.test(filePath);
|
|
482
|
-
});
|
|
483
|
-
}
|
|
484
|
-
function simpleGlobMatch(pattern, value) {
|
|
485
|
-
const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\?/g, "[^/]").replace(/\*\*\//g, "(?:.*/)?").replace(/\*\*/g, ".*").replace(/\*/g, "[^/]*");
|
|
486
|
-
return new RegExp(`^${escaped}$`).test(value);
|
|
487
|
-
}
|
|
488
|
-
var Dataset = class _Dataset {
|
|
489
|
-
constructor(config) {
|
|
490
|
-
this._config = config;
|
|
491
|
-
}
|
|
492
|
-
static define(config) {
|
|
493
|
-
return new _Dataset({
|
|
494
|
-
name: config.name,
|
|
495
|
-
includedTags: config.includedTags ?? [],
|
|
496
|
-
excludedTags: config.excludedTags ?? [],
|
|
497
|
-
includedPaths: config.includedPaths ?? [],
|
|
498
|
-
excludedPaths: config.excludedPaths ?? []
|
|
499
|
-
});
|
|
500
|
-
}
|
|
501
|
-
getName() {
|
|
502
|
-
return this._config.name;
|
|
503
|
-
}
|
|
504
|
-
getIncludedTags() {
|
|
505
|
-
return this._config.includedTags;
|
|
506
|
-
}
|
|
507
|
-
getExcludedTags() {
|
|
508
|
-
return this._config.excludedTags;
|
|
509
|
-
}
|
|
510
|
-
getIncludedPaths() {
|
|
511
|
-
return this._config.includedPaths;
|
|
512
|
-
}
|
|
513
|
-
getExcludedPaths() {
|
|
514
|
-
return this._config.excludedPaths;
|
|
515
|
-
}
|
|
516
|
-
matchesTestCase(testCase, filePath) {
|
|
517
|
-
const tags = testCase.getTags();
|
|
518
|
-
if (this._config.excludedTags.length > 0) {
|
|
519
|
-
if (tags.some((tag) => matchesAny(tag, this._config.excludedTags))) {
|
|
520
|
-
return false;
|
|
521
|
-
}
|
|
522
|
-
}
|
|
523
|
-
if (this._config.excludedPaths.length > 0) {
|
|
524
|
-
if (matchesAnyPath(filePath, this._config.excludedPaths)) {
|
|
525
|
-
return false;
|
|
526
|
-
}
|
|
527
|
-
}
|
|
528
|
-
const tagMatch = this._config.includedTags.length === 0 || tags.some((tag) => matchesAny(tag, this._config.includedTags));
|
|
529
|
-
const pathMatch = this._config.includedPaths.length === 0 || matchesAnyPath(filePath, this._config.includedPaths);
|
|
530
|
-
return tagMatch && pathMatch;
|
|
531
|
-
}
|
|
532
|
-
};
|
|
533
554
|
function preprocessForDiff(value, options) {
|
|
534
555
|
if (options?.sort && Array.isArray(value)) {
|
|
535
556
|
return [...value].sort((a, b) => {
|
|
@@ -1545,7 +1566,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1545
1566
|
meta: {
|
|
1546
1567
|
triggerId: task.triggerId,
|
|
1547
1568
|
runId: evaluatorRunId,
|
|
1548
|
-
|
|
1569
|
+
datasetName: task.dataset.getDisplayLabel(),
|
|
1549
1570
|
repetitionId,
|
|
1550
1571
|
repetitionIndex,
|
|
1551
1572
|
repetitionCount,
|
|
@@ -1960,7 +1981,7 @@ var EffectRunner = class {
|
|
|
1960
1981
|
);
|
|
1961
1982
|
if (!dsCollected) {
|
|
1962
1983
|
throw new Error(
|
|
1963
|
-
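`RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getName()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`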
|
|
1984
|
+
`RunConfig "${rcName}" run[${i}]: dataset "${row.dataset.getDisplayLabel()}" was not found among discovered dataset exports (import the same module instances the scanner loads).`
|
|
1964
1985
|
);
|
|
1965
1986
|
}
|
|
1966
1987
|
let evaluatorIds;
|
|
@@ -2095,7 +2116,7 @@ var EffectRunner = class {
|
|
|
2095
2116
|
const snapshot = {
|
|
2096
2117
|
runId,
|
|
2097
2118
|
datasetId: params.datasetId,
|
|
2098
|
-
datasetName: dataset.dataset.getName(),
|
|
2119
|
+
datasetName: dataset.dataset.getDisplayLabel(),
|
|
2099
2120
|
evaluatorIds: selectedEvaluators.map((item) => item.id),
|
|
2100
2121
|
queuedAt: Date.now(),
|
|
2101
2122
|
totalTestCases: totalEvaluations,
|
|
@@ -2116,7 +2137,7 @@ var EffectRunner = class {
|
|
|
2116
2137
|
type: "RunQueued",
|
|
2117
2138
|
runId,
|
|
2118
2139
|
datasetId: params.datasetId,
|
|
2119
|
-
datasetName: dataset.dataset.getName(),
|
|
2140
|
+
datasetName: dataset.dataset.getDisplayLabel(),
|
|
2120
2141
|
evaluatorIds: selectedEvaluators.map((item) => item.id),
|
|
2121
2142
|
totalTestCases: totalEvaluations,
|
|
2122
2143
|
artifactPath
|
|
@@ -2219,6 +2240,6 @@ var PROGRAMMATIC_RUN_CONFIG = {
|
|
|
2219
2240
|
runConfigName: "programmatic"
|
|
2220
2241
|
};
|
|
2221
2242
|
|
|
2222
|
-
export { Dataset, Evaluator, EvaluatorNameSchema, Metric, PROGRAMMATIC_RUN_CONFIG, RunConfig, RunConfigNameSchema, Score, TagSet, TestCase, TestCaseNameSchema, binaryScore, createLogEntry, createRunner, defaultRunnerConfig, defineConfig, deltaScore, formatScoreData, getEvaluatorDisplayLabel, getEvaluatorTagList, getLogLines, getMetricById, getScoreById, getTestCaseDisplayLabel, getTestCaseTagList, latencyMetric, loadMockData, loadRunnerData, normalizeOptionalDisplayName, parseStartupArgs, percentScore, printJsonDiff, tokenCountMetric, validateEvaluatorName, validateRunConfigName, validateTestCaseName, withRunnerConfig };
|
|
2243
|
+
export { Dataset, DatasetNameSchema, Evaluator, EvaluatorNameSchema, Metric, PROGRAMMATIC_RUN_CONFIG, RunConfig, RunConfigNameSchema, Score, TagSet, TestCase, TestCaseNameSchema, binaryScore, createLogEntry, createRunner, defaultRunnerConfig, defineConfig, deltaScore, formatScoreData, getDatasetDisplayLabel, getEvaluatorDisplayLabel, getEvaluatorTagList, getLogLines, getMetricById, getScoreById, getTestCaseDisplayLabel, getTestCaseTagList, latencyMetric, loadMockData, loadRunnerData, normalizeOptionalDisplayName, parseStartupArgs, percentScore, printJsonDiff, tokenCountMetric, validateDatasetName, validateEvaluatorName, validateRunConfigName, validateTestCaseName, withRunnerConfig };
|
|
2223
2244
|
//# sourceMappingURL=out.js.map
|
|
2224
2245
|
//# sourceMappingURL=index.js.map
|