@m4trix/evals 0.27.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/dist/cli-simple.cjs +38 -15
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +38 -15
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +13 -7
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +13 -7
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +17 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +39 -14
- package/dist/index.js +17 -10
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -841,7 +841,7 @@ var RunConfig = class _RunConfig {
|
|
|
841
841
|
getDisplayLabel() {
|
|
842
842
|
return this._displayName ?? this._name;
|
|
843
843
|
}
|
|
844
|
-
/** Tags from `RunConfig.define({ tags })`; surfaced as `runConfigTags` on evaluator callbacks. */
|
|
844
|
+
/** Tags from `RunConfig.define({ tags })`; surfaced as `meta.runConfigTags` on evaluator callbacks. */
|
|
845
845
|
getTags() {
|
|
846
846
|
return [...this._tags];
|
|
847
847
|
}
|
|
@@ -1014,10 +1014,11 @@ var TestCase = class _TestCase {
|
|
|
1014
1014
|
static describe(config) {
|
|
1015
1015
|
const name = validateTestCaseName(config.name, "TestCase.describe");
|
|
1016
1016
|
const displayName = normalizeOptionalDisplayName(config.displayName);
|
|
1017
|
+
const tags = config.tags !== void 0 ? [...config.tags] : [];
|
|
1017
1018
|
return new _TestCase({
|
|
1018
1019
|
name,
|
|
1019
1020
|
displayName,
|
|
1020
|
-
tags
|
|
1021
|
+
tags,
|
|
1021
1022
|
inputSchema: config.inputSchema,
|
|
1022
1023
|
input: config.input,
|
|
1023
1024
|
outputSchema: config.outputSchema,
|
|
@@ -1034,7 +1035,7 @@ var TestCase = class _TestCase {
|
|
|
1034
1035
|
return this._config.displayName ?? this._config.name;
|
|
1035
1036
|
}
|
|
1036
1037
|
getTags() {
|
|
1037
|
-
return this._config.tags;
|
|
1038
|
+
return [...this._config.tags];
|
|
1038
1039
|
}
|
|
1039
1040
|
getInputSchema() {
|
|
1040
1041
|
return this._config.inputSchema;
|
|
@@ -1592,14 +1593,17 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1592
1593
|
triggerId: task.triggerId,
|
|
1593
1594
|
runId: evaluatorRunId,
|
|
1594
1595
|
datasetName: task.dataset.getDisplayLabel(),
|
|
1596
|
+
testCaseId: testCaseItem.id,
|
|
1597
|
+
testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
|
|
1595
1598
|
repetitionId,
|
|
1596
1599
|
repetitionIndex,
|
|
1597
1600
|
repetitionCount,
|
|
1598
|
-
runConfigName: task.runConfigName
|
|
1601
|
+
runConfigName: task.runConfigName,
|
|
1602
|
+
...task.experimentName !== void 0 && task.experimentName !== "" ? { experimentName: task.experimentName } : {},
|
|
1603
|
+
testCaseTags: getTestCaseTagList(testCaseItem.testCase),
|
|
1604
|
+
runConfigTags: task.runConfigTags,
|
|
1605
|
+
evaluatorTags: getEvaluatorTagList(evaluator)
|
|
1599
1606
|
},
|
|
1600
|
-
testCaseTags: getTestCaseTagList(testCaseItem.testCase),
|
|
1601
|
-
runConfigTags: task.runConfigTags,
|
|
1602
|
-
evaluatorTags: getEvaluatorTagList(evaluator),
|
|
1603
1607
|
logDiff,
|
|
1604
1608
|
log,
|
|
1605
1609
|
createError
|
|
@@ -2078,7 +2082,8 @@ var EffectRunner = class {
|
|
|
2078
2082
|
globalEvaluationSemaphore: sem,
|
|
2079
2083
|
runConfigName: job.runConfigName,
|
|
2080
2084
|
runConfigTags: job.runConfigTags,
|
|
2081
|
-
repetitions: job.repetitions
|
|
2085
|
+
repetitions: job.repetitions,
|
|
2086
|
+
experimentName: request.experimentName
|
|
2082
2087
|
})
|
|
2083
2088
|
);
|
|
2084
2089
|
}
|
|
@@ -2113,7 +2118,8 @@ var EffectRunner = class {
|
|
|
2113
2118
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
2114
2119
|
repetitions: request.repetitions,
|
|
2115
2120
|
runConfigName,
|
|
2116
|
-
runConfigTags: request.runConfigTags
|
|
2121
|
+
runConfigTags: request.runConfigTags,
|
|
2122
|
+
experimentName: request.experimentName
|
|
2117
2123
|
});
|
|
2118
2124
|
}
|
|
2119
2125
|
async startDatasetRun(params) {
|
|
@@ -2188,7 +2194,8 @@ var EffectRunner = class {
|
|
|
2188
2194
|
globalEvaluationSemaphore: params.globalEvaluationSemaphore,
|
|
2189
2195
|
runConfigName: params.runConfigName,
|
|
2190
2196
|
runConfigTags,
|
|
2191
|
-
repetitions
|
|
2197
|
+
repetitions,
|
|
2198
|
+
experimentName: params.experimentName
|
|
2192
2199
|
})
|
|
2193
2200
|
);
|
|
2194
2201
|
return snapshot;
|