@agentv/core 4.29.2-next.1 → 4.30.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-YFXMMBUG.js → chunk-5RQMJZDJ.js} +57 -1
- package/dist/{chunk-YFXMMBUG.js.map → chunk-5RQMJZDJ.js.map} +1 -1
- package/dist/{chunk-SCC35F3L.js → chunk-Z2BBOGE4.js} +69 -30
- package/dist/{chunk-SCC35F3L.js.map → chunk-Z2BBOGE4.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -1
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +122 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -2
- package/dist/{ts-eval-loader-EMSGL2BQ.js → ts-eval-loader-JL5DGTJL.js} +3 -3
- package/package.json +1 -1
- /package/dist/{ts-eval-loader-EMSGL2BQ.js.map → ts-eval-loader-JL5DGTJL.js.map} +0 -0
|
@@ -6,6 +6,7 @@ import {
|
|
|
6
6
|
fileExists,
|
|
7
7
|
findGitRoot,
|
|
8
8
|
interpolateEnv,
|
|
9
|
+
interpolateTemplateVars,
|
|
9
10
|
isAgentProvider,
|
|
10
11
|
isContentArray,
|
|
11
12
|
isGraderKind,
|
|
@@ -17,7 +18,7 @@ import {
|
|
|
17
18
|
readTextFile,
|
|
18
19
|
resolveDelegatedTargetDefinition,
|
|
19
20
|
resolveTargetDefinition
|
|
20
|
-
} from "./chunk-YFXMMBUG.js";
|
|
21
|
+
} from "./chunk-5RQMJZDJ.js";
|
|
21
22
|
import {
|
|
22
23
|
execFileWithStdin,
|
|
23
24
|
execShellWithStdin
|
|
@@ -15695,6 +15696,41 @@ function resolveTests(suite) {
|
|
|
15695
15696
|
}
|
|
15696
15697
|
return void 0;
|
|
15697
15698
|
}
|
|
15699
|
+
/**
 * Runs template-variable interpolation over one field of an eval case.
 *
 * The field is handed back untouched when there are no variables to apply or
 * when the field itself is `undefined`; in every other situation the work is
 * delegated to `interpolateTemplateVars`.
 *
 * @param {*} value - Raw field value read from the eval case.
 * @param {object|undefined} vars - Per-case variables; a falsy value disables interpolation.
 * @returns {*} `value` unchanged, or the result of `interpolateTemplateVars(value, vars)`.
 */
function interpolateCaseField(value, vars) {
  const shouldInterpolate = Boolean(vars) && value !== void 0;
  return shouldInterpolate ? interpolateTemplateVars(value, vars) : value;
}
|
|
15705
|
+
/**
 * Interpolates template variables into the `input` and `expected_output`
 * fields of each conversation turn.
 *
 * `turns` is returned as-is when `vars` is falsy or `turns` is not an array.
 * Entries that are not JSON objects are passed through unchanged. For object
 * entries a shallow copy is produced; the original turn is never mutated.
 *
 * Only fields that are actually defined on the raw turn are written to the
 * copy. This mirrors the rule used by `interpolateRawEvalCase` and avoids
 * adding explicit `undefined`-valued `input` / `expected_output` keys to turns
 * that never declared them.
 *
 * @param {*} turns - Raw `turns` value from the eval case (expected to be an array).
 * @param {object|undefined} vars - Per-case variables; a falsy value disables interpolation.
 * @returns {*} The original `turns` value, or a new array of rendered turns.
 */
function interpolateCaseTurns(turns, vars) {
  if (!vars || !Array.isArray(turns)) {
    return turns;
  }
  return turns.map((rawTurn) => {
    if (!isJsonObject(rawTurn)) {
      return rawTurn;
    }
    return {
      ...rawTurn,
      // Conditional spreads keep absent fields absent instead of materialising them as undefined.
      ...(rawTurn.input !== void 0 ? { input: interpolateCaseField(rawTurn.input, vars) } : {}),
      ...(rawTurn.expected_output !== void 0 ? { expected_output: interpolateCaseField(rawTurn.expected_output, vars) } : {})
    };
  });
}
|
|
15720
|
+
/**
 * Produces a rendered copy of a raw eval case with its template variables applied.
 *
 * When `vars` is falsy the raw case is returned as-is (same reference).
 * Otherwise a shallow copy is built and the text-bearing fields — `criteria`,
 * `expected_outcome`, `input`, `input_files`, `expected_output` — are passed
 * through `interpolateCaseField`, while `turns` goes through
 * `interpolateCaseTurns`. A field is only written when it is defined on the
 * raw case, so absent fields stay absent.
 *
 * @param {object} raw - Raw eval case.
 * @param {object|undefined} vars - Per-case variables; a falsy value disables interpolation.
 * @returns {object} `raw` itself, or a shallow copy with interpolated fields.
 */
function interpolateRawEvalCase(raw, vars) {
  if (!vars) {
    return raw;
  }
  const rendered = { ...raw };
  const scalarFields = ["criteria", "expected_outcome", "input", "input_files", "expected_output"];
  for (const field of scalarFields) {
    const current = raw[field];
    if (current !== void 0) {
      rendered[field] = interpolateCaseField(current, vars);
    }
  }
  const rawTurns = raw.turns;
  if (rawTurns !== void 0) {
    rendered.turns = interpolateCaseTurns(rawTurns, vars);
  }
  return rendered;
}
|
|
15698
15734
|
async function readTestSuiteMetadata(testFilePath) {
|
|
15699
15735
|
try {
|
|
15700
15736
|
const absolutePath = path43.resolve(testFilePath);
|
|
@@ -15722,7 +15758,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
15722
15758
|
return { tests: await loadTestsFromAgentSkills(evalFilePath) };
|
|
15723
15759
|
}
|
|
15724
15760
|
if (format === "typescript") {
|
|
15725
|
-
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EMSGL2BQ.js");
|
|
15761
|
+
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-JL5DGTJL.js");
|
|
15726
15762
|
return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
|
|
15727
15763
|
}
|
|
15728
15764
|
const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
|
|
@@ -15757,7 +15793,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
|
|
|
15757
15793
|
return loadTestsFromAgentSkills(evalFilePath);
|
|
15758
15794
|
}
|
|
15759
15795
|
if (format === "typescript") {
|
|
15760
|
-
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EMSGL2BQ.js");
|
|
15796
|
+
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-JL5DGTJL.js");
|
|
15761
15797
|
const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
|
|
15762
15798
|
return suite.tests;
|
|
15763
15799
|
}
|
|
@@ -15811,8 +15847,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
15811
15847
|
}
|
|
15812
15848
|
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
15813
15849
|
const suiteGovernance = extractSuiteGovernance(suite);
|
|
15814
|
-
const
|
|
15815
|
-
const
|
|
15850
|
+
const rawSuiteInput = suite.input;
|
|
15851
|
+
const rawSuiteInputFiles = suite.input_files;
|
|
15816
15852
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
15817
15853
|
const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
|
|
15818
15854
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
@@ -15831,30 +15867,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
15831
15867
|
if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
|
|
15832
15868
|
continue;
|
|
15833
15869
|
}
|
|
15834
|
-
const
|
|
15835
|
-
|
|
15836
|
-
|
|
15837
|
-
|
|
15870
|
+
const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : void 0;
|
|
15871
|
+
const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars);
|
|
15872
|
+
const conversationId = asString5(renderedCase.conversation_id);
|
|
15873
|
+
let outcome = asString5(renderedCase.criteria);
|
|
15874
|
+
if (!outcome && renderedCase.expected_outcome !== void 0) {
|
|
15875
|
+
outcome = asString5(renderedCase.expected_outcome);
|
|
15838
15876
|
if (outcome) {
|
|
15839
15877
|
logWarning5(
|
|
15840
|
-
`Test '${asString5(
|
|
15878
|
+
`Test '${asString5(renderedCase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
15841
15879
|
);
|
|
15842
15880
|
}
|
|
15843
15881
|
}
|
|
15844
|
-
const caseExecution = isJsonObject(
|
|
15882
|
+
const caseExecution = isJsonObject(renderedCase.execution) ? renderedCase.execution : void 0;
|
|
15845
15883
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
15846
15884
|
const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
|
|
15847
|
-
const effectiveSuiteInputFiles =
|
|
15848
|
-
const testInputMessages = resolveInputMessages(
|
|
15849
|
-
const expectedMessages = resolveExpectedMessages(
|
|
15850
|
-
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 ||
|
|
15885
|
+
const effectiveSuiteInputFiles = rawSuiteInputFiles && !skipDefaults ? interpolateCaseField(rawSuiteInputFiles, caseVars) : void 0;
|
|
15886
|
+
const testInputMessages = resolveInputMessages(renderedCase, effectiveSuiteInputFiles);
|
|
15887
|
+
const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
|
|
15888
|
+
const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || renderedCase.assertions !== void 0 || renderedCase.assert !== void 0 || Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0;
|
|
15851
15889
|
if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
|
|
15852
15890
|
logError3(
|
|
15853
15891
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions/turns`
|
|
15854
15892
|
);
|
|
15855
15893
|
continue;
|
|
15856
15894
|
}
|
|
15857
|
-
const
|
|
15895
|
+
const effectiveSuiteInputValue = rawSuiteInput && !skipDefaults ? interpolateCaseField(rawSuiteInput, caseVars) : void 0;
|
|
15896
|
+
const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
|
|
15858
15897
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
15859
15898
|
const inputTextParts = [];
|
|
15860
15899
|
const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
|
|
@@ -15894,11 +15933,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
15894
15933
|
}
|
|
15895
15934
|
}
|
|
15896
15935
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
15897
|
-
const testCaseEvaluatorKind = coerceEvaluator(
|
|
15936
|
+
const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator;
|
|
15898
15937
|
let evaluators;
|
|
15899
15938
|
try {
|
|
15900
15939
|
evaluators = await parseGraders(
|
|
15901
|
-
|
|
15940
|
+
renderedCase,
|
|
15902
15941
|
globalExecution,
|
|
15903
15942
|
searchRoots,
|
|
15904
15943
|
id ?? "unknown",
|
|
@@ -15909,7 +15948,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
15909
15948
|
logError3(`Skipping test '${id}': ${message}`);
|
|
15910
15949
|
continue;
|
|
15911
15950
|
}
|
|
15912
|
-
const inlineRubrics =
|
|
15951
|
+
const inlineRubrics = renderedCase.rubrics;
|
|
15913
15952
|
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
15914
15953
|
const rubricEvaluator = parseInlineRubrics(inlineRubrics);
|
|
15915
15954
|
if (rubricEvaluator) {
|
|
@@ -15918,25 +15957,25 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
15918
15957
|
}
|
|
15919
15958
|
warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
|
|
15920
15959
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
15921
|
-
const caseWorkspace = await resolveWorkspaceConfig(
|
|
15960
|
+
const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
|
|
15922
15961
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
15923
|
-
const rawCaseMetadata = isJsonObject(
|
|
15962
|
+
const rawCaseMetadata = isJsonObject(renderedCase.metadata) ? renderedCase.metadata : void 0;
|
|
15924
15963
|
const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
|
|
15925
15964
|
const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
|
|
15926
|
-
const caseTargets = extractTargetsFromTestCase(
|
|
15927
|
-
const dependsOn = Array.isArray(
|
|
15965
|
+
const caseTargets = extractTargetsFromTestCase(renderedCase);
|
|
15966
|
+
const dependsOn = Array.isArray(renderedCase.depends_on) ? renderedCase.depends_on.filter(
|
|
15928
15967
|
(v) => typeof v === "string"
|
|
15929
15968
|
) : void 0;
|
|
15930
|
-
const onDependencyFailureRaw = asString5(
|
|
15969
|
+
const onDependencyFailureRaw = asString5(renderedCase.on_dependency_failure);
|
|
15931
15970
|
const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
|
|
15932
|
-
const modeRaw = asString5(
|
|
15971
|
+
const modeRaw = asString5(renderedCase.mode);
|
|
15933
15972
|
const mode = modeRaw === "conversation" ? "conversation" : void 0;
|
|
15934
|
-
const turns = Array.isArray(
|
|
15935
|
-
const aggregationRaw = asString5(
|
|
15973
|
+
const turns = Array.isArray(renderedCase.turns) ? parseTurns(renderedCase.turns) : void 0;
|
|
15974
|
+
const aggregationRaw = asString5(renderedCase.aggregation);
|
|
15936
15975
|
const aggregation = aggregationRaw === "mean" || aggregationRaw === "min" || aggregationRaw === "max" ? aggregationRaw : void 0;
|
|
15937
|
-
const onTurnFailureRaw = asString5(
|
|
15976
|
+
const onTurnFailureRaw = asString5(renderedCase.on_turn_failure);
|
|
15938
15977
|
const onTurnFailure = onTurnFailureRaw === "continue" || onTurnFailureRaw === "stop" ? onTurnFailureRaw : void 0;
|
|
15939
|
-
const windowSize = typeof
|
|
15978
|
+
const windowSize = typeof renderedCase.window_size === "number" && renderedCase.window_size >= 1 ? renderedCase.window_size : void 0;
|
|
15940
15979
|
const testCase = {
|
|
15941
15980
|
id,
|
|
15942
15981
|
suite: suiteName,
|
|
@@ -19632,4 +19671,4 @@ export {
|
|
|
19632
19671
|
loadTestById,
|
|
19633
19672
|
loadEvalCaseById
|
|
19634
19673
|
};
|
|
19635
|
-
//# sourceMappingURL=chunk-SCC35F3L.js.map
|
|
19674
|
+
//# sourceMappingURL=chunk-Z2BBOGE4.js.map
|