@agentv/core 4.29.2-next.1 → 4.30.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ import {
6
6
  fileExists,
7
7
  findGitRoot,
8
8
  interpolateEnv,
9
+ interpolateTemplateVars,
9
10
  isAgentProvider,
10
11
  isContentArray,
11
12
  isGraderKind,
@@ -17,7 +18,7 @@ import {
17
18
  readTextFile,
18
19
  resolveDelegatedTargetDefinition,
19
20
  resolveTargetDefinition
20
- } from "./chunk-YFXMMBUG.js";
21
+ } from "./chunk-5RQMJZDJ.js";
21
22
  import {
22
23
  execFileWithStdin,
23
24
  execShellWithStdin
@@ -15695,6 +15696,41 @@ function resolveTests(suite) {
15695
15696
  }
15696
15697
  return void 0;
15697
15698
  }
15699
/**
 * Applies per-case template variables to one raw eval-case field.
 *
 * The value is handed back untouched when no variables were supplied or when
 * the field itself is `undefined`; in every other case the work is delegated
 * to `interpolateTemplateVars`.
 *
 * @param {*} value - Raw field value taken from the eval case.
 * @param {*} vars - Per-case variables; any falsy value disables interpolation.
 * @returns {*} `value` unchanged, or the result of
 *   `interpolateTemplateVars(value, vars)`.
 */
function interpolateCaseField(value, vars) {
  const shouldInterpolate = Boolean(vars) && value !== undefined;
  return shouldInterpolate ? interpolateTemplateVars(value, vars) : value;
}
15705
/**
 * Applies per-case template variables to the `input` and `expected_output`
 * fields of every turn in a conversation-style eval case.
 *
 * - When `vars` is falsy or `turns` is not an array, `turns` is returned as-is.
 * - Entries that are not JSON objects (per `isJsonObject`) are passed through
 *   unchanged.
 * - Object entries are shallow-copied; the caller's turn objects are never
 *   mutated.
 *
 * A field is only written onto the copy when the raw turn actually defines it.
 * This mirrors the `!== void 0` guards used for case-level fields and keeps a
 * rendered turn from gaining explicit `input: undefined` /
 * `expected_output: undefined` keys that the raw turn never had.
 *
 * @param {*} turns - Raw `turns` value from the eval case.
 * @param {*} vars - Per-case variables; any falsy value disables interpolation.
 * @returns {*} The original `turns` value, or a new array of rendered turns.
 */
function interpolateCaseTurns(turns, vars) {
  if (!vars || !Array.isArray(turns)) {
    return turns;
  }
  return turns.map((rawTurn) => {
    if (!isJsonObject(rawTurn)) {
      return rawTurn;
    }
    return {
      ...rawTurn,
      // Only override keys the turn really declares, so the key set is preserved.
      ...(rawTurn.input !== undefined
        ? { input: interpolateCaseField(rawTurn.input, vars) }
        : {}),
      ...(rawTurn.expected_output !== undefined
        ? { expected_output: interpolateCaseField(rawTurn.expected_output, vars) }
        : {}),
    };
  });
}
15720
/**
 * Produces a rendered copy of a raw eval case with per-case variables applied
 * to its templated fields.
 *
 * When `vars` is falsy the very same `raw` object is returned. Otherwise a
 * shallow copy is built and each of `criteria`, `expected_outcome`, `input`,
 * `input_files` and `expected_output` is passed through
 * `interpolateCaseField`, while `turns` goes through `interpolateCaseTurns`.
 * Fields that are `undefined` on `raw` are left alone, and every other
 * property is carried over untouched.
 *
 * @param {*} raw - Raw eval-case object.
 * @param {*} vars - Per-case variables; any falsy value disables interpolation.
 * @returns {*} `raw` itself, or a new object with the rendered fields.
 */
function interpolateRawEvalCase(raw, vars) {
  if (!vars) {
    return raw;
  }
  const rendered = { ...raw };
  const scalarFields = ["criteria", "expected_outcome", "input", "input_files", "expected_output"];
  for (const field of scalarFields) {
    if (raw[field] !== undefined) {
      rendered[field] = interpolateCaseField(raw[field], vars);
    }
  }
  // Turns are handled last and per entry, since each turn carries its own fields.
  if (raw.turns !== undefined) {
    rendered.turns = interpolateCaseTurns(raw.turns, vars);
  }
  return rendered;
}
15698
15734
  async function readTestSuiteMetadata(testFilePath) {
15699
15735
  try {
15700
15736
  const absolutePath = path43.resolve(testFilePath);
@@ -15722,7 +15758,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
15722
15758
  return { tests: await loadTestsFromAgentSkills(evalFilePath) };
15723
15759
  }
15724
15760
  if (format === "typescript") {
15725
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EMSGL2BQ.js");
15761
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-JL5DGTJL.js");
15726
15762
  return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
15727
15763
  }
15728
15764
  const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
@@ -15757,7 +15793,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
15757
15793
  return loadTestsFromAgentSkills(evalFilePath);
15758
15794
  }
15759
15795
  if (format === "typescript") {
15760
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EMSGL2BQ.js");
15796
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-JL5DGTJL.js");
15761
15797
  const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
15762
15798
  return suite.tests;
15763
15799
  }
@@ -15811,8 +15847,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15811
15847
  }
15812
15848
  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
15813
15849
  const suiteGovernance = extractSuiteGovernance(suite);
15814
- const suiteInputMessages = expandInputShorthand(suite.input);
15815
- const suiteInputFiles = suite.input_files;
15850
+ const rawSuiteInput = suite.input;
15851
+ const rawSuiteInputFiles = suite.input_files;
15816
15852
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
15817
15853
  const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
15818
15854
  const suiteAssertions = suite.assertions ?? suite.assert;
@@ -15831,30 +15867,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15831
15867
  if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
15832
15868
  continue;
15833
15869
  }
15834
- const conversationId = asString5(testCaseConfig.conversation_id);
15835
- let outcome = asString5(testCaseConfig.criteria);
15836
- if (!outcome && testCaseConfig.expected_outcome !== void 0) {
15837
- outcome = asString5(testCaseConfig.expected_outcome);
15870
+ const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : void 0;
15871
+ const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars);
15872
+ const conversationId = asString5(renderedCase.conversation_id);
15873
+ let outcome = asString5(renderedCase.criteria);
15874
+ if (!outcome && renderedCase.expected_outcome !== void 0) {
15875
+ outcome = asString5(renderedCase.expected_outcome);
15838
15876
  if (outcome) {
15839
15877
  logWarning5(
15840
- `Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
15878
+ `Test '${asString5(renderedCase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
15841
15879
  );
15842
15880
  }
15843
15881
  }
15844
- const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
15882
+ const caseExecution = isJsonObject(renderedCase.execution) ? renderedCase.execution : void 0;
15845
15883
  const skipDefaults = caseExecution?.skip_defaults === true;
15846
15884
  const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
15847
- const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
15848
- const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
15849
- const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
15850
- const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0 || Array.isArray(testCaseConfig.turns) && testCaseConfig.turns.length > 0;
15885
+ const effectiveSuiteInputFiles = rawSuiteInputFiles && !skipDefaults ? interpolateCaseField(rawSuiteInputFiles, caseVars) : void 0;
15886
+ const testInputMessages = resolveInputMessages(renderedCase, effectiveSuiteInputFiles);
15887
+ const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
15888
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || renderedCase.assertions !== void 0 || renderedCase.assert !== void 0 || Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0;
15851
15889
  if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
15852
15890
  logError3(
15853
15891
  `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions/turns`
15854
15892
  );
15855
15893
  continue;
15856
15894
  }
15857
- const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
15895
+ const effectiveSuiteInputValue = rawSuiteInput && !skipDefaults ? interpolateCaseField(rawSuiteInput, caseVars) : void 0;
15896
+ const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
15858
15897
  const hasExpectedMessages = expectedMessages.length > 0;
15859
15898
  const inputTextParts = [];
15860
15899
  const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
@@ -15894,11 +15933,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15894
15933
  }
15895
15934
  }
15896
15935
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
15897
- const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
15936
+ const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator;
15898
15937
  let evaluators;
15899
15938
  try {
15900
15939
  evaluators = await parseGraders(
15901
- testCaseConfig,
15940
+ renderedCase,
15902
15941
  globalExecution,
15903
15942
  searchRoots,
15904
15943
  id ?? "unknown",
@@ -15909,7 +15948,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15909
15948
  logError3(`Skipping test '${id}': ${message}`);
15910
15949
  continue;
15911
15950
  }
15912
- const inlineRubrics = testCaseConfig.rubrics;
15951
+ const inlineRubrics = renderedCase.rubrics;
15913
15952
  if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
15914
15953
  const rubricEvaluator = parseInlineRubrics(inlineRubrics);
15915
15954
  if (rubricEvaluator) {
@@ -15918,25 +15957,25 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15918
15957
  }
15919
15958
  warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
15920
15959
  const userFilePaths = collectResolvedInputFilePaths(inputMessages);
15921
- const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
15960
+ const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
15922
15961
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
15923
- const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
15962
+ const rawCaseMetadata = isJsonObject(renderedCase.metadata) ? renderedCase.metadata : void 0;
15924
15963
  const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
15925
15964
  const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
15926
- const caseTargets = extractTargetsFromTestCase(testCaseConfig);
15927
- const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
15965
+ const caseTargets = extractTargetsFromTestCase(renderedCase);
15966
+ const dependsOn = Array.isArray(renderedCase.depends_on) ? renderedCase.depends_on.filter(
15928
15967
  (v) => typeof v === "string"
15929
15968
  ) : void 0;
15930
- const onDependencyFailureRaw = asString5(testCaseConfig.on_dependency_failure);
15969
+ const onDependencyFailureRaw = asString5(renderedCase.on_dependency_failure);
15931
15970
  const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
15932
- const modeRaw = asString5(testCaseConfig.mode);
15971
+ const modeRaw = asString5(renderedCase.mode);
15933
15972
  const mode = modeRaw === "conversation" ? "conversation" : void 0;
15934
- const turns = Array.isArray(testCaseConfig.turns) ? parseTurns(testCaseConfig.turns) : void 0;
15935
- const aggregationRaw = asString5(testCaseConfig.aggregation);
15973
+ const turns = Array.isArray(renderedCase.turns) ? parseTurns(renderedCase.turns) : void 0;
15974
+ const aggregationRaw = asString5(renderedCase.aggregation);
15936
15975
  const aggregation = aggregationRaw === "mean" || aggregationRaw === "min" || aggregationRaw === "max" ? aggregationRaw : void 0;
15937
- const onTurnFailureRaw = asString5(testCaseConfig.on_turn_failure);
15976
+ const onTurnFailureRaw = asString5(renderedCase.on_turn_failure);
15938
15977
  const onTurnFailure = onTurnFailureRaw === "continue" || onTurnFailureRaw === "stop" ? onTurnFailureRaw : void 0;
15939
- const windowSize = typeof testCaseConfig.window_size === "number" && testCaseConfig.window_size >= 1 ? testCaseConfig.window_size : void 0;
15978
+ const windowSize = typeof renderedCase.window_size === "number" && renderedCase.window_size >= 1 ? renderedCase.window_size : void 0;
15940
15979
  const testCase = {
15941
15980
  id,
15942
15981
  suite: suiteName,
@@ -19632,4 +19671,4 @@ export {
19632
19671
  loadTestById,
19633
19672
  loadEvalCaseById
19634
19673
  };
19635
- //# sourceMappingURL=chunk-SCC35F3L.js.map
19674
+ //# sourceMappingURL=chunk-Z2BBOGE4.js.map