@agentv/core 4.29.1-next.1 → 4.30.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -369,6 +369,35 @@ function coercePrimitive(value) {
369
369
  if (PLAIN_NUMBER_PATTERN.test(value)) return Number(value);
370
370
  return value;
371
371
  }
372
+ function isPlainObject(value) {
373
+ return typeof value === "object" && value !== null && !Array.isArray(value);
374
+ }
375
+ function cloneTemplateValue(value) {
376
+ if (Array.isArray(value)) {
377
+ return value.map((item) => cloneTemplateValue(item));
378
+ }
379
+ if (isPlainObject(value)) {
380
+ const result = {};
381
+ for (const [key, nested] of Object.entries(value)) {
382
+ result[key] = cloneTemplateValue(nested);
383
+ }
384
+ return result;
385
+ }
386
+ return value;
387
+ }
388
+ function stringifyTemplateValue(value) {
389
+ if (typeof value === "string") return value;
390
+ return JSON.stringify(value);
391
+ }
392
+ function lookupTemplateVar(vars, expression) {
393
+ if (!expression) return void 0;
394
+ return expression.split(".").reduce((current, segment) => {
395
+ if (!isPlainObject(current)) {
396
+ return void 0;
397
+ }
398
+ return current[segment];
399
+ }, vars);
400
+ }
372
401
  function interpolateEnv(value, env) {
373
402
  if (typeof value === "string") {
374
403
  const wholeMatch = WHOLE_VAR_PATTERN.exec(value);
@@ -390,12 +419,38 @@ function interpolateEnv(value, env) {
390
419
  }
391
420
  return value;
392
421
  }
393
- var ENV_VAR_PATTERN, WHOLE_VAR_PATTERN, PLAIN_NUMBER_PATTERN;
422
+ function interpolateTemplateVars(value, vars) {
423
+ if (typeof value === "string") {
424
+ const wholeMatch = WHOLE_TEMPLATE_VAR_PATTERN.exec(value);
425
+ if (wholeMatch) {
426
+ const resolved = lookupTemplateVar(vars, wholeMatch[1]);
427
+ return resolved === void 0 ? value : cloneTemplateValue(resolved);
428
+ }
429
+ return value.replace(TEMPLATE_VAR_PATTERN, (match, expression) => {
430
+ const resolved = lookupTemplateVar(vars, expression);
431
+ return resolved === void 0 ? match : stringifyTemplateValue(resolved);
432
+ });
433
+ }
434
+ if (Array.isArray(value)) {
435
+ return value.map((item) => interpolateTemplateVars(item, vars));
436
+ }
437
+ if (isPlainObject(value)) {
438
+ const result = {};
439
+ for (const [key, nested] of Object.entries(value)) {
440
+ result[key] = interpolateTemplateVars(nested, vars);
441
+ }
442
+ return result;
443
+ }
444
+ return value;
445
+ }
446
+ var ENV_VAR_PATTERN, TEMPLATE_VAR_PATTERN, WHOLE_TEMPLATE_VAR_PATTERN, WHOLE_VAR_PATTERN, PLAIN_NUMBER_PATTERN;
394
447
  var init_interpolation = __esm({
395
448
  "src/evaluation/interpolation.ts"() {
396
449
  "use strict";
397
450
  init_cjs_shims();
398
451
  ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
452
+ TEMPLATE_VAR_PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}/g;
453
+ WHOLE_TEMPLATE_VAR_PATTERN = /^\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}$/;
399
454
  WHOLE_VAR_PATTERN = /^\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}$/;
400
455
  PLAIN_NUMBER_PATTERN = /^-?(?:0|[1-9]\d*)(?:\.\d+)?$/;
401
456
  }
@@ -22740,6 +22795,41 @@ function resolveTests(suite) {
22740
22795
  }
22741
22796
  return void 0;
22742
22797
  }
22798
+ function interpolateCaseField(value, vars) {
22799
+ if (!vars || value === void 0) {
22800
+ return value;
22801
+ }
22802
+ return interpolateTemplateVars(value, vars);
22803
+ }
22804
+ function interpolateCaseTurns(turns, vars) {
22805
+ if (!vars || !Array.isArray(turns)) {
22806
+ return turns;
22807
+ }
22808
+ return turns.map((rawTurn) => {
22809
+ if (!isJsonObject(rawTurn)) {
22810
+ return rawTurn;
22811
+ }
22812
+ return {
22813
+ ...rawTurn,
22814
+ input: interpolateCaseField(rawTurn.input, vars),
22815
+ expected_output: interpolateCaseField(rawTurn.expected_output, vars)
22816
+ };
22817
+ });
22818
+ }
22819
+ function interpolateRawEvalCase(raw, vars) {
22820
+ if (!vars) {
22821
+ return raw;
22822
+ }
22823
+ return {
22824
+ ...raw,
22825
+ ...raw.criteria !== void 0 ? { criteria: interpolateCaseField(raw.criteria, vars) } : {},
22826
+ ...raw.expected_outcome !== void 0 ? { expected_outcome: interpolateCaseField(raw.expected_outcome, vars) } : {},
22827
+ ...raw.input !== void 0 ? { input: interpolateCaseField(raw.input, vars) } : {},
22828
+ ...raw.input_files !== void 0 ? { input_files: interpolateCaseField(raw.input_files, vars) } : {},
22829
+ ...raw.expected_output !== void 0 ? { expected_output: interpolateCaseField(raw.expected_output, vars) } : {},
22830
+ ...raw.turns !== void 0 ? { turns: interpolateCaseTurns(raw.turns, vars) } : {}
22831
+ };
22832
+ }
22743
22833
  async function readTestSuiteMetadata(testFilePath) {
22744
22834
  try {
22745
22835
  const absolutePath = import_node_path50.default.resolve(testFilePath);
@@ -22854,8 +22944,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
22854
22944
  }
22855
22945
  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
22856
22946
  const suiteGovernance = extractSuiteGovernance(suite);
22857
- const suiteInputMessages = expandInputShorthand(suite.input);
22858
- const suiteInputFiles = suite.input_files;
22947
+ const rawSuiteInput = suite.input;
22948
+ const rawSuiteInputFiles = suite.input_files;
22859
22949
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
22860
22950
  const _globalTarget = asString5(rawGlobalExecution?.target) ?? asString5(suite.target);
22861
22951
  const suiteAssertions = suite.assertions ?? suite.assert;
@@ -22874,30 +22964,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
22874
22964
  if (filterPattern && (!id || !matchesFilter4(id, filterPattern))) {
22875
22965
  continue;
22876
22966
  }
22877
- const conversationId = asString5(testCaseConfig.conversation_id);
22878
- let outcome = asString5(testCaseConfig.criteria);
22879
- if (!outcome && testCaseConfig.expected_outcome !== void 0) {
22880
- outcome = asString5(testCaseConfig.expected_outcome);
22967
+ const caseVars = isJsonObject(testCaseConfig.vars) ? testCaseConfig.vars : void 0;
22968
+ const renderedCase = interpolateRawEvalCase(testCaseConfig, caseVars);
22969
+ const conversationId = asString5(renderedCase.conversation_id);
22970
+ let outcome = asString5(renderedCase.criteria);
22971
+ if (!outcome && renderedCase.expected_outcome !== void 0) {
22972
+ outcome = asString5(renderedCase.expected_outcome);
22881
22973
  if (outcome) {
22882
22974
  logWarning5(
22883
- `Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
22975
+ `Test '${asString5(renderedCase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
22884
22976
  );
22885
22977
  }
22886
22978
  }
22887
- const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
22979
+ const caseExecution = isJsonObject(renderedCase.execution) ? renderedCase.execution : void 0;
22888
22980
  const skipDefaults = caseExecution?.skip_defaults === true;
22889
22981
  const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
22890
- const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
22891
- const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
22892
- const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
22893
- const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0 || Array.isArray(testCaseConfig.turns) && testCaseConfig.turns.length > 0;
22982
+ const effectiveSuiteInputFiles = rawSuiteInputFiles && !skipDefaults ? interpolateCaseField(rawSuiteInputFiles, caseVars) : void 0;
22983
+ const testInputMessages = resolveInputMessages(renderedCase, effectiveSuiteInputFiles);
22984
+ const expectedMessages = resolveExpectedMessages(renderedCase) ?? [];
22985
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || renderedCase.assertions !== void 0 || renderedCase.assert !== void 0 || Array.isArray(renderedCase.turns) && renderedCase.turns.length > 0;
22894
22986
  if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
22895
22987
  logError3(
22896
22988
  `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions/turns`
22897
22989
  );
22898
22990
  continue;
22899
22991
  }
22900
- const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
22992
+ const effectiveSuiteInputValue = rawSuiteInput && !skipDefaults ? interpolateCaseField(rawSuiteInput, caseVars) : void 0;
22993
+ const effectiveSuiteInputMessages = expandInputShorthand(effectiveSuiteInputValue);
22901
22994
  const hasExpectedMessages = expectedMessages.length > 0;
22902
22995
  const inputTextParts = [];
22903
22996
  const suiteResolvedInputMessages = effectiveSuiteInputMessages ? await processMessages({
@@ -22937,11 +23030,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
22937
23030
  }
22938
23031
  }
22939
23032
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
22940
- const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
23033
+ const testCaseEvaluatorKind = coerceEvaluator(renderedCase.evaluator, id) ?? globalEvaluator;
22941
23034
  let evaluators;
22942
23035
  try {
22943
23036
  evaluators = await parseGraders(
22944
- testCaseConfig,
23037
+ renderedCase,
22945
23038
  globalExecution,
22946
23039
  searchRoots,
22947
23040
  id ?? "unknown",
@@ -22952,7 +23045,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
22952
23045
  logError3(`Skipping test '${id}': ${message}`);
22953
23046
  continue;
22954
23047
  }
22955
- const inlineRubrics = testCaseConfig.rubrics;
23048
+ const inlineRubrics = renderedCase.rubrics;
22956
23049
  if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
22957
23050
  const rubricEvaluator = parseInlineRubrics(inlineRubrics);
22958
23051
  if (rubricEvaluator) {
@@ -22961,25 +23054,25 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
22961
23054
  }
22962
23055
  warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
22963
23056
  const userFilePaths = collectResolvedInputFilePaths(inputMessages);
22964
- const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
23057
+ const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
22965
23058
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
22966
- const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
23059
+ const rawCaseMetadata = isJsonObject(renderedCase.metadata) ? renderedCase.metadata : void 0;
22967
23060
  const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
22968
23061
  const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
22969
- const caseTargets = extractTargetsFromTestCase(testCaseConfig);
22970
- const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
23062
+ const caseTargets = extractTargetsFromTestCase(renderedCase);
23063
+ const dependsOn = Array.isArray(renderedCase.depends_on) ? renderedCase.depends_on.filter(
22971
23064
  (v) => typeof v === "string"
22972
23065
  ) : void 0;
22973
- const onDependencyFailureRaw = asString5(testCaseConfig.on_dependency_failure);
23066
+ const onDependencyFailureRaw = asString5(renderedCase.on_dependency_failure);
22974
23067
  const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
22975
- const modeRaw = asString5(testCaseConfig.mode);
23068
+ const modeRaw = asString5(renderedCase.mode);
22976
23069
  const mode = modeRaw === "conversation" ? "conversation" : void 0;
22977
- const turns = Array.isArray(testCaseConfig.turns) ? parseTurns(testCaseConfig.turns) : void 0;
22978
- const aggregationRaw = asString5(testCaseConfig.aggregation);
23070
+ const turns = Array.isArray(renderedCase.turns) ? parseTurns(renderedCase.turns) : void 0;
23071
+ const aggregationRaw = asString5(renderedCase.aggregation);
22979
23072
  const aggregation = aggregationRaw === "mean" || aggregationRaw === "min" || aggregationRaw === "max" ? aggregationRaw : void 0;
22980
- const onTurnFailureRaw = asString5(testCaseConfig.on_turn_failure);
23073
+ const onTurnFailureRaw = asString5(renderedCase.on_turn_failure);
22981
23074
  const onTurnFailure = onTurnFailureRaw === "continue" || onTurnFailureRaw === "stop" ? onTurnFailureRaw : void 0;
22982
- const windowSize = typeof testCaseConfig.window_size === "number" && testCaseConfig.window_size >= 1 ? testCaseConfig.window_size : void 0;
23075
+ const windowSize = typeof renderedCase.window_size === "number" && renderedCase.window_size >= 1 ? renderedCase.window_size : void 0;
22983
23076
  const testCase = {
22984
23077
  id,
22985
23078
  suite: suiteName,
@@ -24111,7 +24204,7 @@ var init_rpc_metadata = __esm({
24111
24204
  });
24112
24205
 
24113
24206
  // ../../node_modules/.bun/@opentelemetry+core@2.5.1+460773ef8ff1e07c/node_modules/@opentelemetry/core/build/esm/utils/lodash.merge.js
24114
- function isPlainObject(value) {
24207
+ function isPlainObject2(value) {
24115
24208
  if (!isObjectLike(value) || baseGetTag(value) !== objectTag) {
24116
24209
  return false;
24117
24210
  }
@@ -24270,7 +24363,7 @@ function isPrimitive(value) {
24270
24363
  return typeof value === "string" || typeof value === "number" || typeof value === "boolean" || typeof value === "undefined" || value instanceof Date || value instanceof RegExp || value === null;
24271
24364
  }
24272
24365
  function shouldMerge(one, two) {
24273
- if (!isPlainObject(one) || !isPlainObject(two)) {
24366
+ if (!isPlainObject2(one) || !isPlainObject2(two)) {
24274
24367
  return false;
24275
24368
  }
24276
24369
  return true;