@agentv/core 2.17.0 → 2.17.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import {
6
6
  findGitRoot,
7
7
  isEvaluatorKind,
8
8
  resolveFileReference
9
- } from "../../chunk-CPPYERD2.js";
9
+ } from "../../chunk-PSYFRPNT.js";
10
10
 
11
11
  // src/evaluation/validation/file-type.ts
12
12
  import { readFile } from "node:fs/promises";
package/dist/index.cjs CHANGED
@@ -1777,6 +1777,25 @@ var import_node_path8 = __toESM(require("path"), 1);
1777
1777
  var import_micromatch3 = __toESM(require("micromatch"), 1);
1778
1778
  var import_yaml4 = require("yaml");
1779
1779
 
1780
// src/evaluation/interpolation.ts
// Matches `${{ NAME }}` placeholders; NAME must be a valid identifier and
// whitespace inside the braces is optional. Declared with `var` to match the
// surrounding bundle's top-level declarations.
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;

/**
 * Recursively replaces `${{ NAME }}` placeholders in every string found in
 * `value` with the matching entry of `env`.
 *
 * - Strings: each placeholder is replaced by `env[NAME]`; a variable that is
 *   not an own property of `env` (or is null/undefined) becomes "".
 * - Arrays: a new array with each item interpolated.
 * - Other non-null objects: a new plain object with each own enumerable
 *   string-keyed property value interpolated (keys are left untouched).
 * - Everything else (numbers, booleans, null, undefined) is returned as-is.
 *
 * The input is never mutated.
 *
 * @param {unknown} value - Parsed YAML/JSON data (or any value) to process.
 * @param {Record<string, string | undefined>} env - Variable lookup table,
 *   e.g. `process.env`.
 * @returns {unknown} A copy of `value` with placeholders substituted.
 */
function interpolateEnv(value, env) {
  if (typeof value === "string") {
    return value.replace(ENV_VAR_PATTERN, (_, varName) => {
      // Only own properties count as variables; otherwise names such as
      // `toString` or `constructor` would resolve to inherited functions.
      if (!Object.prototype.hasOwnProperty.call(env, varName)) {
        return "";
      }
      return env[varName] ?? "";
    });
  }
  if (Array.isArray(value)) {
    return value.map((item) => interpolateEnv(item, env));
  }
  if (value !== null && typeof value === "object") {
    // Object.fromEntries defines own data properties, so an own "__proto__"
    // key (as produced by JSON.parse) is copied as data instead of replacing
    // the result's prototype via the `__proto__` setter.
    return Object.fromEntries(
      Object.entries(value).map(([key, val]) => [key, interpolateEnv(val, env)])
    );
  }
  return value;
}
1798
+
1780
1799
  // src/evaluation/loaders/case-file-loader.ts
1781
1800
  var import_promises = require("fs/promises");
1782
1801
  var import_node_path = __toESM(require("path"), 1);
@@ -1795,7 +1814,8 @@ function isGlobPattern(filePath) {
1795
1814
  return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
1796
1815
  }
1797
1816
  function parseYamlCases(content, filePath) {
1798
- const parsed = (0, import_yaml.parse)(content);
1817
+ const raw = (0, import_yaml.parse)(content);
1818
+ const parsed = interpolateEnv(raw, process.env);
1799
1819
  if (!Array.isArray(parsed)) {
1800
1820
  throw new Error(
1801
1821
  `External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
@@ -1817,7 +1837,8 @@ function parseJsonlCases(content, filePath) {
1817
1837
  const line = lines[i].trim();
1818
1838
  if (line === "") continue;
1819
1839
  try {
1820
- const parsed = JSON.parse(line);
1840
+ const raw = JSON.parse(line);
1841
+ const parsed = interpolateEnv(raw, process.env);
1821
1842
  if (!isJsonObject(parsed)) {
1822
1843
  throw new Error("Expected JSON object");
1823
1844
  }
@@ -3966,7 +3987,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
3966
3987
  }
3967
3988
  try {
3968
3989
  const content = await (0, import_promises6.readFile)(sidecarPath, "utf8");
3969
- const parsed = (0, import_yaml3.parse)(content);
3990
+ const parsed = interpolateEnv((0, import_yaml3.parse)(content), process.env);
3970
3991
  if (!isJsonObject(parsed)) {
3971
3992
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
3972
3993
  return {};
@@ -3989,7 +4010,8 @@ function parseJsonlContent(content, filePath) {
3989
4010
  const line = lines[i].trim();
3990
4011
  if (line === "") continue;
3991
4012
  try {
3992
- const parsed = JSON.parse(line);
4013
+ const raw = JSON.parse(line);
4014
+ const parsed = interpolateEnv(raw, process.env);
3993
4015
  if (!isJsonObject(parsed)) {
3994
4016
  throw new Error("Expected JSON object");
3995
4017
  }
@@ -4046,9 +4068,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4046
4068
  }
4047
4069
  const inputMessages = resolveInputMessages(evalcase);
4048
4070
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
4049
- if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
4071
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
4072
+ if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
4050
4073
  logError(
4051
- `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
4074
+ `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
4052
4075
  );
4053
4076
  continue;
4054
4077
  }
@@ -4126,7 +4149,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4126
4149
  guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
4127
4150
  guideline_patterns: guidelinePatterns,
4128
4151
  file_paths: allFilePaths,
4129
- criteria: outcome,
4152
+ criteria: outcome ?? "",
4130
4153
  evaluator: evalCaseEvaluatorKind,
4131
4154
  evaluators
4132
4155
  };
@@ -4439,7 +4462,7 @@ async function readTestSuiteMetadata(testFilePath) {
4439
4462
  try {
4440
4463
  const absolutePath = import_node_path8.default.resolve(testFilePath);
4441
4464
  const content = await (0, import_promises8.readFile)(absolutePath, "utf8");
4442
- const parsed = (0, import_yaml4.parse)(content);
4465
+ const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
4443
4466
  if (!isJsonObject(parsed)) {
4444
4467
  return {};
4445
4468
  }
@@ -4489,11 +4512,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4489
4512
  const config = await loadConfig(absoluteTestPath, repoRootPath);
4490
4513
  const guidelinePatterns = config?.guideline_patterns;
4491
4514
  const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
4492
- const parsed = (0, import_yaml4.parse)(rawFile);
4493
- if (!isJsonObject(parsed)) {
4515
+ const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
4516
+ if (!isJsonObject(interpolated)) {
4494
4517
  throw new Error(`Invalid test file format: ${evalFilePath}`);
4495
4518
  }
4496
- const suite = parsed;
4519
+ const suite = interpolated;
4497
4520
  const datasetNameFromSuite = asString6(suite.dataset)?.trim();
4498
4521
  const fallbackDataset = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
4499
4522
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
@@ -4537,9 +4560,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4537
4560
  }
4538
4561
  const testInputMessages = resolveInputMessages(evalcase);
4539
4562
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
4540
- if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
4563
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
4564
+ if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
4541
4565
  logError2(
4542
- `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
4566
+ `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
4543
4567
  );
4544
4568
  continue;
4545
4569
  }
@@ -4635,7 +4659,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4635
4659
  guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path8.default.resolve(guidelinePath)),
4636
4660
  guideline_patterns: guidelinePatterns,
4637
4661
  file_paths: allFilePaths,
4638
- criteria: outcome,
4662
+ criteria: outcome ?? "",
4639
4663
  evaluator: evalCaseEvaluatorKind,
4640
4664
  evaluators,
4641
4665
  workspace: mergedWorkspace,
@@ -4775,7 +4799,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
4775
4799
  } catch {
4776
4800
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
4777
4801
  }
4778
- const parsed = (0, import_yaml4.parse)(content);
4802
+ const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
4779
4803
  if (!isJsonObject(parsed)) {
4780
4804
  throw new Error(
4781
4805
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
@@ -17714,9 +17738,11 @@ async function runEvaluatorList(options) {
17714
17738
  registry: typeRegistry
17715
17739
  };
17716
17740
  for (const evaluatorConfig of evaluators ?? []) {
17741
+ const startedAt = /* @__PURE__ */ new Date();
17717
17742
  try {
17718
17743
  const evaluatorInstance = await typeRegistry.create(evaluatorConfig, dispatchContext);
17719
17744
  const score2 = await evaluatorInstance.evaluate(evalContext);
17745
+ const endedAt = /* @__PURE__ */ new Date();
17720
17746
  const weight = evaluatorConfig.weight ?? 1;
17721
17747
  scored.push({
17722
17748
  score: score2,
@@ -17737,9 +17763,13 @@ async function runEvaluatorList(options) {
17737
17763
  evaluatorProviderRequest: score2.evaluatorRawRequest,
17738
17764
  details: score2.details,
17739
17765
  scores: mapChildResults(score2.scores),
17740
- tokenUsage: score2.tokenUsage
17766
+ tokenUsage: score2.tokenUsage,
17767
+ durationMs: endedAt.getTime() - startedAt.getTime(),
17768
+ startedAt: startedAt.toISOString(),
17769
+ endedAt: endedAt.toISOString()
17741
17770
  });
17742
17771
  } catch (error) {
17772
+ const endedAt = /* @__PURE__ */ new Date();
17743
17773
  const message = error instanceof Error ? error.message : String(error);
17744
17774
  const fallbackScore = {
17745
17775
  score: 0,
@@ -17765,7 +17795,10 @@ async function runEvaluatorList(options) {
17765
17795
  verdict: "fail",
17766
17796
  hits: [],
17767
17797
  misses: [`Evaluator '${evaluatorConfig.name ?? "unknown"}' failed: ${message}`],
17768
- reasoning: message
17798
+ reasoning: message,
17799
+ durationMs: endedAt.getTime() - startedAt.getTime(),
17800
+ startedAt: startedAt.toISOString(),
17801
+ endedAt: endedAt.toISOString()
17769
17802
  });
17770
17803
  }
17771
17804
  if (evaluatorConfig.negate === true && scored.length > 0) {