@agentv/core 2.17.0 → 2.17.1-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1166,6 +1166,12 @@ interface EvaluatorResult {
1166
1166
  readonly details?: JsonObject;
1167
1167
  /** Token usage from LLM calls made by this evaluator (optional). */
1168
1168
  readonly tokenUsage?: TokenUsage;
1169
+ /** Wall-clock duration of this judge execution in milliseconds. */
1170
+ readonly durationMs?: number;
1171
+ /** ISO 8601 UTC timestamp when this judge started executing. */
1172
+ readonly startedAt?: string;
1173
+ /** ISO 8601 UTC timestamp when this judge finished executing. */
1174
+ readonly endedAt?: string;
1169
1175
  }
1170
1176
  /**
1171
1177
  * Convenience accessor matching the Python hit_count property.
package/dist/index.d.ts CHANGED
@@ -1166,6 +1166,12 @@ interface EvaluatorResult {
1166
1166
  readonly details?: JsonObject;
1167
1167
  /** Token usage from LLM calls made by this evaluator (optional). */
1168
1168
  readonly tokenUsage?: TokenUsage;
1169
+ /** Wall-clock duration of this judge execution in milliseconds. */
1170
+ readonly durationMs?: number;
1171
+ /** ISO 8601 UTC timestamp when this judge started executing. */
1172
+ readonly startedAt?: string;
1173
+ /** ISO 8601 UTC timestamp when this judge finished executing. */
1174
+ readonly endedAt?: string;
1169
1175
  }
1170
1176
  /**
1171
1177
  * Convenience accessor matching the Python hit_count property.
package/dist/index.js CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  readTextFile,
18
18
  resolveFileReference,
19
19
  resolveTargetDefinition
20
- } from "./chunk-CPPYERD2.js";
20
+ } from "./chunk-PSYFRPNT.js";
21
21
  import {
22
22
  OtlpJsonFileExporter
23
23
  } from "./chunk-HFSYZHGF.js";
@@ -151,6 +151,25 @@ import path8 from "node:path";
151
151
  import micromatch3 from "micromatch";
152
152
  import { parse as parse2 } from "yaml";
153
153
 
154
+ // src/evaluation/interpolation.ts
155
// Matches `${{ NAME }}` placeholders; NAME is captured in group 1 and may be
// padded with whitespace inside the braces.
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;

/**
 * Recursively substitutes `${{ NAME }}` placeholders with values from `env`.
 *
 * Strings have every placeholder replaced; arrays and objects are copied with
 * each element / property value interpolated (property keys are left as-is);
 * every other value (numbers, booleans, null, undefined) is returned unchanged.
 *
 * @param {unknown} value - Parsed data to interpolate (e.g. output of a YAML/JSON parser).
 * @param {Record<string, string | undefined>} env - Variable lookup table, such as `process.env`.
 * @returns {unknown} A copy of `value` with placeholders resolved. Placeholders
 *   whose variable is not set in `env` resolve to the empty string.
 */
function interpolateEnv(value, env) {
  if (typeof value === "string") {
    return value.replace(ENV_VAR_PATTERN, (_, varName) => {
      // Only own properties count as variables: without this check a
      // placeholder such as `${{ toString }}` would resolve to an inherited
      // Object.prototype member instead of the empty string.
      const isDefined = Object.prototype.hasOwnProperty.call(env, varName);
      return isDefined ? (env[varName] ?? "") : "";
    });
  }
  if (Array.isArray(value)) {
    return value.map((item) => interpolateEnv(item, env));
  }
  if (value !== null && typeof value === "object") {
    // Object.fromEntries defines own data properties, so a data key named
    // "__proto__" is preserved as data rather than re-pointing the copy's
    // prototype (which plain `result[key] = …` assignment would do).
    return Object.fromEntries(
      Object.entries(value).map(([key, val]) => [key, interpolateEnv(val, env)])
    );
  }
  return value;
}
172
+
154
173
  // src/evaluation/loaders/case-file-loader.ts
155
174
  import { readFile } from "node:fs/promises";
156
175
  import path from "node:path";
@@ -169,7 +188,8 @@ function isGlobPattern(filePath) {
169
188
  return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
170
189
  }
171
190
  function parseYamlCases(content, filePath) {
172
- const parsed = parseYaml(content);
191
+ const raw = parseYaml(content);
192
+ const parsed = interpolateEnv(raw, process.env);
173
193
  if (!Array.isArray(parsed)) {
174
194
  throw new Error(
175
195
  `External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
@@ -191,7 +211,8 @@ function parseJsonlCases(content, filePath) {
191
211
  const line = lines[i].trim();
192
212
  if (line === "") continue;
193
213
  try {
194
- const parsed = JSON.parse(line);
214
+ const raw = JSON.parse(line);
215
+ const parsed = interpolateEnv(raw, process.env);
195
216
  if (!isJsonObject(parsed)) {
196
217
  throw new Error("Expected JSON object");
197
218
  }
@@ -2340,7 +2361,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
2340
2361
  }
2341
2362
  try {
2342
2363
  const content = await readFile5(sidecarPath, "utf8");
2343
- const parsed = parseYaml2(content);
2364
+ const parsed = interpolateEnv(parseYaml2(content), process.env);
2344
2365
  if (!isJsonObject(parsed)) {
2345
2366
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
2346
2367
  return {};
@@ -2363,7 +2384,8 @@ function parseJsonlContent(content, filePath) {
2363
2384
  const line = lines[i].trim();
2364
2385
  if (line === "") continue;
2365
2386
  try {
2366
- const parsed = JSON.parse(line);
2387
+ const raw = JSON.parse(line);
2388
+ const parsed = interpolateEnv(raw, process.env);
2367
2389
  if (!isJsonObject(parsed)) {
2368
2390
  throw new Error("Expected JSON object");
2369
2391
  }
@@ -2420,9 +2442,10 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2420
2442
  }
2421
2443
  const inputMessages = resolveInputMessages(evalcase);
2422
2444
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
2423
- if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
2445
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
2446
+ if (!id || !hasEvaluationSpec || !inputMessages || inputMessages.length === 0) {
2424
2447
  logError(
2425
- `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
2448
+ `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
2426
2449
  );
2427
2450
  continue;
2428
2451
  }
@@ -2500,7 +2523,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2500
2523
  guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
2501
2524
  guideline_patterns: guidelinePatterns,
2502
2525
  file_paths: allFilePaths,
2503
- criteria: outcome,
2526
+ criteria: outcome ?? "",
2504
2527
  evaluator: evalCaseEvaluatorKind,
2505
2528
  evaluators
2506
2529
  };
@@ -2813,7 +2836,7 @@ async function readTestSuiteMetadata(testFilePath) {
2813
2836
  try {
2814
2837
  const absolutePath = path8.resolve(testFilePath);
2815
2838
  const content = await readFile7(absolutePath, "utf8");
2816
- const parsed = parse2(content);
2839
+ const parsed = interpolateEnv(parse2(content), process.env);
2817
2840
  if (!isJsonObject(parsed)) {
2818
2841
  return {};
2819
2842
  }
@@ -2863,11 +2886,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2863
2886
  const config = await loadConfig(absoluteTestPath, repoRootPath);
2864
2887
  const guidelinePatterns = config?.guideline_patterns;
2865
2888
  const rawFile = await readFile7(absoluteTestPath, "utf8");
2866
- const parsed = parse2(rawFile);
2867
- if (!isJsonObject(parsed)) {
2889
+ const interpolated = interpolateEnv(parse2(rawFile), process.env);
2890
+ if (!isJsonObject(interpolated)) {
2868
2891
  throw new Error(`Invalid test file format: ${evalFilePath}`);
2869
2892
  }
2870
- const suite = parsed;
2893
+ const suite = interpolated;
2871
2894
  const datasetNameFromSuite = asString6(suite.dataset)?.trim();
2872
2895
  const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
2873
2896
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
@@ -2911,9 +2934,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2911
2934
  }
2912
2935
  const testInputMessages = resolveInputMessages(evalcase);
2913
2936
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
2914
- if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
2937
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
2938
+ if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
2915
2939
  logError2(
2916
- `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
2940
+ `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
2917
2941
  );
2918
2942
  continue;
2919
2943
  }
@@ -3009,7 +3033,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3009
3033
  guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
3010
3034
  guideline_patterns: guidelinePatterns,
3011
3035
  file_paths: allFilePaths,
3012
- criteria: outcome,
3036
+ criteria: outcome ?? "",
3013
3037
  evaluator: evalCaseEvaluatorKind,
3014
3038
  evaluators,
3015
3039
  workspace: mergedWorkspace,
@@ -3149,7 +3173,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
3149
3173
  } catch {
3150
3174
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
3151
3175
  }
3152
- const parsed = parse2(content);
3176
+ const parsed = interpolateEnv(parse2(content), process.env);
3153
3177
  if (!isJsonObject(parsed)) {
3154
3178
  throw new Error(
3155
3179
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
@@ -14847,9 +14871,11 @@ async function runEvaluatorList(options) {
14847
14871
  registry: typeRegistry
14848
14872
  };
14849
14873
  for (const evaluatorConfig of evaluators ?? []) {
14874
+ const startedAt = /* @__PURE__ */ new Date();
14850
14875
  try {
14851
14876
  const evaluatorInstance = await typeRegistry.create(evaluatorConfig, dispatchContext);
14852
14877
  const score2 = await evaluatorInstance.evaluate(evalContext);
14878
+ const endedAt = /* @__PURE__ */ new Date();
14853
14879
  const weight = evaluatorConfig.weight ?? 1;
14854
14880
  scored.push({
14855
14881
  score: score2,
@@ -14870,9 +14896,13 @@ async function runEvaluatorList(options) {
14870
14896
  evaluatorProviderRequest: score2.evaluatorRawRequest,
14871
14897
  details: score2.details,
14872
14898
  scores: mapChildResults(score2.scores),
14873
- tokenUsage: score2.tokenUsage
14899
+ tokenUsage: score2.tokenUsage,
14900
+ durationMs: endedAt.getTime() - startedAt.getTime(),
14901
+ startedAt: startedAt.toISOString(),
14902
+ endedAt: endedAt.toISOString()
14874
14903
  });
14875
14904
  } catch (error) {
14905
+ const endedAt = /* @__PURE__ */ new Date();
14876
14906
  const message = error instanceof Error ? error.message : String(error);
14877
14907
  const fallbackScore = {
14878
14908
  score: 0,
@@ -14898,7 +14928,10 @@ async function runEvaluatorList(options) {
14898
14928
  verdict: "fail",
14899
14929
  hits: [],
14900
14930
  misses: [`Evaluator '${evaluatorConfig.name ?? "unknown"}' failed: ${message}`],
14901
- reasoning: message
14931
+ reasoning: message,
14932
+ durationMs: endedAt.getTime() - startedAt.getTime(),
14933
+ startedAt: startedAt.toISOString(),
14934
+ endedAt: endedAt.toISOString()
14902
14935
  });
14903
14936
  }
14904
14937
  if (evaluatorConfig.negate === true && scored.length > 0) {