agentv 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -588,9 +588,9 @@ import { readFileSync as readFileSync2 } from "node:fs";
588
588
  // src/commands/eval/index.ts
589
589
  import fg from "fast-glob";
590
590
  import { stat as stat3 } from "node:fs/promises";
591
- import path15 from "node:path";
591
+ import path19 from "node:path";
592
592
 
593
- // ../../packages/core/dist/chunk-YQBJAT5I.js
593
+ // ../../packages/core/dist/chunk-U3GEJ3K7.js
594
594
  import { constants } from "node:fs";
595
595
  import { access, readFile } from "node:fs/promises";
596
596
  import path from "node:path";
@@ -1073,8 +1073,8 @@ function getErrorMap() {
1073
1073
 
1074
1074
  // ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
1075
1075
  var makeIssue = (params) => {
1076
- const { data, path: path19, errorMaps, issueData } = params;
1077
- const fullPath = [...path19, ...issueData.path || []];
1076
+ const { data, path: path25, errorMaps, issueData } = params;
1077
+ const fullPath = [...path25, ...issueData.path || []];
1078
1078
  const fullIssue = {
1079
1079
  ...issueData,
1080
1080
  path: fullPath
@@ -1190,11 +1190,11 @@ var errorUtil;
1190
1190
 
1191
1191
  // ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/types.js
1192
1192
  var ParseInputLazyPath = class {
1193
- constructor(parent, value, path19, key2) {
1193
+ constructor(parent, value, path25, key2) {
1194
1194
  this._cachedPath = [];
1195
1195
  this.parent = parent;
1196
1196
  this.data = value;
1197
- this._path = path19;
1197
+ this._path = path25;
1198
1198
  this._key = key2;
1199
1199
  }
1200
1200
  get path() {
@@ -4636,7 +4636,7 @@ var coerce = {
4636
4636
  };
4637
4637
  var NEVER = INVALID;
4638
4638
 
4639
- // ../../packages/core/dist/chunk-YQBJAT5I.js
4639
+ // ../../packages/core/dist/chunk-U3GEJ3K7.js
4640
4640
  async function fileExists(filePath) {
4641
4641
  try {
4642
4642
  await access(filePath, constants.F_OK);
@@ -5288,12 +5288,21 @@ function isAgentProvider(provider) {
5288
5288
  }
5289
5289
 
5290
5290
  // ../../packages/core/dist/index.js
5291
+ import { readFile as readFile4 } from "node:fs/promises";
5292
+ import path62 from "node:path";
5293
+ import { parse as parse22 } from "yaml";
5291
5294
  import micromatch from "micromatch";
5295
+ import { readFile as readFile3 } from "node:fs/promises";
5296
+ import path22 from "node:path";
5297
+ import { parse as parse3 } from "yaml";
5292
5298
  import { constants as constants3 } from "node:fs";
5293
- import { access as access3, readFile as readFile3 } from "node:fs/promises";
5299
+ import { access as access3 } from "node:fs/promises";
5294
5300
  import path8 from "node:path";
5295
- import { fileURLToPath } from "node:url";
5296
- import { parse as parse3 } from "yaml";
5301
+ import path32 from "node:path";
5302
+ import { readFile as readFile22 } from "node:fs/promises";
5303
+ import path42 from "node:path";
5304
+ import { readFile as readFile32 } from "node:fs/promises";
5305
+ import path52 from "node:path";
5297
5306
 
5298
5307
  // ../../node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
5299
5308
  var _globalThis = typeof globalThis === "object" ? globalThis : global;
@@ -9747,17 +9756,17 @@ var $a = new Error("Agent definition is the prompt you give to the LLM for the a
9747
9756
  import { exec as execWithCallback } from "node:child_process";
9748
9757
  import fs from "node:fs/promises";
9749
9758
  import os2 from "node:os";
9750
- import path22 from "node:path";
9759
+ import path72 from "node:path";
9751
9760
  import { promisify as promisify2 } from "node:util";
9752
9761
  import { exec as execCallback, spawn as spawn2 } from "node:child_process";
9753
9762
  import { randomUUID } from "node:crypto";
9754
9763
  import { constants as constants22, createWriteStream } from "node:fs";
9755
9764
  import { access as access22, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
9756
9765
  import { tmpdir } from "node:os";
9757
- import path42 from "node:path";
9766
+ import path9 from "node:path";
9758
9767
  import { promisify as promisify22 } from "node:util";
9759
- import path32 from "node:path";
9760
- import path52 from "node:path";
9768
+ import path82 from "node:path";
9769
+ import path10 from "node:path";
9761
9770
 
9762
9771
  // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
9763
9772
  import { exec, spawn } from "child_process";
@@ -11696,13 +11705,12 @@ async function provisionSubagents(options) {
11696
11705
 
11697
11706
  // ../../packages/core/dist/index.js
11698
11707
  import { constants as constants32 } from "node:fs";
11699
- import { access as access32, readFile as readFile22 } from "node:fs/promises";
11700
- import path62 from "node:path";
11701
- import { parse as parse22 } from "yaml";
11702
- import { randomUUID as randomUUID2 } from "node:crypto";
11703
- import { createHash, randomUUID as randomUUID3 } from "node:crypto";
11708
+ import { access as access32, readFile as readFile5 } from "node:fs/promises";
11709
+ import path11 from "node:path";
11710
+ import { parse as parse32 } from "yaml";
11711
+ import { createHash, randomUUID as randomUUID2 } from "node:crypto";
11704
11712
  import { mkdir as mkdir22, writeFile as writeFile22 } from "node:fs/promises";
11705
- import path72 from "node:path";
11713
+ import path12 from "node:path";
11706
11714
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
11707
11715
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
11708
11716
  function isTestMessageRole(value) {
@@ -11747,42 +11755,179 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
11747
11755
  function isEvaluatorKind(value) {
11748
11756
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
11749
11757
  }
11750
- var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
11751
- var ANSI_YELLOW = "\x1B[33m";
11752
- var ANSI_RESET = "\x1B[0m";
11753
- var SCHEMA_EVAL_V2 = "agentv-eval-v2";
11754
- var SCHEMA_CONFIG_V2 = "agentv-config-v2";
11755
- async function readTestSuiteMetadata(testFilePath) {
11756
- try {
11757
- const absolutePath = path8.resolve(testFilePath);
11758
- const content = await readFile3(absolutePath, "utf8");
11759
- const parsed = parse3(content);
11760
- if (!isJsonObject(parsed)) {
11761
- return {};
11758
+ function extractCodeBlocks(segments) {
11759
+ const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
11760
+ const codeBlocks = [];
11761
+ for (const segment of segments) {
11762
+ const typeValue = segment["type"];
11763
+ if (typeof typeValue !== "string" || typeValue !== "text") {
11764
+ continue;
11762
11765
  }
11763
- return { target: extractTargetFromSuite(parsed) };
11766
+ const textValue = segment["value"];
11767
+ if (typeof textValue !== "string") {
11768
+ continue;
11769
+ }
11770
+ const matches = textValue.match(CODE_BLOCK_PATTERN);
11771
+ if (matches) {
11772
+ codeBlocks.push(...matches);
11773
+ }
11774
+ }
11775
+ return codeBlocks;
11776
+ }
11777
+ function formatFileContents(parts) {
11778
+ const fileCount = parts.filter((p) => p.isFile).length;
11779
+ if (fileCount > 0) {
11780
+ return parts.map((part) => {
11781
+ if (part.isFile && part.displayPath) {
11782
+ return `<file path="${part.displayPath}">
11783
+ ${part.content}
11784
+ </file>`;
11785
+ }
11786
+ return part.content;
11787
+ }).join("\n\n");
11788
+ }
11789
+ return parts.map((p) => p.content).join(" ");
11790
+ }
11791
+ function formatSegment(segment) {
11792
+ const type = asString(segment.type);
11793
+ if (type === "text") {
11794
+ return asString(segment.value);
11795
+ }
11796
+ if (type === "guideline_ref") {
11797
+ const refPath = asString(segment.path);
11798
+ return refPath ? `<Attached: ${refPath}>` : void 0;
11799
+ }
11800
+ if (type === "file") {
11801
+ const text = asString(segment.text);
11802
+ const filePath = asString(segment.path);
11803
+ if (text && filePath) {
11804
+ return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
11805
+ }
11806
+ }
11807
+ return void 0;
11808
+ }
11809
+ function hasVisibleContent(segments) {
11810
+ return segments.some((segment) => {
11811
+ const type = asString(segment.type);
11812
+ if (type === "text") {
11813
+ const value = asString(segment.value);
11814
+ return value !== void 0 && value.trim().length > 0;
11815
+ }
11816
+ if (type === "guideline_ref") {
11817
+ return false;
11818
+ }
11819
+ if (type === "file") {
11820
+ const text = asString(segment.text);
11821
+ return text !== void 0 && text.trim().length > 0;
11822
+ }
11823
+ return false;
11824
+ });
11825
+ }
11826
+ function asString(value) {
11827
+ return typeof value === "string" ? value : void 0;
11828
+ }
11829
+ async function fileExists2(absolutePath) {
11830
+ try {
11831
+ await access3(absolutePath, constants3.F_OK);
11832
+ return true;
11764
11833
  } catch {
11765
- return {};
11834
+ return false;
11766
11835
  }
11767
11836
  }
11768
- function extractTargetFromSuite(suite) {
11769
- const execution = suite.execution;
11770
- if (execution && typeof execution === "object" && !Array.isArray(execution)) {
11771
- const executionTarget = execution.target;
11772
- if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
11773
- return executionTarget.trim();
11837
+ function resolveToAbsolutePath(candidate) {
11838
+ if (candidate instanceof URL) {
11839
+ return new URL(candidate).pathname;
11840
+ }
11841
+ if (typeof candidate === "string") {
11842
+ if (candidate.startsWith("file://")) {
11843
+ return new URL(candidate).pathname;
11774
11844
  }
11845
+ return path8.resolve(candidate);
11775
11846
  }
11776
- const targetValue = suite.target;
11777
- if (typeof targetValue === "string" && targetValue.trim().length > 0) {
11778
- return targetValue.trim();
11847
+ throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
11848
+ }
11849
+ function buildDirectoryChain2(filePath, repoRoot) {
11850
+ const directories = [];
11851
+ const seen = /* @__PURE__ */ new Set();
11852
+ const boundary = path8.resolve(repoRoot);
11853
+ let current = path8.resolve(path8.dirname(filePath));
11854
+ while (current !== void 0) {
11855
+ if (!seen.has(current)) {
11856
+ directories.push(current);
11857
+ seen.add(current);
11858
+ }
11859
+ if (current === boundary) {
11860
+ break;
11861
+ }
11862
+ const parent = path8.dirname(current);
11863
+ if (parent === current) {
11864
+ break;
11865
+ }
11866
+ current = parent;
11779
11867
  }
11780
- return void 0;
11868
+ if (!seen.has(boundary)) {
11869
+ directories.push(boundary);
11870
+ }
11871
+ return directories;
11872
+ }
11873
+ function buildSearchRoots2(evalPath, repoRoot) {
11874
+ const uniqueRoots = [];
11875
+ const addRoot = (root2) => {
11876
+ const normalized = path8.resolve(root2);
11877
+ if (!uniqueRoots.includes(normalized)) {
11878
+ uniqueRoots.push(normalized);
11879
+ }
11880
+ };
11881
+ let currentDir = path8.dirname(evalPath);
11882
+ let reachedBoundary = false;
11883
+ while (!reachedBoundary) {
11884
+ addRoot(currentDir);
11885
+ const parentDir = path8.dirname(currentDir);
11886
+ if (currentDir === repoRoot || parentDir === currentDir) {
11887
+ reachedBoundary = true;
11888
+ } else {
11889
+ currentDir = parentDir;
11890
+ }
11891
+ }
11892
+ addRoot(repoRoot);
11893
+ addRoot(process.cwd());
11894
+ return uniqueRoots;
11895
+ }
11896
+ function trimLeadingSeparators2(value) {
11897
+ const trimmed = value.replace(/^[/\\]+/, "");
11898
+ return trimmed.length > 0 ? trimmed : value;
11899
+ }
11900
+ async function resolveFileReference2(rawValue, searchRoots) {
11901
+ const displayPath = trimLeadingSeparators2(rawValue);
11902
+ const potentialPaths = [];
11903
+ if (path8.isAbsolute(rawValue)) {
11904
+ potentialPaths.push(path8.normalize(rawValue));
11905
+ }
11906
+ for (const base of searchRoots) {
11907
+ potentialPaths.push(path8.resolve(base, displayPath));
11908
+ }
11909
+ const attempted = [];
11910
+ const seen = /* @__PURE__ */ new Set();
11911
+ for (const candidate of potentialPaths) {
11912
+ const absoluteCandidate = path8.resolve(candidate);
11913
+ if (seen.has(absoluteCandidate)) {
11914
+ continue;
11915
+ }
11916
+ seen.add(absoluteCandidate);
11917
+ attempted.push(absoluteCandidate);
11918
+ if (await fileExists2(absoluteCandidate)) {
11919
+ return { displayPath, resolvedPath: absoluteCandidate, attempted };
11920
+ }
11921
+ }
11922
+ return { displayPath, attempted };
11781
11923
  }
11924
+ var SCHEMA_CONFIG_V2 = "agentv-config-v2";
11925
+ var ANSI_YELLOW = "\x1B[33m";
11926
+ var ANSI_RESET = "\x1B[0m";
11782
11927
  async function loadConfig(evalFilePath, repoRoot) {
11783
- const directories = buildDirectoryChain(evalFilePath, repoRoot);
11928
+ const directories = buildDirectoryChain2(evalFilePath, repoRoot);
11784
11929
  for (const directory of directories) {
11785
- const configPath = path8.join(directory, ".agentv", "config.yaml");
11930
+ const configPath = path22.join(directory, ".agentv", "config.yaml");
11786
11931
  if (!await fileExists2(configPath)) {
11787
11932
  continue;
11788
11933
  }
@@ -11825,71 +11970,174 @@ function isGuidelineFile(filePath, patterns) {
11825
11970
  const patternsToUse = patterns ?? [];
11826
11971
  return micromatch.isMatch(normalized, patternsToUse);
11827
11972
  }
11828
- function extractCodeBlocks(segments) {
11829
- const codeBlocks = [];
11830
- for (const segment of segments) {
11831
- const typeValue = segment["type"];
11832
- if (typeof typeValue !== "string" || typeValue !== "text") {
11833
- continue;
11834
- }
11835
- const textValue = segment["value"];
11836
- if (typeof textValue !== "string") {
11837
- continue;
11838
- }
11839
- const matches = textValue.match(CODE_BLOCK_PATTERN);
11840
- if (matches) {
11841
- codeBlocks.push(...matches);
11973
+ function extractTargetFromSuite(suite) {
11974
+ const execution = suite.execution;
11975
+ if (execution && typeof execution === "object" && !Array.isArray(execution)) {
11976
+ const executionTarget = execution.target;
11977
+ if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
11978
+ return executionTarget.trim();
11842
11979
  }
11843
11980
  }
11844
- return codeBlocks;
11981
+ const targetValue = suite.target;
11982
+ if (typeof targetValue === "string" && targetValue.trim().length > 0) {
11983
+ return targetValue.trim();
11984
+ }
11985
+ return void 0;
11845
11986
  }
11846
- async function processMessages(options) {
11847
- const {
11848
- messages,
11849
- searchRoots,
11850
- repoRootPath,
11851
- guidelinePatterns,
11852
- guidelinePaths,
11853
- textParts,
11854
- messageType,
11855
- verbose
11856
- } = options;
11857
- const segments = [];
11858
- for (const message of messages) {
11859
- const content = message.content;
11860
- if (typeof content === "string") {
11861
- segments.push({ type: "text", value: content });
11862
- if (textParts) {
11863
- textParts.push(content);
11864
- }
11987
+ function logWarning(message) {
11988
+ console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
11989
+ }
11990
+ var ANSI_YELLOW2 = "\x1B[33m";
11991
+ var ANSI_RESET2 = "\x1B[0m";
11992
+ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
11993
+ const execution = rawEvalCase.execution;
11994
+ const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
11995
+ if (candidateEvaluators === void 0) {
11996
+ return void 0;
11997
+ }
11998
+ if (!Array.isArray(candidateEvaluators)) {
11999
+ logWarning2(`Skipping evaluators for '${evalId}': expected array`);
12000
+ return void 0;
12001
+ }
12002
+ const evaluators = [];
12003
+ for (const rawEvaluator of candidateEvaluators) {
12004
+ if (!isJsonObject2(rawEvaluator)) {
12005
+ logWarning2(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
11865
12006
  continue;
11866
12007
  }
11867
- for (const rawSegment of content) {
11868
- if (!isJsonObject(rawSegment)) {
12008
+ const name = asString2(rawEvaluator.name);
12009
+ const typeValue = rawEvaluator.type;
12010
+ if (!name || !isEvaluatorKind(typeValue)) {
12011
+ logWarning2(`Skipping evaluator with invalid name/type in '${evalId}'`);
12012
+ continue;
12013
+ }
12014
+ if (typeValue === "code") {
12015
+ const script = asString2(rawEvaluator.script);
12016
+ if (!script) {
12017
+ logWarning2(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
11869
12018
  continue;
11870
12019
  }
11871
- const segmentType = asString(rawSegment.type);
11872
- if (segmentType === "file") {
11873
- const rawValue = asString(rawSegment.value);
11874
- if (!rawValue) {
11875
- continue;
12020
+ const cwd = asString2(rawEvaluator.cwd);
12021
+ let resolvedCwd;
12022
+ if (cwd) {
12023
+ const resolved = await resolveFileReference2(cwd, searchRoots);
12024
+ if (resolved.resolvedPath) {
12025
+ resolvedCwd = path32.resolve(resolved.resolvedPath);
12026
+ } else {
12027
+ logWarning2(
12028
+ `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
12029
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12030
+ );
12031
+ }
12032
+ } else {
12033
+ resolvedCwd = searchRoots[0];
12034
+ }
12035
+ evaluators.push({
12036
+ name,
12037
+ type: "code",
12038
+ script,
12039
+ cwd,
12040
+ resolvedCwd
12041
+ });
12042
+ continue;
12043
+ }
12044
+ const prompt = asString2(rawEvaluator.prompt);
12045
+ let promptPath;
12046
+ if (prompt) {
12047
+ const resolved = await resolveFileReference2(prompt, searchRoots);
12048
+ if (resolved.resolvedPath) {
12049
+ promptPath = path32.resolve(resolved.resolvedPath);
12050
+ } else {
12051
+ logWarning2(
12052
+ `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
12053
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12054
+ );
12055
+ }
12056
+ }
12057
+ const _model = asString2(rawEvaluator.model);
12058
+ evaluators.push({
12059
+ name,
12060
+ type: "llm_judge",
12061
+ prompt,
12062
+ promptPath
12063
+ });
12064
+ }
12065
+ return evaluators.length > 0 ? evaluators : void 0;
12066
+ }
12067
+ function coerceEvaluator(candidate, contextId) {
12068
+ if (typeof candidate !== "string") {
12069
+ return void 0;
12070
+ }
12071
+ if (isEvaluatorKind(candidate)) {
12072
+ return candidate;
12073
+ }
12074
+ logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
12075
+ return void 0;
12076
+ }
12077
+ function asString2(value) {
12078
+ return typeof value === "string" ? value : void 0;
12079
+ }
12080
+ function isJsonObject2(value) {
12081
+ return typeof value === "object" && value !== null && !Array.isArray(value);
12082
+ }
12083
+ function logWarning2(message, details) {
12084
+ if (details && details.length > 0) {
12085
+ const detailBlock = details.join("\n");
12086
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}
12087
+ ${detailBlock}${ANSI_RESET2}`);
12088
+ } else {
12089
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
12090
+ }
12091
+ }
12092
+ var ANSI_YELLOW3 = "\x1B[33m";
12093
+ var ANSI_RESET3 = "\x1B[0m";
12094
+ async function processMessages(options) {
12095
+ const {
12096
+ messages,
12097
+ searchRoots,
12098
+ repoRootPath,
12099
+ guidelinePatterns,
12100
+ guidelinePaths,
12101
+ textParts,
12102
+ messageType,
12103
+ verbose
12104
+ } = options;
12105
+ const segments = [];
12106
+ for (const message of messages) {
12107
+ const content = message.content;
12108
+ if (typeof content === "string") {
12109
+ segments.push({ type: "text", value: content });
12110
+ if (textParts) {
12111
+ textParts.push(content);
12112
+ }
12113
+ continue;
12114
+ }
12115
+ for (const rawSegment of content) {
12116
+ if (!isJsonObject(rawSegment)) {
12117
+ continue;
12118
+ }
12119
+ const segmentType = asString3(rawSegment.type);
12120
+ if (segmentType === "file") {
12121
+ const rawValue = asString3(rawSegment.value);
12122
+ if (!rawValue) {
12123
+ continue;
11876
12124
  }
11877
- const { displayPath, resolvedPath, attempted } = await resolveFileReference(
12125
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
11878
12126
  rawValue,
11879
12127
  searchRoots
11880
12128
  );
11881
12129
  if (!resolvedPath) {
11882
12130
  const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
11883
12131
  const context2 = messageType === "input" ? "" : " in expected_messages";
11884
- logWarning(`File not found${context2}: ${displayPath}`, attempts);
12132
+ logWarning3(`File not found${context2}: ${displayPath}`, attempts);
11885
12133
  continue;
11886
12134
  }
11887
12135
  try {
11888
- const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
12136
+ const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
11889
12137
  if (messageType === "input" && guidelinePatterns && guidelinePaths) {
11890
- const relativeToRepo = path8.relative(repoRootPath, resolvedPath);
12138
+ const relativeToRepo = path42.relative(repoRootPath, resolvedPath);
11891
12139
  if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
11892
- guidelinePaths.push(path8.resolve(resolvedPath));
12140
+ guidelinePaths.push(path42.resolve(resolvedPath));
11893
12141
  if (verbose) {
11894
12142
  console.log(` [Guideline] Found: ${displayPath}`);
11895
12143
  console.log(` Resolved to: ${resolvedPath}`);
@@ -11901,7 +12149,7 @@ async function processMessages(options) {
11901
12149
  type: "file",
11902
12150
  path: displayPath,
11903
12151
  text: fileContent,
11904
- resolvedPath: path8.resolve(resolvedPath)
12152
+ resolvedPath: path42.resolve(resolvedPath)
11905
12153
  });
11906
12154
  if (verbose) {
11907
12155
  const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -11910,7 +12158,7 @@ async function processMessages(options) {
11910
12158
  }
11911
12159
  } catch (error) {
11912
12160
  const context2 = messageType === "input" ? "" : " expected output";
11913
- logWarning(`Could not read${context2} file ${resolvedPath}: ${error.message}`);
12161
+ logWarning3(`Could not read${context2} file ${resolvedPath}: ${error.message}`);
11914
12162
  }
11915
12163
  continue;
11916
12164
  }
@@ -11924,201 +12172,113 @@ async function processMessages(options) {
11924
12172
  }
11925
12173
  return segments;
11926
12174
  }
11927
- async function loadEvalCases(evalFilePath, repoRoot, options) {
11928
- const verbose = options?.verbose ?? false;
11929
- const evalIdFilter = options?.evalId;
11930
- const absoluteTestPath = path8.resolve(evalFilePath);
11931
- if (!await fileExists2(absoluteTestPath)) {
11932
- throw new Error(`Test file not found: ${evalFilePath}`);
11933
- }
11934
- const repoRootPath = resolveToAbsolutePath(repoRoot);
11935
- const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
11936
- const config = await loadConfig(absoluteTestPath, repoRootPath);
11937
- const guidelinePatterns = config?.guideline_patterns;
11938
- const rawFile = await readFile3(absoluteTestPath, "utf8");
11939
- const parsed = parse3(rawFile);
11940
- if (!isJsonObject(parsed)) {
11941
- throw new Error(`Invalid test file format: ${evalFilePath}`);
11942
- }
11943
- const suite = parsed;
11944
- const datasetNameFromSuite = asString(suite.dataset)?.trim();
11945
- const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
11946
- const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
11947
- const schema = suite.$schema;
11948
- if (schema !== SCHEMA_EVAL_V2) {
11949
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
11950
- Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11951
- throw new Error(message);
12175
+ async function resolveAssistantContent(content, searchRoots, verbose) {
12176
+ if (typeof content === "string") {
12177
+ return content;
11952
12178
  }
11953
- const rawTestcases = suite.evalcases;
11954
- if (!Array.isArray(rawTestcases)) {
11955
- throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
12179
+ if (!content) {
12180
+ return "";
11956
12181
  }
11957
- const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
11958
- const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
11959
- const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
11960
- const results = [];
11961
- for (const rawEvalcase of rawTestcases) {
11962
- if (!isJsonObject(rawEvalcase)) {
11963
- logWarning("Skipping invalid eval case entry (expected object)");
12182
+ const parts = [];
12183
+ for (const entry of content) {
12184
+ if (typeof entry === "string") {
12185
+ parts.push({ content: entry, isFile: false });
11964
12186
  continue;
11965
12187
  }
11966
- const evalcase = rawEvalcase;
11967
- const id = asString(evalcase.id);
11968
- if (evalIdFilter && id !== evalIdFilter) {
12188
+ if (!isJsonObject(entry)) {
11969
12189
  continue;
11970
12190
  }
11971
- const conversationId = asString(evalcase.conversation_id);
11972
- const outcome = asString(evalcase.outcome);
11973
- const inputMessagesValue = evalcase.input_messages;
11974
- const expectedMessagesValue = evalcase.expected_messages;
11975
- if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
11976
- logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
12191
+ const segmentType = asString3(entry.type);
12192
+ if (segmentType === "file") {
12193
+ const rawValue = asString3(entry.value);
12194
+ if (!rawValue) {
12195
+ continue;
12196
+ }
12197
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
12198
+ rawValue,
12199
+ searchRoots
12200
+ );
12201
+ if (!resolvedPath) {
12202
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
12203
+ logWarning3(`File not found in expected_messages: ${displayPath}`, attempts);
12204
+ continue;
12205
+ }
12206
+ try {
12207
+ const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
12208
+ parts.push({ content: fileContent, isFile: true, displayPath });
12209
+ if (verbose) {
12210
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
12211
+ console.log(` Resolved to: ${resolvedPath}`);
12212
+ }
12213
+ } catch (error) {
12214
+ logWarning3(`Could not read file ${resolvedPath}: ${error.message}`);
12215
+ }
11977
12216
  continue;
11978
12217
  }
11979
- const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
11980
- const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
11981
- const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
11982
- if (hasExpectedMessages && expectedMessages.length === 0) {
11983
- logWarning(`No valid expected message found for eval case: ${id}`);
12218
+ const textValue = asString3(entry.text);
12219
+ if (typeof textValue === "string") {
12220
+ parts.push({ content: textValue, isFile: false });
11984
12221
  continue;
11985
12222
  }
11986
- if (expectedMessages.length > 1) {
11987
- logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
11988
- }
11989
- const guidelinePaths = [];
11990
- const inputTextParts = [];
11991
- const inputSegments = await processMessages({
11992
- messages: inputMessages,
11993
- searchRoots,
11994
- repoRootPath,
11995
- guidelinePatterns,
11996
- guidelinePaths,
11997
- textParts: inputTextParts,
11998
- messageType: "input",
11999
- verbose
12000
- });
12001
- const outputSegments = hasExpectedMessages ? await processMessages({
12002
- messages: expectedMessages,
12003
- searchRoots,
12004
- repoRootPath,
12005
- guidelinePatterns,
12006
- messageType: "output",
12007
- verbose
12008
- }) : [];
12009
- const codeSnippets = extractCodeBlocks(inputSegments);
12010
- const expectedContent = expectedMessages[0]?.content;
12011
- const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
12012
- const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
12013
- const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
12014
- const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
12015
- const userFilePaths = [];
12016
- for (const segment of inputSegments) {
12017
- if (segment.type === "file" && typeof segment.resolvedPath === "string") {
12018
- userFilePaths.push(segment.resolvedPath);
12019
- }
12020
- }
12021
- const allFilePaths = [
12022
- ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
12023
- ...userFilePaths
12024
- ];
12025
- const testCase = {
12026
- id,
12027
- dataset: datasetName,
12028
- conversation_id: conversationId,
12029
- question,
12030
- input_messages: inputMessages,
12031
- input_segments: inputSegments,
12032
- output_segments: outputSegments,
12033
- reference_answer: referenceAnswer,
12034
- guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
12035
- guideline_patterns: guidelinePatterns,
12036
- file_paths: allFilePaths,
12037
- code_snippets: codeSnippets,
12038
- expected_outcome: outcome,
12039
- evaluator: evalCaseEvaluatorKind,
12040
- evaluators
12041
- };
12042
- if (verbose) {
12043
- console.log(`
12044
- [Eval Case: ${id}]`);
12045
- if (testCase.guideline_paths.length > 0) {
12046
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
12047
- for (const guidelinePath of testCase.guideline_paths) {
12048
- console.log(` - ${guidelinePath}`);
12049
- }
12050
- } else {
12051
- console.log(" No guidelines found");
12052
- }
12223
+ const valueValue = asString3(entry.value);
12224
+ if (typeof valueValue === "string") {
12225
+ parts.push({ content: valueValue, isFile: false });
12226
+ continue;
12053
12227
  }
12054
- results.push(testCase);
12228
+ parts.push({ content: JSON.stringify(entry), isFile: false });
12055
12229
  }
12056
- return results;
12230
+ return formatFileContents(parts);
12057
12231
  }
12058
- function needsRoleMarkers(messages, processedSegmentsByMessage) {
12059
- if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
12060
- return true;
12061
- }
12062
- let messagesWithContent = 0;
12063
- for (const segments of processedSegmentsByMessage) {
12064
- if (hasVisibleContent(segments)) {
12065
- messagesWithContent++;
12066
- }
12067
- }
12068
- return messagesWithContent > 1;
12232
+ function asString3(value) {
12233
+ return typeof value === "string" ? value : void 0;
12069
12234
  }
12070
- function hasVisibleContent(segments) {
12071
- return segments.some((segment) => {
12072
- const type = asString(segment.type);
12073
- if (type === "text") {
12074
- const value = asString(segment.value);
12075
- return value !== void 0 && value.trim().length > 0;
12076
- }
12077
- if (type === "guideline_ref") {
12078
- return false;
12079
- }
12080
- if (type === "file") {
12081
- const text = asString(segment.text);
12082
- return text !== void 0 && text.trim().length > 0;
12083
- }
12084
- return false;
12085
- });
12235
+ function cloneJsonObject(source2) {
12236
+ const entries = Object.entries(source2).map(([key2, value]) => [key2, cloneJsonValue(value)]);
12237
+ return Object.fromEntries(entries);
12086
12238
  }
12087
- function formatSegment(segment) {
12088
- const type = asString(segment.type);
12089
- if (type === "text") {
12090
- return asString(segment.value);
12239
+ function cloneJsonValue(value) {
12240
+ if (value === null) {
12241
+ return null;
12091
12242
  }
12092
- if (type === "guideline_ref") {
12093
- const refPath = asString(segment.path);
12094
- return refPath ? `<Attached: ${refPath}>` : void 0;
12243
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
12244
+ return value;
12095
12245
  }
12096
- if (type === "file") {
12097
- const text = asString(segment.text);
12098
- const filePath = asString(segment.path);
12099
- if (text && filePath) {
12100
- return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
12101
- }
12246
+ if (Array.isArray(value)) {
12247
+ return value.map((item) => cloneJsonValue(item));
12248
+ }
12249
+ if (typeof value === "object") {
12250
+ return cloneJsonObject(value);
12251
+ }
12252
+ return value;
12253
+ }
12254
+ function logWarning3(message, details) {
12255
+ if (details && details.length > 0) {
12256
+ const detailBlock = details.join("\n");
12257
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}
12258
+ ${detailBlock}${ANSI_RESET3}`);
12259
+ } else {
12260
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
12102
12261
  }
12103
- return void 0;
12104
12262
  }
12263
+ var ANSI_YELLOW4 = "\x1B[33m";
12264
+ var ANSI_RESET4 = "\x1B[0m";
12105
12265
  async function buildPromptInputs(testCase) {
12106
12266
  const guidelineParts = [];
12107
12267
  for (const rawPath of testCase.guideline_paths) {
12108
- const absolutePath = path8.resolve(rawPath);
12268
+ const absolutePath = path52.resolve(rawPath);
12109
12269
  if (!await fileExists2(absolutePath)) {
12110
- logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
12270
+ logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
12111
12271
  continue;
12112
12272
  }
12113
12273
  try {
12114
- const content = (await readFile3(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
12274
+ const content = (await readFile32(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
12115
12275
  guidelineParts.push({
12116
12276
  content,
12117
12277
  isFile: true,
12118
- displayPath: path8.basename(absolutePath)
12278
+ displayPath: path52.basename(absolutePath)
12119
12279
  });
12120
12280
  } catch (error) {
12121
- logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
12281
+ logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
12122
12282
  }
12123
12283
  }
12124
12284
  const guidelines = formatFileContents(guidelineParts);
@@ -12142,9 +12302,9 @@ async function buildPromptInputs(testCase) {
12142
12302
  messageSegments.push({ type: "text", value: segment });
12143
12303
  }
12144
12304
  } else if (isJsonObject(segment)) {
12145
- const type = asString(segment.type);
12305
+ const type = asString4(segment.type);
12146
12306
  if (type === "file") {
12147
- const value = asString(segment.value);
12307
+ const value = asString4(segment.value);
12148
12308
  if (!value) continue;
12149
12309
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
12150
12310
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -12155,7 +12315,7 @@ async function buildPromptInputs(testCase) {
12155
12315
  messageSegments.push({ type: "file", text: fileText, path: value });
12156
12316
  }
12157
12317
  } else if (type === "text") {
12158
- const textValue = asString(segment.value);
12318
+ const textValue = asString4(segment.value);
12159
12319
  if (textValue && textValue.trim().length > 0) {
12160
12320
  messageSegments.push({ type: "text", value: textValue });
12161
12321
  }
@@ -12211,6 +12371,18 @@ ${messageContent}`);
12211
12371
  }) : void 0;
12212
12372
  return { question, guidelines, chatPrompt };
12213
12373
  }
12374
+ function needsRoleMarkers(messages, processedSegmentsByMessage) {
12375
+ if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
12376
+ return true;
12377
+ }
12378
+ let messagesWithContent = 0;
12379
+ for (const segments of processedSegmentsByMessage) {
12380
+ if (hasVisibleContent(segments)) {
12381
+ messagesWithContent++;
12382
+ }
12383
+ }
12384
+ return messagesWithContent > 1;
12385
+ }
12214
12386
  function buildChatPromptFromSegments(options) {
12215
12387
  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
12216
12388
  if (messages.length === 0) {
@@ -12263,229 +12435,187 @@ ${guidelineContent.trim()}`);
12263
12435
  for (const segment of segments) {
12264
12436
  if (segment.type === "guideline_ref") {
12265
12437
  continue;
12266
- }
12267
- const formatted = formatSegment(segment);
12268
- if (formatted) {
12269
- const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
12270
- if (isGuidelineRef) {
12271
- continue;
12272
- }
12273
- contentParts.push(formatted);
12274
- }
12275
- }
12276
- if (contentParts.length === 0) {
12277
- continue;
12278
- }
12279
- chatPrompt.push({
12280
- role,
12281
- content: contentParts.join("\n"),
12282
- ...name ? { name } : {}
12283
- });
12284
- }
12285
- return chatPrompt.length > 0 ? chatPrompt : void 0;
12286
- }
12287
- async function fileExists2(absolutePath) {
12288
- try {
12289
- await access3(absolutePath, constants3.F_OK);
12290
- return true;
12291
- } catch {
12292
- return false;
12293
- }
12294
- }
12295
- function resolveToAbsolutePath(candidate) {
12296
- if (candidate instanceof URL) {
12297
- return fileURLToPath(candidate);
12298
- }
12299
- if (typeof candidate === "string") {
12300
- if (candidate.startsWith("file://")) {
12301
- return fileURLToPath(new URL(candidate));
12302
- }
12303
- return path8.resolve(candidate);
12304
- }
12305
- throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
12306
- }
12307
- function asString(value) {
12308
- return typeof value === "string" ? value : void 0;
12309
- }
12310
- function cloneJsonObject(source2) {
12311
- const entries = Object.entries(source2).map(([key2, value]) => [key2, cloneJsonValue(value)]);
12312
- return Object.fromEntries(entries);
12313
- }
12314
- function cloneJsonValue(value) {
12315
- if (value === null) {
12316
- return null;
12317
- }
12318
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
12319
- return value;
12320
- }
12321
- if (Array.isArray(value)) {
12322
- return value.map((item) => cloneJsonValue(item));
12323
- }
12324
- return cloneJsonObject(value);
12325
- }
12326
- function formatFileContents(parts) {
12327
- const fileCount = parts.filter((p) => p.isFile).length;
12328
- if (fileCount > 0) {
12329
- return parts.map((part) => {
12330
- if (part.isFile && part.displayPath) {
12331
- return `<file path="${part.displayPath}">
12332
- ${part.content}
12333
- </file>`;
12334
- }
12335
- return part.content;
12336
- }).join("\n\n");
12337
- }
12338
- return parts.map((p) => p.content).join(" ");
12339
- }
12340
- async function resolveAssistantContent(content, searchRoots, verbose) {
12341
- if (typeof content === "string") {
12342
- return content;
12343
- }
12344
- if (!content) {
12345
- return "";
12346
- }
12347
- const parts = [];
12348
- for (const entry of content) {
12349
- if (typeof entry === "string") {
12350
- parts.push({ content: entry, isFile: false });
12351
- continue;
12352
- }
12353
- if (!isJsonObject(entry)) {
12354
- continue;
12355
- }
12356
- const segmentType = asString(entry.type);
12357
- if (segmentType === "file") {
12358
- const rawValue = asString(entry.value);
12359
- if (!rawValue) {
12360
- continue;
12361
- }
12362
- const { displayPath, resolvedPath, attempted } = await resolveFileReference(
12363
- rawValue,
12364
- searchRoots
12365
- );
12366
- if (!resolvedPath) {
12367
- const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
12368
- logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
12369
- continue;
12370
- }
12371
- try {
12372
- const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
12373
- parts.push({ content: fileContent, isFile: true, displayPath });
12374
- if (verbose) {
12375
- console.log(` [Expected Assistant File] Found: ${displayPath}`);
12376
- console.log(` Resolved to: ${resolvedPath}`);
12438
+ }
12439
+ const formatted = formatSegment(segment);
12440
+ if (formatted) {
12441
+ const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
12442
+ if (isGuidelineRef) {
12443
+ continue;
12377
12444
  }
12378
- } catch (error) {
12379
- logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
12445
+ contentParts.push(formatted);
12380
12446
  }
12381
- continue;
12382
12447
  }
12383
- const textValue = asString(entry.text);
12384
- if (typeof textValue === "string") {
12385
- parts.push({ content: textValue, isFile: false });
12448
+ if (contentParts.length === 0) {
12386
12449
  continue;
12387
12450
  }
12388
- const valueValue = asString(entry.value);
12389
- if (typeof valueValue === "string") {
12390
- parts.push({ content: valueValue, isFile: false });
12391
- continue;
12451
+ chatPrompt.push({
12452
+ role,
12453
+ content: contentParts.join("\n"),
12454
+ ...name ? { name } : {}
12455
+ });
12456
+ }
12457
+ return chatPrompt.length > 0 ? chatPrompt : void 0;
12458
+ }
12459
+ function asString4(value) {
12460
+ return typeof value === "string" ? value : void 0;
12461
+ }
12462
+ function logWarning4(message) {
12463
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
12464
+ }
12465
+ var ANSI_YELLOW5 = "\x1B[33m";
12466
+ var ANSI_RESET5 = "\x1B[0m";
12467
+ var SCHEMA_EVAL_V2 = "agentv-eval-v2";
12468
+ async function readTestSuiteMetadata(testFilePath) {
12469
+ try {
12470
+ const absolutePath = path62.resolve(testFilePath);
12471
+ const content = await readFile4(absolutePath, "utf8");
12472
+ const parsed = parse22(content);
12473
+ if (!isJsonObject(parsed)) {
12474
+ return {};
12392
12475
  }
12393
- parts.push({ content: JSON.stringify(entry), isFile: false });
12476
+ return { target: extractTargetFromSuite(parsed) };
12477
+ } catch {
12478
+ return {};
12394
12479
  }
12395
- return formatFileContents(parts);
12396
12480
  }
12397
- async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
12398
- const execution = rawEvalCase.execution;
12399
- const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
12400
- if (candidateEvaluators === void 0) {
12401
- return void 0;
12481
+ async function loadEvalCases(evalFilePath, repoRoot, options) {
12482
+ const verbose = options?.verbose ?? false;
12483
+ const evalIdFilter = options?.evalId;
12484
+ const absoluteTestPath = path62.resolve(evalFilePath);
12485
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
12486
+ const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
12487
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
12488
+ const guidelinePatterns = config?.guideline_patterns;
12489
+ const rawFile = await readFile4(absoluteTestPath, "utf8");
12490
+ const parsed = parse22(rawFile);
12491
+ if (!isJsonObject(parsed)) {
12492
+ throw new Error(`Invalid test file format: ${evalFilePath}`);
12402
12493
  }
12403
- if (!Array.isArray(candidateEvaluators)) {
12404
- logWarning(`Skipping evaluators for '${evalId}': expected array`);
12405
- return void 0;
12494
+ const suite = parsed;
12495
+ const datasetNameFromSuite = asString5(suite.dataset)?.trim();
12496
+ const fallbackDataset = path62.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
12497
+ const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
12498
+ const schema = suite.$schema;
12499
+ if (schema !== SCHEMA_EVAL_V2) {
12500
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
12501
+ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
12502
+ throw new Error(message);
12406
12503
  }
12407
- const evaluators = [];
12408
- for (const rawEvaluator of candidateEvaluators) {
12409
- if (!isJsonObject(rawEvaluator)) {
12410
- logWarning(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
12504
+ const rawTestcases = suite.evalcases;
12505
+ if (!Array.isArray(rawTestcases)) {
12506
+ throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
12507
+ }
12508
+ const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
12509
+ const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
12510
+ const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
12511
+ const results = [];
12512
+ for (const rawEvalcase of rawTestcases) {
12513
+ if (!isJsonObject(rawEvalcase)) {
12514
+ logWarning5("Skipping invalid eval case entry (expected object)");
12411
12515
  continue;
12412
12516
  }
12413
- const name = asString(rawEvaluator.name);
12414
- const typeValue = rawEvaluator.type;
12415
- if (!name || !isEvaluatorKind(typeValue)) {
12416
- logWarning(`Skipping evaluator with invalid name/type in '${evalId}'`);
12517
+ const evalcase = rawEvalcase;
12518
+ const id = asString5(evalcase.id);
12519
+ if (evalIdFilter && id !== evalIdFilter) {
12417
12520
  continue;
12418
12521
  }
12419
- if (typeValue === "code") {
12420
- const script = asString(rawEvaluator.script);
12421
- if (!script) {
12422
- logWarning(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
12423
- continue;
12424
- }
12425
- const cwd = asString(rawEvaluator.cwd);
12426
- let resolvedCwd;
12427
- if (cwd) {
12428
- const resolved = await resolveFileReference(cwd, searchRoots);
12429
- if (resolved.resolvedPath) {
12430
- resolvedCwd = path8.resolve(resolved.resolvedPath);
12431
- } else {
12432
- logWarning(
12433
- `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
12434
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12435
- );
12436
- }
12437
- } else {
12438
- resolvedCwd = searchRoots[0];
12439
- }
12440
- evaluators.push({
12441
- name,
12442
- type: "code",
12443
- script,
12444
- cwd,
12445
- resolvedCwd
12446
- });
12522
+ const conversationId = asString5(evalcase.conversation_id);
12523
+ const outcome = asString5(evalcase.outcome);
12524
+ const inputMessagesValue = evalcase.input_messages;
12525
+ const expectedMessagesValue = evalcase.expected_messages;
12526
+ if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
12527
+ logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
12447
12528
  continue;
12448
12529
  }
12449
- const prompt = asString(rawEvaluator.prompt);
12450
- let promptPath;
12451
- if (prompt) {
12452
- const resolved = await resolveFileReference(prompt, searchRoots);
12453
- if (resolved.resolvedPath) {
12454
- promptPath = path8.resolve(resolved.resolvedPath);
12530
+ const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
12531
+ const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
12532
+ const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
12533
+ if (hasExpectedMessages && expectedMessages.length === 0) {
12534
+ logWarning5(`No valid expected message found for eval case: ${id}`);
12535
+ continue;
12536
+ }
12537
+ if (expectedMessages.length > 1) {
12538
+ logWarning5(`Multiple expected messages found for eval case: ${id}, using first`);
12539
+ }
12540
+ const guidelinePaths = [];
12541
+ const inputTextParts = [];
12542
+ const inputSegments = await processMessages({
12543
+ messages: inputMessages,
12544
+ searchRoots,
12545
+ repoRootPath,
12546
+ guidelinePatterns,
12547
+ guidelinePaths,
12548
+ textParts: inputTextParts,
12549
+ messageType: "input",
12550
+ verbose
12551
+ });
12552
+ const outputSegments = hasExpectedMessages ? await processMessages({
12553
+ messages: expectedMessages,
12554
+ searchRoots,
12555
+ repoRootPath,
12556
+ guidelinePatterns,
12557
+ messageType: "output",
12558
+ verbose
12559
+ }) : [];
12560
+ const codeSnippets = extractCodeBlocks(inputSegments);
12561
+ const expectedContent = expectedMessages[0]?.content;
12562
+ const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
12563
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
12564
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
12565
+ const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
12566
+ const userFilePaths = [];
12567
+ for (const segment of inputSegments) {
12568
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
12569
+ userFilePaths.push(segment.resolvedPath);
12570
+ }
12571
+ }
12572
+ const allFilePaths = [
12573
+ ...guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
12574
+ ...userFilePaths
12575
+ ];
12576
+ const testCase = {
12577
+ id,
12578
+ dataset: datasetName,
12579
+ conversation_id: conversationId,
12580
+ question,
12581
+ input_messages: inputMessages,
12582
+ input_segments: inputSegments,
12583
+ output_segments: outputSegments,
12584
+ reference_answer: referenceAnswer,
12585
+ guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
12586
+ guideline_patterns: guidelinePatterns,
12587
+ file_paths: allFilePaths,
12588
+ code_snippets: codeSnippets,
12589
+ expected_outcome: outcome,
12590
+ evaluator: evalCaseEvaluatorKind,
12591
+ evaluators
12592
+ };
12593
+ if (verbose) {
12594
+ console.log(`
12595
+ [Eval Case: ${id}]`);
12596
+ if (testCase.guideline_paths.length > 0) {
12597
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
12598
+ for (const guidelinePath of testCase.guideline_paths) {
12599
+ console.log(` - ${guidelinePath}`);
12600
+ }
12455
12601
  } else {
12456
- logWarning(
12457
- `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
12458
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12459
- );
12602
+ console.log(" No guidelines found");
12460
12603
  }
12461
12604
  }
12462
- const model = asString(rawEvaluator.model);
12463
- evaluators.push({
12464
- name,
12465
- type: "llm_judge",
12466
- prompt,
12467
- promptPath
12468
- });
12605
+ results.push(testCase);
12469
12606
  }
12470
- return evaluators.length > 0 ? evaluators : void 0;
12607
+ return results;
12471
12608
  }
12472
- function coerceEvaluator(candidate, contextId) {
12473
- if (typeof candidate !== "string") {
12474
- return void 0;
12475
- }
12476
- if (isEvaluatorKind(candidate)) {
12477
- return candidate;
12478
- }
12479
- logWarning(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
12480
- return void 0;
12609
+ function asString5(value) {
12610
+ return typeof value === "string" ? value : void 0;
12481
12611
  }
12482
- function logWarning(message, details) {
12612
+ function logWarning5(message, details) {
12483
12613
  if (details && details.length > 0) {
12484
12614
  const detailBlock = details.join("\n");
12485
- console.warn(`${ANSI_YELLOW}Warning: ${message}
12486
- ${detailBlock}${ANSI_RESET}`);
12615
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
12616
+ ${detailBlock}${ANSI_RESET5}`);
12487
12617
  } else {
12488
- console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
12618
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
12489
12619
  }
12490
12620
  }
12491
12621
  var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
@@ -12514,9 +12644,8 @@ function buildChatPrompt(request) {
12514
12644
  }
12515
12645
  function resolveSystemContent(request) {
12516
12646
  const systemSegments = [];
12517
- const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
12518
- if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
12519
- systemSegments.push(metadataSystemPrompt.trim());
12647
+ if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
12648
+ systemSegments.push(request.systemPrompt.trim());
12520
12649
  } else {
12521
12650
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
12522
12651
  }
@@ -12941,7 +13070,7 @@ function normalizeInputFiles(inputFiles) {
12941
13070
  }
12942
13071
  const unique = /* @__PURE__ */ new Map();
12943
13072
  for (const inputFile of inputFiles) {
12944
- const absolutePath = path22.resolve(inputFile);
13073
+ const absolutePath = path72.resolve(inputFile);
12945
13074
  if (!unique.has(absolutePath)) {
12946
13075
  unique.set(absolutePath, absolutePath);
12947
13076
  }
@@ -12955,7 +13084,7 @@ function formatFileList(files, template) {
12955
13084
  const formatter = template ?? "{path}";
12956
13085
  return files.map((filePath) => {
12957
13086
  const escapedPath = shellEscape(filePath);
12958
- const escapedName = shellEscape(path22.basename(filePath));
13087
+ const escapedName = shellEscape(path72.basename(filePath));
12959
13088
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
12960
13089
  }).join(" ");
12961
13090
  }
@@ -12979,7 +13108,7 @@ function generateOutputFilePath(evalCaseId) {
12979
13108
  const safeEvalId = evalCaseId || "unknown";
12980
13109
  const timestamp = Date.now();
12981
13110
  const random = Math.random().toString(36).substring(2, 9);
12982
- return path22.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
13111
+ return path72.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
12983
13112
  }
12984
13113
  function formatTimeoutSuffix(timeoutMs) {
12985
13114
  if (!timeoutMs || timeoutMs <= 0) {
@@ -13056,7 +13185,7 @@ function normalizeInputFiles2(inputFiles) {
13056
13185
  }
13057
13186
  const deduped = /* @__PURE__ */ new Map();
13058
13187
  for (const inputFile of inputFiles) {
13059
- const absolutePath = path32.resolve(inputFile);
13188
+ const absolutePath = path82.resolve(inputFile);
13060
13189
  if (!deduped.has(absolutePath)) {
13061
13190
  deduped.set(absolutePath, absolutePath);
13062
13191
  }
@@ -13069,14 +13198,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
13069
13198
  }
13070
13199
  const unique = /* @__PURE__ */ new Map();
13071
13200
  for (const inputFile of inputFiles) {
13072
- const absolutePath = path32.resolve(inputFile);
13201
+ const absolutePath = path82.resolve(inputFile);
13073
13202
  if (overrides?.has(absolutePath)) {
13074
13203
  if (!unique.has(absolutePath)) {
13075
13204
  unique.set(absolutePath, absolutePath);
13076
13205
  }
13077
13206
  continue;
13078
13207
  }
13079
- const normalized = absolutePath.split(path32.sep).join("/");
13208
+ const normalized = absolutePath.split(path82.sep).join("/");
13080
13209
  if (isGuidelineFile(normalized, guidelinePatterns)) {
13081
13210
  if (!unique.has(absolutePath)) {
13082
13211
  unique.set(absolutePath, absolutePath);
@@ -13091,7 +13220,7 @@ function collectInputFiles(inputFiles) {
13091
13220
  }
13092
13221
  const unique = /* @__PURE__ */ new Map();
13093
13222
  for (const inputFile of inputFiles) {
13094
- const absolutePath = path32.resolve(inputFile);
13223
+ const absolutePath = path82.resolve(inputFile);
13095
13224
  if (!unique.has(absolutePath)) {
13096
13225
  unique.set(absolutePath, absolutePath);
13097
13226
  }
@@ -13103,7 +13232,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
13103
13232
  return "";
13104
13233
  }
13105
13234
  const buildList = (files) => files.map((absolutePath) => {
13106
- const fileName = path32.basename(absolutePath);
13235
+ const fileName = path82.basename(absolutePath);
13107
13236
  const fileUri = pathToFileUri2(absolutePath);
13108
13237
  return `* [${fileName}](${fileUri})`;
13109
13238
  });
@@ -13123,7 +13252,7 @@ ${buildList(inputFiles).join("\n")}.`);
13123
13252
  return sections.join("\n");
13124
13253
  }
13125
13254
  function pathToFileUri2(filePath) {
13126
- const absolutePath = path32.isAbsolute(filePath) ? filePath : path32.resolve(filePath);
13255
+ const absolutePath = path82.isAbsolute(filePath) ? filePath : path82.resolve(filePath);
13127
13256
  const normalizedPath = absolutePath.replace(/\\/g, "/");
13128
13257
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
13129
13258
  return `file:///${normalizedPath}`;
@@ -13159,7 +13288,7 @@ var CodexProvider = class {
13159
13288
  const logger = await this.createStreamLogger(request).catch(() => void 0);
13160
13289
  try {
13161
13290
  const promptContent = buildPromptDocument(request, inputFiles);
13162
- const promptFile = path42.join(workspaceRoot, PROMPT_FILENAME);
13291
+ const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
13163
13292
  await writeFile3(promptFile, promptContent, "utf8");
13164
13293
  const args = this.buildCodexArgs();
13165
13294
  const cwd = this.resolveCwd(workspaceRoot);
@@ -13209,7 +13338,7 @@ var CodexProvider = class {
13209
13338
  if (!this.config.cwd) {
13210
13339
  return workspaceRoot;
13211
13340
  }
13212
- return path42.resolve(this.config.cwd);
13341
+ return path9.resolve(this.config.cwd);
13213
13342
  }
13214
13343
  buildCodexArgs() {
13215
13344
  const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
@@ -13243,7 +13372,7 @@ var CodexProvider = class {
13243
13372
  }
13244
13373
  }
13245
13374
  async createWorkspace() {
13246
- return await mkdtemp(path42.join(tmpdir(), WORKSPACE_PREFIX));
13375
+ return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
13247
13376
  }
13248
13377
  async cleanupWorkspace(workspaceRoot) {
13249
13378
  try {
@@ -13257,9 +13386,9 @@ var CodexProvider = class {
13257
13386
  return void 0;
13258
13387
  }
13259
13388
  if (this.config.logDir) {
13260
- return path42.resolve(this.config.logDir);
13389
+ return path9.resolve(this.config.logDir);
13261
13390
  }
13262
- return path42.join(process.cwd(), ".agentv", "logs", "codex");
13391
+ return path9.join(process.cwd(), ".agentv", "logs", "codex");
13263
13392
  }
13264
13393
  async createStreamLogger(request) {
13265
13394
  const logDir = this.resolveLogDirectory();
@@ -13273,7 +13402,7 @@ var CodexProvider = class {
13273
13402
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
13274
13403
  return void 0;
13275
13404
  }
13276
- const filePath = path42.join(logDir, buildLogFilename(request, this.targetName));
13405
+ const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
13277
13406
  try {
13278
13407
  const logger = await CodexStreamLogger.create({
13279
13408
  filePath,
@@ -13488,7 +13617,7 @@ function tryParseJsonValue(rawLine) {
13488
13617
  async function locateExecutable(candidate) {
13489
13618
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
13490
13619
  if (includesPathSeparator) {
13491
- const resolved = path42.isAbsolute(candidate) ? candidate : path42.resolve(candidate);
13620
+ const resolved = path9.isAbsolute(candidate) ? candidate : path9.resolve(candidate);
13492
13621
  const executablePath = await ensureWindowsExecutableVariant(resolved);
13493
13622
  await access22(executablePath, constants22.F_OK);
13494
13623
  return executablePath;
@@ -13942,6 +14071,9 @@ var VSCodeProvider = class {
13942
14071
  };
13943
14072
  function buildPromptDocument2(request, attachments, guidelinePatterns) {
13944
14073
  const parts = [];
14074
+ if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
14075
+ parts.push(request.systemPrompt.trim());
14076
+ }
13945
14077
  const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
13946
14078
  const attachmentFiles = collectAttachmentFiles(attachments);
13947
14079
  const nonGuidelineAttachments = attachmentFiles.filter(
@@ -13959,7 +14091,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
13959
14091
  return "";
13960
14092
  }
13961
14093
  const buildList = (files) => files.map((absolutePath) => {
13962
- const fileName = path52.basename(absolutePath);
14094
+ const fileName = path10.basename(absolutePath);
13963
14095
  const fileUri = pathToFileUri22(absolutePath);
13964
14096
  return `* [${fileName}](${fileUri})`;
13965
14097
  });
@@ -13984,8 +14116,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
13984
14116
  }
13985
14117
  const unique = /* @__PURE__ */ new Map();
13986
14118
  for (const attachment of attachments) {
13987
- const absolutePath = path52.resolve(attachment);
13988
- const normalized = absolutePath.split(path52.sep).join("/");
14119
+ const absolutePath = path10.resolve(attachment);
14120
+ const normalized = absolutePath.split(path10.sep).join("/");
13989
14121
  if (isGuidelineFile(normalized, guidelinePatterns)) {
13990
14122
  if (!unique.has(absolutePath)) {
13991
14123
  unique.set(absolutePath, absolutePath);
@@ -14000,7 +14132,7 @@ function collectAttachmentFiles(attachments) {
14000
14132
  }
14001
14133
  const unique = /* @__PURE__ */ new Map();
14002
14134
  for (const attachment of attachments) {
14003
- const absolutePath = path52.resolve(attachment);
14135
+ const absolutePath = path10.resolve(attachment);
14004
14136
  if (!unique.has(absolutePath)) {
14005
14137
  unique.set(absolutePath, absolutePath);
14006
14138
  }
@@ -14008,7 +14140,7 @@ function collectAttachmentFiles(attachments) {
14008
14140
  return Array.from(unique.values());
14009
14141
  }
14010
14142
  function pathToFileUri22(filePath) {
14011
- const absolutePath = path52.isAbsolute(filePath) ? filePath : path52.resolve(filePath);
14143
+ const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
14012
14144
  const normalizedPath = absolutePath.replace(/\\/g, "/");
14013
14145
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
14014
14146
  return `file:///${normalizedPath}`;
@@ -14021,7 +14153,7 @@ function normalizeAttachments(attachments) {
14021
14153
  }
14022
14154
  const deduped = /* @__PURE__ */ new Set();
14023
14155
  for (const attachment of attachments) {
14024
- deduped.add(path52.resolve(attachment));
14156
+ deduped.add(path10.resolve(attachment));
14025
14157
  }
14026
14158
  return Array.from(deduped);
14027
14159
  }
@@ -14030,7 +14162,7 @@ function mergeAttachments(all) {
14030
14162
  for (const list of all) {
14031
14163
  if (!list) continue;
14032
14164
  for (const inputFile of list) {
14033
- deduped.add(path52.resolve(inputFile));
14165
+ deduped.add(path10.resolve(inputFile));
14034
14166
  }
14035
14167
  }
14036
14168
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -14127,12 +14259,12 @@ async function fileExists3(filePath) {
14127
14259
  }
14128
14260
  }
14129
14261
  async function readTargetDefinitions(filePath) {
14130
- const absolutePath = path62.resolve(filePath);
14262
+ const absolutePath = path11.resolve(filePath);
14131
14263
  if (!await fileExists3(absolutePath)) {
14132
14264
  throw new Error(`targets.yaml not found at ${absolutePath}`);
14133
14265
  }
14134
- const raw = await readFile22(absolutePath, "utf8");
14135
- const parsed = parse22(raw);
14266
+ const raw = await readFile5(absolutePath, "utf8");
14267
+ const parsed = parse32(raw);
14136
14268
  if (!isRecord(parsed)) {
14137
14269
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
14138
14270
  }
@@ -14167,17 +14299,34 @@ function createProvider(target) {
14167
14299
  }
14168
14300
  }
14169
14301
  }
14302
+ var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
14303
+
14304
+ Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
14305
+
14306
+ Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
14307
+
14308
+ [[ ## expected_outcome ## ]]
14309
+ {{expected_outcome}}
14310
+
14311
+ [[ ## question ## ]]
14312
+ {{question}}
14313
+
14314
+ [[ ## reference_answer ## ]]
14315
+ {{reference_answer}}
14316
+
14317
+ [[ ## candidate_answer ## ]]
14318
+ {{candidate_answer}}`;
14170
14319
  var LlmJudgeEvaluator = class {
14171
14320
  kind = "llm_judge";
14172
14321
  resolveJudgeProvider;
14173
14322
  maxOutputTokens;
14174
14323
  temperature;
14175
- customPrompt;
14324
+ evaluatorTemplate;
14176
14325
  constructor(options) {
14177
14326
  this.resolveJudgeProvider = options.resolveJudgeProvider;
14178
14327
  this.maxOutputTokens = options.maxOutputTokens;
14179
14328
  this.temperature = options.temperature;
14180
- this.customPrompt = options.customPrompt;
14329
+ this.evaluatorTemplate = options.evaluatorTemplate;
14181
14330
  }
14182
14331
  async evaluate(context2) {
14183
14332
  const judgeProvider = await this.resolveJudgeProvider(context2);
@@ -14187,26 +14336,21 @@ var LlmJudgeEvaluator = class {
14187
14336
  return this.evaluateWithPrompt(context2, judgeProvider);
14188
14337
  }
14189
14338
  async evaluateWithPrompt(context2, judgeProvider) {
14190
- const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context2.evalCase);
14191
14339
  const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
14192
- let prompt = buildQualityPrompt(context2.evalCase, context2.candidate, formattedQuestion);
14193
- let systemPrompt = context2.systemPrompt ?? this.customPrompt ?? buildSystemPrompt(hasReferenceAnswer);
14194
- if (systemPrompt && hasTemplateVariables(systemPrompt)) {
14195
- const variables = {
14196
- input_messages: JSON.stringify(context2.evalCase.input_segments, null, 2),
14197
- output_messages: JSON.stringify(context2.evalCase.output_segments, null, 2),
14198
- candidate_answer: context2.candidate,
14199
- reference_answer: context2.evalCase.reference_answer ?? "",
14200
- expected_outcome: context2.evalCase.expected_outcome,
14201
- question: formattedQuestion
14202
- };
14203
- prompt = substituteVariables(systemPrompt, variables);
14204
- systemPrompt = buildSystemPrompt(hasReferenceAnswer);
14205
- }
14206
- const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
14340
+ const variables = {
14341
+ input_messages: JSON.stringify(context2.evalCase.input_segments, null, 2),
14342
+ output_messages: JSON.stringify(context2.evalCase.output_segments, null, 2),
14343
+ candidate_answer: context2.candidate.trim(),
14344
+ reference_answer: (context2.evalCase.reference_answer ?? "").trim(),
14345
+ expected_outcome: context2.evalCase.expected_outcome.trim(),
14346
+ question: formattedQuestion.trim()
14347
+ };
14348
+ const systemPrompt = buildOutputSchema();
14349
+ const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
14350
+ const userPrompt = substituteVariables(evaluatorTemplate, variables);
14207
14351
  const response = await judgeProvider.invoke({
14208
- question: prompt,
14209
- metadata,
14352
+ question: userPrompt,
14353
+ systemPrompt,
14210
14354
  evalCaseId: context2.evalCase.id,
14211
14355
  attempt: context2.attempt,
14212
14356
  maxOutputTokens: this.maxOutputTokens,
@@ -14219,11 +14363,9 @@ var LlmJudgeEvaluator = class {
14219
14363
  const reasoning = parsed.reasoning ?? response.reasoning;
14220
14364
  const expectedAspectCount = Math.max(hits.length + misses.length, 1);
14221
14365
  const evaluatorRawRequest = {
14222
- id: randomUUID2(),
14223
- provider: judgeProvider.id,
14224
- prompt,
14225
- target: context2.target.name,
14226
- ...systemPrompt !== void 0 && { systemPrompt }
14366
+ userPrompt,
14367
+ systemPrompt,
14368
+ target: judgeProvider.targetName
14227
14369
  };
14228
14370
  return {
14229
14371
  score,
@@ -14235,20 +14377,8 @@ var LlmJudgeEvaluator = class {
14235
14377
  };
14236
14378
  }
14237
14379
  };
14238
- function buildSystemPrompt(hasReferenceAnswer) {
14239
- const basePrompt = [
14240
- "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
14241
- ""
14242
- ];
14243
- if (hasReferenceAnswer) {
14244
- basePrompt.push(
14245
- "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
14246
- ""
14247
- );
14248
- }
14249
- basePrompt.push(
14250
- "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
14251
- "",
14380
+ function buildOutputSchema() {
14381
+ return [
14252
14382
  "You must respond with a single JSON object matching this schema:",
14253
14383
  "",
14254
14384
  "{",
@@ -14257,30 +14387,7 @@ function buildSystemPrompt(hasReferenceAnswer) {
14257
14387
  ' "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
14258
14388
  ' "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
14259
14389
  "}"
14260
- );
14261
- return basePrompt.join("\n");
14262
- }
14263
- function buildQualityPrompt(evalCase, candidate, question) {
14264
- const parts = [
14265
- "[[ ## expected_outcome ## ]]",
14266
- evalCase.expected_outcome.trim(),
14267
- "",
14268
- "[[ ## question ## ]]",
14269
- question.trim(),
14270
- ""
14271
- ];
14272
- if (hasNonEmptyReferenceAnswer(evalCase)) {
14273
- parts.push(
14274
- "[[ ## reference_answer ## ]]",
14275
- evalCase.reference_answer.trim(),
14276
- ""
14277
- );
14278
- }
14279
- parts.push(
14280
- "[[ ## candidate_answer ## ]]",
14281
- candidate.trim()
14282
- );
14283
- return parts.join("\n");
14390
+ ].join("\n");
14284
14391
  }
14285
14392
  function clampScore(value) {
14286
14393
  if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -14362,9 +14469,6 @@ function extractJsonBlob(text) {
14362
14469
  function isNonEmptyString(value) {
14363
14470
  return typeof value === "string" && value.trim().length > 0;
14364
14471
  }
14365
- function hasNonEmptyReferenceAnswer(evalCase) {
14366
- return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
14367
- }
14368
14472
  var CodeEvaluator = class {
14369
14473
  kind = "code";
14370
14474
  script;
@@ -14470,11 +14574,8 @@ function parseJsonSafe(payload) {
14470
14574
  return void 0;
14471
14575
  }
14472
14576
  }
14473
- function hasTemplateVariables(text) {
14474
- return /\$\{[a-zA-Z0-9_]+\}/.test(text);
14475
- }
14476
14577
  function substituteVariables(template, variables) {
14477
- return template.replace(/\$\{([a-zA-Z0-9_]+)\}/g, (match, varName) => {
14578
+ return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
14478
14579
  return variables[varName] ?? match;
14479
14580
  });
14480
14581
  }
@@ -15034,6 +15135,7 @@ async function evaluateCandidate(options) {
15034
15135
  }
15035
15136
  }
15036
15137
  return {
15138
+ timestamp: completedAt.toISOString(),
15037
15139
  eval_id: evalCase.id,
15038
15140
  dataset: evalCase.dataset,
15039
15141
  conversation_id: evalCase.conversation_id,
@@ -15041,14 +15143,12 @@ async function evaluateCandidate(options) {
15041
15143
  hits: score.hits,
15042
15144
  misses: score.misses,
15043
15145
  candidate_answer: candidate,
15044
- expected_aspect_count: score.expectedAspectCount,
15045
15146
  target: target.name,
15046
- timestamp: completedAt.toISOString(),
15047
15147
  reasoning: score.reasoning,
15048
15148
  raw_aspects: score.rawAspects,
15049
15149
  agent_provider_request: agentProviderRequest,
15050
15150
  lm_provider_request: lmProviderRequest,
15051
- evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
15151
+ evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
15052
15152
  evaluator_results: evaluatorResults
15053
15153
  };
15054
15154
  }
@@ -15125,7 +15225,7 @@ async function runEvaluatorList(options) {
15125
15225
  hits: score2.hits,
15126
15226
  misses: score2.misses,
15127
15227
  reasoning: score2.reasoning,
15128
- evaluator_raw_request: score2.evaluatorRawRequest
15228
+ evaluator_provider_request: score2.evaluatorRawRequest
15129
15229
  });
15130
15230
  continue;
15131
15231
  }
@@ -15152,7 +15252,7 @@ async function runEvaluatorList(options) {
15152
15252
  hits: score2.hits,
15153
15253
  misses: score2.misses,
15154
15254
  reasoning: score2.reasoning,
15155
- evaluator_raw_request: score2.evaluatorRawRequest
15255
+ evaluator_provider_request: score2.evaluatorRawRequest
15156
15256
  });
15157
15257
  continue;
15158
15258
  }
@@ -15205,7 +15305,7 @@ async function runLlmJudgeEvaluator(options) {
15205
15305
  promptInputs,
15206
15306
  now,
15207
15307
  judgeProvider,
15208
- systemPrompt: customPrompt,
15308
+ evaluatorTemplateOverride: customPrompt,
15209
15309
  evaluator: config
15210
15310
  });
15211
15311
  }
@@ -15246,8 +15346,8 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
15246
15346
  async function dumpPrompt(directory, evalCase, promptInputs) {
15247
15347
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
15248
15348
  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
15249
- const filePath = path72.resolve(directory, filename);
15250
- await mkdir22(path72.dirname(filePath), { recursive: true });
15349
+ const filePath = path12.resolve(directory, filename);
15350
+ await mkdir22(path12.dirname(filePath), { recursive: true });
15251
15351
  const payload = {
15252
15352
  eval_id: evalCase.id,
15253
15353
  question: promptInputs.question,
@@ -15261,7 +15361,7 @@ function sanitizeFilename(value) {
15261
15361
  return "prompt";
15262
15362
  }
15263
15363
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
15264
- return sanitized.length > 0 ? sanitized : randomUUID3();
15364
+ return sanitized.length > 0 ? sanitized : randomUUID2();
15265
15365
  }
15266
15366
  async function invokeProvider(provider, options) {
15267
15367
  const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -15317,6 +15417,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
15317
15417
  }
15318
15418
  }
15319
15419
  return {
15420
+ timestamp: timestamp.toISOString(),
15320
15421
  eval_id: evalCase.id,
15321
15422
  dataset: evalCase.dataset,
15322
15423
  conversation_id: evalCase.conversation_id,
@@ -15324,9 +15425,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
15324
15425
  hits: [],
15325
15426
  misses: [`Error: ${message}`],
15326
15427
  candidate_answer: `Error occurred: ${message}`,
15327
- expected_aspect_count: 0,
15328
15428
  target: targetName,
15329
- timestamp: timestamp.toISOString(),
15330
15429
  raw_aspects: [],
15331
15430
  agent_provider_request: agentProviderRequest,
15332
15431
  lm_provider_request: lmProviderRequest,
@@ -15368,19 +15467,19 @@ function createAgentKernel() {
15368
15467
  // src/commands/eval/run-eval.ts
15369
15468
  import { constants as constants6 } from "node:fs";
15370
15469
  import { access as access6, mkdir as mkdir6 } from "node:fs/promises";
15371
- import path14 from "node:path";
15470
+ import path18 from "node:path";
15372
15471
  import { pathToFileURL } from "node:url";
15373
15472
 
15374
15473
  // src/commands/eval/env.ts
15375
15474
  import { config as loadDotenv } from "dotenv";
15376
15475
  import { constants as constants4 } from "node:fs";
15377
15476
  import { access as access4 } from "node:fs/promises";
15378
- import path9 from "node:path";
15477
+ import path13 from "node:path";
15379
15478
  function uniqueDirs(directories) {
15380
15479
  const seen = /* @__PURE__ */ new Set();
15381
15480
  const result = [];
15382
15481
  for (const dir of directories) {
15383
- const absolute = path9.resolve(dir);
15482
+ const absolute = path13.resolve(dir);
15384
15483
  if (seen.has(absolute)) {
15385
15484
  continue;
15386
15485
  }
@@ -15399,14 +15498,14 @@ async function fileExists4(filePath) {
15399
15498
  }
15400
15499
  function collectAncestorDirectories(start, boundary) {
15401
15500
  const directories = [];
15402
- const boundaryDir = path9.resolve(boundary);
15403
- let current = path9.resolve(start);
15501
+ const boundaryDir = path13.resolve(boundary);
15502
+ let current = path13.resolve(start);
15404
15503
  while (current !== void 0) {
15405
15504
  directories.push(current);
15406
15505
  if (current === boundaryDir) {
15407
15506
  break;
15408
15507
  }
15409
- const parent = path9.dirname(current);
15508
+ const parent = path13.dirname(current);
15410
15509
  if (parent === current) {
15411
15510
  break;
15412
15511
  }
@@ -15416,7 +15515,7 @@ function collectAncestorDirectories(start, boundary) {
15416
15515
  }
15417
15516
  async function loadEnvFromHierarchy(options) {
15418
15517
  const { testFilePath, repoRoot, verbose } = options;
15419
- const testDir = path9.dirname(path9.resolve(testFilePath));
15518
+ const testDir = path13.dirname(path13.resolve(testFilePath));
15420
15519
  const cwd = process.cwd();
15421
15520
  const searchDirs = uniqueDirs([
15422
15521
  ...collectAncestorDirectories(testDir, repoRoot),
@@ -15424,7 +15523,7 @@ async function loadEnvFromHierarchy(options) {
15424
15523
  cwd
15425
15524
  ]);
15426
15525
  for (const dir of searchDirs) {
15427
- const candidate = path9.join(dir, ".env");
15526
+ const candidate = path13.join(dir, ".env");
15428
15527
  if (await fileExists4(candidate)) {
15429
15528
  loadDotenv({ path: candidate, override: false });
15430
15529
  if (verbose) {
@@ -15648,7 +15747,7 @@ var Mutex = class {
15648
15747
  // src/commands/eval/jsonl-writer.ts
15649
15748
  import { createWriteStream as createWriteStream2 } from "node:fs";
15650
15749
  import { mkdir as mkdir4 } from "node:fs/promises";
15651
- import path10 from "node:path";
15750
+ import path14 from "node:path";
15652
15751
  import { finished } from "node:stream/promises";
15653
15752
  var JsonlWriter = class _JsonlWriter {
15654
15753
  stream;
@@ -15658,7 +15757,7 @@ var JsonlWriter = class _JsonlWriter {
15658
15757
  this.stream = stream;
15659
15758
  }
15660
15759
  static async open(filePath) {
15661
- await mkdir4(path10.dirname(filePath), { recursive: true });
15760
+ await mkdir4(path14.dirname(filePath), { recursive: true });
15662
15761
  const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
15663
15762
  return new _JsonlWriter(stream);
15664
15763
  }
@@ -15690,7 +15789,7 @@ var JsonlWriter = class _JsonlWriter {
15690
15789
  // src/commands/eval/yaml-writer.ts
15691
15790
  import { createWriteStream as createWriteStream3 } from "node:fs";
15692
15791
  import { mkdir as mkdir5 } from "node:fs/promises";
15693
- import path11 from "node:path";
15792
+ import path15 from "node:path";
15694
15793
  import { finished as finished2 } from "node:stream/promises";
15695
15794
  import { stringify as stringifyYaml } from "yaml";
15696
15795
  var YamlWriter = class _YamlWriter {
@@ -15702,7 +15801,7 @@ var YamlWriter = class _YamlWriter {
15702
15801
  this.stream = stream;
15703
15802
  }
15704
15803
  static async open(filePath) {
15705
- await mkdir5(path11.dirname(filePath), { recursive: true });
15804
+ await mkdir5(path15.dirname(filePath), { recursive: true });
15706
15805
  const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
15707
15806
  return new _YamlWriter(stream);
15708
15807
  }
@@ -15824,12 +15923,12 @@ var ProgressDisplay = class {
15824
15923
  }
15825
15924
  addLogPaths(paths) {
15826
15925
  const newPaths = [];
15827
- for (const path19 of paths) {
15828
- if (this.logPathSet.has(path19)) {
15926
+ for (const path25 of paths) {
15927
+ if (this.logPathSet.has(path25)) {
15829
15928
  continue;
15830
15929
  }
15831
- this.logPathSet.add(path19);
15832
- newPaths.push(path19);
15930
+ this.logPathSet.add(path25);
15931
+ newPaths.push(path25);
15833
15932
  }
15834
15933
  if (newPaths.length === 0) {
15835
15934
  return;
@@ -15845,8 +15944,8 @@ var ProgressDisplay = class {
15845
15944
  this.hasPrintedLogHeader = true;
15846
15945
  }
15847
15946
  const startIndex = this.logPaths.length - newPaths.length;
15848
- newPaths.forEach((path19, offset) => {
15849
- console.log(`${startIndex + offset + 1}. ${path19}`);
15947
+ newPaths.forEach((path25, offset) => {
15948
+ console.log(`${startIndex + offset + 1}. ${path25}`);
15850
15949
  });
15851
15950
  }
15852
15951
  scheduleRender() {
@@ -15894,8 +15993,8 @@ var ProgressDisplay = class {
15894
15993
  if (this.logPaths.length > 0) {
15895
15994
  lines.push("");
15896
15995
  lines.push("Codex CLI logs:");
15897
- this.logPaths.forEach((path19, index) => {
15898
- lines.push(`${index + 1}. ${path19}`);
15996
+ this.logPaths.forEach((path25, index) => {
15997
+ lines.push(`${index + 1}. ${path25}`);
15899
15998
  });
15900
15999
  }
15901
16000
  const rowCount = this.getRenderedRowCount(lines);
@@ -16100,17 +16199,17 @@ function formatEvaluationSummary(summary) {
16100
16199
  }
16101
16200
 
16102
16201
  // ../../packages/core/dist/evaluation/validation/index.js
16103
- import { readFile as readFile4 } from "node:fs/promises";
16202
+ import { readFile as readFile6 } from "node:fs/promises";
16104
16203
  import { parse as parse4 } from "yaml";
16105
16204
  import { readFile as readFile23 } from "node:fs/promises";
16106
- import path12 from "node:path";
16205
+ import path16 from "node:path";
16107
16206
  import { parse as parse23 } from "yaml";
16108
- import { readFile as readFile32 } from "node:fs/promises";
16207
+ import { readFile as readFile33 } from "node:fs/promises";
16109
16208
  import path23 from "node:path";
16110
- import { parse as parse32 } from "yaml";
16209
+ import { parse as parse33 } from "yaml";
16111
16210
  import { readFile as readFile42 } from "node:fs/promises";
16112
16211
  import { parse as parse42 } from "yaml";
16113
- import { readFile as readFile5 } from "node:fs/promises";
16212
+ import { readFile as readFile52 } from "node:fs/promises";
16114
16213
  import path33 from "node:path";
16115
16214
  import { parse as parse5 } from "yaml";
16116
16215
  var SCHEMA_EVAL_V22 = "agentv-eval-v2";
@@ -16118,7 +16217,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
16118
16217
  var SCHEMA_CONFIG_V22 = "agentv-config-v2";
16119
16218
  async function detectFileType(filePath) {
16120
16219
  try {
16121
- const content = await readFile4(filePath, "utf8");
16220
+ const content = await readFile6(filePath, "utf8");
16122
16221
  const parsed = parse4(content);
16123
16222
  if (typeof parsed !== "object" || parsed === null) {
16124
16223
  return "unknown";
@@ -16148,7 +16247,7 @@ function isObject(value) {
16148
16247
  }
16149
16248
  async function validateEvalFile(filePath) {
16150
16249
  const errors = [];
16151
- const absolutePath = path12.resolve(filePath);
16250
+ const absolutePath = path16.resolve(filePath);
16152
16251
  let parsed;
16153
16252
  try {
16154
16253
  const content = await readFile23(absolutePath, "utf8");
@@ -16513,8 +16612,8 @@ async function validateTargetsFile(filePath) {
16513
16612
  const absolutePath = path23.resolve(filePath);
16514
16613
  let parsed;
16515
16614
  try {
16516
- const content = await readFile32(absolutePath, "utf8");
16517
- parsed = parse32(content);
16615
+ const content = await readFile33(absolutePath, "utf8");
16616
+ parsed = parse33(content);
16518
16617
  } catch (error) {
16519
16618
  errors.push({
16520
16619
  severity: "error",
@@ -16865,7 +16964,7 @@ async function validateFileReferences(evalFilePath) {
16865
16964
  const searchRoots = buildSearchRoots(absolutePath, gitRoot);
16866
16965
  let parsed;
16867
16966
  try {
16868
- const content = await readFile5(absolutePath, "utf8");
16967
+ const content = await readFile52(absolutePath, "utf8");
16869
16968
  parsed = parse5(content);
16870
16969
  } catch {
16871
16970
  return errors;
@@ -16935,7 +17034,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
16935
17034
  });
16936
17035
  } else {
16937
17036
  try {
16938
- const fileContent = await readFile5(resolvedPath, "utf8");
17037
+ const fileContent = await readFile52(resolvedPath, "utf8");
16939
17038
  if (fileContent.trim().length === 0) {
16940
17039
  errors.push({
16941
17040
  severity: "warning",
@@ -16960,16 +17059,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
16960
17059
  // src/commands/eval/targets.ts
16961
17060
  import { constants as constants5 } from "node:fs";
16962
17061
  import { access as access5 } from "node:fs/promises";
16963
- import path13 from "node:path";
17062
+ import path17 from "node:path";
16964
17063
  var TARGET_FILE_CANDIDATES = [
16965
17064
  "targets.yaml",
16966
17065
  "targets.yml",
16967
- path13.join(".agentv", "targets.yaml"),
16968
- path13.join(".agentv", "targets.yml")
17066
+ path17.join(".agentv", "targets.yaml"),
17067
+ path17.join(".agentv", "targets.yml")
16969
17068
  ];
16970
- var ANSI_YELLOW2 = "\x1B[33m";
17069
+ var ANSI_YELLOW6 = "\x1B[33m";
16971
17070
  var ANSI_RED = "\x1B[31m";
16972
- var ANSI_RESET2 = "\x1B[0m";
17071
+ var ANSI_RESET6 = "\x1B[0m";
16973
17072
  function isTTY() {
16974
17073
  return process.stdout.isTTY ?? false;
16975
17074
  }
@@ -16988,12 +17087,12 @@ async function readTestSuiteTarget(testFilePath) {
16988
17087
  async function discoverTargetsFile(options) {
16989
17088
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
16990
17089
  if (explicitPath) {
16991
- const resolvedExplicit = path13.resolve(explicitPath);
17090
+ const resolvedExplicit = path17.resolve(explicitPath);
16992
17091
  if (await fileExists5(resolvedExplicit)) {
16993
17092
  return resolvedExplicit;
16994
17093
  }
16995
17094
  for (const candidate of TARGET_FILE_CANDIDATES) {
16996
- const nested = path13.join(resolvedExplicit, candidate);
17095
+ const nested = path17.join(resolvedExplicit, candidate);
16997
17096
  if (await fileExists5(nested)) {
16998
17097
  return nested;
16999
17098
  }
@@ -17001,13 +17100,13 @@ async function discoverTargetsFile(options) {
17001
17100
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
17002
17101
  }
17003
17102
  const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
17004
- const resolvedCwd = path13.resolve(cwd);
17103
+ const resolvedCwd = path17.resolve(cwd);
17005
17104
  if (!directories.includes(resolvedCwd)) {
17006
17105
  directories.push(resolvedCwd);
17007
17106
  }
17008
17107
  for (const directory of directories) {
17009
17108
  for (const candidate of TARGET_FILE_CANDIDATES) {
17010
- const fullPath = path13.join(directory, candidate);
17109
+ const fullPath = path17.join(directory, candidate);
17011
17110
  if (await fileExists5(fullPath)) {
17012
17111
  return fullPath;
17013
17112
  }
@@ -17042,8 +17141,8 @@ async function selectTarget(options) {
17042
17141
  Warnings in ${targetsFilePath}:`);
17043
17142
  for (const warning of warnings) {
17044
17143
  const location = warning.location ? ` [${warning.location}]` : "";
17045
- const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
17046
- const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
17144
+ const prefix = useColors ? `${ANSI_YELLOW6} \u26A0${ANSI_RESET6}` : " \u26A0";
17145
+ const message = useColors ? `${ANSI_YELLOW6}${warning.message}${ANSI_RESET6}` : warning.message;
17047
17146
  console.warn(`${prefix}${location} ${message}`);
17048
17147
  }
17049
17148
  console.warn("");
@@ -17054,8 +17153,8 @@ Warnings in ${targetsFilePath}:`);
17054
17153
  Errors in ${targetsFilePath}:`);
17055
17154
  for (const error of errors) {
17056
17155
  const location = error.location ? ` [${error.location}]` : "";
17057
- const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET2}` : " \u2717";
17058
- const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET2}` : error.message;
17156
+ const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET6}` : " \u2717";
17157
+ const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET6}` : error.message;
17059
17158
  console.error(`${prefix}${location} ${message}`);
17060
17159
  }
17061
17160
  throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -17159,15 +17258,15 @@ async function ensureFileExists(filePath, description) {
17159
17258
  }
17160
17259
  }
17161
17260
  async function findRepoRoot(start) {
17162
- const fallback = path14.resolve(start);
17261
+ const fallback = path18.resolve(start);
17163
17262
  let current = fallback;
17164
17263
  while (current !== void 0) {
17165
- const candidate = path14.join(current, ".git");
17264
+ const candidate = path18.join(current, ".git");
17166
17265
  try {
17167
17266
  await access6(candidate, constants6.F_OK);
17168
17267
  return current;
17169
17268
  } catch {
17170
- const parent = path14.dirname(current);
17269
+ const parent = path18.dirname(current);
17171
17270
  if (parent === current) {
17172
17271
  break;
17173
17272
  }
@@ -17180,16 +17279,16 @@ function buildDefaultOutputPath(cwd, format) {
17180
17279
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
17181
17280
  const baseName = "eval";
17182
17281
  const extension = getDefaultExtension(format);
17183
- return path14.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
17282
+ return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
17184
17283
  }
17185
17284
  function resolvePromptDirectory(option, cwd) {
17186
17285
  if (option === void 0) {
17187
17286
  return void 0;
17188
17287
  }
17189
17288
  if (typeof option === "string" && option.trim().length > 0) {
17190
- return path14.resolve(cwd, option);
17289
+ return path18.resolve(cwd, option);
17191
17290
  }
17192
- return path14.join(cwd, ".agentv", "prompts");
17291
+ return path18.join(cwd, ".agentv", "prompts");
17193
17292
  }
17194
17293
  function createEvaluationCache() {
17195
17294
  const store = /* @__PURE__ */ new Map();
@@ -17214,7 +17313,7 @@ function createProgressReporter(maxWorkers) {
17214
17313
  };
17215
17314
  }
17216
17315
  function makeEvalKey(testFilePath, evalId) {
17217
- return `${path14.resolve(testFilePath)}::${evalId}`;
17316
+ return `${path18.resolve(testFilePath)}::${evalId}`;
17218
17317
  }
17219
17318
  function createDisplayIdTracker() {
17220
17319
  const map = /* @__PURE__ */ new Map();
@@ -17367,7 +17466,7 @@ async function runEvalCommand(input) {
17367
17466
  if (options.verbose) {
17368
17467
  console.log(`Repository root: ${repoRoot}`);
17369
17468
  }
17370
- const outputPath = options.outPath ? path14.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
17469
+ const outputPath = options.outPath ? path18.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
17371
17470
  console.log(`Output path: ${outputPath}`);
17372
17471
  const outputWriter = await createOutputWriter(outputPath, options.format);
17373
17472
  const cache = options.cache ? createEvaluationCache() : void 0;
@@ -17375,7 +17474,7 @@ async function runEvalCommand(input) {
17375
17474
  const allResults = [];
17376
17475
  let lastPromptDumpDir;
17377
17476
  const seenEvalCases = /* @__PURE__ */ new Set();
17378
- const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
17477
+ const resolvedTestFiles = input.testFiles.map((file) => path18.resolve(file));
17379
17478
  const displayIdTracker = createDisplayIdTracker();
17380
17479
  const totalWorkers = options.workers ?? DEFAULT_WORKERS;
17381
17480
  const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
@@ -17467,7 +17566,7 @@ async function resolveEvaluationRunner() {
17467
17566
  if (!overridePath) {
17468
17567
  return runEvaluation;
17469
17568
  }
17470
- const resolved = path14.isAbsolute(overridePath) ? overridePath : path14.resolve(process.cwd(), overridePath);
17569
+ const resolved = path18.isAbsolute(overridePath) ? overridePath : path18.resolve(process.cwd(), overridePath);
17471
17570
  const moduleUrl = pathToFileURL(resolved).href;
17472
17571
  const mod = await import(moduleUrl);
17473
17572
  const candidate = mod.runEvaluation;
@@ -17538,7 +17637,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
17538
17637
  const unmatched = [];
17539
17638
  const results = /* @__PURE__ */ new Set();
17540
17639
  for (const pattern of normalizedInputs) {
17541
- const candidatePath = path15.isAbsolute(pattern) ? path15.normalize(pattern) : path15.resolve(cwd, pattern);
17640
+ const candidatePath = path19.isAbsolute(pattern) ? path19.normalize(pattern) : path19.resolve(cwd, pattern);
17542
17641
  try {
17543
17642
  const stats = await stat3(candidatePath);
17544
17643
  if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
@@ -17561,7 +17660,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
17561
17660
  unmatched.push(pattern);
17562
17661
  continue;
17563
17662
  }
17564
- yamlMatches.forEach((filePath) => results.add(path15.normalize(filePath)));
17663
+ yamlMatches.forEach((filePath) => results.add(path19.normalize(filePath)));
17565
17664
  }
17566
17665
  if (unmatched.length > 0) {
17567
17666
  throw new Error(
@@ -17577,27 +17676,30 @@ async function resolveEvalPaths(evalPaths, cwd) {
17577
17676
 
17578
17677
  // src/commands/init/index.ts
17579
17678
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
17580
- import path17 from "node:path";
17679
+ import path21 from "node:path";
17581
17680
  import * as readline from "node:readline/promises";
17582
17681
 
17583
17682
  // src/templates/index.ts
17584
17683
  import { readFileSync, readdirSync, statSync } from "node:fs";
17585
- import path16 from "node:path";
17586
- import { fileURLToPath as fileURLToPath2 } from "node:url";
17684
+ import path20 from "node:path";
17685
+ import { fileURLToPath } from "node:url";
17587
17686
  var TemplateManager = class {
17588
17687
  static getGithubTemplates() {
17589
- return this.getTemplatesFromDir("github");
17688
+ return this.getTemplatesFromDir(".github");
17590
17689
  }
17591
17690
  static getAgentvTemplates() {
17592
- return this.getTemplatesFromDir("agentv");
17691
+ return this.getTemplatesFromDir(".agentv");
17692
+ }
17693
+ static getClaudeTemplates() {
17694
+ return this.getTemplatesFromDir(".claude");
17593
17695
  }
17594
17696
  static getTemplatesFromDir(subdir) {
17595
- const currentDir = path16.dirname(fileURLToPath2(import.meta.url));
17697
+ const currentDir = path20.dirname(fileURLToPath(import.meta.url));
17596
17698
  let templatesDir;
17597
- if (currentDir.includes(path16.sep + "dist")) {
17598
- templatesDir = path16.join(currentDir, "templates", subdir);
17699
+ if (currentDir.includes(path20.sep + "dist")) {
17700
+ templatesDir = path20.join(currentDir, "templates", subdir);
17599
17701
  } else {
17600
- templatesDir = path16.join(currentDir, subdir);
17702
+ templatesDir = path20.join(currentDir, subdir);
17601
17703
  }
17602
17704
  return this.readTemplatesRecursively(templatesDir, "");
17603
17705
  }
@@ -17605,15 +17707,15 @@ var TemplateManager = class {
17605
17707
  const templates = [];
17606
17708
  const entries = readdirSync(dir);
17607
17709
  for (const entry of entries) {
17608
- const fullPath = path16.join(dir, entry);
17710
+ const fullPath = path20.join(dir, entry);
17609
17711
  const stat5 = statSync(fullPath);
17610
- const entryRelativePath = relativePath ? path16.join(relativePath, entry) : entry;
17712
+ const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
17611
17713
  if (stat5.isDirectory()) {
17612
17714
  templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
17613
17715
  } else {
17614
17716
  const content = readFileSync(fullPath, "utf-8");
17615
17717
  templates.push({
17616
- path: entryRelativePath.split(path16.sep).join("/"),
17718
+ path: entryRelativePath.split(path20.sep).join("/"),
17617
17719
  // Normalize to forward slashes
17618
17720
  content
17619
17721
  });
@@ -17637,25 +17739,35 @@ async function promptYesNo(message) {
17637
17739
  }
17638
17740
  }
17639
17741
  async function initCommand(options = {}) {
17640
- const targetPath = path17.resolve(options.targetPath ?? ".");
17641
- const githubDir = path17.join(targetPath, ".github");
17642
- const agentvDir = path17.join(targetPath, ".agentv");
17742
+ const targetPath = path21.resolve(options.targetPath ?? ".");
17743
+ const githubDir = path21.join(targetPath, ".github");
17744
+ const agentvDir = path21.join(targetPath, ".agentv");
17745
+ const claudeDir = path21.join(targetPath, ".claude");
17643
17746
  const githubTemplates = TemplateManager.getGithubTemplates();
17644
17747
  const agentvTemplates = TemplateManager.getAgentvTemplates();
17748
+ const claudeTemplates = TemplateManager.getClaudeTemplates();
17645
17749
  const existingFiles = [];
17646
17750
  if (existsSync(githubDir)) {
17647
17751
  for (const template of githubTemplates) {
17648
- const targetFilePath = path17.join(githubDir, template.path);
17752
+ const targetFilePath = path21.join(githubDir, template.path);
17649
17753
  if (existsSync(targetFilePath)) {
17650
- existingFiles.push(path17.relative(targetPath, targetFilePath));
17754
+ existingFiles.push(path21.relative(targetPath, targetFilePath));
17651
17755
  }
17652
17756
  }
17653
17757
  }
17654
17758
  if (existsSync(agentvDir)) {
17655
17759
  for (const template of agentvTemplates) {
17656
- const targetFilePath = path17.join(agentvDir, template.path);
17760
+ const targetFilePath = path21.join(agentvDir, template.path);
17761
+ if (existsSync(targetFilePath)) {
17762
+ existingFiles.push(path21.relative(targetPath, targetFilePath));
17763
+ }
17764
+ }
17765
+ }
17766
+ if (existsSync(claudeDir)) {
17767
+ for (const template of claudeTemplates) {
17768
+ const targetFilePath = path21.join(claudeDir, template.path);
17657
17769
  if (existsSync(targetFilePath)) {
17658
- existingFiles.push(path17.relative(targetPath, targetFilePath));
17770
+ existingFiles.push(path21.relative(targetPath, targetFilePath));
17659
17771
  }
17660
17772
  }
17661
17773
  }
@@ -17676,31 +17788,46 @@ async function initCommand(options = {}) {
17676
17788
  if (!existsSync(agentvDir)) {
17677
17789
  mkdirSync(agentvDir, { recursive: true });
17678
17790
  }
17791
+ if (!existsSync(claudeDir)) {
17792
+ mkdirSync(claudeDir, { recursive: true });
17793
+ }
17679
17794
  for (const template of githubTemplates) {
17680
- const targetFilePath = path17.join(githubDir, template.path);
17681
- const targetDirPath = path17.dirname(targetFilePath);
17795
+ const targetFilePath = path21.join(githubDir, template.path);
17796
+ const targetDirPath = path21.dirname(targetFilePath);
17682
17797
  if (!existsSync(targetDirPath)) {
17683
17798
  mkdirSync(targetDirPath, { recursive: true });
17684
17799
  }
17685
17800
  writeFileSync(targetFilePath, template.content, "utf-8");
17686
- console.log(`Created ${path17.relative(targetPath, targetFilePath)}`);
17801
+ console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
17687
17802
  }
17688
17803
  for (const template of agentvTemplates) {
17689
- const targetFilePath = path17.join(agentvDir, template.path);
17690
- const targetDirPath = path17.dirname(targetFilePath);
17804
+ const targetFilePath = path21.join(agentvDir, template.path);
17805
+ const targetDirPath = path21.dirname(targetFilePath);
17806
+ if (!existsSync(targetDirPath)) {
17807
+ mkdirSync(targetDirPath, { recursive: true });
17808
+ }
17809
+ writeFileSync(targetFilePath, template.content, "utf-8");
17810
+ console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
17811
+ }
17812
+ for (const template of claudeTemplates) {
17813
+ const targetFilePath = path21.join(claudeDir, template.path);
17814
+ const targetDirPath = path21.dirname(targetFilePath);
17691
17815
  if (!existsSync(targetDirPath)) {
17692
17816
  mkdirSync(targetDirPath, { recursive: true });
17693
17817
  }
17694
17818
  writeFileSync(targetFilePath, template.content, "utf-8");
17695
- console.log(`Created ${path17.relative(targetPath, targetFilePath)}`);
17819
+ console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
17696
17820
  }
17697
17821
  console.log("\nAgentV initialized successfully!");
17698
17822
  console.log(`
17699
- Files installed to ${path17.relative(targetPath, githubDir)}:`);
17823
+ Files installed to ${path21.relative(targetPath, githubDir)}:`);
17700
17824
  githubTemplates.forEach((t) => console.log(` - ${t.path}`));
17701
17825
  console.log(`
17702
- Files installed to ${path17.relative(targetPath, agentvDir)}:`);
17826
+ Files installed to ${path21.relative(targetPath, agentvDir)}:`);
17703
17827
  agentvTemplates.forEach((t) => console.log(` - ${t.path}`));
17828
+ console.log(`
17829
+ Files installed to ${path21.relative(targetPath, claudeDir)}:`);
17830
+ claudeTemplates.forEach((t) => console.log(` - ${t.path}`));
17704
17831
  console.log("\nYou can now:");
17705
17832
  console.log(" 1. Edit .agentv/.env with your API credentials");
17706
17833
  console.log(" 2. Configure targets in .agentv/targets.yaml");
@@ -17718,11 +17845,11 @@ function registerStatusCommand(program) {
17718
17845
 
17719
17846
  // src/commands/validate/format-output.ts
17720
17847
  var ANSI_RED2 = "\x1B[31m";
17721
- var ANSI_YELLOW3 = "\x1B[33m";
17848
+ var ANSI_YELLOW7 = "\x1B[33m";
17722
17849
  var ANSI_GREEN = "\x1B[32m";
17723
17850
  var ANSI_CYAN = "\x1B[36m";
17724
17851
  var ANSI_BOLD = "\x1B[1m";
17725
- var ANSI_RESET3 = "\x1B[0m";
17852
+ var ANSI_RESET7 = "\x1B[0m";
17726
17853
  function formatSummary(summary, useColors) {
17727
17854
  const lines = [];
17728
17855
  lines.push("");
@@ -17738,7 +17865,7 @@ function formatSummary(summary, useColors) {
17738
17865
  }
17739
17866
  function formatHeader(text, useColors) {
17740
17867
  if (useColors) {
17741
- return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET3}`;
17868
+ return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET7}`;
17742
17869
  }
17743
17870
  return text;
17744
17871
  }
@@ -17746,7 +17873,7 @@ function formatFileResult(result, useColors) {
17746
17873
  const lines = [];
17747
17874
  const status = result.valid ? "\u2713" : "\u2717";
17748
17875
  const statusColor = result.valid ? ANSI_GREEN : ANSI_RED2;
17749
- const statusText = useColors ? `${statusColor}${status}${ANSI_RESET3}` : status;
17876
+ const statusText = useColors ? `${statusColor}${status}${ANSI_RESET7}` : status;
17750
17877
  const fileName = result.filePath;
17751
17878
  lines.push(`${statusText} ${fileName}`);
17752
17879
  if (result.errors.length > 0) {
@@ -17758,8 +17885,8 @@ function formatFileResult(result, useColors) {
17758
17885
  }
17759
17886
  function formatError(error, useColors) {
17760
17887
  const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
17761
- const color = error.severity === "error" ? ANSI_RED2 : ANSI_YELLOW3;
17762
- const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET3}` : prefix;
17888
+ const color = error.severity === "error" ? ANSI_RED2 : ANSI_YELLOW7;
17889
+ const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET7}` : prefix;
17763
17890
  const location = error.location ? ` [${error.location}]` : "";
17764
17891
  return `${coloredPrefix}${location} ${error.message}`;
17765
17892
  }
@@ -17772,15 +17899,15 @@ function formatStats(summary, useColors) {
17772
17899
  (r) => r.errors.some((e) => e.severity === "warning")
17773
17900
  ).length;
17774
17901
  if (useColors) {
17775
- lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET3}`);
17776
- lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET3}`);
17902
+ lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET7}`);
17903
+ lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET7}`);
17777
17904
  if (summary.invalidFiles > 0) {
17778
- lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET3}`);
17905
+ lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET7}`);
17779
17906
  } else {
17780
17907
  lines.push(invalidText);
17781
17908
  }
17782
17909
  if (filesWithWarnings > 0) {
17783
- lines.push(`${ANSI_YELLOW3}Files with warnings: ${filesWithWarnings}${ANSI_RESET3}`);
17910
+ lines.push(`${ANSI_YELLOW7}Files with warnings: ${filesWithWarnings}${ANSI_RESET7}`);
17784
17911
  }
17785
17912
  } else {
17786
17913
  lines.push(totalText);
@@ -17799,7 +17926,7 @@ function isTTY2() {
17799
17926
  // src/commands/validate/validate-files.ts
17800
17927
  import { constants as constants7 } from "node:fs";
17801
17928
  import { access as access7, readdir as readdir3, stat as stat4 } from "node:fs/promises";
17802
- import path18 from "node:path";
17929
+ import path24 from "node:path";
17803
17930
  async function validateFiles(paths) {
17804
17931
  const filePaths = await expandPaths(paths);
17805
17932
  const results = [];
@@ -17817,7 +17944,7 @@ async function validateFiles(paths) {
17817
17944
  };
17818
17945
  }
17819
17946
  async function validateSingleFile(filePath) {
17820
- const absolutePath = path18.resolve(filePath);
17947
+ const absolutePath = path24.resolve(filePath);
17821
17948
  const fileType = await detectFileType(absolutePath);
17822
17949
  if (fileType === "unknown") {
17823
17950
  return {
@@ -17856,7 +17983,7 @@ async function validateSingleFile(filePath) {
17856
17983
  async function expandPaths(paths) {
17857
17984
  const expanded = [];
17858
17985
  for (const inputPath of paths) {
17859
- const absolutePath = path18.resolve(inputPath);
17986
+ const absolutePath = path24.resolve(inputPath);
17860
17987
  try {
17861
17988
  await access7(absolutePath, constants7.F_OK);
17862
17989
  } catch {
@@ -17880,7 +18007,7 @@ async function findYamlFiles(dirPath) {
17880
18007
  try {
17881
18008
  const entries = await readdir3(dirPath, { withFileTypes: true });
17882
18009
  for (const entry of entries) {
17883
- const fullPath = path18.join(dirPath, entry.name);
18010
+ const fullPath = path24.join(dirPath, entry.name);
17884
18011
  if (entry.isDirectory()) {
17885
18012
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
17886
18013
  continue;
@@ -17897,7 +18024,7 @@ async function findYamlFiles(dirPath) {
17897
18024
  return results;
17898
18025
  }
17899
18026
  function isYamlFile(filePath) {
17900
- const ext = path18.extname(filePath).toLowerCase();
18027
+ const ext = path24.extname(filePath).toLowerCase();
17901
18028
  return ext === ".yaml" || ext === ".yml";
17902
18029
  }
17903
18030
 
@@ -17954,4 +18081,4 @@ export {
17954
18081
  createProgram,
17955
18082
  runCli
17956
18083
  };
17957
- //# sourceMappingURL=chunk-7CJK3EYC.js.map
18084
+ //# sourceMappingURL=chunk-WMO5PVPX.js.map