agentv 0.10.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -588,9 +588,9 @@ import { readFileSync as readFileSync2 } from "node:fs";
588
588
  // src/commands/eval/index.ts
589
589
  import fg from "fast-glob";
590
590
  import { stat as stat3 } from "node:fs/promises";
591
- import path15 from "node:path";
591
+ import path19 from "node:path";
592
592
 
593
- // ../../packages/core/dist/chunk-YQBJAT5I.js
593
+ // ../../packages/core/dist/chunk-U3GEJ3K7.js
594
594
  import { constants } from "node:fs";
595
595
  import { access, readFile } from "node:fs/promises";
596
596
  import path from "node:path";
@@ -1073,8 +1073,8 @@ function getErrorMap() {
1073
1073
 
1074
1074
  // ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
1075
1075
  var makeIssue = (params) => {
1076
- const { data, path: path19, errorMaps, issueData } = params;
1077
- const fullPath = [...path19, ...issueData.path || []];
1076
+ const { data, path: path25, errorMaps, issueData } = params;
1077
+ const fullPath = [...path25, ...issueData.path || []];
1078
1078
  const fullIssue = {
1079
1079
  ...issueData,
1080
1080
  path: fullPath
@@ -1190,11 +1190,11 @@ var errorUtil;
1190
1190
 
1191
1191
  // ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/types.js
1192
1192
  var ParseInputLazyPath = class {
1193
- constructor(parent, value, path19, key2) {
1193
+ constructor(parent, value, path25, key2) {
1194
1194
  this._cachedPath = [];
1195
1195
  this.parent = parent;
1196
1196
  this.data = value;
1197
- this._path = path19;
1197
+ this._path = path25;
1198
1198
  this._key = key2;
1199
1199
  }
1200
1200
  get path() {
@@ -4636,7 +4636,7 @@ var coerce = {
4636
4636
  };
4637
4637
  var NEVER = INVALID;
4638
4638
 
4639
- // ../../packages/core/dist/chunk-YQBJAT5I.js
4639
+ // ../../packages/core/dist/chunk-U3GEJ3K7.js
4640
4640
  async function fileExists(filePath) {
4641
4641
  try {
4642
4642
  await access(filePath, constants.F_OK);
@@ -5288,12 +5288,21 @@ function isAgentProvider(provider) {
5288
5288
  }
5289
5289
 
5290
5290
  // ../../packages/core/dist/index.js
5291
+ import { readFile as readFile4 } from "node:fs/promises";
5292
+ import path62 from "node:path";
5293
+ import { parse as parse22 } from "yaml";
5291
5294
  import micromatch from "micromatch";
5295
+ import { readFile as readFile3 } from "node:fs/promises";
5296
+ import path22 from "node:path";
5297
+ import { parse as parse3 } from "yaml";
5292
5298
  import { constants as constants3 } from "node:fs";
5293
- import { access as access3, readFile as readFile3 } from "node:fs/promises";
5299
+ import { access as access3 } from "node:fs/promises";
5294
5300
  import path8 from "node:path";
5295
- import { fileURLToPath } from "node:url";
5296
- import { parse as parse3 } from "yaml";
5301
+ import path32 from "node:path";
5302
+ import { readFile as readFile22 } from "node:fs/promises";
5303
+ import path42 from "node:path";
5304
+ import { readFile as readFile32 } from "node:fs/promises";
5305
+ import path52 from "node:path";
5297
5306
 
5298
5307
  // ../../node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
5299
5308
  var _globalThis = typeof globalThis === "object" ? globalThis : global;
@@ -9747,17 +9756,17 @@ var $a = new Error("Agent definition is the prompt you give to the LLM for the a
9747
9756
  import { exec as execWithCallback } from "node:child_process";
9748
9757
  import fs from "node:fs/promises";
9749
9758
  import os2 from "node:os";
9750
- import path22 from "node:path";
9759
+ import path72 from "node:path";
9751
9760
  import { promisify as promisify2 } from "node:util";
9752
9761
  import { exec as execCallback, spawn as spawn2 } from "node:child_process";
9753
9762
  import { randomUUID } from "node:crypto";
9754
9763
  import { constants as constants22, createWriteStream } from "node:fs";
9755
9764
  import { access as access22, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
9756
9765
  import { tmpdir } from "node:os";
9757
- import path42 from "node:path";
9766
+ import path9 from "node:path";
9758
9767
  import { promisify as promisify22 } from "node:util";
9759
- import path32 from "node:path";
9760
- import path52 from "node:path";
9768
+ import path82 from "node:path";
9769
+ import path10 from "node:path";
9761
9770
 
9762
9771
  // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
9763
9772
  import { exec, spawn } from "child_process";
@@ -11696,13 +11705,12 @@ async function provisionSubagents(options) {
11696
11705
 
11697
11706
  // ../../packages/core/dist/index.js
11698
11707
  import { constants as constants32 } from "node:fs";
11699
- import { access as access32, readFile as readFile22 } from "node:fs/promises";
11700
- import path62 from "node:path";
11701
- import { parse as parse22 } from "yaml";
11702
- import { randomUUID as randomUUID2 } from "node:crypto";
11703
- import { createHash, randomUUID as randomUUID3 } from "node:crypto";
11708
+ import { access as access32, readFile as readFile5 } from "node:fs/promises";
11709
+ import path11 from "node:path";
11710
+ import { parse as parse32 } from "yaml";
11711
+ import { createHash, randomUUID as randomUUID2 } from "node:crypto";
11704
11712
  import { mkdir as mkdir22, writeFile as writeFile22 } from "node:fs/promises";
11705
- import path72 from "node:path";
11713
+ import path12 from "node:path";
11706
11714
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
11707
11715
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
11708
11716
  function isTestMessageRole(value) {
@@ -11747,42 +11755,179 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
11747
11755
  function isEvaluatorKind(value) {
11748
11756
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
11749
11757
  }
11750
- var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
11751
- var ANSI_YELLOW = "\x1B[33m";
11752
- var ANSI_RESET = "\x1B[0m";
11753
- var SCHEMA_EVAL_V2 = "agentv-eval-v2";
11754
- var SCHEMA_CONFIG_V2 = "agentv-config-v2";
11755
- async function readTestSuiteMetadata(testFilePath) {
11756
- try {
11757
- const absolutePath = path8.resolve(testFilePath);
11758
- const content = await readFile3(absolutePath, "utf8");
11759
- const parsed = parse3(content);
11760
- if (!isJsonObject(parsed)) {
11761
- return {};
11758
+ function extractCodeBlocks(segments) {
11759
+ const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
11760
+ const codeBlocks = [];
11761
+ for (const segment of segments) {
11762
+ const typeValue = segment["type"];
11763
+ if (typeof typeValue !== "string" || typeValue !== "text") {
11764
+ continue;
11762
11765
  }
11763
- return { target: extractTargetFromSuite(parsed) };
11766
+ const textValue = segment["value"];
11767
+ if (typeof textValue !== "string") {
11768
+ continue;
11769
+ }
11770
+ const matches = textValue.match(CODE_BLOCK_PATTERN);
11771
+ if (matches) {
11772
+ codeBlocks.push(...matches);
11773
+ }
11774
+ }
11775
+ return codeBlocks;
11776
+ }
11777
+ function formatFileContents(parts) {
11778
+ const fileCount = parts.filter((p) => p.isFile).length;
11779
+ if (fileCount > 0) {
11780
+ return parts.map((part) => {
11781
+ if (part.isFile && part.displayPath) {
11782
+ return `<file path="${part.displayPath}">
11783
+ ${part.content}
11784
+ </file>`;
11785
+ }
11786
+ return part.content;
11787
+ }).join("\n\n");
11788
+ }
11789
+ return parts.map((p) => p.content).join(" ");
11790
+ }
11791
+ function formatSegment(segment) {
11792
+ const type = asString(segment.type);
11793
+ if (type === "text") {
11794
+ return asString(segment.value);
11795
+ }
11796
+ if (type === "guideline_ref") {
11797
+ const refPath = asString(segment.path);
11798
+ return refPath ? `<Attached: ${refPath}>` : void 0;
11799
+ }
11800
+ if (type === "file") {
11801
+ const text = asString(segment.text);
11802
+ const filePath = asString(segment.path);
11803
+ if (text && filePath) {
11804
+ return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
11805
+ }
11806
+ }
11807
+ return void 0;
11808
+ }
11809
+ function hasVisibleContent(segments) {
11810
+ return segments.some((segment) => {
11811
+ const type = asString(segment.type);
11812
+ if (type === "text") {
11813
+ const value = asString(segment.value);
11814
+ return value !== void 0 && value.trim().length > 0;
11815
+ }
11816
+ if (type === "guideline_ref") {
11817
+ return false;
11818
+ }
11819
+ if (type === "file") {
11820
+ const text = asString(segment.text);
11821
+ return text !== void 0 && text.trim().length > 0;
11822
+ }
11823
+ return false;
11824
+ });
11825
+ }
11826
+ function asString(value) {
11827
+ return typeof value === "string" ? value : void 0;
11828
+ }
11829
+ async function fileExists2(absolutePath) {
11830
+ try {
11831
+ await access3(absolutePath, constants3.F_OK);
11832
+ return true;
11764
11833
  } catch {
11765
- return {};
11834
+ return false;
11766
11835
  }
11767
11836
  }
11768
- function extractTargetFromSuite(suite) {
11769
- const execution = suite.execution;
11770
- if (execution && typeof execution === "object" && !Array.isArray(execution)) {
11771
- const executionTarget = execution.target;
11772
- if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
11773
- return executionTarget.trim();
11837
+ function resolveToAbsolutePath(candidate) {
11838
+ if (candidate instanceof URL) {
11839
+ return new URL(candidate).pathname;
11840
+ }
11841
+ if (typeof candidate === "string") {
11842
+ if (candidate.startsWith("file://")) {
11843
+ return new URL(candidate).pathname;
11774
11844
  }
11845
+ return path8.resolve(candidate);
11775
11846
  }
11776
- const targetValue = suite.target;
11777
- if (typeof targetValue === "string" && targetValue.trim().length > 0) {
11778
- return targetValue.trim();
11847
+ throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
11848
+ }
11849
+ function buildDirectoryChain2(filePath, repoRoot) {
11850
+ const directories = [];
11851
+ const seen = /* @__PURE__ */ new Set();
11852
+ const boundary = path8.resolve(repoRoot);
11853
+ let current = path8.resolve(path8.dirname(filePath));
11854
+ while (current !== void 0) {
11855
+ if (!seen.has(current)) {
11856
+ directories.push(current);
11857
+ seen.add(current);
11858
+ }
11859
+ if (current === boundary) {
11860
+ break;
11861
+ }
11862
+ const parent = path8.dirname(current);
11863
+ if (parent === current) {
11864
+ break;
11865
+ }
11866
+ current = parent;
11779
11867
  }
11780
- return void 0;
11868
+ if (!seen.has(boundary)) {
11869
+ directories.push(boundary);
11870
+ }
11871
+ return directories;
11872
+ }
11873
+ function buildSearchRoots2(evalPath, repoRoot) {
11874
+ const uniqueRoots = [];
11875
+ const addRoot = (root2) => {
11876
+ const normalized = path8.resolve(root2);
11877
+ if (!uniqueRoots.includes(normalized)) {
11878
+ uniqueRoots.push(normalized);
11879
+ }
11880
+ };
11881
+ let currentDir = path8.dirname(evalPath);
11882
+ let reachedBoundary = false;
11883
+ while (!reachedBoundary) {
11884
+ addRoot(currentDir);
11885
+ const parentDir = path8.dirname(currentDir);
11886
+ if (currentDir === repoRoot || parentDir === currentDir) {
11887
+ reachedBoundary = true;
11888
+ } else {
11889
+ currentDir = parentDir;
11890
+ }
11891
+ }
11892
+ addRoot(repoRoot);
11893
+ addRoot(process.cwd());
11894
+ return uniqueRoots;
11895
+ }
11896
+ function trimLeadingSeparators2(value) {
11897
+ const trimmed = value.replace(/^[/\\]+/, "");
11898
+ return trimmed.length > 0 ? trimmed : value;
11899
+ }
11900
+ async function resolveFileReference2(rawValue, searchRoots) {
11901
+ const displayPath = trimLeadingSeparators2(rawValue);
11902
+ const potentialPaths = [];
11903
+ if (path8.isAbsolute(rawValue)) {
11904
+ potentialPaths.push(path8.normalize(rawValue));
11905
+ }
11906
+ for (const base of searchRoots) {
11907
+ potentialPaths.push(path8.resolve(base, displayPath));
11908
+ }
11909
+ const attempted = [];
11910
+ const seen = /* @__PURE__ */ new Set();
11911
+ for (const candidate of potentialPaths) {
11912
+ const absoluteCandidate = path8.resolve(candidate);
11913
+ if (seen.has(absoluteCandidate)) {
11914
+ continue;
11915
+ }
11916
+ seen.add(absoluteCandidate);
11917
+ attempted.push(absoluteCandidate);
11918
+ if (await fileExists2(absoluteCandidate)) {
11919
+ return { displayPath, resolvedPath: absoluteCandidate, attempted };
11920
+ }
11921
+ }
11922
+ return { displayPath, attempted };
11781
11923
  }
11924
+ var SCHEMA_CONFIG_V2 = "agentv-config-v2";
11925
+ var ANSI_YELLOW = "\x1B[33m";
11926
+ var ANSI_RESET = "\x1B[0m";
11782
11927
  async function loadConfig(evalFilePath, repoRoot) {
11783
- const directories = buildDirectoryChain(evalFilePath, repoRoot);
11928
+ const directories = buildDirectoryChain2(evalFilePath, repoRoot);
11784
11929
  for (const directory of directories) {
11785
- const configPath = path8.join(directory, ".agentv", "config.yaml");
11930
+ const configPath = path22.join(directory, ".agentv", "config.yaml");
11786
11931
  if (!await fileExists2(configPath)) {
11787
11932
  continue;
11788
11933
  }
@@ -11825,71 +11970,174 @@ function isGuidelineFile(filePath, patterns) {
11825
11970
  const patternsToUse = patterns ?? [];
11826
11971
  return micromatch.isMatch(normalized, patternsToUse);
11827
11972
  }
11828
- function extractCodeBlocks(segments) {
11829
- const codeBlocks = [];
11830
- for (const segment of segments) {
11831
- const typeValue = segment["type"];
11832
- if (typeof typeValue !== "string" || typeValue !== "text") {
11833
- continue;
11834
- }
11835
- const textValue = segment["value"];
11836
- if (typeof textValue !== "string") {
11837
- continue;
11838
- }
11839
- const matches = textValue.match(CODE_BLOCK_PATTERN);
11840
- if (matches) {
11841
- codeBlocks.push(...matches);
11973
+ function extractTargetFromSuite(suite) {
11974
+ const execution = suite.execution;
11975
+ if (execution && typeof execution === "object" && !Array.isArray(execution)) {
11976
+ const executionTarget = execution.target;
11977
+ if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
11978
+ return executionTarget.trim();
11842
11979
  }
11843
11980
  }
11844
- return codeBlocks;
11981
+ const targetValue = suite.target;
11982
+ if (typeof targetValue === "string" && targetValue.trim().length > 0) {
11983
+ return targetValue.trim();
11984
+ }
11985
+ return void 0;
11845
11986
  }
11846
- async function processMessages(options) {
11847
- const {
11848
- messages,
11849
- searchRoots,
11850
- repoRootPath,
11851
- guidelinePatterns,
11852
- guidelinePaths,
11853
- textParts,
11854
- messageType,
11855
- verbose
11856
- } = options;
11857
- const segments = [];
11858
- for (const message of messages) {
11859
- const content = message.content;
11860
- if (typeof content === "string") {
11861
- segments.push({ type: "text", value: content });
11862
- if (textParts) {
11863
- textParts.push(content);
11864
- }
11987
+ function logWarning(message) {
11988
+ console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
11989
+ }
11990
+ var ANSI_YELLOW2 = "\x1B[33m";
11991
+ var ANSI_RESET2 = "\x1B[0m";
11992
+ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
11993
+ const execution = rawEvalCase.execution;
11994
+ const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
11995
+ if (candidateEvaluators === void 0) {
11996
+ return void 0;
11997
+ }
11998
+ if (!Array.isArray(candidateEvaluators)) {
11999
+ logWarning2(`Skipping evaluators for '${evalId}': expected array`);
12000
+ return void 0;
12001
+ }
12002
+ const evaluators = [];
12003
+ for (const rawEvaluator of candidateEvaluators) {
12004
+ if (!isJsonObject2(rawEvaluator)) {
12005
+ logWarning2(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
11865
12006
  continue;
11866
12007
  }
11867
- for (const rawSegment of content) {
11868
- if (!isJsonObject(rawSegment)) {
12008
+ const name = asString2(rawEvaluator.name);
12009
+ const typeValue = rawEvaluator.type;
12010
+ if (!name || !isEvaluatorKind(typeValue)) {
12011
+ logWarning2(`Skipping evaluator with invalid name/type in '${evalId}'`);
12012
+ continue;
12013
+ }
12014
+ if (typeValue === "code") {
12015
+ const script = asString2(rawEvaluator.script);
12016
+ if (!script) {
12017
+ logWarning2(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
11869
12018
  continue;
11870
12019
  }
11871
- const segmentType = asString(rawSegment.type);
11872
- if (segmentType === "file") {
11873
- const rawValue = asString(rawSegment.value);
11874
- if (!rawValue) {
11875
- continue;
12020
+ const cwd = asString2(rawEvaluator.cwd);
12021
+ let resolvedCwd;
12022
+ if (cwd) {
12023
+ const resolved = await resolveFileReference2(cwd, searchRoots);
12024
+ if (resolved.resolvedPath) {
12025
+ resolvedCwd = path32.resolve(resolved.resolvedPath);
12026
+ } else {
12027
+ logWarning2(
12028
+ `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
12029
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12030
+ );
11876
12031
  }
11877
- const { displayPath, resolvedPath, attempted } = await resolveFileReference(
11878
- rawValue,
11879
- searchRoots
11880
- );
11881
- if (!resolvedPath) {
11882
- const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
12032
+ } else {
12033
+ resolvedCwd = searchRoots[0];
12034
+ }
12035
+ evaluators.push({
12036
+ name,
12037
+ type: "code",
12038
+ script,
12039
+ cwd,
12040
+ resolvedCwd
12041
+ });
12042
+ continue;
12043
+ }
12044
+ const prompt = asString2(rawEvaluator.prompt);
12045
+ let promptPath;
12046
+ if (prompt) {
12047
+ const resolved = await resolveFileReference2(prompt, searchRoots);
12048
+ if (resolved.resolvedPath) {
12049
+ promptPath = path32.resolve(resolved.resolvedPath);
12050
+ } else {
12051
+ logWarning2(
12052
+ `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
12053
+ resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12054
+ );
12055
+ }
12056
+ }
12057
+ const _model = asString2(rawEvaluator.model);
12058
+ evaluators.push({
12059
+ name,
12060
+ type: "llm_judge",
12061
+ prompt,
12062
+ promptPath
12063
+ });
12064
+ }
12065
+ return evaluators.length > 0 ? evaluators : void 0;
12066
+ }
12067
+ function coerceEvaluator(candidate, contextId) {
12068
+ if (typeof candidate !== "string") {
12069
+ return void 0;
12070
+ }
12071
+ if (isEvaluatorKind(candidate)) {
12072
+ return candidate;
12073
+ }
12074
+ logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
12075
+ return void 0;
12076
+ }
12077
+ function asString2(value) {
12078
+ return typeof value === "string" ? value : void 0;
12079
+ }
12080
+ function isJsonObject2(value) {
12081
+ return typeof value === "object" && value !== null && !Array.isArray(value);
12082
+ }
12083
+ function logWarning2(message, details) {
12084
+ if (details && details.length > 0) {
12085
+ const detailBlock = details.join("\n");
12086
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}
12087
+ ${detailBlock}${ANSI_RESET2}`);
12088
+ } else {
12089
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
12090
+ }
12091
+ }
12092
+ var ANSI_YELLOW3 = "\x1B[33m";
12093
+ var ANSI_RESET3 = "\x1B[0m";
12094
+ async function processMessages(options) {
12095
+ const {
12096
+ messages,
12097
+ searchRoots,
12098
+ repoRootPath,
12099
+ guidelinePatterns,
12100
+ guidelinePaths,
12101
+ textParts,
12102
+ messageType,
12103
+ verbose
12104
+ } = options;
12105
+ const segments = [];
12106
+ for (const message of messages) {
12107
+ const content = message.content;
12108
+ if (typeof content === "string") {
12109
+ segments.push({ type: "text", value: content });
12110
+ if (textParts) {
12111
+ textParts.push(content);
12112
+ }
12113
+ continue;
12114
+ }
12115
+ for (const rawSegment of content) {
12116
+ if (!isJsonObject(rawSegment)) {
12117
+ continue;
12118
+ }
12119
+ const segmentType = asString3(rawSegment.type);
12120
+ if (segmentType === "file") {
12121
+ const rawValue = asString3(rawSegment.value);
12122
+ if (!rawValue) {
12123
+ continue;
12124
+ }
12125
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
12126
+ rawValue,
12127
+ searchRoots
12128
+ );
12129
+ if (!resolvedPath) {
12130
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
11883
12131
  const context2 = messageType === "input" ? "" : " in expected_messages";
11884
- logWarning(`File not found${context2}: ${displayPath}`, attempts);
12132
+ logWarning3(`File not found${context2}: ${displayPath}`, attempts);
11885
12133
  continue;
11886
12134
  }
11887
12135
  try {
11888
- const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
12136
+ const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
11889
12137
  if (messageType === "input" && guidelinePatterns && guidelinePaths) {
11890
- const relativeToRepo = path8.relative(repoRootPath, resolvedPath);
12138
+ const relativeToRepo = path42.relative(repoRootPath, resolvedPath);
11891
12139
  if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
11892
- guidelinePaths.push(path8.resolve(resolvedPath));
12140
+ guidelinePaths.push(path42.resolve(resolvedPath));
11893
12141
  if (verbose) {
11894
12142
  console.log(` [Guideline] Found: ${displayPath}`);
11895
12143
  console.log(` Resolved to: ${resolvedPath}`);
@@ -11901,7 +12149,7 @@ async function processMessages(options) {
11901
12149
  type: "file",
11902
12150
  path: displayPath,
11903
12151
  text: fileContent,
11904
- resolvedPath: path8.resolve(resolvedPath)
12152
+ resolvedPath: path42.resolve(resolvedPath)
11905
12153
  });
11906
12154
  if (verbose) {
11907
12155
  const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -11910,7 +12158,7 @@ async function processMessages(options) {
11910
12158
  }
11911
12159
  } catch (error) {
11912
12160
  const context2 = messageType === "input" ? "" : " expected output";
11913
- logWarning(`Could not read${context2} file ${resolvedPath}: ${error.message}`);
12161
+ logWarning3(`Could not read${context2} file ${resolvedPath}: ${error.message}`);
11914
12162
  }
11915
12163
  continue;
11916
12164
  }
@@ -11924,202 +12172,116 @@ async function processMessages(options) {
11924
12172
  }
11925
12173
  return segments;
11926
12174
  }
11927
- async function loadEvalCases(evalFilePath, repoRoot, options) {
11928
- const verbose = options?.verbose ?? false;
11929
- const evalIdFilter = options?.evalId;
11930
- const absoluteTestPath = path8.resolve(evalFilePath);
11931
- if (!await fileExists2(absoluteTestPath)) {
11932
- throw new Error(`Test file not found: ${evalFilePath}`);
11933
- }
11934
- const repoRootPath = resolveToAbsolutePath(repoRoot);
11935
- const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
11936
- const config = await loadConfig(absoluteTestPath, repoRootPath);
11937
- const guidelinePatterns = config?.guideline_patterns;
11938
- const rawFile = await readFile3(absoluteTestPath, "utf8");
11939
- const parsed = parse3(rawFile);
11940
- if (!isJsonObject(parsed)) {
11941
- throw new Error(`Invalid test file format: ${evalFilePath}`);
11942
- }
11943
- const suite = parsed;
11944
- const datasetNameFromSuite = asString(suite.dataset)?.trim();
11945
- const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
11946
- const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
11947
- const schema = suite.$schema;
11948
- if (schema !== SCHEMA_EVAL_V2) {
11949
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
11950
- Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11951
- throw new Error(message);
12175
+ async function resolveAssistantContent(content, searchRoots, verbose) {
12176
+ if (typeof content === "string") {
12177
+ return content;
11952
12178
  }
11953
- const rawTestcases = suite.evalcases;
11954
- if (!Array.isArray(rawTestcases)) {
11955
- throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
12179
+ if (!content) {
12180
+ return "";
11956
12181
  }
11957
- const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
11958
- const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
11959
- const globalTarget = asString(globalExecution?.target) ?? asString(suite.target);
11960
- const results = [];
11961
- for (const rawEvalcase of rawTestcases) {
11962
- if (!isJsonObject(rawEvalcase)) {
11963
- logWarning("Skipping invalid eval case entry (expected object)");
12182
+ const parts = [];
12183
+ for (const entry of content) {
12184
+ if (typeof entry === "string") {
12185
+ parts.push({ content: entry, isFile: false });
11964
12186
  continue;
11965
12187
  }
11966
- const evalcase = rawEvalcase;
11967
- const id = asString(evalcase.id);
11968
- if (evalIdFilter && id !== evalIdFilter) {
12188
+ if (!isJsonObject(entry)) {
11969
12189
  continue;
11970
12190
  }
11971
- const conversationId = asString(evalcase.conversation_id);
11972
- const outcome = asString(evalcase.outcome);
11973
- const inputMessagesValue = evalcase.input_messages;
11974
- const expectedMessagesValue = evalcase.expected_messages;
11975
- if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
11976
- logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
12191
+ const segmentType = asString3(entry.type);
12192
+ if (segmentType === "file") {
12193
+ const rawValue = asString3(entry.value);
12194
+ if (!rawValue) {
12195
+ continue;
12196
+ }
12197
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
12198
+ rawValue,
12199
+ searchRoots
12200
+ );
12201
+ if (!resolvedPath) {
12202
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
12203
+ logWarning3(`File not found in expected_messages: ${displayPath}`, attempts);
12204
+ continue;
12205
+ }
12206
+ try {
12207
+ const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
12208
+ parts.push({ content: fileContent, isFile: true, displayPath });
12209
+ if (verbose) {
12210
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
12211
+ console.log(` Resolved to: ${resolvedPath}`);
12212
+ }
12213
+ } catch (error) {
12214
+ logWarning3(`Could not read file ${resolvedPath}: ${error.message}`);
12215
+ }
11977
12216
  continue;
11978
12217
  }
11979
- const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
11980
- const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
11981
- const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
11982
- if (hasExpectedMessages && expectedMessages.length === 0) {
11983
- logWarning(`No valid expected message found for eval case: ${id}`);
12218
+ const textValue = asString3(entry.text);
12219
+ if (typeof textValue === "string") {
12220
+ parts.push({ content: textValue, isFile: false });
11984
12221
  continue;
11985
12222
  }
11986
- if (expectedMessages.length > 1) {
11987
- logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
11988
- }
11989
- const guidelinePaths = [];
11990
- const inputTextParts = [];
11991
- const inputSegments = await processMessages({
11992
- messages: inputMessages,
11993
- searchRoots,
11994
- repoRootPath,
11995
- guidelinePatterns,
11996
- guidelinePaths,
11997
- textParts: inputTextParts,
11998
- messageType: "input",
11999
- verbose
12000
- });
12001
- const outputSegments = hasExpectedMessages ? await processMessages({
12002
- messages: expectedMessages,
12003
- searchRoots,
12004
- repoRootPath,
12005
- guidelinePatterns,
12006
- messageType: "output",
12007
- verbose
12008
- }) : [];
12009
- const codeSnippets = extractCodeBlocks(inputSegments);
12010
- const expectedContent = expectedMessages[0]?.content;
12011
- const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
12012
- const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
12013
- const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
12014
- const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
12015
- const userFilePaths = [];
12016
- for (const segment of inputSegments) {
12017
- if (segment.type === "file" && typeof segment.resolvedPath === "string") {
12018
- userFilePaths.push(segment.resolvedPath);
12019
- }
12020
- }
12021
- const allFilePaths = [
12022
- ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
12023
- ...userFilePaths
12024
- ];
12025
- const testCase = {
12026
- id,
12027
- dataset: datasetName,
12028
- conversation_id: conversationId,
12029
- question,
12030
- input_messages: inputMessages,
12031
- input_segments: inputSegments,
12032
- output_segments: outputSegments,
12033
- reference_answer: referenceAnswer,
12034
- guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
12035
- guideline_patterns: guidelinePatterns,
12036
- file_paths: allFilePaths,
12037
- code_snippets: codeSnippets,
12038
- expected_outcome: outcome,
12039
- evaluator: evalCaseEvaluatorKind,
12040
- evaluators
12041
- };
12042
- if (verbose) {
12043
- console.log(`
12044
- [Eval Case: ${id}]`);
12045
- if (testCase.guideline_paths.length > 0) {
12046
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
12047
- for (const guidelinePath of testCase.guideline_paths) {
12048
- console.log(` - ${guidelinePath}`);
12049
- }
12050
- } else {
12051
- console.log(" No guidelines found");
12052
- }
12223
+ const valueValue = asString3(entry.value);
12224
+ if (typeof valueValue === "string") {
12225
+ parts.push({ content: valueValue, isFile: false });
12226
+ continue;
12053
12227
  }
12054
- results.push(testCase);
12228
+ parts.push({ content: JSON.stringify(entry), isFile: false });
12055
12229
  }
12056
- return results;
12230
+ return formatFileContents(parts);
12057
12231
  }
12058
- function needsRoleMarkers(messages, processedSegmentsByMessage) {
12059
- if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
12060
- return true;
12061
- }
12062
- let messagesWithContent = 0;
12063
- for (const segments of processedSegmentsByMessage) {
12064
- if (hasVisibleContent(segments)) {
12065
- messagesWithContent++;
12066
- }
12067
- }
12068
- return messagesWithContent > 1;
12232
+ function asString3(value) {
12233
+ return typeof value === "string" ? value : void 0;
12069
12234
  }
12070
- function hasVisibleContent(segments) {
12071
- return segments.some((segment) => {
12072
- const type = asString(segment.type);
12073
- if (type === "text") {
12074
- const value = asString(segment.value);
12075
- return value !== void 0 && value.trim().length > 0;
12076
- }
12077
- if (type === "guideline_ref") {
12078
- return false;
12079
- }
12080
- if (type === "file") {
12081
- const text = asString(segment.text);
12082
- return text !== void 0 && text.trim().length > 0;
12083
- }
12084
- return false;
12085
- });
12235
+ function cloneJsonObject(source2) {
12236
+ const entries = Object.entries(source2).map(([key2, value]) => [key2, cloneJsonValue(value)]);
12237
+ return Object.fromEntries(entries);
12086
12238
  }
12087
- function formatSegment(segment) {
12088
- const type = asString(segment.type);
12089
- if (type === "text") {
12090
- return asString(segment.value);
12239
+ function cloneJsonValue(value) {
12240
+ if (value === null) {
12241
+ return null;
12091
12242
  }
12092
- if (type === "guideline_ref") {
12093
- const refPath = asString(segment.path);
12094
- return refPath ? `<Attached: ${refPath}>` : void 0;
12243
+ if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
12244
+ return value;
12095
12245
  }
12096
- if (type === "file") {
12097
- const text = asString(segment.text);
12098
- const filePath = asString(segment.path);
12099
- if (text && filePath) {
12100
- return `=== ${filePath} ===
12101
- ${text}`;
12102
- }
12246
+ if (Array.isArray(value)) {
12247
+ return value.map((item) => cloneJsonValue(item));
12103
12248
  }
12104
- return void 0;
12249
+ if (typeof value === "object") {
12250
+ return cloneJsonObject(value);
12251
+ }
12252
+ return value;
12105
12253
  }
12254
+ function logWarning3(message, details) {
12255
+ if (details && details.length > 0) {
12256
+ const detailBlock = details.join("\n");
12257
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}
12258
+ ${detailBlock}${ANSI_RESET3}`);
12259
+ } else {
12260
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
12261
+ }
12262
+ }
12263
+ var ANSI_YELLOW4 = "\x1B[33m";
12264
+ var ANSI_RESET4 = "\x1B[0m";
12106
12265
  async function buildPromptInputs(testCase) {
12107
- const guidelineContents = [];
12266
+ const guidelineParts = [];
12108
12267
  for (const rawPath of testCase.guideline_paths) {
12109
- const absolutePath = path8.resolve(rawPath);
12268
+ const absolutePath = path52.resolve(rawPath);
12110
12269
  if (!await fileExists2(absolutePath)) {
12111
- logWarning(`Could not read guideline file ${absolutePath}: file does not exist`);
12270
+ logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
12112
12271
  continue;
12113
12272
  }
12114
12273
  try {
12115
- const content = (await readFile3(absolutePath, "utf8")).replace(/\r\n/g, "\n");
12116
- guidelineContents.push(`=== ${path8.basename(absolutePath)} ===
12117
- ${content}`);
12274
+ const content = (await readFile32(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
12275
+ guidelineParts.push({
12276
+ content,
12277
+ isFile: true,
12278
+ displayPath: path52.basename(absolutePath)
12279
+ });
12118
12280
  } catch (error) {
12119
- logWarning(`Could not read guideline file ${absolutePath}: ${error.message}`);
12281
+ logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
12120
12282
  }
12121
12283
  }
12122
- const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
12284
+ const guidelines = formatFileContents(guidelineParts);
12123
12285
  const segmentsByMessage = [];
12124
12286
  const fileContentsByPath = /* @__PURE__ */ new Map();
12125
12287
  for (const segment of testCase.input_segments) {
@@ -12140,9 +12302,9 @@ ${content}`);
12140
12302
  messageSegments.push({ type: "text", value: segment });
12141
12303
  }
12142
12304
  } else if (isJsonObject(segment)) {
12143
- const type = asString(segment.type);
12305
+ const type = asString4(segment.type);
12144
12306
  if (type === "file") {
12145
- const value = asString(segment.value);
12307
+ const value = asString4(segment.value);
12146
12308
  if (!value) continue;
12147
12309
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
12148
12310
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -12153,7 +12315,7 @@ ${content}`);
12153
12315
  messageSegments.push({ type: "file", text: fileText, path: value });
12154
12316
  }
12155
12317
  } else if (type === "text") {
12156
- const textValue = asString(segment.value);
12318
+ const textValue = asString4(segment.value);
12157
12319
  if (textValue && textValue.trim().length > 0) {
12158
12320
  messageSegments.push({ type: "text", value: textValue });
12159
12321
  }
@@ -12209,6 +12371,18 @@ ${messageContent}`);
12209
12371
  }) : void 0;
12210
12372
  return { question, guidelines, chatPrompt };
12211
12373
  }
12374
+ function needsRoleMarkers(messages, processedSegmentsByMessage) {
12375
+ if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
12376
+ return true;
12377
+ }
12378
+ let messagesWithContent = 0;
12379
+ for (const segments of processedSegmentsByMessage) {
12380
+ if (hasVisibleContent(segments)) {
12381
+ messagesWithContent++;
12382
+ }
12383
+ }
12384
+ return messagesWithContent > 1;
12385
+ }
12212
12386
  function buildChatPromptFromSegments(options) {
12213
12387
  const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
12214
12388
  if (messages.length === 0) {
@@ -12259,217 +12433,189 @@ ${guidelineContent.trim()}`);
12259
12433
  name = "tool";
12260
12434
  }
12261
12435
  for (const segment of segments) {
12262
- if (segment.type === "guideline_ref") {
12263
- continue;
12264
- }
12265
- const formatted = formatSegment(segment);
12266
- if (formatted) {
12267
- const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
12268
- if (isGuidelineRef) {
12269
- continue;
12270
- }
12271
- contentParts.push(formatted);
12272
- }
12273
- }
12274
- if (contentParts.length === 0) {
12275
- continue;
12276
- }
12277
- chatPrompt.push({
12278
- role,
12279
- content: contentParts.join("\n"),
12280
- ...name ? { name } : {}
12281
- });
12282
- }
12283
- return chatPrompt.length > 0 ? chatPrompt : void 0;
12284
- }
12285
- async function fileExists2(absolutePath) {
12286
- try {
12287
- await access3(absolutePath, constants3.F_OK);
12288
- return true;
12289
- } catch {
12290
- return false;
12291
- }
12292
- }
12293
- function resolveToAbsolutePath(candidate) {
12294
- if (candidate instanceof URL) {
12295
- return fileURLToPath(candidate);
12296
- }
12297
- if (typeof candidate === "string") {
12298
- if (candidate.startsWith("file://")) {
12299
- return fileURLToPath(new URL(candidate));
12300
- }
12301
- return path8.resolve(candidate);
12302
- }
12303
- throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
12304
- }
12305
- function asString(value) {
12306
- return typeof value === "string" ? value : void 0;
12307
- }
12308
- function cloneJsonObject(source2) {
12309
- const entries = Object.entries(source2).map(([key2, value]) => [key2, cloneJsonValue(value)]);
12310
- return Object.fromEntries(entries);
12311
- }
12312
- function cloneJsonValue(value) {
12313
- if (value === null) {
12314
- return null;
12315
- }
12316
- if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
12317
- return value;
12318
- }
12319
- if (Array.isArray(value)) {
12320
- return value.map((item) => cloneJsonValue(item));
12321
- }
12322
- return cloneJsonObject(value);
12323
- }
12324
- async function resolveAssistantContent(content, searchRoots, verbose) {
12325
- if (typeof content === "string") {
12326
- return content;
12327
- }
12328
- if (!content) {
12329
- return "";
12330
- }
12331
- const parts = [];
12332
- for (const entry of content) {
12333
- if (typeof entry === "string") {
12334
- parts.push(entry);
12335
- continue;
12336
- }
12337
- if (!isJsonObject(entry)) {
12338
- continue;
12339
- }
12340
- const segmentType = asString(entry.type);
12341
- if (segmentType === "file") {
12342
- const rawValue = asString(entry.value);
12343
- if (!rawValue) {
12344
- continue;
12345
- }
12346
- const { displayPath, resolvedPath, attempted } = await resolveFileReference(
12347
- rawValue,
12348
- searchRoots
12349
- );
12350
- if (!resolvedPath) {
12351
- const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
12352
- logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
12436
+ if (segment.type === "guideline_ref") {
12353
12437
  continue;
12354
12438
  }
12355
- try {
12356
- const fileContent = (await readFile3(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
12357
- parts.push(fileContent);
12358
- if (verbose) {
12359
- console.log(` [Expected Assistant File] Found: ${displayPath}`);
12360
- console.log(` Resolved to: ${resolvedPath}`);
12439
+ const formatted = formatSegment(segment);
12440
+ if (formatted) {
12441
+ const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
12442
+ if (isGuidelineRef) {
12443
+ continue;
12361
12444
  }
12362
- } catch (error) {
12363
- logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
12445
+ contentParts.push(formatted);
12364
12446
  }
12365
- continue;
12366
12447
  }
12367
- const textValue = asString(entry.text);
12368
- if (typeof textValue === "string") {
12369
- parts.push(textValue);
12448
+ if (contentParts.length === 0) {
12370
12449
  continue;
12371
12450
  }
12372
- const valueValue = asString(entry.value);
12373
- if (typeof valueValue === "string") {
12374
- parts.push(valueValue);
12375
- continue;
12451
+ chatPrompt.push({
12452
+ role,
12453
+ content: contentParts.join("\n"),
12454
+ ...name ? { name } : {}
12455
+ });
12456
+ }
12457
+ return chatPrompt.length > 0 ? chatPrompt : void 0;
12458
+ }
12459
+ function asString4(value) {
12460
+ return typeof value === "string" ? value : void 0;
12461
+ }
12462
+ function logWarning4(message) {
12463
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
12464
+ }
12465
+ var ANSI_YELLOW5 = "\x1B[33m";
12466
+ var ANSI_RESET5 = "\x1B[0m";
12467
+ var SCHEMA_EVAL_V2 = "agentv-eval-v2";
12468
+ async function readTestSuiteMetadata(testFilePath) {
12469
+ try {
12470
+ const absolutePath = path62.resolve(testFilePath);
12471
+ const content = await readFile4(absolutePath, "utf8");
12472
+ const parsed = parse22(content);
12473
+ if (!isJsonObject(parsed)) {
12474
+ return {};
12376
12475
  }
12377
- parts.push(JSON.stringify(entry));
12476
+ return { target: extractTargetFromSuite(parsed) };
12477
+ } catch {
12478
+ return {};
12378
12479
  }
12379
- return parts.join(" ");
12380
12480
  }
12381
- async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
12382
- const execution = rawEvalCase.execution;
12383
- const candidateEvaluators = isJsonObject(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
12384
- if (candidateEvaluators === void 0) {
12385
- return void 0;
12481
+ async function loadEvalCases(evalFilePath, repoRoot, options) {
12482
+ const verbose = options?.verbose ?? false;
12483
+ const evalIdFilter = options?.evalId;
12484
+ const absoluteTestPath = path62.resolve(evalFilePath);
12485
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
12486
+ const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
12487
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
12488
+ const guidelinePatterns = config?.guideline_patterns;
12489
+ const rawFile = await readFile4(absoluteTestPath, "utf8");
12490
+ const parsed = parse22(rawFile);
12491
+ if (!isJsonObject(parsed)) {
12492
+ throw new Error(`Invalid test file format: ${evalFilePath}`);
12386
12493
  }
12387
- if (!Array.isArray(candidateEvaluators)) {
12388
- logWarning(`Skipping evaluators for '${evalId}': expected array`);
12389
- return void 0;
12494
+ const suite = parsed;
12495
+ const datasetNameFromSuite = asString5(suite.dataset)?.trim();
12496
+ const fallbackDataset = path62.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
12497
+ const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
12498
+ const schema = suite.$schema;
12499
+ if (schema !== SCHEMA_EVAL_V2) {
12500
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
12501
+ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
12502
+ throw new Error(message);
12390
12503
  }
12391
- const evaluators = [];
12392
- for (const rawEvaluator of candidateEvaluators) {
12393
- if (!isJsonObject(rawEvaluator)) {
12394
- logWarning(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
12504
+ const rawTestcases = suite.evalcases;
12505
+ if (!Array.isArray(rawTestcases)) {
12506
+ throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
12507
+ }
12508
+ const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
12509
+ const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
12510
+ const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
12511
+ const results = [];
12512
+ for (const rawEvalcase of rawTestcases) {
12513
+ if (!isJsonObject(rawEvalcase)) {
12514
+ logWarning5("Skipping invalid eval case entry (expected object)");
12395
12515
  continue;
12396
12516
  }
12397
- const name = asString(rawEvaluator.name);
12398
- const typeValue = rawEvaluator.type;
12399
- if (!name || !isEvaluatorKind(typeValue)) {
12400
- logWarning(`Skipping evaluator with invalid name/type in '${evalId}'`);
12517
+ const evalcase = rawEvalcase;
12518
+ const id = asString5(evalcase.id);
12519
+ if (evalIdFilter && id !== evalIdFilter) {
12401
12520
  continue;
12402
12521
  }
12403
- if (typeValue === "code") {
12404
- const script = asString(rawEvaluator.script);
12405
- if (!script) {
12406
- logWarning(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
12407
- continue;
12408
- }
12409
- const cwd = asString(rawEvaluator.cwd);
12410
- let resolvedCwd;
12411
- if (cwd) {
12412
- const resolved = await resolveFileReference(cwd, searchRoots);
12413
- if (resolved.resolvedPath) {
12414
- resolvedCwd = path8.resolve(resolved.resolvedPath);
12415
- } else {
12416
- logWarning(
12417
- `Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
12418
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12419
- );
12420
- }
12421
- } else {
12422
- resolvedCwd = searchRoots[0];
12423
- }
12424
- evaluators.push({
12425
- name,
12426
- type: "code",
12427
- script,
12428
- cwd,
12429
- resolvedCwd
12430
- });
12522
+ const conversationId = asString5(evalcase.conversation_id);
12523
+ const outcome = asString5(evalcase.outcome);
12524
+ const inputMessagesValue = evalcase.input_messages;
12525
+ const expectedMessagesValue = evalcase.expected_messages;
12526
+ if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
12527
+ logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
12431
12528
  continue;
12432
12529
  }
12433
- const prompt = asString(rawEvaluator.prompt);
12434
- let promptPath;
12435
- if (prompt) {
12436
- const resolved = await resolveFileReference(prompt, searchRoots);
12437
- if (resolved.resolvedPath) {
12438
- promptPath = path8.resolve(resolved.resolvedPath);
12530
+ const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
12531
+ const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
12532
+ const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
12533
+ if (hasExpectedMessages && expectedMessages.length === 0) {
12534
+ logWarning5(`No valid expected message found for eval case: ${id}`);
12535
+ continue;
12536
+ }
12537
+ if (expectedMessages.length > 1) {
12538
+ logWarning5(`Multiple expected messages found for eval case: ${id}, using first`);
12539
+ }
12540
+ const guidelinePaths = [];
12541
+ const inputTextParts = [];
12542
+ const inputSegments = await processMessages({
12543
+ messages: inputMessages,
12544
+ searchRoots,
12545
+ repoRootPath,
12546
+ guidelinePatterns,
12547
+ guidelinePaths,
12548
+ textParts: inputTextParts,
12549
+ messageType: "input",
12550
+ verbose
12551
+ });
12552
+ const outputSegments = hasExpectedMessages ? await processMessages({
12553
+ messages: expectedMessages,
12554
+ searchRoots,
12555
+ repoRootPath,
12556
+ guidelinePatterns,
12557
+ messageType: "output",
12558
+ verbose
12559
+ }) : [];
12560
+ const codeSnippets = extractCodeBlocks(inputSegments);
12561
+ const expectedContent = expectedMessages[0]?.content;
12562
+ const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
12563
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
12564
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
12565
+ const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
12566
+ const userFilePaths = [];
12567
+ for (const segment of inputSegments) {
12568
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
12569
+ userFilePaths.push(segment.resolvedPath);
12570
+ }
12571
+ }
12572
+ const allFilePaths = [
12573
+ ...guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
12574
+ ...userFilePaths
12575
+ ];
12576
+ const testCase = {
12577
+ id,
12578
+ dataset: datasetName,
12579
+ conversation_id: conversationId,
12580
+ question,
12581
+ input_messages: inputMessages,
12582
+ input_segments: inputSegments,
12583
+ output_segments: outputSegments,
12584
+ reference_answer: referenceAnswer,
12585
+ guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
12586
+ guideline_patterns: guidelinePatterns,
12587
+ file_paths: allFilePaths,
12588
+ code_snippets: codeSnippets,
12589
+ expected_outcome: outcome,
12590
+ evaluator: evalCaseEvaluatorKind,
12591
+ evaluators
12592
+ };
12593
+ if (verbose) {
12594
+ console.log(`
12595
+ [Eval Case: ${id}]`);
12596
+ if (testCase.guideline_paths.length > 0) {
12597
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
12598
+ for (const guidelinePath of testCase.guideline_paths) {
12599
+ console.log(` - ${guidelinePath}`);
12600
+ }
12439
12601
  } else {
12440
- logWarning(
12441
- `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
12442
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
12443
- );
12602
+ console.log(" No guidelines found");
12444
12603
  }
12445
12604
  }
12446
- const model = asString(rawEvaluator.model);
12447
- evaluators.push({
12448
- name,
12449
- type: "llm_judge",
12450
- prompt,
12451
- promptPath
12452
- });
12605
+ results.push(testCase);
12453
12606
  }
12454
- return evaluators.length > 0 ? evaluators : void 0;
12607
+ return results;
12455
12608
  }
12456
- function coerceEvaluator(candidate, contextId) {
12457
- if (typeof candidate !== "string") {
12458
- return void 0;
12459
- }
12460
- if (isEvaluatorKind(candidate)) {
12461
- return candidate;
12462
- }
12463
- logWarning(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
12464
- return void 0;
12609
+ function asString5(value) {
12610
+ return typeof value === "string" ? value : void 0;
12465
12611
  }
12466
- function logWarning(message, details) {
12612
+ function logWarning5(message, details) {
12467
12613
  if (details && details.length > 0) {
12468
12614
  const detailBlock = details.join("\n");
12469
- console.warn(`${ANSI_YELLOW}Warning: ${message}
12470
- ${detailBlock}${ANSI_RESET}`);
12615
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
12616
+ ${detailBlock}${ANSI_RESET5}`);
12471
12617
  } else {
12472
- console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
12618
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
12473
12619
  }
12474
12620
  }
12475
12621
  var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
@@ -12498,9 +12644,8 @@ function buildChatPrompt(request) {
12498
12644
  }
12499
12645
  function resolveSystemContent(request) {
12500
12646
  const systemSegments = [];
12501
- const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
12502
- if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
12503
- systemSegments.push(metadataSystemPrompt.trim());
12647
+ if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
12648
+ systemSegments.push(request.systemPrompt.trim());
12504
12649
  } else {
12505
12650
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
12506
12651
  }
@@ -12925,7 +13070,7 @@ function normalizeInputFiles(inputFiles) {
12925
13070
  }
12926
13071
  const unique = /* @__PURE__ */ new Map();
12927
13072
  for (const inputFile of inputFiles) {
12928
- const absolutePath = path22.resolve(inputFile);
13073
+ const absolutePath = path72.resolve(inputFile);
12929
13074
  if (!unique.has(absolutePath)) {
12930
13075
  unique.set(absolutePath, absolutePath);
12931
13076
  }
@@ -12939,7 +13084,7 @@ function formatFileList(files, template) {
12939
13084
  const formatter = template ?? "{path}";
12940
13085
  return files.map((filePath) => {
12941
13086
  const escapedPath = shellEscape(filePath);
12942
- const escapedName = shellEscape(path22.basename(filePath));
13087
+ const escapedName = shellEscape(path72.basename(filePath));
12943
13088
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
12944
13089
  }).join(" ");
12945
13090
  }
@@ -12963,7 +13108,7 @@ function generateOutputFilePath(evalCaseId) {
12963
13108
  const safeEvalId = evalCaseId || "unknown";
12964
13109
  const timestamp = Date.now();
12965
13110
  const random = Math.random().toString(36).substring(2, 9);
12966
- return path22.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
13111
+ return path72.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
12967
13112
  }
12968
13113
  function formatTimeoutSuffix(timeoutMs) {
12969
13114
  if (!timeoutMs || timeoutMs <= 0) {
@@ -13040,7 +13185,7 @@ function normalizeInputFiles2(inputFiles) {
13040
13185
  }
13041
13186
  const deduped = /* @__PURE__ */ new Map();
13042
13187
  for (const inputFile of inputFiles) {
13043
- const absolutePath = path32.resolve(inputFile);
13188
+ const absolutePath = path82.resolve(inputFile);
13044
13189
  if (!deduped.has(absolutePath)) {
13045
13190
  deduped.set(absolutePath, absolutePath);
13046
13191
  }
@@ -13053,14 +13198,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
13053
13198
  }
13054
13199
  const unique = /* @__PURE__ */ new Map();
13055
13200
  for (const inputFile of inputFiles) {
13056
- const absolutePath = path32.resolve(inputFile);
13201
+ const absolutePath = path82.resolve(inputFile);
13057
13202
  if (overrides?.has(absolutePath)) {
13058
13203
  if (!unique.has(absolutePath)) {
13059
13204
  unique.set(absolutePath, absolutePath);
13060
13205
  }
13061
13206
  continue;
13062
13207
  }
13063
- const normalized = absolutePath.split(path32.sep).join("/");
13208
+ const normalized = absolutePath.split(path82.sep).join("/");
13064
13209
  if (isGuidelineFile(normalized, guidelinePatterns)) {
13065
13210
  if (!unique.has(absolutePath)) {
13066
13211
  unique.set(absolutePath, absolutePath);
@@ -13075,7 +13220,7 @@ function collectInputFiles(inputFiles) {
13075
13220
  }
13076
13221
  const unique = /* @__PURE__ */ new Map();
13077
13222
  for (const inputFile of inputFiles) {
13078
- const absolutePath = path32.resolve(inputFile);
13223
+ const absolutePath = path82.resolve(inputFile);
13079
13224
  if (!unique.has(absolutePath)) {
13080
13225
  unique.set(absolutePath, absolutePath);
13081
13226
  }
@@ -13087,7 +13232,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
13087
13232
  return "";
13088
13233
  }
13089
13234
  const buildList = (files) => files.map((absolutePath) => {
13090
- const fileName = path32.basename(absolutePath);
13235
+ const fileName = path82.basename(absolutePath);
13091
13236
  const fileUri = pathToFileUri2(absolutePath);
13092
13237
  return `* [${fileName}](${fileUri})`;
13093
13238
  });
@@ -13107,7 +13252,7 @@ ${buildList(inputFiles).join("\n")}.`);
13107
13252
  return sections.join("\n");
13108
13253
  }
13109
13254
  function pathToFileUri2(filePath) {
13110
- const absolutePath = path32.isAbsolute(filePath) ? filePath : path32.resolve(filePath);
13255
+ const absolutePath = path82.isAbsolute(filePath) ? filePath : path82.resolve(filePath);
13111
13256
  const normalizedPath = absolutePath.replace(/\\/g, "/");
13112
13257
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
13113
13258
  return `file:///${normalizedPath}`;
@@ -13143,7 +13288,7 @@ var CodexProvider = class {
13143
13288
  const logger = await this.createStreamLogger(request).catch(() => void 0);
13144
13289
  try {
13145
13290
  const promptContent = buildPromptDocument(request, inputFiles);
13146
- const promptFile = path42.join(workspaceRoot, PROMPT_FILENAME);
13291
+ const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
13147
13292
  await writeFile3(promptFile, promptContent, "utf8");
13148
13293
  const args = this.buildCodexArgs();
13149
13294
  const cwd = this.resolveCwd(workspaceRoot);
@@ -13193,7 +13338,7 @@ var CodexProvider = class {
13193
13338
  if (!this.config.cwd) {
13194
13339
  return workspaceRoot;
13195
13340
  }
13196
- return path42.resolve(this.config.cwd);
13341
+ return path9.resolve(this.config.cwd);
13197
13342
  }
13198
13343
  buildCodexArgs() {
13199
13344
  const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
@@ -13227,7 +13372,7 @@ var CodexProvider = class {
13227
13372
  }
13228
13373
  }
13229
13374
  async createWorkspace() {
13230
- return await mkdtemp(path42.join(tmpdir(), WORKSPACE_PREFIX));
13375
+ return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
13231
13376
  }
13232
13377
  async cleanupWorkspace(workspaceRoot) {
13233
13378
  try {
@@ -13241,9 +13386,9 @@ var CodexProvider = class {
13241
13386
  return void 0;
13242
13387
  }
13243
13388
  if (this.config.logDir) {
13244
- return path42.resolve(this.config.logDir);
13389
+ return path9.resolve(this.config.logDir);
13245
13390
  }
13246
- return path42.join(process.cwd(), ".agentv", "logs", "codex");
13391
+ return path9.join(process.cwd(), ".agentv", "logs", "codex");
13247
13392
  }
13248
13393
  async createStreamLogger(request) {
13249
13394
  const logDir = this.resolveLogDirectory();
@@ -13257,7 +13402,7 @@ var CodexProvider = class {
13257
13402
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
13258
13403
  return void 0;
13259
13404
  }
13260
- const filePath = path42.join(logDir, buildLogFilename(request, this.targetName));
13405
+ const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
13261
13406
  try {
13262
13407
  const logger = await CodexStreamLogger.create({
13263
13408
  filePath,
@@ -13472,7 +13617,7 @@ function tryParseJsonValue(rawLine) {
13472
13617
  async function locateExecutable(candidate) {
13473
13618
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
13474
13619
  if (includesPathSeparator) {
13475
- const resolved = path42.isAbsolute(candidate) ? candidate : path42.resolve(candidate);
13620
+ const resolved = path9.isAbsolute(candidate) ? candidate : path9.resolve(candidate);
13476
13621
  const executablePath = await ensureWindowsExecutableVariant(resolved);
13477
13622
  await access22(executablePath, constants22.F_OK);
13478
13623
  return executablePath;
@@ -13926,6 +14071,9 @@ var VSCodeProvider = class {
13926
14071
  };
13927
14072
  function buildPromptDocument2(request, attachments, guidelinePatterns) {
13928
14073
  const parts = [];
14074
+ if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
14075
+ parts.push(request.systemPrompt.trim());
14076
+ }
13929
14077
  const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
13930
14078
  const attachmentFiles = collectAttachmentFiles(attachments);
13931
14079
  const nonGuidelineAttachments = attachmentFiles.filter(
@@ -13943,7 +14091,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
13943
14091
  return "";
13944
14092
  }
13945
14093
  const buildList = (files) => files.map((absolutePath) => {
13946
- const fileName = path52.basename(absolutePath);
14094
+ const fileName = path10.basename(absolutePath);
13947
14095
  const fileUri = pathToFileUri22(absolutePath);
13948
14096
  return `* [${fileName}](${fileUri})`;
13949
14097
  });
@@ -13968,8 +14116,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
13968
14116
  }
13969
14117
  const unique = /* @__PURE__ */ new Map();
13970
14118
  for (const attachment of attachments) {
13971
- const absolutePath = path52.resolve(attachment);
13972
- const normalized = absolutePath.split(path52.sep).join("/");
14119
+ const absolutePath = path10.resolve(attachment);
14120
+ const normalized = absolutePath.split(path10.sep).join("/");
13973
14121
  if (isGuidelineFile(normalized, guidelinePatterns)) {
13974
14122
  if (!unique.has(absolutePath)) {
13975
14123
  unique.set(absolutePath, absolutePath);
@@ -13984,7 +14132,7 @@ function collectAttachmentFiles(attachments) {
13984
14132
  }
13985
14133
  const unique = /* @__PURE__ */ new Map();
13986
14134
  for (const attachment of attachments) {
13987
- const absolutePath = path52.resolve(attachment);
14135
+ const absolutePath = path10.resolve(attachment);
13988
14136
  if (!unique.has(absolutePath)) {
13989
14137
  unique.set(absolutePath, absolutePath);
13990
14138
  }
@@ -13992,7 +14140,7 @@ function collectAttachmentFiles(attachments) {
13992
14140
  return Array.from(unique.values());
13993
14141
  }
13994
14142
  function pathToFileUri22(filePath) {
13995
- const absolutePath = path52.isAbsolute(filePath) ? filePath : path52.resolve(filePath);
14143
+ const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
13996
14144
  const normalizedPath = absolutePath.replace(/\\/g, "/");
13997
14145
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
13998
14146
  return `file:///${normalizedPath}`;
@@ -14005,7 +14153,7 @@ function normalizeAttachments(attachments) {
14005
14153
  }
14006
14154
  const deduped = /* @__PURE__ */ new Set();
14007
14155
  for (const attachment of attachments) {
14008
- deduped.add(path52.resolve(attachment));
14156
+ deduped.add(path10.resolve(attachment));
14009
14157
  }
14010
14158
  return Array.from(deduped);
14011
14159
  }
@@ -14014,7 +14162,7 @@ function mergeAttachments(all) {
14014
14162
  for (const list of all) {
14015
14163
  if (!list) continue;
14016
14164
  for (const inputFile of list) {
14017
- deduped.add(path52.resolve(inputFile));
14165
+ deduped.add(path10.resolve(inputFile));
14018
14166
  }
14019
14167
  }
14020
14168
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -14111,12 +14259,12 @@ async function fileExists3(filePath) {
14111
14259
  }
14112
14260
  }
14113
14261
  async function readTargetDefinitions(filePath) {
14114
- const absolutePath = path62.resolve(filePath);
14262
+ const absolutePath = path11.resolve(filePath);
14115
14263
  if (!await fileExists3(absolutePath)) {
14116
14264
  throw new Error(`targets.yaml not found at ${absolutePath}`);
14117
14265
  }
14118
- const raw = await readFile22(absolutePath, "utf8");
14119
- const parsed = parse22(raw);
14266
+ const raw = await readFile5(absolutePath, "utf8");
14267
+ const parsed = parse32(raw);
14120
14268
  if (!isRecord(parsed)) {
14121
14269
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
14122
14270
  }
@@ -14151,17 +14299,34 @@ function createProvider(target) {
14151
14299
  }
14152
14300
  }
14153
14301
  }
14302
+ var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
14303
+
14304
+ Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
14305
+
14306
+ Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
14307
+
14308
+ [[ ## expected_outcome ## ]]
14309
+ {{expected_outcome}}
14310
+
14311
+ [[ ## question ## ]]
14312
+ {{question}}
14313
+
14314
+ [[ ## reference_answer ## ]]
14315
+ {{reference_answer}}
14316
+
14317
+ [[ ## candidate_answer ## ]]
14318
+ {{candidate_answer}}`;
14154
14319
  var LlmJudgeEvaluator = class {
14155
14320
  kind = "llm_judge";
14156
14321
  resolveJudgeProvider;
14157
14322
  maxOutputTokens;
14158
14323
  temperature;
14159
- customPrompt;
14324
+ evaluatorTemplate;
14160
14325
  constructor(options) {
14161
14326
  this.resolveJudgeProvider = options.resolveJudgeProvider;
14162
14327
  this.maxOutputTokens = options.maxOutputTokens;
14163
14328
  this.temperature = options.temperature;
14164
- this.customPrompt = options.customPrompt;
14329
+ this.evaluatorTemplate = options.evaluatorTemplate;
14165
14330
  }
14166
14331
  async evaluate(context2) {
14167
14332
  const judgeProvider = await this.resolveJudgeProvider(context2);
@@ -14171,26 +14336,21 @@ var LlmJudgeEvaluator = class {
14171
14336
  return this.evaluateWithPrompt(context2, judgeProvider);
14172
14337
  }
14173
14338
  async evaluateWithPrompt(context2, judgeProvider) {
14174
- const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context2.evalCase);
14175
14339
  const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
14176
- let prompt = buildQualityPrompt(context2.evalCase, context2.candidate, formattedQuestion);
14177
- let systemPrompt = context2.systemPrompt ?? this.customPrompt ?? buildSystemPrompt(hasReferenceAnswer);
14178
- if (systemPrompt && hasTemplateVariables(systemPrompt)) {
14179
- const variables = {
14180
- input_messages: JSON.stringify(context2.evalCase.input_segments, null, 2),
14181
- output_messages: JSON.stringify(context2.evalCase.output_segments, null, 2),
14182
- candidate_answer: context2.candidate,
14183
- reference_answer: context2.evalCase.reference_answer ?? "",
14184
- expected_outcome: context2.evalCase.expected_outcome,
14185
- question: formattedQuestion
14186
- };
14187
- prompt = substituteVariables(systemPrompt, variables);
14188
- systemPrompt = buildSystemPrompt(hasReferenceAnswer);
14189
- }
14190
- const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
14340
+ const variables = {
14341
+ input_messages: JSON.stringify(context2.evalCase.input_segments, null, 2),
14342
+ output_messages: JSON.stringify(context2.evalCase.output_segments, null, 2),
14343
+ candidate_answer: context2.candidate.trim(),
14344
+ reference_answer: (context2.evalCase.reference_answer ?? "").trim(),
14345
+ expected_outcome: context2.evalCase.expected_outcome.trim(),
14346
+ question: formattedQuestion.trim()
14347
+ };
14348
+ const systemPrompt = buildOutputSchema();
14349
+ const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
14350
+ const userPrompt = substituteVariables(evaluatorTemplate, variables);
14191
14351
  const response = await judgeProvider.invoke({
14192
- question: prompt,
14193
- metadata,
14352
+ question: userPrompt,
14353
+ systemPrompt,
14194
14354
  evalCaseId: context2.evalCase.id,
14195
14355
  attempt: context2.attempt,
14196
14356
  maxOutputTokens: this.maxOutputTokens,
@@ -14203,11 +14363,9 @@ var LlmJudgeEvaluator = class {
14203
14363
  const reasoning = parsed.reasoning ?? response.reasoning;
14204
14364
  const expectedAspectCount = Math.max(hits.length + misses.length, 1);
14205
14365
  const evaluatorRawRequest = {
14206
- id: randomUUID2(),
14207
- provider: judgeProvider.id,
14208
- prompt,
14209
- target: context2.target.name,
14210
- ...systemPrompt !== void 0 && { systemPrompt }
14366
+ userPrompt,
14367
+ systemPrompt,
14368
+ target: judgeProvider.targetName
14211
14369
  };
14212
14370
  return {
14213
14371
  score,
@@ -14219,20 +14377,8 @@ var LlmJudgeEvaluator = class {
14219
14377
  };
14220
14378
  }
14221
14379
  };
14222
- function buildSystemPrompt(hasReferenceAnswer) {
14223
- const basePrompt = [
14224
- "You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
14225
- ""
14226
- ];
14227
- if (hasReferenceAnswer) {
14228
- basePrompt.push(
14229
- "Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
14230
- ""
14231
- );
14232
- }
14233
- basePrompt.push(
14234
- "Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
14235
- "",
14380
+ function buildOutputSchema() {
14381
+ return [
14236
14382
  "You must respond with a single JSON object matching this schema:",
14237
14383
  "",
14238
14384
  "{",
@@ -14241,30 +14387,7 @@ function buildSystemPrompt(hasReferenceAnswer) {
14241
14387
  ' "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
14242
14388
  ' "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
14243
14389
  "}"
14244
- );
14245
- return basePrompt.join("\n");
14246
- }
14247
- function buildQualityPrompt(evalCase, candidate, question) {
14248
- const parts = [
14249
- "[[ ## expected_outcome ## ]]",
14250
- evalCase.expected_outcome.trim(),
14251
- "",
14252
- "[[ ## question ## ]]",
14253
- question.trim(),
14254
- ""
14255
- ];
14256
- if (hasNonEmptyReferenceAnswer(evalCase)) {
14257
- parts.push(
14258
- "[[ ## reference_answer ## ]]",
14259
- evalCase.reference_answer.trim(),
14260
- ""
14261
- );
14262
- }
14263
- parts.push(
14264
- "[[ ## candidate_answer ## ]]",
14265
- candidate.trim()
14266
- );
14267
- return parts.join("\n");
14390
+ ].join("\n");
14268
14391
  }
14269
14392
  function clampScore(value) {
14270
14393
  if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -14346,9 +14469,6 @@ function extractJsonBlob(text) {
14346
14469
  function isNonEmptyString(value) {
14347
14470
  return typeof value === "string" && value.trim().length > 0;
14348
14471
  }
14349
- function hasNonEmptyReferenceAnswer(evalCase) {
14350
- return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
14351
- }
14352
14472
  var CodeEvaluator = class {
14353
14473
  kind = "code";
14354
14474
  script;
@@ -14454,11 +14574,8 @@ function parseJsonSafe(payload) {
14454
14574
  return void 0;
14455
14575
  }
14456
14576
  }
14457
- function hasTemplateVariables(text) {
14458
- return /\$\{[a-zA-Z0-9_]+\}/.test(text);
14459
- }
14460
14577
  function substituteVariables(template, variables) {
14461
- return template.replace(/\$\{([a-zA-Z0-9_]+)\}/g, (match, varName) => {
14578
+ return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
14462
14579
  return variables[varName] ?? match;
14463
14580
  });
14464
14581
  }
@@ -15018,6 +15135,7 @@ async function evaluateCandidate(options) {
15018
15135
  }
15019
15136
  }
15020
15137
  return {
15138
+ timestamp: completedAt.toISOString(),
15021
15139
  eval_id: evalCase.id,
15022
15140
  dataset: evalCase.dataset,
15023
15141
  conversation_id: evalCase.conversation_id,
@@ -15025,14 +15143,12 @@ async function evaluateCandidate(options) {
15025
15143
  hits: score.hits,
15026
15144
  misses: score.misses,
15027
15145
  candidate_answer: candidate,
15028
- expected_aspect_count: score.expectedAspectCount,
15029
15146
  target: target.name,
15030
- timestamp: completedAt.toISOString(),
15031
15147
  reasoning: score.reasoning,
15032
15148
  raw_aspects: score.rawAspects,
15033
15149
  agent_provider_request: agentProviderRequest,
15034
15150
  lm_provider_request: lmProviderRequest,
15035
- evaluator_raw_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
15151
+ evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
15036
15152
  evaluator_results: evaluatorResults
15037
15153
  };
15038
15154
  }
@@ -15109,7 +15225,7 @@ async function runEvaluatorList(options) {
15109
15225
  hits: score2.hits,
15110
15226
  misses: score2.misses,
15111
15227
  reasoning: score2.reasoning,
15112
- evaluator_raw_request: score2.evaluatorRawRequest
15228
+ evaluator_provider_request: score2.evaluatorRawRequest
15113
15229
  });
15114
15230
  continue;
15115
15231
  }
@@ -15136,7 +15252,7 @@ async function runEvaluatorList(options) {
15136
15252
  hits: score2.hits,
15137
15253
  misses: score2.misses,
15138
15254
  reasoning: score2.reasoning,
15139
- evaluator_raw_request: score2.evaluatorRawRequest
15255
+ evaluator_provider_request: score2.evaluatorRawRequest
15140
15256
  });
15141
15257
  continue;
15142
15258
  }
@@ -15189,7 +15305,7 @@ async function runLlmJudgeEvaluator(options) {
15189
15305
  promptInputs,
15190
15306
  now,
15191
15307
  judgeProvider,
15192
- systemPrompt: customPrompt,
15308
+ evaluatorTemplateOverride: customPrompt,
15193
15309
  evaluator: config
15194
15310
  });
15195
15311
  }
@@ -15230,8 +15346,8 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
15230
15346
  async function dumpPrompt(directory, evalCase, promptInputs) {
15231
15347
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
15232
15348
  const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
15233
- const filePath = path72.resolve(directory, filename);
15234
- await mkdir22(path72.dirname(filePath), { recursive: true });
15349
+ const filePath = path12.resolve(directory, filename);
15350
+ await mkdir22(path12.dirname(filePath), { recursive: true });
15235
15351
  const payload = {
15236
15352
  eval_id: evalCase.id,
15237
15353
  question: promptInputs.question,
@@ -15245,7 +15361,7 @@ function sanitizeFilename(value) {
15245
15361
  return "prompt";
15246
15362
  }
15247
15363
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
15248
- return sanitized.length > 0 ? sanitized : randomUUID3();
15364
+ return sanitized.length > 0 ? sanitized : randomUUID2();
15249
15365
  }
15250
15366
  async function invokeProvider(provider, options) {
15251
15367
  const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -15301,6 +15417,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
15301
15417
  }
15302
15418
  }
15303
15419
  return {
15420
+ timestamp: timestamp.toISOString(),
15304
15421
  eval_id: evalCase.id,
15305
15422
  dataset: evalCase.dataset,
15306
15423
  conversation_id: evalCase.conversation_id,
@@ -15308,9 +15425,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
15308
15425
  hits: [],
15309
15426
  misses: [`Error: ${message}`],
15310
15427
  candidate_answer: `Error occurred: ${message}`,
15311
- expected_aspect_count: 0,
15312
15428
  target: targetName,
15313
- timestamp: timestamp.toISOString(),
15314
15429
  raw_aspects: [],
15315
15430
  agent_provider_request: agentProviderRequest,
15316
15431
  lm_provider_request: lmProviderRequest,
@@ -15352,19 +15467,19 @@ function createAgentKernel() {
15352
15467
  // src/commands/eval/run-eval.ts
15353
15468
  import { constants as constants6 } from "node:fs";
15354
15469
  import { access as access6, mkdir as mkdir6 } from "node:fs/promises";
15355
- import path14 from "node:path";
15470
+ import path18 from "node:path";
15356
15471
  import { pathToFileURL } from "node:url";
15357
15472
 
15358
15473
  // src/commands/eval/env.ts
15359
15474
  import { config as loadDotenv } from "dotenv";
15360
15475
  import { constants as constants4 } from "node:fs";
15361
15476
  import { access as access4 } from "node:fs/promises";
15362
- import path9 from "node:path";
15477
+ import path13 from "node:path";
15363
15478
  function uniqueDirs(directories) {
15364
15479
  const seen = /* @__PURE__ */ new Set();
15365
15480
  const result = [];
15366
15481
  for (const dir of directories) {
15367
- const absolute = path9.resolve(dir);
15482
+ const absolute = path13.resolve(dir);
15368
15483
  if (seen.has(absolute)) {
15369
15484
  continue;
15370
15485
  }
@@ -15383,14 +15498,14 @@ async function fileExists4(filePath) {
15383
15498
  }
15384
15499
  function collectAncestorDirectories(start, boundary) {
15385
15500
  const directories = [];
15386
- const boundaryDir = path9.resolve(boundary);
15387
- let current = path9.resolve(start);
15501
+ const boundaryDir = path13.resolve(boundary);
15502
+ let current = path13.resolve(start);
15388
15503
  while (current !== void 0) {
15389
15504
  directories.push(current);
15390
15505
  if (current === boundaryDir) {
15391
15506
  break;
15392
15507
  }
15393
- const parent = path9.dirname(current);
15508
+ const parent = path13.dirname(current);
15394
15509
  if (parent === current) {
15395
15510
  break;
15396
15511
  }
@@ -15400,7 +15515,7 @@ function collectAncestorDirectories(start, boundary) {
15400
15515
  }
15401
15516
  async function loadEnvFromHierarchy(options) {
15402
15517
  const { testFilePath, repoRoot, verbose } = options;
15403
- const testDir = path9.dirname(path9.resolve(testFilePath));
15518
+ const testDir = path13.dirname(path13.resolve(testFilePath));
15404
15519
  const cwd = process.cwd();
15405
15520
  const searchDirs = uniqueDirs([
15406
15521
  ...collectAncestorDirectories(testDir, repoRoot),
@@ -15408,7 +15523,7 @@ async function loadEnvFromHierarchy(options) {
15408
15523
  cwd
15409
15524
  ]);
15410
15525
  for (const dir of searchDirs) {
15411
- const candidate = path9.join(dir, ".env");
15526
+ const candidate = path13.join(dir, ".env");
15412
15527
  if (await fileExists4(candidate)) {
15413
15528
  loadDotenv({ path: candidate, override: false });
15414
15529
  if (verbose) {
@@ -15632,7 +15747,7 @@ var Mutex = class {
15632
15747
  // src/commands/eval/jsonl-writer.ts
15633
15748
  import { createWriteStream as createWriteStream2 } from "node:fs";
15634
15749
  import { mkdir as mkdir4 } from "node:fs/promises";
15635
- import path10 from "node:path";
15750
+ import path14 from "node:path";
15636
15751
  import { finished } from "node:stream/promises";
15637
15752
  var JsonlWriter = class _JsonlWriter {
15638
15753
  stream;
@@ -15642,7 +15757,7 @@ var JsonlWriter = class _JsonlWriter {
15642
15757
  this.stream = stream;
15643
15758
  }
15644
15759
  static async open(filePath) {
15645
- await mkdir4(path10.dirname(filePath), { recursive: true });
15760
+ await mkdir4(path14.dirname(filePath), { recursive: true });
15646
15761
  const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
15647
15762
  return new _JsonlWriter(stream);
15648
15763
  }
@@ -15674,7 +15789,7 @@ var JsonlWriter = class _JsonlWriter {
15674
15789
  // src/commands/eval/yaml-writer.ts
15675
15790
  import { createWriteStream as createWriteStream3 } from "node:fs";
15676
15791
  import { mkdir as mkdir5 } from "node:fs/promises";
15677
- import path11 from "node:path";
15792
+ import path15 from "node:path";
15678
15793
  import { finished as finished2 } from "node:stream/promises";
15679
15794
  import { stringify as stringifyYaml } from "yaml";
15680
15795
  var YamlWriter = class _YamlWriter {
@@ -15686,7 +15801,7 @@ var YamlWriter = class _YamlWriter {
15686
15801
  this.stream = stream;
15687
15802
  }
15688
15803
  static async open(filePath) {
15689
- await mkdir5(path11.dirname(filePath), { recursive: true });
15804
+ await mkdir5(path15.dirname(filePath), { recursive: true });
15690
15805
  const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
15691
15806
  return new _YamlWriter(stream);
15692
15807
  }
@@ -15808,12 +15923,12 @@ var ProgressDisplay = class {
15808
15923
  }
15809
15924
  addLogPaths(paths) {
15810
15925
  const newPaths = [];
15811
- for (const path19 of paths) {
15812
- if (this.logPathSet.has(path19)) {
15926
+ for (const path25 of paths) {
15927
+ if (this.logPathSet.has(path25)) {
15813
15928
  continue;
15814
15929
  }
15815
- this.logPathSet.add(path19);
15816
- newPaths.push(path19);
15930
+ this.logPathSet.add(path25);
15931
+ newPaths.push(path25);
15817
15932
  }
15818
15933
  if (newPaths.length === 0) {
15819
15934
  return;
@@ -15829,8 +15944,8 @@ var ProgressDisplay = class {
15829
15944
  this.hasPrintedLogHeader = true;
15830
15945
  }
15831
15946
  const startIndex = this.logPaths.length - newPaths.length;
15832
- newPaths.forEach((path19, offset) => {
15833
- console.log(`${startIndex + offset + 1}. ${path19}`);
15947
+ newPaths.forEach((path25, offset) => {
15948
+ console.log(`${startIndex + offset + 1}. ${path25}`);
15834
15949
  });
15835
15950
  }
15836
15951
  scheduleRender() {
@@ -15878,8 +15993,8 @@ var ProgressDisplay = class {
15878
15993
  if (this.logPaths.length > 0) {
15879
15994
  lines.push("");
15880
15995
  lines.push("Codex CLI logs:");
15881
- this.logPaths.forEach((path19, index) => {
15882
- lines.push(`${index + 1}. ${path19}`);
15996
+ this.logPaths.forEach((path25, index) => {
15997
+ lines.push(`${index + 1}. ${path25}`);
15883
15998
  });
15884
15999
  }
15885
16000
  const rowCount = this.getRenderedRowCount(lines);
@@ -16084,17 +16199,17 @@ function formatEvaluationSummary(summary) {
16084
16199
  }
16085
16200
 
16086
16201
  // ../../packages/core/dist/evaluation/validation/index.js
16087
- import { readFile as readFile4 } from "node:fs/promises";
16202
+ import { readFile as readFile6 } from "node:fs/promises";
16088
16203
  import { parse as parse4 } from "yaml";
16089
16204
  import { readFile as readFile23 } from "node:fs/promises";
16090
- import path12 from "node:path";
16205
+ import path16 from "node:path";
16091
16206
  import { parse as parse23 } from "yaml";
16092
- import { readFile as readFile32 } from "node:fs/promises";
16207
+ import { readFile as readFile33 } from "node:fs/promises";
16093
16208
  import path23 from "node:path";
16094
- import { parse as parse32 } from "yaml";
16209
+ import { parse as parse33 } from "yaml";
16095
16210
  import { readFile as readFile42 } from "node:fs/promises";
16096
16211
  import { parse as parse42 } from "yaml";
16097
- import { readFile as readFile5 } from "node:fs/promises";
16212
+ import { readFile as readFile52 } from "node:fs/promises";
16098
16213
  import path33 from "node:path";
16099
16214
  import { parse as parse5 } from "yaml";
16100
16215
  var SCHEMA_EVAL_V22 = "agentv-eval-v2";
@@ -16102,7 +16217,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
16102
16217
  var SCHEMA_CONFIG_V22 = "agentv-config-v2";
16103
16218
  async function detectFileType(filePath) {
16104
16219
  try {
16105
- const content = await readFile4(filePath, "utf8");
16220
+ const content = await readFile6(filePath, "utf8");
16106
16221
  const parsed = parse4(content);
16107
16222
  if (typeof parsed !== "object" || parsed === null) {
16108
16223
  return "unknown";
@@ -16132,7 +16247,7 @@ function isObject(value) {
16132
16247
  }
16133
16248
  async function validateEvalFile(filePath) {
16134
16249
  const errors = [];
16135
- const absolutePath = path12.resolve(filePath);
16250
+ const absolutePath = path16.resolve(filePath);
16136
16251
  let parsed;
16137
16252
  try {
16138
16253
  const content = await readFile23(absolutePath, "utf8");
@@ -16497,8 +16612,8 @@ async function validateTargetsFile(filePath) {
16497
16612
  const absolutePath = path23.resolve(filePath);
16498
16613
  let parsed;
16499
16614
  try {
16500
- const content = await readFile32(absolutePath, "utf8");
16501
- parsed = parse32(content);
16615
+ const content = await readFile33(absolutePath, "utf8");
16616
+ parsed = parse33(content);
16502
16617
  } catch (error) {
16503
16618
  errors.push({
16504
16619
  severity: "error",
@@ -16849,7 +16964,7 @@ async function validateFileReferences(evalFilePath) {
16849
16964
  const searchRoots = buildSearchRoots(absolutePath, gitRoot);
16850
16965
  let parsed;
16851
16966
  try {
16852
- const content = await readFile5(absolutePath, "utf8");
16967
+ const content = await readFile52(absolutePath, "utf8");
16853
16968
  parsed = parse5(content);
16854
16969
  } catch {
16855
16970
  return errors;
@@ -16919,7 +17034,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
16919
17034
  });
16920
17035
  } else {
16921
17036
  try {
16922
- const fileContent = await readFile5(resolvedPath, "utf8");
17037
+ const fileContent = await readFile52(resolvedPath, "utf8");
16923
17038
  if (fileContent.trim().length === 0) {
16924
17039
  errors.push({
16925
17040
  severity: "warning",
@@ -16944,16 +17059,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
16944
17059
  // src/commands/eval/targets.ts
16945
17060
  import { constants as constants5 } from "node:fs";
16946
17061
  import { access as access5 } from "node:fs/promises";
16947
- import path13 from "node:path";
17062
+ import path17 from "node:path";
16948
17063
  var TARGET_FILE_CANDIDATES = [
16949
17064
  "targets.yaml",
16950
17065
  "targets.yml",
16951
- path13.join(".agentv", "targets.yaml"),
16952
- path13.join(".agentv", "targets.yml")
17066
+ path17.join(".agentv", "targets.yaml"),
17067
+ path17.join(".agentv", "targets.yml")
16953
17068
  ];
16954
- var ANSI_YELLOW2 = "\x1B[33m";
17069
+ var ANSI_YELLOW6 = "\x1B[33m";
16955
17070
  var ANSI_RED = "\x1B[31m";
16956
- var ANSI_RESET2 = "\x1B[0m";
17071
+ var ANSI_RESET6 = "\x1B[0m";
16957
17072
  function isTTY() {
16958
17073
  return process.stdout.isTTY ?? false;
16959
17074
  }
@@ -16972,12 +17087,12 @@ async function readTestSuiteTarget(testFilePath) {
16972
17087
  async function discoverTargetsFile(options) {
16973
17088
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
16974
17089
  if (explicitPath) {
16975
- const resolvedExplicit = path13.resolve(explicitPath);
17090
+ const resolvedExplicit = path17.resolve(explicitPath);
16976
17091
  if (await fileExists5(resolvedExplicit)) {
16977
17092
  return resolvedExplicit;
16978
17093
  }
16979
17094
  for (const candidate of TARGET_FILE_CANDIDATES) {
16980
- const nested = path13.join(resolvedExplicit, candidate);
17095
+ const nested = path17.join(resolvedExplicit, candidate);
16981
17096
  if (await fileExists5(nested)) {
16982
17097
  return nested;
16983
17098
  }
@@ -16985,13 +17100,13 @@ async function discoverTargetsFile(options) {
16985
17100
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
16986
17101
  }
16987
17102
  const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
16988
- const resolvedCwd = path13.resolve(cwd);
17103
+ const resolvedCwd = path17.resolve(cwd);
16989
17104
  if (!directories.includes(resolvedCwd)) {
16990
17105
  directories.push(resolvedCwd);
16991
17106
  }
16992
17107
  for (const directory of directories) {
16993
17108
  for (const candidate of TARGET_FILE_CANDIDATES) {
16994
- const fullPath = path13.join(directory, candidate);
17109
+ const fullPath = path17.join(directory, candidate);
16995
17110
  if (await fileExists5(fullPath)) {
16996
17111
  return fullPath;
16997
17112
  }
@@ -17026,8 +17141,8 @@ async function selectTarget(options) {
17026
17141
  Warnings in ${targetsFilePath}:`);
17027
17142
  for (const warning of warnings) {
17028
17143
  const location = warning.location ? ` [${warning.location}]` : "";
17029
- const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
17030
- const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
17144
+ const prefix = useColors ? `${ANSI_YELLOW6} \u26A0${ANSI_RESET6}` : " \u26A0";
17145
+ const message = useColors ? `${ANSI_YELLOW6}${warning.message}${ANSI_RESET6}` : warning.message;
17031
17146
  console.warn(`${prefix}${location} ${message}`);
17032
17147
  }
17033
17148
  console.warn("");
@@ -17038,8 +17153,8 @@ Warnings in ${targetsFilePath}:`);
17038
17153
  Errors in ${targetsFilePath}:`);
17039
17154
  for (const error of errors) {
17040
17155
  const location = error.location ? ` [${error.location}]` : "";
17041
- const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET2}` : " \u2717";
17042
- const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET2}` : error.message;
17156
+ const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET6}` : " \u2717";
17157
+ const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET6}` : error.message;
17043
17158
  console.error(`${prefix}${location} ${message}`);
17044
17159
  }
17045
17160
  throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -17143,15 +17258,15 @@ async function ensureFileExists(filePath, description) {
17143
17258
  }
17144
17259
  }
17145
17260
  async function findRepoRoot(start) {
17146
- const fallback = path14.resolve(start);
17261
+ const fallback = path18.resolve(start);
17147
17262
  let current = fallback;
17148
17263
  while (current !== void 0) {
17149
- const candidate = path14.join(current, ".git");
17264
+ const candidate = path18.join(current, ".git");
17150
17265
  try {
17151
17266
  await access6(candidate, constants6.F_OK);
17152
17267
  return current;
17153
17268
  } catch {
17154
- const parent = path14.dirname(current);
17269
+ const parent = path18.dirname(current);
17155
17270
  if (parent === current) {
17156
17271
  break;
17157
17272
  }
@@ -17164,16 +17279,16 @@ function buildDefaultOutputPath(cwd, format) {
17164
17279
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
17165
17280
  const baseName = "eval";
17166
17281
  const extension = getDefaultExtension(format);
17167
- return path14.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
17282
+ return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
17168
17283
  }
17169
17284
  function resolvePromptDirectory(option, cwd) {
17170
17285
  if (option === void 0) {
17171
17286
  return void 0;
17172
17287
  }
17173
17288
  if (typeof option === "string" && option.trim().length > 0) {
17174
- return path14.resolve(cwd, option);
17289
+ return path18.resolve(cwd, option);
17175
17290
  }
17176
- return path14.join(cwd, ".agentv", "prompts");
17291
+ return path18.join(cwd, ".agentv", "prompts");
17177
17292
  }
17178
17293
  function createEvaluationCache() {
17179
17294
  const store = /* @__PURE__ */ new Map();
@@ -17198,7 +17313,7 @@ function createProgressReporter(maxWorkers) {
17198
17313
  };
17199
17314
  }
17200
17315
  function makeEvalKey(testFilePath, evalId) {
17201
- return `${path14.resolve(testFilePath)}::${evalId}`;
17316
+ return `${path18.resolve(testFilePath)}::${evalId}`;
17202
17317
  }
17203
17318
  function createDisplayIdTracker() {
17204
17319
  const map = /* @__PURE__ */ new Map();
@@ -17351,7 +17466,7 @@ async function runEvalCommand(input) {
17351
17466
  if (options.verbose) {
17352
17467
  console.log(`Repository root: ${repoRoot}`);
17353
17468
  }
17354
- const outputPath = options.outPath ? path14.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
17469
+ const outputPath = options.outPath ? path18.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
17355
17470
  console.log(`Output path: ${outputPath}`);
17356
17471
  const outputWriter = await createOutputWriter(outputPath, options.format);
17357
17472
  const cache = options.cache ? createEvaluationCache() : void 0;
@@ -17359,7 +17474,7 @@ async function runEvalCommand(input) {
17359
17474
  const allResults = [];
17360
17475
  let lastPromptDumpDir;
17361
17476
  const seenEvalCases = /* @__PURE__ */ new Set();
17362
- const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
17477
+ const resolvedTestFiles = input.testFiles.map((file) => path18.resolve(file));
17363
17478
  const displayIdTracker = createDisplayIdTracker();
17364
17479
  const totalWorkers = options.workers ?? DEFAULT_WORKERS;
17365
17480
  const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
@@ -17451,7 +17566,7 @@ async function resolveEvaluationRunner() {
17451
17566
  if (!overridePath) {
17452
17567
  return runEvaluation;
17453
17568
  }
17454
- const resolved = path14.isAbsolute(overridePath) ? overridePath : path14.resolve(process.cwd(), overridePath);
17569
+ const resolved = path18.isAbsolute(overridePath) ? overridePath : path18.resolve(process.cwd(), overridePath);
17455
17570
  const moduleUrl = pathToFileURL(resolved).href;
17456
17571
  const mod = await import(moduleUrl);
17457
17572
  const candidate = mod.runEvaluation;
@@ -17522,7 +17637,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
17522
17637
  const unmatched = [];
17523
17638
  const results = /* @__PURE__ */ new Set();
17524
17639
  for (const pattern of normalizedInputs) {
17525
- const candidatePath = path15.isAbsolute(pattern) ? path15.normalize(pattern) : path15.resolve(cwd, pattern);
17640
+ const candidatePath = path19.isAbsolute(pattern) ? path19.normalize(pattern) : path19.resolve(cwd, pattern);
17526
17641
  try {
17527
17642
  const stats = await stat3(candidatePath);
17528
17643
  if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
@@ -17545,7 +17660,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
17545
17660
  unmatched.push(pattern);
17546
17661
  continue;
17547
17662
  }
17548
- yamlMatches.forEach((filePath) => results.add(path15.normalize(filePath)));
17663
+ yamlMatches.forEach((filePath) => results.add(path19.normalize(filePath)));
17549
17664
  }
17550
17665
  if (unmatched.length > 0) {
17551
17666
  throw new Error(
@@ -17561,27 +17676,30 @@ async function resolveEvalPaths(evalPaths, cwd) {
17561
17676
 
17562
17677
  // src/commands/init/index.ts
17563
17678
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
17564
- import path17 from "node:path";
17679
+ import path21 from "node:path";
17565
17680
  import * as readline from "node:readline/promises";
17566
17681
 
17567
17682
  // src/templates/index.ts
17568
17683
  import { readFileSync, readdirSync, statSync } from "node:fs";
17569
- import path16 from "node:path";
17570
- import { fileURLToPath as fileURLToPath2 } from "node:url";
17684
+ import path20 from "node:path";
17685
+ import { fileURLToPath } from "node:url";
17571
17686
  var TemplateManager = class {
17572
17687
  static getGithubTemplates() {
17573
- return this.getTemplatesFromDir("github");
17688
+ return this.getTemplatesFromDir(".github");
17574
17689
  }
17575
17690
  static getAgentvTemplates() {
17576
- return this.getTemplatesFromDir("agentv");
17691
+ return this.getTemplatesFromDir(".agentv");
17692
+ }
17693
+ static getClaudeTemplates() {
17694
+ return this.getTemplatesFromDir(".claude");
17577
17695
  }
17578
17696
  static getTemplatesFromDir(subdir) {
17579
- const currentDir = path16.dirname(fileURLToPath2(import.meta.url));
17697
+ const currentDir = path20.dirname(fileURLToPath(import.meta.url));
17580
17698
  let templatesDir;
17581
- if (currentDir.includes(path16.sep + "dist")) {
17582
- templatesDir = path16.join(currentDir, "templates", subdir);
17699
+ if (currentDir.includes(path20.sep + "dist")) {
17700
+ templatesDir = path20.join(currentDir, "templates", subdir);
17583
17701
  } else {
17584
- templatesDir = path16.join(currentDir, subdir);
17702
+ templatesDir = path20.join(currentDir, subdir);
17585
17703
  }
17586
17704
  return this.readTemplatesRecursively(templatesDir, "");
17587
17705
  }
@@ -17589,15 +17707,15 @@ var TemplateManager = class {
17589
17707
  const templates = [];
17590
17708
  const entries = readdirSync(dir);
17591
17709
  for (const entry of entries) {
17592
- const fullPath = path16.join(dir, entry);
17710
+ const fullPath = path20.join(dir, entry);
17593
17711
  const stat5 = statSync(fullPath);
17594
- const entryRelativePath = relativePath ? path16.join(relativePath, entry) : entry;
17712
+ const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
17595
17713
  if (stat5.isDirectory()) {
17596
17714
  templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
17597
17715
  } else {
17598
17716
  const content = readFileSync(fullPath, "utf-8");
17599
17717
  templates.push({
17600
- path: entryRelativePath.split(path16.sep).join("/"),
17718
+ path: entryRelativePath.split(path20.sep).join("/"),
17601
17719
  // Normalize to forward slashes
17602
17720
  content
17603
17721
  });
@@ -17621,25 +17739,35 @@ async function promptYesNo(message) {
17621
17739
  }
17622
17740
  }
17623
17741
  async function initCommand(options = {}) {
17624
- const targetPath = path17.resolve(options.targetPath ?? ".");
17625
- const githubDir = path17.join(targetPath, ".github");
17626
- const agentvDir = path17.join(targetPath, ".agentv");
17742
+ const targetPath = path21.resolve(options.targetPath ?? ".");
17743
+ const githubDir = path21.join(targetPath, ".github");
17744
+ const agentvDir = path21.join(targetPath, ".agentv");
17745
+ const claudeDir = path21.join(targetPath, ".claude");
17627
17746
  const githubTemplates = TemplateManager.getGithubTemplates();
17628
17747
  const agentvTemplates = TemplateManager.getAgentvTemplates();
17748
+ const claudeTemplates = TemplateManager.getClaudeTemplates();
17629
17749
  const existingFiles = [];
17630
17750
  if (existsSync(githubDir)) {
17631
17751
  for (const template of githubTemplates) {
17632
- const targetFilePath = path17.join(githubDir, template.path);
17752
+ const targetFilePath = path21.join(githubDir, template.path);
17633
17753
  if (existsSync(targetFilePath)) {
17634
- existingFiles.push(path17.relative(targetPath, targetFilePath));
17754
+ existingFiles.push(path21.relative(targetPath, targetFilePath));
17635
17755
  }
17636
17756
  }
17637
17757
  }
17638
17758
  if (existsSync(agentvDir)) {
17639
17759
  for (const template of agentvTemplates) {
17640
- const targetFilePath = path17.join(agentvDir, template.path);
17760
+ const targetFilePath = path21.join(agentvDir, template.path);
17761
+ if (existsSync(targetFilePath)) {
17762
+ existingFiles.push(path21.relative(targetPath, targetFilePath));
17763
+ }
17764
+ }
17765
+ }
17766
+ if (existsSync(claudeDir)) {
17767
+ for (const template of claudeTemplates) {
17768
+ const targetFilePath = path21.join(claudeDir, template.path);
17641
17769
  if (existsSync(targetFilePath)) {
17642
- existingFiles.push(path17.relative(targetPath, targetFilePath));
17770
+ existingFiles.push(path21.relative(targetPath, targetFilePath));
17643
17771
  }
17644
17772
  }
17645
17773
  }
@@ -17660,31 +17788,46 @@ async function initCommand(options = {}) {
17660
17788
  if (!existsSync(agentvDir)) {
17661
17789
  mkdirSync(agentvDir, { recursive: true });
17662
17790
  }
17791
+ if (!existsSync(claudeDir)) {
17792
+ mkdirSync(claudeDir, { recursive: true });
17793
+ }
17663
17794
  for (const template of githubTemplates) {
17664
- const targetFilePath = path17.join(githubDir, template.path);
17665
- const targetDirPath = path17.dirname(targetFilePath);
17795
+ const targetFilePath = path21.join(githubDir, template.path);
17796
+ const targetDirPath = path21.dirname(targetFilePath);
17666
17797
  if (!existsSync(targetDirPath)) {
17667
17798
  mkdirSync(targetDirPath, { recursive: true });
17668
17799
  }
17669
17800
  writeFileSync(targetFilePath, template.content, "utf-8");
17670
- console.log(`Created ${path17.relative(targetPath, targetFilePath)}`);
17801
+ console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
17671
17802
  }
17672
17803
  for (const template of agentvTemplates) {
17673
- const targetFilePath = path17.join(agentvDir, template.path);
17674
- const targetDirPath = path17.dirname(targetFilePath);
17804
+ const targetFilePath = path21.join(agentvDir, template.path);
17805
+ const targetDirPath = path21.dirname(targetFilePath);
17806
+ if (!existsSync(targetDirPath)) {
17807
+ mkdirSync(targetDirPath, { recursive: true });
17808
+ }
17809
+ writeFileSync(targetFilePath, template.content, "utf-8");
17810
+ console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
17811
+ }
17812
+ for (const template of claudeTemplates) {
17813
+ const targetFilePath = path21.join(claudeDir, template.path);
17814
+ const targetDirPath = path21.dirname(targetFilePath);
17675
17815
  if (!existsSync(targetDirPath)) {
17676
17816
  mkdirSync(targetDirPath, { recursive: true });
17677
17817
  }
17678
17818
  writeFileSync(targetFilePath, template.content, "utf-8");
17679
- console.log(`Created ${path17.relative(targetPath, targetFilePath)}`);
17819
+ console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
17680
17820
  }
17681
17821
  console.log("\nAgentV initialized successfully!");
17682
17822
  console.log(`
17683
- Files installed to ${path17.relative(targetPath, githubDir)}:`);
17823
+ Files installed to ${path21.relative(targetPath, githubDir)}:`);
17684
17824
  githubTemplates.forEach((t) => console.log(` - ${t.path}`));
17685
17825
  console.log(`
17686
- Files installed to ${path17.relative(targetPath, agentvDir)}:`);
17826
+ Files installed to ${path21.relative(targetPath, agentvDir)}:`);
17687
17827
  agentvTemplates.forEach((t) => console.log(` - ${t.path}`));
17828
+ console.log(`
17829
+ Files installed to ${path21.relative(targetPath, claudeDir)}:`);
17830
+ claudeTemplates.forEach((t) => console.log(` - ${t.path}`));
17688
17831
  console.log("\nYou can now:");
17689
17832
  console.log(" 1. Edit .agentv/.env with your API credentials");
17690
17833
  console.log(" 2. Configure targets in .agentv/targets.yaml");
@@ -17702,11 +17845,11 @@ function registerStatusCommand(program) {
17702
17845
 
17703
17846
  // src/commands/validate/format-output.ts
17704
17847
  var ANSI_RED2 = "\x1B[31m";
17705
- var ANSI_YELLOW3 = "\x1B[33m";
17848
+ var ANSI_YELLOW7 = "\x1B[33m";
17706
17849
  var ANSI_GREEN = "\x1B[32m";
17707
17850
  var ANSI_CYAN = "\x1B[36m";
17708
17851
  var ANSI_BOLD = "\x1B[1m";
17709
- var ANSI_RESET3 = "\x1B[0m";
17852
+ var ANSI_RESET7 = "\x1B[0m";
17710
17853
  function formatSummary(summary, useColors) {
17711
17854
  const lines = [];
17712
17855
  lines.push("");
@@ -17722,7 +17865,7 @@ function formatSummary(summary, useColors) {
17722
17865
  }
17723
17866
  function formatHeader(text, useColors) {
17724
17867
  if (useColors) {
17725
- return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET3}`;
17868
+ return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET7}`;
17726
17869
  }
17727
17870
  return text;
17728
17871
  }
@@ -17730,7 +17873,7 @@ function formatFileResult(result, useColors) {
17730
17873
  const lines = [];
17731
17874
  const status = result.valid ? "\u2713" : "\u2717";
17732
17875
  const statusColor = result.valid ? ANSI_GREEN : ANSI_RED2;
17733
- const statusText = useColors ? `${statusColor}${status}${ANSI_RESET3}` : status;
17876
+ const statusText = useColors ? `${statusColor}${status}${ANSI_RESET7}` : status;
17734
17877
  const fileName = result.filePath;
17735
17878
  lines.push(`${statusText} ${fileName}`);
17736
17879
  if (result.errors.length > 0) {
@@ -17742,8 +17885,8 @@ function formatFileResult(result, useColors) {
17742
17885
  }
17743
17886
  function formatError(error, useColors) {
17744
17887
  const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
17745
- const color = error.severity === "error" ? ANSI_RED2 : ANSI_YELLOW3;
17746
- const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET3}` : prefix;
17888
+ const color = error.severity === "error" ? ANSI_RED2 : ANSI_YELLOW7;
17889
+ const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET7}` : prefix;
17747
17890
  const location = error.location ? ` [${error.location}]` : "";
17748
17891
  return `${coloredPrefix}${location} ${error.message}`;
17749
17892
  }
@@ -17756,15 +17899,15 @@ function formatStats(summary, useColors) {
17756
17899
  (r) => r.errors.some((e) => e.severity === "warning")
17757
17900
  ).length;
17758
17901
  if (useColors) {
17759
- lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET3}`);
17760
- lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET3}`);
17902
+ lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET7}`);
17903
+ lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET7}`);
17761
17904
  if (summary.invalidFiles > 0) {
17762
- lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET3}`);
17905
+ lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET7}`);
17763
17906
  } else {
17764
17907
  lines.push(invalidText);
17765
17908
  }
17766
17909
  if (filesWithWarnings > 0) {
17767
- lines.push(`${ANSI_YELLOW3}Files with warnings: ${filesWithWarnings}${ANSI_RESET3}`);
17910
+ lines.push(`${ANSI_YELLOW7}Files with warnings: ${filesWithWarnings}${ANSI_RESET7}`);
17768
17911
  }
17769
17912
  } else {
17770
17913
  lines.push(totalText);
@@ -17783,7 +17926,7 @@ function isTTY2() {
17783
17926
  // src/commands/validate/validate-files.ts
17784
17927
  import { constants as constants7 } from "node:fs";
17785
17928
  import { access as access7, readdir as readdir3, stat as stat4 } from "node:fs/promises";
17786
- import path18 from "node:path";
17929
+ import path24 from "node:path";
17787
17930
  async function validateFiles(paths) {
17788
17931
  const filePaths = await expandPaths(paths);
17789
17932
  const results = [];
@@ -17801,7 +17944,7 @@ async function validateFiles(paths) {
17801
17944
  };
17802
17945
  }
17803
17946
  async function validateSingleFile(filePath) {
17804
- const absolutePath = path18.resolve(filePath);
17947
+ const absolutePath = path24.resolve(filePath);
17805
17948
  const fileType = await detectFileType(absolutePath);
17806
17949
  if (fileType === "unknown") {
17807
17950
  return {
@@ -17840,7 +17983,7 @@ async function validateSingleFile(filePath) {
17840
17983
  async function expandPaths(paths) {
17841
17984
  const expanded = [];
17842
17985
  for (const inputPath of paths) {
17843
- const absolutePath = path18.resolve(inputPath);
17986
+ const absolutePath = path24.resolve(inputPath);
17844
17987
  try {
17845
17988
  await access7(absolutePath, constants7.F_OK);
17846
17989
  } catch {
@@ -17864,7 +18007,7 @@ async function findYamlFiles(dirPath) {
17864
18007
  try {
17865
18008
  const entries = await readdir3(dirPath, { withFileTypes: true });
17866
18009
  for (const entry of entries) {
17867
- const fullPath = path18.join(dirPath, entry.name);
18010
+ const fullPath = path24.join(dirPath, entry.name);
17868
18011
  if (entry.isDirectory()) {
17869
18012
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
17870
18013
  continue;
@@ -17881,7 +18024,7 @@ async function findYamlFiles(dirPath) {
17881
18024
  return results;
17882
18025
  }
17883
18026
  function isYamlFile(filePath) {
17884
- const ext = path18.extname(filePath).toLowerCase();
18027
+ const ext = path24.extname(filePath).toLowerCase();
17885
18028
  return ext === ".yaml" || ext === ".yml";
17886
18029
  }
17887
18030
 
@@ -17938,4 +18081,4 @@ export {
17938
18081
  createProgram,
17939
18082
  runCli
17940
18083
  };
17941
- //# sourceMappingURL=chunk-72BHGHIT.js.map
18084
+ //# sourceMappingURL=chunk-WMO5PVPX.js.map