agentv 0.10.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-72BHGHIT.js → chunk-WMO5PVPX.js} +806 -663
- package/dist/chunk-WMO5PVPX.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/.agentv/.env.template +23 -0
- package/dist/templates/{github/prompts/eval-build.prompt.md → .claude/skills/agentv-eval-builder/SKILL.md} +57 -6
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +399 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +317 -0
- package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +70 -0
- package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +5 -0
- package/dist/templates/.github/prompts/agentv-optimize.prompt.md +4 -0
- package/package.json +2 -2
- package/dist/chunk-72BHGHIT.js.map +0 -1
- /package/dist/templates/{agentv → .agentv}/config.yaml +0 -0
- /package/dist/templates/{agentv → .agentv}/targets.yaml +0 -0
- /package/dist/templates/{github/contexts → .claude/skills/agentv-eval-builder/references}/config-schema.json +0 -0
- /package/dist/templates/{github/contexts → .claude/skills/agentv-eval-builder/references}/eval-schema.json +0 -0
|
@@ -588,9 +588,9 @@ import { readFileSync as readFileSync2 } from "node:fs";
|
|
|
588
588
|
// src/commands/eval/index.ts
|
|
589
589
|
import fg from "fast-glob";
|
|
590
590
|
import { stat as stat3 } from "node:fs/promises";
|
|
591
|
-
import
|
|
591
|
+
import path19 from "node:path";
|
|
592
592
|
|
|
593
|
-
// ../../packages/core/dist/chunk-
|
|
593
|
+
// ../../packages/core/dist/chunk-U3GEJ3K7.js
|
|
594
594
|
import { constants } from "node:fs";
|
|
595
595
|
import { access, readFile } from "node:fs/promises";
|
|
596
596
|
import path from "node:path";
|
|
@@ -1073,8 +1073,8 @@ function getErrorMap() {
|
|
|
1073
1073
|
|
|
1074
1074
|
// ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
1075
1075
|
var makeIssue = (params) => {
|
|
1076
|
-
const { data, path:
|
|
1077
|
-
const fullPath = [...
|
|
1076
|
+
const { data, path: path25, errorMaps, issueData } = params;
|
|
1077
|
+
const fullPath = [...path25, ...issueData.path || []];
|
|
1078
1078
|
const fullIssue = {
|
|
1079
1079
|
...issueData,
|
|
1080
1080
|
path: fullPath
|
|
@@ -1190,11 +1190,11 @@ var errorUtil;
|
|
|
1190
1190
|
|
|
1191
1191
|
// ../../node_modules/.pnpm/zod@3.25.76/node_modules/zod/v3/types.js
|
|
1192
1192
|
var ParseInputLazyPath = class {
|
|
1193
|
-
constructor(parent, value,
|
|
1193
|
+
constructor(parent, value, path25, key2) {
|
|
1194
1194
|
this._cachedPath = [];
|
|
1195
1195
|
this.parent = parent;
|
|
1196
1196
|
this.data = value;
|
|
1197
|
-
this._path =
|
|
1197
|
+
this._path = path25;
|
|
1198
1198
|
this._key = key2;
|
|
1199
1199
|
}
|
|
1200
1200
|
get path() {
|
|
@@ -4636,7 +4636,7 @@ var coerce = {
|
|
|
4636
4636
|
};
|
|
4637
4637
|
var NEVER = INVALID;
|
|
4638
4638
|
|
|
4639
|
-
// ../../packages/core/dist/chunk-
|
|
4639
|
+
// ../../packages/core/dist/chunk-U3GEJ3K7.js
|
|
4640
4640
|
async function fileExists(filePath) {
|
|
4641
4641
|
try {
|
|
4642
4642
|
await access(filePath, constants.F_OK);
|
|
@@ -5288,12 +5288,21 @@ function isAgentProvider(provider) {
|
|
|
5288
5288
|
}
|
|
5289
5289
|
|
|
5290
5290
|
// ../../packages/core/dist/index.js
|
|
5291
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
5292
|
+
import path62 from "node:path";
|
|
5293
|
+
import { parse as parse22 } from "yaml";
|
|
5291
5294
|
import micromatch from "micromatch";
|
|
5295
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
5296
|
+
import path22 from "node:path";
|
|
5297
|
+
import { parse as parse3 } from "yaml";
|
|
5292
5298
|
import { constants as constants3 } from "node:fs";
|
|
5293
|
-
import { access as access3
|
|
5299
|
+
import { access as access3 } from "node:fs/promises";
|
|
5294
5300
|
import path8 from "node:path";
|
|
5295
|
-
import
|
|
5296
|
-
import {
|
|
5301
|
+
import path32 from "node:path";
|
|
5302
|
+
import { readFile as readFile22 } from "node:fs/promises";
|
|
5303
|
+
import path42 from "node:path";
|
|
5304
|
+
import { readFile as readFile32 } from "node:fs/promises";
|
|
5305
|
+
import path52 from "node:path";
|
|
5297
5306
|
|
|
5298
5307
|
// ../../node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
|
|
5299
5308
|
var _globalThis = typeof globalThis === "object" ? globalThis : global;
|
|
@@ -9747,17 +9756,17 @@ var $a = new Error("Agent definition is the prompt you give to the LLM for the a
|
|
|
9747
9756
|
import { exec as execWithCallback } from "node:child_process";
|
|
9748
9757
|
import fs from "node:fs/promises";
|
|
9749
9758
|
import os2 from "node:os";
|
|
9750
|
-
import
|
|
9759
|
+
import path72 from "node:path";
|
|
9751
9760
|
import { promisify as promisify2 } from "node:util";
|
|
9752
9761
|
import { exec as execCallback, spawn as spawn2 } from "node:child_process";
|
|
9753
9762
|
import { randomUUID } from "node:crypto";
|
|
9754
9763
|
import { constants as constants22, createWriteStream } from "node:fs";
|
|
9755
9764
|
import { access as access22, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
|
|
9756
9765
|
import { tmpdir } from "node:os";
|
|
9757
|
-
import
|
|
9766
|
+
import path9 from "node:path";
|
|
9758
9767
|
import { promisify as promisify22 } from "node:util";
|
|
9759
|
-
import
|
|
9760
|
-
import
|
|
9768
|
+
import path82 from "node:path";
|
|
9769
|
+
import path10 from "node:path";
|
|
9761
9770
|
|
|
9762
9771
|
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
9763
9772
|
import { exec, spawn } from "child_process";
|
|
@@ -11696,13 +11705,12 @@ async function provisionSubagents(options) {
|
|
|
11696
11705
|
|
|
11697
11706
|
// ../../packages/core/dist/index.js
|
|
11698
11707
|
import { constants as constants32 } from "node:fs";
|
|
11699
|
-
import { access as access32, readFile as
|
|
11700
|
-
import
|
|
11701
|
-
import { parse as
|
|
11702
|
-
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
11703
|
-
import { createHash, randomUUID as randomUUID3 } from "node:crypto";
|
|
11708
|
+
import { access as access32, readFile as readFile5 } from "node:fs/promises";
|
|
11709
|
+
import path11 from "node:path";
|
|
11710
|
+
import { parse as parse32 } from "yaml";
|
|
11711
|
+
import { createHash, randomUUID as randomUUID2 } from "node:crypto";
|
|
11704
11712
|
import { mkdir as mkdir22, writeFile as writeFile22 } from "node:fs/promises";
|
|
11705
|
-
import
|
|
11713
|
+
import path12 from "node:path";
|
|
11706
11714
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
11707
11715
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
11708
11716
|
function isTestMessageRole(value) {
|
|
@@ -11747,42 +11755,179 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
|
|
|
11747
11755
|
function isEvaluatorKind(value) {
|
|
11748
11756
|
return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
|
|
11749
11757
|
}
|
|
11750
|
-
|
|
11751
|
-
|
|
11752
|
-
|
|
11753
|
-
|
|
11754
|
-
|
|
11755
|
-
|
|
11756
|
-
|
|
11757
|
-
const absolutePath = path8.resolve(testFilePath);
|
|
11758
|
-
const content = await readFile3(absolutePath, "utf8");
|
|
11759
|
-
const parsed = parse3(content);
|
|
11760
|
-
if (!isJsonObject(parsed)) {
|
|
11761
|
-
return {};
|
|
11758
|
+
function extractCodeBlocks(segments) {
|
|
11759
|
+
const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
|
11760
|
+
const codeBlocks = [];
|
|
11761
|
+
for (const segment of segments) {
|
|
11762
|
+
const typeValue = segment["type"];
|
|
11763
|
+
if (typeof typeValue !== "string" || typeValue !== "text") {
|
|
11764
|
+
continue;
|
|
11762
11765
|
}
|
|
11763
|
-
|
|
11766
|
+
const textValue = segment["value"];
|
|
11767
|
+
if (typeof textValue !== "string") {
|
|
11768
|
+
continue;
|
|
11769
|
+
}
|
|
11770
|
+
const matches = textValue.match(CODE_BLOCK_PATTERN);
|
|
11771
|
+
if (matches) {
|
|
11772
|
+
codeBlocks.push(...matches);
|
|
11773
|
+
}
|
|
11774
|
+
}
|
|
11775
|
+
return codeBlocks;
|
|
11776
|
+
}
|
|
11777
|
+
function formatFileContents(parts) {
|
|
11778
|
+
const fileCount = parts.filter((p) => p.isFile).length;
|
|
11779
|
+
if (fileCount > 0) {
|
|
11780
|
+
return parts.map((part) => {
|
|
11781
|
+
if (part.isFile && part.displayPath) {
|
|
11782
|
+
return `<file path="${part.displayPath}">
|
|
11783
|
+
${part.content}
|
|
11784
|
+
</file>`;
|
|
11785
|
+
}
|
|
11786
|
+
return part.content;
|
|
11787
|
+
}).join("\n\n");
|
|
11788
|
+
}
|
|
11789
|
+
return parts.map((p) => p.content).join(" ");
|
|
11790
|
+
}
|
|
11791
|
+
function formatSegment(segment) {
|
|
11792
|
+
const type = asString(segment.type);
|
|
11793
|
+
if (type === "text") {
|
|
11794
|
+
return asString(segment.value);
|
|
11795
|
+
}
|
|
11796
|
+
if (type === "guideline_ref") {
|
|
11797
|
+
const refPath = asString(segment.path);
|
|
11798
|
+
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
11799
|
+
}
|
|
11800
|
+
if (type === "file") {
|
|
11801
|
+
const text = asString(segment.text);
|
|
11802
|
+
const filePath = asString(segment.path);
|
|
11803
|
+
if (text && filePath) {
|
|
11804
|
+
return formatFileContents([{ content: text.trim(), isFile: true, displayPath: filePath }]);
|
|
11805
|
+
}
|
|
11806
|
+
}
|
|
11807
|
+
return void 0;
|
|
11808
|
+
}
|
|
11809
|
+
function hasVisibleContent(segments) {
|
|
11810
|
+
return segments.some((segment) => {
|
|
11811
|
+
const type = asString(segment.type);
|
|
11812
|
+
if (type === "text") {
|
|
11813
|
+
const value = asString(segment.value);
|
|
11814
|
+
return value !== void 0 && value.trim().length > 0;
|
|
11815
|
+
}
|
|
11816
|
+
if (type === "guideline_ref") {
|
|
11817
|
+
return false;
|
|
11818
|
+
}
|
|
11819
|
+
if (type === "file") {
|
|
11820
|
+
const text = asString(segment.text);
|
|
11821
|
+
return text !== void 0 && text.trim().length > 0;
|
|
11822
|
+
}
|
|
11823
|
+
return false;
|
|
11824
|
+
});
|
|
11825
|
+
}
|
|
11826
|
+
function asString(value) {
|
|
11827
|
+
return typeof value === "string" ? value : void 0;
|
|
11828
|
+
}
|
|
11829
|
+
async function fileExists2(absolutePath) {
|
|
11830
|
+
try {
|
|
11831
|
+
await access3(absolutePath, constants3.F_OK);
|
|
11832
|
+
return true;
|
|
11764
11833
|
} catch {
|
|
11765
|
-
return
|
|
11834
|
+
return false;
|
|
11766
11835
|
}
|
|
11767
11836
|
}
|
|
11768
|
-
function
|
|
11769
|
-
|
|
11770
|
-
|
|
11771
|
-
|
|
11772
|
-
|
|
11773
|
-
|
|
11837
|
+
function resolveToAbsolutePath(candidate) {
|
|
11838
|
+
if (candidate instanceof URL) {
|
|
11839
|
+
return new URL(candidate).pathname;
|
|
11840
|
+
}
|
|
11841
|
+
if (typeof candidate === "string") {
|
|
11842
|
+
if (candidate.startsWith("file://")) {
|
|
11843
|
+
return new URL(candidate).pathname;
|
|
11774
11844
|
}
|
|
11845
|
+
return path8.resolve(candidate);
|
|
11775
11846
|
}
|
|
11776
|
-
|
|
11777
|
-
|
|
11778
|
-
|
|
11847
|
+
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
11848
|
+
}
|
|
11849
|
+
function buildDirectoryChain2(filePath, repoRoot) {
|
|
11850
|
+
const directories = [];
|
|
11851
|
+
const seen = /* @__PURE__ */ new Set();
|
|
11852
|
+
const boundary = path8.resolve(repoRoot);
|
|
11853
|
+
let current = path8.resolve(path8.dirname(filePath));
|
|
11854
|
+
while (current !== void 0) {
|
|
11855
|
+
if (!seen.has(current)) {
|
|
11856
|
+
directories.push(current);
|
|
11857
|
+
seen.add(current);
|
|
11858
|
+
}
|
|
11859
|
+
if (current === boundary) {
|
|
11860
|
+
break;
|
|
11861
|
+
}
|
|
11862
|
+
const parent = path8.dirname(current);
|
|
11863
|
+
if (parent === current) {
|
|
11864
|
+
break;
|
|
11865
|
+
}
|
|
11866
|
+
current = parent;
|
|
11779
11867
|
}
|
|
11780
|
-
|
|
11868
|
+
if (!seen.has(boundary)) {
|
|
11869
|
+
directories.push(boundary);
|
|
11870
|
+
}
|
|
11871
|
+
return directories;
|
|
11872
|
+
}
|
|
11873
|
+
function buildSearchRoots2(evalPath, repoRoot) {
|
|
11874
|
+
const uniqueRoots = [];
|
|
11875
|
+
const addRoot = (root2) => {
|
|
11876
|
+
const normalized = path8.resolve(root2);
|
|
11877
|
+
if (!uniqueRoots.includes(normalized)) {
|
|
11878
|
+
uniqueRoots.push(normalized);
|
|
11879
|
+
}
|
|
11880
|
+
};
|
|
11881
|
+
let currentDir = path8.dirname(evalPath);
|
|
11882
|
+
let reachedBoundary = false;
|
|
11883
|
+
while (!reachedBoundary) {
|
|
11884
|
+
addRoot(currentDir);
|
|
11885
|
+
const parentDir = path8.dirname(currentDir);
|
|
11886
|
+
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
11887
|
+
reachedBoundary = true;
|
|
11888
|
+
} else {
|
|
11889
|
+
currentDir = parentDir;
|
|
11890
|
+
}
|
|
11891
|
+
}
|
|
11892
|
+
addRoot(repoRoot);
|
|
11893
|
+
addRoot(process.cwd());
|
|
11894
|
+
return uniqueRoots;
|
|
11895
|
+
}
|
|
11896
|
+
function trimLeadingSeparators2(value) {
|
|
11897
|
+
const trimmed = value.replace(/^[/\\]+/, "");
|
|
11898
|
+
return trimmed.length > 0 ? trimmed : value;
|
|
11899
|
+
}
|
|
11900
|
+
async function resolveFileReference2(rawValue, searchRoots) {
|
|
11901
|
+
const displayPath = trimLeadingSeparators2(rawValue);
|
|
11902
|
+
const potentialPaths = [];
|
|
11903
|
+
if (path8.isAbsolute(rawValue)) {
|
|
11904
|
+
potentialPaths.push(path8.normalize(rawValue));
|
|
11905
|
+
}
|
|
11906
|
+
for (const base of searchRoots) {
|
|
11907
|
+
potentialPaths.push(path8.resolve(base, displayPath));
|
|
11908
|
+
}
|
|
11909
|
+
const attempted = [];
|
|
11910
|
+
const seen = /* @__PURE__ */ new Set();
|
|
11911
|
+
for (const candidate of potentialPaths) {
|
|
11912
|
+
const absoluteCandidate = path8.resolve(candidate);
|
|
11913
|
+
if (seen.has(absoluteCandidate)) {
|
|
11914
|
+
continue;
|
|
11915
|
+
}
|
|
11916
|
+
seen.add(absoluteCandidate);
|
|
11917
|
+
attempted.push(absoluteCandidate);
|
|
11918
|
+
if (await fileExists2(absoluteCandidate)) {
|
|
11919
|
+
return { displayPath, resolvedPath: absoluteCandidate, attempted };
|
|
11920
|
+
}
|
|
11921
|
+
}
|
|
11922
|
+
return { displayPath, attempted };
|
|
11781
11923
|
}
|
|
11924
|
+
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
11925
|
+
var ANSI_YELLOW = "\x1B[33m";
|
|
11926
|
+
var ANSI_RESET = "\x1B[0m";
|
|
11782
11927
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
11783
|
-
const directories =
|
|
11928
|
+
const directories = buildDirectoryChain2(evalFilePath, repoRoot);
|
|
11784
11929
|
for (const directory of directories) {
|
|
11785
|
-
const configPath =
|
|
11930
|
+
const configPath = path22.join(directory, ".agentv", "config.yaml");
|
|
11786
11931
|
if (!await fileExists2(configPath)) {
|
|
11787
11932
|
continue;
|
|
11788
11933
|
}
|
|
@@ -11825,71 +11970,174 @@ function isGuidelineFile(filePath, patterns) {
|
|
|
11825
11970
|
const patternsToUse = patterns ?? [];
|
|
11826
11971
|
return micromatch.isMatch(normalized, patternsToUse);
|
|
11827
11972
|
}
|
|
11828
|
-
function
|
|
11829
|
-
const
|
|
11830
|
-
|
|
11831
|
-
const
|
|
11832
|
-
if (typeof
|
|
11833
|
-
|
|
11834
|
-
}
|
|
11835
|
-
const textValue = segment["value"];
|
|
11836
|
-
if (typeof textValue !== "string") {
|
|
11837
|
-
continue;
|
|
11838
|
-
}
|
|
11839
|
-
const matches = textValue.match(CODE_BLOCK_PATTERN);
|
|
11840
|
-
if (matches) {
|
|
11841
|
-
codeBlocks.push(...matches);
|
|
11973
|
+
function extractTargetFromSuite(suite) {
|
|
11974
|
+
const execution = suite.execution;
|
|
11975
|
+
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
11976
|
+
const executionTarget = execution.target;
|
|
11977
|
+
if (typeof executionTarget === "string" && executionTarget.trim().length > 0) {
|
|
11978
|
+
return executionTarget.trim();
|
|
11842
11979
|
}
|
|
11843
11980
|
}
|
|
11844
|
-
|
|
11981
|
+
const targetValue = suite.target;
|
|
11982
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
11983
|
+
return targetValue.trim();
|
|
11984
|
+
}
|
|
11985
|
+
return void 0;
|
|
11845
11986
|
}
|
|
11846
|
-
|
|
11847
|
-
|
|
11848
|
-
|
|
11849
|
-
|
|
11850
|
-
|
|
11851
|
-
|
|
11852
|
-
|
|
11853
|
-
|
|
11854
|
-
|
|
11855
|
-
|
|
11856
|
-
}
|
|
11857
|
-
|
|
11858
|
-
|
|
11859
|
-
|
|
11860
|
-
|
|
11861
|
-
|
|
11862
|
-
|
|
11863
|
-
|
|
11864
|
-
}
|
|
11987
|
+
function logWarning(message) {
|
|
11988
|
+
console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET}`);
|
|
11989
|
+
}
|
|
11990
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
11991
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
11992
|
+
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
11993
|
+
const execution = rawEvalCase.execution;
|
|
11994
|
+
const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
|
|
11995
|
+
if (candidateEvaluators === void 0) {
|
|
11996
|
+
return void 0;
|
|
11997
|
+
}
|
|
11998
|
+
if (!Array.isArray(candidateEvaluators)) {
|
|
11999
|
+
logWarning2(`Skipping evaluators for '${evalId}': expected array`);
|
|
12000
|
+
return void 0;
|
|
12001
|
+
}
|
|
12002
|
+
const evaluators = [];
|
|
12003
|
+
for (const rawEvaluator of candidateEvaluators) {
|
|
12004
|
+
if (!isJsonObject2(rawEvaluator)) {
|
|
12005
|
+
logWarning2(`Skipping invalid evaluator entry for '${evalId}' (expected object)`);
|
|
11865
12006
|
continue;
|
|
11866
12007
|
}
|
|
11867
|
-
|
|
11868
|
-
|
|
12008
|
+
const name = asString2(rawEvaluator.name);
|
|
12009
|
+
const typeValue = rawEvaluator.type;
|
|
12010
|
+
if (!name || !isEvaluatorKind(typeValue)) {
|
|
12011
|
+
logWarning2(`Skipping evaluator with invalid name/type in '${evalId}'`);
|
|
12012
|
+
continue;
|
|
12013
|
+
}
|
|
12014
|
+
if (typeValue === "code") {
|
|
12015
|
+
const script = asString2(rawEvaluator.script);
|
|
12016
|
+
if (!script) {
|
|
12017
|
+
logWarning2(`Skipping code evaluator '${name}' in '${evalId}': missing script`);
|
|
11869
12018
|
continue;
|
|
11870
12019
|
}
|
|
11871
|
-
const
|
|
11872
|
-
|
|
11873
|
-
|
|
11874
|
-
|
|
11875
|
-
|
|
12020
|
+
const cwd = asString2(rawEvaluator.cwd);
|
|
12021
|
+
let resolvedCwd;
|
|
12022
|
+
if (cwd) {
|
|
12023
|
+
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
12024
|
+
if (resolved.resolvedPath) {
|
|
12025
|
+
resolvedCwd = path32.resolve(resolved.resolvedPath);
|
|
12026
|
+
} else {
|
|
12027
|
+
logWarning2(
|
|
12028
|
+
`Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
12029
|
+
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
12030
|
+
);
|
|
11876
12031
|
}
|
|
11877
|
-
|
|
11878
|
-
|
|
11879
|
-
|
|
11880
|
-
|
|
11881
|
-
|
|
11882
|
-
|
|
12032
|
+
} else {
|
|
12033
|
+
resolvedCwd = searchRoots[0];
|
|
12034
|
+
}
|
|
12035
|
+
evaluators.push({
|
|
12036
|
+
name,
|
|
12037
|
+
type: "code",
|
|
12038
|
+
script,
|
|
12039
|
+
cwd,
|
|
12040
|
+
resolvedCwd
|
|
12041
|
+
});
|
|
12042
|
+
continue;
|
|
12043
|
+
}
|
|
12044
|
+
const prompt = asString2(rawEvaluator.prompt);
|
|
12045
|
+
let promptPath;
|
|
12046
|
+
if (prompt) {
|
|
12047
|
+
const resolved = await resolveFileReference2(prompt, searchRoots);
|
|
12048
|
+
if (resolved.resolvedPath) {
|
|
12049
|
+
promptPath = path32.resolve(resolved.resolvedPath);
|
|
12050
|
+
} else {
|
|
12051
|
+
logWarning2(
|
|
12052
|
+
`Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
|
|
12053
|
+
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
12054
|
+
);
|
|
12055
|
+
}
|
|
12056
|
+
}
|
|
12057
|
+
const _model = asString2(rawEvaluator.model);
|
|
12058
|
+
evaluators.push({
|
|
12059
|
+
name,
|
|
12060
|
+
type: "llm_judge",
|
|
12061
|
+
prompt,
|
|
12062
|
+
promptPath
|
|
12063
|
+
});
|
|
12064
|
+
}
|
|
12065
|
+
return evaluators.length > 0 ? evaluators : void 0;
|
|
12066
|
+
}
|
|
12067
|
+
function coerceEvaluator(candidate, contextId) {
|
|
12068
|
+
if (typeof candidate !== "string") {
|
|
12069
|
+
return void 0;
|
|
12070
|
+
}
|
|
12071
|
+
if (isEvaluatorKind(candidate)) {
|
|
12072
|
+
return candidate;
|
|
12073
|
+
}
|
|
12074
|
+
logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
|
|
12075
|
+
return void 0;
|
|
12076
|
+
}
|
|
12077
|
+
function asString2(value) {
|
|
12078
|
+
return typeof value === "string" ? value : void 0;
|
|
12079
|
+
}
|
|
12080
|
+
function isJsonObject2(value) {
|
|
12081
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
12082
|
+
}
|
|
12083
|
+
function logWarning2(message, details) {
|
|
12084
|
+
if (details && details.length > 0) {
|
|
12085
|
+
const detailBlock = details.join("\n");
|
|
12086
|
+
console.warn(`${ANSI_YELLOW2}Warning: ${message}
|
|
12087
|
+
${detailBlock}${ANSI_RESET2}`);
|
|
12088
|
+
} else {
|
|
12089
|
+
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`);
|
|
12090
|
+
}
|
|
12091
|
+
}
|
|
12092
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
12093
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
12094
|
+
async function processMessages(options) {
|
|
12095
|
+
const {
|
|
12096
|
+
messages,
|
|
12097
|
+
searchRoots,
|
|
12098
|
+
repoRootPath,
|
|
12099
|
+
guidelinePatterns,
|
|
12100
|
+
guidelinePaths,
|
|
12101
|
+
textParts,
|
|
12102
|
+
messageType,
|
|
12103
|
+
verbose
|
|
12104
|
+
} = options;
|
|
12105
|
+
const segments = [];
|
|
12106
|
+
for (const message of messages) {
|
|
12107
|
+
const content = message.content;
|
|
12108
|
+
if (typeof content === "string") {
|
|
12109
|
+
segments.push({ type: "text", value: content });
|
|
12110
|
+
if (textParts) {
|
|
12111
|
+
textParts.push(content);
|
|
12112
|
+
}
|
|
12113
|
+
continue;
|
|
12114
|
+
}
|
|
12115
|
+
for (const rawSegment of content) {
|
|
12116
|
+
if (!isJsonObject(rawSegment)) {
|
|
12117
|
+
continue;
|
|
12118
|
+
}
|
|
12119
|
+
const segmentType = asString3(rawSegment.type);
|
|
12120
|
+
if (segmentType === "file") {
|
|
12121
|
+
const rawValue = asString3(rawSegment.value);
|
|
12122
|
+
if (!rawValue) {
|
|
12123
|
+
continue;
|
|
12124
|
+
}
|
|
12125
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
12126
|
+
rawValue,
|
|
12127
|
+
searchRoots
|
|
12128
|
+
);
|
|
12129
|
+
if (!resolvedPath) {
|
|
12130
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
11883
12131
|
const context2 = messageType === "input" ? "" : " in expected_messages";
|
|
11884
|
-
|
|
12132
|
+
logWarning3(`File not found${context2}: ${displayPath}`, attempts);
|
|
11885
12133
|
continue;
|
|
11886
12134
|
}
|
|
11887
12135
|
try {
|
|
11888
|
-
const fileContent = (await
|
|
12136
|
+
const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
11889
12137
|
if (messageType === "input" && guidelinePatterns && guidelinePaths) {
|
|
11890
|
-
const relativeToRepo =
|
|
12138
|
+
const relativeToRepo = path42.relative(repoRootPath, resolvedPath);
|
|
11891
12139
|
if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
|
|
11892
|
-
guidelinePaths.push(
|
|
12140
|
+
guidelinePaths.push(path42.resolve(resolvedPath));
|
|
11893
12141
|
if (verbose) {
|
|
11894
12142
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
11895
12143
|
console.log(` Resolved to: ${resolvedPath}`);
|
|
@@ -11901,7 +12149,7 @@ async function processMessages(options) {
|
|
|
11901
12149
|
type: "file",
|
|
11902
12150
|
path: displayPath,
|
|
11903
12151
|
text: fileContent,
|
|
11904
|
-
resolvedPath:
|
|
12152
|
+
resolvedPath: path42.resolve(resolvedPath)
|
|
11905
12153
|
});
|
|
11906
12154
|
if (verbose) {
|
|
11907
12155
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -11910,7 +12158,7 @@ async function processMessages(options) {
|
|
|
11910
12158
|
}
|
|
11911
12159
|
} catch (error) {
|
|
11912
12160
|
const context2 = messageType === "input" ? "" : " expected output";
|
|
11913
|
-
|
|
12161
|
+
logWarning3(`Could not read${context2} file ${resolvedPath}: ${error.message}`);
|
|
11914
12162
|
}
|
|
11915
12163
|
continue;
|
|
11916
12164
|
}
|
|
@@ -11924,202 +12172,116 @@ async function processMessages(options) {
|
|
|
11924
12172
|
}
|
|
11925
12173
|
return segments;
|
|
11926
12174
|
}
|
|
11927
|
-
async function
|
|
11928
|
-
|
|
11929
|
-
|
|
11930
|
-
const absoluteTestPath = path8.resolve(evalFilePath);
|
|
11931
|
-
if (!await fileExists2(absoluteTestPath)) {
|
|
11932
|
-
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
11933
|
-
}
|
|
11934
|
-
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
11935
|
-
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
11936
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
11937
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
11938
|
-
const rawFile = await readFile3(absoluteTestPath, "utf8");
|
|
11939
|
-
const parsed = parse3(rawFile);
|
|
11940
|
-
if (!isJsonObject(parsed)) {
|
|
11941
|
-
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
11942
|
-
}
|
|
11943
|
-
const suite = parsed;
|
|
11944
|
-
const datasetNameFromSuite = asString(suite.dataset)?.trim();
|
|
11945
|
-
const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
11946
|
-
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
11947
|
-
const schema = suite.$schema;
|
|
11948
|
-
if (schema !== SCHEMA_EVAL_V2) {
|
|
11949
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
|
|
11950
|
-
Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
11951
|
-
throw new Error(message);
|
|
12175
|
+
async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
12176
|
+
if (typeof content === "string") {
|
|
12177
|
+
return content;
|
|
11952
12178
|
}
|
|
11953
|
-
|
|
11954
|
-
|
|
11955
|
-
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
12179
|
+
if (!content) {
|
|
12180
|
+
return "";
|
|
11956
12181
|
}
|
|
11957
|
-
const
|
|
11958
|
-
const
|
|
11959
|
-
|
|
11960
|
-
|
|
11961
|
-
for (const rawEvalcase of rawTestcases) {
|
|
11962
|
-
if (!isJsonObject(rawEvalcase)) {
|
|
11963
|
-
logWarning("Skipping invalid eval case entry (expected object)");
|
|
12182
|
+
const parts = [];
|
|
12183
|
+
for (const entry of content) {
|
|
12184
|
+
if (typeof entry === "string") {
|
|
12185
|
+
parts.push({ content: entry, isFile: false });
|
|
11964
12186
|
continue;
|
|
11965
12187
|
}
|
|
11966
|
-
|
|
11967
|
-
const id = asString(evalcase.id);
|
|
11968
|
-
if (evalIdFilter && id !== evalIdFilter) {
|
|
12188
|
+
if (!isJsonObject(entry)) {
|
|
11969
12189
|
continue;
|
|
11970
12190
|
}
|
|
11971
|
-
const
|
|
11972
|
-
|
|
11973
|
-
|
|
11974
|
-
|
|
11975
|
-
|
|
11976
|
-
|
|
12191
|
+
const segmentType = asString3(entry.type);
|
|
12192
|
+
if (segmentType === "file") {
|
|
12193
|
+
const rawValue = asString3(entry.value);
|
|
12194
|
+
if (!rawValue) {
|
|
12195
|
+
continue;
|
|
12196
|
+
}
|
|
12197
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
12198
|
+
rawValue,
|
|
12199
|
+
searchRoots
|
|
12200
|
+
);
|
|
12201
|
+
if (!resolvedPath) {
|
|
12202
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
12203
|
+
logWarning3(`File not found in expected_messages: ${displayPath}`, attempts);
|
|
12204
|
+
continue;
|
|
12205
|
+
}
|
|
12206
|
+
try {
|
|
12207
|
+
const fileContent = (await readFile22(resolvedPath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
12208
|
+
parts.push({ content: fileContent, isFile: true, displayPath });
|
|
12209
|
+
if (verbose) {
|
|
12210
|
+
console.log(` [Expected Assistant File] Found: ${displayPath}`);
|
|
12211
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
12212
|
+
}
|
|
12213
|
+
} catch (error) {
|
|
12214
|
+
logWarning3(`Could not read file ${resolvedPath}: ${error.message}`);
|
|
12215
|
+
}
|
|
11977
12216
|
continue;
|
|
11978
12217
|
}
|
|
11979
|
-
const
|
|
11980
|
-
|
|
11981
|
-
|
|
11982
|
-
if (hasExpectedMessages && expectedMessages.length === 0) {
|
|
11983
|
-
logWarning(`No valid expected message found for eval case: ${id}`);
|
|
12218
|
+
const textValue = asString3(entry.text);
|
|
12219
|
+
if (typeof textValue === "string") {
|
|
12220
|
+
parts.push({ content: textValue, isFile: false });
|
|
11984
12221
|
continue;
|
|
11985
12222
|
}
|
|
11986
|
-
|
|
11987
|
-
|
|
11988
|
-
|
|
11989
|
-
|
|
11990
|
-
const inputTextParts = [];
|
|
11991
|
-
const inputSegments = await processMessages({
|
|
11992
|
-
messages: inputMessages,
|
|
11993
|
-
searchRoots,
|
|
11994
|
-
repoRootPath,
|
|
11995
|
-
guidelinePatterns,
|
|
11996
|
-
guidelinePaths,
|
|
11997
|
-
textParts: inputTextParts,
|
|
11998
|
-
messageType: "input",
|
|
11999
|
-
verbose
|
|
12000
|
-
});
|
|
12001
|
-
const outputSegments = hasExpectedMessages ? await processMessages({
|
|
12002
|
-
messages: expectedMessages,
|
|
12003
|
-
searchRoots,
|
|
12004
|
-
repoRootPath,
|
|
12005
|
-
guidelinePatterns,
|
|
12006
|
-
messageType: "output",
|
|
12007
|
-
verbose
|
|
12008
|
-
}) : [];
|
|
12009
|
-
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
12010
|
-
const expectedContent = expectedMessages[0]?.content;
|
|
12011
|
-
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
12012
|
-
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
12013
|
-
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
12014
|
-
const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
12015
|
-
const userFilePaths = [];
|
|
12016
|
-
for (const segment of inputSegments) {
|
|
12017
|
-
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
12018
|
-
userFilePaths.push(segment.resolvedPath);
|
|
12019
|
-
}
|
|
12020
|
-
}
|
|
12021
|
-
const allFilePaths = [
|
|
12022
|
-
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
12023
|
-
...userFilePaths
|
|
12024
|
-
];
|
|
12025
|
-
const testCase = {
|
|
12026
|
-
id,
|
|
12027
|
-
dataset: datasetName,
|
|
12028
|
-
conversation_id: conversationId,
|
|
12029
|
-
question,
|
|
12030
|
-
input_messages: inputMessages,
|
|
12031
|
-
input_segments: inputSegments,
|
|
12032
|
-
output_segments: outputSegments,
|
|
12033
|
-
reference_answer: referenceAnswer,
|
|
12034
|
-
guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
12035
|
-
guideline_patterns: guidelinePatterns,
|
|
12036
|
-
file_paths: allFilePaths,
|
|
12037
|
-
code_snippets: codeSnippets,
|
|
12038
|
-
expected_outcome: outcome,
|
|
12039
|
-
evaluator: evalCaseEvaluatorKind,
|
|
12040
|
-
evaluators
|
|
12041
|
-
};
|
|
12042
|
-
if (verbose) {
|
|
12043
|
-
console.log(`
|
|
12044
|
-
[Eval Case: ${id}]`);
|
|
12045
|
-
if (testCase.guideline_paths.length > 0) {
|
|
12046
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
12047
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
12048
|
-
console.log(` - ${guidelinePath}`);
|
|
12049
|
-
}
|
|
12050
|
-
} else {
|
|
12051
|
-
console.log(" No guidelines found");
|
|
12052
|
-
}
|
|
12223
|
+
const valueValue = asString3(entry.value);
|
|
12224
|
+
if (typeof valueValue === "string") {
|
|
12225
|
+
parts.push({ content: valueValue, isFile: false });
|
|
12226
|
+
continue;
|
|
12053
12227
|
}
|
|
12054
|
-
|
|
12228
|
+
parts.push({ content: JSON.stringify(entry), isFile: false });
|
|
12055
12229
|
}
|
|
12056
|
-
return
|
|
12230
|
+
return formatFileContents(parts);
|
|
12057
12231
|
}
|
|
12058
|
-
function
|
|
12059
|
-
|
|
12060
|
-
return true;
|
|
12061
|
-
}
|
|
12062
|
-
let messagesWithContent = 0;
|
|
12063
|
-
for (const segments of processedSegmentsByMessage) {
|
|
12064
|
-
if (hasVisibleContent(segments)) {
|
|
12065
|
-
messagesWithContent++;
|
|
12066
|
-
}
|
|
12067
|
-
}
|
|
12068
|
-
return messagesWithContent > 1;
|
|
12232
|
+
function asString3(value) {
|
|
12233
|
+
return typeof value === "string" ? value : void 0;
|
|
12069
12234
|
}
|
|
12070
|
-
function
|
|
12071
|
-
|
|
12072
|
-
|
|
12073
|
-
if (type === "text") {
|
|
12074
|
-
const value = asString(segment.value);
|
|
12075
|
-
return value !== void 0 && value.trim().length > 0;
|
|
12076
|
-
}
|
|
12077
|
-
if (type === "guideline_ref") {
|
|
12078
|
-
return false;
|
|
12079
|
-
}
|
|
12080
|
-
if (type === "file") {
|
|
12081
|
-
const text = asString(segment.text);
|
|
12082
|
-
return text !== void 0 && text.trim().length > 0;
|
|
12083
|
-
}
|
|
12084
|
-
return false;
|
|
12085
|
-
});
|
|
12235
|
+
function cloneJsonObject(source2) {
|
|
12236
|
+
const entries = Object.entries(source2).map(([key2, value]) => [key2, cloneJsonValue(value)]);
|
|
12237
|
+
return Object.fromEntries(entries);
|
|
12086
12238
|
}
|
|
12087
|
-
function
|
|
12088
|
-
|
|
12089
|
-
|
|
12090
|
-
return asString(segment.value);
|
|
12239
|
+
function cloneJsonValue(value) {
|
|
12240
|
+
if (value === null) {
|
|
12241
|
+
return null;
|
|
12091
12242
|
}
|
|
12092
|
-
if (
|
|
12093
|
-
|
|
12094
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
12243
|
+
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
|
12244
|
+
return value;
|
|
12095
12245
|
}
|
|
12096
|
-
if (
|
|
12097
|
-
|
|
12098
|
-
const filePath = asString(segment.path);
|
|
12099
|
-
if (text && filePath) {
|
|
12100
|
-
return `=== ${filePath} ===
|
|
12101
|
-
${text}`;
|
|
12102
|
-
}
|
|
12246
|
+
if (Array.isArray(value)) {
|
|
12247
|
+
return value.map((item) => cloneJsonValue(item));
|
|
12103
12248
|
}
|
|
12104
|
-
|
|
12249
|
+
if (typeof value === "object") {
|
|
12250
|
+
return cloneJsonObject(value);
|
|
12251
|
+
}
|
|
12252
|
+
return value;
|
|
12105
12253
|
}
|
|
12254
|
+
function logWarning3(message, details) {
|
|
12255
|
+
if (details && details.length > 0) {
|
|
12256
|
+
const detailBlock = details.join("\n");
|
|
12257
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}
|
|
12258
|
+
${detailBlock}${ANSI_RESET3}`);
|
|
12259
|
+
} else {
|
|
12260
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
|
|
12261
|
+
}
|
|
12262
|
+
}
|
|
12263
|
+
var ANSI_YELLOW4 = "\x1B[33m";
|
|
12264
|
+
var ANSI_RESET4 = "\x1B[0m";
|
|
12106
12265
|
async function buildPromptInputs(testCase) {
|
|
12107
|
-
const
|
|
12266
|
+
const guidelineParts = [];
|
|
12108
12267
|
for (const rawPath of testCase.guideline_paths) {
|
|
12109
|
-
const absolutePath =
|
|
12268
|
+
const absolutePath = path52.resolve(rawPath);
|
|
12110
12269
|
if (!await fileExists2(absolutePath)) {
|
|
12111
|
-
|
|
12270
|
+
logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
12112
12271
|
continue;
|
|
12113
12272
|
}
|
|
12114
12273
|
try {
|
|
12115
|
-
const content = (await
|
|
12116
|
-
|
|
12117
|
-
|
|
12274
|
+
const content = (await readFile32(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
12275
|
+
guidelineParts.push({
|
|
12276
|
+
content,
|
|
12277
|
+
isFile: true,
|
|
12278
|
+
displayPath: path52.basename(absolutePath)
|
|
12279
|
+
});
|
|
12118
12280
|
} catch (error) {
|
|
12119
|
-
|
|
12281
|
+
logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
12120
12282
|
}
|
|
12121
12283
|
}
|
|
12122
|
-
const guidelines =
|
|
12284
|
+
const guidelines = formatFileContents(guidelineParts);
|
|
12123
12285
|
const segmentsByMessage = [];
|
|
12124
12286
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
12125
12287
|
for (const segment of testCase.input_segments) {
|
|
@@ -12140,9 +12302,9 @@ ${content}`);
|
|
|
12140
12302
|
messageSegments.push({ type: "text", value: segment });
|
|
12141
12303
|
}
|
|
12142
12304
|
} else if (isJsonObject(segment)) {
|
|
12143
|
-
const type =
|
|
12305
|
+
const type = asString4(segment.type);
|
|
12144
12306
|
if (type === "file") {
|
|
12145
|
-
const value =
|
|
12307
|
+
const value = asString4(segment.value);
|
|
12146
12308
|
if (!value) continue;
|
|
12147
12309
|
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
12148
12310
|
messageSegments.push({ type: "guideline_ref", path: value });
|
|
@@ -12153,7 +12315,7 @@ ${content}`);
|
|
|
12153
12315
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
12154
12316
|
}
|
|
12155
12317
|
} else if (type === "text") {
|
|
12156
|
-
const textValue =
|
|
12318
|
+
const textValue = asString4(segment.value);
|
|
12157
12319
|
if (textValue && textValue.trim().length > 0) {
|
|
12158
12320
|
messageSegments.push({ type: "text", value: textValue });
|
|
12159
12321
|
}
|
|
@@ -12209,6 +12371,18 @@ ${messageContent}`);
|
|
|
12209
12371
|
}) : void 0;
|
|
12210
12372
|
return { question, guidelines, chatPrompt };
|
|
12211
12373
|
}
|
|
12374
|
+
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
12375
|
+
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
12376
|
+
return true;
|
|
12377
|
+
}
|
|
12378
|
+
let messagesWithContent = 0;
|
|
12379
|
+
for (const segments of processedSegmentsByMessage) {
|
|
12380
|
+
if (hasVisibleContent(segments)) {
|
|
12381
|
+
messagesWithContent++;
|
|
12382
|
+
}
|
|
12383
|
+
}
|
|
12384
|
+
return messagesWithContent > 1;
|
|
12385
|
+
}
|
|
12212
12386
|
function buildChatPromptFromSegments(options) {
|
|
12213
12387
|
const { messages, segmentsByMessage, guidelinePatterns, guidelineContent, systemPrompt } = options;
|
|
12214
12388
|
if (messages.length === 0) {
|
|
@@ -12259,217 +12433,189 @@ ${guidelineContent.trim()}`);
|
|
|
12259
12433
|
name = "tool";
|
|
12260
12434
|
}
|
|
12261
12435
|
for (const segment of segments) {
|
|
12262
|
-
if (segment.type === "guideline_ref") {
|
|
12263
|
-
continue;
|
|
12264
|
-
}
|
|
12265
|
-
const formatted = formatSegment(segment);
|
|
12266
|
-
if (formatted) {
|
|
12267
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
12268
|
-
if (isGuidelineRef) {
|
|
12269
|
-
continue;
|
|
12270
|
-
}
|
|
12271
|
-
contentParts.push(formatted);
|
|
12272
|
-
}
|
|
12273
|
-
}
|
|
12274
|
-
if (contentParts.length === 0) {
|
|
12275
|
-
continue;
|
|
12276
|
-
}
|
|
12277
|
-
chatPrompt.push({
|
|
12278
|
-
role,
|
|
12279
|
-
content: contentParts.join("\n"),
|
|
12280
|
-
...name ? { name } : {}
|
|
12281
|
-
});
|
|
12282
|
-
}
|
|
12283
|
-
return chatPrompt.length > 0 ? chatPrompt : void 0;
|
|
12284
|
-
}
|
|
12285
|
-
async function fileExists2(absolutePath) {
|
|
12286
|
-
try {
|
|
12287
|
-
await access3(absolutePath, constants3.F_OK);
|
|
12288
|
-
return true;
|
|
12289
|
-
} catch {
|
|
12290
|
-
return false;
|
|
12291
|
-
}
|
|
12292
|
-
}
|
|
12293
|
-
function resolveToAbsolutePath(candidate) {
|
|
12294
|
-
if (candidate instanceof URL) {
|
|
12295
|
-
return fileURLToPath(candidate);
|
|
12296
|
-
}
|
|
12297
|
-
if (typeof candidate === "string") {
|
|
12298
|
-
if (candidate.startsWith("file://")) {
|
|
12299
|
-
return fileURLToPath(new URL(candidate));
|
|
12300
|
-
}
|
|
12301
|
-
return path8.resolve(candidate);
|
|
12302
|
-
}
|
|
12303
|
-
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
12304
|
-
}
|
|
12305
|
-
function asString(value) {
|
|
12306
|
-
return typeof value === "string" ? value : void 0;
|
|
12307
|
-
}
|
|
12308
|
-
function cloneJsonObject(source2) {
|
|
12309
|
-
const entries = Object.entries(source2).map(([key2, value]) => [key2, cloneJsonValue(value)]);
|
|
12310
|
-
return Object.fromEntries(entries);
|
|
12311
|
-
}
|
|
12312
|
-
function cloneJsonValue(value) {
|
|
12313
|
-
if (value === null) {
|
|
12314
|
-
return null;
|
|
12315
|
-
}
|
|
12316
|
-
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
|
|
12317
|
-
return value;
|
|
12318
|
-
}
|
|
12319
|
-
if (Array.isArray(value)) {
|
|
12320
|
-
return value.map((item) => cloneJsonValue(item));
|
|
12321
|
-
}
|
|
12322
|
-
return cloneJsonObject(value);
|
|
12323
|
-
}
|
|
12324
|
-
async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
12325
|
-
if (typeof content === "string") {
|
|
12326
|
-
return content;
|
|
12327
|
-
}
|
|
12328
|
-
if (!content) {
|
|
12329
|
-
return "";
|
|
12330
|
-
}
|
|
12331
|
-
const parts = [];
|
|
12332
|
-
for (const entry of content) {
|
|
12333
|
-
if (typeof entry === "string") {
|
|
12334
|
-
parts.push(entry);
|
|
12335
|
-
continue;
|
|
12336
|
-
}
|
|
12337
|
-
if (!isJsonObject(entry)) {
|
|
12338
|
-
continue;
|
|
12339
|
-
}
|
|
12340
|
-
const segmentType = asString(entry.type);
|
|
12341
|
-
if (segmentType === "file") {
|
|
12342
|
-
const rawValue = asString(entry.value);
|
|
12343
|
-
if (!rawValue) {
|
|
12344
|
-
continue;
|
|
12345
|
-
}
|
|
12346
|
-
const { displayPath, resolvedPath, attempted } = await resolveFileReference(
|
|
12347
|
-
rawValue,
|
|
12348
|
-
searchRoots
|
|
12349
|
-
);
|
|
12350
|
-
if (!resolvedPath) {
|
|
12351
|
-
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
12352
|
-
logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
|
|
12436
|
+
if (segment.type === "guideline_ref") {
|
|
12353
12437
|
continue;
|
|
12354
12438
|
}
|
|
12355
|
-
|
|
12356
|
-
|
|
12357
|
-
|
|
12358
|
-
if (
|
|
12359
|
-
|
|
12360
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
12439
|
+
const formatted = formatSegment(segment);
|
|
12440
|
+
if (formatted) {
|
|
12441
|
+
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
12442
|
+
if (isGuidelineRef) {
|
|
12443
|
+
continue;
|
|
12361
12444
|
}
|
|
12362
|
-
|
|
12363
|
-
logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
|
|
12445
|
+
contentParts.push(formatted);
|
|
12364
12446
|
}
|
|
12365
|
-
continue;
|
|
12366
12447
|
}
|
|
12367
|
-
|
|
12368
|
-
if (typeof textValue === "string") {
|
|
12369
|
-
parts.push(textValue);
|
|
12448
|
+
if (contentParts.length === 0) {
|
|
12370
12449
|
continue;
|
|
12371
12450
|
}
|
|
12372
|
-
|
|
12373
|
-
|
|
12374
|
-
|
|
12375
|
-
|
|
12451
|
+
chatPrompt.push({
|
|
12452
|
+
role,
|
|
12453
|
+
content: contentParts.join("\n"),
|
|
12454
|
+
...name ? { name } : {}
|
|
12455
|
+
});
|
|
12456
|
+
}
|
|
12457
|
+
return chatPrompt.length > 0 ? chatPrompt : void 0;
|
|
12458
|
+
}
|
|
12459
|
+
function asString4(value) {
|
|
12460
|
+
return typeof value === "string" ? value : void 0;
|
|
12461
|
+
}
|
|
12462
|
+
function logWarning4(message) {
|
|
12463
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
|
|
12464
|
+
}
|
|
12465
|
+
var ANSI_YELLOW5 = "\x1B[33m";
|
|
12466
|
+
var ANSI_RESET5 = "\x1B[0m";
|
|
12467
|
+
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
12468
|
+
async function readTestSuiteMetadata(testFilePath) {
|
|
12469
|
+
try {
|
|
12470
|
+
const absolutePath = path62.resolve(testFilePath);
|
|
12471
|
+
const content = await readFile4(absolutePath, "utf8");
|
|
12472
|
+
const parsed = parse22(content);
|
|
12473
|
+
if (!isJsonObject(parsed)) {
|
|
12474
|
+
return {};
|
|
12376
12475
|
}
|
|
12377
|
-
|
|
12476
|
+
return { target: extractTargetFromSuite(parsed) };
|
|
12477
|
+
} catch {
|
|
12478
|
+
return {};
|
|
12378
12479
|
}
|
|
12379
|
-
return parts.join(" ");
|
|
12380
12480
|
}
|
|
12381
|
-
async function
|
|
12382
|
-
const
|
|
12383
|
-
const
|
|
12384
|
-
|
|
12385
|
-
|
|
12481
|
+
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
12482
|
+
const verbose = options?.verbose ?? false;
|
|
12483
|
+
const evalIdFilter = options?.evalId;
|
|
12484
|
+
const absoluteTestPath = path62.resolve(evalFilePath);
|
|
12485
|
+
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
12486
|
+
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
12487
|
+
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
12488
|
+
const guidelinePatterns = config?.guideline_patterns;
|
|
12489
|
+
const rawFile = await readFile4(absoluteTestPath, "utf8");
|
|
12490
|
+
const parsed = parse22(rawFile);
|
|
12491
|
+
if (!isJsonObject(parsed)) {
|
|
12492
|
+
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
12386
12493
|
}
|
|
12387
|
-
|
|
12388
|
-
|
|
12389
|
-
|
|
12494
|
+
const suite = parsed;
|
|
12495
|
+
const datasetNameFromSuite = asString5(suite.dataset)?.trim();
|
|
12496
|
+
const fallbackDataset = path62.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
12497
|
+
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
12498
|
+
const schema = suite.$schema;
|
|
12499
|
+
if (schema !== SCHEMA_EVAL_V2) {
|
|
12500
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
|
|
12501
|
+
Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
12502
|
+
throw new Error(message);
|
|
12390
12503
|
}
|
|
12391
|
-
const
|
|
12392
|
-
|
|
12393
|
-
|
|
12394
|
-
|
|
12504
|
+
const rawTestcases = suite.evalcases;
|
|
12505
|
+
if (!Array.isArray(rawTestcases)) {
|
|
12506
|
+
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
12507
|
+
}
|
|
12508
|
+
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
|
|
12509
|
+
const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
12510
|
+
const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
|
|
12511
|
+
const results = [];
|
|
12512
|
+
for (const rawEvalcase of rawTestcases) {
|
|
12513
|
+
if (!isJsonObject(rawEvalcase)) {
|
|
12514
|
+
logWarning5("Skipping invalid eval case entry (expected object)");
|
|
12395
12515
|
continue;
|
|
12396
12516
|
}
|
|
12397
|
-
const
|
|
12398
|
-
const
|
|
12399
|
-
if (
|
|
12400
|
-
logWarning(`Skipping evaluator with invalid name/type in '${evalId}'`);
|
|
12517
|
+
const evalcase = rawEvalcase;
|
|
12518
|
+
const id = asString5(evalcase.id);
|
|
12519
|
+
if (evalIdFilter && id !== evalIdFilter) {
|
|
12401
12520
|
continue;
|
|
12402
12521
|
}
|
|
12403
|
-
|
|
12404
|
-
|
|
12405
|
-
|
|
12406
|
-
|
|
12407
|
-
|
|
12408
|
-
}
|
|
12409
|
-
const cwd = asString(rawEvaluator.cwd);
|
|
12410
|
-
let resolvedCwd;
|
|
12411
|
-
if (cwd) {
|
|
12412
|
-
const resolved = await resolveFileReference(cwd, searchRoots);
|
|
12413
|
-
if (resolved.resolvedPath) {
|
|
12414
|
-
resolvedCwd = path8.resolve(resolved.resolvedPath);
|
|
12415
|
-
} else {
|
|
12416
|
-
logWarning(
|
|
12417
|
-
`Code evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
12418
|
-
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
12419
|
-
);
|
|
12420
|
-
}
|
|
12421
|
-
} else {
|
|
12422
|
-
resolvedCwd = searchRoots[0];
|
|
12423
|
-
}
|
|
12424
|
-
evaluators.push({
|
|
12425
|
-
name,
|
|
12426
|
-
type: "code",
|
|
12427
|
-
script,
|
|
12428
|
-
cwd,
|
|
12429
|
-
resolvedCwd
|
|
12430
|
-
});
|
|
12522
|
+
const conversationId = asString5(evalcase.conversation_id);
|
|
12523
|
+
const outcome = asString5(evalcase.outcome);
|
|
12524
|
+
const inputMessagesValue = evalcase.input_messages;
|
|
12525
|
+
const expectedMessagesValue = evalcase.expected_messages;
|
|
12526
|
+
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
12527
|
+
logWarning5(`Skipping incomplete eval case: ${id ?? "unknown"}`);
|
|
12431
12528
|
continue;
|
|
12432
12529
|
}
|
|
12433
|
-
const
|
|
12434
|
-
|
|
12435
|
-
|
|
12436
|
-
|
|
12437
|
-
|
|
12438
|
-
|
|
12530
|
+
const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
|
|
12531
|
+
const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
|
|
12532
|
+
const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
|
|
12533
|
+
if (hasExpectedMessages && expectedMessages.length === 0) {
|
|
12534
|
+
logWarning5(`No valid expected message found for eval case: ${id}`);
|
|
12535
|
+
continue;
|
|
12536
|
+
}
|
|
12537
|
+
if (expectedMessages.length > 1) {
|
|
12538
|
+
logWarning5(`Multiple expected messages found for eval case: ${id}, using first`);
|
|
12539
|
+
}
|
|
12540
|
+
const guidelinePaths = [];
|
|
12541
|
+
const inputTextParts = [];
|
|
12542
|
+
const inputSegments = await processMessages({
|
|
12543
|
+
messages: inputMessages,
|
|
12544
|
+
searchRoots,
|
|
12545
|
+
repoRootPath,
|
|
12546
|
+
guidelinePatterns,
|
|
12547
|
+
guidelinePaths,
|
|
12548
|
+
textParts: inputTextParts,
|
|
12549
|
+
messageType: "input",
|
|
12550
|
+
verbose
|
|
12551
|
+
});
|
|
12552
|
+
const outputSegments = hasExpectedMessages ? await processMessages({
|
|
12553
|
+
messages: expectedMessages,
|
|
12554
|
+
searchRoots,
|
|
12555
|
+
repoRootPath,
|
|
12556
|
+
guidelinePatterns,
|
|
12557
|
+
messageType: "output",
|
|
12558
|
+
verbose
|
|
12559
|
+
}) : [];
|
|
12560
|
+
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
12561
|
+
const expectedContent = expectedMessages[0]?.content;
|
|
12562
|
+
const referenceAnswer = expectedContent ? await resolveAssistantContent(expectedContent, searchRoots, verbose) : "";
|
|
12563
|
+
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
12564
|
+
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
12565
|
+
const evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
12566
|
+
const userFilePaths = [];
|
|
12567
|
+
for (const segment of inputSegments) {
|
|
12568
|
+
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
12569
|
+
userFilePaths.push(segment.resolvedPath);
|
|
12570
|
+
}
|
|
12571
|
+
}
|
|
12572
|
+
const allFilePaths = [
|
|
12573
|
+
...guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
|
|
12574
|
+
...userFilePaths
|
|
12575
|
+
];
|
|
12576
|
+
const testCase = {
|
|
12577
|
+
id,
|
|
12578
|
+
dataset: datasetName,
|
|
12579
|
+
conversation_id: conversationId,
|
|
12580
|
+
question,
|
|
12581
|
+
input_messages: inputMessages,
|
|
12582
|
+
input_segments: inputSegments,
|
|
12583
|
+
output_segments: outputSegments,
|
|
12584
|
+
reference_answer: referenceAnswer,
|
|
12585
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
|
|
12586
|
+
guideline_patterns: guidelinePatterns,
|
|
12587
|
+
file_paths: allFilePaths,
|
|
12588
|
+
code_snippets: codeSnippets,
|
|
12589
|
+
expected_outcome: outcome,
|
|
12590
|
+
evaluator: evalCaseEvaluatorKind,
|
|
12591
|
+
evaluators
|
|
12592
|
+
};
|
|
12593
|
+
if (verbose) {
|
|
12594
|
+
console.log(`
|
|
12595
|
+
[Eval Case: ${id}]`);
|
|
12596
|
+
if (testCase.guideline_paths.length > 0) {
|
|
12597
|
+
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
12598
|
+
for (const guidelinePath of testCase.guideline_paths) {
|
|
12599
|
+
console.log(` - ${guidelinePath}`);
|
|
12600
|
+
}
|
|
12439
12601
|
} else {
|
|
12440
|
-
|
|
12441
|
-
`Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
|
|
12442
|
-
resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
|
|
12443
|
-
);
|
|
12602
|
+
console.log(" No guidelines found");
|
|
12444
12603
|
}
|
|
12445
12604
|
}
|
|
12446
|
-
|
|
12447
|
-
evaluators.push({
|
|
12448
|
-
name,
|
|
12449
|
-
type: "llm_judge",
|
|
12450
|
-
prompt,
|
|
12451
|
-
promptPath
|
|
12452
|
-
});
|
|
12605
|
+
results.push(testCase);
|
|
12453
12606
|
}
|
|
12454
|
-
return
|
|
12607
|
+
return results;
|
|
12455
12608
|
}
|
|
12456
|
-
function
|
|
12457
|
-
|
|
12458
|
-
return void 0;
|
|
12459
|
-
}
|
|
12460
|
-
if (isEvaluatorKind(candidate)) {
|
|
12461
|
-
return candidate;
|
|
12462
|
-
}
|
|
12463
|
-
logWarning(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
|
|
12464
|
-
return void 0;
|
|
12609
|
+
function asString5(value) {
|
|
12610
|
+
return typeof value === "string" ? value : void 0;
|
|
12465
12611
|
}
|
|
12466
|
-
function
|
|
12612
|
+
function logWarning5(message, details) {
|
|
12467
12613
|
if (details && details.length > 0) {
|
|
12468
12614
|
const detailBlock = details.join("\n");
|
|
12469
|
-
console.warn(`${
|
|
12470
|
-
${detailBlock}${
|
|
12615
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}
|
|
12616
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
12471
12617
|
} else {
|
|
12472
|
-
console.warn(`${
|
|
12618
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
|
|
12473
12619
|
}
|
|
12474
12620
|
}
|
|
12475
12621
|
var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
|
|
@@ -12498,9 +12644,8 @@ function buildChatPrompt(request) {
|
|
|
12498
12644
|
}
|
|
12499
12645
|
function resolveSystemContent(request) {
|
|
12500
12646
|
const systemSegments = [];
|
|
12501
|
-
|
|
12502
|
-
|
|
12503
|
-
systemSegments.push(metadataSystemPrompt.trim());
|
|
12647
|
+
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
12648
|
+
systemSegments.push(request.systemPrompt.trim());
|
|
12504
12649
|
} else {
|
|
12505
12650
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
12506
12651
|
}
|
|
@@ -12925,7 +13070,7 @@ function normalizeInputFiles(inputFiles) {
|
|
|
12925
13070
|
}
|
|
12926
13071
|
const unique = /* @__PURE__ */ new Map();
|
|
12927
13072
|
for (const inputFile of inputFiles) {
|
|
12928
|
-
const absolutePath =
|
|
13073
|
+
const absolutePath = path72.resolve(inputFile);
|
|
12929
13074
|
if (!unique.has(absolutePath)) {
|
|
12930
13075
|
unique.set(absolutePath, absolutePath);
|
|
12931
13076
|
}
|
|
@@ -12939,7 +13084,7 @@ function formatFileList(files, template) {
|
|
|
12939
13084
|
const formatter = template ?? "{path}";
|
|
12940
13085
|
return files.map((filePath) => {
|
|
12941
13086
|
const escapedPath = shellEscape(filePath);
|
|
12942
|
-
const escapedName = shellEscape(
|
|
13087
|
+
const escapedName = shellEscape(path72.basename(filePath));
|
|
12943
13088
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
12944
13089
|
}).join(" ");
|
|
12945
13090
|
}
|
|
@@ -12963,7 +13108,7 @@ function generateOutputFilePath(evalCaseId) {
|
|
|
12963
13108
|
const safeEvalId = evalCaseId || "unknown";
|
|
12964
13109
|
const timestamp = Date.now();
|
|
12965
13110
|
const random = Math.random().toString(36).substring(2, 9);
|
|
12966
|
-
return
|
|
13111
|
+
return path72.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
|
|
12967
13112
|
}
|
|
12968
13113
|
function formatTimeoutSuffix(timeoutMs) {
|
|
12969
13114
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -13040,7 +13185,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
13040
13185
|
}
|
|
13041
13186
|
const deduped = /* @__PURE__ */ new Map();
|
|
13042
13187
|
for (const inputFile of inputFiles) {
|
|
13043
|
-
const absolutePath =
|
|
13188
|
+
const absolutePath = path82.resolve(inputFile);
|
|
13044
13189
|
if (!deduped.has(absolutePath)) {
|
|
13045
13190
|
deduped.set(absolutePath, absolutePath);
|
|
13046
13191
|
}
|
|
@@ -13053,14 +13198,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
|
13053
13198
|
}
|
|
13054
13199
|
const unique = /* @__PURE__ */ new Map();
|
|
13055
13200
|
for (const inputFile of inputFiles) {
|
|
13056
|
-
const absolutePath =
|
|
13201
|
+
const absolutePath = path82.resolve(inputFile);
|
|
13057
13202
|
if (overrides?.has(absolutePath)) {
|
|
13058
13203
|
if (!unique.has(absolutePath)) {
|
|
13059
13204
|
unique.set(absolutePath, absolutePath);
|
|
13060
13205
|
}
|
|
13061
13206
|
continue;
|
|
13062
13207
|
}
|
|
13063
|
-
const normalized = absolutePath.split(
|
|
13208
|
+
const normalized = absolutePath.split(path82.sep).join("/");
|
|
13064
13209
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
13065
13210
|
if (!unique.has(absolutePath)) {
|
|
13066
13211
|
unique.set(absolutePath, absolutePath);
|
|
@@ -13075,7 +13220,7 @@ function collectInputFiles(inputFiles) {
|
|
|
13075
13220
|
}
|
|
13076
13221
|
const unique = /* @__PURE__ */ new Map();
|
|
13077
13222
|
for (const inputFile of inputFiles) {
|
|
13078
|
-
const absolutePath =
|
|
13223
|
+
const absolutePath = path82.resolve(inputFile);
|
|
13079
13224
|
if (!unique.has(absolutePath)) {
|
|
13080
13225
|
unique.set(absolutePath, absolutePath);
|
|
13081
13226
|
}
|
|
@@ -13087,7 +13232,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
|
|
|
13087
13232
|
return "";
|
|
13088
13233
|
}
|
|
13089
13234
|
const buildList = (files) => files.map((absolutePath) => {
|
|
13090
|
-
const fileName =
|
|
13235
|
+
const fileName = path82.basename(absolutePath);
|
|
13091
13236
|
const fileUri = pathToFileUri2(absolutePath);
|
|
13092
13237
|
return `* [${fileName}](${fileUri})`;
|
|
13093
13238
|
});
|
|
@@ -13107,7 +13252,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
13107
13252
|
return sections.join("\n");
|
|
13108
13253
|
}
|
|
13109
13254
|
function pathToFileUri2(filePath) {
|
|
13110
|
-
const absolutePath =
|
|
13255
|
+
const absolutePath = path82.isAbsolute(filePath) ? filePath : path82.resolve(filePath);
|
|
13111
13256
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
13112
13257
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
13113
13258
|
return `file:///${normalizedPath}`;
|
|
@@ -13143,7 +13288,7 @@ var CodexProvider = class {
|
|
|
13143
13288
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
13144
13289
|
try {
|
|
13145
13290
|
const promptContent = buildPromptDocument(request, inputFiles);
|
|
13146
|
-
const promptFile =
|
|
13291
|
+
const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
|
|
13147
13292
|
await writeFile3(promptFile, promptContent, "utf8");
|
|
13148
13293
|
const args = this.buildCodexArgs();
|
|
13149
13294
|
const cwd = this.resolveCwd(workspaceRoot);
|
|
@@ -13193,7 +13338,7 @@ var CodexProvider = class {
|
|
|
13193
13338
|
if (!this.config.cwd) {
|
|
13194
13339
|
return workspaceRoot;
|
|
13195
13340
|
}
|
|
13196
|
-
return
|
|
13341
|
+
return path9.resolve(this.config.cwd);
|
|
13197
13342
|
}
|
|
13198
13343
|
buildCodexArgs() {
|
|
13199
13344
|
const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
|
|
@@ -13227,7 +13372,7 @@ var CodexProvider = class {
|
|
|
13227
13372
|
}
|
|
13228
13373
|
}
|
|
13229
13374
|
async createWorkspace() {
|
|
13230
|
-
return await mkdtemp(
|
|
13375
|
+
return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
|
|
13231
13376
|
}
|
|
13232
13377
|
async cleanupWorkspace(workspaceRoot) {
|
|
13233
13378
|
try {
|
|
@@ -13241,9 +13386,9 @@ var CodexProvider = class {
|
|
|
13241
13386
|
return void 0;
|
|
13242
13387
|
}
|
|
13243
13388
|
if (this.config.logDir) {
|
|
13244
|
-
return
|
|
13389
|
+
return path9.resolve(this.config.logDir);
|
|
13245
13390
|
}
|
|
13246
|
-
return
|
|
13391
|
+
return path9.join(process.cwd(), ".agentv", "logs", "codex");
|
|
13247
13392
|
}
|
|
13248
13393
|
async createStreamLogger(request) {
|
|
13249
13394
|
const logDir = this.resolveLogDirectory();
|
|
@@ -13257,7 +13402,7 @@ var CodexProvider = class {
|
|
|
13257
13402
|
console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
|
|
13258
13403
|
return void 0;
|
|
13259
13404
|
}
|
|
13260
|
-
const filePath =
|
|
13405
|
+
const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
|
|
13261
13406
|
try {
|
|
13262
13407
|
const logger = await CodexStreamLogger.create({
|
|
13263
13408
|
filePath,
|
|
@@ -13472,7 +13617,7 @@ function tryParseJsonValue(rawLine) {
|
|
|
13472
13617
|
async function locateExecutable(candidate) {
|
|
13473
13618
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
13474
13619
|
if (includesPathSeparator) {
|
|
13475
|
-
const resolved =
|
|
13620
|
+
const resolved = path9.isAbsolute(candidate) ? candidate : path9.resolve(candidate);
|
|
13476
13621
|
const executablePath = await ensureWindowsExecutableVariant(resolved);
|
|
13477
13622
|
await access22(executablePath, constants22.F_OK);
|
|
13478
13623
|
return executablePath;
|
|
@@ -13926,6 +14071,9 @@ var VSCodeProvider = class {
|
|
|
13926
14071
|
};
|
|
13927
14072
|
function buildPromptDocument2(request, attachments, guidelinePatterns) {
|
|
13928
14073
|
const parts = [];
|
|
14074
|
+
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
14075
|
+
parts.push(request.systemPrompt.trim());
|
|
14076
|
+
}
|
|
13929
14077
|
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
13930
14078
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
13931
14079
|
const nonGuidelineAttachments = attachmentFiles.filter(
|
|
@@ -13943,7 +14091,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
13943
14091
|
return "";
|
|
13944
14092
|
}
|
|
13945
14093
|
const buildList = (files) => files.map((absolutePath) => {
|
|
13946
|
-
const fileName =
|
|
14094
|
+
const fileName = path10.basename(absolutePath);
|
|
13947
14095
|
const fileUri = pathToFileUri22(absolutePath);
|
|
13948
14096
|
return `* [${fileName}](${fileUri})`;
|
|
13949
14097
|
});
|
|
@@ -13968,8 +14116,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
13968
14116
|
}
|
|
13969
14117
|
const unique = /* @__PURE__ */ new Map();
|
|
13970
14118
|
for (const attachment of attachments) {
|
|
13971
|
-
const absolutePath =
|
|
13972
|
-
const normalized = absolutePath.split(
|
|
14119
|
+
const absolutePath = path10.resolve(attachment);
|
|
14120
|
+
const normalized = absolutePath.split(path10.sep).join("/");
|
|
13973
14121
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
13974
14122
|
if (!unique.has(absolutePath)) {
|
|
13975
14123
|
unique.set(absolutePath, absolutePath);
|
|
@@ -13984,7 +14132,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
13984
14132
|
}
|
|
13985
14133
|
const unique = /* @__PURE__ */ new Map();
|
|
13986
14134
|
for (const attachment of attachments) {
|
|
13987
|
-
const absolutePath =
|
|
14135
|
+
const absolutePath = path10.resolve(attachment);
|
|
13988
14136
|
if (!unique.has(absolutePath)) {
|
|
13989
14137
|
unique.set(absolutePath, absolutePath);
|
|
13990
14138
|
}
|
|
@@ -13992,7 +14140,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
13992
14140
|
return Array.from(unique.values());
|
|
13993
14141
|
}
|
|
13994
14142
|
function pathToFileUri22(filePath) {
|
|
13995
|
-
const absolutePath =
|
|
14143
|
+
const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
|
|
13996
14144
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
13997
14145
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
13998
14146
|
return `file:///${normalizedPath}`;
|
|
@@ -14005,7 +14153,7 @@ function normalizeAttachments(attachments) {
|
|
|
14005
14153
|
}
|
|
14006
14154
|
const deduped = /* @__PURE__ */ new Set();
|
|
14007
14155
|
for (const attachment of attachments) {
|
|
14008
|
-
deduped.add(
|
|
14156
|
+
deduped.add(path10.resolve(attachment));
|
|
14009
14157
|
}
|
|
14010
14158
|
return Array.from(deduped);
|
|
14011
14159
|
}
|
|
@@ -14014,7 +14162,7 @@ function mergeAttachments(all) {
|
|
|
14014
14162
|
for (const list of all) {
|
|
14015
14163
|
if (!list) continue;
|
|
14016
14164
|
for (const inputFile of list) {
|
|
14017
|
-
deduped.add(
|
|
14165
|
+
deduped.add(path10.resolve(inputFile));
|
|
14018
14166
|
}
|
|
14019
14167
|
}
|
|
14020
14168
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -14111,12 +14259,12 @@ async function fileExists3(filePath) {
|
|
|
14111
14259
|
}
|
|
14112
14260
|
}
|
|
14113
14261
|
async function readTargetDefinitions(filePath) {
|
|
14114
|
-
const absolutePath =
|
|
14262
|
+
const absolutePath = path11.resolve(filePath);
|
|
14115
14263
|
if (!await fileExists3(absolutePath)) {
|
|
14116
14264
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
14117
14265
|
}
|
|
14118
|
-
const raw = await
|
|
14119
|
-
const parsed =
|
|
14266
|
+
const raw = await readFile5(absolutePath, "utf8");
|
|
14267
|
+
const parsed = parse32(raw);
|
|
14120
14268
|
if (!isRecord(parsed)) {
|
|
14121
14269
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
14122
14270
|
}
|
|
@@ -14151,17 +14299,34 @@ function createProvider(target) {
|
|
|
14151
14299
|
}
|
|
14152
14300
|
}
|
|
14153
14301
|
}
|
|
14302
|
+
var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
|
|
14303
|
+
|
|
14304
|
+
Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
|
|
14305
|
+
|
|
14306
|
+
Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.
|
|
14307
|
+
|
|
14308
|
+
[[ ## expected_outcome ## ]]
|
|
14309
|
+
{{expected_outcome}}
|
|
14310
|
+
|
|
14311
|
+
[[ ## question ## ]]
|
|
14312
|
+
{{question}}
|
|
14313
|
+
|
|
14314
|
+
[[ ## reference_answer ## ]]
|
|
14315
|
+
{{reference_answer}}
|
|
14316
|
+
|
|
14317
|
+
[[ ## candidate_answer ## ]]
|
|
14318
|
+
{{candidate_answer}}`;
|
|
14154
14319
|
var LlmJudgeEvaluator = class {
|
|
14155
14320
|
kind = "llm_judge";
|
|
14156
14321
|
resolveJudgeProvider;
|
|
14157
14322
|
maxOutputTokens;
|
|
14158
14323
|
temperature;
|
|
14159
|
-
|
|
14324
|
+
evaluatorTemplate;
|
|
14160
14325
|
constructor(options) {
|
|
14161
14326
|
this.resolveJudgeProvider = options.resolveJudgeProvider;
|
|
14162
14327
|
this.maxOutputTokens = options.maxOutputTokens;
|
|
14163
14328
|
this.temperature = options.temperature;
|
|
14164
|
-
this.
|
|
14329
|
+
this.evaluatorTemplate = options.evaluatorTemplate;
|
|
14165
14330
|
}
|
|
14166
14331
|
async evaluate(context2) {
|
|
14167
14332
|
const judgeProvider = await this.resolveJudgeProvider(context2);
|
|
@@ -14171,26 +14336,21 @@ var LlmJudgeEvaluator = class {
|
|
|
14171
14336
|
return this.evaluateWithPrompt(context2, judgeProvider);
|
|
14172
14337
|
}
|
|
14173
14338
|
async evaluateWithPrompt(context2, judgeProvider) {
|
|
14174
|
-
const hasReferenceAnswer = hasNonEmptyReferenceAnswer(context2.evalCase);
|
|
14175
14339
|
const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
|
|
14176
|
-
|
|
14177
|
-
|
|
14178
|
-
|
|
14179
|
-
|
|
14180
|
-
|
|
14181
|
-
|
|
14182
|
-
|
|
14183
|
-
|
|
14184
|
-
|
|
14185
|
-
|
|
14186
|
-
|
|
14187
|
-
prompt = substituteVariables(systemPrompt, variables);
|
|
14188
|
-
systemPrompt = buildSystemPrompt(hasReferenceAnswer);
|
|
14189
|
-
}
|
|
14190
|
-
const metadata = systemPrompt !== void 0 ? { systemPrompt } : {};
|
|
14340
|
+
const variables = {
|
|
14341
|
+
input_messages: JSON.stringify(context2.evalCase.input_segments, null, 2),
|
|
14342
|
+
output_messages: JSON.stringify(context2.evalCase.output_segments, null, 2),
|
|
14343
|
+
candidate_answer: context2.candidate.trim(),
|
|
14344
|
+
reference_answer: (context2.evalCase.reference_answer ?? "").trim(),
|
|
14345
|
+
expected_outcome: context2.evalCase.expected_outcome.trim(),
|
|
14346
|
+
question: formattedQuestion.trim()
|
|
14347
|
+
};
|
|
14348
|
+
const systemPrompt = buildOutputSchema();
|
|
14349
|
+
const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
14350
|
+
const userPrompt = substituteVariables(evaluatorTemplate, variables);
|
|
14191
14351
|
const response = await judgeProvider.invoke({
|
|
14192
|
-
question:
|
|
14193
|
-
|
|
14352
|
+
question: userPrompt,
|
|
14353
|
+
systemPrompt,
|
|
14194
14354
|
evalCaseId: context2.evalCase.id,
|
|
14195
14355
|
attempt: context2.attempt,
|
|
14196
14356
|
maxOutputTokens: this.maxOutputTokens,
|
|
@@ -14203,11 +14363,9 @@ var LlmJudgeEvaluator = class {
|
|
|
14203
14363
|
const reasoning = parsed.reasoning ?? response.reasoning;
|
|
14204
14364
|
const expectedAspectCount = Math.max(hits.length + misses.length, 1);
|
|
14205
14365
|
const evaluatorRawRequest = {
|
|
14206
|
-
|
|
14207
|
-
|
|
14208
|
-
|
|
14209
|
-
target: context2.target.name,
|
|
14210
|
-
...systemPrompt !== void 0 && { systemPrompt }
|
|
14366
|
+
userPrompt,
|
|
14367
|
+
systemPrompt,
|
|
14368
|
+
target: judgeProvider.targetName
|
|
14211
14369
|
};
|
|
14212
14370
|
return {
|
|
14213
14371
|
score,
|
|
@@ -14219,20 +14377,8 @@ var LlmJudgeEvaluator = class {
|
|
|
14219
14377
|
};
|
|
14220
14378
|
}
|
|
14221
14379
|
};
|
|
14222
|
-
function
|
|
14223
|
-
|
|
14224
|
-
"You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.",
|
|
14225
|
-
""
|
|
14226
|
-
];
|
|
14227
|
-
if (hasReferenceAnswer) {
|
|
14228
|
-
basePrompt.push(
|
|
14229
|
-
"Use the reference_answer as a gold standard for a high-quality response. The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.",
|
|
14230
|
-
""
|
|
14231
|
-
);
|
|
14232
|
-
}
|
|
14233
|
-
basePrompt.push(
|
|
14234
|
-
"Be concise and focused in your evaluation. Provide succinct, specific feedback rather than verbose explanations.",
|
|
14235
|
-
"",
|
|
14380
|
+
function buildOutputSchema() {
|
|
14381
|
+
return [
|
|
14236
14382
|
"You must respond with a single JSON object matching this schema:",
|
|
14237
14383
|
"",
|
|
14238
14384
|
"{",
|
|
@@ -14241,30 +14387,7 @@ function buildSystemPrompt(hasReferenceAnswer) {
|
|
|
14241
14387
|
' "misses": [<array of strings, max 4 items, brief specific failures or omissions, empty if none>],',
|
|
14242
14388
|
' "reasoning": "<string, concise explanation for the score, 1-2 sentences max>"',
|
|
14243
14389
|
"}"
|
|
14244
|
-
);
|
|
14245
|
-
return basePrompt.join("\n");
|
|
14246
|
-
}
|
|
14247
|
-
function buildQualityPrompt(evalCase, candidate, question) {
|
|
14248
|
-
const parts = [
|
|
14249
|
-
"[[ ## expected_outcome ## ]]",
|
|
14250
|
-
evalCase.expected_outcome.trim(),
|
|
14251
|
-
"",
|
|
14252
|
-
"[[ ## question ## ]]",
|
|
14253
|
-
question.trim(),
|
|
14254
|
-
""
|
|
14255
|
-
];
|
|
14256
|
-
if (hasNonEmptyReferenceAnswer(evalCase)) {
|
|
14257
|
-
parts.push(
|
|
14258
|
-
"[[ ## reference_answer ## ]]",
|
|
14259
|
-
evalCase.reference_answer.trim(),
|
|
14260
|
-
""
|
|
14261
|
-
);
|
|
14262
|
-
}
|
|
14263
|
-
parts.push(
|
|
14264
|
-
"[[ ## candidate_answer ## ]]",
|
|
14265
|
-
candidate.trim()
|
|
14266
|
-
);
|
|
14267
|
-
return parts.join("\n");
|
|
14390
|
+
].join("\n");
|
|
14268
14391
|
}
|
|
14269
14392
|
function clampScore(value) {
|
|
14270
14393
|
if (Number.isNaN(value) || !Number.isFinite(value)) {
|
|
@@ -14346,9 +14469,6 @@ function extractJsonBlob(text) {
|
|
|
14346
14469
|
function isNonEmptyString(value) {
|
|
14347
14470
|
return typeof value === "string" && value.trim().length > 0;
|
|
14348
14471
|
}
|
|
14349
|
-
function hasNonEmptyReferenceAnswer(evalCase) {
|
|
14350
|
-
return evalCase.reference_answer !== void 0 && evalCase.reference_answer.trim().length > 0;
|
|
14351
|
-
}
|
|
14352
14472
|
var CodeEvaluator = class {
|
|
14353
14473
|
kind = "code";
|
|
14354
14474
|
script;
|
|
@@ -14454,11 +14574,8 @@ function parseJsonSafe(payload) {
|
|
|
14454
14574
|
return void 0;
|
|
14455
14575
|
}
|
|
14456
14576
|
}
|
|
14457
|
-
function hasTemplateVariables(text) {
|
|
14458
|
-
return /\$\{[a-zA-Z0-9_]+\}/.test(text);
|
|
14459
|
-
}
|
|
14460
14577
|
function substituteVariables(template, variables) {
|
|
14461
|
-
return template.replace(
|
|
14578
|
+
return template.replace(/\{\{([a-zA-Z0-9_]+)\}\}/g, (match, varName) => {
|
|
14462
14579
|
return variables[varName] ?? match;
|
|
14463
14580
|
});
|
|
14464
14581
|
}
|
|
@@ -15018,6 +15135,7 @@ async function evaluateCandidate(options) {
|
|
|
15018
15135
|
}
|
|
15019
15136
|
}
|
|
15020
15137
|
return {
|
|
15138
|
+
timestamp: completedAt.toISOString(),
|
|
15021
15139
|
eval_id: evalCase.id,
|
|
15022
15140
|
dataset: evalCase.dataset,
|
|
15023
15141
|
conversation_id: evalCase.conversation_id,
|
|
@@ -15025,14 +15143,12 @@ async function evaluateCandidate(options) {
|
|
|
15025
15143
|
hits: score.hits,
|
|
15026
15144
|
misses: score.misses,
|
|
15027
15145
|
candidate_answer: candidate,
|
|
15028
|
-
expected_aspect_count: score.expectedAspectCount,
|
|
15029
15146
|
target: target.name,
|
|
15030
|
-
timestamp: completedAt.toISOString(),
|
|
15031
15147
|
reasoning: score.reasoning,
|
|
15032
15148
|
raw_aspects: score.rawAspects,
|
|
15033
15149
|
agent_provider_request: agentProviderRequest,
|
|
15034
15150
|
lm_provider_request: lmProviderRequest,
|
|
15035
|
-
|
|
15151
|
+
evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
|
|
15036
15152
|
evaluator_results: evaluatorResults
|
|
15037
15153
|
};
|
|
15038
15154
|
}
|
|
@@ -15109,7 +15225,7 @@ async function runEvaluatorList(options) {
|
|
|
15109
15225
|
hits: score2.hits,
|
|
15110
15226
|
misses: score2.misses,
|
|
15111
15227
|
reasoning: score2.reasoning,
|
|
15112
|
-
|
|
15228
|
+
evaluator_provider_request: score2.evaluatorRawRequest
|
|
15113
15229
|
});
|
|
15114
15230
|
continue;
|
|
15115
15231
|
}
|
|
@@ -15136,7 +15252,7 @@ async function runEvaluatorList(options) {
|
|
|
15136
15252
|
hits: score2.hits,
|
|
15137
15253
|
misses: score2.misses,
|
|
15138
15254
|
reasoning: score2.reasoning,
|
|
15139
|
-
|
|
15255
|
+
evaluator_provider_request: score2.evaluatorRawRequest
|
|
15140
15256
|
});
|
|
15141
15257
|
continue;
|
|
15142
15258
|
}
|
|
@@ -15189,7 +15305,7 @@ async function runLlmJudgeEvaluator(options) {
|
|
|
15189
15305
|
promptInputs,
|
|
15190
15306
|
now,
|
|
15191
15307
|
judgeProvider,
|
|
15192
|
-
|
|
15308
|
+
evaluatorTemplateOverride: customPrompt,
|
|
15193
15309
|
evaluator: config
|
|
15194
15310
|
});
|
|
15195
15311
|
}
|
|
@@ -15230,8 +15346,8 @@ function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
|
|
|
15230
15346
|
async function dumpPrompt(directory, evalCase, promptInputs) {
|
|
15231
15347
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
15232
15348
|
const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
|
|
15233
|
-
const filePath =
|
|
15234
|
-
await mkdir22(
|
|
15349
|
+
const filePath = path12.resolve(directory, filename);
|
|
15350
|
+
await mkdir22(path12.dirname(filePath), { recursive: true });
|
|
15235
15351
|
const payload = {
|
|
15236
15352
|
eval_id: evalCase.id,
|
|
15237
15353
|
question: promptInputs.question,
|
|
@@ -15245,7 +15361,7 @@ function sanitizeFilename(value) {
|
|
|
15245
15361
|
return "prompt";
|
|
15246
15362
|
}
|
|
15247
15363
|
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
15248
|
-
return sanitized.length > 0 ? sanitized :
|
|
15364
|
+
return sanitized.length > 0 ? sanitized : randomUUID2();
|
|
15249
15365
|
}
|
|
15250
15366
|
async function invokeProvider(provider, options) {
|
|
15251
15367
|
const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
@@ -15301,6 +15417,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
15301
15417
|
}
|
|
15302
15418
|
}
|
|
15303
15419
|
return {
|
|
15420
|
+
timestamp: timestamp.toISOString(),
|
|
15304
15421
|
eval_id: evalCase.id,
|
|
15305
15422
|
dataset: evalCase.dataset,
|
|
15306
15423
|
conversation_id: evalCase.conversation_id,
|
|
@@ -15308,9 +15425,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
15308
15425
|
hits: [],
|
|
15309
15426
|
misses: [`Error: ${message}`],
|
|
15310
15427
|
candidate_answer: `Error occurred: ${message}`,
|
|
15311
|
-
expected_aspect_count: 0,
|
|
15312
15428
|
target: targetName,
|
|
15313
|
-
timestamp: timestamp.toISOString(),
|
|
15314
15429
|
raw_aspects: [],
|
|
15315
15430
|
agent_provider_request: agentProviderRequest,
|
|
15316
15431
|
lm_provider_request: lmProviderRequest,
|
|
@@ -15352,19 +15467,19 @@ function createAgentKernel() {
|
|
|
15352
15467
|
// src/commands/eval/run-eval.ts
|
|
15353
15468
|
import { constants as constants6 } from "node:fs";
|
|
15354
15469
|
import { access as access6, mkdir as mkdir6 } from "node:fs/promises";
|
|
15355
|
-
import
|
|
15470
|
+
import path18 from "node:path";
|
|
15356
15471
|
import { pathToFileURL } from "node:url";
|
|
15357
15472
|
|
|
15358
15473
|
// src/commands/eval/env.ts
|
|
15359
15474
|
import { config as loadDotenv } from "dotenv";
|
|
15360
15475
|
import { constants as constants4 } from "node:fs";
|
|
15361
15476
|
import { access as access4 } from "node:fs/promises";
|
|
15362
|
-
import
|
|
15477
|
+
import path13 from "node:path";
|
|
15363
15478
|
function uniqueDirs(directories) {
|
|
15364
15479
|
const seen = /* @__PURE__ */ new Set();
|
|
15365
15480
|
const result = [];
|
|
15366
15481
|
for (const dir of directories) {
|
|
15367
|
-
const absolute =
|
|
15482
|
+
const absolute = path13.resolve(dir);
|
|
15368
15483
|
if (seen.has(absolute)) {
|
|
15369
15484
|
continue;
|
|
15370
15485
|
}
|
|
@@ -15383,14 +15498,14 @@ async function fileExists4(filePath) {
|
|
|
15383
15498
|
}
|
|
15384
15499
|
function collectAncestorDirectories(start, boundary) {
|
|
15385
15500
|
const directories = [];
|
|
15386
|
-
const boundaryDir =
|
|
15387
|
-
let current =
|
|
15501
|
+
const boundaryDir = path13.resolve(boundary);
|
|
15502
|
+
let current = path13.resolve(start);
|
|
15388
15503
|
while (current !== void 0) {
|
|
15389
15504
|
directories.push(current);
|
|
15390
15505
|
if (current === boundaryDir) {
|
|
15391
15506
|
break;
|
|
15392
15507
|
}
|
|
15393
|
-
const parent =
|
|
15508
|
+
const parent = path13.dirname(current);
|
|
15394
15509
|
if (parent === current) {
|
|
15395
15510
|
break;
|
|
15396
15511
|
}
|
|
@@ -15400,7 +15515,7 @@ function collectAncestorDirectories(start, boundary) {
|
|
|
15400
15515
|
}
|
|
15401
15516
|
async function loadEnvFromHierarchy(options) {
|
|
15402
15517
|
const { testFilePath, repoRoot, verbose } = options;
|
|
15403
|
-
const testDir =
|
|
15518
|
+
const testDir = path13.dirname(path13.resolve(testFilePath));
|
|
15404
15519
|
const cwd = process.cwd();
|
|
15405
15520
|
const searchDirs = uniqueDirs([
|
|
15406
15521
|
...collectAncestorDirectories(testDir, repoRoot),
|
|
@@ -15408,7 +15523,7 @@ async function loadEnvFromHierarchy(options) {
|
|
|
15408
15523
|
cwd
|
|
15409
15524
|
]);
|
|
15410
15525
|
for (const dir of searchDirs) {
|
|
15411
|
-
const candidate =
|
|
15526
|
+
const candidate = path13.join(dir, ".env");
|
|
15412
15527
|
if (await fileExists4(candidate)) {
|
|
15413
15528
|
loadDotenv({ path: candidate, override: false });
|
|
15414
15529
|
if (verbose) {
|
|
@@ -15632,7 +15747,7 @@ var Mutex = class {
|
|
|
15632
15747
|
// src/commands/eval/jsonl-writer.ts
|
|
15633
15748
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
15634
15749
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
15635
|
-
import
|
|
15750
|
+
import path14 from "node:path";
|
|
15636
15751
|
import { finished } from "node:stream/promises";
|
|
15637
15752
|
var JsonlWriter = class _JsonlWriter {
|
|
15638
15753
|
stream;
|
|
@@ -15642,7 +15757,7 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
15642
15757
|
this.stream = stream;
|
|
15643
15758
|
}
|
|
15644
15759
|
static async open(filePath) {
|
|
15645
|
-
await mkdir4(
|
|
15760
|
+
await mkdir4(path14.dirname(filePath), { recursive: true });
|
|
15646
15761
|
const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
|
|
15647
15762
|
return new _JsonlWriter(stream);
|
|
15648
15763
|
}
|
|
@@ -15674,7 +15789,7 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
15674
15789
|
// src/commands/eval/yaml-writer.ts
|
|
15675
15790
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
15676
15791
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
15677
|
-
import
|
|
15792
|
+
import path15 from "node:path";
|
|
15678
15793
|
import { finished as finished2 } from "node:stream/promises";
|
|
15679
15794
|
import { stringify as stringifyYaml } from "yaml";
|
|
15680
15795
|
var YamlWriter = class _YamlWriter {
|
|
@@ -15686,7 +15801,7 @@ var YamlWriter = class _YamlWriter {
|
|
|
15686
15801
|
this.stream = stream;
|
|
15687
15802
|
}
|
|
15688
15803
|
static async open(filePath) {
|
|
15689
|
-
await mkdir5(
|
|
15804
|
+
await mkdir5(path15.dirname(filePath), { recursive: true });
|
|
15690
15805
|
const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
|
|
15691
15806
|
return new _YamlWriter(stream);
|
|
15692
15807
|
}
|
|
@@ -15808,12 +15923,12 @@ var ProgressDisplay = class {
|
|
|
15808
15923
|
}
|
|
15809
15924
|
addLogPaths(paths) {
|
|
15810
15925
|
const newPaths = [];
|
|
15811
|
-
for (const
|
|
15812
|
-
if (this.logPathSet.has(
|
|
15926
|
+
for (const path25 of paths) {
|
|
15927
|
+
if (this.logPathSet.has(path25)) {
|
|
15813
15928
|
continue;
|
|
15814
15929
|
}
|
|
15815
|
-
this.logPathSet.add(
|
|
15816
|
-
newPaths.push(
|
|
15930
|
+
this.logPathSet.add(path25);
|
|
15931
|
+
newPaths.push(path25);
|
|
15817
15932
|
}
|
|
15818
15933
|
if (newPaths.length === 0) {
|
|
15819
15934
|
return;
|
|
@@ -15829,8 +15944,8 @@ var ProgressDisplay = class {
|
|
|
15829
15944
|
this.hasPrintedLogHeader = true;
|
|
15830
15945
|
}
|
|
15831
15946
|
const startIndex = this.logPaths.length - newPaths.length;
|
|
15832
|
-
newPaths.forEach((
|
|
15833
|
-
console.log(`${startIndex + offset + 1}. ${
|
|
15947
|
+
newPaths.forEach((path25, offset) => {
|
|
15948
|
+
console.log(`${startIndex + offset + 1}. ${path25}`);
|
|
15834
15949
|
});
|
|
15835
15950
|
}
|
|
15836
15951
|
scheduleRender() {
|
|
@@ -15878,8 +15993,8 @@ var ProgressDisplay = class {
|
|
|
15878
15993
|
if (this.logPaths.length > 0) {
|
|
15879
15994
|
lines.push("");
|
|
15880
15995
|
lines.push("Codex CLI logs:");
|
|
15881
|
-
this.logPaths.forEach((
|
|
15882
|
-
lines.push(`${index + 1}. ${
|
|
15996
|
+
this.logPaths.forEach((path25, index) => {
|
|
15997
|
+
lines.push(`${index + 1}. ${path25}`);
|
|
15883
15998
|
});
|
|
15884
15999
|
}
|
|
15885
16000
|
const rowCount = this.getRenderedRowCount(lines);
|
|
@@ -16084,17 +16199,17 @@ function formatEvaluationSummary(summary) {
|
|
|
16084
16199
|
}
|
|
16085
16200
|
|
|
16086
16201
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
16087
|
-
import { readFile as
|
|
16202
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
16088
16203
|
import { parse as parse4 } from "yaml";
|
|
16089
16204
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
16090
|
-
import
|
|
16205
|
+
import path16 from "node:path";
|
|
16091
16206
|
import { parse as parse23 } from "yaml";
|
|
16092
|
-
import { readFile as
|
|
16207
|
+
import { readFile as readFile33 } from "node:fs/promises";
|
|
16093
16208
|
import path23 from "node:path";
|
|
16094
|
-
import { parse as
|
|
16209
|
+
import { parse as parse33 } from "yaml";
|
|
16095
16210
|
import { readFile as readFile42 } from "node:fs/promises";
|
|
16096
16211
|
import { parse as parse42 } from "yaml";
|
|
16097
|
-
import { readFile as
|
|
16212
|
+
import { readFile as readFile52 } from "node:fs/promises";
|
|
16098
16213
|
import path33 from "node:path";
|
|
16099
16214
|
import { parse as parse5 } from "yaml";
|
|
16100
16215
|
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
@@ -16102,7 +16217,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
|
|
|
16102
16217
|
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
16103
16218
|
async function detectFileType(filePath) {
|
|
16104
16219
|
try {
|
|
16105
|
-
const content = await
|
|
16220
|
+
const content = await readFile6(filePath, "utf8");
|
|
16106
16221
|
const parsed = parse4(content);
|
|
16107
16222
|
if (typeof parsed !== "object" || parsed === null) {
|
|
16108
16223
|
return "unknown";
|
|
@@ -16132,7 +16247,7 @@ function isObject(value) {
|
|
|
16132
16247
|
}
|
|
16133
16248
|
async function validateEvalFile(filePath) {
|
|
16134
16249
|
const errors = [];
|
|
16135
|
-
const absolutePath =
|
|
16250
|
+
const absolutePath = path16.resolve(filePath);
|
|
16136
16251
|
let parsed;
|
|
16137
16252
|
try {
|
|
16138
16253
|
const content = await readFile23(absolutePath, "utf8");
|
|
@@ -16497,8 +16612,8 @@ async function validateTargetsFile(filePath) {
|
|
|
16497
16612
|
const absolutePath = path23.resolve(filePath);
|
|
16498
16613
|
let parsed;
|
|
16499
16614
|
try {
|
|
16500
|
-
const content = await
|
|
16501
|
-
parsed =
|
|
16615
|
+
const content = await readFile33(absolutePath, "utf8");
|
|
16616
|
+
parsed = parse33(content);
|
|
16502
16617
|
} catch (error) {
|
|
16503
16618
|
errors.push({
|
|
16504
16619
|
severity: "error",
|
|
@@ -16849,7 +16964,7 @@ async function validateFileReferences(evalFilePath) {
|
|
|
16849
16964
|
const searchRoots = buildSearchRoots(absolutePath, gitRoot);
|
|
16850
16965
|
let parsed;
|
|
16851
16966
|
try {
|
|
16852
|
-
const content = await
|
|
16967
|
+
const content = await readFile52(absolutePath, "utf8");
|
|
16853
16968
|
parsed = parse5(content);
|
|
16854
16969
|
} catch {
|
|
16855
16970
|
return errors;
|
|
@@ -16919,7 +17034,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
16919
17034
|
});
|
|
16920
17035
|
} else {
|
|
16921
17036
|
try {
|
|
16922
|
-
const fileContent = await
|
|
17037
|
+
const fileContent = await readFile52(resolvedPath, "utf8");
|
|
16923
17038
|
if (fileContent.trim().length === 0) {
|
|
16924
17039
|
errors.push({
|
|
16925
17040
|
severity: "warning",
|
|
@@ -16944,16 +17059,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
16944
17059
|
// src/commands/eval/targets.ts
|
|
16945
17060
|
import { constants as constants5 } from "node:fs";
|
|
16946
17061
|
import { access as access5 } from "node:fs/promises";
|
|
16947
|
-
import
|
|
17062
|
+
import path17 from "node:path";
|
|
16948
17063
|
var TARGET_FILE_CANDIDATES = [
|
|
16949
17064
|
"targets.yaml",
|
|
16950
17065
|
"targets.yml",
|
|
16951
|
-
|
|
16952
|
-
|
|
17066
|
+
path17.join(".agentv", "targets.yaml"),
|
|
17067
|
+
path17.join(".agentv", "targets.yml")
|
|
16953
17068
|
];
|
|
16954
|
-
var
|
|
17069
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
16955
17070
|
var ANSI_RED = "\x1B[31m";
|
|
16956
|
-
var
|
|
17071
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
16957
17072
|
function isTTY() {
|
|
16958
17073
|
return process.stdout.isTTY ?? false;
|
|
16959
17074
|
}
|
|
@@ -16972,12 +17087,12 @@ async function readTestSuiteTarget(testFilePath) {
|
|
|
16972
17087
|
async function discoverTargetsFile(options) {
|
|
16973
17088
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
16974
17089
|
if (explicitPath) {
|
|
16975
|
-
const resolvedExplicit =
|
|
17090
|
+
const resolvedExplicit = path17.resolve(explicitPath);
|
|
16976
17091
|
if (await fileExists5(resolvedExplicit)) {
|
|
16977
17092
|
return resolvedExplicit;
|
|
16978
17093
|
}
|
|
16979
17094
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16980
|
-
const nested =
|
|
17095
|
+
const nested = path17.join(resolvedExplicit, candidate);
|
|
16981
17096
|
if (await fileExists5(nested)) {
|
|
16982
17097
|
return nested;
|
|
16983
17098
|
}
|
|
@@ -16985,13 +17100,13 @@ async function discoverTargetsFile(options) {
|
|
|
16985
17100
|
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
16986
17101
|
}
|
|
16987
17102
|
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
16988
|
-
const resolvedCwd =
|
|
17103
|
+
const resolvedCwd = path17.resolve(cwd);
|
|
16989
17104
|
if (!directories.includes(resolvedCwd)) {
|
|
16990
17105
|
directories.push(resolvedCwd);
|
|
16991
17106
|
}
|
|
16992
17107
|
for (const directory of directories) {
|
|
16993
17108
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16994
|
-
const fullPath =
|
|
17109
|
+
const fullPath = path17.join(directory, candidate);
|
|
16995
17110
|
if (await fileExists5(fullPath)) {
|
|
16996
17111
|
return fullPath;
|
|
16997
17112
|
}
|
|
@@ -17026,8 +17141,8 @@ async function selectTarget(options) {
|
|
|
17026
17141
|
Warnings in ${targetsFilePath}:`);
|
|
17027
17142
|
for (const warning of warnings) {
|
|
17028
17143
|
const location = warning.location ? ` [${warning.location}]` : "";
|
|
17029
|
-
const prefix = useColors ? `${
|
|
17030
|
-
const message = useColors ? `${
|
|
17144
|
+
const prefix = useColors ? `${ANSI_YELLOW6} \u26A0${ANSI_RESET6}` : " \u26A0";
|
|
17145
|
+
const message = useColors ? `${ANSI_YELLOW6}${warning.message}${ANSI_RESET6}` : warning.message;
|
|
17031
17146
|
console.warn(`${prefix}${location} ${message}`);
|
|
17032
17147
|
}
|
|
17033
17148
|
console.warn("");
|
|
@@ -17038,8 +17153,8 @@ Warnings in ${targetsFilePath}:`);
|
|
|
17038
17153
|
Errors in ${targetsFilePath}:`);
|
|
17039
17154
|
for (const error of errors) {
|
|
17040
17155
|
const location = error.location ? ` [${error.location}]` : "";
|
|
17041
|
-
const prefix = useColors ? `${ANSI_RED} \u2717${
|
|
17042
|
-
const message = useColors ? `${ANSI_RED}${error.message}${
|
|
17156
|
+
const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET6}` : " \u2717";
|
|
17157
|
+
const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET6}` : error.message;
|
|
17043
17158
|
console.error(`${prefix}${location} ${message}`);
|
|
17044
17159
|
}
|
|
17045
17160
|
throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
|
|
@@ -17143,15 +17258,15 @@ async function ensureFileExists(filePath, description) {
|
|
|
17143
17258
|
}
|
|
17144
17259
|
}
|
|
17145
17260
|
async function findRepoRoot(start) {
|
|
17146
|
-
const fallback =
|
|
17261
|
+
const fallback = path18.resolve(start);
|
|
17147
17262
|
let current = fallback;
|
|
17148
17263
|
while (current !== void 0) {
|
|
17149
|
-
const candidate =
|
|
17264
|
+
const candidate = path18.join(current, ".git");
|
|
17150
17265
|
try {
|
|
17151
17266
|
await access6(candidate, constants6.F_OK);
|
|
17152
17267
|
return current;
|
|
17153
17268
|
} catch {
|
|
17154
|
-
const parent =
|
|
17269
|
+
const parent = path18.dirname(current);
|
|
17155
17270
|
if (parent === current) {
|
|
17156
17271
|
break;
|
|
17157
17272
|
}
|
|
@@ -17164,16 +17279,16 @@ function buildDefaultOutputPath(cwd, format) {
|
|
|
17164
17279
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
17165
17280
|
const baseName = "eval";
|
|
17166
17281
|
const extension = getDefaultExtension(format);
|
|
17167
|
-
return
|
|
17282
|
+
return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
17168
17283
|
}
|
|
17169
17284
|
function resolvePromptDirectory(option, cwd) {
|
|
17170
17285
|
if (option === void 0) {
|
|
17171
17286
|
return void 0;
|
|
17172
17287
|
}
|
|
17173
17288
|
if (typeof option === "string" && option.trim().length > 0) {
|
|
17174
|
-
return
|
|
17289
|
+
return path18.resolve(cwd, option);
|
|
17175
17290
|
}
|
|
17176
|
-
return
|
|
17291
|
+
return path18.join(cwd, ".agentv", "prompts");
|
|
17177
17292
|
}
|
|
17178
17293
|
function createEvaluationCache() {
|
|
17179
17294
|
const store = /* @__PURE__ */ new Map();
|
|
@@ -17198,7 +17313,7 @@ function createProgressReporter(maxWorkers) {
|
|
|
17198
17313
|
};
|
|
17199
17314
|
}
|
|
17200
17315
|
function makeEvalKey(testFilePath, evalId) {
|
|
17201
|
-
return `${
|
|
17316
|
+
return `${path18.resolve(testFilePath)}::${evalId}`;
|
|
17202
17317
|
}
|
|
17203
17318
|
function createDisplayIdTracker() {
|
|
17204
17319
|
const map = /* @__PURE__ */ new Map();
|
|
@@ -17351,7 +17466,7 @@ async function runEvalCommand(input) {
|
|
|
17351
17466
|
if (options.verbose) {
|
|
17352
17467
|
console.log(`Repository root: ${repoRoot}`);
|
|
17353
17468
|
}
|
|
17354
|
-
const outputPath = options.outPath ?
|
|
17469
|
+
const outputPath = options.outPath ? path18.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
17355
17470
|
console.log(`Output path: ${outputPath}`);
|
|
17356
17471
|
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
17357
17472
|
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
@@ -17359,7 +17474,7 @@ async function runEvalCommand(input) {
|
|
|
17359
17474
|
const allResults = [];
|
|
17360
17475
|
let lastPromptDumpDir;
|
|
17361
17476
|
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
17362
|
-
const resolvedTestFiles = input.testFiles.map((file) =>
|
|
17477
|
+
const resolvedTestFiles = input.testFiles.map((file) => path18.resolve(file));
|
|
17363
17478
|
const displayIdTracker = createDisplayIdTracker();
|
|
17364
17479
|
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
17365
17480
|
const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
|
|
@@ -17451,7 +17566,7 @@ async function resolveEvaluationRunner() {
|
|
|
17451
17566
|
if (!overridePath) {
|
|
17452
17567
|
return runEvaluation;
|
|
17453
17568
|
}
|
|
17454
|
-
const resolved =
|
|
17569
|
+
const resolved = path18.isAbsolute(overridePath) ? overridePath : path18.resolve(process.cwd(), overridePath);
|
|
17455
17570
|
const moduleUrl = pathToFileURL(resolved).href;
|
|
17456
17571
|
const mod = await import(moduleUrl);
|
|
17457
17572
|
const candidate = mod.runEvaluation;
|
|
@@ -17522,7 +17637,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
17522
17637
|
const unmatched = [];
|
|
17523
17638
|
const results = /* @__PURE__ */ new Set();
|
|
17524
17639
|
for (const pattern of normalizedInputs) {
|
|
17525
|
-
const candidatePath =
|
|
17640
|
+
const candidatePath = path19.isAbsolute(pattern) ? path19.normalize(pattern) : path19.resolve(cwd, pattern);
|
|
17526
17641
|
try {
|
|
17527
17642
|
const stats = await stat3(candidatePath);
|
|
17528
17643
|
if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
|
|
@@ -17545,7 +17660,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
17545
17660
|
unmatched.push(pattern);
|
|
17546
17661
|
continue;
|
|
17547
17662
|
}
|
|
17548
|
-
yamlMatches.forEach((filePath) => results.add(
|
|
17663
|
+
yamlMatches.forEach((filePath) => results.add(path19.normalize(filePath)));
|
|
17549
17664
|
}
|
|
17550
17665
|
if (unmatched.length > 0) {
|
|
17551
17666
|
throw new Error(
|
|
@@ -17561,27 +17676,30 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
17561
17676
|
|
|
17562
17677
|
// src/commands/init/index.ts
|
|
17563
17678
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
17564
|
-
import
|
|
17679
|
+
import path21 from "node:path";
|
|
17565
17680
|
import * as readline from "node:readline/promises";
|
|
17566
17681
|
|
|
17567
17682
|
// src/templates/index.ts
|
|
17568
17683
|
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
17569
|
-
import
|
|
17570
|
-
import { fileURLToPath
|
|
17684
|
+
import path20 from "node:path";
|
|
17685
|
+
import { fileURLToPath } from "node:url";
|
|
17571
17686
|
var TemplateManager = class {
|
|
17572
17687
|
static getGithubTemplates() {
|
|
17573
|
-
return this.getTemplatesFromDir("github");
|
|
17688
|
+
return this.getTemplatesFromDir(".github");
|
|
17574
17689
|
}
|
|
17575
17690
|
static getAgentvTemplates() {
|
|
17576
|
-
return this.getTemplatesFromDir("agentv");
|
|
17691
|
+
return this.getTemplatesFromDir(".agentv");
|
|
17692
|
+
}
|
|
17693
|
+
static getClaudeTemplates() {
|
|
17694
|
+
return this.getTemplatesFromDir(".claude");
|
|
17577
17695
|
}
|
|
17578
17696
|
static getTemplatesFromDir(subdir) {
|
|
17579
|
-
const currentDir =
|
|
17697
|
+
const currentDir = path20.dirname(fileURLToPath(import.meta.url));
|
|
17580
17698
|
let templatesDir;
|
|
17581
|
-
if (currentDir.includes(
|
|
17582
|
-
templatesDir =
|
|
17699
|
+
if (currentDir.includes(path20.sep + "dist")) {
|
|
17700
|
+
templatesDir = path20.join(currentDir, "templates", subdir);
|
|
17583
17701
|
} else {
|
|
17584
|
-
templatesDir =
|
|
17702
|
+
templatesDir = path20.join(currentDir, subdir);
|
|
17585
17703
|
}
|
|
17586
17704
|
return this.readTemplatesRecursively(templatesDir, "");
|
|
17587
17705
|
}
|
|
@@ -17589,15 +17707,15 @@ var TemplateManager = class {
|
|
|
17589
17707
|
const templates = [];
|
|
17590
17708
|
const entries = readdirSync(dir);
|
|
17591
17709
|
for (const entry of entries) {
|
|
17592
|
-
const fullPath =
|
|
17710
|
+
const fullPath = path20.join(dir, entry);
|
|
17593
17711
|
const stat5 = statSync(fullPath);
|
|
17594
|
-
const entryRelativePath = relativePath ?
|
|
17712
|
+
const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
|
|
17595
17713
|
if (stat5.isDirectory()) {
|
|
17596
17714
|
templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
|
|
17597
17715
|
} else {
|
|
17598
17716
|
const content = readFileSync(fullPath, "utf-8");
|
|
17599
17717
|
templates.push({
|
|
17600
|
-
path: entryRelativePath.split(
|
|
17718
|
+
path: entryRelativePath.split(path20.sep).join("/"),
|
|
17601
17719
|
// Normalize to forward slashes
|
|
17602
17720
|
content
|
|
17603
17721
|
});
|
|
@@ -17621,25 +17739,35 @@ async function promptYesNo(message) {
|
|
|
17621
17739
|
}
|
|
17622
17740
|
}
|
|
17623
17741
|
async function initCommand(options = {}) {
|
|
17624
|
-
const targetPath =
|
|
17625
|
-
const githubDir =
|
|
17626
|
-
const agentvDir =
|
|
17742
|
+
const targetPath = path21.resolve(options.targetPath ?? ".");
|
|
17743
|
+
const githubDir = path21.join(targetPath, ".github");
|
|
17744
|
+
const agentvDir = path21.join(targetPath, ".agentv");
|
|
17745
|
+
const claudeDir = path21.join(targetPath, ".claude");
|
|
17627
17746
|
const githubTemplates = TemplateManager.getGithubTemplates();
|
|
17628
17747
|
const agentvTemplates = TemplateManager.getAgentvTemplates();
|
|
17748
|
+
const claudeTemplates = TemplateManager.getClaudeTemplates();
|
|
17629
17749
|
const existingFiles = [];
|
|
17630
17750
|
if (existsSync(githubDir)) {
|
|
17631
17751
|
for (const template of githubTemplates) {
|
|
17632
|
-
const targetFilePath =
|
|
17752
|
+
const targetFilePath = path21.join(githubDir, template.path);
|
|
17633
17753
|
if (existsSync(targetFilePath)) {
|
|
17634
|
-
existingFiles.push(
|
|
17754
|
+
existingFiles.push(path21.relative(targetPath, targetFilePath));
|
|
17635
17755
|
}
|
|
17636
17756
|
}
|
|
17637
17757
|
}
|
|
17638
17758
|
if (existsSync(agentvDir)) {
|
|
17639
17759
|
for (const template of agentvTemplates) {
|
|
17640
|
-
const targetFilePath =
|
|
17760
|
+
const targetFilePath = path21.join(agentvDir, template.path);
|
|
17761
|
+
if (existsSync(targetFilePath)) {
|
|
17762
|
+
existingFiles.push(path21.relative(targetPath, targetFilePath));
|
|
17763
|
+
}
|
|
17764
|
+
}
|
|
17765
|
+
}
|
|
17766
|
+
if (existsSync(claudeDir)) {
|
|
17767
|
+
for (const template of claudeTemplates) {
|
|
17768
|
+
const targetFilePath = path21.join(claudeDir, template.path);
|
|
17641
17769
|
if (existsSync(targetFilePath)) {
|
|
17642
|
-
existingFiles.push(
|
|
17770
|
+
existingFiles.push(path21.relative(targetPath, targetFilePath));
|
|
17643
17771
|
}
|
|
17644
17772
|
}
|
|
17645
17773
|
}
|
|
@@ -17660,31 +17788,46 @@ async function initCommand(options = {}) {
|
|
|
17660
17788
|
if (!existsSync(agentvDir)) {
|
|
17661
17789
|
mkdirSync(agentvDir, { recursive: true });
|
|
17662
17790
|
}
|
|
17791
|
+
if (!existsSync(claudeDir)) {
|
|
17792
|
+
mkdirSync(claudeDir, { recursive: true });
|
|
17793
|
+
}
|
|
17663
17794
|
for (const template of githubTemplates) {
|
|
17664
|
-
const targetFilePath =
|
|
17665
|
-
const targetDirPath =
|
|
17795
|
+
const targetFilePath = path21.join(githubDir, template.path);
|
|
17796
|
+
const targetDirPath = path21.dirname(targetFilePath);
|
|
17666
17797
|
if (!existsSync(targetDirPath)) {
|
|
17667
17798
|
mkdirSync(targetDirPath, { recursive: true });
|
|
17668
17799
|
}
|
|
17669
17800
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
17670
|
-
console.log(`Created ${
|
|
17801
|
+
console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
|
|
17671
17802
|
}
|
|
17672
17803
|
for (const template of agentvTemplates) {
|
|
17673
|
-
const targetFilePath =
|
|
17674
|
-
const targetDirPath =
|
|
17804
|
+
const targetFilePath = path21.join(agentvDir, template.path);
|
|
17805
|
+
const targetDirPath = path21.dirname(targetFilePath);
|
|
17806
|
+
if (!existsSync(targetDirPath)) {
|
|
17807
|
+
mkdirSync(targetDirPath, { recursive: true });
|
|
17808
|
+
}
|
|
17809
|
+
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
17810
|
+
console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
|
|
17811
|
+
}
|
|
17812
|
+
for (const template of claudeTemplates) {
|
|
17813
|
+
const targetFilePath = path21.join(claudeDir, template.path);
|
|
17814
|
+
const targetDirPath = path21.dirname(targetFilePath);
|
|
17675
17815
|
if (!existsSync(targetDirPath)) {
|
|
17676
17816
|
mkdirSync(targetDirPath, { recursive: true });
|
|
17677
17817
|
}
|
|
17678
17818
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
17679
|
-
console.log(`Created ${
|
|
17819
|
+
console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
|
|
17680
17820
|
}
|
|
17681
17821
|
console.log("\nAgentV initialized successfully!");
|
|
17682
17822
|
console.log(`
|
|
17683
|
-
Files installed to ${
|
|
17823
|
+
Files installed to ${path21.relative(targetPath, githubDir)}:`);
|
|
17684
17824
|
githubTemplates.forEach((t) => console.log(` - ${t.path}`));
|
|
17685
17825
|
console.log(`
|
|
17686
|
-
Files installed to ${
|
|
17826
|
+
Files installed to ${path21.relative(targetPath, agentvDir)}:`);
|
|
17687
17827
|
agentvTemplates.forEach((t) => console.log(` - ${t.path}`));
|
|
17828
|
+
console.log(`
|
|
17829
|
+
Files installed to ${path21.relative(targetPath, claudeDir)}:`);
|
|
17830
|
+
claudeTemplates.forEach((t) => console.log(` - ${t.path}`));
|
|
17688
17831
|
console.log("\nYou can now:");
|
|
17689
17832
|
console.log(" 1. Edit .agentv/.env with your API credentials");
|
|
17690
17833
|
console.log(" 2. Configure targets in .agentv/targets.yaml");
|
|
@@ -17702,11 +17845,11 @@ function registerStatusCommand(program) {
|
|
|
17702
17845
|
|
|
17703
17846
|
// src/commands/validate/format-output.ts
|
|
17704
17847
|
var ANSI_RED2 = "\x1B[31m";
|
|
17705
|
-
var
|
|
17848
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
17706
17849
|
var ANSI_GREEN = "\x1B[32m";
|
|
17707
17850
|
var ANSI_CYAN = "\x1B[36m";
|
|
17708
17851
|
var ANSI_BOLD = "\x1B[1m";
|
|
17709
|
-
var
|
|
17852
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
17710
17853
|
function formatSummary(summary, useColors) {
|
|
17711
17854
|
const lines = [];
|
|
17712
17855
|
lines.push("");
|
|
@@ -17722,7 +17865,7 @@ function formatSummary(summary, useColors) {
|
|
|
17722
17865
|
}
|
|
17723
17866
|
function formatHeader(text, useColors) {
|
|
17724
17867
|
if (useColors) {
|
|
17725
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text}${
|
|
17868
|
+
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET7}`;
|
|
17726
17869
|
}
|
|
17727
17870
|
return text;
|
|
17728
17871
|
}
|
|
@@ -17730,7 +17873,7 @@ function formatFileResult(result, useColors) {
|
|
|
17730
17873
|
const lines = [];
|
|
17731
17874
|
const status = result.valid ? "\u2713" : "\u2717";
|
|
17732
17875
|
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED2;
|
|
17733
|
-
const statusText = useColors ? `${statusColor}${status}${
|
|
17876
|
+
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET7}` : status;
|
|
17734
17877
|
const fileName = result.filePath;
|
|
17735
17878
|
lines.push(`${statusText} ${fileName}`);
|
|
17736
17879
|
if (result.errors.length > 0) {
|
|
@@ -17742,8 +17885,8 @@ function formatFileResult(result, useColors) {
|
|
|
17742
17885
|
}
|
|
17743
17886
|
function formatError(error, useColors) {
|
|
17744
17887
|
const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
|
|
17745
|
-
const color = error.severity === "error" ? ANSI_RED2 :
|
|
17746
|
-
const coloredPrefix = useColors ? `${color}${prefix}${
|
|
17888
|
+
const color = error.severity === "error" ? ANSI_RED2 : ANSI_YELLOW7;
|
|
17889
|
+
const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET7}` : prefix;
|
|
17747
17890
|
const location = error.location ? ` [${error.location}]` : "";
|
|
17748
17891
|
return `${coloredPrefix}${location} ${error.message}`;
|
|
17749
17892
|
}
|
|
@@ -17756,15 +17899,15 @@ function formatStats(summary, useColors) {
|
|
|
17756
17899
|
(r) => r.errors.some((e) => e.severity === "warning")
|
|
17757
17900
|
).length;
|
|
17758
17901
|
if (useColors) {
|
|
17759
|
-
lines.push(`${ANSI_BOLD}${totalText}${
|
|
17760
|
-
lines.push(`${ANSI_GREEN}${validText}${
|
|
17902
|
+
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET7}`);
|
|
17903
|
+
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET7}`);
|
|
17761
17904
|
if (summary.invalidFiles > 0) {
|
|
17762
|
-
lines.push(`${ANSI_RED2}${invalidText}${
|
|
17905
|
+
lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET7}`);
|
|
17763
17906
|
} else {
|
|
17764
17907
|
lines.push(invalidText);
|
|
17765
17908
|
}
|
|
17766
17909
|
if (filesWithWarnings > 0) {
|
|
17767
|
-
lines.push(`${
|
|
17910
|
+
lines.push(`${ANSI_YELLOW7}Files with warnings: ${filesWithWarnings}${ANSI_RESET7}`);
|
|
17768
17911
|
}
|
|
17769
17912
|
} else {
|
|
17770
17913
|
lines.push(totalText);
|
|
@@ -17783,7 +17926,7 @@ function isTTY2() {
|
|
|
17783
17926
|
// src/commands/validate/validate-files.ts
|
|
17784
17927
|
import { constants as constants7 } from "node:fs";
|
|
17785
17928
|
import { access as access7, readdir as readdir3, stat as stat4 } from "node:fs/promises";
|
|
17786
|
-
import
|
|
17929
|
+
import path24 from "node:path";
|
|
17787
17930
|
async function validateFiles(paths) {
|
|
17788
17931
|
const filePaths = await expandPaths(paths);
|
|
17789
17932
|
const results = [];
|
|
@@ -17801,7 +17944,7 @@ async function validateFiles(paths) {
|
|
|
17801
17944
|
};
|
|
17802
17945
|
}
|
|
17803
17946
|
async function validateSingleFile(filePath) {
|
|
17804
|
-
const absolutePath =
|
|
17947
|
+
const absolutePath = path24.resolve(filePath);
|
|
17805
17948
|
const fileType = await detectFileType(absolutePath);
|
|
17806
17949
|
if (fileType === "unknown") {
|
|
17807
17950
|
return {
|
|
@@ -17840,7 +17983,7 @@ async function validateSingleFile(filePath) {
|
|
|
17840
17983
|
async function expandPaths(paths) {
|
|
17841
17984
|
const expanded = [];
|
|
17842
17985
|
for (const inputPath of paths) {
|
|
17843
|
-
const absolutePath =
|
|
17986
|
+
const absolutePath = path24.resolve(inputPath);
|
|
17844
17987
|
try {
|
|
17845
17988
|
await access7(absolutePath, constants7.F_OK);
|
|
17846
17989
|
} catch {
|
|
@@ -17864,7 +18007,7 @@ async function findYamlFiles(dirPath) {
|
|
|
17864
18007
|
try {
|
|
17865
18008
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
17866
18009
|
for (const entry of entries) {
|
|
17867
|
-
const fullPath =
|
|
18010
|
+
const fullPath = path24.join(dirPath, entry.name);
|
|
17868
18011
|
if (entry.isDirectory()) {
|
|
17869
18012
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
17870
18013
|
continue;
|
|
@@ -17881,7 +18024,7 @@ async function findYamlFiles(dirPath) {
|
|
|
17881
18024
|
return results;
|
|
17882
18025
|
}
|
|
17883
18026
|
function isYamlFile(filePath) {
|
|
17884
|
-
const ext =
|
|
18027
|
+
const ext = path24.extname(filePath).toLowerCase();
|
|
17885
18028
|
return ext === ".yaml" || ext === ".yml";
|
|
17886
18029
|
}
|
|
17887
18030
|
|
|
@@ -17938,4 +18081,4 @@ export {
|
|
|
17938
18081
|
createProgram,
|
|
17939
18082
|
runCli
|
|
17940
18083
|
};
|
|
17941
|
-
//# sourceMappingURL=chunk-
|
|
18084
|
+
//# sourceMappingURL=chunk-WMO5PVPX.js.map
|