agentv 0.2.3 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -42
- package/dist/{chunk-S3RN2GSO.js → chunk-RLBRJX7V.js} +611 -428
- package/dist/chunk-RLBRJX7V.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/config-schema.json +27 -0
- package/dist/templates/eval-build.prompt.md +3 -3
- package/dist/templates/eval-schema.json +3 -3
- package/package.json +3 -2
- package/dist/chunk-S3RN2GSO.js.map +0 -1
|
@@ -585,7 +585,7 @@ var require_utc = __commonJS({
|
|
|
585
585
|
import { Command } from "commander";
|
|
586
586
|
import { readFileSync as readFileSync2 } from "node:fs";
|
|
587
587
|
|
|
588
|
-
// ../../packages/core/dist/chunk-
|
|
588
|
+
// ../../packages/core/dist/chunk-XXNQA4EW.js
|
|
589
589
|
import { constants } from "node:fs";
|
|
590
590
|
import { access } from "node:fs/promises";
|
|
591
591
|
import path from "node:path";
|
|
@@ -613,6 +613,30 @@ async function findGitRoot(startPath) {
|
|
|
613
613
|
}
|
|
614
614
|
return null;
|
|
615
615
|
}
|
|
616
|
+
/**
 * Lists the directories to search, starting at the directory that contains
 * `filePath` and walking up one parent at a time.
 *
 * The walk stops when it reaches the resolved `repoRoot` or when a directory
 * is its own parent. If the resolved `repoRoot` was never visited it is
 * appended as the last entry.
 *
 * @param {string} filePath - Path of a file; its containing directory is the first entry.
 * @param {string} repoRoot - Directory that bounds the upward walk.
 * @returns {string[]} Resolved directory paths, nearest directory first, without duplicates.
 */
function buildDirectoryChain(filePath, repoRoot) {
|
|
617
|
+
const directories = [];
|
|
618
|
+
const seen = /* @__PURE__ */ new Set();
|
|
619
|
+
const boundary = path.resolve(repoRoot);
|
|
620
|
+
let current = path.resolve(path.dirname(filePath));
|
|
621
|
+
// The loop is left only through the two `break` statements below.
while (current !== void 0) {
|
|
622
|
+
if (!seen.has(current)) {
|
|
623
|
+
directories.push(current);
|
|
624
|
+
seen.add(current);
|
|
625
|
+
}
|
|
626
|
+
// Reached the repository root: it has already been recorded above.
if (current === boundary) {
|
|
627
|
+
break;
|
|
628
|
+
}
|
|
629
|
+
const parent = path.dirname(current);
|
|
630
|
+
// A directory that is its own parent cannot be climbed any further.
if (parent === current) {
|
|
631
|
+
break;
|
|
632
|
+
}
|
|
633
|
+
current = parent;
|
|
634
|
+
}
|
|
635
|
+
// The walk never passed through repoRoot, so add it as the final candidate.
if (!seen.has(boundary)) {
|
|
636
|
+
directories.push(boundary);
|
|
637
|
+
}
|
|
638
|
+
return directories;
|
|
639
|
+
}
|
|
616
640
|
function buildSearchRoots(evalPath, repoRoot) {
|
|
617
641
|
const uniqueRoots = [];
|
|
618
642
|
const addRoot = (root2) => {
|
|
@@ -664,8 +688,32 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
664
688
|
}
|
|
665
689
|
return { displayPath, attempted };
|
|
666
690
|
}
|
|
691
|
+
// Provider identifiers treated as known.
// NOTE(review): the code that consumes this list is not visible in this excerpt.
var KNOWN_PROVIDERS = [
|
|
692
|
+
"azure",
|
|
693
|
+
"anthropic",
|
|
694
|
+
"gemini",
|
|
695
|
+
"mock",
|
|
696
|
+
"vscode",
|
|
697
|
+
"vscode-insiders"
|
|
698
|
+
];
|
|
699
|
+
// Additional accepted provider names. The comment after each entry names the
// provider it stands for, or marks the entry as legacy/future support.
var PROVIDER_ALIASES = [
|
|
700
|
+
"azure-openai",
|
|
701
|
+
// alias for "azure"
|
|
702
|
+
"google",
|
|
703
|
+
// alias for "gemini"
|
|
704
|
+
"google-gemini",
|
|
705
|
+
// alias for "gemini"
|
|
706
|
+
"openai",
|
|
707
|
+
// legacy/future support
|
|
708
|
+
"bedrock",
|
|
709
|
+
// legacy/future support
|
|
710
|
+
"vertex"
|
|
711
|
+
// legacy/future support
|
|
712
|
+
];
|
|
713
|
+
// `$schema` value that checkSchema requires in targets.yaml.
var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
|
|
667
714
|
|
|
668
715
|
// ../../packages/core/dist/index.js
|
|
716
|
+
import micromatch from "micromatch";
|
|
669
717
|
import { constants as constants3 } from "node:fs";
|
|
670
718
|
import { access as access3, readFile as readFile2 } from "node:fs/promises";
|
|
671
719
|
import path7 from "node:path";
|
|
@@ -9025,17 +9073,16 @@ var coerce = {
|
|
|
9025
9073
|
var NEVER = INVALID;
|
|
9026
9074
|
|
|
9027
9075
|
// ../../packages/core/dist/index.js
|
|
9028
|
-
import {
|
|
9029
|
-
import { tmpdir } from "node:os";
|
|
9076
|
+
import { readFile as readFile22 } from "node:fs/promises";
|
|
9030
9077
|
import path22 from "node:path";
|
|
9031
9078
|
|
|
9032
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9079
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
9033
9080
|
import { exec, spawn } from "child_process";
|
|
9034
9081
|
import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile, stat as stat2, writeFile } from "fs/promises";
|
|
9035
9082
|
import path5 from "path";
|
|
9036
9083
|
import { promisify } from "util";
|
|
9037
9084
|
|
|
9038
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9085
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/constants.js
|
|
9039
9086
|
import os from "os";
|
|
9040
9087
|
import path2 from "path";
|
|
9041
9088
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
@@ -9047,7 +9094,7 @@ var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
|
9047
9094
|
var DEFAULT_WAKEUP_FILENAME = "wakeup.chatmode.md";
|
|
9048
9095
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9049
9096
|
|
|
9050
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9097
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/fs.js
|
|
9051
9098
|
import { constants as constants2 } from "fs";
|
|
9052
9099
|
import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
|
|
9053
9100
|
import path3 from "path";
|
|
@@ -9080,14 +9127,14 @@ async function removeIfExists(target) {
|
|
|
9080
9127
|
}
|
|
9081
9128
|
}
|
|
9082
9129
|
|
|
9083
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9130
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/time.js
|
|
9084
9131
|
/**
 * Returns a promise that is resolved by a `setTimeout` callback.
 *
 * @param {number} ms2 - Delay passed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Promise resolved (with no value) once the timer fires.
 */
function sleep(ms2) {
|
|
9085
9132
|
return new Promise((resolve) => {
|
|
9086
9133
|
setTimeout(resolve, ms2);
|
|
9087
9134
|
});
|
|
9088
9135
|
}
|
|
9089
9136
|
|
|
9090
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9137
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/workspace.js
|
|
9091
9138
|
import path4 from "path";
|
|
9092
9139
|
|
|
9093
9140
|
// ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
|
|
@@ -10175,7 +10222,7 @@ var JSON5 = {
|
|
|
10175
10222
|
var lib = JSON5;
|
|
10176
10223
|
var dist_default = lib;
|
|
10177
10224
|
|
|
10178
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10225
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/workspace.js
|
|
10179
10226
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10180
10227
|
let workspace;
|
|
10181
10228
|
try {
|
|
@@ -10248,7 +10295,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10248
10295
|
return JSON.stringify(transformedWorkspace, null, 2);
|
|
10249
10296
|
}
|
|
10250
10297
|
|
|
10251
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10298
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
10252
10299
|
var execAsync = promisify(exec);
|
|
10253
10300
|
var DEFAULT_WORKSPACE_TEMPLATE = {
|
|
10254
10301
|
folders: [
|
|
@@ -10430,9 +10477,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
10430
10477
|
return 0;
|
|
10431
10478
|
}
|
|
10432
10479
|
function createRequestPrompt(userQuery, responseFileTmp, responseFileFinal, subagentName, vscodeCmd) {
|
|
10433
|
-
const escapedUserQuery = userQuery.replace(/`/g, "\\`");
|
|
10434
10480
|
return `[[ ## task ## ]]
|
|
10435
|
-
|
|
10481
|
+
|
|
10482
|
+
${userQuery}
|
|
10436
10483
|
|
|
10437
10484
|
[[ ## system_instructions ## ]]
|
|
10438
10485
|
|
|
@@ -10589,7 +10636,7 @@ async function dispatchAgentSession(options) {
|
|
|
10589
10636
|
}
|
|
10590
10637
|
}
|
|
10591
10638
|
|
|
10592
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10639
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/provision.js
|
|
10593
10640
|
import { writeFile as writeFile2 } from "fs/promises";
|
|
10594
10641
|
import path6 from "path";
|
|
10595
10642
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
@@ -10762,9 +10809,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
|
|
10762
10809
|
var ANSI_YELLOW = "\x1B[33m";
|
|
10763
10810
|
var ANSI_RESET = "\x1B[0m";
|
|
10764
10811
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
10765
|
-
|
|
10812
|
+
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
10813
|
+
/**
 * Finds and loads the nearest valid `.agentv/config.yaml`.
 *
 * Each directory returned by buildDirectoryChain(evalFilePath, repoRoot) is
 * tried in order. A candidate is accepted when the file exists, parses to a
 * JSON object, has `$schema` equal to SCHEMA_CONFIG_V2, and its
 * `guideline_patterns` is either absent or an array of strings. Candidates
 * that fail a check are reported through logWarning and skipped; nothing is
 * thrown from the try block.
 *
 * @param {string} evalFilePath - Path of the eval file the search starts from.
 * @param {string} repoRoot - Repository root that bounds the search.
 * @returns {Promise<{guideline_patterns: (string[]|undefined)}|null>} The first
 *   valid config found, or null when no directory yields one.
 */
async function loadConfig(evalFilePath, repoRoot) {
|
|
10814
|
+
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
10815
|
+
for (const directory of directories) {
|
|
10816
|
+
const configPath = path7.join(directory, ".agentv", "config.yaml");
|
|
10817
|
+
// No config file in this directory: move on to the next candidate.
if (!await fileExists2(configPath)) {
|
|
10818
|
+
continue;
|
|
10819
|
+
}
|
|
10820
|
+
try {
|
|
10821
|
+
const rawConfig = await readFile2(configPath, "utf8");
|
|
10822
|
+
const parsed = parse3(rawConfig);
|
|
10823
|
+
if (!isJsonObject(parsed)) {
|
|
10824
|
+
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
10825
|
+
continue;
|
|
10826
|
+
}
|
|
10827
|
+
const config = parsed;
|
|
10828
|
+
const schema = config.$schema;
|
|
10829
|
+
// A wrong or missing $schema only produces a warning; the search continues.
if (schema !== SCHEMA_CONFIG_V2) {
|
|
10830
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
|
|
10831
|
+
Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
|
|
10832
|
+
logWarning(message);
|
|
10833
|
+
continue;
|
|
10834
|
+
}
|
|
10835
|
+
const guidelinePatterns = config.guideline_patterns;
|
|
10836
|
+
// guideline_patterns is optional, but when present it must be an array...
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
10837
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
10838
|
+
continue;
|
|
10839
|
+
}
|
|
10840
|
+
// ...whose entries are all strings.
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
10841
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
10842
|
+
continue;
|
|
10843
|
+
}
|
|
10844
|
+
// First valid config wins; guideline_patterns is undefined when the file omits it.
return {
|
|
10845
|
+
guideline_patterns: guidelinePatterns
|
|
10846
|
+
};
|
|
10847
|
+
} catch (error) {
|
|
10848
|
+
logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
|
|
10849
|
+
continue;
|
|
10850
|
+
}
|
|
10851
|
+
}
|
|
10852
|
+
return null;
|
|
10853
|
+
}
|
|
10854
|
+
/**
 * Tests a file path against the configured guideline glob patterns.
 *
 * Backslashes in `filePath` are replaced with forward slashes before the
 * path is handed to micromatch.isMatch. A null or undefined `patterns`
 * is treated as an empty list.
 * NOTE(review): presumably an empty pattern list matches nothing - confirm
 * against the micromatch documentation.
 *
 * @param {string} filePath - Path to test.
 * @param {string[]} [patterns] - Glob patterns identifying guideline files.
 * @returns {boolean} Result of micromatch.isMatch for the normalized path.
 */
function isGuidelineFile(filePath, patterns) {
|
|
10766
10855
|
const normalized = filePath.split("\\").join("/");
|
|
10767
|
-
|
|
10856
|
+
const patternsToUse = patterns ?? [];
|
|
10857
|
+
return micromatch.isMatch(normalized, patternsToUse);
|
|
10768
10858
|
}
|
|
10769
10859
|
function extractCodeBlocks(segments) {
|
|
10770
10860
|
const codeBlocks = [];
|
|
@@ -10784,43 +10874,45 @@ function extractCodeBlocks(segments) {
|
|
|
10784
10874
|
}
|
|
10785
10875
|
return codeBlocks;
|
|
10786
10876
|
}
|
|
10787
|
-
async function
|
|
10877
|
+
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
10788
10878
|
const verbose = options?.verbose ?? false;
|
|
10789
|
-
const absoluteTestPath = path7.resolve(
|
|
10879
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
10790
10880
|
if (!await fileExists2(absoluteTestPath)) {
|
|
10791
|
-
throw new Error(`Test file not found: ${
|
|
10881
|
+
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
10792
10882
|
}
|
|
10793
10883
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
10794
10884
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
10885
|
+
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
10886
|
+
const guidelinePatterns = config?.guideline_patterns;
|
|
10795
10887
|
const rawFile = await readFile2(absoluteTestPath, "utf8");
|
|
10796
10888
|
const parsed = parse3(rawFile);
|
|
10797
10889
|
if (!isJsonObject(parsed)) {
|
|
10798
|
-
throw new Error(`Invalid test file format: ${
|
|
10890
|
+
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
10799
10891
|
}
|
|
10800
10892
|
const suite = parsed;
|
|
10801
10893
|
const schema = suite.$schema;
|
|
10802
10894
|
if (schema !== SCHEMA_EVAL_V2) {
|
|
10803
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${
|
|
10895
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
|
|
10804
10896
|
Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
10805
10897
|
throw new Error(message);
|
|
10806
10898
|
}
|
|
10807
10899
|
const rawTestcases = suite.evalcases;
|
|
10808
10900
|
if (!Array.isArray(rawTestcases)) {
|
|
10809
|
-
throw new Error(`Invalid test file format: ${
|
|
10901
|
+
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
10810
10902
|
}
|
|
10811
10903
|
const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
|
|
10812
10904
|
const results = [];
|
|
10813
|
-
for (const
|
|
10814
|
-
if (!isJsonObject(
|
|
10905
|
+
for (const rawEvalcase of rawTestcases) {
|
|
10906
|
+
if (!isJsonObject(rawEvalcase)) {
|
|
10815
10907
|
logWarning("Skipping invalid test case entry (expected object)");
|
|
10816
10908
|
continue;
|
|
10817
10909
|
}
|
|
10818
|
-
const
|
|
10819
|
-
const id = asString(
|
|
10820
|
-
const conversationId = asString(
|
|
10821
|
-
const outcome = asString(
|
|
10822
|
-
const inputMessagesValue =
|
|
10823
|
-
const expectedMessagesValue =
|
|
10910
|
+
const evalcase = rawEvalcase;
|
|
10911
|
+
const id = asString(evalcase.id);
|
|
10912
|
+
const conversationId = asString(evalcase.conversation_id);
|
|
10913
|
+
const outcome = asString(evalcase.outcome);
|
|
10914
|
+
const inputMessagesValue = evalcase.input_messages;
|
|
10915
|
+
const expectedMessagesValue = evalcase.expected_messages;
|
|
10824
10916
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
10825
10917
|
logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
|
|
10826
10918
|
continue;
|
|
@@ -10833,6 +10925,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10833
10925
|
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
10834
10926
|
const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
|
|
10835
10927
|
const userMessages = inputMessages.filter((message) => message.role === "user");
|
|
10928
|
+
const systemMessages = inputMessages.filter((message) => message.role === "system");
|
|
10836
10929
|
if (assistantMessages.length === 0) {
|
|
10837
10930
|
logWarning(`No assistant message found for test case: ${id}`);
|
|
10838
10931
|
continue;
|
|
@@ -10840,6 +10933,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10840
10933
|
if (assistantMessages.length > 1) {
|
|
10841
10934
|
logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
|
|
10842
10935
|
}
|
|
10936
|
+
if (systemMessages.length > 1) {
|
|
10937
|
+
logWarning(`Multiple system messages found for test case: ${id}, using first`);
|
|
10938
|
+
}
|
|
10939
|
+
let systemMessageContent;
|
|
10940
|
+
if (systemMessages.length > 0) {
|
|
10941
|
+
const content = systemMessages[0]?.content;
|
|
10942
|
+
if (typeof content === "string") {
|
|
10943
|
+
systemMessageContent = content;
|
|
10944
|
+
} else if (Array.isArray(content)) {
|
|
10945
|
+
const textParts = [];
|
|
10946
|
+
for (const segment of content) {
|
|
10947
|
+
if (isJsonObject(segment)) {
|
|
10948
|
+
const value = segment.value;
|
|
10949
|
+
if (typeof value === "string") {
|
|
10950
|
+
textParts.push(value);
|
|
10951
|
+
}
|
|
10952
|
+
}
|
|
10953
|
+
}
|
|
10954
|
+
if (textParts.length > 0) {
|
|
10955
|
+
systemMessageContent = textParts.join("\n\n");
|
|
10956
|
+
}
|
|
10957
|
+
}
|
|
10958
|
+
}
|
|
10843
10959
|
const userSegments = [];
|
|
10844
10960
|
const guidelinePaths = [];
|
|
10845
10961
|
const userTextParts = [];
|
|
@@ -10871,7 +10987,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10871
10987
|
}
|
|
10872
10988
|
try {
|
|
10873
10989
|
const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10874
|
-
|
|
10990
|
+
const relativeToRepo = path7.relative(repoRootPath, resolvedPath);
|
|
10991
|
+
if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
|
|
10875
10992
|
guidelinePaths.push(path7.resolve(resolvedPath));
|
|
10876
10993
|
if (verbose) {
|
|
10877
10994
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
@@ -10881,7 +10998,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10881
10998
|
userSegments.push({
|
|
10882
10999
|
type: "file",
|
|
10883
11000
|
path: displayPath,
|
|
10884
|
-
text: fileContent
|
|
11001
|
+
text: fileContent,
|
|
11002
|
+
resolvedPath: path7.resolve(resolvedPath)
|
|
10885
11003
|
});
|
|
10886
11004
|
if (verbose) {
|
|
10887
11005
|
console.log(` [File] Found: ${displayPath}`);
|
|
@@ -10903,16 +11021,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10903
11021
|
}
|
|
10904
11022
|
const codeSnippets = extractCodeBlocks(userSegments);
|
|
10905
11023
|
const assistantContent = assistantMessages[0]?.content;
|
|
10906
|
-
const expectedAssistantRaw =
|
|
11024
|
+
const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
|
|
10907
11025
|
const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
10908
|
-
const testCaseGrader = coerceGrader(
|
|
11026
|
+
const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
|
|
11027
|
+
const userFilePaths = [];
|
|
11028
|
+
for (const segment of userSegments) {
|
|
11029
|
+
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
11030
|
+
userFilePaths.push(segment.resolvedPath);
|
|
11031
|
+
}
|
|
11032
|
+
}
|
|
11033
|
+
const allFilePaths = [
|
|
11034
|
+
...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
11035
|
+
...userFilePaths
|
|
11036
|
+
];
|
|
10909
11037
|
const testCase = {
|
|
10910
11038
|
id,
|
|
10911
11039
|
conversation_id: conversationId,
|
|
10912
11040
|
task: userTextPrompt,
|
|
10913
11041
|
user_segments: userSegments,
|
|
11042
|
+
system_message: systemMessageContent,
|
|
10914
11043
|
expected_assistant_raw: expectedAssistantRaw,
|
|
10915
11044
|
guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
11045
|
+
guideline_patterns: guidelinePatterns,
|
|
11046
|
+
file_paths: allFilePaths,
|
|
10916
11047
|
code_snippets: codeSnippets,
|
|
10917
11048
|
outcome,
|
|
10918
11049
|
grader: testCaseGrader
|
|
@@ -10978,7 +11109,7 @@ ${body}`);
|
|
|
10978
11109
|
}
|
|
10979
11110
|
const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
10980
11111
|
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
10981
|
-
return { request, guidelines };
|
|
11112
|
+
return { request, guidelines, systemMessage: testCase.system_message };
|
|
10982
11113
|
}
|
|
10983
11114
|
async function fileExists2(absolutePath) {
|
|
10984
11115
|
try {
|
|
@@ -11019,7 +11150,7 @@ function cloneJsonValue(value) {
|
|
|
11019
11150
|
}
|
|
11020
11151
|
return cloneJsonObject(value);
|
|
11021
11152
|
}
|
|
11022
|
-
function
|
|
11153
|
+
async function resolveAssistantContent(content, searchRoots, verbose) {
|
|
11023
11154
|
if (typeof content === "string") {
|
|
11024
11155
|
return content;
|
|
11025
11156
|
}
|
|
@@ -11032,12 +11163,42 @@ function normalizeAssistantContent(content) {
|
|
|
11032
11163
|
parts.push(entry);
|
|
11033
11164
|
continue;
|
|
11034
11165
|
}
|
|
11035
|
-
|
|
11166
|
+
if (!isJsonObject(entry)) {
|
|
11167
|
+
continue;
|
|
11168
|
+
}
|
|
11169
|
+
const segmentType = asString(entry.type);
|
|
11170
|
+
if (segmentType === "file") {
|
|
11171
|
+
const rawValue = asString(entry.value);
|
|
11172
|
+
if (!rawValue) {
|
|
11173
|
+
continue;
|
|
11174
|
+
}
|
|
11175
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference(
|
|
11176
|
+
rawValue,
|
|
11177
|
+
searchRoots
|
|
11178
|
+
);
|
|
11179
|
+
if (!resolvedPath) {
|
|
11180
|
+
const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
|
|
11181
|
+
logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
|
|
11182
|
+
continue;
|
|
11183
|
+
}
|
|
11184
|
+
try {
|
|
11185
|
+
const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
11186
|
+
parts.push(fileContent);
|
|
11187
|
+
if (verbose) {
|
|
11188
|
+
console.log(` [Expected Assistant File] Found: ${displayPath}`);
|
|
11189
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
11190
|
+
}
|
|
11191
|
+
} catch (error) {
|
|
11192
|
+
logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
|
|
11193
|
+
}
|
|
11194
|
+
continue;
|
|
11195
|
+
}
|
|
11196
|
+
const textValue = asString(entry.text);
|
|
11036
11197
|
if (typeof textValue === "string") {
|
|
11037
11198
|
parts.push(textValue);
|
|
11038
11199
|
continue;
|
|
11039
11200
|
}
|
|
11040
|
-
const valueValue = asString(entry
|
|
11201
|
+
const valueValue = asString(entry.value);
|
|
11041
11202
|
if (typeof valueValue === "string") {
|
|
11042
11203
|
parts.push(valueValue);
|
|
11043
11204
|
continue;
|
|
@@ -11071,15 +11232,18 @@ function buildChatPrompt(request) {
|
|
|
11071
11232
|
return request.chatPrompt;
|
|
11072
11233
|
}
|
|
11073
11234
|
const systemSegments = [];
|
|
11074
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11075
|
-
systemSegments.push(`Guidelines:
|
|
11076
|
-
${request.guidelines.trim()}`);
|
|
11077
|
-
}
|
|
11078
11235
|
const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
|
|
11079
11236
|
if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
|
|
11080
11237
|
systemSegments.push(metadataSystemPrompt.trim());
|
|
11238
|
+
} else {
|
|
11239
|
+
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
11081
11240
|
}
|
|
11082
|
-
|
|
11241
|
+
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11242
|
+
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
11243
|
+
|
|
11244
|
+
${request.guidelines.trim()}`);
|
|
11245
|
+
}
|
|
11246
|
+
const systemContent = systemSegments.join("\n\n");
|
|
11083
11247
|
const userContent = request.prompt.trim();
|
|
11084
11248
|
const prompt = [
|
|
11085
11249
|
{
|
|
@@ -11528,7 +11692,6 @@ function resolveOptionalBoolean(source2) {
|
|
|
11528
11692
|
/**
 * Reports whether `value` consists solely of one or more uppercase ASCII
 * letters, digits and underscores.
 *
 * @param {string} value - Candidate text.
 * @returns {boolean} True when the whole value matches that character set.
 */
function isLikelyEnvReference(value) {
|
|
11529
11693
|
return /^[A-Z0-9_]+$/.test(value);
|
|
11530
11694
|
}
|
|
11531
|
-
var PROMPT_FILE_PREFIX = "bbeval-vscode-";
|
|
11532
11695
|
var VSCodeProvider = class {
|
|
11533
11696
|
id;
|
|
11534
11697
|
kind;
|
|
@@ -11545,128 +11708,89 @@ var VSCodeProvider = class {
|
|
|
11545
11708
|
throw new Error("VS Code provider request was aborted before dispatch");
|
|
11546
11709
|
}
|
|
11547
11710
|
const attachments = normalizeAttachments(request.attachments);
|
|
11548
|
-
const promptContent = buildPromptDocument(request, attachments);
|
|
11549
|
-
const
|
|
11550
|
-
|
|
11551
|
-
|
|
11552
|
-
|
|
11553
|
-
|
|
11554
|
-
|
|
11555
|
-
|
|
11556
|
-
|
|
11557
|
-
|
|
11558
|
-
|
|
11559
|
-
|
|
11560
|
-
|
|
11561
|
-
|
|
11562
|
-
|
|
11563
|
-
|
|
11564
|
-
|
|
11565
|
-
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
11566
|
-
throw new Error(failure);
|
|
11567
|
-
}
|
|
11568
|
-
if (this.config.dryRun) {
|
|
11569
|
-
return {
|
|
11570
|
-
text: "",
|
|
11571
|
-
raw: {
|
|
11572
|
-
session,
|
|
11573
|
-
promptFile: promptPath,
|
|
11574
|
-
attachments
|
|
11575
|
-
}
|
|
11576
|
-
};
|
|
11577
|
-
}
|
|
11578
|
-
const responseText = await readFile22(session.responseFile, "utf8");
|
|
11711
|
+
const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
|
|
11712
|
+
const session = await dispatchAgentSession({
|
|
11713
|
+
userQuery: promptContent,
|
|
11714
|
+
// Use full prompt content instead of just request.prompt
|
|
11715
|
+
extraAttachments: attachments,
|
|
11716
|
+
wait: this.config.waitForResponse,
|
|
11717
|
+
dryRun: this.config.dryRun,
|
|
11718
|
+
vscodeCmd: this.config.command,
|
|
11719
|
+
subagentRoot: this.config.subagentRoot,
|
|
11720
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
11721
|
+
silent: true
|
|
11722
|
+
});
|
|
11723
|
+
if (session.exitCode !== 0 || !session.responseFile) {
|
|
11724
|
+
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
11725
|
+
throw new Error(failure);
|
|
11726
|
+
}
|
|
11727
|
+
if (this.config.dryRun) {
|
|
11579
11728
|
return {
|
|
11580
|
-
text:
|
|
11729
|
+
text: "",
|
|
11581
11730
|
raw: {
|
|
11582
11731
|
session,
|
|
11583
|
-
promptFile: promptPath,
|
|
11584
11732
|
attachments
|
|
11585
11733
|
}
|
|
11586
11734
|
};
|
|
11587
|
-
} finally {
|
|
11588
|
-
await rm2(directory, { recursive: true, force: true });
|
|
11589
11735
|
}
|
|
11736
|
+
const responseText = await readFile22(session.responseFile, "utf8");
|
|
11737
|
+
return {
|
|
11738
|
+
text: responseText,
|
|
11739
|
+
raw: {
|
|
11740
|
+
session,
|
|
11741
|
+
attachments
|
|
11742
|
+
}
|
|
11743
|
+
};
|
|
11590
11744
|
}
|
|
11591
11745
|
};
|
|
11592
|
-
function buildPromptDocument(request, attachments) {
|
|
11746
|
+
/**
 * Builds the prompt text sent to the VS Code subagent.
 *
 * This comment describes the function as it reads on the unchanged and "+"
 * rows below; the "-" rows belong to the previous version.
 *
 * The guideline files are selected from `attachments` with
 * collectGuidelineFiles. When at least one is found, the block returned by
 * buildMandatoryPrereadBlock is added first. The "[[ ## user_query ## ]]"
 * marker and the trimmed `request.prompt` always follow. All parts are joined
 * with newlines and the result is trimmed.
 *
 * @param {object} request - Provider request; only `request.prompt` is read on the "+" rows.
 * @param {string[]|undefined} attachments - Attachment paths to scan for guideline files.
 * @param {string[]|undefined} guidelinePatterns - Glob patterns forwarded to collectGuidelineFiles.
 * @returns {string} The assembled prompt document.
 */
function buildPromptDocument(request, attachments, guidelinePatterns) {
|
|
11593
11747
|
const parts = [];
|
|
11594
|
-
const
|
|
11595
|
-
if (
|
|
11596
|
-
parts.push(buildMandatoryPrereadBlock(
|
|
11597
|
-
}
|
|
11598
|
-
parts.push(`# BbEval Request`);
|
|
11599
|
-
if (request.testCaseId) {
|
|
11600
|
-
parts.push(`- Test Case: ${request.testCaseId}`);
|
|
11601
|
-
}
|
|
11602
|
-
if (request.metadata?.target) {
|
|
11603
|
-
parts.push(`- Target: ${String(request.metadata.target)}`);
|
|
11604
|
-
}
|
|
11605
|
-
parts.push("\n## Task\n", request.prompt.trim());
|
|
11606
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11607
|
-
parts.push("\n## Guidelines\n", request.guidelines.trim());
|
|
11608
|
-
}
|
|
11609
|
-
if (attachments && attachments.length > 0) {
|
|
11610
|
-
const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
|
|
11611
|
-
parts.push("\n## Attachments\n", attachmentList);
|
|
11748
|
+
const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
|
|
11749
|
+
if (guidelineFiles.length > 0) {
|
|
11750
|
+
parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
|
|
11612
11751
|
}
|
|
11752
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
11613
11753
|
return parts.join("\n").trim();
|
|
11614
11754
|
}
|
|
11615
|
-
function buildMandatoryPrereadBlock(
|
|
11616
|
-
if (
|
|
11755
|
+
/**
 * Builds the instruction text telling the agent to read the guideline files
 * before answering.
 *
 * This comment describes the function as it reads on the unchanged and "+"
 * rows below; the "-" rows belong to the previous version.
 *
 * Returns an empty string for an empty list. Otherwise each file becomes a
 * Markdown bullet link built from its base name and the URI returned by
 * pathToFileUri. The bullets are joined with newlines and embedded in a
 * three-part instruction: read the files, fail with "ERROR: missing-file" if
 * one is missing, then apply system_instructions to the user query.
 *
 * NOTE(review): `counter` is still declared and incremented, but no "+" or
 * unchanged row reads it any more; it looks like a leftover from the removed
 * token list.
 *
 * @param {string[]} guidelineFiles - Paths of the guideline files to list.
 * @returns {string} The instruction block, or "" when there are no files.
 */
function buildMandatoryPrereadBlock(guidelineFiles) {
|
|
11756
|
+
if (guidelineFiles.length === 0) {
|
|
11617
11757
|
return "";
|
|
11618
11758
|
}
|
|
11619
11759
|
const fileList = [];
|
|
11620
|
-
const tokenList = [];
|
|
11621
11760
|
let counter = 0;
|
|
11622
|
-
for (const absolutePath of
|
|
11761
|
+
for (const absolutePath of guidelineFiles) {
|
|
11623
11762
|
counter += 1;
|
|
11624
11763
|
const fileName = path22.basename(absolutePath);
|
|
11625
11764
|
const fileUri = pathToFileUri(absolutePath);
|
|
11626
|
-
fileList.push(
|
|
11627
|
-
tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
|
|
11765
|
+
fileList.push(`* [${fileName}](${fileUri})`);
|
|
11628
11766
|
}
|
|
11629
|
-
const filesText = fileList.join("
|
|
11630
|
-
const tokensText = tokenList.join("\n");
|
|
11767
|
+
const filesText = fileList.join("\n");
|
|
11631
11768
|
const instruction = [
|
|
11632
|
-
`Read all
|
|
11633
|
-
|
|
11634
|
-
"`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
|
|
11635
|
-
`Then include, at the top of your reply, these exact tokens on separate lines:
|
|
11769
|
+
`Read all guideline files:
|
|
11770
|
+
${filesText}.
|
|
11636
11771
|
`,
|
|
11637
|
-
tokensText,
|
|
11638
|
-
`
|
|
11639
|
-
Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
|
|
11640
11772
|
`If any file is missing, fail with ERROR: missing-file <filename> and stop.
|
|
11641
11773
|
`,
|
|
11642
|
-
`Then
|
|
11643
|
-
].join("
|
|
11644
|
-
return
|
|
11645
|
-
|
|
11646
|
-
${instruction}
|
|
11647
|
-
|
|
11648
|
-
`;
|
|
11774
|
+
`Then apply system_instructions on the user query below.`
|
|
11775
|
+
].join("");
|
|
11776
|
+
return `${instruction}`;
|
|
11649
11777
|
}
|
|
11650
|
-
function
|
|
11778
|
+
/**
 * Selects the attachments that count as guideline files.
 *
 * This comment describes the function as it reads on the unchanged and "+"
 * rows below; the "-" rows belong to the previous version.
 *
 * Each attachment is resolved with path22.resolve, its separators are
 * rewritten to "/", and the result is tested with isGuidelineFile. Matching
 * paths are stored in a Map keyed by the resolved path, so each file is
 * returned once, in first-seen order.
 *
 * NOTE(review): the patterns are matched against the resolved absolute path
 * here, while loadEvalCases passes isGuidelineFile a path relative to the
 * repository root. A pattern anchored at the repository root may therefore
 * match there but not here - confirm this is intended.
 *
 * @param {string[]|undefined} attachments - Attachment paths to filter.
 * @param {string[]|undefined} guidelinePatterns - Glob patterns passed to isGuidelineFile.
 * @returns {string[]} Resolved paths of the matching attachments; [] when
 *   `attachments` is missing or empty.
 */
function collectGuidelineFiles(attachments, guidelinePatterns) {
|
|
11651
11779
|
if (!attachments || attachments.length === 0) {
|
|
11652
11780
|
return [];
|
|
11653
11781
|
}
|
|
11654
11782
|
const unique = /* @__PURE__ */ new Map();
|
|
11655
11783
|
for (const attachment of attachments) {
|
|
11656
|
-
if (!isInstructionPath(attachment)) {
|
|
11657
|
-
continue;
|
|
11658
|
-
}
|
|
11659
11784
|
const absolutePath = path22.resolve(attachment);
|
|
11660
|
-
|
|
11661
|
-
|
|
11785
|
+
const normalized = absolutePath.split(path22.sep).join("/");
|
|
11786
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
11787
|
+
if (!unique.has(absolutePath)) {
|
|
11788
|
+
unique.set(absolutePath, absolutePath);
|
|
11789
|
+
}
|
|
11662
11790
|
}
|
|
11663
11791
|
}
|
|
11664
11792
|
return Array.from(unique.values());
|
|
11665
11793
|
}
|
|
11666
|
-
function isInstructionPath(filePath) {
|
|
11667
|
-
const normalized = filePath.split(path22.sep).join("/");
|
|
11668
|
-
return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
|
|
11669
|
-
}
|
|
11670
11794
|
function pathToFileUri(filePath) {
|
|
11671
11795
|
const absolutePath = path22.isAbsolute(filePath) ? filePath : path22.resolve(filePath);
|
|
11672
11796
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
@@ -11675,14 +11799,6 @@ function pathToFileUri(filePath) {
|
|
|
11675
11799
|
}
|
|
11676
11800
|
return `file://${normalizedPath}`;
|
|
11677
11801
|
}
|
|
11678
|
-
function composeUserQuery(request) {
|
|
11679
|
-
const segments = [];
|
|
11680
|
-
segments.push(request.prompt.trim());
|
|
11681
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11682
|
-
segments.push("\nGuidelines:\n", request.guidelines.trim());
|
|
11683
|
-
}
|
|
11684
|
-
return segments.join("\n").trim();
|
|
11685
|
-
}
|
|
11686
11802
|
function normalizeAttachments(attachments) {
|
|
11687
11803
|
if (!attachments || attachments.length === 0) {
|
|
11688
11804
|
return void 0;
|
|
@@ -11734,18 +11850,24 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
11734
11850
|
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} True for non-null, non-array objects; false otherwise.
 */
function isRecord(value) {
|
|
11735
11851
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
11736
11852
|
}
|
|
11737
|
-
function
|
|
11738
|
-
const
|
|
11739
|
-
if (
|
|
11853
|
+
/**
 * Verifies that a parsed targets.yaml declares the expected `$schema`.
 *
 * This comment describes the function as it reads on the unchanged and "+"
 * rows below; the "-" rows belong to the previous version.
 *
 * Three cases are rejected, each with its own message that includes
 * `absolutePath`: `$schema` is undefined, `$schema` is not a string, and
 * `$schema` differs from TARGETS_SCHEMA_V2. Nothing is returned on success.
 *
 * @param {object} parsed - Parsed contents of targets.yaml.
 * @param {string} absolutePath - File path used in the error messages.
 * @throws {Error} When `$schema` is missing, not a string, or not TARGETS_SCHEMA_V2.
 */
function checkSchema(parsed, absolutePath) {
|
|
11854
|
+
const schema = parsed.$schema;
|
|
11855
|
+
if (schema === void 0) {
|
|
11856
|
+
throw new Error(
|
|
11857
|
+
`Missing $schema field in targets.yaml at ${absolutePath}.
|
|
11858
|
+
Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
|
|
11859
|
+
);
|
|
11860
|
+
}
|
|
11861
|
+
if (typeof schema !== "string") {
|
|
11740
11862
|
throw new Error(
|
|
11741
|
-
`
|
|
11742
|
-
|
|
11863
|
+
`Invalid $schema field in targets.yaml at ${absolutePath}.
|
|
11864
|
+
Expected a string value '${TARGETS_SCHEMA_V2}'.`
|
|
11743
11865
|
);
|
|
11744
11866
|
}
|
|
11745
|
-
if (
|
|
11867
|
+
if (schema !== TARGETS_SCHEMA_V2) {
|
|
11746
11868
|
throw new Error(
|
|
11747
|
-
`
|
|
11748
|
-
|
|
11869
|
+
`Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
|
|
11870
|
+
Expected '${TARGETS_SCHEMA_V2}'.`
|
|
11749
11871
|
);
|
|
11750
11872
|
}
|
|
11751
11873
|
}
|
|
@@ -11777,7 +11899,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
11777
11899
|
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
11778
11900
|
};
|
|
11779
11901
|
}
|
|
11780
|
-
async function
|
|
11902
|
+
async function fileExists3(filePath) {
|
|
11781
11903
|
try {
|
|
11782
11904
|
await access22(filePath, constants22.F_OK);
|
|
11783
11905
|
return true;
|
|
@@ -11787,15 +11909,15 @@ async function fileExists22(filePath) {
|
|
|
11787
11909
|
}
|
|
11788
11910
|
async function readTargetDefinitions(filePath) {
|
|
11789
11911
|
const absolutePath = path32.resolve(filePath);
|
|
11790
|
-
if (!await
|
|
11912
|
+
if (!await fileExists3(absolutePath)) {
|
|
11791
11913
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
11792
11914
|
}
|
|
11793
11915
|
const raw = await readFile3(absolutePath, "utf8");
|
|
11794
11916
|
const parsed = parse22(raw);
|
|
11795
11917
|
if (!isRecord(parsed)) {
|
|
11796
|
-
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '
|
|
11918
|
+
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
11797
11919
|
}
|
|
11798
|
-
|
|
11920
|
+
checkSchema(parsed, absolutePath);
|
|
11799
11921
|
const targets = extractTargetsArray(parsed, absolutePath);
|
|
11800
11922
|
const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
|
|
11801
11923
|
return definitions;
|
|
@@ -12002,7 +12124,7 @@ function extractKeyTerms(aspect, maxTerms = 5) {
|
|
|
12002
12124
|
var HeuristicGrader = class {
|
|
12003
12125
|
kind = "heuristic";
|
|
12004
12126
|
grade(context2) {
|
|
12005
|
-
const expectedAspects = extractAspects(context2.
|
|
12127
|
+
const expectedAspects = extractAspects(context2.evalCase.expected_assistant_raw);
|
|
12006
12128
|
const result = scoreCandidateResponse(context2.candidate, expectedAspects);
|
|
12007
12129
|
const misses = [...result.misses];
|
|
12008
12130
|
if (expectedAspects.length === 0 && isErrorLike(context2.candidate)) {
|
|
@@ -12035,14 +12157,14 @@ var QualityGrader = class {
|
|
|
12035
12157
|
if (!judgeProvider) {
|
|
12036
12158
|
throw new Error("No judge provider available for LLM grading");
|
|
12037
12159
|
}
|
|
12038
|
-
const prompt = buildQualityPrompt(context2.
|
|
12160
|
+
const prompt = buildQualityPrompt(context2.evalCase, context2.candidate);
|
|
12039
12161
|
const metadata = {
|
|
12040
12162
|
systemPrompt: QUALITY_SYSTEM_PROMPT
|
|
12041
12163
|
};
|
|
12042
12164
|
const response = await judgeProvider.invoke({
|
|
12043
12165
|
prompt,
|
|
12044
12166
|
metadata,
|
|
12045
|
-
|
|
12167
|
+
evalCaseId: context2.evalCase.id,
|
|
12046
12168
|
attempt: context2.attempt,
|
|
12047
12169
|
maxOutputTokens: this.maxOutputTokens,
|
|
12048
12170
|
temperature: this.temperature
|
|
@@ -12088,16 +12210,16 @@ var QUALITY_SYSTEM_PROMPT = [
|
|
|
12088
12210
|
function buildQualityPrompt(testCase, candidate) {
|
|
12089
12211
|
const parts = [
|
|
12090
12212
|
"[[ ## expected_outcome ## ]]",
|
|
12091
|
-
testCase.outcome,
|
|
12213
|
+
testCase.outcome.trim(),
|
|
12092
12214
|
"",
|
|
12093
12215
|
"[[ ## request ## ]]",
|
|
12094
|
-
testCase.task,
|
|
12216
|
+
testCase.task.trim(),
|
|
12095
12217
|
"",
|
|
12096
12218
|
"[[ ## reference_answer ## ]]",
|
|
12097
|
-
testCase.expected_assistant_raw,
|
|
12219
|
+
testCase.expected_assistant_raw.trim(),
|
|
12098
12220
|
"",
|
|
12099
12221
|
"[[ ## generated_answer ## ]]",
|
|
12100
|
-
candidate,
|
|
12222
|
+
candidate.trim(),
|
|
12101
12223
|
"",
|
|
12102
12224
|
"Respond with a single JSON object matching the schema described in the system prompt."
|
|
12103
12225
|
];
|
|
@@ -12330,17 +12452,17 @@ async function runEvaluation(options) {
|
|
|
12330
12452
|
cache,
|
|
12331
12453
|
useCache,
|
|
12332
12454
|
now,
|
|
12333
|
-
|
|
12455
|
+
evalId,
|
|
12334
12456
|
verbose,
|
|
12335
12457
|
onResult,
|
|
12336
12458
|
onProgress
|
|
12337
12459
|
} = options;
|
|
12338
|
-
const load =
|
|
12339
|
-
const
|
|
12340
|
-
const
|
|
12341
|
-
if (
|
|
12342
|
-
if (
|
|
12343
|
-
throw new Error(`Test case with id '${
|
|
12460
|
+
const load = loadEvalCases;
|
|
12461
|
+
const evalCases = await load(testFilePath, repoRoot, { verbose });
|
|
12462
|
+
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
12463
|
+
if (filteredEvalCases.length === 0) {
|
|
12464
|
+
if (evalId) {
|
|
12465
|
+
throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
|
|
12344
12466
|
}
|
|
12345
12467
|
return [];
|
|
12346
12468
|
}
|
|
@@ -12384,11 +12506,11 @@ async function runEvaluation(options) {
|
|
|
12384
12506
|
};
|
|
12385
12507
|
const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
|
|
12386
12508
|
const primaryProvider = getOrCreateProvider(target);
|
|
12387
|
-
if (onProgress &&
|
|
12388
|
-
for (let i6 = 0; i6 <
|
|
12509
|
+
if (onProgress && filteredEvalCases.length > 0) {
|
|
12510
|
+
for (let i6 = 0; i6 < filteredEvalCases.length; i6++) {
|
|
12389
12511
|
await onProgress({
|
|
12390
12512
|
workerId: i6 + 1,
|
|
12391
|
-
|
|
12513
|
+
evalId: filteredEvalCases[i6].id,
|
|
12392
12514
|
status: "pending"
|
|
12393
12515
|
});
|
|
12394
12516
|
}
|
|
@@ -12396,23 +12518,23 @@ async function runEvaluation(options) {
|
|
|
12396
12518
|
const workers = options.maxConcurrency ?? target.workers ?? 1;
|
|
12397
12519
|
const limit = pLimit(workers);
|
|
12398
12520
|
let nextWorkerId = 1;
|
|
12399
|
-
const
|
|
12400
|
-
const promises =
|
|
12401
|
-
(
|
|
12521
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
12522
|
+
const promises = filteredEvalCases.map(
|
|
12523
|
+
(evalCase) => limit(async () => {
|
|
12402
12524
|
const workerId = nextWorkerId++;
|
|
12403
|
-
|
|
12525
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
12404
12526
|
if (onProgress) {
|
|
12405
12527
|
await onProgress({
|
|
12406
12528
|
workerId,
|
|
12407
|
-
|
|
12529
|
+
evalId: evalCase.id,
|
|
12408
12530
|
status: "running",
|
|
12409
12531
|
startedAt: Date.now()
|
|
12410
12532
|
});
|
|
12411
12533
|
}
|
|
12412
12534
|
try {
|
|
12413
12535
|
const judgeProvider = await resolveJudgeProvider(target);
|
|
12414
|
-
const result = await
|
|
12415
|
-
|
|
12536
|
+
const result = await runEvalCase({
|
|
12537
|
+
evalCase,
|
|
12416
12538
|
provider: primaryProvider,
|
|
12417
12539
|
target,
|
|
12418
12540
|
graders: graderRegistry,
|
|
@@ -12427,7 +12549,7 @@ async function runEvaluation(options) {
|
|
|
12427
12549
|
if (onProgress) {
|
|
12428
12550
|
await onProgress({
|
|
12429
12551
|
workerId,
|
|
12430
|
-
|
|
12552
|
+
evalId: evalCase.id,
|
|
12431
12553
|
status: "completed",
|
|
12432
12554
|
startedAt: 0,
|
|
12433
12555
|
// Not used for completed status
|
|
@@ -12442,7 +12564,7 @@ async function runEvaluation(options) {
|
|
|
12442
12564
|
if (onProgress) {
|
|
12443
12565
|
await onProgress({
|
|
12444
12566
|
workerId,
|
|
12445
|
-
|
|
12567
|
+
evalId: evalCase.id,
|
|
12446
12568
|
status: "failed",
|
|
12447
12569
|
completedAt: Date.now(),
|
|
12448
12570
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -12459,10 +12581,10 @@ async function runEvaluation(options) {
|
|
|
12459
12581
|
if (outcome.status === "fulfilled") {
|
|
12460
12582
|
results.push(outcome.value);
|
|
12461
12583
|
} else {
|
|
12462
|
-
const
|
|
12463
|
-
const promptInputs = await buildPromptInputs(
|
|
12584
|
+
const evalCase = filteredEvalCases[i6];
|
|
12585
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
12464
12586
|
const errorResult = buildErrorResult(
|
|
12465
|
-
|
|
12587
|
+
evalCase,
|
|
12466
12588
|
target.name,
|
|
12467
12589
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
12468
12590
|
outcome.reason,
|
|
@@ -12476,9 +12598,9 @@ async function runEvaluation(options) {
|
|
|
12476
12598
|
}
|
|
12477
12599
|
return results;
|
|
12478
12600
|
}
|
|
12479
|
-
async function
|
|
12601
|
+
async function runEvalCase(options) {
|
|
12480
12602
|
const {
|
|
12481
|
-
|
|
12603
|
+
evalCase,
|
|
12482
12604
|
provider,
|
|
12483
12605
|
target,
|
|
12484
12606
|
graders,
|
|
@@ -12491,11 +12613,11 @@ async function runTestCase(options) {
|
|
|
12491
12613
|
signal,
|
|
12492
12614
|
judgeProvider
|
|
12493
12615
|
} = options;
|
|
12494
|
-
const promptInputs = await buildPromptInputs(
|
|
12616
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
12495
12617
|
if (promptDumpDir) {
|
|
12496
|
-
await dumpPrompt(promptDumpDir,
|
|
12618
|
+
await dumpPrompt(promptDumpDir, evalCase, promptInputs);
|
|
12497
12619
|
}
|
|
12498
|
-
const cacheKey = useCache ? createCacheKey(provider, target,
|
|
12620
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
12499
12621
|
let cachedResponse;
|
|
12500
12622
|
if (cacheKey && cache) {
|
|
12501
12623
|
cachedResponse = await cache.get(cacheKey);
|
|
@@ -12508,7 +12630,7 @@ async function runTestCase(options) {
|
|
|
12508
12630
|
while (!providerResponse && attempt < attemptBudget) {
|
|
12509
12631
|
try {
|
|
12510
12632
|
providerResponse = await invokeProvider(provider, {
|
|
12511
|
-
|
|
12633
|
+
evalCase,
|
|
12512
12634
|
target,
|
|
12513
12635
|
promptInputs,
|
|
12514
12636
|
attempt,
|
|
@@ -12521,12 +12643,12 @@ async function runTestCase(options) {
|
|
|
12521
12643
|
attempt += 1;
|
|
12522
12644
|
continue;
|
|
12523
12645
|
}
|
|
12524
|
-
return buildErrorResult(
|
|
12646
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
12525
12647
|
}
|
|
12526
12648
|
}
|
|
12527
12649
|
if (!providerResponse) {
|
|
12528
12650
|
return buildErrorResult(
|
|
12529
|
-
|
|
12651
|
+
evalCase,
|
|
12530
12652
|
target.name,
|
|
12531
12653
|
nowFn(),
|
|
12532
12654
|
lastError ?? new Error("Provider did not return a response"),
|
|
@@ -12536,7 +12658,7 @@ async function runTestCase(options) {
|
|
|
12536
12658
|
if (cacheKey && cache && !cachedResponse) {
|
|
12537
12659
|
await cache.set(cacheKey, providerResponse);
|
|
12538
12660
|
}
|
|
12539
|
-
const graderKind =
|
|
12661
|
+
const graderKind = evalCase.grader ?? "heuristic";
|
|
12540
12662
|
const activeGrader = graders[graderKind] ?? graders.heuristic;
|
|
12541
12663
|
if (!activeGrader) {
|
|
12542
12664
|
throw new Error(`No grader registered for kind '${graderKind}'`);
|
|
@@ -12545,7 +12667,7 @@ async function runTestCase(options) {
|
|
|
12545
12667
|
try {
|
|
12546
12668
|
const gradeTimestamp = nowFn();
|
|
12547
12669
|
grade = await activeGrader.grade({
|
|
12548
|
-
|
|
12670
|
+
evalCase,
|
|
12549
12671
|
candidate: providerResponse.text ?? "",
|
|
12550
12672
|
target,
|
|
12551
12673
|
provider,
|
|
@@ -12555,17 +12677,18 @@ async function runTestCase(options) {
|
|
|
12555
12677
|
judgeProvider
|
|
12556
12678
|
});
|
|
12557
12679
|
} catch (error) {
|
|
12558
|
-
return buildErrorResult(
|
|
12680
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
12559
12681
|
}
|
|
12560
12682
|
const completedAt = nowFn();
|
|
12561
12683
|
const rawRequest = {
|
|
12562
12684
|
request: promptInputs.request,
|
|
12563
12685
|
guidelines: promptInputs.guidelines,
|
|
12564
|
-
guideline_paths:
|
|
12686
|
+
guideline_paths: evalCase.guideline_paths,
|
|
12687
|
+
system_message: promptInputs.systemMessage ?? ""
|
|
12565
12688
|
};
|
|
12566
12689
|
return {
|
|
12567
|
-
|
|
12568
|
-
conversation_id:
|
|
12690
|
+
eval_id: evalCase.id,
|
|
12691
|
+
conversation_id: evalCase.conversation_id,
|
|
12569
12692
|
score: grade.score,
|
|
12570
12693
|
hits: grade.hits,
|
|
12571
12694
|
misses: grade.misses,
|
|
@@ -12579,11 +12702,11 @@ async function runTestCase(options) {
|
|
|
12579
12702
|
grader_raw_request: grade.graderRawRequest
|
|
12580
12703
|
};
|
|
12581
12704
|
}
|
|
12582
|
-
function
|
|
12583
|
-
if (!
|
|
12584
|
-
return
|
|
12705
|
+
function filterEvalCases(evalCases, evalId) {
|
|
12706
|
+
if (!evalId) {
|
|
12707
|
+
return evalCases;
|
|
12585
12708
|
}
|
|
12586
|
-
return
|
|
12709
|
+
return evalCases.filter((evalCase) => evalCase.id === evalId);
|
|
12587
12710
|
}
|
|
12588
12711
|
function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
12589
12712
|
const heuristic = overrides?.heuristic ?? new HeuristicGrader();
|
|
@@ -12601,16 +12724,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
|
12601
12724
|
llm_judge: llmJudge
|
|
12602
12725
|
};
|
|
12603
12726
|
}
|
|
12604
|
-
async function dumpPrompt(directory,
|
|
12727
|
+
async function dumpPrompt(directory, evalCase, promptInputs) {
|
|
12605
12728
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
12606
|
-
const filename = `${timestamp}_${sanitizeFilename(
|
|
12729
|
+
const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
|
|
12607
12730
|
const filePath = path42.resolve(directory, filename);
|
|
12608
12731
|
await mkdir3(path42.dirname(filePath), { recursive: true });
|
|
12609
12732
|
const payload = {
|
|
12610
|
-
|
|
12733
|
+
eval_id: evalCase.id,
|
|
12611
12734
|
request: promptInputs.request,
|
|
12612
12735
|
guidelines: promptInputs.guidelines,
|
|
12613
|
-
guideline_paths:
|
|
12736
|
+
guideline_paths: evalCase.guideline_paths
|
|
12614
12737
|
};
|
|
12615
12738
|
await writeFile22(filePath, JSON.stringify(payload, null, 2), "utf8");
|
|
12616
12739
|
}
|
|
@@ -12622,7 +12745,7 @@ function sanitizeFilename(value) {
|
|
|
12622
12745
|
return sanitized.length > 0 ? sanitized : randomUUID2();
|
|
12623
12746
|
}
|
|
12624
12747
|
async function invokeProvider(provider, options) {
|
|
12625
|
-
const {
|
|
12748
|
+
const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
12626
12749
|
const controller = new AbortController();
|
|
12627
12750
|
const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
|
|
12628
12751
|
if (signal) {
|
|
@@ -12632,12 +12755,12 @@ async function invokeProvider(provider, options) {
|
|
|
12632
12755
|
return await provider.invoke({
|
|
12633
12756
|
prompt: promptInputs.request,
|
|
12634
12757
|
guidelines: promptInputs.guidelines,
|
|
12635
|
-
|
|
12636
|
-
|
|
12758
|
+
guideline_patterns: evalCase.guideline_patterns,
|
|
12759
|
+
attachments: evalCase.file_paths,
|
|
12760
|
+
evalCaseId: evalCase.id,
|
|
12637
12761
|
attempt,
|
|
12638
12762
|
metadata: {
|
|
12639
|
-
|
|
12640
|
-
grader: testCase.grader
|
|
12763
|
+
systemPrompt: promptInputs.systemMessage ?? ""
|
|
12641
12764
|
},
|
|
12642
12765
|
signal: controller.signal
|
|
12643
12766
|
});
|
|
@@ -12647,17 +12770,18 @@ async function invokeProvider(provider, options) {
|
|
|
12647
12770
|
}
|
|
12648
12771
|
}
|
|
12649
12772
|
}
|
|
12650
|
-
function buildErrorResult(
|
|
12773
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
12651
12774
|
const message = error instanceof Error ? error.message : String(error);
|
|
12652
12775
|
const rawRequest = {
|
|
12653
12776
|
request: promptInputs.request,
|
|
12654
12777
|
guidelines: promptInputs.guidelines,
|
|
12655
|
-
guideline_paths:
|
|
12778
|
+
guideline_paths: evalCase.guideline_paths,
|
|
12779
|
+
system_message: promptInputs.systemMessage ?? "",
|
|
12656
12780
|
error: message
|
|
12657
12781
|
};
|
|
12658
12782
|
return {
|
|
12659
|
-
|
|
12660
|
-
conversation_id:
|
|
12783
|
+
eval_id: evalCase.id,
|
|
12784
|
+
conversation_id: evalCase.conversation_id,
|
|
12661
12785
|
score: 0,
|
|
12662
12786
|
hits: [],
|
|
12663
12787
|
misses: [`Error: ${message}`],
|
|
@@ -12669,13 +12793,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
|
|
|
12669
12793
|
raw_request: rawRequest
|
|
12670
12794
|
};
|
|
12671
12795
|
}
|
|
12672
|
-
function createCacheKey(provider, target,
|
|
12796
|
+
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
12673
12797
|
const hash = createHash("sha256");
|
|
12674
12798
|
hash.update(provider.id);
|
|
12675
12799
|
hash.update(target.name);
|
|
12676
|
-
hash.update(
|
|
12800
|
+
hash.update(evalCase.id);
|
|
12677
12801
|
hash.update(promptInputs.request);
|
|
12678
12802
|
hash.update(promptInputs.guidelines);
|
|
12803
|
+
hash.update(promptInputs.systemMessage ?? "");
|
|
12679
12804
|
return hash.digest("hex");
|
|
12680
12805
|
}
|
|
12681
12806
|
function isTimeoutLike(error) {
|
|
@@ -12721,7 +12846,7 @@ function uniqueDirs(directories) {
|
|
|
12721
12846
|
}
|
|
12722
12847
|
return result;
|
|
12723
12848
|
}
|
|
12724
|
-
async function
|
|
12849
|
+
async function fileExists4(filePath) {
|
|
12725
12850
|
try {
|
|
12726
12851
|
await access4(filePath, constants4.F_OK);
|
|
12727
12852
|
return true;
|
|
@@ -12757,7 +12882,7 @@ async function loadEnvFromHierarchy(options) {
|
|
|
12757
12882
|
]);
|
|
12758
12883
|
for (const dir of searchDirs) {
|
|
12759
12884
|
const candidate = path8.join(dir, ".env");
|
|
12760
|
-
if (await
|
|
12885
|
+
if (await fileExists4(candidate)) {
|
|
12761
12886
|
loadDotenv({ path: candidate, override: false });
|
|
12762
12887
|
if (verbose) {
|
|
12763
12888
|
console.log(`Loaded environment from: ${candidate}`);
|
|
@@ -13127,9 +13252,9 @@ var ProgressDisplay = class {
|
|
|
13127
13252
|
this.scheduleRender();
|
|
13128
13253
|
} else {
|
|
13129
13254
|
if (progress.status === "completed") {
|
|
13130
|
-
console.log(`\u2713 Test ${progress.
|
|
13255
|
+
console.log(`\u2713 Test ${progress.evalId} completed`);
|
|
13131
13256
|
} else if (progress.status === "failed") {
|
|
13132
|
-
console.log(`\u2717 Test ${progress.
|
|
13257
|
+
console.log(`\u2717 Test ${progress.evalId} failed${progress.error ? `: ${progress.error}` : ""}`);
|
|
13133
13258
|
}
|
|
13134
13259
|
}
|
|
13135
13260
|
}
|
|
@@ -13162,7 +13287,7 @@ var ProgressDisplay = class {
|
|
|
13162
13287
|
const statusIcon = this.getStatusIcon(worker.status);
|
|
13163
13288
|
const elapsed = worker.startedAt ? this.formatElapsed(Date.now() - worker.startedAt) : "";
|
|
13164
13289
|
const timeLabel = elapsed ? ` (${elapsed})` : "";
|
|
13165
|
-
let testLabel = worker.
|
|
13290
|
+
let testLabel = worker.evalId;
|
|
13166
13291
|
if (testLabel.length > 50) {
|
|
13167
13292
|
testLabel = testLabel.substring(0, 47) + "...";
|
|
13168
13293
|
}
|
|
@@ -13316,7 +13441,7 @@ function formatEvaluationSummary(summary) {
|
|
|
13316
13441
|
lines.push("\n==================================================");
|
|
13317
13442
|
lines.push("EVALUATION SUMMARY");
|
|
13318
13443
|
lines.push("==================================================");
|
|
13319
|
-
lines.push(`Total
|
|
13444
|
+
lines.push(`Total eval cases: ${summary.total}`);
|
|
13320
13445
|
lines.push(`Mean score: ${formatScore(summary.mean)}`);
|
|
13321
13446
|
lines.push(`Median score: ${formatScore(summary.median)}`);
|
|
13322
13447
|
lines.push(`Min score: ${formatScore(summary.min)}`);
|
|
@@ -13329,13 +13454,13 @@ function formatEvaluationSummary(summary) {
|
|
|
13329
13454
|
const [start, end] = bin.range;
|
|
13330
13455
|
lines.push(` ${start.toFixed(1)}-${end.toFixed(1)}: ${bin.count}`);
|
|
13331
13456
|
}
|
|
13332
|
-
lines.push("\nTop performing
|
|
13457
|
+
lines.push("\nTop performing eval cases:");
|
|
13333
13458
|
summary.topResults.forEach((result, index) => {
|
|
13334
|
-
lines.push(` ${index + 1}. ${result.
|
|
13459
|
+
lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
|
|
13335
13460
|
});
|
|
13336
|
-
lines.push("\nLowest performing
|
|
13461
|
+
lines.push("\nLowest performing eval cases:");
|
|
13337
13462
|
summary.bottomResults.forEach((result, index) => {
|
|
13338
|
-
lines.push(` ${index + 1}. ${result.
|
|
13463
|
+
lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
|
|
13339
13464
|
});
|
|
13340
13465
|
return lines.join("\n");
|
|
13341
13466
|
}
|
|
@@ -13349,11 +13474,9 @@ var TARGET_FILE_CANDIDATES = [
|
|
|
13349
13474
|
"targets.yaml",
|
|
13350
13475
|
"targets.yml",
|
|
13351
13476
|
path11.join(".agentv", "targets.yaml"),
|
|
13352
|
-
path11.join(".agentv", "targets.yml")
|
|
13353
|
-
path11.join(".bbeval", "targets.yaml"),
|
|
13354
|
-
path11.join(".bbeval", "targets.yml")
|
|
13477
|
+
path11.join(".agentv", "targets.yml")
|
|
13355
13478
|
];
|
|
13356
|
-
async function
|
|
13479
|
+
async function fileExists5(filePath) {
|
|
13357
13480
|
try {
|
|
13358
13481
|
await access5(filePath, constants5.F_OK);
|
|
13359
13482
|
return true;
|
|
@@ -13375,56 +13498,30 @@ async function readTestSuiteTarget(testFilePath) {
|
|
|
13375
13498
|
}
|
|
13376
13499
|
return void 0;
|
|
13377
13500
|
}
|
|
13378
|
-
function buildDirectoryChain(testFilePath, repoRoot, cwd) {
|
|
13379
|
-
const directories = [];
|
|
13380
|
-
const seen = /* @__PURE__ */ new Set();
|
|
13381
|
-
const boundary = path11.resolve(repoRoot);
|
|
13382
|
-
let current = path11.resolve(path11.dirname(testFilePath));
|
|
13383
|
-
while (current !== void 0) {
|
|
13384
|
-
if (!seen.has(current)) {
|
|
13385
|
-
directories.push(current);
|
|
13386
|
-
seen.add(current);
|
|
13387
|
-
}
|
|
13388
|
-
if (current === boundary) {
|
|
13389
|
-
break;
|
|
13390
|
-
}
|
|
13391
|
-
const parent = path11.dirname(current);
|
|
13392
|
-
if (parent === current) {
|
|
13393
|
-
break;
|
|
13394
|
-
}
|
|
13395
|
-
current = parent;
|
|
13396
|
-
}
|
|
13397
|
-
if (!seen.has(boundary)) {
|
|
13398
|
-
directories.push(boundary);
|
|
13399
|
-
seen.add(boundary);
|
|
13400
|
-
}
|
|
13401
|
-
const resolvedCwd = path11.resolve(cwd);
|
|
13402
|
-
if (!seen.has(resolvedCwd)) {
|
|
13403
|
-
directories.push(resolvedCwd);
|
|
13404
|
-
seen.add(resolvedCwd);
|
|
13405
|
-
}
|
|
13406
|
-
return directories;
|
|
13407
|
-
}
|
|
13408
13501
|
async function discoverTargetsFile(options) {
|
|
13409
13502
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
13410
13503
|
if (explicitPath) {
|
|
13411
13504
|
const resolvedExplicit = path11.resolve(explicitPath);
|
|
13412
|
-
if (await
|
|
13505
|
+
if (await fileExists5(resolvedExplicit)) {
|
|
13413
13506
|
return resolvedExplicit;
|
|
13414
13507
|
}
|
|
13415
13508
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
13416
13509
|
const nested = path11.join(resolvedExplicit, candidate);
|
|
13417
|
-
if (await
|
|
13510
|
+
if (await fileExists5(nested)) {
|
|
13418
13511
|
return nested;
|
|
13419
13512
|
}
|
|
13420
13513
|
}
|
|
13421
13514
|
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
13422
13515
|
}
|
|
13423
|
-
const directories = buildDirectoryChain(testFilePath, repoRoot
|
|
13516
|
+
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
13517
|
+
const resolvedCwd = path11.resolve(cwd);
|
|
13518
|
+
if (!directories.includes(resolvedCwd)) {
|
|
13519
|
+
directories.push(resolvedCwd);
|
|
13520
|
+
}
|
|
13424
13521
|
for (const directory of directories) {
|
|
13425
13522
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
13426
13523
|
const fullPath = path11.join(directory, candidate);
|
|
13427
|
-
if (await
|
|
13524
|
+
if (await fileExists5(fullPath)) {
|
|
13428
13525
|
return fullPath;
|
|
13429
13526
|
}
|
|
13430
13527
|
}
|
|
@@ -13525,7 +13622,7 @@ function normalizeOptions(rawOptions) {
|
|
|
13525
13622
|
return {
|
|
13526
13623
|
target: normalizeString(rawOptions.target),
|
|
13527
13624
|
targetsPath: normalizeString(rawOptions.targets),
|
|
13528
|
-
|
|
13625
|
+
evalId: normalizeString(rawOptions.evalId),
|
|
13529
13626
|
workers: workers > 0 ? workers : void 0,
|
|
13530
13627
|
outPath: normalizeString(rawOptions.out),
|
|
13531
13628
|
format,
|
|
@@ -13672,7 +13769,7 @@ async function runEvalCommand(input) {
|
|
|
13672
13769
|
promptDumpDir,
|
|
13673
13770
|
cache,
|
|
13674
13771
|
useCache: options.cache,
|
|
13675
|
-
|
|
13772
|
+
evalId: options.evalId,
|
|
13676
13773
|
verbose: options.verbose,
|
|
13677
13774
|
maxConcurrency: resolvedWorkers,
|
|
13678
13775
|
onResult: async (result) => {
|
|
@@ -13685,7 +13782,7 @@ async function runEvalCommand(input) {
|
|
|
13685
13782
|
}
|
|
13686
13783
|
progressDisplay.updateWorker({
|
|
13687
13784
|
workerId: event.workerId,
|
|
13688
|
-
|
|
13785
|
+
evalId: event.evalId,
|
|
13689
13786
|
status: event.status,
|
|
13690
13787
|
startedAt: event.startedAt,
|
|
13691
13788
|
completedAt: event.completedAt,
|
|
@@ -13735,7 +13832,7 @@ function parseInteger(value, fallback) {
|
|
|
13735
13832
|
return parsed;
|
|
13736
13833
|
}
|
|
13737
13834
|
function registerEvalCommand(program) {
|
|
13738
|
-
program.command("eval").description("Run
|
|
13835
|
+
program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
|
|
13739
13836
|
"--workers <count>",
|
|
13740
13837
|
"Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
13741
13838
|
(value) => parseInteger(value, 1)
|
|
@@ -13773,25 +13870,164 @@ function registerEvalCommand(program) {
|
|
|
13773
13870
|
return program;
|
|
13774
13871
|
}
|
|
13775
13872
|
|
|
13776
|
-
// src/commands/
|
|
13777
|
-
import {
|
|
13778
|
-
import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
13873
|
+
// src/commands/init/index.ts
|
|
13874
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
13779
13875
|
import path14 from "node:path";
|
|
13780
13876
|
|
|
13877
|
+
// src/templates/index.ts
|
|
13878
|
+
import { readFileSync } from "node:fs";
|
|
13879
|
+
import path13 from "node:path";
|
|
13880
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
13881
|
+
var TemplateManager = class {
|
|
13882
|
+
static getTemplates() {
|
|
13883
|
+
const currentDir = path13.dirname(fileURLToPath2(import.meta.url));
|
|
13884
|
+
let templatesDir;
|
|
13885
|
+
if (currentDir.includes(path13.sep + "dist")) {
|
|
13886
|
+
templatesDir = path13.join(currentDir, "templates");
|
|
13887
|
+
} else {
|
|
13888
|
+
templatesDir = currentDir;
|
|
13889
|
+
}
|
|
13890
|
+
const evalBuildPrompt = readFileSync(
|
|
13891
|
+
path13.join(templatesDir, "eval-build.prompt.md"),
|
|
13892
|
+
"utf-8"
|
|
13893
|
+
);
|
|
13894
|
+
const evalSchema = readFileSync(
|
|
13895
|
+
path13.join(templatesDir, "eval-schema.json"),
|
|
13896
|
+
"utf-8"
|
|
13897
|
+
);
|
|
13898
|
+
const configSchema = readFileSync(
|
|
13899
|
+
path13.join(templatesDir, "config-schema.json"),
|
|
13900
|
+
"utf-8"
|
|
13901
|
+
);
|
|
13902
|
+
return [
|
|
13903
|
+
{
|
|
13904
|
+
path: "prompts/eval-build.prompt.md",
|
|
13905
|
+
content: evalBuildPrompt
|
|
13906
|
+
},
|
|
13907
|
+
{
|
|
13908
|
+
path: "contexts/eval-schema.json",
|
|
13909
|
+
content: evalSchema
|
|
13910
|
+
},
|
|
13911
|
+
{
|
|
13912
|
+
path: "contexts/config-schema.json",
|
|
13913
|
+
content: configSchema
|
|
13914
|
+
}
|
|
13915
|
+
];
|
|
13916
|
+
}
|
|
13917
|
+
};
|
|
13918
|
+
|
|
13919
|
+
// src/commands/init/index.ts
|
|
13920
|
+
async function initCommand(options = {}) {
|
|
13921
|
+
const targetPath = path14.resolve(options.targetPath ?? ".");
|
|
13922
|
+
const githubDir = path14.join(targetPath, ".github");
|
|
13923
|
+
if (!existsSync(githubDir)) {
|
|
13924
|
+
mkdirSync(githubDir, { recursive: true });
|
|
13925
|
+
}
|
|
13926
|
+
const templates = TemplateManager.getTemplates();
|
|
13927
|
+
for (const template of templates) {
|
|
13928
|
+
const targetFilePath = path14.join(githubDir, template.path);
|
|
13929
|
+
const targetDirPath = path14.dirname(targetFilePath);
|
|
13930
|
+
if (!existsSync(targetDirPath)) {
|
|
13931
|
+
mkdirSync(targetDirPath, { recursive: true });
|
|
13932
|
+
}
|
|
13933
|
+
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
13934
|
+
console.log(`Created ${path14.relative(targetPath, targetFilePath)}`);
|
|
13935
|
+
}
|
|
13936
|
+
console.log("\nAgentV initialized successfully!");
|
|
13937
|
+
console.log(`
|
|
13938
|
+
Files installed to ${path14.relative(targetPath, githubDir)}:`);
|
|
13939
|
+
templates.forEach((t) => console.log(` - ${t.path}`));
|
|
13940
|
+
console.log("\nYou can now create eval files using the schema and prompt templates.");
|
|
13941
|
+
}
|
|
13942
|
+
|
|
13943
|
+
// src/commands/validate/format-output.ts
|
|
13944
|
+
var ANSI_RED = "\x1B[31m";
|
|
13945
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
13946
|
+
var ANSI_GREEN = "\x1B[32m";
|
|
13947
|
+
var ANSI_CYAN = "\x1B[36m";
|
|
13948
|
+
var ANSI_BOLD = "\x1B[1m";
|
|
13949
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
13950
|
+
function formatSummary(summary, useColors) {
|
|
13951
|
+
const lines = [];
|
|
13952
|
+
lines.push("");
|
|
13953
|
+
lines.push(formatHeader("Validation Summary", useColors));
|
|
13954
|
+
lines.push("");
|
|
13955
|
+
for (const result of summary.results) {
|
|
13956
|
+
lines.push(formatFileResult(result, useColors));
|
|
13957
|
+
}
|
|
13958
|
+
lines.push("");
|
|
13959
|
+
lines.push(formatStats(summary, useColors));
|
|
13960
|
+
lines.push("");
|
|
13961
|
+
return lines.join("\n");
|
|
13962
|
+
}
|
|
13963
|
+
function formatHeader(text, useColors) {
|
|
13964
|
+
if (useColors) {
|
|
13965
|
+
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
|
|
13966
|
+
}
|
|
13967
|
+
return text;
|
|
13968
|
+
}
|
|
13969
|
+
function formatFileResult(result, useColors) {
|
|
13970
|
+
const lines = [];
|
|
13971
|
+
const status = result.valid ? "\u2713" : "\u2717";
|
|
13972
|
+
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
|
|
13973
|
+
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
|
|
13974
|
+
const fileName = result.filePath;
|
|
13975
|
+
lines.push(`${statusText} ${fileName}`);
|
|
13976
|
+
if (result.errors.length > 0) {
|
|
13977
|
+
for (const error of result.errors) {
|
|
13978
|
+
lines.push(formatError(error, useColors));
|
|
13979
|
+
}
|
|
13980
|
+
}
|
|
13981
|
+
return lines.join("\n");
|
|
13982
|
+
}
|
|
13983
|
+
function formatError(error, useColors) {
|
|
13984
|
+
const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
|
|
13985
|
+
const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
|
|
13986
|
+
const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
|
|
13987
|
+
const location = error.location ? ` [${error.location}]` : "";
|
|
13988
|
+
return `${coloredPrefix}${location} ${error.message}`;
|
|
13989
|
+
}
|
|
13990
|
+
function formatStats(summary, useColors) {
|
|
13991
|
+
const lines = [];
|
|
13992
|
+
const totalText = `Total files: ${summary.totalFiles}`;
|
|
13993
|
+
const validText = `Valid: ${summary.validFiles}`;
|
|
13994
|
+
const invalidText = `Invalid: ${summary.invalidFiles}`;
|
|
13995
|
+
if (useColors) {
|
|
13996
|
+
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
|
|
13997
|
+
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
|
|
13998
|
+
if (summary.invalidFiles > 0) {
|
|
13999
|
+
lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
|
|
14000
|
+
} else {
|
|
14001
|
+
lines.push(invalidText);
|
|
14002
|
+
}
|
|
14003
|
+
} else {
|
|
14004
|
+
lines.push(totalText);
|
|
14005
|
+
lines.push(validText);
|
|
14006
|
+
lines.push(invalidText);
|
|
14007
|
+
}
|
|
14008
|
+
return lines.join("\n");
|
|
14009
|
+
}
|
|
14010
|
+
function isTTY() {
|
|
14011
|
+
return process.stdout.isTTY ?? false;
|
|
14012
|
+
}
|
|
14013
|
+
|
|
13781
14014
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
13782
14015
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
13783
14016
|
import { parse as parse5 } from "yaml";
|
|
13784
14017
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
13785
|
-
import
|
|
14018
|
+
import path15 from "node:path";
|
|
13786
14019
|
import { parse as parse23 } from "yaml";
|
|
13787
14020
|
import { readFile as readFile32 } from "node:fs/promises";
|
|
13788
14021
|
import path23 from "node:path";
|
|
13789
14022
|
import { parse as parse32 } from "yaml";
|
|
13790
14023
|
import { readFile as readFile42 } from "node:fs/promises";
|
|
13791
|
-
import path33 from "node:path";
|
|
13792
14024
|
import { parse as parse42 } from "yaml";
|
|
14025
|
+
import { readFile as readFile52 } from "node:fs/promises";
|
|
14026
|
+
import path33 from "node:path";
|
|
14027
|
+
import { parse as parse52 } from "yaml";
|
|
13793
14028
|
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
13794
14029
|
var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
|
|
14030
|
+
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
13795
14031
|
async function detectFileType(filePath) {
|
|
13796
14032
|
try {
|
|
13797
14033
|
const content = await readFile5(filePath, "utf8");
|
|
@@ -13809,6 +14045,8 @@ async function detectFileType(filePath) {
|
|
|
13809
14045
|
return "eval";
|
|
13810
14046
|
case SCHEMA_TARGETS_V2:
|
|
13811
14047
|
return "targets";
|
|
14048
|
+
case SCHEMA_CONFIG_V22:
|
|
14049
|
+
return "config";
|
|
13812
14050
|
default:
|
|
13813
14051
|
return "unknown";
|
|
13814
14052
|
}
|
|
@@ -13822,7 +14060,7 @@ function isObject(value) {
|
|
|
13822
14060
|
}
|
|
13823
14061
|
async function validateEvalFile(filePath) {
|
|
13824
14062
|
const errors = [];
|
|
13825
|
-
const absolutePath =
|
|
14063
|
+
const absolutePath = path15.resolve(filePath);
|
|
13826
14064
|
let parsed;
|
|
13827
14065
|
try {
|
|
13828
14066
|
const content = await readFile23(absolutePath, "utf8");
|
|
@@ -14008,7 +14246,6 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
14008
14246
|
}
|
|
14009
14247
|
}
|
|
14010
14248
|
}
|
|
14011
|
-
var SCHEMA_TARGETS_V22 = "agentv-targets-v2";
|
|
14012
14249
|
function isObject2(value) {
|
|
14013
14250
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
14014
14251
|
}
|
|
@@ -14046,8 +14283,8 @@ async function validateTargetsFile(filePath) {
|
|
|
14046
14283
|
};
|
|
14047
14284
|
}
|
|
14048
14285
|
const schema = parsed["$schema"];
|
|
14049
|
-
if (schema !==
|
|
14050
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${
|
|
14286
|
+
if (schema !== TARGETS_SCHEMA_V2) {
|
|
14287
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
|
|
14051
14288
|
errors.push({
|
|
14052
14289
|
severity: "error",
|
|
14053
14290
|
filePath: absolutePath,
|
|
@@ -14070,7 +14307,7 @@ async function validateTargetsFile(filePath) {
|
|
|
14070
14307
|
errors
|
|
14071
14308
|
};
|
|
14072
14309
|
}
|
|
14073
|
-
const knownProviders = [
|
|
14310
|
+
const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
|
|
14074
14311
|
for (let i6 = 0; i6 < targets.length; i6++) {
|
|
14075
14312
|
const target = targets[i6];
|
|
14076
14313
|
const location = `targets[${i6}]`;
|
|
@@ -14134,6 +14371,80 @@ async function validateTargetsFile(filePath) {
|
|
|
14134
14371
|
errors
|
|
14135
14372
|
};
|
|
14136
14373
|
}
|
|
14374
|
+
var SCHEMA_CONFIG_V222 = "agentv-config-v2";
|
|
14375
|
+
async function validateConfigFile(filePath) {
|
|
14376
|
+
const errors = [];
|
|
14377
|
+
try {
|
|
14378
|
+
const content = await readFile42(filePath, "utf8");
|
|
14379
|
+
const parsed = parse42(content);
|
|
14380
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
14381
|
+
errors.push({
|
|
14382
|
+
severity: "error",
|
|
14383
|
+
filePath,
|
|
14384
|
+
message: "Config file must contain a valid YAML object"
|
|
14385
|
+
});
|
|
14386
|
+
return { valid: false, filePath, fileType: "config", errors };
|
|
14387
|
+
}
|
|
14388
|
+
const config = parsed;
|
|
14389
|
+
const schema = config["$schema"];
|
|
14390
|
+
if (schema !== SCHEMA_CONFIG_V222) {
|
|
14391
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
|
|
14392
|
+
errors.push({
|
|
14393
|
+
severity: "error",
|
|
14394
|
+
filePath,
|
|
14395
|
+
location: "$schema",
|
|
14396
|
+
message
|
|
14397
|
+
});
|
|
14398
|
+
}
|
|
14399
|
+
const guidelinePatterns = config["guideline_patterns"];
|
|
14400
|
+
if (guidelinePatterns !== void 0) {
|
|
14401
|
+
if (!Array.isArray(guidelinePatterns)) {
|
|
14402
|
+
errors.push({
|
|
14403
|
+
severity: "error",
|
|
14404
|
+
filePath,
|
|
14405
|
+
location: "guideline_patterns",
|
|
14406
|
+
message: "Field 'guideline_patterns' must be an array"
|
|
14407
|
+
});
|
|
14408
|
+
} else if (!guidelinePatterns.every((p) => typeof p === "string")) {
|
|
14409
|
+
errors.push({
|
|
14410
|
+
severity: "error",
|
|
14411
|
+
filePath,
|
|
14412
|
+
location: "guideline_patterns",
|
|
14413
|
+
message: "All entries in 'guideline_patterns' must be strings"
|
|
14414
|
+
});
|
|
14415
|
+
} else if (guidelinePatterns.length === 0) {
|
|
14416
|
+
errors.push({
|
|
14417
|
+
severity: "warning",
|
|
14418
|
+
filePath,
|
|
14419
|
+
location: "guideline_patterns",
|
|
14420
|
+
message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
|
|
14421
|
+
});
|
|
14422
|
+
}
|
|
14423
|
+
}
|
|
14424
|
+
const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
|
|
14425
|
+
const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
|
|
14426
|
+
if (unexpectedFields.length > 0) {
|
|
14427
|
+
errors.push({
|
|
14428
|
+
severity: "warning",
|
|
14429
|
+
filePath,
|
|
14430
|
+
message: `Unexpected fields: ${unexpectedFields.join(", ")}`
|
|
14431
|
+
});
|
|
14432
|
+
}
|
|
14433
|
+
return {
|
|
14434
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
14435
|
+
filePath,
|
|
14436
|
+
fileType: "config",
|
|
14437
|
+
errors
|
|
14438
|
+
};
|
|
14439
|
+
} catch (error) {
|
|
14440
|
+
errors.push({
|
|
14441
|
+
severity: "error",
|
|
14442
|
+
filePath,
|
|
14443
|
+
message: `Failed to parse config file: ${error.message}`
|
|
14444
|
+
});
|
|
14445
|
+
return { valid: false, filePath, fileType: "config", errors };
|
|
14446
|
+
}
|
|
14447
|
+
}
|
|
14137
14448
|
function isObject3(value) {
|
|
14138
14449
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
14139
14450
|
}
|
|
@@ -14152,8 +14463,8 @@ async function validateFileReferences(evalFilePath) {
|
|
|
14152
14463
|
const searchRoots = buildSearchRoots(absolutePath, gitRoot);
|
|
14153
14464
|
let parsed;
|
|
14154
14465
|
try {
|
|
14155
|
-
const content = await
|
|
14156
|
-
parsed =
|
|
14466
|
+
const content = await readFile52(absolutePath, "utf8");
|
|
14467
|
+
parsed = parse52(content);
|
|
14157
14468
|
} catch {
|
|
14158
14469
|
return errors;
|
|
14159
14470
|
}
|
|
@@ -14222,7 +14533,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
14222
14533
|
});
|
|
14223
14534
|
} else {
|
|
14224
14535
|
try {
|
|
14225
|
-
const fileContent = await
|
|
14536
|
+
const fileContent = await readFile52(resolvedPath, "utf8");
|
|
14226
14537
|
if (fileContent.trim().length === 0) {
|
|
14227
14538
|
errors.push({
|
|
14228
14539
|
severity: "warning",
|
|
@@ -14244,12 +14555,15 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
14244
14555
|
}
|
|
14245
14556
|
}
|
|
14246
14557
|
|
|
14247
|
-
// src/commands/
|
|
14248
|
-
|
|
14558
|
+
// src/commands/validate/validate-files.ts
|
|
14559
|
+
import { constants as constants7 } from "node:fs";
|
|
14560
|
+
import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
14561
|
+
import path16 from "node:path";
|
|
14562
|
+
async function validateFiles(paths) {
|
|
14249
14563
|
const filePaths = await expandPaths(paths);
|
|
14250
14564
|
const results = [];
|
|
14251
14565
|
for (const filePath of filePaths) {
|
|
14252
|
-
const result = await
|
|
14566
|
+
const result = await validateSingleFile(filePath);
|
|
14253
14567
|
results.push(result);
|
|
14254
14568
|
}
|
|
14255
14569
|
const validFiles = results.filter((r) => r.valid).length;
|
|
@@ -14261,8 +14575,8 @@ async function lintFiles(paths) {
|
|
|
14261
14575
|
results
|
|
14262
14576
|
};
|
|
14263
14577
|
}
|
|
14264
|
-
async function
|
|
14265
|
-
const absolutePath =
|
|
14578
|
+
async function validateSingleFile(filePath) {
|
|
14579
|
+
const absolutePath = path16.resolve(filePath);
|
|
14266
14580
|
const fileType = await detectFileType(absolutePath);
|
|
14267
14581
|
if (fileType === "unknown") {
|
|
14268
14582
|
return {
|
|
@@ -14273,7 +14587,7 @@ async function lintSingleFile(filePath) {
|
|
|
14273
14587
|
{
|
|
14274
14588
|
severity: "error",
|
|
14275
14589
|
filePath: absolutePath,
|
|
14276
|
-
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2' or 'agentv-
|
|
14590
|
+
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
|
|
14277
14591
|
}
|
|
14278
14592
|
]
|
|
14279
14593
|
};
|
|
@@ -14291,15 +14605,17 @@ async function lintSingleFile(filePath) {
|
|
|
14291
14605
|
};
|
|
14292
14606
|
}
|
|
14293
14607
|
}
|
|
14294
|
-
} else {
|
|
14608
|
+
} else if (fileType === "targets") {
|
|
14295
14609
|
result = await validateTargetsFile(absolutePath);
|
|
14610
|
+
} else {
|
|
14611
|
+
result = await validateConfigFile(absolutePath);
|
|
14296
14612
|
}
|
|
14297
14613
|
return result;
|
|
14298
14614
|
}
|
|
14299
14615
|
async function expandPaths(paths) {
|
|
14300
14616
|
const expanded = [];
|
|
14301
14617
|
for (const inputPath of paths) {
|
|
14302
|
-
const absolutePath =
|
|
14618
|
+
const absolutePath = path16.resolve(inputPath);
|
|
14303
14619
|
try {
|
|
14304
14620
|
await access7(absolutePath, constants7.F_OK);
|
|
14305
14621
|
} catch {
|
|
@@ -14323,7 +14639,7 @@ async function findYamlFiles(dirPath) {
|
|
|
14323
14639
|
try {
|
|
14324
14640
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
14325
14641
|
for (const entry of entries) {
|
|
14326
|
-
const fullPath =
|
|
14642
|
+
const fullPath = path16.join(dirPath, entry.name);
|
|
14327
14643
|
if (entry.isDirectory()) {
|
|
14328
14644
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
14329
14645
|
continue;
|
|
@@ -14340,98 +14656,27 @@ async function findYamlFiles(dirPath) {
|
|
|
14340
14656
|
return results;
|
|
14341
14657
|
}
|
|
14342
14658
|
function isYamlFile(filePath) {
|
|
14343
|
-
const ext =
|
|
14659
|
+
const ext = path16.extname(filePath).toLowerCase();
|
|
14344
14660
|
return ext === ".yaml" || ext === ".yml";
|
|
14345
14661
|
}
|
|
14346
14662
|
|
|
14347
|
-
// src/commands/
|
|
14348
|
-
|
|
14349
|
-
var ANSI_YELLOW2 = "\x1B[33m";
|
|
14350
|
-
var ANSI_GREEN = "\x1B[32m";
|
|
14351
|
-
var ANSI_CYAN = "\x1B[36m";
|
|
14352
|
-
var ANSI_BOLD = "\x1B[1m";
|
|
14353
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
14354
|
-
function formatSummary(summary, useColors) {
|
|
14355
|
-
const lines = [];
|
|
14356
|
-
lines.push("");
|
|
14357
|
-
lines.push(formatHeader("Validation Summary", useColors));
|
|
14358
|
-
lines.push("");
|
|
14359
|
-
for (const result of summary.results) {
|
|
14360
|
-
lines.push(formatFileResult(result, useColors));
|
|
14361
|
-
}
|
|
14362
|
-
lines.push("");
|
|
14363
|
-
lines.push(formatStats(summary, useColors));
|
|
14364
|
-
lines.push("");
|
|
14365
|
-
return lines.join("\n");
|
|
14366
|
-
}
|
|
14367
|
-
function formatHeader(text, useColors) {
|
|
14368
|
-
if (useColors) {
|
|
14369
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
|
|
14370
|
-
}
|
|
14371
|
-
return text;
|
|
14372
|
-
}
|
|
14373
|
-
function formatFileResult(result, useColors) {
|
|
14374
|
-
const lines = [];
|
|
14375
|
-
const status = result.valid ? "\u2713" : "\u2717";
|
|
14376
|
-
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
|
|
14377
|
-
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
|
|
14378
|
-
const fileName = result.filePath;
|
|
14379
|
-
lines.push(`${statusText} ${fileName}`);
|
|
14380
|
-
if (result.errors.length > 0) {
|
|
14381
|
-
for (const error of result.errors) {
|
|
14382
|
-
lines.push(formatError(error, useColors));
|
|
14383
|
-
}
|
|
14384
|
-
}
|
|
14385
|
-
return lines.join("\n");
|
|
14386
|
-
}
|
|
14387
|
-
function formatError(error, useColors) {
|
|
14388
|
-
const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
|
|
14389
|
-
const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
|
|
14390
|
-
const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
|
|
14391
|
-
const location = error.location ? ` [${error.location}]` : "";
|
|
14392
|
-
return `${coloredPrefix}${location} ${error.message}`;
|
|
14393
|
-
}
|
|
14394
|
-
function formatStats(summary, useColors) {
|
|
14395
|
-
const lines = [];
|
|
14396
|
-
const totalText = `Total files: ${summary.totalFiles}`;
|
|
14397
|
-
const validText = `Valid: ${summary.validFiles}`;
|
|
14398
|
-
const invalidText = `Invalid: ${summary.invalidFiles}`;
|
|
14399
|
-
if (useColors) {
|
|
14400
|
-
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
|
|
14401
|
-
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
|
|
14402
|
-
if (summary.invalidFiles > 0) {
|
|
14403
|
-
lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
|
|
14404
|
-
} else {
|
|
14405
|
-
lines.push(invalidText);
|
|
14406
|
-
}
|
|
14407
|
-
} else {
|
|
14408
|
-
lines.push(totalText);
|
|
14409
|
-
lines.push(validText);
|
|
14410
|
-
lines.push(invalidText);
|
|
14411
|
-
}
|
|
14412
|
-
return lines.join("\n");
|
|
14413
|
-
}
|
|
14414
|
-
function isTTY() {
|
|
14415
|
-
return process.stdout.isTTY ?? false;
|
|
14416
|
-
}
|
|
14417
|
-
|
|
14418
|
-
// src/commands/lint/index.ts
|
|
14419
|
-
async function runLintCommand(paths, options) {
|
|
14663
|
+
// src/commands/validate/index.ts
|
|
14664
|
+
async function runValidateCommand(paths, _options) {
|
|
14420
14665
|
if (paths.length === 0) {
|
|
14421
|
-
console.error("Error: No paths specified. Usage: agentv
|
|
14666
|
+
console.error("Error: No paths specified. Usage: agentv validate <paths...>");
|
|
14422
14667
|
process.exit(1);
|
|
14423
14668
|
}
|
|
14424
|
-
const summary = await
|
|
14669
|
+
const summary = await validateFiles(paths);
|
|
14425
14670
|
const useColors = isTTY();
|
|
14426
14671
|
console.log(formatSummary(summary, useColors));
|
|
14427
14672
|
if (summary.invalidFiles > 0) {
|
|
14428
14673
|
process.exit(1);
|
|
14429
14674
|
}
|
|
14430
14675
|
}
|
|
14431
|
-
function
|
|
14432
|
-
program.command("
|
|
14676
|
+
function registerValidateCommand(program) {
|
|
14677
|
+
program.command("validate").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to validate").action(async (paths, _options) => {
|
|
14433
14678
|
try {
|
|
14434
|
-
await
|
|
14679
|
+
await runValidateCommand(paths, _options);
|
|
14435
14680
|
} catch (error) {
|
|
14436
14681
|
console.error(`Error: ${error.message}`);
|
|
14437
14682
|
process.exit(1);
|
|
@@ -14449,68 +14694,6 @@ function registerStatusCommand(program) {
|
|
|
14449
14694
|
return program;
|
|
14450
14695
|
}
|
|
14451
14696
|
|
|
14452
|
-
// src/commands/init/index.ts
|
|
14453
|
-
import { existsSync as existsSync2, mkdirSync, writeFileSync } from "node:fs";
|
|
14454
|
-
import path16 from "node:path";
|
|
14455
|
-
|
|
14456
|
-
// src/templates/index.ts
|
|
14457
|
-
import { readFileSync } from "node:fs";
|
|
14458
|
-
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
14459
|
-
import path15 from "node:path";
|
|
14460
|
-
var TemplateManager = class {
|
|
14461
|
-
static getTemplates() {
|
|
14462
|
-
const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
|
|
14463
|
-
let templatesDir;
|
|
14464
|
-
if (currentDir.includes(path15.sep + "dist")) {
|
|
14465
|
-
templatesDir = path15.join(currentDir, "templates");
|
|
14466
|
-
} else {
|
|
14467
|
-
templatesDir = currentDir;
|
|
14468
|
-
}
|
|
14469
|
-
const evalBuildPrompt = readFileSync(
|
|
14470
|
-
path15.join(templatesDir, "eval-build.prompt.md"),
|
|
14471
|
-
"utf-8"
|
|
14472
|
-
);
|
|
14473
|
-
const evalSchema = readFileSync(
|
|
14474
|
-
path15.join(templatesDir, "eval-schema.json"),
|
|
14475
|
-
"utf-8"
|
|
14476
|
-
);
|
|
14477
|
-
return [
|
|
14478
|
-
{
|
|
14479
|
-
path: "prompts/eval-build.prompt.md",
|
|
14480
|
-
content: evalBuildPrompt
|
|
14481
|
-
},
|
|
14482
|
-
{
|
|
14483
|
-
path: "contexts/eval-schema.json",
|
|
14484
|
-
content: evalSchema
|
|
14485
|
-
}
|
|
14486
|
-
];
|
|
14487
|
-
}
|
|
14488
|
-
};
|
|
14489
|
-
|
|
14490
|
-
// src/commands/init/index.ts
|
|
14491
|
-
async function initCommand(options = {}) {
|
|
14492
|
-
const targetPath = path16.resolve(options.targetPath ?? ".");
|
|
14493
|
-
const githubDir = path16.join(targetPath, ".github");
|
|
14494
|
-
if (!existsSync2(githubDir)) {
|
|
14495
|
-
mkdirSync(githubDir, { recursive: true });
|
|
14496
|
-
}
|
|
14497
|
-
const templates = TemplateManager.getTemplates();
|
|
14498
|
-
for (const template of templates) {
|
|
14499
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
14500
|
-
const targetDirPath = path16.dirname(targetFilePath);
|
|
14501
|
-
if (!existsSync2(targetDirPath)) {
|
|
14502
|
-
mkdirSync(targetDirPath, { recursive: true });
|
|
14503
|
-
}
|
|
14504
|
-
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
14505
|
-
console.log(`Created ${path16.relative(targetPath, targetFilePath)}`);
|
|
14506
|
-
}
|
|
14507
|
-
console.log("\nAgentV initialized successfully!");
|
|
14508
|
-
console.log(`
|
|
14509
|
-
Files installed to ${path16.relative(targetPath, githubDir)}:`);
|
|
14510
|
-
templates.forEach((t) => console.log(` - ${t.path}`));
|
|
14511
|
-
console.log("\nYou can now create eval files using the schema and prompt templates.");
|
|
14512
|
-
}
|
|
14513
|
-
|
|
14514
14697
|
// src/index.ts
|
|
14515
14698
|
var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
|
|
14516
14699
|
function createProgram() {
|
|
@@ -14518,7 +14701,7 @@ function createProgram() {
|
|
|
14518
14701
|
program.name("agentv").description("AgentV CLI scaffolding").version(packageJson.version);
|
|
14519
14702
|
registerStatusCommand(program);
|
|
14520
14703
|
registerEvalCommand(program);
|
|
14521
|
-
|
|
14704
|
+
registerValidateCommand(program);
|
|
14522
14705
|
program.command("init [path]").description("Initialize AgentV in your project (installs prompt templates and schema to .github)").action(async (targetPath) => {
|
|
14523
14706
|
try {
|
|
14524
14707
|
await initCommand({ targetPath });
|
|
@@ -14539,4 +14722,4 @@ export {
|
|
|
14539
14722
|
createProgram,
|
|
14540
14723
|
runCli
|
|
14541
14724
|
};
|
|
14542
|
-
//# sourceMappingURL=chunk-
|
|
14725
|
+
//# sourceMappingURL=chunk-RLBRJX7V.js.map
|