agentv 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -41
- package/dist/{chunk-32ZAVIQY.js → chunk-RLBRJX7V.js} +523 -396
- package/dist/chunk-RLBRJX7V.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/config-schema.json +27 -0
- package/dist/templates/eval-build.prompt.md +3 -3
- package/dist/templates/eval-schema.json +3 -3
- package/package.json +3 -2
- package/dist/chunk-32ZAVIQY.js.map +0 -1
|
@@ -585,7 +585,7 @@ var require_utc = __commonJS({
|
|
|
585
585
|
import { Command } from "commander";
|
|
586
586
|
import { readFileSync as readFileSync2 } from "node:fs";
|
|
587
587
|
|
|
588
|
-
// ../../packages/core/dist/chunk-
|
|
588
|
+
// ../../packages/core/dist/chunk-XXNQA4EW.js
|
|
589
589
|
import { constants } from "node:fs";
|
|
590
590
|
import { access } from "node:fs/promises";
|
|
591
591
|
import path from "node:path";
|
|
@@ -613,6 +613,30 @@ async function findGitRoot(startPath) {
|
|
|
613
613
|
}
|
|
614
614
|
return null;
|
|
615
615
|
}
|
|
616
|
+
function buildDirectoryChain(filePath, repoRoot) {
|
|
617
|
+
const directories = [];
|
|
618
|
+
const seen = /* @__PURE__ */ new Set();
|
|
619
|
+
const boundary = path.resolve(repoRoot);
|
|
620
|
+
let current = path.resolve(path.dirname(filePath));
|
|
621
|
+
while (current !== void 0) {
|
|
622
|
+
if (!seen.has(current)) {
|
|
623
|
+
directories.push(current);
|
|
624
|
+
seen.add(current);
|
|
625
|
+
}
|
|
626
|
+
if (current === boundary) {
|
|
627
|
+
break;
|
|
628
|
+
}
|
|
629
|
+
const parent = path.dirname(current);
|
|
630
|
+
if (parent === current) {
|
|
631
|
+
break;
|
|
632
|
+
}
|
|
633
|
+
current = parent;
|
|
634
|
+
}
|
|
635
|
+
if (!seen.has(boundary)) {
|
|
636
|
+
directories.push(boundary);
|
|
637
|
+
}
|
|
638
|
+
return directories;
|
|
639
|
+
}
|
|
616
640
|
function buildSearchRoots(evalPath, repoRoot) {
|
|
617
641
|
const uniqueRoots = [];
|
|
618
642
|
const addRoot = (root2) => {
|
|
@@ -689,6 +713,7 @@ var PROVIDER_ALIASES = [
|
|
|
689
713
|
var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
|
|
690
714
|
|
|
691
715
|
// ../../packages/core/dist/index.js
|
|
716
|
+
import micromatch from "micromatch";
|
|
692
717
|
import { constants as constants3 } from "node:fs";
|
|
693
718
|
import { access as access3, readFile as readFile2 } from "node:fs/promises";
|
|
694
719
|
import path7 from "node:path";
|
|
@@ -9048,17 +9073,16 @@ var coerce = {
|
|
|
9048
9073
|
var NEVER = INVALID;
|
|
9049
9074
|
|
|
9050
9075
|
// ../../packages/core/dist/index.js
|
|
9051
|
-
import {
|
|
9052
|
-
import { tmpdir } from "node:os";
|
|
9076
|
+
import { readFile as readFile22 } from "node:fs/promises";
|
|
9053
9077
|
import path22 from "node:path";
|
|
9054
9078
|
|
|
9055
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9079
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
9056
9080
|
import { exec, spawn } from "child_process";
|
|
9057
9081
|
import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile, stat as stat2, writeFile } from "fs/promises";
|
|
9058
9082
|
import path5 from "path";
|
|
9059
9083
|
import { promisify } from "util";
|
|
9060
9084
|
|
|
9061
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9085
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/constants.js
|
|
9062
9086
|
import os from "os";
|
|
9063
9087
|
import path2 from "path";
|
|
9064
9088
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
@@ -9070,7 +9094,7 @@ var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
|
9070
9094
|
var DEFAULT_WAKEUP_FILENAME = "wakeup.chatmode.md";
|
|
9071
9095
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9072
9096
|
|
|
9073
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9097
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/fs.js
|
|
9074
9098
|
import { constants as constants2 } from "fs";
|
|
9075
9099
|
import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
|
|
9076
9100
|
import path3 from "path";
|
|
@@ -9103,14 +9127,14 @@ async function removeIfExists(target) {
|
|
|
9103
9127
|
}
|
|
9104
9128
|
}
|
|
9105
9129
|
|
|
9106
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9130
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/time.js
|
|
9107
9131
|
function sleep(ms2) {
|
|
9108
9132
|
return new Promise((resolve) => {
|
|
9109
9133
|
setTimeout(resolve, ms2);
|
|
9110
9134
|
});
|
|
9111
9135
|
}
|
|
9112
9136
|
|
|
9113
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9137
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/workspace.js
|
|
9114
9138
|
import path4 from "path";
|
|
9115
9139
|
|
|
9116
9140
|
// ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
|
|
@@ -10198,7 +10222,7 @@ var JSON5 = {
|
|
|
10198
10222
|
var lib = JSON5;
|
|
10199
10223
|
var dist_default = lib;
|
|
10200
10224
|
|
|
10201
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10225
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/workspace.js
|
|
10202
10226
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10203
10227
|
let workspace;
|
|
10204
10228
|
try {
|
|
@@ -10271,7 +10295,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10271
10295
|
return JSON.stringify(transformedWorkspace, null, 2);
|
|
10272
10296
|
}
|
|
10273
10297
|
|
|
10274
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10298
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
10275
10299
|
var execAsync = promisify(exec);
|
|
10276
10300
|
var DEFAULT_WORKSPACE_TEMPLATE = {
|
|
10277
10301
|
folders: [
|
|
@@ -10453,9 +10477,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
10453
10477
|
return 0;
|
|
10454
10478
|
}
|
|
10455
10479
|
function createRequestPrompt(userQuery, responseFileTmp, responseFileFinal, subagentName, vscodeCmd) {
|
|
10456
|
-
const escapedUserQuery = userQuery.replace(/`/g, "\\`");
|
|
10457
10480
|
return `[[ ## task ## ]]
|
|
10458
|
-
|
|
10481
|
+
|
|
10482
|
+
${userQuery}
|
|
10459
10483
|
|
|
10460
10484
|
[[ ## system_instructions ## ]]
|
|
10461
10485
|
|
|
@@ -10612,7 +10636,7 @@ async function dispatchAgentSession(options) {
|
|
|
10612
10636
|
}
|
|
10613
10637
|
}
|
|
10614
10638
|
|
|
10615
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10639
|
+
// ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/provision.js
|
|
10616
10640
|
import { writeFile as writeFile2 } from "fs/promises";
|
|
10617
10641
|
import path6 from "path";
|
|
10618
10642
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
@@ -10785,9 +10809,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
|
|
10785
10809
|
var ANSI_YELLOW = "\x1B[33m";
|
|
10786
10810
|
var ANSI_RESET = "\x1B[0m";
|
|
10787
10811
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
10788
|
-
|
|
10812
|
+
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
10813
|
+
async function loadConfig(evalFilePath, repoRoot) {
|
|
10814
|
+
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
10815
|
+
for (const directory of directories) {
|
|
10816
|
+
const configPath = path7.join(directory, ".agentv", "config.yaml");
|
|
10817
|
+
if (!await fileExists2(configPath)) {
|
|
10818
|
+
continue;
|
|
10819
|
+
}
|
|
10820
|
+
try {
|
|
10821
|
+
const rawConfig = await readFile2(configPath, "utf8");
|
|
10822
|
+
const parsed = parse3(rawConfig);
|
|
10823
|
+
if (!isJsonObject(parsed)) {
|
|
10824
|
+
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
10825
|
+
continue;
|
|
10826
|
+
}
|
|
10827
|
+
const config = parsed;
|
|
10828
|
+
const schema = config.$schema;
|
|
10829
|
+
if (schema !== SCHEMA_CONFIG_V2) {
|
|
10830
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
|
|
10831
|
+
Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
|
|
10832
|
+
logWarning(message);
|
|
10833
|
+
continue;
|
|
10834
|
+
}
|
|
10835
|
+
const guidelinePatterns = config.guideline_patterns;
|
|
10836
|
+
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
10837
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
10838
|
+
continue;
|
|
10839
|
+
}
|
|
10840
|
+
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
10841
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
10842
|
+
continue;
|
|
10843
|
+
}
|
|
10844
|
+
return {
|
|
10845
|
+
guideline_patterns: guidelinePatterns
|
|
10846
|
+
};
|
|
10847
|
+
} catch (error) {
|
|
10848
|
+
logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
|
|
10849
|
+
continue;
|
|
10850
|
+
}
|
|
10851
|
+
}
|
|
10852
|
+
return null;
|
|
10853
|
+
}
|
|
10854
|
+
function isGuidelineFile(filePath, patterns) {
|
|
10789
10855
|
const normalized = filePath.split("\\").join("/");
|
|
10790
|
-
|
|
10856
|
+
const patternsToUse = patterns ?? [];
|
|
10857
|
+
return micromatch.isMatch(normalized, patternsToUse);
|
|
10791
10858
|
}
|
|
10792
10859
|
function extractCodeBlocks(segments) {
|
|
10793
10860
|
const codeBlocks = [];
|
|
@@ -10807,43 +10874,45 @@ function extractCodeBlocks(segments) {
|
|
|
10807
10874
|
}
|
|
10808
10875
|
return codeBlocks;
|
|
10809
10876
|
}
|
|
10810
|
-
async function
|
|
10877
|
+
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
10811
10878
|
const verbose = options?.verbose ?? false;
|
|
10812
|
-
const absoluteTestPath = path7.resolve(
|
|
10879
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
10813
10880
|
if (!await fileExists2(absoluteTestPath)) {
|
|
10814
|
-
throw new Error(`Test file not found: ${
|
|
10881
|
+
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
10815
10882
|
}
|
|
10816
10883
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
10817
10884
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
10885
|
+
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
10886
|
+
const guidelinePatterns = config?.guideline_patterns;
|
|
10818
10887
|
const rawFile = await readFile2(absoluteTestPath, "utf8");
|
|
10819
10888
|
const parsed = parse3(rawFile);
|
|
10820
10889
|
if (!isJsonObject(parsed)) {
|
|
10821
|
-
throw new Error(`Invalid test file format: ${
|
|
10890
|
+
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
10822
10891
|
}
|
|
10823
10892
|
const suite = parsed;
|
|
10824
10893
|
const schema = suite.$schema;
|
|
10825
10894
|
if (schema !== SCHEMA_EVAL_V2) {
|
|
10826
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${
|
|
10895
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
|
|
10827
10896
|
Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
10828
10897
|
throw new Error(message);
|
|
10829
10898
|
}
|
|
10830
10899
|
const rawTestcases = suite.evalcases;
|
|
10831
10900
|
if (!Array.isArray(rawTestcases)) {
|
|
10832
|
-
throw new Error(`Invalid test file format: ${
|
|
10901
|
+
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
10833
10902
|
}
|
|
10834
10903
|
const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
|
|
10835
10904
|
const results = [];
|
|
10836
|
-
for (const
|
|
10837
|
-
if (!isJsonObject(
|
|
10905
|
+
for (const rawEvalcase of rawTestcases) {
|
|
10906
|
+
if (!isJsonObject(rawEvalcase)) {
|
|
10838
10907
|
logWarning("Skipping invalid test case entry (expected object)");
|
|
10839
10908
|
continue;
|
|
10840
10909
|
}
|
|
10841
|
-
const
|
|
10842
|
-
const id = asString(
|
|
10843
|
-
const conversationId = asString(
|
|
10844
|
-
const outcome = asString(
|
|
10845
|
-
const inputMessagesValue =
|
|
10846
|
-
const expectedMessagesValue =
|
|
10910
|
+
const evalcase = rawEvalcase;
|
|
10911
|
+
const id = asString(evalcase.id);
|
|
10912
|
+
const conversationId = asString(evalcase.conversation_id);
|
|
10913
|
+
const outcome = asString(evalcase.outcome);
|
|
10914
|
+
const inputMessagesValue = evalcase.input_messages;
|
|
10915
|
+
const expectedMessagesValue = evalcase.expected_messages;
|
|
10847
10916
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
10848
10917
|
logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
|
|
10849
10918
|
continue;
|
|
@@ -10856,6 +10925,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10856
10925
|
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
10857
10926
|
const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
|
|
10858
10927
|
const userMessages = inputMessages.filter((message) => message.role === "user");
|
|
10928
|
+
const systemMessages = inputMessages.filter((message) => message.role === "system");
|
|
10859
10929
|
if (assistantMessages.length === 0) {
|
|
10860
10930
|
logWarning(`No assistant message found for test case: ${id}`);
|
|
10861
10931
|
continue;
|
|
@@ -10863,6 +10933,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10863
10933
|
if (assistantMessages.length > 1) {
|
|
10864
10934
|
logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
|
|
10865
10935
|
}
|
|
10936
|
+
if (systemMessages.length > 1) {
|
|
10937
|
+
logWarning(`Multiple system messages found for test case: ${id}, using first`);
|
|
10938
|
+
}
|
|
10939
|
+
let systemMessageContent;
|
|
10940
|
+
if (systemMessages.length > 0) {
|
|
10941
|
+
const content = systemMessages[0]?.content;
|
|
10942
|
+
if (typeof content === "string") {
|
|
10943
|
+
systemMessageContent = content;
|
|
10944
|
+
} else if (Array.isArray(content)) {
|
|
10945
|
+
const textParts = [];
|
|
10946
|
+
for (const segment of content) {
|
|
10947
|
+
if (isJsonObject(segment)) {
|
|
10948
|
+
const value = segment.value;
|
|
10949
|
+
if (typeof value === "string") {
|
|
10950
|
+
textParts.push(value);
|
|
10951
|
+
}
|
|
10952
|
+
}
|
|
10953
|
+
}
|
|
10954
|
+
if (textParts.length > 0) {
|
|
10955
|
+
systemMessageContent = textParts.join("\n\n");
|
|
10956
|
+
}
|
|
10957
|
+
}
|
|
10958
|
+
}
|
|
10866
10959
|
const userSegments = [];
|
|
10867
10960
|
const guidelinePaths = [];
|
|
10868
10961
|
const userTextParts = [];
|
|
@@ -10894,7 +10987,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10894
10987
|
}
|
|
10895
10988
|
try {
|
|
10896
10989
|
const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10897
|
-
|
|
10990
|
+
const relativeToRepo = path7.relative(repoRootPath, resolvedPath);
|
|
10991
|
+
if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
|
|
10898
10992
|
guidelinePaths.push(path7.resolve(resolvedPath));
|
|
10899
10993
|
if (verbose) {
|
|
10900
10994
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
@@ -10904,7 +10998,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10904
10998
|
userSegments.push({
|
|
10905
10999
|
type: "file",
|
|
10906
11000
|
path: displayPath,
|
|
10907
|
-
text: fileContent
|
|
11001
|
+
text: fileContent,
|
|
11002
|
+
resolvedPath: path7.resolve(resolvedPath)
|
|
10908
11003
|
});
|
|
10909
11004
|
if (verbose) {
|
|
10910
11005
|
console.log(` [File] Found: ${displayPath}`);
|
|
@@ -10928,14 +11023,27 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10928
11023
|
const assistantContent = assistantMessages[0]?.content;
|
|
10929
11024
|
const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
|
|
10930
11025
|
const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
10931
|
-
const testCaseGrader = coerceGrader(
|
|
11026
|
+
const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
|
|
11027
|
+
const userFilePaths = [];
|
|
11028
|
+
for (const segment of userSegments) {
|
|
11029
|
+
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
11030
|
+
userFilePaths.push(segment.resolvedPath);
|
|
11031
|
+
}
|
|
11032
|
+
}
|
|
11033
|
+
const allFilePaths = [
|
|
11034
|
+
...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
11035
|
+
...userFilePaths
|
|
11036
|
+
];
|
|
10932
11037
|
const testCase = {
|
|
10933
11038
|
id,
|
|
10934
11039
|
conversation_id: conversationId,
|
|
10935
11040
|
task: userTextPrompt,
|
|
10936
11041
|
user_segments: userSegments,
|
|
11042
|
+
system_message: systemMessageContent,
|
|
10937
11043
|
expected_assistant_raw: expectedAssistantRaw,
|
|
10938
11044
|
guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
11045
|
+
guideline_patterns: guidelinePatterns,
|
|
11046
|
+
file_paths: allFilePaths,
|
|
10939
11047
|
code_snippets: codeSnippets,
|
|
10940
11048
|
outcome,
|
|
10941
11049
|
grader: testCaseGrader
|
|
@@ -11001,7 +11109,7 @@ ${body}`);
|
|
|
11001
11109
|
}
|
|
11002
11110
|
const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
11003
11111
|
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
11004
|
-
return { request, guidelines };
|
|
11112
|
+
return { request, guidelines, systemMessage: testCase.system_message };
|
|
11005
11113
|
}
|
|
11006
11114
|
async function fileExists2(absolutePath) {
|
|
11007
11115
|
try {
|
|
@@ -11124,15 +11232,18 @@ function buildChatPrompt(request) {
|
|
|
11124
11232
|
return request.chatPrompt;
|
|
11125
11233
|
}
|
|
11126
11234
|
const systemSegments = [];
|
|
11127
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11128
|
-
systemSegments.push(`Guidelines:
|
|
11129
|
-
${request.guidelines.trim()}`);
|
|
11130
|
-
}
|
|
11131
11235
|
const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
|
|
11132
11236
|
if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
|
|
11133
11237
|
systemSegments.push(metadataSystemPrompt.trim());
|
|
11238
|
+
} else {
|
|
11239
|
+
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
11134
11240
|
}
|
|
11135
|
-
|
|
11241
|
+
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11242
|
+
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
11243
|
+
|
|
11244
|
+
${request.guidelines.trim()}`);
|
|
11245
|
+
}
|
|
11246
|
+
const systemContent = systemSegments.join("\n\n");
|
|
11136
11247
|
const userContent = request.prompt.trim();
|
|
11137
11248
|
const prompt = [
|
|
11138
11249
|
{
|
|
@@ -11581,7 +11692,6 @@ function resolveOptionalBoolean(source2) {
|
|
|
11581
11692
|
function isLikelyEnvReference(value) {
|
|
11582
11693
|
return /^[A-Z0-9_]+$/.test(value);
|
|
11583
11694
|
}
|
|
11584
|
-
var PROMPT_FILE_PREFIX = "agentv-vscode-";
|
|
11585
11695
|
var VSCodeProvider = class {
|
|
11586
11696
|
id;
|
|
11587
11697
|
kind;
|
|
@@ -11598,128 +11708,89 @@ var VSCodeProvider = class {
|
|
|
11598
11708
|
throw new Error("VS Code provider request was aborted before dispatch");
|
|
11599
11709
|
}
|
|
11600
11710
|
const attachments = normalizeAttachments(request.attachments);
|
|
11601
|
-
const promptContent = buildPromptDocument(request, attachments);
|
|
11602
|
-
const
|
|
11603
|
-
|
|
11604
|
-
|
|
11605
|
-
|
|
11606
|
-
|
|
11607
|
-
|
|
11608
|
-
|
|
11609
|
-
|
|
11610
|
-
|
|
11611
|
-
|
|
11612
|
-
|
|
11613
|
-
|
|
11614
|
-
|
|
11615
|
-
|
|
11616
|
-
|
|
11617
|
-
|
|
11618
|
-
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
11619
|
-
throw new Error(failure);
|
|
11620
|
-
}
|
|
11621
|
-
if (this.config.dryRun) {
|
|
11622
|
-
return {
|
|
11623
|
-
text: "",
|
|
11624
|
-
raw: {
|
|
11625
|
-
session,
|
|
11626
|
-
promptFile: promptPath,
|
|
11627
|
-
attachments
|
|
11628
|
-
}
|
|
11629
|
-
};
|
|
11630
|
-
}
|
|
11631
|
-
const responseText = await readFile22(session.responseFile, "utf8");
|
|
11711
|
+
const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
|
|
11712
|
+
const session = await dispatchAgentSession({
|
|
11713
|
+
userQuery: promptContent,
|
|
11714
|
+
// Use full prompt content instead of just request.prompt
|
|
11715
|
+
extraAttachments: attachments,
|
|
11716
|
+
wait: this.config.waitForResponse,
|
|
11717
|
+
dryRun: this.config.dryRun,
|
|
11718
|
+
vscodeCmd: this.config.command,
|
|
11719
|
+
subagentRoot: this.config.subagentRoot,
|
|
11720
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
11721
|
+
silent: true
|
|
11722
|
+
});
|
|
11723
|
+
if (session.exitCode !== 0 || !session.responseFile) {
|
|
11724
|
+
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
11725
|
+
throw new Error(failure);
|
|
11726
|
+
}
|
|
11727
|
+
if (this.config.dryRun) {
|
|
11632
11728
|
return {
|
|
11633
|
-
text:
|
|
11729
|
+
text: "",
|
|
11634
11730
|
raw: {
|
|
11635
11731
|
session,
|
|
11636
|
-
promptFile: promptPath,
|
|
11637
11732
|
attachments
|
|
11638
11733
|
}
|
|
11639
11734
|
};
|
|
11640
|
-
} finally {
|
|
11641
|
-
await rm2(directory, { recursive: true, force: true });
|
|
11642
11735
|
}
|
|
11736
|
+
const responseText = await readFile22(session.responseFile, "utf8");
|
|
11737
|
+
return {
|
|
11738
|
+
text: responseText,
|
|
11739
|
+
raw: {
|
|
11740
|
+
session,
|
|
11741
|
+
attachments
|
|
11742
|
+
}
|
|
11743
|
+
};
|
|
11643
11744
|
}
|
|
11644
11745
|
};
|
|
11645
|
-
function buildPromptDocument(request, attachments) {
|
|
11746
|
+
function buildPromptDocument(request, attachments, guidelinePatterns) {
|
|
11646
11747
|
const parts = [];
|
|
11647
|
-
const
|
|
11648
|
-
if (
|
|
11649
|
-
parts.push(buildMandatoryPrereadBlock(
|
|
11650
|
-
}
|
|
11651
|
-
parts.push(`# AgentV Request`);
|
|
11652
|
-
if (request.testCaseId) {
|
|
11653
|
-
parts.push(`- Test Case: ${request.testCaseId}`);
|
|
11654
|
-
}
|
|
11655
|
-
if (request.metadata?.target) {
|
|
11656
|
-
parts.push(`- Target: ${String(request.metadata.target)}`);
|
|
11657
|
-
}
|
|
11658
|
-
parts.push("\n## Task\n", request.prompt.trim());
|
|
11659
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11660
|
-
parts.push("\n## Guidelines\n", request.guidelines.trim());
|
|
11661
|
-
}
|
|
11662
|
-
if (attachments && attachments.length > 0) {
|
|
11663
|
-
const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
|
|
11664
|
-
parts.push("\n## Attachments\n", attachmentList);
|
|
11748
|
+
const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
|
|
11749
|
+
if (guidelineFiles.length > 0) {
|
|
11750
|
+
parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
|
|
11665
11751
|
}
|
|
11752
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
11666
11753
|
return parts.join("\n").trim();
|
|
11667
11754
|
}
|
|
11668
|
-
function buildMandatoryPrereadBlock(
|
|
11669
|
-
if (
|
|
11755
|
+
function buildMandatoryPrereadBlock(guidelineFiles) {
|
|
11756
|
+
if (guidelineFiles.length === 0) {
|
|
11670
11757
|
return "";
|
|
11671
11758
|
}
|
|
11672
11759
|
const fileList = [];
|
|
11673
|
-
const tokenList = [];
|
|
11674
11760
|
let counter = 0;
|
|
11675
|
-
for (const absolutePath of
|
|
11761
|
+
for (const absolutePath of guidelineFiles) {
|
|
11676
11762
|
counter += 1;
|
|
11677
11763
|
const fileName = path22.basename(absolutePath);
|
|
11678
11764
|
const fileUri = pathToFileUri(absolutePath);
|
|
11679
|
-
fileList.push(
|
|
11680
|
-
tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
|
|
11765
|
+
fileList.push(`* [${fileName}](${fileUri})`);
|
|
11681
11766
|
}
|
|
11682
|
-
const filesText = fileList.join("
|
|
11683
|
-
const tokensText = tokenList.join("\n");
|
|
11767
|
+
const filesText = fileList.join("\n");
|
|
11684
11768
|
const instruction = [
|
|
11685
|
-
`Read all
|
|
11686
|
-
|
|
11687
|
-
"`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
|
|
11688
|
-
`Then include, at the top of your reply, these exact tokens on separate lines:
|
|
11769
|
+
`Read all guideline files:
|
|
11770
|
+
${filesText}.
|
|
11689
11771
|
`,
|
|
11690
|
-
tokensText,
|
|
11691
|
-
`
|
|
11692
|
-
Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
|
|
11693
11772
|
`If any file is missing, fail with ERROR: missing-file <filename> and stop.
|
|
11694
11773
|
`,
|
|
11695
|
-
`Then
|
|
11696
|
-
].join("
|
|
11697
|
-
return
|
|
11698
|
-
|
|
11699
|
-
${instruction}
|
|
11700
|
-
|
|
11701
|
-
`;
|
|
11774
|
+
`Then apply system_instructions on the user query below.`
|
|
11775
|
+
].join("");
|
|
11776
|
+
return `${instruction}`;
|
|
11702
11777
|
}
|
|
11703
|
-
function
|
|
11778
|
+
function collectGuidelineFiles(attachments, guidelinePatterns) {
|
|
11704
11779
|
if (!attachments || attachments.length === 0) {
|
|
11705
11780
|
return [];
|
|
11706
11781
|
}
|
|
11707
11782
|
const unique = /* @__PURE__ */ new Map();
|
|
11708
11783
|
for (const attachment of attachments) {
|
|
11709
|
-
if (!isInstructionPath(attachment)) {
|
|
11710
|
-
continue;
|
|
11711
|
-
}
|
|
11712
11784
|
const absolutePath = path22.resolve(attachment);
|
|
11713
|
-
|
|
11714
|
-
|
|
11785
|
+
const normalized = absolutePath.split(path22.sep).join("/");
|
|
11786
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
11787
|
+
if (!unique.has(absolutePath)) {
|
|
11788
|
+
unique.set(absolutePath, absolutePath);
|
|
11789
|
+
}
|
|
11715
11790
|
}
|
|
11716
11791
|
}
|
|
11717
11792
|
return Array.from(unique.values());
|
|
11718
11793
|
}
|
|
11719
|
-
function isInstructionPath(filePath) {
|
|
11720
|
-
const normalized = filePath.split(path22.sep).join("/");
|
|
11721
|
-
return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
|
|
11722
|
-
}
|
|
11723
11794
|
function pathToFileUri(filePath) {
|
|
11724
11795
|
const absolutePath = path22.isAbsolute(filePath) ? filePath : path22.resolve(filePath);
|
|
11725
11796
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
@@ -11728,14 +11799,6 @@ function pathToFileUri(filePath) {
|
|
|
11728
11799
|
}
|
|
11729
11800
|
return `file://${normalizedPath}`;
|
|
11730
11801
|
}
|
|
11731
|
-
function composeUserQuery(request) {
|
|
11732
|
-
const segments = [];
|
|
11733
|
-
segments.push(request.prompt.trim());
|
|
11734
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11735
|
-
segments.push("\nGuidelines:\n", request.guidelines.trim());
|
|
11736
|
-
}
|
|
11737
|
-
return segments.join("\n").trim();
|
|
11738
|
-
}
|
|
11739
11802
|
function normalizeAttachments(attachments) {
|
|
11740
11803
|
if (!attachments || attachments.length === 0) {
|
|
11741
11804
|
return void 0;
|
|
@@ -11836,7 +11899,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
11836
11899
|
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
11837
11900
|
};
|
|
11838
11901
|
}
|
|
11839
|
-
async function
|
|
11902
|
+
async function fileExists3(filePath) {
|
|
11840
11903
|
try {
|
|
11841
11904
|
await access22(filePath, constants22.F_OK);
|
|
11842
11905
|
return true;
|
|
@@ -11846,7 +11909,7 @@ async function fileExists22(filePath) {
|
|
|
11846
11909
|
}
|
|
11847
11910
|
async function readTargetDefinitions(filePath) {
|
|
11848
11911
|
const absolutePath = path32.resolve(filePath);
|
|
11849
|
-
if (!await
|
|
11912
|
+
if (!await fileExists3(absolutePath)) {
|
|
11850
11913
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
11851
11914
|
}
|
|
11852
11915
|
const raw = await readFile3(absolutePath, "utf8");
|
|
@@ -12061,7 +12124,7 @@ function extractKeyTerms(aspect, maxTerms = 5) {
|
|
|
12061
12124
|
var HeuristicGrader = class {
|
|
12062
12125
|
kind = "heuristic";
|
|
12063
12126
|
grade(context2) {
|
|
12064
|
-
const expectedAspects = extractAspects(context2.
|
|
12127
|
+
const expectedAspects = extractAspects(context2.evalCase.expected_assistant_raw);
|
|
12065
12128
|
const result = scoreCandidateResponse(context2.candidate, expectedAspects);
|
|
12066
12129
|
const misses = [...result.misses];
|
|
12067
12130
|
if (expectedAspects.length === 0 && isErrorLike(context2.candidate)) {
|
|
@@ -12094,14 +12157,14 @@ var QualityGrader = class {
|
|
|
12094
12157
|
if (!judgeProvider) {
|
|
12095
12158
|
throw new Error("No judge provider available for LLM grading");
|
|
12096
12159
|
}
|
|
12097
|
-
const prompt = buildQualityPrompt(context2.
|
|
12160
|
+
const prompt = buildQualityPrompt(context2.evalCase, context2.candidate);
|
|
12098
12161
|
const metadata = {
|
|
12099
12162
|
systemPrompt: QUALITY_SYSTEM_PROMPT
|
|
12100
12163
|
};
|
|
12101
12164
|
const response = await judgeProvider.invoke({
|
|
12102
12165
|
prompt,
|
|
12103
12166
|
metadata,
|
|
12104
|
-
|
|
12167
|
+
evalCaseId: context2.evalCase.id,
|
|
12105
12168
|
attempt: context2.attempt,
|
|
12106
12169
|
maxOutputTokens: this.maxOutputTokens,
|
|
12107
12170
|
temperature: this.temperature
|
|
@@ -12147,16 +12210,16 @@ var QUALITY_SYSTEM_PROMPT = [
|
|
|
12147
12210
|
function buildQualityPrompt(testCase, candidate) {
|
|
12148
12211
|
const parts = [
|
|
12149
12212
|
"[[ ## expected_outcome ## ]]",
|
|
12150
|
-
testCase.outcome,
|
|
12213
|
+
testCase.outcome.trim(),
|
|
12151
12214
|
"",
|
|
12152
12215
|
"[[ ## request ## ]]",
|
|
12153
|
-
testCase.task,
|
|
12216
|
+
testCase.task.trim(),
|
|
12154
12217
|
"",
|
|
12155
12218
|
"[[ ## reference_answer ## ]]",
|
|
12156
|
-
testCase.expected_assistant_raw,
|
|
12219
|
+
testCase.expected_assistant_raw.trim(),
|
|
12157
12220
|
"",
|
|
12158
12221
|
"[[ ## generated_answer ## ]]",
|
|
12159
|
-
candidate,
|
|
12222
|
+
candidate.trim(),
|
|
12160
12223
|
"",
|
|
12161
12224
|
"Respond with a single JSON object matching the schema described in the system prompt."
|
|
12162
12225
|
];
|
|
@@ -12394,10 +12457,10 @@ async function runEvaluation(options) {
|
|
|
12394
12457
|
onResult,
|
|
12395
12458
|
onProgress
|
|
12396
12459
|
} = options;
|
|
12397
|
-
const load =
|
|
12398
|
-
const
|
|
12399
|
-
const
|
|
12400
|
-
if (
|
|
12460
|
+
const load = loadEvalCases;
|
|
12461
|
+
const evalCases = await load(testFilePath, repoRoot, { verbose });
|
|
12462
|
+
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
12463
|
+
if (filteredEvalCases.length === 0) {
|
|
12401
12464
|
if (evalId) {
|
|
12402
12465
|
throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
|
|
12403
12466
|
}
|
|
@@ -12443,11 +12506,11 @@ async function runEvaluation(options) {
|
|
|
12443
12506
|
};
|
|
12444
12507
|
const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
|
|
12445
12508
|
const primaryProvider = getOrCreateProvider(target);
|
|
12446
|
-
if (onProgress &&
|
|
12447
|
-
for (let i6 = 0; i6 <
|
|
12509
|
+
if (onProgress && filteredEvalCases.length > 0) {
|
|
12510
|
+
for (let i6 = 0; i6 < filteredEvalCases.length; i6++) {
|
|
12448
12511
|
await onProgress({
|
|
12449
12512
|
workerId: i6 + 1,
|
|
12450
|
-
evalId:
|
|
12513
|
+
evalId: filteredEvalCases[i6].id,
|
|
12451
12514
|
status: "pending"
|
|
12452
12515
|
});
|
|
12453
12516
|
}
|
|
@@ -12456,22 +12519,22 @@ async function runEvaluation(options) {
|
|
|
12456
12519
|
const limit = pLimit(workers);
|
|
12457
12520
|
let nextWorkerId = 1;
|
|
12458
12521
|
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
12459
|
-
const promises =
|
|
12460
|
-
(
|
|
12522
|
+
const promises = filteredEvalCases.map(
|
|
12523
|
+
(evalCase) => limit(async () => {
|
|
12461
12524
|
const workerId = nextWorkerId++;
|
|
12462
|
-
workerIdByEvalId.set(
|
|
12525
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
12463
12526
|
if (onProgress) {
|
|
12464
12527
|
await onProgress({
|
|
12465
12528
|
workerId,
|
|
12466
|
-
evalId:
|
|
12529
|
+
evalId: evalCase.id,
|
|
12467
12530
|
status: "running",
|
|
12468
12531
|
startedAt: Date.now()
|
|
12469
12532
|
});
|
|
12470
12533
|
}
|
|
12471
12534
|
try {
|
|
12472
12535
|
const judgeProvider = await resolveJudgeProvider(target);
|
|
12473
|
-
const result = await
|
|
12474
|
-
|
|
12536
|
+
const result = await runEvalCase({
|
|
12537
|
+
evalCase,
|
|
12475
12538
|
provider: primaryProvider,
|
|
12476
12539
|
target,
|
|
12477
12540
|
graders: graderRegistry,
|
|
@@ -12486,7 +12549,7 @@ async function runEvaluation(options) {
|
|
|
12486
12549
|
if (onProgress) {
|
|
12487
12550
|
await onProgress({
|
|
12488
12551
|
workerId,
|
|
12489
|
-
evalId:
|
|
12552
|
+
evalId: evalCase.id,
|
|
12490
12553
|
status: "completed",
|
|
12491
12554
|
startedAt: 0,
|
|
12492
12555
|
// Not used for completed status
|
|
@@ -12501,7 +12564,7 @@ async function runEvaluation(options) {
|
|
|
12501
12564
|
if (onProgress) {
|
|
12502
12565
|
await onProgress({
|
|
12503
12566
|
workerId,
|
|
12504
|
-
evalId:
|
|
12567
|
+
evalId: evalCase.id,
|
|
12505
12568
|
status: "failed",
|
|
12506
12569
|
completedAt: Date.now(),
|
|
12507
12570
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -12518,10 +12581,10 @@ async function runEvaluation(options) {
|
|
|
12518
12581
|
if (outcome.status === "fulfilled") {
|
|
12519
12582
|
results.push(outcome.value);
|
|
12520
12583
|
} else {
|
|
12521
|
-
const
|
|
12522
|
-
const promptInputs = await buildPromptInputs(
|
|
12584
|
+
const evalCase = filteredEvalCases[i6];
|
|
12585
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
12523
12586
|
const errorResult = buildErrorResult(
|
|
12524
|
-
|
|
12587
|
+
evalCase,
|
|
12525
12588
|
target.name,
|
|
12526
12589
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
12527
12590
|
outcome.reason,
|
|
@@ -12535,9 +12598,9 @@ async function runEvaluation(options) {
|
|
|
12535
12598
|
}
|
|
12536
12599
|
return results;
|
|
12537
12600
|
}
|
|
12538
|
-
async function
|
|
12601
|
+
async function runEvalCase(options) {
|
|
12539
12602
|
const {
|
|
12540
|
-
|
|
12603
|
+
evalCase,
|
|
12541
12604
|
provider,
|
|
12542
12605
|
target,
|
|
12543
12606
|
graders,
|
|
@@ -12550,11 +12613,11 @@ async function runTestCase(options) {
|
|
|
12550
12613
|
signal,
|
|
12551
12614
|
judgeProvider
|
|
12552
12615
|
} = options;
|
|
12553
|
-
const promptInputs = await buildPromptInputs(
|
|
12616
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
12554
12617
|
if (promptDumpDir) {
|
|
12555
|
-
await dumpPrompt(promptDumpDir,
|
|
12618
|
+
await dumpPrompt(promptDumpDir, evalCase, promptInputs);
|
|
12556
12619
|
}
|
|
12557
|
-
const cacheKey = useCache ? createCacheKey(provider, target,
|
|
12620
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
12558
12621
|
let cachedResponse;
|
|
12559
12622
|
if (cacheKey && cache) {
|
|
12560
12623
|
cachedResponse = await cache.get(cacheKey);
|
|
@@ -12567,7 +12630,7 @@ async function runTestCase(options) {
|
|
|
12567
12630
|
while (!providerResponse && attempt < attemptBudget) {
|
|
12568
12631
|
try {
|
|
12569
12632
|
providerResponse = await invokeProvider(provider, {
|
|
12570
|
-
|
|
12633
|
+
evalCase,
|
|
12571
12634
|
target,
|
|
12572
12635
|
promptInputs,
|
|
12573
12636
|
attempt,
|
|
@@ -12580,12 +12643,12 @@ async function runTestCase(options) {
|
|
|
12580
12643
|
attempt += 1;
|
|
12581
12644
|
continue;
|
|
12582
12645
|
}
|
|
12583
|
-
return buildErrorResult(
|
|
12646
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
12584
12647
|
}
|
|
12585
12648
|
}
|
|
12586
12649
|
if (!providerResponse) {
|
|
12587
12650
|
return buildErrorResult(
|
|
12588
|
-
|
|
12651
|
+
evalCase,
|
|
12589
12652
|
target.name,
|
|
12590
12653
|
nowFn(),
|
|
12591
12654
|
lastError ?? new Error("Provider did not return a response"),
|
|
@@ -12595,7 +12658,7 @@ async function runTestCase(options) {
|
|
|
12595
12658
|
if (cacheKey && cache && !cachedResponse) {
|
|
12596
12659
|
await cache.set(cacheKey, providerResponse);
|
|
12597
12660
|
}
|
|
12598
|
-
const graderKind =
|
|
12661
|
+
const graderKind = evalCase.grader ?? "heuristic";
|
|
12599
12662
|
const activeGrader = graders[graderKind] ?? graders.heuristic;
|
|
12600
12663
|
if (!activeGrader) {
|
|
12601
12664
|
throw new Error(`No grader registered for kind '${graderKind}'`);
|
|
@@ -12604,7 +12667,7 @@ async function runTestCase(options) {
|
|
|
12604
12667
|
try {
|
|
12605
12668
|
const gradeTimestamp = nowFn();
|
|
12606
12669
|
grade = await activeGrader.grade({
|
|
12607
|
-
|
|
12670
|
+
evalCase,
|
|
12608
12671
|
candidate: providerResponse.text ?? "",
|
|
12609
12672
|
target,
|
|
12610
12673
|
provider,
|
|
@@ -12614,17 +12677,18 @@ async function runTestCase(options) {
|
|
|
12614
12677
|
judgeProvider
|
|
12615
12678
|
});
|
|
12616
12679
|
} catch (error) {
|
|
12617
|
-
return buildErrorResult(
|
|
12680
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
12618
12681
|
}
|
|
12619
12682
|
const completedAt = nowFn();
|
|
12620
12683
|
const rawRequest = {
|
|
12621
12684
|
request: promptInputs.request,
|
|
12622
12685
|
guidelines: promptInputs.guidelines,
|
|
12623
|
-
guideline_paths:
|
|
12686
|
+
guideline_paths: evalCase.guideline_paths,
|
|
12687
|
+
system_message: promptInputs.systemMessage ?? ""
|
|
12624
12688
|
};
|
|
12625
12689
|
return {
|
|
12626
|
-
eval_id:
|
|
12627
|
-
conversation_id:
|
|
12690
|
+
eval_id: evalCase.id,
|
|
12691
|
+
conversation_id: evalCase.conversation_id,
|
|
12628
12692
|
score: grade.score,
|
|
12629
12693
|
hits: grade.hits,
|
|
12630
12694
|
misses: grade.misses,
|
|
@@ -12638,11 +12702,11 @@ async function runTestCase(options) {
|
|
|
12638
12702
|
grader_raw_request: grade.graderRawRequest
|
|
12639
12703
|
};
|
|
12640
12704
|
}
|
|
12641
|
-
function
|
|
12705
|
+
/**
 * Narrow a list of eval cases to the ones carrying a requested id.
 *
 * @param {Array<{id: string}>} evalCases - Eval cases to choose from.
 * @param {string} [evalId] - Id to keep; any falsy value disables filtering.
 * @returns {Array<{id: string}>} The very same array when no id is given,
 *   otherwise a new array holding only the cases whose `id` equals `evalId`.
 */
function filterEvalCases(evalCases, evalId) {
  return evalId ? evalCases.filter(({ id }) => id === evalId) : evalCases;
}
|
|
12647
12711
|
function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
12648
12712
|
const heuristic = overrides?.heuristic ?? new HeuristicGrader();
|
|
@@ -12660,16 +12724,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
|
12660
12724
|
llm_judge: llmJudge
|
|
12661
12725
|
};
|
|
12662
12726
|
}
|
|
12663
|
-
async function dumpPrompt(directory,
|
|
12727
|
+
/**
 * Write the prompt inputs of one eval case to a JSON file for inspection.
 *
 * The file is named `<ISO timestamp with ":" and "." replaced by "-">_<id>.json`,
 * where the id is first passed through `sanitizeFilename`, and is resolved
 * against `directory`. The parent directory is created recursively if needed.
 *
 * @param {string} directory - Directory that receives the dump file.
 * @param {object} evalCase - Eval case; its `id` and `guideline_paths` are read.
 * @param {object} promptInputs - Prompt inputs; `request` and `guidelines` are read.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function dumpPrompt(directory, evalCase, promptInputs) {
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const { id, guideline_paths } = evalCase;
  const filePath = path42.resolve(directory, `${stamp}_${sanitizeFilename(id)}.json`);
  await mkdir3(path42.dirname(filePath), { recursive: true });
  const { request, guidelines } = promptInputs;
  const serialized = JSON.stringify(
    { eval_id: id, request, guidelines, guideline_paths },
    null,
    2
  );
  await writeFile22(filePath, serialized, "utf8");
}
|
|
@@ -12681,7 +12745,7 @@ function sanitizeFilename(value) {
|
|
|
12681
12745
|
return sanitized.length > 0 ? sanitized : randomUUID2();
|
|
12682
12746
|
}
|
|
12683
12747
|
async function invokeProvider(provider, options) {
|
|
12684
|
-
const {
|
|
12748
|
+
const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
12685
12749
|
const controller = new AbortController();
|
|
12686
12750
|
const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
|
|
12687
12751
|
if (signal) {
|
|
@@ -12691,12 +12755,12 @@ async function invokeProvider(provider, options) {
|
|
|
12691
12755
|
return await provider.invoke({
|
|
12692
12756
|
prompt: promptInputs.request,
|
|
12693
12757
|
guidelines: promptInputs.guidelines,
|
|
12694
|
-
|
|
12695
|
-
|
|
12758
|
+
guideline_patterns: evalCase.guideline_patterns,
|
|
12759
|
+
attachments: evalCase.file_paths,
|
|
12760
|
+
evalCaseId: evalCase.id,
|
|
12696
12761
|
attempt,
|
|
12697
12762
|
metadata: {
|
|
12698
|
-
|
|
12699
|
-
grader: testCase.grader
|
|
12763
|
+
systemPrompt: promptInputs.systemMessage ?? ""
|
|
12700
12764
|
},
|
|
12701
12765
|
signal: controller.signal
|
|
12702
12766
|
});
|
|
@@ -12706,17 +12770,18 @@ async function invokeProvider(provider, options) {
|
|
|
12706
12770
|
}
|
|
12707
12771
|
}
|
|
12708
12772
|
}
|
|
12709
|
-
function buildErrorResult(
|
|
12773
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
12710
12774
|
const message = error instanceof Error ? error.message : String(error);
|
|
12711
12775
|
const rawRequest = {
|
|
12712
12776
|
request: promptInputs.request,
|
|
12713
12777
|
guidelines: promptInputs.guidelines,
|
|
12714
|
-
guideline_paths:
|
|
12778
|
+
guideline_paths: evalCase.guideline_paths,
|
|
12779
|
+
system_message: promptInputs.systemMessage ?? "",
|
|
12715
12780
|
error: message
|
|
12716
12781
|
};
|
|
12717
12782
|
return {
|
|
12718
|
-
eval_id:
|
|
12719
|
-
conversation_id:
|
|
12783
|
+
eval_id: evalCase.id,
|
|
12784
|
+
conversation_id: evalCase.conversation_id,
|
|
12720
12785
|
score: 0,
|
|
12721
12786
|
hits: [],
|
|
12722
12787
|
misses: [`Error: ${message}`],
|
|
@@ -12728,13 +12793,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
|
|
|
12728
12793
|
raw_request: rawRequest
|
|
12729
12794
|
};
|
|
12730
12795
|
}
|
|
12731
|
-
function createCacheKey(provider, target,
|
|
12796
|
+
/**
 * Derive the cache key for a provider response.
 *
 * The key is the hex SHA-256 digest of these values, fed to the hash in this
 * order with no separators: provider id, target name, eval case id, request
 * text, guidelines text, and the system message (empty string when absent).
 *
 * @param {{id: string}} provider - Provider whose `id` is hashed.
 * @param {{name: string}} target - Target whose `name` is hashed.
 * @param {{id: string}} evalCase - Eval case whose `id` is hashed.
 * @param {{request: string, guidelines: string, systemMessage?: string}} promptInputs
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function createCacheKey(provider, target, evalCase, promptInputs) {
  const segments = [
    provider.id,
    target.name,
    evalCase.id,
    promptInputs.request,
    promptInputs.guidelines,
    promptInputs.systemMessage ?? ""
  ];
  const hasher = createHash("sha256");
  for (const segment of segments) {
    hasher.update(segment);
  }
  return hasher.digest("hex");
}
|
|
12740
12806
|
function isTimeoutLike(error) {
|
|
@@ -12780,7 +12846,7 @@ function uniqueDirs(directories) {
|
|
|
12780
12846
|
}
|
|
12781
12847
|
return result;
|
|
12782
12848
|
}
|
|
12783
|
-
async function
|
|
12849
|
+
async function fileExists4(filePath) {
|
|
12784
12850
|
try {
|
|
12785
12851
|
await access4(filePath, constants4.F_OK);
|
|
12786
12852
|
return true;
|
|
@@ -12816,7 +12882,7 @@ async function loadEnvFromHierarchy(options) {
|
|
|
12816
12882
|
]);
|
|
12817
12883
|
for (const dir of searchDirs) {
|
|
12818
12884
|
const candidate = path8.join(dir, ".env");
|
|
12819
|
-
if (await
|
|
12885
|
+
if (await fileExists4(candidate)) {
|
|
12820
12886
|
loadDotenv({ path: candidate, override: false });
|
|
12821
12887
|
if (verbose) {
|
|
12822
12888
|
console.log(`Loaded environment from: ${candidate}`);
|
|
@@ -13375,7 +13441,7 @@ function formatEvaluationSummary(summary) {
|
|
|
13375
13441
|
lines.push("\n==================================================");
|
|
13376
13442
|
lines.push("EVALUATION SUMMARY");
|
|
13377
13443
|
lines.push("==================================================");
|
|
13378
|
-
lines.push(`Total
|
|
13444
|
+
lines.push(`Total eval cases: ${summary.total}`);
|
|
13379
13445
|
lines.push(`Mean score: ${formatScore(summary.mean)}`);
|
|
13380
13446
|
lines.push(`Median score: ${formatScore(summary.median)}`);
|
|
13381
13447
|
lines.push(`Min score: ${formatScore(summary.min)}`);
|
|
@@ -13388,13 +13454,13 @@ function formatEvaluationSummary(summary) {
|
|
|
13388
13454
|
const [start, end] = bin.range;
|
|
13389
13455
|
lines.push(` ${start.toFixed(1)}-${end.toFixed(1)}: ${bin.count}`);
|
|
13390
13456
|
}
|
|
13391
|
-
lines.push("\nTop performing
|
|
13457
|
+
lines.push("\nTop performing eval cases:");
|
|
13392
13458
|
summary.topResults.forEach((result, index) => {
|
|
13393
|
-
lines.push(` ${index + 1}. ${result.
|
|
13459
|
+
lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
|
|
13394
13460
|
});
|
|
13395
|
-
lines.push("\nLowest performing
|
|
13461
|
+
lines.push("\nLowest performing eval cases:");
|
|
13396
13462
|
summary.bottomResults.forEach((result, index) => {
|
|
13397
|
-
lines.push(` ${index + 1}. ${result.
|
|
13463
|
+
lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
|
|
13398
13464
|
});
|
|
13399
13465
|
return lines.join("\n");
|
|
13400
13466
|
}
|
|
@@ -13410,7 +13476,7 @@ var TARGET_FILE_CANDIDATES = [
|
|
|
13410
13476
|
path11.join(".agentv", "targets.yaml"),
|
|
13411
13477
|
path11.join(".agentv", "targets.yml")
|
|
13412
13478
|
];
|
|
13413
|
-
async function
|
|
13479
|
+
async function fileExists5(filePath) {
|
|
13414
13480
|
try {
|
|
13415
13481
|
await access5(filePath, constants5.F_OK);
|
|
13416
13482
|
return true;
|
|
@@ -13432,56 +13498,30 @@ async function readTestSuiteTarget(testFilePath) {
|
|
|
13432
13498
|
}
|
|
13433
13499
|
return void 0;
|
|
13434
13500
|
}
|
|
13435
|
-
function buildDirectoryChain(testFilePath, repoRoot, cwd) {
|
|
13436
|
-
const directories = [];
|
|
13437
|
-
const seen = /* @__PURE__ */ new Set();
|
|
13438
|
-
const boundary = path11.resolve(repoRoot);
|
|
13439
|
-
let current = path11.resolve(path11.dirname(testFilePath));
|
|
13440
|
-
while (current !== void 0) {
|
|
13441
|
-
if (!seen.has(current)) {
|
|
13442
|
-
directories.push(current);
|
|
13443
|
-
seen.add(current);
|
|
13444
|
-
}
|
|
13445
|
-
if (current === boundary) {
|
|
13446
|
-
break;
|
|
13447
|
-
}
|
|
13448
|
-
const parent = path11.dirname(current);
|
|
13449
|
-
if (parent === current) {
|
|
13450
|
-
break;
|
|
13451
|
-
}
|
|
13452
|
-
current = parent;
|
|
13453
|
-
}
|
|
13454
|
-
if (!seen.has(boundary)) {
|
|
13455
|
-
directories.push(boundary);
|
|
13456
|
-
seen.add(boundary);
|
|
13457
|
-
}
|
|
13458
|
-
const resolvedCwd = path11.resolve(cwd);
|
|
13459
|
-
if (!seen.has(resolvedCwd)) {
|
|
13460
|
-
directories.push(resolvedCwd);
|
|
13461
|
-
seen.add(resolvedCwd);
|
|
13462
|
-
}
|
|
13463
|
-
return directories;
|
|
13464
|
-
}
|
|
13465
13501
|
async function discoverTargetsFile(options) {
|
|
13466
13502
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
13467
13503
|
if (explicitPath) {
|
|
13468
13504
|
const resolvedExplicit = path11.resolve(explicitPath);
|
|
13469
|
-
if (await
|
|
13505
|
+
if (await fileExists5(resolvedExplicit)) {
|
|
13470
13506
|
return resolvedExplicit;
|
|
13471
13507
|
}
|
|
13472
13508
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
13473
13509
|
const nested = path11.join(resolvedExplicit, candidate);
|
|
13474
|
-
if (await
|
|
13510
|
+
if (await fileExists5(nested)) {
|
|
13475
13511
|
return nested;
|
|
13476
13512
|
}
|
|
13477
13513
|
}
|
|
13478
13514
|
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
13479
13515
|
}
|
|
13480
|
-
const directories = buildDirectoryChain(testFilePath, repoRoot
|
|
13516
|
+
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
13517
|
+
const resolvedCwd = path11.resolve(cwd);
|
|
13518
|
+
if (!directories.includes(resolvedCwd)) {
|
|
13519
|
+
directories.push(resolvedCwd);
|
|
13520
|
+
}
|
|
13481
13521
|
for (const directory of directories) {
|
|
13482
13522
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
13483
13523
|
const fullPath = path11.join(directory, candidate);
|
|
13484
|
-
if (await
|
|
13524
|
+
if (await fileExists5(fullPath)) {
|
|
13485
13525
|
return fullPath;
|
|
13486
13526
|
}
|
|
13487
13527
|
}
|
|
@@ -13792,7 +13832,7 @@ function parseInteger(value, fallback) {
|
|
|
13792
13832
|
return parsed;
|
|
13793
13833
|
}
|
|
13794
13834
|
function registerEvalCommand(program) {
|
|
13795
|
-
program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the
|
|
13835
|
+
program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
|
|
13796
13836
|
"--workers <count>",
|
|
13797
13837
|
"Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
13798
13838
|
(value) => parseInteger(value, 1)
|
|
@@ -13830,25 +13870,164 @@ function registerEvalCommand(program) {
|
|
|
13830
13870
|
return program;
|
|
13831
13871
|
}
|
|
13832
13872
|
|
|
13833
|
-
// src/commands/
|
|
13834
|
-
import {
|
|
13835
|
-
import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
13873
|
+
// src/commands/init/index.ts
|
|
13874
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
13836
13875
|
import path14 from "node:path";
|
|
13837
13876
|
|
|
13877
|
+
// src/templates/index.ts
|
|
13878
|
+
import { readFileSync } from "node:fs";
|
|
13879
|
+
import path13 from "node:path";
|
|
13880
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
13881
|
+
/**
 * Loads the template files that the `init` command installs.
 */
var TemplateManager = class {
  /**
   * Decide which directory holds the template files.
   *
   * When the running module lives somewhere below a directory named exactly
   * `dist`, templates are read from a `templates` folder next to the module;
   * otherwise the module's own directory is used.
   *
   * The check compares whole path segments. A plain substring test would also
   * match unrelated ancestors such as `/distro/...` or `/dist-old/...` and
   * send the lookup to a folder that does not exist.
   *
   * @param {string} currentDir - Directory of the running module.
   * @returns {string} Directory to read template files from.
   */
  static resolveTemplatesDir(currentDir) {
    const insideDist = currentDir.split(path13.sep).includes("dist");
    return insideDist ? path13.join(currentDir, "templates") : currentDir;
  }
  /**
   * Read every template from disk.
   *
   * @returns {Array<{path: string, content: string}>} One entry per template:
   *   `path` is the install location used by the caller and `content` is the
   *   UTF-8 text of the template file.
   * @throws {Error} Propagates the `readFileSync` error if a template file
   *   cannot be read.
   */
  static getTemplates() {
    const currentDir = path13.dirname(fileURLToPath2(import.meta.url));
    const templatesDir = TemplateManager.resolveTemplatesDir(currentDir);
    const readTemplate = (fileName) => readFileSync(path13.join(templatesDir, fileName), "utf-8");
    return [
      {
        path: "prompts/eval-build.prompt.md",
        content: readTemplate("eval-build.prompt.md")
      },
      {
        path: "contexts/eval-schema.json",
        content: readTemplate("eval-schema.json")
      },
      {
        path: "contexts/config-schema.json",
        content: readTemplate("config-schema.json")
      }
    ];
  }
};
|
|
13918
|
+
|
|
13919
|
+
// src/commands/init/index.ts
|
|
13920
|
+
/**
 * Install the bundled templates into `<targetPath>/.github`.
 *
 * Creates `.github` (and any sub-directory a template needs) when missing,
 * writes every template returned by `TemplateManager.getTemplates()`, and
 * logs each created file followed by a short summary.
 *
 * @param {{targetPath?: string}} [options] - `targetPath` defaults to ".".
 * @returns {Promise<void>}
 */
async function initCommand(options = {}) {
  const rootDir = path14.resolve(options.targetPath ?? ".");
  const githubDir = path14.join(rootDir, ".github");
  const ensureDirectory = (dir) => {
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
  };
  ensureDirectory(githubDir);
  const templates = TemplateManager.getTemplates();
  templates.forEach((template) => {
    const destination = path14.join(githubDir, template.path);
    ensureDirectory(path14.dirname(destination));
    writeFileSync(destination, template.content, "utf-8");
    console.log(`Created ${path14.relative(rootDir, destination)}`);
  });
  console.log("\nAgentV initialized successfully!");
  console.log(`\nFiles installed to ${path14.relative(rootDir, githubDir)}:`);
  for (const { path: installedPath } of templates) {
    console.log(` - ${installedPath}`);
  }
  console.log("\nYou can now create eval files using the schema and prompt templates.");
}
|
|
13942
|
+
|
|
13943
|
+
// src/commands/validate/format-output.ts
|
|
13944
|
+
// ANSI escape sequences used to style the validation report.
// Foreground colours:
var ANSI_RED = "\x1B[31m";
var ANSI_YELLOW2 = "\x1B[33m";
var ANSI_GREEN = "\x1B[32m";
var ANSI_CYAN = "\x1B[36m";
// Text attributes: bold on, and reset of all styling.
var ANSI_BOLD = "\x1B[1m";
var ANSI_RESET2 = "\x1B[0m";
|
|
13950
|
+
/**
 * Render the complete validation report as one string.
 *
 * Layout, joined with newlines: blank line, header, blank line, one entry per
 * file result, blank line, aggregate statistics, blank line.
 *
 * @param {{results: Array<object>}} summary - Validation summary; also passed
 *   on to `formatStats`.
 * @param {boolean} useColors - Whether to wrap parts in ANSI colour codes.
 * @returns {string} The formatted report.
 */
function formatSummary(summary, useColors) {
  return [
    "",
    formatHeader("Validation Summary", useColors),
    "",
    ...summary.results.map((fileResult) => formatFileResult(fileResult, useColors)),
    "",
    formatStats(summary, useColors),
    ""
  ].join("\n");
}
|
|
13963
|
+
/**
 * Format a section heading.
 *
 * @param {string} text - Heading text.
 * @param {boolean} useColors - When truthy the text is wrapped in bold cyan
 *   ANSI codes followed by a reset; otherwise it is returned untouched.
 * @returns {string} The heading, styled or plain.
 */
function formatHeader(text, useColors) {
  return useColors ? `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}` : text;
}
|
|
13969
|
+
/**
 * Format the report entry for a single validated file.
 *
 * The first line is a check mark (valid) or a cross (invalid) followed by the
 * file path; every entry of `result.errors` adds one more line produced by
 * `formatError`.
 *
 * @param {{valid: boolean, filePath: string, errors: Array<object>}} result
 * @param {boolean} useColors - Colour the status mark green/red when truthy.
 * @returns {string} Newline-joined lines for this file.
 */
function formatFileResult(result, useColors) {
  const mark = result.valid ? "\u2713" : "\u2717";
  let renderedMark = mark;
  if (useColors) {
    const tint = result.valid ? ANSI_GREEN : ANSI_RED;
    renderedMark = `${tint}${mark}${ANSI_RESET2}`;
  }
  const headline = `${renderedMark} ${result.filePath}`;
  const details = result.errors.map((issue) => formatError(issue, useColors));
  return [headline, ...details].join("\n");
}
|
|
13983
|
+
/**
 * Format one validation issue as a single report line.
 *
 * Issues with severity "error" get a cross marker (red when coloured); every
 * other severity gets a warning sign (yellow when coloured). The optional
 * location is shown in square brackets between the marker and the message.
 *
 * @param {{severity: string, message: string, location?: string}} error
 * @param {boolean} useColors - Wrap the marker in ANSI colour codes when truthy.
 * @returns {string} The formatted line.
 */
function formatError(error, useColors) {
  const isError = error.severity === "error";
  const marker = isError ? " \u2717" : " \u26A0";
  const renderedMarker = useColors ? `${isError ? ANSI_RED : ANSI_YELLOW2}${marker}${ANSI_RESET2}` : marker;
  const where = error.location ? ` [${error.location}]` : "";
  return `${renderedMarker}${where} ${error.message}`;
}
|
|
13990
|
+
/**
 * Format the aggregate counters of a validation run as three lines.
 *
 * With colours the total is bold, the valid count is green, and the invalid
 * count is red only when it is greater than zero.
 *
 * @param {{totalFiles: number, validFiles: number, invalidFiles: number}} summary
 * @param {boolean} useColors - Apply ANSI styling when truthy.
 * @returns {string} The three counters joined by newlines.
 */
function formatStats(summary, useColors) {
  const total = `Total files: ${summary.totalFiles}`;
  const valid = `Valid: ${summary.validFiles}`;
  const invalid = `Invalid: ${summary.invalidFiles}`;
  if (!useColors) {
    return [total, valid, invalid].join("\n");
  }
  const hasFailures = summary.invalidFiles > 0;
  return [
    `${ANSI_BOLD}${total}${ANSI_RESET2}`,
    `${ANSI_GREEN}${valid}${ANSI_RESET2}`,
    hasFailures ? `${ANSI_RED}${invalid}${ANSI_RESET2}` : invalid
  ].join("\n");
}
|
|
14010
|
+
/**
 * Report whether standard output is attached to a terminal.
 *
 * @returns {boolean} `process.stdout.isTTY`, or `false` when that property is
 *   null or undefined.
 */
function isTTY() {
  const { isTTY: attached } = process.stdout;
  return attached ?? false;
}
|
|
14013
|
+
|
|
13838
14014
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
13839
14015
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
13840
14016
|
import { parse as parse5 } from "yaml";
|
|
13841
14017
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
13842
|
-
import
|
|
14018
|
+
import path15 from "node:path";
|
|
13843
14019
|
import { parse as parse23 } from "yaml";
|
|
13844
14020
|
import { readFile as readFile32 } from "node:fs/promises";
|
|
13845
14021
|
import path23 from "node:path";
|
|
13846
14022
|
import { parse as parse32 } from "yaml";
|
|
13847
14023
|
import { readFile as readFile42 } from "node:fs/promises";
|
|
13848
|
-
import path33 from "node:path";
|
|
13849
14024
|
import { parse as parse42 } from "yaml";
|
|
14025
|
+
import { readFile as readFile52 } from "node:fs/promises";
|
|
14026
|
+
import path33 from "node:path";
|
|
14027
|
+
import { parse as parse52 } from "yaml";
|
|
13850
14028
|
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
13851
14029
|
var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
|
|
14030
|
+
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
13852
14031
|
async function detectFileType(filePath) {
|
|
13853
14032
|
try {
|
|
13854
14033
|
const content = await readFile5(filePath, "utf8");
|
|
@@ -13866,6 +14045,8 @@ async function detectFileType(filePath) {
|
|
|
13866
14045
|
return "eval";
|
|
13867
14046
|
case SCHEMA_TARGETS_V2:
|
|
13868
14047
|
return "targets";
|
|
14048
|
+
case SCHEMA_CONFIG_V22:
|
|
14049
|
+
return "config";
|
|
13869
14050
|
default:
|
|
13870
14051
|
return "unknown";
|
|
13871
14052
|
}
|
|
@@ -13879,7 +14060,7 @@ function isObject(value) {
|
|
|
13879
14060
|
}
|
|
13880
14061
|
async function validateEvalFile(filePath) {
|
|
13881
14062
|
const errors = [];
|
|
13882
|
-
const absolutePath =
|
|
14063
|
+
const absolutePath = path15.resolve(filePath);
|
|
13883
14064
|
let parsed;
|
|
13884
14065
|
try {
|
|
13885
14066
|
const content = await readFile23(absolutePath, "utf8");
|
|
@@ -14190,6 +14371,80 @@ async function validateTargetsFile(filePath) {
|
|
|
14190
14371
|
errors
|
|
14191
14372
|
};
|
|
14192
14373
|
}
|
|
14374
|
+
// Schema identifier a config file must declare in its `$schema` field.
var SCHEMA_CONFIG_V222 = "agentv-config-v2";
/**
 * Validate an AgentV config YAML file.
 *
 * Checks, in order:
 *  - the document parses and its top level is a mapping (not a scalar, null,
 *    or sequence);
 *  - `$schema` equals the expected config schema identifier;
 *  - `guideline_patterns`, when present, is an array of strings (an empty
 *    array only produces a warning);
 *  - no keys other than `$schema` and `guideline_patterns` are present
 *    (warning only).
 *
 * @param {string} filePath - Path of the config file to read.
 * @returns {Promise<{valid: boolean, filePath: string, fileType: "config", errors: Array<object>}>}
 *   `valid` is true when no entry in `errors` has severity "error". Read and
 *   parse failures are reported as an error entry rather than thrown.
 */
async function validateConfigFile(filePath) {
  const errors = [];
  try {
    const content = await readFile42(filePath, "utf8");
    const parsed = parse42(content);
    // A YAML sequence is also `typeof "object"`; reject it here so it is not
    // reported as a mapping with unexpected numeric fields.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      errors.push({
        severity: "error",
        filePath,
        message: "Config file must contain a valid YAML object"
      });
      return { valid: false, filePath, fileType: "config", errors };
    }
    const config = parsed;
    const schema = config["$schema"];
    if (schema !== SCHEMA_CONFIG_V222) {
      const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
      errors.push({
        severity: "error",
        filePath,
        location: "$schema",
        message
      });
    }
    const guidelinePatterns = config["guideline_patterns"];
    if (guidelinePatterns !== void 0) {
      if (!Array.isArray(guidelinePatterns)) {
        errors.push({
          severity: "error",
          filePath,
          location: "guideline_patterns",
          message: "Field 'guideline_patterns' must be an array"
        });
      } else if (!guidelinePatterns.every((p) => typeof p === "string")) {
        errors.push({
          severity: "error",
          filePath,
          location: "guideline_patterns",
          message: "All entries in 'guideline_patterns' must be strings"
        });
      } else if (guidelinePatterns.length === 0) {
        errors.push({
          severity: "warning",
          filePath,
          location: "guideline_patterns",
          message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
        });
      }
    }
    const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
    const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
    if (unexpectedFields.length > 0) {
      errors.push({
        severity: "warning",
        filePath,
        message: `Unexpected fields: ${unexpectedFields.join(", ")}`
      });
    }
    return {
      // Warnings alone do not invalidate the file.
      valid: !errors.some((e) => e.severity === "error"),
      filePath,
      fileType: "config",
      errors
    };
  } catch (error) {
    // Anything can be thrown; only Error instances are known to carry `.message`.
    const reason = error instanceof Error ? error.message : String(error);
    errors.push({
      severity: "error",
      filePath,
      message: `Failed to parse config file: ${reason}`
    });
    return { valid: false, filePath, fileType: "config", errors };
  }
}
|
|
14193
14448
|
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for objects other than `null` and arrays.
 */
function isObject3(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
@@ -14208,8 +14463,8 @@ async function validateFileReferences(evalFilePath) {
|
|
|
14208
14463
|
const searchRoots = buildSearchRoots(absolutePath, gitRoot);
|
|
14209
14464
|
let parsed;
|
|
14210
14465
|
try {
|
|
14211
|
-
const content = await
|
|
14212
|
-
parsed =
|
|
14466
|
+
const content = await readFile52(absolutePath, "utf8");
|
|
14467
|
+
parsed = parse52(content);
|
|
14213
14468
|
} catch {
|
|
14214
14469
|
return errors;
|
|
14215
14470
|
}
|
|
@@ -14278,7 +14533,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
14278
14533
|
});
|
|
14279
14534
|
} else {
|
|
14280
14535
|
try {
|
|
14281
|
-
const fileContent = await
|
|
14536
|
+
const fileContent = await readFile52(resolvedPath, "utf8");
|
|
14282
14537
|
if (fileContent.trim().length === 0) {
|
|
14283
14538
|
errors.push({
|
|
14284
14539
|
severity: "warning",
|
|
@@ -14300,12 +14555,15 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
14300
14555
|
}
|
|
14301
14556
|
}
|
|
14302
14557
|
|
|
14303
|
-
// src/commands/
|
|
14304
|
-
|
|
14558
|
+
// src/commands/validate/validate-files.ts
|
|
14559
|
+
import { constants as constants7 } from "node:fs";
|
|
14560
|
+
import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
14561
|
+
import path16 from "node:path";
|
|
14562
|
+
async function validateFiles(paths) {
|
|
14305
14563
|
const filePaths = await expandPaths(paths);
|
|
14306
14564
|
const results = [];
|
|
14307
14565
|
for (const filePath of filePaths) {
|
|
14308
|
-
const result = await
|
|
14566
|
+
const result = await validateSingleFile(filePath);
|
|
14309
14567
|
results.push(result);
|
|
14310
14568
|
}
|
|
14311
14569
|
const validFiles = results.filter((r) => r.valid).length;
|
|
@@ -14317,8 +14575,8 @@ async function lintFiles(paths) {
|
|
|
14317
14575
|
results
|
|
14318
14576
|
};
|
|
14319
14577
|
}
|
|
14320
|
-
async function
|
|
14321
|
-
const absolutePath =
|
|
14578
|
+
async function validateSingleFile(filePath) {
|
|
14579
|
+
const absolutePath = path16.resolve(filePath);
|
|
14322
14580
|
const fileType = await detectFileType(absolutePath);
|
|
14323
14581
|
if (fileType === "unknown") {
|
|
14324
14582
|
return {
|
|
@@ -14329,7 +14587,7 @@ async function lintSingleFile(filePath) {
|
|
|
14329
14587
|
{
|
|
14330
14588
|
severity: "error",
|
|
14331
14589
|
filePath: absolutePath,
|
|
14332
|
-
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2' or 'agentv-
|
|
14590
|
+
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
|
|
14333
14591
|
}
|
|
14334
14592
|
]
|
|
14335
14593
|
};
|
|
@@ -14347,15 +14605,17 @@ async function lintSingleFile(filePath) {
|
|
|
14347
14605
|
};
|
|
14348
14606
|
}
|
|
14349
14607
|
}
|
|
14350
|
-
} else {
|
|
14608
|
+
} else if (fileType === "targets") {
|
|
14351
14609
|
result = await validateTargetsFile(absolutePath);
|
|
14610
|
+
} else {
|
|
14611
|
+
result = await validateConfigFile(absolutePath);
|
|
14352
14612
|
}
|
|
14353
14613
|
return result;
|
|
14354
14614
|
}
|
|
14355
14615
|
async function expandPaths(paths) {
|
|
14356
14616
|
const expanded = [];
|
|
14357
14617
|
for (const inputPath of paths) {
|
|
14358
|
-
const absolutePath =
|
|
14618
|
+
const absolutePath = path16.resolve(inputPath);
|
|
14359
14619
|
try {
|
|
14360
14620
|
await access7(absolutePath, constants7.F_OK);
|
|
14361
14621
|
} catch {
|
|
@@ -14379,7 +14639,7 @@ async function findYamlFiles(dirPath) {
|
|
|
14379
14639
|
try {
|
|
14380
14640
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
14381
14641
|
for (const entry of entries) {
|
|
14382
|
-
const fullPath =
|
|
14642
|
+
const fullPath = path16.join(dirPath, entry.name);
|
|
14383
14643
|
if (entry.isDirectory()) {
|
|
14384
14644
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
14385
14645
|
continue;
|
|
@@ -14396,98 +14656,27 @@ async function findYamlFiles(dirPath) {
|
|
|
14396
14656
|
return results;
|
|
14397
14657
|
}
|
|
14398
14658
|
function isYamlFile(filePath) {
|
|
14399
|
-
const ext =
|
|
14659
|
+
const ext = path16.extname(filePath).toLowerCase();
|
|
14400
14660
|
return ext === ".yaml" || ext === ".yml";
|
|
14401
14661
|
}
|
|
14402
14662
|
|
|
14403
|
-
// src/commands/
|
|
14404
|
-
|
|
14405
|
-
var ANSI_YELLOW2 = "\x1B[33m";
|
|
14406
|
-
var ANSI_GREEN = "\x1B[32m";
|
|
14407
|
-
var ANSI_CYAN = "\x1B[36m";
|
|
14408
|
-
var ANSI_BOLD = "\x1B[1m";
|
|
14409
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
14410
|
-
function formatSummary(summary, useColors) {
|
|
14411
|
-
const lines = [];
|
|
14412
|
-
lines.push("");
|
|
14413
|
-
lines.push(formatHeader("Validation Summary", useColors));
|
|
14414
|
-
lines.push("");
|
|
14415
|
-
for (const result of summary.results) {
|
|
14416
|
-
lines.push(formatFileResult(result, useColors));
|
|
14417
|
-
}
|
|
14418
|
-
lines.push("");
|
|
14419
|
-
lines.push(formatStats(summary, useColors));
|
|
14420
|
-
lines.push("");
|
|
14421
|
-
return lines.join("\n");
|
|
14422
|
-
}
|
|
14423
|
-
function formatHeader(text, useColors) {
|
|
14424
|
-
if (useColors) {
|
|
14425
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
|
|
14426
|
-
}
|
|
14427
|
-
return text;
|
|
14428
|
-
}
|
|
14429
|
-
function formatFileResult(result, useColors) {
|
|
14430
|
-
const lines = [];
|
|
14431
|
-
const status = result.valid ? "\u2713" : "\u2717";
|
|
14432
|
-
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
|
|
14433
|
-
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
|
|
14434
|
-
const fileName = result.filePath;
|
|
14435
|
-
lines.push(`${statusText} ${fileName}`);
|
|
14436
|
-
if (result.errors.length > 0) {
|
|
14437
|
-
for (const error of result.errors) {
|
|
14438
|
-
lines.push(formatError(error, useColors));
|
|
14439
|
-
}
|
|
14440
|
-
}
|
|
14441
|
-
return lines.join("\n");
|
|
14442
|
-
}
|
|
14443
|
-
function formatError(error, useColors) {
|
|
14444
|
-
const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
|
|
14445
|
-
const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
|
|
14446
|
-
const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
|
|
14447
|
-
const location = error.location ? ` [${error.location}]` : "";
|
|
14448
|
-
return `${coloredPrefix}${location} ${error.message}`;
|
|
14449
|
-
}
|
|
14450
|
-
function formatStats(summary, useColors) {
|
|
14451
|
-
const lines = [];
|
|
14452
|
-
const totalText = `Total files: ${summary.totalFiles}`;
|
|
14453
|
-
const validText = `Valid: ${summary.validFiles}`;
|
|
14454
|
-
const invalidText = `Invalid: ${summary.invalidFiles}`;
|
|
14455
|
-
if (useColors) {
|
|
14456
|
-
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
|
|
14457
|
-
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
|
|
14458
|
-
if (summary.invalidFiles > 0) {
|
|
14459
|
-
lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
|
|
14460
|
-
} else {
|
|
14461
|
-
lines.push(invalidText);
|
|
14462
|
-
}
|
|
14463
|
-
} else {
|
|
14464
|
-
lines.push(totalText);
|
|
14465
|
-
lines.push(validText);
|
|
14466
|
-
lines.push(invalidText);
|
|
14467
|
-
}
|
|
14468
|
-
return lines.join("\n");
|
|
14469
|
-
}
|
|
14470
|
-
function isTTY() {
|
|
14471
|
-
return process.stdout.isTTY ?? false;
|
|
14472
|
-
}
|
|
14473
|
-
|
|
14474
|
-
// src/commands/lint/index.ts
|
|
14475
|
-
async function runLintCommand(paths, options) {
|
|
14663
|
+
// src/commands/validate/index.ts
|
|
14664
|
+
async function runValidateCommand(paths, _options) {
|
|
14476
14665
|
if (paths.length === 0) {
|
|
14477
|
-
console.error("Error: No paths specified. Usage: agentv
|
|
14666
|
+
console.error("Error: No paths specified. Usage: agentv validate <paths...>");
|
|
14478
14667
|
process.exit(1);
|
|
14479
14668
|
}
|
|
14480
|
-
const summary = await
|
|
14669
|
+
const summary = await validateFiles(paths);
|
|
14481
14670
|
const useColors = isTTY();
|
|
14482
14671
|
console.log(formatSummary(summary, useColors));
|
|
14483
14672
|
if (summary.invalidFiles > 0) {
|
|
14484
14673
|
process.exit(1);
|
|
14485
14674
|
}
|
|
14486
14675
|
}
|
|
14487
|
-
function
|
|
14488
|
-
program.command("
|
|
14676
|
+
function registerValidateCommand(program) {
|
|
14677
|
+
program.command("validate").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to validate").action(async (paths, _options) => {
|
|
14489
14678
|
try {
|
|
14490
|
-
await
|
|
14679
|
+
await runValidateCommand(paths, _options);
|
|
14491
14680
|
} catch (error) {
|
|
14492
14681
|
console.error(`Error: ${error.message}`);
|
|
14493
14682
|
process.exit(1);
|
|
@@ -14505,68 +14694,6 @@ function registerStatusCommand(program) {
|
|
|
14505
14694
|
return program;
|
|
14506
14695
|
}
|
|
14507
14696
|
|
|
14508
|
-
// src/commands/init/index.ts
|
|
14509
|
-
import { existsSync as existsSync2, mkdirSync, writeFileSync } from "node:fs";
|
|
14510
|
-
import path16 from "node:path";
|
|
14511
|
-
|
|
14512
|
-
// src/templates/index.ts
|
|
14513
|
-
import { readFileSync } from "node:fs";
|
|
14514
|
-
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
14515
|
-
import path15 from "node:path";
|
|
14516
|
-
var TemplateManager = class {
|
|
14517
|
-
static getTemplates() {
|
|
14518
|
-
const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
|
|
14519
|
-
let templatesDir;
|
|
14520
|
-
if (currentDir.includes(path15.sep + "dist")) {
|
|
14521
|
-
templatesDir = path15.join(currentDir, "templates");
|
|
14522
|
-
} else {
|
|
14523
|
-
templatesDir = currentDir;
|
|
14524
|
-
}
|
|
14525
|
-
const evalBuildPrompt = readFileSync(
|
|
14526
|
-
path15.join(templatesDir, "eval-build.prompt.md"),
|
|
14527
|
-
"utf-8"
|
|
14528
|
-
);
|
|
14529
|
-
const evalSchema = readFileSync(
|
|
14530
|
-
path15.join(templatesDir, "eval-schema.json"),
|
|
14531
|
-
"utf-8"
|
|
14532
|
-
);
|
|
14533
|
-
return [
|
|
14534
|
-
{
|
|
14535
|
-
path: "prompts/eval-build.prompt.md",
|
|
14536
|
-
content: evalBuildPrompt
|
|
14537
|
-
},
|
|
14538
|
-
{
|
|
14539
|
-
path: "contexts/eval-schema.json",
|
|
14540
|
-
content: evalSchema
|
|
14541
|
-
}
|
|
14542
|
-
];
|
|
14543
|
-
}
|
|
14544
|
-
};
|
|
14545
|
-
|
|
14546
|
-
// src/commands/init/index.ts
|
|
14547
|
-
async function initCommand(options = {}) {
|
|
14548
|
-
const targetPath = path16.resolve(options.targetPath ?? ".");
|
|
14549
|
-
const githubDir = path16.join(targetPath, ".github");
|
|
14550
|
-
if (!existsSync2(githubDir)) {
|
|
14551
|
-
mkdirSync(githubDir, { recursive: true });
|
|
14552
|
-
}
|
|
14553
|
-
const templates = TemplateManager.getTemplates();
|
|
14554
|
-
for (const template of templates) {
|
|
14555
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
14556
|
-
const targetDirPath = path16.dirname(targetFilePath);
|
|
14557
|
-
if (!existsSync2(targetDirPath)) {
|
|
14558
|
-
mkdirSync(targetDirPath, { recursive: true });
|
|
14559
|
-
}
|
|
14560
|
-
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
14561
|
-
console.log(`Created ${path16.relative(targetPath, targetFilePath)}`);
|
|
14562
|
-
}
|
|
14563
|
-
console.log("\nAgentV initialized successfully!");
|
|
14564
|
-
console.log(`
|
|
14565
|
-
Files installed to ${path16.relative(targetPath, githubDir)}:`);
|
|
14566
|
-
templates.forEach((t) => console.log(` - ${t.path}`));
|
|
14567
|
-
console.log("\nYou can now create eval files using the schema and prompt templates.");
|
|
14568
|
-
}
|
|
14569
|
-
|
|
14570
14697
|
// src/index.ts
|
|
14571
14698
|
var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
|
|
14572
14699
|
function createProgram() {
|
|
@@ -14574,7 +14701,7 @@ function createProgram() {
|
|
|
14574
14701
|
program.name("agentv").description("AgentV CLI scaffolding").version(packageJson.version);
|
|
14575
14702
|
registerStatusCommand(program);
|
|
14576
14703
|
registerEvalCommand(program);
|
|
14577
|
-
|
|
14704
|
+
registerValidateCommand(program);
|
|
14578
14705
|
program.command("init [path]").description("Initialize AgentV in your project (installs prompt templates and schema to .github)").action(async (targetPath) => {
|
|
14579
14706
|
try {
|
|
14580
14707
|
await initCommand({ targetPath });
|
|
@@ -14595,4 +14722,4 @@ export {
|
|
|
14595
14722
|
createProgram,
|
|
14596
14723
|
runCli
|
|
14597
14724
|
};
|
|
14598
|
-
//# sourceMappingURL=chunk-
|
|
14725
|
+
//# sourceMappingURL=chunk-RLBRJX7V.js.map
|