agentv 0.2.6 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -42
- package/dist/chunk-7MGIZBZG.js +15238 -0
- package/dist/chunk-7MGIZBZG.js.map +1 -0
- package/dist/{chunk-32ZAVIQY.js → chunk-JT3E7T7V.js} +582 -436
- package/dist/chunk-JT3E7T7V.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/config-schema.json +27 -0
- package/dist/templates/eval-build.prompt.md +3 -3
- package/dist/templates/eval-schema.json +3 -3
- package/package.json +3 -2
- package/dist/chunk-32ZAVIQY.js.map +0 -1
|
@@ -585,7 +585,7 @@ var require_utc = __commonJS({
|
|
|
585
585
|
import { Command } from "commander";
|
|
586
586
|
import { readFileSync as readFileSync2 } from "node:fs";
|
|
587
587
|
|
|
588
|
-
// ../../packages/core/dist/chunk-
|
|
588
|
+
// ../../packages/core/dist/chunk-XXNQA4EW.js
|
|
589
589
|
import { constants } from "node:fs";
|
|
590
590
|
import { access } from "node:fs/promises";
|
|
591
591
|
import path from "node:path";
|
|
@@ -613,6 +613,30 @@ async function findGitRoot(startPath) {
|
|
|
613
613
|
}
|
|
614
614
|
return null;
|
|
615
615
|
}
|
|
616
|
+
function buildDirectoryChain(filePath, repoRoot) {
|
|
617
|
+
const directories = [];
|
|
618
|
+
const seen = /* @__PURE__ */ new Set();
|
|
619
|
+
const boundary = path.resolve(repoRoot);
|
|
620
|
+
let current = path.resolve(path.dirname(filePath));
|
|
621
|
+
while (current !== void 0) {
|
|
622
|
+
if (!seen.has(current)) {
|
|
623
|
+
directories.push(current);
|
|
624
|
+
seen.add(current);
|
|
625
|
+
}
|
|
626
|
+
if (current === boundary) {
|
|
627
|
+
break;
|
|
628
|
+
}
|
|
629
|
+
const parent = path.dirname(current);
|
|
630
|
+
if (parent === current) {
|
|
631
|
+
break;
|
|
632
|
+
}
|
|
633
|
+
current = parent;
|
|
634
|
+
}
|
|
635
|
+
if (!seen.has(boundary)) {
|
|
636
|
+
directories.push(boundary);
|
|
637
|
+
}
|
|
638
|
+
return directories;
|
|
639
|
+
}
|
|
616
640
|
function buildSearchRoots(evalPath, repoRoot) {
|
|
617
641
|
const uniqueRoots = [];
|
|
618
642
|
const addRoot = (root2) => {
|
|
@@ -689,6 +713,7 @@ var PROVIDER_ALIASES = [
|
|
|
689
713
|
var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
|
|
690
714
|
|
|
691
715
|
// ../../packages/core/dist/index.js
|
|
716
|
+
import micromatch from "micromatch";
|
|
692
717
|
import { constants as constants3 } from "node:fs";
|
|
693
718
|
import { access as access3, readFile as readFile2 } from "node:fs/promises";
|
|
694
719
|
import path7 from "node:path";
|
|
@@ -9048,29 +9073,27 @@ var coerce = {
|
|
|
9048
9073
|
var NEVER = INVALID;
|
|
9049
9074
|
|
|
9050
9075
|
// ../../packages/core/dist/index.js
|
|
9051
|
-
import {
|
|
9052
|
-
import { tmpdir } from "node:os";
|
|
9076
|
+
import { readFile as readFile22 } from "node:fs/promises";
|
|
9053
9077
|
import path22 from "node:path";
|
|
9054
9078
|
|
|
9055
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9079
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
9056
9080
|
import { exec, spawn } from "child_process";
|
|
9057
9081
|
import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile, stat as stat2, writeFile } from "fs/promises";
|
|
9058
9082
|
import path5 from "path";
|
|
9059
9083
|
import { promisify } from "util";
|
|
9060
9084
|
|
|
9061
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9085
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/constants.js
|
|
9062
9086
|
import os from "os";
|
|
9063
9087
|
import path2 from "path";
|
|
9064
9088
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
9089
|
+
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9065
9090
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
9066
9091
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
9067
9092
|
return path2.join(os.homedir(), ".subagent", folder);
|
|
9068
9093
|
}
|
|
9069
9094
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
9070
|
-
var DEFAULT_WAKEUP_FILENAME = "wakeup.chatmode.md";
|
|
9071
|
-
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9072
9095
|
|
|
9073
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9096
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/fs.js
|
|
9074
9097
|
import { constants as constants2 } from "fs";
|
|
9075
9098
|
import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
|
|
9076
9099
|
import path3 from "path";
|
|
@@ -9103,14 +9126,14 @@ async function removeIfExists(target) {
|
|
|
9103
9126
|
}
|
|
9104
9127
|
}
|
|
9105
9128
|
|
|
9106
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9129
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/time.js
|
|
9107
9130
|
function sleep(ms2) {
|
|
9108
9131
|
return new Promise((resolve) => {
|
|
9109
9132
|
setTimeout(resolve, ms2);
|
|
9110
9133
|
});
|
|
9111
9134
|
}
|
|
9112
9135
|
|
|
9113
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9136
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/workspace.js
|
|
9114
9137
|
import path4 from "path";
|
|
9115
9138
|
|
|
9116
9139
|
// ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
|
|
@@ -10198,7 +10221,7 @@ var JSON5 = {
|
|
|
10198
10221
|
var lib = JSON5;
|
|
10199
10222
|
var dist_default = lib;
|
|
10200
10223
|
|
|
10201
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10224
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/workspace.js
|
|
10202
10225
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10203
10226
|
let workspace;
|
|
10204
10227
|
try {
|
|
@@ -10271,28 +10294,39 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10271
10294
|
return JSON.stringify(transformedWorkspace, null, 2);
|
|
10272
10295
|
}
|
|
10273
10296
|
|
|
10274
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10297
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
10275
10298
|
var execAsync = promisify(exec);
|
|
10299
|
+
function generateTimestamp() {
|
|
10300
|
+
return (/* @__PURE__ */ new Date()).toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
|
|
10301
|
+
}
|
|
10276
10302
|
var DEFAULT_WORKSPACE_TEMPLATE = {
|
|
10277
10303
|
folders: [
|
|
10278
10304
|
{
|
|
10279
10305
|
path: "."
|
|
10280
10306
|
}
|
|
10281
|
-
]
|
|
10282
|
-
settings: {
|
|
10283
|
-
"chat.modeFilesLocations": {
|
|
10284
|
-
"**/*.chatmode.md": true
|
|
10285
|
-
}
|
|
10286
|
-
}
|
|
10307
|
+
]
|
|
10287
10308
|
};
|
|
10288
10309
|
var DEFAULT_WAKEUP_CONTENT = `---
|
|
10289
10310
|
description: 'Wake-up Signal'
|
|
10290
|
-
|
|
10291
|
-
model: GPT-4.1 (copilot)
|
|
10311
|
+
model: Grok Code Fast 1 (copilot)
|
|
10292
10312
|
---`;
|
|
10293
10313
|
function getSubagentRoot(vscodeCmd = "code") {
|
|
10294
10314
|
return getDefaultSubagentRoot(vscodeCmd);
|
|
10295
10315
|
}
|
|
10316
|
+
async function resolvePromptFile(promptFile) {
|
|
10317
|
+
if (!promptFile) {
|
|
10318
|
+
return void 0;
|
|
10319
|
+
}
|
|
10320
|
+
const resolvedPrompt = path5.resolve(promptFile);
|
|
10321
|
+
if (!await pathExists(resolvedPrompt)) {
|
|
10322
|
+
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
10323
|
+
}
|
|
10324
|
+
const promptStats = await stat2(resolvedPrompt);
|
|
10325
|
+
if (!promptStats.isFile()) {
|
|
10326
|
+
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
10327
|
+
}
|
|
10328
|
+
return resolvedPrompt;
|
|
10329
|
+
}
|
|
10296
10330
|
async function findUnlockedSubagent(subagentRoot) {
|
|
10297
10331
|
if (!await pathExists(subagentRoot)) {
|
|
10298
10332
|
return null;
|
|
@@ -10326,7 +10360,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
10326
10360
|
}
|
|
10327
10361
|
const aliveFile = path5.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
10328
10362
|
await removeIfExists(aliveFile);
|
|
10329
|
-
const
|
|
10363
|
+
const githubAgentsDir = path5.join(subagentDir, ".github", "agents");
|
|
10364
|
+
await mkdir2(githubAgentsDir, { recursive: true });
|
|
10365
|
+
const wakeupDst = path5.join(githubAgentsDir, "wakeup.md");
|
|
10366
|
+
const subagentDst = path5.join(githubAgentsDir, "subagent.md");
|
|
10330
10367
|
await writeFile(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
10331
10368
|
spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
10332
10369
|
await sleep(100);
|
|
@@ -10378,8 +10415,12 @@ async function createSubagentLock(subagentDir) {
|
|
|
10378
10415
|
await removeIfExists(target);
|
|
10379
10416
|
}));
|
|
10380
10417
|
}
|
|
10381
|
-
const
|
|
10382
|
-
|
|
10418
|
+
const githubAgentsDir = path5.join(subagentDir, ".github", "agents");
|
|
10419
|
+
if (await pathExists(githubAgentsDir)) {
|
|
10420
|
+
const agentFiles = await readdir2(githubAgentsDir);
|
|
10421
|
+
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
10422
|
+
await Promise.all(agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path5.join(githubAgentsDir, file))));
|
|
10423
|
+
}
|
|
10383
10424
|
const lockFile = path5.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
10384
10425
|
await writeFile(lockFile, "", { encoding: "utf8" });
|
|
10385
10426
|
return lockFile;
|
|
@@ -10442,20 +10483,22 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
10442
10483
|
return 1;
|
|
10443
10484
|
}
|
|
10444
10485
|
if (promptFile) {
|
|
10445
|
-
const
|
|
10486
|
+
const githubAgentsDir = path5.join(subagentDir, ".github", "agents");
|
|
10487
|
+
await mkdir2(githubAgentsDir, { recursive: true });
|
|
10488
|
+
const agentFile = path5.join(githubAgentsDir, `${chatId}.md`);
|
|
10446
10489
|
try {
|
|
10447
|
-
await copyFile(promptFile,
|
|
10490
|
+
await copyFile(promptFile, agentFile);
|
|
10448
10491
|
} catch (error) {
|
|
10449
|
-
console.error(`error: Failed to copy prompt file to
|
|
10492
|
+
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
10450
10493
|
return 1;
|
|
10451
10494
|
}
|
|
10452
10495
|
}
|
|
10453
10496
|
return 0;
|
|
10454
10497
|
}
|
|
10455
|
-
function createRequestPrompt(userQuery, responseFileTmp, responseFileFinal
|
|
10456
|
-
const escapedUserQuery = userQuery.replace(/`/g, "\\`");
|
|
10498
|
+
function createRequestPrompt(userQuery, responseFileTmp, responseFileFinal) {
|
|
10457
10499
|
return `[[ ## task ## ]]
|
|
10458
|
-
|
|
10500
|
+
|
|
10501
|
+
${userQuery}
|
|
10459
10502
|
|
|
10460
10503
|
[[ ## system_instructions ## ]]
|
|
10461
10504
|
|
|
@@ -10464,7 +10507,7 @@ ${escapedUserQuery}
|
|
|
10464
10507
|
2. When completely finished, run these PowerShell commands to signal completion:
|
|
10465
10508
|
\`\`\`
|
|
10466
10509
|
Move-Item -LiteralPath '${responseFileTmp}' -Destination '${responseFileFinal}'
|
|
10467
|
-
subagent
|
|
10510
|
+
if (Test-Path subagent.lock) { del subagent.lock }
|
|
10468
10511
|
\`\`\`
|
|
10469
10512
|
|
|
10470
10513
|
Do not proceed to step 2 until your response is completely written to the temporary file.`;
|
|
@@ -10512,21 +10555,13 @@ async function dispatchAgentSession(options) {
|
|
|
10512
10555
|
const { userQuery, promptFile, extraAttachments, workspaceTemplate, dryRun = false, wait = true, vscodeCmd = "code", subagentRoot, silent = false } = options;
|
|
10513
10556
|
try {
|
|
10514
10557
|
let resolvedPrompt;
|
|
10515
|
-
|
|
10516
|
-
resolvedPrompt =
|
|
10517
|
-
|
|
10518
|
-
|
|
10519
|
-
|
|
10520
|
-
|
|
10521
|
-
|
|
10522
|
-
}
|
|
10523
|
-
const promptStats = await stat2(resolvedPrompt);
|
|
10524
|
-
if (!promptStats.isFile()) {
|
|
10525
|
-
return {
|
|
10526
|
-
exitCode: 1,
|
|
10527
|
-
error: `Prompt file must be a file, not a directory: ${resolvedPrompt}`
|
|
10528
|
-
};
|
|
10529
|
-
}
|
|
10558
|
+
try {
|
|
10559
|
+
resolvedPrompt = await resolvePromptFile(promptFile);
|
|
10560
|
+
} catch (error) {
|
|
10561
|
+
return {
|
|
10562
|
+
exitCode: 1,
|
|
10563
|
+
error: error.message
|
|
10564
|
+
};
|
|
10530
10565
|
}
|
|
10531
10566
|
const subagentRootPath = subagentRoot ?? getSubagentRoot(vscodeCmd);
|
|
10532
10567
|
const subagentDir = await findUnlockedSubagent(subagentRootPath);
|
|
@@ -10556,11 +10591,11 @@ async function dispatchAgentSession(options) {
|
|
|
10556
10591
|
error: attachmentError.message
|
|
10557
10592
|
};
|
|
10558
10593
|
}
|
|
10559
|
-
const timestamp = (
|
|
10594
|
+
const timestamp = generateTimestamp();
|
|
10560
10595
|
const messagesDir = path5.join(subagentDir, "messages");
|
|
10561
10596
|
const responseFileTmp = path5.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
10562
10597
|
const responseFileFinal = path5.join(messagesDir, `${timestamp}_res.md`);
|
|
10563
|
-
const requestInstructions = createRequestPrompt(userQuery, responseFileTmp, responseFileFinal
|
|
10598
|
+
const requestInstructions = createRequestPrompt(userQuery, responseFileTmp, responseFileFinal);
|
|
10564
10599
|
if (dryRun) {
|
|
10565
10600
|
return {
|
|
10566
10601
|
exitCode: 0,
|
|
@@ -10612,7 +10647,7 @@ async function dispatchAgentSession(options) {
|
|
|
10612
10647
|
}
|
|
10613
10648
|
}
|
|
10614
10649
|
|
|
10615
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10650
|
+
// ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/provision.js
|
|
10616
10651
|
import { writeFile as writeFile2 } from "fs/promises";
|
|
10617
10652
|
import path6 from "path";
|
|
10618
10653
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
@@ -10623,7 +10658,7 @@ var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
|
10623
10658
|
],
|
|
10624
10659
|
settings: {
|
|
10625
10660
|
"chat.modeFilesLocations": {
|
|
10626
|
-
"
|
|
10661
|
+
".github/agents/**/*.md": true
|
|
10627
10662
|
}
|
|
10628
10663
|
}
|
|
10629
10664
|
};
|
|
@@ -10673,9 +10708,11 @@ async function provisionSubagents(options) {
|
|
|
10673
10708
|
break;
|
|
10674
10709
|
}
|
|
10675
10710
|
const subagentDir = subagent.absolutePath;
|
|
10711
|
+
const githubAgentsDir = path6.join(subagentDir, ".github", "agents");
|
|
10676
10712
|
const lockFile = path6.join(subagentDir, lockName);
|
|
10677
10713
|
const workspaceDst = path6.join(subagentDir, `${path6.basename(subagentDir)}.code-workspace`);
|
|
10678
|
-
const wakeupDst = path6.join(
|
|
10714
|
+
const wakeupDst = path6.join(githubAgentsDir, "wakeup.md");
|
|
10715
|
+
const subagentDst = path6.join(githubAgentsDir, "subagent.md");
|
|
10679
10716
|
const isLocked = await pathExists(lockFile);
|
|
10680
10717
|
if (isLocked && !force) {
|
|
10681
10718
|
continue;
|
|
@@ -10683,6 +10720,7 @@ async function provisionSubagents(options) {
|
|
|
10683
10720
|
if (isLocked && force) {
|
|
10684
10721
|
if (!dryRun) {
|
|
10685
10722
|
await removeIfExists(lockFile);
|
|
10723
|
+
await ensureDir(githubAgentsDir);
|
|
10686
10724
|
await writeFile2(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
10687
10725
|
await writeFile2(wakeupDst, wakeupContent, "utf8");
|
|
10688
10726
|
}
|
|
@@ -10693,6 +10731,7 @@ async function provisionSubagents(options) {
|
|
|
10693
10731
|
}
|
|
10694
10732
|
if (!isLocked && force) {
|
|
10695
10733
|
if (!dryRun) {
|
|
10734
|
+
await ensureDir(githubAgentsDir);
|
|
10696
10735
|
await writeFile2(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
10697
10736
|
await writeFile2(wakeupDst, wakeupContent, "utf8");
|
|
10698
10737
|
}
|
|
@@ -10701,6 +10740,7 @@ async function provisionSubagents(options) {
|
|
|
10701
10740
|
continue;
|
|
10702
10741
|
}
|
|
10703
10742
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
10743
|
+
await ensureDir(githubAgentsDir);
|
|
10704
10744
|
await writeFile2(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
10705
10745
|
await writeFile2(wakeupDst, wakeupContent, "utf8");
|
|
10706
10746
|
}
|
|
@@ -10711,10 +10751,13 @@ async function provisionSubagents(options) {
|
|
|
10711
10751
|
while (subagentsProvisioned < subagents) {
|
|
10712
10752
|
nextIndex += 1;
|
|
10713
10753
|
const subagentDir = path6.join(targetPath, `subagent-${nextIndex}`);
|
|
10754
|
+
const githubAgentsDir = path6.join(subagentDir, ".github", "agents");
|
|
10714
10755
|
const workspaceDst = path6.join(subagentDir, `${path6.basename(subagentDir)}.code-workspace`);
|
|
10715
|
-
const wakeupDst = path6.join(
|
|
10756
|
+
const wakeupDst = path6.join(githubAgentsDir, "wakeup.md");
|
|
10757
|
+
const subagentDst = path6.join(githubAgentsDir, "subagent.md");
|
|
10716
10758
|
if (!dryRun) {
|
|
10717
10759
|
await ensureDir(subagentDir);
|
|
10760
|
+
await ensureDir(githubAgentsDir);
|
|
10718
10761
|
await writeFile2(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
10719
10762
|
await writeFile2(wakeupDst, wakeupContent, "utf8");
|
|
10720
10763
|
}
|
|
@@ -10785,9 +10828,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
|
|
10785
10828
|
var ANSI_YELLOW = "\x1B[33m";
|
|
10786
10829
|
var ANSI_RESET = "\x1B[0m";
|
|
10787
10830
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
10788
|
-
|
|
10831
|
+
var SCHEMA_CONFIG_V2 = "agentv-config-v2";
|
|
10832
|
+
async function loadConfig(evalFilePath, repoRoot) {
|
|
10833
|
+
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
10834
|
+
for (const directory of directories) {
|
|
10835
|
+
const configPath = path7.join(directory, ".agentv", "config.yaml");
|
|
10836
|
+
if (!await fileExists2(configPath)) {
|
|
10837
|
+
continue;
|
|
10838
|
+
}
|
|
10839
|
+
try {
|
|
10840
|
+
const rawConfig = await readFile2(configPath, "utf8");
|
|
10841
|
+
const parsed = parse3(rawConfig);
|
|
10842
|
+
if (!isJsonObject(parsed)) {
|
|
10843
|
+
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
10844
|
+
continue;
|
|
10845
|
+
}
|
|
10846
|
+
const config = parsed;
|
|
10847
|
+
const schema = config.$schema;
|
|
10848
|
+
if (schema !== SCHEMA_CONFIG_V2) {
|
|
10849
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
|
|
10850
|
+
Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
|
|
10851
|
+
logWarning(message);
|
|
10852
|
+
continue;
|
|
10853
|
+
}
|
|
10854
|
+
const guidelinePatterns = config.guideline_patterns;
|
|
10855
|
+
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
10856
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
10857
|
+
continue;
|
|
10858
|
+
}
|
|
10859
|
+
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
10860
|
+
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
10861
|
+
continue;
|
|
10862
|
+
}
|
|
10863
|
+
return {
|
|
10864
|
+
guideline_patterns: guidelinePatterns
|
|
10865
|
+
};
|
|
10866
|
+
} catch (error) {
|
|
10867
|
+
logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
|
|
10868
|
+
continue;
|
|
10869
|
+
}
|
|
10870
|
+
}
|
|
10871
|
+
return null;
|
|
10872
|
+
}
|
|
10873
|
+
function isGuidelineFile(filePath, patterns) {
|
|
10789
10874
|
const normalized = filePath.split("\\").join("/");
|
|
10790
|
-
|
|
10875
|
+
const patternsToUse = patterns ?? [];
|
|
10876
|
+
return micromatch.isMatch(normalized, patternsToUse);
|
|
10791
10877
|
}
|
|
10792
10878
|
function extractCodeBlocks(segments) {
|
|
10793
10879
|
const codeBlocks = [];
|
|
@@ -10807,43 +10893,45 @@ function extractCodeBlocks(segments) {
|
|
|
10807
10893
|
}
|
|
10808
10894
|
return codeBlocks;
|
|
10809
10895
|
}
|
|
10810
|
-
async function
|
|
10896
|
+
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
10811
10897
|
const verbose = options?.verbose ?? false;
|
|
10812
|
-
const absoluteTestPath = path7.resolve(
|
|
10898
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
10813
10899
|
if (!await fileExists2(absoluteTestPath)) {
|
|
10814
|
-
throw new Error(`Test file not found: ${
|
|
10900
|
+
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
10815
10901
|
}
|
|
10816
10902
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
10817
10903
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
10904
|
+
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
10905
|
+
const guidelinePatterns = config?.guideline_patterns;
|
|
10818
10906
|
const rawFile = await readFile2(absoluteTestPath, "utf8");
|
|
10819
10907
|
const parsed = parse3(rawFile);
|
|
10820
10908
|
if (!isJsonObject(parsed)) {
|
|
10821
|
-
throw new Error(`Invalid test file format: ${
|
|
10909
|
+
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
10822
10910
|
}
|
|
10823
10911
|
const suite = parsed;
|
|
10824
10912
|
const schema = suite.$schema;
|
|
10825
10913
|
if (schema !== SCHEMA_EVAL_V2) {
|
|
10826
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${
|
|
10914
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
|
|
10827
10915
|
Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
10828
10916
|
throw new Error(message);
|
|
10829
10917
|
}
|
|
10830
10918
|
const rawTestcases = suite.evalcases;
|
|
10831
10919
|
if (!Array.isArray(rawTestcases)) {
|
|
10832
|
-
throw new Error(`Invalid test file format: ${
|
|
10920
|
+
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
|
|
10833
10921
|
}
|
|
10834
10922
|
const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
|
|
10835
10923
|
const results = [];
|
|
10836
|
-
for (const
|
|
10837
|
-
if (!isJsonObject(
|
|
10924
|
+
for (const rawEvalcase of rawTestcases) {
|
|
10925
|
+
if (!isJsonObject(rawEvalcase)) {
|
|
10838
10926
|
logWarning("Skipping invalid test case entry (expected object)");
|
|
10839
10927
|
continue;
|
|
10840
10928
|
}
|
|
10841
|
-
const
|
|
10842
|
-
const id = asString(
|
|
10843
|
-
const conversationId = asString(
|
|
10844
|
-
const outcome = asString(
|
|
10845
|
-
const inputMessagesValue =
|
|
10846
|
-
const expectedMessagesValue =
|
|
10929
|
+
const evalcase = rawEvalcase;
|
|
10930
|
+
const id = asString(evalcase.id);
|
|
10931
|
+
const conversationId = asString(evalcase.conversation_id);
|
|
10932
|
+
const outcome = asString(evalcase.outcome);
|
|
10933
|
+
const inputMessagesValue = evalcase.input_messages;
|
|
10934
|
+
const expectedMessagesValue = evalcase.expected_messages;
|
|
10847
10935
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
10848
10936
|
logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
|
|
10849
10937
|
continue;
|
|
@@ -10856,6 +10944,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10856
10944
|
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
10857
10945
|
const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
|
|
10858
10946
|
const userMessages = inputMessages.filter((message) => message.role === "user");
|
|
10947
|
+
const systemMessages = inputMessages.filter((message) => message.role === "system");
|
|
10859
10948
|
if (assistantMessages.length === 0) {
|
|
10860
10949
|
logWarning(`No assistant message found for test case: ${id}`);
|
|
10861
10950
|
continue;
|
|
@@ -10863,6 +10952,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10863
10952
|
if (assistantMessages.length > 1) {
|
|
10864
10953
|
logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
|
|
10865
10954
|
}
|
|
10955
|
+
if (systemMessages.length > 1) {
|
|
10956
|
+
logWarning(`Multiple system messages found for test case: ${id}, using first`);
|
|
10957
|
+
}
|
|
10958
|
+
let systemMessageContent;
|
|
10959
|
+
if (systemMessages.length > 0) {
|
|
10960
|
+
const content = systemMessages[0]?.content;
|
|
10961
|
+
if (typeof content === "string") {
|
|
10962
|
+
systemMessageContent = content;
|
|
10963
|
+
} else if (Array.isArray(content)) {
|
|
10964
|
+
const textParts = [];
|
|
10965
|
+
for (const segment of content) {
|
|
10966
|
+
if (isJsonObject(segment)) {
|
|
10967
|
+
const value = segment.value;
|
|
10968
|
+
if (typeof value === "string") {
|
|
10969
|
+
textParts.push(value);
|
|
10970
|
+
}
|
|
10971
|
+
}
|
|
10972
|
+
}
|
|
10973
|
+
if (textParts.length > 0) {
|
|
10974
|
+
systemMessageContent = textParts.join("\n\n");
|
|
10975
|
+
}
|
|
10976
|
+
}
|
|
10977
|
+
}
|
|
10866
10978
|
const userSegments = [];
|
|
10867
10979
|
const guidelinePaths = [];
|
|
10868
10980
|
const userTextParts = [];
|
|
@@ -10894,7 +11006,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10894
11006
|
}
|
|
10895
11007
|
try {
|
|
10896
11008
|
const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10897
|
-
|
|
11009
|
+
const relativeToRepo = path7.relative(repoRootPath, resolvedPath);
|
|
11010
|
+
if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
|
|
10898
11011
|
guidelinePaths.push(path7.resolve(resolvedPath));
|
|
10899
11012
|
if (verbose) {
|
|
10900
11013
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
@@ -10904,7 +11017,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10904
11017
|
userSegments.push({
|
|
10905
11018
|
type: "file",
|
|
10906
11019
|
path: displayPath,
|
|
10907
|
-
text: fileContent
|
|
11020
|
+
text: fileContent,
|
|
11021
|
+
resolvedPath: path7.resolve(resolvedPath)
|
|
10908
11022
|
});
|
|
10909
11023
|
if (verbose) {
|
|
10910
11024
|
console.log(` [File] Found: ${displayPath}`);
|
|
@@ -10928,14 +11042,27 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
10928
11042
|
const assistantContent = assistantMessages[0]?.content;
|
|
10929
11043
|
const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
|
|
10930
11044
|
const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
10931
|
-
const testCaseGrader = coerceGrader(
|
|
11045
|
+
const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
|
|
11046
|
+
const userFilePaths = [];
|
|
11047
|
+
for (const segment of userSegments) {
|
|
11048
|
+
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
11049
|
+
userFilePaths.push(segment.resolvedPath);
|
|
11050
|
+
}
|
|
11051
|
+
}
|
|
11052
|
+
const allFilePaths = [
|
|
11053
|
+
...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
11054
|
+
...userFilePaths
|
|
11055
|
+
];
|
|
10932
11056
|
const testCase = {
|
|
10933
11057
|
id,
|
|
10934
11058
|
conversation_id: conversationId,
|
|
10935
11059
|
task: userTextPrompt,
|
|
10936
11060
|
user_segments: userSegments,
|
|
11061
|
+
system_message: systemMessageContent,
|
|
10937
11062
|
expected_assistant_raw: expectedAssistantRaw,
|
|
10938
11063
|
guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
11064
|
+
guideline_patterns: guidelinePatterns,
|
|
11065
|
+
file_paths: allFilePaths,
|
|
10939
11066
|
code_snippets: codeSnippets,
|
|
10940
11067
|
outcome,
|
|
10941
11068
|
grader: testCaseGrader
|
|
@@ -11001,7 +11128,7 @@ ${body}`);
|
|
|
11001
11128
|
}
|
|
11002
11129
|
const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
11003
11130
|
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
11004
|
-
return { request, guidelines };
|
|
11131
|
+
return { request, guidelines, systemMessage: testCase.system_message };
|
|
11005
11132
|
}
|
|
11006
11133
|
async function fileExists2(absolutePath) {
|
|
11007
11134
|
try {
|
|
@@ -11124,15 +11251,18 @@ function buildChatPrompt(request) {
|
|
|
11124
11251
|
return request.chatPrompt;
|
|
11125
11252
|
}
|
|
11126
11253
|
const systemSegments = [];
|
|
11127
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11128
|
-
systemSegments.push(`Guidelines:
|
|
11129
|
-
${request.guidelines.trim()}`);
|
|
11130
|
-
}
|
|
11131
11254
|
const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
|
|
11132
11255
|
if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
|
|
11133
11256
|
systemSegments.push(metadataSystemPrompt.trim());
|
|
11257
|
+
} else {
|
|
11258
|
+
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
11134
11259
|
}
|
|
11135
|
-
|
|
11260
|
+
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11261
|
+
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
11262
|
+
|
|
11263
|
+
${request.guidelines.trim()}`);
|
|
11264
|
+
}
|
|
11265
|
+
const systemContent = systemSegments.join("\n\n");
|
|
11136
11266
|
const userContent = request.prompt.trim();
|
|
11137
11267
|
const prompt = [
|
|
11138
11268
|
{
|
|
@@ -11581,7 +11711,6 @@ function resolveOptionalBoolean(source2) {
|
|
|
11581
11711
|
function isLikelyEnvReference(value) {
|
|
11582
11712
|
return /^[A-Z0-9_]+$/.test(value);
|
|
11583
11713
|
}
|
|
11584
|
-
var PROMPT_FILE_PREFIX = "agentv-vscode-";
|
|
11585
11714
|
var VSCodeProvider = class {
|
|
11586
11715
|
id;
|
|
11587
11716
|
kind;
|
|
@@ -11598,129 +11727,90 @@ var VSCodeProvider = class {
|
|
|
11598
11727
|
throw new Error("VS Code provider request was aborted before dispatch");
|
|
11599
11728
|
}
|
|
11600
11729
|
const attachments = normalizeAttachments(request.attachments);
|
|
11601
|
-
const promptContent = buildPromptDocument(request, attachments);
|
|
11602
|
-
const
|
|
11603
|
-
|
|
11604
|
-
|
|
11605
|
-
|
|
11606
|
-
|
|
11607
|
-
|
|
11608
|
-
|
|
11609
|
-
|
|
11610
|
-
|
|
11611
|
-
|
|
11612
|
-
|
|
11613
|
-
|
|
11614
|
-
|
|
11615
|
-
|
|
11616
|
-
|
|
11617
|
-
|
|
11618
|
-
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
11619
|
-
throw new Error(failure);
|
|
11620
|
-
}
|
|
11621
|
-
if (this.config.dryRun) {
|
|
11622
|
-
return {
|
|
11623
|
-
text: "",
|
|
11624
|
-
raw: {
|
|
11625
|
-
session,
|
|
11626
|
-
promptFile: promptPath,
|
|
11627
|
-
attachments
|
|
11628
|
-
}
|
|
11629
|
-
};
|
|
11630
|
-
}
|
|
11631
|
-
const responseText = await readFile22(session.responseFile, "utf8");
|
|
11730
|
+
const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
|
|
11731
|
+
const session = await dispatchAgentSession({
|
|
11732
|
+
userQuery: promptContent,
|
|
11733
|
+
// Use full prompt content instead of just request.prompt
|
|
11734
|
+
extraAttachments: attachments,
|
|
11735
|
+
wait: this.config.waitForResponse,
|
|
11736
|
+
dryRun: this.config.dryRun,
|
|
11737
|
+
vscodeCmd: this.config.command,
|
|
11738
|
+
subagentRoot: this.config.subagentRoot,
|
|
11739
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
11740
|
+
silent: true
|
|
11741
|
+
});
|
|
11742
|
+
if (session.exitCode !== 0 || !session.responseFile) {
|
|
11743
|
+
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
11744
|
+
throw new Error(failure);
|
|
11745
|
+
}
|
|
11746
|
+
if (this.config.dryRun) {
|
|
11632
11747
|
return {
|
|
11633
|
-
text:
|
|
11748
|
+
text: "",
|
|
11634
11749
|
raw: {
|
|
11635
11750
|
session,
|
|
11636
|
-
promptFile: promptPath,
|
|
11637
11751
|
attachments
|
|
11638
11752
|
}
|
|
11639
11753
|
};
|
|
11640
|
-
} finally {
|
|
11641
|
-
await rm2(directory, { recursive: true, force: true });
|
|
11642
11754
|
}
|
|
11755
|
+
const responseText = await readFile22(session.responseFile, "utf8");
|
|
11756
|
+
return {
|
|
11757
|
+
text: responseText,
|
|
11758
|
+
raw: {
|
|
11759
|
+
session,
|
|
11760
|
+
attachments
|
|
11761
|
+
}
|
|
11762
|
+
};
|
|
11643
11763
|
}
|
|
11644
11764
|
};
|
|
11645
|
-
function buildPromptDocument(request, attachments) {
|
|
11765
|
+
function buildPromptDocument(request, attachments, guidelinePatterns) {
|
|
11646
11766
|
const parts = [];
|
|
11647
|
-
const
|
|
11648
|
-
if (
|
|
11649
|
-
parts.push(buildMandatoryPrereadBlock(
|
|
11650
|
-
}
|
|
11651
|
-
parts.push(`# AgentV Request`);
|
|
11652
|
-
if (request.testCaseId) {
|
|
11653
|
-
parts.push(`- Test Case: ${request.testCaseId}`);
|
|
11654
|
-
}
|
|
11655
|
-
if (request.metadata?.target) {
|
|
11656
|
-
parts.push(`- Target: ${String(request.metadata.target)}`);
|
|
11657
|
-
}
|
|
11658
|
-
parts.push("\n## Task\n", request.prompt.trim());
|
|
11659
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11660
|
-
parts.push("\n## Guidelines\n", request.guidelines.trim());
|
|
11661
|
-
}
|
|
11662
|
-
if (attachments && attachments.length > 0) {
|
|
11663
|
-
const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
|
|
11664
|
-
parts.push("\n## Attachments\n", attachmentList);
|
|
11767
|
+
const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
|
|
11768
|
+
if (guidelineFiles.length > 0) {
|
|
11769
|
+
parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
|
|
11665
11770
|
}
|
|
11771
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
11666
11772
|
return parts.join("\n").trim();
|
|
11667
11773
|
}
|
|
11668
|
-
function buildMandatoryPrereadBlock(
|
|
11669
|
-
if (
|
|
11774
|
+
function buildMandatoryPrereadBlock(guidelineFiles) {
|
|
11775
|
+
if (guidelineFiles.length === 0) {
|
|
11670
11776
|
return "";
|
|
11671
11777
|
}
|
|
11672
11778
|
const fileList = [];
|
|
11673
|
-
const tokenList = [];
|
|
11674
11779
|
let counter = 0;
|
|
11675
|
-
for (const absolutePath of
|
|
11780
|
+
for (const absolutePath of guidelineFiles) {
|
|
11676
11781
|
counter += 1;
|
|
11677
11782
|
const fileName = path22.basename(absolutePath);
|
|
11678
|
-
const fileUri =
|
|
11679
|
-
fileList.push(
|
|
11680
|
-
tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
|
|
11783
|
+
const fileUri = pathToFileUri2(absolutePath);
|
|
11784
|
+
fileList.push(`* [${fileName}](${fileUri})`);
|
|
11681
11785
|
}
|
|
11682
|
-
const filesText = fileList.join("
|
|
11683
|
-
const tokensText = tokenList.join("\n");
|
|
11786
|
+
const filesText = fileList.join("\n");
|
|
11684
11787
|
const instruction = [
|
|
11685
|
-
`Read all
|
|
11686
|
-
|
|
11687
|
-
"`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
|
|
11688
|
-
`Then include, at the top of your reply, these exact tokens on separate lines:
|
|
11788
|
+
`Read all guideline files:
|
|
11789
|
+
${filesText}.
|
|
11689
11790
|
`,
|
|
11690
|
-
tokensText,
|
|
11691
|
-
`
|
|
11692
|
-
Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
|
|
11693
11791
|
`If any file is missing, fail with ERROR: missing-file <filename> and stop.
|
|
11694
11792
|
`,
|
|
11695
|
-
`Then
|
|
11696
|
-
].join("
|
|
11697
|
-
return
|
|
11698
|
-
|
|
11699
|
-
${instruction}
|
|
11700
|
-
|
|
11701
|
-
`;
|
|
11793
|
+
`Then apply system_instructions on the user query below.`
|
|
11794
|
+
].join("");
|
|
11795
|
+
return `${instruction}`;
|
|
11702
11796
|
}
|
|
11703
|
-
function
|
|
11797
|
+
function collectGuidelineFiles(attachments, guidelinePatterns) {
|
|
11704
11798
|
if (!attachments || attachments.length === 0) {
|
|
11705
11799
|
return [];
|
|
11706
11800
|
}
|
|
11707
11801
|
const unique = /* @__PURE__ */ new Map();
|
|
11708
11802
|
for (const attachment of attachments) {
|
|
11709
|
-
if (!isInstructionPath(attachment)) {
|
|
11710
|
-
continue;
|
|
11711
|
-
}
|
|
11712
11803
|
const absolutePath = path22.resolve(attachment);
|
|
11713
|
-
|
|
11714
|
-
|
|
11804
|
+
const normalized = absolutePath.split(path22.sep).join("/");
|
|
11805
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
11806
|
+
if (!unique.has(absolutePath)) {
|
|
11807
|
+
unique.set(absolutePath, absolutePath);
|
|
11808
|
+
}
|
|
11715
11809
|
}
|
|
11716
11810
|
}
|
|
11717
11811
|
return Array.from(unique.values());
|
|
11718
11812
|
}
|
|
11719
|
-
function
|
|
11720
|
-
const normalized = filePath.split(path22.sep).join("/");
|
|
11721
|
-
return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
|
|
11722
|
-
}
|
|
11723
|
-
function pathToFileUri(filePath) {
|
|
11813
|
+
function pathToFileUri2(filePath) {
|
|
11724
11814
|
const absolutePath = path22.isAbsolute(filePath) ? filePath : path22.resolve(filePath);
|
|
11725
11815
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
11726
11816
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
@@ -11728,14 +11818,6 @@ function pathToFileUri(filePath) {
|
|
|
11728
11818
|
}
|
|
11729
11819
|
return `file://${normalizedPath}`;
|
|
11730
11820
|
}
|
|
11731
|
-
function composeUserQuery(request) {
|
|
11732
|
-
const segments = [];
|
|
11733
|
-
segments.push(request.prompt.trim());
|
|
11734
|
-
if (request.guidelines && request.guidelines.trim().length > 0) {
|
|
11735
|
-
segments.push("\nGuidelines:\n", request.guidelines.trim());
|
|
11736
|
-
}
|
|
11737
|
-
return segments.join("\n").trim();
|
|
11738
|
-
}
|
|
11739
11821
|
function normalizeAttachments(attachments) {
|
|
11740
11822
|
if (!attachments || attachments.length === 0) {
|
|
11741
11823
|
return void 0;
|
|
@@ -11836,7 +11918,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
11836
11918
|
judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
|
|
11837
11919
|
};
|
|
11838
11920
|
}
|
|
11839
|
-
async function
|
|
11921
|
+
async function fileExists3(filePath) {
|
|
11840
11922
|
try {
|
|
11841
11923
|
await access22(filePath, constants22.F_OK);
|
|
11842
11924
|
return true;
|
|
@@ -11846,7 +11928,7 @@ async function fileExists22(filePath) {
|
|
|
11846
11928
|
}
|
|
11847
11929
|
async function readTargetDefinitions(filePath) {
|
|
11848
11930
|
const absolutePath = path32.resolve(filePath);
|
|
11849
|
-
if (!await
|
|
11931
|
+
if (!await fileExists3(absolutePath)) {
|
|
11850
11932
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
11851
11933
|
}
|
|
11852
11934
|
const raw = await readFile3(absolutePath, "utf8");
|
|
@@ -12061,7 +12143,7 @@ function extractKeyTerms(aspect, maxTerms = 5) {
|
|
|
12061
12143
|
var HeuristicGrader = class {
|
|
12062
12144
|
kind = "heuristic";
|
|
12063
12145
|
grade(context2) {
|
|
12064
|
-
const expectedAspects = extractAspects(context2.
|
|
12146
|
+
const expectedAspects = extractAspects(context2.evalCase.expected_assistant_raw);
|
|
12065
12147
|
const result = scoreCandidateResponse(context2.candidate, expectedAspects);
|
|
12066
12148
|
const misses = [...result.misses];
|
|
12067
12149
|
if (expectedAspects.length === 0 && isErrorLike(context2.candidate)) {
|
|
@@ -12094,14 +12176,14 @@ var QualityGrader = class {
|
|
|
12094
12176
|
if (!judgeProvider) {
|
|
12095
12177
|
throw new Error("No judge provider available for LLM grading");
|
|
12096
12178
|
}
|
|
12097
|
-
const prompt = buildQualityPrompt(context2.
|
|
12179
|
+
const prompt = buildQualityPrompt(context2.evalCase, context2.candidate);
|
|
12098
12180
|
const metadata = {
|
|
12099
12181
|
systemPrompt: QUALITY_SYSTEM_PROMPT
|
|
12100
12182
|
};
|
|
12101
12183
|
const response = await judgeProvider.invoke({
|
|
12102
12184
|
prompt,
|
|
12103
12185
|
metadata,
|
|
12104
|
-
|
|
12186
|
+
evalCaseId: context2.evalCase.id,
|
|
12105
12187
|
attempt: context2.attempt,
|
|
12106
12188
|
maxOutputTokens: this.maxOutputTokens,
|
|
12107
12189
|
temperature: this.temperature
|
|
@@ -12147,16 +12229,16 @@ var QUALITY_SYSTEM_PROMPT = [
|
|
|
12147
12229
|
function buildQualityPrompt(testCase, candidate) {
|
|
12148
12230
|
const parts = [
|
|
12149
12231
|
"[[ ## expected_outcome ## ]]",
|
|
12150
|
-
testCase.outcome,
|
|
12232
|
+
testCase.outcome.trim(),
|
|
12151
12233
|
"",
|
|
12152
12234
|
"[[ ## request ## ]]",
|
|
12153
|
-
testCase.task,
|
|
12235
|
+
testCase.task.trim(),
|
|
12154
12236
|
"",
|
|
12155
12237
|
"[[ ## reference_answer ## ]]",
|
|
12156
|
-
testCase.expected_assistant_raw,
|
|
12238
|
+
testCase.expected_assistant_raw.trim(),
|
|
12157
12239
|
"",
|
|
12158
12240
|
"[[ ## generated_answer ## ]]",
|
|
12159
|
-
candidate,
|
|
12241
|
+
candidate.trim(),
|
|
12160
12242
|
"",
|
|
12161
12243
|
"Respond with a single JSON object matching the schema described in the system prompt."
|
|
12162
12244
|
];
|
|
@@ -12394,10 +12476,10 @@ async function runEvaluation(options) {
|
|
|
12394
12476
|
onResult,
|
|
12395
12477
|
onProgress
|
|
12396
12478
|
} = options;
|
|
12397
|
-
const load =
|
|
12398
|
-
const
|
|
12399
|
-
const
|
|
12400
|
-
if (
|
|
12479
|
+
const load = loadEvalCases;
|
|
12480
|
+
const evalCases = await load(testFilePath, repoRoot, { verbose });
|
|
12481
|
+
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
12482
|
+
if (filteredEvalCases.length === 0) {
|
|
12401
12483
|
if (evalId) {
|
|
12402
12484
|
throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
|
|
12403
12485
|
}
|
|
@@ -12443,11 +12525,11 @@ async function runEvaluation(options) {
|
|
|
12443
12525
|
};
|
|
12444
12526
|
const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
|
|
12445
12527
|
const primaryProvider = getOrCreateProvider(target);
|
|
12446
|
-
if (onProgress &&
|
|
12447
|
-
for (let i6 = 0; i6 <
|
|
12528
|
+
if (onProgress && filteredEvalCases.length > 0) {
|
|
12529
|
+
for (let i6 = 0; i6 < filteredEvalCases.length; i6++) {
|
|
12448
12530
|
await onProgress({
|
|
12449
12531
|
workerId: i6 + 1,
|
|
12450
|
-
evalId:
|
|
12532
|
+
evalId: filteredEvalCases[i6].id,
|
|
12451
12533
|
status: "pending"
|
|
12452
12534
|
});
|
|
12453
12535
|
}
|
|
@@ -12456,22 +12538,22 @@ async function runEvaluation(options) {
|
|
|
12456
12538
|
const limit = pLimit(workers);
|
|
12457
12539
|
let nextWorkerId = 1;
|
|
12458
12540
|
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
12459
|
-
const promises =
|
|
12460
|
-
(
|
|
12541
|
+
const promises = filteredEvalCases.map(
|
|
12542
|
+
(evalCase) => limit(async () => {
|
|
12461
12543
|
const workerId = nextWorkerId++;
|
|
12462
|
-
workerIdByEvalId.set(
|
|
12544
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
12463
12545
|
if (onProgress) {
|
|
12464
12546
|
await onProgress({
|
|
12465
12547
|
workerId,
|
|
12466
|
-
evalId:
|
|
12548
|
+
evalId: evalCase.id,
|
|
12467
12549
|
status: "running",
|
|
12468
12550
|
startedAt: Date.now()
|
|
12469
12551
|
});
|
|
12470
12552
|
}
|
|
12471
12553
|
try {
|
|
12472
12554
|
const judgeProvider = await resolveJudgeProvider(target);
|
|
12473
|
-
const result = await
|
|
12474
|
-
|
|
12555
|
+
const result = await runEvalCase({
|
|
12556
|
+
evalCase,
|
|
12475
12557
|
provider: primaryProvider,
|
|
12476
12558
|
target,
|
|
12477
12559
|
graders: graderRegistry,
|
|
@@ -12486,7 +12568,7 @@ async function runEvaluation(options) {
|
|
|
12486
12568
|
if (onProgress) {
|
|
12487
12569
|
await onProgress({
|
|
12488
12570
|
workerId,
|
|
12489
|
-
evalId:
|
|
12571
|
+
evalId: evalCase.id,
|
|
12490
12572
|
status: "completed",
|
|
12491
12573
|
startedAt: 0,
|
|
12492
12574
|
// Not used for completed status
|
|
@@ -12501,7 +12583,7 @@ async function runEvaluation(options) {
|
|
|
12501
12583
|
if (onProgress) {
|
|
12502
12584
|
await onProgress({
|
|
12503
12585
|
workerId,
|
|
12504
|
-
evalId:
|
|
12586
|
+
evalId: evalCase.id,
|
|
12505
12587
|
status: "failed",
|
|
12506
12588
|
completedAt: Date.now(),
|
|
12507
12589
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -12518,10 +12600,10 @@ async function runEvaluation(options) {
|
|
|
12518
12600
|
if (outcome.status === "fulfilled") {
|
|
12519
12601
|
results.push(outcome.value);
|
|
12520
12602
|
} else {
|
|
12521
|
-
const
|
|
12522
|
-
const promptInputs = await buildPromptInputs(
|
|
12603
|
+
const evalCase = filteredEvalCases[i6];
|
|
12604
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
12523
12605
|
const errorResult = buildErrorResult(
|
|
12524
|
-
|
|
12606
|
+
evalCase,
|
|
12525
12607
|
target.name,
|
|
12526
12608
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
12527
12609
|
outcome.reason,
|
|
@@ -12535,9 +12617,9 @@ async function runEvaluation(options) {
|
|
|
12535
12617
|
}
|
|
12536
12618
|
return results;
|
|
12537
12619
|
}
|
|
12538
|
-
async function
|
|
12620
|
+
async function runEvalCase(options) {
|
|
12539
12621
|
const {
|
|
12540
|
-
|
|
12622
|
+
evalCase,
|
|
12541
12623
|
provider,
|
|
12542
12624
|
target,
|
|
12543
12625
|
graders,
|
|
@@ -12550,11 +12632,11 @@ async function runTestCase(options) {
|
|
|
12550
12632
|
signal,
|
|
12551
12633
|
judgeProvider
|
|
12552
12634
|
} = options;
|
|
12553
|
-
const promptInputs = await buildPromptInputs(
|
|
12635
|
+
const promptInputs = await buildPromptInputs(evalCase);
|
|
12554
12636
|
if (promptDumpDir) {
|
|
12555
|
-
await dumpPrompt(promptDumpDir,
|
|
12637
|
+
await dumpPrompt(promptDumpDir, evalCase, promptInputs);
|
|
12556
12638
|
}
|
|
12557
|
-
const cacheKey = useCache ? createCacheKey(provider, target,
|
|
12639
|
+
const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
|
|
12558
12640
|
let cachedResponse;
|
|
12559
12641
|
if (cacheKey && cache) {
|
|
12560
12642
|
cachedResponse = await cache.get(cacheKey);
|
|
@@ -12567,7 +12649,7 @@ async function runTestCase(options) {
|
|
|
12567
12649
|
while (!providerResponse && attempt < attemptBudget) {
|
|
12568
12650
|
try {
|
|
12569
12651
|
providerResponse = await invokeProvider(provider, {
|
|
12570
|
-
|
|
12652
|
+
evalCase,
|
|
12571
12653
|
target,
|
|
12572
12654
|
promptInputs,
|
|
12573
12655
|
attempt,
|
|
@@ -12580,12 +12662,12 @@ async function runTestCase(options) {
|
|
|
12580
12662
|
attempt += 1;
|
|
12581
12663
|
continue;
|
|
12582
12664
|
}
|
|
12583
|
-
return buildErrorResult(
|
|
12665
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
12584
12666
|
}
|
|
12585
12667
|
}
|
|
12586
12668
|
if (!providerResponse) {
|
|
12587
12669
|
return buildErrorResult(
|
|
12588
|
-
|
|
12670
|
+
evalCase,
|
|
12589
12671
|
target.name,
|
|
12590
12672
|
nowFn(),
|
|
12591
12673
|
lastError ?? new Error("Provider did not return a response"),
|
|
@@ -12595,7 +12677,7 @@ async function runTestCase(options) {
|
|
|
12595
12677
|
if (cacheKey && cache && !cachedResponse) {
|
|
12596
12678
|
await cache.set(cacheKey, providerResponse);
|
|
12597
12679
|
}
|
|
12598
|
-
const graderKind =
|
|
12680
|
+
const graderKind = evalCase.grader ?? "heuristic";
|
|
12599
12681
|
const activeGrader = graders[graderKind] ?? graders.heuristic;
|
|
12600
12682
|
if (!activeGrader) {
|
|
12601
12683
|
throw new Error(`No grader registered for kind '${graderKind}'`);
|
|
@@ -12604,7 +12686,7 @@ async function runTestCase(options) {
|
|
|
12604
12686
|
try {
|
|
12605
12687
|
const gradeTimestamp = nowFn();
|
|
12606
12688
|
grade = await activeGrader.grade({
|
|
12607
|
-
|
|
12689
|
+
evalCase,
|
|
12608
12690
|
candidate: providerResponse.text ?? "",
|
|
12609
12691
|
target,
|
|
12610
12692
|
provider,
|
|
@@ -12614,17 +12696,18 @@ async function runTestCase(options) {
|
|
|
12614
12696
|
judgeProvider
|
|
12615
12697
|
});
|
|
12616
12698
|
} catch (error) {
|
|
12617
|
-
return buildErrorResult(
|
|
12699
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
12618
12700
|
}
|
|
12619
12701
|
const completedAt = nowFn();
|
|
12620
12702
|
const rawRequest = {
|
|
12621
12703
|
request: promptInputs.request,
|
|
12622
12704
|
guidelines: promptInputs.guidelines,
|
|
12623
|
-
guideline_paths:
|
|
12705
|
+
guideline_paths: evalCase.guideline_paths,
|
|
12706
|
+
system_message: promptInputs.systemMessage ?? ""
|
|
12624
12707
|
};
|
|
12625
12708
|
return {
|
|
12626
|
-
eval_id:
|
|
12627
|
-
conversation_id:
|
|
12709
|
+
eval_id: evalCase.id,
|
|
12710
|
+
conversation_id: evalCase.conversation_id,
|
|
12628
12711
|
score: grade.score,
|
|
12629
12712
|
hits: grade.hits,
|
|
12630
12713
|
misses: grade.misses,
|
|
@@ -12638,11 +12721,11 @@ async function runTestCase(options) {
|
|
|
12638
12721
|
grader_raw_request: grade.graderRawRequest
|
|
12639
12722
|
};
|
|
12640
12723
|
}
|
|
12641
|
-
function
|
|
12724
|
+
function filterEvalCases(evalCases, evalId) {
|
|
12642
12725
|
if (!evalId) {
|
|
12643
|
-
return
|
|
12726
|
+
return evalCases;
|
|
12644
12727
|
}
|
|
12645
|
-
return
|
|
12728
|
+
return evalCases.filter((evalCase) => evalCase.id === evalId);
|
|
12646
12729
|
}
|
|
12647
12730
|
function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
12648
12731
|
const heuristic = overrides?.heuristic ?? new HeuristicGrader();
|
|
@@ -12660,16 +12743,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
|
|
|
12660
12743
|
llm_judge: llmJudge
|
|
12661
12744
|
};
|
|
12662
12745
|
}
|
|
12663
|
-
async function dumpPrompt(directory,
|
|
12746
|
+
async function dumpPrompt(directory, evalCase, promptInputs) {
|
|
12664
12747
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
12665
|
-
const filename = `${timestamp}_${sanitizeFilename(
|
|
12748
|
+
const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
|
|
12666
12749
|
const filePath = path42.resolve(directory, filename);
|
|
12667
12750
|
await mkdir3(path42.dirname(filePath), { recursive: true });
|
|
12668
12751
|
const payload = {
|
|
12669
|
-
eval_id:
|
|
12752
|
+
eval_id: evalCase.id,
|
|
12670
12753
|
request: promptInputs.request,
|
|
12671
12754
|
guidelines: promptInputs.guidelines,
|
|
12672
|
-
guideline_paths:
|
|
12755
|
+
guideline_paths: evalCase.guideline_paths
|
|
12673
12756
|
};
|
|
12674
12757
|
await writeFile22(filePath, JSON.stringify(payload, null, 2), "utf8");
|
|
12675
12758
|
}
|
|
@@ -12681,7 +12764,7 @@ function sanitizeFilename(value) {
|
|
|
12681
12764
|
return sanitized.length > 0 ? sanitized : randomUUID2();
|
|
12682
12765
|
}
|
|
12683
12766
|
async function invokeProvider(provider, options) {
|
|
12684
|
-
const {
|
|
12767
|
+
const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
12685
12768
|
const controller = new AbortController();
|
|
12686
12769
|
const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
|
|
12687
12770
|
if (signal) {
|
|
@@ -12691,12 +12774,12 @@ async function invokeProvider(provider, options) {
|
|
|
12691
12774
|
return await provider.invoke({
|
|
12692
12775
|
prompt: promptInputs.request,
|
|
12693
12776
|
guidelines: promptInputs.guidelines,
|
|
12694
|
-
|
|
12695
|
-
|
|
12777
|
+
guideline_patterns: evalCase.guideline_patterns,
|
|
12778
|
+
attachments: evalCase.file_paths,
|
|
12779
|
+
evalCaseId: evalCase.id,
|
|
12696
12780
|
attempt,
|
|
12697
12781
|
metadata: {
|
|
12698
|
-
|
|
12699
|
-
grader: testCase.grader
|
|
12782
|
+
systemPrompt: promptInputs.systemMessage ?? ""
|
|
12700
12783
|
},
|
|
12701
12784
|
signal: controller.signal
|
|
12702
12785
|
});
|
|
@@ -12706,17 +12789,18 @@ async function invokeProvider(provider, options) {
|
|
|
12706
12789
|
}
|
|
12707
12790
|
}
|
|
12708
12791
|
}
|
|
12709
|
-
function buildErrorResult(
|
|
12792
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
12710
12793
|
const message = error instanceof Error ? error.message : String(error);
|
|
12711
12794
|
const rawRequest = {
|
|
12712
12795
|
request: promptInputs.request,
|
|
12713
12796
|
guidelines: promptInputs.guidelines,
|
|
12714
|
-
guideline_paths:
|
|
12797
|
+
guideline_paths: evalCase.guideline_paths,
|
|
12798
|
+
system_message: promptInputs.systemMessage ?? "",
|
|
12715
12799
|
error: message
|
|
12716
12800
|
};
|
|
12717
12801
|
return {
|
|
12718
|
-
eval_id:
|
|
12719
|
-
conversation_id:
|
|
12802
|
+
eval_id: evalCase.id,
|
|
12803
|
+
conversation_id: evalCase.conversation_id,
|
|
12720
12804
|
score: 0,
|
|
12721
12805
|
hits: [],
|
|
12722
12806
|
misses: [`Error: ${message}`],
|
|
@@ -12728,13 +12812,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
|
|
|
12728
12812
|
raw_request: rawRequest
|
|
12729
12813
|
};
|
|
12730
12814
|
}
|
|
12731
|
-
function createCacheKey(provider, target,
|
|
12815
|
+
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
12732
12816
|
const hash = createHash("sha256");
|
|
12733
12817
|
hash.update(provider.id);
|
|
12734
12818
|
hash.update(target.name);
|
|
12735
|
-
hash.update(
|
|
12819
|
+
hash.update(evalCase.id);
|
|
12736
12820
|
hash.update(promptInputs.request);
|
|
12737
12821
|
hash.update(promptInputs.guidelines);
|
|
12822
|
+
hash.update(promptInputs.systemMessage ?? "");
|
|
12738
12823
|
return hash.digest("hex");
|
|
12739
12824
|
}
|
|
12740
12825
|
function isTimeoutLike(error) {
|
|
@@ -12780,7 +12865,7 @@ function uniqueDirs(directories) {
|
|
|
12780
12865
|
}
|
|
12781
12866
|
return result;
|
|
12782
12867
|
}
|
|
12783
|
-
async function
|
|
12868
|
+
async function fileExists4(filePath) {
|
|
12784
12869
|
try {
|
|
12785
12870
|
await access4(filePath, constants4.F_OK);
|
|
12786
12871
|
return true;
|
|
@@ -12816,7 +12901,7 @@ async function loadEnvFromHierarchy(options) {
|
|
|
12816
12901
|
]);
|
|
12817
12902
|
for (const dir of searchDirs) {
|
|
12818
12903
|
const candidate = path8.join(dir, ".env");
|
|
12819
|
-
if (await
|
|
12904
|
+
if (await fileExists4(candidate)) {
|
|
12820
12905
|
loadDotenv({ path: candidate, override: false });
|
|
12821
12906
|
if (verbose) {
|
|
12822
12907
|
console.log(`Loaded environment from: ${candidate}`);
|
|
@@ -13375,7 +13460,7 @@ function formatEvaluationSummary(summary) {
|
|
|
13375
13460
|
lines.push("\n==================================================");
|
|
13376
13461
|
lines.push("EVALUATION SUMMARY");
|
|
13377
13462
|
lines.push("==================================================");
|
|
13378
|
-
lines.push(`Total
|
|
13463
|
+
lines.push(`Total eval cases: ${summary.total}`);
|
|
13379
13464
|
lines.push(`Mean score: ${formatScore(summary.mean)}`);
|
|
13380
13465
|
lines.push(`Median score: ${formatScore(summary.median)}`);
|
|
13381
13466
|
lines.push(`Min score: ${formatScore(summary.min)}`);
|
|
@@ -13388,13 +13473,13 @@ function formatEvaluationSummary(summary) {
|
|
|
13388
13473
|
const [start, end] = bin.range;
|
|
13389
13474
|
lines.push(` ${start.toFixed(1)}-${end.toFixed(1)}: ${bin.count}`);
|
|
13390
13475
|
}
|
|
13391
|
-
lines.push("\nTop performing
|
|
13476
|
+
lines.push("\nTop performing eval cases:");
|
|
13392
13477
|
summary.topResults.forEach((result, index) => {
|
|
13393
|
-
lines.push(` ${index + 1}. ${result.
|
|
13478
|
+
lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
|
|
13394
13479
|
});
|
|
13395
|
-
lines.push("\nLowest performing
|
|
13480
|
+
lines.push("\nLowest performing eval cases:");
|
|
13396
13481
|
summary.bottomResults.forEach((result, index) => {
|
|
13397
|
-
lines.push(` ${index + 1}. ${result.
|
|
13482
|
+
lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
|
|
13398
13483
|
});
|
|
13399
13484
|
return lines.join("\n");
|
|
13400
13485
|
}
|
|
@@ -13410,7 +13495,7 @@ var TARGET_FILE_CANDIDATES = [
|
|
|
13410
13495
|
path11.join(".agentv", "targets.yaml"),
|
|
13411
13496
|
path11.join(".agentv", "targets.yml")
|
|
13412
13497
|
];
|
|
13413
|
-
async function
|
|
13498
|
+
async function fileExists5(filePath) {
|
|
13414
13499
|
try {
|
|
13415
13500
|
await access5(filePath, constants5.F_OK);
|
|
13416
13501
|
return true;
|
|
@@ -13432,56 +13517,30 @@ async function readTestSuiteTarget(testFilePath) {
|
|
|
13432
13517
|
}
|
|
13433
13518
|
return void 0;
|
|
13434
13519
|
}
|
|
13435
|
-
function buildDirectoryChain(testFilePath, repoRoot, cwd) {
|
|
13436
|
-
const directories = [];
|
|
13437
|
-
const seen = /* @__PURE__ */ new Set();
|
|
13438
|
-
const boundary = path11.resolve(repoRoot);
|
|
13439
|
-
let current = path11.resolve(path11.dirname(testFilePath));
|
|
13440
|
-
while (current !== void 0) {
|
|
13441
|
-
if (!seen.has(current)) {
|
|
13442
|
-
directories.push(current);
|
|
13443
|
-
seen.add(current);
|
|
13444
|
-
}
|
|
13445
|
-
if (current === boundary) {
|
|
13446
|
-
break;
|
|
13447
|
-
}
|
|
13448
|
-
const parent = path11.dirname(current);
|
|
13449
|
-
if (parent === current) {
|
|
13450
|
-
break;
|
|
13451
|
-
}
|
|
13452
|
-
current = parent;
|
|
13453
|
-
}
|
|
13454
|
-
if (!seen.has(boundary)) {
|
|
13455
|
-
directories.push(boundary);
|
|
13456
|
-
seen.add(boundary);
|
|
13457
|
-
}
|
|
13458
|
-
const resolvedCwd = path11.resolve(cwd);
|
|
13459
|
-
if (!seen.has(resolvedCwd)) {
|
|
13460
|
-
directories.push(resolvedCwd);
|
|
13461
|
-
seen.add(resolvedCwd);
|
|
13462
|
-
}
|
|
13463
|
-
return directories;
|
|
13464
|
-
}
|
|
13465
13520
|
async function discoverTargetsFile(options) {
|
|
13466
13521
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
13467
13522
|
if (explicitPath) {
|
|
13468
13523
|
const resolvedExplicit = path11.resolve(explicitPath);
|
|
13469
|
-
if (await
|
|
13524
|
+
if (await fileExists5(resolvedExplicit)) {
|
|
13470
13525
|
return resolvedExplicit;
|
|
13471
13526
|
}
|
|
13472
13527
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
13473
13528
|
const nested = path11.join(resolvedExplicit, candidate);
|
|
13474
|
-
if (await
|
|
13529
|
+
if (await fileExists5(nested)) {
|
|
13475
13530
|
return nested;
|
|
13476
13531
|
}
|
|
13477
13532
|
}
|
|
13478
13533
|
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
13479
13534
|
}
|
|
13480
|
-
const directories = buildDirectoryChain(testFilePath, repoRoot
|
|
13535
|
+
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
13536
|
+
const resolvedCwd = path11.resolve(cwd);
|
|
13537
|
+
if (!directories.includes(resolvedCwd)) {
|
|
13538
|
+
directories.push(resolvedCwd);
|
|
13539
|
+
}
|
|
13481
13540
|
for (const directory of directories) {
|
|
13482
13541
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
13483
13542
|
const fullPath = path11.join(directory, candidate);
|
|
13484
|
-
if (await
|
|
13543
|
+
if (await fileExists5(fullPath)) {
|
|
13485
13544
|
return fullPath;
|
|
13486
13545
|
}
|
|
13487
13546
|
}
|
|
@@ -13792,7 +13851,7 @@ function parseInteger(value, fallback) {
|
|
|
13792
13851
|
return parsed;
|
|
13793
13852
|
}
|
|
13794
13853
|
function registerEvalCommand(program) {
|
|
13795
|
-
program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the
|
|
13854
|
+
program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
|
|
13796
13855
|
"--workers <count>",
|
|
13797
13856
|
"Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
13798
13857
|
(value) => parseInteger(value, 1)
|
|
@@ -13830,25 +13889,164 @@ function registerEvalCommand(program) {
|
|
|
13830
13889
|
return program;
|
|
13831
13890
|
}
|
|
13832
13891
|
|
|
13833
|
-
// src/commands/
|
|
13834
|
-
import {
|
|
13835
|
-
import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
13892
|
+
// src/commands/init/index.ts
|
|
13893
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
13836
13894
|
import path14 from "node:path";
|
|
13837
13895
|
|
|
13896
|
+
// src/templates/index.ts
|
|
13897
|
+
import { readFileSync } from "node:fs";
|
|
13898
|
+
import path13 from "node:path";
|
|
13899
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
13900
|
+
var TemplateManager = class {
|
|
13901
|
+
static getTemplates() {
|
|
13902
|
+
const currentDir = path13.dirname(fileURLToPath2(import.meta.url));
|
|
13903
|
+
let templatesDir;
|
|
13904
|
+
if (currentDir.includes(path13.sep + "dist")) {
|
|
13905
|
+
templatesDir = path13.join(currentDir, "templates");
|
|
13906
|
+
} else {
|
|
13907
|
+
templatesDir = currentDir;
|
|
13908
|
+
}
|
|
13909
|
+
const evalBuildPrompt = readFileSync(
|
|
13910
|
+
path13.join(templatesDir, "eval-build.prompt.md"),
|
|
13911
|
+
"utf-8"
|
|
13912
|
+
);
|
|
13913
|
+
const evalSchema = readFileSync(
|
|
13914
|
+
path13.join(templatesDir, "eval-schema.json"),
|
|
13915
|
+
"utf-8"
|
|
13916
|
+
);
|
|
13917
|
+
const configSchema = readFileSync(
|
|
13918
|
+
path13.join(templatesDir, "config-schema.json"),
|
|
13919
|
+
"utf-8"
|
|
13920
|
+
);
|
|
13921
|
+
return [
|
|
13922
|
+
{
|
|
13923
|
+
path: "prompts/eval-build.prompt.md",
|
|
13924
|
+
content: evalBuildPrompt
|
|
13925
|
+
},
|
|
13926
|
+
{
|
|
13927
|
+
path: "contexts/eval-schema.json",
|
|
13928
|
+
content: evalSchema
|
|
13929
|
+
},
|
|
13930
|
+
{
|
|
13931
|
+
path: "contexts/config-schema.json",
|
|
13932
|
+
content: configSchema
|
|
13933
|
+
}
|
|
13934
|
+
];
|
|
13935
|
+
}
|
|
13936
|
+
};
|
|
13937
|
+
|
|
13938
|
+
// src/commands/init/index.ts
|
|
13939
|
+
async function initCommand(options = {}) {
|
|
13940
|
+
const targetPath = path14.resolve(options.targetPath ?? ".");
|
|
13941
|
+
const githubDir = path14.join(targetPath, ".github");
|
|
13942
|
+
if (!existsSync(githubDir)) {
|
|
13943
|
+
mkdirSync(githubDir, { recursive: true });
|
|
13944
|
+
}
|
|
13945
|
+
const templates = TemplateManager.getTemplates();
|
|
13946
|
+
for (const template of templates) {
|
|
13947
|
+
const targetFilePath = path14.join(githubDir, template.path);
|
|
13948
|
+
const targetDirPath = path14.dirname(targetFilePath);
|
|
13949
|
+
if (!existsSync(targetDirPath)) {
|
|
13950
|
+
mkdirSync(targetDirPath, { recursive: true });
|
|
13951
|
+
}
|
|
13952
|
+
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
13953
|
+
console.log(`Created ${path14.relative(targetPath, targetFilePath)}`);
|
|
13954
|
+
}
|
|
13955
|
+
console.log("\nAgentV initialized successfully!");
|
|
13956
|
+
console.log(`
|
|
13957
|
+
Files installed to ${path14.relative(targetPath, githubDir)}:`);
|
|
13958
|
+
templates.forEach((t) => console.log(` - ${t.path}`));
|
|
13959
|
+
console.log("\nYou can now create eval files using the schema and prompt templates.");
|
|
13960
|
+
}
|
|
13961
|
+
|
|
13962
|
+
// src/commands/validate/format-output.ts
|
|
13963
|
+
// ANSI SGR escape sequences used by the validation formatters below when
// colour output is enabled. ANSI_RESET2 terminates every coloured span.
var ANSI_RED = "\x1B[31m"; // red foreground
var ANSI_YELLOW2 = "\x1B[33m"; // yellow foreground
var ANSI_GREEN = "\x1B[32m"; // green foreground
var ANSI_CYAN = "\x1B[36m"; // cyan foreground
var ANSI_BOLD = "\x1B[1m"; // bold
var ANSI_RESET2 = "\x1B[0m"; // reset all attributes
|
|
13969
|
+
/**
 * Build the full validation report as one newline-joined string.
 *
 * Layout: blank line, "Validation Summary" header, blank line, one entry
 * per item in `summary.results`, blank line, the stats block, blank line.
 *
 * @param {object} summary - Passed through to `formatStats`; its `results`
 *   are rendered one by one with `formatFileResult`.
 * @param {boolean} useColors - Forwarded to every helper.
 * @returns {string} The assembled report.
 */
function formatSummary(summary, useColors) {
  const report = ["", formatHeader("Validation Summary", useColors), ""];
  for (const fileResult of summary.results) {
    report.push(formatFileResult(fileResult, useColors));
  }
  report.push("", formatStats(summary, useColors), "");
  return report.join("\n");
}
|
|
13982
|
+
/**
 * Format a section heading.
 *
 * @param {string} text - Heading text.
 * @param {boolean} useColors - When truthy, wrap the text in bold + cyan
 *   escape codes followed by a reset; otherwise return `text` untouched.
 * @returns {string} The heading, optionally coloured.
 */
function formatHeader(text, useColors) {
  return useColors ? `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}` : text;
}
|
|
13988
|
+
/**
 * Format the outcome for a single validated file.
 *
 * The first line is a check mark (valid) or cross (invalid) followed by the
 * file path; each entry of `result.errors` follows on its own line, rendered
 * by `formatError`.
 *
 * @param {{ valid: boolean, filePath: string, errors: object[] }} result
 * @param {boolean} useColors - When truthy the mark is green or red.
 * @returns {string} One or more newline-joined lines.
 */
function formatFileResult(result, useColors) {
  const mark = result.valid ? "\u2713" : "\u2717";
  const tint = result.valid ? ANSI_GREEN : ANSI_RED;
  const shownMark = useColors ? `${tint}${mark}${ANSI_RESET2}` : mark;
  const heading = `${shownMark} ${result.filePath}`;
  const details = result.errors.map((issue) => formatError(issue, useColors));
  return [heading, ...details].join("\n");
}
|
|
14002
|
+
/**
 * Format one validation issue as a single indented line.
 *
 * Issues whose severity is "error" get a cross marker (red when coloured);
 * every other severity gets a warning sign (yellow when coloured). A
 * non-empty `location` is shown in square brackets before the message.
 *
 * @param {{ severity: string, location?: string, message: string }} error
 * @param {boolean} useColors - Whether to colour the marker.
 * @returns {string} The formatted line.
 */
function formatError(error, useColors) {
  const isError = error.severity === "error";
  let marker = isError ? " \u2717" : " \u26A0";
  if (useColors) {
    const tint = isError ? ANSI_RED : ANSI_YELLOW2;
    marker = `${tint}${marker}${ANSI_RESET2}`;
  }
  const where = error.location ? ` [${error.location}]` : "";
  return `${marker}${where} ${error.message}`;
}
|
|
14009
|
+
/**
 * Format the three totals lines of the validation report.
 *
 * With colours: the total is bold, the valid count green, and the invalid
 * count red only when it is greater than zero. Without colours the three
 * lines are plain text.
 *
 * @param {{ totalFiles: number, validFiles: number, invalidFiles: number }} summary
 * @param {boolean} useColors - Whether to emit escape codes.
 * @returns {string} Three newline-joined lines.
 */
function formatStats(summary, useColors) {
  const totalText = `Total files: ${summary.totalFiles}`;
  const validText = `Valid: ${summary.validFiles}`;
  const invalidText = `Invalid: ${summary.invalidFiles}`;
  if (!useColors) {
    return [totalText, validText, invalidText].join("\n");
  }
  const hasInvalid = summary.invalidFiles > 0;
  return [
    `${ANSI_BOLD}${totalText}${ANSI_RESET2}`,
    `${ANSI_GREEN}${validText}${ANSI_RESET2}`,
    hasInvalid ? `${ANSI_RED}${invalidText}${ANSI_RESET2}` : invalidText,
  ].join("\n");
}
|
|
14029
|
+
/**
 * Report the `isTTY` flag of standard output.
 *
 * @returns {boolean} `process.stdout.isTTY`, or `false` when that property
 *   is null or undefined.
 */
function isTTY() {
  const { isTTY: attached } = process.stdout;
  return attached ?? false;
}
|
|
14032
|
+
|
|
13838
14033
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
13839
14034
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
13840
14035
|
import { parse as parse5 } from "yaml";
|
|
13841
14036
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
13842
|
-
import
|
|
14037
|
+
import path15 from "node:path";
|
|
13843
14038
|
import { parse as parse23 } from "yaml";
|
|
13844
14039
|
import { readFile as readFile32 } from "node:fs/promises";
|
|
13845
14040
|
import path23 from "node:path";
|
|
13846
14041
|
import { parse as parse32 } from "yaml";
|
|
13847
14042
|
import { readFile as readFile42 } from "node:fs/promises";
|
|
13848
|
-
import path33 from "node:path";
|
|
13849
14043
|
import { parse as parse42 } from "yaml";
|
|
14044
|
+
import { readFile as readFile52 } from "node:fs/promises";
|
|
14045
|
+
import path33 from "node:path";
|
|
14046
|
+
import { parse as parse52 } from "yaml";
|
|
13850
14047
|
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
13851
14048
|
var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
|
|
14049
|
+
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
13852
14050
|
async function detectFileType(filePath) {
|
|
13853
14051
|
try {
|
|
13854
14052
|
const content = await readFile5(filePath, "utf8");
|
|
@@ -13866,6 +14064,8 @@ async function detectFileType(filePath) {
|
|
|
13866
14064
|
return "eval";
|
|
13867
14065
|
case SCHEMA_TARGETS_V2:
|
|
13868
14066
|
return "targets";
|
|
14067
|
+
case SCHEMA_CONFIG_V22:
|
|
14068
|
+
return "config";
|
|
13869
14069
|
default:
|
|
13870
14070
|
return "unknown";
|
|
13871
14071
|
}
|
|
@@ -13879,7 +14079,7 @@ function isObject(value) {
|
|
|
13879
14079
|
}
|
|
13880
14080
|
async function validateEvalFile(filePath) {
|
|
13881
14081
|
const errors = [];
|
|
13882
|
-
const absolutePath =
|
|
14082
|
+
const absolutePath = path15.resolve(filePath);
|
|
13883
14083
|
let parsed;
|
|
13884
14084
|
try {
|
|
13885
14085
|
const content = await readFile23(absolutePath, "utf8");
|
|
@@ -14190,6 +14390,80 @@ async function validateTargetsFile(filePath) {
|
|
|
14190
14390
|
errors
|
|
14191
14391
|
};
|
|
14192
14392
|
}
|
|
14393
|
+
var SCHEMA_CONFIG_V222 = "agentv-config-v2";
|
|
14394
|
+
/**
 * Validate an AgentV config YAML file.
 *
 * Checks, in order:
 *  1. the document parses to a mapping (scalars, null and sequences are
 *     rejected immediately with a single error);
 *  2. `$schema` equals `SCHEMA_CONFIG_V222`;
 *  3. `guideline_patterns`, when present, is an array of strings (an empty
 *     array only produces a warning);
 *  4. any other top-level key is reported as a warning.
 *
 * A failure while reading or parsing the file is reported as a single
 * "Failed to parse config file" error rather than thrown.
 *
 * @param {string} filePath - Path of the file to validate; echoed back in
 *   the result and in every issue.
 * @returns {Promise<{ valid: boolean, filePath: string, fileType: "config", errors: object[] }>}
 *   `valid` is true when no issue has severity "error"; warnings do not
 *   make a file invalid.
 */
async function validateConfigFile(filePath) {
  const errors = [];
  try {
    const content = await readFile42(filePath, "utf8");
    const parsed = parse42(content);
    // `typeof [] === "object"`, so a top-level YAML sequence must be excluded
    // explicitly; otherwise it would be reported as "missing $schema" plus
    // "unexpected fields 0, 1, ..." instead of the real problem.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      errors.push({
        severity: "error",
        filePath,
        message: "Config file must contain a valid YAML object"
      });
      return { valid: false, filePath, fileType: "config", errors };
    }
    const config = parsed;
    const schema = config["$schema"];
    if (schema !== SCHEMA_CONFIG_V222) {
      // Distinguish a wrong value from an absent (or non-string) one so the
      // message tells the user exactly what to change.
      const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
      errors.push({
        severity: "error",
        filePath,
        location: "$schema",
        message
      });
    }
    const guidelinePatterns = config["guideline_patterns"];
    if (guidelinePatterns !== void 0) {
      if (!Array.isArray(guidelinePatterns)) {
        errors.push({
          severity: "error",
          filePath,
          location: "guideline_patterns",
          message: "Field 'guideline_patterns' must be an array"
        });
      } else if (!guidelinePatterns.every((p) => typeof p === "string")) {
        errors.push({
          severity: "error",
          filePath,
          location: "guideline_patterns",
          message: "All entries in 'guideline_patterns' must be strings"
        });
      } else if (guidelinePatterns.length === 0) {
        errors.push({
          severity: "warning",
          filePath,
          location: "guideline_patterns",
          message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
        });
      }
    }
    const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
    const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
    if (unexpectedFields.length > 0) {
      errors.push({
        severity: "warning",
        filePath,
        message: `Unexpected fields: ${unexpectedFields.join(", ")}`
      });
    }
    return {
      valid: !errors.some((e) => e.severity === "error"),
      filePath,
      fileType: "config",
      errors
    };
  } catch (error) {
    // Anything can be thrown in JavaScript; fall back to String() so the
    // message never reads "undefined" for non-Error values.
    const reason = error instanceof Error ? error.message : String(error);
    errors.push({
      severity: "error",
      filePath,
      message: `Failed to parse config file: ${reason}`
    });
    return { valid: false, filePath, fileType: "config", errors };
  }
}
|
|
14193
14467
|
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Any value.
 * @returns {boolean} True only when `typeof value` is "object" and the
 *   value is neither `null` nor an array.
 */
function isObject3(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
@@ -14208,8 +14482,8 @@ async function validateFileReferences(evalFilePath) {
|
|
|
14208
14482
|
const searchRoots = buildSearchRoots(absolutePath, gitRoot);
|
|
14209
14483
|
let parsed;
|
|
14210
14484
|
try {
|
|
14211
|
-
const content = await
|
|
14212
|
-
parsed =
|
|
14485
|
+
const content = await readFile52(absolutePath, "utf8");
|
|
14486
|
+
parsed = parse52(content);
|
|
14213
14487
|
} catch {
|
|
14214
14488
|
return errors;
|
|
14215
14489
|
}
|
|
@@ -14278,7 +14552,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
14278
14552
|
});
|
|
14279
14553
|
} else {
|
|
14280
14554
|
try {
|
|
14281
|
-
const fileContent = await
|
|
14555
|
+
const fileContent = await readFile52(resolvedPath, "utf8");
|
|
14282
14556
|
if (fileContent.trim().length === 0) {
|
|
14283
14557
|
errors.push({
|
|
14284
14558
|
severity: "warning",
|
|
@@ -14300,12 +14574,15 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
14300
14574
|
}
|
|
14301
14575
|
}
|
|
14302
14576
|
|
|
14303
|
-
// src/commands/
|
|
14304
|
-
|
|
14577
|
+
// src/commands/validate/validate-files.ts
|
|
14578
|
+
import { constants as constants7 } from "node:fs";
|
|
14579
|
+
import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
|
|
14580
|
+
import path16 from "node:path";
|
|
14581
|
+
async function validateFiles(paths) {
|
|
14305
14582
|
const filePaths = await expandPaths(paths);
|
|
14306
14583
|
const results = [];
|
|
14307
14584
|
for (const filePath of filePaths) {
|
|
14308
|
-
const result = await
|
|
14585
|
+
const result = await validateSingleFile(filePath);
|
|
14309
14586
|
results.push(result);
|
|
14310
14587
|
}
|
|
14311
14588
|
const validFiles = results.filter((r) => r.valid).length;
|
|
@@ -14317,8 +14594,8 @@ async function lintFiles(paths) {
|
|
|
14317
14594
|
results
|
|
14318
14595
|
};
|
|
14319
14596
|
}
|
|
14320
|
-
async function
|
|
14321
|
-
const absolutePath =
|
|
14597
|
+
async function validateSingleFile(filePath) {
|
|
14598
|
+
const absolutePath = path16.resolve(filePath);
|
|
14322
14599
|
const fileType = await detectFileType(absolutePath);
|
|
14323
14600
|
if (fileType === "unknown") {
|
|
14324
14601
|
return {
|
|
@@ -14329,7 +14606,7 @@ async function lintSingleFile(filePath) {
|
|
|
14329
14606
|
{
|
|
14330
14607
|
severity: "error",
|
|
14331
14608
|
filePath: absolutePath,
|
|
14332
|
-
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2' or 'agentv-
|
|
14609
|
+
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
|
|
14333
14610
|
}
|
|
14334
14611
|
]
|
|
14335
14612
|
};
|
|
@@ -14347,15 +14624,17 @@ async function lintSingleFile(filePath) {
|
|
|
14347
14624
|
};
|
|
14348
14625
|
}
|
|
14349
14626
|
}
|
|
14350
|
-
} else {
|
|
14627
|
+
} else if (fileType === "targets") {
|
|
14351
14628
|
result = await validateTargetsFile(absolutePath);
|
|
14629
|
+
} else {
|
|
14630
|
+
result = await validateConfigFile(absolutePath);
|
|
14352
14631
|
}
|
|
14353
14632
|
return result;
|
|
14354
14633
|
}
|
|
14355
14634
|
async function expandPaths(paths) {
|
|
14356
14635
|
const expanded = [];
|
|
14357
14636
|
for (const inputPath of paths) {
|
|
14358
|
-
const absolutePath =
|
|
14637
|
+
const absolutePath = path16.resolve(inputPath);
|
|
14359
14638
|
try {
|
|
14360
14639
|
await access7(absolutePath, constants7.F_OK);
|
|
14361
14640
|
} catch {
|
|
@@ -14379,7 +14658,7 @@ async function findYamlFiles(dirPath) {
|
|
|
14379
14658
|
try {
|
|
14380
14659
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
14381
14660
|
for (const entry of entries) {
|
|
14382
|
-
const fullPath =
|
|
14661
|
+
const fullPath = path16.join(dirPath, entry.name);
|
|
14383
14662
|
if (entry.isDirectory()) {
|
|
14384
14663
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
14385
14664
|
continue;
|
|
@@ -14396,98 +14675,27 @@ async function findYamlFiles(dirPath) {
|
|
|
14396
14675
|
return results;
|
|
14397
14676
|
}
|
|
14398
14677
|
/**
 * Tell whether a path has a YAML file extension.
 *
 * @param {string} filePath - File name or path.
 * @returns {boolean} True when the extension is ".yaml" or ".yml",
 *   compared case-insensitively.
 */
function isYamlFile(filePath) {
  const extension = path16.extname(filePath).toLowerCase();
  return [".yaml", ".yml"].includes(extension);
}
|
|
14402
14681
|
|
|
14403
|
-
// src/commands/
|
|
14404
|
-
|
|
14405
|
-
var ANSI_YELLOW2 = "\x1B[33m";
|
|
14406
|
-
var ANSI_GREEN = "\x1B[32m";
|
|
14407
|
-
var ANSI_CYAN = "\x1B[36m";
|
|
14408
|
-
var ANSI_BOLD = "\x1B[1m";
|
|
14409
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
14410
|
-
function formatSummary(summary, useColors) {
|
|
14411
|
-
const lines = [];
|
|
14412
|
-
lines.push("");
|
|
14413
|
-
lines.push(formatHeader("Validation Summary", useColors));
|
|
14414
|
-
lines.push("");
|
|
14415
|
-
for (const result of summary.results) {
|
|
14416
|
-
lines.push(formatFileResult(result, useColors));
|
|
14417
|
-
}
|
|
14418
|
-
lines.push("");
|
|
14419
|
-
lines.push(formatStats(summary, useColors));
|
|
14420
|
-
lines.push("");
|
|
14421
|
-
return lines.join("\n");
|
|
14422
|
-
}
|
|
14423
|
-
function formatHeader(text, useColors) {
|
|
14424
|
-
if (useColors) {
|
|
14425
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
|
|
14426
|
-
}
|
|
14427
|
-
return text;
|
|
14428
|
-
}
|
|
14429
|
-
function formatFileResult(result, useColors) {
|
|
14430
|
-
const lines = [];
|
|
14431
|
-
const status = result.valid ? "\u2713" : "\u2717";
|
|
14432
|
-
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
|
|
14433
|
-
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
|
|
14434
|
-
const fileName = result.filePath;
|
|
14435
|
-
lines.push(`${statusText} ${fileName}`);
|
|
14436
|
-
if (result.errors.length > 0) {
|
|
14437
|
-
for (const error of result.errors) {
|
|
14438
|
-
lines.push(formatError(error, useColors));
|
|
14439
|
-
}
|
|
14440
|
-
}
|
|
14441
|
-
return lines.join("\n");
|
|
14442
|
-
}
|
|
14443
|
-
function formatError(error, useColors) {
|
|
14444
|
-
const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
|
|
14445
|
-
const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
|
|
14446
|
-
const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
|
|
14447
|
-
const location = error.location ? ` [${error.location}]` : "";
|
|
14448
|
-
return `${coloredPrefix}${location} ${error.message}`;
|
|
14449
|
-
}
|
|
14450
|
-
function formatStats(summary, useColors) {
|
|
14451
|
-
const lines = [];
|
|
14452
|
-
const totalText = `Total files: ${summary.totalFiles}`;
|
|
14453
|
-
const validText = `Valid: ${summary.validFiles}`;
|
|
14454
|
-
const invalidText = `Invalid: ${summary.invalidFiles}`;
|
|
14455
|
-
if (useColors) {
|
|
14456
|
-
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
|
|
14457
|
-
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
|
|
14458
|
-
if (summary.invalidFiles > 0) {
|
|
14459
|
-
lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
|
|
14460
|
-
} else {
|
|
14461
|
-
lines.push(invalidText);
|
|
14462
|
-
}
|
|
14463
|
-
} else {
|
|
14464
|
-
lines.push(totalText);
|
|
14465
|
-
lines.push(validText);
|
|
14466
|
-
lines.push(invalidText);
|
|
14467
|
-
}
|
|
14468
|
-
return lines.join("\n");
|
|
14469
|
-
}
|
|
14470
|
-
function isTTY() {
|
|
14471
|
-
return process.stdout.isTTY ?? false;
|
|
14472
|
-
}
|
|
14473
|
-
|
|
14474
|
-
// src/commands/lint/index.ts
|
|
14475
|
-
async function runLintCommand(paths, options) {
|
|
14682
|
+
// src/commands/validate/index.ts
|
|
14683
|
+
/**
 * Entry point of `agentv validate`: validate the given paths, print the
 * formatted summary, and exit with status 1 when anything is invalid.
 *
 * @param {string[]} paths - Files or directories to validate. An empty list
 *   prints a usage error and exits with status 1.
 * @param {object} _options - Accepted for the command signature; not read.
 * @returns {Promise<void>}
 */
async function runValidateCommand(paths, _options) {
  const nothingToValidate = paths.length === 0;
  if (nothingToValidate) {
    console.error("Error: No paths specified. Usage: agentv validate <paths...>");
    process.exit(1);
  }
  const summary = await validateFiles(paths);
  // Colour is decided after validation, from the state of stdout at print time.
  const report = formatSummary(summary, isTTY());
  console.log(report);
  const hasInvalidFiles = summary.invalidFiles > 0;
  if (hasInvalidFiles) {
    process.exit(1);
  }
}
|
|
14487
|
-
function
|
|
14488
|
-
program.command("
|
|
14695
|
+
function registerValidateCommand(program) {
|
|
14696
|
+
program.command("validate").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to validate").action(async (paths, _options) => {
|
|
14489
14697
|
try {
|
|
14490
|
-
await
|
|
14698
|
+
await runValidateCommand(paths, _options);
|
|
14491
14699
|
} catch (error) {
|
|
14492
14700
|
console.error(`Error: ${error.message}`);
|
|
14493
14701
|
process.exit(1);
|
|
@@ -14505,68 +14713,6 @@ function registerStatusCommand(program) {
|
|
|
14505
14713
|
return program;
|
|
14506
14714
|
}
|
|
14507
14715
|
|
|
14508
|
-
// src/commands/init/index.ts
|
|
14509
|
-
import { existsSync as existsSync2, mkdirSync, writeFileSync } from "node:fs";
|
|
14510
|
-
import path16 from "node:path";
|
|
14511
|
-
|
|
14512
|
-
// src/templates/index.ts
|
|
14513
|
-
import { readFileSync } from "node:fs";
|
|
14514
|
-
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
14515
|
-
import path15 from "node:path";
|
|
14516
|
-
var TemplateManager = class {
|
|
14517
|
-
static getTemplates() {
|
|
14518
|
-
const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
|
|
14519
|
-
let templatesDir;
|
|
14520
|
-
if (currentDir.includes(path15.sep + "dist")) {
|
|
14521
|
-
templatesDir = path15.join(currentDir, "templates");
|
|
14522
|
-
} else {
|
|
14523
|
-
templatesDir = currentDir;
|
|
14524
|
-
}
|
|
14525
|
-
const evalBuildPrompt = readFileSync(
|
|
14526
|
-
path15.join(templatesDir, "eval-build.prompt.md"),
|
|
14527
|
-
"utf-8"
|
|
14528
|
-
);
|
|
14529
|
-
const evalSchema = readFileSync(
|
|
14530
|
-
path15.join(templatesDir, "eval-schema.json"),
|
|
14531
|
-
"utf-8"
|
|
14532
|
-
);
|
|
14533
|
-
return [
|
|
14534
|
-
{
|
|
14535
|
-
path: "prompts/eval-build.prompt.md",
|
|
14536
|
-
content: evalBuildPrompt
|
|
14537
|
-
},
|
|
14538
|
-
{
|
|
14539
|
-
path: "contexts/eval-schema.json",
|
|
14540
|
-
content: evalSchema
|
|
14541
|
-
}
|
|
14542
|
-
];
|
|
14543
|
-
}
|
|
14544
|
-
};
|
|
14545
|
-
|
|
14546
|
-
// src/commands/init/index.ts
|
|
14547
|
-
async function initCommand(options = {}) {
|
|
14548
|
-
const targetPath = path16.resolve(options.targetPath ?? ".");
|
|
14549
|
-
const githubDir = path16.join(targetPath, ".github");
|
|
14550
|
-
if (!existsSync2(githubDir)) {
|
|
14551
|
-
mkdirSync(githubDir, { recursive: true });
|
|
14552
|
-
}
|
|
14553
|
-
const templates = TemplateManager.getTemplates();
|
|
14554
|
-
for (const template of templates) {
|
|
14555
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
14556
|
-
const targetDirPath = path16.dirname(targetFilePath);
|
|
14557
|
-
if (!existsSync2(targetDirPath)) {
|
|
14558
|
-
mkdirSync(targetDirPath, { recursive: true });
|
|
14559
|
-
}
|
|
14560
|
-
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
14561
|
-
console.log(`Created ${path16.relative(targetPath, targetFilePath)}`);
|
|
14562
|
-
}
|
|
14563
|
-
console.log("\nAgentV initialized successfully!");
|
|
14564
|
-
console.log(`
|
|
14565
|
-
Files installed to ${path16.relative(targetPath, githubDir)}:`);
|
|
14566
|
-
templates.forEach((t) => console.log(` - ${t.path}`));
|
|
14567
|
-
console.log("\nYou can now create eval files using the schema and prompt templates.");
|
|
14568
|
-
}
|
|
14569
|
-
|
|
14570
14716
|
// src/index.ts
|
|
14571
14717
|
var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
|
|
14572
14718
|
function createProgram() {
|
|
@@ -14574,7 +14720,7 @@ function createProgram() {
|
|
|
14574
14720
|
program.name("agentv").description("AgentV CLI scaffolding").version(packageJson.version);
|
|
14575
14721
|
registerStatusCommand(program);
|
|
14576
14722
|
registerEvalCommand(program);
|
|
14577
|
-
|
|
14723
|
+
registerValidateCommand(program);
|
|
14578
14724
|
program.command("init [path]").description("Initialize AgentV in your project (installs prompt templates and schema to .github)").action(async (targetPath) => {
|
|
14579
14725
|
try {
|
|
14580
14726
|
await initCommand({ targetPath });
|
|
@@ -14595,4 +14741,4 @@ export {
|
|
|
14595
14741
|
createProgram,
|
|
14596
14742
|
runCli
|
|
14597
14743
|
};
|
|
14598
|
-
//# sourceMappingURL=chunk-
|
|
14744
|
+
//# sourceMappingURL=chunk-JT3E7T7V.js.map
|