agentv 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-X6NSDSD2.js → chunk-VHNACQDK.js} +1612 -1460
- package/dist/chunk-VHNACQDK.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/package.json +2 -2
- package/dist/chunk-X6NSDSD2.js.map +0 -1
|
@@ -588,9 +588,9 @@ import { readFileSync as readFileSync2 } from "node:fs";
|
|
|
588
588
|
// src/commands/eval/index.ts
|
|
589
589
|
import fg from "fast-glob";
|
|
590
590
|
import { stat as stat3 } from "node:fs/promises";
|
|
591
|
-
import
|
|
591
|
+
import path15 from "node:path";
|
|
592
592
|
|
|
593
|
-
// ../../packages/core/dist/chunk-
|
|
593
|
+
// ../../packages/core/dist/chunk-UQLHF3T7.js
|
|
594
594
|
import { constants } from "node:fs";
|
|
595
595
|
import { access, readFile } from "node:fs/promises";
|
|
596
596
|
import path from "node:path";
|
|
@@ -697,6 +697,11 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
697
697
|
}
|
|
698
698
|
return { displayPath, attempted };
|
|
699
699
|
}
|
|
700
|
+
var AGENT_PROVIDER_KINDS = [
|
|
701
|
+
"codex",
|
|
702
|
+
"vscode",
|
|
703
|
+
"vscode-insiders"
|
|
704
|
+
];
|
|
700
705
|
var KNOWN_PROVIDERS = [
|
|
701
706
|
"azure",
|
|
702
707
|
"anthropic",
|
|
@@ -723,7 +728,10 @@ var PROVIDER_ALIASES = [
|
|
|
723
728
|
"vertex"
|
|
724
729
|
// legacy/future support
|
|
725
730
|
];
|
|
726
|
-
var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
|
|
731
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
|
|
732
|
+
function isAgentProvider(provider) {
|
|
733
|
+
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
734
|
+
}
|
|
727
735
|
|
|
728
736
|
// ../../packages/core/dist/index.js
|
|
729
737
|
import micromatch from "micromatch";
|
|
@@ -5188,7 +5196,7 @@ import { promisify as promisify2 } from "node:util";
|
|
|
5188
5196
|
import { exec as execCallback, spawn as spawn2 } from "node:child_process";
|
|
5189
5197
|
import { randomUUID } from "node:crypto";
|
|
5190
5198
|
import { constants as constants22, createWriteStream } from "node:fs";
|
|
5191
|
-
import { access as access22,
|
|
5199
|
+
import { access as access22, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
|
|
5192
5200
|
import { tmpdir } from "node:os";
|
|
5193
5201
|
import path42 from "node:path";
|
|
5194
5202
|
import { promisify as promisify22 } from "node:util";
|
|
@@ -9236,16 +9244,15 @@ var coerce = {
|
|
|
9236
9244
|
var NEVER = INVALID;
|
|
9237
9245
|
|
|
9238
9246
|
// ../../packages/core/dist/index.js
|
|
9239
|
-
import { readFile as readFile22 } from "node:fs/promises";
|
|
9240
9247
|
import path52 from "node:path";
|
|
9241
9248
|
|
|
9242
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9249
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
9243
9250
|
import { exec, spawn } from "child_process";
|
|
9244
9251
|
import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile as readFile2, stat as stat2, writeFile } from "fs/promises";
|
|
9245
9252
|
import path6 from "path";
|
|
9246
9253
|
import { promisify } from "util";
|
|
9247
9254
|
|
|
9248
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9255
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/constants.js
|
|
9249
9256
|
import os from "os";
|
|
9250
9257
|
import path2 from "path";
|
|
9251
9258
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
@@ -9256,7 +9263,7 @@ function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
|
9256
9263
|
}
|
|
9257
9264
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
9258
9265
|
|
|
9259
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9266
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/fs.js
|
|
9260
9267
|
import { constants as constants2 } from "fs";
|
|
9261
9268
|
import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
|
|
9262
9269
|
import path3 from "path";
|
|
@@ -9289,7 +9296,7 @@ async function removeIfExists(target) {
|
|
|
9289
9296
|
}
|
|
9290
9297
|
}
|
|
9291
9298
|
|
|
9292
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9299
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/path.js
|
|
9293
9300
|
import path4 from "path";
|
|
9294
9301
|
function pathToFileUri(filePath) {
|
|
9295
9302
|
const absolutePath = path4.isAbsolute(filePath) ? filePath : path4.resolve(filePath);
|
|
@@ -9300,14 +9307,14 @@ function pathToFileUri(filePath) {
|
|
|
9300
9307
|
return `file://${normalizedPath}`;
|
|
9301
9308
|
}
|
|
9302
9309
|
|
|
9303
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9310
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/time.js
|
|
9304
9311
|
function sleep(ms) {
|
|
9305
9312
|
return new Promise((resolve) => {
|
|
9306
9313
|
setTimeout(resolve, ms);
|
|
9307
9314
|
});
|
|
9308
9315
|
}
|
|
9309
9316
|
|
|
9310
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9317
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/workspace.js
|
|
9311
9318
|
import path5 from "path";
|
|
9312
9319
|
|
|
9313
9320
|
// ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
|
|
@@ -10395,7 +10402,7 @@ var JSON5 = {
|
|
|
10395
10402
|
var lib = JSON5;
|
|
10396
10403
|
var dist_default = lib;
|
|
10397
10404
|
|
|
10398
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10405
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/workspace.js
|
|
10399
10406
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10400
10407
|
let workspace;
|
|
10401
10408
|
try {
|
|
@@ -10468,7 +10475,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10468
10475
|
return JSON.stringify(transformedWorkspace, null, 2);
|
|
10469
10476
|
}
|
|
10470
10477
|
|
|
10471
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10478
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
10472
10479
|
var execAsync = promisify(exec);
|
|
10473
10480
|
function generateTimestamp() {
|
|
10474
10481
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
|
|
@@ -10542,7 +10549,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
10542
10549
|
spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
10543
10550
|
await sleep(100);
|
|
10544
10551
|
const wakeupChatId = "wakeup";
|
|
10545
|
-
const chatArgs = ["-r", "chat", "-m", wakeupChatId,
|
|
10552
|
+
const chatArgs = ["-r", "chat", "-m", wakeupChatId, `create a file named .alive in the ${path6.basename(subagentDir)} folder`];
|
|
10546
10553
|
spawn(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
|
|
10547
10554
|
const start = Date.now();
|
|
10548
10555
|
while (!await pathExists(aliveFile)) {
|
|
@@ -11050,7 +11057,7 @@ async function dispatchBatchAgent(options) {
|
|
|
11050
11057
|
}
|
|
11051
11058
|
}
|
|
11052
11059
|
|
|
11053
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
11060
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/provision.js
|
|
11054
11061
|
import { writeFile as writeFile2 } from "fs/promises";
|
|
11055
11062
|
import path7 from "path";
|
|
11056
11063
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
@@ -11176,7 +11183,7 @@ async function provisionSubagents(options) {
|
|
|
11176
11183
|
|
|
11177
11184
|
// ../../packages/core/dist/index.js
|
|
11178
11185
|
import { constants as constants32 } from "node:fs";
|
|
11179
|
-
import { access as access32, readFile as
|
|
11186
|
+
import { access as access32, readFile as readFile22 } from "node:fs/promises";
|
|
11180
11187
|
import path62 from "node:path";
|
|
11181
11188
|
import { parse as parse22 } from "yaml";
|
|
11182
11189
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
@@ -12304,7 +12311,6 @@ function pathToFileUri2(filePath) {
|
|
|
12304
12311
|
var execAsync22 = promisify22(execCallback);
|
|
12305
12312
|
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
12306
12313
|
var PROMPT_FILENAME = "prompt.md";
|
|
12307
|
-
var FILES_DIR = "files";
|
|
12308
12314
|
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
12309
12315
|
var CodexProvider = class {
|
|
12310
12316
|
id;
|
|
@@ -12327,21 +12333,10 @@ var CodexProvider = class {
|
|
|
12327
12333
|
}
|
|
12328
12334
|
await this.ensureEnvironmentReady();
|
|
12329
12335
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
12330
|
-
const originalGuidelines = new Set(
|
|
12331
|
-
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path42.resolve(file))
|
|
12332
|
-
);
|
|
12333
12336
|
const workspaceRoot = await this.createWorkspace();
|
|
12334
12337
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
12335
12338
|
try {
|
|
12336
|
-
const
|
|
12337
|
-
inputFiles,
|
|
12338
|
-
workspaceRoot,
|
|
12339
|
-
originalGuidelines
|
|
12340
|
-
);
|
|
12341
|
-
const promptContent = buildPromptDocument(request, mirroredInputFiles, {
|
|
12342
|
-
guidelinePatterns: request.guideline_patterns,
|
|
12343
|
-
guidelineOverrides: guidelineMirrors
|
|
12344
|
-
});
|
|
12339
|
+
const promptContent = buildPromptDocument(request, inputFiles);
|
|
12345
12340
|
const promptFile = path42.join(workspaceRoot, PROMPT_FILENAME);
|
|
12346
12341
|
await writeFile3(promptFile, promptContent, "utf8");
|
|
12347
12342
|
const args = this.buildCodexArgs();
|
|
@@ -12370,7 +12365,7 @@ var CodexProvider = class {
|
|
|
12370
12365
|
executable: this.resolvedExecutable ?? this.config.executable,
|
|
12371
12366
|
promptFile,
|
|
12372
12367
|
workspace: workspaceRoot,
|
|
12373
|
-
inputFiles
|
|
12368
|
+
inputFiles,
|
|
12374
12369
|
logFile: logger?.filePath
|
|
12375
12370
|
}
|
|
12376
12371
|
};
|
|
@@ -12425,37 +12420,6 @@ var CodexProvider = class {
|
|
|
12425
12420
|
throw error;
|
|
12426
12421
|
}
|
|
12427
12422
|
}
|
|
12428
|
-
async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
|
|
12429
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
12430
|
-
return {
|
|
12431
|
-
mirroredInputFiles: void 0,
|
|
12432
|
-
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
12433
|
-
};
|
|
12434
|
-
}
|
|
12435
|
-
const filesRoot = path42.join(workspaceRoot, FILES_DIR);
|
|
12436
|
-
await mkdir3(filesRoot, { recursive: true });
|
|
12437
|
-
const mirrored = [];
|
|
12438
|
-
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
12439
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
12440
|
-
for (const inputFile of inputFiles) {
|
|
12441
|
-
const absoluteSource = path42.resolve(inputFile);
|
|
12442
|
-
const baseName = path42.basename(absoluteSource);
|
|
12443
|
-
const count = nameCounts.get(baseName) ?? 0;
|
|
12444
|
-
nameCounts.set(baseName, count + 1);
|
|
12445
|
-
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
12446
|
-
const destination = path42.join(filesRoot, finalName);
|
|
12447
|
-
await copyFile2(absoluteSource, destination);
|
|
12448
|
-
const resolvedDestination = path42.resolve(destination);
|
|
12449
|
-
mirrored.push(resolvedDestination);
|
|
12450
|
-
if (guidelineOriginals.has(absoluteSource)) {
|
|
12451
|
-
guidelineMirrors.add(resolvedDestination);
|
|
12452
|
-
}
|
|
12453
|
-
}
|
|
12454
|
-
return {
|
|
12455
|
-
mirroredInputFiles: mirrored,
|
|
12456
|
-
guidelineMirrors
|
|
12457
|
-
};
|
|
12458
|
-
}
|
|
12459
12423
|
async createWorkspace() {
|
|
12460
12424
|
return await mkdtemp(path42.join(tmpdir(), WORKSPACE_PREFIX));
|
|
12461
12425
|
}
|
|
@@ -13421,23 +13385,25 @@ function resolveOptionalString(source2, env, description, options) {
|
|
|
13421
13385
|
if (trimmed.length === 0) {
|
|
13422
13386
|
return void 0;
|
|
13423
13387
|
}
|
|
13424
|
-
const
|
|
13425
|
-
if (
|
|
13426
|
-
|
|
13427
|
-
|
|
13388
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
13389
|
+
if (envVarMatch) {
|
|
13390
|
+
const varName = envVarMatch[1];
|
|
13391
|
+
const envValue = env[varName];
|
|
13392
|
+
if (envValue !== void 0) {
|
|
13393
|
+
if (envValue.trim().length === 0) {
|
|
13394
|
+
throw new Error(`Environment variable '${varName}' for ${description} is empty`);
|
|
13395
|
+
}
|
|
13396
|
+
return envValue;
|
|
13428
13397
|
}
|
|
13429
|
-
|
|
13430
|
-
}
|
|
13431
|
-
const allowLiteral = options?.allowLiteral ?? false;
|
|
13432
|
-
const optionalEnv = options?.optionalEnv ?? false;
|
|
13433
|
-
const looksLikeEnv = isLikelyEnvReference(trimmed);
|
|
13434
|
-
if (looksLikeEnv) {
|
|
13398
|
+
const optionalEnv = options?.optionalEnv ?? false;
|
|
13435
13399
|
if (optionalEnv) {
|
|
13436
13400
|
return void 0;
|
|
13437
13401
|
}
|
|
13438
|
-
|
|
13439
|
-
|
|
13440
|
-
|
|
13402
|
+
throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
|
|
13403
|
+
}
|
|
13404
|
+
const allowLiteral = options?.allowLiteral ?? false;
|
|
13405
|
+
if (!allowLiteral) {
|
|
13406
|
+
throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
|
|
13441
13407
|
}
|
|
13442
13408
|
return trimmed;
|
|
13443
13409
|
}
|
|
@@ -13484,9 +13450,6 @@ function resolveOptionalBoolean(source2) {
|
|
|
13484
13450
|
}
|
|
13485
13451
|
throw new Error("expected boolean value");
|
|
13486
13452
|
}
|
|
13487
|
-
function isLikelyEnvReference(value) {
|
|
13488
|
-
return /^[A-Z0-9_]+$/.test(value);
|
|
13489
|
-
}
|
|
13490
13453
|
function resolveOptionalStringArray(source2, env, description) {
|
|
13491
13454
|
if (source2 === void 0 || source2 === null) {
|
|
13492
13455
|
return void 0;
|
|
@@ -13507,15 +13470,20 @@ function resolveOptionalStringArray(source2, env, description) {
|
|
|
13507
13470
|
if (trimmed.length === 0) {
|
|
13508
13471
|
throw new Error(`${description}[${i6}] cannot be empty`);
|
|
13509
13472
|
}
|
|
13510
|
-
const
|
|
13511
|
-
if (
|
|
13512
|
-
|
|
13513
|
-
|
|
13473
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
13474
|
+
if (envVarMatch) {
|
|
13475
|
+
const varName = envVarMatch[1];
|
|
13476
|
+
const envValue = env[varName];
|
|
13477
|
+
if (envValue !== void 0) {
|
|
13478
|
+
if (envValue.trim().length === 0) {
|
|
13479
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i6}] is empty`);
|
|
13480
|
+
}
|
|
13481
|
+
resolved.push(envValue);
|
|
13482
|
+
continue;
|
|
13514
13483
|
}
|
|
13515
|
-
|
|
13516
|
-
} else {
|
|
13517
|
-
resolved.push(trimmed);
|
|
13484
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i6}] is not set`);
|
|
13518
13485
|
}
|
|
13486
|
+
resolved.push(trimmed);
|
|
13519
13487
|
}
|
|
13520
13488
|
return resolved.length > 0 ? resolved : void 0;
|
|
13521
13489
|
}
|
|
@@ -13560,7 +13528,7 @@ var VSCodeProvider = class {
|
|
|
13560
13528
|
}
|
|
13561
13529
|
};
|
|
13562
13530
|
}
|
|
13563
|
-
const responseText = await
|
|
13531
|
+
const responseText = await readTextFile(session.responseFile);
|
|
13564
13532
|
return {
|
|
13565
13533
|
text: responseText,
|
|
13566
13534
|
raw: {
|
|
@@ -13614,7 +13582,7 @@ var VSCodeProvider = class {
|
|
|
13614
13582
|
}
|
|
13615
13583
|
const responses = [];
|
|
13616
13584
|
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
13617
|
-
const responseText = await
|
|
13585
|
+
const responseText = await readTextFile(responseFile);
|
|
13618
13586
|
responses.push({
|
|
13619
13587
|
text: responseText,
|
|
13620
13588
|
raw: {
|
|
@@ -13826,7 +13794,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
13826
13794
|
if (!await fileExists3(absolutePath)) {
|
|
13827
13795
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
13828
13796
|
}
|
|
13829
|
-
const raw = await
|
|
13797
|
+
const raw = await readFile22(absolutePath, "utf8");
|
|
13830
13798
|
const parsed = parse22(raw);
|
|
13831
13799
|
if (!isRecord(parsed)) {
|
|
13832
13800
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
@@ -14471,7 +14439,8 @@ async function runEvaluation(options) {
|
|
|
14471
14439
|
target.name,
|
|
14472
14440
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
14473
14441
|
outcome.reason,
|
|
14474
|
-
promptInputs
|
|
14442
|
+
promptInputs,
|
|
14443
|
+
primaryProvider
|
|
14475
14444
|
);
|
|
14476
14445
|
results.push(errorResult);
|
|
14477
14446
|
if (onResult) {
|
|
@@ -14555,7 +14524,7 @@ async function runBatchEvaluation(options) {
|
|
|
14555
14524
|
agentTimeoutMs
|
|
14556
14525
|
});
|
|
14557
14526
|
} catch (error) {
|
|
14558
|
-
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
14527
|
+
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
14559
14528
|
results.push(errorResult);
|
|
14560
14529
|
if (onResult) {
|
|
14561
14530
|
await onResult(errorResult);
|
|
@@ -14632,7 +14601,7 @@ async function runEvalCase(options) {
|
|
|
14632
14601
|
attempt += 1;
|
|
14633
14602
|
continue;
|
|
14634
14603
|
}
|
|
14635
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
14604
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
14636
14605
|
}
|
|
14637
14606
|
}
|
|
14638
14607
|
if (!providerResponse) {
|
|
@@ -14641,7 +14610,8 @@ async function runEvalCase(options) {
|
|
|
14641
14610
|
target.name,
|
|
14642
14611
|
nowFn(),
|
|
14643
14612
|
lastError ?? new Error("Provider did not return a response"),
|
|
14644
|
-
promptInputs
|
|
14613
|
+
promptInputs,
|
|
14614
|
+
provider
|
|
14645
14615
|
);
|
|
14646
14616
|
}
|
|
14647
14617
|
if (cacheKey && cache && !cachedResponse) {
|
|
@@ -14661,7 +14631,7 @@ async function runEvalCase(options) {
|
|
|
14661
14631
|
agentTimeoutMs
|
|
14662
14632
|
});
|
|
14663
14633
|
} catch (error) {
|
|
14664
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
14634
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
14665
14635
|
}
|
|
14666
14636
|
}
|
|
14667
14637
|
async function evaluateCandidate(options) {
|
|
@@ -14693,7 +14663,7 @@ async function evaluateCandidate(options) {
|
|
|
14693
14663
|
const completedAt = nowFn();
|
|
14694
14664
|
const rawRequest = {
|
|
14695
14665
|
question: promptInputs.question,
|
|
14696
|
-
guidelines: promptInputs.guidelines,
|
|
14666
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
14697
14667
|
guideline_paths: evalCase.guideline_paths,
|
|
14698
14668
|
system_message: promptInputs.systemMessage ?? ""
|
|
14699
14669
|
};
|
|
@@ -14953,11 +14923,11 @@ async function invokeProvider(provider, options) {
|
|
|
14953
14923
|
}
|
|
14954
14924
|
}
|
|
14955
14925
|
}
|
|
14956
|
-
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
14926
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
|
|
14957
14927
|
const message = error instanceof Error ? error.message : String(error);
|
|
14958
14928
|
const rawRequest = {
|
|
14959
14929
|
question: promptInputs.question,
|
|
14960
|
-
guidelines: promptInputs.guidelines,
|
|
14930
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
14961
14931
|
guideline_paths: evalCase.guideline_paths,
|
|
14962
14932
|
system_message: promptInputs.systemMessage ?? "",
|
|
14963
14933
|
error: message
|
|
@@ -15009,7 +14979,7 @@ function createAgentKernel() {
|
|
|
15009
14979
|
// src/commands/eval/run-eval.ts
|
|
15010
14980
|
import { constants as constants6 } from "node:fs";
|
|
15011
14981
|
import { access as access6, mkdir as mkdir6 } from "node:fs/promises";
|
|
15012
|
-
import
|
|
14982
|
+
import path14 from "node:path";
|
|
15013
14983
|
import { pathToFileURL } from "node:url";
|
|
15014
14984
|
|
|
15015
14985
|
// src/commands/eval/env.ts
|
|
@@ -15694,883 +15664,580 @@ function formatEvaluationSummary(summary) {
|
|
|
15694
15664
|
return lines.join("\n");
|
|
15695
15665
|
}
|
|
15696
15666
|
|
|
15697
|
-
//
|
|
15698
|
-
import {
|
|
15699
|
-
import { access as access5, readFile as readFile4 } from "node:fs/promises";
|
|
15700
|
-
import path12 from "node:path";
|
|
15667
|
+
// ../../packages/core/dist/evaluation/validation/index.js
|
|
15668
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
15701
15669
|
import { parse as parse4 } from "yaml";
|
|
15702
|
-
|
|
15703
|
-
|
|
15704
|
-
|
|
15705
|
-
|
|
15706
|
-
|
|
15707
|
-
|
|
15708
|
-
|
|
15709
|
-
|
|
15710
|
-
|
|
15711
|
-
|
|
15712
|
-
|
|
15713
|
-
|
|
15714
|
-
|
|
15715
|
-
|
|
15716
|
-
async function
|
|
15670
|
+
import { readFile as readFile23 } from "node:fs/promises";
|
|
15671
|
+
import path12 from "node:path";
|
|
15672
|
+
import { parse as parse23 } from "yaml";
|
|
15673
|
+
import { readFile as readFile32 } from "node:fs/promises";
|
|
15674
|
+
import path23 from "node:path";
|
|
15675
|
+
import { parse as parse32 } from "yaml";
|
|
15676
|
+
import { readFile as readFile42 } from "node:fs/promises";
|
|
15677
|
+
import { parse as parse42 } from "yaml";
|
|
15678
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
15679
|
+
import path33 from "node:path";
|
|
15680
|
+
import { parse as parse5 } from "yaml";
|
|
15681
|
+
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
15682
|
+
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.1";
|
|
15683
|
+
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
15684
|
+
async function detectFileType(filePath) {
|
|
15717
15685
|
try {
|
|
15718
|
-
const
|
|
15719
|
-
const parsed = parse4(
|
|
15720
|
-
if (
|
|
15721
|
-
|
|
15722
|
-
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
15723
|
-
return targetValue.trim();
|
|
15724
|
-
}
|
|
15725
|
-
}
|
|
15726
|
-
} catch {
|
|
15727
|
-
}
|
|
15728
|
-
return void 0;
|
|
15729
|
-
}
|
|
15730
|
-
async function discoverTargetsFile(options) {
|
|
15731
|
-
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
15732
|
-
if (explicitPath) {
|
|
15733
|
-
const resolvedExplicit = path12.resolve(explicitPath);
|
|
15734
|
-
if (await fileExists5(resolvedExplicit)) {
|
|
15735
|
-
return resolvedExplicit;
|
|
15686
|
+
const content = await readFile4(filePath, "utf8");
|
|
15687
|
+
const parsed = parse4(content);
|
|
15688
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
15689
|
+
return "unknown";
|
|
15736
15690
|
}
|
|
15737
|
-
|
|
15738
|
-
|
|
15739
|
-
|
|
15740
|
-
|
|
15741
|
-
}
|
|
15691
|
+
const record = parsed;
|
|
15692
|
+
const schema = record["$schema"];
|
|
15693
|
+
if (typeof schema !== "string") {
|
|
15694
|
+
return "unknown";
|
|
15742
15695
|
}
|
|
15743
|
-
|
|
15744
|
-
|
|
15745
|
-
|
|
15746
|
-
|
|
15747
|
-
|
|
15748
|
-
|
|
15749
|
-
|
|
15750
|
-
|
|
15751
|
-
|
|
15752
|
-
const fullPath = path12.join(directory, candidate);
|
|
15753
|
-
if (await fileExists5(fullPath)) {
|
|
15754
|
-
return fullPath;
|
|
15755
|
-
}
|
|
15696
|
+
switch (schema) {
|
|
15697
|
+
case SCHEMA_EVAL_V22:
|
|
15698
|
+
return "eval";
|
|
15699
|
+
case SCHEMA_TARGETS_V2:
|
|
15700
|
+
return "targets";
|
|
15701
|
+
case SCHEMA_CONFIG_V22:
|
|
15702
|
+
return "config";
|
|
15703
|
+
default:
|
|
15704
|
+
return "unknown";
|
|
15756
15705
|
}
|
|
15706
|
+
} catch {
|
|
15707
|
+
return "unknown";
|
|
15757
15708
|
}
|
|
15758
|
-
throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
|
|
15759
15709
|
}
|
|
15760
|
-
|
|
15761
|
-
|
|
15762
|
-
|
|
15763
|
-
return { name: cliName, source: "cli" };
|
|
15764
|
-
}
|
|
15765
|
-
const fileName = options.fileTargetName?.trim();
|
|
15766
|
-
if (fileName && fileName.length > 0) {
|
|
15767
|
-
return { name: fileName, source: "test-file" };
|
|
15768
|
-
}
|
|
15769
|
-
return { name: "default", source: "default" };
|
|
15710
|
+
var SCHEMA_EVAL_V222 = "agentv-eval-v2";
|
|
15711
|
+
function isObject(value) {
|
|
15712
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
15770
15713
|
}
|
|
15771
|
-
async function
|
|
15772
|
-
const
|
|
15773
|
-
const
|
|
15774
|
-
|
|
15775
|
-
|
|
15776
|
-
|
|
15777
|
-
|
|
15778
|
-
})
|
|
15779
|
-
|
|
15780
|
-
|
|
15781
|
-
|
|
15782
|
-
|
|
15783
|
-
|
|
15784
|
-
const available = listTargetNames(definitions).join(", ");
|
|
15785
|
-
throw new Error(
|
|
15786
|
-
`Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
|
|
15787
|
-
);
|
|
15788
|
-
}
|
|
15789
|
-
if (dryRun) {
|
|
15790
|
-
const mockTarget = {
|
|
15791
|
-
kind: "mock",
|
|
15792
|
-
name: `${targetDefinition.name}-dry-run`,
|
|
15793
|
-
judgeTarget: void 0,
|
|
15794
|
-
config: {
|
|
15795
|
-
response: '{"answer":"Mock dry-run response"}',
|
|
15796
|
-
delayMs: dryRunDelay,
|
|
15797
|
-
delayMinMs: dryRunDelayMin,
|
|
15798
|
-
delayMaxMs: dryRunDelayMax
|
|
15799
|
-
}
|
|
15800
|
-
};
|
|
15714
|
+
async function validateEvalFile(filePath) {
|
|
15715
|
+
const errors = [];
|
|
15716
|
+
const absolutePath = path12.resolve(filePath);
|
|
15717
|
+
let parsed;
|
|
15718
|
+
try {
|
|
15719
|
+
const content = await readFile23(absolutePath, "utf8");
|
|
15720
|
+
parsed = parse23(content);
|
|
15721
|
+
} catch (error) {
|
|
15722
|
+
errors.push({
|
|
15723
|
+
severity: "error",
|
|
15724
|
+
filePath: absolutePath,
|
|
15725
|
+
message: `Failed to parse YAML: ${error.message}`
|
|
15726
|
+
});
|
|
15801
15727
|
return {
|
|
15802
|
-
|
|
15803
|
-
|
|
15804
|
-
|
|
15805
|
-
|
|
15806
|
-
targetsFilePath
|
|
15728
|
+
valid: false,
|
|
15729
|
+
filePath: absolutePath,
|
|
15730
|
+
fileType: "eval",
|
|
15731
|
+
errors
|
|
15807
15732
|
};
|
|
15808
15733
|
}
|
|
15809
|
-
|
|
15810
|
-
|
|
15734
|
+
if (!isObject(parsed)) {
|
|
15735
|
+
errors.push({
|
|
15736
|
+
severity: "error",
|
|
15737
|
+
filePath: absolutePath,
|
|
15738
|
+
message: "File must contain a YAML object"
|
|
15739
|
+
});
|
|
15811
15740
|
return {
|
|
15812
|
-
|
|
15813
|
-
|
|
15814
|
-
|
|
15815
|
-
|
|
15816
|
-
targetsFilePath
|
|
15741
|
+
valid: false,
|
|
15742
|
+
filePath: absolutePath,
|
|
15743
|
+
fileType: "eval",
|
|
15744
|
+
errors
|
|
15817
15745
|
};
|
|
15818
|
-
} catch (error) {
|
|
15819
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
15820
|
-
throw new Error(`Failed to resolve target '${targetChoice.name}': ${message}`);
|
|
15821
15746
|
}
|
|
15822
|
-
|
|
15823
|
-
|
|
15824
|
-
|
|
15825
|
-
|
|
15826
|
-
|
|
15827
|
-
|
|
15828
|
-
|
|
15829
|
-
|
|
15830
|
-
|
|
15831
|
-
return void 0;
|
|
15747
|
+
const schema = parsed["$schema"];
|
|
15748
|
+
if (schema !== SCHEMA_EVAL_V222) {
|
|
15749
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_EVAL_V222}'` : `Missing required field '$schema'. Expected '${SCHEMA_EVAL_V222}'`;
|
|
15750
|
+
errors.push({
|
|
15751
|
+
severity: "error",
|
|
15752
|
+
filePath: absolutePath,
|
|
15753
|
+
location: "$schema",
|
|
15754
|
+
message
|
|
15755
|
+
});
|
|
15832
15756
|
}
|
|
15833
|
-
const
|
|
15834
|
-
|
|
15835
|
-
|
|
15836
|
-
|
|
15837
|
-
|
|
15838
|
-
|
|
15757
|
+
const evalcases = parsed["evalcases"];
|
|
15758
|
+
if (!Array.isArray(evalcases)) {
|
|
15759
|
+
errors.push({
|
|
15760
|
+
severity: "error",
|
|
15761
|
+
filePath: absolutePath,
|
|
15762
|
+
location: "evalcases",
|
|
15763
|
+
message: "Missing or invalid 'evalcases' field (must be an array)"
|
|
15764
|
+
});
|
|
15765
|
+
return {
|
|
15766
|
+
valid: errors.length === 0,
|
|
15767
|
+
filePath: absolutePath,
|
|
15768
|
+
fileType: "eval",
|
|
15769
|
+
errors
|
|
15770
|
+
};
|
|
15839
15771
|
}
|
|
15840
|
-
|
|
15841
|
-
const
|
|
15842
|
-
|
|
15843
|
-
|
|
15772
|
+
for (let i6 = 0; i6 < evalcases.length; i6++) {
|
|
15773
|
+
const evalCase = evalcases[i6];
|
|
15774
|
+
const location = `evalcases[${i6}]`;
|
|
15775
|
+
if (!isObject(evalCase)) {
|
|
15776
|
+
errors.push({
|
|
15777
|
+
severity: "error",
|
|
15778
|
+
filePath: absolutePath,
|
|
15779
|
+
location,
|
|
15780
|
+
message: "Eval case must be an object"
|
|
15781
|
+
});
|
|
15782
|
+
continue;
|
|
15844
15783
|
}
|
|
15845
|
-
|
|
15846
|
-
|
|
15847
|
-
|
|
15848
|
-
|
|
15849
|
-
|
|
15850
|
-
|
|
15851
|
-
|
|
15784
|
+
const id = evalCase["id"];
|
|
15785
|
+
if (typeof id !== "string" || id.trim().length === 0) {
|
|
15786
|
+
errors.push({
|
|
15787
|
+
severity: "error",
|
|
15788
|
+
filePath: absolutePath,
|
|
15789
|
+
location: `${location}.id`,
|
|
15790
|
+
message: "Missing or invalid 'id' field (must be a non-empty string)"
|
|
15791
|
+
});
|
|
15792
|
+
}
|
|
15793
|
+
const outcome = evalCase["outcome"];
|
|
15794
|
+
if (typeof outcome !== "string" || outcome.trim().length === 0) {
|
|
15795
|
+
errors.push({
|
|
15796
|
+
severity: "error",
|
|
15797
|
+
filePath: absolutePath,
|
|
15798
|
+
location: `${location}.outcome`,
|
|
15799
|
+
message: "Missing or invalid 'outcome' field (must be a non-empty string)"
|
|
15800
|
+
});
|
|
15801
|
+
}
|
|
15802
|
+
const inputMessages = evalCase["input_messages"];
|
|
15803
|
+
if (!Array.isArray(inputMessages)) {
|
|
15804
|
+
errors.push({
|
|
15805
|
+
severity: "error",
|
|
15806
|
+
filePath: absolutePath,
|
|
15807
|
+
location: `${location}.input_messages`,
|
|
15808
|
+
message: "Missing or invalid 'input_messages' field (must be an array)"
|
|
15809
|
+
});
|
|
15810
|
+
} else {
|
|
15811
|
+
validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
|
|
15812
|
+
}
|
|
15813
|
+
const expectedMessages = evalCase["expected_messages"];
|
|
15814
|
+
if (!Array.isArray(expectedMessages)) {
|
|
15815
|
+
errors.push({
|
|
15816
|
+
severity: "error",
|
|
15817
|
+
filePath: absolutePath,
|
|
15818
|
+
location: `${location}.expected_messages`,
|
|
15819
|
+
message: "Missing or invalid 'expected_messages' field (must be an array)"
|
|
15820
|
+
});
|
|
15821
|
+
} else {
|
|
15822
|
+
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
15823
|
+
}
|
|
15824
|
+
}
|
|
15852
15825
|
return {
|
|
15853
|
-
|
|
15854
|
-
|
|
15855
|
-
|
|
15856
|
-
|
|
15857
|
-
outPath: normalizeString(rawOptions.out),
|
|
15858
|
-
format,
|
|
15859
|
-
dryRun: normalizeBoolean(rawOptions.dryRun),
|
|
15860
|
-
dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
|
|
15861
|
-
dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
|
|
15862
|
-
dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0),
|
|
15863
|
-
agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120),
|
|
15864
|
-
maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
|
|
15865
|
-
cache: normalizeBoolean(rawOptions.cache),
|
|
15866
|
-
verbose: normalizeBoolean(rawOptions.verbose),
|
|
15867
|
-
dumpPrompts: rawOptions.dumpPrompts
|
|
15826
|
+
valid: errors.length === 0,
|
|
15827
|
+
filePath: absolutePath,
|
|
15828
|
+
fileType: "eval",
|
|
15829
|
+
errors
|
|
15868
15830
|
};
|
|
15869
15831
|
}
|
|
15870
|
-
|
|
15871
|
-
|
|
15872
|
-
|
|
15873
|
-
|
|
15874
|
-
|
|
15875
|
-
|
|
15876
|
-
|
|
15877
|
-
|
|
15878
|
-
|
|
15879
|
-
|
|
15880
|
-
|
|
15881
|
-
|
|
15882
|
-
|
|
15883
|
-
|
|
15884
|
-
|
|
15885
|
-
|
|
15886
|
-
|
|
15887
|
-
|
|
15888
|
-
|
|
15832
|
+
function validateMessages(messages, location, filePath, errors) {
|
|
15833
|
+
for (let i6 = 0; i6 < messages.length; i6++) {
|
|
15834
|
+
const message = messages[i6];
|
|
15835
|
+
const msgLocation = `${location}[${i6}]`;
|
|
15836
|
+
if (!isObject(message)) {
|
|
15837
|
+
errors.push({
|
|
15838
|
+
severity: "error",
|
|
15839
|
+
filePath,
|
|
15840
|
+
location: msgLocation,
|
|
15841
|
+
message: "Message must be an object"
|
|
15842
|
+
});
|
|
15843
|
+
continue;
|
|
15844
|
+
}
|
|
15845
|
+
const role = message["role"];
|
|
15846
|
+
const validRoles = ["system", "user", "assistant"];
|
|
15847
|
+
if (!validRoles.includes(role)) {
|
|
15848
|
+
errors.push({
|
|
15849
|
+
severity: "error",
|
|
15850
|
+
filePath,
|
|
15851
|
+
location: `${msgLocation}.role`,
|
|
15852
|
+
message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
|
|
15853
|
+
});
|
|
15854
|
+
}
|
|
15855
|
+
const content = message["content"];
|
|
15856
|
+
if (typeof content === "string") {
|
|
15857
|
+
} else if (Array.isArray(content)) {
|
|
15858
|
+
for (let j2 = 0; j2 < content.length; j2++) {
|
|
15859
|
+
const contentItem = content[j2];
|
|
15860
|
+
const contentLocation = `${msgLocation}.content[${j2}]`;
|
|
15861
|
+
if (typeof contentItem === "string") {
|
|
15862
|
+
} else if (isObject(contentItem)) {
|
|
15863
|
+
const type = contentItem["type"];
|
|
15864
|
+
if (typeof type !== "string") {
|
|
15865
|
+
errors.push({
|
|
15866
|
+
severity: "error",
|
|
15867
|
+
filePath,
|
|
15868
|
+
location: `${contentLocation}.type`,
|
|
15869
|
+
message: "Content object must have a 'type' field"
|
|
15870
|
+
});
|
|
15871
|
+
}
|
|
15872
|
+
if (type === "text") {
|
|
15873
|
+
const value = contentItem["value"];
|
|
15874
|
+
if (typeof value !== "string") {
|
|
15875
|
+
errors.push({
|
|
15876
|
+
severity: "error",
|
|
15877
|
+
filePath,
|
|
15878
|
+
location: `${contentLocation}.value`,
|
|
15879
|
+
message: "Content with type 'text' must have a 'value' field"
|
|
15880
|
+
});
|
|
15881
|
+
}
|
|
15882
|
+
}
|
|
15883
|
+
} else {
|
|
15884
|
+
errors.push({
|
|
15885
|
+
severity: "error",
|
|
15886
|
+
filePath,
|
|
15887
|
+
location: contentLocation,
|
|
15888
|
+
message: "Content array items must be strings or objects"
|
|
15889
|
+
});
|
|
15890
|
+
}
|
|
15889
15891
|
}
|
|
15890
|
-
|
|
15892
|
+
} else {
|
|
15893
|
+
errors.push({
|
|
15894
|
+
severity: "error",
|
|
15895
|
+
filePath,
|
|
15896
|
+
location: `${msgLocation}.content`,
|
|
15897
|
+
message: "Missing or invalid 'content' field (must be a string or array)"
|
|
15898
|
+
});
|
|
15891
15899
|
}
|
|
15892
15900
|
}
|
|
15893
|
-
return fallback;
|
|
15894
15901
|
}
|
|
15895
|
-
function
|
|
15896
|
-
|
|
15897
|
-
const baseName = "eval";
|
|
15898
|
-
const extension = getDefaultExtension(format);
|
|
15899
|
-
return path13.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
15902
|
+
function isObject2(value) {
|
|
15903
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
15900
15904
|
}
|
|
15901
|
-
|
|
15902
|
-
|
|
15903
|
-
|
|
15904
|
-
|
|
15905
|
-
|
|
15906
|
-
|
|
15905
|
+
var CLI_PLACEHOLDERS2 = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
|
|
15906
|
+
var COMMON_SETTINGS = /* @__PURE__ */ new Set([
|
|
15907
|
+
"provider_batching",
|
|
15908
|
+
"providerBatching"
|
|
15909
|
+
]);
|
|
15910
|
+
var AZURE_SETTINGS = /* @__PURE__ */ new Set([
|
|
15911
|
+
...COMMON_SETTINGS,
|
|
15912
|
+
"endpoint",
|
|
15913
|
+
"resource",
|
|
15914
|
+
"resourceName",
|
|
15915
|
+
"api_key",
|
|
15916
|
+
"apiKey",
|
|
15917
|
+
"deployment",
|
|
15918
|
+
"deploymentName",
|
|
15919
|
+
"model",
|
|
15920
|
+
"version",
|
|
15921
|
+
"api_version",
|
|
15922
|
+
"temperature",
|
|
15923
|
+
"max_output_tokens",
|
|
15924
|
+
"maxTokens"
|
|
15925
|
+
]);
|
|
15926
|
+
var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
|
|
15927
|
+
...COMMON_SETTINGS,
|
|
15928
|
+
"api_key",
|
|
15929
|
+
"apiKey",
|
|
15930
|
+
"model",
|
|
15931
|
+
"deployment",
|
|
15932
|
+
"variant",
|
|
15933
|
+
"temperature",
|
|
15934
|
+
"max_output_tokens",
|
|
15935
|
+
"maxTokens",
|
|
15936
|
+
"thinking_budget",
|
|
15937
|
+
"thinkingBudget"
|
|
15938
|
+
]);
|
|
15939
|
+
var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
|
|
15940
|
+
...COMMON_SETTINGS,
|
|
15941
|
+
"api_key",
|
|
15942
|
+
"apiKey",
|
|
15943
|
+
"model",
|
|
15944
|
+
"deployment",
|
|
15945
|
+
"variant",
|
|
15946
|
+
"temperature",
|
|
15947
|
+
"max_output_tokens",
|
|
15948
|
+
"maxTokens"
|
|
15949
|
+
]);
|
|
15950
|
+
var CODEX_SETTINGS = /* @__PURE__ */ new Set([
|
|
15951
|
+
...COMMON_SETTINGS,
|
|
15952
|
+
"executable",
|
|
15953
|
+
"command",
|
|
15954
|
+
"binary",
|
|
15955
|
+
"args",
|
|
15956
|
+
"arguments",
|
|
15957
|
+
"cwd",
|
|
15958
|
+
"timeout_seconds",
|
|
15959
|
+
"timeoutSeconds",
|
|
15960
|
+
"log_dir",
|
|
15961
|
+
"logDir",
|
|
15962
|
+
"log_directory",
|
|
15963
|
+
"logDirectory",
|
|
15964
|
+
"log_format",
|
|
15965
|
+
"logFormat",
|
|
15966
|
+
"log_output_format",
|
|
15967
|
+
"logOutputFormat"
|
|
15968
|
+
]);
|
|
15969
|
+
var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
|
|
15970
|
+
...COMMON_SETTINGS,
|
|
15971
|
+
"workspace_template",
|
|
15972
|
+
"workspaceTemplate",
|
|
15973
|
+
"vscode_cmd",
|
|
15974
|
+
"command",
|
|
15975
|
+
"wait",
|
|
15976
|
+
"dry_run",
|
|
15977
|
+
"dryRun",
|
|
15978
|
+
"subagent_root",
|
|
15979
|
+
"subagentRoot"
|
|
15980
|
+
]);
|
|
15981
|
+
var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
15982
|
+
...COMMON_SETTINGS,
|
|
15983
|
+
"response",
|
|
15984
|
+
"delayMs",
|
|
15985
|
+
"delayMinMs",
|
|
15986
|
+
"delayMaxMs"
|
|
15987
|
+
]);
|
|
15988
|
+
var CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
15989
|
+
...COMMON_SETTINGS,
|
|
15990
|
+
"command_template",
|
|
15991
|
+
"commandTemplate",
|
|
15992
|
+
"files_format",
|
|
15993
|
+
"filesFormat",
|
|
15994
|
+
"attachments_format",
|
|
15995
|
+
"attachmentsFormat",
|
|
15996
|
+
"cwd",
|
|
15997
|
+
"env",
|
|
15998
|
+
"timeout_seconds",
|
|
15999
|
+
"timeoutSeconds",
|
|
16000
|
+
"healthcheck"
|
|
16001
|
+
]);
|
|
16002
|
+
function getKnownSettings(provider) {
|
|
16003
|
+
const normalizedProvider = provider.toLowerCase();
|
|
16004
|
+
switch (normalizedProvider) {
|
|
16005
|
+
case "azure":
|
|
16006
|
+
case "azure-openai":
|
|
16007
|
+
return AZURE_SETTINGS;
|
|
16008
|
+
case "anthropic":
|
|
16009
|
+
return ANTHROPIC_SETTINGS;
|
|
16010
|
+
case "gemini":
|
|
16011
|
+
case "google":
|
|
16012
|
+
case "google-gemini":
|
|
16013
|
+
return GEMINI_SETTINGS;
|
|
16014
|
+
case "codex":
|
|
16015
|
+
case "codex-cli":
|
|
16016
|
+
return CODEX_SETTINGS;
|
|
16017
|
+
case "vscode":
|
|
16018
|
+
case "vscode-insiders":
|
|
16019
|
+
return VSCODE_SETTINGS;
|
|
16020
|
+
case "mock":
|
|
16021
|
+
return MOCK_SETTINGS;
|
|
16022
|
+
case "cli":
|
|
16023
|
+
return CLI_SETTINGS;
|
|
16024
|
+
default:
|
|
16025
|
+
return null;
|
|
15907
16026
|
}
|
|
15908
|
-
return path13.join(cwd, ".agentv", "prompts");
|
|
15909
16027
|
}
|
|
15910
|
-
function
|
|
15911
|
-
const
|
|
15912
|
-
|
|
15913
|
-
|
|
15914
|
-
|
|
15915
|
-
|
|
15916
|
-
|
|
15917
|
-
|
|
16028
|
+
function validateUnknownSettings(settings, provider, absolutePath, location, errors) {
|
|
16029
|
+
const knownSettings = getKnownSettings(provider);
|
|
16030
|
+
if (!knownSettings) {
|
|
16031
|
+
return;
|
|
16032
|
+
}
|
|
16033
|
+
for (const key2 of Object.keys(settings)) {
|
|
16034
|
+
if (!knownSettings.has(key2)) {
|
|
16035
|
+
errors.push({
|
|
16036
|
+
severity: "warning",
|
|
16037
|
+
filePath: absolutePath,
|
|
16038
|
+
location: `${location}.${key2}`,
|
|
16039
|
+
message: `Unknown setting '${key2}' for ${provider} provider. This property will be ignored.`
|
|
16040
|
+
});
|
|
15918
16041
|
}
|
|
15919
|
-
}
|
|
15920
|
-
}
|
|
15921
|
-
function createProgressReporter(maxWorkers) {
|
|
15922
|
-
const display = new ProgressDisplay(maxWorkers);
|
|
15923
|
-
return {
|
|
15924
|
-
isInteractive: display.isInteractiveMode(),
|
|
15925
|
-
start: () => display.start(),
|
|
15926
|
-
setTotal: (total) => display.setTotalTests(total),
|
|
15927
|
-
update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
|
|
15928
|
-
finish: () => display.finish(),
|
|
15929
|
-
addLogPaths: (paths) => display.addLogPaths(paths)
|
|
15930
|
-
};
|
|
15931
|
-
}
|
|
15932
|
-
function makeEvalKey(testFilePath, evalId) {
|
|
15933
|
-
return `${path13.resolve(testFilePath)}::${evalId}`;
|
|
16042
|
+
}
|
|
15934
16043
|
}
|
|
15935
|
-
function
|
|
15936
|
-
const
|
|
15937
|
-
|
|
15938
|
-
|
|
15939
|
-
|
|
15940
|
-
|
|
15941
|
-
|
|
15942
|
-
|
|
15943
|
-
|
|
15944
|
-
|
|
15945
|
-
|
|
15946
|
-
|
|
16044
|
+
async function validateTargetsFile(filePath) {
|
|
16045
|
+
const errors = [];
|
|
16046
|
+
const absolutePath = path23.resolve(filePath);
|
|
16047
|
+
let parsed;
|
|
16048
|
+
try {
|
|
16049
|
+
const content = await readFile32(absolutePath, "utf8");
|
|
16050
|
+
parsed = parse32(content);
|
|
16051
|
+
} catch (error) {
|
|
16052
|
+
errors.push({
|
|
16053
|
+
severity: "error",
|
|
16054
|
+
filePath: absolutePath,
|
|
16055
|
+
message: `Failed to parse YAML: ${error.message}`
|
|
16056
|
+
});
|
|
16057
|
+
return {
|
|
16058
|
+
valid: false,
|
|
16059
|
+
filePath: absolutePath,
|
|
16060
|
+
fileType: "targets",
|
|
16061
|
+
errors
|
|
16062
|
+
};
|
|
16063
|
+
}
|
|
16064
|
+
function validateCliSettings(settings, absolutePath2, location, errors2) {
|
|
16065
|
+
if (!isObject2(settings)) {
|
|
16066
|
+
errors2.push({
|
|
16067
|
+
severity: "error",
|
|
16068
|
+
filePath: absolutePath2,
|
|
16069
|
+
location,
|
|
16070
|
+
message: "CLI provider requires a 'settings' object"
|
|
16071
|
+
});
|
|
16072
|
+
return;
|
|
15947
16073
|
}
|
|
15948
|
-
|
|
15949
|
-
|
|
15950
|
-
|
|
15951
|
-
|
|
15952
|
-
|
|
15953
|
-
|
|
15954
|
-
|
|
15955
|
-
repoRoot,
|
|
15956
|
-
verbose: options.verbose
|
|
15957
|
-
});
|
|
15958
|
-
const selection = await selectTarget({
|
|
15959
|
-
testFilePath,
|
|
15960
|
-
repoRoot,
|
|
15961
|
-
cwd,
|
|
15962
|
-
explicitTargetsPath: options.targetsPath,
|
|
15963
|
-
cliTargetName: options.target,
|
|
15964
|
-
dryRun: options.dryRun,
|
|
15965
|
-
dryRunDelay: options.dryRunDelay,
|
|
15966
|
-
dryRunDelayMin: options.dryRunDelayMin,
|
|
15967
|
-
dryRunDelayMax: options.dryRunDelayMax,
|
|
15968
|
-
env: process.env
|
|
15969
|
-
});
|
|
15970
|
-
const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
|
|
15971
|
-
const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
|
|
15972
|
-
const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose });
|
|
15973
|
-
const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
|
|
15974
|
-
return { evalIds: filteredIds, selection, inlineTargetLabel };
|
|
15975
|
-
}
|
|
15976
|
-
async function runWithLimit(items, limit, task) {
|
|
15977
|
-
const safeLimit = Math.max(1, limit);
|
|
15978
|
-
let index = 0;
|
|
15979
|
-
const workers = Array.from({ length: safeLimit }, async () => {
|
|
15980
|
-
while (index < items.length) {
|
|
15981
|
-
const current = items[index];
|
|
15982
|
-
index += 1;
|
|
15983
|
-
await task(current);
|
|
15984
|
-
}
|
|
15985
|
-
});
|
|
15986
|
-
await Promise.all(workers);
|
|
15987
|
-
}
|
|
15988
|
-
async function runSingleEvalFile(params) {
|
|
15989
|
-
const {
|
|
15990
|
-
testFilePath,
|
|
15991
|
-
cwd,
|
|
15992
|
-
repoRoot,
|
|
15993
|
-
options,
|
|
15994
|
-
outputWriter,
|
|
15995
|
-
cache,
|
|
15996
|
-
evaluationRunner,
|
|
15997
|
-
workersOverride,
|
|
15998
|
-
progressReporter,
|
|
15999
|
-
seenEvalCases,
|
|
16000
|
-
displayIdTracker,
|
|
16001
|
-
selection,
|
|
16002
|
-
inlineTargetLabel
|
|
16003
|
-
} = params;
|
|
16004
|
-
await ensureFileExists(testFilePath, "Test file");
|
|
16005
|
-
const resolvedTargetSelection = selection;
|
|
16006
|
-
const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
|
|
16007
|
-
const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
|
|
16008
|
-
if (!progressReporter.isInteractive || options.verbose) {
|
|
16009
|
-
console.log(targetMessage);
|
|
16010
|
-
}
|
|
16011
|
-
const promptDumpDir = resolvePromptDirectory(options.dumpPrompts, cwd);
|
|
16012
|
-
if (promptDumpDir) {
|
|
16013
|
-
await mkdir6(promptDumpDir, { recursive: true });
|
|
16014
|
-
if (options.verbose) {
|
|
16015
|
-
console.log(`Prompt dumps enabled at: ${promptDumpDir}`);
|
|
16016
|
-
}
|
|
16017
|
-
}
|
|
16018
|
-
const agentTimeoutMs = Math.max(0, options.agentTimeoutSeconds) * 1e3;
|
|
16019
|
-
const workerPreference = workersOverride ?? options.workers;
|
|
16020
|
-
let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
|
|
16021
|
-
if (resolvedWorkers < 1 || resolvedWorkers > 50) {
|
|
16022
|
-
throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
|
|
16023
|
-
}
|
|
16024
|
-
const isVSCodeProvider = ["vscode", "vscode-insiders"].includes(
|
|
16025
|
-
resolvedTargetSelection.resolvedTarget.kind
|
|
16026
|
-
);
|
|
16027
|
-
if (isVSCodeProvider && resolvedWorkers > 1) {
|
|
16028
|
-
console.warn(`Warning: VSCode providers require window focus. Limiting workers from ${resolvedWorkers} to 1 to prevent race conditions.`);
|
|
16029
|
-
resolvedWorkers = 1;
|
|
16030
|
-
}
|
|
16031
|
-
if (options.verbose) {
|
|
16032
|
-
const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
|
|
16033
|
-
console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
|
|
16034
|
-
}
|
|
16035
|
-
if (isVSCodeProvider && !options.dryRun) {
|
|
16036
|
-
await ensureVSCodeSubagents({
|
|
16037
|
-
kind: resolvedTargetSelection.resolvedTarget.kind,
|
|
16038
|
-
count: resolvedWorkers,
|
|
16039
|
-
verbose: options.verbose
|
|
16040
|
-
});
|
|
16041
|
-
}
|
|
16042
|
-
const results = await evaluationRunner({
|
|
16043
|
-
testFilePath,
|
|
16044
|
-
repoRoot,
|
|
16045
|
-
target: resolvedTargetSelection.resolvedTarget,
|
|
16046
|
-
targets: resolvedTargetSelection.definitions,
|
|
16047
|
-
env: process.env,
|
|
16048
|
-
maxRetries: Math.max(0, options.maxRetries),
|
|
16049
|
-
agentTimeoutMs,
|
|
16050
|
-
promptDumpDir,
|
|
16051
|
-
cache,
|
|
16052
|
-
useCache: options.cache,
|
|
16053
|
-
evalId: options.evalId,
|
|
16054
|
-
verbose: options.verbose,
|
|
16055
|
-
maxConcurrency: resolvedWorkers,
|
|
16056
|
-
onResult: async (result) => {
|
|
16057
|
-
await outputWriter.append(result);
|
|
16058
|
-
},
|
|
16059
|
-
onProgress: async (event) => {
|
|
16060
|
-
const evalKey = makeEvalKey(testFilePath, event.evalId);
|
|
16061
|
-
if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
|
|
16062
|
-
seenEvalCases.add(evalKey);
|
|
16063
|
-
progressReporter.setTotal(seenEvalCases.size);
|
|
16064
|
-
}
|
|
16065
|
-
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16066
|
-
progressReporter.update(displayId, {
|
|
16067
|
-
workerId: displayId,
|
|
16068
|
-
evalId: event.evalId,
|
|
16069
|
-
status: event.status,
|
|
16070
|
-
startedAt: event.startedAt,
|
|
16071
|
-
completedAt: event.completedAt,
|
|
16072
|
-
error: event.error,
|
|
16073
|
-
targetLabel: inlineTargetLabel
|
|
16074
|
-
});
|
|
16075
|
-
}
|
|
16076
|
-
});
|
|
16077
|
-
return { results: [...results], promptDumpDir };
|
|
16078
|
-
}
|
|
16079
|
-
async function runEvalCommand(input) {
|
|
16080
|
-
const options = normalizeOptions(input.rawOptions);
|
|
16081
|
-
const cwd = process.cwd();
|
|
16082
|
-
const repoRoot = await findRepoRoot(cwd);
|
|
16083
|
-
if (options.verbose) {
|
|
16084
|
-
console.log(`Repository root: ${repoRoot}`);
|
|
16085
|
-
}
|
|
16086
|
-
const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
16087
|
-
console.log(`Output path: ${outputPath}`);
|
|
16088
|
-
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
16089
|
-
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
16090
|
-
const evaluationRunner = await resolveEvaluationRunner();
|
|
16091
|
-
const allResults = [];
|
|
16092
|
-
let lastPromptDumpDir;
|
|
16093
|
-
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
16094
|
-
const resolvedTestFiles = input.testFiles.map((file) => path13.resolve(file));
|
|
16095
|
-
const displayIdTracker = createDisplayIdTracker();
|
|
16096
|
-
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
16097
|
-
const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
|
|
16098
|
-
const perFileWorkers = options.workers ? Math.max(1, Math.floor(totalWorkers / fileConcurrency)) : void 0;
|
|
16099
|
-
const fileMetadata = /* @__PURE__ */ new Map();
|
|
16100
|
-
for (const testFilePath of resolvedTestFiles) {
|
|
16101
|
-
const meta = await prepareFileMetadata({
|
|
16102
|
-
testFilePath,
|
|
16103
|
-
repoRoot,
|
|
16104
|
-
cwd,
|
|
16105
|
-
options
|
|
16106
|
-
});
|
|
16107
|
-
fileMetadata.set(testFilePath, meta);
|
|
16108
|
-
}
|
|
16109
|
-
const totalEvalCount = Array.from(fileMetadata.values()).reduce(
|
|
16110
|
-
(sum, meta) => sum + meta.evalIds.length,
|
|
16111
|
-
0
|
|
16112
|
-
);
|
|
16113
|
-
if (totalEvalCount === 0) {
|
|
16114
|
-
throw new Error("No eval cases matched the provided filters.");
|
|
16115
|
-
}
|
|
16116
|
-
const progressReporter = createProgressReporter(totalWorkers);
|
|
16117
|
-
progressReporter.start();
|
|
16118
|
-
progressReporter.setTotal(totalEvalCount);
|
|
16119
|
-
const seenCodexLogPaths = /* @__PURE__ */ new Set();
|
|
16120
|
-
const unsubscribeCodexLogs = subscribeToCodexLogEntries((entry) => {
|
|
16121
|
-
if (!entry.filePath || seenCodexLogPaths.has(entry.filePath)) {
|
|
16122
|
-
return;
|
|
16123
|
-
}
|
|
16124
|
-
seenCodexLogPaths.add(entry.filePath);
|
|
16125
|
-
progressReporter.addLogPaths([entry.filePath]);
|
|
16126
|
-
});
|
|
16127
|
-
for (const [testFilePath, meta] of fileMetadata.entries()) {
|
|
16128
|
-
for (const evalId of meta.evalIds) {
|
|
16129
|
-
const evalKey = makeEvalKey(testFilePath, evalId);
|
|
16130
|
-
seenEvalCases.add(evalKey);
|
|
16131
|
-
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16132
|
-
progressReporter.update(displayId, {
|
|
16133
|
-
workerId: displayId,
|
|
16134
|
-
evalId,
|
|
16135
|
-
status: "pending",
|
|
16136
|
-
targetLabel: meta.inlineTargetLabel
|
|
16074
|
+
const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
|
|
16075
|
+
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16076
|
+
errors2.push({
|
|
16077
|
+
severity: "error",
|
|
16078
|
+
filePath: absolutePath2,
|
|
16079
|
+
location: `${location}.commandTemplate`,
|
|
16080
|
+
message: "CLI provider requires 'commandTemplate' as a non-empty string"
|
|
16137
16081
|
});
|
|
16082
|
+
} else {
|
|
16083
|
+
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16138
16084
|
}
|
|
16139
|
-
|
|
16140
|
-
|
|
16141
|
-
|
|
16142
|
-
|
|
16143
|
-
|
|
16144
|
-
|
|
16145
|
-
|
|
16146
|
-
const result = await runSingleEvalFile({
|
|
16147
|
-
testFilePath,
|
|
16148
|
-
cwd,
|
|
16149
|
-
repoRoot,
|
|
16150
|
-
options,
|
|
16151
|
-
outputWriter,
|
|
16152
|
-
cache,
|
|
16153
|
-
evaluationRunner,
|
|
16154
|
-
workersOverride: perFileWorkers,
|
|
16155
|
-
progressReporter,
|
|
16156
|
-
seenEvalCases,
|
|
16157
|
-
displayIdTracker,
|
|
16158
|
-
selection: targetPrep.selection,
|
|
16159
|
-
inlineTargetLabel: targetPrep.inlineTargetLabel
|
|
16085
|
+
const attachmentsFormat = settings["attachments_format"] ?? settings["attachmentsFormat"];
|
|
16086
|
+
if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
|
|
16087
|
+
errors2.push({
|
|
16088
|
+
severity: "error",
|
|
16089
|
+
filePath: absolutePath2,
|
|
16090
|
+
location: `${location}.attachmentsFormat`,
|
|
16091
|
+
message: "'attachmentsFormat' must be a string when provided"
|
|
16160
16092
|
});
|
|
16161
|
-
allResults.push(...result.results);
|
|
16162
|
-
if (result.promptDumpDir) {
|
|
16163
|
-
lastPromptDumpDir = result.promptDumpDir;
|
|
16164
|
-
}
|
|
16165
|
-
});
|
|
16166
|
-
progressReporter.finish();
|
|
16167
|
-
const summary = calculateEvaluationSummary(allResults);
|
|
16168
|
-
console.log(formatEvaluationSummary(summary));
|
|
16169
|
-
if (allResults.length > 0) {
|
|
16170
|
-
console.log(`
|
|
16171
|
-
Results written to: ${outputPath}`);
|
|
16172
16093
|
}
|
|
16173
|
-
|
|
16174
|
-
|
|
16175
|
-
|
|
16176
|
-
|
|
16177
|
-
|
|
16178
|
-
|
|
16179
|
-
|
|
16180
|
-
}
|
|
16181
|
-
async function resolveEvaluationRunner() {
|
|
16182
|
-
const overridePath = process.env.AGENTEVO_CLI_EVAL_RUNNER;
|
|
16183
|
-
if (!overridePath) {
|
|
16184
|
-
return runEvaluation;
|
|
16185
|
-
}
|
|
16186
|
-
const resolved = path13.isAbsolute(overridePath) ? overridePath : path13.resolve(process.cwd(), overridePath);
|
|
16187
|
-
const moduleUrl = pathToFileURL(resolved).href;
|
|
16188
|
-
const mod = await import(moduleUrl);
|
|
16189
|
-
const candidate = mod.runEvaluation;
|
|
16190
|
-
if (typeof candidate !== "function") {
|
|
16191
|
-
throw new Error(
|
|
16192
|
-
`Module '${resolved}' must export a 'runEvaluation' function to override the default implementation`
|
|
16193
|
-
);
|
|
16194
|
-
}
|
|
16195
|
-
return candidate;
|
|
16196
|
-
}
|
|
16197
|
-
|
|
16198
|
-
// src/commands/eval/index.ts
|
|
16199
|
-
function parseInteger(value, fallback) {
|
|
16200
|
-
const parsed = Number.parseInt(value, 10);
|
|
16201
|
-
if (Number.isNaN(parsed)) {
|
|
16202
|
-
return fallback;
|
|
16203
|
-
}
|
|
16204
|
-
return parsed;
|
|
16205
|
-
}
|
|
16206
|
-
function registerEvalCommand(program) {
|
|
16207
|
-
program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
|
|
16208
|
-
"--workers <count>",
|
|
16209
|
-
"Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
16210
|
-
(value) => parseInteger(value, 1)
|
|
16211
|
-
).option("--out <path>", "Write results to the specified path").option(
|
|
16212
|
-
"--output-format <format>",
|
|
16213
|
-
"Output format: 'jsonl' or 'yaml' (default: jsonl)",
|
|
16214
|
-
"jsonl"
|
|
16215
|
-
).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
|
|
16216
|
-
"--dry-run-delay <ms>",
|
|
16217
|
-
"Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
|
|
16218
|
-
(value) => parseInteger(value, 0),
|
|
16219
|
-
0
|
|
16220
|
-
).option(
|
|
16221
|
-
"--dry-run-delay-min <ms>",
|
|
16222
|
-
"Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
|
|
16223
|
-
(value) => parseInteger(value, 0),
|
|
16224
|
-
0
|
|
16225
|
-
).option(
|
|
16226
|
-
"--dry-run-delay-max <ms>",
|
|
16227
|
-
"Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
|
|
16228
|
-
(value) => parseInteger(value, 0),
|
|
16229
|
-
0
|
|
16230
|
-
).option(
|
|
16231
|
-
"--agent-timeout <seconds>",
|
|
16232
|
-
"Timeout in seconds for provider responses (default: 120)",
|
|
16233
|
-
(value) => parseInteger(value, 120),
|
|
16234
|
-
120
|
|
16235
|
-
).option(
|
|
16236
|
-
"--max-retries <count>",
|
|
16237
|
-
"Retry count for timeout recoveries (default: 2)",
|
|
16238
|
-
(value) => parseInteger(value, 2),
|
|
16239
|
-
2
|
|
16240
|
-
).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
|
|
16241
|
-
"--dump-prompts [dir]",
|
|
16242
|
-
"Persist prompt payloads for debugging (optional custom directory)"
|
|
16243
|
-
).action(async (evalPaths, rawOptions) => {
|
|
16244
|
-
const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
|
|
16245
|
-
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
16246
|
-
});
|
|
16247
|
-
return program;
|
|
16248
|
-
}
|
|
16249
|
-
async function resolveEvalPaths(evalPaths, cwd) {
|
|
16250
|
-
const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
|
|
16251
|
-
if (normalizedInputs.length === 0) {
|
|
16252
|
-
throw new Error("No eval paths provided.");
|
|
16253
|
-
}
|
|
16254
|
-
const unmatched = [];
|
|
16255
|
-
const results = /* @__PURE__ */ new Set();
|
|
16256
|
-
for (const pattern of normalizedInputs) {
|
|
16257
|
-
const candidatePath = path14.isAbsolute(pattern) ? path14.normalize(pattern) : path14.resolve(cwd, pattern);
|
|
16258
|
-
try {
|
|
16259
|
-
const stats = await stat3(candidatePath);
|
|
16260
|
-
if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
|
|
16261
|
-
results.add(candidatePath);
|
|
16262
|
-
continue;
|
|
16263
|
-
}
|
|
16264
|
-
} catch {
|
|
16265
|
-
}
|
|
16266
|
-
const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
|
|
16267
|
-
const matches = await fg(globPattern, {
|
|
16268
|
-
cwd,
|
|
16269
|
-
absolute: true,
|
|
16270
|
-
onlyFiles: true,
|
|
16271
|
-
unique: true,
|
|
16272
|
-
dot: true,
|
|
16273
|
-
followSymbolicLinks: true
|
|
16274
|
-
});
|
|
16275
|
-
const yamlMatches = matches.filter((filePath) => /\.ya?ml$/i.test(filePath));
|
|
16276
|
-
if (yamlMatches.length === 0) {
|
|
16277
|
-
unmatched.push(pattern);
|
|
16278
|
-
continue;
|
|
16279
|
-
}
|
|
16280
|
-
yamlMatches.forEach((filePath) => results.add(path14.normalize(filePath)));
|
|
16281
|
-
}
|
|
16282
|
-
if (unmatched.length > 0) {
|
|
16283
|
-
throw new Error(
|
|
16284
|
-
`No eval files matched: ${unmatched.join(
|
|
16285
|
-
", "
|
|
16286
|
-
)}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
|
|
16287
|
-
);
|
|
16288
|
-
}
|
|
16289
|
-
const sorted = Array.from(results);
|
|
16290
|
-
sorted.sort();
|
|
16291
|
-
return sorted;
|
|
16292
|
-
}
|
|
16293
|
-
|
|
16294
|
-
// src/commands/init/index.ts
|
|
16295
|
-
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
16296
|
-
import path16 from "node:path";
|
|
16297
|
-
import * as readline from "node:readline/promises";
|
|
16298
|
-
|
|
16299
|
-
// src/templates/index.ts
|
|
16300
|
-
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
16301
|
-
import path15 from "node:path";
|
|
16302
|
-
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
16303
|
-
var TemplateManager = class {
|
|
16304
|
-
static getGithubTemplates() {
|
|
16305
|
-
return this.getTemplatesFromDir("github");
|
|
16306
|
-
}
|
|
16307
|
-
static getAgentvTemplates() {
|
|
16308
|
-
return this.getTemplatesFromDir("agentv");
|
|
16309
|
-
}
|
|
16310
|
-
static getTemplatesFromDir(subdir) {
|
|
16311
|
-
const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
|
|
16312
|
-
let templatesDir;
|
|
16313
|
-
if (currentDir.includes(path15.sep + "dist")) {
|
|
16314
|
-
templatesDir = path15.join(currentDir, "templates", subdir);
|
|
16315
|
-
} else {
|
|
16316
|
-
templatesDir = path15.join(currentDir, subdir);
|
|
16094
|
+
const filesFormat = settings["files_format"] ?? settings["filesFormat"];
|
|
16095
|
+
if (filesFormat !== void 0 && typeof filesFormat !== "string") {
|
|
16096
|
+
errors2.push({
|
|
16097
|
+
severity: "error",
|
|
16098
|
+
filePath: absolutePath2,
|
|
16099
|
+
location: `${location}.filesFormat`,
|
|
16100
|
+
message: "'filesFormat' must be a string when provided"
|
|
16101
|
+
});
|
|
16317
16102
|
}
|
|
16318
|
-
|
|
16319
|
-
|
|
16320
|
-
|
|
16321
|
-
|
|
16322
|
-
|
|
16323
|
-
|
|
16324
|
-
|
|
16325
|
-
|
|
16326
|
-
|
|
16327
|
-
|
|
16328
|
-
|
|
16329
|
-
|
|
16330
|
-
|
|
16331
|
-
|
|
16332
|
-
|
|
16333
|
-
|
|
16334
|
-
|
|
16103
|
+
const cwd = settings["cwd"];
|
|
16104
|
+
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16105
|
+
errors2.push({
|
|
16106
|
+
severity: "error",
|
|
16107
|
+
filePath: absolutePath2,
|
|
16108
|
+
location: `${location}.cwd`,
|
|
16109
|
+
message: "'cwd' must be a string when provided"
|
|
16110
|
+
});
|
|
16111
|
+
}
|
|
16112
|
+
const timeoutSeconds = settings["timeout_seconds"] ?? settings["timeoutSeconds"];
|
|
16113
|
+
if (timeoutSeconds !== void 0) {
|
|
16114
|
+
const numericTimeout = Number(timeoutSeconds);
|
|
16115
|
+
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
16116
|
+
errors2.push({
|
|
16117
|
+
severity: "error",
|
|
16118
|
+
filePath: absolutePath2,
|
|
16119
|
+
location: `${location}.timeoutSeconds`,
|
|
16120
|
+
message: "'timeoutSeconds' must be a positive number when provided"
|
|
16335
16121
|
});
|
|
16336
16122
|
}
|
|
16337
16123
|
}
|
|
16338
|
-
|
|
16339
|
-
|
|
16340
|
-
|
|
16341
|
-
|
|
16342
|
-
|
|
16343
|
-
|
|
16344
|
-
|
|
16345
|
-
|
|
16346
|
-
|
|
16347
|
-
|
|
16348
|
-
|
|
16349
|
-
|
|
16350
|
-
|
|
16351
|
-
|
|
16352
|
-
|
|
16353
|
-
|
|
16354
|
-
}
|
|
16355
|
-
|
|
16356
|
-
|
|
16357
|
-
|
|
16358
|
-
const agentvDir = path16.join(targetPath, ".agentv");
|
|
16359
|
-
const githubTemplates = TemplateManager.getGithubTemplates();
|
|
16360
|
-
const agentvTemplates = TemplateManager.getAgentvTemplates();
|
|
16361
|
-
const existingFiles = [];
|
|
16362
|
-
if (existsSync(githubDir)) {
|
|
16363
|
-
for (const template of githubTemplates) {
|
|
16364
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
16365
|
-
if (existsSync(targetFilePath)) {
|
|
16366
|
-
existingFiles.push(path16.relative(targetPath, targetFilePath));
|
|
16124
|
+
const envOverrides = settings["env"];
|
|
16125
|
+
if (envOverrides !== void 0) {
|
|
16126
|
+
if (!isObject2(envOverrides)) {
|
|
16127
|
+
errors2.push({
|
|
16128
|
+
severity: "error",
|
|
16129
|
+
filePath: absolutePath2,
|
|
16130
|
+
location: `${location}.env`,
|
|
16131
|
+
message: "'env' must be an object with string values"
|
|
16132
|
+
});
|
|
16133
|
+
} else {
|
|
16134
|
+
for (const [key2, value] of Object.entries(envOverrides)) {
|
|
16135
|
+
if (typeof value !== "string" || value.trim().length === 0) {
|
|
16136
|
+
errors2.push({
|
|
16137
|
+
severity: "error",
|
|
16138
|
+
filePath: absolutePath2,
|
|
16139
|
+
location: `${location}.env.${key2}`,
|
|
16140
|
+
message: `Environment override '${key2}' must be a non-empty string`
|
|
16141
|
+
});
|
|
16142
|
+
}
|
|
16143
|
+
}
|
|
16367
16144
|
}
|
|
16368
16145
|
}
|
|
16369
|
-
|
|
16370
|
-
|
|
16371
|
-
|
|
16372
|
-
const targetFilePath = path16.join(agentvDir, template.path);
|
|
16373
|
-
if (existsSync(targetFilePath)) {
|
|
16374
|
-
existingFiles.push(path16.relative(targetPath, targetFilePath));
|
|
16375
|
-
}
|
|
16146
|
+
const healthcheck = settings["healthcheck"];
|
|
16147
|
+
if (healthcheck !== void 0) {
|
|
16148
|
+
validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
|
|
16376
16149
|
}
|
|
16377
16150
|
}
|
|
16378
|
-
|
|
16379
|
-
|
|
16380
|
-
|
|
16381
|
-
|
|
16382
|
-
|
|
16383
|
-
|
|
16384
|
-
|
|
16151
|
+
function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
|
|
16152
|
+
if (!isObject2(healthcheck)) {
|
|
16153
|
+
errors2.push({
|
|
16154
|
+
severity: "error",
|
|
16155
|
+
filePath: absolutePath2,
|
|
16156
|
+
location,
|
|
16157
|
+
message: "'healthcheck' must be an object when provided"
|
|
16158
|
+
});
|
|
16385
16159
|
return;
|
|
16386
16160
|
}
|
|
16387
|
-
|
|
16388
|
-
|
|
16389
|
-
|
|
16390
|
-
|
|
16391
|
-
|
|
16392
|
-
|
|
16393
|
-
|
|
16394
|
-
|
|
16395
|
-
|
|
16396
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
16397
|
-
const targetDirPath = path16.dirname(targetFilePath);
|
|
16398
|
-
if (!existsSync(targetDirPath)) {
|
|
16399
|
-
mkdirSync(targetDirPath, { recursive: true });
|
|
16161
|
+
const type = healthcheck["type"];
|
|
16162
|
+
if (type !== "http" && type !== "command") {
|
|
16163
|
+
errors2.push({
|
|
16164
|
+
severity: "error",
|
|
16165
|
+
filePath: absolutePath2,
|
|
16166
|
+
location: `${location}.type`,
|
|
16167
|
+
message: "healthcheck.type must be either 'http' or 'command'"
|
|
16168
|
+
});
|
|
16169
|
+
return;
|
|
16400
16170
|
}
|
|
16401
|
-
|
|
16402
|
-
|
|
16403
|
-
|
|
16404
|
-
|
|
16405
|
-
|
|
16406
|
-
|
|
16407
|
-
|
|
16408
|
-
|
|
16171
|
+
const timeoutSeconds = healthcheck["timeout_seconds"] ?? healthcheck["timeoutSeconds"];
|
|
16172
|
+
if (timeoutSeconds !== void 0) {
|
|
16173
|
+
const numericTimeout = Number(timeoutSeconds);
|
|
16174
|
+
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
16175
|
+
errors2.push({
|
|
16176
|
+
severity: "error",
|
|
16177
|
+
filePath: absolutePath2,
|
|
16178
|
+
location: `${location}.timeoutSeconds`,
|
|
16179
|
+
message: "healthcheck.timeoutSeconds must be a positive number when provided"
|
|
16180
|
+
});
|
|
16181
|
+
}
|
|
16409
16182
|
}
|
|
16410
|
-
|
|
16411
|
-
|
|
16412
|
-
|
|
16413
|
-
|
|
16414
|
-
|
|
16415
|
-
|
|
16416
|
-
|
|
16417
|
-
|
|
16418
|
-
|
|
16419
|
-
|
|
16420
|
-
|
|
16421
|
-
console.log(" 1. Edit .agentv/.env with your API credentials");
|
|
16422
|
-
console.log(" 2. Configure targets in .agentv/targets.yaml");
|
|
16423
|
-
console.log(" 3. Create eval files using the schema and prompt templates");
|
|
16424
|
-
}
|
|
16425
|
-
|
|
16426
|
-
// src/commands/status.ts
|
|
16427
|
-
function registerStatusCommand(program) {
|
|
16428
|
-
program.command("status").description("Show the latest AgentV kernel status").action(() => {
|
|
16429
|
-
const kernel = createAgentKernel();
|
|
16430
|
-
console.log(`Kernel status: ${kernel.status}`);
|
|
16431
|
-
});
|
|
16432
|
-
return program;
|
|
16433
|
-
}
|
|
16434
|
-
|
|
16435
|
-
// src/commands/validate/format-output.ts
|
|
16436
|
-
var ANSI_RED = "\x1B[31m";
|
|
16437
|
-
var ANSI_YELLOW2 = "\x1B[33m";
|
|
16438
|
-
var ANSI_GREEN = "\x1B[32m";
|
|
16439
|
-
var ANSI_CYAN = "\x1B[36m";
|
|
16440
|
-
var ANSI_BOLD = "\x1B[1m";
|
|
16441
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
16442
|
-
function formatSummary(summary, useColors) {
|
|
16443
|
-
const lines = [];
|
|
16444
|
-
lines.push("");
|
|
16445
|
-
lines.push(formatHeader("Validation Summary", useColors));
|
|
16446
|
-
lines.push("");
|
|
16447
|
-
for (const result of summary.results) {
|
|
16448
|
-
lines.push(formatFileResult(result, useColors));
|
|
16449
|
-
}
|
|
16450
|
-
lines.push("");
|
|
16451
|
-
lines.push(formatStats(summary, useColors));
|
|
16452
|
-
lines.push("");
|
|
16453
|
-
return lines.join("\n");
|
|
16454
|
-
}
|
|
16455
|
-
function formatHeader(text, useColors) {
|
|
16456
|
-
if (useColors) {
|
|
16457
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
|
|
16458
|
-
}
|
|
16459
|
-
return text;
|
|
16460
|
-
}
|
|
16461
|
-
function formatFileResult(result, useColors) {
|
|
16462
|
-
const lines = [];
|
|
16463
|
-
const status = result.valid ? "\u2713" : "\u2717";
|
|
16464
|
-
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
|
|
16465
|
-
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
|
|
16466
|
-
const fileName = result.filePath;
|
|
16467
|
-
lines.push(`${statusText} ${fileName}`);
|
|
16468
|
-
if (result.errors.length > 0) {
|
|
16469
|
-
for (const error of result.errors) {
|
|
16470
|
-
lines.push(formatError(error, useColors));
|
|
16183
|
+
if (type === "http") {
|
|
16184
|
+
const url = healthcheck["url"];
|
|
16185
|
+
if (typeof url !== "string" || url.trim().length === 0) {
|
|
16186
|
+
errors2.push({
|
|
16187
|
+
severity: "error",
|
|
16188
|
+
filePath: absolutePath2,
|
|
16189
|
+
location: `${location}.url`,
|
|
16190
|
+
message: "healthcheck.url must be a non-empty string for http checks"
|
|
16191
|
+
});
|
|
16192
|
+
}
|
|
16193
|
+
return;
|
|
16471
16194
|
}
|
|
16472
|
-
|
|
16473
|
-
|
|
16474
|
-
|
|
16475
|
-
|
|
16476
|
-
|
|
16477
|
-
|
|
16478
|
-
|
|
16479
|
-
|
|
16480
|
-
return `${coloredPrefix}${location} ${error.message}`;
|
|
16481
|
-
}
|
|
16482
|
-
function formatStats(summary, useColors) {
|
|
16483
|
-
const lines = [];
|
|
16484
|
-
const totalText = `Total files: ${summary.totalFiles}`;
|
|
16485
|
-
const validText = `Valid: ${summary.validFiles}`;
|
|
16486
|
-
const invalidText = `Invalid: ${summary.invalidFiles}`;
|
|
16487
|
-
if (useColors) {
|
|
16488
|
-
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
|
|
16489
|
-
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
|
|
16490
|
-
if (summary.invalidFiles > 0) {
|
|
16491
|
-
lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
|
|
16195
|
+
const commandTemplate = healthcheck["command_template"] ?? healthcheck["commandTemplate"];
|
|
16196
|
+
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16197
|
+
errors2.push({
|
|
16198
|
+
severity: "error",
|
|
16199
|
+
filePath: absolutePath2,
|
|
16200
|
+
location: `${location}.commandTemplate`,
|
|
16201
|
+
message: "healthcheck.commandTemplate must be a non-empty string for command checks"
|
|
16202
|
+
});
|
|
16492
16203
|
} else {
|
|
16493
|
-
|
|
16494
|
-
}
|
|
16495
|
-
} else {
|
|
16496
|
-
lines.push(totalText);
|
|
16497
|
-
lines.push(validText);
|
|
16498
|
-
lines.push(invalidText);
|
|
16499
|
-
}
|
|
16500
|
-
return lines.join("\n");
|
|
16501
|
-
}
|
|
16502
|
-
function isTTY() {
|
|
16503
|
-
return process.stdout.isTTY ?? false;
|
|
16504
|
-
}
|
|
16505
|
-
|
|
16506
|
-
// ../../packages/core/dist/evaluation/validation/index.js
|
|
16507
|
-
import { readFile as readFile5 } from "node:fs/promises";
|
|
16508
|
-
import { parse as parse5 } from "yaml";
|
|
16509
|
-
import { readFile as readFile23 } from "node:fs/promises";
|
|
16510
|
-
import path17 from "node:path";
|
|
16511
|
-
import { parse as parse23 } from "yaml";
|
|
16512
|
-
import { readFile as readFile33 } from "node:fs/promises";
|
|
16513
|
-
import path23 from "node:path";
|
|
16514
|
-
import { parse as parse32 } from "yaml";
|
|
16515
|
-
import { readFile as readFile42 } from "node:fs/promises";
|
|
16516
|
-
import { parse as parse42 } from "yaml";
|
|
16517
|
-
import { readFile as readFile52 } from "node:fs/promises";
|
|
16518
|
-
import path33 from "node:path";
|
|
16519
|
-
import { parse as parse52 } from "yaml";
|
|
16520
|
-
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
16521
|
-
var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
|
|
16522
|
-
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
16523
|
-
async function detectFileType(filePath) {
|
|
16524
|
-
try {
|
|
16525
|
-
const content = await readFile5(filePath, "utf8");
|
|
16526
|
-
const parsed = parse5(content);
|
|
16527
|
-
if (typeof parsed !== "object" || parsed === null) {
|
|
16528
|
-
return "unknown";
|
|
16204
|
+
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16529
16205
|
}
|
|
16530
|
-
const
|
|
16531
|
-
|
|
16532
|
-
|
|
16533
|
-
|
|
16206
|
+
const cwd = healthcheck["cwd"];
|
|
16207
|
+
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16208
|
+
errors2.push({
|
|
16209
|
+
severity: "error",
|
|
16210
|
+
filePath: absolutePath2,
|
|
16211
|
+
location: `${location}.cwd`,
|
|
16212
|
+
message: "healthcheck.cwd must be a string when provided"
|
|
16213
|
+
});
|
|
16534
16214
|
}
|
|
16535
|
-
|
|
16536
|
-
|
|
16537
|
-
|
|
16538
|
-
|
|
16539
|
-
|
|
16540
|
-
|
|
16541
|
-
|
|
16542
|
-
|
|
16543
|
-
|
|
16215
|
+
}
|
|
16216
|
+
function recordUnknownPlaceholders(template, absolutePath2, location, errors2) {
|
|
16217
|
+
const placeholders = extractPlaceholders(template);
|
|
16218
|
+
for (const placeholder of placeholders) {
|
|
16219
|
+
if (!CLI_PLACEHOLDERS2.has(placeholder)) {
|
|
16220
|
+
errors2.push({
|
|
16221
|
+
severity: "error",
|
|
16222
|
+
filePath: absolutePath2,
|
|
16223
|
+
location,
|
|
16224
|
+
message: `Unknown CLI placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS2).join(", ")}`
|
|
16225
|
+
});
|
|
16226
|
+
}
|
|
16544
16227
|
}
|
|
16545
|
-
} catch {
|
|
16546
|
-
return "unknown";
|
|
16547
16228
|
}
|
|
16548
|
-
|
|
16549
|
-
|
|
16550
|
-
|
|
16551
|
-
|
|
16552
|
-
|
|
16553
|
-
|
|
16554
|
-
|
|
16555
|
-
|
|
16556
|
-
|
|
16557
|
-
|
|
16558
|
-
const content = await readFile23(absolutePath, "utf8");
|
|
16559
|
-
parsed = parse23(content);
|
|
16560
|
-
} catch (error) {
|
|
16561
|
-
errors.push({
|
|
16562
|
-
severity: "error",
|
|
16563
|
-
filePath: absolutePath,
|
|
16564
|
-
message: `Failed to parse YAML: ${error.message}`
|
|
16565
|
-
});
|
|
16566
|
-
return {
|
|
16567
|
-
valid: false,
|
|
16568
|
-
filePath: absolutePath,
|
|
16569
|
-
fileType: "eval",
|
|
16570
|
-
errors
|
|
16571
|
-
};
|
|
16229
|
+
function extractPlaceholders(template) {
|
|
16230
|
+
const matches = template.matchAll(/\{([A-Z_]+)\}/g);
|
|
16231
|
+
const result = [];
|
|
16232
|
+
for (const match of matches) {
|
|
16233
|
+
const placeholder = match[1];
|
|
16234
|
+
if (placeholder) {
|
|
16235
|
+
result.push(placeholder);
|
|
16236
|
+
}
|
|
16237
|
+
}
|
|
16238
|
+
return result;
|
|
16572
16239
|
}
|
|
16573
|
-
if (!
|
|
16240
|
+
if (!isObject2(parsed)) {
|
|
16574
16241
|
errors.push({
|
|
16575
16242
|
severity: "error",
|
|
16576
16243
|
filePath: absolutePath,
|
|
@@ -16579,13 +16246,13 @@ async function validateEvalFile(filePath) {
|
|
|
16579
16246
|
return {
|
|
16580
16247
|
valid: false,
|
|
16581
16248
|
filePath: absolutePath,
|
|
16582
|
-
fileType: "
|
|
16249
|
+
fileType: "targets",
|
|
16583
16250
|
errors
|
|
16584
16251
|
};
|
|
16585
16252
|
}
|
|
16586
16253
|
const schema = parsed["$schema"];
|
|
16587
|
-
if (schema !==
|
|
16588
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${
|
|
16254
|
+
if (schema !== TARGETS_SCHEMA_V2) {
|
|
16255
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
|
|
16589
16256
|
errors.push({
|
|
16590
16257
|
severity: "error",
|
|
16591
16258
|
filePath: absolutePath,
|
|
@@ -16593,639 +16260,1124 @@ async function validateEvalFile(filePath) {
|
|
|
16593
16260
|
message
|
|
16594
16261
|
});
|
|
16595
16262
|
}
|
|
16596
|
-
const
|
|
16597
|
-
if (!Array.isArray(
|
|
16263
|
+
const targets = parsed["targets"];
|
|
16264
|
+
if (!Array.isArray(targets)) {
|
|
16598
16265
|
errors.push({
|
|
16599
16266
|
severity: "error",
|
|
16600
16267
|
filePath: absolutePath,
|
|
16601
|
-
location: "
|
|
16602
|
-
message: "Missing or invalid '
|
|
16268
|
+
location: "targets",
|
|
16269
|
+
message: "Missing or invalid 'targets' field (must be an array)"
|
|
16603
16270
|
});
|
|
16604
16271
|
return {
|
|
16605
16272
|
valid: errors.length === 0,
|
|
16606
16273
|
filePath: absolutePath,
|
|
16607
|
-
fileType: "
|
|
16274
|
+
fileType: "targets",
|
|
16608
16275
|
errors
|
|
16609
16276
|
};
|
|
16610
16277
|
}
|
|
16611
|
-
|
|
16612
|
-
|
|
16613
|
-
const
|
|
16614
|
-
|
|
16278
|
+
const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
|
|
16279
|
+
for (let i6 = 0; i6 < targets.length; i6++) {
|
|
16280
|
+
const target = targets[i6];
|
|
16281
|
+
const location = `targets[${i6}]`;
|
|
16282
|
+
if (!isObject2(target)) {
|
|
16615
16283
|
errors.push({
|
|
16616
16284
|
severity: "error",
|
|
16617
16285
|
filePath: absolutePath,
|
|
16618
16286
|
location,
|
|
16619
|
-
message: "
|
|
16287
|
+
message: "Target must be an object"
|
|
16620
16288
|
});
|
|
16621
16289
|
continue;
|
|
16622
16290
|
}
|
|
16623
|
-
const
|
|
16624
|
-
if (typeof
|
|
16291
|
+
const name = target["name"];
|
|
16292
|
+
if (typeof name !== "string" || name.trim().length === 0) {
|
|
16625
16293
|
errors.push({
|
|
16626
16294
|
severity: "error",
|
|
16627
16295
|
filePath: absolutePath,
|
|
16628
|
-
location: `${location}.
|
|
16629
|
-
message: "Missing or invalid '
|
|
16296
|
+
location: `${location}.name`,
|
|
16297
|
+
message: "Missing or invalid 'name' field (must be a non-empty string)"
|
|
16630
16298
|
});
|
|
16631
16299
|
}
|
|
16632
|
-
const
|
|
16633
|
-
|
|
16300
|
+
const provider = target["provider"];
|
|
16301
|
+
const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
|
|
16302
|
+
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
16634
16303
|
errors.push({
|
|
16635
16304
|
severity: "error",
|
|
16636
16305
|
filePath: absolutePath,
|
|
16637
|
-
location: `${location}.
|
|
16638
|
-
message: "Missing or invalid '
|
|
16306
|
+
location: `${location}.provider`,
|
|
16307
|
+
message: "Missing or invalid 'provider' field (must be a non-empty string)"
|
|
16308
|
+
});
|
|
16309
|
+
} else if (!knownProviders.includes(provider)) {
|
|
16310
|
+
errors.push({
|
|
16311
|
+
severity: "warning",
|
|
16312
|
+
filePath: absolutePath,
|
|
16313
|
+
location: `${location}.provider`,
|
|
16314
|
+
message: `Unknown provider '${provider}'. Known providers: ${knownProviders.join(", ")}`
|
|
16315
|
+
});
|
|
16316
|
+
}
|
|
16317
|
+
const settings = target["settings"];
|
|
16318
|
+
if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
|
|
16319
|
+
errors.push({
|
|
16320
|
+
severity: "error",
|
|
16321
|
+
filePath: absolutePath,
|
|
16322
|
+
location: `${location}.settings`,
|
|
16323
|
+
message: "Invalid 'settings' field (must be an object)"
|
|
16324
|
+
});
|
|
16325
|
+
}
|
|
16326
|
+
if (providerValue === "cli") {
|
|
16327
|
+
validateCliSettings(settings, absolutePath, `${location}.settings`, errors);
|
|
16328
|
+
}
|
|
16329
|
+
if (settings !== void 0 && isObject2(settings) && typeof provider === "string") {
|
|
16330
|
+
validateUnknownSettings(settings, provider, absolutePath, `${location}.settings`, errors);
|
|
16331
|
+
}
|
|
16332
|
+
const judgeTarget = target["judge_target"];
|
|
16333
|
+
if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
|
|
16334
|
+
errors.push({
|
|
16335
|
+
severity: "error",
|
|
16336
|
+
filePath: absolutePath,
|
|
16337
|
+
location: `${location}.judge_target`,
|
|
16338
|
+
message: "Invalid 'judge_target' field (must be a string)"
|
|
16339
|
+
});
|
|
16340
|
+
}
|
|
16341
|
+
}
|
|
16342
|
+
return {
|
|
16343
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
16344
|
+
filePath: absolutePath,
|
|
16345
|
+
fileType: "targets",
|
|
16346
|
+
errors
|
|
16347
|
+
};
|
|
16348
|
+
}
|
|
16349
|
+
var SCHEMA_CONFIG_V222 = "agentv-config-v2";
|
|
16350
|
+
async function validateConfigFile(filePath) {
|
|
16351
|
+
const errors = [];
|
|
16352
|
+
try {
|
|
16353
|
+
const content = await readFile42(filePath, "utf8");
|
|
16354
|
+
const parsed = parse42(content);
|
|
16355
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
16356
|
+
errors.push({
|
|
16357
|
+
severity: "error",
|
|
16358
|
+
filePath,
|
|
16359
|
+
message: "Config file must contain a valid YAML object"
|
|
16360
|
+
});
|
|
16361
|
+
return { valid: false, filePath, fileType: "config", errors };
|
|
16362
|
+
}
|
|
16363
|
+
const config = parsed;
|
|
16364
|
+
const schema = config["$schema"];
|
|
16365
|
+
if (schema !== SCHEMA_CONFIG_V222) {
|
|
16366
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
|
|
16367
|
+
errors.push({
|
|
16368
|
+
severity: "error",
|
|
16369
|
+
filePath,
|
|
16370
|
+
location: "$schema",
|
|
16371
|
+
message
|
|
16372
|
+
});
|
|
16373
|
+
}
|
|
16374
|
+
const guidelinePatterns = config["guideline_patterns"];
|
|
16375
|
+
if (guidelinePatterns !== void 0) {
|
|
16376
|
+
if (!Array.isArray(guidelinePatterns)) {
|
|
16377
|
+
errors.push({
|
|
16378
|
+
severity: "error",
|
|
16379
|
+
filePath,
|
|
16380
|
+
location: "guideline_patterns",
|
|
16381
|
+
message: "Field 'guideline_patterns' must be an array"
|
|
16382
|
+
});
|
|
16383
|
+
} else if (!guidelinePatterns.every((p) => typeof p === "string")) {
|
|
16384
|
+
errors.push({
|
|
16385
|
+
severity: "error",
|
|
16386
|
+
filePath,
|
|
16387
|
+
location: "guideline_patterns",
|
|
16388
|
+
message: "All entries in 'guideline_patterns' must be strings"
|
|
16389
|
+
});
|
|
16390
|
+
} else if (guidelinePatterns.length === 0) {
|
|
16391
|
+
errors.push({
|
|
16392
|
+
severity: "warning",
|
|
16393
|
+
filePath,
|
|
16394
|
+
location: "guideline_patterns",
|
|
16395
|
+
message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
|
|
16396
|
+
});
|
|
16397
|
+
}
|
|
16398
|
+
}
|
|
16399
|
+
const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
|
|
16400
|
+
const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
|
|
16401
|
+
if (unexpectedFields.length > 0) {
|
|
16402
|
+
errors.push({
|
|
16403
|
+
severity: "warning",
|
|
16404
|
+
filePath,
|
|
16405
|
+
message: `Unexpected fields: ${unexpectedFields.join(", ")}`
|
|
16639
16406
|
});
|
|
16640
16407
|
}
|
|
16408
|
+
return {
|
|
16409
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
16410
|
+
filePath,
|
|
16411
|
+
fileType: "config",
|
|
16412
|
+
errors
|
|
16413
|
+
};
|
|
16414
|
+
} catch (error) {
|
|
16415
|
+
errors.push({
|
|
16416
|
+
severity: "error",
|
|
16417
|
+
filePath,
|
|
16418
|
+
message: `Failed to parse config file: ${error.message}`
|
|
16419
|
+
});
|
|
16420
|
+
return { valid: false, filePath, fileType: "config", errors };
|
|
16421
|
+
}
|
|
16422
|
+
}
|
|
16423
|
+
function isObject3(value) {
|
|
16424
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
16425
|
+
}
|
|
16426
|
+
async function validateFileReferences(evalFilePath) {
|
|
16427
|
+
const errors = [];
|
|
16428
|
+
const absolutePath = path33.resolve(evalFilePath);
|
|
16429
|
+
const gitRoot = await findGitRoot(absolutePath);
|
|
16430
|
+
if (!gitRoot) {
|
|
16431
|
+
errors.push({
|
|
16432
|
+
severity: "error",
|
|
16433
|
+
filePath: absolutePath,
|
|
16434
|
+
message: "Cannot validate file references: git repository root not found"
|
|
16435
|
+
});
|
|
16436
|
+
return errors;
|
|
16437
|
+
}
|
|
16438
|
+
const searchRoots = buildSearchRoots(absolutePath, gitRoot);
|
|
16439
|
+
let parsed;
|
|
16440
|
+
try {
|
|
16441
|
+
const content = await readFile5(absolutePath, "utf8");
|
|
16442
|
+
parsed = parse5(content);
|
|
16443
|
+
} catch {
|
|
16444
|
+
return errors;
|
|
16445
|
+
}
|
|
16446
|
+
if (!isObject3(parsed)) {
|
|
16447
|
+
return errors;
|
|
16448
|
+
}
|
|
16449
|
+
const evalcases = parsed["evalcases"];
|
|
16450
|
+
if (!Array.isArray(evalcases)) {
|
|
16451
|
+
return errors;
|
|
16452
|
+
}
|
|
16453
|
+
for (let i6 = 0; i6 < evalcases.length; i6++) {
|
|
16454
|
+
const evalCase = evalcases[i6];
|
|
16455
|
+
if (!isObject3(evalCase)) {
|
|
16456
|
+
continue;
|
|
16457
|
+
}
|
|
16641
16458
|
const inputMessages = evalCase["input_messages"];
|
|
16642
|
-
if (
|
|
16643
|
-
|
|
16644
|
-
severity: "error",
|
|
16645
|
-
filePath: absolutePath,
|
|
16646
|
-
location: `${location}.input_messages`,
|
|
16647
|
-
message: "Missing or invalid 'input_messages' field (must be an array)"
|
|
16648
|
-
});
|
|
16649
|
-
} else {
|
|
16650
|
-
validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
|
|
16459
|
+
if (Array.isArray(inputMessages)) {
|
|
16460
|
+
await validateMessagesFileRefs(inputMessages, `evalcases[${i6}].input_messages`, searchRoots, absolutePath, errors);
|
|
16651
16461
|
}
|
|
16652
16462
|
const expectedMessages = evalCase["expected_messages"];
|
|
16653
|
-
if (
|
|
16654
|
-
|
|
16655
|
-
severity: "error",
|
|
16656
|
-
filePath: absolutePath,
|
|
16657
|
-
location: `${location}.expected_messages`,
|
|
16658
|
-
message: "Missing or invalid 'expected_messages' field (must be an array)"
|
|
16659
|
-
});
|
|
16660
|
-
} else {
|
|
16661
|
-
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
16463
|
+
if (Array.isArray(expectedMessages)) {
|
|
16464
|
+
await validateMessagesFileRefs(expectedMessages, `evalcases[${i6}].expected_messages`, searchRoots, absolutePath, errors);
|
|
16662
16465
|
}
|
|
16663
16466
|
}
|
|
16664
|
-
return
|
|
16665
|
-
valid: errors.length === 0,
|
|
16666
|
-
filePath: absolutePath,
|
|
16667
|
-
fileType: "eval",
|
|
16668
|
-
errors
|
|
16669
|
-
};
|
|
16467
|
+
return errors;
|
|
16670
16468
|
}
|
|
16671
|
-
function
|
|
16469
|
+
async function validateMessagesFileRefs(messages, location, searchRoots, filePath, errors) {
|
|
16672
16470
|
for (let i6 = 0; i6 < messages.length; i6++) {
|
|
16673
16471
|
const message = messages[i6];
|
|
16674
|
-
|
|
16675
|
-
if (!isObject(message)) {
|
|
16676
|
-
errors.push({
|
|
16677
|
-
severity: "error",
|
|
16678
|
-
filePath,
|
|
16679
|
-
location: msgLocation,
|
|
16680
|
-
message: "Message must be an object"
|
|
16681
|
-
});
|
|
16472
|
+
if (!isObject3(message)) {
|
|
16682
16473
|
continue;
|
|
16683
16474
|
}
|
|
16684
|
-
const role = message["role"];
|
|
16685
|
-
const validRoles = ["system", "user", "assistant"];
|
|
16686
|
-
if (!validRoles.includes(role)) {
|
|
16687
|
-
errors.push({
|
|
16688
|
-
severity: "error",
|
|
16689
|
-
filePath,
|
|
16690
|
-
location: `${msgLocation}.role`,
|
|
16691
|
-
message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
|
|
16692
|
-
});
|
|
16693
|
-
}
|
|
16694
16475
|
const content = message["content"];
|
|
16695
16476
|
if (typeof content === "string") {
|
|
16696
|
-
|
|
16697
|
-
|
|
16698
|
-
|
|
16699
|
-
|
|
16700
|
-
|
|
16701
|
-
|
|
16702
|
-
|
|
16703
|
-
|
|
16477
|
+
continue;
|
|
16478
|
+
}
|
|
16479
|
+
if (!Array.isArray(content)) {
|
|
16480
|
+
continue;
|
|
16481
|
+
}
|
|
16482
|
+
for (let j2 = 0; j2 < content.length; j2++) {
|
|
16483
|
+
const contentItem = content[j2];
|
|
16484
|
+
if (!isObject3(contentItem)) {
|
|
16485
|
+
continue;
|
|
16486
|
+
}
|
|
16487
|
+
const type = contentItem["type"];
|
|
16488
|
+
if (type !== "file") {
|
|
16489
|
+
continue;
|
|
16490
|
+
}
|
|
16491
|
+
const value = contentItem["value"];
|
|
16492
|
+
if (typeof value !== "string") {
|
|
16493
|
+
errors.push({
|
|
16494
|
+
severity: "error",
|
|
16495
|
+
filePath,
|
|
16496
|
+
location: `${location}[${i6}].content[${j2}].value`,
|
|
16497
|
+
message: "File reference must have a 'value' field with the file path"
|
|
16498
|
+
});
|
|
16499
|
+
continue;
|
|
16500
|
+
}
|
|
16501
|
+
const { resolvedPath } = await resolveFileReference(value, searchRoots);
|
|
16502
|
+
if (!resolvedPath) {
|
|
16503
|
+
errors.push({
|
|
16504
|
+
severity: "error",
|
|
16505
|
+
filePath,
|
|
16506
|
+
location: `${location}[${i6}].content[${j2}]`,
|
|
16507
|
+
message: `Referenced file not found: ${value}`
|
|
16508
|
+
});
|
|
16509
|
+
} else {
|
|
16510
|
+
try {
|
|
16511
|
+
const fileContent = await readFile5(resolvedPath, "utf8");
|
|
16512
|
+
if (fileContent.trim().length === 0) {
|
|
16704
16513
|
errors.push({
|
|
16705
|
-
severity: "
|
|
16514
|
+
severity: "warning",
|
|
16706
16515
|
filePath,
|
|
16707
|
-
location: `${
|
|
16708
|
-
message:
|
|
16516
|
+
location: `${location}[${i6}].content[${j2}]`,
|
|
16517
|
+
message: `Referenced file is empty: ${value}`
|
|
16709
16518
|
});
|
|
16710
16519
|
}
|
|
16711
|
-
|
|
16712
|
-
const value = contentItem["value"];
|
|
16713
|
-
if (typeof value !== "string") {
|
|
16714
|
-
errors.push({
|
|
16715
|
-
severity: "error",
|
|
16716
|
-
filePath,
|
|
16717
|
-
location: `${contentLocation}.value`,
|
|
16718
|
-
message: "Content with type 'text' must have a 'value' field"
|
|
16719
|
-
});
|
|
16720
|
-
}
|
|
16721
|
-
}
|
|
16722
|
-
} else {
|
|
16520
|
+
} catch (error) {
|
|
16723
16521
|
errors.push({
|
|
16724
16522
|
severity: "error",
|
|
16725
16523
|
filePath,
|
|
16726
|
-
location:
|
|
16727
|
-
message:
|
|
16524
|
+
location: `${location}[${i6}].content[${j2}]`,
|
|
16525
|
+
message: `Cannot read referenced file: ${value} (${error.message})`
|
|
16728
16526
|
});
|
|
16729
16527
|
}
|
|
16730
16528
|
}
|
|
16731
|
-
} else {
|
|
16732
|
-
errors.push({
|
|
16733
|
-
severity: "error",
|
|
16734
|
-
filePath,
|
|
16735
|
-
location: `${msgLocation}.content`,
|
|
16736
|
-
message: "Missing or invalid 'content' field (must be a string or array)"
|
|
16737
|
-
});
|
|
16738
16529
|
}
|
|
16739
16530
|
}
|
|
16740
16531
|
}
|
|
16741
|
-
|
|
16742
|
-
|
|
16532
|
+
|
|
16533
|
+
// src/commands/eval/targets.ts
|
|
16534
|
+
import { constants as constants5 } from "node:fs";
|
|
16535
|
+
import { access as access5, readFile as readFile6 } from "node:fs/promises";
|
|
16536
|
+
import path13 from "node:path";
|
|
16537
|
+
import { parse as parse6 } from "yaml";
|
|
16538
|
+
var TARGET_FILE_CANDIDATES = [
|
|
16539
|
+
"targets.yaml",
|
|
16540
|
+
"targets.yml",
|
|
16541
|
+
path13.join(".agentv", "targets.yaml"),
|
|
16542
|
+
path13.join(".agentv", "targets.yml")
|
|
16543
|
+
];
|
|
16544
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
16545
|
+
var ANSI_RED = "\x1B[31m";
|
|
16546
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
16547
|
+
function isTTY() {
|
|
16548
|
+
return process.stdout.isTTY ?? false;
|
|
16549
|
+
}
|
|
16550
|
+
async function fileExists5(filePath) {
|
|
16551
|
+
try {
|
|
16552
|
+
await access5(filePath, constants5.F_OK);
|
|
16553
|
+
return true;
|
|
16554
|
+
} catch {
|
|
16555
|
+
return false;
|
|
16556
|
+
}
|
|
16557
|
+
}
|
|
16558
|
+
async function readTestSuiteTarget(testFilePath) {
|
|
16559
|
+
try {
|
|
16560
|
+
const raw = await readFile6(path13.resolve(testFilePath), "utf8");
|
|
16561
|
+
const parsed = parse6(raw);
|
|
16562
|
+
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
16563
|
+
const targetValue = parsed.target;
|
|
16564
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
16565
|
+
return targetValue.trim();
|
|
16566
|
+
}
|
|
16567
|
+
}
|
|
16568
|
+
} catch {
|
|
16569
|
+
}
|
|
16570
|
+
return void 0;
|
|
16571
|
+
}
|
|
16572
|
+
async function discoverTargetsFile(options) {
|
|
16573
|
+
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
16574
|
+
if (explicitPath) {
|
|
16575
|
+
const resolvedExplicit = path13.resolve(explicitPath);
|
|
16576
|
+
if (await fileExists5(resolvedExplicit)) {
|
|
16577
|
+
return resolvedExplicit;
|
|
16578
|
+
}
|
|
16579
|
+
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16580
|
+
const nested = path13.join(resolvedExplicit, candidate);
|
|
16581
|
+
if (await fileExists5(nested)) {
|
|
16582
|
+
return nested;
|
|
16583
|
+
}
|
|
16584
|
+
}
|
|
16585
|
+
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
16586
|
+
}
|
|
16587
|
+
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
16588
|
+
const resolvedCwd = path13.resolve(cwd);
|
|
16589
|
+
if (!directories.includes(resolvedCwd)) {
|
|
16590
|
+
directories.push(resolvedCwd);
|
|
16591
|
+
}
|
|
16592
|
+
for (const directory of directories) {
|
|
16593
|
+
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16594
|
+
const fullPath = path13.join(directory, candidate);
|
|
16595
|
+
if (await fileExists5(fullPath)) {
|
|
16596
|
+
return fullPath;
|
|
16597
|
+
}
|
|
16598
|
+
}
|
|
16599
|
+
}
|
|
16600
|
+
throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
|
|
16601
|
+
}
|
|
16602
|
+
function pickTargetName(options) {
|
|
16603
|
+
const cliName = options.cliTargetName?.trim();
|
|
16604
|
+
if (cliName && cliName !== "default") {
|
|
16605
|
+
return { name: cliName, source: "cli" };
|
|
16606
|
+
}
|
|
16607
|
+
const fileName = options.fileTargetName?.trim();
|
|
16608
|
+
if (fileName && fileName.length > 0) {
|
|
16609
|
+
return { name: fileName, source: "test-file" };
|
|
16610
|
+
}
|
|
16611
|
+
return { name: "default", source: "default" };
|
|
16612
|
+
}
|
|
16613
|
+
async function selectTarget(options) {
|
|
16614
|
+
const { testFilePath, repoRoot, cwd, explicitTargetsPath, cliTargetName, dryRun, dryRunDelay, dryRunDelayMin, dryRunDelayMax, env } = options;
|
|
16615
|
+
const targetsFilePath = await discoverTargetsFile({
|
|
16616
|
+
explicitPath: explicitTargetsPath,
|
|
16617
|
+
testFilePath,
|
|
16618
|
+
repoRoot,
|
|
16619
|
+
cwd
|
|
16620
|
+
});
|
|
16621
|
+
const validationResult = await validateTargetsFile(targetsFilePath);
|
|
16622
|
+
const warnings = validationResult.errors.filter((e) => e.severity === "warning");
|
|
16623
|
+
const useColors = isTTY();
|
|
16624
|
+
if (warnings.length > 0) {
|
|
16625
|
+
console.warn(`
|
|
16626
|
+
Warnings in ${targetsFilePath}:`);
|
|
16627
|
+
for (const warning of warnings) {
|
|
16628
|
+
const location = warning.location ? ` [${warning.location}]` : "";
|
|
16629
|
+
const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
|
|
16630
|
+
const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
|
|
16631
|
+
console.warn(`${prefix}${location} ${message}`);
|
|
16632
|
+
}
|
|
16633
|
+
console.warn("");
|
|
16634
|
+
}
|
|
16635
|
+
const errors = validationResult.errors.filter((e) => e.severity === "error");
|
|
16636
|
+
if (errors.length > 0) {
|
|
16637
|
+
console.error(`
|
|
16638
|
+
Errors in ${targetsFilePath}:`);
|
|
16639
|
+
for (const error of errors) {
|
|
16640
|
+
const location = error.location ? ` [${error.location}]` : "";
|
|
16641
|
+
const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET2}` : " \u2717";
|
|
16642
|
+
const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET2}` : error.message;
|
|
16643
|
+
console.error(`${prefix}${location} ${message}`);
|
|
16644
|
+
}
|
|
16645
|
+
throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
|
|
16646
|
+
}
|
|
16647
|
+
const definitions = await readTargetDefinitions(targetsFilePath);
|
|
16648
|
+
const fileTargetName = await readTestSuiteTarget(testFilePath);
|
|
16649
|
+
const targetChoice = pickTargetName({ cliTargetName, fileTargetName });
|
|
16650
|
+
const targetDefinition = definitions.find((definition) => definition.name === targetChoice.name);
|
|
16651
|
+
if (!targetDefinition) {
|
|
16652
|
+
const available = listTargetNames(definitions).join(", ");
|
|
16653
|
+
throw new Error(
|
|
16654
|
+
`Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
|
|
16655
|
+
);
|
|
16656
|
+
}
|
|
16657
|
+
if (dryRun) {
|
|
16658
|
+
const mockTarget = {
|
|
16659
|
+
kind: "mock",
|
|
16660
|
+
name: `${targetDefinition.name}-dry-run`,
|
|
16661
|
+
judgeTarget: void 0,
|
|
16662
|
+
config: {
|
|
16663
|
+
response: '{"answer":"Mock dry-run response"}',
|
|
16664
|
+
delayMs: dryRunDelay,
|
|
16665
|
+
delayMinMs: dryRunDelayMin,
|
|
16666
|
+
delayMaxMs: dryRunDelayMax
|
|
16667
|
+
}
|
|
16668
|
+
};
|
|
16669
|
+
return {
|
|
16670
|
+
definitions,
|
|
16671
|
+
resolvedTarget: mockTarget,
|
|
16672
|
+
targetName: targetChoice.name,
|
|
16673
|
+
targetSource: targetChoice.source,
|
|
16674
|
+
targetsFilePath
|
|
16675
|
+
};
|
|
16676
|
+
}
|
|
16677
|
+
try {
|
|
16678
|
+
const resolvedTarget = resolveTargetDefinition(targetDefinition, env);
|
|
16679
|
+
return {
|
|
16680
|
+
definitions,
|
|
16681
|
+
resolvedTarget,
|
|
16682
|
+
targetName: targetChoice.name,
|
|
16683
|
+
targetSource: targetChoice.source,
|
|
16684
|
+
targetsFilePath
|
|
16685
|
+
};
|
|
16686
|
+
} catch (error) {
|
|
16687
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
16688
|
+
throw new Error(`Failed to resolve target '${targetChoice.name}': ${message}`);
|
|
16689
|
+
}
|
|
16690
|
+
}
|
|
16691
|
+
|
|
16692
|
+
// src/commands/eval/run-eval.ts
|
|
16693
|
+
var DEFAULT_WORKERS = 3;
|
|
16694
|
+
function normalizeBoolean(value) {
|
|
16695
|
+
return value === true;
|
|
16696
|
+
}
|
|
16697
|
+
function normalizeString(value) {
|
|
16698
|
+
if (typeof value !== "string") {
|
|
16699
|
+
return void 0;
|
|
16700
|
+
}
|
|
16701
|
+
const trimmed = value.trim();
|
|
16702
|
+
return trimmed.length > 0 ? trimmed : void 0;
|
|
16703
|
+
}
|
|
16704
|
+
function normalizeNumber(value, fallback) {
|
|
16705
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
16706
|
+
return value;
|
|
16707
|
+
}
|
|
16708
|
+
if (typeof value === "string") {
|
|
16709
|
+
const parsed = Number.parseInt(value, 10);
|
|
16710
|
+
if (!Number.isNaN(parsed)) {
|
|
16711
|
+
return parsed;
|
|
16712
|
+
}
|
|
16713
|
+
}
|
|
16714
|
+
return fallback;
|
|
16715
|
+
}
|
|
16716
|
+
function normalizeOptions(rawOptions) {
|
|
16717
|
+
const formatStr = normalizeString(rawOptions.outputFormat) ?? "jsonl";
|
|
16718
|
+
const format = formatStr === "yaml" ? "yaml" : "jsonl";
|
|
16719
|
+
const workers = normalizeNumber(rawOptions.workers, 0);
|
|
16720
|
+
return {
|
|
16721
|
+
target: normalizeString(rawOptions.target),
|
|
16722
|
+
targetsPath: normalizeString(rawOptions.targets),
|
|
16723
|
+
evalId: normalizeString(rawOptions.evalId),
|
|
16724
|
+
workers: workers > 0 ? workers : void 0,
|
|
16725
|
+
outPath: normalizeString(rawOptions.out),
|
|
16726
|
+
format,
|
|
16727
|
+
dryRun: normalizeBoolean(rawOptions.dryRun),
|
|
16728
|
+
dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
|
|
16729
|
+
dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
|
|
16730
|
+
dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0),
|
|
16731
|
+
agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120),
|
|
16732
|
+
maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
|
|
16733
|
+
cache: normalizeBoolean(rawOptions.cache),
|
|
16734
|
+
verbose: normalizeBoolean(rawOptions.verbose),
|
|
16735
|
+
dumpPrompts: rawOptions.dumpPrompts
|
|
16736
|
+
};
|
|
16743
16737
|
}
|
|
16744
|
-
|
|
16745
|
-
async function validateTargetsFile(filePath) {
|
|
16746
|
-
const errors = [];
|
|
16747
|
-
const absolutePath = path23.resolve(filePath);
|
|
16748
|
-
let parsed;
|
|
16738
|
+
async function ensureFileExists(filePath, description) {
|
|
16749
16739
|
try {
|
|
16750
|
-
|
|
16751
|
-
|
|
16752
|
-
|
|
16753
|
-
errors.push({
|
|
16754
|
-
severity: "error",
|
|
16755
|
-
filePath: absolutePath,
|
|
16756
|
-
message: `Failed to parse YAML: ${error.message}`
|
|
16757
|
-
});
|
|
16758
|
-
return {
|
|
16759
|
-
valid: false,
|
|
16760
|
-
filePath: absolutePath,
|
|
16761
|
-
fileType: "targets",
|
|
16762
|
-
errors
|
|
16763
|
-
};
|
|
16740
|
+
await access6(filePath, constants6.F_OK);
|
|
16741
|
+
} catch {
|
|
16742
|
+
throw new Error(`${description} not found: ${filePath}`);
|
|
16764
16743
|
}
|
|
16765
|
-
|
|
16766
|
-
|
|
16767
|
-
|
|
16768
|
-
|
|
16769
|
-
|
|
16770
|
-
|
|
16771
|
-
|
|
16772
|
-
|
|
16773
|
-
return;
|
|
16774
|
-
}
|
|
16775
|
-
|
|
16776
|
-
|
|
16777
|
-
|
|
16778
|
-
severity: "error",
|
|
16779
|
-
filePath: absolutePath2,
|
|
16780
|
-
location: `${location}.commandTemplate`,
|
|
16781
|
-
message: "CLI provider requires 'commandTemplate' as a non-empty string"
|
|
16782
|
-
});
|
|
16783
|
-
} else {
|
|
16784
|
-
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16785
|
-
}
|
|
16786
|
-
const attachmentsFormat = settings["attachments_format"] ?? settings["attachmentsFormat"];
|
|
16787
|
-
if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
|
|
16788
|
-
errors2.push({
|
|
16789
|
-
severity: "error",
|
|
16790
|
-
filePath: absolutePath2,
|
|
16791
|
-
location: `${location}.attachmentsFormat`,
|
|
16792
|
-
message: "'attachmentsFormat' must be a string when provided"
|
|
16793
|
-
});
|
|
16794
|
-
}
|
|
16795
|
-
const filesFormat = settings["files_format"] ?? settings["filesFormat"];
|
|
16796
|
-
if (filesFormat !== void 0 && typeof filesFormat !== "string") {
|
|
16797
|
-
errors2.push({
|
|
16798
|
-
severity: "error",
|
|
16799
|
-
filePath: absolutePath2,
|
|
16800
|
-
location: `${location}.filesFormat`,
|
|
16801
|
-
message: "'filesFormat' must be a string when provided"
|
|
16802
|
-
});
|
|
16803
|
-
}
|
|
16804
|
-
const cwd = settings["cwd"];
|
|
16805
|
-
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16806
|
-
errors2.push({
|
|
16807
|
-
severity: "error",
|
|
16808
|
-
filePath: absolutePath2,
|
|
16809
|
-
location: `${location}.cwd`,
|
|
16810
|
-
message: "'cwd' must be a string when provided"
|
|
16811
|
-
});
|
|
16812
|
-
}
|
|
16813
|
-
const timeoutSeconds = settings["timeout_seconds"] ?? settings["timeoutSeconds"];
|
|
16814
|
-
if (timeoutSeconds !== void 0) {
|
|
16815
|
-
const numericTimeout = Number(timeoutSeconds);
|
|
16816
|
-
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
16817
|
-
errors2.push({
|
|
16818
|
-
severity: "error",
|
|
16819
|
-
filePath: absolutePath2,
|
|
16820
|
-
location: `${location}.timeoutSeconds`,
|
|
16821
|
-
message: "'timeoutSeconds' must be a positive number when provided"
|
|
16822
|
-
});
|
|
16823
|
-
}
|
|
16824
|
-
}
|
|
16825
|
-
const envOverrides = settings["env"];
|
|
16826
|
-
if (envOverrides !== void 0) {
|
|
16827
|
-
if (!isObject2(envOverrides)) {
|
|
16828
|
-
errors2.push({
|
|
16829
|
-
severity: "error",
|
|
16830
|
-
filePath: absolutePath2,
|
|
16831
|
-
location: `${location}.env`,
|
|
16832
|
-
message: "'env' must be an object with string values"
|
|
16833
|
-
});
|
|
16834
|
-
} else {
|
|
16835
|
-
for (const [key2, value] of Object.entries(envOverrides)) {
|
|
16836
|
-
if (typeof value !== "string" || value.trim().length === 0) {
|
|
16837
|
-
errors2.push({
|
|
16838
|
-
severity: "error",
|
|
16839
|
-
filePath: absolutePath2,
|
|
16840
|
-
location: `${location}.env.${key2}`,
|
|
16841
|
-
message: `Environment override '${key2}' must be a non-empty string`
|
|
16842
|
-
});
|
|
16843
|
-
}
|
|
16844
|
-
}
|
|
16744
|
+
}
|
|
16745
|
+
async function findRepoRoot(start) {
|
|
16746
|
+
const fallback = path14.resolve(start);
|
|
16747
|
+
let current = fallback;
|
|
16748
|
+
while (current !== void 0) {
|
|
16749
|
+
const candidate = path14.join(current, ".git");
|
|
16750
|
+
try {
|
|
16751
|
+
await access6(candidate, constants6.F_OK);
|
|
16752
|
+
return current;
|
|
16753
|
+
} catch {
|
|
16754
|
+
const parent = path14.dirname(current);
|
|
16755
|
+
if (parent === current) {
|
|
16756
|
+
break;
|
|
16845
16757
|
}
|
|
16846
|
-
|
|
16847
|
-
const healthcheck = settings["healthcheck"];
|
|
16848
|
-
if (healthcheck !== void 0) {
|
|
16849
|
-
validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
|
|
16758
|
+
current = parent;
|
|
16850
16759
|
}
|
|
16851
16760
|
}
|
|
16852
|
-
|
|
16853
|
-
|
|
16854
|
-
|
|
16855
|
-
|
|
16856
|
-
|
|
16857
|
-
|
|
16858
|
-
|
|
16859
|
-
|
|
16860
|
-
|
|
16861
|
-
|
|
16862
|
-
|
|
16863
|
-
|
|
16864
|
-
|
|
16865
|
-
|
|
16866
|
-
|
|
16867
|
-
|
|
16868
|
-
|
|
16869
|
-
|
|
16870
|
-
|
|
16871
|
-
|
|
16872
|
-
|
|
16873
|
-
|
|
16874
|
-
|
|
16875
|
-
|
|
16876
|
-
|
|
16877
|
-
severity: "error",
|
|
16878
|
-
filePath: absolutePath2,
|
|
16879
|
-
location: `${location}.timeoutSeconds`,
|
|
16880
|
-
message: "healthcheck.timeoutSeconds must be a positive number when provided"
|
|
16881
|
-
});
|
|
16882
|
-
}
|
|
16883
|
-
}
|
|
16884
|
-
if (type === "http") {
|
|
16885
|
-
const url = healthcheck["url"];
|
|
16886
|
-
if (typeof url !== "string" || url.trim().length === 0) {
|
|
16887
|
-
errors2.push({
|
|
16888
|
-
severity: "error",
|
|
16889
|
-
filePath: absolutePath2,
|
|
16890
|
-
location: `${location}.url`,
|
|
16891
|
-
message: "healthcheck.url must be a non-empty string for http checks"
|
|
16892
|
-
});
|
|
16893
|
-
}
|
|
16894
|
-
return;
|
|
16761
|
+
return fallback;
|
|
16762
|
+
}
|
|
16763
|
+
function buildDefaultOutputPath(cwd, format) {
|
|
16764
|
+
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
16765
|
+
const baseName = "eval";
|
|
16766
|
+
const extension = getDefaultExtension(format);
|
|
16767
|
+
return path14.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
16768
|
+
}
|
|
16769
|
+
function resolvePromptDirectory(option, cwd) {
|
|
16770
|
+
if (option === void 0) {
|
|
16771
|
+
return void 0;
|
|
16772
|
+
}
|
|
16773
|
+
if (typeof option === "string" && option.trim().length > 0) {
|
|
16774
|
+
return path14.resolve(cwd, option);
|
|
16775
|
+
}
|
|
16776
|
+
return path14.join(cwd, ".agentv", "prompts");
|
|
16777
|
+
}
|
|
16778
|
+
function createEvaluationCache() {
|
|
16779
|
+
const store = /* @__PURE__ */ new Map();
|
|
16780
|
+
return {
|
|
16781
|
+
async get(key2) {
|
|
16782
|
+
return store.get(key2);
|
|
16783
|
+
},
|
|
16784
|
+
async set(key2, value) {
|
|
16785
|
+
store.set(key2, value);
|
|
16895
16786
|
}
|
|
16896
|
-
|
|
16897
|
-
|
|
16898
|
-
|
|
16899
|
-
|
|
16900
|
-
|
|
16901
|
-
|
|
16902
|
-
|
|
16903
|
-
|
|
16904
|
-
|
|
16905
|
-
|
|
16787
|
+
};
|
|
16788
|
+
}
|
|
16789
|
+
function createProgressReporter(maxWorkers) {
|
|
16790
|
+
const display = new ProgressDisplay(maxWorkers);
|
|
16791
|
+
return {
|
|
16792
|
+
isInteractive: display.isInteractiveMode(),
|
|
16793
|
+
start: () => display.start(),
|
|
16794
|
+
setTotal: (total) => display.setTotalTests(total),
|
|
16795
|
+
update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
|
|
16796
|
+
finish: () => display.finish(),
|
|
16797
|
+
addLogPaths: (paths) => display.addLogPaths(paths)
|
|
16798
|
+
};
|
|
16799
|
+
}
|
|
16800
|
+
function makeEvalKey(testFilePath, evalId) {
|
|
16801
|
+
return `${path14.resolve(testFilePath)}::${evalId}`;
|
|
16802
|
+
}
|
|
16803
|
+
function createDisplayIdTracker() {
|
|
16804
|
+
const map = /* @__PURE__ */ new Map();
|
|
16805
|
+
let nextId = 1;
|
|
16806
|
+
return {
|
|
16807
|
+
getOrAssign(evalKey) {
|
|
16808
|
+
const existing = map.get(evalKey);
|
|
16809
|
+
if (existing !== void 0) {
|
|
16810
|
+
return existing;
|
|
16811
|
+
}
|
|
16812
|
+
const assigned = nextId++;
|
|
16813
|
+
map.set(evalKey, assigned);
|
|
16814
|
+
return assigned;
|
|
16906
16815
|
}
|
|
16907
|
-
|
|
16908
|
-
|
|
16909
|
-
|
|
16910
|
-
|
|
16911
|
-
|
|
16912
|
-
|
|
16913
|
-
|
|
16914
|
-
|
|
16816
|
+
};
|
|
16817
|
+
}
|
|
16818
|
+
async function prepareFileMetadata(params) {
|
|
16819
|
+
const { testFilePath, repoRoot, cwd, options } = params;
|
|
16820
|
+
await ensureFileExists(testFilePath, "Test file");
|
|
16821
|
+
await loadEnvFromHierarchy({
|
|
16822
|
+
testFilePath,
|
|
16823
|
+
repoRoot,
|
|
16824
|
+
verbose: options.verbose
|
|
16825
|
+
});
|
|
16826
|
+
const selection = await selectTarget({
|
|
16827
|
+
testFilePath,
|
|
16828
|
+
repoRoot,
|
|
16829
|
+
cwd,
|
|
16830
|
+
explicitTargetsPath: options.targetsPath,
|
|
16831
|
+
cliTargetName: options.target,
|
|
16832
|
+
dryRun: options.dryRun,
|
|
16833
|
+
dryRunDelay: options.dryRunDelay,
|
|
16834
|
+
dryRunDelayMin: options.dryRunDelayMin,
|
|
16835
|
+
dryRunDelayMax: options.dryRunDelayMax,
|
|
16836
|
+
env: process.env
|
|
16837
|
+
});
|
|
16838
|
+
const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
|
|
16839
|
+
const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
|
|
16840
|
+
const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose });
|
|
16841
|
+
const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
|
|
16842
|
+
return { evalIds: filteredIds, selection, inlineTargetLabel };
|
|
16843
|
+
}
|
|
16844
|
+
async function runWithLimit(items, limit, task) {
|
|
16845
|
+
const safeLimit = Math.max(1, limit);
|
|
16846
|
+
let index = 0;
|
|
16847
|
+
const workers = Array.from({ length: safeLimit }, async () => {
|
|
16848
|
+
while (index < items.length) {
|
|
16849
|
+
const current = items[index];
|
|
16850
|
+
index += 1;
|
|
16851
|
+
await task(current);
|
|
16915
16852
|
}
|
|
16853
|
+
});
|
|
16854
|
+
await Promise.all(workers);
|
|
16855
|
+
}
|
|
16856
|
+
async function runSingleEvalFile(params) {
|
|
16857
|
+
const {
|
|
16858
|
+
testFilePath,
|
|
16859
|
+
cwd,
|
|
16860
|
+
repoRoot,
|
|
16861
|
+
options,
|
|
16862
|
+
outputWriter,
|
|
16863
|
+
cache,
|
|
16864
|
+
evaluationRunner,
|
|
16865
|
+
workersOverride,
|
|
16866
|
+
progressReporter,
|
|
16867
|
+
seenEvalCases,
|
|
16868
|
+
displayIdTracker,
|
|
16869
|
+
selection,
|
|
16870
|
+
inlineTargetLabel
|
|
16871
|
+
} = params;
|
|
16872
|
+
await ensureFileExists(testFilePath, "Test file");
|
|
16873
|
+
const resolvedTargetSelection = selection;
|
|
16874
|
+
const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
|
|
16875
|
+
const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
|
|
16876
|
+
if (!progressReporter.isInteractive || options.verbose) {
|
|
16877
|
+
console.log(targetMessage);
|
|
16916
16878
|
}
|
|
16917
|
-
|
|
16918
|
-
|
|
16919
|
-
|
|
16920
|
-
|
|
16921
|
-
|
|
16922
|
-
severity: "error",
|
|
16923
|
-
filePath: absolutePath2,
|
|
16924
|
-
location,
|
|
16925
|
-
message: `Unknown CLI placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS2).join(", ")}`
|
|
16926
|
-
});
|
|
16927
|
-
}
|
|
16879
|
+
const promptDumpDir = resolvePromptDirectory(options.dumpPrompts, cwd);
|
|
16880
|
+
if (promptDumpDir) {
|
|
16881
|
+
await mkdir6(promptDumpDir, { recursive: true });
|
|
16882
|
+
if (options.verbose) {
|
|
16883
|
+
console.log(`Prompt dumps enabled at: ${promptDumpDir}`);
|
|
16928
16884
|
}
|
|
16929
16885
|
}
|
|
16930
|
-
|
|
16931
|
-
|
|
16932
|
-
|
|
16933
|
-
|
|
16934
|
-
|
|
16935
|
-
if (placeholder) {
|
|
16936
|
-
result.push(placeholder);
|
|
16937
|
-
}
|
|
16938
|
-
}
|
|
16939
|
-
return result;
|
|
16886
|
+
const agentTimeoutMs = Math.max(0, options.agentTimeoutSeconds) * 1e3;
|
|
16887
|
+
const workerPreference = workersOverride ?? options.workers;
|
|
16888
|
+
let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
|
|
16889
|
+
if (resolvedWorkers < 1 || resolvedWorkers > 50) {
|
|
16890
|
+
throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
|
|
16940
16891
|
}
|
|
16941
|
-
|
|
16942
|
-
|
|
16943
|
-
|
|
16944
|
-
|
|
16945
|
-
|
|
16946
|
-
|
|
16947
|
-
return {
|
|
16948
|
-
valid: false,
|
|
16949
|
-
filePath: absolutePath,
|
|
16950
|
-
fileType: "targets",
|
|
16951
|
-
errors
|
|
16952
|
-
};
|
|
16892
|
+
const isVSCodeProvider = ["vscode", "vscode-insiders"].includes(
|
|
16893
|
+
resolvedTargetSelection.resolvedTarget.kind
|
|
16894
|
+
);
|
|
16895
|
+
if (isVSCodeProvider && resolvedWorkers > 1) {
|
|
16896
|
+
console.warn(`Warning: VSCode providers require window focus. Limiting workers from ${resolvedWorkers} to 1 to prevent race conditions.`);
|
|
16897
|
+
resolvedWorkers = 1;
|
|
16953
16898
|
}
|
|
16954
|
-
|
|
16955
|
-
|
|
16956
|
-
|
|
16957
|
-
errors.push({
|
|
16958
|
-
severity: "error",
|
|
16959
|
-
filePath: absolutePath,
|
|
16960
|
-
location: "$schema",
|
|
16961
|
-
message
|
|
16962
|
-
});
|
|
16899
|
+
if (options.verbose) {
|
|
16900
|
+
const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
|
|
16901
|
+
console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
|
|
16963
16902
|
}
|
|
16964
|
-
|
|
16965
|
-
|
|
16966
|
-
|
|
16967
|
-
|
|
16968
|
-
|
|
16969
|
-
location: "targets",
|
|
16970
|
-
message: "Missing or invalid 'targets' field (must be an array)"
|
|
16903
|
+
if (isVSCodeProvider && !options.dryRun) {
|
|
16904
|
+
await ensureVSCodeSubagents({
|
|
16905
|
+
kind: resolvedTargetSelection.resolvedTarget.kind,
|
|
16906
|
+
count: resolvedWorkers,
|
|
16907
|
+
verbose: options.verbose
|
|
16971
16908
|
});
|
|
16972
|
-
return {
|
|
16973
|
-
valid: errors.length === 0,
|
|
16974
|
-
filePath: absolutePath,
|
|
16975
|
-
fileType: "targets",
|
|
16976
|
-
errors
|
|
16977
|
-
};
|
|
16978
16909
|
}
|
|
16979
|
-
const
|
|
16980
|
-
|
|
16981
|
-
|
|
16982
|
-
|
|
16983
|
-
|
|
16984
|
-
|
|
16985
|
-
|
|
16986
|
-
|
|
16987
|
-
|
|
16988
|
-
|
|
16989
|
-
|
|
16990
|
-
|
|
16991
|
-
|
|
16992
|
-
|
|
16993
|
-
|
|
16994
|
-
|
|
16995
|
-
|
|
16996
|
-
|
|
16997
|
-
|
|
16998
|
-
|
|
16910
|
+
const results = await evaluationRunner({
|
|
16911
|
+
testFilePath,
|
|
16912
|
+
repoRoot,
|
|
16913
|
+
target: resolvedTargetSelection.resolvedTarget,
|
|
16914
|
+
targets: resolvedTargetSelection.definitions,
|
|
16915
|
+
env: process.env,
|
|
16916
|
+
maxRetries: Math.max(0, options.maxRetries),
|
|
16917
|
+
agentTimeoutMs,
|
|
16918
|
+
promptDumpDir,
|
|
16919
|
+
cache,
|
|
16920
|
+
useCache: options.cache,
|
|
16921
|
+
evalId: options.evalId,
|
|
16922
|
+
verbose: options.verbose,
|
|
16923
|
+
maxConcurrency: resolvedWorkers,
|
|
16924
|
+
onResult: async (result) => {
|
|
16925
|
+
await outputWriter.append(result);
|
|
16926
|
+
},
|
|
16927
|
+
onProgress: async (event) => {
|
|
16928
|
+
const evalKey = makeEvalKey(testFilePath, event.evalId);
|
|
16929
|
+
if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
|
|
16930
|
+
seenEvalCases.add(evalKey);
|
|
16931
|
+
progressReporter.setTotal(seenEvalCases.size);
|
|
16932
|
+
}
|
|
16933
|
+
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16934
|
+
progressReporter.update(displayId, {
|
|
16935
|
+
workerId: displayId,
|
|
16936
|
+
evalId: event.evalId,
|
|
16937
|
+
status: event.status,
|
|
16938
|
+
startedAt: event.startedAt,
|
|
16939
|
+
completedAt: event.completedAt,
|
|
16940
|
+
error: event.error,
|
|
16941
|
+
targetLabel: inlineTargetLabel
|
|
16999
16942
|
});
|
|
17000
16943
|
}
|
|
17001
|
-
|
|
17002
|
-
|
|
17003
|
-
|
|
17004
|
-
|
|
17005
|
-
|
|
17006
|
-
|
|
17007
|
-
|
|
17008
|
-
|
|
17009
|
-
|
|
17010
|
-
|
|
17011
|
-
|
|
17012
|
-
|
|
17013
|
-
|
|
17014
|
-
|
|
17015
|
-
|
|
17016
|
-
|
|
16944
|
+
});
|
|
16945
|
+
return { results: [...results], promptDumpDir };
|
|
16946
|
+
}
|
|
16947
|
+
async function runEvalCommand(input) {
|
|
16948
|
+
const options = normalizeOptions(input.rawOptions);
|
|
16949
|
+
const cwd = process.cwd();
|
|
16950
|
+
const repoRoot = await findRepoRoot(cwd);
|
|
16951
|
+
if (options.verbose) {
|
|
16952
|
+
console.log(`Repository root: ${repoRoot}`);
|
|
16953
|
+
}
|
|
16954
|
+
const outputPath = options.outPath ? path14.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
16955
|
+
console.log(`Output path: ${outputPath}`);
|
|
16956
|
+
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
16957
|
+
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
16958
|
+
const evaluationRunner = await resolveEvaluationRunner();
|
|
16959
|
+
const allResults = [];
|
|
16960
|
+
let lastPromptDumpDir;
|
|
16961
|
+
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
16962
|
+
const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
|
|
16963
|
+
const displayIdTracker = createDisplayIdTracker();
|
|
16964
|
+
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
16965
|
+
const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
|
|
16966
|
+
const perFileWorkers = options.workers ? Math.max(1, Math.floor(totalWorkers / fileConcurrency)) : void 0;
|
|
16967
|
+
const fileMetadata = /* @__PURE__ */ new Map();
|
|
16968
|
+
for (const testFilePath of resolvedTestFiles) {
|
|
16969
|
+
const meta = await prepareFileMetadata({
|
|
16970
|
+
testFilePath,
|
|
16971
|
+
repoRoot,
|
|
16972
|
+
cwd,
|
|
16973
|
+
options
|
|
16974
|
+
});
|
|
16975
|
+
fileMetadata.set(testFilePath, meta);
|
|
16976
|
+
}
|
|
16977
|
+
const totalEvalCount = Array.from(fileMetadata.values()).reduce(
|
|
16978
|
+
(sum, meta) => sum + meta.evalIds.length,
|
|
16979
|
+
0
|
|
16980
|
+
);
|
|
16981
|
+
if (totalEvalCount === 0) {
|
|
16982
|
+
throw new Error("No eval cases matched the provided filters.");
|
|
16983
|
+
}
|
|
16984
|
+
const progressReporter = createProgressReporter(totalWorkers);
|
|
16985
|
+
progressReporter.start();
|
|
16986
|
+
progressReporter.setTotal(totalEvalCount);
|
|
16987
|
+
const seenCodexLogPaths = /* @__PURE__ */ new Set();
|
|
16988
|
+
const unsubscribeCodexLogs = subscribeToCodexLogEntries((entry) => {
|
|
16989
|
+
if (!entry.filePath || seenCodexLogPaths.has(entry.filePath)) {
|
|
16990
|
+
return;
|
|
17017
16991
|
}
|
|
17018
|
-
|
|
17019
|
-
|
|
17020
|
-
|
|
17021
|
-
|
|
17022
|
-
|
|
17023
|
-
|
|
17024
|
-
|
|
16992
|
+
seenCodexLogPaths.add(entry.filePath);
|
|
16993
|
+
progressReporter.addLogPaths([entry.filePath]);
|
|
16994
|
+
});
|
|
16995
|
+
for (const [testFilePath, meta] of fileMetadata.entries()) {
|
|
16996
|
+
for (const evalId of meta.evalIds) {
|
|
16997
|
+
const evalKey = makeEvalKey(testFilePath, evalId);
|
|
16998
|
+
seenEvalCases.add(evalKey);
|
|
16999
|
+
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
17000
|
+
progressReporter.update(displayId, {
|
|
17001
|
+
workerId: displayId,
|
|
17002
|
+
evalId,
|
|
17003
|
+
status: "pending",
|
|
17004
|
+
targetLabel: meta.inlineTargetLabel
|
|
17025
17005
|
});
|
|
17026
17006
|
}
|
|
17027
|
-
|
|
17028
|
-
|
|
17029
|
-
|
|
17030
|
-
|
|
17031
|
-
|
|
17032
|
-
|
|
17033
|
-
|
|
17034
|
-
|
|
17035
|
-
|
|
17036
|
-
|
|
17007
|
+
}
|
|
17008
|
+
try {
|
|
17009
|
+
await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
|
|
17010
|
+
const targetPrep = fileMetadata.get(testFilePath);
|
|
17011
|
+
if (!targetPrep) {
|
|
17012
|
+
throw new Error(`Missing metadata for ${testFilePath}`);
|
|
17013
|
+
}
|
|
17014
|
+
const result = await runSingleEvalFile({
|
|
17015
|
+
testFilePath,
|
|
17016
|
+
cwd,
|
|
17017
|
+
repoRoot,
|
|
17018
|
+
options,
|
|
17019
|
+
outputWriter,
|
|
17020
|
+
cache,
|
|
17021
|
+
evaluationRunner,
|
|
17022
|
+
workersOverride: perFileWorkers,
|
|
17023
|
+
progressReporter,
|
|
17024
|
+
seenEvalCases,
|
|
17025
|
+
displayIdTracker,
|
|
17026
|
+
selection: targetPrep.selection,
|
|
17027
|
+
inlineTargetLabel: targetPrep.inlineTargetLabel
|
|
17037
17028
|
});
|
|
17029
|
+
allResults.push(...result.results);
|
|
17030
|
+
if (result.promptDumpDir) {
|
|
17031
|
+
lastPromptDumpDir = result.promptDumpDir;
|
|
17032
|
+
}
|
|
17033
|
+
});
|
|
17034
|
+
progressReporter.finish();
|
|
17035
|
+
const summary = calculateEvaluationSummary(allResults);
|
|
17036
|
+
console.log(formatEvaluationSummary(summary));
|
|
17037
|
+
if (allResults.length > 0) {
|
|
17038
|
+
console.log(`
|
|
17039
|
+
Results written to: ${outputPath}`);
|
|
17038
17040
|
}
|
|
17041
|
+
if (lastPromptDumpDir && allResults.length > 0) {
|
|
17042
|
+
console.log(`Prompt payloads saved to: ${lastPromptDumpDir}`);
|
|
17043
|
+
}
|
|
17044
|
+
} finally {
|
|
17045
|
+
unsubscribeCodexLogs();
|
|
17046
|
+
await outputWriter.close().catch(() => void 0);
|
|
17039
17047
|
}
|
|
17040
|
-
return {
|
|
17041
|
-
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
17042
|
-
filePath: absolutePath,
|
|
17043
|
-
fileType: "targets",
|
|
17044
|
-
errors
|
|
17045
|
-
};
|
|
17046
17048
|
}
|
|
17047
|
-
|
|
17048
|
-
|
|
17049
|
-
|
|
17050
|
-
|
|
17051
|
-
|
|
17052
|
-
|
|
17053
|
-
|
|
17054
|
-
|
|
17055
|
-
|
|
17056
|
-
|
|
17057
|
-
|
|
17058
|
-
}
|
|
17059
|
-
|
|
17049
|
+
async function resolveEvaluationRunner() {
|
|
17050
|
+
const overridePath = process.env.AGENTEVO_CLI_EVAL_RUNNER;
|
|
17051
|
+
if (!overridePath) {
|
|
17052
|
+
return runEvaluation;
|
|
17053
|
+
}
|
|
17054
|
+
const resolved = path14.isAbsolute(overridePath) ? overridePath : path14.resolve(process.cwd(), overridePath);
|
|
17055
|
+
const moduleUrl = pathToFileURL(resolved).href;
|
|
17056
|
+
const mod = await import(moduleUrl);
|
|
17057
|
+
const candidate = mod.runEvaluation;
|
|
17058
|
+
if (typeof candidate !== "function") {
|
|
17059
|
+
throw new Error(
|
|
17060
|
+
`Module '${resolved}' must export a 'runEvaluation' function to override the default implementation`
|
|
17061
|
+
);
|
|
17062
|
+
}
|
|
17063
|
+
return candidate;
|
|
17064
|
+
}
|
|
17065
|
+
|
|
17066
|
+
// src/commands/eval/index.ts
|
|
17067
|
+
function parseInteger(value, fallback) {
|
|
17068
|
+
const parsed = Number.parseInt(value, 10);
|
|
17069
|
+
if (Number.isNaN(parsed)) {
|
|
17070
|
+
return fallback;
|
|
17071
|
+
}
|
|
17072
|
+
return parsed;
|
|
17073
|
+
}
|
|
17074
|
+
function registerEvalCommand(program) {
|
|
17075
|
+
program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
|
|
17076
|
+
"--workers <count>",
|
|
17077
|
+
"Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
17078
|
+
(value) => parseInteger(value, 1)
|
|
17079
|
+
).option("--out <path>", "Write results to the specified path").option(
|
|
17080
|
+
"--output-format <format>",
|
|
17081
|
+
"Output format: 'jsonl' or 'yaml' (default: jsonl)",
|
|
17082
|
+
"jsonl"
|
|
17083
|
+
).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
|
|
17084
|
+
"--dry-run-delay <ms>",
|
|
17085
|
+
"Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
|
|
17086
|
+
(value) => parseInteger(value, 0),
|
|
17087
|
+
0
|
|
17088
|
+
).option(
|
|
17089
|
+
"--dry-run-delay-min <ms>",
|
|
17090
|
+
"Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
|
|
17091
|
+
(value) => parseInteger(value, 0),
|
|
17092
|
+
0
|
|
17093
|
+
).option(
|
|
17094
|
+
"--dry-run-delay-max <ms>",
|
|
17095
|
+
"Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
|
|
17096
|
+
(value) => parseInteger(value, 0),
|
|
17097
|
+
0
|
|
17098
|
+
).option(
|
|
17099
|
+
"--agent-timeout <seconds>",
|
|
17100
|
+
"Timeout in seconds for provider responses (default: 120)",
|
|
17101
|
+
(value) => parseInteger(value, 120),
|
|
17102
|
+
120
|
|
17103
|
+
).option(
|
|
17104
|
+
"--max-retries <count>",
|
|
17105
|
+
"Retry count for timeout recoveries (default: 2)",
|
|
17106
|
+
(value) => parseInteger(value, 2),
|
|
17107
|
+
2
|
|
17108
|
+
).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
|
|
17109
|
+
"--dump-prompts [dir]",
|
|
17110
|
+
"Persist prompt payloads for debugging (optional custom directory)"
|
|
17111
|
+
).action(async (evalPaths, rawOptions) => {
|
|
17112
|
+
const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
|
|
17113
|
+
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
17114
|
+
});
|
|
17115
|
+
return program;
|
|
17116
|
+
}
|
|
17117
|
+
async function resolveEvalPaths(evalPaths, cwd) {
|
|
17118
|
+
const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
|
|
17119
|
+
if (normalizedInputs.length === 0) {
|
|
17120
|
+
throw new Error("No eval paths provided.");
|
|
17121
|
+
}
|
|
17122
|
+
const unmatched = [];
|
|
17123
|
+
const results = /* @__PURE__ */ new Set();
|
|
17124
|
+
for (const pattern of normalizedInputs) {
|
|
17125
|
+
const candidatePath = path15.isAbsolute(pattern) ? path15.normalize(pattern) : path15.resolve(cwd, pattern);
|
|
17126
|
+
try {
|
|
17127
|
+
const stats = await stat3(candidatePath);
|
|
17128
|
+
if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
|
|
17129
|
+
results.add(candidatePath);
|
|
17130
|
+
continue;
|
|
17131
|
+
}
|
|
17132
|
+
} catch {
|
|
17060
17133
|
}
|
|
17061
|
-
const
|
|
17062
|
-
const
|
|
17063
|
-
|
|
17064
|
-
|
|
17065
|
-
|
|
17066
|
-
|
|
17067
|
-
|
|
17068
|
-
|
|
17069
|
-
|
|
17070
|
-
|
|
17134
|
+
const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
|
|
17135
|
+
const matches = await fg(globPattern, {
|
|
17136
|
+
cwd,
|
|
17137
|
+
absolute: true,
|
|
17138
|
+
onlyFiles: true,
|
|
17139
|
+
unique: true,
|
|
17140
|
+
dot: true,
|
|
17141
|
+
followSymbolicLinks: true
|
|
17142
|
+
});
|
|
17143
|
+
const yamlMatches = matches.filter((filePath) => /\.ya?ml$/i.test(filePath));
|
|
17144
|
+
if (yamlMatches.length === 0) {
|
|
17145
|
+
unmatched.push(pattern);
|
|
17146
|
+
continue;
|
|
17071
17147
|
}
|
|
17072
|
-
|
|
17073
|
-
|
|
17074
|
-
|
|
17075
|
-
|
|
17076
|
-
|
|
17077
|
-
|
|
17078
|
-
|
|
17079
|
-
|
|
17080
|
-
|
|
17081
|
-
|
|
17082
|
-
|
|
17083
|
-
|
|
17084
|
-
|
|
17085
|
-
|
|
17086
|
-
|
|
17087
|
-
|
|
17088
|
-
|
|
17089
|
-
|
|
17090
|
-
|
|
17091
|
-
|
|
17092
|
-
|
|
17093
|
-
|
|
17148
|
+
yamlMatches.forEach((filePath) => results.add(path15.normalize(filePath)));
|
|
17149
|
+
}
|
|
17150
|
+
if (unmatched.length > 0) {
|
|
17151
|
+
throw new Error(
|
|
17152
|
+
`No eval files matched: ${unmatched.join(
|
|
17153
|
+
", "
|
|
17154
|
+
)}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
|
|
17155
|
+
);
|
|
17156
|
+
}
|
|
17157
|
+
const sorted = Array.from(results);
|
|
17158
|
+
sorted.sort();
|
|
17159
|
+
return sorted;
|
|
17160
|
+
}
|
|
17161
|
+
|
|
17162
|
+
// src/commands/init/index.ts
|
|
17163
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
17164
|
+
import path17 from "node:path";
|
|
17165
|
+
import * as readline from "node:readline/promises";
|
|
17166
|
+
|
|
17167
|
+
// src/templates/index.ts
|
|
17168
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
17169
|
+
import path16 from "node:path";
|
|
17170
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
17171
|
+
/**
 * Reads the template files that ship next to this module.
 *
 * Every template is returned as `{ path, content }`, where `path` is relative
 * to the template folder and always uses forward slashes, and `content` is the
 * file's UTF-8 text.
 */
var TemplateManager = class {
  /**
   * @returns {{ path: string, content: string }[]} Templates from the "github" folder.
   */
  static getGithubTemplates() {
    return this.getTemplatesFromDir("github");
  }

  /**
   * @returns {{ path: string, content: string }[]} Templates from the "agentv" folder.
   */
  static getAgentvTemplates() {
    return this.getTemplatesFromDir("agentv");
  }

  /**
   * Loads every template below one named template folder.
   *
   * @param {string} subdir - Template folder name (e.g. "github" or "agentv").
   * @returns {{ path: string, content: string }[]} All files found, recursively.
   */
  static getTemplatesFromDir(subdir) {
    const currentDir = path16.dirname(fileURLToPath2(import.meta.url));
    const templatesDir = this.resolveTemplatesDir(currentDir, subdir);
    return this.readTemplatesRecursively(templatesDir, "");
  }

  /**
   * Picks the on-disk folder for a template set.
   *
   * When `currentDir` has a path segment named exactly "dist", templates are
   * expected under `<currentDir>/templates/<subdir>`; otherwise directly
   * under `<currentDir>/<subdir>`.
   *
   * @param {string} currentDir - Directory containing the running module.
   * @param {string} subdir - Template folder name.
   * @returns {string} Directory to read templates from.
   */
  static resolveTemplatesDir(currentDir, subdir) {
    // Compare whole segments: a substring test would also match unrelated
    // directories such as "distro" or "distributed" anywhere in the path.
    const runsFromDist = currentDir.split(path16.sep).includes("dist");
    return runsFromDist
      ? path16.join(currentDir, "templates", subdir)
      : path16.join(currentDir, subdir);
  }

  /**
   * Walks `dir` depth-first and collects every non-directory entry.
   *
   * @param {string} dir - Directory to scan.
   * @param {string} relativePath - Path of `dir` relative to the template root
   *   ("" for the root itself); used as the prefix of returned paths.
   * @returns {{ path: string, content: string }[]} Files found below `dir`.
   */
  static readTemplatesRecursively(dir, relativePath) {
    const templates = [];
    for (const entry of readdirSync(dir)) {
      const fullPath = path16.join(dir, entry);
      const entryRelativePath = relativePath ? path16.join(relativePath, entry) : entry;
      if (statSync(fullPath).isDirectory()) {
        templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
        continue;
      }
      templates.push({
        // Normalize to forward slashes so paths look the same on every platform.
        path: entryRelativePath.split(path16.sep).join("/"),
        content: readFileSync(fullPath, "utf-8")
      });
    }
    return templates;
  }
};
|
|
17209
|
+
|
|
17210
|
+
// src/commands/init/index.ts
|
|
17211
|
+
/**
 * Asks a yes/no question on the terminal.
 *
 * @param {string} message - Question text; " (y/N): " is appended to it.
 * @returns {Promise<boolean>} true only when the reply is "y" or "yes"
 *   (case-insensitive, surrounding whitespace ignored); anything else,
 *   including an empty reply, resolves to false.
 */
async function promptYesNo(message) {
  const rl2 = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });
  try {
    const answer = await rl2.question(`${message} (y/N): `);
    // Trim before comparing so a reply such as "y " is not treated as "no".
    const normalized = answer.trim().toLowerCase();
    return normalized === "y" || normalized === "yes";
  } finally {
    // Close the interface whether question() resolved or rejected.
    rl2.close();
  }
}
|
|
17223
|
+
/**
 * Writes the bundled ".github" and ".agentv" templates into a project.
 *
 * If any template file already exists at its destination, the existing files
 * are listed and the user is asked to confirm before anything is overwritten;
 * declining leaves the project untouched.
 *
 * @param {{ targetPath?: string }} [options] - `targetPath` is the project
 *   root; it defaults to the current working directory.
 * @returns {Promise<void>}
 */
async function initCommand(options = {}) {
  const targetPath = path17.resolve(options.targetPath ?? ".");
  const githubDir = path17.join(targetPath, ".github");
  const agentvDir = path17.join(targetPath, ".agentv");
  const githubTemplates = TemplateManager.getGithubTemplates();
  const agentvTemplates = TemplateManager.getAgentvTemplates();

  // Both template sets go through the same steps, always ".github" first.
  const groups = [
    { dir: githubDir, templates: githubTemplates },
    { dir: agentvDir, templates: agentvTemplates }
  ];
  const toRelative = (absolutePath) => path17.relative(targetPath, absolutePath);

  // Step 1: find destinations that already exist.
  const existingFiles = [];
  for (const { dir, templates } of groups) {
    if (!existsSync(dir)) {
      continue;
    }
    for (const template of templates) {
      const destination = path17.join(dir, template.path);
      if (existsSync(destination)) {
        existingFiles.push(toRelative(destination));
      }
    }
  }

  // Step 2: ask before overwriting anything.
  if (existingFiles.length > 0) {
    console.log("We detected an existing setup:");
    for (const file of existingFiles) {
      console.log(` - ${file}`);
    }
    console.log();
    const shouldReplace = await promptYesNo("Do you want to replace these files?");
    if (!shouldReplace) {
      console.log("\nInit cancelled. No files were changed.");
      return;
    }
    console.log();
  }

  // Step 3: make sure both top-level folders exist.
  for (const { dir } of groups) {
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
  }

  // Step 4: write every template, creating nested folders on demand.
  for (const { dir, templates } of groups) {
    for (const template of templates) {
      const destination = path17.join(dir, template.path);
      const parentDir = path17.dirname(destination);
      if (!existsSync(parentDir)) {
        mkdirSync(parentDir, { recursive: true });
      }
      writeFileSync(destination, template.content, "utf-8");
      console.log(`Created ${toRelative(destination)}`);
    }
  }

  // Step 5: summary and next steps.
  console.log("\nAgentV initialized successfully!");
  for (const { dir, templates } of groups) {
    console.log(`
Files installed to ${toRelative(dir)}:`);
    for (const t of templates) {
      console.log(` - ${t.path}`);
    }
  }
  console.log("\nYou can now:");
  console.log(" 1. Edit .agentv/.env with your API credentials");
  console.log(" 2. Configure targets in .agentv/targets.yaml");
  console.log(" 3. Create eval files using the schema and prompt templates");
}
|
|
17167
|
-
|
|
17168
|
-
|
|
17169
|
-
|
|
17170
|
-
|
|
17171
|
-
|
|
17293
|
+
|
|
17294
|
+
// src/commands/status.ts
|
|
17295
|
+
/**
 * Adds the `status` subcommand to a CLI program.
 *
 * When invoked, the subcommand creates an agent kernel and prints its
 * `status` property.
 *
 * @param {object} program - CLI program exposing a chainable
 *   `command().description().action()` API.
 * @returns {object} The same `program`, so registrations can be chained.
 */
function registerStatusCommand(program) {
  const printKernelStatus = () => {
    const { status } = createAgentKernel();
    console.log(`Kernel status: ${status}`);
  };
  const statusCommand = program.command("status");
  statusCommand.description("Show the latest AgentV kernel status").action(printKernelStatus);
  return program;
}
|
|
17302
|
+
|
|
17303
|
+
// src/commands/validate/format-output.ts
|
|
17304
|
+
// ANSI escape sequences used to colour the validation report.
const ANSI_RED2 = "\x1B[31m"; // red foreground
const ANSI_YELLOW3 = "\x1B[33m"; // yellow foreground
const ANSI_GREEN = "\x1B[32m"; // green foreground
const ANSI_CYAN = "\x1B[36m"; // cyan foreground
const ANSI_BOLD = "\x1B[1m"; // bold
const ANSI_RESET3 = "\x1B[0m"; // reset all attributes
|
|
17310
|
+
/**
 * Renders the full validation report: a header, one entry per validated
 * file, and the aggregate statistics, separated by blank lines.
 *
 * @param {{ results: object[] }} summary - Validation summary; it is also
 *   passed unchanged to `formatStats`.
 * @param {boolean} useColors - Whether to emit ANSI colour codes.
 * @returns {string} Newline-joined report that starts and ends with an empty line.
 */
function formatSummary(summary, useColors) {
  const header = formatHeader("Validation Summary", useColors);
  const fileEntries = summary.results.map((result) => formatFileResult(result, useColors));
  const stats = formatStats(summary, useColors);
  return ["", header, "", ...fileEntries, "", stats, ""].join("\n");
}
|
|
17323
|
+
/**
 * Formats a section heading.
 *
 * @param {string} text - Heading text.
 * @param {boolean} useColors - When true the text is wrapped in bold cyan
 *   followed by a reset; otherwise it is returned unchanged.
 * @returns {string} The heading, optionally coloured.
 */
function formatHeader(text, useColors) {
  return useColors ? `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET3}` : text;
}
|
|
17329
|
+
/**
 * Formats one file's validation outcome: a status line followed by one line
 * per reported error or warning.
 *
 * @param {{ valid: boolean, filePath: string, errors: object[] }} result -
 *   Validation result for a single file.
 * @param {boolean} useColors - When true the status mark is coloured green
 *   (valid) or red (invalid).
 * @returns {string} Newline-joined lines for this file.
 */
function formatFileResult(result, useColors) {
  let mark = result.valid ? "\u2713" : "\u2717";
  if (useColors) {
    const markColor = result.valid ? ANSI_GREEN : ANSI_RED2;
    mark = `${markColor}${mark}${ANSI_RESET3}`;
  }
  const issueLines = result.errors.map((error) => formatError(error, useColors));
  return [`${mark} ${result.filePath}`, ...issueLines].join("\n");
}
|
|
17343
|
+
/**
 * Formats a single validation issue as one line.
 *
 * @param {{ severity: string, location?: string, message: string }} error -
 *   Issue to print; any severity other than "error" is shown with the
 *   warning mark.
 * @param {boolean} useColors - When true the mark is coloured red (error) or
 *   yellow (anything else).
 * @returns {string} `<mark>[ [location]] <message>`.
 */
function formatError(error, useColors) {
  const isError = error.severity === "error";
  let mark = isError ? " \u2717" : " \u26A0";
  if (useColors) {
    const markColor = isError ? ANSI_RED2 : ANSI_YELLOW3;
    mark = `${markColor}${mark}${ANSI_RESET3}`;
  }
  const where = error.location ? ` [${error.location}]` : "";
  return `${mark}${where} ${error.message}`;
}
|
|
17350
|
+
/**
 * Formats the aggregate counters shown at the end of the validation report.
 *
 * Output lines, in order: total files, valid files, invalid files, and, only
 * when at least one file has a warning, the number of files with warnings.
 * With colours enabled the total is bold, the valid count green, the invalid
 * count red only when it is above zero, and the warning count yellow.
 *
 * @param {{ totalFiles: number, validFiles: number, invalidFiles: number,
 *   results: { errors: { severity: string }[] }[] }} summary - Validation summary.
 * @param {boolean} useColors - Whether to emit ANSI colour codes.
 * @returns {string} Newline-joined statistics lines.
 */
function formatStats(summary, useColors) {
  const totalText = `Total files: ${summary.totalFiles}`;
  const validText = `Valid: ${summary.validFiles}`;
  const invalidText = `Invalid: ${summary.invalidFiles}`;

  // Count files that carry at least one warning-level issue.
  let filesWithWarnings = 0;
  for (const { errors } of summary.results) {
    if (errors.some(({ severity }) => severity === "warning")) {
      filesWithWarnings += 1;
    }
  }

  if (!useColors) {
    const plain = [totalText, validText, invalidText];
    if (filesWithWarnings > 0) {
      plain.push(`Files with warnings: ${filesWithWarnings}`);
    }
    return plain.join("\n");
  }

  const colored = [
    `${ANSI_BOLD}${totalText}${ANSI_RESET3}`,
    `${ANSI_GREEN}${validText}${ANSI_RESET3}`,
    // Red only when something actually failed.
    summary.invalidFiles > 0 ? `${ANSI_RED2}${invalidText}${ANSI_RESET3}` : invalidText
  ];
  if (filesWithWarnings > 0) {
    colored.push(`${ANSI_YELLOW3}Files with warnings: ${filesWithWarnings}${ANSI_RESET3}`);
  }
  return colored.join("\n");
}
|
|
17379
|
+
/**
 * Reports whether standard output is attached to a terminal.
 *
 * @returns {boolean} `process.stdout.isTTY`, or false when that property is
 *   null or undefined.
 */
function isTTY2() {
  const { isTTY } = process.stdout;
  return isTTY ?? false;
}
|
|
17230
17382
|
|
|
17231
17383
|
// src/commands/validate/validate-files.ts
|
|
@@ -17340,7 +17492,7 @@ async function runValidateCommand(paths, _options) {
|
|
|
17340
17492
|
process.exit(1);
|
|
17341
17493
|
}
|
|
17342
17494
|
const summary = await validateFiles(paths);
|
|
17343
|
-
const useColors =
|
|
17495
|
+
const useColors = isTTY2();
|
|
17344
17496
|
console.log(formatSummary(summary, useColors));
|
|
17345
17497
|
if (summary.invalidFiles > 0) {
|
|
17346
17498
|
process.exit(1);
|
|
@@ -17386,4 +17538,4 @@ export {
|
|
|
17386
17538
|
createProgram,
|
|
17387
17539
|
runCli
|
|
17388
17540
|
};
|
|
17389
|
-
//# sourceMappingURL=chunk-
|
|
17541
|
+
//# sourceMappingURL=chunk-VHNACQDK.js.map
|