agentv 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-X6NSDSD2.js → chunk-GWH4WZTW.js} +1602 -1476
- package/dist/chunk-GWH4WZTW.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/package.json +2 -2
- package/dist/chunk-X6NSDSD2.js.map +0 -1
|
@@ -588,9 +588,9 @@ import { readFileSync as readFileSync2 } from "node:fs";
|
|
|
588
588
|
// src/commands/eval/index.ts
|
|
589
589
|
import fg from "fast-glob";
|
|
590
590
|
import { stat as stat3 } from "node:fs/promises";
|
|
591
|
-
import
|
|
591
|
+
import path15 from "node:path";
|
|
592
592
|
|
|
593
|
-
// ../../packages/core/dist/chunk-
|
|
593
|
+
// ../../packages/core/dist/chunk-UQLHF3T7.js
|
|
594
594
|
import { constants } from "node:fs";
|
|
595
595
|
import { access, readFile } from "node:fs/promises";
|
|
596
596
|
import path from "node:path";
|
|
@@ -697,6 +697,11 @@ async function resolveFileReference(rawValue, searchRoots) {
|
|
|
697
697
|
}
|
|
698
698
|
return { displayPath, attempted };
|
|
699
699
|
}
|
|
700
|
+
var AGENT_PROVIDER_KINDS = [
|
|
701
|
+
"codex",
|
|
702
|
+
"vscode",
|
|
703
|
+
"vscode-insiders"
|
|
704
|
+
];
|
|
700
705
|
var KNOWN_PROVIDERS = [
|
|
701
706
|
"azure",
|
|
702
707
|
"anthropic",
|
|
@@ -723,7 +728,10 @@ var PROVIDER_ALIASES = [
|
|
|
723
728
|
"vertex"
|
|
724
729
|
// legacy/future support
|
|
725
730
|
];
|
|
726
|
-
var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
|
|
731
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
|
|
732
|
+
function isAgentProvider(provider) {
|
|
733
|
+
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
734
|
+
}
|
|
727
735
|
|
|
728
736
|
// ../../packages/core/dist/index.js
|
|
729
737
|
import micromatch from "micromatch";
|
|
@@ -5188,7 +5196,7 @@ import { promisify as promisify2 } from "node:util";
|
|
|
5188
5196
|
import { exec as execCallback, spawn as spawn2 } from "node:child_process";
|
|
5189
5197
|
import { randomUUID } from "node:crypto";
|
|
5190
5198
|
import { constants as constants22, createWriteStream } from "node:fs";
|
|
5191
|
-
import { access as access22,
|
|
5199
|
+
import { access as access22, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
|
|
5192
5200
|
import { tmpdir } from "node:os";
|
|
5193
5201
|
import path42 from "node:path";
|
|
5194
5202
|
import { promisify as promisify22 } from "node:util";
|
|
@@ -9236,16 +9244,15 @@ var coerce = {
|
|
|
9236
9244
|
var NEVER = INVALID;
|
|
9237
9245
|
|
|
9238
9246
|
// ../../packages/core/dist/index.js
|
|
9239
|
-
import { readFile as readFile22 } from "node:fs/promises";
|
|
9240
9247
|
import path52 from "node:path";
|
|
9241
9248
|
|
|
9242
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9249
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
9243
9250
|
import { exec, spawn } from "child_process";
|
|
9244
9251
|
import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile as readFile2, stat as stat2, writeFile } from "fs/promises";
|
|
9245
9252
|
import path6 from "path";
|
|
9246
9253
|
import { promisify } from "util";
|
|
9247
9254
|
|
|
9248
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9255
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/constants.js
|
|
9249
9256
|
import os from "os";
|
|
9250
9257
|
import path2 from "path";
|
|
9251
9258
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
@@ -9256,7 +9263,7 @@ function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
|
9256
9263
|
}
|
|
9257
9264
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
9258
9265
|
|
|
9259
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9266
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/fs.js
|
|
9260
9267
|
import { constants as constants2 } from "fs";
|
|
9261
9268
|
import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
|
|
9262
9269
|
import path3 from "path";
|
|
@@ -9289,7 +9296,7 @@ async function removeIfExists(target) {
|
|
|
9289
9296
|
}
|
|
9290
9297
|
}
|
|
9291
9298
|
|
|
9292
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9299
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/path.js
|
|
9293
9300
|
import path4 from "path";
|
|
9294
9301
|
function pathToFileUri(filePath) {
|
|
9295
9302
|
const absolutePath = path4.isAbsolute(filePath) ? filePath : path4.resolve(filePath);
|
|
@@ -9300,14 +9307,14 @@ function pathToFileUri(filePath) {
|
|
|
9300
9307
|
return `file://${normalizedPath}`;
|
|
9301
9308
|
}
|
|
9302
9309
|
|
|
9303
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9310
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/time.js
|
|
9304
9311
|
function sleep(ms) {
|
|
9305
9312
|
return new Promise((resolve) => {
|
|
9306
9313
|
setTimeout(resolve, ms);
|
|
9307
9314
|
});
|
|
9308
9315
|
}
|
|
9309
9316
|
|
|
9310
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
9317
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/workspace.js
|
|
9311
9318
|
import path5 from "path";
|
|
9312
9319
|
|
|
9313
9320
|
// ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
|
|
@@ -10395,7 +10402,7 @@ var JSON5 = {
|
|
|
10395
10402
|
var lib = JSON5;
|
|
10396
10403
|
var dist_default = lib;
|
|
10397
10404
|
|
|
10398
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10405
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/workspace.js
|
|
10399
10406
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10400
10407
|
let workspace;
|
|
10401
10408
|
try {
|
|
@@ -10468,7 +10475,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10468
10475
|
return JSON.stringify(transformedWorkspace, null, 2);
|
|
10469
10476
|
}
|
|
10470
10477
|
|
|
10471
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
10478
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
10472
10479
|
var execAsync = promisify(exec);
|
|
10473
10480
|
function generateTimestamp() {
|
|
10474
10481
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
|
|
@@ -10542,7 +10549,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
10542
10549
|
spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
10543
10550
|
await sleep(100);
|
|
10544
10551
|
const wakeupChatId = "wakeup";
|
|
10545
|
-
const chatArgs = ["-r", "chat", "-m", wakeupChatId,
|
|
10552
|
+
const chatArgs = ["-r", "chat", "-m", wakeupChatId, `create a file named .alive in the ${path6.basename(subagentDir)} folder`];
|
|
10546
10553
|
spawn(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
|
|
10547
10554
|
const start = Date.now();
|
|
10548
10555
|
while (!await pathExists(aliveFile)) {
|
|
@@ -11050,7 +11057,7 @@ async function dispatchBatchAgent(options) {
|
|
|
11050
11057
|
}
|
|
11051
11058
|
}
|
|
11052
11059
|
|
|
11053
|
-
// ../../node_modules/.pnpm/subagent@0.4.
|
|
11060
|
+
// ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/provision.js
|
|
11054
11061
|
import { writeFile as writeFile2 } from "fs/promises";
|
|
11055
11062
|
import path7 from "path";
|
|
11056
11063
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
@@ -11176,7 +11183,7 @@ async function provisionSubagents(options) {
|
|
|
11176
11183
|
|
|
11177
11184
|
// ../../packages/core/dist/index.js
|
|
11178
11185
|
import { constants as constants32 } from "node:fs";
|
|
11179
|
-
import { access as access32, readFile as
|
|
11186
|
+
import { access as access32, readFile as readFile22 } from "node:fs/promises";
|
|
11180
11187
|
import path62 from "node:path";
|
|
11181
11188
|
import { parse as parse22 } from "yaml";
|
|
11182
11189
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
@@ -11379,6 +11386,7 @@ async function processMessages(options) {
|
|
|
11379
11386
|
}
|
|
11380
11387
|
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
11381
11388
|
const verbose = options?.verbose ?? false;
|
|
11389
|
+
const evalIdFilter = options?.evalId;
|
|
11382
11390
|
const absoluteTestPath = path8.resolve(evalFilePath);
|
|
11383
11391
|
if (!await fileExists2(absoluteTestPath)) {
|
|
11384
11392
|
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
@@ -11410,62 +11418,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11410
11418
|
const results = [];
|
|
11411
11419
|
for (const rawEvalcase of rawTestcases) {
|
|
11412
11420
|
if (!isJsonObject(rawEvalcase)) {
|
|
11413
|
-
logWarning("Skipping invalid
|
|
11421
|
+
logWarning("Skipping invalid eval case entry (expected object)");
|
|
11414
11422
|
continue;
|
|
11415
11423
|
}
|
|
11416
11424
|
const evalcase = rawEvalcase;
|
|
11417
11425
|
const id = asString(evalcase.id);
|
|
11426
|
+
if (evalIdFilter && id !== evalIdFilter) {
|
|
11427
|
+
continue;
|
|
11428
|
+
}
|
|
11418
11429
|
const conversationId = asString(evalcase.conversation_id);
|
|
11419
11430
|
const outcome = asString(evalcase.outcome);
|
|
11420
11431
|
const inputMessagesValue = evalcase.input_messages;
|
|
11421
11432
|
const expectedMessagesValue = evalcase.expected_messages;
|
|
11422
11433
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
11423
|
-
logWarning(`Skipping incomplete
|
|
11434
|
+
logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
|
|
11424
11435
|
continue;
|
|
11425
11436
|
}
|
|
11426
11437
|
if (!Array.isArray(expectedMessagesValue)) {
|
|
11427
|
-
logWarning(`
|
|
11438
|
+
logWarning(`Eval case '${id}' missing expected_messages array`);
|
|
11428
11439
|
continue;
|
|
11429
11440
|
}
|
|
11430
11441
|
const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
|
|
11431
11442
|
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
11432
|
-
|
|
11433
|
-
|
|
11434
|
-
const systemMessages = inputMessages.filter((message) => message.role === "system");
|
|
11435
|
-
if (assistantMessages.length === 0) {
|
|
11436
|
-
logWarning(`No assistant message found for test case: ${id}`);
|
|
11443
|
+
if (expectedMessages.length === 0) {
|
|
11444
|
+
logWarning(`No expected message found for eval case: ${id}`);
|
|
11437
11445
|
continue;
|
|
11438
11446
|
}
|
|
11439
|
-
if (
|
|
11440
|
-
logWarning(`Multiple
|
|
11441
|
-
}
|
|
11442
|
-
if (systemMessages.length > 1) {
|
|
11443
|
-
logWarning(`Multiple system messages found for test case: ${id}, using first`);
|
|
11444
|
-
}
|
|
11445
|
-
let systemMessageContent;
|
|
11446
|
-
if (systemMessages.length > 0) {
|
|
11447
|
-
const content = systemMessages[0]?.content;
|
|
11448
|
-
if (typeof content === "string") {
|
|
11449
|
-
systemMessageContent = content;
|
|
11450
|
-
} else if (Array.isArray(content)) {
|
|
11451
|
-
const textParts = [];
|
|
11452
|
-
for (const segment of content) {
|
|
11453
|
-
if (isJsonObject(segment)) {
|
|
11454
|
-
const value = segment.value;
|
|
11455
|
-
if (typeof value === "string") {
|
|
11456
|
-
textParts.push(value);
|
|
11457
|
-
}
|
|
11458
|
-
}
|
|
11459
|
-
}
|
|
11460
|
-
if (textParts.length > 0) {
|
|
11461
|
-
systemMessageContent = textParts.join("\n\n");
|
|
11462
|
-
}
|
|
11463
|
-
}
|
|
11447
|
+
if (expectedMessages.length > 1) {
|
|
11448
|
+
logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
|
|
11464
11449
|
}
|
|
11465
11450
|
const guidelinePaths = [];
|
|
11466
11451
|
const inputTextParts = [];
|
|
11467
11452
|
const inputSegments = await processMessages({
|
|
11468
|
-
messages:
|
|
11453
|
+
messages: inputMessages,
|
|
11469
11454
|
searchRoots,
|
|
11470
11455
|
repoRootPath,
|
|
11471
11456
|
guidelinePatterns,
|
|
@@ -11475,7 +11460,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11475
11460
|
verbose
|
|
11476
11461
|
});
|
|
11477
11462
|
const outputSegments = await processMessages({
|
|
11478
|
-
messages:
|
|
11463
|
+
messages: expectedMessages,
|
|
11479
11464
|
searchRoots,
|
|
11480
11465
|
repoRootPath,
|
|
11481
11466
|
guidelinePatterns,
|
|
@@ -11483,10 +11468,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11483
11468
|
verbose
|
|
11484
11469
|
});
|
|
11485
11470
|
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
11486
|
-
const
|
|
11487
|
-
const referenceAnswer = await resolveAssistantContent(
|
|
11471
|
+
const expectedContent = expectedMessages[0]?.content;
|
|
11472
|
+
const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
|
|
11488
11473
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
11489
|
-
const
|
|
11474
|
+
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
11490
11475
|
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
11491
11476
|
const userFilePaths = [];
|
|
11492
11477
|
for (const segment of inputSegments) {
|
|
@@ -11505,19 +11490,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
11505
11490
|
question,
|
|
11506
11491
|
input_segments: inputSegments,
|
|
11507
11492
|
output_segments: outputSegments,
|
|
11508
|
-
system_message: systemMessageContent,
|
|
11509
11493
|
reference_answer: referenceAnswer,
|
|
11510
11494
|
guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
11511
11495
|
guideline_patterns: guidelinePatterns,
|
|
11512
11496
|
file_paths: allFilePaths,
|
|
11513
11497
|
code_snippets: codeSnippets,
|
|
11514
11498
|
expected_outcome: outcome,
|
|
11515
|
-
evaluator:
|
|
11499
|
+
evaluator: evalCaseEvaluatorKind,
|
|
11516
11500
|
evaluators
|
|
11517
11501
|
};
|
|
11518
11502
|
if (verbose) {
|
|
11519
11503
|
console.log(`
|
|
11520
|
-
[
|
|
11504
|
+
[Eval Case: ${id}]`);
|
|
11521
11505
|
if (testCase.guideline_paths.length > 0) {
|
|
11522
11506
|
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
11523
11507
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
@@ -11576,7 +11560,7 @@ ${body}`);
|
|
|
11576
11560
|
}
|
|
11577
11561
|
const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
11578
11562
|
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
11579
|
-
return { question, guidelines
|
|
11563
|
+
return { question, guidelines };
|
|
11580
11564
|
}
|
|
11581
11565
|
async function fileExists2(absolutePath) {
|
|
11582
11566
|
try {
|
|
@@ -12304,7 +12288,6 @@ function pathToFileUri2(filePath) {
|
|
|
12304
12288
|
var execAsync22 = promisify22(execCallback);
|
|
12305
12289
|
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
12306
12290
|
var PROMPT_FILENAME = "prompt.md";
|
|
12307
|
-
var FILES_DIR = "files";
|
|
12308
12291
|
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
12309
12292
|
var CodexProvider = class {
|
|
12310
12293
|
id;
|
|
@@ -12327,21 +12310,10 @@ var CodexProvider = class {
|
|
|
12327
12310
|
}
|
|
12328
12311
|
await this.ensureEnvironmentReady();
|
|
12329
12312
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
12330
|
-
const originalGuidelines = new Set(
|
|
12331
|
-
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path42.resolve(file))
|
|
12332
|
-
);
|
|
12333
12313
|
const workspaceRoot = await this.createWorkspace();
|
|
12334
12314
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
12335
12315
|
try {
|
|
12336
|
-
const
|
|
12337
|
-
inputFiles,
|
|
12338
|
-
workspaceRoot,
|
|
12339
|
-
originalGuidelines
|
|
12340
|
-
);
|
|
12341
|
-
const promptContent = buildPromptDocument(request, mirroredInputFiles, {
|
|
12342
|
-
guidelinePatterns: request.guideline_patterns,
|
|
12343
|
-
guidelineOverrides: guidelineMirrors
|
|
12344
|
-
});
|
|
12316
|
+
const promptContent = buildPromptDocument(request, inputFiles);
|
|
12345
12317
|
const promptFile = path42.join(workspaceRoot, PROMPT_FILENAME);
|
|
12346
12318
|
await writeFile3(promptFile, promptContent, "utf8");
|
|
12347
12319
|
const args = this.buildCodexArgs();
|
|
@@ -12370,7 +12342,7 @@ var CodexProvider = class {
|
|
|
12370
12342
|
executable: this.resolvedExecutable ?? this.config.executable,
|
|
12371
12343
|
promptFile,
|
|
12372
12344
|
workspace: workspaceRoot,
|
|
12373
|
-
inputFiles
|
|
12345
|
+
inputFiles,
|
|
12374
12346
|
logFile: logger?.filePath
|
|
12375
12347
|
}
|
|
12376
12348
|
};
|
|
@@ -12425,37 +12397,6 @@ var CodexProvider = class {
|
|
|
12425
12397
|
throw error;
|
|
12426
12398
|
}
|
|
12427
12399
|
}
|
|
12428
|
-
async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
|
|
12429
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
12430
|
-
return {
|
|
12431
|
-
mirroredInputFiles: void 0,
|
|
12432
|
-
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
12433
|
-
};
|
|
12434
|
-
}
|
|
12435
|
-
const filesRoot = path42.join(workspaceRoot, FILES_DIR);
|
|
12436
|
-
await mkdir3(filesRoot, { recursive: true });
|
|
12437
|
-
const mirrored = [];
|
|
12438
|
-
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
12439
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
12440
|
-
for (const inputFile of inputFiles) {
|
|
12441
|
-
const absoluteSource = path42.resolve(inputFile);
|
|
12442
|
-
const baseName = path42.basename(absoluteSource);
|
|
12443
|
-
const count = nameCounts.get(baseName) ?? 0;
|
|
12444
|
-
nameCounts.set(baseName, count + 1);
|
|
12445
|
-
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
12446
|
-
const destination = path42.join(filesRoot, finalName);
|
|
12447
|
-
await copyFile2(absoluteSource, destination);
|
|
12448
|
-
const resolvedDestination = path42.resolve(destination);
|
|
12449
|
-
mirrored.push(resolvedDestination);
|
|
12450
|
-
if (guidelineOriginals.has(absoluteSource)) {
|
|
12451
|
-
guidelineMirrors.add(resolvedDestination);
|
|
12452
|
-
}
|
|
12453
|
-
}
|
|
12454
|
-
return {
|
|
12455
|
-
mirroredInputFiles: mirrored,
|
|
12456
|
-
guidelineMirrors
|
|
12457
|
-
};
|
|
12458
|
-
}
|
|
12459
12400
|
async createWorkspace() {
|
|
12460
12401
|
return await mkdtemp(path42.join(tmpdir(), WORKSPACE_PREFIX));
|
|
12461
12402
|
}
|
|
@@ -13421,23 +13362,25 @@ function resolveOptionalString(source2, env, description, options) {
|
|
|
13421
13362
|
if (trimmed.length === 0) {
|
|
13422
13363
|
return void 0;
|
|
13423
13364
|
}
|
|
13424
|
-
const
|
|
13425
|
-
if (
|
|
13426
|
-
|
|
13427
|
-
|
|
13365
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
13366
|
+
if (envVarMatch) {
|
|
13367
|
+
const varName = envVarMatch[1];
|
|
13368
|
+
const envValue = env[varName];
|
|
13369
|
+
if (envValue !== void 0) {
|
|
13370
|
+
if (envValue.trim().length === 0) {
|
|
13371
|
+
throw new Error(`Environment variable '${varName}' for ${description} is empty`);
|
|
13372
|
+
}
|
|
13373
|
+
return envValue;
|
|
13428
13374
|
}
|
|
13429
|
-
|
|
13430
|
-
}
|
|
13431
|
-
const allowLiteral = options?.allowLiteral ?? false;
|
|
13432
|
-
const optionalEnv = options?.optionalEnv ?? false;
|
|
13433
|
-
const looksLikeEnv = isLikelyEnvReference(trimmed);
|
|
13434
|
-
if (looksLikeEnv) {
|
|
13375
|
+
const optionalEnv = options?.optionalEnv ?? false;
|
|
13435
13376
|
if (optionalEnv) {
|
|
13436
13377
|
return void 0;
|
|
13437
13378
|
}
|
|
13438
|
-
|
|
13439
|
-
|
|
13440
|
-
|
|
13379
|
+
throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
|
|
13380
|
+
}
|
|
13381
|
+
const allowLiteral = options?.allowLiteral ?? false;
|
|
13382
|
+
if (!allowLiteral) {
|
|
13383
|
+
throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
|
|
13441
13384
|
}
|
|
13442
13385
|
return trimmed;
|
|
13443
13386
|
}
|
|
@@ -13484,9 +13427,6 @@ function resolveOptionalBoolean(source2) {
|
|
|
13484
13427
|
}
|
|
13485
13428
|
throw new Error("expected boolean value");
|
|
13486
13429
|
}
|
|
13487
|
-
function isLikelyEnvReference(value) {
|
|
13488
|
-
return /^[A-Z0-9_]+$/.test(value);
|
|
13489
|
-
}
|
|
13490
13430
|
function resolveOptionalStringArray(source2, env, description) {
|
|
13491
13431
|
if (source2 === void 0 || source2 === null) {
|
|
13492
13432
|
return void 0;
|
|
@@ -13507,15 +13447,20 @@ function resolveOptionalStringArray(source2, env, description) {
|
|
|
13507
13447
|
if (trimmed.length === 0) {
|
|
13508
13448
|
throw new Error(`${description}[${i6}] cannot be empty`);
|
|
13509
13449
|
}
|
|
13510
|
-
const
|
|
13511
|
-
if (
|
|
13512
|
-
|
|
13513
|
-
|
|
13450
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
13451
|
+
if (envVarMatch) {
|
|
13452
|
+
const varName = envVarMatch[1];
|
|
13453
|
+
const envValue = env[varName];
|
|
13454
|
+
if (envValue !== void 0) {
|
|
13455
|
+
if (envValue.trim().length === 0) {
|
|
13456
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i6}] is empty`);
|
|
13457
|
+
}
|
|
13458
|
+
resolved.push(envValue);
|
|
13459
|
+
continue;
|
|
13514
13460
|
}
|
|
13515
|
-
|
|
13516
|
-
} else {
|
|
13517
|
-
resolved.push(trimmed);
|
|
13461
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i6}] is not set`);
|
|
13518
13462
|
}
|
|
13463
|
+
resolved.push(trimmed);
|
|
13519
13464
|
}
|
|
13520
13465
|
return resolved.length > 0 ? resolved : void 0;
|
|
13521
13466
|
}
|
|
@@ -13560,7 +13505,7 @@ var VSCodeProvider = class {
|
|
|
13560
13505
|
}
|
|
13561
13506
|
};
|
|
13562
13507
|
}
|
|
13563
|
-
const responseText = await
|
|
13508
|
+
const responseText = await readTextFile(session.responseFile);
|
|
13564
13509
|
return {
|
|
13565
13510
|
text: responseText,
|
|
13566
13511
|
raw: {
|
|
@@ -13614,7 +13559,7 @@ var VSCodeProvider = class {
|
|
|
13614
13559
|
}
|
|
13615
13560
|
const responses = [];
|
|
13616
13561
|
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
13617
|
-
const responseText = await
|
|
13562
|
+
const responseText = await readTextFile(responseFile);
|
|
13618
13563
|
responses.push({
|
|
13619
13564
|
text: responseText,
|
|
13620
13565
|
raw: {
|
|
@@ -13826,7 +13771,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
13826
13771
|
if (!await fileExists3(absolutePath)) {
|
|
13827
13772
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
13828
13773
|
}
|
|
13829
|
-
const raw = await
|
|
13774
|
+
const raw = await readFile22(absolutePath, "utf8");
|
|
13830
13775
|
const parsed = parse22(raw);
|
|
13831
13776
|
if (!isRecord(parsed)) {
|
|
13832
13777
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
@@ -14063,7 +14008,6 @@ var CodeEvaluator = class {
|
|
|
14063
14008
|
expected_outcome: context2.evalCase.expected_outcome,
|
|
14064
14009
|
reference_answer: context2.evalCase.reference_answer,
|
|
14065
14010
|
candidate_answer: context2.candidate,
|
|
14066
|
-
system_message: context2.promptInputs.systemMessage ?? "",
|
|
14067
14011
|
guideline_paths: context2.evalCase.guideline_paths,
|
|
14068
14012
|
input_files: context2.evalCase.file_paths,
|
|
14069
14013
|
input_segments: context2.evalCase.input_segments
|
|
@@ -14294,7 +14238,7 @@ function validateConcurrency(concurrency) {
|
|
|
14294
14238
|
}
|
|
14295
14239
|
async function runEvaluation(options) {
|
|
14296
14240
|
const {
|
|
14297
|
-
testFilePath,
|
|
14241
|
+
testFilePath: evalFilePath,
|
|
14298
14242
|
repoRoot,
|
|
14299
14243
|
target,
|
|
14300
14244
|
targets,
|
|
@@ -14313,11 +14257,11 @@ async function runEvaluation(options) {
|
|
|
14313
14257
|
onProgress
|
|
14314
14258
|
} = options;
|
|
14315
14259
|
const load = loadEvalCases;
|
|
14316
|
-
const evalCases = await load(
|
|
14260
|
+
const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
|
|
14317
14261
|
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
14318
14262
|
if (filteredEvalCases.length === 0) {
|
|
14319
14263
|
if (evalId) {
|
|
14320
|
-
throw new Error(`
|
|
14264
|
+
throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
|
|
14321
14265
|
}
|
|
14322
14266
|
return [];
|
|
14323
14267
|
}
|
|
@@ -14471,7 +14415,8 @@ async function runEvaluation(options) {
|
|
|
14471
14415
|
target.name,
|
|
14472
14416
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
14473
14417
|
outcome.reason,
|
|
14474
|
-
promptInputs
|
|
14418
|
+
promptInputs,
|
|
14419
|
+
primaryProvider
|
|
14475
14420
|
);
|
|
14476
14421
|
results.push(errorResult);
|
|
14477
14422
|
if (onResult) {
|
|
@@ -14555,7 +14500,7 @@ async function runBatchEvaluation(options) {
|
|
|
14555
14500
|
agentTimeoutMs
|
|
14556
14501
|
});
|
|
14557
14502
|
} catch (error) {
|
|
14558
|
-
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
14503
|
+
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
14559
14504
|
results.push(errorResult);
|
|
14560
14505
|
if (onResult) {
|
|
14561
14506
|
await onResult(errorResult);
|
|
@@ -14632,7 +14577,7 @@ async function runEvalCase(options) {
|
|
|
14632
14577
|
attempt += 1;
|
|
14633
14578
|
continue;
|
|
14634
14579
|
}
|
|
14635
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
14580
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
14636
14581
|
}
|
|
14637
14582
|
}
|
|
14638
14583
|
if (!providerResponse) {
|
|
@@ -14641,7 +14586,8 @@ async function runEvalCase(options) {
|
|
|
14641
14586
|
target.name,
|
|
14642
14587
|
nowFn(),
|
|
14643
14588
|
lastError ?? new Error("Provider did not return a response"),
|
|
14644
|
-
promptInputs
|
|
14589
|
+
promptInputs,
|
|
14590
|
+
provider
|
|
14645
14591
|
);
|
|
14646
14592
|
}
|
|
14647
14593
|
if (cacheKey && cache && !cachedResponse) {
|
|
@@ -14661,7 +14607,7 @@ async function runEvalCase(options) {
|
|
|
14661
14607
|
agentTimeoutMs
|
|
14662
14608
|
});
|
|
14663
14609
|
} catch (error) {
|
|
14664
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
14610
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
14665
14611
|
}
|
|
14666
14612
|
}
|
|
14667
14613
|
async function evaluateCandidate(options) {
|
|
@@ -14693,9 +14639,8 @@ async function evaluateCandidate(options) {
|
|
|
14693
14639
|
const completedAt = nowFn();
|
|
14694
14640
|
const rawRequest = {
|
|
14695
14641
|
question: promptInputs.question,
|
|
14696
|
-
guidelines: promptInputs.guidelines,
|
|
14697
|
-
guideline_paths: evalCase.guideline_paths
|
|
14698
|
-
system_message: promptInputs.systemMessage ?? ""
|
|
14642
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
14643
|
+
guideline_paths: evalCase.guideline_paths
|
|
14699
14644
|
};
|
|
14700
14645
|
return {
|
|
14701
14646
|
eval_id: evalCase.id,
|
|
@@ -14953,13 +14898,12 @@ async function invokeProvider(provider, options) {
|
|
|
14953
14898
|
}
|
|
14954
14899
|
}
|
|
14955
14900
|
}
|
|
14956
|
-
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
14901
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
|
|
14957
14902
|
const message = error instanceof Error ? error.message : String(error);
|
|
14958
14903
|
const rawRequest = {
|
|
14959
14904
|
question: promptInputs.question,
|
|
14960
|
-
guidelines: promptInputs.guidelines,
|
|
14905
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
14961
14906
|
guideline_paths: evalCase.guideline_paths,
|
|
14962
|
-
system_message: promptInputs.systemMessage ?? "",
|
|
14963
14907
|
error: message
|
|
14964
14908
|
};
|
|
14965
14909
|
return {
|
|
@@ -15009,7 +14953,7 @@ function createAgentKernel() {
|
|
|
15009
14953
|
// src/commands/eval/run-eval.ts
|
|
15010
14954
|
import { constants as constants6 } from "node:fs";
|
|
15011
14955
|
import { access as access6, mkdir as mkdir6 } from "node:fs/promises";
|
|
15012
|
-
import
|
|
14956
|
+
import path14 from "node:path";
|
|
15013
14957
|
import { pathToFileURL } from "node:url";
|
|
15014
14958
|
|
|
15015
14959
|
// src/commands/eval/env.ts
|
|
@@ -15694,883 +15638,580 @@ function formatEvaluationSummary(summary) {
|
|
|
15694
15638
|
return lines.join("\n");
|
|
15695
15639
|
}
|
|
15696
15640
|
|
|
15697
|
-
//
|
|
15698
|
-
import {
|
|
15699
|
-
import { access as access5, readFile as readFile4 } from "node:fs/promises";
|
|
15700
|
-
import path12 from "node:path";
|
|
15641
|
+
// ../../packages/core/dist/evaluation/validation/index.js
|
|
15642
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
15701
15643
|
import { parse as parse4 } from "yaml";
|
|
15702
|
-
|
|
15703
|
-
|
|
15704
|
-
|
|
15705
|
-
|
|
15706
|
-
|
|
15707
|
-
|
|
15708
|
-
|
|
15709
|
-
|
|
15710
|
-
|
|
15711
|
-
|
|
15712
|
-
|
|
15713
|
-
|
|
15714
|
-
|
|
15715
|
-
|
|
15716
|
-
async function
|
|
15644
|
+
import { readFile as readFile23 } from "node:fs/promises";
|
|
15645
|
+
import path12 from "node:path";
|
|
15646
|
+
import { parse as parse23 } from "yaml";
|
|
15647
|
+
import { readFile as readFile32 } from "node:fs/promises";
|
|
15648
|
+
import path23 from "node:path";
|
|
15649
|
+
import { parse as parse32 } from "yaml";
|
|
15650
|
+
import { readFile as readFile42 } from "node:fs/promises";
|
|
15651
|
+
import { parse as parse42 } from "yaml";
|
|
15652
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
15653
|
+
import path33 from "node:path";
|
|
15654
|
+
import { parse as parse5 } from "yaml";
|
|
15655
|
+
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
15656
|
+
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.1";
|
|
15657
|
+
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
15658
|
+
async function detectFileType(filePath) {
|
|
15717
15659
|
try {
|
|
15718
|
-
const
|
|
15719
|
-
const parsed = parse4(
|
|
15720
|
-
if (
|
|
15721
|
-
|
|
15722
|
-
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
15723
|
-
return targetValue.trim();
|
|
15724
|
-
}
|
|
15725
|
-
}
|
|
15726
|
-
} catch {
|
|
15727
|
-
}
|
|
15728
|
-
return void 0;
|
|
15729
|
-
}
|
|
15730
|
-
async function discoverTargetsFile(options) {
|
|
15731
|
-
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
15732
|
-
if (explicitPath) {
|
|
15733
|
-
const resolvedExplicit = path12.resolve(explicitPath);
|
|
15734
|
-
if (await fileExists5(resolvedExplicit)) {
|
|
15735
|
-
return resolvedExplicit;
|
|
15660
|
+
const content = await readFile4(filePath, "utf8");
|
|
15661
|
+
const parsed = parse4(content);
|
|
15662
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
15663
|
+
return "unknown";
|
|
15736
15664
|
}
|
|
15737
|
-
|
|
15738
|
-
|
|
15739
|
-
|
|
15740
|
-
|
|
15741
|
-
}
|
|
15665
|
+
const record = parsed;
|
|
15666
|
+
const schema = record["$schema"];
|
|
15667
|
+
if (typeof schema !== "string") {
|
|
15668
|
+
return "unknown";
|
|
15742
15669
|
}
|
|
15743
|
-
|
|
15744
|
-
|
|
15745
|
-
|
|
15746
|
-
|
|
15747
|
-
|
|
15748
|
-
|
|
15749
|
-
|
|
15750
|
-
|
|
15751
|
-
|
|
15752
|
-
const fullPath = path12.join(directory, candidate);
|
|
15753
|
-
if (await fileExists5(fullPath)) {
|
|
15754
|
-
return fullPath;
|
|
15755
|
-
}
|
|
15670
|
+
switch (schema) {
|
|
15671
|
+
case SCHEMA_EVAL_V22:
|
|
15672
|
+
return "eval";
|
|
15673
|
+
case SCHEMA_TARGETS_V2:
|
|
15674
|
+
return "targets";
|
|
15675
|
+
case SCHEMA_CONFIG_V22:
|
|
15676
|
+
return "config";
|
|
15677
|
+
default:
|
|
15678
|
+
return "unknown";
|
|
15756
15679
|
}
|
|
15680
|
+
} catch {
|
|
15681
|
+
return "unknown";
|
|
15757
15682
|
}
|
|
15758
|
-
throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
|
|
15759
15683
|
}
|
|
15760
|
-
|
|
15761
|
-
|
|
15762
|
-
|
|
15763
|
-
return { name: cliName, source: "cli" };
|
|
15764
|
-
}
|
|
15765
|
-
const fileName = options.fileTargetName?.trim();
|
|
15766
|
-
if (fileName && fileName.length > 0) {
|
|
15767
|
-
return { name: fileName, source: "test-file" };
|
|
15768
|
-
}
|
|
15769
|
-
return { name: "default", source: "default" };
|
|
15684
|
+
var SCHEMA_EVAL_V222 = "agentv-eval-v2";
|
|
15685
|
+
function isObject(value) {
|
|
15686
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
15770
15687
|
}
|
|
15771
|
-
async function
|
|
15772
|
-
const
|
|
15773
|
-
const
|
|
15774
|
-
|
|
15775
|
-
|
|
15776
|
-
|
|
15777
|
-
|
|
15778
|
-
})
|
|
15779
|
-
|
|
15780
|
-
|
|
15781
|
-
|
|
15782
|
-
|
|
15783
|
-
|
|
15784
|
-
const available = listTargetNames(definitions).join(", ");
|
|
15785
|
-
throw new Error(
|
|
15786
|
-
`Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
|
|
15787
|
-
);
|
|
15788
|
-
}
|
|
15789
|
-
if (dryRun) {
|
|
15790
|
-
const mockTarget = {
|
|
15791
|
-
kind: "mock",
|
|
15792
|
-
name: `${targetDefinition.name}-dry-run`,
|
|
15793
|
-
judgeTarget: void 0,
|
|
15794
|
-
config: {
|
|
15795
|
-
response: '{"answer":"Mock dry-run response"}',
|
|
15796
|
-
delayMs: dryRunDelay,
|
|
15797
|
-
delayMinMs: dryRunDelayMin,
|
|
15798
|
-
delayMaxMs: dryRunDelayMax
|
|
15799
|
-
}
|
|
15800
|
-
};
|
|
15688
|
+
async function validateEvalFile(filePath) {
|
|
15689
|
+
const errors = [];
|
|
15690
|
+
const absolutePath = path12.resolve(filePath);
|
|
15691
|
+
let parsed;
|
|
15692
|
+
try {
|
|
15693
|
+
const content = await readFile23(absolutePath, "utf8");
|
|
15694
|
+
parsed = parse23(content);
|
|
15695
|
+
} catch (error) {
|
|
15696
|
+
errors.push({
|
|
15697
|
+
severity: "error",
|
|
15698
|
+
filePath: absolutePath,
|
|
15699
|
+
message: `Failed to parse YAML: ${error.message}`
|
|
15700
|
+
});
|
|
15801
15701
|
return {
|
|
15802
|
-
|
|
15803
|
-
|
|
15804
|
-
|
|
15805
|
-
|
|
15806
|
-
targetsFilePath
|
|
15702
|
+
valid: false,
|
|
15703
|
+
filePath: absolutePath,
|
|
15704
|
+
fileType: "eval",
|
|
15705
|
+
errors
|
|
15807
15706
|
};
|
|
15808
15707
|
}
|
|
15809
|
-
|
|
15810
|
-
|
|
15708
|
+
if (!isObject(parsed)) {
|
|
15709
|
+
errors.push({
|
|
15710
|
+
severity: "error",
|
|
15711
|
+
filePath: absolutePath,
|
|
15712
|
+
message: "File must contain a YAML object"
|
|
15713
|
+
});
|
|
15811
15714
|
return {
|
|
15812
|
-
|
|
15813
|
-
|
|
15814
|
-
|
|
15815
|
-
|
|
15816
|
-
targetsFilePath
|
|
15715
|
+
valid: false,
|
|
15716
|
+
filePath: absolutePath,
|
|
15717
|
+
fileType: "eval",
|
|
15718
|
+
errors
|
|
15817
15719
|
};
|
|
15818
|
-
} catch (error) {
|
|
15819
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
15820
|
-
throw new Error(`Failed to resolve target '${targetChoice.name}': ${message}`);
|
|
15821
|
-
}
|
|
15822
|
-
}
|
|
15823
|
-
|
|
15824
|
-
// src/commands/eval/run-eval.ts
|
|
15825
|
-
var DEFAULT_WORKERS = 3;
|
|
15826
|
-
function normalizeBoolean(value) {
|
|
15827
|
-
return value === true;
|
|
15828
|
-
}
|
|
15829
|
-
function normalizeString(value) {
|
|
15830
|
-
if (typeof value !== "string") {
|
|
15831
|
-
return void 0;
|
|
15832
15720
|
}
|
|
15833
|
-
const
|
|
15834
|
-
|
|
15835
|
-
}
|
|
15836
|
-
|
|
15837
|
-
|
|
15838
|
-
|
|
15721
|
+
const schema = parsed["$schema"];
|
|
15722
|
+
if (schema !== SCHEMA_EVAL_V222) {
|
|
15723
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_EVAL_V222}'` : `Missing required field '$schema'. Expected '${SCHEMA_EVAL_V222}'`;
|
|
15724
|
+
errors.push({
|
|
15725
|
+
severity: "error",
|
|
15726
|
+
filePath: absolutePath,
|
|
15727
|
+
location: "$schema",
|
|
15728
|
+
message
|
|
15729
|
+
});
|
|
15839
15730
|
}
|
|
15840
|
-
|
|
15841
|
-
|
|
15842
|
-
|
|
15843
|
-
|
|
15844
|
-
|
|
15731
|
+
const evalcases = parsed["evalcases"];
|
|
15732
|
+
if (!Array.isArray(evalcases)) {
|
|
15733
|
+
errors.push({
|
|
15734
|
+
severity: "error",
|
|
15735
|
+
filePath: absolutePath,
|
|
15736
|
+
location: "evalcases",
|
|
15737
|
+
message: "Missing or invalid 'evalcases' field (must be an array)"
|
|
15738
|
+
});
|
|
15739
|
+
return {
|
|
15740
|
+
valid: errors.length === 0,
|
|
15741
|
+
filePath: absolutePath,
|
|
15742
|
+
fileType: "eval",
|
|
15743
|
+
errors
|
|
15744
|
+
};
|
|
15745
|
+
}
|
|
15746
|
+
for (let i6 = 0; i6 < evalcases.length; i6++) {
|
|
15747
|
+
const evalCase = evalcases[i6];
|
|
15748
|
+
const location = `evalcases[${i6}]`;
|
|
15749
|
+
if (!isObject(evalCase)) {
|
|
15750
|
+
errors.push({
|
|
15751
|
+
severity: "error",
|
|
15752
|
+
filePath: absolutePath,
|
|
15753
|
+
location,
|
|
15754
|
+
message: "Eval case must be an object"
|
|
15755
|
+
});
|
|
15756
|
+
continue;
|
|
15757
|
+
}
|
|
15758
|
+
const id = evalCase["id"];
|
|
15759
|
+
if (typeof id !== "string" || id.trim().length === 0) {
|
|
15760
|
+
errors.push({
|
|
15761
|
+
severity: "error",
|
|
15762
|
+
filePath: absolutePath,
|
|
15763
|
+
location: `${location}.id`,
|
|
15764
|
+
message: "Missing or invalid 'id' field (must be a non-empty string)"
|
|
15765
|
+
});
|
|
15766
|
+
}
|
|
15767
|
+
const outcome = evalCase["outcome"];
|
|
15768
|
+
if (typeof outcome !== "string" || outcome.trim().length === 0) {
|
|
15769
|
+
errors.push({
|
|
15770
|
+
severity: "error",
|
|
15771
|
+
filePath: absolutePath,
|
|
15772
|
+
location: `${location}.outcome`,
|
|
15773
|
+
message: "Missing or invalid 'outcome' field (must be a non-empty string)"
|
|
15774
|
+
});
|
|
15775
|
+
}
|
|
15776
|
+
const inputMessages = evalCase["input_messages"];
|
|
15777
|
+
if (!Array.isArray(inputMessages)) {
|
|
15778
|
+
errors.push({
|
|
15779
|
+
severity: "error",
|
|
15780
|
+
filePath: absolutePath,
|
|
15781
|
+
location: `${location}.input_messages`,
|
|
15782
|
+
message: "Missing or invalid 'input_messages' field (must be an array)"
|
|
15783
|
+
});
|
|
15784
|
+
} else {
|
|
15785
|
+
validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
|
|
15786
|
+
}
|
|
15787
|
+
const expectedMessages = evalCase["expected_messages"];
|
|
15788
|
+
if (!Array.isArray(expectedMessages)) {
|
|
15789
|
+
errors.push({
|
|
15790
|
+
severity: "error",
|
|
15791
|
+
filePath: absolutePath,
|
|
15792
|
+
location: `${location}.expected_messages`,
|
|
15793
|
+
message: "Missing or invalid 'expected_messages' field (must be an array)"
|
|
15794
|
+
});
|
|
15795
|
+
} else {
|
|
15796
|
+
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
15797
|
+
}
|
|
15845
15798
|
}
|
|
15846
|
-
return fallback;
|
|
15847
|
-
}
|
|
15848
|
-
function normalizeOptions(rawOptions) {
|
|
15849
|
-
const formatStr = normalizeString(rawOptions.outputFormat) ?? "jsonl";
|
|
15850
|
-
const format = formatStr === "yaml" ? "yaml" : "jsonl";
|
|
15851
|
-
const workers = normalizeNumber(rawOptions.workers, 0);
|
|
15852
15799
|
return {
|
|
15853
|
-
|
|
15854
|
-
|
|
15855
|
-
|
|
15856
|
-
|
|
15857
|
-
outPath: normalizeString(rawOptions.out),
|
|
15858
|
-
format,
|
|
15859
|
-
dryRun: normalizeBoolean(rawOptions.dryRun),
|
|
15860
|
-
dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
|
|
15861
|
-
dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
|
|
15862
|
-
dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0),
|
|
15863
|
-
agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120),
|
|
15864
|
-
maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
|
|
15865
|
-
cache: normalizeBoolean(rawOptions.cache),
|
|
15866
|
-
verbose: normalizeBoolean(rawOptions.verbose),
|
|
15867
|
-
dumpPrompts: rawOptions.dumpPrompts
|
|
15800
|
+
valid: errors.length === 0,
|
|
15801
|
+
filePath: absolutePath,
|
|
15802
|
+
fileType: "eval",
|
|
15803
|
+
errors
|
|
15868
15804
|
};
|
|
15869
15805
|
}
|
|
15870
|
-
|
|
15871
|
-
|
|
15872
|
-
|
|
15873
|
-
|
|
15874
|
-
|
|
15875
|
-
|
|
15876
|
-
|
|
15877
|
-
|
|
15878
|
-
|
|
15879
|
-
|
|
15880
|
-
|
|
15881
|
-
|
|
15882
|
-
|
|
15883
|
-
|
|
15884
|
-
|
|
15885
|
-
|
|
15886
|
-
|
|
15887
|
-
|
|
15888
|
-
|
|
15806
|
+
function validateMessages(messages, location, filePath, errors) {
|
|
15807
|
+
for (let i6 = 0; i6 < messages.length; i6++) {
|
|
15808
|
+
const message = messages[i6];
|
|
15809
|
+
const msgLocation = `${location}[${i6}]`;
|
|
15810
|
+
if (!isObject(message)) {
|
|
15811
|
+
errors.push({
|
|
15812
|
+
severity: "error",
|
|
15813
|
+
filePath,
|
|
15814
|
+
location: msgLocation,
|
|
15815
|
+
message: "Message must be an object"
|
|
15816
|
+
});
|
|
15817
|
+
continue;
|
|
15818
|
+
}
|
|
15819
|
+
const role = message["role"];
|
|
15820
|
+
const validRoles = ["system", "user", "assistant"];
|
|
15821
|
+
if (!validRoles.includes(role)) {
|
|
15822
|
+
errors.push({
|
|
15823
|
+
severity: "error",
|
|
15824
|
+
filePath,
|
|
15825
|
+
location: `${msgLocation}.role`,
|
|
15826
|
+
message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
|
|
15827
|
+
});
|
|
15828
|
+
}
|
|
15829
|
+
const content = message["content"];
|
|
15830
|
+
if (typeof content === "string") {
|
|
15831
|
+
} else if (Array.isArray(content)) {
|
|
15832
|
+
for (let j2 = 0; j2 < content.length; j2++) {
|
|
15833
|
+
const contentItem = content[j2];
|
|
15834
|
+
const contentLocation = `${msgLocation}.content[${j2}]`;
|
|
15835
|
+
if (typeof contentItem === "string") {
|
|
15836
|
+
} else if (isObject(contentItem)) {
|
|
15837
|
+
const type = contentItem["type"];
|
|
15838
|
+
if (typeof type !== "string") {
|
|
15839
|
+
errors.push({
|
|
15840
|
+
severity: "error",
|
|
15841
|
+
filePath,
|
|
15842
|
+
location: `${contentLocation}.type`,
|
|
15843
|
+
message: "Content object must have a 'type' field"
|
|
15844
|
+
});
|
|
15845
|
+
}
|
|
15846
|
+
if (type === "text") {
|
|
15847
|
+
const value = contentItem["value"];
|
|
15848
|
+
if (typeof value !== "string") {
|
|
15849
|
+
errors.push({
|
|
15850
|
+
severity: "error",
|
|
15851
|
+
filePath,
|
|
15852
|
+
location: `${contentLocation}.value`,
|
|
15853
|
+
message: "Content with type 'text' must have a 'value' field"
|
|
15854
|
+
});
|
|
15855
|
+
}
|
|
15856
|
+
}
|
|
15857
|
+
} else {
|
|
15858
|
+
errors.push({
|
|
15859
|
+
severity: "error",
|
|
15860
|
+
filePath,
|
|
15861
|
+
location: contentLocation,
|
|
15862
|
+
message: "Content array items must be strings or objects"
|
|
15863
|
+
});
|
|
15864
|
+
}
|
|
15889
15865
|
}
|
|
15890
|
-
|
|
15866
|
+
} else {
|
|
15867
|
+
errors.push({
|
|
15868
|
+
severity: "error",
|
|
15869
|
+
filePath,
|
|
15870
|
+
location: `${msgLocation}.content`,
|
|
15871
|
+
message: "Missing or invalid 'content' field (must be a string or array)"
|
|
15872
|
+
});
|
|
15891
15873
|
}
|
|
15892
15874
|
}
|
|
15893
|
-
return fallback;
|
|
15894
15875
|
}
|
|
15895
|
-
function
|
|
15896
|
-
|
|
15897
|
-
const baseName = "eval";
|
|
15898
|
-
const extension = getDefaultExtension(format);
|
|
15899
|
-
return path13.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
15876
|
+
function isObject2(value) {
|
|
15877
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
15900
15878
|
}
|
|
15901
|
-
|
|
15902
|
-
|
|
15903
|
-
|
|
15904
|
-
|
|
15905
|
-
|
|
15906
|
-
|
|
15879
|
+
var CLI_PLACEHOLDERS2 = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
|
|
15880
|
+
var COMMON_SETTINGS = /* @__PURE__ */ new Set([
|
|
15881
|
+
"provider_batching",
|
|
15882
|
+
"providerBatching"
|
|
15883
|
+
]);
|
|
15884
|
+
var AZURE_SETTINGS = /* @__PURE__ */ new Set([
|
|
15885
|
+
...COMMON_SETTINGS,
|
|
15886
|
+
"endpoint",
|
|
15887
|
+
"resource",
|
|
15888
|
+
"resourceName",
|
|
15889
|
+
"api_key",
|
|
15890
|
+
"apiKey",
|
|
15891
|
+
"deployment",
|
|
15892
|
+
"deploymentName",
|
|
15893
|
+
"model",
|
|
15894
|
+
"version",
|
|
15895
|
+
"api_version",
|
|
15896
|
+
"temperature",
|
|
15897
|
+
"max_output_tokens",
|
|
15898
|
+
"maxTokens"
|
|
15899
|
+
]);
|
|
15900
|
+
var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
|
|
15901
|
+
...COMMON_SETTINGS,
|
|
15902
|
+
"api_key",
|
|
15903
|
+
"apiKey",
|
|
15904
|
+
"model",
|
|
15905
|
+
"deployment",
|
|
15906
|
+
"variant",
|
|
15907
|
+
"temperature",
|
|
15908
|
+
"max_output_tokens",
|
|
15909
|
+
"maxTokens",
|
|
15910
|
+
"thinking_budget",
|
|
15911
|
+
"thinkingBudget"
|
|
15912
|
+
]);
|
|
15913
|
+
var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
|
|
15914
|
+
...COMMON_SETTINGS,
|
|
15915
|
+
"api_key",
|
|
15916
|
+
"apiKey",
|
|
15917
|
+
"model",
|
|
15918
|
+
"deployment",
|
|
15919
|
+
"variant",
|
|
15920
|
+
"temperature",
|
|
15921
|
+
"max_output_tokens",
|
|
15922
|
+
"maxTokens"
|
|
15923
|
+
]);
|
|
15924
|
+
var CODEX_SETTINGS = /* @__PURE__ */ new Set([
|
|
15925
|
+
...COMMON_SETTINGS,
|
|
15926
|
+
"executable",
|
|
15927
|
+
"command",
|
|
15928
|
+
"binary",
|
|
15929
|
+
"args",
|
|
15930
|
+
"arguments",
|
|
15931
|
+
"cwd",
|
|
15932
|
+
"timeout_seconds",
|
|
15933
|
+
"timeoutSeconds",
|
|
15934
|
+
"log_dir",
|
|
15935
|
+
"logDir",
|
|
15936
|
+
"log_directory",
|
|
15937
|
+
"logDirectory",
|
|
15938
|
+
"log_format",
|
|
15939
|
+
"logFormat",
|
|
15940
|
+
"log_output_format",
|
|
15941
|
+
"logOutputFormat"
|
|
15942
|
+
]);
|
|
15943
|
+
var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
|
|
15944
|
+
...COMMON_SETTINGS,
|
|
15945
|
+
"workspace_template",
|
|
15946
|
+
"workspaceTemplate",
|
|
15947
|
+
"vscode_cmd",
|
|
15948
|
+
"command",
|
|
15949
|
+
"wait",
|
|
15950
|
+
"dry_run",
|
|
15951
|
+
"dryRun",
|
|
15952
|
+
"subagent_root",
|
|
15953
|
+
"subagentRoot"
|
|
15954
|
+
]);
|
|
15955
|
+
var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
15956
|
+
...COMMON_SETTINGS,
|
|
15957
|
+
"response",
|
|
15958
|
+
"delayMs",
|
|
15959
|
+
"delayMinMs",
|
|
15960
|
+
"delayMaxMs"
|
|
15961
|
+
]);
|
|
15962
|
+
var CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
15963
|
+
...COMMON_SETTINGS,
|
|
15964
|
+
"command_template",
|
|
15965
|
+
"commandTemplate",
|
|
15966
|
+
"files_format",
|
|
15967
|
+
"filesFormat",
|
|
15968
|
+
"attachments_format",
|
|
15969
|
+
"attachmentsFormat",
|
|
15970
|
+
"cwd",
|
|
15971
|
+
"env",
|
|
15972
|
+
"timeout_seconds",
|
|
15973
|
+
"timeoutSeconds",
|
|
15974
|
+
"healthcheck"
|
|
15975
|
+
]);
|
|
15976
|
+
function getKnownSettings(provider) {
|
|
15977
|
+
const normalizedProvider = provider.toLowerCase();
|
|
15978
|
+
switch (normalizedProvider) {
|
|
15979
|
+
case "azure":
|
|
15980
|
+
case "azure-openai":
|
|
15981
|
+
return AZURE_SETTINGS;
|
|
15982
|
+
case "anthropic":
|
|
15983
|
+
return ANTHROPIC_SETTINGS;
|
|
15984
|
+
case "gemini":
|
|
15985
|
+
case "google":
|
|
15986
|
+
case "google-gemini":
|
|
15987
|
+
return GEMINI_SETTINGS;
|
|
15988
|
+
case "codex":
|
|
15989
|
+
case "codex-cli":
|
|
15990
|
+
return CODEX_SETTINGS;
|
|
15991
|
+
case "vscode":
|
|
15992
|
+
case "vscode-insiders":
|
|
15993
|
+
return VSCODE_SETTINGS;
|
|
15994
|
+
case "mock":
|
|
15995
|
+
return MOCK_SETTINGS;
|
|
15996
|
+
case "cli":
|
|
15997
|
+
return CLI_SETTINGS;
|
|
15998
|
+
default:
|
|
15999
|
+
return null;
|
|
15907
16000
|
}
|
|
15908
|
-
return path13.join(cwd, ".agentv", "prompts");
|
|
15909
|
-
}
|
|
15910
|
-
function createEvaluationCache() {
|
|
15911
|
-
const store = /* @__PURE__ */ new Map();
|
|
15912
|
-
return {
|
|
15913
|
-
async get(key2) {
|
|
15914
|
-
return store.get(key2);
|
|
15915
|
-
},
|
|
15916
|
-
async set(key2, value) {
|
|
15917
|
-
store.set(key2, value);
|
|
15918
|
-
}
|
|
15919
|
-
};
|
|
15920
|
-
}
|
|
15921
|
-
function createProgressReporter(maxWorkers) {
|
|
15922
|
-
const display = new ProgressDisplay(maxWorkers);
|
|
15923
|
-
return {
|
|
15924
|
-
isInteractive: display.isInteractiveMode(),
|
|
15925
|
-
start: () => display.start(),
|
|
15926
|
-
setTotal: (total) => display.setTotalTests(total),
|
|
15927
|
-
update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
|
|
15928
|
-
finish: () => display.finish(),
|
|
15929
|
-
addLogPaths: (paths) => display.addLogPaths(paths)
|
|
15930
|
-
};
|
|
15931
|
-
}
|
|
15932
|
-
function makeEvalKey(testFilePath, evalId) {
|
|
15933
|
-
return `${path13.resolve(testFilePath)}::${evalId}`;
|
|
15934
16001
|
}
|
|
15935
|
-
function
|
|
15936
|
-
const
|
|
15937
|
-
|
|
15938
|
-
|
|
15939
|
-
|
|
15940
|
-
|
|
15941
|
-
|
|
15942
|
-
|
|
15943
|
-
|
|
15944
|
-
|
|
15945
|
-
|
|
15946
|
-
|
|
16002
|
+
function validateUnknownSettings(settings, provider, absolutePath, location, errors) {
|
|
16003
|
+
const knownSettings = getKnownSettings(provider);
|
|
16004
|
+
if (!knownSettings) {
|
|
16005
|
+
return;
|
|
16006
|
+
}
|
|
16007
|
+
for (const key2 of Object.keys(settings)) {
|
|
16008
|
+
if (!knownSettings.has(key2)) {
|
|
16009
|
+
errors.push({
|
|
16010
|
+
severity: "warning",
|
|
16011
|
+
filePath: absolutePath,
|
|
16012
|
+
location: `${location}.${key2}`,
|
|
16013
|
+
message: `Unknown setting '${key2}' for ${provider} provider. This property will be ignored.`
|
|
16014
|
+
});
|
|
15947
16015
|
}
|
|
15948
|
-
}
|
|
16016
|
+
}
|
|
15949
16017
|
}
|
|
15950
|
-
async function
|
|
15951
|
-
const
|
|
15952
|
-
|
|
15953
|
-
|
|
15954
|
-
|
|
15955
|
-
|
|
15956
|
-
|
|
15957
|
-
})
|
|
15958
|
-
|
|
15959
|
-
|
|
15960
|
-
|
|
15961
|
-
|
|
15962
|
-
explicitTargetsPath: options.targetsPath,
|
|
15963
|
-
cliTargetName: options.target,
|
|
15964
|
-
dryRun: options.dryRun,
|
|
15965
|
-
dryRunDelay: options.dryRunDelay,
|
|
15966
|
-
dryRunDelayMin: options.dryRunDelayMin,
|
|
15967
|
-
dryRunDelayMax: options.dryRunDelayMax,
|
|
15968
|
-
env: process.env
|
|
15969
|
-
});
|
|
15970
|
-
const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
|
|
15971
|
-
const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
|
|
15972
|
-
const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose });
|
|
15973
|
-
const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
|
|
15974
|
-
return { evalIds: filteredIds, selection, inlineTargetLabel };
|
|
15975
|
-
}
|
|
15976
|
-
async function runWithLimit(items, limit, task) {
|
|
15977
|
-
const safeLimit = Math.max(1, limit);
|
|
15978
|
-
let index = 0;
|
|
15979
|
-
const workers = Array.from({ length: safeLimit }, async () => {
|
|
15980
|
-
while (index < items.length) {
|
|
15981
|
-
const current = items[index];
|
|
15982
|
-
index += 1;
|
|
15983
|
-
await task(current);
|
|
15984
|
-
}
|
|
15985
|
-
});
|
|
15986
|
-
await Promise.all(workers);
|
|
15987
|
-
}
|
|
15988
|
-
async function runSingleEvalFile(params) {
|
|
15989
|
-
const {
|
|
15990
|
-
testFilePath,
|
|
15991
|
-
cwd,
|
|
15992
|
-
repoRoot,
|
|
15993
|
-
options,
|
|
15994
|
-
outputWriter,
|
|
15995
|
-
cache,
|
|
15996
|
-
evaluationRunner,
|
|
15997
|
-
workersOverride,
|
|
15998
|
-
progressReporter,
|
|
15999
|
-
seenEvalCases,
|
|
16000
|
-
displayIdTracker,
|
|
16001
|
-
selection,
|
|
16002
|
-
inlineTargetLabel
|
|
16003
|
-
} = params;
|
|
16004
|
-
await ensureFileExists(testFilePath, "Test file");
|
|
16005
|
-
const resolvedTargetSelection = selection;
|
|
16006
|
-
const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
|
|
16007
|
-
const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
|
|
16008
|
-
if (!progressReporter.isInteractive || options.verbose) {
|
|
16009
|
-
console.log(targetMessage);
|
|
16010
|
-
}
|
|
16011
|
-
const promptDumpDir = resolvePromptDirectory(options.dumpPrompts, cwd);
|
|
16012
|
-
if (promptDumpDir) {
|
|
16013
|
-
await mkdir6(promptDumpDir, { recursive: true });
|
|
16014
|
-
if (options.verbose) {
|
|
16015
|
-
console.log(`Prompt dumps enabled at: ${promptDumpDir}`);
|
|
16016
|
-
}
|
|
16017
|
-
}
|
|
16018
|
-
const agentTimeoutMs = Math.max(0, options.agentTimeoutSeconds) * 1e3;
|
|
16019
|
-
const workerPreference = workersOverride ?? options.workers;
|
|
16020
|
-
let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
|
|
16021
|
-
if (resolvedWorkers < 1 || resolvedWorkers > 50) {
|
|
16022
|
-
throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
|
|
16023
|
-
}
|
|
16024
|
-
const isVSCodeProvider = ["vscode", "vscode-insiders"].includes(
|
|
16025
|
-
resolvedTargetSelection.resolvedTarget.kind
|
|
16026
|
-
);
|
|
16027
|
-
if (isVSCodeProvider && resolvedWorkers > 1) {
|
|
16028
|
-
console.warn(`Warning: VSCode providers require window focus. Limiting workers from ${resolvedWorkers} to 1 to prevent race conditions.`);
|
|
16029
|
-
resolvedWorkers = 1;
|
|
16030
|
-
}
|
|
16031
|
-
if (options.verbose) {
|
|
16032
|
-
const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
|
|
16033
|
-
console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
|
|
16034
|
-
}
|
|
16035
|
-
if (isVSCodeProvider && !options.dryRun) {
|
|
16036
|
-
await ensureVSCodeSubagents({
|
|
16037
|
-
kind: resolvedTargetSelection.resolvedTarget.kind,
|
|
16038
|
-
count: resolvedWorkers,
|
|
16039
|
-
verbose: options.verbose
|
|
16018
|
+
async function validateTargetsFile(filePath) {
|
|
16019
|
+
const errors = [];
|
|
16020
|
+
const absolutePath = path23.resolve(filePath);
|
|
16021
|
+
let parsed;
|
|
16022
|
+
try {
|
|
16023
|
+
const content = await readFile32(absolutePath, "utf8");
|
|
16024
|
+
parsed = parse32(content);
|
|
16025
|
+
} catch (error) {
|
|
16026
|
+
errors.push({
|
|
16027
|
+
severity: "error",
|
|
16028
|
+
filePath: absolutePath,
|
|
16029
|
+
message: `Failed to parse YAML: ${error.message}`
|
|
16040
16030
|
});
|
|
16031
|
+
return {
|
|
16032
|
+
valid: false,
|
|
16033
|
+
filePath: absolutePath,
|
|
16034
|
+
fileType: "targets",
|
|
16035
|
+
errors
|
|
16036
|
+
};
|
|
16041
16037
|
}
|
|
16042
|
-
|
|
16043
|
-
|
|
16044
|
-
|
|
16045
|
-
|
|
16046
|
-
|
|
16047
|
-
|
|
16048
|
-
|
|
16049
|
-
agentTimeoutMs,
|
|
16050
|
-
promptDumpDir,
|
|
16051
|
-
cache,
|
|
16052
|
-
useCache: options.cache,
|
|
16053
|
-
evalId: options.evalId,
|
|
16054
|
-
verbose: options.verbose,
|
|
16055
|
-
maxConcurrency: resolvedWorkers,
|
|
16056
|
-
onResult: async (result) => {
|
|
16057
|
-
await outputWriter.append(result);
|
|
16058
|
-
},
|
|
16059
|
-
onProgress: async (event) => {
|
|
16060
|
-
const evalKey = makeEvalKey(testFilePath, event.evalId);
|
|
16061
|
-
if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
|
|
16062
|
-
seenEvalCases.add(evalKey);
|
|
16063
|
-
progressReporter.setTotal(seenEvalCases.size);
|
|
16064
|
-
}
|
|
16065
|
-
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16066
|
-
progressReporter.update(displayId, {
|
|
16067
|
-
workerId: displayId,
|
|
16068
|
-
evalId: event.evalId,
|
|
16069
|
-
status: event.status,
|
|
16070
|
-
startedAt: event.startedAt,
|
|
16071
|
-
completedAt: event.completedAt,
|
|
16072
|
-
error: event.error,
|
|
16073
|
-
targetLabel: inlineTargetLabel
|
|
16038
|
+
function validateCliSettings(settings, absolutePath2, location, errors2) {
|
|
16039
|
+
if (!isObject2(settings)) {
|
|
16040
|
+
errors2.push({
|
|
16041
|
+
severity: "error",
|
|
16042
|
+
filePath: absolutePath2,
|
|
16043
|
+
location,
|
|
16044
|
+
message: "CLI provider requires a 'settings' object"
|
|
16074
16045
|
});
|
|
16075
|
-
}
|
|
16076
|
-
});
|
|
16077
|
-
return { results: [...results], promptDumpDir };
|
|
16078
|
-
}
|
|
16079
|
-
async function runEvalCommand(input) {
|
|
16080
|
-
const options = normalizeOptions(input.rawOptions);
|
|
16081
|
-
const cwd = process.cwd();
|
|
16082
|
-
const repoRoot = await findRepoRoot(cwd);
|
|
16083
|
-
if (options.verbose) {
|
|
16084
|
-
console.log(`Repository root: ${repoRoot}`);
|
|
16085
|
-
}
|
|
16086
|
-
const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
16087
|
-
console.log(`Output path: ${outputPath}`);
|
|
16088
|
-
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
16089
|
-
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
16090
|
-
const evaluationRunner = await resolveEvaluationRunner();
|
|
16091
|
-
const allResults = [];
|
|
16092
|
-
let lastPromptDumpDir;
|
|
16093
|
-
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
16094
|
-
const resolvedTestFiles = input.testFiles.map((file) => path13.resolve(file));
|
|
16095
|
-
const displayIdTracker = createDisplayIdTracker();
|
|
16096
|
-
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
16097
|
-
const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
|
|
16098
|
-
const perFileWorkers = options.workers ? Math.max(1, Math.floor(totalWorkers / fileConcurrency)) : void 0;
|
|
16099
|
-
const fileMetadata = /* @__PURE__ */ new Map();
|
|
16100
|
-
for (const testFilePath of resolvedTestFiles) {
|
|
16101
|
-
const meta = await prepareFileMetadata({
|
|
16102
|
-
testFilePath,
|
|
16103
|
-
repoRoot,
|
|
16104
|
-
cwd,
|
|
16105
|
-
options
|
|
16106
|
-
});
|
|
16107
|
-
fileMetadata.set(testFilePath, meta);
|
|
16108
|
-
}
|
|
16109
|
-
const totalEvalCount = Array.from(fileMetadata.values()).reduce(
|
|
16110
|
-
(sum, meta) => sum + meta.evalIds.length,
|
|
16111
|
-
0
|
|
16112
|
-
);
|
|
16113
|
-
if (totalEvalCount === 0) {
|
|
16114
|
-
throw new Error("No eval cases matched the provided filters.");
|
|
16115
|
-
}
|
|
16116
|
-
const progressReporter = createProgressReporter(totalWorkers);
|
|
16117
|
-
progressReporter.start();
|
|
16118
|
-
progressReporter.setTotal(totalEvalCount);
|
|
16119
|
-
const seenCodexLogPaths = /* @__PURE__ */ new Set();
|
|
16120
|
-
const unsubscribeCodexLogs = subscribeToCodexLogEntries((entry) => {
|
|
16121
|
-
if (!entry.filePath || seenCodexLogPaths.has(entry.filePath)) {
|
|
16122
16046
|
return;
|
|
16123
16047
|
}
|
|
16124
|
-
|
|
16125
|
-
|
|
16126
|
-
|
|
16127
|
-
|
|
16128
|
-
|
|
16129
|
-
|
|
16130
|
-
|
|
16131
|
-
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16132
|
-
progressReporter.update(displayId, {
|
|
16133
|
-
workerId: displayId,
|
|
16134
|
-
evalId,
|
|
16135
|
-
status: "pending",
|
|
16136
|
-
targetLabel: meta.inlineTargetLabel
|
|
16048
|
+
const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
|
|
16049
|
+
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16050
|
+
errors2.push({
|
|
16051
|
+
severity: "error",
|
|
16052
|
+
filePath: absolutePath2,
|
|
16053
|
+
location: `${location}.commandTemplate`,
|
|
16054
|
+
message: "CLI provider requires 'commandTemplate' as a non-empty string"
|
|
16137
16055
|
});
|
|
16056
|
+
} else {
|
|
16057
|
+
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16138
16058
|
}
|
|
16139
|
-
|
|
16140
|
-
|
|
16141
|
-
|
|
16142
|
-
|
|
16143
|
-
|
|
16144
|
-
|
|
16145
|
-
|
|
16146
|
-
const result = await runSingleEvalFile({
|
|
16147
|
-
testFilePath,
|
|
16148
|
-
cwd,
|
|
16149
|
-
repoRoot,
|
|
16150
|
-
options,
|
|
16151
|
-
outputWriter,
|
|
16152
|
-
cache,
|
|
16153
|
-
evaluationRunner,
|
|
16154
|
-
workersOverride: perFileWorkers,
|
|
16155
|
-
progressReporter,
|
|
16156
|
-
seenEvalCases,
|
|
16157
|
-
displayIdTracker,
|
|
16158
|
-
selection: targetPrep.selection,
|
|
16159
|
-
inlineTargetLabel: targetPrep.inlineTargetLabel
|
|
16059
|
+
const attachmentsFormat = settings["attachments_format"] ?? settings["attachmentsFormat"];
|
|
16060
|
+
if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
|
|
16061
|
+
errors2.push({
|
|
16062
|
+
severity: "error",
|
|
16063
|
+
filePath: absolutePath2,
|
|
16064
|
+
location: `${location}.attachmentsFormat`,
|
|
16065
|
+
message: "'attachmentsFormat' must be a string when provided"
|
|
16160
16066
|
});
|
|
16161
|
-
allResults.push(...result.results);
|
|
16162
|
-
if (result.promptDumpDir) {
|
|
16163
|
-
lastPromptDumpDir = result.promptDumpDir;
|
|
16164
|
-
}
|
|
16165
|
-
});
|
|
16166
|
-
progressReporter.finish();
|
|
16167
|
-
const summary = calculateEvaluationSummary(allResults);
|
|
16168
|
-
console.log(formatEvaluationSummary(summary));
|
|
16169
|
-
if (allResults.length > 0) {
|
|
16170
|
-
console.log(`
|
|
16171
|
-
Results written to: ${outputPath}`);
|
|
16172
16067
|
}
|
|
16173
|
-
|
|
16174
|
-
|
|
16068
|
+
const filesFormat = settings["files_format"] ?? settings["filesFormat"];
|
|
16069
|
+
if (filesFormat !== void 0 && typeof filesFormat !== "string") {
|
|
16070
|
+
errors2.push({
|
|
16071
|
+
severity: "error",
|
|
16072
|
+
filePath: absolutePath2,
|
|
16073
|
+
location: `${location}.filesFormat`,
|
|
16074
|
+
message: "'filesFormat' must be a string when provided"
|
|
16075
|
+
});
|
|
16175
16076
|
}
|
|
16176
|
-
|
|
16177
|
-
|
|
16178
|
-
|
|
16179
|
-
|
|
16180
|
-
|
|
16181
|
-
|
|
16182
|
-
|
|
16183
|
-
|
|
16184
|
-
return runEvaluation;
|
|
16185
|
-
}
|
|
16186
|
-
const resolved = path13.isAbsolute(overridePath) ? overridePath : path13.resolve(process.cwd(), overridePath);
|
|
16187
|
-
const moduleUrl = pathToFileURL(resolved).href;
|
|
16188
|
-
const mod = await import(moduleUrl);
|
|
16189
|
-
const candidate = mod.runEvaluation;
|
|
16190
|
-
if (typeof candidate !== "function") {
|
|
16191
|
-
throw new Error(
|
|
16192
|
-
`Module '${resolved}' must export a 'runEvaluation' function to override the default implementation`
|
|
16193
|
-
);
|
|
16194
|
-
}
|
|
16195
|
-
return candidate;
|
|
16196
|
-
}
|
|
16197
|
-
|
|
16198
|
-
// src/commands/eval/index.ts
|
|
16199
|
-
function parseInteger(value, fallback) {
|
|
16200
|
-
const parsed = Number.parseInt(value, 10);
|
|
16201
|
-
if (Number.isNaN(parsed)) {
|
|
16202
|
-
return fallback;
|
|
16203
|
-
}
|
|
16204
|
-
return parsed;
|
|
16205
|
-
}
|
|
16206
|
-
function registerEvalCommand(program) {
|
|
16207
|
-
program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
|
|
16208
|
-
"--workers <count>",
|
|
16209
|
-
"Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
16210
|
-
(value) => parseInteger(value, 1)
|
|
16211
|
-
).option("--out <path>", "Write results to the specified path").option(
|
|
16212
|
-
"--output-format <format>",
|
|
16213
|
-
"Output format: 'jsonl' or 'yaml' (default: jsonl)",
|
|
16214
|
-
"jsonl"
|
|
16215
|
-
).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
|
|
16216
|
-
"--dry-run-delay <ms>",
|
|
16217
|
-
"Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
|
|
16218
|
-
(value) => parseInteger(value, 0),
|
|
16219
|
-
0
|
|
16220
|
-
).option(
|
|
16221
|
-
"--dry-run-delay-min <ms>",
|
|
16222
|
-
"Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
|
|
16223
|
-
(value) => parseInteger(value, 0),
|
|
16224
|
-
0
|
|
16225
|
-
).option(
|
|
16226
|
-
"--dry-run-delay-max <ms>",
|
|
16227
|
-
"Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
|
|
16228
|
-
(value) => parseInteger(value, 0),
|
|
16229
|
-
0
|
|
16230
|
-
).option(
|
|
16231
|
-
"--agent-timeout <seconds>",
|
|
16232
|
-
"Timeout in seconds for provider responses (default: 120)",
|
|
16233
|
-
(value) => parseInteger(value, 120),
|
|
16234
|
-
120
|
|
16235
|
-
).option(
|
|
16236
|
-
"--max-retries <count>",
|
|
16237
|
-
"Retry count for timeout recoveries (default: 2)",
|
|
16238
|
-
(value) => parseInteger(value, 2),
|
|
16239
|
-
2
|
|
16240
|
-
).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
|
|
16241
|
-
"--dump-prompts [dir]",
|
|
16242
|
-
"Persist prompt payloads for debugging (optional custom directory)"
|
|
16243
|
-
).action(async (evalPaths, rawOptions) => {
|
|
16244
|
-
const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
|
|
16245
|
-
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
16246
|
-
});
|
|
16247
|
-
return program;
|
|
16248
|
-
}
|
|
16249
|
-
async function resolveEvalPaths(evalPaths, cwd) {
|
|
16250
|
-
const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
|
|
16251
|
-
if (normalizedInputs.length === 0) {
|
|
16252
|
-
throw new Error("No eval paths provided.");
|
|
16253
|
-
}
|
|
16254
|
-
const unmatched = [];
|
|
16255
|
-
const results = /* @__PURE__ */ new Set();
|
|
16256
|
-
for (const pattern of normalizedInputs) {
|
|
16257
|
-
const candidatePath = path14.isAbsolute(pattern) ? path14.normalize(pattern) : path14.resolve(cwd, pattern);
|
|
16258
|
-
try {
|
|
16259
|
-
const stats = await stat3(candidatePath);
|
|
16260
|
-
if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
|
|
16261
|
-
results.add(candidatePath);
|
|
16262
|
-
continue;
|
|
16263
|
-
}
|
|
16264
|
-
} catch {
|
|
16265
|
-
}
|
|
16266
|
-
const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
|
|
16267
|
-
const matches = await fg(globPattern, {
|
|
16268
|
-
cwd,
|
|
16269
|
-
absolute: true,
|
|
16270
|
-
onlyFiles: true,
|
|
16271
|
-
unique: true,
|
|
16272
|
-
dot: true,
|
|
16273
|
-
followSymbolicLinks: true
|
|
16274
|
-
});
|
|
16275
|
-
const yamlMatches = matches.filter((filePath) => /\.ya?ml$/i.test(filePath));
|
|
16276
|
-
if (yamlMatches.length === 0) {
|
|
16277
|
-
unmatched.push(pattern);
|
|
16278
|
-
continue;
|
|
16279
|
-
}
|
|
16280
|
-
yamlMatches.forEach((filePath) => results.add(path14.normalize(filePath)));
|
|
16281
|
-
}
|
|
16282
|
-
if (unmatched.length > 0) {
|
|
16283
|
-
throw new Error(
|
|
16284
|
-
`No eval files matched: ${unmatched.join(
|
|
16285
|
-
", "
|
|
16286
|
-
)}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
|
|
16287
|
-
);
|
|
16288
|
-
}
|
|
16289
|
-
const sorted = Array.from(results);
|
|
16290
|
-
sorted.sort();
|
|
16291
|
-
return sorted;
|
|
16292
|
-
}
|
|
16293
|
-
|
|
16294
|
-
// src/commands/init/index.ts
|
|
16295
|
-
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
16296
|
-
import path16 from "node:path";
|
|
16297
|
-
import * as readline from "node:readline/promises";
|
|
16298
|
-
|
|
16299
|
-
// src/templates/index.ts
|
|
16300
|
-
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
16301
|
-
import path15 from "node:path";
|
|
16302
|
-
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
16303
|
-
var TemplateManager = class {
|
|
16304
|
-
static getGithubTemplates() {
|
|
16305
|
-
return this.getTemplatesFromDir("github");
|
|
16306
|
-
}
|
|
16307
|
-
static getAgentvTemplates() {
|
|
16308
|
-
return this.getTemplatesFromDir("agentv");
|
|
16309
|
-
}
|
|
16310
|
-
static getTemplatesFromDir(subdir) {
|
|
16311
|
-
const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
|
|
16312
|
-
let templatesDir;
|
|
16313
|
-
if (currentDir.includes(path15.sep + "dist")) {
|
|
16314
|
-
templatesDir = path15.join(currentDir, "templates", subdir);
|
|
16315
|
-
} else {
|
|
16316
|
-
templatesDir = path15.join(currentDir, subdir);
|
|
16077
|
+
const cwd = settings["cwd"];
|
|
16078
|
+
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16079
|
+
errors2.push({
|
|
16080
|
+
severity: "error",
|
|
16081
|
+
filePath: absolutePath2,
|
|
16082
|
+
location: `${location}.cwd`,
|
|
16083
|
+
message: "'cwd' must be a string when provided"
|
|
16084
|
+
});
|
|
16317
16085
|
}
|
|
16318
|
-
|
|
16319
|
-
|
|
16320
|
-
|
|
16321
|
-
|
|
16322
|
-
|
|
16323
|
-
|
|
16324
|
-
|
|
16325
|
-
|
|
16326
|
-
|
|
16327
|
-
if (stat5.isDirectory()) {
|
|
16328
|
-
templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
|
|
16329
|
-
} else {
|
|
16330
|
-
const content = readFileSync(fullPath, "utf-8");
|
|
16331
|
-
templates.push({
|
|
16332
|
-
path: entryRelativePath.split(path15.sep).join("/"),
|
|
16333
|
-
// Normalize to forward slashes
|
|
16334
|
-
content
|
|
16086
|
+
const timeoutSeconds = settings["timeout_seconds"] ?? settings["timeoutSeconds"];
|
|
16087
|
+
if (timeoutSeconds !== void 0) {
|
|
16088
|
+
const numericTimeout = Number(timeoutSeconds);
|
|
16089
|
+
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
16090
|
+
errors2.push({
|
|
16091
|
+
severity: "error",
|
|
16092
|
+
filePath: absolutePath2,
|
|
16093
|
+
location: `${location}.timeoutSeconds`,
|
|
16094
|
+
message: "'timeoutSeconds' must be a positive number when provided"
|
|
16335
16095
|
});
|
|
16336
16096
|
}
|
|
16337
16097
|
}
|
|
16338
|
-
|
|
16339
|
-
|
|
16340
|
-
|
|
16341
|
-
|
|
16342
|
-
|
|
16343
|
-
|
|
16344
|
-
|
|
16345
|
-
|
|
16346
|
-
|
|
16347
|
-
|
|
16348
|
-
|
|
16349
|
-
|
|
16350
|
-
|
|
16351
|
-
|
|
16352
|
-
|
|
16353
|
-
|
|
16354
|
-
}
|
|
16355
|
-
|
|
16356
|
-
|
|
16357
|
-
|
|
16358
|
-
const agentvDir = path16.join(targetPath, ".agentv");
|
|
16359
|
-
const githubTemplates = TemplateManager.getGithubTemplates();
|
|
16360
|
-
const agentvTemplates = TemplateManager.getAgentvTemplates();
|
|
16361
|
-
const existingFiles = [];
|
|
16362
|
-
if (existsSync(githubDir)) {
|
|
16363
|
-
for (const template of githubTemplates) {
|
|
16364
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
16365
|
-
if (existsSync(targetFilePath)) {
|
|
16366
|
-
existingFiles.push(path16.relative(targetPath, targetFilePath));
|
|
16098
|
+
const envOverrides = settings["env"];
|
|
16099
|
+
if (envOverrides !== void 0) {
|
|
16100
|
+
if (!isObject2(envOverrides)) {
|
|
16101
|
+
errors2.push({
|
|
16102
|
+
severity: "error",
|
|
16103
|
+
filePath: absolutePath2,
|
|
16104
|
+
location: `${location}.env`,
|
|
16105
|
+
message: "'env' must be an object with string values"
|
|
16106
|
+
});
|
|
16107
|
+
} else {
|
|
16108
|
+
for (const [key2, value] of Object.entries(envOverrides)) {
|
|
16109
|
+
if (typeof value !== "string" || value.trim().length === 0) {
|
|
16110
|
+
errors2.push({
|
|
16111
|
+
severity: "error",
|
|
16112
|
+
filePath: absolutePath2,
|
|
16113
|
+
location: `${location}.env.${key2}`,
|
|
16114
|
+
message: `Environment override '${key2}' must be a non-empty string`
|
|
16115
|
+
});
|
|
16116
|
+
}
|
|
16117
|
+
}
|
|
16367
16118
|
}
|
|
16368
16119
|
}
|
|
16369
|
-
|
|
16370
|
-
|
|
16371
|
-
|
|
16372
|
-
const targetFilePath = path16.join(agentvDir, template.path);
|
|
16373
|
-
if (existsSync(targetFilePath)) {
|
|
16374
|
-
existingFiles.push(path16.relative(targetPath, targetFilePath));
|
|
16375
|
-
}
|
|
16120
|
+
const healthcheck = settings["healthcheck"];
|
|
16121
|
+
if (healthcheck !== void 0) {
|
|
16122
|
+
validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
|
|
16376
16123
|
}
|
|
16377
16124
|
}
|
|
16378
|
-
|
|
16379
|
-
|
|
16380
|
-
|
|
16381
|
-
|
|
16382
|
-
|
|
16383
|
-
|
|
16384
|
-
|
|
16125
|
+
function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
|
|
16126
|
+
if (!isObject2(healthcheck)) {
|
|
16127
|
+
errors2.push({
|
|
16128
|
+
severity: "error",
|
|
16129
|
+
filePath: absolutePath2,
|
|
16130
|
+
location,
|
|
16131
|
+
message: "'healthcheck' must be an object when provided"
|
|
16132
|
+
});
|
|
16385
16133
|
return;
|
|
16386
16134
|
}
|
|
16387
|
-
|
|
16388
|
-
|
|
16389
|
-
|
|
16390
|
-
|
|
16391
|
-
|
|
16392
|
-
|
|
16393
|
-
|
|
16394
|
-
|
|
16395
|
-
|
|
16396
|
-
const targetFilePath = path16.join(githubDir, template.path);
|
|
16397
|
-
const targetDirPath = path16.dirname(targetFilePath);
|
|
16398
|
-
if (!existsSync(targetDirPath)) {
|
|
16399
|
-
mkdirSync(targetDirPath, { recursive: true });
|
|
16135
|
+
const type = healthcheck["type"];
|
|
16136
|
+
if (type !== "http" && type !== "command") {
|
|
16137
|
+
errors2.push({
|
|
16138
|
+
severity: "error",
|
|
16139
|
+
filePath: absolutePath2,
|
|
16140
|
+
location: `${location}.type`,
|
|
16141
|
+
message: "healthcheck.type must be either 'http' or 'command'"
|
|
16142
|
+
});
|
|
16143
|
+
return;
|
|
16400
16144
|
}
|
|
16401
|
-
|
|
16402
|
-
|
|
16403
|
-
|
|
16404
|
-
|
|
16405
|
-
|
|
16406
|
-
|
|
16407
|
-
|
|
16408
|
-
|
|
16145
|
+
const timeoutSeconds = healthcheck["timeout_seconds"] ?? healthcheck["timeoutSeconds"];
|
|
16146
|
+
if (timeoutSeconds !== void 0) {
|
|
16147
|
+
const numericTimeout = Number(timeoutSeconds);
|
|
16148
|
+
if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
|
|
16149
|
+
errors2.push({
|
|
16150
|
+
severity: "error",
|
|
16151
|
+
filePath: absolutePath2,
|
|
16152
|
+
location: `${location}.timeoutSeconds`,
|
|
16153
|
+
message: "healthcheck.timeoutSeconds must be a positive number when provided"
|
|
16154
|
+
});
|
|
16155
|
+
}
|
|
16409
16156
|
}
|
|
16410
|
-
|
|
16411
|
-
|
|
16412
|
-
|
|
16413
|
-
|
|
16414
|
-
|
|
16415
|
-
|
|
16416
|
-
|
|
16417
|
-
|
|
16418
|
-
|
|
16419
|
-
|
|
16420
|
-
|
|
16421
|
-
console.log(" 1. Edit .agentv/.env with your API credentials");
|
|
16422
|
-
console.log(" 2. Configure targets in .agentv/targets.yaml");
|
|
16423
|
-
console.log(" 3. Create eval files using the schema and prompt templates");
|
|
16424
|
-
}
|
|
16425
|
-
|
|
16426
|
-
// src/commands/status.ts
|
|
16427
|
-
function registerStatusCommand(program) {
|
|
16428
|
-
program.command("status").description("Show the latest AgentV kernel status").action(() => {
|
|
16429
|
-
const kernel = createAgentKernel();
|
|
16430
|
-
console.log(`Kernel status: ${kernel.status}`);
|
|
16431
|
-
});
|
|
16432
|
-
return program;
|
|
16433
|
-
}
|
|
16434
|
-
|
|
16435
|
-
// src/commands/validate/format-output.ts
|
|
16436
|
-
var ANSI_RED = "\x1B[31m";
|
|
16437
|
-
var ANSI_YELLOW2 = "\x1B[33m";
|
|
16438
|
-
var ANSI_GREEN = "\x1B[32m";
|
|
16439
|
-
var ANSI_CYAN = "\x1B[36m";
|
|
16440
|
-
var ANSI_BOLD = "\x1B[1m";
|
|
16441
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
16442
|
-
function formatSummary(summary, useColors) {
|
|
16443
|
-
const lines = [];
|
|
16444
|
-
lines.push("");
|
|
16445
|
-
lines.push(formatHeader("Validation Summary", useColors));
|
|
16446
|
-
lines.push("");
|
|
16447
|
-
for (const result of summary.results) {
|
|
16448
|
-
lines.push(formatFileResult(result, useColors));
|
|
16449
|
-
}
|
|
16450
|
-
lines.push("");
|
|
16451
|
-
lines.push(formatStats(summary, useColors));
|
|
16452
|
-
lines.push("");
|
|
16453
|
-
return lines.join("\n");
|
|
16454
|
-
}
|
|
16455
|
-
function formatHeader(text, useColors) {
|
|
16456
|
-
if (useColors) {
|
|
16457
|
-
return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
|
|
16458
|
-
}
|
|
16459
|
-
return text;
|
|
16460
|
-
}
|
|
16461
|
-
function formatFileResult(result, useColors) {
|
|
16462
|
-
const lines = [];
|
|
16463
|
-
const status = result.valid ? "\u2713" : "\u2717";
|
|
16464
|
-
const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
|
|
16465
|
-
const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
|
|
16466
|
-
const fileName = result.filePath;
|
|
16467
|
-
lines.push(`${statusText} ${fileName}`);
|
|
16468
|
-
if (result.errors.length > 0) {
|
|
16469
|
-
for (const error of result.errors) {
|
|
16470
|
-
lines.push(formatError(error, useColors));
|
|
16157
|
+
if (type === "http") {
|
|
16158
|
+
const url = healthcheck["url"];
|
|
16159
|
+
if (typeof url !== "string" || url.trim().length === 0) {
|
|
16160
|
+
errors2.push({
|
|
16161
|
+
severity: "error",
|
|
16162
|
+
filePath: absolutePath2,
|
|
16163
|
+
location: `${location}.url`,
|
|
16164
|
+
message: "healthcheck.url must be a non-empty string for http checks"
|
|
16165
|
+
});
|
|
16166
|
+
}
|
|
16167
|
+
return;
|
|
16471
16168
|
}
|
|
16472
|
-
|
|
16473
|
-
|
|
16474
|
-
|
|
16475
|
-
|
|
16476
|
-
|
|
16477
|
-
|
|
16478
|
-
|
|
16479
|
-
|
|
16480
|
-
return `${coloredPrefix}${location} ${error.message}`;
|
|
16481
|
-
}
|
|
16482
|
-
function formatStats(summary, useColors) {
|
|
16483
|
-
const lines = [];
|
|
16484
|
-
const totalText = `Total files: ${summary.totalFiles}`;
|
|
16485
|
-
const validText = `Valid: ${summary.validFiles}`;
|
|
16486
|
-
const invalidText = `Invalid: ${summary.invalidFiles}`;
|
|
16487
|
-
if (useColors) {
|
|
16488
|
-
lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
|
|
16489
|
-
lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
|
|
16490
|
-
if (summary.invalidFiles > 0) {
|
|
16491
|
-
lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
|
|
16169
|
+
const commandTemplate = healthcheck["command_template"] ?? healthcheck["commandTemplate"];
|
|
16170
|
+
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16171
|
+
errors2.push({
|
|
16172
|
+
severity: "error",
|
|
16173
|
+
filePath: absolutePath2,
|
|
16174
|
+
location: `${location}.commandTemplate`,
|
|
16175
|
+
message: "healthcheck.commandTemplate must be a non-empty string for command checks"
|
|
16176
|
+
});
|
|
16492
16177
|
} else {
|
|
16493
|
-
|
|
16494
|
-
}
|
|
16495
|
-
} else {
|
|
16496
|
-
lines.push(totalText);
|
|
16497
|
-
lines.push(validText);
|
|
16498
|
-
lines.push(invalidText);
|
|
16499
|
-
}
|
|
16500
|
-
return lines.join("\n");
|
|
16501
|
-
}
|
|
16502
|
-
function isTTY() {
|
|
16503
|
-
return process.stdout.isTTY ?? false;
|
|
16504
|
-
}
|
|
16505
|
-
|
|
16506
|
-
// ../../packages/core/dist/evaluation/validation/index.js
|
|
16507
|
-
import { readFile as readFile5 } from "node:fs/promises";
|
|
16508
|
-
import { parse as parse5 } from "yaml";
|
|
16509
|
-
import { readFile as readFile23 } from "node:fs/promises";
|
|
16510
|
-
import path17 from "node:path";
|
|
16511
|
-
import { parse as parse23 } from "yaml";
|
|
16512
|
-
import { readFile as readFile33 } from "node:fs/promises";
|
|
16513
|
-
import path23 from "node:path";
|
|
16514
|
-
import { parse as parse32 } from "yaml";
|
|
16515
|
-
import { readFile as readFile42 } from "node:fs/promises";
|
|
16516
|
-
import { parse as parse42 } from "yaml";
|
|
16517
|
-
import { readFile as readFile52 } from "node:fs/promises";
|
|
16518
|
-
import path33 from "node:path";
|
|
16519
|
-
import { parse as parse52 } from "yaml";
|
|
16520
|
-
var SCHEMA_EVAL_V22 = "agentv-eval-v2";
|
|
16521
|
-
var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
|
|
16522
|
-
var SCHEMA_CONFIG_V22 = "agentv-config-v2";
|
|
16523
|
-
async function detectFileType(filePath) {
|
|
16524
|
-
try {
|
|
16525
|
-
const content = await readFile5(filePath, "utf8");
|
|
16526
|
-
const parsed = parse5(content);
|
|
16527
|
-
if (typeof parsed !== "object" || parsed === null) {
|
|
16528
|
-
return "unknown";
|
|
16178
|
+
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16529
16179
|
}
|
|
16530
|
-
const
|
|
16531
|
-
|
|
16532
|
-
|
|
16533
|
-
|
|
16180
|
+
const cwd = healthcheck["cwd"];
|
|
16181
|
+
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16182
|
+
errors2.push({
|
|
16183
|
+
severity: "error",
|
|
16184
|
+
filePath: absolutePath2,
|
|
16185
|
+
location: `${location}.cwd`,
|
|
16186
|
+
message: "healthcheck.cwd must be a string when provided"
|
|
16187
|
+
});
|
|
16534
16188
|
}
|
|
16535
|
-
|
|
16536
|
-
|
|
16537
|
-
|
|
16538
|
-
|
|
16539
|
-
|
|
16540
|
-
|
|
16541
|
-
|
|
16542
|
-
|
|
16543
|
-
|
|
16189
|
+
}
|
|
16190
|
+
function recordUnknownPlaceholders(template, absolutePath2, location, errors2) {
|
|
16191
|
+
const placeholders = extractPlaceholders(template);
|
|
16192
|
+
for (const placeholder of placeholders) {
|
|
16193
|
+
if (!CLI_PLACEHOLDERS2.has(placeholder)) {
|
|
16194
|
+
errors2.push({
|
|
16195
|
+
severity: "error",
|
|
16196
|
+
filePath: absolutePath2,
|
|
16197
|
+
location,
|
|
16198
|
+
message: `Unknown CLI placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS2).join(", ")}`
|
|
16199
|
+
});
|
|
16200
|
+
}
|
|
16544
16201
|
}
|
|
16545
|
-
} catch {
|
|
16546
|
-
return "unknown";
|
|
16547
16202
|
}
|
|
16548
|
-
|
|
16549
|
-
|
|
16550
|
-
|
|
16551
|
-
|
|
16552
|
-
|
|
16553
|
-
|
|
16554
|
-
|
|
16555
|
-
|
|
16556
|
-
|
|
16557
|
-
|
|
16558
|
-
const content = await readFile23(absolutePath, "utf8");
|
|
16559
|
-
parsed = parse23(content);
|
|
16560
|
-
} catch (error) {
|
|
16561
|
-
errors.push({
|
|
16562
|
-
severity: "error",
|
|
16563
|
-
filePath: absolutePath,
|
|
16564
|
-
message: `Failed to parse YAML: ${error.message}`
|
|
16565
|
-
});
|
|
16566
|
-
return {
|
|
16567
|
-
valid: false,
|
|
16568
|
-
filePath: absolutePath,
|
|
16569
|
-
fileType: "eval",
|
|
16570
|
-
errors
|
|
16571
|
-
};
|
|
16203
|
+
function extractPlaceholders(template) {
|
|
16204
|
+
const matches = template.matchAll(/\{([A-Z_]+)\}/g);
|
|
16205
|
+
const result = [];
|
|
16206
|
+
for (const match of matches) {
|
|
16207
|
+
const placeholder = match[1];
|
|
16208
|
+
if (placeholder) {
|
|
16209
|
+
result.push(placeholder);
|
|
16210
|
+
}
|
|
16211
|
+
}
|
|
16212
|
+
return result;
|
|
16572
16213
|
}
|
|
16573
|
-
if (!
|
|
16214
|
+
if (!isObject2(parsed)) {
|
|
16574
16215
|
errors.push({
|
|
16575
16216
|
severity: "error",
|
|
16576
16217
|
filePath: absolutePath,
|
|
@@ -16579,13 +16220,13 @@ async function validateEvalFile(filePath) {
|
|
|
16579
16220
|
return {
|
|
16580
16221
|
valid: false,
|
|
16581
16222
|
filePath: absolutePath,
|
|
16582
|
-
fileType: "
|
|
16223
|
+
fileType: "targets",
|
|
16583
16224
|
errors
|
|
16584
16225
|
};
|
|
16585
16226
|
}
|
|
16586
16227
|
const schema = parsed["$schema"];
|
|
16587
|
-
if (schema !==
|
|
16588
|
-
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${
|
|
16228
|
+
if (schema !== TARGETS_SCHEMA_V2) {
|
|
16229
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
|
|
16589
16230
|
errors.push({
|
|
16590
16231
|
severity: "error",
|
|
16591
16232
|
filePath: absolutePath,
|
|
@@ -16593,639 +16234,1124 @@ async function validateEvalFile(filePath) {
|
|
|
16593
16234
|
message
|
|
16594
16235
|
});
|
|
16595
16236
|
}
|
|
16596
|
-
const
|
|
16597
|
-
if (!Array.isArray(
|
|
16237
|
+
const targets = parsed["targets"];
|
|
16238
|
+
if (!Array.isArray(targets)) {
|
|
16598
16239
|
errors.push({
|
|
16599
16240
|
severity: "error",
|
|
16600
16241
|
filePath: absolutePath,
|
|
16601
|
-
location: "
|
|
16602
|
-
message: "Missing or invalid '
|
|
16242
|
+
location: "targets",
|
|
16243
|
+
message: "Missing or invalid 'targets' field (must be an array)"
|
|
16603
16244
|
});
|
|
16604
16245
|
return {
|
|
16605
16246
|
valid: errors.length === 0,
|
|
16606
16247
|
filePath: absolutePath,
|
|
16607
|
-
fileType: "
|
|
16248
|
+
fileType: "targets",
|
|
16608
16249
|
errors
|
|
16609
16250
|
};
|
|
16610
16251
|
}
|
|
16611
|
-
|
|
16612
|
-
|
|
16613
|
-
const
|
|
16614
|
-
|
|
16252
|
+
const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
|
|
16253
|
+
for (let i6 = 0; i6 < targets.length; i6++) {
|
|
16254
|
+
const target = targets[i6];
|
|
16255
|
+
const location = `targets[${i6}]`;
|
|
16256
|
+
if (!isObject2(target)) {
|
|
16615
16257
|
errors.push({
|
|
16616
16258
|
severity: "error",
|
|
16617
16259
|
filePath: absolutePath,
|
|
16618
16260
|
location,
|
|
16619
|
-
message: "
|
|
16261
|
+
message: "Target must be an object"
|
|
16620
16262
|
});
|
|
16621
16263
|
continue;
|
|
16622
16264
|
}
|
|
16623
|
-
const
|
|
16624
|
-
if (typeof
|
|
16265
|
+
const name = target["name"];
|
|
16266
|
+
if (typeof name !== "string" || name.trim().length === 0) {
|
|
16625
16267
|
errors.push({
|
|
16626
16268
|
severity: "error",
|
|
16627
16269
|
filePath: absolutePath,
|
|
16628
|
-
location: `${location}.
|
|
16629
|
-
message: "Missing or invalid '
|
|
16270
|
+
location: `${location}.name`,
|
|
16271
|
+
message: "Missing or invalid 'name' field (must be a non-empty string)"
|
|
16630
16272
|
});
|
|
16631
16273
|
}
|
|
16632
|
-
const
|
|
16633
|
-
|
|
16274
|
+
const provider = target["provider"];
|
|
16275
|
+
const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
|
|
16276
|
+
if (typeof provider !== "string" || provider.trim().length === 0) {
|
|
16634
16277
|
errors.push({
|
|
16635
16278
|
severity: "error",
|
|
16636
16279
|
filePath: absolutePath,
|
|
16637
|
-
location: `${location}.
|
|
16638
|
-
message: "Missing or invalid '
|
|
16280
|
+
location: `${location}.provider`,
|
|
16281
|
+
message: "Missing or invalid 'provider' field (must be a non-empty string)"
|
|
16639
16282
|
});
|
|
16640
|
-
}
|
|
16641
|
-
const inputMessages = evalCase["input_messages"];
|
|
16642
|
-
if (!Array.isArray(inputMessages)) {
|
|
16283
|
+
} else if (!knownProviders.includes(provider)) {
|
|
16643
16284
|
errors.push({
|
|
16644
|
-
severity: "
|
|
16285
|
+
severity: "warning",
|
|
16645
16286
|
filePath: absolutePath,
|
|
16646
|
-
location: `${location}.
|
|
16647
|
-
message:
|
|
16287
|
+
location: `${location}.provider`,
|
|
16288
|
+
message: `Unknown provider '${provider}'. Known providers: ${knownProviders.join(", ")}`
|
|
16648
16289
|
});
|
|
16649
|
-
} else {
|
|
16650
|
-
validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
|
|
16651
16290
|
}
|
|
16652
|
-
const
|
|
16653
|
-
if (!
|
|
16291
|
+
const settings = target["settings"];
|
|
16292
|
+
if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
|
|
16654
16293
|
errors.push({
|
|
16655
16294
|
severity: "error",
|
|
16656
16295
|
filePath: absolutePath,
|
|
16657
|
-
location: `${location}.
|
|
16658
|
-
message: "
|
|
16296
|
+
location: `${location}.settings`,
|
|
16297
|
+
message: "Invalid 'settings' field (must be an object)"
|
|
16298
|
+
});
|
|
16299
|
+
}
|
|
16300
|
+
if (providerValue === "cli") {
|
|
16301
|
+
validateCliSettings(settings, absolutePath, `${location}.settings`, errors);
|
|
16302
|
+
}
|
|
16303
|
+
if (settings !== void 0 && isObject2(settings) && typeof provider === "string") {
|
|
16304
|
+
validateUnknownSettings(settings, provider, absolutePath, `${location}.settings`, errors);
|
|
16305
|
+
}
|
|
16306
|
+
const judgeTarget = target["judge_target"];
|
|
16307
|
+
if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
|
|
16308
|
+
errors.push({
|
|
16309
|
+
severity: "error",
|
|
16310
|
+
filePath: absolutePath,
|
|
16311
|
+
location: `${location}.judge_target`,
|
|
16312
|
+
message: "Invalid 'judge_target' field (must be a string)"
|
|
16659
16313
|
});
|
|
16660
|
-
} else {
|
|
16661
|
-
validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
|
|
16662
16314
|
}
|
|
16663
16315
|
}
|
|
16664
16316
|
return {
|
|
16665
|
-
valid: errors.length === 0,
|
|
16317
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
16666
16318
|
filePath: absolutePath,
|
|
16667
|
-
fileType: "
|
|
16319
|
+
fileType: "targets",
|
|
16668
16320
|
errors
|
|
16669
16321
|
};
|
|
16670
16322
|
}
|
|
16671
|
-
|
|
16672
|
-
|
|
16673
|
-
|
|
16674
|
-
|
|
16675
|
-
|
|
16323
|
+
var SCHEMA_CONFIG_V222 = "agentv-config-v2";
|
|
16324
|
+
async function validateConfigFile(filePath) {
|
|
16325
|
+
const errors = [];
|
|
16326
|
+
try {
|
|
16327
|
+
const content = await readFile42(filePath, "utf8");
|
|
16328
|
+
const parsed = parse42(content);
|
|
16329
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
16676
16330
|
errors.push({
|
|
16677
16331
|
severity: "error",
|
|
16678
16332
|
filePath,
|
|
16679
|
-
|
|
16680
|
-
message: "Message must be an object"
|
|
16333
|
+
message: "Config file must contain a valid YAML object"
|
|
16681
16334
|
});
|
|
16682
|
-
|
|
16335
|
+
return { valid: false, filePath, fileType: "config", errors };
|
|
16683
16336
|
}
|
|
16684
|
-
const
|
|
16685
|
-
const
|
|
16686
|
-
if (
|
|
16337
|
+
const config = parsed;
|
|
16338
|
+
const schema = config["$schema"];
|
|
16339
|
+
if (schema !== SCHEMA_CONFIG_V222) {
|
|
16340
|
+
const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
|
|
16687
16341
|
errors.push({
|
|
16688
16342
|
severity: "error",
|
|
16689
16343
|
filePath,
|
|
16690
|
-
location:
|
|
16691
|
-
message
|
|
16344
|
+
location: "$schema",
|
|
16345
|
+
message
|
|
16692
16346
|
});
|
|
16693
16347
|
}
|
|
16694
|
-
const
|
|
16695
|
-
if (
|
|
16696
|
-
|
|
16697
|
-
|
|
16698
|
-
|
|
16699
|
-
|
|
16700
|
-
|
|
16701
|
-
|
|
16702
|
-
|
|
16703
|
-
|
|
16704
|
-
|
|
16705
|
-
|
|
16706
|
-
|
|
16707
|
-
|
|
16708
|
-
|
|
16709
|
-
|
|
16710
|
-
|
|
16711
|
-
|
|
16712
|
-
|
|
16713
|
-
|
|
16714
|
-
|
|
16715
|
-
|
|
16716
|
-
|
|
16717
|
-
location: `${contentLocation}.value`,
|
|
16718
|
-
message: "Content with type 'text' must have a 'value' field"
|
|
16719
|
-
});
|
|
16720
|
-
}
|
|
16721
|
-
}
|
|
16722
|
-
} else {
|
|
16723
|
-
errors.push({
|
|
16724
|
-
severity: "error",
|
|
16725
|
-
filePath,
|
|
16726
|
-
location: contentLocation,
|
|
16727
|
-
message: "Content array items must be strings or objects"
|
|
16728
|
-
});
|
|
16729
|
-
}
|
|
16348
|
+
const guidelinePatterns = config["guideline_patterns"];
|
|
16349
|
+
if (guidelinePatterns !== void 0) {
|
|
16350
|
+
if (!Array.isArray(guidelinePatterns)) {
|
|
16351
|
+
errors.push({
|
|
16352
|
+
severity: "error",
|
|
16353
|
+
filePath,
|
|
16354
|
+
location: "guideline_patterns",
|
|
16355
|
+
message: "Field 'guideline_patterns' must be an array"
|
|
16356
|
+
});
|
|
16357
|
+
} else if (!guidelinePatterns.every((p) => typeof p === "string")) {
|
|
16358
|
+
errors.push({
|
|
16359
|
+
severity: "error",
|
|
16360
|
+
filePath,
|
|
16361
|
+
location: "guideline_patterns",
|
|
16362
|
+
message: "All entries in 'guideline_patterns' must be strings"
|
|
16363
|
+
});
|
|
16364
|
+
} else if (guidelinePatterns.length === 0) {
|
|
16365
|
+
errors.push({
|
|
16366
|
+
severity: "warning",
|
|
16367
|
+
filePath,
|
|
16368
|
+
location: "guideline_patterns",
|
|
16369
|
+
message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
|
|
16370
|
+
});
|
|
16730
16371
|
}
|
|
16731
|
-
}
|
|
16372
|
+
}
|
|
16373
|
+
const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
|
|
16374
|
+
const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
|
|
16375
|
+
if (unexpectedFields.length > 0) {
|
|
16732
16376
|
errors.push({
|
|
16733
|
-
severity: "
|
|
16377
|
+
severity: "warning",
|
|
16734
16378
|
filePath,
|
|
16735
|
-
|
|
16736
|
-
message: "Missing or invalid 'content' field (must be a string or array)"
|
|
16379
|
+
message: `Unexpected fields: ${unexpectedFields.join(", ")}`
|
|
16737
16380
|
});
|
|
16738
16381
|
}
|
|
16382
|
+
return {
|
|
16383
|
+
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
16384
|
+
filePath,
|
|
16385
|
+
fileType: "config",
|
|
16386
|
+
errors
|
|
16387
|
+
};
|
|
16388
|
+
} catch (error) {
|
|
16389
|
+
errors.push({
|
|
16390
|
+
severity: "error",
|
|
16391
|
+
filePath,
|
|
16392
|
+
message: `Failed to parse config file: ${error.message}`
|
|
16393
|
+
});
|
|
16394
|
+
return { valid: false, filePath, fileType: "config", errors };
|
|
16739
16395
|
}
|
|
16740
16396
|
}
|
|
16741
|
-
function
|
|
16397
|
+
function isObject3(value) {
|
|
16742
16398
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
16743
16399
|
}
|
|
16744
|
-
|
|
16745
|
-
async function validateTargetsFile(filePath) {
|
|
16400
|
+
async function validateFileReferences(evalFilePath) {
|
|
16746
16401
|
const errors = [];
|
|
16747
|
-
const absolutePath =
|
|
16748
|
-
|
|
16749
|
-
|
|
16750
|
-
const content = await readFile33(absolutePath, "utf8");
|
|
16751
|
-
parsed = parse32(content);
|
|
16752
|
-
} catch (error) {
|
|
16402
|
+
const absolutePath = path33.resolve(evalFilePath);
|
|
16403
|
+
const gitRoot = await findGitRoot(absolutePath);
|
|
16404
|
+
if (!gitRoot) {
|
|
16753
16405
|
errors.push({
|
|
16754
16406
|
severity: "error",
|
|
16755
16407
|
filePath: absolutePath,
|
|
16756
|
-
message:
|
|
16408
|
+
message: "Cannot validate file references: git repository root not found"
|
|
16757
16409
|
});
|
|
16758
|
-
return
|
|
16759
|
-
valid: false,
|
|
16760
|
-
filePath: absolutePath,
|
|
16761
|
-
fileType: "targets",
|
|
16762
|
-
errors
|
|
16763
|
-
};
|
|
16410
|
+
return errors;
|
|
16764
16411
|
}
|
|
16765
|
-
|
|
16766
|
-
|
|
16767
|
-
|
|
16768
|
-
|
|
16769
|
-
|
|
16770
|
-
|
|
16771
|
-
|
|
16772
|
-
|
|
16773
|
-
|
|
16412
|
+
const searchRoots = buildSearchRoots(absolutePath, gitRoot);
|
|
16413
|
+
let parsed;
|
|
16414
|
+
try {
|
|
16415
|
+
const content = await readFile5(absolutePath, "utf8");
|
|
16416
|
+
parsed = parse5(content);
|
|
16417
|
+
} catch {
|
|
16418
|
+
return errors;
|
|
16419
|
+
}
|
|
16420
|
+
if (!isObject3(parsed)) {
|
|
16421
|
+
return errors;
|
|
16422
|
+
}
|
|
16423
|
+
const evalcases = parsed["evalcases"];
|
|
16424
|
+
if (!Array.isArray(evalcases)) {
|
|
16425
|
+
return errors;
|
|
16426
|
+
}
|
|
16427
|
+
for (let i6 = 0; i6 < evalcases.length; i6++) {
|
|
16428
|
+
const evalCase = evalcases[i6];
|
|
16429
|
+
if (!isObject3(evalCase)) {
|
|
16430
|
+
continue;
|
|
16774
16431
|
}
|
|
16775
|
-
const
|
|
16776
|
-
if (
|
|
16777
|
-
|
|
16778
|
-
severity: "error",
|
|
16779
|
-
filePath: absolutePath2,
|
|
16780
|
-
location: `${location}.commandTemplate`,
|
|
16781
|
-
message: "CLI provider requires 'commandTemplate' as a non-empty string"
|
|
16782
|
-
});
|
|
16783
|
-
} else {
|
|
16784
|
-
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16432
|
+
const inputMessages = evalCase["input_messages"];
|
|
16433
|
+
if (Array.isArray(inputMessages)) {
|
|
16434
|
+
await validateMessagesFileRefs(inputMessages, `evalcases[${i6}].input_messages`, searchRoots, absolutePath, errors);
|
|
16785
16435
|
}
|
|
16786
|
-
const
|
|
16787
|
-
if (
|
|
16788
|
-
|
|
16789
|
-
severity: "error",
|
|
16790
|
-
filePath: absolutePath2,
|
|
16791
|
-
location: `${location}.attachmentsFormat`,
|
|
16792
|
-
message: "'attachmentsFormat' must be a string when provided"
|
|
16793
|
-
});
|
|
16436
|
+
const expectedMessages = evalCase["expected_messages"];
|
|
16437
|
+
if (Array.isArray(expectedMessages)) {
|
|
16438
|
+
await validateMessagesFileRefs(expectedMessages, `evalcases[${i6}].expected_messages`, searchRoots, absolutePath, errors);
|
|
16794
16439
|
}
|
|
16795
|
-
|
|
16796
|
-
|
|
16797
|
-
|
|
16798
|
-
|
|
16799
|
-
|
|
16800
|
-
|
|
16801
|
-
|
|
16802
|
-
|
|
16440
|
+
}
|
|
16441
|
+
return errors;
|
|
16442
|
+
}
|
|
16443
|
+
async function validateMessagesFileRefs(messages, location, searchRoots, filePath, errors) {
|
|
16444
|
+
for (let i6 = 0; i6 < messages.length; i6++) {
|
|
16445
|
+
const message = messages[i6];
|
|
16446
|
+
if (!isObject3(message)) {
|
|
16447
|
+
continue;
|
|
16803
16448
|
}
|
|
16804
|
-
const
|
|
16805
|
-
if (
|
|
16806
|
-
|
|
16807
|
-
severity: "error",
|
|
16808
|
-
filePath: absolutePath2,
|
|
16809
|
-
location: `${location}.cwd`,
|
|
16810
|
-
message: "'cwd' must be a string when provided"
|
|
16811
|
-
});
|
|
16449
|
+
const content = message["content"];
|
|
16450
|
+
if (typeof content === "string") {
|
|
16451
|
+
continue;
|
|
16812
16452
|
}
|
|
16813
|
-
|
|
16814
|
-
|
|
16815
|
-
|
|
16816
|
-
|
|
16817
|
-
|
|
16453
|
+
if (!Array.isArray(content)) {
|
|
16454
|
+
continue;
|
|
16455
|
+
}
|
|
16456
|
+
for (let j2 = 0; j2 < content.length; j2++) {
|
|
16457
|
+
const contentItem = content[j2];
|
|
16458
|
+
if (!isObject3(contentItem)) {
|
|
16459
|
+
continue;
|
|
16460
|
+
}
|
|
16461
|
+
const type = contentItem["type"];
|
|
16462
|
+
if (type !== "file") {
|
|
16463
|
+
continue;
|
|
16464
|
+
}
|
|
16465
|
+
const value = contentItem["value"];
|
|
16466
|
+
if (typeof value !== "string") {
|
|
16467
|
+
errors.push({
|
|
16818
16468
|
severity: "error",
|
|
16819
|
-
filePath
|
|
16820
|
-
location: `${location}.
|
|
16821
|
-
message: "
|
|
16469
|
+
filePath,
|
|
16470
|
+
location: `${location}[${i6}].content[${j2}].value`,
|
|
16471
|
+
message: "File reference must have a 'value' field with the file path"
|
|
16822
16472
|
});
|
|
16473
|
+
continue;
|
|
16823
16474
|
}
|
|
16824
|
-
|
|
16825
|
-
|
|
16826
|
-
|
|
16827
|
-
if (!isObject2(envOverrides)) {
|
|
16828
|
-
errors2.push({
|
|
16475
|
+
const { resolvedPath } = await resolveFileReference(value, searchRoots);
|
|
16476
|
+
if (!resolvedPath) {
|
|
16477
|
+
errors.push({
|
|
16829
16478
|
severity: "error",
|
|
16830
|
-
filePath
|
|
16831
|
-
location: `${location}.
|
|
16832
|
-
message:
|
|
16479
|
+
filePath,
|
|
16480
|
+
location: `${location}[${i6}].content[${j2}]`,
|
|
16481
|
+
message: `Referenced file not found: ${value}`
|
|
16833
16482
|
});
|
|
16834
16483
|
} else {
|
|
16835
|
-
|
|
16836
|
-
|
|
16837
|
-
|
|
16838
|
-
|
|
16839
|
-
|
|
16840
|
-
|
|
16841
|
-
|
|
16484
|
+
try {
|
|
16485
|
+
const fileContent = await readFile5(resolvedPath, "utf8");
|
|
16486
|
+
if (fileContent.trim().length === 0) {
|
|
16487
|
+
errors.push({
|
|
16488
|
+
severity: "warning",
|
|
16489
|
+
filePath,
|
|
16490
|
+
location: `${location}[${i6}].content[${j2}]`,
|
|
16491
|
+
message: `Referenced file is empty: ${value}`
|
|
16842
16492
|
});
|
|
16843
16493
|
}
|
|
16494
|
+
} catch (error) {
|
|
16495
|
+
errors.push({
|
|
16496
|
+
severity: "error",
|
|
16497
|
+
filePath,
|
|
16498
|
+
location: `${location}[${i6}].content[${j2}]`,
|
|
16499
|
+
message: `Cannot read referenced file: ${value} (${error.message})`
|
|
16500
|
+
});
|
|
16844
16501
|
}
|
|
16845
16502
|
}
|
|
16846
16503
|
}
|
|
16847
|
-
const healthcheck = settings["healthcheck"];
|
|
16848
|
-
if (healthcheck !== void 0) {
|
|
16849
|
-
validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
|
|
16850
|
-
}
|
|
16851
16504
|
}
|
|
16852
|
-
|
|
16853
|
-
|
|
16854
|
-
|
|
16855
|
-
|
|
16856
|
-
|
|
16857
|
-
|
|
16858
|
-
|
|
16859
|
-
|
|
16860
|
-
|
|
16861
|
-
|
|
16862
|
-
|
|
16863
|
-
|
|
16864
|
-
|
|
16865
|
-
|
|
16866
|
-
|
|
16867
|
-
|
|
16868
|
-
|
|
16869
|
-
|
|
16870
|
-
|
|
16871
|
-
|
|
16872
|
-
|
|
16873
|
-
|
|
16874
|
-
|
|
16875
|
-
|
|
16876
|
-
|
|
16877
|
-
|
|
16878
|
-
|
|
16879
|
-
|
|
16880
|
-
|
|
16881
|
-
|
|
16882
|
-
|
|
16883
|
-
|
|
16884
|
-
|
|
16885
|
-
|
|
16886
|
-
|
|
16887
|
-
errors2.push({
|
|
16888
|
-
severity: "error",
|
|
16889
|
-
filePath: absolutePath2,
|
|
16890
|
-
location: `${location}.url`,
|
|
16891
|
-
message: "healthcheck.url must be a non-empty string for http checks"
|
|
16892
|
-
});
|
|
16505
|
+
}
|
|
16506
|
+
|
|
16507
|
+
// src/commands/eval/targets.ts
|
|
16508
|
+
import { constants as constants5 } from "node:fs";
|
|
16509
|
+
import { access as access5, readFile as readFile6 } from "node:fs/promises";
|
|
16510
|
+
import path13 from "node:path";
|
|
16511
|
+
import { parse as parse6 } from "yaml";
|
|
16512
|
+
var TARGET_FILE_CANDIDATES = [
|
|
16513
|
+
"targets.yaml",
|
|
16514
|
+
"targets.yml",
|
|
16515
|
+
path13.join(".agentv", "targets.yaml"),
|
|
16516
|
+
path13.join(".agentv", "targets.yml")
|
|
16517
|
+
];
|
|
16518
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
16519
|
+
var ANSI_RED = "\x1B[31m";
|
|
16520
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
16521
|
+
function isTTY() {
|
|
16522
|
+
return process.stdout.isTTY ?? false;
|
|
16523
|
+
}
|
|
16524
|
+
async function fileExists5(filePath) {
|
|
16525
|
+
try {
|
|
16526
|
+
await access5(filePath, constants5.F_OK);
|
|
16527
|
+
return true;
|
|
16528
|
+
} catch {
|
|
16529
|
+
return false;
|
|
16530
|
+
}
|
|
16531
|
+
}
|
|
16532
|
+
async function readTestSuiteTarget(testFilePath) {
|
|
16533
|
+
try {
|
|
16534
|
+
const raw = await readFile6(path13.resolve(testFilePath), "utf8");
|
|
16535
|
+
const parsed = parse6(raw);
|
|
16536
|
+
if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
|
|
16537
|
+
const targetValue = parsed.target;
|
|
16538
|
+
if (typeof targetValue === "string" && targetValue.trim().length > 0) {
|
|
16539
|
+
return targetValue.trim();
|
|
16893
16540
|
}
|
|
16894
|
-
return;
|
|
16895
|
-
}
|
|
16896
|
-
const commandTemplate = healthcheck["command_template"] ?? healthcheck["commandTemplate"];
|
|
16897
|
-
if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
|
|
16898
|
-
errors2.push({
|
|
16899
|
-
severity: "error",
|
|
16900
|
-
filePath: absolutePath2,
|
|
16901
|
-
location: `${location}.commandTemplate`,
|
|
16902
|
-
message: "healthcheck.commandTemplate must be a non-empty string for command checks"
|
|
16903
|
-
});
|
|
16904
|
-
} else {
|
|
16905
|
-
recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
|
|
16906
|
-
}
|
|
16907
|
-
const cwd = healthcheck["cwd"];
|
|
16908
|
-
if (cwd !== void 0 && typeof cwd !== "string") {
|
|
16909
|
-
errors2.push({
|
|
16910
|
-
severity: "error",
|
|
16911
|
-
filePath: absolutePath2,
|
|
16912
|
-
location: `${location}.cwd`,
|
|
16913
|
-
message: "healthcheck.cwd must be a string when provided"
|
|
16914
|
-
});
|
|
16915
16541
|
}
|
|
16542
|
+
} catch {
|
|
16916
16543
|
}
|
|
16917
|
-
|
|
16918
|
-
|
|
16919
|
-
|
|
16920
|
-
|
|
16921
|
-
|
|
16922
|
-
|
|
16923
|
-
|
|
16924
|
-
|
|
16925
|
-
|
|
16926
|
-
|
|
16544
|
+
return void 0;
|
|
16545
|
+
}
|
|
16546
|
+
async function discoverTargetsFile(options) {
|
|
16547
|
+
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
16548
|
+
if (explicitPath) {
|
|
16549
|
+
const resolvedExplicit = path13.resolve(explicitPath);
|
|
16550
|
+
if (await fileExists5(resolvedExplicit)) {
|
|
16551
|
+
return resolvedExplicit;
|
|
16552
|
+
}
|
|
16553
|
+
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16554
|
+
const nested = path13.join(resolvedExplicit, candidate);
|
|
16555
|
+
if (await fileExists5(nested)) {
|
|
16556
|
+
return nested;
|
|
16927
16557
|
}
|
|
16928
16558
|
}
|
|
16559
|
+
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
16929
16560
|
}
|
|
16930
|
-
|
|
16931
|
-
|
|
16932
|
-
|
|
16933
|
-
|
|
16934
|
-
|
|
16935
|
-
|
|
16936
|
-
|
|
16561
|
+
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
16562
|
+
const resolvedCwd = path13.resolve(cwd);
|
|
16563
|
+
if (!directories.includes(resolvedCwd)) {
|
|
16564
|
+
directories.push(resolvedCwd);
|
|
16565
|
+
}
|
|
16566
|
+
for (const directory of directories) {
|
|
16567
|
+
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16568
|
+
const fullPath = path13.join(directory, candidate);
|
|
16569
|
+
if (await fileExists5(fullPath)) {
|
|
16570
|
+
return fullPath;
|
|
16937
16571
|
}
|
|
16938
16572
|
}
|
|
16939
|
-
return result;
|
|
16940
16573
|
}
|
|
16941
|
-
|
|
16942
|
-
|
|
16943
|
-
|
|
16944
|
-
|
|
16945
|
-
|
|
16946
|
-
}
|
|
16947
|
-
return {
|
|
16948
|
-
valid: false,
|
|
16949
|
-
filePath: absolutePath,
|
|
16950
|
-
fileType: "targets",
|
|
16951
|
-
errors
|
|
16952
|
-
};
|
|
16574
|
+
throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
|
|
16575
|
+
}
|
|
16576
|
+
function pickTargetName(options) {
|
|
16577
|
+
const cliName = options.cliTargetName?.trim();
|
|
16578
|
+
if (cliName && cliName !== "default") {
|
|
16579
|
+
return { name: cliName, source: "cli" };
|
|
16953
16580
|
}
|
|
16954
|
-
const
|
|
16955
|
-
if (
|
|
16956
|
-
|
|
16957
|
-
errors.push({
|
|
16958
|
-
severity: "error",
|
|
16959
|
-
filePath: absolutePath,
|
|
16960
|
-
location: "$schema",
|
|
16961
|
-
message
|
|
16962
|
-
});
|
|
16581
|
+
const fileName = options.fileTargetName?.trim();
|
|
16582
|
+
if (fileName && fileName.length > 0) {
|
|
16583
|
+
return { name: fileName, source: "test-file" };
|
|
16963
16584
|
}
|
|
16964
|
-
|
|
16965
|
-
|
|
16966
|
-
|
|
16967
|
-
|
|
16968
|
-
|
|
16969
|
-
|
|
16970
|
-
|
|
16971
|
-
|
|
16585
|
+
return { name: "default", source: "default" };
|
|
16586
|
+
}
|
|
16587
|
+
async function selectTarget(options) {
|
|
16588
|
+
const { testFilePath, repoRoot, cwd, explicitTargetsPath, cliTargetName, dryRun, dryRunDelay, dryRunDelayMin, dryRunDelayMax, env } = options;
|
|
16589
|
+
const targetsFilePath = await discoverTargetsFile({
|
|
16590
|
+
explicitPath: explicitTargetsPath,
|
|
16591
|
+
testFilePath,
|
|
16592
|
+
repoRoot,
|
|
16593
|
+
cwd
|
|
16594
|
+
});
|
|
16595
|
+
const validationResult = await validateTargetsFile(targetsFilePath);
|
|
16596
|
+
const warnings = validationResult.errors.filter((e) => e.severity === "warning");
|
|
16597
|
+
const useColors = isTTY();
|
|
16598
|
+
if (warnings.length > 0) {
|
|
16599
|
+
console.warn(`
|
|
16600
|
+
Warnings in ${targetsFilePath}:`);
|
|
16601
|
+
for (const warning of warnings) {
|
|
16602
|
+
const location = warning.location ? ` [${warning.location}]` : "";
|
|
16603
|
+
const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
|
|
16604
|
+
const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
|
|
16605
|
+
console.warn(`${prefix}${location} ${message}`);
|
|
16606
|
+
}
|
|
16607
|
+
console.warn("");
|
|
16608
|
+
}
|
|
16609
|
+
const errors = validationResult.errors.filter((e) => e.severity === "error");
|
|
16610
|
+
if (errors.length > 0) {
|
|
16611
|
+
console.error(`
|
|
16612
|
+
Errors in ${targetsFilePath}:`);
|
|
16613
|
+
for (const error of errors) {
|
|
16614
|
+
const location = error.location ? ` [${error.location}]` : "";
|
|
16615
|
+
const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET2}` : " \u2717";
|
|
16616
|
+
const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET2}` : error.message;
|
|
16617
|
+
console.error(`${prefix}${location} ${message}`);
|
|
16618
|
+
}
|
|
16619
|
+
throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
|
|
16620
|
+
}
|
|
16621
|
+
const definitions = await readTargetDefinitions(targetsFilePath);
|
|
16622
|
+
const fileTargetName = await readTestSuiteTarget(testFilePath);
|
|
16623
|
+
const targetChoice = pickTargetName({ cliTargetName, fileTargetName });
|
|
16624
|
+
const targetDefinition = definitions.find((definition) => definition.name === targetChoice.name);
|
|
16625
|
+
if (!targetDefinition) {
|
|
16626
|
+
const available = listTargetNames(definitions).join(", ");
|
|
16627
|
+
throw new Error(
|
|
16628
|
+
`Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
|
|
16629
|
+
);
|
|
16630
|
+
}
|
|
16631
|
+
if (dryRun) {
|
|
16632
|
+
const mockTarget = {
|
|
16633
|
+
kind: "mock",
|
|
16634
|
+
name: `${targetDefinition.name}-dry-run`,
|
|
16635
|
+
judgeTarget: void 0,
|
|
16636
|
+
config: {
|
|
16637
|
+
response: '{"answer":"Mock dry-run response"}',
|
|
16638
|
+
delayMs: dryRunDelay,
|
|
16639
|
+
delayMinMs: dryRunDelayMin,
|
|
16640
|
+
delayMaxMs: dryRunDelayMax
|
|
16641
|
+
}
|
|
16642
|
+
};
|
|
16972
16643
|
return {
|
|
16973
|
-
|
|
16974
|
-
|
|
16975
|
-
|
|
16976
|
-
|
|
16644
|
+
definitions,
|
|
16645
|
+
resolvedTarget: mockTarget,
|
|
16646
|
+
targetName: targetChoice.name,
|
|
16647
|
+
targetSource: targetChoice.source,
|
|
16648
|
+
targetsFilePath
|
|
16977
16649
|
};
|
|
16978
16650
|
}
|
|
16979
|
-
|
|
16980
|
-
|
|
16981
|
-
|
|
16982
|
-
|
|
16983
|
-
|
|
16984
|
-
|
|
16985
|
-
|
|
16986
|
-
|
|
16987
|
-
|
|
16988
|
-
|
|
16989
|
-
|
|
16990
|
-
|
|
16651
|
+
try {
|
|
16652
|
+
const resolvedTarget = resolveTargetDefinition(targetDefinition, env);
|
|
16653
|
+
return {
|
|
16654
|
+
definitions,
|
|
16655
|
+
resolvedTarget,
|
|
16656
|
+
targetName: targetChoice.name,
|
|
16657
|
+
targetSource: targetChoice.source,
|
|
16658
|
+
targetsFilePath
|
|
16659
|
+
};
|
|
16660
|
+
} catch (error) {
|
|
16661
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
16662
|
+
throw new Error(`Failed to resolve target '${targetChoice.name}': ${message}`);
|
|
16663
|
+
}
|
|
16664
|
+
}
|
|
16665
|
+
|
|
16666
|
+
// src/commands/eval/run-eval.ts
|
|
16667
|
+
var DEFAULT_WORKERS = 3;
|
|
16668
|
+
function normalizeBoolean(value) {
|
|
16669
|
+
return value === true;
|
|
16670
|
+
}
|
|
16671
|
+
function normalizeString(value) {
|
|
16672
|
+
if (typeof value !== "string") {
|
|
16673
|
+
return void 0;
|
|
16674
|
+
}
|
|
16675
|
+
const trimmed = value.trim();
|
|
16676
|
+
return trimmed.length > 0 ? trimmed : void 0;
|
|
16677
|
+
}
|
|
16678
|
+
function normalizeNumber(value, fallback) {
|
|
16679
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
16680
|
+
return value;
|
|
16681
|
+
}
|
|
16682
|
+
if (typeof value === "string") {
|
|
16683
|
+
const parsed = Number.parseInt(value, 10);
|
|
16684
|
+
if (!Number.isNaN(parsed)) {
|
|
16685
|
+
return parsed;
|
|
16991
16686
|
}
|
|
16992
|
-
|
|
16993
|
-
|
|
16994
|
-
|
|
16995
|
-
|
|
16996
|
-
|
|
16997
|
-
|
|
16998
|
-
|
|
16999
|
-
|
|
16687
|
+
}
|
|
16688
|
+
return fallback;
|
|
16689
|
+
}
|
|
16690
|
+
function normalizeOptions(rawOptions) {
|
|
16691
|
+
const formatStr = normalizeString(rawOptions.outputFormat) ?? "jsonl";
|
|
16692
|
+
const format = formatStr === "yaml" ? "yaml" : "jsonl";
|
|
16693
|
+
const workers = normalizeNumber(rawOptions.workers, 0);
|
|
16694
|
+
return {
|
|
16695
|
+
target: normalizeString(rawOptions.target),
|
|
16696
|
+
targetsPath: normalizeString(rawOptions.targets),
|
|
16697
|
+
evalId: normalizeString(rawOptions.evalId),
|
|
16698
|
+
workers: workers > 0 ? workers : void 0,
|
|
16699
|
+
outPath: normalizeString(rawOptions.out),
|
|
16700
|
+
format,
|
|
16701
|
+
dryRun: normalizeBoolean(rawOptions.dryRun),
|
|
16702
|
+
dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
|
|
16703
|
+
dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
|
|
16704
|
+
dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0),
|
|
16705
|
+
agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120),
|
|
16706
|
+
maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
|
|
16707
|
+
cache: normalizeBoolean(rawOptions.cache),
|
|
16708
|
+
verbose: normalizeBoolean(rawOptions.verbose),
|
|
16709
|
+
dumpPrompts: rawOptions.dumpPrompts
|
|
16710
|
+
};
|
|
16711
|
+
}
|
|
16712
|
+
async function ensureFileExists(filePath, description) {
|
|
16713
|
+
try {
|
|
16714
|
+
await access6(filePath, constants6.F_OK);
|
|
16715
|
+
} catch {
|
|
16716
|
+
throw new Error(`${description} not found: ${filePath}`);
|
|
16717
|
+
}
|
|
16718
|
+
}
|
|
16719
|
+
async function findRepoRoot(start) {
|
|
16720
|
+
const fallback = path14.resolve(start);
|
|
16721
|
+
let current = fallback;
|
|
16722
|
+
while (current !== void 0) {
|
|
16723
|
+
const candidate = path14.join(current, ".git");
|
|
16724
|
+
try {
|
|
16725
|
+
await access6(candidate, constants6.F_OK);
|
|
16726
|
+
return current;
|
|
16727
|
+
} catch {
|
|
16728
|
+
const parent = path14.dirname(current);
|
|
16729
|
+
if (parent === current) {
|
|
16730
|
+
break;
|
|
16731
|
+
}
|
|
16732
|
+
current = parent;
|
|
17000
16733
|
}
|
|
17001
|
-
|
|
17002
|
-
|
|
17003
|
-
|
|
17004
|
-
|
|
17005
|
-
|
|
17006
|
-
|
|
17007
|
-
|
|
17008
|
-
|
|
17009
|
-
|
|
17010
|
-
|
|
17011
|
-
|
|
17012
|
-
|
|
17013
|
-
|
|
17014
|
-
|
|
17015
|
-
|
|
16734
|
+
}
|
|
16735
|
+
return fallback;
|
|
16736
|
+
}
|
|
16737
|
+
function buildDefaultOutputPath(cwd, format) {
|
|
16738
|
+
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
16739
|
+
const baseName = "eval";
|
|
16740
|
+
const extension = getDefaultExtension(format);
|
|
16741
|
+
return path14.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
16742
|
+
}
|
|
16743
|
+
function resolvePromptDirectory(option, cwd) {
|
|
16744
|
+
if (option === void 0) {
|
|
16745
|
+
return void 0;
|
|
16746
|
+
}
|
|
16747
|
+
if (typeof option === "string" && option.trim().length > 0) {
|
|
16748
|
+
return path14.resolve(cwd, option);
|
|
16749
|
+
}
|
|
16750
|
+
return path14.join(cwd, ".agentv", "prompts");
|
|
16751
|
+
}
|
|
16752
|
+
function createEvaluationCache() {
|
|
16753
|
+
const store = /* @__PURE__ */ new Map();
|
|
16754
|
+
return {
|
|
16755
|
+
async get(key2) {
|
|
16756
|
+
return store.get(key2);
|
|
16757
|
+
},
|
|
16758
|
+
async set(key2, value) {
|
|
16759
|
+
store.set(key2, value);
|
|
16760
|
+
}
|
|
16761
|
+
};
|
|
16762
|
+
}
|
|
16763
|
+
function createProgressReporter(maxWorkers) {
|
|
16764
|
+
const display = new ProgressDisplay(maxWorkers);
|
|
16765
|
+
return {
|
|
16766
|
+
isInteractive: display.isInteractiveMode(),
|
|
16767
|
+
start: () => display.start(),
|
|
16768
|
+
setTotal: (total) => display.setTotalTests(total),
|
|
16769
|
+
update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
|
|
16770
|
+
finish: () => display.finish(),
|
|
16771
|
+
addLogPaths: (paths) => display.addLogPaths(paths)
|
|
16772
|
+
};
|
|
16773
|
+
}
|
|
16774
|
+
function makeEvalKey(testFilePath, evalId) {
|
|
16775
|
+
return `${path14.resolve(testFilePath)}::${evalId}`;
|
|
16776
|
+
}
|
|
16777
|
+
function createDisplayIdTracker() {
|
|
16778
|
+
const map = /* @__PURE__ */ new Map();
|
|
16779
|
+
let nextId = 1;
|
|
16780
|
+
return {
|
|
16781
|
+
getOrAssign(evalKey) {
|
|
16782
|
+
const existing = map.get(evalKey);
|
|
16783
|
+
if (existing !== void 0) {
|
|
16784
|
+
return existing;
|
|
16785
|
+
}
|
|
16786
|
+
const assigned = nextId++;
|
|
16787
|
+
map.set(evalKey, assigned);
|
|
16788
|
+
return assigned;
|
|
16789
|
+
}
|
|
16790
|
+
};
|
|
16791
|
+
}
|
|
16792
|
+
async function prepareFileMetadata(params) {
|
|
16793
|
+
const { testFilePath, repoRoot, cwd, options } = params;
|
|
16794
|
+
await ensureFileExists(testFilePath, "Test file");
|
|
16795
|
+
await loadEnvFromHierarchy({
|
|
16796
|
+
testFilePath,
|
|
16797
|
+
repoRoot,
|
|
16798
|
+
verbose: options.verbose
|
|
16799
|
+
});
|
|
16800
|
+
const selection = await selectTarget({
|
|
16801
|
+
testFilePath,
|
|
16802
|
+
repoRoot,
|
|
16803
|
+
cwd,
|
|
16804
|
+
explicitTargetsPath: options.targetsPath,
|
|
16805
|
+
cliTargetName: options.target,
|
|
16806
|
+
dryRun: options.dryRun,
|
|
16807
|
+
dryRunDelay: options.dryRunDelay,
|
|
16808
|
+
dryRunDelayMin: options.dryRunDelayMin,
|
|
16809
|
+
dryRunDelayMax: options.dryRunDelayMax,
|
|
16810
|
+
env: process.env
|
|
16811
|
+
});
|
|
16812
|
+
const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
|
|
16813
|
+
const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
|
|
16814
|
+
const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, evalId: options.evalId });
|
|
16815
|
+
const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
|
|
16816
|
+
return { evalIds: filteredIds, selection, inlineTargetLabel };
|
|
16817
|
+
}
|
|
16818
|
+
async function runWithLimit(items, limit, task) {
|
|
16819
|
+
const safeLimit = Math.max(1, limit);
|
|
16820
|
+
let index = 0;
|
|
16821
|
+
const workers = Array.from({ length: safeLimit }, async () => {
|
|
16822
|
+
while (index < items.length) {
|
|
16823
|
+
const current = items[index];
|
|
16824
|
+
index += 1;
|
|
16825
|
+
await task(current);
|
|
16826
|
+
}
|
|
16827
|
+
});
|
|
16828
|
+
await Promise.all(workers);
|
|
16829
|
+
}
|
|
16830
|
+
async function runSingleEvalFile(params) {
|
|
16831
|
+
const {
|
|
16832
|
+
testFilePath,
|
|
16833
|
+
cwd,
|
|
16834
|
+
repoRoot,
|
|
16835
|
+
options,
|
|
16836
|
+
outputWriter,
|
|
16837
|
+
cache,
|
|
16838
|
+
evaluationRunner,
|
|
16839
|
+
workersOverride,
|
|
16840
|
+
progressReporter,
|
|
16841
|
+
seenEvalCases,
|
|
16842
|
+
displayIdTracker,
|
|
16843
|
+
selection,
|
|
16844
|
+
inlineTargetLabel
|
|
16845
|
+
} = params;
|
|
16846
|
+
await ensureFileExists(testFilePath, "Test file");
|
|
16847
|
+
const resolvedTargetSelection = selection;
|
|
16848
|
+
const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
|
|
16849
|
+
const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
|
|
16850
|
+
if (!progressReporter.isInteractive || options.verbose) {
|
|
16851
|
+
console.log(targetMessage);
|
|
16852
|
+
}
|
|
16853
|
+
const promptDumpDir = resolvePromptDirectory(options.dumpPrompts, cwd);
|
|
16854
|
+
if (promptDumpDir) {
|
|
16855
|
+
await mkdir6(promptDumpDir, { recursive: true });
|
|
16856
|
+
if (options.verbose) {
|
|
16857
|
+
console.log(`Prompt dumps enabled at: ${promptDumpDir}`);
|
|
16858
|
+
}
|
|
16859
|
+
}
|
|
16860
|
+
const agentTimeoutMs = Math.max(0, options.agentTimeoutSeconds) * 1e3;
|
|
16861
|
+
const workerPreference = workersOverride ?? options.workers;
|
|
16862
|
+
let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
|
|
16863
|
+
if (resolvedWorkers < 1 || resolvedWorkers > 50) {
|
|
16864
|
+
throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
|
|
16865
|
+
}
|
|
16866
|
+
const isVSCodeProvider = ["vscode", "vscode-insiders"].includes(
|
|
16867
|
+
resolvedTargetSelection.resolvedTarget.kind
|
|
16868
|
+
);
|
|
16869
|
+
if (isVSCodeProvider && resolvedWorkers > 1) {
|
|
16870
|
+
console.warn(`Warning: VSCode providers require window focus. Limiting workers from ${resolvedWorkers} to 1 to prevent race conditions.`);
|
|
16871
|
+
resolvedWorkers = 1;
|
|
16872
|
+
}
|
|
16873
|
+
if (options.verbose) {
|
|
16874
|
+
const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
|
|
16875
|
+
console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
|
|
16876
|
+
}
|
|
16877
|
+
if (isVSCodeProvider && !options.dryRun) {
|
|
16878
|
+
await ensureVSCodeSubagents({
|
|
16879
|
+
kind: resolvedTargetSelection.resolvedTarget.kind,
|
|
16880
|
+
count: resolvedWorkers,
|
|
16881
|
+
verbose: options.verbose
|
|
16882
|
+
});
|
|
16883
|
+
}
|
|
16884
|
+
const results = await evaluationRunner({
|
|
16885
|
+
testFilePath,
|
|
16886
|
+
repoRoot,
|
|
16887
|
+
target: resolvedTargetSelection.resolvedTarget,
|
|
16888
|
+
targets: resolvedTargetSelection.definitions,
|
|
16889
|
+
env: process.env,
|
|
16890
|
+
maxRetries: Math.max(0, options.maxRetries),
|
|
16891
|
+
agentTimeoutMs,
|
|
16892
|
+
promptDumpDir,
|
|
16893
|
+
cache,
|
|
16894
|
+
useCache: options.cache,
|
|
16895
|
+
evalId: options.evalId,
|
|
16896
|
+
verbose: options.verbose,
|
|
16897
|
+
maxConcurrency: resolvedWorkers,
|
|
16898
|
+
onResult: async (result) => {
|
|
16899
|
+
await outputWriter.append(result);
|
|
16900
|
+
},
|
|
16901
|
+
onProgress: async (event) => {
|
|
16902
|
+
const evalKey = makeEvalKey(testFilePath, event.evalId);
|
|
16903
|
+
if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
|
|
16904
|
+
seenEvalCases.add(evalKey);
|
|
16905
|
+
progressReporter.setTotal(seenEvalCases.size);
|
|
16906
|
+
}
|
|
16907
|
+
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16908
|
+
progressReporter.update(displayId, {
|
|
16909
|
+
workerId: displayId,
|
|
16910
|
+
evalId: event.evalId,
|
|
16911
|
+
status: event.status,
|
|
16912
|
+
startedAt: event.startedAt,
|
|
16913
|
+
completedAt: event.completedAt,
|
|
16914
|
+
error: event.error,
|
|
16915
|
+
targetLabel: inlineTargetLabel
|
|
17016
16916
|
});
|
|
17017
16917
|
}
|
|
17018
|
-
|
|
17019
|
-
|
|
17020
|
-
|
|
17021
|
-
|
|
17022
|
-
|
|
17023
|
-
|
|
17024
|
-
|
|
16918
|
+
});
|
|
16919
|
+
return { results: [...results], promptDumpDir };
|
|
16920
|
+
}
|
|
16921
|
+
async function runEvalCommand(input) {
|
|
16922
|
+
const options = normalizeOptions(input.rawOptions);
|
|
16923
|
+
const cwd = process.cwd();
|
|
16924
|
+
const repoRoot = await findRepoRoot(cwd);
|
|
16925
|
+
if (options.verbose) {
|
|
16926
|
+
console.log(`Repository root: ${repoRoot}`);
|
|
16927
|
+
}
|
|
16928
|
+
const outputPath = options.outPath ? path14.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
16929
|
+
console.log(`Output path: ${outputPath}`);
|
|
16930
|
+
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
16931
|
+
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
16932
|
+
const evaluationRunner = await resolveEvaluationRunner();
|
|
16933
|
+
const allResults = [];
|
|
16934
|
+
let lastPromptDumpDir;
|
|
16935
|
+
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
16936
|
+
const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
|
|
16937
|
+
const displayIdTracker = createDisplayIdTracker();
|
|
16938
|
+
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
16939
|
+
const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
|
|
16940
|
+
const perFileWorkers = options.workers ? Math.max(1, Math.floor(totalWorkers / fileConcurrency)) : void 0;
|
|
16941
|
+
const fileMetadata = /* @__PURE__ */ new Map();
|
|
16942
|
+
for (const testFilePath of resolvedTestFiles) {
|
|
16943
|
+
const meta = await prepareFileMetadata({
|
|
16944
|
+
testFilePath,
|
|
16945
|
+
repoRoot,
|
|
16946
|
+
cwd,
|
|
16947
|
+
options
|
|
16948
|
+
});
|
|
16949
|
+
fileMetadata.set(testFilePath, meta);
|
|
16950
|
+
}
|
|
16951
|
+
const totalEvalCount = Array.from(fileMetadata.values()).reduce(
|
|
16952
|
+
(sum, meta) => sum + meta.evalIds.length,
|
|
16953
|
+
0
|
|
16954
|
+
);
|
|
16955
|
+
if (totalEvalCount === 0) {
|
|
16956
|
+
throw new Error("No eval cases matched the provided filters.");
|
|
16957
|
+
}
|
|
16958
|
+
const progressReporter = createProgressReporter(totalWorkers);
|
|
16959
|
+
progressReporter.start();
|
|
16960
|
+
progressReporter.setTotal(totalEvalCount);
|
|
16961
|
+
const seenCodexLogPaths = /* @__PURE__ */ new Set();
|
|
16962
|
+
const unsubscribeCodexLogs = subscribeToCodexLogEntries((entry) => {
|
|
16963
|
+
if (!entry.filePath || seenCodexLogPaths.has(entry.filePath)) {
|
|
16964
|
+
return;
|
|
16965
|
+
}
|
|
16966
|
+
seenCodexLogPaths.add(entry.filePath);
|
|
16967
|
+
progressReporter.addLogPaths([entry.filePath]);
|
|
16968
|
+
});
|
|
16969
|
+
for (const [testFilePath, meta] of fileMetadata.entries()) {
|
|
16970
|
+
for (const evalId of meta.evalIds) {
|
|
16971
|
+
const evalKey = makeEvalKey(testFilePath, evalId);
|
|
16972
|
+
seenEvalCases.add(evalKey);
|
|
16973
|
+
const displayId = displayIdTracker.getOrAssign(evalKey);
|
|
16974
|
+
progressReporter.update(displayId, {
|
|
16975
|
+
workerId: displayId,
|
|
16976
|
+
evalId,
|
|
16977
|
+
status: "pending",
|
|
16978
|
+
targetLabel: meta.inlineTargetLabel
|
|
17025
16979
|
});
|
|
17026
16980
|
}
|
|
17027
|
-
|
|
17028
|
-
|
|
17029
|
-
|
|
17030
|
-
|
|
17031
|
-
|
|
17032
|
-
|
|
17033
|
-
|
|
17034
|
-
|
|
17035
|
-
|
|
17036
|
-
|
|
16981
|
+
}
|
|
16982
|
+
try {
|
|
16983
|
+
await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
|
|
16984
|
+
const targetPrep = fileMetadata.get(testFilePath);
|
|
16985
|
+
if (!targetPrep) {
|
|
16986
|
+
throw new Error(`Missing metadata for ${testFilePath}`);
|
|
16987
|
+
}
|
|
16988
|
+
const result = await runSingleEvalFile({
|
|
16989
|
+
testFilePath,
|
|
16990
|
+
cwd,
|
|
16991
|
+
repoRoot,
|
|
16992
|
+
options,
|
|
16993
|
+
outputWriter,
|
|
16994
|
+
cache,
|
|
16995
|
+
evaluationRunner,
|
|
16996
|
+
workersOverride: perFileWorkers,
|
|
16997
|
+
progressReporter,
|
|
16998
|
+
seenEvalCases,
|
|
16999
|
+
displayIdTracker,
|
|
17000
|
+
selection: targetPrep.selection,
|
|
17001
|
+
inlineTargetLabel: targetPrep.inlineTargetLabel
|
|
17037
17002
|
});
|
|
17003
|
+
allResults.push(...result.results);
|
|
17004
|
+
if (result.promptDumpDir) {
|
|
17005
|
+
lastPromptDumpDir = result.promptDumpDir;
|
|
17006
|
+
}
|
|
17007
|
+
});
|
|
17008
|
+
progressReporter.finish();
|
|
17009
|
+
const summary = calculateEvaluationSummary(allResults);
|
|
17010
|
+
console.log(formatEvaluationSummary(summary));
|
|
17011
|
+
if (allResults.length > 0) {
|
|
17012
|
+
console.log(`
|
|
17013
|
+
Results written to: ${outputPath}`);
|
|
17038
17014
|
}
|
|
17015
|
+
if (lastPromptDumpDir && allResults.length > 0) {
|
|
17016
|
+
console.log(`Prompt payloads saved to: ${lastPromptDumpDir}`);
|
|
17017
|
+
}
|
|
17018
|
+
} finally {
|
|
17019
|
+
unsubscribeCodexLogs();
|
|
17020
|
+
await outputWriter.close().catch(() => void 0);
|
|
17039
17021
|
}
|
|
17040
|
-
return {
|
|
17041
|
-
valid: errors.filter((e) => e.severity === "error").length === 0,
|
|
17042
|
-
filePath: absolutePath,
|
|
17043
|
-
fileType: "targets",
|
|
17044
|
-
errors
|
|
17045
|
-
};
|
|
17046
17022
|
}
|
|
17047
|
-
|
|
17048
|
-
|
|
17049
|
-
|
|
17050
|
-
|
|
17051
|
-
|
|
17052
|
-
|
|
17053
|
-
|
|
17054
|
-
|
|
17055
|
-
|
|
17056
|
-
|
|
17057
|
-
|
|
17058
|
-
}
|
|
17059
|
-
|
|
17023
|
+
/**
 * Selects the function used to run evaluations.
 *
 * When the AGENTEVO_CLI_EVAL_RUNNER environment variable is unset or empty,
 * the built-in `runEvaluation` is returned. Otherwise the variable is treated
 * as a module path (relative paths resolve against the current working
 * directory) and that module's `runEvaluation` export is returned.
 *
 * @returns {Promise<Function>} The evaluation runner to use.
 * @throws {Error} If the override module does not export a `runEvaluation` function.
 */
async function resolveEvaluationRunner() {
  const { AGENTEVO_CLI_EVAL_RUNNER: runnerOverride } = process.env;
  if (!runnerOverride) {
    return runEvaluation;
  }

  let resolved = runnerOverride;
  if (!path14.isAbsolute(resolved)) {
    resolved = path14.resolve(process.cwd(), resolved);
  }

  // Dynamic import needs a file:// URL rather than a bare filesystem path.
  const { runEvaluation: overrideRunner } = await import(pathToFileURL(resolved).href);
  if (typeof overrideRunner === "function") {
    return overrideRunner;
  }

  throw new Error(
    `Module '${resolved}' must export a 'runEvaluation' function to override the default implementation`
  );
}
|
|
17039
|
+
|
|
17040
|
+
// src/commands/eval/index.ts
|
|
17041
|
+
/**
 * Parses a CLI argument as a base-10 integer.
 *
 * @param {string} value - Raw option value.
 * @param {number} fallback - Value returned when `value` does not parse to a number.
 * @returns {number} The parsed integer, or `fallback` when parsing yields NaN.
 */
function parseInteger(value, fallback) {
  const numeric = Number.parseInt(value, 10);
  return Number.isNaN(numeric) ? fallback : numeric;
}
|
|
17048
|
+
/**
 * Registers the `eval` subcommand and all of its options on the CLI program.
 *
 * The action resolves the supplied paths/globs against the current working
 * directory and hands the resulting file list plus the raw options to
 * `runEvalCommand`.
 *
 * @param {object} program - The CLI program to extend.
 * @returns {object} The same `program`, for chaining.
 */
function registerEvalCommand(program) {
  // Builds an option parser that falls back to `fallback` on non-numeric input.
  const integerOr = (fallback) => (value) => parseInteger(value, fallback);

  const runEval = async (evalPaths, rawOptions) => {
    const testFiles = await resolveEvalPaths(evalPaths, process.cwd());
    await runEvalCommand({ testFiles, rawOptions });
  };

  program
    .command("eval")
    .description("Run eval suites and report results")
    .argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)")
    .option("--target <name>", "Override target name from targets.yaml", "default")
    .option("--targets <path>", "Path to targets.yaml (overrides discovery)")
    .option("--eval-id <id>", "Run only the eval case with this identifier")
    // No default value is passed for --workers (three arguments only).
    .option(
      "--workers <count>",
      "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
      integerOr(1)
    )
    .option("--out <path>", "Write results to the specified path")
    .option("--output-format <format>", "Output format: 'jsonl' or 'yaml' (default: jsonl)", "jsonl")
    .option("--dry-run", "Use mock provider responses instead of real LLM calls", false)
    .option(
      "--dry-run-delay <ms>",
      "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
      integerOr(0),
      0
    )
    .option(
      "--dry-run-delay-min <ms>",
      "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
      integerOr(0),
      0
    )
    .option(
      "--dry-run-delay-max <ms>",
      "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
      integerOr(0),
      0
    )
    .option(
      "--agent-timeout <seconds>",
      "Timeout in seconds for provider responses (default: 120)",
      integerOr(120),
      120
    )
    .option(
      "--max-retries <count>",
      "Retry count for timeout recoveries (default: 2)",
      integerOr(2),
      2
    )
    .option("--cache", "Enable in-memory provider response cache", false)
    .option("--verbose", "Enable verbose logging", false)
    .option(
      "--dump-prompts [dir]",
      "Persist prompt payloads for debugging (optional custom directory)"
    )
    .action(runEval);

  return program;
}
|
|
17091
|
+
/**
 * Expands user-supplied eval paths and glob patterns into a sorted,
 * de-duplicated list of YAML file paths.
 *
 * Each input is first tried as a literal path (absolute, or relative to
 * `cwd`); if it is not an existing `.yaml`/`.yml` file it is treated as a glob
 * pattern evaluated from `cwd`.
 *
 * @param {string[]} evalPaths - Paths or glob patterns from the command line.
 * @param {string} cwd - Directory used to resolve relative paths and globs.
 * @returns {Promise<string[]>} Sorted list of matching YAML files.
 * @throws {Error} If no usable input is given, or any input matches no YAML file.
 */
async function resolveEvalPaths(evalPaths, cwd) {
  const yamlExtension = /\.ya?ml$/i;
  const patterns = evalPaths.map((value) => value?.trim()).filter((value) => value);
  if (patterns.length === 0) {
    throw new Error("No eval paths provided.");
  }

  const found = new Set();
  const unmatched = [];

  for (const pattern of patterns) {
    const literalPath = path15.isAbsolute(pattern)
      ? path15.normalize(pattern)
      : path15.resolve(cwd, pattern);

    let isYamlFile = false;
    try {
      const info = await stat3(literalPath);
      isYamlFile = info.isFile() && yamlExtension.test(literalPath);
    } catch {
      // Not an existing path; fall through and treat the input as a glob.
    }
    if (isYamlFile) {
      found.add(literalPath);
      continue;
    }

    // Glob patterns use forward slashes, so convert any backslashes.
    const globMatches = await fg(pattern.replace(/\\/g, "/"), {
      cwd,
      absolute: true,
      onlyFiles: true,
      unique: true,
      dot: true,
      followSymbolicLinks: true
    });
    const yamlFiles = globMatches.filter((filePath) => yamlExtension.test(filePath));
    if (yamlFiles.length === 0) {
      unmatched.push(pattern);
      continue;
    }
    for (const filePath of yamlFiles) {
      found.add(path15.normalize(filePath));
    }
  }

  if (unmatched.length > 0) {
    throw new Error(
      `No eval files matched: ${unmatched.join(
        ", "
      )}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
    );
  }

  return [...found].sort();
}
|
|
17135
|
+
|
|
17136
|
+
// src/commands/init/index.ts
|
|
17137
|
+
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
17138
|
+
import path17 from "node:path";
|
|
17139
|
+
import * as readline from "node:readline/promises";
|
|
17140
|
+
|
|
17141
|
+
// src/templates/index.ts
|
|
17142
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
17143
|
+
import path16 from "node:path";
|
|
17144
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
17145
|
+
/**
 * Loads the template files that `init` copies into a project.
 */
var TemplateManager = class {
  /** @returns {{path: string, content: string}[]} Templates from the "github" directory. */
  static getGithubTemplates() {
    return this.getTemplatesFromDir("github");
  }

  /** @returns {{path: string, content: string}[]} Templates from the "agentv" directory. */
  static getAgentvTemplates() {
    return this.getTemplatesFromDir("agentv");
  }

  /**
   * Reads every template beneath the given template subdirectory.
   *
   * @param {string} subdir - Name of the template subdirectory.
   * @returns {{path: string, content: string}[]} All templates found.
   */
  static getTemplatesFromDir(subdir) {
    const moduleDir = path16.dirname(fileURLToPath2(import.meta.url));
    // When this module's path contains a "dist" segment the templates are
    // looked up under "templates/<subdir>"; otherwise directly under "<subdir>".
    const runsFromDist = moduleDir.includes(`${path16.sep}dist`);
    const templatesDir = runsFromDist
      ? path16.join(moduleDir, "templates", subdir)
      : path16.join(moduleDir, subdir);
    return this.readTemplatesRecursively(templatesDir, "");
  }

  /**
   * Walks `dir` depth-first and collects every file it contains.
   *
   * @param {string} dir - Directory to read.
   * @param {string} relativePath - Path of `dir` relative to the template root ("" at the root).
   * @returns {{path: string, content: string}[]} Files with forward-slash relative paths and UTF-8 content.
   */
  static readTemplatesRecursively(dir, relativePath) {
    return readdirSync(dir).flatMap((entry) => {
      const absolutePath = path16.join(dir, entry);
      const isDirectory = statSync(absolutePath).isDirectory();
      const nestedPath = relativePath ? path16.join(relativePath, entry) : entry;
      if (isDirectory) {
        return this.readTemplatesRecursively(absolutePath, nestedPath);
      }
      return [
        {
          // Normalize to forward slashes
          path: nestedPath.split(path16.sep).join("/"),
          content: readFileSync(absolutePath, "utf-8")
        }
      ];
    });
  }
};
|
|
17183
|
+
|
|
17184
|
+
// src/commands/init/index.ts
|
|
17185
|
+
/**
 * Asks a yes/no question on the terminal.
 *
 * The answer is trimmed and compared case-insensitively, so stray whitespace
 * around "y"/"yes" does not turn a confirmation into a refusal. Anything other
 * than "y" or "yes" (including an empty answer) counts as "no".
 *
 * @param {string} message - Question text; " (y/N): " is appended.
 * @returns {Promise<boolean>} True only when the user answered "y" or "yes".
 */
async function promptYesNo(message) {
  const rl2 = readline.createInterface({
    input: process.stdin,
    output: process.stdout
  });
  try {
    const answer = await rl2.question(`${message} (y/N): `);
    const normalized = answer.trim().toLowerCase();
    return normalized === "y" || normalized === "yes";
  } finally {
    // Always release stdin, even if the prompt is interrupted.
    rl2.close();
  }
}
|
|
17197
|
+
/**
 * Installs the bundled templates into `<targetPath>/.github` and
 * `<targetPath>/.agentv`.
 *
 * If any template file already exists, the files are listed and the user is
 * asked for confirmation; declining leaves everything untouched. Otherwise
 * the directories are created as needed and every template is written
 * (overwriting existing files).
 *
 * @param {{targetPath?: string}} [options] - `targetPath` defaults to ".".
 * @returns {Promise<void>}
 */
async function initCommand(options = {}) {
  const targetPath = path17.resolve(options.targetPath ?? ".");
  const toRelative = (absolutePath) => path17.relative(targetPath, absolutePath);

  // One entry per install location, processed in this order throughout.
  const installs = [
    { dir: path17.join(targetPath, ".github"), templates: TemplateManager.getGithubTemplates() },
    { dir: path17.join(targetPath, ".agentv"), templates: TemplateManager.getAgentvTemplates() }
  ];

  const existingFiles = installs.flatMap(({ dir, templates }) => {
    if (!existsSync(dir)) {
      return [];
    }
    return templates
      .map((template) => path17.join(dir, template.path))
      .filter((candidate) => existsSync(candidate))
      .map(toRelative);
  });

  if (existingFiles.length > 0) {
    console.log("We detected an existing setup:");
    for (const file of existingFiles) {
      console.log(` - ${file}`);
    }
    console.log();
    const shouldReplace = await promptYesNo("Do you want to replace these files?");
    if (!shouldReplace) {
      console.log("\nInit cancelled. No files were changed.");
      return;
    }
    console.log();
  }

  const ensureDir = (dir) => {
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }
  };

  for (const { dir } of installs) {
    ensureDir(dir);
  }

  for (const { dir, templates } of installs) {
    for (const template of templates) {
      const destination = path17.join(dir, template.path);
      ensureDir(path17.dirname(destination));
      writeFileSync(destination, template.content, "utf-8");
      console.log(`Created ${toRelative(destination)}`);
    }
  }

  console.log("\nAgentV initialized successfully!");
  for (const { dir, templates } of installs) {
    console.log(`
Files installed to ${toRelative(dir)}:`);
    for (const t of templates) {
      console.log(` - ${t.path}`);
    }
  }
  console.log("\nYou can now:");
  console.log(" 1. Edit .agentv/.env with your API credentials");
  console.log(" 2. Configure targets in .agentv/targets.yaml");
  console.log(" 3. Create eval files using the schema and prompt templates");
}
|
|
17167
|
-
|
|
17168
|
-
|
|
17169
|
-
|
|
17170
|
-
|
|
17171
|
-
|
|
17267
|
+
|
|
17268
|
+
// src/commands/status.ts
|
|
17269
|
+
/**
 * Registers the `status` subcommand, which creates an agent kernel and prints
 * its `status` property.
 *
 * @param {object} program - The CLI program to extend.
 * @returns {object} The same `program`, for chaining.
 */
function registerStatusCommand(program) {
  const printKernelStatus = () => {
    const { status } = createAgentKernel();
    console.log(`Kernel status: ${status}`);
  };

  program
    .command("status")
    .description("Show the latest AgentV kernel status")
    .action(printKernelStatus);

  return program;
}
|
|
17276
|
+
|
|
17277
|
+
// src/commands/validate/format-output.ts
|
|
17278
|
+
// ANSI escape sequences (red, yellow, green, cyan, bold, reset) that the
// validation formatters below wrap around text when colored output is enabled.
var ANSI_RED2 = "\x1B[31m";
|
|
17279
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
17280
|
+
var ANSI_GREEN = "\x1B[32m";
|
|
17281
|
+
var ANSI_CYAN = "\x1B[36m";
|
|
17282
|
+
var ANSI_BOLD = "\x1B[1m";
|
|
17283
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
17284
|
+
/**
 * Renders the full validation report: a header, one entry per validated file,
 * and the aggregate statistics, separated by blank lines.
 *
 * @param {{results: object[]}} summary - Validation summary to render.
 * @param {boolean} useColors - Whether to emit ANSI color codes.
 * @returns {string} Newline-joined report with leading and trailing blank lines.
 */
function formatSummary(summary, useColors) {
  // Array elements are evaluated in order: header, per-file results, stats.
  const sections = [
    "",
    formatHeader("Validation Summary", useColors),
    "",
    ...summary.results.map((result) => formatFileResult(result, useColors)),
    "",
    formatStats(summary, useColors),
    ""
  ];
  return sections.join("\n");
}
|
|
17297
|
+
/**
 * Formats a section heading, in bold cyan when colors are enabled.
 *
 * @param {string} text - Heading text.
 * @param {boolean} useColors - Whether to emit ANSI color codes.
 * @returns {string} The heading, wrapped in ANSI codes only if `useColors` is truthy.
 */
function formatHeader(text, useColors) {
  return useColors ? `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET3}` : text;
}
|
|
17303
|
+
/**
 * Formats one file's validation outcome: a check/cross mark with the file
 * path, followed by one line per reported error or warning.
 *
 * @param {{valid: boolean, filePath: string, errors: object[]}} result - Result for a single file.
 * @param {boolean} useColors - Whether to emit ANSI color codes.
 * @returns {string} Newline-joined lines for this file.
 */
function formatFileResult(result, useColors) {
  const [mark, markColor] = result.valid ? ["\u2713", ANSI_GREEN] : ["\u2717", ANSI_RED2];
  const badge = useColors ? `${markColor}${mark}${ANSI_RESET3}` : mark;
  const issueLines = result.errors.map((error) => formatError(error, useColors));
  return [`${badge} ${result.filePath}`, ...issueLines].join("\n");
}
|
|
17317
|
+
/**
 * Formats a single validation issue as an indented line.
 *
 * Issues with severity "error" get a cross (red when colored); every other
 * severity gets a warning sign (yellow when colored). The location, when
 * present, is shown in square brackets before the message.
 *
 * @param {{severity: string, location?: string, message: string}} error - Issue to render.
 * @param {boolean} useColors - Whether to emit ANSI color codes.
 * @returns {string} The formatted line.
 */
function formatError(error, useColors) {
  const isError = error.severity === "error";
  const marker = isError ? " \u2717" : " \u26A0";
  const tint = isError ? ANSI_RED2 : ANSI_YELLOW3;
  const shownMarker = useColors ? `${tint}${marker}${ANSI_RESET3}` : marker;
  const where = error.location ? ` [${error.location}]` : "";
  return `${shownMarker}${where} ${error.message}`;
}
|
|
17324
|
+
/**
 * Formats the aggregate counts shown at the end of the validation report.
 *
 * Always lists total, valid and invalid file counts; adds a
 * "Files with warnings" line only when at least one file has a warning.
 * With colors enabled the total is bold, the valid count green, the invalid
 * count red only when non-zero, and the warning count yellow.
 *
 * @param {{totalFiles: number, validFiles: number, invalidFiles: number, results: object[]}} summary - Validation summary.
 * @param {boolean} useColors - Whether to emit ANSI color codes.
 * @returns {string} Newline-joined statistics lines.
 */
function formatStats(summary, useColors) {
  // Wraps text in a color code plus reset, or returns it untouched without colors.
  const paint = (colorCode, text) => (useColors ? `${colorCode}${text}${ANSI_RESET3}` : text);

  const hasWarning = (fileResult) => fileResult.errors.some((issue) => issue.severity === "warning");
  const warnedFileCount = summary.results.filter(hasWarning).length;

  const invalidLine = `Invalid: ${summary.invalidFiles}`;
  const stats = [
    paint(ANSI_BOLD, `Total files: ${summary.totalFiles}`),
    paint(ANSI_GREEN, `Valid: ${summary.validFiles}`),
    // A zero invalid count stays uncolored even in color mode.
    summary.invalidFiles > 0 ? paint(ANSI_RED2, invalidLine) : invalidLine
  ];
  if (warnedFileCount > 0) {
    stats.push(paint(ANSI_YELLOW3, `Files with warnings: ${warnedFileCount}`));
  }
  return stats.join("\n");
}
|
|
17353
|
+
/**
 * Reports whether standard output is attached to a terminal.
 *
 * @returns {boolean} `process.stdout.isTTY`, or false when that property is null/undefined.
 */
function isTTY2() {
  const { isTTY } = process.stdout;
  return isTTY ?? false;
}
|
|
17230
17356
|
|
|
17231
17357
|
// src/commands/validate/validate-files.ts
|
|
@@ -17340,7 +17466,7 @@ async function runValidateCommand(paths, _options) {
|
|
|
17340
17466
|
process.exit(1);
|
|
17341
17467
|
}
|
|
17342
17468
|
const summary = await validateFiles(paths);
|
|
17343
|
-
const useColors =
|
|
17469
|
+
const useColors = isTTY2();
|
|
17344
17470
|
console.log(formatSummary(summary, useColors));
|
|
17345
17471
|
if (summary.invalidFiles > 0) {
|
|
17346
17472
|
process.exit(1);
|
|
@@ -17386,4 +17512,4 @@ export {
|
|
|
17386
17512
|
createProgram,
|
|
17387
17513
|
runCli
|
|
17388
17514
|
};
|
|
17389
|
-
//# sourceMappingURL=chunk-
|
|
17515
|
+
//# sourceMappingURL=chunk-GWH4WZTW.js.map
|