agentv 0.7.0 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -588,9 +588,9 @@ import { readFileSync as readFileSync2 } from "node:fs";
588
588
  // src/commands/eval/index.ts
589
589
  import fg from "fast-glob";
590
590
  import { stat as stat3 } from "node:fs/promises";
591
- import path14 from "node:path";
591
+ import path15 from "node:path";
592
592
 
593
- // ../../packages/core/dist/chunk-L7I5UTJU.js
593
+ // ../../packages/core/dist/chunk-UQLHF3T7.js
594
594
  import { constants } from "node:fs";
595
595
  import { access, readFile } from "node:fs/promises";
596
596
  import path from "node:path";
@@ -697,6 +697,11 @@ async function resolveFileReference(rawValue, searchRoots) {
697
697
  }
698
698
  return { displayPath, attempted };
699
699
  }
700
+ var AGENT_PROVIDER_KINDS = [
701
+ "codex",
702
+ "vscode",
703
+ "vscode-insiders"
704
+ ];
700
705
  var KNOWN_PROVIDERS = [
701
706
  "azure",
702
707
  "anthropic",
@@ -723,7 +728,10 @@ var PROVIDER_ALIASES = [
723
728
  "vertex"
724
729
  // legacy/future support
725
730
  ];
726
- var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
731
+ var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
732
+ function isAgentProvider(provider) {
733
+ return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
734
+ }
727
735
 
728
736
  // ../../packages/core/dist/index.js
729
737
  import micromatch from "micromatch";
@@ -5188,7 +5196,7 @@ import { promisify as promisify2 } from "node:util";
5188
5196
  import { exec as execCallback, spawn as spawn2 } from "node:child_process";
5189
5197
  import { randomUUID } from "node:crypto";
5190
5198
  import { constants as constants22, createWriteStream } from "node:fs";
5191
- import { access as access22, copyFile as copyFile2, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
5199
+ import { access as access22, mkdtemp, mkdir as mkdir3, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
5192
5200
  import { tmpdir } from "node:os";
5193
5201
  import path42 from "node:path";
5194
5202
  import { promisify as promisify22 } from "node:util";
@@ -9236,16 +9244,15 @@ var coerce = {
9236
9244
  var NEVER = INVALID;
9237
9245
 
9238
9246
  // ../../packages/core/dist/index.js
9239
- import { readFile as readFile22 } from "node:fs/promises";
9240
9247
  import path52 from "node:path";
9241
9248
 
9242
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/agentDispatch.js
9249
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
9243
9250
  import { exec, spawn } from "child_process";
9244
9251
  import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile as readFile2, stat as stat2, writeFile } from "fs/promises";
9245
9252
  import path6 from "path";
9246
9253
  import { promisify } from "util";
9247
9254
 
9248
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/constants.js
9255
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/constants.js
9249
9256
  import os from "os";
9250
9257
  import path2 from "path";
9251
9258
  var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -9256,7 +9263,7 @@ function getDefaultSubagentRoot(vscodeCmd = "code") {
9256
9263
  }
9257
9264
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
9258
9265
 
9259
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/fs.js
9266
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/fs.js
9260
9267
  import { constants as constants2 } from "fs";
9261
9268
  import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
9262
9269
  import path3 from "path";
@@ -9289,7 +9296,7 @@ async function removeIfExists(target) {
9289
9296
  }
9290
9297
  }
9291
9298
 
9292
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/path.js
9299
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/path.js
9293
9300
  import path4 from "path";
9294
9301
  function pathToFileUri(filePath) {
9295
9302
  const absolutePath = path4.isAbsolute(filePath) ? filePath : path4.resolve(filePath);
@@ -9300,14 +9307,14 @@ function pathToFileUri(filePath) {
9300
9307
  return `file://${normalizedPath}`;
9301
9308
  }
9302
9309
 
9303
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/time.js
9310
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/time.js
9304
9311
  function sleep(ms) {
9305
9312
  return new Promise((resolve) => {
9306
9313
  setTimeout(resolve, ms);
9307
9314
  });
9308
9315
  }
9309
9316
 
9310
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/workspace.js
9317
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/workspace.js
9311
9318
  import path5 from "path";
9312
9319
 
9313
9320
  // ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
@@ -10395,7 +10402,7 @@ var JSON5 = {
10395
10402
  var lib = JSON5;
10396
10403
  var dist_default = lib;
10397
10404
 
10398
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/utils/workspace.js
10405
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/utils/workspace.js
10399
10406
  function transformWorkspacePaths(workspaceContent, templateDir) {
10400
10407
  let workspace;
10401
10408
  try {
@@ -10468,7 +10475,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
10468
10475
  return JSON.stringify(transformedWorkspace, null, 2);
10469
10476
  }
10470
10477
 
10471
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/agentDispatch.js
10478
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/agentDispatch.js
10472
10479
  var execAsync = promisify(exec);
10473
10480
  function generateTimestamp() {
10474
10481
  return (/* @__PURE__ */ new Date()).toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
@@ -10542,7 +10549,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
10542
10549
  spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
10543
10550
  await sleep(100);
10544
10551
  const wakeupChatId = "wakeup";
10545
- const chatArgs = ["-r", "chat", "-m", wakeupChatId, "create a file named .alive"];
10552
+ const chatArgs = ["-r", "chat", "-m", wakeupChatId, `create a file named .alive in the ${path6.basename(subagentDir)} folder`];
10546
10553
  spawn(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
10547
10554
  const start = Date.now();
10548
10555
  while (!await pathExists(aliveFile)) {
@@ -11050,7 +11057,7 @@ async function dispatchBatchAgent(options) {
11050
11057
  }
11051
11058
  }
11052
11059
 
11053
- // ../../node_modules/.pnpm/subagent@0.4.6/node_modules/subagent/dist/vscode/provision.js
11060
+ // ../../node_modules/.pnpm/subagent@0.4.7/node_modules/subagent/dist/vscode/provision.js
11054
11061
  import { writeFile as writeFile2 } from "fs/promises";
11055
11062
  import path7 from "path";
11056
11063
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
@@ -11176,7 +11183,7 @@ async function provisionSubagents(options) {
11176
11183
 
11177
11184
  // ../../packages/core/dist/index.js
11178
11185
  import { constants as constants32 } from "node:fs";
11179
- import { access as access32, readFile as readFile32 } from "node:fs/promises";
11186
+ import { access as access32, readFile as readFile22 } from "node:fs/promises";
11180
11187
  import path62 from "node:path";
11181
11188
  import { parse as parse22 } from "yaml";
11182
11189
  import { randomUUID as randomUUID2 } from "node:crypto";
@@ -11379,6 +11386,7 @@ async function processMessages(options) {
11379
11386
  }
11380
11387
  async function loadEvalCases(evalFilePath, repoRoot, options) {
11381
11388
  const verbose = options?.verbose ?? false;
11389
+ const evalIdFilter = options?.evalId;
11382
11390
  const absoluteTestPath = path8.resolve(evalFilePath);
11383
11391
  if (!await fileExists2(absoluteTestPath)) {
11384
11392
  throw new Error(`Test file not found: ${evalFilePath}`);
@@ -11410,62 +11418,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11410
11418
  const results = [];
11411
11419
  for (const rawEvalcase of rawTestcases) {
11412
11420
  if (!isJsonObject(rawEvalcase)) {
11413
- logWarning("Skipping invalid test case entry (expected object)");
11421
+ logWarning("Skipping invalid eval case entry (expected object)");
11414
11422
  continue;
11415
11423
  }
11416
11424
  const evalcase = rawEvalcase;
11417
11425
  const id = asString(evalcase.id);
11426
+ if (evalIdFilter && id !== evalIdFilter) {
11427
+ continue;
11428
+ }
11418
11429
  const conversationId = asString(evalcase.conversation_id);
11419
11430
  const outcome = asString(evalcase.outcome);
11420
11431
  const inputMessagesValue = evalcase.input_messages;
11421
11432
  const expectedMessagesValue = evalcase.expected_messages;
11422
11433
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
11423
- logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
11434
+ logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
11424
11435
  continue;
11425
11436
  }
11426
11437
  if (!Array.isArray(expectedMessagesValue)) {
11427
- logWarning(`Test case '${id}' missing expected_messages array`);
11438
+ logWarning(`Eval case '${id}' missing expected_messages array`);
11428
11439
  continue;
11429
11440
  }
11430
11441
  const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
11431
11442
  const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
11432
- const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
11433
- const userMessages = inputMessages.filter((message) => message.role === "user");
11434
- const systemMessages = inputMessages.filter((message) => message.role === "system");
11435
- if (assistantMessages.length === 0) {
11436
- logWarning(`No assistant message found for test case: ${id}`);
11443
+ if (expectedMessages.length === 0) {
11444
+ logWarning(`No expected message found for eval case: ${id}`);
11437
11445
  continue;
11438
11446
  }
11439
- if (assistantMessages.length > 1) {
11440
- logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
11441
- }
11442
- if (systemMessages.length > 1) {
11443
- logWarning(`Multiple system messages found for test case: ${id}, using first`);
11444
- }
11445
- let systemMessageContent;
11446
- if (systemMessages.length > 0) {
11447
- const content = systemMessages[0]?.content;
11448
- if (typeof content === "string") {
11449
- systemMessageContent = content;
11450
- } else if (Array.isArray(content)) {
11451
- const textParts = [];
11452
- for (const segment of content) {
11453
- if (isJsonObject(segment)) {
11454
- const value = segment.value;
11455
- if (typeof value === "string") {
11456
- textParts.push(value);
11457
- }
11458
- }
11459
- }
11460
- if (textParts.length > 0) {
11461
- systemMessageContent = textParts.join("\n\n");
11462
- }
11463
- }
11447
+ if (expectedMessages.length > 1) {
11448
+ logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
11464
11449
  }
11465
11450
  const guidelinePaths = [];
11466
11451
  const inputTextParts = [];
11467
11452
  const inputSegments = await processMessages({
11468
- messages: userMessages,
11453
+ messages: inputMessages,
11469
11454
  searchRoots,
11470
11455
  repoRootPath,
11471
11456
  guidelinePatterns,
@@ -11475,7 +11460,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11475
11460
  verbose
11476
11461
  });
11477
11462
  const outputSegments = await processMessages({
11478
- messages: assistantMessages,
11463
+ messages: expectedMessages,
11479
11464
  searchRoots,
11480
11465
  repoRootPath,
11481
11466
  guidelinePatterns,
@@ -11483,10 +11468,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11483
11468
  verbose
11484
11469
  });
11485
11470
  const codeSnippets = extractCodeBlocks(inputSegments);
11486
- const assistantContent = assistantMessages[0]?.content;
11487
- const referenceAnswer = await resolveAssistantContent(assistantContent, searchRoots, verbose);
11471
+ const expectedContent = expectedMessages[0]?.content;
11472
+ const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
11488
11473
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
11489
- const testCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
11474
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
11490
11475
  const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
11491
11476
  const userFilePaths = [];
11492
11477
  for (const segment of inputSegments) {
@@ -11505,19 +11490,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
11505
11490
  question,
11506
11491
  input_segments: inputSegments,
11507
11492
  output_segments: outputSegments,
11508
- system_message: systemMessageContent,
11509
11493
  reference_answer: referenceAnswer,
11510
11494
  guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
11511
11495
  guideline_patterns: guidelinePatterns,
11512
11496
  file_paths: allFilePaths,
11513
11497
  code_snippets: codeSnippets,
11514
11498
  expected_outcome: outcome,
11515
- evaluator: testCaseEvaluatorKind,
11499
+ evaluator: evalCaseEvaluatorKind,
11516
11500
  evaluators
11517
11501
  };
11518
11502
  if (verbose) {
11519
11503
  console.log(`
11520
- [Test Case: ${id}]`);
11504
+ [Eval Case: ${id}]`);
11521
11505
  if (testCase.guideline_paths.length > 0) {
11522
11506
  console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
11523
11507
  for (const guidelinePath of testCase.guideline_paths) {
@@ -11576,7 +11560,7 @@ ${body}`);
11576
11560
  }
11577
11561
  const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
11578
11562
  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
11579
- return { question, guidelines, systemMessage: testCase.system_message };
11563
+ return { question, guidelines };
11580
11564
  }
11581
11565
  async function fileExists2(absolutePath) {
11582
11566
  try {
@@ -12304,7 +12288,6 @@ function pathToFileUri2(filePath) {
12304
12288
  var execAsync22 = promisify22(execCallback);
12305
12289
  var WORKSPACE_PREFIX = "agentv-codex-";
12306
12290
  var PROMPT_FILENAME = "prompt.md";
12307
- var FILES_DIR = "files";
12308
12291
  var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
12309
12292
  var CodexProvider = class {
12310
12293
  id;
@@ -12327,21 +12310,10 @@ var CodexProvider = class {
12327
12310
  }
12328
12311
  await this.ensureEnvironmentReady();
12329
12312
  const inputFiles = normalizeInputFiles2(request.inputFiles);
12330
- const originalGuidelines = new Set(
12331
- collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path42.resolve(file))
12332
- );
12333
12313
  const workspaceRoot = await this.createWorkspace();
12334
12314
  const logger = await this.createStreamLogger(request).catch(() => void 0);
12335
12315
  try {
12336
- const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
12337
- inputFiles,
12338
- workspaceRoot,
12339
- originalGuidelines
12340
- );
12341
- const promptContent = buildPromptDocument(request, mirroredInputFiles, {
12342
- guidelinePatterns: request.guideline_patterns,
12343
- guidelineOverrides: guidelineMirrors
12344
- });
12316
+ const promptContent = buildPromptDocument(request, inputFiles);
12345
12317
  const promptFile = path42.join(workspaceRoot, PROMPT_FILENAME);
12346
12318
  await writeFile3(promptFile, promptContent, "utf8");
12347
12319
  const args = this.buildCodexArgs();
@@ -12370,7 +12342,7 @@ var CodexProvider = class {
12370
12342
  executable: this.resolvedExecutable ?? this.config.executable,
12371
12343
  promptFile,
12372
12344
  workspace: workspaceRoot,
12373
- inputFiles: mirroredInputFiles,
12345
+ inputFiles,
12374
12346
  logFile: logger?.filePath
12375
12347
  }
12376
12348
  };
@@ -12425,37 +12397,6 @@ var CodexProvider = class {
12425
12397
  throw error;
12426
12398
  }
12427
12399
  }
12428
- async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
12429
- if (!inputFiles || inputFiles.length === 0) {
12430
- return {
12431
- mirroredInputFiles: void 0,
12432
- guidelineMirrors: /* @__PURE__ */ new Set()
12433
- };
12434
- }
12435
- const filesRoot = path42.join(workspaceRoot, FILES_DIR);
12436
- await mkdir3(filesRoot, { recursive: true });
12437
- const mirrored = [];
12438
- const guidelineMirrors = /* @__PURE__ */ new Set();
12439
- const nameCounts = /* @__PURE__ */ new Map();
12440
- for (const inputFile of inputFiles) {
12441
- const absoluteSource = path42.resolve(inputFile);
12442
- const baseName = path42.basename(absoluteSource);
12443
- const count = nameCounts.get(baseName) ?? 0;
12444
- nameCounts.set(baseName, count + 1);
12445
- const finalName = count === 0 ? baseName : `${baseName}.${count}`;
12446
- const destination = path42.join(filesRoot, finalName);
12447
- await copyFile2(absoluteSource, destination);
12448
- const resolvedDestination = path42.resolve(destination);
12449
- mirrored.push(resolvedDestination);
12450
- if (guidelineOriginals.has(absoluteSource)) {
12451
- guidelineMirrors.add(resolvedDestination);
12452
- }
12453
- }
12454
- return {
12455
- mirroredInputFiles: mirrored,
12456
- guidelineMirrors
12457
- };
12458
- }
12459
12400
  async createWorkspace() {
12460
12401
  return await mkdtemp(path42.join(tmpdir(), WORKSPACE_PREFIX));
12461
12402
  }
@@ -13421,23 +13362,25 @@ function resolveOptionalString(source2, env, description, options) {
13421
13362
  if (trimmed.length === 0) {
13422
13363
  return void 0;
13423
13364
  }
13424
- const envValue = env[trimmed];
13425
- if (envValue !== void 0) {
13426
- if (envValue.trim().length === 0) {
13427
- throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
13365
+ const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
13366
+ if (envVarMatch) {
13367
+ const varName = envVarMatch[1];
13368
+ const envValue = env[varName];
13369
+ if (envValue !== void 0) {
13370
+ if (envValue.trim().length === 0) {
13371
+ throw new Error(`Environment variable '${varName}' for ${description} is empty`);
13372
+ }
13373
+ return envValue;
13428
13374
  }
13429
- return envValue;
13430
- }
13431
- const allowLiteral = options?.allowLiteral ?? false;
13432
- const optionalEnv = options?.optionalEnv ?? false;
13433
- const looksLikeEnv = isLikelyEnvReference(trimmed);
13434
- if (looksLikeEnv) {
13375
+ const optionalEnv = options?.optionalEnv ?? false;
13435
13376
  if (optionalEnv) {
13436
13377
  return void 0;
13437
13378
  }
13438
- if (!allowLiteral) {
13439
- throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
13440
- }
13379
+ throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
13380
+ }
13381
+ const allowLiteral = options?.allowLiteral ?? false;
13382
+ if (!allowLiteral) {
13383
+ throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
13441
13384
  }
13442
13385
  return trimmed;
13443
13386
  }
@@ -13484,9 +13427,6 @@ function resolveOptionalBoolean(source2) {
13484
13427
  }
13485
13428
  throw new Error("expected boolean value");
13486
13429
  }
13487
- function isLikelyEnvReference(value) {
13488
- return /^[A-Z0-9_]+$/.test(value);
13489
- }
13490
13430
  function resolveOptionalStringArray(source2, env, description) {
13491
13431
  if (source2 === void 0 || source2 === null) {
13492
13432
  return void 0;
@@ -13507,15 +13447,20 @@ function resolveOptionalStringArray(source2, env, description) {
13507
13447
  if (trimmed.length === 0) {
13508
13448
  throw new Error(`${description}[${i6}] cannot be empty`);
13509
13449
  }
13510
- const envValue = env[trimmed];
13511
- if (envValue !== void 0) {
13512
- if (envValue.trim().length === 0) {
13513
- throw new Error(`Environment variable '${trimmed}' for ${description}[${i6}] is empty`);
13450
+ const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
13451
+ if (envVarMatch) {
13452
+ const varName = envVarMatch[1];
13453
+ const envValue = env[varName];
13454
+ if (envValue !== void 0) {
13455
+ if (envValue.trim().length === 0) {
13456
+ throw new Error(`Environment variable '${varName}' for ${description}[${i6}] is empty`);
13457
+ }
13458
+ resolved.push(envValue);
13459
+ continue;
13514
13460
  }
13515
- resolved.push(envValue);
13516
- } else {
13517
- resolved.push(trimmed);
13461
+ throw new Error(`Environment variable '${varName}' for ${description}[${i6}] is not set`);
13518
13462
  }
13463
+ resolved.push(trimmed);
13519
13464
  }
13520
13465
  return resolved.length > 0 ? resolved : void 0;
13521
13466
  }
@@ -13560,7 +13505,7 @@ var VSCodeProvider = class {
13560
13505
  }
13561
13506
  };
13562
13507
  }
13563
- const responseText = await readFile22(session.responseFile, "utf8");
13508
+ const responseText = await readTextFile(session.responseFile);
13564
13509
  return {
13565
13510
  text: responseText,
13566
13511
  raw: {
@@ -13614,7 +13559,7 @@ var VSCodeProvider = class {
13614
13559
  }
13615
13560
  const responses = [];
13616
13561
  for (const [index, responseFile] of session.responseFiles.entries()) {
13617
- const responseText = await readFile22(responseFile, "utf8");
13562
+ const responseText = await readTextFile(responseFile);
13618
13563
  responses.push({
13619
13564
  text: responseText,
13620
13565
  raw: {
@@ -13826,7 +13771,7 @@ async function readTargetDefinitions(filePath) {
13826
13771
  if (!await fileExists3(absolutePath)) {
13827
13772
  throw new Error(`targets.yaml not found at ${absolutePath}`);
13828
13773
  }
13829
- const raw = await readFile32(absolutePath, "utf8");
13774
+ const raw = await readFile22(absolutePath, "utf8");
13830
13775
  const parsed = parse22(raw);
13831
13776
  if (!isRecord(parsed)) {
13832
13777
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
@@ -14063,7 +14008,6 @@ var CodeEvaluator = class {
14063
14008
  expected_outcome: context2.evalCase.expected_outcome,
14064
14009
  reference_answer: context2.evalCase.reference_answer,
14065
14010
  candidate_answer: context2.candidate,
14066
- system_message: context2.promptInputs.systemMessage ?? "",
14067
14011
  guideline_paths: context2.evalCase.guideline_paths,
14068
14012
  input_files: context2.evalCase.file_paths,
14069
14013
  input_segments: context2.evalCase.input_segments
@@ -14294,7 +14238,7 @@ function validateConcurrency(concurrency) {
14294
14238
  }
14295
14239
  async function runEvaluation(options) {
14296
14240
  const {
14297
- testFilePath,
14241
+ testFilePath: evalFilePath,
14298
14242
  repoRoot,
14299
14243
  target,
14300
14244
  targets,
@@ -14313,11 +14257,11 @@ async function runEvaluation(options) {
14313
14257
  onProgress
14314
14258
  } = options;
14315
14259
  const load = loadEvalCases;
14316
- const evalCases = await load(testFilePath, repoRoot, { verbose });
14260
+ const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
14317
14261
  const filteredEvalCases = filterEvalCases(evalCases, evalId);
14318
14262
  if (filteredEvalCases.length === 0) {
14319
14263
  if (evalId) {
14320
- throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
14264
+ throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
14321
14265
  }
14322
14266
  return [];
14323
14267
  }
@@ -14471,7 +14415,8 @@ async function runEvaluation(options) {
14471
14415
  target.name,
14472
14416
  (now ?? (() => /* @__PURE__ */ new Date()))(),
14473
14417
  outcome.reason,
14474
- promptInputs
14418
+ promptInputs,
14419
+ primaryProvider
14475
14420
  );
14476
14421
  results.push(errorResult);
14477
14422
  if (onResult) {
@@ -14555,7 +14500,7 @@ async function runBatchEvaluation(options) {
14555
14500
  agentTimeoutMs
14556
14501
  });
14557
14502
  } catch (error) {
14558
- const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
14503
+ const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
14559
14504
  results.push(errorResult);
14560
14505
  if (onResult) {
14561
14506
  await onResult(errorResult);
@@ -14632,7 +14577,7 @@ async function runEvalCase(options) {
14632
14577
  attempt += 1;
14633
14578
  continue;
14634
14579
  }
14635
- return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
14580
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
14636
14581
  }
14637
14582
  }
14638
14583
  if (!providerResponse) {
@@ -14641,7 +14586,8 @@ async function runEvalCase(options) {
14641
14586
  target.name,
14642
14587
  nowFn(),
14643
14588
  lastError ?? new Error("Provider did not return a response"),
14644
- promptInputs
14589
+ promptInputs,
14590
+ provider
14645
14591
  );
14646
14592
  }
14647
14593
  if (cacheKey && cache && !cachedResponse) {
@@ -14661,7 +14607,7 @@ async function runEvalCase(options) {
14661
14607
  agentTimeoutMs
14662
14608
  });
14663
14609
  } catch (error) {
14664
- return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
14610
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
14665
14611
  }
14666
14612
  }
14667
14613
  async function evaluateCandidate(options) {
@@ -14693,9 +14639,8 @@ async function evaluateCandidate(options) {
14693
14639
  const completedAt = nowFn();
14694
14640
  const rawRequest = {
14695
14641
  question: promptInputs.question,
14696
- guidelines: promptInputs.guidelines,
14697
- guideline_paths: evalCase.guideline_paths,
14698
- system_message: promptInputs.systemMessage ?? ""
14642
+ ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
14643
+ guideline_paths: evalCase.guideline_paths
14699
14644
  };
14700
14645
  return {
14701
14646
  eval_id: evalCase.id,
@@ -14953,13 +14898,12 @@ async function invokeProvider(provider, options) {
14953
14898
  }
14954
14899
  }
14955
14900
  }
14956
- function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
14901
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
14957
14902
  const message = error instanceof Error ? error.message : String(error);
14958
14903
  const rawRequest = {
14959
14904
  question: promptInputs.question,
14960
- guidelines: promptInputs.guidelines,
14905
+ ...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
14961
14906
  guideline_paths: evalCase.guideline_paths,
14962
- system_message: promptInputs.systemMessage ?? "",
14963
14907
  error: message
14964
14908
  };
14965
14909
  return {
@@ -15009,7 +14953,7 @@ function createAgentKernel() {
15009
14953
  // src/commands/eval/run-eval.ts
15010
14954
  import { constants as constants6 } from "node:fs";
15011
14955
  import { access as access6, mkdir as mkdir6 } from "node:fs/promises";
15012
- import path13 from "node:path";
14956
+ import path14 from "node:path";
15013
14957
  import { pathToFileURL } from "node:url";
15014
14958
 
15015
14959
  // src/commands/eval/env.ts
@@ -15694,883 +15638,580 @@ function formatEvaluationSummary(summary) {
15694
15638
  return lines.join("\n");
15695
15639
  }
15696
15640
 
15697
- // src/commands/eval/targets.ts
15698
- import { constants as constants5 } from "node:fs";
15699
- import { access as access5, readFile as readFile4 } from "node:fs/promises";
15700
- import path12 from "node:path";
15641
+ // ../../packages/core/dist/evaluation/validation/index.js
15642
+ import { readFile as readFile4 } from "node:fs/promises";
15701
15643
  import { parse as parse4 } from "yaml";
15702
- var TARGET_FILE_CANDIDATES = [
15703
- "targets.yaml",
15704
- "targets.yml",
15705
- path12.join(".agentv", "targets.yaml"),
15706
- path12.join(".agentv", "targets.yml")
15707
- ];
15708
- async function fileExists5(filePath) {
15709
- try {
15710
- await access5(filePath, constants5.F_OK);
15711
- return true;
15712
- } catch {
15713
- return false;
15714
- }
15715
- }
15716
- async function readTestSuiteTarget(testFilePath) {
15644
+ import { readFile as readFile23 } from "node:fs/promises";
15645
+ import path12 from "node:path";
15646
+ import { parse as parse23 } from "yaml";
15647
+ import { readFile as readFile32 } from "node:fs/promises";
15648
+ import path23 from "node:path";
15649
+ import { parse as parse32 } from "yaml";
15650
+ import { readFile as readFile42 } from "node:fs/promises";
15651
+ import { parse as parse42 } from "yaml";
15652
+ import { readFile as readFile5 } from "node:fs/promises";
15653
+ import path33 from "node:path";
15654
+ import { parse as parse5 } from "yaml";
15655
+ var SCHEMA_EVAL_V22 = "agentv-eval-v2";
15656
+ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.1";
15657
+ var SCHEMA_CONFIG_V22 = "agentv-config-v2";
15658
+ async function detectFileType(filePath) {
15717
15659
  try {
15718
- const raw = await readFile4(path12.resolve(testFilePath), "utf8");
15719
- const parsed = parse4(raw);
15720
- if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
15721
- const targetValue = parsed.target;
15722
- if (typeof targetValue === "string" && targetValue.trim().length > 0) {
15723
- return targetValue.trim();
15724
- }
15725
- }
15726
- } catch {
15727
- }
15728
- return void 0;
15729
- }
15730
- async function discoverTargetsFile(options) {
15731
- const { explicitPath, testFilePath, repoRoot, cwd } = options;
15732
- if (explicitPath) {
15733
- const resolvedExplicit = path12.resolve(explicitPath);
15734
- if (await fileExists5(resolvedExplicit)) {
15735
- return resolvedExplicit;
15660
+ const content = await readFile4(filePath, "utf8");
15661
+ const parsed = parse4(content);
15662
+ if (typeof parsed !== "object" || parsed === null) {
15663
+ return "unknown";
15736
15664
  }
15737
- for (const candidate of TARGET_FILE_CANDIDATES) {
15738
- const nested = path12.join(resolvedExplicit, candidate);
15739
- if (await fileExists5(nested)) {
15740
- return nested;
15741
- }
15665
+ const record = parsed;
15666
+ const schema = record["$schema"];
15667
+ if (typeof schema !== "string") {
15668
+ return "unknown";
15742
15669
  }
15743
- throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
15744
- }
15745
- const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
15746
- const resolvedCwd = path12.resolve(cwd);
15747
- if (!directories.includes(resolvedCwd)) {
15748
- directories.push(resolvedCwd);
15749
- }
15750
- for (const directory of directories) {
15751
- for (const candidate of TARGET_FILE_CANDIDATES) {
15752
- const fullPath = path12.join(directory, candidate);
15753
- if (await fileExists5(fullPath)) {
15754
- return fullPath;
15755
- }
15670
+ switch (schema) {
15671
+ case SCHEMA_EVAL_V22:
15672
+ return "eval";
15673
+ case SCHEMA_TARGETS_V2:
15674
+ return "targets";
15675
+ case SCHEMA_CONFIG_V22:
15676
+ return "config";
15677
+ default:
15678
+ return "unknown";
15756
15679
  }
15680
+ } catch {
15681
+ return "unknown";
15757
15682
  }
15758
- throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
15759
15683
  }
15760
- function pickTargetName(options) {
15761
- const cliName = options.cliTargetName?.trim();
15762
- if (cliName && cliName !== "default") {
15763
- return { name: cliName, source: "cli" };
15764
- }
15765
- const fileName = options.fileTargetName?.trim();
15766
- if (fileName && fileName.length > 0) {
15767
- return { name: fileName, source: "test-file" };
15768
- }
15769
- return { name: "default", source: "default" };
15684
+ var SCHEMA_EVAL_V222 = "agentv-eval-v2";
15685
+ function isObject(value) {
15686
+ return typeof value === "object" && value !== null && !Array.isArray(value);
15770
15687
  }
15771
- async function selectTarget(options) {
15772
- const { testFilePath, repoRoot, cwd, explicitTargetsPath, cliTargetName, dryRun, dryRunDelay, dryRunDelayMin, dryRunDelayMax, env } = options;
15773
- const targetsFilePath = await discoverTargetsFile({
15774
- explicitPath: explicitTargetsPath,
15775
- testFilePath,
15776
- repoRoot,
15777
- cwd
15778
- });
15779
- const definitions = await readTargetDefinitions(targetsFilePath);
15780
- const fileTargetName = await readTestSuiteTarget(testFilePath);
15781
- const targetChoice = pickTargetName({ cliTargetName, fileTargetName });
15782
- const targetDefinition = definitions.find((definition) => definition.name === targetChoice.name);
15783
- if (!targetDefinition) {
15784
- const available = listTargetNames(definitions).join(", ");
15785
- throw new Error(
15786
- `Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
15787
- );
15788
- }
15789
- if (dryRun) {
15790
- const mockTarget = {
15791
- kind: "mock",
15792
- name: `${targetDefinition.name}-dry-run`,
15793
- judgeTarget: void 0,
15794
- config: {
15795
- response: '{"answer":"Mock dry-run response"}',
15796
- delayMs: dryRunDelay,
15797
- delayMinMs: dryRunDelayMin,
15798
- delayMaxMs: dryRunDelayMax
15799
- }
15800
- };
15688
+ async function validateEvalFile(filePath) {
15689
+ const errors = [];
15690
+ const absolutePath = path12.resolve(filePath);
15691
+ let parsed;
15692
+ try {
15693
+ const content = await readFile23(absolutePath, "utf8");
15694
+ parsed = parse23(content);
15695
+ } catch (error) {
15696
+ errors.push({
15697
+ severity: "error",
15698
+ filePath: absolutePath,
15699
+ message: `Failed to parse YAML: ${error.message}`
15700
+ });
15801
15701
  return {
15802
- definitions,
15803
- resolvedTarget: mockTarget,
15804
- targetName: targetChoice.name,
15805
- targetSource: targetChoice.source,
15806
- targetsFilePath
15702
+ valid: false,
15703
+ filePath: absolutePath,
15704
+ fileType: "eval",
15705
+ errors
15807
15706
  };
15808
15707
  }
15809
- try {
15810
- const resolvedTarget = resolveTargetDefinition(targetDefinition, env);
15708
+ if (!isObject(parsed)) {
15709
+ errors.push({
15710
+ severity: "error",
15711
+ filePath: absolutePath,
15712
+ message: "File must contain a YAML object"
15713
+ });
15811
15714
  return {
15812
- definitions,
15813
- resolvedTarget,
15814
- targetName: targetChoice.name,
15815
- targetSource: targetChoice.source,
15816
- targetsFilePath
15715
+ valid: false,
15716
+ filePath: absolutePath,
15717
+ fileType: "eval",
15718
+ errors
15817
15719
  };
15818
- } catch (error) {
15819
- const message = error instanceof Error ? error.message : String(error);
15820
- throw new Error(`Failed to resolve target '${targetChoice.name}': ${message}`);
15821
- }
15822
- }
15823
-
15824
- // src/commands/eval/run-eval.ts
15825
- var DEFAULT_WORKERS = 3;
15826
- function normalizeBoolean(value) {
15827
- return value === true;
15828
- }
15829
- function normalizeString(value) {
15830
- if (typeof value !== "string") {
15831
- return void 0;
15832
15720
  }
15833
- const trimmed = value.trim();
15834
- return trimmed.length > 0 ? trimmed : void 0;
15835
- }
15836
- function normalizeNumber(value, fallback) {
15837
- if (typeof value === "number" && Number.isFinite(value)) {
15838
- return value;
15721
+ const schema = parsed["$schema"];
15722
+ if (schema !== SCHEMA_EVAL_V222) {
15723
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_EVAL_V222}'` : `Missing required field '$schema'. Expected '${SCHEMA_EVAL_V222}'`;
15724
+ errors.push({
15725
+ severity: "error",
15726
+ filePath: absolutePath,
15727
+ location: "$schema",
15728
+ message
15729
+ });
15839
15730
  }
15840
- if (typeof value === "string") {
15841
- const parsed = Number.parseInt(value, 10);
15842
- if (!Number.isNaN(parsed)) {
15843
- return parsed;
15844
- }
15731
+ const evalcases = parsed["evalcases"];
15732
+ if (!Array.isArray(evalcases)) {
15733
+ errors.push({
15734
+ severity: "error",
15735
+ filePath: absolutePath,
15736
+ location: "evalcases",
15737
+ message: "Missing or invalid 'evalcases' field (must be an array)"
15738
+ });
15739
+ return {
15740
+ valid: errors.length === 0,
15741
+ filePath: absolutePath,
15742
+ fileType: "eval",
15743
+ errors
15744
+ };
15745
+ }
15746
+ for (let i6 = 0; i6 < evalcases.length; i6++) {
15747
+ const evalCase = evalcases[i6];
15748
+ const location = `evalcases[${i6}]`;
15749
+ if (!isObject(evalCase)) {
15750
+ errors.push({
15751
+ severity: "error",
15752
+ filePath: absolutePath,
15753
+ location,
15754
+ message: "Eval case must be an object"
15755
+ });
15756
+ continue;
15757
+ }
15758
+ const id = evalCase["id"];
15759
+ if (typeof id !== "string" || id.trim().length === 0) {
15760
+ errors.push({
15761
+ severity: "error",
15762
+ filePath: absolutePath,
15763
+ location: `${location}.id`,
15764
+ message: "Missing or invalid 'id' field (must be a non-empty string)"
15765
+ });
15766
+ }
15767
+ const outcome = evalCase["outcome"];
15768
+ if (typeof outcome !== "string" || outcome.trim().length === 0) {
15769
+ errors.push({
15770
+ severity: "error",
15771
+ filePath: absolutePath,
15772
+ location: `${location}.outcome`,
15773
+ message: "Missing or invalid 'outcome' field (must be a non-empty string)"
15774
+ });
15775
+ }
15776
+ const inputMessages = evalCase["input_messages"];
15777
+ if (!Array.isArray(inputMessages)) {
15778
+ errors.push({
15779
+ severity: "error",
15780
+ filePath: absolutePath,
15781
+ location: `${location}.input_messages`,
15782
+ message: "Missing or invalid 'input_messages' field (must be an array)"
15783
+ });
15784
+ } else {
15785
+ validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
15786
+ }
15787
+ const expectedMessages = evalCase["expected_messages"];
15788
+ if (!Array.isArray(expectedMessages)) {
15789
+ errors.push({
15790
+ severity: "error",
15791
+ filePath: absolutePath,
15792
+ location: `${location}.expected_messages`,
15793
+ message: "Missing or invalid 'expected_messages' field (must be an array)"
15794
+ });
15795
+ } else {
15796
+ validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
15797
+ }
15845
15798
  }
15846
- return fallback;
15847
- }
15848
- function normalizeOptions(rawOptions) {
15849
- const formatStr = normalizeString(rawOptions.outputFormat) ?? "jsonl";
15850
- const format = formatStr === "yaml" ? "yaml" : "jsonl";
15851
- const workers = normalizeNumber(rawOptions.workers, 0);
15852
15799
  return {
15853
- target: normalizeString(rawOptions.target),
15854
- targetsPath: normalizeString(rawOptions.targets),
15855
- evalId: normalizeString(rawOptions.evalId),
15856
- workers: workers > 0 ? workers : void 0,
15857
- outPath: normalizeString(rawOptions.out),
15858
- format,
15859
- dryRun: normalizeBoolean(rawOptions.dryRun),
15860
- dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
15861
- dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
15862
- dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0),
15863
- agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120),
15864
- maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
15865
- cache: normalizeBoolean(rawOptions.cache),
15866
- verbose: normalizeBoolean(rawOptions.verbose),
15867
- dumpPrompts: rawOptions.dumpPrompts
15800
+ valid: errors.length === 0,
15801
+ filePath: absolutePath,
15802
+ fileType: "eval",
15803
+ errors
15868
15804
  };
15869
15805
  }
15870
- async function ensureFileExists(filePath, description) {
15871
- try {
15872
- await access6(filePath, constants6.F_OK);
15873
- } catch {
15874
- throw new Error(`${description} not found: ${filePath}`);
15875
- }
15876
- }
15877
- async function findRepoRoot(start) {
15878
- const fallback = path13.resolve(start);
15879
- let current = fallback;
15880
- while (current !== void 0) {
15881
- const candidate = path13.join(current, ".git");
15882
- try {
15883
- await access6(candidate, constants6.F_OK);
15884
- return current;
15885
- } catch {
15886
- const parent = path13.dirname(current);
15887
- if (parent === current) {
15888
- break;
15806
+ function validateMessages(messages, location, filePath, errors) {
15807
+ for (let i6 = 0; i6 < messages.length; i6++) {
15808
+ const message = messages[i6];
15809
+ const msgLocation = `${location}[${i6}]`;
15810
+ if (!isObject(message)) {
15811
+ errors.push({
15812
+ severity: "error",
15813
+ filePath,
15814
+ location: msgLocation,
15815
+ message: "Message must be an object"
15816
+ });
15817
+ continue;
15818
+ }
15819
+ const role = message["role"];
15820
+ const validRoles = ["system", "user", "assistant"];
15821
+ if (!validRoles.includes(role)) {
15822
+ errors.push({
15823
+ severity: "error",
15824
+ filePath,
15825
+ location: `${msgLocation}.role`,
15826
+ message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
15827
+ });
15828
+ }
15829
+ const content = message["content"];
15830
+ if (typeof content === "string") {
15831
+ } else if (Array.isArray(content)) {
15832
+ for (let j2 = 0; j2 < content.length; j2++) {
15833
+ const contentItem = content[j2];
15834
+ const contentLocation = `${msgLocation}.content[${j2}]`;
15835
+ if (typeof contentItem === "string") {
15836
+ } else if (isObject(contentItem)) {
15837
+ const type = contentItem["type"];
15838
+ if (typeof type !== "string") {
15839
+ errors.push({
15840
+ severity: "error",
15841
+ filePath,
15842
+ location: `${contentLocation}.type`,
15843
+ message: "Content object must have a 'type' field"
15844
+ });
15845
+ }
15846
+ if (type === "text") {
15847
+ const value = contentItem["value"];
15848
+ if (typeof value !== "string") {
15849
+ errors.push({
15850
+ severity: "error",
15851
+ filePath,
15852
+ location: `${contentLocation}.value`,
15853
+ message: "Content with type 'text' must have a 'value' field"
15854
+ });
15855
+ }
15856
+ }
15857
+ } else {
15858
+ errors.push({
15859
+ severity: "error",
15860
+ filePath,
15861
+ location: contentLocation,
15862
+ message: "Content array items must be strings or objects"
15863
+ });
15864
+ }
15889
15865
  }
15890
- current = parent;
15866
+ } else {
15867
+ errors.push({
15868
+ severity: "error",
15869
+ filePath,
15870
+ location: `${msgLocation}.content`,
15871
+ message: "Missing or invalid 'content' field (must be a string or array)"
15872
+ });
15891
15873
  }
15892
15874
  }
15893
- return fallback;
15894
15875
  }
15895
- function buildDefaultOutputPath(cwd, format) {
15896
- const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
15897
- const baseName = "eval";
15898
- const extension = getDefaultExtension(format);
15899
- return path13.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
15876
+ function isObject2(value) {
15877
+ return typeof value === "object" && value !== null && !Array.isArray(value);
15900
15878
  }
15901
- function resolvePromptDirectory(option, cwd) {
15902
- if (option === void 0) {
15903
- return void 0;
15904
- }
15905
- if (typeof option === "string" && option.trim().length > 0) {
15906
- return path13.resolve(cwd, option);
15879
+ var CLI_PLACEHOLDERS2 = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
15880
+ var COMMON_SETTINGS = /* @__PURE__ */ new Set([
15881
+ "provider_batching",
15882
+ "providerBatching"
15883
+ ]);
15884
+ var AZURE_SETTINGS = /* @__PURE__ */ new Set([
15885
+ ...COMMON_SETTINGS,
15886
+ "endpoint",
15887
+ "resource",
15888
+ "resourceName",
15889
+ "api_key",
15890
+ "apiKey",
15891
+ "deployment",
15892
+ "deploymentName",
15893
+ "model",
15894
+ "version",
15895
+ "api_version",
15896
+ "temperature",
15897
+ "max_output_tokens",
15898
+ "maxTokens"
15899
+ ]);
15900
+ var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
15901
+ ...COMMON_SETTINGS,
15902
+ "api_key",
15903
+ "apiKey",
15904
+ "model",
15905
+ "deployment",
15906
+ "variant",
15907
+ "temperature",
15908
+ "max_output_tokens",
15909
+ "maxTokens",
15910
+ "thinking_budget",
15911
+ "thinkingBudget"
15912
+ ]);
15913
+ var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
15914
+ ...COMMON_SETTINGS,
15915
+ "api_key",
15916
+ "apiKey",
15917
+ "model",
15918
+ "deployment",
15919
+ "variant",
15920
+ "temperature",
15921
+ "max_output_tokens",
15922
+ "maxTokens"
15923
+ ]);
15924
+ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
15925
+ ...COMMON_SETTINGS,
15926
+ "executable",
15927
+ "command",
15928
+ "binary",
15929
+ "args",
15930
+ "arguments",
15931
+ "cwd",
15932
+ "timeout_seconds",
15933
+ "timeoutSeconds",
15934
+ "log_dir",
15935
+ "logDir",
15936
+ "log_directory",
15937
+ "logDirectory",
15938
+ "log_format",
15939
+ "logFormat",
15940
+ "log_output_format",
15941
+ "logOutputFormat"
15942
+ ]);
15943
+ var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
15944
+ ...COMMON_SETTINGS,
15945
+ "workspace_template",
15946
+ "workspaceTemplate",
15947
+ "vscode_cmd",
15948
+ "command",
15949
+ "wait",
15950
+ "dry_run",
15951
+ "dryRun",
15952
+ "subagent_root",
15953
+ "subagentRoot"
15954
+ ]);
15955
+ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
15956
+ ...COMMON_SETTINGS,
15957
+ "response",
15958
+ "delayMs",
15959
+ "delayMinMs",
15960
+ "delayMaxMs"
15961
+ ]);
15962
+ var CLI_SETTINGS = /* @__PURE__ */ new Set([
15963
+ ...COMMON_SETTINGS,
15964
+ "command_template",
15965
+ "commandTemplate",
15966
+ "files_format",
15967
+ "filesFormat",
15968
+ "attachments_format",
15969
+ "attachmentsFormat",
15970
+ "cwd",
15971
+ "env",
15972
+ "timeout_seconds",
15973
+ "timeoutSeconds",
15974
+ "healthcheck"
15975
+ ]);
15976
+ function getKnownSettings(provider) {
15977
+ const normalizedProvider = provider.toLowerCase();
15978
+ switch (normalizedProvider) {
15979
+ case "azure":
15980
+ case "azure-openai":
15981
+ return AZURE_SETTINGS;
15982
+ case "anthropic":
15983
+ return ANTHROPIC_SETTINGS;
15984
+ case "gemini":
15985
+ case "google":
15986
+ case "google-gemini":
15987
+ return GEMINI_SETTINGS;
15988
+ case "codex":
15989
+ case "codex-cli":
15990
+ return CODEX_SETTINGS;
15991
+ case "vscode":
15992
+ case "vscode-insiders":
15993
+ return VSCODE_SETTINGS;
15994
+ case "mock":
15995
+ return MOCK_SETTINGS;
15996
+ case "cli":
15997
+ return CLI_SETTINGS;
15998
+ default:
15999
+ return null;
15907
16000
  }
15908
- return path13.join(cwd, ".agentv", "prompts");
15909
- }
15910
- function createEvaluationCache() {
15911
- const store = /* @__PURE__ */ new Map();
15912
- return {
15913
- async get(key2) {
15914
- return store.get(key2);
15915
- },
15916
- async set(key2, value) {
15917
- store.set(key2, value);
15918
- }
15919
- };
15920
- }
15921
- function createProgressReporter(maxWorkers) {
15922
- const display = new ProgressDisplay(maxWorkers);
15923
- return {
15924
- isInteractive: display.isInteractiveMode(),
15925
- start: () => display.start(),
15926
- setTotal: (total) => display.setTotalTests(total),
15927
- update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
15928
- finish: () => display.finish(),
15929
- addLogPaths: (paths) => display.addLogPaths(paths)
15930
- };
15931
- }
15932
- function makeEvalKey(testFilePath, evalId) {
15933
- return `${path13.resolve(testFilePath)}::${evalId}`;
15934
16001
  }
15935
- function createDisplayIdTracker() {
15936
- const map = /* @__PURE__ */ new Map();
15937
- let nextId = 1;
15938
- return {
15939
- getOrAssign(evalKey) {
15940
- const existing = map.get(evalKey);
15941
- if (existing !== void 0) {
15942
- return existing;
15943
- }
15944
- const assigned = nextId++;
15945
- map.set(evalKey, assigned);
15946
- return assigned;
16002
+ function validateUnknownSettings(settings, provider, absolutePath, location, errors) {
16003
+ const knownSettings = getKnownSettings(provider);
16004
+ if (!knownSettings) {
16005
+ return;
16006
+ }
16007
+ for (const key2 of Object.keys(settings)) {
16008
+ if (!knownSettings.has(key2)) {
16009
+ errors.push({
16010
+ severity: "warning",
16011
+ filePath: absolutePath,
16012
+ location: `${location}.${key2}`,
16013
+ message: `Unknown setting '${key2}' for ${provider} provider. This property will be ignored.`
16014
+ });
15947
16015
  }
15948
- };
16016
+ }
15949
16017
  }
15950
- async function prepareFileMetadata(params) {
15951
- const { testFilePath, repoRoot, cwd, options } = params;
15952
- await ensureFileExists(testFilePath, "Test file");
15953
- await loadEnvFromHierarchy({
15954
- testFilePath,
15955
- repoRoot,
15956
- verbose: options.verbose
15957
- });
15958
- const selection = await selectTarget({
15959
- testFilePath,
15960
- repoRoot,
15961
- cwd,
15962
- explicitTargetsPath: options.targetsPath,
15963
- cliTargetName: options.target,
15964
- dryRun: options.dryRun,
15965
- dryRunDelay: options.dryRunDelay,
15966
- dryRunDelayMin: options.dryRunDelayMin,
15967
- dryRunDelayMax: options.dryRunDelayMax,
15968
- env: process.env
15969
- });
15970
- const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
15971
- const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
15972
- const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose });
15973
- const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
15974
- return { evalIds: filteredIds, selection, inlineTargetLabel };
15975
- }
15976
- async function runWithLimit(items, limit, task) {
15977
- const safeLimit = Math.max(1, limit);
15978
- let index = 0;
15979
- const workers = Array.from({ length: safeLimit }, async () => {
15980
- while (index < items.length) {
15981
- const current = items[index];
15982
- index += 1;
15983
- await task(current);
15984
- }
15985
- });
15986
- await Promise.all(workers);
15987
- }
15988
- async function runSingleEvalFile(params) {
15989
- const {
15990
- testFilePath,
15991
- cwd,
15992
- repoRoot,
15993
- options,
15994
- outputWriter,
15995
- cache,
15996
- evaluationRunner,
15997
- workersOverride,
15998
- progressReporter,
15999
- seenEvalCases,
16000
- displayIdTracker,
16001
- selection,
16002
- inlineTargetLabel
16003
- } = params;
16004
- await ensureFileExists(testFilePath, "Test file");
16005
- const resolvedTargetSelection = selection;
16006
- const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
16007
- const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
16008
- if (!progressReporter.isInteractive || options.verbose) {
16009
- console.log(targetMessage);
16010
- }
16011
- const promptDumpDir = resolvePromptDirectory(options.dumpPrompts, cwd);
16012
- if (promptDumpDir) {
16013
- await mkdir6(promptDumpDir, { recursive: true });
16014
- if (options.verbose) {
16015
- console.log(`Prompt dumps enabled at: ${promptDumpDir}`);
16016
- }
16017
- }
16018
- const agentTimeoutMs = Math.max(0, options.agentTimeoutSeconds) * 1e3;
16019
- const workerPreference = workersOverride ?? options.workers;
16020
- let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
16021
- if (resolvedWorkers < 1 || resolvedWorkers > 50) {
16022
- throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
16023
- }
16024
- const isVSCodeProvider = ["vscode", "vscode-insiders"].includes(
16025
- resolvedTargetSelection.resolvedTarget.kind
16026
- );
16027
- if (isVSCodeProvider && resolvedWorkers > 1) {
16028
- console.warn(`Warning: VSCode providers require window focus. Limiting workers from ${resolvedWorkers} to 1 to prevent race conditions.`);
16029
- resolvedWorkers = 1;
16030
- }
16031
- if (options.verbose) {
16032
- const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
16033
- console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
16034
- }
16035
- if (isVSCodeProvider && !options.dryRun) {
16036
- await ensureVSCodeSubagents({
16037
- kind: resolvedTargetSelection.resolvedTarget.kind,
16038
- count: resolvedWorkers,
16039
- verbose: options.verbose
16018
+ async function validateTargetsFile(filePath) {
16019
+ const errors = [];
16020
+ const absolutePath = path23.resolve(filePath);
16021
+ let parsed;
16022
+ try {
16023
+ const content = await readFile32(absolutePath, "utf8");
16024
+ parsed = parse32(content);
16025
+ } catch (error) {
16026
+ errors.push({
16027
+ severity: "error",
16028
+ filePath: absolutePath,
16029
+ message: `Failed to parse YAML: ${error.message}`
16040
16030
  });
16031
+ return {
16032
+ valid: false,
16033
+ filePath: absolutePath,
16034
+ fileType: "targets",
16035
+ errors
16036
+ };
16041
16037
  }
16042
- const results = await evaluationRunner({
16043
- testFilePath,
16044
- repoRoot,
16045
- target: resolvedTargetSelection.resolvedTarget,
16046
- targets: resolvedTargetSelection.definitions,
16047
- env: process.env,
16048
- maxRetries: Math.max(0, options.maxRetries),
16049
- agentTimeoutMs,
16050
- promptDumpDir,
16051
- cache,
16052
- useCache: options.cache,
16053
- evalId: options.evalId,
16054
- verbose: options.verbose,
16055
- maxConcurrency: resolvedWorkers,
16056
- onResult: async (result) => {
16057
- await outputWriter.append(result);
16058
- },
16059
- onProgress: async (event) => {
16060
- const evalKey = makeEvalKey(testFilePath, event.evalId);
16061
- if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
16062
- seenEvalCases.add(evalKey);
16063
- progressReporter.setTotal(seenEvalCases.size);
16064
- }
16065
- const displayId = displayIdTracker.getOrAssign(evalKey);
16066
- progressReporter.update(displayId, {
16067
- workerId: displayId,
16068
- evalId: event.evalId,
16069
- status: event.status,
16070
- startedAt: event.startedAt,
16071
- completedAt: event.completedAt,
16072
- error: event.error,
16073
- targetLabel: inlineTargetLabel
16038
+ function validateCliSettings(settings, absolutePath2, location, errors2) {
16039
+ if (!isObject2(settings)) {
16040
+ errors2.push({
16041
+ severity: "error",
16042
+ filePath: absolutePath2,
16043
+ location,
16044
+ message: "CLI provider requires a 'settings' object"
16074
16045
  });
16075
- }
16076
- });
16077
- return { results: [...results], promptDumpDir };
16078
- }
16079
- async function runEvalCommand(input) {
16080
- const options = normalizeOptions(input.rawOptions);
16081
- const cwd = process.cwd();
16082
- const repoRoot = await findRepoRoot(cwd);
16083
- if (options.verbose) {
16084
- console.log(`Repository root: ${repoRoot}`);
16085
- }
16086
- const outputPath = options.outPath ? path13.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
16087
- console.log(`Output path: ${outputPath}`);
16088
- const outputWriter = await createOutputWriter(outputPath, options.format);
16089
- const cache = options.cache ? createEvaluationCache() : void 0;
16090
- const evaluationRunner = await resolveEvaluationRunner();
16091
- const allResults = [];
16092
- let lastPromptDumpDir;
16093
- const seenEvalCases = /* @__PURE__ */ new Set();
16094
- const resolvedTestFiles = input.testFiles.map((file) => path13.resolve(file));
16095
- const displayIdTracker = createDisplayIdTracker();
16096
- const totalWorkers = options.workers ?? DEFAULT_WORKERS;
16097
- const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
16098
- const perFileWorkers = options.workers ? Math.max(1, Math.floor(totalWorkers / fileConcurrency)) : void 0;
16099
- const fileMetadata = /* @__PURE__ */ new Map();
16100
- for (const testFilePath of resolvedTestFiles) {
16101
- const meta = await prepareFileMetadata({
16102
- testFilePath,
16103
- repoRoot,
16104
- cwd,
16105
- options
16106
- });
16107
- fileMetadata.set(testFilePath, meta);
16108
- }
16109
- const totalEvalCount = Array.from(fileMetadata.values()).reduce(
16110
- (sum, meta) => sum + meta.evalIds.length,
16111
- 0
16112
- );
16113
- if (totalEvalCount === 0) {
16114
- throw new Error("No eval cases matched the provided filters.");
16115
- }
16116
- const progressReporter = createProgressReporter(totalWorkers);
16117
- progressReporter.start();
16118
- progressReporter.setTotal(totalEvalCount);
16119
- const seenCodexLogPaths = /* @__PURE__ */ new Set();
16120
- const unsubscribeCodexLogs = subscribeToCodexLogEntries((entry) => {
16121
- if (!entry.filePath || seenCodexLogPaths.has(entry.filePath)) {
16122
16046
  return;
16123
16047
  }
16124
- seenCodexLogPaths.add(entry.filePath);
16125
- progressReporter.addLogPaths([entry.filePath]);
16126
- });
16127
- for (const [testFilePath, meta] of fileMetadata.entries()) {
16128
- for (const evalId of meta.evalIds) {
16129
- const evalKey = makeEvalKey(testFilePath, evalId);
16130
- seenEvalCases.add(evalKey);
16131
- const displayId = displayIdTracker.getOrAssign(evalKey);
16132
- progressReporter.update(displayId, {
16133
- workerId: displayId,
16134
- evalId,
16135
- status: "pending",
16136
- targetLabel: meta.inlineTargetLabel
16048
+ const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
16049
+ if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
16050
+ errors2.push({
16051
+ severity: "error",
16052
+ filePath: absolutePath2,
16053
+ location: `${location}.commandTemplate`,
16054
+ message: "CLI provider requires 'commandTemplate' as a non-empty string"
16137
16055
  });
16056
+ } else {
16057
+ recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
16138
16058
  }
16139
- }
16140
- try {
16141
- await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
16142
- const targetPrep = fileMetadata.get(testFilePath);
16143
- if (!targetPrep) {
16144
- throw new Error(`Missing metadata for ${testFilePath}`);
16145
- }
16146
- const result = await runSingleEvalFile({
16147
- testFilePath,
16148
- cwd,
16149
- repoRoot,
16150
- options,
16151
- outputWriter,
16152
- cache,
16153
- evaluationRunner,
16154
- workersOverride: perFileWorkers,
16155
- progressReporter,
16156
- seenEvalCases,
16157
- displayIdTracker,
16158
- selection: targetPrep.selection,
16159
- inlineTargetLabel: targetPrep.inlineTargetLabel
16059
+ const attachmentsFormat = settings["attachments_format"] ?? settings["attachmentsFormat"];
16060
+ if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
16061
+ errors2.push({
16062
+ severity: "error",
16063
+ filePath: absolutePath2,
16064
+ location: `${location}.attachmentsFormat`,
16065
+ message: "'attachmentsFormat' must be a string when provided"
16160
16066
  });
16161
- allResults.push(...result.results);
16162
- if (result.promptDumpDir) {
16163
- lastPromptDumpDir = result.promptDumpDir;
16164
- }
16165
- });
16166
- progressReporter.finish();
16167
- const summary = calculateEvaluationSummary(allResults);
16168
- console.log(formatEvaluationSummary(summary));
16169
- if (allResults.length > 0) {
16170
- console.log(`
16171
- Results written to: ${outputPath}`);
16172
16067
  }
16173
- if (lastPromptDumpDir && allResults.length > 0) {
16174
- console.log(`Prompt payloads saved to: ${lastPromptDumpDir}`);
16068
+ const filesFormat = settings["files_format"] ?? settings["filesFormat"];
16069
+ if (filesFormat !== void 0 && typeof filesFormat !== "string") {
16070
+ errors2.push({
16071
+ severity: "error",
16072
+ filePath: absolutePath2,
16073
+ location: `${location}.filesFormat`,
16074
+ message: "'filesFormat' must be a string when provided"
16075
+ });
16175
16076
  }
16176
- } finally {
16177
- unsubscribeCodexLogs();
16178
- await outputWriter.close().catch(() => void 0);
16179
- }
16180
- }
16181
- async function resolveEvaluationRunner() {
16182
- const overridePath = process.env.AGENTEVO_CLI_EVAL_RUNNER;
16183
- if (!overridePath) {
16184
- return runEvaluation;
16185
- }
16186
- const resolved = path13.isAbsolute(overridePath) ? overridePath : path13.resolve(process.cwd(), overridePath);
16187
- const moduleUrl = pathToFileURL(resolved).href;
16188
- const mod = await import(moduleUrl);
16189
- const candidate = mod.runEvaluation;
16190
- if (typeof candidate !== "function") {
16191
- throw new Error(
16192
- `Module '${resolved}' must export a 'runEvaluation' function to override the default implementation`
16193
- );
16194
- }
16195
- return candidate;
16196
- }
16197
-
16198
- // src/commands/eval/index.ts
16199
- function parseInteger(value, fallback) {
16200
- const parsed = Number.parseInt(value, 10);
16201
- if (Number.isNaN(parsed)) {
16202
- return fallback;
16203
- }
16204
- return parsed;
16205
- }
16206
- function registerEvalCommand(program) {
16207
- program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
16208
- "--workers <count>",
16209
- "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
16210
- (value) => parseInteger(value, 1)
16211
- ).option("--out <path>", "Write results to the specified path").option(
16212
- "--output-format <format>",
16213
- "Output format: 'jsonl' or 'yaml' (default: jsonl)",
16214
- "jsonl"
16215
- ).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
16216
- "--dry-run-delay <ms>",
16217
- "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
16218
- (value) => parseInteger(value, 0),
16219
- 0
16220
- ).option(
16221
- "--dry-run-delay-min <ms>",
16222
- "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
16223
- (value) => parseInteger(value, 0),
16224
- 0
16225
- ).option(
16226
- "--dry-run-delay-max <ms>",
16227
- "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
16228
- (value) => parseInteger(value, 0),
16229
- 0
16230
- ).option(
16231
- "--agent-timeout <seconds>",
16232
- "Timeout in seconds for provider responses (default: 120)",
16233
- (value) => parseInteger(value, 120),
16234
- 120
16235
- ).option(
16236
- "--max-retries <count>",
16237
- "Retry count for timeout recoveries (default: 2)",
16238
- (value) => parseInteger(value, 2),
16239
- 2
16240
- ).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
16241
- "--dump-prompts [dir]",
16242
- "Persist prompt payloads for debugging (optional custom directory)"
16243
- ).action(async (evalPaths, rawOptions) => {
16244
- const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
16245
- await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
16246
- });
16247
- return program;
16248
- }
16249
- async function resolveEvalPaths(evalPaths, cwd) {
16250
- const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
16251
- if (normalizedInputs.length === 0) {
16252
- throw new Error("No eval paths provided.");
16253
- }
16254
- const unmatched = [];
16255
- const results = /* @__PURE__ */ new Set();
16256
- for (const pattern of normalizedInputs) {
16257
- const candidatePath = path14.isAbsolute(pattern) ? path14.normalize(pattern) : path14.resolve(cwd, pattern);
16258
- try {
16259
- const stats = await stat3(candidatePath);
16260
- if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
16261
- results.add(candidatePath);
16262
- continue;
16263
- }
16264
- } catch {
16265
- }
16266
- const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
16267
- const matches = await fg(globPattern, {
16268
- cwd,
16269
- absolute: true,
16270
- onlyFiles: true,
16271
- unique: true,
16272
- dot: true,
16273
- followSymbolicLinks: true
16274
- });
16275
- const yamlMatches = matches.filter((filePath) => /\.ya?ml$/i.test(filePath));
16276
- if (yamlMatches.length === 0) {
16277
- unmatched.push(pattern);
16278
- continue;
16279
- }
16280
- yamlMatches.forEach((filePath) => results.add(path14.normalize(filePath)));
16281
- }
16282
- if (unmatched.length > 0) {
16283
- throw new Error(
16284
- `No eval files matched: ${unmatched.join(
16285
- ", "
16286
- )}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
16287
- );
16288
- }
16289
- const sorted = Array.from(results);
16290
- sorted.sort();
16291
- return sorted;
16292
- }
16293
-
16294
- // src/commands/init/index.ts
16295
- import { existsSync, mkdirSync, writeFileSync } from "node:fs";
16296
- import path16 from "node:path";
16297
- import * as readline from "node:readline/promises";
16298
-
16299
- // src/templates/index.ts
16300
- import { readFileSync, readdirSync, statSync } from "node:fs";
16301
- import path15 from "node:path";
16302
- import { fileURLToPath as fileURLToPath2 } from "node:url";
16303
- var TemplateManager = class {
16304
- static getGithubTemplates() {
16305
- return this.getTemplatesFromDir("github");
16306
- }
16307
- static getAgentvTemplates() {
16308
- return this.getTemplatesFromDir("agentv");
16309
- }
16310
- static getTemplatesFromDir(subdir) {
16311
- const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
16312
- let templatesDir;
16313
- if (currentDir.includes(path15.sep + "dist")) {
16314
- templatesDir = path15.join(currentDir, "templates", subdir);
16315
- } else {
16316
- templatesDir = path15.join(currentDir, subdir);
16077
+ const cwd = settings["cwd"];
16078
+ if (cwd !== void 0 && typeof cwd !== "string") {
16079
+ errors2.push({
16080
+ severity: "error",
16081
+ filePath: absolutePath2,
16082
+ location: `${location}.cwd`,
16083
+ message: "'cwd' must be a string when provided"
16084
+ });
16317
16085
  }
16318
- return this.readTemplatesRecursively(templatesDir, "");
16319
- }
16320
- static readTemplatesRecursively(dir, relativePath) {
16321
- const templates = [];
16322
- const entries = readdirSync(dir);
16323
- for (const entry of entries) {
16324
- const fullPath = path15.join(dir, entry);
16325
- const stat5 = statSync(fullPath);
16326
- const entryRelativePath = relativePath ? path15.join(relativePath, entry) : entry;
16327
- if (stat5.isDirectory()) {
16328
- templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
16329
- } else {
16330
- const content = readFileSync(fullPath, "utf-8");
16331
- templates.push({
16332
- path: entryRelativePath.split(path15.sep).join("/"),
16333
- // Normalize to forward slashes
16334
- content
16086
+ const timeoutSeconds = settings["timeout_seconds"] ?? settings["timeoutSeconds"];
16087
+ if (timeoutSeconds !== void 0) {
16088
+ const numericTimeout = Number(timeoutSeconds);
16089
+ if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
16090
+ errors2.push({
16091
+ severity: "error",
16092
+ filePath: absolutePath2,
16093
+ location: `${location}.timeoutSeconds`,
16094
+ message: "'timeoutSeconds' must be a positive number when provided"
16335
16095
  });
16336
16096
  }
16337
16097
  }
16338
- return templates;
16339
- }
16340
- };
16341
-
16342
- // src/commands/init/index.ts
16343
- async function promptYesNo(message) {
16344
- const rl2 = readline.createInterface({
16345
- input: process.stdin,
16346
- output: process.stdout
16347
- });
16348
- try {
16349
- const answer = await rl2.question(`${message} (y/N): `);
16350
- return answer.toLowerCase() === "y" || answer.toLowerCase() === "yes";
16351
- } finally {
16352
- rl2.close();
16353
- }
16354
- }
16355
- async function initCommand(options = {}) {
16356
- const targetPath = path16.resolve(options.targetPath ?? ".");
16357
- const githubDir = path16.join(targetPath, ".github");
16358
- const agentvDir = path16.join(targetPath, ".agentv");
16359
- const githubTemplates = TemplateManager.getGithubTemplates();
16360
- const agentvTemplates = TemplateManager.getAgentvTemplates();
16361
- const existingFiles = [];
16362
- if (existsSync(githubDir)) {
16363
- for (const template of githubTemplates) {
16364
- const targetFilePath = path16.join(githubDir, template.path);
16365
- if (existsSync(targetFilePath)) {
16366
- existingFiles.push(path16.relative(targetPath, targetFilePath));
16098
+ const envOverrides = settings["env"];
16099
+ if (envOverrides !== void 0) {
16100
+ if (!isObject2(envOverrides)) {
16101
+ errors2.push({
16102
+ severity: "error",
16103
+ filePath: absolutePath2,
16104
+ location: `${location}.env`,
16105
+ message: "'env' must be an object with string values"
16106
+ });
16107
+ } else {
16108
+ for (const [key2, value] of Object.entries(envOverrides)) {
16109
+ if (typeof value !== "string" || value.trim().length === 0) {
16110
+ errors2.push({
16111
+ severity: "error",
16112
+ filePath: absolutePath2,
16113
+ location: `${location}.env.${key2}`,
16114
+ message: `Environment override '${key2}' must be a non-empty string`
16115
+ });
16116
+ }
16117
+ }
16367
16118
  }
16368
16119
  }
16369
- }
16370
- if (existsSync(agentvDir)) {
16371
- for (const template of agentvTemplates) {
16372
- const targetFilePath = path16.join(agentvDir, template.path);
16373
- if (existsSync(targetFilePath)) {
16374
- existingFiles.push(path16.relative(targetPath, targetFilePath));
16375
- }
16120
+ const healthcheck = settings["healthcheck"];
16121
+ if (healthcheck !== void 0) {
16122
+ validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
16376
16123
  }
16377
16124
  }
16378
- if (existingFiles.length > 0) {
16379
- console.log("We detected an existing setup:");
16380
- existingFiles.forEach((file) => console.log(` - ${file}`));
16381
- console.log();
16382
- const shouldReplace = await promptYesNo("Do you want to replace these files?");
16383
- if (!shouldReplace) {
16384
- console.log("\nInit cancelled. No files were changed.");
16125
+ function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
16126
+ if (!isObject2(healthcheck)) {
16127
+ errors2.push({
16128
+ severity: "error",
16129
+ filePath: absolutePath2,
16130
+ location,
16131
+ message: "'healthcheck' must be an object when provided"
16132
+ });
16385
16133
  return;
16386
16134
  }
16387
- console.log();
16388
- }
16389
- if (!existsSync(githubDir)) {
16390
- mkdirSync(githubDir, { recursive: true });
16391
- }
16392
- if (!existsSync(agentvDir)) {
16393
- mkdirSync(agentvDir, { recursive: true });
16394
- }
16395
- for (const template of githubTemplates) {
16396
- const targetFilePath = path16.join(githubDir, template.path);
16397
- const targetDirPath = path16.dirname(targetFilePath);
16398
- if (!existsSync(targetDirPath)) {
16399
- mkdirSync(targetDirPath, { recursive: true });
16135
+ const type = healthcheck["type"];
16136
+ if (type !== "http" && type !== "command") {
16137
+ errors2.push({
16138
+ severity: "error",
16139
+ filePath: absolutePath2,
16140
+ location: `${location}.type`,
16141
+ message: "healthcheck.type must be either 'http' or 'command'"
16142
+ });
16143
+ return;
16400
16144
  }
16401
- writeFileSync(targetFilePath, template.content, "utf-8");
16402
- console.log(`Created ${path16.relative(targetPath, targetFilePath)}`);
16403
- }
16404
- for (const template of agentvTemplates) {
16405
- const targetFilePath = path16.join(agentvDir, template.path);
16406
- const targetDirPath = path16.dirname(targetFilePath);
16407
- if (!existsSync(targetDirPath)) {
16408
- mkdirSync(targetDirPath, { recursive: true });
16145
+ const timeoutSeconds = healthcheck["timeout_seconds"] ?? healthcheck["timeoutSeconds"];
16146
+ if (timeoutSeconds !== void 0) {
16147
+ const numericTimeout = Number(timeoutSeconds);
16148
+ if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
16149
+ errors2.push({
16150
+ severity: "error",
16151
+ filePath: absolutePath2,
16152
+ location: `${location}.timeoutSeconds`,
16153
+ message: "healthcheck.timeoutSeconds must be a positive number when provided"
16154
+ });
16155
+ }
16409
16156
  }
16410
- writeFileSync(targetFilePath, template.content, "utf-8");
16411
- console.log(`Created ${path16.relative(targetPath, targetFilePath)}`);
16412
- }
16413
- console.log("\nAgentV initialized successfully!");
16414
- console.log(`
16415
- Files installed to ${path16.relative(targetPath, githubDir)}:`);
16416
- githubTemplates.forEach((t) => console.log(` - ${t.path}`));
16417
- console.log(`
16418
- Files installed to ${path16.relative(targetPath, agentvDir)}:`);
16419
- agentvTemplates.forEach((t) => console.log(` - ${t.path}`));
16420
- console.log("\nYou can now:");
16421
- console.log(" 1. Edit .agentv/.env with your API credentials");
16422
- console.log(" 2. Configure targets in .agentv/targets.yaml");
16423
- console.log(" 3. Create eval files using the schema and prompt templates");
16424
- }
16425
-
16426
- // src/commands/status.ts
16427
- function registerStatusCommand(program) {
16428
- program.command("status").description("Show the latest AgentV kernel status").action(() => {
16429
- const kernel = createAgentKernel();
16430
- console.log(`Kernel status: ${kernel.status}`);
16431
- });
16432
- return program;
16433
- }
16434
-
16435
- // src/commands/validate/format-output.ts
16436
- var ANSI_RED = "\x1B[31m";
16437
- var ANSI_YELLOW2 = "\x1B[33m";
16438
- var ANSI_GREEN = "\x1B[32m";
16439
- var ANSI_CYAN = "\x1B[36m";
16440
- var ANSI_BOLD = "\x1B[1m";
16441
- var ANSI_RESET2 = "\x1B[0m";
16442
- function formatSummary(summary, useColors) {
16443
- const lines = [];
16444
- lines.push("");
16445
- lines.push(formatHeader("Validation Summary", useColors));
16446
- lines.push("");
16447
- for (const result of summary.results) {
16448
- lines.push(formatFileResult(result, useColors));
16449
- }
16450
- lines.push("");
16451
- lines.push(formatStats(summary, useColors));
16452
- lines.push("");
16453
- return lines.join("\n");
16454
- }
16455
- function formatHeader(text, useColors) {
16456
- if (useColors) {
16457
- return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
16458
- }
16459
- return text;
16460
- }
16461
- function formatFileResult(result, useColors) {
16462
- const lines = [];
16463
- const status = result.valid ? "\u2713" : "\u2717";
16464
- const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
16465
- const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
16466
- const fileName = result.filePath;
16467
- lines.push(`${statusText} ${fileName}`);
16468
- if (result.errors.length > 0) {
16469
- for (const error of result.errors) {
16470
- lines.push(formatError(error, useColors));
16157
+ if (type === "http") {
16158
+ const url = healthcheck["url"];
16159
+ if (typeof url !== "string" || url.trim().length === 0) {
16160
+ errors2.push({
16161
+ severity: "error",
16162
+ filePath: absolutePath2,
16163
+ location: `${location}.url`,
16164
+ message: "healthcheck.url must be a non-empty string for http checks"
16165
+ });
16166
+ }
16167
+ return;
16471
16168
  }
16472
- }
16473
- return lines.join("\n");
16474
- }
16475
- function formatError(error, useColors) {
16476
- const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
16477
- const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
16478
- const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
16479
- const location = error.location ? ` [${error.location}]` : "";
16480
- return `${coloredPrefix}${location} ${error.message}`;
16481
- }
16482
- function formatStats(summary, useColors) {
16483
- const lines = [];
16484
- const totalText = `Total files: ${summary.totalFiles}`;
16485
- const validText = `Valid: ${summary.validFiles}`;
16486
- const invalidText = `Invalid: ${summary.invalidFiles}`;
16487
- if (useColors) {
16488
- lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
16489
- lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
16490
- if (summary.invalidFiles > 0) {
16491
- lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
16169
+ const commandTemplate = healthcheck["command_template"] ?? healthcheck["commandTemplate"];
16170
+ if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
16171
+ errors2.push({
16172
+ severity: "error",
16173
+ filePath: absolutePath2,
16174
+ location: `${location}.commandTemplate`,
16175
+ message: "healthcheck.commandTemplate must be a non-empty string for command checks"
16176
+ });
16492
16177
  } else {
16493
- lines.push(invalidText);
16494
- }
16495
- } else {
16496
- lines.push(totalText);
16497
- lines.push(validText);
16498
- lines.push(invalidText);
16499
- }
16500
- return lines.join("\n");
16501
- }
16502
- function isTTY() {
16503
- return process.stdout.isTTY ?? false;
16504
- }
16505
-
16506
- // ../../packages/core/dist/evaluation/validation/index.js
16507
- import { readFile as readFile5 } from "node:fs/promises";
16508
- import { parse as parse5 } from "yaml";
16509
- import { readFile as readFile23 } from "node:fs/promises";
16510
- import path17 from "node:path";
16511
- import { parse as parse23 } from "yaml";
16512
- import { readFile as readFile33 } from "node:fs/promises";
16513
- import path23 from "node:path";
16514
- import { parse as parse32 } from "yaml";
16515
- import { readFile as readFile42 } from "node:fs/promises";
16516
- import { parse as parse42 } from "yaml";
16517
- import { readFile as readFile52 } from "node:fs/promises";
16518
- import path33 from "node:path";
16519
- import { parse as parse52 } from "yaml";
16520
- var SCHEMA_EVAL_V22 = "agentv-eval-v2";
16521
- var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
16522
- var SCHEMA_CONFIG_V22 = "agentv-config-v2";
16523
- async function detectFileType(filePath) {
16524
- try {
16525
- const content = await readFile5(filePath, "utf8");
16526
- const parsed = parse5(content);
16527
- if (typeof parsed !== "object" || parsed === null) {
16528
- return "unknown";
16178
+ recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
16529
16179
  }
16530
- const record = parsed;
16531
- const schema = record["$schema"];
16532
- if (typeof schema !== "string") {
16533
- return "unknown";
16180
+ const cwd = healthcheck["cwd"];
16181
+ if (cwd !== void 0 && typeof cwd !== "string") {
16182
+ errors2.push({
16183
+ severity: "error",
16184
+ filePath: absolutePath2,
16185
+ location: `${location}.cwd`,
16186
+ message: "healthcheck.cwd must be a string when provided"
16187
+ });
16534
16188
  }
16535
- switch (schema) {
16536
- case SCHEMA_EVAL_V22:
16537
- return "eval";
16538
- case SCHEMA_TARGETS_V2:
16539
- return "targets";
16540
- case SCHEMA_CONFIG_V22:
16541
- return "config";
16542
- default:
16543
- return "unknown";
16189
+ }
16190
+ function recordUnknownPlaceholders(template, absolutePath2, location, errors2) {
16191
+ const placeholders = extractPlaceholders(template);
16192
+ for (const placeholder of placeholders) {
16193
+ if (!CLI_PLACEHOLDERS2.has(placeholder)) {
16194
+ errors2.push({
16195
+ severity: "error",
16196
+ filePath: absolutePath2,
16197
+ location,
16198
+ message: `Unknown CLI placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS2).join(", ")}`
16199
+ });
16200
+ }
16544
16201
  }
16545
- } catch {
16546
- return "unknown";
16547
16202
  }
16548
- }
16549
- var SCHEMA_EVAL_V222 = "agentv-eval-v2";
16550
- function isObject(value) {
16551
- return typeof value === "object" && value !== null && !Array.isArray(value);
16552
- }
16553
- async function validateEvalFile(filePath) {
16554
- const errors = [];
16555
- const absolutePath = path17.resolve(filePath);
16556
- let parsed;
16557
- try {
16558
- const content = await readFile23(absolutePath, "utf8");
16559
- parsed = parse23(content);
16560
- } catch (error) {
16561
- errors.push({
16562
- severity: "error",
16563
- filePath: absolutePath,
16564
- message: `Failed to parse YAML: ${error.message}`
16565
- });
16566
- return {
16567
- valid: false,
16568
- filePath: absolutePath,
16569
- fileType: "eval",
16570
- errors
16571
- };
16203
+ function extractPlaceholders(template) {
16204
+ const matches = template.matchAll(/\{([A-Z_]+)\}/g);
16205
+ const result = [];
16206
+ for (const match of matches) {
16207
+ const placeholder = match[1];
16208
+ if (placeholder) {
16209
+ result.push(placeholder);
16210
+ }
16211
+ }
16212
+ return result;
16572
16213
  }
16573
- if (!isObject(parsed)) {
16214
+ if (!isObject2(parsed)) {
16574
16215
  errors.push({
16575
16216
  severity: "error",
16576
16217
  filePath: absolutePath,
@@ -16579,13 +16220,13 @@ async function validateEvalFile(filePath) {
16579
16220
  return {
16580
16221
  valid: false,
16581
16222
  filePath: absolutePath,
16582
- fileType: "eval",
16223
+ fileType: "targets",
16583
16224
  errors
16584
16225
  };
16585
16226
  }
16586
16227
  const schema = parsed["$schema"];
16587
- if (schema !== SCHEMA_EVAL_V222) {
16588
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_EVAL_V222}'` : `Missing required field '$schema'. Expected '${SCHEMA_EVAL_V222}'`;
16228
+ if (schema !== TARGETS_SCHEMA_V2) {
16229
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
16589
16230
  errors.push({
16590
16231
  severity: "error",
16591
16232
  filePath: absolutePath,
@@ -16593,639 +16234,1124 @@ async function validateEvalFile(filePath) {
16593
16234
  message
16594
16235
  });
16595
16236
  }
16596
- const evalcases = parsed["evalcases"];
16597
- if (!Array.isArray(evalcases)) {
16237
+ const targets = parsed["targets"];
16238
+ if (!Array.isArray(targets)) {
16598
16239
  errors.push({
16599
16240
  severity: "error",
16600
16241
  filePath: absolutePath,
16601
- location: "evalcases",
16602
- message: "Missing or invalid 'evalcases' field (must be an array)"
16242
+ location: "targets",
16243
+ message: "Missing or invalid 'targets' field (must be an array)"
16603
16244
  });
16604
16245
  return {
16605
16246
  valid: errors.length === 0,
16606
16247
  filePath: absolutePath,
16607
- fileType: "eval",
16248
+ fileType: "targets",
16608
16249
  errors
16609
16250
  };
16610
16251
  }
16611
- for (let i6 = 0; i6 < evalcases.length; i6++) {
16612
- const evalCase = evalcases[i6];
16613
- const location = `evalcases[${i6}]`;
16614
- if (!isObject(evalCase)) {
16252
+ const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
16253
+ for (let i6 = 0; i6 < targets.length; i6++) {
16254
+ const target = targets[i6];
16255
+ const location = `targets[${i6}]`;
16256
+ if (!isObject2(target)) {
16615
16257
  errors.push({
16616
16258
  severity: "error",
16617
16259
  filePath: absolutePath,
16618
16260
  location,
16619
- message: "Eval case must be an object"
16261
+ message: "Target must be an object"
16620
16262
  });
16621
16263
  continue;
16622
16264
  }
16623
- const id = evalCase["id"];
16624
- if (typeof id !== "string" || id.trim().length === 0) {
16265
+ const name = target["name"];
16266
+ if (typeof name !== "string" || name.trim().length === 0) {
16625
16267
  errors.push({
16626
16268
  severity: "error",
16627
16269
  filePath: absolutePath,
16628
- location: `${location}.id`,
16629
- message: "Missing or invalid 'id' field (must be a non-empty string)"
16270
+ location: `${location}.name`,
16271
+ message: "Missing or invalid 'name' field (must be a non-empty string)"
16630
16272
  });
16631
16273
  }
16632
- const outcome = evalCase["outcome"];
16633
- if (typeof outcome !== "string" || outcome.trim().length === 0) {
16274
+ const provider = target["provider"];
16275
+ const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
16276
+ if (typeof provider !== "string" || provider.trim().length === 0) {
16634
16277
  errors.push({
16635
16278
  severity: "error",
16636
16279
  filePath: absolutePath,
16637
- location: `${location}.outcome`,
16638
- message: "Missing or invalid 'outcome' field (must be a non-empty string)"
16280
+ location: `${location}.provider`,
16281
+ message: "Missing or invalid 'provider' field (must be a non-empty string)"
16639
16282
  });
16640
- }
16641
- const inputMessages = evalCase["input_messages"];
16642
- if (!Array.isArray(inputMessages)) {
16283
+ } else if (!knownProviders.includes(provider)) {
16643
16284
  errors.push({
16644
- severity: "error",
16285
+ severity: "warning",
16645
16286
  filePath: absolutePath,
16646
- location: `${location}.input_messages`,
16647
- message: "Missing or invalid 'input_messages' field (must be an array)"
16287
+ location: `${location}.provider`,
16288
+ message: `Unknown provider '${provider}'. Known providers: ${knownProviders.join(", ")}`
16648
16289
  });
16649
- } else {
16650
- validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
16651
16290
  }
16652
- const expectedMessages = evalCase["expected_messages"];
16653
- if (!Array.isArray(expectedMessages)) {
16291
+ const settings = target["settings"];
16292
+ if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
16654
16293
  errors.push({
16655
16294
  severity: "error",
16656
16295
  filePath: absolutePath,
16657
- location: `${location}.expected_messages`,
16658
- message: "Missing or invalid 'expected_messages' field (must be an array)"
16296
+ location: `${location}.settings`,
16297
+ message: "Invalid 'settings' field (must be an object)"
16298
+ });
16299
+ }
16300
+ if (providerValue === "cli") {
16301
+ validateCliSettings(settings, absolutePath, `${location}.settings`, errors);
16302
+ }
16303
+ if (settings !== void 0 && isObject2(settings) && typeof provider === "string") {
16304
+ validateUnknownSettings(settings, provider, absolutePath, `${location}.settings`, errors);
16305
+ }
16306
+ const judgeTarget = target["judge_target"];
16307
+ if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
16308
+ errors.push({
16309
+ severity: "error",
16310
+ filePath: absolutePath,
16311
+ location: `${location}.judge_target`,
16312
+ message: "Invalid 'judge_target' field (must be a string)"
16659
16313
  });
16660
- } else {
16661
- validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
16662
16314
  }
16663
16315
  }
16664
16316
  return {
16665
- valid: errors.length === 0,
16317
+ valid: errors.filter((e) => e.severity === "error").length === 0,
16666
16318
  filePath: absolutePath,
16667
- fileType: "eval",
16319
+ fileType: "targets",
16668
16320
  errors
16669
16321
  };
16670
16322
  }
16671
- function validateMessages(messages, location, filePath, errors) {
16672
- for (let i6 = 0; i6 < messages.length; i6++) {
16673
- const message = messages[i6];
16674
- const msgLocation = `${location}[${i6}]`;
16675
- if (!isObject(message)) {
16323
+ var SCHEMA_CONFIG_V222 = "agentv-config-v2";
16324
+ async function validateConfigFile(filePath) {
16325
+ const errors = [];
16326
+ try {
16327
+ const content = await readFile42(filePath, "utf8");
16328
+ const parsed = parse42(content);
16329
+ if (typeof parsed !== "object" || parsed === null) {
16676
16330
  errors.push({
16677
16331
  severity: "error",
16678
16332
  filePath,
16679
- location: msgLocation,
16680
- message: "Message must be an object"
16333
+ message: "Config file must contain a valid YAML object"
16681
16334
  });
16682
- continue;
16335
+ return { valid: false, filePath, fileType: "config", errors };
16683
16336
  }
16684
- const role = message["role"];
16685
- const validRoles = ["system", "user", "assistant"];
16686
- if (!validRoles.includes(role)) {
16337
+ const config = parsed;
16338
+ const schema = config["$schema"];
16339
+ if (schema !== SCHEMA_CONFIG_V222) {
16340
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
16687
16341
  errors.push({
16688
16342
  severity: "error",
16689
16343
  filePath,
16690
- location: `${msgLocation}.role`,
16691
- message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
16344
+ location: "$schema",
16345
+ message
16692
16346
  });
16693
16347
  }
16694
- const content = message["content"];
16695
- if (typeof content === "string") {
16696
- } else if (Array.isArray(content)) {
16697
- for (let j2 = 0; j2 < content.length; j2++) {
16698
- const contentItem = content[j2];
16699
- const contentLocation = `${msgLocation}.content[${j2}]`;
16700
- if (typeof contentItem === "string") {
16701
- } else if (isObject(contentItem)) {
16702
- const type = contentItem["type"];
16703
- if (typeof type !== "string") {
16704
- errors.push({
16705
- severity: "error",
16706
- filePath,
16707
- location: `${contentLocation}.type`,
16708
- message: "Content object must have a 'type' field"
16709
- });
16710
- }
16711
- if (type === "text") {
16712
- const value = contentItem["value"];
16713
- if (typeof value !== "string") {
16714
- errors.push({
16715
- severity: "error",
16716
- filePath,
16717
- location: `${contentLocation}.value`,
16718
- message: "Content with type 'text' must have a 'value' field"
16719
- });
16720
- }
16721
- }
16722
- } else {
16723
- errors.push({
16724
- severity: "error",
16725
- filePath,
16726
- location: contentLocation,
16727
- message: "Content array items must be strings or objects"
16728
- });
16729
- }
16348
+ const guidelinePatterns = config["guideline_patterns"];
16349
+ if (guidelinePatterns !== void 0) {
16350
+ if (!Array.isArray(guidelinePatterns)) {
16351
+ errors.push({
16352
+ severity: "error",
16353
+ filePath,
16354
+ location: "guideline_patterns",
16355
+ message: "Field 'guideline_patterns' must be an array"
16356
+ });
16357
+ } else if (!guidelinePatterns.every((p) => typeof p === "string")) {
16358
+ errors.push({
16359
+ severity: "error",
16360
+ filePath,
16361
+ location: "guideline_patterns",
16362
+ message: "All entries in 'guideline_patterns' must be strings"
16363
+ });
16364
+ } else if (guidelinePatterns.length === 0) {
16365
+ errors.push({
16366
+ severity: "warning",
16367
+ filePath,
16368
+ location: "guideline_patterns",
16369
+ message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
16370
+ });
16730
16371
  }
16731
- } else {
16372
+ }
16373
+ const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
16374
+ const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
16375
+ if (unexpectedFields.length > 0) {
16732
16376
  errors.push({
16733
- severity: "error",
16377
+ severity: "warning",
16734
16378
  filePath,
16735
- location: `${msgLocation}.content`,
16736
- message: "Missing or invalid 'content' field (must be a string or array)"
16379
+ message: `Unexpected fields: ${unexpectedFields.join(", ")}`
16737
16380
  });
16738
16381
  }
16382
+ return {
16383
+ valid: errors.filter((e) => e.severity === "error").length === 0,
16384
+ filePath,
16385
+ fileType: "config",
16386
+ errors
16387
+ };
16388
+ } catch (error) {
16389
+ errors.push({
16390
+ severity: "error",
16391
+ filePath,
16392
+ message: `Failed to parse config file: ${error.message}`
16393
+ });
16394
+ return { valid: false, filePath, fileType: "config", errors };
16739
16395
  }
16740
16396
  }
16741
- function isObject2(value) {
16397
+ function isObject3(value) {
16742
16398
  return typeof value === "object" && value !== null && !Array.isArray(value);
16743
16399
  }
16744
- var CLI_PLACEHOLDERS2 = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
16745
- async function validateTargetsFile(filePath) {
16400
+ async function validateFileReferences(evalFilePath) {
16746
16401
  const errors = [];
16747
- const absolutePath = path23.resolve(filePath);
16748
- let parsed;
16749
- try {
16750
- const content = await readFile33(absolutePath, "utf8");
16751
- parsed = parse32(content);
16752
- } catch (error) {
16402
+ const absolutePath = path33.resolve(evalFilePath);
16403
+ const gitRoot = await findGitRoot(absolutePath);
16404
+ if (!gitRoot) {
16753
16405
  errors.push({
16754
16406
  severity: "error",
16755
16407
  filePath: absolutePath,
16756
- message: `Failed to parse YAML: ${error.message}`
16408
+ message: "Cannot validate file references: git repository root not found"
16757
16409
  });
16758
- return {
16759
- valid: false,
16760
- filePath: absolutePath,
16761
- fileType: "targets",
16762
- errors
16763
- };
16410
+ return errors;
16764
16411
  }
16765
- function validateCliSettings(settings, absolutePath2, location, errors2) {
16766
- if (!isObject2(settings)) {
16767
- errors2.push({
16768
- severity: "error",
16769
- filePath: absolutePath2,
16770
- location,
16771
- message: "CLI provider requires a 'settings' object"
16772
- });
16773
- return;
16412
+ const searchRoots = buildSearchRoots(absolutePath, gitRoot);
16413
+ let parsed;
16414
+ try {
16415
+ const content = await readFile5(absolutePath, "utf8");
16416
+ parsed = parse5(content);
16417
+ } catch {
16418
+ return errors;
16419
+ }
16420
+ if (!isObject3(parsed)) {
16421
+ return errors;
16422
+ }
16423
+ const evalcases = parsed["evalcases"];
16424
+ if (!Array.isArray(evalcases)) {
16425
+ return errors;
16426
+ }
16427
+ for (let i6 = 0; i6 < evalcases.length; i6++) {
16428
+ const evalCase = evalcases[i6];
16429
+ if (!isObject3(evalCase)) {
16430
+ continue;
16774
16431
  }
16775
- const commandTemplate = settings["command_template"] ?? settings["commandTemplate"];
16776
- if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
16777
- errors2.push({
16778
- severity: "error",
16779
- filePath: absolutePath2,
16780
- location: `${location}.commandTemplate`,
16781
- message: "CLI provider requires 'commandTemplate' as a non-empty string"
16782
- });
16783
- } else {
16784
- recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
16432
+ const inputMessages = evalCase["input_messages"];
16433
+ if (Array.isArray(inputMessages)) {
16434
+ await validateMessagesFileRefs(inputMessages, `evalcases[${i6}].input_messages`, searchRoots, absolutePath, errors);
16785
16435
  }
16786
- const attachmentsFormat = settings["attachments_format"] ?? settings["attachmentsFormat"];
16787
- if (attachmentsFormat !== void 0 && typeof attachmentsFormat !== "string") {
16788
- errors2.push({
16789
- severity: "error",
16790
- filePath: absolutePath2,
16791
- location: `${location}.attachmentsFormat`,
16792
- message: "'attachmentsFormat' must be a string when provided"
16793
- });
16436
+ const expectedMessages = evalCase["expected_messages"];
16437
+ if (Array.isArray(expectedMessages)) {
16438
+ await validateMessagesFileRefs(expectedMessages, `evalcases[${i6}].expected_messages`, searchRoots, absolutePath, errors);
16794
16439
  }
16795
- const filesFormat = settings["files_format"] ?? settings["filesFormat"];
16796
- if (filesFormat !== void 0 && typeof filesFormat !== "string") {
16797
- errors2.push({
16798
- severity: "error",
16799
- filePath: absolutePath2,
16800
- location: `${location}.filesFormat`,
16801
- message: "'filesFormat' must be a string when provided"
16802
- });
16440
+ }
16441
+ return errors;
16442
+ }
16443
+ async function validateMessagesFileRefs(messages, location, searchRoots, filePath, errors) {
16444
+ for (let i6 = 0; i6 < messages.length; i6++) {
16445
+ const message = messages[i6];
16446
+ if (!isObject3(message)) {
16447
+ continue;
16803
16448
  }
16804
- const cwd = settings["cwd"];
16805
- if (cwd !== void 0 && typeof cwd !== "string") {
16806
- errors2.push({
16807
- severity: "error",
16808
- filePath: absolutePath2,
16809
- location: `${location}.cwd`,
16810
- message: "'cwd' must be a string when provided"
16811
- });
16449
+ const content = message["content"];
16450
+ if (typeof content === "string") {
16451
+ continue;
16812
16452
  }
16813
- const timeoutSeconds = settings["timeout_seconds"] ?? settings["timeoutSeconds"];
16814
- if (timeoutSeconds !== void 0) {
16815
- const numericTimeout = Number(timeoutSeconds);
16816
- if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
16817
- errors2.push({
16453
+ if (!Array.isArray(content)) {
16454
+ continue;
16455
+ }
16456
+ for (let j2 = 0; j2 < content.length; j2++) {
16457
+ const contentItem = content[j2];
16458
+ if (!isObject3(contentItem)) {
16459
+ continue;
16460
+ }
16461
+ const type = contentItem["type"];
16462
+ if (type !== "file") {
16463
+ continue;
16464
+ }
16465
+ const value = contentItem["value"];
16466
+ if (typeof value !== "string") {
16467
+ errors.push({
16818
16468
  severity: "error",
16819
- filePath: absolutePath2,
16820
- location: `${location}.timeoutSeconds`,
16821
- message: "'timeoutSeconds' must be a positive number when provided"
16469
+ filePath,
16470
+ location: `${location}[${i6}].content[${j2}].value`,
16471
+ message: "File reference must have a 'value' field with the file path"
16822
16472
  });
16473
+ continue;
16823
16474
  }
16824
- }
16825
- const envOverrides = settings["env"];
16826
- if (envOverrides !== void 0) {
16827
- if (!isObject2(envOverrides)) {
16828
- errors2.push({
16475
+ const { resolvedPath } = await resolveFileReference(value, searchRoots);
16476
+ if (!resolvedPath) {
16477
+ errors.push({
16829
16478
  severity: "error",
16830
- filePath: absolutePath2,
16831
- location: `${location}.env`,
16832
- message: "'env' must be an object with string values"
16479
+ filePath,
16480
+ location: `${location}[${i6}].content[${j2}]`,
16481
+ message: `Referenced file not found: ${value}`
16833
16482
  });
16834
16483
  } else {
16835
- for (const [key2, value] of Object.entries(envOverrides)) {
16836
- if (typeof value !== "string" || value.trim().length === 0) {
16837
- errors2.push({
16838
- severity: "error",
16839
- filePath: absolutePath2,
16840
- location: `${location}.env.${key2}`,
16841
- message: `Environment override '${key2}' must be a non-empty string`
16484
+ try {
16485
+ const fileContent = await readFile5(resolvedPath, "utf8");
16486
+ if (fileContent.trim().length === 0) {
16487
+ errors.push({
16488
+ severity: "warning",
16489
+ filePath,
16490
+ location: `${location}[${i6}].content[${j2}]`,
16491
+ message: `Referenced file is empty: ${value}`
16842
16492
  });
16843
16493
  }
16494
+ } catch (error) {
16495
+ errors.push({
16496
+ severity: "error",
16497
+ filePath,
16498
+ location: `${location}[${i6}].content[${j2}]`,
16499
+ message: `Cannot read referenced file: ${value} (${error.message})`
16500
+ });
16844
16501
  }
16845
16502
  }
16846
16503
  }
16847
- const healthcheck = settings["healthcheck"];
16848
- if (healthcheck !== void 0) {
16849
- validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
16850
- }
16851
16504
  }
16852
- function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
16853
- if (!isObject2(healthcheck)) {
16854
- errors2.push({
16855
- severity: "error",
16856
- filePath: absolutePath2,
16857
- location,
16858
- message: "'healthcheck' must be an object when provided"
16859
- });
16860
- return;
16861
- }
16862
- const type = healthcheck["type"];
16863
- if (type !== "http" && type !== "command") {
16864
- errors2.push({
16865
- severity: "error",
16866
- filePath: absolutePath2,
16867
- location: `${location}.type`,
16868
- message: "healthcheck.type must be either 'http' or 'command'"
16869
- });
16870
- return;
16871
- }
16872
- const timeoutSeconds = healthcheck["timeout_seconds"] ?? healthcheck["timeoutSeconds"];
16873
- if (timeoutSeconds !== void 0) {
16874
- const numericTimeout = Number(timeoutSeconds);
16875
- if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
16876
- errors2.push({
16877
- severity: "error",
16878
- filePath: absolutePath2,
16879
- location: `${location}.timeoutSeconds`,
16880
- message: "healthcheck.timeoutSeconds must be a positive number when provided"
16881
- });
16882
- }
16883
- }
16884
- if (type === "http") {
16885
- const url = healthcheck["url"];
16886
- if (typeof url !== "string" || url.trim().length === 0) {
16887
- errors2.push({
16888
- severity: "error",
16889
- filePath: absolutePath2,
16890
- location: `${location}.url`,
16891
- message: "healthcheck.url must be a non-empty string for http checks"
16892
- });
16505
+ }
16506
+
16507
+ // src/commands/eval/targets.ts
16508
+ import { constants as constants5 } from "node:fs";
16509
+ import { access as access5, readFile as readFile6 } from "node:fs/promises";
16510
+ import path13 from "node:path";
16511
+ import { parse as parse6 } from "yaml";
16512
+ var TARGET_FILE_CANDIDATES = [
16513
+ "targets.yaml",
16514
+ "targets.yml",
16515
+ path13.join(".agentv", "targets.yaml"),
16516
+ path13.join(".agentv", "targets.yml")
16517
+ ];
16518
+ var ANSI_YELLOW2 = "\x1B[33m";
16519
+ var ANSI_RED = "\x1B[31m";
16520
+ var ANSI_RESET2 = "\x1B[0m";
16521
+ function isTTY() {
16522
+ return process.stdout.isTTY ?? false;
16523
+ }
16524
+ async function fileExists5(filePath) {
16525
+ try {
16526
+ await access5(filePath, constants5.F_OK);
16527
+ return true;
16528
+ } catch {
16529
+ return false;
16530
+ }
16531
+ }
16532
+ async function readTestSuiteTarget(testFilePath) {
16533
+ try {
16534
+ const raw = await readFile6(path13.resolve(testFilePath), "utf8");
16535
+ const parsed = parse6(raw);
16536
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
16537
+ const targetValue = parsed.target;
16538
+ if (typeof targetValue === "string" && targetValue.trim().length > 0) {
16539
+ return targetValue.trim();
16893
16540
  }
16894
- return;
16895
- }
16896
- const commandTemplate = healthcheck["command_template"] ?? healthcheck["commandTemplate"];
16897
- if (typeof commandTemplate !== "string" || commandTemplate.trim().length === 0) {
16898
- errors2.push({
16899
- severity: "error",
16900
- filePath: absolutePath2,
16901
- location: `${location}.commandTemplate`,
16902
- message: "healthcheck.commandTemplate must be a non-empty string for command checks"
16903
- });
16904
- } else {
16905
- recordUnknownPlaceholders(commandTemplate, absolutePath2, `${location}.commandTemplate`, errors2);
16906
- }
16907
- const cwd = healthcheck["cwd"];
16908
- if (cwd !== void 0 && typeof cwd !== "string") {
16909
- errors2.push({
16910
- severity: "error",
16911
- filePath: absolutePath2,
16912
- location: `${location}.cwd`,
16913
- message: "healthcheck.cwd must be a string when provided"
16914
- });
16915
16541
  }
16542
+ } catch {
16916
16543
  }
16917
- function recordUnknownPlaceholders(template, absolutePath2, location, errors2) {
16918
- const placeholders = extractPlaceholders(template);
16919
- for (const placeholder of placeholders) {
16920
- if (!CLI_PLACEHOLDERS2.has(placeholder)) {
16921
- errors2.push({
16922
- severity: "error",
16923
- filePath: absolutePath2,
16924
- location,
16925
- message: `Unknown CLI placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS2).join(", ")}`
16926
- });
16544
+ return void 0;
16545
+ }
16546
+ async function discoverTargetsFile(options) {
16547
+ const { explicitPath, testFilePath, repoRoot, cwd } = options;
16548
+ if (explicitPath) {
16549
+ const resolvedExplicit = path13.resolve(explicitPath);
16550
+ if (await fileExists5(resolvedExplicit)) {
16551
+ return resolvedExplicit;
16552
+ }
16553
+ for (const candidate of TARGET_FILE_CANDIDATES) {
16554
+ const nested = path13.join(resolvedExplicit, candidate);
16555
+ if (await fileExists5(nested)) {
16556
+ return nested;
16927
16557
  }
16928
16558
  }
16559
+ throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
16929
16560
  }
16930
- function extractPlaceholders(template) {
16931
- const matches = template.matchAll(/\{([A-Z_]+)\}/g);
16932
- const result = [];
16933
- for (const match of matches) {
16934
- const placeholder = match[1];
16935
- if (placeholder) {
16936
- result.push(placeholder);
16561
+ const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
16562
+ const resolvedCwd = path13.resolve(cwd);
16563
+ if (!directories.includes(resolvedCwd)) {
16564
+ directories.push(resolvedCwd);
16565
+ }
16566
+ for (const directory of directories) {
16567
+ for (const candidate of TARGET_FILE_CANDIDATES) {
16568
+ const fullPath = path13.join(directory, candidate);
16569
+ if (await fileExists5(fullPath)) {
16570
+ return fullPath;
16937
16571
  }
16938
16572
  }
16939
- return result;
16940
16573
  }
16941
- if (!isObject2(parsed)) {
16942
- errors.push({
16943
- severity: "error",
16944
- filePath: absolutePath,
16945
- message: "File must contain a YAML object"
16946
- });
16947
- return {
16948
- valid: false,
16949
- filePath: absolutePath,
16950
- fileType: "targets",
16951
- errors
16952
- };
16574
+ throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
16575
+ }
16576
+ function pickTargetName(options) {
16577
+ const cliName = options.cliTargetName?.trim();
16578
+ if (cliName && cliName !== "default") {
16579
+ return { name: cliName, source: "cli" };
16953
16580
  }
16954
- const schema = parsed["$schema"];
16955
- if (schema !== TARGETS_SCHEMA_V2) {
16956
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
16957
- errors.push({
16958
- severity: "error",
16959
- filePath: absolutePath,
16960
- location: "$schema",
16961
- message
16962
- });
16581
+ const fileName = options.fileTargetName?.trim();
16582
+ if (fileName && fileName.length > 0) {
16583
+ return { name: fileName, source: "test-file" };
16963
16584
  }
16964
- const targets = parsed["targets"];
16965
- if (!Array.isArray(targets)) {
16966
- errors.push({
16967
- severity: "error",
16968
- filePath: absolutePath,
16969
- location: "targets",
16970
- message: "Missing or invalid 'targets' field (must be an array)"
16971
- });
16585
+ return { name: "default", source: "default" };
16586
+ }
16587
+ async function selectTarget(options) {
16588
+ const { testFilePath, repoRoot, cwd, explicitTargetsPath, cliTargetName, dryRun, dryRunDelay, dryRunDelayMin, dryRunDelayMax, env } = options;
16589
+ const targetsFilePath = await discoverTargetsFile({
16590
+ explicitPath: explicitTargetsPath,
16591
+ testFilePath,
16592
+ repoRoot,
16593
+ cwd
16594
+ });
16595
+ const validationResult = await validateTargetsFile(targetsFilePath);
16596
+ const warnings = validationResult.errors.filter((e) => e.severity === "warning");
16597
+ const useColors = isTTY();
16598
+ if (warnings.length > 0) {
16599
+ console.warn(`
16600
+ Warnings in ${targetsFilePath}:`);
16601
+ for (const warning of warnings) {
16602
+ const location = warning.location ? ` [${warning.location}]` : "";
16603
+ const prefix = useColors ? `${ANSI_YELLOW2} \u26A0${ANSI_RESET2}` : " \u26A0";
16604
+ const message = useColors ? `${ANSI_YELLOW2}${warning.message}${ANSI_RESET2}` : warning.message;
16605
+ console.warn(`${prefix}${location} ${message}`);
16606
+ }
16607
+ console.warn("");
16608
+ }
16609
+ const errors = validationResult.errors.filter((e) => e.severity === "error");
16610
+ if (errors.length > 0) {
16611
+ console.error(`
16612
+ Errors in ${targetsFilePath}:`);
16613
+ for (const error of errors) {
16614
+ const location = error.location ? ` [${error.location}]` : "";
16615
+ const prefix = useColors ? `${ANSI_RED} \u2717${ANSI_RESET2}` : " \u2717";
16616
+ const message = useColors ? `${ANSI_RED}${error.message}${ANSI_RESET2}` : error.message;
16617
+ console.error(`${prefix}${location} ${message}`);
16618
+ }
16619
+ throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
16620
+ }
16621
+ const definitions = await readTargetDefinitions(targetsFilePath);
16622
+ const fileTargetName = await readTestSuiteTarget(testFilePath);
16623
+ const targetChoice = pickTargetName({ cliTargetName, fileTargetName });
16624
+ const targetDefinition = definitions.find((definition) => definition.name === targetChoice.name);
16625
+ if (!targetDefinition) {
16626
+ const available = listTargetNames(definitions).join(", ");
16627
+ throw new Error(
16628
+ `Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
16629
+ );
16630
+ }
16631
+ if (dryRun) {
16632
+ const mockTarget = {
16633
+ kind: "mock",
16634
+ name: `${targetDefinition.name}-dry-run`,
16635
+ judgeTarget: void 0,
16636
+ config: {
16637
+ response: '{"answer":"Mock dry-run response"}',
16638
+ delayMs: dryRunDelay,
16639
+ delayMinMs: dryRunDelayMin,
16640
+ delayMaxMs: dryRunDelayMax
16641
+ }
16642
+ };
16972
16643
  return {
16973
- valid: errors.length === 0,
16974
- filePath: absolutePath,
16975
- fileType: "targets",
16976
- errors
16644
+ definitions,
16645
+ resolvedTarget: mockTarget,
16646
+ targetName: targetChoice.name,
16647
+ targetSource: targetChoice.source,
16648
+ targetsFilePath
16977
16649
  };
16978
16650
  }
16979
- const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
16980
- for (let i6 = 0; i6 < targets.length; i6++) {
16981
- const target = targets[i6];
16982
- const location = `targets[${i6}]`;
16983
- if (!isObject2(target)) {
16984
- errors.push({
16985
- severity: "error",
16986
- filePath: absolutePath,
16987
- location,
16988
- message: "Target must be an object"
16989
- });
16990
- continue;
16651
+ try {
16652
+ const resolvedTarget = resolveTargetDefinition(targetDefinition, env);
16653
+ return {
16654
+ definitions,
16655
+ resolvedTarget,
16656
+ targetName: targetChoice.name,
16657
+ targetSource: targetChoice.source,
16658
+ targetsFilePath
16659
+ };
16660
+ } catch (error) {
16661
+ const message = error instanceof Error ? error.message : String(error);
16662
+ throw new Error(`Failed to resolve target '${targetChoice.name}': ${message}`);
16663
+ }
16664
+ }
16665
+
16666
+ // src/commands/eval/run-eval.ts
16667
+ var DEFAULT_WORKERS = 3;
16668
+ function normalizeBoolean(value) {
16669
+ return value === true;
16670
+ }
16671
+ function normalizeString(value) {
16672
+ if (typeof value !== "string") {
16673
+ return void 0;
16674
+ }
16675
+ const trimmed = value.trim();
16676
+ return trimmed.length > 0 ? trimmed : void 0;
16677
+ }
16678
+ function normalizeNumber(value, fallback) {
16679
+ if (typeof value === "number" && Number.isFinite(value)) {
16680
+ return value;
16681
+ }
16682
+ if (typeof value === "string") {
16683
+ const parsed = Number.parseInt(value, 10);
16684
+ if (!Number.isNaN(parsed)) {
16685
+ return parsed;
16991
16686
  }
16992
- const name = target["name"];
16993
- if (typeof name !== "string" || name.trim().length === 0) {
16994
- errors.push({
16995
- severity: "error",
16996
- filePath: absolutePath,
16997
- location: `${location}.name`,
16998
- message: "Missing or invalid 'name' field (must be a non-empty string)"
16999
- });
16687
+ }
16688
+ return fallback;
16689
+ }
16690
+ function normalizeOptions(rawOptions) {
16691
+ const formatStr = normalizeString(rawOptions.outputFormat) ?? "jsonl";
16692
+ const format = formatStr === "yaml" ? "yaml" : "jsonl";
16693
+ const workers = normalizeNumber(rawOptions.workers, 0);
16694
+ return {
16695
+ target: normalizeString(rawOptions.target),
16696
+ targetsPath: normalizeString(rawOptions.targets),
16697
+ evalId: normalizeString(rawOptions.evalId),
16698
+ workers: workers > 0 ? workers : void 0,
16699
+ outPath: normalizeString(rawOptions.out),
16700
+ format,
16701
+ dryRun: normalizeBoolean(rawOptions.dryRun),
16702
+ dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
16703
+ dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
16704
+ dryRunDelayMax: normalizeNumber(rawOptions.dryRunDelayMax, 0),
16705
+ agentTimeoutSeconds: normalizeNumber(rawOptions.agentTimeout, 120),
16706
+ maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
16707
+ cache: normalizeBoolean(rawOptions.cache),
16708
+ verbose: normalizeBoolean(rawOptions.verbose),
16709
+ dumpPrompts: rawOptions.dumpPrompts
16710
+ };
16711
+ }
16712
+ async function ensureFileExists(filePath, description) {
16713
+ try {
16714
+ await access6(filePath, constants6.F_OK);
16715
+ } catch {
16716
+ throw new Error(`${description} not found: ${filePath}`);
16717
+ }
16718
+ }
16719
+ async function findRepoRoot(start) {
16720
+ const fallback = path14.resolve(start);
16721
+ let current = fallback;
16722
+ while (current !== void 0) {
16723
+ const candidate = path14.join(current, ".git");
16724
+ try {
16725
+ await access6(candidate, constants6.F_OK);
16726
+ return current;
16727
+ } catch {
16728
+ const parent = path14.dirname(current);
16729
+ if (parent === current) {
16730
+ break;
16731
+ }
16732
+ current = parent;
17000
16733
  }
17001
- const provider = target["provider"];
17002
- const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
17003
- if (typeof provider !== "string" || provider.trim().length === 0) {
17004
- errors.push({
17005
- severity: "error",
17006
- filePath: absolutePath,
17007
- location: `${location}.provider`,
17008
- message: "Missing or invalid 'provider' field (must be a non-empty string)"
17009
- });
17010
- } else if (!knownProviders.includes(provider)) {
17011
- errors.push({
17012
- severity: "warning",
17013
- filePath: absolutePath,
17014
- location: `${location}.provider`,
17015
- message: `Unknown provider '${provider}'. Known providers: ${knownProviders.join(", ")}`
16734
+ }
16735
+ return fallback;
16736
+ }
16737
+ function buildDefaultOutputPath(cwd, format) {
16738
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
16739
+ const baseName = "eval";
16740
+ const extension = getDefaultExtension(format);
16741
+ return path14.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
16742
+ }
16743
+ function resolvePromptDirectory(option, cwd) {
16744
+ if (option === void 0) {
16745
+ return void 0;
16746
+ }
16747
+ if (typeof option === "string" && option.trim().length > 0) {
16748
+ return path14.resolve(cwd, option);
16749
+ }
16750
+ return path14.join(cwd, ".agentv", "prompts");
16751
+ }
16752
+ function createEvaluationCache() {
16753
+ const store = /* @__PURE__ */ new Map();
16754
+ return {
16755
+ async get(key2) {
16756
+ return store.get(key2);
16757
+ },
16758
+ async set(key2, value) {
16759
+ store.set(key2, value);
16760
+ }
16761
+ };
16762
+ }
16763
+ function createProgressReporter(maxWorkers) {
16764
+ const display = new ProgressDisplay(maxWorkers);
16765
+ return {
16766
+ isInteractive: display.isInteractiveMode(),
16767
+ start: () => display.start(),
16768
+ setTotal: (total) => display.setTotalTests(total),
16769
+ update: (workerId, progress) => display.updateWorker({ ...progress, workerId }),
16770
+ finish: () => display.finish(),
16771
+ addLogPaths: (paths) => display.addLogPaths(paths)
16772
+ };
16773
+ }
16774
+ function makeEvalKey(testFilePath, evalId) {
16775
+ return `${path14.resolve(testFilePath)}::${evalId}`;
16776
+ }
16777
+ function createDisplayIdTracker() {
16778
+ const map = /* @__PURE__ */ new Map();
16779
+ let nextId = 1;
16780
+ return {
16781
+ getOrAssign(evalKey) {
16782
+ const existing = map.get(evalKey);
16783
+ if (existing !== void 0) {
16784
+ return existing;
16785
+ }
16786
+ const assigned = nextId++;
16787
+ map.set(evalKey, assigned);
16788
+ return assigned;
16789
+ }
16790
+ };
16791
+ }
16792
+ async function prepareFileMetadata(params) {
16793
+ const { testFilePath, repoRoot, cwd, options } = params;
16794
+ await ensureFileExists(testFilePath, "Test file");
16795
+ await loadEnvFromHierarchy({
16796
+ testFilePath,
16797
+ repoRoot,
16798
+ verbose: options.verbose
16799
+ });
16800
+ const selection = await selectTarget({
16801
+ testFilePath,
16802
+ repoRoot,
16803
+ cwd,
16804
+ explicitTargetsPath: options.targetsPath,
16805
+ cliTargetName: options.target,
16806
+ dryRun: options.dryRun,
16807
+ dryRunDelay: options.dryRunDelay,
16808
+ dryRunDelayMin: options.dryRunDelayMin,
16809
+ dryRunDelayMax: options.dryRunDelayMax,
16810
+ env: process.env
16811
+ });
16812
+ const providerLabel = options.dryRun ? `${selection.resolvedTarget.kind} (dry-run)` : selection.resolvedTarget.kind;
16813
+ const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
16814
+ const evalCases = await loadEvalCases(testFilePath, repoRoot, { verbose: options.verbose, evalId: options.evalId });
16815
+ const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
16816
+ return { evalIds: filteredIds, selection, inlineTargetLabel };
16817
+ }
16818
+ async function runWithLimit(items, limit, task) {
16819
+ const safeLimit = Math.max(1, limit);
16820
+ let index = 0;
16821
+ const workers = Array.from({ length: safeLimit }, async () => {
16822
+ while (index < items.length) {
16823
+ const current = items[index];
16824
+ index += 1;
16825
+ await task(current);
16826
+ }
16827
+ });
16828
+ await Promise.all(workers);
16829
+ }
16830
+ async function runSingleEvalFile(params) {
16831
+ const {
16832
+ testFilePath,
16833
+ cwd,
16834
+ repoRoot,
16835
+ options,
16836
+ outputWriter,
16837
+ cache,
16838
+ evaluationRunner,
16839
+ workersOverride,
16840
+ progressReporter,
16841
+ seenEvalCases,
16842
+ displayIdTracker,
16843
+ selection,
16844
+ inlineTargetLabel
16845
+ } = params;
16846
+ await ensureFileExists(testFilePath, "Test file");
16847
+ const resolvedTargetSelection = selection;
16848
+ const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
16849
+ const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
16850
+ if (!progressReporter.isInteractive || options.verbose) {
16851
+ console.log(targetMessage);
16852
+ }
16853
+ const promptDumpDir = resolvePromptDirectory(options.dumpPrompts, cwd);
16854
+ if (promptDumpDir) {
16855
+ await mkdir6(promptDumpDir, { recursive: true });
16856
+ if (options.verbose) {
16857
+ console.log(`Prompt dumps enabled at: ${promptDumpDir}`);
16858
+ }
16859
+ }
16860
+ const agentTimeoutMs = Math.max(0, options.agentTimeoutSeconds) * 1e3;
16861
+ const workerPreference = workersOverride ?? options.workers;
16862
+ let resolvedWorkers = workerPreference ?? resolvedTargetSelection.resolvedTarget.workers ?? DEFAULT_WORKERS;
16863
+ if (resolvedWorkers < 1 || resolvedWorkers > 50) {
16864
+ throw new Error(`Workers must be between 1 and 50, got: ${resolvedWorkers}`);
16865
+ }
16866
+ const isVSCodeProvider = ["vscode", "vscode-insiders"].includes(
16867
+ resolvedTargetSelection.resolvedTarget.kind
16868
+ );
16869
+ if (isVSCodeProvider && resolvedWorkers > 1) {
16870
+ console.warn(`Warning: VSCode providers require window focus. Limiting workers from ${resolvedWorkers} to 1 to prevent race conditions.`);
16871
+ resolvedWorkers = 1;
16872
+ }
16873
+ if (options.verbose) {
16874
+ const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
16875
+ console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
16876
+ }
16877
+ if (isVSCodeProvider && !options.dryRun) {
16878
+ await ensureVSCodeSubagents({
16879
+ kind: resolvedTargetSelection.resolvedTarget.kind,
16880
+ count: resolvedWorkers,
16881
+ verbose: options.verbose
16882
+ });
16883
+ }
16884
+ const results = await evaluationRunner({
16885
+ testFilePath,
16886
+ repoRoot,
16887
+ target: resolvedTargetSelection.resolvedTarget,
16888
+ targets: resolvedTargetSelection.definitions,
16889
+ env: process.env,
16890
+ maxRetries: Math.max(0, options.maxRetries),
16891
+ agentTimeoutMs,
16892
+ promptDumpDir,
16893
+ cache,
16894
+ useCache: options.cache,
16895
+ evalId: options.evalId,
16896
+ verbose: options.verbose,
16897
+ maxConcurrency: resolvedWorkers,
16898
+ onResult: async (result) => {
16899
+ await outputWriter.append(result);
16900
+ },
16901
+ onProgress: async (event) => {
16902
+ const evalKey = makeEvalKey(testFilePath, event.evalId);
16903
+ if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
16904
+ seenEvalCases.add(evalKey);
16905
+ progressReporter.setTotal(seenEvalCases.size);
16906
+ }
16907
+ const displayId = displayIdTracker.getOrAssign(evalKey);
16908
+ progressReporter.update(displayId, {
16909
+ workerId: displayId,
16910
+ evalId: event.evalId,
16911
+ status: event.status,
16912
+ startedAt: event.startedAt,
16913
+ completedAt: event.completedAt,
16914
+ error: event.error,
16915
+ targetLabel: inlineTargetLabel
17016
16916
  });
17017
16917
  }
17018
- const settings = target["settings"];
17019
- if (providerValue !== "cli" && settings !== void 0 && !isObject2(settings)) {
17020
- errors.push({
17021
- severity: "error",
17022
- filePath: absolutePath,
17023
- location: `${location}.settings`,
17024
- message: "Invalid 'settings' field (must be an object)"
16918
+ });
16919
+ return { results: [...results], promptDumpDir };
16920
+ }
16921
+ async function runEvalCommand(input) {
16922
+ const options = normalizeOptions(input.rawOptions);
16923
+ const cwd = process.cwd();
16924
+ const repoRoot = await findRepoRoot(cwd);
16925
+ if (options.verbose) {
16926
+ console.log(`Repository root: ${repoRoot}`);
16927
+ }
16928
+ const outputPath = options.outPath ? path14.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
16929
+ console.log(`Output path: ${outputPath}`);
16930
+ const outputWriter = await createOutputWriter(outputPath, options.format);
16931
+ const cache = options.cache ? createEvaluationCache() : void 0;
16932
+ const evaluationRunner = await resolveEvaluationRunner();
16933
+ const allResults = [];
16934
+ let lastPromptDumpDir;
16935
+ const seenEvalCases = /* @__PURE__ */ new Set();
16936
+ const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
16937
+ const displayIdTracker = createDisplayIdTracker();
16938
+ const totalWorkers = options.workers ?? DEFAULT_WORKERS;
16939
+ const fileConcurrency = Math.min(Math.max(1, totalWorkers), Math.max(1, resolvedTestFiles.length));
16940
+ const perFileWorkers = options.workers ? Math.max(1, Math.floor(totalWorkers / fileConcurrency)) : void 0;
16941
+ const fileMetadata = /* @__PURE__ */ new Map();
16942
+ for (const testFilePath of resolvedTestFiles) {
16943
+ const meta = await prepareFileMetadata({
16944
+ testFilePath,
16945
+ repoRoot,
16946
+ cwd,
16947
+ options
16948
+ });
16949
+ fileMetadata.set(testFilePath, meta);
16950
+ }
16951
+ const totalEvalCount = Array.from(fileMetadata.values()).reduce(
16952
+ (sum, meta) => sum + meta.evalIds.length,
16953
+ 0
16954
+ );
16955
+ if (totalEvalCount === 0) {
16956
+ throw new Error("No eval cases matched the provided filters.");
16957
+ }
16958
+ const progressReporter = createProgressReporter(totalWorkers);
16959
+ progressReporter.start();
16960
+ progressReporter.setTotal(totalEvalCount);
16961
+ const seenCodexLogPaths = /* @__PURE__ */ new Set();
16962
+ const unsubscribeCodexLogs = subscribeToCodexLogEntries((entry) => {
16963
+ if (!entry.filePath || seenCodexLogPaths.has(entry.filePath)) {
16964
+ return;
16965
+ }
16966
+ seenCodexLogPaths.add(entry.filePath);
16967
+ progressReporter.addLogPaths([entry.filePath]);
16968
+ });
16969
+ for (const [testFilePath, meta] of fileMetadata.entries()) {
16970
+ for (const evalId of meta.evalIds) {
16971
+ const evalKey = makeEvalKey(testFilePath, evalId);
16972
+ seenEvalCases.add(evalKey);
16973
+ const displayId = displayIdTracker.getOrAssign(evalKey);
16974
+ progressReporter.update(displayId, {
16975
+ workerId: displayId,
16976
+ evalId,
16977
+ status: "pending",
16978
+ targetLabel: meta.inlineTargetLabel
17025
16979
  });
17026
16980
  }
17027
- if (providerValue === "cli") {
17028
- validateCliSettings(settings, absolutePath, `${location}.settings`, errors);
17029
- }
17030
- const judgeTarget = target["judge_target"];
17031
- if (judgeTarget !== void 0 && typeof judgeTarget !== "string") {
17032
- errors.push({
17033
- severity: "error",
17034
- filePath: absolutePath,
17035
- location: `${location}.judge_target`,
17036
- message: "Invalid 'judge_target' field (must be a string)"
16981
+ }
16982
+ try {
16983
+ await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
16984
+ const targetPrep = fileMetadata.get(testFilePath);
16985
+ if (!targetPrep) {
16986
+ throw new Error(`Missing metadata for ${testFilePath}`);
16987
+ }
16988
+ const result = await runSingleEvalFile({
16989
+ testFilePath,
16990
+ cwd,
16991
+ repoRoot,
16992
+ options,
16993
+ outputWriter,
16994
+ cache,
16995
+ evaluationRunner,
16996
+ workersOverride: perFileWorkers,
16997
+ progressReporter,
16998
+ seenEvalCases,
16999
+ displayIdTracker,
17000
+ selection: targetPrep.selection,
17001
+ inlineTargetLabel: targetPrep.inlineTargetLabel
17037
17002
  });
17003
+ allResults.push(...result.results);
17004
+ if (result.promptDumpDir) {
17005
+ lastPromptDumpDir = result.promptDumpDir;
17006
+ }
17007
+ });
17008
+ progressReporter.finish();
17009
+ const summary = calculateEvaluationSummary(allResults);
17010
+ console.log(formatEvaluationSummary(summary));
17011
+ if (allResults.length > 0) {
17012
+ console.log(`
17013
+ Results written to: ${outputPath}`);
17038
17014
  }
17015
+ if (lastPromptDumpDir && allResults.length > 0) {
17016
+ console.log(`Prompt payloads saved to: ${lastPromptDumpDir}`);
17017
+ }
17018
+ } finally {
17019
+ unsubscribeCodexLogs();
17020
+ await outputWriter.close().catch(() => void 0);
17039
17021
  }
17040
- return {
17041
- valid: errors.filter((e) => e.severity === "error").length === 0,
17042
- filePath: absolutePath,
17043
- fileType: "targets",
17044
- errors
17045
- };
17046
17022
  }
17047
- var SCHEMA_CONFIG_V222 = "agentv-config-v2";
17048
- async function validateConfigFile(filePath) {
17049
- const errors = [];
17050
- try {
17051
- const content = await readFile42(filePath, "utf8");
17052
- const parsed = parse42(content);
17053
- if (typeof parsed !== "object" || parsed === null) {
17054
- errors.push({
17055
- severity: "error",
17056
- filePath,
17057
- message: "Config file must contain a valid YAML object"
17058
- });
17059
- return { valid: false, filePath, fileType: "config", errors };
17023
+ async function resolveEvaluationRunner() {
17024
+ const overridePath = process.env.AGENTEVO_CLI_EVAL_RUNNER;
17025
+ if (!overridePath) {
17026
+ return runEvaluation;
17027
+ }
17028
+ const resolved = path14.isAbsolute(overridePath) ? overridePath : path14.resolve(process.cwd(), overridePath);
17029
+ const moduleUrl = pathToFileURL(resolved).href;
17030
+ const mod = await import(moduleUrl);
17031
+ const candidate = mod.runEvaluation;
17032
+ if (typeof candidate !== "function") {
17033
+ throw new Error(
17034
+ `Module '${resolved}' must export a 'runEvaluation' function to override the default implementation`
17035
+ );
17036
+ }
17037
+ return candidate;
17038
+ }
17039
+
17040
+ // src/commands/eval/index.ts
17041
+ function parseInteger(value, fallback) {
17042
+ const parsed = Number.parseInt(value, 10);
17043
+ if (Number.isNaN(parsed)) {
17044
+ return fallback;
17045
+ }
17046
+ return parsed;
17047
+ }
17048
+ function registerEvalCommand(program) {
17049
+ program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
17050
+ "--workers <count>",
17051
+ "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
17052
+ (value) => parseInteger(value, 1)
17053
+ ).option("--out <path>", "Write results to the specified path").option(
17054
+ "--output-format <format>",
17055
+ "Output format: 'jsonl' or 'yaml' (default: jsonl)",
17056
+ "jsonl"
17057
+ ).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
17058
+ "--dry-run-delay <ms>",
17059
+ "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
17060
+ (value) => parseInteger(value, 0),
17061
+ 0
17062
+ ).option(
17063
+ "--dry-run-delay-min <ms>",
17064
+ "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
17065
+ (value) => parseInteger(value, 0),
17066
+ 0
17067
+ ).option(
17068
+ "--dry-run-delay-max <ms>",
17069
+ "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
17070
+ (value) => parseInteger(value, 0),
17071
+ 0
17072
+ ).option(
17073
+ "--agent-timeout <seconds>",
17074
+ "Timeout in seconds for provider responses (default: 120)",
17075
+ (value) => parseInteger(value, 120),
17076
+ 120
17077
+ ).option(
17078
+ "--max-retries <count>",
17079
+ "Retry count for timeout recoveries (default: 2)",
17080
+ (value) => parseInteger(value, 2),
17081
+ 2
17082
+ ).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
17083
+ "--dump-prompts [dir]",
17084
+ "Persist prompt payloads for debugging (optional custom directory)"
17085
+ ).action(async (evalPaths, rawOptions) => {
17086
+ const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
17087
+ await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
17088
+ });
17089
+ return program;
17090
+ }
17091
+ async function resolveEvalPaths(evalPaths, cwd) {
17092
+ const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
17093
+ if (normalizedInputs.length === 0) {
17094
+ throw new Error("No eval paths provided.");
17095
+ }
17096
+ const unmatched = [];
17097
+ const results = /* @__PURE__ */ new Set();
17098
+ for (const pattern of normalizedInputs) {
17099
+ const candidatePath = path15.isAbsolute(pattern) ? path15.normalize(pattern) : path15.resolve(cwd, pattern);
17100
+ try {
17101
+ const stats = await stat3(candidatePath);
17102
+ if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
17103
+ results.add(candidatePath);
17104
+ continue;
17105
+ }
17106
+ } catch {
17060
17107
  }
17061
- const config = parsed;
17062
- const schema = config["$schema"];
17063
- if (schema !== SCHEMA_CONFIG_V222) {
17064
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
17065
- errors.push({
17066
- severity: "error",
17067
- filePath,
17068
- location: "$schema",
17069
- message
17070
- });
17108
+ const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
17109
+ const matches = await fg(globPattern, {
17110
+ cwd,
17111
+ absolute: true,
17112
+ onlyFiles: true,
17113
+ unique: true,
17114
+ dot: true,
17115
+ followSymbolicLinks: true
17116
+ });
17117
+ const yamlMatches = matches.filter((filePath) => /\.ya?ml$/i.test(filePath));
17118
+ if (yamlMatches.length === 0) {
17119
+ unmatched.push(pattern);
17120
+ continue;
17071
17121
  }
17072
- const guidelinePatterns = config["guideline_patterns"];
17073
- if (guidelinePatterns !== void 0) {
17074
- if (!Array.isArray(guidelinePatterns)) {
17075
- errors.push({
17076
- severity: "error",
17077
- filePath,
17078
- location: "guideline_patterns",
17079
- message: "Field 'guideline_patterns' must be an array"
17080
- });
17081
- } else if (!guidelinePatterns.every((p) => typeof p === "string")) {
17082
- errors.push({
17083
- severity: "error",
17084
- filePath,
17085
- location: "guideline_patterns",
17086
- message: "All entries in 'guideline_patterns' must be strings"
17087
- });
17088
- } else if (guidelinePatterns.length === 0) {
17089
- errors.push({
17090
- severity: "warning",
17091
- filePath,
17092
- location: "guideline_patterns",
17093
- message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
17122
+ yamlMatches.forEach((filePath) => results.add(path15.normalize(filePath)));
17123
+ }
17124
+ if (unmatched.length > 0) {
17125
+ throw new Error(
17126
+ `No eval files matched: ${unmatched.join(
17127
+ ", "
17128
+ )}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
17129
+ );
17130
+ }
17131
+ const sorted = Array.from(results);
17132
+ sorted.sort();
17133
+ return sorted;
17134
+ }
17135
+
17136
+ // src/commands/init/index.ts
17137
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
17138
+ import path17 from "node:path";
17139
+ import * as readline from "node:readline/promises";
17140
+
17141
+ // src/templates/index.ts
17142
+ import { readFileSync, readdirSync, statSync } from "node:fs";
17143
+ import path16 from "node:path";
17144
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
17145
+ var TemplateManager = class {
17146
+ static getGithubTemplates() {
17147
+ return this.getTemplatesFromDir("github");
17148
+ }
17149
+ static getAgentvTemplates() {
17150
+ return this.getTemplatesFromDir("agentv");
17151
+ }
17152
+ static getTemplatesFromDir(subdir) {
17153
+ const currentDir = path16.dirname(fileURLToPath2(import.meta.url));
17154
+ let templatesDir;
17155
+ if (currentDir.includes(path16.sep + "dist")) {
17156
+ templatesDir = path16.join(currentDir, "templates", subdir);
17157
+ } else {
17158
+ templatesDir = path16.join(currentDir, subdir);
17159
+ }
17160
+ return this.readTemplatesRecursively(templatesDir, "");
17161
+ }
17162
+ static readTemplatesRecursively(dir, relativePath) {
17163
+ const templates = [];
17164
+ const entries = readdirSync(dir);
17165
+ for (const entry of entries) {
17166
+ const fullPath = path16.join(dir, entry);
17167
+ const stat5 = statSync(fullPath);
17168
+ const entryRelativePath = relativePath ? path16.join(relativePath, entry) : entry;
17169
+ if (stat5.isDirectory()) {
17170
+ templates.push(...this.readTemplatesRecursively(fullPath, entryRelativePath));
17171
+ } else {
17172
+ const content = readFileSync(fullPath, "utf-8");
17173
+ templates.push({
17174
+ path: entryRelativePath.split(path16.sep).join("/"),
17175
+ // Normalize to forward slashes
17176
+ content
17094
17177
  });
17095
17178
  }
17096
17179
  }
17097
- const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
17098
- const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
17099
- if (unexpectedFields.length > 0) {
17100
- errors.push({
17101
- severity: "warning",
17102
- filePath,
17103
- message: `Unexpected fields: ${unexpectedFields.join(", ")}`
17104
- });
17105
- }
17106
- return {
17107
- valid: errors.filter((e) => e.severity === "error").length === 0,
17108
- filePath,
17109
- fileType: "config",
17110
- errors
17111
- };
17112
- } catch (error) {
17113
- errors.push({
17114
- severity: "error",
17115
- filePath,
17116
- message: `Failed to parse config file: ${error.message}`
17117
- });
17118
- return { valid: false, filePath, fileType: "config", errors };
17119
- }
17120
- }
17121
- function isObject3(value) {
17122
- return typeof value === "object" && value !== null && !Array.isArray(value);
17123
- }
17124
- async function validateFileReferences(evalFilePath) {
17125
- const errors = [];
17126
- const absolutePath = path33.resolve(evalFilePath);
17127
- const gitRoot = await findGitRoot(absolutePath);
17128
- if (!gitRoot) {
17129
- errors.push({
17130
- severity: "error",
17131
- filePath: absolutePath,
17132
- message: "Cannot validate file references: git repository root not found"
17133
- });
17134
- return errors;
17180
+ return templates;
17135
17181
  }
17136
- const searchRoots = buildSearchRoots(absolutePath, gitRoot);
17137
- let parsed;
17182
+ };
17183
+
17184
+ // src/commands/init/index.ts
17185
+ async function promptYesNo(message) {
17186
+ const rl2 = readline.createInterface({
17187
+ input: process.stdin,
17188
+ output: process.stdout
17189
+ });
17138
17190
  try {
17139
- const content = await readFile52(absolutePath, "utf8");
17140
- parsed = parse52(content);
17141
- } catch {
17142
- return errors;
17191
+ const answer = await rl2.question(`${message} (y/N): `);
17192
+ return answer.toLowerCase() === "y" || answer.toLowerCase() === "yes";
17193
+ } finally {
17194
+ rl2.close();
17143
17195
  }
17144
- if (!isObject3(parsed)) {
17145
- return errors;
17196
+ }
17197
+ async function initCommand(options = {}) {
17198
+ const targetPath = path17.resolve(options.targetPath ?? ".");
17199
+ const githubDir = path17.join(targetPath, ".github");
17200
+ const agentvDir = path17.join(targetPath, ".agentv");
17201
+ const githubTemplates = TemplateManager.getGithubTemplates();
17202
+ const agentvTemplates = TemplateManager.getAgentvTemplates();
17203
+ const existingFiles = [];
17204
+ if (existsSync(githubDir)) {
17205
+ for (const template of githubTemplates) {
17206
+ const targetFilePath = path17.join(githubDir, template.path);
17207
+ if (existsSync(targetFilePath)) {
17208
+ existingFiles.push(path17.relative(targetPath, targetFilePath));
17209
+ }
17210
+ }
17146
17211
  }
17147
- const evalcases = parsed["evalcases"];
17148
- if (!Array.isArray(evalcases)) {
17149
- return errors;
17212
+ if (existsSync(agentvDir)) {
17213
+ for (const template of agentvTemplates) {
17214
+ const targetFilePath = path17.join(agentvDir, template.path);
17215
+ if (existsSync(targetFilePath)) {
17216
+ existingFiles.push(path17.relative(targetPath, targetFilePath));
17217
+ }
17218
+ }
17150
17219
  }
17151
- for (let i6 = 0; i6 < evalcases.length; i6++) {
17152
- const evalCase = evalcases[i6];
17153
- if (!isObject3(evalCase)) {
17154
- continue;
17220
+ if (existingFiles.length > 0) {
17221
+ console.log("We detected an existing setup:");
17222
+ existingFiles.forEach((file) => console.log(` - ${file}`));
17223
+ console.log();
17224
+ const shouldReplace = await promptYesNo("Do you want to replace these files?");
17225
+ if (!shouldReplace) {
17226
+ console.log("\nInit cancelled. No files were changed.");
17227
+ return;
17155
17228
  }
17156
- const inputMessages = evalCase["input_messages"];
17157
- if (Array.isArray(inputMessages)) {
17158
- await validateMessagesFileRefs(inputMessages, `evalcases[${i6}].input_messages`, searchRoots, absolutePath, errors);
17229
+ console.log();
17230
+ }
17231
+ if (!existsSync(githubDir)) {
17232
+ mkdirSync(githubDir, { recursive: true });
17233
+ }
17234
+ if (!existsSync(agentvDir)) {
17235
+ mkdirSync(agentvDir, { recursive: true });
17236
+ }
17237
+ for (const template of githubTemplates) {
17238
+ const targetFilePath = path17.join(githubDir, template.path);
17239
+ const targetDirPath = path17.dirname(targetFilePath);
17240
+ if (!existsSync(targetDirPath)) {
17241
+ mkdirSync(targetDirPath, { recursive: true });
17159
17242
  }
17160
- const expectedMessages = evalCase["expected_messages"];
17161
- if (Array.isArray(expectedMessages)) {
17162
- await validateMessagesFileRefs(expectedMessages, `evalcases[${i6}].expected_messages`, searchRoots, absolutePath, errors);
17243
+ writeFileSync(targetFilePath, template.content, "utf-8");
17244
+ console.log(`Created ${path17.relative(targetPath, targetFilePath)}`);
17245
+ }
17246
+ for (const template of agentvTemplates) {
17247
+ const targetFilePath = path17.join(agentvDir, template.path);
17248
+ const targetDirPath = path17.dirname(targetFilePath);
17249
+ if (!existsSync(targetDirPath)) {
17250
+ mkdirSync(targetDirPath, { recursive: true });
17163
17251
  }
17252
+ writeFileSync(targetFilePath, template.content, "utf-8");
17253
+ console.log(`Created ${path17.relative(targetPath, targetFilePath)}`);
17164
17254
  }
17165
- return errors;
17255
+ console.log("\nAgentV initialized successfully!");
17256
+ console.log(`
17257
+ Files installed to ${path17.relative(targetPath, githubDir)}:`);
17258
+ githubTemplates.forEach((t) => console.log(` - ${t.path}`));
17259
+ console.log(`
17260
+ Files installed to ${path17.relative(targetPath, agentvDir)}:`);
17261
+ agentvTemplates.forEach((t) => console.log(` - ${t.path}`));
17262
+ console.log("\nYou can now:");
17263
+ console.log(" 1. Edit .agentv/.env with your API credentials");
17264
+ console.log(" 2. Configure targets in .agentv/targets.yaml");
17265
+ console.log(" 3. Create eval files using the schema and prompt templates");
17166
17266
  }
17167
- async function validateMessagesFileRefs(messages, location, searchRoots, filePath, errors) {
17168
- for (let i6 = 0; i6 < messages.length; i6++) {
17169
- const message = messages[i6];
17170
- if (!isObject3(message)) {
17171
- continue;
17267
+
17268
+ // src/commands/status.ts
17269
+ function registerStatusCommand(program) {
17270
+ program.command("status").description("Show the latest AgentV kernel status").action(() => {
17271
+ const kernel = createAgentKernel();
17272
+ console.log(`Kernel status: ${kernel.status}`);
17273
+ });
17274
+ return program;
17275
+ }
17276
+
17277
+ // src/commands/validate/format-output.ts
17278
+ var ANSI_RED2 = "\x1B[31m";
17279
+ var ANSI_YELLOW3 = "\x1B[33m";
17280
+ var ANSI_GREEN = "\x1B[32m";
17281
+ var ANSI_CYAN = "\x1B[36m";
17282
+ var ANSI_BOLD = "\x1B[1m";
17283
+ var ANSI_RESET3 = "\x1B[0m";
17284
+ function formatSummary(summary, useColors) {
17285
+ const lines = [];
17286
+ lines.push("");
17287
+ lines.push(formatHeader("Validation Summary", useColors));
17288
+ lines.push("");
17289
+ for (const result of summary.results) {
17290
+ lines.push(formatFileResult(result, useColors));
17291
+ }
17292
+ lines.push("");
17293
+ lines.push(formatStats(summary, useColors));
17294
+ lines.push("");
17295
+ return lines.join("\n");
17296
+ }
17297
+ function formatHeader(text, useColors) {
17298
+ if (useColors) {
17299
+ return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET3}`;
17300
+ }
17301
+ return text;
17302
+ }
17303
+ function formatFileResult(result, useColors) {
17304
+ const lines = [];
17305
+ const status = result.valid ? "\u2713" : "\u2717";
17306
+ const statusColor = result.valid ? ANSI_GREEN : ANSI_RED2;
17307
+ const statusText = useColors ? `${statusColor}${status}${ANSI_RESET3}` : status;
17308
+ const fileName = result.filePath;
17309
+ lines.push(`${statusText} ${fileName}`);
17310
+ if (result.errors.length > 0) {
17311
+ for (const error of result.errors) {
17312
+ lines.push(formatError(error, useColors));
17172
17313
  }
17173
- const content = message["content"];
17174
- if (typeof content === "string") {
17175
- continue;
17314
+ }
17315
+ return lines.join("\n");
17316
+ }
17317
+ function formatError(error, useColors) {
17318
+ const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
17319
+ const color = error.severity === "error" ? ANSI_RED2 : ANSI_YELLOW3;
17320
+ const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET3}` : prefix;
17321
+ const location = error.location ? ` [${error.location}]` : "";
17322
+ return `${coloredPrefix}${location} ${error.message}`;
17323
+ }
17324
+ function formatStats(summary, useColors) {
17325
+ const lines = [];
17326
+ const totalText = `Total files: ${summary.totalFiles}`;
17327
+ const validText = `Valid: ${summary.validFiles}`;
17328
+ const invalidText = `Invalid: ${summary.invalidFiles}`;
17329
+ const filesWithWarnings = summary.results.filter(
17330
+ (r) => r.errors.some((e) => e.severity === "warning")
17331
+ ).length;
17332
+ if (useColors) {
17333
+ lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET3}`);
17334
+ lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET3}`);
17335
+ if (summary.invalidFiles > 0) {
17336
+ lines.push(`${ANSI_RED2}${invalidText}${ANSI_RESET3}`);
17337
+ } else {
17338
+ lines.push(invalidText);
17176
17339
  }
17177
- if (!Array.isArray(content)) {
17178
- continue;
17340
+ if (filesWithWarnings > 0) {
17341
+ lines.push(`${ANSI_YELLOW3}Files with warnings: ${filesWithWarnings}${ANSI_RESET3}`);
17179
17342
  }
17180
- for (let j2 = 0; j2 < content.length; j2++) {
17181
- const contentItem = content[j2];
17182
- if (!isObject3(contentItem)) {
17183
- continue;
17184
- }
17185
- const type = contentItem["type"];
17186
- if (type !== "file") {
17187
- continue;
17188
- }
17189
- const value = contentItem["value"];
17190
- if (typeof value !== "string") {
17191
- errors.push({
17192
- severity: "error",
17193
- filePath,
17194
- location: `${location}[${i6}].content[${j2}].value`,
17195
- message: "File reference must have a 'value' field with the file path"
17196
- });
17197
- continue;
17198
- }
17199
- const { resolvedPath } = await resolveFileReference(value, searchRoots);
17200
- if (!resolvedPath) {
17201
- errors.push({
17202
- severity: "error",
17203
- filePath,
17204
- location: `${location}[${i6}].content[${j2}]`,
17205
- message: `Referenced file not found: ${value}`
17206
- });
17207
- } else {
17208
- try {
17209
- const fileContent = await readFile52(resolvedPath, "utf8");
17210
- if (fileContent.trim().length === 0) {
17211
- errors.push({
17212
- severity: "warning",
17213
- filePath,
17214
- location: `${location}[${i6}].content[${j2}]`,
17215
- message: `Referenced file is empty: ${value}`
17216
- });
17217
- }
17218
- } catch (error) {
17219
- errors.push({
17220
- severity: "error",
17221
- filePath,
17222
- location: `${location}[${i6}].content[${j2}]`,
17223
- message: `Cannot read referenced file: ${value} (${error.message})`
17224
- });
17225
- }
17226
- }
17343
+ } else {
17344
+ lines.push(totalText);
17345
+ lines.push(validText);
17346
+ lines.push(invalidText);
17347
+ if (filesWithWarnings > 0) {
17348
+ lines.push(`Files with warnings: ${filesWithWarnings}`);
17227
17349
  }
17228
17350
  }
17351
+ return lines.join("\n");
17352
+ }
17353
+ function isTTY2() {
17354
+ return process.stdout.isTTY ?? false;
17229
17355
  }
17230
17356
 
17231
17357
  // src/commands/validate/validate-files.ts
@@ -17340,7 +17466,7 @@ async function runValidateCommand(paths, _options) {
17340
17466
  process.exit(1);
17341
17467
  }
17342
17468
  const summary = await validateFiles(paths);
17343
- const useColors = isTTY();
17469
+ const useColors = isTTY2();
17344
17470
  console.log(formatSummary(summary, useColors));
17345
17471
  if (summary.invalidFiles > 0) {
17346
17472
  process.exit(1);
@@ -17386,4 +17512,4 @@ export {
17386
17512
  createProgram,
17387
17513
  runCli
17388
17514
  };
17389
- //# sourceMappingURL=chunk-X6NSDSD2.js.map
17515
+ //# sourceMappingURL=chunk-GWH4WZTW.js.map