agentv 0.2.3 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -585,7 +585,7 @@ var require_utc = __commonJS({
585
585
  import { Command } from "commander";
586
586
  import { readFileSync as readFileSync2 } from "node:fs";
587
587
 
588
- // ../../packages/core/dist/chunk-5REK5RSI.js
588
+ // ../../packages/core/dist/chunk-XXNQA4EW.js
589
589
  import { constants } from "node:fs";
590
590
  import { access } from "node:fs/promises";
591
591
  import path from "node:path";
@@ -613,6 +613,30 @@ async function findGitRoot(startPath) {
613
613
  }
614
614
  return null;
615
615
  }
616
+ function buildDirectoryChain(filePath, repoRoot) {
617
+ const directories = [];
618
+ const seen = /* @__PURE__ */ new Set();
619
+ const boundary = path.resolve(repoRoot);
620
+ let current = path.resolve(path.dirname(filePath));
621
+ while (current !== void 0) {
622
+ if (!seen.has(current)) {
623
+ directories.push(current);
624
+ seen.add(current);
625
+ }
626
+ if (current === boundary) {
627
+ break;
628
+ }
629
+ const parent = path.dirname(current);
630
+ if (parent === current) {
631
+ break;
632
+ }
633
+ current = parent;
634
+ }
635
+ if (!seen.has(boundary)) {
636
+ directories.push(boundary);
637
+ }
638
+ return directories;
639
+ }
616
640
  function buildSearchRoots(evalPath, repoRoot) {
617
641
  const uniqueRoots = [];
618
642
  const addRoot = (root2) => {
@@ -664,8 +688,32 @@ async function resolveFileReference(rawValue, searchRoots) {
664
688
  }
665
689
  return { displayPath, attempted };
666
690
  }
691
+ var KNOWN_PROVIDERS = [
692
+ "azure",
693
+ "anthropic",
694
+ "gemini",
695
+ "mock",
696
+ "vscode",
697
+ "vscode-insiders"
698
+ ];
699
+ var PROVIDER_ALIASES = [
700
+ "azure-openai",
701
+ // alias for "azure"
702
+ "google",
703
+ // alias for "gemini"
704
+ "google-gemini",
705
+ // alias for "gemini"
706
+ "openai",
707
+ // legacy/future support
708
+ "bedrock",
709
+ // legacy/future support
710
+ "vertex"
711
+ // legacy/future support
712
+ ];
713
+ var TARGETS_SCHEMA_V2 = "agentv-targets-v2";
667
714
 
668
715
  // ../../packages/core/dist/index.js
716
+ import micromatch from "micromatch";
669
717
  import { constants as constants3 } from "node:fs";
670
718
  import { access as access3, readFile as readFile2 } from "node:fs/promises";
671
719
  import path7 from "node:path";
@@ -9025,17 +9073,16 @@ var coerce = {
9025
9073
  var NEVER = INVALID;
9026
9074
 
9027
9075
  // ../../packages/core/dist/index.js
9028
- import { mkdtemp, readFile as readFile22, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
9029
- import { tmpdir } from "node:os";
9076
+ import { readFile as readFile22 } from "node:fs/promises";
9030
9077
  import path22 from "node:path";
9031
9078
 
9032
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/vscode/agentDispatch.js
9079
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/agentDispatch.js
9033
9080
  import { exec, spawn } from "child_process";
9034
9081
  import { copyFile, mkdir as mkdir2, readdir as readdir2, readFile, stat as stat2, writeFile } from "fs/promises";
9035
9082
  import path5 from "path";
9036
9083
  import { promisify } from "util";
9037
9084
 
9038
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/vscode/constants.js
9085
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/constants.js
9039
9086
  import os from "os";
9040
9087
  import path2 from "path";
9041
9088
  var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -9047,7 +9094,7 @@ var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
9047
9094
  var DEFAULT_WAKEUP_FILENAME = "wakeup.chatmode.md";
9048
9095
  var DEFAULT_ALIVE_FILENAME = ".alive";
9049
9096
 
9050
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/utils/fs.js
9097
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/fs.js
9051
9098
  import { constants as constants2 } from "fs";
9052
9099
  import { access as access2, mkdir, readdir, rm, stat } from "fs/promises";
9053
9100
  import path3 from "path";
@@ -9080,14 +9127,14 @@ async function removeIfExists(target) {
9080
9127
  }
9081
9128
  }
9082
9129
 
9083
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/utils/time.js
9130
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/time.js
9084
9131
  function sleep(ms2) {
9085
9132
  return new Promise((resolve) => {
9086
9133
  setTimeout(resolve, ms2);
9087
9134
  });
9088
9135
  }
9089
9136
 
9090
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/utils/workspace.js
9137
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/workspace.js
9091
9138
  import path4 from "path";
9092
9139
 
9093
9140
  // ../../node_modules/.pnpm/json5@2.2.3/node_modules/json5/dist/index.mjs
@@ -10175,7 +10222,7 @@ var JSON5 = {
10175
10222
  var lib = JSON5;
10176
10223
  var dist_default = lib;
10177
10224
 
10178
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/utils/workspace.js
10225
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/utils/workspace.js
10179
10226
  function transformWorkspacePaths(workspaceContent, templateDir) {
10180
10227
  let workspace;
10181
10228
  try {
@@ -10248,7 +10295,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
10248
10295
  return JSON.stringify(transformedWorkspace, null, 2);
10249
10296
  }
10250
10297
 
10251
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/vscode/agentDispatch.js
10298
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/agentDispatch.js
10252
10299
  var execAsync = promisify(exec);
10253
10300
  var DEFAULT_WORKSPACE_TEMPLATE = {
10254
10301
  folders: [
@@ -10430,9 +10477,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
10430
10477
  return 0;
10431
10478
  }
10432
10479
  function createRequestPrompt(userQuery, responseFileTmp, responseFileFinal, subagentName, vscodeCmd) {
10433
- const escapedUserQuery = userQuery.replace(/`/g, "\\`");
10434
10480
  return `[[ ## task ## ]]
10435
- ${escapedUserQuery}
10481
+
10482
+ ${userQuery}
10436
10483
 
10437
10484
  [[ ## system_instructions ## ]]
10438
10485
 
@@ -10589,7 +10636,7 @@ async function dispatchAgentSession(options) {
10589
10636
  }
10590
10637
  }
10591
10638
 
10592
- // ../../node_modules/.pnpm/subagent@0.4.1/node_modules/subagent/dist/vscode/provision.js
10639
+ // ../../node_modules/.pnpm/subagent@0.4.2/node_modules/subagent/dist/vscode/provision.js
10593
10640
  import { writeFile as writeFile2 } from "fs/promises";
10594
10641
  import path6 from "path";
10595
10642
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
@@ -10762,9 +10809,52 @@ var CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
10762
10809
  var ANSI_YELLOW = "\x1B[33m";
10763
10810
  var ANSI_RESET = "\x1B[0m";
10764
10811
  var SCHEMA_EVAL_V2 = "agentv-eval-v2";
10765
- function isGuidelineFile(filePath) {
10812
+ var SCHEMA_CONFIG_V2 = "agentv-config-v2";
10813
+ async function loadConfig(evalFilePath, repoRoot) {
10814
+ const directories = buildDirectoryChain(evalFilePath, repoRoot);
10815
+ for (const directory of directories) {
10816
+ const configPath = path7.join(directory, ".agentv", "config.yaml");
10817
+ if (!await fileExists2(configPath)) {
10818
+ continue;
10819
+ }
10820
+ try {
10821
+ const rawConfig = await readFile2(configPath, "utf8");
10822
+ const parsed = parse3(rawConfig);
10823
+ if (!isJsonObject(parsed)) {
10824
+ logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
10825
+ continue;
10826
+ }
10827
+ const config = parsed;
10828
+ const schema = config.$schema;
10829
+ if (schema !== SCHEMA_CONFIG_V2) {
10830
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
10831
+ Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
10832
+ logWarning(message);
10833
+ continue;
10834
+ }
10835
+ const guidelinePatterns = config.guideline_patterns;
10836
+ if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
10837
+ logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
10838
+ continue;
10839
+ }
10840
+ if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
10841
+ logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
10842
+ continue;
10843
+ }
10844
+ return {
10845
+ guideline_patterns: guidelinePatterns
10846
+ };
10847
+ } catch (error) {
10848
+ logWarning(`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`);
10849
+ continue;
10850
+ }
10851
+ }
10852
+ return null;
10853
+ }
10854
+ function isGuidelineFile(filePath, patterns) {
10766
10855
  const normalized = filePath.split("\\").join("/");
10767
- return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
10856
+ const patternsToUse = patterns ?? [];
10857
+ return micromatch.isMatch(normalized, patternsToUse);
10768
10858
  }
10769
10859
  function extractCodeBlocks(segments) {
10770
10860
  const codeBlocks = [];
@@ -10784,43 +10874,45 @@ function extractCodeBlocks(segments) {
10784
10874
  }
10785
10875
  return codeBlocks;
10786
10876
  }
10787
- async function loadTestCases(testFilePath, repoRoot, options) {
10877
+ async function loadEvalCases(evalFilePath, repoRoot, options) {
10788
10878
  const verbose = options?.verbose ?? false;
10789
- const absoluteTestPath = path7.resolve(testFilePath);
10879
+ const absoluteTestPath = path7.resolve(evalFilePath);
10790
10880
  if (!await fileExists2(absoluteTestPath)) {
10791
- throw new Error(`Test file not found: ${testFilePath}`);
10881
+ throw new Error(`Test file not found: ${evalFilePath}`);
10792
10882
  }
10793
10883
  const repoRootPath = resolveToAbsolutePath(repoRoot);
10794
10884
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
10885
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
10886
+ const guidelinePatterns = config?.guideline_patterns;
10795
10887
  const rawFile = await readFile2(absoluteTestPath, "utf8");
10796
10888
  const parsed = parse3(rawFile);
10797
10889
  if (!isJsonObject(parsed)) {
10798
- throw new Error(`Invalid test file format: ${testFilePath}`);
10890
+ throw new Error(`Invalid test file format: ${evalFilePath}`);
10799
10891
  }
10800
10892
  const suite = parsed;
10801
10893
  const schema = suite.$schema;
10802
10894
  if (schema !== SCHEMA_EVAL_V2) {
10803
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${testFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${testFilePath}.
10895
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
10804
10896
  Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10805
10897
  throw new Error(message);
10806
10898
  }
10807
10899
  const rawTestcases = suite.evalcases;
10808
10900
  if (!Array.isArray(rawTestcases)) {
10809
- throw new Error(`Invalid test file format: ${testFilePath} - missing 'evalcases' field`);
10901
+ throw new Error(`Invalid test file format: ${evalFilePath} - missing 'evalcases' field`);
10810
10902
  }
10811
10903
  const globalGrader = coerceGrader(suite.grader) ?? "llm_judge";
10812
10904
  const results = [];
10813
- for (const rawTestcase of rawTestcases) {
10814
- if (!isJsonObject(rawTestcase)) {
10905
+ for (const rawEvalcase of rawTestcases) {
10906
+ if (!isJsonObject(rawEvalcase)) {
10815
10907
  logWarning("Skipping invalid test case entry (expected object)");
10816
10908
  continue;
10817
10909
  }
10818
- const testcase = rawTestcase;
10819
- const id = asString(testcase.id);
10820
- const conversationId = asString(testcase.conversation_id);
10821
- const outcome = asString(testcase.outcome);
10822
- const inputMessagesValue = testcase.input_messages;
10823
- const expectedMessagesValue = testcase.expected_messages;
10910
+ const evalcase = rawEvalcase;
10911
+ const id = asString(evalcase.id);
10912
+ const conversationId = asString(evalcase.conversation_id);
10913
+ const outcome = asString(evalcase.outcome);
10914
+ const inputMessagesValue = evalcase.input_messages;
10915
+ const expectedMessagesValue = evalcase.expected_messages;
10824
10916
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
10825
10917
  logWarning(`Skipping incomplete test case: ${id ?? "unknown"}`);
10826
10918
  continue;
@@ -10833,6 +10925,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10833
10925
  const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
10834
10926
  const assistantMessages = expectedMessages.filter((message) => message.role === "assistant");
10835
10927
  const userMessages = inputMessages.filter((message) => message.role === "user");
10928
+ const systemMessages = inputMessages.filter((message) => message.role === "system");
10836
10929
  if (assistantMessages.length === 0) {
10837
10930
  logWarning(`No assistant message found for test case: ${id}`);
10838
10931
  continue;
@@ -10840,6 +10933,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10840
10933
  if (assistantMessages.length > 1) {
10841
10934
  logWarning(`Multiple assistant messages found for test case: ${id}, using first`);
10842
10935
  }
10936
+ if (systemMessages.length > 1) {
10937
+ logWarning(`Multiple system messages found for test case: ${id}, using first`);
10938
+ }
10939
+ let systemMessageContent;
10940
+ if (systemMessages.length > 0) {
10941
+ const content = systemMessages[0]?.content;
10942
+ if (typeof content === "string") {
10943
+ systemMessageContent = content;
10944
+ } else if (Array.isArray(content)) {
10945
+ const textParts = [];
10946
+ for (const segment of content) {
10947
+ if (isJsonObject(segment)) {
10948
+ const value = segment.value;
10949
+ if (typeof value === "string") {
10950
+ textParts.push(value);
10951
+ }
10952
+ }
10953
+ }
10954
+ if (textParts.length > 0) {
10955
+ systemMessageContent = textParts.join("\n\n");
10956
+ }
10957
+ }
10958
+ }
10843
10959
  const userSegments = [];
10844
10960
  const guidelinePaths = [];
10845
10961
  const userTextParts = [];
@@ -10871,7 +10987,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10871
10987
  }
10872
10988
  try {
10873
10989
  const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
10874
- if (isGuidelineFile(displayPath)) {
10990
+ const relativeToRepo = path7.relative(repoRootPath, resolvedPath);
10991
+ if (isGuidelineFile(relativeToRepo, guidelinePatterns)) {
10875
10992
  guidelinePaths.push(path7.resolve(resolvedPath));
10876
10993
  if (verbose) {
10877
10994
  console.log(` [Guideline] Found: ${displayPath}`);
@@ -10881,7 +10998,8 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10881
10998
  userSegments.push({
10882
10999
  type: "file",
10883
11000
  path: displayPath,
10884
- text: fileContent
11001
+ text: fileContent,
11002
+ resolvedPath: path7.resolve(resolvedPath)
10885
11003
  });
10886
11004
  if (verbose) {
10887
11005
  console.log(` [File] Found: ${displayPath}`);
@@ -10903,16 +11021,29 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
10903
11021
  }
10904
11022
  const codeSnippets = extractCodeBlocks(userSegments);
10905
11023
  const assistantContent = assistantMessages[0]?.content;
10906
- const expectedAssistantRaw = normalizeAssistantContent(assistantContent);
11024
+ const expectedAssistantRaw = await resolveAssistantContent(assistantContent, searchRoots, verbose);
10907
11025
  const userTextPrompt = userTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
10908
- const testCaseGrader = coerceGrader(testcase.grader) ?? globalGrader;
11026
+ const testCaseGrader = coerceGrader(evalcase.grader) ?? globalGrader;
11027
+ const userFilePaths = [];
11028
+ for (const segment of userSegments) {
11029
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
11030
+ userFilePaths.push(segment.resolvedPath);
11031
+ }
11032
+ }
11033
+ const allFilePaths = [
11034
+ ...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
11035
+ ...userFilePaths
11036
+ ];
10909
11037
  const testCase = {
10910
11038
  id,
10911
11039
  conversation_id: conversationId,
10912
11040
  task: userTextPrompt,
10913
11041
  user_segments: userSegments,
11042
+ system_message: systemMessageContent,
10914
11043
  expected_assistant_raw: expectedAssistantRaw,
10915
11044
  guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
11045
+ guideline_patterns: guidelinePatterns,
11046
+ file_paths: allFilePaths,
10916
11047
  code_snippets: codeSnippets,
10917
11048
  outcome,
10918
11049
  grader: testCaseGrader
@@ -10978,7 +11109,7 @@ ${body}`);
10978
11109
  }
10979
11110
  const request = requestParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
10980
11111
  const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
10981
- return { request, guidelines };
11112
+ return { request, guidelines, systemMessage: testCase.system_message };
10982
11113
  }
10983
11114
  async function fileExists2(absolutePath) {
10984
11115
  try {
@@ -11019,7 +11150,7 @@ function cloneJsonValue(value) {
11019
11150
  }
11020
11151
  return cloneJsonObject(value);
11021
11152
  }
11022
- function normalizeAssistantContent(content) {
11153
+ async function resolveAssistantContent(content, searchRoots, verbose) {
11023
11154
  if (typeof content === "string") {
11024
11155
  return content;
11025
11156
  }
@@ -11032,12 +11163,42 @@ function normalizeAssistantContent(content) {
11032
11163
  parts.push(entry);
11033
11164
  continue;
11034
11165
  }
11035
- const textValue = asString(entry["text"]);
11166
+ if (!isJsonObject(entry)) {
11167
+ continue;
11168
+ }
11169
+ const segmentType = asString(entry.type);
11170
+ if (segmentType === "file") {
11171
+ const rawValue = asString(entry.value);
11172
+ if (!rawValue) {
11173
+ continue;
11174
+ }
11175
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference(
11176
+ rawValue,
11177
+ searchRoots
11178
+ );
11179
+ if (!resolvedPath) {
11180
+ const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
11181
+ logWarning(`File not found in expected_messages: ${displayPath}`, attempts);
11182
+ continue;
11183
+ }
11184
+ try {
11185
+ const fileContent = (await readFile2(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
11186
+ parts.push(fileContent);
11187
+ if (verbose) {
11188
+ console.log(` [Expected Assistant File] Found: ${displayPath}`);
11189
+ console.log(` Resolved to: ${resolvedPath}`);
11190
+ }
11191
+ } catch (error) {
11192
+ logWarning(`Could not read file ${resolvedPath}: ${error.message}`);
11193
+ }
11194
+ continue;
11195
+ }
11196
+ const textValue = asString(entry.text);
11036
11197
  if (typeof textValue === "string") {
11037
11198
  parts.push(textValue);
11038
11199
  continue;
11039
11200
  }
11040
- const valueValue = asString(entry["value"]);
11201
+ const valueValue = asString(entry.value);
11041
11202
  if (typeof valueValue === "string") {
11042
11203
  parts.push(valueValue);
11043
11204
  continue;
@@ -11071,15 +11232,18 @@ function buildChatPrompt(request) {
11071
11232
  return request.chatPrompt;
11072
11233
  }
11073
11234
  const systemSegments = [];
11074
- if (request.guidelines && request.guidelines.trim().length > 0) {
11075
- systemSegments.push(`Guidelines:
11076
- ${request.guidelines.trim()}`);
11077
- }
11078
11235
  const metadataSystemPrompt = typeof request.metadata?.systemPrompt === "string" ? request.metadata.systemPrompt : void 0;
11079
11236
  if (metadataSystemPrompt && metadataSystemPrompt.trim().length > 0) {
11080
11237
  systemSegments.push(metadataSystemPrompt.trim());
11238
+ } else {
11239
+ systemSegments.push(DEFAULT_SYSTEM_PROMPT);
11081
11240
  }
11082
- const systemContent = systemSegments.length > 0 ? systemSegments.join("\n\n") : DEFAULT_SYSTEM_PROMPT;
11241
+ if (request.guidelines && request.guidelines.trim().length > 0) {
11242
+ systemSegments.push(`[[ ## Guidelines ## ]]
11243
+
11244
+ ${request.guidelines.trim()}`);
11245
+ }
11246
+ const systemContent = systemSegments.join("\n\n");
11083
11247
  const userContent = request.prompt.trim();
11084
11248
  const prompt = [
11085
11249
  {
@@ -11528,7 +11692,6 @@ function resolveOptionalBoolean(source2) {
11528
11692
  function isLikelyEnvReference(value) {
11529
11693
  return /^[A-Z0-9_]+$/.test(value);
11530
11694
  }
11531
- var PROMPT_FILE_PREFIX = "bbeval-vscode-";
11532
11695
  var VSCodeProvider = class {
11533
11696
  id;
11534
11697
  kind;
@@ -11545,128 +11708,89 @@ var VSCodeProvider = class {
11545
11708
  throw new Error("VS Code provider request was aborted before dispatch");
11546
11709
  }
11547
11710
  const attachments = normalizeAttachments(request.attachments);
11548
- const promptContent = buildPromptDocument(request, attachments);
11549
- const directory = await mkdtemp(path22.join(tmpdir(), PROMPT_FILE_PREFIX));
11550
- const promptPath = path22.join(directory, `${request.testCaseId ?? "request"}.prompt.md`);
11551
- try {
11552
- await writeFile3(promptPath, promptContent, "utf8");
11553
- const session = await dispatchAgentSession({
11554
- userQuery: composeUserQuery(request),
11555
- promptFile: promptPath,
11556
- extraAttachments: attachments,
11557
- wait: this.config.waitForResponse,
11558
- dryRun: this.config.dryRun,
11559
- vscodeCmd: this.config.command,
11560
- subagentRoot: this.config.subagentRoot,
11561
- workspaceTemplate: this.config.workspaceTemplate,
11562
- silent: true
11563
- });
11564
- if (session.exitCode !== 0 || !session.responseFile) {
11565
- const failure = session.error ?? "VS Code subagent did not produce a response";
11566
- throw new Error(failure);
11567
- }
11568
- if (this.config.dryRun) {
11569
- return {
11570
- text: "",
11571
- raw: {
11572
- session,
11573
- promptFile: promptPath,
11574
- attachments
11575
- }
11576
- };
11577
- }
11578
- const responseText = await readFile22(session.responseFile, "utf8");
11711
+ const promptContent = buildPromptDocument(request, attachments, request.guideline_patterns);
11712
+ const session = await dispatchAgentSession({
11713
+ userQuery: promptContent,
11714
+ // Use full prompt content instead of just request.prompt
11715
+ extraAttachments: attachments,
11716
+ wait: this.config.waitForResponse,
11717
+ dryRun: this.config.dryRun,
11718
+ vscodeCmd: this.config.command,
11719
+ subagentRoot: this.config.subagentRoot,
11720
+ workspaceTemplate: this.config.workspaceTemplate,
11721
+ silent: true
11722
+ });
11723
+ if (session.exitCode !== 0 || !session.responseFile) {
11724
+ const failure = session.error ?? "VS Code subagent did not produce a response";
11725
+ throw new Error(failure);
11726
+ }
11727
+ if (this.config.dryRun) {
11579
11728
  return {
11580
- text: responseText,
11729
+ text: "",
11581
11730
  raw: {
11582
11731
  session,
11583
- promptFile: promptPath,
11584
11732
  attachments
11585
11733
  }
11586
11734
  };
11587
- } finally {
11588
- await rm2(directory, { recursive: true, force: true });
11589
11735
  }
11736
+ const responseText = await readFile22(session.responseFile, "utf8");
11737
+ return {
11738
+ text: responseText,
11739
+ raw: {
11740
+ session,
11741
+ attachments
11742
+ }
11743
+ };
11590
11744
  }
11591
11745
  };
11592
- function buildPromptDocument(request, attachments) {
11746
+ function buildPromptDocument(request, attachments, guidelinePatterns) {
11593
11747
  const parts = [];
11594
- const instructionFiles = collectInstructionFiles(attachments);
11595
- if (instructionFiles.length > 0) {
11596
- parts.push(buildMandatoryPrereadBlock(instructionFiles));
11597
- }
11598
- parts.push(`# BbEval Request`);
11599
- if (request.testCaseId) {
11600
- parts.push(`- Test Case: ${request.testCaseId}`);
11601
- }
11602
- if (request.metadata?.target) {
11603
- parts.push(`- Target: ${String(request.metadata.target)}`);
11604
- }
11605
- parts.push("\n## Task\n", request.prompt.trim());
11606
- if (request.guidelines && request.guidelines.trim().length > 0) {
11607
- parts.push("\n## Guidelines\n", request.guidelines.trim());
11608
- }
11609
- if (attachments && attachments.length > 0) {
11610
- const attachmentList = attachments.map((item) => `- ${item}`).join("\n");
11611
- parts.push("\n## Attachments\n", attachmentList);
11748
+ const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
11749
+ if (guidelineFiles.length > 0) {
11750
+ parts.push("\n", buildMandatoryPrereadBlock(guidelineFiles));
11612
11751
  }
11752
+ parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
11613
11753
  return parts.join("\n").trim();
11614
11754
  }
11615
- function buildMandatoryPrereadBlock(instructionFiles) {
11616
- if (instructionFiles.length === 0) {
11755
+ function buildMandatoryPrereadBlock(guidelineFiles) {
11756
+ if (guidelineFiles.length === 0) {
11617
11757
  return "";
11618
11758
  }
11619
11759
  const fileList = [];
11620
- const tokenList = [];
11621
11760
  let counter = 0;
11622
- for (const absolutePath of instructionFiles) {
11761
+ for (const absolutePath of guidelineFiles) {
11623
11762
  counter += 1;
11624
11763
  const fileName = path22.basename(absolutePath);
11625
11764
  const fileUri = pathToFileUri(absolutePath);
11626
- fileList.push(`[${fileName}](${fileUri})`);
11627
- tokenList.push(`INSTRUCTIONS_READ: \`${fileName}\` i=${counter} SHA256=<hex>`);
11765
+ fileList.push(`* [${fileName}](${fileUri})`);
11628
11766
  }
11629
- const filesText = fileList.join(", ");
11630
- const tokensText = tokenList.join("\n");
11767
+ const filesText = fileList.join("\n");
11631
11768
  const instruction = [
11632
- `Read all instruction files: ${filesText}.`,
11633
- `After reading each file, compute its SHA256 hash using this PowerShell command:`,
11634
- "`Get-FileHash -Algorithm SHA256 -LiteralPath '<file-path>' | Select-Object -ExpandProperty Hash`.",
11635
- `Then include, at the top of your reply, these exact tokens on separate lines:
11769
+ `Read all guideline files:
11770
+ ${filesText}.
11636
11771
  `,
11637
- tokensText,
11638
- `
11639
- Replace \`<hex>\` with the actual SHA256 hash value computed from the PowerShell command.`,
11640
11772
  `If any file is missing, fail with ERROR: missing-file <filename> and stop.
11641
11773
  `,
11642
- `Then fetch all documentation required by the instructions before proceeding with your task.`
11643
- ].join(" ");
11644
- return `[[ ## mandatory_pre_read ## ]]
11645
-
11646
- ${instruction}
11647
-
11648
- `;
11774
+ `Then apply system_instructions on the user query below.`
11775
+ ].join("");
11776
+ return `${instruction}`;
11649
11777
  }
11650
- function collectInstructionFiles(attachments) {
11778
+ function collectGuidelineFiles(attachments, guidelinePatterns) {
11651
11779
  if (!attachments || attachments.length === 0) {
11652
11780
  return [];
11653
11781
  }
11654
11782
  const unique = /* @__PURE__ */ new Map();
11655
11783
  for (const attachment of attachments) {
11656
- if (!isInstructionPath(attachment)) {
11657
- continue;
11658
- }
11659
11784
  const absolutePath = path22.resolve(attachment);
11660
- if (!unique.has(absolutePath)) {
11661
- unique.set(absolutePath, absolutePath);
11785
+ const normalized = absolutePath.split(path22.sep).join("/");
11786
+ if (isGuidelineFile(normalized, guidelinePatterns)) {
11787
+ if (!unique.has(absolutePath)) {
11788
+ unique.set(absolutePath, absolutePath);
11789
+ }
11662
11790
  }
11663
11791
  }
11664
11792
  return Array.from(unique.values());
11665
11793
  }
11666
- function isInstructionPath(filePath) {
11667
- const normalized = filePath.split(path22.sep).join("/");
11668
- return normalized.endsWith(".instructions.md") || normalized.includes("/instructions/") || normalized.endsWith(".prompt.md") || normalized.includes("/prompts/");
11669
- }
11670
11794
  function pathToFileUri(filePath) {
11671
11795
  const absolutePath = path22.isAbsolute(filePath) ? filePath : path22.resolve(filePath);
11672
11796
  const normalizedPath = absolutePath.replace(/\\/g, "/");
@@ -11675,14 +11799,6 @@ function pathToFileUri(filePath) {
11675
11799
  }
11676
11800
  return `file://${normalizedPath}`;
11677
11801
  }
11678
- function composeUserQuery(request) {
11679
- const segments = [];
11680
- segments.push(request.prompt.trim());
11681
- if (request.guidelines && request.guidelines.trim().length > 0) {
11682
- segments.push("\nGuidelines:\n", request.guidelines.trim());
11683
- }
11684
- return segments.join("\n").trim();
11685
- }
11686
11802
  function normalizeAttachments(attachments) {
11687
11803
  if (!attachments || attachments.length === 0) {
11688
11804
  return void 0;
@@ -11734,18 +11850,24 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
11734
11850
  function isRecord(value) {
11735
11851
  return typeof value === "object" && value !== null && !Array.isArray(value);
11736
11852
  }
11737
- function checkVersion(parsed, absolutePath) {
11738
- const version = typeof parsed.version === "number" ? parsed.version : typeof parsed.version === "string" ? parseFloat(parsed.version) : void 0;
11739
- if (version === void 0) {
11853
+ function checkSchema(parsed, absolutePath) {
11854
+ const schema = parsed.$schema;
11855
+ if (schema === void 0) {
11856
+ throw new Error(
11857
+ `Missing $schema field in targets.yaml at ${absolutePath}.
11858
+ Please add '$schema: ${TARGETS_SCHEMA_V2}' at the top of the file.`
11859
+ );
11860
+ }
11861
+ if (typeof schema !== "string") {
11740
11862
  throw new Error(
11741
- `Missing version field in targets.yaml at ${absolutePath}.
11742
- Please add 'version: 2.0' at the top of the file.`
11863
+ `Invalid $schema field in targets.yaml at ${absolutePath}.
11864
+ Expected a string value '${TARGETS_SCHEMA_V2}'.`
11743
11865
  );
11744
11866
  }
11745
- if (version < 2) {
11867
+ if (schema !== TARGETS_SCHEMA_V2) {
11746
11868
  throw new Error(
11747
- `Outdated targets.yaml format (version ${version}) at ${absolutePath}.
11748
- Please update to version 2.0 format with 'targets' array.`
11869
+ `Invalid $schema '${schema}' in targets.yaml at ${absolutePath}.
11870
+ Expected '${TARGETS_SCHEMA_V2}'.`
11749
11871
  );
11750
11872
  }
11751
11873
  }
@@ -11777,7 +11899,7 @@ function assertTargetDefinition(value, index, filePath) {
11777
11899
  judge_target: typeof judgeTarget === "string" ? judgeTarget : void 0
11778
11900
  };
11779
11901
  }
11780
- async function fileExists22(filePath) {
11902
+ async function fileExists3(filePath) {
11781
11903
  try {
11782
11904
  await access22(filePath, constants22.F_OK);
11783
11905
  return true;
@@ -11787,15 +11909,15 @@ async function fileExists22(filePath) {
11787
11909
  }
11788
11910
  async function readTargetDefinitions(filePath) {
11789
11911
  const absolutePath = path32.resolve(filePath);
11790
- if (!await fileExists22(absolutePath)) {
11912
+ if (!await fileExists3(absolutePath)) {
11791
11913
  throw new Error(`targets.yaml not found at ${absolutePath}`);
11792
11914
  }
11793
11915
  const raw = await readFile3(absolutePath, "utf8");
11794
11916
  const parsed = parse22(raw);
11795
11917
  if (!isRecord(parsed)) {
11796
- throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with 'version' and 'targets' fields`);
11918
+ throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
11797
11919
  }
11798
- checkVersion(parsed, absolutePath);
11920
+ checkSchema(parsed, absolutePath);
11799
11921
  const targets = extractTargetsArray(parsed, absolutePath);
11800
11922
  const definitions = targets.map((entry, index) => assertTargetDefinition(entry, index, absolutePath));
11801
11923
  return definitions;
@@ -12002,7 +12124,7 @@ function extractKeyTerms(aspect, maxTerms = 5) {
12002
12124
  var HeuristicGrader = class {
12003
12125
  kind = "heuristic";
12004
12126
  grade(context2) {
12005
- const expectedAspects = extractAspects(context2.testCase.expected_assistant_raw);
12127
+ const expectedAspects = extractAspects(context2.evalCase.expected_assistant_raw);
12006
12128
  const result = scoreCandidateResponse(context2.candidate, expectedAspects);
12007
12129
  const misses = [...result.misses];
12008
12130
  if (expectedAspects.length === 0 && isErrorLike(context2.candidate)) {
@@ -12035,14 +12157,14 @@ var QualityGrader = class {
12035
12157
  if (!judgeProvider) {
12036
12158
  throw new Error("No judge provider available for LLM grading");
12037
12159
  }
12038
- const prompt = buildQualityPrompt(context2.testCase, context2.candidate);
12160
+ const prompt = buildQualityPrompt(context2.evalCase, context2.candidate);
12039
12161
  const metadata = {
12040
12162
  systemPrompt: QUALITY_SYSTEM_PROMPT
12041
12163
  };
12042
12164
  const response = await judgeProvider.invoke({
12043
12165
  prompt,
12044
12166
  metadata,
12045
- testCaseId: context2.testCase.id,
12167
+ evalCaseId: context2.evalCase.id,
12046
12168
  attempt: context2.attempt,
12047
12169
  maxOutputTokens: this.maxOutputTokens,
12048
12170
  temperature: this.temperature
@@ -12088,16 +12210,16 @@ var QUALITY_SYSTEM_PROMPT = [
12088
12210
  function buildQualityPrompt(testCase, candidate) {
12089
12211
  const parts = [
12090
12212
  "[[ ## expected_outcome ## ]]",
12091
- testCase.outcome,
12213
+ testCase.outcome.trim(),
12092
12214
  "",
12093
12215
  "[[ ## request ## ]]",
12094
- testCase.task,
12216
+ testCase.task.trim(),
12095
12217
  "",
12096
12218
  "[[ ## reference_answer ## ]]",
12097
- testCase.expected_assistant_raw,
12219
+ testCase.expected_assistant_raw.trim(),
12098
12220
  "",
12099
12221
  "[[ ## generated_answer ## ]]",
12100
- candidate,
12222
+ candidate.trim(),
12101
12223
  "",
12102
12224
  "Respond with a single JSON object matching the schema described in the system prompt."
12103
12225
  ];
@@ -12330,17 +12452,17 @@ async function runEvaluation(options) {
12330
12452
  cache,
12331
12453
  useCache,
12332
12454
  now,
12333
- testId,
12455
+ evalId,
12334
12456
  verbose,
12335
12457
  onResult,
12336
12458
  onProgress
12337
12459
  } = options;
12338
- const load = loadTestCases;
12339
- const testCases = await load(testFilePath, repoRoot, { verbose });
12340
- const filteredTestCases = filterTestCases(testCases, testId);
12341
- if (filteredTestCases.length === 0) {
12342
- if (testId) {
12343
- throw new Error(`Test case with id '${testId}' not found in ${testFilePath}`);
12460
+ const load = loadEvalCases;
12461
+ const evalCases = await load(testFilePath, repoRoot, { verbose });
12462
+ const filteredEvalCases = filterEvalCases(evalCases, evalId);
12463
+ if (filteredEvalCases.length === 0) {
12464
+ if (evalId) {
12465
+ throw new Error(`Test case with id '${evalId}' not found in ${testFilePath}`);
12344
12466
  }
12345
12467
  return [];
12346
12468
  }
@@ -12384,11 +12506,11 @@ async function runEvaluation(options) {
12384
12506
  };
12385
12507
  const graderRegistry = buildGraderRegistry(graders, resolveJudgeProvider);
12386
12508
  const primaryProvider = getOrCreateProvider(target);
12387
- if (onProgress && filteredTestCases.length > 0) {
12388
- for (let i6 = 0; i6 < filteredTestCases.length; i6++) {
12509
+ if (onProgress && filteredEvalCases.length > 0) {
12510
+ for (let i6 = 0; i6 < filteredEvalCases.length; i6++) {
12389
12511
  await onProgress({
12390
12512
  workerId: i6 + 1,
12391
- testId: filteredTestCases[i6].id,
12513
+ evalId: filteredEvalCases[i6].id,
12392
12514
  status: "pending"
12393
12515
  });
12394
12516
  }
@@ -12396,23 +12518,23 @@ async function runEvaluation(options) {
12396
12518
  const workers = options.maxConcurrency ?? target.workers ?? 1;
12397
12519
  const limit = pLimit(workers);
12398
12520
  let nextWorkerId = 1;
12399
- const workerIdByTestId = /* @__PURE__ */ new Map();
12400
- const promises = filteredTestCases.map(
12401
- (testCase) => limit(async () => {
12521
+ const workerIdByEvalId = /* @__PURE__ */ new Map();
12522
+ const promises = filteredEvalCases.map(
12523
+ (evalCase) => limit(async () => {
12402
12524
  const workerId = nextWorkerId++;
12403
- workerIdByTestId.set(testCase.id, workerId);
12525
+ workerIdByEvalId.set(evalCase.id, workerId);
12404
12526
  if (onProgress) {
12405
12527
  await onProgress({
12406
12528
  workerId,
12407
- testId: testCase.id,
12529
+ evalId: evalCase.id,
12408
12530
  status: "running",
12409
12531
  startedAt: Date.now()
12410
12532
  });
12411
12533
  }
12412
12534
  try {
12413
12535
  const judgeProvider = await resolveJudgeProvider(target);
12414
- const result = await runTestCase({
12415
- testCase,
12536
+ const result = await runEvalCase({
12537
+ evalCase,
12416
12538
  provider: primaryProvider,
12417
12539
  target,
12418
12540
  graders: graderRegistry,
@@ -12427,7 +12549,7 @@ async function runEvaluation(options) {
12427
12549
  if (onProgress) {
12428
12550
  await onProgress({
12429
12551
  workerId,
12430
- testId: testCase.id,
12552
+ evalId: evalCase.id,
12431
12553
  status: "completed",
12432
12554
  startedAt: 0,
12433
12555
  // Not used for completed status
@@ -12442,7 +12564,7 @@ async function runEvaluation(options) {
12442
12564
  if (onProgress) {
12443
12565
  await onProgress({
12444
12566
  workerId,
12445
- testId: testCase.id,
12567
+ evalId: evalCase.id,
12446
12568
  status: "failed",
12447
12569
  completedAt: Date.now(),
12448
12570
  error: error instanceof Error ? error.message : String(error)
@@ -12459,10 +12581,10 @@ async function runEvaluation(options) {
12459
12581
  if (outcome.status === "fulfilled") {
12460
12582
  results.push(outcome.value);
12461
12583
  } else {
12462
- const testCase = filteredTestCases[i6];
12463
- const promptInputs = await buildPromptInputs(testCase);
12584
+ const evalCase = filteredEvalCases[i6];
12585
+ const promptInputs = await buildPromptInputs(evalCase);
12464
12586
  const errorResult = buildErrorResult(
12465
- testCase,
12587
+ evalCase,
12466
12588
  target.name,
12467
12589
  (now ?? (() => /* @__PURE__ */ new Date()))(),
12468
12590
  outcome.reason,
@@ -12476,9 +12598,9 @@ async function runEvaluation(options) {
12476
12598
  }
12477
12599
  return results;
12478
12600
  }
12479
- async function runTestCase(options) {
12601
+ async function runEvalCase(options) {
12480
12602
  const {
12481
- testCase,
12603
+ evalCase,
12482
12604
  provider,
12483
12605
  target,
12484
12606
  graders,
@@ -12491,11 +12613,11 @@ async function runTestCase(options) {
12491
12613
  signal,
12492
12614
  judgeProvider
12493
12615
  } = options;
12494
- const promptInputs = await buildPromptInputs(testCase);
12616
+ const promptInputs = await buildPromptInputs(evalCase);
12495
12617
  if (promptDumpDir) {
12496
- await dumpPrompt(promptDumpDir, testCase, promptInputs);
12618
+ await dumpPrompt(promptDumpDir, evalCase, promptInputs);
12497
12619
  }
12498
- const cacheKey = useCache ? createCacheKey(provider, target, testCase, promptInputs) : void 0;
12620
+ const cacheKey = useCache ? createCacheKey(provider, target, evalCase, promptInputs) : void 0;
12499
12621
  let cachedResponse;
12500
12622
  if (cacheKey && cache) {
12501
12623
  cachedResponse = await cache.get(cacheKey);
@@ -12508,7 +12630,7 @@ async function runTestCase(options) {
12508
12630
  while (!providerResponse && attempt < attemptBudget) {
12509
12631
  try {
12510
12632
  providerResponse = await invokeProvider(provider, {
12511
- testCase,
12633
+ evalCase,
12512
12634
  target,
12513
12635
  promptInputs,
12514
12636
  attempt,
@@ -12521,12 +12643,12 @@ async function runTestCase(options) {
12521
12643
  attempt += 1;
12522
12644
  continue;
12523
12645
  }
12524
- return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
12646
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
12525
12647
  }
12526
12648
  }
12527
12649
  if (!providerResponse) {
12528
12650
  return buildErrorResult(
12529
- testCase,
12651
+ evalCase,
12530
12652
  target.name,
12531
12653
  nowFn(),
12532
12654
  lastError ?? new Error("Provider did not return a response"),
@@ -12536,7 +12658,7 @@ async function runTestCase(options) {
12536
12658
  if (cacheKey && cache && !cachedResponse) {
12537
12659
  await cache.set(cacheKey, providerResponse);
12538
12660
  }
12539
- const graderKind = testCase.grader ?? "heuristic";
12661
+ const graderKind = evalCase.grader ?? "heuristic";
12540
12662
  const activeGrader = graders[graderKind] ?? graders.heuristic;
12541
12663
  if (!activeGrader) {
12542
12664
  throw new Error(`No grader registered for kind '${graderKind}'`);
@@ -12545,7 +12667,7 @@ async function runTestCase(options) {
12545
12667
  try {
12546
12668
  const gradeTimestamp = nowFn();
12547
12669
  grade = await activeGrader.grade({
12548
- testCase,
12670
+ evalCase,
12549
12671
  candidate: providerResponse.text ?? "",
12550
12672
  target,
12551
12673
  provider,
@@ -12555,17 +12677,18 @@ async function runTestCase(options) {
12555
12677
  judgeProvider
12556
12678
  });
12557
12679
  } catch (error) {
12558
- return buildErrorResult(testCase, target.name, nowFn(), error, promptInputs);
12680
+ return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
12559
12681
  }
12560
12682
  const completedAt = nowFn();
12561
12683
  const rawRequest = {
12562
12684
  request: promptInputs.request,
12563
12685
  guidelines: promptInputs.guidelines,
12564
- guideline_paths: testCase.guideline_paths
12686
+ guideline_paths: evalCase.guideline_paths,
12687
+ system_message: promptInputs.systemMessage ?? ""
12565
12688
  };
12566
12689
  return {
12567
- test_id: testCase.id,
12568
- conversation_id: testCase.conversation_id,
12690
+ eval_id: evalCase.id,
12691
+ conversation_id: evalCase.conversation_id,
12569
12692
  score: grade.score,
12570
12693
  hits: grade.hits,
12571
12694
  misses: grade.misses,
@@ -12579,11 +12702,11 @@ async function runTestCase(options) {
12579
12702
  grader_raw_request: grade.graderRawRequest
12580
12703
  };
12581
12704
  }
12582
- function filterTestCases(testCases, testId) {
12583
- if (!testId) {
12584
- return testCases;
12705
+ function filterEvalCases(evalCases, evalId) {
12706
+ if (!evalId) {
12707
+ return evalCases;
12585
12708
  }
12586
- return testCases.filter((testCase) => testCase.id === testId);
12709
+ return evalCases.filter((evalCase) => evalCase.id === evalId);
12587
12710
  }
12588
12711
  function buildGraderRegistry(overrides, resolveJudgeProvider) {
12589
12712
  const heuristic = overrides?.heuristic ?? new HeuristicGrader();
@@ -12601,16 +12724,16 @@ function buildGraderRegistry(overrides, resolveJudgeProvider) {
12601
12724
  llm_judge: llmJudge
12602
12725
  };
12603
12726
  }
12604
- async function dumpPrompt(directory, testCase, promptInputs) {
12727
+ async function dumpPrompt(directory, evalCase, promptInputs) {
12605
12728
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
12606
- const filename = `${timestamp}_${sanitizeFilename(testCase.id)}.json`;
12729
+ const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
12607
12730
  const filePath = path42.resolve(directory, filename);
12608
12731
  await mkdir3(path42.dirname(filePath), { recursive: true });
12609
12732
  const payload = {
12610
- test_id: testCase.id,
12733
+ eval_id: evalCase.id,
12611
12734
  request: promptInputs.request,
12612
12735
  guidelines: promptInputs.guidelines,
12613
- guideline_paths: testCase.guideline_paths
12736
+ guideline_paths: evalCase.guideline_paths
12614
12737
  };
12615
12738
  await writeFile22(filePath, JSON.stringify(payload, null, 2), "utf8");
12616
12739
  }
@@ -12622,7 +12745,7 @@ function sanitizeFilename(value) {
12622
12745
  return sanitized.length > 0 ? sanitized : randomUUID2();
12623
12746
  }
12624
12747
  async function invokeProvider(provider, options) {
12625
- const { testCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
12748
+ const { evalCase, target, promptInputs, attempt, agentTimeoutMs, signal } = options;
12626
12749
  const controller = new AbortController();
12627
12750
  const timeout = agentTimeoutMs ? setTimeout(() => controller.abort(), agentTimeoutMs) : void 0;
12628
12751
  if (signal) {
@@ -12632,12 +12755,12 @@ async function invokeProvider(provider, options) {
12632
12755
  return await provider.invoke({
12633
12756
  prompt: promptInputs.request,
12634
12757
  guidelines: promptInputs.guidelines,
12635
- attachments: testCase.guideline_paths,
12636
- testCaseId: testCase.id,
12758
+ guideline_patterns: evalCase.guideline_patterns,
12759
+ attachments: evalCase.file_paths,
12760
+ evalCaseId: evalCase.id,
12637
12761
  attempt,
12638
12762
  metadata: {
12639
- target: target.name,
12640
- grader: testCase.grader
12763
+ systemPrompt: promptInputs.systemMessage ?? ""
12641
12764
  },
12642
12765
  signal: controller.signal
12643
12766
  });
@@ -12647,17 +12770,18 @@ async function invokeProvider(provider, options) {
12647
12770
  }
12648
12771
  }
12649
12772
  }
12650
- function buildErrorResult(testCase, targetName, timestamp, error, promptInputs) {
12773
+ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
12651
12774
  const message = error instanceof Error ? error.message : String(error);
12652
12775
  const rawRequest = {
12653
12776
  request: promptInputs.request,
12654
12777
  guidelines: promptInputs.guidelines,
12655
- guideline_paths: testCase.guideline_paths,
12778
+ guideline_paths: evalCase.guideline_paths,
12779
+ system_message: promptInputs.systemMessage ?? "",
12656
12780
  error: message
12657
12781
  };
12658
12782
  return {
12659
- test_id: testCase.id,
12660
- conversation_id: testCase.conversation_id,
12783
+ eval_id: evalCase.id,
12784
+ conversation_id: evalCase.conversation_id,
12661
12785
  score: 0,
12662
12786
  hits: [],
12663
12787
  misses: [`Error: ${message}`],
@@ -12669,13 +12793,14 @@ function buildErrorResult(testCase, targetName, timestamp, error, promptInputs)
12669
12793
  raw_request: rawRequest
12670
12794
  };
12671
12795
  }
12672
- function createCacheKey(provider, target, testCase, promptInputs) {
12796
+ function createCacheKey(provider, target, evalCase, promptInputs) {
12673
12797
  const hash = createHash("sha256");
12674
12798
  hash.update(provider.id);
12675
12799
  hash.update(target.name);
12676
- hash.update(testCase.id);
12800
+ hash.update(evalCase.id);
12677
12801
  hash.update(promptInputs.request);
12678
12802
  hash.update(promptInputs.guidelines);
12803
+ hash.update(promptInputs.systemMessage ?? "");
12679
12804
  return hash.digest("hex");
12680
12805
  }
12681
12806
  function isTimeoutLike(error) {
@@ -12721,7 +12846,7 @@ function uniqueDirs(directories) {
12721
12846
  }
12722
12847
  return result;
12723
12848
  }
12724
- async function fileExists3(filePath) {
12849
+ async function fileExists4(filePath) {
12725
12850
  try {
12726
12851
  await access4(filePath, constants4.F_OK);
12727
12852
  return true;
@@ -12757,7 +12882,7 @@ async function loadEnvFromHierarchy(options) {
12757
12882
  ]);
12758
12883
  for (const dir of searchDirs) {
12759
12884
  const candidate = path8.join(dir, ".env");
12760
- if (await fileExists3(candidate)) {
12885
+ if (await fileExists4(candidate)) {
12761
12886
  loadDotenv({ path: candidate, override: false });
12762
12887
  if (verbose) {
12763
12888
  console.log(`Loaded environment from: ${candidate}`);
@@ -13127,9 +13252,9 @@ var ProgressDisplay = class {
13127
13252
  this.scheduleRender();
13128
13253
  } else {
13129
13254
  if (progress.status === "completed") {
13130
- console.log(`\u2713 Test ${progress.testId} completed`);
13255
+ console.log(`\u2713 Test ${progress.evalId} completed`);
13131
13256
  } else if (progress.status === "failed") {
13132
- console.log(`\u2717 Test ${progress.testId} failed${progress.error ? `: ${progress.error}` : ""}`);
13257
+ console.log(`\u2717 Test ${progress.evalId} failed${progress.error ? `: ${progress.error}` : ""}`);
13133
13258
  }
13134
13259
  }
13135
13260
  }
@@ -13162,7 +13287,7 @@ var ProgressDisplay = class {
13162
13287
  const statusIcon = this.getStatusIcon(worker.status);
13163
13288
  const elapsed = worker.startedAt ? this.formatElapsed(Date.now() - worker.startedAt) : "";
13164
13289
  const timeLabel = elapsed ? ` (${elapsed})` : "";
13165
- let testLabel = worker.testId;
13290
+ let testLabel = worker.evalId;
13166
13291
  if (testLabel.length > 50) {
13167
13292
  testLabel = testLabel.substring(0, 47) + "...";
13168
13293
  }
@@ -13316,7 +13441,7 @@ function formatEvaluationSummary(summary) {
13316
13441
  lines.push("\n==================================================");
13317
13442
  lines.push("EVALUATION SUMMARY");
13318
13443
  lines.push("==================================================");
13319
- lines.push(`Total test cases: ${summary.total}`);
13444
+ lines.push(`Total eval cases: ${summary.total}`);
13320
13445
  lines.push(`Mean score: ${formatScore(summary.mean)}`);
13321
13446
  lines.push(`Median score: ${formatScore(summary.median)}`);
13322
13447
  lines.push(`Min score: ${formatScore(summary.min)}`);
@@ -13329,13 +13454,13 @@ function formatEvaluationSummary(summary) {
13329
13454
  const [start, end] = bin.range;
13330
13455
  lines.push(` ${start.toFixed(1)}-${end.toFixed(1)}: ${bin.count}`);
13331
13456
  }
13332
- lines.push("\nTop performing test cases:");
13457
+ lines.push("\nTop performing eval cases:");
13333
13458
  summary.topResults.forEach((result, index) => {
13334
- lines.push(` ${index + 1}. ${result.test_id}: ${formatScore(result.score)}`);
13459
+ lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
13335
13460
  });
13336
- lines.push("\nLowest performing test cases:");
13461
+ lines.push("\nLowest performing eval cases:");
13337
13462
  summary.bottomResults.forEach((result, index) => {
13338
- lines.push(` ${index + 1}. ${result.test_id}: ${formatScore(result.score)}`);
13463
+ lines.push(` ${index + 1}. ${result.eval_id}: ${formatScore(result.score)}`);
13339
13464
  });
13340
13465
  return lines.join("\n");
13341
13466
  }
@@ -13349,11 +13474,9 @@ var TARGET_FILE_CANDIDATES = [
13349
13474
  "targets.yaml",
13350
13475
  "targets.yml",
13351
13476
  path11.join(".agentv", "targets.yaml"),
13352
- path11.join(".agentv", "targets.yml"),
13353
- path11.join(".bbeval", "targets.yaml"),
13354
- path11.join(".bbeval", "targets.yml")
13477
+ path11.join(".agentv", "targets.yml")
13355
13478
  ];
13356
- async function fileExists4(filePath) {
13479
+ async function fileExists5(filePath) {
13357
13480
  try {
13358
13481
  await access5(filePath, constants5.F_OK);
13359
13482
  return true;
@@ -13375,56 +13498,30 @@ async function readTestSuiteTarget(testFilePath) {
13375
13498
  }
13376
13499
  return void 0;
13377
13500
  }
13378
- function buildDirectoryChain(testFilePath, repoRoot, cwd) {
13379
- const directories = [];
13380
- const seen = /* @__PURE__ */ new Set();
13381
- const boundary = path11.resolve(repoRoot);
13382
- let current = path11.resolve(path11.dirname(testFilePath));
13383
- while (current !== void 0) {
13384
- if (!seen.has(current)) {
13385
- directories.push(current);
13386
- seen.add(current);
13387
- }
13388
- if (current === boundary) {
13389
- break;
13390
- }
13391
- const parent = path11.dirname(current);
13392
- if (parent === current) {
13393
- break;
13394
- }
13395
- current = parent;
13396
- }
13397
- if (!seen.has(boundary)) {
13398
- directories.push(boundary);
13399
- seen.add(boundary);
13400
- }
13401
- const resolvedCwd = path11.resolve(cwd);
13402
- if (!seen.has(resolvedCwd)) {
13403
- directories.push(resolvedCwd);
13404
- seen.add(resolvedCwd);
13405
- }
13406
- return directories;
13407
- }
13408
13501
  async function discoverTargetsFile(options) {
13409
13502
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
13410
13503
  if (explicitPath) {
13411
13504
  const resolvedExplicit = path11.resolve(explicitPath);
13412
- if (await fileExists4(resolvedExplicit)) {
13505
+ if (await fileExists5(resolvedExplicit)) {
13413
13506
  return resolvedExplicit;
13414
13507
  }
13415
13508
  for (const candidate of TARGET_FILE_CANDIDATES) {
13416
13509
  const nested = path11.join(resolvedExplicit, candidate);
13417
- if (await fileExists4(nested)) {
13510
+ if (await fileExists5(nested)) {
13418
13511
  return nested;
13419
13512
  }
13420
13513
  }
13421
13514
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
13422
13515
  }
13423
- const directories = buildDirectoryChain(testFilePath, repoRoot, cwd);
13516
+ const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
13517
+ const resolvedCwd = path11.resolve(cwd);
13518
+ if (!directories.includes(resolvedCwd)) {
13519
+ directories.push(resolvedCwd);
13520
+ }
13424
13521
  for (const directory of directories) {
13425
13522
  for (const candidate of TARGET_FILE_CANDIDATES) {
13426
13523
  const fullPath = path11.join(directory, candidate);
13427
- if (await fileExists4(fullPath)) {
13524
+ if (await fileExists5(fullPath)) {
13428
13525
  return fullPath;
13429
13526
  }
13430
13527
  }
@@ -13525,7 +13622,7 @@ function normalizeOptions(rawOptions) {
13525
13622
  return {
13526
13623
  target: normalizeString(rawOptions.target),
13527
13624
  targetsPath: normalizeString(rawOptions.targets),
13528
- testId: normalizeString(rawOptions.testId),
13625
+ evalId: normalizeString(rawOptions.evalId),
13529
13626
  workers: workers > 0 ? workers : void 0,
13530
13627
  outPath: normalizeString(rawOptions.out),
13531
13628
  format,
@@ -13672,7 +13769,7 @@ async function runEvalCommand(input) {
13672
13769
  promptDumpDir,
13673
13770
  cache,
13674
13771
  useCache: options.cache,
13675
- testId: options.testId,
13772
+ evalId: options.evalId,
13676
13773
  verbose: options.verbose,
13677
13774
  maxConcurrency: resolvedWorkers,
13678
13775
  onResult: async (result) => {
@@ -13685,7 +13782,7 @@ async function runEvalCommand(input) {
13685
13782
  }
13686
13783
  progressDisplay.updateWorker({
13687
13784
  workerId: event.workerId,
13688
- testId: event.testId,
13785
+ evalId: event.evalId,
13689
13786
  status: event.status,
13690
13787
  startedAt: event.startedAt,
13691
13788
  completedAt: event.completedAt,
@@ -13735,7 +13832,7 @@ function parseInteger(value, fallback) {
13735
13832
  return parsed;
13736
13833
  }
13737
13834
  function registerEvalCommand(program) {
13738
- program.command("eval").description("Run BbEval test suites and report results").argument("<test-file>", "Path to the evaluation .test.yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--test-id <id>", "Run only the test case with this identifier").option(
13835
+ program.command("eval").description("Run eval suites and report results").argument("<eval-file>", "Path to the evaluation .yaml file").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
13739
13836
  "--workers <count>",
13740
13837
  "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
13741
13838
  (value) => parseInteger(value, 1)
@@ -13773,25 +13870,164 @@ function registerEvalCommand(program) {
13773
13870
  return program;
13774
13871
  }
13775
13872
 
13776
- // src/commands/lint/lint-files.ts
13777
- import { constants as constants7 } from "node:fs";
13778
- import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
13873
+ // src/commands/init/index.ts
13874
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
13779
13875
  import path14 from "node:path";
13780
13876
 
13877
+ // src/templates/index.ts
13878
+ import { readFileSync } from "node:fs";
13879
+ import path13 from "node:path";
13880
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
13881
+ var TemplateManager = class {
13882
+ static getTemplates() {
13883
+ const currentDir = path13.dirname(fileURLToPath2(import.meta.url));
13884
+ let templatesDir;
13885
+ if (currentDir.includes(path13.sep + "dist")) {
13886
+ templatesDir = path13.join(currentDir, "templates");
13887
+ } else {
13888
+ templatesDir = currentDir;
13889
+ }
13890
+ const evalBuildPrompt = readFileSync(
13891
+ path13.join(templatesDir, "eval-build.prompt.md"),
13892
+ "utf-8"
13893
+ );
13894
+ const evalSchema = readFileSync(
13895
+ path13.join(templatesDir, "eval-schema.json"),
13896
+ "utf-8"
13897
+ );
13898
+ const configSchema = readFileSync(
13899
+ path13.join(templatesDir, "config-schema.json"),
13900
+ "utf-8"
13901
+ );
13902
+ return [
13903
+ {
13904
+ path: "prompts/eval-build.prompt.md",
13905
+ content: evalBuildPrompt
13906
+ },
13907
+ {
13908
+ path: "contexts/eval-schema.json",
13909
+ content: evalSchema
13910
+ },
13911
+ {
13912
+ path: "contexts/config-schema.json",
13913
+ content: configSchema
13914
+ }
13915
+ ];
13916
+ }
13917
+ };
13918
+
13919
+ // src/commands/init/index.ts
13920
+ async function initCommand(options = {}) {
13921
+ const targetPath = path14.resolve(options.targetPath ?? ".");
13922
+ const githubDir = path14.join(targetPath, ".github");
13923
+ if (!existsSync(githubDir)) {
13924
+ mkdirSync(githubDir, { recursive: true });
13925
+ }
13926
+ const templates = TemplateManager.getTemplates();
13927
+ for (const template of templates) {
13928
+ const targetFilePath = path14.join(githubDir, template.path);
13929
+ const targetDirPath = path14.dirname(targetFilePath);
13930
+ if (!existsSync(targetDirPath)) {
13931
+ mkdirSync(targetDirPath, { recursive: true });
13932
+ }
13933
+ writeFileSync(targetFilePath, template.content, "utf-8");
13934
+ console.log(`Created ${path14.relative(targetPath, targetFilePath)}`);
13935
+ }
13936
+ console.log("\nAgentV initialized successfully!");
13937
+ console.log(`
13938
+ Files installed to ${path14.relative(targetPath, githubDir)}:`);
13939
+ templates.forEach((t) => console.log(` - ${t.path}`));
13940
+ console.log("\nYou can now create eval files using the schema and prompt templates.");
13941
+ }
13942
+
13943
+ // src/commands/validate/format-output.ts
13944
+ var ANSI_RED = "\x1B[31m";
13945
+ var ANSI_YELLOW2 = "\x1B[33m";
13946
+ var ANSI_GREEN = "\x1B[32m";
13947
+ var ANSI_CYAN = "\x1B[36m";
13948
+ var ANSI_BOLD = "\x1B[1m";
13949
+ var ANSI_RESET2 = "\x1B[0m";
13950
+ function formatSummary(summary, useColors) {
13951
+ const lines = [];
13952
+ lines.push("");
13953
+ lines.push(formatHeader("Validation Summary", useColors));
13954
+ lines.push("");
13955
+ for (const result of summary.results) {
13956
+ lines.push(formatFileResult(result, useColors));
13957
+ }
13958
+ lines.push("");
13959
+ lines.push(formatStats(summary, useColors));
13960
+ lines.push("");
13961
+ return lines.join("\n");
13962
+ }
13963
+ function formatHeader(text, useColors) {
13964
+ if (useColors) {
13965
+ return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
13966
+ }
13967
+ return text;
13968
+ }
13969
+ function formatFileResult(result, useColors) {
13970
+ const lines = [];
13971
+ const status = result.valid ? "\u2713" : "\u2717";
13972
+ const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
13973
+ const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
13974
+ const fileName = result.filePath;
13975
+ lines.push(`${statusText} ${fileName}`);
13976
+ if (result.errors.length > 0) {
13977
+ for (const error of result.errors) {
13978
+ lines.push(formatError(error, useColors));
13979
+ }
13980
+ }
13981
+ return lines.join("\n");
13982
+ }
13983
+ function formatError(error, useColors) {
13984
+ const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
13985
+ const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
13986
+ const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
13987
+ const location = error.location ? ` [${error.location}]` : "";
13988
+ return `${coloredPrefix}${location} ${error.message}`;
13989
+ }
13990
+ function formatStats(summary, useColors) {
13991
+ const lines = [];
13992
+ const totalText = `Total files: ${summary.totalFiles}`;
13993
+ const validText = `Valid: ${summary.validFiles}`;
13994
+ const invalidText = `Invalid: ${summary.invalidFiles}`;
13995
+ if (useColors) {
13996
+ lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
13997
+ lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
13998
+ if (summary.invalidFiles > 0) {
13999
+ lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
14000
+ } else {
14001
+ lines.push(invalidText);
14002
+ }
14003
+ } else {
14004
+ lines.push(totalText);
14005
+ lines.push(validText);
14006
+ lines.push(invalidText);
14007
+ }
14008
+ return lines.join("\n");
14009
+ }
14010
+ function isTTY() {
14011
+ return process.stdout.isTTY ?? false;
14012
+ }
14013
+
13781
14014
  // ../../packages/core/dist/evaluation/validation/index.js
13782
14015
  import { readFile as readFile5 } from "node:fs/promises";
13783
14016
  import { parse as parse5 } from "yaml";
13784
14017
  import { readFile as readFile23 } from "node:fs/promises";
13785
- import path13 from "node:path";
14018
+ import path15 from "node:path";
13786
14019
  import { parse as parse23 } from "yaml";
13787
14020
  import { readFile as readFile32 } from "node:fs/promises";
13788
14021
  import path23 from "node:path";
13789
14022
  import { parse as parse32 } from "yaml";
13790
14023
  import { readFile as readFile42 } from "node:fs/promises";
13791
- import path33 from "node:path";
13792
14024
  import { parse as parse42 } from "yaml";
14025
+ import { readFile as readFile52 } from "node:fs/promises";
14026
+ import path33 from "node:path";
14027
+ import { parse as parse52 } from "yaml";
13793
14028
  var SCHEMA_EVAL_V22 = "agentv-eval-v2";
13794
14029
  var SCHEMA_TARGETS_V2 = "agentv-targets-v2";
14030
+ var SCHEMA_CONFIG_V22 = "agentv-config-v2";
13795
14031
  async function detectFileType(filePath) {
13796
14032
  try {
13797
14033
  const content = await readFile5(filePath, "utf8");
@@ -13809,6 +14045,8 @@ async function detectFileType(filePath) {
13809
14045
  return "eval";
13810
14046
  case SCHEMA_TARGETS_V2:
13811
14047
  return "targets";
14048
+ case SCHEMA_CONFIG_V22:
14049
+ return "config";
13812
14050
  default:
13813
14051
  return "unknown";
13814
14052
  }
@@ -13822,7 +14060,7 @@ function isObject(value) {
13822
14060
  }
13823
14061
  async function validateEvalFile(filePath) {
13824
14062
  const errors = [];
13825
- const absolutePath = path13.resolve(filePath);
14063
+ const absolutePath = path15.resolve(filePath);
13826
14064
  let parsed;
13827
14065
  try {
13828
14066
  const content = await readFile23(absolutePath, "utf8");
@@ -14008,7 +14246,6 @@ function validateMessages(messages, location, filePath, errors) {
14008
14246
  }
14009
14247
  }
14010
14248
  }
14011
- var SCHEMA_TARGETS_V22 = "agentv-targets-v2";
14012
14249
  function isObject2(value) {
14013
14250
  return typeof value === "object" && value !== null && !Array.isArray(value);
14014
14251
  }
@@ -14046,8 +14283,8 @@ async function validateTargetsFile(filePath) {
14046
14283
  };
14047
14284
  }
14048
14285
  const schema = parsed["$schema"];
14049
- if (schema !== SCHEMA_TARGETS_V22) {
14050
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_TARGETS_V22}'` : `Missing required field '$schema'. Expected '${SCHEMA_TARGETS_V22}'`;
14286
+ if (schema !== TARGETS_SCHEMA_V2) {
14287
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${TARGETS_SCHEMA_V2}'` : `Missing required field '$schema'. Expected '${TARGETS_SCHEMA_V2}'`;
14051
14288
  errors.push({
14052
14289
  severity: "error",
14053
14290
  filePath: absolutePath,
@@ -14070,7 +14307,7 @@ async function validateTargetsFile(filePath) {
14070
14307
  errors
14071
14308
  };
14072
14309
  }
14073
- const knownProviders = ["azure", "openai", "anthropic", "bedrock", "vertex"];
14310
+ const knownProviders = [...KNOWN_PROVIDERS, ...PROVIDER_ALIASES];
14074
14311
  for (let i6 = 0; i6 < targets.length; i6++) {
14075
14312
  const target = targets[i6];
14076
14313
  const location = `targets[${i6}]`;
@@ -14134,6 +14371,80 @@ async function validateTargetsFile(filePath) {
14134
14371
  errors
14135
14372
  };
14136
14373
  }
14374
+ var SCHEMA_CONFIG_V222 = "agentv-config-v2";
14375
+ async function validateConfigFile(filePath) {
14376
+ const errors = [];
14377
+ try {
14378
+ const content = await readFile42(filePath, "utf8");
14379
+ const parsed = parse42(content);
14380
+ if (typeof parsed !== "object" || parsed === null) {
14381
+ errors.push({
14382
+ severity: "error",
14383
+ filePath,
14384
+ message: "Config file must contain a valid YAML object"
14385
+ });
14386
+ return { valid: false, filePath, fileType: "config", errors };
14387
+ }
14388
+ const config = parsed;
14389
+ const schema = config["$schema"];
14390
+ if (schema !== SCHEMA_CONFIG_V222) {
14391
+ const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
14392
+ errors.push({
14393
+ severity: "error",
14394
+ filePath,
14395
+ location: "$schema",
14396
+ message
14397
+ });
14398
+ }
14399
+ const guidelinePatterns = config["guideline_patterns"];
14400
+ if (guidelinePatterns !== void 0) {
14401
+ if (!Array.isArray(guidelinePatterns)) {
14402
+ errors.push({
14403
+ severity: "error",
14404
+ filePath,
14405
+ location: "guideline_patterns",
14406
+ message: "Field 'guideline_patterns' must be an array"
14407
+ });
14408
+ } else if (!guidelinePatterns.every((p) => typeof p === "string")) {
14409
+ errors.push({
14410
+ severity: "error",
14411
+ filePath,
14412
+ location: "guideline_patterns",
14413
+ message: "All entries in 'guideline_patterns' must be strings"
14414
+ });
14415
+ } else if (guidelinePatterns.length === 0) {
14416
+ errors.push({
14417
+ severity: "warning",
14418
+ filePath,
14419
+ location: "guideline_patterns",
14420
+ message: "Field 'guideline_patterns' is empty. Consider removing it or adding patterns."
14421
+ });
14422
+ }
14423
+ }
14424
+ const allowedFields = /* @__PURE__ */ new Set(["$schema", "guideline_patterns"]);
14425
+ const unexpectedFields = Object.keys(config).filter((key2) => !allowedFields.has(key2));
14426
+ if (unexpectedFields.length > 0) {
14427
+ errors.push({
14428
+ severity: "warning",
14429
+ filePath,
14430
+ message: `Unexpected fields: ${unexpectedFields.join(", ")}`
14431
+ });
14432
+ }
14433
+ return {
14434
+ valid: errors.filter((e) => e.severity === "error").length === 0,
14435
+ filePath,
14436
+ fileType: "config",
14437
+ errors
14438
+ };
14439
+ } catch (error) {
14440
+ errors.push({
14441
+ severity: "error",
14442
+ filePath,
14443
+ message: `Failed to parse config file: ${error.message}`
14444
+ });
14445
+ return { valid: false, filePath, fileType: "config", errors };
14446
+ }
14447
+ }
14137
14448
  function isObject3(value) {
14138
14449
  return typeof value === "object" && value !== null && !Array.isArray(value);
14139
14450
  }
@@ -14152,8 +14463,8 @@ async function validateFileReferences(evalFilePath) {
14152
14463
  const searchRoots = buildSearchRoots(absolutePath, gitRoot);
14153
14464
  let parsed;
14154
14465
  try {
14155
- const content = await readFile42(absolutePath, "utf8");
14156
- parsed = parse42(content);
14466
+ const content = await readFile52(absolutePath, "utf8");
14467
+ parsed = parse52(content);
14157
14468
  } catch {
14158
14469
  return errors;
14159
14470
  }
@@ -14222,7 +14533,7 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
14222
14533
  });
14223
14534
  } else {
14224
14535
  try {
14225
- const fileContent = await readFile42(resolvedPath, "utf8");
14536
+ const fileContent = await readFile52(resolvedPath, "utf8");
14226
14537
  if (fileContent.trim().length === 0) {
14227
14538
  errors.push({
14228
14539
  severity: "warning",
@@ -14244,12 +14555,15 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
14244
14555
  }
14245
14556
  }
14246
14557
 
14247
- // src/commands/lint/lint-files.ts
14248
- async function lintFiles(paths) {
14558
+ // src/commands/validate/validate-files.ts
14559
+ import { constants as constants7 } from "node:fs";
14560
+ import { access as access7, readdir as readdir3, stat as stat3 } from "node:fs/promises";
14561
+ import path16 from "node:path";
14562
+ async function validateFiles(paths) {
14249
14563
  const filePaths = await expandPaths(paths);
14250
14564
  const results = [];
14251
14565
  for (const filePath of filePaths) {
14252
- const result = await lintSingleFile(filePath);
14566
+ const result = await validateSingleFile(filePath);
14253
14567
  results.push(result);
14254
14568
  }
14255
14569
  const validFiles = results.filter((r) => r.valid).length;
@@ -14261,8 +14575,8 @@ async function lintFiles(paths) {
14261
14575
  results
14262
14576
  };
14263
14577
  }
14264
- async function lintSingleFile(filePath) {
14265
- const absolutePath = path14.resolve(filePath);
14578
+ async function validateSingleFile(filePath) {
14579
+ const absolutePath = path16.resolve(filePath);
14266
14580
  const fileType = await detectFileType(absolutePath);
14267
14581
  if (fileType === "unknown") {
14268
14582
  return {
@@ -14273,7 +14587,7 @@ async function lintSingleFile(filePath) {
14273
14587
  {
14274
14588
  severity: "error",
14275
14589
  filePath: absolutePath,
14276
- message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2' or 'agentv-targets-v2'"
14590
+ message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
14277
14591
  }
14278
14592
  ]
14279
14593
  };
@@ -14291,15 +14605,17 @@ async function lintSingleFile(filePath) {
14291
14605
  };
14292
14606
  }
14293
14607
  }
14294
- } else {
14608
+ } else if (fileType === "targets") {
14295
14609
  result = await validateTargetsFile(absolutePath);
14610
+ } else {
14611
+ result = await validateConfigFile(absolutePath);
14296
14612
  }
14297
14613
  return result;
14298
14614
  }
14299
14615
  async function expandPaths(paths) {
14300
14616
  const expanded = [];
14301
14617
  for (const inputPath of paths) {
14302
- const absolutePath = path14.resolve(inputPath);
14618
+ const absolutePath = path16.resolve(inputPath);
14303
14619
  try {
14304
14620
  await access7(absolutePath, constants7.F_OK);
14305
14621
  } catch {
@@ -14323,7 +14639,7 @@ async function findYamlFiles(dirPath) {
14323
14639
  try {
14324
14640
  const entries = await readdir3(dirPath, { withFileTypes: true });
14325
14641
  for (const entry of entries) {
14326
- const fullPath = path14.join(dirPath, entry.name);
14642
+ const fullPath = path16.join(dirPath, entry.name);
14327
14643
  if (entry.isDirectory()) {
14328
14644
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
14329
14645
  continue;
@@ -14340,98 +14656,27 @@ async function findYamlFiles(dirPath) {
14340
14656
  return results;
14341
14657
  }
14342
14658
  function isYamlFile(filePath) {
14343
- const ext = path14.extname(filePath).toLowerCase();
14659
+ const ext = path16.extname(filePath).toLowerCase();
14344
14660
  return ext === ".yaml" || ext === ".yml";
14345
14661
  }
14346
14662
 
14347
- // src/commands/lint/format-output.ts
14348
- var ANSI_RED = "\x1B[31m";
14349
- var ANSI_YELLOW2 = "\x1B[33m";
14350
- var ANSI_GREEN = "\x1B[32m";
14351
- var ANSI_CYAN = "\x1B[36m";
14352
- var ANSI_BOLD = "\x1B[1m";
14353
- var ANSI_RESET2 = "\x1B[0m";
14354
- function formatSummary(summary, useColors) {
14355
- const lines = [];
14356
- lines.push("");
14357
- lines.push(formatHeader("Validation Summary", useColors));
14358
- lines.push("");
14359
- for (const result of summary.results) {
14360
- lines.push(formatFileResult(result, useColors));
14361
- }
14362
- lines.push("");
14363
- lines.push(formatStats(summary, useColors));
14364
- lines.push("");
14365
- return lines.join("\n");
14366
- }
14367
- function formatHeader(text, useColors) {
14368
- if (useColors) {
14369
- return `${ANSI_BOLD}${ANSI_CYAN}${text}${ANSI_RESET2}`;
14370
- }
14371
- return text;
14372
- }
14373
- function formatFileResult(result, useColors) {
14374
- const lines = [];
14375
- const status = result.valid ? "\u2713" : "\u2717";
14376
- const statusColor = result.valid ? ANSI_GREEN : ANSI_RED;
14377
- const statusText = useColors ? `${statusColor}${status}${ANSI_RESET2}` : status;
14378
- const fileName = result.filePath;
14379
- lines.push(`${statusText} ${fileName}`);
14380
- if (result.errors.length > 0) {
14381
- for (const error of result.errors) {
14382
- lines.push(formatError(error, useColors));
14383
- }
14384
- }
14385
- return lines.join("\n");
14386
- }
14387
- function formatError(error, useColors) {
14388
- const prefix = error.severity === "error" ? " \u2717" : " \u26A0";
14389
- const color = error.severity === "error" ? ANSI_RED : ANSI_YELLOW2;
14390
- const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET2}` : prefix;
14391
- const location = error.location ? ` [${error.location}]` : "";
14392
- return `${coloredPrefix}${location} ${error.message}`;
14393
- }
14394
- function formatStats(summary, useColors) {
14395
- const lines = [];
14396
- const totalText = `Total files: ${summary.totalFiles}`;
14397
- const validText = `Valid: ${summary.validFiles}`;
14398
- const invalidText = `Invalid: ${summary.invalidFiles}`;
14399
- if (useColors) {
14400
- lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET2}`);
14401
- lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET2}`);
14402
- if (summary.invalidFiles > 0) {
14403
- lines.push(`${ANSI_RED}${invalidText}${ANSI_RESET2}`);
14404
- } else {
14405
- lines.push(invalidText);
14406
- }
14407
- } else {
14408
- lines.push(totalText);
14409
- lines.push(validText);
14410
- lines.push(invalidText);
14411
- }
14412
- return lines.join("\n");
14413
- }
14414
- function isTTY() {
14415
- return process.stdout.isTTY ?? false;
14416
- }
14417
-
14418
- // src/commands/lint/index.ts
14419
- async function runLintCommand(paths, options) {
14663
+ // src/commands/validate/index.ts
14664
+ async function runValidateCommand(paths, _options) {
14420
14665
  if (paths.length === 0) {
14421
- console.error("Error: No paths specified. Usage: agentv lint <paths...>");
14666
+ console.error("Error: No paths specified. Usage: agentv validate <paths...>");
14422
14667
  process.exit(1);
14423
14668
  }
14424
- const summary = await lintFiles(paths);
14669
+ const summary = await validateFiles(paths);
14425
14670
  const useColors = isTTY();
14426
14671
  console.log(formatSummary(summary, useColors));
14427
14672
  if (summary.invalidFiles > 0) {
14428
14673
  process.exit(1);
14429
14674
  }
14430
14675
  }
14431
- function registerLintCommand(program) {
14432
- program.command("lint").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to lint").action(async (paths, options) => {
14676
+ function registerValidateCommand(program) {
14677
+ program.command("validate").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to validate").action(async (paths, _options) => {
14433
14678
  try {
14434
- await runLintCommand(paths, options);
14679
+ await runValidateCommand(paths, _options);
14435
14680
  } catch (error) {
14436
14681
  console.error(`Error: ${error.message}`);
14437
14682
  process.exit(1);
@@ -14449,68 +14694,6 @@ function registerStatusCommand(program) {
14449
14694
  return program;
14450
14695
  }
14451
14696
 
14452
- // src/commands/init/index.ts
14453
- import { existsSync as existsSync2, mkdirSync, writeFileSync } from "node:fs";
14454
- import path16 from "node:path";
14455
-
14456
- // src/templates/index.ts
14457
- import { readFileSync } from "node:fs";
14458
- import { fileURLToPath as fileURLToPath2 } from "node:url";
14459
- import path15 from "node:path";
14460
- var TemplateManager = class {
14461
- static getTemplates() {
14462
- const currentDir = path15.dirname(fileURLToPath2(import.meta.url));
14463
- let templatesDir;
14464
- if (currentDir.includes(path15.sep + "dist")) {
14465
- templatesDir = path15.join(currentDir, "templates");
14466
- } else {
14467
- templatesDir = currentDir;
14468
- }
14469
- const evalBuildPrompt = readFileSync(
14470
- path15.join(templatesDir, "eval-build.prompt.md"),
14471
- "utf-8"
14472
- );
14473
- const evalSchema = readFileSync(
14474
- path15.join(templatesDir, "eval-schema.json"),
14475
- "utf-8"
14476
- );
14477
- return [
14478
- {
14479
- path: "prompts/eval-build.prompt.md",
14480
- content: evalBuildPrompt
14481
- },
14482
- {
14483
- path: "contexts/eval-schema.json",
14484
- content: evalSchema
14485
- }
14486
- ];
14487
- }
14488
- };
14489
-
14490
- // src/commands/init/index.ts
14491
- async function initCommand(options = {}) {
14492
- const targetPath = path16.resolve(options.targetPath ?? ".");
14493
- const githubDir = path16.join(targetPath, ".github");
14494
- if (!existsSync2(githubDir)) {
14495
- mkdirSync(githubDir, { recursive: true });
14496
- }
14497
- const templates = TemplateManager.getTemplates();
14498
- for (const template of templates) {
14499
- const targetFilePath = path16.join(githubDir, template.path);
14500
- const targetDirPath = path16.dirname(targetFilePath);
14501
- if (!existsSync2(targetDirPath)) {
14502
- mkdirSync(targetDirPath, { recursive: true });
14503
- }
14504
- writeFileSync(targetFilePath, template.content, "utf-8");
14505
- console.log(`Created ${path16.relative(targetPath, targetFilePath)}`);
14506
- }
14507
- console.log("\nAgentV initialized successfully!");
14508
- console.log(`
14509
- Files installed to ${path16.relative(targetPath, githubDir)}:`);
14510
- templates.forEach((t) => console.log(` - ${t.path}`));
14511
- console.log("\nYou can now create eval files using the schema and prompt templates.");
14512
- }
14513
-
14514
14697
  // src/index.ts
14515
14698
  var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
14516
14699
  function createProgram() {
@@ -14518,7 +14701,7 @@ function createProgram() {
14518
14701
  program.name("agentv").description("AgentV CLI scaffolding").version(packageJson.version);
14519
14702
  registerStatusCommand(program);
14520
14703
  registerEvalCommand(program);
14521
- registerLintCommand(program);
14704
+ registerValidateCommand(program);
14522
14705
  program.command("init [path]").description("Initialize AgentV in your project (installs prompt templates and schema to .github)").action(async (targetPath) => {
14523
14706
  try {
14524
14707
  await initCommand({ targetPath });
@@ -14539,4 +14722,4 @@ export {
14539
14722
  createProgram,
14540
14723
  runCli
14541
14724
  };
14542
- //# sourceMappingURL=chunk-S3RN2GSO.js.map
14725
+ //# sourceMappingURL=chunk-RLBRJX7V.js.map