@agentv/core 3.8.0 → 3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -19,7 +19,7 @@ import {
19
19
  readTextFile,
20
20
  resolveFileReference,
21
21
  resolveTargetDefinition
22
- } from "./chunk-3ZS3GCMI.js";
22
+ } from "./chunk-PC5TLJF6.js";
23
23
  import {
24
24
  AgentvProvider
25
25
  } from "./chunk-W5YDZWT4.js";
@@ -149,9 +149,9 @@ function mergeExecutionMetrics(computed, metrics) {
149
149
  }
150
150
 
151
151
  // src/evaluation/yaml-parser.ts
152
- import { readFile as readFile7 } from "node:fs/promises";
153
- import path8 from "node:path";
154
- import micromatch3 from "micromatch";
152
+ import { readFile as readFile6 } from "node:fs/promises";
153
+ import path7 from "node:path";
154
+ import micromatch2 from "micromatch";
155
155
  import { parse as parse2 } from "yaml";
156
156
 
157
157
  // src/evaluation/loaders/agent-skills-parser.ts
@@ -225,7 +225,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
225
225
  input_segments: [{ type: "text", value: prompt }],
226
226
  expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
227
227
  reference_answer: evalCase.expected_output,
228
- guideline_paths: [],
229
228
  file_paths: filePaths,
230
229
  criteria: evalCase.expected_output ?? "",
231
230
  assertions,
@@ -239,7 +238,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
239
238
  // src/evaluation/loaders/config-loader.ts
240
239
  import { readFile as readFile2 } from "node:fs/promises";
241
240
  import path3 from "node:path";
242
- import micromatch from "micromatch";
243
241
  import { parse } from "yaml";
244
242
 
245
243
  // src/evaluation/loaders/file-resolver.ts
@@ -370,15 +368,6 @@ async function loadConfig(evalFilePath, repoRoot) {
370
368
  logWarning(`Invalid required_version in ${configPath}, expected string`);
371
369
  continue;
372
370
  }
373
- const guidelinePatterns = config.guideline_patterns;
374
- if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
375
- logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
376
- continue;
377
- }
378
- if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
379
- logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
380
- continue;
381
- }
382
371
  const evalPatterns = config.eval_patterns;
383
372
  if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
384
373
  logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
@@ -394,7 +383,6 @@ async function loadConfig(evalFilePath, repoRoot) {
394
383
  );
395
384
  return {
396
385
  required_version: requiredVersion,
397
- guideline_patterns: guidelinePatterns,
398
386
  eval_patterns: evalPatterns,
399
387
  execution: executionDefaults
400
388
  };
@@ -406,11 +394,6 @@ async function loadConfig(evalFilePath, repoRoot) {
406
394
  }
407
395
  return null;
408
396
  }
409
- function isGuidelineFile(filePath, patterns) {
410
- const normalized = filePath.split("\\").join("/");
411
- const patternsToUse = patterns ?? [];
412
- return micromatch.isMatch(normalized, patternsToUse);
413
- }
414
397
  function extractTargetFromSuite(suite) {
415
398
  const execution = suite.execution;
416
399
  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
@@ -437,6 +420,17 @@ function extractTargetsFromSuite(suite) {
437
420
  }
438
421
  return void 0;
439
422
  }
423
+ function extractWorkersFromSuite(suite) {
424
+ const execution = suite.execution;
425
+ if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
426
+ return void 0;
427
+ }
428
+ const workers = execution.workers;
429
+ if (typeof workers === "number" && Number.isInteger(workers) && workers >= 1 && workers <= 50) {
430
+ return workers;
431
+ }
432
+ return void 0;
433
+ }
440
434
  function extractTargetsFromTestCase(testCase) {
441
435
  const execution = testCase.execution;
442
436
  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
@@ -1986,7 +1980,7 @@ function parseInlineRubrics(rawRubrics) {
1986
1980
  // src/evaluation/loaders/jsonl-parser.ts
1987
1981
  import { readFile as readFile5 } from "node:fs/promises";
1988
1982
  import path6 from "node:path";
1989
- import micromatch2 from "micromatch";
1983
+ import micromatch from "micromatch";
1990
1984
  import { parse as parseYaml } from "yaml";
1991
1985
 
1992
1986
  // src/evaluation/loaders/message-processor.ts
@@ -2013,10 +2007,6 @@ function formatSegment(segment, mode = "lm") {
2013
2007
  if (type === "text") {
2014
2008
  return asString2(segment.value);
2015
2009
  }
2016
- if (type === "guideline_ref") {
2017
- const refPath = asString2(segment.path);
2018
- return refPath ? `<Attached: ${refPath}>` : void 0;
2019
- }
2020
2010
  if (type === "file") {
2021
2011
  const filePath = asString2(segment.path);
2022
2012
  if (!filePath) {
@@ -2039,9 +2029,6 @@ function hasVisibleContent(segments) {
2039
2029
  const value = asString2(segment.value);
2040
2030
  return value !== void 0 && value.trim().length > 0;
2041
2031
  }
2042
- if (type === "guideline_ref") {
2043
- return false;
2044
- }
2045
2032
  if (type === "file") {
2046
2033
  const text = asString2(segment.text);
2047
2034
  return text !== void 0 && text.trim().length > 0;
@@ -2057,17 +2044,7 @@ function asString2(value) {
2057
2044
  var ANSI_YELLOW4 = "\x1B[33m";
2058
2045
  var ANSI_RESET5 = "\x1B[0m";
2059
2046
  async function processMessages(options) {
2060
- const {
2061
- messages,
2062
- searchRoots,
2063
- repoRootPath,
2064
- guidelinePatterns,
2065
- guidelinePaths,
2066
- treatFileSegmentsAsGuidelines,
2067
- textParts,
2068
- messageType,
2069
- verbose
2070
- } = options;
2047
+ const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
2071
2048
  const segments = [];
2072
2049
  for (const message of messages) {
2073
2050
  const content = message.content;
@@ -2111,21 +2088,6 @@ async function processMessages(options) {
2111
2088
  }
2112
2089
  try {
2113
2090
  const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2114
- const classifyAsGuideline = shouldTreatAsGuideline({
2115
- messageType,
2116
- resolvedPath,
2117
- repoRootPath,
2118
- guidelinePatterns,
2119
- treatFileSegmentsAsGuidelines
2120
- });
2121
- if (classifyAsGuideline && guidelinePaths) {
2122
- guidelinePaths.push(path5.resolve(resolvedPath));
2123
- if (verbose) {
2124
- console.log(` [Guideline] Found: ${displayPath}`);
2125
- console.log(` Resolved to: ${resolvedPath}`);
2126
- }
2127
- continue;
2128
- }
2129
2091
  segments.push({
2130
2092
  type: "file",
2131
2093
  path: displayPath,
@@ -2153,26 +2115,6 @@ async function processMessages(options) {
2153
2115
  }
2154
2116
  return segments;
2155
2117
  }
2156
- function shouldTreatAsGuideline(options) {
2157
- const {
2158
- messageType,
2159
- resolvedPath,
2160
- repoRootPath,
2161
- guidelinePatterns,
2162
- treatFileSegmentsAsGuidelines
2163
- } = options;
2164
- if (messageType !== "input") {
2165
- return false;
2166
- }
2167
- if (treatFileSegmentsAsGuidelines) {
2168
- return true;
2169
- }
2170
- if (!guidelinePatterns || guidelinePatterns.length === 0) {
2171
- return false;
2172
- }
2173
- const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
2174
- return isGuidelineFile(relativeToRepo, guidelinePatterns);
2175
- }
2176
2118
  function asString3(value) {
2177
2119
  return typeof value === "string" ? value : void 0;
2178
2120
  }
@@ -2373,7 +2315,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
2373
2315
  }
2374
2316
  return {
2375
2317
  description: asString4(parsed.description),
2376
- dataset: asString4(parsed.dataset),
2318
+ name: asString4(parsed.name),
2377
2319
  execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
2378
2320
  evaluator: parsed.evaluator
2379
2321
  };
@@ -2409,20 +2351,18 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2409
2351
  const absoluteTestPath = path6.resolve(evalFilePath);
2410
2352
  const repoRootPath = resolveToAbsolutePath(repoRoot);
2411
2353
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
2412
- const config = await loadConfig(absoluteTestPath, repoRootPath);
2413
- const guidelinePatterns = config?.guideline_patterns;
2414
2354
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
2415
2355
  const rawFile = await readFile5(absoluteTestPath, "utf8");
2416
2356
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
2417
- const fallbackDataset = path6.basename(absoluteTestPath, ".jsonl") || "eval";
2418
- const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
2357
+ const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
2358
+ const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
2419
2359
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
2420
2360
  const globalExecution = sidecar.execution;
2421
2361
  if (verbose) {
2422
2362
  console.log(`
2423
2363
  [JSONL Dataset: ${evalFilePath}]`);
2424
2364
  console.log(` Cases: ${rawCases.length}`);
2425
- console.log(` Dataset name: ${datasetName}`);
2365
+ console.log(` Eval set: ${evalSetName}`);
2426
2366
  if (sidecar.description) {
2427
2367
  console.log(` Description: ${sidecar.description}`);
2428
2368
  }
@@ -2432,7 +2372,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2432
2372
  const evalcase = rawCases[lineIndex];
2433
2373
  const lineNumber = lineIndex + 1;
2434
2374
  const id = asString4(evalcase.id);
2435
- if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
2375
+ if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
2436
2376
  continue;
2437
2377
  }
2438
2378
  const conversationId = asString4(evalcase.conversation_id);
@@ -2455,14 +2395,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2455
2395
  continue;
2456
2396
  }
2457
2397
  const hasExpectedMessages = expectedMessages.length > 0;
2458
- const guidelinePaths = [];
2459
2398
  const inputTextParts = [];
2460
2399
  const inputSegments = await processMessages({
2461
2400
  messages: inputMessages,
2462
2401
  searchRoots,
2463
2402
  repoRootPath,
2464
- guidelinePatterns,
2465
- guidelinePaths,
2466
2403
  textParts: inputTextParts,
2467
2404
  messageType: "input",
2468
2405
  verbose
@@ -2512,40 +2449,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2512
2449
  userFilePaths.push(segment.resolvedPath);
2513
2450
  }
2514
2451
  }
2515
- const allFilePaths = [
2516
- ...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
2517
- ...userFilePaths
2518
- ];
2519
2452
  const testCase = {
2520
2453
  id,
2521
- dataset: datasetName,
2454
+ eval_set: evalSetName,
2522
2455
  conversation_id: conversationId,
2523
2456
  question,
2524
2457
  input: inputMessages,
2525
2458
  input_segments: inputSegments,
2526
2459
  expected_output: outputSegments,
2527
2460
  reference_answer: referenceAnswer,
2528
- guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
2529
- guideline_patterns: guidelinePatterns,
2530
- file_paths: allFilePaths,
2461
+ file_paths: userFilePaths,
2531
2462
  criteria: outcome ?? "",
2532
2463
  evaluator: evalCaseEvaluatorKind,
2533
2464
  assertions: evaluators
2534
2465
  };
2535
- if (verbose) {
2536
- console.log(`
2537
- [Test: ${id}]`);
2538
- if (testCase.guideline_paths.length > 0) {
2539
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
2540
- for (const guidelinePath of testCase.guideline_paths) {
2541
- console.log(` - ${guidelinePath}`);
2542
- }
2543
- } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
2544
- console.log(" No guidelines found (guideline_patterns not configured)");
2545
- } else {
2546
- console.log(" No guidelines found");
2547
- }
2548
- }
2549
2466
  results.push(testCase);
2550
2467
  }
2551
2468
  return results;
@@ -2603,30 +2520,7 @@ function parseMetadata(suite) {
2603
2520
  }
2604
2521
 
2605
2522
  // src/evaluation/formatting/prompt-builder.ts
2606
- import { readFile as readFile6 } from "node:fs/promises";
2607
- import path7 from "node:path";
2608
- var ANSI_YELLOW6 = "\x1B[33m";
2609
- var ANSI_RESET7 = "\x1B[0m";
2610
2523
  async function buildPromptInputs(testCase, mode = "lm") {
2611
- const guidelineParts = [];
2612
- for (const rawPath of testCase.guideline_paths) {
2613
- const absolutePath = path7.resolve(rawPath);
2614
- if (!await fileExists2(absolutePath)) {
2615
- logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
2616
- continue;
2617
- }
2618
- try {
2619
- const content = (await readFile6(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
2620
- guidelineParts.push({
2621
- content,
2622
- isFile: true,
2623
- displayPath: path7.basename(absolutePath)
2624
- });
2625
- } catch (error) {
2626
- logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
2627
- }
2628
- }
2629
- const guidelines = formatFileContents(guidelineParts);
2630
2524
  const segmentsByMessage = [];
2631
2525
  const fileContentsByPath = /* @__PURE__ */ new Map();
2632
2526
  for (const segment of testCase.input_segments) {
@@ -2651,10 +2545,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
2651
2545
  if (type === "file") {
2652
2546
  const value = asString5(segment.value);
2653
2547
  if (!value) continue;
2654
- if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
2655
- messageSegments.push({ type: "guideline_ref", path: value });
2656
- continue;
2657
- }
2658
2548
  const fileText = fileContentsByPath.get(value);
2659
2549
  if (fileText !== void 0) {
2660
2550
  messageSegments.push({ type: "file", text: fileText, path: value });
@@ -2703,10 +2593,6 @@ ${messageContent}`);
2703
2593
  } else {
2704
2594
  const questionParts = [];
2705
2595
  for (const segment of testCase.input_segments) {
2706
- if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
2707
- questionParts.push(`<Attached: ${segment.path}>`);
2708
- continue;
2709
- }
2710
2596
  const formattedContent = formatSegment(segment, mode);
2711
2597
  if (formattedContent) {
2712
2598
  questionParts.push(formattedContent);
@@ -2717,11 +2603,9 @@ ${messageContent}`);
2717
2603
  const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
2718
2604
  messages: testCase.input,
2719
2605
  segmentsByMessage,
2720
- guidelinePatterns: testCase.guideline_patterns,
2721
- guidelineContent: guidelines,
2722
2606
  mode
2723
2607
  }) : void 0;
2724
- return { question, guidelines, chatPrompt };
2608
+ return { question, chatPrompt };
2725
2609
  }
2726
2610
  function needsRoleMarkers(messages, processedSegmentsByMessage) {
2727
2611
  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
@@ -2736,14 +2620,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
2736
2620
  return messagesWithContent > 1;
2737
2621
  }
2738
2622
  function buildChatPromptFromSegments(options) {
2739
- const {
2740
- messages,
2741
- segmentsByMessage,
2742
- guidelinePatterns,
2743
- guidelineContent,
2744
- systemPrompt,
2745
- mode = "lm"
2746
- } = options;
2623
+ const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
2747
2624
  if (messages.length === 0) {
2748
2625
  return void 0;
2749
2626
  }
@@ -2751,11 +2628,6 @@ function buildChatPromptFromSegments(options) {
2751
2628
  if (systemPrompt && systemPrompt.trim().length > 0) {
2752
2629
  systemSegments.push(systemPrompt.trim());
2753
2630
  }
2754
- if (guidelineContent && guidelineContent.trim().length > 0) {
2755
- systemSegments.push(`[[ ## Guidelines ## ]]
2756
-
2757
- ${guidelineContent.trim()}`);
2758
- }
2759
2631
  let startIndex = 0;
2760
2632
  while (startIndex < messages.length && messages[startIndex].role === "system") {
2761
2633
  const segments = segmentsByMessage[startIndex];
@@ -2791,15 +2663,8 @@ ${guidelineContent.trim()}`);
2791
2663
  contentParts.push("@[Tool]:");
2792
2664
  }
2793
2665
  for (const segment of segments) {
2794
- if (segment.type === "guideline_ref") {
2795
- continue;
2796
- }
2797
2666
  const formatted = formatSegment(segment, mode);
2798
2667
  if (formatted) {
2799
- const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
2800
- if (isGuidelineRef) {
2801
- continue;
2802
- }
2803
2668
  contentParts.push(formatted);
2804
2669
  }
2805
2670
  }
@@ -2817,30 +2682,27 @@ ${guidelineContent.trim()}`);
2817
2682
  function asString5(value) {
2818
2683
  return typeof value === "string" ? value : void 0;
2819
2684
  }
2820
- function logWarning5(message) {
2821
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
2822
- }
2823
2685
 
2824
2686
  // src/evaluation/yaml-parser.ts
2825
- var ANSI_YELLOW7 = "\x1B[33m";
2687
+ var ANSI_YELLOW6 = "\x1B[33m";
2826
2688
  var ANSI_RED3 = "\x1B[31m";
2827
- var ANSI_RESET8 = "\x1B[0m";
2689
+ var ANSI_RESET7 = "\x1B[0m";
2828
2690
  function resolveTests(suite) {
2829
2691
  if (suite.tests !== void 0) return suite.tests;
2830
2692
  if (suite.eval_cases !== void 0) {
2831
- logWarning6("'eval_cases' is deprecated. Use 'tests' instead.");
2693
+ logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
2832
2694
  return suite.eval_cases;
2833
2695
  }
2834
2696
  if (suite.evalcases !== void 0) {
2835
- logWarning6("'evalcases' is deprecated. Use 'tests' instead.");
2697
+ logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
2836
2698
  return suite.evalcases;
2837
2699
  }
2838
2700
  return void 0;
2839
2701
  }
2840
2702
  async function readTestSuiteMetadata(testFilePath) {
2841
2703
  try {
2842
- const absolutePath = path8.resolve(testFilePath);
2843
- const content = await readFile7(absolutePath, "utf8");
2704
+ const absolutePath = path7.resolve(testFilePath);
2705
+ const content = await readFile6(absolutePath, "utf8");
2844
2706
  const parsed = interpolateEnv(parse2(content), process.env);
2845
2707
  if (!isJsonObject(parsed)) {
2846
2708
  return {};
@@ -2869,6 +2731,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
2869
2731
  tests,
2870
2732
  trials: extractTrialsConfig(parsed),
2871
2733
  targets: extractTargetsFromSuite(parsed),
2734
+ workers: extractWorkersFromSuite(parsed),
2872
2735
  cacheConfig: extractCacheConfig(parsed),
2873
2736
  totalBudgetUsd: extractTotalBudgetUsd(parsed),
2874
2737
  ...metadata !== void 0 && { metadata },
@@ -2891,26 +2754,25 @@ var loadEvalCases = loadTests;
2891
2754
  async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2892
2755
  const verbose = options?.verbose ?? false;
2893
2756
  const filterPattern = options?.filter;
2894
- const absoluteTestPath = path8.resolve(evalFilePath);
2757
+ const absoluteTestPath = path7.resolve(evalFilePath);
2895
2758
  const repoRootPath = resolveToAbsolutePath(repoRoot);
2896
2759
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
2897
2760
  const config = await loadConfig(absoluteTestPath, repoRootPath);
2898
- const guidelinePatterns = config?.guideline_patterns;
2899
- const rawFile = await readFile7(absoluteTestPath, "utf8");
2761
+ const rawFile = await readFile6(absoluteTestPath, "utf8");
2900
2762
  const interpolated = interpolateEnv(parse2(rawFile), process.env);
2901
2763
  if (!isJsonObject(interpolated)) {
2902
2764
  throw new Error(`Invalid test file format: ${evalFilePath}`);
2903
2765
  }
2904
2766
  const suite = interpolated;
2905
- const datasetNameFromSuite = asString6(suite.dataset)?.trim();
2906
- const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
2907
- const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
2767
+ const evalSetNameFromSuite = asString6(suite.name)?.trim();
2768
+ const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
2769
+ const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
2908
2770
  const rawTestcases = resolveTests(suite);
2909
2771
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
2910
- const evalFileDir = path8.dirname(absoluteTestPath);
2772
+ const evalFileDir = path7.dirname(absoluteTestPath);
2911
2773
  let expandedTestcases;
2912
2774
  if (typeof rawTestcases === "string") {
2913
- const externalPath = path8.resolve(evalFileDir, rawTestcases);
2775
+ const externalPath = path7.resolve(evalFileDir, rawTestcases);
2914
2776
  expandedTestcases = await loadCasesFromFile(externalPath);
2915
2777
  } else if (Array.isArray(rawTestcases)) {
2916
2778
  expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
@@ -2924,18 +2786,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2924
2786
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
2925
2787
  const suiteAssertions = suite.assertions ?? suite.assert;
2926
2788
  if (suite.assert !== void 0 && suite.assertions === void 0) {
2927
- logWarning6("'assert' is deprecated at the suite level. Use 'assertions' instead.");
2789
+ logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
2928
2790
  }
2929
2791
  const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
2930
2792
  const results = [];
2931
2793
  for (const rawEvalcase of expandedTestcases) {
2932
2794
  if (!isJsonObject(rawEvalcase)) {
2933
- logWarning6("Skipping invalid test entry (expected object)");
2795
+ logWarning5("Skipping invalid test entry (expected object)");
2934
2796
  continue;
2935
2797
  }
2936
2798
  const evalcase = rawEvalcase;
2937
2799
  const id = asString6(evalcase.id);
2938
- if (filterPattern && (!id || !micromatch3.isMatch(id, filterPattern))) {
2800
+ if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
2939
2801
  continue;
2940
2802
  }
2941
2803
  const conversationId = asString6(evalcase.conversation_id);
@@ -2943,7 +2805,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2943
2805
  if (!outcome && evalcase.expected_outcome !== void 0) {
2944
2806
  outcome = asString6(evalcase.expected_outcome);
2945
2807
  if (outcome) {
2946
- logWarning6(
2808
+ logWarning5(
2947
2809
  `Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
2948
2810
  );
2949
2811
  }
@@ -2963,15 +2825,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2963
2825
  const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
2964
2826
  const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
2965
2827
  const hasExpectedMessages = expectedMessages.length > 0;
2966
- const guidelinePaths = [];
2967
2828
  const inputTextParts = [];
2968
2829
  const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
2969
2830
  messages: effectiveSuiteInputMessages,
2970
2831
  searchRoots,
2971
2832
  repoRootPath,
2972
- guidelinePatterns,
2973
- guidelinePaths,
2974
- treatFileSegmentsAsGuidelines: true,
2975
2833
  textParts: inputTextParts,
2976
2834
  messageType: "input",
2977
2835
  verbose
@@ -2980,8 +2838,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2980
2838
  messages: testInputMessages,
2981
2839
  searchRoots,
2982
2840
  repoRootPath,
2983
- guidelinePatterns,
2984
- guidelinePaths,
2985
2841
  textParts: inputTextParts,
2986
2842
  messageType: "input",
2987
2843
  verbose
@@ -3030,26 +2886,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3030
2886
  userFilePaths.push(segment.resolvedPath);
3031
2887
  }
3032
2888
  }
3033
- const allFilePaths = [
3034
- ...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
3035
- ...userFilePaths
3036
- ];
3037
2889
  const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
3038
2890
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
3039
2891
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
3040
2892
  const caseTargets = extractTargetsFromTestCase(evalcase);
3041
2893
  const testCase = {
3042
2894
  id,
3043
- dataset: datasetName,
2895
+ eval_set: evalSetName,
3044
2896
  conversation_id: conversationId,
3045
2897
  question,
3046
2898
  input: inputMessages,
3047
2899
  input_segments: inputSegments,
3048
2900
  expected_output: outputSegments,
3049
2901
  reference_answer: referenceAnswer,
3050
- guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
3051
- guideline_patterns: guidelinePatterns,
3052
- file_paths: allFilePaths,
2902
+ file_paths: userFilePaths,
3053
2903
  criteria: outcome ?? "",
3054
2904
  evaluator: evalCaseEvaluatorKind,
3055
2905
  assertions: evaluators,
@@ -3057,20 +2907,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3057
2907
  metadata,
3058
2908
  targets: caseTargets
3059
2909
  };
3060
- if (verbose) {
3061
- console.log(`
3062
- [Test: ${id}]`);
3063
- if (testCase.guideline_paths.length > 0) {
3064
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
3065
- for (const guidelinePath of testCase.guideline_paths) {
3066
- console.log(` - ${guidelinePath}`);
3067
- }
3068
- } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
3069
- console.log(" No guidelines found (guideline_patterns not configured)");
3070
- } else {
3071
- console.log(" No guidelines found");
3072
- }
3073
- }
3074
2910
  results.push(testCase);
3075
2911
  }
3076
2912
  return { tests: results, parsed: suite };
@@ -3089,7 +2925,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
3089
2925
  if (!isJsonObject(raw)) return void 0;
3090
2926
  const obj = raw;
3091
2927
  if (obj.script !== void 0 && obj.command === void 0) {
3092
- logWarning6("'script' is deprecated. Use 'command' instead.");
2928
+ logWarning5("'script' is deprecated. Use 'command' instead.");
3093
2929
  }
3094
2930
  const commandSource = obj.command ?? obj.script;
3095
2931
  if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
@@ -3097,8 +2933,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
3097
2933
  if (commandArr.length === 0) return void 0;
3098
2934
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
3099
2935
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
3100
- if (cwd && !path8.isAbsolute(cwd)) {
3101
- cwd = path8.resolve(evalFileDir, cwd);
2936
+ if (cwd && !path7.isAbsolute(cwd)) {
2937
+ cwd = path7.resolve(evalFileDir, cwd);
3102
2938
  }
3103
2939
  const config = { command: commandArr };
3104
2940
  if (timeoutMs !== void 0) {
@@ -3188,10 +3024,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
3188
3024
  }
3189
3025
  async function resolveWorkspaceConfig(raw, evalFileDir) {
3190
3026
  if (typeof raw === "string") {
3191
- const workspaceFilePath = path8.resolve(evalFileDir, raw);
3027
+ const workspaceFilePath = path7.resolve(evalFileDir, raw);
3192
3028
  let content;
3193
3029
  try {
3194
- content = await readFile7(workspaceFilePath, "utf8");
3030
+ content = await readFile6(workspaceFilePath, "utf8");
3195
3031
  } catch {
3196
3032
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
3197
3033
  }
@@ -3201,7 +3037,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
3201
3037
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
3202
3038
  );
3203
3039
  }
3204
- const workspaceFileDir = path8.dirname(workspaceFilePath);
3040
+ const workspaceFileDir = path7.dirname(workspaceFilePath);
3205
3041
  return parseWorkspaceConfig(parsed, workspaceFileDir);
3206
3042
  }
3207
3043
  return parseWorkspaceConfig(raw, evalFileDir);
@@ -3221,8 +3057,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
3221
3057
  throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
3222
3058
  }
3223
3059
  let template = typeof obj.template === "string" ? obj.template : void 0;
3224
- if (template && !path8.isAbsolute(template)) {
3225
- template = path8.resolve(evalFileDir, template);
3060
+ if (template && !path7.isAbsolute(template)) {
3061
+ template = path7.resolve(evalFileDir, template);
3226
3062
  }
3227
3063
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
3228
3064
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
@@ -3272,28 +3108,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
3272
3108
  function asString6(value) {
3273
3109
  return typeof value === "string" ? value : void 0;
3274
3110
  }
3275
- function logWarning6(message, details) {
3111
+ function logWarning5(message, details) {
3276
3112
  if (details && details.length > 0) {
3277
3113
  const detailBlock = details.join("\n");
3278
- console.warn(`${ANSI_YELLOW7}Warning: ${message}
3279
- ${detailBlock}${ANSI_RESET8}`);
3114
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}
3115
+ ${detailBlock}${ANSI_RESET7}`);
3280
3116
  } else {
3281
- console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
3117
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
3282
3118
  }
3283
3119
  }
3284
3120
  function logError3(message, details) {
3285
3121
  if (details && details.length > 0) {
3286
3122
  const detailBlock = details.join("\n");
3287
3123
  console.error(`${ANSI_RED3}Error: ${message}
3288
- ${detailBlock}${ANSI_RESET8}`);
3124
+ ${detailBlock}${ANSI_RESET7}`);
3289
3125
  } else {
3290
- console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET8}`);
3126
+ console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET7}`);
3291
3127
  }
3292
3128
  }
3293
3129
 
3294
3130
  // src/evaluation/loaders/eval-yaml-transpiler.ts
3295
3131
  import { readFileSync } from "node:fs";
3296
- import path9 from "node:path";
3132
+ import path8 from "node:path";
3297
3133
  import { parse as parse3 } from "yaml";
3298
3134
  function codeGraderInstruction(graderName, description) {
3299
3135
  const desc = description ? ` This grader: ${description}.` : "";
@@ -3538,7 +3374,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
3538
3374
  function transpileEvalYamlFile(evalYamlPath) {
3539
3375
  const content = readFileSync(evalYamlPath, "utf8");
3540
3376
  const parsed = parse3(content);
3541
- return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
3377
+ return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
3542
3378
  }
3543
3379
  function getOutputFilenames(result) {
3544
3380
  const names = /* @__PURE__ */ new Map();
@@ -3773,10 +3609,10 @@ function buildChatPrompt(request) {
3773
3609
  if (hasSystemMessage) {
3774
3610
  return provided;
3775
3611
  }
3776
- const systemContent2 = resolveSystemContent(request, false);
3612
+ const systemContent2 = resolveSystemContent(request);
3777
3613
  return [{ role: "system", content: systemContent2 }, ...provided];
3778
3614
  }
3779
- const systemContent = resolveSystemContent(request, true);
3615
+ const systemContent = resolveSystemContent(request);
3780
3616
  const userContent = request.question.trim();
3781
3617
  const prompt = [
3782
3618
  { role: "system", content: systemContent },
@@ -3784,18 +3620,13 @@ function buildChatPrompt(request) {
3784
3620
  ];
3785
3621
  return prompt;
3786
3622
  }
3787
- function resolveSystemContent(request, includeGuidelines) {
3623
+ function resolveSystemContent(request) {
3788
3624
  const systemSegments = [];
3789
3625
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
3790
3626
  systemSegments.push(request.systemPrompt.trim());
3791
3627
  } else {
3792
3628
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
3793
3629
  }
3794
- if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
3795
- systemSegments.push(`[[ ## Guidelines ## ]]
3796
-
3797
- ${request.guidelines.trim()}`);
3798
- }
3799
3630
  return systemSegments.join("\n\n");
3800
3631
  }
3801
3632
  function toModelMessages(chatPrompt) {
@@ -3979,7 +3810,7 @@ import { spawn } from "node:child_process";
3979
3810
  import { randomUUID } from "node:crypto";
3980
3811
  import { createWriteStream } from "node:fs";
3981
3812
  import { mkdir } from "node:fs/promises";
3982
- import path11 from "node:path";
3813
+ import path10 from "node:path";
3983
3814
 
3984
3815
  // src/evaluation/providers/claude-log-tracker.ts
3985
3816
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
@@ -4035,17 +3866,11 @@ function subscribeToClaudeLogEntries(listener) {
4035
3866
  }
4036
3867
 
4037
3868
  // src/evaluation/providers/preread.ts
4038
- import path10 from "node:path";
4039
- function buildPromptDocument(request, inputFiles, options) {
3869
+ import path9 from "node:path";
3870
+ function buildPromptDocument(request, inputFiles) {
4040
3871
  const parts = [];
4041
- const guidelineFiles = collectGuidelineFiles(
4042
- inputFiles,
4043
- options?.guidelinePatterns ?? request.guideline_patterns,
4044
- options?.guidelineOverrides
4045
- );
4046
3872
  const inputFilesList = collectInputFiles(inputFiles);
4047
- const nonGuidelineInputFiles = inputFilesList.filter((file) => !guidelineFiles.includes(file));
4048
- const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
3873
+ const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
4049
3874
  if (prereadBlock.length > 0) {
4050
3875
  parts.push("\n", prereadBlock);
4051
3876
  }
@@ -4058,62 +3883,36 @@ function normalizeInputFiles(inputFiles) {
4058
3883
  }
4059
3884
  const deduped = /* @__PURE__ */ new Map();
4060
3885
  for (const inputFile of inputFiles) {
4061
- const absolutePath = path10.resolve(inputFile);
3886
+ const absolutePath = path9.resolve(inputFile);
4062
3887
  if (!deduped.has(absolutePath)) {
4063
3888
  deduped.set(absolutePath, absolutePath);
4064
3889
  }
4065
3890
  }
4066
3891
  return Array.from(deduped.values());
4067
3892
  }
4068
- function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
4069
- if (!inputFiles || inputFiles.length === 0) {
4070
- return [];
4071
- }
4072
- const unique = /* @__PURE__ */ new Map();
4073
- for (const inputFile of inputFiles) {
4074
- const absolutePath = path10.resolve(inputFile);
4075
- if (overrides?.has(absolutePath)) {
4076
- if (!unique.has(absolutePath)) {
4077
- unique.set(absolutePath, absolutePath);
4078
- }
4079
- continue;
4080
- }
4081
- const normalized = absolutePath.split(path10.sep).join("/");
4082
- if (isGuidelineFile(normalized, guidelinePatterns)) {
4083
- if (!unique.has(absolutePath)) {
4084
- unique.set(absolutePath, absolutePath);
4085
- }
4086
- }
4087
- }
4088
- return Array.from(unique.values());
4089
- }
4090
3893
  function collectInputFiles(inputFiles) {
4091
3894
  if (!inputFiles || inputFiles.length === 0) {
4092
3895
  return [];
4093
3896
  }
4094
3897
  const unique = /* @__PURE__ */ new Map();
4095
3898
  for (const inputFile of inputFiles) {
4096
- const absolutePath = path10.resolve(inputFile);
3899
+ const absolutePath = path9.resolve(inputFile);
4097
3900
  if (!unique.has(absolutePath)) {
4098
3901
  unique.set(absolutePath, absolutePath);
4099
3902
  }
4100
3903
  }
4101
3904
  return Array.from(unique.values());
4102
3905
  }
4103
- function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
4104
- if (guidelineFiles.length === 0 && inputFiles.length === 0) {
3906
+ function buildMandatoryPrereadBlock(inputFiles) {
3907
+ if (inputFiles.length === 0) {
4105
3908
  return "";
4106
3909
  }
4107
3910
  const buildList = (files) => files.map((absolutePath) => {
4108
- const fileName = path10.basename(absolutePath);
3911
+ const fileName = path9.basename(absolutePath);
4109
3912
  const fileUri = pathToFileUri(absolutePath);
4110
3913
  return `* [${fileName}](${fileUri})`;
4111
3914
  });
4112
3915
  const sections = [];
4113
- if (guidelineFiles.length > 0) {
4114
- sections.push(`Read all guideline files:
4115
- ${buildList(guidelineFiles).join("\n")}.`);
4116
- }
4117
3916
  if (inputFiles.length > 0) {
4118
3917
  sections.push(`Read all input files:
4119
3918
  ${buildList(inputFiles).join("\n")}.`);
@@ -4125,7 +3924,7 @@ ${buildList(inputFiles).join("\n")}.`);
4125
3924
  return sections.join("\n");
4126
3925
  }
4127
3926
  function pathToFileUri(filePath) {
4128
- const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
3927
+ const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
4129
3928
  const normalizedPath = absolutePath.replace(/\\/g, "/");
4130
3929
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
4131
3930
  return `file:///${normalizedPath}`;
@@ -4272,10 +4071,10 @@ var ClaudeCliProvider = class {
4272
4071
  }
4273
4072
  resolveCwd(cwdOverride) {
4274
4073
  if (cwdOverride) {
4275
- return path11.resolve(cwdOverride);
4074
+ return path10.resolve(cwdOverride);
4276
4075
  }
4277
4076
  if (this.config.cwd) {
4278
- return path11.resolve(this.config.cwd);
4077
+ return path10.resolve(this.config.cwd);
4279
4078
  }
4280
4079
  return void 0;
4281
4080
  }
@@ -4285,9 +4084,9 @@ var ClaudeCliProvider = class {
4285
4084
  return void 0;
4286
4085
  }
4287
4086
  if (this.config.logDir) {
4288
- return path11.resolve(this.config.logDir);
4087
+ return path10.resolve(this.config.logDir);
4289
4088
  }
4290
- return path11.join(process.cwd(), ".agentv", "logs", "claude-cli");
4089
+ return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
4291
4090
  }
4292
4091
  async createStreamLogger(request) {
4293
4092
  const logDir = this.resolveLogDirectory();
@@ -4301,7 +4100,7 @@ var ClaudeCliProvider = class {
4301
4100
  console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
4302
4101
  return void 0;
4303
4102
  }
4304
- const filePath = path11.join(logDir, buildLogFilename(request, this.targetName));
4103
+ const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
4305
4104
  try {
4306
4105
  const logger = await ClaudeCliStreamLogger.create({
4307
4106
  filePath,
@@ -4622,7 +4421,7 @@ function tryParseJson(line) {
4622
4421
  import { randomUUID as randomUUID2 } from "node:crypto";
4623
4422
  import { createWriteStream as createWriteStream2 } from "node:fs";
4624
4423
  import { mkdir as mkdir2 } from "node:fs/promises";
4625
- import path12 from "node:path";
4424
+ import path11 from "node:path";
4626
4425
  var claudeSdkModule = null;
4627
4426
  async function loadClaudeSdk() {
4628
4427
  if (!claudeSdkModule) {
@@ -4782,10 +4581,10 @@ var ClaudeSdkProvider = class {
4782
4581
  }
4783
4582
  resolveCwd(cwdOverride) {
4784
4583
  if (cwdOverride) {
4785
- return path12.resolve(cwdOverride);
4584
+ return path11.resolve(cwdOverride);
4786
4585
  }
4787
4586
  if (this.config.cwd) {
4788
- return path12.resolve(this.config.cwd);
4587
+ return path11.resolve(this.config.cwd);
4789
4588
  }
4790
4589
  return void 0;
4791
4590
  }
@@ -4795,9 +4594,9 @@ var ClaudeSdkProvider = class {
4795
4594
  return void 0;
4796
4595
  }
4797
4596
  if (this.config.logDir) {
4798
- return path12.resolve(this.config.logDir);
4597
+ return path11.resolve(this.config.logDir);
4799
4598
  }
4800
- return path12.join(process.cwd(), ".agentv", "logs", "claude");
4599
+ return path11.join(process.cwd(), ".agentv", "logs", "claude");
4801
4600
  }
4802
4601
  async createStreamLogger(request) {
4803
4602
  const logDir = this.resolveLogDirectory();
@@ -4811,7 +4610,7 @@ var ClaudeSdkProvider = class {
4811
4610
  console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
4812
4611
  return void 0;
4813
4612
  }
4814
- const filePath = path12.join(logDir, buildLogFilename2(request, this.targetName));
4613
+ const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
4815
4614
  try {
4816
4615
  const logger = await ClaudeStreamLogger.create({
4817
4616
  filePath,
@@ -5018,7 +4817,7 @@ function formatElapsed2(startedAt) {
5018
4817
  import { exec as execWithCallback } from "node:child_process";
5019
4818
  import fs from "node:fs/promises";
5020
4819
  import os from "node:os";
5021
- import path13 from "node:path";
4820
+ import path12 from "node:path";
5022
4821
  import { promisify } from "node:util";
5023
4822
  import { z as z2 } from "zod";
5024
4823
  var ToolCallSchema = z2.object({
@@ -5227,7 +5026,6 @@ var CliProvider = class {
5227
5026
  const { values: templateValues, promptFilePath } = await buildTemplateValues(
5228
5027
  {
5229
5028
  question: "",
5230
- guidelines: "",
5231
5029
  inputFiles: batchInputFiles,
5232
5030
  evalCaseId: "batch",
5233
5031
  attempt: 0
@@ -5460,7 +5258,6 @@ var CliProvider = class {
5460
5258
  const { values: templateValues, promptFilePath } = await buildTemplateValues(
5461
5259
  {
5462
5260
  question: "",
5463
- guidelines: "",
5464
5261
  inputFiles: [],
5465
5262
  evalCaseId: "healthcheck",
5466
5263
  attempt: 0
@@ -5501,7 +5298,6 @@ async function buildTemplateValues(request, config, outputFilePath) {
5501
5298
  values: {
5502
5299
  PROMPT: shellEscape(request.question ?? ""),
5503
5300
  PROMPT_FILE: shellEscape(promptFilePath),
5504
- GUIDELINES: shellEscape(request.guidelines ?? ""),
5505
5301
  EVAL_ID: shellEscape(request.evalCaseId ?? ""),
5506
5302
  ATTEMPT: shellEscape(String(request.attempt ?? 0)),
5507
5303
  FILES: formatFileList(inputFiles, config.filesFormat),
@@ -5523,7 +5319,7 @@ function normalizeInputFiles2(inputFiles) {
5523
5319
  }
5524
5320
  const unique = /* @__PURE__ */ new Map();
5525
5321
  for (const inputFile of inputFiles) {
5526
- const absolutePath = path13.resolve(inputFile);
5322
+ const absolutePath = path12.resolve(inputFile);
5527
5323
  if (!unique.has(absolutePath)) {
5528
5324
  unique.set(absolutePath, absolutePath);
5529
5325
  }
@@ -5537,7 +5333,7 @@ function formatFileList(files, template) {
5537
5333
  const formatter = template ?? "{path}";
5538
5334
  return files.map((filePath) => {
5539
5335
  const escapedPath = shellEscape(filePath);
5540
- const escapedName = shellEscape(path13.basename(filePath));
5336
+ const escapedName = shellEscape(path12.basename(filePath));
5541
5337
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
5542
5338
  }).join(" ");
5543
5339
  }
@@ -5561,7 +5357,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
5561
5357
  const safeEvalId = evalCaseId || "unknown";
5562
5358
  const timestamp = Date.now();
5563
5359
  const random = Math.random().toString(36).substring(2, 9);
5564
- return path13.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
5360
+ return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
5565
5361
  }
5566
5362
  function formatTimeoutSuffix2(timeoutMs) {
5567
5363
  if (!timeoutMs || timeoutMs <= 0) {
@@ -5575,7 +5371,7 @@ function formatTimeoutSuffix2(timeoutMs) {
5575
5371
  import { randomUUID as randomUUID3 } from "node:crypto";
5576
5372
  import { createWriteStream as createWriteStream3 } from "node:fs";
5577
5373
  import { mkdir as mkdir3 } from "node:fs/promises";
5578
- import path14 from "node:path";
5374
+ import path13 from "node:path";
5579
5375
 
5580
5376
  // src/evaluation/providers/codex-log-tracker.ts
5581
5377
  var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
@@ -5810,10 +5606,10 @@ ${basePrompt}` : basePrompt;
5810
5606
  }
5811
5607
  resolveCwd(cwdOverride) {
5812
5608
  if (cwdOverride) {
5813
- return path14.resolve(cwdOverride);
5609
+ return path13.resolve(cwdOverride);
5814
5610
  }
5815
5611
  if (this.config.cwd) {
5816
- return path14.resolve(this.config.cwd);
5612
+ return path13.resolve(this.config.cwd);
5817
5613
  }
5818
5614
  return void 0;
5819
5615
  }
@@ -5823,9 +5619,9 @@ ${basePrompt}` : basePrompt;
5823
5619
  return void 0;
5824
5620
  }
5825
5621
  if (this.config.logDir) {
5826
- return path14.resolve(this.config.logDir);
5622
+ return path13.resolve(this.config.logDir);
5827
5623
  }
5828
- return path14.join(process.cwd(), ".agentv", "logs", "codex");
5624
+ return path13.join(process.cwd(), ".agentv", "logs", "codex");
5829
5625
  }
5830
5626
  async createStreamLogger(request) {
5831
5627
  const logDir = this.resolveLogDirectory();
@@ -5839,7 +5635,7 @@ ${basePrompt}` : basePrompt;
5839
5635
  console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
5840
5636
  return void 0;
5841
5637
  }
5842
- const filePath = path14.join(logDir, buildLogFilename3(request, this.targetName));
5638
+ const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
5843
5639
  try {
5844
5640
  const logger = await CodexSdkStreamLogger.create({
5845
5641
  filePath,
@@ -5983,7 +5779,7 @@ function formatElapsed3(startedAt) {
5983
5779
  // src/evaluation/providers/copilot-cli.ts
5984
5780
  import { randomUUID as randomUUID5 } from "node:crypto";
5985
5781
  import { mkdir as mkdir4 } from "node:fs/promises";
5986
- import path16 from "node:path";
5782
+ import path15 from "node:path";
5987
5783
  import { Readable, Writable } from "node:stream";
5988
5784
  import { spawn as spawn2 } from "node:child_process";
5989
5785
  import * as acp from "@agentclientprotocol/sdk";
@@ -6045,7 +5841,7 @@ function subscribeToCopilotCliLogEntries(listener) {
6045
5841
  import { randomUUID as randomUUID4 } from "node:crypto";
6046
5842
  import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
6047
5843
  import { arch, platform } from "node:os";
6048
- import path15 from "node:path";
5844
+ import path14 from "node:path";
6049
5845
  import { fileURLToPath as fileURLToPath2 } from "node:url";
6050
5846
  function resolvePlatformCliPath() {
6051
5847
  const os3 = platform();
@@ -6069,7 +5865,7 @@ function resolvePlatformCliPath() {
6069
5865
  try {
6070
5866
  const resolved = import.meta.resolve(`${packageName}/package.json`);
6071
5867
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
6072
- const binaryPath = path15.join(path15.dirname(packageJsonPath), binaryName);
5868
+ const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
6073
5869
  if (existsSync(binaryPath)) {
6074
5870
  return binaryPath;
6075
5871
  }
@@ -6077,7 +5873,7 @@ function resolvePlatformCliPath() {
6077
5873
  }
6078
5874
  let searchDir = process.cwd();
6079
5875
  for (let i = 0; i < 10; i++) {
6080
- const standardPath = path15.join(
5876
+ const standardPath = path14.join(
6081
5877
  searchDir,
6082
5878
  "node_modules",
6083
5879
  ...packageName.split("/"),
@@ -6086,13 +5882,13 @@ function resolvePlatformCliPath() {
6086
5882
  if (existsSync(standardPath)) {
6087
5883
  return standardPath;
6088
5884
  }
6089
- const bunDir = path15.join(searchDir, "node_modules", ".bun");
5885
+ const bunDir = path14.join(searchDir, "node_modules", ".bun");
6090
5886
  const prefix = `@github+copilot-${osPart}-${archPart}@`;
6091
5887
  try {
6092
5888
  const entries = readdirSync(bunDir);
6093
5889
  for (const entry of entries) {
6094
5890
  if (entry.startsWith(prefix)) {
6095
- const candidate = path15.join(
5891
+ const candidate = path14.join(
6096
5892
  bunDir,
6097
5893
  entry,
6098
5894
  "node_modules",
@@ -6107,7 +5903,7 @@ function resolvePlatformCliPath() {
6107
5903
  }
6108
5904
  } catch {
6109
5905
  }
6110
- const parent = path15.dirname(searchDir);
5906
+ const parent = path14.dirname(searchDir);
6111
5907
  if (parent === searchDir) break;
6112
5908
  searchDir = parent;
6113
5909
  }
@@ -6445,10 +6241,10 @@ var CopilotCliProvider = class {
6445
6241
  }
6446
6242
  resolveCwd(cwdOverride) {
6447
6243
  if (cwdOverride) {
6448
- return path16.resolve(cwdOverride);
6244
+ return path15.resolve(cwdOverride);
6449
6245
  }
6450
6246
  if (this.config.cwd) {
6451
- return path16.resolve(this.config.cwd);
6247
+ return path15.resolve(this.config.cwd);
6452
6248
  }
6453
6249
  return void 0;
6454
6250
  }
@@ -6467,9 +6263,9 @@ var CopilotCliProvider = class {
6467
6263
  return void 0;
6468
6264
  }
6469
6265
  if (this.config.logDir) {
6470
- return path16.resolve(this.config.logDir);
6266
+ return path15.resolve(this.config.logDir);
6471
6267
  }
6472
- return path16.join(process.cwd(), ".agentv", "logs", "copilot-cli");
6268
+ return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
6473
6269
  }
6474
6270
  async createStreamLogger(request) {
6475
6271
  const logDir = this.resolveLogDirectory();
@@ -6483,7 +6279,7 @@ var CopilotCliProvider = class {
6483
6279
  console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
6484
6280
  return void 0;
6485
6281
  }
6486
- const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
6282
+ const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
6487
6283
  try {
6488
6284
  const logger = await CopilotStreamLogger.create(
6489
6285
  {
@@ -6578,7 +6374,7 @@ function summarizeAcpEvent(eventType, data) {
6578
6374
  // src/evaluation/providers/copilot-sdk.ts
6579
6375
  import { randomUUID as randomUUID6 } from "node:crypto";
6580
6376
  import { mkdir as mkdir5 } from "node:fs/promises";
6581
- import path17 from "node:path";
6377
+ import path16 from "node:path";
6582
6378
 
6583
6379
  // src/evaluation/providers/copilot-sdk-log-tracker.ts
6584
6380
  var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
@@ -6857,10 +6653,10 @@ var CopilotSdkProvider = class {
6857
6653
  }
6858
6654
  resolveCwd(cwdOverride) {
6859
6655
  if (cwdOverride) {
6860
- return path17.resolve(cwdOverride);
6656
+ return path16.resolve(cwdOverride);
6861
6657
  }
6862
6658
  if (this.config.cwd) {
6863
- return path17.resolve(this.config.cwd);
6659
+ return path16.resolve(this.config.cwd);
6864
6660
  }
6865
6661
  return void 0;
6866
6662
  }
@@ -6869,9 +6665,9 @@ var CopilotSdkProvider = class {
6869
6665
  return void 0;
6870
6666
  }
6871
6667
  if (this.config.logDir) {
6872
- return path17.resolve(this.config.logDir);
6668
+ return path16.resolve(this.config.logDir);
6873
6669
  }
6874
- return path17.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
6670
+ return path16.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
6875
6671
  }
6876
6672
  async createStreamLogger(request) {
6877
6673
  const logDir = this.resolveLogDirectory();
@@ -6885,7 +6681,7 @@ var CopilotSdkProvider = class {
6885
6681
  console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
6886
6682
  return void 0;
6887
6683
  }
6888
- const filePath = path17.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
6684
+ const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
6889
6685
  try {
6890
6686
  const logger = await CopilotStreamLogger.create(
6891
6687
  {
@@ -6962,8 +6758,7 @@ var MockProvider = class {
6962
6758
  return {
6963
6759
  output: [{ role: "assistant", content: this.cannedResponse }],
6964
6760
  raw: {
6965
- question: request.question,
6966
- guidelines: request.guidelines
6761
+ question: request.question
6967
6762
  }
6968
6763
  };
6969
6764
  }
@@ -7241,7 +7036,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
7241
7036
  import { createWriteStream as createWriteStream5 } from "node:fs";
7242
7037
  import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
7243
7038
  import { tmpdir } from "node:os";
7244
- import path18 from "node:path";
7039
+ import path17 from "node:path";
7245
7040
 
7246
7041
  // src/evaluation/providers/pi-log-tracker.ts
7247
7042
  var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
@@ -7322,7 +7117,7 @@ var PiCodingAgentProvider = class {
7322
7117
  const workspaceRoot = await this.createWorkspace();
7323
7118
  const logger = await this.createStreamLogger(request).catch(() => void 0);
7324
7119
  try {
7325
- const promptFile = path18.join(workspaceRoot, PROMPT_FILENAME);
7120
+ const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
7326
7121
  await writeFile(promptFile, request.question, "utf8");
7327
7122
  const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
7328
7123
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
@@ -7384,12 +7179,12 @@ var PiCodingAgentProvider = class {
7384
7179
  }
7385
7180
  resolveCwd(workspaceRoot, cwdOverride) {
7386
7181
  if (cwdOverride) {
7387
- return path18.resolve(cwdOverride);
7182
+ return path17.resolve(cwdOverride);
7388
7183
  }
7389
7184
  if (!this.config.cwd) {
7390
7185
  return workspaceRoot;
7391
7186
  }
7392
- return path18.resolve(this.config.cwd);
7187
+ return path17.resolve(this.config.cwd);
7393
7188
  }
7394
7189
  buildPiArgs(prompt, inputFiles, _captureFileChanges) {
7395
7190
  const args = [];
@@ -7478,7 +7273,7 @@ ${prompt}` : prompt;
7478
7273
  return env;
7479
7274
  }
7480
7275
  async createWorkspace() {
7481
- return await mkdtemp(path18.join(tmpdir(), WORKSPACE_PREFIX));
7276
+ return await mkdtemp(path17.join(tmpdir(), WORKSPACE_PREFIX));
7482
7277
  }
7483
7278
  async cleanupWorkspace(workspaceRoot) {
7484
7279
  try {
@@ -7488,9 +7283,9 @@ ${prompt}` : prompt;
7488
7283
  }
7489
7284
  resolveLogDirectory() {
7490
7285
  if (this.config.logDir) {
7491
- return path18.resolve(this.config.logDir);
7286
+ return path17.resolve(this.config.logDir);
7492
7287
  }
7493
- return path18.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
7288
+ return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
7494
7289
  }
7495
7290
  async createStreamLogger(request) {
7496
7291
  const logDir = this.resolveLogDirectory();
@@ -7504,7 +7299,7 @@ ${prompt}` : prompt;
7504
7299
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
7505
7300
  return void 0;
7506
7301
  }
7507
- const filePath = path18.join(logDir, buildLogFilename5(request, this.targetName));
7302
+ const filePath = path17.join(logDir, buildLogFilename5(request, this.targetName));
7508
7303
  try {
7509
7304
  const logger = await PiStreamLogger.create({
7510
7305
  filePath,
@@ -8005,17 +7800,17 @@ var ProviderRegistry = class {
8005
7800
  // src/evaluation/providers/vscode-provider.ts
8006
7801
  import { exec as exec2 } from "node:child_process";
8007
7802
  import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
8008
- import path30 from "node:path";
7803
+ import path29 from "node:path";
8009
7804
  import { promisify as promisify3 } from "node:util";
8010
7805
 
8011
7806
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
8012
7807
  import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
8013
- import path28 from "node:path";
7808
+ import path27 from "node:path";
8014
7809
 
8015
7810
  // src/evaluation/providers/vscode/utils/fs.ts
8016
7811
  import { constants as constants2 } from "node:fs";
8017
7812
  import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
8018
- import path19 from "node:path";
7813
+ import path18 from "node:path";
8019
7814
  async function pathExists(target) {
8020
7815
  try {
8021
7816
  await access2(target, constants2.F_OK);
@@ -8031,7 +7826,7 @@ async function readDirEntries(target) {
8031
7826
  const entries = await readdir(target, { withFileTypes: true });
8032
7827
  return entries.map((entry) => ({
8033
7828
  name: entry.name,
8034
- absolutePath: path19.join(target, entry.name),
7829
+ absolutePath: path18.join(target, entry.name),
8035
7830
  isDirectory: entry.isDirectory()
8036
7831
  }));
8037
7832
  }
@@ -8046,9 +7841,9 @@ async function removeIfExists(target) {
8046
7841
  }
8047
7842
 
8048
7843
  // src/evaluation/providers/vscode/utils/path.ts
8049
- import path20 from "node:path";
7844
+ import path19 from "node:path";
8050
7845
  function pathToFileUri2(filePath) {
8051
- const absolutePath = path20.isAbsolute(filePath) ? filePath : path20.resolve(filePath);
7846
+ const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
8052
7847
  const normalizedPath = absolutePath.replace(/\\/g, "/");
8053
7848
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
8054
7849
  return `file:///${normalizedPath}`;
@@ -8057,7 +7852,7 @@ function pathToFileUri2(filePath) {
8057
7852
  }
8058
7853
 
8059
7854
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
8060
- import path21 from "node:path";
7855
+ import path20 from "node:path";
8061
7856
 
8062
7857
  // src/evaluation/providers/vscode/utils/template.ts
8063
7858
  function renderTemplate2(content, variables) {
@@ -8149,8 +7944,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
8149
7944
  });
8150
7945
  }
8151
7946
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
8152
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path21.basename(file)}`).join("\n");
8153
- const responseList = responseFiles.map((file) => `"${path21.basename(file)}"`).join(", ");
7947
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
7948
+ const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
8154
7949
  return renderTemplate2(templateContent, {
8155
7950
  requestFiles: requestLines,
8156
7951
  responseList
@@ -8158,8 +7953,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
8158
7953
  }
8159
7954
 
8160
7955
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
8161
- import { readFile as readFile8 } from "node:fs/promises";
8162
- import path22 from "node:path";
7956
+ import { readFile as readFile7 } from "node:fs/promises";
7957
+ import path21 from "node:path";
8163
7958
 
8164
7959
  // src/evaluation/providers/vscode/utils/time.ts
8165
7960
  function sleep2(ms) {
@@ -8197,7 +7992,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
8197
7992
  const maxAttempts = 10;
8198
7993
  while (attempts < maxAttempts) {
8199
7994
  try {
8200
- const content = await readFile8(responseFileFinal, { encoding: "utf8" });
7995
+ const content = await readFile7(responseFileFinal, { encoding: "utf8" });
8201
7996
  if (!silent) {
8202
7997
  process.stdout.write(`${content}
8203
7998
  `);
@@ -8218,7 +8013,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
8218
8013
  }
8219
8014
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
8220
8015
  if (!silent) {
8221
- const fileList = responseFilesFinal.map((file) => path22.basename(file)).join(", ");
8016
+ const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
8222
8017
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
8223
8018
  }
8224
8019
  const deadline = Date.now() + timeoutMs;
@@ -8227,7 +8022,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8227
8022
  while (pending.size > 0) {
8228
8023
  if (Date.now() >= deadline) {
8229
8024
  if (!silent) {
8230
- const remaining = [...pending].map((f) => path22.basename(f)).join(", ");
8025
+ const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
8231
8026
  console.error(
8232
8027
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
8233
8028
  );
@@ -8254,7 +8049,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8254
8049
  const maxAttempts = 10;
8255
8050
  while (attempts < maxAttempts) {
8256
8051
  try {
8257
- const content = await readFile8(file, { encoding: "utf8" });
8052
+ const content = await readFile7(file, { encoding: "utf8" });
8258
8053
  if (!silent) {
8259
8054
  process.stdout.write(`${content}
8260
8055
  `);
@@ -8278,15 +8073,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8278
8073
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
8279
8074
  import { exec, spawn as spawn4 } from "node:child_process";
8280
8075
  import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
8281
- import path25 from "node:path";
8076
+ import path24 from "node:path";
8282
8077
  import { promisify as promisify2 } from "node:util";
8283
8078
 
8284
8079
  // src/evaluation/providers/vscode/dispatch/constants.ts
8285
- import path24 from "node:path";
8080
+ import path23 from "node:path";
8286
8081
 
8287
8082
  // src/paths.ts
8288
8083
  import os2 from "node:os";
8289
- import path23 from "node:path";
8084
+ import path22 from "node:path";
8290
8085
  var logged = false;
8291
8086
  function getAgentvHome() {
8292
8087
  const envHome = process.env.AGENTV_HOME;
@@ -8297,19 +8092,19 @@ function getAgentvHome() {
8297
8092
  }
8298
8093
  return envHome;
8299
8094
  }
8300
- return path23.join(os2.homedir(), ".agentv");
8095
+ return path22.join(os2.homedir(), ".agentv");
8301
8096
  }
8302
8097
  function getWorkspacesRoot() {
8303
- return path23.join(getAgentvHome(), "workspaces");
8098
+ return path22.join(getAgentvHome(), "workspaces");
8304
8099
  }
8305
8100
  function getSubagentsRoot() {
8306
- return path23.join(getAgentvHome(), "subagents");
8101
+ return path22.join(getAgentvHome(), "subagents");
8307
8102
  }
8308
8103
  function getTraceStateRoot() {
8309
- return path23.join(getAgentvHome(), "trace-state");
8104
+ return path22.join(getAgentvHome(), "trace-state");
8310
8105
  }
8311
8106
  function getWorkspacePoolRoot() {
8312
- return path23.join(getAgentvHome(), "workspace-pool");
8107
+ return path22.join(getAgentvHome(), "workspace-pool");
8313
8108
  }
8314
8109
 
8315
8110
  // src/evaluation/providers/vscode/dispatch/constants.ts
@@ -8317,7 +8112,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
8317
8112
  var DEFAULT_ALIVE_FILENAME = ".alive";
8318
8113
  function getDefaultSubagentRoot(vscodeCmd = "code") {
8319
8114
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
8320
- return path24.join(getSubagentsRoot(), folder);
8115
+ return path23.join(getSubagentsRoot(), folder);
8321
8116
  }
8322
8117
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
8323
8118
 
@@ -8384,11 +8179,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8384
8179
  await raceSpawnError(child);
8385
8180
  return true;
8386
8181
  }
8387
- const aliveFile = path25.join(subagentDir, DEFAULT_ALIVE_FILENAME);
8182
+ const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
8388
8183
  await removeIfExists(aliveFile);
8389
- const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
8184
+ const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
8390
8185
  await mkdir8(githubAgentsDir, { recursive: true });
8391
- const wakeupDst = path25.join(githubAgentsDir, "wakeup.md");
8186
+ const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
8392
8187
  await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
8393
8188
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
8394
8189
  label: "open-workspace"
@@ -8401,7 +8196,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8401
8196
  "chat",
8402
8197
  "-m",
8403
8198
  wakeupChatId,
8404
- `create a file named .alive in the ${path25.basename(subagentDir)} folder`
8199
+ `create a file named .alive in the ${path24.basename(subagentDir)} folder`
8405
8200
  ];
8406
8201
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
8407
8202
  await raceSpawnError(wakeupChild);
@@ -8416,10 +8211,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8416
8211
  return true;
8417
8212
  }
8418
8213
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
8419
- const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
8420
- const messagesDir = path25.join(subagentDir, "messages");
8214
+ const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
8215
+ const messagesDir = path24.join(subagentDir, "messages");
8421
8216
  await mkdir8(messagesDir, { recursive: true });
8422
- const reqFile = path25.join(messagesDir, `${timestamp}_req.md`);
8217
+ const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
8423
8218
  await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
8424
8219
  const reqUri = pathToFileUri2(reqFile);
8425
8220
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -8427,16 +8222,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
8427
8222
  chatArgs.push("-a", attachment);
8428
8223
  }
8429
8224
  chatArgs.push("-a", reqFile);
8430
- chatArgs.push(`Follow instructions in [${path25.basename(reqFile)}](${reqUri})`);
8225
+ chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
8431
8226
  const workspaceReady = await ensureWorkspaceFocused(
8432
8227
  workspacePath,
8433
- path25.basename(subagentDir),
8228
+ path24.basename(subagentDir),
8434
8229
  subagentDir,
8435
8230
  vscodeCmd
8436
8231
  );
8437
8232
  if (!workspaceReady) {
8438
8233
  throw new Error(
8439
- `VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8234
+ `VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8440
8235
  );
8441
8236
  }
8442
8237
  await sleep2(500);
@@ -8444,8 +8239,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
8444
8239
  await raceSpawnError(child);
8445
8240
  }
8446
8241
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
8447
- const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
8448
- const messagesDir = path25.join(subagentDir, "messages");
8242
+ const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
8243
+ const messagesDir = path24.join(subagentDir, "messages");
8449
8244
  await mkdir8(messagesDir, { recursive: true });
8450
8245
  const chatArgs = ["-r", "chat", "-m", chatId];
8451
8246
  for (const attachment of attachmentPaths) {
@@ -8454,13 +8249,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
8454
8249
  chatArgs.push(chatInstruction);
8455
8250
  const workspaceReady = await ensureWorkspaceFocused(
8456
8251
  workspacePath,
8457
- path25.basename(subagentDir),
8252
+ path24.basename(subagentDir),
8458
8253
  subagentDir,
8459
8254
  vscodeCmd
8460
8255
  );
8461
8256
  if (!workspaceReady) {
8462
8257
  throw new Error(
8463
- `VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8258
+ `VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8464
8259
  );
8465
8260
  }
8466
8261
  await sleep2(500);
@@ -8469,11 +8264,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
8469
8264
  }
8470
8265
 
8471
8266
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
8472
- import { copyFile, mkdir as mkdir9, readFile as readFile9, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
8473
- import path27 from "node:path";
8267
+ import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
8268
+ import path26 from "node:path";
8474
8269
 
8475
8270
  // src/evaluation/providers/vscode/utils/workspace.ts
8476
- import path26 from "node:path";
8271
+ import path25 from "node:path";
8477
8272
  import JSON5 from "json5";
8478
8273
  function transformWorkspacePaths(workspaceContent, templateDir) {
8479
8274
  let workspace;
@@ -8490,10 +8285,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
8490
8285
  }
8491
8286
  const transformedFolders = workspace.folders.map((folder) => {
8492
8287
  const folderPath = folder.path;
8493
- if (path26.isAbsolute(folderPath)) {
8288
+ if (path25.isAbsolute(folderPath)) {
8494
8289
  return folder;
8495
8290
  }
8496
- const absolutePath = path26.resolve(templateDir, folderPath);
8291
+ const absolutePath = path25.resolve(templateDir, folderPath);
8497
8292
  return {
8498
8293
  ...folder,
8499
8294
  path: absolutePath
@@ -8515,19 +8310,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
8515
8310
  if (locationMap && typeof locationMap === "object") {
8516
8311
  const transformedMap = {};
8517
8312
  for (const [locationPath, value] of Object.entries(locationMap)) {
8518
- const isAbsolute = path26.isAbsolute(locationPath);
8313
+ const isAbsolute = path25.isAbsolute(locationPath);
8519
8314
  if (isAbsolute) {
8520
8315
  transformedMap[locationPath] = value;
8521
8316
  } else {
8522
8317
  const firstGlobIndex = locationPath.search(/[*]/);
8523
8318
  if (firstGlobIndex === -1) {
8524
- const resolvedPath = path26.resolve(templateDir, locationPath).replace(/\\/g, "/");
8319
+ const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
8525
8320
  transformedMap[resolvedPath] = value;
8526
8321
  } else {
8527
8322
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
8528
8323
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
8529
8324
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
8530
- const resolvedPath = (path26.resolve(templateDir, basePath) + patternPath).replace(
8325
+ const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
8531
8326
  /\\/g,
8532
8327
  "/"
8533
8328
  );
@@ -8568,7 +8363,7 @@ async function findUnlockedSubagent(subagentRoot) {
8568
8363
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
8569
8364
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
8570
8365
  for (const subagent of subagents) {
8571
- const lockFile = path27.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
8366
+ const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
8572
8367
  if (!await pathExists(lockFile)) {
8573
8368
  return subagent.absolutePath;
8574
8369
  }
@@ -8578,7 +8373,7 @@ async function findUnlockedSubagent(subagentRoot) {
8578
8373
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8579
8374
  let workspaceContent;
8580
8375
  if (workspaceTemplate) {
8581
- const workspaceSrc = path27.resolve(workspaceTemplate);
8376
+ const workspaceSrc = path26.resolve(workspaceTemplate);
8582
8377
  if (!await pathExists(workspaceSrc)) {
8583
8378
  throw new Error(`workspace template not found: ${workspaceSrc}`);
8584
8379
  }
@@ -8586,18 +8381,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8586
8381
  if (!stats.isFile()) {
8587
8382
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
8588
8383
  }
8589
- const templateText = await readFile9(workspaceSrc, "utf8");
8384
+ const templateText = await readFile8(workspaceSrc, "utf8");
8590
8385
  workspaceContent = JSON.parse(templateText);
8591
8386
  } else {
8592
8387
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
8593
8388
  }
8594
- const workspaceName = `${path27.basename(subagentDir)}.code-workspace`;
8595
- const workspaceDst = path27.join(subagentDir, workspaceName);
8596
- const templateDir = workspaceTemplate ? path27.dirname(path27.resolve(workspaceTemplate)) : subagentDir;
8389
+ const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
8390
+ const workspaceDst = path26.join(subagentDir, workspaceName);
8391
+ const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
8597
8392
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
8598
8393
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
8599
8394
  if (cwd) {
8600
- const absCwd = path27.resolve(cwd);
8395
+ const absCwd = path26.resolve(cwd);
8601
8396
  const parsed = JSON.parse(transformedContent);
8602
8397
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
8603
8398
  if (!alreadyPresent) {
@@ -8606,35 +8401,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8606
8401
  }
8607
8402
  }
8608
8403
  await writeFile3(workspaceDst, transformedContent, "utf8");
8609
- const messagesDir = path27.join(subagentDir, "messages");
8404
+ const messagesDir = path26.join(subagentDir, "messages");
8610
8405
  await mkdir9(messagesDir, { recursive: true });
8611
8406
  return { workspace: workspaceDst, messagesDir };
8612
8407
  }
8613
8408
  async function createSubagentLock(subagentDir) {
8614
- const messagesDir = path27.join(subagentDir, "messages");
8409
+ const messagesDir = path26.join(subagentDir, "messages");
8615
8410
  if (await pathExists(messagesDir)) {
8616
8411
  const files = await readdir2(messagesDir);
8617
8412
  await Promise.all(
8618
8413
  files.map(async (file) => {
8619
- const target = path27.join(messagesDir, file);
8414
+ const target = path26.join(messagesDir, file);
8620
8415
  await removeIfExists(target);
8621
8416
  })
8622
8417
  );
8623
8418
  }
8624
- const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
8419
+ const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8625
8420
  if (await pathExists(githubAgentsDir)) {
8626
8421
  const agentFiles = await readdir2(githubAgentsDir);
8627
8422
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
8628
8423
  await Promise.all(
8629
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path27.join(githubAgentsDir, file)))
8424
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
8630
8425
  );
8631
8426
  }
8632
- const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
8427
+ const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
8633
8428
  await writeFile3(lockFile, "", { encoding: "utf8" });
8634
8429
  return lockFile;
8635
8430
  }
8636
8431
  async function removeSubagentLock(subagentDir) {
8637
- const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
8432
+ const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
8638
8433
  await removeIfExists(lockFile);
8639
8434
  }
8640
8435
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -8654,9 +8449,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
8654
8449
  return 1;
8655
8450
  }
8656
8451
  if (promptFile) {
8657
- const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
8452
+ const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8658
8453
  await mkdir9(githubAgentsDir, { recursive: true });
8659
- const agentFile = path27.join(githubAgentsDir, `${chatId}.md`);
8454
+ const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
8660
8455
  try {
8661
8456
  await copyFile(promptFile, agentFile);
8662
8457
  } catch (error) {
@@ -8675,7 +8470,7 @@ async function resolvePromptFile(promptFile) {
8675
8470
  if (!promptFile) {
8676
8471
  return void 0;
8677
8472
  }
8678
- const resolvedPrompt = path28.resolve(promptFile);
8473
+ const resolvedPrompt = path27.resolve(promptFile);
8679
8474
  if (!await pathExists(resolvedPrompt)) {
8680
8475
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
8681
8476
  }
@@ -8691,7 +8486,7 @@ async function resolveAttachments(extraAttachments) {
8691
8486
  }
8692
8487
  const resolved = [];
8693
8488
  for (const attachment of extraAttachments) {
8694
- const resolvedPath = path28.resolve(attachment);
8489
+ const resolvedPath = path27.resolve(attachment);
8695
8490
  if (!await pathExists(resolvedPath)) {
8696
8491
  throw new Error(`Attachment not found: ${resolvedPath}`);
8697
8492
  }
@@ -8733,7 +8528,7 @@ async function dispatchAgentSession(options) {
8733
8528
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
8734
8529
  };
8735
8530
  }
8736
- const subagentName = path28.basename(subagentDir);
8531
+ const subagentName = path27.basename(subagentDir);
8737
8532
  const chatId = Math.random().toString(16).slice(2, 10);
8738
8533
  const preparationResult = await prepareSubagentDirectory(
8739
8534
  subagentDir,
@@ -8761,9 +8556,9 @@ async function dispatchAgentSession(options) {
8761
8556
  };
8762
8557
  }
8763
8558
  const timestamp = generateTimestamp();
8764
- const messagesDir = path28.join(subagentDir, "messages");
8765
- const responseFileTmp = path28.join(messagesDir, `${timestamp}_res.tmp.md`);
8766
- const responseFileFinal = path28.join(messagesDir, `${timestamp}_res.md`);
8559
+ const messagesDir = path27.join(subagentDir, "messages");
8560
+ const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
8561
+ const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
8767
8562
  const requestInstructions = createRequestPrompt(
8768
8563
  userQuery,
8769
8564
  responseFileTmp,
@@ -8868,7 +8663,7 @@ async function dispatchBatchAgent(options) {
8868
8663
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
8869
8664
  };
8870
8665
  }
8871
- subagentName = path28.basename(subagentDir);
8666
+ subagentName = path27.basename(subagentDir);
8872
8667
  const chatId = Math.random().toString(16).slice(2, 10);
8873
8668
  const preparationResult = await prepareSubagentDirectory(
8874
8669
  subagentDir,
@@ -8899,17 +8694,17 @@ async function dispatchBatchAgent(options) {
8899
8694
  };
8900
8695
  }
8901
8696
  const timestamp = generateTimestamp();
8902
- const messagesDir = path28.join(subagentDir, "messages");
8697
+ const messagesDir = path27.join(subagentDir, "messages");
8903
8698
  requestFiles = userQueries.map(
8904
- (_, index) => path28.join(messagesDir, `${timestamp}_${index}_req.md`)
8699
+ (_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
8905
8700
  );
8906
8701
  const responseTmpFiles = userQueries.map(
8907
- (_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
8702
+ (_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
8908
8703
  );
8909
8704
  responseFilesFinal = userQueries.map(
8910
- (_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.md`)
8705
+ (_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
8911
8706
  );
8912
- const orchestratorFile = path28.join(messagesDir, `${timestamp}_orchestrator.md`);
8707
+ const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
8913
8708
  if (!dryRun) {
8914
8709
  await Promise.all(
8915
8710
  userQueries.map((query, index) => {
@@ -8995,7 +8790,7 @@ async function dispatchBatchAgent(options) {
8995
8790
 
8996
8791
  // src/evaluation/providers/vscode/dispatch/provision.ts
8997
8792
  import { writeFile as writeFile5 } from "node:fs/promises";
8998
- import path29 from "node:path";
8793
+ import path28 from "node:path";
8999
8794
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
9000
8795
  folders: [
9001
8796
  {
@@ -9026,7 +8821,7 @@ async function provisionSubagents(options) {
9026
8821
  if (!Number.isInteger(subagents) || subagents < 1) {
9027
8822
  throw new Error("subagents must be a positive integer");
9028
8823
  }
9029
- const targetPath = path29.resolve(targetRoot);
8824
+ const targetPath = path28.resolve(targetRoot);
9030
8825
  if (!dryRun) {
9031
8826
  await ensureDir(targetPath);
9032
8827
  }
@@ -9046,7 +8841,7 @@ async function provisionSubagents(options) {
9046
8841
  continue;
9047
8842
  }
9048
8843
  highestNumber = Math.max(highestNumber, parsed);
9049
- const lockFile = path29.join(entry.absolutePath, lockName);
8844
+ const lockFile = path28.join(entry.absolutePath, lockName);
9050
8845
  const locked = await pathExists(lockFile);
9051
8846
  if (locked) {
9052
8847
  lockedSubagents.add(entry.absolutePath);
@@ -9063,10 +8858,10 @@ async function provisionSubagents(options) {
9063
8858
  break;
9064
8859
  }
9065
8860
  const subagentDir = subagent.absolutePath;
9066
- const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
9067
- const lockFile = path29.join(subagentDir, lockName);
9068
- const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
9069
- const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
8861
+ const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8862
+ const lockFile = path28.join(subagentDir, lockName);
8863
+ const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
8864
+ const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9070
8865
  const isLocked = await pathExists(lockFile);
9071
8866
  if (isLocked && !force) {
9072
8867
  continue;
@@ -9104,10 +8899,10 @@ async function provisionSubagents(options) {
9104
8899
  let nextIndex = highestNumber;
9105
8900
  while (subagentsProvisioned < subagents) {
9106
8901
  nextIndex += 1;
9107
- const subagentDir = path29.join(targetPath, `subagent-${nextIndex}`);
9108
- const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
9109
- const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
9110
- const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
8902
+ const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
8903
+ const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8904
+ const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
8905
+ const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9111
8906
  if (!dryRun) {
9112
8907
  await ensureDir(subagentDir);
9113
8908
  await ensureDir(githubAgentsDir);
@@ -9174,7 +8969,7 @@ var VSCodeProvider = class {
9174
8969
  }
9175
8970
  await this.ensureEnvironmentReady();
9176
8971
  const inputFiles = normalizeAttachments(request.inputFiles);
9177
- const promptContent = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
8972
+ const promptContent = buildPromptDocument2(request, inputFiles);
9178
8973
  const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
9179
8974
  const startTime = Date.now();
9180
8975
  const session = await dispatchAgentSession({
@@ -9228,7 +9023,7 @@ var VSCodeProvider = class {
9228
9023
  normalizedRequests.map(({ inputFiles }) => inputFiles)
9229
9024
  );
9230
9025
  const userQueries = normalizedRequests.map(
9231
- ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles, request.guideline_patterns)
9026
+ ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
9232
9027
  );
9233
9028
  const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
9234
9029
  this.config.workspaceTemplate
@@ -9297,7 +9092,7 @@ var VSCodeProvider = class {
9297
9092
  async function locateVSCodeExecutable(candidate) {
9298
9093
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
9299
9094
  if (includesPathSeparator) {
9300
- const resolved = path30.isAbsolute(candidate) ? candidate : path30.resolve(candidate);
9095
+ const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
9301
9096
  try {
9302
9097
  await access3(resolved, constants3.F_OK);
9303
9098
  return resolved;
@@ -9326,41 +9121,35 @@ async function resolveWorkspaceTemplateFile(template) {
9326
9121
  return void 0;
9327
9122
  }
9328
9123
  try {
9329
- const stats = await stat4(path30.resolve(template));
9124
+ const stats = await stat4(path29.resolve(template));
9330
9125
  return stats.isFile() ? template : void 0;
9331
9126
  } catch {
9332
9127
  return template;
9333
9128
  }
9334
9129
  }
9335
- function buildPromptDocument2(request, attachments, guidelinePatterns) {
9130
+ function buildPromptDocument2(request, attachments) {
9336
9131
  const parts = [];
9337
9132
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
9338
9133
  parts.push(request.systemPrompt.trim());
9339
9134
  }
9340
- const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
9341
9135
  const attachmentFiles = collectAttachmentFiles(attachments);
9342
- const nonGuidelineAttachments = attachmentFiles.filter((file) => !guidelineFiles.includes(file));
9343
- const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
9136
+ const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
9344
9137
  if (prereadBlock.length > 0) {
9345
9138
  parts.push("\n", prereadBlock);
9346
9139
  }
9347
9140
  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
9348
9141
  return parts.join("\n").trim();
9349
9142
  }
9350
- function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
9351
- if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
9143
+ function buildMandatoryPrereadBlock2(attachmentFiles) {
9144
+ if (attachmentFiles.length === 0) {
9352
9145
  return "";
9353
9146
  }
9354
9147
  const buildList = (files) => files.map((absolutePath) => {
9355
- const fileName = path30.basename(absolutePath);
9148
+ const fileName = path29.basename(absolutePath);
9356
9149
  const fileUri = pathToFileUri3(absolutePath);
9357
9150
  return `* [${fileName}](${fileUri})`;
9358
9151
  });
9359
9152
  const sections = [];
9360
- if (guidelineFiles.length > 0) {
9361
- sections.push(`Read all guideline files:
9362
- ${buildList(guidelineFiles).join("\n")}.`);
9363
- }
9364
9153
  if (attachmentFiles.length > 0) {
9365
9154
  sections.push(`Read all attachment files:
9366
9155
  ${buildList(attachmentFiles).join("\n")}.`);
@@ -9371,29 +9160,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
9371
9160
  );
9372
9161
  return sections.join("\n");
9373
9162
  }
9374
- function collectGuidelineFiles2(attachments, guidelinePatterns) {
9375
- if (!attachments || attachments.length === 0) {
9376
- return [];
9377
- }
9378
- const unique = /* @__PURE__ */ new Map();
9379
- for (const attachment of attachments) {
9380
- const absolutePath = path30.resolve(attachment);
9381
- const normalized = absolutePath.split(path30.sep).join("/");
9382
- if (isGuidelineFile(normalized, guidelinePatterns)) {
9383
- if (!unique.has(absolutePath)) {
9384
- unique.set(absolutePath, absolutePath);
9385
- }
9386
- }
9387
- }
9388
- return Array.from(unique.values());
9389
- }
9390
9163
  function collectAttachmentFiles(attachments) {
9391
9164
  if (!attachments || attachments.length === 0) {
9392
9165
  return [];
9393
9166
  }
9394
9167
  const unique = /* @__PURE__ */ new Map();
9395
9168
  for (const attachment of attachments) {
9396
- const absolutePath = path30.resolve(attachment);
9169
+ const absolutePath = path29.resolve(attachment);
9397
9170
  if (!unique.has(absolutePath)) {
9398
9171
  unique.set(absolutePath, absolutePath);
9399
9172
  }
@@ -9401,7 +9174,7 @@ function collectAttachmentFiles(attachments) {
9401
9174
  return Array.from(unique.values());
9402
9175
  }
9403
9176
  function pathToFileUri3(filePath) {
9404
- const absolutePath = path30.isAbsolute(filePath) ? filePath : path30.resolve(filePath);
9177
+ const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
9405
9178
  const normalizedPath = absolutePath.replace(/\\/g, "/");
9406
9179
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
9407
9180
  return `file:///${normalizedPath}`;
@@ -9414,7 +9187,7 @@ function normalizeAttachments(attachments) {
9414
9187
  }
9415
9188
  const deduped = /* @__PURE__ */ new Set();
9416
9189
  for (const attachment of attachments) {
9417
- deduped.add(path30.resolve(attachment));
9190
+ deduped.add(path29.resolve(attachment));
9418
9191
  }
9419
9192
  return Array.from(deduped);
9420
9193
  }
@@ -9423,7 +9196,7 @@ function mergeAttachments(all) {
9423
9196
  for (const list of all) {
9424
9197
  if (!list) continue;
9425
9198
  for (const inputFile of list) {
9426
- deduped.add(path30.resolve(inputFile));
9199
+ deduped.add(path29.resolve(inputFile));
9427
9200
  }
9428
9201
  }
9429
9202
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -9471,8 +9244,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
9471
9244
 
9472
9245
  // src/evaluation/providers/targets-file.ts
9473
9246
  import { constants as constants4 } from "node:fs";
9474
- import { access as access4, readFile as readFile10 } from "node:fs/promises";
9475
- import path31 from "node:path";
9247
+ import { access as access4, readFile as readFile9 } from "node:fs/promises";
9248
+ import path30 from "node:path";
9476
9249
  import { parse as parse4 } from "yaml";
9477
9250
  function isRecord(value) {
9478
9251
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -9509,11 +9282,11 @@ async function fileExists3(filePath) {
9509
9282
  }
9510
9283
  }
9511
9284
  async function readTargetDefinitions(filePath) {
9512
- const absolutePath = path31.resolve(filePath);
9285
+ const absolutePath = path30.resolve(filePath);
9513
9286
  if (!await fileExists3(absolutePath)) {
9514
9287
  throw new Error(`targets.yaml not found at ${absolutePath}`);
9515
9288
  }
9516
- const raw = await readFile10(absolutePath, "utf8");
9289
+ const raw = await readFile9(absolutePath, "utf8");
9517
9290
  const parsed = parse4(raw);
9518
9291
  if (!isRecord(parsed)) {
9519
9292
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -9529,16 +9302,16 @@ function listTargetNames(definitions) {
9529
9302
  }
9530
9303
 
9531
9304
  // src/evaluation/providers/provider-discovery.ts
9532
- import path32 from "node:path";
9305
+ import path31 from "node:path";
9533
9306
  import fg from "fast-glob";
9534
9307
  async function discoverProviders(registry, baseDir) {
9535
9308
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
9536
9309
  const candidateDirs = [];
9537
- let dir = path32.resolve(baseDir);
9538
- const root = path32.parse(dir).root;
9310
+ let dir = path31.resolve(baseDir);
9311
+ const root = path31.parse(dir).root;
9539
9312
  while (dir !== root) {
9540
- candidateDirs.push(path32.join(dir, ".agentv", "providers"));
9541
- dir = path32.dirname(dir);
9313
+ candidateDirs.push(path31.join(dir, ".agentv", "providers"));
9314
+ dir = path31.dirname(dir);
9542
9315
  }
9543
9316
  let files = [];
9544
9317
  for (const providersDir of candidateDirs) {
@@ -9554,7 +9327,7 @@ async function discoverProviders(registry, baseDir) {
9554
9327
  }
9555
9328
  const discoveredKinds = [];
9556
9329
  for (const filePath of files) {
9557
- const basename = path32.basename(filePath);
9330
+ const basename = path31.basename(filePath);
9558
9331
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
9559
9332
  if (registry.has(kindName)) {
9560
9333
  continue;
@@ -9763,15 +9536,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
9763
9536
  });
9764
9537
  }
9765
9538
  async function execShellWithStdin(command, stdinPayload, options = {}) {
9766
- const { mkdir: mkdir15, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
9539
+ const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
9767
9540
  const { tmpdir: tmpdir3 } = await import("node:os");
9768
- const path45 = await import("node:path");
9541
+ const path44 = await import("node:path");
9769
9542
  const { randomUUID: randomUUID9 } = await import("node:crypto");
9770
- const dir = path45.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
9543
+ const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
9771
9544
  await mkdir15(dir, { recursive: true });
9772
- const stdinPath = path45.join(dir, "stdin.txt");
9773
- const stdoutPath = path45.join(dir, "stdout.txt");
9774
- const stderrPath = path45.join(dir, "stderr.txt");
9545
+ const stdinPath = path44.join(dir, "stdin.txt");
9546
+ const stdoutPath = path44.join(dir, "stdout.txt");
9547
+ const stderrPath = path44.join(dir, "stderr.txt");
9775
9548
  await writeFile9(stdinPath, stdinPayload, "utf8");
9776
9549
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
9777
9550
  const { spawn: spawn5 } = await import("node:child_process");
@@ -9801,8 +9574,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
9801
9574
  resolve(code ?? 0);
9802
9575
  });
9803
9576
  });
9804
- const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
9805
- const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
9577
+ const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
9578
+ const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
9806
9579
  return { stdout, stderr, exitCode };
9807
9580
  } finally {
9808
9581
  await rm6(dir, { recursive: true, force: true });
@@ -10119,10 +9892,7 @@ var CodeEvaluator = class {
10119
9892
  outputText: context.candidate,
10120
9893
  output: outputForPayload,
10121
9894
  outputPath,
10122
- guidelineFiles: context.evalCase.guideline_paths,
10123
- inputFiles: context.evalCase.file_paths.filter(
10124
- (path45) => !context.evalCase.guideline_paths.includes(path45)
10125
- ),
9895
+ inputFiles: context.evalCase.file_paths,
10126
9896
  input: context.evalCase.input,
10127
9897
  trace: context.trace ?? null,
10128
9898
  tokenUsage: context.tokenUsage ?? null,
@@ -10253,7 +10023,7 @@ import { generateText as generateText3 } from "ai";
10253
10023
 
10254
10024
  // src/evaluation/evaluators/llm-grader.ts
10255
10025
  import fs2 from "node:fs/promises";
10256
- import path33 from "node:path";
10026
+ import path32 from "node:path";
10257
10027
  import { generateText as generateText2, stepCountIs, tool } from "ai";
10258
10028
  import { z as z3 } from "zod";
10259
10029
  var DEFAULT_MAX_STEPS = 10;
@@ -11085,8 +10855,8 @@ function calculateScoreRangeResult(result, rubrics) {
11085
10855
  };
11086
10856
  }
11087
10857
  function resolveSandboxed(basePath, relativePath) {
11088
- const resolved = path33.resolve(basePath, relativePath);
11089
- if (!resolved.startsWith(basePath + path33.sep) && resolved !== basePath) {
10858
+ const resolved = path32.resolve(basePath, relativePath);
10859
+ if (!resolved.startsWith(basePath + path32.sep) && resolved !== basePath) {
11090
10860
  throw new Error(`Path '${relativePath}' is outside the workspace`);
11091
10861
  }
11092
10862
  return resolved;
@@ -11176,11 +10946,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
11176
10946
  for (const entry of entries) {
11177
10947
  if (matches.length >= MAX_SEARCH_MATCHES) return;
11178
10948
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
11179
- const fullPath = path33.join(dirPath, entry.name);
10949
+ const fullPath = path32.join(dirPath, entry.name);
11180
10950
  if (entry.isDirectory()) {
11181
10951
  await searchDirectory(fullPath, workspacePath, regex, matches);
11182
10952
  } else if (entry.isFile()) {
11183
- const ext = path33.extname(entry.name).toLowerCase();
10953
+ const ext = path32.extname(entry.name).toLowerCase();
11184
10954
  if (BINARY_EXTENSIONS.has(ext)) continue;
11185
10955
  try {
11186
10956
  const stat8 = await fs2.stat(fullPath);
@@ -11192,7 +10962,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
11192
10962
  regex.lastIndex = 0;
11193
10963
  if (regex.test(lines[i])) {
11194
10964
  matches.push({
11195
- file: path33.relative(workspacePath, fullPath),
10965
+ file: path32.relative(workspacePath, fullPath),
11196
10966
  line: i + 1,
11197
10967
  text: lines[i].substring(0, 200)
11198
10968
  });
@@ -11827,115 +11597,115 @@ var FieldAccuracyEvaluator = class {
11827
11597
  * Evaluate a single field against the expected value.
11828
11598
  */
11829
11599
  evaluateField(fieldConfig, candidateData, expectedData) {
11830
- const { path: path45, match, required = true, weight = 1 } = fieldConfig;
11831
- const candidateValue = resolvePath(candidateData, path45);
11832
- const expectedValue = resolvePath(expectedData, path45);
11600
+ const { path: path44, match, required = true, weight = 1 } = fieldConfig;
11601
+ const candidateValue = resolvePath(candidateData, path44);
11602
+ const expectedValue = resolvePath(expectedData, path44);
11833
11603
  if (expectedValue === void 0) {
11834
11604
  return {
11835
- path: path45,
11605
+ path: path44,
11836
11606
  score: 1,
11837
11607
  // No expected value means no comparison needed
11838
11608
  weight,
11839
11609
  hit: true,
11840
- message: `${path45}: no expected value`
11610
+ message: `${path44}: no expected value`
11841
11611
  };
11842
11612
  }
11843
11613
  if (candidateValue === void 0) {
11844
11614
  if (required) {
11845
11615
  return {
11846
- path: path45,
11616
+ path: path44,
11847
11617
  score: 0,
11848
11618
  weight,
11849
11619
  hit: false,
11850
- message: `${path45} (required, missing)`
11620
+ message: `${path44} (required, missing)`
11851
11621
  };
11852
11622
  }
11853
11623
  return {
11854
- path: path45,
11624
+ path: path44,
11855
11625
  score: 1,
11856
11626
  // Don't penalize missing optional fields
11857
11627
  weight: 0,
11858
11628
  // Zero weight means it won't affect the score
11859
11629
  hit: true,
11860
- message: `${path45}: optional field missing`
11630
+ message: `${path44}: optional field missing`
11861
11631
  };
11862
11632
  }
11863
11633
  switch (match) {
11864
11634
  case "exact":
11865
- return this.compareExact(path45, candidateValue, expectedValue, weight);
11635
+ return this.compareExact(path44, candidateValue, expectedValue, weight);
11866
11636
  case "numeric_tolerance":
11867
11637
  return this.compareNumericTolerance(
11868
- path45,
11638
+ path44,
11869
11639
  candidateValue,
11870
11640
  expectedValue,
11871
11641
  fieldConfig,
11872
11642
  weight
11873
11643
  );
11874
11644
  case "date":
11875
- return this.compareDate(path45, candidateValue, expectedValue, fieldConfig, weight);
11645
+ return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
11876
11646
  default:
11877
11647
  return {
11878
- path: path45,
11648
+ path: path44,
11879
11649
  score: 0,
11880
11650
  weight,
11881
11651
  hit: false,
11882
- message: `${path45}: unknown match type "${match}"`
11652
+ message: `${path44}: unknown match type "${match}"`
11883
11653
  };
11884
11654
  }
11885
11655
  }
11886
11656
  /**
11887
11657
  * Exact equality comparison.
11888
11658
  */
11889
- compareExact(path45, candidateValue, expectedValue, weight) {
11659
+ compareExact(path44, candidateValue, expectedValue, weight) {
11890
11660
  if (deepEqual(candidateValue, expectedValue)) {
11891
11661
  return {
11892
- path: path45,
11662
+ path: path44,
11893
11663
  score: 1,
11894
11664
  weight,
11895
11665
  hit: true,
11896
- message: path45
11666
+ message: path44
11897
11667
  };
11898
11668
  }
11899
11669
  if (typeof candidateValue !== typeof expectedValue) {
11900
11670
  return {
11901
- path: path45,
11671
+ path: path44,
11902
11672
  score: 0,
11903
11673
  weight,
11904
11674
  hit: false,
11905
- message: `${path45} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
11675
+ message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
11906
11676
  };
11907
11677
  }
11908
11678
  return {
11909
- path: path45,
11679
+ path: path44,
11910
11680
  score: 0,
11911
11681
  weight,
11912
11682
  hit: false,
11913
- message: `${path45} (value mismatch)`
11683
+ message: `${path44} (value mismatch)`
11914
11684
  };
11915
11685
  }
11916
11686
  /**
11917
11687
  * Numeric comparison with absolute or relative tolerance.
11918
11688
  */
11919
- compareNumericTolerance(path45, candidateValue, expectedValue, fieldConfig, weight) {
11689
+ compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
11920
11690
  const { tolerance = 0, relative = false } = fieldConfig;
11921
11691
  const candidateNum = toNumber(candidateValue);
11922
11692
  const expectedNum = toNumber(expectedValue);
11923
11693
  if (candidateNum === null || expectedNum === null) {
11924
11694
  return {
11925
- path: path45,
11695
+ path: path44,
11926
11696
  score: 0,
11927
11697
  weight,
11928
11698
  hit: false,
11929
- message: `${path45} (non-numeric value)`
11699
+ message: `${path44} (non-numeric value)`
11930
11700
  };
11931
11701
  }
11932
11702
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
11933
11703
  return {
11934
- path: path45,
11704
+ path: path44,
11935
11705
  score: 0,
11936
11706
  weight,
11937
11707
  hit: false,
11938
- message: `${path45} (invalid numeric value)`
11708
+ message: `${path44} (invalid numeric value)`
11939
11709
  };
11940
11710
  }
11941
11711
  const diff = Math.abs(candidateNum - expectedNum);
@@ -11948,61 +11718,61 @@ var FieldAccuracyEvaluator = class {
11948
11718
  }
11949
11719
  if (withinTolerance) {
11950
11720
  return {
11951
- path: path45,
11721
+ path: path44,
11952
11722
  score: 1,
11953
11723
  weight,
11954
11724
  hit: true,
11955
- message: `${path45} (within tolerance: diff=${diff.toFixed(2)})`
11725
+ message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
11956
11726
  };
11957
11727
  }
11958
11728
  return {
11959
- path: path45,
11729
+ path: path44,
11960
11730
  score: 0,
11961
11731
  weight,
11962
11732
  hit: false,
11963
- message: `${path45} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
11733
+ message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
11964
11734
  };
11965
11735
  }
11966
11736
  /**
11967
11737
  * Date comparison with format normalization.
11968
11738
  */
11969
- compareDate(path45, candidateValue, expectedValue, fieldConfig, weight) {
11739
+ compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
11970
11740
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
11971
11741
  const candidateDate = parseDate(String(candidateValue), formats);
11972
11742
  const expectedDate = parseDate(String(expectedValue), formats);
11973
11743
  if (candidateDate === null) {
11974
11744
  return {
11975
- path: path45,
11745
+ path: path44,
11976
11746
  score: 0,
11977
11747
  weight,
11978
11748
  hit: false,
11979
- message: `${path45} (unparseable candidate date)`
11749
+ message: `${path44} (unparseable candidate date)`
11980
11750
  };
11981
11751
  }
11982
11752
  if (expectedDate === null) {
11983
11753
  return {
11984
- path: path45,
11754
+ path: path44,
11985
11755
  score: 0,
11986
11756
  weight,
11987
11757
  hit: false,
11988
- message: `${path45} (unparseable expected date)`
11758
+ message: `${path44} (unparseable expected date)`
11989
11759
  };
11990
11760
  }
11991
11761
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
11992
11762
  return {
11993
- path: path45,
11763
+ path: path44,
11994
11764
  score: 1,
11995
11765
  weight,
11996
11766
  hit: true,
11997
- message: path45
11767
+ message: path44
11998
11768
  };
11999
11769
  }
12000
11770
  return {
12001
- path: path45,
11771
+ path: path44,
12002
11772
  score: 0,
12003
11773
  weight,
12004
11774
  hit: false,
12005
- message: `${path45} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
11775
+ message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12006
11776
  };
12007
11777
  }
12008
11778
  /**
@@ -12035,11 +11805,11 @@ var FieldAccuracyEvaluator = class {
12035
11805
  };
12036
11806
  }
12037
11807
  };
12038
- function resolvePath(obj, path45) {
12039
- if (!path45 || !obj) {
11808
+ function resolvePath(obj, path44) {
11809
+ if (!path44 || !obj) {
12040
11810
  return void 0;
12041
11811
  }
12042
- const parts = path45.split(/\.|\[|\]/).filter((p) => p.length > 0);
11812
+ const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
12043
11813
  let current = obj;
12044
11814
  for (const part of parts) {
12045
11815
  if (current === null || current === void 0) {
@@ -12499,8 +12269,8 @@ var TokenUsageEvaluator = class {
12499
12269
  };
12500
12270
 
12501
12271
  // src/evaluation/evaluators/tool-trajectory.ts
12502
- function getNestedValue(obj, path45) {
12503
- const parts = path45.split(".");
12272
+ function getNestedValue(obj, path44) {
12273
+ const parts = path44.split(".");
12504
12274
  let current = obj;
12505
12275
  for (const part of parts) {
12506
12276
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -13122,8 +12892,8 @@ function runEqualsAssertion(output, value) {
13122
12892
  // src/evaluation/orchestrator.ts
13123
12893
  import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
13124
12894
  import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
13125
- import path42 from "node:path";
13126
- import micromatch4 from "micromatch";
12895
+ import path41 from "node:path";
12896
+ import micromatch3 from "micromatch";
13127
12897
 
13128
12898
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
13129
12899
  var Node = class {
@@ -13336,7 +13106,7 @@ var InlineAssertEvaluator = class {
13336
13106
  };
13337
13107
 
13338
13108
  // src/evaluation/evaluators/prompt-resolution.ts
13339
- import path34 from "node:path";
13109
+ import path33 from "node:path";
13340
13110
  async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
13341
13111
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
13342
13112
  if (!context) {
@@ -13371,10 +13141,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
13371
13141
  expectedOutput: context.evalCase.expected_output,
13372
13142
  outputText: context.candidate,
13373
13143
  output: context.output ?? null,
13374
- guidelineFiles: context.evalCase.guideline_paths,
13375
- inputFiles: context.evalCase.file_paths.filter(
13376
- (p) => !context.evalCase.guideline_paths.includes(p)
13377
- ),
13144
+ inputFiles: context.evalCase.file_paths,
13378
13145
  input: context.evalCase.input,
13379
13146
  trace: context.trace ?? null,
13380
13147
  fileChanges: context.fileChanges ?? null,
@@ -13385,7 +13152,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
13385
13152
  };
13386
13153
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
13387
13154
  const scriptPath = script[script.length - 1];
13388
- const cwd = path34.dirname(scriptPath);
13155
+ const cwd = path33.dirname(scriptPath);
13389
13156
  try {
13390
13157
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
13391
13158
  const prompt = stdout.trim();
@@ -13657,16 +13424,16 @@ function createBuiltinRegistry() {
13657
13424
  }
13658
13425
 
13659
13426
  // src/evaluation/registry/assertion-discovery.ts
13660
- import path35 from "node:path";
13427
+ import path34 from "node:path";
13661
13428
  import fg2 from "fast-glob";
13662
13429
  async function discoverAssertions(registry, baseDir) {
13663
13430
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
13664
13431
  const candidateDirs = [];
13665
- let dir = path35.resolve(baseDir);
13666
- const root = path35.parse(dir).root;
13432
+ let dir = path34.resolve(baseDir);
13433
+ const root = path34.parse(dir).root;
13667
13434
  while (dir !== root) {
13668
- candidateDirs.push(path35.join(dir, ".agentv", "assertions"));
13669
- dir = path35.dirname(dir);
13435
+ candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
13436
+ dir = path34.dirname(dir);
13670
13437
  }
13671
13438
  let files = [];
13672
13439
  for (const assertionsDir of candidateDirs) {
@@ -13682,7 +13449,7 @@ async function discoverAssertions(registry, baseDir) {
13682
13449
  }
13683
13450
  const discoveredTypes = [];
13684
13451
  for (const filePath of files) {
13685
- const basename = path35.basename(filePath);
13452
+ const basename = path34.basename(filePath);
13686
13453
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
13687
13454
  if (registry.has(typeName)) {
13688
13455
  continue;
@@ -13700,17 +13467,17 @@ async function discoverAssertions(registry, baseDir) {
13700
13467
  }
13701
13468
 
13702
13469
  // src/evaluation/registry/grader-discovery.ts
13703
- import path36 from "node:path";
13470
+ import path35 from "node:path";
13704
13471
  import fg3 from "fast-glob";
13705
13472
  async function discoverGraders(registry, baseDir) {
13706
13473
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
13707
13474
  const candidateDirs = [];
13708
- let dir = path36.resolve(baseDir);
13709
- const root = path36.parse(dir).root;
13475
+ let dir = path35.resolve(baseDir);
13476
+ const root = path35.parse(dir).root;
13710
13477
  while (dir !== root) {
13711
- candidateDirs.push(path36.join(dir, ".agentv", "graders"));
13712
- candidateDirs.push(path36.join(dir, ".agentv", "judges"));
13713
- dir = path36.dirname(dir);
13478
+ candidateDirs.push(path35.join(dir, ".agentv", "graders"));
13479
+ candidateDirs.push(path35.join(dir, ".agentv", "judges"));
13480
+ dir = path35.dirname(dir);
13714
13481
  }
13715
13482
  let files = [];
13716
13483
  for (const gradersDir of candidateDirs) {
@@ -13726,7 +13493,7 @@ async function discoverGraders(registry, baseDir) {
13726
13493
  }
13727
13494
  const discoveredTypes = [];
13728
13495
  for (const filePath of files) {
13729
- const basename = path36.basename(filePath);
13496
+ const basename = path35.basename(filePath);
13730
13497
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
13731
13498
  if (registry.has(typeName)) {
13732
13499
  continue;
@@ -13886,7 +13653,7 @@ function getTCritical(df) {
13886
13653
  // src/evaluation/workspace/file-changes.ts
13887
13654
  import { exec as execCallback } from "node:child_process";
13888
13655
  import { readdirSync as readdirSync2, statSync } from "node:fs";
13889
- import path37 from "node:path";
13656
+ import path36 from "node:path";
13890
13657
  import { promisify as promisify4 } from "node:util";
13891
13658
  var execAsync4 = promisify4(execCallback);
13892
13659
  function gitExecOpts(workspacePath) {
@@ -13920,10 +13687,10 @@ async function stageNestedRepoChanges(workspacePath) {
13920
13687
  }
13921
13688
  for (const entry of entries) {
13922
13689
  if (entry === ".git" || entry === "node_modules") continue;
13923
- const childPath = path37.join(workspacePath, entry);
13690
+ const childPath = path36.join(workspacePath, entry);
13924
13691
  try {
13925
13692
  if (!statSync(childPath).isDirectory()) continue;
13926
- if (!statSync(path37.join(childPath, ".git")).isDirectory()) continue;
13693
+ if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
13927
13694
  } catch {
13928
13695
  continue;
13929
13696
  }
@@ -13934,7 +13701,7 @@ async function stageNestedRepoChanges(workspacePath) {
13934
13701
 
13935
13702
  // src/evaluation/workspace/manager.ts
13936
13703
  import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
13937
- import path38 from "node:path";
13704
+ import path37 from "node:path";
13938
13705
  var TemplateNotFoundError = class extends Error {
13939
13706
  constructor(templatePath) {
13940
13707
  super(`Workspace template not found: ${templatePath}`);
@@ -13964,14 +13731,14 @@ async function isDirectory(filePath) {
13964
13731
  }
13965
13732
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
13966
13733
  const root = workspaceRoot ?? getWorkspacesRoot();
13967
- return path38.join(root, evalRunId, caseId);
13734
+ return path37.join(root, evalRunId, caseId);
13968
13735
  }
13969
13736
  async function copyDirectoryRecursive(src, dest) {
13970
13737
  await mkdir11(dest, { recursive: true });
13971
13738
  const entries = await readdir3(src, { withFileTypes: true });
13972
13739
  for (const entry of entries) {
13973
- const srcPath = path38.join(src, entry.name);
13974
- const destPath = path38.join(dest, entry.name);
13740
+ const srcPath = path37.join(src, entry.name);
13741
+ const destPath = path37.join(dest, entry.name);
13975
13742
  if (entry.name === ".git") {
13976
13743
  continue;
13977
13744
  }
@@ -13983,7 +13750,7 @@ async function copyDirectoryRecursive(src, dest) {
13983
13750
  }
13984
13751
  }
13985
13752
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
13986
- const resolvedTemplatePath = path38.resolve(templatePath);
13753
+ const resolvedTemplatePath = path37.resolve(templatePath);
13987
13754
  if (!await fileExists(resolvedTemplatePath)) {
13988
13755
  throw new TemplateNotFoundError(resolvedTemplatePath);
13989
13756
  }
@@ -14032,7 +13799,7 @@ async function cleanupWorkspace(workspacePath) {
14032
13799
  }
14033
13800
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
14034
13801
  const root = workspaceRoot ?? getWorkspacesRoot();
14035
- const evalDir = path38.join(root, evalRunId);
13802
+ const evalDir = path37.join(root, evalRunId);
14036
13803
  if (await fileExists(evalDir)) {
14037
13804
  await rm4(evalDir, { recursive: true, force: true });
14038
13805
  }
@@ -14042,8 +13809,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
14042
13809
  import { execFile } from "node:child_process";
14043
13810
  import { createHash } from "node:crypto";
14044
13811
  import { existsSync as existsSync2 } from "node:fs";
14045
- import { cp as cp2, mkdir as mkdir12, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
14046
- import path39 from "node:path";
13812
+ import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
13813
+ import path38 from "node:path";
14047
13814
  import { promisify as promisify5 } from "node:util";
14048
13815
  var execFileAsync = promisify5(execFile);
14049
13816
  function gitEnv() {
@@ -14097,8 +13864,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
14097
13864
  await mkdir12(dest, { recursive: true });
14098
13865
  const entries = await readdir4(src, { withFileTypes: true });
14099
13866
  for (const entry of entries) {
14100
- const srcPath = path39.join(src, entry.name);
14101
- const destPath = path39.join(dest, entry.name);
13867
+ const srcPath = path38.join(src, entry.name);
13868
+ const destPath = path38.join(dest, entry.name);
14102
13869
  if (entry.name === ".git") {
14103
13870
  continue;
14104
13871
  }
@@ -14131,7 +13898,7 @@ var WorkspacePoolManager = class {
14131
13898
  async acquireWorkspace(options) {
14132
13899
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
14133
13900
  const fingerprint = computeWorkspaceFingerprint(repos);
14134
- const poolDir = path39.join(this.poolRoot, fingerprint);
13901
+ const poolDir = path38.join(this.poolRoot, fingerprint);
14135
13902
  await mkdir12(poolDir, { recursive: true });
14136
13903
  const drifted = await this.checkDrift(poolDir, fingerprint);
14137
13904
  if (drifted) {
@@ -14141,7 +13908,7 @@ var WorkspacePoolManager = class {
14141
13908
  await this.removeAllSlots(poolDir);
14142
13909
  }
14143
13910
  for (let i = 0; i < maxSlots; i++) {
14144
- const slotPath = path39.join(poolDir, `slot-${i}`);
13911
+ const slotPath = path38.join(poolDir, `slot-${i}`);
14145
13912
  const lockPath = `${slotPath}.lock`;
14146
13913
  const locked = await this.tryLock(lockPath);
14147
13914
  if (!locked) {
@@ -14203,7 +13970,7 @@ var WorkspacePoolManager = class {
14203
13970
  throw err;
14204
13971
  }
14205
13972
  try {
14206
- const pidStr = await readFile11(lockPath, "utf-8");
13973
+ const pidStr = await readFile10(lockPath, "utf-8");
14207
13974
  const pid = Number.parseInt(pidStr.trim(), 10);
14208
13975
  if (!Number.isNaN(pid)) {
14209
13976
  try {
@@ -14228,9 +13995,9 @@ var WorkspacePoolManager = class {
14228
13995
  * Returns false (no drift) if metadata.json doesn't exist (first use).
14229
13996
  */
14230
13997
  async checkDrift(poolDir, fingerprint) {
14231
- const metadataPath = path39.join(poolDir, "metadata.json");
13998
+ const metadataPath = path38.join(poolDir, "metadata.json");
14232
13999
  try {
14233
- const raw = await readFile11(metadataPath, "utf-8");
14000
+ const raw = await readFile10(metadataPath, "utf-8");
14234
14001
  const metadata = JSON.parse(raw);
14235
14002
  return metadata.fingerprint !== fingerprint;
14236
14003
  } catch {
@@ -14245,17 +14012,17 @@ var WorkspacePoolManager = class {
14245
14012
  repos,
14246
14013
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
14247
14014
  };
14248
- await writeFile7(path39.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
14015
+ await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
14249
14016
  }
14250
14017
  /** Remove all slot directories and their lock files from a pool directory. */
14251
14018
  async removeAllSlots(poolDir) {
14252
14019
  const entries = await readdir4(poolDir);
14253
14020
  for (const entry of entries) {
14254
14021
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
14255
- const lockPath = path39.join(poolDir, `${entry}.lock`);
14022
+ const lockPath = path38.join(poolDir, `${entry}.lock`);
14256
14023
  if (existsSync2(lockPath)) {
14257
14024
  try {
14258
- const pidStr = await readFile11(lockPath, "utf-8");
14025
+ const pidStr = await readFile10(lockPath, "utf-8");
14259
14026
  const pid = Number.parseInt(pidStr.trim(), 10);
14260
14027
  if (!Number.isNaN(pid)) {
14261
14028
  try {
@@ -14268,12 +14035,12 @@ var WorkspacePoolManager = class {
14268
14035
  } catch {
14269
14036
  }
14270
14037
  }
14271
- await rm5(path39.join(poolDir, entry), { recursive: true, force: true });
14038
+ await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
14272
14039
  await rm5(lockPath, { force: true }).catch(() => {
14273
14040
  });
14274
14041
  }
14275
14042
  }
14276
- await rm5(path39.join(poolDir, "metadata.json"), { force: true }).catch(() => {
14043
+ await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
14277
14044
  });
14278
14045
  }
14279
14046
  /**
@@ -14283,7 +14050,7 @@ var WorkspacePoolManager = class {
14283
14050
  */
14284
14051
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
14285
14052
  for (const repo of repos) {
14286
- const repoDir = path39.join(slotPath, repo.path);
14053
+ const repoDir = path38.join(slotPath, repo.path);
14287
14054
  if (!existsSync2(repoDir)) {
14288
14055
  continue;
14289
14056
  }
@@ -14310,7 +14077,7 @@ var WorkspacePoolManager = class {
14310
14077
  // src/evaluation/workspace/repo-manager.ts
14311
14078
  import { execFile as execFile2 } from "node:child_process";
14312
14079
  import { existsSync as existsSync3 } from "node:fs";
14313
- import path40 from "node:path";
14080
+ import path39 from "node:path";
14314
14081
  import { promisify as promisify6 } from "node:util";
14315
14082
  var execFileAsync2 = promisify6(execFile2);
14316
14083
  var DEFAULT_TIMEOUT_MS2 = 3e5;
@@ -14410,7 +14177,7 @@ ${lines.join("\n")}`;
14410
14177
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
14411
14178
  */
14412
14179
  async materialize(repo, workspacePath) {
14413
- const targetDir = path40.join(workspacePath, repo.path);
14180
+ const targetDir = path39.join(workspacePath, repo.path);
14414
14181
  const sourceUrl = getSourceUrl(repo.source);
14415
14182
  const startedAt = Date.now();
14416
14183
  if (this.verbose) {
@@ -14501,7 +14268,7 @@ ${lines.join("\n")}`;
14501
14268
  async reset(repos, workspacePath, reset) {
14502
14269
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
14503
14270
  for (const repo of repos) {
14504
- const targetDir = path40.join(workspacePath, repo.path);
14271
+ const targetDir = path39.join(workspacePath, repo.path);
14505
14272
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
14506
14273
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
14507
14274
  }
@@ -14510,16 +14277,16 @@ ${lines.join("\n")}`;
14510
14277
 
14511
14278
  // src/evaluation/workspace/resolve.ts
14512
14279
  import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
14513
- import path41 from "node:path";
14280
+ import path40 from "node:path";
14514
14281
  async function resolveWorkspaceTemplate(templatePath) {
14515
14282
  if (!templatePath) {
14516
14283
  return void 0;
14517
14284
  }
14518
- const resolved = path41.resolve(templatePath);
14285
+ const resolved = path40.resolve(templatePath);
14519
14286
  const stats = await stat6(resolved);
14520
14287
  if (stats.isFile()) {
14521
14288
  return {
14522
- dir: path41.dirname(resolved),
14289
+ dir: path40.dirname(resolved),
14523
14290
  workspaceFile: resolved
14524
14291
  };
14525
14292
  }
@@ -14531,14 +14298,14 @@ async function resolveWorkspaceTemplate(templatePath) {
14531
14298
  if (workspaceFiles.length === 1) {
14532
14299
  return {
14533
14300
  dir: resolved,
14534
- workspaceFile: path41.join(resolved, workspaceFiles[0])
14301
+ workspaceFile: path40.join(resolved, workspaceFiles[0])
14535
14302
  };
14536
14303
  }
14537
14304
  if (workspaceFiles.length > 1) {
14538
14305
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
14539
14306
  return {
14540
14307
  dir: resolved,
14541
- workspaceFile: conventionFile ? path41.join(resolved, conventionFile) : void 0
14308
+ workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
14542
14309
  };
14543
14310
  }
14544
14311
  return { dir: resolved };
@@ -14742,7 +14509,7 @@ async function runEvaluation(options) {
14742
14509
  ];
14743
14510
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
14744
14511
  const typeRegistry = createBuiltinRegistry();
14745
- const discoveryBaseDir = evalFilePath ? path42.dirname(path42.resolve(evalFilePath)) : process.cwd();
14512
+ const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
14746
14513
  const evalDir = discoveryBaseDir;
14747
14514
  await discoverAssertions(typeRegistry, discoveryBaseDir);
14748
14515
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -14851,14 +14618,22 @@ async function runEvaluation(options) {
14851
14618
  const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
14852
14619
  const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
14853
14620
  const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
14854
- const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
14855
- const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
14621
+ const workers = options.maxConcurrency ?? target.workers ?? 1;
14856
14622
  setupLog(
14857
- `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
14623
+ `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} workers=${workers}`
14858
14624
  );
14859
- if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
14625
+ if (hasSharedWorkspace && !usePool && workers > 1) {
14860
14626
  console.warn(
14861
- `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
14627
+ [
14628
+ `Warning: This eval uses a shared workspace with ${workers} workers.`,
14629
+ "If the agent under test makes file edits, concurrent runs may corrupt each other.",
14630
+ "To limit concurrency, add this to your eval YAML:",
14631
+ "",
14632
+ " execution:",
14633
+ " workers: 1",
14634
+ "",
14635
+ "Or pass --workers 1 on the command line."
14636
+ ].join("\n")
14862
14637
  );
14863
14638
  }
14864
14639
  const limit = pLimit(workers);
@@ -14931,7 +14706,7 @@ async function runEvaluation(options) {
14931
14706
  }
14932
14707
  try {
14933
14708
  if (suiteWorkspaceFile && sharedWorkspacePath) {
14934
- const copiedWorkspaceFile = path42.join(sharedWorkspacePath, path42.basename(suiteWorkspaceFile));
14709
+ const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
14935
14710
  try {
14936
14711
  await stat7(copiedWorkspaceFile);
14937
14712
  suiteWorkspaceFile = copiedWorkspaceFile;
@@ -15041,7 +14816,7 @@ async function runEvaluation(options) {
15041
14816
  const budgetResult = {
15042
14817
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
15043
14818
  testId: evalCase.id,
15044
- dataset: evalCase.dataset,
14819
+ eval_set: evalCase.eval_set,
15045
14820
  score: 0,
15046
14821
  assertions: [],
15047
14822
  output: [],
@@ -15077,7 +14852,7 @@ async function runEvaluation(options) {
15077
14852
  const haltResult = {
15078
14853
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
15079
14854
  testId: evalCase.id,
15080
- dataset: evalCase.dataset,
14855
+ eval_set: evalCase.eval_set,
15081
14856
  score: 0,
15082
14857
  assertions: [],
15083
14858
  output: [],
@@ -15309,8 +15084,6 @@ async function runBatchEvaluation(options) {
15309
15084
  const promptInputs = promptInputsList[index];
15310
15085
  return {
15311
15086
  question: promptInputs.question,
15312
- guidelines: promptInputs.guidelines,
15313
- guideline_patterns: evalCase.guideline_patterns,
15314
15087
  inputFiles: evalCase.file_paths,
15315
15088
  evalCaseId: evalCase.id,
15316
15089
  metadata: {
@@ -15508,7 +15281,7 @@ async function runEvalCase(options) {
15508
15281
  );
15509
15282
  }
15510
15283
  if (caseWorkspaceFile && workspacePath) {
15511
- const copiedFile = path42.join(workspacePath, path42.basename(caseWorkspaceFile));
15284
+ const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
15512
15285
  try {
15513
15286
  await stat7(copiedFile);
15514
15287
  caseWorkspaceFile = copiedFile;
@@ -15568,10 +15341,10 @@ async function runEvalCase(options) {
15568
15341
  const files = evalCase.metadata.agent_skills_files;
15569
15342
  if (baseDir && files.length > 0) {
15570
15343
  for (const relPath of files) {
15571
- const srcPath = path42.resolve(baseDir, relPath);
15572
- const destPath = path42.resolve(workspacePath, relPath);
15344
+ const srcPath = path41.resolve(baseDir, relPath);
15345
+ const destPath = path41.resolve(workspacePath, relPath);
15573
15346
  try {
15574
- await mkdir13(path42.dirname(destPath), { recursive: true });
15347
+ await mkdir13(path41.dirname(destPath), { recursive: true });
15575
15348
  await copyFile2(srcPath, destPath);
15576
15349
  } catch (error) {
15577
15350
  const message = error instanceof Error ? error.message : String(error);
@@ -16018,8 +15791,7 @@ async function evaluateCandidate(options) {
16018
15791
  let lmRequest;
16019
15792
  if (isAgentProvider(provider)) {
16020
15793
  agentRequest = {
16021
- question: promptInputs.question,
16022
- guideline_paths: evalCase.guideline_paths
15794
+ question: promptInputs.question
16023
15795
  };
16024
15796
  } else {
16025
15797
  if (promptInputs.chatPrompt) {
@@ -16028,8 +15800,7 @@ async function evaluateCandidate(options) {
16028
15800
  };
16029
15801
  } else {
16030
15802
  lmRequest = {
16031
- question: promptInputs.question,
16032
- guidelines: promptInputs.guidelines
15803
+ question: promptInputs.question
16033
15804
  };
16034
15805
  }
16035
15806
  }
@@ -16043,7 +15814,7 @@ async function evaluateCandidate(options) {
16043
15814
  return {
16044
15815
  timestamp: completedAt.toISOString(),
16045
15816
  testId: evalCase.id,
16046
- dataset: evalCase.dataset,
15817
+ eval_set: evalCase.eval_set,
16047
15818
  conversationId: evalCase.conversation_id,
16048
15819
  score: score.score,
16049
15820
  assertions: score.assertions,
@@ -16191,7 +15962,7 @@ async function runEvaluatorList(options) {
16191
15962
  fileChanges,
16192
15963
  workspacePath
16193
15964
  };
16194
- const evalFileDir = evalCase.guideline_paths[0] ? path42.dirname(evalCase.guideline_paths[0]) : process.cwd();
15965
+ const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
16195
15966
  const dispatchContext = {
16196
15967
  graderProvider,
16197
15968
  targetResolver,
@@ -16305,7 +16076,7 @@ function filterEvalCases(evalCases, filter) {
16305
16076
  if (!filter) {
16306
16077
  return evalCases;
16307
16078
  }
16308
- return evalCases.filter((evalCase) => micromatch4.isMatch(evalCase.id, filter));
16079
+ return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter));
16309
16080
  }
16310
16081
  function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
16311
16082
  const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
@@ -16342,8 +16113,6 @@ async function invokeProvider(provider, options) {
16342
16113
  const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
16343
16114
  return await provider.invoke({
16344
16115
  question: promptInputs.question,
16345
- guidelines: promptInputs.guidelines,
16346
- guideline_patterns: evalCase.guideline_patterns,
16347
16116
  chatPrompt: promptInputs.chatPrompt,
16348
16117
  inputFiles: evalCase.file_paths,
16349
16118
  evalCaseId: evalCase.id,
@@ -16371,21 +16140,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
16371
16140
  if (isAgentProvider(provider)) {
16372
16141
  agentRequest = {
16373
16142
  question: promptInputs.question,
16374
- guideline_paths: evalCase.guideline_paths,
16375
16143
  error: message
16376
16144
  };
16377
16145
  } else {
16378
16146
  if (promptInputs.chatPrompt) {
16379
16147
  lmRequest = {
16380
16148
  chat_prompt: promptInputs.chatPrompt,
16381
- guideline_paths: evalCase.guideline_paths,
16382
16149
  error: message
16383
16150
  };
16384
16151
  } else {
16385
16152
  lmRequest = {
16386
16153
  question: promptInputs.question,
16387
- guidelines: promptInputs.guidelines,
16388
- guideline_paths: evalCase.guideline_paths,
16389
16154
  error: message
16390
16155
  };
16391
16156
  }
@@ -16398,7 +16163,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
16398
16163
  return {
16399
16164
  timestamp: timestamp.toISOString(),
16400
16165
  testId: evalCase.id,
16401
- dataset: evalCase.dataset,
16166
+ eval_set: evalCase.eval_set,
16402
16167
  conversationId: evalCase.conversation_id,
16403
16168
  score: 0,
16404
16169
  assertions: [{ text: `Error: ${message}`, passed: false }],
@@ -16431,7 +16196,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
16431
16196
  hash.update(target.name);
16432
16197
  hash.update(evalCase.id);
16433
16198
  hash.update(promptInputs.question);
16434
- hash.update(promptInputs.guidelines);
16435
16199
  hash.update(promptInputs.systemMessage ?? "");
16436
16200
  if (promptInputs.chatPrompt) {
16437
16201
  hash.update(JSON.stringify(promptInputs.chatPrompt));
@@ -16531,7 +16295,7 @@ function computeWeightedMean(entries) {
16531
16295
 
16532
16296
  // src/evaluation/evaluate.ts
16533
16297
  import { existsSync as existsSync4 } from "node:fs";
16534
- import path43 from "node:path";
16298
+ import path42 from "node:path";
16535
16299
 
16536
16300
  // src/evaluation/providers/function-provider.ts
16537
16301
  function createFunctionProvider(taskFn) {
@@ -16568,7 +16332,7 @@ async function evaluate(config) {
16568
16332
  }
16569
16333
  const gitRoot = await findGitRoot(process.cwd());
16570
16334
  const repoRoot = gitRoot ?? process.cwd();
16571
- const testFilePath = config.specFile ? path43.resolve(config.specFile) : path43.join(process.cwd(), "__programmatic__.yaml");
16335
+ const testFilePath = config.specFile ? path42.resolve(config.specFile) : path42.join(process.cwd(), "__programmatic__.yaml");
16572
16336
  await loadEnvHierarchy(repoRoot, testFilePath);
16573
16337
  let resolvedTarget;
16574
16338
  let taskProvider;
@@ -16634,8 +16398,6 @@ async function evaluate(config) {
16634
16398
  input_segments: inputSegments,
16635
16399
  expected_output: expectedOutput,
16636
16400
  reference_answer: expectedOutputValue,
16637
- guideline_paths: [],
16638
- guideline_patterns: [],
16639
16401
  file_paths: [],
16640
16402
  assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
16641
16403
  metadata: test.metadata
@@ -16697,10 +16459,10 @@ function computeSummary(results, durationMs) {
16697
16459
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
16698
16460
  async function discoverDefaultTarget(repoRoot) {
16699
16461
  const cwd = process.cwd();
16700
- const chain = buildDirectoryChain(path43.join(cwd, "_placeholder"), repoRoot);
16462
+ const chain = buildDirectoryChain(path42.join(cwd, "_placeholder"), repoRoot);
16701
16463
  for (const dir of chain) {
16702
16464
  for (const candidate of TARGET_FILE_CANDIDATES) {
16703
- const targetsPath = path43.join(dir, candidate);
16465
+ const targetsPath = path42.join(dir, candidate);
16704
16466
  if (!existsSync4(targetsPath)) continue;
16705
16467
  try {
16706
16468
  const definitions = await readTargetDefinitions(targetsPath);
@@ -16717,7 +16479,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
16717
16479
  const chain = buildDirectoryChain(startPath, repoRoot);
16718
16480
  const envFiles = [];
16719
16481
  for (const dir of chain) {
16720
- const envPath = path43.join(dir, ".env");
16482
+ const envPath = path42.join(dir, ".env");
16721
16483
  if (existsSync4(envPath)) envFiles.push(envPath);
16722
16484
  }
16723
16485
  for (let i = 0; i < envFiles.length; i++) {
@@ -16898,8 +16660,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
16898
16660
  }
16899
16661
 
16900
16662
  // src/evaluation/cache/response-cache.ts
16901
- import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
16902
- import path44 from "node:path";
16663
+ import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
16664
+ import path43 from "node:path";
16903
16665
  var DEFAULT_CACHE_PATH = ".agentv/cache";
16904
16666
  var ResponseCache = class {
16905
16667
  cachePath;
@@ -16909,7 +16671,7 @@ var ResponseCache = class {
16909
16671
  async get(key) {
16910
16672
  const filePath = this.keyToPath(key);
16911
16673
  try {
16912
- const data = await readFile12(filePath, "utf8");
16674
+ const data = await readFile11(filePath, "utf8");
16913
16675
  return JSON.parse(data);
16914
16676
  } catch {
16915
16677
  return void 0;
@@ -16917,13 +16679,13 @@ var ResponseCache = class {
16917
16679
  }
16918
16680
  async set(key, value) {
16919
16681
  const filePath = this.keyToPath(key);
16920
- const dir = path44.dirname(filePath);
16682
+ const dir = path43.dirname(filePath);
16921
16683
  await mkdir14(dir, { recursive: true });
16922
16684
  await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
16923
16685
  }
16924
16686
  keyToPath(key) {
16925
16687
  const prefix = key.slice(0, 2);
16926
- return path44.join(this.cachePath, prefix, `${key}.json`);
16688
+ return path43.join(this.cachePath, prefix, `${key}.json`);
16927
16689
  }
16928
16690
  };
16929
16691
  function shouldEnableCache(params) {
@@ -17111,7 +16873,7 @@ var OtelTraceExporter = class {
17111
16873
  rootSpan.setAttribute("gen_ai.system", "agentv");
17112
16874
  rootSpan.setAttribute("agentv.test_id", result.testId);
17113
16875
  rootSpan.setAttribute("agentv.target", result.target);
17114
- if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
16876
+ if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
17115
16877
  rootSpan.setAttribute("agentv.score", result.score);
17116
16878
  if (captureContent && result.output.length > 0) {
17117
16879
  const lastMsg = result.output[result.output.length - 1];
@@ -17296,14 +17058,14 @@ var OtelStreamingObserver = class {
17296
17058
  // biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
17297
17059
  rootCtx = null;
17298
17060
  /** Create root eval span immediately (visible in backend right away) */
17299
- startEvalCase(testId, target, dataset) {
17061
+ startEvalCase(testId, target, evalSet) {
17300
17062
  const ctx = this.parentCtx ?? this.api.context.active();
17301
17063
  this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
17302
17064
  this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
17303
17065
  this.rootSpan.setAttribute("gen_ai.system", "agentv");
17304
17066
  this.rootSpan.setAttribute("agentv.test_id", testId);
17305
17067
  this.rootSpan.setAttribute("agentv.target", target);
17306
- if (dataset) this.rootSpan.setAttribute("agentv.dataset", dataset);
17068
+ if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
17307
17069
  this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
17308
17070
  }
17309
17071
  /** Create and immediately export a tool span */
@@ -17477,6 +17239,7 @@ export {
17477
17239
  extractTargetsFromSuite,
17478
17240
  extractTargetsFromTestCase,
17479
17241
  extractTrialsConfig,
17242
+ extractWorkersFromSuite,
17480
17243
  fileExists,
17481
17244
  findGitRoot,
17482
17245
  freeformEvaluationSchema,
@@ -17491,7 +17254,6 @@ export {
17491
17254
  initializeBaseline,
17492
17255
  isAgentSkillsFormat,
17493
17256
  isEvaluatorKind,
17494
- isGuidelineFile,
17495
17257
  isJsonObject,
17496
17258
  isJsonValue,
17497
17259
  isNonEmptyString,