@agentv/core 3.7.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,21 +2,24 @@ import {
2
2
  TEST_MESSAGE_ROLES,
3
3
  buildDirectoryChain,
4
4
  buildSearchRoots,
5
+ expandFileReferences,
5
6
  extractLastAssistantContent,
6
7
  fileExists,
7
8
  findGitRoot,
9
+ interpolateEnv,
8
10
  isAgentProvider,
9
11
  isEvaluatorKind,
10
12
  isJsonObject,
11
13
  isJsonValue,
12
14
  isTestMessage,
13
15
  isTestMessageRole,
16
+ loadCasesFromFile,
14
17
  normalizeLineEndings,
15
18
  readJsonFile,
16
19
  readTextFile,
17
20
  resolveFileReference,
18
21
  resolveTargetDefinition
19
- } from "./chunk-2IZOTQ25.js";
22
+ } from "./chunk-PC5TLJF6.js";
20
23
  import {
21
24
  AgentvProvider
22
25
  } from "./chunk-W5YDZWT4.js";
@@ -146,30 +149,11 @@ function mergeExecutionMetrics(computed, metrics) {
146
149
  }
147
150
 
148
151
  // src/evaluation/yaml-parser.ts
149
- import { readFile as readFile8 } from "node:fs/promises";
150
- import path9 from "node:path";
151
- import micromatch3 from "micromatch";
152
+ import { readFile as readFile6 } from "node:fs/promises";
153
+ import path7 from "node:path";
154
+ import micromatch2 from "micromatch";
152
155
  import { parse as parse2 } from "yaml";
153
156
 
154
- // src/evaluation/interpolation.ts
155
- var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
156
- function interpolateEnv(value, env) {
157
- if (typeof value === "string") {
158
- return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
159
- }
160
- if (Array.isArray(value)) {
161
- return value.map((item) => interpolateEnv(item, env));
162
- }
163
- if (value !== null && typeof value === "object") {
164
- const result = {};
165
- for (const [key, val] of Object.entries(value)) {
166
- result[key] = interpolateEnv(val, env);
167
- }
168
- return result;
169
- }
170
- return value;
171
- }
172
-
173
157
  // src/evaluation/loaders/agent-skills-parser.ts
174
158
  import { readFile } from "node:fs/promises";
175
159
  import path from "node:path";
@@ -241,7 +225,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
241
225
  input_segments: [{ type: "text", value: prompt }],
242
226
  expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
243
227
  reference_answer: evalCase.expected_output,
244
- guideline_paths: [],
245
228
  file_paths: filePaths,
246
229
  criteria: evalCase.expected_output ?? "",
247
230
  assertions,
@@ -252,134 +235,15 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
252
235
  return tests;
253
236
  }
254
237
 
255
- // src/evaluation/loaders/case-file-loader.ts
256
- import { readFile as readFile2 } from "node:fs/promises";
257
- import path2 from "node:path";
258
- import fg from "fast-glob";
259
- import { parse as parseYaml } from "yaml";
260
- var ANSI_YELLOW = "\x1B[33m";
261
- var ANSI_RESET2 = "\x1B[0m";
262
- var FILE_PROTOCOL = "file://";
263
- function isFileReference(value) {
264
- return typeof value === "string" && value.startsWith(FILE_PROTOCOL);
265
- }
266
- function extractFilePath(ref) {
267
- return ref.slice(FILE_PROTOCOL.length);
268
- }
269
- function isGlobPattern(filePath) {
270
- return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
271
- }
272
- function parseYamlCases(content, filePath) {
273
- const raw = parseYaml(content);
274
- const parsed = interpolateEnv(raw, process.env);
275
- if (!Array.isArray(parsed)) {
276
- throw new Error(
277
- `External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
278
- );
279
- }
280
- const results = [];
281
- for (const item of parsed) {
282
- if (!isJsonObject(item)) {
283
- throw new Error(`External test file contains non-object entry: ${filePath}`);
284
- }
285
- results.push(item);
286
- }
287
- return results;
288
- }
289
- function parseJsonlCases(content, filePath) {
290
- const lines = content.split("\n");
291
- const results = [];
292
- for (let i = 0; i < lines.length; i++) {
293
- const line = lines[i].trim();
294
- if (line === "") continue;
295
- try {
296
- const raw = JSON.parse(line);
297
- const parsed = interpolateEnv(raw, process.env);
298
- if (!isJsonObject(parsed)) {
299
- throw new Error("Expected JSON object");
300
- }
301
- results.push(parsed);
302
- } catch (error) {
303
- const message = error instanceof Error ? error.message : String(error);
304
- throw new Error(`Malformed JSONL at line ${i + 1}: ${message}
305
- File: ${filePath}`);
306
- }
307
- }
308
- return results;
309
- }
310
- async function loadCasesFromFile(filePath) {
311
- const ext = path2.extname(filePath).toLowerCase();
312
- let content;
313
- try {
314
- content = await readFile2(filePath, "utf8");
315
- } catch (error) {
316
- const message = error instanceof Error ? error.message : String(error);
317
- throw new Error(`Cannot read external test file: ${filePath}
318
- ${message}`);
319
- }
320
- if (content.trim() === "") {
321
- console.warn(
322
- `${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET2}`
323
- );
324
- return [];
325
- }
326
- if (ext === ".yaml" || ext === ".yml") {
327
- return parseYamlCases(content, filePath);
328
- }
329
- if (ext === ".jsonl") {
330
- return parseJsonlCases(content, filePath);
331
- }
332
- throw new Error(
333
- `Unsupported external test file format '${ext}': ${filePath}. Supported: .yaml, .yml, .jsonl`
334
- );
335
- }
336
- async function resolveFileReference2(ref, evalFileDir) {
337
- const rawPath = extractFilePath(ref);
338
- const absolutePattern = path2.resolve(evalFileDir, rawPath);
339
- if (isGlobPattern(rawPath)) {
340
- const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
341
- onlyFiles: true,
342
- absolute: true
343
- });
344
- if (matches.length === 0) {
345
- console.warn(
346
- `${ANSI_YELLOW}Warning: Glob pattern matched no files: ${ref} (resolved to ${absolutePattern})${ANSI_RESET2}`
347
- );
348
- return [];
349
- }
350
- matches.sort();
351
- const allCases = [];
352
- for (const match of matches) {
353
- const cases = await loadCasesFromFile(match);
354
- allCases.push(...cases);
355
- }
356
- return allCases;
357
- }
358
- return loadCasesFromFile(absolutePattern);
359
- }
360
- async function expandFileReferences(tests, evalFileDir) {
361
- const expanded = [];
362
- for (const entry of tests) {
363
- if (isFileReference(entry)) {
364
- const cases = await resolveFileReference2(entry, evalFileDir);
365
- expanded.push(...cases);
366
- } else {
367
- expanded.push(entry);
368
- }
369
- }
370
- return expanded;
371
- }
372
-
373
238
  // src/evaluation/loaders/config-loader.ts
374
- import { readFile as readFile3 } from "node:fs/promises";
375
- import path4 from "node:path";
376
- import micromatch from "micromatch";
239
+ import { readFile as readFile2 } from "node:fs/promises";
240
+ import path3 from "node:path";
377
241
  import { parse } from "yaml";
378
242
 
379
243
  // src/evaluation/loaders/file-resolver.ts
380
244
  import { constants } from "node:fs";
381
245
  import { access } from "node:fs/promises";
382
- import path3 from "node:path";
246
+ import path2 from "node:path";
383
247
  import { fileURLToPath } from "node:url";
384
248
  async function fileExists2(absolutePath) {
385
249
  try {
@@ -397,15 +261,15 @@ function resolveToAbsolutePath(candidate) {
397
261
  if (candidate.startsWith("file:")) {
398
262
  return fileURLToPath(candidate);
399
263
  }
400
- return path3.resolve(candidate);
264
+ return path2.resolve(candidate);
401
265
  }
402
266
  throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
403
267
  }
404
268
  function buildDirectoryChain2(filePath, repoRoot) {
405
269
  const directories = [];
406
270
  const seen = /* @__PURE__ */ new Set();
407
- const boundary = path3.resolve(repoRoot);
408
- let current = path3.resolve(path3.dirname(filePath));
271
+ const boundary = path2.resolve(repoRoot);
272
+ let current = path2.resolve(path2.dirname(filePath));
409
273
  while (current !== void 0) {
410
274
  if (!seen.has(current)) {
411
275
  directories.push(current);
@@ -414,7 +278,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
414
278
  if (current === boundary) {
415
279
  break;
416
280
  }
417
- const parent = path3.dirname(current);
281
+ const parent = path2.dirname(current);
418
282
  if (parent === current) {
419
283
  break;
420
284
  }
@@ -428,16 +292,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
428
292
  function buildSearchRoots2(evalPath, repoRoot) {
429
293
  const uniqueRoots = [];
430
294
  const addRoot = (root) => {
431
- const normalized = path3.resolve(root);
295
+ const normalized = path2.resolve(root);
432
296
  if (!uniqueRoots.includes(normalized)) {
433
297
  uniqueRoots.push(normalized);
434
298
  }
435
299
  };
436
- let currentDir = path3.dirname(evalPath);
300
+ let currentDir = path2.dirname(evalPath);
437
301
  let reachedBoundary = false;
438
302
  while (!reachedBoundary) {
439
303
  addRoot(currentDir);
440
- const parentDir = path3.dirname(currentDir);
304
+ const parentDir = path2.dirname(currentDir);
441
305
  if (currentDir === repoRoot || parentDir === currentDir) {
442
306
  reachedBoundary = true;
443
307
  } else {
@@ -452,19 +316,19 @@ function trimLeadingSeparators(value) {
452
316
  const trimmed = value.replace(/^[/\\]+/, "");
453
317
  return trimmed.length > 0 ? trimmed : value;
454
318
  }
455
- async function resolveFileReference3(rawValue, searchRoots) {
319
+ async function resolveFileReference2(rawValue, searchRoots) {
456
320
  const displayPath = trimLeadingSeparators(rawValue);
457
321
  const potentialPaths = [];
458
- if (path3.isAbsolute(rawValue)) {
459
- potentialPaths.push(path3.normalize(rawValue));
322
+ if (path2.isAbsolute(rawValue)) {
323
+ potentialPaths.push(path2.normalize(rawValue));
460
324
  }
461
325
  for (const base of searchRoots) {
462
- potentialPaths.push(path3.resolve(base, displayPath));
326
+ potentialPaths.push(path2.resolve(base, displayPath));
463
327
  }
464
328
  const attempted = [];
465
329
  const seen = /* @__PURE__ */ new Set();
466
330
  for (const candidate of potentialPaths) {
467
- const absoluteCandidate = path3.resolve(candidate);
331
+ const absoluteCandidate = path2.resolve(candidate);
468
332
  if (seen.has(absoluteCandidate)) {
469
333
  continue;
470
334
  }
@@ -478,8 +342,8 @@ async function resolveFileReference3(rawValue, searchRoots) {
478
342
  }
479
343
 
480
344
  // src/evaluation/loaders/config-loader.ts
481
- var ANSI_YELLOW2 = "\x1B[33m";
482
- var ANSI_RESET3 = "\x1B[0m";
345
+ var ANSI_YELLOW = "\x1B[33m";
346
+ var ANSI_RESET2 = "\x1B[0m";
483
347
  var DEFAULT_EVAL_PATTERNS = [
484
348
  "**/evals/**/*.eval.yaml",
485
349
  "**/evals/**/eval.yaml"
@@ -487,12 +351,12 @@ var DEFAULT_EVAL_PATTERNS = [
487
351
  async function loadConfig(evalFilePath, repoRoot) {
488
352
  const directories = buildDirectoryChain2(evalFilePath, repoRoot);
489
353
  for (const directory of directories) {
490
- const configPath = path4.join(directory, ".agentv", "config.yaml");
354
+ const configPath = path3.join(directory, ".agentv", "config.yaml");
491
355
  if (!await fileExists2(configPath)) {
492
356
  continue;
493
357
  }
494
358
  try {
495
- const rawConfig = await readFile3(configPath, "utf8");
359
+ const rawConfig = await readFile2(configPath, "utf8");
496
360
  const parsed = parse(rawConfig);
497
361
  if (!isJsonObject(parsed)) {
498
362
  logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
@@ -504,15 +368,6 @@ async function loadConfig(evalFilePath, repoRoot) {
504
368
  logWarning(`Invalid required_version in ${configPath}, expected string`);
505
369
  continue;
506
370
  }
507
- const guidelinePatterns = config.guideline_patterns;
508
- if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
509
- logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
510
- continue;
511
- }
512
- if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
513
- logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
514
- continue;
515
- }
516
371
  const evalPatterns = config.eval_patterns;
517
372
  if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
518
373
  logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
@@ -528,7 +383,6 @@ async function loadConfig(evalFilePath, repoRoot) {
528
383
  );
529
384
  return {
530
385
  required_version: requiredVersion,
531
- guideline_patterns: guidelinePatterns,
532
386
  eval_patterns: evalPatterns,
533
387
  execution: executionDefaults
534
388
  };
@@ -540,11 +394,6 @@ async function loadConfig(evalFilePath, repoRoot) {
540
394
  }
541
395
  return null;
542
396
  }
543
- function isGuidelineFile(filePath, patterns) {
544
- const normalized = filePath.split("\\").join("/");
545
- const patternsToUse = patterns ?? [];
546
- return micromatch.isMatch(normalized, patternsToUse);
547
- }
548
397
  function extractTargetFromSuite(suite) {
549
398
  const execution = suite.execution;
550
399
  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
@@ -729,14 +578,14 @@ function parseExecutionDefaults(raw, configPath) {
729
578
  return Object.keys(result).length > 0 ? result : void 0;
730
579
  }
731
580
  function logWarning(message) {
732
- console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
581
+ console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`);
733
582
  }
734
583
 
735
584
  // src/evaluation/loaders/evaluator-parser.ts
736
- import path5 from "node:path";
585
+ import path4 from "node:path";
737
586
 
738
587
  // src/evaluation/validation/prompt-validator.ts
739
- import { readFile as readFile4 } from "node:fs/promises";
588
+ import { readFile as readFile3 } from "node:fs/promises";
740
589
 
741
590
  // src/evaluation/template-variables.ts
742
591
  var TEMPLATE_VARIABLES = {
@@ -756,10 +605,10 @@ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
756
605
  ]);
757
606
 
758
607
  // src/evaluation/validation/prompt-validator.ts
759
- var ANSI_YELLOW3 = "\x1B[33m";
760
- var ANSI_RESET4 = "\x1B[0m";
608
+ var ANSI_YELLOW2 = "\x1B[33m";
609
+ var ANSI_RESET3 = "\x1B[0m";
761
610
  async function validateCustomPromptContent(promptPath) {
762
- const content = await readFile4(promptPath, "utf8");
611
+ const content = await readFile3(promptPath, "utf8");
763
612
  validateTemplateVariables(content, promptPath);
764
613
  }
765
614
  function validateTemplateVariables(content, source) {
@@ -786,16 +635,16 @@ function validateTemplateVariables(content, source) {
786
635
  );
787
636
  }
788
637
  if (invalidVariables.length > 0) {
789
- const warningMessage = `${ANSI_YELLOW3}Warning: Custom evaluator template at ${source}
638
+ const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
790
639
  Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
791
- Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET4}`;
640
+ Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET3}`;
792
641
  console.warn(warningMessage);
793
642
  }
794
643
  }
795
644
 
796
645
  // src/evaluation/loaders/evaluator-parser.ts
797
- var ANSI_YELLOW4 = "\x1B[33m";
798
- var ANSI_RESET5 = "\x1B[0m";
646
+ var ANSI_YELLOW3 = "\x1B[33m";
647
+ var ANSI_RESET4 = "\x1B[0m";
799
648
  function normalizeEvaluatorType(type) {
800
649
  return type.replace(/_/g, "-");
801
650
  }
@@ -897,7 +746,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
897
746
  let command;
898
747
  if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
899
748
  console.warn(
900
- `${ANSI_YELLOW4}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET5}`
749
+ `${ANSI_YELLOW3}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
901
750
  );
902
751
  }
903
752
  const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
@@ -923,9 +772,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
923
772
  const cwd = asString(rawEvaluator.cwd);
924
773
  let resolvedCwd;
925
774
  if (cwd) {
926
- const resolved = await resolveFileReference3(cwd, searchRoots);
775
+ const resolved = await resolveFileReference2(cwd, searchRoots);
927
776
  if (resolved.resolvedPath) {
928
- resolvedCwd = path5.resolve(resolved.resolvedPath);
777
+ resolvedCwd = path4.resolve(resolved.resolvedPath);
929
778
  } else {
930
779
  logWarning2(
931
780
  `Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
@@ -1081,9 +930,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1081
930
  const aggregatorPrompt = asString(rawAggregator.prompt);
1082
931
  let promptPath2;
1083
932
  if (aggregatorPrompt) {
1084
- const resolved = await resolveFileReference3(aggregatorPrompt, searchRoots);
933
+ const resolved = await resolveFileReference2(aggregatorPrompt, searchRoots);
1085
934
  if (resolved.resolvedPath) {
1086
- promptPath2 = path5.resolve(resolved.resolvedPath);
935
+ promptPath2 = path4.resolve(resolved.resolvedPath);
1087
936
  }
1088
937
  }
1089
938
  aggregator = {
@@ -1640,7 +1489,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1640
1489
  if (isJsonObject2(rawPrompt)) {
1641
1490
  if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
1642
1491
  console.warn(
1643
- `${ANSI_YELLOW4}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET5}`
1492
+ `${ANSI_YELLOW3}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
1644
1493
  );
1645
1494
  }
1646
1495
  const commandArray = asStringArray(
@@ -1651,9 +1500,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1651
1500
  throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
1652
1501
  }
1653
1502
  const commandPath = commandArray[commandArray.length - 1];
1654
- const resolved = await resolveFileReference3(commandPath, searchRoots);
1503
+ const resolved = await resolveFileReference2(commandPath, searchRoots);
1655
1504
  if (resolved.resolvedPath) {
1656
- resolvedPromptScript = [...commandArray.slice(0, -1), path5.resolve(resolved.resolvedPath)];
1505
+ resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
1657
1506
  } else {
1658
1507
  throw new Error(
1659
1508
  `Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
@@ -1664,9 +1513,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1664
1513
  }
1665
1514
  } else if (typeof rawPrompt === "string") {
1666
1515
  prompt = rawPrompt;
1667
- const resolved = await resolveFileReference3(prompt, searchRoots);
1516
+ const resolved = await resolveFileReference2(prompt, searchRoots);
1668
1517
  if (resolved.resolvedPath) {
1669
- promptPath = path5.resolve(resolved.resolvedPath);
1518
+ promptPath = path4.resolve(resolved.resolvedPath);
1670
1519
  try {
1671
1520
  await validateCustomPromptContent(promptPath);
1672
1521
  } catch (error) {
@@ -1866,10 +1715,10 @@ function warnUnconsumedCriteria(_criteria, _evaluators, _testId) {
1866
1715
  function logWarning2(message, details) {
1867
1716
  if (details && details.length > 0) {
1868
1717
  const detailBlock = details.join("\n");
1869
- console.warn(`${ANSI_YELLOW4}Warning: ${message}
1870
- ${detailBlock}${ANSI_RESET5}`);
1718
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}
1719
+ ${detailBlock}${ANSI_RESET4}`);
1871
1720
  } else {
1872
- console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
1721
+ console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
1873
1722
  }
1874
1723
  }
1875
1724
  function parseRequired(value) {
@@ -2118,14 +1967,14 @@ function parseInlineRubrics(rawRubrics) {
2118
1967
  }
2119
1968
 
2120
1969
  // src/evaluation/loaders/jsonl-parser.ts
2121
- import { readFile as readFile6 } from "node:fs/promises";
2122
- import path7 from "node:path";
2123
- import micromatch2 from "micromatch";
2124
- import { parse as parseYaml2 } from "yaml";
2125
-
2126
- // src/evaluation/loaders/message-processor.ts
2127
1970
  import { readFile as readFile5 } from "node:fs/promises";
2128
1971
  import path6 from "node:path";
1972
+ import micromatch from "micromatch";
1973
+ import { parse as parseYaml } from "yaml";
1974
+
1975
+ // src/evaluation/loaders/message-processor.ts
1976
+ import { readFile as readFile4 } from "node:fs/promises";
1977
+ import path5 from "node:path";
2129
1978
 
2130
1979
  // src/evaluation/formatting/segment-formatter.ts
2131
1980
  function formatFileContents(parts) {
@@ -2147,10 +1996,6 @@ function formatSegment(segment, mode = "lm") {
2147
1996
  if (type === "text") {
2148
1997
  return asString2(segment.value);
2149
1998
  }
2150
- if (type === "guideline_ref") {
2151
- const refPath = asString2(segment.path);
2152
- return refPath ? `<Attached: ${refPath}>` : void 0;
2153
- }
2154
1999
  if (type === "file") {
2155
2000
  const filePath = asString2(segment.path);
2156
2001
  if (!filePath) {
@@ -2173,9 +2018,6 @@ function hasVisibleContent(segments) {
2173
2018
  const value = asString2(segment.value);
2174
2019
  return value !== void 0 && value.trim().length > 0;
2175
2020
  }
2176
- if (type === "guideline_ref") {
2177
- return false;
2178
- }
2179
2021
  if (type === "file") {
2180
2022
  const text = asString2(segment.text);
2181
2023
  return text !== void 0 && text.trim().length > 0;
@@ -2188,20 +2030,10 @@ function asString2(value) {
2188
2030
  }
2189
2031
 
2190
2032
  // src/evaluation/loaders/message-processor.ts
2191
- var ANSI_YELLOW5 = "\x1B[33m";
2192
- var ANSI_RESET6 = "\x1B[0m";
2033
+ var ANSI_YELLOW4 = "\x1B[33m";
2034
+ var ANSI_RESET5 = "\x1B[0m";
2193
2035
  async function processMessages(options) {
2194
- const {
2195
- messages,
2196
- searchRoots,
2197
- repoRootPath,
2198
- guidelinePatterns,
2199
- guidelinePaths,
2200
- treatFileSegmentsAsGuidelines,
2201
- textParts,
2202
- messageType,
2203
- verbose
2204
- } = options;
2036
+ const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
2205
2037
  const segments = [];
2206
2038
  for (const message of messages) {
2207
2039
  const content = message.content;
@@ -2233,7 +2065,7 @@ async function processMessages(options) {
2233
2065
  if (!rawValue) {
2234
2066
  continue;
2235
2067
  }
2236
- const { displayPath, resolvedPath, attempted } = await resolveFileReference3(
2068
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
2237
2069
  rawValue,
2238
2070
  searchRoots
2239
2071
  );
@@ -2244,27 +2076,12 @@ async function processMessages(options) {
2244
2076
  continue;
2245
2077
  }
2246
2078
  try {
2247
- const fileContent = (await readFile5(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2248
- const classifyAsGuideline = shouldTreatAsGuideline({
2249
- messageType,
2250
- resolvedPath,
2251
- repoRootPath,
2252
- guidelinePatterns,
2253
- treatFileSegmentsAsGuidelines
2254
- });
2255
- if (classifyAsGuideline && guidelinePaths) {
2256
- guidelinePaths.push(path6.resolve(resolvedPath));
2257
- if (verbose) {
2258
- console.log(` [Guideline] Found: ${displayPath}`);
2259
- console.log(` Resolved to: ${resolvedPath}`);
2260
- }
2261
- continue;
2262
- }
2079
+ const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2263
2080
  segments.push({
2264
2081
  type: "file",
2265
2082
  path: displayPath,
2266
2083
  text: fileContent,
2267
- resolvedPath: path6.resolve(resolvedPath)
2084
+ resolvedPath: path5.resolve(resolvedPath)
2268
2085
  });
2269
2086
  if (verbose) {
2270
2087
  const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -2287,26 +2104,6 @@ async function processMessages(options) {
2287
2104
  }
2288
2105
  return segments;
2289
2106
  }
2290
- function shouldTreatAsGuideline(options) {
2291
- const {
2292
- messageType,
2293
- resolvedPath,
2294
- repoRootPath,
2295
- guidelinePatterns,
2296
- treatFileSegmentsAsGuidelines
2297
- } = options;
2298
- if (messageType !== "input") {
2299
- return false;
2300
- }
2301
- if (treatFileSegmentsAsGuidelines) {
2302
- return true;
2303
- }
2304
- if (!guidelinePatterns || guidelinePatterns.length === 0) {
2305
- return false;
2306
- }
2307
- const relativeToRepo = path6.relative(repoRootPath, resolvedPath);
2308
- return isGuidelineFile(relativeToRepo, guidelinePatterns);
2309
- }
2310
2107
  function asString3(value) {
2311
2108
  return typeof value === "string" ? value : void 0;
2312
2109
  }
@@ -2332,10 +2129,10 @@ function cloneJsonValue(value) {
2332
2129
  function logWarning3(message, details) {
2333
2130
  if (details && details.length > 0) {
2334
2131
  const detailBlock = details.join("\n");
2335
- console.warn(`${ANSI_YELLOW5}Warning: ${message}
2336
- ${detailBlock}${ANSI_RESET6}`);
2132
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}
2133
+ ${detailBlock}${ANSI_RESET5}`);
2337
2134
  } else {
2338
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET6}`);
2135
+ console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
2339
2136
  }
2340
2137
  }
2341
2138
  async function processExpectedMessages(options) {
@@ -2364,7 +2161,7 @@ async function processExpectedMessages(options) {
2364
2161
  if (!rawValue) {
2365
2162
  continue;
2366
2163
  }
2367
- const { displayPath, resolvedPath, attempted } = await resolveFileReference3(
2164
+ const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
2368
2165
  rawValue,
2369
2166
  searchRoots
2370
2167
  );
@@ -2374,12 +2171,12 @@ async function processExpectedMessages(options) {
2374
2171
  continue;
2375
2172
  }
2376
2173
  try {
2377
- const fileContent = (await readFile5(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2174
+ const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2378
2175
  processedContent.push({
2379
2176
  type: "file",
2380
2177
  path: displayPath,
2381
2178
  text: fileContent,
2382
- resolvedPath: path6.resolve(resolvedPath)
2179
+ resolvedPath: path5.resolve(resolvedPath)
2383
2180
  });
2384
2181
  if (verbose) {
2385
2182
  console.log(` [Expected Output File] Found: ${displayPath}`);
@@ -2476,11 +2273,11 @@ function resolveExpectedMessages(raw) {
2476
2273
  }
2477
2274
 
2478
2275
  // src/evaluation/loaders/jsonl-parser.ts
2479
- var ANSI_YELLOW6 = "\x1B[33m";
2276
+ var ANSI_YELLOW5 = "\x1B[33m";
2480
2277
  var ANSI_RED2 = "\x1B[31m";
2481
- var ANSI_RESET7 = "\x1B[0m";
2278
+ var ANSI_RESET6 = "\x1B[0m";
2482
2279
  function detectFormat(filePath) {
2483
- const ext = path7.extname(filePath).toLowerCase();
2280
+ const ext = path6.extname(filePath).toLowerCase();
2484
2281
  if (ext === ".jsonl") return "jsonl";
2485
2282
  if (ext === ".yaml" || ext === ".yml") return "yaml";
2486
2283
  if (ext === ".json") return "agent-skills-json";
@@ -2489,9 +2286,9 @@ function detectFormat(filePath) {
2489
2286
  );
2490
2287
  }
2491
2288
  async function loadSidecarMetadata(jsonlPath, verbose) {
2492
- const dir = path7.dirname(jsonlPath);
2493
- const base = path7.basename(jsonlPath, ".jsonl");
2494
- const sidecarPath = path7.join(dir, `${base}.yaml`);
2289
+ const dir = path6.dirname(jsonlPath);
2290
+ const base = path6.basename(jsonlPath, ".jsonl");
2291
+ const sidecarPath = path6.join(dir, `${base}.yaml`);
2495
2292
  if (!await fileExists2(sidecarPath)) {
2496
2293
  if (verbose) {
2497
2294
  logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
@@ -2499,15 +2296,15 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
2499
2296
  return {};
2500
2297
  }
2501
2298
  try {
2502
- const content = await readFile6(sidecarPath, "utf8");
2503
- const parsed = interpolateEnv(parseYaml2(content), process.env);
2299
+ const content = await readFile5(sidecarPath, "utf8");
2300
+ const parsed = interpolateEnv(parseYaml(content), process.env);
2504
2301
  if (!isJsonObject(parsed)) {
2505
2302
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
2506
2303
  return {};
2507
2304
  }
2508
2305
  return {
2509
2306
  description: asString4(parsed.description),
2510
- dataset: asString4(parsed.dataset),
2307
+ name: asString4(parsed.name),
2511
2308
  execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
2512
2309
  evaluator: parsed.evaluator
2513
2310
  };
@@ -2540,23 +2337,21 @@ function parseJsonlContent(content, filePath) {
2540
2337
  async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2541
2338
  const verbose = options?.verbose ?? false;
2542
2339
  const filterPattern = options?.filter;
2543
- const absoluteTestPath = path7.resolve(evalFilePath);
2340
+ const absoluteTestPath = path6.resolve(evalFilePath);
2544
2341
  const repoRootPath = resolveToAbsolutePath(repoRoot);
2545
2342
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
2546
- const config = await loadConfig(absoluteTestPath, repoRootPath);
2547
- const guidelinePatterns = config?.guideline_patterns;
2548
2343
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
2549
- const rawFile = await readFile6(absoluteTestPath, "utf8");
2344
+ const rawFile = await readFile5(absoluteTestPath, "utf8");
2550
2345
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
2551
- const fallbackDataset = path7.basename(absoluteTestPath, ".jsonl") || "eval";
2552
- const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
2346
+ const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
2347
+ const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
2553
2348
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
2554
2349
  const globalExecution = sidecar.execution;
2555
2350
  if (verbose) {
2556
2351
  console.log(`
2557
2352
  [JSONL Dataset: ${evalFilePath}]`);
2558
2353
  console.log(` Cases: ${rawCases.length}`);
2559
- console.log(` Dataset name: ${datasetName}`);
2354
+ console.log(` Eval set: ${evalSetName}`);
2560
2355
  if (sidecar.description) {
2561
2356
  console.log(` Description: ${sidecar.description}`);
2562
2357
  }
@@ -2566,7 +2361,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2566
2361
  const evalcase = rawCases[lineIndex];
2567
2362
  const lineNumber = lineIndex + 1;
2568
2363
  const id = asString4(evalcase.id);
2569
- if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
2364
+ if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
2570
2365
  continue;
2571
2366
  }
2572
2367
  const conversationId = asString4(evalcase.conversation_id);
@@ -2589,14 +2384,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2589
2384
  continue;
2590
2385
  }
2591
2386
  const hasExpectedMessages = expectedMessages.length > 0;
2592
- const guidelinePaths = [];
2593
2387
  const inputTextParts = [];
2594
2388
  const inputSegments = await processMessages({
2595
2389
  messages: inputMessages,
2596
2390
  searchRoots,
2597
2391
  repoRootPath,
2598
- guidelinePatterns,
2599
- guidelinePaths,
2600
2392
  textParts: inputTextParts,
2601
2393
  messageType: "input",
2602
2394
  verbose
@@ -2646,40 +2438,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2646
2438
  userFilePaths.push(segment.resolvedPath);
2647
2439
  }
2648
2440
  }
2649
- const allFilePaths = [
2650
- ...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
2651
- ...userFilePaths
2652
- ];
2653
2441
  const testCase = {
2654
2442
  id,
2655
- dataset: datasetName,
2443
+ eval_set: evalSetName,
2656
2444
  conversation_id: conversationId,
2657
2445
  question,
2658
2446
  input: inputMessages,
2659
2447
  input_segments: inputSegments,
2660
2448
  expected_output: outputSegments,
2661
2449
  reference_answer: referenceAnswer,
2662
- guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
2663
- guideline_patterns: guidelinePatterns,
2664
- file_paths: allFilePaths,
2450
+ file_paths: userFilePaths,
2665
2451
  criteria: outcome ?? "",
2666
2452
  evaluator: evalCaseEvaluatorKind,
2667
2453
  assertions: evaluators
2668
2454
  };
2669
- if (verbose) {
2670
- console.log(`
2671
- [Test: ${id}]`);
2672
- if (testCase.guideline_paths.length > 0) {
2673
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
2674
- for (const guidelinePath of testCase.guideline_paths) {
2675
- console.log(` - ${guidelinePath}`);
2676
- }
2677
- } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
2678
- console.log(" No guidelines found (guideline_patterns not configured)");
2679
- } else {
2680
- console.log(" No guidelines found");
2681
- }
2682
- }
2683
2455
  results.push(testCase);
2684
2456
  }
2685
2457
  return results;
@@ -2690,19 +2462,19 @@ function asString4(value) {
2690
2462
  function logWarning4(message, details) {
2691
2463
  if (details && details.length > 0) {
2692
2464
  const detailBlock = details.join("\n");
2693
- console.warn(`${ANSI_YELLOW6}Warning: ${message}
2694
- ${detailBlock}${ANSI_RESET7}`);
2465
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
2466
+ ${detailBlock}${ANSI_RESET6}`);
2695
2467
  } else {
2696
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
2468
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET6}`);
2697
2469
  }
2698
2470
  }
2699
2471
  function logError2(message, details) {
2700
2472
  if (details && details.length > 0) {
2701
2473
  const detailBlock = details.join("\n");
2702
2474
  console.error(`${ANSI_RED2}Error: ${message}
2703
- ${detailBlock}${ANSI_RESET7}`);
2475
+ ${detailBlock}${ANSI_RESET6}`);
2704
2476
  } else {
2705
- console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
2477
+ console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET6}`);
2706
2478
  }
2707
2479
  }
2708
2480
 
@@ -2737,30 +2509,7 @@ function parseMetadata(suite) {
2737
2509
  }
2738
2510
 
2739
2511
  // src/evaluation/formatting/prompt-builder.ts
2740
- import { readFile as readFile7 } from "node:fs/promises";
2741
- import path8 from "node:path";
2742
- var ANSI_YELLOW7 = "\x1B[33m";
2743
- var ANSI_RESET8 = "\x1B[0m";
2744
2512
  async function buildPromptInputs(testCase, mode = "lm") {
2745
- const guidelineParts = [];
2746
- for (const rawPath of testCase.guideline_paths) {
2747
- const absolutePath = path8.resolve(rawPath);
2748
- if (!await fileExists2(absolutePath)) {
2749
- logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
2750
- continue;
2751
- }
2752
- try {
2753
- const content = (await readFile7(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
2754
- guidelineParts.push({
2755
- content,
2756
- isFile: true,
2757
- displayPath: path8.basename(absolutePath)
2758
- });
2759
- } catch (error) {
2760
- logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
2761
- }
2762
- }
2763
- const guidelines = formatFileContents(guidelineParts);
2764
2513
  const segmentsByMessage = [];
2765
2514
  const fileContentsByPath = /* @__PURE__ */ new Map();
2766
2515
  for (const segment of testCase.input_segments) {
@@ -2785,10 +2534,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
2785
2534
  if (type === "file") {
2786
2535
  const value = asString5(segment.value);
2787
2536
  if (!value) continue;
2788
- if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
2789
- messageSegments.push({ type: "guideline_ref", path: value });
2790
- continue;
2791
- }
2792
2537
  const fileText = fileContentsByPath.get(value);
2793
2538
  if (fileText !== void 0) {
2794
2539
  messageSegments.push({ type: "file", text: fileText, path: value });
@@ -2837,10 +2582,6 @@ ${messageContent}`);
2837
2582
  } else {
2838
2583
  const questionParts = [];
2839
2584
  for (const segment of testCase.input_segments) {
2840
- if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
2841
- questionParts.push(`<Attached: ${segment.path}>`);
2842
- continue;
2843
- }
2844
2585
  const formattedContent = formatSegment(segment, mode);
2845
2586
  if (formattedContent) {
2846
2587
  questionParts.push(formattedContent);
@@ -2851,11 +2592,9 @@ ${messageContent}`);
2851
2592
  const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
2852
2593
  messages: testCase.input,
2853
2594
  segmentsByMessage,
2854
- guidelinePatterns: testCase.guideline_patterns,
2855
- guidelineContent: guidelines,
2856
2595
  mode
2857
2596
  }) : void 0;
2858
- return { question, guidelines, chatPrompt };
2597
+ return { question, chatPrompt };
2859
2598
  }
2860
2599
  function needsRoleMarkers(messages, processedSegmentsByMessage) {
2861
2600
  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
@@ -2870,14 +2609,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
2870
2609
  return messagesWithContent > 1;
2871
2610
  }
2872
2611
  function buildChatPromptFromSegments(options) {
2873
- const {
2874
- messages,
2875
- segmentsByMessage,
2876
- guidelinePatterns,
2877
- guidelineContent,
2878
- systemPrompt,
2879
- mode = "lm"
2880
- } = options;
2612
+ const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
2881
2613
  if (messages.length === 0) {
2882
2614
  return void 0;
2883
2615
  }
@@ -2885,11 +2617,6 @@ function buildChatPromptFromSegments(options) {
2885
2617
  if (systemPrompt && systemPrompt.trim().length > 0) {
2886
2618
  systemSegments.push(systemPrompt.trim());
2887
2619
  }
2888
- if (guidelineContent && guidelineContent.trim().length > 0) {
2889
- systemSegments.push(`[[ ## Guidelines ## ]]
2890
-
2891
- ${guidelineContent.trim()}`);
2892
- }
2893
2620
  let startIndex = 0;
2894
2621
  while (startIndex < messages.length && messages[startIndex].role === "system") {
2895
2622
  const segments = segmentsByMessage[startIndex];
@@ -2925,15 +2652,8 @@ ${guidelineContent.trim()}`);
2925
2652
  contentParts.push("@[Tool]:");
2926
2653
  }
2927
2654
  for (const segment of segments) {
2928
- if (segment.type === "guideline_ref") {
2929
- continue;
2930
- }
2931
2655
  const formatted = formatSegment(segment, mode);
2932
2656
  if (formatted) {
2933
- const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
2934
- if (isGuidelineRef) {
2935
- continue;
2936
- }
2937
2657
  contentParts.push(formatted);
2938
2658
  }
2939
2659
  }
@@ -2951,30 +2671,27 @@ ${guidelineContent.trim()}`);
2951
2671
  function asString5(value) {
2952
2672
  return typeof value === "string" ? value : void 0;
2953
2673
  }
2954
- function logWarning5(message) {
2955
- console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
2956
- }
2957
2674
 
2958
2675
  // src/evaluation/yaml-parser.ts
2959
- var ANSI_YELLOW8 = "\x1B[33m";
2676
+ var ANSI_YELLOW6 = "\x1B[33m";
2960
2677
  var ANSI_RED3 = "\x1B[31m";
2961
- var ANSI_RESET9 = "\x1B[0m";
2678
+ var ANSI_RESET7 = "\x1B[0m";
2962
2679
  function resolveTests(suite) {
2963
2680
  if (suite.tests !== void 0) return suite.tests;
2964
2681
  if (suite.eval_cases !== void 0) {
2965
- logWarning6("'eval_cases' is deprecated. Use 'tests' instead.");
2682
+ logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
2966
2683
  return suite.eval_cases;
2967
2684
  }
2968
2685
  if (suite.evalcases !== void 0) {
2969
- logWarning6("'evalcases' is deprecated. Use 'tests' instead.");
2686
+ logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
2970
2687
  return suite.evalcases;
2971
2688
  }
2972
2689
  return void 0;
2973
2690
  }
2974
2691
  async function readTestSuiteMetadata(testFilePath) {
2975
2692
  try {
2976
- const absolutePath = path9.resolve(testFilePath);
2977
- const content = await readFile8(absolutePath, "utf8");
2693
+ const absolutePath = path7.resolve(testFilePath);
2694
+ const content = await readFile6(absolutePath, "utf8");
2978
2695
  const parsed = interpolateEnv(parse2(content), process.env);
2979
2696
  if (!isJsonObject(parsed)) {
2980
2697
  return {};
@@ -3025,26 +2742,25 @@ var loadEvalCases = loadTests;
3025
2742
  async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3026
2743
  const verbose = options?.verbose ?? false;
3027
2744
  const filterPattern = options?.filter;
3028
- const absoluteTestPath = path9.resolve(evalFilePath);
2745
+ const absoluteTestPath = path7.resolve(evalFilePath);
3029
2746
  const repoRootPath = resolveToAbsolutePath(repoRoot);
3030
2747
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
3031
2748
  const config = await loadConfig(absoluteTestPath, repoRootPath);
3032
- const guidelinePatterns = config?.guideline_patterns;
3033
- const rawFile = await readFile8(absoluteTestPath, "utf8");
2749
+ const rawFile = await readFile6(absoluteTestPath, "utf8");
3034
2750
  const interpolated = interpolateEnv(parse2(rawFile), process.env);
3035
2751
  if (!isJsonObject(interpolated)) {
3036
2752
  throw new Error(`Invalid test file format: ${evalFilePath}`);
3037
2753
  }
3038
2754
  const suite = interpolated;
3039
- const datasetNameFromSuite = asString6(suite.dataset)?.trim();
3040
- const fallbackDataset = path9.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
3041
- const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
2755
+ const evalSetNameFromSuite = asString6(suite.name)?.trim();
2756
+ const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
2757
+ const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
3042
2758
  const rawTestcases = resolveTests(suite);
3043
2759
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
3044
- const evalFileDir = path9.dirname(absoluteTestPath);
2760
+ const evalFileDir = path7.dirname(absoluteTestPath);
3045
2761
  let expandedTestcases;
3046
2762
  if (typeof rawTestcases === "string") {
3047
- const externalPath = path9.resolve(evalFileDir, rawTestcases);
2763
+ const externalPath = path7.resolve(evalFileDir, rawTestcases);
3048
2764
  expandedTestcases = await loadCasesFromFile(externalPath);
3049
2765
  } else if (Array.isArray(rawTestcases)) {
3050
2766
  expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
@@ -3058,18 +2774,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3058
2774
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
3059
2775
  const suiteAssertions = suite.assertions ?? suite.assert;
3060
2776
  if (suite.assert !== void 0 && suite.assertions === void 0) {
3061
- logWarning6("'assert' is deprecated at the suite level. Use 'assertions' instead.");
2777
+ logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
3062
2778
  }
3063
2779
  const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
3064
2780
  const results = [];
3065
2781
  for (const rawEvalcase of expandedTestcases) {
3066
2782
  if (!isJsonObject(rawEvalcase)) {
3067
- logWarning6("Skipping invalid test entry (expected object)");
2783
+ logWarning5("Skipping invalid test entry (expected object)");
3068
2784
  continue;
3069
2785
  }
3070
2786
  const evalcase = rawEvalcase;
3071
2787
  const id = asString6(evalcase.id);
3072
- if (filterPattern && (!id || !micromatch3.isMatch(id, filterPattern))) {
2788
+ if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
3073
2789
  continue;
3074
2790
  }
3075
2791
  const conversationId = asString6(evalcase.conversation_id);
@@ -3077,7 +2793,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3077
2793
  if (!outcome && evalcase.expected_outcome !== void 0) {
3078
2794
  outcome = asString6(evalcase.expected_outcome);
3079
2795
  if (outcome) {
3080
- logWarning6(
2796
+ logWarning5(
3081
2797
  `Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
3082
2798
  );
3083
2799
  }
@@ -3097,15 +2813,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3097
2813
  const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
3098
2814
  const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
3099
2815
  const hasExpectedMessages = expectedMessages.length > 0;
3100
- const guidelinePaths = [];
3101
2816
  const inputTextParts = [];
3102
2817
  const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
3103
2818
  messages: effectiveSuiteInputMessages,
3104
2819
  searchRoots,
3105
2820
  repoRootPath,
3106
- guidelinePatterns,
3107
- guidelinePaths,
3108
- treatFileSegmentsAsGuidelines: true,
3109
2821
  textParts: inputTextParts,
3110
2822
  messageType: "input",
3111
2823
  verbose
@@ -3114,8 +2826,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3114
2826
  messages: testInputMessages,
3115
2827
  searchRoots,
3116
2828
  repoRootPath,
3117
- guidelinePatterns,
3118
- guidelinePaths,
3119
2829
  textParts: inputTextParts,
3120
2830
  messageType: "input",
3121
2831
  verbose
@@ -3164,26 +2874,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3164
2874
  userFilePaths.push(segment.resolvedPath);
3165
2875
  }
3166
2876
  }
3167
- const allFilePaths = [
3168
- ...guidelinePaths.map((guidelinePath) => path9.resolve(guidelinePath)),
3169
- ...userFilePaths
3170
- ];
3171
2877
  const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
3172
2878
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
3173
2879
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
3174
2880
  const caseTargets = extractTargetsFromTestCase(evalcase);
3175
2881
  const testCase = {
3176
2882
  id,
3177
- dataset: datasetName,
2883
+ eval_set: evalSetName,
3178
2884
  conversation_id: conversationId,
3179
2885
  question,
3180
2886
  input: inputMessages,
3181
2887
  input_segments: inputSegments,
3182
2888
  expected_output: outputSegments,
3183
2889
  reference_answer: referenceAnswer,
3184
- guideline_paths: guidelinePaths.map((guidelinePath) => path9.resolve(guidelinePath)),
3185
- guideline_patterns: guidelinePatterns,
3186
- file_paths: allFilePaths,
2890
+ file_paths: userFilePaths,
3187
2891
  criteria: outcome ?? "",
3188
2892
  evaluator: evalCaseEvaluatorKind,
3189
2893
  assertions: evaluators,
@@ -3191,20 +2895,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3191
2895
  metadata,
3192
2896
  targets: caseTargets
3193
2897
  };
3194
- if (verbose) {
3195
- console.log(`
3196
- [Test: ${id}]`);
3197
- if (testCase.guideline_paths.length > 0) {
3198
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
3199
- for (const guidelinePath of testCase.guideline_paths) {
3200
- console.log(` - ${guidelinePath}`);
3201
- }
3202
- } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
3203
- console.log(" No guidelines found (guideline_patterns not configured)");
3204
- } else {
3205
- console.log(" No guidelines found");
3206
- }
3207
- }
3208
2898
  results.push(testCase);
3209
2899
  }
3210
2900
  return { tests: results, parsed: suite };
@@ -3223,7 +2913,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
3223
2913
  if (!isJsonObject(raw)) return void 0;
3224
2914
  const obj = raw;
3225
2915
  if (obj.script !== void 0 && obj.command === void 0) {
3226
- logWarning6("'script' is deprecated. Use 'command' instead.");
2916
+ logWarning5("'script' is deprecated. Use 'command' instead.");
3227
2917
  }
3228
2918
  const commandSource = obj.command ?? obj.script;
3229
2919
  if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
@@ -3231,8 +2921,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
3231
2921
  if (commandArr.length === 0) return void 0;
3232
2922
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
3233
2923
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
3234
- if (cwd && !path9.isAbsolute(cwd)) {
3235
- cwd = path9.resolve(evalFileDir, cwd);
2924
+ if (cwd && !path7.isAbsolute(cwd)) {
2925
+ cwd = path7.resolve(evalFileDir, cwd);
3236
2926
  }
3237
2927
  const config = { command: commandArr };
3238
2928
  if (timeoutMs !== void 0) {
@@ -3322,10 +3012,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
3322
3012
  }
3323
3013
  async function resolveWorkspaceConfig(raw, evalFileDir) {
3324
3014
  if (typeof raw === "string") {
3325
- const workspaceFilePath = path9.resolve(evalFileDir, raw);
3015
+ const workspaceFilePath = path7.resolve(evalFileDir, raw);
3326
3016
  let content;
3327
3017
  try {
3328
- content = await readFile8(workspaceFilePath, "utf8");
3018
+ content = await readFile6(workspaceFilePath, "utf8");
3329
3019
  } catch {
3330
3020
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
3331
3021
  }
@@ -3335,7 +3025,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
3335
3025
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
3336
3026
  );
3337
3027
  }
3338
- const workspaceFileDir = path9.dirname(workspaceFilePath);
3028
+ const workspaceFileDir = path7.dirname(workspaceFilePath);
3339
3029
  return parseWorkspaceConfig(parsed, workspaceFileDir);
3340
3030
  }
3341
3031
  return parseWorkspaceConfig(raw, evalFileDir);
@@ -3355,8 +3045,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
3355
3045
  throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
3356
3046
  }
3357
3047
  let template = typeof obj.template === "string" ? obj.template : void 0;
3358
- if (template && !path9.isAbsolute(template)) {
3359
- template = path9.resolve(evalFileDir, template);
3048
+ if (template && !path7.isAbsolute(template)) {
3049
+ template = path7.resolve(evalFileDir, template);
3360
3050
  }
3361
3051
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
3362
3052
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
@@ -3406,28 +3096,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
3406
3096
  function asString6(value) {
3407
3097
  return typeof value === "string" ? value : void 0;
3408
3098
  }
3409
- function logWarning6(message, details) {
3099
+ function logWarning5(message, details) {
3410
3100
  if (details && details.length > 0) {
3411
3101
  const detailBlock = details.join("\n");
3412
- console.warn(`${ANSI_YELLOW8}Warning: ${message}
3413
- ${detailBlock}${ANSI_RESET9}`);
3102
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}
3103
+ ${detailBlock}${ANSI_RESET7}`);
3414
3104
  } else {
3415
- console.warn(`${ANSI_YELLOW8}Warning: ${message}${ANSI_RESET9}`);
3105
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
3416
3106
  }
3417
3107
  }
3418
3108
  function logError3(message, details) {
3419
3109
  if (details && details.length > 0) {
3420
3110
  const detailBlock = details.join("\n");
3421
3111
  console.error(`${ANSI_RED3}Error: ${message}
3422
- ${detailBlock}${ANSI_RESET9}`);
3112
+ ${detailBlock}${ANSI_RESET7}`);
3423
3113
  } else {
3424
- console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET9}`);
3114
+ console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET7}`);
3425
3115
  }
3426
3116
  }
3427
3117
 
3428
3118
  // src/evaluation/loaders/eval-yaml-transpiler.ts
3429
3119
  import { readFileSync } from "node:fs";
3430
- import path10 from "node:path";
3120
+ import path8 from "node:path";
3431
3121
  import { parse as parse3 } from "yaml";
3432
3122
  function codeGraderInstruction(graderName, description) {
3433
3123
  const desc = description ? ` This grader: ${description}.` : "";
@@ -3672,7 +3362,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
3672
3362
  function transpileEvalYamlFile(evalYamlPath) {
3673
3363
  const content = readFileSync(evalYamlPath, "utf8");
3674
3364
  const parsed = parse3(content);
3675
- return transpileEvalYaml(parsed, path10.basename(evalYamlPath));
3365
+ return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
3676
3366
  }
3677
3367
  function getOutputFilenames(result) {
3678
3368
  const names = /* @__PURE__ */ new Map();
@@ -3907,10 +3597,10 @@ function buildChatPrompt(request) {
3907
3597
  if (hasSystemMessage) {
3908
3598
  return provided;
3909
3599
  }
3910
- const systemContent2 = resolveSystemContent(request, false);
3600
+ const systemContent2 = resolveSystemContent(request);
3911
3601
  return [{ role: "system", content: systemContent2 }, ...provided];
3912
3602
  }
3913
- const systemContent = resolveSystemContent(request, true);
3603
+ const systemContent = resolveSystemContent(request);
3914
3604
  const userContent = request.question.trim();
3915
3605
  const prompt = [
3916
3606
  { role: "system", content: systemContent },
@@ -3918,18 +3608,13 @@ function buildChatPrompt(request) {
3918
3608
  ];
3919
3609
  return prompt;
3920
3610
  }
3921
- function resolveSystemContent(request, includeGuidelines) {
3611
+ function resolveSystemContent(request) {
3922
3612
  const systemSegments = [];
3923
3613
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
3924
3614
  systemSegments.push(request.systemPrompt.trim());
3925
3615
  } else {
3926
3616
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
3927
3617
  }
3928
- if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
3929
- systemSegments.push(`[[ ## Guidelines ## ]]
3930
-
3931
- ${request.guidelines.trim()}`);
3932
- }
3933
3618
  return systemSegments.join("\n\n");
3934
3619
  }
3935
3620
  function toModelMessages(chatPrompt) {
@@ -4113,7 +3798,7 @@ import { spawn } from "node:child_process";
4113
3798
  import { randomUUID } from "node:crypto";
4114
3799
  import { createWriteStream } from "node:fs";
4115
3800
  import { mkdir } from "node:fs/promises";
4116
- import path12 from "node:path";
3801
+ import path10 from "node:path";
4117
3802
 
4118
3803
  // src/evaluation/providers/claude-log-tracker.ts
4119
3804
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
@@ -4169,17 +3854,11 @@ function subscribeToClaudeLogEntries(listener) {
4169
3854
  }
4170
3855
 
4171
3856
  // src/evaluation/providers/preread.ts
4172
- import path11 from "node:path";
4173
- function buildPromptDocument(request, inputFiles, options) {
3857
+ import path9 from "node:path";
3858
+ function buildPromptDocument(request, inputFiles) {
4174
3859
  const parts = [];
4175
- const guidelineFiles = collectGuidelineFiles(
4176
- inputFiles,
4177
- options?.guidelinePatterns ?? request.guideline_patterns,
4178
- options?.guidelineOverrides
4179
- );
4180
3860
  const inputFilesList = collectInputFiles(inputFiles);
4181
- const nonGuidelineInputFiles = inputFilesList.filter((file) => !guidelineFiles.includes(file));
4182
- const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
3861
+ const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
4183
3862
  if (prereadBlock.length > 0) {
4184
3863
  parts.push("\n", prereadBlock);
4185
3864
  }
@@ -4192,62 +3871,36 @@ function normalizeInputFiles(inputFiles) {
4192
3871
  }
4193
3872
  const deduped = /* @__PURE__ */ new Map();
4194
3873
  for (const inputFile of inputFiles) {
4195
- const absolutePath = path11.resolve(inputFile);
3874
+ const absolutePath = path9.resolve(inputFile);
4196
3875
  if (!deduped.has(absolutePath)) {
4197
3876
  deduped.set(absolutePath, absolutePath);
4198
3877
  }
4199
3878
  }
4200
3879
  return Array.from(deduped.values());
4201
3880
  }
4202
- function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
4203
- if (!inputFiles || inputFiles.length === 0) {
4204
- return [];
4205
- }
4206
- const unique = /* @__PURE__ */ new Map();
4207
- for (const inputFile of inputFiles) {
4208
- const absolutePath = path11.resolve(inputFile);
4209
- if (overrides?.has(absolutePath)) {
4210
- if (!unique.has(absolutePath)) {
4211
- unique.set(absolutePath, absolutePath);
4212
- }
4213
- continue;
4214
- }
4215
- const normalized = absolutePath.split(path11.sep).join("/");
4216
- if (isGuidelineFile(normalized, guidelinePatterns)) {
4217
- if (!unique.has(absolutePath)) {
4218
- unique.set(absolutePath, absolutePath);
4219
- }
4220
- }
4221
- }
4222
- return Array.from(unique.values());
4223
- }
4224
3881
  function collectInputFiles(inputFiles) {
4225
3882
  if (!inputFiles || inputFiles.length === 0) {
4226
3883
  return [];
4227
3884
  }
4228
3885
  const unique = /* @__PURE__ */ new Map();
4229
3886
  for (const inputFile of inputFiles) {
4230
- const absolutePath = path11.resolve(inputFile);
3887
+ const absolutePath = path9.resolve(inputFile);
4231
3888
  if (!unique.has(absolutePath)) {
4232
3889
  unique.set(absolutePath, absolutePath);
4233
3890
  }
4234
3891
  }
4235
3892
  return Array.from(unique.values());
4236
3893
  }
4237
- function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
4238
- if (guidelineFiles.length === 0 && inputFiles.length === 0) {
3894
+ function buildMandatoryPrereadBlock(inputFiles) {
3895
+ if (inputFiles.length === 0) {
4239
3896
  return "";
4240
3897
  }
4241
3898
  const buildList = (files) => files.map((absolutePath) => {
4242
- const fileName = path11.basename(absolutePath);
3899
+ const fileName = path9.basename(absolutePath);
4243
3900
  const fileUri = pathToFileUri(absolutePath);
4244
3901
  return `* [${fileName}](${fileUri})`;
4245
3902
  });
4246
3903
  const sections = [];
4247
- if (guidelineFiles.length > 0) {
4248
- sections.push(`Read all guideline files:
4249
- ${buildList(guidelineFiles).join("\n")}.`);
4250
- }
4251
3904
  if (inputFiles.length > 0) {
4252
3905
  sections.push(`Read all input files:
4253
3906
  ${buildList(inputFiles).join("\n")}.`);
@@ -4259,7 +3912,7 @@ ${buildList(inputFiles).join("\n")}.`);
4259
3912
  return sections.join("\n");
4260
3913
  }
4261
3914
  function pathToFileUri(filePath) {
4262
- const absolutePath = path11.isAbsolute(filePath) ? filePath : path11.resolve(filePath);
3915
+ const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
4263
3916
  const normalizedPath = absolutePath.replace(/\\/g, "/");
4264
3917
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
4265
3918
  return `file:///${normalizedPath}`;
@@ -4406,10 +4059,10 @@ var ClaudeCliProvider = class {
4406
4059
  }
4407
4060
  resolveCwd(cwdOverride) {
4408
4061
  if (cwdOverride) {
4409
- return path12.resolve(cwdOverride);
4062
+ return path10.resolve(cwdOverride);
4410
4063
  }
4411
4064
  if (this.config.cwd) {
4412
- return path12.resolve(this.config.cwd);
4065
+ return path10.resolve(this.config.cwd);
4413
4066
  }
4414
4067
  return void 0;
4415
4068
  }
@@ -4419,9 +4072,9 @@ var ClaudeCliProvider = class {
4419
4072
  return void 0;
4420
4073
  }
4421
4074
  if (this.config.logDir) {
4422
- return path12.resolve(this.config.logDir);
4075
+ return path10.resolve(this.config.logDir);
4423
4076
  }
4424
- return path12.join(process.cwd(), ".agentv", "logs", "claude-cli");
4077
+ return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
4425
4078
  }
4426
4079
  async createStreamLogger(request) {
4427
4080
  const logDir = this.resolveLogDirectory();
@@ -4435,7 +4088,7 @@ var ClaudeCliProvider = class {
4435
4088
  console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
4436
4089
  return void 0;
4437
4090
  }
4438
- const filePath = path12.join(logDir, buildLogFilename(request, this.targetName));
4091
+ const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
4439
4092
  try {
4440
4093
  const logger = await ClaudeCliStreamLogger.create({
4441
4094
  filePath,
@@ -4756,7 +4409,7 @@ function tryParseJson(line) {
4756
4409
  import { randomUUID as randomUUID2 } from "node:crypto";
4757
4410
  import { createWriteStream as createWriteStream2 } from "node:fs";
4758
4411
  import { mkdir as mkdir2 } from "node:fs/promises";
4759
- import path13 from "node:path";
4412
+ import path11 from "node:path";
4760
4413
  var claudeSdkModule = null;
4761
4414
  async function loadClaudeSdk() {
4762
4415
  if (!claudeSdkModule) {
@@ -4916,10 +4569,10 @@ var ClaudeSdkProvider = class {
4916
4569
  }
4917
4570
  resolveCwd(cwdOverride) {
4918
4571
  if (cwdOverride) {
4919
- return path13.resolve(cwdOverride);
4572
+ return path11.resolve(cwdOverride);
4920
4573
  }
4921
4574
  if (this.config.cwd) {
4922
- return path13.resolve(this.config.cwd);
4575
+ return path11.resolve(this.config.cwd);
4923
4576
  }
4924
4577
  return void 0;
4925
4578
  }
@@ -4929,9 +4582,9 @@ var ClaudeSdkProvider = class {
4929
4582
  return void 0;
4930
4583
  }
4931
4584
  if (this.config.logDir) {
4932
- return path13.resolve(this.config.logDir);
4585
+ return path11.resolve(this.config.logDir);
4933
4586
  }
4934
- return path13.join(process.cwd(), ".agentv", "logs", "claude");
4587
+ return path11.join(process.cwd(), ".agentv", "logs", "claude");
4935
4588
  }
4936
4589
  async createStreamLogger(request) {
4937
4590
  const logDir = this.resolveLogDirectory();
@@ -4945,7 +4598,7 @@ var ClaudeSdkProvider = class {
4945
4598
  console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
4946
4599
  return void 0;
4947
4600
  }
4948
- const filePath = path13.join(logDir, buildLogFilename2(request, this.targetName));
4601
+ const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
4949
4602
  try {
4950
4603
  const logger = await ClaudeStreamLogger.create({
4951
4604
  filePath,
@@ -5152,7 +4805,7 @@ function formatElapsed2(startedAt) {
5152
4805
  import { exec as execWithCallback } from "node:child_process";
5153
4806
  import fs from "node:fs/promises";
5154
4807
  import os from "node:os";
5155
- import path14 from "node:path";
4808
+ import path12 from "node:path";
5156
4809
  import { promisify } from "node:util";
5157
4810
  import { z as z2 } from "zod";
5158
4811
  var ToolCallSchema = z2.object({
@@ -5361,7 +5014,6 @@ var CliProvider = class {
5361
5014
  const { values: templateValues, promptFilePath } = await buildTemplateValues(
5362
5015
  {
5363
5016
  question: "",
5364
- guidelines: "",
5365
5017
  inputFiles: batchInputFiles,
5366
5018
  evalCaseId: "batch",
5367
5019
  attempt: 0
@@ -5594,7 +5246,6 @@ var CliProvider = class {
5594
5246
  const { values: templateValues, promptFilePath } = await buildTemplateValues(
5595
5247
  {
5596
5248
  question: "",
5597
- guidelines: "",
5598
5249
  inputFiles: [],
5599
5250
  evalCaseId: "healthcheck",
5600
5251
  attempt: 0
@@ -5635,7 +5286,6 @@ async function buildTemplateValues(request, config, outputFilePath) {
5635
5286
  values: {
5636
5287
  PROMPT: shellEscape(request.question ?? ""),
5637
5288
  PROMPT_FILE: shellEscape(promptFilePath),
5638
- GUIDELINES: shellEscape(request.guidelines ?? ""),
5639
5289
  EVAL_ID: shellEscape(request.evalCaseId ?? ""),
5640
5290
  ATTEMPT: shellEscape(String(request.attempt ?? 0)),
5641
5291
  FILES: formatFileList(inputFiles, config.filesFormat),
@@ -5657,7 +5307,7 @@ function normalizeInputFiles2(inputFiles) {
5657
5307
  }
5658
5308
  const unique = /* @__PURE__ */ new Map();
5659
5309
  for (const inputFile of inputFiles) {
5660
- const absolutePath = path14.resolve(inputFile);
5310
+ const absolutePath = path12.resolve(inputFile);
5661
5311
  if (!unique.has(absolutePath)) {
5662
5312
  unique.set(absolutePath, absolutePath);
5663
5313
  }
@@ -5671,7 +5321,7 @@ function formatFileList(files, template) {
5671
5321
  const formatter = template ?? "{path}";
5672
5322
  return files.map((filePath) => {
5673
5323
  const escapedPath = shellEscape(filePath);
5674
- const escapedName = shellEscape(path14.basename(filePath));
5324
+ const escapedName = shellEscape(path12.basename(filePath));
5675
5325
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
5676
5326
  }).join(" ");
5677
5327
  }
@@ -5695,7 +5345,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
5695
5345
  const safeEvalId = evalCaseId || "unknown";
5696
5346
  const timestamp = Date.now();
5697
5347
  const random = Math.random().toString(36).substring(2, 9);
5698
- return path14.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
5348
+ return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
5699
5349
  }
5700
5350
  function formatTimeoutSuffix2(timeoutMs) {
5701
5351
  if (!timeoutMs || timeoutMs <= 0) {
@@ -5709,7 +5359,7 @@ function formatTimeoutSuffix2(timeoutMs) {
5709
5359
  import { randomUUID as randomUUID3 } from "node:crypto";
5710
5360
  import { createWriteStream as createWriteStream3 } from "node:fs";
5711
5361
  import { mkdir as mkdir3 } from "node:fs/promises";
5712
- import path15 from "node:path";
5362
+ import path13 from "node:path";
5713
5363
 
5714
5364
  // src/evaluation/providers/codex-log-tracker.ts
5715
5365
  var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
@@ -5944,10 +5594,10 @@ ${basePrompt}` : basePrompt;
5944
5594
  }
5945
5595
  resolveCwd(cwdOverride) {
5946
5596
  if (cwdOverride) {
5947
- return path15.resolve(cwdOverride);
5597
+ return path13.resolve(cwdOverride);
5948
5598
  }
5949
5599
  if (this.config.cwd) {
5950
- return path15.resolve(this.config.cwd);
5600
+ return path13.resolve(this.config.cwd);
5951
5601
  }
5952
5602
  return void 0;
5953
5603
  }
@@ -5957,9 +5607,9 @@ ${basePrompt}` : basePrompt;
5957
5607
  return void 0;
5958
5608
  }
5959
5609
  if (this.config.logDir) {
5960
- return path15.resolve(this.config.logDir);
5610
+ return path13.resolve(this.config.logDir);
5961
5611
  }
5962
- return path15.join(process.cwd(), ".agentv", "logs", "codex");
5612
+ return path13.join(process.cwd(), ".agentv", "logs", "codex");
5963
5613
  }
5964
5614
  async createStreamLogger(request) {
5965
5615
  const logDir = this.resolveLogDirectory();
@@ -5973,7 +5623,7 @@ ${basePrompt}` : basePrompt;
5973
5623
  console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
5974
5624
  return void 0;
5975
5625
  }
5976
- const filePath = path15.join(logDir, buildLogFilename3(request, this.targetName));
5626
+ const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
5977
5627
  try {
5978
5628
  const logger = await CodexSdkStreamLogger.create({
5979
5629
  filePath,
@@ -6117,7 +5767,7 @@ function formatElapsed3(startedAt) {
6117
5767
  // src/evaluation/providers/copilot-cli.ts
6118
5768
  import { randomUUID as randomUUID5 } from "node:crypto";
6119
5769
  import { mkdir as mkdir4 } from "node:fs/promises";
6120
- import path17 from "node:path";
5770
+ import path15 from "node:path";
6121
5771
  import { Readable, Writable } from "node:stream";
6122
5772
  import { spawn as spawn2 } from "node:child_process";
6123
5773
  import * as acp from "@agentclientprotocol/sdk";
@@ -6179,7 +5829,7 @@ function subscribeToCopilotCliLogEntries(listener) {
6179
5829
  import { randomUUID as randomUUID4 } from "node:crypto";
6180
5830
  import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
6181
5831
  import { arch, platform } from "node:os";
6182
- import path16 from "node:path";
5832
+ import path14 from "node:path";
6183
5833
  import { fileURLToPath as fileURLToPath2 } from "node:url";
6184
5834
  function resolvePlatformCliPath() {
6185
5835
  const os3 = platform();
@@ -6203,7 +5853,7 @@ function resolvePlatformCliPath() {
6203
5853
  try {
6204
5854
  const resolved = import.meta.resolve(`${packageName}/package.json`);
6205
5855
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
6206
- const binaryPath = path16.join(path16.dirname(packageJsonPath), binaryName);
5856
+ const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
6207
5857
  if (existsSync(binaryPath)) {
6208
5858
  return binaryPath;
6209
5859
  }
@@ -6211,7 +5861,7 @@ function resolvePlatformCliPath() {
6211
5861
  }
6212
5862
  let searchDir = process.cwd();
6213
5863
  for (let i = 0; i < 10; i++) {
6214
- const standardPath = path16.join(
5864
+ const standardPath = path14.join(
6215
5865
  searchDir,
6216
5866
  "node_modules",
6217
5867
  ...packageName.split("/"),
@@ -6220,13 +5870,13 @@ function resolvePlatformCliPath() {
6220
5870
  if (existsSync(standardPath)) {
6221
5871
  return standardPath;
6222
5872
  }
6223
- const bunDir = path16.join(searchDir, "node_modules", ".bun");
5873
+ const bunDir = path14.join(searchDir, "node_modules", ".bun");
6224
5874
  const prefix = `@github+copilot-${osPart}-${archPart}@`;
6225
5875
  try {
6226
5876
  const entries = readdirSync(bunDir);
6227
5877
  for (const entry of entries) {
6228
5878
  if (entry.startsWith(prefix)) {
6229
- const candidate = path16.join(
5879
+ const candidate = path14.join(
6230
5880
  bunDir,
6231
5881
  entry,
6232
5882
  "node_modules",
@@ -6241,7 +5891,7 @@ function resolvePlatformCliPath() {
6241
5891
  }
6242
5892
  } catch {
6243
5893
  }
6244
- const parent = path16.dirname(searchDir);
5894
+ const parent = path14.dirname(searchDir);
6245
5895
  if (parent === searchDir) break;
6246
5896
  searchDir = parent;
6247
5897
  }
@@ -6579,10 +6229,10 @@ var CopilotCliProvider = class {
6579
6229
  }
6580
6230
  resolveCwd(cwdOverride) {
6581
6231
  if (cwdOverride) {
6582
- return path17.resolve(cwdOverride);
6232
+ return path15.resolve(cwdOverride);
6583
6233
  }
6584
6234
  if (this.config.cwd) {
6585
- return path17.resolve(this.config.cwd);
6235
+ return path15.resolve(this.config.cwd);
6586
6236
  }
6587
6237
  return void 0;
6588
6238
  }
@@ -6601,9 +6251,9 @@ var CopilotCliProvider = class {
6601
6251
  return void 0;
6602
6252
  }
6603
6253
  if (this.config.logDir) {
6604
- return path17.resolve(this.config.logDir);
6254
+ return path15.resolve(this.config.logDir);
6605
6255
  }
6606
- return path17.join(process.cwd(), ".agentv", "logs", "copilot-cli");
6256
+ return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
6607
6257
  }
6608
6258
  async createStreamLogger(request) {
6609
6259
  const logDir = this.resolveLogDirectory();
@@ -6617,7 +6267,7 @@ var CopilotCliProvider = class {
6617
6267
  console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
6618
6268
  return void 0;
6619
6269
  }
6620
- const filePath = path17.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
6270
+ const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
6621
6271
  try {
6622
6272
  const logger = await CopilotStreamLogger.create(
6623
6273
  {
@@ -6712,7 +6362,7 @@ function summarizeAcpEvent(eventType, data) {
6712
6362
  // src/evaluation/providers/copilot-sdk.ts
6713
6363
  import { randomUUID as randomUUID6 } from "node:crypto";
6714
6364
  import { mkdir as mkdir5 } from "node:fs/promises";
6715
- import path18 from "node:path";
6365
+ import path16 from "node:path";
6716
6366
 
6717
6367
  // src/evaluation/providers/copilot-sdk-log-tracker.ts
6718
6368
  var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
@@ -6991,10 +6641,10 @@ var CopilotSdkProvider = class {
6991
6641
  }
6992
6642
  resolveCwd(cwdOverride) {
6993
6643
  if (cwdOverride) {
6994
- return path18.resolve(cwdOverride);
6644
+ return path16.resolve(cwdOverride);
6995
6645
  }
6996
6646
  if (this.config.cwd) {
6997
- return path18.resolve(this.config.cwd);
6647
+ return path16.resolve(this.config.cwd);
6998
6648
  }
6999
6649
  return void 0;
7000
6650
  }
@@ -7003,9 +6653,9 @@ var CopilotSdkProvider = class {
7003
6653
  return void 0;
7004
6654
  }
7005
6655
  if (this.config.logDir) {
7006
- return path18.resolve(this.config.logDir);
6656
+ return path16.resolve(this.config.logDir);
7007
6657
  }
7008
- return path18.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
6658
+ return path16.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
7009
6659
  }
7010
6660
  async createStreamLogger(request) {
7011
6661
  const logDir = this.resolveLogDirectory();
@@ -7019,7 +6669,7 @@ var CopilotSdkProvider = class {
7019
6669
  console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
7020
6670
  return void 0;
7021
6671
  }
7022
- const filePath = path18.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
6672
+ const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
7023
6673
  try {
7024
6674
  const logger = await CopilotStreamLogger.create(
7025
6675
  {
@@ -7096,8 +6746,7 @@ var MockProvider = class {
7096
6746
  return {
7097
6747
  output: [{ role: "assistant", content: this.cannedResponse }],
7098
6748
  raw: {
7099
- question: request.question,
7100
- guidelines: request.guidelines
6749
+ question: request.question
7101
6750
  }
7102
6751
  };
7103
6752
  }
@@ -7375,7 +7024,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
7375
7024
  import { createWriteStream as createWriteStream5 } from "node:fs";
7376
7025
  import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
7377
7026
  import { tmpdir } from "node:os";
7378
- import path19 from "node:path";
7027
+ import path17 from "node:path";
7379
7028
 
7380
7029
  // src/evaluation/providers/pi-log-tracker.ts
7381
7030
  var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
@@ -7456,7 +7105,7 @@ var PiCodingAgentProvider = class {
7456
7105
  const workspaceRoot = await this.createWorkspace();
7457
7106
  const logger = await this.createStreamLogger(request).catch(() => void 0);
7458
7107
  try {
7459
- const promptFile = path19.join(workspaceRoot, PROMPT_FILENAME);
7108
+ const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
7460
7109
  await writeFile(promptFile, request.question, "utf8");
7461
7110
  const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
7462
7111
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
@@ -7518,12 +7167,12 @@ var PiCodingAgentProvider = class {
7518
7167
  }
7519
7168
  resolveCwd(workspaceRoot, cwdOverride) {
7520
7169
  if (cwdOverride) {
7521
- return path19.resolve(cwdOverride);
7170
+ return path17.resolve(cwdOverride);
7522
7171
  }
7523
7172
  if (!this.config.cwd) {
7524
7173
  return workspaceRoot;
7525
7174
  }
7526
- return path19.resolve(this.config.cwd);
7175
+ return path17.resolve(this.config.cwd);
7527
7176
  }
7528
7177
  buildPiArgs(prompt, inputFiles, _captureFileChanges) {
7529
7178
  const args = [];
@@ -7612,7 +7261,7 @@ ${prompt}` : prompt;
7612
7261
  return env;
7613
7262
  }
7614
7263
  async createWorkspace() {
7615
- return await mkdtemp(path19.join(tmpdir(), WORKSPACE_PREFIX));
7264
+ return await mkdtemp(path17.join(tmpdir(), WORKSPACE_PREFIX));
7616
7265
  }
7617
7266
  async cleanupWorkspace(workspaceRoot) {
7618
7267
  try {
@@ -7622,9 +7271,9 @@ ${prompt}` : prompt;
7622
7271
  }
7623
7272
  resolveLogDirectory() {
7624
7273
  if (this.config.logDir) {
7625
- return path19.resolve(this.config.logDir);
7274
+ return path17.resolve(this.config.logDir);
7626
7275
  }
7627
- return path19.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
7276
+ return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
7628
7277
  }
7629
7278
  async createStreamLogger(request) {
7630
7279
  const logDir = this.resolveLogDirectory();
@@ -7638,7 +7287,7 @@ ${prompt}` : prompt;
7638
7287
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
7639
7288
  return void 0;
7640
7289
  }
7641
- const filePath = path19.join(logDir, buildLogFilename5(request, this.targetName));
7290
+ const filePath = path17.join(logDir, buildLogFilename5(request, this.targetName));
7642
7291
  try {
7643
7292
  const logger = await PiStreamLogger.create({
7644
7293
  filePath,
@@ -8139,17 +7788,17 @@ var ProviderRegistry = class {
8139
7788
  // src/evaluation/providers/vscode-provider.ts
8140
7789
  import { exec as exec2 } from "node:child_process";
8141
7790
  import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
8142
- import path31 from "node:path";
7791
+ import path29 from "node:path";
8143
7792
  import { promisify as promisify3 } from "node:util";
8144
7793
 
8145
7794
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
8146
7795
  import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
8147
- import path29 from "node:path";
7796
+ import path27 from "node:path";
8148
7797
 
8149
7798
  // src/evaluation/providers/vscode/utils/fs.ts
8150
7799
  import { constants as constants2 } from "node:fs";
8151
7800
  import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
8152
- import path20 from "node:path";
7801
+ import path18 from "node:path";
8153
7802
  async function pathExists(target) {
8154
7803
  try {
8155
7804
  await access2(target, constants2.F_OK);
@@ -8165,7 +7814,7 @@ async function readDirEntries(target) {
8165
7814
  const entries = await readdir(target, { withFileTypes: true });
8166
7815
  return entries.map((entry) => ({
8167
7816
  name: entry.name,
8168
- absolutePath: path20.join(target, entry.name),
7817
+ absolutePath: path18.join(target, entry.name),
8169
7818
  isDirectory: entry.isDirectory()
8170
7819
  }));
8171
7820
  }
@@ -8180,9 +7829,9 @@ async function removeIfExists(target) {
8180
7829
  }
8181
7830
 
8182
7831
  // src/evaluation/providers/vscode/utils/path.ts
8183
- import path21 from "node:path";
7832
+ import path19 from "node:path";
8184
7833
  function pathToFileUri2(filePath) {
8185
- const absolutePath = path21.isAbsolute(filePath) ? filePath : path21.resolve(filePath);
7834
+ const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
8186
7835
  const normalizedPath = absolutePath.replace(/\\/g, "/");
8187
7836
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
8188
7837
  return `file:///${normalizedPath}`;
@@ -8191,7 +7840,7 @@ function pathToFileUri2(filePath) {
8191
7840
  }
8192
7841
 
8193
7842
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
8194
- import path22 from "node:path";
7843
+ import path20 from "node:path";
8195
7844
 
8196
7845
  // src/evaluation/providers/vscode/utils/template.ts
8197
7846
  function renderTemplate2(content, variables) {
@@ -8283,8 +7932,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
8283
7932
  });
8284
7933
  }
8285
7934
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
8286
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path22.basename(file)}`).join("\n");
8287
- const responseList = responseFiles.map((file) => `"${path22.basename(file)}"`).join(", ");
7935
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
7936
+ const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
8288
7937
  return renderTemplate2(templateContent, {
8289
7938
  requestFiles: requestLines,
8290
7939
  responseList
@@ -8292,8 +7941,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
8292
7941
  }
8293
7942
 
8294
7943
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
8295
- import { readFile as readFile9 } from "node:fs/promises";
8296
- import path23 from "node:path";
7944
+ import { readFile as readFile7 } from "node:fs/promises";
7945
+ import path21 from "node:path";
8297
7946
 
8298
7947
  // src/evaluation/providers/vscode/utils/time.ts
8299
7948
  function sleep2(ms) {
@@ -8331,7 +7980,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
8331
7980
  const maxAttempts = 10;
8332
7981
  while (attempts < maxAttempts) {
8333
7982
  try {
8334
- const content = await readFile9(responseFileFinal, { encoding: "utf8" });
7983
+ const content = await readFile7(responseFileFinal, { encoding: "utf8" });
8335
7984
  if (!silent) {
8336
7985
  process.stdout.write(`${content}
8337
7986
  `);
@@ -8352,7 +8001,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
8352
8001
  }
8353
8002
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
8354
8003
  if (!silent) {
8355
- const fileList = responseFilesFinal.map((file) => path23.basename(file)).join(", ");
8004
+ const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
8356
8005
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
8357
8006
  }
8358
8007
  const deadline = Date.now() + timeoutMs;
@@ -8361,7 +8010,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8361
8010
  while (pending.size > 0) {
8362
8011
  if (Date.now() >= deadline) {
8363
8012
  if (!silent) {
8364
- const remaining = [...pending].map((f) => path23.basename(f)).join(", ");
8013
+ const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
8365
8014
  console.error(
8366
8015
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
8367
8016
  );
@@ -8388,7 +8037,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8388
8037
  const maxAttempts = 10;
8389
8038
  while (attempts < maxAttempts) {
8390
8039
  try {
8391
- const content = await readFile9(file, { encoding: "utf8" });
8040
+ const content = await readFile7(file, { encoding: "utf8" });
8392
8041
  if (!silent) {
8393
8042
  process.stdout.write(`${content}
8394
8043
  `);
@@ -8412,15 +8061,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8412
8061
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
8413
8062
  import { exec, spawn as spawn4 } from "node:child_process";
8414
8063
  import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
8415
- import path26 from "node:path";
8064
+ import path24 from "node:path";
8416
8065
  import { promisify as promisify2 } from "node:util";
8417
8066
 
8418
8067
  // src/evaluation/providers/vscode/dispatch/constants.ts
8419
- import path25 from "node:path";
8068
+ import path23 from "node:path";
8420
8069
 
8421
8070
  // src/paths.ts
8422
8071
  import os2 from "node:os";
8423
- import path24 from "node:path";
8072
+ import path22 from "node:path";
8424
8073
  var logged = false;
8425
8074
  function getAgentvHome() {
8426
8075
  const envHome = process.env.AGENTV_HOME;
@@ -8431,19 +8080,19 @@ function getAgentvHome() {
8431
8080
  }
8432
8081
  return envHome;
8433
8082
  }
8434
- return path24.join(os2.homedir(), ".agentv");
8083
+ return path22.join(os2.homedir(), ".agentv");
8435
8084
  }
8436
8085
  function getWorkspacesRoot() {
8437
- return path24.join(getAgentvHome(), "workspaces");
8086
+ return path22.join(getAgentvHome(), "workspaces");
8438
8087
  }
8439
8088
  function getSubagentsRoot() {
8440
- return path24.join(getAgentvHome(), "subagents");
8089
+ return path22.join(getAgentvHome(), "subagents");
8441
8090
  }
8442
8091
  function getTraceStateRoot() {
8443
- return path24.join(getAgentvHome(), "trace-state");
8092
+ return path22.join(getAgentvHome(), "trace-state");
8444
8093
  }
8445
8094
  function getWorkspacePoolRoot() {
8446
- return path24.join(getAgentvHome(), "workspace-pool");
8095
+ return path22.join(getAgentvHome(), "workspace-pool");
8447
8096
  }
8448
8097
 
8449
8098
  // src/evaluation/providers/vscode/dispatch/constants.ts
@@ -8451,7 +8100,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
8451
8100
  var DEFAULT_ALIVE_FILENAME = ".alive";
8452
8101
  function getDefaultSubagentRoot(vscodeCmd = "code") {
8453
8102
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
8454
- return path25.join(getSubagentsRoot(), folder);
8103
+ return path23.join(getSubagentsRoot(), folder);
8455
8104
  }
8456
8105
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
8457
8106
 
@@ -8518,11 +8167,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8518
8167
  await raceSpawnError(child);
8519
8168
  return true;
8520
8169
  }
8521
- const aliveFile = path26.join(subagentDir, DEFAULT_ALIVE_FILENAME);
8170
+ const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
8522
8171
  await removeIfExists(aliveFile);
8523
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8172
+ const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
8524
8173
  await mkdir8(githubAgentsDir, { recursive: true });
8525
- const wakeupDst = path26.join(githubAgentsDir, "wakeup.md");
8174
+ const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
8526
8175
  await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
8527
8176
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
8528
8177
  label: "open-workspace"
@@ -8535,7 +8184,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8535
8184
  "chat",
8536
8185
  "-m",
8537
8186
  wakeupChatId,
8538
- `create a file named .alive in the ${path26.basename(subagentDir)} folder`
8187
+ `create a file named .alive in the ${path24.basename(subagentDir)} folder`
8539
8188
  ];
8540
8189
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
8541
8190
  await raceSpawnError(wakeupChild);
@@ -8550,10 +8199,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8550
8199
  return true;
8551
8200
  }
8552
8201
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
8553
- const workspacePath = path26.join(subagentDir, `${path26.basename(subagentDir)}.code-workspace`);
8554
- const messagesDir = path26.join(subagentDir, "messages");
8202
+ const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
8203
+ const messagesDir = path24.join(subagentDir, "messages");
8555
8204
  await mkdir8(messagesDir, { recursive: true });
8556
- const reqFile = path26.join(messagesDir, `${timestamp}_req.md`);
8205
+ const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
8557
8206
  await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
8558
8207
  const reqUri = pathToFileUri2(reqFile);
8559
8208
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -8561,16 +8210,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
8561
8210
  chatArgs.push("-a", attachment);
8562
8211
  }
8563
8212
  chatArgs.push("-a", reqFile);
8564
- chatArgs.push(`Follow instructions in [${path26.basename(reqFile)}](${reqUri})`);
8213
+ chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
8565
8214
  const workspaceReady = await ensureWorkspaceFocused(
8566
8215
  workspacePath,
8567
- path26.basename(subagentDir),
8216
+ path24.basename(subagentDir),
8568
8217
  subagentDir,
8569
8218
  vscodeCmd
8570
8219
  );
8571
8220
  if (!workspaceReady) {
8572
8221
  throw new Error(
8573
- `VS Code workspace '${path26.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8222
+ `VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8574
8223
  );
8575
8224
  }
8576
8225
  await sleep2(500);
@@ -8578,8 +8227,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
8578
8227
  await raceSpawnError(child);
8579
8228
  }
8580
8229
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
8581
- const workspacePath = path26.join(subagentDir, `${path26.basename(subagentDir)}.code-workspace`);
8582
- const messagesDir = path26.join(subagentDir, "messages");
8230
+ const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
8231
+ const messagesDir = path24.join(subagentDir, "messages");
8583
8232
  await mkdir8(messagesDir, { recursive: true });
8584
8233
  const chatArgs = ["-r", "chat", "-m", chatId];
8585
8234
  for (const attachment of attachmentPaths) {
@@ -8588,13 +8237,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
8588
8237
  chatArgs.push(chatInstruction);
8589
8238
  const workspaceReady = await ensureWorkspaceFocused(
8590
8239
  workspacePath,
8591
- path26.basename(subagentDir),
8240
+ path24.basename(subagentDir),
8592
8241
  subagentDir,
8593
8242
  vscodeCmd
8594
8243
  );
8595
8244
  if (!workspaceReady) {
8596
8245
  throw new Error(
8597
- `VS Code workspace '${path26.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8246
+ `VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8598
8247
  );
8599
8248
  }
8600
8249
  await sleep2(500);
@@ -8603,11 +8252,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
8603
8252
  }
8604
8253
 
8605
8254
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
8606
- import { copyFile, mkdir as mkdir9, readFile as readFile10, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
8607
- import path28 from "node:path";
8255
+ import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
8256
+ import path26 from "node:path";
8608
8257
 
8609
8258
  // src/evaluation/providers/vscode/utils/workspace.ts
8610
- import path27 from "node:path";
8259
+ import path25 from "node:path";
8611
8260
  import JSON5 from "json5";
8612
8261
  function transformWorkspacePaths(workspaceContent, templateDir) {
8613
8262
  let workspace;
@@ -8624,10 +8273,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
8624
8273
  }
8625
8274
  const transformedFolders = workspace.folders.map((folder) => {
8626
8275
  const folderPath = folder.path;
8627
- if (path27.isAbsolute(folderPath)) {
8276
+ if (path25.isAbsolute(folderPath)) {
8628
8277
  return folder;
8629
8278
  }
8630
- const absolutePath = path27.resolve(templateDir, folderPath);
8279
+ const absolutePath = path25.resolve(templateDir, folderPath);
8631
8280
  return {
8632
8281
  ...folder,
8633
8282
  path: absolutePath
@@ -8649,19 +8298,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
8649
8298
  if (locationMap && typeof locationMap === "object") {
8650
8299
  const transformedMap = {};
8651
8300
  for (const [locationPath, value] of Object.entries(locationMap)) {
8652
- const isAbsolute = path27.isAbsolute(locationPath);
8301
+ const isAbsolute = path25.isAbsolute(locationPath);
8653
8302
  if (isAbsolute) {
8654
8303
  transformedMap[locationPath] = value;
8655
8304
  } else {
8656
8305
  const firstGlobIndex = locationPath.search(/[*]/);
8657
8306
  if (firstGlobIndex === -1) {
8658
- const resolvedPath = path27.resolve(templateDir, locationPath).replace(/\\/g, "/");
8307
+ const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
8659
8308
  transformedMap[resolvedPath] = value;
8660
8309
  } else {
8661
8310
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
8662
8311
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
8663
8312
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
8664
- const resolvedPath = (path27.resolve(templateDir, basePath) + patternPath).replace(
8313
+ const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
8665
8314
  /\\/g,
8666
8315
  "/"
8667
8316
  );
@@ -8702,7 +8351,7 @@ async function findUnlockedSubagent(subagentRoot) {
8702
8351
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
8703
8352
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
8704
8353
  for (const subagent of subagents) {
8705
- const lockFile = path28.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
8354
+ const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
8706
8355
  if (!await pathExists(lockFile)) {
8707
8356
  return subagent.absolutePath;
8708
8357
  }
@@ -8712,7 +8361,7 @@ async function findUnlockedSubagent(subagentRoot) {
8712
8361
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8713
8362
  let workspaceContent;
8714
8363
  if (workspaceTemplate) {
8715
- const workspaceSrc = path28.resolve(workspaceTemplate);
8364
+ const workspaceSrc = path26.resolve(workspaceTemplate);
8716
8365
  if (!await pathExists(workspaceSrc)) {
8717
8366
  throw new Error(`workspace template not found: ${workspaceSrc}`);
8718
8367
  }
@@ -8720,18 +8369,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8720
8369
  if (!stats.isFile()) {
8721
8370
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
8722
8371
  }
8723
- const templateText = await readFile10(workspaceSrc, "utf8");
8372
+ const templateText = await readFile8(workspaceSrc, "utf8");
8724
8373
  workspaceContent = JSON.parse(templateText);
8725
8374
  } else {
8726
8375
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
8727
8376
  }
8728
- const workspaceName = `${path28.basename(subagentDir)}.code-workspace`;
8729
- const workspaceDst = path28.join(subagentDir, workspaceName);
8730
- const templateDir = workspaceTemplate ? path28.dirname(path28.resolve(workspaceTemplate)) : subagentDir;
8377
+ const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
8378
+ const workspaceDst = path26.join(subagentDir, workspaceName);
8379
+ const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
8731
8380
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
8732
8381
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
8733
8382
  if (cwd) {
8734
- const absCwd = path28.resolve(cwd);
8383
+ const absCwd = path26.resolve(cwd);
8735
8384
  const parsed = JSON.parse(transformedContent);
8736
8385
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
8737
8386
  if (!alreadyPresent) {
@@ -8740,35 +8389,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8740
8389
  }
8741
8390
  }
8742
8391
  await writeFile3(workspaceDst, transformedContent, "utf8");
8743
- const messagesDir = path28.join(subagentDir, "messages");
8392
+ const messagesDir = path26.join(subagentDir, "messages");
8744
8393
  await mkdir9(messagesDir, { recursive: true });
8745
8394
  return { workspace: workspaceDst, messagesDir };
8746
8395
  }
8747
8396
  async function createSubagentLock(subagentDir) {
8748
- const messagesDir = path28.join(subagentDir, "messages");
8397
+ const messagesDir = path26.join(subagentDir, "messages");
8749
8398
  if (await pathExists(messagesDir)) {
8750
8399
  const files = await readdir2(messagesDir);
8751
8400
  await Promise.all(
8752
8401
  files.map(async (file) => {
8753
- const target = path28.join(messagesDir, file);
8402
+ const target = path26.join(messagesDir, file);
8754
8403
  await removeIfExists(target);
8755
8404
  })
8756
8405
  );
8757
8406
  }
8758
- const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8407
+ const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8759
8408
  if (await pathExists(githubAgentsDir)) {
8760
8409
  const agentFiles = await readdir2(githubAgentsDir);
8761
8410
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
8762
8411
  await Promise.all(
8763
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path28.join(githubAgentsDir, file)))
8412
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
8764
8413
  );
8765
8414
  }
8766
- const lockFile = path28.join(subagentDir, DEFAULT_LOCK_NAME);
8415
+ const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
8767
8416
  await writeFile3(lockFile, "", { encoding: "utf8" });
8768
8417
  return lockFile;
8769
8418
  }
8770
8419
  async function removeSubagentLock(subagentDir) {
8771
- const lockFile = path28.join(subagentDir, DEFAULT_LOCK_NAME);
8420
+ const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
8772
8421
  await removeIfExists(lockFile);
8773
8422
  }
8774
8423
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -8788,9 +8437,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
8788
8437
  return 1;
8789
8438
  }
8790
8439
  if (promptFile) {
8791
- const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8440
+ const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8792
8441
  await mkdir9(githubAgentsDir, { recursive: true });
8793
- const agentFile = path28.join(githubAgentsDir, `${chatId}.md`);
8442
+ const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
8794
8443
  try {
8795
8444
  await copyFile(promptFile, agentFile);
8796
8445
  } catch (error) {
@@ -8809,7 +8458,7 @@ async function resolvePromptFile(promptFile) {
8809
8458
  if (!promptFile) {
8810
8459
  return void 0;
8811
8460
  }
8812
- const resolvedPrompt = path29.resolve(promptFile);
8461
+ const resolvedPrompt = path27.resolve(promptFile);
8813
8462
  if (!await pathExists(resolvedPrompt)) {
8814
8463
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
8815
8464
  }
@@ -8825,7 +8474,7 @@ async function resolveAttachments(extraAttachments) {
8825
8474
  }
8826
8475
  const resolved = [];
8827
8476
  for (const attachment of extraAttachments) {
8828
- const resolvedPath = path29.resolve(attachment);
8477
+ const resolvedPath = path27.resolve(attachment);
8829
8478
  if (!await pathExists(resolvedPath)) {
8830
8479
  throw new Error(`Attachment not found: ${resolvedPath}`);
8831
8480
  }
@@ -8867,7 +8516,7 @@ async function dispatchAgentSession(options) {
8867
8516
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
8868
8517
  };
8869
8518
  }
8870
- const subagentName = path29.basename(subagentDir);
8519
+ const subagentName = path27.basename(subagentDir);
8871
8520
  const chatId = Math.random().toString(16).slice(2, 10);
8872
8521
  const preparationResult = await prepareSubagentDirectory(
8873
8522
  subagentDir,
@@ -8895,9 +8544,9 @@ async function dispatchAgentSession(options) {
8895
8544
  };
8896
8545
  }
8897
8546
  const timestamp = generateTimestamp();
8898
- const messagesDir = path29.join(subagentDir, "messages");
8899
- const responseFileTmp = path29.join(messagesDir, `${timestamp}_res.tmp.md`);
8900
- const responseFileFinal = path29.join(messagesDir, `${timestamp}_res.md`);
8547
+ const messagesDir = path27.join(subagentDir, "messages");
8548
+ const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
8549
+ const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
8901
8550
  const requestInstructions = createRequestPrompt(
8902
8551
  userQuery,
8903
8552
  responseFileTmp,
@@ -9002,7 +8651,7 @@ async function dispatchBatchAgent(options) {
9002
8651
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
9003
8652
  };
9004
8653
  }
9005
- subagentName = path29.basename(subagentDir);
8654
+ subagentName = path27.basename(subagentDir);
9006
8655
  const chatId = Math.random().toString(16).slice(2, 10);
9007
8656
  const preparationResult = await prepareSubagentDirectory(
9008
8657
  subagentDir,
@@ -9033,17 +8682,17 @@ async function dispatchBatchAgent(options) {
9033
8682
  };
9034
8683
  }
9035
8684
  const timestamp = generateTimestamp();
9036
- const messagesDir = path29.join(subagentDir, "messages");
8685
+ const messagesDir = path27.join(subagentDir, "messages");
9037
8686
  requestFiles = userQueries.map(
9038
- (_, index) => path29.join(messagesDir, `${timestamp}_${index}_req.md`)
8687
+ (_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
9039
8688
  );
9040
8689
  const responseTmpFiles = userQueries.map(
9041
- (_, index) => path29.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
8690
+ (_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
9042
8691
  );
9043
8692
  responseFilesFinal = userQueries.map(
9044
- (_, index) => path29.join(messagesDir, `${timestamp}_${index}_res.md`)
8693
+ (_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
9045
8694
  );
9046
- const orchestratorFile = path29.join(messagesDir, `${timestamp}_orchestrator.md`);
8695
+ const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
9047
8696
  if (!dryRun) {
9048
8697
  await Promise.all(
9049
8698
  userQueries.map((query, index) => {
@@ -9129,7 +8778,7 @@ async function dispatchBatchAgent(options) {
9129
8778
 
9130
8779
  // src/evaluation/providers/vscode/dispatch/provision.ts
9131
8780
  import { writeFile as writeFile5 } from "node:fs/promises";
9132
- import path30 from "node:path";
8781
+ import path28 from "node:path";
9133
8782
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
9134
8783
  folders: [
9135
8784
  {
@@ -9160,7 +8809,7 @@ async function provisionSubagents(options) {
9160
8809
  if (!Number.isInteger(subagents) || subagents < 1) {
9161
8810
  throw new Error("subagents must be a positive integer");
9162
8811
  }
9163
- const targetPath = path30.resolve(targetRoot);
8812
+ const targetPath = path28.resolve(targetRoot);
9164
8813
  if (!dryRun) {
9165
8814
  await ensureDir(targetPath);
9166
8815
  }
@@ -9180,7 +8829,7 @@ async function provisionSubagents(options) {
9180
8829
  continue;
9181
8830
  }
9182
8831
  highestNumber = Math.max(highestNumber, parsed);
9183
- const lockFile = path30.join(entry.absolutePath, lockName);
8832
+ const lockFile = path28.join(entry.absolutePath, lockName);
9184
8833
  const locked = await pathExists(lockFile);
9185
8834
  if (locked) {
9186
8835
  lockedSubagents.add(entry.absolutePath);
@@ -9197,10 +8846,10 @@ async function provisionSubagents(options) {
9197
8846
  break;
9198
8847
  }
9199
8848
  const subagentDir = subagent.absolutePath;
9200
- const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
9201
- const lockFile = path30.join(subagentDir, lockName);
9202
- const workspaceDst = path30.join(subagentDir, `${path30.basename(subagentDir)}.code-workspace`);
9203
- const wakeupDst = path30.join(githubAgentsDir, "wakeup.md");
8849
+ const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8850
+ const lockFile = path28.join(subagentDir, lockName);
8851
+ const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
8852
+ const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9204
8853
  const isLocked = await pathExists(lockFile);
9205
8854
  if (isLocked && !force) {
9206
8855
  continue;
@@ -9238,10 +8887,10 @@ async function provisionSubagents(options) {
9238
8887
  let nextIndex = highestNumber;
9239
8888
  while (subagentsProvisioned < subagents) {
9240
8889
  nextIndex += 1;
9241
- const subagentDir = path30.join(targetPath, `subagent-${nextIndex}`);
9242
- const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
9243
- const workspaceDst = path30.join(subagentDir, `${path30.basename(subagentDir)}.code-workspace`);
9244
- const wakeupDst = path30.join(githubAgentsDir, "wakeup.md");
8890
+ const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
8891
+ const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8892
+ const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
8893
+ const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9245
8894
  if (!dryRun) {
9246
8895
  await ensureDir(subagentDir);
9247
8896
  await ensureDir(githubAgentsDir);
@@ -9308,7 +8957,7 @@ var VSCodeProvider = class {
9308
8957
  }
9309
8958
  await this.ensureEnvironmentReady();
9310
8959
  const inputFiles = normalizeAttachments(request.inputFiles);
9311
- const promptContent = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
8960
+ const promptContent = buildPromptDocument2(request, inputFiles);
9312
8961
  const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
9313
8962
  const startTime = Date.now();
9314
8963
  const session = await dispatchAgentSession({
@@ -9362,7 +9011,7 @@ var VSCodeProvider = class {
9362
9011
  normalizedRequests.map(({ inputFiles }) => inputFiles)
9363
9012
  );
9364
9013
  const userQueries = normalizedRequests.map(
9365
- ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles, request.guideline_patterns)
9014
+ ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
9366
9015
  );
9367
9016
  const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
9368
9017
  this.config.workspaceTemplate
@@ -9431,7 +9080,7 @@ var VSCodeProvider = class {
9431
9080
  async function locateVSCodeExecutable(candidate) {
9432
9081
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
9433
9082
  if (includesPathSeparator) {
9434
- const resolved = path31.isAbsolute(candidate) ? candidate : path31.resolve(candidate);
9083
+ const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
9435
9084
  try {
9436
9085
  await access3(resolved, constants3.F_OK);
9437
9086
  return resolved;
@@ -9460,41 +9109,35 @@ async function resolveWorkspaceTemplateFile(template) {
9460
9109
  return void 0;
9461
9110
  }
9462
9111
  try {
9463
- const stats = await stat4(path31.resolve(template));
9112
+ const stats = await stat4(path29.resolve(template));
9464
9113
  return stats.isFile() ? template : void 0;
9465
9114
  } catch {
9466
9115
  return template;
9467
9116
  }
9468
9117
  }
9469
- function buildPromptDocument2(request, attachments, guidelinePatterns) {
9118
+ function buildPromptDocument2(request, attachments) {
9470
9119
  const parts = [];
9471
9120
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
9472
9121
  parts.push(request.systemPrompt.trim());
9473
9122
  }
9474
- const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
9475
9123
  const attachmentFiles = collectAttachmentFiles(attachments);
9476
- const nonGuidelineAttachments = attachmentFiles.filter((file) => !guidelineFiles.includes(file));
9477
- const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
9124
+ const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
9478
9125
  if (prereadBlock.length > 0) {
9479
9126
  parts.push("\n", prereadBlock);
9480
9127
  }
9481
9128
  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
9482
9129
  return parts.join("\n").trim();
9483
9130
  }
9484
- function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
9485
- if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
9131
+ function buildMandatoryPrereadBlock2(attachmentFiles) {
9132
+ if (attachmentFiles.length === 0) {
9486
9133
  return "";
9487
9134
  }
9488
9135
  const buildList = (files) => files.map((absolutePath) => {
9489
- const fileName = path31.basename(absolutePath);
9136
+ const fileName = path29.basename(absolutePath);
9490
9137
  const fileUri = pathToFileUri3(absolutePath);
9491
9138
  return `* [${fileName}](${fileUri})`;
9492
9139
  });
9493
9140
  const sections = [];
9494
- if (guidelineFiles.length > 0) {
9495
- sections.push(`Read all guideline files:
9496
- ${buildList(guidelineFiles).join("\n")}.`);
9497
- }
9498
9141
  if (attachmentFiles.length > 0) {
9499
9142
  sections.push(`Read all attachment files:
9500
9143
  ${buildList(attachmentFiles).join("\n")}.`);
@@ -9505,29 +9148,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
9505
9148
  );
9506
9149
  return sections.join("\n");
9507
9150
  }
9508
- function collectGuidelineFiles2(attachments, guidelinePatterns) {
9509
- if (!attachments || attachments.length === 0) {
9510
- return [];
9511
- }
9512
- const unique = /* @__PURE__ */ new Map();
9513
- for (const attachment of attachments) {
9514
- const absolutePath = path31.resolve(attachment);
9515
- const normalized = absolutePath.split(path31.sep).join("/");
9516
- if (isGuidelineFile(normalized, guidelinePatterns)) {
9517
- if (!unique.has(absolutePath)) {
9518
- unique.set(absolutePath, absolutePath);
9519
- }
9520
- }
9521
- }
9522
- return Array.from(unique.values());
9523
- }
9524
9151
  function collectAttachmentFiles(attachments) {
9525
9152
  if (!attachments || attachments.length === 0) {
9526
9153
  return [];
9527
9154
  }
9528
9155
  const unique = /* @__PURE__ */ new Map();
9529
9156
  for (const attachment of attachments) {
9530
- const absolutePath = path31.resolve(attachment);
9157
+ const absolutePath = path29.resolve(attachment);
9531
9158
  if (!unique.has(absolutePath)) {
9532
9159
  unique.set(absolutePath, absolutePath);
9533
9160
  }
@@ -9535,7 +9162,7 @@ function collectAttachmentFiles(attachments) {
9535
9162
  return Array.from(unique.values());
9536
9163
  }
9537
9164
  function pathToFileUri3(filePath) {
9538
- const absolutePath = path31.isAbsolute(filePath) ? filePath : path31.resolve(filePath);
9165
+ const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
9539
9166
  const normalizedPath = absolutePath.replace(/\\/g, "/");
9540
9167
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
9541
9168
  return `file:///${normalizedPath}`;
@@ -9548,7 +9175,7 @@ function normalizeAttachments(attachments) {
9548
9175
  }
9549
9176
  const deduped = /* @__PURE__ */ new Set();
9550
9177
  for (const attachment of attachments) {
9551
- deduped.add(path31.resolve(attachment));
9178
+ deduped.add(path29.resolve(attachment));
9552
9179
  }
9553
9180
  return Array.from(deduped);
9554
9181
  }
@@ -9557,7 +9184,7 @@ function mergeAttachments(all) {
9557
9184
  for (const list of all) {
9558
9185
  if (!list) continue;
9559
9186
  for (const inputFile of list) {
9560
- deduped.add(path31.resolve(inputFile));
9187
+ deduped.add(path29.resolve(inputFile));
9561
9188
  }
9562
9189
  }
9563
9190
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -9605,8 +9232,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
9605
9232
 
9606
9233
  // src/evaluation/providers/targets-file.ts
9607
9234
  import { constants as constants4 } from "node:fs";
9608
- import { access as access4, readFile as readFile11 } from "node:fs/promises";
9609
- import path32 from "node:path";
9235
+ import { access as access4, readFile as readFile9 } from "node:fs/promises";
9236
+ import path30 from "node:path";
9610
9237
  import { parse as parse4 } from "yaml";
9611
9238
  function isRecord(value) {
9612
9239
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -9643,11 +9270,11 @@ async function fileExists3(filePath) {
9643
9270
  }
9644
9271
  }
9645
9272
  async function readTargetDefinitions(filePath) {
9646
- const absolutePath = path32.resolve(filePath);
9273
+ const absolutePath = path30.resolve(filePath);
9647
9274
  if (!await fileExists3(absolutePath)) {
9648
9275
  throw new Error(`targets.yaml not found at ${absolutePath}`);
9649
9276
  }
9650
- const raw = await readFile11(absolutePath, "utf8");
9277
+ const raw = await readFile9(absolutePath, "utf8");
9651
9278
  const parsed = parse4(raw);
9652
9279
  if (!isRecord(parsed)) {
9653
9280
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -9663,21 +9290,21 @@ function listTargetNames(definitions) {
9663
9290
  }
9664
9291
 
9665
9292
  // src/evaluation/providers/provider-discovery.ts
9666
- import path33 from "node:path";
9667
- import fg2 from "fast-glob";
9293
+ import path31 from "node:path";
9294
+ import fg from "fast-glob";
9668
9295
  async function discoverProviders(registry, baseDir) {
9669
9296
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
9670
9297
  const candidateDirs = [];
9671
- let dir = path33.resolve(baseDir);
9672
- const root = path33.parse(dir).root;
9298
+ let dir = path31.resolve(baseDir);
9299
+ const root = path31.parse(dir).root;
9673
9300
  while (dir !== root) {
9674
- candidateDirs.push(path33.join(dir, ".agentv", "providers"));
9675
- dir = path33.dirname(dir);
9301
+ candidateDirs.push(path31.join(dir, ".agentv", "providers"));
9302
+ dir = path31.dirname(dir);
9676
9303
  }
9677
9304
  let files = [];
9678
9305
  for (const providersDir of candidateDirs) {
9679
9306
  try {
9680
- const found = await fg2(patterns, {
9307
+ const found = await fg(patterns, {
9681
9308
  cwd: providersDir,
9682
9309
  absolute: true,
9683
9310
  onlyFiles: true
@@ -9688,7 +9315,7 @@ async function discoverProviders(registry, baseDir) {
9688
9315
  }
9689
9316
  const discoveredKinds = [];
9690
9317
  for (const filePath of files) {
9691
- const basename = path33.basename(filePath);
9318
+ const basename = path31.basename(filePath);
9692
9319
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
9693
9320
  if (registry.has(kindName)) {
9694
9321
  continue;
@@ -9897,15 +9524,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
9897
9524
  });
9898
9525
  }
9899
9526
  async function execShellWithStdin(command, stdinPayload, options = {}) {
9900
- const { mkdir: mkdir15, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
9527
+ const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
9901
9528
  const { tmpdir: tmpdir3 } = await import("node:os");
9902
- const path46 = await import("node:path");
9529
+ const path44 = await import("node:path");
9903
9530
  const { randomUUID: randomUUID9 } = await import("node:crypto");
9904
- const dir = path46.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
9531
+ const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
9905
9532
  await mkdir15(dir, { recursive: true });
9906
- const stdinPath = path46.join(dir, "stdin.txt");
9907
- const stdoutPath = path46.join(dir, "stdout.txt");
9908
- const stderrPath = path46.join(dir, "stderr.txt");
9533
+ const stdinPath = path44.join(dir, "stdin.txt");
9534
+ const stdoutPath = path44.join(dir, "stdout.txt");
9535
+ const stderrPath = path44.join(dir, "stderr.txt");
9909
9536
  await writeFile9(stdinPath, stdinPayload, "utf8");
9910
9537
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
9911
9538
  const { spawn: spawn5 } = await import("node:child_process");
@@ -9935,8 +9562,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
9935
9562
  resolve(code ?? 0);
9936
9563
  });
9937
9564
  });
9938
- const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
9939
- const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
9565
+ const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
9566
+ const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
9940
9567
  return { stdout, stderr, exitCode };
9941
9568
  } finally {
9942
9569
  await rm6(dir, { recursive: true, force: true });
@@ -10253,10 +9880,7 @@ var CodeEvaluator = class {
10253
9880
  outputText: context.candidate,
10254
9881
  output: outputForPayload,
10255
9882
  outputPath,
10256
- guidelineFiles: context.evalCase.guideline_paths,
10257
- inputFiles: context.evalCase.file_paths.filter(
10258
- (path46) => !context.evalCase.guideline_paths.includes(path46)
10259
- ),
9883
+ inputFiles: context.evalCase.file_paths,
10260
9884
  input: context.evalCase.input,
10261
9885
  trace: context.trace ?? null,
10262
9886
  tokenUsage: context.tokenUsage ?? null,
@@ -10387,7 +10011,7 @@ import { generateText as generateText3 } from "ai";
10387
10011
 
10388
10012
  // src/evaluation/evaluators/llm-grader.ts
10389
10013
  import fs2 from "node:fs/promises";
10390
- import path34 from "node:path";
10014
+ import path32 from "node:path";
10391
10015
  import { generateText as generateText2, stepCountIs, tool } from "ai";
10392
10016
  import { z as z3 } from "zod";
10393
10017
  var DEFAULT_MAX_STEPS = 10;
@@ -11219,8 +10843,8 @@ function calculateScoreRangeResult(result, rubrics) {
11219
10843
  };
11220
10844
  }
11221
10845
  function resolveSandboxed(basePath, relativePath) {
11222
- const resolved = path34.resolve(basePath, relativePath);
11223
- if (!resolved.startsWith(basePath + path34.sep) && resolved !== basePath) {
10846
+ const resolved = path32.resolve(basePath, relativePath);
10847
+ if (!resolved.startsWith(basePath + path32.sep) && resolved !== basePath) {
11224
10848
  throw new Error(`Path '${relativePath}' is outside the workspace`);
11225
10849
  }
11226
10850
  return resolved;
@@ -11310,11 +10934,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
11310
10934
  for (const entry of entries) {
11311
10935
  if (matches.length >= MAX_SEARCH_MATCHES) return;
11312
10936
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
11313
- const fullPath = path34.join(dirPath, entry.name);
10937
+ const fullPath = path32.join(dirPath, entry.name);
11314
10938
  if (entry.isDirectory()) {
11315
10939
  await searchDirectory(fullPath, workspacePath, regex, matches);
11316
10940
  } else if (entry.isFile()) {
11317
- const ext = path34.extname(entry.name).toLowerCase();
10941
+ const ext = path32.extname(entry.name).toLowerCase();
11318
10942
  if (BINARY_EXTENSIONS.has(ext)) continue;
11319
10943
  try {
11320
10944
  const stat8 = await fs2.stat(fullPath);
@@ -11326,7 +10950,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
11326
10950
  regex.lastIndex = 0;
11327
10951
  if (regex.test(lines[i])) {
11328
10952
  matches.push({
11329
- file: path34.relative(workspacePath, fullPath),
10953
+ file: path32.relative(workspacePath, fullPath),
11330
10954
  line: i + 1,
11331
10955
  text: lines[i].substring(0, 200)
11332
10956
  });
@@ -11961,115 +11585,115 @@ var FieldAccuracyEvaluator = class {
11961
11585
  * Evaluate a single field against the expected value.
11962
11586
  */
11963
11587
  evaluateField(fieldConfig, candidateData, expectedData) {
11964
- const { path: path46, match, required = true, weight = 1 } = fieldConfig;
11965
- const candidateValue = resolvePath(candidateData, path46);
11966
- const expectedValue = resolvePath(expectedData, path46);
11588
+ const { path: path44, match, required = true, weight = 1 } = fieldConfig;
11589
+ const candidateValue = resolvePath(candidateData, path44);
11590
+ const expectedValue = resolvePath(expectedData, path44);
11967
11591
  if (expectedValue === void 0) {
11968
11592
  return {
11969
- path: path46,
11593
+ path: path44,
11970
11594
  score: 1,
11971
11595
  // No expected value means no comparison needed
11972
11596
  weight,
11973
11597
  hit: true,
11974
- message: `${path46}: no expected value`
11598
+ message: `${path44}: no expected value`
11975
11599
  };
11976
11600
  }
11977
11601
  if (candidateValue === void 0) {
11978
11602
  if (required) {
11979
11603
  return {
11980
- path: path46,
11604
+ path: path44,
11981
11605
  score: 0,
11982
11606
  weight,
11983
11607
  hit: false,
11984
- message: `${path46} (required, missing)`
11608
+ message: `${path44} (required, missing)`
11985
11609
  };
11986
11610
  }
11987
11611
  return {
11988
- path: path46,
11612
+ path: path44,
11989
11613
  score: 1,
11990
11614
  // Don't penalize missing optional fields
11991
11615
  weight: 0,
11992
11616
  // Zero weight means it won't affect the score
11993
11617
  hit: true,
11994
- message: `${path46}: optional field missing`
11618
+ message: `${path44}: optional field missing`
11995
11619
  };
11996
11620
  }
11997
11621
  switch (match) {
11998
11622
  case "exact":
11999
- return this.compareExact(path46, candidateValue, expectedValue, weight);
11623
+ return this.compareExact(path44, candidateValue, expectedValue, weight);
12000
11624
  case "numeric_tolerance":
12001
11625
  return this.compareNumericTolerance(
12002
- path46,
11626
+ path44,
12003
11627
  candidateValue,
12004
11628
  expectedValue,
12005
11629
  fieldConfig,
12006
11630
  weight
12007
11631
  );
12008
11632
  case "date":
12009
- return this.compareDate(path46, candidateValue, expectedValue, fieldConfig, weight);
11633
+ return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
12010
11634
  default:
12011
11635
  return {
12012
- path: path46,
11636
+ path: path44,
12013
11637
  score: 0,
12014
11638
  weight,
12015
11639
  hit: false,
12016
- message: `${path46}: unknown match type "${match}"`
11640
+ message: `${path44}: unknown match type "${match}"`
12017
11641
  };
12018
11642
  }
12019
11643
  }
12020
11644
  /**
12021
11645
  * Exact equality comparison.
12022
11646
  */
12023
- compareExact(path46, candidateValue, expectedValue, weight) {
11647
+ compareExact(path44, candidateValue, expectedValue, weight) {
12024
11648
  if (deepEqual(candidateValue, expectedValue)) {
12025
11649
  return {
12026
- path: path46,
11650
+ path: path44,
12027
11651
  score: 1,
12028
11652
  weight,
12029
11653
  hit: true,
12030
- message: path46
11654
+ message: path44
12031
11655
  };
12032
11656
  }
12033
11657
  if (typeof candidateValue !== typeof expectedValue) {
12034
11658
  return {
12035
- path: path46,
11659
+ path: path44,
12036
11660
  score: 0,
12037
11661
  weight,
12038
11662
  hit: false,
12039
- message: `${path46} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
11663
+ message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
12040
11664
  };
12041
11665
  }
12042
11666
  return {
12043
- path: path46,
11667
+ path: path44,
12044
11668
  score: 0,
12045
11669
  weight,
12046
11670
  hit: false,
12047
- message: `${path46} (value mismatch)`
11671
+ message: `${path44} (value mismatch)`
12048
11672
  };
12049
11673
  }
12050
11674
  /**
12051
11675
  * Numeric comparison with absolute or relative tolerance.
12052
11676
  */
12053
- compareNumericTolerance(path46, candidateValue, expectedValue, fieldConfig, weight) {
11677
+ compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
12054
11678
  const { tolerance = 0, relative = false } = fieldConfig;
12055
11679
  const candidateNum = toNumber(candidateValue);
12056
11680
  const expectedNum = toNumber(expectedValue);
12057
11681
  if (candidateNum === null || expectedNum === null) {
12058
11682
  return {
12059
- path: path46,
11683
+ path: path44,
12060
11684
  score: 0,
12061
11685
  weight,
12062
11686
  hit: false,
12063
- message: `${path46} (non-numeric value)`
11687
+ message: `${path44} (non-numeric value)`
12064
11688
  };
12065
11689
  }
12066
11690
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
12067
11691
  return {
12068
- path: path46,
11692
+ path: path44,
12069
11693
  score: 0,
12070
11694
  weight,
12071
11695
  hit: false,
12072
- message: `${path46} (invalid numeric value)`
11696
+ message: `${path44} (invalid numeric value)`
12073
11697
  };
12074
11698
  }
12075
11699
  const diff = Math.abs(candidateNum - expectedNum);
@@ -12082,61 +11706,61 @@ var FieldAccuracyEvaluator = class {
12082
11706
  }
12083
11707
  if (withinTolerance) {
12084
11708
  return {
12085
- path: path46,
11709
+ path: path44,
12086
11710
  score: 1,
12087
11711
  weight,
12088
11712
  hit: true,
12089
- message: `${path46} (within tolerance: diff=${diff.toFixed(2)})`
11713
+ message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
12090
11714
  };
12091
11715
  }
12092
11716
  return {
12093
- path: path46,
11717
+ path: path44,
12094
11718
  score: 0,
12095
11719
  weight,
12096
11720
  hit: false,
12097
- message: `${path46} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
11721
+ message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
12098
11722
  };
12099
11723
  }
12100
11724
  /**
12101
11725
  * Date comparison with format normalization.
12102
11726
  */
12103
- compareDate(path46, candidateValue, expectedValue, fieldConfig, weight) {
11727
+ compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
12104
11728
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
12105
11729
  const candidateDate = parseDate(String(candidateValue), formats);
12106
11730
  const expectedDate = parseDate(String(expectedValue), formats);
12107
11731
  if (candidateDate === null) {
12108
11732
  return {
12109
- path: path46,
11733
+ path: path44,
12110
11734
  score: 0,
12111
11735
  weight,
12112
11736
  hit: false,
12113
- message: `${path46} (unparseable candidate date)`
11737
+ message: `${path44} (unparseable candidate date)`
12114
11738
  };
12115
11739
  }
12116
11740
  if (expectedDate === null) {
12117
11741
  return {
12118
- path: path46,
11742
+ path: path44,
12119
11743
  score: 0,
12120
11744
  weight,
12121
11745
  hit: false,
12122
- message: `${path46} (unparseable expected date)`
11746
+ message: `${path44} (unparseable expected date)`
12123
11747
  };
12124
11748
  }
12125
11749
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
12126
11750
  return {
12127
- path: path46,
11751
+ path: path44,
12128
11752
  score: 1,
12129
11753
  weight,
12130
11754
  hit: true,
12131
- message: path46
11755
+ message: path44
12132
11756
  };
12133
11757
  }
12134
11758
  return {
12135
- path: path46,
11759
+ path: path44,
12136
11760
  score: 0,
12137
11761
  weight,
12138
11762
  hit: false,
12139
- message: `${path46} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
11763
+ message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12140
11764
  };
12141
11765
  }
12142
11766
  /**
@@ -12169,11 +11793,11 @@ var FieldAccuracyEvaluator = class {
12169
11793
  };
12170
11794
  }
12171
11795
  };
12172
- function resolvePath(obj, path46) {
12173
- if (!path46 || !obj) {
11796
+ function resolvePath(obj, path44) {
11797
+ if (!path44 || !obj) {
12174
11798
  return void 0;
12175
11799
  }
12176
- const parts = path46.split(/\.|\[|\]/).filter((p) => p.length > 0);
11800
+ const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
12177
11801
  let current = obj;
12178
11802
  for (const part of parts) {
12179
11803
  if (current === null || current === void 0) {
@@ -12633,8 +12257,8 @@ var TokenUsageEvaluator = class {
12633
12257
  };
12634
12258
 
12635
12259
  // src/evaluation/evaluators/tool-trajectory.ts
12636
- function getNestedValue(obj, path46) {
12637
- const parts = path46.split(".");
12260
+ function getNestedValue(obj, path44) {
12261
+ const parts = path44.split(".");
12638
12262
  let current = obj;
12639
12263
  for (const part of parts) {
12640
12264
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -13256,8 +12880,8 @@ function runEqualsAssertion(output, value) {
13256
12880
  // src/evaluation/orchestrator.ts
13257
12881
  import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
13258
12882
  import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
13259
- import path43 from "node:path";
13260
- import micromatch4 from "micromatch";
12883
+ import path41 from "node:path";
12884
+ import micromatch3 from "micromatch";
13261
12885
 
13262
12886
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
13263
12887
  var Node = class {
@@ -13470,7 +13094,7 @@ var InlineAssertEvaluator = class {
13470
13094
  };
13471
13095
 
13472
13096
  // src/evaluation/evaluators/prompt-resolution.ts
13473
- import path35 from "node:path";
13097
+ import path33 from "node:path";
13474
13098
  async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
13475
13099
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
13476
13100
  if (!context) {
@@ -13505,10 +13129,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
13505
13129
  expectedOutput: context.evalCase.expected_output,
13506
13130
  outputText: context.candidate,
13507
13131
  output: context.output ?? null,
13508
- guidelineFiles: context.evalCase.guideline_paths,
13509
- inputFiles: context.evalCase.file_paths.filter(
13510
- (p) => !context.evalCase.guideline_paths.includes(p)
13511
- ),
13132
+ inputFiles: context.evalCase.file_paths,
13512
13133
  input: context.evalCase.input,
13513
13134
  trace: context.trace ?? null,
13514
13135
  fileChanges: context.fileChanges ?? null,
@@ -13519,7 +13140,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
13519
13140
  };
13520
13141
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
13521
13142
  const scriptPath = script[script.length - 1];
13522
- const cwd = path35.dirname(scriptPath);
13143
+ const cwd = path33.dirname(scriptPath);
13523
13144
  try {
13524
13145
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
13525
13146
  const prompt = stdout.trim();
@@ -13791,21 +13412,21 @@ function createBuiltinRegistry() {
13791
13412
  }
13792
13413
 
13793
13414
  // src/evaluation/registry/assertion-discovery.ts
13794
- import path36 from "node:path";
13795
- import fg3 from "fast-glob";
13415
+ import path34 from "node:path";
13416
+ import fg2 from "fast-glob";
13796
13417
  async function discoverAssertions(registry, baseDir) {
13797
13418
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
13798
13419
  const candidateDirs = [];
13799
- let dir = path36.resolve(baseDir);
13800
- const root = path36.parse(dir).root;
13420
+ let dir = path34.resolve(baseDir);
13421
+ const root = path34.parse(dir).root;
13801
13422
  while (dir !== root) {
13802
- candidateDirs.push(path36.join(dir, ".agentv", "assertions"));
13803
- dir = path36.dirname(dir);
13423
+ candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
13424
+ dir = path34.dirname(dir);
13804
13425
  }
13805
13426
  let files = [];
13806
13427
  for (const assertionsDir of candidateDirs) {
13807
13428
  try {
13808
- const found = await fg3(patterns, {
13429
+ const found = await fg2(patterns, {
13809
13430
  cwd: assertionsDir,
13810
13431
  absolute: true,
13811
13432
  onlyFiles: true
@@ -13816,7 +13437,7 @@ async function discoverAssertions(registry, baseDir) {
13816
13437
  }
13817
13438
  const discoveredTypes = [];
13818
13439
  for (const filePath of files) {
13819
- const basename = path36.basename(filePath);
13440
+ const basename = path34.basename(filePath);
13820
13441
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
13821
13442
  if (registry.has(typeName)) {
13822
13443
  continue;
@@ -13834,22 +13455,22 @@ async function discoverAssertions(registry, baseDir) {
13834
13455
  }
13835
13456
 
13836
13457
  // src/evaluation/registry/grader-discovery.ts
13837
- import path37 from "node:path";
13838
- import fg4 from "fast-glob";
13458
+ import path35 from "node:path";
13459
+ import fg3 from "fast-glob";
13839
13460
  async function discoverGraders(registry, baseDir) {
13840
13461
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
13841
13462
  const candidateDirs = [];
13842
- let dir = path37.resolve(baseDir);
13843
- const root = path37.parse(dir).root;
13463
+ let dir = path35.resolve(baseDir);
13464
+ const root = path35.parse(dir).root;
13844
13465
  while (dir !== root) {
13845
- candidateDirs.push(path37.join(dir, ".agentv", "graders"));
13846
- candidateDirs.push(path37.join(dir, ".agentv", "judges"));
13847
- dir = path37.dirname(dir);
13466
+ candidateDirs.push(path35.join(dir, ".agentv", "graders"));
13467
+ candidateDirs.push(path35.join(dir, ".agentv", "judges"));
13468
+ dir = path35.dirname(dir);
13848
13469
  }
13849
13470
  let files = [];
13850
13471
  for (const gradersDir of candidateDirs) {
13851
13472
  try {
13852
- const found = await fg4(patterns, {
13473
+ const found = await fg3(patterns, {
13853
13474
  cwd: gradersDir,
13854
13475
  absolute: true,
13855
13476
  onlyFiles: true
@@ -13860,7 +13481,7 @@ async function discoverGraders(registry, baseDir) {
13860
13481
  }
13861
13482
  const discoveredTypes = [];
13862
13483
  for (const filePath of files) {
13863
- const basename = path37.basename(filePath);
13484
+ const basename = path35.basename(filePath);
13864
13485
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
13865
13486
  if (registry.has(typeName)) {
13866
13487
  continue;
@@ -14020,7 +13641,7 @@ function getTCritical(df) {
14020
13641
  // src/evaluation/workspace/file-changes.ts
14021
13642
  import { exec as execCallback } from "node:child_process";
14022
13643
  import { readdirSync as readdirSync2, statSync } from "node:fs";
14023
- import path38 from "node:path";
13644
+ import path36 from "node:path";
14024
13645
  import { promisify as promisify4 } from "node:util";
14025
13646
  var execAsync4 = promisify4(execCallback);
14026
13647
  function gitExecOpts(workspacePath) {
@@ -14054,10 +13675,10 @@ async function stageNestedRepoChanges(workspacePath) {
14054
13675
  }
14055
13676
  for (const entry of entries) {
14056
13677
  if (entry === ".git" || entry === "node_modules") continue;
14057
- const childPath = path38.join(workspacePath, entry);
13678
+ const childPath = path36.join(workspacePath, entry);
14058
13679
  try {
14059
13680
  if (!statSync(childPath).isDirectory()) continue;
14060
- if (!statSync(path38.join(childPath, ".git")).isDirectory()) continue;
13681
+ if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
14061
13682
  } catch {
14062
13683
  continue;
14063
13684
  }
@@ -14068,7 +13689,7 @@ async function stageNestedRepoChanges(workspacePath) {
14068
13689
 
14069
13690
  // src/evaluation/workspace/manager.ts
14070
13691
  import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
14071
- import path39 from "node:path";
13692
+ import path37 from "node:path";
14072
13693
  var TemplateNotFoundError = class extends Error {
14073
13694
  constructor(templatePath) {
14074
13695
  super(`Workspace template not found: ${templatePath}`);
@@ -14098,14 +13719,14 @@ async function isDirectory(filePath) {
14098
13719
  }
14099
13720
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
14100
13721
  const root = workspaceRoot ?? getWorkspacesRoot();
14101
- return path39.join(root, evalRunId, caseId);
13722
+ return path37.join(root, evalRunId, caseId);
14102
13723
  }
14103
13724
  async function copyDirectoryRecursive(src, dest) {
14104
13725
  await mkdir11(dest, { recursive: true });
14105
13726
  const entries = await readdir3(src, { withFileTypes: true });
14106
13727
  for (const entry of entries) {
14107
- const srcPath = path39.join(src, entry.name);
14108
- const destPath = path39.join(dest, entry.name);
13728
+ const srcPath = path37.join(src, entry.name);
13729
+ const destPath = path37.join(dest, entry.name);
14109
13730
  if (entry.name === ".git") {
14110
13731
  continue;
14111
13732
  }
@@ -14117,7 +13738,7 @@ async function copyDirectoryRecursive(src, dest) {
14117
13738
  }
14118
13739
  }
14119
13740
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
14120
- const resolvedTemplatePath = path39.resolve(templatePath);
13741
+ const resolvedTemplatePath = path37.resolve(templatePath);
14121
13742
  if (!await fileExists(resolvedTemplatePath)) {
14122
13743
  throw new TemplateNotFoundError(resolvedTemplatePath);
14123
13744
  }
@@ -14166,7 +13787,7 @@ async function cleanupWorkspace(workspacePath) {
14166
13787
  }
14167
13788
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
14168
13789
  const root = workspaceRoot ?? getWorkspacesRoot();
14169
- const evalDir = path39.join(root, evalRunId);
13790
+ const evalDir = path37.join(root, evalRunId);
14170
13791
  if (await fileExists(evalDir)) {
14171
13792
  await rm4(evalDir, { recursive: true, force: true });
14172
13793
  }
@@ -14176,8 +13797,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
14176
13797
  import { execFile } from "node:child_process";
14177
13798
  import { createHash } from "node:crypto";
14178
13799
  import { existsSync as existsSync2 } from "node:fs";
14179
- import { cp as cp2, mkdir as mkdir12, readFile as readFile12, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
14180
- import path40 from "node:path";
13800
+ import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
13801
+ import path38 from "node:path";
14181
13802
  import { promisify as promisify5 } from "node:util";
14182
13803
  var execFileAsync = promisify5(execFile);
14183
13804
  function gitEnv() {
@@ -14231,8 +13852,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
14231
13852
  await mkdir12(dest, { recursive: true });
14232
13853
  const entries = await readdir4(src, { withFileTypes: true });
14233
13854
  for (const entry of entries) {
14234
- const srcPath = path40.join(src, entry.name);
14235
- const destPath = path40.join(dest, entry.name);
13855
+ const srcPath = path38.join(src, entry.name);
13856
+ const destPath = path38.join(dest, entry.name);
14236
13857
  if (entry.name === ".git") {
14237
13858
  continue;
14238
13859
  }
@@ -14265,7 +13886,7 @@ var WorkspacePoolManager = class {
14265
13886
  async acquireWorkspace(options) {
14266
13887
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
14267
13888
  const fingerprint = computeWorkspaceFingerprint(repos);
14268
- const poolDir = path40.join(this.poolRoot, fingerprint);
13889
+ const poolDir = path38.join(this.poolRoot, fingerprint);
14269
13890
  await mkdir12(poolDir, { recursive: true });
14270
13891
  const drifted = await this.checkDrift(poolDir, fingerprint);
14271
13892
  if (drifted) {
@@ -14275,7 +13896,7 @@ var WorkspacePoolManager = class {
14275
13896
  await this.removeAllSlots(poolDir);
14276
13897
  }
14277
13898
  for (let i = 0; i < maxSlots; i++) {
14278
- const slotPath = path40.join(poolDir, `slot-${i}`);
13899
+ const slotPath = path38.join(poolDir, `slot-${i}`);
14279
13900
  const lockPath = `${slotPath}.lock`;
14280
13901
  const locked = await this.tryLock(lockPath);
14281
13902
  if (!locked) {
@@ -14337,7 +13958,7 @@ var WorkspacePoolManager = class {
14337
13958
  throw err;
14338
13959
  }
14339
13960
  try {
14340
- const pidStr = await readFile12(lockPath, "utf-8");
13961
+ const pidStr = await readFile10(lockPath, "utf-8");
14341
13962
  const pid = Number.parseInt(pidStr.trim(), 10);
14342
13963
  if (!Number.isNaN(pid)) {
14343
13964
  try {
@@ -14362,9 +13983,9 @@ var WorkspacePoolManager = class {
14362
13983
  * Returns false (no drift) if metadata.json doesn't exist (first use).
14363
13984
  */
14364
13985
  async checkDrift(poolDir, fingerprint) {
14365
- const metadataPath = path40.join(poolDir, "metadata.json");
13986
+ const metadataPath = path38.join(poolDir, "metadata.json");
14366
13987
  try {
14367
- const raw = await readFile12(metadataPath, "utf-8");
13988
+ const raw = await readFile10(metadataPath, "utf-8");
14368
13989
  const metadata = JSON.parse(raw);
14369
13990
  return metadata.fingerprint !== fingerprint;
14370
13991
  } catch {
@@ -14379,17 +14000,17 @@ var WorkspacePoolManager = class {
14379
14000
  repos,
14380
14001
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
14381
14002
  };
14382
- await writeFile7(path40.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
14003
+ await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
14383
14004
  }
14384
14005
  /** Remove all slot directories and their lock files from a pool directory. */
14385
14006
  async removeAllSlots(poolDir) {
14386
14007
  const entries = await readdir4(poolDir);
14387
14008
  for (const entry of entries) {
14388
14009
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
14389
- const lockPath = path40.join(poolDir, `${entry}.lock`);
14010
+ const lockPath = path38.join(poolDir, `${entry}.lock`);
14390
14011
  if (existsSync2(lockPath)) {
14391
14012
  try {
14392
- const pidStr = await readFile12(lockPath, "utf-8");
14013
+ const pidStr = await readFile10(lockPath, "utf-8");
14393
14014
  const pid = Number.parseInt(pidStr.trim(), 10);
14394
14015
  if (!Number.isNaN(pid)) {
14395
14016
  try {
@@ -14402,12 +14023,12 @@ var WorkspacePoolManager = class {
14402
14023
  } catch {
14403
14024
  }
14404
14025
  }
14405
- await rm5(path40.join(poolDir, entry), { recursive: true, force: true });
14026
+ await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
14406
14027
  await rm5(lockPath, { force: true }).catch(() => {
14407
14028
  });
14408
14029
  }
14409
14030
  }
14410
- await rm5(path40.join(poolDir, "metadata.json"), { force: true }).catch(() => {
14031
+ await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
14411
14032
  });
14412
14033
  }
14413
14034
  /**
@@ -14417,7 +14038,7 @@ var WorkspacePoolManager = class {
14417
14038
  */
14418
14039
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
14419
14040
  for (const repo of repos) {
14420
- const repoDir = path40.join(slotPath, repo.path);
14041
+ const repoDir = path38.join(slotPath, repo.path);
14421
14042
  if (!existsSync2(repoDir)) {
14422
14043
  continue;
14423
14044
  }
@@ -14444,7 +14065,7 @@ var WorkspacePoolManager = class {
14444
14065
  // src/evaluation/workspace/repo-manager.ts
14445
14066
  import { execFile as execFile2 } from "node:child_process";
14446
14067
  import { existsSync as existsSync3 } from "node:fs";
14447
- import path41 from "node:path";
14068
+ import path39 from "node:path";
14448
14069
  import { promisify as promisify6 } from "node:util";
14449
14070
  var execFileAsync2 = promisify6(execFile2);
14450
14071
  var DEFAULT_TIMEOUT_MS2 = 3e5;
@@ -14544,7 +14165,7 @@ ${lines.join("\n")}`;
14544
14165
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
14545
14166
  */
14546
14167
  async materialize(repo, workspacePath) {
14547
- const targetDir = path41.join(workspacePath, repo.path);
14168
+ const targetDir = path39.join(workspacePath, repo.path);
14548
14169
  const sourceUrl = getSourceUrl(repo.source);
14549
14170
  const startedAt = Date.now();
14550
14171
  if (this.verbose) {
@@ -14635,7 +14256,7 @@ ${lines.join("\n")}`;
14635
14256
  async reset(repos, workspacePath, reset) {
14636
14257
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
14637
14258
  for (const repo of repos) {
14638
- const targetDir = path41.join(workspacePath, repo.path);
14259
+ const targetDir = path39.join(workspacePath, repo.path);
14639
14260
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
14640
14261
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
14641
14262
  }
@@ -14644,16 +14265,16 @@ ${lines.join("\n")}`;
14644
14265
 
14645
14266
  // src/evaluation/workspace/resolve.ts
14646
14267
  import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
14647
- import path42 from "node:path";
14268
+ import path40 from "node:path";
14648
14269
  async function resolveWorkspaceTemplate(templatePath) {
14649
14270
  if (!templatePath) {
14650
14271
  return void 0;
14651
14272
  }
14652
- const resolved = path42.resolve(templatePath);
14273
+ const resolved = path40.resolve(templatePath);
14653
14274
  const stats = await stat6(resolved);
14654
14275
  if (stats.isFile()) {
14655
14276
  return {
14656
- dir: path42.dirname(resolved),
14277
+ dir: path40.dirname(resolved),
14657
14278
  workspaceFile: resolved
14658
14279
  };
14659
14280
  }
@@ -14665,14 +14286,14 @@ async function resolveWorkspaceTemplate(templatePath) {
14665
14286
  if (workspaceFiles.length === 1) {
14666
14287
  return {
14667
14288
  dir: resolved,
14668
- workspaceFile: path42.join(resolved, workspaceFiles[0])
14289
+ workspaceFile: path40.join(resolved, workspaceFiles[0])
14669
14290
  };
14670
14291
  }
14671
14292
  if (workspaceFiles.length > 1) {
14672
14293
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
14673
14294
  return {
14674
14295
  dir: resolved,
14675
- workspaceFile: conventionFile ? path42.join(resolved, conventionFile) : void 0
14296
+ workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
14676
14297
  };
14677
14298
  }
14678
14299
  return { dir: resolved };
@@ -14876,7 +14497,7 @@ async function runEvaluation(options) {
14876
14497
  ];
14877
14498
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
14878
14499
  const typeRegistry = createBuiltinRegistry();
14879
- const discoveryBaseDir = evalFilePath ? path43.dirname(path43.resolve(evalFilePath)) : process.cwd();
14500
+ const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
14880
14501
  const evalDir = discoveryBaseDir;
14881
14502
  await discoverAssertions(typeRegistry, discoveryBaseDir);
14882
14503
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -15065,7 +14686,7 @@ async function runEvaluation(options) {
15065
14686
  }
15066
14687
  try {
15067
14688
  if (suiteWorkspaceFile && sharedWorkspacePath) {
15068
- const copiedWorkspaceFile = path43.join(sharedWorkspacePath, path43.basename(suiteWorkspaceFile));
14689
+ const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
15069
14690
  try {
15070
14691
  await stat7(copiedWorkspaceFile);
15071
14692
  suiteWorkspaceFile = copiedWorkspaceFile;
@@ -15175,10 +14796,10 @@ async function runEvaluation(options) {
15175
14796
  const budgetResult = {
15176
14797
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
15177
14798
  testId: evalCase.id,
15178
- dataset: evalCase.dataset,
14799
+ eval_set: evalCase.eval_set,
15179
14800
  score: 0,
15180
14801
  assertions: [],
15181
- outputText: "",
14802
+ output: [],
15182
14803
  target: target.name,
15183
14804
  error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
15184
14805
  budgetExceeded: true,
@@ -15211,10 +14832,10 @@ async function runEvaluation(options) {
15211
14832
  const haltResult = {
15212
14833
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
15213
14834
  testId: evalCase.id,
15214
- dataset: evalCase.dataset,
14835
+ eval_set: evalCase.eval_set,
15215
14836
  score: 0,
15216
14837
  assertions: [],
15217
- outputText: "",
14838
+ output: [],
15218
14839
  target: target.name,
15219
14840
  error: errorMsg,
15220
14841
  executionStatus: "execution_error",
@@ -15443,8 +15064,6 @@ async function runBatchEvaluation(options) {
15443
15064
  const promptInputs = promptInputsList[index];
15444
15065
  return {
15445
15066
  question: promptInputs.question,
15446
- guidelines: promptInputs.guidelines,
15447
- guideline_patterns: evalCase.guideline_patterns,
15448
15067
  inputFiles: evalCase.file_paths,
15449
15068
  evalCaseId: evalCase.id,
15450
15069
  metadata: {
@@ -15642,7 +15261,7 @@ async function runEvalCase(options) {
15642
15261
  );
15643
15262
  }
15644
15263
  if (caseWorkspaceFile && workspacePath) {
15645
- const copiedFile = path43.join(workspacePath, path43.basename(caseWorkspaceFile));
15264
+ const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
15646
15265
  try {
15647
15266
  await stat7(copiedFile);
15648
15267
  caseWorkspaceFile = copiedFile;
@@ -15702,10 +15321,10 @@ async function runEvalCase(options) {
15702
15321
  const files = evalCase.metadata.agent_skills_files;
15703
15322
  if (baseDir && files.length > 0) {
15704
15323
  for (const relPath of files) {
15705
- const srcPath = path43.resolve(baseDir, relPath);
15706
- const destPath = path43.resolve(workspacePath, relPath);
15324
+ const srcPath = path41.resolve(baseDir, relPath);
15325
+ const destPath = path41.resolve(workspacePath, relPath);
15707
15326
  try {
15708
- await mkdir13(path43.dirname(destPath), { recursive: true });
15327
+ await mkdir13(path41.dirname(destPath), { recursive: true });
15709
15328
  await copyFile2(srcPath, destPath);
15710
15329
  } catch (error) {
15711
15330
  const message = error instanceof Error ? error.message : String(error);
@@ -16152,8 +15771,7 @@ async function evaluateCandidate(options) {
16152
15771
  let lmRequest;
16153
15772
  if (isAgentProvider(provider)) {
16154
15773
  agentRequest = {
16155
- question: promptInputs.question,
16156
- guideline_paths: evalCase.guideline_paths
15774
+ question: promptInputs.question
16157
15775
  };
16158
15776
  } else {
16159
15777
  if (promptInputs.chatPrompt) {
@@ -16162,8 +15780,7 @@ async function evaluateCandidate(options) {
16162
15780
  };
16163
15781
  } else {
16164
15782
  lmRequest = {
16165
- question: promptInputs.question,
16166
- guidelines: promptInputs.guidelines
15783
+ question: promptInputs.question
16167
15784
  };
16168
15785
  }
16169
15786
  }
@@ -16177,11 +15794,10 @@ async function evaluateCandidate(options) {
16177
15794
  return {
16178
15795
  timestamp: completedAt.toISOString(),
16179
15796
  testId: evalCase.id,
16180
- dataset: evalCase.dataset,
15797
+ eval_set: evalCase.eval_set,
16181
15798
  conversationId: evalCase.conversation_id,
16182
15799
  score: score.score,
16183
15800
  assertions: score.assertions,
16184
- outputText: candidate,
16185
15801
  target: target.name,
16186
15802
  tokenUsage,
16187
15803
  costUsd,
@@ -16192,7 +15808,7 @@ async function evaluateCandidate(options) {
16192
15808
  input,
16193
15809
  scores,
16194
15810
  trace,
16195
- output,
15811
+ output: output ?? [{ role: "assistant", content: candidate }],
16196
15812
  fileChanges,
16197
15813
  executionStatus: classifyQualityStatus(score.score)
16198
15814
  };
@@ -16326,7 +15942,7 @@ async function runEvaluatorList(options) {
16326
15942
  fileChanges,
16327
15943
  workspacePath
16328
15944
  };
16329
- const evalFileDir = evalCase.guideline_paths[0] ? path43.dirname(evalCase.guideline_paths[0]) : process.cwd();
15945
+ const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
16330
15946
  const dispatchContext = {
16331
15947
  graderProvider,
16332
15948
  targetResolver,
@@ -16357,7 +15973,7 @@ async function runEvaluatorList(options) {
16357
15973
  weight,
16358
15974
  verdict: score2.verdict,
16359
15975
  assertions: score2.assertions,
16360
- evaluatorProviderRequest: score2.evaluatorRawRequest,
15976
+ input: score2.evaluatorRawRequest,
16361
15977
  details: score2.details,
16362
15978
  scores: mapChildResults(score2.scores),
16363
15979
  tokenUsage: score2.tokenUsage,
@@ -16440,7 +16056,7 @@ function filterEvalCases(evalCases, filter) {
16440
16056
  if (!filter) {
16441
16057
  return evalCases;
16442
16058
  }
16443
- return evalCases.filter((evalCase) => micromatch4.isMatch(evalCase.id, filter));
16059
+ return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter));
16444
16060
  }
16445
16061
  function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
16446
16062
  const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
@@ -16477,8 +16093,6 @@ async function invokeProvider(provider, options) {
16477
16093
  const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
16478
16094
  return await provider.invoke({
16479
16095
  question: promptInputs.question,
16480
- guidelines: promptInputs.guidelines,
16481
- guideline_patterns: evalCase.guideline_patterns,
16482
16096
  chatPrompt: promptInputs.chatPrompt,
16483
16097
  inputFiles: evalCase.file_paths,
16484
16098
  evalCaseId: evalCase.id,
@@ -16506,21 +16120,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
16506
16120
  if (isAgentProvider(provider)) {
16507
16121
  agentRequest = {
16508
16122
  question: promptInputs.question,
16509
- guideline_paths: evalCase.guideline_paths,
16510
16123
  error: message
16511
16124
  };
16512
16125
  } else {
16513
16126
  if (promptInputs.chatPrompt) {
16514
16127
  lmRequest = {
16515
16128
  chat_prompt: promptInputs.chatPrompt,
16516
- guideline_paths: evalCase.guideline_paths,
16517
16129
  error: message
16518
16130
  };
16519
16131
  } else {
16520
16132
  lmRequest = {
16521
16133
  question: promptInputs.question,
16522
- guidelines: promptInputs.guidelines,
16523
- guideline_paths: evalCase.guideline_paths,
16524
16134
  error: message
16525
16135
  };
16526
16136
  }
@@ -16533,11 +16143,11 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
16533
16143
  return {
16534
16144
  timestamp: timestamp.toISOString(),
16535
16145
  testId: evalCase.id,
16536
- dataset: evalCase.dataset,
16146
+ eval_set: evalCase.eval_set,
16537
16147
  conversationId: evalCase.conversation_id,
16538
16148
  score: 0,
16539
16149
  assertions: [{ text: `Error: ${message}`, passed: false }],
16540
- outputText: `Error occurred: ${message}`,
16150
+ output: [{ role: "assistant", content: `Error occurred: ${message}` }],
16541
16151
  target: targetName,
16542
16152
  requests,
16543
16153
  input,
@@ -16566,7 +16176,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
16566
16176
  hash.update(target.name);
16567
16177
  hash.update(evalCase.id);
16568
16178
  hash.update(promptInputs.question);
16569
- hash.update(promptInputs.guidelines);
16570
16179
  hash.update(promptInputs.systemMessage ?? "");
16571
16180
  if (promptInputs.chatPrompt) {
16572
16181
  hash.update(JSON.stringify(promptInputs.chatPrompt));
@@ -16581,7 +16190,7 @@ function buildResultInput(promptInputs) {
16581
16190
  content: message.content
16582
16191
  }));
16583
16192
  }
16584
- return promptInputs.question;
16193
+ return [{ role: "user", content: promptInputs.question }];
16585
16194
  }
16586
16195
  function aggregateEvaluatorTokenUsage(scores) {
16587
16196
  if (!scores || scores.length === 0) return void 0;
@@ -16647,7 +16256,7 @@ function mapChildResults(children) {
16647
16256
  weight: child.weight,
16648
16257
  verdict: child.verdict,
16649
16258
  assertions: child.assertions,
16650
- evaluatorProviderRequest: child.evaluatorRawRequest,
16259
+ input: child.evaluatorRawRequest,
16651
16260
  scores: mapChildResults(child.scores),
16652
16261
  details: child.details,
16653
16262
  tokenUsage: child.tokenUsage
@@ -16666,7 +16275,7 @@ function computeWeightedMean(entries) {
16666
16275
 
16667
16276
  // src/evaluation/evaluate.ts
16668
16277
  import { existsSync as existsSync4 } from "node:fs";
16669
- import path44 from "node:path";
16278
+ import path42 from "node:path";
16670
16279
 
16671
16280
  // src/evaluation/providers/function-provider.ts
16672
16281
  function createFunctionProvider(taskFn) {
@@ -16703,7 +16312,7 @@ async function evaluate(config) {
16703
16312
  }
16704
16313
  const gitRoot = await findGitRoot(process.cwd());
16705
16314
  const repoRoot = gitRoot ?? process.cwd();
16706
- const testFilePath = config.specFile ? path44.resolve(config.specFile) : path44.join(process.cwd(), "__programmatic__.yaml");
16315
+ const testFilePath = config.specFile ? path42.resolve(config.specFile) : path42.join(process.cwd(), "__programmatic__.yaml");
16707
16316
  await loadEnvHierarchy(repoRoot, testFilePath);
16708
16317
  let resolvedTarget;
16709
16318
  let taskProvider;
@@ -16769,8 +16378,6 @@ async function evaluate(config) {
16769
16378
  input_segments: inputSegments,
16770
16379
  expected_output: expectedOutput,
16771
16380
  reference_answer: expectedOutputValue,
16772
- guideline_paths: [],
16773
- guideline_patterns: [],
16774
16381
  file_paths: [],
16775
16382
  assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
16776
16383
  metadata: test.metadata
@@ -16832,10 +16439,10 @@ function computeSummary(results, durationMs) {
16832
16439
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
16833
16440
  async function discoverDefaultTarget(repoRoot) {
16834
16441
  const cwd = process.cwd();
16835
- const chain = buildDirectoryChain(path44.join(cwd, "_placeholder"), repoRoot);
16442
+ const chain = buildDirectoryChain(path42.join(cwd, "_placeholder"), repoRoot);
16836
16443
  for (const dir of chain) {
16837
16444
  for (const candidate of TARGET_FILE_CANDIDATES) {
16838
- const targetsPath = path44.join(dir, candidate);
16445
+ const targetsPath = path42.join(dir, candidate);
16839
16446
  if (!existsSync4(targetsPath)) continue;
16840
16447
  try {
16841
16448
  const definitions = await readTargetDefinitions(targetsPath);
@@ -16852,7 +16459,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
16852
16459
  const chain = buildDirectoryChain(startPath, repoRoot);
16853
16460
  const envFiles = [];
16854
16461
  for (const dir of chain) {
16855
- const envPath = path44.join(dir, ".env");
16462
+ const envPath = path42.join(dir, ".env");
16856
16463
  if (existsSync4(envPath)) envFiles.push(envPath);
16857
16464
  }
16858
16465
  for (let i = 0; i < envFiles.length; i++) {
@@ -17033,8 +16640,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
17033
16640
  }
17034
16641
 
17035
16642
  // src/evaluation/cache/response-cache.ts
17036
- import { mkdir as mkdir14, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
17037
- import path45 from "node:path";
16643
+ import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
16644
+ import path43 from "node:path";
17038
16645
  var DEFAULT_CACHE_PATH = ".agentv/cache";
17039
16646
  var ResponseCache = class {
17040
16647
  cachePath;
@@ -17044,7 +16651,7 @@ var ResponseCache = class {
17044
16651
  async get(key) {
17045
16652
  const filePath = this.keyToPath(key);
17046
16653
  try {
17047
- const data = await readFile13(filePath, "utf8");
16654
+ const data = await readFile11(filePath, "utf8");
17048
16655
  return JSON.parse(data);
17049
16656
  } catch {
17050
16657
  return void 0;
@@ -17052,13 +16659,13 @@ var ResponseCache = class {
17052
16659
  }
17053
16660
  async set(key, value) {
17054
16661
  const filePath = this.keyToPath(key);
17055
- const dir = path45.dirname(filePath);
16662
+ const dir = path43.dirname(filePath);
17056
16663
  await mkdir14(dir, { recursive: true });
17057
16664
  await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
17058
16665
  }
17059
16666
  keyToPath(key) {
17060
16667
  const prefix = key.slice(0, 2);
17061
- return path45.join(this.cachePath, prefix, `${key}.json`);
16668
+ return path43.join(this.cachePath, prefix, `${key}.json`);
17062
16669
  }
17063
16670
  };
17064
16671
  function shouldEnableCache(params) {
@@ -17075,7 +16682,6 @@ function shouldSkipCacheForTemperature(targetConfig) {
17075
16682
 
17076
16683
  // src/evaluation/baseline.ts
17077
16684
  var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
17078
- "outputText",
17079
16685
  "requests",
17080
16686
  "trace",
17081
16687
  "workspacePath",
@@ -17092,7 +16698,7 @@ var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
17092
16698
  "startTime",
17093
16699
  "endTime"
17094
16700
  ]);
17095
- var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "evaluatorProviderRequest"]);
16701
+ var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "input"]);
17096
16702
  function trimEvaluatorResult(result) {
17097
16703
  const trimmed = {};
17098
16704
  for (const [key, value] of Object.entries(result)) {
@@ -17247,9 +16853,13 @@ var OtelTraceExporter = class {
17247
16853
  rootSpan.setAttribute("gen_ai.system", "agentv");
17248
16854
  rootSpan.setAttribute("agentv.test_id", result.testId);
17249
16855
  rootSpan.setAttribute("agentv.target", result.target);
17250
- if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
16856
+ if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
17251
16857
  rootSpan.setAttribute("agentv.score", result.score);
17252
- if (captureContent) rootSpan.setAttribute("agentv.output_text", result.outputText);
16858
+ if (captureContent && result.output.length > 0) {
16859
+ const lastMsg = result.output[result.output.length - 1];
16860
+ const text = typeof lastMsg.content === "string" ? lastMsg.content : JSON.stringify(lastMsg.content);
16861
+ rootSpan.setAttribute("agentv.output_text", text);
16862
+ }
17253
16863
  if (result.durationMs != null)
17254
16864
  rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
17255
16865
  if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
@@ -17428,14 +17038,14 @@ var OtelStreamingObserver = class {
17428
17038
  // biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
17429
17039
  rootCtx = null;
17430
17040
  /** Create root eval span immediately (visible in backend right away) */
17431
- startEvalCase(testId, target, dataset) {
17041
+ startEvalCase(testId, target, evalSet) {
17432
17042
  const ctx = this.parentCtx ?? this.api.context.active();
17433
17043
  this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
17434
17044
  this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
17435
17045
  this.rootSpan.setAttribute("gen_ai.system", "agentv");
17436
17046
  this.rootSpan.setAttribute("agentv.test_id", testId);
17437
17047
  this.rootSpan.setAttribute("agentv.target", target);
17438
- if (dataset) this.rootSpan.setAttribute("agentv.dataset", dataset);
17048
+ if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
17439
17049
  this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
17440
17050
  }
17441
17051
  /** Create and immediately export a tool span */
@@ -17623,7 +17233,6 @@ export {
17623
17233
  initializeBaseline,
17624
17234
  isAgentSkillsFormat,
17625
17235
  isEvaluatorKind,
17626
- isGuidelineFile,
17627
17236
  isJsonObject,
17628
17237
  isJsonValue,
17629
17238
  isNonEmptyString,