@agentv/core 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -53,6 +53,7 @@ __export(index_exports, {
53
53
  createAgentKernel: () => createAgentKernel,
54
54
  createProvider: () => createProvider,
55
55
  deepEqual: () => deepEqual,
56
+ detectFormat: () => detectFormat,
56
57
  ensureVSCodeSubagents: () => ensureVSCodeSubagents,
57
58
  executeScript: () => executeScript,
58
59
  explorationRatio: () => explorationRatio,
@@ -226,9 +227,9 @@ function mergeExecutionMetrics(summary, metrics) {
226
227
  }
227
228
 
228
229
  // src/evaluation/yaml-parser.ts
229
- var import_promises6 = require("fs/promises");
230
- var import_node_path6 = __toESM(require("path"), 1);
231
- var import_yaml2 = require("yaml");
230
+ var import_promises7 = require("fs/promises");
231
+ var import_node_path7 = __toESM(require("path"), 1);
232
+ var import_yaml3 = require("yaml");
232
233
 
233
234
  // src/evaluation/loaders/config-loader.ts
234
235
  var import_promises2 = require("fs/promises");
@@ -337,7 +338,6 @@ async function resolveFileReference(rawValue, searchRoots) {
337
338
  }
338
339
 
339
340
  // src/evaluation/loaders/config-loader.ts
340
- var SCHEMA_CONFIG_V2 = "agentv-config-v2";
341
341
  var ANSI_YELLOW = "\x1B[33m";
342
342
  var ANSI_RESET = "\x1B[0m";
343
343
  async function loadConfig(evalFilePath, repoRoot) {
@@ -355,13 +355,6 @@ async function loadConfig(evalFilePath, repoRoot) {
355
355
  continue;
356
356
  }
357
357
  const config = parsed;
358
- const schema = config.$schema;
359
- if (schema !== SCHEMA_CONFIG_V2) {
360
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
361
- Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
362
- logWarning(message);
363
- continue;
364
- }
365
358
  const guidelinePatterns = config.guideline_patterns;
366
359
  if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
367
360
  logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -470,7 +463,8 @@ var ANSI_YELLOW3 = "\x1B[33m";
470
463
  var ANSI_RESET3 = "\x1B[0m";
471
464
  async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
472
465
  const execution = rawEvalCase.execution;
473
- const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
466
+ const executionObject = isJsonObject2(execution) ? execution : void 0;
467
+ const candidateEvaluators = (executionObject ? executionObject.evaluators : void 0) ?? rawEvalCase.evaluators ?? globalExecution?.evaluators;
474
468
  if (candidateEvaluators === void 0) {
475
469
  return void 0;
476
470
  }
@@ -1013,6 +1007,11 @@ function isValidFieldAggregationType(value) {
1013
1007
  return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
1014
1008
  }
1015
1009
 
1010
+ // src/evaluation/loaders/jsonl-parser.ts
1011
+ var import_promises5 = require("fs/promises");
1012
+ var import_node_path5 = __toESM(require("path"), 1);
1013
+ var import_yaml2 = require("yaml");
1014
+
1016
1015
  // src/evaluation/loaders/message-processor.ts
1017
1016
  var import_promises4 = require("fs/promises");
1018
1017
  var import_node_path4 = __toESM(require("path"), 1);
@@ -1273,28 +1272,271 @@ async function processExpectedMessages(options) {
1273
1272
  return segments;
1274
1273
  }
1275
1274
 
1276
- // src/evaluation/formatting/prompt-builder.ts
1277
- var import_promises5 = require("fs/promises");
1278
- var import_node_path5 = __toESM(require("path"), 1);
1275
+ // src/evaluation/loaders/jsonl-parser.ts
1279
1276
  var ANSI_YELLOW5 = "\x1B[33m";
1277
+ var ANSI_RED = "\x1B[31m";
1280
1278
  var ANSI_RESET5 = "\x1B[0m";
1279
+ function detectFormat(filePath) {
1280
+ const ext = import_node_path5.default.extname(filePath).toLowerCase();
1281
+ if (ext === ".jsonl") return "jsonl";
1282
+ if (ext === ".yaml" || ext === ".yml") return "yaml";
1283
+ throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
1284
+ }
1285
+ async function loadSidecarMetadata(jsonlPath, verbose) {
1286
+ const dir = import_node_path5.default.dirname(jsonlPath);
1287
+ const base = import_node_path5.default.basename(jsonlPath, ".jsonl");
1288
+ const sidecarPath = import_node_path5.default.join(dir, `${base}.yaml`);
1289
+ if (!await fileExists(sidecarPath)) {
1290
+ if (verbose) {
1291
+ logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
1292
+ }
1293
+ return {};
1294
+ }
1295
+ try {
1296
+ const content = await (0, import_promises5.readFile)(sidecarPath, "utf8");
1297
+ const parsed = (0, import_yaml2.parse)(content);
1298
+ if (!isJsonObject(parsed)) {
1299
+ logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
1300
+ return {};
1301
+ }
1302
+ return {
1303
+ description: asString4(parsed.description),
1304
+ dataset: asString4(parsed.dataset),
1305
+ execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
1306
+ evaluator: parsed.evaluator
1307
+ };
1308
+ } catch (error) {
1309
+ logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error.message}`);
1310
+ return {};
1311
+ }
1312
+ }
1313
+ function parseJsonlContent(content, filePath) {
1314
+ const lines = content.split("\n");
1315
+ const cases = [];
1316
+ for (let i = 0; i < lines.length; i++) {
1317
+ const line = lines[i].trim();
1318
+ if (line === "") continue;
1319
+ try {
1320
+ const parsed = JSON.parse(line);
1321
+ if (!isJsonObject(parsed)) {
1322
+ throw new Error("Expected JSON object");
1323
+ }
1324
+ cases.push(parsed);
1325
+ } catch (error) {
1326
+ const message = error instanceof Error ? error.message : String(error);
1327
+ throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
1328
+ File: ${filePath}`);
1329
+ }
1330
+ }
1331
+ return cases;
1332
+ }
1333
+ async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
1334
+ const verbose = options?.verbose ?? false;
1335
+ const evalIdFilter = options?.evalId;
1336
+ const absoluteTestPath = import_node_path5.default.resolve(evalFilePath);
1337
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
1338
+ const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
1339
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
1340
+ const guidelinePatterns = config?.guideline_patterns;
1341
+ const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
1342
+ const rawFile = await (0, import_promises5.readFile)(absoluteTestPath, "utf8");
1343
+ const rawCases = parseJsonlContent(rawFile, evalFilePath);
1344
+ const fallbackDataset = import_node_path5.default.basename(absoluteTestPath, ".jsonl") || "eval";
1345
+ const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
1346
+ const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
1347
+ const globalExecution = sidecar.execution;
1348
+ if (verbose) {
1349
+ console.log(`
1350
+ [JSONL Dataset: ${evalFilePath}]`);
1351
+ console.log(` Cases: ${rawCases.length}`);
1352
+ console.log(` Dataset name: ${datasetName}`);
1353
+ if (sidecar.description) {
1354
+ console.log(` Description: ${sidecar.description}`);
1355
+ }
1356
+ }
1357
+ const results = [];
1358
+ for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
1359
+ const evalcase = rawCases[lineIndex];
1360
+ const lineNumber = lineIndex + 1;
1361
+ const id = asString4(evalcase.id);
1362
+ if (evalIdFilter && id !== evalIdFilter) {
1363
+ continue;
1364
+ }
1365
+ const conversationId = asString4(evalcase.conversation_id);
1366
+ const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
1367
+ const inputMessagesValue = evalcase.input_messages;
1368
+ const expectedMessagesValue = evalcase.expected_messages;
1369
+ if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1370
+ logError(
1371
+ `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
1372
+ );
1373
+ continue;
1374
+ }
1375
+ const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
1376
+ const inputMessages = inputMessagesValue.filter(
1377
+ (msg) => isTestMessage(msg)
1378
+ );
1379
+ const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1380
+ if (hasExpectedMessages && expectedMessages.length === 0) {
1381
+ logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
1382
+ continue;
1383
+ }
1384
+ const guidelinePaths = [];
1385
+ const inputTextParts = [];
1386
+ const inputSegments = await processMessages({
1387
+ messages: inputMessages,
1388
+ searchRoots,
1389
+ repoRootPath,
1390
+ guidelinePatterns,
1391
+ guidelinePaths,
1392
+ textParts: inputTextParts,
1393
+ messageType: "input",
1394
+ verbose
1395
+ });
1396
+ const outputSegments = hasExpectedMessages ? await processExpectedMessages({
1397
+ messages: expectedMessages,
1398
+ searchRoots,
1399
+ repoRootPath,
1400
+ verbose
1401
+ }) : [];
1402
+ let referenceAnswer = "";
1403
+ if (outputSegments.length > 0) {
1404
+ const lastMessage = outputSegments[outputSegments.length - 1];
1405
+ const content = lastMessage.content;
1406
+ const toolCalls = lastMessage.tool_calls;
1407
+ if (typeof content === "string") {
1408
+ referenceAnswer = content;
1409
+ } else if (content !== void 0 && content !== null) {
1410
+ referenceAnswer = JSON.stringify(content, null, 2);
1411
+ } else if (toolCalls !== void 0 && toolCalls !== null) {
1412
+ referenceAnswer = JSON.stringify(toolCalls, null, 2);
1413
+ }
1414
+ }
1415
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
1416
+ const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
1417
+ const mergedExecution = caseExecution ?? globalExecution;
1418
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
1419
+ let evaluators;
1420
+ try {
1421
+ evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
1422
+ } catch (error) {
1423
+ const message = error instanceof Error ? error.message : String(error);
1424
+ logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
1425
+ continue;
1426
+ }
1427
+ const inlineRubrics = evalcase.rubrics;
1428
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
1429
+ const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
1430
+ if (typeof rubric === "string") {
1431
+ return {
1432
+ id: `rubric-${index + 1}`,
1433
+ description: rubric,
1434
+ weight: 1,
1435
+ required: true
1436
+ };
1437
+ }
1438
+ return {
1439
+ id: asString4(rubric.id) ?? `rubric-${index + 1}`,
1440
+ description: asString4(rubric.description) ?? "",
1441
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1442
+ required: typeof rubric.required === "boolean" ? rubric.required : true
1443
+ };
1444
+ }).filter((r) => r.description.length > 0);
1445
+ if (rubricItems.length > 0) {
1446
+ const rubricEvaluator = {
1447
+ name: "rubric",
1448
+ type: "llm_judge",
1449
+ rubrics: rubricItems
1450
+ };
1451
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
1452
+ }
1453
+ }
1454
+ const userFilePaths = [];
1455
+ for (const segment of inputSegments) {
1456
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
1457
+ userFilePaths.push(segment.resolvedPath);
1458
+ }
1459
+ }
1460
+ const allFilePaths = [
1461
+ ...guidelinePaths.map((guidelinePath) => import_node_path5.default.resolve(guidelinePath)),
1462
+ ...userFilePaths
1463
+ ];
1464
+ const testCase = {
1465
+ id,
1466
+ dataset: datasetName,
1467
+ conversation_id: conversationId,
1468
+ question,
1469
+ input_messages: inputMessages,
1470
+ input_segments: inputSegments,
1471
+ expected_messages: outputSegments,
1472
+ reference_answer: referenceAnswer,
1473
+ guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path5.default.resolve(guidelinePath)),
1474
+ guideline_patterns: guidelinePatterns,
1475
+ file_paths: allFilePaths,
1476
+ expected_outcome: outcome,
1477
+ evaluator: evalCaseEvaluatorKind,
1478
+ evaluators
1479
+ };
1480
+ if (verbose) {
1481
+ console.log(`
1482
+ [Eval Case: ${id}]`);
1483
+ if (testCase.guideline_paths.length > 0) {
1484
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
1485
+ for (const guidelinePath of testCase.guideline_paths) {
1486
+ console.log(` - ${guidelinePath}`);
1487
+ }
1488
+ } else {
1489
+ console.log(" No guidelines found");
1490
+ }
1491
+ }
1492
+ results.push(testCase);
1493
+ }
1494
+ return results;
1495
+ }
1496
+ function asString4(value) {
1497
+ return typeof value === "string" ? value : void 0;
1498
+ }
1499
+ function logWarning4(message, details) {
1500
+ if (details && details.length > 0) {
1501
+ const detailBlock = details.join("\n");
1502
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
1503
+ ${detailBlock}${ANSI_RESET5}`);
1504
+ } else {
1505
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1506
+ }
1507
+ }
1508
+ function logError(message, details) {
1509
+ if (details && details.length > 0) {
1510
+ const detailBlock = details.join("\n");
1511
+ console.error(`${ANSI_RED}Error: ${message}
1512
+ ${detailBlock}${ANSI_RESET5}`);
1513
+ } else {
1514
+ console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
1515
+ }
1516
+ }
1517
+
1518
+ // src/evaluation/formatting/prompt-builder.ts
1519
+ var import_promises6 = require("fs/promises");
1520
+ var import_node_path6 = __toESM(require("path"), 1);
1521
+ var ANSI_YELLOW6 = "\x1B[33m";
1522
+ var ANSI_RESET6 = "\x1B[0m";
1281
1523
  async function buildPromptInputs(testCase, mode = "lm") {
1282
1524
  const guidelineParts = [];
1283
1525
  for (const rawPath of testCase.guideline_paths) {
1284
- const absolutePath = import_node_path5.default.resolve(rawPath);
1526
+ const absolutePath = import_node_path6.default.resolve(rawPath);
1285
1527
  if (!await fileExists(absolutePath)) {
1286
- logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
1528
+ logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
1287
1529
  continue;
1288
1530
  }
1289
1531
  try {
1290
- const content = (await (0, import_promises5.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1532
+ const content = (await (0, import_promises6.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1291
1533
  guidelineParts.push({
1292
1534
  content,
1293
1535
  isFile: true,
1294
- displayPath: import_node_path5.default.basename(absolutePath)
1536
+ displayPath: import_node_path6.default.basename(absolutePath)
1295
1537
  });
1296
1538
  } catch (error) {
1297
- logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
1539
+ logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
1298
1540
  }
1299
1541
  }
1300
1542
  const guidelines = formatFileContents(guidelineParts);
@@ -1318,9 +1560,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
1318
1560
  messageSegments.push({ type: "text", value: segment });
1319
1561
  }
1320
1562
  } else if (isJsonObject(segment)) {
1321
- const type = asString4(segment.type);
1563
+ const type = asString5(segment.type);
1322
1564
  if (type === "file") {
1323
- const value = asString4(segment.value);
1565
+ const value = asString5(segment.value);
1324
1566
  if (!value) continue;
1325
1567
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
1326
1568
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -1331,7 +1573,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
1331
1573
  messageSegments.push({ type: "file", text: fileText, path: value });
1332
1574
  }
1333
1575
  } else if (type === "text") {
1334
- const textValue = asString4(segment.value);
1576
+ const textValue = asString5(segment.value);
1335
1577
  if (textValue && textValue.trim().length > 0) {
1336
1578
  messageSegments.push({ type: "text", value: textValue });
1337
1579
  }
@@ -1485,22 +1727,22 @@ ${guidelineContent.trim()}`);
1485
1727
  }
1486
1728
  return chatPrompt.length > 0 ? chatPrompt : void 0;
1487
1729
  }
1488
- function asString4(value) {
1730
+ function asString5(value) {
1489
1731
  return typeof value === "string" ? value : void 0;
1490
1732
  }
1491
- function logWarning4(message) {
1492
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1733
+ function logWarning5(message) {
1734
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1493
1735
  }
1494
1736
 
1495
1737
  // src/evaluation/yaml-parser.ts
1496
- var ANSI_YELLOW6 = "\x1B[33m";
1497
- var ANSI_RED = "\x1B[31m";
1498
- var ANSI_RESET6 = "\x1B[0m";
1738
+ var ANSI_YELLOW7 = "\x1B[33m";
1739
+ var ANSI_RED2 = "\x1B[31m";
1740
+ var ANSI_RESET7 = "\x1B[0m";
1499
1741
  async function readTestSuiteMetadata(testFilePath) {
1500
1742
  try {
1501
- const absolutePath = import_node_path6.default.resolve(testFilePath);
1502
- const content = await (0, import_promises6.readFile)(absolutePath, "utf8");
1503
- const parsed = (0, import_yaml2.parse)(content);
1743
+ const absolutePath = import_node_path7.default.resolve(testFilePath);
1744
+ const content = await (0, import_promises7.readFile)(absolutePath, "utf8");
1745
+ const parsed = (0, import_yaml3.parse)(content);
1504
1746
  if (!isJsonObject(parsed)) {
1505
1747
  return {};
1506
1748
  }
@@ -1510,21 +1752,25 @@ async function readTestSuiteMetadata(testFilePath) {
1510
1752
  }
1511
1753
  }
1512
1754
  async function loadEvalCases(evalFilePath, repoRoot, options) {
1755
+ const format = detectFormat(evalFilePath);
1756
+ if (format === "jsonl") {
1757
+ return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
1758
+ }
1513
1759
  const verbose = options?.verbose ?? false;
1514
1760
  const evalIdFilter = options?.evalId;
1515
- const absoluteTestPath = import_node_path6.default.resolve(evalFilePath);
1761
+ const absoluteTestPath = import_node_path7.default.resolve(evalFilePath);
1516
1762
  const repoRootPath = resolveToAbsolutePath(repoRoot);
1517
1763
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
1518
1764
  const config = await loadConfig(absoluteTestPath, repoRootPath);
1519
1765
  const guidelinePatterns = config?.guideline_patterns;
1520
- const rawFile = await (0, import_promises6.readFile)(absoluteTestPath, "utf8");
1521
- const parsed = (0, import_yaml2.parse)(rawFile);
1766
+ const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
1767
+ const parsed = (0, import_yaml3.parse)(rawFile);
1522
1768
  if (!isJsonObject(parsed)) {
1523
1769
  throw new Error(`Invalid test file format: ${evalFilePath}`);
1524
1770
  }
1525
1771
  const suite = parsed;
1526
- const datasetNameFromSuite = asString5(suite.dataset)?.trim();
1527
- const fallbackDataset = import_node_path6.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1772
+ const datasetNameFromSuite = asString6(suite.dataset)?.trim();
1773
+ const fallbackDataset = import_node_path7.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1528
1774
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
1529
1775
  const rawTestcases = suite.evalcases;
1530
1776
  if (!Array.isArray(rawTestcases)) {
@@ -1532,24 +1778,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1532
1778
  }
1533
1779
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
1534
1780
  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
1535
- const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
1781
+ const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
1536
1782
  const results = [];
1537
1783
  for (const rawEvalcase of rawTestcases) {
1538
1784
  if (!isJsonObject(rawEvalcase)) {
1539
- logWarning5("Skipping invalid eval case entry (expected object)");
1785
+ logWarning6("Skipping invalid eval case entry (expected object)");
1540
1786
  continue;
1541
1787
  }
1542
1788
  const evalcase = rawEvalcase;
1543
- const id = asString5(evalcase.id);
1789
+ const id = asString6(evalcase.id);
1544
1790
  if (evalIdFilter && id !== evalIdFilter) {
1545
1791
  continue;
1546
1792
  }
1547
- const conversationId = asString5(evalcase.conversation_id);
1548
- const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
1793
+ const conversationId = asString6(evalcase.conversation_id);
1794
+ const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
1549
1795
  const inputMessagesValue = evalcase.input_messages;
1550
1796
  const expectedMessagesValue = evalcase.expected_messages;
1551
1797
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1552
- logError(
1798
+ logError2(
1553
1799
  `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
1554
1800
  );
1555
1801
  continue;
@@ -1560,7 +1806,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1560
1806
  );
1561
1807
  const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1562
1808
  if (hasExpectedMessages && expectedMessages.length === 0) {
1563
- logError(`No valid expected message found for eval case: ${id}`);
1809
+ logError2(`No valid expected message found for eval case: ${id}`);
1564
1810
  continue;
1565
1811
  }
1566
1812
  const guidelinePaths = [];
@@ -1601,7 +1847,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1601
1847
  evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
1602
1848
  } catch (error) {
1603
1849
  const message = error instanceof Error ? error.message : String(error);
1604
- logError(`Skipping eval case '${id}': ${message}`);
1850
+ logError2(`Skipping eval case '${id}': ${message}`);
1605
1851
  continue;
1606
1852
  }
1607
1853
  const inlineRubrics = evalcase.rubrics;
@@ -1616,8 +1862,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1616
1862
  };
1617
1863
  }
1618
1864
  return {
1619
- id: asString5(rubric.id) ?? `rubric-${index + 1}`,
1620
- description: asString5(rubric.description) ?? "",
1865
+ id: asString6(rubric.id) ?? `rubric-${index + 1}`,
1866
+ description: asString6(rubric.description) ?? "",
1621
1867
  weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1622
1868
  required: typeof rubric.required === "boolean" ? rubric.required : true
1623
1869
  };
@@ -1638,7 +1884,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1638
1884
  }
1639
1885
  }
1640
1886
  const allFilePaths = [
1641
- ...guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
1887
+ ...guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
1642
1888
  ...userFilePaths
1643
1889
  ];
1644
1890
  const testCase = {
@@ -1650,7 +1896,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1650
1896
  input_segments: inputSegments,
1651
1897
  expected_messages: outputSegments,
1652
1898
  reference_answer: referenceAnswer,
1653
- guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
1899
+ guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
1654
1900
  guideline_patterns: guidelinePatterns,
1655
1901
  file_paths: allFilePaths,
1656
1902
  expected_outcome: outcome,
@@ -1673,35 +1919,35 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1673
1919
  }
1674
1920
  return results;
1675
1921
  }
1676
- function asString5(value) {
1922
+ function asString6(value) {
1677
1923
  return typeof value === "string" ? value : void 0;
1678
1924
  }
1679
- function logWarning5(message, details) {
1925
+ function logWarning6(message, details) {
1680
1926
  if (details && details.length > 0) {
1681
1927
  const detailBlock = details.join("\n");
1682
- console.warn(`${ANSI_YELLOW6}Warning: ${message}
1683
- ${detailBlock}${ANSI_RESET6}`);
1928
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}
1929
+ ${detailBlock}${ANSI_RESET7}`);
1684
1930
  } else {
1685
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1931
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
1686
1932
  }
1687
1933
  }
1688
- function logError(message, details) {
1934
+ function logError2(message, details) {
1689
1935
  if (details && details.length > 0) {
1690
1936
  const detailBlock = details.join("\n");
1691
- console.error(`${ANSI_RED}Error: ${message}
1692
- ${detailBlock}${ANSI_RESET6}`);
1937
+ console.error(`${ANSI_RED2}Error: ${message}
1938
+ ${detailBlock}${ANSI_RESET7}`);
1693
1939
  } else {
1694
- console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
1940
+ console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
1695
1941
  }
1696
1942
  }
1697
1943
 
1698
1944
  // src/evaluation/file-utils.ts
1699
1945
  var import_node_fs2 = require("fs");
1700
- var import_promises7 = require("fs/promises");
1701
- var import_node_path7 = __toESM(require("path"), 1);
1946
+ var import_promises8 = require("fs/promises");
1947
+ var import_node_path8 = __toESM(require("path"), 1);
1702
1948
  async function fileExists2(filePath) {
1703
1949
  try {
1704
- await (0, import_promises7.access)(filePath, import_node_fs2.constants.F_OK);
1950
+ await (0, import_promises8.access)(filePath, import_node_fs2.constants.F_OK);
1705
1951
  return true;
1706
1952
  } catch {
1707
1953
  return false;
@@ -1711,22 +1957,22 @@ function normalizeLineEndings(content) {
1711
1957
  return content.replace(/\r\n/g, "\n");
1712
1958
  }
1713
1959
  async function readTextFile(filePath) {
1714
- const content = await (0, import_promises7.readFile)(filePath, "utf8");
1960
+ const content = await (0, import_promises8.readFile)(filePath, "utf8");
1715
1961
  return normalizeLineEndings(content);
1716
1962
  }
1717
1963
  async function readJsonFile(filePath) {
1718
- const content = await (0, import_promises7.readFile)(filePath, "utf8");
1964
+ const content = await (0, import_promises8.readFile)(filePath, "utf8");
1719
1965
  return JSON.parse(content);
1720
1966
  }
1721
1967
  async function findGitRoot(startPath) {
1722
- let currentDir = import_node_path7.default.dirname(import_node_path7.default.resolve(startPath));
1723
- const root = import_node_path7.default.parse(currentDir).root;
1968
+ let currentDir = import_node_path8.default.dirname(import_node_path8.default.resolve(startPath));
1969
+ const root = import_node_path8.default.parse(currentDir).root;
1724
1970
  while (currentDir !== root) {
1725
- const gitPath = import_node_path7.default.join(currentDir, ".git");
1971
+ const gitPath = import_node_path8.default.join(currentDir, ".git");
1726
1972
  if (await fileExists2(gitPath)) {
1727
1973
  return currentDir;
1728
1974
  }
1729
- const parentDir = import_node_path7.default.dirname(currentDir);
1975
+ const parentDir = import_node_path8.default.dirname(currentDir);
1730
1976
  if (parentDir === currentDir) {
1731
1977
  break;
1732
1978
  }
@@ -1737,8 +1983,8 @@ async function findGitRoot(startPath) {
1737
1983
  function buildDirectoryChain2(filePath, repoRoot) {
1738
1984
  const directories = [];
1739
1985
  const seen = /* @__PURE__ */ new Set();
1740
- const boundary = import_node_path7.default.resolve(repoRoot);
1741
- let current = import_node_path7.default.resolve(import_node_path7.default.dirname(filePath));
1986
+ const boundary = import_node_path8.default.resolve(repoRoot);
1987
+ let current = import_node_path8.default.resolve(import_node_path8.default.dirname(filePath));
1742
1988
  while (current !== void 0) {
1743
1989
  if (!seen.has(current)) {
1744
1990
  directories.push(current);
@@ -1747,7 +1993,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
1747
1993
  if (current === boundary) {
1748
1994
  break;
1749
1995
  }
1750
- const parent = import_node_path7.default.dirname(current);
1996
+ const parent = import_node_path8.default.dirname(current);
1751
1997
  if (parent === current) {
1752
1998
  break;
1753
1999
  }
@@ -1761,16 +2007,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
1761
2007
  function buildSearchRoots2(evalPath, repoRoot) {
1762
2008
  const uniqueRoots = [];
1763
2009
  const addRoot = (root) => {
1764
- const normalized = import_node_path7.default.resolve(root);
2010
+ const normalized = import_node_path8.default.resolve(root);
1765
2011
  if (!uniqueRoots.includes(normalized)) {
1766
2012
  uniqueRoots.push(normalized);
1767
2013
  }
1768
2014
  };
1769
- let currentDir = import_node_path7.default.dirname(evalPath);
2015
+ let currentDir = import_node_path8.default.dirname(evalPath);
1770
2016
  let reachedBoundary = false;
1771
2017
  while (!reachedBoundary) {
1772
2018
  addRoot(currentDir);
1773
- const parentDir = import_node_path7.default.dirname(currentDir);
2019
+ const parentDir = import_node_path8.default.dirname(currentDir);
1774
2020
  if (currentDir === repoRoot || parentDir === currentDir) {
1775
2021
  reachedBoundary = true;
1776
2022
  } else {
@@ -1788,16 +2034,16 @@ function trimLeadingSeparators2(value) {
1788
2034
  async function resolveFileReference2(rawValue, searchRoots) {
1789
2035
  const displayPath = trimLeadingSeparators2(rawValue);
1790
2036
  const potentialPaths = [];
1791
- if (import_node_path7.default.isAbsolute(rawValue)) {
1792
- potentialPaths.push(import_node_path7.default.normalize(rawValue));
2037
+ if (import_node_path8.default.isAbsolute(rawValue)) {
2038
+ potentialPaths.push(import_node_path8.default.normalize(rawValue));
1793
2039
  }
1794
2040
  for (const base of searchRoots) {
1795
- potentialPaths.push(import_node_path7.default.resolve(base, displayPath));
2041
+ potentialPaths.push(import_node_path8.default.resolve(base, displayPath));
1796
2042
  }
1797
2043
  const attempted = [];
1798
2044
  const seen = /* @__PURE__ */ new Set();
1799
2045
  for (const candidate of potentialPaths) {
1800
- const absoluteCandidate = import_node_path7.default.resolve(candidate);
2046
+ const absoluteCandidate = import_node_path8.default.resolve(candidate);
1801
2047
  if (seen.has(absoluteCandidate)) {
1802
2048
  continue;
1803
2049
  }
@@ -2147,9 +2393,9 @@ async function withRetry(fn, retryConfig, signal) {
2147
2393
  var import_node_child_process = require("child_process");
2148
2394
  var import_node_crypto = require("crypto");
2149
2395
  var import_node_fs3 = require("fs");
2150
- var import_promises8 = require("fs/promises");
2396
+ var import_promises9 = require("fs/promises");
2151
2397
  var import_node_os = require("os");
2152
- var import_node_path9 = __toESM(require("path"), 1);
2398
+ var import_node_path10 = __toESM(require("path"), 1);
2153
2399
 
2154
2400
  // src/evaluation/providers/claude-code-log-tracker.ts
2155
2401
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
@@ -2205,7 +2451,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
2205
2451
  }
2206
2452
 
2207
2453
  // src/evaluation/providers/preread.ts
2208
- var import_node_path8 = __toESM(require("path"), 1);
2454
+ var import_node_path9 = __toESM(require("path"), 1);
2209
2455
  function buildPromptDocument(request, inputFiles, options) {
2210
2456
  const parts = [];
2211
2457
  const guidelineFiles = collectGuidelineFiles(
@@ -2228,7 +2474,7 @@ function normalizeInputFiles(inputFiles) {
2228
2474
  }
2229
2475
  const deduped = /* @__PURE__ */ new Map();
2230
2476
  for (const inputFile of inputFiles) {
2231
- const absolutePath = import_node_path8.default.resolve(inputFile);
2477
+ const absolutePath = import_node_path9.default.resolve(inputFile);
2232
2478
  if (!deduped.has(absolutePath)) {
2233
2479
  deduped.set(absolutePath, absolutePath);
2234
2480
  }
@@ -2241,14 +2487,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
2241
2487
  }
2242
2488
  const unique = /* @__PURE__ */ new Map();
2243
2489
  for (const inputFile of inputFiles) {
2244
- const absolutePath = import_node_path8.default.resolve(inputFile);
2490
+ const absolutePath = import_node_path9.default.resolve(inputFile);
2245
2491
  if (overrides?.has(absolutePath)) {
2246
2492
  if (!unique.has(absolutePath)) {
2247
2493
  unique.set(absolutePath, absolutePath);
2248
2494
  }
2249
2495
  continue;
2250
2496
  }
2251
- const normalized = absolutePath.split(import_node_path8.default.sep).join("/");
2497
+ const normalized = absolutePath.split(import_node_path9.default.sep).join("/");
2252
2498
  if (isGuidelineFile(normalized, guidelinePatterns)) {
2253
2499
  if (!unique.has(absolutePath)) {
2254
2500
  unique.set(absolutePath, absolutePath);
@@ -2263,7 +2509,7 @@ function collectInputFiles(inputFiles) {
2263
2509
  }
2264
2510
  const unique = /* @__PURE__ */ new Map();
2265
2511
  for (const inputFile of inputFiles) {
2266
- const absolutePath = import_node_path8.default.resolve(inputFile);
2512
+ const absolutePath = import_node_path9.default.resolve(inputFile);
2267
2513
  if (!unique.has(absolutePath)) {
2268
2514
  unique.set(absolutePath, absolutePath);
2269
2515
  }
@@ -2275,7 +2521,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
2275
2521
  return "";
2276
2522
  }
2277
2523
  const buildList = (files) => files.map((absolutePath) => {
2278
- const fileName = import_node_path8.default.basename(absolutePath);
2524
+ const fileName = import_node_path9.default.basename(absolutePath);
2279
2525
  const fileUri = pathToFileUri(absolutePath);
2280
2526
  return `* [${fileName}](${fileUri})`;
2281
2527
  });
@@ -2295,7 +2541,7 @@ ${buildList(inputFiles).join("\n")}.`);
2295
2541
  return sections.join("\n");
2296
2542
  }
2297
2543
  function pathToFileUri(filePath) {
2298
- const absolutePath = import_node_path8.default.isAbsolute(filePath) ? filePath : import_node_path8.default.resolve(filePath);
2544
+ const absolutePath = import_node_path9.default.isAbsolute(filePath) ? filePath : import_node_path9.default.resolve(filePath);
2299
2545
  const normalizedPath = absolutePath.replace(/\\/g, "/");
2300
2546
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
2301
2547
  return `file:///${normalizedPath}`;
@@ -2332,8 +2578,8 @@ var ClaudeCodeProvider = class {
2332
2578
  const workspaceRoot = await this.createWorkspace();
2333
2579
  const logger = await this.createStreamLogger(request).catch(() => void 0);
2334
2580
  try {
2335
- const promptFile = import_node_path9.default.join(workspaceRoot, PROMPT_FILENAME);
2336
- await (0, import_promises8.writeFile)(promptFile, request.question, "utf8");
2581
+ const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
2582
+ await (0, import_promises9.writeFile)(promptFile, request.question, "utf8");
2337
2583
  const args = this.buildClaudeCodeArgs(request.question, inputFiles);
2338
2584
  const cwd = this.resolveCwd();
2339
2585
  const result = await this.executeClaudeCode(args, cwd, request.signal, logger);
@@ -2380,7 +2626,7 @@ var ClaudeCodeProvider = class {
2380
2626
  if (!this.config.cwd) {
2381
2627
  return process.cwd();
2382
2628
  }
2383
- return import_node_path9.default.resolve(this.config.cwd);
2629
+ return import_node_path10.default.resolve(this.config.cwd);
2384
2630
  }
2385
2631
  buildClaudeCodeArgs(prompt, inputFiles) {
2386
2632
  const args = [];
@@ -2437,11 +2683,11 @@ ${filesContext}`;
2437
2683
  }
2438
2684
  }
2439
2685
  async createWorkspace() {
2440
- return await (0, import_promises8.mkdtemp)(import_node_path9.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
2686
+ return await (0, import_promises9.mkdtemp)(import_node_path10.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
2441
2687
  }
2442
2688
  async cleanupWorkspace(workspaceRoot) {
2443
2689
  try {
2444
- await (0, import_promises8.rm)(workspaceRoot, { recursive: true, force: true });
2690
+ await (0, import_promises9.rm)(workspaceRoot, { recursive: true, force: true });
2445
2691
  } catch {
2446
2692
  }
2447
2693
  }
@@ -2451,9 +2697,9 @@ ${filesContext}`;
2451
2697
  return void 0;
2452
2698
  }
2453
2699
  if (this.config.logDir) {
2454
- return import_node_path9.default.resolve(this.config.logDir);
2700
+ return import_node_path10.default.resolve(this.config.logDir);
2455
2701
  }
2456
- return import_node_path9.default.join(process.cwd(), ".agentv", "logs", "claude-code");
2702
+ return import_node_path10.default.join(process.cwd(), ".agentv", "logs", "claude-code");
2457
2703
  }
2458
2704
  async createStreamLogger(request) {
2459
2705
  const logDir = this.resolveLogDirectory();
@@ -2461,13 +2707,13 @@ ${filesContext}`;
2461
2707
  return void 0;
2462
2708
  }
2463
2709
  try {
2464
- await (0, import_promises8.mkdir)(logDir, { recursive: true });
2710
+ await (0, import_promises9.mkdir)(logDir, { recursive: true });
2465
2711
  } catch (error) {
2466
2712
  const message = error instanceof Error ? error.message : String(error);
2467
2713
  console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
2468
2714
  return void 0;
2469
2715
  }
2470
- const filePath = import_node_path9.default.join(logDir, buildLogFilename(request, this.targetName));
2716
+ const filePath = import_node_path10.default.join(logDir, buildLogFilename(request, this.targetName));
2471
2717
  try {
2472
2718
  const logger = await ClaudeCodeStreamLogger.create({
2473
2719
  filePath,
@@ -2872,16 +3118,16 @@ function escapeShellArg(arg) {
2872
3118
  }
2873
3119
  async function defaultClaudeCodeRunner(options) {
2874
3120
  const tempId = (0, import_node_crypto.randomUUID)();
2875
- const stdoutFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stdout`);
2876
- const stderrFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stderr`);
2877
- const exitFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-exit`);
2878
- const pidFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-pid`);
3121
+ const stdoutFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stdout`);
3122
+ const stderrFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stderr`);
3123
+ const exitFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-exit`);
3124
+ const pidFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-pid`);
2879
3125
  try {
2880
3126
  return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
2881
3127
  } finally {
2882
3128
  for (const file of [stdoutFile, stderrFile, exitFile, pidFile]) {
2883
3129
  try {
2884
- await (0, import_promises8.rm)(file, { force: true });
3130
+ await (0, import_promises9.rm)(file, { force: true });
2885
3131
  } catch {
2886
3132
  }
2887
3133
  }
@@ -2915,8 +3161,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2915
3161
  let lastStdoutSize = 0;
2916
3162
  const readFileIfExists = async (filePath) => {
2917
3163
  try {
2918
- const { readFile: readFile8 } = await import("fs/promises");
2919
- return await readFile8(filePath, "utf8");
3164
+ const { readFile: readFile9 } = await import("fs/promises");
3165
+ return await readFile9(filePath, "utf8");
2920
3166
  } catch {
2921
3167
  return "";
2922
3168
  }
@@ -2989,9 +3235,9 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2989
3235
 
2990
3236
  // src/evaluation/providers/cli.ts
2991
3237
  var import_node_child_process2 = require("child_process");
2992
- var import_promises9 = __toESM(require("fs/promises"), 1);
3238
+ var import_promises10 = __toESM(require("fs/promises"), 1);
2993
3239
  var import_node_os2 = __toESM(require("os"), 1);
2994
- var import_node_path10 = __toESM(require("path"), 1);
3240
+ var import_node_path11 = __toESM(require("path"), 1);
2995
3241
  var import_node_util = require("util");
2996
3242
  var import_zod = require("zod");
2997
3243
  var ToolCallSchema = import_zod.z.object({
@@ -3360,7 +3606,7 @@ var CliProvider = class {
3360
3606
  throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
3361
3607
  } finally {
3362
3608
  if (!this.keepTempFiles) {
3363
- await import_promises9.default.unlink(filePath).catch(() => {
3609
+ await import_promises10.default.unlink(filePath).catch(() => {
3364
3610
  });
3365
3611
  }
3366
3612
  }
@@ -3448,7 +3694,7 @@ function normalizeInputFiles2(inputFiles) {
3448
3694
  }
3449
3695
  const unique = /* @__PURE__ */ new Map();
3450
3696
  for (const inputFile of inputFiles) {
3451
- const absolutePath = import_node_path10.default.resolve(inputFile);
3697
+ const absolutePath = import_node_path11.default.resolve(inputFile);
3452
3698
  if (!unique.has(absolutePath)) {
3453
3699
  unique.set(absolutePath, absolutePath);
3454
3700
  }
@@ -3462,7 +3708,7 @@ function formatFileList(files, template) {
3462
3708
  const formatter = template ?? "{path}";
3463
3709
  return files.map((filePath) => {
3464
3710
  const escapedPath = shellEscape(filePath);
3465
- const escapedName = shellEscape(import_node_path10.default.basename(filePath));
3711
+ const escapedName = shellEscape(import_node_path11.default.basename(filePath));
3466
3712
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
3467
3713
  }).join(" ");
3468
3714
  }
@@ -3486,7 +3732,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
3486
3732
  const safeEvalId = evalCaseId || "unknown";
3487
3733
  const timestamp = Date.now();
3488
3734
  const random = Math.random().toString(36).substring(2, 9);
3489
- return import_node_path10.default.join(import_node_os2.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3735
+ return import_node_path11.default.join(import_node_os2.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3490
3736
  }
3491
3737
  function formatTimeoutSuffix2(timeoutMs) {
3492
3738
  if (!timeoutMs || timeoutMs <= 0) {
@@ -3500,9 +3746,9 @@ function formatTimeoutSuffix2(timeoutMs) {
3500
3746
  var import_node_child_process3 = require("child_process");
3501
3747
  var import_node_crypto2 = require("crypto");
3502
3748
  var import_node_fs4 = require("fs");
3503
- var import_promises10 = require("fs/promises");
3749
+ var import_promises11 = require("fs/promises");
3504
3750
  var import_node_os3 = require("os");
3505
- var import_node_path11 = __toESM(require("path"), 1);
3751
+ var import_node_path12 = __toESM(require("path"), 1);
3506
3752
  var import_node_util2 = require("util");
3507
3753
 
3508
3754
  // src/evaluation/providers/codex-log-tracker.ts
@@ -3597,8 +3843,8 @@ var CodexProvider = class {
3597
3843
  const promptContent = `${systemPrompt}
3598
3844
 
3599
3845
  ${basePrompt}`;
3600
- const promptFile = import_node_path11.default.join(workspaceRoot, PROMPT_FILENAME2);
3601
- await (0, import_promises10.writeFile)(promptFile, promptContent, "utf8");
3846
+ const promptFile = import_node_path12.default.join(workspaceRoot, PROMPT_FILENAME2);
3847
+ await (0, import_promises11.writeFile)(promptFile, promptContent, "utf8");
3602
3848
  const args = this.buildCodexArgs();
3603
3849
  const cwd = this.resolveCwd(workspaceRoot);
3604
3850
  const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -3647,7 +3893,7 @@ ${basePrompt}`;
3647
3893
  if (!this.config.cwd) {
3648
3894
  return workspaceRoot;
3649
3895
  }
3650
- return import_node_path11.default.resolve(this.config.cwd);
3896
+ return import_node_path12.default.resolve(this.config.cwd);
3651
3897
  }
3652
3898
  buildCodexArgs() {
3653
3899
  const args = [
@@ -3689,11 +3935,11 @@ ${basePrompt}`;
3689
3935
  }
3690
3936
  }
3691
3937
  async createWorkspace() {
3692
- return await (0, import_promises10.mkdtemp)(import_node_path11.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
3938
+ return await (0, import_promises11.mkdtemp)(import_node_path12.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
3693
3939
  }
3694
3940
  async cleanupWorkspace(workspaceRoot) {
3695
3941
  try {
3696
- await (0, import_promises10.rm)(workspaceRoot, { recursive: true, force: true });
3942
+ await (0, import_promises11.rm)(workspaceRoot, { recursive: true, force: true });
3697
3943
  } catch {
3698
3944
  }
3699
3945
  }
@@ -3703,9 +3949,9 @@ ${basePrompt}`;
3703
3949
  return void 0;
3704
3950
  }
3705
3951
  if (this.config.logDir) {
3706
- return import_node_path11.default.resolve(this.config.logDir);
3952
+ return import_node_path12.default.resolve(this.config.logDir);
3707
3953
  }
3708
- return import_node_path11.default.join(process.cwd(), ".agentv", "logs", "codex");
3954
+ return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "codex");
3709
3955
  }
3710
3956
  async createStreamLogger(request) {
3711
3957
  const logDir = this.resolveLogDirectory();
@@ -3713,13 +3959,13 @@ ${basePrompt}`;
3713
3959
  return void 0;
3714
3960
  }
3715
3961
  try {
3716
- await (0, import_promises10.mkdir)(logDir, { recursive: true });
3962
+ await (0, import_promises11.mkdir)(logDir, { recursive: true });
3717
3963
  } catch (error) {
3718
3964
  const message = error instanceof Error ? error.message : String(error);
3719
3965
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
3720
3966
  return void 0;
3721
3967
  }
3722
- const filePath = import_node_path11.default.join(logDir, buildLogFilename2(request, this.targetName));
3968
+ const filePath = import_node_path12.default.join(logDir, buildLogFilename2(request, this.targetName));
3723
3969
  try {
3724
3970
  const logger = await CodexStreamLogger.create({
3725
3971
  filePath,
@@ -3934,9 +4180,9 @@ function tryParseJsonValue2(rawLine) {
3934
4180
  async function locateExecutable(candidate) {
3935
4181
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
3936
4182
  if (includesPathSeparator) {
3937
- const resolved = import_node_path11.default.isAbsolute(candidate) ? candidate : import_node_path11.default.resolve(candidate);
4183
+ const resolved = import_node_path12.default.isAbsolute(candidate) ? candidate : import_node_path12.default.resolve(candidate);
3938
4184
  const executablePath = await ensureWindowsExecutableVariant(resolved);
3939
- await (0, import_promises10.access)(executablePath, import_node_fs4.constants.F_OK);
4185
+ await (0, import_promises11.access)(executablePath, import_node_fs4.constants.F_OK);
3940
4186
  return executablePath;
3941
4187
  }
3942
4188
  const locator = process.platform === "win32" ? "where" : "which";
@@ -3946,7 +4192,7 @@ async function locateExecutable(candidate) {
3946
4192
  const preferred = selectExecutableCandidate(lines);
3947
4193
  if (preferred) {
3948
4194
  const executablePath = await ensureWindowsExecutableVariant(preferred);
3949
- await (0, import_promises10.access)(executablePath, import_node_fs4.constants.F_OK);
4195
+ await (0, import_promises11.access)(executablePath, import_node_fs4.constants.F_OK);
3950
4196
  return executablePath;
3951
4197
  }
3952
4198
  } catch {
@@ -3980,7 +4226,7 @@ async function ensureWindowsExecutableVariant(candidate) {
3980
4226
  for (const ext of extensions) {
3981
4227
  const withExtension = `${candidate}${ext}`;
3982
4228
  try {
3983
- await (0, import_promises10.access)(withExtension, import_node_fs4.constants.F_OK);
4229
+ await (0, import_promises11.access)(withExtension, import_node_fs4.constants.F_OK);
3984
4230
  return withExtension;
3985
4231
  } catch {
3986
4232
  }
@@ -4445,9 +4691,9 @@ function extractToolCalls2(content) {
4445
4691
  var import_node_child_process4 = require("child_process");
4446
4692
  var import_node_crypto3 = require("crypto");
4447
4693
  var import_node_fs5 = require("fs");
4448
- var import_promises11 = require("fs/promises");
4694
+ var import_promises12 = require("fs/promises");
4449
4695
  var import_node_os4 = require("os");
4450
- var import_node_path12 = __toESM(require("path"), 1);
4696
+ var import_node_path13 = __toESM(require("path"), 1);
4451
4697
 
4452
4698
  // src/evaluation/providers/pi-log-tracker.ts
4453
4699
  var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
@@ -4531,8 +4777,8 @@ var PiCodingAgentProvider = class {
4531
4777
  const workspaceRoot = await this.createWorkspace();
4532
4778
  const logger = await this.createStreamLogger(request).catch(() => void 0);
4533
4779
  try {
4534
- const promptFile = import_node_path12.default.join(workspaceRoot, PROMPT_FILENAME3);
4535
- await (0, import_promises11.writeFile)(promptFile, request.question, "utf8");
4780
+ const promptFile = import_node_path13.default.join(workspaceRoot, PROMPT_FILENAME3);
4781
+ await (0, import_promises12.writeFile)(promptFile, request.question, "utf8");
4536
4782
  const args = this.buildPiArgs(request.question, inputFiles);
4537
4783
  const cwd = this.resolveCwd(workspaceRoot);
4538
4784
  const result = await this.executePi(args, cwd, request.signal, logger);
@@ -4573,7 +4819,7 @@ var PiCodingAgentProvider = class {
4573
4819
  if (!this.config.cwd) {
4574
4820
  return workspaceRoot;
4575
4821
  }
4576
- return import_node_path12.default.resolve(this.config.cwd);
4822
+ return import_node_path13.default.resolve(this.config.cwd);
4577
4823
  }
4578
4824
  buildPiArgs(prompt, inputFiles) {
4579
4825
  const args = [];
@@ -4662,19 +4908,19 @@ ${prompt}`;
4662
4908
  return env;
4663
4909
  }
4664
4910
  async createWorkspace() {
4665
- return await (0, import_promises11.mkdtemp)(import_node_path12.default.join((0, import_node_os4.tmpdir)(), WORKSPACE_PREFIX3));
4911
+ return await (0, import_promises12.mkdtemp)(import_node_path13.default.join((0, import_node_os4.tmpdir)(), WORKSPACE_PREFIX3));
4666
4912
  }
4667
4913
  async cleanupWorkspace(workspaceRoot) {
4668
4914
  try {
4669
- await (0, import_promises11.rm)(workspaceRoot, { recursive: true, force: true });
4915
+ await (0, import_promises12.rm)(workspaceRoot, { recursive: true, force: true });
4670
4916
  } catch {
4671
4917
  }
4672
4918
  }
4673
4919
  resolveLogDirectory() {
4674
4920
  if (this.config.logDir) {
4675
- return import_node_path12.default.resolve(this.config.logDir);
4921
+ return import_node_path13.default.resolve(this.config.logDir);
4676
4922
  }
4677
- return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4923
+ return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4678
4924
  }
4679
4925
  async createStreamLogger(request) {
4680
4926
  const logDir = this.resolveLogDirectory();
@@ -4682,13 +4928,13 @@ ${prompt}`;
4682
4928
  return void 0;
4683
4929
  }
4684
4930
  try {
4685
- await (0, import_promises11.mkdir)(logDir, { recursive: true });
4931
+ await (0, import_promises12.mkdir)(logDir, { recursive: true });
4686
4932
  } catch (error) {
4687
4933
  const message = error instanceof Error ? error.message : String(error);
4688
4934
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
4689
4935
  return void 0;
4690
4936
  }
4691
- const filePath = import_node_path12.default.join(logDir, buildLogFilename3(request, this.targetName));
4937
+ const filePath = import_node_path13.default.join(logDir, buildLogFilename3(request, this.targetName));
4692
4938
  try {
4693
4939
  const logger = await PiStreamLogger.create({
4694
4940
  filePath,
@@ -5121,7 +5367,7 @@ async function defaultPiRunner(options) {
5121
5367
  }
5122
5368
 
5123
5369
  // src/evaluation/providers/targets.ts
5124
- var import_node_path13 = __toESM(require("path"), 1);
5370
+ var import_node_path14 = __toESM(require("path"), 1);
5125
5371
  var import_zod2 = require("zod");
5126
5372
  var CliHealthcheckHttpInputSchema = import_zod2.z.object({
5127
5373
  type: import_zod2.z.literal("http"),
@@ -5227,11 +5473,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
5227
5473
  allowLiteral: true,
5228
5474
  optionalEnv: true
5229
5475
  });
5230
- if (cwd && evalFilePath && !import_node_path13.default.isAbsolute(cwd)) {
5231
- cwd = import_node_path13.default.resolve(import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath)), cwd);
5476
+ if (cwd && evalFilePath && !import_node_path14.default.isAbsolute(cwd)) {
5477
+ cwd = import_node_path14.default.resolve(import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath)), cwd);
5232
5478
  }
5233
5479
  if (!cwd && evalFilePath) {
5234
- cwd = import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath));
5480
+ cwd = import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath));
5235
5481
  }
5236
5482
  return {
5237
5483
  type: "command",
@@ -5258,11 +5504,11 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
5258
5504
  allowLiteral: true,
5259
5505
  optionalEnv: true
5260
5506
  });
5261
- if (cwd && evalFilePath && !import_node_path13.default.isAbsolute(cwd)) {
5262
- cwd = import_node_path13.default.resolve(import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath)), cwd);
5507
+ if (cwd && evalFilePath && !import_node_path14.default.isAbsolute(cwd)) {
5508
+ cwd = import_node_path14.default.resolve(import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath)), cwd);
5263
5509
  }
5264
5510
  if (!cwd && evalFilePath) {
5265
- cwd = import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath));
5511
+ cwd = import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath));
5266
5512
  }
5267
5513
  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
5268
5514
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
@@ -5767,8 +6013,8 @@ function resolveCliConfig(target, env, evalFilePath) {
5767
6013
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
5768
6014
  if (!parseResult.success) {
5769
6015
  const firstError = parseResult.error.errors[0];
5770
- const path17 = firstError?.path.join(".") || "";
5771
- const prefix = path17 ? `${target.name} ${path17}: ` : `${target.name}: `;
6016
+ const path18 = firstError?.path.join(".") || "";
6017
+ const prefix = path18 ? `${target.name} ${path18}: ` : `${target.name}: `;
5772
6018
  throw new Error(`${prefix}${firstError?.message}`);
5773
6019
  }
5774
6020
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -5956,7 +6202,7 @@ function resolveOptionalNumberArray(source, description) {
5956
6202
  }
5957
6203
 
5958
6204
  // src/evaluation/providers/vscode.ts
5959
- var import_node_path14 = __toESM(require("path"), 1);
6205
+ var import_node_path15 = __toESM(require("path"), 1);
5960
6206
  var import_subagent = require("subagent");
5961
6207
 
5962
6208
  // src/evaluation/providers/vscode-templates.ts
@@ -6126,7 +6372,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
6126
6372
  return "";
6127
6373
  }
6128
6374
  const buildList = (files) => files.map((absolutePath) => {
6129
- const fileName = import_node_path14.default.basename(absolutePath);
6375
+ const fileName = import_node_path15.default.basename(absolutePath);
6130
6376
  const fileUri = pathToFileUri2(absolutePath);
6131
6377
  return `* [${fileName}](${fileUri})`;
6132
6378
  });
@@ -6151,8 +6397,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
6151
6397
  }
6152
6398
  const unique = /* @__PURE__ */ new Map();
6153
6399
  for (const attachment of attachments) {
6154
- const absolutePath = import_node_path14.default.resolve(attachment);
6155
- const normalized = absolutePath.split(import_node_path14.default.sep).join("/");
6400
+ const absolutePath = import_node_path15.default.resolve(attachment);
6401
+ const normalized = absolutePath.split(import_node_path15.default.sep).join("/");
6156
6402
  if (isGuidelineFile(normalized, guidelinePatterns)) {
6157
6403
  if (!unique.has(absolutePath)) {
6158
6404
  unique.set(absolutePath, absolutePath);
@@ -6167,7 +6413,7 @@ function collectAttachmentFiles(attachments) {
6167
6413
  }
6168
6414
  const unique = /* @__PURE__ */ new Map();
6169
6415
  for (const attachment of attachments) {
6170
- const absolutePath = import_node_path14.default.resolve(attachment);
6416
+ const absolutePath = import_node_path15.default.resolve(attachment);
6171
6417
  if (!unique.has(absolutePath)) {
6172
6418
  unique.set(absolutePath, absolutePath);
6173
6419
  }
@@ -6175,7 +6421,7 @@ function collectAttachmentFiles(attachments) {
6175
6421
  return Array.from(unique.values());
6176
6422
  }
6177
6423
  function pathToFileUri2(filePath) {
6178
- const absolutePath = import_node_path14.default.isAbsolute(filePath) ? filePath : import_node_path14.default.resolve(filePath);
6424
+ const absolutePath = import_node_path15.default.isAbsolute(filePath) ? filePath : import_node_path15.default.resolve(filePath);
6179
6425
  const normalizedPath = absolutePath.replace(/\\/g, "/");
6180
6426
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
6181
6427
  return `file:///${normalizedPath}`;
@@ -6188,7 +6434,7 @@ function normalizeAttachments(attachments) {
6188
6434
  }
6189
6435
  const deduped = /* @__PURE__ */ new Set();
6190
6436
  for (const attachment of attachments) {
6191
- deduped.add(import_node_path14.default.resolve(attachment));
6437
+ deduped.add(import_node_path15.default.resolve(attachment));
6192
6438
  }
6193
6439
  return Array.from(deduped);
6194
6440
  }
@@ -6197,7 +6443,7 @@ function mergeAttachments(all) {
6197
6443
  for (const list of all) {
6198
6444
  if (!list) continue;
6199
6445
  for (const inputFile of list) {
6200
- deduped.add(import_node_path14.default.resolve(inputFile));
6446
+ deduped.add(import_node_path15.default.resolve(inputFile));
6201
6447
  }
6202
6448
  }
6203
6449
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -6245,9 +6491,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
6245
6491
 
6246
6492
  // src/evaluation/providers/targets-file.ts
6247
6493
  var import_node_fs6 = require("fs");
6248
- var import_promises12 = require("fs/promises");
6249
- var import_node_path15 = __toESM(require("path"), 1);
6250
- var import_yaml3 = require("yaml");
6494
+ var import_promises13 = require("fs/promises");
6495
+ var import_node_path16 = __toESM(require("path"), 1);
6496
+ var import_yaml4 = require("yaml");
6251
6497
  function isRecord(value) {
6252
6498
  return typeof value === "object" && value !== null && !Array.isArray(value);
6253
6499
  }
@@ -6276,19 +6522,19 @@ function assertTargetDefinition(value, index, filePath) {
6276
6522
  }
6277
6523
  async function fileExists3(filePath) {
6278
6524
  try {
6279
- await (0, import_promises12.access)(filePath, import_node_fs6.constants.F_OK);
6525
+ await (0, import_promises13.access)(filePath, import_node_fs6.constants.F_OK);
6280
6526
  return true;
6281
6527
  } catch {
6282
6528
  return false;
6283
6529
  }
6284
6530
  }
6285
6531
  async function readTargetDefinitions(filePath) {
6286
- const absolutePath = import_node_path15.default.resolve(filePath);
6532
+ const absolutePath = import_node_path16.default.resolve(filePath);
6287
6533
  if (!await fileExists3(absolutePath)) {
6288
6534
  throw new Error(`targets.yaml not found at ${absolutePath}`);
6289
6535
  }
6290
- const raw = await (0, import_promises12.readFile)(absolutePath, "utf8");
6291
- const parsed = (0, import_yaml3.parse)(raw);
6536
+ const raw = await (0, import_promises13.readFile)(absolutePath, "utf8");
6537
+ const parsed = (0, import_yaml4.parse)(raw);
6292
6538
  if (!isRecord(parsed)) {
6293
6539
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
6294
6540
  }
@@ -6494,15 +6740,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
6494
6740
  });
6495
6741
  }
6496
6742
  async function execShellWithStdin(command, stdinPayload, options = {}) {
6497
- const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("fs/promises");
6743
+ const { mkdir: mkdir4, readFile: readFile9, rm: rm4, writeFile: writeFile4 } = await import("fs/promises");
6498
6744
  const { tmpdir: tmpdir4 } = await import("os");
6499
- const path17 = await import("path");
6745
+ const path18 = await import("path");
6500
6746
  const { randomUUID: randomUUID4 } = await import("crypto");
6501
- const dir = path17.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
6747
+ const dir = path18.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
6502
6748
  await mkdir4(dir, { recursive: true });
6503
- const stdinPath = path17.join(dir, "stdin.txt");
6504
- const stdoutPath = path17.join(dir, "stdout.txt");
6505
- const stderrPath = path17.join(dir, "stderr.txt");
6749
+ const stdinPath = path18.join(dir, "stdin.txt");
6750
+ const stdoutPath = path18.join(dir, "stdout.txt");
6751
+ const stderrPath = path18.join(dir, "stderr.txt");
6506
6752
  await writeFile4(stdinPath, stdinPayload, "utf8");
6507
6753
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
6508
6754
  const { spawn: spawn4 } = await import("child_process");
@@ -6532,8 +6778,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
6532
6778
  resolve(code ?? 0);
6533
6779
  });
6534
6780
  });
6535
- const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
6536
- const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
6781
+ const stdout = (await readFile9(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
6782
+ const stderr = (await readFile9(stderrPath, "utf8")).replace(/\r\n/g, "\n");
6537
6783
  return { stdout, stderr, exitCode };
6538
6784
  } finally {
6539
6785
  await rm4(dir, { recursive: true, force: true });
@@ -6805,7 +7051,7 @@ var CodeEvaluator = class {
6805
7051
  outputMessages: context.outputMessages ?? null,
6806
7052
  guidelineFiles: context.evalCase.guideline_paths,
6807
7053
  inputFiles: context.evalCase.file_paths.filter(
6808
- (path17) => !context.evalCase.guideline_paths.includes(path17)
7054
+ (path18) => !context.evalCase.guideline_paths.includes(path18)
6809
7055
  ),
6810
7056
  inputMessages: context.evalCase.input_messages,
6811
7057
  traceSummary: context.traceSummary ?? null,
@@ -7591,115 +7837,115 @@ var FieldAccuracyEvaluator = class {
7591
7837
  * Evaluate a single field against the expected value.
7592
7838
  */
7593
7839
  evaluateField(fieldConfig, candidateData, expectedData) {
7594
- const { path: path17, match, required = true, weight = 1 } = fieldConfig;
7595
- const candidateValue = resolvePath(candidateData, path17);
7596
- const expectedValue = resolvePath(expectedData, path17);
7840
+ const { path: path18, match, required = true, weight = 1 } = fieldConfig;
7841
+ const candidateValue = resolvePath(candidateData, path18);
7842
+ const expectedValue = resolvePath(expectedData, path18);
7597
7843
  if (expectedValue === void 0) {
7598
7844
  return {
7599
- path: path17,
7845
+ path: path18,
7600
7846
  score: 1,
7601
7847
  // No expected value means no comparison needed
7602
7848
  weight,
7603
7849
  hit: true,
7604
- message: `${path17}: no expected value`
7850
+ message: `${path18}: no expected value`
7605
7851
  };
7606
7852
  }
7607
7853
  if (candidateValue === void 0) {
7608
7854
  if (required) {
7609
7855
  return {
7610
- path: path17,
7856
+ path: path18,
7611
7857
  score: 0,
7612
7858
  weight,
7613
7859
  hit: false,
7614
- message: `${path17} (required, missing)`
7860
+ message: `${path18} (required, missing)`
7615
7861
  };
7616
7862
  }
7617
7863
  return {
7618
- path: path17,
7864
+ path: path18,
7619
7865
  score: 1,
7620
7866
  // Don't penalize missing optional fields
7621
7867
  weight: 0,
7622
7868
  // Zero weight means it won't affect the score
7623
7869
  hit: true,
7624
- message: `${path17}: optional field missing`
7870
+ message: `${path18}: optional field missing`
7625
7871
  };
7626
7872
  }
7627
7873
  switch (match) {
7628
7874
  case "exact":
7629
- return this.compareExact(path17, candidateValue, expectedValue, weight);
7875
+ return this.compareExact(path18, candidateValue, expectedValue, weight);
7630
7876
  case "numeric_tolerance":
7631
7877
  return this.compareNumericTolerance(
7632
- path17,
7878
+ path18,
7633
7879
  candidateValue,
7634
7880
  expectedValue,
7635
7881
  fieldConfig,
7636
7882
  weight
7637
7883
  );
7638
7884
  case "date":
7639
- return this.compareDate(path17, candidateValue, expectedValue, fieldConfig, weight);
7885
+ return this.compareDate(path18, candidateValue, expectedValue, fieldConfig, weight);
7640
7886
  default:
7641
7887
  return {
7642
- path: path17,
7888
+ path: path18,
7643
7889
  score: 0,
7644
7890
  weight,
7645
7891
  hit: false,
7646
- message: `${path17}: unknown match type "${match}"`
7892
+ message: `${path18}: unknown match type "${match}"`
7647
7893
  };
7648
7894
  }
7649
7895
  }
7650
7896
  /**
7651
7897
  * Exact equality comparison.
7652
7898
  */
7653
- compareExact(path17, candidateValue, expectedValue, weight) {
7899
+ compareExact(path18, candidateValue, expectedValue, weight) {
7654
7900
  if (deepEqual(candidateValue, expectedValue)) {
7655
7901
  return {
7656
- path: path17,
7902
+ path: path18,
7657
7903
  score: 1,
7658
7904
  weight,
7659
7905
  hit: true,
7660
- message: path17
7906
+ message: path18
7661
7907
  };
7662
7908
  }
7663
7909
  if (typeof candidateValue !== typeof expectedValue) {
7664
7910
  return {
7665
- path: path17,
7911
+ path: path18,
7666
7912
  score: 0,
7667
7913
  weight,
7668
7914
  hit: false,
7669
- message: `${path17} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
7915
+ message: `${path18} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
7670
7916
  };
7671
7917
  }
7672
7918
  return {
7673
- path: path17,
7919
+ path: path18,
7674
7920
  score: 0,
7675
7921
  weight,
7676
7922
  hit: false,
7677
- message: `${path17} (value mismatch)`
7923
+ message: `${path18} (value mismatch)`
7678
7924
  };
7679
7925
  }
7680
7926
  /**
7681
7927
  * Numeric comparison with absolute or relative tolerance.
7682
7928
  */
7683
- compareNumericTolerance(path17, candidateValue, expectedValue, fieldConfig, weight) {
7929
+ compareNumericTolerance(path18, candidateValue, expectedValue, fieldConfig, weight) {
7684
7930
  const { tolerance = 0, relative = false } = fieldConfig;
7685
7931
  const candidateNum = toNumber(candidateValue);
7686
7932
  const expectedNum = toNumber(expectedValue);
7687
7933
  if (candidateNum === null || expectedNum === null) {
7688
7934
  return {
7689
- path: path17,
7935
+ path: path18,
7690
7936
  score: 0,
7691
7937
  weight,
7692
7938
  hit: false,
7693
- message: `${path17} (non-numeric value)`
7939
+ message: `${path18} (non-numeric value)`
7694
7940
  };
7695
7941
  }
7696
7942
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
7697
7943
  return {
7698
- path: path17,
7944
+ path: path18,
7699
7945
  score: 0,
7700
7946
  weight,
7701
7947
  hit: false,
7702
- message: `${path17} (invalid numeric value)`
7948
+ message: `${path18} (invalid numeric value)`
7703
7949
  };
7704
7950
  }
7705
7951
  const diff = Math.abs(candidateNum - expectedNum);
@@ -7712,61 +7958,61 @@ var FieldAccuracyEvaluator = class {
7712
7958
  }
7713
7959
  if (withinTolerance) {
7714
7960
  return {
7715
- path: path17,
7961
+ path: path18,
7716
7962
  score: 1,
7717
7963
  weight,
7718
7964
  hit: true,
7719
- message: `${path17} (within tolerance: diff=${diff.toFixed(2)})`
7965
+ message: `${path18} (within tolerance: diff=${diff.toFixed(2)})`
7720
7966
  };
7721
7967
  }
7722
7968
  return {
7723
- path: path17,
7969
+ path: path18,
7724
7970
  score: 0,
7725
7971
  weight,
7726
7972
  hit: false,
7727
- message: `${path17} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
7973
+ message: `${path18} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
7728
7974
  };
7729
7975
  }
7730
7976
  /**
7731
7977
  * Date comparison with format normalization.
7732
7978
  */
7733
- compareDate(path17, candidateValue, expectedValue, fieldConfig, weight) {
7979
+ compareDate(path18, candidateValue, expectedValue, fieldConfig, weight) {
7734
7980
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
7735
7981
  const candidateDate = parseDate(String(candidateValue), formats);
7736
7982
  const expectedDate = parseDate(String(expectedValue), formats);
7737
7983
  if (candidateDate === null) {
7738
7984
  return {
7739
- path: path17,
7985
+ path: path18,
7740
7986
  score: 0,
7741
7987
  weight,
7742
7988
  hit: false,
7743
- message: `${path17} (unparseable candidate date)`
7989
+ message: `${path18} (unparseable candidate date)`
7744
7990
  };
7745
7991
  }
7746
7992
  if (expectedDate === null) {
7747
7993
  return {
7748
- path: path17,
7994
+ path: path18,
7749
7995
  score: 0,
7750
7996
  weight,
7751
7997
  hit: false,
7752
- message: `${path17} (unparseable expected date)`
7998
+ message: `${path18} (unparseable expected date)`
7753
7999
  };
7754
8000
  }
7755
8001
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
7756
8002
  return {
7757
- path: path17,
8003
+ path: path18,
7758
8004
  score: 1,
7759
8005
  weight,
7760
8006
  hit: true,
7761
- message: path17
8007
+ message: path18
7762
8008
  };
7763
8009
  }
7764
8010
  return {
7765
- path: path17,
8011
+ path: path18,
7766
8012
  score: 0,
7767
8013
  weight,
7768
8014
  hit: false,
7769
- message: `${path17} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
8015
+ message: `${path18} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
7770
8016
  };
7771
8017
  }
7772
8018
  /**
@@ -7806,11 +8052,11 @@ var FieldAccuracyEvaluator = class {
7806
8052
  };
7807
8053
  }
7808
8054
  };
7809
- function resolvePath(obj, path17) {
7810
- if (!path17 || !obj) {
8055
+ function resolvePath(obj, path18) {
8056
+ if (!path18 || !obj) {
7811
8057
  return void 0;
7812
8058
  }
7813
- const parts = path17.split(/\.|\[|\]/).filter((p) => p.length > 0);
8059
+ const parts = path18.split(/\.|\[|\]/).filter((p) => p.length > 0);
7814
8060
  let current = obj;
7815
8061
  for (const part of parts) {
7816
8062
  if (current === null || current === void 0) {
@@ -8246,7 +8492,7 @@ var ToolTrajectoryEvaluator = class {
8246
8492
 
8247
8493
  // src/evaluation/orchestrator.ts
8248
8494
  var import_node_crypto5 = require("crypto");
8249
- var import_node_path16 = __toESM(require("path"), 1);
8495
+ var import_node_path17 = __toESM(require("path"), 1);
8250
8496
 
8251
8497
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
8252
8498
  var Node = class {
@@ -9045,7 +9291,7 @@ async function runEvaluatorList(options) {
9045
9291
  });
9046
9292
  }
9047
9293
  if (evaluator.type === "composite") {
9048
- const evalFileDir = evalCase.guideline_paths[0] ? import_node_path16.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
9294
+ const evalFileDir = evalCase.guideline_paths[0] ? import_node_path17.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
9049
9295
  const createEvaluator = (memberConfig) => {
9050
9296
  switch (memberConfig.type) {
9051
9297
  case "llm_judge":
@@ -9620,6 +9866,7 @@ function createAgentKernel() {
9620
9866
  createAgentKernel,
9621
9867
  createProvider,
9622
9868
  deepEqual,
9869
+ detectFormat,
9623
9870
  ensureVSCodeSubagents,
9624
9871
  executeScript,
9625
9872
  explorationRatio,