@agentv/core 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -53,6 +53,7 @@ __export(index_exports, {
53
53
  createAgentKernel: () => createAgentKernel,
54
54
  createProvider: () => createProvider,
55
55
  deepEqual: () => deepEqual,
56
+ detectFormat: () => detectFormat,
56
57
  ensureVSCodeSubagents: () => ensureVSCodeSubagents,
57
58
  executeScript: () => executeScript,
58
59
  explorationRatio: () => explorationRatio,
@@ -226,9 +227,9 @@ function mergeExecutionMetrics(summary, metrics) {
226
227
  }
227
228
 
228
229
  // src/evaluation/yaml-parser.ts
229
- var import_promises6 = require("fs/promises");
230
- var import_node_path6 = __toESM(require("path"), 1);
231
- var import_yaml2 = require("yaml");
230
+ var import_promises7 = require("fs/promises");
231
+ var import_node_path7 = __toESM(require("path"), 1);
232
+ var import_yaml3 = require("yaml");
232
233
 
233
234
  // src/evaluation/loaders/config-loader.ts
234
235
  var import_promises2 = require("fs/promises");
@@ -1006,6 +1007,11 @@ function isValidFieldAggregationType(value) {
1006
1007
  return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
1007
1008
  }
1008
1009
 
1010
+ // src/evaluation/loaders/jsonl-parser.ts
1011
+ var import_promises5 = require("fs/promises");
1012
+ var import_node_path5 = __toESM(require("path"), 1);
1013
+ var import_yaml2 = require("yaml");
1014
+
1009
1015
  // src/evaluation/loaders/message-processor.ts
1010
1016
  var import_promises4 = require("fs/promises");
1011
1017
  var import_node_path4 = __toESM(require("path"), 1);
@@ -1266,28 +1272,271 @@ async function processExpectedMessages(options) {
1266
1272
  return segments;
1267
1273
  }
1268
1274
 
1269
- // src/evaluation/formatting/prompt-builder.ts
1270
- var import_promises5 = require("fs/promises");
1271
- var import_node_path5 = __toESM(require("path"), 1);
1275
+ // src/evaluation/loaders/jsonl-parser.ts
1272
1276
  var ANSI_YELLOW5 = "\x1B[33m";
1277
+ var ANSI_RED = "\x1B[31m";
1273
1278
  var ANSI_RESET5 = "\x1B[0m";
1279
+ function detectFormat(filePath) {
1280
+ const ext = import_node_path5.default.extname(filePath).toLowerCase();
1281
+ if (ext === ".jsonl") return "jsonl";
1282
+ if (ext === ".yaml" || ext === ".yml") return "yaml";
1283
+ throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
1284
+ }
1285
+ async function loadSidecarMetadata(jsonlPath, verbose) {
1286
+ const dir = import_node_path5.default.dirname(jsonlPath);
1287
+ const base = import_node_path5.default.basename(jsonlPath, ".jsonl");
1288
+ const sidecarPath = import_node_path5.default.join(dir, `${base}.yaml`);
1289
+ if (!await fileExists(sidecarPath)) {
1290
+ if (verbose) {
1291
+ logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
1292
+ }
1293
+ return {};
1294
+ }
1295
+ try {
1296
+ const content = await (0, import_promises5.readFile)(sidecarPath, "utf8");
1297
+ const parsed = (0, import_yaml2.parse)(content);
1298
+ if (!isJsonObject(parsed)) {
1299
+ logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
1300
+ return {};
1301
+ }
1302
+ return {
1303
+ description: asString4(parsed.description),
1304
+ dataset: asString4(parsed.dataset),
1305
+ execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
1306
+ evaluator: parsed.evaluator
1307
+ };
1308
+ } catch (error) {
1309
+ logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error.message}`);
1310
+ return {};
1311
+ }
1312
+ }
1313
+ function parseJsonlContent(content, filePath) {
1314
+ const lines = content.split("\n");
1315
+ const cases = [];
1316
+ for (let i = 0; i < lines.length; i++) {
1317
+ const line = lines[i].trim();
1318
+ if (line === "") continue;
1319
+ try {
1320
+ const parsed = JSON.parse(line);
1321
+ if (!isJsonObject(parsed)) {
1322
+ throw new Error("Expected JSON object");
1323
+ }
1324
+ cases.push(parsed);
1325
+ } catch (error) {
1326
+ const message = error instanceof Error ? error.message : String(error);
1327
+ throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
1328
+ File: ${filePath}`);
1329
+ }
1330
+ }
1331
+ return cases;
1332
+ }
1333
+ async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
1334
+ const verbose = options?.verbose ?? false;
1335
+ const evalIdFilter = options?.evalId;
1336
+ const absoluteTestPath = import_node_path5.default.resolve(evalFilePath);
1337
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
1338
+ const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
1339
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
1340
+ const guidelinePatterns = config?.guideline_patterns;
1341
+ const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
1342
+ const rawFile = await (0, import_promises5.readFile)(absoluteTestPath, "utf8");
1343
+ const rawCases = parseJsonlContent(rawFile, evalFilePath);
1344
+ const fallbackDataset = import_node_path5.default.basename(absoluteTestPath, ".jsonl") || "eval";
1345
+ const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
1346
+ const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
1347
+ const globalExecution = sidecar.execution;
1348
+ if (verbose) {
1349
+ console.log(`
1350
+ [JSONL Dataset: ${evalFilePath}]`);
1351
+ console.log(` Cases: ${rawCases.length}`);
1352
+ console.log(` Dataset name: ${datasetName}`);
1353
+ if (sidecar.description) {
1354
+ console.log(` Description: ${sidecar.description}`);
1355
+ }
1356
+ }
1357
+ const results = [];
1358
+ for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
1359
+ const evalcase = rawCases[lineIndex];
1360
+ const lineNumber = lineIndex + 1;
1361
+ const id = asString4(evalcase.id);
1362
+ if (evalIdFilter && id !== evalIdFilter) {
1363
+ continue;
1364
+ }
1365
+ const conversationId = asString4(evalcase.conversation_id);
1366
+ const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
1367
+ const inputMessagesValue = evalcase.input_messages;
1368
+ const expectedMessagesValue = evalcase.expected_messages;
1369
+ if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1370
+ logError(
1371
+ `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
1372
+ );
1373
+ continue;
1374
+ }
1375
+ const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
1376
+ const inputMessages = inputMessagesValue.filter(
1377
+ (msg) => isTestMessage(msg)
1378
+ );
1379
+ const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1380
+ if (hasExpectedMessages && expectedMessages.length === 0) {
1381
+ logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
1382
+ continue;
1383
+ }
1384
+ const guidelinePaths = [];
1385
+ const inputTextParts = [];
1386
+ const inputSegments = await processMessages({
1387
+ messages: inputMessages,
1388
+ searchRoots,
1389
+ repoRootPath,
1390
+ guidelinePatterns,
1391
+ guidelinePaths,
1392
+ textParts: inputTextParts,
1393
+ messageType: "input",
1394
+ verbose
1395
+ });
1396
+ const outputSegments = hasExpectedMessages ? await processExpectedMessages({
1397
+ messages: expectedMessages,
1398
+ searchRoots,
1399
+ repoRootPath,
1400
+ verbose
1401
+ }) : [];
1402
+ let referenceAnswer = "";
1403
+ if (outputSegments.length > 0) {
1404
+ const lastMessage = outputSegments[outputSegments.length - 1];
1405
+ const content = lastMessage.content;
1406
+ const toolCalls = lastMessage.tool_calls;
1407
+ if (typeof content === "string") {
1408
+ referenceAnswer = content;
1409
+ } else if (content !== void 0 && content !== null) {
1410
+ referenceAnswer = JSON.stringify(content, null, 2);
1411
+ } else if (toolCalls !== void 0 && toolCalls !== null) {
1412
+ referenceAnswer = JSON.stringify(toolCalls, null, 2);
1413
+ }
1414
+ }
1415
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
1416
+ const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
1417
+ const mergedExecution = caseExecution ?? globalExecution;
1418
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
1419
+ let evaluators;
1420
+ try {
1421
+ evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
1422
+ } catch (error) {
1423
+ const message = error instanceof Error ? error.message : String(error);
1424
+ logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
1425
+ continue;
1426
+ }
1427
+ const inlineRubrics = evalcase.rubrics;
1428
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
1429
+ const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
1430
+ if (typeof rubric === "string") {
1431
+ return {
1432
+ id: `rubric-${index + 1}`,
1433
+ description: rubric,
1434
+ weight: 1,
1435
+ required: true
1436
+ };
1437
+ }
1438
+ return {
1439
+ id: asString4(rubric.id) ?? `rubric-${index + 1}`,
1440
+ description: asString4(rubric.description) ?? "",
1441
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1442
+ required: typeof rubric.required === "boolean" ? rubric.required : true
1443
+ };
1444
+ }).filter((r) => r.description.length > 0);
1445
+ if (rubricItems.length > 0) {
1446
+ const rubricEvaluator = {
1447
+ name: "rubric",
1448
+ type: "llm_judge",
1449
+ rubrics: rubricItems
1450
+ };
1451
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
1452
+ }
1453
+ }
1454
+ const userFilePaths = [];
1455
+ for (const segment of inputSegments) {
1456
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
1457
+ userFilePaths.push(segment.resolvedPath);
1458
+ }
1459
+ }
1460
+ const allFilePaths = [
1461
+ ...guidelinePaths.map((guidelinePath) => import_node_path5.default.resolve(guidelinePath)),
1462
+ ...userFilePaths
1463
+ ];
1464
+ const testCase = {
1465
+ id,
1466
+ dataset: datasetName,
1467
+ conversation_id: conversationId,
1468
+ question,
1469
+ input_messages: inputMessages,
1470
+ input_segments: inputSegments,
1471
+ expected_messages: outputSegments,
1472
+ reference_answer: referenceAnswer,
1473
+ guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path5.default.resolve(guidelinePath)),
1474
+ guideline_patterns: guidelinePatterns,
1475
+ file_paths: allFilePaths,
1476
+ expected_outcome: outcome,
1477
+ evaluator: evalCaseEvaluatorKind,
1478
+ evaluators
1479
+ };
1480
+ if (verbose) {
1481
+ console.log(`
1482
+ [Eval Case: ${id}]`);
1483
+ if (testCase.guideline_paths.length > 0) {
1484
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
1485
+ for (const guidelinePath of testCase.guideline_paths) {
1486
+ console.log(` - ${guidelinePath}`);
1487
+ }
1488
+ } else {
1489
+ console.log(" No guidelines found");
1490
+ }
1491
+ }
1492
+ results.push(testCase);
1493
+ }
1494
+ return results;
1495
+ }
1496
+ function asString4(value) {
1497
+ return typeof value === "string" ? value : void 0;
1498
+ }
1499
+ function logWarning4(message, details) {
1500
+ if (details && details.length > 0) {
1501
+ const detailBlock = details.join("\n");
1502
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
1503
+ ${detailBlock}${ANSI_RESET5}`);
1504
+ } else {
1505
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1506
+ }
1507
+ }
1508
+ function logError(message, details) {
1509
+ if (details && details.length > 0) {
1510
+ const detailBlock = details.join("\n");
1511
+ console.error(`${ANSI_RED}Error: ${message}
1512
+ ${detailBlock}${ANSI_RESET5}`);
1513
+ } else {
1514
+ console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
1515
+ }
1516
+ }
1517
+
1518
+ // src/evaluation/formatting/prompt-builder.ts
1519
+ var import_promises6 = require("fs/promises");
1520
+ var import_node_path6 = __toESM(require("path"), 1);
1521
+ var ANSI_YELLOW6 = "\x1B[33m";
1522
+ var ANSI_RESET6 = "\x1B[0m";
1274
1523
  async function buildPromptInputs(testCase, mode = "lm") {
1275
1524
  const guidelineParts = [];
1276
1525
  for (const rawPath of testCase.guideline_paths) {
1277
- const absolutePath = import_node_path5.default.resolve(rawPath);
1526
+ const absolutePath = import_node_path6.default.resolve(rawPath);
1278
1527
  if (!await fileExists(absolutePath)) {
1279
- logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
1528
+ logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
1280
1529
  continue;
1281
1530
  }
1282
1531
  try {
1283
- const content = (await (0, import_promises5.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1532
+ const content = (await (0, import_promises6.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1284
1533
  guidelineParts.push({
1285
1534
  content,
1286
1535
  isFile: true,
1287
- displayPath: import_node_path5.default.basename(absolutePath)
1536
+ displayPath: import_node_path6.default.basename(absolutePath)
1288
1537
  });
1289
1538
  } catch (error) {
1290
- logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
1539
+ logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
1291
1540
  }
1292
1541
  }
1293
1542
  const guidelines = formatFileContents(guidelineParts);
@@ -1311,9 +1560,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
1311
1560
  messageSegments.push({ type: "text", value: segment });
1312
1561
  }
1313
1562
  } else if (isJsonObject(segment)) {
1314
- const type = asString4(segment.type);
1563
+ const type = asString5(segment.type);
1315
1564
  if (type === "file") {
1316
- const value = asString4(segment.value);
1565
+ const value = asString5(segment.value);
1317
1566
  if (!value) continue;
1318
1567
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
1319
1568
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -1324,7 +1573,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
1324
1573
  messageSegments.push({ type: "file", text: fileText, path: value });
1325
1574
  }
1326
1575
  } else if (type === "text") {
1327
- const textValue = asString4(segment.value);
1576
+ const textValue = asString5(segment.value);
1328
1577
  if (textValue && textValue.trim().length > 0) {
1329
1578
  messageSegments.push({ type: "text", value: textValue });
1330
1579
  }
@@ -1478,22 +1727,22 @@ ${guidelineContent.trim()}`);
1478
1727
  }
1479
1728
  return chatPrompt.length > 0 ? chatPrompt : void 0;
1480
1729
  }
1481
- function asString4(value) {
1730
+ function asString5(value) {
1482
1731
  return typeof value === "string" ? value : void 0;
1483
1732
  }
1484
- function logWarning4(message) {
1485
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1733
+ function logWarning5(message) {
1734
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1486
1735
  }
1487
1736
 
1488
1737
  // src/evaluation/yaml-parser.ts
1489
- var ANSI_YELLOW6 = "\x1B[33m";
1490
- var ANSI_RED = "\x1B[31m";
1491
- var ANSI_RESET6 = "\x1B[0m";
1738
+ var ANSI_YELLOW7 = "\x1B[33m";
1739
+ var ANSI_RED2 = "\x1B[31m";
1740
+ var ANSI_RESET7 = "\x1B[0m";
1492
1741
  async function readTestSuiteMetadata(testFilePath) {
1493
1742
  try {
1494
- const absolutePath = import_node_path6.default.resolve(testFilePath);
1495
- const content = await (0, import_promises6.readFile)(absolutePath, "utf8");
1496
- const parsed = (0, import_yaml2.parse)(content);
1743
+ const absolutePath = import_node_path7.default.resolve(testFilePath);
1744
+ const content = await (0, import_promises7.readFile)(absolutePath, "utf8");
1745
+ const parsed = (0, import_yaml3.parse)(content);
1497
1746
  if (!isJsonObject(parsed)) {
1498
1747
  return {};
1499
1748
  }
@@ -1503,21 +1752,25 @@ async function readTestSuiteMetadata(testFilePath) {
1503
1752
  }
1504
1753
  }
1505
1754
  async function loadEvalCases(evalFilePath, repoRoot, options) {
1755
+ const format = detectFormat(evalFilePath);
1756
+ if (format === "jsonl") {
1757
+ return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
1758
+ }
1506
1759
  const verbose = options?.verbose ?? false;
1507
1760
  const evalIdFilter = options?.evalId;
1508
- const absoluteTestPath = import_node_path6.default.resolve(evalFilePath);
1761
+ const absoluteTestPath = import_node_path7.default.resolve(evalFilePath);
1509
1762
  const repoRootPath = resolveToAbsolutePath(repoRoot);
1510
1763
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
1511
1764
  const config = await loadConfig(absoluteTestPath, repoRootPath);
1512
1765
  const guidelinePatterns = config?.guideline_patterns;
1513
- const rawFile = await (0, import_promises6.readFile)(absoluteTestPath, "utf8");
1514
- const parsed = (0, import_yaml2.parse)(rawFile);
1766
+ const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
1767
+ const parsed = (0, import_yaml3.parse)(rawFile);
1515
1768
  if (!isJsonObject(parsed)) {
1516
1769
  throw new Error(`Invalid test file format: ${evalFilePath}`);
1517
1770
  }
1518
1771
  const suite = parsed;
1519
- const datasetNameFromSuite = asString5(suite.dataset)?.trim();
1520
- const fallbackDataset = import_node_path6.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1772
+ const datasetNameFromSuite = asString6(suite.dataset)?.trim();
1773
+ const fallbackDataset = import_node_path7.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1521
1774
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
1522
1775
  const rawTestcases = suite.evalcases;
1523
1776
  if (!Array.isArray(rawTestcases)) {
@@ -1525,24 +1778,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1525
1778
  }
1526
1779
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
1527
1780
  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
1528
- const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
1781
+ const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
1529
1782
  const results = [];
1530
1783
  for (const rawEvalcase of rawTestcases) {
1531
1784
  if (!isJsonObject(rawEvalcase)) {
1532
- logWarning5("Skipping invalid eval case entry (expected object)");
1785
+ logWarning6("Skipping invalid eval case entry (expected object)");
1533
1786
  continue;
1534
1787
  }
1535
1788
  const evalcase = rawEvalcase;
1536
- const id = asString5(evalcase.id);
1789
+ const id = asString6(evalcase.id);
1537
1790
  if (evalIdFilter && id !== evalIdFilter) {
1538
1791
  continue;
1539
1792
  }
1540
- const conversationId = asString5(evalcase.conversation_id);
1541
- const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
1793
+ const conversationId = asString6(evalcase.conversation_id);
1794
+ const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
1542
1795
  const inputMessagesValue = evalcase.input_messages;
1543
1796
  const expectedMessagesValue = evalcase.expected_messages;
1544
1797
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1545
- logError(
1798
+ logError2(
1546
1799
  `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
1547
1800
  );
1548
1801
  continue;
@@ -1553,7 +1806,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1553
1806
  );
1554
1807
  const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1555
1808
  if (hasExpectedMessages && expectedMessages.length === 0) {
1556
- logError(`No valid expected message found for eval case: ${id}`);
1809
+ logError2(`No valid expected message found for eval case: ${id}`);
1557
1810
  continue;
1558
1811
  }
1559
1812
  const guidelinePaths = [];
@@ -1594,7 +1847,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1594
1847
  evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
1595
1848
  } catch (error) {
1596
1849
  const message = error instanceof Error ? error.message : String(error);
1597
- logError(`Skipping eval case '${id}': ${message}`);
1850
+ logError2(`Skipping eval case '${id}': ${message}`);
1598
1851
  continue;
1599
1852
  }
1600
1853
  const inlineRubrics = evalcase.rubrics;
@@ -1609,8 +1862,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1609
1862
  };
1610
1863
  }
1611
1864
  return {
1612
- id: asString5(rubric.id) ?? `rubric-${index + 1}`,
1613
- description: asString5(rubric.description) ?? "",
1865
+ id: asString6(rubric.id) ?? `rubric-${index + 1}`,
1866
+ description: asString6(rubric.description) ?? "",
1614
1867
  weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1615
1868
  required: typeof rubric.required === "boolean" ? rubric.required : true
1616
1869
  };
@@ -1631,7 +1884,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1631
1884
  }
1632
1885
  }
1633
1886
  const allFilePaths = [
1634
- ...guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
1887
+ ...guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
1635
1888
  ...userFilePaths
1636
1889
  ];
1637
1890
  const testCase = {
@@ -1643,7 +1896,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1643
1896
  input_segments: inputSegments,
1644
1897
  expected_messages: outputSegments,
1645
1898
  reference_answer: referenceAnswer,
1646
- guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path6.default.resolve(guidelinePath)),
1899
+ guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
1647
1900
  guideline_patterns: guidelinePatterns,
1648
1901
  file_paths: allFilePaths,
1649
1902
  expected_outcome: outcome,
@@ -1666,35 +1919,35 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1666
1919
  }
1667
1920
  return results;
1668
1921
  }
1669
- function asString5(value) {
1922
+ function asString6(value) {
1670
1923
  return typeof value === "string" ? value : void 0;
1671
1924
  }
1672
- function logWarning5(message, details) {
1925
+ function logWarning6(message, details) {
1673
1926
  if (details && details.length > 0) {
1674
1927
  const detailBlock = details.join("\n");
1675
- console.warn(`${ANSI_YELLOW6}Warning: ${message}
1676
- ${detailBlock}${ANSI_RESET6}`);
1928
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}
1929
+ ${detailBlock}${ANSI_RESET7}`);
1677
1930
  } else {
1678
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1931
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
1679
1932
  }
1680
1933
  }
1681
- function logError(message, details) {
1934
+ function logError2(message, details) {
1682
1935
  if (details && details.length > 0) {
1683
1936
  const detailBlock = details.join("\n");
1684
- console.error(`${ANSI_RED}Error: ${message}
1685
- ${detailBlock}${ANSI_RESET6}`);
1937
+ console.error(`${ANSI_RED2}Error: ${message}
1938
+ ${detailBlock}${ANSI_RESET7}`);
1686
1939
  } else {
1687
- console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
1940
+ console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
1688
1941
  }
1689
1942
  }
1690
1943
 
1691
1944
  // src/evaluation/file-utils.ts
1692
1945
  var import_node_fs2 = require("fs");
1693
- var import_promises7 = require("fs/promises");
1694
- var import_node_path7 = __toESM(require("path"), 1);
1946
+ var import_promises8 = require("fs/promises");
1947
+ var import_node_path8 = __toESM(require("path"), 1);
1695
1948
  async function fileExists2(filePath) {
1696
1949
  try {
1697
- await (0, import_promises7.access)(filePath, import_node_fs2.constants.F_OK);
1950
+ await (0, import_promises8.access)(filePath, import_node_fs2.constants.F_OK);
1698
1951
  return true;
1699
1952
  } catch {
1700
1953
  return false;
@@ -1704,22 +1957,22 @@ function normalizeLineEndings(content) {
1704
1957
  return content.replace(/\r\n/g, "\n");
1705
1958
  }
1706
1959
  async function readTextFile(filePath) {
1707
- const content = await (0, import_promises7.readFile)(filePath, "utf8");
1960
+ const content = await (0, import_promises8.readFile)(filePath, "utf8");
1708
1961
  return normalizeLineEndings(content);
1709
1962
  }
1710
1963
  async function readJsonFile(filePath) {
1711
- const content = await (0, import_promises7.readFile)(filePath, "utf8");
1964
+ const content = await (0, import_promises8.readFile)(filePath, "utf8");
1712
1965
  return JSON.parse(content);
1713
1966
  }
1714
1967
  async function findGitRoot(startPath) {
1715
- let currentDir = import_node_path7.default.dirname(import_node_path7.default.resolve(startPath));
1716
- const root = import_node_path7.default.parse(currentDir).root;
1968
+ let currentDir = import_node_path8.default.dirname(import_node_path8.default.resolve(startPath));
1969
+ const root = import_node_path8.default.parse(currentDir).root;
1717
1970
  while (currentDir !== root) {
1718
- const gitPath = import_node_path7.default.join(currentDir, ".git");
1971
+ const gitPath = import_node_path8.default.join(currentDir, ".git");
1719
1972
  if (await fileExists2(gitPath)) {
1720
1973
  return currentDir;
1721
1974
  }
1722
- const parentDir = import_node_path7.default.dirname(currentDir);
1975
+ const parentDir = import_node_path8.default.dirname(currentDir);
1723
1976
  if (parentDir === currentDir) {
1724
1977
  break;
1725
1978
  }
@@ -1730,8 +1983,8 @@ async function findGitRoot(startPath) {
1730
1983
  function buildDirectoryChain2(filePath, repoRoot) {
1731
1984
  const directories = [];
1732
1985
  const seen = /* @__PURE__ */ new Set();
1733
- const boundary = import_node_path7.default.resolve(repoRoot);
1734
- let current = import_node_path7.default.resolve(import_node_path7.default.dirname(filePath));
1986
+ const boundary = import_node_path8.default.resolve(repoRoot);
1987
+ let current = import_node_path8.default.resolve(import_node_path8.default.dirname(filePath));
1735
1988
  while (current !== void 0) {
1736
1989
  if (!seen.has(current)) {
1737
1990
  directories.push(current);
@@ -1740,7 +1993,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
1740
1993
  if (current === boundary) {
1741
1994
  break;
1742
1995
  }
1743
- const parent = import_node_path7.default.dirname(current);
1996
+ const parent = import_node_path8.default.dirname(current);
1744
1997
  if (parent === current) {
1745
1998
  break;
1746
1999
  }
@@ -1754,16 +2007,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
1754
2007
  function buildSearchRoots2(evalPath, repoRoot) {
1755
2008
  const uniqueRoots = [];
1756
2009
  const addRoot = (root) => {
1757
- const normalized = import_node_path7.default.resolve(root);
2010
+ const normalized = import_node_path8.default.resolve(root);
1758
2011
  if (!uniqueRoots.includes(normalized)) {
1759
2012
  uniqueRoots.push(normalized);
1760
2013
  }
1761
2014
  };
1762
- let currentDir = import_node_path7.default.dirname(evalPath);
2015
+ let currentDir = import_node_path8.default.dirname(evalPath);
1763
2016
  let reachedBoundary = false;
1764
2017
  while (!reachedBoundary) {
1765
2018
  addRoot(currentDir);
1766
- const parentDir = import_node_path7.default.dirname(currentDir);
2019
+ const parentDir = import_node_path8.default.dirname(currentDir);
1767
2020
  if (currentDir === repoRoot || parentDir === currentDir) {
1768
2021
  reachedBoundary = true;
1769
2022
  } else {
@@ -1781,16 +2034,16 @@ function trimLeadingSeparators2(value) {
1781
2034
  async function resolveFileReference2(rawValue, searchRoots) {
1782
2035
  const displayPath = trimLeadingSeparators2(rawValue);
1783
2036
  const potentialPaths = [];
1784
- if (import_node_path7.default.isAbsolute(rawValue)) {
1785
- potentialPaths.push(import_node_path7.default.normalize(rawValue));
2037
+ if (import_node_path8.default.isAbsolute(rawValue)) {
2038
+ potentialPaths.push(import_node_path8.default.normalize(rawValue));
1786
2039
  }
1787
2040
  for (const base of searchRoots) {
1788
- potentialPaths.push(import_node_path7.default.resolve(base, displayPath));
2041
+ potentialPaths.push(import_node_path8.default.resolve(base, displayPath));
1789
2042
  }
1790
2043
  const attempted = [];
1791
2044
  const seen = /* @__PURE__ */ new Set();
1792
2045
  for (const candidate of potentialPaths) {
1793
- const absoluteCandidate = import_node_path7.default.resolve(candidate);
2046
+ const absoluteCandidate = import_node_path8.default.resolve(candidate);
1794
2047
  if (seen.has(absoluteCandidate)) {
1795
2048
  continue;
1796
2049
  }
@@ -2140,9 +2393,9 @@ async function withRetry(fn, retryConfig, signal) {
2140
2393
  var import_node_child_process = require("child_process");
2141
2394
  var import_node_crypto = require("crypto");
2142
2395
  var import_node_fs3 = require("fs");
2143
- var import_promises8 = require("fs/promises");
2396
+ var import_promises9 = require("fs/promises");
2144
2397
  var import_node_os = require("os");
2145
- var import_node_path9 = __toESM(require("path"), 1);
2398
+ var import_node_path10 = __toESM(require("path"), 1);
2146
2399
 
2147
2400
  // src/evaluation/providers/claude-code-log-tracker.ts
2148
2401
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
@@ -2198,7 +2451,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
2198
2451
  }
2199
2452
 
2200
2453
  // src/evaluation/providers/preread.ts
2201
- var import_node_path8 = __toESM(require("path"), 1);
2454
+ var import_node_path9 = __toESM(require("path"), 1);
2202
2455
  function buildPromptDocument(request, inputFiles, options) {
2203
2456
  const parts = [];
2204
2457
  const guidelineFiles = collectGuidelineFiles(
@@ -2221,7 +2474,7 @@ function normalizeInputFiles(inputFiles) {
2221
2474
  }
2222
2475
  const deduped = /* @__PURE__ */ new Map();
2223
2476
  for (const inputFile of inputFiles) {
2224
- const absolutePath = import_node_path8.default.resolve(inputFile);
2477
+ const absolutePath = import_node_path9.default.resolve(inputFile);
2225
2478
  if (!deduped.has(absolutePath)) {
2226
2479
  deduped.set(absolutePath, absolutePath);
2227
2480
  }
@@ -2234,14 +2487,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
2234
2487
  }
2235
2488
  const unique = /* @__PURE__ */ new Map();
2236
2489
  for (const inputFile of inputFiles) {
2237
- const absolutePath = import_node_path8.default.resolve(inputFile);
2490
+ const absolutePath = import_node_path9.default.resolve(inputFile);
2238
2491
  if (overrides?.has(absolutePath)) {
2239
2492
  if (!unique.has(absolutePath)) {
2240
2493
  unique.set(absolutePath, absolutePath);
2241
2494
  }
2242
2495
  continue;
2243
2496
  }
2244
- const normalized = absolutePath.split(import_node_path8.default.sep).join("/");
2497
+ const normalized = absolutePath.split(import_node_path9.default.sep).join("/");
2245
2498
  if (isGuidelineFile(normalized, guidelinePatterns)) {
2246
2499
  if (!unique.has(absolutePath)) {
2247
2500
  unique.set(absolutePath, absolutePath);
@@ -2256,7 +2509,7 @@ function collectInputFiles(inputFiles) {
2256
2509
  }
2257
2510
  const unique = /* @__PURE__ */ new Map();
2258
2511
  for (const inputFile of inputFiles) {
2259
- const absolutePath = import_node_path8.default.resolve(inputFile);
2512
+ const absolutePath = import_node_path9.default.resolve(inputFile);
2260
2513
  if (!unique.has(absolutePath)) {
2261
2514
  unique.set(absolutePath, absolutePath);
2262
2515
  }
@@ -2268,7 +2521,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
2268
2521
  return "";
2269
2522
  }
2270
2523
  const buildList = (files) => files.map((absolutePath) => {
2271
- const fileName = import_node_path8.default.basename(absolutePath);
2524
+ const fileName = import_node_path9.default.basename(absolutePath);
2272
2525
  const fileUri = pathToFileUri(absolutePath);
2273
2526
  return `* [${fileName}](${fileUri})`;
2274
2527
  });
@@ -2288,7 +2541,7 @@ ${buildList(inputFiles).join("\n")}.`);
2288
2541
  return sections.join("\n");
2289
2542
  }
2290
2543
  function pathToFileUri(filePath) {
2291
- const absolutePath = import_node_path8.default.isAbsolute(filePath) ? filePath : import_node_path8.default.resolve(filePath);
2544
+ const absolutePath = import_node_path9.default.isAbsolute(filePath) ? filePath : import_node_path9.default.resolve(filePath);
2292
2545
  const normalizedPath = absolutePath.replace(/\\/g, "/");
2293
2546
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
2294
2547
  return `file:///${normalizedPath}`;
@@ -2325,8 +2578,8 @@ var ClaudeCodeProvider = class {
2325
2578
  const workspaceRoot = await this.createWorkspace();
2326
2579
  const logger = await this.createStreamLogger(request).catch(() => void 0);
2327
2580
  try {
2328
- const promptFile = import_node_path9.default.join(workspaceRoot, PROMPT_FILENAME);
2329
- await (0, import_promises8.writeFile)(promptFile, request.question, "utf8");
2581
+ const promptFile = import_node_path10.default.join(workspaceRoot, PROMPT_FILENAME);
2582
+ await (0, import_promises9.writeFile)(promptFile, request.question, "utf8");
2330
2583
  const args = this.buildClaudeCodeArgs(request.question, inputFiles);
2331
2584
  const cwd = this.resolveCwd();
2332
2585
  const result = await this.executeClaudeCode(args, cwd, request.signal, logger);
@@ -2373,7 +2626,7 @@ var ClaudeCodeProvider = class {
2373
2626
  if (!this.config.cwd) {
2374
2627
  return process.cwd();
2375
2628
  }
2376
- return import_node_path9.default.resolve(this.config.cwd);
2629
+ return import_node_path10.default.resolve(this.config.cwd);
2377
2630
  }
2378
2631
  buildClaudeCodeArgs(prompt, inputFiles) {
2379
2632
  const args = [];
@@ -2430,11 +2683,11 @@ ${filesContext}`;
2430
2683
  }
2431
2684
  }
2432
2685
  async createWorkspace() {
2433
- return await (0, import_promises8.mkdtemp)(import_node_path9.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
2686
+ return await (0, import_promises9.mkdtemp)(import_node_path10.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
2434
2687
  }
2435
2688
  async cleanupWorkspace(workspaceRoot) {
2436
2689
  try {
2437
- await (0, import_promises8.rm)(workspaceRoot, { recursive: true, force: true });
2690
+ await (0, import_promises9.rm)(workspaceRoot, { recursive: true, force: true });
2438
2691
  } catch {
2439
2692
  }
2440
2693
  }
@@ -2444,9 +2697,9 @@ ${filesContext}`;
2444
2697
  return void 0;
2445
2698
  }
2446
2699
  if (this.config.logDir) {
2447
- return import_node_path9.default.resolve(this.config.logDir);
2700
+ return import_node_path10.default.resolve(this.config.logDir);
2448
2701
  }
2449
- return import_node_path9.default.join(process.cwd(), ".agentv", "logs", "claude-code");
2702
+ return import_node_path10.default.join(process.cwd(), ".agentv", "logs", "claude-code");
2450
2703
  }
2451
2704
  async createStreamLogger(request) {
2452
2705
  const logDir = this.resolveLogDirectory();
@@ -2454,13 +2707,13 @@ ${filesContext}`;
2454
2707
  return void 0;
2455
2708
  }
2456
2709
  try {
2457
- await (0, import_promises8.mkdir)(logDir, { recursive: true });
2710
+ await (0, import_promises9.mkdir)(logDir, { recursive: true });
2458
2711
  } catch (error) {
2459
2712
  const message = error instanceof Error ? error.message : String(error);
2460
2713
  console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
2461
2714
  return void 0;
2462
2715
  }
2463
- const filePath = import_node_path9.default.join(logDir, buildLogFilename(request, this.targetName));
2716
+ const filePath = import_node_path10.default.join(logDir, buildLogFilename(request, this.targetName));
2464
2717
  try {
2465
2718
  const logger = await ClaudeCodeStreamLogger.create({
2466
2719
  filePath,
@@ -2865,16 +3118,16 @@ function escapeShellArg(arg) {
2865
3118
  }
2866
3119
  async function defaultClaudeCodeRunner(options) {
2867
3120
  const tempId = (0, import_node_crypto.randomUUID)();
2868
- const stdoutFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stdout`);
2869
- const stderrFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stderr`);
2870
- const exitFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-exit`);
2871
- const pidFile = import_node_path9.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-pid`);
3121
+ const stdoutFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stdout`);
3122
+ const stderrFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-stderr`);
3123
+ const exitFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-exit`);
3124
+ const pidFile = import_node_path10.default.join((0, import_node_os.tmpdir)(), `agentv-cc-${tempId}-pid`);
2872
3125
  try {
2873
3126
  return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
2874
3127
  } finally {
2875
3128
  for (const file of [stdoutFile, stderrFile, exitFile, pidFile]) {
2876
3129
  try {
2877
- await (0, import_promises8.rm)(file, { force: true });
3130
+ await (0, import_promises9.rm)(file, { force: true });
2878
3131
  } catch {
2879
3132
  }
2880
3133
  }
@@ -2908,8 +3161,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2908
3161
  let lastStdoutSize = 0;
2909
3162
  const readFileIfExists = async (filePath) => {
2910
3163
  try {
2911
- const { readFile: readFile8 } = await import("fs/promises");
2912
- return await readFile8(filePath, "utf8");
3164
+ const { readFile: readFile9 } = await import("fs/promises");
3165
+ return await readFile9(filePath, "utf8");
2913
3166
  } catch {
2914
3167
  return "";
2915
3168
  }
@@ -2982,9 +3235,9 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2982
3235
 
2983
3236
  // src/evaluation/providers/cli.ts
2984
3237
  var import_node_child_process2 = require("child_process");
2985
- var import_promises9 = __toESM(require("fs/promises"), 1);
3238
+ var import_promises10 = __toESM(require("fs/promises"), 1);
2986
3239
  var import_node_os2 = __toESM(require("os"), 1);
2987
- var import_node_path10 = __toESM(require("path"), 1);
3240
+ var import_node_path11 = __toESM(require("path"), 1);
2988
3241
  var import_node_util = require("util");
2989
3242
  var import_zod = require("zod");
2990
3243
  var ToolCallSchema = import_zod.z.object({
@@ -3353,7 +3606,7 @@ var CliProvider = class {
3353
3606
  throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
3354
3607
  } finally {
3355
3608
  if (!this.keepTempFiles) {
3356
- await import_promises9.default.unlink(filePath).catch(() => {
3609
+ await import_promises10.default.unlink(filePath).catch(() => {
3357
3610
  });
3358
3611
  }
3359
3612
  }
@@ -3441,7 +3694,7 @@ function normalizeInputFiles2(inputFiles) {
3441
3694
  }
3442
3695
  const unique = /* @__PURE__ */ new Map();
3443
3696
  for (const inputFile of inputFiles) {
3444
- const absolutePath = import_node_path10.default.resolve(inputFile);
3697
+ const absolutePath = import_node_path11.default.resolve(inputFile);
3445
3698
  if (!unique.has(absolutePath)) {
3446
3699
  unique.set(absolutePath, absolutePath);
3447
3700
  }
@@ -3455,7 +3708,7 @@ function formatFileList(files, template) {
3455
3708
  const formatter = template ?? "{path}";
3456
3709
  return files.map((filePath) => {
3457
3710
  const escapedPath = shellEscape(filePath);
3458
- const escapedName = shellEscape(import_node_path10.default.basename(filePath));
3711
+ const escapedName = shellEscape(import_node_path11.default.basename(filePath));
3459
3712
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
3460
3713
  }).join(" ");
3461
3714
  }
@@ -3479,7 +3732,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
3479
3732
  const safeEvalId = evalCaseId || "unknown";
3480
3733
  const timestamp = Date.now();
3481
3734
  const random = Math.random().toString(36).substring(2, 9);
3482
- return import_node_path10.default.join(import_node_os2.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3735
+ return import_node_path11.default.join(import_node_os2.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3483
3736
  }
3484
3737
  function formatTimeoutSuffix2(timeoutMs) {
3485
3738
  if (!timeoutMs || timeoutMs <= 0) {
@@ -3493,9 +3746,9 @@ function formatTimeoutSuffix2(timeoutMs) {
3493
3746
  var import_node_child_process3 = require("child_process");
3494
3747
  var import_node_crypto2 = require("crypto");
3495
3748
  var import_node_fs4 = require("fs");
3496
- var import_promises10 = require("fs/promises");
3749
+ var import_promises11 = require("fs/promises");
3497
3750
  var import_node_os3 = require("os");
3498
- var import_node_path11 = __toESM(require("path"), 1);
3751
+ var import_node_path12 = __toESM(require("path"), 1);
3499
3752
  var import_node_util2 = require("util");
3500
3753
 
3501
3754
  // src/evaluation/providers/codex-log-tracker.ts
@@ -3590,8 +3843,8 @@ var CodexProvider = class {
3590
3843
  const promptContent = `${systemPrompt}
3591
3844
 
3592
3845
  ${basePrompt}`;
3593
- const promptFile = import_node_path11.default.join(workspaceRoot, PROMPT_FILENAME2);
3594
- await (0, import_promises10.writeFile)(promptFile, promptContent, "utf8");
3846
+ const promptFile = import_node_path12.default.join(workspaceRoot, PROMPT_FILENAME2);
3847
+ await (0, import_promises11.writeFile)(promptFile, promptContent, "utf8");
3595
3848
  const args = this.buildCodexArgs();
3596
3849
  const cwd = this.resolveCwd(workspaceRoot);
3597
3850
  const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
@@ -3640,7 +3893,7 @@ ${basePrompt}`;
3640
3893
  if (!this.config.cwd) {
3641
3894
  return workspaceRoot;
3642
3895
  }
3643
- return import_node_path11.default.resolve(this.config.cwd);
3896
+ return import_node_path12.default.resolve(this.config.cwd);
3644
3897
  }
3645
3898
  buildCodexArgs() {
3646
3899
  const args = [
@@ -3682,11 +3935,11 @@ ${basePrompt}`;
3682
3935
  }
3683
3936
  }
3684
3937
  async createWorkspace() {
3685
- return await (0, import_promises10.mkdtemp)(import_node_path11.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
3938
+ return await (0, import_promises11.mkdtemp)(import_node_path12.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX2));
3686
3939
  }
3687
3940
  async cleanupWorkspace(workspaceRoot) {
3688
3941
  try {
3689
- await (0, import_promises10.rm)(workspaceRoot, { recursive: true, force: true });
3942
+ await (0, import_promises11.rm)(workspaceRoot, { recursive: true, force: true });
3690
3943
  } catch {
3691
3944
  }
3692
3945
  }
@@ -3696,9 +3949,9 @@ ${basePrompt}`;
3696
3949
  return void 0;
3697
3950
  }
3698
3951
  if (this.config.logDir) {
3699
- return import_node_path11.default.resolve(this.config.logDir);
3952
+ return import_node_path12.default.resolve(this.config.logDir);
3700
3953
  }
3701
- return import_node_path11.default.join(process.cwd(), ".agentv", "logs", "codex");
3954
+ return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "codex");
3702
3955
  }
3703
3956
  async createStreamLogger(request) {
3704
3957
  const logDir = this.resolveLogDirectory();
@@ -3706,13 +3959,13 @@ ${basePrompt}`;
3706
3959
  return void 0;
3707
3960
  }
3708
3961
  try {
3709
- await (0, import_promises10.mkdir)(logDir, { recursive: true });
3962
+ await (0, import_promises11.mkdir)(logDir, { recursive: true });
3710
3963
  } catch (error) {
3711
3964
  const message = error instanceof Error ? error.message : String(error);
3712
3965
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
3713
3966
  return void 0;
3714
3967
  }
3715
- const filePath = import_node_path11.default.join(logDir, buildLogFilename2(request, this.targetName));
3968
+ const filePath = import_node_path12.default.join(logDir, buildLogFilename2(request, this.targetName));
3716
3969
  try {
3717
3970
  const logger = await CodexStreamLogger.create({
3718
3971
  filePath,
@@ -3927,9 +4180,9 @@ function tryParseJsonValue2(rawLine) {
3927
4180
  async function locateExecutable(candidate) {
3928
4181
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
3929
4182
  if (includesPathSeparator) {
3930
- const resolved = import_node_path11.default.isAbsolute(candidate) ? candidate : import_node_path11.default.resolve(candidate);
4183
+ const resolved = import_node_path12.default.isAbsolute(candidate) ? candidate : import_node_path12.default.resolve(candidate);
3931
4184
  const executablePath = await ensureWindowsExecutableVariant(resolved);
3932
- await (0, import_promises10.access)(executablePath, import_node_fs4.constants.F_OK);
4185
+ await (0, import_promises11.access)(executablePath, import_node_fs4.constants.F_OK);
3933
4186
  return executablePath;
3934
4187
  }
3935
4188
  const locator = process.platform === "win32" ? "where" : "which";
@@ -3939,7 +4192,7 @@ async function locateExecutable(candidate) {
3939
4192
  const preferred = selectExecutableCandidate(lines);
3940
4193
  if (preferred) {
3941
4194
  const executablePath = await ensureWindowsExecutableVariant(preferred);
3942
- await (0, import_promises10.access)(executablePath, import_node_fs4.constants.F_OK);
4195
+ await (0, import_promises11.access)(executablePath, import_node_fs4.constants.F_OK);
3943
4196
  return executablePath;
3944
4197
  }
3945
4198
  } catch {
@@ -3973,7 +4226,7 @@ async function ensureWindowsExecutableVariant(candidate) {
3973
4226
  for (const ext of extensions) {
3974
4227
  const withExtension = `${candidate}${ext}`;
3975
4228
  try {
3976
- await (0, import_promises10.access)(withExtension, import_node_fs4.constants.F_OK);
4229
+ await (0, import_promises11.access)(withExtension, import_node_fs4.constants.F_OK);
3977
4230
  return withExtension;
3978
4231
  } catch {
3979
4232
  }
@@ -4438,9 +4691,9 @@ function extractToolCalls2(content) {
4438
4691
  var import_node_child_process4 = require("child_process");
4439
4692
  var import_node_crypto3 = require("crypto");
4440
4693
  var import_node_fs5 = require("fs");
4441
- var import_promises11 = require("fs/promises");
4694
+ var import_promises12 = require("fs/promises");
4442
4695
  var import_node_os4 = require("os");
4443
- var import_node_path12 = __toESM(require("path"), 1);
4696
+ var import_node_path13 = __toESM(require("path"), 1);
4444
4697
 
4445
4698
  // src/evaluation/providers/pi-log-tracker.ts
4446
4699
  var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
@@ -4524,8 +4777,8 @@ var PiCodingAgentProvider = class {
4524
4777
  const workspaceRoot = await this.createWorkspace();
4525
4778
  const logger = await this.createStreamLogger(request).catch(() => void 0);
4526
4779
  try {
4527
- const promptFile = import_node_path12.default.join(workspaceRoot, PROMPT_FILENAME3);
4528
- await (0, import_promises11.writeFile)(promptFile, request.question, "utf8");
4780
+ const promptFile = import_node_path13.default.join(workspaceRoot, PROMPT_FILENAME3);
4781
+ await (0, import_promises12.writeFile)(promptFile, request.question, "utf8");
4529
4782
  const args = this.buildPiArgs(request.question, inputFiles);
4530
4783
  const cwd = this.resolveCwd(workspaceRoot);
4531
4784
  const result = await this.executePi(args, cwd, request.signal, logger);
@@ -4566,7 +4819,7 @@ var PiCodingAgentProvider = class {
4566
4819
  if (!this.config.cwd) {
4567
4820
  return workspaceRoot;
4568
4821
  }
4569
- return import_node_path12.default.resolve(this.config.cwd);
4822
+ return import_node_path13.default.resolve(this.config.cwd);
4570
4823
  }
4571
4824
  buildPiArgs(prompt, inputFiles) {
4572
4825
  const args = [];
@@ -4655,19 +4908,19 @@ ${prompt}`;
4655
4908
  return env;
4656
4909
  }
4657
4910
  async createWorkspace() {
4658
- return await (0, import_promises11.mkdtemp)(import_node_path12.default.join((0, import_node_os4.tmpdir)(), WORKSPACE_PREFIX3));
4911
+ return await (0, import_promises12.mkdtemp)(import_node_path13.default.join((0, import_node_os4.tmpdir)(), WORKSPACE_PREFIX3));
4659
4912
  }
4660
4913
  async cleanupWorkspace(workspaceRoot) {
4661
4914
  try {
4662
- await (0, import_promises11.rm)(workspaceRoot, { recursive: true, force: true });
4915
+ await (0, import_promises12.rm)(workspaceRoot, { recursive: true, force: true });
4663
4916
  } catch {
4664
4917
  }
4665
4918
  }
4666
4919
  resolveLogDirectory() {
4667
4920
  if (this.config.logDir) {
4668
- return import_node_path12.default.resolve(this.config.logDir);
4921
+ return import_node_path13.default.resolve(this.config.logDir);
4669
4922
  }
4670
- return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4923
+ return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4671
4924
  }
4672
4925
  async createStreamLogger(request) {
4673
4926
  const logDir = this.resolveLogDirectory();
@@ -4675,13 +4928,13 @@ ${prompt}`;
4675
4928
  return void 0;
4676
4929
  }
4677
4930
  try {
4678
- await (0, import_promises11.mkdir)(logDir, { recursive: true });
4931
+ await (0, import_promises12.mkdir)(logDir, { recursive: true });
4679
4932
  } catch (error) {
4680
4933
  const message = error instanceof Error ? error.message : String(error);
4681
4934
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
4682
4935
  return void 0;
4683
4936
  }
4684
- const filePath = import_node_path12.default.join(logDir, buildLogFilename3(request, this.targetName));
4937
+ const filePath = import_node_path13.default.join(logDir, buildLogFilename3(request, this.targetName));
4685
4938
  try {
4686
4939
  const logger = await PiStreamLogger.create({
4687
4940
  filePath,
@@ -5114,7 +5367,7 @@ async function defaultPiRunner(options) {
5114
5367
  }
5115
5368
 
5116
5369
  // src/evaluation/providers/targets.ts
5117
- var import_node_path13 = __toESM(require("path"), 1);
5370
+ var import_node_path14 = __toESM(require("path"), 1);
5118
5371
  var import_zod2 = require("zod");
5119
5372
  var CliHealthcheckHttpInputSchema = import_zod2.z.object({
5120
5373
  type: import_zod2.z.literal("http"),
@@ -5220,11 +5473,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
5220
5473
  allowLiteral: true,
5221
5474
  optionalEnv: true
5222
5475
  });
5223
- if (cwd && evalFilePath && !import_node_path13.default.isAbsolute(cwd)) {
5224
- cwd = import_node_path13.default.resolve(import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath)), cwd);
5476
+ if (cwd && evalFilePath && !import_node_path14.default.isAbsolute(cwd)) {
5477
+ cwd = import_node_path14.default.resolve(import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath)), cwd);
5225
5478
  }
5226
5479
  if (!cwd && evalFilePath) {
5227
- cwd = import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath));
5480
+ cwd = import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath));
5228
5481
  }
5229
5482
  return {
5230
5483
  type: "command",
@@ -5251,11 +5504,11 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
5251
5504
  allowLiteral: true,
5252
5505
  optionalEnv: true
5253
5506
  });
5254
- if (cwd && evalFilePath && !import_node_path13.default.isAbsolute(cwd)) {
5255
- cwd = import_node_path13.default.resolve(import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath)), cwd);
5507
+ if (cwd && evalFilePath && !import_node_path14.default.isAbsolute(cwd)) {
5508
+ cwd = import_node_path14.default.resolve(import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath)), cwd);
5256
5509
  }
5257
5510
  if (!cwd && evalFilePath) {
5258
- cwd = import_node_path13.default.dirname(import_node_path13.default.resolve(evalFilePath));
5511
+ cwd = import_node_path14.default.dirname(import_node_path14.default.resolve(evalFilePath));
5259
5512
  }
5260
5513
  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
5261
5514
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
@@ -5760,8 +6013,8 @@ function resolveCliConfig(target, env, evalFilePath) {
5760
6013
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
5761
6014
  if (!parseResult.success) {
5762
6015
  const firstError = parseResult.error.errors[0];
5763
- const path17 = firstError?.path.join(".") || "";
5764
- const prefix = path17 ? `${target.name} ${path17}: ` : `${target.name}: `;
6016
+ const path18 = firstError?.path.join(".") || "";
6017
+ const prefix = path18 ? `${target.name} ${path18}: ` : `${target.name}: `;
5765
6018
  throw new Error(`${prefix}${firstError?.message}`);
5766
6019
  }
5767
6020
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -5949,7 +6202,7 @@ function resolveOptionalNumberArray(source, description) {
5949
6202
  }
5950
6203
 
5951
6204
  // src/evaluation/providers/vscode.ts
5952
- var import_node_path14 = __toESM(require("path"), 1);
6205
+ var import_node_path15 = __toESM(require("path"), 1);
5953
6206
  var import_subagent = require("subagent");
5954
6207
 
5955
6208
  // src/evaluation/providers/vscode-templates.ts
@@ -6119,7 +6372,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
6119
6372
  return "";
6120
6373
  }
6121
6374
  const buildList = (files) => files.map((absolutePath) => {
6122
- const fileName = import_node_path14.default.basename(absolutePath);
6375
+ const fileName = import_node_path15.default.basename(absolutePath);
6123
6376
  const fileUri = pathToFileUri2(absolutePath);
6124
6377
  return `* [${fileName}](${fileUri})`;
6125
6378
  });
@@ -6144,8 +6397,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
6144
6397
  }
6145
6398
  const unique = /* @__PURE__ */ new Map();
6146
6399
  for (const attachment of attachments) {
6147
- const absolutePath = import_node_path14.default.resolve(attachment);
6148
- const normalized = absolutePath.split(import_node_path14.default.sep).join("/");
6400
+ const absolutePath = import_node_path15.default.resolve(attachment);
6401
+ const normalized = absolutePath.split(import_node_path15.default.sep).join("/");
6149
6402
  if (isGuidelineFile(normalized, guidelinePatterns)) {
6150
6403
  if (!unique.has(absolutePath)) {
6151
6404
  unique.set(absolutePath, absolutePath);
@@ -6160,7 +6413,7 @@ function collectAttachmentFiles(attachments) {
6160
6413
  }
6161
6414
  const unique = /* @__PURE__ */ new Map();
6162
6415
  for (const attachment of attachments) {
6163
- const absolutePath = import_node_path14.default.resolve(attachment);
6416
+ const absolutePath = import_node_path15.default.resolve(attachment);
6164
6417
  if (!unique.has(absolutePath)) {
6165
6418
  unique.set(absolutePath, absolutePath);
6166
6419
  }
@@ -6168,7 +6421,7 @@ function collectAttachmentFiles(attachments) {
6168
6421
  return Array.from(unique.values());
6169
6422
  }
6170
6423
  function pathToFileUri2(filePath) {
6171
- const absolutePath = import_node_path14.default.isAbsolute(filePath) ? filePath : import_node_path14.default.resolve(filePath);
6424
+ const absolutePath = import_node_path15.default.isAbsolute(filePath) ? filePath : import_node_path15.default.resolve(filePath);
6172
6425
  const normalizedPath = absolutePath.replace(/\\/g, "/");
6173
6426
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
6174
6427
  return `file:///${normalizedPath}`;
@@ -6181,7 +6434,7 @@ function normalizeAttachments(attachments) {
6181
6434
  }
6182
6435
  const deduped = /* @__PURE__ */ new Set();
6183
6436
  for (const attachment of attachments) {
6184
- deduped.add(import_node_path14.default.resolve(attachment));
6437
+ deduped.add(import_node_path15.default.resolve(attachment));
6185
6438
  }
6186
6439
  return Array.from(deduped);
6187
6440
  }
@@ -6190,7 +6443,7 @@ function mergeAttachments(all) {
6190
6443
  for (const list of all) {
6191
6444
  if (!list) continue;
6192
6445
  for (const inputFile of list) {
6193
- deduped.add(import_node_path14.default.resolve(inputFile));
6446
+ deduped.add(import_node_path15.default.resolve(inputFile));
6194
6447
  }
6195
6448
  }
6196
6449
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -6238,9 +6491,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
6238
6491
 
6239
6492
  // src/evaluation/providers/targets-file.ts
6240
6493
  var import_node_fs6 = require("fs");
6241
- var import_promises12 = require("fs/promises");
6242
- var import_node_path15 = __toESM(require("path"), 1);
6243
- var import_yaml3 = require("yaml");
6494
+ var import_promises13 = require("fs/promises");
6495
+ var import_node_path16 = __toESM(require("path"), 1);
6496
+ var import_yaml4 = require("yaml");
6244
6497
  function isRecord(value) {
6245
6498
  return typeof value === "object" && value !== null && !Array.isArray(value);
6246
6499
  }
@@ -6269,19 +6522,19 @@ function assertTargetDefinition(value, index, filePath) {
6269
6522
  }
6270
6523
  async function fileExists3(filePath) {
6271
6524
  try {
6272
- await (0, import_promises12.access)(filePath, import_node_fs6.constants.F_OK);
6525
+ await (0, import_promises13.access)(filePath, import_node_fs6.constants.F_OK);
6273
6526
  return true;
6274
6527
  } catch {
6275
6528
  return false;
6276
6529
  }
6277
6530
  }
6278
6531
  async function readTargetDefinitions(filePath) {
6279
- const absolutePath = import_node_path15.default.resolve(filePath);
6532
+ const absolutePath = import_node_path16.default.resolve(filePath);
6280
6533
  if (!await fileExists3(absolutePath)) {
6281
6534
  throw new Error(`targets.yaml not found at ${absolutePath}`);
6282
6535
  }
6283
- const raw = await (0, import_promises12.readFile)(absolutePath, "utf8");
6284
- const parsed = (0, import_yaml3.parse)(raw);
6536
+ const raw = await (0, import_promises13.readFile)(absolutePath, "utf8");
6537
+ const parsed = (0, import_yaml4.parse)(raw);
6285
6538
  if (!isRecord(parsed)) {
6286
6539
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
6287
6540
  }
@@ -6487,15 +6740,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
6487
6740
  });
6488
6741
  }
6489
6742
  async function execShellWithStdin(command, stdinPayload, options = {}) {
6490
- const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("fs/promises");
6743
+ const { mkdir: mkdir4, readFile: readFile9, rm: rm4, writeFile: writeFile4 } = await import("fs/promises");
6491
6744
  const { tmpdir: tmpdir4 } = await import("os");
6492
- const path17 = await import("path");
6745
+ const path18 = await import("path");
6493
6746
  const { randomUUID: randomUUID4 } = await import("crypto");
6494
- const dir = path17.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
6747
+ const dir = path18.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
6495
6748
  await mkdir4(dir, { recursive: true });
6496
- const stdinPath = path17.join(dir, "stdin.txt");
6497
- const stdoutPath = path17.join(dir, "stdout.txt");
6498
- const stderrPath = path17.join(dir, "stderr.txt");
6749
+ const stdinPath = path18.join(dir, "stdin.txt");
6750
+ const stdoutPath = path18.join(dir, "stdout.txt");
6751
+ const stderrPath = path18.join(dir, "stderr.txt");
6499
6752
  await writeFile4(stdinPath, stdinPayload, "utf8");
6500
6753
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
6501
6754
  const { spawn: spawn4 } = await import("child_process");
@@ -6525,8 +6778,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
6525
6778
  resolve(code ?? 0);
6526
6779
  });
6527
6780
  });
6528
- const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
6529
- const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
6781
+ const stdout = (await readFile9(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
6782
+ const stderr = (await readFile9(stderrPath, "utf8")).replace(/\r\n/g, "\n");
6530
6783
  return { stdout, stderr, exitCode };
6531
6784
  } finally {
6532
6785
  await rm4(dir, { recursive: true, force: true });
@@ -6798,7 +7051,7 @@ var CodeEvaluator = class {
6798
7051
  outputMessages: context.outputMessages ?? null,
6799
7052
  guidelineFiles: context.evalCase.guideline_paths,
6800
7053
  inputFiles: context.evalCase.file_paths.filter(
6801
- (path17) => !context.evalCase.guideline_paths.includes(path17)
7054
+ (path18) => !context.evalCase.guideline_paths.includes(path18)
6802
7055
  ),
6803
7056
  inputMessages: context.evalCase.input_messages,
6804
7057
  traceSummary: context.traceSummary ?? null,
@@ -7584,115 +7837,115 @@ var FieldAccuracyEvaluator = class {
7584
7837
  * Evaluate a single field against the expected value.
7585
7838
  */
7586
7839
  evaluateField(fieldConfig, candidateData, expectedData) {
7587
- const { path: path17, match, required = true, weight = 1 } = fieldConfig;
7588
- const candidateValue = resolvePath(candidateData, path17);
7589
- const expectedValue = resolvePath(expectedData, path17);
7840
+ const { path: path18, match, required = true, weight = 1 } = fieldConfig;
7841
+ const candidateValue = resolvePath(candidateData, path18);
7842
+ const expectedValue = resolvePath(expectedData, path18);
7590
7843
  if (expectedValue === void 0) {
7591
7844
  return {
7592
- path: path17,
7845
+ path: path18,
7593
7846
  score: 1,
7594
7847
  // No expected value means no comparison needed
7595
7848
  weight,
7596
7849
  hit: true,
7597
- message: `${path17}: no expected value`
7850
+ message: `${path18}: no expected value`
7598
7851
  };
7599
7852
  }
7600
7853
  if (candidateValue === void 0) {
7601
7854
  if (required) {
7602
7855
  return {
7603
- path: path17,
7856
+ path: path18,
7604
7857
  score: 0,
7605
7858
  weight,
7606
7859
  hit: false,
7607
- message: `${path17} (required, missing)`
7860
+ message: `${path18} (required, missing)`
7608
7861
  };
7609
7862
  }
7610
7863
  return {
7611
- path: path17,
7864
+ path: path18,
7612
7865
  score: 1,
7613
7866
  // Don't penalize missing optional fields
7614
7867
  weight: 0,
7615
7868
  // Zero weight means it won't affect the score
7616
7869
  hit: true,
7617
- message: `${path17}: optional field missing`
7870
+ message: `${path18}: optional field missing`
7618
7871
  };
7619
7872
  }
7620
7873
  switch (match) {
7621
7874
  case "exact":
7622
- return this.compareExact(path17, candidateValue, expectedValue, weight);
7875
+ return this.compareExact(path18, candidateValue, expectedValue, weight);
7623
7876
  case "numeric_tolerance":
7624
7877
  return this.compareNumericTolerance(
7625
- path17,
7878
+ path18,
7626
7879
  candidateValue,
7627
7880
  expectedValue,
7628
7881
  fieldConfig,
7629
7882
  weight
7630
7883
  );
7631
7884
  case "date":
7632
- return this.compareDate(path17, candidateValue, expectedValue, fieldConfig, weight);
7885
+ return this.compareDate(path18, candidateValue, expectedValue, fieldConfig, weight);
7633
7886
  default:
7634
7887
  return {
7635
- path: path17,
7888
+ path: path18,
7636
7889
  score: 0,
7637
7890
  weight,
7638
7891
  hit: false,
7639
- message: `${path17}: unknown match type "${match}"`
7892
+ message: `${path18}: unknown match type "${match}"`
7640
7893
  };
7641
7894
  }
7642
7895
  }
7643
7896
  /**
7644
7897
  * Exact equality comparison.
7645
7898
  */
7646
- compareExact(path17, candidateValue, expectedValue, weight) {
7899
+ compareExact(path18, candidateValue, expectedValue, weight) {
7647
7900
  if (deepEqual(candidateValue, expectedValue)) {
7648
7901
  return {
7649
- path: path17,
7902
+ path: path18,
7650
7903
  score: 1,
7651
7904
  weight,
7652
7905
  hit: true,
7653
- message: path17
7906
+ message: path18
7654
7907
  };
7655
7908
  }
7656
7909
  if (typeof candidateValue !== typeof expectedValue) {
7657
7910
  return {
7658
- path: path17,
7911
+ path: path18,
7659
7912
  score: 0,
7660
7913
  weight,
7661
7914
  hit: false,
7662
- message: `${path17} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
7915
+ message: `${path18} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
7663
7916
  };
7664
7917
  }
7665
7918
  return {
7666
- path: path17,
7919
+ path: path18,
7667
7920
  score: 0,
7668
7921
  weight,
7669
7922
  hit: false,
7670
- message: `${path17} (value mismatch)`
7923
+ message: `${path18} (value mismatch)`
7671
7924
  };
7672
7925
  }
7673
7926
  /**
7674
7927
  * Numeric comparison with absolute or relative tolerance.
7675
7928
  */
7676
- compareNumericTolerance(path17, candidateValue, expectedValue, fieldConfig, weight) {
7929
+ compareNumericTolerance(path18, candidateValue, expectedValue, fieldConfig, weight) {
7677
7930
  const { tolerance = 0, relative = false } = fieldConfig;
7678
7931
  const candidateNum = toNumber(candidateValue);
7679
7932
  const expectedNum = toNumber(expectedValue);
7680
7933
  if (candidateNum === null || expectedNum === null) {
7681
7934
  return {
7682
- path: path17,
7935
+ path: path18,
7683
7936
  score: 0,
7684
7937
  weight,
7685
7938
  hit: false,
7686
- message: `${path17} (non-numeric value)`
7939
+ message: `${path18} (non-numeric value)`
7687
7940
  };
7688
7941
  }
7689
7942
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
7690
7943
  return {
7691
- path: path17,
7944
+ path: path18,
7692
7945
  score: 0,
7693
7946
  weight,
7694
7947
  hit: false,
7695
- message: `${path17} (invalid numeric value)`
7948
+ message: `${path18} (invalid numeric value)`
7696
7949
  };
7697
7950
  }
7698
7951
  const diff = Math.abs(candidateNum - expectedNum);
@@ -7705,61 +7958,61 @@ var FieldAccuracyEvaluator = class {
7705
7958
  }
7706
7959
  if (withinTolerance) {
7707
7960
  return {
7708
- path: path17,
7961
+ path: path18,
7709
7962
  score: 1,
7710
7963
  weight,
7711
7964
  hit: true,
7712
- message: `${path17} (within tolerance: diff=${diff.toFixed(2)})`
7965
+ message: `${path18} (within tolerance: diff=${diff.toFixed(2)})`
7713
7966
  };
7714
7967
  }
7715
7968
  return {
7716
- path: path17,
7969
+ path: path18,
7717
7970
  score: 0,
7718
7971
  weight,
7719
7972
  hit: false,
7720
- message: `${path17} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
7973
+ message: `${path18} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
7721
7974
  };
7722
7975
  }
7723
7976
  /**
7724
7977
  * Date comparison with format normalization.
7725
7978
  */
7726
- compareDate(path17, candidateValue, expectedValue, fieldConfig, weight) {
7979
+ compareDate(path18, candidateValue, expectedValue, fieldConfig, weight) {
7727
7980
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
7728
7981
  const candidateDate = parseDate(String(candidateValue), formats);
7729
7982
  const expectedDate = parseDate(String(expectedValue), formats);
7730
7983
  if (candidateDate === null) {
7731
7984
  return {
7732
- path: path17,
7985
+ path: path18,
7733
7986
  score: 0,
7734
7987
  weight,
7735
7988
  hit: false,
7736
- message: `${path17} (unparseable candidate date)`
7989
+ message: `${path18} (unparseable candidate date)`
7737
7990
  };
7738
7991
  }
7739
7992
  if (expectedDate === null) {
7740
7993
  return {
7741
- path: path17,
7994
+ path: path18,
7742
7995
  score: 0,
7743
7996
  weight,
7744
7997
  hit: false,
7745
- message: `${path17} (unparseable expected date)`
7998
+ message: `${path18} (unparseable expected date)`
7746
7999
  };
7747
8000
  }
7748
8001
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
7749
8002
  return {
7750
- path: path17,
8003
+ path: path18,
7751
8004
  score: 1,
7752
8005
  weight,
7753
8006
  hit: true,
7754
- message: path17
8007
+ message: path18
7755
8008
  };
7756
8009
  }
7757
8010
  return {
7758
- path: path17,
8011
+ path: path18,
7759
8012
  score: 0,
7760
8013
  weight,
7761
8014
  hit: false,
7762
- message: `${path17} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
8015
+ message: `${path18} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
7763
8016
  };
7764
8017
  }
7765
8018
  /**
@@ -7799,11 +8052,11 @@ var FieldAccuracyEvaluator = class {
7799
8052
  };
7800
8053
  }
7801
8054
  };
7802
- function resolvePath(obj, path17) {
7803
- if (!path17 || !obj) {
8055
+ function resolvePath(obj, path18) {
8056
+ if (!path18 || !obj) {
7804
8057
  return void 0;
7805
8058
  }
7806
- const parts = path17.split(/\.|\[|\]/).filter((p) => p.length > 0);
8059
+ const parts = path18.split(/\.|\[|\]/).filter((p) => p.length > 0);
7807
8060
  let current = obj;
7808
8061
  for (const part of parts) {
7809
8062
  if (current === null || current === void 0) {
@@ -8239,7 +8492,7 @@ var ToolTrajectoryEvaluator = class {
8239
8492
 
8240
8493
  // src/evaluation/orchestrator.ts
8241
8494
  var import_node_crypto5 = require("crypto");
8242
- var import_node_path16 = __toESM(require("path"), 1);
8495
+ var import_node_path17 = __toESM(require("path"), 1);
8243
8496
 
8244
8497
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
8245
8498
  var Node = class {
@@ -9038,7 +9291,7 @@ async function runEvaluatorList(options) {
9038
9291
  });
9039
9292
  }
9040
9293
  if (evaluator.type === "composite") {
9041
- const evalFileDir = evalCase.guideline_paths[0] ? import_node_path16.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
9294
+ const evalFileDir = evalCase.guideline_paths[0] ? import_node_path17.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
9042
9295
  const createEvaluator = (memberConfig) => {
9043
9296
  switch (memberConfig.type) {
9044
9297
  case "llm_judge":
@@ -9613,6 +9866,7 @@ function createAgentKernel() {
9613
9866
  createAgentKernel,
9614
9867
  createProvider,
9615
9868
  deepEqual,
9869
+ detectFormat,
9616
9870
  ensureVSCodeSubagents,
9617
9871
  executeScript,
9618
9872
  explorationRatio,