@agentv/core 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -146,8 +146,8 @@ function mergeExecutionMetrics(summary, metrics) {
146
146
  }
147
147
 
148
148
  // src/evaluation/yaml-parser.ts
149
- import { readFile as readFile5 } from "node:fs/promises";
150
- import path6 from "node:path";
149
+ import { readFile as readFile6 } from "node:fs/promises";
150
+ import path7 from "node:path";
151
151
  import { parse as parse2 } from "yaml";
152
152
 
153
153
  // src/evaluation/loaders/config-loader.ts
@@ -926,6 +926,11 @@ function isValidFieldAggregationType(value) {
926
926
  return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
927
927
  }
928
928
 
929
+ // src/evaluation/loaders/jsonl-parser.ts
930
+ import { readFile as readFile4 } from "node:fs/promises";
931
+ import path5 from "node:path";
932
+ import { parse as parseYaml } from "yaml";
933
+
929
934
  // src/evaluation/loaders/message-processor.ts
930
935
  import { readFile as readFile3 } from "node:fs/promises";
931
936
  import path4 from "node:path";
@@ -1186,28 +1191,271 @@ async function processExpectedMessages(options) {
1186
1191
  return segments;
1187
1192
  }
1188
1193
 
1189
- // src/evaluation/formatting/prompt-builder.ts
1190
- import { readFile as readFile4 } from "node:fs/promises";
1191
- import path5 from "node:path";
1194
+ // src/evaluation/loaders/jsonl-parser.ts
1192
1195
  var ANSI_YELLOW5 = "\x1B[33m";
1196
+ var ANSI_RED = "\x1B[31m";
1193
1197
  var ANSI_RESET5 = "\x1B[0m";
1198
/**
 * Determines the eval file format from the file extension.
 * The comparison is case-insensitive because the extension is lower-cased first.
 *
 * @param {string} filePath - Path to the eval file; only its extension is inspected.
 * @returns {"jsonl" | "yaml"} "jsonl" for `.jsonl`, "yaml" for `.yaml` / `.yml`.
 * @throws {Error} When the extension is anything else (including no extension).
 */
function detectFormat(filePath) {
  const extension = path5.extname(filePath).toLowerCase();
  switch (extension) {
    case ".jsonl":
      return "jsonl";
    case ".yaml":
    case ".yml":
      return "yaml";
    default:
      throw new Error(`Unsupported file format: '${extension}'. Supported formats: .yaml, .yml, .jsonl`);
  }
}
1204
/**
 * Loads optional metadata for a JSONL dataset from a sibling YAML file that
 * shares the dataset's base name (`<dir>/<base>.yaml`).
 *
 * Never throws for a missing, unreadable or malformed sidecar: every failure
 * path logs a warning (the "not found" warning only when `verbose` is set)
 * and resolves to an empty object so the caller falls back to defaults.
 *
 * @param {string} jsonlPath - Path to the JSONL eval file.
 * @param {boolean} verbose - When true, a missing sidecar is reported as a warning.
 * @returns {Promise<{description?: string, dataset?: string, execution?: object, evaluator?: unknown}>}
 *   The recognised sidecar fields, or `{}` when no usable sidecar exists.
 */
async function loadSidecarMetadata(jsonlPath, verbose) {
  const dir = path5.dirname(jsonlPath);
  // detectFormat() accepts the extension case-insensitively, so strip whatever
  // casing the file actually uses; otherwise "cases.JSONL" would look for
  // "cases.JSONL.yaml". Other extensions are left untouched, as before.
  const extension = path5.extname(jsonlPath);
  const suffix = extension.toLowerCase() === ".jsonl" ? extension : "";
  const base = path5.basename(jsonlPath, suffix);
  const sidecarPath = path5.join(dir, `${base}.yaml`);

  if (!await fileExists2(sidecarPath)) {
    if (verbose) {
      logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
    }
    return {};
  }

  try {
    const content = await readFile4(sidecarPath, "utf8");
    const parsed = parseYaml(content);
    if (!isJsonObject(parsed)) {
      logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
      return {};
    }
    return {
      description: asString4(parsed.description),
      dataset: asString4(parsed.dataset),
      execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
      // Passed through unvalidated; the caller is responsible for coercing it.
      evaluator: parsed.evaluator
    };
  } catch (error) {
    // Guard against non-Error throwables so the warning never prints "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${reason}`);
    return {};
  }
}
1232
/**
 * Parses JSONL text into an array of JSON objects, one per non-blank line.
 *
 * Lines are trimmed before parsing, so trailing "\r" from CRLF files and
 * whitespace-only lines are tolerated. Blank lines are skipped and do not
 * produce an entry in the result.
 *
 * @param {string} content - Raw file content.
 * @param {string} filePath - Path used only for error reporting.
 * @returns {object[]} Parsed objects in file order.
 * @throws {Error} On the first line that is not valid JSON or is not a JSON
 *   object. The message carries the 1-based line number and the file path;
 *   for JSON syntax errors the original error is available as `cause`.
 */
function parseJsonlContent(content, filePath) {
  const cases = [];
  const lines = content.split("\n");
  for (const [index, rawLine] of lines.entries()) {
    const line = rawLine.trim();
    if (line === "") continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // Keep the original SyntaxError reachable for callers that want details.
      throw new Error(`Line ${index + 1}: Invalid JSON - ${message}\nFile: ${filePath}`, { cause: error });
    }

    // Validated outside the try so a shape error is not thrown and re-caught.
    if (!isJsonObject(parsed)) {
      throw new Error(`Line ${index + 1}: Invalid JSON - Expected JSON object\nFile: ${filePath}`);
    }
    cases.push(parsed);
  }
  return cases;
}
1252
/**
 * Loads eval cases from a JSONL file (one JSON object per non-blank line).
 *
 * Suite-level settings (description, dataset name, execution, evaluator) come
 * from an optional sidecar YAML file loaded via `loadSidecarMetadata`; the
 * dataset name falls back to the file's base name, then to "eval".
 *
 * Cases that lack `id`, an outcome (`expected_outcome` or `outcome`) or an
 * `input_messages` array, that have `expected_messages` with no valid entry,
 * or whose evaluators fail to parse are logged and skipped rather than
 * aborting the load.
 *
 * @param {string} evalFilePath - Path to the JSONL eval file.
 * @param {string} repoRoot - Repository root, passed to path/config resolution helpers.
 * @param {{verbose?: boolean, evalId?: string}} [options] - `verbose` enables
 *   console diagnostics; `evalId` restricts the result to the case with that id.
 * @returns {Promise<object[]>} The eval cases that passed validation, in file order.
 * @throws {Error} When the file cannot be read or contains a line that
 *   `parseJsonlContent` rejects.
 */
async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
  const verbose = options?.verbose ?? false;
  const evalIdFilter = options?.evalId;
  const absoluteTestPath = path5.resolve(evalFilePath);
  const repoRootPath = resolveToAbsolutePath(repoRoot);
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
  const config = await loadConfig(absoluteTestPath, repoRootPath);
  const guidelinePatterns = config?.guideline_patterns;
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
  const rawFile = await readFile4(absoluteTestPath, "utf8");
  const rawCases = parseJsonlContent(rawFile, evalFilePath);

  // parseJsonlContent skips blank lines, so a case's index is not its line in
  // the file. Record the 1-based line of every non-blank line (same rule as
  // the parser) so diagnostics point at the real location.
  const caseLineNumbers = [];
  const fileLines = rawFile.split("\n");
  for (let i = 0; i < fileLines.length; i++) {
    if (fileLines[i].trim() !== "") {
      caseLineNumbers.push(i + 1);
    }
  }

  // The format check is case-insensitive, so strip ".jsonl" in whatever
  // casing the file actually uses.
  const fileExtension = path5.extname(absoluteTestPath);
  const strippedSuffix = fileExtension.toLowerCase() === ".jsonl" ? fileExtension : "";
  const fallbackDataset = path5.basename(absoluteTestPath, strippedSuffix) || "eval";
  // Trimmed for parity with the YAML loader's handling of the suite dataset name.
  const sidecarDataset = sidecar.dataset?.trim();
  const datasetName = sidecarDataset && sidecarDataset.length > 0 ? sidecarDataset : fallbackDataset;
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
  const globalExecution = sidecar.execution;

  if (verbose) {
    console.log(`\n[JSONL Dataset: ${evalFilePath}]`);
    console.log(` Cases: ${rawCases.length}`);
    console.log(` Dataset name: ${datasetName}`);
    if (sidecar.description) {
      console.log(` Description: ${sidecar.description}`);
    }
  }

  const results = [];
  for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
    const evalcase = rawCases[lineIndex];
    const lineNumber = caseLineNumbers[lineIndex] ?? lineIndex + 1;
    const id = asString4(evalcase.id);
    if (evalIdFilter && id !== evalIdFilter) {
      continue;
    }

    const conversationId = asString4(evalcase.conversation_id);
    const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
    const inputMessagesValue = evalcase.input_messages;
    const expectedMessagesValue = evalcase.expected_messages;
    if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
      logError(
        `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
      );
      continue;
    }

    const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
    const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
    const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
    if (hasExpectedMessages && expectedMessages.length === 0) {
      logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
      continue;
    }

    // guidelinePaths and inputTextParts are handed to processMessages and
    // read back afterwards.
    const guidelinePaths = [];
    const inputTextParts = [];
    const inputSegments = await processMessages({
      messages: inputMessages,
      searchRoots,
      repoRootPath,
      guidelinePatterns,
      guidelinePaths,
      textParts: inputTextParts,
      messageType: "input",
      verbose
    });
    const outputSegments = hasExpectedMessages ? await processExpectedMessages({
      messages: expectedMessages,
      searchRoots,
      repoRootPath,
      verbose
    }) : [];

    // The reference answer is derived from the last expected message:
    // its content if present, otherwise its tool calls.
    let referenceAnswer = "";
    if (outputSegments.length > 0) {
      const lastMessage = outputSegments[outputSegments.length - 1];
      const content = lastMessage.content;
      const toolCalls = lastMessage.tool_calls;
      if (typeof content === "string") {
        referenceAnswer = content;
      } else if (content !== void 0 && content !== null) {
        referenceAnswer = JSON.stringify(content, null, 2);
      } else if (toolCalls !== void 0 && toolCalls !== null) {
        referenceAnswer = JSON.stringify(toolCalls, null, 2);
      }
    }

    const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
    // A per-case execution object replaces (does not merge with) the sidecar one.
    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
    const mergedExecution = caseExecution ?? globalExecution;
    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;

    let evaluators;
    try {
      evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
      continue;
    }

    // Inline rubrics (strings or objects) become an llm_judge evaluator that
    // is placed ahead of any explicitly configured evaluators.
    const inlineRubrics = evalcase.rubrics;
    if (Array.isArray(inlineRubrics)) {
      const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
        if (typeof rubric === "string") {
          return {
            id: `rubric-${index + 1}`,
            description: rubric,
            weight: 1,
            required: true
          };
        }
        return {
          id: asString4(rubric.id) ?? `rubric-${index + 1}`,
          description: asString4(rubric.description) ?? "",
          weight: typeof rubric.weight === "number" ? rubric.weight : 1,
          required: typeof rubric.required === "boolean" ? rubric.required : true
        };
      }).filter((r) => r.description.length > 0);
      if (rubricItems.length > 0) {
        const rubricEvaluator = {
          name: "rubric",
          type: "llm_judge",
          rubrics: rubricItems
        };
        evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
      }
    }

    const userFilePaths = [];
    for (const segment of inputSegments) {
      if (segment.type === "file" && typeof segment.resolvedPath === "string") {
        userFilePaths.push(segment.resolvedPath);
      }
    }

    // Resolve once; used for both guideline_paths and file_paths.
    const resolvedGuidelinePaths = guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath));
    const allFilePaths = [...resolvedGuidelinePaths, ...userFilePaths];

    const testCase = {
      id,
      dataset: datasetName,
      conversation_id: conversationId,
      question,
      input_messages: inputMessages,
      input_segments: inputSegments,
      expected_messages: outputSegments,
      reference_answer: referenceAnswer,
      guideline_paths: resolvedGuidelinePaths,
      guideline_patterns: guidelinePatterns,
      file_paths: allFilePaths,
      expected_outcome: outcome,
      evaluator: evalCaseEvaluatorKind,
      evaluators
    };

    if (verbose) {
      console.log(`\n[Eval Case: ${id}]`);
      if (testCase.guideline_paths.length > 0) {
        console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
        for (const guidelinePath of testCase.guideline_paths) {
          console.log(` - ${guidelinePath}`);
        }
      } else {
        console.log(" No guidelines found");
      }
    }
    results.push(testCase);
  }
  return results;
}
1415
/**
 * Narrows an arbitrary value to a string.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {string | undefined} The value itself when it is a primitive string, otherwise `undefined`.
 */
function asString4(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value;
}
1418
/**
 * Writes a yellow "Warning: ..." line to stderr via console.warn.
 *
 * @param {string} message - Warning text.
 * @param {string[]} [details] - Optional extra lines, printed below the
 *   message inside the same coloured block when non-empty.
 */
function logWarning4(message, details) {
  // Each detail goes on its own line after the message; nothing is appended otherwise.
  const detailSuffix = details && details.length > 0 ? `\n${details.join("\n")}` : "";
  console.warn(`${ANSI_YELLOW5}Warning: ${message}${detailSuffix}${ANSI_RESET5}`);
}
1427
/**
 * Writes a red "Error: ..." line to stderr via console.error.
 *
 * @param {string} message - Error text.
 * @param {string[]} [details] - Optional extra lines, printed below the
 *   message inside the same coloured block when non-empty.
 */
function logError(message, details) {
  // Each detail goes on its own line after the message; nothing is appended otherwise.
  const detailSuffix = details && details.length > 0 ? `\n${details.join("\n")}` : "";
  console.error(`${ANSI_RED}Error: ${message}${detailSuffix}${ANSI_RESET5}`);
}
1436
+
1437
+ // src/evaluation/formatting/prompt-builder.ts
1438
+ import { readFile as readFile5 } from "node:fs/promises";
1439
+ import path6 from "node:path";
1440
+ var ANSI_YELLOW6 = "\x1B[33m";
1441
+ var ANSI_RESET6 = "\x1B[0m";
1194
1442
  async function buildPromptInputs(testCase, mode = "lm") {
1195
1443
  const guidelineParts = [];
1196
1444
  for (const rawPath of testCase.guideline_paths) {
1197
- const absolutePath = path5.resolve(rawPath);
1445
+ const absolutePath = path6.resolve(rawPath);
1198
1446
  if (!await fileExists2(absolutePath)) {
1199
- logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
1447
+ logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
1200
1448
  continue;
1201
1449
  }
1202
1450
  try {
1203
- const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1451
+ const content = (await readFile5(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1204
1452
  guidelineParts.push({
1205
1453
  content,
1206
1454
  isFile: true,
1207
- displayPath: path5.basename(absolutePath)
1455
+ displayPath: path6.basename(absolutePath)
1208
1456
  });
1209
1457
  } catch (error) {
1210
- logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
1458
+ logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
1211
1459
  }
1212
1460
  }
1213
1461
  const guidelines = formatFileContents(guidelineParts);
@@ -1231,9 +1479,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
1231
1479
  messageSegments.push({ type: "text", value: segment });
1232
1480
  }
1233
1481
  } else if (isJsonObject(segment)) {
1234
- const type = asString4(segment.type);
1482
+ const type = asString5(segment.type);
1235
1483
  if (type === "file") {
1236
- const value = asString4(segment.value);
1484
+ const value = asString5(segment.value);
1237
1485
  if (!value) continue;
1238
1486
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
1239
1487
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -1244,7 +1492,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
1244
1492
  messageSegments.push({ type: "file", text: fileText, path: value });
1245
1493
  }
1246
1494
  } else if (type === "text") {
1247
- const textValue = asString4(segment.value);
1495
+ const textValue = asString5(segment.value);
1248
1496
  if (textValue && textValue.trim().length > 0) {
1249
1497
  messageSegments.push({ type: "text", value: textValue });
1250
1498
  }
@@ -1398,21 +1646,21 @@ ${guidelineContent.trim()}`);
1398
1646
  }
1399
1647
  return chatPrompt.length > 0 ? chatPrompt : void 0;
1400
1648
  }
1401
- function asString4(value) {
1649
+ function asString5(value) {
1402
1650
  return typeof value === "string" ? value : void 0;
1403
1651
  }
1404
- function logWarning4(message) {
1405
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1652
+ function logWarning5(message) {
1653
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1406
1654
  }
1407
1655
 
1408
1656
  // src/evaluation/yaml-parser.ts
1409
- var ANSI_YELLOW6 = "\x1B[33m";
1410
- var ANSI_RED = "\x1B[31m";
1411
- var ANSI_RESET6 = "\x1B[0m";
1657
+ var ANSI_YELLOW7 = "\x1B[33m";
1658
+ var ANSI_RED2 = "\x1B[31m";
1659
+ var ANSI_RESET7 = "\x1B[0m";
1412
1660
  async function readTestSuiteMetadata(testFilePath) {
1413
1661
  try {
1414
- const absolutePath = path6.resolve(testFilePath);
1415
- const content = await readFile5(absolutePath, "utf8");
1662
+ const absolutePath = path7.resolve(testFilePath);
1663
+ const content = await readFile6(absolutePath, "utf8");
1416
1664
  const parsed = parse2(content);
1417
1665
  if (!isJsonObject(parsed)) {
1418
1666
  return {};
@@ -1423,21 +1671,25 @@ async function readTestSuiteMetadata(testFilePath) {
1423
1671
  }
1424
1672
  }
1425
1673
  async function loadEvalCases(evalFilePath, repoRoot, options) {
1674
+ const format = detectFormat(evalFilePath);
1675
+ if (format === "jsonl") {
1676
+ return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
1677
+ }
1426
1678
  const verbose = options?.verbose ?? false;
1427
1679
  const evalIdFilter = options?.evalId;
1428
- const absoluteTestPath = path6.resolve(evalFilePath);
1680
+ const absoluteTestPath = path7.resolve(evalFilePath);
1429
1681
  const repoRootPath = resolveToAbsolutePath(repoRoot);
1430
1682
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
1431
1683
  const config = await loadConfig(absoluteTestPath, repoRootPath);
1432
1684
  const guidelinePatterns = config?.guideline_patterns;
1433
- const rawFile = await readFile5(absoluteTestPath, "utf8");
1685
+ const rawFile = await readFile6(absoluteTestPath, "utf8");
1434
1686
  const parsed = parse2(rawFile);
1435
1687
  if (!isJsonObject(parsed)) {
1436
1688
  throw new Error(`Invalid test file format: ${evalFilePath}`);
1437
1689
  }
1438
1690
  const suite = parsed;
1439
- const datasetNameFromSuite = asString5(suite.dataset)?.trim();
1440
- const fallbackDataset = path6.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1691
+ const datasetNameFromSuite = asString6(suite.dataset)?.trim();
1692
+ const fallbackDataset = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1441
1693
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
1442
1694
  const rawTestcases = suite.evalcases;
1443
1695
  if (!Array.isArray(rawTestcases)) {
@@ -1445,24 +1697,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1445
1697
  }
1446
1698
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
1447
1699
  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
1448
- const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
1700
+ const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
1449
1701
  const results = [];
1450
1702
  for (const rawEvalcase of rawTestcases) {
1451
1703
  if (!isJsonObject(rawEvalcase)) {
1452
- logWarning5("Skipping invalid eval case entry (expected object)");
1704
+ logWarning6("Skipping invalid eval case entry (expected object)");
1453
1705
  continue;
1454
1706
  }
1455
1707
  const evalcase = rawEvalcase;
1456
- const id = asString5(evalcase.id);
1708
+ const id = asString6(evalcase.id);
1457
1709
  if (evalIdFilter && id !== evalIdFilter) {
1458
1710
  continue;
1459
1711
  }
1460
- const conversationId = asString5(evalcase.conversation_id);
1461
- const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
1712
+ const conversationId = asString6(evalcase.conversation_id);
1713
+ const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
1462
1714
  const inputMessagesValue = evalcase.input_messages;
1463
1715
  const expectedMessagesValue = evalcase.expected_messages;
1464
1716
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1465
- logError(
1717
+ logError2(
1466
1718
  `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
1467
1719
  );
1468
1720
  continue;
@@ -1473,7 +1725,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1473
1725
  );
1474
1726
  const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1475
1727
  if (hasExpectedMessages && expectedMessages.length === 0) {
1476
- logError(`No valid expected message found for eval case: ${id}`);
1728
+ logError2(`No valid expected message found for eval case: ${id}`);
1477
1729
  continue;
1478
1730
  }
1479
1731
  const guidelinePaths = [];
@@ -1514,7 +1766,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1514
1766
  evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
1515
1767
  } catch (error) {
1516
1768
  const message = error instanceof Error ? error.message : String(error);
1517
- logError(`Skipping eval case '${id}': ${message}`);
1769
+ logError2(`Skipping eval case '${id}': ${message}`);
1518
1770
  continue;
1519
1771
  }
1520
1772
  const inlineRubrics = evalcase.rubrics;
@@ -1529,8 +1781,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1529
1781
  };
1530
1782
  }
1531
1783
  return {
1532
- id: asString5(rubric.id) ?? `rubric-${index + 1}`,
1533
- description: asString5(rubric.description) ?? "",
1784
+ id: asString6(rubric.id) ?? `rubric-${index + 1}`,
1785
+ description: asString6(rubric.description) ?? "",
1534
1786
  weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1535
1787
  required: typeof rubric.required === "boolean" ? rubric.required : true
1536
1788
  };
@@ -1551,7 +1803,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1551
1803
  }
1552
1804
  }
1553
1805
  const allFilePaths = [
1554
- ...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
1806
+ ...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
1555
1807
  ...userFilePaths
1556
1808
  ];
1557
1809
  const testCase = {
@@ -1563,7 +1815,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1563
1815
  input_segments: inputSegments,
1564
1816
  expected_messages: outputSegments,
1565
1817
  reference_answer: referenceAnswer,
1566
- guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
1818
+ guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
1567
1819
  guideline_patterns: guidelinePatterns,
1568
1820
  file_paths: allFilePaths,
1569
1821
  expected_outcome: outcome,
@@ -1586,25 +1838,25 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1586
1838
  }
1587
1839
  return results;
1588
1840
  }
1589
- function asString5(value) {
1841
+ function asString6(value) {
1590
1842
  return typeof value === "string" ? value : void 0;
1591
1843
  }
1592
- function logWarning5(message, details) {
1844
+ function logWarning6(message, details) {
1593
1845
  if (details && details.length > 0) {
1594
1846
  const detailBlock = details.join("\n");
1595
- console.warn(`${ANSI_YELLOW6}Warning: ${message}
1596
- ${detailBlock}${ANSI_RESET6}`);
1847
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}
1848
+ ${detailBlock}${ANSI_RESET7}`);
1597
1849
  } else {
1598
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1850
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
1599
1851
  }
1600
1852
  }
1601
- function logError(message, details) {
1853
+ function logError2(message, details) {
1602
1854
  if (details && details.length > 0) {
1603
1855
  const detailBlock = details.join("\n");
1604
- console.error(`${ANSI_RED}Error: ${message}
1605
- ${detailBlock}${ANSI_RESET6}`);
1856
+ console.error(`${ANSI_RED2}Error: ${message}
1857
+ ${detailBlock}${ANSI_RESET7}`);
1606
1858
  } else {
1607
- console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
1859
+ console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
1608
1860
  }
1609
1861
  }
1610
1862
 
@@ -1947,7 +2199,7 @@ import { randomUUID } from "node:crypto";
1947
2199
  import { createWriteStream } from "node:fs";
1948
2200
  import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
1949
2201
  import { tmpdir } from "node:os";
1950
- import path8 from "node:path";
2202
+ import path9 from "node:path";
1951
2203
 
1952
2204
  // src/evaluation/providers/claude-code-log-tracker.ts
1953
2205
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
@@ -2003,7 +2255,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
2003
2255
  }
2004
2256
 
2005
2257
  // src/evaluation/providers/preread.ts
2006
- import path7 from "node:path";
2258
+ import path8 from "node:path";
2007
2259
  function buildPromptDocument(request, inputFiles, options) {
2008
2260
  const parts = [];
2009
2261
  const guidelineFiles = collectGuidelineFiles(
@@ -2026,7 +2278,7 @@ function normalizeInputFiles(inputFiles) {
2026
2278
  }
2027
2279
  const deduped = /* @__PURE__ */ new Map();
2028
2280
  for (const inputFile of inputFiles) {
2029
- const absolutePath = path7.resolve(inputFile);
2281
+ const absolutePath = path8.resolve(inputFile);
2030
2282
  if (!deduped.has(absolutePath)) {
2031
2283
  deduped.set(absolutePath, absolutePath);
2032
2284
  }
@@ -2039,14 +2291,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
2039
2291
  }
2040
2292
  const unique = /* @__PURE__ */ new Map();
2041
2293
  for (const inputFile of inputFiles) {
2042
- const absolutePath = path7.resolve(inputFile);
2294
+ const absolutePath = path8.resolve(inputFile);
2043
2295
  if (overrides?.has(absolutePath)) {
2044
2296
  if (!unique.has(absolutePath)) {
2045
2297
  unique.set(absolutePath, absolutePath);
2046
2298
  }
2047
2299
  continue;
2048
2300
  }
2049
- const normalized = absolutePath.split(path7.sep).join("/");
2301
+ const normalized = absolutePath.split(path8.sep).join("/");
2050
2302
  if (isGuidelineFile(normalized, guidelinePatterns)) {
2051
2303
  if (!unique.has(absolutePath)) {
2052
2304
  unique.set(absolutePath, absolutePath);
@@ -2061,7 +2313,7 @@ function collectInputFiles(inputFiles) {
2061
2313
  }
2062
2314
  const unique = /* @__PURE__ */ new Map();
2063
2315
  for (const inputFile of inputFiles) {
2064
- const absolutePath = path7.resolve(inputFile);
2316
+ const absolutePath = path8.resolve(inputFile);
2065
2317
  if (!unique.has(absolutePath)) {
2066
2318
  unique.set(absolutePath, absolutePath);
2067
2319
  }
@@ -2073,7 +2325,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
2073
2325
  return "";
2074
2326
  }
2075
2327
  const buildList = (files) => files.map((absolutePath) => {
2076
- const fileName = path7.basename(absolutePath);
2328
+ const fileName = path8.basename(absolutePath);
2077
2329
  const fileUri = pathToFileUri(absolutePath);
2078
2330
  return `* [${fileName}](${fileUri})`;
2079
2331
  });
@@ -2093,7 +2345,7 @@ ${buildList(inputFiles).join("\n")}.`);
2093
2345
  return sections.join("\n");
2094
2346
  }
2095
2347
  function pathToFileUri(filePath) {
2096
- const absolutePath = path7.isAbsolute(filePath) ? filePath : path7.resolve(filePath);
2348
+ const absolutePath = path8.isAbsolute(filePath) ? filePath : path8.resolve(filePath);
2097
2349
  const normalizedPath = absolutePath.replace(/\\/g, "/");
2098
2350
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
2099
2351
  return `file:///${normalizedPath}`;
@@ -2130,7 +2382,7 @@ var ClaudeCodeProvider = class {
2130
2382
  const workspaceRoot = await this.createWorkspace();
2131
2383
  const logger = await this.createStreamLogger(request).catch(() => void 0);
2132
2384
  try {
2133
- const promptFile = path8.join(workspaceRoot, PROMPT_FILENAME);
2385
+ const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
2134
2386
  await writeFile(promptFile, request.question, "utf8");
2135
2387
  const args = this.buildClaudeCodeArgs(request.question, inputFiles);
2136
2388
  const cwd = this.resolveCwd();
@@ -2178,7 +2430,7 @@ var ClaudeCodeProvider = class {
2178
2430
  if (!this.config.cwd) {
2179
2431
  return process.cwd();
2180
2432
  }
2181
- return path8.resolve(this.config.cwd);
2433
+ return path9.resolve(this.config.cwd);
2182
2434
  }
2183
2435
  buildClaudeCodeArgs(prompt, inputFiles) {
2184
2436
  const args = [];
@@ -2235,7 +2487,7 @@ ${filesContext}`;
2235
2487
  }
2236
2488
  }
2237
2489
  async createWorkspace() {
2238
- return await mkdtemp(path8.join(tmpdir(), WORKSPACE_PREFIX));
2490
+ return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
2239
2491
  }
2240
2492
  async cleanupWorkspace(workspaceRoot) {
2241
2493
  try {
@@ -2249,9 +2501,9 @@ ${filesContext}`;
2249
2501
  return void 0;
2250
2502
  }
2251
2503
  if (this.config.logDir) {
2252
- return path8.resolve(this.config.logDir);
2504
+ return path9.resolve(this.config.logDir);
2253
2505
  }
2254
- return path8.join(process.cwd(), ".agentv", "logs", "claude-code");
2506
+ return path9.join(process.cwd(), ".agentv", "logs", "claude-code");
2255
2507
  }
2256
2508
  async createStreamLogger(request) {
2257
2509
  const logDir = this.resolveLogDirectory();
@@ -2265,7 +2517,7 @@ ${filesContext}`;
2265
2517
  console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
2266
2518
  return void 0;
2267
2519
  }
2268
- const filePath = path8.join(logDir, buildLogFilename(request, this.targetName));
2520
+ const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
2269
2521
  try {
2270
2522
  const logger = await ClaudeCodeStreamLogger.create({
2271
2523
  filePath,
@@ -2670,10 +2922,10 @@ function escapeShellArg(arg) {
2670
2922
  }
2671
2923
  async function defaultClaudeCodeRunner(options) {
2672
2924
  const tempId = randomUUID();
2673
- const stdoutFile = path8.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
2674
- const stderrFile = path8.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
2675
- const exitFile = path8.join(tmpdir(), `agentv-cc-${tempId}-exit`);
2676
- const pidFile = path8.join(tmpdir(), `agentv-cc-${tempId}-pid`);
2925
+ const stdoutFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
2926
+ const stderrFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
2927
+ const exitFile = path9.join(tmpdir(), `agentv-cc-${tempId}-exit`);
2928
+ const pidFile = path9.join(tmpdir(), `agentv-cc-${tempId}-pid`);
2677
2929
  try {
2678
2930
  return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
2679
2931
  } finally {
@@ -2713,8 +2965,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2713
2965
  let lastStdoutSize = 0;
2714
2966
  const readFileIfExists = async (filePath) => {
2715
2967
  try {
2716
- const { readFile: readFile7 } = await import("node:fs/promises");
2717
- return await readFile7(filePath, "utf8");
2968
+ const { readFile: readFile8 } = await import("node:fs/promises");
2969
+ return await readFile8(filePath, "utf8");
2718
2970
  } catch {
2719
2971
  return "";
2720
2972
  }
@@ -2789,7 +3041,7 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2789
3041
  import { exec as execWithCallback } from "node:child_process";
2790
3042
  import fs from "node:fs/promises";
2791
3043
  import os from "node:os";
2792
- import path9 from "node:path";
3044
+ import path10 from "node:path";
2793
3045
  import { promisify } from "node:util";
2794
3046
  import { z } from "zod";
2795
3047
  var ToolCallSchema = z.object({
@@ -3246,7 +3498,7 @@ function normalizeInputFiles2(inputFiles) {
3246
3498
  }
3247
3499
  const unique = /* @__PURE__ */ new Map();
3248
3500
  for (const inputFile of inputFiles) {
3249
- const absolutePath = path9.resolve(inputFile);
3501
+ const absolutePath = path10.resolve(inputFile);
3250
3502
  if (!unique.has(absolutePath)) {
3251
3503
  unique.set(absolutePath, absolutePath);
3252
3504
  }
@@ -3260,7 +3512,7 @@ function formatFileList(files, template) {
3260
3512
  const formatter = template ?? "{path}";
3261
3513
  return files.map((filePath) => {
3262
3514
  const escapedPath = shellEscape(filePath);
3263
- const escapedName = shellEscape(path9.basename(filePath));
3515
+ const escapedName = shellEscape(path10.basename(filePath));
3264
3516
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
3265
3517
  }).join(" ");
3266
3518
  }
@@ -3284,7 +3536,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
3284
3536
  const safeEvalId = evalCaseId || "unknown";
3285
3537
  const timestamp = Date.now();
3286
3538
  const random = Math.random().toString(36).substring(2, 9);
3287
- return path9.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3539
+ return path10.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3288
3540
  }
3289
3541
  function formatTimeoutSuffix2(timeoutMs) {
3290
3542
  if (!timeoutMs || timeoutMs <= 0) {
@@ -3300,7 +3552,7 @@ import { randomUUID as randomUUID2 } from "node:crypto";
3300
3552
  import { constants as constants2, createWriteStream as createWriteStream2 } from "node:fs";
3301
3553
  import { access as access2, mkdir as mkdir2, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile2 } from "node:fs/promises";
3302
3554
  import { tmpdir as tmpdir2 } from "node:os";
3303
- import path10 from "node:path";
3555
+ import path11 from "node:path";
3304
3556
  import { promisify as promisify2 } from "node:util";
3305
3557
 
3306
3558
  // src/evaluation/providers/codex-log-tracker.ts
@@ -3395,7 +3647,7 @@ var CodexProvider = class {
3395
3647
  const promptContent = `${systemPrompt}
3396
3648
 
3397
3649
  ${basePrompt}`;
3398
- const promptFile = path10.join(workspaceRoot, PROMPT_FILENAME2);
3650
+ const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME2);
3399
3651
  await writeFile2(promptFile, promptContent, "utf8");
3400
3652
  const args = this.buildCodexArgs();
3401
3653
  const cwd = this.resolveCwd(workspaceRoot);
@@ -3445,7 +3697,7 @@ ${basePrompt}`;
3445
3697
  if (!this.config.cwd) {
3446
3698
  return workspaceRoot;
3447
3699
  }
3448
- return path10.resolve(this.config.cwd);
3700
+ return path11.resolve(this.config.cwd);
3449
3701
  }
3450
3702
  buildCodexArgs() {
3451
3703
  const args = [
@@ -3487,7 +3739,7 @@ ${basePrompt}`;
3487
3739
  }
3488
3740
  }
3489
3741
  async createWorkspace() {
3490
- return await mkdtemp2(path10.join(tmpdir2(), WORKSPACE_PREFIX2));
3742
+ return await mkdtemp2(path11.join(tmpdir2(), WORKSPACE_PREFIX2));
3491
3743
  }
3492
3744
  async cleanupWorkspace(workspaceRoot) {
3493
3745
  try {
@@ -3501,9 +3753,9 @@ ${basePrompt}`;
3501
3753
  return void 0;
3502
3754
  }
3503
3755
  if (this.config.logDir) {
3504
- return path10.resolve(this.config.logDir);
3756
+ return path11.resolve(this.config.logDir);
3505
3757
  }
3506
- return path10.join(process.cwd(), ".agentv", "logs", "codex");
3758
+ return path11.join(process.cwd(), ".agentv", "logs", "codex");
3507
3759
  }
3508
3760
  async createStreamLogger(request) {
3509
3761
  const logDir = this.resolveLogDirectory();
@@ -3517,7 +3769,7 @@ ${basePrompt}`;
3517
3769
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
3518
3770
  return void 0;
3519
3771
  }
3520
- const filePath = path10.join(logDir, buildLogFilename2(request, this.targetName));
3772
+ const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
3521
3773
  try {
3522
3774
  const logger = await CodexStreamLogger.create({
3523
3775
  filePath,
@@ -3732,7 +3984,7 @@ function tryParseJsonValue2(rawLine) {
3732
3984
  async function locateExecutable(candidate) {
3733
3985
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
3734
3986
  if (includesPathSeparator) {
3735
- const resolved = path10.isAbsolute(candidate) ? candidate : path10.resolve(candidate);
3987
+ const resolved = path11.isAbsolute(candidate) ? candidate : path11.resolve(candidate);
3736
3988
  const executablePath = await ensureWindowsExecutableVariant(resolved);
3737
3989
  await access2(executablePath, constants2.F_OK);
3738
3990
  return executablePath;
@@ -4245,7 +4497,7 @@ import { randomUUID as randomUUID3 } from "node:crypto";
4245
4497
  import { createWriteStream as createWriteStream3 } from "node:fs";
4246
4498
  import { mkdir as mkdir3, mkdtemp as mkdtemp3, rm as rm3, writeFile as writeFile3 } from "node:fs/promises";
4247
4499
  import { tmpdir as tmpdir3 } from "node:os";
4248
- import path11 from "node:path";
4500
+ import path12 from "node:path";
4249
4501
 
4250
4502
  // src/evaluation/providers/pi-log-tracker.ts
4251
4503
  var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
@@ -4329,7 +4581,7 @@ var PiCodingAgentProvider = class {
4329
4581
  const workspaceRoot = await this.createWorkspace();
4330
4582
  const logger = await this.createStreamLogger(request).catch(() => void 0);
4331
4583
  try {
4332
- const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME3);
4584
+ const promptFile = path12.join(workspaceRoot, PROMPT_FILENAME3);
4333
4585
  await writeFile3(promptFile, request.question, "utf8");
4334
4586
  const args = this.buildPiArgs(request.question, inputFiles);
4335
4587
  const cwd = this.resolveCwd(workspaceRoot);
@@ -4371,7 +4623,7 @@ var PiCodingAgentProvider = class {
4371
4623
  if (!this.config.cwd) {
4372
4624
  return workspaceRoot;
4373
4625
  }
4374
- return path11.resolve(this.config.cwd);
4626
+ return path12.resolve(this.config.cwd);
4375
4627
  }
4376
4628
  buildPiArgs(prompt, inputFiles) {
4377
4629
  const args = [];
@@ -4460,7 +4712,7 @@ ${prompt}`;
4460
4712
  return env;
4461
4713
  }
4462
4714
  async createWorkspace() {
4463
- return await mkdtemp3(path11.join(tmpdir3(), WORKSPACE_PREFIX3));
4715
+ return await mkdtemp3(path12.join(tmpdir3(), WORKSPACE_PREFIX3));
4464
4716
  }
4465
4717
  async cleanupWorkspace(workspaceRoot) {
4466
4718
  try {
@@ -4470,9 +4722,9 @@ ${prompt}`;
4470
4722
  }
4471
4723
  resolveLogDirectory() {
4472
4724
  if (this.config.logDir) {
4473
- return path11.resolve(this.config.logDir);
4725
+ return path12.resolve(this.config.logDir);
4474
4726
  }
4475
- return path11.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4727
+ return path12.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4476
4728
  }
4477
4729
  async createStreamLogger(request) {
4478
4730
  const logDir = this.resolveLogDirectory();
@@ -4486,7 +4738,7 @@ ${prompt}`;
4486
4738
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
4487
4739
  return void 0;
4488
4740
  }
4489
- const filePath = path11.join(logDir, buildLogFilename3(request, this.targetName));
4741
+ const filePath = path12.join(logDir, buildLogFilename3(request, this.targetName));
4490
4742
  try {
4491
4743
  const logger = await PiStreamLogger.create({
4492
4744
  filePath,
@@ -4919,7 +5171,7 @@ async function defaultPiRunner(options) {
4919
5171
  }
4920
5172
 
4921
5173
  // src/evaluation/providers/vscode.ts
4922
- import path12 from "node:path";
5174
+ import path13 from "node:path";
4923
5175
  import {
4924
5176
  dispatchAgentSession,
4925
5177
  dispatchBatchAgent,
@@ -5094,7 +5346,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
5094
5346
  return "";
5095
5347
  }
5096
5348
  const buildList = (files) => files.map((absolutePath) => {
5097
- const fileName = path12.basename(absolutePath);
5349
+ const fileName = path13.basename(absolutePath);
5098
5350
  const fileUri = pathToFileUri2(absolutePath);
5099
5351
  return `* [${fileName}](${fileUri})`;
5100
5352
  });
@@ -5119,8 +5371,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
5119
5371
  }
5120
5372
  const unique = /* @__PURE__ */ new Map();
5121
5373
  for (const attachment of attachments) {
5122
- const absolutePath = path12.resolve(attachment);
5123
- const normalized = absolutePath.split(path12.sep).join("/");
5374
+ const absolutePath = path13.resolve(attachment);
5375
+ const normalized = absolutePath.split(path13.sep).join("/");
5124
5376
  if (isGuidelineFile(normalized, guidelinePatterns)) {
5125
5377
  if (!unique.has(absolutePath)) {
5126
5378
  unique.set(absolutePath, absolutePath);
@@ -5135,7 +5387,7 @@ function collectAttachmentFiles(attachments) {
5135
5387
  }
5136
5388
  const unique = /* @__PURE__ */ new Map();
5137
5389
  for (const attachment of attachments) {
5138
- const absolutePath = path12.resolve(attachment);
5390
+ const absolutePath = path13.resolve(attachment);
5139
5391
  if (!unique.has(absolutePath)) {
5140
5392
  unique.set(absolutePath, absolutePath);
5141
5393
  }
@@ -5143,7 +5395,7 @@ function collectAttachmentFiles(attachments) {
5143
5395
  return Array.from(unique.values());
5144
5396
  }
5145
5397
  function pathToFileUri2(filePath) {
5146
- const absolutePath = path12.isAbsolute(filePath) ? filePath : path12.resolve(filePath);
5398
+ const absolutePath = path13.isAbsolute(filePath) ? filePath : path13.resolve(filePath);
5147
5399
  const normalizedPath = absolutePath.replace(/\\/g, "/");
5148
5400
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
5149
5401
  return `file:///${normalizedPath}`;
@@ -5156,7 +5408,7 @@ function normalizeAttachments(attachments) {
5156
5408
  }
5157
5409
  const deduped = /* @__PURE__ */ new Set();
5158
5410
  for (const attachment of attachments) {
5159
- deduped.add(path12.resolve(attachment));
5411
+ deduped.add(path13.resolve(attachment));
5160
5412
  }
5161
5413
  return Array.from(deduped);
5162
5414
  }
@@ -5165,7 +5417,7 @@ function mergeAttachments(all) {
5165
5417
  for (const list of all) {
5166
5418
  if (!list) continue;
5167
5419
  for (const inputFile of list) {
5168
- deduped.add(path12.resolve(inputFile));
5420
+ deduped.add(path13.resolve(inputFile));
5169
5421
  }
5170
5422
  }
5171
5423
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -5213,8 +5465,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
5213
5465
 
5214
5466
  // src/evaluation/providers/targets-file.ts
5215
5467
  import { constants as constants3 } from "node:fs";
5216
- import { access as access3, readFile as readFile6 } from "node:fs/promises";
5217
- import path13 from "node:path";
5468
+ import { access as access3, readFile as readFile7 } from "node:fs/promises";
5469
+ import path14 from "node:path";
5218
5470
  import { parse as parse3 } from "yaml";
5219
5471
  function isRecord(value) {
5220
5472
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -5251,11 +5503,11 @@ async function fileExists3(filePath) {
5251
5503
  }
5252
5504
  }
5253
5505
  async function readTargetDefinitions(filePath) {
5254
- const absolutePath = path13.resolve(filePath);
5506
+ const absolutePath = path14.resolve(filePath);
5255
5507
  if (!await fileExists3(absolutePath)) {
5256
5508
  throw new Error(`targets.yaml not found at ${absolutePath}`);
5257
5509
  }
5258
- const raw = await readFile6(absolutePath, "utf8");
5510
+ const raw = await readFile7(absolutePath, "utf8");
5259
5511
  const parsed = parse3(raw);
5260
5512
  if (!isRecord(parsed)) {
5261
5513
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -5462,15 +5714,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
5462
5714
  });
5463
5715
  }
5464
5716
  async function execShellWithStdin(command, stdinPayload, options = {}) {
5465
- const { mkdir: mkdir4, readFile: readFile7, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
5717
+ const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
5466
5718
  const { tmpdir: tmpdir4 } = await import("node:os");
5467
- const path15 = await import("node:path");
5719
+ const path16 = await import("node:path");
5468
5720
  const { randomUUID: randomUUID4 } = await import("node:crypto");
5469
- const dir = path15.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
5721
+ const dir = path16.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
5470
5722
  await mkdir4(dir, { recursive: true });
5471
- const stdinPath = path15.join(dir, "stdin.txt");
5472
- const stdoutPath = path15.join(dir, "stdout.txt");
5473
- const stderrPath = path15.join(dir, "stderr.txt");
5723
+ const stdinPath = path16.join(dir, "stdin.txt");
5724
+ const stdoutPath = path16.join(dir, "stdout.txt");
5725
+ const stderrPath = path16.join(dir, "stderr.txt");
5474
5726
  await writeFile4(stdinPath, stdinPayload, "utf8");
5475
5727
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
5476
5728
  const { spawn: spawn4 } = await import("node:child_process");
@@ -5500,8 +5752,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
5500
5752
  resolve(code ?? 0);
5501
5753
  });
5502
5754
  });
5503
- const stdout = (await readFile7(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
5504
- const stderr = (await readFile7(stderrPath, "utf8")).replace(/\r\n/g, "\n");
5755
+ const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
5756
+ const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
5505
5757
  return { stdout, stderr, exitCode };
5506
5758
  } finally {
5507
5759
  await rm4(dir, { recursive: true, force: true });
@@ -5773,7 +6025,7 @@ var CodeEvaluator = class {
5773
6025
  outputMessages: context.outputMessages ?? null,
5774
6026
  guidelineFiles: context.evalCase.guideline_paths,
5775
6027
  inputFiles: context.evalCase.file_paths.filter(
5776
- (path15) => !context.evalCase.guideline_paths.includes(path15)
6028
+ (path16) => !context.evalCase.guideline_paths.includes(path16)
5777
6029
  ),
5778
6030
  inputMessages: context.evalCase.input_messages,
5779
6031
  traceSummary: context.traceSummary ?? null,
@@ -6532,115 +6784,115 @@ var FieldAccuracyEvaluator = class {
6532
6784
  * Evaluate a single field against the expected value.
6533
6785
  */
6534
6786
  evaluateField(fieldConfig, candidateData, expectedData) {
6535
- const { path: path15, match, required = true, weight = 1 } = fieldConfig;
6536
- const candidateValue = resolvePath(candidateData, path15);
6537
- const expectedValue = resolvePath(expectedData, path15);
6787
+ const { path: path16, match, required = true, weight = 1 } = fieldConfig;
6788
+ const candidateValue = resolvePath(candidateData, path16);
6789
+ const expectedValue = resolvePath(expectedData, path16);
6538
6790
  if (expectedValue === void 0) {
6539
6791
  return {
6540
- path: path15,
6792
+ path: path16,
6541
6793
  score: 1,
6542
6794
  // No expected value means no comparison needed
6543
6795
  weight,
6544
6796
  hit: true,
6545
- message: `${path15}: no expected value`
6797
+ message: `${path16}: no expected value`
6546
6798
  };
6547
6799
  }
6548
6800
  if (candidateValue === void 0) {
6549
6801
  if (required) {
6550
6802
  return {
6551
- path: path15,
6803
+ path: path16,
6552
6804
  score: 0,
6553
6805
  weight,
6554
6806
  hit: false,
6555
- message: `${path15} (required, missing)`
6807
+ message: `${path16} (required, missing)`
6556
6808
  };
6557
6809
  }
6558
6810
  return {
6559
- path: path15,
6811
+ path: path16,
6560
6812
  score: 1,
6561
6813
  // Don't penalize missing optional fields
6562
6814
  weight: 0,
6563
6815
  // Zero weight means it won't affect the score
6564
6816
  hit: true,
6565
- message: `${path15}: optional field missing`
6817
+ message: `${path16}: optional field missing`
6566
6818
  };
6567
6819
  }
6568
6820
  switch (match) {
6569
6821
  case "exact":
6570
- return this.compareExact(path15, candidateValue, expectedValue, weight);
6822
+ return this.compareExact(path16, candidateValue, expectedValue, weight);
6571
6823
  case "numeric_tolerance":
6572
6824
  return this.compareNumericTolerance(
6573
- path15,
6825
+ path16,
6574
6826
  candidateValue,
6575
6827
  expectedValue,
6576
6828
  fieldConfig,
6577
6829
  weight
6578
6830
  );
6579
6831
  case "date":
6580
- return this.compareDate(path15, candidateValue, expectedValue, fieldConfig, weight);
6832
+ return this.compareDate(path16, candidateValue, expectedValue, fieldConfig, weight);
6581
6833
  default:
6582
6834
  return {
6583
- path: path15,
6835
+ path: path16,
6584
6836
  score: 0,
6585
6837
  weight,
6586
6838
  hit: false,
6587
- message: `${path15}: unknown match type "${match}"`
6839
+ message: `${path16}: unknown match type "${match}"`
6588
6840
  };
6589
6841
  }
6590
6842
  }
6591
6843
  /**
6592
6844
  * Exact equality comparison.
6593
6845
  */
6594
- compareExact(path15, candidateValue, expectedValue, weight) {
6846
+ compareExact(path16, candidateValue, expectedValue, weight) {
6595
6847
  if (deepEqual(candidateValue, expectedValue)) {
6596
6848
  return {
6597
- path: path15,
6849
+ path: path16,
6598
6850
  score: 1,
6599
6851
  weight,
6600
6852
  hit: true,
6601
- message: path15
6853
+ message: path16
6602
6854
  };
6603
6855
  }
6604
6856
  if (typeof candidateValue !== typeof expectedValue) {
6605
6857
  return {
6606
- path: path15,
6858
+ path: path16,
6607
6859
  score: 0,
6608
6860
  weight,
6609
6861
  hit: false,
6610
- message: `${path15} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
6862
+ message: `${path16} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
6611
6863
  };
6612
6864
  }
6613
6865
  return {
6614
- path: path15,
6866
+ path: path16,
6615
6867
  score: 0,
6616
6868
  weight,
6617
6869
  hit: false,
6618
- message: `${path15} (value mismatch)`
6870
+ message: `${path16} (value mismatch)`
6619
6871
  };
6620
6872
  }
6621
6873
  /**
6622
6874
  * Numeric comparison with absolute or relative tolerance.
6623
6875
  */
6624
- compareNumericTolerance(path15, candidateValue, expectedValue, fieldConfig, weight) {
6876
+ compareNumericTolerance(path16, candidateValue, expectedValue, fieldConfig, weight) {
6625
6877
  const { tolerance = 0, relative = false } = fieldConfig;
6626
6878
  const candidateNum = toNumber(candidateValue);
6627
6879
  const expectedNum = toNumber(expectedValue);
6628
6880
  if (candidateNum === null || expectedNum === null) {
6629
6881
  return {
6630
- path: path15,
6882
+ path: path16,
6631
6883
  score: 0,
6632
6884
  weight,
6633
6885
  hit: false,
6634
- message: `${path15} (non-numeric value)`
6886
+ message: `${path16} (non-numeric value)`
6635
6887
  };
6636
6888
  }
6637
6889
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
6638
6890
  return {
6639
- path: path15,
6891
+ path: path16,
6640
6892
  score: 0,
6641
6893
  weight,
6642
6894
  hit: false,
6643
- message: `${path15} (invalid numeric value)`
6895
+ message: `${path16} (invalid numeric value)`
6644
6896
  };
6645
6897
  }
6646
6898
  const diff = Math.abs(candidateNum - expectedNum);
@@ -6653,61 +6905,61 @@ var FieldAccuracyEvaluator = class {
6653
6905
  }
6654
6906
  if (withinTolerance) {
6655
6907
  return {
6656
- path: path15,
6908
+ path: path16,
6657
6909
  score: 1,
6658
6910
  weight,
6659
6911
  hit: true,
6660
- message: `${path15} (within tolerance: diff=${diff.toFixed(2)})`
6912
+ message: `${path16} (within tolerance: diff=${diff.toFixed(2)})`
6661
6913
  };
6662
6914
  }
6663
6915
  return {
6664
- path: path15,
6916
+ path: path16,
6665
6917
  score: 0,
6666
6918
  weight,
6667
6919
  hit: false,
6668
- message: `${path15} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
6920
+ message: `${path16} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
6669
6921
  };
6670
6922
  }
6671
6923
  /**
6672
6924
  * Date comparison with format normalization.
6673
6925
  */
6674
- compareDate(path15, candidateValue, expectedValue, fieldConfig, weight) {
6926
+ compareDate(path16, candidateValue, expectedValue, fieldConfig, weight) {
6675
6927
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
6676
6928
  const candidateDate = parseDate(String(candidateValue), formats);
6677
6929
  const expectedDate = parseDate(String(expectedValue), formats);
6678
6930
  if (candidateDate === null) {
6679
6931
  return {
6680
- path: path15,
6932
+ path: path16,
6681
6933
  score: 0,
6682
6934
  weight,
6683
6935
  hit: false,
6684
- message: `${path15} (unparseable candidate date)`
6936
+ message: `${path16} (unparseable candidate date)`
6685
6937
  };
6686
6938
  }
6687
6939
  if (expectedDate === null) {
6688
6940
  return {
6689
- path: path15,
6941
+ path: path16,
6690
6942
  score: 0,
6691
6943
  weight,
6692
6944
  hit: false,
6693
- message: `${path15} (unparseable expected date)`
6945
+ message: `${path16} (unparseable expected date)`
6694
6946
  };
6695
6947
  }
6696
6948
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
6697
6949
  return {
6698
- path: path15,
6950
+ path: path16,
6699
6951
  score: 1,
6700
6952
  weight,
6701
6953
  hit: true,
6702
- message: path15
6954
+ message: path16
6703
6955
  };
6704
6956
  }
6705
6957
  return {
6706
- path: path15,
6958
+ path: path16,
6707
6959
  score: 0,
6708
6960
  weight,
6709
6961
  hit: false,
6710
- message: `${path15} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
6962
+ message: `${path16} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
6711
6963
  };
6712
6964
  }
6713
6965
  /**
@@ -6747,11 +6999,11 @@ var FieldAccuracyEvaluator = class {
6747
6999
  };
6748
7000
  }
6749
7001
  };
6750
- function resolvePath(obj, path15) {
6751
- if (!path15 || !obj) {
7002
+ function resolvePath(obj, path16) {
7003
+ if (!path16 || !obj) {
6752
7004
  return void 0;
6753
7005
  }
6754
- const parts = path15.split(/\.|\[|\]/).filter((p) => p.length > 0);
7006
+ const parts = path16.split(/\.|\[|\]/).filter((p) => p.length > 0);
6755
7007
  let current = obj;
6756
7008
  for (const part of parts) {
6757
7009
  if (current === null || current === void 0) {
@@ -7187,7 +7439,7 @@ var ToolTrajectoryEvaluator = class {
7187
7439
 
7188
7440
  // src/evaluation/orchestrator.ts
7189
7441
  import { createHash } from "node:crypto";
7190
- import path14 from "node:path";
7442
+ import path15 from "node:path";
7191
7443
 
7192
7444
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
7193
7445
  var Node = class {
@@ -7986,7 +8238,7 @@ async function runEvaluatorList(options) {
7986
8238
  });
7987
8239
  }
7988
8240
  if (evaluator.type === "composite") {
7989
- const evalFileDir = evalCase.guideline_paths[0] ? path14.dirname(evalCase.guideline_paths[0]) : process.cwd();
8241
+ const evalFileDir = evalCase.guideline_paths[0] ? path15.dirname(evalCase.guideline_paths[0]) : process.cwd();
7990
8242
  const createEvaluator = (memberConfig) => {
7991
8243
  switch (memberConfig.type) {
7992
8244
  case "llm_judge":
@@ -8560,6 +8812,7 @@ export {
8560
8812
  createAgentKernel,
8561
8813
  createProvider,
8562
8814
  deepEqual,
8815
+ detectFormat,
8563
8816
  ensureVSCodeSubagents,
8564
8817
  executeScript,
8565
8818
  explorationRatio,