@agentv/core 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -146,8 +146,8 @@ function mergeExecutionMetrics(summary, metrics) {
146
146
  }
147
147
 
148
148
  // src/evaluation/yaml-parser.ts
149
- import { readFile as readFile5 } from "node:fs/promises";
150
- import path6 from "node:path";
149
+ import { readFile as readFile6 } from "node:fs/promises";
150
+ import path7 from "node:path";
151
151
  import { parse as parse2 } from "yaml";
152
152
 
153
153
  // src/evaluation/loaders/config-loader.ts
@@ -257,7 +257,6 @@ async function resolveFileReference2(rawValue, searchRoots) {
257
257
  }
258
258
 
259
259
  // src/evaluation/loaders/config-loader.ts
260
- var SCHEMA_CONFIG_V2 = "agentv-config-v2";
261
260
  var ANSI_YELLOW = "\x1B[33m";
262
261
  var ANSI_RESET = "\x1B[0m";
263
262
  async function loadConfig(evalFilePath, repoRoot) {
@@ -275,13 +274,6 @@ async function loadConfig(evalFilePath, repoRoot) {
275
274
  continue;
276
275
  }
277
276
  const config = parsed;
278
- const schema = config.$schema;
279
- if (schema !== SCHEMA_CONFIG_V2) {
280
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${configPath}. Expected '${SCHEMA_CONFIG_V2}'` : `Missing required field '$schema' in ${configPath}.
281
- Please add '$schema: ${SCHEMA_CONFIG_V2}' at the top of the file.`;
282
- logWarning(message);
283
- continue;
284
- }
285
277
  const guidelinePatterns = config.guideline_patterns;
286
278
  if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
287
279
  logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
@@ -390,7 +382,8 @@ var ANSI_YELLOW3 = "\x1B[33m";
390
382
  var ANSI_RESET3 = "\x1B[0m";
391
383
  async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
392
384
  const execution = rawEvalCase.execution;
393
- const candidateEvaluators = isJsonObject2(execution) ? execution.evaluators ?? rawEvalCase.evaluators : rawEvalCase.evaluators ?? globalExecution?.evaluators;
385
+ const executionObject = isJsonObject2(execution) ? execution : void 0;
386
+ const candidateEvaluators = (executionObject ? executionObject.evaluators : void 0) ?? rawEvalCase.evaluators ?? globalExecution?.evaluators;
394
387
  if (candidateEvaluators === void 0) {
395
388
  return void 0;
396
389
  }
@@ -933,6 +926,11 @@ function isValidFieldAggregationType(value) {
933
926
  return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
934
927
  }
935
928
 
929
+ // src/evaluation/loaders/jsonl-parser.ts
930
+ import { readFile as readFile4 } from "node:fs/promises";
931
+ import path5 from "node:path";
932
+ import { parse as parseYaml } from "yaml";
933
+
936
934
  // src/evaluation/loaders/message-processor.ts
937
935
  import { readFile as readFile3 } from "node:fs/promises";
938
936
  import path4 from "node:path";
@@ -1193,28 +1191,271 @@ async function processExpectedMessages(options) {
1193
1191
  return segments;
1194
1192
  }
1195
1193
 
1196
- // src/evaluation/formatting/prompt-builder.ts
1197
- import { readFile as readFile4 } from "node:fs/promises";
1198
- import path5 from "node:path";
1194
+ // src/evaluation/loaders/jsonl-parser.ts
1199
1195
  var ANSI_YELLOW5 = "\x1B[33m";
1196
+ var ANSI_RED = "\x1B[31m";
1200
1197
  var ANSI_RESET5 = "\x1B[0m";
1198
+ function detectFormat(filePath) {
1199
+ const ext = path5.extname(filePath).toLowerCase();
1200
+ if (ext === ".jsonl") return "jsonl";
1201
+ if (ext === ".yaml" || ext === ".yml") return "yaml";
1202
+ throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
1203
+ }
1204
+ async function loadSidecarMetadata(jsonlPath, verbose) {
1205
+ const dir = path5.dirname(jsonlPath);
1206
+ const base = path5.basename(jsonlPath, ".jsonl");
1207
+ const sidecarPath = path5.join(dir, `${base}.yaml`);
1208
+ if (!await fileExists2(sidecarPath)) {
1209
+ if (verbose) {
1210
+ logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
1211
+ }
1212
+ return {};
1213
+ }
1214
+ try {
1215
+ const content = await readFile4(sidecarPath, "utf8");
1216
+ const parsed = parseYaml(content);
1217
+ if (!isJsonObject(parsed)) {
1218
+ logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
1219
+ return {};
1220
+ }
1221
+ return {
1222
+ description: asString4(parsed.description),
1223
+ dataset: asString4(parsed.dataset),
1224
+ execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
1225
+ evaluator: parsed.evaluator
1226
+ };
1227
+ } catch (error) {
1228
+ logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error.message}`);
1229
+ return {};
1230
+ }
1231
+ }
1232
+ function parseJsonlContent(content, filePath) {
1233
+ const lines = content.split("\n");
1234
+ const cases = [];
1235
+ for (let i = 0; i < lines.length; i++) {
1236
+ const line = lines[i].trim();
1237
+ if (line === "") continue;
1238
+ try {
1239
+ const parsed = JSON.parse(line);
1240
+ if (!isJsonObject(parsed)) {
1241
+ throw new Error("Expected JSON object");
1242
+ }
1243
+ cases.push(parsed);
1244
+ } catch (error) {
1245
+ const message = error instanceof Error ? error.message : String(error);
1246
+ throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
1247
+ File: ${filePath}`);
1248
+ }
1249
+ }
1250
+ return cases;
1251
+ }
1252
+ async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
1253
+ const verbose = options?.verbose ?? false;
1254
+ const evalIdFilter = options?.evalId;
1255
+ const absoluteTestPath = path5.resolve(evalFilePath);
1256
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
1257
+ const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
1258
+ const config = await loadConfig(absoluteTestPath, repoRootPath);
1259
+ const guidelinePatterns = config?.guideline_patterns;
1260
+ const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
1261
+ const rawFile = await readFile4(absoluteTestPath, "utf8");
1262
+ const rawCases = parseJsonlContent(rawFile, evalFilePath);
1263
+ const fallbackDataset = path5.basename(absoluteTestPath, ".jsonl") || "eval";
1264
+ const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
1265
+ const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
1266
+ const globalExecution = sidecar.execution;
1267
+ if (verbose) {
1268
+ console.log(`
1269
+ [JSONL Dataset: ${evalFilePath}]`);
1270
+ console.log(` Cases: ${rawCases.length}`);
1271
+ console.log(` Dataset name: ${datasetName}`);
1272
+ if (sidecar.description) {
1273
+ console.log(` Description: ${sidecar.description}`);
1274
+ }
1275
+ }
1276
+ const results = [];
1277
+ for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
1278
+ const evalcase = rawCases[lineIndex];
1279
+ const lineNumber = lineIndex + 1;
1280
+ const id = asString4(evalcase.id);
1281
+ if (evalIdFilter && id !== evalIdFilter) {
1282
+ continue;
1283
+ }
1284
+ const conversationId = asString4(evalcase.conversation_id);
1285
+ const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
1286
+ const inputMessagesValue = evalcase.input_messages;
1287
+ const expectedMessagesValue = evalcase.expected_messages;
1288
+ if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1289
+ logError(
1290
+ `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
1291
+ );
1292
+ continue;
1293
+ }
1294
+ const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
1295
+ const inputMessages = inputMessagesValue.filter(
1296
+ (msg) => isTestMessage(msg)
1297
+ );
1298
+ const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1299
+ if (hasExpectedMessages && expectedMessages.length === 0) {
1300
+ logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
1301
+ continue;
1302
+ }
1303
+ const guidelinePaths = [];
1304
+ const inputTextParts = [];
1305
+ const inputSegments = await processMessages({
1306
+ messages: inputMessages,
1307
+ searchRoots,
1308
+ repoRootPath,
1309
+ guidelinePatterns,
1310
+ guidelinePaths,
1311
+ textParts: inputTextParts,
1312
+ messageType: "input",
1313
+ verbose
1314
+ });
1315
+ const outputSegments = hasExpectedMessages ? await processExpectedMessages({
1316
+ messages: expectedMessages,
1317
+ searchRoots,
1318
+ repoRootPath,
1319
+ verbose
1320
+ }) : [];
1321
+ let referenceAnswer = "";
1322
+ if (outputSegments.length > 0) {
1323
+ const lastMessage = outputSegments[outputSegments.length - 1];
1324
+ const content = lastMessage.content;
1325
+ const toolCalls = lastMessage.tool_calls;
1326
+ if (typeof content === "string") {
1327
+ referenceAnswer = content;
1328
+ } else if (content !== void 0 && content !== null) {
1329
+ referenceAnswer = JSON.stringify(content, null, 2);
1330
+ } else if (toolCalls !== void 0 && toolCalls !== null) {
1331
+ referenceAnswer = JSON.stringify(toolCalls, null, 2);
1332
+ }
1333
+ }
1334
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
1335
+ const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
1336
+ const mergedExecution = caseExecution ?? globalExecution;
1337
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
1338
+ let evaluators;
1339
+ try {
1340
+ evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
1341
+ } catch (error) {
1342
+ const message = error instanceof Error ? error.message : String(error);
1343
+ logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
1344
+ continue;
1345
+ }
1346
+ const inlineRubrics = evalcase.rubrics;
1347
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
1348
+ const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
1349
+ if (typeof rubric === "string") {
1350
+ return {
1351
+ id: `rubric-${index + 1}`,
1352
+ description: rubric,
1353
+ weight: 1,
1354
+ required: true
1355
+ };
1356
+ }
1357
+ return {
1358
+ id: asString4(rubric.id) ?? `rubric-${index + 1}`,
1359
+ description: asString4(rubric.description) ?? "",
1360
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1361
+ required: typeof rubric.required === "boolean" ? rubric.required : true
1362
+ };
1363
+ }).filter((r) => r.description.length > 0);
1364
+ if (rubricItems.length > 0) {
1365
+ const rubricEvaluator = {
1366
+ name: "rubric",
1367
+ type: "llm_judge",
1368
+ rubrics: rubricItems
1369
+ };
1370
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
1371
+ }
1372
+ }
1373
+ const userFilePaths = [];
1374
+ for (const segment of inputSegments) {
1375
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
1376
+ userFilePaths.push(segment.resolvedPath);
1377
+ }
1378
+ }
1379
+ const allFilePaths = [
1380
+ ...guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath)),
1381
+ ...userFilePaths
1382
+ ];
1383
+ const testCase = {
1384
+ id,
1385
+ dataset: datasetName,
1386
+ conversation_id: conversationId,
1387
+ question,
1388
+ input_messages: inputMessages,
1389
+ input_segments: inputSegments,
1390
+ expected_messages: outputSegments,
1391
+ reference_answer: referenceAnswer,
1392
+ guideline_paths: guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath)),
1393
+ guideline_patterns: guidelinePatterns,
1394
+ file_paths: allFilePaths,
1395
+ expected_outcome: outcome,
1396
+ evaluator: evalCaseEvaluatorKind,
1397
+ evaluators
1398
+ };
1399
+ if (verbose) {
1400
+ console.log(`
1401
+ [Eval Case: ${id}]`);
1402
+ if (testCase.guideline_paths.length > 0) {
1403
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
1404
+ for (const guidelinePath of testCase.guideline_paths) {
1405
+ console.log(` - ${guidelinePath}`);
1406
+ }
1407
+ } else {
1408
+ console.log(" No guidelines found");
1409
+ }
1410
+ }
1411
+ results.push(testCase);
1412
+ }
1413
+ return results;
1414
+ }
1415
+ function asString4(value) {
1416
+ return typeof value === "string" ? value : void 0;
1417
+ }
1418
+ function logWarning4(message, details) {
1419
+ if (details && details.length > 0) {
1420
+ const detailBlock = details.join("\n");
1421
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
1422
+ ${detailBlock}${ANSI_RESET5}`);
1423
+ } else {
1424
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1425
+ }
1426
+ }
1427
+ function logError(message, details) {
1428
+ if (details && details.length > 0) {
1429
+ const detailBlock = details.join("\n");
1430
+ console.error(`${ANSI_RED}Error: ${message}
1431
+ ${detailBlock}${ANSI_RESET5}`);
1432
+ } else {
1433
+ console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
1434
+ }
1435
+ }
1436
+
1437
+ // src/evaluation/formatting/prompt-builder.ts
1438
+ import { readFile as readFile5 } from "node:fs/promises";
1439
+ import path6 from "node:path";
1440
+ var ANSI_YELLOW6 = "\x1B[33m";
1441
+ var ANSI_RESET6 = "\x1B[0m";
1201
1442
  async function buildPromptInputs(testCase, mode = "lm") {
1202
1443
  const guidelineParts = [];
1203
1444
  for (const rawPath of testCase.guideline_paths) {
1204
- const absolutePath = path5.resolve(rawPath);
1445
+ const absolutePath = path6.resolve(rawPath);
1205
1446
  if (!await fileExists2(absolutePath)) {
1206
- logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
1447
+ logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
1207
1448
  continue;
1208
1449
  }
1209
1450
  try {
1210
- const content = (await readFile4(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1451
+ const content = (await readFile5(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
1211
1452
  guidelineParts.push({
1212
1453
  content,
1213
1454
  isFile: true,
1214
- displayPath: path5.basename(absolutePath)
1455
+ displayPath: path6.basename(absolutePath)
1215
1456
  });
1216
1457
  } catch (error) {
1217
- logWarning4(`Could not read guideline file ${absolutePath}: ${error.message}`);
1458
+ logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
1218
1459
  }
1219
1460
  }
1220
1461
  const guidelines = formatFileContents(guidelineParts);
@@ -1238,9 +1479,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
1238
1479
  messageSegments.push({ type: "text", value: segment });
1239
1480
  }
1240
1481
  } else if (isJsonObject(segment)) {
1241
- const type = asString4(segment.type);
1482
+ const type = asString5(segment.type);
1242
1483
  if (type === "file") {
1243
- const value = asString4(segment.value);
1484
+ const value = asString5(segment.value);
1244
1485
  if (!value) continue;
1245
1486
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
1246
1487
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -1251,7 +1492,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
1251
1492
  messageSegments.push({ type: "file", text: fileText, path: value });
1252
1493
  }
1253
1494
  } else if (type === "text") {
1254
- const textValue = asString4(segment.value);
1495
+ const textValue = asString5(segment.value);
1255
1496
  if (textValue && textValue.trim().length > 0) {
1256
1497
  messageSegments.push({ type: "text", value: textValue });
1257
1498
  }
@@ -1405,21 +1646,21 @@ ${guidelineContent.trim()}`);
1405
1646
  }
1406
1647
  return chatPrompt.length > 0 ? chatPrompt : void 0;
1407
1648
  }
1408
- function asString4(value) {
1649
+ function asString5(value) {
1409
1650
  return typeof value === "string" ? value : void 0;
1410
1651
  }
1411
- function logWarning4(message) {
1412
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
1652
+ function logWarning5(message) {
1653
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1413
1654
  }
1414
1655
 
1415
1656
  // src/evaluation/yaml-parser.ts
1416
- var ANSI_YELLOW6 = "\x1B[33m";
1417
- var ANSI_RED = "\x1B[31m";
1418
- var ANSI_RESET6 = "\x1B[0m";
1657
+ var ANSI_YELLOW7 = "\x1B[33m";
1658
+ var ANSI_RED2 = "\x1B[31m";
1659
+ var ANSI_RESET7 = "\x1B[0m";
1419
1660
  async function readTestSuiteMetadata(testFilePath) {
1420
1661
  try {
1421
- const absolutePath = path6.resolve(testFilePath);
1422
- const content = await readFile5(absolutePath, "utf8");
1662
+ const absolutePath = path7.resolve(testFilePath);
1663
+ const content = await readFile6(absolutePath, "utf8");
1423
1664
  const parsed = parse2(content);
1424
1665
  if (!isJsonObject(parsed)) {
1425
1666
  return {};
@@ -1430,21 +1671,25 @@ async function readTestSuiteMetadata(testFilePath) {
1430
1671
  }
1431
1672
  }
1432
1673
  async function loadEvalCases(evalFilePath, repoRoot, options) {
1674
+ const format = detectFormat(evalFilePath);
1675
+ if (format === "jsonl") {
1676
+ return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
1677
+ }
1433
1678
  const verbose = options?.verbose ?? false;
1434
1679
  const evalIdFilter = options?.evalId;
1435
- const absoluteTestPath = path6.resolve(evalFilePath);
1680
+ const absoluteTestPath = path7.resolve(evalFilePath);
1436
1681
  const repoRootPath = resolveToAbsolutePath(repoRoot);
1437
1682
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
1438
1683
  const config = await loadConfig(absoluteTestPath, repoRootPath);
1439
1684
  const guidelinePatterns = config?.guideline_patterns;
1440
- const rawFile = await readFile5(absoluteTestPath, "utf8");
1685
+ const rawFile = await readFile6(absoluteTestPath, "utf8");
1441
1686
  const parsed = parse2(rawFile);
1442
1687
  if (!isJsonObject(parsed)) {
1443
1688
  throw new Error(`Invalid test file format: ${evalFilePath}`);
1444
1689
  }
1445
1690
  const suite = parsed;
1446
- const datasetNameFromSuite = asString5(suite.dataset)?.trim();
1447
- const fallbackDataset = path6.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1691
+ const datasetNameFromSuite = asString6(suite.dataset)?.trim();
1692
+ const fallbackDataset = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
1448
1693
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
1449
1694
  const rawTestcases = suite.evalcases;
1450
1695
  if (!Array.isArray(rawTestcases)) {
@@ -1452,24 +1697,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1452
1697
  }
1453
1698
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
1454
1699
  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
1455
- const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
1700
+ const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
1456
1701
  const results = [];
1457
1702
  for (const rawEvalcase of rawTestcases) {
1458
1703
  if (!isJsonObject(rawEvalcase)) {
1459
- logWarning5("Skipping invalid eval case entry (expected object)");
1704
+ logWarning6("Skipping invalid eval case entry (expected object)");
1460
1705
  continue;
1461
1706
  }
1462
1707
  const evalcase = rawEvalcase;
1463
- const id = asString5(evalcase.id);
1708
+ const id = asString6(evalcase.id);
1464
1709
  if (evalIdFilter && id !== evalIdFilter) {
1465
1710
  continue;
1466
1711
  }
1467
- const conversationId = asString5(evalcase.conversation_id);
1468
- const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
1712
+ const conversationId = asString6(evalcase.conversation_id);
1713
+ const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
1469
1714
  const inputMessagesValue = evalcase.input_messages;
1470
1715
  const expectedMessagesValue = evalcase.expected_messages;
1471
1716
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
1472
- logError(
1717
+ logError2(
1473
1718
  `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
1474
1719
  );
1475
1720
  continue;
@@ -1480,7 +1725,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1480
1725
  );
1481
1726
  const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
1482
1727
  if (hasExpectedMessages && expectedMessages.length === 0) {
1483
- logError(`No valid expected message found for eval case: ${id}`);
1728
+ logError2(`No valid expected message found for eval case: ${id}`);
1484
1729
  continue;
1485
1730
  }
1486
1731
  const guidelinePaths = [];
@@ -1521,7 +1766,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1521
1766
  evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
1522
1767
  } catch (error) {
1523
1768
  const message = error instanceof Error ? error.message : String(error);
1524
- logError(`Skipping eval case '${id}': ${message}`);
1769
+ logError2(`Skipping eval case '${id}': ${message}`);
1525
1770
  continue;
1526
1771
  }
1527
1772
  const inlineRubrics = evalcase.rubrics;
@@ -1536,8 +1781,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1536
1781
  };
1537
1782
  }
1538
1783
  return {
1539
- id: asString5(rubric.id) ?? `rubric-${index + 1}`,
1540
- description: asString5(rubric.description) ?? "",
1784
+ id: asString6(rubric.id) ?? `rubric-${index + 1}`,
1785
+ description: asString6(rubric.description) ?? "",
1541
1786
  weight: typeof rubric.weight === "number" ? rubric.weight : 1,
1542
1787
  required: typeof rubric.required === "boolean" ? rubric.required : true
1543
1788
  };
@@ -1558,7 +1803,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1558
1803
  }
1559
1804
  }
1560
1805
  const allFilePaths = [
1561
- ...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
1806
+ ...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
1562
1807
  ...userFilePaths
1563
1808
  ];
1564
1809
  const testCase = {
@@ -1570,7 +1815,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1570
1815
  input_segments: inputSegments,
1571
1816
  expected_messages: outputSegments,
1572
1817
  reference_answer: referenceAnswer,
1573
- guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
1818
+ guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
1574
1819
  guideline_patterns: guidelinePatterns,
1575
1820
  file_paths: allFilePaths,
1576
1821
  expected_outcome: outcome,
@@ -1593,25 +1838,25 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
1593
1838
  }
1594
1839
  return results;
1595
1840
  }
1596
- function asString5(value) {
1841
+ function asString6(value) {
1597
1842
  return typeof value === "string" ? value : void 0;
1598
1843
  }
1599
- function logWarning5(message, details) {
1844
+ function logWarning6(message, details) {
1600
1845
  if (details && details.length > 0) {
1601
1846
  const detailBlock = details.join("\n");
1602
- console.warn(`${ANSI_YELLOW6}Warning: ${message}
1603
- ${detailBlock}${ANSI_RESET6}`);
1847
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}
1848
+ ${detailBlock}${ANSI_RESET7}`);
1604
1849
  } else {
1605
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
1850
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
1606
1851
  }
1607
1852
  }
1608
- function logError(message, details) {
1853
+ function logError2(message, details) {
1609
1854
  if (details && details.length > 0) {
1610
1855
  const detailBlock = details.join("\n");
1611
- console.error(`${ANSI_RED}Error: ${message}
1612
- ${detailBlock}${ANSI_RESET6}`);
1856
+ console.error(`${ANSI_RED2}Error: ${message}
1857
+ ${detailBlock}${ANSI_RESET7}`);
1613
1858
  } else {
1614
- console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
1859
+ console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
1615
1860
  }
1616
1861
  }
1617
1862
 
@@ -1954,7 +2199,7 @@ import { randomUUID } from "node:crypto";
1954
2199
  import { createWriteStream } from "node:fs";
1955
2200
  import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
1956
2201
  import { tmpdir } from "node:os";
1957
- import path8 from "node:path";
2202
+ import path9 from "node:path";
1958
2203
 
1959
2204
  // src/evaluation/providers/claude-code-log-tracker.ts
1960
2205
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
@@ -2010,7 +2255,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
2010
2255
  }
2011
2256
 
2012
2257
  // src/evaluation/providers/preread.ts
2013
- import path7 from "node:path";
2258
+ import path8 from "node:path";
2014
2259
  function buildPromptDocument(request, inputFiles, options) {
2015
2260
  const parts = [];
2016
2261
  const guidelineFiles = collectGuidelineFiles(
@@ -2033,7 +2278,7 @@ function normalizeInputFiles(inputFiles) {
2033
2278
  }
2034
2279
  const deduped = /* @__PURE__ */ new Map();
2035
2280
  for (const inputFile of inputFiles) {
2036
- const absolutePath = path7.resolve(inputFile);
2281
+ const absolutePath = path8.resolve(inputFile);
2037
2282
  if (!deduped.has(absolutePath)) {
2038
2283
  deduped.set(absolutePath, absolutePath);
2039
2284
  }
@@ -2046,14 +2291,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
2046
2291
  }
2047
2292
  const unique = /* @__PURE__ */ new Map();
2048
2293
  for (const inputFile of inputFiles) {
2049
- const absolutePath = path7.resolve(inputFile);
2294
+ const absolutePath = path8.resolve(inputFile);
2050
2295
  if (overrides?.has(absolutePath)) {
2051
2296
  if (!unique.has(absolutePath)) {
2052
2297
  unique.set(absolutePath, absolutePath);
2053
2298
  }
2054
2299
  continue;
2055
2300
  }
2056
- const normalized = absolutePath.split(path7.sep).join("/");
2301
+ const normalized = absolutePath.split(path8.sep).join("/");
2057
2302
  if (isGuidelineFile(normalized, guidelinePatterns)) {
2058
2303
  if (!unique.has(absolutePath)) {
2059
2304
  unique.set(absolutePath, absolutePath);
@@ -2068,7 +2313,7 @@ function collectInputFiles(inputFiles) {
2068
2313
  }
2069
2314
  const unique = /* @__PURE__ */ new Map();
2070
2315
  for (const inputFile of inputFiles) {
2071
- const absolutePath = path7.resolve(inputFile);
2316
+ const absolutePath = path8.resolve(inputFile);
2072
2317
  if (!unique.has(absolutePath)) {
2073
2318
  unique.set(absolutePath, absolutePath);
2074
2319
  }
@@ -2080,7 +2325,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
2080
2325
  return "";
2081
2326
  }
2082
2327
  const buildList = (files) => files.map((absolutePath) => {
2083
- const fileName = path7.basename(absolutePath);
2328
+ const fileName = path8.basename(absolutePath);
2084
2329
  const fileUri = pathToFileUri(absolutePath);
2085
2330
  return `* [${fileName}](${fileUri})`;
2086
2331
  });
@@ -2100,7 +2345,7 @@ ${buildList(inputFiles).join("\n")}.`);
2100
2345
  return sections.join("\n");
2101
2346
  }
2102
2347
  function pathToFileUri(filePath) {
2103
- const absolutePath = path7.isAbsolute(filePath) ? filePath : path7.resolve(filePath);
2348
+ const absolutePath = path8.isAbsolute(filePath) ? filePath : path8.resolve(filePath);
2104
2349
  const normalizedPath = absolutePath.replace(/\\/g, "/");
2105
2350
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
2106
2351
  return `file:///${normalizedPath}`;
@@ -2137,7 +2382,7 @@ var ClaudeCodeProvider = class {
2137
2382
  const workspaceRoot = await this.createWorkspace();
2138
2383
  const logger = await this.createStreamLogger(request).catch(() => void 0);
2139
2384
  try {
2140
- const promptFile = path8.join(workspaceRoot, PROMPT_FILENAME);
2385
+ const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
2141
2386
  await writeFile(promptFile, request.question, "utf8");
2142
2387
  const args = this.buildClaudeCodeArgs(request.question, inputFiles);
2143
2388
  const cwd = this.resolveCwd();
@@ -2185,7 +2430,7 @@ var ClaudeCodeProvider = class {
2185
2430
  if (!this.config.cwd) {
2186
2431
  return process.cwd();
2187
2432
  }
2188
- return path8.resolve(this.config.cwd);
2433
+ return path9.resolve(this.config.cwd);
2189
2434
  }
2190
2435
  buildClaudeCodeArgs(prompt, inputFiles) {
2191
2436
  const args = [];
@@ -2242,7 +2487,7 @@ ${filesContext}`;
2242
2487
  }
2243
2488
  }
2244
2489
  async createWorkspace() {
2245
- return await mkdtemp(path8.join(tmpdir(), WORKSPACE_PREFIX));
2490
+ return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
2246
2491
  }
2247
2492
  async cleanupWorkspace(workspaceRoot) {
2248
2493
  try {
@@ -2256,9 +2501,9 @@ ${filesContext}`;
2256
2501
  return void 0;
2257
2502
  }
2258
2503
  if (this.config.logDir) {
2259
- return path8.resolve(this.config.logDir);
2504
+ return path9.resolve(this.config.logDir);
2260
2505
  }
2261
- return path8.join(process.cwd(), ".agentv", "logs", "claude-code");
2506
+ return path9.join(process.cwd(), ".agentv", "logs", "claude-code");
2262
2507
  }
2263
2508
  async createStreamLogger(request) {
2264
2509
  const logDir = this.resolveLogDirectory();
@@ -2272,7 +2517,7 @@ ${filesContext}`;
2272
2517
  console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
2273
2518
  return void 0;
2274
2519
  }
2275
- const filePath = path8.join(logDir, buildLogFilename(request, this.targetName));
2520
+ const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
2276
2521
  try {
2277
2522
  const logger = await ClaudeCodeStreamLogger.create({
2278
2523
  filePath,
@@ -2677,10 +2922,10 @@ function escapeShellArg(arg) {
2677
2922
  }
2678
2923
  async function defaultClaudeCodeRunner(options) {
2679
2924
  const tempId = randomUUID();
2680
- const stdoutFile = path8.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
2681
- const stderrFile = path8.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
2682
- const exitFile = path8.join(tmpdir(), `agentv-cc-${tempId}-exit`);
2683
- const pidFile = path8.join(tmpdir(), `agentv-cc-${tempId}-pid`);
2925
+ const stdoutFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
2926
+ const stderrFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
2927
+ const exitFile = path9.join(tmpdir(), `agentv-cc-${tempId}-exit`);
2928
+ const pidFile = path9.join(tmpdir(), `agentv-cc-${tempId}-pid`);
2684
2929
  try {
2685
2930
  return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
2686
2931
  } finally {
@@ -2720,8 +2965,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2720
2965
  let lastStdoutSize = 0;
2721
2966
  const readFileIfExists = async (filePath) => {
2722
2967
  try {
2723
- const { readFile: readFile7 } = await import("node:fs/promises");
2724
- return await readFile7(filePath, "utf8");
2968
+ const { readFile: readFile8 } = await import("node:fs/promises");
2969
+ return await readFile8(filePath, "utf8");
2725
2970
  } catch {
2726
2971
  return "";
2727
2972
  }
@@ -2796,7 +3041,7 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
2796
3041
  import { exec as execWithCallback } from "node:child_process";
2797
3042
  import fs from "node:fs/promises";
2798
3043
  import os from "node:os";
2799
- import path9 from "node:path";
3044
+ import path10 from "node:path";
2800
3045
  import { promisify } from "node:util";
2801
3046
  import { z } from "zod";
2802
3047
  var ToolCallSchema = z.object({
@@ -3253,7 +3498,7 @@ function normalizeInputFiles2(inputFiles) {
3253
3498
  }
3254
3499
  const unique = /* @__PURE__ */ new Map();
3255
3500
  for (const inputFile of inputFiles) {
3256
- const absolutePath = path9.resolve(inputFile);
3501
+ const absolutePath = path10.resolve(inputFile);
3257
3502
  if (!unique.has(absolutePath)) {
3258
3503
  unique.set(absolutePath, absolutePath);
3259
3504
  }
@@ -3267,7 +3512,7 @@ function formatFileList(files, template) {
3267
3512
  const formatter = template ?? "{path}";
3268
3513
  return files.map((filePath) => {
3269
3514
  const escapedPath = shellEscape(filePath);
3270
- const escapedName = shellEscape(path9.basename(filePath));
3515
+ const escapedName = shellEscape(path10.basename(filePath));
3271
3516
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
3272
3517
  }).join(" ");
3273
3518
  }
@@ -3291,7 +3536,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
3291
3536
  const safeEvalId = evalCaseId || "unknown";
3292
3537
  const timestamp = Date.now();
3293
3538
  const random = Math.random().toString(36).substring(2, 9);
3294
- return path9.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3539
+ return path10.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
3295
3540
  }
3296
3541
  function formatTimeoutSuffix2(timeoutMs) {
3297
3542
  if (!timeoutMs || timeoutMs <= 0) {
@@ -3307,7 +3552,7 @@ import { randomUUID as randomUUID2 } from "node:crypto";
3307
3552
  import { constants as constants2, createWriteStream as createWriteStream2 } from "node:fs";
3308
3553
  import { access as access2, mkdir as mkdir2, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile2 } from "node:fs/promises";
3309
3554
  import { tmpdir as tmpdir2 } from "node:os";
3310
- import path10 from "node:path";
3555
+ import path11 from "node:path";
3311
3556
  import { promisify as promisify2 } from "node:util";
3312
3557
 
3313
3558
  // src/evaluation/providers/codex-log-tracker.ts
@@ -3402,7 +3647,7 @@ var CodexProvider = class {
3402
3647
  const promptContent = `${systemPrompt}
3403
3648
 
3404
3649
  ${basePrompt}`;
3405
- const promptFile = path10.join(workspaceRoot, PROMPT_FILENAME2);
3650
+ const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME2);
3406
3651
  await writeFile2(promptFile, promptContent, "utf8");
3407
3652
  const args = this.buildCodexArgs();
3408
3653
  const cwd = this.resolveCwd(workspaceRoot);
@@ -3452,7 +3697,7 @@ ${basePrompt}`;
3452
3697
  if (!this.config.cwd) {
3453
3698
  return workspaceRoot;
3454
3699
  }
3455
- return path10.resolve(this.config.cwd);
3700
+ return path11.resolve(this.config.cwd);
3456
3701
  }
3457
3702
  buildCodexArgs() {
3458
3703
  const args = [
@@ -3494,7 +3739,7 @@ ${basePrompt}`;
3494
3739
  }
3495
3740
  }
3496
3741
  async createWorkspace() {
3497
- return await mkdtemp2(path10.join(tmpdir2(), WORKSPACE_PREFIX2));
3742
+ return await mkdtemp2(path11.join(tmpdir2(), WORKSPACE_PREFIX2));
3498
3743
  }
3499
3744
  async cleanupWorkspace(workspaceRoot) {
3500
3745
  try {
@@ -3508,9 +3753,9 @@ ${basePrompt}`;
3508
3753
  return void 0;
3509
3754
  }
3510
3755
  if (this.config.logDir) {
3511
- return path10.resolve(this.config.logDir);
3756
+ return path11.resolve(this.config.logDir);
3512
3757
  }
3513
- return path10.join(process.cwd(), ".agentv", "logs", "codex");
3758
+ return path11.join(process.cwd(), ".agentv", "logs", "codex");
3514
3759
  }
3515
3760
  async createStreamLogger(request) {
3516
3761
  const logDir = this.resolveLogDirectory();
@@ -3524,7 +3769,7 @@ ${basePrompt}`;
3524
3769
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
3525
3770
  return void 0;
3526
3771
  }
3527
- const filePath = path10.join(logDir, buildLogFilename2(request, this.targetName));
3772
+ const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
3528
3773
  try {
3529
3774
  const logger = await CodexStreamLogger.create({
3530
3775
  filePath,
@@ -3739,7 +3984,7 @@ function tryParseJsonValue2(rawLine) {
3739
3984
  async function locateExecutable(candidate) {
3740
3985
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
3741
3986
  if (includesPathSeparator) {
3742
- const resolved = path10.isAbsolute(candidate) ? candidate : path10.resolve(candidate);
3987
+ const resolved = path11.isAbsolute(candidate) ? candidate : path11.resolve(candidate);
3743
3988
  const executablePath = await ensureWindowsExecutableVariant(resolved);
3744
3989
  await access2(executablePath, constants2.F_OK);
3745
3990
  return executablePath;
@@ -4252,7 +4497,7 @@ import { randomUUID as randomUUID3 } from "node:crypto";
4252
4497
  import { createWriteStream as createWriteStream3 } from "node:fs";
4253
4498
  import { mkdir as mkdir3, mkdtemp as mkdtemp3, rm as rm3, writeFile as writeFile3 } from "node:fs/promises";
4254
4499
  import { tmpdir as tmpdir3 } from "node:os";
4255
- import path11 from "node:path";
4500
+ import path12 from "node:path";
4256
4501
 
4257
4502
  // src/evaluation/providers/pi-log-tracker.ts
4258
4503
  var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
@@ -4336,7 +4581,7 @@ var PiCodingAgentProvider = class {
4336
4581
  const workspaceRoot = await this.createWorkspace();
4337
4582
  const logger = await this.createStreamLogger(request).catch(() => void 0);
4338
4583
  try {
4339
- const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME3);
4584
+ const promptFile = path12.join(workspaceRoot, PROMPT_FILENAME3);
4340
4585
  await writeFile3(promptFile, request.question, "utf8");
4341
4586
  const args = this.buildPiArgs(request.question, inputFiles);
4342
4587
  const cwd = this.resolveCwd(workspaceRoot);
@@ -4378,7 +4623,7 @@ var PiCodingAgentProvider = class {
4378
4623
  if (!this.config.cwd) {
4379
4624
  return workspaceRoot;
4380
4625
  }
4381
- return path11.resolve(this.config.cwd);
4626
+ return path12.resolve(this.config.cwd);
4382
4627
  }
4383
4628
  buildPiArgs(prompt, inputFiles) {
4384
4629
  const args = [];
@@ -4467,7 +4712,7 @@ ${prompt}`;
4467
4712
  return env;
4468
4713
  }
4469
4714
  async createWorkspace() {
4470
- return await mkdtemp3(path11.join(tmpdir3(), WORKSPACE_PREFIX3));
4715
+ return await mkdtemp3(path12.join(tmpdir3(), WORKSPACE_PREFIX3));
4471
4716
  }
4472
4717
  async cleanupWorkspace(workspaceRoot) {
4473
4718
  try {
@@ -4477,9 +4722,9 @@ ${prompt}`;
4477
4722
  }
4478
4723
  resolveLogDirectory() {
4479
4724
  if (this.config.logDir) {
4480
- return path11.resolve(this.config.logDir);
4725
+ return path12.resolve(this.config.logDir);
4481
4726
  }
4482
- return path11.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4727
+ return path12.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
4483
4728
  }
4484
4729
  async createStreamLogger(request) {
4485
4730
  const logDir = this.resolveLogDirectory();
@@ -4493,7 +4738,7 @@ ${prompt}`;
4493
4738
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
4494
4739
  return void 0;
4495
4740
  }
4496
- const filePath = path11.join(logDir, buildLogFilename3(request, this.targetName));
4741
+ const filePath = path12.join(logDir, buildLogFilename3(request, this.targetName));
4497
4742
  try {
4498
4743
  const logger = await PiStreamLogger.create({
4499
4744
  filePath,
@@ -4926,7 +5171,7 @@ async function defaultPiRunner(options) {
4926
5171
  }
4927
5172
 
4928
5173
  // src/evaluation/providers/vscode.ts
4929
- import path12 from "node:path";
5174
+ import path13 from "node:path";
4930
5175
  import {
4931
5176
  dispatchAgentSession,
4932
5177
  dispatchBatchAgent,
@@ -5101,7 +5346,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
5101
5346
  return "";
5102
5347
  }
5103
5348
  const buildList = (files) => files.map((absolutePath) => {
5104
- const fileName = path12.basename(absolutePath);
5349
+ const fileName = path13.basename(absolutePath);
5105
5350
  const fileUri = pathToFileUri2(absolutePath);
5106
5351
  return `* [${fileName}](${fileUri})`;
5107
5352
  });
@@ -5126,8 +5371,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
5126
5371
  }
5127
5372
  const unique = /* @__PURE__ */ new Map();
5128
5373
  for (const attachment of attachments) {
5129
- const absolutePath = path12.resolve(attachment);
5130
- const normalized = absolutePath.split(path12.sep).join("/");
5374
+ const absolutePath = path13.resolve(attachment);
5375
+ const normalized = absolutePath.split(path13.sep).join("/");
5131
5376
  if (isGuidelineFile(normalized, guidelinePatterns)) {
5132
5377
  if (!unique.has(absolutePath)) {
5133
5378
  unique.set(absolutePath, absolutePath);
@@ -5142,7 +5387,7 @@ function collectAttachmentFiles(attachments) {
5142
5387
  }
5143
5388
  const unique = /* @__PURE__ */ new Map();
5144
5389
  for (const attachment of attachments) {
5145
- const absolutePath = path12.resolve(attachment);
5390
+ const absolutePath = path13.resolve(attachment);
5146
5391
  if (!unique.has(absolutePath)) {
5147
5392
  unique.set(absolutePath, absolutePath);
5148
5393
  }
@@ -5150,7 +5395,7 @@ function collectAttachmentFiles(attachments) {
5150
5395
  return Array.from(unique.values());
5151
5396
  }
5152
5397
  function pathToFileUri2(filePath) {
5153
- const absolutePath = path12.isAbsolute(filePath) ? filePath : path12.resolve(filePath);
5398
+ const absolutePath = path13.isAbsolute(filePath) ? filePath : path13.resolve(filePath);
5154
5399
  const normalizedPath = absolutePath.replace(/\\/g, "/");
5155
5400
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
5156
5401
  return `file:///${normalizedPath}`;
@@ -5163,7 +5408,7 @@ function normalizeAttachments(attachments) {
5163
5408
  }
5164
5409
  const deduped = /* @__PURE__ */ new Set();
5165
5410
  for (const attachment of attachments) {
5166
- deduped.add(path12.resolve(attachment));
5411
+ deduped.add(path13.resolve(attachment));
5167
5412
  }
5168
5413
  return Array.from(deduped);
5169
5414
  }
@@ -5172,7 +5417,7 @@ function mergeAttachments(all) {
5172
5417
  for (const list of all) {
5173
5418
  if (!list) continue;
5174
5419
  for (const inputFile of list) {
5175
- deduped.add(path12.resolve(inputFile));
5420
+ deduped.add(path13.resolve(inputFile));
5176
5421
  }
5177
5422
  }
5178
5423
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -5220,8 +5465,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
5220
5465
 
5221
5466
  // src/evaluation/providers/targets-file.ts
5222
5467
  import { constants as constants3 } from "node:fs";
5223
- import { access as access3, readFile as readFile6 } from "node:fs/promises";
5224
- import path13 from "node:path";
5468
+ import { access as access3, readFile as readFile7 } from "node:fs/promises";
5469
+ import path14 from "node:path";
5225
5470
  import { parse as parse3 } from "yaml";
5226
5471
  function isRecord(value) {
5227
5472
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -5258,11 +5503,11 @@ async function fileExists3(filePath) {
5258
5503
  }
5259
5504
  }
5260
5505
  async function readTargetDefinitions(filePath) {
5261
- const absolutePath = path13.resolve(filePath);
5506
+ const absolutePath = path14.resolve(filePath);
5262
5507
  if (!await fileExists3(absolutePath)) {
5263
5508
  throw new Error(`targets.yaml not found at ${absolutePath}`);
5264
5509
  }
5265
- const raw = await readFile6(absolutePath, "utf8");
5510
+ const raw = await readFile7(absolutePath, "utf8");
5266
5511
  const parsed = parse3(raw);
5267
5512
  if (!isRecord(parsed)) {
5268
5513
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -5469,15 +5714,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
5469
5714
  });
5470
5715
  }
5471
5716
  async function execShellWithStdin(command, stdinPayload, options = {}) {
5472
- const { mkdir: mkdir4, readFile: readFile7, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
5717
+ const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
5473
5718
  const { tmpdir: tmpdir4 } = await import("node:os");
5474
- const path15 = await import("node:path");
5719
+ const path16 = await import("node:path");
5475
5720
  const { randomUUID: randomUUID4 } = await import("node:crypto");
5476
- const dir = path15.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
5721
+ const dir = path16.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
5477
5722
  await mkdir4(dir, { recursive: true });
5478
- const stdinPath = path15.join(dir, "stdin.txt");
5479
- const stdoutPath = path15.join(dir, "stdout.txt");
5480
- const stderrPath = path15.join(dir, "stderr.txt");
5723
+ const stdinPath = path16.join(dir, "stdin.txt");
5724
+ const stdoutPath = path16.join(dir, "stdout.txt");
5725
+ const stderrPath = path16.join(dir, "stderr.txt");
5481
5726
  await writeFile4(stdinPath, stdinPayload, "utf8");
5482
5727
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
5483
5728
  const { spawn: spawn4 } = await import("node:child_process");
@@ -5507,8 +5752,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
5507
5752
  resolve(code ?? 0);
5508
5753
  });
5509
5754
  });
5510
- const stdout = (await readFile7(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
5511
- const stderr = (await readFile7(stderrPath, "utf8")).replace(/\r\n/g, "\n");
5755
+ const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
5756
+ const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
5512
5757
  return { stdout, stderr, exitCode };
5513
5758
  } finally {
5514
5759
  await rm4(dir, { recursive: true, force: true });
@@ -5780,7 +6025,7 @@ var CodeEvaluator = class {
5780
6025
  outputMessages: context.outputMessages ?? null,
5781
6026
  guidelineFiles: context.evalCase.guideline_paths,
5782
6027
  inputFiles: context.evalCase.file_paths.filter(
5783
- (path15) => !context.evalCase.guideline_paths.includes(path15)
6028
+ (path16) => !context.evalCase.guideline_paths.includes(path16)
5784
6029
  ),
5785
6030
  inputMessages: context.evalCase.input_messages,
5786
6031
  traceSummary: context.traceSummary ?? null,
@@ -6539,115 +6784,115 @@ var FieldAccuracyEvaluator = class {
6539
6784
  * Evaluate a single field against the expected value.
6540
6785
  */
6541
6786
  evaluateField(fieldConfig, candidateData, expectedData) {
6542
- const { path: path15, match, required = true, weight = 1 } = fieldConfig;
6543
- const candidateValue = resolvePath(candidateData, path15);
6544
- const expectedValue = resolvePath(expectedData, path15);
6787
+ const { path: path16, match, required = true, weight = 1 } = fieldConfig;
6788
+ const candidateValue = resolvePath(candidateData, path16);
6789
+ const expectedValue = resolvePath(expectedData, path16);
6545
6790
  if (expectedValue === void 0) {
6546
6791
  return {
6547
- path: path15,
6792
+ path: path16,
6548
6793
  score: 1,
6549
6794
  // No expected value means no comparison needed
6550
6795
  weight,
6551
6796
  hit: true,
6552
- message: `${path15}: no expected value`
6797
+ message: `${path16}: no expected value`
6553
6798
  };
6554
6799
  }
6555
6800
  if (candidateValue === void 0) {
6556
6801
  if (required) {
6557
6802
  return {
6558
- path: path15,
6803
+ path: path16,
6559
6804
  score: 0,
6560
6805
  weight,
6561
6806
  hit: false,
6562
- message: `${path15} (required, missing)`
6807
+ message: `${path16} (required, missing)`
6563
6808
  };
6564
6809
  }
6565
6810
  return {
6566
- path: path15,
6811
+ path: path16,
6567
6812
  score: 1,
6568
6813
  // Don't penalize missing optional fields
6569
6814
  weight: 0,
6570
6815
  // Zero weight means it won't affect the score
6571
6816
  hit: true,
6572
- message: `${path15}: optional field missing`
6817
+ message: `${path16}: optional field missing`
6573
6818
  };
6574
6819
  }
6575
6820
  switch (match) {
6576
6821
  case "exact":
6577
- return this.compareExact(path15, candidateValue, expectedValue, weight);
6822
+ return this.compareExact(path16, candidateValue, expectedValue, weight);
6578
6823
  case "numeric_tolerance":
6579
6824
  return this.compareNumericTolerance(
6580
- path15,
6825
+ path16,
6581
6826
  candidateValue,
6582
6827
  expectedValue,
6583
6828
  fieldConfig,
6584
6829
  weight
6585
6830
  );
6586
6831
  case "date":
6587
- return this.compareDate(path15, candidateValue, expectedValue, fieldConfig, weight);
6832
+ return this.compareDate(path16, candidateValue, expectedValue, fieldConfig, weight);
6588
6833
  default:
6589
6834
  return {
6590
- path: path15,
6835
+ path: path16,
6591
6836
  score: 0,
6592
6837
  weight,
6593
6838
  hit: false,
6594
- message: `${path15}: unknown match type "${match}"`
6839
+ message: `${path16}: unknown match type "${match}"`
6595
6840
  };
6596
6841
  }
6597
6842
  }
6598
6843
  /**
6599
6844
  * Exact equality comparison.
6600
6845
  */
6601
- compareExact(path15, candidateValue, expectedValue, weight) {
6846
+ compareExact(path16, candidateValue, expectedValue, weight) {
6602
6847
  if (deepEqual(candidateValue, expectedValue)) {
6603
6848
  return {
6604
- path: path15,
6849
+ path: path16,
6605
6850
  score: 1,
6606
6851
  weight,
6607
6852
  hit: true,
6608
- message: path15
6853
+ message: path16
6609
6854
  };
6610
6855
  }
6611
6856
  if (typeof candidateValue !== typeof expectedValue) {
6612
6857
  return {
6613
- path: path15,
6858
+ path: path16,
6614
6859
  score: 0,
6615
6860
  weight,
6616
6861
  hit: false,
6617
- message: `${path15} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
6862
+ message: `${path16} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
6618
6863
  };
6619
6864
  }
6620
6865
  return {
6621
- path: path15,
6866
+ path: path16,
6622
6867
  score: 0,
6623
6868
  weight,
6624
6869
  hit: false,
6625
- message: `${path15} (value mismatch)`
6870
+ message: `${path16} (value mismatch)`
6626
6871
  };
6627
6872
  }
6628
6873
  /**
6629
6874
  * Numeric comparison with absolute or relative tolerance.
6630
6875
  */
6631
- compareNumericTolerance(path15, candidateValue, expectedValue, fieldConfig, weight) {
6876
+ compareNumericTolerance(path16, candidateValue, expectedValue, fieldConfig, weight) {
6632
6877
  const { tolerance = 0, relative = false } = fieldConfig;
6633
6878
  const candidateNum = toNumber(candidateValue);
6634
6879
  const expectedNum = toNumber(expectedValue);
6635
6880
  if (candidateNum === null || expectedNum === null) {
6636
6881
  return {
6637
- path: path15,
6882
+ path: path16,
6638
6883
  score: 0,
6639
6884
  weight,
6640
6885
  hit: false,
6641
- message: `${path15} (non-numeric value)`
6886
+ message: `${path16} (non-numeric value)`
6642
6887
  };
6643
6888
  }
6644
6889
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
6645
6890
  return {
6646
- path: path15,
6891
+ path: path16,
6647
6892
  score: 0,
6648
6893
  weight,
6649
6894
  hit: false,
6650
- message: `${path15} (invalid numeric value)`
6895
+ message: `${path16} (invalid numeric value)`
6651
6896
  };
6652
6897
  }
6653
6898
  const diff = Math.abs(candidateNum - expectedNum);
@@ -6660,61 +6905,61 @@ var FieldAccuracyEvaluator = class {
6660
6905
  }
6661
6906
  if (withinTolerance) {
6662
6907
  return {
6663
- path: path15,
6908
+ path: path16,
6664
6909
  score: 1,
6665
6910
  weight,
6666
6911
  hit: true,
6667
- message: `${path15} (within tolerance: diff=${diff.toFixed(2)})`
6912
+ message: `${path16} (within tolerance: diff=${diff.toFixed(2)})`
6668
6913
  };
6669
6914
  }
6670
6915
  return {
6671
- path: path15,
6916
+ path: path16,
6672
6917
  score: 0,
6673
6918
  weight,
6674
6919
  hit: false,
6675
- message: `${path15} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
6920
+ message: `${path16} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
6676
6921
  };
6677
6922
  }
6678
6923
  /**
6679
6924
  * Date comparison with format normalization.
6680
6925
  */
6681
- compareDate(path15, candidateValue, expectedValue, fieldConfig, weight) {
6926
+ compareDate(path16, candidateValue, expectedValue, fieldConfig, weight) {
6682
6927
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
6683
6928
  const candidateDate = parseDate(String(candidateValue), formats);
6684
6929
  const expectedDate = parseDate(String(expectedValue), formats);
6685
6930
  if (candidateDate === null) {
6686
6931
  return {
6687
- path: path15,
6932
+ path: path16,
6688
6933
  score: 0,
6689
6934
  weight,
6690
6935
  hit: false,
6691
- message: `${path15} (unparseable candidate date)`
6936
+ message: `${path16} (unparseable candidate date)`
6692
6937
  };
6693
6938
  }
6694
6939
  if (expectedDate === null) {
6695
6940
  return {
6696
- path: path15,
6941
+ path: path16,
6697
6942
  score: 0,
6698
6943
  weight,
6699
6944
  hit: false,
6700
- message: `${path15} (unparseable expected date)`
6945
+ message: `${path16} (unparseable expected date)`
6701
6946
  };
6702
6947
  }
6703
6948
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
6704
6949
  return {
6705
- path: path15,
6950
+ path: path16,
6706
6951
  score: 1,
6707
6952
  weight,
6708
6953
  hit: true,
6709
- message: path15
6954
+ message: path16
6710
6955
  };
6711
6956
  }
6712
6957
  return {
6713
- path: path15,
6958
+ path: path16,
6714
6959
  score: 0,
6715
6960
  weight,
6716
6961
  hit: false,
6717
- message: `${path15} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
6962
+ message: `${path16} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
6718
6963
  };
6719
6964
  }
6720
6965
  /**
@@ -6754,11 +6999,11 @@ var FieldAccuracyEvaluator = class {
6754
6999
  };
6755
7000
  }
6756
7001
  };
6757
- function resolvePath(obj, path15) {
6758
- if (!path15 || !obj) {
7002
+ function resolvePath(obj, path16) {
7003
+ if (!path16 || !obj) {
6759
7004
  return void 0;
6760
7005
  }
6761
- const parts = path15.split(/\.|\[|\]/).filter((p) => p.length > 0);
7006
+ const parts = path16.split(/\.|\[|\]/).filter((p) => p.length > 0);
6762
7007
  let current = obj;
6763
7008
  for (const part of parts) {
6764
7009
  if (current === null || current === void 0) {
@@ -7194,7 +7439,7 @@ var ToolTrajectoryEvaluator = class {
7194
7439
 
7195
7440
  // src/evaluation/orchestrator.ts
7196
7441
  import { createHash } from "node:crypto";
7197
- import path14 from "node:path";
7442
+ import path15 from "node:path";
7198
7443
 
7199
7444
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
7200
7445
  var Node = class {
@@ -7993,7 +8238,7 @@ async function runEvaluatorList(options) {
7993
8238
  });
7994
8239
  }
7995
8240
  if (evaluator.type === "composite") {
7996
- const evalFileDir = evalCase.guideline_paths[0] ? path14.dirname(evalCase.guideline_paths[0]) : process.cwd();
8241
+ const evalFileDir = evalCase.guideline_paths[0] ? path15.dirname(evalCase.guideline_paths[0]) : process.cwd();
7997
8242
  const createEvaluator = (memberConfig) => {
7998
8243
  switch (memberConfig.type) {
7999
8244
  case "llm_judge":
@@ -8567,6 +8812,7 @@ export {
8567
8812
  createAgentKernel,
8568
8813
  createProvider,
8569
8814
  deepEqual,
8815
+ detectFormat,
8570
8816
  ensureVSCodeSubagents,
8571
8817
  executeScript,
8572
8818
  explorationRatio,