@agentv/core 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +456 -202
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +8 -2
- package/dist/index.d.ts +8 -2
- package/dist/index.js +403 -150
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -146,8 +146,8 @@ function mergeExecutionMetrics(summary, metrics) {
|
|
|
146
146
|
}
|
|
147
147
|
|
|
148
148
|
// src/evaluation/yaml-parser.ts
|
|
149
|
-
import { readFile as
|
|
150
|
-
import
|
|
149
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
150
|
+
import path7 from "node:path";
|
|
151
151
|
import { parse as parse2 } from "yaml";
|
|
152
152
|
|
|
153
153
|
// src/evaluation/loaders/config-loader.ts
|
|
@@ -926,6 +926,11 @@ function isValidFieldAggregationType(value) {
|
|
|
926
926
|
return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
|
|
927
927
|
}
|
|
928
928
|
|
|
929
|
+
// src/evaluation/loaders/jsonl-parser.ts
|
|
930
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
931
|
+
import path5 from "node:path";
|
|
932
|
+
import { parse as parseYaml } from "yaml";
|
|
933
|
+
|
|
929
934
|
// src/evaluation/loaders/message-processor.ts
|
|
930
935
|
import { readFile as readFile3 } from "node:fs/promises";
|
|
931
936
|
import path4 from "node:path";
|
|
@@ -1186,28 +1191,271 @@ async function processExpectedMessages(options) {
|
|
|
1186
1191
|
return segments;
|
|
1187
1192
|
}
|
|
1188
1193
|
|
|
1189
|
-
// src/evaluation/
|
|
1190
|
-
import { readFile as readFile4 } from "node:fs/promises";
|
|
1191
|
-
import path5 from "node:path";
|
|
1194
|
+
// src/evaluation/loaders/jsonl-parser.ts
|
|
1192
1195
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
1196
|
+
var ANSI_RED = "\x1B[31m";
|
|
1193
1197
|
var ANSI_RESET5 = "\x1B[0m";
|
|
1198
|
+
function detectFormat(filePath) {
|
|
1199
|
+
const ext = path5.extname(filePath).toLowerCase();
|
|
1200
|
+
if (ext === ".jsonl") return "jsonl";
|
|
1201
|
+
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
1202
|
+
throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
|
|
1203
|
+
}
|
|
1204
|
+
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
1205
|
+
const dir = path5.dirname(jsonlPath);
|
|
1206
|
+
const base = path5.basename(jsonlPath, ".jsonl");
|
|
1207
|
+
const sidecarPath = path5.join(dir, `${base}.yaml`);
|
|
1208
|
+
if (!await fileExists2(sidecarPath)) {
|
|
1209
|
+
if (verbose) {
|
|
1210
|
+
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
1211
|
+
}
|
|
1212
|
+
return {};
|
|
1213
|
+
}
|
|
1214
|
+
try {
|
|
1215
|
+
const content = await readFile4(sidecarPath, "utf8");
|
|
1216
|
+
const parsed = parseYaml(content);
|
|
1217
|
+
if (!isJsonObject(parsed)) {
|
|
1218
|
+
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
1219
|
+
return {};
|
|
1220
|
+
}
|
|
1221
|
+
return {
|
|
1222
|
+
description: asString4(parsed.description),
|
|
1223
|
+
dataset: asString4(parsed.dataset),
|
|
1224
|
+
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
1225
|
+
evaluator: parsed.evaluator
|
|
1226
|
+
};
|
|
1227
|
+
} catch (error) {
|
|
1228
|
+
logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error.message}`);
|
|
1229
|
+
return {};
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1232
|
+
function parseJsonlContent(content, filePath) {
|
|
1233
|
+
const lines = content.split("\n");
|
|
1234
|
+
const cases = [];
|
|
1235
|
+
for (let i = 0; i < lines.length; i++) {
|
|
1236
|
+
const line = lines[i].trim();
|
|
1237
|
+
if (line === "") continue;
|
|
1238
|
+
try {
|
|
1239
|
+
const parsed = JSON.parse(line);
|
|
1240
|
+
if (!isJsonObject(parsed)) {
|
|
1241
|
+
throw new Error("Expected JSON object");
|
|
1242
|
+
}
|
|
1243
|
+
cases.push(parsed);
|
|
1244
|
+
} catch (error) {
|
|
1245
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1246
|
+
throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
|
|
1247
|
+
File: ${filePath}`);
|
|
1248
|
+
}
|
|
1249
|
+
}
|
|
1250
|
+
return cases;
|
|
1251
|
+
}
|
|
1252
|
+
async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
|
|
1253
|
+
const verbose = options?.verbose ?? false;
|
|
1254
|
+
const evalIdFilter = options?.evalId;
|
|
1255
|
+
const absoluteTestPath = path5.resolve(evalFilePath);
|
|
1256
|
+
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
1257
|
+
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
1258
|
+
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
1259
|
+
const guidelinePatterns = config?.guideline_patterns;
|
|
1260
|
+
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
1261
|
+
const rawFile = await readFile4(absoluteTestPath, "utf8");
|
|
1262
|
+
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
1263
|
+
const fallbackDataset = path5.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
1264
|
+
const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
|
|
1265
|
+
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
|
|
1266
|
+
const globalExecution = sidecar.execution;
|
|
1267
|
+
if (verbose) {
|
|
1268
|
+
console.log(`
|
|
1269
|
+
[JSONL Dataset: ${evalFilePath}]`);
|
|
1270
|
+
console.log(` Cases: ${rawCases.length}`);
|
|
1271
|
+
console.log(` Dataset name: ${datasetName}`);
|
|
1272
|
+
if (sidecar.description) {
|
|
1273
|
+
console.log(` Description: ${sidecar.description}`);
|
|
1274
|
+
}
|
|
1275
|
+
}
|
|
1276
|
+
const results = [];
|
|
1277
|
+
for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
|
|
1278
|
+
const evalcase = rawCases[lineIndex];
|
|
1279
|
+
const lineNumber = lineIndex + 1;
|
|
1280
|
+
const id = asString4(evalcase.id);
|
|
1281
|
+
if (evalIdFilter && id !== evalIdFilter) {
|
|
1282
|
+
continue;
|
|
1283
|
+
}
|
|
1284
|
+
const conversationId = asString4(evalcase.conversation_id);
|
|
1285
|
+
const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
|
|
1286
|
+
const inputMessagesValue = evalcase.input_messages;
|
|
1287
|
+
const expectedMessagesValue = evalcase.expected_messages;
|
|
1288
|
+
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
1289
|
+
logError(
|
|
1290
|
+
`Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages`
|
|
1291
|
+
);
|
|
1292
|
+
continue;
|
|
1293
|
+
}
|
|
1294
|
+
const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
|
|
1295
|
+
const inputMessages = inputMessagesValue.filter(
|
|
1296
|
+
(msg) => isTestMessage(msg)
|
|
1297
|
+
);
|
|
1298
|
+
const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
|
|
1299
|
+
if (hasExpectedMessages && expectedMessages.length === 0) {
|
|
1300
|
+
logError(`Line ${lineNumber}: No valid expected message found for eval case: ${id}`);
|
|
1301
|
+
continue;
|
|
1302
|
+
}
|
|
1303
|
+
const guidelinePaths = [];
|
|
1304
|
+
const inputTextParts = [];
|
|
1305
|
+
const inputSegments = await processMessages({
|
|
1306
|
+
messages: inputMessages,
|
|
1307
|
+
searchRoots,
|
|
1308
|
+
repoRootPath,
|
|
1309
|
+
guidelinePatterns,
|
|
1310
|
+
guidelinePaths,
|
|
1311
|
+
textParts: inputTextParts,
|
|
1312
|
+
messageType: "input",
|
|
1313
|
+
verbose
|
|
1314
|
+
});
|
|
1315
|
+
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
1316
|
+
messages: expectedMessages,
|
|
1317
|
+
searchRoots,
|
|
1318
|
+
repoRootPath,
|
|
1319
|
+
verbose
|
|
1320
|
+
}) : [];
|
|
1321
|
+
let referenceAnswer = "";
|
|
1322
|
+
if (outputSegments.length > 0) {
|
|
1323
|
+
const lastMessage = outputSegments[outputSegments.length - 1];
|
|
1324
|
+
const content = lastMessage.content;
|
|
1325
|
+
const toolCalls = lastMessage.tool_calls;
|
|
1326
|
+
if (typeof content === "string") {
|
|
1327
|
+
referenceAnswer = content;
|
|
1328
|
+
} else if (content !== void 0 && content !== null) {
|
|
1329
|
+
referenceAnswer = JSON.stringify(content, null, 2);
|
|
1330
|
+
} else if (toolCalls !== void 0 && toolCalls !== null) {
|
|
1331
|
+
referenceAnswer = JSON.stringify(toolCalls, null, 2);
|
|
1332
|
+
}
|
|
1333
|
+
}
|
|
1334
|
+
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
1335
|
+
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
1336
|
+
const mergedExecution = caseExecution ?? globalExecution;
|
|
1337
|
+
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
1338
|
+
let evaluators;
|
|
1339
|
+
try {
|
|
1340
|
+
evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
|
|
1341
|
+
} catch (error) {
|
|
1342
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1343
|
+
logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
|
|
1344
|
+
continue;
|
|
1345
|
+
}
|
|
1346
|
+
const inlineRubrics = evalcase.rubrics;
|
|
1347
|
+
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
1348
|
+
const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
|
|
1349
|
+
if (typeof rubric === "string") {
|
|
1350
|
+
return {
|
|
1351
|
+
id: `rubric-${index + 1}`,
|
|
1352
|
+
description: rubric,
|
|
1353
|
+
weight: 1,
|
|
1354
|
+
required: true
|
|
1355
|
+
};
|
|
1356
|
+
}
|
|
1357
|
+
return {
|
|
1358
|
+
id: asString4(rubric.id) ?? `rubric-${index + 1}`,
|
|
1359
|
+
description: asString4(rubric.description) ?? "",
|
|
1360
|
+
weight: typeof rubric.weight === "number" ? rubric.weight : 1,
|
|
1361
|
+
required: typeof rubric.required === "boolean" ? rubric.required : true
|
|
1362
|
+
};
|
|
1363
|
+
}).filter((r) => r.description.length > 0);
|
|
1364
|
+
if (rubricItems.length > 0) {
|
|
1365
|
+
const rubricEvaluator = {
|
|
1366
|
+
name: "rubric",
|
|
1367
|
+
type: "llm_judge",
|
|
1368
|
+
rubrics: rubricItems
|
|
1369
|
+
};
|
|
1370
|
+
evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
|
|
1371
|
+
}
|
|
1372
|
+
}
|
|
1373
|
+
const userFilePaths = [];
|
|
1374
|
+
for (const segment of inputSegments) {
|
|
1375
|
+
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
1376
|
+
userFilePaths.push(segment.resolvedPath);
|
|
1377
|
+
}
|
|
1378
|
+
}
|
|
1379
|
+
const allFilePaths = [
|
|
1380
|
+
...guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath)),
|
|
1381
|
+
...userFilePaths
|
|
1382
|
+
];
|
|
1383
|
+
const testCase = {
|
|
1384
|
+
id,
|
|
1385
|
+
dataset: datasetName,
|
|
1386
|
+
conversation_id: conversationId,
|
|
1387
|
+
question,
|
|
1388
|
+
input_messages: inputMessages,
|
|
1389
|
+
input_segments: inputSegments,
|
|
1390
|
+
expected_messages: outputSegments,
|
|
1391
|
+
reference_answer: referenceAnswer,
|
|
1392
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path5.resolve(guidelinePath)),
|
|
1393
|
+
guideline_patterns: guidelinePatterns,
|
|
1394
|
+
file_paths: allFilePaths,
|
|
1395
|
+
expected_outcome: outcome,
|
|
1396
|
+
evaluator: evalCaseEvaluatorKind,
|
|
1397
|
+
evaluators
|
|
1398
|
+
};
|
|
1399
|
+
if (verbose) {
|
|
1400
|
+
console.log(`
|
|
1401
|
+
[Eval Case: ${id}]`);
|
|
1402
|
+
if (testCase.guideline_paths.length > 0) {
|
|
1403
|
+
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
1404
|
+
for (const guidelinePath of testCase.guideline_paths) {
|
|
1405
|
+
console.log(` - ${guidelinePath}`);
|
|
1406
|
+
}
|
|
1407
|
+
} else {
|
|
1408
|
+
console.log(" No guidelines found");
|
|
1409
|
+
}
|
|
1410
|
+
}
|
|
1411
|
+
results.push(testCase);
|
|
1412
|
+
}
|
|
1413
|
+
return results;
|
|
1414
|
+
}
|
|
1415
|
+
function asString4(value) {
|
|
1416
|
+
return typeof value === "string" ? value : void 0;
|
|
1417
|
+
}
|
|
1418
|
+
function logWarning4(message, details) {
|
|
1419
|
+
if (details && details.length > 0) {
|
|
1420
|
+
const detailBlock = details.join("\n");
|
|
1421
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}
|
|
1422
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
1423
|
+
} else {
|
|
1424
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
|
|
1425
|
+
}
|
|
1426
|
+
}
|
|
1427
|
+
function logError(message, details) {
|
|
1428
|
+
if (details && details.length > 0) {
|
|
1429
|
+
const detailBlock = details.join("\n");
|
|
1430
|
+
console.error(`${ANSI_RED}Error: ${message}
|
|
1431
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
1432
|
+
} else {
|
|
1433
|
+
console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
|
|
1434
|
+
}
|
|
1435
|
+
}
|
|
1436
|
+
|
|
1437
|
+
// src/evaluation/formatting/prompt-builder.ts
|
|
1438
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
1439
|
+
import path6 from "node:path";
|
|
1440
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
1441
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
1194
1442
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
1195
1443
|
const guidelineParts = [];
|
|
1196
1444
|
for (const rawPath of testCase.guideline_paths) {
|
|
1197
|
-
const absolutePath =
|
|
1445
|
+
const absolutePath = path6.resolve(rawPath);
|
|
1198
1446
|
if (!await fileExists2(absolutePath)) {
|
|
1199
|
-
|
|
1447
|
+
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
1200
1448
|
continue;
|
|
1201
1449
|
}
|
|
1202
1450
|
try {
|
|
1203
|
-
const content = (await
|
|
1451
|
+
const content = (await readFile5(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
1204
1452
|
guidelineParts.push({
|
|
1205
1453
|
content,
|
|
1206
1454
|
isFile: true,
|
|
1207
|
-
displayPath:
|
|
1455
|
+
displayPath: path6.basename(absolutePath)
|
|
1208
1456
|
});
|
|
1209
1457
|
} catch (error) {
|
|
1210
|
-
|
|
1458
|
+
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
1211
1459
|
}
|
|
1212
1460
|
}
|
|
1213
1461
|
const guidelines = formatFileContents(guidelineParts);
|
|
@@ -1231,9 +1479,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
1231
1479
|
messageSegments.push({ type: "text", value: segment });
|
|
1232
1480
|
}
|
|
1233
1481
|
} else if (isJsonObject(segment)) {
|
|
1234
|
-
const type =
|
|
1482
|
+
const type = asString5(segment.type);
|
|
1235
1483
|
if (type === "file") {
|
|
1236
|
-
const value =
|
|
1484
|
+
const value = asString5(segment.value);
|
|
1237
1485
|
if (!value) continue;
|
|
1238
1486
|
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
1239
1487
|
messageSegments.push({ type: "guideline_ref", path: value });
|
|
@@ -1244,7 +1492,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
1244
1492
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
1245
1493
|
}
|
|
1246
1494
|
} else if (type === "text") {
|
|
1247
|
-
const textValue =
|
|
1495
|
+
const textValue = asString5(segment.value);
|
|
1248
1496
|
if (textValue && textValue.trim().length > 0) {
|
|
1249
1497
|
messageSegments.push({ type: "text", value: textValue });
|
|
1250
1498
|
}
|
|
@@ -1398,21 +1646,21 @@ ${guidelineContent.trim()}`);
|
|
|
1398
1646
|
}
|
|
1399
1647
|
return chatPrompt.length > 0 ? chatPrompt : void 0;
|
|
1400
1648
|
}
|
|
1401
|
-
function
|
|
1649
|
+
function asString5(value) {
|
|
1402
1650
|
return typeof value === "string" ? value : void 0;
|
|
1403
1651
|
}
|
|
1404
|
-
function
|
|
1405
|
-
console.warn(`${
|
|
1652
|
+
function logWarning5(message) {
|
|
1653
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
|
|
1406
1654
|
}
|
|
1407
1655
|
|
|
1408
1656
|
// src/evaluation/yaml-parser.ts
|
|
1409
|
-
var
|
|
1410
|
-
var
|
|
1411
|
-
var
|
|
1657
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
1658
|
+
var ANSI_RED2 = "\x1B[31m";
|
|
1659
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
1412
1660
|
async function readTestSuiteMetadata(testFilePath) {
|
|
1413
1661
|
try {
|
|
1414
|
-
const absolutePath =
|
|
1415
|
-
const content = await
|
|
1662
|
+
const absolutePath = path7.resolve(testFilePath);
|
|
1663
|
+
const content = await readFile6(absolutePath, "utf8");
|
|
1416
1664
|
const parsed = parse2(content);
|
|
1417
1665
|
if (!isJsonObject(parsed)) {
|
|
1418
1666
|
return {};
|
|
@@ -1423,21 +1671,25 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
1423
1671
|
}
|
|
1424
1672
|
}
|
|
1425
1673
|
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
1674
|
+
const format = detectFormat(evalFilePath);
|
|
1675
|
+
if (format === "jsonl") {
|
|
1676
|
+
return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
|
|
1677
|
+
}
|
|
1426
1678
|
const verbose = options?.verbose ?? false;
|
|
1427
1679
|
const evalIdFilter = options?.evalId;
|
|
1428
|
-
const absoluteTestPath =
|
|
1680
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
1429
1681
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
1430
1682
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
1431
1683
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
1432
1684
|
const guidelinePatterns = config?.guideline_patterns;
|
|
1433
|
-
const rawFile = await
|
|
1685
|
+
const rawFile = await readFile6(absoluteTestPath, "utf8");
|
|
1434
1686
|
const parsed = parse2(rawFile);
|
|
1435
1687
|
if (!isJsonObject(parsed)) {
|
|
1436
1688
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
1437
1689
|
}
|
|
1438
1690
|
const suite = parsed;
|
|
1439
|
-
const datasetNameFromSuite =
|
|
1440
|
-
const fallbackDataset =
|
|
1691
|
+
const datasetNameFromSuite = asString6(suite.dataset)?.trim();
|
|
1692
|
+
const fallbackDataset = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
1441
1693
|
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
1442
1694
|
const rawTestcases = suite.evalcases;
|
|
1443
1695
|
if (!Array.isArray(rawTestcases)) {
|
|
@@ -1445,24 +1697,24 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1445
1697
|
}
|
|
1446
1698
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
|
|
1447
1699
|
const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
1448
|
-
const _globalTarget =
|
|
1700
|
+
const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
|
|
1449
1701
|
const results = [];
|
|
1450
1702
|
for (const rawEvalcase of rawTestcases) {
|
|
1451
1703
|
if (!isJsonObject(rawEvalcase)) {
|
|
1452
|
-
|
|
1704
|
+
logWarning6("Skipping invalid eval case entry (expected object)");
|
|
1453
1705
|
continue;
|
|
1454
1706
|
}
|
|
1455
1707
|
const evalcase = rawEvalcase;
|
|
1456
|
-
const id =
|
|
1708
|
+
const id = asString6(evalcase.id);
|
|
1457
1709
|
if (evalIdFilter && id !== evalIdFilter) {
|
|
1458
1710
|
continue;
|
|
1459
1711
|
}
|
|
1460
|
-
const conversationId =
|
|
1461
|
-
const outcome =
|
|
1712
|
+
const conversationId = asString6(evalcase.conversation_id);
|
|
1713
|
+
const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
|
|
1462
1714
|
const inputMessagesValue = evalcase.input_messages;
|
|
1463
1715
|
const expectedMessagesValue = evalcase.expected_messages;
|
|
1464
1716
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
1465
|
-
|
|
1717
|
+
logError2(
|
|
1466
1718
|
`Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
|
|
1467
1719
|
);
|
|
1468
1720
|
continue;
|
|
@@ -1473,7 +1725,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1473
1725
|
);
|
|
1474
1726
|
const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
|
|
1475
1727
|
if (hasExpectedMessages && expectedMessages.length === 0) {
|
|
1476
|
-
|
|
1728
|
+
logError2(`No valid expected message found for eval case: ${id}`);
|
|
1477
1729
|
continue;
|
|
1478
1730
|
}
|
|
1479
1731
|
const guidelinePaths = [];
|
|
@@ -1514,7 +1766,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1514
1766
|
evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
|
|
1515
1767
|
} catch (error) {
|
|
1516
1768
|
const message = error instanceof Error ? error.message : String(error);
|
|
1517
|
-
|
|
1769
|
+
logError2(`Skipping eval case '${id}': ${message}`);
|
|
1518
1770
|
continue;
|
|
1519
1771
|
}
|
|
1520
1772
|
const inlineRubrics = evalcase.rubrics;
|
|
@@ -1529,8 +1781,8 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1529
1781
|
};
|
|
1530
1782
|
}
|
|
1531
1783
|
return {
|
|
1532
|
-
id:
|
|
1533
|
-
description:
|
|
1784
|
+
id: asString6(rubric.id) ?? `rubric-${index + 1}`,
|
|
1785
|
+
description: asString6(rubric.description) ?? "",
|
|
1534
1786
|
weight: typeof rubric.weight === "number" ? rubric.weight : 1,
|
|
1535
1787
|
required: typeof rubric.required === "boolean" ? rubric.required : true
|
|
1536
1788
|
};
|
|
@@ -1551,7 +1803,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1551
1803
|
}
|
|
1552
1804
|
}
|
|
1553
1805
|
const allFilePaths = [
|
|
1554
|
-
...guidelinePaths.map((guidelinePath) =>
|
|
1806
|
+
...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
1555
1807
|
...userFilePaths
|
|
1556
1808
|
];
|
|
1557
1809
|
const testCase = {
|
|
@@ -1563,7 +1815,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1563
1815
|
input_segments: inputSegments,
|
|
1564
1816
|
expected_messages: outputSegments,
|
|
1565
1817
|
reference_answer: referenceAnswer,
|
|
1566
|
-
guideline_paths: guidelinePaths.map((guidelinePath) =>
|
|
1818
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
1567
1819
|
guideline_patterns: guidelinePatterns,
|
|
1568
1820
|
file_paths: allFilePaths,
|
|
1569
1821
|
expected_outcome: outcome,
|
|
@@ -1586,25 +1838,25 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
1586
1838
|
}
|
|
1587
1839
|
return results;
|
|
1588
1840
|
}
|
|
1589
|
-
function
|
|
1841
|
+
function asString6(value) {
|
|
1590
1842
|
return typeof value === "string" ? value : void 0;
|
|
1591
1843
|
}
|
|
1592
|
-
function
|
|
1844
|
+
function logWarning6(message, details) {
|
|
1593
1845
|
if (details && details.length > 0) {
|
|
1594
1846
|
const detailBlock = details.join("\n");
|
|
1595
|
-
console.warn(`${
|
|
1596
|
-
${detailBlock}${
|
|
1847
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}
|
|
1848
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
1597
1849
|
} else {
|
|
1598
|
-
console.warn(`${
|
|
1850
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
|
|
1599
1851
|
}
|
|
1600
1852
|
}
|
|
1601
|
-
function
|
|
1853
|
+
function logError2(message, details) {
|
|
1602
1854
|
if (details && details.length > 0) {
|
|
1603
1855
|
const detailBlock = details.join("\n");
|
|
1604
|
-
console.error(`${
|
|
1605
|
-
${detailBlock}${
|
|
1856
|
+
console.error(`${ANSI_RED2}Error: ${message}
|
|
1857
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
1606
1858
|
} else {
|
|
1607
|
-
console.error(`${
|
|
1859
|
+
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
|
|
1608
1860
|
}
|
|
1609
1861
|
}
|
|
1610
1862
|
|
|
@@ -1947,7 +2199,7 @@ import { randomUUID } from "node:crypto";
|
|
|
1947
2199
|
import { createWriteStream } from "node:fs";
|
|
1948
2200
|
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
1949
2201
|
import { tmpdir } from "node:os";
|
|
1950
|
-
import
|
|
2202
|
+
import path9 from "node:path";
|
|
1951
2203
|
|
|
1952
2204
|
// src/evaluation/providers/claude-code-log-tracker.ts
|
|
1953
2205
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeCodeLogs");
|
|
@@ -2003,7 +2255,7 @@ function subscribeToClaudeCodeLogEntries(listener) {
|
|
|
2003
2255
|
}
|
|
2004
2256
|
|
|
2005
2257
|
// src/evaluation/providers/preread.ts
|
|
2006
|
-
import
|
|
2258
|
+
import path8 from "node:path";
|
|
2007
2259
|
function buildPromptDocument(request, inputFiles, options) {
|
|
2008
2260
|
const parts = [];
|
|
2009
2261
|
const guidelineFiles = collectGuidelineFiles(
|
|
@@ -2026,7 +2278,7 @@ function normalizeInputFiles(inputFiles) {
|
|
|
2026
2278
|
}
|
|
2027
2279
|
const deduped = /* @__PURE__ */ new Map();
|
|
2028
2280
|
for (const inputFile of inputFiles) {
|
|
2029
|
-
const absolutePath =
|
|
2281
|
+
const absolutePath = path8.resolve(inputFile);
|
|
2030
2282
|
if (!deduped.has(absolutePath)) {
|
|
2031
2283
|
deduped.set(absolutePath, absolutePath);
|
|
2032
2284
|
}
|
|
@@ -2039,14 +2291,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
|
2039
2291
|
}
|
|
2040
2292
|
const unique = /* @__PURE__ */ new Map();
|
|
2041
2293
|
for (const inputFile of inputFiles) {
|
|
2042
|
-
const absolutePath =
|
|
2294
|
+
const absolutePath = path8.resolve(inputFile);
|
|
2043
2295
|
if (overrides?.has(absolutePath)) {
|
|
2044
2296
|
if (!unique.has(absolutePath)) {
|
|
2045
2297
|
unique.set(absolutePath, absolutePath);
|
|
2046
2298
|
}
|
|
2047
2299
|
continue;
|
|
2048
2300
|
}
|
|
2049
|
-
const normalized = absolutePath.split(
|
|
2301
|
+
const normalized = absolutePath.split(path8.sep).join("/");
|
|
2050
2302
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
2051
2303
|
if (!unique.has(absolutePath)) {
|
|
2052
2304
|
unique.set(absolutePath, absolutePath);
|
|
@@ -2061,7 +2313,7 @@ function collectInputFiles(inputFiles) {
|
|
|
2061
2313
|
}
|
|
2062
2314
|
const unique = /* @__PURE__ */ new Map();
|
|
2063
2315
|
for (const inputFile of inputFiles) {
|
|
2064
|
-
const absolutePath =
|
|
2316
|
+
const absolutePath = path8.resolve(inputFile);
|
|
2065
2317
|
if (!unique.has(absolutePath)) {
|
|
2066
2318
|
unique.set(absolutePath, absolutePath);
|
|
2067
2319
|
}
|
|
@@ -2073,7 +2325,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
|
|
|
2073
2325
|
return "";
|
|
2074
2326
|
}
|
|
2075
2327
|
const buildList = (files) => files.map((absolutePath) => {
|
|
2076
|
-
const fileName =
|
|
2328
|
+
const fileName = path8.basename(absolutePath);
|
|
2077
2329
|
const fileUri = pathToFileUri(absolutePath);
|
|
2078
2330
|
return `* [${fileName}](${fileUri})`;
|
|
2079
2331
|
});
|
|
@@ -2093,7 +2345,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
2093
2345
|
return sections.join("\n");
|
|
2094
2346
|
}
|
|
2095
2347
|
function pathToFileUri(filePath) {
|
|
2096
|
-
const absolutePath =
|
|
2348
|
+
const absolutePath = path8.isAbsolute(filePath) ? filePath : path8.resolve(filePath);
|
|
2097
2349
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
2098
2350
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
2099
2351
|
return `file:///${normalizedPath}`;
|
|
@@ -2130,7 +2382,7 @@ var ClaudeCodeProvider = class {
|
|
|
2130
2382
|
const workspaceRoot = await this.createWorkspace();
|
|
2131
2383
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
2132
2384
|
try {
|
|
2133
|
-
const promptFile =
|
|
2385
|
+
const promptFile = path9.join(workspaceRoot, PROMPT_FILENAME);
|
|
2134
2386
|
await writeFile(promptFile, request.question, "utf8");
|
|
2135
2387
|
const args = this.buildClaudeCodeArgs(request.question, inputFiles);
|
|
2136
2388
|
const cwd = this.resolveCwd();
|
|
@@ -2178,7 +2430,7 @@ var ClaudeCodeProvider = class {
|
|
|
2178
2430
|
if (!this.config.cwd) {
|
|
2179
2431
|
return process.cwd();
|
|
2180
2432
|
}
|
|
2181
|
-
return
|
|
2433
|
+
return path9.resolve(this.config.cwd);
|
|
2182
2434
|
}
|
|
2183
2435
|
buildClaudeCodeArgs(prompt, inputFiles) {
|
|
2184
2436
|
const args = [];
|
|
@@ -2235,7 +2487,7 @@ ${filesContext}`;
|
|
|
2235
2487
|
}
|
|
2236
2488
|
}
|
|
2237
2489
|
async createWorkspace() {
|
|
2238
|
-
return await mkdtemp(
|
|
2490
|
+
return await mkdtemp(path9.join(tmpdir(), WORKSPACE_PREFIX));
|
|
2239
2491
|
}
|
|
2240
2492
|
async cleanupWorkspace(workspaceRoot) {
|
|
2241
2493
|
try {
|
|
@@ -2249,9 +2501,9 @@ ${filesContext}`;
|
|
|
2249
2501
|
return void 0;
|
|
2250
2502
|
}
|
|
2251
2503
|
if (this.config.logDir) {
|
|
2252
|
-
return
|
|
2504
|
+
return path9.resolve(this.config.logDir);
|
|
2253
2505
|
}
|
|
2254
|
-
return
|
|
2506
|
+
return path9.join(process.cwd(), ".agentv", "logs", "claude-code");
|
|
2255
2507
|
}
|
|
2256
2508
|
async createStreamLogger(request) {
|
|
2257
2509
|
const logDir = this.resolveLogDirectory();
|
|
@@ -2265,7 +2517,7 @@ ${filesContext}`;
|
|
|
2265
2517
|
console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
|
|
2266
2518
|
return void 0;
|
|
2267
2519
|
}
|
|
2268
|
-
const filePath =
|
|
2520
|
+
const filePath = path9.join(logDir, buildLogFilename(request, this.targetName));
|
|
2269
2521
|
try {
|
|
2270
2522
|
const logger = await ClaudeCodeStreamLogger.create({
|
|
2271
2523
|
filePath,
|
|
@@ -2670,10 +2922,10 @@ function escapeShellArg(arg) {
|
|
|
2670
2922
|
}
|
|
2671
2923
|
async function defaultClaudeCodeRunner(options) {
|
|
2672
2924
|
const tempId = randomUUID();
|
|
2673
|
-
const stdoutFile =
|
|
2674
|
-
const stderrFile =
|
|
2675
|
-
const exitFile =
|
|
2676
|
-
const pidFile =
|
|
2925
|
+
const stdoutFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
|
|
2926
|
+
const stderrFile = path9.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
|
|
2927
|
+
const exitFile = path9.join(tmpdir(), `agentv-cc-${tempId}-exit`);
|
|
2928
|
+
const pidFile = path9.join(tmpdir(), `agentv-cc-${tempId}-pid`);
|
|
2677
2929
|
try {
|
|
2678
2930
|
return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
|
|
2679
2931
|
} finally {
|
|
@@ -2713,8 +2965,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
|
|
|
2713
2965
|
let lastStdoutSize = 0;
|
|
2714
2966
|
const readFileIfExists = async (filePath) => {
|
|
2715
2967
|
try {
|
|
2716
|
-
const { readFile:
|
|
2717
|
-
return await
|
|
2968
|
+
const { readFile: readFile8 } = await import("node:fs/promises");
|
|
2969
|
+
return await readFile8(filePath, "utf8");
|
|
2718
2970
|
} catch {
|
|
2719
2971
|
return "";
|
|
2720
2972
|
}
|
|
@@ -2789,7 +3041,7 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
|
|
|
2789
3041
|
import { exec as execWithCallback } from "node:child_process";
|
|
2790
3042
|
import fs from "node:fs/promises";
|
|
2791
3043
|
import os from "node:os";
|
|
2792
|
-
import
|
|
3044
|
+
import path10 from "node:path";
|
|
2793
3045
|
import { promisify } from "node:util";
|
|
2794
3046
|
import { z } from "zod";
|
|
2795
3047
|
var ToolCallSchema = z.object({
|
|
@@ -3246,7 +3498,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
3246
3498
|
}
|
|
3247
3499
|
const unique = /* @__PURE__ */ new Map();
|
|
3248
3500
|
for (const inputFile of inputFiles) {
|
|
3249
|
-
const absolutePath =
|
|
3501
|
+
const absolutePath = path10.resolve(inputFile);
|
|
3250
3502
|
if (!unique.has(absolutePath)) {
|
|
3251
3503
|
unique.set(absolutePath, absolutePath);
|
|
3252
3504
|
}
|
|
@@ -3260,7 +3512,7 @@ function formatFileList(files, template) {
|
|
|
3260
3512
|
const formatter = template ?? "{path}";
|
|
3261
3513
|
return files.map((filePath) => {
|
|
3262
3514
|
const escapedPath = shellEscape(filePath);
|
|
3263
|
-
const escapedName = shellEscape(
|
|
3515
|
+
const escapedName = shellEscape(path10.basename(filePath));
|
|
3264
3516
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
3265
3517
|
}).join(" ");
|
|
3266
3518
|
}
|
|
@@ -3284,7 +3536,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
3284
3536
|
const safeEvalId = evalCaseId || "unknown";
|
|
3285
3537
|
const timestamp = Date.now();
|
|
3286
3538
|
const random = Math.random().toString(36).substring(2, 9);
|
|
3287
|
-
return
|
|
3539
|
+
return path10.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
3288
3540
|
}
|
|
3289
3541
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
3290
3542
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -3300,7 +3552,7 @@ import { randomUUID as randomUUID2 } from "node:crypto";
|
|
|
3300
3552
|
import { constants as constants2, createWriteStream as createWriteStream2 } from "node:fs";
|
|
3301
3553
|
import { access as access2, mkdir as mkdir2, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile2 } from "node:fs/promises";
|
|
3302
3554
|
import { tmpdir as tmpdir2 } from "node:os";
|
|
3303
|
-
import
|
|
3555
|
+
import path11 from "node:path";
|
|
3304
3556
|
import { promisify as promisify2 } from "node:util";
|
|
3305
3557
|
|
|
3306
3558
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
@@ -3395,7 +3647,7 @@ var CodexProvider = class {
|
|
|
3395
3647
|
const promptContent = `${systemPrompt}
|
|
3396
3648
|
|
|
3397
3649
|
${basePrompt}`;
|
|
3398
|
-
const promptFile =
|
|
3650
|
+
const promptFile = path11.join(workspaceRoot, PROMPT_FILENAME2);
|
|
3399
3651
|
await writeFile2(promptFile, promptContent, "utf8");
|
|
3400
3652
|
const args = this.buildCodexArgs();
|
|
3401
3653
|
const cwd = this.resolveCwd(workspaceRoot);
|
|
@@ -3445,7 +3697,7 @@ ${basePrompt}`;
|
|
|
3445
3697
|
if (!this.config.cwd) {
|
|
3446
3698
|
return workspaceRoot;
|
|
3447
3699
|
}
|
|
3448
|
-
return
|
|
3700
|
+
return path11.resolve(this.config.cwd);
|
|
3449
3701
|
}
|
|
3450
3702
|
buildCodexArgs() {
|
|
3451
3703
|
const args = [
|
|
@@ -3487,7 +3739,7 @@ ${basePrompt}`;
|
|
|
3487
3739
|
}
|
|
3488
3740
|
}
|
|
3489
3741
|
async createWorkspace() {
|
|
3490
|
-
return await mkdtemp2(
|
|
3742
|
+
return await mkdtemp2(path11.join(tmpdir2(), WORKSPACE_PREFIX2));
|
|
3491
3743
|
}
|
|
3492
3744
|
async cleanupWorkspace(workspaceRoot) {
|
|
3493
3745
|
try {
|
|
@@ -3501,9 +3753,9 @@ ${basePrompt}`;
|
|
|
3501
3753
|
return void 0;
|
|
3502
3754
|
}
|
|
3503
3755
|
if (this.config.logDir) {
|
|
3504
|
-
return
|
|
3756
|
+
return path11.resolve(this.config.logDir);
|
|
3505
3757
|
}
|
|
3506
|
-
return
|
|
3758
|
+
return path11.join(process.cwd(), ".agentv", "logs", "codex");
|
|
3507
3759
|
}
|
|
3508
3760
|
async createStreamLogger(request) {
|
|
3509
3761
|
const logDir = this.resolveLogDirectory();
|
|
@@ -3517,7 +3769,7 @@ ${basePrompt}`;
|
|
|
3517
3769
|
console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
|
|
3518
3770
|
return void 0;
|
|
3519
3771
|
}
|
|
3520
|
-
const filePath =
|
|
3772
|
+
const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
|
|
3521
3773
|
try {
|
|
3522
3774
|
const logger = await CodexStreamLogger.create({
|
|
3523
3775
|
filePath,
|
|
@@ -3732,7 +3984,7 @@ function tryParseJsonValue2(rawLine) {
|
|
|
3732
3984
|
async function locateExecutable(candidate) {
|
|
3733
3985
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
3734
3986
|
if (includesPathSeparator) {
|
|
3735
|
-
const resolved =
|
|
3987
|
+
const resolved = path11.isAbsolute(candidate) ? candidate : path11.resolve(candidate);
|
|
3736
3988
|
const executablePath = await ensureWindowsExecutableVariant(resolved);
|
|
3737
3989
|
await access2(executablePath, constants2.F_OK);
|
|
3738
3990
|
return executablePath;
|
|
@@ -4245,7 +4497,7 @@ import { randomUUID as randomUUID3 } from "node:crypto";
|
|
|
4245
4497
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
4246
4498
|
import { mkdir as mkdir3, mkdtemp as mkdtemp3, rm as rm3, writeFile as writeFile3 } from "node:fs/promises";
|
|
4247
4499
|
import { tmpdir as tmpdir3 } from "node:os";
|
|
4248
|
-
import
|
|
4500
|
+
import path12 from "node:path";
|
|
4249
4501
|
|
|
4250
4502
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
4251
4503
|
var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.piLogs");
|
|
@@ -4329,7 +4581,7 @@ var PiCodingAgentProvider = class {
|
|
|
4329
4581
|
const workspaceRoot = await this.createWorkspace();
|
|
4330
4582
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
4331
4583
|
try {
|
|
4332
|
-
const promptFile =
|
|
4584
|
+
const promptFile = path12.join(workspaceRoot, PROMPT_FILENAME3);
|
|
4333
4585
|
await writeFile3(promptFile, request.question, "utf8");
|
|
4334
4586
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
4335
4587
|
const cwd = this.resolveCwd(workspaceRoot);
|
|
@@ -4371,7 +4623,7 @@ var PiCodingAgentProvider = class {
|
|
|
4371
4623
|
if (!this.config.cwd) {
|
|
4372
4624
|
return workspaceRoot;
|
|
4373
4625
|
}
|
|
4374
|
-
return
|
|
4626
|
+
return path12.resolve(this.config.cwd);
|
|
4375
4627
|
}
|
|
4376
4628
|
buildPiArgs(prompt, inputFiles) {
|
|
4377
4629
|
const args = [];
|
|
@@ -4460,7 +4712,7 @@ ${prompt}`;
|
|
|
4460
4712
|
return env;
|
|
4461
4713
|
}
|
|
4462
4714
|
async createWorkspace() {
|
|
4463
|
-
return await mkdtemp3(
|
|
4715
|
+
return await mkdtemp3(path12.join(tmpdir3(), WORKSPACE_PREFIX3));
|
|
4464
4716
|
}
|
|
4465
4717
|
async cleanupWorkspace(workspaceRoot) {
|
|
4466
4718
|
try {
|
|
@@ -4470,9 +4722,9 @@ ${prompt}`;
|
|
|
4470
4722
|
}
|
|
4471
4723
|
resolveLogDirectory() {
|
|
4472
4724
|
if (this.config.logDir) {
|
|
4473
|
-
return
|
|
4725
|
+
return path12.resolve(this.config.logDir);
|
|
4474
4726
|
}
|
|
4475
|
-
return
|
|
4727
|
+
return path12.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
4476
4728
|
}
|
|
4477
4729
|
async createStreamLogger(request) {
|
|
4478
4730
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4486,7 +4738,7 @@ ${prompt}`;
|
|
|
4486
4738
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
4487
4739
|
return void 0;
|
|
4488
4740
|
}
|
|
4489
|
-
const filePath =
|
|
4741
|
+
const filePath = path12.join(logDir, buildLogFilename3(request, this.targetName));
|
|
4490
4742
|
try {
|
|
4491
4743
|
const logger = await PiStreamLogger.create({
|
|
4492
4744
|
filePath,
|
|
@@ -4919,7 +5171,7 @@ async function defaultPiRunner(options) {
|
|
|
4919
5171
|
}
|
|
4920
5172
|
|
|
4921
5173
|
// src/evaluation/providers/vscode.ts
|
|
4922
|
-
import
|
|
5174
|
+
import path13 from "node:path";
|
|
4923
5175
|
import {
|
|
4924
5176
|
dispatchAgentSession,
|
|
4925
5177
|
dispatchBatchAgent,
|
|
@@ -5094,7 +5346,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
5094
5346
|
return "";
|
|
5095
5347
|
}
|
|
5096
5348
|
const buildList = (files) => files.map((absolutePath) => {
|
|
5097
|
-
const fileName =
|
|
5349
|
+
const fileName = path13.basename(absolutePath);
|
|
5098
5350
|
const fileUri = pathToFileUri2(absolutePath);
|
|
5099
5351
|
return `* [${fileName}](${fileUri})`;
|
|
5100
5352
|
});
|
|
@@ -5119,8 +5371,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
5119
5371
|
}
|
|
5120
5372
|
const unique = /* @__PURE__ */ new Map();
|
|
5121
5373
|
for (const attachment of attachments) {
|
|
5122
|
-
const absolutePath =
|
|
5123
|
-
const normalized = absolutePath.split(
|
|
5374
|
+
const absolutePath = path13.resolve(attachment);
|
|
5375
|
+
const normalized = absolutePath.split(path13.sep).join("/");
|
|
5124
5376
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
5125
5377
|
if (!unique.has(absolutePath)) {
|
|
5126
5378
|
unique.set(absolutePath, absolutePath);
|
|
@@ -5135,7 +5387,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
5135
5387
|
}
|
|
5136
5388
|
const unique = /* @__PURE__ */ new Map();
|
|
5137
5389
|
for (const attachment of attachments) {
|
|
5138
|
-
const absolutePath =
|
|
5390
|
+
const absolutePath = path13.resolve(attachment);
|
|
5139
5391
|
if (!unique.has(absolutePath)) {
|
|
5140
5392
|
unique.set(absolutePath, absolutePath);
|
|
5141
5393
|
}
|
|
@@ -5143,7 +5395,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
5143
5395
|
return Array.from(unique.values());
|
|
5144
5396
|
}
|
|
5145
5397
|
function pathToFileUri2(filePath) {
|
|
5146
|
-
const absolutePath =
|
|
5398
|
+
const absolutePath = path13.isAbsolute(filePath) ? filePath : path13.resolve(filePath);
|
|
5147
5399
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
5148
5400
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
5149
5401
|
return `file:///${normalizedPath}`;
|
|
@@ -5156,7 +5408,7 @@ function normalizeAttachments(attachments) {
|
|
|
5156
5408
|
}
|
|
5157
5409
|
const deduped = /* @__PURE__ */ new Set();
|
|
5158
5410
|
for (const attachment of attachments) {
|
|
5159
|
-
deduped.add(
|
|
5411
|
+
deduped.add(path13.resolve(attachment));
|
|
5160
5412
|
}
|
|
5161
5413
|
return Array.from(deduped);
|
|
5162
5414
|
}
|
|
@@ -5165,7 +5417,7 @@ function mergeAttachments(all) {
|
|
|
5165
5417
|
for (const list of all) {
|
|
5166
5418
|
if (!list) continue;
|
|
5167
5419
|
for (const inputFile of list) {
|
|
5168
|
-
deduped.add(
|
|
5420
|
+
deduped.add(path13.resolve(inputFile));
|
|
5169
5421
|
}
|
|
5170
5422
|
}
|
|
5171
5423
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -5213,8 +5465,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
5213
5465
|
|
|
5214
5466
|
// src/evaluation/providers/targets-file.ts
|
|
5215
5467
|
import { constants as constants3 } from "node:fs";
|
|
5216
|
-
import { access as access3, readFile as
|
|
5217
|
-
import
|
|
5468
|
+
import { access as access3, readFile as readFile7 } from "node:fs/promises";
|
|
5469
|
+
import path14 from "node:path";
|
|
5218
5470
|
import { parse as parse3 } from "yaml";
|
|
5219
5471
|
function isRecord(value) {
|
|
5220
5472
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -5251,11 +5503,11 @@ async function fileExists3(filePath) {
|
|
|
5251
5503
|
}
|
|
5252
5504
|
}
|
|
5253
5505
|
async function readTargetDefinitions(filePath) {
|
|
5254
|
-
const absolutePath =
|
|
5506
|
+
const absolutePath = path14.resolve(filePath);
|
|
5255
5507
|
if (!await fileExists3(absolutePath)) {
|
|
5256
5508
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
5257
5509
|
}
|
|
5258
|
-
const raw = await
|
|
5510
|
+
const raw = await readFile7(absolutePath, "utf8");
|
|
5259
5511
|
const parsed = parse3(raw);
|
|
5260
5512
|
if (!isRecord(parsed)) {
|
|
5261
5513
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -5462,15 +5714,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
5462
5714
|
});
|
|
5463
5715
|
}
|
|
5464
5716
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
5465
|
-
const { mkdir: mkdir4, readFile:
|
|
5717
|
+
const { mkdir: mkdir4, readFile: readFile8, rm: rm4, writeFile: writeFile4 } = await import("node:fs/promises");
|
|
5466
5718
|
const { tmpdir: tmpdir4 } = await import("node:os");
|
|
5467
|
-
const
|
|
5719
|
+
const path16 = await import("node:path");
|
|
5468
5720
|
const { randomUUID: randomUUID4 } = await import("node:crypto");
|
|
5469
|
-
const dir =
|
|
5721
|
+
const dir = path16.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
|
|
5470
5722
|
await mkdir4(dir, { recursive: true });
|
|
5471
|
-
const stdinPath =
|
|
5472
|
-
const stdoutPath =
|
|
5473
|
-
const stderrPath =
|
|
5723
|
+
const stdinPath = path16.join(dir, "stdin.txt");
|
|
5724
|
+
const stdoutPath = path16.join(dir, "stdout.txt");
|
|
5725
|
+
const stderrPath = path16.join(dir, "stderr.txt");
|
|
5474
5726
|
await writeFile4(stdinPath, stdinPayload, "utf8");
|
|
5475
5727
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
5476
5728
|
const { spawn: spawn4 } = await import("node:child_process");
|
|
@@ -5500,8 +5752,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
5500
5752
|
resolve(code ?? 0);
|
|
5501
5753
|
});
|
|
5502
5754
|
});
|
|
5503
|
-
const stdout = (await
|
|
5504
|
-
const stderr = (await
|
|
5755
|
+
const stdout = (await readFile8(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
5756
|
+
const stderr = (await readFile8(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
5505
5757
|
return { stdout, stderr, exitCode };
|
|
5506
5758
|
} finally {
|
|
5507
5759
|
await rm4(dir, { recursive: true, force: true });
|
|
@@ -5773,7 +6025,7 @@ var CodeEvaluator = class {
|
|
|
5773
6025
|
outputMessages: context.outputMessages ?? null,
|
|
5774
6026
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
5775
6027
|
inputFiles: context.evalCase.file_paths.filter(
|
|
5776
|
-
(
|
|
6028
|
+
(path16) => !context.evalCase.guideline_paths.includes(path16)
|
|
5777
6029
|
),
|
|
5778
6030
|
inputMessages: context.evalCase.input_messages,
|
|
5779
6031
|
traceSummary: context.traceSummary ?? null,
|
|
@@ -6532,115 +6784,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
6532
6784
|
* Evaluate a single field against the expected value.
|
|
6533
6785
|
*/
|
|
6534
6786
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
6535
|
-
const { path:
|
|
6536
|
-
const candidateValue = resolvePath(candidateData,
|
|
6537
|
-
const expectedValue = resolvePath(expectedData,
|
|
6787
|
+
const { path: path16, match, required = true, weight = 1 } = fieldConfig;
|
|
6788
|
+
const candidateValue = resolvePath(candidateData, path16);
|
|
6789
|
+
const expectedValue = resolvePath(expectedData, path16);
|
|
6538
6790
|
if (expectedValue === void 0) {
|
|
6539
6791
|
return {
|
|
6540
|
-
path:
|
|
6792
|
+
path: path16,
|
|
6541
6793
|
score: 1,
|
|
6542
6794
|
// No expected value means no comparison needed
|
|
6543
6795
|
weight,
|
|
6544
6796
|
hit: true,
|
|
6545
|
-
message: `${
|
|
6797
|
+
message: `${path16}: no expected value`
|
|
6546
6798
|
};
|
|
6547
6799
|
}
|
|
6548
6800
|
if (candidateValue === void 0) {
|
|
6549
6801
|
if (required) {
|
|
6550
6802
|
return {
|
|
6551
|
-
path:
|
|
6803
|
+
path: path16,
|
|
6552
6804
|
score: 0,
|
|
6553
6805
|
weight,
|
|
6554
6806
|
hit: false,
|
|
6555
|
-
message: `${
|
|
6807
|
+
message: `${path16} (required, missing)`
|
|
6556
6808
|
};
|
|
6557
6809
|
}
|
|
6558
6810
|
return {
|
|
6559
|
-
path:
|
|
6811
|
+
path: path16,
|
|
6560
6812
|
score: 1,
|
|
6561
6813
|
// Don't penalize missing optional fields
|
|
6562
6814
|
weight: 0,
|
|
6563
6815
|
// Zero weight means it won't affect the score
|
|
6564
6816
|
hit: true,
|
|
6565
|
-
message: `${
|
|
6817
|
+
message: `${path16}: optional field missing`
|
|
6566
6818
|
};
|
|
6567
6819
|
}
|
|
6568
6820
|
switch (match) {
|
|
6569
6821
|
case "exact":
|
|
6570
|
-
return this.compareExact(
|
|
6822
|
+
return this.compareExact(path16, candidateValue, expectedValue, weight);
|
|
6571
6823
|
case "numeric_tolerance":
|
|
6572
6824
|
return this.compareNumericTolerance(
|
|
6573
|
-
|
|
6825
|
+
path16,
|
|
6574
6826
|
candidateValue,
|
|
6575
6827
|
expectedValue,
|
|
6576
6828
|
fieldConfig,
|
|
6577
6829
|
weight
|
|
6578
6830
|
);
|
|
6579
6831
|
case "date":
|
|
6580
|
-
return this.compareDate(
|
|
6832
|
+
return this.compareDate(path16, candidateValue, expectedValue, fieldConfig, weight);
|
|
6581
6833
|
default:
|
|
6582
6834
|
return {
|
|
6583
|
-
path:
|
|
6835
|
+
path: path16,
|
|
6584
6836
|
score: 0,
|
|
6585
6837
|
weight,
|
|
6586
6838
|
hit: false,
|
|
6587
|
-
message: `${
|
|
6839
|
+
message: `${path16}: unknown match type "${match}"`
|
|
6588
6840
|
};
|
|
6589
6841
|
}
|
|
6590
6842
|
}
|
|
6591
6843
|
/**
|
|
6592
6844
|
* Exact equality comparison.
|
|
6593
6845
|
*/
|
|
6594
|
-
compareExact(
|
|
6846
|
+
compareExact(path16, candidateValue, expectedValue, weight) {
|
|
6595
6847
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
6596
6848
|
return {
|
|
6597
|
-
path:
|
|
6849
|
+
path: path16,
|
|
6598
6850
|
score: 1,
|
|
6599
6851
|
weight,
|
|
6600
6852
|
hit: true,
|
|
6601
|
-
message:
|
|
6853
|
+
message: path16
|
|
6602
6854
|
};
|
|
6603
6855
|
}
|
|
6604
6856
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
6605
6857
|
return {
|
|
6606
|
-
path:
|
|
6858
|
+
path: path16,
|
|
6607
6859
|
score: 0,
|
|
6608
6860
|
weight,
|
|
6609
6861
|
hit: false,
|
|
6610
|
-
message: `${
|
|
6862
|
+
message: `${path16} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
6611
6863
|
};
|
|
6612
6864
|
}
|
|
6613
6865
|
return {
|
|
6614
|
-
path:
|
|
6866
|
+
path: path16,
|
|
6615
6867
|
score: 0,
|
|
6616
6868
|
weight,
|
|
6617
6869
|
hit: false,
|
|
6618
|
-
message: `${
|
|
6870
|
+
message: `${path16} (value mismatch)`
|
|
6619
6871
|
};
|
|
6620
6872
|
}
|
|
6621
6873
|
/**
|
|
6622
6874
|
* Numeric comparison with absolute or relative tolerance.
|
|
6623
6875
|
*/
|
|
6624
|
-
compareNumericTolerance(
|
|
6876
|
+
compareNumericTolerance(path16, candidateValue, expectedValue, fieldConfig, weight) {
|
|
6625
6877
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
6626
6878
|
const candidateNum = toNumber(candidateValue);
|
|
6627
6879
|
const expectedNum = toNumber(expectedValue);
|
|
6628
6880
|
if (candidateNum === null || expectedNum === null) {
|
|
6629
6881
|
return {
|
|
6630
|
-
path:
|
|
6882
|
+
path: path16,
|
|
6631
6883
|
score: 0,
|
|
6632
6884
|
weight,
|
|
6633
6885
|
hit: false,
|
|
6634
|
-
message: `${
|
|
6886
|
+
message: `${path16} (non-numeric value)`
|
|
6635
6887
|
};
|
|
6636
6888
|
}
|
|
6637
6889
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
6638
6890
|
return {
|
|
6639
|
-
path:
|
|
6891
|
+
path: path16,
|
|
6640
6892
|
score: 0,
|
|
6641
6893
|
weight,
|
|
6642
6894
|
hit: false,
|
|
6643
|
-
message: `${
|
|
6895
|
+
message: `${path16} (invalid numeric value)`
|
|
6644
6896
|
};
|
|
6645
6897
|
}
|
|
6646
6898
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -6653,61 +6905,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
6653
6905
|
}
|
|
6654
6906
|
if (withinTolerance) {
|
|
6655
6907
|
return {
|
|
6656
|
-
path:
|
|
6908
|
+
path: path16,
|
|
6657
6909
|
score: 1,
|
|
6658
6910
|
weight,
|
|
6659
6911
|
hit: true,
|
|
6660
|
-
message: `${
|
|
6912
|
+
message: `${path16} (within tolerance: diff=${diff.toFixed(2)})`
|
|
6661
6913
|
};
|
|
6662
6914
|
}
|
|
6663
6915
|
return {
|
|
6664
|
-
path:
|
|
6916
|
+
path: path16,
|
|
6665
6917
|
score: 0,
|
|
6666
6918
|
weight,
|
|
6667
6919
|
hit: false,
|
|
6668
|
-
message: `${
|
|
6920
|
+
message: `${path16} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
6669
6921
|
};
|
|
6670
6922
|
}
|
|
6671
6923
|
/**
|
|
6672
6924
|
* Date comparison with format normalization.
|
|
6673
6925
|
*/
|
|
6674
|
-
compareDate(
|
|
6926
|
+
compareDate(path16, candidateValue, expectedValue, fieldConfig, weight) {
|
|
6675
6927
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
6676
6928
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
6677
6929
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
6678
6930
|
if (candidateDate === null) {
|
|
6679
6931
|
return {
|
|
6680
|
-
path:
|
|
6932
|
+
path: path16,
|
|
6681
6933
|
score: 0,
|
|
6682
6934
|
weight,
|
|
6683
6935
|
hit: false,
|
|
6684
|
-
message: `${
|
|
6936
|
+
message: `${path16} (unparseable candidate date)`
|
|
6685
6937
|
};
|
|
6686
6938
|
}
|
|
6687
6939
|
if (expectedDate === null) {
|
|
6688
6940
|
return {
|
|
6689
|
-
path:
|
|
6941
|
+
path: path16,
|
|
6690
6942
|
score: 0,
|
|
6691
6943
|
weight,
|
|
6692
6944
|
hit: false,
|
|
6693
|
-
message: `${
|
|
6945
|
+
message: `${path16} (unparseable expected date)`
|
|
6694
6946
|
};
|
|
6695
6947
|
}
|
|
6696
6948
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
6697
6949
|
return {
|
|
6698
|
-
path:
|
|
6950
|
+
path: path16,
|
|
6699
6951
|
score: 1,
|
|
6700
6952
|
weight,
|
|
6701
6953
|
hit: true,
|
|
6702
|
-
message:
|
|
6954
|
+
message: path16
|
|
6703
6955
|
};
|
|
6704
6956
|
}
|
|
6705
6957
|
return {
|
|
6706
|
-
path:
|
|
6958
|
+
path: path16,
|
|
6707
6959
|
score: 0,
|
|
6708
6960
|
weight,
|
|
6709
6961
|
hit: false,
|
|
6710
|
-
message: `${
|
|
6962
|
+
message: `${path16} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
6711
6963
|
};
|
|
6712
6964
|
}
|
|
6713
6965
|
/**
|
|
@@ -6747,11 +6999,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
6747
6999
|
};
|
|
6748
7000
|
}
|
|
6749
7001
|
};
|
|
6750
|
-
function resolvePath(obj,
|
|
6751
|
-
if (!
|
|
7002
|
+
function resolvePath(obj, path16) {
|
|
7003
|
+
if (!path16 || !obj) {
|
|
6752
7004
|
return void 0;
|
|
6753
7005
|
}
|
|
6754
|
-
const parts =
|
|
7006
|
+
const parts = path16.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
6755
7007
|
let current = obj;
|
|
6756
7008
|
for (const part of parts) {
|
|
6757
7009
|
if (current === null || current === void 0) {
|
|
@@ -7187,7 +7439,7 @@ var ToolTrajectoryEvaluator = class {
|
|
|
7187
7439
|
|
|
7188
7440
|
// src/evaluation/orchestrator.ts
|
|
7189
7441
|
import { createHash } from "node:crypto";
|
|
7190
|
-
import
|
|
7442
|
+
import path15 from "node:path";
|
|
7191
7443
|
|
|
7192
7444
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
7193
7445
|
var Node = class {
|
|
@@ -7986,7 +8238,7 @@ async function runEvaluatorList(options) {
|
|
|
7986
8238
|
});
|
|
7987
8239
|
}
|
|
7988
8240
|
if (evaluator.type === "composite") {
|
|
7989
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
8241
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path15.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
7990
8242
|
const createEvaluator = (memberConfig) => {
|
|
7991
8243
|
switch (memberConfig.type) {
|
|
7992
8244
|
case "llm_judge":
|
|
@@ -8560,6 +8812,7 @@ export {
|
|
|
8560
8812
|
createAgentKernel,
|
|
8561
8813
|
createProvider,
|
|
8562
8814
|
deepEqual,
|
|
8815
|
+
detectFormat,
|
|
8563
8816
|
ensureVSCodeSubagents,
|
|
8564
8817
|
executeScript,
|
|
8565
8818
|
explorationRatio,
|