@agentv/core 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-L6RCDZ4Z.js +641 -0
- package/dist/chunk-L6RCDZ4Z.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +11 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +76 -74
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.js +63 -541
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-UQLHF3T7.js +0 -158
- package/dist/chunk-UQLHF3T7.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -382,6 +382,7 @@ async function processMessages(options) {
|
|
|
382
382
|
}
|
|
383
383
|
async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
384
384
|
const verbose = options?.verbose ?? false;
|
|
385
|
+
const evalIdFilter = options?.evalId;
|
|
385
386
|
const absoluteTestPath = import_node_path2.default.resolve(evalFilePath);
|
|
386
387
|
if (!await fileExists2(absoluteTestPath)) {
|
|
387
388
|
throw new Error(`Test file not found: ${evalFilePath}`);
|
|
@@ -413,62 +414,39 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
413
414
|
const results = [];
|
|
414
415
|
for (const rawEvalcase of rawTestcases) {
|
|
415
416
|
if (!isJsonObject(rawEvalcase)) {
|
|
416
|
-
logWarning("Skipping invalid
|
|
417
|
+
logWarning("Skipping invalid eval case entry (expected object)");
|
|
417
418
|
continue;
|
|
418
419
|
}
|
|
419
420
|
const evalcase = rawEvalcase;
|
|
420
421
|
const id = asString(evalcase.id);
|
|
422
|
+
if (evalIdFilter && id !== evalIdFilter) {
|
|
423
|
+
continue;
|
|
424
|
+
}
|
|
421
425
|
const conversationId = asString(evalcase.conversation_id);
|
|
422
426
|
const outcome = asString(evalcase.outcome);
|
|
423
427
|
const inputMessagesValue = evalcase.input_messages;
|
|
424
428
|
const expectedMessagesValue = evalcase.expected_messages;
|
|
425
429
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
426
|
-
logWarning(`Skipping incomplete
|
|
430
|
+
logWarning(`Skipping incomplete eval case: ${id ?? "unknown"}`);
|
|
427
431
|
continue;
|
|
428
432
|
}
|
|
429
433
|
if (!Array.isArray(expectedMessagesValue)) {
|
|
430
|
-
logWarning(`
|
|
434
|
+
logWarning(`Eval case '${id}' missing expected_messages array`);
|
|
431
435
|
continue;
|
|
432
436
|
}
|
|
433
437
|
const inputMessages = inputMessagesValue.filter((msg) => isTestMessage(msg));
|
|
434
438
|
const expectedMessages = expectedMessagesValue.filter((msg) => isTestMessage(msg));
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
const systemMessages = inputMessages.filter((message) => message.role === "system");
|
|
438
|
-
if (assistantMessages.length === 0) {
|
|
439
|
-
logWarning(`No assistant message found for test case: ${id}`);
|
|
439
|
+
if (expectedMessages.length === 0) {
|
|
440
|
+
logWarning(`No expected message found for eval case: ${id}`);
|
|
440
441
|
continue;
|
|
441
442
|
}
|
|
442
|
-
if (
|
|
443
|
-
logWarning(`Multiple
|
|
444
|
-
}
|
|
445
|
-
if (systemMessages.length > 1) {
|
|
446
|
-
logWarning(`Multiple system messages found for test case: ${id}, using first`);
|
|
447
|
-
}
|
|
448
|
-
let systemMessageContent;
|
|
449
|
-
if (systemMessages.length > 0) {
|
|
450
|
-
const content = systemMessages[0]?.content;
|
|
451
|
-
if (typeof content === "string") {
|
|
452
|
-
systemMessageContent = content;
|
|
453
|
-
} else if (Array.isArray(content)) {
|
|
454
|
-
const textParts = [];
|
|
455
|
-
for (const segment of content) {
|
|
456
|
-
if (isJsonObject(segment)) {
|
|
457
|
-
const value = segment.value;
|
|
458
|
-
if (typeof value === "string") {
|
|
459
|
-
textParts.push(value);
|
|
460
|
-
}
|
|
461
|
-
}
|
|
462
|
-
}
|
|
463
|
-
if (textParts.length > 0) {
|
|
464
|
-
systemMessageContent = textParts.join("\n\n");
|
|
465
|
-
}
|
|
466
|
-
}
|
|
443
|
+
if (expectedMessages.length > 1) {
|
|
444
|
+
logWarning(`Multiple expected messages found for eval case: ${id}, using first`);
|
|
467
445
|
}
|
|
468
446
|
const guidelinePaths = [];
|
|
469
447
|
const inputTextParts = [];
|
|
470
448
|
const inputSegments = await processMessages({
|
|
471
|
-
messages:
|
|
449
|
+
messages: inputMessages,
|
|
472
450
|
searchRoots,
|
|
473
451
|
repoRootPath,
|
|
474
452
|
guidelinePatterns,
|
|
@@ -478,7 +456,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
478
456
|
verbose
|
|
479
457
|
});
|
|
480
458
|
const outputSegments = await processMessages({
|
|
481
|
-
messages:
|
|
459
|
+
messages: expectedMessages,
|
|
482
460
|
searchRoots,
|
|
483
461
|
repoRootPath,
|
|
484
462
|
guidelinePatterns,
|
|
@@ -486,10 +464,10 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
486
464
|
verbose
|
|
487
465
|
});
|
|
488
466
|
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
489
|
-
const
|
|
490
|
-
const referenceAnswer = await resolveAssistantContent(
|
|
467
|
+
const expectedContent = expectedMessages[0]?.content;
|
|
468
|
+
const referenceAnswer = await resolveAssistantContent(expectedContent, searchRoots, verbose);
|
|
491
469
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
492
|
-
const
|
|
470
|
+
const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
|
|
493
471
|
const evaluators = await parseEvaluators(evalcase, searchRoots, id ?? "unknown");
|
|
494
472
|
const userFilePaths = [];
|
|
495
473
|
for (const segment of inputSegments) {
|
|
@@ -508,19 +486,18 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
|
|
|
508
486
|
question,
|
|
509
487
|
input_segments: inputSegments,
|
|
510
488
|
output_segments: outputSegments,
|
|
511
|
-
system_message: systemMessageContent,
|
|
512
489
|
reference_answer: referenceAnswer,
|
|
513
490
|
guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path2.default.resolve(guidelinePath)),
|
|
514
491
|
guideline_patterns: guidelinePatterns,
|
|
515
492
|
file_paths: allFilePaths,
|
|
516
493
|
code_snippets: codeSnippets,
|
|
517
494
|
expected_outcome: outcome,
|
|
518
|
-
evaluator:
|
|
495
|
+
evaluator: evalCaseEvaluatorKind,
|
|
519
496
|
evaluators
|
|
520
497
|
};
|
|
521
498
|
if (verbose) {
|
|
522
499
|
console.log(`
|
|
523
|
-
[
|
|
500
|
+
[Eval Case: ${id}]`);
|
|
524
501
|
if (testCase.guideline_paths.length > 0) {
|
|
525
502
|
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
526
503
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
@@ -579,7 +556,7 @@ ${body}`);
|
|
|
579
556
|
}
|
|
580
557
|
const question = questionParts.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
581
558
|
const guidelines = guidelineContents.map((part) => part.trim()).filter((part) => part.length > 0).join("\n\n");
|
|
582
|
-
return { question, guidelines
|
|
559
|
+
return { question, guidelines };
|
|
583
560
|
}
|
|
584
561
|
async function fileExists2(absolutePath) {
|
|
585
562
|
try {
|
|
@@ -965,6 +942,8 @@ var GeminiProvider = class {
|
|
|
965
942
|
|
|
966
943
|
// src/evaluation/providers/cli.ts
|
|
967
944
|
var import_node_child_process = require("child_process");
|
|
945
|
+
var import_promises3 = __toESM(require("fs/promises"), 1);
|
|
946
|
+
var import_node_os = __toESM(require("os"), 1);
|
|
968
947
|
var import_node_path3 = __toESM(require("path"), 1);
|
|
969
948
|
var import_node_util = require("util");
|
|
970
949
|
var execAsync = (0, import_node_util.promisify)(import_node_child_process.exec);
|
|
@@ -980,6 +959,7 @@ async function defaultCommandRunner(command, options) {
|
|
|
980
959
|
};
|
|
981
960
|
try {
|
|
982
961
|
const { stdout, stderr } = await execAsync(command, execOptions);
|
|
962
|
+
console.error(`[CLI DEBUG] SUCCESS - stdout: ${stdout.length} bytes, stderr: ${stderr.length} bytes`);
|
|
983
963
|
return {
|
|
984
964
|
stdout,
|
|
985
965
|
stderr,
|
|
@@ -990,6 +970,8 @@ async function defaultCommandRunner(command, options) {
|
|
|
990
970
|
};
|
|
991
971
|
} catch (error) {
|
|
992
972
|
const execError = error;
|
|
973
|
+
console.error(`[CLI DEBUG] ERROR - code: ${execError.code}, message: ${execError.message}`);
|
|
974
|
+
console.error(`[CLI DEBUG] stdout: ${execError.stdout?.length ?? 0} bytes, stderr: ${execError.stderr?.length ?? 0} bytes`);
|
|
993
975
|
return {
|
|
994
976
|
stdout: execError.stdout ?? "",
|
|
995
977
|
stderr: execError.stderr ?? "",
|
|
@@ -1019,7 +1001,8 @@ var CliProvider = class {
|
|
|
1019
1001
|
throw new Error("CLI provider request was aborted before execution");
|
|
1020
1002
|
}
|
|
1021
1003
|
await this.ensureHealthy(request.signal);
|
|
1022
|
-
const
|
|
1004
|
+
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
1005
|
+
const templateValues = buildTemplateValues(request, this.config, outputFilePath);
|
|
1023
1006
|
const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
|
|
1024
1007
|
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
1025
1008
|
const result = await this.runCommand(renderedCommand, {
|
|
@@ -1042,16 +1025,30 @@ var CliProvider = class {
|
|
|
1042
1025
|
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
1043
1026
|
throw new Error(message);
|
|
1044
1027
|
}
|
|
1028
|
+
const responseText = await this.readAndCleanupOutputFile(outputFilePath);
|
|
1045
1029
|
return {
|
|
1046
|
-
text:
|
|
1030
|
+
text: responseText,
|
|
1047
1031
|
raw: {
|
|
1048
1032
|
command: renderedCommand,
|
|
1049
1033
|
stderr: result.stderr,
|
|
1050
1034
|
exitCode: result.exitCode ?? 0,
|
|
1051
|
-
cwd: this.config.cwd
|
|
1035
|
+
cwd: this.config.cwd,
|
|
1036
|
+
outputFile: outputFilePath
|
|
1052
1037
|
}
|
|
1053
1038
|
};
|
|
1054
1039
|
}
|
|
1040
|
+
async readAndCleanupOutputFile(filePath) {
|
|
1041
|
+
try {
|
|
1042
|
+
const content = await import_promises3.default.readFile(filePath, "utf-8");
|
|
1043
|
+
return content;
|
|
1044
|
+
} catch (error) {
|
|
1045
|
+
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
1046
|
+
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
1047
|
+
} finally {
|
|
1048
|
+
await import_promises3.default.unlink(filePath).catch(() => {
|
|
1049
|
+
});
|
|
1050
|
+
}
|
|
1051
|
+
}
|
|
1055
1052
|
async ensureHealthy(signal) {
|
|
1056
1053
|
if (!this.config.healthcheck) {
|
|
1057
1054
|
return;
|
|
@@ -1092,10 +1089,11 @@ var CliProvider = class {
|
|
|
1092
1089
|
question: "",
|
|
1093
1090
|
guidelines: "",
|
|
1094
1091
|
inputFiles: [],
|
|
1095
|
-
evalCaseId: "",
|
|
1092
|
+
evalCaseId: "healthcheck",
|
|
1096
1093
|
attempt: 0
|
|
1097
1094
|
},
|
|
1098
|
-
this.config
|
|
1095
|
+
this.config,
|
|
1096
|
+
generateOutputFilePath("healthcheck")
|
|
1099
1097
|
)
|
|
1100
1098
|
);
|
|
1101
1099
|
const env = this.config.env ? { ...process.env, ...this.config.env } : process.env;
|
|
@@ -1113,14 +1111,15 @@ var CliProvider = class {
|
|
|
1113
1111
|
}
|
|
1114
1112
|
}
|
|
1115
1113
|
};
|
|
1116
|
-
function buildTemplateValues(request, config) {
|
|
1114
|
+
function buildTemplateValues(request, config, outputFilePath) {
|
|
1117
1115
|
const inputFiles = normalizeInputFiles(request.inputFiles);
|
|
1118
1116
|
return {
|
|
1119
1117
|
PROMPT: shellEscape(request.question ?? ""),
|
|
1120
1118
|
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
1121
1119
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
1122
1120
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
1123
|
-
FILES: formatFileList(inputFiles, config.filesFormat)
|
|
1121
|
+
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
1122
|
+
OUTPUT_FILE: shellEscape(outputFilePath)
|
|
1124
1123
|
};
|
|
1125
1124
|
}
|
|
1126
1125
|
function normalizeInputFiles(inputFiles) {
|
|
@@ -1158,11 +1157,17 @@ function shellEscape(value) {
|
|
|
1158
1157
|
return "''";
|
|
1159
1158
|
}
|
|
1160
1159
|
if (process.platform === "win32") {
|
|
1161
|
-
const escaped = value.replace(/
|
|
1162
|
-
return `
|
|
1160
|
+
const escaped = value.replace(/'/g, "''");
|
|
1161
|
+
return `'${escaped}'`;
|
|
1163
1162
|
}
|
|
1164
1163
|
return `'${value.replace(/'/g, `'"'"'`)}'`;
|
|
1165
1164
|
}
|
|
1165
|
+
function generateOutputFilePath(evalCaseId) {
|
|
1166
|
+
const safeEvalId = evalCaseId || "unknown";
|
|
1167
|
+
const timestamp = Date.now();
|
|
1168
|
+
const random = Math.random().toString(36).substring(2, 9);
|
|
1169
|
+
return import_node_path3.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}.json`);
|
|
1170
|
+
}
|
|
1166
1171
|
function formatTimeoutSuffix(timeoutMs) {
|
|
1167
1172
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
1168
1173
|
return "";
|
|
@@ -1175,8 +1180,8 @@ function formatTimeoutSuffix(timeoutMs) {
|
|
|
1175
1180
|
var import_node_child_process2 = require("child_process");
|
|
1176
1181
|
var import_node_crypto = require("crypto");
|
|
1177
1182
|
var import_node_fs3 = require("fs");
|
|
1178
|
-
var
|
|
1179
|
-
var
|
|
1183
|
+
var import_promises4 = require("fs/promises");
|
|
1184
|
+
var import_node_os2 = require("os");
|
|
1180
1185
|
var import_node_path5 = __toESM(require("path"), 1);
|
|
1181
1186
|
var import_node_util2 = require("util");
|
|
1182
1187
|
|
|
@@ -1365,7 +1370,7 @@ var CodexProvider = class {
|
|
|
1365
1370
|
try {
|
|
1366
1371
|
const promptContent = buildPromptDocument(request, inputFiles);
|
|
1367
1372
|
const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
1368
|
-
await (0,
|
|
1373
|
+
await (0, import_promises4.writeFile)(promptFile, promptContent, "utf8");
|
|
1369
1374
|
const args = this.buildCodexArgs();
|
|
1370
1375
|
const cwd = this.resolveCwd(workspaceRoot);
|
|
1371
1376
|
const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
|
|
@@ -1448,11 +1453,11 @@ var CodexProvider = class {
|
|
|
1448
1453
|
}
|
|
1449
1454
|
}
|
|
1450
1455
|
async createWorkspace() {
|
|
1451
|
-
return await (0,
|
|
1456
|
+
return await (0, import_promises4.mkdtemp)(import_node_path5.default.join((0, import_node_os2.tmpdir)(), WORKSPACE_PREFIX));
|
|
1452
1457
|
}
|
|
1453
1458
|
async cleanupWorkspace(workspaceRoot) {
|
|
1454
1459
|
try {
|
|
1455
|
-
await (0,
|
|
1460
|
+
await (0, import_promises4.rm)(workspaceRoot, { recursive: true, force: true });
|
|
1456
1461
|
} catch {
|
|
1457
1462
|
}
|
|
1458
1463
|
}
|
|
@@ -1472,7 +1477,7 @@ var CodexProvider = class {
|
|
|
1472
1477
|
return void 0;
|
|
1473
1478
|
}
|
|
1474
1479
|
try {
|
|
1475
|
-
await (0,
|
|
1480
|
+
await (0, import_promises4.mkdir)(logDir, { recursive: true });
|
|
1476
1481
|
} catch (error) {
|
|
1477
1482
|
const message = error instanceof Error ? error.message : String(error);
|
|
1478
1483
|
console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
|
|
@@ -1695,7 +1700,7 @@ async function locateExecutable(candidate) {
|
|
|
1695
1700
|
if (includesPathSeparator) {
|
|
1696
1701
|
const resolved = import_node_path5.default.isAbsolute(candidate) ? candidate : import_node_path5.default.resolve(candidate);
|
|
1697
1702
|
const executablePath = await ensureWindowsExecutableVariant(resolved);
|
|
1698
|
-
await (0,
|
|
1703
|
+
await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
|
|
1699
1704
|
return executablePath;
|
|
1700
1705
|
}
|
|
1701
1706
|
const locator = process.platform === "win32" ? "where" : "which";
|
|
@@ -1705,7 +1710,7 @@ async function locateExecutable(candidate) {
|
|
|
1705
1710
|
const preferred = selectExecutableCandidate(lines);
|
|
1706
1711
|
if (preferred) {
|
|
1707
1712
|
const executablePath = await ensureWindowsExecutableVariant(preferred);
|
|
1708
|
-
await (0,
|
|
1713
|
+
await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
|
|
1709
1714
|
return executablePath;
|
|
1710
1715
|
}
|
|
1711
1716
|
} catch {
|
|
@@ -1739,7 +1744,7 @@ async function ensureWindowsExecutableVariant(candidate) {
|
|
|
1739
1744
|
for (const ext of extensions) {
|
|
1740
1745
|
const withExtension = `${candidate}${ext}`;
|
|
1741
1746
|
try {
|
|
1742
|
-
await (0,
|
|
1747
|
+
await (0, import_promises4.access)(withExtension, import_node_fs3.constants.F_OK);
|
|
1743
1748
|
return withExtension;
|
|
1744
1749
|
} catch {
|
|
1745
1750
|
}
|
|
@@ -2041,7 +2046,7 @@ var MockProvider = class {
|
|
|
2041
2046
|
|
|
2042
2047
|
// src/evaluation/providers/targets.ts
|
|
2043
2048
|
var import_zod = require("zod");
|
|
2044
|
-
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
|
|
2049
|
+
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES", "OUTPUT_FILE"]);
|
|
2045
2050
|
var BASE_TARGET_SCHEMA = import_zod.z.object({
|
|
2046
2051
|
name: import_zod.z.string().min(1, "target name is required"),
|
|
2047
2052
|
provider: import_zod.z.string().min(1, "provider is required"),
|
|
@@ -2768,7 +2773,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
2768
2773
|
|
|
2769
2774
|
// src/evaluation/providers/targets-file.ts
|
|
2770
2775
|
var import_node_fs4 = require("fs");
|
|
2771
|
-
var
|
|
2776
|
+
var import_promises5 = require("fs/promises");
|
|
2772
2777
|
var import_node_path7 = __toESM(require("path"), 1);
|
|
2773
2778
|
var import_yaml2 = require("yaml");
|
|
2774
2779
|
|
|
@@ -2838,7 +2843,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
2838
2843
|
}
|
|
2839
2844
|
async function fileExists3(filePath) {
|
|
2840
2845
|
try {
|
|
2841
|
-
await (0,
|
|
2846
|
+
await (0, import_promises5.access)(filePath, import_node_fs4.constants.F_OK);
|
|
2842
2847
|
return true;
|
|
2843
2848
|
} catch {
|
|
2844
2849
|
return false;
|
|
@@ -2849,7 +2854,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
2849
2854
|
if (!await fileExists3(absolutePath)) {
|
|
2850
2855
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
2851
2856
|
}
|
|
2852
|
-
const raw = await (0,
|
|
2857
|
+
const raw = await (0, import_promises5.readFile)(absolutePath, "utf8");
|
|
2853
2858
|
const parsed = (0, import_yaml2.parse)(raw);
|
|
2854
2859
|
if (!isRecord(parsed)) {
|
|
2855
2860
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
@@ -3095,7 +3100,6 @@ var CodeEvaluator = class {
|
|
|
3095
3100
|
expected_outcome: context.evalCase.expected_outcome,
|
|
3096
3101
|
reference_answer: context.evalCase.reference_answer,
|
|
3097
3102
|
candidate_answer: context.candidate,
|
|
3098
|
-
system_message: context.promptInputs.systemMessage ?? "",
|
|
3099
3103
|
guideline_paths: context.evalCase.guideline_paths,
|
|
3100
3104
|
input_files: context.evalCase.file_paths,
|
|
3101
3105
|
input_segments: context.evalCase.input_segments
|
|
@@ -3195,7 +3199,7 @@ function substituteVariables(template, variables) {
|
|
|
3195
3199
|
|
|
3196
3200
|
// src/evaluation/orchestrator.ts
|
|
3197
3201
|
var import_node_crypto3 = require("crypto");
|
|
3198
|
-
var
|
|
3202
|
+
var import_promises6 = require("fs/promises");
|
|
3199
3203
|
var import_node_path8 = __toESM(require("path"), 1);
|
|
3200
3204
|
|
|
3201
3205
|
// ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
|
|
@@ -3337,7 +3341,7 @@ function validateConcurrency(concurrency) {
|
|
|
3337
3341
|
// src/evaluation/orchestrator.ts
|
|
3338
3342
|
async function runEvaluation(options) {
|
|
3339
3343
|
const {
|
|
3340
|
-
testFilePath,
|
|
3344
|
+
testFilePath: evalFilePath,
|
|
3341
3345
|
repoRoot,
|
|
3342
3346
|
target,
|
|
3343
3347
|
targets,
|
|
@@ -3356,11 +3360,11 @@ async function runEvaluation(options) {
|
|
|
3356
3360
|
onProgress
|
|
3357
3361
|
} = options;
|
|
3358
3362
|
const load = loadEvalCases;
|
|
3359
|
-
const evalCases = await load(
|
|
3363
|
+
const evalCases = await load(evalFilePath, repoRoot, { verbose, evalId });
|
|
3360
3364
|
const filteredEvalCases = filterEvalCases(evalCases, evalId);
|
|
3361
3365
|
if (filteredEvalCases.length === 0) {
|
|
3362
3366
|
if (evalId) {
|
|
3363
|
-
throw new Error(`
|
|
3367
|
+
throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
|
|
3364
3368
|
}
|
|
3365
3369
|
return [];
|
|
3366
3370
|
}
|
|
@@ -3739,8 +3743,7 @@ async function evaluateCandidate(options) {
|
|
|
3739
3743
|
const rawRequest = {
|
|
3740
3744
|
question: promptInputs.question,
|
|
3741
3745
|
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
3742
|
-
guideline_paths: evalCase.guideline_paths
|
|
3743
|
-
system_message: promptInputs.systemMessage ?? ""
|
|
3746
|
+
guideline_paths: evalCase.guideline_paths
|
|
3744
3747
|
};
|
|
3745
3748
|
return {
|
|
3746
3749
|
eval_id: evalCase.id,
|
|
@@ -3956,14 +3959,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
|
|
|
3956
3959
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
3957
3960
|
const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
|
|
3958
3961
|
const filePath = import_node_path8.default.resolve(directory, filename);
|
|
3959
|
-
await (0,
|
|
3962
|
+
await (0, import_promises6.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
|
|
3960
3963
|
const payload = {
|
|
3961
3964
|
eval_id: evalCase.id,
|
|
3962
3965
|
question: promptInputs.question,
|
|
3963
3966
|
guidelines: promptInputs.guidelines,
|
|
3964
3967
|
guideline_paths: evalCase.guideline_paths
|
|
3965
3968
|
};
|
|
3966
|
-
await (0,
|
|
3969
|
+
await (0, import_promises6.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
|
|
3967
3970
|
}
|
|
3968
3971
|
function sanitizeFilename(value) {
|
|
3969
3972
|
if (!value) {
|
|
@@ -4004,7 +4007,6 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
4004
4007
|
question: promptInputs.question,
|
|
4005
4008
|
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
4006
4009
|
guideline_paths: evalCase.guideline_paths,
|
|
4007
|
-
system_message: promptInputs.systemMessage ?? "",
|
|
4008
4010
|
error: message
|
|
4009
4011
|
};
|
|
4010
4012
|
return {
|