@agentv/core 2.14.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1270 -604
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +94 -2
- package/dist/index.d.ts +94 -2
- package/dist/index.js +1242 -584
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1244,12 +1244,12 @@ function serializeAttributeValue(value) {
|
|
|
1244
1244
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1245
1245
|
return { stringValue: String(value) };
|
|
1246
1246
|
}
|
|
1247
|
-
var
|
|
1247
|
+
var import_promises32, import_node_path45, OtlpJsonFileExporter;
|
|
1248
1248
|
var init_otlp_json_file_exporter = __esm({
|
|
1249
1249
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1250
1250
|
"use strict";
|
|
1251
|
-
|
|
1252
|
-
|
|
1251
|
+
import_promises32 = require("fs/promises");
|
|
1252
|
+
import_node_path45 = require("path");
|
|
1253
1253
|
OtlpJsonFileExporter = class {
|
|
1254
1254
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1255
1255
|
spans = [];
|
|
@@ -1288,7 +1288,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1288
1288
|
}
|
|
1289
1289
|
async flush() {
|
|
1290
1290
|
if (this.spans.length === 0) return;
|
|
1291
|
-
await (0,
|
|
1291
|
+
await (0, import_promises32.mkdir)((0, import_node_path45.dirname)(this.filePath), { recursive: true });
|
|
1292
1292
|
const otlpJson = {
|
|
1293
1293
|
resourceSpans: [
|
|
1294
1294
|
{
|
|
@@ -1302,8 +1302,8 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1302
1302
|
}
|
|
1303
1303
|
]
|
|
1304
1304
|
};
|
|
1305
|
-
const { writeFile:
|
|
1306
|
-
await
|
|
1305
|
+
const { writeFile: writeFile10 } = await import("fs/promises");
|
|
1306
|
+
await writeFile10(this.filePath, JSON.stringify(otlpJson, null, 2));
|
|
1307
1307
|
}
|
|
1308
1308
|
};
|
|
1309
1309
|
}
|
|
@@ -1319,13 +1319,13 @@ function hrTimeDiffMs(start, end) {
|
|
|
1319
1319
|
const diffNano = end[1] - start[1];
|
|
1320
1320
|
return Math.round(diffSec * 1e3 + diffNano / 1e6);
|
|
1321
1321
|
}
|
|
1322
|
-
var
|
|
1322
|
+
var import_node_fs14, import_promises33, import_node_path46, SimpleTraceFileExporter;
|
|
1323
1323
|
var init_simple_trace_file_exporter = __esm({
|
|
1324
1324
|
"src/observability/simple-trace-file-exporter.ts"() {
|
|
1325
1325
|
"use strict";
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1326
|
+
import_node_fs14 = require("fs");
|
|
1327
|
+
import_promises33 = require("fs/promises");
|
|
1328
|
+
import_node_path46 = require("path");
|
|
1329
1329
|
SimpleTraceFileExporter = class {
|
|
1330
1330
|
stream = null;
|
|
1331
1331
|
filePath;
|
|
@@ -1338,8 +1338,8 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1338
1338
|
async ensureStream() {
|
|
1339
1339
|
if (!this.streamReady) {
|
|
1340
1340
|
this.streamReady = (async () => {
|
|
1341
|
-
await (0,
|
|
1342
|
-
this.stream = (0,
|
|
1341
|
+
await (0, import_promises33.mkdir)((0, import_node_path46.dirname)(this.filePath), { recursive: true });
|
|
1342
|
+
this.stream = (0, import_node_fs14.createWriteStream)(this.filePath, { flags: "w" });
|
|
1343
1343
|
return this.stream;
|
|
1344
1344
|
})();
|
|
1345
1345
|
}
|
|
@@ -1457,6 +1457,7 @@ __export(index_exports, {
|
|
|
1457
1457
|
TokenUsageEvaluator: () => TokenUsageEvaluator,
|
|
1458
1458
|
ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
|
|
1459
1459
|
WorkspaceCreationError: () => WorkspaceCreationError,
|
|
1460
|
+
WorkspacePoolManager: () => WorkspacePoolManager,
|
|
1460
1461
|
assembleLlmJudgePrompt: () => assembleLlmJudgePrompt,
|
|
1461
1462
|
avgToolDurationMs: () => avgToolDurationMs,
|
|
1462
1463
|
buildDirectoryChain: () => buildDirectoryChain2,
|
|
@@ -1471,6 +1472,7 @@ __export(index_exports, {
|
|
|
1471
1472
|
cleanupEvalWorkspaces: () => cleanupEvalWorkspaces,
|
|
1472
1473
|
cleanupWorkspace: () => cleanupWorkspace,
|
|
1473
1474
|
computeTraceSummary: () => computeTraceSummary,
|
|
1475
|
+
computeWorkspaceFingerprint: () => computeWorkspaceFingerprint,
|
|
1474
1476
|
consumeClaudeLogEntries: () => consumeClaudeLogEntries,
|
|
1475
1477
|
consumeCodexLogEntries: () => consumeCodexLogEntries,
|
|
1476
1478
|
consumeCopilotCliLogEntries: () => consumeCopilotCliLogEntries,
|
|
@@ -1502,8 +1504,14 @@ __export(index_exports, {
|
|
|
1502
1504
|
findGitRoot: () => findGitRoot,
|
|
1503
1505
|
freeformEvaluationSchema: () => freeformEvaluationSchema,
|
|
1504
1506
|
generateRubrics: () => generateRubrics,
|
|
1507
|
+
getAgentvHome: () => getAgentvHome,
|
|
1508
|
+
getGitCacheRoot: () => getGitCacheRoot,
|
|
1505
1509
|
getHitCount: () => getHitCount,
|
|
1510
|
+
getSubagentsRoot: () => getSubagentsRoot,
|
|
1511
|
+
getTraceStateRoot: () => getTraceStateRoot,
|
|
1506
1512
|
getWorkspacePath: () => getWorkspacePath,
|
|
1513
|
+
getWorkspacePoolRoot: () => getWorkspacePoolRoot,
|
|
1514
|
+
getWorkspacesRoot: () => getWorkspacesRoot,
|
|
1507
1515
|
initializeBaseline: () => initializeBaseline,
|
|
1508
1516
|
isEvaluatorKind: () => isEvaluatorKind,
|
|
1509
1517
|
isGuidelineFile: () => isGuidelineFile,
|
|
@@ -1853,7 +1861,7 @@ async function resolveFileReference(ref, evalFileDir) {
|
|
|
1853
1861
|
const rawPath = extractFilePath(ref);
|
|
1854
1862
|
const absolutePattern = import_node_path.default.resolve(evalFileDir, rawPath);
|
|
1855
1863
|
if (isGlobPattern(rawPath)) {
|
|
1856
|
-
const matches = await (0, import_fast_glob.default)(absolutePattern, {
|
|
1864
|
+
const matches = await (0, import_fast_glob.default)(absolutePattern.replaceAll("\\", "/"), {
|
|
1857
1865
|
onlyFiles: true,
|
|
1858
1866
|
absolute: true
|
|
1859
1867
|
});
|
|
@@ -2231,6 +2239,17 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
2231
2239
|
} else if (otelFile !== void 0) {
|
|
2232
2240
|
logWarning(`Invalid execution.otel_file in ${configPath}, expected non-empty string`);
|
|
2233
2241
|
}
|
|
2242
|
+
if (typeof obj.pool_workspaces === "boolean") {
|
|
2243
|
+
result.pool_workspaces = obj.pool_workspaces;
|
|
2244
|
+
} else if (obj.pool_workspaces !== void 0) {
|
|
2245
|
+
logWarning(`Invalid execution.pool_workspaces in ${configPath}, expected boolean`);
|
|
2246
|
+
}
|
|
2247
|
+
const poolSlots = obj.pool_slots;
|
|
2248
|
+
if (typeof poolSlots === "number" && Number.isInteger(poolSlots) && poolSlots >= 1 && poolSlots <= 50) {
|
|
2249
|
+
result.pool_slots = poolSlots;
|
|
2250
|
+
} else if (poolSlots !== void 0) {
|
|
2251
|
+
logWarning(`Invalid execution.pool_slots in ${configPath}, expected integer 1-50`);
|
|
2252
|
+
}
|
|
2234
2253
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
2235
2254
|
}
|
|
2236
2255
|
function logWarning(message) {
|
|
@@ -3672,6 +3691,7 @@ async function processMessages(options) {
|
|
|
3672
3691
|
repoRootPath,
|
|
3673
3692
|
guidelinePatterns,
|
|
3674
3693
|
guidelinePaths,
|
|
3694
|
+
treatFileSegmentsAsGuidelines,
|
|
3675
3695
|
textParts,
|
|
3676
3696
|
messageType,
|
|
3677
3697
|
verbose
|
|
@@ -3719,16 +3739,20 @@ async function processMessages(options) {
|
|
|
3719
3739
|
}
|
|
3720
3740
|
try {
|
|
3721
3741
|
const fileContent = (await (0, import_promises5.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
3722
|
-
|
|
3723
|
-
|
|
3724
|
-
|
|
3725
|
-
|
|
3726
|
-
|
|
3727
|
-
|
|
3728
|
-
|
|
3729
|
-
|
|
3730
|
-
|
|
3742
|
+
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
3743
|
+
messageType,
|
|
3744
|
+
resolvedPath,
|
|
3745
|
+
repoRootPath,
|
|
3746
|
+
guidelinePatterns,
|
|
3747
|
+
treatFileSegmentsAsGuidelines
|
|
3748
|
+
});
|
|
3749
|
+
if (classifyAsGuideline && guidelinePaths) {
|
|
3750
|
+
guidelinePaths.push(import_node_path5.default.resolve(resolvedPath));
|
|
3751
|
+
if (verbose) {
|
|
3752
|
+
console.log(` [Guideline] Found: ${displayPath}`);
|
|
3753
|
+
console.log(` Resolved to: ${resolvedPath}`);
|
|
3731
3754
|
}
|
|
3755
|
+
continue;
|
|
3732
3756
|
}
|
|
3733
3757
|
segments.push({
|
|
3734
3758
|
type: "file",
|
|
@@ -3757,6 +3781,26 @@ async function processMessages(options) {
|
|
|
3757
3781
|
}
|
|
3758
3782
|
return segments;
|
|
3759
3783
|
}
|
|
3784
|
+
function shouldTreatAsGuideline(options) {
|
|
3785
|
+
const {
|
|
3786
|
+
messageType,
|
|
3787
|
+
resolvedPath,
|
|
3788
|
+
repoRootPath,
|
|
3789
|
+
guidelinePatterns,
|
|
3790
|
+
treatFileSegmentsAsGuidelines
|
|
3791
|
+
} = options;
|
|
3792
|
+
if (messageType !== "input") {
|
|
3793
|
+
return false;
|
|
3794
|
+
}
|
|
3795
|
+
if (treatFileSegmentsAsGuidelines) {
|
|
3796
|
+
return true;
|
|
3797
|
+
}
|
|
3798
|
+
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
3799
|
+
return false;
|
|
3800
|
+
}
|
|
3801
|
+
const relativeToRepo = import_node_path5.default.relative(repoRootPath, resolvedPath);
|
|
3802
|
+
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
3803
|
+
}
|
|
3760
3804
|
function asString3(value) {
|
|
3761
3805
|
return typeof value === "string" ? value : void 0;
|
|
3762
3806
|
}
|
|
@@ -4095,6 +4139,8 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4095
4139
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
4096
4140
|
console.log(` - ${guidelinePath}`);
|
|
4097
4141
|
}
|
|
4142
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4143
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
4098
4144
|
} else {
|
|
4099
4145
|
console.log(" No guidelines found");
|
|
4100
4146
|
}
|
|
@@ -4464,7 +4510,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4464
4510
|
} else {
|
|
4465
4511
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
4466
4512
|
}
|
|
4467
|
-
const suiteWorkspace =
|
|
4513
|
+
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
4468
4514
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
4469
4515
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
4470
4516
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
@@ -4500,12 +4546,24 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4500
4546
|
}
|
|
4501
4547
|
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
4502
4548
|
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
4503
|
-
const
|
|
4549
|
+
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
4550
|
+
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
4504
4551
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
4505
4552
|
const guidelinePaths = [];
|
|
4506
4553
|
const inputTextParts = [];
|
|
4507
|
-
const
|
|
4508
|
-
messages:
|
|
4554
|
+
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
4555
|
+
messages: effectiveSuiteInputMessages,
|
|
4556
|
+
searchRoots,
|
|
4557
|
+
repoRootPath,
|
|
4558
|
+
guidelinePatterns,
|
|
4559
|
+
guidelinePaths,
|
|
4560
|
+
treatFileSegmentsAsGuidelines: true,
|
|
4561
|
+
textParts: inputTextParts,
|
|
4562
|
+
messageType: "input",
|
|
4563
|
+
verbose
|
|
4564
|
+
}) : [];
|
|
4565
|
+
const testInputSegments = await processMessages({
|
|
4566
|
+
messages: testInputMessages,
|
|
4509
4567
|
searchRoots,
|
|
4510
4568
|
repoRootPath,
|
|
4511
4569
|
guidelinePatterns,
|
|
@@ -4514,6 +4572,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4514
4572
|
messageType: "input",
|
|
4515
4573
|
verbose
|
|
4516
4574
|
});
|
|
4575
|
+
const inputSegments = [...suiteInputSegments, ...testInputSegments];
|
|
4517
4576
|
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
4518
4577
|
messages: expectedMessages,
|
|
4519
4578
|
searchRoots,
|
|
@@ -4561,7 +4620,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4561
4620
|
...guidelinePaths.map((guidelinePath) => import_node_path8.default.resolve(guidelinePath)),
|
|
4562
4621
|
...userFilePaths
|
|
4563
4622
|
];
|
|
4564
|
-
const caseWorkspace =
|
|
4623
|
+
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
4565
4624
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
4566
4625
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
4567
4626
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
@@ -4592,6 +4651,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4592
4651
|
for (const guidelinePath of testCase.guideline_paths) {
|
|
4593
4652
|
console.log(` - ${guidelinePath}`);
|
|
4594
4653
|
}
|
|
4654
|
+
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4655
|
+
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
4595
4656
|
} else {
|
|
4596
4657
|
console.log(" No guidelines found");
|
|
4597
4658
|
}
|
|
@@ -4691,6 +4752,26 @@ function parseResetConfig(raw) {
|
|
|
4691
4752
|
...afterEach !== void 0 && { after_each: afterEach }
|
|
4692
4753
|
};
|
|
4693
4754
|
}
|
|
4755
|
+
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
4756
|
+
if (typeof raw === "string") {
|
|
4757
|
+
const workspaceFilePath = import_node_path8.default.resolve(evalFileDir, raw);
|
|
4758
|
+
let content;
|
|
4759
|
+
try {
|
|
4760
|
+
content = await (0, import_promises8.readFile)(workspaceFilePath, "utf8");
|
|
4761
|
+
} catch {
|
|
4762
|
+
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
4763
|
+
}
|
|
4764
|
+
const parsed = (0, import_yaml4.parse)(content);
|
|
4765
|
+
if (!isJsonObject(parsed)) {
|
|
4766
|
+
throw new Error(
|
|
4767
|
+
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
4768
|
+
);
|
|
4769
|
+
}
|
|
4770
|
+
const workspaceFileDir = import_node_path8.default.dirname(workspaceFilePath);
|
|
4771
|
+
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
4772
|
+
}
|
|
4773
|
+
return parseWorkspaceConfig(raw, evalFileDir);
|
|
4774
|
+
}
|
|
4694
4775
|
function parseWorkspaceConfig(raw, evalFileDir) {
|
|
4695
4776
|
if (!isJsonObject(raw)) return void 0;
|
|
4696
4777
|
const obj = raw;
|
|
@@ -6784,7 +6865,7 @@ var import_node_path14 = __toESM(require("path"), 1);
|
|
|
6784
6865
|
var import_node_url2 = require("url");
|
|
6785
6866
|
var import_meta = {};
|
|
6786
6867
|
function resolvePlatformCliPath() {
|
|
6787
|
-
const
|
|
6868
|
+
const os3 = (0, import_node_os2.platform)();
|
|
6788
6869
|
const cpu = (0, import_node_os2.arch)();
|
|
6789
6870
|
const platformMap = {
|
|
6790
6871
|
linux: "linux",
|
|
@@ -6795,13 +6876,13 @@ function resolvePlatformCliPath() {
|
|
|
6795
6876
|
x64: "x64",
|
|
6796
6877
|
arm64: "arm64"
|
|
6797
6878
|
};
|
|
6798
|
-
const osPart = platformMap[
|
|
6879
|
+
const osPart = platformMap[os3];
|
|
6799
6880
|
const archPart = archMap[cpu];
|
|
6800
6881
|
if (!osPart || !archPart) {
|
|
6801
6882
|
return void 0;
|
|
6802
6883
|
}
|
|
6803
6884
|
const packageName = `@github/copilot-${osPart}-${archPart}`;
|
|
6804
|
-
const binaryName =
|
|
6885
|
+
const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
|
|
6805
6886
|
try {
|
|
6806
6887
|
const resolved = import_meta.resolve(`${packageName}/package.json`);
|
|
6807
6888
|
const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url2.fileURLToPath)(resolved) : resolved;
|
|
@@ -9488,8 +9569,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
9488
9569
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
9489
9570
|
if (!parseResult.success) {
|
|
9490
9571
|
const firstError = parseResult.error.errors[0];
|
|
9491
|
-
const
|
|
9492
|
-
const prefix =
|
|
9572
|
+
const path44 = firstError?.path.join(".") || "";
|
|
9573
|
+
const prefix = path44 ? `${target.name} ${path44}: ` : `${target.name}: `;
|
|
9493
9574
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
9494
9575
|
}
|
|
9495
9576
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -9699,12 +9780,12 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
9699
9780
|
// src/evaluation/providers/vscode-provider.ts
|
|
9700
9781
|
var import_node_child_process5 = require("child_process");
|
|
9701
9782
|
var import_promises22 = require("fs/promises");
|
|
9702
|
-
var
|
|
9783
|
+
var import_node_path30 = __toESM(require("path"), 1);
|
|
9703
9784
|
var import_node_util3 = require("util");
|
|
9704
9785
|
|
|
9705
9786
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
9706
9787
|
var import_promises20 = require("fs/promises");
|
|
9707
|
-
var
|
|
9788
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
9708
9789
|
|
|
9709
9790
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
9710
9791
|
var import_node_fs7 = require("fs");
|
|
@@ -9972,17 +10053,49 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9972
10053
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
9973
10054
|
var import_node_child_process4 = require("child_process");
|
|
9974
10055
|
var import_promises18 = require("fs/promises");
|
|
9975
|
-
var
|
|
10056
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
9976
10057
|
var import_node_util2 = require("util");
|
|
9977
10058
|
|
|
9978
10059
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
10060
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
10061
|
+
|
|
10062
|
+
// src/paths.ts
|
|
9979
10063
|
var import_node_os4 = __toESM(require("os"), 1);
|
|
9980
10064
|
var import_node_path23 = __toESM(require("path"), 1);
|
|
10065
|
+
var logged = false;
|
|
10066
|
+
function getAgentvHome() {
|
|
10067
|
+
const envHome = process.env.AGENTV_HOME;
|
|
10068
|
+
if (envHome) {
|
|
10069
|
+
if (!logged) {
|
|
10070
|
+
logged = true;
|
|
10071
|
+
console.warn(`Using AGENTV_HOME: ${envHome}`);
|
|
10072
|
+
}
|
|
10073
|
+
return envHome;
|
|
10074
|
+
}
|
|
10075
|
+
return import_node_path23.default.join(import_node_os4.default.homedir(), ".agentv");
|
|
10076
|
+
}
|
|
10077
|
+
function getWorkspacesRoot() {
|
|
10078
|
+
return import_node_path23.default.join(getAgentvHome(), "workspaces");
|
|
10079
|
+
}
|
|
10080
|
+
function getGitCacheRoot() {
|
|
10081
|
+
return import_node_path23.default.join(getAgentvHome(), "git-cache");
|
|
10082
|
+
}
|
|
10083
|
+
function getSubagentsRoot() {
|
|
10084
|
+
return import_node_path23.default.join(getAgentvHome(), "subagents");
|
|
10085
|
+
}
|
|
10086
|
+
function getTraceStateRoot() {
|
|
10087
|
+
return import_node_path23.default.join(getAgentvHome(), "trace-state");
|
|
10088
|
+
}
|
|
10089
|
+
function getWorkspacePoolRoot() {
|
|
10090
|
+
return import_node_path23.default.join(getAgentvHome(), "workspace-pool");
|
|
10091
|
+
}
|
|
10092
|
+
|
|
10093
|
+
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
9981
10094
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
9982
10095
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9983
10096
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
9984
10097
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
9985
|
-
return
|
|
10098
|
+
return import_node_path24.default.join(getSubagentsRoot(), folder);
|
|
9986
10099
|
}
|
|
9987
10100
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
9988
10101
|
|
|
@@ -9996,12 +10109,19 @@ description: 'Wake-up Signal'
|
|
|
9996
10109
|
model: Grok Code Fast 1 (copilot)
|
|
9997
10110
|
---`;
|
|
9998
10111
|
function spawnVsCode(vscodeCmd, args, options) {
|
|
9999
|
-
const
|
|
10112
|
+
const useShell = options?.shell ?? true;
|
|
10113
|
+
const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
|
|
10114
|
+
const child = (0, import_node_child_process4.spawn)(command, args, {
|
|
10000
10115
|
windowsHide: true,
|
|
10001
|
-
shell:
|
|
10116
|
+
shell: useShell,
|
|
10002
10117
|
detached: false
|
|
10003
10118
|
});
|
|
10004
|
-
child.on("error", () => {
|
|
10119
|
+
child.on("error", (error) => {
|
|
10120
|
+
const label = options?.label ?? "spawn";
|
|
10121
|
+
const renderedArgs = args.map((value) => JSON.stringify(value)).join(" ");
|
|
10122
|
+
console.error(
|
|
10123
|
+
`[vscode] ${label} failed: command=${JSON.stringify(vscodeCmd)} args=${renderedArgs} error=${error.message}`
|
|
10124
|
+
);
|
|
10005
10125
|
});
|
|
10006
10126
|
return child;
|
|
10007
10127
|
}
|
|
@@ -10038,16 +10158,20 @@ async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
|
10038
10158
|
async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir, vscodeCmd, pollInterval = 1, timeout = 60) {
|
|
10039
10159
|
const alreadyOpen = await checkWorkspaceOpened(workspaceName, vscodeCmd);
|
|
10040
10160
|
if (alreadyOpen) {
|
|
10041
|
-
spawnVsCode(
|
|
10161
|
+
const child = spawnVsCode(vscodeCmd, [workspacePath], { label: "focus-existing-workspace" });
|
|
10162
|
+
await raceSpawnError(child);
|
|
10042
10163
|
return true;
|
|
10043
10164
|
}
|
|
10044
|
-
const aliveFile =
|
|
10165
|
+
const aliveFile = import_node_path25.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
10045
10166
|
await removeIfExists(aliveFile);
|
|
10046
|
-
const githubAgentsDir =
|
|
10167
|
+
const githubAgentsDir = import_node_path25.default.join(subagentDir, ".github", "agents");
|
|
10047
10168
|
await (0, import_promises18.mkdir)(githubAgentsDir, { recursive: true });
|
|
10048
|
-
const wakeupDst =
|
|
10169
|
+
const wakeupDst = import_node_path25.default.join(githubAgentsDir, "wakeup.md");
|
|
10049
10170
|
await (0, import_promises18.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
10050
|
-
spawnVsCode(
|
|
10171
|
+
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
10172
|
+
label: "open-workspace"
|
|
10173
|
+
});
|
|
10174
|
+
await raceSpawnError(workspaceChild);
|
|
10051
10175
|
await sleep2(100);
|
|
10052
10176
|
const wakeupChatId = "wakeup";
|
|
10053
10177
|
const chatArgs = [
|
|
@@ -10055,9 +10179,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
10055
10179
|
"chat",
|
|
10056
10180
|
"-m",
|
|
10057
10181
|
wakeupChatId,
|
|
10058
|
-
`create a file named .alive in the ${
|
|
10182
|
+
`create a file named .alive in the ${import_node_path25.default.basename(subagentDir)} folder`
|
|
10059
10183
|
];
|
|
10060
|
-
spawnVsCode(
|
|
10184
|
+
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
10185
|
+
await raceSpawnError(wakeupChild);
|
|
10061
10186
|
const start = Date.now();
|
|
10062
10187
|
while (!await pathExists(aliveFile)) {
|
|
10063
10188
|
if (Date.now() - start > timeout * 1e3) {
|
|
@@ -10069,10 +10194,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
10069
10194
|
return true;
|
|
10070
10195
|
}
|
|
10071
10196
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
10072
|
-
const workspacePath =
|
|
10073
|
-
const messagesDir =
|
|
10197
|
+
const workspacePath = import_node_path25.default.join(subagentDir, `${import_node_path25.default.basename(subagentDir)}.code-workspace`);
|
|
10198
|
+
const messagesDir = import_node_path25.default.join(subagentDir, "messages");
|
|
10074
10199
|
await (0, import_promises18.mkdir)(messagesDir, { recursive: true });
|
|
10075
|
-
const reqFile =
|
|
10200
|
+
const reqFile = import_node_path25.default.join(messagesDir, `${timestamp}_req.md`);
|
|
10076
10201
|
await (0, import_promises18.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
10077
10202
|
const reqUri = pathToFileUri2(reqFile);
|
|
10078
10203
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -10080,25 +10205,25 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
10080
10205
|
chatArgs.push("-a", attachment);
|
|
10081
10206
|
}
|
|
10082
10207
|
chatArgs.push("-a", reqFile);
|
|
10083
|
-
chatArgs.push(`Follow instructions in [${
|
|
10208
|
+
chatArgs.push(`Follow instructions in [${import_node_path25.default.basename(reqFile)}](${reqUri})`);
|
|
10084
10209
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
10085
10210
|
workspacePath,
|
|
10086
|
-
|
|
10211
|
+
import_node_path25.default.basename(subagentDir),
|
|
10087
10212
|
subagentDir,
|
|
10088
10213
|
vscodeCmd
|
|
10089
10214
|
);
|
|
10090
10215
|
if (!workspaceReady) {
|
|
10091
10216
|
throw new Error(
|
|
10092
|
-
`VS Code workspace '${
|
|
10217
|
+
`VS Code workspace '${import_node_path25.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
10093
10218
|
);
|
|
10094
10219
|
}
|
|
10095
10220
|
await sleep2(500);
|
|
10096
|
-
const child = spawnVsCode(
|
|
10221
|
+
const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-chat" });
|
|
10097
10222
|
await raceSpawnError(child);
|
|
10098
10223
|
}
|
|
10099
10224
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
10100
|
-
const workspacePath =
|
|
10101
|
-
const messagesDir =
|
|
10225
|
+
const workspacePath = import_node_path25.default.join(subagentDir, `${import_node_path25.default.basename(subagentDir)}.code-workspace`);
|
|
10226
|
+
const messagesDir = import_node_path25.default.join(subagentDir, "messages");
|
|
10102
10227
|
await (0, import_promises18.mkdir)(messagesDir, { recursive: true });
|
|
10103
10228
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
10104
10229
|
for (const attachment of attachmentPaths) {
|
|
@@ -10107,26 +10232,26 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
10107
10232
|
chatArgs.push(chatInstruction);
|
|
10108
10233
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
10109
10234
|
workspacePath,
|
|
10110
|
-
|
|
10235
|
+
import_node_path25.default.basename(subagentDir),
|
|
10111
10236
|
subagentDir,
|
|
10112
10237
|
vscodeCmd
|
|
10113
10238
|
);
|
|
10114
10239
|
if (!workspaceReady) {
|
|
10115
10240
|
throw new Error(
|
|
10116
|
-
`VS Code workspace '${
|
|
10241
|
+
`VS Code workspace '${import_node_path25.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
10117
10242
|
);
|
|
10118
10243
|
}
|
|
10119
10244
|
await sleep2(500);
|
|
10120
|
-
const child = spawnVsCode(
|
|
10245
|
+
const child = spawnVsCode(vscodeCmd, chatArgs, { label: "send-batch-chat" });
|
|
10121
10246
|
await raceSpawnError(child);
|
|
10122
10247
|
}
|
|
10123
10248
|
|
|
10124
10249
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
10125
10250
|
var import_promises19 = require("fs/promises");
|
|
10126
|
-
var
|
|
10251
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
10127
10252
|
|
|
10128
10253
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
10129
|
-
var
|
|
10254
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
10130
10255
|
var import_json5 = __toESM(require("json5"), 1);
|
|
10131
10256
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10132
10257
|
let workspace;
|
|
@@ -10143,10 +10268,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10143
10268
|
}
|
|
10144
10269
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
10145
10270
|
const folderPath = folder.path;
|
|
10146
|
-
if (
|
|
10271
|
+
if (import_node_path26.default.isAbsolute(folderPath)) {
|
|
10147
10272
|
return folder;
|
|
10148
10273
|
}
|
|
10149
|
-
const absolutePath =
|
|
10274
|
+
const absolutePath = import_node_path26.default.resolve(templateDir, folderPath);
|
|
10150
10275
|
return {
|
|
10151
10276
|
...folder,
|
|
10152
10277
|
path: absolutePath
|
|
@@ -10168,19 +10293,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10168
10293
|
if (locationMap && typeof locationMap === "object") {
|
|
10169
10294
|
const transformedMap = {};
|
|
10170
10295
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
10171
|
-
const isAbsolute =
|
|
10296
|
+
const isAbsolute = import_node_path26.default.isAbsolute(locationPath);
|
|
10172
10297
|
if (isAbsolute) {
|
|
10173
10298
|
transformedMap[locationPath] = value;
|
|
10174
10299
|
} else {
|
|
10175
10300
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
10176
10301
|
if (firstGlobIndex === -1) {
|
|
10177
|
-
const resolvedPath =
|
|
10302
|
+
const resolvedPath = import_node_path26.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
10178
10303
|
transformedMap[resolvedPath] = value;
|
|
10179
10304
|
} else {
|
|
10180
10305
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
10181
10306
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
10182
10307
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
10183
|
-
const resolvedPath = (
|
|
10308
|
+
const resolvedPath = (import_node_path26.default.resolve(templateDir, basePath) + patternPath).replace(
|
|
10184
10309
|
/\\/g,
|
|
10185
10310
|
"/"
|
|
10186
10311
|
);
|
|
@@ -10221,7 +10346,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
10221
10346
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
10222
10347
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
10223
10348
|
for (const subagent of subagents) {
|
|
10224
|
-
const lockFile =
|
|
10349
|
+
const lockFile = import_node_path27.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
10225
10350
|
if (!await pathExists(lockFile)) {
|
|
10226
10351
|
return subagent.absolutePath;
|
|
10227
10352
|
}
|
|
@@ -10231,7 +10356,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
10231
10356
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
10232
10357
|
let workspaceContent;
|
|
10233
10358
|
if (workspaceTemplate) {
|
|
10234
|
-
const workspaceSrc =
|
|
10359
|
+
const workspaceSrc = import_node_path27.default.resolve(workspaceTemplate);
|
|
10235
10360
|
if (!await pathExists(workspaceSrc)) {
|
|
10236
10361
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
10237
10362
|
}
|
|
@@ -10244,13 +10369,13 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
10244
10369
|
} else {
|
|
10245
10370
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
10246
10371
|
}
|
|
10247
|
-
const workspaceName = `${
|
|
10248
|
-
const workspaceDst =
|
|
10249
|
-
const templateDir = workspaceTemplate ?
|
|
10372
|
+
const workspaceName = `${import_node_path27.default.basename(subagentDir)}.code-workspace`;
|
|
10373
|
+
const workspaceDst = import_node_path27.default.join(subagentDir, workspaceName);
|
|
10374
|
+
const templateDir = workspaceTemplate ? import_node_path27.default.dirname(import_node_path27.default.resolve(workspaceTemplate)) : subagentDir;
|
|
10250
10375
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
10251
10376
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
10252
10377
|
if (cwd) {
|
|
10253
|
-
const absCwd =
|
|
10378
|
+
const absCwd = import_node_path27.default.resolve(cwd);
|
|
10254
10379
|
const parsed = JSON.parse(transformedContent);
|
|
10255
10380
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
10256
10381
|
if (!alreadyPresent) {
|
|
@@ -10259,35 +10384,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
10259
10384
|
}
|
|
10260
10385
|
}
|
|
10261
10386
|
await (0, import_promises19.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
10262
|
-
const messagesDir =
|
|
10387
|
+
const messagesDir = import_node_path27.default.join(subagentDir, "messages");
|
|
10263
10388
|
await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
|
|
10264
10389
|
return { workspace: workspaceDst, messagesDir };
|
|
10265
10390
|
}
|
|
10266
10391
|
async function createSubagentLock(subagentDir) {
|
|
10267
|
-
const messagesDir =
|
|
10392
|
+
const messagesDir = import_node_path27.default.join(subagentDir, "messages");
|
|
10268
10393
|
if (await pathExists(messagesDir)) {
|
|
10269
10394
|
const files = await (0, import_promises19.readdir)(messagesDir);
|
|
10270
10395
|
await Promise.all(
|
|
10271
10396
|
files.map(async (file) => {
|
|
10272
|
-
const target =
|
|
10397
|
+
const target = import_node_path27.default.join(messagesDir, file);
|
|
10273
10398
|
await removeIfExists(target);
|
|
10274
10399
|
})
|
|
10275
10400
|
);
|
|
10276
10401
|
}
|
|
10277
|
-
const githubAgentsDir =
|
|
10402
|
+
const githubAgentsDir = import_node_path27.default.join(subagentDir, ".github", "agents");
|
|
10278
10403
|
if (await pathExists(githubAgentsDir)) {
|
|
10279
10404
|
const agentFiles = await (0, import_promises19.readdir)(githubAgentsDir);
|
|
10280
10405
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
10281
10406
|
await Promise.all(
|
|
10282
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
10407
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path27.default.join(githubAgentsDir, file)))
|
|
10283
10408
|
);
|
|
10284
10409
|
}
|
|
10285
|
-
const lockFile =
|
|
10410
|
+
const lockFile = import_node_path27.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
10286
10411
|
await (0, import_promises19.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
10287
10412
|
return lockFile;
|
|
10288
10413
|
}
|
|
10289
10414
|
async function removeSubagentLock(subagentDir) {
|
|
10290
|
-
const lockFile =
|
|
10415
|
+
const lockFile = import_node_path27.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
10291
10416
|
await removeIfExists(lockFile);
|
|
10292
10417
|
}
|
|
10293
10418
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -10307,9 +10432,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
10307
10432
|
return 1;
|
|
10308
10433
|
}
|
|
10309
10434
|
if (promptFile) {
|
|
10310
|
-
const githubAgentsDir =
|
|
10435
|
+
const githubAgentsDir = import_node_path27.default.join(subagentDir, ".github", "agents");
|
|
10311
10436
|
await (0, import_promises19.mkdir)(githubAgentsDir, { recursive: true });
|
|
10312
|
-
const agentFile =
|
|
10437
|
+
const agentFile = import_node_path27.default.join(githubAgentsDir, `${chatId}.md`);
|
|
10313
10438
|
try {
|
|
10314
10439
|
await (0, import_promises19.copyFile)(promptFile, agentFile);
|
|
10315
10440
|
} catch (error) {
|
|
@@ -10328,7 +10453,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
10328
10453
|
if (!promptFile) {
|
|
10329
10454
|
return void 0;
|
|
10330
10455
|
}
|
|
10331
|
-
const resolvedPrompt =
|
|
10456
|
+
const resolvedPrompt = import_node_path28.default.resolve(promptFile);
|
|
10332
10457
|
if (!await pathExists(resolvedPrompt)) {
|
|
10333
10458
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
10334
10459
|
}
|
|
@@ -10344,7 +10469,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
10344
10469
|
}
|
|
10345
10470
|
const resolved = [];
|
|
10346
10471
|
for (const attachment of extraAttachments) {
|
|
10347
|
-
const resolvedPath =
|
|
10472
|
+
const resolvedPath = import_node_path28.default.resolve(attachment);
|
|
10348
10473
|
if (!await pathExists(resolvedPath)) {
|
|
10349
10474
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
10350
10475
|
}
|
|
@@ -10386,7 +10511,7 @@ async function dispatchAgentSession(options) {
|
|
|
10386
10511
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
10387
10512
|
};
|
|
10388
10513
|
}
|
|
10389
|
-
const subagentName =
|
|
10514
|
+
const subagentName = import_node_path28.default.basename(subagentDir);
|
|
10390
10515
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
10391
10516
|
const preparationResult = await prepareSubagentDirectory(
|
|
10392
10517
|
subagentDir,
|
|
@@ -10414,9 +10539,9 @@ async function dispatchAgentSession(options) {
|
|
|
10414
10539
|
};
|
|
10415
10540
|
}
|
|
10416
10541
|
const timestamp = generateTimestamp();
|
|
10417
|
-
const messagesDir =
|
|
10418
|
-
const responseFileTmp =
|
|
10419
|
-
const responseFileFinal =
|
|
10542
|
+
const messagesDir = import_node_path28.default.join(subagentDir, "messages");
|
|
10543
|
+
const responseFileTmp = import_node_path28.default.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
10544
|
+
const responseFileFinal = import_node_path28.default.join(messagesDir, `${timestamp}_res.md`);
|
|
10420
10545
|
const requestInstructions = createRequestPrompt(
|
|
10421
10546
|
userQuery,
|
|
10422
10547
|
responseFileTmp,
|
|
@@ -10521,7 +10646,7 @@ async function dispatchBatchAgent(options) {
|
|
|
10521
10646
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
10522
10647
|
};
|
|
10523
10648
|
}
|
|
10524
|
-
subagentName =
|
|
10649
|
+
subagentName = import_node_path28.default.basename(subagentDir);
|
|
10525
10650
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
10526
10651
|
const preparationResult = await prepareSubagentDirectory(
|
|
10527
10652
|
subagentDir,
|
|
@@ -10552,17 +10677,17 @@ async function dispatchBatchAgent(options) {
|
|
|
10552
10677
|
};
|
|
10553
10678
|
}
|
|
10554
10679
|
const timestamp = generateTimestamp();
|
|
10555
|
-
const messagesDir =
|
|
10680
|
+
const messagesDir = import_node_path28.default.join(subagentDir, "messages");
|
|
10556
10681
|
requestFiles = userQueries.map(
|
|
10557
|
-
(_, index) =>
|
|
10682
|
+
(_, index) => import_node_path28.default.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
10558
10683
|
);
|
|
10559
10684
|
const responseTmpFiles = userQueries.map(
|
|
10560
|
-
(_, index) =>
|
|
10685
|
+
(_, index) => import_node_path28.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
10561
10686
|
);
|
|
10562
10687
|
responseFilesFinal = userQueries.map(
|
|
10563
|
-
(_, index) =>
|
|
10688
|
+
(_, index) => import_node_path28.default.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
10564
10689
|
);
|
|
10565
|
-
const orchestratorFile =
|
|
10690
|
+
const orchestratorFile = import_node_path28.default.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
10566
10691
|
if (!dryRun) {
|
|
10567
10692
|
await Promise.all(
|
|
10568
10693
|
userQueries.map((query, index) => {
|
|
@@ -10648,7 +10773,7 @@ async function dispatchBatchAgent(options) {
|
|
|
10648
10773
|
|
|
10649
10774
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
10650
10775
|
var import_promises21 = require("fs/promises");
|
|
10651
|
-
var
|
|
10776
|
+
var import_node_path29 = __toESM(require("path"), 1);
|
|
10652
10777
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
10653
10778
|
folders: [
|
|
10654
10779
|
{
|
|
@@ -10679,7 +10804,7 @@ async function provisionSubagents(options) {
|
|
|
10679
10804
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
10680
10805
|
throw new Error("subagents must be a positive integer");
|
|
10681
10806
|
}
|
|
10682
|
-
const targetPath =
|
|
10807
|
+
const targetPath = import_node_path29.default.resolve(targetRoot);
|
|
10683
10808
|
if (!dryRun) {
|
|
10684
10809
|
await ensureDir(targetPath);
|
|
10685
10810
|
}
|
|
@@ -10699,7 +10824,7 @@ async function provisionSubagents(options) {
|
|
|
10699
10824
|
continue;
|
|
10700
10825
|
}
|
|
10701
10826
|
highestNumber = Math.max(highestNumber, parsed);
|
|
10702
|
-
const lockFile =
|
|
10827
|
+
const lockFile = import_node_path29.default.join(entry.absolutePath, lockName);
|
|
10703
10828
|
const locked = await pathExists(lockFile);
|
|
10704
10829
|
if (locked) {
|
|
10705
10830
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -10716,10 +10841,10 @@ async function provisionSubagents(options) {
|
|
|
10716
10841
|
break;
|
|
10717
10842
|
}
|
|
10718
10843
|
const subagentDir = subagent.absolutePath;
|
|
10719
|
-
const githubAgentsDir =
|
|
10720
|
-
const lockFile =
|
|
10721
|
-
const workspaceDst =
|
|
10722
|
-
const wakeupDst =
|
|
10844
|
+
const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
|
|
10845
|
+
const lockFile = import_node_path29.default.join(subagentDir, lockName);
|
|
10846
|
+
const workspaceDst = import_node_path29.default.join(subagentDir, `${import_node_path29.default.basename(subagentDir)}.code-workspace`);
|
|
10847
|
+
const wakeupDst = import_node_path29.default.join(githubAgentsDir, "wakeup.md");
|
|
10723
10848
|
const isLocked = await pathExists(lockFile);
|
|
10724
10849
|
if (isLocked && !force) {
|
|
10725
10850
|
continue;
|
|
@@ -10757,10 +10882,10 @@ async function provisionSubagents(options) {
|
|
|
10757
10882
|
let nextIndex = highestNumber;
|
|
10758
10883
|
while (subagentsProvisioned < subagents) {
|
|
10759
10884
|
nextIndex += 1;
|
|
10760
|
-
const subagentDir =
|
|
10761
|
-
const githubAgentsDir =
|
|
10762
|
-
const workspaceDst =
|
|
10763
|
-
const wakeupDst =
|
|
10885
|
+
const subagentDir = import_node_path29.default.join(targetPath, `subagent-${nextIndex}`);
|
|
10886
|
+
const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
|
|
10887
|
+
const workspaceDst = import_node_path29.default.join(subagentDir, `${import_node_path29.default.basename(subagentDir)}.code-workspace`);
|
|
10888
|
+
const wakeupDst = import_node_path29.default.join(githubAgentsDir, "wakeup.md");
|
|
10764
10889
|
if (!dryRun) {
|
|
10765
10890
|
await ensureDir(subagentDir);
|
|
10766
10891
|
await ensureDir(githubAgentsDir);
|
|
@@ -10786,8 +10911,6 @@ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
10786
10911
|
|
|
10787
10912
|
**IMPORTANT**: Follow these exact steps:
|
|
10788
10913
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
10789
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
10790
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
10791
10914
|
2. When completely finished, run these PowerShell commands to signal completion:
|
|
10792
10915
|
\`\`\`
|
|
10793
10916
|
Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
|
|
@@ -10804,8 +10927,6 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
10804
10927
|
|
|
10805
10928
|
**IMPORTANT**: Follow these exact steps:
|
|
10806
10929
|
1. Create and write your complete response to: {{responseFileTmp}}
|
|
10807
|
-
- All intended file outputs/changes MUST be written in your response file.
|
|
10808
|
-
- For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
|
|
10809
10930
|
2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
|
|
10810
10931
|
3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
|
|
10811
10932
|
`;
|
|
@@ -10954,7 +11075,7 @@ var VSCodeProvider = class {
|
|
|
10954
11075
|
async function locateVSCodeExecutable(candidate) {
|
|
10955
11076
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
10956
11077
|
if (includesPathSeparator) {
|
|
10957
|
-
const resolved =
|
|
11078
|
+
const resolved = import_node_path30.default.isAbsolute(candidate) ? candidate : import_node_path30.default.resolve(candidate);
|
|
10958
11079
|
try {
|
|
10959
11080
|
await (0, import_promises22.access)(resolved, import_promises22.constants.F_OK);
|
|
10960
11081
|
return resolved;
|
|
@@ -10983,7 +11104,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
10983
11104
|
return void 0;
|
|
10984
11105
|
}
|
|
10985
11106
|
try {
|
|
10986
|
-
const stats = await (0, import_promises22.stat)(
|
|
11107
|
+
const stats = await (0, import_promises22.stat)(import_node_path30.default.resolve(template));
|
|
10987
11108
|
return stats.isFile() ? template : void 0;
|
|
10988
11109
|
} catch {
|
|
10989
11110
|
return template;
|
|
@@ -11009,7 +11130,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
11009
11130
|
return "";
|
|
11010
11131
|
}
|
|
11011
11132
|
const buildList = (files) => files.map((absolutePath) => {
|
|
11012
|
-
const fileName =
|
|
11133
|
+
const fileName = import_node_path30.default.basename(absolutePath);
|
|
11013
11134
|
const fileUri = pathToFileUri3(absolutePath);
|
|
11014
11135
|
return `* [${fileName}](${fileUri})`;
|
|
11015
11136
|
});
|
|
@@ -11034,8 +11155,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
11034
11155
|
}
|
|
11035
11156
|
const unique = /* @__PURE__ */ new Map();
|
|
11036
11157
|
for (const attachment of attachments) {
|
|
11037
|
-
const absolutePath =
|
|
11038
|
-
const normalized = absolutePath.split(
|
|
11158
|
+
const absolutePath = import_node_path30.default.resolve(attachment);
|
|
11159
|
+
const normalized = absolutePath.split(import_node_path30.default.sep).join("/");
|
|
11039
11160
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
11040
11161
|
if (!unique.has(absolutePath)) {
|
|
11041
11162
|
unique.set(absolutePath, absolutePath);
|
|
@@ -11050,7 +11171,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
11050
11171
|
}
|
|
11051
11172
|
const unique = /* @__PURE__ */ new Map();
|
|
11052
11173
|
for (const attachment of attachments) {
|
|
11053
|
-
const absolutePath =
|
|
11174
|
+
const absolutePath = import_node_path30.default.resolve(attachment);
|
|
11054
11175
|
if (!unique.has(absolutePath)) {
|
|
11055
11176
|
unique.set(absolutePath, absolutePath);
|
|
11056
11177
|
}
|
|
@@ -11058,7 +11179,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
11058
11179
|
return Array.from(unique.values());
|
|
11059
11180
|
}
|
|
11060
11181
|
function pathToFileUri3(filePath) {
|
|
11061
|
-
const absolutePath =
|
|
11182
|
+
const absolutePath = import_node_path30.default.isAbsolute(filePath) ? filePath : import_node_path30.default.resolve(filePath);
|
|
11062
11183
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
11063
11184
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
11064
11185
|
return `file:///${normalizedPath}`;
|
|
@@ -11071,7 +11192,7 @@ function normalizeAttachments(attachments) {
|
|
|
11071
11192
|
}
|
|
11072
11193
|
const deduped = /* @__PURE__ */ new Set();
|
|
11073
11194
|
for (const attachment of attachments) {
|
|
11074
|
-
deduped.add(
|
|
11195
|
+
deduped.add(import_node_path30.default.resolve(attachment));
|
|
11075
11196
|
}
|
|
11076
11197
|
return Array.from(deduped);
|
|
11077
11198
|
}
|
|
@@ -11080,7 +11201,7 @@ function mergeAttachments(all) {
|
|
|
11080
11201
|
for (const list of all) {
|
|
11081
11202
|
if (!list) continue;
|
|
11082
11203
|
for (const inputFile of list) {
|
|
11083
|
-
deduped.add(
|
|
11204
|
+
deduped.add(import_node_path30.default.resolve(inputFile));
|
|
11084
11205
|
}
|
|
11085
11206
|
}
|
|
11086
11207
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -11129,7 +11250,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
11129
11250
|
// src/evaluation/providers/targets-file.ts
|
|
11130
11251
|
var import_node_fs8 = require("fs");
|
|
11131
11252
|
var import_promises23 = require("fs/promises");
|
|
11132
|
-
var
|
|
11253
|
+
var import_node_path31 = __toESM(require("path"), 1);
|
|
11133
11254
|
var import_yaml5 = require("yaml");
|
|
11134
11255
|
function isRecord(value) {
|
|
11135
11256
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -11166,7 +11287,7 @@ async function fileExists3(filePath) {
|
|
|
11166
11287
|
}
|
|
11167
11288
|
}
|
|
11168
11289
|
async function readTargetDefinitions(filePath) {
|
|
11169
|
-
const absolutePath =
|
|
11290
|
+
const absolutePath = import_node_path31.default.resolve(filePath);
|
|
11170
11291
|
if (!await fileExists3(absolutePath)) {
|
|
11171
11292
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
11172
11293
|
}
|
|
@@ -11186,16 +11307,16 @@ function listTargetNames(definitions) {
|
|
|
11186
11307
|
}
|
|
11187
11308
|
|
|
11188
11309
|
// src/evaluation/providers/provider-discovery.ts
|
|
11189
|
-
var
|
|
11310
|
+
var import_node_path32 = __toESM(require("path"), 1);
|
|
11190
11311
|
var import_fast_glob2 = __toESM(require("fast-glob"), 1);
|
|
11191
11312
|
async function discoverProviders(registry, baseDir) {
|
|
11192
11313
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
11193
11314
|
const candidateDirs = [];
|
|
11194
|
-
let dir =
|
|
11195
|
-
const root =
|
|
11315
|
+
let dir = import_node_path32.default.resolve(baseDir);
|
|
11316
|
+
const root = import_node_path32.default.parse(dir).root;
|
|
11196
11317
|
while (dir !== root) {
|
|
11197
|
-
candidateDirs.push(
|
|
11198
|
-
dir =
|
|
11318
|
+
candidateDirs.push(import_node_path32.default.join(dir, ".agentv", "providers"));
|
|
11319
|
+
dir = import_node_path32.default.dirname(dir);
|
|
11199
11320
|
}
|
|
11200
11321
|
let files = [];
|
|
11201
11322
|
for (const providersDir of candidateDirs) {
|
|
@@ -11211,7 +11332,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
11211
11332
|
}
|
|
11212
11333
|
const discoveredKinds = [];
|
|
11213
11334
|
for (const filePath of files) {
|
|
11214
|
-
const basename =
|
|
11335
|
+
const basename = import_node_path32.default.basename(filePath);
|
|
11215
11336
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
11216
11337
|
if (registry.has(kindName)) {
|
|
11217
11338
|
continue;
|
|
@@ -11318,7 +11439,7 @@ function negateScore(score) {
|
|
|
11318
11439
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
11319
11440
|
var import_promises24 = require("fs/promises");
|
|
11320
11441
|
var import_node_os5 = require("os");
|
|
11321
|
-
var
|
|
11442
|
+
var import_node_path33 = require("path");
|
|
11322
11443
|
|
|
11323
11444
|
// src/runtime/exec.ts
|
|
11324
11445
|
function shellEscapePath(value) {
|
|
@@ -11418,16 +11539,16 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
11418
11539
|
});
|
|
11419
11540
|
}
|
|
11420
11541
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
11421
|
-
const { mkdir:
|
|
11542
|
+
const { mkdir: mkdir17, readFile: readFile14, rm: rm7, writeFile: writeFile10 } = await import("fs/promises");
|
|
11422
11543
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
11423
|
-
const
|
|
11544
|
+
const path44 = await import("path");
|
|
11424
11545
|
const { randomUUID: randomUUID8 } = await import("crypto");
|
|
11425
|
-
const dir =
|
|
11426
|
-
await
|
|
11427
|
-
const stdinPath =
|
|
11428
|
-
const stdoutPath =
|
|
11429
|
-
const stderrPath =
|
|
11430
|
-
await
|
|
11546
|
+
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID8()}`);
|
|
11547
|
+
await mkdir17(dir, { recursive: true });
|
|
11548
|
+
const stdinPath = path44.join(dir, "stdin.txt");
|
|
11549
|
+
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
11550
|
+
const stderrPath = path44.join(dir, "stderr.txt");
|
|
11551
|
+
await writeFile10(stdinPath, stdinPayload, "utf8");
|
|
11431
11552
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
11432
11553
|
const { spawn: spawn4 } = await import("child_process");
|
|
11433
11554
|
try {
|
|
@@ -11456,11 +11577,11 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
11456
11577
|
resolve(code ?? 0);
|
|
11457
11578
|
});
|
|
11458
11579
|
});
|
|
11459
|
-
const stdout = (await
|
|
11460
|
-
const stderr = (await
|
|
11580
|
+
const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
11581
|
+
const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
11461
11582
|
return { stdout, stderr, exitCode };
|
|
11462
11583
|
} finally {
|
|
11463
|
-
await
|
|
11584
|
+
await rm7(dir, { recursive: true, force: true });
|
|
11464
11585
|
}
|
|
11465
11586
|
}
|
|
11466
11587
|
|
|
@@ -11762,8 +11883,8 @@ var CodeEvaluator = class {
|
|
|
11762
11883
|
if (outputForPayload) {
|
|
11763
11884
|
const serialized = JSON.stringify(outputForPayload);
|
|
11764
11885
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
11765
|
-
const tmpDir = await (0, import_promises24.mkdtemp)((0,
|
|
11766
|
-
outputPath = (0,
|
|
11886
|
+
const tmpDir = await (0, import_promises24.mkdtemp)((0, import_node_path33.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
|
|
11887
|
+
outputPath = (0, import_node_path33.join)(tmpDir, "output.json");
|
|
11767
11888
|
await (0, import_promises24.writeFile)(outputPath, serialized);
|
|
11768
11889
|
outputForPayload = null;
|
|
11769
11890
|
}
|
|
@@ -11778,7 +11899,7 @@ var CodeEvaluator = class {
|
|
|
11778
11899
|
outputPath,
|
|
11779
11900
|
guidelineFiles: context2.evalCase.guideline_paths,
|
|
11780
11901
|
inputFiles: context2.evalCase.file_paths.filter(
|
|
11781
|
-
(
|
|
11902
|
+
(path44) => !context2.evalCase.guideline_paths.includes(path44)
|
|
11782
11903
|
),
|
|
11783
11904
|
input: context2.evalCase.input,
|
|
11784
11905
|
trace: context2.trace ?? null,
|
|
@@ -11876,7 +11997,7 @@ var CodeEvaluator = class {
|
|
|
11876
11997
|
await proxyShutdown();
|
|
11877
11998
|
}
|
|
11878
11999
|
if (outputPath) {
|
|
11879
|
-
await (0, import_promises24.rm)((0,
|
|
12000
|
+
await (0, import_promises24.rm)((0, import_node_path33.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
11880
12001
|
});
|
|
11881
12002
|
}
|
|
11882
12003
|
}
|
|
@@ -12057,6 +12178,8 @@ ${context2.fileChanges}`;
|
|
|
12057
12178
|
};
|
|
12058
12179
|
} catch (e) {
|
|
12059
12180
|
const message = e instanceof Error ? e.message : String(e);
|
|
12181
|
+
const evalName = context2.evaluator?.name ?? "llm-judge";
|
|
12182
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
12060
12183
|
return {
|
|
12061
12184
|
score: 0,
|
|
12062
12185
|
verdict: "skip",
|
|
@@ -12085,24 +12208,39 @@ ${context2.fileChanges}`;
|
|
|
12085
12208
|
systemPrompt,
|
|
12086
12209
|
target: judgeProvider.targetName
|
|
12087
12210
|
};
|
|
12088
|
-
|
|
12089
|
-
|
|
12090
|
-
|
|
12091
|
-
|
|
12092
|
-
|
|
12093
|
-
|
|
12094
|
-
|
|
12095
|
-
|
|
12096
|
-
|
|
12097
|
-
|
|
12098
|
-
|
|
12099
|
-
|
|
12100
|
-
|
|
12101
|
-
|
|
12102
|
-
|
|
12103
|
-
|
|
12104
|
-
|
|
12105
|
-
|
|
12211
|
+
try {
|
|
12212
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
12213
|
+
context: context2,
|
|
12214
|
+
judgeProvider,
|
|
12215
|
+
systemPrompt,
|
|
12216
|
+
userPrompt: prompt,
|
|
12217
|
+
schema: rubricEvaluationSchema
|
|
12218
|
+
});
|
|
12219
|
+
const { score, verdict, hits, misses } = calculateRubricScore(data, rubrics);
|
|
12220
|
+
return {
|
|
12221
|
+
score,
|
|
12222
|
+
verdict,
|
|
12223
|
+
hits,
|
|
12224
|
+
misses,
|
|
12225
|
+
expectedAspectCount: rubrics.length,
|
|
12226
|
+
reasoning: data.overall_reasoning,
|
|
12227
|
+
evaluatorRawRequest,
|
|
12228
|
+
tokenUsage
|
|
12229
|
+
};
|
|
12230
|
+
} catch (e) {
|
|
12231
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
12232
|
+
const evalName = context2.evaluator?.name ?? "llm-judge";
|
|
12233
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
12234
|
+
return {
|
|
12235
|
+
score: 0,
|
|
12236
|
+
verdict: "skip",
|
|
12237
|
+
hits: [],
|
|
12238
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
12239
|
+
expectedAspectCount: rubrics.length,
|
|
12240
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
12241
|
+
evaluatorRawRequest
|
|
12242
|
+
};
|
|
12243
|
+
}
|
|
12106
12244
|
}
|
|
12107
12245
|
/**
|
|
12108
12246
|
* Evaluate using score-range rubrics (analytic rubric scoring).
|
|
@@ -12116,25 +12254,40 @@ ${context2.fileChanges}`;
|
|
|
12116
12254
|
systemPrompt,
|
|
12117
12255
|
target: judgeProvider.targetName
|
|
12118
12256
|
};
|
|
12119
|
-
|
|
12120
|
-
|
|
12121
|
-
|
|
12122
|
-
|
|
12123
|
-
|
|
12124
|
-
|
|
12125
|
-
|
|
12126
|
-
|
|
12127
|
-
|
|
12128
|
-
|
|
12129
|
-
|
|
12130
|
-
|
|
12131
|
-
|
|
12132
|
-
|
|
12133
|
-
|
|
12134
|
-
|
|
12135
|
-
|
|
12136
|
-
|
|
12137
|
-
|
|
12257
|
+
try {
|
|
12258
|
+
const { data, tokenUsage } = await this.runWithRetry({
|
|
12259
|
+
context: context2,
|
|
12260
|
+
judgeProvider,
|
|
12261
|
+
systemPrompt,
|
|
12262
|
+
userPrompt: prompt,
|
|
12263
|
+
schema: scoreRangeEvaluationSchema
|
|
12264
|
+
});
|
|
12265
|
+
const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
|
|
12266
|
+
return {
|
|
12267
|
+
score,
|
|
12268
|
+
verdict,
|
|
12269
|
+
hits,
|
|
12270
|
+
misses,
|
|
12271
|
+
expectedAspectCount: rubrics.length,
|
|
12272
|
+
reasoning: data.overall_reasoning,
|
|
12273
|
+
evaluatorRawRequest,
|
|
12274
|
+
details,
|
|
12275
|
+
tokenUsage
|
|
12276
|
+
};
|
|
12277
|
+
} catch (e) {
|
|
12278
|
+
const message = e instanceof Error ? e.message : String(e);
|
|
12279
|
+
const evalName = context2.evaluator?.name ?? "llm-judge";
|
|
12280
|
+
console.warn(`\u26A0 LLM judge "${evalName}" failed after 3 attempts (${message}) \u2014 skipped`);
|
|
12281
|
+
return {
|
|
12282
|
+
score: 0,
|
|
12283
|
+
verdict: "skip",
|
|
12284
|
+
hits: [],
|
|
12285
|
+
misses: [`Judge parse failure after 3 attempts: ${message}`],
|
|
12286
|
+
expectedAspectCount: rubrics.length,
|
|
12287
|
+
reasoning: `Judge parse failure after 3 attempts: ${message}`,
|
|
12288
|
+
evaluatorRawRequest
|
|
12289
|
+
};
|
|
12290
|
+
}
|
|
12138
12291
|
}
|
|
12139
12292
|
/**
|
|
12140
12293
|
* Build prompt for score-range rubric evaluation.
|
|
@@ -12420,19 +12573,13 @@ var CompositeEvaluator = class {
|
|
|
12420
12573
|
runWeightedAverage(results, weights) {
|
|
12421
12574
|
let totalWeight = 0;
|
|
12422
12575
|
let weightedSum = 0;
|
|
12576
|
+
let evaluatedCount = 0;
|
|
12423
12577
|
const allHits = [];
|
|
12424
12578
|
const allMisses = [];
|
|
12425
12579
|
const reasoningParts = [];
|
|
12426
12580
|
const scores = [];
|
|
12427
12581
|
for (const member of results) {
|
|
12428
12582
|
const weight = weights?.[member.id] ?? 1;
|
|
12429
|
-
totalWeight += weight;
|
|
12430
|
-
weightedSum += member.result.score * weight;
|
|
12431
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
12432
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
12433
|
-
if (member.result.reasoning) {
|
|
12434
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
12435
|
-
}
|
|
12436
12583
|
scores.push({
|
|
12437
12584
|
name: member.id,
|
|
12438
12585
|
type: member.type,
|
|
@@ -12447,6 +12594,32 @@ var CompositeEvaluator = class {
|
|
|
12447
12594
|
details: member.result.details,
|
|
12448
12595
|
tokenUsage: member.result.tokenUsage
|
|
12449
12596
|
});
|
|
12597
|
+
if (member.result.verdict === "skip") {
|
|
12598
|
+
continue;
|
|
12599
|
+
}
|
|
12600
|
+
evaluatedCount++;
|
|
12601
|
+
totalWeight += weight;
|
|
12602
|
+
weightedSum += member.result.score * weight;
|
|
12603
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
12604
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
12605
|
+
if (member.result.reasoning) {
|
|
12606
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
12607
|
+
}
|
|
12608
|
+
}
|
|
12609
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
12610
|
+
return {
|
|
12611
|
+
score: 0,
|
|
12612
|
+
verdict: "skip",
|
|
12613
|
+
hits: [],
|
|
12614
|
+
misses: [],
|
|
12615
|
+
expectedAspectCount: 1,
|
|
12616
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
12617
|
+
evaluatorRawRequest: {
|
|
12618
|
+
aggregator: "weighted_average",
|
|
12619
|
+
...weights ? { weights } : {}
|
|
12620
|
+
},
|
|
12621
|
+
scores
|
|
12622
|
+
};
|
|
12450
12623
|
}
|
|
12451
12624
|
const finalScore = totalWeight > 0 ? weightedSum / totalWeight : 0;
|
|
12452
12625
|
return {
|
|
@@ -12470,19 +12643,8 @@ var CompositeEvaluator = class {
|
|
|
12470
12643
|
const reasoningParts = [];
|
|
12471
12644
|
let passingCount = 0;
|
|
12472
12645
|
let borderlineCount = 0;
|
|
12646
|
+
let evaluatedCount = 0;
|
|
12473
12647
|
for (const member of results) {
|
|
12474
|
-
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
12475
|
-
if (isPassing) {
|
|
12476
|
-
passingCount++;
|
|
12477
|
-
if (member.result.verdict === "borderline") {
|
|
12478
|
-
borderlineCount++;
|
|
12479
|
-
}
|
|
12480
|
-
}
|
|
12481
|
-
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
12482
|
-
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
12483
|
-
if (member.result.reasoning) {
|
|
12484
|
-
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
12485
|
-
}
|
|
12486
12648
|
scores.push({
|
|
12487
12649
|
name: member.id,
|
|
12488
12650
|
type: member.type,
|
|
@@ -12496,8 +12658,39 @@ var CompositeEvaluator = class {
|
|
|
12496
12658
|
details: member.result.details,
|
|
12497
12659
|
tokenUsage: member.result.tokenUsage
|
|
12498
12660
|
});
|
|
12661
|
+
if (member.result.verdict === "skip") {
|
|
12662
|
+
continue;
|
|
12663
|
+
}
|
|
12664
|
+
evaluatedCount++;
|
|
12665
|
+
const isPassing = member.result.verdict === "pass" || member.result.verdict === "borderline";
|
|
12666
|
+
if (isPassing) {
|
|
12667
|
+
passingCount++;
|
|
12668
|
+
if (member.result.verdict === "borderline") {
|
|
12669
|
+
borderlineCount++;
|
|
12670
|
+
}
|
|
12671
|
+
}
|
|
12672
|
+
allHits.push(...member.result.hits.map((h) => `[${member.id}] ${h}`));
|
|
12673
|
+
allMisses.push(...member.result.misses.map((m) => `[${member.id}] ${m}`));
|
|
12674
|
+
if (member.result.reasoning) {
|
|
12675
|
+
reasoningParts.push(`${member.id}: ${member.result.reasoning}`);
|
|
12676
|
+
}
|
|
12499
12677
|
}
|
|
12500
|
-
|
|
12678
|
+
if (evaluatedCount === 0 && results.length > 0) {
|
|
12679
|
+
return {
|
|
12680
|
+
score: 0,
|
|
12681
|
+
verdict: "skip",
|
|
12682
|
+
hits: [],
|
|
12683
|
+
misses: [],
|
|
12684
|
+
expectedAspectCount: 1,
|
|
12685
|
+
reasoning: "All evaluators skipped (infrastructure failure)",
|
|
12686
|
+
evaluatorRawRequest: {
|
|
12687
|
+
aggregator: "threshold",
|
|
12688
|
+
threshold
|
|
12689
|
+
},
|
|
12690
|
+
scores
|
|
12691
|
+
};
|
|
12692
|
+
}
|
|
12693
|
+
const totalCount = evaluatedCount;
|
|
12501
12694
|
const score = totalCount > 0 ? passingCount / totalCount : 0;
|
|
12502
12695
|
const pass = score >= threshold;
|
|
12503
12696
|
if (pass && borderlineCount > 0) {
|
|
@@ -13005,115 +13198,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
13005
13198
|
* Evaluate a single field against the expected value.
|
|
13006
13199
|
*/
|
|
13007
13200
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
13008
|
-
const { path:
|
|
13009
|
-
const candidateValue = resolvePath(candidateData,
|
|
13010
|
-
const expectedValue = resolvePath(expectedData,
|
|
13201
|
+
const { path: path44, match, required = true, weight = 1 } = fieldConfig;
|
|
13202
|
+
const candidateValue = resolvePath(candidateData, path44);
|
|
13203
|
+
const expectedValue = resolvePath(expectedData, path44);
|
|
13011
13204
|
if (expectedValue === void 0) {
|
|
13012
13205
|
return {
|
|
13013
|
-
path:
|
|
13206
|
+
path: path44,
|
|
13014
13207
|
score: 1,
|
|
13015
13208
|
// No expected value means no comparison needed
|
|
13016
13209
|
weight,
|
|
13017
13210
|
hit: true,
|
|
13018
|
-
message: `${
|
|
13211
|
+
message: `${path44}: no expected value`
|
|
13019
13212
|
};
|
|
13020
13213
|
}
|
|
13021
13214
|
if (candidateValue === void 0) {
|
|
13022
13215
|
if (required) {
|
|
13023
13216
|
return {
|
|
13024
|
-
path:
|
|
13217
|
+
path: path44,
|
|
13025
13218
|
score: 0,
|
|
13026
13219
|
weight,
|
|
13027
13220
|
hit: false,
|
|
13028
|
-
message: `${
|
|
13221
|
+
message: `${path44} (required, missing)`
|
|
13029
13222
|
};
|
|
13030
13223
|
}
|
|
13031
13224
|
return {
|
|
13032
|
-
path:
|
|
13225
|
+
path: path44,
|
|
13033
13226
|
score: 1,
|
|
13034
13227
|
// Don't penalize missing optional fields
|
|
13035
13228
|
weight: 0,
|
|
13036
13229
|
// Zero weight means it won't affect the score
|
|
13037
13230
|
hit: true,
|
|
13038
|
-
message: `${
|
|
13231
|
+
message: `${path44}: optional field missing`
|
|
13039
13232
|
};
|
|
13040
13233
|
}
|
|
13041
13234
|
switch (match) {
|
|
13042
13235
|
case "exact":
|
|
13043
|
-
return this.compareExact(
|
|
13236
|
+
return this.compareExact(path44, candidateValue, expectedValue, weight);
|
|
13044
13237
|
case "numeric_tolerance":
|
|
13045
13238
|
return this.compareNumericTolerance(
|
|
13046
|
-
|
|
13239
|
+
path44,
|
|
13047
13240
|
candidateValue,
|
|
13048
13241
|
expectedValue,
|
|
13049
13242
|
fieldConfig,
|
|
13050
13243
|
weight
|
|
13051
13244
|
);
|
|
13052
13245
|
case "date":
|
|
13053
|
-
return this.compareDate(
|
|
13246
|
+
return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
|
|
13054
13247
|
default:
|
|
13055
13248
|
return {
|
|
13056
|
-
path:
|
|
13249
|
+
path: path44,
|
|
13057
13250
|
score: 0,
|
|
13058
13251
|
weight,
|
|
13059
13252
|
hit: false,
|
|
13060
|
-
message: `${
|
|
13253
|
+
message: `${path44}: unknown match type "${match}"`
|
|
13061
13254
|
};
|
|
13062
13255
|
}
|
|
13063
13256
|
}
|
|
13064
13257
|
/**
|
|
13065
13258
|
* Exact equality comparison.
|
|
13066
13259
|
*/
|
|
13067
|
-
compareExact(
|
|
13260
|
+
compareExact(path44, candidateValue, expectedValue, weight) {
|
|
13068
13261
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
13069
13262
|
return {
|
|
13070
|
-
path:
|
|
13263
|
+
path: path44,
|
|
13071
13264
|
score: 1,
|
|
13072
13265
|
weight,
|
|
13073
13266
|
hit: true,
|
|
13074
|
-
message:
|
|
13267
|
+
message: path44
|
|
13075
13268
|
};
|
|
13076
13269
|
}
|
|
13077
13270
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
13078
13271
|
return {
|
|
13079
|
-
path:
|
|
13272
|
+
path: path44,
|
|
13080
13273
|
score: 0,
|
|
13081
13274
|
weight,
|
|
13082
13275
|
hit: false,
|
|
13083
|
-
message: `${
|
|
13276
|
+
message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
13084
13277
|
};
|
|
13085
13278
|
}
|
|
13086
13279
|
return {
|
|
13087
|
-
path:
|
|
13280
|
+
path: path44,
|
|
13088
13281
|
score: 0,
|
|
13089
13282
|
weight,
|
|
13090
13283
|
hit: false,
|
|
13091
|
-
message: `${
|
|
13284
|
+
message: `${path44} (value mismatch)`
|
|
13092
13285
|
};
|
|
13093
13286
|
}
|
|
13094
13287
|
/**
|
|
13095
13288
|
* Numeric comparison with absolute or relative tolerance.
|
|
13096
13289
|
*/
|
|
13097
|
-
compareNumericTolerance(
|
|
13290
|
+
compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13098
13291
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
13099
13292
|
const candidateNum = toNumber2(candidateValue);
|
|
13100
13293
|
const expectedNum = toNumber2(expectedValue);
|
|
13101
13294
|
if (candidateNum === null || expectedNum === null) {
|
|
13102
13295
|
return {
|
|
13103
|
-
path:
|
|
13296
|
+
path: path44,
|
|
13104
13297
|
score: 0,
|
|
13105
13298
|
weight,
|
|
13106
13299
|
hit: false,
|
|
13107
|
-
message: `${
|
|
13300
|
+
message: `${path44} (non-numeric value)`
|
|
13108
13301
|
};
|
|
13109
13302
|
}
|
|
13110
13303
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
13111
13304
|
return {
|
|
13112
|
-
path:
|
|
13305
|
+
path: path44,
|
|
13113
13306
|
score: 0,
|
|
13114
13307
|
weight,
|
|
13115
13308
|
hit: false,
|
|
13116
|
-
message: `${
|
|
13309
|
+
message: `${path44} (invalid numeric value)`
|
|
13117
13310
|
};
|
|
13118
13311
|
}
|
|
13119
13312
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -13126,61 +13319,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
13126
13319
|
}
|
|
13127
13320
|
if (withinTolerance) {
|
|
13128
13321
|
return {
|
|
13129
|
-
path:
|
|
13322
|
+
path: path44,
|
|
13130
13323
|
score: 1,
|
|
13131
13324
|
weight,
|
|
13132
13325
|
hit: true,
|
|
13133
|
-
message: `${
|
|
13326
|
+
message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
|
|
13134
13327
|
};
|
|
13135
13328
|
}
|
|
13136
13329
|
return {
|
|
13137
|
-
path:
|
|
13330
|
+
path: path44,
|
|
13138
13331
|
score: 0,
|
|
13139
13332
|
weight,
|
|
13140
13333
|
hit: false,
|
|
13141
|
-
message: `${
|
|
13334
|
+
message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
13142
13335
|
};
|
|
13143
13336
|
}
|
|
13144
13337
|
/**
|
|
13145
13338
|
* Date comparison with format normalization.
|
|
13146
13339
|
*/
|
|
13147
|
-
compareDate(
|
|
13340
|
+
compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13148
13341
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
13149
13342
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
13150
13343
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
13151
13344
|
if (candidateDate === null) {
|
|
13152
13345
|
return {
|
|
13153
|
-
path:
|
|
13346
|
+
path: path44,
|
|
13154
13347
|
score: 0,
|
|
13155
13348
|
weight,
|
|
13156
13349
|
hit: false,
|
|
13157
|
-
message: `${
|
|
13350
|
+
message: `${path44} (unparseable candidate date)`
|
|
13158
13351
|
};
|
|
13159
13352
|
}
|
|
13160
13353
|
if (expectedDate === null) {
|
|
13161
13354
|
return {
|
|
13162
|
-
path:
|
|
13355
|
+
path: path44,
|
|
13163
13356
|
score: 0,
|
|
13164
13357
|
weight,
|
|
13165
13358
|
hit: false,
|
|
13166
|
-
message: `${
|
|
13359
|
+
message: `${path44} (unparseable expected date)`
|
|
13167
13360
|
};
|
|
13168
13361
|
}
|
|
13169
13362
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
13170
13363
|
return {
|
|
13171
|
-
path:
|
|
13364
|
+
path: path44,
|
|
13172
13365
|
score: 1,
|
|
13173
13366
|
weight,
|
|
13174
13367
|
hit: true,
|
|
13175
|
-
message:
|
|
13368
|
+
message: path44
|
|
13176
13369
|
};
|
|
13177
13370
|
}
|
|
13178
13371
|
return {
|
|
13179
|
-
path:
|
|
13372
|
+
path: path44,
|
|
13180
13373
|
score: 0,
|
|
13181
13374
|
weight,
|
|
13182
13375
|
hit: false,
|
|
13183
|
-
message: `${
|
|
13376
|
+
message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
13184
13377
|
};
|
|
13185
13378
|
}
|
|
13186
13379
|
/**
|
|
@@ -13221,11 +13414,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
13221
13414
|
};
|
|
13222
13415
|
}
|
|
13223
13416
|
};
|
|
13224
|
-
function resolvePath(obj,
|
|
13225
|
-
if (!
|
|
13417
|
+
function resolvePath(obj, path44) {
|
|
13418
|
+
if (!path44 || !obj) {
|
|
13226
13419
|
return void 0;
|
|
13227
13420
|
}
|
|
13228
|
-
const parts =
|
|
13421
|
+
const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
13229
13422
|
let current = obj;
|
|
13230
13423
|
for (const part of parts) {
|
|
13231
13424
|
if (current === null || current === void 0) {
|
|
@@ -13357,7 +13550,7 @@ var LatencyEvaluator = class {
|
|
|
13357
13550
|
|
|
13358
13551
|
// src/evaluation/evaluators/agent-judge.ts
|
|
13359
13552
|
var import_promises25 = __toESM(require("fs/promises"), 1);
|
|
13360
|
-
var
|
|
13553
|
+
var import_node_path34 = __toESM(require("path"), 1);
|
|
13361
13554
|
var import_ai4 = require("ai");
|
|
13362
13555
|
var import_zod5 = require("zod");
|
|
13363
13556
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -13706,8 +13899,8 @@ ${outputSchema}`;
|
|
|
13706
13899
|
}
|
|
13707
13900
|
};
|
|
13708
13901
|
function resolveSandboxed(basePath, relativePath) {
|
|
13709
|
-
const resolved =
|
|
13710
|
-
if (!resolved.startsWith(basePath +
|
|
13902
|
+
const resolved = import_node_path34.default.resolve(basePath, relativePath);
|
|
13903
|
+
if (!resolved.startsWith(basePath + import_node_path34.default.sep) && resolved !== basePath) {
|
|
13711
13904
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
13712
13905
|
}
|
|
13713
13906
|
return resolved;
|
|
@@ -13790,11 +13983,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
13790
13983
|
for (const entry of entries) {
|
|
13791
13984
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
13792
13985
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
13793
|
-
const fullPath =
|
|
13986
|
+
const fullPath = import_node_path34.default.join(dirPath, entry.name);
|
|
13794
13987
|
if (entry.isDirectory()) {
|
|
13795
13988
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
13796
13989
|
} else if (entry.isFile()) {
|
|
13797
|
-
const ext =
|
|
13990
|
+
const ext = import_node_path34.default.extname(entry.name).toLowerCase();
|
|
13798
13991
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
13799
13992
|
try {
|
|
13800
13993
|
const stat8 = await import_promises25.default.stat(fullPath);
|
|
@@ -13806,7 +13999,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
13806
13999
|
regex.lastIndex = 0;
|
|
13807
14000
|
if (regex.test(lines[i])) {
|
|
13808
14001
|
matches.push({
|
|
13809
|
-
file:
|
|
14002
|
+
file: import_node_path34.default.relative(workspacePath, fullPath),
|
|
13810
14003
|
line: i + 1,
|
|
13811
14004
|
text: lines[i].substring(0, 200)
|
|
13812
14005
|
});
|
|
@@ -14043,8 +14236,8 @@ var TokenUsageEvaluator = class {
|
|
|
14043
14236
|
};
|
|
14044
14237
|
|
|
14045
14238
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
14046
|
-
function getNestedValue(obj,
|
|
14047
|
-
const parts =
|
|
14239
|
+
function getNestedValue(obj, path44) {
|
|
14240
|
+
const parts = path44.split(".");
|
|
14048
14241
|
let current = obj;
|
|
14049
14242
|
for (const part of parts) {
|
|
14050
14243
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -14605,9 +14798,9 @@ function runEqualsAssertion(output, value) {
|
|
|
14605
14798
|
}
|
|
14606
14799
|
|
|
14607
14800
|
// src/evaluation/orchestrator.ts
|
|
14608
|
-
var
|
|
14609
|
-
var
|
|
14610
|
-
var
|
|
14801
|
+
var import_node_crypto10 = require("crypto");
|
|
14802
|
+
var import_promises30 = require("fs/promises");
|
|
14803
|
+
var import_node_path42 = __toESM(require("path"), 1);
|
|
14611
14804
|
var import_micromatch4 = __toESM(require("micromatch"), 1);
|
|
14612
14805
|
|
|
14613
14806
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -14798,7 +14991,7 @@ var DeterministicAssertionEvaluator = class {
|
|
|
14798
14991
|
var import_node_fs9 = require("fs");
|
|
14799
14992
|
|
|
14800
14993
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
14801
|
-
var
|
|
14994
|
+
var import_node_path35 = __toESM(require("path"), 1);
|
|
14802
14995
|
async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
14803
14996
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
14804
14997
|
if (!context2) {
|
|
@@ -14847,7 +15040,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
14847
15040
|
};
|
|
14848
15041
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
14849
15042
|
const scriptPath = script[script.length - 1];
|
|
14850
|
-
const cwd =
|
|
15043
|
+
const cwd = import_node_path35.default.dirname(scriptPath);
|
|
14851
15044
|
try {
|
|
14852
15045
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
14853
15046
|
const prompt = stdout.trim();
|
|
@@ -15140,16 +15333,16 @@ function createBuiltinRegistry() {
|
|
|
15140
15333
|
}
|
|
15141
15334
|
|
|
15142
15335
|
// src/evaluation/registry/assertion-discovery.ts
|
|
15143
|
-
var
|
|
15336
|
+
var import_node_path36 = __toESM(require("path"), 1);
|
|
15144
15337
|
var import_fast_glob3 = __toESM(require("fast-glob"), 1);
|
|
15145
15338
|
async function discoverAssertions(registry, baseDir) {
|
|
15146
15339
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
15147
15340
|
const candidateDirs = [];
|
|
15148
|
-
let dir =
|
|
15149
|
-
const root =
|
|
15341
|
+
let dir = import_node_path36.default.resolve(baseDir);
|
|
15342
|
+
const root = import_node_path36.default.parse(dir).root;
|
|
15150
15343
|
while (dir !== root) {
|
|
15151
|
-
candidateDirs.push(
|
|
15152
|
-
dir =
|
|
15344
|
+
candidateDirs.push(import_node_path36.default.join(dir, ".agentv", "assertions"));
|
|
15345
|
+
dir = import_node_path36.default.dirname(dir);
|
|
15153
15346
|
}
|
|
15154
15347
|
let files = [];
|
|
15155
15348
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -15165,7 +15358,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
15165
15358
|
}
|
|
15166
15359
|
const discoveredTypes = [];
|
|
15167
15360
|
for (const filePath of files) {
|
|
15168
|
-
const basename =
|
|
15361
|
+
const basename = import_node_path36.default.basename(filePath);
|
|
15169
15362
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
15170
15363
|
if (registry.has(typeName)) {
|
|
15171
15364
|
continue;
|
|
@@ -15325,7 +15518,7 @@ function getTCritical(df) {
|
|
|
15325
15518
|
// src/evaluation/workspace/file-changes.ts
|
|
15326
15519
|
var import_node_child_process6 = require("child_process");
|
|
15327
15520
|
var import_node_fs10 = require("fs");
|
|
15328
|
-
var
|
|
15521
|
+
var import_node_path37 = __toESM(require("path"), 1);
|
|
15329
15522
|
var import_node_util4 = require("util");
|
|
15330
15523
|
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process6.exec);
|
|
15331
15524
|
function gitExecOpts(workspacePath) {
|
|
@@ -15359,10 +15552,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
15359
15552
|
}
|
|
15360
15553
|
for (const entry of entries) {
|
|
15361
15554
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
15362
|
-
const childPath =
|
|
15555
|
+
const childPath = import_node_path37.default.join(workspacePath, entry);
|
|
15363
15556
|
try {
|
|
15364
15557
|
if (!(0, import_node_fs10.statSync)(childPath).isDirectory()) continue;
|
|
15365
|
-
if (!(0, import_node_fs10.statSync)(
|
|
15558
|
+
if (!(0, import_node_fs10.statSync)(import_node_path37.default.join(childPath, ".git")).isDirectory()) continue;
|
|
15366
15559
|
} catch {
|
|
15367
15560
|
continue;
|
|
15368
15561
|
}
|
|
@@ -15373,9 +15566,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
15373
15566
|
|
|
15374
15567
|
// src/evaluation/workspace/manager.ts
|
|
15375
15568
|
var import_promises26 = require("fs/promises");
|
|
15376
|
-
var
|
|
15377
|
-
var import_node_path37 = __toESM(require("path"), 1);
|
|
15378
|
-
var DEFAULT_WORKSPACE_ROOT = import_node_path37.default.join(import_node_os6.default.homedir(), ".agentv", "workspaces");
|
|
15569
|
+
var import_node_path38 = __toESM(require("path"), 1);
|
|
15379
15570
|
var TemplateNotFoundError = class extends Error {
|
|
15380
15571
|
constructor(templatePath) {
|
|
15381
15572
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -15404,15 +15595,15 @@ async function isDirectory(filePath) {
|
|
|
15404
15595
|
}
|
|
15405
15596
|
}
|
|
15406
15597
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
15407
|
-
const root = workspaceRoot ??
|
|
15408
|
-
return
|
|
15598
|
+
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
15599
|
+
return import_node_path38.default.join(root, evalRunId, caseId);
|
|
15409
15600
|
}
|
|
15410
15601
|
async function copyDirectoryRecursive(src, dest) {
|
|
15411
15602
|
await (0, import_promises26.mkdir)(dest, { recursive: true });
|
|
15412
15603
|
const entries = await (0, import_promises26.readdir)(src, { withFileTypes: true });
|
|
15413
15604
|
for (const entry of entries) {
|
|
15414
|
-
const srcPath =
|
|
15415
|
-
const destPath =
|
|
15605
|
+
const srcPath = import_node_path38.default.join(src, entry.name);
|
|
15606
|
+
const destPath = import_node_path38.default.join(dest, entry.name);
|
|
15416
15607
|
if (entry.name === ".git") {
|
|
15417
15608
|
continue;
|
|
15418
15609
|
}
|
|
@@ -15424,7 +15615,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
15424
15615
|
}
|
|
15425
15616
|
}
|
|
15426
15617
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
15427
|
-
const resolvedTemplatePath =
|
|
15618
|
+
const resolvedTemplatePath = import_node_path38.default.resolve(templatePath);
|
|
15428
15619
|
if (!await fileExists2(resolvedTemplatePath)) {
|
|
15429
15620
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
15430
15621
|
}
|
|
@@ -15472,25 +15663,21 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
15472
15663
|
}
|
|
15473
15664
|
}
|
|
15474
15665
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
15475
|
-
const root = workspaceRoot ??
|
|
15476
|
-
const evalDir =
|
|
15666
|
+
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
15667
|
+
const evalDir = import_node_path38.default.join(root, evalRunId);
|
|
15477
15668
|
if (await fileExists2(evalDir)) {
|
|
15478
15669
|
await (0, import_promises26.rm)(evalDir, { recursive: true, force: true });
|
|
15479
15670
|
}
|
|
15480
15671
|
}
|
|
15481
15672
|
|
|
15482
|
-
// src/evaluation/workspace/
|
|
15673
|
+
// src/evaluation/workspace/pool-manager.ts
|
|
15483
15674
|
var import_node_child_process7 = require("child_process");
|
|
15484
15675
|
var import_node_crypto8 = require("crypto");
|
|
15485
15676
|
var import_node_fs11 = require("fs");
|
|
15486
15677
|
var import_promises27 = require("fs/promises");
|
|
15487
|
-
var
|
|
15488
|
-
var import_node_path38 = __toESM(require("path"), 1);
|
|
15678
|
+
var import_node_path39 = __toESM(require("path"), 1);
|
|
15489
15679
|
var import_node_util5 = require("util");
|
|
15490
15680
|
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process7.execFile);
|
|
15491
|
-
var DEFAULT_CACHE_DIR = import_node_path38.default.join(import_node_os7.default.homedir(), ".agentv", "git-cache");
|
|
15492
|
-
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
15493
|
-
var LOCK_TIMEOUT_MS = 6e4;
|
|
15494
15681
|
function gitEnv() {
|
|
15495
15682
|
const env = { ...process.env };
|
|
15496
15683
|
for (const key of Object.keys(env)) {
|
|
@@ -15505,49 +15692,340 @@ function gitEnv() {
|
|
|
15505
15692
|
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
15506
15693
|
};
|
|
15507
15694
|
}
|
|
15508
|
-
function cacheKey(source) {
|
|
15509
|
-
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
15510
|
-
return (0, import_node_crypto8.createHash)("sha256").update(raw).digest("hex");
|
|
15511
|
-
}
|
|
15512
|
-
function getSourceUrl(source) {
|
|
15513
|
-
return source.type === "git" ? source.url : source.path;
|
|
15514
|
-
}
|
|
15515
15695
|
async function git(args, opts) {
|
|
15516
15696
|
const { stdout } = await execFileAsync("git", args, {
|
|
15517
15697
|
cwd: opts?.cwd,
|
|
15518
|
-
timeout: opts?.timeout ??
|
|
15698
|
+
timeout: opts?.timeout ?? 3e5,
|
|
15519
15699
|
env: gitEnv(),
|
|
15520
15700
|
maxBuffer: 50 * 1024 * 1024
|
|
15521
|
-
// 50MB
|
|
15522
15701
|
});
|
|
15523
15702
|
return stdout.trim();
|
|
15524
15703
|
}
|
|
15525
|
-
|
|
15526
|
-
const
|
|
15527
|
-
|
|
15528
|
-
|
|
15529
|
-
|
|
15530
|
-
|
|
15531
|
-
|
|
15532
|
-
|
|
15533
|
-
|
|
15704
|
+
function normalizeRepoForFingerprint(repo) {
|
|
15705
|
+
const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
15706
|
+
const result = {
|
|
15707
|
+
path: repo.path,
|
|
15708
|
+
source,
|
|
15709
|
+
ref: repo.checkout?.ref ?? "HEAD"
|
|
15710
|
+
};
|
|
15711
|
+
if (repo.clone?.depth !== void 0) {
|
|
15712
|
+
result.depth = repo.clone.depth;
|
|
15713
|
+
}
|
|
15714
|
+
if (repo.clone?.filter !== void 0) {
|
|
15715
|
+
result.filter = repo.clone.filter;
|
|
15716
|
+
}
|
|
15717
|
+
if (repo.clone?.sparse?.length) {
|
|
15718
|
+
result.sparse = [...repo.clone.sparse].sort();
|
|
15719
|
+
}
|
|
15720
|
+
return result;
|
|
15721
|
+
}
|
|
15722
|
+
function computeWorkspaceFingerprint(templatePath, repos) {
|
|
15723
|
+
const canonical = {
|
|
15724
|
+
templatePath: templatePath ?? null,
|
|
15725
|
+
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
15726
|
+
};
|
|
15727
|
+
return (0, import_node_crypto8.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
15728
|
+
}
|
|
15729
|
+
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
15730
|
+
await (0, import_promises27.mkdir)(dest, { recursive: true });
|
|
15731
|
+
const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
|
|
15732
|
+
for (const entry of entries) {
|
|
15733
|
+
const srcPath = import_node_path39.default.join(src, entry.name);
|
|
15734
|
+
const destPath = import_node_path39.default.join(dest, entry.name);
|
|
15735
|
+
if (entry.name === ".git") {
|
|
15736
|
+
continue;
|
|
15737
|
+
}
|
|
15738
|
+
if (entry.isDirectory()) {
|
|
15739
|
+
if (skipDirs?.has(entry.name)) {
|
|
15534
15740
|
continue;
|
|
15535
15741
|
}
|
|
15536
|
-
|
|
15742
|
+
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
15743
|
+
} else {
|
|
15744
|
+
await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
15537
15745
|
}
|
|
15538
15746
|
}
|
|
15539
|
-
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
15540
15747
|
}
|
|
15541
|
-
|
|
15748
|
+
var WorkspacePoolManager = class {
|
|
15749
|
+
poolRoot;
|
|
15750
|
+
constructor(poolRoot) {
|
|
15751
|
+
this.poolRoot = poolRoot ?? getWorkspacePoolRoot();
|
|
15752
|
+
}
|
|
15753
|
+
/**
|
|
15754
|
+
* Acquire a workspace slot from the pool.
|
|
15755
|
+
*
|
|
15756
|
+
* 1. Compute fingerprint from template + repos
|
|
15757
|
+
* 2. Check drift (compare stored metadata.json fingerprint vs computed)
|
|
15758
|
+
* 3. If drift: warn, remove all slots, rematerialize
|
|
15759
|
+
* 4. Acquire a slot (try-lock slot-0, slot-1, ..., up to maxSlots)
|
|
15760
|
+
* 5. If slot exists: reset repos, re-copy template files (skip repo directories)
|
|
15761
|
+
* 6. If new slot: copy template, materialize all repos, write metadata.json
|
|
15762
|
+
* 7. Return the slot (with path, index, isExisting)
|
|
15763
|
+
*/
|
|
15764
|
+
async acquireWorkspace(options) {
|
|
15765
|
+
const { templatePath, repos, maxSlots, repoManager } = options;
|
|
15766
|
+
const fingerprint = computeWorkspaceFingerprint(templatePath, repos);
|
|
15767
|
+
const poolDir = import_node_path39.default.join(this.poolRoot, fingerprint);
|
|
15768
|
+
await (0, import_promises27.mkdir)(poolDir, { recursive: true });
|
|
15769
|
+
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
15770
|
+
if (drifted) {
|
|
15771
|
+
console.warn(
|
|
15772
|
+
`[workspace-pool] Drift detected for fingerprint ${fingerprint.slice(0, 12)}... Removing stale slots.`
|
|
15773
|
+
);
|
|
15774
|
+
await this.removeAllSlots(poolDir);
|
|
15775
|
+
}
|
|
15776
|
+
for (let i = 0; i < maxSlots; i++) {
|
|
15777
|
+
const slotPath = import_node_path39.default.join(poolDir, `slot-${i}`);
|
|
15778
|
+
const lockPath = `${slotPath}.lock`;
|
|
15779
|
+
const locked = await this.tryLock(lockPath);
|
|
15780
|
+
if (!locked) {
|
|
15781
|
+
continue;
|
|
15782
|
+
}
|
|
15783
|
+
const slotExists = (0, import_node_fs11.existsSync)(slotPath);
|
|
15784
|
+
if (slotExists) {
|
|
15785
|
+
await this.resetSlot(slotPath, templatePath, repos);
|
|
15786
|
+
return {
|
|
15787
|
+
index: i,
|
|
15788
|
+
path: slotPath,
|
|
15789
|
+
isExisting: true,
|
|
15790
|
+
lockPath,
|
|
15791
|
+
fingerprint,
|
|
15792
|
+
poolDir
|
|
15793
|
+
};
|
|
15794
|
+
}
|
|
15795
|
+
await (0, import_promises27.mkdir)(slotPath, { recursive: true });
|
|
15796
|
+
if (templatePath) {
|
|
15797
|
+
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
15798
|
+
}
|
|
15799
|
+
if (repos.length > 0) {
|
|
15800
|
+
await repoManager.materializeAll(repos, slotPath);
|
|
15801
|
+
}
|
|
15802
|
+
await this.writeMetadata(poolDir, fingerprint, templatePath ?? null, repos);
|
|
15803
|
+
return {
|
|
15804
|
+
index: i,
|
|
15805
|
+
path: slotPath,
|
|
15806
|
+
isExisting: false,
|
|
15807
|
+
lockPath,
|
|
15808
|
+
fingerprint,
|
|
15809
|
+
poolDir
|
|
15810
|
+
};
|
|
15811
|
+
}
|
|
15812
|
+
throw new Error(
|
|
15813
|
+
`All ${maxSlots} pool slots are locked for fingerprint ${fingerprint.slice(0, 12)}...`
|
|
15814
|
+
);
|
|
15815
|
+
}
|
|
15816
|
+
/** Remove lock file to release a slot. */
|
|
15817
|
+
async releaseSlot(slot) {
|
|
15818
|
+
try {
|
|
15819
|
+
await (0, import_promises27.unlink)(slot.lockPath);
|
|
15820
|
+
} catch {
|
|
15821
|
+
}
|
|
15822
|
+
}
|
|
15823
|
+
/**
|
|
15824
|
+
* Try to acquire a PID-based lock file.
|
|
15825
|
+
* On EEXIST, read PID and check if process is alive. If dead, stale lock — remove and retry.
|
|
15826
|
+
* Returns true if lock acquired, false if slot is actively locked.
|
|
15827
|
+
* Uses a bounded loop (max 3 attempts) to avoid unbounded recursion.
|
|
15828
|
+
*/
|
|
15829
|
+
async tryLock(lockPath) {
|
|
15830
|
+
for (let attempt = 0; attempt < 3; attempt++) {
|
|
15831
|
+
try {
|
|
15832
|
+
await (0, import_promises27.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
15833
|
+
return true;
|
|
15834
|
+
} catch (err) {
|
|
15835
|
+
if (err.code !== "EEXIST") {
|
|
15836
|
+
throw err;
|
|
15837
|
+
}
|
|
15838
|
+
try {
|
|
15839
|
+
const pidStr = await (0, import_promises27.readFile)(lockPath, "utf-8");
|
|
15840
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15841
|
+
if (!Number.isNaN(pid)) {
|
|
15842
|
+
try {
|
|
15843
|
+
process.kill(pid, 0);
|
|
15844
|
+
return false;
|
|
15845
|
+
} catch {
|
|
15846
|
+
await (0, import_promises27.unlink)(lockPath).catch(() => {
|
|
15847
|
+
});
|
|
15848
|
+
continue;
|
|
15849
|
+
}
|
|
15850
|
+
}
|
|
15851
|
+
} catch {
|
|
15852
|
+
}
|
|
15853
|
+
return false;
|
|
15854
|
+
}
|
|
15855
|
+
}
|
|
15856
|
+
return false;
|
|
15857
|
+
}
|
|
15858
|
+
/**
|
|
15859
|
+
* Check if the stored fingerprint in metadata.json differs from the computed one.
|
|
15860
|
+
* Returns true if drifted, false otherwise.
|
|
15861
|
+
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
15862
|
+
*/
|
|
15863
|
+
async checkDrift(poolDir, fingerprint) {
|
|
15864
|
+
const metadataPath = import_node_path39.default.join(poolDir, "metadata.json");
|
|
15865
|
+
try {
|
|
15866
|
+
const raw = await (0, import_promises27.readFile)(metadataPath, "utf-8");
|
|
15867
|
+
const metadata = JSON.parse(raw);
|
|
15868
|
+
return metadata.fingerprint !== fingerprint;
|
|
15869
|
+
} catch {
|
|
15870
|
+
return false;
|
|
15871
|
+
}
|
|
15872
|
+
}
|
|
15873
|
+
/** Write metadata.json with fingerprint, inputs, and timestamp. */
|
|
15874
|
+
async writeMetadata(poolDir, fingerprint, templatePath, repos) {
|
|
15875
|
+
const metadata = {
|
|
15876
|
+
fingerprint,
|
|
15877
|
+
templatePath,
|
|
15878
|
+
repos,
|
|
15879
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
15880
|
+
};
|
|
15881
|
+
await (0, import_promises27.writeFile)(import_node_path39.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
15882
|
+
}
|
|
15883
|
+
/** Remove all slot directories and their lock files from a pool directory. */
|
|
15884
|
+
async removeAllSlots(poolDir) {
|
|
15885
|
+
const entries = await (0, import_promises27.readdir)(poolDir);
|
|
15886
|
+
for (const entry of entries) {
|
|
15887
|
+
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
15888
|
+
const lockPath = import_node_path39.default.join(poolDir, `${entry}.lock`);
|
|
15889
|
+
if ((0, import_node_fs11.existsSync)(lockPath)) {
|
|
15890
|
+
try {
|
|
15891
|
+
const pidStr = await (0, import_promises27.readFile)(lockPath, "utf-8");
|
|
15892
|
+
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15893
|
+
if (!Number.isNaN(pid)) {
|
|
15894
|
+
try {
|
|
15895
|
+
process.kill(pid, 0);
|
|
15896
|
+
console.warn(`[workspace-pool] Skipping slot ${entry}: locked by PID ${pid}`);
|
|
15897
|
+
continue;
|
|
15898
|
+
} catch {
|
|
15899
|
+
}
|
|
15900
|
+
}
|
|
15901
|
+
} catch {
|
|
15902
|
+
}
|
|
15903
|
+
}
|
|
15904
|
+
await (0, import_promises27.rm)(import_node_path39.default.join(poolDir, entry), { recursive: true, force: true });
|
|
15905
|
+
await (0, import_promises27.rm)(lockPath, { force: true }).catch(() => {
|
|
15906
|
+
});
|
|
15907
|
+
}
|
|
15908
|
+
}
|
|
15909
|
+
await (0, import_promises27.rm)(import_node_path39.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
15910
|
+
});
|
|
15911
|
+
}
|
|
15912
|
+
/**
|
|
15913
|
+
* Reset an existing slot for reuse:
|
|
15914
|
+
* 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
|
|
15915
|
+
* 2. Re-copy template files (skip repo directories)
|
|
15916
|
+
*/
|
|
15917
|
+
async resetSlot(slotPath, templatePath, repos) {
|
|
15918
|
+
for (const repo of repos) {
|
|
15919
|
+
const repoDir = import_node_path39.default.join(slotPath, repo.path);
|
|
15920
|
+
if (!(0, import_node_fs11.existsSync)(repoDir)) {
|
|
15921
|
+
continue;
|
|
15922
|
+
}
|
|
15923
|
+
const ref = repo.checkout?.ref ?? "HEAD";
|
|
15924
|
+
await git(["reset", "--hard", ref], { cwd: repoDir });
|
|
15925
|
+
await git(["clean", "-fd"], { cwd: repoDir });
|
|
15926
|
+
}
|
|
15927
|
+
if (templatePath) {
|
|
15928
|
+
const repoDirNames = new Set(
|
|
15929
|
+
repos.map((r) => {
|
|
15930
|
+
const normalized = r.path.replace(/^\.\//, "");
|
|
15931
|
+
return normalized.split("/")[0];
|
|
15932
|
+
})
|
|
15933
|
+
);
|
|
15934
|
+
await copyDirectoryRecursive2(templatePath, slotPath, repoDirNames);
|
|
15935
|
+
}
|
|
15936
|
+
}
|
|
15937
|
+
};
|
|
15938
|
+
|
|
15939
|
+
// src/evaluation/workspace/repo-manager.ts
|
|
15940
|
+
var import_node_child_process8 = require("child_process");
|
|
15941
|
+
var import_node_crypto9 = require("crypto");
|
|
15942
|
+
var import_node_fs12 = require("fs");
|
|
15943
|
+
var import_promises28 = require("fs/promises");
|
|
15944
|
+
var import_node_path40 = __toESM(require("path"), 1);
|
|
15945
|
+
var import_node_util6 = require("util");
|
|
15946
|
+
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process8.execFile);
|
|
15947
|
+
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
15948
|
+
var LOCK_TIMEOUT_MS = 6e4;
|
|
15949
|
+
function gitEnv2() {
|
|
15950
|
+
const env = { ...process.env };
|
|
15951
|
+
for (const key of Object.keys(env)) {
|
|
15952
|
+
if (key.startsWith("GIT_") && key !== "GIT_SSH_COMMAND") {
|
|
15953
|
+
delete env[key];
|
|
15954
|
+
}
|
|
15955
|
+
}
|
|
15956
|
+
return {
|
|
15957
|
+
...env,
|
|
15958
|
+
GIT_TERMINAL_PROMPT: "0",
|
|
15959
|
+
GIT_ASKPASS: "",
|
|
15960
|
+
GIT_SSH_COMMAND: "ssh -o BatchMode=yes"
|
|
15961
|
+
};
|
|
15962
|
+
}
|
|
15963
|
+
function cacheKey(source) {
|
|
15964
|
+
const raw = source.type === "git" ? source.url.toLowerCase().replace(/\.git$/, "") : source.path;
|
|
15965
|
+
return (0, import_node_crypto9.createHash)("sha256").update(raw).digest("hex");
|
|
15966
|
+
}
|
|
15967
|
+
function getSourceUrl(source) {
|
|
15968
|
+
return source.type === "git" ? source.url : source.path;
|
|
15969
|
+
}
|
|
15970
|
+
async function git2(args, opts) {
|
|
15971
|
+
const { stdout } = await execFileAsync2("git", args, {
|
|
15972
|
+
cwd: opts?.cwd,
|
|
15973
|
+
timeout: opts?.timeout ?? DEFAULT_TIMEOUT_MS2,
|
|
15974
|
+
env: gitEnv2(),
|
|
15975
|
+
maxBuffer: 50 * 1024 * 1024
|
|
15976
|
+
// 50MB
|
|
15977
|
+
});
|
|
15978
|
+
return stdout.trim();
|
|
15979
|
+
}
|
|
15980
|
+
async function acquireLock(lockPath) {
|
|
15981
|
+
const start = Date.now();
|
|
15982
|
+
while (Date.now() - start < LOCK_TIMEOUT_MS) {
|
|
15983
|
+
try {
|
|
15984
|
+
await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
15985
|
+
return;
|
|
15986
|
+
} catch (err) {
|
|
15987
|
+
if (err.code === "EEXIST") {
|
|
15988
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
15989
|
+
continue;
|
|
15990
|
+
}
|
|
15991
|
+
throw err;
|
|
15992
|
+
}
|
|
15993
|
+
}
|
|
15994
|
+
throw new Error(`Timed out waiting for lock: ${lockPath}`);
|
|
15995
|
+
}
|
|
15996
|
+
async function releaseLock(lockPath) {
|
|
15542
15997
|
try {
|
|
15543
|
-
await (0,
|
|
15998
|
+
await (0, import_promises28.unlink)(lockPath);
|
|
15544
15999
|
} catch {
|
|
15545
16000
|
}
|
|
15546
16001
|
}
|
|
15547
16002
|
var RepoManager = class {
|
|
15548
16003
|
cacheDir;
|
|
15549
|
-
|
|
15550
|
-
|
|
16004
|
+
verbose;
|
|
16005
|
+
constructor(cacheDir, verbose = false) {
|
|
16006
|
+
this.cacheDir = cacheDir ?? getGitCacheRoot();
|
|
16007
|
+
this.verbose = verbose;
|
|
16008
|
+
}
|
|
16009
|
+
async runGit(args, opts) {
|
|
16010
|
+
const startedAt = Date.now();
|
|
16011
|
+
if (this.verbose) {
|
|
16012
|
+
console.log(`[repo] git start cwd=${opts?.cwd ?? process.cwd()} args=${args.join(" ")}`);
|
|
16013
|
+
}
|
|
16014
|
+
try {
|
|
16015
|
+
const output = await git2(args, opts);
|
|
16016
|
+
if (this.verbose) {
|
|
16017
|
+
console.log(`[repo] git ok durationMs=${Date.now() - startedAt} args=${args.join(" ")}`);
|
|
16018
|
+
}
|
|
16019
|
+
return output;
|
|
16020
|
+
} catch (error) {
|
|
16021
|
+
if (this.verbose) {
|
|
16022
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
16023
|
+
console.log(
|
|
16024
|
+
`[repo] git fail durationMs=${Date.now() - startedAt} args=${args.join(" ")} error=${message}`
|
|
16025
|
+
);
|
|
16026
|
+
}
|
|
16027
|
+
throw error;
|
|
16028
|
+
}
|
|
15551
16029
|
}
|
|
15552
16030
|
/**
|
|
15553
16031
|
* Ensure a bare mirror cache exists for the given source.
|
|
@@ -15556,11 +16034,19 @@ var RepoManager = class {
|
|
|
15556
16034
|
*/
|
|
15557
16035
|
async ensureCache(source, depth, resolve) {
|
|
15558
16036
|
const key = cacheKey(source);
|
|
15559
|
-
const cachePath =
|
|
16037
|
+
const cachePath = import_node_path40.default.join(this.cacheDir, key);
|
|
15560
16038
|
const lockPath = `${cachePath}.lock`;
|
|
15561
|
-
const cacheExists = (0,
|
|
16039
|
+
const cacheExists = (0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"));
|
|
16040
|
+
if (this.verbose) {
|
|
16041
|
+
console.log(
|
|
16042
|
+
`[repo] ensureCache source=${getSourceUrl(source)} resolve=${resolve ?? "remote"} cache=${cachePath}`
|
|
16043
|
+
);
|
|
16044
|
+
}
|
|
15562
16045
|
if (resolve === "local") {
|
|
15563
16046
|
if (cacheExists) {
|
|
16047
|
+
if (this.verbose) {
|
|
16048
|
+
console.log(`[repo] using existing local cache ${cachePath}`);
|
|
16049
|
+
}
|
|
15564
16050
|
return cachePath;
|
|
15565
16051
|
}
|
|
15566
16052
|
const url = getSourceUrl(source);
|
|
@@ -15568,16 +16054,26 @@ var RepoManager = class {
|
|
|
15568
16054
|
`No cache found for \`${url}\`. Run \`agentv cache add --url ${url} --from <local-path>\` to seed it.`
|
|
15569
16055
|
);
|
|
15570
16056
|
}
|
|
15571
|
-
await (0,
|
|
16057
|
+
await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
|
|
16058
|
+
const lockStartedAt = Date.now();
|
|
15572
16059
|
await acquireLock(lockPath);
|
|
16060
|
+
if (this.verbose) {
|
|
16061
|
+
console.log(`[repo] lock acquired path=${lockPath} waitedMs=${Date.now() - lockStartedAt}`);
|
|
16062
|
+
}
|
|
15573
16063
|
try {
|
|
15574
16064
|
if (cacheExists) {
|
|
16065
|
+
if (this.verbose) {
|
|
16066
|
+
console.log(`[repo] refreshing existing cache ${cachePath}`);
|
|
16067
|
+
}
|
|
15575
16068
|
const fetchArgs = ["fetch", "--prune"];
|
|
15576
16069
|
if (depth) {
|
|
15577
16070
|
fetchArgs.push("--depth", String(depth));
|
|
15578
16071
|
}
|
|
15579
|
-
await
|
|
16072
|
+
await this.runGit(fetchArgs, { cwd: cachePath });
|
|
15580
16073
|
} else {
|
|
16074
|
+
if (this.verbose) {
|
|
16075
|
+
console.log(`[repo] creating new cache ${cachePath}`);
|
|
16076
|
+
}
|
|
15581
16077
|
const cloneArgs = ["clone", "--mirror", "--bare"];
|
|
15582
16078
|
if (depth) {
|
|
15583
16079
|
cloneArgs.push("--depth", String(depth));
|
|
@@ -15585,10 +16081,13 @@ var RepoManager = class {
|
|
|
15585
16081
|
const sourceUrl = getSourceUrl(source);
|
|
15586
16082
|
const cloneUrl = depth && source.type === "local" ? `file://${sourceUrl}` : sourceUrl;
|
|
15587
16083
|
cloneArgs.push(cloneUrl, cachePath);
|
|
15588
|
-
await
|
|
16084
|
+
await this.runGit(cloneArgs);
|
|
15589
16085
|
}
|
|
15590
16086
|
} finally {
|
|
15591
16087
|
await releaseLock(lockPath);
|
|
16088
|
+
if (this.verbose) {
|
|
16089
|
+
console.log(`[repo] lock released path=${lockPath}`);
|
|
16090
|
+
}
|
|
15592
16091
|
}
|
|
15593
16092
|
return cachePath;
|
|
15594
16093
|
}
|
|
@@ -15597,7 +16096,13 @@ var RepoManager = class {
|
|
|
15597
16096
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
15598
16097
|
*/
|
|
15599
16098
|
async materialize(repo, workspacePath) {
|
|
15600
|
-
const targetDir =
|
|
16099
|
+
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16100
|
+
const startedAt = Date.now();
|
|
16101
|
+
if (this.verbose) {
|
|
16102
|
+
console.log(
|
|
16103
|
+
`[repo] materialize start path=${repo.path} source=${getSourceUrl(repo.source)} workspace=${workspacePath}`
|
|
16104
|
+
);
|
|
16105
|
+
}
|
|
15601
16106
|
const cachePath = await this.ensureCache(
|
|
15602
16107
|
repo.source,
|
|
15603
16108
|
repo.clone?.depth,
|
|
@@ -15613,10 +16118,10 @@ var RepoManager = class {
|
|
|
15613
16118
|
cloneArgs.push("--no-checkout");
|
|
15614
16119
|
const cloneUrl = repo.clone?.depth || repo.clone?.filter ? `file://${cachePath}` : cachePath;
|
|
15615
16120
|
cloneArgs.push(cloneUrl, targetDir);
|
|
15616
|
-
await
|
|
16121
|
+
await this.runGit(cloneArgs);
|
|
15617
16122
|
if (repo.clone?.sparse?.length) {
|
|
15618
|
-
await
|
|
15619
|
-
await
|
|
16123
|
+
await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
|
|
16124
|
+
await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
|
|
15620
16125
|
}
|
|
15621
16126
|
const ref = repo.checkout?.ref ?? "HEAD";
|
|
15622
16127
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
@@ -15624,7 +16129,7 @@ var RepoManager = class {
|
|
|
15624
16129
|
if (resolve === "remote" && repo.source.type === "git") {
|
|
15625
16130
|
const url = getSourceUrl(repo.source);
|
|
15626
16131
|
try {
|
|
15627
|
-
const lsOutput = await
|
|
16132
|
+
const lsOutput = await this.runGit(["ls-remote", url, ref]);
|
|
15628
16133
|
const match = lsOutput.split(" ")[0];
|
|
15629
16134
|
if (!match) {
|
|
15630
16135
|
throw new Error(`Ref '${ref}' not found on remote ${url}`);
|
|
@@ -15637,17 +16142,26 @@ var RepoManager = class {
|
|
|
15637
16142
|
} else {
|
|
15638
16143
|
resolvedSha = ref;
|
|
15639
16144
|
}
|
|
15640
|
-
|
|
16145
|
+
if (this.verbose) {
|
|
16146
|
+
console.log(
|
|
16147
|
+
`[repo] checkout path=${repo.path} ref=${ref} resolved=${resolvedSha} resolve=${resolve}`
|
|
16148
|
+
);
|
|
16149
|
+
}
|
|
16150
|
+
await this.runGit(["checkout", resolvedSha], { cwd: targetDir });
|
|
15641
16151
|
const ancestor = repo.checkout?.ancestor ?? 0;
|
|
15642
16152
|
if (ancestor > 0) {
|
|
15643
16153
|
try {
|
|
15644
|
-
const ancestorSha = await
|
|
15645
|
-
|
|
16154
|
+
const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
|
|
16155
|
+
cwd: targetDir
|
|
16156
|
+
});
|
|
16157
|
+
await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
|
|
15646
16158
|
} catch {
|
|
15647
16159
|
if (repo.clone?.depth) {
|
|
15648
|
-
await
|
|
15649
|
-
const ancestorSha = await
|
|
15650
|
-
|
|
16160
|
+
await this.runGit(["fetch", "--deepen", String(ancestor)], { cwd: targetDir });
|
|
16161
|
+
const ancestorSha = await this.runGit(["rev-parse", `HEAD~${ancestor}`], {
|
|
16162
|
+
cwd: targetDir
|
|
16163
|
+
});
|
|
16164
|
+
await this.runGit(["checkout", ancestorSha], { cwd: targetDir });
|
|
15651
16165
|
} else {
|
|
15652
16166
|
throw new Error(
|
|
15653
16167
|
`Cannot resolve ancestor ${ancestor} of ref '${ref}'. If using shallow clone, increase clone.depth to at least ${ancestor + 1}.`
|
|
@@ -15655,27 +16169,38 @@ var RepoManager = class {
|
|
|
15655
16169
|
}
|
|
15656
16170
|
}
|
|
15657
16171
|
}
|
|
16172
|
+
if (this.verbose) {
|
|
16173
|
+
console.log(
|
|
16174
|
+
`[repo] materialize done path=${repo.path} target=${targetDir} durationMs=${Date.now() - startedAt}`
|
|
16175
|
+
);
|
|
16176
|
+
}
|
|
15658
16177
|
}
|
|
15659
16178
|
/** Materialize all repos into the workspace. */
|
|
15660
16179
|
async materializeAll(repos, workspacePath) {
|
|
16180
|
+
if (this.verbose) {
|
|
16181
|
+
console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`);
|
|
16182
|
+
}
|
|
15661
16183
|
for (const repo of repos) {
|
|
15662
16184
|
await this.materialize(repo, workspacePath);
|
|
15663
16185
|
}
|
|
16186
|
+
if (this.verbose) {
|
|
16187
|
+
console.log("[repo] materializeAll complete");
|
|
16188
|
+
}
|
|
15664
16189
|
}
|
|
15665
16190
|
/** Reset repos in workspace to their checkout state. */
|
|
15666
16191
|
async reset(repos, workspacePath, strategy) {
|
|
15667
16192
|
if (strategy === "recreate") {
|
|
15668
16193
|
for (const repo of repos) {
|
|
15669
|
-
const targetDir =
|
|
15670
|
-
await (0,
|
|
16194
|
+
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16195
|
+
await (0, import_promises28.rm)(targetDir, { recursive: true, force: true });
|
|
15671
16196
|
}
|
|
15672
16197
|
await this.materializeAll(repos, workspacePath);
|
|
15673
16198
|
return;
|
|
15674
16199
|
}
|
|
15675
16200
|
for (const repo of repos) {
|
|
15676
|
-
const targetDir =
|
|
15677
|
-
await
|
|
15678
|
-
await
|
|
16201
|
+
const targetDir = import_node_path40.default.join(workspacePath, repo.path);
|
|
16202
|
+
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
16203
|
+
await this.runGit(["clean", "-fd"], { cwd: targetDir });
|
|
15679
16204
|
}
|
|
15680
16205
|
}
|
|
15681
16206
|
/**
|
|
@@ -15685,21 +16210,21 @@ var RepoManager = class {
|
|
|
15685
16210
|
async seedCache(localPath, remoteUrl, opts) {
|
|
15686
16211
|
const source = { type: "git", url: remoteUrl };
|
|
15687
16212
|
const key = cacheKey(source);
|
|
15688
|
-
const cachePath =
|
|
16213
|
+
const cachePath = import_node_path40.default.join(this.cacheDir, key);
|
|
15689
16214
|
const lockPath = `${cachePath}.lock`;
|
|
15690
|
-
await (0,
|
|
16215
|
+
await (0, import_promises28.mkdir)(this.cacheDir, { recursive: true });
|
|
15691
16216
|
await acquireLock(lockPath);
|
|
15692
16217
|
try {
|
|
15693
|
-
if ((0,
|
|
16218
|
+
if ((0, import_node_fs12.existsSync)(import_node_path40.default.join(cachePath, "HEAD"))) {
|
|
15694
16219
|
if (!opts?.force) {
|
|
15695
16220
|
throw new Error(
|
|
15696
16221
|
`Cache already exists for ${remoteUrl} at ${cachePath}. Use force to overwrite.`
|
|
15697
16222
|
);
|
|
15698
16223
|
}
|
|
15699
|
-
await (0,
|
|
16224
|
+
await (0, import_promises28.rm)(cachePath, { recursive: true, force: true });
|
|
15700
16225
|
}
|
|
15701
|
-
await
|
|
15702
|
-
await
|
|
16226
|
+
await git2(["clone", "--mirror", "--bare", localPath, cachePath]);
|
|
16227
|
+
await git2(["remote", "set-url", "origin", remoteUrl], { cwd: cachePath });
|
|
15703
16228
|
} finally {
|
|
15704
16229
|
await releaseLock(lockPath);
|
|
15705
16230
|
}
|
|
@@ -15707,41 +16232,41 @@ var RepoManager = class {
|
|
|
15707
16232
|
}
|
|
15708
16233
|
/** Remove the entire cache directory. */
|
|
15709
16234
|
async cleanCache() {
|
|
15710
|
-
await (0,
|
|
16235
|
+
await (0, import_promises28.rm)(this.cacheDir, { recursive: true, force: true });
|
|
15711
16236
|
}
|
|
15712
16237
|
};
|
|
15713
16238
|
|
|
15714
16239
|
// src/evaluation/workspace/resolve.ts
|
|
15715
|
-
var
|
|
15716
|
-
var
|
|
16240
|
+
var import_promises29 = require("fs/promises");
|
|
16241
|
+
var import_node_path41 = __toESM(require("path"), 1);
|
|
15717
16242
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
15718
16243
|
if (!templatePath) {
|
|
15719
16244
|
return void 0;
|
|
15720
16245
|
}
|
|
15721
|
-
const resolved =
|
|
15722
|
-
const stats = await (0,
|
|
16246
|
+
const resolved = import_node_path41.default.resolve(templatePath);
|
|
16247
|
+
const stats = await (0, import_promises29.stat)(resolved);
|
|
15723
16248
|
if (stats.isFile()) {
|
|
15724
16249
|
return {
|
|
15725
|
-
dir:
|
|
16250
|
+
dir: import_node_path41.default.dirname(resolved),
|
|
15726
16251
|
workspaceFile: resolved
|
|
15727
16252
|
};
|
|
15728
16253
|
}
|
|
15729
16254
|
if (!stats.isDirectory()) {
|
|
15730
16255
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
15731
16256
|
}
|
|
15732
|
-
const entries = await (0,
|
|
16257
|
+
const entries = await (0, import_promises29.readdir)(resolved);
|
|
15733
16258
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
15734
16259
|
if (workspaceFiles.length === 1) {
|
|
15735
16260
|
return {
|
|
15736
16261
|
dir: resolved,
|
|
15737
|
-
workspaceFile:
|
|
16262
|
+
workspaceFile: import_node_path41.default.join(resolved, workspaceFiles[0])
|
|
15738
16263
|
};
|
|
15739
16264
|
}
|
|
15740
16265
|
if (workspaceFiles.length > 1) {
|
|
15741
16266
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
15742
16267
|
return {
|
|
15743
16268
|
dir: resolved,
|
|
15744
|
-
workspaceFile: conventionFile ?
|
|
16269
|
+
workspaceFile: conventionFile ? import_node_path41.default.join(resolved, conventionFile) : void 0
|
|
15745
16270
|
};
|
|
15746
16271
|
}
|
|
15747
16272
|
return { dir: resolved };
|
|
@@ -15823,7 +16348,10 @@ async function runEvaluation(options) {
|
|
|
15823
16348
|
trials,
|
|
15824
16349
|
streamCallbacks,
|
|
15825
16350
|
totalBudgetUsd,
|
|
15826
|
-
failOnError
|
|
16351
|
+
failOnError,
|
|
16352
|
+
poolWorkspaces,
|
|
16353
|
+
poolMaxSlots: configPoolMaxSlots,
|
|
16354
|
+
workspace: userWorkspacePath
|
|
15827
16355
|
} = options;
|
|
15828
16356
|
let useCache = options.useCache;
|
|
15829
16357
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -15832,7 +16360,7 @@ async function runEvaluation(options) {
|
|
|
15832
16360
|
);
|
|
15833
16361
|
useCache = false;
|
|
15834
16362
|
}
|
|
15835
|
-
const evalRunId = (0,
|
|
16363
|
+
const evalRunId = (0, import_node_crypto10.randomUUID)();
|
|
15836
16364
|
const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
|
|
15837
16365
|
const filteredEvalCases = filterEvalCases(evalCases, filter);
|
|
15838
16366
|
if (filteredEvalCases.length === 0) {
|
|
@@ -15897,7 +16425,7 @@ async function runEvaluation(options) {
|
|
|
15897
16425
|
];
|
|
15898
16426
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveJudgeProvider);
|
|
15899
16427
|
const typeRegistry = createBuiltinRegistry();
|
|
15900
|
-
const discoveryBaseDir = evalFilePath ?
|
|
16428
|
+
const discoveryBaseDir = evalFilePath ? import_node_path42.default.dirname(import_node_path42.default.resolve(evalFilePath)) : process.cwd();
|
|
15901
16429
|
const evalDir = discoveryBaseDir;
|
|
15902
16430
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
15903
16431
|
const providerRegistry = createBuiltinProviderRegistry();
|
|
@@ -15953,11 +16481,25 @@ async function runEvaluation(options) {
|
|
|
15953
16481
|
const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
|
|
15954
16482
|
const workspaceTemplate = resolvedTemplate?.dir;
|
|
15955
16483
|
let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
|
|
16484
|
+
const setupLog = (message) => {
|
|
16485
|
+
if (verbose) {
|
|
16486
|
+
console.log(`[setup] ${message}`);
|
|
16487
|
+
}
|
|
16488
|
+
};
|
|
15956
16489
|
const isPerTestIsolation = suiteWorkspace?.isolation === "per_test";
|
|
15957
|
-
|
|
16490
|
+
if (userWorkspacePath && isPerTestIsolation) {
|
|
16491
|
+
throw new Error(
|
|
16492
|
+
"--workspace is incompatible with isolation: per_test. Use isolation: shared (default)."
|
|
16493
|
+
);
|
|
16494
|
+
}
|
|
16495
|
+
const hasSharedWorkspace = !!(userWorkspacePath || workspaceTemplate || suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation);
|
|
16496
|
+
const usePool = poolWorkspaces === true && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !userWorkspacePath;
|
|
15958
16497
|
const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
|
|
15959
|
-
const workers = hasSharedWorkspace ? 1 : requestedWorkers;
|
|
15960
|
-
|
|
16498
|
+
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
16499
|
+
setupLog(
|
|
16500
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
|
|
16501
|
+
);
|
|
16502
|
+
if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
|
|
15961
16503
|
console.warn(
|
|
15962
16504
|
`Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
|
|
15963
16505
|
);
|
|
@@ -15966,285 +16508,383 @@ async function runEvaluation(options) {
|
|
|
15966
16508
|
let sharedWorkspacePath;
|
|
15967
16509
|
let sharedBaselineCommit;
|
|
15968
16510
|
let beforeAllOutput;
|
|
15969
|
-
|
|
16511
|
+
let poolManager;
|
|
16512
|
+
let poolSlot;
|
|
16513
|
+
const poolSlots = [];
|
|
16514
|
+
const availablePoolSlots = [];
|
|
16515
|
+
const poolSlotBaselines = /* @__PURE__ */ new Map();
|
|
16516
|
+
const poolMaxSlots = Math.min(configPoolMaxSlots ?? 10, 50);
|
|
16517
|
+
if (userWorkspacePath) {
|
|
16518
|
+
sharedWorkspacePath = userWorkspacePath;
|
|
16519
|
+
setupLog(`using user-provided workspace: ${userWorkspacePath}`);
|
|
16520
|
+
} else if (usePool && suiteWorkspace?.repos) {
|
|
16521
|
+
const slotsNeeded = workers;
|
|
16522
|
+
setupLog(`acquiring ${slotsNeeded} workspace pool slot(s) (pool capacity: ${poolMaxSlots})`);
|
|
16523
|
+
poolManager = new WorkspacePoolManager(getWorkspacePoolRoot());
|
|
16524
|
+
const poolRepoManager = new RepoManager(void 0, verbose);
|
|
16525
|
+
for (let i = 0; i < slotsNeeded; i++) {
|
|
16526
|
+
const slot = await poolManager.acquireWorkspace({
|
|
16527
|
+
templatePath: workspaceTemplate,
|
|
16528
|
+
repos: suiteWorkspace.repos,
|
|
16529
|
+
maxSlots: poolMaxSlots,
|
|
16530
|
+
repoManager: poolRepoManager
|
|
16531
|
+
});
|
|
16532
|
+
poolSlots.push(slot);
|
|
16533
|
+
setupLog(`pool slot ${i} acquired at: ${slot.path} (existing=${slot.isExisting})`);
|
|
16534
|
+
}
|
|
16535
|
+
if (slotsNeeded === 1) {
|
|
16536
|
+
poolSlot = poolSlots[0];
|
|
16537
|
+
sharedWorkspacePath = poolSlot.path;
|
|
16538
|
+
} else {
|
|
16539
|
+
availablePoolSlots.push(...poolSlots);
|
|
16540
|
+
}
|
|
16541
|
+
} else if (workspaceTemplate) {
|
|
16542
|
+
setupLog(`creating shared workspace from template: ${workspaceTemplate}`);
|
|
15970
16543
|
try {
|
|
15971
16544
|
sharedWorkspacePath = await createTempWorkspace(workspaceTemplate, evalRunId, "shared");
|
|
16545
|
+
setupLog(`shared workspace created at: ${sharedWorkspacePath}`);
|
|
15972
16546
|
} catch (error) {
|
|
15973
16547
|
const message = error instanceof Error ? error.message : String(error);
|
|
15974
16548
|
throw new Error(`Failed to create shared workspace: ${message}`);
|
|
15975
16549
|
}
|
|
16550
|
+
} else if (suiteWorkspace?.before_all || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
16551
|
+
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
16552
|
+
await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
16553
|
+
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
16554
|
+
}
|
|
16555
|
+
try {
|
|
15976
16556
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
15977
|
-
const copiedWorkspaceFile =
|
|
16557
|
+
const copiedWorkspaceFile = import_node_path42.default.join(sharedWorkspacePath, import_node_path42.default.basename(suiteWorkspaceFile));
|
|
15978
16558
|
try {
|
|
15979
|
-
await (0,
|
|
16559
|
+
await (0, import_promises30.stat)(copiedWorkspaceFile);
|
|
15980
16560
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
15981
16561
|
} catch {
|
|
15982
16562
|
}
|
|
15983
16563
|
}
|
|
15984
|
-
|
|
15985
|
-
sharedWorkspacePath
|
|
15986
|
-
|
|
15987
|
-
|
|
15988
|
-
|
|
15989
|
-
|
|
15990
|
-
|
|
15991
|
-
|
|
15992
|
-
|
|
15993
|
-
|
|
15994
|
-
|
|
15995
|
-
|
|
15996
|
-
});
|
|
15997
|
-
}
|
|
15998
|
-
throw new Error(`Failed to materialize repos: ${message}`);
|
|
15999
|
-
}
|
|
16000
|
-
}
|
|
16001
|
-
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
16002
|
-
const scriptContext = {
|
|
16003
|
-
workspacePath: sharedWorkspacePath,
|
|
16004
|
-
testId: "__before_all__",
|
|
16005
|
-
evalRunId,
|
|
16006
|
-
evalDir
|
|
16007
|
-
};
|
|
16008
|
-
try {
|
|
16009
|
-
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
16010
|
-
} catch (error) {
|
|
16011
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
16012
|
-
if (sharedWorkspacePath) {
|
|
16013
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16014
|
-
});
|
|
16015
|
-
}
|
|
16016
|
-
throw new Error(`before_all script failed: ${message}`);
|
|
16017
|
-
}
|
|
16018
|
-
}
|
|
16019
|
-
if (sharedWorkspacePath) {
|
|
16020
|
-
try {
|
|
16021
|
-
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
16022
|
-
} catch {
|
|
16023
|
-
}
|
|
16024
|
-
}
|
|
16025
|
-
let nextWorkerId = 1;
|
|
16026
|
-
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
16027
|
-
let beforeAllOutputAttached = false;
|
|
16028
|
-
let cumulativeBudgetCost = 0;
|
|
16029
|
-
let budgetExhausted = false;
|
|
16030
|
-
let failOnErrorTriggered = false;
|
|
16031
|
-
const promises = filteredEvalCases.map(
|
|
16032
|
-
(evalCase) => limit(async () => {
|
|
16033
|
-
const workerId = nextWorkerId++;
|
|
16034
|
-
workerIdByEvalId.set(evalCase.id, workerId);
|
|
16035
|
-
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
16036
|
-
const budgetResult = {
|
|
16037
|
-
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
16038
|
-
testId: evalCase.id,
|
|
16039
|
-
dataset: evalCase.dataset,
|
|
16040
|
-
score: 0,
|
|
16041
|
-
hits: [],
|
|
16042
|
-
misses: [],
|
|
16043
|
-
answer: "",
|
|
16044
|
-
target: target.name,
|
|
16045
|
-
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
16046
|
-
budgetExceeded: true,
|
|
16047
|
-
executionStatus: "execution_error",
|
|
16048
|
-
failureStage: "setup",
|
|
16049
|
-
failureReasonCode: "budget_exceeded",
|
|
16050
|
-
executionError: {
|
|
16051
|
-
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
16052
|
-
stage: "setup"
|
|
16053
|
-
}
|
|
16054
|
-
};
|
|
16055
|
-
if (onProgress) {
|
|
16056
|
-
await onProgress({
|
|
16057
|
-
workerId,
|
|
16058
|
-
testId: evalCase.id,
|
|
16059
|
-
status: "failed",
|
|
16060
|
-
completedAt: Date.now(),
|
|
16061
|
-
error: budgetResult.error
|
|
16564
|
+
const repoManager = suiteWorkspace?.repos?.length && !usePool && !userWorkspacePath ? new RepoManager(void 0, verbose) : void 0;
|
|
16565
|
+
if (repoManager && sharedWorkspacePath && suiteWorkspace?.repos && !isPerTestIsolation) {
|
|
16566
|
+
setupLog(
|
|
16567
|
+
`materializing ${suiteWorkspace.repos.length} shared repo(s) into ${sharedWorkspacePath}`
|
|
16568
|
+
);
|
|
16569
|
+
try {
|
|
16570
|
+
await repoManager.materializeAll(suiteWorkspace.repos, sharedWorkspacePath);
|
|
16571
|
+
setupLog("shared repo materialization complete");
|
|
16572
|
+
} catch (error) {
|
|
16573
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
16574
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
16575
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16062
16576
|
});
|
|
16063
16577
|
}
|
|
16064
|
-
|
|
16065
|
-
await onResult(budgetResult);
|
|
16066
|
-
}
|
|
16067
|
-
return budgetResult;
|
|
16578
|
+
throw new Error(`Failed to materialize repos: ${message}`);
|
|
16068
16579
|
}
|
|
16069
|
-
|
|
16070
|
-
|
|
16071
|
-
|
|
16072
|
-
|
|
16073
|
-
|
|
16074
|
-
|
|
16075
|
-
|
|
16076
|
-
|
|
16077
|
-
|
|
16078
|
-
|
|
16079
|
-
|
|
16080
|
-
|
|
16081
|
-
|
|
16082
|
-
|
|
16083
|
-
|
|
16084
|
-
|
|
16085
|
-
|
|
16086
|
-
if (
|
|
16087
|
-
await
|
|
16088
|
-
workerId,
|
|
16089
|
-
testId: evalCase.id,
|
|
16090
|
-
status: "failed",
|
|
16091
|
-
completedAt: Date.now(),
|
|
16092
|
-
error: haltResult.error
|
|
16580
|
+
}
|
|
16581
|
+
if (sharedWorkspacePath && suiteWorkspace?.before_all) {
|
|
16582
|
+
const beforeAllCommand = (suiteWorkspace.before_all.command ?? suiteWorkspace.before_all.script ?? []).join(" ");
|
|
16583
|
+
setupLog(
|
|
16584
|
+
`running shared before_all in cwd=${suiteWorkspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
16585
|
+
);
|
|
16586
|
+
const scriptContext = {
|
|
16587
|
+
workspacePath: sharedWorkspacePath,
|
|
16588
|
+
testId: "__before_all__",
|
|
16589
|
+
evalRunId,
|
|
16590
|
+
evalDir
|
|
16591
|
+
};
|
|
16592
|
+
try {
|
|
16593
|
+
beforeAllOutput = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
16594
|
+
setupLog("shared before_all completed");
|
|
16595
|
+
} catch (error) {
|
|
16596
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
16597
|
+
if (sharedWorkspacePath && !userWorkspacePath) {
|
|
16598
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16093
16599
|
});
|
|
16094
16600
|
}
|
|
16095
|
-
|
|
16096
|
-
await onResult(haltResult);
|
|
16097
|
-
}
|
|
16098
|
-
return haltResult;
|
|
16099
|
-
}
|
|
16100
|
-
if (onProgress) {
|
|
16101
|
-
await onProgress({
|
|
16102
|
-
workerId,
|
|
16103
|
-
testId: evalCase.id,
|
|
16104
|
-
status: "running",
|
|
16105
|
-
startedAt: Date.now()
|
|
16106
|
-
});
|
|
16601
|
+
throw new Error(`before_all script failed: ${message}`);
|
|
16107
16602
|
}
|
|
16108
|
-
|
|
16109
|
-
|
|
16110
|
-
|
|
16111
|
-
|
|
16112
|
-
|
|
16113
|
-
|
|
16114
|
-
|
|
16115
|
-
maxRetries,
|
|
16116
|
-
agentTimeoutMs,
|
|
16117
|
-
cache,
|
|
16118
|
-
useCache,
|
|
16119
|
-
now,
|
|
16120
|
-
judgeProvider,
|
|
16121
|
-
targetResolver,
|
|
16122
|
-
availableTargets,
|
|
16603
|
+
}
|
|
16604
|
+
if (availablePoolSlots.length > 0 && suiteWorkspace?.before_all) {
|
|
16605
|
+
for (const slot of availablePoolSlots) {
|
|
16606
|
+
setupLog(`running before_all on pool slot ${slot.index}`);
|
|
16607
|
+
const scriptContext = {
|
|
16608
|
+
workspacePath: slot.path,
|
|
16609
|
+
testId: "__before_all__",
|
|
16123
16610
|
evalRunId,
|
|
16124
|
-
keepWorkspaces,
|
|
16125
|
-
cleanupWorkspaces,
|
|
16126
|
-
sharedWorkspacePath,
|
|
16127
|
-
sharedBaselineCommit,
|
|
16128
|
-
suiteWorkspaceFile,
|
|
16129
|
-
streamCallbacks,
|
|
16130
|
-
typeRegistry,
|
|
16131
|
-
repoManager,
|
|
16132
16611
|
evalDir
|
|
16133
16612
|
};
|
|
16134
|
-
|
|
16135
|
-
|
|
16136
|
-
|
|
16137
|
-
|
|
16138
|
-
|
|
16139
|
-
|
|
16140
|
-
|
|
16613
|
+
try {
|
|
16614
|
+
const output = await executeWorkspaceScript(suiteWorkspace.before_all, scriptContext);
|
|
16615
|
+
if (!beforeAllOutput) beforeAllOutput = output;
|
|
16616
|
+
setupLog(`before_all completed on pool slot ${slot.index}`);
|
|
16617
|
+
} catch (error) {
|
|
16618
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
16619
|
+
throw new Error(`before_all script failed on pool slot ${slot.index}: ${message}`);
|
|
16620
|
+
}
|
|
16621
|
+
}
|
|
16622
|
+
}
|
|
16623
|
+
if (sharedWorkspacePath) {
|
|
16624
|
+
try {
|
|
16625
|
+
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
16626
|
+
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
16627
|
+
} catch {
|
|
16628
|
+
setupLog("shared baseline initialization skipped (non-fatal)");
|
|
16629
|
+
}
|
|
16630
|
+
}
|
|
16631
|
+
if (availablePoolSlots.length > 0) {
|
|
16632
|
+
for (const slot of availablePoolSlots) {
|
|
16633
|
+
try {
|
|
16634
|
+
const baseline = await initializeBaseline(slot.path);
|
|
16635
|
+
poolSlotBaselines.set(slot.path, baseline);
|
|
16636
|
+
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
16637
|
+
} catch {
|
|
16638
|
+
setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
|
|
16639
|
+
}
|
|
16640
|
+
}
|
|
16641
|
+
}
|
|
16642
|
+
let nextWorkerId = 1;
|
|
16643
|
+
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
16644
|
+
let beforeAllOutputAttached = false;
|
|
16645
|
+
let cumulativeBudgetCost = 0;
|
|
16646
|
+
let budgetExhausted = false;
|
|
16647
|
+
let failOnErrorTriggered = false;
|
|
16648
|
+
const promises = filteredEvalCases.map(
|
|
16649
|
+
(evalCase) => limit(async () => {
|
|
16650
|
+
const workerId = nextWorkerId++;
|
|
16651
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
16652
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
16653
|
+
const budgetResult = {
|
|
16654
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
16655
|
+
testId: evalCase.id,
|
|
16656
|
+
dataset: evalCase.dataset,
|
|
16657
|
+
score: 0,
|
|
16658
|
+
hits: [],
|
|
16659
|
+
misses: [],
|
|
16660
|
+
answer: "",
|
|
16661
|
+
target: target.name,
|
|
16662
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
16663
|
+
budgetExceeded: true,
|
|
16664
|
+
executionStatus: "execution_error",
|
|
16665
|
+
failureStage: "setup",
|
|
16666
|
+
failureReasonCode: "budget_exceeded",
|
|
16667
|
+
executionError: {
|
|
16668
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
16669
|
+
stage: "setup"
|
|
16141
16670
|
}
|
|
16142
|
-
}
|
|
16143
|
-
|
|
16671
|
+
};
|
|
16672
|
+
if (onProgress) {
|
|
16673
|
+
await onProgress({
|
|
16674
|
+
workerId,
|
|
16675
|
+
testId: evalCase.id,
|
|
16676
|
+
status: "failed",
|
|
16677
|
+
completedAt: Date.now(),
|
|
16678
|
+
error: budgetResult.error
|
|
16679
|
+
});
|
|
16144
16680
|
}
|
|
16145
|
-
if (
|
|
16146
|
-
|
|
16147
|
-
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
16148
|
-
budgetExhausted = true;
|
|
16149
|
-
}
|
|
16681
|
+
if (onResult) {
|
|
16682
|
+
await onResult(budgetResult);
|
|
16150
16683
|
}
|
|
16684
|
+
return budgetResult;
|
|
16151
16685
|
}
|
|
16152
|
-
if (failOnError === true &&
|
|
16153
|
-
|
|
16154
|
-
|
|
16155
|
-
|
|
16156
|
-
|
|
16157
|
-
|
|
16686
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
16687
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
16688
|
+
const haltResult = {
|
|
16689
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
16690
|
+
testId: evalCase.id,
|
|
16691
|
+
dataset: evalCase.dataset,
|
|
16692
|
+
score: 0,
|
|
16693
|
+
hits: [],
|
|
16694
|
+
misses: [],
|
|
16695
|
+
answer: "",
|
|
16696
|
+
target: target.name,
|
|
16697
|
+
error: errorMsg,
|
|
16698
|
+
executionStatus: "execution_error",
|
|
16699
|
+
failureStage: "setup",
|
|
16700
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
16701
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
16702
|
+
};
|
|
16703
|
+
if (onProgress) {
|
|
16704
|
+
await onProgress({
|
|
16705
|
+
workerId,
|
|
16706
|
+
testId: evalCase.id,
|
|
16707
|
+
status: "failed",
|
|
16708
|
+
completedAt: Date.now(),
|
|
16709
|
+
error: haltResult.error
|
|
16710
|
+
});
|
|
16711
|
+
}
|
|
16712
|
+
if (onResult) {
|
|
16713
|
+
await onResult(haltResult);
|
|
16714
|
+
}
|
|
16715
|
+
return haltResult;
|
|
16158
16716
|
}
|
|
16159
16717
|
if (onProgress) {
|
|
16160
16718
|
await onProgress({
|
|
16161
16719
|
workerId,
|
|
16162
16720
|
testId: evalCase.id,
|
|
16163
|
-
status:
|
|
16164
|
-
startedAt:
|
|
16165
|
-
// Not used for completed status
|
|
16166
|
-
completedAt: Date.now(),
|
|
16167
|
-
error: result.error
|
|
16721
|
+
status: "running",
|
|
16722
|
+
startedAt: Date.now()
|
|
16168
16723
|
});
|
|
16169
16724
|
}
|
|
16170
|
-
|
|
16171
|
-
|
|
16725
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
16726
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
16727
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
16728
|
+
try {
|
|
16729
|
+
const judgeProvider = await resolveJudgeProvider(target);
|
|
16730
|
+
const runCaseOptions = {
|
|
16731
|
+
evalCase,
|
|
16732
|
+
provider: primaryProvider,
|
|
16733
|
+
target,
|
|
16734
|
+
evaluators: evaluatorRegistry,
|
|
16735
|
+
maxRetries,
|
|
16736
|
+
agentTimeoutMs,
|
|
16737
|
+
cache,
|
|
16738
|
+
useCache,
|
|
16739
|
+
now,
|
|
16740
|
+
judgeProvider,
|
|
16741
|
+
targetResolver,
|
|
16742
|
+
availableTargets,
|
|
16743
|
+
evalRunId,
|
|
16744
|
+
keepWorkspaces,
|
|
16745
|
+
cleanupWorkspaces,
|
|
16746
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
16747
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
16748
|
+
suiteWorkspaceFile,
|
|
16749
|
+
streamCallbacks,
|
|
16750
|
+
typeRegistry,
|
|
16751
|
+
repoManager,
|
|
16752
|
+
evalDir
|
|
16753
|
+
};
|
|
16754
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
16755
|
+
if (totalBudgetUsd !== void 0) {
|
|
16756
|
+
let caseCost;
|
|
16757
|
+
if (result.trials && result.trials.length > 0) {
|
|
16758
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
16759
|
+
if (trialCostSum > 0) {
|
|
16760
|
+
caseCost = trialCostSum;
|
|
16761
|
+
}
|
|
16762
|
+
} else {
|
|
16763
|
+
caseCost = result.costUsd;
|
|
16764
|
+
}
|
|
16765
|
+
if (caseCost !== void 0) {
|
|
16766
|
+
cumulativeBudgetCost += caseCost;
|
|
16767
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
16768
|
+
budgetExhausted = true;
|
|
16769
|
+
}
|
|
16770
|
+
}
|
|
16771
|
+
}
|
|
16772
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
16773
|
+
failOnErrorTriggered = true;
|
|
16774
|
+
}
|
|
16775
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
16776
|
+
result = { ...result, beforeAllOutput };
|
|
16777
|
+
beforeAllOutputAttached = true;
|
|
16778
|
+
}
|
|
16779
|
+
if (onProgress) {
|
|
16780
|
+
await onProgress({
|
|
16781
|
+
workerId,
|
|
16782
|
+
testId: evalCase.id,
|
|
16783
|
+
status: result.error ? "failed" : "completed",
|
|
16784
|
+
startedAt: 0,
|
|
16785
|
+
// Not used for completed status
|
|
16786
|
+
completedAt: Date.now(),
|
|
16787
|
+
error: result.error
|
|
16788
|
+
});
|
|
16789
|
+
}
|
|
16790
|
+
if (onResult) {
|
|
16791
|
+
await onResult(result);
|
|
16792
|
+
}
|
|
16793
|
+
return result;
|
|
16794
|
+
} catch (error) {
|
|
16795
|
+
if (onProgress) {
|
|
16796
|
+
await onProgress({
|
|
16797
|
+
workerId,
|
|
16798
|
+
testId: evalCase.id,
|
|
16799
|
+
status: "failed",
|
|
16800
|
+
completedAt: Date.now(),
|
|
16801
|
+
error: error instanceof Error ? error.message : String(error)
|
|
16802
|
+
});
|
|
16803
|
+
}
|
|
16804
|
+
throw error;
|
|
16805
|
+
} finally {
|
|
16806
|
+
if (testPoolSlot) {
|
|
16807
|
+
availablePoolSlots.push(testPoolSlot);
|
|
16808
|
+
}
|
|
16172
16809
|
}
|
|
16173
|
-
|
|
16174
|
-
|
|
16175
|
-
|
|
16176
|
-
|
|
16177
|
-
|
|
16178
|
-
|
|
16179
|
-
|
|
16180
|
-
|
|
16181
|
-
|
|
16182
|
-
|
|
16810
|
+
})
|
|
16811
|
+
);
|
|
16812
|
+
const settled = await Promise.allSettled(promises);
|
|
16813
|
+
const results = [];
|
|
16814
|
+
for (let i = 0; i < settled.length; i++) {
|
|
16815
|
+
const outcome = settled[i];
|
|
16816
|
+
if (outcome.status === "fulfilled") {
|
|
16817
|
+
results.push(outcome.value);
|
|
16818
|
+
} else {
|
|
16819
|
+
const evalCase = filteredEvalCases[i];
|
|
16820
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
16821
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
16822
|
+
const errorResult = buildErrorResult(
|
|
16823
|
+
evalCase,
|
|
16824
|
+
target.name,
|
|
16825
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
16826
|
+
outcome.reason,
|
|
16827
|
+
promptInputs,
|
|
16828
|
+
primaryProvider,
|
|
16829
|
+
"agent",
|
|
16830
|
+
"provider_error"
|
|
16831
|
+
);
|
|
16832
|
+
results.push(errorResult);
|
|
16833
|
+
if (onResult) {
|
|
16834
|
+
await onResult(errorResult);
|
|
16183
16835
|
}
|
|
16184
|
-
throw error;
|
|
16185
16836
|
}
|
|
16186
|
-
}
|
|
16187
|
-
|
|
16188
|
-
|
|
16189
|
-
|
|
16190
|
-
|
|
16191
|
-
|
|
16192
|
-
|
|
16193
|
-
|
|
16194
|
-
|
|
16195
|
-
|
|
16196
|
-
|
|
16197
|
-
|
|
16198
|
-
|
|
16199
|
-
|
|
16200
|
-
|
|
16201
|
-
|
|
16202
|
-
|
|
16203
|
-
|
|
16204
|
-
|
|
16205
|
-
|
|
16206
|
-
|
|
16207
|
-
);
|
|
16208
|
-
results.push(errorResult);
|
|
16209
|
-
if (onResult) {
|
|
16210
|
-
await onResult(errorResult);
|
|
16837
|
+
}
|
|
16838
|
+
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
16839
|
+
if (afterAllWorkspaces.length > 0 && suiteWorkspace?.after_all) {
|
|
16840
|
+
for (const wsPath of afterAllWorkspaces) {
|
|
16841
|
+
const scriptContext = {
|
|
16842
|
+
workspacePath: wsPath,
|
|
16843
|
+
testId: "__after_all__",
|
|
16844
|
+
evalRunId,
|
|
16845
|
+
evalDir
|
|
16846
|
+
};
|
|
16847
|
+
try {
|
|
16848
|
+
const afterAllOutput = await executeWorkspaceScript(
|
|
16849
|
+
suiteWorkspace.after_all,
|
|
16850
|
+
scriptContext,
|
|
16851
|
+
"warn"
|
|
16852
|
+
);
|
|
16853
|
+
if (afterAllOutput && results.length > 0 && wsPath === afterAllWorkspaces[0]) {
|
|
16854
|
+
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
16855
|
+
}
|
|
16856
|
+
} catch {
|
|
16857
|
+
}
|
|
16211
16858
|
}
|
|
16212
16859
|
}
|
|
16213
|
-
|
|
16214
|
-
|
|
16215
|
-
|
|
16216
|
-
|
|
16217
|
-
|
|
16218
|
-
|
|
16219
|
-
|
|
16220
|
-
|
|
16221
|
-
try {
|
|
16222
|
-
const afterAllOutput = await executeWorkspaceScript(
|
|
16223
|
-
suiteWorkspace.after_all,
|
|
16224
|
-
scriptContext,
|
|
16225
|
-
"warn"
|
|
16226
|
-
);
|
|
16227
|
-
if (afterAllOutput && results.length > 0) {
|
|
16228
|
-
results[results.length - 1] = { ...results[results.length - 1], afterAllOutput };
|
|
16860
|
+
if (sharedWorkspacePath && !poolSlot && poolSlots.length === 0 && !userWorkspacePath) {
|
|
16861
|
+
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
16862
|
+
if (cleanupWorkspaces) {
|
|
16863
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16864
|
+
});
|
|
16865
|
+
} else if (!hasFailure && !keepWorkspaces) {
|
|
16866
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16867
|
+
});
|
|
16229
16868
|
}
|
|
16230
|
-
} catch {
|
|
16231
16869
|
}
|
|
16232
|
-
}
|
|
16233
|
-
if (sharedWorkspacePath) {
|
|
16234
|
-
const hasFailure = results.some((r) => !!r.error || r.score < 0.5);
|
|
16235
16870
|
if (cleanupWorkspaces) {
|
|
16236
|
-
await
|
|
16237
|
-
});
|
|
16238
|
-
} else if (!hasFailure && !keepWorkspaces) {
|
|
16239
|
-
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
16871
|
+
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
16240
16872
|
});
|
|
16241
16873
|
}
|
|
16874
|
+
return results;
|
|
16875
|
+
} finally {
|
|
16876
|
+
if (poolManager) {
|
|
16877
|
+
if (poolSlot) {
|
|
16878
|
+
await poolManager.releaseSlot(poolSlot);
|
|
16879
|
+
}
|
|
16880
|
+
for (const slot of poolSlots) {
|
|
16881
|
+
if (slot !== poolSlot) {
|
|
16882
|
+
await poolManager.releaseSlot(slot).catch(() => {
|
|
16883
|
+
});
|
|
16884
|
+
}
|
|
16885
|
+
}
|
|
16886
|
+
}
|
|
16242
16887
|
}
|
|
16243
|
-
if (cleanupWorkspaces) {
|
|
16244
|
-
await cleanupEvalWorkspaces(evalRunId).catch(() => {
|
|
16245
|
-
});
|
|
16246
|
-
}
|
|
16247
|
-
return results;
|
|
16248
16888
|
}
|
|
16249
16889
|
async function runBatchEvaluation(options) {
|
|
16250
16890
|
const {
|
|
@@ -16423,6 +17063,7 @@ async function runEvalCase(options) {
|
|
|
16423
17063
|
repoManager,
|
|
16424
17064
|
evalDir
|
|
16425
17065
|
} = options;
|
|
17066
|
+
const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
|
|
16426
17067
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
16427
17068
|
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
16428
17069
|
const typeRegistry = providedTypeRegistry ?? createBuiltinRegistry();
|
|
@@ -16460,9 +17101,9 @@ async function runEvalCase(options) {
|
|
|
16460
17101
|
);
|
|
16461
17102
|
}
|
|
16462
17103
|
if (caseWorkspaceFile && workspacePath) {
|
|
16463
|
-
const copiedFile =
|
|
17104
|
+
const copiedFile = import_node_path42.default.join(workspacePath, import_node_path42.default.basename(caseWorkspaceFile));
|
|
16464
17105
|
try {
|
|
16465
|
-
await (0,
|
|
17106
|
+
await (0, import_promises30.stat)(copiedFile);
|
|
16466
17107
|
caseWorkspaceFile = copiedFile;
|
|
16467
17108
|
} catch {
|
|
16468
17109
|
}
|
|
@@ -16470,12 +17111,20 @@ async function runEvalCase(options) {
|
|
|
16470
17111
|
}
|
|
16471
17112
|
if (!workspacePath && (evalCase.workspace?.before_all || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
16472
17113
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
16473
|
-
await (0,
|
|
17114
|
+
await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
|
|
16474
17115
|
}
|
|
16475
17116
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
16476
|
-
const perCaseRepoManager = new RepoManager();
|
|
17117
|
+
const perCaseRepoManager = new RepoManager(void 0, setupDebug);
|
|
16477
17118
|
try {
|
|
17119
|
+
if (setupDebug) {
|
|
17120
|
+
console.log(
|
|
17121
|
+
`[setup] test=${evalCase.id} materializing ${evalCase.workspace.repos.length} per-test repo(s) into ${workspacePath}`
|
|
17122
|
+
);
|
|
17123
|
+
}
|
|
16478
17124
|
await perCaseRepoManager.materializeAll(evalCase.workspace.repos, workspacePath);
|
|
17125
|
+
if (setupDebug) {
|
|
17126
|
+
console.log(`[setup] test=${evalCase.id} per-test repo materialization complete`);
|
|
17127
|
+
}
|
|
16479
17128
|
} catch (error) {
|
|
16480
17129
|
const message = error instanceof Error ? error.message : String(error);
|
|
16481
17130
|
return buildErrorResult(
|
|
@@ -16491,6 +17140,12 @@ async function runEvalCase(options) {
|
|
|
16491
17140
|
}
|
|
16492
17141
|
}
|
|
16493
17142
|
if (workspacePath && evalCase.workspace?.before_all) {
|
|
17143
|
+
const beforeAllCommand = (evalCase.workspace.before_all.command ?? evalCase.workspace.before_all.script ?? []).join(" ");
|
|
17144
|
+
if (setupDebug) {
|
|
17145
|
+
console.log(
|
|
17146
|
+
`[setup] test=${evalCase.id} running before_all in cwd=${evalCase.workspace.before_all.cwd ?? evalDir} command=${beforeAllCommand}`
|
|
17147
|
+
);
|
|
17148
|
+
}
|
|
16494
17149
|
const scriptContext = {
|
|
16495
17150
|
workspacePath,
|
|
16496
17151
|
testId: evalCase.id,
|
|
@@ -16504,6 +17159,9 @@ async function runEvalCase(options) {
|
|
|
16504
17159
|
evalCase.workspace.before_all,
|
|
16505
17160
|
scriptContext
|
|
16506
17161
|
);
|
|
17162
|
+
if (setupDebug) {
|
|
17163
|
+
console.log(`[setup] test=${evalCase.id} before_all completed`);
|
|
17164
|
+
}
|
|
16507
17165
|
} catch (error) {
|
|
16508
17166
|
const message = error instanceof Error ? error.message : String(error);
|
|
16509
17167
|
if (forceCleanup && workspacePath) {
|
|
@@ -17053,7 +17711,7 @@ async function runEvaluatorList(options) {
|
|
|
17053
17711
|
fileChanges,
|
|
17054
17712
|
workspacePath
|
|
17055
17713
|
};
|
|
17056
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
17714
|
+
const evalFileDir = evalCase.guideline_paths[0] ? import_node_path42.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
17057
17715
|
const dispatchContext = {
|
|
17058
17716
|
judgeProvider,
|
|
17059
17717
|
targetResolver,
|
|
@@ -17287,7 +17945,7 @@ function extractProviderError(response) {
|
|
|
17287
17945
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
17288
17946
|
}
|
|
17289
17947
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
17290
|
-
const hash = (0,
|
|
17948
|
+
const hash = (0, import_node_crypto10.createHash)("sha256");
|
|
17291
17949
|
hash.update(provider.id);
|
|
17292
17950
|
hash.update(target.name);
|
|
17293
17951
|
hash.update(evalCase.id);
|
|
@@ -17355,8 +18013,8 @@ function computeWeightedMean(entries) {
|
|
|
17355
18013
|
}
|
|
17356
18014
|
|
|
17357
18015
|
// src/evaluation/evaluate.ts
|
|
17358
|
-
var
|
|
17359
|
-
var
|
|
18016
|
+
var import_node_fs13 = require("fs");
|
|
18017
|
+
var import_node_path43 = __toESM(require("path"), 1);
|
|
17360
18018
|
async function evaluate(config) {
|
|
17361
18019
|
const startTime = Date.now();
|
|
17362
18020
|
if (config.tests && config.specFile) {
|
|
@@ -17378,13 +18036,13 @@ async function evaluate(config) {
|
|
|
17378
18036
|
let evalCases;
|
|
17379
18037
|
let testFilePath;
|
|
17380
18038
|
if (config.specFile) {
|
|
17381
|
-
testFilePath =
|
|
18039
|
+
testFilePath = import_node_path43.default.resolve(config.specFile);
|
|
17382
18040
|
evalCases = await loadTests(testFilePath, repoRoot, {
|
|
17383
18041
|
verbose: config.verbose,
|
|
17384
18042
|
filter: config.filter
|
|
17385
18043
|
});
|
|
17386
18044
|
} else {
|
|
17387
|
-
testFilePath =
|
|
18045
|
+
testFilePath = import_node_path43.default.join(process.cwd(), "__programmatic__.yaml");
|
|
17388
18046
|
evalCases = (config.tests ?? []).map((test) => {
|
|
17389
18047
|
const input = typeof test.input === "string" ? [{ role: "user", content: test.input }] : test.input;
|
|
17390
18048
|
const question = typeof test.input === "string" ? test.input : test.input.find((m) => m.role === "user")?.content ?? "";
|
|
@@ -17470,11 +18128,11 @@ function computeSummary(results, durationMs) {
|
|
|
17470
18128
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
17471
18129
|
async function discoverDefaultTarget(repoRoot) {
|
|
17472
18130
|
const cwd = process.cwd();
|
|
17473
|
-
const chain = buildDirectoryChain2(
|
|
18131
|
+
const chain = buildDirectoryChain2(import_node_path43.default.join(cwd, "_placeholder"), repoRoot);
|
|
17474
18132
|
for (const dir of chain) {
|
|
17475
18133
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
17476
|
-
const targetsPath =
|
|
17477
|
-
if (!(0,
|
|
18134
|
+
const targetsPath = import_node_path43.default.join(dir, candidate);
|
|
18135
|
+
if (!(0, import_node_fs13.existsSync)(targetsPath)) continue;
|
|
17478
18136
|
try {
|
|
17479
18137
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
17480
18138
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -17488,11 +18146,11 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
17488
18146
|
async function loadEnvHierarchy(repoRoot) {
|
|
17489
18147
|
const { readFileSync: readFileSync2 } = await import("fs");
|
|
17490
18148
|
const cwd = process.cwd();
|
|
17491
|
-
const chain = buildDirectoryChain2(
|
|
18149
|
+
const chain = buildDirectoryChain2(import_node_path43.default.join(cwd, "_placeholder"), repoRoot);
|
|
17492
18150
|
const envFiles = [];
|
|
17493
18151
|
for (const dir of chain) {
|
|
17494
|
-
const envPath =
|
|
17495
|
-
if ((0,
|
|
18152
|
+
const envPath = import_node_path43.default.join(dir, ".env");
|
|
18153
|
+
if ((0, import_node_fs13.existsSync)(envPath)) envFiles.push(envPath);
|
|
17496
18154
|
}
|
|
17497
18155
|
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
17498
18156
|
try {
|
|
@@ -17570,12 +18228,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
17570
18228
|
".agentv/config.js"
|
|
17571
18229
|
];
|
|
17572
18230
|
async function loadTsConfig(projectRoot) {
|
|
17573
|
-
const { existsSync:
|
|
18231
|
+
const { existsSync: existsSync5 } = await import("fs");
|
|
17574
18232
|
const { pathToFileURL } = await import("url");
|
|
17575
18233
|
const { join: join2 } = await import("path");
|
|
17576
18234
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
17577
18235
|
const filePath = join2(projectRoot, fileName);
|
|
17578
|
-
if (!
|
|
18236
|
+
if (!existsSync5(filePath)) {
|
|
17579
18237
|
continue;
|
|
17580
18238
|
}
|
|
17581
18239
|
try {
|
|
@@ -17672,8 +18330,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
17672
18330
|
}
|
|
17673
18331
|
|
|
17674
18332
|
// src/evaluation/cache/response-cache.ts
|
|
17675
|
-
var
|
|
17676
|
-
var
|
|
18333
|
+
var import_promises31 = require("fs/promises");
|
|
18334
|
+
var import_node_path44 = __toESM(require("path"), 1);
|
|
17677
18335
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
17678
18336
|
var ResponseCache = class {
|
|
17679
18337
|
cachePath;
|
|
@@ -17683,7 +18341,7 @@ var ResponseCache = class {
|
|
|
17683
18341
|
async get(key) {
|
|
17684
18342
|
const filePath = this.keyToPath(key);
|
|
17685
18343
|
try {
|
|
17686
|
-
const data = await (0,
|
|
18344
|
+
const data = await (0, import_promises31.readFile)(filePath, "utf8");
|
|
17687
18345
|
return JSON.parse(data);
|
|
17688
18346
|
} catch {
|
|
17689
18347
|
return void 0;
|
|
@@ -17691,13 +18349,13 @@ var ResponseCache = class {
|
|
|
17691
18349
|
}
|
|
17692
18350
|
async set(key, value) {
|
|
17693
18351
|
const filePath = this.keyToPath(key);
|
|
17694
|
-
const dir =
|
|
17695
|
-
await (0,
|
|
17696
|
-
await (0,
|
|
18352
|
+
const dir = import_node_path44.default.dirname(filePath);
|
|
18353
|
+
await (0, import_promises31.mkdir)(dir, { recursive: true });
|
|
18354
|
+
await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
17697
18355
|
}
|
|
17698
18356
|
keyToPath(key) {
|
|
17699
18357
|
const prefix = key.slice(0, 2);
|
|
17700
|
-
return
|
|
18358
|
+
return import_node_path44.default.join(this.cachePath, prefix, `${key}.json`);
|
|
17701
18359
|
}
|
|
17702
18360
|
};
|
|
17703
18361
|
function shouldEnableCache(params) {
|
|
@@ -18184,6 +18842,7 @@ function createAgentKernel() {
|
|
|
18184
18842
|
TokenUsageEvaluator,
|
|
18185
18843
|
ToolTrajectoryEvaluator,
|
|
18186
18844
|
WorkspaceCreationError,
|
|
18845
|
+
WorkspacePoolManager,
|
|
18187
18846
|
assembleLlmJudgePrompt,
|
|
18188
18847
|
avgToolDurationMs,
|
|
18189
18848
|
buildDirectoryChain,
|
|
@@ -18198,6 +18857,7 @@ function createAgentKernel() {
|
|
|
18198
18857
|
cleanupEvalWorkspaces,
|
|
18199
18858
|
cleanupWorkspace,
|
|
18200
18859
|
computeTraceSummary,
|
|
18860
|
+
computeWorkspaceFingerprint,
|
|
18201
18861
|
consumeClaudeLogEntries,
|
|
18202
18862
|
consumeCodexLogEntries,
|
|
18203
18863
|
consumeCopilotCliLogEntries,
|
|
@@ -18229,8 +18889,14 @@ function createAgentKernel() {
|
|
|
18229
18889
|
findGitRoot,
|
|
18230
18890
|
freeformEvaluationSchema,
|
|
18231
18891
|
generateRubrics,
|
|
18892
|
+
getAgentvHome,
|
|
18893
|
+
getGitCacheRoot,
|
|
18232
18894
|
getHitCount,
|
|
18895
|
+
getSubagentsRoot,
|
|
18896
|
+
getTraceStateRoot,
|
|
18233
18897
|
getWorkspacePath,
|
|
18898
|
+
getWorkspacePoolRoot,
|
|
18899
|
+
getWorkspacesRoot,
|
|
18234
18900
|
initializeBaseline,
|
|
18235
18901
|
isEvaluatorKind,
|
|
18236
18902
|
isGuidelineFile,
|