@agentv/core 3.8.0 → 3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-3ZS3GCMI.js → chunk-PC5TLJF6.js} +1 -2
- package/dist/chunk-PC5TLJF6.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +533 -772
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -21
- package/dist/index.d.ts +16 -21
- package/dist/index.js +381 -619
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-3ZS3GCMI.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1315,12 +1315,12 @@ function serializeAttributeValue(value) {
|
|
|
1315
1315
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1316
1316
|
return { stringValue: String(value) };
|
|
1317
1317
|
}
|
|
1318
|
-
var
|
|
1318
|
+
var import_promises32, import_node_path48, OtlpJsonFileExporter;
|
|
1319
1319
|
var init_otlp_json_file_exporter = __esm({
|
|
1320
1320
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1321
1321
|
"use strict";
|
|
1322
|
-
|
|
1323
|
-
|
|
1322
|
+
import_promises32 = require("fs/promises");
|
|
1323
|
+
import_node_path48 = require("path");
|
|
1324
1324
|
OtlpJsonFileExporter = class {
|
|
1325
1325
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1326
1326
|
spans = [];
|
|
@@ -1359,7 +1359,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1359
1359
|
}
|
|
1360
1360
|
async flush() {
|
|
1361
1361
|
if (this.spans.length === 0) return;
|
|
1362
|
-
await (0,
|
|
1362
|
+
await (0, import_promises32.mkdir)((0, import_node_path48.dirname)(this.filePath), { recursive: true });
|
|
1363
1363
|
const otlpJson = {
|
|
1364
1364
|
resourceSpans: [
|
|
1365
1365
|
{
|
|
@@ -1390,13 +1390,13 @@ function hrTimeDiffMs(start, end) {
|
|
|
1390
1390
|
const diffNano = end[1] - start[1];
|
|
1391
1391
|
return Math.round(diffSec * 1e3 + diffNano / 1e6);
|
|
1392
1392
|
}
|
|
1393
|
-
var import_node_fs15,
|
|
1393
|
+
var import_node_fs15, import_promises33, import_node_path49, SimpleTraceFileExporter;
|
|
1394
1394
|
var init_simple_trace_file_exporter = __esm({
|
|
1395
1395
|
"src/observability/simple-trace-file-exporter.ts"() {
|
|
1396
1396
|
"use strict";
|
|
1397
1397
|
import_node_fs15 = require("fs");
|
|
1398
|
-
|
|
1399
|
-
|
|
1398
|
+
import_promises33 = require("fs/promises");
|
|
1399
|
+
import_node_path49 = require("path");
|
|
1400
1400
|
SimpleTraceFileExporter = class {
|
|
1401
1401
|
stream = null;
|
|
1402
1402
|
filePath;
|
|
@@ -1409,7 +1409,7 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1409
1409
|
async ensureStream() {
|
|
1410
1410
|
if (!this.streamReady) {
|
|
1411
1411
|
this.streamReady = (async () => {
|
|
1412
|
-
await (0,
|
|
1412
|
+
await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
|
|
1413
1413
|
this.stream = (0, import_node_fs15.createWriteStream)(this.filePath, { flags: "w" });
|
|
1414
1414
|
return this.stream;
|
|
1415
1415
|
})();
|
|
@@ -1575,6 +1575,7 @@ __export(index_exports, {
|
|
|
1575
1575
|
extractTargetsFromSuite: () => extractTargetsFromSuite,
|
|
1576
1576
|
extractTargetsFromTestCase: () => extractTargetsFromTestCase,
|
|
1577
1577
|
extractTrialsConfig: () => extractTrialsConfig,
|
|
1578
|
+
extractWorkersFromSuite: () => extractWorkersFromSuite,
|
|
1578
1579
|
fileExists: () => fileExists2,
|
|
1579
1580
|
findGitRoot: () => findGitRoot,
|
|
1580
1581
|
freeformEvaluationSchema: () => freeformEvaluationSchema,
|
|
@@ -1589,7 +1590,6 @@ __export(index_exports, {
|
|
|
1589
1590
|
initializeBaseline: () => initializeBaseline,
|
|
1590
1591
|
isAgentSkillsFormat: () => isAgentSkillsFormat,
|
|
1591
1592
|
isEvaluatorKind: () => isEvaluatorKind,
|
|
1592
|
-
isGuidelineFile: () => isGuidelineFile,
|
|
1593
1593
|
isJsonObject: () => isJsonObject,
|
|
1594
1594
|
isJsonValue: () => isJsonValue,
|
|
1595
1595
|
isNonEmptyString: () => isNonEmptyString,
|
|
@@ -1849,9 +1849,9 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
1849
1849
|
}
|
|
1850
1850
|
|
|
1851
1851
|
// src/evaluation/yaml-parser.ts
|
|
1852
|
-
var
|
|
1853
|
-
var
|
|
1854
|
-
var
|
|
1852
|
+
var import_promises8 = require("fs/promises");
|
|
1853
|
+
var import_node_path8 = __toESM(require("path"), 1);
|
|
1854
|
+
var import_micromatch2 = __toESM(require("micromatch"), 1);
|
|
1855
1855
|
var import_yaml4 = require("yaml");
|
|
1856
1856
|
|
|
1857
1857
|
// src/evaluation/interpolation.ts
|
|
@@ -1944,7 +1944,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
1944
1944
|
input_segments: [{ type: "text", value: prompt }],
|
|
1945
1945
|
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
1946
1946
|
reference_answer: evalCase.expected_output,
|
|
1947
|
-
guideline_paths: [],
|
|
1948
1947
|
file_paths: filePaths,
|
|
1949
1948
|
criteria: evalCase.expected_output ?? "",
|
|
1950
1949
|
assertions,
|
|
@@ -2076,7 +2075,6 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
2076
2075
|
// src/evaluation/loaders/config-loader.ts
|
|
2077
2076
|
var import_promises4 = require("fs/promises");
|
|
2078
2077
|
var import_node_path4 = __toESM(require("path"), 1);
|
|
2079
|
-
var import_micromatch = __toESM(require("micromatch"), 1);
|
|
2080
2078
|
var import_yaml2 = require("yaml");
|
|
2081
2079
|
|
|
2082
2080
|
// src/evaluation/loaders/file-resolver.ts
|
|
@@ -2207,15 +2205,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2207
2205
|
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
2208
2206
|
continue;
|
|
2209
2207
|
}
|
|
2210
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
2211
|
-
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
2212
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
2213
|
-
continue;
|
|
2214
|
-
}
|
|
2215
|
-
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
2216
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
2217
|
-
continue;
|
|
2218
|
-
}
|
|
2219
2208
|
const evalPatterns = config.eval_patterns;
|
|
2220
2209
|
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
2221
2210
|
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
@@ -2231,7 +2220,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2231
2220
|
);
|
|
2232
2221
|
return {
|
|
2233
2222
|
required_version: requiredVersion,
|
|
2234
|
-
guideline_patterns: guidelinePatterns,
|
|
2235
2223
|
eval_patterns: evalPatterns,
|
|
2236
2224
|
execution: executionDefaults
|
|
2237
2225
|
};
|
|
@@ -2243,11 +2231,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2243
2231
|
}
|
|
2244
2232
|
return null;
|
|
2245
2233
|
}
|
|
2246
|
-
function isGuidelineFile(filePath, patterns) {
|
|
2247
|
-
const normalized = filePath.split("\\").join("/");
|
|
2248
|
-
const patternsToUse = patterns ?? [];
|
|
2249
|
-
return import_micromatch.default.isMatch(normalized, patternsToUse);
|
|
2250
|
-
}
|
|
2251
2234
|
function extractTargetFromSuite(suite) {
|
|
2252
2235
|
const execution = suite.execution;
|
|
2253
2236
|
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
@@ -2274,6 +2257,17 @@ function extractTargetsFromSuite(suite) {
|
|
|
2274
2257
|
}
|
|
2275
2258
|
return void 0;
|
|
2276
2259
|
}
|
|
2260
|
+
function extractWorkersFromSuite(suite) {
|
|
2261
|
+
const execution = suite.execution;
|
|
2262
|
+
if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
|
|
2263
|
+
return void 0;
|
|
2264
|
+
}
|
|
2265
|
+
const workers = execution.workers;
|
|
2266
|
+
if (typeof workers === "number" && Number.isInteger(workers) && workers >= 1 && workers <= 50) {
|
|
2267
|
+
return workers;
|
|
2268
|
+
}
|
|
2269
|
+
return void 0;
|
|
2270
|
+
}
|
|
2277
2271
|
function extractTargetsFromTestCase(testCase) {
|
|
2278
2272
|
const execution = testCase.execution;
|
|
2279
2273
|
if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
|
|
@@ -3823,7 +3817,7 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3823
3817
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
3824
3818
|
var import_promises7 = require("fs/promises");
|
|
3825
3819
|
var import_node_path7 = __toESM(require("path"), 1);
|
|
3826
|
-
var
|
|
3820
|
+
var import_micromatch = __toESM(require("micromatch"), 1);
|
|
3827
3821
|
var import_yaml3 = require("yaml");
|
|
3828
3822
|
|
|
3829
3823
|
// src/evaluation/loaders/message-processor.ts
|
|
@@ -3850,10 +3844,6 @@ function formatSegment(segment, mode = "lm") {
|
|
|
3850
3844
|
if (type === "text") {
|
|
3851
3845
|
return asString2(segment.value);
|
|
3852
3846
|
}
|
|
3853
|
-
if (type === "guideline_ref") {
|
|
3854
|
-
const refPath = asString2(segment.path);
|
|
3855
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
3856
|
-
}
|
|
3857
3847
|
if (type === "file") {
|
|
3858
3848
|
const filePath = asString2(segment.path);
|
|
3859
3849
|
if (!filePath) {
|
|
@@ -3876,9 +3866,6 @@ function hasVisibleContent(segments) {
|
|
|
3876
3866
|
const value = asString2(segment.value);
|
|
3877
3867
|
return value !== void 0 && value.trim().length > 0;
|
|
3878
3868
|
}
|
|
3879
|
-
if (type === "guideline_ref") {
|
|
3880
|
-
return false;
|
|
3881
|
-
}
|
|
3882
3869
|
if (type === "file") {
|
|
3883
3870
|
const text = asString2(segment.text);
|
|
3884
3871
|
return text !== void 0 && text.trim().length > 0;
|
|
@@ -3894,17 +3881,7 @@ function asString2(value) {
|
|
|
3894
3881
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
3895
3882
|
var ANSI_RESET6 = "\x1B[0m";
|
|
3896
3883
|
async function processMessages(options) {
|
|
3897
|
-
const {
|
|
3898
|
-
messages,
|
|
3899
|
-
searchRoots,
|
|
3900
|
-
repoRootPath,
|
|
3901
|
-
guidelinePatterns,
|
|
3902
|
-
guidelinePaths,
|
|
3903
|
-
treatFileSegmentsAsGuidelines,
|
|
3904
|
-
textParts,
|
|
3905
|
-
messageType,
|
|
3906
|
-
verbose
|
|
3907
|
-
} = options;
|
|
3884
|
+
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
3908
3885
|
const segments = [];
|
|
3909
3886
|
for (const message of messages) {
|
|
3910
3887
|
const content = message.content;
|
|
@@ -3948,21 +3925,6 @@ async function processMessages(options) {
|
|
|
3948
3925
|
}
|
|
3949
3926
|
try {
|
|
3950
3927
|
const fileContent = (await (0, import_promises6.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
3951
|
-
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
3952
|
-
messageType,
|
|
3953
|
-
resolvedPath,
|
|
3954
|
-
repoRootPath,
|
|
3955
|
-
guidelinePatterns,
|
|
3956
|
-
treatFileSegmentsAsGuidelines
|
|
3957
|
-
});
|
|
3958
|
-
if (classifyAsGuideline && guidelinePaths) {
|
|
3959
|
-
guidelinePaths.push(import_node_path6.default.resolve(resolvedPath));
|
|
3960
|
-
if (verbose) {
|
|
3961
|
-
console.log(` [Guideline] Found: ${displayPath}`);
|
|
3962
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
3963
|
-
}
|
|
3964
|
-
continue;
|
|
3965
|
-
}
|
|
3966
3928
|
segments.push({
|
|
3967
3929
|
type: "file",
|
|
3968
3930
|
path: displayPath,
|
|
@@ -3990,26 +3952,6 @@ async function processMessages(options) {
|
|
|
3990
3952
|
}
|
|
3991
3953
|
return segments;
|
|
3992
3954
|
}
|
|
3993
|
-
function shouldTreatAsGuideline(options) {
|
|
3994
|
-
const {
|
|
3995
|
-
messageType,
|
|
3996
|
-
resolvedPath,
|
|
3997
|
-
repoRootPath,
|
|
3998
|
-
guidelinePatterns,
|
|
3999
|
-
treatFileSegmentsAsGuidelines
|
|
4000
|
-
} = options;
|
|
4001
|
-
if (messageType !== "input") {
|
|
4002
|
-
return false;
|
|
4003
|
-
}
|
|
4004
|
-
if (treatFileSegmentsAsGuidelines) {
|
|
4005
|
-
return true;
|
|
4006
|
-
}
|
|
4007
|
-
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4008
|
-
return false;
|
|
4009
|
-
}
|
|
4010
|
-
const relativeToRepo = import_node_path6.default.relative(repoRootPath, resolvedPath);
|
|
4011
|
-
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
4012
|
-
}
|
|
4013
3955
|
function asString3(value) {
|
|
4014
3956
|
return typeof value === "string" ? value : void 0;
|
|
4015
3957
|
}
|
|
@@ -4210,7 +4152,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
4210
4152
|
}
|
|
4211
4153
|
return {
|
|
4212
4154
|
description: asString4(parsed.description),
|
|
4213
|
-
|
|
4155
|
+
name: asString4(parsed.name),
|
|
4214
4156
|
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
4215
4157
|
evaluator: parsed.evaluator
|
|
4216
4158
|
};
|
|
@@ -4246,20 +4188,18 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4246
4188
|
const absoluteTestPath = import_node_path7.default.resolve(evalFilePath);
|
|
4247
4189
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4248
4190
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
4249
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
4250
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
4251
4191
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
4252
4192
|
const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
|
|
4253
4193
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
4254
|
-
const
|
|
4255
|
-
const
|
|
4194
|
+
const fallbackEvalSet = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
4195
|
+
const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
|
|
4256
4196
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
4257
4197
|
const globalExecution = sidecar.execution;
|
|
4258
4198
|
if (verbose) {
|
|
4259
4199
|
console.log(`
|
|
4260
4200
|
[JSONL Dataset: ${evalFilePath}]`);
|
|
4261
4201
|
console.log(` Cases: ${rawCases.length}`);
|
|
4262
|
-
console.log(`
|
|
4202
|
+
console.log(` Eval set: ${evalSetName}`);
|
|
4263
4203
|
if (sidecar.description) {
|
|
4264
4204
|
console.log(` Description: ${sidecar.description}`);
|
|
4265
4205
|
}
|
|
@@ -4269,7 +4209,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4269
4209
|
const evalcase = rawCases[lineIndex];
|
|
4270
4210
|
const lineNumber = lineIndex + 1;
|
|
4271
4211
|
const id = asString4(evalcase.id);
|
|
4272
|
-
if (filterPattern && (!id || !
|
|
4212
|
+
if (filterPattern && (!id || !import_micromatch.default.isMatch(id, filterPattern))) {
|
|
4273
4213
|
continue;
|
|
4274
4214
|
}
|
|
4275
4215
|
const conversationId = asString4(evalcase.conversation_id);
|
|
@@ -4292,14 +4232,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4292
4232
|
continue;
|
|
4293
4233
|
}
|
|
4294
4234
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
4295
|
-
const guidelinePaths = [];
|
|
4296
4235
|
const inputTextParts = [];
|
|
4297
4236
|
const inputSegments = await processMessages({
|
|
4298
4237
|
messages: inputMessages,
|
|
4299
4238
|
searchRoots,
|
|
4300
4239
|
repoRootPath,
|
|
4301
|
-
guidelinePatterns,
|
|
4302
|
-
guidelinePaths,
|
|
4303
4240
|
textParts: inputTextParts,
|
|
4304
4241
|
messageType: "input",
|
|
4305
4242
|
verbose
|
|
@@ -4349,40 +4286,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4349
4286
|
userFilePaths.push(segment.resolvedPath);
|
|
4350
4287
|
}
|
|
4351
4288
|
}
|
|
4352
|
-
const allFilePaths = [
|
|
4353
|
-
...guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
|
|
4354
|
-
...userFilePaths
|
|
4355
|
-
];
|
|
4356
4289
|
const testCase = {
|
|
4357
4290
|
id,
|
|
4358
|
-
|
|
4291
|
+
eval_set: evalSetName,
|
|
4359
4292
|
conversation_id: conversationId,
|
|
4360
4293
|
question,
|
|
4361
4294
|
input: inputMessages,
|
|
4362
4295
|
input_segments: inputSegments,
|
|
4363
4296
|
expected_output: outputSegments,
|
|
4364
4297
|
reference_answer: referenceAnswer,
|
|
4365
|
-
|
|
4366
|
-
guideline_patterns: guidelinePatterns,
|
|
4367
|
-
file_paths: allFilePaths,
|
|
4298
|
+
file_paths: userFilePaths,
|
|
4368
4299
|
criteria: outcome ?? "",
|
|
4369
4300
|
evaluator: evalCaseEvaluatorKind,
|
|
4370
4301
|
assertions: evaluators
|
|
4371
4302
|
};
|
|
4372
|
-
if (verbose) {
|
|
4373
|
-
console.log(`
|
|
4374
|
-
[Test: ${id}]`);
|
|
4375
|
-
if (testCase.guideline_paths.length > 0) {
|
|
4376
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
4377
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
4378
|
-
console.log(` - ${guidelinePath}`);
|
|
4379
|
-
}
|
|
4380
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4381
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
4382
|
-
} else {
|
|
4383
|
-
console.log(" No guidelines found");
|
|
4384
|
-
}
|
|
4385
|
-
}
|
|
4386
4303
|
results.push(testCase);
|
|
4387
4304
|
}
|
|
4388
4305
|
return results;
|
|
@@ -4440,30 +4357,7 @@ function parseMetadata(suite) {
|
|
|
4440
4357
|
}
|
|
4441
4358
|
|
|
4442
4359
|
// src/evaluation/formatting/prompt-builder.ts
|
|
4443
|
-
var import_promises8 = require("fs/promises");
|
|
4444
|
-
var import_node_path8 = __toESM(require("path"), 1);
|
|
4445
|
-
var ANSI_YELLOW7 = "\x1B[33m";
|
|
4446
|
-
var ANSI_RESET8 = "\x1B[0m";
|
|
4447
4360
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
4448
|
-
const guidelineParts = [];
|
|
4449
|
-
for (const rawPath of testCase.guideline_paths) {
|
|
4450
|
-
const absolutePath = import_node_path8.default.resolve(rawPath);
|
|
4451
|
-
if (!await fileExists(absolutePath)) {
|
|
4452
|
-
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
4453
|
-
continue;
|
|
4454
|
-
}
|
|
4455
|
-
try {
|
|
4456
|
-
const content = (await (0, import_promises8.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
4457
|
-
guidelineParts.push({
|
|
4458
|
-
content,
|
|
4459
|
-
isFile: true,
|
|
4460
|
-
displayPath: import_node_path8.default.basename(absolutePath)
|
|
4461
|
-
});
|
|
4462
|
-
} catch (error) {
|
|
4463
|
-
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
4464
|
-
}
|
|
4465
|
-
}
|
|
4466
|
-
const guidelines = formatFileContents(guidelineParts);
|
|
4467
4361
|
const segmentsByMessage = [];
|
|
4468
4362
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
4469
4363
|
for (const segment of testCase.input_segments) {
|
|
@@ -4488,10 +4382,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
4488
4382
|
if (type === "file") {
|
|
4489
4383
|
const value = asString5(segment.value);
|
|
4490
4384
|
if (!value) continue;
|
|
4491
|
-
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
4492
|
-
messageSegments.push({ type: "guideline_ref", path: value });
|
|
4493
|
-
continue;
|
|
4494
|
-
}
|
|
4495
4385
|
const fileText = fileContentsByPath.get(value);
|
|
4496
4386
|
if (fileText !== void 0) {
|
|
4497
4387
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
@@ -4540,10 +4430,6 @@ ${messageContent}`);
|
|
|
4540
4430
|
} else {
|
|
4541
4431
|
const questionParts = [];
|
|
4542
4432
|
for (const segment of testCase.input_segments) {
|
|
4543
|
-
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
4544
|
-
questionParts.push(`<Attached: ${segment.path}>`);
|
|
4545
|
-
continue;
|
|
4546
|
-
}
|
|
4547
4433
|
const formattedContent = formatSegment(segment, mode);
|
|
4548
4434
|
if (formattedContent) {
|
|
4549
4435
|
questionParts.push(formattedContent);
|
|
@@ -4554,11 +4440,9 @@ ${messageContent}`);
|
|
|
4554
4440
|
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
4555
4441
|
messages: testCase.input,
|
|
4556
4442
|
segmentsByMessage,
|
|
4557
|
-
guidelinePatterns: testCase.guideline_patterns,
|
|
4558
|
-
guidelineContent: guidelines,
|
|
4559
4443
|
mode
|
|
4560
4444
|
}) : void 0;
|
|
4561
|
-
return { question,
|
|
4445
|
+
return { question, chatPrompt };
|
|
4562
4446
|
}
|
|
4563
4447
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
4564
4448
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -4573,14 +4457,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
4573
4457
|
return messagesWithContent > 1;
|
|
4574
4458
|
}
|
|
4575
4459
|
function buildChatPromptFromSegments(options) {
|
|
4576
|
-
const {
|
|
4577
|
-
messages,
|
|
4578
|
-
segmentsByMessage,
|
|
4579
|
-
guidelinePatterns,
|
|
4580
|
-
guidelineContent,
|
|
4581
|
-
systemPrompt,
|
|
4582
|
-
mode = "lm"
|
|
4583
|
-
} = options;
|
|
4460
|
+
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
4584
4461
|
if (messages.length === 0) {
|
|
4585
4462
|
return void 0;
|
|
4586
4463
|
}
|
|
@@ -4588,11 +4465,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
4588
4465
|
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
4589
4466
|
systemSegments.push(systemPrompt.trim());
|
|
4590
4467
|
}
|
|
4591
|
-
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
4592
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
4593
|
-
|
|
4594
|
-
${guidelineContent.trim()}`);
|
|
4595
|
-
}
|
|
4596
4468
|
let startIndex = 0;
|
|
4597
4469
|
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
4598
4470
|
const segments = segmentsByMessage[startIndex];
|
|
@@ -4628,15 +4500,8 @@ ${guidelineContent.trim()}`);
|
|
|
4628
4500
|
contentParts.push("@[Tool]:");
|
|
4629
4501
|
}
|
|
4630
4502
|
for (const segment of segments) {
|
|
4631
|
-
if (segment.type === "guideline_ref") {
|
|
4632
|
-
continue;
|
|
4633
|
-
}
|
|
4634
4503
|
const formatted = formatSegment(segment, mode);
|
|
4635
4504
|
if (formatted) {
|
|
4636
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
4637
|
-
if (isGuidelineRef) {
|
|
4638
|
-
continue;
|
|
4639
|
-
}
|
|
4640
4505
|
contentParts.push(formatted);
|
|
4641
4506
|
}
|
|
4642
4507
|
}
|
|
@@ -4654,30 +4519,27 @@ ${guidelineContent.trim()}`);
|
|
|
4654
4519
|
function asString5(value) {
|
|
4655
4520
|
return typeof value === "string" ? value : void 0;
|
|
4656
4521
|
}
|
|
4657
|
-
function logWarning5(message) {
|
|
4658
|
-
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
4659
|
-
}
|
|
4660
4522
|
|
|
4661
4523
|
// src/evaluation/yaml-parser.ts
|
|
4662
|
-
var
|
|
4524
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
4663
4525
|
var ANSI_RED3 = "\x1B[31m";
|
|
4664
|
-
var
|
|
4526
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
4665
4527
|
function resolveTests(suite) {
|
|
4666
4528
|
if (suite.tests !== void 0) return suite.tests;
|
|
4667
4529
|
if (suite.eval_cases !== void 0) {
|
|
4668
|
-
|
|
4530
|
+
logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
|
|
4669
4531
|
return suite.eval_cases;
|
|
4670
4532
|
}
|
|
4671
4533
|
if (suite.evalcases !== void 0) {
|
|
4672
|
-
|
|
4534
|
+
logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
|
|
4673
4535
|
return suite.evalcases;
|
|
4674
4536
|
}
|
|
4675
4537
|
return void 0;
|
|
4676
4538
|
}
|
|
4677
4539
|
async function readTestSuiteMetadata(testFilePath) {
|
|
4678
4540
|
try {
|
|
4679
|
-
const absolutePath =
|
|
4680
|
-
const content = await (0,
|
|
4541
|
+
const absolutePath = import_node_path8.default.resolve(testFilePath);
|
|
4542
|
+
const content = await (0, import_promises8.readFile)(absolutePath, "utf8");
|
|
4681
4543
|
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4682
4544
|
if (!isJsonObject(parsed)) {
|
|
4683
4545
|
return {};
|
|
@@ -4706,6 +4568,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
4706
4568
|
tests,
|
|
4707
4569
|
trials: extractTrialsConfig(parsed),
|
|
4708
4570
|
targets: extractTargetsFromSuite(parsed),
|
|
4571
|
+
workers: extractWorkersFromSuite(parsed),
|
|
4709
4572
|
cacheConfig: extractCacheConfig(parsed),
|
|
4710
4573
|
totalBudgetUsd: extractTotalBudgetUsd(parsed),
|
|
4711
4574
|
...metadata !== void 0 && { metadata },
|
|
@@ -4728,26 +4591,25 @@ var loadEvalCases = loadTests;
|
|
|
4728
4591
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
4729
4592
|
const verbose = options?.verbose ?? false;
|
|
4730
4593
|
const filterPattern = options?.filter;
|
|
4731
|
-
const absoluteTestPath =
|
|
4594
|
+
const absoluteTestPath = import_node_path8.default.resolve(evalFilePath);
|
|
4732
4595
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4733
4596
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
4734
4597
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
4735
|
-
const
|
|
4736
|
-
const rawFile = await (0, import_promises9.readFile)(absoluteTestPath, "utf8");
|
|
4598
|
+
const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
|
|
4737
4599
|
const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
|
|
4738
4600
|
if (!isJsonObject(interpolated)) {
|
|
4739
4601
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
4740
4602
|
}
|
|
4741
4603
|
const suite = interpolated;
|
|
4742
|
-
const
|
|
4743
|
-
const
|
|
4744
|
-
const
|
|
4604
|
+
const evalSetNameFromSuite = asString6(suite.name)?.trim();
|
|
4605
|
+
const fallbackEvalSet = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
4606
|
+
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
4745
4607
|
const rawTestcases = resolveTests(suite);
|
|
4746
4608
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
4747
|
-
const evalFileDir =
|
|
4609
|
+
const evalFileDir = import_node_path8.default.dirname(absoluteTestPath);
|
|
4748
4610
|
let expandedTestcases;
|
|
4749
4611
|
if (typeof rawTestcases === "string") {
|
|
4750
|
-
const externalPath =
|
|
4612
|
+
const externalPath = import_node_path8.default.resolve(evalFileDir, rawTestcases);
|
|
4751
4613
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
4752
4614
|
} else if (Array.isArray(rawTestcases)) {
|
|
4753
4615
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -4761,18 +4623,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4761
4623
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
4762
4624
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
4763
4625
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
4764
|
-
|
|
4626
|
+
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
4765
4627
|
}
|
|
4766
4628
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
4767
4629
|
const results = [];
|
|
4768
4630
|
for (const rawEvalcase of expandedTestcases) {
|
|
4769
4631
|
if (!isJsonObject(rawEvalcase)) {
|
|
4770
|
-
|
|
4632
|
+
logWarning5("Skipping invalid test entry (expected object)");
|
|
4771
4633
|
continue;
|
|
4772
4634
|
}
|
|
4773
4635
|
const evalcase = rawEvalcase;
|
|
4774
4636
|
const id = asString6(evalcase.id);
|
|
4775
|
-
if (filterPattern && (!id || !
|
|
4637
|
+
if (filterPattern && (!id || !import_micromatch2.default.isMatch(id, filterPattern))) {
|
|
4776
4638
|
continue;
|
|
4777
4639
|
}
|
|
4778
4640
|
const conversationId = asString6(evalcase.conversation_id);
|
|
@@ -4780,7 +4642,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4780
4642
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
4781
4643
|
outcome = asString6(evalcase.expected_outcome);
|
|
4782
4644
|
if (outcome) {
|
|
4783
|
-
|
|
4645
|
+
logWarning5(
|
|
4784
4646
|
`Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
4785
4647
|
);
|
|
4786
4648
|
}
|
|
@@ -4800,15 +4662,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4800
4662
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
4801
4663
|
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
4802
4664
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
4803
|
-
const guidelinePaths = [];
|
|
4804
4665
|
const inputTextParts = [];
|
|
4805
4666
|
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
4806
4667
|
messages: effectiveSuiteInputMessages,
|
|
4807
4668
|
searchRoots,
|
|
4808
4669
|
repoRootPath,
|
|
4809
|
-
guidelinePatterns,
|
|
4810
|
-
guidelinePaths,
|
|
4811
|
-
treatFileSegmentsAsGuidelines: true,
|
|
4812
4670
|
textParts: inputTextParts,
|
|
4813
4671
|
messageType: "input",
|
|
4814
4672
|
verbose
|
|
@@ -4817,8 +4675,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4817
4675
|
messages: testInputMessages,
|
|
4818
4676
|
searchRoots,
|
|
4819
4677
|
repoRootPath,
|
|
4820
|
-
guidelinePatterns,
|
|
4821
|
-
guidelinePaths,
|
|
4822
4678
|
textParts: inputTextParts,
|
|
4823
4679
|
messageType: "input",
|
|
4824
4680
|
verbose
|
|
@@ -4867,26 +4723,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4867
4723
|
userFilePaths.push(segment.resolvedPath);
|
|
4868
4724
|
}
|
|
4869
4725
|
}
|
|
4870
|
-
const allFilePaths = [
|
|
4871
|
-
...guidelinePaths.map((guidelinePath) => import_node_path9.default.resolve(guidelinePath)),
|
|
4872
|
-
...userFilePaths
|
|
4873
|
-
];
|
|
4874
4726
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
4875
4727
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
4876
4728
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
4877
4729
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
4878
4730
|
const testCase = {
|
|
4879
4731
|
id,
|
|
4880
|
-
|
|
4732
|
+
eval_set: evalSetName,
|
|
4881
4733
|
conversation_id: conversationId,
|
|
4882
4734
|
question,
|
|
4883
4735
|
input: inputMessages,
|
|
4884
4736
|
input_segments: inputSegments,
|
|
4885
4737
|
expected_output: outputSegments,
|
|
4886
4738
|
reference_answer: referenceAnswer,
|
|
4887
|
-
|
|
4888
|
-
guideline_patterns: guidelinePatterns,
|
|
4889
|
-
file_paths: allFilePaths,
|
|
4739
|
+
file_paths: userFilePaths,
|
|
4890
4740
|
criteria: outcome ?? "",
|
|
4891
4741
|
evaluator: evalCaseEvaluatorKind,
|
|
4892
4742
|
assertions: evaluators,
|
|
@@ -4894,20 +4744,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4894
4744
|
metadata,
|
|
4895
4745
|
targets: caseTargets
|
|
4896
4746
|
};
|
|
4897
|
-
if (verbose) {
|
|
4898
|
-
console.log(`
|
|
4899
|
-
[Test: ${id}]`);
|
|
4900
|
-
if (testCase.guideline_paths.length > 0) {
|
|
4901
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
4902
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
4903
|
-
console.log(` - ${guidelinePath}`);
|
|
4904
|
-
}
|
|
4905
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4906
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
4907
|
-
} else {
|
|
4908
|
-
console.log(" No guidelines found");
|
|
4909
|
-
}
|
|
4910
|
-
}
|
|
4911
4747
|
results.push(testCase);
|
|
4912
4748
|
}
|
|
4913
4749
|
return { tests: results, parsed: suite };
|
|
@@ -4926,7 +4762,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
4926
4762
|
if (!isJsonObject(raw)) return void 0;
|
|
4927
4763
|
const obj = raw;
|
|
4928
4764
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
4929
|
-
|
|
4765
|
+
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
4930
4766
|
}
|
|
4931
4767
|
const commandSource = obj.command ?? obj.script;
|
|
4932
4768
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
@@ -4934,8 +4770,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
4934
4770
|
if (commandArr.length === 0) return void 0;
|
|
4935
4771
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
4936
4772
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
4937
|
-
if (cwd && !
|
|
4938
|
-
cwd =
|
|
4773
|
+
if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
|
|
4774
|
+
cwd = import_node_path8.default.resolve(evalFileDir, cwd);
|
|
4939
4775
|
}
|
|
4940
4776
|
const config = { command: commandArr };
|
|
4941
4777
|
if (timeoutMs !== void 0) {
|
|
@@ -5025,10 +4861,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
5025
4861
|
}
|
|
5026
4862
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
5027
4863
|
if (typeof raw === "string") {
|
|
5028
|
-
const workspaceFilePath =
|
|
4864
|
+
const workspaceFilePath = import_node_path8.default.resolve(evalFileDir, raw);
|
|
5029
4865
|
let content;
|
|
5030
4866
|
try {
|
|
5031
|
-
content = await (0,
|
|
4867
|
+
content = await (0, import_promises8.readFile)(workspaceFilePath, "utf8");
|
|
5032
4868
|
} catch {
|
|
5033
4869
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
5034
4870
|
}
|
|
@@ -5038,7 +4874,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
5038
4874
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
5039
4875
|
);
|
|
5040
4876
|
}
|
|
5041
|
-
const workspaceFileDir =
|
|
4877
|
+
const workspaceFileDir = import_node_path8.default.dirname(workspaceFilePath);
|
|
5042
4878
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
5043
4879
|
}
|
|
5044
4880
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -5058,8 +4894,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
5058
4894
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
5059
4895
|
}
|
|
5060
4896
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
5061
|
-
if (template && !
|
|
5062
|
-
template =
|
|
4897
|
+
if (template && !import_node_path8.default.isAbsolute(template)) {
|
|
4898
|
+
template = import_node_path8.default.resolve(evalFileDir, template);
|
|
5063
4899
|
}
|
|
5064
4900
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
5065
4901
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -5109,28 +4945,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
5109
4945
|
function asString6(value) {
|
|
5110
4946
|
return typeof value === "string" ? value : void 0;
|
|
5111
4947
|
}
|
|
5112
|
-
function
|
|
4948
|
+
function logWarning5(message, details) {
|
|
5113
4949
|
if (details && details.length > 0) {
|
|
5114
4950
|
const detailBlock = details.join("\n");
|
|
5115
|
-
console.warn(`${
|
|
5116
|
-
${detailBlock}${
|
|
4951
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}
|
|
4952
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
5117
4953
|
} else {
|
|
5118
|
-
console.warn(`${
|
|
4954
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
5119
4955
|
}
|
|
5120
4956
|
}
|
|
5121
4957
|
function logError3(message, details) {
|
|
5122
4958
|
if (details && details.length > 0) {
|
|
5123
4959
|
const detailBlock = details.join("\n");
|
|
5124
4960
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
5125
|
-
${detailBlock}${
|
|
4961
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
5126
4962
|
} else {
|
|
5127
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
4963
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET8}`);
|
|
5128
4964
|
}
|
|
5129
4965
|
}
|
|
5130
4966
|
|
|
5131
4967
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
5132
4968
|
var import_node_fs2 = require("fs");
|
|
5133
|
-
var
|
|
4969
|
+
var import_node_path9 = __toESM(require("path"), 1);
|
|
5134
4970
|
var import_yaml5 = require("yaml");
|
|
5135
4971
|
function codeGraderInstruction(graderName, description) {
|
|
5136
4972
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -5375,7 +5211,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
5375
5211
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
5376
5212
|
const content = (0, import_node_fs2.readFileSync)(evalYamlPath, "utf8");
|
|
5377
5213
|
const parsed = (0, import_yaml5.parse)(content);
|
|
5378
|
-
return transpileEvalYaml(parsed,
|
|
5214
|
+
return transpileEvalYaml(parsed, import_node_path9.default.basename(evalYamlPath));
|
|
5379
5215
|
}
|
|
5380
5216
|
function getOutputFilenames(result) {
|
|
5381
5217
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -5394,11 +5230,11 @@ function getOutputFilenames(result) {
|
|
|
5394
5230
|
|
|
5395
5231
|
// src/evaluation/file-utils.ts
|
|
5396
5232
|
var import_node_fs3 = require("fs");
|
|
5397
|
-
var
|
|
5398
|
-
var
|
|
5233
|
+
var import_promises9 = require("fs/promises");
|
|
5234
|
+
var import_node_path10 = __toESM(require("path"), 1);
|
|
5399
5235
|
async function fileExists2(filePath) {
|
|
5400
5236
|
try {
|
|
5401
|
-
await (0,
|
|
5237
|
+
await (0, import_promises9.access)(filePath, import_node_fs3.constants.F_OK);
|
|
5402
5238
|
return true;
|
|
5403
5239
|
} catch {
|
|
5404
5240
|
return false;
|
|
@@ -5408,22 +5244,22 @@ function normalizeLineEndings(content) {
|
|
|
5408
5244
|
return content.replace(/\r\n/g, "\n");
|
|
5409
5245
|
}
|
|
5410
5246
|
async function readTextFile(filePath) {
|
|
5411
|
-
const content = await (0,
|
|
5247
|
+
const content = await (0, import_promises9.readFile)(filePath, "utf8");
|
|
5412
5248
|
return normalizeLineEndings(content);
|
|
5413
5249
|
}
|
|
5414
5250
|
async function readJsonFile(filePath) {
|
|
5415
|
-
const content = await (0,
|
|
5251
|
+
const content = await (0, import_promises9.readFile)(filePath, "utf8");
|
|
5416
5252
|
return JSON.parse(content);
|
|
5417
5253
|
}
|
|
5418
5254
|
async function findGitRoot(startPath) {
|
|
5419
|
-
let currentDir =
|
|
5420
|
-
const root =
|
|
5255
|
+
let currentDir = import_node_path10.default.dirname(import_node_path10.default.resolve(startPath));
|
|
5256
|
+
const root = import_node_path10.default.parse(currentDir).root;
|
|
5421
5257
|
while (currentDir !== root) {
|
|
5422
|
-
const gitPath =
|
|
5258
|
+
const gitPath = import_node_path10.default.join(currentDir, ".git");
|
|
5423
5259
|
if (await fileExists2(gitPath)) {
|
|
5424
5260
|
return currentDir;
|
|
5425
5261
|
}
|
|
5426
|
-
const parentDir =
|
|
5262
|
+
const parentDir = import_node_path10.default.dirname(currentDir);
|
|
5427
5263
|
if (parentDir === currentDir) {
|
|
5428
5264
|
break;
|
|
5429
5265
|
}
|
|
@@ -5434,8 +5270,8 @@ async function findGitRoot(startPath) {
|
|
|
5434
5270
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
5435
5271
|
const directories = [];
|
|
5436
5272
|
const seen = /* @__PURE__ */ new Set();
|
|
5437
|
-
const boundary =
|
|
5438
|
-
let current =
|
|
5273
|
+
const boundary = import_node_path10.default.resolve(repoRoot);
|
|
5274
|
+
let current = import_node_path10.default.resolve(import_node_path10.default.dirname(filePath));
|
|
5439
5275
|
while (current !== void 0) {
|
|
5440
5276
|
if (!seen.has(current)) {
|
|
5441
5277
|
directories.push(current);
|
|
@@ -5444,7 +5280,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
5444
5280
|
if (current === boundary) {
|
|
5445
5281
|
break;
|
|
5446
5282
|
}
|
|
5447
|
-
const parent =
|
|
5283
|
+
const parent = import_node_path10.default.dirname(current);
|
|
5448
5284
|
if (parent === current) {
|
|
5449
5285
|
break;
|
|
5450
5286
|
}
|
|
@@ -5458,16 +5294,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
5458
5294
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
5459
5295
|
const uniqueRoots = [];
|
|
5460
5296
|
const addRoot = (root) => {
|
|
5461
|
-
const normalized =
|
|
5297
|
+
const normalized = import_node_path10.default.resolve(root);
|
|
5462
5298
|
if (!uniqueRoots.includes(normalized)) {
|
|
5463
5299
|
uniqueRoots.push(normalized);
|
|
5464
5300
|
}
|
|
5465
5301
|
};
|
|
5466
|
-
let currentDir =
|
|
5302
|
+
let currentDir = import_node_path10.default.dirname(evalPath);
|
|
5467
5303
|
let reachedBoundary = false;
|
|
5468
5304
|
while (!reachedBoundary) {
|
|
5469
5305
|
addRoot(currentDir);
|
|
5470
|
-
const parentDir =
|
|
5306
|
+
const parentDir = import_node_path10.default.dirname(currentDir);
|
|
5471
5307
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
5472
5308
|
reachedBoundary = true;
|
|
5473
5309
|
} else {
|
|
@@ -5485,16 +5321,16 @@ function trimLeadingSeparators2(value) {
|
|
|
5485
5321
|
async function resolveFileReference3(rawValue, searchRoots) {
|
|
5486
5322
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
5487
5323
|
const potentialPaths = [];
|
|
5488
|
-
if (
|
|
5489
|
-
potentialPaths.push(
|
|
5324
|
+
if (import_node_path10.default.isAbsolute(rawValue)) {
|
|
5325
|
+
potentialPaths.push(import_node_path10.default.normalize(rawValue));
|
|
5490
5326
|
}
|
|
5491
5327
|
for (const base of searchRoots) {
|
|
5492
|
-
potentialPaths.push(
|
|
5328
|
+
potentialPaths.push(import_node_path10.default.resolve(base, displayPath));
|
|
5493
5329
|
}
|
|
5494
5330
|
const attempted = [];
|
|
5495
5331
|
const seen = /* @__PURE__ */ new Set();
|
|
5496
5332
|
for (const candidate of potentialPaths) {
|
|
5497
|
-
const absoluteCandidate =
|
|
5333
|
+
const absoluteCandidate = import_node_path10.default.resolve(candidate);
|
|
5498
5334
|
if (seen.has(absoluteCandidate)) {
|
|
5499
5335
|
continue;
|
|
5500
5336
|
}
|
|
@@ -5728,10 +5564,10 @@ function buildChatPrompt(request) {
|
|
|
5728
5564
|
if (hasSystemMessage) {
|
|
5729
5565
|
return provided;
|
|
5730
5566
|
}
|
|
5731
|
-
const systemContent2 = resolveSystemContent(request
|
|
5567
|
+
const systemContent2 = resolveSystemContent(request);
|
|
5732
5568
|
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
5733
5569
|
}
|
|
5734
|
-
const systemContent = resolveSystemContent(request
|
|
5570
|
+
const systemContent = resolveSystemContent(request);
|
|
5735
5571
|
const userContent = request.question.trim();
|
|
5736
5572
|
const prompt = [
|
|
5737
5573
|
{ role: "system", content: systemContent },
|
|
@@ -5739,18 +5575,13 @@ function buildChatPrompt(request) {
|
|
|
5739
5575
|
];
|
|
5740
5576
|
return prompt;
|
|
5741
5577
|
}
|
|
5742
|
-
function resolveSystemContent(request
|
|
5578
|
+
function resolveSystemContent(request) {
|
|
5743
5579
|
const systemSegments = [];
|
|
5744
5580
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
5745
5581
|
systemSegments.push(request.systemPrompt.trim());
|
|
5746
5582
|
} else {
|
|
5747
5583
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
5748
5584
|
}
|
|
5749
|
-
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
5750
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
5751
|
-
|
|
5752
|
-
${request.guidelines.trim()}`);
|
|
5753
|
-
}
|
|
5754
5585
|
return systemSegments.join("\n\n");
|
|
5755
5586
|
}
|
|
5756
5587
|
function toModelMessages(chatPrompt) {
|
|
@@ -5933,8 +5764,8 @@ async function withRetry(fn, retryConfig, signal) {
|
|
|
5933
5764
|
var import_node_child_process = require("child_process");
|
|
5934
5765
|
var import_node_crypto = require("crypto");
|
|
5935
5766
|
var import_node_fs4 = require("fs");
|
|
5936
|
-
var
|
|
5937
|
-
var
|
|
5767
|
+
var import_promises10 = require("fs/promises");
|
|
5768
|
+
var import_node_path12 = __toESM(require("path"), 1);
|
|
5938
5769
|
|
|
5939
5770
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
5940
5771
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -5990,17 +5821,11 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
5990
5821
|
}
|
|
5991
5822
|
|
|
5992
5823
|
// src/evaluation/providers/preread.ts
|
|
5993
|
-
var
|
|
5994
|
-
function buildPromptDocument(request, inputFiles
|
|
5824
|
+
var import_node_path11 = __toESM(require("path"), 1);
|
|
5825
|
+
function buildPromptDocument(request, inputFiles) {
|
|
5995
5826
|
const parts = [];
|
|
5996
|
-
const guidelineFiles = collectGuidelineFiles(
|
|
5997
|
-
inputFiles,
|
|
5998
|
-
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
5999
|
-
options?.guidelineOverrides
|
|
6000
|
-
);
|
|
6001
5827
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
6002
|
-
const
|
|
6003
|
-
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
5828
|
+
const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
|
|
6004
5829
|
if (prereadBlock.length > 0) {
|
|
6005
5830
|
parts.push("\n", prereadBlock);
|
|
6006
5831
|
}
|
|
@@ -6013,62 +5838,36 @@ function normalizeInputFiles(inputFiles) {
|
|
|
6013
5838
|
}
|
|
6014
5839
|
const deduped = /* @__PURE__ */ new Map();
|
|
6015
5840
|
for (const inputFile of inputFiles) {
|
|
6016
|
-
const absolutePath =
|
|
5841
|
+
const absolutePath = import_node_path11.default.resolve(inputFile);
|
|
6017
5842
|
if (!deduped.has(absolutePath)) {
|
|
6018
5843
|
deduped.set(absolutePath, absolutePath);
|
|
6019
5844
|
}
|
|
6020
5845
|
}
|
|
6021
5846
|
return Array.from(deduped.values());
|
|
6022
5847
|
}
|
|
6023
|
-
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
6024
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
6025
|
-
return [];
|
|
6026
|
-
}
|
|
6027
|
-
const unique = /* @__PURE__ */ new Map();
|
|
6028
|
-
for (const inputFile of inputFiles) {
|
|
6029
|
-
const absolutePath = import_node_path12.default.resolve(inputFile);
|
|
6030
|
-
if (overrides?.has(absolutePath)) {
|
|
6031
|
-
if (!unique.has(absolutePath)) {
|
|
6032
|
-
unique.set(absolutePath, absolutePath);
|
|
6033
|
-
}
|
|
6034
|
-
continue;
|
|
6035
|
-
}
|
|
6036
|
-
const normalized = absolutePath.split(import_node_path12.default.sep).join("/");
|
|
6037
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
6038
|
-
if (!unique.has(absolutePath)) {
|
|
6039
|
-
unique.set(absolutePath, absolutePath);
|
|
6040
|
-
}
|
|
6041
|
-
}
|
|
6042
|
-
}
|
|
6043
|
-
return Array.from(unique.values());
|
|
6044
|
-
}
|
|
6045
5848
|
function collectInputFiles(inputFiles) {
|
|
6046
5849
|
if (!inputFiles || inputFiles.length === 0) {
|
|
6047
5850
|
return [];
|
|
6048
5851
|
}
|
|
6049
5852
|
const unique = /* @__PURE__ */ new Map();
|
|
6050
5853
|
for (const inputFile of inputFiles) {
|
|
6051
|
-
const absolutePath =
|
|
5854
|
+
const absolutePath = import_node_path11.default.resolve(inputFile);
|
|
6052
5855
|
if (!unique.has(absolutePath)) {
|
|
6053
5856
|
unique.set(absolutePath, absolutePath);
|
|
6054
5857
|
}
|
|
6055
5858
|
}
|
|
6056
5859
|
return Array.from(unique.values());
|
|
6057
5860
|
}
|
|
6058
|
-
function buildMandatoryPrereadBlock(
|
|
6059
|
-
if (
|
|
5861
|
+
function buildMandatoryPrereadBlock(inputFiles) {
|
|
5862
|
+
if (inputFiles.length === 0) {
|
|
6060
5863
|
return "";
|
|
6061
5864
|
}
|
|
6062
5865
|
const buildList = (files) => files.map((absolutePath) => {
|
|
6063
|
-
const fileName =
|
|
5866
|
+
const fileName = import_node_path11.default.basename(absolutePath);
|
|
6064
5867
|
const fileUri = pathToFileUri(absolutePath);
|
|
6065
5868
|
return `* [${fileName}](${fileUri})`;
|
|
6066
5869
|
});
|
|
6067
5870
|
const sections = [];
|
|
6068
|
-
if (guidelineFiles.length > 0) {
|
|
6069
|
-
sections.push(`Read all guideline files:
|
|
6070
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
6071
|
-
}
|
|
6072
5871
|
if (inputFiles.length > 0) {
|
|
6073
5872
|
sections.push(`Read all input files:
|
|
6074
5873
|
${buildList(inputFiles).join("\n")}.`);
|
|
@@ -6080,7 +5879,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
6080
5879
|
return sections.join("\n");
|
|
6081
5880
|
}
|
|
6082
5881
|
function pathToFileUri(filePath) {
|
|
6083
|
-
const absolutePath =
|
|
5882
|
+
const absolutePath = import_node_path11.default.isAbsolute(filePath) ? filePath : import_node_path11.default.resolve(filePath);
|
|
6084
5883
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
6085
5884
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
6086
5885
|
return `file:///${normalizedPath}`;
|
|
@@ -6227,10 +6026,10 @@ var ClaudeCliProvider = class {
|
|
|
6227
6026
|
}
|
|
6228
6027
|
resolveCwd(cwdOverride) {
|
|
6229
6028
|
if (cwdOverride) {
|
|
6230
|
-
return
|
|
6029
|
+
return import_node_path12.default.resolve(cwdOverride);
|
|
6231
6030
|
}
|
|
6232
6031
|
if (this.config.cwd) {
|
|
6233
|
-
return
|
|
6032
|
+
return import_node_path12.default.resolve(this.config.cwd);
|
|
6234
6033
|
}
|
|
6235
6034
|
return void 0;
|
|
6236
6035
|
}
|
|
@@ -6240,9 +6039,9 @@ var ClaudeCliProvider = class {
|
|
|
6240
6039
|
return void 0;
|
|
6241
6040
|
}
|
|
6242
6041
|
if (this.config.logDir) {
|
|
6243
|
-
return
|
|
6042
|
+
return import_node_path12.default.resolve(this.config.logDir);
|
|
6244
6043
|
}
|
|
6245
|
-
return
|
|
6044
|
+
return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
6246
6045
|
}
|
|
6247
6046
|
async createStreamLogger(request) {
|
|
6248
6047
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6250,13 +6049,13 @@ var ClaudeCliProvider = class {
|
|
|
6250
6049
|
return void 0;
|
|
6251
6050
|
}
|
|
6252
6051
|
try {
|
|
6253
|
-
await (0,
|
|
6052
|
+
await (0, import_promises10.mkdir)(logDir, { recursive: true });
|
|
6254
6053
|
} catch (error) {
|
|
6255
6054
|
const message = error instanceof Error ? error.message : String(error);
|
|
6256
6055
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6257
6056
|
return void 0;
|
|
6258
6057
|
}
|
|
6259
|
-
const filePath =
|
|
6058
|
+
const filePath = import_node_path12.default.join(logDir, buildLogFilename(request, this.targetName));
|
|
6260
6059
|
try {
|
|
6261
6060
|
const logger = await ClaudeCliStreamLogger.create({
|
|
6262
6061
|
filePath,
|
|
@@ -6576,8 +6375,8 @@ function tryParseJson(line) {
|
|
|
6576
6375
|
// src/evaluation/providers/claude-sdk.ts
|
|
6577
6376
|
var import_node_crypto2 = require("crypto");
|
|
6578
6377
|
var import_node_fs5 = require("fs");
|
|
6579
|
-
var
|
|
6580
|
-
var
|
|
6378
|
+
var import_promises11 = require("fs/promises");
|
|
6379
|
+
var import_node_path13 = __toESM(require("path"), 1);
|
|
6581
6380
|
var claudeSdkModule = null;
|
|
6582
6381
|
async function loadClaudeSdk() {
|
|
6583
6382
|
if (!claudeSdkModule) {
|
|
@@ -6737,10 +6536,10 @@ var ClaudeSdkProvider = class {
|
|
|
6737
6536
|
}
|
|
6738
6537
|
resolveCwd(cwdOverride) {
|
|
6739
6538
|
if (cwdOverride) {
|
|
6740
|
-
return
|
|
6539
|
+
return import_node_path13.default.resolve(cwdOverride);
|
|
6741
6540
|
}
|
|
6742
6541
|
if (this.config.cwd) {
|
|
6743
|
-
return
|
|
6542
|
+
return import_node_path13.default.resolve(this.config.cwd);
|
|
6744
6543
|
}
|
|
6745
6544
|
return void 0;
|
|
6746
6545
|
}
|
|
@@ -6750,9 +6549,9 @@ var ClaudeSdkProvider = class {
|
|
|
6750
6549
|
return void 0;
|
|
6751
6550
|
}
|
|
6752
6551
|
if (this.config.logDir) {
|
|
6753
|
-
return
|
|
6552
|
+
return import_node_path13.default.resolve(this.config.logDir);
|
|
6754
6553
|
}
|
|
6755
|
-
return
|
|
6554
|
+
return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "claude");
|
|
6756
6555
|
}
|
|
6757
6556
|
async createStreamLogger(request) {
|
|
6758
6557
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6760,13 +6559,13 @@ var ClaudeSdkProvider = class {
|
|
|
6760
6559
|
return void 0;
|
|
6761
6560
|
}
|
|
6762
6561
|
try {
|
|
6763
|
-
await (0,
|
|
6562
|
+
await (0, import_promises11.mkdir)(logDir, { recursive: true });
|
|
6764
6563
|
} catch (error) {
|
|
6765
6564
|
const message = error instanceof Error ? error.message : String(error);
|
|
6766
6565
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
6767
6566
|
return void 0;
|
|
6768
6567
|
}
|
|
6769
|
-
const filePath =
|
|
6568
|
+
const filePath = import_node_path13.default.join(logDir, buildLogFilename2(request, this.targetName));
|
|
6770
6569
|
try {
|
|
6771
6570
|
const logger = await ClaudeStreamLogger.create({
|
|
6772
6571
|
filePath,
|
|
@@ -6971,9 +6770,9 @@ function formatElapsed2(startedAt) {
|
|
|
6971
6770
|
|
|
6972
6771
|
// src/evaluation/providers/cli.ts
|
|
6973
6772
|
var import_node_child_process2 = require("child_process");
|
|
6974
|
-
var
|
|
6773
|
+
var import_promises12 = __toESM(require("fs/promises"), 1);
|
|
6975
6774
|
var import_node_os = __toESM(require("os"), 1);
|
|
6976
|
-
var
|
|
6775
|
+
var import_node_path14 = __toESM(require("path"), 1);
|
|
6977
6776
|
var import_node_util = require("util");
|
|
6978
6777
|
var import_zod2 = require("zod");
|
|
6979
6778
|
var ToolCallSchema = import_zod2.z.object({
|
|
@@ -7182,7 +6981,6 @@ var CliProvider = class {
|
|
|
7182
6981
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
7183
6982
|
{
|
|
7184
6983
|
question: "",
|
|
7185
|
-
guidelines: "",
|
|
7186
6984
|
inputFiles: batchInputFiles,
|
|
7187
6985
|
evalCaseId: "batch",
|
|
7188
6986
|
attempt: 0
|
|
@@ -7370,7 +7168,7 @@ var CliProvider = class {
|
|
|
7370
7168
|
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
7371
7169
|
} finally {
|
|
7372
7170
|
if (!this.keepTempFiles) {
|
|
7373
|
-
await
|
|
7171
|
+
await import_promises12.default.unlink(filePath).catch(() => {
|
|
7374
7172
|
});
|
|
7375
7173
|
}
|
|
7376
7174
|
}
|
|
@@ -7415,7 +7213,6 @@ var CliProvider = class {
|
|
|
7415
7213
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
7416
7214
|
{
|
|
7417
7215
|
question: "",
|
|
7418
|
-
guidelines: "",
|
|
7419
7216
|
inputFiles: [],
|
|
7420
7217
|
evalCaseId: "healthcheck",
|
|
7421
7218
|
attempt: 0
|
|
@@ -7451,12 +7248,11 @@ var CliProvider = class {
|
|
|
7451
7248
|
async function buildTemplateValues(request, config, outputFilePath) {
|
|
7452
7249
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
7453
7250
|
const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
|
|
7454
|
-
await
|
|
7251
|
+
await import_promises12.default.writeFile(promptFilePath, request.question ?? "", "utf8");
|
|
7455
7252
|
return {
|
|
7456
7253
|
values: {
|
|
7457
7254
|
PROMPT: shellEscape(request.question ?? ""),
|
|
7458
7255
|
PROMPT_FILE: shellEscape(promptFilePath),
|
|
7459
|
-
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
7460
7256
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
7461
7257
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
7462
7258
|
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
@@ -7469,7 +7265,7 @@ async function cleanupTempFile(filePath, keepTempFiles) {
|
|
|
7469
7265
|
if (!filePath || keepTempFiles) {
|
|
7470
7266
|
return;
|
|
7471
7267
|
}
|
|
7472
|
-
await
|
|
7268
|
+
await import_promises12.default.unlink(filePath).catch(() => {
|
|
7473
7269
|
});
|
|
7474
7270
|
}
|
|
7475
7271
|
function normalizeInputFiles2(inputFiles) {
|
|
@@ -7478,7 +7274,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
7478
7274
|
}
|
|
7479
7275
|
const unique = /* @__PURE__ */ new Map();
|
|
7480
7276
|
for (const inputFile of inputFiles) {
|
|
7481
|
-
const absolutePath =
|
|
7277
|
+
const absolutePath = import_node_path14.default.resolve(inputFile);
|
|
7482
7278
|
if (!unique.has(absolutePath)) {
|
|
7483
7279
|
unique.set(absolutePath, absolutePath);
|
|
7484
7280
|
}
|
|
@@ -7492,7 +7288,7 @@ function formatFileList(files, template) {
|
|
|
7492
7288
|
const formatter = template ?? "{path}";
|
|
7493
7289
|
return files.map((filePath) => {
|
|
7494
7290
|
const escapedPath = shellEscape(filePath);
|
|
7495
|
-
const escapedName = shellEscape(
|
|
7291
|
+
const escapedName = shellEscape(import_node_path14.default.basename(filePath));
|
|
7496
7292
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
7497
7293
|
}).join(" ");
|
|
7498
7294
|
}
|
|
@@ -7516,7 +7312,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
7516
7312
|
const safeEvalId = evalCaseId || "unknown";
|
|
7517
7313
|
const timestamp = Date.now();
|
|
7518
7314
|
const random = Math.random().toString(36).substring(2, 9);
|
|
7519
|
-
return
|
|
7315
|
+
return import_node_path14.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
7520
7316
|
}
|
|
7521
7317
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
7522
7318
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -7529,8 +7325,8 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
7529
7325
|
// src/evaluation/providers/codex.ts
|
|
7530
7326
|
var import_node_crypto3 = require("crypto");
|
|
7531
7327
|
var import_node_fs6 = require("fs");
|
|
7532
|
-
var
|
|
7533
|
-
var
|
|
7328
|
+
var import_promises13 = require("fs/promises");
|
|
7329
|
+
var import_node_path15 = __toESM(require("path"), 1);
|
|
7534
7330
|
|
|
7535
7331
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
7536
7332
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -7765,10 +7561,10 @@ ${basePrompt}` : basePrompt;
|
|
|
7765
7561
|
}
|
|
7766
7562
|
resolveCwd(cwdOverride) {
|
|
7767
7563
|
if (cwdOverride) {
|
|
7768
|
-
return
|
|
7564
|
+
return import_node_path15.default.resolve(cwdOverride);
|
|
7769
7565
|
}
|
|
7770
7566
|
if (this.config.cwd) {
|
|
7771
|
-
return
|
|
7567
|
+
return import_node_path15.default.resolve(this.config.cwd);
|
|
7772
7568
|
}
|
|
7773
7569
|
return void 0;
|
|
7774
7570
|
}
|
|
@@ -7778,9 +7574,9 @@ ${basePrompt}` : basePrompt;
|
|
|
7778
7574
|
return void 0;
|
|
7779
7575
|
}
|
|
7780
7576
|
if (this.config.logDir) {
|
|
7781
|
-
return
|
|
7577
|
+
return import_node_path15.default.resolve(this.config.logDir);
|
|
7782
7578
|
}
|
|
7783
|
-
return
|
|
7579
|
+
return import_node_path15.default.join(process.cwd(), ".agentv", "logs", "codex");
|
|
7784
7580
|
}
|
|
7785
7581
|
async createStreamLogger(request) {
|
|
7786
7582
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7788,13 +7584,13 @@ ${basePrompt}` : basePrompt;
|
|
|
7788
7584
|
return void 0;
|
|
7789
7585
|
}
|
|
7790
7586
|
try {
|
|
7791
|
-
await (0,
|
|
7587
|
+
await (0, import_promises13.mkdir)(logDir, { recursive: true });
|
|
7792
7588
|
} catch (error) {
|
|
7793
7589
|
const message = error instanceof Error ? error.message : String(error);
|
|
7794
7590
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7795
7591
|
return void 0;
|
|
7796
7592
|
}
|
|
7797
|
-
const filePath =
|
|
7593
|
+
const filePath = import_node_path15.default.join(logDir, buildLogFilename3(request, this.targetName));
|
|
7798
7594
|
try {
|
|
7799
7595
|
const logger = await CodexSdkStreamLogger.create({
|
|
7800
7596
|
filePath,
|
|
@@ -7937,8 +7733,8 @@ function formatElapsed3(startedAt) {
|
|
|
7937
7733
|
|
|
7938
7734
|
// src/evaluation/providers/copilot-cli.ts
|
|
7939
7735
|
var import_node_crypto5 = require("crypto");
|
|
7940
|
-
var
|
|
7941
|
-
var
|
|
7736
|
+
var import_promises14 = require("fs/promises");
|
|
7737
|
+
var import_node_path17 = __toESM(require("path"), 1);
|
|
7942
7738
|
var import_node_stream = require("stream");
|
|
7943
7739
|
var import_node_child_process3 = require("child_process");
|
|
7944
7740
|
var acp = __toESM(require("@agentclientprotocol/sdk"), 1);
|
|
@@ -8000,7 +7796,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
8000
7796
|
var import_node_crypto4 = require("crypto");
|
|
8001
7797
|
var import_node_fs7 = require("fs");
|
|
8002
7798
|
var import_node_os2 = require("os");
|
|
8003
|
-
var
|
|
7799
|
+
var import_node_path16 = __toESM(require("path"), 1);
|
|
8004
7800
|
var import_node_url2 = require("url");
|
|
8005
7801
|
var import_meta = {};
|
|
8006
7802
|
function resolvePlatformCliPath() {
|
|
@@ -8025,7 +7821,7 @@ function resolvePlatformCliPath() {
|
|
|
8025
7821
|
try {
|
|
8026
7822
|
const resolved = import_meta.resolve(`${packageName}/package.json`);
|
|
8027
7823
|
const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url2.fileURLToPath)(resolved) : resolved;
|
|
8028
|
-
const binaryPath =
|
|
7824
|
+
const binaryPath = import_node_path16.default.join(import_node_path16.default.dirname(packageJsonPath), binaryName);
|
|
8029
7825
|
if ((0, import_node_fs7.existsSync)(binaryPath)) {
|
|
8030
7826
|
return binaryPath;
|
|
8031
7827
|
}
|
|
@@ -8033,7 +7829,7 @@ function resolvePlatformCliPath() {
|
|
|
8033
7829
|
}
|
|
8034
7830
|
let searchDir = process.cwd();
|
|
8035
7831
|
for (let i = 0; i < 10; i++) {
|
|
8036
|
-
const standardPath =
|
|
7832
|
+
const standardPath = import_node_path16.default.join(
|
|
8037
7833
|
searchDir,
|
|
8038
7834
|
"node_modules",
|
|
8039
7835
|
...packageName.split("/"),
|
|
@@ -8042,13 +7838,13 @@ function resolvePlatformCliPath() {
|
|
|
8042
7838
|
if ((0, import_node_fs7.existsSync)(standardPath)) {
|
|
8043
7839
|
return standardPath;
|
|
8044
7840
|
}
|
|
8045
|
-
const bunDir =
|
|
7841
|
+
const bunDir = import_node_path16.default.join(searchDir, "node_modules", ".bun");
|
|
8046
7842
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
8047
7843
|
try {
|
|
8048
7844
|
const entries = (0, import_node_fs7.readdirSync)(bunDir);
|
|
8049
7845
|
for (const entry of entries) {
|
|
8050
7846
|
if (entry.startsWith(prefix)) {
|
|
8051
|
-
const candidate =
|
|
7847
|
+
const candidate = import_node_path16.default.join(
|
|
8052
7848
|
bunDir,
|
|
8053
7849
|
entry,
|
|
8054
7850
|
"node_modules",
|
|
@@ -8063,7 +7859,7 @@ function resolvePlatformCliPath() {
|
|
|
8063
7859
|
}
|
|
8064
7860
|
} catch {
|
|
8065
7861
|
}
|
|
8066
|
-
const parent =
|
|
7862
|
+
const parent = import_node_path16.default.dirname(searchDir);
|
|
8067
7863
|
if (parent === searchDir) break;
|
|
8068
7864
|
searchDir = parent;
|
|
8069
7865
|
}
|
|
@@ -8401,10 +8197,10 @@ var CopilotCliProvider = class {
|
|
|
8401
8197
|
}
|
|
8402
8198
|
resolveCwd(cwdOverride) {
|
|
8403
8199
|
if (cwdOverride) {
|
|
8404
|
-
return
|
|
8200
|
+
return import_node_path17.default.resolve(cwdOverride);
|
|
8405
8201
|
}
|
|
8406
8202
|
if (this.config.cwd) {
|
|
8407
|
-
return
|
|
8203
|
+
return import_node_path17.default.resolve(this.config.cwd);
|
|
8408
8204
|
}
|
|
8409
8205
|
return void 0;
|
|
8410
8206
|
}
|
|
@@ -8423,9 +8219,9 @@ var CopilotCliProvider = class {
|
|
|
8423
8219
|
return void 0;
|
|
8424
8220
|
}
|
|
8425
8221
|
if (this.config.logDir) {
|
|
8426
|
-
return
|
|
8222
|
+
return import_node_path17.default.resolve(this.config.logDir);
|
|
8427
8223
|
}
|
|
8428
|
-
return
|
|
8224
|
+
return import_node_path17.default.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
8429
8225
|
}
|
|
8430
8226
|
async createStreamLogger(request) {
|
|
8431
8227
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8433,13 +8229,13 @@ var CopilotCliProvider = class {
|
|
|
8433
8229
|
return void 0;
|
|
8434
8230
|
}
|
|
8435
8231
|
try {
|
|
8436
|
-
await (0,
|
|
8232
|
+
await (0, import_promises14.mkdir)(logDir, { recursive: true });
|
|
8437
8233
|
} catch (error) {
|
|
8438
8234
|
const message = error instanceof Error ? error.message : String(error);
|
|
8439
8235
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
8440
8236
|
return void 0;
|
|
8441
8237
|
}
|
|
8442
|
-
const filePath =
|
|
8238
|
+
const filePath = import_node_path17.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
8443
8239
|
try {
|
|
8444
8240
|
const logger = await CopilotStreamLogger.create(
|
|
8445
8241
|
{
|
|
@@ -8533,8 +8329,8 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
8533
8329
|
|
|
8534
8330
|
// src/evaluation/providers/copilot-sdk.ts
|
|
8535
8331
|
var import_node_crypto6 = require("crypto");
|
|
8536
|
-
var
|
|
8537
|
-
var
|
|
8332
|
+
var import_promises15 = require("fs/promises");
|
|
8333
|
+
var import_node_path18 = __toESM(require("path"), 1);
|
|
8538
8334
|
|
|
8539
8335
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
8540
8336
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -8813,10 +8609,10 @@ var CopilotSdkProvider = class {
|
|
|
8813
8609
|
}
|
|
8814
8610
|
resolveCwd(cwdOverride) {
|
|
8815
8611
|
if (cwdOverride) {
|
|
8816
|
-
return
|
|
8612
|
+
return import_node_path18.default.resolve(cwdOverride);
|
|
8817
8613
|
}
|
|
8818
8614
|
if (this.config.cwd) {
|
|
8819
|
-
return
|
|
8615
|
+
return import_node_path18.default.resolve(this.config.cwd);
|
|
8820
8616
|
}
|
|
8821
8617
|
return void 0;
|
|
8822
8618
|
}
|
|
@@ -8825,9 +8621,9 @@ var CopilotSdkProvider = class {
|
|
|
8825
8621
|
return void 0;
|
|
8826
8622
|
}
|
|
8827
8623
|
if (this.config.logDir) {
|
|
8828
|
-
return
|
|
8624
|
+
return import_node_path18.default.resolve(this.config.logDir);
|
|
8829
8625
|
}
|
|
8830
|
-
return
|
|
8626
|
+
return import_node_path18.default.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
8831
8627
|
}
|
|
8832
8628
|
async createStreamLogger(request) {
|
|
8833
8629
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8835,13 +8631,13 @@ var CopilotSdkProvider = class {
|
|
|
8835
8631
|
return void 0;
|
|
8836
8632
|
}
|
|
8837
8633
|
try {
|
|
8838
|
-
await (0,
|
|
8634
|
+
await (0, import_promises15.mkdir)(logDir, { recursive: true });
|
|
8839
8635
|
} catch (error) {
|
|
8840
8636
|
const message = error instanceof Error ? error.message : String(error);
|
|
8841
8637
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
8842
8638
|
return void 0;
|
|
8843
8639
|
}
|
|
8844
|
-
const filePath =
|
|
8640
|
+
const filePath = import_node_path18.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
8845
8641
|
try {
|
|
8846
8642
|
const logger = await CopilotStreamLogger.create(
|
|
8847
8643
|
{
|
|
@@ -8918,8 +8714,7 @@ var MockProvider = class {
|
|
|
8918
8714
|
return {
|
|
8919
8715
|
output: [{ role: "assistant", content: this.cannedResponse }],
|
|
8920
8716
|
raw: {
|
|
8921
|
-
question: request.question
|
|
8922
|
-
guidelines: request.guidelines
|
|
8717
|
+
question: request.question
|
|
8923
8718
|
}
|
|
8924
8719
|
};
|
|
8925
8720
|
}
|
|
@@ -9195,9 +8990,9 @@ function extractToolCalls3(content, toolTrackers, completedToolResults) {
|
|
|
9195
8990
|
var import_node_child_process4 = require("child_process");
|
|
9196
8991
|
var import_node_crypto7 = require("crypto");
|
|
9197
8992
|
var import_node_fs8 = require("fs");
|
|
9198
|
-
var
|
|
8993
|
+
var import_promises16 = require("fs/promises");
|
|
9199
8994
|
var import_node_os3 = require("os");
|
|
9200
|
-
var
|
|
8995
|
+
var import_node_path19 = __toESM(require("path"), 1);
|
|
9201
8996
|
|
|
9202
8997
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
9203
8998
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -9278,8 +9073,8 @@ var PiCodingAgentProvider = class {
|
|
|
9278
9073
|
const workspaceRoot = await this.createWorkspace();
|
|
9279
9074
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
9280
9075
|
try {
|
|
9281
|
-
const promptFile =
|
|
9282
|
-
await (0,
|
|
9076
|
+
const promptFile = import_node_path19.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
9077
|
+
await (0, import_promises16.writeFile)(promptFile, request.question, "utf8");
|
|
9283
9078
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
9284
9079
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
9285
9080
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
@@ -9340,12 +9135,12 @@ var PiCodingAgentProvider = class {
|
|
|
9340
9135
|
}
|
|
9341
9136
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
9342
9137
|
if (cwdOverride) {
|
|
9343
|
-
return
|
|
9138
|
+
return import_node_path19.default.resolve(cwdOverride);
|
|
9344
9139
|
}
|
|
9345
9140
|
if (!this.config.cwd) {
|
|
9346
9141
|
return workspaceRoot;
|
|
9347
9142
|
}
|
|
9348
|
-
return
|
|
9143
|
+
return import_node_path19.default.resolve(this.config.cwd);
|
|
9349
9144
|
}
|
|
9350
9145
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
9351
9146
|
const args = [];
|
|
@@ -9434,19 +9229,19 @@ ${prompt}` : prompt;
|
|
|
9434
9229
|
return env;
|
|
9435
9230
|
}
|
|
9436
9231
|
async createWorkspace() {
|
|
9437
|
-
return await (0,
|
|
9232
|
+
return await (0, import_promises16.mkdtemp)(import_node_path19.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX));
|
|
9438
9233
|
}
|
|
9439
9234
|
async cleanupWorkspace(workspaceRoot) {
|
|
9440
9235
|
try {
|
|
9441
|
-
await (0,
|
|
9236
|
+
await (0, import_promises16.rm)(workspaceRoot, { recursive: true, force: true });
|
|
9442
9237
|
} catch {
|
|
9443
9238
|
}
|
|
9444
9239
|
}
|
|
9445
9240
|
resolveLogDirectory() {
|
|
9446
9241
|
if (this.config.logDir) {
|
|
9447
|
-
return
|
|
9242
|
+
return import_node_path19.default.resolve(this.config.logDir);
|
|
9448
9243
|
}
|
|
9449
|
-
return
|
|
9244
|
+
return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
9450
9245
|
}
|
|
9451
9246
|
async createStreamLogger(request) {
|
|
9452
9247
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9454,13 +9249,13 @@ ${prompt}` : prompt;
|
|
|
9454
9249
|
return void 0;
|
|
9455
9250
|
}
|
|
9456
9251
|
try {
|
|
9457
|
-
await (0,
|
|
9252
|
+
await (0, import_promises16.mkdir)(logDir, { recursive: true });
|
|
9458
9253
|
} catch (error) {
|
|
9459
9254
|
const message = error instanceof Error ? error.message : String(error);
|
|
9460
9255
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
9461
9256
|
return void 0;
|
|
9462
9257
|
}
|
|
9463
|
-
const filePath =
|
|
9258
|
+
const filePath = import_node_path19.default.join(logDir, buildLogFilename5(request, this.targetName));
|
|
9464
9259
|
try {
|
|
9465
9260
|
const logger = await PiStreamLogger.create({
|
|
9466
9261
|
filePath,
|
|
@@ -9959,7 +9754,7 @@ var ProviderRegistry = class {
|
|
|
9959
9754
|
};
|
|
9960
9755
|
|
|
9961
9756
|
// src/evaluation/providers/targets.ts
|
|
9962
|
-
var
|
|
9757
|
+
var import_node_path20 = __toESM(require("path"), 1);
|
|
9963
9758
|
var import_zod3 = require("zod");
|
|
9964
9759
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
9965
9760
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
@@ -10056,11 +9851,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
10056
9851
|
allowLiteral: true,
|
|
10057
9852
|
optionalEnv: true
|
|
10058
9853
|
});
|
|
10059
|
-
if (cwd && evalFilePath && !
|
|
10060
|
-
cwd =
|
|
9854
|
+
if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
|
|
9855
|
+
cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
|
|
10061
9856
|
}
|
|
10062
9857
|
if (!cwd && evalFilePath) {
|
|
10063
|
-
cwd =
|
|
9858
|
+
cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
|
|
10064
9859
|
}
|
|
10065
9860
|
return {
|
|
10066
9861
|
command,
|
|
@@ -10083,15 +9878,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10083
9878
|
optionalEnv: true
|
|
10084
9879
|
}
|
|
10085
9880
|
);
|
|
10086
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10087
|
-
workspaceTemplate =
|
|
9881
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
9882
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10088
9883
|
}
|
|
10089
9884
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
10090
9885
|
allowLiteral: true,
|
|
10091
9886
|
optionalEnv: true
|
|
10092
9887
|
});
|
|
10093
|
-
if (cwd && evalFilePath && !
|
|
10094
|
-
cwd =
|
|
9888
|
+
if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
|
|
9889
|
+
cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
|
|
10095
9890
|
}
|
|
10096
9891
|
if (cwd && workspaceTemplate) {
|
|
10097
9892
|
throw new Error(
|
|
@@ -10099,7 +9894,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10099
9894
|
);
|
|
10100
9895
|
}
|
|
10101
9896
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
10102
|
-
cwd =
|
|
9897
|
+
cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
|
|
10103
9898
|
}
|
|
10104
9899
|
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
10105
9900
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
@@ -10122,7 +9917,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10122
9917
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
10123
9918
|
"PROMPT",
|
|
10124
9919
|
"PROMPT_FILE",
|
|
10125
|
-
"GUIDELINES",
|
|
10126
9920
|
"EVAL_ID",
|
|
10127
9921
|
"ATTEMPT",
|
|
10128
9922
|
"FILES",
|
|
@@ -10517,8 +10311,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
10517
10311
|
optionalEnv: true
|
|
10518
10312
|
}
|
|
10519
10313
|
);
|
|
10520
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10521
|
-
workspaceTemplate =
|
|
10314
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10315
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10522
10316
|
}
|
|
10523
10317
|
if (cwd && workspaceTemplate) {
|
|
10524
10318
|
throw new Error(
|
|
@@ -10602,8 +10396,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
10602
10396
|
optionalEnv: true
|
|
10603
10397
|
}
|
|
10604
10398
|
);
|
|
10605
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10606
|
-
workspaceTemplate =
|
|
10399
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10400
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10607
10401
|
}
|
|
10608
10402
|
if (cwd && workspaceTemplate) {
|
|
10609
10403
|
throw new Error(
|
|
@@ -10667,8 +10461,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
10667
10461
|
optionalEnv: true
|
|
10668
10462
|
}
|
|
10669
10463
|
);
|
|
10670
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10671
|
-
workspaceTemplate =
|
|
10464
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10465
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10672
10466
|
}
|
|
10673
10467
|
if (cwd && workspaceTemplate) {
|
|
10674
10468
|
throw new Error(
|
|
@@ -10758,8 +10552,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
10758
10552
|
optionalEnv: true
|
|
10759
10553
|
}
|
|
10760
10554
|
);
|
|
10761
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10762
|
-
workspaceTemplate =
|
|
10555
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10556
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10763
10557
|
}
|
|
10764
10558
|
if (cwd && workspaceTemplate) {
|
|
10765
10559
|
throw new Error(
|
|
@@ -10847,8 +10641,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
10847
10641
|
optionalEnv: true
|
|
10848
10642
|
}
|
|
10849
10643
|
);
|
|
10850
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10851
|
-
workspaceTemplate =
|
|
10644
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10645
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10852
10646
|
}
|
|
10853
10647
|
if (cwd && workspaceTemplate) {
|
|
10854
10648
|
throw new Error(
|
|
@@ -10906,8 +10700,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
10906
10700
|
optionalEnv: true
|
|
10907
10701
|
}
|
|
10908
10702
|
) : void 0;
|
|
10909
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10910
|
-
workspaceTemplate =
|
|
10703
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10704
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10911
10705
|
}
|
|
10912
10706
|
const executableSource = target.executable;
|
|
10913
10707
|
const waitSource = target.wait;
|
|
@@ -10948,8 +10742,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
10948
10742
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
10949
10743
|
if (!parseResult.success) {
|
|
10950
10744
|
const firstError = parseResult.error.errors[0];
|
|
10951
|
-
const
|
|
10952
|
-
const prefix =
|
|
10745
|
+
const path47 = firstError?.path.join(".") || "";
|
|
10746
|
+
const prefix = path47 ? `${target.name} ${path47}: ` : `${target.name}: `;
|
|
10953
10747
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
10954
10748
|
}
|
|
10955
10749
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -10970,11 +10764,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
10970
10764
|
allowLiteral: true,
|
|
10971
10765
|
optionalEnv: true
|
|
10972
10766
|
});
|
|
10973
|
-
if (cwd && evalFilePath && !
|
|
10974
|
-
cwd =
|
|
10767
|
+
if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
|
|
10768
|
+
cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
|
|
10975
10769
|
}
|
|
10976
10770
|
if (!cwd && evalFilePath) {
|
|
10977
|
-
cwd =
|
|
10771
|
+
cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
|
|
10978
10772
|
}
|
|
10979
10773
|
return {
|
|
10980
10774
|
command,
|
|
@@ -11158,40 +10952,40 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
11158
10952
|
|
|
11159
10953
|
// src/evaluation/providers/vscode-provider.ts
|
|
11160
10954
|
var import_node_child_process6 = require("child_process");
|
|
11161
|
-
var
|
|
11162
|
-
var
|
|
10955
|
+
var import_promises23 = require("fs/promises");
|
|
10956
|
+
var import_node_path32 = __toESM(require("path"), 1);
|
|
11163
10957
|
var import_node_util3 = require("util");
|
|
11164
10958
|
|
|
11165
10959
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
11166
|
-
var
|
|
11167
|
-
var
|
|
10960
|
+
var import_promises21 = require("fs/promises");
|
|
10961
|
+
var import_node_path30 = __toESM(require("path"), 1);
|
|
11168
10962
|
|
|
11169
10963
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
11170
10964
|
var import_node_fs9 = require("fs");
|
|
11171
|
-
var
|
|
11172
|
-
var
|
|
10965
|
+
var import_promises17 = require("fs/promises");
|
|
10966
|
+
var import_node_path21 = __toESM(require("path"), 1);
|
|
11173
10967
|
async function pathExists(target) {
|
|
11174
10968
|
try {
|
|
11175
|
-
await (0,
|
|
10969
|
+
await (0, import_promises17.access)(target, import_node_fs9.constants.F_OK);
|
|
11176
10970
|
return true;
|
|
11177
10971
|
} catch {
|
|
11178
10972
|
return false;
|
|
11179
10973
|
}
|
|
11180
10974
|
}
|
|
11181
10975
|
async function ensureDir(target) {
|
|
11182
|
-
await (0,
|
|
10976
|
+
await (0, import_promises17.mkdir)(target, { recursive: true });
|
|
11183
10977
|
}
|
|
11184
10978
|
async function readDirEntries(target) {
|
|
11185
|
-
const entries = await (0,
|
|
10979
|
+
const entries = await (0, import_promises17.readdir)(target, { withFileTypes: true });
|
|
11186
10980
|
return entries.map((entry) => ({
|
|
11187
10981
|
name: entry.name,
|
|
11188
|
-
absolutePath:
|
|
10982
|
+
absolutePath: import_node_path21.default.join(target, entry.name),
|
|
11189
10983
|
isDirectory: entry.isDirectory()
|
|
11190
10984
|
}));
|
|
11191
10985
|
}
|
|
11192
10986
|
async function removeIfExists(target) {
|
|
11193
10987
|
try {
|
|
11194
|
-
await (0,
|
|
10988
|
+
await (0, import_promises17.rm)(target, { force: true, recursive: false });
|
|
11195
10989
|
} catch (error) {
|
|
11196
10990
|
if (error.code !== "ENOENT") {
|
|
11197
10991
|
throw error;
|
|
@@ -11200,9 +10994,9 @@ async function removeIfExists(target) {
|
|
|
11200
10994
|
}
|
|
11201
10995
|
|
|
11202
10996
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
11203
|
-
var
|
|
10997
|
+
var import_node_path22 = __toESM(require("path"), 1);
|
|
11204
10998
|
function pathToFileUri2(filePath) {
|
|
11205
|
-
const absolutePath =
|
|
10999
|
+
const absolutePath = import_node_path22.default.isAbsolute(filePath) ? filePath : import_node_path22.default.resolve(filePath);
|
|
11206
11000
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
11207
11001
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
11208
11002
|
return `file:///${normalizedPath}`;
|
|
@@ -11211,7 +11005,7 @@ function pathToFileUri2(filePath) {
|
|
|
11211
11005
|
}
|
|
11212
11006
|
|
|
11213
11007
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
11214
|
-
var
|
|
11008
|
+
var import_node_path23 = __toESM(require("path"), 1);
|
|
11215
11009
|
|
|
11216
11010
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
11217
11011
|
function renderTemplate2(content, variables) {
|
|
@@ -11303,8 +11097,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
11303
11097
|
});
|
|
11304
11098
|
}
|
|
11305
11099
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
11306
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
11307
|
-
const responseList = responseFiles.map((file) => `"${
|
|
11100
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path23.default.basename(file)}`).join("\n");
|
|
11101
|
+
const responseList = responseFiles.map((file) => `"${import_node_path23.default.basename(file)}"`).join(", ");
|
|
11308
11102
|
return renderTemplate2(templateContent, {
|
|
11309
11103
|
requestFiles: requestLines,
|
|
11310
11104
|
responseList
|
|
@@ -11312,8 +11106,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
11312
11106
|
}
|
|
11313
11107
|
|
|
11314
11108
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
11315
|
-
var
|
|
11316
|
-
var
|
|
11109
|
+
var import_promises18 = require("fs/promises");
|
|
11110
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
11317
11111
|
|
|
11318
11112
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
11319
11113
|
function sleep2(ms) {
|
|
@@ -11351,7 +11145,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
11351
11145
|
const maxAttempts = 10;
|
|
11352
11146
|
while (attempts < maxAttempts) {
|
|
11353
11147
|
try {
|
|
11354
|
-
const content = await (0,
|
|
11148
|
+
const content = await (0, import_promises18.readFile)(responseFileFinal, { encoding: "utf8" });
|
|
11355
11149
|
if (!silent) {
|
|
11356
11150
|
process.stdout.write(`${content}
|
|
11357
11151
|
`);
|
|
@@ -11372,7 +11166,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
11372
11166
|
}
|
|
11373
11167
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
11374
11168
|
if (!silent) {
|
|
11375
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
11169
|
+
const fileList = responseFilesFinal.map((file) => import_node_path24.default.basename(file)).join(", ");
|
|
11376
11170
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
11377
11171
|
}
|
|
11378
11172
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -11381,7 +11175,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11381
11175
|
while (pending.size > 0) {
|
|
11382
11176
|
if (Date.now() >= deadline) {
|
|
11383
11177
|
if (!silent) {
|
|
11384
|
-
const remaining = [...pending].map((f) =>
|
|
11178
|
+
const remaining = [...pending].map((f) => import_node_path24.default.basename(f)).join(", ");
|
|
11385
11179
|
console.error(
|
|
11386
11180
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
11387
11181
|
);
|
|
@@ -11408,7 +11202,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11408
11202
|
const maxAttempts = 10;
|
|
11409
11203
|
while (attempts < maxAttempts) {
|
|
11410
11204
|
try {
|
|
11411
|
-
const content = await (0,
|
|
11205
|
+
const content = await (0, import_promises18.readFile)(file, { encoding: "utf8" });
|
|
11412
11206
|
if (!silent) {
|
|
11413
11207
|
process.stdout.write(`${content}
|
|
11414
11208
|
`);
|
|
@@ -11431,16 +11225,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11431
11225
|
|
|
11432
11226
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
11433
11227
|
var import_node_child_process5 = require("child_process");
|
|
11434
|
-
var
|
|
11435
|
-
var
|
|
11228
|
+
var import_promises19 = require("fs/promises");
|
|
11229
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
11436
11230
|
var import_node_util2 = require("util");
|
|
11437
11231
|
|
|
11438
11232
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
11439
|
-
var
|
|
11233
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
11440
11234
|
|
|
11441
11235
|
// src/paths.ts
|
|
11442
11236
|
var import_node_os4 = __toESM(require("os"), 1);
|
|
11443
|
-
var
|
|
11237
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
11444
11238
|
var logged = false;
|
|
11445
11239
|
function getAgentvHome() {
|
|
11446
11240
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -11451,19 +11245,19 @@ function getAgentvHome() {
|
|
|
11451
11245
|
}
|
|
11452
11246
|
return envHome;
|
|
11453
11247
|
}
|
|
11454
|
-
return
|
|
11248
|
+
return import_node_path25.default.join(import_node_os4.default.homedir(), ".agentv");
|
|
11455
11249
|
}
|
|
11456
11250
|
function getWorkspacesRoot() {
|
|
11457
|
-
return
|
|
11251
|
+
return import_node_path25.default.join(getAgentvHome(), "workspaces");
|
|
11458
11252
|
}
|
|
11459
11253
|
function getSubagentsRoot() {
|
|
11460
|
-
return
|
|
11254
|
+
return import_node_path25.default.join(getAgentvHome(), "subagents");
|
|
11461
11255
|
}
|
|
11462
11256
|
function getTraceStateRoot() {
|
|
11463
|
-
return
|
|
11257
|
+
return import_node_path25.default.join(getAgentvHome(), "trace-state");
|
|
11464
11258
|
}
|
|
11465
11259
|
function getWorkspacePoolRoot() {
|
|
11466
|
-
return
|
|
11260
|
+
return import_node_path25.default.join(getAgentvHome(), "workspace-pool");
|
|
11467
11261
|
}
|
|
11468
11262
|
|
|
11469
11263
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -11471,7 +11265,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
11471
11265
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
11472
11266
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
11473
11267
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
11474
|
-
return
|
|
11268
|
+
return import_node_path26.default.join(getSubagentsRoot(), folder);
|
|
11475
11269
|
}
|
|
11476
11270
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
11477
11271
|
|
|
@@ -11538,12 +11332,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11538
11332
|
await raceSpawnError(child);
|
|
11539
11333
|
return true;
|
|
11540
11334
|
}
|
|
11541
|
-
const aliveFile =
|
|
11335
|
+
const aliveFile = import_node_path27.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
11542
11336
|
await removeIfExists(aliveFile);
|
|
11543
|
-
const githubAgentsDir =
|
|
11544
|
-
await (0,
|
|
11545
|
-
const wakeupDst =
|
|
11546
|
-
await (0,
|
|
11337
|
+
const githubAgentsDir = import_node_path27.default.join(subagentDir, ".github", "agents");
|
|
11338
|
+
await (0, import_promises19.mkdir)(githubAgentsDir, { recursive: true });
|
|
11339
|
+
const wakeupDst = import_node_path27.default.join(githubAgentsDir, "wakeup.md");
|
|
11340
|
+
await (0, import_promises19.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
11547
11341
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
11548
11342
|
label: "open-workspace"
|
|
11549
11343
|
});
|
|
@@ -11555,7 +11349,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11555
11349
|
"chat",
|
|
11556
11350
|
"-m",
|
|
11557
11351
|
wakeupChatId,
|
|
11558
|
-
`create a file named .alive in the ${
|
|
11352
|
+
`create a file named .alive in the ${import_node_path27.default.basename(subagentDir)} folder`
|
|
11559
11353
|
];
|
|
11560
11354
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
11561
11355
|
await raceSpawnError(wakeupChild);
|
|
@@ -11570,27 +11364,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11570
11364
|
return true;
|
|
11571
11365
|
}
|
|
11572
11366
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
11573
|
-
const workspacePath =
|
|
11574
|
-
const messagesDir =
|
|
11575
|
-
await (0,
|
|
11576
|
-
const reqFile =
|
|
11577
|
-
await (0,
|
|
11367
|
+
const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
|
|
11368
|
+
const messagesDir = import_node_path27.default.join(subagentDir, "messages");
|
|
11369
|
+
await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
|
|
11370
|
+
const reqFile = import_node_path27.default.join(messagesDir, `${timestamp}_req.md`);
|
|
11371
|
+
await (0, import_promises19.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
11578
11372
|
const reqUri = pathToFileUri2(reqFile);
|
|
11579
11373
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
11580
11374
|
for (const attachment of attachmentPaths) {
|
|
11581
11375
|
chatArgs.push("-a", attachment);
|
|
11582
11376
|
}
|
|
11583
11377
|
chatArgs.push("-a", reqFile);
|
|
11584
|
-
chatArgs.push(`Follow instructions in [${
|
|
11378
|
+
chatArgs.push(`Follow instructions in [${import_node_path27.default.basename(reqFile)}](${reqUri})`);
|
|
11585
11379
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
11586
11380
|
workspacePath,
|
|
11587
|
-
|
|
11381
|
+
import_node_path27.default.basename(subagentDir),
|
|
11588
11382
|
subagentDir,
|
|
11589
11383
|
vscodeCmd
|
|
11590
11384
|
);
|
|
11591
11385
|
if (!workspaceReady) {
|
|
11592
11386
|
throw new Error(
|
|
11593
|
-
`VS Code workspace '${
|
|
11387
|
+
`VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
11594
11388
|
);
|
|
11595
11389
|
}
|
|
11596
11390
|
await sleep2(500);
|
|
@@ -11598,9 +11392,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
11598
11392
|
await raceSpawnError(child);
|
|
11599
11393
|
}
|
|
11600
11394
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
11601
|
-
const workspacePath =
|
|
11602
|
-
const messagesDir =
|
|
11603
|
-
await (0,
|
|
11395
|
+
const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
|
|
11396
|
+
const messagesDir = import_node_path27.default.join(subagentDir, "messages");
|
|
11397
|
+
await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
|
|
11604
11398
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
11605
11399
|
for (const attachment of attachmentPaths) {
|
|
11606
11400
|
chatArgs.push("-a", attachment);
|
|
@@ -11608,13 +11402,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
11608
11402
|
chatArgs.push(chatInstruction);
|
|
11609
11403
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
11610
11404
|
workspacePath,
|
|
11611
|
-
|
|
11405
|
+
import_node_path27.default.basename(subagentDir),
|
|
11612
11406
|
subagentDir,
|
|
11613
11407
|
vscodeCmd
|
|
11614
11408
|
);
|
|
11615
11409
|
if (!workspaceReady) {
|
|
11616
11410
|
throw new Error(
|
|
11617
|
-
`VS Code workspace '${
|
|
11411
|
+
`VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
11618
11412
|
);
|
|
11619
11413
|
}
|
|
11620
11414
|
await sleep2(500);
|
|
@@ -11623,11 +11417,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
11623
11417
|
}
|
|
11624
11418
|
|
|
11625
11419
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
11626
|
-
var
|
|
11627
|
-
var
|
|
11420
|
+
var import_promises20 = require("fs/promises");
|
|
11421
|
+
var import_node_path29 = __toESM(require("path"), 1);
|
|
11628
11422
|
|
|
11629
11423
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
11630
|
-
var
|
|
11424
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
11631
11425
|
var import_json5 = __toESM(require("json5"), 1);
|
|
11632
11426
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
11633
11427
|
let workspace;
|
|
@@ -11644,10 +11438,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
11644
11438
|
}
|
|
11645
11439
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
11646
11440
|
const folderPath = folder.path;
|
|
11647
|
-
if (
|
|
11441
|
+
if (import_node_path28.default.isAbsolute(folderPath)) {
|
|
11648
11442
|
return folder;
|
|
11649
11443
|
}
|
|
11650
|
-
const absolutePath =
|
|
11444
|
+
const absolutePath = import_node_path28.default.resolve(templateDir, folderPath);
|
|
11651
11445
|
return {
|
|
11652
11446
|
...folder,
|
|
11653
11447
|
path: absolutePath
|
|
@@ -11669,19 +11463,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
11669
11463
|
if (locationMap && typeof locationMap === "object") {
|
|
11670
11464
|
const transformedMap = {};
|
|
11671
11465
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
11672
|
-
const isAbsolute =
|
|
11466
|
+
const isAbsolute = import_node_path28.default.isAbsolute(locationPath);
|
|
11673
11467
|
if (isAbsolute) {
|
|
11674
11468
|
transformedMap[locationPath] = value;
|
|
11675
11469
|
} else {
|
|
11676
11470
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
11677
11471
|
if (firstGlobIndex === -1) {
|
|
11678
|
-
const resolvedPath =
|
|
11472
|
+
const resolvedPath = import_node_path28.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
11679
11473
|
transformedMap[resolvedPath] = value;
|
|
11680
11474
|
} else {
|
|
11681
11475
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
11682
11476
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
11683
11477
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
11684
|
-
const resolvedPath = (
|
|
11478
|
+
const resolvedPath = (import_node_path28.default.resolve(templateDir, basePath) + patternPath).replace(
|
|
11685
11479
|
/\\/g,
|
|
11686
11480
|
"/"
|
|
11687
11481
|
);
|
|
@@ -11722,7 +11516,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
11722
11516
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
11723
11517
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
11724
11518
|
for (const subagent of subagents) {
|
|
11725
|
-
const lockFile =
|
|
11519
|
+
const lockFile = import_node_path29.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
11726
11520
|
if (!await pathExists(lockFile)) {
|
|
11727
11521
|
return subagent.absolutePath;
|
|
11728
11522
|
}
|
|
@@ -11732,26 +11526,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
11732
11526
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
11733
11527
|
let workspaceContent;
|
|
11734
11528
|
if (workspaceTemplate) {
|
|
11735
|
-
const workspaceSrc =
|
|
11529
|
+
const workspaceSrc = import_node_path29.default.resolve(workspaceTemplate);
|
|
11736
11530
|
if (!await pathExists(workspaceSrc)) {
|
|
11737
11531
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
11738
11532
|
}
|
|
11739
|
-
const stats = await (0,
|
|
11533
|
+
const stats = await (0, import_promises20.stat)(workspaceSrc);
|
|
11740
11534
|
if (!stats.isFile()) {
|
|
11741
11535
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
11742
11536
|
}
|
|
11743
|
-
const templateText = await (0,
|
|
11537
|
+
const templateText = await (0, import_promises20.readFile)(workspaceSrc, "utf8");
|
|
11744
11538
|
workspaceContent = JSON.parse(templateText);
|
|
11745
11539
|
} else {
|
|
11746
11540
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
11747
11541
|
}
|
|
11748
|
-
const workspaceName = `${
|
|
11749
|
-
const workspaceDst =
|
|
11750
|
-
const templateDir = workspaceTemplate ?
|
|
11542
|
+
const workspaceName = `${import_node_path29.default.basename(subagentDir)}.code-workspace`;
|
|
11543
|
+
const workspaceDst = import_node_path29.default.join(subagentDir, workspaceName);
|
|
11544
|
+
const templateDir = workspaceTemplate ? import_node_path29.default.dirname(import_node_path29.default.resolve(workspaceTemplate)) : subagentDir;
|
|
11751
11545
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
11752
11546
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
11753
11547
|
if (cwd) {
|
|
11754
|
-
const absCwd =
|
|
11548
|
+
const absCwd = import_node_path29.default.resolve(cwd);
|
|
11755
11549
|
const parsed = JSON.parse(transformedContent);
|
|
11756
11550
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
11757
11551
|
if (!alreadyPresent) {
|
|
@@ -11759,36 +11553,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
11759
11553
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
11760
11554
|
}
|
|
11761
11555
|
}
|
|
11762
|
-
await (0,
|
|
11763
|
-
const messagesDir =
|
|
11764
|
-
await (0,
|
|
11556
|
+
await (0, import_promises20.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
11557
|
+
const messagesDir = import_node_path29.default.join(subagentDir, "messages");
|
|
11558
|
+
await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
|
|
11765
11559
|
return { workspace: workspaceDst, messagesDir };
|
|
11766
11560
|
}
|
|
11767
11561
|
async function createSubagentLock(subagentDir) {
|
|
11768
|
-
const messagesDir =
|
|
11562
|
+
const messagesDir = import_node_path29.default.join(subagentDir, "messages");
|
|
11769
11563
|
if (await pathExists(messagesDir)) {
|
|
11770
|
-
const files = await (0,
|
|
11564
|
+
const files = await (0, import_promises20.readdir)(messagesDir);
|
|
11771
11565
|
await Promise.all(
|
|
11772
11566
|
files.map(async (file) => {
|
|
11773
|
-
const target =
|
|
11567
|
+
const target = import_node_path29.default.join(messagesDir, file);
|
|
11774
11568
|
await removeIfExists(target);
|
|
11775
11569
|
})
|
|
11776
11570
|
);
|
|
11777
11571
|
}
|
|
11778
|
-
const githubAgentsDir =
|
|
11572
|
+
const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
|
|
11779
11573
|
if (await pathExists(githubAgentsDir)) {
|
|
11780
|
-
const agentFiles = await (0,
|
|
11574
|
+
const agentFiles = await (0, import_promises20.readdir)(githubAgentsDir);
|
|
11781
11575
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
11782
11576
|
await Promise.all(
|
|
11783
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
11577
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path29.default.join(githubAgentsDir, file)))
|
|
11784
11578
|
);
|
|
11785
11579
|
}
|
|
11786
|
-
const lockFile =
|
|
11787
|
-
await (0,
|
|
11580
|
+
const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
11581
|
+
await (0, import_promises20.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
11788
11582
|
return lockFile;
|
|
11789
11583
|
}
|
|
11790
11584
|
async function removeSubagentLock(subagentDir) {
|
|
11791
|
-
const lockFile =
|
|
11585
|
+
const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
11792
11586
|
await removeIfExists(lockFile);
|
|
11793
11587
|
}
|
|
11794
11588
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -11808,11 +11602,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
11808
11602
|
return 1;
|
|
11809
11603
|
}
|
|
11810
11604
|
if (promptFile) {
|
|
11811
|
-
const githubAgentsDir =
|
|
11812
|
-
await (0,
|
|
11813
|
-
const agentFile =
|
|
11605
|
+
const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
|
|
11606
|
+
await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
|
|
11607
|
+
const agentFile = import_node_path29.default.join(githubAgentsDir, `${chatId}.md`);
|
|
11814
11608
|
try {
|
|
11815
|
-
await (0,
|
|
11609
|
+
await (0, import_promises20.copyFile)(promptFile, agentFile);
|
|
11816
11610
|
} catch (error) {
|
|
11817
11611
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
11818
11612
|
return 1;
|
|
@@ -11829,11 +11623,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
11829
11623
|
if (!promptFile) {
|
|
11830
11624
|
return void 0;
|
|
11831
11625
|
}
|
|
11832
|
-
const resolvedPrompt =
|
|
11626
|
+
const resolvedPrompt = import_node_path30.default.resolve(promptFile);
|
|
11833
11627
|
if (!await pathExists(resolvedPrompt)) {
|
|
11834
11628
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
11835
11629
|
}
|
|
11836
|
-
const promptStats = await (0,
|
|
11630
|
+
const promptStats = await (0, import_promises21.stat)(resolvedPrompt);
|
|
11837
11631
|
if (!promptStats.isFile()) {
|
|
11838
11632
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
11839
11633
|
}
|
|
@@ -11845,7 +11639,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
11845
11639
|
}
|
|
11846
11640
|
const resolved = [];
|
|
11847
11641
|
for (const attachment of extraAttachments) {
|
|
11848
|
-
const resolvedPath =
|
|
11642
|
+
const resolvedPath = import_node_path30.default.resolve(attachment);
|
|
11849
11643
|
if (!await pathExists(resolvedPath)) {
|
|
11850
11644
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
11851
11645
|
}
|
|
@@ -11887,7 +11681,7 @@ async function dispatchAgentSession(options) {
|
|
|
11887
11681
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
11888
11682
|
};
|
|
11889
11683
|
}
|
|
11890
|
-
const subagentName =
|
|
11684
|
+
const subagentName = import_node_path30.default.basename(subagentDir);
|
|
11891
11685
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
11892
11686
|
const preparationResult = await prepareSubagentDirectory(
|
|
11893
11687
|
subagentDir,
|
|
@@ -11915,9 +11709,9 @@ async function dispatchAgentSession(options) {
|
|
|
11915
11709
|
};
|
|
11916
11710
|
}
|
|
11917
11711
|
const timestamp = generateTimestamp();
|
|
11918
|
-
const messagesDir =
|
|
11919
|
-
const responseFileTmp =
|
|
11920
|
-
const responseFileFinal =
|
|
11712
|
+
const messagesDir = import_node_path30.default.join(subagentDir, "messages");
|
|
11713
|
+
const responseFileTmp = import_node_path30.default.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
11714
|
+
const responseFileFinal = import_node_path30.default.join(messagesDir, `${timestamp}_res.md`);
|
|
11921
11715
|
const requestInstructions = createRequestPrompt(
|
|
11922
11716
|
userQuery,
|
|
11923
11717
|
responseFileTmp,
|
|
@@ -12022,7 +11816,7 @@ async function dispatchBatchAgent(options) {
|
|
|
12022
11816
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
12023
11817
|
};
|
|
12024
11818
|
}
|
|
12025
|
-
subagentName =
|
|
11819
|
+
subagentName = import_node_path30.default.basename(subagentDir);
|
|
12026
11820
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
12027
11821
|
const preparationResult = await prepareSubagentDirectory(
|
|
12028
11822
|
subagentDir,
|
|
@@ -12053,24 +11847,24 @@ async function dispatchBatchAgent(options) {
|
|
|
12053
11847
|
};
|
|
12054
11848
|
}
|
|
12055
11849
|
const timestamp = generateTimestamp();
|
|
12056
|
-
const messagesDir =
|
|
11850
|
+
const messagesDir = import_node_path30.default.join(subagentDir, "messages");
|
|
12057
11851
|
requestFiles = userQueries.map(
|
|
12058
|
-
(_, index) =>
|
|
11852
|
+
(_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
12059
11853
|
);
|
|
12060
11854
|
const responseTmpFiles = userQueries.map(
|
|
12061
|
-
(_, index) =>
|
|
11855
|
+
(_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
12062
11856
|
);
|
|
12063
11857
|
responseFilesFinal = userQueries.map(
|
|
12064
|
-
(_, index) =>
|
|
11858
|
+
(_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
12065
11859
|
);
|
|
12066
|
-
const orchestratorFile =
|
|
11860
|
+
const orchestratorFile = import_node_path30.default.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
12067
11861
|
if (!dryRun) {
|
|
12068
11862
|
await Promise.all(
|
|
12069
11863
|
userQueries.map((query, index) => {
|
|
12070
11864
|
const reqFile = requestFiles[index];
|
|
12071
11865
|
const tmpFile = responseTmpFiles[index];
|
|
12072
11866
|
const finalFile = responseFilesFinal[index];
|
|
12073
|
-
return (0,
|
|
11867
|
+
return (0, import_promises21.writeFile)(
|
|
12074
11868
|
reqFile,
|
|
12075
11869
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
12076
11870
|
{ encoding: "utf8" }
|
|
@@ -12082,7 +11876,7 @@ async function dispatchBatchAgent(options) {
|
|
|
12082
11876
|
responseFilesFinal,
|
|
12083
11877
|
orchestratorTemplateContent
|
|
12084
11878
|
);
|
|
12085
|
-
await (0,
|
|
11879
|
+
await (0, import_promises21.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
12086
11880
|
}
|
|
12087
11881
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
12088
11882
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -12148,8 +11942,8 @@ async function dispatchBatchAgent(options) {
|
|
|
12148
11942
|
}
|
|
12149
11943
|
|
|
12150
11944
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
12151
|
-
var
|
|
12152
|
-
var
|
|
11945
|
+
var import_promises22 = require("fs/promises");
|
|
11946
|
+
var import_node_path31 = __toESM(require("path"), 1);
|
|
12153
11947
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
12154
11948
|
folders: [
|
|
12155
11949
|
{
|
|
@@ -12180,7 +11974,7 @@ async function provisionSubagents(options) {
|
|
|
12180
11974
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
12181
11975
|
throw new Error("subagents must be a positive integer");
|
|
12182
11976
|
}
|
|
12183
|
-
const targetPath =
|
|
11977
|
+
const targetPath = import_node_path31.default.resolve(targetRoot);
|
|
12184
11978
|
if (!dryRun) {
|
|
12185
11979
|
await ensureDir(targetPath);
|
|
12186
11980
|
}
|
|
@@ -12200,7 +11994,7 @@ async function provisionSubagents(options) {
|
|
|
12200
11994
|
continue;
|
|
12201
11995
|
}
|
|
12202
11996
|
highestNumber = Math.max(highestNumber, parsed);
|
|
12203
|
-
const lockFile =
|
|
11997
|
+
const lockFile = import_node_path31.default.join(entry.absolutePath, lockName);
|
|
12204
11998
|
const locked = await pathExists(lockFile);
|
|
12205
11999
|
if (locked) {
|
|
12206
12000
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -12217,10 +12011,10 @@ async function provisionSubagents(options) {
|
|
|
12217
12011
|
break;
|
|
12218
12012
|
}
|
|
12219
12013
|
const subagentDir = subagent.absolutePath;
|
|
12220
|
-
const githubAgentsDir =
|
|
12221
|
-
const lockFile =
|
|
12222
|
-
const workspaceDst =
|
|
12223
|
-
const wakeupDst =
|
|
12014
|
+
const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
|
|
12015
|
+
const lockFile = import_node_path31.default.join(subagentDir, lockName);
|
|
12016
|
+
const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
12017
|
+
const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
|
|
12224
12018
|
const isLocked = await pathExists(lockFile);
|
|
12225
12019
|
if (isLocked && !force) {
|
|
12226
12020
|
continue;
|
|
@@ -12229,8 +12023,8 @@ async function provisionSubagents(options) {
|
|
|
12229
12023
|
if (!dryRun) {
|
|
12230
12024
|
await removeIfExists(lockFile);
|
|
12231
12025
|
await ensureDir(githubAgentsDir);
|
|
12232
|
-
await (0,
|
|
12233
|
-
await (0,
|
|
12026
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12027
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12234
12028
|
}
|
|
12235
12029
|
created.push(subagentDir);
|
|
12236
12030
|
lockedSubagents.delete(subagentDir);
|
|
@@ -12240,8 +12034,8 @@ async function provisionSubagents(options) {
|
|
|
12240
12034
|
if (!isLocked && force) {
|
|
12241
12035
|
if (!dryRun) {
|
|
12242
12036
|
await ensureDir(githubAgentsDir);
|
|
12243
|
-
await (0,
|
|
12244
|
-
await (0,
|
|
12037
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12038
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12245
12039
|
}
|
|
12246
12040
|
created.push(subagentDir);
|
|
12247
12041
|
subagentsProvisioned += 1;
|
|
@@ -12249,8 +12043,8 @@ async function provisionSubagents(options) {
|
|
|
12249
12043
|
}
|
|
12250
12044
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
12251
12045
|
await ensureDir(githubAgentsDir);
|
|
12252
|
-
await (0,
|
|
12253
|
-
await (0,
|
|
12046
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12047
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12254
12048
|
}
|
|
12255
12049
|
skippedExisting.push(subagentDir);
|
|
12256
12050
|
subagentsProvisioned += 1;
|
|
@@ -12258,15 +12052,15 @@ async function provisionSubagents(options) {
|
|
|
12258
12052
|
let nextIndex = highestNumber;
|
|
12259
12053
|
while (subagentsProvisioned < subagents) {
|
|
12260
12054
|
nextIndex += 1;
|
|
12261
|
-
const subagentDir =
|
|
12262
|
-
const githubAgentsDir =
|
|
12263
|
-
const workspaceDst =
|
|
12264
|
-
const wakeupDst =
|
|
12055
|
+
const subagentDir = import_node_path31.default.join(targetPath, `subagent-${nextIndex}`);
|
|
12056
|
+
const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
|
|
12057
|
+
const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
12058
|
+
const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
|
|
12265
12059
|
if (!dryRun) {
|
|
12266
12060
|
await ensureDir(subagentDir);
|
|
12267
12061
|
await ensureDir(githubAgentsDir);
|
|
12268
|
-
await (0,
|
|
12269
|
-
await (0,
|
|
12062
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12063
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12270
12064
|
}
|
|
12271
12065
|
created.push(subagentDir);
|
|
12272
12066
|
subagentsProvisioned += 1;
|
|
@@ -12328,7 +12122,7 @@ var VSCodeProvider = class {
|
|
|
12328
12122
|
}
|
|
12329
12123
|
await this.ensureEnvironmentReady();
|
|
12330
12124
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
12331
|
-
const promptContent = buildPromptDocument2(request, inputFiles
|
|
12125
|
+
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
12332
12126
|
const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
|
|
12333
12127
|
const startTime = Date.now();
|
|
12334
12128
|
const session = await dispatchAgentSession({
|
|
@@ -12382,7 +12176,7 @@ var VSCodeProvider = class {
|
|
|
12382
12176
|
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
12383
12177
|
);
|
|
12384
12178
|
const userQueries = normalizedRequests.map(
|
|
12385
|
-
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles
|
|
12179
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
12386
12180
|
);
|
|
12387
12181
|
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
12388
12182
|
this.config.workspaceTemplate
|
|
@@ -12451,9 +12245,9 @@ var VSCodeProvider = class {
|
|
|
12451
12245
|
async function locateVSCodeExecutable(candidate) {
|
|
12452
12246
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
12453
12247
|
if (includesPathSeparator) {
|
|
12454
|
-
const resolved =
|
|
12248
|
+
const resolved = import_node_path32.default.isAbsolute(candidate) ? candidate : import_node_path32.default.resolve(candidate);
|
|
12455
12249
|
try {
|
|
12456
|
-
await (0,
|
|
12250
|
+
await (0, import_promises23.access)(resolved, import_promises23.constants.F_OK);
|
|
12457
12251
|
return resolved;
|
|
12458
12252
|
} catch {
|
|
12459
12253
|
throw new Error(
|
|
@@ -12466,7 +12260,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
12466
12260
|
const { stdout } = await execAsync3(`${locator} ${candidate}`);
|
|
12467
12261
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
12468
12262
|
if (lines.length > 0 && lines[0]) {
|
|
12469
|
-
await (0,
|
|
12263
|
+
await (0, import_promises23.access)(lines[0], import_promises23.constants.F_OK);
|
|
12470
12264
|
return lines[0];
|
|
12471
12265
|
}
|
|
12472
12266
|
} catch {
|
|
@@ -12480,41 +12274,35 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
12480
12274
|
return void 0;
|
|
12481
12275
|
}
|
|
12482
12276
|
try {
|
|
12483
|
-
const stats = await (0,
|
|
12277
|
+
const stats = await (0, import_promises23.stat)(import_node_path32.default.resolve(template));
|
|
12484
12278
|
return stats.isFile() ? template : void 0;
|
|
12485
12279
|
} catch {
|
|
12486
12280
|
return template;
|
|
12487
12281
|
}
|
|
12488
12282
|
}
|
|
12489
|
-
function buildPromptDocument2(request, attachments
|
|
12283
|
+
function buildPromptDocument2(request, attachments) {
|
|
12490
12284
|
const parts = [];
|
|
12491
12285
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
12492
12286
|
parts.push(request.systemPrompt.trim());
|
|
12493
12287
|
}
|
|
12494
|
-
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
12495
12288
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
12496
|
-
const
|
|
12497
|
-
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
12289
|
+
const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
|
|
12498
12290
|
if (prereadBlock.length > 0) {
|
|
12499
12291
|
parts.push("\n", prereadBlock);
|
|
12500
12292
|
}
|
|
12501
12293
|
parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
|
|
12502
12294
|
return parts.join("\n").trim();
|
|
12503
12295
|
}
|
|
12504
|
-
function buildMandatoryPrereadBlock2(
|
|
12505
|
-
if (
|
|
12296
|
+
function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
12297
|
+
if (attachmentFiles.length === 0) {
|
|
12506
12298
|
return "";
|
|
12507
12299
|
}
|
|
12508
12300
|
const buildList = (files) => files.map((absolutePath) => {
|
|
12509
|
-
const fileName =
|
|
12301
|
+
const fileName = import_node_path32.default.basename(absolutePath);
|
|
12510
12302
|
const fileUri = pathToFileUri3(absolutePath);
|
|
12511
12303
|
return `* [${fileName}](${fileUri})`;
|
|
12512
12304
|
});
|
|
12513
12305
|
const sections = [];
|
|
12514
|
-
if (guidelineFiles.length > 0) {
|
|
12515
|
-
sections.push(`Read all guideline files:
|
|
12516
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
12517
|
-
}
|
|
12518
12306
|
if (attachmentFiles.length > 0) {
|
|
12519
12307
|
sections.push(`Read all attachment files:
|
|
12520
12308
|
${buildList(attachmentFiles).join("\n")}.`);
|
|
@@ -12525,29 +12313,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
|
|
|
12525
12313
|
);
|
|
12526
12314
|
return sections.join("\n");
|
|
12527
12315
|
}
|
|
12528
|
-
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
12529
|
-
if (!attachments || attachments.length === 0) {
|
|
12530
|
-
return [];
|
|
12531
|
-
}
|
|
12532
|
-
const unique = /* @__PURE__ */ new Map();
|
|
12533
|
-
for (const attachment of attachments) {
|
|
12534
|
-
const absolutePath = import_node_path33.default.resolve(attachment);
|
|
12535
|
-
const normalized = absolutePath.split(import_node_path33.default.sep).join("/");
|
|
12536
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
12537
|
-
if (!unique.has(absolutePath)) {
|
|
12538
|
-
unique.set(absolutePath, absolutePath);
|
|
12539
|
-
}
|
|
12540
|
-
}
|
|
12541
|
-
}
|
|
12542
|
-
return Array.from(unique.values());
|
|
12543
|
-
}
|
|
12544
12316
|
function collectAttachmentFiles(attachments) {
|
|
12545
12317
|
if (!attachments || attachments.length === 0) {
|
|
12546
12318
|
return [];
|
|
12547
12319
|
}
|
|
12548
12320
|
const unique = /* @__PURE__ */ new Map();
|
|
12549
12321
|
for (const attachment of attachments) {
|
|
12550
|
-
const absolutePath =
|
|
12322
|
+
const absolutePath = import_node_path32.default.resolve(attachment);
|
|
12551
12323
|
if (!unique.has(absolutePath)) {
|
|
12552
12324
|
unique.set(absolutePath, absolutePath);
|
|
12553
12325
|
}
|
|
@@ -12555,7 +12327,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
12555
12327
|
return Array.from(unique.values());
|
|
12556
12328
|
}
|
|
12557
12329
|
function pathToFileUri3(filePath) {
|
|
12558
|
-
const absolutePath =
|
|
12330
|
+
const absolutePath = import_node_path32.default.isAbsolute(filePath) ? filePath : import_node_path32.default.resolve(filePath);
|
|
12559
12331
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
12560
12332
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
12561
12333
|
return `file:///${normalizedPath}`;
|
|
@@ -12568,7 +12340,7 @@ function normalizeAttachments(attachments) {
|
|
|
12568
12340
|
}
|
|
12569
12341
|
const deduped = /* @__PURE__ */ new Set();
|
|
12570
12342
|
for (const attachment of attachments) {
|
|
12571
|
-
deduped.add(
|
|
12343
|
+
deduped.add(import_node_path32.default.resolve(attachment));
|
|
12572
12344
|
}
|
|
12573
12345
|
return Array.from(deduped);
|
|
12574
12346
|
}
|
|
@@ -12577,7 +12349,7 @@ function mergeAttachments(all) {
|
|
|
12577
12349
|
for (const list of all) {
|
|
12578
12350
|
if (!list) continue;
|
|
12579
12351
|
for (const inputFile of list) {
|
|
12580
|
-
deduped.add(
|
|
12352
|
+
deduped.add(import_node_path32.default.resolve(inputFile));
|
|
12581
12353
|
}
|
|
12582
12354
|
}
|
|
12583
12355
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -12625,8 +12397,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
12625
12397
|
|
|
12626
12398
|
// src/evaluation/providers/targets-file.ts
|
|
12627
12399
|
var import_node_fs10 = require("fs");
|
|
12628
|
-
var
|
|
12629
|
-
var
|
|
12400
|
+
var import_promises24 = require("fs/promises");
|
|
12401
|
+
var import_node_path33 = __toESM(require("path"), 1);
|
|
12630
12402
|
var import_yaml6 = require("yaml");
|
|
12631
12403
|
function isRecord(value) {
|
|
12632
12404
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -12656,18 +12428,18 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
12656
12428
|
}
|
|
12657
12429
|
async function fileExists3(filePath) {
|
|
12658
12430
|
try {
|
|
12659
|
-
await (0,
|
|
12431
|
+
await (0, import_promises24.access)(filePath, import_node_fs10.constants.F_OK);
|
|
12660
12432
|
return true;
|
|
12661
12433
|
} catch {
|
|
12662
12434
|
return false;
|
|
12663
12435
|
}
|
|
12664
12436
|
}
|
|
12665
12437
|
async function readTargetDefinitions(filePath) {
|
|
12666
|
-
const absolutePath =
|
|
12438
|
+
const absolutePath = import_node_path33.default.resolve(filePath);
|
|
12667
12439
|
if (!await fileExists3(absolutePath)) {
|
|
12668
12440
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
12669
12441
|
}
|
|
12670
|
-
const raw = await (0,
|
|
12442
|
+
const raw = await (0, import_promises24.readFile)(absolutePath, "utf8");
|
|
12671
12443
|
const parsed = (0, import_yaml6.parse)(raw);
|
|
12672
12444
|
if (!isRecord(parsed)) {
|
|
12673
12445
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -12683,16 +12455,16 @@ function listTargetNames(definitions) {
|
|
|
12683
12455
|
}
|
|
12684
12456
|
|
|
12685
12457
|
// src/evaluation/providers/provider-discovery.ts
|
|
12686
|
-
var
|
|
12458
|
+
var import_node_path34 = __toESM(require("path"), 1);
|
|
12687
12459
|
var import_fast_glob2 = __toESM(require("fast-glob"), 1);
|
|
12688
12460
|
async function discoverProviders(registry, baseDir) {
|
|
12689
12461
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
12690
12462
|
const candidateDirs = [];
|
|
12691
|
-
let dir =
|
|
12692
|
-
const root =
|
|
12463
|
+
let dir = import_node_path34.default.resolve(baseDir);
|
|
12464
|
+
const root = import_node_path34.default.parse(dir).root;
|
|
12693
12465
|
while (dir !== root) {
|
|
12694
|
-
candidateDirs.push(
|
|
12695
|
-
dir =
|
|
12466
|
+
candidateDirs.push(import_node_path34.default.join(dir, ".agentv", "providers"));
|
|
12467
|
+
dir = import_node_path34.default.dirname(dir);
|
|
12696
12468
|
}
|
|
12697
12469
|
let files = [];
|
|
12698
12470
|
for (const providersDir of candidateDirs) {
|
|
@@ -12708,7 +12480,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
12708
12480
|
}
|
|
12709
12481
|
const discoveredKinds = [];
|
|
12710
12482
|
for (const filePath of files) {
|
|
12711
|
-
const basename =
|
|
12483
|
+
const basename = import_node_path34.default.basename(filePath);
|
|
12712
12484
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
12713
12485
|
if (registry.has(kindName)) {
|
|
12714
12486
|
continue;
|
|
@@ -12815,9 +12587,9 @@ function negateScore(score) {
|
|
|
12815
12587
|
}
|
|
12816
12588
|
|
|
12817
12589
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
12818
|
-
var
|
|
12590
|
+
var import_promises25 = require("fs/promises");
|
|
12819
12591
|
var import_node_os5 = require("os");
|
|
12820
|
-
var
|
|
12592
|
+
var import_node_path35 = require("path");
|
|
12821
12593
|
|
|
12822
12594
|
// src/runtime/exec.ts
|
|
12823
12595
|
function shellEscapePath(value) {
|
|
@@ -12917,15 +12689,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
12917
12689
|
});
|
|
12918
12690
|
}
|
|
12919
12691
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
12920
|
-
const { mkdir: mkdir17, readFile:
|
|
12692
|
+
const { mkdir: mkdir17, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
12921
12693
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
12922
|
-
const
|
|
12694
|
+
const path47 = await import("path");
|
|
12923
12695
|
const { randomUUID: randomUUID9 } = await import("crypto");
|
|
12924
|
-
const dir =
|
|
12696
|
+
const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
12925
12697
|
await mkdir17(dir, { recursive: true });
|
|
12926
|
-
const stdinPath =
|
|
12927
|
-
const stdoutPath =
|
|
12928
|
-
const stderrPath =
|
|
12698
|
+
const stdinPath = path47.join(dir, "stdin.txt");
|
|
12699
|
+
const stdoutPath = path47.join(dir, "stdout.txt");
|
|
12700
|
+
const stderrPath = path47.join(dir, "stderr.txt");
|
|
12929
12701
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
12930
12702
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
12931
12703
|
const { spawn: spawn5 } = await import("child_process");
|
|
@@ -12955,8 +12727,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
12955
12727
|
resolve(code ?? 0);
|
|
12956
12728
|
});
|
|
12957
12729
|
});
|
|
12958
|
-
const stdout = (await
|
|
12959
|
-
const stderr = (await
|
|
12730
|
+
const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
12731
|
+
const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
12960
12732
|
return { stdout, stderr, exitCode };
|
|
12961
12733
|
} finally {
|
|
12962
12734
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -13261,9 +13033,9 @@ var CodeEvaluator = class {
|
|
|
13261
13033
|
if (outputForPayload) {
|
|
13262
13034
|
const serialized = JSON.stringify(outputForPayload);
|
|
13263
13035
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
13264
|
-
const tmpDir = await (0,
|
|
13265
|
-
outputPath = (0,
|
|
13266
|
-
await (0,
|
|
13036
|
+
const tmpDir = await (0, import_promises25.mkdtemp)((0, import_node_path35.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
|
|
13037
|
+
outputPath = (0, import_node_path35.join)(tmpDir, "output.json");
|
|
13038
|
+
await (0, import_promises25.writeFile)(outputPath, serialized);
|
|
13267
13039
|
outputForPayload = null;
|
|
13268
13040
|
}
|
|
13269
13041
|
}
|
|
@@ -13273,10 +13045,7 @@ var CodeEvaluator = class {
|
|
|
13273
13045
|
outputText: context2.candidate,
|
|
13274
13046
|
output: outputForPayload,
|
|
13275
13047
|
outputPath,
|
|
13276
|
-
|
|
13277
|
-
inputFiles: context2.evalCase.file_paths.filter(
|
|
13278
|
-
(path48) => !context2.evalCase.guideline_paths.includes(path48)
|
|
13279
|
-
),
|
|
13048
|
+
inputFiles: context2.evalCase.file_paths,
|
|
13280
13049
|
input: context2.evalCase.input,
|
|
13281
13050
|
trace: context2.trace ?? null,
|
|
13282
13051
|
tokenUsage: context2.tokenUsage ?? null,
|
|
@@ -13375,7 +13144,7 @@ var CodeEvaluator = class {
|
|
|
13375
13144
|
await proxyShutdown();
|
|
13376
13145
|
}
|
|
13377
13146
|
if (outputPath) {
|
|
13378
|
-
await (0,
|
|
13147
|
+
await (0, import_promises25.rm)((0, import_node_path35.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
13379
13148
|
});
|
|
13380
13149
|
}
|
|
13381
13150
|
}
|
|
@@ -13438,8 +13207,8 @@ function isAgentProvider(provider) {
|
|
|
13438
13207
|
}
|
|
13439
13208
|
|
|
13440
13209
|
// src/evaluation/evaluators/llm-grader.ts
|
|
13441
|
-
var
|
|
13442
|
-
var
|
|
13210
|
+
var import_promises26 = __toESM(require("fs/promises"), 1);
|
|
13211
|
+
var import_node_path36 = __toESM(require("path"), 1);
|
|
13443
13212
|
var import_ai2 = require("ai");
|
|
13444
13213
|
var import_zod4 = require("zod");
|
|
13445
13214
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -14271,8 +14040,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
14271
14040
|
};
|
|
14272
14041
|
}
|
|
14273
14042
|
function resolveSandboxed(basePath, relativePath) {
|
|
14274
|
-
const resolved =
|
|
14275
|
-
if (!resolved.startsWith(basePath +
|
|
14043
|
+
const resolved = import_node_path36.default.resolve(basePath, relativePath);
|
|
14044
|
+
if (!resolved.startsWith(basePath + import_node_path36.default.sep) && resolved !== basePath) {
|
|
14276
14045
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
14277
14046
|
}
|
|
14278
14047
|
return resolved;
|
|
@@ -14287,7 +14056,7 @@ function createFilesystemTools(workspacePath) {
|
|
|
14287
14056
|
execute: async (input) => {
|
|
14288
14057
|
try {
|
|
14289
14058
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
14290
|
-
const entries = await
|
|
14059
|
+
const entries = await import_promises26.default.readdir(resolved, { withFileTypes: true });
|
|
14291
14060
|
return entries.map((e) => ({
|
|
14292
14061
|
name: e.name,
|
|
14293
14062
|
type: e.isDirectory() ? "directory" : "file"
|
|
@@ -14305,12 +14074,12 @@ function createFilesystemTools(workspacePath) {
|
|
|
14305
14074
|
execute: async (input) => {
|
|
14306
14075
|
try {
|
|
14307
14076
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
14308
|
-
const stat8 = await
|
|
14077
|
+
const stat8 = await import_promises26.default.stat(resolved);
|
|
14309
14078
|
if (stat8.isDirectory()) {
|
|
14310
14079
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
14311
14080
|
}
|
|
14312
14081
|
const buffer = Buffer.alloc(Math.min(stat8.size, MAX_FILE_SIZE));
|
|
14313
|
-
const fd = await
|
|
14082
|
+
const fd = await import_promises26.default.open(resolved, "r");
|
|
14314
14083
|
try {
|
|
14315
14084
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
14316
14085
|
} finally {
|
|
@@ -14355,30 +14124,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
14355
14124
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14356
14125
|
let entries;
|
|
14357
14126
|
try {
|
|
14358
|
-
entries = await
|
|
14127
|
+
entries = await import_promises26.default.readdir(dirPath, { withFileTypes: true });
|
|
14359
14128
|
} catch {
|
|
14360
14129
|
return;
|
|
14361
14130
|
}
|
|
14362
14131
|
for (const entry of entries) {
|
|
14363
14132
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14364
14133
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
14365
|
-
const fullPath =
|
|
14134
|
+
const fullPath = import_node_path36.default.join(dirPath, entry.name);
|
|
14366
14135
|
if (entry.isDirectory()) {
|
|
14367
14136
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
14368
14137
|
} else if (entry.isFile()) {
|
|
14369
|
-
const ext =
|
|
14138
|
+
const ext = import_node_path36.default.extname(entry.name).toLowerCase();
|
|
14370
14139
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
14371
14140
|
try {
|
|
14372
|
-
const stat8 = await
|
|
14141
|
+
const stat8 = await import_promises26.default.stat(fullPath);
|
|
14373
14142
|
if (stat8.size > MAX_FILE_SIZE) continue;
|
|
14374
|
-
const content = await
|
|
14143
|
+
const content = await import_promises26.default.readFile(fullPath, "utf-8");
|
|
14375
14144
|
const lines = content.split("\n");
|
|
14376
14145
|
for (let i = 0; i < lines.length; i++) {
|
|
14377
14146
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14378
14147
|
regex.lastIndex = 0;
|
|
14379
14148
|
if (regex.test(lines[i])) {
|
|
14380
14149
|
matches.push({
|
|
14381
|
-
file:
|
|
14150
|
+
file: import_node_path36.default.relative(workspacePath, fullPath),
|
|
14382
14151
|
line: i + 1,
|
|
14383
14152
|
text: lines[i].substring(0, 200)
|
|
14384
14153
|
});
|
|
@@ -15013,115 +14782,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
15013
14782
|
* Evaluate a single field against the expected value.
|
|
15014
14783
|
*/
|
|
15015
14784
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
15016
|
-
const { path:
|
|
15017
|
-
const candidateValue = resolvePath(candidateData,
|
|
15018
|
-
const expectedValue = resolvePath(expectedData,
|
|
14785
|
+
const { path: path47, match, required = true, weight = 1 } = fieldConfig;
|
|
14786
|
+
const candidateValue = resolvePath(candidateData, path47);
|
|
14787
|
+
const expectedValue = resolvePath(expectedData, path47);
|
|
15019
14788
|
if (expectedValue === void 0) {
|
|
15020
14789
|
return {
|
|
15021
|
-
path:
|
|
14790
|
+
path: path47,
|
|
15022
14791
|
score: 1,
|
|
15023
14792
|
// No expected value means no comparison needed
|
|
15024
14793
|
weight,
|
|
15025
14794
|
hit: true,
|
|
15026
|
-
message: `${
|
|
14795
|
+
message: `${path47}: no expected value`
|
|
15027
14796
|
};
|
|
15028
14797
|
}
|
|
15029
14798
|
if (candidateValue === void 0) {
|
|
15030
14799
|
if (required) {
|
|
15031
14800
|
return {
|
|
15032
|
-
path:
|
|
14801
|
+
path: path47,
|
|
15033
14802
|
score: 0,
|
|
15034
14803
|
weight,
|
|
15035
14804
|
hit: false,
|
|
15036
|
-
message: `${
|
|
14805
|
+
message: `${path47} (required, missing)`
|
|
15037
14806
|
};
|
|
15038
14807
|
}
|
|
15039
14808
|
return {
|
|
15040
|
-
path:
|
|
14809
|
+
path: path47,
|
|
15041
14810
|
score: 1,
|
|
15042
14811
|
// Don't penalize missing optional fields
|
|
15043
14812
|
weight: 0,
|
|
15044
14813
|
// Zero weight means it won't affect the score
|
|
15045
14814
|
hit: true,
|
|
15046
|
-
message: `${
|
|
14815
|
+
message: `${path47}: optional field missing`
|
|
15047
14816
|
};
|
|
15048
14817
|
}
|
|
15049
14818
|
switch (match) {
|
|
15050
14819
|
case "exact":
|
|
15051
|
-
return this.compareExact(
|
|
14820
|
+
return this.compareExact(path47, candidateValue, expectedValue, weight);
|
|
15052
14821
|
case "numeric_tolerance":
|
|
15053
14822
|
return this.compareNumericTolerance(
|
|
15054
|
-
|
|
14823
|
+
path47,
|
|
15055
14824
|
candidateValue,
|
|
15056
14825
|
expectedValue,
|
|
15057
14826
|
fieldConfig,
|
|
15058
14827
|
weight
|
|
15059
14828
|
);
|
|
15060
14829
|
case "date":
|
|
15061
|
-
return this.compareDate(
|
|
14830
|
+
return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
|
|
15062
14831
|
default:
|
|
15063
14832
|
return {
|
|
15064
|
-
path:
|
|
14833
|
+
path: path47,
|
|
15065
14834
|
score: 0,
|
|
15066
14835
|
weight,
|
|
15067
14836
|
hit: false,
|
|
15068
|
-
message: `${
|
|
14837
|
+
message: `${path47}: unknown match type "${match}"`
|
|
15069
14838
|
};
|
|
15070
14839
|
}
|
|
15071
14840
|
}
|
|
15072
14841
|
/**
|
|
15073
14842
|
* Exact equality comparison.
|
|
15074
14843
|
*/
|
|
15075
|
-
compareExact(
|
|
14844
|
+
compareExact(path47, candidateValue, expectedValue, weight) {
|
|
15076
14845
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
15077
14846
|
return {
|
|
15078
|
-
path:
|
|
14847
|
+
path: path47,
|
|
15079
14848
|
score: 1,
|
|
15080
14849
|
weight,
|
|
15081
14850
|
hit: true,
|
|
15082
|
-
message:
|
|
14851
|
+
message: path47
|
|
15083
14852
|
};
|
|
15084
14853
|
}
|
|
15085
14854
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
15086
14855
|
return {
|
|
15087
|
-
path:
|
|
14856
|
+
path: path47,
|
|
15088
14857
|
score: 0,
|
|
15089
14858
|
weight,
|
|
15090
14859
|
hit: false,
|
|
15091
|
-
message: `${
|
|
14860
|
+
message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
15092
14861
|
};
|
|
15093
14862
|
}
|
|
15094
14863
|
return {
|
|
15095
|
-
path:
|
|
14864
|
+
path: path47,
|
|
15096
14865
|
score: 0,
|
|
15097
14866
|
weight,
|
|
15098
14867
|
hit: false,
|
|
15099
|
-
message: `${
|
|
14868
|
+
message: `${path47} (value mismatch)`
|
|
15100
14869
|
};
|
|
15101
14870
|
}
|
|
15102
14871
|
/**
|
|
15103
14872
|
* Numeric comparison with absolute or relative tolerance.
|
|
15104
14873
|
*/
|
|
15105
|
-
compareNumericTolerance(
|
|
14874
|
+
compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15106
14875
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
15107
14876
|
const candidateNum = toNumber(candidateValue);
|
|
15108
14877
|
const expectedNum = toNumber(expectedValue);
|
|
15109
14878
|
if (candidateNum === null || expectedNum === null) {
|
|
15110
14879
|
return {
|
|
15111
|
-
path:
|
|
14880
|
+
path: path47,
|
|
15112
14881
|
score: 0,
|
|
15113
14882
|
weight,
|
|
15114
14883
|
hit: false,
|
|
15115
|
-
message: `${
|
|
14884
|
+
message: `${path47} (non-numeric value)`
|
|
15116
14885
|
};
|
|
15117
14886
|
}
|
|
15118
14887
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
15119
14888
|
return {
|
|
15120
|
-
path:
|
|
14889
|
+
path: path47,
|
|
15121
14890
|
score: 0,
|
|
15122
14891
|
weight,
|
|
15123
14892
|
hit: false,
|
|
15124
|
-
message: `${
|
|
14893
|
+
message: `${path47} (invalid numeric value)`
|
|
15125
14894
|
};
|
|
15126
14895
|
}
|
|
15127
14896
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -15134,61 +14903,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
15134
14903
|
}
|
|
15135
14904
|
if (withinTolerance) {
|
|
15136
14905
|
return {
|
|
15137
|
-
path:
|
|
14906
|
+
path: path47,
|
|
15138
14907
|
score: 1,
|
|
15139
14908
|
weight,
|
|
15140
14909
|
hit: true,
|
|
15141
|
-
message: `${
|
|
14910
|
+
message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
|
|
15142
14911
|
};
|
|
15143
14912
|
}
|
|
15144
14913
|
return {
|
|
15145
|
-
path:
|
|
14914
|
+
path: path47,
|
|
15146
14915
|
score: 0,
|
|
15147
14916
|
weight,
|
|
15148
14917
|
hit: false,
|
|
15149
|
-
message: `${
|
|
14918
|
+
message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
15150
14919
|
};
|
|
15151
14920
|
}
|
|
15152
14921
|
/**
|
|
15153
14922
|
* Date comparison with format normalization.
|
|
15154
14923
|
*/
|
|
15155
|
-
compareDate(
|
|
14924
|
+
compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15156
14925
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
15157
14926
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
15158
14927
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
15159
14928
|
if (candidateDate === null) {
|
|
15160
14929
|
return {
|
|
15161
|
-
path:
|
|
14930
|
+
path: path47,
|
|
15162
14931
|
score: 0,
|
|
15163
14932
|
weight,
|
|
15164
14933
|
hit: false,
|
|
15165
|
-
message: `${
|
|
14934
|
+
message: `${path47} (unparseable candidate date)`
|
|
15166
14935
|
};
|
|
15167
14936
|
}
|
|
15168
14937
|
if (expectedDate === null) {
|
|
15169
14938
|
return {
|
|
15170
|
-
path:
|
|
14939
|
+
path: path47,
|
|
15171
14940
|
score: 0,
|
|
15172
14941
|
weight,
|
|
15173
14942
|
hit: false,
|
|
15174
|
-
message: `${
|
|
14943
|
+
message: `${path47} (unparseable expected date)`
|
|
15175
14944
|
};
|
|
15176
14945
|
}
|
|
15177
14946
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
15178
14947
|
return {
|
|
15179
|
-
path:
|
|
14948
|
+
path: path47,
|
|
15180
14949
|
score: 1,
|
|
15181
14950
|
weight,
|
|
15182
14951
|
hit: true,
|
|
15183
|
-
message:
|
|
14952
|
+
message: path47
|
|
15184
14953
|
};
|
|
15185
14954
|
}
|
|
15186
14955
|
return {
|
|
15187
|
-
path:
|
|
14956
|
+
path: path47,
|
|
15188
14957
|
score: 0,
|
|
15189
14958
|
weight,
|
|
15190
14959
|
hit: false,
|
|
15191
|
-
message: `${
|
|
14960
|
+
message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
15192
14961
|
};
|
|
15193
14962
|
}
|
|
15194
14963
|
/**
|
|
@@ -15221,11 +14990,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
15221
14990
|
};
|
|
15222
14991
|
}
|
|
15223
14992
|
};
|
|
15224
|
-
function resolvePath(obj,
|
|
15225
|
-
if (!
|
|
14993
|
+
function resolvePath(obj, path47) {
|
|
14994
|
+
if (!path47 || !obj) {
|
|
15226
14995
|
return void 0;
|
|
15227
14996
|
}
|
|
15228
|
-
const parts =
|
|
14997
|
+
const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
15229
14998
|
let current = obj;
|
|
15230
14999
|
for (const part of parts) {
|
|
15231
15000
|
if (current === null || current === void 0) {
|
|
@@ -15685,8 +15454,8 @@ var TokenUsageEvaluator = class {
|
|
|
15685
15454
|
};
|
|
15686
15455
|
|
|
15687
15456
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
15688
|
-
function getNestedValue(obj,
|
|
15689
|
-
const parts =
|
|
15457
|
+
function getNestedValue(obj, path47) {
|
|
15458
|
+
const parts = path47.split(".");
|
|
15690
15459
|
let current = obj;
|
|
15691
15460
|
for (const part of parts) {
|
|
15692
15461
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -16307,9 +16076,9 @@ function runEqualsAssertion(output, value) {
|
|
|
16307
16076
|
|
|
16308
16077
|
// src/evaluation/orchestrator.ts
|
|
16309
16078
|
var import_node_crypto10 = require("crypto");
|
|
16310
|
-
var
|
|
16311
|
-
var
|
|
16312
|
-
var
|
|
16079
|
+
var import_promises30 = require("fs/promises");
|
|
16080
|
+
var import_node_path45 = __toESM(require("path"), 1);
|
|
16081
|
+
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
16313
16082
|
|
|
16314
16083
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
16315
16084
|
var Node = class {
|
|
@@ -16522,7 +16291,7 @@ var InlineAssertEvaluator = class {
|
|
|
16522
16291
|
};
|
|
16523
16292
|
|
|
16524
16293
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
16525
|
-
var
|
|
16294
|
+
var import_node_path37 = __toESM(require("path"), 1);
|
|
16526
16295
|
async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
16527
16296
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
16528
16297
|
if (!context2) {
|
|
@@ -16557,10 +16326,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
16557
16326
|
expectedOutput: context2.evalCase.expected_output,
|
|
16558
16327
|
outputText: context2.candidate,
|
|
16559
16328
|
output: context2.output ?? null,
|
|
16560
|
-
|
|
16561
|
-
inputFiles: context2.evalCase.file_paths.filter(
|
|
16562
|
-
(p) => !context2.evalCase.guideline_paths.includes(p)
|
|
16563
|
-
),
|
|
16329
|
+
inputFiles: context2.evalCase.file_paths,
|
|
16564
16330
|
input: context2.evalCase.input,
|
|
16565
16331
|
trace: context2.trace ?? null,
|
|
16566
16332
|
fileChanges: context2.fileChanges ?? null,
|
|
@@ -16571,7 +16337,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
16571
16337
|
};
|
|
16572
16338
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
16573
16339
|
const scriptPath = script[script.length - 1];
|
|
16574
|
-
const cwd =
|
|
16340
|
+
const cwd = import_node_path37.default.dirname(scriptPath);
|
|
16575
16341
|
try {
|
|
16576
16342
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
16577
16343
|
const prompt = stdout.trim();
|
|
@@ -16843,16 +16609,16 @@ function createBuiltinRegistry() {
|
|
|
16843
16609
|
}
|
|
16844
16610
|
|
|
16845
16611
|
// src/evaluation/registry/assertion-discovery.ts
|
|
16846
|
-
var
|
|
16612
|
+
var import_node_path38 = __toESM(require("path"), 1);
|
|
16847
16613
|
var import_fast_glob3 = __toESM(require("fast-glob"), 1);
|
|
16848
16614
|
async function discoverAssertions(registry, baseDir) {
|
|
16849
16615
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
16850
16616
|
const candidateDirs = [];
|
|
16851
|
-
let dir =
|
|
16852
|
-
const root =
|
|
16617
|
+
let dir = import_node_path38.default.resolve(baseDir);
|
|
16618
|
+
const root = import_node_path38.default.parse(dir).root;
|
|
16853
16619
|
while (dir !== root) {
|
|
16854
|
-
candidateDirs.push(
|
|
16855
|
-
dir =
|
|
16620
|
+
candidateDirs.push(import_node_path38.default.join(dir, ".agentv", "assertions"));
|
|
16621
|
+
dir = import_node_path38.default.dirname(dir);
|
|
16856
16622
|
}
|
|
16857
16623
|
let files = [];
|
|
16858
16624
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -16868,7 +16634,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
16868
16634
|
}
|
|
16869
16635
|
const discoveredTypes = [];
|
|
16870
16636
|
for (const filePath of files) {
|
|
16871
|
-
const basename =
|
|
16637
|
+
const basename = import_node_path38.default.basename(filePath);
|
|
16872
16638
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
16873
16639
|
if (registry.has(typeName)) {
|
|
16874
16640
|
continue;
|
|
@@ -16886,17 +16652,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
16886
16652
|
}
|
|
16887
16653
|
|
|
16888
16654
|
// src/evaluation/registry/grader-discovery.ts
|
|
16889
|
-
var
|
|
16655
|
+
var import_node_path39 = __toESM(require("path"), 1);
|
|
16890
16656
|
var import_fast_glob4 = __toESM(require("fast-glob"), 1);
|
|
16891
16657
|
async function discoverGraders(registry, baseDir) {
|
|
16892
16658
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
16893
16659
|
const candidateDirs = [];
|
|
16894
|
-
let dir =
|
|
16895
|
-
const root =
|
|
16660
|
+
let dir = import_node_path39.default.resolve(baseDir);
|
|
16661
|
+
const root = import_node_path39.default.parse(dir).root;
|
|
16896
16662
|
while (dir !== root) {
|
|
16897
|
-
candidateDirs.push(
|
|
16898
|
-
candidateDirs.push(
|
|
16899
|
-
dir =
|
|
16663
|
+
candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "graders"));
|
|
16664
|
+
candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "judges"));
|
|
16665
|
+
dir = import_node_path39.default.dirname(dir);
|
|
16900
16666
|
}
|
|
16901
16667
|
let files = [];
|
|
16902
16668
|
for (const gradersDir of candidateDirs) {
|
|
@@ -16912,7 +16678,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
16912
16678
|
}
|
|
16913
16679
|
const discoveredTypes = [];
|
|
16914
16680
|
for (const filePath of files) {
|
|
16915
|
-
const basename =
|
|
16681
|
+
const basename = import_node_path39.default.basename(filePath);
|
|
16916
16682
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
16917
16683
|
if (registry.has(typeName)) {
|
|
16918
16684
|
continue;
|
|
@@ -17072,7 +16838,7 @@ function getTCritical(df) {
|
|
|
17072
16838
|
// src/evaluation/workspace/file-changes.ts
|
|
17073
16839
|
var import_node_child_process7 = require("child_process");
|
|
17074
16840
|
var import_node_fs11 = require("fs");
|
|
17075
|
-
var
|
|
16841
|
+
var import_node_path40 = __toESM(require("path"), 1);
|
|
17076
16842
|
var import_node_util4 = require("util");
|
|
17077
16843
|
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.exec);
|
|
17078
16844
|
function gitExecOpts(workspacePath) {
|
|
@@ -17106,10 +16872,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
17106
16872
|
}
|
|
17107
16873
|
for (const entry of entries) {
|
|
17108
16874
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
17109
|
-
const childPath =
|
|
16875
|
+
const childPath = import_node_path40.default.join(workspacePath, entry);
|
|
17110
16876
|
try {
|
|
17111
16877
|
if (!(0, import_node_fs11.statSync)(childPath).isDirectory()) continue;
|
|
17112
|
-
if (!(0, import_node_fs11.statSync)(
|
|
16878
|
+
if (!(0, import_node_fs11.statSync)(import_node_path40.default.join(childPath, ".git")).isDirectory()) continue;
|
|
17113
16879
|
} catch {
|
|
17114
16880
|
continue;
|
|
17115
16881
|
}
|
|
@@ -17119,8 +16885,8 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
17119
16885
|
}
|
|
17120
16886
|
|
|
17121
16887
|
// src/evaluation/workspace/manager.ts
|
|
17122
|
-
var
|
|
17123
|
-
var
|
|
16888
|
+
var import_promises27 = require("fs/promises");
|
|
16889
|
+
var import_node_path41 = __toESM(require("path"), 1);
|
|
17124
16890
|
var TemplateNotFoundError = class extends Error {
|
|
17125
16891
|
constructor(templatePath) {
|
|
17126
16892
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -17142,7 +16908,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
17142
16908
|
};
|
|
17143
16909
|
async function isDirectory(filePath) {
|
|
17144
16910
|
try {
|
|
17145
|
-
const stats = await (0,
|
|
16911
|
+
const stats = await (0, import_promises27.stat)(filePath);
|
|
17146
16912
|
return stats.isDirectory();
|
|
17147
16913
|
} catch {
|
|
17148
16914
|
return false;
|
|
@@ -17150,26 +16916,26 @@ async function isDirectory(filePath) {
|
|
|
17150
16916
|
}
|
|
17151
16917
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
17152
16918
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
17153
|
-
return
|
|
16919
|
+
return import_node_path41.default.join(root, evalRunId, caseId);
|
|
17154
16920
|
}
|
|
17155
16921
|
async function copyDirectoryRecursive(src, dest) {
|
|
17156
|
-
await (0,
|
|
17157
|
-
const entries = await (0,
|
|
16922
|
+
await (0, import_promises27.mkdir)(dest, { recursive: true });
|
|
16923
|
+
const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
|
|
17158
16924
|
for (const entry of entries) {
|
|
17159
|
-
const srcPath =
|
|
17160
|
-
const destPath =
|
|
16925
|
+
const srcPath = import_node_path41.default.join(src, entry.name);
|
|
16926
|
+
const destPath = import_node_path41.default.join(dest, entry.name);
|
|
17161
16927
|
if (entry.name === ".git") {
|
|
17162
16928
|
continue;
|
|
17163
16929
|
}
|
|
17164
16930
|
if (entry.isDirectory()) {
|
|
17165
16931
|
await copyDirectoryRecursive(srcPath, destPath);
|
|
17166
16932
|
} else {
|
|
17167
|
-
await (0,
|
|
16933
|
+
await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true });
|
|
17168
16934
|
}
|
|
17169
16935
|
}
|
|
17170
16936
|
}
|
|
17171
16937
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
17172
|
-
const resolvedTemplatePath =
|
|
16938
|
+
const resolvedTemplatePath = import_node_path41.default.resolve(templatePath);
|
|
17173
16939
|
if (!await fileExists2(resolvedTemplatePath)) {
|
|
17174
16940
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
17175
16941
|
}
|
|
@@ -17179,7 +16945,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
17179
16945
|
const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
|
|
17180
16946
|
try {
|
|
17181
16947
|
if (await fileExists2(workspacePath)) {
|
|
17182
|
-
await (0,
|
|
16948
|
+
await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
|
|
17183
16949
|
}
|
|
17184
16950
|
await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
|
|
17185
16951
|
return workspacePath;
|
|
@@ -17213,14 +16979,14 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
17213
16979
|
}
|
|
17214
16980
|
async function cleanupWorkspace(workspacePath) {
|
|
17215
16981
|
if (await fileExists2(workspacePath)) {
|
|
17216
|
-
await (0,
|
|
16982
|
+
await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
|
|
17217
16983
|
}
|
|
17218
16984
|
}
|
|
17219
16985
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
17220
16986
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
17221
|
-
const evalDir =
|
|
16987
|
+
const evalDir = import_node_path41.default.join(root, evalRunId);
|
|
17222
16988
|
if (await fileExists2(evalDir)) {
|
|
17223
|
-
await (0,
|
|
16989
|
+
await (0, import_promises27.rm)(evalDir, { recursive: true, force: true });
|
|
17224
16990
|
}
|
|
17225
16991
|
}
|
|
17226
16992
|
|
|
@@ -17228,8 +16994,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
17228
16994
|
var import_node_child_process8 = require("child_process");
|
|
17229
16995
|
var import_node_crypto9 = require("crypto");
|
|
17230
16996
|
var import_node_fs12 = require("fs");
|
|
17231
|
-
var
|
|
17232
|
-
var
|
|
16997
|
+
var import_promises28 = require("fs/promises");
|
|
16998
|
+
var import_node_path42 = __toESM(require("path"), 1);
|
|
17233
16999
|
var import_node_util5 = require("util");
|
|
17234
17000
|
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
|
|
17235
17001
|
function gitEnv() {
|
|
@@ -17280,11 +17046,11 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
17280
17046
|
return (0, import_node_crypto9.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
17281
17047
|
}
|
|
17282
17048
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
17283
|
-
await (0,
|
|
17284
|
-
const entries = await (0,
|
|
17049
|
+
await (0, import_promises28.mkdir)(dest, { recursive: true });
|
|
17050
|
+
const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
|
|
17285
17051
|
for (const entry of entries) {
|
|
17286
|
-
const srcPath =
|
|
17287
|
-
const destPath =
|
|
17052
|
+
const srcPath = import_node_path42.default.join(src, entry.name);
|
|
17053
|
+
const destPath = import_node_path42.default.join(dest, entry.name);
|
|
17288
17054
|
if (entry.name === ".git") {
|
|
17289
17055
|
continue;
|
|
17290
17056
|
}
|
|
@@ -17294,7 +17060,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
17294
17060
|
}
|
|
17295
17061
|
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
17296
17062
|
} else {
|
|
17297
|
-
await (0,
|
|
17063
|
+
await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
17298
17064
|
}
|
|
17299
17065
|
}
|
|
17300
17066
|
}
|
|
@@ -17317,8 +17083,8 @@ var WorkspacePoolManager = class {
|
|
|
17317
17083
|
async acquireWorkspace(options) {
|
|
17318
17084
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
17319
17085
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
17320
|
-
const poolDir =
|
|
17321
|
-
await (0,
|
|
17086
|
+
const poolDir = import_node_path42.default.join(this.poolRoot, fingerprint);
|
|
17087
|
+
await (0, import_promises28.mkdir)(poolDir, { recursive: true });
|
|
17322
17088
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
17323
17089
|
if (drifted) {
|
|
17324
17090
|
console.warn(
|
|
@@ -17327,7 +17093,7 @@ var WorkspacePoolManager = class {
|
|
|
17327
17093
|
await this.removeAllSlots(poolDir);
|
|
17328
17094
|
}
|
|
17329
17095
|
for (let i = 0; i < maxSlots; i++) {
|
|
17330
|
-
const slotPath =
|
|
17096
|
+
const slotPath = import_node_path42.default.join(poolDir, `slot-${i}`);
|
|
17331
17097
|
const lockPath = `${slotPath}.lock`;
|
|
17332
17098
|
const locked = await this.tryLock(lockPath);
|
|
17333
17099
|
if (!locked) {
|
|
@@ -17345,7 +17111,7 @@ var WorkspacePoolManager = class {
|
|
|
17345
17111
|
poolDir
|
|
17346
17112
|
};
|
|
17347
17113
|
}
|
|
17348
|
-
await (0,
|
|
17114
|
+
await (0, import_promises28.mkdir)(slotPath, { recursive: true });
|
|
17349
17115
|
if (templatePath) {
|
|
17350
17116
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
17351
17117
|
}
|
|
@@ -17369,7 +17135,7 @@ var WorkspacePoolManager = class {
|
|
|
17369
17135
|
/** Remove lock file to release a slot. */
|
|
17370
17136
|
async releaseSlot(slot) {
|
|
17371
17137
|
try {
|
|
17372
|
-
await (0,
|
|
17138
|
+
await (0, import_promises28.unlink)(slot.lockPath);
|
|
17373
17139
|
} catch {
|
|
17374
17140
|
}
|
|
17375
17141
|
}
|
|
@@ -17382,21 +17148,21 @@ var WorkspacePoolManager = class {
|
|
|
17382
17148
|
async tryLock(lockPath) {
|
|
17383
17149
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
17384
17150
|
try {
|
|
17385
|
-
await (0,
|
|
17151
|
+
await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
17386
17152
|
return true;
|
|
17387
17153
|
} catch (err) {
|
|
17388
17154
|
if (err.code !== "EEXIST") {
|
|
17389
17155
|
throw err;
|
|
17390
17156
|
}
|
|
17391
17157
|
try {
|
|
17392
|
-
const pidStr = await (0,
|
|
17158
|
+
const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
|
|
17393
17159
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
17394
17160
|
if (!Number.isNaN(pid)) {
|
|
17395
17161
|
try {
|
|
17396
17162
|
process.kill(pid, 0);
|
|
17397
17163
|
return false;
|
|
17398
17164
|
} catch {
|
|
17399
|
-
await (0,
|
|
17165
|
+
await (0, import_promises28.unlink)(lockPath).catch(() => {
|
|
17400
17166
|
});
|
|
17401
17167
|
continue;
|
|
17402
17168
|
}
|
|
@@ -17414,9 +17180,9 @@ var WorkspacePoolManager = class {
|
|
|
17414
17180
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
17415
17181
|
*/
|
|
17416
17182
|
async checkDrift(poolDir, fingerprint) {
|
|
17417
|
-
const metadataPath =
|
|
17183
|
+
const metadataPath = import_node_path42.default.join(poolDir, "metadata.json");
|
|
17418
17184
|
try {
|
|
17419
|
-
const raw = await (0,
|
|
17185
|
+
const raw = await (0, import_promises28.readFile)(metadataPath, "utf-8");
|
|
17420
17186
|
const metadata = JSON.parse(raw);
|
|
17421
17187
|
return metadata.fingerprint !== fingerprint;
|
|
17422
17188
|
} catch {
|
|
@@ -17431,17 +17197,17 @@ var WorkspacePoolManager = class {
|
|
|
17431
17197
|
repos,
|
|
17432
17198
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
17433
17199
|
};
|
|
17434
|
-
await (0,
|
|
17200
|
+
await (0, import_promises28.writeFile)(import_node_path42.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
17435
17201
|
}
|
|
17436
17202
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
17437
17203
|
async removeAllSlots(poolDir) {
|
|
17438
|
-
const entries = await (0,
|
|
17204
|
+
const entries = await (0, import_promises28.readdir)(poolDir);
|
|
17439
17205
|
for (const entry of entries) {
|
|
17440
17206
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
17441
|
-
const lockPath =
|
|
17207
|
+
const lockPath = import_node_path42.default.join(poolDir, `${entry}.lock`);
|
|
17442
17208
|
if ((0, import_node_fs12.existsSync)(lockPath)) {
|
|
17443
17209
|
try {
|
|
17444
|
-
const pidStr = await (0,
|
|
17210
|
+
const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
|
|
17445
17211
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
17446
17212
|
if (!Number.isNaN(pid)) {
|
|
17447
17213
|
try {
|
|
@@ -17454,12 +17220,12 @@ var WorkspacePoolManager = class {
|
|
|
17454
17220
|
} catch {
|
|
17455
17221
|
}
|
|
17456
17222
|
}
|
|
17457
|
-
await (0,
|
|
17458
|
-
await (0,
|
|
17223
|
+
await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, entry), { recursive: true, force: true });
|
|
17224
|
+
await (0, import_promises28.rm)(lockPath, { force: true }).catch(() => {
|
|
17459
17225
|
});
|
|
17460
17226
|
}
|
|
17461
17227
|
}
|
|
17462
|
-
await (0,
|
|
17228
|
+
await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
17463
17229
|
});
|
|
17464
17230
|
}
|
|
17465
17231
|
/**
|
|
@@ -17469,7 +17235,7 @@ var WorkspacePoolManager = class {
|
|
|
17469
17235
|
*/
|
|
17470
17236
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
17471
17237
|
for (const repo of repos) {
|
|
17472
|
-
const repoDir =
|
|
17238
|
+
const repoDir = import_node_path42.default.join(slotPath, repo.path);
|
|
17473
17239
|
if (!(0, import_node_fs12.existsSync)(repoDir)) {
|
|
17474
17240
|
continue;
|
|
17475
17241
|
}
|
|
@@ -17496,7 +17262,7 @@ var WorkspacePoolManager = class {
|
|
|
17496
17262
|
// src/evaluation/workspace/repo-manager.ts
|
|
17497
17263
|
var import_node_child_process9 = require("child_process");
|
|
17498
17264
|
var import_node_fs13 = require("fs");
|
|
17499
|
-
var
|
|
17265
|
+
var import_node_path43 = __toESM(require("path"), 1);
|
|
17500
17266
|
var import_node_util6 = require("util");
|
|
17501
17267
|
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process9.execFile);
|
|
17502
17268
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -17596,7 +17362,7 @@ ${lines.join("\n")}`;
|
|
|
17596
17362
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
17597
17363
|
*/
|
|
17598
17364
|
async materialize(repo, workspacePath) {
|
|
17599
|
-
const targetDir =
|
|
17365
|
+
const targetDir = import_node_path43.default.join(workspacePath, repo.path);
|
|
17600
17366
|
const sourceUrl = getSourceUrl(repo.source);
|
|
17601
17367
|
const startedAt = Date.now();
|
|
17602
17368
|
if (this.verbose) {
|
|
@@ -17687,7 +17453,7 @@ ${lines.join("\n")}`;
|
|
|
17687
17453
|
async reset(repos, workspacePath, reset) {
|
|
17688
17454
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
17689
17455
|
for (const repo of repos) {
|
|
17690
|
-
const targetDir =
|
|
17456
|
+
const targetDir = import_node_path43.default.join(workspacePath, repo.path);
|
|
17691
17457
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
17692
17458
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
17693
17459
|
}
|
|
@@ -17695,36 +17461,36 @@ ${lines.join("\n")}`;
|
|
|
17695
17461
|
};
|
|
17696
17462
|
|
|
17697
17463
|
// src/evaluation/workspace/resolve.ts
|
|
17698
|
-
var
|
|
17699
|
-
var
|
|
17464
|
+
var import_promises29 = require("fs/promises");
|
|
17465
|
+
var import_node_path44 = __toESM(require("path"), 1);
|
|
17700
17466
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
17701
17467
|
if (!templatePath) {
|
|
17702
17468
|
return void 0;
|
|
17703
17469
|
}
|
|
17704
|
-
const resolved =
|
|
17705
|
-
const stats = await (0,
|
|
17470
|
+
const resolved = import_node_path44.default.resolve(templatePath);
|
|
17471
|
+
const stats = await (0, import_promises29.stat)(resolved);
|
|
17706
17472
|
if (stats.isFile()) {
|
|
17707
17473
|
return {
|
|
17708
|
-
dir:
|
|
17474
|
+
dir: import_node_path44.default.dirname(resolved),
|
|
17709
17475
|
workspaceFile: resolved
|
|
17710
17476
|
};
|
|
17711
17477
|
}
|
|
17712
17478
|
if (!stats.isDirectory()) {
|
|
17713
17479
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
17714
17480
|
}
|
|
17715
|
-
const entries = await (0,
|
|
17481
|
+
const entries = await (0, import_promises29.readdir)(resolved);
|
|
17716
17482
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
17717
17483
|
if (workspaceFiles.length === 1) {
|
|
17718
17484
|
return {
|
|
17719
17485
|
dir: resolved,
|
|
17720
|
-
workspaceFile:
|
|
17486
|
+
workspaceFile: import_node_path44.default.join(resolved, workspaceFiles[0])
|
|
17721
17487
|
};
|
|
17722
17488
|
}
|
|
17723
17489
|
if (workspaceFiles.length > 1) {
|
|
17724
17490
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
17725
17491
|
return {
|
|
17726
17492
|
dir: resolved,
|
|
17727
|
-
workspaceFile: conventionFile ?
|
|
17493
|
+
workspaceFile: conventionFile ? import_node_path44.default.join(resolved, conventionFile) : void 0
|
|
17728
17494
|
};
|
|
17729
17495
|
}
|
|
17730
17496
|
return { dir: resolved };
|
|
@@ -17928,7 +17694,7 @@ async function runEvaluation(options) {
|
|
|
17928
17694
|
];
|
|
17929
17695
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
17930
17696
|
const typeRegistry = createBuiltinRegistry();
|
|
17931
|
-
const discoveryBaseDir = evalFilePath ?
|
|
17697
|
+
const discoveryBaseDir = evalFilePath ? import_node_path45.default.dirname(import_node_path45.default.resolve(evalFilePath)) : process.cwd();
|
|
17932
17698
|
const evalDir = discoveryBaseDir;
|
|
17933
17699
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
17934
17700
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -18037,14 +17803,22 @@ async function runEvaluation(options) {
|
|
|
18037
17803
|
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
18038
17804
|
const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
18039
17805
|
const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
18040
|
-
const
|
|
18041
|
-
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
17806
|
+
const workers = options.maxConcurrency ?? target.workers ?? 1;
|
|
18042
17807
|
setupLog(
|
|
18043
|
-
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool}
|
|
17808
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} workers=${workers}`
|
|
18044
17809
|
);
|
|
18045
|
-
if (hasSharedWorkspace && !usePool &&
|
|
17810
|
+
if (hasSharedWorkspace && !usePool && workers > 1) {
|
|
18046
17811
|
console.warn(
|
|
18047
|
-
|
|
17812
|
+
[
|
|
17813
|
+
`Warning: This eval uses a shared workspace with ${workers} workers.`,
|
|
17814
|
+
"If the agent under test makes file edits, concurrent runs may corrupt each other.",
|
|
17815
|
+
"To limit concurrency, add this to your eval YAML:",
|
|
17816
|
+
"",
|
|
17817
|
+
" execution:",
|
|
17818
|
+
" workers: 1",
|
|
17819
|
+
"",
|
|
17820
|
+
"Or pass --workers 1 on the command line."
|
|
17821
|
+
].join("\n")
|
|
18048
17822
|
);
|
|
18049
17823
|
}
|
|
18050
17824
|
const limit = pLimit(workers);
|
|
@@ -18060,14 +17834,14 @@ async function runEvaluation(options) {
|
|
|
18060
17834
|
let staticMaterialised = false;
|
|
18061
17835
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
18062
17836
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
18063
|
-
const dirExists = await (0,
|
|
17837
|
+
const dirExists = await (0, import_promises30.stat)(configuredStaticPath).then(
|
|
18064
17838
|
(s) => s.isDirectory(),
|
|
18065
17839
|
() => false
|
|
18066
17840
|
);
|
|
18067
|
-
const isEmpty = dirExists ? (await (0,
|
|
17841
|
+
const isEmpty = dirExists ? (await (0, import_promises30.readdir)(configuredStaticPath)).length === 0 : false;
|
|
18068
17842
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
18069
17843
|
if (!dirExists) {
|
|
18070
|
-
await (0,
|
|
17844
|
+
await (0, import_promises30.mkdir)(configuredStaticPath, { recursive: true });
|
|
18071
17845
|
}
|
|
18072
17846
|
if (workspaceTemplate) {
|
|
18073
17847
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -18112,14 +17886,14 @@ async function runEvaluation(options) {
|
|
|
18112
17886
|
}
|
|
18113
17887
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
18114
17888
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
18115
|
-
await (0,
|
|
17889
|
+
await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
18116
17890
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
18117
17891
|
}
|
|
18118
17892
|
try {
|
|
18119
17893
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
18120
|
-
const copiedWorkspaceFile =
|
|
17894
|
+
const copiedWorkspaceFile = import_node_path45.default.join(sharedWorkspacePath, import_node_path45.default.basename(suiteWorkspaceFile));
|
|
18121
17895
|
try {
|
|
18122
|
-
await (0,
|
|
17896
|
+
await (0, import_promises30.stat)(copiedWorkspaceFile);
|
|
18123
17897
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
18124
17898
|
} catch {
|
|
18125
17899
|
}
|
|
@@ -18227,7 +18001,7 @@ async function runEvaluation(options) {
|
|
|
18227
18001
|
const budgetResult = {
|
|
18228
18002
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
18229
18003
|
testId: evalCase.id,
|
|
18230
|
-
|
|
18004
|
+
eval_set: evalCase.eval_set,
|
|
18231
18005
|
score: 0,
|
|
18232
18006
|
assertions: [],
|
|
18233
18007
|
output: [],
|
|
@@ -18263,7 +18037,7 @@ async function runEvaluation(options) {
|
|
|
18263
18037
|
const haltResult = {
|
|
18264
18038
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
18265
18039
|
testId: evalCase.id,
|
|
18266
|
-
|
|
18040
|
+
eval_set: evalCase.eval_set,
|
|
18267
18041
|
score: 0,
|
|
18268
18042
|
assertions: [],
|
|
18269
18043
|
output: [],
|
|
@@ -18495,8 +18269,6 @@ async function runBatchEvaluation(options) {
|
|
|
18495
18269
|
const promptInputs = promptInputsList[index];
|
|
18496
18270
|
return {
|
|
18497
18271
|
question: promptInputs.question,
|
|
18498
|
-
guidelines: promptInputs.guidelines,
|
|
18499
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
18500
18272
|
inputFiles: evalCase.file_paths,
|
|
18501
18273
|
evalCaseId: evalCase.id,
|
|
18502
18274
|
metadata: {
|
|
@@ -18694,9 +18466,9 @@ async function runEvalCase(options) {
|
|
|
18694
18466
|
);
|
|
18695
18467
|
}
|
|
18696
18468
|
if (caseWorkspaceFile && workspacePath) {
|
|
18697
|
-
const copiedFile =
|
|
18469
|
+
const copiedFile = import_node_path45.default.join(workspacePath, import_node_path45.default.basename(caseWorkspaceFile));
|
|
18698
18470
|
try {
|
|
18699
|
-
await (0,
|
|
18471
|
+
await (0, import_promises30.stat)(copiedFile);
|
|
18700
18472
|
caseWorkspaceFile = copiedFile;
|
|
18701
18473
|
} catch {
|
|
18702
18474
|
}
|
|
@@ -18704,7 +18476,7 @@ async function runEvalCase(options) {
|
|
|
18704
18476
|
}
|
|
18705
18477
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
18706
18478
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
18707
|
-
await (0,
|
|
18479
|
+
await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
|
|
18708
18480
|
}
|
|
18709
18481
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
18710
18482
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -18754,11 +18526,11 @@ async function runEvalCase(options) {
|
|
|
18754
18526
|
const files = evalCase.metadata.agent_skills_files;
|
|
18755
18527
|
if (baseDir && files.length > 0) {
|
|
18756
18528
|
for (const relPath of files) {
|
|
18757
|
-
const srcPath =
|
|
18758
|
-
const destPath =
|
|
18529
|
+
const srcPath = import_node_path45.default.resolve(baseDir, relPath);
|
|
18530
|
+
const destPath = import_node_path45.default.resolve(workspacePath, relPath);
|
|
18759
18531
|
try {
|
|
18760
|
-
await (0,
|
|
18761
|
-
await (0,
|
|
18532
|
+
await (0, import_promises30.mkdir)(import_node_path45.default.dirname(destPath), { recursive: true });
|
|
18533
|
+
await (0, import_promises30.copyFile)(srcPath, destPath);
|
|
18762
18534
|
} catch (error) {
|
|
18763
18535
|
const message = error instanceof Error ? error.message : String(error);
|
|
18764
18536
|
return buildErrorResult(
|
|
@@ -19204,8 +18976,7 @@ async function evaluateCandidate(options) {
|
|
|
19204
18976
|
let lmRequest;
|
|
19205
18977
|
if (isAgentProvider(provider)) {
|
|
19206
18978
|
agentRequest = {
|
|
19207
|
-
question: promptInputs.question
|
|
19208
|
-
guideline_paths: evalCase.guideline_paths
|
|
18979
|
+
question: promptInputs.question
|
|
19209
18980
|
};
|
|
19210
18981
|
} else {
|
|
19211
18982
|
if (promptInputs.chatPrompt) {
|
|
@@ -19214,8 +18985,7 @@ async function evaluateCandidate(options) {
|
|
|
19214
18985
|
};
|
|
19215
18986
|
} else {
|
|
19216
18987
|
lmRequest = {
|
|
19217
|
-
question: promptInputs.question
|
|
19218
|
-
guidelines: promptInputs.guidelines
|
|
18988
|
+
question: promptInputs.question
|
|
19219
18989
|
};
|
|
19220
18990
|
}
|
|
19221
18991
|
}
|
|
@@ -19229,7 +18999,7 @@ async function evaluateCandidate(options) {
|
|
|
19229
18999
|
return {
|
|
19230
19000
|
timestamp: completedAt.toISOString(),
|
|
19231
19001
|
testId: evalCase.id,
|
|
19232
|
-
|
|
19002
|
+
eval_set: evalCase.eval_set,
|
|
19233
19003
|
conversationId: evalCase.conversation_id,
|
|
19234
19004
|
score: score.score,
|
|
19235
19005
|
assertions: score.assertions,
|
|
@@ -19377,7 +19147,7 @@ async function runEvaluatorList(options) {
|
|
|
19377
19147
|
fileChanges,
|
|
19378
19148
|
workspacePath
|
|
19379
19149
|
};
|
|
19380
|
-
const evalFileDir = evalCase.
|
|
19150
|
+
const evalFileDir = evalCase.file_paths[0] ? import_node_path45.default.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
19381
19151
|
const dispatchContext = {
|
|
19382
19152
|
graderProvider,
|
|
19383
19153
|
targetResolver,
|
|
@@ -19491,7 +19261,7 @@ function filterEvalCases(evalCases, filter) {
|
|
|
19491
19261
|
if (!filter) {
|
|
19492
19262
|
return evalCases;
|
|
19493
19263
|
}
|
|
19494
|
-
return evalCases.filter((evalCase) =>
|
|
19264
|
+
return evalCases.filter((evalCase) => import_micromatch3.default.isMatch(evalCase.id, filter));
|
|
19495
19265
|
}
|
|
19496
19266
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
19497
19267
|
const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
|
|
@@ -19528,8 +19298,6 @@ async function invokeProvider(provider, options) {
|
|
|
19528
19298
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
19529
19299
|
return await provider.invoke({
|
|
19530
19300
|
question: promptInputs.question,
|
|
19531
|
-
guidelines: promptInputs.guidelines,
|
|
19532
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
19533
19301
|
chatPrompt: promptInputs.chatPrompt,
|
|
19534
19302
|
inputFiles: evalCase.file_paths,
|
|
19535
19303
|
evalCaseId: evalCase.id,
|
|
@@ -19557,21 +19325,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
19557
19325
|
if (isAgentProvider(provider)) {
|
|
19558
19326
|
agentRequest = {
|
|
19559
19327
|
question: promptInputs.question,
|
|
19560
|
-
guideline_paths: evalCase.guideline_paths,
|
|
19561
19328
|
error: message
|
|
19562
19329
|
};
|
|
19563
19330
|
} else {
|
|
19564
19331
|
if (promptInputs.chatPrompt) {
|
|
19565
19332
|
lmRequest = {
|
|
19566
19333
|
chat_prompt: promptInputs.chatPrompt,
|
|
19567
|
-
guideline_paths: evalCase.guideline_paths,
|
|
19568
19334
|
error: message
|
|
19569
19335
|
};
|
|
19570
19336
|
} else {
|
|
19571
19337
|
lmRequest = {
|
|
19572
19338
|
question: promptInputs.question,
|
|
19573
|
-
guidelines: promptInputs.guidelines,
|
|
19574
|
-
guideline_paths: evalCase.guideline_paths,
|
|
19575
19339
|
error: message
|
|
19576
19340
|
};
|
|
19577
19341
|
}
|
|
@@ -19584,7 +19348,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
19584
19348
|
return {
|
|
19585
19349
|
timestamp: timestamp.toISOString(),
|
|
19586
19350
|
testId: evalCase.id,
|
|
19587
|
-
|
|
19351
|
+
eval_set: evalCase.eval_set,
|
|
19588
19352
|
conversationId: evalCase.conversation_id,
|
|
19589
19353
|
score: 0,
|
|
19590
19354
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
@@ -19617,7 +19381,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
19617
19381
|
hash.update(target.name);
|
|
19618
19382
|
hash.update(evalCase.id);
|
|
19619
19383
|
hash.update(promptInputs.question);
|
|
19620
|
-
hash.update(promptInputs.guidelines);
|
|
19621
19384
|
hash.update(promptInputs.systemMessage ?? "");
|
|
19622
19385
|
if (promptInputs.chatPrompt) {
|
|
19623
19386
|
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
@@ -19717,7 +19480,7 @@ function computeWeightedMean(entries) {
|
|
|
19717
19480
|
|
|
19718
19481
|
// src/evaluation/evaluate.ts
|
|
19719
19482
|
var import_node_fs14 = require("fs");
|
|
19720
|
-
var
|
|
19483
|
+
var import_node_path46 = __toESM(require("path"), 1);
|
|
19721
19484
|
|
|
19722
19485
|
// src/evaluation/providers/function-provider.ts
|
|
19723
19486
|
function createFunctionProvider(taskFn) {
|
|
@@ -19754,7 +19517,7 @@ async function evaluate(config) {
|
|
|
19754
19517
|
}
|
|
19755
19518
|
const gitRoot = await findGitRoot(process.cwd());
|
|
19756
19519
|
const repoRoot = gitRoot ?? process.cwd();
|
|
19757
|
-
const testFilePath = config.specFile ?
|
|
19520
|
+
const testFilePath = config.specFile ? import_node_path46.default.resolve(config.specFile) : import_node_path46.default.join(process.cwd(), "__programmatic__.yaml");
|
|
19758
19521
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
19759
19522
|
let resolvedTarget;
|
|
19760
19523
|
let taskProvider;
|
|
@@ -19820,8 +19583,6 @@ async function evaluate(config) {
|
|
|
19820
19583
|
input_segments: inputSegments,
|
|
19821
19584
|
expected_output: expectedOutput,
|
|
19822
19585
|
reference_answer: expectedOutputValue,
|
|
19823
|
-
guideline_paths: [],
|
|
19824
|
-
guideline_patterns: [],
|
|
19825
19586
|
file_paths: [],
|
|
19826
19587
|
assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
|
|
19827
19588
|
metadata: test.metadata
|
|
@@ -19883,10 +19644,10 @@ function computeSummary(results, durationMs) {
|
|
|
19883
19644
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
19884
19645
|
async function discoverDefaultTarget(repoRoot) {
|
|
19885
19646
|
const cwd = process.cwd();
|
|
19886
|
-
const chain = buildDirectoryChain2(
|
|
19647
|
+
const chain = buildDirectoryChain2(import_node_path46.default.join(cwd, "_placeholder"), repoRoot);
|
|
19887
19648
|
for (const dir of chain) {
|
|
19888
19649
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
19889
|
-
const targetsPath =
|
|
19650
|
+
const targetsPath = import_node_path46.default.join(dir, candidate);
|
|
19890
19651
|
if (!(0, import_node_fs14.existsSync)(targetsPath)) continue;
|
|
19891
19652
|
try {
|
|
19892
19653
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -19903,7 +19664,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
19903
19664
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
19904
19665
|
const envFiles = [];
|
|
19905
19666
|
for (const dir of chain) {
|
|
19906
|
-
const envPath =
|
|
19667
|
+
const envPath = import_node_path46.default.join(dir, ".env");
|
|
19907
19668
|
if ((0, import_node_fs14.existsSync)(envPath)) envFiles.push(envPath);
|
|
19908
19669
|
}
|
|
19909
19670
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -20084,8 +19845,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
20084
19845
|
}
|
|
20085
19846
|
|
|
20086
19847
|
// src/evaluation/cache/response-cache.ts
|
|
20087
|
-
var
|
|
20088
|
-
var
|
|
19848
|
+
var import_promises31 = require("fs/promises");
|
|
19849
|
+
var import_node_path47 = __toESM(require("path"), 1);
|
|
20089
19850
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
20090
19851
|
var ResponseCache = class {
|
|
20091
19852
|
cachePath;
|
|
@@ -20095,7 +19856,7 @@ var ResponseCache = class {
|
|
|
20095
19856
|
async get(key) {
|
|
20096
19857
|
const filePath = this.keyToPath(key);
|
|
20097
19858
|
try {
|
|
20098
|
-
const data = await (0,
|
|
19859
|
+
const data = await (0, import_promises31.readFile)(filePath, "utf8");
|
|
20099
19860
|
return JSON.parse(data);
|
|
20100
19861
|
} catch {
|
|
20101
19862
|
return void 0;
|
|
@@ -20103,13 +19864,13 @@ var ResponseCache = class {
|
|
|
20103
19864
|
}
|
|
20104
19865
|
async set(key, value) {
|
|
20105
19866
|
const filePath = this.keyToPath(key);
|
|
20106
|
-
const dir =
|
|
20107
|
-
await (0,
|
|
20108
|
-
await (0,
|
|
19867
|
+
const dir = import_node_path47.default.dirname(filePath);
|
|
19868
|
+
await (0, import_promises31.mkdir)(dir, { recursive: true });
|
|
19869
|
+
await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
20109
19870
|
}
|
|
20110
19871
|
keyToPath(key) {
|
|
20111
19872
|
const prefix = key.slice(0, 2);
|
|
20112
|
-
return
|
|
19873
|
+
return import_node_path47.default.join(this.cachePath, prefix, `${key}.json`);
|
|
20113
19874
|
}
|
|
20114
19875
|
};
|
|
20115
19876
|
function shouldEnableCache(params) {
|
|
@@ -20297,7 +20058,7 @@ var OtelTraceExporter = class {
|
|
|
20297
20058
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
20298
20059
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
20299
20060
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
20300
|
-
if (result.
|
|
20061
|
+
if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
|
|
20301
20062
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
20302
20063
|
if (captureContent && result.output.length > 0) {
|
|
20303
20064
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -20482,14 +20243,14 @@ var OtelStreamingObserver = class {
|
|
|
20482
20243
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
20483
20244
|
rootCtx = null;
|
|
20484
20245
|
/** Create root eval span immediately (visible in backend right away) */
|
|
20485
|
-
startEvalCase(testId, target,
|
|
20246
|
+
startEvalCase(testId, target, evalSet) {
|
|
20486
20247
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
20487
20248
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
20488
20249
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
20489
20250
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
20490
20251
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
20491
20252
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
20492
|
-
if (
|
|
20253
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
|
|
20493
20254
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
20494
20255
|
}
|
|
20495
20256
|
/** Create and immediately export a tool span */
|
|
@@ -20668,6 +20429,7 @@ function createAgentKernel() {
|
|
|
20668
20429
|
extractTargetsFromSuite,
|
|
20669
20430
|
extractTargetsFromTestCase,
|
|
20670
20431
|
extractTrialsConfig,
|
|
20432
|
+
extractWorkersFromSuite,
|
|
20671
20433
|
fileExists,
|
|
20672
20434
|
findGitRoot,
|
|
20673
20435
|
freeformEvaluationSchema,
|
|
@@ -20682,7 +20444,6 @@ function createAgentKernel() {
|
|
|
20682
20444
|
initializeBaseline,
|
|
20683
20445
|
isAgentSkillsFormat,
|
|
20684
20446
|
isEvaluatorKind,
|
|
20685
|
-
isGuidelineFile,
|
|
20686
20447
|
isJsonObject,
|
|
20687
20448
|
isJsonValue,
|
|
20688
20449
|
isNonEmptyString,
|