@agentv/core 3.7.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-2IZOTQ25.js → chunk-PC5TLJF6.js} +143 -4
- package/dist/chunk-PC5TLJF6.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +228 -72
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +85 -37
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +519 -778
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -23
- package/dist/index.d.ts +11 -23
- package/dist/index.js +450 -841
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-2IZOTQ25.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1315,12 +1315,12 @@ function serializeAttributeValue(value) {
|
|
|
1315
1315
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1316
1316
|
return { stringValue: String(value) };
|
|
1317
1317
|
}
|
|
1318
|
-
var
|
|
1318
|
+
var import_promises32, import_node_path48, OtlpJsonFileExporter;
|
|
1319
1319
|
var init_otlp_json_file_exporter = __esm({
|
|
1320
1320
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1321
1321
|
"use strict";
|
|
1322
|
-
|
|
1323
|
-
|
|
1322
|
+
import_promises32 = require("fs/promises");
|
|
1323
|
+
import_node_path48 = require("path");
|
|
1324
1324
|
OtlpJsonFileExporter = class {
|
|
1325
1325
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1326
1326
|
spans = [];
|
|
@@ -1359,7 +1359,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1359
1359
|
}
|
|
1360
1360
|
async flush() {
|
|
1361
1361
|
if (this.spans.length === 0) return;
|
|
1362
|
-
await (0,
|
|
1362
|
+
await (0, import_promises32.mkdir)((0, import_node_path48.dirname)(this.filePath), { recursive: true });
|
|
1363
1363
|
const otlpJson = {
|
|
1364
1364
|
resourceSpans: [
|
|
1365
1365
|
{
|
|
@@ -1390,13 +1390,13 @@ function hrTimeDiffMs(start, end) {
|
|
|
1390
1390
|
const diffNano = end[1] - start[1];
|
|
1391
1391
|
return Math.round(diffSec * 1e3 + diffNano / 1e6);
|
|
1392
1392
|
}
|
|
1393
|
-
var import_node_fs15,
|
|
1393
|
+
var import_node_fs15, import_promises33, import_node_path49, SimpleTraceFileExporter;
|
|
1394
1394
|
var init_simple_trace_file_exporter = __esm({
|
|
1395
1395
|
"src/observability/simple-trace-file-exporter.ts"() {
|
|
1396
1396
|
"use strict";
|
|
1397
1397
|
import_node_fs15 = require("fs");
|
|
1398
|
-
|
|
1399
|
-
|
|
1398
|
+
import_promises33 = require("fs/promises");
|
|
1399
|
+
import_node_path49 = require("path");
|
|
1400
1400
|
SimpleTraceFileExporter = class {
|
|
1401
1401
|
stream = null;
|
|
1402
1402
|
filePath;
|
|
@@ -1409,7 +1409,7 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1409
1409
|
async ensureStream() {
|
|
1410
1410
|
if (!this.streamReady) {
|
|
1411
1411
|
this.streamReady = (async () => {
|
|
1412
|
-
await (0,
|
|
1412
|
+
await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
|
|
1413
1413
|
this.stream = (0, import_node_fs15.createWriteStream)(this.filePath, { flags: "w" });
|
|
1414
1414
|
return this.stream;
|
|
1415
1415
|
})();
|
|
@@ -1589,7 +1589,6 @@ __export(index_exports, {
|
|
|
1589
1589
|
initializeBaseline: () => initializeBaseline,
|
|
1590
1590
|
isAgentSkillsFormat: () => isAgentSkillsFormat,
|
|
1591
1591
|
isEvaluatorKind: () => isEvaluatorKind,
|
|
1592
|
-
isGuidelineFile: () => isGuidelineFile,
|
|
1593
1592
|
isJsonObject: () => isJsonObject,
|
|
1594
1593
|
isJsonValue: () => isJsonValue,
|
|
1595
1594
|
isNonEmptyString: () => isNonEmptyString,
|
|
@@ -1849,9 +1848,9 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
1849
1848
|
}
|
|
1850
1849
|
|
|
1851
1850
|
// src/evaluation/yaml-parser.ts
|
|
1852
|
-
var
|
|
1853
|
-
var
|
|
1854
|
-
var
|
|
1851
|
+
var import_promises8 = require("fs/promises");
|
|
1852
|
+
var import_node_path8 = __toESM(require("path"), 1);
|
|
1853
|
+
var import_micromatch2 = __toESM(require("micromatch"), 1);
|
|
1855
1854
|
var import_yaml4 = require("yaml");
|
|
1856
1855
|
|
|
1857
1856
|
// src/evaluation/interpolation.ts
|
|
@@ -1944,7 +1943,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
1944
1943
|
input_segments: [{ type: "text", value: prompt }],
|
|
1945
1944
|
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
1946
1945
|
reference_answer: evalCase.expected_output,
|
|
1947
|
-
guideline_paths: [],
|
|
1948
1946
|
file_paths: filePaths,
|
|
1949
1947
|
criteria: evalCase.expected_output ?? "",
|
|
1950
1948
|
assertions,
|
|
@@ -2076,7 +2074,6 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
2076
2074
|
// src/evaluation/loaders/config-loader.ts
|
|
2077
2075
|
var import_promises4 = require("fs/promises");
|
|
2078
2076
|
var import_node_path4 = __toESM(require("path"), 1);
|
|
2079
|
-
var import_micromatch = __toESM(require("micromatch"), 1);
|
|
2080
2077
|
var import_yaml2 = require("yaml");
|
|
2081
2078
|
|
|
2082
2079
|
// src/evaluation/loaders/file-resolver.ts
|
|
@@ -2207,15 +2204,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2207
2204
|
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
2208
2205
|
continue;
|
|
2209
2206
|
}
|
|
2210
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
2211
|
-
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
2212
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
2213
|
-
continue;
|
|
2214
|
-
}
|
|
2215
|
-
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
2216
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
2217
|
-
continue;
|
|
2218
|
-
}
|
|
2219
2207
|
const evalPatterns = config.eval_patterns;
|
|
2220
2208
|
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
2221
2209
|
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
@@ -2231,7 +2219,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2231
2219
|
);
|
|
2232
2220
|
return {
|
|
2233
2221
|
required_version: requiredVersion,
|
|
2234
|
-
guideline_patterns: guidelinePatterns,
|
|
2235
2222
|
eval_patterns: evalPatterns,
|
|
2236
2223
|
execution: executionDefaults
|
|
2237
2224
|
};
|
|
@@ -2243,11 +2230,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
2243
2230
|
}
|
|
2244
2231
|
return null;
|
|
2245
2232
|
}
|
|
2246
|
-
function isGuidelineFile(filePath, patterns) {
|
|
2247
|
-
const normalized = filePath.split("\\").join("/");
|
|
2248
|
-
const patternsToUse = patterns ?? [];
|
|
2249
|
-
return import_micromatch.default.isMatch(normalized, patternsToUse);
|
|
2250
|
-
}
|
|
2251
2233
|
function extractTargetFromSuite(suite) {
|
|
2252
2234
|
const execution = suite.execution;
|
|
2253
2235
|
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
@@ -3823,7 +3805,7 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3823
3805
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
3824
3806
|
var import_promises7 = require("fs/promises");
|
|
3825
3807
|
var import_node_path7 = __toESM(require("path"), 1);
|
|
3826
|
-
var
|
|
3808
|
+
var import_micromatch = __toESM(require("micromatch"), 1);
|
|
3827
3809
|
var import_yaml3 = require("yaml");
|
|
3828
3810
|
|
|
3829
3811
|
// src/evaluation/loaders/message-processor.ts
|
|
@@ -3850,10 +3832,6 @@ function formatSegment(segment, mode = "lm") {
|
|
|
3850
3832
|
if (type === "text") {
|
|
3851
3833
|
return asString2(segment.value);
|
|
3852
3834
|
}
|
|
3853
|
-
if (type === "guideline_ref") {
|
|
3854
|
-
const refPath = asString2(segment.path);
|
|
3855
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
3856
|
-
}
|
|
3857
3835
|
if (type === "file") {
|
|
3858
3836
|
const filePath = asString2(segment.path);
|
|
3859
3837
|
if (!filePath) {
|
|
@@ -3876,9 +3854,6 @@ function hasVisibleContent(segments) {
|
|
|
3876
3854
|
const value = asString2(segment.value);
|
|
3877
3855
|
return value !== void 0 && value.trim().length > 0;
|
|
3878
3856
|
}
|
|
3879
|
-
if (type === "guideline_ref") {
|
|
3880
|
-
return false;
|
|
3881
|
-
}
|
|
3882
3857
|
if (type === "file") {
|
|
3883
3858
|
const text = asString2(segment.text);
|
|
3884
3859
|
return text !== void 0 && text.trim().length > 0;
|
|
@@ -3894,17 +3869,7 @@ function asString2(value) {
|
|
|
3894
3869
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
3895
3870
|
var ANSI_RESET6 = "\x1B[0m";
|
|
3896
3871
|
async function processMessages(options) {
|
|
3897
|
-
const {
|
|
3898
|
-
messages,
|
|
3899
|
-
searchRoots,
|
|
3900
|
-
repoRootPath,
|
|
3901
|
-
guidelinePatterns,
|
|
3902
|
-
guidelinePaths,
|
|
3903
|
-
treatFileSegmentsAsGuidelines,
|
|
3904
|
-
textParts,
|
|
3905
|
-
messageType,
|
|
3906
|
-
verbose
|
|
3907
|
-
} = options;
|
|
3872
|
+
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
3908
3873
|
const segments = [];
|
|
3909
3874
|
for (const message of messages) {
|
|
3910
3875
|
const content = message.content;
|
|
@@ -3948,21 +3913,6 @@ async function processMessages(options) {
|
|
|
3948
3913
|
}
|
|
3949
3914
|
try {
|
|
3950
3915
|
const fileContent = (await (0, import_promises6.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
3951
|
-
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
3952
|
-
messageType,
|
|
3953
|
-
resolvedPath,
|
|
3954
|
-
repoRootPath,
|
|
3955
|
-
guidelinePatterns,
|
|
3956
|
-
treatFileSegmentsAsGuidelines
|
|
3957
|
-
});
|
|
3958
|
-
if (classifyAsGuideline && guidelinePaths) {
|
|
3959
|
-
guidelinePaths.push(import_node_path6.default.resolve(resolvedPath));
|
|
3960
|
-
if (verbose) {
|
|
3961
|
-
console.log(` [Guideline] Found: ${displayPath}`);
|
|
3962
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
3963
|
-
}
|
|
3964
|
-
continue;
|
|
3965
|
-
}
|
|
3966
3916
|
segments.push({
|
|
3967
3917
|
type: "file",
|
|
3968
3918
|
path: displayPath,
|
|
@@ -3990,26 +3940,6 @@ async function processMessages(options) {
|
|
|
3990
3940
|
}
|
|
3991
3941
|
return segments;
|
|
3992
3942
|
}
|
|
3993
|
-
function shouldTreatAsGuideline(options) {
|
|
3994
|
-
const {
|
|
3995
|
-
messageType,
|
|
3996
|
-
resolvedPath,
|
|
3997
|
-
repoRootPath,
|
|
3998
|
-
guidelinePatterns,
|
|
3999
|
-
treatFileSegmentsAsGuidelines
|
|
4000
|
-
} = options;
|
|
4001
|
-
if (messageType !== "input") {
|
|
4002
|
-
return false;
|
|
4003
|
-
}
|
|
4004
|
-
if (treatFileSegmentsAsGuidelines) {
|
|
4005
|
-
return true;
|
|
4006
|
-
}
|
|
4007
|
-
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4008
|
-
return false;
|
|
4009
|
-
}
|
|
4010
|
-
const relativeToRepo = import_node_path6.default.relative(repoRootPath, resolvedPath);
|
|
4011
|
-
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
4012
|
-
}
|
|
4013
3943
|
function asString3(value) {
|
|
4014
3944
|
return typeof value === "string" ? value : void 0;
|
|
4015
3945
|
}
|
|
@@ -4210,7 +4140,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
4210
4140
|
}
|
|
4211
4141
|
return {
|
|
4212
4142
|
description: asString4(parsed.description),
|
|
4213
|
-
|
|
4143
|
+
name: asString4(parsed.name),
|
|
4214
4144
|
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
4215
4145
|
evaluator: parsed.evaluator
|
|
4216
4146
|
};
|
|
@@ -4246,20 +4176,18 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4246
4176
|
const absoluteTestPath = import_node_path7.default.resolve(evalFilePath);
|
|
4247
4177
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4248
4178
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
4249
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
4250
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
4251
4179
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
4252
4180
|
const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
|
|
4253
4181
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
4254
|
-
const
|
|
4255
|
-
const
|
|
4182
|
+
const fallbackEvalSet = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
4183
|
+
const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
|
|
4256
4184
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
4257
4185
|
const globalExecution = sidecar.execution;
|
|
4258
4186
|
if (verbose) {
|
|
4259
4187
|
console.log(`
|
|
4260
4188
|
[JSONL Dataset: ${evalFilePath}]`);
|
|
4261
4189
|
console.log(` Cases: ${rawCases.length}`);
|
|
4262
|
-
console.log(`
|
|
4190
|
+
console.log(` Eval set: ${evalSetName}`);
|
|
4263
4191
|
if (sidecar.description) {
|
|
4264
4192
|
console.log(` Description: ${sidecar.description}`);
|
|
4265
4193
|
}
|
|
@@ -4269,7 +4197,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4269
4197
|
const evalcase = rawCases[lineIndex];
|
|
4270
4198
|
const lineNumber = lineIndex + 1;
|
|
4271
4199
|
const id = asString4(evalcase.id);
|
|
4272
|
-
if (filterPattern && (!id || !
|
|
4200
|
+
if (filterPattern && (!id || !import_micromatch.default.isMatch(id, filterPattern))) {
|
|
4273
4201
|
continue;
|
|
4274
4202
|
}
|
|
4275
4203
|
const conversationId = asString4(evalcase.conversation_id);
|
|
@@ -4292,14 +4220,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4292
4220
|
continue;
|
|
4293
4221
|
}
|
|
4294
4222
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
4295
|
-
const guidelinePaths = [];
|
|
4296
4223
|
const inputTextParts = [];
|
|
4297
4224
|
const inputSegments = await processMessages({
|
|
4298
4225
|
messages: inputMessages,
|
|
4299
4226
|
searchRoots,
|
|
4300
4227
|
repoRootPath,
|
|
4301
|
-
guidelinePatterns,
|
|
4302
|
-
guidelinePaths,
|
|
4303
4228
|
textParts: inputTextParts,
|
|
4304
4229
|
messageType: "input",
|
|
4305
4230
|
verbose
|
|
@@ -4349,40 +4274,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
4349
4274
|
userFilePaths.push(segment.resolvedPath);
|
|
4350
4275
|
}
|
|
4351
4276
|
}
|
|
4352
|
-
const allFilePaths = [
|
|
4353
|
-
...guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
|
|
4354
|
-
...userFilePaths
|
|
4355
|
-
];
|
|
4356
4277
|
const testCase = {
|
|
4357
4278
|
id,
|
|
4358
|
-
|
|
4279
|
+
eval_set: evalSetName,
|
|
4359
4280
|
conversation_id: conversationId,
|
|
4360
4281
|
question,
|
|
4361
4282
|
input: inputMessages,
|
|
4362
4283
|
input_segments: inputSegments,
|
|
4363
4284
|
expected_output: outputSegments,
|
|
4364
4285
|
reference_answer: referenceAnswer,
|
|
4365
|
-
|
|
4366
|
-
guideline_patterns: guidelinePatterns,
|
|
4367
|
-
file_paths: allFilePaths,
|
|
4286
|
+
file_paths: userFilePaths,
|
|
4368
4287
|
criteria: outcome ?? "",
|
|
4369
4288
|
evaluator: evalCaseEvaluatorKind,
|
|
4370
4289
|
assertions: evaluators
|
|
4371
4290
|
};
|
|
4372
|
-
if (verbose) {
|
|
4373
|
-
console.log(`
|
|
4374
|
-
[Test: ${id}]`);
|
|
4375
|
-
if (testCase.guideline_paths.length > 0) {
|
|
4376
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
4377
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
4378
|
-
console.log(` - ${guidelinePath}`);
|
|
4379
|
-
}
|
|
4380
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4381
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
4382
|
-
} else {
|
|
4383
|
-
console.log(" No guidelines found");
|
|
4384
|
-
}
|
|
4385
|
-
}
|
|
4386
4291
|
results.push(testCase);
|
|
4387
4292
|
}
|
|
4388
4293
|
return results;
|
|
@@ -4440,30 +4345,7 @@ function parseMetadata(suite) {
|
|
|
4440
4345
|
}
|
|
4441
4346
|
|
|
4442
4347
|
// src/evaluation/formatting/prompt-builder.ts
|
|
4443
|
-
var import_promises8 = require("fs/promises");
|
|
4444
|
-
var import_node_path8 = __toESM(require("path"), 1);
|
|
4445
|
-
var ANSI_YELLOW7 = "\x1B[33m";
|
|
4446
|
-
var ANSI_RESET8 = "\x1B[0m";
|
|
4447
4348
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
4448
|
-
const guidelineParts = [];
|
|
4449
|
-
for (const rawPath of testCase.guideline_paths) {
|
|
4450
|
-
const absolutePath = import_node_path8.default.resolve(rawPath);
|
|
4451
|
-
if (!await fileExists(absolutePath)) {
|
|
4452
|
-
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
4453
|
-
continue;
|
|
4454
|
-
}
|
|
4455
|
-
try {
|
|
4456
|
-
const content = (await (0, import_promises8.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
4457
|
-
guidelineParts.push({
|
|
4458
|
-
content,
|
|
4459
|
-
isFile: true,
|
|
4460
|
-
displayPath: import_node_path8.default.basename(absolutePath)
|
|
4461
|
-
});
|
|
4462
|
-
} catch (error) {
|
|
4463
|
-
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
4464
|
-
}
|
|
4465
|
-
}
|
|
4466
|
-
const guidelines = formatFileContents(guidelineParts);
|
|
4467
4349
|
const segmentsByMessage = [];
|
|
4468
4350
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
4469
4351
|
for (const segment of testCase.input_segments) {
|
|
@@ -4488,10 +4370,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
4488
4370
|
if (type === "file") {
|
|
4489
4371
|
const value = asString5(segment.value);
|
|
4490
4372
|
if (!value) continue;
|
|
4491
|
-
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
4492
|
-
messageSegments.push({ type: "guideline_ref", path: value });
|
|
4493
|
-
continue;
|
|
4494
|
-
}
|
|
4495
4373
|
const fileText = fileContentsByPath.get(value);
|
|
4496
4374
|
if (fileText !== void 0) {
|
|
4497
4375
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
@@ -4540,10 +4418,6 @@ ${messageContent}`);
|
|
|
4540
4418
|
} else {
|
|
4541
4419
|
const questionParts = [];
|
|
4542
4420
|
for (const segment of testCase.input_segments) {
|
|
4543
|
-
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
4544
|
-
questionParts.push(`<Attached: ${segment.path}>`);
|
|
4545
|
-
continue;
|
|
4546
|
-
}
|
|
4547
4421
|
const formattedContent = formatSegment(segment, mode);
|
|
4548
4422
|
if (formattedContent) {
|
|
4549
4423
|
questionParts.push(formattedContent);
|
|
@@ -4554,11 +4428,9 @@ ${messageContent}`);
|
|
|
4554
4428
|
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
4555
4429
|
messages: testCase.input,
|
|
4556
4430
|
segmentsByMessage,
|
|
4557
|
-
guidelinePatterns: testCase.guideline_patterns,
|
|
4558
|
-
guidelineContent: guidelines,
|
|
4559
4431
|
mode
|
|
4560
4432
|
}) : void 0;
|
|
4561
|
-
return { question,
|
|
4433
|
+
return { question, chatPrompt };
|
|
4562
4434
|
}
|
|
4563
4435
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
4564
4436
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -4573,14 +4445,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
4573
4445
|
return messagesWithContent > 1;
|
|
4574
4446
|
}
|
|
4575
4447
|
function buildChatPromptFromSegments(options) {
|
|
4576
|
-
const {
|
|
4577
|
-
messages,
|
|
4578
|
-
segmentsByMessage,
|
|
4579
|
-
guidelinePatterns,
|
|
4580
|
-
guidelineContent,
|
|
4581
|
-
systemPrompt,
|
|
4582
|
-
mode = "lm"
|
|
4583
|
-
} = options;
|
|
4448
|
+
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
4584
4449
|
if (messages.length === 0) {
|
|
4585
4450
|
return void 0;
|
|
4586
4451
|
}
|
|
@@ -4588,11 +4453,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
4588
4453
|
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
4589
4454
|
systemSegments.push(systemPrompt.trim());
|
|
4590
4455
|
}
|
|
4591
|
-
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
4592
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
4593
|
-
|
|
4594
|
-
${guidelineContent.trim()}`);
|
|
4595
|
-
}
|
|
4596
4456
|
let startIndex = 0;
|
|
4597
4457
|
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
4598
4458
|
const segments = segmentsByMessage[startIndex];
|
|
@@ -4628,15 +4488,8 @@ ${guidelineContent.trim()}`);
|
|
|
4628
4488
|
contentParts.push("@[Tool]:");
|
|
4629
4489
|
}
|
|
4630
4490
|
for (const segment of segments) {
|
|
4631
|
-
if (segment.type === "guideline_ref") {
|
|
4632
|
-
continue;
|
|
4633
|
-
}
|
|
4634
4491
|
const formatted = formatSegment(segment, mode);
|
|
4635
4492
|
if (formatted) {
|
|
4636
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
4637
|
-
if (isGuidelineRef) {
|
|
4638
|
-
continue;
|
|
4639
|
-
}
|
|
4640
4493
|
contentParts.push(formatted);
|
|
4641
4494
|
}
|
|
4642
4495
|
}
|
|
@@ -4654,30 +4507,27 @@ ${guidelineContent.trim()}`);
|
|
|
4654
4507
|
function asString5(value) {
|
|
4655
4508
|
return typeof value === "string" ? value : void 0;
|
|
4656
4509
|
}
|
|
4657
|
-
function logWarning5(message) {
|
|
4658
|
-
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
4659
|
-
}
|
|
4660
4510
|
|
|
4661
4511
|
// src/evaluation/yaml-parser.ts
|
|
4662
|
-
var
|
|
4512
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
4663
4513
|
var ANSI_RED3 = "\x1B[31m";
|
|
4664
|
-
var
|
|
4514
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
4665
4515
|
function resolveTests(suite) {
|
|
4666
4516
|
if (suite.tests !== void 0) return suite.tests;
|
|
4667
4517
|
if (suite.eval_cases !== void 0) {
|
|
4668
|
-
|
|
4518
|
+
logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
|
|
4669
4519
|
return suite.eval_cases;
|
|
4670
4520
|
}
|
|
4671
4521
|
if (suite.evalcases !== void 0) {
|
|
4672
|
-
|
|
4522
|
+
logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
|
|
4673
4523
|
return suite.evalcases;
|
|
4674
4524
|
}
|
|
4675
4525
|
return void 0;
|
|
4676
4526
|
}
|
|
4677
4527
|
async function readTestSuiteMetadata(testFilePath) {
|
|
4678
4528
|
try {
|
|
4679
|
-
const absolutePath =
|
|
4680
|
-
const content = await (0,
|
|
4529
|
+
const absolutePath = import_node_path8.default.resolve(testFilePath);
|
|
4530
|
+
const content = await (0, import_promises8.readFile)(absolutePath, "utf8");
|
|
4681
4531
|
const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
|
|
4682
4532
|
if (!isJsonObject(parsed)) {
|
|
4683
4533
|
return {};
|
|
@@ -4728,26 +4578,25 @@ var loadEvalCases = loadTests;
|
|
|
4728
4578
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
4729
4579
|
const verbose = options?.verbose ?? false;
|
|
4730
4580
|
const filterPattern = options?.filter;
|
|
4731
|
-
const absoluteTestPath =
|
|
4581
|
+
const absoluteTestPath = import_node_path8.default.resolve(evalFilePath);
|
|
4732
4582
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
4733
4583
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
4734
4584
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
4735
|
-
const
|
|
4736
|
-
const rawFile = await (0, import_promises9.readFile)(absoluteTestPath, "utf8");
|
|
4585
|
+
const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
|
|
4737
4586
|
const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
|
|
4738
4587
|
if (!isJsonObject(interpolated)) {
|
|
4739
4588
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
4740
4589
|
}
|
|
4741
4590
|
const suite = interpolated;
|
|
4742
|
-
const
|
|
4743
|
-
const
|
|
4744
|
-
const
|
|
4591
|
+
const evalSetNameFromSuite = asString6(suite.name)?.trim();
|
|
4592
|
+
const fallbackEvalSet = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
4593
|
+
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
4745
4594
|
const rawTestcases = resolveTests(suite);
|
|
4746
4595
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
4747
|
-
const evalFileDir =
|
|
4596
|
+
const evalFileDir = import_node_path8.default.dirname(absoluteTestPath);
|
|
4748
4597
|
let expandedTestcases;
|
|
4749
4598
|
if (typeof rawTestcases === "string") {
|
|
4750
|
-
const externalPath =
|
|
4599
|
+
const externalPath = import_node_path8.default.resolve(evalFileDir, rawTestcases);
|
|
4751
4600
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
4752
4601
|
} else if (Array.isArray(rawTestcases)) {
|
|
4753
4602
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -4761,18 +4610,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4761
4610
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
4762
4611
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
4763
4612
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
4764
|
-
|
|
4613
|
+
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
4765
4614
|
}
|
|
4766
4615
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
4767
4616
|
const results = [];
|
|
4768
4617
|
for (const rawEvalcase of expandedTestcases) {
|
|
4769
4618
|
if (!isJsonObject(rawEvalcase)) {
|
|
4770
|
-
|
|
4619
|
+
logWarning5("Skipping invalid test entry (expected object)");
|
|
4771
4620
|
continue;
|
|
4772
4621
|
}
|
|
4773
4622
|
const evalcase = rawEvalcase;
|
|
4774
4623
|
const id = asString6(evalcase.id);
|
|
4775
|
-
if (filterPattern && (!id || !
|
|
4624
|
+
if (filterPattern && (!id || !import_micromatch2.default.isMatch(id, filterPattern))) {
|
|
4776
4625
|
continue;
|
|
4777
4626
|
}
|
|
4778
4627
|
const conversationId = asString6(evalcase.conversation_id);
|
|
@@ -4780,7 +4629,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4780
4629
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
4781
4630
|
outcome = asString6(evalcase.expected_outcome);
|
|
4782
4631
|
if (outcome) {
|
|
4783
|
-
|
|
4632
|
+
logWarning5(
|
|
4784
4633
|
`Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
4785
4634
|
);
|
|
4786
4635
|
}
|
|
@@ -4800,15 +4649,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4800
4649
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
4801
4650
|
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
4802
4651
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
4803
|
-
const guidelinePaths = [];
|
|
4804
4652
|
const inputTextParts = [];
|
|
4805
4653
|
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
4806
4654
|
messages: effectiveSuiteInputMessages,
|
|
4807
4655
|
searchRoots,
|
|
4808
4656
|
repoRootPath,
|
|
4809
|
-
guidelinePatterns,
|
|
4810
|
-
guidelinePaths,
|
|
4811
|
-
treatFileSegmentsAsGuidelines: true,
|
|
4812
4657
|
textParts: inputTextParts,
|
|
4813
4658
|
messageType: "input",
|
|
4814
4659
|
verbose
|
|
@@ -4817,8 +4662,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4817
4662
|
messages: testInputMessages,
|
|
4818
4663
|
searchRoots,
|
|
4819
4664
|
repoRootPath,
|
|
4820
|
-
guidelinePatterns,
|
|
4821
|
-
guidelinePaths,
|
|
4822
4665
|
textParts: inputTextParts,
|
|
4823
4666
|
messageType: "input",
|
|
4824
4667
|
verbose
|
|
@@ -4867,26 +4710,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4867
4710
|
userFilePaths.push(segment.resolvedPath);
|
|
4868
4711
|
}
|
|
4869
4712
|
}
|
|
4870
|
-
const allFilePaths = [
|
|
4871
|
-
...guidelinePaths.map((guidelinePath) => import_node_path9.default.resolve(guidelinePath)),
|
|
4872
|
-
...userFilePaths
|
|
4873
|
-
];
|
|
4874
4713
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
4875
4714
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
4876
4715
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
4877
4716
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
4878
4717
|
const testCase = {
|
|
4879
4718
|
id,
|
|
4880
|
-
|
|
4719
|
+
eval_set: evalSetName,
|
|
4881
4720
|
conversation_id: conversationId,
|
|
4882
4721
|
question,
|
|
4883
4722
|
input: inputMessages,
|
|
4884
4723
|
input_segments: inputSegments,
|
|
4885
4724
|
expected_output: outputSegments,
|
|
4886
4725
|
reference_answer: referenceAnswer,
|
|
4887
|
-
|
|
4888
|
-
guideline_patterns: guidelinePatterns,
|
|
4889
|
-
file_paths: allFilePaths,
|
|
4726
|
+
file_paths: userFilePaths,
|
|
4890
4727
|
criteria: outcome ?? "",
|
|
4891
4728
|
evaluator: evalCaseEvaluatorKind,
|
|
4892
4729
|
assertions: evaluators,
|
|
@@ -4894,20 +4731,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4894
4731
|
metadata,
|
|
4895
4732
|
targets: caseTargets
|
|
4896
4733
|
};
|
|
4897
|
-
if (verbose) {
|
|
4898
|
-
console.log(`
|
|
4899
|
-
[Test: ${id}]`);
|
|
4900
|
-
if (testCase.guideline_paths.length > 0) {
|
|
4901
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
4902
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
4903
|
-
console.log(` - ${guidelinePath}`);
|
|
4904
|
-
}
|
|
4905
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
4906
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
4907
|
-
} else {
|
|
4908
|
-
console.log(" No guidelines found");
|
|
4909
|
-
}
|
|
4910
|
-
}
|
|
4911
4734
|
results.push(testCase);
|
|
4912
4735
|
}
|
|
4913
4736
|
return { tests: results, parsed: suite };
|
|
@@ -4926,7 +4749,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
4926
4749
|
if (!isJsonObject(raw)) return void 0;
|
|
4927
4750
|
const obj = raw;
|
|
4928
4751
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
4929
|
-
|
|
4752
|
+
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
4930
4753
|
}
|
|
4931
4754
|
const commandSource = obj.command ?? obj.script;
|
|
4932
4755
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
@@ -4934,8 +4757,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
4934
4757
|
if (commandArr.length === 0) return void 0;
|
|
4935
4758
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
4936
4759
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
4937
|
-
if (cwd && !
|
|
4938
|
-
cwd =
|
|
4760
|
+
if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
|
|
4761
|
+
cwd = import_node_path8.default.resolve(evalFileDir, cwd);
|
|
4939
4762
|
}
|
|
4940
4763
|
const config = { command: commandArr };
|
|
4941
4764
|
if (timeoutMs !== void 0) {
|
|
@@ -5025,10 +4848,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
5025
4848
|
}
|
|
5026
4849
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
5027
4850
|
if (typeof raw === "string") {
|
|
5028
|
-
const workspaceFilePath =
|
|
4851
|
+
const workspaceFilePath = import_node_path8.default.resolve(evalFileDir, raw);
|
|
5029
4852
|
let content;
|
|
5030
4853
|
try {
|
|
5031
|
-
content = await (0,
|
|
4854
|
+
content = await (0, import_promises8.readFile)(workspaceFilePath, "utf8");
|
|
5032
4855
|
} catch {
|
|
5033
4856
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
5034
4857
|
}
|
|
@@ -5038,7 +4861,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
5038
4861
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
5039
4862
|
);
|
|
5040
4863
|
}
|
|
5041
|
-
const workspaceFileDir =
|
|
4864
|
+
const workspaceFileDir = import_node_path8.default.dirname(workspaceFilePath);
|
|
5042
4865
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
5043
4866
|
}
|
|
5044
4867
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -5058,8 +4881,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
5058
4881
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
5059
4882
|
}
|
|
5060
4883
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
5061
|
-
if (template && !
|
|
5062
|
-
template =
|
|
4884
|
+
if (template && !import_node_path8.default.isAbsolute(template)) {
|
|
4885
|
+
template = import_node_path8.default.resolve(evalFileDir, template);
|
|
5063
4886
|
}
|
|
5064
4887
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
5065
4888
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -5109,28 +4932,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
5109
4932
|
function asString6(value) {
|
|
5110
4933
|
return typeof value === "string" ? value : void 0;
|
|
5111
4934
|
}
|
|
5112
|
-
function
|
|
4935
|
+
function logWarning5(message, details) {
|
|
5113
4936
|
if (details && details.length > 0) {
|
|
5114
4937
|
const detailBlock = details.join("\n");
|
|
5115
|
-
console.warn(`${
|
|
5116
|
-
${detailBlock}${
|
|
4938
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}
|
|
4939
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
5117
4940
|
} else {
|
|
5118
|
-
console.warn(`${
|
|
4941
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
5119
4942
|
}
|
|
5120
4943
|
}
|
|
5121
4944
|
function logError3(message, details) {
|
|
5122
4945
|
if (details && details.length > 0) {
|
|
5123
4946
|
const detailBlock = details.join("\n");
|
|
5124
4947
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
5125
|
-
${detailBlock}${
|
|
4948
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
5126
4949
|
} else {
|
|
5127
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
4950
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET8}`);
|
|
5128
4951
|
}
|
|
5129
4952
|
}
|
|
5130
4953
|
|
|
5131
4954
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
5132
4955
|
var import_node_fs2 = require("fs");
|
|
5133
|
-
var
|
|
4956
|
+
var import_node_path9 = __toESM(require("path"), 1);
|
|
5134
4957
|
var import_yaml5 = require("yaml");
|
|
5135
4958
|
function codeGraderInstruction(graderName, description) {
|
|
5136
4959
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -5375,7 +5198,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
5375
5198
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
5376
5199
|
const content = (0, import_node_fs2.readFileSync)(evalYamlPath, "utf8");
|
|
5377
5200
|
const parsed = (0, import_yaml5.parse)(content);
|
|
5378
|
-
return transpileEvalYaml(parsed,
|
|
5201
|
+
return transpileEvalYaml(parsed, import_node_path9.default.basename(evalYamlPath));
|
|
5379
5202
|
}
|
|
5380
5203
|
function getOutputFilenames(result) {
|
|
5381
5204
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -5394,11 +5217,11 @@ function getOutputFilenames(result) {
|
|
|
5394
5217
|
|
|
5395
5218
|
// src/evaluation/file-utils.ts
|
|
5396
5219
|
var import_node_fs3 = require("fs");
|
|
5397
|
-
var
|
|
5398
|
-
var
|
|
5220
|
+
var import_promises9 = require("fs/promises");
|
|
5221
|
+
var import_node_path10 = __toESM(require("path"), 1);
|
|
5399
5222
|
async function fileExists2(filePath) {
|
|
5400
5223
|
try {
|
|
5401
|
-
await (0,
|
|
5224
|
+
await (0, import_promises9.access)(filePath, import_node_fs3.constants.F_OK);
|
|
5402
5225
|
return true;
|
|
5403
5226
|
} catch {
|
|
5404
5227
|
return false;
|
|
@@ -5408,22 +5231,22 @@ function normalizeLineEndings(content) {
|
|
|
5408
5231
|
return content.replace(/\r\n/g, "\n");
|
|
5409
5232
|
}
|
|
5410
5233
|
async function readTextFile(filePath) {
|
|
5411
|
-
const content = await (0,
|
|
5234
|
+
const content = await (0, import_promises9.readFile)(filePath, "utf8");
|
|
5412
5235
|
return normalizeLineEndings(content);
|
|
5413
5236
|
}
|
|
5414
5237
|
async function readJsonFile(filePath) {
|
|
5415
|
-
const content = await (0,
|
|
5238
|
+
const content = await (0, import_promises9.readFile)(filePath, "utf8");
|
|
5416
5239
|
return JSON.parse(content);
|
|
5417
5240
|
}
|
|
5418
5241
|
async function findGitRoot(startPath) {
|
|
5419
|
-
let currentDir =
|
|
5420
|
-
const root =
|
|
5242
|
+
let currentDir = import_node_path10.default.dirname(import_node_path10.default.resolve(startPath));
|
|
5243
|
+
const root = import_node_path10.default.parse(currentDir).root;
|
|
5421
5244
|
while (currentDir !== root) {
|
|
5422
|
-
const gitPath =
|
|
5245
|
+
const gitPath = import_node_path10.default.join(currentDir, ".git");
|
|
5423
5246
|
if (await fileExists2(gitPath)) {
|
|
5424
5247
|
return currentDir;
|
|
5425
5248
|
}
|
|
5426
|
-
const parentDir =
|
|
5249
|
+
const parentDir = import_node_path10.default.dirname(currentDir);
|
|
5427
5250
|
if (parentDir === currentDir) {
|
|
5428
5251
|
break;
|
|
5429
5252
|
}
|
|
@@ -5434,8 +5257,8 @@ async function findGitRoot(startPath) {
|
|
|
5434
5257
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
5435
5258
|
const directories = [];
|
|
5436
5259
|
const seen = /* @__PURE__ */ new Set();
|
|
5437
|
-
const boundary =
|
|
5438
|
-
let current =
|
|
5260
|
+
const boundary = import_node_path10.default.resolve(repoRoot);
|
|
5261
|
+
let current = import_node_path10.default.resolve(import_node_path10.default.dirname(filePath));
|
|
5439
5262
|
while (current !== void 0) {
|
|
5440
5263
|
if (!seen.has(current)) {
|
|
5441
5264
|
directories.push(current);
|
|
@@ -5444,7 +5267,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
5444
5267
|
if (current === boundary) {
|
|
5445
5268
|
break;
|
|
5446
5269
|
}
|
|
5447
|
-
const parent =
|
|
5270
|
+
const parent = import_node_path10.default.dirname(current);
|
|
5448
5271
|
if (parent === current) {
|
|
5449
5272
|
break;
|
|
5450
5273
|
}
|
|
@@ -5458,16 +5281,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
5458
5281
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
5459
5282
|
const uniqueRoots = [];
|
|
5460
5283
|
const addRoot = (root) => {
|
|
5461
|
-
const normalized =
|
|
5284
|
+
const normalized = import_node_path10.default.resolve(root);
|
|
5462
5285
|
if (!uniqueRoots.includes(normalized)) {
|
|
5463
5286
|
uniqueRoots.push(normalized);
|
|
5464
5287
|
}
|
|
5465
5288
|
};
|
|
5466
|
-
let currentDir =
|
|
5289
|
+
let currentDir = import_node_path10.default.dirname(evalPath);
|
|
5467
5290
|
let reachedBoundary = false;
|
|
5468
5291
|
while (!reachedBoundary) {
|
|
5469
5292
|
addRoot(currentDir);
|
|
5470
|
-
const parentDir =
|
|
5293
|
+
const parentDir = import_node_path10.default.dirname(currentDir);
|
|
5471
5294
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
5472
5295
|
reachedBoundary = true;
|
|
5473
5296
|
} else {
|
|
@@ -5485,16 +5308,16 @@ function trimLeadingSeparators2(value) {
|
|
|
5485
5308
|
async function resolveFileReference3(rawValue, searchRoots) {
|
|
5486
5309
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
5487
5310
|
const potentialPaths = [];
|
|
5488
|
-
if (
|
|
5489
|
-
potentialPaths.push(
|
|
5311
|
+
if (import_node_path10.default.isAbsolute(rawValue)) {
|
|
5312
|
+
potentialPaths.push(import_node_path10.default.normalize(rawValue));
|
|
5490
5313
|
}
|
|
5491
5314
|
for (const base of searchRoots) {
|
|
5492
|
-
potentialPaths.push(
|
|
5315
|
+
potentialPaths.push(import_node_path10.default.resolve(base, displayPath));
|
|
5493
5316
|
}
|
|
5494
5317
|
const attempted = [];
|
|
5495
5318
|
const seen = /* @__PURE__ */ new Set();
|
|
5496
5319
|
for (const candidate of potentialPaths) {
|
|
5497
|
-
const absoluteCandidate =
|
|
5320
|
+
const absoluteCandidate = import_node_path10.default.resolve(candidate);
|
|
5498
5321
|
if (seen.has(absoluteCandidate)) {
|
|
5499
5322
|
continue;
|
|
5500
5323
|
}
|
|
@@ -5728,10 +5551,10 @@ function buildChatPrompt(request) {
|
|
|
5728
5551
|
if (hasSystemMessage) {
|
|
5729
5552
|
return provided;
|
|
5730
5553
|
}
|
|
5731
|
-
const systemContent2 = resolveSystemContent(request
|
|
5554
|
+
const systemContent2 = resolveSystemContent(request);
|
|
5732
5555
|
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
5733
5556
|
}
|
|
5734
|
-
const systemContent = resolveSystemContent(request
|
|
5557
|
+
const systemContent = resolveSystemContent(request);
|
|
5735
5558
|
const userContent = request.question.trim();
|
|
5736
5559
|
const prompt = [
|
|
5737
5560
|
{ role: "system", content: systemContent },
|
|
@@ -5739,18 +5562,13 @@ function buildChatPrompt(request) {
|
|
|
5739
5562
|
];
|
|
5740
5563
|
return prompt;
|
|
5741
5564
|
}
|
|
5742
|
-
function resolveSystemContent(request
|
|
5565
|
+
function resolveSystemContent(request) {
|
|
5743
5566
|
const systemSegments = [];
|
|
5744
5567
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
5745
5568
|
systemSegments.push(request.systemPrompt.trim());
|
|
5746
5569
|
} else {
|
|
5747
5570
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
5748
5571
|
}
|
|
5749
|
-
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
5750
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
5751
|
-
|
|
5752
|
-
${request.guidelines.trim()}`);
|
|
5753
|
-
}
|
|
5754
5572
|
return systemSegments.join("\n\n");
|
|
5755
5573
|
}
|
|
5756
5574
|
function toModelMessages(chatPrompt) {
|
|
@@ -5933,8 +5751,8 @@ async function withRetry(fn, retryConfig, signal) {
|
|
|
5933
5751
|
var import_node_child_process = require("child_process");
|
|
5934
5752
|
var import_node_crypto = require("crypto");
|
|
5935
5753
|
var import_node_fs4 = require("fs");
|
|
5936
|
-
var
|
|
5937
|
-
var
|
|
5754
|
+
var import_promises10 = require("fs/promises");
|
|
5755
|
+
var import_node_path12 = __toESM(require("path"), 1);
|
|
5938
5756
|
|
|
5939
5757
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
5940
5758
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -5990,17 +5808,11 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
5990
5808
|
}
|
|
5991
5809
|
|
|
5992
5810
|
// src/evaluation/providers/preread.ts
|
|
5993
|
-
var
|
|
5994
|
-
function buildPromptDocument(request, inputFiles
|
|
5811
|
+
var import_node_path11 = __toESM(require("path"), 1);
|
|
5812
|
+
function buildPromptDocument(request, inputFiles) {
|
|
5995
5813
|
const parts = [];
|
|
5996
|
-
const guidelineFiles = collectGuidelineFiles(
|
|
5997
|
-
inputFiles,
|
|
5998
|
-
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
5999
|
-
options?.guidelineOverrides
|
|
6000
|
-
);
|
|
6001
5814
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
6002
|
-
const
|
|
6003
|
-
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
5815
|
+
const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
|
|
6004
5816
|
if (prereadBlock.length > 0) {
|
|
6005
5817
|
parts.push("\n", prereadBlock);
|
|
6006
5818
|
}
|
|
@@ -6013,62 +5825,36 @@ function normalizeInputFiles(inputFiles) {
|
|
|
6013
5825
|
}
|
|
6014
5826
|
const deduped = /* @__PURE__ */ new Map();
|
|
6015
5827
|
for (const inputFile of inputFiles) {
|
|
6016
|
-
const absolutePath =
|
|
5828
|
+
const absolutePath = import_node_path11.default.resolve(inputFile);
|
|
6017
5829
|
if (!deduped.has(absolutePath)) {
|
|
6018
5830
|
deduped.set(absolutePath, absolutePath);
|
|
6019
5831
|
}
|
|
6020
5832
|
}
|
|
6021
5833
|
return Array.from(deduped.values());
|
|
6022
5834
|
}
|
|
6023
|
-
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
6024
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
6025
|
-
return [];
|
|
6026
|
-
}
|
|
6027
|
-
const unique = /* @__PURE__ */ new Map();
|
|
6028
|
-
for (const inputFile of inputFiles) {
|
|
6029
|
-
const absolutePath = import_node_path12.default.resolve(inputFile);
|
|
6030
|
-
if (overrides?.has(absolutePath)) {
|
|
6031
|
-
if (!unique.has(absolutePath)) {
|
|
6032
|
-
unique.set(absolutePath, absolutePath);
|
|
6033
|
-
}
|
|
6034
|
-
continue;
|
|
6035
|
-
}
|
|
6036
|
-
const normalized = absolutePath.split(import_node_path12.default.sep).join("/");
|
|
6037
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
6038
|
-
if (!unique.has(absolutePath)) {
|
|
6039
|
-
unique.set(absolutePath, absolutePath);
|
|
6040
|
-
}
|
|
6041
|
-
}
|
|
6042
|
-
}
|
|
6043
|
-
return Array.from(unique.values());
|
|
6044
|
-
}
|
|
6045
5835
|
function collectInputFiles(inputFiles) {
|
|
6046
5836
|
if (!inputFiles || inputFiles.length === 0) {
|
|
6047
5837
|
return [];
|
|
6048
5838
|
}
|
|
6049
5839
|
const unique = /* @__PURE__ */ new Map();
|
|
6050
5840
|
for (const inputFile of inputFiles) {
|
|
6051
|
-
const absolutePath =
|
|
5841
|
+
const absolutePath = import_node_path11.default.resolve(inputFile);
|
|
6052
5842
|
if (!unique.has(absolutePath)) {
|
|
6053
5843
|
unique.set(absolutePath, absolutePath);
|
|
6054
5844
|
}
|
|
6055
5845
|
}
|
|
6056
5846
|
return Array.from(unique.values());
|
|
6057
5847
|
}
|
|
6058
|
-
function buildMandatoryPrereadBlock(
|
|
6059
|
-
if (
|
|
5848
|
+
function buildMandatoryPrereadBlock(inputFiles) {
|
|
5849
|
+
if (inputFiles.length === 0) {
|
|
6060
5850
|
return "";
|
|
6061
5851
|
}
|
|
6062
5852
|
const buildList = (files) => files.map((absolutePath) => {
|
|
6063
|
-
const fileName =
|
|
5853
|
+
const fileName = import_node_path11.default.basename(absolutePath);
|
|
6064
5854
|
const fileUri = pathToFileUri(absolutePath);
|
|
6065
5855
|
return `* [${fileName}](${fileUri})`;
|
|
6066
5856
|
});
|
|
6067
5857
|
const sections = [];
|
|
6068
|
-
if (guidelineFiles.length > 0) {
|
|
6069
|
-
sections.push(`Read all guideline files:
|
|
6070
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
6071
|
-
}
|
|
6072
5858
|
if (inputFiles.length > 0) {
|
|
6073
5859
|
sections.push(`Read all input files:
|
|
6074
5860
|
${buildList(inputFiles).join("\n")}.`);
|
|
@@ -6080,7 +5866,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
6080
5866
|
return sections.join("\n");
|
|
6081
5867
|
}
|
|
6082
5868
|
function pathToFileUri(filePath) {
|
|
6083
|
-
const absolutePath =
|
|
5869
|
+
const absolutePath = import_node_path11.default.isAbsolute(filePath) ? filePath : import_node_path11.default.resolve(filePath);
|
|
6084
5870
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
6085
5871
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
6086
5872
|
return `file:///${normalizedPath}`;
|
|
@@ -6227,10 +6013,10 @@ var ClaudeCliProvider = class {
|
|
|
6227
6013
|
}
|
|
6228
6014
|
resolveCwd(cwdOverride) {
|
|
6229
6015
|
if (cwdOverride) {
|
|
6230
|
-
return
|
|
6016
|
+
return import_node_path12.default.resolve(cwdOverride);
|
|
6231
6017
|
}
|
|
6232
6018
|
if (this.config.cwd) {
|
|
6233
|
-
return
|
|
6019
|
+
return import_node_path12.default.resolve(this.config.cwd);
|
|
6234
6020
|
}
|
|
6235
6021
|
return void 0;
|
|
6236
6022
|
}
|
|
@@ -6240,9 +6026,9 @@ var ClaudeCliProvider = class {
|
|
|
6240
6026
|
return void 0;
|
|
6241
6027
|
}
|
|
6242
6028
|
if (this.config.logDir) {
|
|
6243
|
-
return
|
|
6029
|
+
return import_node_path12.default.resolve(this.config.logDir);
|
|
6244
6030
|
}
|
|
6245
|
-
return
|
|
6031
|
+
return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
6246
6032
|
}
|
|
6247
6033
|
async createStreamLogger(request) {
|
|
6248
6034
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6250,13 +6036,13 @@ var ClaudeCliProvider = class {
|
|
|
6250
6036
|
return void 0;
|
|
6251
6037
|
}
|
|
6252
6038
|
try {
|
|
6253
|
-
await (0,
|
|
6039
|
+
await (0, import_promises10.mkdir)(logDir, { recursive: true });
|
|
6254
6040
|
} catch (error) {
|
|
6255
6041
|
const message = error instanceof Error ? error.message : String(error);
|
|
6256
6042
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6257
6043
|
return void 0;
|
|
6258
6044
|
}
|
|
6259
|
-
const filePath =
|
|
6045
|
+
const filePath = import_node_path12.default.join(logDir, buildLogFilename(request, this.targetName));
|
|
6260
6046
|
try {
|
|
6261
6047
|
const logger = await ClaudeCliStreamLogger.create({
|
|
6262
6048
|
filePath,
|
|
@@ -6576,8 +6362,8 @@ function tryParseJson(line) {
|
|
|
6576
6362
|
// src/evaluation/providers/claude-sdk.ts
|
|
6577
6363
|
var import_node_crypto2 = require("crypto");
|
|
6578
6364
|
var import_node_fs5 = require("fs");
|
|
6579
|
-
var
|
|
6580
|
-
var
|
|
6365
|
+
var import_promises11 = require("fs/promises");
|
|
6366
|
+
var import_node_path13 = __toESM(require("path"), 1);
|
|
6581
6367
|
var claudeSdkModule = null;
|
|
6582
6368
|
async function loadClaudeSdk() {
|
|
6583
6369
|
if (!claudeSdkModule) {
|
|
@@ -6737,10 +6523,10 @@ var ClaudeSdkProvider = class {
|
|
|
6737
6523
|
}
|
|
6738
6524
|
resolveCwd(cwdOverride) {
|
|
6739
6525
|
if (cwdOverride) {
|
|
6740
|
-
return
|
|
6526
|
+
return import_node_path13.default.resolve(cwdOverride);
|
|
6741
6527
|
}
|
|
6742
6528
|
if (this.config.cwd) {
|
|
6743
|
-
return
|
|
6529
|
+
return import_node_path13.default.resolve(this.config.cwd);
|
|
6744
6530
|
}
|
|
6745
6531
|
return void 0;
|
|
6746
6532
|
}
|
|
@@ -6750,9 +6536,9 @@ var ClaudeSdkProvider = class {
|
|
|
6750
6536
|
return void 0;
|
|
6751
6537
|
}
|
|
6752
6538
|
if (this.config.logDir) {
|
|
6753
|
-
return
|
|
6539
|
+
return import_node_path13.default.resolve(this.config.logDir);
|
|
6754
6540
|
}
|
|
6755
|
-
return
|
|
6541
|
+
return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "claude");
|
|
6756
6542
|
}
|
|
6757
6543
|
async createStreamLogger(request) {
|
|
6758
6544
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6760,13 +6546,13 @@ var ClaudeSdkProvider = class {
|
|
|
6760
6546
|
return void 0;
|
|
6761
6547
|
}
|
|
6762
6548
|
try {
|
|
6763
|
-
await (0,
|
|
6549
|
+
await (0, import_promises11.mkdir)(logDir, { recursive: true });
|
|
6764
6550
|
} catch (error) {
|
|
6765
6551
|
const message = error instanceof Error ? error.message : String(error);
|
|
6766
6552
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
6767
6553
|
return void 0;
|
|
6768
6554
|
}
|
|
6769
|
-
const filePath =
|
|
6555
|
+
const filePath = import_node_path13.default.join(logDir, buildLogFilename2(request, this.targetName));
|
|
6770
6556
|
try {
|
|
6771
6557
|
const logger = await ClaudeStreamLogger.create({
|
|
6772
6558
|
filePath,
|
|
@@ -6971,9 +6757,9 @@ function formatElapsed2(startedAt) {
|
|
|
6971
6757
|
|
|
6972
6758
|
// src/evaluation/providers/cli.ts
|
|
6973
6759
|
var import_node_child_process2 = require("child_process");
|
|
6974
|
-
var
|
|
6760
|
+
var import_promises12 = __toESM(require("fs/promises"), 1);
|
|
6975
6761
|
var import_node_os = __toESM(require("os"), 1);
|
|
6976
|
-
var
|
|
6762
|
+
var import_node_path14 = __toESM(require("path"), 1);
|
|
6977
6763
|
var import_node_util = require("util");
|
|
6978
6764
|
var import_zod2 = require("zod");
|
|
6979
6765
|
var ToolCallSchema = import_zod2.z.object({
|
|
@@ -7182,7 +6968,6 @@ var CliProvider = class {
|
|
|
7182
6968
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
7183
6969
|
{
|
|
7184
6970
|
question: "",
|
|
7185
|
-
guidelines: "",
|
|
7186
6971
|
inputFiles: batchInputFiles,
|
|
7187
6972
|
evalCaseId: "batch",
|
|
7188
6973
|
attempt: 0
|
|
@@ -7370,7 +7155,7 @@ var CliProvider = class {
|
|
|
7370
7155
|
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
7371
7156
|
} finally {
|
|
7372
7157
|
if (!this.keepTempFiles) {
|
|
7373
|
-
await
|
|
7158
|
+
await import_promises12.default.unlink(filePath).catch(() => {
|
|
7374
7159
|
});
|
|
7375
7160
|
}
|
|
7376
7161
|
}
|
|
@@ -7415,7 +7200,6 @@ var CliProvider = class {
|
|
|
7415
7200
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
7416
7201
|
{
|
|
7417
7202
|
question: "",
|
|
7418
|
-
guidelines: "",
|
|
7419
7203
|
inputFiles: [],
|
|
7420
7204
|
evalCaseId: "healthcheck",
|
|
7421
7205
|
attempt: 0
|
|
@@ -7451,12 +7235,11 @@ var CliProvider = class {
|
|
|
7451
7235
|
async function buildTemplateValues(request, config, outputFilePath) {
|
|
7452
7236
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
7453
7237
|
const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
|
|
7454
|
-
await
|
|
7238
|
+
await import_promises12.default.writeFile(promptFilePath, request.question ?? "", "utf8");
|
|
7455
7239
|
return {
|
|
7456
7240
|
values: {
|
|
7457
7241
|
PROMPT: shellEscape(request.question ?? ""),
|
|
7458
7242
|
PROMPT_FILE: shellEscape(promptFilePath),
|
|
7459
|
-
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
7460
7243
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
7461
7244
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
7462
7245
|
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
@@ -7469,7 +7252,7 @@ async function cleanupTempFile(filePath, keepTempFiles) {
|
|
|
7469
7252
|
if (!filePath || keepTempFiles) {
|
|
7470
7253
|
return;
|
|
7471
7254
|
}
|
|
7472
|
-
await
|
|
7255
|
+
await import_promises12.default.unlink(filePath).catch(() => {
|
|
7473
7256
|
});
|
|
7474
7257
|
}
|
|
7475
7258
|
function normalizeInputFiles2(inputFiles) {
|
|
@@ -7478,7 +7261,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
7478
7261
|
}
|
|
7479
7262
|
const unique = /* @__PURE__ */ new Map();
|
|
7480
7263
|
for (const inputFile of inputFiles) {
|
|
7481
|
-
const absolutePath =
|
|
7264
|
+
const absolutePath = import_node_path14.default.resolve(inputFile);
|
|
7482
7265
|
if (!unique.has(absolutePath)) {
|
|
7483
7266
|
unique.set(absolutePath, absolutePath);
|
|
7484
7267
|
}
|
|
@@ -7492,7 +7275,7 @@ function formatFileList(files, template) {
|
|
|
7492
7275
|
const formatter = template ?? "{path}";
|
|
7493
7276
|
return files.map((filePath) => {
|
|
7494
7277
|
const escapedPath = shellEscape(filePath);
|
|
7495
|
-
const escapedName = shellEscape(
|
|
7278
|
+
const escapedName = shellEscape(import_node_path14.default.basename(filePath));
|
|
7496
7279
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
7497
7280
|
}).join(" ");
|
|
7498
7281
|
}
|
|
@@ -7516,7 +7299,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
7516
7299
|
const safeEvalId = evalCaseId || "unknown";
|
|
7517
7300
|
const timestamp = Date.now();
|
|
7518
7301
|
const random = Math.random().toString(36).substring(2, 9);
|
|
7519
|
-
return
|
|
7302
|
+
return import_node_path14.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
7520
7303
|
}
|
|
7521
7304
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
7522
7305
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -7529,8 +7312,8 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
7529
7312
|
// src/evaluation/providers/codex.ts
|
|
7530
7313
|
var import_node_crypto3 = require("crypto");
|
|
7531
7314
|
var import_node_fs6 = require("fs");
|
|
7532
|
-
var
|
|
7533
|
-
var
|
|
7315
|
+
var import_promises13 = require("fs/promises");
|
|
7316
|
+
var import_node_path15 = __toESM(require("path"), 1);
|
|
7534
7317
|
|
|
7535
7318
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
7536
7319
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -7765,10 +7548,10 @@ ${basePrompt}` : basePrompt;
|
|
|
7765
7548
|
}
|
|
7766
7549
|
resolveCwd(cwdOverride) {
|
|
7767
7550
|
if (cwdOverride) {
|
|
7768
|
-
return
|
|
7551
|
+
return import_node_path15.default.resolve(cwdOverride);
|
|
7769
7552
|
}
|
|
7770
7553
|
if (this.config.cwd) {
|
|
7771
|
-
return
|
|
7554
|
+
return import_node_path15.default.resolve(this.config.cwd);
|
|
7772
7555
|
}
|
|
7773
7556
|
return void 0;
|
|
7774
7557
|
}
|
|
@@ -7778,9 +7561,9 @@ ${basePrompt}` : basePrompt;
|
|
|
7778
7561
|
return void 0;
|
|
7779
7562
|
}
|
|
7780
7563
|
if (this.config.logDir) {
|
|
7781
|
-
return
|
|
7564
|
+
return import_node_path15.default.resolve(this.config.logDir);
|
|
7782
7565
|
}
|
|
7783
|
-
return
|
|
7566
|
+
return import_node_path15.default.join(process.cwd(), ".agentv", "logs", "codex");
|
|
7784
7567
|
}
|
|
7785
7568
|
async createStreamLogger(request) {
|
|
7786
7569
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7788,13 +7571,13 @@ ${basePrompt}` : basePrompt;
|
|
|
7788
7571
|
return void 0;
|
|
7789
7572
|
}
|
|
7790
7573
|
try {
|
|
7791
|
-
await (0,
|
|
7574
|
+
await (0, import_promises13.mkdir)(logDir, { recursive: true });
|
|
7792
7575
|
} catch (error) {
|
|
7793
7576
|
const message = error instanceof Error ? error.message : String(error);
|
|
7794
7577
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7795
7578
|
return void 0;
|
|
7796
7579
|
}
|
|
7797
|
-
const filePath =
|
|
7580
|
+
const filePath = import_node_path15.default.join(logDir, buildLogFilename3(request, this.targetName));
|
|
7798
7581
|
try {
|
|
7799
7582
|
const logger = await CodexSdkStreamLogger.create({
|
|
7800
7583
|
filePath,
|
|
@@ -7937,8 +7720,8 @@ function formatElapsed3(startedAt) {
|
|
|
7937
7720
|
|
|
7938
7721
|
// src/evaluation/providers/copilot-cli.ts
|
|
7939
7722
|
var import_node_crypto5 = require("crypto");
|
|
7940
|
-
var
|
|
7941
|
-
var
|
|
7723
|
+
var import_promises14 = require("fs/promises");
|
|
7724
|
+
var import_node_path17 = __toESM(require("path"), 1);
|
|
7942
7725
|
var import_node_stream = require("stream");
|
|
7943
7726
|
var import_node_child_process3 = require("child_process");
|
|
7944
7727
|
var acp = __toESM(require("@agentclientprotocol/sdk"), 1);
|
|
@@ -8000,7 +7783,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
8000
7783
|
var import_node_crypto4 = require("crypto");
|
|
8001
7784
|
var import_node_fs7 = require("fs");
|
|
8002
7785
|
var import_node_os2 = require("os");
|
|
8003
|
-
var
|
|
7786
|
+
var import_node_path16 = __toESM(require("path"), 1);
|
|
8004
7787
|
var import_node_url2 = require("url");
|
|
8005
7788
|
var import_meta = {};
|
|
8006
7789
|
function resolvePlatformCliPath() {
|
|
@@ -8025,7 +7808,7 @@ function resolvePlatformCliPath() {
|
|
|
8025
7808
|
try {
|
|
8026
7809
|
const resolved = import_meta.resolve(`${packageName}/package.json`);
|
|
8027
7810
|
const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url2.fileURLToPath)(resolved) : resolved;
|
|
8028
|
-
const binaryPath =
|
|
7811
|
+
const binaryPath = import_node_path16.default.join(import_node_path16.default.dirname(packageJsonPath), binaryName);
|
|
8029
7812
|
if ((0, import_node_fs7.existsSync)(binaryPath)) {
|
|
8030
7813
|
return binaryPath;
|
|
8031
7814
|
}
|
|
@@ -8033,7 +7816,7 @@ function resolvePlatformCliPath() {
|
|
|
8033
7816
|
}
|
|
8034
7817
|
let searchDir = process.cwd();
|
|
8035
7818
|
for (let i = 0; i < 10; i++) {
|
|
8036
|
-
const standardPath =
|
|
7819
|
+
const standardPath = import_node_path16.default.join(
|
|
8037
7820
|
searchDir,
|
|
8038
7821
|
"node_modules",
|
|
8039
7822
|
...packageName.split("/"),
|
|
@@ -8042,13 +7825,13 @@ function resolvePlatformCliPath() {
|
|
|
8042
7825
|
if ((0, import_node_fs7.existsSync)(standardPath)) {
|
|
8043
7826
|
return standardPath;
|
|
8044
7827
|
}
|
|
8045
|
-
const bunDir =
|
|
7828
|
+
const bunDir = import_node_path16.default.join(searchDir, "node_modules", ".bun");
|
|
8046
7829
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
8047
7830
|
try {
|
|
8048
7831
|
const entries = (0, import_node_fs7.readdirSync)(bunDir);
|
|
8049
7832
|
for (const entry of entries) {
|
|
8050
7833
|
if (entry.startsWith(prefix)) {
|
|
8051
|
-
const candidate =
|
|
7834
|
+
const candidate = import_node_path16.default.join(
|
|
8052
7835
|
bunDir,
|
|
8053
7836
|
entry,
|
|
8054
7837
|
"node_modules",
|
|
@@ -8063,7 +7846,7 @@ function resolvePlatformCliPath() {
|
|
|
8063
7846
|
}
|
|
8064
7847
|
} catch {
|
|
8065
7848
|
}
|
|
8066
|
-
const parent =
|
|
7849
|
+
const parent = import_node_path16.default.dirname(searchDir);
|
|
8067
7850
|
if (parent === searchDir) break;
|
|
8068
7851
|
searchDir = parent;
|
|
8069
7852
|
}
|
|
@@ -8401,10 +8184,10 @@ var CopilotCliProvider = class {
|
|
|
8401
8184
|
}
|
|
8402
8185
|
resolveCwd(cwdOverride) {
|
|
8403
8186
|
if (cwdOverride) {
|
|
8404
|
-
return
|
|
8187
|
+
return import_node_path17.default.resolve(cwdOverride);
|
|
8405
8188
|
}
|
|
8406
8189
|
if (this.config.cwd) {
|
|
8407
|
-
return
|
|
8190
|
+
return import_node_path17.default.resolve(this.config.cwd);
|
|
8408
8191
|
}
|
|
8409
8192
|
return void 0;
|
|
8410
8193
|
}
|
|
@@ -8423,9 +8206,9 @@ var CopilotCliProvider = class {
|
|
|
8423
8206
|
return void 0;
|
|
8424
8207
|
}
|
|
8425
8208
|
if (this.config.logDir) {
|
|
8426
|
-
return
|
|
8209
|
+
return import_node_path17.default.resolve(this.config.logDir);
|
|
8427
8210
|
}
|
|
8428
|
-
return
|
|
8211
|
+
return import_node_path17.default.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
8429
8212
|
}
|
|
8430
8213
|
async createStreamLogger(request) {
|
|
8431
8214
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8433,13 +8216,13 @@ var CopilotCliProvider = class {
|
|
|
8433
8216
|
return void 0;
|
|
8434
8217
|
}
|
|
8435
8218
|
try {
|
|
8436
|
-
await (0,
|
|
8219
|
+
await (0, import_promises14.mkdir)(logDir, { recursive: true });
|
|
8437
8220
|
} catch (error) {
|
|
8438
8221
|
const message = error instanceof Error ? error.message : String(error);
|
|
8439
8222
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
8440
8223
|
return void 0;
|
|
8441
8224
|
}
|
|
8442
|
-
const filePath =
|
|
8225
|
+
const filePath = import_node_path17.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
8443
8226
|
try {
|
|
8444
8227
|
const logger = await CopilotStreamLogger.create(
|
|
8445
8228
|
{
|
|
@@ -8533,8 +8316,8 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
8533
8316
|
|
|
8534
8317
|
// src/evaluation/providers/copilot-sdk.ts
|
|
8535
8318
|
var import_node_crypto6 = require("crypto");
|
|
8536
|
-
var
|
|
8537
|
-
var
|
|
8319
|
+
var import_promises15 = require("fs/promises");
|
|
8320
|
+
var import_node_path18 = __toESM(require("path"), 1);
|
|
8538
8321
|
|
|
8539
8322
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
8540
8323
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -8813,10 +8596,10 @@ var CopilotSdkProvider = class {
|
|
|
8813
8596
|
}
|
|
8814
8597
|
resolveCwd(cwdOverride) {
|
|
8815
8598
|
if (cwdOverride) {
|
|
8816
|
-
return
|
|
8599
|
+
return import_node_path18.default.resolve(cwdOverride);
|
|
8817
8600
|
}
|
|
8818
8601
|
if (this.config.cwd) {
|
|
8819
|
-
return
|
|
8602
|
+
return import_node_path18.default.resolve(this.config.cwd);
|
|
8820
8603
|
}
|
|
8821
8604
|
return void 0;
|
|
8822
8605
|
}
|
|
@@ -8825,9 +8608,9 @@ var CopilotSdkProvider = class {
|
|
|
8825
8608
|
return void 0;
|
|
8826
8609
|
}
|
|
8827
8610
|
if (this.config.logDir) {
|
|
8828
|
-
return
|
|
8611
|
+
return import_node_path18.default.resolve(this.config.logDir);
|
|
8829
8612
|
}
|
|
8830
|
-
return
|
|
8613
|
+
return import_node_path18.default.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
8831
8614
|
}
|
|
8832
8615
|
async createStreamLogger(request) {
|
|
8833
8616
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8835,13 +8618,13 @@ var CopilotSdkProvider = class {
|
|
|
8835
8618
|
return void 0;
|
|
8836
8619
|
}
|
|
8837
8620
|
try {
|
|
8838
|
-
await (0,
|
|
8621
|
+
await (0, import_promises15.mkdir)(logDir, { recursive: true });
|
|
8839
8622
|
} catch (error) {
|
|
8840
8623
|
const message = error instanceof Error ? error.message : String(error);
|
|
8841
8624
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
8842
8625
|
return void 0;
|
|
8843
8626
|
}
|
|
8844
|
-
const filePath =
|
|
8627
|
+
const filePath = import_node_path18.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
8845
8628
|
try {
|
|
8846
8629
|
const logger = await CopilotStreamLogger.create(
|
|
8847
8630
|
{
|
|
@@ -8918,8 +8701,7 @@ var MockProvider = class {
|
|
|
8918
8701
|
return {
|
|
8919
8702
|
output: [{ role: "assistant", content: this.cannedResponse }],
|
|
8920
8703
|
raw: {
|
|
8921
|
-
question: request.question
|
|
8922
|
-
guidelines: request.guidelines
|
|
8704
|
+
question: request.question
|
|
8923
8705
|
}
|
|
8924
8706
|
};
|
|
8925
8707
|
}
|
|
@@ -9195,9 +8977,9 @@ function extractToolCalls3(content, toolTrackers, completedToolResults) {
|
|
|
9195
8977
|
var import_node_child_process4 = require("child_process");
|
|
9196
8978
|
var import_node_crypto7 = require("crypto");
|
|
9197
8979
|
var import_node_fs8 = require("fs");
|
|
9198
|
-
var
|
|
8980
|
+
var import_promises16 = require("fs/promises");
|
|
9199
8981
|
var import_node_os3 = require("os");
|
|
9200
|
-
var
|
|
8982
|
+
var import_node_path19 = __toESM(require("path"), 1);
|
|
9201
8983
|
|
|
9202
8984
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
9203
8985
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -9278,8 +9060,8 @@ var PiCodingAgentProvider = class {
|
|
|
9278
9060
|
const workspaceRoot = await this.createWorkspace();
|
|
9279
9061
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
9280
9062
|
try {
|
|
9281
|
-
const promptFile =
|
|
9282
|
-
await (0,
|
|
9063
|
+
const promptFile = import_node_path19.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
9064
|
+
await (0, import_promises16.writeFile)(promptFile, request.question, "utf8");
|
|
9283
9065
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
9284
9066
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
9285
9067
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
@@ -9340,12 +9122,12 @@ var PiCodingAgentProvider = class {
|
|
|
9340
9122
|
}
|
|
9341
9123
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
9342
9124
|
if (cwdOverride) {
|
|
9343
|
-
return
|
|
9125
|
+
return import_node_path19.default.resolve(cwdOverride);
|
|
9344
9126
|
}
|
|
9345
9127
|
if (!this.config.cwd) {
|
|
9346
9128
|
return workspaceRoot;
|
|
9347
9129
|
}
|
|
9348
|
-
return
|
|
9130
|
+
return import_node_path19.default.resolve(this.config.cwd);
|
|
9349
9131
|
}
|
|
9350
9132
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
9351
9133
|
const args = [];
|
|
@@ -9434,19 +9216,19 @@ ${prompt}` : prompt;
|
|
|
9434
9216
|
return env;
|
|
9435
9217
|
}
|
|
9436
9218
|
async createWorkspace() {
|
|
9437
|
-
return await (0,
|
|
9219
|
+
return await (0, import_promises16.mkdtemp)(import_node_path19.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX));
|
|
9438
9220
|
}
|
|
9439
9221
|
async cleanupWorkspace(workspaceRoot) {
|
|
9440
9222
|
try {
|
|
9441
|
-
await (0,
|
|
9223
|
+
await (0, import_promises16.rm)(workspaceRoot, { recursive: true, force: true });
|
|
9442
9224
|
} catch {
|
|
9443
9225
|
}
|
|
9444
9226
|
}
|
|
9445
9227
|
resolveLogDirectory() {
|
|
9446
9228
|
if (this.config.logDir) {
|
|
9447
|
-
return
|
|
9229
|
+
return import_node_path19.default.resolve(this.config.logDir);
|
|
9448
9230
|
}
|
|
9449
|
-
return
|
|
9231
|
+
return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
9450
9232
|
}
|
|
9451
9233
|
async createStreamLogger(request) {
|
|
9452
9234
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9454,13 +9236,13 @@ ${prompt}` : prompt;
|
|
|
9454
9236
|
return void 0;
|
|
9455
9237
|
}
|
|
9456
9238
|
try {
|
|
9457
|
-
await (0,
|
|
9239
|
+
await (0, import_promises16.mkdir)(logDir, { recursive: true });
|
|
9458
9240
|
} catch (error) {
|
|
9459
9241
|
const message = error instanceof Error ? error.message : String(error);
|
|
9460
9242
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
9461
9243
|
return void 0;
|
|
9462
9244
|
}
|
|
9463
|
-
const filePath =
|
|
9245
|
+
const filePath = import_node_path19.default.join(logDir, buildLogFilename5(request, this.targetName));
|
|
9464
9246
|
try {
|
|
9465
9247
|
const logger = await PiStreamLogger.create({
|
|
9466
9248
|
filePath,
|
|
@@ -9959,7 +9741,7 @@ var ProviderRegistry = class {
|
|
|
9959
9741
|
};
|
|
9960
9742
|
|
|
9961
9743
|
// src/evaluation/providers/targets.ts
|
|
9962
|
-
var
|
|
9744
|
+
var import_node_path20 = __toESM(require("path"), 1);
|
|
9963
9745
|
var import_zod3 = require("zod");
|
|
9964
9746
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
9965
9747
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
@@ -10056,11 +9838,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
10056
9838
|
allowLiteral: true,
|
|
10057
9839
|
optionalEnv: true
|
|
10058
9840
|
});
|
|
10059
|
-
if (cwd && evalFilePath && !
|
|
10060
|
-
cwd =
|
|
9841
|
+
if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
|
|
9842
|
+
cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
|
|
10061
9843
|
}
|
|
10062
9844
|
if (!cwd && evalFilePath) {
|
|
10063
|
-
cwd =
|
|
9845
|
+
cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
|
|
10064
9846
|
}
|
|
10065
9847
|
return {
|
|
10066
9848
|
command,
|
|
@@ -10083,15 +9865,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10083
9865
|
optionalEnv: true
|
|
10084
9866
|
}
|
|
10085
9867
|
);
|
|
10086
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10087
|
-
workspaceTemplate =
|
|
9868
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
9869
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10088
9870
|
}
|
|
10089
9871
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
10090
9872
|
allowLiteral: true,
|
|
10091
9873
|
optionalEnv: true
|
|
10092
9874
|
});
|
|
10093
|
-
if (cwd && evalFilePath && !
|
|
10094
|
-
cwd =
|
|
9875
|
+
if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
|
|
9876
|
+
cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
|
|
10095
9877
|
}
|
|
10096
9878
|
if (cwd && workspaceTemplate) {
|
|
10097
9879
|
throw new Error(
|
|
@@ -10099,7 +9881,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10099
9881
|
);
|
|
10100
9882
|
}
|
|
10101
9883
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
10102
|
-
cwd =
|
|
9884
|
+
cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
|
|
10103
9885
|
}
|
|
10104
9886
|
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
10105
9887
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
@@ -10122,7 +9904,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
10122
9904
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
10123
9905
|
"PROMPT",
|
|
10124
9906
|
"PROMPT_FILE",
|
|
10125
|
-
"GUIDELINES",
|
|
10126
9907
|
"EVAL_ID",
|
|
10127
9908
|
"ATTEMPT",
|
|
10128
9909
|
"FILES",
|
|
@@ -10517,8 +10298,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
10517
10298
|
optionalEnv: true
|
|
10518
10299
|
}
|
|
10519
10300
|
);
|
|
10520
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10521
|
-
workspaceTemplate =
|
|
10301
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10302
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10522
10303
|
}
|
|
10523
10304
|
if (cwd && workspaceTemplate) {
|
|
10524
10305
|
throw new Error(
|
|
@@ -10602,8 +10383,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
10602
10383
|
optionalEnv: true
|
|
10603
10384
|
}
|
|
10604
10385
|
);
|
|
10605
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10606
|
-
workspaceTemplate =
|
|
10386
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10387
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10607
10388
|
}
|
|
10608
10389
|
if (cwd && workspaceTemplate) {
|
|
10609
10390
|
throw new Error(
|
|
@@ -10667,8 +10448,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
10667
10448
|
optionalEnv: true
|
|
10668
10449
|
}
|
|
10669
10450
|
);
|
|
10670
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10671
|
-
workspaceTemplate =
|
|
10451
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10452
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10672
10453
|
}
|
|
10673
10454
|
if (cwd && workspaceTemplate) {
|
|
10674
10455
|
throw new Error(
|
|
@@ -10758,8 +10539,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
10758
10539
|
optionalEnv: true
|
|
10759
10540
|
}
|
|
10760
10541
|
);
|
|
10761
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10762
|
-
workspaceTemplate =
|
|
10542
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10543
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10763
10544
|
}
|
|
10764
10545
|
if (cwd && workspaceTemplate) {
|
|
10765
10546
|
throw new Error(
|
|
@@ -10847,8 +10628,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
10847
10628
|
optionalEnv: true
|
|
10848
10629
|
}
|
|
10849
10630
|
);
|
|
10850
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10851
|
-
workspaceTemplate =
|
|
10631
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10632
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10852
10633
|
}
|
|
10853
10634
|
if (cwd && workspaceTemplate) {
|
|
10854
10635
|
throw new Error(
|
|
@@ -10906,8 +10687,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
10906
10687
|
optionalEnv: true
|
|
10907
10688
|
}
|
|
10908
10689
|
) : void 0;
|
|
10909
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10910
|
-
workspaceTemplate =
|
|
10690
|
+
if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
|
|
10691
|
+
workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10911
10692
|
}
|
|
10912
10693
|
const executableSource = target.executable;
|
|
10913
10694
|
const waitSource = target.wait;
|
|
@@ -10948,8 +10729,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
10948
10729
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
10949
10730
|
if (!parseResult.success) {
|
|
10950
10731
|
const firstError = parseResult.error.errors[0];
|
|
10951
|
-
const
|
|
10952
|
-
const prefix =
|
|
10732
|
+
const path47 = firstError?.path.join(".") || "";
|
|
10733
|
+
const prefix = path47 ? `${target.name} ${path47}: ` : `${target.name}: `;
|
|
10953
10734
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
10954
10735
|
}
|
|
10955
10736
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -10970,11 +10751,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
10970
10751
|
allowLiteral: true,
|
|
10971
10752
|
optionalEnv: true
|
|
10972
10753
|
});
|
|
10973
|
-
if (cwd && evalFilePath && !
|
|
10974
|
-
cwd =
|
|
10754
|
+
if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
|
|
10755
|
+
cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
|
|
10975
10756
|
}
|
|
10976
10757
|
if (!cwd && evalFilePath) {
|
|
10977
|
-
cwd =
|
|
10758
|
+
cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
|
|
10978
10759
|
}
|
|
10979
10760
|
return {
|
|
10980
10761
|
command,
|
|
@@ -11158,40 +10939,40 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
11158
10939
|
|
|
11159
10940
|
// src/evaluation/providers/vscode-provider.ts
|
|
11160
10941
|
var import_node_child_process6 = require("child_process");
|
|
11161
|
-
var
|
|
11162
|
-
var
|
|
10942
|
+
var import_promises23 = require("fs/promises");
|
|
10943
|
+
var import_node_path32 = __toESM(require("path"), 1);
|
|
11163
10944
|
var import_node_util3 = require("util");
|
|
11164
10945
|
|
|
11165
10946
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
11166
|
-
var
|
|
11167
|
-
var
|
|
10947
|
+
var import_promises21 = require("fs/promises");
|
|
10948
|
+
var import_node_path30 = __toESM(require("path"), 1);
|
|
11168
10949
|
|
|
11169
10950
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
11170
10951
|
var import_node_fs9 = require("fs");
|
|
11171
|
-
var
|
|
11172
|
-
var
|
|
10952
|
+
var import_promises17 = require("fs/promises");
|
|
10953
|
+
var import_node_path21 = __toESM(require("path"), 1);
|
|
11173
10954
|
async function pathExists(target) {
|
|
11174
10955
|
try {
|
|
11175
|
-
await (0,
|
|
10956
|
+
await (0, import_promises17.access)(target, import_node_fs9.constants.F_OK);
|
|
11176
10957
|
return true;
|
|
11177
10958
|
} catch {
|
|
11178
10959
|
return false;
|
|
11179
10960
|
}
|
|
11180
10961
|
}
|
|
11181
10962
|
async function ensureDir(target) {
|
|
11182
|
-
await (0,
|
|
10963
|
+
await (0, import_promises17.mkdir)(target, { recursive: true });
|
|
11183
10964
|
}
|
|
11184
10965
|
async function readDirEntries(target) {
|
|
11185
|
-
const entries = await (0,
|
|
10966
|
+
const entries = await (0, import_promises17.readdir)(target, { withFileTypes: true });
|
|
11186
10967
|
return entries.map((entry) => ({
|
|
11187
10968
|
name: entry.name,
|
|
11188
|
-
absolutePath:
|
|
10969
|
+
absolutePath: import_node_path21.default.join(target, entry.name),
|
|
11189
10970
|
isDirectory: entry.isDirectory()
|
|
11190
10971
|
}));
|
|
11191
10972
|
}
|
|
11192
10973
|
async function removeIfExists(target) {
|
|
11193
10974
|
try {
|
|
11194
|
-
await (0,
|
|
10975
|
+
await (0, import_promises17.rm)(target, { force: true, recursive: false });
|
|
11195
10976
|
} catch (error) {
|
|
11196
10977
|
if (error.code !== "ENOENT") {
|
|
11197
10978
|
throw error;
|
|
@@ -11200,9 +10981,9 @@ async function removeIfExists(target) {
|
|
|
11200
10981
|
}
|
|
11201
10982
|
|
|
11202
10983
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
11203
|
-
var
|
|
10984
|
+
var import_node_path22 = __toESM(require("path"), 1);
|
|
11204
10985
|
function pathToFileUri2(filePath) {
|
|
11205
|
-
const absolutePath =
|
|
10986
|
+
const absolutePath = import_node_path22.default.isAbsolute(filePath) ? filePath : import_node_path22.default.resolve(filePath);
|
|
11206
10987
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
11207
10988
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
11208
10989
|
return `file:///${normalizedPath}`;
|
|
@@ -11211,7 +10992,7 @@ function pathToFileUri2(filePath) {
|
|
|
11211
10992
|
}
|
|
11212
10993
|
|
|
11213
10994
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
11214
|
-
var
|
|
10995
|
+
var import_node_path23 = __toESM(require("path"), 1);
|
|
11215
10996
|
|
|
11216
10997
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
11217
10998
|
function renderTemplate2(content, variables) {
|
|
@@ -11303,8 +11084,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
11303
11084
|
});
|
|
11304
11085
|
}
|
|
11305
11086
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
11306
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
11307
|
-
const responseList = responseFiles.map((file) => `"${
|
|
11087
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path23.default.basename(file)}`).join("\n");
|
|
11088
|
+
const responseList = responseFiles.map((file) => `"${import_node_path23.default.basename(file)}"`).join(", ");
|
|
11308
11089
|
return renderTemplate2(templateContent, {
|
|
11309
11090
|
requestFiles: requestLines,
|
|
11310
11091
|
responseList
|
|
@@ -11312,8 +11093,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
11312
11093
|
}
|
|
11313
11094
|
|
|
11314
11095
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
11315
|
-
var
|
|
11316
|
-
var
|
|
11096
|
+
var import_promises18 = require("fs/promises");
|
|
11097
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
11317
11098
|
|
|
11318
11099
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
11319
11100
|
function sleep2(ms) {
|
|
@@ -11351,7 +11132,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
11351
11132
|
const maxAttempts = 10;
|
|
11352
11133
|
while (attempts < maxAttempts) {
|
|
11353
11134
|
try {
|
|
11354
|
-
const content = await (0,
|
|
11135
|
+
const content = await (0, import_promises18.readFile)(responseFileFinal, { encoding: "utf8" });
|
|
11355
11136
|
if (!silent) {
|
|
11356
11137
|
process.stdout.write(`${content}
|
|
11357
11138
|
`);
|
|
@@ -11372,7 +11153,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
11372
11153
|
}
|
|
11373
11154
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
11374
11155
|
if (!silent) {
|
|
11375
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
11156
|
+
const fileList = responseFilesFinal.map((file) => import_node_path24.default.basename(file)).join(", ");
|
|
11376
11157
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
11377
11158
|
}
|
|
11378
11159
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -11381,7 +11162,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11381
11162
|
while (pending.size > 0) {
|
|
11382
11163
|
if (Date.now() >= deadline) {
|
|
11383
11164
|
if (!silent) {
|
|
11384
|
-
const remaining = [...pending].map((f) =>
|
|
11165
|
+
const remaining = [...pending].map((f) => import_node_path24.default.basename(f)).join(", ");
|
|
11385
11166
|
console.error(
|
|
11386
11167
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
11387
11168
|
);
|
|
@@ -11408,7 +11189,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11408
11189
|
const maxAttempts = 10;
|
|
11409
11190
|
while (attempts < maxAttempts) {
|
|
11410
11191
|
try {
|
|
11411
|
-
const content = await (0,
|
|
11192
|
+
const content = await (0, import_promises18.readFile)(file, { encoding: "utf8" });
|
|
11412
11193
|
if (!silent) {
|
|
11413
11194
|
process.stdout.write(`${content}
|
|
11414
11195
|
`);
|
|
@@ -11431,16 +11212,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11431
11212
|
|
|
11432
11213
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
11433
11214
|
var import_node_child_process5 = require("child_process");
|
|
11434
|
-
var
|
|
11435
|
-
var
|
|
11215
|
+
var import_promises19 = require("fs/promises");
|
|
11216
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
11436
11217
|
var import_node_util2 = require("util");
|
|
11437
11218
|
|
|
11438
11219
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
11439
|
-
var
|
|
11220
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
11440
11221
|
|
|
11441
11222
|
// src/paths.ts
|
|
11442
11223
|
var import_node_os4 = __toESM(require("os"), 1);
|
|
11443
|
-
var
|
|
11224
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
11444
11225
|
var logged = false;
|
|
11445
11226
|
function getAgentvHome() {
|
|
11446
11227
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -11451,19 +11232,19 @@ function getAgentvHome() {
|
|
|
11451
11232
|
}
|
|
11452
11233
|
return envHome;
|
|
11453
11234
|
}
|
|
11454
|
-
return
|
|
11235
|
+
return import_node_path25.default.join(import_node_os4.default.homedir(), ".agentv");
|
|
11455
11236
|
}
|
|
11456
11237
|
function getWorkspacesRoot() {
|
|
11457
|
-
return
|
|
11238
|
+
return import_node_path25.default.join(getAgentvHome(), "workspaces");
|
|
11458
11239
|
}
|
|
11459
11240
|
function getSubagentsRoot() {
|
|
11460
|
-
return
|
|
11241
|
+
return import_node_path25.default.join(getAgentvHome(), "subagents");
|
|
11461
11242
|
}
|
|
11462
11243
|
function getTraceStateRoot() {
|
|
11463
|
-
return
|
|
11244
|
+
return import_node_path25.default.join(getAgentvHome(), "trace-state");
|
|
11464
11245
|
}
|
|
11465
11246
|
function getWorkspacePoolRoot() {
|
|
11466
|
-
return
|
|
11247
|
+
return import_node_path25.default.join(getAgentvHome(), "workspace-pool");
|
|
11467
11248
|
}
|
|
11468
11249
|
|
|
11469
11250
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -11471,7 +11252,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
11471
11252
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
11472
11253
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
11473
11254
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
11474
|
-
return
|
|
11255
|
+
return import_node_path26.default.join(getSubagentsRoot(), folder);
|
|
11475
11256
|
}
|
|
11476
11257
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
11477
11258
|
|
|
@@ -11538,12 +11319,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11538
11319
|
await raceSpawnError(child);
|
|
11539
11320
|
return true;
|
|
11540
11321
|
}
|
|
11541
|
-
const aliveFile =
|
|
11322
|
+
const aliveFile = import_node_path27.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
11542
11323
|
await removeIfExists(aliveFile);
|
|
11543
|
-
const githubAgentsDir =
|
|
11544
|
-
await (0,
|
|
11545
|
-
const wakeupDst =
|
|
11546
|
-
await (0,
|
|
11324
|
+
const githubAgentsDir = import_node_path27.default.join(subagentDir, ".github", "agents");
|
|
11325
|
+
await (0, import_promises19.mkdir)(githubAgentsDir, { recursive: true });
|
|
11326
|
+
const wakeupDst = import_node_path27.default.join(githubAgentsDir, "wakeup.md");
|
|
11327
|
+
await (0, import_promises19.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
11547
11328
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
11548
11329
|
label: "open-workspace"
|
|
11549
11330
|
});
|
|
@@ -11555,7 +11336,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11555
11336
|
"chat",
|
|
11556
11337
|
"-m",
|
|
11557
11338
|
wakeupChatId,
|
|
11558
|
-
`create a file named .alive in the ${
|
|
11339
|
+
`create a file named .alive in the ${import_node_path27.default.basename(subagentDir)} folder`
|
|
11559
11340
|
];
|
|
11560
11341
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
11561
11342
|
await raceSpawnError(wakeupChild);
|
|
@@ -11570,27 +11351,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11570
11351
|
return true;
|
|
11571
11352
|
}
|
|
11572
11353
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
11573
|
-
const workspacePath =
|
|
11574
|
-
const messagesDir =
|
|
11575
|
-
await (0,
|
|
11576
|
-
const reqFile =
|
|
11577
|
-
await (0,
|
|
11354
|
+
const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
|
|
11355
|
+
const messagesDir = import_node_path27.default.join(subagentDir, "messages");
|
|
11356
|
+
await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
|
|
11357
|
+
const reqFile = import_node_path27.default.join(messagesDir, `${timestamp}_req.md`);
|
|
11358
|
+
await (0, import_promises19.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
11578
11359
|
const reqUri = pathToFileUri2(reqFile);
|
|
11579
11360
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
11580
11361
|
for (const attachment of attachmentPaths) {
|
|
11581
11362
|
chatArgs.push("-a", attachment);
|
|
11582
11363
|
}
|
|
11583
11364
|
chatArgs.push("-a", reqFile);
|
|
11584
|
-
chatArgs.push(`Follow instructions in [${
|
|
11365
|
+
chatArgs.push(`Follow instructions in [${import_node_path27.default.basename(reqFile)}](${reqUri})`);
|
|
11585
11366
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
11586
11367
|
workspacePath,
|
|
11587
|
-
|
|
11368
|
+
import_node_path27.default.basename(subagentDir),
|
|
11588
11369
|
subagentDir,
|
|
11589
11370
|
vscodeCmd
|
|
11590
11371
|
);
|
|
11591
11372
|
if (!workspaceReady) {
|
|
11592
11373
|
throw new Error(
|
|
11593
|
-
`VS Code workspace '${
|
|
11374
|
+
`VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
11594
11375
|
);
|
|
11595
11376
|
}
|
|
11596
11377
|
await sleep2(500);
|
|
@@ -11598,9 +11379,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
11598
11379
|
await raceSpawnError(child);
|
|
11599
11380
|
}
|
|
11600
11381
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
11601
|
-
const workspacePath =
|
|
11602
|
-
const messagesDir =
|
|
11603
|
-
await (0,
|
|
11382
|
+
const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
|
|
11383
|
+
const messagesDir = import_node_path27.default.join(subagentDir, "messages");
|
|
11384
|
+
await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
|
|
11604
11385
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
11605
11386
|
for (const attachment of attachmentPaths) {
|
|
11606
11387
|
chatArgs.push("-a", attachment);
|
|
@@ -11608,13 +11389,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
11608
11389
|
chatArgs.push(chatInstruction);
|
|
11609
11390
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
11610
11391
|
workspacePath,
|
|
11611
|
-
|
|
11392
|
+
import_node_path27.default.basename(subagentDir),
|
|
11612
11393
|
subagentDir,
|
|
11613
11394
|
vscodeCmd
|
|
11614
11395
|
);
|
|
11615
11396
|
if (!workspaceReady) {
|
|
11616
11397
|
throw new Error(
|
|
11617
|
-
`VS Code workspace '${
|
|
11398
|
+
`VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
11618
11399
|
);
|
|
11619
11400
|
}
|
|
11620
11401
|
await sleep2(500);
|
|
@@ -11623,11 +11404,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
11623
11404
|
}
|
|
11624
11405
|
|
|
11625
11406
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
11626
|
-
var
|
|
11627
|
-
var
|
|
11407
|
+
var import_promises20 = require("fs/promises");
|
|
11408
|
+
var import_node_path29 = __toESM(require("path"), 1);
|
|
11628
11409
|
|
|
11629
11410
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
11630
|
-
var
|
|
11411
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
11631
11412
|
var import_json5 = __toESM(require("json5"), 1);
|
|
11632
11413
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
11633
11414
|
let workspace;
|
|
@@ -11644,10 +11425,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
11644
11425
|
}
|
|
11645
11426
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
11646
11427
|
const folderPath = folder.path;
|
|
11647
|
-
if (
|
|
11428
|
+
if (import_node_path28.default.isAbsolute(folderPath)) {
|
|
11648
11429
|
return folder;
|
|
11649
11430
|
}
|
|
11650
|
-
const absolutePath =
|
|
11431
|
+
const absolutePath = import_node_path28.default.resolve(templateDir, folderPath);
|
|
11651
11432
|
return {
|
|
11652
11433
|
...folder,
|
|
11653
11434
|
path: absolutePath
|
|
@@ -11669,19 +11450,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
11669
11450
|
if (locationMap && typeof locationMap === "object") {
|
|
11670
11451
|
const transformedMap = {};
|
|
11671
11452
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
11672
|
-
const isAbsolute =
|
|
11453
|
+
const isAbsolute = import_node_path28.default.isAbsolute(locationPath);
|
|
11673
11454
|
if (isAbsolute) {
|
|
11674
11455
|
transformedMap[locationPath] = value;
|
|
11675
11456
|
} else {
|
|
11676
11457
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
11677
11458
|
if (firstGlobIndex === -1) {
|
|
11678
|
-
const resolvedPath =
|
|
11459
|
+
const resolvedPath = import_node_path28.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
11679
11460
|
transformedMap[resolvedPath] = value;
|
|
11680
11461
|
} else {
|
|
11681
11462
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
11682
11463
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
11683
11464
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
11684
|
-
const resolvedPath = (
|
|
11465
|
+
const resolvedPath = (import_node_path28.default.resolve(templateDir, basePath) + patternPath).replace(
|
|
11685
11466
|
/\\/g,
|
|
11686
11467
|
"/"
|
|
11687
11468
|
);
|
|
@@ -11722,7 +11503,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
11722
11503
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
11723
11504
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
11724
11505
|
for (const subagent of subagents) {
|
|
11725
|
-
const lockFile =
|
|
11506
|
+
const lockFile = import_node_path29.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
11726
11507
|
if (!await pathExists(lockFile)) {
|
|
11727
11508
|
return subagent.absolutePath;
|
|
11728
11509
|
}
|
|
@@ -11732,26 +11513,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
11732
11513
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
11733
11514
|
let workspaceContent;
|
|
11734
11515
|
if (workspaceTemplate) {
|
|
11735
|
-
const workspaceSrc =
|
|
11516
|
+
const workspaceSrc = import_node_path29.default.resolve(workspaceTemplate);
|
|
11736
11517
|
if (!await pathExists(workspaceSrc)) {
|
|
11737
11518
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
11738
11519
|
}
|
|
11739
|
-
const stats = await (0,
|
|
11520
|
+
const stats = await (0, import_promises20.stat)(workspaceSrc);
|
|
11740
11521
|
if (!stats.isFile()) {
|
|
11741
11522
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
11742
11523
|
}
|
|
11743
|
-
const templateText = await (0,
|
|
11524
|
+
const templateText = await (0, import_promises20.readFile)(workspaceSrc, "utf8");
|
|
11744
11525
|
workspaceContent = JSON.parse(templateText);
|
|
11745
11526
|
} else {
|
|
11746
11527
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
11747
11528
|
}
|
|
11748
|
-
const workspaceName = `${
|
|
11749
|
-
const workspaceDst =
|
|
11750
|
-
const templateDir = workspaceTemplate ?
|
|
11529
|
+
const workspaceName = `${import_node_path29.default.basename(subagentDir)}.code-workspace`;
|
|
11530
|
+
const workspaceDst = import_node_path29.default.join(subagentDir, workspaceName);
|
|
11531
|
+
const templateDir = workspaceTemplate ? import_node_path29.default.dirname(import_node_path29.default.resolve(workspaceTemplate)) : subagentDir;
|
|
11751
11532
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
11752
11533
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
11753
11534
|
if (cwd) {
|
|
11754
|
-
const absCwd =
|
|
11535
|
+
const absCwd = import_node_path29.default.resolve(cwd);
|
|
11755
11536
|
const parsed = JSON.parse(transformedContent);
|
|
11756
11537
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
11757
11538
|
if (!alreadyPresent) {
|
|
@@ -11759,36 +11540,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
11759
11540
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
11760
11541
|
}
|
|
11761
11542
|
}
|
|
11762
|
-
await (0,
|
|
11763
|
-
const messagesDir =
|
|
11764
|
-
await (0,
|
|
11543
|
+
await (0, import_promises20.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
11544
|
+
const messagesDir = import_node_path29.default.join(subagentDir, "messages");
|
|
11545
|
+
await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
|
|
11765
11546
|
return { workspace: workspaceDst, messagesDir };
|
|
11766
11547
|
}
|
|
11767
11548
|
async function createSubagentLock(subagentDir) {
|
|
11768
|
-
const messagesDir =
|
|
11549
|
+
const messagesDir = import_node_path29.default.join(subagentDir, "messages");
|
|
11769
11550
|
if (await pathExists(messagesDir)) {
|
|
11770
|
-
const files = await (0,
|
|
11551
|
+
const files = await (0, import_promises20.readdir)(messagesDir);
|
|
11771
11552
|
await Promise.all(
|
|
11772
11553
|
files.map(async (file) => {
|
|
11773
|
-
const target =
|
|
11554
|
+
const target = import_node_path29.default.join(messagesDir, file);
|
|
11774
11555
|
await removeIfExists(target);
|
|
11775
11556
|
})
|
|
11776
11557
|
);
|
|
11777
11558
|
}
|
|
11778
|
-
const githubAgentsDir =
|
|
11559
|
+
const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
|
|
11779
11560
|
if (await pathExists(githubAgentsDir)) {
|
|
11780
|
-
const agentFiles = await (0,
|
|
11561
|
+
const agentFiles = await (0, import_promises20.readdir)(githubAgentsDir);
|
|
11781
11562
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
11782
11563
|
await Promise.all(
|
|
11783
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
11564
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path29.default.join(githubAgentsDir, file)))
|
|
11784
11565
|
);
|
|
11785
11566
|
}
|
|
11786
|
-
const lockFile =
|
|
11787
|
-
await (0,
|
|
11567
|
+
const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
11568
|
+
await (0, import_promises20.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
11788
11569
|
return lockFile;
|
|
11789
11570
|
}
|
|
11790
11571
|
async function removeSubagentLock(subagentDir) {
|
|
11791
|
-
const lockFile =
|
|
11572
|
+
const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
11792
11573
|
await removeIfExists(lockFile);
|
|
11793
11574
|
}
|
|
11794
11575
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -11808,11 +11589,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
11808
11589
|
return 1;
|
|
11809
11590
|
}
|
|
11810
11591
|
if (promptFile) {
|
|
11811
|
-
const githubAgentsDir =
|
|
11812
|
-
await (0,
|
|
11813
|
-
const agentFile =
|
|
11592
|
+
const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
|
|
11593
|
+
await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
|
|
11594
|
+
const agentFile = import_node_path29.default.join(githubAgentsDir, `${chatId}.md`);
|
|
11814
11595
|
try {
|
|
11815
|
-
await (0,
|
|
11596
|
+
await (0, import_promises20.copyFile)(promptFile, agentFile);
|
|
11816
11597
|
} catch (error) {
|
|
11817
11598
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
11818
11599
|
return 1;
|
|
@@ -11829,11 +11610,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
11829
11610
|
if (!promptFile) {
|
|
11830
11611
|
return void 0;
|
|
11831
11612
|
}
|
|
11832
|
-
const resolvedPrompt =
|
|
11613
|
+
const resolvedPrompt = import_node_path30.default.resolve(promptFile);
|
|
11833
11614
|
if (!await pathExists(resolvedPrompt)) {
|
|
11834
11615
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
11835
11616
|
}
|
|
11836
|
-
const promptStats = await (0,
|
|
11617
|
+
const promptStats = await (0, import_promises21.stat)(resolvedPrompt);
|
|
11837
11618
|
if (!promptStats.isFile()) {
|
|
11838
11619
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
11839
11620
|
}
|
|
@@ -11845,7 +11626,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
11845
11626
|
}
|
|
11846
11627
|
const resolved = [];
|
|
11847
11628
|
for (const attachment of extraAttachments) {
|
|
11848
|
-
const resolvedPath =
|
|
11629
|
+
const resolvedPath = import_node_path30.default.resolve(attachment);
|
|
11849
11630
|
if (!await pathExists(resolvedPath)) {
|
|
11850
11631
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
11851
11632
|
}
|
|
@@ -11887,7 +11668,7 @@ async function dispatchAgentSession(options) {
|
|
|
11887
11668
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
11888
11669
|
};
|
|
11889
11670
|
}
|
|
11890
|
-
const subagentName =
|
|
11671
|
+
const subagentName = import_node_path30.default.basename(subagentDir);
|
|
11891
11672
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
11892
11673
|
const preparationResult = await prepareSubagentDirectory(
|
|
11893
11674
|
subagentDir,
|
|
@@ -11915,9 +11696,9 @@ async function dispatchAgentSession(options) {
|
|
|
11915
11696
|
};
|
|
11916
11697
|
}
|
|
11917
11698
|
const timestamp = generateTimestamp();
|
|
11918
|
-
const messagesDir =
|
|
11919
|
-
const responseFileTmp =
|
|
11920
|
-
const responseFileFinal =
|
|
11699
|
+
const messagesDir = import_node_path30.default.join(subagentDir, "messages");
|
|
11700
|
+
const responseFileTmp = import_node_path30.default.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
11701
|
+
const responseFileFinal = import_node_path30.default.join(messagesDir, `${timestamp}_res.md`);
|
|
11921
11702
|
const requestInstructions = createRequestPrompt(
|
|
11922
11703
|
userQuery,
|
|
11923
11704
|
responseFileTmp,
|
|
@@ -12022,7 +11803,7 @@ async function dispatchBatchAgent(options) {
|
|
|
12022
11803
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
12023
11804
|
};
|
|
12024
11805
|
}
|
|
12025
|
-
subagentName =
|
|
11806
|
+
subagentName = import_node_path30.default.basename(subagentDir);
|
|
12026
11807
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
12027
11808
|
const preparationResult = await prepareSubagentDirectory(
|
|
12028
11809
|
subagentDir,
|
|
@@ -12053,24 +11834,24 @@ async function dispatchBatchAgent(options) {
|
|
|
12053
11834
|
};
|
|
12054
11835
|
}
|
|
12055
11836
|
const timestamp = generateTimestamp();
|
|
12056
|
-
const messagesDir =
|
|
11837
|
+
const messagesDir = import_node_path30.default.join(subagentDir, "messages");
|
|
12057
11838
|
requestFiles = userQueries.map(
|
|
12058
|
-
(_, index) =>
|
|
11839
|
+
(_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
12059
11840
|
);
|
|
12060
11841
|
const responseTmpFiles = userQueries.map(
|
|
12061
|
-
(_, index) =>
|
|
11842
|
+
(_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
12062
11843
|
);
|
|
12063
11844
|
responseFilesFinal = userQueries.map(
|
|
12064
|
-
(_, index) =>
|
|
11845
|
+
(_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
12065
11846
|
);
|
|
12066
|
-
const orchestratorFile =
|
|
11847
|
+
const orchestratorFile = import_node_path30.default.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
12067
11848
|
if (!dryRun) {
|
|
12068
11849
|
await Promise.all(
|
|
12069
11850
|
userQueries.map((query, index) => {
|
|
12070
11851
|
const reqFile = requestFiles[index];
|
|
12071
11852
|
const tmpFile = responseTmpFiles[index];
|
|
12072
11853
|
const finalFile = responseFilesFinal[index];
|
|
12073
|
-
return (0,
|
|
11854
|
+
return (0, import_promises21.writeFile)(
|
|
12074
11855
|
reqFile,
|
|
12075
11856
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
12076
11857
|
{ encoding: "utf8" }
|
|
@@ -12082,7 +11863,7 @@ async function dispatchBatchAgent(options) {
|
|
|
12082
11863
|
responseFilesFinal,
|
|
12083
11864
|
orchestratorTemplateContent
|
|
12084
11865
|
);
|
|
12085
|
-
await (0,
|
|
11866
|
+
await (0, import_promises21.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
12086
11867
|
}
|
|
12087
11868
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
12088
11869
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -12148,8 +11929,8 @@ async function dispatchBatchAgent(options) {
|
|
|
12148
11929
|
}
|
|
12149
11930
|
|
|
12150
11931
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
12151
|
-
var
|
|
12152
|
-
var
|
|
11932
|
+
var import_promises22 = require("fs/promises");
|
|
11933
|
+
var import_node_path31 = __toESM(require("path"), 1);
|
|
12153
11934
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
12154
11935
|
folders: [
|
|
12155
11936
|
{
|
|
@@ -12180,7 +11961,7 @@ async function provisionSubagents(options) {
|
|
|
12180
11961
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
12181
11962
|
throw new Error("subagents must be a positive integer");
|
|
12182
11963
|
}
|
|
12183
|
-
const targetPath =
|
|
11964
|
+
const targetPath = import_node_path31.default.resolve(targetRoot);
|
|
12184
11965
|
if (!dryRun) {
|
|
12185
11966
|
await ensureDir(targetPath);
|
|
12186
11967
|
}
|
|
@@ -12200,7 +11981,7 @@ async function provisionSubagents(options) {
|
|
|
12200
11981
|
continue;
|
|
12201
11982
|
}
|
|
12202
11983
|
highestNumber = Math.max(highestNumber, parsed);
|
|
12203
|
-
const lockFile =
|
|
11984
|
+
const lockFile = import_node_path31.default.join(entry.absolutePath, lockName);
|
|
12204
11985
|
const locked = await pathExists(lockFile);
|
|
12205
11986
|
if (locked) {
|
|
12206
11987
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -12217,10 +11998,10 @@ async function provisionSubagents(options) {
|
|
|
12217
11998
|
break;
|
|
12218
11999
|
}
|
|
12219
12000
|
const subagentDir = subagent.absolutePath;
|
|
12220
|
-
const githubAgentsDir =
|
|
12221
|
-
const lockFile =
|
|
12222
|
-
const workspaceDst =
|
|
12223
|
-
const wakeupDst =
|
|
12001
|
+
const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
|
|
12002
|
+
const lockFile = import_node_path31.default.join(subagentDir, lockName);
|
|
12003
|
+
const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
12004
|
+
const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
|
|
12224
12005
|
const isLocked = await pathExists(lockFile);
|
|
12225
12006
|
if (isLocked && !force) {
|
|
12226
12007
|
continue;
|
|
@@ -12229,8 +12010,8 @@ async function provisionSubagents(options) {
|
|
|
12229
12010
|
if (!dryRun) {
|
|
12230
12011
|
await removeIfExists(lockFile);
|
|
12231
12012
|
await ensureDir(githubAgentsDir);
|
|
12232
|
-
await (0,
|
|
12233
|
-
await (0,
|
|
12013
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12014
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12234
12015
|
}
|
|
12235
12016
|
created.push(subagentDir);
|
|
12236
12017
|
lockedSubagents.delete(subagentDir);
|
|
@@ -12240,8 +12021,8 @@ async function provisionSubagents(options) {
|
|
|
12240
12021
|
if (!isLocked && force) {
|
|
12241
12022
|
if (!dryRun) {
|
|
12242
12023
|
await ensureDir(githubAgentsDir);
|
|
12243
|
-
await (0,
|
|
12244
|
-
await (0,
|
|
12024
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12025
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12245
12026
|
}
|
|
12246
12027
|
created.push(subagentDir);
|
|
12247
12028
|
subagentsProvisioned += 1;
|
|
@@ -12249,8 +12030,8 @@ async function provisionSubagents(options) {
|
|
|
12249
12030
|
}
|
|
12250
12031
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
12251
12032
|
await ensureDir(githubAgentsDir);
|
|
12252
|
-
await (0,
|
|
12253
|
-
await (0,
|
|
12033
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12034
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12254
12035
|
}
|
|
12255
12036
|
skippedExisting.push(subagentDir);
|
|
12256
12037
|
subagentsProvisioned += 1;
|
|
@@ -12258,15 +12039,15 @@ async function provisionSubagents(options) {
|
|
|
12258
12039
|
let nextIndex = highestNumber;
|
|
12259
12040
|
while (subagentsProvisioned < subagents) {
|
|
12260
12041
|
nextIndex += 1;
|
|
12261
|
-
const subagentDir =
|
|
12262
|
-
const githubAgentsDir =
|
|
12263
|
-
const workspaceDst =
|
|
12264
|
-
const wakeupDst =
|
|
12042
|
+
const subagentDir = import_node_path31.default.join(targetPath, `subagent-${nextIndex}`);
|
|
12043
|
+
const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
|
|
12044
|
+
const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
|
|
12045
|
+
const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
|
|
12265
12046
|
if (!dryRun) {
|
|
12266
12047
|
await ensureDir(subagentDir);
|
|
12267
12048
|
await ensureDir(githubAgentsDir);
|
|
12268
|
-
await (0,
|
|
12269
|
-
await (0,
|
|
12049
|
+
await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12050
|
+
await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12270
12051
|
}
|
|
12271
12052
|
created.push(subagentDir);
|
|
12272
12053
|
subagentsProvisioned += 1;
|
|
@@ -12328,7 +12109,7 @@ var VSCodeProvider = class {
|
|
|
12328
12109
|
}
|
|
12329
12110
|
await this.ensureEnvironmentReady();
|
|
12330
12111
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
12331
|
-
const promptContent = buildPromptDocument2(request, inputFiles
|
|
12112
|
+
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
12332
12113
|
const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
|
|
12333
12114
|
const startTime = Date.now();
|
|
12334
12115
|
const session = await dispatchAgentSession({
|
|
@@ -12382,7 +12163,7 @@ var VSCodeProvider = class {
|
|
|
12382
12163
|
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
12383
12164
|
);
|
|
12384
12165
|
const userQueries = normalizedRequests.map(
|
|
12385
|
-
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles
|
|
12166
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
12386
12167
|
);
|
|
12387
12168
|
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
12388
12169
|
this.config.workspaceTemplate
|
|
@@ -12451,9 +12232,9 @@ var VSCodeProvider = class {
|
|
|
12451
12232
|
async function locateVSCodeExecutable(candidate) {
|
|
12452
12233
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
12453
12234
|
if (includesPathSeparator) {
|
|
12454
|
-
const resolved =
|
|
12235
|
+
const resolved = import_node_path32.default.isAbsolute(candidate) ? candidate : import_node_path32.default.resolve(candidate);
|
|
12455
12236
|
try {
|
|
12456
|
-
await (0,
|
|
12237
|
+
await (0, import_promises23.access)(resolved, import_promises23.constants.F_OK);
|
|
12457
12238
|
return resolved;
|
|
12458
12239
|
} catch {
|
|
12459
12240
|
throw new Error(
|
|
@@ -12466,7 +12247,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
12466
12247
|
const { stdout } = await execAsync3(`${locator} ${candidate}`);
|
|
12467
12248
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
12468
12249
|
if (lines.length > 0 && lines[0]) {
|
|
12469
|
-
await (0,
|
|
12250
|
+
await (0, import_promises23.access)(lines[0], import_promises23.constants.F_OK);
|
|
12470
12251
|
return lines[0];
|
|
12471
12252
|
}
|
|
12472
12253
|
} catch {
|
|
@@ -12480,41 +12261,35 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
12480
12261
|
return void 0;
|
|
12481
12262
|
}
|
|
12482
12263
|
try {
|
|
12483
|
-
const stats = await (0,
|
|
12264
|
+
const stats = await (0, import_promises23.stat)(import_node_path32.default.resolve(template));
|
|
12484
12265
|
return stats.isFile() ? template : void 0;
|
|
12485
12266
|
} catch {
|
|
12486
12267
|
return template;
|
|
12487
12268
|
}
|
|
12488
12269
|
}
|
|
12489
|
-
function buildPromptDocument2(request, attachments
|
|
12270
|
+
function buildPromptDocument2(request, attachments) {
|
|
12490
12271
|
const parts = [];
|
|
12491
12272
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
12492
12273
|
parts.push(request.systemPrompt.trim());
|
|
12493
12274
|
}
|
|
12494
|
-
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
12495
12275
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
12496
|
-
const
|
|
12497
|
-
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
12276
|
+
const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
|
|
12498
12277
|
if (prereadBlock.length > 0) {
|
|
12499
12278
|
parts.push("\n", prereadBlock);
|
|
12500
12279
|
}
|
|
12501
12280
|
parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
|
|
12502
12281
|
return parts.join("\n").trim();
|
|
12503
12282
|
}
|
|
12504
|
-
function buildMandatoryPrereadBlock2(
|
|
12505
|
-
if (
|
|
12283
|
+
function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
12284
|
+
if (attachmentFiles.length === 0) {
|
|
12506
12285
|
return "";
|
|
12507
12286
|
}
|
|
12508
12287
|
const buildList = (files) => files.map((absolutePath) => {
|
|
12509
|
-
const fileName =
|
|
12288
|
+
const fileName = import_node_path32.default.basename(absolutePath);
|
|
12510
12289
|
const fileUri = pathToFileUri3(absolutePath);
|
|
12511
12290
|
return `* [${fileName}](${fileUri})`;
|
|
12512
12291
|
});
|
|
12513
12292
|
const sections = [];
|
|
12514
|
-
if (guidelineFiles.length > 0) {
|
|
12515
|
-
sections.push(`Read all guideline files:
|
|
12516
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
12517
|
-
}
|
|
12518
12293
|
if (attachmentFiles.length > 0) {
|
|
12519
12294
|
sections.push(`Read all attachment files:
|
|
12520
12295
|
${buildList(attachmentFiles).join("\n")}.`);
|
|
@@ -12525,29 +12300,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
|
|
|
12525
12300
|
);
|
|
12526
12301
|
return sections.join("\n");
|
|
12527
12302
|
}
|
|
12528
|
-
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
12529
|
-
if (!attachments || attachments.length === 0) {
|
|
12530
|
-
return [];
|
|
12531
|
-
}
|
|
12532
|
-
const unique = /* @__PURE__ */ new Map();
|
|
12533
|
-
for (const attachment of attachments) {
|
|
12534
|
-
const absolutePath = import_node_path33.default.resolve(attachment);
|
|
12535
|
-
const normalized = absolutePath.split(import_node_path33.default.sep).join("/");
|
|
12536
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
12537
|
-
if (!unique.has(absolutePath)) {
|
|
12538
|
-
unique.set(absolutePath, absolutePath);
|
|
12539
|
-
}
|
|
12540
|
-
}
|
|
12541
|
-
}
|
|
12542
|
-
return Array.from(unique.values());
|
|
12543
|
-
}
|
|
12544
12303
|
function collectAttachmentFiles(attachments) {
|
|
12545
12304
|
if (!attachments || attachments.length === 0) {
|
|
12546
12305
|
return [];
|
|
12547
12306
|
}
|
|
12548
12307
|
const unique = /* @__PURE__ */ new Map();
|
|
12549
12308
|
for (const attachment of attachments) {
|
|
12550
|
-
const absolutePath =
|
|
12309
|
+
const absolutePath = import_node_path32.default.resolve(attachment);
|
|
12551
12310
|
if (!unique.has(absolutePath)) {
|
|
12552
12311
|
unique.set(absolutePath, absolutePath);
|
|
12553
12312
|
}
|
|
@@ -12555,7 +12314,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
12555
12314
|
return Array.from(unique.values());
|
|
12556
12315
|
}
|
|
12557
12316
|
function pathToFileUri3(filePath) {
|
|
12558
|
-
const absolutePath =
|
|
12317
|
+
const absolutePath = import_node_path32.default.isAbsolute(filePath) ? filePath : import_node_path32.default.resolve(filePath);
|
|
12559
12318
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
12560
12319
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
12561
12320
|
return `file:///${normalizedPath}`;
|
|
@@ -12568,7 +12327,7 @@ function normalizeAttachments(attachments) {
|
|
|
12568
12327
|
}
|
|
12569
12328
|
const deduped = /* @__PURE__ */ new Set();
|
|
12570
12329
|
for (const attachment of attachments) {
|
|
12571
|
-
deduped.add(
|
|
12330
|
+
deduped.add(import_node_path32.default.resolve(attachment));
|
|
12572
12331
|
}
|
|
12573
12332
|
return Array.from(deduped);
|
|
12574
12333
|
}
|
|
@@ -12577,7 +12336,7 @@ function mergeAttachments(all) {
|
|
|
12577
12336
|
for (const list of all) {
|
|
12578
12337
|
if (!list) continue;
|
|
12579
12338
|
for (const inputFile of list) {
|
|
12580
|
-
deduped.add(
|
|
12339
|
+
deduped.add(import_node_path32.default.resolve(inputFile));
|
|
12581
12340
|
}
|
|
12582
12341
|
}
|
|
12583
12342
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -12625,8 +12384,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
12625
12384
|
|
|
12626
12385
|
// src/evaluation/providers/targets-file.ts
|
|
12627
12386
|
var import_node_fs10 = require("fs");
|
|
12628
|
-
var
|
|
12629
|
-
var
|
|
12387
|
+
var import_promises24 = require("fs/promises");
|
|
12388
|
+
var import_node_path33 = __toESM(require("path"), 1);
|
|
12630
12389
|
var import_yaml6 = require("yaml");
|
|
12631
12390
|
function isRecord(value) {
|
|
12632
12391
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -12656,18 +12415,18 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
12656
12415
|
}
|
|
12657
12416
|
async function fileExists3(filePath) {
|
|
12658
12417
|
try {
|
|
12659
|
-
await (0,
|
|
12418
|
+
await (0, import_promises24.access)(filePath, import_node_fs10.constants.F_OK);
|
|
12660
12419
|
return true;
|
|
12661
12420
|
} catch {
|
|
12662
12421
|
return false;
|
|
12663
12422
|
}
|
|
12664
12423
|
}
|
|
12665
12424
|
async function readTargetDefinitions(filePath) {
|
|
12666
|
-
const absolutePath =
|
|
12425
|
+
const absolutePath = import_node_path33.default.resolve(filePath);
|
|
12667
12426
|
if (!await fileExists3(absolutePath)) {
|
|
12668
12427
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
12669
12428
|
}
|
|
12670
|
-
const raw = await (0,
|
|
12429
|
+
const raw = await (0, import_promises24.readFile)(absolutePath, "utf8");
|
|
12671
12430
|
const parsed = (0, import_yaml6.parse)(raw);
|
|
12672
12431
|
if (!isRecord(parsed)) {
|
|
12673
12432
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -12683,16 +12442,16 @@ function listTargetNames(definitions) {
|
|
|
12683
12442
|
}
|
|
12684
12443
|
|
|
12685
12444
|
// src/evaluation/providers/provider-discovery.ts
|
|
12686
|
-
var
|
|
12445
|
+
var import_node_path34 = __toESM(require("path"), 1);
|
|
12687
12446
|
var import_fast_glob2 = __toESM(require("fast-glob"), 1);
|
|
12688
12447
|
async function discoverProviders(registry, baseDir) {
|
|
12689
12448
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
12690
12449
|
const candidateDirs = [];
|
|
12691
|
-
let dir =
|
|
12692
|
-
const root =
|
|
12450
|
+
let dir = import_node_path34.default.resolve(baseDir);
|
|
12451
|
+
const root = import_node_path34.default.parse(dir).root;
|
|
12693
12452
|
while (dir !== root) {
|
|
12694
|
-
candidateDirs.push(
|
|
12695
|
-
dir =
|
|
12453
|
+
candidateDirs.push(import_node_path34.default.join(dir, ".agentv", "providers"));
|
|
12454
|
+
dir = import_node_path34.default.dirname(dir);
|
|
12696
12455
|
}
|
|
12697
12456
|
let files = [];
|
|
12698
12457
|
for (const providersDir of candidateDirs) {
|
|
@@ -12708,7 +12467,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
12708
12467
|
}
|
|
12709
12468
|
const discoveredKinds = [];
|
|
12710
12469
|
for (const filePath of files) {
|
|
12711
|
-
const basename =
|
|
12470
|
+
const basename = import_node_path34.default.basename(filePath);
|
|
12712
12471
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
12713
12472
|
if (registry.has(kindName)) {
|
|
12714
12473
|
continue;
|
|
@@ -12815,9 +12574,9 @@ function negateScore(score) {
|
|
|
12815
12574
|
}
|
|
12816
12575
|
|
|
12817
12576
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
12818
|
-
var
|
|
12577
|
+
var import_promises25 = require("fs/promises");
|
|
12819
12578
|
var import_node_os5 = require("os");
|
|
12820
|
-
var
|
|
12579
|
+
var import_node_path35 = require("path");
|
|
12821
12580
|
|
|
12822
12581
|
// src/runtime/exec.ts
|
|
12823
12582
|
function shellEscapePath(value) {
|
|
@@ -12917,15 +12676,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
12917
12676
|
});
|
|
12918
12677
|
}
|
|
12919
12678
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
12920
|
-
const { mkdir: mkdir17, readFile:
|
|
12679
|
+
const { mkdir: mkdir17, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
12921
12680
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
12922
|
-
const
|
|
12681
|
+
const path47 = await import("path");
|
|
12923
12682
|
const { randomUUID: randomUUID9 } = await import("crypto");
|
|
12924
|
-
const dir =
|
|
12683
|
+
const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
12925
12684
|
await mkdir17(dir, { recursive: true });
|
|
12926
|
-
const stdinPath =
|
|
12927
|
-
const stdoutPath =
|
|
12928
|
-
const stderrPath =
|
|
12685
|
+
const stdinPath = path47.join(dir, "stdin.txt");
|
|
12686
|
+
const stdoutPath = path47.join(dir, "stdout.txt");
|
|
12687
|
+
const stderrPath = path47.join(dir, "stderr.txt");
|
|
12929
12688
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
12930
12689
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
12931
12690
|
const { spawn: spawn5 } = await import("child_process");
|
|
@@ -12955,8 +12714,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
12955
12714
|
resolve(code ?? 0);
|
|
12956
12715
|
});
|
|
12957
12716
|
});
|
|
12958
|
-
const stdout = (await
|
|
12959
|
-
const stderr = (await
|
|
12717
|
+
const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
12718
|
+
const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
12960
12719
|
return { stdout, stderr, exitCode };
|
|
12961
12720
|
} finally {
|
|
12962
12721
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -13261,9 +13020,9 @@ var CodeEvaluator = class {
|
|
|
13261
13020
|
if (outputForPayload) {
|
|
13262
13021
|
const serialized = JSON.stringify(outputForPayload);
|
|
13263
13022
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
13264
|
-
const tmpDir = await (0,
|
|
13265
|
-
outputPath = (0,
|
|
13266
|
-
await (0,
|
|
13023
|
+
const tmpDir = await (0, import_promises25.mkdtemp)((0, import_node_path35.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
|
|
13024
|
+
outputPath = (0, import_node_path35.join)(tmpDir, "output.json");
|
|
13025
|
+
await (0, import_promises25.writeFile)(outputPath, serialized);
|
|
13267
13026
|
outputForPayload = null;
|
|
13268
13027
|
}
|
|
13269
13028
|
}
|
|
@@ -13273,10 +13032,7 @@ var CodeEvaluator = class {
|
|
|
13273
13032
|
outputText: context2.candidate,
|
|
13274
13033
|
output: outputForPayload,
|
|
13275
13034
|
outputPath,
|
|
13276
|
-
|
|
13277
|
-
inputFiles: context2.evalCase.file_paths.filter(
|
|
13278
|
-
(path48) => !context2.evalCase.guideline_paths.includes(path48)
|
|
13279
|
-
),
|
|
13035
|
+
inputFiles: context2.evalCase.file_paths,
|
|
13280
13036
|
input: context2.evalCase.input,
|
|
13281
13037
|
trace: context2.trace ?? null,
|
|
13282
13038
|
tokenUsage: context2.tokenUsage ?? null,
|
|
@@ -13375,7 +13131,7 @@ var CodeEvaluator = class {
|
|
|
13375
13131
|
await proxyShutdown();
|
|
13376
13132
|
}
|
|
13377
13133
|
if (outputPath) {
|
|
13378
|
-
await (0,
|
|
13134
|
+
await (0, import_promises25.rm)((0, import_node_path35.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
13379
13135
|
});
|
|
13380
13136
|
}
|
|
13381
13137
|
}
|
|
@@ -13438,8 +13194,8 @@ function isAgentProvider(provider) {
|
|
|
13438
13194
|
}
|
|
13439
13195
|
|
|
13440
13196
|
// src/evaluation/evaluators/llm-grader.ts
|
|
13441
|
-
var
|
|
13442
|
-
var
|
|
13197
|
+
var import_promises26 = __toESM(require("fs/promises"), 1);
|
|
13198
|
+
var import_node_path36 = __toESM(require("path"), 1);
|
|
13443
13199
|
var import_ai2 = require("ai");
|
|
13444
13200
|
var import_zod4 = require("zod");
|
|
13445
13201
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -14271,8 +14027,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
14271
14027
|
};
|
|
14272
14028
|
}
|
|
14273
14029
|
function resolveSandboxed(basePath, relativePath) {
|
|
14274
|
-
const resolved =
|
|
14275
|
-
if (!resolved.startsWith(basePath +
|
|
14030
|
+
const resolved = import_node_path36.default.resolve(basePath, relativePath);
|
|
14031
|
+
if (!resolved.startsWith(basePath + import_node_path36.default.sep) && resolved !== basePath) {
|
|
14276
14032
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
14277
14033
|
}
|
|
14278
14034
|
return resolved;
|
|
@@ -14287,7 +14043,7 @@ function createFilesystemTools(workspacePath) {
|
|
|
14287
14043
|
execute: async (input) => {
|
|
14288
14044
|
try {
|
|
14289
14045
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
14290
|
-
const entries = await
|
|
14046
|
+
const entries = await import_promises26.default.readdir(resolved, { withFileTypes: true });
|
|
14291
14047
|
return entries.map((e) => ({
|
|
14292
14048
|
name: e.name,
|
|
14293
14049
|
type: e.isDirectory() ? "directory" : "file"
|
|
@@ -14305,12 +14061,12 @@ function createFilesystemTools(workspacePath) {
|
|
|
14305
14061
|
execute: async (input) => {
|
|
14306
14062
|
try {
|
|
14307
14063
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
14308
|
-
const stat8 = await
|
|
14064
|
+
const stat8 = await import_promises26.default.stat(resolved);
|
|
14309
14065
|
if (stat8.isDirectory()) {
|
|
14310
14066
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
14311
14067
|
}
|
|
14312
14068
|
const buffer = Buffer.alloc(Math.min(stat8.size, MAX_FILE_SIZE));
|
|
14313
|
-
const fd = await
|
|
14069
|
+
const fd = await import_promises26.default.open(resolved, "r");
|
|
14314
14070
|
try {
|
|
14315
14071
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
14316
14072
|
} finally {
|
|
@@ -14355,30 +14111,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
14355
14111
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14356
14112
|
let entries;
|
|
14357
14113
|
try {
|
|
14358
|
-
entries = await
|
|
14114
|
+
entries = await import_promises26.default.readdir(dirPath, { withFileTypes: true });
|
|
14359
14115
|
} catch {
|
|
14360
14116
|
return;
|
|
14361
14117
|
}
|
|
14362
14118
|
for (const entry of entries) {
|
|
14363
14119
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14364
14120
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
14365
|
-
const fullPath =
|
|
14121
|
+
const fullPath = import_node_path36.default.join(dirPath, entry.name);
|
|
14366
14122
|
if (entry.isDirectory()) {
|
|
14367
14123
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
14368
14124
|
} else if (entry.isFile()) {
|
|
14369
|
-
const ext =
|
|
14125
|
+
const ext = import_node_path36.default.extname(entry.name).toLowerCase();
|
|
14370
14126
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
14371
14127
|
try {
|
|
14372
|
-
const stat8 = await
|
|
14128
|
+
const stat8 = await import_promises26.default.stat(fullPath);
|
|
14373
14129
|
if (stat8.size > MAX_FILE_SIZE) continue;
|
|
14374
|
-
const content = await
|
|
14130
|
+
const content = await import_promises26.default.readFile(fullPath, "utf-8");
|
|
14375
14131
|
const lines = content.split("\n");
|
|
14376
14132
|
for (let i = 0; i < lines.length; i++) {
|
|
14377
14133
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14378
14134
|
regex.lastIndex = 0;
|
|
14379
14135
|
if (regex.test(lines[i])) {
|
|
14380
14136
|
matches.push({
|
|
14381
|
-
file:
|
|
14137
|
+
file: import_node_path36.default.relative(workspacePath, fullPath),
|
|
14382
14138
|
line: i + 1,
|
|
14383
14139
|
text: lines[i].substring(0, 200)
|
|
14384
14140
|
});
|
|
@@ -15013,115 +14769,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
15013
14769
|
* Evaluate a single field against the expected value.
|
|
15014
14770
|
*/
|
|
15015
14771
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
15016
|
-
const { path:
|
|
15017
|
-
const candidateValue = resolvePath(candidateData,
|
|
15018
|
-
const expectedValue = resolvePath(expectedData,
|
|
14772
|
+
const { path: path47, match, required = true, weight = 1 } = fieldConfig;
|
|
14773
|
+
const candidateValue = resolvePath(candidateData, path47);
|
|
14774
|
+
const expectedValue = resolvePath(expectedData, path47);
|
|
15019
14775
|
if (expectedValue === void 0) {
|
|
15020
14776
|
return {
|
|
15021
|
-
path:
|
|
14777
|
+
path: path47,
|
|
15022
14778
|
score: 1,
|
|
15023
14779
|
// No expected value means no comparison needed
|
|
15024
14780
|
weight,
|
|
15025
14781
|
hit: true,
|
|
15026
|
-
message: `${
|
|
14782
|
+
message: `${path47}: no expected value`
|
|
15027
14783
|
};
|
|
15028
14784
|
}
|
|
15029
14785
|
if (candidateValue === void 0) {
|
|
15030
14786
|
if (required) {
|
|
15031
14787
|
return {
|
|
15032
|
-
path:
|
|
14788
|
+
path: path47,
|
|
15033
14789
|
score: 0,
|
|
15034
14790
|
weight,
|
|
15035
14791
|
hit: false,
|
|
15036
|
-
message: `${
|
|
14792
|
+
message: `${path47} (required, missing)`
|
|
15037
14793
|
};
|
|
15038
14794
|
}
|
|
15039
14795
|
return {
|
|
15040
|
-
path:
|
|
14796
|
+
path: path47,
|
|
15041
14797
|
score: 1,
|
|
15042
14798
|
// Don't penalize missing optional fields
|
|
15043
14799
|
weight: 0,
|
|
15044
14800
|
// Zero weight means it won't affect the score
|
|
15045
14801
|
hit: true,
|
|
15046
|
-
message: `${
|
|
14802
|
+
message: `${path47}: optional field missing`
|
|
15047
14803
|
};
|
|
15048
14804
|
}
|
|
15049
14805
|
switch (match) {
|
|
15050
14806
|
case "exact":
|
|
15051
|
-
return this.compareExact(
|
|
14807
|
+
return this.compareExact(path47, candidateValue, expectedValue, weight);
|
|
15052
14808
|
case "numeric_tolerance":
|
|
15053
14809
|
return this.compareNumericTolerance(
|
|
15054
|
-
|
|
14810
|
+
path47,
|
|
15055
14811
|
candidateValue,
|
|
15056
14812
|
expectedValue,
|
|
15057
14813
|
fieldConfig,
|
|
15058
14814
|
weight
|
|
15059
14815
|
);
|
|
15060
14816
|
case "date":
|
|
15061
|
-
return this.compareDate(
|
|
14817
|
+
return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
|
|
15062
14818
|
default:
|
|
15063
14819
|
return {
|
|
15064
|
-
path:
|
|
14820
|
+
path: path47,
|
|
15065
14821
|
score: 0,
|
|
15066
14822
|
weight,
|
|
15067
14823
|
hit: false,
|
|
15068
|
-
message: `${
|
|
14824
|
+
message: `${path47}: unknown match type "${match}"`
|
|
15069
14825
|
};
|
|
15070
14826
|
}
|
|
15071
14827
|
}
|
|
15072
14828
|
/**
|
|
15073
14829
|
* Exact equality comparison.
|
|
15074
14830
|
*/
|
|
15075
|
-
compareExact(
|
|
14831
|
+
compareExact(path47, candidateValue, expectedValue, weight) {
|
|
15076
14832
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
15077
14833
|
return {
|
|
15078
|
-
path:
|
|
14834
|
+
path: path47,
|
|
15079
14835
|
score: 1,
|
|
15080
14836
|
weight,
|
|
15081
14837
|
hit: true,
|
|
15082
|
-
message:
|
|
14838
|
+
message: path47
|
|
15083
14839
|
};
|
|
15084
14840
|
}
|
|
15085
14841
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
15086
14842
|
return {
|
|
15087
|
-
path:
|
|
14843
|
+
path: path47,
|
|
15088
14844
|
score: 0,
|
|
15089
14845
|
weight,
|
|
15090
14846
|
hit: false,
|
|
15091
|
-
message: `${
|
|
14847
|
+
message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
15092
14848
|
};
|
|
15093
14849
|
}
|
|
15094
14850
|
return {
|
|
15095
|
-
path:
|
|
14851
|
+
path: path47,
|
|
15096
14852
|
score: 0,
|
|
15097
14853
|
weight,
|
|
15098
14854
|
hit: false,
|
|
15099
|
-
message: `${
|
|
14855
|
+
message: `${path47} (value mismatch)`
|
|
15100
14856
|
};
|
|
15101
14857
|
}
|
|
15102
14858
|
/**
|
|
15103
14859
|
* Numeric comparison with absolute or relative tolerance.
|
|
15104
14860
|
*/
|
|
15105
|
-
compareNumericTolerance(
|
|
14861
|
+
compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15106
14862
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
15107
14863
|
const candidateNum = toNumber(candidateValue);
|
|
15108
14864
|
const expectedNum = toNumber(expectedValue);
|
|
15109
14865
|
if (candidateNum === null || expectedNum === null) {
|
|
15110
14866
|
return {
|
|
15111
|
-
path:
|
|
14867
|
+
path: path47,
|
|
15112
14868
|
score: 0,
|
|
15113
14869
|
weight,
|
|
15114
14870
|
hit: false,
|
|
15115
|
-
message: `${
|
|
14871
|
+
message: `${path47} (non-numeric value)`
|
|
15116
14872
|
};
|
|
15117
14873
|
}
|
|
15118
14874
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
15119
14875
|
return {
|
|
15120
|
-
path:
|
|
14876
|
+
path: path47,
|
|
15121
14877
|
score: 0,
|
|
15122
14878
|
weight,
|
|
15123
14879
|
hit: false,
|
|
15124
|
-
message: `${
|
|
14880
|
+
message: `${path47} (invalid numeric value)`
|
|
15125
14881
|
};
|
|
15126
14882
|
}
|
|
15127
14883
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -15134,61 +14890,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
15134
14890
|
}
|
|
15135
14891
|
if (withinTolerance) {
|
|
15136
14892
|
return {
|
|
15137
|
-
path:
|
|
14893
|
+
path: path47,
|
|
15138
14894
|
score: 1,
|
|
15139
14895
|
weight,
|
|
15140
14896
|
hit: true,
|
|
15141
|
-
message: `${
|
|
14897
|
+
message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
|
|
15142
14898
|
};
|
|
15143
14899
|
}
|
|
15144
14900
|
return {
|
|
15145
|
-
path:
|
|
14901
|
+
path: path47,
|
|
15146
14902
|
score: 0,
|
|
15147
14903
|
weight,
|
|
15148
14904
|
hit: false,
|
|
15149
|
-
message: `${
|
|
14905
|
+
message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
15150
14906
|
};
|
|
15151
14907
|
}
|
|
15152
14908
|
/**
|
|
15153
14909
|
* Date comparison with format normalization.
|
|
15154
14910
|
*/
|
|
15155
|
-
compareDate(
|
|
14911
|
+
compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15156
14912
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
15157
14913
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
15158
14914
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
15159
14915
|
if (candidateDate === null) {
|
|
15160
14916
|
return {
|
|
15161
|
-
path:
|
|
14917
|
+
path: path47,
|
|
15162
14918
|
score: 0,
|
|
15163
14919
|
weight,
|
|
15164
14920
|
hit: false,
|
|
15165
|
-
message: `${
|
|
14921
|
+
message: `${path47} (unparseable candidate date)`
|
|
15166
14922
|
};
|
|
15167
14923
|
}
|
|
15168
14924
|
if (expectedDate === null) {
|
|
15169
14925
|
return {
|
|
15170
|
-
path:
|
|
14926
|
+
path: path47,
|
|
15171
14927
|
score: 0,
|
|
15172
14928
|
weight,
|
|
15173
14929
|
hit: false,
|
|
15174
|
-
message: `${
|
|
14930
|
+
message: `${path47} (unparseable expected date)`
|
|
15175
14931
|
};
|
|
15176
14932
|
}
|
|
15177
14933
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
15178
14934
|
return {
|
|
15179
|
-
path:
|
|
14935
|
+
path: path47,
|
|
15180
14936
|
score: 1,
|
|
15181
14937
|
weight,
|
|
15182
14938
|
hit: true,
|
|
15183
|
-
message:
|
|
14939
|
+
message: path47
|
|
15184
14940
|
};
|
|
15185
14941
|
}
|
|
15186
14942
|
return {
|
|
15187
|
-
path:
|
|
14943
|
+
path: path47,
|
|
15188
14944
|
score: 0,
|
|
15189
14945
|
weight,
|
|
15190
14946
|
hit: false,
|
|
15191
|
-
message: `${
|
|
14947
|
+
message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
15192
14948
|
};
|
|
15193
14949
|
}
|
|
15194
14950
|
/**
|
|
@@ -15221,11 +14977,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
15221
14977
|
};
|
|
15222
14978
|
}
|
|
15223
14979
|
};
|
|
15224
|
-
function resolvePath(obj,
|
|
15225
|
-
if (!
|
|
14980
|
+
function resolvePath(obj, path47) {
|
|
14981
|
+
if (!path47 || !obj) {
|
|
15226
14982
|
return void 0;
|
|
15227
14983
|
}
|
|
15228
|
-
const parts =
|
|
14984
|
+
const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
15229
14985
|
let current = obj;
|
|
15230
14986
|
for (const part of parts) {
|
|
15231
14987
|
if (current === null || current === void 0) {
|
|
@@ -15685,8 +15441,8 @@ var TokenUsageEvaluator = class {
|
|
|
15685
15441
|
};
|
|
15686
15442
|
|
|
15687
15443
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
15688
|
-
function getNestedValue(obj,
|
|
15689
|
-
const parts =
|
|
15444
|
+
function getNestedValue(obj, path47) {
|
|
15445
|
+
const parts = path47.split(".");
|
|
15690
15446
|
let current = obj;
|
|
15691
15447
|
for (const part of parts) {
|
|
15692
15448
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -16307,9 +16063,9 @@ function runEqualsAssertion(output, value) {
|
|
|
16307
16063
|
|
|
16308
16064
|
// src/evaluation/orchestrator.ts
|
|
16309
16065
|
var import_node_crypto10 = require("crypto");
|
|
16310
|
-
var
|
|
16311
|
-
var
|
|
16312
|
-
var
|
|
16066
|
+
var import_promises30 = require("fs/promises");
|
|
16067
|
+
var import_node_path45 = __toESM(require("path"), 1);
|
|
16068
|
+
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
16313
16069
|
|
|
16314
16070
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
16315
16071
|
var Node = class {
|
|
@@ -16522,7 +16278,7 @@ var InlineAssertEvaluator = class {
|
|
|
16522
16278
|
};
|
|
16523
16279
|
|
|
16524
16280
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
16525
|
-
var
|
|
16281
|
+
var import_node_path37 = __toESM(require("path"), 1);
|
|
16526
16282
|
async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
16527
16283
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
16528
16284
|
if (!context2) {
|
|
@@ -16557,10 +16313,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
16557
16313
|
expectedOutput: context2.evalCase.expected_output,
|
|
16558
16314
|
outputText: context2.candidate,
|
|
16559
16315
|
output: context2.output ?? null,
|
|
16560
|
-
|
|
16561
|
-
inputFiles: context2.evalCase.file_paths.filter(
|
|
16562
|
-
(p) => !context2.evalCase.guideline_paths.includes(p)
|
|
16563
|
-
),
|
|
16316
|
+
inputFiles: context2.evalCase.file_paths,
|
|
16564
16317
|
input: context2.evalCase.input,
|
|
16565
16318
|
trace: context2.trace ?? null,
|
|
16566
16319
|
fileChanges: context2.fileChanges ?? null,
|
|
@@ -16571,7 +16324,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
16571
16324
|
};
|
|
16572
16325
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
16573
16326
|
const scriptPath = script[script.length - 1];
|
|
16574
|
-
const cwd =
|
|
16327
|
+
const cwd = import_node_path37.default.dirname(scriptPath);
|
|
16575
16328
|
try {
|
|
16576
16329
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
16577
16330
|
const prompt = stdout.trim();
|
|
@@ -16843,16 +16596,16 @@ function createBuiltinRegistry() {
|
|
|
16843
16596
|
}
|
|
16844
16597
|
|
|
16845
16598
|
// src/evaluation/registry/assertion-discovery.ts
|
|
16846
|
-
var
|
|
16599
|
+
var import_node_path38 = __toESM(require("path"), 1);
|
|
16847
16600
|
var import_fast_glob3 = __toESM(require("fast-glob"), 1);
|
|
16848
16601
|
async function discoverAssertions(registry, baseDir) {
|
|
16849
16602
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
16850
16603
|
const candidateDirs = [];
|
|
16851
|
-
let dir =
|
|
16852
|
-
const root =
|
|
16604
|
+
let dir = import_node_path38.default.resolve(baseDir);
|
|
16605
|
+
const root = import_node_path38.default.parse(dir).root;
|
|
16853
16606
|
while (dir !== root) {
|
|
16854
|
-
candidateDirs.push(
|
|
16855
|
-
dir =
|
|
16607
|
+
candidateDirs.push(import_node_path38.default.join(dir, ".agentv", "assertions"));
|
|
16608
|
+
dir = import_node_path38.default.dirname(dir);
|
|
16856
16609
|
}
|
|
16857
16610
|
let files = [];
|
|
16858
16611
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -16868,7 +16621,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
16868
16621
|
}
|
|
16869
16622
|
const discoveredTypes = [];
|
|
16870
16623
|
for (const filePath of files) {
|
|
16871
|
-
const basename =
|
|
16624
|
+
const basename = import_node_path38.default.basename(filePath);
|
|
16872
16625
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
16873
16626
|
if (registry.has(typeName)) {
|
|
16874
16627
|
continue;
|
|
@@ -16886,17 +16639,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
16886
16639
|
}
|
|
16887
16640
|
|
|
16888
16641
|
// src/evaluation/registry/grader-discovery.ts
|
|
16889
|
-
var
|
|
16642
|
+
var import_node_path39 = __toESM(require("path"), 1);
|
|
16890
16643
|
var import_fast_glob4 = __toESM(require("fast-glob"), 1);
|
|
16891
16644
|
async function discoverGraders(registry, baseDir) {
|
|
16892
16645
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
16893
16646
|
const candidateDirs = [];
|
|
16894
|
-
let dir =
|
|
16895
|
-
const root =
|
|
16647
|
+
let dir = import_node_path39.default.resolve(baseDir);
|
|
16648
|
+
const root = import_node_path39.default.parse(dir).root;
|
|
16896
16649
|
while (dir !== root) {
|
|
16897
|
-
candidateDirs.push(
|
|
16898
|
-
candidateDirs.push(
|
|
16899
|
-
dir =
|
|
16650
|
+
candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "graders"));
|
|
16651
|
+
candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "judges"));
|
|
16652
|
+
dir = import_node_path39.default.dirname(dir);
|
|
16900
16653
|
}
|
|
16901
16654
|
let files = [];
|
|
16902
16655
|
for (const gradersDir of candidateDirs) {
|
|
@@ -16912,7 +16665,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
16912
16665
|
}
|
|
16913
16666
|
const discoveredTypes = [];
|
|
16914
16667
|
for (const filePath of files) {
|
|
16915
|
-
const basename =
|
|
16668
|
+
const basename = import_node_path39.default.basename(filePath);
|
|
16916
16669
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
16917
16670
|
if (registry.has(typeName)) {
|
|
16918
16671
|
continue;
|
|
@@ -17072,7 +16825,7 @@ function getTCritical(df) {
|
|
|
17072
16825
|
// src/evaluation/workspace/file-changes.ts
|
|
17073
16826
|
var import_node_child_process7 = require("child_process");
|
|
17074
16827
|
var import_node_fs11 = require("fs");
|
|
17075
|
-
var
|
|
16828
|
+
var import_node_path40 = __toESM(require("path"), 1);
|
|
17076
16829
|
var import_node_util4 = require("util");
|
|
17077
16830
|
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.exec);
|
|
17078
16831
|
function gitExecOpts(workspacePath) {
|
|
@@ -17106,10 +16859,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
17106
16859
|
}
|
|
17107
16860
|
for (const entry of entries) {
|
|
17108
16861
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
17109
|
-
const childPath =
|
|
16862
|
+
const childPath = import_node_path40.default.join(workspacePath, entry);
|
|
17110
16863
|
try {
|
|
17111
16864
|
if (!(0, import_node_fs11.statSync)(childPath).isDirectory()) continue;
|
|
17112
|
-
if (!(0, import_node_fs11.statSync)(
|
|
16865
|
+
if (!(0, import_node_fs11.statSync)(import_node_path40.default.join(childPath, ".git")).isDirectory()) continue;
|
|
17113
16866
|
} catch {
|
|
17114
16867
|
continue;
|
|
17115
16868
|
}
|
|
@@ -17119,8 +16872,8 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
17119
16872
|
}
|
|
17120
16873
|
|
|
17121
16874
|
// src/evaluation/workspace/manager.ts
|
|
17122
|
-
var
|
|
17123
|
-
var
|
|
16875
|
+
var import_promises27 = require("fs/promises");
|
|
16876
|
+
var import_node_path41 = __toESM(require("path"), 1);
|
|
17124
16877
|
var TemplateNotFoundError = class extends Error {
|
|
17125
16878
|
constructor(templatePath) {
|
|
17126
16879
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -17142,7 +16895,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
17142
16895
|
};
|
|
17143
16896
|
async function isDirectory(filePath) {
|
|
17144
16897
|
try {
|
|
17145
|
-
const stats = await (0,
|
|
16898
|
+
const stats = await (0, import_promises27.stat)(filePath);
|
|
17146
16899
|
return stats.isDirectory();
|
|
17147
16900
|
} catch {
|
|
17148
16901
|
return false;
|
|
@@ -17150,26 +16903,26 @@ async function isDirectory(filePath) {
|
|
|
17150
16903
|
}
|
|
17151
16904
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
17152
16905
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
17153
|
-
return
|
|
16906
|
+
return import_node_path41.default.join(root, evalRunId, caseId);
|
|
17154
16907
|
}
|
|
17155
16908
|
async function copyDirectoryRecursive(src, dest) {
|
|
17156
|
-
await (0,
|
|
17157
|
-
const entries = await (0,
|
|
16909
|
+
await (0, import_promises27.mkdir)(dest, { recursive: true });
|
|
16910
|
+
const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
|
|
17158
16911
|
for (const entry of entries) {
|
|
17159
|
-
const srcPath =
|
|
17160
|
-
const destPath =
|
|
16912
|
+
const srcPath = import_node_path41.default.join(src, entry.name);
|
|
16913
|
+
const destPath = import_node_path41.default.join(dest, entry.name);
|
|
17161
16914
|
if (entry.name === ".git") {
|
|
17162
16915
|
continue;
|
|
17163
16916
|
}
|
|
17164
16917
|
if (entry.isDirectory()) {
|
|
17165
16918
|
await copyDirectoryRecursive(srcPath, destPath);
|
|
17166
16919
|
} else {
|
|
17167
|
-
await (0,
|
|
16920
|
+
await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true });
|
|
17168
16921
|
}
|
|
17169
16922
|
}
|
|
17170
16923
|
}
|
|
17171
16924
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
17172
|
-
const resolvedTemplatePath =
|
|
16925
|
+
const resolvedTemplatePath = import_node_path41.default.resolve(templatePath);
|
|
17173
16926
|
if (!await fileExists2(resolvedTemplatePath)) {
|
|
17174
16927
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
17175
16928
|
}
|
|
@@ -17179,7 +16932,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
17179
16932
|
const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
|
|
17180
16933
|
try {
|
|
17181
16934
|
if (await fileExists2(workspacePath)) {
|
|
17182
|
-
await (0,
|
|
16935
|
+
await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
|
|
17183
16936
|
}
|
|
17184
16937
|
await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
|
|
17185
16938
|
return workspacePath;
|
|
@@ -17213,14 +16966,14 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
17213
16966
|
}
|
|
17214
16967
|
async function cleanupWorkspace(workspacePath) {
|
|
17215
16968
|
if (await fileExists2(workspacePath)) {
|
|
17216
|
-
await (0,
|
|
16969
|
+
await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
|
|
17217
16970
|
}
|
|
17218
16971
|
}
|
|
17219
16972
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
17220
16973
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
17221
|
-
const evalDir =
|
|
16974
|
+
const evalDir = import_node_path41.default.join(root, evalRunId);
|
|
17222
16975
|
if (await fileExists2(evalDir)) {
|
|
17223
|
-
await (0,
|
|
16976
|
+
await (0, import_promises27.rm)(evalDir, { recursive: true, force: true });
|
|
17224
16977
|
}
|
|
17225
16978
|
}
|
|
17226
16979
|
|
|
@@ -17228,8 +16981,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
17228
16981
|
var import_node_child_process8 = require("child_process");
|
|
17229
16982
|
var import_node_crypto9 = require("crypto");
|
|
17230
16983
|
var import_node_fs12 = require("fs");
|
|
17231
|
-
var
|
|
17232
|
-
var
|
|
16984
|
+
var import_promises28 = require("fs/promises");
|
|
16985
|
+
var import_node_path42 = __toESM(require("path"), 1);
|
|
17233
16986
|
var import_node_util5 = require("util");
|
|
17234
16987
|
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
|
|
17235
16988
|
function gitEnv() {
|
|
@@ -17280,11 +17033,11 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
17280
17033
|
return (0, import_node_crypto9.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
17281
17034
|
}
|
|
17282
17035
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
17283
|
-
await (0,
|
|
17284
|
-
const entries = await (0,
|
|
17036
|
+
await (0, import_promises28.mkdir)(dest, { recursive: true });
|
|
17037
|
+
const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
|
|
17285
17038
|
for (const entry of entries) {
|
|
17286
|
-
const srcPath =
|
|
17287
|
-
const destPath =
|
|
17039
|
+
const srcPath = import_node_path42.default.join(src, entry.name);
|
|
17040
|
+
const destPath = import_node_path42.default.join(dest, entry.name);
|
|
17288
17041
|
if (entry.name === ".git") {
|
|
17289
17042
|
continue;
|
|
17290
17043
|
}
|
|
@@ -17294,7 +17047,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
17294
17047
|
}
|
|
17295
17048
|
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
17296
17049
|
} else {
|
|
17297
|
-
await (0,
|
|
17050
|
+
await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
17298
17051
|
}
|
|
17299
17052
|
}
|
|
17300
17053
|
}
|
|
@@ -17317,8 +17070,8 @@ var WorkspacePoolManager = class {
|
|
|
17317
17070
|
async acquireWorkspace(options) {
|
|
17318
17071
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
17319
17072
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
17320
|
-
const poolDir =
|
|
17321
|
-
await (0,
|
|
17073
|
+
const poolDir = import_node_path42.default.join(this.poolRoot, fingerprint);
|
|
17074
|
+
await (0, import_promises28.mkdir)(poolDir, { recursive: true });
|
|
17322
17075
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
17323
17076
|
if (drifted) {
|
|
17324
17077
|
console.warn(
|
|
@@ -17327,7 +17080,7 @@ var WorkspacePoolManager = class {
|
|
|
17327
17080
|
await this.removeAllSlots(poolDir);
|
|
17328
17081
|
}
|
|
17329
17082
|
for (let i = 0; i < maxSlots; i++) {
|
|
17330
|
-
const slotPath =
|
|
17083
|
+
const slotPath = import_node_path42.default.join(poolDir, `slot-${i}`);
|
|
17331
17084
|
const lockPath = `${slotPath}.lock`;
|
|
17332
17085
|
const locked = await this.tryLock(lockPath);
|
|
17333
17086
|
if (!locked) {
|
|
@@ -17345,7 +17098,7 @@ var WorkspacePoolManager = class {
|
|
|
17345
17098
|
poolDir
|
|
17346
17099
|
};
|
|
17347
17100
|
}
|
|
17348
|
-
await (0,
|
|
17101
|
+
await (0, import_promises28.mkdir)(slotPath, { recursive: true });
|
|
17349
17102
|
if (templatePath) {
|
|
17350
17103
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
17351
17104
|
}
|
|
@@ -17369,7 +17122,7 @@ var WorkspacePoolManager = class {
|
|
|
17369
17122
|
/** Remove lock file to release a slot. */
|
|
17370
17123
|
async releaseSlot(slot) {
|
|
17371
17124
|
try {
|
|
17372
|
-
await (0,
|
|
17125
|
+
await (0, import_promises28.unlink)(slot.lockPath);
|
|
17373
17126
|
} catch {
|
|
17374
17127
|
}
|
|
17375
17128
|
}
|
|
@@ -17382,21 +17135,21 @@ var WorkspacePoolManager = class {
|
|
|
17382
17135
|
async tryLock(lockPath) {
|
|
17383
17136
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
17384
17137
|
try {
|
|
17385
|
-
await (0,
|
|
17138
|
+
await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
17386
17139
|
return true;
|
|
17387
17140
|
} catch (err) {
|
|
17388
17141
|
if (err.code !== "EEXIST") {
|
|
17389
17142
|
throw err;
|
|
17390
17143
|
}
|
|
17391
17144
|
try {
|
|
17392
|
-
const pidStr = await (0,
|
|
17145
|
+
const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
|
|
17393
17146
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
17394
17147
|
if (!Number.isNaN(pid)) {
|
|
17395
17148
|
try {
|
|
17396
17149
|
process.kill(pid, 0);
|
|
17397
17150
|
return false;
|
|
17398
17151
|
} catch {
|
|
17399
|
-
await (0,
|
|
17152
|
+
await (0, import_promises28.unlink)(lockPath).catch(() => {
|
|
17400
17153
|
});
|
|
17401
17154
|
continue;
|
|
17402
17155
|
}
|
|
@@ -17414,9 +17167,9 @@ var WorkspacePoolManager = class {
|
|
|
17414
17167
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
17415
17168
|
*/
|
|
17416
17169
|
async checkDrift(poolDir, fingerprint) {
|
|
17417
|
-
const metadataPath =
|
|
17170
|
+
const metadataPath = import_node_path42.default.join(poolDir, "metadata.json");
|
|
17418
17171
|
try {
|
|
17419
|
-
const raw = await (0,
|
|
17172
|
+
const raw = await (0, import_promises28.readFile)(metadataPath, "utf-8");
|
|
17420
17173
|
const metadata = JSON.parse(raw);
|
|
17421
17174
|
return metadata.fingerprint !== fingerprint;
|
|
17422
17175
|
} catch {
|
|
@@ -17431,17 +17184,17 @@ var WorkspacePoolManager = class {
|
|
|
17431
17184
|
repos,
|
|
17432
17185
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
17433
17186
|
};
|
|
17434
|
-
await (0,
|
|
17187
|
+
await (0, import_promises28.writeFile)(import_node_path42.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
17435
17188
|
}
|
|
17436
17189
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
17437
17190
|
async removeAllSlots(poolDir) {
|
|
17438
|
-
const entries = await (0,
|
|
17191
|
+
const entries = await (0, import_promises28.readdir)(poolDir);
|
|
17439
17192
|
for (const entry of entries) {
|
|
17440
17193
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
17441
|
-
const lockPath =
|
|
17194
|
+
const lockPath = import_node_path42.default.join(poolDir, `${entry}.lock`);
|
|
17442
17195
|
if ((0, import_node_fs12.existsSync)(lockPath)) {
|
|
17443
17196
|
try {
|
|
17444
|
-
const pidStr = await (0,
|
|
17197
|
+
const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
|
|
17445
17198
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
17446
17199
|
if (!Number.isNaN(pid)) {
|
|
17447
17200
|
try {
|
|
@@ -17454,12 +17207,12 @@ var WorkspacePoolManager = class {
|
|
|
17454
17207
|
} catch {
|
|
17455
17208
|
}
|
|
17456
17209
|
}
|
|
17457
|
-
await (0,
|
|
17458
|
-
await (0,
|
|
17210
|
+
await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, entry), { recursive: true, force: true });
|
|
17211
|
+
await (0, import_promises28.rm)(lockPath, { force: true }).catch(() => {
|
|
17459
17212
|
});
|
|
17460
17213
|
}
|
|
17461
17214
|
}
|
|
17462
|
-
await (0,
|
|
17215
|
+
await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
17463
17216
|
});
|
|
17464
17217
|
}
|
|
17465
17218
|
/**
|
|
@@ -17469,7 +17222,7 @@ var WorkspacePoolManager = class {
|
|
|
17469
17222
|
*/
|
|
17470
17223
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
17471
17224
|
for (const repo of repos) {
|
|
17472
|
-
const repoDir =
|
|
17225
|
+
const repoDir = import_node_path42.default.join(slotPath, repo.path);
|
|
17473
17226
|
if (!(0, import_node_fs12.existsSync)(repoDir)) {
|
|
17474
17227
|
continue;
|
|
17475
17228
|
}
|
|
@@ -17496,7 +17249,7 @@ var WorkspacePoolManager = class {
|
|
|
17496
17249
|
// src/evaluation/workspace/repo-manager.ts
|
|
17497
17250
|
var import_node_child_process9 = require("child_process");
|
|
17498
17251
|
var import_node_fs13 = require("fs");
|
|
17499
|
-
var
|
|
17252
|
+
var import_node_path43 = __toESM(require("path"), 1);
|
|
17500
17253
|
var import_node_util6 = require("util");
|
|
17501
17254
|
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process9.execFile);
|
|
17502
17255
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -17596,7 +17349,7 @@ ${lines.join("\n")}`;
|
|
|
17596
17349
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
17597
17350
|
*/
|
|
17598
17351
|
async materialize(repo, workspacePath) {
|
|
17599
|
-
const targetDir =
|
|
17352
|
+
const targetDir = import_node_path43.default.join(workspacePath, repo.path);
|
|
17600
17353
|
const sourceUrl = getSourceUrl(repo.source);
|
|
17601
17354
|
const startedAt = Date.now();
|
|
17602
17355
|
if (this.verbose) {
|
|
@@ -17687,7 +17440,7 @@ ${lines.join("\n")}`;
|
|
|
17687
17440
|
async reset(repos, workspacePath, reset) {
|
|
17688
17441
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
17689
17442
|
for (const repo of repos) {
|
|
17690
|
-
const targetDir =
|
|
17443
|
+
const targetDir = import_node_path43.default.join(workspacePath, repo.path);
|
|
17691
17444
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
17692
17445
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
17693
17446
|
}
|
|
@@ -17695,36 +17448,36 @@ ${lines.join("\n")}`;
|
|
|
17695
17448
|
};
|
|
17696
17449
|
|
|
17697
17450
|
// src/evaluation/workspace/resolve.ts
|
|
17698
|
-
var
|
|
17699
|
-
var
|
|
17451
|
+
var import_promises29 = require("fs/promises");
|
|
17452
|
+
var import_node_path44 = __toESM(require("path"), 1);
|
|
17700
17453
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
17701
17454
|
if (!templatePath) {
|
|
17702
17455
|
return void 0;
|
|
17703
17456
|
}
|
|
17704
|
-
const resolved =
|
|
17705
|
-
const stats = await (0,
|
|
17457
|
+
const resolved = import_node_path44.default.resolve(templatePath);
|
|
17458
|
+
const stats = await (0, import_promises29.stat)(resolved);
|
|
17706
17459
|
if (stats.isFile()) {
|
|
17707
17460
|
return {
|
|
17708
|
-
dir:
|
|
17461
|
+
dir: import_node_path44.default.dirname(resolved),
|
|
17709
17462
|
workspaceFile: resolved
|
|
17710
17463
|
};
|
|
17711
17464
|
}
|
|
17712
17465
|
if (!stats.isDirectory()) {
|
|
17713
17466
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
17714
17467
|
}
|
|
17715
|
-
const entries = await (0,
|
|
17468
|
+
const entries = await (0, import_promises29.readdir)(resolved);
|
|
17716
17469
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
17717
17470
|
if (workspaceFiles.length === 1) {
|
|
17718
17471
|
return {
|
|
17719
17472
|
dir: resolved,
|
|
17720
|
-
workspaceFile:
|
|
17473
|
+
workspaceFile: import_node_path44.default.join(resolved, workspaceFiles[0])
|
|
17721
17474
|
};
|
|
17722
17475
|
}
|
|
17723
17476
|
if (workspaceFiles.length > 1) {
|
|
17724
17477
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
17725
17478
|
return {
|
|
17726
17479
|
dir: resolved,
|
|
17727
|
-
workspaceFile: conventionFile ?
|
|
17480
|
+
workspaceFile: conventionFile ? import_node_path44.default.join(resolved, conventionFile) : void 0
|
|
17728
17481
|
};
|
|
17729
17482
|
}
|
|
17730
17483
|
return { dir: resolved };
|
|
@@ -17928,7 +17681,7 @@ async function runEvaluation(options) {
|
|
|
17928
17681
|
];
|
|
17929
17682
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
17930
17683
|
const typeRegistry = createBuiltinRegistry();
|
|
17931
|
-
const discoveryBaseDir = evalFilePath ?
|
|
17684
|
+
const discoveryBaseDir = evalFilePath ? import_node_path45.default.dirname(import_node_path45.default.resolve(evalFilePath)) : process.cwd();
|
|
17932
17685
|
const evalDir = discoveryBaseDir;
|
|
17933
17686
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
17934
17687
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -18060,14 +17813,14 @@ async function runEvaluation(options) {
|
|
|
18060
17813
|
let staticMaterialised = false;
|
|
18061
17814
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
18062
17815
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
18063
|
-
const dirExists = await (0,
|
|
17816
|
+
const dirExists = await (0, import_promises30.stat)(configuredStaticPath).then(
|
|
18064
17817
|
(s) => s.isDirectory(),
|
|
18065
17818
|
() => false
|
|
18066
17819
|
);
|
|
18067
|
-
const isEmpty = dirExists ? (await (0,
|
|
17820
|
+
const isEmpty = dirExists ? (await (0, import_promises30.readdir)(configuredStaticPath)).length === 0 : false;
|
|
18068
17821
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
18069
17822
|
if (!dirExists) {
|
|
18070
|
-
await (0,
|
|
17823
|
+
await (0, import_promises30.mkdir)(configuredStaticPath, { recursive: true });
|
|
18071
17824
|
}
|
|
18072
17825
|
if (workspaceTemplate) {
|
|
18073
17826
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -18112,14 +17865,14 @@ async function runEvaluation(options) {
|
|
|
18112
17865
|
}
|
|
18113
17866
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
18114
17867
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
18115
|
-
await (0,
|
|
17868
|
+
await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
18116
17869
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
18117
17870
|
}
|
|
18118
17871
|
try {
|
|
18119
17872
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
18120
|
-
const copiedWorkspaceFile =
|
|
17873
|
+
const copiedWorkspaceFile = import_node_path45.default.join(sharedWorkspacePath, import_node_path45.default.basename(suiteWorkspaceFile));
|
|
18121
17874
|
try {
|
|
18122
|
-
await (0,
|
|
17875
|
+
await (0, import_promises30.stat)(copiedWorkspaceFile);
|
|
18123
17876
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
18124
17877
|
} catch {
|
|
18125
17878
|
}
|
|
@@ -18227,10 +17980,10 @@ async function runEvaluation(options) {
|
|
|
18227
17980
|
const budgetResult = {
|
|
18228
17981
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
18229
17982
|
testId: evalCase.id,
|
|
18230
|
-
|
|
17983
|
+
eval_set: evalCase.eval_set,
|
|
18231
17984
|
score: 0,
|
|
18232
17985
|
assertions: [],
|
|
18233
|
-
|
|
17986
|
+
output: [],
|
|
18234
17987
|
target: target.name,
|
|
18235
17988
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
18236
17989
|
budgetExceeded: true,
|
|
@@ -18263,10 +18016,10 @@ async function runEvaluation(options) {
|
|
|
18263
18016
|
const haltResult = {
|
|
18264
18017
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
18265
18018
|
testId: evalCase.id,
|
|
18266
|
-
|
|
18019
|
+
eval_set: evalCase.eval_set,
|
|
18267
18020
|
score: 0,
|
|
18268
18021
|
assertions: [],
|
|
18269
|
-
|
|
18022
|
+
output: [],
|
|
18270
18023
|
target: target.name,
|
|
18271
18024
|
error: errorMsg,
|
|
18272
18025
|
executionStatus: "execution_error",
|
|
@@ -18495,8 +18248,6 @@ async function runBatchEvaluation(options) {
|
|
|
18495
18248
|
const promptInputs = promptInputsList[index];
|
|
18496
18249
|
return {
|
|
18497
18250
|
question: promptInputs.question,
|
|
18498
|
-
guidelines: promptInputs.guidelines,
|
|
18499
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
18500
18251
|
inputFiles: evalCase.file_paths,
|
|
18501
18252
|
evalCaseId: evalCase.id,
|
|
18502
18253
|
metadata: {
|
|
@@ -18694,9 +18445,9 @@ async function runEvalCase(options) {
|
|
|
18694
18445
|
);
|
|
18695
18446
|
}
|
|
18696
18447
|
if (caseWorkspaceFile && workspacePath) {
|
|
18697
|
-
const copiedFile =
|
|
18448
|
+
const copiedFile = import_node_path45.default.join(workspacePath, import_node_path45.default.basename(caseWorkspaceFile));
|
|
18698
18449
|
try {
|
|
18699
|
-
await (0,
|
|
18450
|
+
await (0, import_promises30.stat)(copiedFile);
|
|
18700
18451
|
caseWorkspaceFile = copiedFile;
|
|
18701
18452
|
} catch {
|
|
18702
18453
|
}
|
|
@@ -18704,7 +18455,7 @@ async function runEvalCase(options) {
|
|
|
18704
18455
|
}
|
|
18705
18456
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
18706
18457
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
18707
|
-
await (0,
|
|
18458
|
+
await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
|
|
18708
18459
|
}
|
|
18709
18460
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
18710
18461
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -18754,11 +18505,11 @@ async function runEvalCase(options) {
|
|
|
18754
18505
|
const files = evalCase.metadata.agent_skills_files;
|
|
18755
18506
|
if (baseDir && files.length > 0) {
|
|
18756
18507
|
for (const relPath of files) {
|
|
18757
|
-
const srcPath =
|
|
18758
|
-
const destPath =
|
|
18508
|
+
const srcPath = import_node_path45.default.resolve(baseDir, relPath);
|
|
18509
|
+
const destPath = import_node_path45.default.resolve(workspacePath, relPath);
|
|
18759
18510
|
try {
|
|
18760
|
-
await (0,
|
|
18761
|
-
await (0,
|
|
18511
|
+
await (0, import_promises30.mkdir)(import_node_path45.default.dirname(destPath), { recursive: true });
|
|
18512
|
+
await (0, import_promises30.copyFile)(srcPath, destPath);
|
|
18762
18513
|
} catch (error) {
|
|
18763
18514
|
const message = error instanceof Error ? error.message : String(error);
|
|
18764
18515
|
return buildErrorResult(
|
|
@@ -19204,8 +18955,7 @@ async function evaluateCandidate(options) {
|
|
|
19204
18955
|
let lmRequest;
|
|
19205
18956
|
if (isAgentProvider(provider)) {
|
|
19206
18957
|
agentRequest = {
|
|
19207
|
-
question: promptInputs.question
|
|
19208
|
-
guideline_paths: evalCase.guideline_paths
|
|
18958
|
+
question: promptInputs.question
|
|
19209
18959
|
};
|
|
19210
18960
|
} else {
|
|
19211
18961
|
if (promptInputs.chatPrompt) {
|
|
@@ -19214,8 +18964,7 @@ async function evaluateCandidate(options) {
|
|
|
19214
18964
|
};
|
|
19215
18965
|
} else {
|
|
19216
18966
|
lmRequest = {
|
|
19217
|
-
question: promptInputs.question
|
|
19218
|
-
guidelines: promptInputs.guidelines
|
|
18967
|
+
question: promptInputs.question
|
|
19219
18968
|
};
|
|
19220
18969
|
}
|
|
19221
18970
|
}
|
|
@@ -19229,11 +18978,10 @@ async function evaluateCandidate(options) {
|
|
|
19229
18978
|
return {
|
|
19230
18979
|
timestamp: completedAt.toISOString(),
|
|
19231
18980
|
testId: evalCase.id,
|
|
19232
|
-
|
|
18981
|
+
eval_set: evalCase.eval_set,
|
|
19233
18982
|
conversationId: evalCase.conversation_id,
|
|
19234
18983
|
score: score.score,
|
|
19235
18984
|
assertions: score.assertions,
|
|
19236
|
-
outputText: candidate,
|
|
19237
18985
|
target: target.name,
|
|
19238
18986
|
tokenUsage,
|
|
19239
18987
|
costUsd,
|
|
@@ -19244,7 +18992,7 @@ async function evaluateCandidate(options) {
|
|
|
19244
18992
|
input,
|
|
19245
18993
|
scores,
|
|
19246
18994
|
trace: trace2,
|
|
19247
|
-
output,
|
|
18995
|
+
output: output ?? [{ role: "assistant", content: candidate }],
|
|
19248
18996
|
fileChanges,
|
|
19249
18997
|
executionStatus: classifyQualityStatus(score.score)
|
|
19250
18998
|
};
|
|
@@ -19378,7 +19126,7 @@ async function runEvaluatorList(options) {
|
|
|
19378
19126
|
fileChanges,
|
|
19379
19127
|
workspacePath
|
|
19380
19128
|
};
|
|
19381
|
-
const evalFileDir = evalCase.
|
|
19129
|
+
const evalFileDir = evalCase.file_paths[0] ? import_node_path45.default.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
19382
19130
|
const dispatchContext = {
|
|
19383
19131
|
graderProvider,
|
|
19384
19132
|
targetResolver,
|
|
@@ -19409,7 +19157,7 @@ async function runEvaluatorList(options) {
|
|
|
19409
19157
|
weight,
|
|
19410
19158
|
verdict: score2.verdict,
|
|
19411
19159
|
assertions: score2.assertions,
|
|
19412
|
-
|
|
19160
|
+
input: score2.evaluatorRawRequest,
|
|
19413
19161
|
details: score2.details,
|
|
19414
19162
|
scores: mapChildResults(score2.scores),
|
|
19415
19163
|
tokenUsage: score2.tokenUsage,
|
|
@@ -19492,7 +19240,7 @@ function filterEvalCases(evalCases, filter) {
|
|
|
19492
19240
|
if (!filter) {
|
|
19493
19241
|
return evalCases;
|
|
19494
19242
|
}
|
|
19495
|
-
return evalCases.filter((evalCase) =>
|
|
19243
|
+
return evalCases.filter((evalCase) => import_micromatch3.default.isMatch(evalCase.id, filter));
|
|
19496
19244
|
}
|
|
19497
19245
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
19498
19246
|
const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
|
|
@@ -19529,8 +19277,6 @@ async function invokeProvider(provider, options) {
|
|
|
19529
19277
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
19530
19278
|
return await provider.invoke({
|
|
19531
19279
|
question: promptInputs.question,
|
|
19532
|
-
guidelines: promptInputs.guidelines,
|
|
19533
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
19534
19280
|
chatPrompt: promptInputs.chatPrompt,
|
|
19535
19281
|
inputFiles: evalCase.file_paths,
|
|
19536
19282
|
evalCaseId: evalCase.id,
|
|
@@ -19558,21 +19304,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
19558
19304
|
if (isAgentProvider(provider)) {
|
|
19559
19305
|
agentRequest = {
|
|
19560
19306
|
question: promptInputs.question,
|
|
19561
|
-
guideline_paths: evalCase.guideline_paths,
|
|
19562
19307
|
error: message
|
|
19563
19308
|
};
|
|
19564
19309
|
} else {
|
|
19565
19310
|
if (promptInputs.chatPrompt) {
|
|
19566
19311
|
lmRequest = {
|
|
19567
19312
|
chat_prompt: promptInputs.chatPrompt,
|
|
19568
|
-
guideline_paths: evalCase.guideline_paths,
|
|
19569
19313
|
error: message
|
|
19570
19314
|
};
|
|
19571
19315
|
} else {
|
|
19572
19316
|
lmRequest = {
|
|
19573
19317
|
question: promptInputs.question,
|
|
19574
|
-
guidelines: promptInputs.guidelines,
|
|
19575
|
-
guideline_paths: evalCase.guideline_paths,
|
|
19576
19318
|
error: message
|
|
19577
19319
|
};
|
|
19578
19320
|
}
|
|
@@ -19585,11 +19327,11 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
19585
19327
|
return {
|
|
19586
19328
|
timestamp: timestamp.toISOString(),
|
|
19587
19329
|
testId: evalCase.id,
|
|
19588
|
-
|
|
19330
|
+
eval_set: evalCase.eval_set,
|
|
19589
19331
|
conversationId: evalCase.conversation_id,
|
|
19590
19332
|
score: 0,
|
|
19591
19333
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
19592
|
-
|
|
19334
|
+
output: [{ role: "assistant", content: `Error occurred: ${message}` }],
|
|
19593
19335
|
target: targetName,
|
|
19594
19336
|
requests,
|
|
19595
19337
|
input,
|
|
@@ -19618,7 +19360,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
19618
19360
|
hash.update(target.name);
|
|
19619
19361
|
hash.update(evalCase.id);
|
|
19620
19362
|
hash.update(promptInputs.question);
|
|
19621
|
-
hash.update(promptInputs.guidelines);
|
|
19622
19363
|
hash.update(promptInputs.systemMessage ?? "");
|
|
19623
19364
|
if (promptInputs.chatPrompt) {
|
|
19624
19365
|
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
@@ -19633,7 +19374,7 @@ function buildResultInput(promptInputs) {
|
|
|
19633
19374
|
content: message.content
|
|
19634
19375
|
}));
|
|
19635
19376
|
}
|
|
19636
|
-
return promptInputs.question;
|
|
19377
|
+
return [{ role: "user", content: promptInputs.question }];
|
|
19637
19378
|
}
|
|
19638
19379
|
function aggregateEvaluatorTokenUsage(scores) {
|
|
19639
19380
|
if (!scores || scores.length === 0) return void 0;
|
|
@@ -19699,7 +19440,7 @@ function mapChildResults(children) {
|
|
|
19699
19440
|
weight: child.weight,
|
|
19700
19441
|
verdict: child.verdict,
|
|
19701
19442
|
assertions: child.assertions,
|
|
19702
|
-
|
|
19443
|
+
input: child.evaluatorRawRequest,
|
|
19703
19444
|
scores: mapChildResults(child.scores),
|
|
19704
19445
|
details: child.details,
|
|
19705
19446
|
tokenUsage: child.tokenUsage
|
|
@@ -19718,7 +19459,7 @@ function computeWeightedMean(entries) {
|
|
|
19718
19459
|
|
|
19719
19460
|
// src/evaluation/evaluate.ts
|
|
19720
19461
|
var import_node_fs14 = require("fs");
|
|
19721
|
-
var
|
|
19462
|
+
var import_node_path46 = __toESM(require("path"), 1);
|
|
19722
19463
|
|
|
19723
19464
|
// src/evaluation/providers/function-provider.ts
|
|
19724
19465
|
function createFunctionProvider(taskFn) {
|
|
@@ -19755,7 +19496,7 @@ async function evaluate(config) {
|
|
|
19755
19496
|
}
|
|
19756
19497
|
const gitRoot = await findGitRoot(process.cwd());
|
|
19757
19498
|
const repoRoot = gitRoot ?? process.cwd();
|
|
19758
|
-
const testFilePath = config.specFile ?
|
|
19499
|
+
const testFilePath = config.specFile ? import_node_path46.default.resolve(config.specFile) : import_node_path46.default.join(process.cwd(), "__programmatic__.yaml");
|
|
19759
19500
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
19760
19501
|
let resolvedTarget;
|
|
19761
19502
|
let taskProvider;
|
|
@@ -19821,8 +19562,6 @@ async function evaluate(config) {
|
|
|
19821
19562
|
input_segments: inputSegments,
|
|
19822
19563
|
expected_output: expectedOutput,
|
|
19823
19564
|
reference_answer: expectedOutputValue,
|
|
19824
|
-
guideline_paths: [],
|
|
19825
|
-
guideline_patterns: [],
|
|
19826
19565
|
file_paths: [],
|
|
19827
19566
|
assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
|
|
19828
19567
|
metadata: test.metadata
|
|
@@ -19884,10 +19623,10 @@ function computeSummary(results, durationMs) {
|
|
|
19884
19623
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
19885
19624
|
async function discoverDefaultTarget(repoRoot) {
|
|
19886
19625
|
const cwd = process.cwd();
|
|
19887
|
-
const chain = buildDirectoryChain2(
|
|
19626
|
+
const chain = buildDirectoryChain2(import_node_path46.default.join(cwd, "_placeholder"), repoRoot);
|
|
19888
19627
|
for (const dir of chain) {
|
|
19889
19628
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
19890
|
-
const targetsPath =
|
|
19629
|
+
const targetsPath = import_node_path46.default.join(dir, candidate);
|
|
19891
19630
|
if (!(0, import_node_fs14.existsSync)(targetsPath)) continue;
|
|
19892
19631
|
try {
|
|
19893
19632
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -19904,7 +19643,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
19904
19643
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
19905
19644
|
const envFiles = [];
|
|
19906
19645
|
for (const dir of chain) {
|
|
19907
|
-
const envPath =
|
|
19646
|
+
const envPath = import_node_path46.default.join(dir, ".env");
|
|
19908
19647
|
if ((0, import_node_fs14.existsSync)(envPath)) envFiles.push(envPath);
|
|
19909
19648
|
}
|
|
19910
19649
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -20085,8 +19824,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
20085
19824
|
}
|
|
20086
19825
|
|
|
20087
19826
|
// src/evaluation/cache/response-cache.ts
|
|
20088
|
-
var
|
|
20089
|
-
var
|
|
19827
|
+
var import_promises31 = require("fs/promises");
|
|
19828
|
+
var import_node_path47 = __toESM(require("path"), 1);
|
|
20090
19829
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
20091
19830
|
var ResponseCache = class {
|
|
20092
19831
|
cachePath;
|
|
@@ -20096,7 +19835,7 @@ var ResponseCache = class {
|
|
|
20096
19835
|
async get(key) {
|
|
20097
19836
|
const filePath = this.keyToPath(key);
|
|
20098
19837
|
try {
|
|
20099
|
-
const data = await (0,
|
|
19838
|
+
const data = await (0, import_promises31.readFile)(filePath, "utf8");
|
|
20100
19839
|
return JSON.parse(data);
|
|
20101
19840
|
} catch {
|
|
20102
19841
|
return void 0;
|
|
@@ -20104,13 +19843,13 @@ var ResponseCache = class {
|
|
|
20104
19843
|
}
|
|
20105
19844
|
async set(key, value) {
|
|
20106
19845
|
const filePath = this.keyToPath(key);
|
|
20107
|
-
const dir =
|
|
20108
|
-
await (0,
|
|
20109
|
-
await (0,
|
|
19846
|
+
const dir = import_node_path47.default.dirname(filePath);
|
|
19847
|
+
await (0, import_promises31.mkdir)(dir, { recursive: true });
|
|
19848
|
+
await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
20110
19849
|
}
|
|
20111
19850
|
keyToPath(key) {
|
|
20112
19851
|
const prefix = key.slice(0, 2);
|
|
20113
|
-
return
|
|
19852
|
+
return import_node_path47.default.join(this.cachePath, prefix, `${key}.json`);
|
|
20114
19853
|
}
|
|
20115
19854
|
};
|
|
20116
19855
|
function shouldEnableCache(params) {
|
|
@@ -20127,7 +19866,6 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
20127
19866
|
|
|
20128
19867
|
// src/evaluation/baseline.ts
|
|
20129
19868
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
20130
|
-
"outputText",
|
|
20131
19869
|
"requests",
|
|
20132
19870
|
"trace",
|
|
20133
19871
|
"workspacePath",
|
|
@@ -20144,7 +19882,7 @@ var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
|
20144
19882
|
"startTime",
|
|
20145
19883
|
"endTime"
|
|
20146
19884
|
]);
|
|
20147
|
-
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "
|
|
19885
|
+
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "input"]);
|
|
20148
19886
|
function trimEvaluatorResult(result) {
|
|
20149
19887
|
const trimmed = {};
|
|
20150
19888
|
for (const [key, value] of Object.entries(result)) {
|
|
@@ -20299,9 +20037,13 @@ var OtelTraceExporter = class {
|
|
|
20299
20037
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
20300
20038
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
20301
20039
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
20302
|
-
if (result.
|
|
20040
|
+
if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
|
|
20303
20041
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
20304
|
-
if (captureContent
|
|
20042
|
+
if (captureContent && result.output.length > 0) {
|
|
20043
|
+
const lastMsg = result.output[result.output.length - 1];
|
|
20044
|
+
const text = typeof lastMsg.content === "string" ? lastMsg.content : JSON.stringify(lastMsg.content);
|
|
20045
|
+
rootSpan.setAttribute("agentv.output_text", text);
|
|
20046
|
+
}
|
|
20305
20047
|
if (result.durationMs != null)
|
|
20306
20048
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
20307
20049
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
@@ -20480,14 +20222,14 @@ var OtelStreamingObserver = class {
|
|
|
20480
20222
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
20481
20223
|
rootCtx = null;
|
|
20482
20224
|
/** Create root eval span immediately (visible in backend right away) */
|
|
20483
|
-
startEvalCase(testId, target,
|
|
20225
|
+
startEvalCase(testId, target, evalSet) {
|
|
20484
20226
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
20485
20227
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
20486
20228
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
20487
20229
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
20488
20230
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
20489
20231
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
20490
|
-
if (
|
|
20232
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
|
|
20491
20233
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
20492
20234
|
}
|
|
20493
20235
|
/** Create and immediately export a tool span */
|
|
@@ -20680,7 +20422,6 @@ function createAgentKernel() {
|
|
|
20680
20422
|
initializeBaseline,
|
|
20681
20423
|
isAgentSkillsFormat,
|
|
20682
20424
|
isEvaluatorKind,
|
|
20683
|
-
isGuidelineFile,
|
|
20684
20425
|
isJsonObject,
|
|
20685
20426
|
isJsonValue,
|
|
20686
20427
|
isNonEmptyString,
|