@agentv/core 3.8.0 → 3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-3ZS3GCMI.js → chunk-PC5TLJF6.js} +1 -2
- package/dist/chunk-PC5TLJF6.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +533 -772
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -21
- package/dist/index.d.ts +16 -21
- package/dist/index.js +381 -619
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-3ZS3GCMI.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
readTextFile,
|
|
20
20
|
resolveFileReference,
|
|
21
21
|
resolveTargetDefinition
|
|
22
|
-
} from "./chunk-3ZS3GCMI.js";
|
|
22
|
+
} from "./chunk-PC5TLJF6.js";
|
|
23
23
|
import {
|
|
24
24
|
AgentvProvider
|
|
25
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -149,9 +149,9 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
149
149
|
}
|
|
150
150
|
|
|
151
151
|
// src/evaluation/yaml-parser.ts
|
|
152
|
-
import { readFile as readFile7 } from "node:fs/promises";
|
|
153
|
-
import path8 from "node:path";
|
|
154
|
-
import
|
|
152
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
153
|
+
import path7 from "node:path";
|
|
154
|
+
import micromatch2 from "micromatch";
|
|
155
155
|
import { parse as parse2 } from "yaml";
|
|
156
156
|
|
|
157
157
|
// src/evaluation/loaders/agent-skills-parser.ts
|
|
@@ -225,7 +225,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
225
225
|
input_segments: [{ type: "text", value: prompt }],
|
|
226
226
|
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
227
227
|
reference_answer: evalCase.expected_output,
|
|
228
|
-
guideline_paths: [],
|
|
229
228
|
file_paths: filePaths,
|
|
230
229
|
criteria: evalCase.expected_output ?? "",
|
|
231
230
|
assertions,
|
|
@@ -239,7 +238,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
239
238
|
// src/evaluation/loaders/config-loader.ts
|
|
240
239
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
241
240
|
import path3 from "node:path";
|
|
242
|
-
import micromatch from "micromatch";
|
|
243
241
|
import { parse } from "yaml";
|
|
244
242
|
|
|
245
243
|
// src/evaluation/loaders/file-resolver.ts
|
|
@@ -370,15 +368,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
370
368
|
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
371
369
|
continue;
|
|
372
370
|
}
|
|
373
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
374
|
-
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
375
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
376
|
-
continue;
|
|
377
|
-
}
|
|
378
|
-
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
379
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
380
|
-
continue;
|
|
381
|
-
}
|
|
382
371
|
const evalPatterns = config.eval_patterns;
|
|
383
372
|
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
384
373
|
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
@@ -394,7 +383,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
394
383
|
);
|
|
395
384
|
return {
|
|
396
385
|
required_version: requiredVersion,
|
|
397
|
-
guideline_patterns: guidelinePatterns,
|
|
398
386
|
eval_patterns: evalPatterns,
|
|
399
387
|
execution: executionDefaults
|
|
400
388
|
};
|
|
@@ -406,11 +394,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
406
394
|
}
|
|
407
395
|
return null;
|
|
408
396
|
}
|
|
409
|
-
function isGuidelineFile(filePath, patterns) {
|
|
410
|
-
const normalized = filePath.split("\\").join("/");
|
|
411
|
-
const patternsToUse = patterns ?? [];
|
|
412
|
-
return micromatch.isMatch(normalized, patternsToUse);
|
|
413
|
-
}
|
|
414
397
|
function extractTargetFromSuite(suite) {
|
|
415
398
|
const execution = suite.execution;
|
|
416
399
|
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
@@ -437,6 +420,17 @@ function extractTargetsFromSuite(suite) {
|
|
|
437
420
|
}
|
|
438
421
|
return void 0;
|
|
439
422
|
}
|
|
423
|
+
function extractWorkersFromSuite(suite) {
|
|
424
|
+
const execution = suite.execution;
|
|
425
|
+
if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
|
|
426
|
+
return void 0;
|
|
427
|
+
}
|
|
428
|
+
const workers = execution.workers;
|
|
429
|
+
if (typeof workers === "number" && Number.isInteger(workers) && workers >= 1 && workers <= 50) {
|
|
430
|
+
return workers;
|
|
431
|
+
}
|
|
432
|
+
return void 0;
|
|
433
|
+
}
|
|
440
434
|
function extractTargetsFromTestCase(testCase) {
|
|
441
435
|
const execution = testCase.execution;
|
|
442
436
|
if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
|
|
@@ -1986,7 +1980,7 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
1986
1980
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
1987
1981
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
1988
1982
|
import path6 from "node:path";
|
|
1989
|
-
import
|
|
1983
|
+
import micromatch from "micromatch";
|
|
1990
1984
|
import { parse as parseYaml } from "yaml";
|
|
1991
1985
|
|
|
1992
1986
|
// src/evaluation/loaders/message-processor.ts
|
|
@@ -2013,10 +2007,6 @@ function formatSegment(segment, mode = "lm") {
|
|
|
2013
2007
|
if (type === "text") {
|
|
2014
2008
|
return asString2(segment.value);
|
|
2015
2009
|
}
|
|
2016
|
-
if (type === "guideline_ref") {
|
|
2017
|
-
const refPath = asString2(segment.path);
|
|
2018
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
2019
|
-
}
|
|
2020
2010
|
if (type === "file") {
|
|
2021
2011
|
const filePath = asString2(segment.path);
|
|
2022
2012
|
if (!filePath) {
|
|
@@ -2039,9 +2029,6 @@ function hasVisibleContent(segments) {
|
|
|
2039
2029
|
const value = asString2(segment.value);
|
|
2040
2030
|
return value !== void 0 && value.trim().length > 0;
|
|
2041
2031
|
}
|
|
2042
|
-
if (type === "guideline_ref") {
|
|
2043
|
-
return false;
|
|
2044
|
-
}
|
|
2045
2032
|
if (type === "file") {
|
|
2046
2033
|
const text = asString2(segment.text);
|
|
2047
2034
|
return text !== void 0 && text.trim().length > 0;
|
|
@@ -2057,17 +2044,7 @@ function asString2(value) {
|
|
|
2057
2044
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2058
2045
|
var ANSI_RESET5 = "\x1B[0m";
|
|
2059
2046
|
async function processMessages(options) {
|
|
2060
|
-
const {
|
|
2061
|
-
messages,
|
|
2062
|
-
searchRoots,
|
|
2063
|
-
repoRootPath,
|
|
2064
|
-
guidelinePatterns,
|
|
2065
|
-
guidelinePaths,
|
|
2066
|
-
treatFileSegmentsAsGuidelines,
|
|
2067
|
-
textParts,
|
|
2068
|
-
messageType,
|
|
2069
|
-
verbose
|
|
2070
|
-
} = options;
|
|
2047
|
+
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
2071
2048
|
const segments = [];
|
|
2072
2049
|
for (const message of messages) {
|
|
2073
2050
|
const content = message.content;
|
|
@@ -2111,21 +2088,6 @@ async function processMessages(options) {
|
|
|
2111
2088
|
}
|
|
2112
2089
|
try {
|
|
2113
2090
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2114
|
-
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
2115
|
-
messageType,
|
|
2116
|
-
resolvedPath,
|
|
2117
|
-
repoRootPath,
|
|
2118
|
-
guidelinePatterns,
|
|
2119
|
-
treatFileSegmentsAsGuidelines
|
|
2120
|
-
});
|
|
2121
|
-
if (classifyAsGuideline && guidelinePaths) {
|
|
2122
|
-
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
2123
|
-
if (verbose) {
|
|
2124
|
-
console.log(` [Guideline] Found: ${displayPath}`);
|
|
2125
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
2126
|
-
}
|
|
2127
|
-
continue;
|
|
2128
|
-
}
|
|
2129
2091
|
segments.push({
|
|
2130
2092
|
type: "file",
|
|
2131
2093
|
path: displayPath,
|
|
@@ -2153,26 +2115,6 @@ async function processMessages(options) {
|
|
|
2153
2115
|
}
|
|
2154
2116
|
return segments;
|
|
2155
2117
|
}
|
|
2156
|
-
function shouldTreatAsGuideline(options) {
|
|
2157
|
-
const {
|
|
2158
|
-
messageType,
|
|
2159
|
-
resolvedPath,
|
|
2160
|
-
repoRootPath,
|
|
2161
|
-
guidelinePatterns,
|
|
2162
|
-
treatFileSegmentsAsGuidelines
|
|
2163
|
-
} = options;
|
|
2164
|
-
if (messageType !== "input") {
|
|
2165
|
-
return false;
|
|
2166
|
-
}
|
|
2167
|
-
if (treatFileSegmentsAsGuidelines) {
|
|
2168
|
-
return true;
|
|
2169
|
-
}
|
|
2170
|
-
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2171
|
-
return false;
|
|
2172
|
-
}
|
|
2173
|
-
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
2174
|
-
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
2175
|
-
}
|
|
2176
2118
|
function asString3(value) {
|
|
2177
2119
|
return typeof value === "string" ? value : void 0;
|
|
2178
2120
|
}
|
|
@@ -2373,7 +2315,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
2373
2315
|
}
|
|
2374
2316
|
return {
|
|
2375
2317
|
description: asString4(parsed.description),
|
|
2376
|
-
|
|
2318
|
+
name: asString4(parsed.name),
|
|
2377
2319
|
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
2378
2320
|
evaluator: parsed.evaluator
|
|
2379
2321
|
};
|
|
@@ -2409,20 +2351,18 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2409
2351
|
const absoluteTestPath = path6.resolve(evalFilePath);
|
|
2410
2352
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2411
2353
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2412
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2413
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
2414
2354
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
2415
2355
|
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
2416
2356
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
2417
|
-
const
|
|
2418
|
-
const
|
|
2357
|
+
const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
2358
|
+
const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
|
|
2419
2359
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
2420
2360
|
const globalExecution = sidecar.execution;
|
|
2421
2361
|
if (verbose) {
|
|
2422
2362
|
console.log(`
|
|
2423
2363
|
[JSONL Dataset: ${evalFilePath}]`);
|
|
2424
2364
|
console.log(` Cases: ${rawCases.length}`);
|
|
2425
|
-
console.log(`
|
|
2365
|
+
console.log(` Eval set: ${evalSetName}`);
|
|
2426
2366
|
if (sidecar.description) {
|
|
2427
2367
|
console.log(` Description: ${sidecar.description}`);
|
|
2428
2368
|
}
|
|
@@ -2432,7 +2372,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2432
2372
|
const evalcase = rawCases[lineIndex];
|
|
2433
2373
|
const lineNumber = lineIndex + 1;
|
|
2434
2374
|
const id = asString4(evalcase.id);
|
|
2435
|
-
if (filterPattern && (!id || !
|
|
2375
|
+
if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
|
|
2436
2376
|
continue;
|
|
2437
2377
|
}
|
|
2438
2378
|
const conversationId = asString4(evalcase.conversation_id);
|
|
@@ -2455,14 +2395,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2455
2395
|
continue;
|
|
2456
2396
|
}
|
|
2457
2397
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2458
|
-
const guidelinePaths = [];
|
|
2459
2398
|
const inputTextParts = [];
|
|
2460
2399
|
const inputSegments = await processMessages({
|
|
2461
2400
|
messages: inputMessages,
|
|
2462
2401
|
searchRoots,
|
|
2463
2402
|
repoRootPath,
|
|
2464
|
-
guidelinePatterns,
|
|
2465
|
-
guidelinePaths,
|
|
2466
2403
|
textParts: inputTextParts,
|
|
2467
2404
|
messageType: "input",
|
|
2468
2405
|
verbose
|
|
@@ -2512,40 +2449,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2512
2449
|
userFilePaths.push(segment.resolvedPath);
|
|
2513
2450
|
}
|
|
2514
2451
|
}
|
|
2515
|
-
const allFilePaths = [
|
|
2516
|
-
...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
2517
|
-
...userFilePaths
|
|
2518
|
-
];
|
|
2519
2452
|
const testCase = {
|
|
2520
2453
|
id,
|
|
2521
|
-
|
|
2454
|
+
eval_set: evalSetName,
|
|
2522
2455
|
conversation_id: conversationId,
|
|
2523
2456
|
question,
|
|
2524
2457
|
input: inputMessages,
|
|
2525
2458
|
input_segments: inputSegments,
|
|
2526
2459
|
expected_output: outputSegments,
|
|
2527
2460
|
reference_answer: referenceAnswer,
|
|
2528
|
-
|
|
2529
|
-
guideline_patterns: guidelinePatterns,
|
|
2530
|
-
file_paths: allFilePaths,
|
|
2461
|
+
file_paths: userFilePaths,
|
|
2531
2462
|
criteria: outcome ?? "",
|
|
2532
2463
|
evaluator: evalCaseEvaluatorKind,
|
|
2533
2464
|
assertions: evaluators
|
|
2534
2465
|
};
|
|
2535
|
-
if (verbose) {
|
|
2536
|
-
console.log(`
|
|
2537
|
-
[Test: ${id}]`);
|
|
2538
|
-
if (testCase.guideline_paths.length > 0) {
|
|
2539
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
2540
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
2541
|
-
console.log(` - ${guidelinePath}`);
|
|
2542
|
-
}
|
|
2543
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2544
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
2545
|
-
} else {
|
|
2546
|
-
console.log(" No guidelines found");
|
|
2547
|
-
}
|
|
2548
|
-
}
|
|
2549
2466
|
results.push(testCase);
|
|
2550
2467
|
}
|
|
2551
2468
|
return results;
|
|
@@ -2603,30 +2520,7 @@ function parseMetadata(suite) {
|
|
|
2603
2520
|
}
|
|
2604
2521
|
|
|
2605
2522
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2606
|
-
import { readFile as readFile6 } from "node:fs/promises";
|
|
2607
|
-
import path7 from "node:path";
|
|
2608
|
-
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2609
|
-
var ANSI_RESET7 = "\x1B[0m";
|
|
2610
2523
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2611
|
-
const guidelineParts = [];
|
|
2612
|
-
for (const rawPath of testCase.guideline_paths) {
|
|
2613
|
-
const absolutePath = path7.resolve(rawPath);
|
|
2614
|
-
if (!await fileExists2(absolutePath)) {
|
|
2615
|
-
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
2616
|
-
continue;
|
|
2617
|
-
}
|
|
2618
|
-
try {
|
|
2619
|
-
const content = (await readFile6(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
2620
|
-
guidelineParts.push({
|
|
2621
|
-
content,
|
|
2622
|
-
isFile: true,
|
|
2623
|
-
displayPath: path7.basename(absolutePath)
|
|
2624
|
-
});
|
|
2625
|
-
} catch (error) {
|
|
2626
|
-
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
2627
|
-
}
|
|
2628
|
-
}
|
|
2629
|
-
const guidelines = formatFileContents(guidelineParts);
|
|
2630
2524
|
const segmentsByMessage = [];
|
|
2631
2525
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
2632
2526
|
for (const segment of testCase.input_segments) {
|
|
@@ -2651,10 +2545,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
2651
2545
|
if (type === "file") {
|
|
2652
2546
|
const value = asString5(segment.value);
|
|
2653
2547
|
if (!value) continue;
|
|
2654
|
-
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
2655
|
-
messageSegments.push({ type: "guideline_ref", path: value });
|
|
2656
|
-
continue;
|
|
2657
|
-
}
|
|
2658
2548
|
const fileText = fileContentsByPath.get(value);
|
|
2659
2549
|
if (fileText !== void 0) {
|
|
2660
2550
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
@@ -2703,10 +2593,6 @@ ${messageContent}`);
|
|
|
2703
2593
|
} else {
|
|
2704
2594
|
const questionParts = [];
|
|
2705
2595
|
for (const segment of testCase.input_segments) {
|
|
2706
|
-
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
2707
|
-
questionParts.push(`<Attached: ${segment.path}>`);
|
|
2708
|
-
continue;
|
|
2709
|
-
}
|
|
2710
2596
|
const formattedContent = formatSegment(segment, mode);
|
|
2711
2597
|
if (formattedContent) {
|
|
2712
2598
|
questionParts.push(formattedContent);
|
|
@@ -2717,11 +2603,9 @@ ${messageContent}`);
|
|
|
2717
2603
|
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
2718
2604
|
messages: testCase.input,
|
|
2719
2605
|
segmentsByMessage,
|
|
2720
|
-
guidelinePatterns: testCase.guideline_patterns,
|
|
2721
|
-
guidelineContent: guidelines,
|
|
2722
2606
|
mode
|
|
2723
2607
|
}) : void 0;
|
|
2724
|
-
return { question, guidelines, chatPrompt };
|
|
2608
|
+
return { question, chatPrompt };
|
|
2725
2609
|
}
|
|
2726
2610
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
2727
2611
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -2736,14 +2620,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
2736
2620
|
return messagesWithContent > 1;
|
|
2737
2621
|
}
|
|
2738
2622
|
function buildChatPromptFromSegments(options) {
|
|
2739
|
-
const {
|
|
2740
|
-
messages,
|
|
2741
|
-
segmentsByMessage,
|
|
2742
|
-
guidelinePatterns,
|
|
2743
|
-
guidelineContent,
|
|
2744
|
-
systemPrompt,
|
|
2745
|
-
mode = "lm"
|
|
2746
|
-
} = options;
|
|
2623
|
+
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
2747
2624
|
if (messages.length === 0) {
|
|
2748
2625
|
return void 0;
|
|
2749
2626
|
}
|
|
@@ -2751,11 +2628,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
2751
2628
|
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
2752
2629
|
systemSegments.push(systemPrompt.trim());
|
|
2753
2630
|
}
|
|
2754
|
-
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
2755
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
2756
|
-
|
|
2757
|
-
${guidelineContent.trim()}`);
|
|
2758
|
-
}
|
|
2759
2631
|
let startIndex = 0;
|
|
2760
2632
|
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
2761
2633
|
const segments = segmentsByMessage[startIndex];
|
|
@@ -2791,15 +2663,8 @@ ${guidelineContent.trim()}`);
|
|
|
2791
2663
|
contentParts.push("@[Tool]:");
|
|
2792
2664
|
}
|
|
2793
2665
|
for (const segment of segments) {
|
|
2794
|
-
if (segment.type === "guideline_ref") {
|
|
2795
|
-
continue;
|
|
2796
|
-
}
|
|
2797
2666
|
const formatted = formatSegment(segment, mode);
|
|
2798
2667
|
if (formatted) {
|
|
2799
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
2800
|
-
if (isGuidelineRef) {
|
|
2801
|
-
continue;
|
|
2802
|
-
}
|
|
2803
2668
|
contentParts.push(formatted);
|
|
2804
2669
|
}
|
|
2805
2670
|
}
|
|
@@ -2817,30 +2682,27 @@ ${guidelineContent.trim()}`);
|
|
|
2817
2682
|
function asString5(value) {
|
|
2818
2683
|
return typeof value === "string" ? value : void 0;
|
|
2819
2684
|
}
|
|
2820
|
-
function logWarning5(message) {
|
|
2821
|
-
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
2822
|
-
}
|
|
2823
2685
|
|
|
2824
2686
|
// src/evaluation/yaml-parser.ts
|
|
2825
|
-
var
|
|
2687
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2826
2688
|
var ANSI_RED3 = "\x1B[31m";
|
|
2827
|
-
var
|
|
2689
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
2828
2690
|
function resolveTests(suite) {
|
|
2829
2691
|
if (suite.tests !== void 0) return suite.tests;
|
|
2830
2692
|
if (suite.eval_cases !== void 0) {
|
|
2831
|
-
|
|
2693
|
+
logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
|
|
2832
2694
|
return suite.eval_cases;
|
|
2833
2695
|
}
|
|
2834
2696
|
if (suite.evalcases !== void 0) {
|
|
2835
|
-
|
|
2697
|
+
logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
|
|
2836
2698
|
return suite.evalcases;
|
|
2837
2699
|
}
|
|
2838
2700
|
return void 0;
|
|
2839
2701
|
}
|
|
2840
2702
|
async function readTestSuiteMetadata(testFilePath) {
|
|
2841
2703
|
try {
|
|
2842
|
-
const absolutePath = path8.resolve(testFilePath);
|
|
2843
|
-
const content = await readFile7(absolutePath, "utf8");
|
|
2704
|
+
const absolutePath = path7.resolve(testFilePath);
|
|
2705
|
+
const content = await readFile6(absolutePath, "utf8");
|
|
2844
2706
|
const parsed = interpolateEnv(parse2(content), process.env);
|
|
2845
2707
|
if (!isJsonObject(parsed)) {
|
|
2846
2708
|
return {};
|
|
@@ -2869,6 +2731,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
2869
2731
|
tests,
|
|
2870
2732
|
trials: extractTrialsConfig(parsed),
|
|
2871
2733
|
targets: extractTargetsFromSuite(parsed),
|
|
2734
|
+
workers: extractWorkersFromSuite(parsed),
|
|
2872
2735
|
cacheConfig: extractCacheConfig(parsed),
|
|
2873
2736
|
totalBudgetUsd: extractTotalBudgetUsd(parsed),
|
|
2874
2737
|
...metadata !== void 0 && { metadata },
|
|
@@ -2891,26 +2754,25 @@ var loadEvalCases = loadTests;
|
|
|
2891
2754
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
2892
2755
|
const verbose = options?.verbose ?? false;
|
|
2893
2756
|
const filterPattern = options?.filter;
|
|
2894
|
-
const absoluteTestPath = path8.resolve(evalFilePath);
|
|
2757
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
2895
2758
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2896
2759
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2897
2760
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2898
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
2899
|
-
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
2761
|
+
const rawFile = await readFile6(absoluteTestPath, "utf8");
|
|
2900
2762
|
const interpolated = interpolateEnv(parse2(rawFile), process.env);
|
|
2901
2763
|
if (!isJsonObject(interpolated)) {
|
|
2902
2764
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
2903
2765
|
}
|
|
2904
2766
|
const suite = interpolated;
|
|
2905
|
-
const
|
|
2906
|
-
const
|
|
2907
|
-
const
|
|
2767
|
+
const evalSetNameFromSuite = asString6(suite.name)?.trim();
|
|
2768
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
2769
|
+
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
2908
2770
|
const rawTestcases = resolveTests(suite);
|
|
2909
2771
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
2910
|
-
const evalFileDir = path8.dirname(absoluteTestPath);
|
|
2772
|
+
const evalFileDir = path7.dirname(absoluteTestPath);
|
|
2911
2773
|
let expandedTestcases;
|
|
2912
2774
|
if (typeof rawTestcases === "string") {
|
|
2913
|
-
const externalPath = path8.resolve(evalFileDir, rawTestcases);
|
|
2775
|
+
const externalPath = path7.resolve(evalFileDir, rawTestcases);
|
|
2914
2776
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
2915
2777
|
} else if (Array.isArray(rawTestcases)) {
|
|
2916
2778
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -2924,18 +2786,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2924
2786
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
2925
2787
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
2926
2788
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
2927
|
-
|
|
2789
|
+
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
2928
2790
|
}
|
|
2929
2791
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
2930
2792
|
const results = [];
|
|
2931
2793
|
for (const rawEvalcase of expandedTestcases) {
|
|
2932
2794
|
if (!isJsonObject(rawEvalcase)) {
|
|
2933
|
-
|
|
2795
|
+
logWarning5("Skipping invalid test entry (expected object)");
|
|
2934
2796
|
continue;
|
|
2935
2797
|
}
|
|
2936
2798
|
const evalcase = rawEvalcase;
|
|
2937
2799
|
const id = asString6(evalcase.id);
|
|
2938
|
-
if (filterPattern && (!id || !
|
|
2800
|
+
if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
|
|
2939
2801
|
continue;
|
|
2940
2802
|
}
|
|
2941
2803
|
const conversationId = asString6(evalcase.conversation_id);
|
|
@@ -2943,7 +2805,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2943
2805
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
2944
2806
|
outcome = asString6(evalcase.expected_outcome);
|
|
2945
2807
|
if (outcome) {
|
|
2946
|
-
|
|
2808
|
+
logWarning5(
|
|
2947
2809
|
`Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
2948
2810
|
);
|
|
2949
2811
|
}
|
|
@@ -2963,15 +2825,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2963
2825
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
2964
2826
|
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
2965
2827
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2966
|
-
const guidelinePaths = [];
|
|
2967
2828
|
const inputTextParts = [];
|
|
2968
2829
|
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
2969
2830
|
messages: effectiveSuiteInputMessages,
|
|
2970
2831
|
searchRoots,
|
|
2971
2832
|
repoRootPath,
|
|
2972
|
-
guidelinePatterns,
|
|
2973
|
-
guidelinePaths,
|
|
2974
|
-
treatFileSegmentsAsGuidelines: true,
|
|
2975
2833
|
textParts: inputTextParts,
|
|
2976
2834
|
messageType: "input",
|
|
2977
2835
|
verbose
|
|
@@ -2980,8 +2838,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2980
2838
|
messages: testInputMessages,
|
|
2981
2839
|
searchRoots,
|
|
2982
2840
|
repoRootPath,
|
|
2983
|
-
guidelinePatterns,
|
|
2984
|
-
guidelinePaths,
|
|
2985
2841
|
textParts: inputTextParts,
|
|
2986
2842
|
messageType: "input",
|
|
2987
2843
|
verbose
|
|
@@ -3030,26 +2886,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3030
2886
|
userFilePaths.push(segment.resolvedPath);
|
|
3031
2887
|
}
|
|
3032
2888
|
}
|
|
3033
|
-
const allFilePaths = [
|
|
3034
|
-
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
3035
|
-
...userFilePaths
|
|
3036
|
-
];
|
|
3037
2889
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
3038
2890
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
3039
2891
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
3040
2892
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
3041
2893
|
const testCase = {
|
|
3042
2894
|
id,
|
|
3043
|
-
|
|
2895
|
+
eval_set: evalSetName,
|
|
3044
2896
|
conversation_id: conversationId,
|
|
3045
2897
|
question,
|
|
3046
2898
|
input: inputMessages,
|
|
3047
2899
|
input_segments: inputSegments,
|
|
3048
2900
|
expected_output: outputSegments,
|
|
3049
2901
|
reference_answer: referenceAnswer,
|
|
3050
|
-
|
|
3051
|
-
guideline_patterns: guidelinePatterns,
|
|
3052
|
-
file_paths: allFilePaths,
|
|
2902
|
+
file_paths: userFilePaths,
|
|
3053
2903
|
criteria: outcome ?? "",
|
|
3054
2904
|
evaluator: evalCaseEvaluatorKind,
|
|
3055
2905
|
assertions: evaluators,
|
|
@@ -3057,20 +2907,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3057
2907
|
metadata,
|
|
3058
2908
|
targets: caseTargets
|
|
3059
2909
|
};
|
|
3060
|
-
if (verbose) {
|
|
3061
|
-
console.log(`
|
|
3062
|
-
[Test: ${id}]`);
|
|
3063
|
-
if (testCase.guideline_paths.length > 0) {
|
|
3064
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
3065
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
3066
|
-
console.log(` - ${guidelinePath}`);
|
|
3067
|
-
}
|
|
3068
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
3069
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
3070
|
-
} else {
|
|
3071
|
-
console.log(" No guidelines found");
|
|
3072
|
-
}
|
|
3073
|
-
}
|
|
3074
2910
|
results.push(testCase);
|
|
3075
2911
|
}
|
|
3076
2912
|
return { tests: results, parsed: suite };
|
|
@@ -3089,7 +2925,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3089
2925
|
if (!isJsonObject(raw)) return void 0;
|
|
3090
2926
|
const obj = raw;
|
|
3091
2927
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
3092
|
-
|
|
2928
|
+
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
3093
2929
|
}
|
|
3094
2930
|
const commandSource = obj.command ?? obj.script;
|
|
3095
2931
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
@@ -3097,8 +2933,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3097
2933
|
if (commandArr.length === 0) return void 0;
|
|
3098
2934
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
3099
2935
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
3100
|
-
if (cwd && !path8.isAbsolute(cwd)) {
|
|
3101
|
-
cwd = path8.resolve(evalFileDir, cwd);
|
|
2936
|
+
if (cwd && !path7.isAbsolute(cwd)) {
|
|
2937
|
+
cwd = path7.resolve(evalFileDir, cwd);
|
|
3102
2938
|
}
|
|
3103
2939
|
const config = { command: commandArr };
|
|
3104
2940
|
if (timeoutMs !== void 0) {
|
|
@@ -3188,10 +3024,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
3188
3024
|
}
|
|
3189
3025
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3190
3026
|
if (typeof raw === "string") {
|
|
3191
|
-
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
3027
|
+
const workspaceFilePath = path7.resolve(evalFileDir, raw);
|
|
3192
3028
|
let content;
|
|
3193
3029
|
try {
|
|
3194
|
-
content = await readFile7(workspaceFilePath, "utf8");
|
|
3030
|
+
content = await readFile6(workspaceFilePath, "utf8");
|
|
3195
3031
|
} catch {
|
|
3196
3032
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3197
3033
|
}
|
|
@@ -3201,7 +3037,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
3201
3037
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3202
3038
|
);
|
|
3203
3039
|
}
|
|
3204
|
-
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
3040
|
+
const workspaceFileDir = path7.dirname(workspaceFilePath);
|
|
3205
3041
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3206
3042
|
}
|
|
3207
3043
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -3221,8 +3057,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3221
3057
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
3222
3058
|
}
|
|
3223
3059
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
3224
|
-
if (template && !path8.isAbsolute(template)) {
|
|
3225
|
-
template = path8.resolve(evalFileDir, template);
|
|
3060
|
+
if (template && !path7.isAbsolute(template)) {
|
|
3061
|
+
template = path7.resolve(evalFileDir, template);
|
|
3226
3062
|
}
|
|
3227
3063
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3228
3064
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -3272,28 +3108,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3272
3108
|
function asString6(value) {
|
|
3273
3109
|
return typeof value === "string" ? value : void 0;
|
|
3274
3110
|
}
|
|
3275
|
-
function
|
|
3111
|
+
function logWarning5(message, details) {
|
|
3276
3112
|
if (details && details.length > 0) {
|
|
3277
3113
|
const detailBlock = details.join("\n");
|
|
3278
|
-
console.warn(`${
|
|
3279
|
-
${detailBlock}${
|
|
3114
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}
|
|
3115
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
3280
3116
|
} else {
|
|
3281
|
-
console.warn(`${
|
|
3117
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
3282
3118
|
}
|
|
3283
3119
|
}
|
|
3284
3120
|
function logError3(message, details) {
|
|
3285
3121
|
if (details && details.length > 0) {
|
|
3286
3122
|
const detailBlock = details.join("\n");
|
|
3287
3123
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
3288
|
-
${detailBlock}${
|
|
3124
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
3289
3125
|
} else {
|
|
3290
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
3126
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET7}`);
|
|
3291
3127
|
}
|
|
3292
3128
|
}
|
|
3293
3129
|
|
|
3294
3130
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3295
3131
|
import { readFileSync } from "node:fs";
|
|
3296
|
-
import
|
|
3132
|
+
import path8 from "node:path";
|
|
3297
3133
|
import { parse as parse3 } from "yaml";
|
|
3298
3134
|
function codeGraderInstruction(graderName, description) {
|
|
3299
3135
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -3538,7 +3374,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
3538
3374
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
3539
3375
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
3540
3376
|
const parsed = parse3(content);
|
|
3541
|
-
return transpileEvalYaml(parsed,
|
|
3377
|
+
return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
|
|
3542
3378
|
}
|
|
3543
3379
|
function getOutputFilenames(result) {
|
|
3544
3380
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -3773,10 +3609,10 @@ function buildChatPrompt(request) {
|
|
|
3773
3609
|
if (hasSystemMessage) {
|
|
3774
3610
|
return provided;
|
|
3775
3611
|
}
|
|
3776
|
-
const systemContent2 = resolveSystemContent(request
|
|
3612
|
+
const systemContent2 = resolveSystemContent(request);
|
|
3777
3613
|
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
3778
3614
|
}
|
|
3779
|
-
const systemContent = resolveSystemContent(request
|
|
3615
|
+
const systemContent = resolveSystemContent(request);
|
|
3780
3616
|
const userContent = request.question.trim();
|
|
3781
3617
|
const prompt = [
|
|
3782
3618
|
{ role: "system", content: systemContent },
|
|
@@ -3784,18 +3620,13 @@ function buildChatPrompt(request) {
|
|
|
3784
3620
|
];
|
|
3785
3621
|
return prompt;
|
|
3786
3622
|
}
|
|
3787
|
-
function resolveSystemContent(request
|
|
3623
|
+
function resolveSystemContent(request) {
|
|
3788
3624
|
const systemSegments = [];
|
|
3789
3625
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
3790
3626
|
systemSegments.push(request.systemPrompt.trim());
|
|
3791
3627
|
} else {
|
|
3792
3628
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
3793
3629
|
}
|
|
3794
|
-
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
3795
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
3796
|
-
|
|
3797
|
-
${request.guidelines.trim()}`);
|
|
3798
|
-
}
|
|
3799
3630
|
return systemSegments.join("\n\n");
|
|
3800
3631
|
}
|
|
3801
3632
|
function toModelMessages(chatPrompt) {
|
|
@@ -3979,7 +3810,7 @@ import { spawn } from "node:child_process";
|
|
|
3979
3810
|
import { randomUUID } from "node:crypto";
|
|
3980
3811
|
import { createWriteStream } from "node:fs";
|
|
3981
3812
|
import { mkdir } from "node:fs/promises";
|
|
3982
|
-
import
|
|
3813
|
+
import path10 from "node:path";
|
|
3983
3814
|
|
|
3984
3815
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
3985
3816
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -4035,17 +3866,11 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
4035
3866
|
}
|
|
4036
3867
|
|
|
4037
3868
|
// src/evaluation/providers/preread.ts
|
|
4038
|
-
import
|
|
4039
|
-
function buildPromptDocument(request, inputFiles
|
|
3869
|
+
import path9 from "node:path";
|
|
3870
|
+
function buildPromptDocument(request, inputFiles) {
|
|
4040
3871
|
const parts = [];
|
|
4041
|
-
const guidelineFiles = collectGuidelineFiles(
|
|
4042
|
-
inputFiles,
|
|
4043
|
-
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
4044
|
-
options?.guidelineOverrides
|
|
4045
|
-
);
|
|
4046
3872
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
4047
|
-
const
|
|
4048
|
-
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
3873
|
+
const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
|
|
4049
3874
|
if (prereadBlock.length > 0) {
|
|
4050
3875
|
parts.push("\n", prereadBlock);
|
|
4051
3876
|
}
|
|
@@ -4058,62 +3883,36 @@ function normalizeInputFiles(inputFiles) {
|
|
|
4058
3883
|
}
|
|
4059
3884
|
const deduped = /* @__PURE__ */ new Map();
|
|
4060
3885
|
for (const inputFile of inputFiles) {
|
|
4061
|
-
const absolutePath =
|
|
3886
|
+
const absolutePath = path9.resolve(inputFile);
|
|
4062
3887
|
if (!deduped.has(absolutePath)) {
|
|
4063
3888
|
deduped.set(absolutePath, absolutePath);
|
|
4064
3889
|
}
|
|
4065
3890
|
}
|
|
4066
3891
|
return Array.from(deduped.values());
|
|
4067
3892
|
}
|
|
4068
|
-
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
4069
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
4070
|
-
return [];
|
|
4071
|
-
}
|
|
4072
|
-
const unique = /* @__PURE__ */ new Map();
|
|
4073
|
-
for (const inputFile of inputFiles) {
|
|
4074
|
-
const absolutePath = path10.resolve(inputFile);
|
|
4075
|
-
if (overrides?.has(absolutePath)) {
|
|
4076
|
-
if (!unique.has(absolutePath)) {
|
|
4077
|
-
unique.set(absolutePath, absolutePath);
|
|
4078
|
-
}
|
|
4079
|
-
continue;
|
|
4080
|
-
}
|
|
4081
|
-
const normalized = absolutePath.split(path10.sep).join("/");
|
|
4082
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
4083
|
-
if (!unique.has(absolutePath)) {
|
|
4084
|
-
unique.set(absolutePath, absolutePath);
|
|
4085
|
-
}
|
|
4086
|
-
}
|
|
4087
|
-
}
|
|
4088
|
-
return Array.from(unique.values());
|
|
4089
|
-
}
|
|
4090
3893
|
function collectInputFiles(inputFiles) {
|
|
4091
3894
|
if (!inputFiles || inputFiles.length === 0) {
|
|
4092
3895
|
return [];
|
|
4093
3896
|
}
|
|
4094
3897
|
const unique = /* @__PURE__ */ new Map();
|
|
4095
3898
|
for (const inputFile of inputFiles) {
|
|
4096
|
-
const absolutePath =
|
|
3899
|
+
const absolutePath = path9.resolve(inputFile);
|
|
4097
3900
|
if (!unique.has(absolutePath)) {
|
|
4098
3901
|
unique.set(absolutePath, absolutePath);
|
|
4099
3902
|
}
|
|
4100
3903
|
}
|
|
4101
3904
|
return Array.from(unique.values());
|
|
4102
3905
|
}
|
|
4103
|
-
function buildMandatoryPrereadBlock(
|
|
4104
|
-
if (
|
|
3906
|
+
function buildMandatoryPrereadBlock(inputFiles) {
|
|
3907
|
+
if (inputFiles.length === 0) {
|
|
4105
3908
|
return "";
|
|
4106
3909
|
}
|
|
4107
3910
|
const buildList = (files) => files.map((absolutePath) => {
|
|
4108
|
-
const fileName =
|
|
3911
|
+
const fileName = path9.basename(absolutePath);
|
|
4109
3912
|
const fileUri = pathToFileUri(absolutePath);
|
|
4110
3913
|
return `* [${fileName}](${fileUri})`;
|
|
4111
3914
|
});
|
|
4112
3915
|
const sections = [];
|
|
4113
|
-
if (guidelineFiles.length > 0) {
|
|
4114
|
-
sections.push(`Read all guideline files:
|
|
4115
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
4116
|
-
}
|
|
4117
3916
|
if (inputFiles.length > 0) {
|
|
4118
3917
|
sections.push(`Read all input files:
|
|
4119
3918
|
${buildList(inputFiles).join("\n")}.`);
|
|
@@ -4125,7 +3924,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
4125
3924
|
return sections.join("\n");
|
|
4126
3925
|
}
|
|
4127
3926
|
function pathToFileUri(filePath) {
|
|
4128
|
-
const absolutePath =
|
|
3927
|
+
const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
|
|
4129
3928
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
4130
3929
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
4131
3930
|
return `file:///${normalizedPath}`;
|
|
@@ -4272,10 +4071,10 @@ var ClaudeCliProvider = class {
|
|
|
4272
4071
|
}
|
|
4273
4072
|
resolveCwd(cwdOverride) {
|
|
4274
4073
|
if (cwdOverride) {
|
|
4275
|
-
return
|
|
4074
|
+
return path10.resolve(cwdOverride);
|
|
4276
4075
|
}
|
|
4277
4076
|
if (this.config.cwd) {
|
|
4278
|
-
return
|
|
4077
|
+
return path10.resolve(this.config.cwd);
|
|
4279
4078
|
}
|
|
4280
4079
|
return void 0;
|
|
4281
4080
|
}
|
|
@@ -4285,9 +4084,9 @@ var ClaudeCliProvider = class {
|
|
|
4285
4084
|
return void 0;
|
|
4286
4085
|
}
|
|
4287
4086
|
if (this.config.logDir) {
|
|
4288
|
-
return
|
|
4087
|
+
return path10.resolve(this.config.logDir);
|
|
4289
4088
|
}
|
|
4290
|
-
return
|
|
4089
|
+
return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
4291
4090
|
}
|
|
4292
4091
|
async createStreamLogger(request) {
|
|
4293
4092
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4301,7 +4100,7 @@ var ClaudeCliProvider = class {
|
|
|
4301
4100
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
4302
4101
|
return void 0;
|
|
4303
4102
|
}
|
|
4304
|
-
const filePath =
|
|
4103
|
+
const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
|
|
4305
4104
|
try {
|
|
4306
4105
|
const logger = await ClaudeCliStreamLogger.create({
|
|
4307
4106
|
filePath,
|
|
@@ -4622,7 +4421,7 @@ function tryParseJson(line) {
|
|
|
4622
4421
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
4623
4422
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
4624
4423
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
4625
|
-
import
|
|
4424
|
+
import path11 from "node:path";
|
|
4626
4425
|
var claudeSdkModule = null;
|
|
4627
4426
|
async function loadClaudeSdk() {
|
|
4628
4427
|
if (!claudeSdkModule) {
|
|
@@ -4782,10 +4581,10 @@ var ClaudeSdkProvider = class {
|
|
|
4782
4581
|
}
|
|
4783
4582
|
resolveCwd(cwdOverride) {
|
|
4784
4583
|
if (cwdOverride) {
|
|
4785
|
-
return
|
|
4584
|
+
return path11.resolve(cwdOverride);
|
|
4786
4585
|
}
|
|
4787
4586
|
if (this.config.cwd) {
|
|
4788
|
-
return
|
|
4587
|
+
return path11.resolve(this.config.cwd);
|
|
4789
4588
|
}
|
|
4790
4589
|
return void 0;
|
|
4791
4590
|
}
|
|
@@ -4795,9 +4594,9 @@ var ClaudeSdkProvider = class {
|
|
|
4795
4594
|
return void 0;
|
|
4796
4595
|
}
|
|
4797
4596
|
if (this.config.logDir) {
|
|
4798
|
-
return
|
|
4597
|
+
return path11.resolve(this.config.logDir);
|
|
4799
4598
|
}
|
|
4800
|
-
return
|
|
4599
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude");
|
|
4801
4600
|
}
|
|
4802
4601
|
async createStreamLogger(request) {
|
|
4803
4602
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4811,7 +4610,7 @@ var ClaudeSdkProvider = class {
|
|
|
4811
4610
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
4812
4611
|
return void 0;
|
|
4813
4612
|
}
|
|
4814
|
-
const filePath =
|
|
4613
|
+
const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
|
|
4815
4614
|
try {
|
|
4816
4615
|
const logger = await ClaudeStreamLogger.create({
|
|
4817
4616
|
filePath,
|
|
@@ -5018,7 +4817,7 @@ function formatElapsed2(startedAt) {
|
|
|
5018
4817
|
import { exec as execWithCallback } from "node:child_process";
|
|
5019
4818
|
import fs from "node:fs/promises";
|
|
5020
4819
|
import os from "node:os";
|
|
5021
|
-
import
|
|
4820
|
+
import path12 from "node:path";
|
|
5022
4821
|
import { promisify } from "node:util";
|
|
5023
4822
|
import { z as z2 } from "zod";
|
|
5024
4823
|
var ToolCallSchema = z2.object({
|
|
@@ -5227,7 +5026,6 @@ var CliProvider = class {
|
|
|
5227
5026
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5228
5027
|
{
|
|
5229
5028
|
question: "",
|
|
5230
|
-
guidelines: "",
|
|
5231
5029
|
inputFiles: batchInputFiles,
|
|
5232
5030
|
evalCaseId: "batch",
|
|
5233
5031
|
attempt: 0
|
|
@@ -5460,7 +5258,6 @@ var CliProvider = class {
|
|
|
5460
5258
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5461
5259
|
{
|
|
5462
5260
|
question: "",
|
|
5463
|
-
guidelines: "",
|
|
5464
5261
|
inputFiles: [],
|
|
5465
5262
|
evalCaseId: "healthcheck",
|
|
5466
5263
|
attempt: 0
|
|
@@ -5501,7 +5298,6 @@ async function buildTemplateValues(request, config, outputFilePath) {
|
|
|
5501
5298
|
values: {
|
|
5502
5299
|
PROMPT: shellEscape(request.question ?? ""),
|
|
5503
5300
|
PROMPT_FILE: shellEscape(promptFilePath),
|
|
5504
|
-
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
5505
5301
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
5506
5302
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
5507
5303
|
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
@@ -5523,7 +5319,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
5523
5319
|
}
|
|
5524
5320
|
const unique = /* @__PURE__ */ new Map();
|
|
5525
5321
|
for (const inputFile of inputFiles) {
|
|
5526
|
-
const absolutePath =
|
|
5322
|
+
const absolutePath = path12.resolve(inputFile);
|
|
5527
5323
|
if (!unique.has(absolutePath)) {
|
|
5528
5324
|
unique.set(absolutePath, absolutePath);
|
|
5529
5325
|
}
|
|
@@ -5537,7 +5333,7 @@ function formatFileList(files, template) {
|
|
|
5537
5333
|
const formatter = template ?? "{path}";
|
|
5538
5334
|
return files.map((filePath) => {
|
|
5539
5335
|
const escapedPath = shellEscape(filePath);
|
|
5540
|
-
const escapedName = shellEscape(
|
|
5336
|
+
const escapedName = shellEscape(path12.basename(filePath));
|
|
5541
5337
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
5542
5338
|
}).join(" ");
|
|
5543
5339
|
}
|
|
@@ -5561,7 +5357,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
5561
5357
|
const safeEvalId = evalCaseId || "unknown";
|
|
5562
5358
|
const timestamp = Date.now();
|
|
5563
5359
|
const random = Math.random().toString(36).substring(2, 9);
|
|
5564
|
-
return
|
|
5360
|
+
return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
5565
5361
|
}
|
|
5566
5362
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
5567
5363
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -5575,7 +5371,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
5575
5371
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
5576
5372
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
5577
5373
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
5578
|
-
import
|
|
5374
|
+
import path13 from "node:path";
|
|
5579
5375
|
|
|
5580
5376
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
5581
5377
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -5810,10 +5606,10 @@ ${basePrompt}` : basePrompt;
|
|
|
5810
5606
|
}
|
|
5811
5607
|
resolveCwd(cwdOverride) {
|
|
5812
5608
|
if (cwdOverride) {
|
|
5813
|
-
return
|
|
5609
|
+
return path13.resolve(cwdOverride);
|
|
5814
5610
|
}
|
|
5815
5611
|
if (this.config.cwd) {
|
|
5816
|
-
return
|
|
5612
|
+
return path13.resolve(this.config.cwd);
|
|
5817
5613
|
}
|
|
5818
5614
|
return void 0;
|
|
5819
5615
|
}
|
|
@@ -5823,9 +5619,9 @@ ${basePrompt}` : basePrompt;
|
|
|
5823
5619
|
return void 0;
|
|
5824
5620
|
}
|
|
5825
5621
|
if (this.config.logDir) {
|
|
5826
|
-
return
|
|
5622
|
+
return path13.resolve(this.config.logDir);
|
|
5827
5623
|
}
|
|
5828
|
-
return
|
|
5624
|
+
return path13.join(process.cwd(), ".agentv", "logs", "codex");
|
|
5829
5625
|
}
|
|
5830
5626
|
async createStreamLogger(request) {
|
|
5831
5627
|
const logDir = this.resolveLogDirectory();
|
|
@@ -5839,7 +5635,7 @@ ${basePrompt}` : basePrompt;
|
|
|
5839
5635
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
5840
5636
|
return void 0;
|
|
5841
5637
|
}
|
|
5842
|
-
const filePath =
|
|
5638
|
+
const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
|
|
5843
5639
|
try {
|
|
5844
5640
|
const logger = await CodexSdkStreamLogger.create({
|
|
5845
5641
|
filePath,
|
|
@@ -5983,7 +5779,7 @@ function formatElapsed3(startedAt) {
|
|
|
5983
5779
|
// src/evaluation/providers/copilot-cli.ts
|
|
5984
5780
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
5985
5781
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
5986
|
-
import
|
|
5782
|
+
import path15 from "node:path";
|
|
5987
5783
|
import { Readable, Writable } from "node:stream";
|
|
5988
5784
|
import { spawn as spawn2 } from "node:child_process";
|
|
5989
5785
|
import * as acp from "@agentclientprotocol/sdk";
|
|
@@ -6045,7 +5841,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6045
5841
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6046
5842
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6047
5843
|
import { arch, platform } from "node:os";
|
|
6048
|
-
import
|
|
5844
|
+
import path14 from "node:path";
|
|
6049
5845
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
6050
5846
|
function resolvePlatformCliPath() {
|
|
6051
5847
|
const os3 = platform();
|
|
@@ -6069,7 +5865,7 @@ function resolvePlatformCliPath() {
|
|
|
6069
5865
|
try {
|
|
6070
5866
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6071
5867
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
6072
|
-
const binaryPath =
|
|
5868
|
+
const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
|
|
6073
5869
|
if (existsSync(binaryPath)) {
|
|
6074
5870
|
return binaryPath;
|
|
6075
5871
|
}
|
|
@@ -6077,7 +5873,7 @@ function resolvePlatformCliPath() {
|
|
|
6077
5873
|
}
|
|
6078
5874
|
let searchDir = process.cwd();
|
|
6079
5875
|
for (let i = 0; i < 10; i++) {
|
|
6080
|
-
const standardPath =
|
|
5876
|
+
const standardPath = path14.join(
|
|
6081
5877
|
searchDir,
|
|
6082
5878
|
"node_modules",
|
|
6083
5879
|
...packageName.split("/"),
|
|
@@ -6086,13 +5882,13 @@ function resolvePlatformCliPath() {
|
|
|
6086
5882
|
if (existsSync(standardPath)) {
|
|
6087
5883
|
return standardPath;
|
|
6088
5884
|
}
|
|
6089
|
-
const bunDir =
|
|
5885
|
+
const bunDir = path14.join(searchDir, "node_modules", ".bun");
|
|
6090
5886
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6091
5887
|
try {
|
|
6092
5888
|
const entries = readdirSync(bunDir);
|
|
6093
5889
|
for (const entry of entries) {
|
|
6094
5890
|
if (entry.startsWith(prefix)) {
|
|
6095
|
-
const candidate =
|
|
5891
|
+
const candidate = path14.join(
|
|
6096
5892
|
bunDir,
|
|
6097
5893
|
entry,
|
|
6098
5894
|
"node_modules",
|
|
@@ -6107,7 +5903,7 @@ function resolvePlatformCliPath() {
|
|
|
6107
5903
|
}
|
|
6108
5904
|
} catch {
|
|
6109
5905
|
}
|
|
6110
|
-
const parent =
|
|
5906
|
+
const parent = path14.dirname(searchDir);
|
|
6111
5907
|
if (parent === searchDir) break;
|
|
6112
5908
|
searchDir = parent;
|
|
6113
5909
|
}
|
|
@@ -6445,10 +6241,10 @@ var CopilotCliProvider = class {
|
|
|
6445
6241
|
}
|
|
6446
6242
|
resolveCwd(cwdOverride) {
|
|
6447
6243
|
if (cwdOverride) {
|
|
6448
|
-
return
|
|
6244
|
+
return path15.resolve(cwdOverride);
|
|
6449
6245
|
}
|
|
6450
6246
|
if (this.config.cwd) {
|
|
6451
|
-
return
|
|
6247
|
+
return path15.resolve(this.config.cwd);
|
|
6452
6248
|
}
|
|
6453
6249
|
return void 0;
|
|
6454
6250
|
}
|
|
@@ -6467,9 +6263,9 @@ var CopilotCliProvider = class {
|
|
|
6467
6263
|
return void 0;
|
|
6468
6264
|
}
|
|
6469
6265
|
if (this.config.logDir) {
|
|
6470
|
-
return
|
|
6266
|
+
return path15.resolve(this.config.logDir);
|
|
6471
6267
|
}
|
|
6472
|
-
return
|
|
6268
|
+
return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
6473
6269
|
}
|
|
6474
6270
|
async createStreamLogger(request) {
|
|
6475
6271
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6483,7 +6279,7 @@ var CopilotCliProvider = class {
|
|
|
6483
6279
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6484
6280
|
return void 0;
|
|
6485
6281
|
}
|
|
6486
|
-
const filePath =
|
|
6282
|
+
const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
6487
6283
|
try {
|
|
6488
6284
|
const logger = await CopilotStreamLogger.create(
|
|
6489
6285
|
{
|
|
@@ -6578,7 +6374,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6578
6374
|
// src/evaluation/providers/copilot-sdk.ts
|
|
6579
6375
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
6580
6376
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
6581
|
-
import
|
|
6377
|
+
import path16 from "node:path";
|
|
6582
6378
|
|
|
6583
6379
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
6584
6380
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -6857,10 +6653,10 @@ var CopilotSdkProvider = class {
|
|
|
6857
6653
|
}
|
|
6858
6654
|
resolveCwd(cwdOverride) {
|
|
6859
6655
|
if (cwdOverride) {
|
|
6860
|
-
return
|
|
6656
|
+
return path16.resolve(cwdOverride);
|
|
6861
6657
|
}
|
|
6862
6658
|
if (this.config.cwd) {
|
|
6863
|
-
return
|
|
6659
|
+
return path16.resolve(this.config.cwd);
|
|
6864
6660
|
}
|
|
6865
6661
|
return void 0;
|
|
6866
6662
|
}
|
|
@@ -6869,9 +6665,9 @@ var CopilotSdkProvider = class {
|
|
|
6869
6665
|
return void 0;
|
|
6870
6666
|
}
|
|
6871
6667
|
if (this.config.logDir) {
|
|
6872
|
-
return
|
|
6668
|
+
return path16.resolve(this.config.logDir);
|
|
6873
6669
|
}
|
|
6874
|
-
return
|
|
6670
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
6875
6671
|
}
|
|
6876
6672
|
async createStreamLogger(request) {
|
|
6877
6673
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6885,7 +6681,7 @@ var CopilotSdkProvider = class {
|
|
|
6885
6681
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
6886
6682
|
return void 0;
|
|
6887
6683
|
}
|
|
6888
|
-
const filePath =
|
|
6684
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
6889
6685
|
try {
|
|
6890
6686
|
const logger = await CopilotStreamLogger.create(
|
|
6891
6687
|
{
|
|
@@ -6962,8 +6758,7 @@ var MockProvider = class {
|
|
|
6962
6758
|
return {
|
|
6963
6759
|
output: [{ role: "assistant", content: this.cannedResponse }],
|
|
6964
6760
|
raw: {
|
|
6965
|
-
question: request.question
|
|
6966
|
-
guidelines: request.guidelines
|
|
6761
|
+
question: request.question
|
|
6967
6762
|
}
|
|
6968
6763
|
};
|
|
6969
6764
|
}
|
|
@@ -7241,7 +7036,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7241
7036
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
7242
7037
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7243
7038
|
import { tmpdir } from "node:os";
|
|
7244
|
-
import
|
|
7039
|
+
import path17 from "node:path";
|
|
7245
7040
|
|
|
7246
7041
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7247
7042
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -7322,7 +7117,7 @@ var PiCodingAgentProvider = class {
|
|
|
7322
7117
|
const workspaceRoot = await this.createWorkspace();
|
|
7323
7118
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
7324
7119
|
try {
|
|
7325
|
-
const promptFile =
|
|
7120
|
+
const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
|
|
7326
7121
|
await writeFile(promptFile, request.question, "utf8");
|
|
7327
7122
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
7328
7123
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
@@ -7384,12 +7179,12 @@ var PiCodingAgentProvider = class {
|
|
|
7384
7179
|
}
|
|
7385
7180
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
7386
7181
|
if (cwdOverride) {
|
|
7387
|
-
return
|
|
7182
|
+
return path17.resolve(cwdOverride);
|
|
7388
7183
|
}
|
|
7389
7184
|
if (!this.config.cwd) {
|
|
7390
7185
|
return workspaceRoot;
|
|
7391
7186
|
}
|
|
7392
|
-
return
|
|
7187
|
+
return path17.resolve(this.config.cwd);
|
|
7393
7188
|
}
|
|
7394
7189
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
7395
7190
|
const args = [];
|
|
@@ -7478,7 +7273,7 @@ ${prompt}` : prompt;
|
|
|
7478
7273
|
return env;
|
|
7479
7274
|
}
|
|
7480
7275
|
async createWorkspace() {
|
|
7481
|
-
return await mkdtemp(
|
|
7276
|
+
return await mkdtemp(path17.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7482
7277
|
}
|
|
7483
7278
|
async cleanupWorkspace(workspaceRoot) {
|
|
7484
7279
|
try {
|
|
@@ -7488,9 +7283,9 @@ ${prompt}` : prompt;
|
|
|
7488
7283
|
}
|
|
7489
7284
|
resolveLogDirectory() {
|
|
7490
7285
|
if (this.config.logDir) {
|
|
7491
|
-
return
|
|
7286
|
+
return path17.resolve(this.config.logDir);
|
|
7492
7287
|
}
|
|
7493
|
-
return
|
|
7288
|
+
return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7494
7289
|
}
|
|
7495
7290
|
async createStreamLogger(request) {
|
|
7496
7291
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7504,7 +7299,7 @@ ${prompt}` : prompt;
|
|
|
7504
7299
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7505
7300
|
return void 0;
|
|
7506
7301
|
}
|
|
7507
|
-
const filePath =
|
|
7302
|
+
const filePath = path17.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7508
7303
|
try {
|
|
7509
7304
|
const logger = await PiStreamLogger.create({
|
|
7510
7305
|
filePath,
|
|
@@ -8005,17 +7800,17 @@ var ProviderRegistry = class {
|
|
|
8005
7800
|
// src/evaluation/providers/vscode-provider.ts
|
|
8006
7801
|
import { exec as exec2 } from "node:child_process";
|
|
8007
7802
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
8008
|
-
import
|
|
7803
|
+
import path29 from "node:path";
|
|
8009
7804
|
import { promisify as promisify3 } from "node:util";
|
|
8010
7805
|
|
|
8011
7806
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
8012
7807
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
8013
|
-
import
|
|
7808
|
+
import path27 from "node:path";
|
|
8014
7809
|
|
|
8015
7810
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
8016
7811
|
import { constants as constants2 } from "node:fs";
|
|
8017
7812
|
import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8018
|
-
import
|
|
7813
|
+
import path18 from "node:path";
|
|
8019
7814
|
async function pathExists(target) {
|
|
8020
7815
|
try {
|
|
8021
7816
|
await access2(target, constants2.F_OK);
|
|
@@ -8031,7 +7826,7 @@ async function readDirEntries(target) {
|
|
|
8031
7826
|
const entries = await readdir(target, { withFileTypes: true });
|
|
8032
7827
|
return entries.map((entry) => ({
|
|
8033
7828
|
name: entry.name,
|
|
8034
|
-
absolutePath:
|
|
7829
|
+
absolutePath: path18.join(target, entry.name),
|
|
8035
7830
|
isDirectory: entry.isDirectory()
|
|
8036
7831
|
}));
|
|
8037
7832
|
}
|
|
@@ -8046,9 +7841,9 @@ async function removeIfExists(target) {
|
|
|
8046
7841
|
}
|
|
8047
7842
|
|
|
8048
7843
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
8049
|
-
import
|
|
7844
|
+
import path19 from "node:path";
|
|
8050
7845
|
function pathToFileUri2(filePath) {
|
|
8051
|
-
const absolutePath =
|
|
7846
|
+
const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
|
|
8052
7847
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8053
7848
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8054
7849
|
return `file:///${normalizedPath}`;
|
|
@@ -8057,7 +7852,7 @@ function pathToFileUri2(filePath) {
|
|
|
8057
7852
|
}
|
|
8058
7853
|
|
|
8059
7854
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
8060
|
-
import
|
|
7855
|
+
import path20 from "node:path";
|
|
8061
7856
|
|
|
8062
7857
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
8063
7858
|
function renderTemplate2(content, variables) {
|
|
@@ -8149,8 +7944,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
8149
7944
|
});
|
|
8150
7945
|
}
|
|
8151
7946
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
8152
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
8153
|
-
const responseList = responseFiles.map((file) => `"${
|
|
7947
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
|
|
7948
|
+
const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
|
|
8154
7949
|
return renderTemplate2(templateContent, {
|
|
8155
7950
|
requestFiles: requestLines,
|
|
8156
7951
|
responseList
|
|
@@ -8158,8 +7953,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
8158
7953
|
}
|
|
8159
7954
|
|
|
8160
7955
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
8161
|
-
import { readFile as
|
|
8162
|
-
import
|
|
7956
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
7957
|
+
import path21 from "node:path";
|
|
8163
7958
|
|
|
8164
7959
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
8165
7960
|
function sleep2(ms) {
|
|
@@ -8197,7 +7992,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8197
7992
|
const maxAttempts = 10;
|
|
8198
7993
|
while (attempts < maxAttempts) {
|
|
8199
7994
|
try {
|
|
8200
|
-
const content = await
|
|
7995
|
+
const content = await readFile7(responseFileFinal, { encoding: "utf8" });
|
|
8201
7996
|
if (!silent) {
|
|
8202
7997
|
process.stdout.write(`${content}
|
|
8203
7998
|
`);
|
|
@@ -8218,7 +8013,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8218
8013
|
}
|
|
8219
8014
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8220
8015
|
if (!silent) {
|
|
8221
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8016
|
+
const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
|
|
8222
8017
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8223
8018
|
}
|
|
8224
8019
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8227,7 +8022,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8227
8022
|
while (pending.size > 0) {
|
|
8228
8023
|
if (Date.now() >= deadline) {
|
|
8229
8024
|
if (!silent) {
|
|
8230
|
-
const remaining = [...pending].map((f) =>
|
|
8025
|
+
const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
|
|
8231
8026
|
console.error(
|
|
8232
8027
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8233
8028
|
);
|
|
@@ -8254,7 +8049,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8254
8049
|
const maxAttempts = 10;
|
|
8255
8050
|
while (attempts < maxAttempts) {
|
|
8256
8051
|
try {
|
|
8257
|
-
const content = await
|
|
8052
|
+
const content = await readFile7(file, { encoding: "utf8" });
|
|
8258
8053
|
if (!silent) {
|
|
8259
8054
|
process.stdout.write(`${content}
|
|
8260
8055
|
`);
|
|
@@ -8278,15 +8073,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8278
8073
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8279
8074
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8280
8075
|
import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
|
|
8281
|
-
import
|
|
8076
|
+
import path24 from "node:path";
|
|
8282
8077
|
import { promisify as promisify2 } from "node:util";
|
|
8283
8078
|
|
|
8284
8079
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8285
|
-
import
|
|
8080
|
+
import path23 from "node:path";
|
|
8286
8081
|
|
|
8287
8082
|
// src/paths.ts
|
|
8288
8083
|
import os2 from "node:os";
|
|
8289
|
-
import
|
|
8084
|
+
import path22 from "node:path";
|
|
8290
8085
|
var logged = false;
|
|
8291
8086
|
function getAgentvHome() {
|
|
8292
8087
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8297,19 +8092,19 @@ function getAgentvHome() {
|
|
|
8297
8092
|
}
|
|
8298
8093
|
return envHome;
|
|
8299
8094
|
}
|
|
8300
|
-
return
|
|
8095
|
+
return path22.join(os2.homedir(), ".agentv");
|
|
8301
8096
|
}
|
|
8302
8097
|
function getWorkspacesRoot() {
|
|
8303
|
-
return
|
|
8098
|
+
return path22.join(getAgentvHome(), "workspaces");
|
|
8304
8099
|
}
|
|
8305
8100
|
function getSubagentsRoot() {
|
|
8306
|
-
return
|
|
8101
|
+
return path22.join(getAgentvHome(), "subagents");
|
|
8307
8102
|
}
|
|
8308
8103
|
function getTraceStateRoot() {
|
|
8309
|
-
return
|
|
8104
|
+
return path22.join(getAgentvHome(), "trace-state");
|
|
8310
8105
|
}
|
|
8311
8106
|
function getWorkspacePoolRoot() {
|
|
8312
|
-
return
|
|
8107
|
+
return path22.join(getAgentvHome(), "workspace-pool");
|
|
8313
8108
|
}
|
|
8314
8109
|
|
|
8315
8110
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8317,7 +8112,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8317
8112
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8318
8113
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
8319
8114
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
8320
|
-
return
|
|
8115
|
+
return path23.join(getSubagentsRoot(), folder);
|
|
8321
8116
|
}
|
|
8322
8117
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8323
8118
|
|
|
@@ -8384,11 +8179,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8384
8179
|
await raceSpawnError(child);
|
|
8385
8180
|
return true;
|
|
8386
8181
|
}
|
|
8387
|
-
const aliveFile =
|
|
8182
|
+
const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8388
8183
|
await removeIfExists(aliveFile);
|
|
8389
|
-
const githubAgentsDir =
|
|
8184
|
+
const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
|
|
8390
8185
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
8391
|
-
const wakeupDst =
|
|
8186
|
+
const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
|
|
8392
8187
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8393
8188
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8394
8189
|
label: "open-workspace"
|
|
@@ -8401,7 +8196,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8401
8196
|
"chat",
|
|
8402
8197
|
"-m",
|
|
8403
8198
|
wakeupChatId,
|
|
8404
|
-
`create a file named .alive in the ${
|
|
8199
|
+
`create a file named .alive in the ${path24.basename(subagentDir)} folder`
|
|
8405
8200
|
];
|
|
8406
8201
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8407
8202
|
await raceSpawnError(wakeupChild);
|
|
@@ -8416,10 +8211,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8416
8211
|
return true;
|
|
8417
8212
|
}
|
|
8418
8213
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8419
|
-
const workspacePath =
|
|
8420
|
-
const messagesDir =
|
|
8214
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
8215
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
8421
8216
|
await mkdir8(messagesDir, { recursive: true });
|
|
8422
|
-
const reqFile =
|
|
8217
|
+
const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
|
|
8423
8218
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8424
8219
|
const reqUri = pathToFileUri2(reqFile);
|
|
8425
8220
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8427,16 +8222,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8427
8222
|
chatArgs.push("-a", attachment);
|
|
8428
8223
|
}
|
|
8429
8224
|
chatArgs.push("-a", reqFile);
|
|
8430
|
-
chatArgs.push(`Follow instructions in [${
|
|
8225
|
+
chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
|
|
8431
8226
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8432
8227
|
workspacePath,
|
|
8433
|
-
|
|
8228
|
+
path24.basename(subagentDir),
|
|
8434
8229
|
subagentDir,
|
|
8435
8230
|
vscodeCmd
|
|
8436
8231
|
);
|
|
8437
8232
|
if (!workspaceReady) {
|
|
8438
8233
|
throw new Error(
|
|
8439
|
-
`VS Code workspace '${
|
|
8234
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8440
8235
|
);
|
|
8441
8236
|
}
|
|
8442
8237
|
await sleep2(500);
|
|
@@ -8444,8 +8239,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8444
8239
|
await raceSpawnError(child);
|
|
8445
8240
|
}
|
|
8446
8241
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8447
|
-
const workspacePath =
|
|
8448
|
-
const messagesDir =
|
|
8242
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
8243
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
8449
8244
|
await mkdir8(messagesDir, { recursive: true });
|
|
8450
8245
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8451
8246
|
for (const attachment of attachmentPaths) {
|
|
@@ -8454,13 +8249,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8454
8249
|
chatArgs.push(chatInstruction);
|
|
8455
8250
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8456
8251
|
workspacePath,
|
|
8457
|
-
|
|
8252
|
+
path24.basename(subagentDir),
|
|
8458
8253
|
subagentDir,
|
|
8459
8254
|
vscodeCmd
|
|
8460
8255
|
);
|
|
8461
8256
|
if (!workspaceReady) {
|
|
8462
8257
|
throw new Error(
|
|
8463
|
-
`VS Code workspace '${
|
|
8258
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8464
8259
|
);
|
|
8465
8260
|
}
|
|
8466
8261
|
await sleep2(500);
|
|
@@ -8469,11 +8264,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8469
8264
|
}
|
|
8470
8265
|
|
|
8471
8266
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8472
|
-
import { copyFile, mkdir as mkdir9, readFile as
|
|
8473
|
-
import
|
|
8267
|
+
import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
8268
|
+
import path26 from "node:path";
|
|
8474
8269
|
|
|
8475
8270
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8476
|
-
import
|
|
8271
|
+
import path25 from "node:path";
|
|
8477
8272
|
import JSON5 from "json5";
|
|
8478
8273
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8479
8274
|
let workspace;
|
|
@@ -8490,10 +8285,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8490
8285
|
}
|
|
8491
8286
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8492
8287
|
const folderPath = folder.path;
|
|
8493
|
-
if (
|
|
8288
|
+
if (path25.isAbsolute(folderPath)) {
|
|
8494
8289
|
return folder;
|
|
8495
8290
|
}
|
|
8496
|
-
const absolutePath =
|
|
8291
|
+
const absolutePath = path25.resolve(templateDir, folderPath);
|
|
8497
8292
|
return {
|
|
8498
8293
|
...folder,
|
|
8499
8294
|
path: absolutePath
|
|
@@ -8515,19 +8310,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8515
8310
|
if (locationMap && typeof locationMap === "object") {
|
|
8516
8311
|
const transformedMap = {};
|
|
8517
8312
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8518
|
-
const isAbsolute =
|
|
8313
|
+
const isAbsolute = path25.isAbsolute(locationPath);
|
|
8519
8314
|
if (isAbsolute) {
|
|
8520
8315
|
transformedMap[locationPath] = value;
|
|
8521
8316
|
} else {
|
|
8522
8317
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8523
8318
|
if (firstGlobIndex === -1) {
|
|
8524
|
-
const resolvedPath =
|
|
8319
|
+
const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8525
8320
|
transformedMap[resolvedPath] = value;
|
|
8526
8321
|
} else {
|
|
8527
8322
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8528
8323
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8529
8324
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8530
|
-
const resolvedPath = (
|
|
8325
|
+
const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
|
|
8531
8326
|
/\\/g,
|
|
8532
8327
|
"/"
|
|
8533
8328
|
);
|
|
@@ -8568,7 +8363,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8568
8363
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8569
8364
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8570
8365
|
for (const subagent of subagents) {
|
|
8571
|
-
const lockFile =
|
|
8366
|
+
const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8572
8367
|
if (!await pathExists(lockFile)) {
|
|
8573
8368
|
return subagent.absolutePath;
|
|
8574
8369
|
}
|
|
@@ -8578,7 +8373,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8578
8373
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8579
8374
|
let workspaceContent;
|
|
8580
8375
|
if (workspaceTemplate) {
|
|
8581
|
-
const workspaceSrc =
|
|
8376
|
+
const workspaceSrc = path26.resolve(workspaceTemplate);
|
|
8582
8377
|
if (!await pathExists(workspaceSrc)) {
|
|
8583
8378
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8584
8379
|
}
|
|
@@ -8586,18 +8381,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8586
8381
|
if (!stats.isFile()) {
|
|
8587
8382
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
8588
8383
|
}
|
|
8589
|
-
const templateText = await
|
|
8384
|
+
const templateText = await readFile8(workspaceSrc, "utf8");
|
|
8590
8385
|
workspaceContent = JSON.parse(templateText);
|
|
8591
8386
|
} else {
|
|
8592
8387
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8593
8388
|
}
|
|
8594
|
-
const workspaceName = `${
|
|
8595
|
-
const workspaceDst =
|
|
8596
|
-
const templateDir = workspaceTemplate ?
|
|
8389
|
+
const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
|
|
8390
|
+
const workspaceDst = path26.join(subagentDir, workspaceName);
|
|
8391
|
+
const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
|
|
8597
8392
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8598
8393
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8599
8394
|
if (cwd) {
|
|
8600
|
-
const absCwd =
|
|
8395
|
+
const absCwd = path26.resolve(cwd);
|
|
8601
8396
|
const parsed = JSON.parse(transformedContent);
|
|
8602
8397
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8603
8398
|
if (!alreadyPresent) {
|
|
@@ -8606,35 +8401,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8606
8401
|
}
|
|
8607
8402
|
}
|
|
8608
8403
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8609
|
-
const messagesDir =
|
|
8404
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
8610
8405
|
await mkdir9(messagesDir, { recursive: true });
|
|
8611
8406
|
return { workspace: workspaceDst, messagesDir };
|
|
8612
8407
|
}
|
|
8613
8408
|
async function createSubagentLock(subagentDir) {
|
|
8614
|
-
const messagesDir =
|
|
8409
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
8615
8410
|
if (await pathExists(messagesDir)) {
|
|
8616
8411
|
const files = await readdir2(messagesDir);
|
|
8617
8412
|
await Promise.all(
|
|
8618
8413
|
files.map(async (file) => {
|
|
8619
|
-
const target =
|
|
8414
|
+
const target = path26.join(messagesDir, file);
|
|
8620
8415
|
await removeIfExists(target);
|
|
8621
8416
|
})
|
|
8622
8417
|
);
|
|
8623
8418
|
}
|
|
8624
|
-
const githubAgentsDir =
|
|
8419
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
8625
8420
|
if (await pathExists(githubAgentsDir)) {
|
|
8626
8421
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
8627
8422
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8628
8423
|
await Promise.all(
|
|
8629
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
8424
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
|
|
8630
8425
|
);
|
|
8631
8426
|
}
|
|
8632
|
-
const lockFile =
|
|
8427
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8633
8428
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8634
8429
|
return lockFile;
|
|
8635
8430
|
}
|
|
8636
8431
|
async function removeSubagentLock(subagentDir) {
|
|
8637
|
-
const lockFile =
|
|
8432
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8638
8433
|
await removeIfExists(lockFile);
|
|
8639
8434
|
}
|
|
8640
8435
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8654,9 +8449,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8654
8449
|
return 1;
|
|
8655
8450
|
}
|
|
8656
8451
|
if (promptFile) {
|
|
8657
|
-
const githubAgentsDir =
|
|
8452
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
8658
8453
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8659
|
-
const agentFile =
|
|
8454
|
+
const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
|
|
8660
8455
|
try {
|
|
8661
8456
|
await copyFile(promptFile, agentFile);
|
|
8662
8457
|
} catch (error) {
|
|
@@ -8675,7 +8470,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
8675
8470
|
if (!promptFile) {
|
|
8676
8471
|
return void 0;
|
|
8677
8472
|
}
|
|
8678
|
-
const resolvedPrompt =
|
|
8473
|
+
const resolvedPrompt = path27.resolve(promptFile);
|
|
8679
8474
|
if (!await pathExists(resolvedPrompt)) {
|
|
8680
8475
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8681
8476
|
}
|
|
@@ -8691,7 +8486,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8691
8486
|
}
|
|
8692
8487
|
const resolved = [];
|
|
8693
8488
|
for (const attachment of extraAttachments) {
|
|
8694
|
-
const resolvedPath =
|
|
8489
|
+
const resolvedPath = path27.resolve(attachment);
|
|
8695
8490
|
if (!await pathExists(resolvedPath)) {
|
|
8696
8491
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8697
8492
|
}
|
|
@@ -8733,7 +8528,7 @@ async function dispatchAgentSession(options) {
|
|
|
8733
8528
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8734
8529
|
};
|
|
8735
8530
|
}
|
|
8736
|
-
const subagentName =
|
|
8531
|
+
const subagentName = path27.basename(subagentDir);
|
|
8737
8532
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8738
8533
|
const preparationResult = await prepareSubagentDirectory(
|
|
8739
8534
|
subagentDir,
|
|
@@ -8761,9 +8556,9 @@ async function dispatchAgentSession(options) {
|
|
|
8761
8556
|
};
|
|
8762
8557
|
}
|
|
8763
8558
|
const timestamp = generateTimestamp();
|
|
8764
|
-
const messagesDir =
|
|
8765
|
-
const responseFileTmp =
|
|
8766
|
-
const responseFileFinal =
|
|
8559
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8560
|
+
const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
8561
|
+
const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
|
|
8767
8562
|
const requestInstructions = createRequestPrompt(
|
|
8768
8563
|
userQuery,
|
|
8769
8564
|
responseFileTmp,
|
|
@@ -8868,7 +8663,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8868
8663
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8869
8664
|
};
|
|
8870
8665
|
}
|
|
8871
|
-
subagentName =
|
|
8666
|
+
subagentName = path27.basename(subagentDir);
|
|
8872
8667
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8873
8668
|
const preparationResult = await prepareSubagentDirectory(
|
|
8874
8669
|
subagentDir,
|
|
@@ -8899,17 +8694,17 @@ async function dispatchBatchAgent(options) {
|
|
|
8899
8694
|
};
|
|
8900
8695
|
}
|
|
8901
8696
|
const timestamp = generateTimestamp();
|
|
8902
|
-
const messagesDir =
|
|
8697
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8903
8698
|
requestFiles = userQueries.map(
|
|
8904
|
-
(_, index) =>
|
|
8699
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
8905
8700
|
);
|
|
8906
8701
|
const responseTmpFiles = userQueries.map(
|
|
8907
|
-
(_, index) =>
|
|
8702
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
8908
8703
|
);
|
|
8909
8704
|
responseFilesFinal = userQueries.map(
|
|
8910
|
-
(_, index) =>
|
|
8705
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
8911
8706
|
);
|
|
8912
|
-
const orchestratorFile =
|
|
8707
|
+
const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
8913
8708
|
if (!dryRun) {
|
|
8914
8709
|
await Promise.all(
|
|
8915
8710
|
userQueries.map((query, index) => {
|
|
@@ -8995,7 +8790,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8995
8790
|
|
|
8996
8791
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
8997
8792
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
8998
|
-
import
|
|
8793
|
+
import path28 from "node:path";
|
|
8999
8794
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
9000
8795
|
folders: [
|
|
9001
8796
|
{
|
|
@@ -9026,7 +8821,7 @@ async function provisionSubagents(options) {
|
|
|
9026
8821
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
9027
8822
|
throw new Error("subagents must be a positive integer");
|
|
9028
8823
|
}
|
|
9029
|
-
const targetPath =
|
|
8824
|
+
const targetPath = path28.resolve(targetRoot);
|
|
9030
8825
|
if (!dryRun) {
|
|
9031
8826
|
await ensureDir(targetPath);
|
|
9032
8827
|
}
|
|
@@ -9046,7 +8841,7 @@ async function provisionSubagents(options) {
|
|
|
9046
8841
|
continue;
|
|
9047
8842
|
}
|
|
9048
8843
|
highestNumber = Math.max(highestNumber, parsed);
|
|
9049
|
-
const lockFile =
|
|
8844
|
+
const lockFile = path28.join(entry.absolutePath, lockName);
|
|
9050
8845
|
const locked = await pathExists(lockFile);
|
|
9051
8846
|
if (locked) {
|
|
9052
8847
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -9063,10 +8858,10 @@ async function provisionSubagents(options) {
|
|
|
9063
8858
|
break;
|
|
9064
8859
|
}
|
|
9065
8860
|
const subagentDir = subagent.absolutePath;
|
|
9066
|
-
const githubAgentsDir =
|
|
9067
|
-
const lockFile =
|
|
9068
|
-
const workspaceDst =
|
|
9069
|
-
const wakeupDst =
|
|
8861
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
8862
|
+
const lockFile = path28.join(subagentDir, lockName);
|
|
8863
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
8864
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9070
8865
|
const isLocked = await pathExists(lockFile);
|
|
9071
8866
|
if (isLocked && !force) {
|
|
9072
8867
|
continue;
|
|
@@ -9104,10 +8899,10 @@ async function provisionSubagents(options) {
|
|
|
9104
8899
|
let nextIndex = highestNumber;
|
|
9105
8900
|
while (subagentsProvisioned < subagents) {
|
|
9106
8901
|
nextIndex += 1;
|
|
9107
|
-
const subagentDir =
|
|
9108
|
-
const githubAgentsDir =
|
|
9109
|
-
const workspaceDst =
|
|
9110
|
-
const wakeupDst =
|
|
8902
|
+
const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
|
|
8903
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
8904
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
8905
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9111
8906
|
if (!dryRun) {
|
|
9112
8907
|
await ensureDir(subagentDir);
|
|
9113
8908
|
await ensureDir(githubAgentsDir);
|
|
@@ -9174,7 +8969,7 @@ var VSCodeProvider = class {
|
|
|
9174
8969
|
}
|
|
9175
8970
|
await this.ensureEnvironmentReady();
|
|
9176
8971
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
9177
|
-
const promptContent = buildPromptDocument2(request, inputFiles
|
|
8972
|
+
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
9178
8973
|
const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
|
|
9179
8974
|
const startTime = Date.now();
|
|
9180
8975
|
const session = await dispatchAgentSession({
|
|
@@ -9228,7 +9023,7 @@ var VSCodeProvider = class {
|
|
|
9228
9023
|
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
9229
9024
|
);
|
|
9230
9025
|
const userQueries = normalizedRequests.map(
|
|
9231
|
-
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles
|
|
9026
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
9232
9027
|
);
|
|
9233
9028
|
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
9234
9029
|
this.config.workspaceTemplate
|
|
@@ -9297,7 +9092,7 @@ var VSCodeProvider = class {
|
|
|
9297
9092
|
async function locateVSCodeExecutable(candidate) {
|
|
9298
9093
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9299
9094
|
if (includesPathSeparator) {
|
|
9300
|
-
const resolved =
|
|
9095
|
+
const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
|
|
9301
9096
|
try {
|
|
9302
9097
|
await access3(resolved, constants3.F_OK);
|
|
9303
9098
|
return resolved;
|
|
@@ -9326,41 +9121,35 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9326
9121
|
return void 0;
|
|
9327
9122
|
}
|
|
9328
9123
|
try {
|
|
9329
|
-
const stats = await stat4(
|
|
9124
|
+
const stats = await stat4(path29.resolve(template));
|
|
9330
9125
|
return stats.isFile() ? template : void 0;
|
|
9331
9126
|
} catch {
|
|
9332
9127
|
return template;
|
|
9333
9128
|
}
|
|
9334
9129
|
}
|
|
9335
|
-
function buildPromptDocument2(request, attachments
|
|
9130
|
+
function buildPromptDocument2(request, attachments) {
|
|
9336
9131
|
const parts = [];
|
|
9337
9132
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
9338
9133
|
parts.push(request.systemPrompt.trim());
|
|
9339
9134
|
}
|
|
9340
|
-
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
9341
9135
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
9342
|
-
const
|
|
9343
|
-
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
9136
|
+
const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
|
|
9344
9137
|
if (prereadBlock.length > 0) {
|
|
9345
9138
|
parts.push("\n", prereadBlock);
|
|
9346
9139
|
}
|
|
9347
9140
|
parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
|
|
9348
9141
|
return parts.join("\n").trim();
|
|
9349
9142
|
}
|
|
9350
|
-
function buildMandatoryPrereadBlock2(
|
|
9351
|
-
if (
|
|
9143
|
+
function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
9144
|
+
if (attachmentFiles.length === 0) {
|
|
9352
9145
|
return "";
|
|
9353
9146
|
}
|
|
9354
9147
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9355
|
-
const fileName =
|
|
9148
|
+
const fileName = path29.basename(absolutePath);
|
|
9356
9149
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9357
9150
|
return `* [${fileName}](${fileUri})`;
|
|
9358
9151
|
});
|
|
9359
9152
|
const sections = [];
|
|
9360
|
-
if (guidelineFiles.length > 0) {
|
|
9361
|
-
sections.push(`Read all guideline files:
|
|
9362
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
9363
|
-
}
|
|
9364
9153
|
if (attachmentFiles.length > 0) {
|
|
9365
9154
|
sections.push(`Read all attachment files:
|
|
9366
9155
|
${buildList(attachmentFiles).join("\n")}.`);
|
|
@@ -9371,29 +9160,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
|
|
|
9371
9160
|
);
|
|
9372
9161
|
return sections.join("\n");
|
|
9373
9162
|
}
|
|
9374
|
-
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
9375
|
-
if (!attachments || attachments.length === 0) {
|
|
9376
|
-
return [];
|
|
9377
|
-
}
|
|
9378
|
-
const unique = /* @__PURE__ */ new Map();
|
|
9379
|
-
for (const attachment of attachments) {
|
|
9380
|
-
const absolutePath = path30.resolve(attachment);
|
|
9381
|
-
const normalized = absolutePath.split(path30.sep).join("/");
|
|
9382
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
9383
|
-
if (!unique.has(absolutePath)) {
|
|
9384
|
-
unique.set(absolutePath, absolutePath);
|
|
9385
|
-
}
|
|
9386
|
-
}
|
|
9387
|
-
}
|
|
9388
|
-
return Array.from(unique.values());
|
|
9389
|
-
}
|
|
9390
9163
|
function collectAttachmentFiles(attachments) {
|
|
9391
9164
|
if (!attachments || attachments.length === 0) {
|
|
9392
9165
|
return [];
|
|
9393
9166
|
}
|
|
9394
9167
|
const unique = /* @__PURE__ */ new Map();
|
|
9395
9168
|
for (const attachment of attachments) {
|
|
9396
|
-
const absolutePath =
|
|
9169
|
+
const absolutePath = path29.resolve(attachment);
|
|
9397
9170
|
if (!unique.has(absolutePath)) {
|
|
9398
9171
|
unique.set(absolutePath, absolutePath);
|
|
9399
9172
|
}
|
|
@@ -9401,7 +9174,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9401
9174
|
return Array.from(unique.values());
|
|
9402
9175
|
}
|
|
9403
9176
|
function pathToFileUri3(filePath) {
|
|
9404
|
-
const absolutePath =
|
|
9177
|
+
const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
|
|
9405
9178
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9406
9179
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9407
9180
|
return `file:///${normalizedPath}`;
|
|
@@ -9414,7 +9187,7 @@ function normalizeAttachments(attachments) {
|
|
|
9414
9187
|
}
|
|
9415
9188
|
const deduped = /* @__PURE__ */ new Set();
|
|
9416
9189
|
for (const attachment of attachments) {
|
|
9417
|
-
deduped.add(
|
|
9190
|
+
deduped.add(path29.resolve(attachment));
|
|
9418
9191
|
}
|
|
9419
9192
|
return Array.from(deduped);
|
|
9420
9193
|
}
|
|
@@ -9423,7 +9196,7 @@ function mergeAttachments(all) {
|
|
|
9423
9196
|
for (const list of all) {
|
|
9424
9197
|
if (!list) continue;
|
|
9425
9198
|
for (const inputFile of list) {
|
|
9426
|
-
deduped.add(
|
|
9199
|
+
deduped.add(path29.resolve(inputFile));
|
|
9427
9200
|
}
|
|
9428
9201
|
}
|
|
9429
9202
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9471,8 +9244,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9471
9244
|
|
|
9472
9245
|
// src/evaluation/providers/targets-file.ts
|
|
9473
9246
|
import { constants as constants4 } from "node:fs";
|
|
9474
|
-
import { access as access4, readFile as
|
|
9475
|
-
import
|
|
9247
|
+
import { access as access4, readFile as readFile9 } from "node:fs/promises";
|
|
9248
|
+
import path30 from "node:path";
|
|
9476
9249
|
import { parse as parse4 } from "yaml";
|
|
9477
9250
|
function isRecord(value) {
|
|
9478
9251
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9509,11 +9282,11 @@ async function fileExists3(filePath) {
|
|
|
9509
9282
|
}
|
|
9510
9283
|
}
|
|
9511
9284
|
async function readTargetDefinitions(filePath) {
|
|
9512
|
-
const absolutePath =
|
|
9285
|
+
const absolutePath = path30.resolve(filePath);
|
|
9513
9286
|
if (!await fileExists3(absolutePath)) {
|
|
9514
9287
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9515
9288
|
}
|
|
9516
|
-
const raw = await
|
|
9289
|
+
const raw = await readFile9(absolutePath, "utf8");
|
|
9517
9290
|
const parsed = parse4(raw);
|
|
9518
9291
|
if (!isRecord(parsed)) {
|
|
9519
9292
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -9529,16 +9302,16 @@ function listTargetNames(definitions) {
|
|
|
9529
9302
|
}
|
|
9530
9303
|
|
|
9531
9304
|
// src/evaluation/providers/provider-discovery.ts
|
|
9532
|
-
import
|
|
9305
|
+
import path31 from "node:path";
|
|
9533
9306
|
import fg from "fast-glob";
|
|
9534
9307
|
async function discoverProviders(registry, baseDir) {
|
|
9535
9308
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9536
9309
|
const candidateDirs = [];
|
|
9537
|
-
let dir =
|
|
9538
|
-
const root =
|
|
9310
|
+
let dir = path31.resolve(baseDir);
|
|
9311
|
+
const root = path31.parse(dir).root;
|
|
9539
9312
|
while (dir !== root) {
|
|
9540
|
-
candidateDirs.push(
|
|
9541
|
-
dir =
|
|
9313
|
+
candidateDirs.push(path31.join(dir, ".agentv", "providers"));
|
|
9314
|
+
dir = path31.dirname(dir);
|
|
9542
9315
|
}
|
|
9543
9316
|
let files = [];
|
|
9544
9317
|
for (const providersDir of candidateDirs) {
|
|
@@ -9554,7 +9327,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9554
9327
|
}
|
|
9555
9328
|
const discoveredKinds = [];
|
|
9556
9329
|
for (const filePath of files) {
|
|
9557
|
-
const basename =
|
|
9330
|
+
const basename = path31.basename(filePath);
|
|
9558
9331
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9559
9332
|
if (registry.has(kindName)) {
|
|
9560
9333
|
continue;
|
|
@@ -9763,15 +9536,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9763
9536
|
});
|
|
9764
9537
|
}
|
|
9765
9538
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9766
|
-
const { mkdir: mkdir15, readFile:
|
|
9539
|
+
const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9767
9540
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9768
|
-
const
|
|
9541
|
+
const path44 = await import("node:path");
|
|
9769
9542
|
const { randomUUID: randomUUID9 } = await import("node:crypto");
|
|
9770
|
-
const dir =
|
|
9543
|
+
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
9771
9544
|
await mkdir15(dir, { recursive: true });
|
|
9772
|
-
const stdinPath =
|
|
9773
|
-
const stdoutPath =
|
|
9774
|
-
const stderrPath =
|
|
9545
|
+
const stdinPath = path44.join(dir, "stdin.txt");
|
|
9546
|
+
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
9547
|
+
const stderrPath = path44.join(dir, "stderr.txt");
|
|
9775
9548
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9776
9549
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9777
9550
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -9801,8 +9574,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
9801
9574
|
resolve(code ?? 0);
|
|
9802
9575
|
});
|
|
9803
9576
|
});
|
|
9804
|
-
const stdout = (await
|
|
9805
|
-
const stderr = (await
|
|
9577
|
+
const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9578
|
+
const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9806
9579
|
return { stdout, stderr, exitCode };
|
|
9807
9580
|
} finally {
|
|
9808
9581
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -10119,10 +9892,7 @@ var CodeEvaluator = class {
|
|
|
10119
9892
|
outputText: context.candidate,
|
|
10120
9893
|
output: outputForPayload,
|
|
10121
9894
|
outputPath,
|
|
10122
|
-
|
|
10123
|
-
inputFiles: context.evalCase.file_paths.filter(
|
|
10124
|
-
(path45) => !context.evalCase.guideline_paths.includes(path45)
|
|
10125
|
-
),
|
|
9895
|
+
inputFiles: context.evalCase.file_paths,
|
|
10126
9896
|
input: context.evalCase.input,
|
|
10127
9897
|
trace: context.trace ?? null,
|
|
10128
9898
|
tokenUsage: context.tokenUsage ?? null,
|
|
@@ -10253,7 +10023,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10253
10023
|
|
|
10254
10024
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10255
10025
|
import fs2 from "node:fs/promises";
|
|
10256
|
-
import
|
|
10026
|
+
import path32 from "node:path";
|
|
10257
10027
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10258
10028
|
import { z as z3 } from "zod";
|
|
10259
10029
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11085,8 +10855,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11085
10855
|
};
|
|
11086
10856
|
}
|
|
11087
10857
|
function resolveSandboxed(basePath, relativePath) {
|
|
11088
|
-
const resolved =
|
|
11089
|
-
if (!resolved.startsWith(basePath +
|
|
10858
|
+
const resolved = path32.resolve(basePath, relativePath);
|
|
10859
|
+
if (!resolved.startsWith(basePath + path32.sep) && resolved !== basePath) {
|
|
11090
10860
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
11091
10861
|
}
|
|
11092
10862
|
return resolved;
|
|
@@ -11176,11 +10946,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11176
10946
|
for (const entry of entries) {
|
|
11177
10947
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
11178
10948
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11179
|
-
const fullPath =
|
|
10949
|
+
const fullPath = path32.join(dirPath, entry.name);
|
|
11180
10950
|
if (entry.isDirectory()) {
|
|
11181
10951
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11182
10952
|
} else if (entry.isFile()) {
|
|
11183
|
-
const ext =
|
|
10953
|
+
const ext = path32.extname(entry.name).toLowerCase();
|
|
11184
10954
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11185
10955
|
try {
|
|
11186
10956
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -11192,7 +10962,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11192
10962
|
regex.lastIndex = 0;
|
|
11193
10963
|
if (regex.test(lines[i])) {
|
|
11194
10964
|
matches.push({
|
|
11195
|
-
file:
|
|
10965
|
+
file: path32.relative(workspacePath, fullPath),
|
|
11196
10966
|
line: i + 1,
|
|
11197
10967
|
text: lines[i].substring(0, 200)
|
|
11198
10968
|
});
|
|
@@ -11827,115 +11597,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11827
11597
|
* Evaluate a single field against the expected value.
|
|
11828
11598
|
*/
|
|
11829
11599
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11830
|
-
const { path:
|
|
11831
|
-
const candidateValue = resolvePath(candidateData,
|
|
11832
|
-
const expectedValue = resolvePath(expectedData,
|
|
11600
|
+
const { path: path44, match, required = true, weight = 1 } = fieldConfig;
|
|
11601
|
+
const candidateValue = resolvePath(candidateData, path44);
|
|
11602
|
+
const expectedValue = resolvePath(expectedData, path44);
|
|
11833
11603
|
if (expectedValue === void 0) {
|
|
11834
11604
|
return {
|
|
11835
|
-
path:
|
|
11605
|
+
path: path44,
|
|
11836
11606
|
score: 1,
|
|
11837
11607
|
// No expected value means no comparison needed
|
|
11838
11608
|
weight,
|
|
11839
11609
|
hit: true,
|
|
11840
|
-
message: `${
|
|
11610
|
+
message: `${path44}: no expected value`
|
|
11841
11611
|
};
|
|
11842
11612
|
}
|
|
11843
11613
|
if (candidateValue === void 0) {
|
|
11844
11614
|
if (required) {
|
|
11845
11615
|
return {
|
|
11846
|
-
path:
|
|
11616
|
+
path: path44,
|
|
11847
11617
|
score: 0,
|
|
11848
11618
|
weight,
|
|
11849
11619
|
hit: false,
|
|
11850
|
-
message: `${
|
|
11620
|
+
message: `${path44} (required, missing)`
|
|
11851
11621
|
};
|
|
11852
11622
|
}
|
|
11853
11623
|
return {
|
|
11854
|
-
path:
|
|
11624
|
+
path: path44,
|
|
11855
11625
|
score: 1,
|
|
11856
11626
|
// Don't penalize missing optional fields
|
|
11857
11627
|
weight: 0,
|
|
11858
11628
|
// Zero weight means it won't affect the score
|
|
11859
11629
|
hit: true,
|
|
11860
|
-
message: `${
|
|
11630
|
+
message: `${path44}: optional field missing`
|
|
11861
11631
|
};
|
|
11862
11632
|
}
|
|
11863
11633
|
switch (match) {
|
|
11864
11634
|
case "exact":
|
|
11865
|
-
return this.compareExact(
|
|
11635
|
+
return this.compareExact(path44, candidateValue, expectedValue, weight);
|
|
11866
11636
|
case "numeric_tolerance":
|
|
11867
11637
|
return this.compareNumericTolerance(
|
|
11868
|
-
|
|
11638
|
+
path44,
|
|
11869
11639
|
candidateValue,
|
|
11870
11640
|
expectedValue,
|
|
11871
11641
|
fieldConfig,
|
|
11872
11642
|
weight
|
|
11873
11643
|
);
|
|
11874
11644
|
case "date":
|
|
11875
|
-
return this.compareDate(
|
|
11645
|
+
return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
|
|
11876
11646
|
default:
|
|
11877
11647
|
return {
|
|
11878
|
-
path:
|
|
11648
|
+
path: path44,
|
|
11879
11649
|
score: 0,
|
|
11880
11650
|
weight,
|
|
11881
11651
|
hit: false,
|
|
11882
|
-
message: `${
|
|
11652
|
+
message: `${path44}: unknown match type "${match}"`
|
|
11883
11653
|
};
|
|
11884
11654
|
}
|
|
11885
11655
|
}
|
|
11886
11656
|
/**
|
|
11887
11657
|
* Exact equality comparison.
|
|
11888
11658
|
*/
|
|
11889
|
-
compareExact(
|
|
11659
|
+
compareExact(path44, candidateValue, expectedValue, weight) {
|
|
11890
11660
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
11891
11661
|
return {
|
|
11892
|
-
path:
|
|
11662
|
+
path: path44,
|
|
11893
11663
|
score: 1,
|
|
11894
11664
|
weight,
|
|
11895
11665
|
hit: true,
|
|
11896
|
-
message:
|
|
11666
|
+
message: path44
|
|
11897
11667
|
};
|
|
11898
11668
|
}
|
|
11899
11669
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
11900
11670
|
return {
|
|
11901
|
-
path:
|
|
11671
|
+
path: path44,
|
|
11902
11672
|
score: 0,
|
|
11903
11673
|
weight,
|
|
11904
11674
|
hit: false,
|
|
11905
|
-
message: `${
|
|
11675
|
+
message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
11906
11676
|
};
|
|
11907
11677
|
}
|
|
11908
11678
|
return {
|
|
11909
|
-
path:
|
|
11679
|
+
path: path44,
|
|
11910
11680
|
score: 0,
|
|
11911
11681
|
weight,
|
|
11912
11682
|
hit: false,
|
|
11913
|
-
message: `${
|
|
11683
|
+
message: `${path44} (value mismatch)`
|
|
11914
11684
|
};
|
|
11915
11685
|
}
|
|
11916
11686
|
/**
|
|
11917
11687
|
* Numeric comparison with absolute or relative tolerance.
|
|
11918
11688
|
*/
|
|
11919
|
-
compareNumericTolerance(
|
|
11689
|
+
compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11920
11690
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
11921
11691
|
const candidateNum = toNumber(candidateValue);
|
|
11922
11692
|
const expectedNum = toNumber(expectedValue);
|
|
11923
11693
|
if (candidateNum === null || expectedNum === null) {
|
|
11924
11694
|
return {
|
|
11925
|
-
path:
|
|
11695
|
+
path: path44,
|
|
11926
11696
|
score: 0,
|
|
11927
11697
|
weight,
|
|
11928
11698
|
hit: false,
|
|
11929
|
-
message: `${
|
|
11699
|
+
message: `${path44} (non-numeric value)`
|
|
11930
11700
|
};
|
|
11931
11701
|
}
|
|
11932
11702
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
11933
11703
|
return {
|
|
11934
|
-
path:
|
|
11704
|
+
path: path44,
|
|
11935
11705
|
score: 0,
|
|
11936
11706
|
weight,
|
|
11937
11707
|
hit: false,
|
|
11938
|
-
message: `${
|
|
11708
|
+
message: `${path44} (invalid numeric value)`
|
|
11939
11709
|
};
|
|
11940
11710
|
}
|
|
11941
11711
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -11948,61 +11718,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
11948
11718
|
}
|
|
11949
11719
|
if (withinTolerance) {
|
|
11950
11720
|
return {
|
|
11951
|
-
path:
|
|
11721
|
+
path: path44,
|
|
11952
11722
|
score: 1,
|
|
11953
11723
|
weight,
|
|
11954
11724
|
hit: true,
|
|
11955
|
-
message: `${
|
|
11725
|
+
message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
|
|
11956
11726
|
};
|
|
11957
11727
|
}
|
|
11958
11728
|
return {
|
|
11959
|
-
path:
|
|
11729
|
+
path: path44,
|
|
11960
11730
|
score: 0,
|
|
11961
11731
|
weight,
|
|
11962
11732
|
hit: false,
|
|
11963
|
-
message: `${
|
|
11733
|
+
message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
11964
11734
|
};
|
|
11965
11735
|
}
|
|
11966
11736
|
/**
|
|
11967
11737
|
* Date comparison with format normalization.
|
|
11968
11738
|
*/
|
|
11969
|
-
compareDate(
|
|
11739
|
+
compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11970
11740
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
11971
11741
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
11972
11742
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
11973
11743
|
if (candidateDate === null) {
|
|
11974
11744
|
return {
|
|
11975
|
-
path:
|
|
11745
|
+
path: path44,
|
|
11976
11746
|
score: 0,
|
|
11977
11747
|
weight,
|
|
11978
11748
|
hit: false,
|
|
11979
|
-
message: `${
|
|
11749
|
+
message: `${path44} (unparseable candidate date)`
|
|
11980
11750
|
};
|
|
11981
11751
|
}
|
|
11982
11752
|
if (expectedDate === null) {
|
|
11983
11753
|
return {
|
|
11984
|
-
path:
|
|
11754
|
+
path: path44,
|
|
11985
11755
|
score: 0,
|
|
11986
11756
|
weight,
|
|
11987
11757
|
hit: false,
|
|
11988
|
-
message: `${
|
|
11758
|
+
message: `${path44} (unparseable expected date)`
|
|
11989
11759
|
};
|
|
11990
11760
|
}
|
|
11991
11761
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
11992
11762
|
return {
|
|
11993
|
-
path:
|
|
11763
|
+
path: path44,
|
|
11994
11764
|
score: 1,
|
|
11995
11765
|
weight,
|
|
11996
11766
|
hit: true,
|
|
11997
|
-
message:
|
|
11767
|
+
message: path44
|
|
11998
11768
|
};
|
|
11999
11769
|
}
|
|
12000
11770
|
return {
|
|
12001
|
-
path:
|
|
11771
|
+
path: path44,
|
|
12002
11772
|
score: 0,
|
|
12003
11773
|
weight,
|
|
12004
11774
|
hit: false,
|
|
12005
|
-
message: `${
|
|
11775
|
+
message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12006
11776
|
};
|
|
12007
11777
|
}
|
|
12008
11778
|
/**
|
|
@@ -12035,11 +11805,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12035
11805
|
};
|
|
12036
11806
|
}
|
|
12037
11807
|
};
|
|
12038
|
-
function resolvePath(obj,
|
|
12039
|
-
if (!
|
|
11808
|
+
function resolvePath(obj, path44) {
|
|
11809
|
+
if (!path44 || !obj) {
|
|
12040
11810
|
return void 0;
|
|
12041
11811
|
}
|
|
12042
|
-
const parts =
|
|
11812
|
+
const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12043
11813
|
let current = obj;
|
|
12044
11814
|
for (const part of parts) {
|
|
12045
11815
|
if (current === null || current === void 0) {
|
|
@@ -12499,8 +12269,8 @@ var TokenUsageEvaluator = class {
|
|
|
12499
12269
|
};
|
|
12500
12270
|
|
|
12501
12271
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12502
|
-
function getNestedValue(obj,
|
|
12503
|
-
const parts =
|
|
12272
|
+
function getNestedValue(obj, path44) {
|
|
12273
|
+
const parts = path44.split(".");
|
|
12504
12274
|
let current = obj;
|
|
12505
12275
|
for (const part of parts) {
|
|
12506
12276
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13122,8 +12892,8 @@ function runEqualsAssertion(output, value) {
|
|
|
13122
12892
|
// src/evaluation/orchestrator.ts
|
|
13123
12893
|
import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
|
|
13124
12894
|
import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13125
|
-
import
|
|
13126
|
-
import
|
|
12895
|
+
import path41 from "node:path";
|
|
12896
|
+
import micromatch3 from "micromatch";
|
|
13127
12897
|
|
|
13128
12898
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
13129
12899
|
var Node = class {
|
|
@@ -13336,7 +13106,7 @@ var InlineAssertEvaluator = class {
|
|
|
13336
13106
|
};
|
|
13337
13107
|
|
|
13338
13108
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13339
|
-
import
|
|
13109
|
+
import path33 from "node:path";
|
|
13340
13110
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13341
13111
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13342
13112
|
if (!context) {
|
|
@@ -13371,10 +13141,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13371
13141
|
expectedOutput: context.evalCase.expected_output,
|
|
13372
13142
|
outputText: context.candidate,
|
|
13373
13143
|
output: context.output ?? null,
|
|
13374
|
-
|
|
13375
|
-
inputFiles: context.evalCase.file_paths.filter(
|
|
13376
|
-
(p) => !context.evalCase.guideline_paths.includes(p)
|
|
13377
|
-
),
|
|
13144
|
+
inputFiles: context.evalCase.file_paths,
|
|
13378
13145
|
input: context.evalCase.input,
|
|
13379
13146
|
trace: context.trace ?? null,
|
|
13380
13147
|
fileChanges: context.fileChanges ?? null,
|
|
@@ -13385,7 +13152,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13385
13152
|
};
|
|
13386
13153
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13387
13154
|
const scriptPath = script[script.length - 1];
|
|
13388
|
-
const cwd =
|
|
13155
|
+
const cwd = path33.dirname(scriptPath);
|
|
13389
13156
|
try {
|
|
13390
13157
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13391
13158
|
const prompt = stdout.trim();
|
|
@@ -13657,16 +13424,16 @@ function createBuiltinRegistry() {
|
|
|
13657
13424
|
}
|
|
13658
13425
|
|
|
13659
13426
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13660
|
-
import
|
|
13427
|
+
import path34 from "node:path";
|
|
13661
13428
|
import fg2 from "fast-glob";
|
|
13662
13429
|
async function discoverAssertions(registry, baseDir) {
|
|
13663
13430
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13664
13431
|
const candidateDirs = [];
|
|
13665
|
-
let dir =
|
|
13666
|
-
const root =
|
|
13432
|
+
let dir = path34.resolve(baseDir);
|
|
13433
|
+
const root = path34.parse(dir).root;
|
|
13667
13434
|
while (dir !== root) {
|
|
13668
|
-
candidateDirs.push(
|
|
13669
|
-
dir =
|
|
13435
|
+
candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
|
|
13436
|
+
dir = path34.dirname(dir);
|
|
13670
13437
|
}
|
|
13671
13438
|
let files = [];
|
|
13672
13439
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -13682,7 +13449,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13682
13449
|
}
|
|
13683
13450
|
const discoveredTypes = [];
|
|
13684
13451
|
for (const filePath of files) {
|
|
13685
|
-
const basename =
|
|
13452
|
+
const basename = path34.basename(filePath);
|
|
13686
13453
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13687
13454
|
if (registry.has(typeName)) {
|
|
13688
13455
|
continue;
|
|
@@ -13700,17 +13467,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13700
13467
|
}
|
|
13701
13468
|
|
|
13702
13469
|
// src/evaluation/registry/grader-discovery.ts
|
|
13703
|
-
import
|
|
13470
|
+
import path35 from "node:path";
|
|
13704
13471
|
import fg3 from "fast-glob";
|
|
13705
13472
|
async function discoverGraders(registry, baseDir) {
|
|
13706
13473
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13707
13474
|
const candidateDirs = [];
|
|
13708
|
-
let dir =
|
|
13709
|
-
const root =
|
|
13475
|
+
let dir = path35.resolve(baseDir);
|
|
13476
|
+
const root = path35.parse(dir).root;
|
|
13710
13477
|
while (dir !== root) {
|
|
13711
|
-
candidateDirs.push(
|
|
13712
|
-
candidateDirs.push(
|
|
13713
|
-
dir =
|
|
13478
|
+
candidateDirs.push(path35.join(dir, ".agentv", "graders"));
|
|
13479
|
+
candidateDirs.push(path35.join(dir, ".agentv", "judges"));
|
|
13480
|
+
dir = path35.dirname(dir);
|
|
13714
13481
|
}
|
|
13715
13482
|
let files = [];
|
|
13716
13483
|
for (const gradersDir of candidateDirs) {
|
|
@@ -13726,7 +13493,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13726
13493
|
}
|
|
13727
13494
|
const discoveredTypes = [];
|
|
13728
13495
|
for (const filePath of files) {
|
|
13729
|
-
const basename =
|
|
13496
|
+
const basename = path35.basename(filePath);
|
|
13730
13497
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13731
13498
|
if (registry.has(typeName)) {
|
|
13732
13499
|
continue;
|
|
@@ -13886,7 +13653,7 @@ function getTCritical(df) {
|
|
|
13886
13653
|
// src/evaluation/workspace/file-changes.ts
|
|
13887
13654
|
import { exec as execCallback } from "node:child_process";
|
|
13888
13655
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
13889
|
-
import
|
|
13656
|
+
import path36 from "node:path";
|
|
13890
13657
|
import { promisify as promisify4 } from "node:util";
|
|
13891
13658
|
var execAsync4 = promisify4(execCallback);
|
|
13892
13659
|
function gitExecOpts(workspacePath) {
|
|
@@ -13920,10 +13687,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13920
13687
|
}
|
|
13921
13688
|
for (const entry of entries) {
|
|
13922
13689
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
13923
|
-
const childPath =
|
|
13690
|
+
const childPath = path36.join(workspacePath, entry);
|
|
13924
13691
|
try {
|
|
13925
13692
|
if (!statSync(childPath).isDirectory()) continue;
|
|
13926
|
-
if (!statSync(
|
|
13693
|
+
if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
|
|
13927
13694
|
} catch {
|
|
13928
13695
|
continue;
|
|
13929
13696
|
}
|
|
@@ -13934,7 +13701,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13934
13701
|
|
|
13935
13702
|
// src/evaluation/workspace/manager.ts
|
|
13936
13703
|
import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
13937
|
-
import
|
|
13704
|
+
import path37 from "node:path";
|
|
13938
13705
|
var TemplateNotFoundError = class extends Error {
|
|
13939
13706
|
constructor(templatePath) {
|
|
13940
13707
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -13964,14 +13731,14 @@ async function isDirectory(filePath) {
|
|
|
13964
13731
|
}
|
|
13965
13732
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
13966
13733
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
13967
|
-
return
|
|
13734
|
+
return path37.join(root, evalRunId, caseId);
|
|
13968
13735
|
}
|
|
13969
13736
|
async function copyDirectoryRecursive(src, dest) {
|
|
13970
13737
|
await mkdir11(dest, { recursive: true });
|
|
13971
13738
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
13972
13739
|
for (const entry of entries) {
|
|
13973
|
-
const srcPath =
|
|
13974
|
-
const destPath =
|
|
13740
|
+
const srcPath = path37.join(src, entry.name);
|
|
13741
|
+
const destPath = path37.join(dest, entry.name);
|
|
13975
13742
|
if (entry.name === ".git") {
|
|
13976
13743
|
continue;
|
|
13977
13744
|
}
|
|
@@ -13983,7 +13750,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
13983
13750
|
}
|
|
13984
13751
|
}
|
|
13985
13752
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
13986
|
-
const resolvedTemplatePath =
|
|
13753
|
+
const resolvedTemplatePath = path37.resolve(templatePath);
|
|
13987
13754
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
13988
13755
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
13989
13756
|
}
|
|
@@ -14032,7 +13799,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
14032
13799
|
}
|
|
14033
13800
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
14034
13801
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14035
|
-
const evalDir =
|
|
13802
|
+
const evalDir = path37.join(root, evalRunId);
|
|
14036
13803
|
if (await fileExists(evalDir)) {
|
|
14037
13804
|
await rm4(evalDir, { recursive: true, force: true });
|
|
14038
13805
|
}
|
|
@@ -14042,8 +13809,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
14042
13809
|
import { execFile } from "node:child_process";
|
|
14043
13810
|
import { createHash } from "node:crypto";
|
|
14044
13811
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14045
|
-
import { cp as cp2, mkdir as mkdir12, readFile as
|
|
14046
|
-
import
|
|
13812
|
+
import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
13813
|
+
import path38 from "node:path";
|
|
14047
13814
|
import { promisify as promisify5 } from "node:util";
|
|
14048
13815
|
var execFileAsync = promisify5(execFile);
|
|
14049
13816
|
function gitEnv() {
|
|
@@ -14097,8 +13864,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
14097
13864
|
await mkdir12(dest, { recursive: true });
|
|
14098
13865
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
14099
13866
|
for (const entry of entries) {
|
|
14100
|
-
const srcPath =
|
|
14101
|
-
const destPath =
|
|
13867
|
+
const srcPath = path38.join(src, entry.name);
|
|
13868
|
+
const destPath = path38.join(dest, entry.name);
|
|
14102
13869
|
if (entry.name === ".git") {
|
|
14103
13870
|
continue;
|
|
14104
13871
|
}
|
|
@@ -14131,7 +13898,7 @@ var WorkspacePoolManager = class {
|
|
|
14131
13898
|
async acquireWorkspace(options) {
|
|
14132
13899
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
14133
13900
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
14134
|
-
const poolDir =
|
|
13901
|
+
const poolDir = path38.join(this.poolRoot, fingerprint);
|
|
14135
13902
|
await mkdir12(poolDir, { recursive: true });
|
|
14136
13903
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
14137
13904
|
if (drifted) {
|
|
@@ -14141,7 +13908,7 @@ var WorkspacePoolManager = class {
|
|
|
14141
13908
|
await this.removeAllSlots(poolDir);
|
|
14142
13909
|
}
|
|
14143
13910
|
for (let i = 0; i < maxSlots; i++) {
|
|
14144
|
-
const slotPath =
|
|
13911
|
+
const slotPath = path38.join(poolDir, `slot-${i}`);
|
|
14145
13912
|
const lockPath = `${slotPath}.lock`;
|
|
14146
13913
|
const locked = await this.tryLock(lockPath);
|
|
14147
13914
|
if (!locked) {
|
|
@@ -14203,7 +13970,7 @@ var WorkspacePoolManager = class {
|
|
|
14203
13970
|
throw err;
|
|
14204
13971
|
}
|
|
14205
13972
|
try {
|
|
14206
|
-
const pidStr = await
|
|
13973
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
14207
13974
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14208
13975
|
if (!Number.isNaN(pid)) {
|
|
14209
13976
|
try {
|
|
@@ -14228,9 +13995,9 @@ var WorkspacePoolManager = class {
|
|
|
14228
13995
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14229
13996
|
*/
|
|
14230
13997
|
async checkDrift(poolDir, fingerprint) {
|
|
14231
|
-
const metadataPath =
|
|
13998
|
+
const metadataPath = path38.join(poolDir, "metadata.json");
|
|
14232
13999
|
try {
|
|
14233
|
-
const raw = await
|
|
14000
|
+
const raw = await readFile10(metadataPath, "utf-8");
|
|
14234
14001
|
const metadata = JSON.parse(raw);
|
|
14235
14002
|
return metadata.fingerprint !== fingerprint;
|
|
14236
14003
|
} catch {
|
|
@@ -14245,17 +14012,17 @@ var WorkspacePoolManager = class {
|
|
|
14245
14012
|
repos,
|
|
14246
14013
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14247
14014
|
};
|
|
14248
|
-
await writeFile7(
|
|
14015
|
+
await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14249
14016
|
}
|
|
14250
14017
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14251
14018
|
async removeAllSlots(poolDir) {
|
|
14252
14019
|
const entries = await readdir4(poolDir);
|
|
14253
14020
|
for (const entry of entries) {
|
|
14254
14021
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14255
|
-
const lockPath =
|
|
14022
|
+
const lockPath = path38.join(poolDir, `${entry}.lock`);
|
|
14256
14023
|
if (existsSync2(lockPath)) {
|
|
14257
14024
|
try {
|
|
14258
|
-
const pidStr = await
|
|
14025
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
14259
14026
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14260
14027
|
if (!Number.isNaN(pid)) {
|
|
14261
14028
|
try {
|
|
@@ -14268,12 +14035,12 @@ var WorkspacePoolManager = class {
|
|
|
14268
14035
|
} catch {
|
|
14269
14036
|
}
|
|
14270
14037
|
}
|
|
14271
|
-
await rm5(
|
|
14038
|
+
await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
|
|
14272
14039
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14273
14040
|
});
|
|
14274
14041
|
}
|
|
14275
14042
|
}
|
|
14276
|
-
await rm5(
|
|
14043
|
+
await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14277
14044
|
});
|
|
14278
14045
|
}
|
|
14279
14046
|
/**
|
|
@@ -14283,7 +14050,7 @@ var WorkspacePoolManager = class {
|
|
|
14283
14050
|
*/
|
|
14284
14051
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14285
14052
|
for (const repo of repos) {
|
|
14286
|
-
const repoDir =
|
|
14053
|
+
const repoDir = path38.join(slotPath, repo.path);
|
|
14287
14054
|
if (!existsSync2(repoDir)) {
|
|
14288
14055
|
continue;
|
|
14289
14056
|
}
|
|
@@ -14310,7 +14077,7 @@ var WorkspacePoolManager = class {
|
|
|
14310
14077
|
// src/evaluation/workspace/repo-manager.ts
|
|
14311
14078
|
import { execFile as execFile2 } from "node:child_process";
|
|
14312
14079
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14313
|
-
import
|
|
14080
|
+
import path39 from "node:path";
|
|
14314
14081
|
import { promisify as promisify6 } from "node:util";
|
|
14315
14082
|
var execFileAsync2 = promisify6(execFile2);
|
|
14316
14083
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14410,7 +14177,7 @@ ${lines.join("\n")}`;
|
|
|
14410
14177
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14411
14178
|
*/
|
|
14412
14179
|
async materialize(repo, workspacePath) {
|
|
14413
|
-
const targetDir =
|
|
14180
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
14414
14181
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14415
14182
|
const startedAt = Date.now();
|
|
14416
14183
|
if (this.verbose) {
|
|
@@ -14501,7 +14268,7 @@ ${lines.join("\n")}`;
|
|
|
14501
14268
|
async reset(repos, workspacePath, reset) {
|
|
14502
14269
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14503
14270
|
for (const repo of repos) {
|
|
14504
|
-
const targetDir =
|
|
14271
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
14505
14272
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14506
14273
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14507
14274
|
}
|
|
@@ -14510,16 +14277,16 @@ ${lines.join("\n")}`;
|
|
|
14510
14277
|
|
|
14511
14278
|
// src/evaluation/workspace/resolve.ts
|
|
14512
14279
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
14513
|
-
import
|
|
14280
|
+
import path40 from "node:path";
|
|
14514
14281
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14515
14282
|
if (!templatePath) {
|
|
14516
14283
|
return void 0;
|
|
14517
14284
|
}
|
|
14518
|
-
const resolved =
|
|
14285
|
+
const resolved = path40.resolve(templatePath);
|
|
14519
14286
|
const stats = await stat6(resolved);
|
|
14520
14287
|
if (stats.isFile()) {
|
|
14521
14288
|
return {
|
|
14522
|
-
dir:
|
|
14289
|
+
dir: path40.dirname(resolved),
|
|
14523
14290
|
workspaceFile: resolved
|
|
14524
14291
|
};
|
|
14525
14292
|
}
|
|
@@ -14531,14 +14298,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
14531
14298
|
if (workspaceFiles.length === 1) {
|
|
14532
14299
|
return {
|
|
14533
14300
|
dir: resolved,
|
|
14534
|
-
workspaceFile:
|
|
14301
|
+
workspaceFile: path40.join(resolved, workspaceFiles[0])
|
|
14535
14302
|
};
|
|
14536
14303
|
}
|
|
14537
14304
|
if (workspaceFiles.length > 1) {
|
|
14538
14305
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14539
14306
|
return {
|
|
14540
14307
|
dir: resolved,
|
|
14541
|
-
workspaceFile: conventionFile ?
|
|
14308
|
+
workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
|
|
14542
14309
|
};
|
|
14543
14310
|
}
|
|
14544
14311
|
return { dir: resolved };
|
|
@@ -14742,7 +14509,7 @@ async function runEvaluation(options) {
|
|
|
14742
14509
|
];
|
|
14743
14510
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14744
14511
|
const typeRegistry = createBuiltinRegistry();
|
|
14745
|
-
const discoveryBaseDir = evalFilePath ?
|
|
14512
|
+
const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
|
|
14746
14513
|
const evalDir = discoveryBaseDir;
|
|
14747
14514
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14748
14515
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -14851,14 +14618,22 @@ async function runEvaluation(options) {
|
|
|
14851
14618
|
const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
|
|
14852
14619
|
const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
|
|
14853
14620
|
const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
|
|
14854
|
-
const
|
|
14855
|
-
const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
|
|
14621
|
+
const workers = options.maxConcurrency ?? target.workers ?? 1;
|
|
14856
14622
|
setupLog(
|
|
14857
|
-
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool}
|
|
14623
|
+
`sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} workers=${workers}`
|
|
14858
14624
|
);
|
|
14859
|
-
if (hasSharedWorkspace && !usePool &&
|
|
14625
|
+
if (hasSharedWorkspace && !usePool && workers > 1) {
|
|
14860
14626
|
console.warn(
|
|
14861
|
-
|
|
14627
|
+
[
|
|
14628
|
+
`Warning: This eval uses a shared workspace with ${workers} workers.`,
|
|
14629
|
+
"If the agent under test makes file edits, concurrent runs may corrupt each other.",
|
|
14630
|
+
"To limit concurrency, add this to your eval YAML:",
|
|
14631
|
+
"",
|
|
14632
|
+
" execution:",
|
|
14633
|
+
" workers: 1",
|
|
14634
|
+
"",
|
|
14635
|
+
"Or pass --workers 1 on the command line."
|
|
14636
|
+
].join("\n")
|
|
14862
14637
|
);
|
|
14863
14638
|
}
|
|
14864
14639
|
const limit = pLimit(workers);
|
|
@@ -14931,7 +14706,7 @@ async function runEvaluation(options) {
|
|
|
14931
14706
|
}
|
|
14932
14707
|
try {
|
|
14933
14708
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
14934
|
-
const copiedWorkspaceFile =
|
|
14709
|
+
const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
|
|
14935
14710
|
try {
|
|
14936
14711
|
await stat7(copiedWorkspaceFile);
|
|
14937
14712
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -15041,7 +14816,7 @@ async function runEvaluation(options) {
|
|
|
15041
14816
|
const budgetResult = {
|
|
15042
14817
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15043
14818
|
testId: evalCase.id,
|
|
15044
|
-
|
|
14819
|
+
eval_set: evalCase.eval_set,
|
|
15045
14820
|
score: 0,
|
|
15046
14821
|
assertions: [],
|
|
15047
14822
|
output: [],
|
|
@@ -15077,7 +14852,7 @@ async function runEvaluation(options) {
|
|
|
15077
14852
|
const haltResult = {
|
|
15078
14853
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15079
14854
|
testId: evalCase.id,
|
|
15080
|
-
|
|
14855
|
+
eval_set: evalCase.eval_set,
|
|
15081
14856
|
score: 0,
|
|
15082
14857
|
assertions: [],
|
|
15083
14858
|
output: [],
|
|
@@ -15309,8 +15084,6 @@ async function runBatchEvaluation(options) {
|
|
|
15309
15084
|
const promptInputs = promptInputsList[index];
|
|
15310
15085
|
return {
|
|
15311
15086
|
question: promptInputs.question,
|
|
15312
|
-
guidelines: promptInputs.guidelines,
|
|
15313
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
15314
15087
|
inputFiles: evalCase.file_paths,
|
|
15315
15088
|
evalCaseId: evalCase.id,
|
|
15316
15089
|
metadata: {
|
|
@@ -15508,7 +15281,7 @@ async function runEvalCase(options) {
|
|
|
15508
15281
|
);
|
|
15509
15282
|
}
|
|
15510
15283
|
if (caseWorkspaceFile && workspacePath) {
|
|
15511
|
-
const copiedFile =
|
|
15284
|
+
const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
|
|
15512
15285
|
try {
|
|
15513
15286
|
await stat7(copiedFile);
|
|
15514
15287
|
caseWorkspaceFile = copiedFile;
|
|
@@ -15568,10 +15341,10 @@ async function runEvalCase(options) {
|
|
|
15568
15341
|
const files = evalCase.metadata.agent_skills_files;
|
|
15569
15342
|
if (baseDir && files.length > 0) {
|
|
15570
15343
|
for (const relPath of files) {
|
|
15571
|
-
const srcPath =
|
|
15572
|
-
const destPath =
|
|
15344
|
+
const srcPath = path41.resolve(baseDir, relPath);
|
|
15345
|
+
const destPath = path41.resolve(workspacePath, relPath);
|
|
15573
15346
|
try {
|
|
15574
|
-
await mkdir13(
|
|
15347
|
+
await mkdir13(path41.dirname(destPath), { recursive: true });
|
|
15575
15348
|
await copyFile2(srcPath, destPath);
|
|
15576
15349
|
} catch (error) {
|
|
15577
15350
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -16018,8 +15791,7 @@ async function evaluateCandidate(options) {
|
|
|
16018
15791
|
let lmRequest;
|
|
16019
15792
|
if (isAgentProvider(provider)) {
|
|
16020
15793
|
agentRequest = {
|
|
16021
|
-
question: promptInputs.question
|
|
16022
|
-
guideline_paths: evalCase.guideline_paths
|
|
15794
|
+
question: promptInputs.question
|
|
16023
15795
|
};
|
|
16024
15796
|
} else {
|
|
16025
15797
|
if (promptInputs.chatPrompt) {
|
|
@@ -16028,8 +15800,7 @@ async function evaluateCandidate(options) {
|
|
|
16028
15800
|
};
|
|
16029
15801
|
} else {
|
|
16030
15802
|
lmRequest = {
|
|
16031
|
-
question: promptInputs.question
|
|
16032
|
-
guidelines: promptInputs.guidelines
|
|
15803
|
+
question: promptInputs.question
|
|
16033
15804
|
};
|
|
16034
15805
|
}
|
|
16035
15806
|
}
|
|
@@ -16043,7 +15814,7 @@ async function evaluateCandidate(options) {
|
|
|
16043
15814
|
return {
|
|
16044
15815
|
timestamp: completedAt.toISOString(),
|
|
16045
15816
|
testId: evalCase.id,
|
|
16046
|
-
|
|
15817
|
+
eval_set: evalCase.eval_set,
|
|
16047
15818
|
conversationId: evalCase.conversation_id,
|
|
16048
15819
|
score: score.score,
|
|
16049
15820
|
assertions: score.assertions,
|
|
@@ -16191,7 +15962,7 @@ async function runEvaluatorList(options) {
|
|
|
16191
15962
|
fileChanges,
|
|
16192
15963
|
workspacePath
|
|
16193
15964
|
};
|
|
16194
|
-
const evalFileDir = evalCase.
|
|
15965
|
+
const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
16195
15966
|
const dispatchContext = {
|
|
16196
15967
|
graderProvider,
|
|
16197
15968
|
targetResolver,
|
|
@@ -16305,7 +16076,7 @@ function filterEvalCases(evalCases, filter) {
|
|
|
16305
16076
|
if (!filter) {
|
|
16306
16077
|
return evalCases;
|
|
16307
16078
|
}
|
|
16308
|
-
return evalCases.filter((evalCase) =>
|
|
16079
|
+
return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter));
|
|
16309
16080
|
}
|
|
16310
16081
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
16311
16082
|
const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
|
|
@@ -16342,8 +16113,6 @@ async function invokeProvider(provider, options) {
|
|
|
16342
16113
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
16343
16114
|
return await provider.invoke({
|
|
16344
16115
|
question: promptInputs.question,
|
|
16345
|
-
guidelines: promptInputs.guidelines,
|
|
16346
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
16347
16116
|
chatPrompt: promptInputs.chatPrompt,
|
|
16348
16117
|
inputFiles: evalCase.file_paths,
|
|
16349
16118
|
evalCaseId: evalCase.id,
|
|
@@ -16371,21 +16140,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16371
16140
|
if (isAgentProvider(provider)) {
|
|
16372
16141
|
agentRequest = {
|
|
16373
16142
|
question: promptInputs.question,
|
|
16374
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16375
16143
|
error: message
|
|
16376
16144
|
};
|
|
16377
16145
|
} else {
|
|
16378
16146
|
if (promptInputs.chatPrompt) {
|
|
16379
16147
|
lmRequest = {
|
|
16380
16148
|
chat_prompt: promptInputs.chatPrompt,
|
|
16381
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16382
16149
|
error: message
|
|
16383
16150
|
};
|
|
16384
16151
|
} else {
|
|
16385
16152
|
lmRequest = {
|
|
16386
16153
|
question: promptInputs.question,
|
|
16387
|
-
guidelines: promptInputs.guidelines,
|
|
16388
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16389
16154
|
error: message
|
|
16390
16155
|
};
|
|
16391
16156
|
}
|
|
@@ -16398,7 +16163,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16398
16163
|
return {
|
|
16399
16164
|
timestamp: timestamp.toISOString(),
|
|
16400
16165
|
testId: evalCase.id,
|
|
16401
|
-
|
|
16166
|
+
eval_set: evalCase.eval_set,
|
|
16402
16167
|
conversationId: evalCase.conversation_id,
|
|
16403
16168
|
score: 0,
|
|
16404
16169
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
@@ -16431,7 +16196,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
16431
16196
|
hash.update(target.name);
|
|
16432
16197
|
hash.update(evalCase.id);
|
|
16433
16198
|
hash.update(promptInputs.question);
|
|
16434
|
-
hash.update(promptInputs.guidelines);
|
|
16435
16199
|
hash.update(promptInputs.systemMessage ?? "");
|
|
16436
16200
|
if (promptInputs.chatPrompt) {
|
|
16437
16201
|
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
@@ -16531,7 +16295,7 @@ function computeWeightedMean(entries) {
|
|
|
16531
16295
|
|
|
16532
16296
|
// src/evaluation/evaluate.ts
|
|
16533
16297
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16534
|
-
import
|
|
16298
|
+
import path42 from "node:path";
|
|
16535
16299
|
|
|
16536
16300
|
// src/evaluation/providers/function-provider.ts
|
|
16537
16301
|
function createFunctionProvider(taskFn) {
|
|
@@ -16568,7 +16332,7 @@ async function evaluate(config) {
|
|
|
16568
16332
|
}
|
|
16569
16333
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16570
16334
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16571
|
-
const testFilePath = config.specFile ?
|
|
16335
|
+
const testFilePath = config.specFile ? path42.resolve(config.specFile) : path42.join(process.cwd(), "__programmatic__.yaml");
|
|
16572
16336
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16573
16337
|
let resolvedTarget;
|
|
16574
16338
|
let taskProvider;
|
|
@@ -16634,8 +16398,6 @@ async function evaluate(config) {
|
|
|
16634
16398
|
input_segments: inputSegments,
|
|
16635
16399
|
expected_output: expectedOutput,
|
|
16636
16400
|
reference_answer: expectedOutputValue,
|
|
16637
|
-
guideline_paths: [],
|
|
16638
|
-
guideline_patterns: [],
|
|
16639
16401
|
file_paths: [],
|
|
16640
16402
|
assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
|
|
16641
16403
|
metadata: test.metadata
|
|
@@ -16697,10 +16459,10 @@ function computeSummary(results, durationMs) {
|
|
|
16697
16459
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16698
16460
|
async function discoverDefaultTarget(repoRoot) {
|
|
16699
16461
|
const cwd = process.cwd();
|
|
16700
|
-
const chain = buildDirectoryChain(
|
|
16462
|
+
const chain = buildDirectoryChain(path42.join(cwd, "_placeholder"), repoRoot);
|
|
16701
16463
|
for (const dir of chain) {
|
|
16702
16464
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16703
|
-
const targetsPath =
|
|
16465
|
+
const targetsPath = path42.join(dir, candidate);
|
|
16704
16466
|
if (!existsSync4(targetsPath)) continue;
|
|
16705
16467
|
try {
|
|
16706
16468
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16717,7 +16479,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16717
16479
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16718
16480
|
const envFiles = [];
|
|
16719
16481
|
for (const dir of chain) {
|
|
16720
|
-
const envPath =
|
|
16482
|
+
const envPath = path42.join(dir, ".env");
|
|
16721
16483
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16722
16484
|
}
|
|
16723
16485
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -16898,8 +16660,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
16898
16660
|
}
|
|
16899
16661
|
|
|
16900
16662
|
// src/evaluation/cache/response-cache.ts
|
|
16901
|
-
import { mkdir as mkdir14, readFile as
|
|
16902
|
-
import
|
|
16663
|
+
import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
|
|
16664
|
+
import path43 from "node:path";
|
|
16903
16665
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
16904
16666
|
var ResponseCache = class {
|
|
16905
16667
|
cachePath;
|
|
@@ -16909,7 +16671,7 @@ var ResponseCache = class {
|
|
|
16909
16671
|
async get(key) {
|
|
16910
16672
|
const filePath = this.keyToPath(key);
|
|
16911
16673
|
try {
|
|
16912
|
-
const data = await
|
|
16674
|
+
const data = await readFile11(filePath, "utf8");
|
|
16913
16675
|
return JSON.parse(data);
|
|
16914
16676
|
} catch {
|
|
16915
16677
|
return void 0;
|
|
@@ -16917,13 +16679,13 @@ var ResponseCache = class {
|
|
|
16917
16679
|
}
|
|
16918
16680
|
async set(key, value) {
|
|
16919
16681
|
const filePath = this.keyToPath(key);
|
|
16920
|
-
const dir =
|
|
16682
|
+
const dir = path43.dirname(filePath);
|
|
16921
16683
|
await mkdir14(dir, { recursive: true });
|
|
16922
16684
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
16923
16685
|
}
|
|
16924
16686
|
keyToPath(key) {
|
|
16925
16687
|
const prefix = key.slice(0, 2);
|
|
16926
|
-
return
|
|
16688
|
+
return path43.join(this.cachePath, prefix, `${key}.json`);
|
|
16927
16689
|
}
|
|
16928
16690
|
};
|
|
16929
16691
|
function shouldEnableCache(params) {
|
|
@@ -17111,7 +16873,7 @@ var OtelTraceExporter = class {
|
|
|
17111
16873
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17112
16874
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
17113
16875
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17114
|
-
if (result.
|
|
16876
|
+
if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
|
|
17115
16877
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17116
16878
|
if (captureContent && result.output.length > 0) {
|
|
17117
16879
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -17296,14 +17058,14 @@ var OtelStreamingObserver = class {
|
|
|
17296
17058
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
17297
17059
|
rootCtx = null;
|
|
17298
17060
|
/** Create root eval span immediately (visible in backend right away) */
|
|
17299
|
-
startEvalCase(testId, target,
|
|
17061
|
+
startEvalCase(testId, target, evalSet) {
|
|
17300
17062
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
17301
17063
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
17302
17064
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
17303
17065
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17304
17066
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
17305
17067
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
17306
|
-
if (
|
|
17068
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
|
|
17307
17069
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
17308
17070
|
}
|
|
17309
17071
|
/** Create and immediately export a tool span */
|
|
@@ -17477,6 +17239,7 @@ export {
|
|
|
17477
17239
|
extractTargetsFromSuite,
|
|
17478
17240
|
extractTargetsFromTestCase,
|
|
17479
17241
|
extractTrialsConfig,
|
|
17242
|
+
extractWorkersFromSuite,
|
|
17480
17243
|
fileExists,
|
|
17481
17244
|
findGitRoot,
|
|
17482
17245
|
freeformEvaluationSchema,
|
|
@@ -17491,7 +17254,6 @@ export {
|
|
|
17491
17254
|
initializeBaseline,
|
|
17492
17255
|
isAgentSkillsFormat,
|
|
17493
17256
|
isEvaluatorKind,
|
|
17494
|
-
isGuidelineFile,
|
|
17495
17257
|
isJsonObject,
|
|
17496
17258
|
isJsonValue,
|
|
17497
17259
|
isNonEmptyString,
|