@agentv/core 3.8.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-3ZS3GCMI.js → chunk-PC5TLJF6.js} +1 -2
- package/dist/chunk-PC5TLJF6.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -33
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +2 -33
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +506 -767
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -17
- package/dist/index.d.ts +6 -17
- package/dist/index.js +355 -614
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-3ZS3GCMI.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
readTextFile,
|
|
20
20
|
resolveFileReference,
|
|
21
21
|
resolveTargetDefinition
|
|
22
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-PC5TLJF6.js";
|
|
23
23
|
import {
|
|
24
24
|
AgentvProvider
|
|
25
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -149,9 +149,9 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
149
149
|
}
|
|
150
150
|
|
|
151
151
|
// src/evaluation/yaml-parser.ts
|
|
152
|
-
import { readFile as
|
|
153
|
-
import
|
|
154
|
-
import
|
|
152
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
153
|
+
import path7 from "node:path";
|
|
154
|
+
import micromatch2 from "micromatch";
|
|
155
155
|
import { parse as parse2 } from "yaml";
|
|
156
156
|
|
|
157
157
|
// src/evaluation/loaders/agent-skills-parser.ts
|
|
@@ -225,7 +225,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
225
225
|
input_segments: [{ type: "text", value: prompt }],
|
|
226
226
|
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
227
227
|
reference_answer: evalCase.expected_output,
|
|
228
|
-
guideline_paths: [],
|
|
229
228
|
file_paths: filePaths,
|
|
230
229
|
criteria: evalCase.expected_output ?? "",
|
|
231
230
|
assertions,
|
|
@@ -239,7 +238,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
239
238
|
// src/evaluation/loaders/config-loader.ts
|
|
240
239
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
241
240
|
import path3 from "node:path";
|
|
242
|
-
import micromatch from "micromatch";
|
|
243
241
|
import { parse } from "yaml";
|
|
244
242
|
|
|
245
243
|
// src/evaluation/loaders/file-resolver.ts
|
|
@@ -370,15 +368,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
370
368
|
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
371
369
|
continue;
|
|
372
370
|
}
|
|
373
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
374
|
-
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
375
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
376
|
-
continue;
|
|
377
|
-
}
|
|
378
|
-
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
379
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
380
|
-
continue;
|
|
381
|
-
}
|
|
382
371
|
const evalPatterns = config.eval_patterns;
|
|
383
372
|
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
384
373
|
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
@@ -394,7 +383,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
394
383
|
);
|
|
395
384
|
return {
|
|
396
385
|
required_version: requiredVersion,
|
|
397
|
-
guideline_patterns: guidelinePatterns,
|
|
398
386
|
eval_patterns: evalPatterns,
|
|
399
387
|
execution: executionDefaults
|
|
400
388
|
};
|
|
@@ -406,11 +394,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
406
394
|
}
|
|
407
395
|
return null;
|
|
408
396
|
}
|
|
409
|
-
function isGuidelineFile(filePath, patterns) {
|
|
410
|
-
const normalized = filePath.split("\\").join("/");
|
|
411
|
-
const patternsToUse = patterns ?? [];
|
|
412
|
-
return micromatch.isMatch(normalized, patternsToUse);
|
|
413
|
-
}
|
|
414
397
|
function extractTargetFromSuite(suite) {
|
|
415
398
|
const execution = suite.execution;
|
|
416
399
|
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
@@ -1986,7 +1969,7 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
1986
1969
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
1987
1970
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
1988
1971
|
import path6 from "node:path";
|
|
1989
|
-
import
|
|
1972
|
+
import micromatch from "micromatch";
|
|
1990
1973
|
import { parse as parseYaml } from "yaml";
|
|
1991
1974
|
|
|
1992
1975
|
// src/evaluation/loaders/message-processor.ts
|
|
@@ -2013,10 +1996,6 @@ function formatSegment(segment, mode = "lm") {
|
|
|
2013
1996
|
if (type === "text") {
|
|
2014
1997
|
return asString2(segment.value);
|
|
2015
1998
|
}
|
|
2016
|
-
if (type === "guideline_ref") {
|
|
2017
|
-
const refPath = asString2(segment.path);
|
|
2018
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
2019
|
-
}
|
|
2020
1999
|
if (type === "file") {
|
|
2021
2000
|
const filePath = asString2(segment.path);
|
|
2022
2001
|
if (!filePath) {
|
|
@@ -2039,9 +2018,6 @@ function hasVisibleContent(segments) {
|
|
|
2039
2018
|
const value = asString2(segment.value);
|
|
2040
2019
|
return value !== void 0 && value.trim().length > 0;
|
|
2041
2020
|
}
|
|
2042
|
-
if (type === "guideline_ref") {
|
|
2043
|
-
return false;
|
|
2044
|
-
}
|
|
2045
2021
|
if (type === "file") {
|
|
2046
2022
|
const text = asString2(segment.text);
|
|
2047
2023
|
return text !== void 0 && text.trim().length > 0;
|
|
@@ -2057,17 +2033,7 @@ function asString2(value) {
|
|
|
2057
2033
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2058
2034
|
var ANSI_RESET5 = "\x1B[0m";
|
|
2059
2035
|
async function processMessages(options) {
|
|
2060
|
-
const {
|
|
2061
|
-
messages,
|
|
2062
|
-
searchRoots,
|
|
2063
|
-
repoRootPath,
|
|
2064
|
-
guidelinePatterns,
|
|
2065
|
-
guidelinePaths,
|
|
2066
|
-
treatFileSegmentsAsGuidelines,
|
|
2067
|
-
textParts,
|
|
2068
|
-
messageType,
|
|
2069
|
-
verbose
|
|
2070
|
-
} = options;
|
|
2036
|
+
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
2071
2037
|
const segments = [];
|
|
2072
2038
|
for (const message of messages) {
|
|
2073
2039
|
const content = message.content;
|
|
@@ -2111,21 +2077,6 @@ async function processMessages(options) {
|
|
|
2111
2077
|
}
|
|
2112
2078
|
try {
|
|
2113
2079
|
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2114
|
-
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
2115
|
-
messageType,
|
|
2116
|
-
resolvedPath,
|
|
2117
|
-
repoRootPath,
|
|
2118
|
-
guidelinePatterns,
|
|
2119
|
-
treatFileSegmentsAsGuidelines
|
|
2120
|
-
});
|
|
2121
|
-
if (classifyAsGuideline && guidelinePaths) {
|
|
2122
|
-
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
2123
|
-
if (verbose) {
|
|
2124
|
-
console.log(` [Guideline] Found: ${displayPath}`);
|
|
2125
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
2126
|
-
}
|
|
2127
|
-
continue;
|
|
2128
|
-
}
|
|
2129
2080
|
segments.push({
|
|
2130
2081
|
type: "file",
|
|
2131
2082
|
path: displayPath,
|
|
@@ -2153,26 +2104,6 @@ async function processMessages(options) {
|
|
|
2153
2104
|
}
|
|
2154
2105
|
return segments;
|
|
2155
2106
|
}
|
|
2156
|
-
function shouldTreatAsGuideline(options) {
|
|
2157
|
-
const {
|
|
2158
|
-
messageType,
|
|
2159
|
-
resolvedPath,
|
|
2160
|
-
repoRootPath,
|
|
2161
|
-
guidelinePatterns,
|
|
2162
|
-
treatFileSegmentsAsGuidelines
|
|
2163
|
-
} = options;
|
|
2164
|
-
if (messageType !== "input") {
|
|
2165
|
-
return false;
|
|
2166
|
-
}
|
|
2167
|
-
if (treatFileSegmentsAsGuidelines) {
|
|
2168
|
-
return true;
|
|
2169
|
-
}
|
|
2170
|
-
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2171
|
-
return false;
|
|
2172
|
-
}
|
|
2173
|
-
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
2174
|
-
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
2175
|
-
}
|
|
2176
2107
|
function asString3(value) {
|
|
2177
2108
|
return typeof value === "string" ? value : void 0;
|
|
2178
2109
|
}
|
|
@@ -2373,7 +2304,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
2373
2304
|
}
|
|
2374
2305
|
return {
|
|
2375
2306
|
description: asString4(parsed.description),
|
|
2376
|
-
|
|
2307
|
+
name: asString4(parsed.name),
|
|
2377
2308
|
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
2378
2309
|
evaluator: parsed.evaluator
|
|
2379
2310
|
};
|
|
@@ -2409,20 +2340,18 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2409
2340
|
const absoluteTestPath = path6.resolve(evalFilePath);
|
|
2410
2341
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2411
2342
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2412
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2413
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
2414
2343
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
2415
2344
|
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
2416
2345
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
2417
|
-
const
|
|
2418
|
-
const
|
|
2346
|
+
const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
2347
|
+
const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
|
|
2419
2348
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
2420
2349
|
const globalExecution = sidecar.execution;
|
|
2421
2350
|
if (verbose) {
|
|
2422
2351
|
console.log(`
|
|
2423
2352
|
[JSONL Dataset: ${evalFilePath}]`);
|
|
2424
2353
|
console.log(` Cases: ${rawCases.length}`);
|
|
2425
|
-
console.log(`
|
|
2354
|
+
console.log(` Eval set: ${evalSetName}`);
|
|
2426
2355
|
if (sidecar.description) {
|
|
2427
2356
|
console.log(` Description: ${sidecar.description}`);
|
|
2428
2357
|
}
|
|
@@ -2432,7 +2361,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2432
2361
|
const evalcase = rawCases[lineIndex];
|
|
2433
2362
|
const lineNumber = lineIndex + 1;
|
|
2434
2363
|
const id = asString4(evalcase.id);
|
|
2435
|
-
if (filterPattern && (!id || !
|
|
2364
|
+
if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
|
|
2436
2365
|
continue;
|
|
2437
2366
|
}
|
|
2438
2367
|
const conversationId = asString4(evalcase.conversation_id);
|
|
@@ -2455,14 +2384,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2455
2384
|
continue;
|
|
2456
2385
|
}
|
|
2457
2386
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2458
|
-
const guidelinePaths = [];
|
|
2459
2387
|
const inputTextParts = [];
|
|
2460
2388
|
const inputSegments = await processMessages({
|
|
2461
2389
|
messages: inputMessages,
|
|
2462
2390
|
searchRoots,
|
|
2463
2391
|
repoRootPath,
|
|
2464
|
-
guidelinePatterns,
|
|
2465
|
-
guidelinePaths,
|
|
2466
2392
|
textParts: inputTextParts,
|
|
2467
2393
|
messageType: "input",
|
|
2468
2394
|
verbose
|
|
@@ -2512,40 +2438,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2512
2438
|
userFilePaths.push(segment.resolvedPath);
|
|
2513
2439
|
}
|
|
2514
2440
|
}
|
|
2515
|
-
const allFilePaths = [
|
|
2516
|
-
...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
2517
|
-
...userFilePaths
|
|
2518
|
-
];
|
|
2519
2441
|
const testCase = {
|
|
2520
2442
|
id,
|
|
2521
|
-
|
|
2443
|
+
eval_set: evalSetName,
|
|
2522
2444
|
conversation_id: conversationId,
|
|
2523
2445
|
question,
|
|
2524
2446
|
input: inputMessages,
|
|
2525
2447
|
input_segments: inputSegments,
|
|
2526
2448
|
expected_output: outputSegments,
|
|
2527
2449
|
reference_answer: referenceAnswer,
|
|
2528
|
-
|
|
2529
|
-
guideline_patterns: guidelinePatterns,
|
|
2530
|
-
file_paths: allFilePaths,
|
|
2450
|
+
file_paths: userFilePaths,
|
|
2531
2451
|
criteria: outcome ?? "",
|
|
2532
2452
|
evaluator: evalCaseEvaluatorKind,
|
|
2533
2453
|
assertions: evaluators
|
|
2534
2454
|
};
|
|
2535
|
-
if (verbose) {
|
|
2536
|
-
console.log(`
|
|
2537
|
-
[Test: ${id}]`);
|
|
2538
|
-
if (testCase.guideline_paths.length > 0) {
|
|
2539
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
2540
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
2541
|
-
console.log(` - ${guidelinePath}`);
|
|
2542
|
-
}
|
|
2543
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2544
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
2545
|
-
} else {
|
|
2546
|
-
console.log(" No guidelines found");
|
|
2547
|
-
}
|
|
2548
|
-
}
|
|
2549
2455
|
results.push(testCase);
|
|
2550
2456
|
}
|
|
2551
2457
|
return results;
|
|
@@ -2603,30 +2509,7 @@ function parseMetadata(suite) {
|
|
|
2603
2509
|
}
|
|
2604
2510
|
|
|
2605
2511
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2606
|
-
import { readFile as readFile6 } from "node:fs/promises";
|
|
2607
|
-
import path7 from "node:path";
|
|
2608
|
-
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2609
|
-
var ANSI_RESET7 = "\x1B[0m";
|
|
2610
2512
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2611
|
-
const guidelineParts = [];
|
|
2612
|
-
for (const rawPath of testCase.guideline_paths) {
|
|
2613
|
-
const absolutePath = path7.resolve(rawPath);
|
|
2614
|
-
if (!await fileExists2(absolutePath)) {
|
|
2615
|
-
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
2616
|
-
continue;
|
|
2617
|
-
}
|
|
2618
|
-
try {
|
|
2619
|
-
const content = (await readFile6(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
2620
|
-
guidelineParts.push({
|
|
2621
|
-
content,
|
|
2622
|
-
isFile: true,
|
|
2623
|
-
displayPath: path7.basename(absolutePath)
|
|
2624
|
-
});
|
|
2625
|
-
} catch (error) {
|
|
2626
|
-
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
2627
|
-
}
|
|
2628
|
-
}
|
|
2629
|
-
const guidelines = formatFileContents(guidelineParts);
|
|
2630
2513
|
const segmentsByMessage = [];
|
|
2631
2514
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
2632
2515
|
for (const segment of testCase.input_segments) {
|
|
@@ -2651,10 +2534,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
2651
2534
|
if (type === "file") {
|
|
2652
2535
|
const value = asString5(segment.value);
|
|
2653
2536
|
if (!value) continue;
|
|
2654
|
-
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
2655
|
-
messageSegments.push({ type: "guideline_ref", path: value });
|
|
2656
|
-
continue;
|
|
2657
|
-
}
|
|
2658
2537
|
const fileText = fileContentsByPath.get(value);
|
|
2659
2538
|
if (fileText !== void 0) {
|
|
2660
2539
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
@@ -2703,10 +2582,6 @@ ${messageContent}`);
|
|
|
2703
2582
|
} else {
|
|
2704
2583
|
const questionParts = [];
|
|
2705
2584
|
for (const segment of testCase.input_segments) {
|
|
2706
|
-
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
2707
|
-
questionParts.push(`<Attached: ${segment.path}>`);
|
|
2708
|
-
continue;
|
|
2709
|
-
}
|
|
2710
2585
|
const formattedContent = formatSegment(segment, mode);
|
|
2711
2586
|
if (formattedContent) {
|
|
2712
2587
|
questionParts.push(formattedContent);
|
|
@@ -2717,11 +2592,9 @@ ${messageContent}`);
|
|
|
2717
2592
|
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
2718
2593
|
messages: testCase.input,
|
|
2719
2594
|
segmentsByMessage,
|
|
2720
|
-
guidelinePatterns: testCase.guideline_patterns,
|
|
2721
|
-
guidelineContent: guidelines,
|
|
2722
2595
|
mode
|
|
2723
2596
|
}) : void 0;
|
|
2724
|
-
return { question,
|
|
2597
|
+
return { question, chatPrompt };
|
|
2725
2598
|
}
|
|
2726
2599
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
2727
2600
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -2736,14 +2609,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
2736
2609
|
return messagesWithContent > 1;
|
|
2737
2610
|
}
|
|
2738
2611
|
function buildChatPromptFromSegments(options) {
|
|
2739
|
-
const {
|
|
2740
|
-
messages,
|
|
2741
|
-
segmentsByMessage,
|
|
2742
|
-
guidelinePatterns,
|
|
2743
|
-
guidelineContent,
|
|
2744
|
-
systemPrompt,
|
|
2745
|
-
mode = "lm"
|
|
2746
|
-
} = options;
|
|
2612
|
+
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
2747
2613
|
if (messages.length === 0) {
|
|
2748
2614
|
return void 0;
|
|
2749
2615
|
}
|
|
@@ -2751,11 +2617,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
2751
2617
|
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
2752
2618
|
systemSegments.push(systemPrompt.trim());
|
|
2753
2619
|
}
|
|
2754
|
-
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
2755
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
2756
|
-
|
|
2757
|
-
${guidelineContent.trim()}`);
|
|
2758
|
-
}
|
|
2759
2620
|
let startIndex = 0;
|
|
2760
2621
|
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
2761
2622
|
const segments = segmentsByMessage[startIndex];
|
|
@@ -2791,15 +2652,8 @@ ${guidelineContent.trim()}`);
|
|
|
2791
2652
|
contentParts.push("@[Tool]:");
|
|
2792
2653
|
}
|
|
2793
2654
|
for (const segment of segments) {
|
|
2794
|
-
if (segment.type === "guideline_ref") {
|
|
2795
|
-
continue;
|
|
2796
|
-
}
|
|
2797
2655
|
const formatted = formatSegment(segment, mode);
|
|
2798
2656
|
if (formatted) {
|
|
2799
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
2800
|
-
if (isGuidelineRef) {
|
|
2801
|
-
continue;
|
|
2802
|
-
}
|
|
2803
2657
|
contentParts.push(formatted);
|
|
2804
2658
|
}
|
|
2805
2659
|
}
|
|
@@ -2817,30 +2671,27 @@ ${guidelineContent.trim()}`);
|
|
|
2817
2671
|
function asString5(value) {
|
|
2818
2672
|
return typeof value === "string" ? value : void 0;
|
|
2819
2673
|
}
|
|
2820
|
-
function logWarning5(message) {
|
|
2821
|
-
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
2822
|
-
}
|
|
2823
2674
|
|
|
2824
2675
|
// src/evaluation/yaml-parser.ts
|
|
2825
|
-
var
|
|
2676
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2826
2677
|
var ANSI_RED3 = "\x1B[31m";
|
|
2827
|
-
var
|
|
2678
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
2828
2679
|
function resolveTests(suite) {
|
|
2829
2680
|
if (suite.tests !== void 0) return suite.tests;
|
|
2830
2681
|
if (suite.eval_cases !== void 0) {
|
|
2831
|
-
|
|
2682
|
+
logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
|
|
2832
2683
|
return suite.eval_cases;
|
|
2833
2684
|
}
|
|
2834
2685
|
if (suite.evalcases !== void 0) {
|
|
2835
|
-
|
|
2686
|
+
logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
|
|
2836
2687
|
return suite.evalcases;
|
|
2837
2688
|
}
|
|
2838
2689
|
return void 0;
|
|
2839
2690
|
}
|
|
2840
2691
|
async function readTestSuiteMetadata(testFilePath) {
|
|
2841
2692
|
try {
|
|
2842
|
-
const absolutePath =
|
|
2843
|
-
const content = await
|
|
2693
|
+
const absolutePath = path7.resolve(testFilePath);
|
|
2694
|
+
const content = await readFile6(absolutePath, "utf8");
|
|
2844
2695
|
const parsed = interpolateEnv(parse2(content), process.env);
|
|
2845
2696
|
if (!isJsonObject(parsed)) {
|
|
2846
2697
|
return {};
|
|
@@ -2891,26 +2742,25 @@ var loadEvalCases = loadTests;
|
|
|
2891
2742
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
2892
2743
|
const verbose = options?.verbose ?? false;
|
|
2893
2744
|
const filterPattern = options?.filter;
|
|
2894
|
-
const absoluteTestPath =
|
|
2745
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
2895
2746
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2896
2747
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2897
2748
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2898
|
-
const
|
|
2899
|
-
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
2749
|
+
const rawFile = await readFile6(absoluteTestPath, "utf8");
|
|
2900
2750
|
const interpolated = interpolateEnv(parse2(rawFile), process.env);
|
|
2901
2751
|
if (!isJsonObject(interpolated)) {
|
|
2902
2752
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
2903
2753
|
}
|
|
2904
2754
|
const suite = interpolated;
|
|
2905
|
-
const
|
|
2906
|
-
const
|
|
2907
|
-
const
|
|
2755
|
+
const evalSetNameFromSuite = asString6(suite.name)?.trim();
|
|
2756
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
2757
|
+
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
2908
2758
|
const rawTestcases = resolveTests(suite);
|
|
2909
2759
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
2910
|
-
const evalFileDir =
|
|
2760
|
+
const evalFileDir = path7.dirname(absoluteTestPath);
|
|
2911
2761
|
let expandedTestcases;
|
|
2912
2762
|
if (typeof rawTestcases === "string") {
|
|
2913
|
-
const externalPath =
|
|
2763
|
+
const externalPath = path7.resolve(evalFileDir, rawTestcases);
|
|
2914
2764
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
2915
2765
|
} else if (Array.isArray(rawTestcases)) {
|
|
2916
2766
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -2924,18 +2774,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2924
2774
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
2925
2775
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
2926
2776
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
2927
|
-
|
|
2777
|
+
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
2928
2778
|
}
|
|
2929
2779
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
2930
2780
|
const results = [];
|
|
2931
2781
|
for (const rawEvalcase of expandedTestcases) {
|
|
2932
2782
|
if (!isJsonObject(rawEvalcase)) {
|
|
2933
|
-
|
|
2783
|
+
logWarning5("Skipping invalid test entry (expected object)");
|
|
2934
2784
|
continue;
|
|
2935
2785
|
}
|
|
2936
2786
|
const evalcase = rawEvalcase;
|
|
2937
2787
|
const id = asString6(evalcase.id);
|
|
2938
|
-
if (filterPattern && (!id || !
|
|
2788
|
+
if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
|
|
2939
2789
|
continue;
|
|
2940
2790
|
}
|
|
2941
2791
|
const conversationId = asString6(evalcase.conversation_id);
|
|
@@ -2943,7 +2793,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2943
2793
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
2944
2794
|
outcome = asString6(evalcase.expected_outcome);
|
|
2945
2795
|
if (outcome) {
|
|
2946
|
-
|
|
2796
|
+
logWarning5(
|
|
2947
2797
|
`Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
2948
2798
|
);
|
|
2949
2799
|
}
|
|
@@ -2963,15 +2813,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2963
2813
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
2964
2814
|
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
2965
2815
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2966
|
-
const guidelinePaths = [];
|
|
2967
2816
|
const inputTextParts = [];
|
|
2968
2817
|
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
2969
2818
|
messages: effectiveSuiteInputMessages,
|
|
2970
2819
|
searchRoots,
|
|
2971
2820
|
repoRootPath,
|
|
2972
|
-
guidelinePatterns,
|
|
2973
|
-
guidelinePaths,
|
|
2974
|
-
treatFileSegmentsAsGuidelines: true,
|
|
2975
2821
|
textParts: inputTextParts,
|
|
2976
2822
|
messageType: "input",
|
|
2977
2823
|
verbose
|
|
@@ -2980,8 +2826,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2980
2826
|
messages: testInputMessages,
|
|
2981
2827
|
searchRoots,
|
|
2982
2828
|
repoRootPath,
|
|
2983
|
-
guidelinePatterns,
|
|
2984
|
-
guidelinePaths,
|
|
2985
2829
|
textParts: inputTextParts,
|
|
2986
2830
|
messageType: "input",
|
|
2987
2831
|
verbose
|
|
@@ -3030,26 +2874,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3030
2874
|
userFilePaths.push(segment.resolvedPath);
|
|
3031
2875
|
}
|
|
3032
2876
|
}
|
|
3033
|
-
const allFilePaths = [
|
|
3034
|
-
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
3035
|
-
...userFilePaths
|
|
3036
|
-
];
|
|
3037
2877
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
3038
2878
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
3039
2879
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
3040
2880
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
3041
2881
|
const testCase = {
|
|
3042
2882
|
id,
|
|
3043
|
-
|
|
2883
|
+
eval_set: evalSetName,
|
|
3044
2884
|
conversation_id: conversationId,
|
|
3045
2885
|
question,
|
|
3046
2886
|
input: inputMessages,
|
|
3047
2887
|
input_segments: inputSegments,
|
|
3048
2888
|
expected_output: outputSegments,
|
|
3049
2889
|
reference_answer: referenceAnswer,
|
|
3050
|
-
|
|
3051
|
-
guideline_patterns: guidelinePatterns,
|
|
3052
|
-
file_paths: allFilePaths,
|
|
2890
|
+
file_paths: userFilePaths,
|
|
3053
2891
|
criteria: outcome ?? "",
|
|
3054
2892
|
evaluator: evalCaseEvaluatorKind,
|
|
3055
2893
|
assertions: evaluators,
|
|
@@ -3057,20 +2895,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3057
2895
|
metadata,
|
|
3058
2896
|
targets: caseTargets
|
|
3059
2897
|
};
|
|
3060
|
-
if (verbose) {
|
|
3061
|
-
console.log(`
|
|
3062
|
-
[Test: ${id}]`);
|
|
3063
|
-
if (testCase.guideline_paths.length > 0) {
|
|
3064
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
3065
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
3066
|
-
console.log(` - ${guidelinePath}`);
|
|
3067
|
-
}
|
|
3068
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
3069
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
3070
|
-
} else {
|
|
3071
|
-
console.log(" No guidelines found");
|
|
3072
|
-
}
|
|
3073
|
-
}
|
|
3074
2898
|
results.push(testCase);
|
|
3075
2899
|
}
|
|
3076
2900
|
return { tests: results, parsed: suite };
|
|
@@ -3089,7 +2913,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3089
2913
|
if (!isJsonObject(raw)) return void 0;
|
|
3090
2914
|
const obj = raw;
|
|
3091
2915
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
3092
|
-
|
|
2916
|
+
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
3093
2917
|
}
|
|
3094
2918
|
const commandSource = obj.command ?? obj.script;
|
|
3095
2919
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
@@ -3097,8 +2921,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3097
2921
|
if (commandArr.length === 0) return void 0;
|
|
3098
2922
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
3099
2923
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
3100
|
-
if (cwd && !
|
|
3101
|
-
cwd =
|
|
2924
|
+
if (cwd && !path7.isAbsolute(cwd)) {
|
|
2925
|
+
cwd = path7.resolve(evalFileDir, cwd);
|
|
3102
2926
|
}
|
|
3103
2927
|
const config = { command: commandArr };
|
|
3104
2928
|
if (timeoutMs !== void 0) {
|
|
@@ -3188,10 +3012,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
3188
3012
|
}
|
|
3189
3013
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3190
3014
|
if (typeof raw === "string") {
|
|
3191
|
-
const workspaceFilePath =
|
|
3015
|
+
const workspaceFilePath = path7.resolve(evalFileDir, raw);
|
|
3192
3016
|
let content;
|
|
3193
3017
|
try {
|
|
3194
|
-
content = await
|
|
3018
|
+
content = await readFile6(workspaceFilePath, "utf8");
|
|
3195
3019
|
} catch {
|
|
3196
3020
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3197
3021
|
}
|
|
@@ -3201,7 +3025,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
3201
3025
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3202
3026
|
);
|
|
3203
3027
|
}
|
|
3204
|
-
const workspaceFileDir =
|
|
3028
|
+
const workspaceFileDir = path7.dirname(workspaceFilePath);
|
|
3205
3029
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3206
3030
|
}
|
|
3207
3031
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -3221,8 +3045,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3221
3045
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
3222
3046
|
}
|
|
3223
3047
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
3224
|
-
if (template && !
|
|
3225
|
-
template =
|
|
3048
|
+
if (template && !path7.isAbsolute(template)) {
|
|
3049
|
+
template = path7.resolve(evalFileDir, template);
|
|
3226
3050
|
}
|
|
3227
3051
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3228
3052
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -3272,28 +3096,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3272
3096
|
function asString6(value) {
|
|
3273
3097
|
return typeof value === "string" ? value : void 0;
|
|
3274
3098
|
}
|
|
3275
|
-
function
|
|
3099
|
+
function logWarning5(message, details) {
|
|
3276
3100
|
if (details && details.length > 0) {
|
|
3277
3101
|
const detailBlock = details.join("\n");
|
|
3278
|
-
console.warn(`${
|
|
3279
|
-
${detailBlock}${
|
|
3102
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}
|
|
3103
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
3280
3104
|
} else {
|
|
3281
|
-
console.warn(`${
|
|
3105
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
3282
3106
|
}
|
|
3283
3107
|
}
|
|
3284
3108
|
function logError3(message, details) {
|
|
3285
3109
|
if (details && details.length > 0) {
|
|
3286
3110
|
const detailBlock = details.join("\n");
|
|
3287
3111
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
3288
|
-
${detailBlock}${
|
|
3112
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
3289
3113
|
} else {
|
|
3290
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
3114
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET7}`);
|
|
3291
3115
|
}
|
|
3292
3116
|
}
|
|
3293
3117
|
|
|
3294
3118
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3295
3119
|
import { readFileSync } from "node:fs";
|
|
3296
|
-
import
|
|
3120
|
+
import path8 from "node:path";
|
|
3297
3121
|
import { parse as parse3 } from "yaml";
|
|
3298
3122
|
function codeGraderInstruction(graderName, description) {
|
|
3299
3123
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -3538,7 +3362,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
3538
3362
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
3539
3363
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
3540
3364
|
const parsed = parse3(content);
|
|
3541
|
-
return transpileEvalYaml(parsed,
|
|
3365
|
+
return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
|
|
3542
3366
|
}
|
|
3543
3367
|
function getOutputFilenames(result) {
|
|
3544
3368
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -3773,10 +3597,10 @@ function buildChatPrompt(request) {
|
|
|
3773
3597
|
if (hasSystemMessage) {
|
|
3774
3598
|
return provided;
|
|
3775
3599
|
}
|
|
3776
|
-
const systemContent2 = resolveSystemContent(request
|
|
3600
|
+
const systemContent2 = resolveSystemContent(request);
|
|
3777
3601
|
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
3778
3602
|
}
|
|
3779
|
-
const systemContent = resolveSystemContent(request
|
|
3603
|
+
const systemContent = resolveSystemContent(request);
|
|
3780
3604
|
const userContent = request.question.trim();
|
|
3781
3605
|
const prompt = [
|
|
3782
3606
|
{ role: "system", content: systemContent },
|
|
@@ -3784,18 +3608,13 @@ function buildChatPrompt(request) {
|
|
|
3784
3608
|
];
|
|
3785
3609
|
return prompt;
|
|
3786
3610
|
}
|
|
3787
|
-
function resolveSystemContent(request
|
|
3611
|
+
function resolveSystemContent(request) {
|
|
3788
3612
|
const systemSegments = [];
|
|
3789
3613
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
3790
3614
|
systemSegments.push(request.systemPrompt.trim());
|
|
3791
3615
|
} else {
|
|
3792
3616
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
3793
3617
|
}
|
|
3794
|
-
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
3795
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
3796
|
-
|
|
3797
|
-
${request.guidelines.trim()}`);
|
|
3798
|
-
}
|
|
3799
3618
|
return systemSegments.join("\n\n");
|
|
3800
3619
|
}
|
|
3801
3620
|
function toModelMessages(chatPrompt) {
|
|
@@ -3979,7 +3798,7 @@ import { spawn } from "node:child_process";
|
|
|
3979
3798
|
import { randomUUID } from "node:crypto";
|
|
3980
3799
|
import { createWriteStream } from "node:fs";
|
|
3981
3800
|
import { mkdir } from "node:fs/promises";
|
|
3982
|
-
import
|
|
3801
|
+
import path10 from "node:path";
|
|
3983
3802
|
|
|
3984
3803
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
3985
3804
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -4035,17 +3854,11 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
4035
3854
|
}
|
|
4036
3855
|
|
|
4037
3856
|
// src/evaluation/providers/preread.ts
|
|
4038
|
-
import
|
|
4039
|
-
function buildPromptDocument(request, inputFiles
|
|
3857
|
+
import path9 from "node:path";
|
|
3858
|
+
function buildPromptDocument(request, inputFiles) {
|
|
4040
3859
|
const parts = [];
|
|
4041
|
-
const guidelineFiles = collectGuidelineFiles(
|
|
4042
|
-
inputFiles,
|
|
4043
|
-
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
4044
|
-
options?.guidelineOverrides
|
|
4045
|
-
);
|
|
4046
3860
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
4047
|
-
const
|
|
4048
|
-
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
3861
|
+
const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
|
|
4049
3862
|
if (prereadBlock.length > 0) {
|
|
4050
3863
|
parts.push("\n", prereadBlock);
|
|
4051
3864
|
}
|
|
@@ -4058,62 +3871,36 @@ function normalizeInputFiles(inputFiles) {
|
|
|
4058
3871
|
}
|
|
4059
3872
|
const deduped = /* @__PURE__ */ new Map();
|
|
4060
3873
|
for (const inputFile of inputFiles) {
|
|
4061
|
-
const absolutePath =
|
|
3874
|
+
const absolutePath = path9.resolve(inputFile);
|
|
4062
3875
|
if (!deduped.has(absolutePath)) {
|
|
4063
3876
|
deduped.set(absolutePath, absolutePath);
|
|
4064
3877
|
}
|
|
4065
3878
|
}
|
|
4066
3879
|
return Array.from(deduped.values());
|
|
4067
3880
|
}
|
|
4068
|
-
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
4069
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
4070
|
-
return [];
|
|
4071
|
-
}
|
|
4072
|
-
const unique = /* @__PURE__ */ new Map();
|
|
4073
|
-
for (const inputFile of inputFiles) {
|
|
4074
|
-
const absolutePath = path10.resolve(inputFile);
|
|
4075
|
-
if (overrides?.has(absolutePath)) {
|
|
4076
|
-
if (!unique.has(absolutePath)) {
|
|
4077
|
-
unique.set(absolutePath, absolutePath);
|
|
4078
|
-
}
|
|
4079
|
-
continue;
|
|
4080
|
-
}
|
|
4081
|
-
const normalized = absolutePath.split(path10.sep).join("/");
|
|
4082
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
4083
|
-
if (!unique.has(absolutePath)) {
|
|
4084
|
-
unique.set(absolutePath, absolutePath);
|
|
4085
|
-
}
|
|
4086
|
-
}
|
|
4087
|
-
}
|
|
4088
|
-
return Array.from(unique.values());
|
|
4089
|
-
}
|
|
4090
3881
|
function collectInputFiles(inputFiles) {
|
|
4091
3882
|
if (!inputFiles || inputFiles.length === 0) {
|
|
4092
3883
|
return [];
|
|
4093
3884
|
}
|
|
4094
3885
|
const unique = /* @__PURE__ */ new Map();
|
|
4095
3886
|
for (const inputFile of inputFiles) {
|
|
4096
|
-
const absolutePath =
|
|
3887
|
+
const absolutePath = path9.resolve(inputFile);
|
|
4097
3888
|
if (!unique.has(absolutePath)) {
|
|
4098
3889
|
unique.set(absolutePath, absolutePath);
|
|
4099
3890
|
}
|
|
4100
3891
|
}
|
|
4101
3892
|
return Array.from(unique.values());
|
|
4102
3893
|
}
|
|
4103
|
-
function buildMandatoryPrereadBlock(
|
|
4104
|
-
if (
|
|
3894
|
+
function buildMandatoryPrereadBlock(inputFiles) {
|
|
3895
|
+
if (inputFiles.length === 0) {
|
|
4105
3896
|
return "";
|
|
4106
3897
|
}
|
|
4107
3898
|
const buildList = (files) => files.map((absolutePath) => {
|
|
4108
|
-
const fileName =
|
|
3899
|
+
const fileName = path9.basename(absolutePath);
|
|
4109
3900
|
const fileUri = pathToFileUri(absolutePath);
|
|
4110
3901
|
return `* [${fileName}](${fileUri})`;
|
|
4111
3902
|
});
|
|
4112
3903
|
const sections = [];
|
|
4113
|
-
if (guidelineFiles.length > 0) {
|
|
4114
|
-
sections.push(`Read all guideline files:
|
|
4115
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
4116
|
-
}
|
|
4117
3904
|
if (inputFiles.length > 0) {
|
|
4118
3905
|
sections.push(`Read all input files:
|
|
4119
3906
|
${buildList(inputFiles).join("\n")}.`);
|
|
@@ -4125,7 +3912,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
4125
3912
|
return sections.join("\n");
|
|
4126
3913
|
}
|
|
4127
3914
|
function pathToFileUri(filePath) {
|
|
4128
|
-
const absolutePath =
|
|
3915
|
+
const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
|
|
4129
3916
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
4130
3917
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
4131
3918
|
return `file:///${normalizedPath}`;
|
|
@@ -4272,10 +4059,10 @@ var ClaudeCliProvider = class {
|
|
|
4272
4059
|
}
|
|
4273
4060
|
resolveCwd(cwdOverride) {
|
|
4274
4061
|
if (cwdOverride) {
|
|
4275
|
-
return
|
|
4062
|
+
return path10.resolve(cwdOverride);
|
|
4276
4063
|
}
|
|
4277
4064
|
if (this.config.cwd) {
|
|
4278
|
-
return
|
|
4065
|
+
return path10.resolve(this.config.cwd);
|
|
4279
4066
|
}
|
|
4280
4067
|
return void 0;
|
|
4281
4068
|
}
|
|
@@ -4285,9 +4072,9 @@ var ClaudeCliProvider = class {
|
|
|
4285
4072
|
return void 0;
|
|
4286
4073
|
}
|
|
4287
4074
|
if (this.config.logDir) {
|
|
4288
|
-
return
|
|
4075
|
+
return path10.resolve(this.config.logDir);
|
|
4289
4076
|
}
|
|
4290
|
-
return
|
|
4077
|
+
return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
4291
4078
|
}
|
|
4292
4079
|
async createStreamLogger(request) {
|
|
4293
4080
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4301,7 +4088,7 @@ var ClaudeCliProvider = class {
|
|
|
4301
4088
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
4302
4089
|
return void 0;
|
|
4303
4090
|
}
|
|
4304
|
-
const filePath =
|
|
4091
|
+
const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
|
|
4305
4092
|
try {
|
|
4306
4093
|
const logger = await ClaudeCliStreamLogger.create({
|
|
4307
4094
|
filePath,
|
|
@@ -4622,7 +4409,7 @@ function tryParseJson(line) {
|
|
|
4622
4409
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
4623
4410
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
4624
4411
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
4625
|
-
import
|
|
4412
|
+
import path11 from "node:path";
|
|
4626
4413
|
var claudeSdkModule = null;
|
|
4627
4414
|
async function loadClaudeSdk() {
|
|
4628
4415
|
if (!claudeSdkModule) {
|
|
@@ -4782,10 +4569,10 @@ var ClaudeSdkProvider = class {
|
|
|
4782
4569
|
}
|
|
4783
4570
|
resolveCwd(cwdOverride) {
|
|
4784
4571
|
if (cwdOverride) {
|
|
4785
|
-
return
|
|
4572
|
+
return path11.resolve(cwdOverride);
|
|
4786
4573
|
}
|
|
4787
4574
|
if (this.config.cwd) {
|
|
4788
|
-
return
|
|
4575
|
+
return path11.resolve(this.config.cwd);
|
|
4789
4576
|
}
|
|
4790
4577
|
return void 0;
|
|
4791
4578
|
}
|
|
@@ -4795,9 +4582,9 @@ var ClaudeSdkProvider = class {
|
|
|
4795
4582
|
return void 0;
|
|
4796
4583
|
}
|
|
4797
4584
|
if (this.config.logDir) {
|
|
4798
|
-
return
|
|
4585
|
+
return path11.resolve(this.config.logDir);
|
|
4799
4586
|
}
|
|
4800
|
-
return
|
|
4587
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude");
|
|
4801
4588
|
}
|
|
4802
4589
|
async createStreamLogger(request) {
|
|
4803
4590
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4811,7 +4598,7 @@ var ClaudeSdkProvider = class {
|
|
|
4811
4598
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
4812
4599
|
return void 0;
|
|
4813
4600
|
}
|
|
4814
|
-
const filePath =
|
|
4601
|
+
const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
|
|
4815
4602
|
try {
|
|
4816
4603
|
const logger = await ClaudeStreamLogger.create({
|
|
4817
4604
|
filePath,
|
|
@@ -5018,7 +4805,7 @@ function formatElapsed2(startedAt) {
|
|
|
5018
4805
|
import { exec as execWithCallback } from "node:child_process";
|
|
5019
4806
|
import fs from "node:fs/promises";
|
|
5020
4807
|
import os from "node:os";
|
|
5021
|
-
import
|
|
4808
|
+
import path12 from "node:path";
|
|
5022
4809
|
import { promisify } from "node:util";
|
|
5023
4810
|
import { z as z2 } from "zod";
|
|
5024
4811
|
var ToolCallSchema = z2.object({
|
|
@@ -5227,7 +5014,6 @@ var CliProvider = class {
|
|
|
5227
5014
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5228
5015
|
{
|
|
5229
5016
|
question: "",
|
|
5230
|
-
guidelines: "",
|
|
5231
5017
|
inputFiles: batchInputFiles,
|
|
5232
5018
|
evalCaseId: "batch",
|
|
5233
5019
|
attempt: 0
|
|
@@ -5460,7 +5246,6 @@ var CliProvider = class {
|
|
|
5460
5246
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5461
5247
|
{
|
|
5462
5248
|
question: "",
|
|
5463
|
-
guidelines: "",
|
|
5464
5249
|
inputFiles: [],
|
|
5465
5250
|
evalCaseId: "healthcheck",
|
|
5466
5251
|
attempt: 0
|
|
@@ -5501,7 +5286,6 @@ async function buildTemplateValues(request, config, outputFilePath) {
|
|
|
5501
5286
|
values: {
|
|
5502
5287
|
PROMPT: shellEscape(request.question ?? ""),
|
|
5503
5288
|
PROMPT_FILE: shellEscape(promptFilePath),
|
|
5504
|
-
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
5505
5289
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
5506
5290
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
5507
5291
|
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
@@ -5523,7 +5307,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
5523
5307
|
}
|
|
5524
5308
|
const unique = /* @__PURE__ */ new Map();
|
|
5525
5309
|
for (const inputFile of inputFiles) {
|
|
5526
|
-
const absolutePath =
|
|
5310
|
+
const absolutePath = path12.resolve(inputFile);
|
|
5527
5311
|
if (!unique.has(absolutePath)) {
|
|
5528
5312
|
unique.set(absolutePath, absolutePath);
|
|
5529
5313
|
}
|
|
@@ -5537,7 +5321,7 @@ function formatFileList(files, template) {
|
|
|
5537
5321
|
const formatter = template ?? "{path}";
|
|
5538
5322
|
return files.map((filePath) => {
|
|
5539
5323
|
const escapedPath = shellEscape(filePath);
|
|
5540
|
-
const escapedName = shellEscape(
|
|
5324
|
+
const escapedName = shellEscape(path12.basename(filePath));
|
|
5541
5325
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
5542
5326
|
}).join(" ");
|
|
5543
5327
|
}
|
|
@@ -5561,7 +5345,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
5561
5345
|
const safeEvalId = evalCaseId || "unknown";
|
|
5562
5346
|
const timestamp = Date.now();
|
|
5563
5347
|
const random = Math.random().toString(36).substring(2, 9);
|
|
5564
|
-
return
|
|
5348
|
+
return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
5565
5349
|
}
|
|
5566
5350
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
5567
5351
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -5575,7 +5359,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
5575
5359
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
5576
5360
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
5577
5361
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
5578
|
-
import
|
|
5362
|
+
import path13 from "node:path";
|
|
5579
5363
|
|
|
5580
5364
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
5581
5365
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -5810,10 +5594,10 @@ ${basePrompt}` : basePrompt;
|
|
|
5810
5594
|
}
|
|
5811
5595
|
resolveCwd(cwdOverride) {
|
|
5812
5596
|
if (cwdOverride) {
|
|
5813
|
-
return
|
|
5597
|
+
return path13.resolve(cwdOverride);
|
|
5814
5598
|
}
|
|
5815
5599
|
if (this.config.cwd) {
|
|
5816
|
-
return
|
|
5600
|
+
return path13.resolve(this.config.cwd);
|
|
5817
5601
|
}
|
|
5818
5602
|
return void 0;
|
|
5819
5603
|
}
|
|
@@ -5823,9 +5607,9 @@ ${basePrompt}` : basePrompt;
|
|
|
5823
5607
|
return void 0;
|
|
5824
5608
|
}
|
|
5825
5609
|
if (this.config.logDir) {
|
|
5826
|
-
return
|
|
5610
|
+
return path13.resolve(this.config.logDir);
|
|
5827
5611
|
}
|
|
5828
|
-
return
|
|
5612
|
+
return path13.join(process.cwd(), ".agentv", "logs", "codex");
|
|
5829
5613
|
}
|
|
5830
5614
|
async createStreamLogger(request) {
|
|
5831
5615
|
const logDir = this.resolveLogDirectory();
|
|
@@ -5839,7 +5623,7 @@ ${basePrompt}` : basePrompt;
|
|
|
5839
5623
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
5840
5624
|
return void 0;
|
|
5841
5625
|
}
|
|
5842
|
-
const filePath =
|
|
5626
|
+
const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
|
|
5843
5627
|
try {
|
|
5844
5628
|
const logger = await CodexSdkStreamLogger.create({
|
|
5845
5629
|
filePath,
|
|
@@ -5983,7 +5767,7 @@ function formatElapsed3(startedAt) {
|
|
|
5983
5767
|
// src/evaluation/providers/copilot-cli.ts
|
|
5984
5768
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
5985
5769
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
5986
|
-
import
|
|
5770
|
+
import path15 from "node:path";
|
|
5987
5771
|
import { Readable, Writable } from "node:stream";
|
|
5988
5772
|
import { spawn as spawn2 } from "node:child_process";
|
|
5989
5773
|
import * as acp from "@agentclientprotocol/sdk";
|
|
@@ -6045,7 +5829,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6045
5829
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6046
5830
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6047
5831
|
import { arch, platform } from "node:os";
|
|
6048
|
-
import
|
|
5832
|
+
import path14 from "node:path";
|
|
6049
5833
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
6050
5834
|
function resolvePlatformCliPath() {
|
|
6051
5835
|
const os3 = platform();
|
|
@@ -6069,7 +5853,7 @@ function resolvePlatformCliPath() {
|
|
|
6069
5853
|
try {
|
|
6070
5854
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6071
5855
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
6072
|
-
const binaryPath =
|
|
5856
|
+
const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
|
|
6073
5857
|
if (existsSync(binaryPath)) {
|
|
6074
5858
|
return binaryPath;
|
|
6075
5859
|
}
|
|
@@ -6077,7 +5861,7 @@ function resolvePlatformCliPath() {
|
|
|
6077
5861
|
}
|
|
6078
5862
|
let searchDir = process.cwd();
|
|
6079
5863
|
for (let i = 0; i < 10; i++) {
|
|
6080
|
-
const standardPath =
|
|
5864
|
+
const standardPath = path14.join(
|
|
6081
5865
|
searchDir,
|
|
6082
5866
|
"node_modules",
|
|
6083
5867
|
...packageName.split("/"),
|
|
@@ -6086,13 +5870,13 @@ function resolvePlatformCliPath() {
|
|
|
6086
5870
|
if (existsSync(standardPath)) {
|
|
6087
5871
|
return standardPath;
|
|
6088
5872
|
}
|
|
6089
|
-
const bunDir =
|
|
5873
|
+
const bunDir = path14.join(searchDir, "node_modules", ".bun");
|
|
6090
5874
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6091
5875
|
try {
|
|
6092
5876
|
const entries = readdirSync(bunDir);
|
|
6093
5877
|
for (const entry of entries) {
|
|
6094
5878
|
if (entry.startsWith(prefix)) {
|
|
6095
|
-
const candidate =
|
|
5879
|
+
const candidate = path14.join(
|
|
6096
5880
|
bunDir,
|
|
6097
5881
|
entry,
|
|
6098
5882
|
"node_modules",
|
|
@@ -6107,7 +5891,7 @@ function resolvePlatformCliPath() {
|
|
|
6107
5891
|
}
|
|
6108
5892
|
} catch {
|
|
6109
5893
|
}
|
|
6110
|
-
const parent =
|
|
5894
|
+
const parent = path14.dirname(searchDir);
|
|
6111
5895
|
if (parent === searchDir) break;
|
|
6112
5896
|
searchDir = parent;
|
|
6113
5897
|
}
|
|
@@ -6445,10 +6229,10 @@ var CopilotCliProvider = class {
|
|
|
6445
6229
|
}
|
|
6446
6230
|
resolveCwd(cwdOverride) {
|
|
6447
6231
|
if (cwdOverride) {
|
|
6448
|
-
return
|
|
6232
|
+
return path15.resolve(cwdOverride);
|
|
6449
6233
|
}
|
|
6450
6234
|
if (this.config.cwd) {
|
|
6451
|
-
return
|
|
6235
|
+
return path15.resolve(this.config.cwd);
|
|
6452
6236
|
}
|
|
6453
6237
|
return void 0;
|
|
6454
6238
|
}
|
|
@@ -6467,9 +6251,9 @@ var CopilotCliProvider = class {
|
|
|
6467
6251
|
return void 0;
|
|
6468
6252
|
}
|
|
6469
6253
|
if (this.config.logDir) {
|
|
6470
|
-
return
|
|
6254
|
+
return path15.resolve(this.config.logDir);
|
|
6471
6255
|
}
|
|
6472
|
-
return
|
|
6256
|
+
return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
6473
6257
|
}
|
|
6474
6258
|
async createStreamLogger(request) {
|
|
6475
6259
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6483,7 +6267,7 @@ var CopilotCliProvider = class {
|
|
|
6483
6267
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6484
6268
|
return void 0;
|
|
6485
6269
|
}
|
|
6486
|
-
const filePath =
|
|
6270
|
+
const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
6487
6271
|
try {
|
|
6488
6272
|
const logger = await CopilotStreamLogger.create(
|
|
6489
6273
|
{
|
|
@@ -6578,7 +6362,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6578
6362
|
// src/evaluation/providers/copilot-sdk.ts
|
|
6579
6363
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
6580
6364
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
6581
|
-
import
|
|
6365
|
+
import path16 from "node:path";
|
|
6582
6366
|
|
|
6583
6367
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
6584
6368
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -6857,10 +6641,10 @@ var CopilotSdkProvider = class {
|
|
|
6857
6641
|
}
|
|
6858
6642
|
resolveCwd(cwdOverride) {
|
|
6859
6643
|
if (cwdOverride) {
|
|
6860
|
-
return
|
|
6644
|
+
return path16.resolve(cwdOverride);
|
|
6861
6645
|
}
|
|
6862
6646
|
if (this.config.cwd) {
|
|
6863
|
-
return
|
|
6647
|
+
return path16.resolve(this.config.cwd);
|
|
6864
6648
|
}
|
|
6865
6649
|
return void 0;
|
|
6866
6650
|
}
|
|
@@ -6869,9 +6653,9 @@ var CopilotSdkProvider = class {
|
|
|
6869
6653
|
return void 0;
|
|
6870
6654
|
}
|
|
6871
6655
|
if (this.config.logDir) {
|
|
6872
|
-
return
|
|
6656
|
+
return path16.resolve(this.config.logDir);
|
|
6873
6657
|
}
|
|
6874
|
-
return
|
|
6658
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
6875
6659
|
}
|
|
6876
6660
|
async createStreamLogger(request) {
|
|
6877
6661
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6885,7 +6669,7 @@ var CopilotSdkProvider = class {
|
|
|
6885
6669
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
6886
6670
|
return void 0;
|
|
6887
6671
|
}
|
|
6888
|
-
const filePath =
|
|
6672
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
6889
6673
|
try {
|
|
6890
6674
|
const logger = await CopilotStreamLogger.create(
|
|
6891
6675
|
{
|
|
@@ -6962,8 +6746,7 @@ var MockProvider = class {
|
|
|
6962
6746
|
return {
|
|
6963
6747
|
output: [{ role: "assistant", content: this.cannedResponse }],
|
|
6964
6748
|
raw: {
|
|
6965
|
-
question: request.question
|
|
6966
|
-
guidelines: request.guidelines
|
|
6749
|
+
question: request.question
|
|
6967
6750
|
}
|
|
6968
6751
|
};
|
|
6969
6752
|
}
|
|
@@ -7241,7 +7024,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7241
7024
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
7242
7025
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7243
7026
|
import { tmpdir } from "node:os";
|
|
7244
|
-
import
|
|
7027
|
+
import path17 from "node:path";
|
|
7245
7028
|
|
|
7246
7029
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7247
7030
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -7322,7 +7105,7 @@ var PiCodingAgentProvider = class {
|
|
|
7322
7105
|
const workspaceRoot = await this.createWorkspace();
|
|
7323
7106
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
7324
7107
|
try {
|
|
7325
|
-
const promptFile =
|
|
7108
|
+
const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
|
|
7326
7109
|
await writeFile(promptFile, request.question, "utf8");
|
|
7327
7110
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
7328
7111
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
@@ -7384,12 +7167,12 @@ var PiCodingAgentProvider = class {
|
|
|
7384
7167
|
}
|
|
7385
7168
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
7386
7169
|
if (cwdOverride) {
|
|
7387
|
-
return
|
|
7170
|
+
return path17.resolve(cwdOverride);
|
|
7388
7171
|
}
|
|
7389
7172
|
if (!this.config.cwd) {
|
|
7390
7173
|
return workspaceRoot;
|
|
7391
7174
|
}
|
|
7392
|
-
return
|
|
7175
|
+
return path17.resolve(this.config.cwd);
|
|
7393
7176
|
}
|
|
7394
7177
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
7395
7178
|
const args = [];
|
|
@@ -7478,7 +7261,7 @@ ${prompt}` : prompt;
|
|
|
7478
7261
|
return env;
|
|
7479
7262
|
}
|
|
7480
7263
|
async createWorkspace() {
|
|
7481
|
-
return await mkdtemp(
|
|
7264
|
+
return await mkdtemp(path17.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7482
7265
|
}
|
|
7483
7266
|
async cleanupWorkspace(workspaceRoot) {
|
|
7484
7267
|
try {
|
|
@@ -7488,9 +7271,9 @@ ${prompt}` : prompt;
|
|
|
7488
7271
|
}
|
|
7489
7272
|
resolveLogDirectory() {
|
|
7490
7273
|
if (this.config.logDir) {
|
|
7491
|
-
return
|
|
7274
|
+
return path17.resolve(this.config.logDir);
|
|
7492
7275
|
}
|
|
7493
|
-
return
|
|
7276
|
+
return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7494
7277
|
}
|
|
7495
7278
|
async createStreamLogger(request) {
|
|
7496
7279
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7504,7 +7287,7 @@ ${prompt}` : prompt;
|
|
|
7504
7287
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7505
7288
|
return void 0;
|
|
7506
7289
|
}
|
|
7507
|
-
const filePath =
|
|
7290
|
+
const filePath = path17.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7508
7291
|
try {
|
|
7509
7292
|
const logger = await PiStreamLogger.create({
|
|
7510
7293
|
filePath,
|
|
@@ -8005,17 +7788,17 @@ var ProviderRegistry = class {
|
|
|
8005
7788
|
// src/evaluation/providers/vscode-provider.ts
|
|
8006
7789
|
import { exec as exec2 } from "node:child_process";
|
|
8007
7790
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
8008
|
-
import
|
|
7791
|
+
import path29 from "node:path";
|
|
8009
7792
|
import { promisify as promisify3 } from "node:util";
|
|
8010
7793
|
|
|
8011
7794
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
8012
7795
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
8013
|
-
import
|
|
7796
|
+
import path27 from "node:path";
|
|
8014
7797
|
|
|
8015
7798
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
8016
7799
|
import { constants as constants2 } from "node:fs";
|
|
8017
7800
|
import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8018
|
-
import
|
|
7801
|
+
import path18 from "node:path";
|
|
8019
7802
|
async function pathExists(target) {
|
|
8020
7803
|
try {
|
|
8021
7804
|
await access2(target, constants2.F_OK);
|
|
@@ -8031,7 +7814,7 @@ async function readDirEntries(target) {
|
|
|
8031
7814
|
const entries = await readdir(target, { withFileTypes: true });
|
|
8032
7815
|
return entries.map((entry) => ({
|
|
8033
7816
|
name: entry.name,
|
|
8034
|
-
absolutePath:
|
|
7817
|
+
absolutePath: path18.join(target, entry.name),
|
|
8035
7818
|
isDirectory: entry.isDirectory()
|
|
8036
7819
|
}));
|
|
8037
7820
|
}
|
|
@@ -8046,9 +7829,9 @@ async function removeIfExists(target) {
|
|
|
8046
7829
|
}
|
|
8047
7830
|
|
|
8048
7831
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
8049
|
-
import
|
|
7832
|
+
import path19 from "node:path";
|
|
8050
7833
|
function pathToFileUri2(filePath) {
|
|
8051
|
-
const absolutePath =
|
|
7834
|
+
const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
|
|
8052
7835
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8053
7836
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8054
7837
|
return `file:///${normalizedPath}`;
|
|
@@ -8057,7 +7840,7 @@ function pathToFileUri2(filePath) {
|
|
|
8057
7840
|
}
|
|
8058
7841
|
|
|
8059
7842
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
8060
|
-
import
|
|
7843
|
+
import path20 from "node:path";
|
|
8061
7844
|
|
|
8062
7845
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
8063
7846
|
function renderTemplate2(content, variables) {
|
|
@@ -8149,8 +7932,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
8149
7932
|
});
|
|
8150
7933
|
}
|
|
8151
7934
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
8152
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
8153
|
-
const responseList = responseFiles.map((file) => `"${
|
|
7935
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
|
|
7936
|
+
const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
|
|
8154
7937
|
return renderTemplate2(templateContent, {
|
|
8155
7938
|
requestFiles: requestLines,
|
|
8156
7939
|
responseList
|
|
@@ -8158,8 +7941,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
8158
7941
|
}
|
|
8159
7942
|
|
|
8160
7943
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
8161
|
-
import { readFile as
|
|
8162
|
-
import
|
|
7944
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
7945
|
+
import path21 from "node:path";
|
|
8163
7946
|
|
|
8164
7947
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
8165
7948
|
function sleep2(ms) {
|
|
@@ -8197,7 +7980,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8197
7980
|
const maxAttempts = 10;
|
|
8198
7981
|
while (attempts < maxAttempts) {
|
|
8199
7982
|
try {
|
|
8200
|
-
const content = await
|
|
7983
|
+
const content = await readFile7(responseFileFinal, { encoding: "utf8" });
|
|
8201
7984
|
if (!silent) {
|
|
8202
7985
|
process.stdout.write(`${content}
|
|
8203
7986
|
`);
|
|
@@ -8218,7 +8001,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8218
8001
|
}
|
|
8219
8002
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8220
8003
|
if (!silent) {
|
|
8221
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8004
|
+
const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
|
|
8222
8005
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8223
8006
|
}
|
|
8224
8007
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8227,7 +8010,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8227
8010
|
while (pending.size > 0) {
|
|
8228
8011
|
if (Date.now() >= deadline) {
|
|
8229
8012
|
if (!silent) {
|
|
8230
|
-
const remaining = [...pending].map((f) =>
|
|
8013
|
+
const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
|
|
8231
8014
|
console.error(
|
|
8232
8015
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8233
8016
|
);
|
|
@@ -8254,7 +8037,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8254
8037
|
const maxAttempts = 10;
|
|
8255
8038
|
while (attempts < maxAttempts) {
|
|
8256
8039
|
try {
|
|
8257
|
-
const content = await
|
|
8040
|
+
const content = await readFile7(file, { encoding: "utf8" });
|
|
8258
8041
|
if (!silent) {
|
|
8259
8042
|
process.stdout.write(`${content}
|
|
8260
8043
|
`);
|
|
@@ -8278,15 +8061,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8278
8061
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8279
8062
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8280
8063
|
import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
|
|
8281
|
-
import
|
|
8064
|
+
import path24 from "node:path";
|
|
8282
8065
|
import { promisify as promisify2 } from "node:util";
|
|
8283
8066
|
|
|
8284
8067
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8285
|
-
import
|
|
8068
|
+
import path23 from "node:path";
|
|
8286
8069
|
|
|
8287
8070
|
// src/paths.ts
|
|
8288
8071
|
import os2 from "node:os";
|
|
8289
|
-
import
|
|
8072
|
+
import path22 from "node:path";
|
|
8290
8073
|
var logged = false;
|
|
8291
8074
|
function getAgentvHome() {
|
|
8292
8075
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8297,19 +8080,19 @@ function getAgentvHome() {
|
|
|
8297
8080
|
}
|
|
8298
8081
|
return envHome;
|
|
8299
8082
|
}
|
|
8300
|
-
return
|
|
8083
|
+
return path22.join(os2.homedir(), ".agentv");
|
|
8301
8084
|
}
|
|
8302
8085
|
function getWorkspacesRoot() {
|
|
8303
|
-
return
|
|
8086
|
+
return path22.join(getAgentvHome(), "workspaces");
|
|
8304
8087
|
}
|
|
8305
8088
|
function getSubagentsRoot() {
|
|
8306
|
-
return
|
|
8089
|
+
return path22.join(getAgentvHome(), "subagents");
|
|
8307
8090
|
}
|
|
8308
8091
|
function getTraceStateRoot() {
|
|
8309
|
-
return
|
|
8092
|
+
return path22.join(getAgentvHome(), "trace-state");
|
|
8310
8093
|
}
|
|
8311
8094
|
function getWorkspacePoolRoot() {
|
|
8312
|
-
return
|
|
8095
|
+
return path22.join(getAgentvHome(), "workspace-pool");
|
|
8313
8096
|
}
|
|
8314
8097
|
|
|
8315
8098
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8317,7 +8100,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8317
8100
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8318
8101
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
8319
8102
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
8320
|
-
return
|
|
8103
|
+
return path23.join(getSubagentsRoot(), folder);
|
|
8321
8104
|
}
|
|
8322
8105
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8323
8106
|
|
|
@@ -8384,11 +8167,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8384
8167
|
await raceSpawnError(child);
|
|
8385
8168
|
return true;
|
|
8386
8169
|
}
|
|
8387
|
-
const aliveFile =
|
|
8170
|
+
const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8388
8171
|
await removeIfExists(aliveFile);
|
|
8389
|
-
const githubAgentsDir =
|
|
8172
|
+
const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
|
|
8390
8173
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
8391
|
-
const wakeupDst =
|
|
8174
|
+
const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
|
|
8392
8175
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8393
8176
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8394
8177
|
label: "open-workspace"
|
|
@@ -8401,7 +8184,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8401
8184
|
"chat",
|
|
8402
8185
|
"-m",
|
|
8403
8186
|
wakeupChatId,
|
|
8404
|
-
`create a file named .alive in the ${
|
|
8187
|
+
`create a file named .alive in the ${path24.basename(subagentDir)} folder`
|
|
8405
8188
|
];
|
|
8406
8189
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8407
8190
|
await raceSpawnError(wakeupChild);
|
|
@@ -8416,10 +8199,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8416
8199
|
return true;
|
|
8417
8200
|
}
|
|
8418
8201
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8419
|
-
const workspacePath =
|
|
8420
|
-
const messagesDir =
|
|
8202
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
8203
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
8421
8204
|
await mkdir8(messagesDir, { recursive: true });
|
|
8422
|
-
const reqFile =
|
|
8205
|
+
const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
|
|
8423
8206
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8424
8207
|
const reqUri = pathToFileUri2(reqFile);
|
|
8425
8208
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8427,16 +8210,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8427
8210
|
chatArgs.push("-a", attachment);
|
|
8428
8211
|
}
|
|
8429
8212
|
chatArgs.push("-a", reqFile);
|
|
8430
|
-
chatArgs.push(`Follow instructions in [${
|
|
8213
|
+
chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
|
|
8431
8214
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8432
8215
|
workspacePath,
|
|
8433
|
-
|
|
8216
|
+
path24.basename(subagentDir),
|
|
8434
8217
|
subagentDir,
|
|
8435
8218
|
vscodeCmd
|
|
8436
8219
|
);
|
|
8437
8220
|
if (!workspaceReady) {
|
|
8438
8221
|
throw new Error(
|
|
8439
|
-
`VS Code workspace '${
|
|
8222
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8440
8223
|
);
|
|
8441
8224
|
}
|
|
8442
8225
|
await sleep2(500);
|
|
@@ -8444,8 +8227,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8444
8227
|
await raceSpawnError(child);
|
|
8445
8228
|
}
|
|
8446
8229
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8447
|
-
const workspacePath =
|
|
8448
|
-
const messagesDir =
|
|
8230
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
8231
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
8449
8232
|
await mkdir8(messagesDir, { recursive: true });
|
|
8450
8233
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8451
8234
|
for (const attachment of attachmentPaths) {
|
|
@@ -8454,13 +8237,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8454
8237
|
chatArgs.push(chatInstruction);
|
|
8455
8238
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8456
8239
|
workspacePath,
|
|
8457
|
-
|
|
8240
|
+
path24.basename(subagentDir),
|
|
8458
8241
|
subagentDir,
|
|
8459
8242
|
vscodeCmd
|
|
8460
8243
|
);
|
|
8461
8244
|
if (!workspaceReady) {
|
|
8462
8245
|
throw new Error(
|
|
8463
|
-
`VS Code workspace '${
|
|
8246
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8464
8247
|
);
|
|
8465
8248
|
}
|
|
8466
8249
|
await sleep2(500);
|
|
@@ -8469,11 +8252,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8469
8252
|
}
|
|
8470
8253
|
|
|
8471
8254
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8472
|
-
import { copyFile, mkdir as mkdir9, readFile as
|
|
8473
|
-
import
|
|
8255
|
+
import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
8256
|
+
import path26 from "node:path";
|
|
8474
8257
|
|
|
8475
8258
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8476
|
-
import
|
|
8259
|
+
import path25 from "node:path";
|
|
8477
8260
|
import JSON5 from "json5";
|
|
8478
8261
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8479
8262
|
let workspace;
|
|
@@ -8490,10 +8273,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8490
8273
|
}
|
|
8491
8274
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8492
8275
|
const folderPath = folder.path;
|
|
8493
|
-
if (
|
|
8276
|
+
if (path25.isAbsolute(folderPath)) {
|
|
8494
8277
|
return folder;
|
|
8495
8278
|
}
|
|
8496
|
-
const absolutePath =
|
|
8279
|
+
const absolutePath = path25.resolve(templateDir, folderPath);
|
|
8497
8280
|
return {
|
|
8498
8281
|
...folder,
|
|
8499
8282
|
path: absolutePath
|
|
@@ -8515,19 +8298,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8515
8298
|
if (locationMap && typeof locationMap === "object") {
|
|
8516
8299
|
const transformedMap = {};
|
|
8517
8300
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8518
|
-
const isAbsolute =
|
|
8301
|
+
const isAbsolute = path25.isAbsolute(locationPath);
|
|
8519
8302
|
if (isAbsolute) {
|
|
8520
8303
|
transformedMap[locationPath] = value;
|
|
8521
8304
|
} else {
|
|
8522
8305
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8523
8306
|
if (firstGlobIndex === -1) {
|
|
8524
|
-
const resolvedPath =
|
|
8307
|
+
const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8525
8308
|
transformedMap[resolvedPath] = value;
|
|
8526
8309
|
} else {
|
|
8527
8310
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8528
8311
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8529
8312
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8530
|
-
const resolvedPath = (
|
|
8313
|
+
const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
|
|
8531
8314
|
/\\/g,
|
|
8532
8315
|
"/"
|
|
8533
8316
|
);
|
|
@@ -8568,7 +8351,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8568
8351
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8569
8352
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8570
8353
|
for (const subagent of subagents) {
|
|
8571
|
-
const lockFile =
|
|
8354
|
+
const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8572
8355
|
if (!await pathExists(lockFile)) {
|
|
8573
8356
|
return subagent.absolutePath;
|
|
8574
8357
|
}
|
|
@@ -8578,7 +8361,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8578
8361
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8579
8362
|
let workspaceContent;
|
|
8580
8363
|
if (workspaceTemplate) {
|
|
8581
|
-
const workspaceSrc =
|
|
8364
|
+
const workspaceSrc = path26.resolve(workspaceTemplate);
|
|
8582
8365
|
if (!await pathExists(workspaceSrc)) {
|
|
8583
8366
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8584
8367
|
}
|
|
@@ -8586,18 +8369,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8586
8369
|
if (!stats.isFile()) {
|
|
8587
8370
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
8588
8371
|
}
|
|
8589
|
-
const templateText = await
|
|
8372
|
+
const templateText = await readFile8(workspaceSrc, "utf8");
|
|
8590
8373
|
workspaceContent = JSON.parse(templateText);
|
|
8591
8374
|
} else {
|
|
8592
8375
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8593
8376
|
}
|
|
8594
|
-
const workspaceName = `${
|
|
8595
|
-
const workspaceDst =
|
|
8596
|
-
const templateDir = workspaceTemplate ?
|
|
8377
|
+
const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
|
|
8378
|
+
const workspaceDst = path26.join(subagentDir, workspaceName);
|
|
8379
|
+
const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
|
|
8597
8380
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8598
8381
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8599
8382
|
if (cwd) {
|
|
8600
|
-
const absCwd =
|
|
8383
|
+
const absCwd = path26.resolve(cwd);
|
|
8601
8384
|
const parsed = JSON.parse(transformedContent);
|
|
8602
8385
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8603
8386
|
if (!alreadyPresent) {
|
|
@@ -8606,35 +8389,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8606
8389
|
}
|
|
8607
8390
|
}
|
|
8608
8391
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8609
|
-
const messagesDir =
|
|
8392
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
8610
8393
|
await mkdir9(messagesDir, { recursive: true });
|
|
8611
8394
|
return { workspace: workspaceDst, messagesDir };
|
|
8612
8395
|
}
|
|
8613
8396
|
async function createSubagentLock(subagentDir) {
|
|
8614
|
-
const messagesDir =
|
|
8397
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
8615
8398
|
if (await pathExists(messagesDir)) {
|
|
8616
8399
|
const files = await readdir2(messagesDir);
|
|
8617
8400
|
await Promise.all(
|
|
8618
8401
|
files.map(async (file) => {
|
|
8619
|
-
const target =
|
|
8402
|
+
const target = path26.join(messagesDir, file);
|
|
8620
8403
|
await removeIfExists(target);
|
|
8621
8404
|
})
|
|
8622
8405
|
);
|
|
8623
8406
|
}
|
|
8624
|
-
const githubAgentsDir =
|
|
8407
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
8625
8408
|
if (await pathExists(githubAgentsDir)) {
|
|
8626
8409
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
8627
8410
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8628
8411
|
await Promise.all(
|
|
8629
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
8412
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
|
|
8630
8413
|
);
|
|
8631
8414
|
}
|
|
8632
|
-
const lockFile =
|
|
8415
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8633
8416
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8634
8417
|
return lockFile;
|
|
8635
8418
|
}
|
|
8636
8419
|
async function removeSubagentLock(subagentDir) {
|
|
8637
|
-
const lockFile =
|
|
8420
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8638
8421
|
await removeIfExists(lockFile);
|
|
8639
8422
|
}
|
|
8640
8423
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8654,9 +8437,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8654
8437
|
return 1;
|
|
8655
8438
|
}
|
|
8656
8439
|
if (promptFile) {
|
|
8657
|
-
const githubAgentsDir =
|
|
8440
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
8658
8441
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8659
|
-
const agentFile =
|
|
8442
|
+
const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
|
|
8660
8443
|
try {
|
|
8661
8444
|
await copyFile(promptFile, agentFile);
|
|
8662
8445
|
} catch (error) {
|
|
@@ -8675,7 +8458,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
8675
8458
|
if (!promptFile) {
|
|
8676
8459
|
return void 0;
|
|
8677
8460
|
}
|
|
8678
|
-
const resolvedPrompt =
|
|
8461
|
+
const resolvedPrompt = path27.resolve(promptFile);
|
|
8679
8462
|
if (!await pathExists(resolvedPrompt)) {
|
|
8680
8463
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8681
8464
|
}
|
|
@@ -8691,7 +8474,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8691
8474
|
}
|
|
8692
8475
|
const resolved = [];
|
|
8693
8476
|
for (const attachment of extraAttachments) {
|
|
8694
|
-
const resolvedPath =
|
|
8477
|
+
const resolvedPath = path27.resolve(attachment);
|
|
8695
8478
|
if (!await pathExists(resolvedPath)) {
|
|
8696
8479
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8697
8480
|
}
|
|
@@ -8733,7 +8516,7 @@ async function dispatchAgentSession(options) {
|
|
|
8733
8516
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8734
8517
|
};
|
|
8735
8518
|
}
|
|
8736
|
-
const subagentName =
|
|
8519
|
+
const subagentName = path27.basename(subagentDir);
|
|
8737
8520
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8738
8521
|
const preparationResult = await prepareSubagentDirectory(
|
|
8739
8522
|
subagentDir,
|
|
@@ -8761,9 +8544,9 @@ async function dispatchAgentSession(options) {
|
|
|
8761
8544
|
};
|
|
8762
8545
|
}
|
|
8763
8546
|
const timestamp = generateTimestamp();
|
|
8764
|
-
const messagesDir =
|
|
8765
|
-
const responseFileTmp =
|
|
8766
|
-
const responseFileFinal =
|
|
8547
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8548
|
+
const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
8549
|
+
const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
|
|
8767
8550
|
const requestInstructions = createRequestPrompt(
|
|
8768
8551
|
userQuery,
|
|
8769
8552
|
responseFileTmp,
|
|
@@ -8868,7 +8651,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8868
8651
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8869
8652
|
};
|
|
8870
8653
|
}
|
|
8871
|
-
subagentName =
|
|
8654
|
+
subagentName = path27.basename(subagentDir);
|
|
8872
8655
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8873
8656
|
const preparationResult = await prepareSubagentDirectory(
|
|
8874
8657
|
subagentDir,
|
|
@@ -8899,17 +8682,17 @@ async function dispatchBatchAgent(options) {
|
|
|
8899
8682
|
};
|
|
8900
8683
|
}
|
|
8901
8684
|
const timestamp = generateTimestamp();
|
|
8902
|
-
const messagesDir =
|
|
8685
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8903
8686
|
requestFiles = userQueries.map(
|
|
8904
|
-
(_, index) =>
|
|
8687
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
8905
8688
|
);
|
|
8906
8689
|
const responseTmpFiles = userQueries.map(
|
|
8907
|
-
(_, index) =>
|
|
8690
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
8908
8691
|
);
|
|
8909
8692
|
responseFilesFinal = userQueries.map(
|
|
8910
|
-
(_, index) =>
|
|
8693
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
8911
8694
|
);
|
|
8912
|
-
const orchestratorFile =
|
|
8695
|
+
const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
8913
8696
|
if (!dryRun) {
|
|
8914
8697
|
await Promise.all(
|
|
8915
8698
|
userQueries.map((query, index) => {
|
|
@@ -8995,7 +8778,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8995
8778
|
|
|
8996
8779
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
8997
8780
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
8998
|
-
import
|
|
8781
|
+
import path28 from "node:path";
|
|
8999
8782
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
9000
8783
|
folders: [
|
|
9001
8784
|
{
|
|
@@ -9026,7 +8809,7 @@ async function provisionSubagents(options) {
|
|
|
9026
8809
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
9027
8810
|
throw new Error("subagents must be a positive integer");
|
|
9028
8811
|
}
|
|
9029
|
-
const targetPath =
|
|
8812
|
+
const targetPath = path28.resolve(targetRoot);
|
|
9030
8813
|
if (!dryRun) {
|
|
9031
8814
|
await ensureDir(targetPath);
|
|
9032
8815
|
}
|
|
@@ -9046,7 +8829,7 @@ async function provisionSubagents(options) {
|
|
|
9046
8829
|
continue;
|
|
9047
8830
|
}
|
|
9048
8831
|
highestNumber = Math.max(highestNumber, parsed);
|
|
9049
|
-
const lockFile =
|
|
8832
|
+
const lockFile = path28.join(entry.absolutePath, lockName);
|
|
9050
8833
|
const locked = await pathExists(lockFile);
|
|
9051
8834
|
if (locked) {
|
|
9052
8835
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -9063,10 +8846,10 @@ async function provisionSubagents(options) {
|
|
|
9063
8846
|
break;
|
|
9064
8847
|
}
|
|
9065
8848
|
const subagentDir = subagent.absolutePath;
|
|
9066
|
-
const githubAgentsDir =
|
|
9067
|
-
const lockFile =
|
|
9068
|
-
const workspaceDst =
|
|
9069
|
-
const wakeupDst =
|
|
8849
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
8850
|
+
const lockFile = path28.join(subagentDir, lockName);
|
|
8851
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
8852
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9070
8853
|
const isLocked = await pathExists(lockFile);
|
|
9071
8854
|
if (isLocked && !force) {
|
|
9072
8855
|
continue;
|
|
@@ -9104,10 +8887,10 @@ async function provisionSubagents(options) {
|
|
|
9104
8887
|
let nextIndex = highestNumber;
|
|
9105
8888
|
while (subagentsProvisioned < subagents) {
|
|
9106
8889
|
nextIndex += 1;
|
|
9107
|
-
const subagentDir =
|
|
9108
|
-
const githubAgentsDir =
|
|
9109
|
-
const workspaceDst =
|
|
9110
|
-
const wakeupDst =
|
|
8890
|
+
const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
|
|
8891
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
8892
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
8893
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9111
8894
|
if (!dryRun) {
|
|
9112
8895
|
await ensureDir(subagentDir);
|
|
9113
8896
|
await ensureDir(githubAgentsDir);
|
|
@@ -9174,7 +8957,7 @@ var VSCodeProvider = class {
|
|
|
9174
8957
|
}
|
|
9175
8958
|
await this.ensureEnvironmentReady();
|
|
9176
8959
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
9177
|
-
const promptContent = buildPromptDocument2(request, inputFiles
|
|
8960
|
+
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
9178
8961
|
const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
|
|
9179
8962
|
const startTime = Date.now();
|
|
9180
8963
|
const session = await dispatchAgentSession({
|
|
@@ -9228,7 +9011,7 @@ var VSCodeProvider = class {
|
|
|
9228
9011
|
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
9229
9012
|
);
|
|
9230
9013
|
const userQueries = normalizedRequests.map(
|
|
9231
|
-
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles
|
|
9014
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
9232
9015
|
);
|
|
9233
9016
|
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
9234
9017
|
this.config.workspaceTemplate
|
|
@@ -9297,7 +9080,7 @@ var VSCodeProvider = class {
|
|
|
9297
9080
|
async function locateVSCodeExecutable(candidate) {
|
|
9298
9081
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9299
9082
|
if (includesPathSeparator) {
|
|
9300
|
-
const resolved =
|
|
9083
|
+
const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
|
|
9301
9084
|
try {
|
|
9302
9085
|
await access3(resolved, constants3.F_OK);
|
|
9303
9086
|
return resolved;
|
|
@@ -9326,41 +9109,35 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9326
9109
|
return void 0;
|
|
9327
9110
|
}
|
|
9328
9111
|
try {
|
|
9329
|
-
const stats = await stat4(
|
|
9112
|
+
const stats = await stat4(path29.resolve(template));
|
|
9330
9113
|
return stats.isFile() ? template : void 0;
|
|
9331
9114
|
} catch {
|
|
9332
9115
|
return template;
|
|
9333
9116
|
}
|
|
9334
9117
|
}
|
|
9335
|
-
function buildPromptDocument2(request, attachments
|
|
9118
|
+
function buildPromptDocument2(request, attachments) {
|
|
9336
9119
|
const parts = [];
|
|
9337
9120
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
9338
9121
|
parts.push(request.systemPrompt.trim());
|
|
9339
9122
|
}
|
|
9340
|
-
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
9341
9123
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
9342
|
-
const
|
|
9343
|
-
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
9124
|
+
const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
|
|
9344
9125
|
if (prereadBlock.length > 0) {
|
|
9345
9126
|
parts.push("\n", prereadBlock);
|
|
9346
9127
|
}
|
|
9347
9128
|
parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
|
|
9348
9129
|
return parts.join("\n").trim();
|
|
9349
9130
|
}
|
|
9350
|
-
function buildMandatoryPrereadBlock2(
|
|
9351
|
-
if (
|
|
9131
|
+
function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
9132
|
+
if (attachmentFiles.length === 0) {
|
|
9352
9133
|
return "";
|
|
9353
9134
|
}
|
|
9354
9135
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9355
|
-
const fileName =
|
|
9136
|
+
const fileName = path29.basename(absolutePath);
|
|
9356
9137
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9357
9138
|
return `* [${fileName}](${fileUri})`;
|
|
9358
9139
|
});
|
|
9359
9140
|
const sections = [];
|
|
9360
|
-
if (guidelineFiles.length > 0) {
|
|
9361
|
-
sections.push(`Read all guideline files:
|
|
9362
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
9363
|
-
}
|
|
9364
9141
|
if (attachmentFiles.length > 0) {
|
|
9365
9142
|
sections.push(`Read all attachment files:
|
|
9366
9143
|
${buildList(attachmentFiles).join("\n")}.`);
|
|
@@ -9371,29 +9148,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
|
|
|
9371
9148
|
);
|
|
9372
9149
|
return sections.join("\n");
|
|
9373
9150
|
}
|
|
9374
|
-
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
9375
|
-
if (!attachments || attachments.length === 0) {
|
|
9376
|
-
return [];
|
|
9377
|
-
}
|
|
9378
|
-
const unique = /* @__PURE__ */ new Map();
|
|
9379
|
-
for (const attachment of attachments) {
|
|
9380
|
-
const absolutePath = path30.resolve(attachment);
|
|
9381
|
-
const normalized = absolutePath.split(path30.sep).join("/");
|
|
9382
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
9383
|
-
if (!unique.has(absolutePath)) {
|
|
9384
|
-
unique.set(absolutePath, absolutePath);
|
|
9385
|
-
}
|
|
9386
|
-
}
|
|
9387
|
-
}
|
|
9388
|
-
return Array.from(unique.values());
|
|
9389
|
-
}
|
|
9390
9151
|
function collectAttachmentFiles(attachments) {
|
|
9391
9152
|
if (!attachments || attachments.length === 0) {
|
|
9392
9153
|
return [];
|
|
9393
9154
|
}
|
|
9394
9155
|
const unique = /* @__PURE__ */ new Map();
|
|
9395
9156
|
for (const attachment of attachments) {
|
|
9396
|
-
const absolutePath =
|
|
9157
|
+
const absolutePath = path29.resolve(attachment);
|
|
9397
9158
|
if (!unique.has(absolutePath)) {
|
|
9398
9159
|
unique.set(absolutePath, absolutePath);
|
|
9399
9160
|
}
|
|
@@ -9401,7 +9162,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9401
9162
|
return Array.from(unique.values());
|
|
9402
9163
|
}
|
|
9403
9164
|
function pathToFileUri3(filePath) {
|
|
9404
|
-
const absolutePath =
|
|
9165
|
+
const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
|
|
9405
9166
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9406
9167
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9407
9168
|
return `file:///${normalizedPath}`;
|
|
@@ -9414,7 +9175,7 @@ function normalizeAttachments(attachments) {
|
|
|
9414
9175
|
}
|
|
9415
9176
|
const deduped = /* @__PURE__ */ new Set();
|
|
9416
9177
|
for (const attachment of attachments) {
|
|
9417
|
-
deduped.add(
|
|
9178
|
+
deduped.add(path29.resolve(attachment));
|
|
9418
9179
|
}
|
|
9419
9180
|
return Array.from(deduped);
|
|
9420
9181
|
}
|
|
@@ -9423,7 +9184,7 @@ function mergeAttachments(all) {
|
|
|
9423
9184
|
for (const list of all) {
|
|
9424
9185
|
if (!list) continue;
|
|
9425
9186
|
for (const inputFile of list) {
|
|
9426
|
-
deduped.add(
|
|
9187
|
+
deduped.add(path29.resolve(inputFile));
|
|
9427
9188
|
}
|
|
9428
9189
|
}
|
|
9429
9190
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9471,8 +9232,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9471
9232
|
|
|
9472
9233
|
// src/evaluation/providers/targets-file.ts
|
|
9473
9234
|
import { constants as constants4 } from "node:fs";
|
|
9474
|
-
import { access as access4, readFile as
|
|
9475
|
-
import
|
|
9235
|
+
import { access as access4, readFile as readFile9 } from "node:fs/promises";
|
|
9236
|
+
import path30 from "node:path";
|
|
9476
9237
|
import { parse as parse4 } from "yaml";
|
|
9477
9238
|
function isRecord(value) {
|
|
9478
9239
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9509,11 +9270,11 @@ async function fileExists3(filePath) {
|
|
|
9509
9270
|
}
|
|
9510
9271
|
}
|
|
9511
9272
|
async function readTargetDefinitions(filePath) {
|
|
9512
|
-
const absolutePath =
|
|
9273
|
+
const absolutePath = path30.resolve(filePath);
|
|
9513
9274
|
if (!await fileExists3(absolutePath)) {
|
|
9514
9275
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9515
9276
|
}
|
|
9516
|
-
const raw = await
|
|
9277
|
+
const raw = await readFile9(absolutePath, "utf8");
|
|
9517
9278
|
const parsed = parse4(raw);
|
|
9518
9279
|
if (!isRecord(parsed)) {
|
|
9519
9280
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -9529,16 +9290,16 @@ function listTargetNames(definitions) {
|
|
|
9529
9290
|
}
|
|
9530
9291
|
|
|
9531
9292
|
// src/evaluation/providers/provider-discovery.ts
|
|
9532
|
-
import
|
|
9293
|
+
import path31 from "node:path";
|
|
9533
9294
|
import fg from "fast-glob";
|
|
9534
9295
|
async function discoverProviders(registry, baseDir) {
|
|
9535
9296
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9536
9297
|
const candidateDirs = [];
|
|
9537
|
-
let dir =
|
|
9538
|
-
const root =
|
|
9298
|
+
let dir = path31.resolve(baseDir);
|
|
9299
|
+
const root = path31.parse(dir).root;
|
|
9539
9300
|
while (dir !== root) {
|
|
9540
|
-
candidateDirs.push(
|
|
9541
|
-
dir =
|
|
9301
|
+
candidateDirs.push(path31.join(dir, ".agentv", "providers"));
|
|
9302
|
+
dir = path31.dirname(dir);
|
|
9542
9303
|
}
|
|
9543
9304
|
let files = [];
|
|
9544
9305
|
for (const providersDir of candidateDirs) {
|
|
@@ -9554,7 +9315,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9554
9315
|
}
|
|
9555
9316
|
const discoveredKinds = [];
|
|
9556
9317
|
for (const filePath of files) {
|
|
9557
|
-
const basename =
|
|
9318
|
+
const basename = path31.basename(filePath);
|
|
9558
9319
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9559
9320
|
if (registry.has(kindName)) {
|
|
9560
9321
|
continue;
|
|
@@ -9763,15 +9524,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9763
9524
|
});
|
|
9764
9525
|
}
|
|
9765
9526
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9766
|
-
const { mkdir: mkdir15, readFile:
|
|
9527
|
+
const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9767
9528
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9768
|
-
const
|
|
9529
|
+
const path44 = await import("node:path");
|
|
9769
9530
|
const { randomUUID: randomUUID9 } = await import("node:crypto");
|
|
9770
|
-
const dir =
|
|
9531
|
+
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
9771
9532
|
await mkdir15(dir, { recursive: true });
|
|
9772
|
-
const stdinPath =
|
|
9773
|
-
const stdoutPath =
|
|
9774
|
-
const stderrPath =
|
|
9533
|
+
const stdinPath = path44.join(dir, "stdin.txt");
|
|
9534
|
+
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
9535
|
+
const stderrPath = path44.join(dir, "stderr.txt");
|
|
9775
9536
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9776
9537
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9777
9538
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -9801,8 +9562,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
9801
9562
|
resolve(code ?? 0);
|
|
9802
9563
|
});
|
|
9803
9564
|
});
|
|
9804
|
-
const stdout = (await
|
|
9805
|
-
const stderr = (await
|
|
9565
|
+
const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9566
|
+
const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9806
9567
|
return { stdout, stderr, exitCode };
|
|
9807
9568
|
} finally {
|
|
9808
9569
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -10119,10 +9880,7 @@ var CodeEvaluator = class {
|
|
|
10119
9880
|
outputText: context.candidate,
|
|
10120
9881
|
output: outputForPayload,
|
|
10121
9882
|
outputPath,
|
|
10122
|
-
|
|
10123
|
-
inputFiles: context.evalCase.file_paths.filter(
|
|
10124
|
-
(path45) => !context.evalCase.guideline_paths.includes(path45)
|
|
10125
|
-
),
|
|
9883
|
+
inputFiles: context.evalCase.file_paths,
|
|
10126
9884
|
input: context.evalCase.input,
|
|
10127
9885
|
trace: context.trace ?? null,
|
|
10128
9886
|
tokenUsage: context.tokenUsage ?? null,
|
|
@@ -10253,7 +10011,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10253
10011
|
|
|
10254
10012
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10255
10013
|
import fs2 from "node:fs/promises";
|
|
10256
|
-
import
|
|
10014
|
+
import path32 from "node:path";
|
|
10257
10015
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10258
10016
|
import { z as z3 } from "zod";
|
|
10259
10017
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11085,8 +10843,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11085
10843
|
};
|
|
11086
10844
|
}
|
|
11087
10845
|
function resolveSandboxed(basePath, relativePath) {
|
|
11088
|
-
const resolved =
|
|
11089
|
-
if (!resolved.startsWith(basePath +
|
|
10846
|
+
const resolved = path32.resolve(basePath, relativePath);
|
|
10847
|
+
if (!resolved.startsWith(basePath + path32.sep) && resolved !== basePath) {
|
|
11090
10848
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
11091
10849
|
}
|
|
11092
10850
|
return resolved;
|
|
@@ -11176,11 +10934,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11176
10934
|
for (const entry of entries) {
|
|
11177
10935
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
11178
10936
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11179
|
-
const fullPath =
|
|
10937
|
+
const fullPath = path32.join(dirPath, entry.name);
|
|
11180
10938
|
if (entry.isDirectory()) {
|
|
11181
10939
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11182
10940
|
} else if (entry.isFile()) {
|
|
11183
|
-
const ext =
|
|
10941
|
+
const ext = path32.extname(entry.name).toLowerCase();
|
|
11184
10942
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11185
10943
|
try {
|
|
11186
10944
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -11192,7 +10950,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11192
10950
|
regex.lastIndex = 0;
|
|
11193
10951
|
if (regex.test(lines[i])) {
|
|
11194
10952
|
matches.push({
|
|
11195
|
-
file:
|
|
10953
|
+
file: path32.relative(workspacePath, fullPath),
|
|
11196
10954
|
line: i + 1,
|
|
11197
10955
|
text: lines[i].substring(0, 200)
|
|
11198
10956
|
});
|
|
@@ -11827,115 +11585,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11827
11585
|
* Evaluate a single field against the expected value.
|
|
11828
11586
|
*/
|
|
11829
11587
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11830
|
-
const { path:
|
|
11831
|
-
const candidateValue = resolvePath(candidateData,
|
|
11832
|
-
const expectedValue = resolvePath(expectedData,
|
|
11588
|
+
const { path: path44, match, required = true, weight = 1 } = fieldConfig;
|
|
11589
|
+
const candidateValue = resolvePath(candidateData, path44);
|
|
11590
|
+
const expectedValue = resolvePath(expectedData, path44);
|
|
11833
11591
|
if (expectedValue === void 0) {
|
|
11834
11592
|
return {
|
|
11835
|
-
path:
|
|
11593
|
+
path: path44,
|
|
11836
11594
|
score: 1,
|
|
11837
11595
|
// No expected value means no comparison needed
|
|
11838
11596
|
weight,
|
|
11839
11597
|
hit: true,
|
|
11840
|
-
message: `${
|
|
11598
|
+
message: `${path44}: no expected value`
|
|
11841
11599
|
};
|
|
11842
11600
|
}
|
|
11843
11601
|
if (candidateValue === void 0) {
|
|
11844
11602
|
if (required) {
|
|
11845
11603
|
return {
|
|
11846
|
-
path:
|
|
11604
|
+
path: path44,
|
|
11847
11605
|
score: 0,
|
|
11848
11606
|
weight,
|
|
11849
11607
|
hit: false,
|
|
11850
|
-
message: `${
|
|
11608
|
+
message: `${path44} (required, missing)`
|
|
11851
11609
|
};
|
|
11852
11610
|
}
|
|
11853
11611
|
return {
|
|
11854
|
-
path:
|
|
11612
|
+
path: path44,
|
|
11855
11613
|
score: 1,
|
|
11856
11614
|
// Don't penalize missing optional fields
|
|
11857
11615
|
weight: 0,
|
|
11858
11616
|
// Zero weight means it won't affect the score
|
|
11859
11617
|
hit: true,
|
|
11860
|
-
message: `${
|
|
11618
|
+
message: `${path44}: optional field missing`
|
|
11861
11619
|
};
|
|
11862
11620
|
}
|
|
11863
11621
|
switch (match) {
|
|
11864
11622
|
case "exact":
|
|
11865
|
-
return this.compareExact(
|
|
11623
|
+
return this.compareExact(path44, candidateValue, expectedValue, weight);
|
|
11866
11624
|
case "numeric_tolerance":
|
|
11867
11625
|
return this.compareNumericTolerance(
|
|
11868
|
-
|
|
11626
|
+
path44,
|
|
11869
11627
|
candidateValue,
|
|
11870
11628
|
expectedValue,
|
|
11871
11629
|
fieldConfig,
|
|
11872
11630
|
weight
|
|
11873
11631
|
);
|
|
11874
11632
|
case "date":
|
|
11875
|
-
return this.compareDate(
|
|
11633
|
+
return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
|
|
11876
11634
|
default:
|
|
11877
11635
|
return {
|
|
11878
|
-
path:
|
|
11636
|
+
path: path44,
|
|
11879
11637
|
score: 0,
|
|
11880
11638
|
weight,
|
|
11881
11639
|
hit: false,
|
|
11882
|
-
message: `${
|
|
11640
|
+
message: `${path44}: unknown match type "${match}"`
|
|
11883
11641
|
};
|
|
11884
11642
|
}
|
|
11885
11643
|
}
|
|
11886
11644
|
/**
|
|
11887
11645
|
* Exact equality comparison.
|
|
11888
11646
|
*/
|
|
11889
|
-
compareExact(
|
|
11647
|
+
compareExact(path44, candidateValue, expectedValue, weight) {
|
|
11890
11648
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
11891
11649
|
return {
|
|
11892
|
-
path:
|
|
11650
|
+
path: path44,
|
|
11893
11651
|
score: 1,
|
|
11894
11652
|
weight,
|
|
11895
11653
|
hit: true,
|
|
11896
|
-
message:
|
|
11654
|
+
message: path44
|
|
11897
11655
|
};
|
|
11898
11656
|
}
|
|
11899
11657
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
11900
11658
|
return {
|
|
11901
|
-
path:
|
|
11659
|
+
path: path44,
|
|
11902
11660
|
score: 0,
|
|
11903
11661
|
weight,
|
|
11904
11662
|
hit: false,
|
|
11905
|
-
message: `${
|
|
11663
|
+
message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
11906
11664
|
};
|
|
11907
11665
|
}
|
|
11908
11666
|
return {
|
|
11909
|
-
path:
|
|
11667
|
+
path: path44,
|
|
11910
11668
|
score: 0,
|
|
11911
11669
|
weight,
|
|
11912
11670
|
hit: false,
|
|
11913
|
-
message: `${
|
|
11671
|
+
message: `${path44} (value mismatch)`
|
|
11914
11672
|
};
|
|
11915
11673
|
}
|
|
11916
11674
|
/**
|
|
11917
11675
|
* Numeric comparison with absolute or relative tolerance.
|
|
11918
11676
|
*/
|
|
11919
|
-
compareNumericTolerance(
|
|
11677
|
+
compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11920
11678
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
11921
11679
|
const candidateNum = toNumber(candidateValue);
|
|
11922
11680
|
const expectedNum = toNumber(expectedValue);
|
|
11923
11681
|
if (candidateNum === null || expectedNum === null) {
|
|
11924
11682
|
return {
|
|
11925
|
-
path:
|
|
11683
|
+
path: path44,
|
|
11926
11684
|
score: 0,
|
|
11927
11685
|
weight,
|
|
11928
11686
|
hit: false,
|
|
11929
|
-
message: `${
|
|
11687
|
+
message: `${path44} (non-numeric value)`
|
|
11930
11688
|
};
|
|
11931
11689
|
}
|
|
11932
11690
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
11933
11691
|
return {
|
|
11934
|
-
path:
|
|
11692
|
+
path: path44,
|
|
11935
11693
|
score: 0,
|
|
11936
11694
|
weight,
|
|
11937
11695
|
hit: false,
|
|
11938
|
-
message: `${
|
|
11696
|
+
message: `${path44} (invalid numeric value)`
|
|
11939
11697
|
};
|
|
11940
11698
|
}
|
|
11941
11699
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -11948,61 +11706,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
11948
11706
|
}
|
|
11949
11707
|
if (withinTolerance) {
|
|
11950
11708
|
return {
|
|
11951
|
-
path:
|
|
11709
|
+
path: path44,
|
|
11952
11710
|
score: 1,
|
|
11953
11711
|
weight,
|
|
11954
11712
|
hit: true,
|
|
11955
|
-
message: `${
|
|
11713
|
+
message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
|
|
11956
11714
|
};
|
|
11957
11715
|
}
|
|
11958
11716
|
return {
|
|
11959
|
-
path:
|
|
11717
|
+
path: path44,
|
|
11960
11718
|
score: 0,
|
|
11961
11719
|
weight,
|
|
11962
11720
|
hit: false,
|
|
11963
|
-
message: `${
|
|
11721
|
+
message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
11964
11722
|
};
|
|
11965
11723
|
}
|
|
11966
11724
|
/**
|
|
11967
11725
|
* Date comparison with format normalization.
|
|
11968
11726
|
*/
|
|
11969
|
-
compareDate(
|
|
11727
|
+
compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11970
11728
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
11971
11729
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
11972
11730
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
11973
11731
|
if (candidateDate === null) {
|
|
11974
11732
|
return {
|
|
11975
|
-
path:
|
|
11733
|
+
path: path44,
|
|
11976
11734
|
score: 0,
|
|
11977
11735
|
weight,
|
|
11978
11736
|
hit: false,
|
|
11979
|
-
message: `${
|
|
11737
|
+
message: `${path44} (unparseable candidate date)`
|
|
11980
11738
|
};
|
|
11981
11739
|
}
|
|
11982
11740
|
if (expectedDate === null) {
|
|
11983
11741
|
return {
|
|
11984
|
-
path:
|
|
11742
|
+
path: path44,
|
|
11985
11743
|
score: 0,
|
|
11986
11744
|
weight,
|
|
11987
11745
|
hit: false,
|
|
11988
|
-
message: `${
|
|
11746
|
+
message: `${path44} (unparseable expected date)`
|
|
11989
11747
|
};
|
|
11990
11748
|
}
|
|
11991
11749
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
11992
11750
|
return {
|
|
11993
|
-
path:
|
|
11751
|
+
path: path44,
|
|
11994
11752
|
score: 1,
|
|
11995
11753
|
weight,
|
|
11996
11754
|
hit: true,
|
|
11997
|
-
message:
|
|
11755
|
+
message: path44
|
|
11998
11756
|
};
|
|
11999
11757
|
}
|
|
12000
11758
|
return {
|
|
12001
|
-
path:
|
|
11759
|
+
path: path44,
|
|
12002
11760
|
score: 0,
|
|
12003
11761
|
weight,
|
|
12004
11762
|
hit: false,
|
|
12005
|
-
message: `${
|
|
11763
|
+
message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12006
11764
|
};
|
|
12007
11765
|
}
|
|
12008
11766
|
/**
|
|
@@ -12035,11 +11793,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12035
11793
|
};
|
|
12036
11794
|
}
|
|
12037
11795
|
};
|
|
12038
|
-
function resolvePath(obj,
|
|
12039
|
-
if (!
|
|
11796
|
+
function resolvePath(obj, path44) {
|
|
11797
|
+
if (!path44 || !obj) {
|
|
12040
11798
|
return void 0;
|
|
12041
11799
|
}
|
|
12042
|
-
const parts =
|
|
11800
|
+
const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12043
11801
|
let current = obj;
|
|
12044
11802
|
for (const part of parts) {
|
|
12045
11803
|
if (current === null || current === void 0) {
|
|
@@ -12499,8 +12257,8 @@ var TokenUsageEvaluator = class {
|
|
|
12499
12257
|
};
|
|
12500
12258
|
|
|
12501
12259
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12502
|
-
function getNestedValue(obj,
|
|
12503
|
-
const parts =
|
|
12260
|
+
function getNestedValue(obj, path44) {
|
|
12261
|
+
const parts = path44.split(".");
|
|
12504
12262
|
let current = obj;
|
|
12505
12263
|
for (const part of parts) {
|
|
12506
12264
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13122,8 +12880,8 @@ function runEqualsAssertion(output, value) {
|
|
|
13122
12880
|
// src/evaluation/orchestrator.ts
|
|
13123
12881
|
import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
|
|
13124
12882
|
import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13125
|
-
import
|
|
13126
|
-
import
|
|
12883
|
+
import path41 from "node:path";
|
|
12884
|
+
import micromatch3 from "micromatch";
|
|
13127
12885
|
|
|
13128
12886
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
13129
12887
|
var Node = class {
|
|
@@ -13336,7 +13094,7 @@ var InlineAssertEvaluator = class {
|
|
|
13336
13094
|
};
|
|
13337
13095
|
|
|
13338
13096
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13339
|
-
import
|
|
13097
|
+
import path33 from "node:path";
|
|
13340
13098
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13341
13099
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13342
13100
|
if (!context) {
|
|
@@ -13371,10 +13129,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13371
13129
|
expectedOutput: context.evalCase.expected_output,
|
|
13372
13130
|
outputText: context.candidate,
|
|
13373
13131
|
output: context.output ?? null,
|
|
13374
|
-
|
|
13375
|
-
inputFiles: context.evalCase.file_paths.filter(
|
|
13376
|
-
(p) => !context.evalCase.guideline_paths.includes(p)
|
|
13377
|
-
),
|
|
13132
|
+
inputFiles: context.evalCase.file_paths,
|
|
13378
13133
|
input: context.evalCase.input,
|
|
13379
13134
|
trace: context.trace ?? null,
|
|
13380
13135
|
fileChanges: context.fileChanges ?? null,
|
|
@@ -13385,7 +13140,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13385
13140
|
};
|
|
13386
13141
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13387
13142
|
const scriptPath = script[script.length - 1];
|
|
13388
|
-
const cwd =
|
|
13143
|
+
const cwd = path33.dirname(scriptPath);
|
|
13389
13144
|
try {
|
|
13390
13145
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13391
13146
|
const prompt = stdout.trim();
|
|
@@ -13657,16 +13412,16 @@ function createBuiltinRegistry() {
|
|
|
13657
13412
|
}
|
|
13658
13413
|
|
|
13659
13414
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13660
|
-
import
|
|
13415
|
+
import path34 from "node:path";
|
|
13661
13416
|
import fg2 from "fast-glob";
|
|
13662
13417
|
async function discoverAssertions(registry, baseDir) {
|
|
13663
13418
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13664
13419
|
const candidateDirs = [];
|
|
13665
|
-
let dir =
|
|
13666
|
-
const root =
|
|
13420
|
+
let dir = path34.resolve(baseDir);
|
|
13421
|
+
const root = path34.parse(dir).root;
|
|
13667
13422
|
while (dir !== root) {
|
|
13668
|
-
candidateDirs.push(
|
|
13669
|
-
dir =
|
|
13423
|
+
candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
|
|
13424
|
+
dir = path34.dirname(dir);
|
|
13670
13425
|
}
|
|
13671
13426
|
let files = [];
|
|
13672
13427
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -13682,7 +13437,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13682
13437
|
}
|
|
13683
13438
|
const discoveredTypes = [];
|
|
13684
13439
|
for (const filePath of files) {
|
|
13685
|
-
const basename =
|
|
13440
|
+
const basename = path34.basename(filePath);
|
|
13686
13441
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13687
13442
|
if (registry.has(typeName)) {
|
|
13688
13443
|
continue;
|
|
@@ -13700,17 +13455,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13700
13455
|
}
|
|
13701
13456
|
|
|
13702
13457
|
// src/evaluation/registry/grader-discovery.ts
|
|
13703
|
-
import
|
|
13458
|
+
import path35 from "node:path";
|
|
13704
13459
|
import fg3 from "fast-glob";
|
|
13705
13460
|
async function discoverGraders(registry, baseDir) {
|
|
13706
13461
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13707
13462
|
const candidateDirs = [];
|
|
13708
|
-
let dir =
|
|
13709
|
-
const root =
|
|
13463
|
+
let dir = path35.resolve(baseDir);
|
|
13464
|
+
const root = path35.parse(dir).root;
|
|
13710
13465
|
while (dir !== root) {
|
|
13711
|
-
candidateDirs.push(
|
|
13712
|
-
candidateDirs.push(
|
|
13713
|
-
dir =
|
|
13466
|
+
candidateDirs.push(path35.join(dir, ".agentv", "graders"));
|
|
13467
|
+
candidateDirs.push(path35.join(dir, ".agentv", "judges"));
|
|
13468
|
+
dir = path35.dirname(dir);
|
|
13714
13469
|
}
|
|
13715
13470
|
let files = [];
|
|
13716
13471
|
for (const gradersDir of candidateDirs) {
|
|
@@ -13726,7 +13481,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13726
13481
|
}
|
|
13727
13482
|
const discoveredTypes = [];
|
|
13728
13483
|
for (const filePath of files) {
|
|
13729
|
-
const basename =
|
|
13484
|
+
const basename = path35.basename(filePath);
|
|
13730
13485
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13731
13486
|
if (registry.has(typeName)) {
|
|
13732
13487
|
continue;
|
|
@@ -13886,7 +13641,7 @@ function getTCritical(df) {
|
|
|
13886
13641
|
// src/evaluation/workspace/file-changes.ts
|
|
13887
13642
|
import { exec as execCallback } from "node:child_process";
|
|
13888
13643
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
13889
|
-
import
|
|
13644
|
+
import path36 from "node:path";
|
|
13890
13645
|
import { promisify as promisify4 } from "node:util";
|
|
13891
13646
|
var execAsync4 = promisify4(execCallback);
|
|
13892
13647
|
function gitExecOpts(workspacePath) {
|
|
@@ -13920,10 +13675,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13920
13675
|
}
|
|
13921
13676
|
for (const entry of entries) {
|
|
13922
13677
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
13923
|
-
const childPath =
|
|
13678
|
+
const childPath = path36.join(workspacePath, entry);
|
|
13924
13679
|
try {
|
|
13925
13680
|
if (!statSync(childPath).isDirectory()) continue;
|
|
13926
|
-
if (!statSync(
|
|
13681
|
+
if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
|
|
13927
13682
|
} catch {
|
|
13928
13683
|
continue;
|
|
13929
13684
|
}
|
|
@@ -13934,7 +13689,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13934
13689
|
|
|
13935
13690
|
// src/evaluation/workspace/manager.ts
|
|
13936
13691
|
import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
13937
|
-
import
|
|
13692
|
+
import path37 from "node:path";
|
|
13938
13693
|
var TemplateNotFoundError = class extends Error {
|
|
13939
13694
|
constructor(templatePath) {
|
|
13940
13695
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -13964,14 +13719,14 @@ async function isDirectory(filePath) {
|
|
|
13964
13719
|
}
|
|
13965
13720
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
13966
13721
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
13967
|
-
return
|
|
13722
|
+
return path37.join(root, evalRunId, caseId);
|
|
13968
13723
|
}
|
|
13969
13724
|
async function copyDirectoryRecursive(src, dest) {
|
|
13970
13725
|
await mkdir11(dest, { recursive: true });
|
|
13971
13726
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
13972
13727
|
for (const entry of entries) {
|
|
13973
|
-
const srcPath =
|
|
13974
|
-
const destPath =
|
|
13728
|
+
const srcPath = path37.join(src, entry.name);
|
|
13729
|
+
const destPath = path37.join(dest, entry.name);
|
|
13975
13730
|
if (entry.name === ".git") {
|
|
13976
13731
|
continue;
|
|
13977
13732
|
}
|
|
@@ -13983,7 +13738,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
13983
13738
|
}
|
|
13984
13739
|
}
|
|
13985
13740
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
13986
|
-
const resolvedTemplatePath =
|
|
13741
|
+
const resolvedTemplatePath = path37.resolve(templatePath);
|
|
13987
13742
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
13988
13743
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
13989
13744
|
}
|
|
@@ -14032,7 +13787,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
14032
13787
|
}
|
|
14033
13788
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
14034
13789
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14035
|
-
const evalDir =
|
|
13790
|
+
const evalDir = path37.join(root, evalRunId);
|
|
14036
13791
|
if (await fileExists(evalDir)) {
|
|
14037
13792
|
await rm4(evalDir, { recursive: true, force: true });
|
|
14038
13793
|
}
|
|
@@ -14042,8 +13797,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
14042
13797
|
import { execFile } from "node:child_process";
|
|
14043
13798
|
import { createHash } from "node:crypto";
|
|
14044
13799
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14045
|
-
import { cp as cp2, mkdir as mkdir12, readFile as
|
|
14046
|
-
import
|
|
13800
|
+
import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
13801
|
+
import path38 from "node:path";
|
|
14047
13802
|
import { promisify as promisify5 } from "node:util";
|
|
14048
13803
|
var execFileAsync = promisify5(execFile);
|
|
14049
13804
|
function gitEnv() {
|
|
@@ -14097,8 +13852,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
14097
13852
|
await mkdir12(dest, { recursive: true });
|
|
14098
13853
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
14099
13854
|
for (const entry of entries) {
|
|
14100
|
-
const srcPath =
|
|
14101
|
-
const destPath =
|
|
13855
|
+
const srcPath = path38.join(src, entry.name);
|
|
13856
|
+
const destPath = path38.join(dest, entry.name);
|
|
14102
13857
|
if (entry.name === ".git") {
|
|
14103
13858
|
continue;
|
|
14104
13859
|
}
|
|
@@ -14131,7 +13886,7 @@ var WorkspacePoolManager = class {
|
|
|
14131
13886
|
async acquireWorkspace(options) {
|
|
14132
13887
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
14133
13888
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
14134
|
-
const poolDir =
|
|
13889
|
+
const poolDir = path38.join(this.poolRoot, fingerprint);
|
|
14135
13890
|
await mkdir12(poolDir, { recursive: true });
|
|
14136
13891
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
14137
13892
|
if (drifted) {
|
|
@@ -14141,7 +13896,7 @@ var WorkspacePoolManager = class {
|
|
|
14141
13896
|
await this.removeAllSlots(poolDir);
|
|
14142
13897
|
}
|
|
14143
13898
|
for (let i = 0; i < maxSlots; i++) {
|
|
14144
|
-
const slotPath =
|
|
13899
|
+
const slotPath = path38.join(poolDir, `slot-${i}`);
|
|
14145
13900
|
const lockPath = `${slotPath}.lock`;
|
|
14146
13901
|
const locked = await this.tryLock(lockPath);
|
|
14147
13902
|
if (!locked) {
|
|
@@ -14203,7 +13958,7 @@ var WorkspacePoolManager = class {
|
|
|
14203
13958
|
throw err;
|
|
14204
13959
|
}
|
|
14205
13960
|
try {
|
|
14206
|
-
const pidStr = await
|
|
13961
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
14207
13962
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14208
13963
|
if (!Number.isNaN(pid)) {
|
|
14209
13964
|
try {
|
|
@@ -14228,9 +13983,9 @@ var WorkspacePoolManager = class {
|
|
|
14228
13983
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14229
13984
|
*/
|
|
14230
13985
|
async checkDrift(poolDir, fingerprint) {
|
|
14231
|
-
const metadataPath =
|
|
13986
|
+
const metadataPath = path38.join(poolDir, "metadata.json");
|
|
14232
13987
|
try {
|
|
14233
|
-
const raw = await
|
|
13988
|
+
const raw = await readFile10(metadataPath, "utf-8");
|
|
14234
13989
|
const metadata = JSON.parse(raw);
|
|
14235
13990
|
return metadata.fingerprint !== fingerprint;
|
|
14236
13991
|
} catch {
|
|
@@ -14245,17 +14000,17 @@ var WorkspacePoolManager = class {
|
|
|
14245
14000
|
repos,
|
|
14246
14001
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14247
14002
|
};
|
|
14248
|
-
await writeFile7(
|
|
14003
|
+
await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14249
14004
|
}
|
|
14250
14005
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14251
14006
|
async removeAllSlots(poolDir) {
|
|
14252
14007
|
const entries = await readdir4(poolDir);
|
|
14253
14008
|
for (const entry of entries) {
|
|
14254
14009
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14255
|
-
const lockPath =
|
|
14010
|
+
const lockPath = path38.join(poolDir, `${entry}.lock`);
|
|
14256
14011
|
if (existsSync2(lockPath)) {
|
|
14257
14012
|
try {
|
|
14258
|
-
const pidStr = await
|
|
14013
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
14259
14014
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14260
14015
|
if (!Number.isNaN(pid)) {
|
|
14261
14016
|
try {
|
|
@@ -14268,12 +14023,12 @@ var WorkspacePoolManager = class {
|
|
|
14268
14023
|
} catch {
|
|
14269
14024
|
}
|
|
14270
14025
|
}
|
|
14271
|
-
await rm5(
|
|
14026
|
+
await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
|
|
14272
14027
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14273
14028
|
});
|
|
14274
14029
|
}
|
|
14275
14030
|
}
|
|
14276
|
-
await rm5(
|
|
14031
|
+
await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14277
14032
|
});
|
|
14278
14033
|
}
|
|
14279
14034
|
/**
|
|
@@ -14283,7 +14038,7 @@ var WorkspacePoolManager = class {
|
|
|
14283
14038
|
*/
|
|
14284
14039
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14285
14040
|
for (const repo of repos) {
|
|
14286
|
-
const repoDir =
|
|
14041
|
+
const repoDir = path38.join(slotPath, repo.path);
|
|
14287
14042
|
if (!existsSync2(repoDir)) {
|
|
14288
14043
|
continue;
|
|
14289
14044
|
}
|
|
@@ -14310,7 +14065,7 @@ var WorkspacePoolManager = class {
|
|
|
14310
14065
|
// src/evaluation/workspace/repo-manager.ts
|
|
14311
14066
|
import { execFile as execFile2 } from "node:child_process";
|
|
14312
14067
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14313
|
-
import
|
|
14068
|
+
import path39 from "node:path";
|
|
14314
14069
|
import { promisify as promisify6 } from "node:util";
|
|
14315
14070
|
var execFileAsync2 = promisify6(execFile2);
|
|
14316
14071
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14410,7 +14165,7 @@ ${lines.join("\n")}`;
|
|
|
14410
14165
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14411
14166
|
*/
|
|
14412
14167
|
async materialize(repo, workspacePath) {
|
|
14413
|
-
const targetDir =
|
|
14168
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
14414
14169
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14415
14170
|
const startedAt = Date.now();
|
|
14416
14171
|
if (this.verbose) {
|
|
@@ -14501,7 +14256,7 @@ ${lines.join("\n")}`;
|
|
|
14501
14256
|
async reset(repos, workspacePath, reset) {
|
|
14502
14257
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14503
14258
|
for (const repo of repos) {
|
|
14504
|
-
const targetDir =
|
|
14259
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
14505
14260
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14506
14261
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14507
14262
|
}
|
|
@@ -14510,16 +14265,16 @@ ${lines.join("\n")}`;
|
|
|
14510
14265
|
|
|
14511
14266
|
// src/evaluation/workspace/resolve.ts
|
|
14512
14267
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
14513
|
-
import
|
|
14268
|
+
import path40 from "node:path";
|
|
14514
14269
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14515
14270
|
if (!templatePath) {
|
|
14516
14271
|
return void 0;
|
|
14517
14272
|
}
|
|
14518
|
-
const resolved =
|
|
14273
|
+
const resolved = path40.resolve(templatePath);
|
|
14519
14274
|
const stats = await stat6(resolved);
|
|
14520
14275
|
if (stats.isFile()) {
|
|
14521
14276
|
return {
|
|
14522
|
-
dir:
|
|
14277
|
+
dir: path40.dirname(resolved),
|
|
14523
14278
|
workspaceFile: resolved
|
|
14524
14279
|
};
|
|
14525
14280
|
}
|
|
@@ -14531,14 +14286,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
14531
14286
|
if (workspaceFiles.length === 1) {
|
|
14532
14287
|
return {
|
|
14533
14288
|
dir: resolved,
|
|
14534
|
-
workspaceFile:
|
|
14289
|
+
workspaceFile: path40.join(resolved, workspaceFiles[0])
|
|
14535
14290
|
};
|
|
14536
14291
|
}
|
|
14537
14292
|
if (workspaceFiles.length > 1) {
|
|
14538
14293
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14539
14294
|
return {
|
|
14540
14295
|
dir: resolved,
|
|
14541
|
-
workspaceFile: conventionFile ?
|
|
14296
|
+
workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
|
|
14542
14297
|
};
|
|
14543
14298
|
}
|
|
14544
14299
|
return { dir: resolved };
|
|
@@ -14742,7 +14497,7 @@ async function runEvaluation(options) {
|
|
|
14742
14497
|
];
|
|
14743
14498
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14744
14499
|
const typeRegistry = createBuiltinRegistry();
|
|
14745
|
-
const discoveryBaseDir = evalFilePath ?
|
|
14500
|
+
const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
|
|
14746
14501
|
const evalDir = discoveryBaseDir;
|
|
14747
14502
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14748
14503
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -14931,7 +14686,7 @@ async function runEvaluation(options) {
|
|
|
14931
14686
|
}
|
|
14932
14687
|
try {
|
|
14933
14688
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
14934
|
-
const copiedWorkspaceFile =
|
|
14689
|
+
const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
|
|
14935
14690
|
try {
|
|
14936
14691
|
await stat7(copiedWorkspaceFile);
|
|
14937
14692
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -15041,7 +14796,7 @@ async function runEvaluation(options) {
|
|
|
15041
14796
|
const budgetResult = {
|
|
15042
14797
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15043
14798
|
testId: evalCase.id,
|
|
15044
|
-
|
|
14799
|
+
eval_set: evalCase.eval_set,
|
|
15045
14800
|
score: 0,
|
|
15046
14801
|
assertions: [],
|
|
15047
14802
|
output: [],
|
|
@@ -15077,7 +14832,7 @@ async function runEvaluation(options) {
|
|
|
15077
14832
|
const haltResult = {
|
|
15078
14833
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15079
14834
|
testId: evalCase.id,
|
|
15080
|
-
|
|
14835
|
+
eval_set: evalCase.eval_set,
|
|
15081
14836
|
score: 0,
|
|
15082
14837
|
assertions: [],
|
|
15083
14838
|
output: [],
|
|
@@ -15309,8 +15064,6 @@ async function runBatchEvaluation(options) {
|
|
|
15309
15064
|
const promptInputs = promptInputsList[index];
|
|
15310
15065
|
return {
|
|
15311
15066
|
question: promptInputs.question,
|
|
15312
|
-
guidelines: promptInputs.guidelines,
|
|
15313
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
15314
15067
|
inputFiles: evalCase.file_paths,
|
|
15315
15068
|
evalCaseId: evalCase.id,
|
|
15316
15069
|
metadata: {
|
|
@@ -15508,7 +15261,7 @@ async function runEvalCase(options) {
|
|
|
15508
15261
|
);
|
|
15509
15262
|
}
|
|
15510
15263
|
if (caseWorkspaceFile && workspacePath) {
|
|
15511
|
-
const copiedFile =
|
|
15264
|
+
const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
|
|
15512
15265
|
try {
|
|
15513
15266
|
await stat7(copiedFile);
|
|
15514
15267
|
caseWorkspaceFile = copiedFile;
|
|
@@ -15568,10 +15321,10 @@ async function runEvalCase(options) {
|
|
|
15568
15321
|
const files = evalCase.metadata.agent_skills_files;
|
|
15569
15322
|
if (baseDir && files.length > 0) {
|
|
15570
15323
|
for (const relPath of files) {
|
|
15571
|
-
const srcPath =
|
|
15572
|
-
const destPath =
|
|
15324
|
+
const srcPath = path41.resolve(baseDir, relPath);
|
|
15325
|
+
const destPath = path41.resolve(workspacePath, relPath);
|
|
15573
15326
|
try {
|
|
15574
|
-
await mkdir13(
|
|
15327
|
+
await mkdir13(path41.dirname(destPath), { recursive: true });
|
|
15575
15328
|
await copyFile2(srcPath, destPath);
|
|
15576
15329
|
} catch (error) {
|
|
15577
15330
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -16018,8 +15771,7 @@ async function evaluateCandidate(options) {
|
|
|
16018
15771
|
let lmRequest;
|
|
16019
15772
|
if (isAgentProvider(provider)) {
|
|
16020
15773
|
agentRequest = {
|
|
16021
|
-
question: promptInputs.question
|
|
16022
|
-
guideline_paths: evalCase.guideline_paths
|
|
15774
|
+
question: promptInputs.question
|
|
16023
15775
|
};
|
|
16024
15776
|
} else {
|
|
16025
15777
|
if (promptInputs.chatPrompt) {
|
|
@@ -16028,8 +15780,7 @@ async function evaluateCandidate(options) {
|
|
|
16028
15780
|
};
|
|
16029
15781
|
} else {
|
|
16030
15782
|
lmRequest = {
|
|
16031
|
-
question: promptInputs.question
|
|
16032
|
-
guidelines: promptInputs.guidelines
|
|
15783
|
+
question: promptInputs.question
|
|
16033
15784
|
};
|
|
16034
15785
|
}
|
|
16035
15786
|
}
|
|
@@ -16043,7 +15794,7 @@ async function evaluateCandidate(options) {
|
|
|
16043
15794
|
return {
|
|
16044
15795
|
timestamp: completedAt.toISOString(),
|
|
16045
15796
|
testId: evalCase.id,
|
|
16046
|
-
|
|
15797
|
+
eval_set: evalCase.eval_set,
|
|
16047
15798
|
conversationId: evalCase.conversation_id,
|
|
16048
15799
|
score: score.score,
|
|
16049
15800
|
assertions: score.assertions,
|
|
@@ -16191,7 +15942,7 @@ async function runEvaluatorList(options) {
|
|
|
16191
15942
|
fileChanges,
|
|
16192
15943
|
workspacePath
|
|
16193
15944
|
};
|
|
16194
|
-
const evalFileDir = evalCase.
|
|
15945
|
+
const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
16195
15946
|
const dispatchContext = {
|
|
16196
15947
|
graderProvider,
|
|
16197
15948
|
targetResolver,
|
|
@@ -16305,7 +16056,7 @@ function filterEvalCases(evalCases, filter) {
|
|
|
16305
16056
|
if (!filter) {
|
|
16306
16057
|
return evalCases;
|
|
16307
16058
|
}
|
|
16308
|
-
return evalCases.filter((evalCase) =>
|
|
16059
|
+
return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter));
|
|
16309
16060
|
}
|
|
16310
16061
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
16311
16062
|
const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
|
|
@@ -16342,8 +16093,6 @@ async function invokeProvider(provider, options) {
|
|
|
16342
16093
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
16343
16094
|
return await provider.invoke({
|
|
16344
16095
|
question: promptInputs.question,
|
|
16345
|
-
guidelines: promptInputs.guidelines,
|
|
16346
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
16347
16096
|
chatPrompt: promptInputs.chatPrompt,
|
|
16348
16097
|
inputFiles: evalCase.file_paths,
|
|
16349
16098
|
evalCaseId: evalCase.id,
|
|
@@ -16371,21 +16120,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16371
16120
|
if (isAgentProvider(provider)) {
|
|
16372
16121
|
agentRequest = {
|
|
16373
16122
|
question: promptInputs.question,
|
|
16374
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16375
16123
|
error: message
|
|
16376
16124
|
};
|
|
16377
16125
|
} else {
|
|
16378
16126
|
if (promptInputs.chatPrompt) {
|
|
16379
16127
|
lmRequest = {
|
|
16380
16128
|
chat_prompt: promptInputs.chatPrompt,
|
|
16381
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16382
16129
|
error: message
|
|
16383
16130
|
};
|
|
16384
16131
|
} else {
|
|
16385
16132
|
lmRequest = {
|
|
16386
16133
|
question: promptInputs.question,
|
|
16387
|
-
guidelines: promptInputs.guidelines,
|
|
16388
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16389
16134
|
error: message
|
|
16390
16135
|
};
|
|
16391
16136
|
}
|
|
@@ -16398,7 +16143,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16398
16143
|
return {
|
|
16399
16144
|
timestamp: timestamp.toISOString(),
|
|
16400
16145
|
testId: evalCase.id,
|
|
16401
|
-
|
|
16146
|
+
eval_set: evalCase.eval_set,
|
|
16402
16147
|
conversationId: evalCase.conversation_id,
|
|
16403
16148
|
score: 0,
|
|
16404
16149
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
@@ -16431,7 +16176,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
16431
16176
|
hash.update(target.name);
|
|
16432
16177
|
hash.update(evalCase.id);
|
|
16433
16178
|
hash.update(promptInputs.question);
|
|
16434
|
-
hash.update(promptInputs.guidelines);
|
|
16435
16179
|
hash.update(promptInputs.systemMessage ?? "");
|
|
16436
16180
|
if (promptInputs.chatPrompt) {
|
|
16437
16181
|
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
@@ -16531,7 +16275,7 @@ function computeWeightedMean(entries) {
|
|
|
16531
16275
|
|
|
16532
16276
|
// src/evaluation/evaluate.ts
|
|
16533
16277
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16534
|
-
import
|
|
16278
|
+
import path42 from "node:path";
|
|
16535
16279
|
|
|
16536
16280
|
// src/evaluation/providers/function-provider.ts
|
|
16537
16281
|
function createFunctionProvider(taskFn) {
|
|
@@ -16568,7 +16312,7 @@ async function evaluate(config) {
|
|
|
16568
16312
|
}
|
|
16569
16313
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16570
16314
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16571
|
-
const testFilePath = config.specFile ?
|
|
16315
|
+
const testFilePath = config.specFile ? path42.resolve(config.specFile) : path42.join(process.cwd(), "__programmatic__.yaml");
|
|
16572
16316
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16573
16317
|
let resolvedTarget;
|
|
16574
16318
|
let taskProvider;
|
|
@@ -16634,8 +16378,6 @@ async function evaluate(config) {
|
|
|
16634
16378
|
input_segments: inputSegments,
|
|
16635
16379
|
expected_output: expectedOutput,
|
|
16636
16380
|
reference_answer: expectedOutputValue,
|
|
16637
|
-
guideline_paths: [],
|
|
16638
|
-
guideline_patterns: [],
|
|
16639
16381
|
file_paths: [],
|
|
16640
16382
|
assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
|
|
16641
16383
|
metadata: test.metadata
|
|
@@ -16697,10 +16439,10 @@ function computeSummary(results, durationMs) {
|
|
|
16697
16439
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16698
16440
|
async function discoverDefaultTarget(repoRoot) {
|
|
16699
16441
|
const cwd = process.cwd();
|
|
16700
|
-
const chain = buildDirectoryChain(
|
|
16442
|
+
const chain = buildDirectoryChain(path42.join(cwd, "_placeholder"), repoRoot);
|
|
16701
16443
|
for (const dir of chain) {
|
|
16702
16444
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16703
|
-
const targetsPath =
|
|
16445
|
+
const targetsPath = path42.join(dir, candidate);
|
|
16704
16446
|
if (!existsSync4(targetsPath)) continue;
|
|
16705
16447
|
try {
|
|
16706
16448
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16717,7 +16459,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16717
16459
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16718
16460
|
const envFiles = [];
|
|
16719
16461
|
for (const dir of chain) {
|
|
16720
|
-
const envPath =
|
|
16462
|
+
const envPath = path42.join(dir, ".env");
|
|
16721
16463
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16722
16464
|
}
|
|
16723
16465
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -16898,8 +16640,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
16898
16640
|
}
|
|
16899
16641
|
|
|
16900
16642
|
// src/evaluation/cache/response-cache.ts
|
|
16901
|
-
import { mkdir as mkdir14, readFile as
|
|
16902
|
-
import
|
|
16643
|
+
import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
|
|
16644
|
+
import path43 from "node:path";
|
|
16903
16645
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
16904
16646
|
var ResponseCache = class {
|
|
16905
16647
|
cachePath;
|
|
@@ -16909,7 +16651,7 @@ var ResponseCache = class {
|
|
|
16909
16651
|
async get(key) {
|
|
16910
16652
|
const filePath = this.keyToPath(key);
|
|
16911
16653
|
try {
|
|
16912
|
-
const data = await
|
|
16654
|
+
const data = await readFile11(filePath, "utf8");
|
|
16913
16655
|
return JSON.parse(data);
|
|
16914
16656
|
} catch {
|
|
16915
16657
|
return void 0;
|
|
@@ -16917,13 +16659,13 @@ var ResponseCache = class {
|
|
|
16917
16659
|
}
|
|
16918
16660
|
async set(key, value) {
|
|
16919
16661
|
const filePath = this.keyToPath(key);
|
|
16920
|
-
const dir =
|
|
16662
|
+
const dir = path43.dirname(filePath);
|
|
16921
16663
|
await mkdir14(dir, { recursive: true });
|
|
16922
16664
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
16923
16665
|
}
|
|
16924
16666
|
keyToPath(key) {
|
|
16925
16667
|
const prefix = key.slice(0, 2);
|
|
16926
|
-
return
|
|
16668
|
+
return path43.join(this.cachePath, prefix, `${key}.json`);
|
|
16927
16669
|
}
|
|
16928
16670
|
};
|
|
16929
16671
|
function shouldEnableCache(params) {
|
|
@@ -17111,7 +16853,7 @@ var OtelTraceExporter = class {
|
|
|
17111
16853
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17112
16854
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
17113
16855
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17114
|
-
if (result.
|
|
16856
|
+
if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
|
|
17115
16857
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17116
16858
|
if (captureContent && result.output.length > 0) {
|
|
17117
16859
|
const lastMsg = result.output[result.output.length - 1];
|
|
@@ -17296,14 +17038,14 @@ var OtelStreamingObserver = class {
|
|
|
17296
17038
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
17297
17039
|
rootCtx = null;
|
|
17298
17040
|
/** Create root eval span immediately (visible in backend right away) */
|
|
17299
|
-
startEvalCase(testId, target,
|
|
17041
|
+
startEvalCase(testId, target, evalSet) {
|
|
17300
17042
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
17301
17043
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
17302
17044
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
17303
17045
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17304
17046
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
17305
17047
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
17306
|
-
if (
|
|
17048
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
|
|
17307
17049
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
17308
17050
|
}
|
|
17309
17051
|
/** Create and immediately export a tool span */
|
|
@@ -17491,7 +17233,6 @@ export {
|
|
|
17491
17233
|
initializeBaseline,
|
|
17492
17234
|
isAgentSkillsFormat,
|
|
17493
17235
|
isEvaluatorKind,
|
|
17494
|
-
isGuidelineFile,
|
|
17495
17236
|
isJsonObject,
|
|
17496
17237
|
isJsonValue,
|
|
17497
17238
|
isNonEmptyString,
|