@agentv/core 3.7.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-2IZOTQ25.js → chunk-PC5TLJF6.js} +143 -4
- package/dist/chunk-PC5TLJF6.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +228 -72
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +85 -37
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +519 -778
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -23
- package/dist/index.d.ts +11 -23
- package/dist/index.js +450 -841
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-2IZOTQ25.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -2,21 +2,24 @@ import {
|
|
|
2
2
|
TEST_MESSAGE_ROLES,
|
|
3
3
|
buildDirectoryChain,
|
|
4
4
|
buildSearchRoots,
|
|
5
|
+
expandFileReferences,
|
|
5
6
|
extractLastAssistantContent,
|
|
6
7
|
fileExists,
|
|
7
8
|
findGitRoot,
|
|
9
|
+
interpolateEnv,
|
|
8
10
|
isAgentProvider,
|
|
9
11
|
isEvaluatorKind,
|
|
10
12
|
isJsonObject,
|
|
11
13
|
isJsonValue,
|
|
12
14
|
isTestMessage,
|
|
13
15
|
isTestMessageRole,
|
|
16
|
+
loadCasesFromFile,
|
|
14
17
|
normalizeLineEndings,
|
|
15
18
|
readJsonFile,
|
|
16
19
|
readTextFile,
|
|
17
20
|
resolveFileReference,
|
|
18
21
|
resolveTargetDefinition
|
|
19
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-PC5TLJF6.js";
|
|
20
23
|
import {
|
|
21
24
|
AgentvProvider
|
|
22
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -146,30 +149,11 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
146
149
|
}
|
|
147
150
|
|
|
148
151
|
// src/evaluation/yaml-parser.ts
|
|
149
|
-
import { readFile as
|
|
150
|
-
import
|
|
151
|
-
import
|
|
152
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
153
|
+
import path7 from "node:path";
|
|
154
|
+
import micromatch2 from "micromatch";
|
|
152
155
|
import { parse as parse2 } from "yaml";
|
|
153
156
|
|
|
154
|
-
// src/evaluation/interpolation.ts
|
|
155
|
-
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
156
|
-
function interpolateEnv(value, env) {
|
|
157
|
-
if (typeof value === "string") {
|
|
158
|
-
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
159
|
-
}
|
|
160
|
-
if (Array.isArray(value)) {
|
|
161
|
-
return value.map((item) => interpolateEnv(item, env));
|
|
162
|
-
}
|
|
163
|
-
if (value !== null && typeof value === "object") {
|
|
164
|
-
const result = {};
|
|
165
|
-
for (const [key, val] of Object.entries(value)) {
|
|
166
|
-
result[key] = interpolateEnv(val, env);
|
|
167
|
-
}
|
|
168
|
-
return result;
|
|
169
|
-
}
|
|
170
|
-
return value;
|
|
171
|
-
}
|
|
172
|
-
|
|
173
157
|
// src/evaluation/loaders/agent-skills-parser.ts
|
|
174
158
|
import { readFile } from "node:fs/promises";
|
|
175
159
|
import path from "node:path";
|
|
@@ -241,7 +225,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
241
225
|
input_segments: [{ type: "text", value: prompt }],
|
|
242
226
|
expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
|
|
243
227
|
reference_answer: evalCase.expected_output,
|
|
244
|
-
guideline_paths: [],
|
|
245
228
|
file_paths: filePaths,
|
|
246
229
|
criteria: evalCase.expected_output ?? "",
|
|
247
230
|
assertions,
|
|
@@ -252,134 +235,15 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
252
235
|
return tests;
|
|
253
236
|
}
|
|
254
237
|
|
|
255
|
-
// src/evaluation/loaders/case-file-loader.ts
|
|
256
|
-
import { readFile as readFile2 } from "node:fs/promises";
|
|
257
|
-
import path2 from "node:path";
|
|
258
|
-
import fg from "fast-glob";
|
|
259
|
-
import { parse as parseYaml } from "yaml";
|
|
260
|
-
var ANSI_YELLOW = "\x1B[33m";
|
|
261
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
262
|
-
var FILE_PROTOCOL = "file://";
|
|
263
|
-
function isFileReference(value) {
|
|
264
|
-
return typeof value === "string" && value.startsWith(FILE_PROTOCOL);
|
|
265
|
-
}
|
|
266
|
-
function extractFilePath(ref) {
|
|
267
|
-
return ref.slice(FILE_PROTOCOL.length);
|
|
268
|
-
}
|
|
269
|
-
function isGlobPattern(filePath) {
|
|
270
|
-
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
271
|
-
}
|
|
272
|
-
function parseYamlCases(content, filePath) {
|
|
273
|
-
const raw = parseYaml(content);
|
|
274
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
275
|
-
if (!Array.isArray(parsed)) {
|
|
276
|
-
throw new Error(
|
|
277
|
-
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
278
|
-
);
|
|
279
|
-
}
|
|
280
|
-
const results = [];
|
|
281
|
-
for (const item of parsed) {
|
|
282
|
-
if (!isJsonObject(item)) {
|
|
283
|
-
throw new Error(`External test file contains non-object entry: ${filePath}`);
|
|
284
|
-
}
|
|
285
|
-
results.push(item);
|
|
286
|
-
}
|
|
287
|
-
return results;
|
|
288
|
-
}
|
|
289
|
-
function parseJsonlCases(content, filePath) {
|
|
290
|
-
const lines = content.split("\n");
|
|
291
|
-
const results = [];
|
|
292
|
-
for (let i = 0; i < lines.length; i++) {
|
|
293
|
-
const line = lines[i].trim();
|
|
294
|
-
if (line === "") continue;
|
|
295
|
-
try {
|
|
296
|
-
const raw = JSON.parse(line);
|
|
297
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
298
|
-
if (!isJsonObject(parsed)) {
|
|
299
|
-
throw new Error("Expected JSON object");
|
|
300
|
-
}
|
|
301
|
-
results.push(parsed);
|
|
302
|
-
} catch (error) {
|
|
303
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
304
|
-
throw new Error(`Malformed JSONL at line ${i + 1}: ${message}
|
|
305
|
-
File: ${filePath}`);
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
return results;
|
|
309
|
-
}
|
|
310
|
-
async function loadCasesFromFile(filePath) {
|
|
311
|
-
const ext = path2.extname(filePath).toLowerCase();
|
|
312
|
-
let content;
|
|
313
|
-
try {
|
|
314
|
-
content = await readFile2(filePath, "utf8");
|
|
315
|
-
} catch (error) {
|
|
316
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
317
|
-
throw new Error(`Cannot read external test file: ${filePath}
|
|
318
|
-
${message}`);
|
|
319
|
-
}
|
|
320
|
-
if (content.trim() === "") {
|
|
321
|
-
console.warn(
|
|
322
|
-
`${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET2}`
|
|
323
|
-
);
|
|
324
|
-
return [];
|
|
325
|
-
}
|
|
326
|
-
if (ext === ".yaml" || ext === ".yml") {
|
|
327
|
-
return parseYamlCases(content, filePath);
|
|
328
|
-
}
|
|
329
|
-
if (ext === ".jsonl") {
|
|
330
|
-
return parseJsonlCases(content, filePath);
|
|
331
|
-
}
|
|
332
|
-
throw new Error(
|
|
333
|
-
`Unsupported external test file format '${ext}': ${filePath}. Supported: .yaml, .yml, .jsonl`
|
|
334
|
-
);
|
|
335
|
-
}
|
|
336
|
-
async function resolveFileReference2(ref, evalFileDir) {
|
|
337
|
-
const rawPath = extractFilePath(ref);
|
|
338
|
-
const absolutePattern = path2.resolve(evalFileDir, rawPath);
|
|
339
|
-
if (isGlobPattern(rawPath)) {
|
|
340
|
-
const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
|
|
341
|
-
onlyFiles: true,
|
|
342
|
-
absolute: true
|
|
343
|
-
});
|
|
344
|
-
if (matches.length === 0) {
|
|
345
|
-
console.warn(
|
|
346
|
-
`${ANSI_YELLOW}Warning: Glob pattern matched no files: ${ref} (resolved to ${absolutePattern})${ANSI_RESET2}`
|
|
347
|
-
);
|
|
348
|
-
return [];
|
|
349
|
-
}
|
|
350
|
-
matches.sort();
|
|
351
|
-
const allCases = [];
|
|
352
|
-
for (const match of matches) {
|
|
353
|
-
const cases = await loadCasesFromFile(match);
|
|
354
|
-
allCases.push(...cases);
|
|
355
|
-
}
|
|
356
|
-
return allCases;
|
|
357
|
-
}
|
|
358
|
-
return loadCasesFromFile(absolutePattern);
|
|
359
|
-
}
|
|
360
|
-
async function expandFileReferences(tests, evalFileDir) {
|
|
361
|
-
const expanded = [];
|
|
362
|
-
for (const entry of tests) {
|
|
363
|
-
if (isFileReference(entry)) {
|
|
364
|
-
const cases = await resolveFileReference2(entry, evalFileDir);
|
|
365
|
-
expanded.push(...cases);
|
|
366
|
-
} else {
|
|
367
|
-
expanded.push(entry);
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
return expanded;
|
|
371
|
-
}
|
|
372
|
-
|
|
373
238
|
// src/evaluation/loaders/config-loader.ts
|
|
374
|
-
import { readFile as
|
|
375
|
-
import
|
|
376
|
-
import micromatch from "micromatch";
|
|
239
|
+
import { readFile as readFile2 } from "node:fs/promises";
|
|
240
|
+
import path3 from "node:path";
|
|
377
241
|
import { parse } from "yaml";
|
|
378
242
|
|
|
379
243
|
// src/evaluation/loaders/file-resolver.ts
|
|
380
244
|
import { constants } from "node:fs";
|
|
381
245
|
import { access } from "node:fs/promises";
|
|
382
|
-
import
|
|
246
|
+
import path2 from "node:path";
|
|
383
247
|
import { fileURLToPath } from "node:url";
|
|
384
248
|
async function fileExists2(absolutePath) {
|
|
385
249
|
try {
|
|
@@ -397,15 +261,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
397
261
|
if (candidate.startsWith("file:")) {
|
|
398
262
|
return fileURLToPath(candidate);
|
|
399
263
|
}
|
|
400
|
-
return
|
|
264
|
+
return path2.resolve(candidate);
|
|
401
265
|
}
|
|
402
266
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
403
267
|
}
|
|
404
268
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
405
269
|
const directories = [];
|
|
406
270
|
const seen = /* @__PURE__ */ new Set();
|
|
407
|
-
const boundary =
|
|
408
|
-
let current =
|
|
271
|
+
const boundary = path2.resolve(repoRoot);
|
|
272
|
+
let current = path2.resolve(path2.dirname(filePath));
|
|
409
273
|
while (current !== void 0) {
|
|
410
274
|
if (!seen.has(current)) {
|
|
411
275
|
directories.push(current);
|
|
@@ -414,7 +278,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
414
278
|
if (current === boundary) {
|
|
415
279
|
break;
|
|
416
280
|
}
|
|
417
|
-
const parent =
|
|
281
|
+
const parent = path2.dirname(current);
|
|
418
282
|
if (parent === current) {
|
|
419
283
|
break;
|
|
420
284
|
}
|
|
@@ -428,16 +292,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
428
292
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
429
293
|
const uniqueRoots = [];
|
|
430
294
|
const addRoot = (root) => {
|
|
431
|
-
const normalized =
|
|
295
|
+
const normalized = path2.resolve(root);
|
|
432
296
|
if (!uniqueRoots.includes(normalized)) {
|
|
433
297
|
uniqueRoots.push(normalized);
|
|
434
298
|
}
|
|
435
299
|
};
|
|
436
|
-
let currentDir =
|
|
300
|
+
let currentDir = path2.dirname(evalPath);
|
|
437
301
|
let reachedBoundary = false;
|
|
438
302
|
while (!reachedBoundary) {
|
|
439
303
|
addRoot(currentDir);
|
|
440
|
-
const parentDir =
|
|
304
|
+
const parentDir = path2.dirname(currentDir);
|
|
441
305
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
442
306
|
reachedBoundary = true;
|
|
443
307
|
} else {
|
|
@@ -452,19 +316,19 @@ function trimLeadingSeparators(value) {
|
|
|
452
316
|
const trimmed = value.replace(/^[/\\]+/, "");
|
|
453
317
|
return trimmed.length > 0 ? trimmed : value;
|
|
454
318
|
}
|
|
455
|
-
async function
|
|
319
|
+
async function resolveFileReference2(rawValue, searchRoots) {
|
|
456
320
|
const displayPath = trimLeadingSeparators(rawValue);
|
|
457
321
|
const potentialPaths = [];
|
|
458
|
-
if (
|
|
459
|
-
potentialPaths.push(
|
|
322
|
+
if (path2.isAbsolute(rawValue)) {
|
|
323
|
+
potentialPaths.push(path2.normalize(rawValue));
|
|
460
324
|
}
|
|
461
325
|
for (const base of searchRoots) {
|
|
462
|
-
potentialPaths.push(
|
|
326
|
+
potentialPaths.push(path2.resolve(base, displayPath));
|
|
463
327
|
}
|
|
464
328
|
const attempted = [];
|
|
465
329
|
const seen = /* @__PURE__ */ new Set();
|
|
466
330
|
for (const candidate of potentialPaths) {
|
|
467
|
-
const absoluteCandidate =
|
|
331
|
+
const absoluteCandidate = path2.resolve(candidate);
|
|
468
332
|
if (seen.has(absoluteCandidate)) {
|
|
469
333
|
continue;
|
|
470
334
|
}
|
|
@@ -478,8 +342,8 @@ async function resolveFileReference3(rawValue, searchRoots) {
|
|
|
478
342
|
}
|
|
479
343
|
|
|
480
344
|
// src/evaluation/loaders/config-loader.ts
|
|
481
|
-
var
|
|
482
|
-
var
|
|
345
|
+
var ANSI_YELLOW = "\x1B[33m";
|
|
346
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
483
347
|
var DEFAULT_EVAL_PATTERNS = [
|
|
484
348
|
"**/evals/**/*.eval.yaml",
|
|
485
349
|
"**/evals/**/eval.yaml"
|
|
@@ -487,12 +351,12 @@ var DEFAULT_EVAL_PATTERNS = [
|
|
|
487
351
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
488
352
|
const directories = buildDirectoryChain2(evalFilePath, repoRoot);
|
|
489
353
|
for (const directory of directories) {
|
|
490
|
-
const configPath =
|
|
354
|
+
const configPath = path3.join(directory, ".agentv", "config.yaml");
|
|
491
355
|
if (!await fileExists2(configPath)) {
|
|
492
356
|
continue;
|
|
493
357
|
}
|
|
494
358
|
try {
|
|
495
|
-
const rawConfig = await
|
|
359
|
+
const rawConfig = await readFile2(configPath, "utf8");
|
|
496
360
|
const parsed = parse(rawConfig);
|
|
497
361
|
if (!isJsonObject(parsed)) {
|
|
498
362
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
@@ -504,15 +368,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
504
368
|
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
505
369
|
continue;
|
|
506
370
|
}
|
|
507
|
-
const guidelinePatterns = config.guideline_patterns;
|
|
508
|
-
if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
|
|
509
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
|
|
510
|
-
continue;
|
|
511
|
-
}
|
|
512
|
-
if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
|
|
513
|
-
logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
|
|
514
|
-
continue;
|
|
515
|
-
}
|
|
516
371
|
const evalPatterns = config.eval_patterns;
|
|
517
372
|
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
518
373
|
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
@@ -528,7 +383,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
528
383
|
);
|
|
529
384
|
return {
|
|
530
385
|
required_version: requiredVersion,
|
|
531
|
-
guideline_patterns: guidelinePatterns,
|
|
532
386
|
eval_patterns: evalPatterns,
|
|
533
387
|
execution: executionDefaults
|
|
534
388
|
};
|
|
@@ -540,11 +394,6 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
540
394
|
}
|
|
541
395
|
return null;
|
|
542
396
|
}
|
|
543
|
-
function isGuidelineFile(filePath, patterns) {
|
|
544
|
-
const normalized = filePath.split("\\").join("/");
|
|
545
|
-
const patternsToUse = patterns ?? [];
|
|
546
|
-
return micromatch.isMatch(normalized, patternsToUse);
|
|
547
|
-
}
|
|
548
397
|
function extractTargetFromSuite(suite) {
|
|
549
398
|
const execution = suite.execution;
|
|
550
399
|
if (execution && typeof execution === "object" && !Array.isArray(execution)) {
|
|
@@ -729,14 +578,14 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
729
578
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
730
579
|
}
|
|
731
580
|
function logWarning(message) {
|
|
732
|
-
console.warn(`${
|
|
581
|
+
console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`);
|
|
733
582
|
}
|
|
734
583
|
|
|
735
584
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
736
|
-
import
|
|
585
|
+
import path4 from "node:path";
|
|
737
586
|
|
|
738
587
|
// src/evaluation/validation/prompt-validator.ts
|
|
739
|
-
import { readFile as
|
|
588
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
740
589
|
|
|
741
590
|
// src/evaluation/template-variables.ts
|
|
742
591
|
var TEMPLATE_VARIABLES = {
|
|
@@ -756,10 +605,10 @@ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
|
756
605
|
]);
|
|
757
606
|
|
|
758
607
|
// src/evaluation/validation/prompt-validator.ts
|
|
759
|
-
var
|
|
760
|
-
var
|
|
608
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
609
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
761
610
|
async function validateCustomPromptContent(promptPath) {
|
|
762
|
-
const content = await
|
|
611
|
+
const content = await readFile3(promptPath, "utf8");
|
|
763
612
|
validateTemplateVariables(content, promptPath);
|
|
764
613
|
}
|
|
765
614
|
function validateTemplateVariables(content, source) {
|
|
@@ -786,16 +635,16 @@ function validateTemplateVariables(content, source) {
|
|
|
786
635
|
);
|
|
787
636
|
}
|
|
788
637
|
if (invalidVariables.length > 0) {
|
|
789
|
-
const warningMessage = `${
|
|
638
|
+
const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
|
|
790
639
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
791
|
-
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${
|
|
640
|
+
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET3}`;
|
|
792
641
|
console.warn(warningMessage);
|
|
793
642
|
}
|
|
794
643
|
}
|
|
795
644
|
|
|
796
645
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
797
|
-
var
|
|
798
|
-
var
|
|
646
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
647
|
+
var ANSI_RESET4 = "\x1B[0m";
|
|
799
648
|
function normalizeEvaluatorType(type) {
|
|
800
649
|
return type.replace(/_/g, "-");
|
|
801
650
|
}
|
|
@@ -897,7 +746,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
897
746
|
let command;
|
|
898
747
|
if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
|
|
899
748
|
console.warn(
|
|
900
|
-
`${
|
|
749
|
+
`${ANSI_YELLOW3}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
|
|
901
750
|
);
|
|
902
751
|
}
|
|
903
752
|
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
@@ -923,9 +772,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
923
772
|
const cwd = asString(rawEvaluator.cwd);
|
|
924
773
|
let resolvedCwd;
|
|
925
774
|
if (cwd) {
|
|
926
|
-
const resolved = await
|
|
775
|
+
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
927
776
|
if (resolved.resolvedPath) {
|
|
928
|
-
resolvedCwd =
|
|
777
|
+
resolvedCwd = path4.resolve(resolved.resolvedPath);
|
|
929
778
|
} else {
|
|
930
779
|
logWarning2(
|
|
931
780
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -1081,9 +930,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1081
930
|
const aggregatorPrompt = asString(rawAggregator.prompt);
|
|
1082
931
|
let promptPath2;
|
|
1083
932
|
if (aggregatorPrompt) {
|
|
1084
|
-
const resolved = await
|
|
933
|
+
const resolved = await resolveFileReference2(aggregatorPrompt, searchRoots);
|
|
1085
934
|
if (resolved.resolvedPath) {
|
|
1086
|
-
promptPath2 =
|
|
935
|
+
promptPath2 = path4.resolve(resolved.resolvedPath);
|
|
1087
936
|
}
|
|
1088
937
|
}
|
|
1089
938
|
aggregator = {
|
|
@@ -1640,7 +1489,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1640
1489
|
if (isJsonObject2(rawPrompt)) {
|
|
1641
1490
|
if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
|
|
1642
1491
|
console.warn(
|
|
1643
|
-
`${
|
|
1492
|
+
`${ANSI_YELLOW3}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
|
|
1644
1493
|
);
|
|
1645
1494
|
}
|
|
1646
1495
|
const commandArray = asStringArray(
|
|
@@ -1651,9 +1500,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1651
1500
|
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
|
|
1652
1501
|
}
|
|
1653
1502
|
const commandPath = commandArray[commandArray.length - 1];
|
|
1654
|
-
const resolved = await
|
|
1503
|
+
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
1655
1504
|
if (resolved.resolvedPath) {
|
|
1656
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
1505
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
|
|
1657
1506
|
} else {
|
|
1658
1507
|
throw new Error(
|
|
1659
1508
|
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -1664,9 +1513,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1664
1513
|
}
|
|
1665
1514
|
} else if (typeof rawPrompt === "string") {
|
|
1666
1515
|
prompt = rawPrompt;
|
|
1667
|
-
const resolved = await
|
|
1516
|
+
const resolved = await resolveFileReference2(prompt, searchRoots);
|
|
1668
1517
|
if (resolved.resolvedPath) {
|
|
1669
|
-
promptPath =
|
|
1518
|
+
promptPath = path4.resolve(resolved.resolvedPath);
|
|
1670
1519
|
try {
|
|
1671
1520
|
await validateCustomPromptContent(promptPath);
|
|
1672
1521
|
} catch (error) {
|
|
@@ -1866,10 +1715,10 @@ function warnUnconsumedCriteria(_criteria, _evaluators, _testId) {
|
|
|
1866
1715
|
function logWarning2(message, details) {
|
|
1867
1716
|
if (details && details.length > 0) {
|
|
1868
1717
|
const detailBlock = details.join("\n");
|
|
1869
|
-
console.warn(`${
|
|
1870
|
-
${detailBlock}${
|
|
1718
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}
|
|
1719
|
+
${detailBlock}${ANSI_RESET4}`);
|
|
1871
1720
|
} else {
|
|
1872
|
-
console.warn(`${
|
|
1721
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
|
|
1873
1722
|
}
|
|
1874
1723
|
}
|
|
1875
1724
|
function parseRequired(value) {
|
|
@@ -2118,14 +1967,14 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
2118
1967
|
}
|
|
2119
1968
|
|
|
2120
1969
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2121
|
-
import { readFile as readFile6 } from "node:fs/promises";
|
|
2122
|
-
import path7 from "node:path";
|
|
2123
|
-
import micromatch2 from "micromatch";
|
|
2124
|
-
import { parse as parseYaml2 } from "yaml";
|
|
2125
|
-
|
|
2126
|
-
// src/evaluation/loaders/message-processor.ts
|
|
2127
1970
|
import { readFile as readFile5 } from "node:fs/promises";
|
|
2128
1971
|
import path6 from "node:path";
|
|
1972
|
+
import micromatch from "micromatch";
|
|
1973
|
+
import { parse as parseYaml } from "yaml";
|
|
1974
|
+
|
|
1975
|
+
// src/evaluation/loaders/message-processor.ts
|
|
1976
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
1977
|
+
import path5 from "node:path";
|
|
2129
1978
|
|
|
2130
1979
|
// src/evaluation/formatting/segment-formatter.ts
|
|
2131
1980
|
function formatFileContents(parts) {
|
|
@@ -2147,10 +1996,6 @@ function formatSegment(segment, mode = "lm") {
|
|
|
2147
1996
|
if (type === "text") {
|
|
2148
1997
|
return asString2(segment.value);
|
|
2149
1998
|
}
|
|
2150
|
-
if (type === "guideline_ref") {
|
|
2151
|
-
const refPath = asString2(segment.path);
|
|
2152
|
-
return refPath ? `<Attached: ${refPath}>` : void 0;
|
|
2153
|
-
}
|
|
2154
1999
|
if (type === "file") {
|
|
2155
2000
|
const filePath = asString2(segment.path);
|
|
2156
2001
|
if (!filePath) {
|
|
@@ -2173,9 +2018,6 @@ function hasVisibleContent(segments) {
|
|
|
2173
2018
|
const value = asString2(segment.value);
|
|
2174
2019
|
return value !== void 0 && value.trim().length > 0;
|
|
2175
2020
|
}
|
|
2176
|
-
if (type === "guideline_ref") {
|
|
2177
|
-
return false;
|
|
2178
|
-
}
|
|
2179
2021
|
if (type === "file") {
|
|
2180
2022
|
const text = asString2(segment.text);
|
|
2181
2023
|
return text !== void 0 && text.trim().length > 0;
|
|
@@ -2188,20 +2030,10 @@ function asString2(value) {
|
|
|
2188
2030
|
}
|
|
2189
2031
|
|
|
2190
2032
|
// src/evaluation/loaders/message-processor.ts
|
|
2191
|
-
var
|
|
2192
|
-
var
|
|
2033
|
+
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2034
|
+
var ANSI_RESET5 = "\x1B[0m";
|
|
2193
2035
|
async function processMessages(options) {
|
|
2194
|
-
const {
|
|
2195
|
-
messages,
|
|
2196
|
-
searchRoots,
|
|
2197
|
-
repoRootPath,
|
|
2198
|
-
guidelinePatterns,
|
|
2199
|
-
guidelinePaths,
|
|
2200
|
-
treatFileSegmentsAsGuidelines,
|
|
2201
|
-
textParts,
|
|
2202
|
-
messageType,
|
|
2203
|
-
verbose
|
|
2204
|
-
} = options;
|
|
2036
|
+
const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
|
|
2205
2037
|
const segments = [];
|
|
2206
2038
|
for (const message of messages) {
|
|
2207
2039
|
const content = message.content;
|
|
@@ -2233,7 +2065,7 @@ async function processMessages(options) {
|
|
|
2233
2065
|
if (!rawValue) {
|
|
2234
2066
|
continue;
|
|
2235
2067
|
}
|
|
2236
|
-
const { displayPath, resolvedPath, attempted } = await
|
|
2068
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2237
2069
|
rawValue,
|
|
2238
2070
|
searchRoots
|
|
2239
2071
|
);
|
|
@@ -2244,27 +2076,12 @@ async function processMessages(options) {
|
|
|
2244
2076
|
continue;
|
|
2245
2077
|
}
|
|
2246
2078
|
try {
|
|
2247
|
-
const fileContent = (await
|
|
2248
|
-
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
2249
|
-
messageType,
|
|
2250
|
-
resolvedPath,
|
|
2251
|
-
repoRootPath,
|
|
2252
|
-
guidelinePatterns,
|
|
2253
|
-
treatFileSegmentsAsGuidelines
|
|
2254
|
-
});
|
|
2255
|
-
if (classifyAsGuideline && guidelinePaths) {
|
|
2256
|
-
guidelinePaths.push(path6.resolve(resolvedPath));
|
|
2257
|
-
if (verbose) {
|
|
2258
|
-
console.log(` [Guideline] Found: ${displayPath}`);
|
|
2259
|
-
console.log(` Resolved to: ${resolvedPath}`);
|
|
2260
|
-
}
|
|
2261
|
-
continue;
|
|
2262
|
-
}
|
|
2079
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2263
2080
|
segments.push({
|
|
2264
2081
|
type: "file",
|
|
2265
2082
|
path: displayPath,
|
|
2266
2083
|
text: fileContent,
|
|
2267
|
-
resolvedPath:
|
|
2084
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
2268
2085
|
});
|
|
2269
2086
|
if (verbose) {
|
|
2270
2087
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -2287,26 +2104,6 @@ async function processMessages(options) {
|
|
|
2287
2104
|
}
|
|
2288
2105
|
return segments;
|
|
2289
2106
|
}
|
|
2290
|
-
function shouldTreatAsGuideline(options) {
|
|
2291
|
-
const {
|
|
2292
|
-
messageType,
|
|
2293
|
-
resolvedPath,
|
|
2294
|
-
repoRootPath,
|
|
2295
|
-
guidelinePatterns,
|
|
2296
|
-
treatFileSegmentsAsGuidelines
|
|
2297
|
-
} = options;
|
|
2298
|
-
if (messageType !== "input") {
|
|
2299
|
-
return false;
|
|
2300
|
-
}
|
|
2301
|
-
if (treatFileSegmentsAsGuidelines) {
|
|
2302
|
-
return true;
|
|
2303
|
-
}
|
|
2304
|
-
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2305
|
-
return false;
|
|
2306
|
-
}
|
|
2307
|
-
const relativeToRepo = path6.relative(repoRootPath, resolvedPath);
|
|
2308
|
-
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
2309
|
-
}
|
|
2310
2107
|
function asString3(value) {
|
|
2311
2108
|
return typeof value === "string" ? value : void 0;
|
|
2312
2109
|
}
|
|
@@ -2332,10 +2129,10 @@ function cloneJsonValue(value) {
|
|
|
2332
2129
|
function logWarning3(message, details) {
|
|
2333
2130
|
if (details && details.length > 0) {
|
|
2334
2131
|
const detailBlock = details.join("\n");
|
|
2335
|
-
console.warn(`${
|
|
2336
|
-
${detailBlock}${
|
|
2132
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}
|
|
2133
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
2337
2134
|
} else {
|
|
2338
|
-
console.warn(`${
|
|
2135
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
|
|
2339
2136
|
}
|
|
2340
2137
|
}
|
|
2341
2138
|
async function processExpectedMessages(options) {
|
|
@@ -2364,7 +2161,7 @@ async function processExpectedMessages(options) {
|
|
|
2364
2161
|
if (!rawValue) {
|
|
2365
2162
|
continue;
|
|
2366
2163
|
}
|
|
2367
|
-
const { displayPath, resolvedPath, attempted } = await
|
|
2164
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2368
2165
|
rawValue,
|
|
2369
2166
|
searchRoots
|
|
2370
2167
|
);
|
|
@@ -2374,12 +2171,12 @@ async function processExpectedMessages(options) {
|
|
|
2374
2171
|
continue;
|
|
2375
2172
|
}
|
|
2376
2173
|
try {
|
|
2377
|
-
const fileContent = (await
|
|
2174
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2378
2175
|
processedContent.push({
|
|
2379
2176
|
type: "file",
|
|
2380
2177
|
path: displayPath,
|
|
2381
2178
|
text: fileContent,
|
|
2382
|
-
resolvedPath:
|
|
2179
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
2383
2180
|
});
|
|
2384
2181
|
if (verbose) {
|
|
2385
2182
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -2476,11 +2273,11 @@ function resolveExpectedMessages(raw) {
|
|
|
2476
2273
|
}
|
|
2477
2274
|
|
|
2478
2275
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2479
|
-
var
|
|
2276
|
+
var ANSI_YELLOW5 = "\x1B[33m";
|
|
2480
2277
|
var ANSI_RED2 = "\x1B[31m";
|
|
2481
|
-
var
|
|
2278
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
2482
2279
|
function detectFormat(filePath) {
|
|
2483
|
-
const ext =
|
|
2280
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
2484
2281
|
if (ext === ".jsonl") return "jsonl";
|
|
2485
2282
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
2486
2283
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -2489,9 +2286,9 @@ function detectFormat(filePath) {
|
|
|
2489
2286
|
);
|
|
2490
2287
|
}
|
|
2491
2288
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
2492
|
-
const dir =
|
|
2493
|
-
const base =
|
|
2494
|
-
const sidecarPath =
|
|
2289
|
+
const dir = path6.dirname(jsonlPath);
|
|
2290
|
+
const base = path6.basename(jsonlPath, ".jsonl");
|
|
2291
|
+
const sidecarPath = path6.join(dir, `${base}.yaml`);
|
|
2495
2292
|
if (!await fileExists2(sidecarPath)) {
|
|
2496
2293
|
if (verbose) {
|
|
2497
2294
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -2499,15 +2296,15 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
2499
2296
|
return {};
|
|
2500
2297
|
}
|
|
2501
2298
|
try {
|
|
2502
|
-
const content = await
|
|
2503
|
-
const parsed = interpolateEnv(
|
|
2299
|
+
const content = await readFile5(sidecarPath, "utf8");
|
|
2300
|
+
const parsed = interpolateEnv(parseYaml(content), process.env);
|
|
2504
2301
|
if (!isJsonObject(parsed)) {
|
|
2505
2302
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
2506
2303
|
return {};
|
|
2507
2304
|
}
|
|
2508
2305
|
return {
|
|
2509
2306
|
description: asString4(parsed.description),
|
|
2510
|
-
|
|
2307
|
+
name: asString4(parsed.name),
|
|
2511
2308
|
execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
|
|
2512
2309
|
evaluator: parsed.evaluator
|
|
2513
2310
|
};
|
|
@@ -2540,23 +2337,21 @@ function parseJsonlContent(content, filePath) {
|
|
|
2540
2337
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
2541
2338
|
const verbose = options?.verbose ?? false;
|
|
2542
2339
|
const filterPattern = options?.filter;
|
|
2543
|
-
const absoluteTestPath =
|
|
2340
|
+
const absoluteTestPath = path6.resolve(evalFilePath);
|
|
2544
2341
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2545
2342
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2546
|
-
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2547
|
-
const guidelinePatterns = config?.guideline_patterns;
|
|
2548
2343
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
2549
|
-
const rawFile = await
|
|
2344
|
+
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
2550
2345
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
2551
|
-
const
|
|
2552
|
-
const
|
|
2346
|
+
const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
2347
|
+
const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
|
|
2553
2348
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
2554
2349
|
const globalExecution = sidecar.execution;
|
|
2555
2350
|
if (verbose) {
|
|
2556
2351
|
console.log(`
|
|
2557
2352
|
[JSONL Dataset: ${evalFilePath}]`);
|
|
2558
2353
|
console.log(` Cases: ${rawCases.length}`);
|
|
2559
|
-
console.log(`
|
|
2354
|
+
console.log(` Eval set: ${evalSetName}`);
|
|
2560
2355
|
if (sidecar.description) {
|
|
2561
2356
|
console.log(` Description: ${sidecar.description}`);
|
|
2562
2357
|
}
|
|
@@ -2566,7 +2361,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2566
2361
|
const evalcase = rawCases[lineIndex];
|
|
2567
2362
|
const lineNumber = lineIndex + 1;
|
|
2568
2363
|
const id = asString4(evalcase.id);
|
|
2569
|
-
if (filterPattern && (!id || !
|
|
2364
|
+
if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
|
|
2570
2365
|
continue;
|
|
2571
2366
|
}
|
|
2572
2367
|
const conversationId = asString4(evalcase.conversation_id);
|
|
@@ -2589,14 +2384,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2589
2384
|
continue;
|
|
2590
2385
|
}
|
|
2591
2386
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2592
|
-
const guidelinePaths = [];
|
|
2593
2387
|
const inputTextParts = [];
|
|
2594
2388
|
const inputSegments = await processMessages({
|
|
2595
2389
|
messages: inputMessages,
|
|
2596
2390
|
searchRoots,
|
|
2597
2391
|
repoRootPath,
|
|
2598
|
-
guidelinePatterns,
|
|
2599
|
-
guidelinePaths,
|
|
2600
2392
|
textParts: inputTextParts,
|
|
2601
2393
|
messageType: "input",
|
|
2602
2394
|
verbose
|
|
@@ -2646,40 +2438,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2646
2438
|
userFilePaths.push(segment.resolvedPath);
|
|
2647
2439
|
}
|
|
2648
2440
|
}
|
|
2649
|
-
const allFilePaths = [
|
|
2650
|
-
...guidelinePaths.map((guidelinePath) => path7.resolve(guidelinePath)),
|
|
2651
|
-
...userFilePaths
|
|
2652
|
-
];
|
|
2653
2441
|
const testCase = {
|
|
2654
2442
|
id,
|
|
2655
|
-
|
|
2443
|
+
eval_set: evalSetName,
|
|
2656
2444
|
conversation_id: conversationId,
|
|
2657
2445
|
question,
|
|
2658
2446
|
input: inputMessages,
|
|
2659
2447
|
input_segments: inputSegments,
|
|
2660
2448
|
expected_output: outputSegments,
|
|
2661
2449
|
reference_answer: referenceAnswer,
|
|
2662
|
-
|
|
2663
|
-
guideline_patterns: guidelinePatterns,
|
|
2664
|
-
file_paths: allFilePaths,
|
|
2450
|
+
file_paths: userFilePaths,
|
|
2665
2451
|
criteria: outcome ?? "",
|
|
2666
2452
|
evaluator: evalCaseEvaluatorKind,
|
|
2667
2453
|
assertions: evaluators
|
|
2668
2454
|
};
|
|
2669
|
-
if (verbose) {
|
|
2670
|
-
console.log(`
|
|
2671
|
-
[Test: ${id}]`);
|
|
2672
|
-
if (testCase.guideline_paths.length > 0) {
|
|
2673
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
2674
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
2675
|
-
console.log(` - ${guidelinePath}`);
|
|
2676
|
-
}
|
|
2677
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2678
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
2679
|
-
} else {
|
|
2680
|
-
console.log(" No guidelines found");
|
|
2681
|
-
}
|
|
2682
|
-
}
|
|
2683
2455
|
results.push(testCase);
|
|
2684
2456
|
}
|
|
2685
2457
|
return results;
|
|
@@ -2690,19 +2462,19 @@ function asString4(value) {
|
|
|
2690
2462
|
function logWarning4(message, details) {
|
|
2691
2463
|
if (details && details.length > 0) {
|
|
2692
2464
|
const detailBlock = details.join("\n");
|
|
2693
|
-
console.warn(`${
|
|
2694
|
-
${detailBlock}${
|
|
2465
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}
|
|
2466
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
2695
2467
|
} else {
|
|
2696
|
-
console.warn(`${
|
|
2468
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET6}`);
|
|
2697
2469
|
}
|
|
2698
2470
|
}
|
|
2699
2471
|
function logError2(message, details) {
|
|
2700
2472
|
if (details && details.length > 0) {
|
|
2701
2473
|
const detailBlock = details.join("\n");
|
|
2702
2474
|
console.error(`${ANSI_RED2}Error: ${message}
|
|
2703
|
-
${detailBlock}${
|
|
2475
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
2704
2476
|
} else {
|
|
2705
|
-
console.error(`${ANSI_RED2}Error: ${message}${
|
|
2477
|
+
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET6}`);
|
|
2706
2478
|
}
|
|
2707
2479
|
}
|
|
2708
2480
|
|
|
@@ -2737,30 +2509,7 @@ function parseMetadata(suite) {
|
|
|
2737
2509
|
}
|
|
2738
2510
|
|
|
2739
2511
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2740
|
-
import { readFile as readFile7 } from "node:fs/promises";
|
|
2741
|
-
import path8 from "node:path";
|
|
2742
|
-
var ANSI_YELLOW7 = "\x1B[33m";
|
|
2743
|
-
var ANSI_RESET8 = "\x1B[0m";
|
|
2744
2512
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2745
|
-
const guidelineParts = [];
|
|
2746
|
-
for (const rawPath of testCase.guideline_paths) {
|
|
2747
|
-
const absolutePath = path8.resolve(rawPath);
|
|
2748
|
-
if (!await fileExists2(absolutePath)) {
|
|
2749
|
-
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
2750
|
-
continue;
|
|
2751
|
-
}
|
|
2752
|
-
try {
|
|
2753
|
-
const content = (await readFile7(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
2754
|
-
guidelineParts.push({
|
|
2755
|
-
content,
|
|
2756
|
-
isFile: true,
|
|
2757
|
-
displayPath: path8.basename(absolutePath)
|
|
2758
|
-
});
|
|
2759
|
-
} catch (error) {
|
|
2760
|
-
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
2761
|
-
}
|
|
2762
|
-
}
|
|
2763
|
-
const guidelines = formatFileContents(guidelineParts);
|
|
2764
2513
|
const segmentsByMessage = [];
|
|
2765
2514
|
const fileContentsByPath = /* @__PURE__ */ new Map();
|
|
2766
2515
|
for (const segment of testCase.input_segments) {
|
|
@@ -2785,10 +2534,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
|
|
|
2785
2534
|
if (type === "file") {
|
|
2786
2535
|
const value = asString5(segment.value);
|
|
2787
2536
|
if (!value) continue;
|
|
2788
|
-
if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
|
|
2789
|
-
messageSegments.push({ type: "guideline_ref", path: value });
|
|
2790
|
-
continue;
|
|
2791
|
-
}
|
|
2792
2537
|
const fileText = fileContentsByPath.get(value);
|
|
2793
2538
|
if (fileText !== void 0) {
|
|
2794
2539
|
messageSegments.push({ type: "file", text: fileText, path: value });
|
|
@@ -2837,10 +2582,6 @@ ${messageContent}`);
|
|
|
2837
2582
|
} else {
|
|
2838
2583
|
const questionParts = [];
|
|
2839
2584
|
for (const segment of testCase.input_segments) {
|
|
2840
|
-
if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
|
|
2841
|
-
questionParts.push(`<Attached: ${segment.path}>`);
|
|
2842
|
-
continue;
|
|
2843
|
-
}
|
|
2844
2585
|
const formattedContent = formatSegment(segment, mode);
|
|
2845
2586
|
if (formattedContent) {
|
|
2846
2587
|
questionParts.push(formattedContent);
|
|
@@ -2851,11 +2592,9 @@ ${messageContent}`);
|
|
|
2851
2592
|
const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
|
|
2852
2593
|
messages: testCase.input,
|
|
2853
2594
|
segmentsByMessage,
|
|
2854
|
-
guidelinePatterns: testCase.guideline_patterns,
|
|
2855
|
-
guidelineContent: guidelines,
|
|
2856
2595
|
mode
|
|
2857
2596
|
}) : void 0;
|
|
2858
|
-
return { question,
|
|
2597
|
+
return { question, chatPrompt };
|
|
2859
2598
|
}
|
|
2860
2599
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
2861
2600
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -2870,14 +2609,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
2870
2609
|
return messagesWithContent > 1;
|
|
2871
2610
|
}
|
|
2872
2611
|
function buildChatPromptFromSegments(options) {
|
|
2873
|
-
const {
|
|
2874
|
-
messages,
|
|
2875
|
-
segmentsByMessage,
|
|
2876
|
-
guidelinePatterns,
|
|
2877
|
-
guidelineContent,
|
|
2878
|
-
systemPrompt,
|
|
2879
|
-
mode = "lm"
|
|
2880
|
-
} = options;
|
|
2612
|
+
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
2881
2613
|
if (messages.length === 0) {
|
|
2882
2614
|
return void 0;
|
|
2883
2615
|
}
|
|
@@ -2885,11 +2617,6 @@ function buildChatPromptFromSegments(options) {
|
|
|
2885
2617
|
if (systemPrompt && systemPrompt.trim().length > 0) {
|
|
2886
2618
|
systemSegments.push(systemPrompt.trim());
|
|
2887
2619
|
}
|
|
2888
|
-
if (guidelineContent && guidelineContent.trim().length > 0) {
|
|
2889
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
2890
|
-
|
|
2891
|
-
${guidelineContent.trim()}`);
|
|
2892
|
-
}
|
|
2893
2620
|
let startIndex = 0;
|
|
2894
2621
|
while (startIndex < messages.length && messages[startIndex].role === "system") {
|
|
2895
2622
|
const segments = segmentsByMessage[startIndex];
|
|
@@ -2925,15 +2652,8 @@ ${guidelineContent.trim()}`);
|
|
|
2925
2652
|
contentParts.push("@[Tool]:");
|
|
2926
2653
|
}
|
|
2927
2654
|
for (const segment of segments) {
|
|
2928
|
-
if (segment.type === "guideline_ref") {
|
|
2929
|
-
continue;
|
|
2930
|
-
}
|
|
2931
2655
|
const formatted = formatSegment(segment, mode);
|
|
2932
2656
|
if (formatted) {
|
|
2933
|
-
const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
|
|
2934
|
-
if (isGuidelineRef) {
|
|
2935
|
-
continue;
|
|
2936
|
-
}
|
|
2937
2657
|
contentParts.push(formatted);
|
|
2938
2658
|
}
|
|
2939
2659
|
}
|
|
@@ -2951,30 +2671,27 @@ ${guidelineContent.trim()}`);
|
|
|
2951
2671
|
function asString5(value) {
|
|
2952
2672
|
return typeof value === "string" ? value : void 0;
|
|
2953
2673
|
}
|
|
2954
|
-
function logWarning5(message) {
|
|
2955
|
-
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
2956
|
-
}
|
|
2957
2674
|
|
|
2958
2675
|
// src/evaluation/yaml-parser.ts
|
|
2959
|
-
var
|
|
2676
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2960
2677
|
var ANSI_RED3 = "\x1B[31m";
|
|
2961
|
-
var
|
|
2678
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
2962
2679
|
function resolveTests(suite) {
|
|
2963
2680
|
if (suite.tests !== void 0) return suite.tests;
|
|
2964
2681
|
if (suite.eval_cases !== void 0) {
|
|
2965
|
-
|
|
2682
|
+
logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
|
|
2966
2683
|
return suite.eval_cases;
|
|
2967
2684
|
}
|
|
2968
2685
|
if (suite.evalcases !== void 0) {
|
|
2969
|
-
|
|
2686
|
+
logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
|
|
2970
2687
|
return suite.evalcases;
|
|
2971
2688
|
}
|
|
2972
2689
|
return void 0;
|
|
2973
2690
|
}
|
|
2974
2691
|
async function readTestSuiteMetadata(testFilePath) {
|
|
2975
2692
|
try {
|
|
2976
|
-
const absolutePath =
|
|
2977
|
-
const content = await
|
|
2693
|
+
const absolutePath = path7.resolve(testFilePath);
|
|
2694
|
+
const content = await readFile6(absolutePath, "utf8");
|
|
2978
2695
|
const parsed = interpolateEnv(parse2(content), process.env);
|
|
2979
2696
|
if (!isJsonObject(parsed)) {
|
|
2980
2697
|
return {};
|
|
@@ -3025,26 +2742,25 @@ var loadEvalCases = loadTests;
|
|
|
3025
2742
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
3026
2743
|
const verbose = options?.verbose ?? false;
|
|
3027
2744
|
const filterPattern = options?.filter;
|
|
3028
|
-
const absoluteTestPath =
|
|
2745
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
3029
2746
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3030
2747
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
3031
2748
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
3032
|
-
const
|
|
3033
|
-
const rawFile = await readFile8(absoluteTestPath, "utf8");
|
|
2749
|
+
const rawFile = await readFile6(absoluteTestPath, "utf8");
|
|
3034
2750
|
const interpolated = interpolateEnv(parse2(rawFile), process.env);
|
|
3035
2751
|
if (!isJsonObject(interpolated)) {
|
|
3036
2752
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
3037
2753
|
}
|
|
3038
2754
|
const suite = interpolated;
|
|
3039
|
-
const
|
|
3040
|
-
const
|
|
3041
|
-
const
|
|
2755
|
+
const evalSetNameFromSuite = asString6(suite.name)?.trim();
|
|
2756
|
+
const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
2757
|
+
const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
|
|
3042
2758
|
const rawTestcases = resolveTests(suite);
|
|
3043
2759
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
3044
|
-
const evalFileDir =
|
|
2760
|
+
const evalFileDir = path7.dirname(absoluteTestPath);
|
|
3045
2761
|
let expandedTestcases;
|
|
3046
2762
|
if (typeof rawTestcases === "string") {
|
|
3047
|
-
const externalPath =
|
|
2763
|
+
const externalPath = path7.resolve(evalFileDir, rawTestcases);
|
|
3048
2764
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
3049
2765
|
} else if (Array.isArray(rawTestcases)) {
|
|
3050
2766
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -3058,18 +2774,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3058
2774
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
3059
2775
|
const suiteAssertions = suite.assertions ?? suite.assert;
|
|
3060
2776
|
if (suite.assert !== void 0 && suite.assertions === void 0) {
|
|
3061
|
-
|
|
2777
|
+
logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
|
|
3062
2778
|
}
|
|
3063
2779
|
const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
|
|
3064
2780
|
const results = [];
|
|
3065
2781
|
for (const rawEvalcase of expandedTestcases) {
|
|
3066
2782
|
if (!isJsonObject(rawEvalcase)) {
|
|
3067
|
-
|
|
2783
|
+
logWarning5("Skipping invalid test entry (expected object)");
|
|
3068
2784
|
continue;
|
|
3069
2785
|
}
|
|
3070
2786
|
const evalcase = rawEvalcase;
|
|
3071
2787
|
const id = asString6(evalcase.id);
|
|
3072
|
-
if (filterPattern && (!id || !
|
|
2788
|
+
if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
|
|
3073
2789
|
continue;
|
|
3074
2790
|
}
|
|
3075
2791
|
const conversationId = asString6(evalcase.conversation_id);
|
|
@@ -3077,7 +2793,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3077
2793
|
if (!outcome && evalcase.expected_outcome !== void 0) {
|
|
3078
2794
|
outcome = asString6(evalcase.expected_outcome);
|
|
3079
2795
|
if (outcome) {
|
|
3080
|
-
|
|
2796
|
+
logWarning5(
|
|
3081
2797
|
`Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
|
|
3082
2798
|
);
|
|
3083
2799
|
}
|
|
@@ -3097,15 +2813,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3097
2813
|
const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
|
|
3098
2814
|
const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
3099
2815
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
3100
|
-
const guidelinePaths = [];
|
|
3101
2816
|
const inputTextParts = [];
|
|
3102
2817
|
const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
|
|
3103
2818
|
messages: effectiveSuiteInputMessages,
|
|
3104
2819
|
searchRoots,
|
|
3105
2820
|
repoRootPath,
|
|
3106
|
-
guidelinePatterns,
|
|
3107
|
-
guidelinePaths,
|
|
3108
|
-
treatFileSegmentsAsGuidelines: true,
|
|
3109
2821
|
textParts: inputTextParts,
|
|
3110
2822
|
messageType: "input",
|
|
3111
2823
|
verbose
|
|
@@ -3114,8 +2826,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3114
2826
|
messages: testInputMessages,
|
|
3115
2827
|
searchRoots,
|
|
3116
2828
|
repoRootPath,
|
|
3117
|
-
guidelinePatterns,
|
|
3118
|
-
guidelinePaths,
|
|
3119
2829
|
textParts: inputTextParts,
|
|
3120
2830
|
messageType: "input",
|
|
3121
2831
|
verbose
|
|
@@ -3164,26 +2874,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3164
2874
|
userFilePaths.push(segment.resolvedPath);
|
|
3165
2875
|
}
|
|
3166
2876
|
}
|
|
3167
|
-
const allFilePaths = [
|
|
3168
|
-
...guidelinePaths.map((guidelinePath) => path9.resolve(guidelinePath)),
|
|
3169
|
-
...userFilePaths
|
|
3170
|
-
];
|
|
3171
2877
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
3172
2878
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
3173
2879
|
const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
|
|
3174
2880
|
const caseTargets = extractTargetsFromTestCase(evalcase);
|
|
3175
2881
|
const testCase = {
|
|
3176
2882
|
id,
|
|
3177
|
-
|
|
2883
|
+
eval_set: evalSetName,
|
|
3178
2884
|
conversation_id: conversationId,
|
|
3179
2885
|
question,
|
|
3180
2886
|
input: inputMessages,
|
|
3181
2887
|
input_segments: inputSegments,
|
|
3182
2888
|
expected_output: outputSegments,
|
|
3183
2889
|
reference_answer: referenceAnswer,
|
|
3184
|
-
|
|
3185
|
-
guideline_patterns: guidelinePatterns,
|
|
3186
|
-
file_paths: allFilePaths,
|
|
2890
|
+
file_paths: userFilePaths,
|
|
3187
2891
|
criteria: outcome ?? "",
|
|
3188
2892
|
evaluator: evalCaseEvaluatorKind,
|
|
3189
2893
|
assertions: evaluators,
|
|
@@ -3191,20 +2895,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3191
2895
|
metadata,
|
|
3192
2896
|
targets: caseTargets
|
|
3193
2897
|
};
|
|
3194
|
-
if (verbose) {
|
|
3195
|
-
console.log(`
|
|
3196
|
-
[Test: ${id}]`);
|
|
3197
|
-
if (testCase.guideline_paths.length > 0) {
|
|
3198
|
-
console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
|
|
3199
|
-
for (const guidelinePath of testCase.guideline_paths) {
|
|
3200
|
-
console.log(` - ${guidelinePath}`);
|
|
3201
|
-
}
|
|
3202
|
-
} else if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
3203
|
-
console.log(" No guidelines found (guideline_patterns not configured)");
|
|
3204
|
-
} else {
|
|
3205
|
-
console.log(" No guidelines found");
|
|
3206
|
-
}
|
|
3207
|
-
}
|
|
3208
2898
|
results.push(testCase);
|
|
3209
2899
|
}
|
|
3210
2900
|
return { tests: results, parsed: suite };
|
|
@@ -3223,7 +2913,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3223
2913
|
if (!isJsonObject(raw)) return void 0;
|
|
3224
2914
|
const obj = raw;
|
|
3225
2915
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
3226
|
-
|
|
2916
|
+
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
3227
2917
|
}
|
|
3228
2918
|
const commandSource = obj.command ?? obj.script;
|
|
3229
2919
|
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
@@ -3231,8 +2921,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3231
2921
|
if (commandArr.length === 0) return void 0;
|
|
3232
2922
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
3233
2923
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
3234
|
-
if (cwd && !
|
|
3235
|
-
cwd =
|
|
2924
|
+
if (cwd && !path7.isAbsolute(cwd)) {
|
|
2925
|
+
cwd = path7.resolve(evalFileDir, cwd);
|
|
3236
2926
|
}
|
|
3237
2927
|
const config = { command: commandArr };
|
|
3238
2928
|
if (timeoutMs !== void 0) {
|
|
@@ -3322,10 +3012,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
3322
3012
|
}
|
|
3323
3013
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3324
3014
|
if (typeof raw === "string") {
|
|
3325
|
-
const workspaceFilePath =
|
|
3015
|
+
const workspaceFilePath = path7.resolve(evalFileDir, raw);
|
|
3326
3016
|
let content;
|
|
3327
3017
|
try {
|
|
3328
|
-
content = await
|
|
3018
|
+
content = await readFile6(workspaceFilePath, "utf8");
|
|
3329
3019
|
} catch {
|
|
3330
3020
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3331
3021
|
}
|
|
@@ -3335,7 +3025,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
3335
3025
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3336
3026
|
);
|
|
3337
3027
|
}
|
|
3338
|
-
const workspaceFileDir =
|
|
3028
|
+
const workspaceFileDir = path7.dirname(workspaceFilePath);
|
|
3339
3029
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3340
3030
|
}
|
|
3341
3031
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -3355,8 +3045,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3355
3045
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
3356
3046
|
}
|
|
3357
3047
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
3358
|
-
if (template && !
|
|
3359
|
-
template =
|
|
3048
|
+
if (template && !path7.isAbsolute(template)) {
|
|
3049
|
+
template = path7.resolve(evalFileDir, template);
|
|
3360
3050
|
}
|
|
3361
3051
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3362
3052
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -3406,28 +3096,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3406
3096
|
function asString6(value) {
|
|
3407
3097
|
return typeof value === "string" ? value : void 0;
|
|
3408
3098
|
}
|
|
3409
|
-
function
|
|
3099
|
+
function logWarning5(message, details) {
|
|
3410
3100
|
if (details && details.length > 0) {
|
|
3411
3101
|
const detailBlock = details.join("\n");
|
|
3412
|
-
console.warn(`${
|
|
3413
|
-
${detailBlock}${
|
|
3102
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}
|
|
3103
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
3414
3104
|
} else {
|
|
3415
|
-
console.warn(`${
|
|
3105
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
3416
3106
|
}
|
|
3417
3107
|
}
|
|
3418
3108
|
function logError3(message, details) {
|
|
3419
3109
|
if (details && details.length > 0) {
|
|
3420
3110
|
const detailBlock = details.join("\n");
|
|
3421
3111
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
3422
|
-
${detailBlock}${
|
|
3112
|
+
${detailBlock}${ANSI_RESET7}`);
|
|
3423
3113
|
} else {
|
|
3424
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
3114
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET7}`);
|
|
3425
3115
|
}
|
|
3426
3116
|
}
|
|
3427
3117
|
|
|
3428
3118
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3429
3119
|
import { readFileSync } from "node:fs";
|
|
3430
|
-
import
|
|
3120
|
+
import path8 from "node:path";
|
|
3431
3121
|
import { parse as parse3 } from "yaml";
|
|
3432
3122
|
function codeGraderInstruction(graderName, description) {
|
|
3433
3123
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -3672,7 +3362,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
3672
3362
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
3673
3363
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
3674
3364
|
const parsed = parse3(content);
|
|
3675
|
-
return transpileEvalYaml(parsed,
|
|
3365
|
+
return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
|
|
3676
3366
|
}
|
|
3677
3367
|
function getOutputFilenames(result) {
|
|
3678
3368
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -3907,10 +3597,10 @@ function buildChatPrompt(request) {
|
|
|
3907
3597
|
if (hasSystemMessage) {
|
|
3908
3598
|
return provided;
|
|
3909
3599
|
}
|
|
3910
|
-
const systemContent2 = resolveSystemContent(request
|
|
3600
|
+
const systemContent2 = resolveSystemContent(request);
|
|
3911
3601
|
return [{ role: "system", content: systemContent2 }, ...provided];
|
|
3912
3602
|
}
|
|
3913
|
-
const systemContent = resolveSystemContent(request
|
|
3603
|
+
const systemContent = resolveSystemContent(request);
|
|
3914
3604
|
const userContent = request.question.trim();
|
|
3915
3605
|
const prompt = [
|
|
3916
3606
|
{ role: "system", content: systemContent },
|
|
@@ -3918,18 +3608,13 @@ function buildChatPrompt(request) {
|
|
|
3918
3608
|
];
|
|
3919
3609
|
return prompt;
|
|
3920
3610
|
}
|
|
3921
|
-
function resolveSystemContent(request
|
|
3611
|
+
function resolveSystemContent(request) {
|
|
3922
3612
|
const systemSegments = [];
|
|
3923
3613
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
3924
3614
|
systemSegments.push(request.systemPrompt.trim());
|
|
3925
3615
|
} else {
|
|
3926
3616
|
systemSegments.push(DEFAULT_SYSTEM_PROMPT);
|
|
3927
3617
|
}
|
|
3928
|
-
if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
|
|
3929
|
-
systemSegments.push(`[[ ## Guidelines ## ]]
|
|
3930
|
-
|
|
3931
|
-
${request.guidelines.trim()}`);
|
|
3932
|
-
}
|
|
3933
3618
|
return systemSegments.join("\n\n");
|
|
3934
3619
|
}
|
|
3935
3620
|
function toModelMessages(chatPrompt) {
|
|
@@ -4113,7 +3798,7 @@ import { spawn } from "node:child_process";
|
|
|
4113
3798
|
import { randomUUID } from "node:crypto";
|
|
4114
3799
|
import { createWriteStream } from "node:fs";
|
|
4115
3800
|
import { mkdir } from "node:fs/promises";
|
|
4116
|
-
import
|
|
3801
|
+
import path10 from "node:path";
|
|
4117
3802
|
|
|
4118
3803
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
4119
3804
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -4169,17 +3854,11 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
4169
3854
|
}
|
|
4170
3855
|
|
|
4171
3856
|
// src/evaluation/providers/preread.ts
|
|
4172
|
-
import
|
|
4173
|
-
function buildPromptDocument(request, inputFiles
|
|
3857
|
+
import path9 from "node:path";
|
|
3858
|
+
function buildPromptDocument(request, inputFiles) {
|
|
4174
3859
|
const parts = [];
|
|
4175
|
-
const guidelineFiles = collectGuidelineFiles(
|
|
4176
|
-
inputFiles,
|
|
4177
|
-
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
4178
|
-
options?.guidelineOverrides
|
|
4179
|
-
);
|
|
4180
3860
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
4181
|
-
const
|
|
4182
|
-
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
3861
|
+
const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
|
|
4183
3862
|
if (prereadBlock.length > 0) {
|
|
4184
3863
|
parts.push("\n", prereadBlock);
|
|
4185
3864
|
}
|
|
@@ -4192,62 +3871,36 @@ function normalizeInputFiles(inputFiles) {
|
|
|
4192
3871
|
}
|
|
4193
3872
|
const deduped = /* @__PURE__ */ new Map();
|
|
4194
3873
|
for (const inputFile of inputFiles) {
|
|
4195
|
-
const absolutePath =
|
|
3874
|
+
const absolutePath = path9.resolve(inputFile);
|
|
4196
3875
|
if (!deduped.has(absolutePath)) {
|
|
4197
3876
|
deduped.set(absolutePath, absolutePath);
|
|
4198
3877
|
}
|
|
4199
3878
|
}
|
|
4200
3879
|
return Array.from(deduped.values());
|
|
4201
3880
|
}
|
|
4202
|
-
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
4203
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
4204
|
-
return [];
|
|
4205
|
-
}
|
|
4206
|
-
const unique = /* @__PURE__ */ new Map();
|
|
4207
|
-
for (const inputFile of inputFiles) {
|
|
4208
|
-
const absolutePath = path11.resolve(inputFile);
|
|
4209
|
-
if (overrides?.has(absolutePath)) {
|
|
4210
|
-
if (!unique.has(absolutePath)) {
|
|
4211
|
-
unique.set(absolutePath, absolutePath);
|
|
4212
|
-
}
|
|
4213
|
-
continue;
|
|
4214
|
-
}
|
|
4215
|
-
const normalized = absolutePath.split(path11.sep).join("/");
|
|
4216
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
4217
|
-
if (!unique.has(absolutePath)) {
|
|
4218
|
-
unique.set(absolutePath, absolutePath);
|
|
4219
|
-
}
|
|
4220
|
-
}
|
|
4221
|
-
}
|
|
4222
|
-
return Array.from(unique.values());
|
|
4223
|
-
}
|
|
4224
3881
|
function collectInputFiles(inputFiles) {
|
|
4225
3882
|
if (!inputFiles || inputFiles.length === 0) {
|
|
4226
3883
|
return [];
|
|
4227
3884
|
}
|
|
4228
3885
|
const unique = /* @__PURE__ */ new Map();
|
|
4229
3886
|
for (const inputFile of inputFiles) {
|
|
4230
|
-
const absolutePath =
|
|
3887
|
+
const absolutePath = path9.resolve(inputFile);
|
|
4231
3888
|
if (!unique.has(absolutePath)) {
|
|
4232
3889
|
unique.set(absolutePath, absolutePath);
|
|
4233
3890
|
}
|
|
4234
3891
|
}
|
|
4235
3892
|
return Array.from(unique.values());
|
|
4236
3893
|
}
|
|
4237
|
-
function buildMandatoryPrereadBlock(
|
|
4238
|
-
if (
|
|
3894
|
+
function buildMandatoryPrereadBlock(inputFiles) {
|
|
3895
|
+
if (inputFiles.length === 0) {
|
|
4239
3896
|
return "";
|
|
4240
3897
|
}
|
|
4241
3898
|
const buildList = (files) => files.map((absolutePath) => {
|
|
4242
|
-
const fileName =
|
|
3899
|
+
const fileName = path9.basename(absolutePath);
|
|
4243
3900
|
const fileUri = pathToFileUri(absolutePath);
|
|
4244
3901
|
return `* [${fileName}](${fileUri})`;
|
|
4245
3902
|
});
|
|
4246
3903
|
const sections = [];
|
|
4247
|
-
if (guidelineFiles.length > 0) {
|
|
4248
|
-
sections.push(`Read all guideline files:
|
|
4249
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
4250
|
-
}
|
|
4251
3904
|
if (inputFiles.length > 0) {
|
|
4252
3905
|
sections.push(`Read all input files:
|
|
4253
3906
|
${buildList(inputFiles).join("\n")}.`);
|
|
@@ -4259,7 +3912,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
4259
3912
|
return sections.join("\n");
|
|
4260
3913
|
}
|
|
4261
3914
|
function pathToFileUri(filePath) {
|
|
4262
|
-
const absolutePath =
|
|
3915
|
+
const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
|
|
4263
3916
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
4264
3917
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
4265
3918
|
return `file:///${normalizedPath}`;
|
|
@@ -4406,10 +4059,10 @@ var ClaudeCliProvider = class {
|
|
|
4406
4059
|
}
|
|
4407
4060
|
resolveCwd(cwdOverride) {
|
|
4408
4061
|
if (cwdOverride) {
|
|
4409
|
-
return
|
|
4062
|
+
return path10.resolve(cwdOverride);
|
|
4410
4063
|
}
|
|
4411
4064
|
if (this.config.cwd) {
|
|
4412
|
-
return
|
|
4065
|
+
return path10.resolve(this.config.cwd);
|
|
4413
4066
|
}
|
|
4414
4067
|
return void 0;
|
|
4415
4068
|
}
|
|
@@ -4419,9 +4072,9 @@ var ClaudeCliProvider = class {
|
|
|
4419
4072
|
return void 0;
|
|
4420
4073
|
}
|
|
4421
4074
|
if (this.config.logDir) {
|
|
4422
|
-
return
|
|
4075
|
+
return path10.resolve(this.config.logDir);
|
|
4423
4076
|
}
|
|
4424
|
-
return
|
|
4077
|
+
return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
4425
4078
|
}
|
|
4426
4079
|
async createStreamLogger(request) {
|
|
4427
4080
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4435,7 +4088,7 @@ var ClaudeCliProvider = class {
|
|
|
4435
4088
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
4436
4089
|
return void 0;
|
|
4437
4090
|
}
|
|
4438
|
-
const filePath =
|
|
4091
|
+
const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
|
|
4439
4092
|
try {
|
|
4440
4093
|
const logger = await ClaudeCliStreamLogger.create({
|
|
4441
4094
|
filePath,
|
|
@@ -4756,7 +4409,7 @@ function tryParseJson(line) {
|
|
|
4756
4409
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
4757
4410
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
4758
4411
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
4759
|
-
import
|
|
4412
|
+
import path11 from "node:path";
|
|
4760
4413
|
var claudeSdkModule = null;
|
|
4761
4414
|
async function loadClaudeSdk() {
|
|
4762
4415
|
if (!claudeSdkModule) {
|
|
@@ -4916,10 +4569,10 @@ var ClaudeSdkProvider = class {
|
|
|
4916
4569
|
}
|
|
4917
4570
|
resolveCwd(cwdOverride) {
|
|
4918
4571
|
if (cwdOverride) {
|
|
4919
|
-
return
|
|
4572
|
+
return path11.resolve(cwdOverride);
|
|
4920
4573
|
}
|
|
4921
4574
|
if (this.config.cwd) {
|
|
4922
|
-
return
|
|
4575
|
+
return path11.resolve(this.config.cwd);
|
|
4923
4576
|
}
|
|
4924
4577
|
return void 0;
|
|
4925
4578
|
}
|
|
@@ -4929,9 +4582,9 @@ var ClaudeSdkProvider = class {
|
|
|
4929
4582
|
return void 0;
|
|
4930
4583
|
}
|
|
4931
4584
|
if (this.config.logDir) {
|
|
4932
|
-
return
|
|
4585
|
+
return path11.resolve(this.config.logDir);
|
|
4933
4586
|
}
|
|
4934
|
-
return
|
|
4587
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude");
|
|
4935
4588
|
}
|
|
4936
4589
|
async createStreamLogger(request) {
|
|
4937
4590
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4945,7 +4598,7 @@ var ClaudeSdkProvider = class {
|
|
|
4945
4598
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
4946
4599
|
return void 0;
|
|
4947
4600
|
}
|
|
4948
|
-
const filePath =
|
|
4601
|
+
const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
|
|
4949
4602
|
try {
|
|
4950
4603
|
const logger = await ClaudeStreamLogger.create({
|
|
4951
4604
|
filePath,
|
|
@@ -5152,7 +4805,7 @@ function formatElapsed2(startedAt) {
|
|
|
5152
4805
|
import { exec as execWithCallback } from "node:child_process";
|
|
5153
4806
|
import fs from "node:fs/promises";
|
|
5154
4807
|
import os from "node:os";
|
|
5155
|
-
import
|
|
4808
|
+
import path12 from "node:path";
|
|
5156
4809
|
import { promisify } from "node:util";
|
|
5157
4810
|
import { z as z2 } from "zod";
|
|
5158
4811
|
var ToolCallSchema = z2.object({
|
|
@@ -5361,7 +5014,6 @@ var CliProvider = class {
|
|
|
5361
5014
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5362
5015
|
{
|
|
5363
5016
|
question: "",
|
|
5364
|
-
guidelines: "",
|
|
5365
5017
|
inputFiles: batchInputFiles,
|
|
5366
5018
|
evalCaseId: "batch",
|
|
5367
5019
|
attempt: 0
|
|
@@ -5594,7 +5246,6 @@ var CliProvider = class {
|
|
|
5594
5246
|
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5595
5247
|
{
|
|
5596
5248
|
question: "",
|
|
5597
|
-
guidelines: "",
|
|
5598
5249
|
inputFiles: [],
|
|
5599
5250
|
evalCaseId: "healthcheck",
|
|
5600
5251
|
attempt: 0
|
|
@@ -5635,7 +5286,6 @@ async function buildTemplateValues(request, config, outputFilePath) {
|
|
|
5635
5286
|
values: {
|
|
5636
5287
|
PROMPT: shellEscape(request.question ?? ""),
|
|
5637
5288
|
PROMPT_FILE: shellEscape(promptFilePath),
|
|
5638
|
-
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
5639
5289
|
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
5640
5290
|
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
5641
5291
|
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
@@ -5657,7 +5307,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
5657
5307
|
}
|
|
5658
5308
|
const unique = /* @__PURE__ */ new Map();
|
|
5659
5309
|
for (const inputFile of inputFiles) {
|
|
5660
|
-
const absolutePath =
|
|
5310
|
+
const absolutePath = path12.resolve(inputFile);
|
|
5661
5311
|
if (!unique.has(absolutePath)) {
|
|
5662
5312
|
unique.set(absolutePath, absolutePath);
|
|
5663
5313
|
}
|
|
@@ -5671,7 +5321,7 @@ function formatFileList(files, template) {
|
|
|
5671
5321
|
const formatter = template ?? "{path}";
|
|
5672
5322
|
return files.map((filePath) => {
|
|
5673
5323
|
const escapedPath = shellEscape(filePath);
|
|
5674
|
-
const escapedName = shellEscape(
|
|
5324
|
+
const escapedName = shellEscape(path12.basename(filePath));
|
|
5675
5325
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
5676
5326
|
}).join(" ");
|
|
5677
5327
|
}
|
|
@@ -5695,7 +5345,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
5695
5345
|
const safeEvalId = evalCaseId || "unknown";
|
|
5696
5346
|
const timestamp = Date.now();
|
|
5697
5347
|
const random = Math.random().toString(36).substring(2, 9);
|
|
5698
|
-
return
|
|
5348
|
+
return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
5699
5349
|
}
|
|
5700
5350
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
5701
5351
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -5709,7 +5359,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
5709
5359
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
5710
5360
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
5711
5361
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
5712
|
-
import
|
|
5362
|
+
import path13 from "node:path";
|
|
5713
5363
|
|
|
5714
5364
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
5715
5365
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -5944,10 +5594,10 @@ ${basePrompt}` : basePrompt;
|
|
|
5944
5594
|
}
|
|
5945
5595
|
resolveCwd(cwdOverride) {
|
|
5946
5596
|
if (cwdOverride) {
|
|
5947
|
-
return
|
|
5597
|
+
return path13.resolve(cwdOverride);
|
|
5948
5598
|
}
|
|
5949
5599
|
if (this.config.cwd) {
|
|
5950
|
-
return
|
|
5600
|
+
return path13.resolve(this.config.cwd);
|
|
5951
5601
|
}
|
|
5952
5602
|
return void 0;
|
|
5953
5603
|
}
|
|
@@ -5957,9 +5607,9 @@ ${basePrompt}` : basePrompt;
|
|
|
5957
5607
|
return void 0;
|
|
5958
5608
|
}
|
|
5959
5609
|
if (this.config.logDir) {
|
|
5960
|
-
return
|
|
5610
|
+
return path13.resolve(this.config.logDir);
|
|
5961
5611
|
}
|
|
5962
|
-
return
|
|
5612
|
+
return path13.join(process.cwd(), ".agentv", "logs", "codex");
|
|
5963
5613
|
}
|
|
5964
5614
|
async createStreamLogger(request) {
|
|
5965
5615
|
const logDir = this.resolveLogDirectory();
|
|
@@ -5973,7 +5623,7 @@ ${basePrompt}` : basePrompt;
|
|
|
5973
5623
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
5974
5624
|
return void 0;
|
|
5975
5625
|
}
|
|
5976
|
-
const filePath =
|
|
5626
|
+
const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
|
|
5977
5627
|
try {
|
|
5978
5628
|
const logger = await CodexSdkStreamLogger.create({
|
|
5979
5629
|
filePath,
|
|
@@ -6117,7 +5767,7 @@ function formatElapsed3(startedAt) {
|
|
|
6117
5767
|
// src/evaluation/providers/copilot-cli.ts
|
|
6118
5768
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
6119
5769
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
6120
|
-
import
|
|
5770
|
+
import path15 from "node:path";
|
|
6121
5771
|
import { Readable, Writable } from "node:stream";
|
|
6122
5772
|
import { spawn as spawn2 } from "node:child_process";
|
|
6123
5773
|
import * as acp from "@agentclientprotocol/sdk";
|
|
@@ -6179,7 +5829,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6179
5829
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6180
5830
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6181
5831
|
import { arch, platform } from "node:os";
|
|
6182
|
-
import
|
|
5832
|
+
import path14 from "node:path";
|
|
6183
5833
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
6184
5834
|
function resolvePlatformCliPath() {
|
|
6185
5835
|
const os3 = platform();
|
|
@@ -6203,7 +5853,7 @@ function resolvePlatformCliPath() {
|
|
|
6203
5853
|
try {
|
|
6204
5854
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6205
5855
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
6206
|
-
const binaryPath =
|
|
5856
|
+
const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
|
|
6207
5857
|
if (existsSync(binaryPath)) {
|
|
6208
5858
|
return binaryPath;
|
|
6209
5859
|
}
|
|
@@ -6211,7 +5861,7 @@ function resolvePlatformCliPath() {
|
|
|
6211
5861
|
}
|
|
6212
5862
|
let searchDir = process.cwd();
|
|
6213
5863
|
for (let i = 0; i < 10; i++) {
|
|
6214
|
-
const standardPath =
|
|
5864
|
+
const standardPath = path14.join(
|
|
6215
5865
|
searchDir,
|
|
6216
5866
|
"node_modules",
|
|
6217
5867
|
...packageName.split("/"),
|
|
@@ -6220,13 +5870,13 @@ function resolvePlatformCliPath() {
|
|
|
6220
5870
|
if (existsSync(standardPath)) {
|
|
6221
5871
|
return standardPath;
|
|
6222
5872
|
}
|
|
6223
|
-
const bunDir =
|
|
5873
|
+
const bunDir = path14.join(searchDir, "node_modules", ".bun");
|
|
6224
5874
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6225
5875
|
try {
|
|
6226
5876
|
const entries = readdirSync(bunDir);
|
|
6227
5877
|
for (const entry of entries) {
|
|
6228
5878
|
if (entry.startsWith(prefix)) {
|
|
6229
|
-
const candidate =
|
|
5879
|
+
const candidate = path14.join(
|
|
6230
5880
|
bunDir,
|
|
6231
5881
|
entry,
|
|
6232
5882
|
"node_modules",
|
|
@@ -6241,7 +5891,7 @@ function resolvePlatformCliPath() {
|
|
|
6241
5891
|
}
|
|
6242
5892
|
} catch {
|
|
6243
5893
|
}
|
|
6244
|
-
const parent =
|
|
5894
|
+
const parent = path14.dirname(searchDir);
|
|
6245
5895
|
if (parent === searchDir) break;
|
|
6246
5896
|
searchDir = parent;
|
|
6247
5897
|
}
|
|
@@ -6579,10 +6229,10 @@ var CopilotCliProvider = class {
|
|
|
6579
6229
|
}
|
|
6580
6230
|
resolveCwd(cwdOverride) {
|
|
6581
6231
|
if (cwdOverride) {
|
|
6582
|
-
return
|
|
6232
|
+
return path15.resolve(cwdOverride);
|
|
6583
6233
|
}
|
|
6584
6234
|
if (this.config.cwd) {
|
|
6585
|
-
return
|
|
6235
|
+
return path15.resolve(this.config.cwd);
|
|
6586
6236
|
}
|
|
6587
6237
|
return void 0;
|
|
6588
6238
|
}
|
|
@@ -6601,9 +6251,9 @@ var CopilotCliProvider = class {
|
|
|
6601
6251
|
return void 0;
|
|
6602
6252
|
}
|
|
6603
6253
|
if (this.config.logDir) {
|
|
6604
|
-
return
|
|
6254
|
+
return path15.resolve(this.config.logDir);
|
|
6605
6255
|
}
|
|
6606
|
-
return
|
|
6256
|
+
return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
6607
6257
|
}
|
|
6608
6258
|
async createStreamLogger(request) {
|
|
6609
6259
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6617,7 +6267,7 @@ var CopilotCliProvider = class {
|
|
|
6617
6267
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6618
6268
|
return void 0;
|
|
6619
6269
|
}
|
|
6620
|
-
const filePath =
|
|
6270
|
+
const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
6621
6271
|
try {
|
|
6622
6272
|
const logger = await CopilotStreamLogger.create(
|
|
6623
6273
|
{
|
|
@@ -6712,7 +6362,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6712
6362
|
// src/evaluation/providers/copilot-sdk.ts
|
|
6713
6363
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
6714
6364
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
6715
|
-
import
|
|
6365
|
+
import path16 from "node:path";
|
|
6716
6366
|
|
|
6717
6367
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
6718
6368
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -6991,10 +6641,10 @@ var CopilotSdkProvider = class {
|
|
|
6991
6641
|
}
|
|
6992
6642
|
resolveCwd(cwdOverride) {
|
|
6993
6643
|
if (cwdOverride) {
|
|
6994
|
-
return
|
|
6644
|
+
return path16.resolve(cwdOverride);
|
|
6995
6645
|
}
|
|
6996
6646
|
if (this.config.cwd) {
|
|
6997
|
-
return
|
|
6647
|
+
return path16.resolve(this.config.cwd);
|
|
6998
6648
|
}
|
|
6999
6649
|
return void 0;
|
|
7000
6650
|
}
|
|
@@ -7003,9 +6653,9 @@ var CopilotSdkProvider = class {
|
|
|
7003
6653
|
return void 0;
|
|
7004
6654
|
}
|
|
7005
6655
|
if (this.config.logDir) {
|
|
7006
|
-
return
|
|
6656
|
+
return path16.resolve(this.config.logDir);
|
|
7007
6657
|
}
|
|
7008
|
-
return
|
|
6658
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
7009
6659
|
}
|
|
7010
6660
|
async createStreamLogger(request) {
|
|
7011
6661
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7019,7 +6669,7 @@ var CopilotSdkProvider = class {
|
|
|
7019
6669
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7020
6670
|
return void 0;
|
|
7021
6671
|
}
|
|
7022
|
-
const filePath =
|
|
6672
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
7023
6673
|
try {
|
|
7024
6674
|
const logger = await CopilotStreamLogger.create(
|
|
7025
6675
|
{
|
|
@@ -7096,8 +6746,7 @@ var MockProvider = class {
|
|
|
7096
6746
|
return {
|
|
7097
6747
|
output: [{ role: "assistant", content: this.cannedResponse }],
|
|
7098
6748
|
raw: {
|
|
7099
|
-
question: request.question
|
|
7100
|
-
guidelines: request.guidelines
|
|
6749
|
+
question: request.question
|
|
7101
6750
|
}
|
|
7102
6751
|
};
|
|
7103
6752
|
}
|
|
@@ -7375,7 +7024,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7375
7024
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
7376
7025
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7377
7026
|
import { tmpdir } from "node:os";
|
|
7378
|
-
import
|
|
7027
|
+
import path17 from "node:path";
|
|
7379
7028
|
|
|
7380
7029
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7381
7030
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -7456,7 +7105,7 @@ var PiCodingAgentProvider = class {
|
|
|
7456
7105
|
const workspaceRoot = await this.createWorkspace();
|
|
7457
7106
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
7458
7107
|
try {
|
|
7459
|
-
const promptFile =
|
|
7108
|
+
const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
|
|
7460
7109
|
await writeFile(promptFile, request.question, "utf8");
|
|
7461
7110
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
7462
7111
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
@@ -7518,12 +7167,12 @@ var PiCodingAgentProvider = class {
|
|
|
7518
7167
|
}
|
|
7519
7168
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
7520
7169
|
if (cwdOverride) {
|
|
7521
|
-
return
|
|
7170
|
+
return path17.resolve(cwdOverride);
|
|
7522
7171
|
}
|
|
7523
7172
|
if (!this.config.cwd) {
|
|
7524
7173
|
return workspaceRoot;
|
|
7525
7174
|
}
|
|
7526
|
-
return
|
|
7175
|
+
return path17.resolve(this.config.cwd);
|
|
7527
7176
|
}
|
|
7528
7177
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
7529
7178
|
const args = [];
|
|
@@ -7612,7 +7261,7 @@ ${prompt}` : prompt;
|
|
|
7612
7261
|
return env;
|
|
7613
7262
|
}
|
|
7614
7263
|
async createWorkspace() {
|
|
7615
|
-
return await mkdtemp(
|
|
7264
|
+
return await mkdtemp(path17.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7616
7265
|
}
|
|
7617
7266
|
async cleanupWorkspace(workspaceRoot) {
|
|
7618
7267
|
try {
|
|
@@ -7622,9 +7271,9 @@ ${prompt}` : prompt;
|
|
|
7622
7271
|
}
|
|
7623
7272
|
resolveLogDirectory() {
|
|
7624
7273
|
if (this.config.logDir) {
|
|
7625
|
-
return
|
|
7274
|
+
return path17.resolve(this.config.logDir);
|
|
7626
7275
|
}
|
|
7627
|
-
return
|
|
7276
|
+
return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7628
7277
|
}
|
|
7629
7278
|
async createStreamLogger(request) {
|
|
7630
7279
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7638,7 +7287,7 @@ ${prompt}` : prompt;
|
|
|
7638
7287
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7639
7288
|
return void 0;
|
|
7640
7289
|
}
|
|
7641
|
-
const filePath =
|
|
7290
|
+
const filePath = path17.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7642
7291
|
try {
|
|
7643
7292
|
const logger = await PiStreamLogger.create({
|
|
7644
7293
|
filePath,
|
|
@@ -8139,17 +7788,17 @@ var ProviderRegistry = class {
|
|
|
8139
7788
|
// src/evaluation/providers/vscode-provider.ts
|
|
8140
7789
|
import { exec as exec2 } from "node:child_process";
|
|
8141
7790
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
8142
|
-
import
|
|
7791
|
+
import path29 from "node:path";
|
|
8143
7792
|
import { promisify as promisify3 } from "node:util";
|
|
8144
7793
|
|
|
8145
7794
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
8146
7795
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
8147
|
-
import
|
|
7796
|
+
import path27 from "node:path";
|
|
8148
7797
|
|
|
8149
7798
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
8150
7799
|
import { constants as constants2 } from "node:fs";
|
|
8151
7800
|
import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8152
|
-
import
|
|
7801
|
+
import path18 from "node:path";
|
|
8153
7802
|
async function pathExists(target) {
|
|
8154
7803
|
try {
|
|
8155
7804
|
await access2(target, constants2.F_OK);
|
|
@@ -8165,7 +7814,7 @@ async function readDirEntries(target) {
|
|
|
8165
7814
|
const entries = await readdir(target, { withFileTypes: true });
|
|
8166
7815
|
return entries.map((entry) => ({
|
|
8167
7816
|
name: entry.name,
|
|
8168
|
-
absolutePath:
|
|
7817
|
+
absolutePath: path18.join(target, entry.name),
|
|
8169
7818
|
isDirectory: entry.isDirectory()
|
|
8170
7819
|
}));
|
|
8171
7820
|
}
|
|
@@ -8180,9 +7829,9 @@ async function removeIfExists(target) {
|
|
|
8180
7829
|
}
|
|
8181
7830
|
|
|
8182
7831
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
8183
|
-
import
|
|
7832
|
+
import path19 from "node:path";
|
|
8184
7833
|
function pathToFileUri2(filePath) {
|
|
8185
|
-
const absolutePath =
|
|
7834
|
+
const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
|
|
8186
7835
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8187
7836
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8188
7837
|
return `file:///${normalizedPath}`;
|
|
@@ -8191,7 +7840,7 @@ function pathToFileUri2(filePath) {
|
|
|
8191
7840
|
}
|
|
8192
7841
|
|
|
8193
7842
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
8194
|
-
import
|
|
7843
|
+
import path20 from "node:path";
|
|
8195
7844
|
|
|
8196
7845
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
8197
7846
|
function renderTemplate2(content, variables) {
|
|
@@ -8283,8 +7932,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
8283
7932
|
});
|
|
8284
7933
|
}
|
|
8285
7934
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
8286
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
8287
|
-
const responseList = responseFiles.map((file) => `"${
|
|
7935
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
|
|
7936
|
+
const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
|
|
8288
7937
|
return renderTemplate2(templateContent, {
|
|
8289
7938
|
requestFiles: requestLines,
|
|
8290
7939
|
responseList
|
|
@@ -8292,8 +7941,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
8292
7941
|
}
|
|
8293
7942
|
|
|
8294
7943
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
8295
|
-
import { readFile as
|
|
8296
|
-
import
|
|
7944
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
7945
|
+
import path21 from "node:path";
|
|
8297
7946
|
|
|
8298
7947
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
8299
7948
|
function sleep2(ms) {
|
|
@@ -8331,7 +7980,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8331
7980
|
const maxAttempts = 10;
|
|
8332
7981
|
while (attempts < maxAttempts) {
|
|
8333
7982
|
try {
|
|
8334
|
-
const content = await
|
|
7983
|
+
const content = await readFile7(responseFileFinal, { encoding: "utf8" });
|
|
8335
7984
|
if (!silent) {
|
|
8336
7985
|
process.stdout.write(`${content}
|
|
8337
7986
|
`);
|
|
@@ -8352,7 +8001,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8352
8001
|
}
|
|
8353
8002
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8354
8003
|
if (!silent) {
|
|
8355
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8004
|
+
const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
|
|
8356
8005
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8357
8006
|
}
|
|
8358
8007
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8361,7 +8010,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8361
8010
|
while (pending.size > 0) {
|
|
8362
8011
|
if (Date.now() >= deadline) {
|
|
8363
8012
|
if (!silent) {
|
|
8364
|
-
const remaining = [...pending].map((f) =>
|
|
8013
|
+
const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
|
|
8365
8014
|
console.error(
|
|
8366
8015
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8367
8016
|
);
|
|
@@ -8388,7 +8037,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8388
8037
|
const maxAttempts = 10;
|
|
8389
8038
|
while (attempts < maxAttempts) {
|
|
8390
8039
|
try {
|
|
8391
|
-
const content = await
|
|
8040
|
+
const content = await readFile7(file, { encoding: "utf8" });
|
|
8392
8041
|
if (!silent) {
|
|
8393
8042
|
process.stdout.write(`${content}
|
|
8394
8043
|
`);
|
|
@@ -8412,15 +8061,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8412
8061
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8413
8062
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8414
8063
|
import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
|
|
8415
|
-
import
|
|
8064
|
+
import path24 from "node:path";
|
|
8416
8065
|
import { promisify as promisify2 } from "node:util";
|
|
8417
8066
|
|
|
8418
8067
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8419
|
-
import
|
|
8068
|
+
import path23 from "node:path";
|
|
8420
8069
|
|
|
8421
8070
|
// src/paths.ts
|
|
8422
8071
|
import os2 from "node:os";
|
|
8423
|
-
import
|
|
8072
|
+
import path22 from "node:path";
|
|
8424
8073
|
var logged = false;
|
|
8425
8074
|
function getAgentvHome() {
|
|
8426
8075
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8431,19 +8080,19 @@ function getAgentvHome() {
|
|
|
8431
8080
|
}
|
|
8432
8081
|
return envHome;
|
|
8433
8082
|
}
|
|
8434
|
-
return
|
|
8083
|
+
return path22.join(os2.homedir(), ".agentv");
|
|
8435
8084
|
}
|
|
8436
8085
|
function getWorkspacesRoot() {
|
|
8437
|
-
return
|
|
8086
|
+
return path22.join(getAgentvHome(), "workspaces");
|
|
8438
8087
|
}
|
|
8439
8088
|
function getSubagentsRoot() {
|
|
8440
|
-
return
|
|
8089
|
+
return path22.join(getAgentvHome(), "subagents");
|
|
8441
8090
|
}
|
|
8442
8091
|
function getTraceStateRoot() {
|
|
8443
|
-
return
|
|
8092
|
+
return path22.join(getAgentvHome(), "trace-state");
|
|
8444
8093
|
}
|
|
8445
8094
|
function getWorkspacePoolRoot() {
|
|
8446
|
-
return
|
|
8095
|
+
return path22.join(getAgentvHome(), "workspace-pool");
|
|
8447
8096
|
}
|
|
8448
8097
|
|
|
8449
8098
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8451,7 +8100,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8451
8100
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8452
8101
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
8453
8102
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
8454
|
-
return
|
|
8103
|
+
return path23.join(getSubagentsRoot(), folder);
|
|
8455
8104
|
}
|
|
8456
8105
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8457
8106
|
|
|
@@ -8518,11 +8167,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8518
8167
|
await raceSpawnError(child);
|
|
8519
8168
|
return true;
|
|
8520
8169
|
}
|
|
8521
|
-
const aliveFile =
|
|
8170
|
+
const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8522
8171
|
await removeIfExists(aliveFile);
|
|
8523
|
-
const githubAgentsDir =
|
|
8172
|
+
const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
|
|
8524
8173
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
8525
|
-
const wakeupDst =
|
|
8174
|
+
const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
|
|
8526
8175
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8527
8176
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8528
8177
|
label: "open-workspace"
|
|
@@ -8535,7 +8184,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8535
8184
|
"chat",
|
|
8536
8185
|
"-m",
|
|
8537
8186
|
wakeupChatId,
|
|
8538
|
-
`create a file named .alive in the ${
|
|
8187
|
+
`create a file named .alive in the ${path24.basename(subagentDir)} folder`
|
|
8539
8188
|
];
|
|
8540
8189
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8541
8190
|
await raceSpawnError(wakeupChild);
|
|
@@ -8550,10 +8199,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8550
8199
|
return true;
|
|
8551
8200
|
}
|
|
8552
8201
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8553
|
-
const workspacePath =
|
|
8554
|
-
const messagesDir =
|
|
8202
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
8203
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
8555
8204
|
await mkdir8(messagesDir, { recursive: true });
|
|
8556
|
-
const reqFile =
|
|
8205
|
+
const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
|
|
8557
8206
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8558
8207
|
const reqUri = pathToFileUri2(reqFile);
|
|
8559
8208
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8561,16 +8210,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8561
8210
|
chatArgs.push("-a", attachment);
|
|
8562
8211
|
}
|
|
8563
8212
|
chatArgs.push("-a", reqFile);
|
|
8564
|
-
chatArgs.push(`Follow instructions in [${
|
|
8213
|
+
chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
|
|
8565
8214
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8566
8215
|
workspacePath,
|
|
8567
|
-
|
|
8216
|
+
path24.basename(subagentDir),
|
|
8568
8217
|
subagentDir,
|
|
8569
8218
|
vscodeCmd
|
|
8570
8219
|
);
|
|
8571
8220
|
if (!workspaceReady) {
|
|
8572
8221
|
throw new Error(
|
|
8573
|
-
`VS Code workspace '${
|
|
8222
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8574
8223
|
);
|
|
8575
8224
|
}
|
|
8576
8225
|
await sleep2(500);
|
|
@@ -8578,8 +8227,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8578
8227
|
await raceSpawnError(child);
|
|
8579
8228
|
}
|
|
8580
8229
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8581
|
-
const workspacePath =
|
|
8582
|
-
const messagesDir =
|
|
8230
|
+
const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
|
|
8231
|
+
const messagesDir = path24.join(subagentDir, "messages");
|
|
8583
8232
|
await mkdir8(messagesDir, { recursive: true });
|
|
8584
8233
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8585
8234
|
for (const attachment of attachmentPaths) {
|
|
@@ -8588,13 +8237,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8588
8237
|
chatArgs.push(chatInstruction);
|
|
8589
8238
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8590
8239
|
workspacePath,
|
|
8591
|
-
|
|
8240
|
+
path24.basename(subagentDir),
|
|
8592
8241
|
subagentDir,
|
|
8593
8242
|
vscodeCmd
|
|
8594
8243
|
);
|
|
8595
8244
|
if (!workspaceReady) {
|
|
8596
8245
|
throw new Error(
|
|
8597
|
-
`VS Code workspace '${
|
|
8246
|
+
`VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8598
8247
|
);
|
|
8599
8248
|
}
|
|
8600
8249
|
await sleep2(500);
|
|
@@ -8603,11 +8252,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8603
8252
|
}
|
|
8604
8253
|
|
|
8605
8254
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8606
|
-
import { copyFile, mkdir as mkdir9, readFile as
|
|
8607
|
-
import
|
|
8255
|
+
import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
8256
|
+
import path26 from "node:path";
|
|
8608
8257
|
|
|
8609
8258
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8610
|
-
import
|
|
8259
|
+
import path25 from "node:path";
|
|
8611
8260
|
import JSON5 from "json5";
|
|
8612
8261
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8613
8262
|
let workspace;
|
|
@@ -8624,10 +8273,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8624
8273
|
}
|
|
8625
8274
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8626
8275
|
const folderPath = folder.path;
|
|
8627
|
-
if (
|
|
8276
|
+
if (path25.isAbsolute(folderPath)) {
|
|
8628
8277
|
return folder;
|
|
8629
8278
|
}
|
|
8630
|
-
const absolutePath =
|
|
8279
|
+
const absolutePath = path25.resolve(templateDir, folderPath);
|
|
8631
8280
|
return {
|
|
8632
8281
|
...folder,
|
|
8633
8282
|
path: absolutePath
|
|
@@ -8649,19 +8298,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8649
8298
|
if (locationMap && typeof locationMap === "object") {
|
|
8650
8299
|
const transformedMap = {};
|
|
8651
8300
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8652
|
-
const isAbsolute =
|
|
8301
|
+
const isAbsolute = path25.isAbsolute(locationPath);
|
|
8653
8302
|
if (isAbsolute) {
|
|
8654
8303
|
transformedMap[locationPath] = value;
|
|
8655
8304
|
} else {
|
|
8656
8305
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8657
8306
|
if (firstGlobIndex === -1) {
|
|
8658
|
-
const resolvedPath =
|
|
8307
|
+
const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8659
8308
|
transformedMap[resolvedPath] = value;
|
|
8660
8309
|
} else {
|
|
8661
8310
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8662
8311
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8663
8312
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8664
|
-
const resolvedPath = (
|
|
8313
|
+
const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
|
|
8665
8314
|
/\\/g,
|
|
8666
8315
|
"/"
|
|
8667
8316
|
);
|
|
@@ -8702,7 +8351,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8702
8351
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8703
8352
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8704
8353
|
for (const subagent of subagents) {
|
|
8705
|
-
const lockFile =
|
|
8354
|
+
const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8706
8355
|
if (!await pathExists(lockFile)) {
|
|
8707
8356
|
return subagent.absolutePath;
|
|
8708
8357
|
}
|
|
@@ -8712,7 +8361,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8712
8361
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8713
8362
|
let workspaceContent;
|
|
8714
8363
|
if (workspaceTemplate) {
|
|
8715
|
-
const workspaceSrc =
|
|
8364
|
+
const workspaceSrc = path26.resolve(workspaceTemplate);
|
|
8716
8365
|
if (!await pathExists(workspaceSrc)) {
|
|
8717
8366
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8718
8367
|
}
|
|
@@ -8720,18 +8369,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8720
8369
|
if (!stats.isFile()) {
|
|
8721
8370
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
8722
8371
|
}
|
|
8723
|
-
const templateText = await
|
|
8372
|
+
const templateText = await readFile8(workspaceSrc, "utf8");
|
|
8724
8373
|
workspaceContent = JSON.parse(templateText);
|
|
8725
8374
|
} else {
|
|
8726
8375
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8727
8376
|
}
|
|
8728
|
-
const workspaceName = `${
|
|
8729
|
-
const workspaceDst =
|
|
8730
|
-
const templateDir = workspaceTemplate ?
|
|
8377
|
+
const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
|
|
8378
|
+
const workspaceDst = path26.join(subagentDir, workspaceName);
|
|
8379
|
+
const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
|
|
8731
8380
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8732
8381
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8733
8382
|
if (cwd) {
|
|
8734
|
-
const absCwd =
|
|
8383
|
+
const absCwd = path26.resolve(cwd);
|
|
8735
8384
|
const parsed = JSON.parse(transformedContent);
|
|
8736
8385
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8737
8386
|
if (!alreadyPresent) {
|
|
@@ -8740,35 +8389,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8740
8389
|
}
|
|
8741
8390
|
}
|
|
8742
8391
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8743
|
-
const messagesDir =
|
|
8392
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
8744
8393
|
await mkdir9(messagesDir, { recursive: true });
|
|
8745
8394
|
return { workspace: workspaceDst, messagesDir };
|
|
8746
8395
|
}
|
|
8747
8396
|
async function createSubagentLock(subagentDir) {
|
|
8748
|
-
const messagesDir =
|
|
8397
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
8749
8398
|
if (await pathExists(messagesDir)) {
|
|
8750
8399
|
const files = await readdir2(messagesDir);
|
|
8751
8400
|
await Promise.all(
|
|
8752
8401
|
files.map(async (file) => {
|
|
8753
|
-
const target =
|
|
8402
|
+
const target = path26.join(messagesDir, file);
|
|
8754
8403
|
await removeIfExists(target);
|
|
8755
8404
|
})
|
|
8756
8405
|
);
|
|
8757
8406
|
}
|
|
8758
|
-
const githubAgentsDir =
|
|
8407
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
8759
8408
|
if (await pathExists(githubAgentsDir)) {
|
|
8760
8409
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
8761
8410
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8762
8411
|
await Promise.all(
|
|
8763
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
8412
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
|
|
8764
8413
|
);
|
|
8765
8414
|
}
|
|
8766
|
-
const lockFile =
|
|
8415
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8767
8416
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8768
8417
|
return lockFile;
|
|
8769
8418
|
}
|
|
8770
8419
|
async function removeSubagentLock(subagentDir) {
|
|
8771
|
-
const lockFile =
|
|
8420
|
+
const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8772
8421
|
await removeIfExists(lockFile);
|
|
8773
8422
|
}
|
|
8774
8423
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8788,9 +8437,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8788
8437
|
return 1;
|
|
8789
8438
|
}
|
|
8790
8439
|
if (promptFile) {
|
|
8791
|
-
const githubAgentsDir =
|
|
8440
|
+
const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
|
|
8792
8441
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8793
|
-
const agentFile =
|
|
8442
|
+
const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
|
|
8794
8443
|
try {
|
|
8795
8444
|
await copyFile(promptFile, agentFile);
|
|
8796
8445
|
} catch (error) {
|
|
@@ -8809,7 +8458,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
8809
8458
|
if (!promptFile) {
|
|
8810
8459
|
return void 0;
|
|
8811
8460
|
}
|
|
8812
|
-
const resolvedPrompt =
|
|
8461
|
+
const resolvedPrompt = path27.resolve(promptFile);
|
|
8813
8462
|
if (!await pathExists(resolvedPrompt)) {
|
|
8814
8463
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8815
8464
|
}
|
|
@@ -8825,7 +8474,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8825
8474
|
}
|
|
8826
8475
|
const resolved = [];
|
|
8827
8476
|
for (const attachment of extraAttachments) {
|
|
8828
|
-
const resolvedPath =
|
|
8477
|
+
const resolvedPath = path27.resolve(attachment);
|
|
8829
8478
|
if (!await pathExists(resolvedPath)) {
|
|
8830
8479
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8831
8480
|
}
|
|
@@ -8867,7 +8516,7 @@ async function dispatchAgentSession(options) {
|
|
|
8867
8516
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8868
8517
|
};
|
|
8869
8518
|
}
|
|
8870
|
-
const subagentName =
|
|
8519
|
+
const subagentName = path27.basename(subagentDir);
|
|
8871
8520
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8872
8521
|
const preparationResult = await prepareSubagentDirectory(
|
|
8873
8522
|
subagentDir,
|
|
@@ -8895,9 +8544,9 @@ async function dispatchAgentSession(options) {
|
|
|
8895
8544
|
};
|
|
8896
8545
|
}
|
|
8897
8546
|
const timestamp = generateTimestamp();
|
|
8898
|
-
const messagesDir =
|
|
8899
|
-
const responseFileTmp =
|
|
8900
|
-
const responseFileFinal =
|
|
8547
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8548
|
+
const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
8549
|
+
const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
|
|
8901
8550
|
const requestInstructions = createRequestPrompt(
|
|
8902
8551
|
userQuery,
|
|
8903
8552
|
responseFileTmp,
|
|
@@ -9002,7 +8651,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9002
8651
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
9003
8652
|
};
|
|
9004
8653
|
}
|
|
9005
|
-
subagentName =
|
|
8654
|
+
subagentName = path27.basename(subagentDir);
|
|
9006
8655
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
9007
8656
|
const preparationResult = await prepareSubagentDirectory(
|
|
9008
8657
|
subagentDir,
|
|
@@ -9033,17 +8682,17 @@ async function dispatchBatchAgent(options) {
|
|
|
9033
8682
|
};
|
|
9034
8683
|
}
|
|
9035
8684
|
const timestamp = generateTimestamp();
|
|
9036
|
-
const messagesDir =
|
|
8685
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
9037
8686
|
requestFiles = userQueries.map(
|
|
9038
|
-
(_, index) =>
|
|
8687
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
9039
8688
|
);
|
|
9040
8689
|
const responseTmpFiles = userQueries.map(
|
|
9041
|
-
(_, index) =>
|
|
8690
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
9042
8691
|
);
|
|
9043
8692
|
responseFilesFinal = userQueries.map(
|
|
9044
|
-
(_, index) =>
|
|
8693
|
+
(_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
9045
8694
|
);
|
|
9046
|
-
const orchestratorFile =
|
|
8695
|
+
const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
9047
8696
|
if (!dryRun) {
|
|
9048
8697
|
await Promise.all(
|
|
9049
8698
|
userQueries.map((query, index) => {
|
|
@@ -9129,7 +8778,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9129
8778
|
|
|
9130
8779
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
9131
8780
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
9132
|
-
import
|
|
8781
|
+
import path28 from "node:path";
|
|
9133
8782
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
9134
8783
|
folders: [
|
|
9135
8784
|
{
|
|
@@ -9160,7 +8809,7 @@ async function provisionSubagents(options) {
|
|
|
9160
8809
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
9161
8810
|
throw new Error("subagents must be a positive integer");
|
|
9162
8811
|
}
|
|
9163
|
-
const targetPath =
|
|
8812
|
+
const targetPath = path28.resolve(targetRoot);
|
|
9164
8813
|
if (!dryRun) {
|
|
9165
8814
|
await ensureDir(targetPath);
|
|
9166
8815
|
}
|
|
@@ -9180,7 +8829,7 @@ async function provisionSubagents(options) {
|
|
|
9180
8829
|
continue;
|
|
9181
8830
|
}
|
|
9182
8831
|
highestNumber = Math.max(highestNumber, parsed);
|
|
9183
|
-
const lockFile =
|
|
8832
|
+
const lockFile = path28.join(entry.absolutePath, lockName);
|
|
9184
8833
|
const locked = await pathExists(lockFile);
|
|
9185
8834
|
if (locked) {
|
|
9186
8835
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -9197,10 +8846,10 @@ async function provisionSubagents(options) {
|
|
|
9197
8846
|
break;
|
|
9198
8847
|
}
|
|
9199
8848
|
const subagentDir = subagent.absolutePath;
|
|
9200
|
-
const githubAgentsDir =
|
|
9201
|
-
const lockFile =
|
|
9202
|
-
const workspaceDst =
|
|
9203
|
-
const wakeupDst =
|
|
8849
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
8850
|
+
const lockFile = path28.join(subagentDir, lockName);
|
|
8851
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
8852
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9204
8853
|
const isLocked = await pathExists(lockFile);
|
|
9205
8854
|
if (isLocked && !force) {
|
|
9206
8855
|
continue;
|
|
@@ -9238,10 +8887,10 @@ async function provisionSubagents(options) {
|
|
|
9238
8887
|
let nextIndex = highestNumber;
|
|
9239
8888
|
while (subagentsProvisioned < subagents) {
|
|
9240
8889
|
nextIndex += 1;
|
|
9241
|
-
const subagentDir =
|
|
9242
|
-
const githubAgentsDir =
|
|
9243
|
-
const workspaceDst =
|
|
9244
|
-
const wakeupDst =
|
|
8890
|
+
const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
|
|
8891
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
8892
|
+
const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
8893
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9245
8894
|
if (!dryRun) {
|
|
9246
8895
|
await ensureDir(subagentDir);
|
|
9247
8896
|
await ensureDir(githubAgentsDir);
|
|
@@ -9308,7 +8957,7 @@ var VSCodeProvider = class {
|
|
|
9308
8957
|
}
|
|
9309
8958
|
await this.ensureEnvironmentReady();
|
|
9310
8959
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
9311
|
-
const promptContent = buildPromptDocument2(request, inputFiles
|
|
8960
|
+
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
9312
8961
|
const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
|
|
9313
8962
|
const startTime = Date.now();
|
|
9314
8963
|
const session = await dispatchAgentSession({
|
|
@@ -9362,7 +9011,7 @@ var VSCodeProvider = class {
|
|
|
9362
9011
|
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
9363
9012
|
);
|
|
9364
9013
|
const userQueries = normalizedRequests.map(
|
|
9365
|
-
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles
|
|
9014
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
9366
9015
|
);
|
|
9367
9016
|
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
9368
9017
|
this.config.workspaceTemplate
|
|
@@ -9431,7 +9080,7 @@ var VSCodeProvider = class {
|
|
|
9431
9080
|
async function locateVSCodeExecutable(candidate) {
|
|
9432
9081
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9433
9082
|
if (includesPathSeparator) {
|
|
9434
|
-
const resolved =
|
|
9083
|
+
const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
|
|
9435
9084
|
try {
|
|
9436
9085
|
await access3(resolved, constants3.F_OK);
|
|
9437
9086
|
return resolved;
|
|
@@ -9460,41 +9109,35 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9460
9109
|
return void 0;
|
|
9461
9110
|
}
|
|
9462
9111
|
try {
|
|
9463
|
-
const stats = await stat4(
|
|
9112
|
+
const stats = await stat4(path29.resolve(template));
|
|
9464
9113
|
return stats.isFile() ? template : void 0;
|
|
9465
9114
|
} catch {
|
|
9466
9115
|
return template;
|
|
9467
9116
|
}
|
|
9468
9117
|
}
|
|
9469
|
-
function buildPromptDocument2(request, attachments
|
|
9118
|
+
function buildPromptDocument2(request, attachments) {
|
|
9470
9119
|
const parts = [];
|
|
9471
9120
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
9472
9121
|
parts.push(request.systemPrompt.trim());
|
|
9473
9122
|
}
|
|
9474
|
-
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
9475
9123
|
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
9476
|
-
const
|
|
9477
|
-
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
9124
|
+
const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
|
|
9478
9125
|
if (prereadBlock.length > 0) {
|
|
9479
9126
|
parts.push("\n", prereadBlock);
|
|
9480
9127
|
}
|
|
9481
9128
|
parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
|
|
9482
9129
|
return parts.join("\n").trim();
|
|
9483
9130
|
}
|
|
9484
|
-
function buildMandatoryPrereadBlock2(
|
|
9485
|
-
if (
|
|
9131
|
+
function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
9132
|
+
if (attachmentFiles.length === 0) {
|
|
9486
9133
|
return "";
|
|
9487
9134
|
}
|
|
9488
9135
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9489
|
-
const fileName =
|
|
9136
|
+
const fileName = path29.basename(absolutePath);
|
|
9490
9137
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9491
9138
|
return `* [${fileName}](${fileUri})`;
|
|
9492
9139
|
});
|
|
9493
9140
|
const sections = [];
|
|
9494
|
-
if (guidelineFiles.length > 0) {
|
|
9495
|
-
sections.push(`Read all guideline files:
|
|
9496
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
9497
|
-
}
|
|
9498
9141
|
if (attachmentFiles.length > 0) {
|
|
9499
9142
|
sections.push(`Read all attachment files:
|
|
9500
9143
|
${buildList(attachmentFiles).join("\n")}.`);
|
|
@@ -9505,29 +9148,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
|
|
|
9505
9148
|
);
|
|
9506
9149
|
return sections.join("\n");
|
|
9507
9150
|
}
|
|
9508
|
-
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
9509
|
-
if (!attachments || attachments.length === 0) {
|
|
9510
|
-
return [];
|
|
9511
|
-
}
|
|
9512
|
-
const unique = /* @__PURE__ */ new Map();
|
|
9513
|
-
for (const attachment of attachments) {
|
|
9514
|
-
const absolutePath = path31.resolve(attachment);
|
|
9515
|
-
const normalized = absolutePath.split(path31.sep).join("/");
|
|
9516
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
9517
|
-
if (!unique.has(absolutePath)) {
|
|
9518
|
-
unique.set(absolutePath, absolutePath);
|
|
9519
|
-
}
|
|
9520
|
-
}
|
|
9521
|
-
}
|
|
9522
|
-
return Array.from(unique.values());
|
|
9523
|
-
}
|
|
9524
9151
|
function collectAttachmentFiles(attachments) {
|
|
9525
9152
|
if (!attachments || attachments.length === 0) {
|
|
9526
9153
|
return [];
|
|
9527
9154
|
}
|
|
9528
9155
|
const unique = /* @__PURE__ */ new Map();
|
|
9529
9156
|
for (const attachment of attachments) {
|
|
9530
|
-
const absolutePath =
|
|
9157
|
+
const absolutePath = path29.resolve(attachment);
|
|
9531
9158
|
if (!unique.has(absolutePath)) {
|
|
9532
9159
|
unique.set(absolutePath, absolutePath);
|
|
9533
9160
|
}
|
|
@@ -9535,7 +9162,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9535
9162
|
return Array.from(unique.values());
|
|
9536
9163
|
}
|
|
9537
9164
|
function pathToFileUri3(filePath) {
|
|
9538
|
-
const absolutePath =
|
|
9165
|
+
const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
|
|
9539
9166
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9540
9167
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9541
9168
|
return `file:///${normalizedPath}`;
|
|
@@ -9548,7 +9175,7 @@ function normalizeAttachments(attachments) {
|
|
|
9548
9175
|
}
|
|
9549
9176
|
const deduped = /* @__PURE__ */ new Set();
|
|
9550
9177
|
for (const attachment of attachments) {
|
|
9551
|
-
deduped.add(
|
|
9178
|
+
deduped.add(path29.resolve(attachment));
|
|
9552
9179
|
}
|
|
9553
9180
|
return Array.from(deduped);
|
|
9554
9181
|
}
|
|
@@ -9557,7 +9184,7 @@ function mergeAttachments(all) {
|
|
|
9557
9184
|
for (const list of all) {
|
|
9558
9185
|
if (!list) continue;
|
|
9559
9186
|
for (const inputFile of list) {
|
|
9560
|
-
deduped.add(
|
|
9187
|
+
deduped.add(path29.resolve(inputFile));
|
|
9561
9188
|
}
|
|
9562
9189
|
}
|
|
9563
9190
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9605,8 +9232,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9605
9232
|
|
|
9606
9233
|
// src/evaluation/providers/targets-file.ts
|
|
9607
9234
|
import { constants as constants4 } from "node:fs";
|
|
9608
|
-
import { access as access4, readFile as
|
|
9609
|
-
import
|
|
9235
|
+
import { access as access4, readFile as readFile9 } from "node:fs/promises";
|
|
9236
|
+
import path30 from "node:path";
|
|
9610
9237
|
import { parse as parse4 } from "yaml";
|
|
9611
9238
|
function isRecord(value) {
|
|
9612
9239
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9643,11 +9270,11 @@ async function fileExists3(filePath) {
|
|
|
9643
9270
|
}
|
|
9644
9271
|
}
|
|
9645
9272
|
async function readTargetDefinitions(filePath) {
|
|
9646
|
-
const absolutePath =
|
|
9273
|
+
const absolutePath = path30.resolve(filePath);
|
|
9647
9274
|
if (!await fileExists3(absolutePath)) {
|
|
9648
9275
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9649
9276
|
}
|
|
9650
|
-
const raw = await
|
|
9277
|
+
const raw = await readFile9(absolutePath, "utf8");
|
|
9651
9278
|
const parsed = parse4(raw);
|
|
9652
9279
|
if (!isRecord(parsed)) {
|
|
9653
9280
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -9663,21 +9290,21 @@ function listTargetNames(definitions) {
|
|
|
9663
9290
|
}
|
|
9664
9291
|
|
|
9665
9292
|
// src/evaluation/providers/provider-discovery.ts
|
|
9666
|
-
import
|
|
9667
|
-
import
|
|
9293
|
+
import path31 from "node:path";
|
|
9294
|
+
import fg from "fast-glob";
|
|
9668
9295
|
async function discoverProviders(registry, baseDir) {
|
|
9669
9296
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9670
9297
|
const candidateDirs = [];
|
|
9671
|
-
let dir =
|
|
9672
|
-
const root =
|
|
9298
|
+
let dir = path31.resolve(baseDir);
|
|
9299
|
+
const root = path31.parse(dir).root;
|
|
9673
9300
|
while (dir !== root) {
|
|
9674
|
-
candidateDirs.push(
|
|
9675
|
-
dir =
|
|
9301
|
+
candidateDirs.push(path31.join(dir, ".agentv", "providers"));
|
|
9302
|
+
dir = path31.dirname(dir);
|
|
9676
9303
|
}
|
|
9677
9304
|
let files = [];
|
|
9678
9305
|
for (const providersDir of candidateDirs) {
|
|
9679
9306
|
try {
|
|
9680
|
-
const found = await
|
|
9307
|
+
const found = await fg(patterns, {
|
|
9681
9308
|
cwd: providersDir,
|
|
9682
9309
|
absolute: true,
|
|
9683
9310
|
onlyFiles: true
|
|
@@ -9688,7 +9315,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9688
9315
|
}
|
|
9689
9316
|
const discoveredKinds = [];
|
|
9690
9317
|
for (const filePath of files) {
|
|
9691
|
-
const basename =
|
|
9318
|
+
const basename = path31.basename(filePath);
|
|
9692
9319
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9693
9320
|
if (registry.has(kindName)) {
|
|
9694
9321
|
continue;
|
|
@@ -9897,15 +9524,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9897
9524
|
});
|
|
9898
9525
|
}
|
|
9899
9526
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9900
|
-
const { mkdir: mkdir15, readFile:
|
|
9527
|
+
const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9901
9528
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9902
|
-
const
|
|
9529
|
+
const path44 = await import("node:path");
|
|
9903
9530
|
const { randomUUID: randomUUID9 } = await import("node:crypto");
|
|
9904
|
-
const dir =
|
|
9531
|
+
const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
9905
9532
|
await mkdir15(dir, { recursive: true });
|
|
9906
|
-
const stdinPath =
|
|
9907
|
-
const stdoutPath =
|
|
9908
|
-
const stderrPath =
|
|
9533
|
+
const stdinPath = path44.join(dir, "stdin.txt");
|
|
9534
|
+
const stdoutPath = path44.join(dir, "stdout.txt");
|
|
9535
|
+
const stderrPath = path44.join(dir, "stderr.txt");
|
|
9909
9536
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9910
9537
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9911
9538
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -9935,8 +9562,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
9935
9562
|
resolve(code ?? 0);
|
|
9936
9563
|
});
|
|
9937
9564
|
});
|
|
9938
|
-
const stdout = (await
|
|
9939
|
-
const stderr = (await
|
|
9565
|
+
const stdout = (await readFile12(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9566
|
+
const stderr = (await readFile12(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9940
9567
|
return { stdout, stderr, exitCode };
|
|
9941
9568
|
} finally {
|
|
9942
9569
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -10253,10 +9880,7 @@ var CodeEvaluator = class {
|
|
|
10253
9880
|
outputText: context.candidate,
|
|
10254
9881
|
output: outputForPayload,
|
|
10255
9882
|
outputPath,
|
|
10256
|
-
|
|
10257
|
-
inputFiles: context.evalCase.file_paths.filter(
|
|
10258
|
-
(path46) => !context.evalCase.guideline_paths.includes(path46)
|
|
10259
|
-
),
|
|
9883
|
+
inputFiles: context.evalCase.file_paths,
|
|
10260
9884
|
input: context.evalCase.input,
|
|
10261
9885
|
trace: context.trace ?? null,
|
|
10262
9886
|
tokenUsage: context.tokenUsage ?? null,
|
|
@@ -10387,7 +10011,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10387
10011
|
|
|
10388
10012
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10389
10013
|
import fs2 from "node:fs/promises";
|
|
10390
|
-
import
|
|
10014
|
+
import path32 from "node:path";
|
|
10391
10015
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10392
10016
|
import { z as z3 } from "zod";
|
|
10393
10017
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11219,8 +10843,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11219
10843
|
};
|
|
11220
10844
|
}
|
|
11221
10845
|
function resolveSandboxed(basePath, relativePath) {
|
|
11222
|
-
const resolved =
|
|
11223
|
-
if (!resolved.startsWith(basePath +
|
|
10846
|
+
const resolved = path32.resolve(basePath, relativePath);
|
|
10847
|
+
if (!resolved.startsWith(basePath + path32.sep) && resolved !== basePath) {
|
|
11224
10848
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
11225
10849
|
}
|
|
11226
10850
|
return resolved;
|
|
@@ -11310,11 +10934,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11310
10934
|
for (const entry of entries) {
|
|
11311
10935
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
11312
10936
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11313
|
-
const fullPath =
|
|
10937
|
+
const fullPath = path32.join(dirPath, entry.name);
|
|
11314
10938
|
if (entry.isDirectory()) {
|
|
11315
10939
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11316
10940
|
} else if (entry.isFile()) {
|
|
11317
|
-
const ext =
|
|
10941
|
+
const ext = path32.extname(entry.name).toLowerCase();
|
|
11318
10942
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11319
10943
|
try {
|
|
11320
10944
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -11326,7 +10950,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11326
10950
|
regex.lastIndex = 0;
|
|
11327
10951
|
if (regex.test(lines[i])) {
|
|
11328
10952
|
matches.push({
|
|
11329
|
-
file:
|
|
10953
|
+
file: path32.relative(workspacePath, fullPath),
|
|
11330
10954
|
line: i + 1,
|
|
11331
10955
|
text: lines[i].substring(0, 200)
|
|
11332
10956
|
});
|
|
@@ -11961,115 +11585,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11961
11585
|
* Evaluate a single field against the expected value.
|
|
11962
11586
|
*/
|
|
11963
11587
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11964
|
-
const { path:
|
|
11965
|
-
const candidateValue = resolvePath(candidateData,
|
|
11966
|
-
const expectedValue = resolvePath(expectedData,
|
|
11588
|
+
const { path: path44, match, required = true, weight = 1 } = fieldConfig;
|
|
11589
|
+
const candidateValue = resolvePath(candidateData, path44);
|
|
11590
|
+
const expectedValue = resolvePath(expectedData, path44);
|
|
11967
11591
|
if (expectedValue === void 0) {
|
|
11968
11592
|
return {
|
|
11969
|
-
path:
|
|
11593
|
+
path: path44,
|
|
11970
11594
|
score: 1,
|
|
11971
11595
|
// No expected value means no comparison needed
|
|
11972
11596
|
weight,
|
|
11973
11597
|
hit: true,
|
|
11974
|
-
message: `${
|
|
11598
|
+
message: `${path44}: no expected value`
|
|
11975
11599
|
};
|
|
11976
11600
|
}
|
|
11977
11601
|
if (candidateValue === void 0) {
|
|
11978
11602
|
if (required) {
|
|
11979
11603
|
return {
|
|
11980
|
-
path:
|
|
11604
|
+
path: path44,
|
|
11981
11605
|
score: 0,
|
|
11982
11606
|
weight,
|
|
11983
11607
|
hit: false,
|
|
11984
|
-
message: `${
|
|
11608
|
+
message: `${path44} (required, missing)`
|
|
11985
11609
|
};
|
|
11986
11610
|
}
|
|
11987
11611
|
return {
|
|
11988
|
-
path:
|
|
11612
|
+
path: path44,
|
|
11989
11613
|
score: 1,
|
|
11990
11614
|
// Don't penalize missing optional fields
|
|
11991
11615
|
weight: 0,
|
|
11992
11616
|
// Zero weight means it won't affect the score
|
|
11993
11617
|
hit: true,
|
|
11994
|
-
message: `${
|
|
11618
|
+
message: `${path44}: optional field missing`
|
|
11995
11619
|
};
|
|
11996
11620
|
}
|
|
11997
11621
|
switch (match) {
|
|
11998
11622
|
case "exact":
|
|
11999
|
-
return this.compareExact(
|
|
11623
|
+
return this.compareExact(path44, candidateValue, expectedValue, weight);
|
|
12000
11624
|
case "numeric_tolerance":
|
|
12001
11625
|
return this.compareNumericTolerance(
|
|
12002
|
-
|
|
11626
|
+
path44,
|
|
12003
11627
|
candidateValue,
|
|
12004
11628
|
expectedValue,
|
|
12005
11629
|
fieldConfig,
|
|
12006
11630
|
weight
|
|
12007
11631
|
);
|
|
12008
11632
|
case "date":
|
|
12009
|
-
return this.compareDate(
|
|
11633
|
+
return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
|
|
12010
11634
|
default:
|
|
12011
11635
|
return {
|
|
12012
|
-
path:
|
|
11636
|
+
path: path44,
|
|
12013
11637
|
score: 0,
|
|
12014
11638
|
weight,
|
|
12015
11639
|
hit: false,
|
|
12016
|
-
message: `${
|
|
11640
|
+
message: `${path44}: unknown match type "${match}"`
|
|
12017
11641
|
};
|
|
12018
11642
|
}
|
|
12019
11643
|
}
|
|
12020
11644
|
/**
|
|
12021
11645
|
* Exact equality comparison.
|
|
12022
11646
|
*/
|
|
12023
|
-
compareExact(
|
|
11647
|
+
compareExact(path44, candidateValue, expectedValue, weight) {
|
|
12024
11648
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
12025
11649
|
return {
|
|
12026
|
-
path:
|
|
11650
|
+
path: path44,
|
|
12027
11651
|
score: 1,
|
|
12028
11652
|
weight,
|
|
12029
11653
|
hit: true,
|
|
12030
|
-
message:
|
|
11654
|
+
message: path44
|
|
12031
11655
|
};
|
|
12032
11656
|
}
|
|
12033
11657
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
12034
11658
|
return {
|
|
12035
|
-
path:
|
|
11659
|
+
path: path44,
|
|
12036
11660
|
score: 0,
|
|
12037
11661
|
weight,
|
|
12038
11662
|
hit: false,
|
|
12039
|
-
message: `${
|
|
11663
|
+
message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
12040
11664
|
};
|
|
12041
11665
|
}
|
|
12042
11666
|
return {
|
|
12043
|
-
path:
|
|
11667
|
+
path: path44,
|
|
12044
11668
|
score: 0,
|
|
12045
11669
|
weight,
|
|
12046
11670
|
hit: false,
|
|
12047
|
-
message: `${
|
|
11671
|
+
message: `${path44} (value mismatch)`
|
|
12048
11672
|
};
|
|
12049
11673
|
}
|
|
12050
11674
|
/**
|
|
12051
11675
|
* Numeric comparison with absolute or relative tolerance.
|
|
12052
11676
|
*/
|
|
12053
|
-
compareNumericTolerance(
|
|
11677
|
+
compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12054
11678
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
12055
11679
|
const candidateNum = toNumber(candidateValue);
|
|
12056
11680
|
const expectedNum = toNumber(expectedValue);
|
|
12057
11681
|
if (candidateNum === null || expectedNum === null) {
|
|
12058
11682
|
return {
|
|
12059
|
-
path:
|
|
11683
|
+
path: path44,
|
|
12060
11684
|
score: 0,
|
|
12061
11685
|
weight,
|
|
12062
11686
|
hit: false,
|
|
12063
|
-
message: `${
|
|
11687
|
+
message: `${path44} (non-numeric value)`
|
|
12064
11688
|
};
|
|
12065
11689
|
}
|
|
12066
11690
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
12067
11691
|
return {
|
|
12068
|
-
path:
|
|
11692
|
+
path: path44,
|
|
12069
11693
|
score: 0,
|
|
12070
11694
|
weight,
|
|
12071
11695
|
hit: false,
|
|
12072
|
-
message: `${
|
|
11696
|
+
message: `${path44} (invalid numeric value)`
|
|
12073
11697
|
};
|
|
12074
11698
|
}
|
|
12075
11699
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -12082,61 +11706,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
12082
11706
|
}
|
|
12083
11707
|
if (withinTolerance) {
|
|
12084
11708
|
return {
|
|
12085
|
-
path:
|
|
11709
|
+
path: path44,
|
|
12086
11710
|
score: 1,
|
|
12087
11711
|
weight,
|
|
12088
11712
|
hit: true,
|
|
12089
|
-
message: `${
|
|
11713
|
+
message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12090
11714
|
};
|
|
12091
11715
|
}
|
|
12092
11716
|
return {
|
|
12093
|
-
path:
|
|
11717
|
+
path: path44,
|
|
12094
11718
|
score: 0,
|
|
12095
11719
|
weight,
|
|
12096
11720
|
hit: false,
|
|
12097
|
-
message: `${
|
|
11721
|
+
message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12098
11722
|
};
|
|
12099
11723
|
}
|
|
12100
11724
|
/**
|
|
12101
11725
|
* Date comparison with format normalization.
|
|
12102
11726
|
*/
|
|
12103
|
-
compareDate(
|
|
11727
|
+
compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12104
11728
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12105
11729
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12106
11730
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12107
11731
|
if (candidateDate === null) {
|
|
12108
11732
|
return {
|
|
12109
|
-
path:
|
|
11733
|
+
path: path44,
|
|
12110
11734
|
score: 0,
|
|
12111
11735
|
weight,
|
|
12112
11736
|
hit: false,
|
|
12113
|
-
message: `${
|
|
11737
|
+
message: `${path44} (unparseable candidate date)`
|
|
12114
11738
|
};
|
|
12115
11739
|
}
|
|
12116
11740
|
if (expectedDate === null) {
|
|
12117
11741
|
return {
|
|
12118
|
-
path:
|
|
11742
|
+
path: path44,
|
|
12119
11743
|
score: 0,
|
|
12120
11744
|
weight,
|
|
12121
11745
|
hit: false,
|
|
12122
|
-
message: `${
|
|
11746
|
+
message: `${path44} (unparseable expected date)`
|
|
12123
11747
|
};
|
|
12124
11748
|
}
|
|
12125
11749
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12126
11750
|
return {
|
|
12127
|
-
path:
|
|
11751
|
+
path: path44,
|
|
12128
11752
|
score: 1,
|
|
12129
11753
|
weight,
|
|
12130
11754
|
hit: true,
|
|
12131
|
-
message:
|
|
11755
|
+
message: path44
|
|
12132
11756
|
};
|
|
12133
11757
|
}
|
|
12134
11758
|
return {
|
|
12135
|
-
path:
|
|
11759
|
+
path: path44,
|
|
12136
11760
|
score: 0,
|
|
12137
11761
|
weight,
|
|
12138
11762
|
hit: false,
|
|
12139
|
-
message: `${
|
|
11763
|
+
message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12140
11764
|
};
|
|
12141
11765
|
}
|
|
12142
11766
|
/**
|
|
@@ -12169,11 +11793,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12169
11793
|
};
|
|
12170
11794
|
}
|
|
12171
11795
|
};
|
|
12172
|
-
function resolvePath(obj,
|
|
12173
|
-
if (!
|
|
11796
|
+
function resolvePath(obj, path44) {
|
|
11797
|
+
if (!path44 || !obj) {
|
|
12174
11798
|
return void 0;
|
|
12175
11799
|
}
|
|
12176
|
-
const parts =
|
|
11800
|
+
const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12177
11801
|
let current = obj;
|
|
12178
11802
|
for (const part of parts) {
|
|
12179
11803
|
if (current === null || current === void 0) {
|
|
@@ -12633,8 +12257,8 @@ var TokenUsageEvaluator = class {
|
|
|
12633
12257
|
};
|
|
12634
12258
|
|
|
12635
12259
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12636
|
-
function getNestedValue(obj,
|
|
12637
|
-
const parts =
|
|
12260
|
+
function getNestedValue(obj, path44) {
|
|
12261
|
+
const parts = path44.split(".");
|
|
12638
12262
|
let current = obj;
|
|
12639
12263
|
for (const part of parts) {
|
|
12640
12264
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13256,8 +12880,8 @@ function runEqualsAssertion(output, value) {
|
|
|
13256
12880
|
// src/evaluation/orchestrator.ts
|
|
13257
12881
|
import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
|
|
13258
12882
|
import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13259
|
-
import
|
|
13260
|
-
import
|
|
12883
|
+
import path41 from "node:path";
|
|
12884
|
+
import micromatch3 from "micromatch";
|
|
13261
12885
|
|
|
13262
12886
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
13263
12887
|
var Node = class {
|
|
@@ -13470,7 +13094,7 @@ var InlineAssertEvaluator = class {
|
|
|
13470
13094
|
};
|
|
13471
13095
|
|
|
13472
13096
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13473
|
-
import
|
|
13097
|
+
import path33 from "node:path";
|
|
13474
13098
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13475
13099
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13476
13100
|
if (!context) {
|
|
@@ -13505,10 +13129,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13505
13129
|
expectedOutput: context.evalCase.expected_output,
|
|
13506
13130
|
outputText: context.candidate,
|
|
13507
13131
|
output: context.output ?? null,
|
|
13508
|
-
|
|
13509
|
-
inputFiles: context.evalCase.file_paths.filter(
|
|
13510
|
-
(p) => !context.evalCase.guideline_paths.includes(p)
|
|
13511
|
-
),
|
|
13132
|
+
inputFiles: context.evalCase.file_paths,
|
|
13512
13133
|
input: context.evalCase.input,
|
|
13513
13134
|
trace: context.trace ?? null,
|
|
13514
13135
|
fileChanges: context.fileChanges ?? null,
|
|
@@ -13519,7 +13140,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13519
13140
|
};
|
|
13520
13141
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13521
13142
|
const scriptPath = script[script.length - 1];
|
|
13522
|
-
const cwd =
|
|
13143
|
+
const cwd = path33.dirname(scriptPath);
|
|
13523
13144
|
try {
|
|
13524
13145
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13525
13146
|
const prompt = stdout.trim();
|
|
@@ -13791,21 +13412,21 @@ function createBuiltinRegistry() {
|
|
|
13791
13412
|
}
|
|
13792
13413
|
|
|
13793
13414
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13794
|
-
import
|
|
13795
|
-
import
|
|
13415
|
+
import path34 from "node:path";
|
|
13416
|
+
import fg2 from "fast-glob";
|
|
13796
13417
|
async function discoverAssertions(registry, baseDir) {
|
|
13797
13418
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13798
13419
|
const candidateDirs = [];
|
|
13799
|
-
let dir =
|
|
13800
|
-
const root =
|
|
13420
|
+
let dir = path34.resolve(baseDir);
|
|
13421
|
+
const root = path34.parse(dir).root;
|
|
13801
13422
|
while (dir !== root) {
|
|
13802
|
-
candidateDirs.push(
|
|
13803
|
-
dir =
|
|
13423
|
+
candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
|
|
13424
|
+
dir = path34.dirname(dir);
|
|
13804
13425
|
}
|
|
13805
13426
|
let files = [];
|
|
13806
13427
|
for (const assertionsDir of candidateDirs) {
|
|
13807
13428
|
try {
|
|
13808
|
-
const found = await
|
|
13429
|
+
const found = await fg2(patterns, {
|
|
13809
13430
|
cwd: assertionsDir,
|
|
13810
13431
|
absolute: true,
|
|
13811
13432
|
onlyFiles: true
|
|
@@ -13816,7 +13437,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13816
13437
|
}
|
|
13817
13438
|
const discoveredTypes = [];
|
|
13818
13439
|
for (const filePath of files) {
|
|
13819
|
-
const basename =
|
|
13440
|
+
const basename = path34.basename(filePath);
|
|
13820
13441
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13821
13442
|
if (registry.has(typeName)) {
|
|
13822
13443
|
continue;
|
|
@@ -13834,22 +13455,22 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13834
13455
|
}
|
|
13835
13456
|
|
|
13836
13457
|
// src/evaluation/registry/grader-discovery.ts
|
|
13837
|
-
import
|
|
13838
|
-
import
|
|
13458
|
+
import path35 from "node:path";
|
|
13459
|
+
import fg3 from "fast-glob";
|
|
13839
13460
|
async function discoverGraders(registry, baseDir) {
|
|
13840
13461
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13841
13462
|
const candidateDirs = [];
|
|
13842
|
-
let dir =
|
|
13843
|
-
const root =
|
|
13463
|
+
let dir = path35.resolve(baseDir);
|
|
13464
|
+
const root = path35.parse(dir).root;
|
|
13844
13465
|
while (dir !== root) {
|
|
13845
|
-
candidateDirs.push(
|
|
13846
|
-
candidateDirs.push(
|
|
13847
|
-
dir =
|
|
13466
|
+
candidateDirs.push(path35.join(dir, ".agentv", "graders"));
|
|
13467
|
+
candidateDirs.push(path35.join(dir, ".agentv", "judges"));
|
|
13468
|
+
dir = path35.dirname(dir);
|
|
13848
13469
|
}
|
|
13849
13470
|
let files = [];
|
|
13850
13471
|
for (const gradersDir of candidateDirs) {
|
|
13851
13472
|
try {
|
|
13852
|
-
const found = await
|
|
13473
|
+
const found = await fg3(patterns, {
|
|
13853
13474
|
cwd: gradersDir,
|
|
13854
13475
|
absolute: true,
|
|
13855
13476
|
onlyFiles: true
|
|
@@ -13860,7 +13481,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13860
13481
|
}
|
|
13861
13482
|
const discoveredTypes = [];
|
|
13862
13483
|
for (const filePath of files) {
|
|
13863
|
-
const basename =
|
|
13484
|
+
const basename = path35.basename(filePath);
|
|
13864
13485
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13865
13486
|
if (registry.has(typeName)) {
|
|
13866
13487
|
continue;
|
|
@@ -14020,7 +13641,7 @@ function getTCritical(df) {
|
|
|
14020
13641
|
// src/evaluation/workspace/file-changes.ts
|
|
14021
13642
|
import { exec as execCallback } from "node:child_process";
|
|
14022
13643
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
14023
|
-
import
|
|
13644
|
+
import path36 from "node:path";
|
|
14024
13645
|
import { promisify as promisify4 } from "node:util";
|
|
14025
13646
|
var execAsync4 = promisify4(execCallback);
|
|
14026
13647
|
function gitExecOpts(workspacePath) {
|
|
@@ -14054,10 +13675,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
14054
13675
|
}
|
|
14055
13676
|
for (const entry of entries) {
|
|
14056
13677
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
14057
|
-
const childPath =
|
|
13678
|
+
const childPath = path36.join(workspacePath, entry);
|
|
14058
13679
|
try {
|
|
14059
13680
|
if (!statSync(childPath).isDirectory()) continue;
|
|
14060
|
-
if (!statSync(
|
|
13681
|
+
if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
|
|
14061
13682
|
} catch {
|
|
14062
13683
|
continue;
|
|
14063
13684
|
}
|
|
@@ -14068,7 +13689,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
14068
13689
|
|
|
14069
13690
|
// src/evaluation/workspace/manager.ts
|
|
14070
13691
|
import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
14071
|
-
import
|
|
13692
|
+
import path37 from "node:path";
|
|
14072
13693
|
var TemplateNotFoundError = class extends Error {
|
|
14073
13694
|
constructor(templatePath) {
|
|
14074
13695
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -14098,14 +13719,14 @@ async function isDirectory(filePath) {
|
|
|
14098
13719
|
}
|
|
14099
13720
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
14100
13721
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14101
|
-
return
|
|
13722
|
+
return path37.join(root, evalRunId, caseId);
|
|
14102
13723
|
}
|
|
14103
13724
|
async function copyDirectoryRecursive(src, dest) {
|
|
14104
13725
|
await mkdir11(dest, { recursive: true });
|
|
14105
13726
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
14106
13727
|
for (const entry of entries) {
|
|
14107
|
-
const srcPath =
|
|
14108
|
-
const destPath =
|
|
13728
|
+
const srcPath = path37.join(src, entry.name);
|
|
13729
|
+
const destPath = path37.join(dest, entry.name);
|
|
14109
13730
|
if (entry.name === ".git") {
|
|
14110
13731
|
continue;
|
|
14111
13732
|
}
|
|
@@ -14117,7 +13738,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
14117
13738
|
}
|
|
14118
13739
|
}
|
|
14119
13740
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
14120
|
-
const resolvedTemplatePath =
|
|
13741
|
+
const resolvedTemplatePath = path37.resolve(templatePath);
|
|
14121
13742
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
14122
13743
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
14123
13744
|
}
|
|
@@ -14166,7 +13787,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
14166
13787
|
}
|
|
14167
13788
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
14168
13789
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14169
|
-
const evalDir =
|
|
13790
|
+
const evalDir = path37.join(root, evalRunId);
|
|
14170
13791
|
if (await fileExists(evalDir)) {
|
|
14171
13792
|
await rm4(evalDir, { recursive: true, force: true });
|
|
14172
13793
|
}
|
|
@@ -14176,8 +13797,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
14176
13797
|
import { execFile } from "node:child_process";
|
|
14177
13798
|
import { createHash } from "node:crypto";
|
|
14178
13799
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14179
|
-
import { cp as cp2, mkdir as mkdir12, readFile as
|
|
14180
|
-
import
|
|
13800
|
+
import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
13801
|
+
import path38 from "node:path";
|
|
14181
13802
|
import { promisify as promisify5 } from "node:util";
|
|
14182
13803
|
var execFileAsync = promisify5(execFile);
|
|
14183
13804
|
function gitEnv() {
|
|
@@ -14231,8 +13852,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
14231
13852
|
await mkdir12(dest, { recursive: true });
|
|
14232
13853
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
14233
13854
|
for (const entry of entries) {
|
|
14234
|
-
const srcPath =
|
|
14235
|
-
const destPath =
|
|
13855
|
+
const srcPath = path38.join(src, entry.name);
|
|
13856
|
+
const destPath = path38.join(dest, entry.name);
|
|
14236
13857
|
if (entry.name === ".git") {
|
|
14237
13858
|
continue;
|
|
14238
13859
|
}
|
|
@@ -14265,7 +13886,7 @@ var WorkspacePoolManager = class {
|
|
|
14265
13886
|
async acquireWorkspace(options) {
|
|
14266
13887
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
14267
13888
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
14268
|
-
const poolDir =
|
|
13889
|
+
const poolDir = path38.join(this.poolRoot, fingerprint);
|
|
14269
13890
|
await mkdir12(poolDir, { recursive: true });
|
|
14270
13891
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
14271
13892
|
if (drifted) {
|
|
@@ -14275,7 +13896,7 @@ var WorkspacePoolManager = class {
|
|
|
14275
13896
|
await this.removeAllSlots(poolDir);
|
|
14276
13897
|
}
|
|
14277
13898
|
for (let i = 0; i < maxSlots; i++) {
|
|
14278
|
-
const slotPath =
|
|
13899
|
+
const slotPath = path38.join(poolDir, `slot-${i}`);
|
|
14279
13900
|
const lockPath = `${slotPath}.lock`;
|
|
14280
13901
|
const locked = await this.tryLock(lockPath);
|
|
14281
13902
|
if (!locked) {
|
|
@@ -14337,7 +13958,7 @@ var WorkspacePoolManager = class {
|
|
|
14337
13958
|
throw err;
|
|
14338
13959
|
}
|
|
14339
13960
|
try {
|
|
14340
|
-
const pidStr = await
|
|
13961
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
14341
13962
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14342
13963
|
if (!Number.isNaN(pid)) {
|
|
14343
13964
|
try {
|
|
@@ -14362,9 +13983,9 @@ var WorkspacePoolManager = class {
|
|
|
14362
13983
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14363
13984
|
*/
|
|
14364
13985
|
async checkDrift(poolDir, fingerprint) {
|
|
14365
|
-
const metadataPath =
|
|
13986
|
+
const metadataPath = path38.join(poolDir, "metadata.json");
|
|
14366
13987
|
try {
|
|
14367
|
-
const raw = await
|
|
13988
|
+
const raw = await readFile10(metadataPath, "utf-8");
|
|
14368
13989
|
const metadata = JSON.parse(raw);
|
|
14369
13990
|
return metadata.fingerprint !== fingerprint;
|
|
14370
13991
|
} catch {
|
|
@@ -14379,17 +14000,17 @@ var WorkspacePoolManager = class {
|
|
|
14379
14000
|
repos,
|
|
14380
14001
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14381
14002
|
};
|
|
14382
|
-
await writeFile7(
|
|
14003
|
+
await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14383
14004
|
}
|
|
14384
14005
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14385
14006
|
async removeAllSlots(poolDir) {
|
|
14386
14007
|
const entries = await readdir4(poolDir);
|
|
14387
14008
|
for (const entry of entries) {
|
|
14388
14009
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14389
|
-
const lockPath =
|
|
14010
|
+
const lockPath = path38.join(poolDir, `${entry}.lock`);
|
|
14390
14011
|
if (existsSync2(lockPath)) {
|
|
14391
14012
|
try {
|
|
14392
|
-
const pidStr = await
|
|
14013
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
14393
14014
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14394
14015
|
if (!Number.isNaN(pid)) {
|
|
14395
14016
|
try {
|
|
@@ -14402,12 +14023,12 @@ var WorkspacePoolManager = class {
|
|
|
14402
14023
|
} catch {
|
|
14403
14024
|
}
|
|
14404
14025
|
}
|
|
14405
|
-
await rm5(
|
|
14026
|
+
await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
|
|
14406
14027
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14407
14028
|
});
|
|
14408
14029
|
}
|
|
14409
14030
|
}
|
|
14410
|
-
await rm5(
|
|
14031
|
+
await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14411
14032
|
});
|
|
14412
14033
|
}
|
|
14413
14034
|
/**
|
|
@@ -14417,7 +14038,7 @@ var WorkspacePoolManager = class {
|
|
|
14417
14038
|
*/
|
|
14418
14039
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14419
14040
|
for (const repo of repos) {
|
|
14420
|
-
const repoDir =
|
|
14041
|
+
const repoDir = path38.join(slotPath, repo.path);
|
|
14421
14042
|
if (!existsSync2(repoDir)) {
|
|
14422
14043
|
continue;
|
|
14423
14044
|
}
|
|
@@ -14444,7 +14065,7 @@ var WorkspacePoolManager = class {
|
|
|
14444
14065
|
// src/evaluation/workspace/repo-manager.ts
|
|
14445
14066
|
import { execFile as execFile2 } from "node:child_process";
|
|
14446
14067
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14447
|
-
import
|
|
14068
|
+
import path39 from "node:path";
|
|
14448
14069
|
import { promisify as promisify6 } from "node:util";
|
|
14449
14070
|
var execFileAsync2 = promisify6(execFile2);
|
|
14450
14071
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14544,7 +14165,7 @@ ${lines.join("\n")}`;
|
|
|
14544
14165
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14545
14166
|
*/
|
|
14546
14167
|
async materialize(repo, workspacePath) {
|
|
14547
|
-
const targetDir =
|
|
14168
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
14548
14169
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14549
14170
|
const startedAt = Date.now();
|
|
14550
14171
|
if (this.verbose) {
|
|
@@ -14635,7 +14256,7 @@ ${lines.join("\n")}`;
|
|
|
14635
14256
|
async reset(repos, workspacePath, reset) {
|
|
14636
14257
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14637
14258
|
for (const repo of repos) {
|
|
14638
|
-
const targetDir =
|
|
14259
|
+
const targetDir = path39.join(workspacePath, repo.path);
|
|
14639
14260
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14640
14261
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14641
14262
|
}
|
|
@@ -14644,16 +14265,16 @@ ${lines.join("\n")}`;
|
|
|
14644
14265
|
|
|
14645
14266
|
// src/evaluation/workspace/resolve.ts
|
|
14646
14267
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
14647
|
-
import
|
|
14268
|
+
import path40 from "node:path";
|
|
14648
14269
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14649
14270
|
if (!templatePath) {
|
|
14650
14271
|
return void 0;
|
|
14651
14272
|
}
|
|
14652
|
-
const resolved =
|
|
14273
|
+
const resolved = path40.resolve(templatePath);
|
|
14653
14274
|
const stats = await stat6(resolved);
|
|
14654
14275
|
if (stats.isFile()) {
|
|
14655
14276
|
return {
|
|
14656
|
-
dir:
|
|
14277
|
+
dir: path40.dirname(resolved),
|
|
14657
14278
|
workspaceFile: resolved
|
|
14658
14279
|
};
|
|
14659
14280
|
}
|
|
@@ -14665,14 +14286,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
14665
14286
|
if (workspaceFiles.length === 1) {
|
|
14666
14287
|
return {
|
|
14667
14288
|
dir: resolved,
|
|
14668
|
-
workspaceFile:
|
|
14289
|
+
workspaceFile: path40.join(resolved, workspaceFiles[0])
|
|
14669
14290
|
};
|
|
14670
14291
|
}
|
|
14671
14292
|
if (workspaceFiles.length > 1) {
|
|
14672
14293
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14673
14294
|
return {
|
|
14674
14295
|
dir: resolved,
|
|
14675
|
-
workspaceFile: conventionFile ?
|
|
14296
|
+
workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
|
|
14676
14297
|
};
|
|
14677
14298
|
}
|
|
14678
14299
|
return { dir: resolved };
|
|
@@ -14876,7 +14497,7 @@ async function runEvaluation(options) {
|
|
|
14876
14497
|
];
|
|
14877
14498
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14878
14499
|
const typeRegistry = createBuiltinRegistry();
|
|
14879
|
-
const discoveryBaseDir = evalFilePath ?
|
|
14500
|
+
const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
|
|
14880
14501
|
const evalDir = discoveryBaseDir;
|
|
14881
14502
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14882
14503
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -15065,7 +14686,7 @@ async function runEvaluation(options) {
|
|
|
15065
14686
|
}
|
|
15066
14687
|
try {
|
|
15067
14688
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
15068
|
-
const copiedWorkspaceFile =
|
|
14689
|
+
const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
|
|
15069
14690
|
try {
|
|
15070
14691
|
await stat7(copiedWorkspaceFile);
|
|
15071
14692
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -15175,10 +14796,10 @@ async function runEvaluation(options) {
|
|
|
15175
14796
|
const budgetResult = {
|
|
15176
14797
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15177
14798
|
testId: evalCase.id,
|
|
15178
|
-
|
|
14799
|
+
eval_set: evalCase.eval_set,
|
|
15179
14800
|
score: 0,
|
|
15180
14801
|
assertions: [],
|
|
15181
|
-
|
|
14802
|
+
output: [],
|
|
15182
14803
|
target: target.name,
|
|
15183
14804
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
15184
14805
|
budgetExceeded: true,
|
|
@@ -15211,10 +14832,10 @@ async function runEvaluation(options) {
|
|
|
15211
14832
|
const haltResult = {
|
|
15212
14833
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15213
14834
|
testId: evalCase.id,
|
|
15214
|
-
|
|
14835
|
+
eval_set: evalCase.eval_set,
|
|
15215
14836
|
score: 0,
|
|
15216
14837
|
assertions: [],
|
|
15217
|
-
|
|
14838
|
+
output: [],
|
|
15218
14839
|
target: target.name,
|
|
15219
14840
|
error: errorMsg,
|
|
15220
14841
|
executionStatus: "execution_error",
|
|
@@ -15443,8 +15064,6 @@ async function runBatchEvaluation(options) {
|
|
|
15443
15064
|
const promptInputs = promptInputsList[index];
|
|
15444
15065
|
return {
|
|
15445
15066
|
question: promptInputs.question,
|
|
15446
|
-
guidelines: promptInputs.guidelines,
|
|
15447
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
15448
15067
|
inputFiles: evalCase.file_paths,
|
|
15449
15068
|
evalCaseId: evalCase.id,
|
|
15450
15069
|
metadata: {
|
|
@@ -15642,7 +15261,7 @@ async function runEvalCase(options) {
|
|
|
15642
15261
|
);
|
|
15643
15262
|
}
|
|
15644
15263
|
if (caseWorkspaceFile && workspacePath) {
|
|
15645
|
-
const copiedFile =
|
|
15264
|
+
const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
|
|
15646
15265
|
try {
|
|
15647
15266
|
await stat7(copiedFile);
|
|
15648
15267
|
caseWorkspaceFile = copiedFile;
|
|
@@ -15702,10 +15321,10 @@ async function runEvalCase(options) {
|
|
|
15702
15321
|
const files = evalCase.metadata.agent_skills_files;
|
|
15703
15322
|
if (baseDir && files.length > 0) {
|
|
15704
15323
|
for (const relPath of files) {
|
|
15705
|
-
const srcPath =
|
|
15706
|
-
const destPath =
|
|
15324
|
+
const srcPath = path41.resolve(baseDir, relPath);
|
|
15325
|
+
const destPath = path41.resolve(workspacePath, relPath);
|
|
15707
15326
|
try {
|
|
15708
|
-
await mkdir13(
|
|
15327
|
+
await mkdir13(path41.dirname(destPath), { recursive: true });
|
|
15709
15328
|
await copyFile2(srcPath, destPath);
|
|
15710
15329
|
} catch (error) {
|
|
15711
15330
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -16152,8 +15771,7 @@ async function evaluateCandidate(options) {
|
|
|
16152
15771
|
let lmRequest;
|
|
16153
15772
|
if (isAgentProvider(provider)) {
|
|
16154
15773
|
agentRequest = {
|
|
16155
|
-
question: promptInputs.question
|
|
16156
|
-
guideline_paths: evalCase.guideline_paths
|
|
15774
|
+
question: promptInputs.question
|
|
16157
15775
|
};
|
|
16158
15776
|
} else {
|
|
16159
15777
|
if (promptInputs.chatPrompt) {
|
|
@@ -16162,8 +15780,7 @@ async function evaluateCandidate(options) {
|
|
|
16162
15780
|
};
|
|
16163
15781
|
} else {
|
|
16164
15782
|
lmRequest = {
|
|
16165
|
-
question: promptInputs.question
|
|
16166
|
-
guidelines: promptInputs.guidelines
|
|
15783
|
+
question: promptInputs.question
|
|
16167
15784
|
};
|
|
16168
15785
|
}
|
|
16169
15786
|
}
|
|
@@ -16177,11 +15794,10 @@ async function evaluateCandidate(options) {
|
|
|
16177
15794
|
return {
|
|
16178
15795
|
timestamp: completedAt.toISOString(),
|
|
16179
15796
|
testId: evalCase.id,
|
|
16180
|
-
|
|
15797
|
+
eval_set: evalCase.eval_set,
|
|
16181
15798
|
conversationId: evalCase.conversation_id,
|
|
16182
15799
|
score: score.score,
|
|
16183
15800
|
assertions: score.assertions,
|
|
16184
|
-
outputText: candidate,
|
|
16185
15801
|
target: target.name,
|
|
16186
15802
|
tokenUsage,
|
|
16187
15803
|
costUsd,
|
|
@@ -16192,7 +15808,7 @@ async function evaluateCandidate(options) {
|
|
|
16192
15808
|
input,
|
|
16193
15809
|
scores,
|
|
16194
15810
|
trace,
|
|
16195
|
-
output,
|
|
15811
|
+
output: output ?? [{ role: "assistant", content: candidate }],
|
|
16196
15812
|
fileChanges,
|
|
16197
15813
|
executionStatus: classifyQualityStatus(score.score)
|
|
16198
15814
|
};
|
|
@@ -16326,7 +15942,7 @@ async function runEvaluatorList(options) {
|
|
|
16326
15942
|
fileChanges,
|
|
16327
15943
|
workspacePath
|
|
16328
15944
|
};
|
|
16329
|
-
const evalFileDir = evalCase.
|
|
15945
|
+
const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
16330
15946
|
const dispatchContext = {
|
|
16331
15947
|
graderProvider,
|
|
16332
15948
|
targetResolver,
|
|
@@ -16357,7 +15973,7 @@ async function runEvaluatorList(options) {
|
|
|
16357
15973
|
weight,
|
|
16358
15974
|
verdict: score2.verdict,
|
|
16359
15975
|
assertions: score2.assertions,
|
|
16360
|
-
|
|
15976
|
+
input: score2.evaluatorRawRequest,
|
|
16361
15977
|
details: score2.details,
|
|
16362
15978
|
scores: mapChildResults(score2.scores),
|
|
16363
15979
|
tokenUsage: score2.tokenUsage,
|
|
@@ -16440,7 +16056,7 @@ function filterEvalCases(evalCases, filter) {
|
|
|
16440
16056
|
if (!filter) {
|
|
16441
16057
|
return evalCases;
|
|
16442
16058
|
}
|
|
16443
|
-
return evalCases.filter((evalCase) =>
|
|
16059
|
+
return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter));
|
|
16444
16060
|
}
|
|
16445
16061
|
function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
|
|
16446
16062
|
const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
|
|
@@ -16477,8 +16093,6 @@ async function invokeProvider(provider, options) {
|
|
|
16477
16093
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
16478
16094
|
return await provider.invoke({
|
|
16479
16095
|
question: promptInputs.question,
|
|
16480
|
-
guidelines: promptInputs.guidelines,
|
|
16481
|
-
guideline_patterns: evalCase.guideline_patterns,
|
|
16482
16096
|
chatPrompt: promptInputs.chatPrompt,
|
|
16483
16097
|
inputFiles: evalCase.file_paths,
|
|
16484
16098
|
evalCaseId: evalCase.id,
|
|
@@ -16506,21 +16120,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16506
16120
|
if (isAgentProvider(provider)) {
|
|
16507
16121
|
agentRequest = {
|
|
16508
16122
|
question: promptInputs.question,
|
|
16509
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16510
16123
|
error: message
|
|
16511
16124
|
};
|
|
16512
16125
|
} else {
|
|
16513
16126
|
if (promptInputs.chatPrompt) {
|
|
16514
16127
|
lmRequest = {
|
|
16515
16128
|
chat_prompt: promptInputs.chatPrompt,
|
|
16516
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16517
16129
|
error: message
|
|
16518
16130
|
};
|
|
16519
16131
|
} else {
|
|
16520
16132
|
lmRequest = {
|
|
16521
16133
|
question: promptInputs.question,
|
|
16522
|
-
guidelines: promptInputs.guidelines,
|
|
16523
|
-
guideline_paths: evalCase.guideline_paths,
|
|
16524
16134
|
error: message
|
|
16525
16135
|
};
|
|
16526
16136
|
}
|
|
@@ -16533,11 +16143,11 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16533
16143
|
return {
|
|
16534
16144
|
timestamp: timestamp.toISOString(),
|
|
16535
16145
|
testId: evalCase.id,
|
|
16536
|
-
|
|
16146
|
+
eval_set: evalCase.eval_set,
|
|
16537
16147
|
conversationId: evalCase.conversation_id,
|
|
16538
16148
|
score: 0,
|
|
16539
16149
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
16540
|
-
|
|
16150
|
+
output: [{ role: "assistant", content: `Error occurred: ${message}` }],
|
|
16541
16151
|
target: targetName,
|
|
16542
16152
|
requests,
|
|
16543
16153
|
input,
|
|
@@ -16566,7 +16176,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
|
16566
16176
|
hash.update(target.name);
|
|
16567
16177
|
hash.update(evalCase.id);
|
|
16568
16178
|
hash.update(promptInputs.question);
|
|
16569
|
-
hash.update(promptInputs.guidelines);
|
|
16570
16179
|
hash.update(promptInputs.systemMessage ?? "");
|
|
16571
16180
|
if (promptInputs.chatPrompt) {
|
|
16572
16181
|
hash.update(JSON.stringify(promptInputs.chatPrompt));
|
|
@@ -16581,7 +16190,7 @@ function buildResultInput(promptInputs) {
|
|
|
16581
16190
|
content: message.content
|
|
16582
16191
|
}));
|
|
16583
16192
|
}
|
|
16584
|
-
return promptInputs.question;
|
|
16193
|
+
return [{ role: "user", content: promptInputs.question }];
|
|
16585
16194
|
}
|
|
16586
16195
|
function aggregateEvaluatorTokenUsage(scores) {
|
|
16587
16196
|
if (!scores || scores.length === 0) return void 0;
|
|
@@ -16647,7 +16256,7 @@ function mapChildResults(children) {
|
|
|
16647
16256
|
weight: child.weight,
|
|
16648
16257
|
verdict: child.verdict,
|
|
16649
16258
|
assertions: child.assertions,
|
|
16650
|
-
|
|
16259
|
+
input: child.evaluatorRawRequest,
|
|
16651
16260
|
scores: mapChildResults(child.scores),
|
|
16652
16261
|
details: child.details,
|
|
16653
16262
|
tokenUsage: child.tokenUsage
|
|
@@ -16666,7 +16275,7 @@ function computeWeightedMean(entries) {
|
|
|
16666
16275
|
|
|
16667
16276
|
// src/evaluation/evaluate.ts
|
|
16668
16277
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16669
|
-
import
|
|
16278
|
+
import path42 from "node:path";
|
|
16670
16279
|
|
|
16671
16280
|
// src/evaluation/providers/function-provider.ts
|
|
16672
16281
|
function createFunctionProvider(taskFn) {
|
|
@@ -16703,7 +16312,7 @@ async function evaluate(config) {
|
|
|
16703
16312
|
}
|
|
16704
16313
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16705
16314
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16706
|
-
const testFilePath = config.specFile ?
|
|
16315
|
+
const testFilePath = config.specFile ? path42.resolve(config.specFile) : path42.join(process.cwd(), "__programmatic__.yaml");
|
|
16707
16316
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16708
16317
|
let resolvedTarget;
|
|
16709
16318
|
let taskProvider;
|
|
@@ -16769,8 +16378,6 @@ async function evaluate(config) {
|
|
|
16769
16378
|
input_segments: inputSegments,
|
|
16770
16379
|
expected_output: expectedOutput,
|
|
16771
16380
|
reference_answer: expectedOutputValue,
|
|
16772
|
-
guideline_paths: [],
|
|
16773
|
-
guideline_patterns: [],
|
|
16774
16381
|
file_paths: [],
|
|
16775
16382
|
assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
|
|
16776
16383
|
metadata: test.metadata
|
|
@@ -16832,10 +16439,10 @@ function computeSummary(results, durationMs) {
|
|
|
16832
16439
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16833
16440
|
async function discoverDefaultTarget(repoRoot) {
|
|
16834
16441
|
const cwd = process.cwd();
|
|
16835
|
-
const chain = buildDirectoryChain(
|
|
16442
|
+
const chain = buildDirectoryChain(path42.join(cwd, "_placeholder"), repoRoot);
|
|
16836
16443
|
for (const dir of chain) {
|
|
16837
16444
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16838
|
-
const targetsPath =
|
|
16445
|
+
const targetsPath = path42.join(dir, candidate);
|
|
16839
16446
|
if (!existsSync4(targetsPath)) continue;
|
|
16840
16447
|
try {
|
|
16841
16448
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16852,7 +16459,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16852
16459
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16853
16460
|
const envFiles = [];
|
|
16854
16461
|
for (const dir of chain) {
|
|
16855
|
-
const envPath =
|
|
16462
|
+
const envPath = path42.join(dir, ".env");
|
|
16856
16463
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16857
16464
|
}
|
|
16858
16465
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -17033,8 +16640,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
17033
16640
|
}
|
|
17034
16641
|
|
|
17035
16642
|
// src/evaluation/cache/response-cache.ts
|
|
17036
|
-
import { mkdir as mkdir14, readFile as
|
|
17037
|
-
import
|
|
16643
|
+
import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
|
|
16644
|
+
import path43 from "node:path";
|
|
17038
16645
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
17039
16646
|
var ResponseCache = class {
|
|
17040
16647
|
cachePath;
|
|
@@ -17044,7 +16651,7 @@ var ResponseCache = class {
|
|
|
17044
16651
|
async get(key) {
|
|
17045
16652
|
const filePath = this.keyToPath(key);
|
|
17046
16653
|
try {
|
|
17047
|
-
const data = await
|
|
16654
|
+
const data = await readFile11(filePath, "utf8");
|
|
17048
16655
|
return JSON.parse(data);
|
|
17049
16656
|
} catch {
|
|
17050
16657
|
return void 0;
|
|
@@ -17052,13 +16659,13 @@ var ResponseCache = class {
|
|
|
17052
16659
|
}
|
|
17053
16660
|
async set(key, value) {
|
|
17054
16661
|
const filePath = this.keyToPath(key);
|
|
17055
|
-
const dir =
|
|
16662
|
+
const dir = path43.dirname(filePath);
|
|
17056
16663
|
await mkdir14(dir, { recursive: true });
|
|
17057
16664
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
17058
16665
|
}
|
|
17059
16666
|
keyToPath(key) {
|
|
17060
16667
|
const prefix = key.slice(0, 2);
|
|
17061
|
-
return
|
|
16668
|
+
return path43.join(this.cachePath, prefix, `${key}.json`);
|
|
17062
16669
|
}
|
|
17063
16670
|
};
|
|
17064
16671
|
function shouldEnableCache(params) {
|
|
@@ -17075,7 +16682,6 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
17075
16682
|
|
|
17076
16683
|
// src/evaluation/baseline.ts
|
|
17077
16684
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
17078
|
-
"outputText",
|
|
17079
16685
|
"requests",
|
|
17080
16686
|
"trace",
|
|
17081
16687
|
"workspacePath",
|
|
@@ -17092,7 +16698,7 @@ var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
|
17092
16698
|
"startTime",
|
|
17093
16699
|
"endTime"
|
|
17094
16700
|
]);
|
|
17095
|
-
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "
|
|
16701
|
+
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "input"]);
|
|
17096
16702
|
function trimEvaluatorResult(result) {
|
|
17097
16703
|
const trimmed = {};
|
|
17098
16704
|
for (const [key, value] of Object.entries(result)) {
|
|
@@ -17247,9 +16853,13 @@ var OtelTraceExporter = class {
|
|
|
17247
16853
|
rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17248
16854
|
rootSpan.setAttribute("agentv.test_id", result.testId);
|
|
17249
16855
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17250
|
-
if (result.
|
|
16856
|
+
if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
|
|
17251
16857
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17252
|
-
if (captureContent
|
|
16858
|
+
if (captureContent && result.output.length > 0) {
|
|
16859
|
+
const lastMsg = result.output[result.output.length - 1];
|
|
16860
|
+
const text = typeof lastMsg.content === "string" ? lastMsg.content : JSON.stringify(lastMsg.content);
|
|
16861
|
+
rootSpan.setAttribute("agentv.output_text", text);
|
|
16862
|
+
}
|
|
17253
16863
|
if (result.durationMs != null)
|
|
17254
16864
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
17255
16865
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
@@ -17428,14 +17038,14 @@ var OtelStreamingObserver = class {
|
|
|
17428
17038
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
17429
17039
|
rootCtx = null;
|
|
17430
17040
|
/** Create root eval span immediately (visible in backend right away) */
|
|
17431
|
-
startEvalCase(testId, target,
|
|
17041
|
+
startEvalCase(testId, target, evalSet) {
|
|
17432
17042
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
17433
17043
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
17434
17044
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
17435
17045
|
this.rootSpan.setAttribute("gen_ai.system", "agentv");
|
|
17436
17046
|
this.rootSpan.setAttribute("agentv.test_id", testId);
|
|
17437
17047
|
this.rootSpan.setAttribute("agentv.target", target);
|
|
17438
|
-
if (
|
|
17048
|
+
if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
|
|
17439
17049
|
this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
|
|
17440
17050
|
}
|
|
17441
17051
|
/** Create and immediately export a tool span */
|
|
@@ -17623,7 +17233,6 @@ export {
|
|
|
17623
17233
|
initializeBaseline,
|
|
17624
17234
|
isAgentSkillsFormat,
|
|
17625
17235
|
isEvaluatorKind,
|
|
17626
|
-
isGuidelineFile,
|
|
17627
17236
|
isJsonObject,
|
|
17628
17237
|
isJsonValue,
|
|
17629
17238
|
isNonEmptyString,
|