@agentv/core 3.7.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-2IZOTQ25.js → chunk-3ZS3GCMI.js} +143 -3
- package/dist/chunk-3ZS3GCMI.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +227 -39
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +84 -5
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +13 -11
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +5 -6
- package/dist/index.d.ts +5 -6
- package/dist/index.js +419 -551
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-2IZOTQ25.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -2,21 +2,24 @@ import {
|
|
|
2
2
|
TEST_MESSAGE_ROLES,
|
|
3
3
|
buildDirectoryChain,
|
|
4
4
|
buildSearchRoots,
|
|
5
|
+
expandFileReferences,
|
|
5
6
|
extractLastAssistantContent,
|
|
6
7
|
fileExists,
|
|
7
8
|
findGitRoot,
|
|
9
|
+
interpolateEnv,
|
|
8
10
|
isAgentProvider,
|
|
9
11
|
isEvaluatorKind,
|
|
10
12
|
isJsonObject,
|
|
11
13
|
isJsonValue,
|
|
12
14
|
isTestMessage,
|
|
13
15
|
isTestMessageRole,
|
|
16
|
+
loadCasesFromFile,
|
|
14
17
|
normalizeLineEndings,
|
|
15
18
|
readJsonFile,
|
|
16
19
|
readTextFile,
|
|
17
20
|
resolveFileReference,
|
|
18
21
|
resolveTargetDefinition
|
|
19
|
-
} from "./chunk-2IZOTQ25.js";
|
|
22
|
+
} from "./chunk-3ZS3GCMI.js";
|
|
20
23
|
import {
|
|
21
24
|
AgentvProvider
|
|
22
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -146,30 +149,11 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
146
149
|
}
|
|
147
150
|
|
|
148
151
|
// src/evaluation/yaml-parser.ts
|
|
149
|
-
import { readFile as
|
|
150
|
-
import
|
|
152
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
153
|
+
import path8 from "node:path";
|
|
151
154
|
import micromatch3 from "micromatch";
|
|
152
155
|
import { parse as parse2 } from "yaml";
|
|
153
156
|
|
|
154
|
-
// src/evaluation/interpolation.ts
|
|
155
|
-
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
156
|
-
function interpolateEnv(value, env) {
|
|
157
|
-
if (typeof value === "string") {
|
|
158
|
-
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
159
|
-
}
|
|
160
|
-
if (Array.isArray(value)) {
|
|
161
|
-
return value.map((item) => interpolateEnv(item, env));
|
|
162
|
-
}
|
|
163
|
-
if (value !== null && typeof value === "object") {
|
|
164
|
-
const result = {};
|
|
165
|
-
for (const [key, val] of Object.entries(value)) {
|
|
166
|
-
result[key] = interpolateEnv(val, env);
|
|
167
|
-
}
|
|
168
|
-
return result;
|
|
169
|
-
}
|
|
170
|
-
return value;
|
|
171
|
-
}
|
|
172
|
-
|
|
173
157
|
// src/evaluation/loaders/agent-skills-parser.ts
|
|
174
158
|
import { readFile } from "node:fs/promises";
|
|
175
159
|
import path from "node:path";
|
|
@@ -252,134 +236,16 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
252
236
|
return tests;
|
|
253
237
|
}
|
|
254
238
|
|
|
255
|
-
// src/evaluation/loaders/case-file-loader.ts
|
|
256
|
-
import { readFile as readFile2 } from "node:fs/promises";
|
|
257
|
-
import path2 from "node:path";
|
|
258
|
-
import fg from "fast-glob";
|
|
259
|
-
import { parse as parseYaml } from "yaml";
|
|
260
|
-
var ANSI_YELLOW = "\x1B[33m";
|
|
261
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
262
|
-
var FILE_PROTOCOL = "file://";
|
|
263
|
-
function isFileReference(value) {
|
|
264
|
-
return typeof value === "string" && value.startsWith(FILE_PROTOCOL);
|
|
265
|
-
}
|
|
266
|
-
function extractFilePath(ref) {
|
|
267
|
-
return ref.slice(FILE_PROTOCOL.length);
|
|
268
|
-
}
|
|
269
|
-
function isGlobPattern(filePath) {
|
|
270
|
-
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
271
|
-
}
|
|
272
|
-
function parseYamlCases(content, filePath) {
|
|
273
|
-
const raw = parseYaml(content);
|
|
274
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
275
|
-
if (!Array.isArray(parsed)) {
|
|
276
|
-
throw new Error(
|
|
277
|
-
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
278
|
-
);
|
|
279
|
-
}
|
|
280
|
-
const results = [];
|
|
281
|
-
for (const item of parsed) {
|
|
282
|
-
if (!isJsonObject(item)) {
|
|
283
|
-
throw new Error(`External test file contains non-object entry: ${filePath}`);
|
|
284
|
-
}
|
|
285
|
-
results.push(item);
|
|
286
|
-
}
|
|
287
|
-
return results;
|
|
288
|
-
}
|
|
289
|
-
function parseJsonlCases(content, filePath) {
|
|
290
|
-
const lines = content.split("\n");
|
|
291
|
-
const results = [];
|
|
292
|
-
for (let i = 0; i < lines.length; i++) {
|
|
293
|
-
const line = lines[i].trim();
|
|
294
|
-
if (line === "") continue;
|
|
295
|
-
try {
|
|
296
|
-
const raw = JSON.parse(line);
|
|
297
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
298
|
-
if (!isJsonObject(parsed)) {
|
|
299
|
-
throw new Error("Expected JSON object");
|
|
300
|
-
}
|
|
301
|
-
results.push(parsed);
|
|
302
|
-
} catch (error) {
|
|
303
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
304
|
-
throw new Error(`Malformed JSONL at line ${i + 1}: ${message}
|
|
305
|
-
File: ${filePath}`);
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
return results;
|
|
309
|
-
}
|
|
310
|
-
async function loadCasesFromFile(filePath) {
|
|
311
|
-
const ext = path2.extname(filePath).toLowerCase();
|
|
312
|
-
let content;
|
|
313
|
-
try {
|
|
314
|
-
content = await readFile2(filePath, "utf8");
|
|
315
|
-
} catch (error) {
|
|
316
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
317
|
-
throw new Error(`Cannot read external test file: ${filePath}
|
|
318
|
-
${message}`);
|
|
319
|
-
}
|
|
320
|
-
if (content.trim() === "") {
|
|
321
|
-
console.warn(
|
|
322
|
-
`${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET2}`
|
|
323
|
-
);
|
|
324
|
-
return [];
|
|
325
|
-
}
|
|
326
|
-
if (ext === ".yaml" || ext === ".yml") {
|
|
327
|
-
return parseYamlCases(content, filePath);
|
|
328
|
-
}
|
|
329
|
-
if (ext === ".jsonl") {
|
|
330
|
-
return parseJsonlCases(content, filePath);
|
|
331
|
-
}
|
|
332
|
-
throw new Error(
|
|
333
|
-
`Unsupported external test file format '${ext}': ${filePath}. Supported: .yaml, .yml, .jsonl`
|
|
334
|
-
);
|
|
335
|
-
}
|
|
336
|
-
async function resolveFileReference2(ref, evalFileDir) {
|
|
337
|
-
const rawPath = extractFilePath(ref);
|
|
338
|
-
const absolutePattern = path2.resolve(evalFileDir, rawPath);
|
|
339
|
-
if (isGlobPattern(rawPath)) {
|
|
340
|
-
const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
|
|
341
|
-
onlyFiles: true,
|
|
342
|
-
absolute: true
|
|
343
|
-
});
|
|
344
|
-
if (matches.length === 0) {
|
|
345
|
-
console.warn(
|
|
346
|
-
`${ANSI_YELLOW}Warning: Glob pattern matched no files: ${ref} (resolved to ${absolutePattern})${ANSI_RESET2}`
|
|
347
|
-
);
|
|
348
|
-
return [];
|
|
349
|
-
}
|
|
350
|
-
matches.sort();
|
|
351
|
-
const allCases = [];
|
|
352
|
-
for (const match of matches) {
|
|
353
|
-
const cases = await loadCasesFromFile(match);
|
|
354
|
-
allCases.push(...cases);
|
|
355
|
-
}
|
|
356
|
-
return allCases;
|
|
357
|
-
}
|
|
358
|
-
return loadCasesFromFile(absolutePattern);
|
|
359
|
-
}
|
|
360
|
-
async function expandFileReferences(tests, evalFileDir) {
|
|
361
|
-
const expanded = [];
|
|
362
|
-
for (const entry of tests) {
|
|
363
|
-
if (isFileReference(entry)) {
|
|
364
|
-
const cases = await resolveFileReference2(entry, evalFileDir);
|
|
365
|
-
expanded.push(...cases);
|
|
366
|
-
} else {
|
|
367
|
-
expanded.push(entry);
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
return expanded;
|
|
371
|
-
}
|
|
372
|
-
|
|
373
239
|
// src/evaluation/loaders/config-loader.ts
|
|
374
|
-
import { readFile as
|
|
375
|
-
import
|
|
240
|
+
import { readFile as readFile2 } from "node:fs/promises";
|
|
241
|
+
import path3 from "node:path";
|
|
376
242
|
import micromatch from "micromatch";
|
|
377
243
|
import { parse } from "yaml";
|
|
378
244
|
|
|
379
245
|
// src/evaluation/loaders/file-resolver.ts
|
|
380
246
|
import { constants } from "node:fs";
|
|
381
247
|
import { access } from "node:fs/promises";
|
|
382
|
-
import
|
|
248
|
+
import path2 from "node:path";
|
|
383
249
|
import { fileURLToPath } from "node:url";
|
|
384
250
|
async function fileExists2(absolutePath) {
|
|
385
251
|
try {
|
|
@@ -397,15 +263,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
397
263
|
if (candidate.startsWith("file:")) {
|
|
398
264
|
return fileURLToPath(candidate);
|
|
399
265
|
}
|
|
400
|
-
return
|
|
266
|
+
return path2.resolve(candidate);
|
|
401
267
|
}
|
|
402
268
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
403
269
|
}
|
|
404
270
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
405
271
|
const directories = [];
|
|
406
272
|
const seen = /* @__PURE__ */ new Set();
|
|
407
|
-
const boundary =
|
|
408
|
-
let current =
|
|
273
|
+
const boundary = path2.resolve(repoRoot);
|
|
274
|
+
let current = path2.resolve(path2.dirname(filePath));
|
|
409
275
|
while (current !== void 0) {
|
|
410
276
|
if (!seen.has(current)) {
|
|
411
277
|
directories.push(current);
|
|
@@ -414,7 +280,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
414
280
|
if (current === boundary) {
|
|
415
281
|
break;
|
|
416
282
|
}
|
|
417
|
-
const parent =
|
|
283
|
+
const parent = path2.dirname(current);
|
|
418
284
|
if (parent === current) {
|
|
419
285
|
break;
|
|
420
286
|
}
|
|
@@ -428,16 +294,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
428
294
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
429
295
|
const uniqueRoots = [];
|
|
430
296
|
const addRoot = (root) => {
|
|
431
|
-
const normalized =
|
|
297
|
+
const normalized = path2.resolve(root);
|
|
432
298
|
if (!uniqueRoots.includes(normalized)) {
|
|
433
299
|
uniqueRoots.push(normalized);
|
|
434
300
|
}
|
|
435
301
|
};
|
|
436
|
-
let currentDir =
|
|
302
|
+
let currentDir = path2.dirname(evalPath);
|
|
437
303
|
let reachedBoundary = false;
|
|
438
304
|
while (!reachedBoundary) {
|
|
439
305
|
addRoot(currentDir);
|
|
440
|
-
const parentDir =
|
|
306
|
+
const parentDir = path2.dirname(currentDir);
|
|
441
307
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
442
308
|
reachedBoundary = true;
|
|
443
309
|
} else {
|
|
@@ -452,19 +318,19 @@ function trimLeadingSeparators(value) {
|
|
|
452
318
|
const trimmed = value.replace(/^[/\\]+/, "");
|
|
453
319
|
return trimmed.length > 0 ? trimmed : value;
|
|
454
320
|
}
|
|
455
|
-
async function resolveFileReference3(rawValue, searchRoots) {
|
|
321
|
+
async function resolveFileReference2(rawValue, searchRoots) {
|
|
456
322
|
const displayPath = trimLeadingSeparators(rawValue);
|
|
457
323
|
const potentialPaths = [];
|
|
458
|
-
if (
|
|
459
|
-
potentialPaths.push(
|
|
324
|
+
if (path2.isAbsolute(rawValue)) {
|
|
325
|
+
potentialPaths.push(path2.normalize(rawValue));
|
|
460
326
|
}
|
|
461
327
|
for (const base of searchRoots) {
|
|
462
|
-
potentialPaths.push(
|
|
328
|
+
potentialPaths.push(path2.resolve(base, displayPath));
|
|
463
329
|
}
|
|
464
330
|
const attempted = [];
|
|
465
331
|
const seen = /* @__PURE__ */ new Set();
|
|
466
332
|
for (const candidate of potentialPaths) {
|
|
467
|
-
const absoluteCandidate =
|
|
333
|
+
const absoluteCandidate = path2.resolve(candidate);
|
|
468
334
|
if (seen.has(absoluteCandidate)) {
|
|
469
335
|
continue;
|
|
470
336
|
}
|
|
@@ -478,8 +344,8 @@ async function resolveFileReference3(rawValue, searchRoots) {
|
|
|
478
344
|
}
|
|
479
345
|
|
|
480
346
|
// src/evaluation/loaders/config-loader.ts
|
|
481
|
-
var
|
|
482
|
-
var
|
|
347
|
+
var ANSI_YELLOW = "\x1B[33m";
|
|
348
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
483
349
|
var DEFAULT_EVAL_PATTERNS = [
|
|
484
350
|
"**/evals/**/*.eval.yaml",
|
|
485
351
|
"**/evals/**/eval.yaml"
|
|
@@ -487,12 +353,12 @@ var DEFAULT_EVAL_PATTERNS = [
|
|
|
487
353
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
488
354
|
const directories = buildDirectoryChain2(evalFilePath, repoRoot);
|
|
489
355
|
for (const directory of directories) {
|
|
490
|
-
const configPath =
|
|
356
|
+
const configPath = path3.join(directory, ".agentv", "config.yaml");
|
|
491
357
|
if (!await fileExists2(configPath)) {
|
|
492
358
|
continue;
|
|
493
359
|
}
|
|
494
360
|
try {
|
|
495
|
-
const rawConfig = await
|
|
361
|
+
const rawConfig = await readFile2(configPath, "utf8");
|
|
496
362
|
const parsed = parse(rawConfig);
|
|
497
363
|
if (!isJsonObject(parsed)) {
|
|
498
364
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
@@ -729,14 +595,14 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
729
595
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
730
596
|
}
|
|
731
597
|
function logWarning(message) {
|
|
732
|
-
console.warn(`${
|
|
598
|
+
console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`);
|
|
733
599
|
}
|
|
734
600
|
|
|
735
601
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
736
|
-
import
|
|
602
|
+
import path4 from "node:path";
|
|
737
603
|
|
|
738
604
|
// src/evaluation/validation/prompt-validator.ts
|
|
739
|
-
import { readFile as
|
|
605
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
740
606
|
|
|
741
607
|
// src/evaluation/template-variables.ts
|
|
742
608
|
var TEMPLATE_VARIABLES = {
|
|
@@ -756,10 +622,10 @@ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
|
756
622
|
]);
|
|
757
623
|
|
|
758
624
|
// src/evaluation/validation/prompt-validator.ts
|
|
759
|
-
var
|
|
760
|
-
var
|
|
625
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
626
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
761
627
|
async function validateCustomPromptContent(promptPath) {
|
|
762
|
-
const content = await
|
|
628
|
+
const content = await readFile3(promptPath, "utf8");
|
|
763
629
|
validateTemplateVariables(content, promptPath);
|
|
764
630
|
}
|
|
765
631
|
function validateTemplateVariables(content, source) {
|
|
@@ -786,16 +652,16 @@ function validateTemplateVariables(content, source) {
|
|
|
786
652
|
);
|
|
787
653
|
}
|
|
788
654
|
if (invalidVariables.length > 0) {
|
|
789
|
-
const warningMessage = `${
|
|
655
|
+
const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
|
|
790
656
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
791
|
-
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${
|
|
657
|
+
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET3}`;
|
|
792
658
|
console.warn(warningMessage);
|
|
793
659
|
}
|
|
794
660
|
}
|
|
795
661
|
|
|
796
662
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
797
|
-
var
|
|
798
|
-
var
|
|
663
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
664
|
+
var ANSI_RESET4 = "\x1B[0m";
|
|
799
665
|
function normalizeEvaluatorType(type) {
|
|
800
666
|
return type.replace(/_/g, "-");
|
|
801
667
|
}
|
|
@@ -897,7 +763,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
897
763
|
let command;
|
|
898
764
|
if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
|
|
899
765
|
console.warn(
|
|
900
|
-
`${
|
|
766
|
+
`${ANSI_YELLOW3}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
|
|
901
767
|
);
|
|
902
768
|
}
|
|
903
769
|
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
@@ -923,9 +789,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
923
789
|
const cwd = asString(rawEvaluator.cwd);
|
|
924
790
|
let resolvedCwd;
|
|
925
791
|
if (cwd) {
|
|
926
|
-
const resolved = await resolveFileReference3(cwd, searchRoots);
|
|
792
|
+
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
927
793
|
if (resolved.resolvedPath) {
|
|
928
|
-
resolvedCwd =
|
|
794
|
+
resolvedCwd = path4.resolve(resolved.resolvedPath);
|
|
929
795
|
} else {
|
|
930
796
|
logWarning2(
|
|
931
797
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -1081,9 +947,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1081
947
|
const aggregatorPrompt = asString(rawAggregator.prompt);
|
|
1082
948
|
let promptPath2;
|
|
1083
949
|
if (aggregatorPrompt) {
|
|
1084
|
-
const resolved = await resolveFileReference3(aggregatorPrompt, searchRoots);
|
|
950
|
+
const resolved = await resolveFileReference2(aggregatorPrompt, searchRoots);
|
|
1085
951
|
if (resolved.resolvedPath) {
|
|
1086
|
-
promptPath2 =
|
|
952
|
+
promptPath2 = path4.resolve(resolved.resolvedPath);
|
|
1087
953
|
}
|
|
1088
954
|
}
|
|
1089
955
|
aggregator = {
|
|
@@ -1640,7 +1506,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1640
1506
|
if (isJsonObject2(rawPrompt)) {
|
|
1641
1507
|
if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
|
|
1642
1508
|
console.warn(
|
|
1643
|
-
`${
|
|
1509
|
+
`${ANSI_YELLOW3}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
|
|
1644
1510
|
);
|
|
1645
1511
|
}
|
|
1646
1512
|
const commandArray = asStringArray(
|
|
@@ -1651,9 +1517,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1651
1517
|
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
|
|
1652
1518
|
}
|
|
1653
1519
|
const commandPath = commandArray[commandArray.length - 1];
|
|
1654
|
-
const resolved = await resolveFileReference3(commandPath, searchRoots);
|
|
1520
|
+
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
1655
1521
|
if (resolved.resolvedPath) {
|
|
1656
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
1522
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
|
|
1657
1523
|
} else {
|
|
1658
1524
|
throw new Error(
|
|
1659
1525
|
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -1664,9 +1530,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1664
1530
|
}
|
|
1665
1531
|
} else if (typeof rawPrompt === "string") {
|
|
1666
1532
|
prompt = rawPrompt;
|
|
1667
|
-
const resolved = await resolveFileReference3(prompt, searchRoots);
|
|
1533
|
+
const resolved = await resolveFileReference2(prompt, searchRoots);
|
|
1668
1534
|
if (resolved.resolvedPath) {
|
|
1669
|
-
promptPath =
|
|
1535
|
+
promptPath = path4.resolve(resolved.resolvedPath);
|
|
1670
1536
|
try {
|
|
1671
1537
|
await validateCustomPromptContent(promptPath);
|
|
1672
1538
|
} catch (error) {
|
|
@@ -1866,10 +1732,10 @@ function warnUnconsumedCriteria(_criteria, _evaluators, _testId) {
|
|
|
1866
1732
|
function logWarning2(message, details) {
|
|
1867
1733
|
if (details && details.length > 0) {
|
|
1868
1734
|
const detailBlock = details.join("\n");
|
|
1869
|
-
console.warn(`${
|
|
1870
|
-
${detailBlock}${
|
|
1735
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}
|
|
1736
|
+
${detailBlock}${ANSI_RESET4}`);
|
|
1871
1737
|
} else {
|
|
1872
|
-
console.warn(`${
|
|
1738
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
|
|
1873
1739
|
}
|
|
1874
1740
|
}
|
|
1875
1741
|
function parseRequired(value) {
|
|
@@ -2118,14 +1984,14 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
2118
1984
|
}
|
|
2119
1985
|
|
|
2120
1986
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2121
|
-
import { readFile as
|
|
2122
|
-
import
|
|
1987
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
1988
|
+
import path6 from "node:path";
|
|
2123
1989
|
import micromatch2 from "micromatch";
|
|
2124
|
-
import { parse as
|
|
1990
|
+
import { parse as parseYaml } from "yaml";
|
|
2125
1991
|
|
|
2126
1992
|
// src/evaluation/loaders/message-processor.ts
|
|
2127
|
-
import { readFile as
|
|
2128
|
-
import
|
|
1993
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
1994
|
+
import path5 from "node:path";
|
|
2129
1995
|
|
|
2130
1996
|
// src/evaluation/formatting/segment-formatter.ts
|
|
2131
1997
|
function formatFileContents(parts) {
|
|
@@ -2188,8 +2054,8 @@ function asString2(value) {
|
|
|
2188
2054
|
}
|
|
2189
2055
|
|
|
2190
2056
|
// src/evaluation/loaders/message-processor.ts
|
|
2191
|
-
var
|
|
2192
|
-
var
|
|
2057
|
+
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2058
|
+
var ANSI_RESET5 = "\x1B[0m";
|
|
2193
2059
|
async function processMessages(options) {
|
|
2194
2060
|
const {
|
|
2195
2061
|
messages,
|
|
@@ -2233,7 +2099,7 @@ async function processMessages(options) {
|
|
|
2233
2099
|
if (!rawValue) {
|
|
2234
2100
|
continue;
|
|
2235
2101
|
}
|
|
2236
|
-
const { displayPath, resolvedPath, attempted } = await resolveFileReference3(
|
|
2102
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2237
2103
|
rawValue,
|
|
2238
2104
|
searchRoots
|
|
2239
2105
|
);
|
|
@@ -2244,7 +2110,7 @@ async function processMessages(options) {
|
|
|
2244
2110
|
continue;
|
|
2245
2111
|
}
|
|
2246
2112
|
try {
|
|
2247
|
-
const fileContent = (await
|
|
2113
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2248
2114
|
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
2249
2115
|
messageType,
|
|
2250
2116
|
resolvedPath,
|
|
@@ -2253,7 +2119,7 @@ async function processMessages(options) {
|
|
|
2253
2119
|
treatFileSegmentsAsGuidelines
|
|
2254
2120
|
});
|
|
2255
2121
|
if (classifyAsGuideline && guidelinePaths) {
|
|
2256
|
-
guidelinePaths.push(
|
|
2122
|
+
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
2257
2123
|
if (verbose) {
|
|
2258
2124
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
2259
2125
|
console.log(` Resolved to: ${resolvedPath}`);
|
|
@@ -2264,7 +2130,7 @@ async function processMessages(options) {
|
|
|
2264
2130
|
type: "file",
|
|
2265
2131
|
path: displayPath,
|
|
2266
2132
|
text: fileContent,
|
|
2267
|
-
resolvedPath:
|
|
2133
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
2268
2134
|
});
|
|
2269
2135
|
if (verbose) {
|
|
2270
2136
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -2304,7 +2170,7 @@ function shouldTreatAsGuideline(options) {
|
|
|
2304
2170
|
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2305
2171
|
return false;
|
|
2306
2172
|
}
|
|
2307
|
-
const relativeToRepo =
|
|
2173
|
+
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
2308
2174
|
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
2309
2175
|
}
|
|
2310
2176
|
function asString3(value) {
|
|
@@ -2332,10 +2198,10 @@ function cloneJsonValue(value) {
|
|
|
2332
2198
|
function logWarning3(message, details) {
|
|
2333
2199
|
if (details && details.length > 0) {
|
|
2334
2200
|
const detailBlock = details.join("\n");
|
|
2335
|
-
console.warn(`${
|
|
2336
|
-
${detailBlock}${
|
|
2201
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}
|
|
2202
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
2337
2203
|
} else {
|
|
2338
|
-
console.warn(`${
|
|
2204
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
|
|
2339
2205
|
}
|
|
2340
2206
|
}
|
|
2341
2207
|
async function processExpectedMessages(options) {
|
|
@@ -2364,7 +2230,7 @@ async function processExpectedMessages(options) {
|
|
|
2364
2230
|
if (!rawValue) {
|
|
2365
2231
|
continue;
|
|
2366
2232
|
}
|
|
2367
|
-
const { displayPath, resolvedPath, attempted } = await resolveFileReference3(
|
|
2233
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2368
2234
|
rawValue,
|
|
2369
2235
|
searchRoots
|
|
2370
2236
|
);
|
|
@@ -2374,12 +2240,12 @@ async function processExpectedMessages(options) {
|
|
|
2374
2240
|
continue;
|
|
2375
2241
|
}
|
|
2376
2242
|
try {
|
|
2377
|
-
const fileContent = (await
|
|
2243
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2378
2244
|
processedContent.push({
|
|
2379
2245
|
type: "file",
|
|
2380
2246
|
path: displayPath,
|
|
2381
2247
|
text: fileContent,
|
|
2382
|
-
resolvedPath:
|
|
2248
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
2383
2249
|
});
|
|
2384
2250
|
if (verbose) {
|
|
2385
2251
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -2476,11 +2342,11 @@ function resolveExpectedMessages(raw) {
|
|
|
2476
2342
|
}
|
|
2477
2343
|
|
|
2478
2344
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2479
|
-
var
|
|
2345
|
+
var ANSI_YELLOW5 = "\x1B[33m";
|
|
2480
2346
|
var ANSI_RED2 = "\x1B[31m";
|
|
2481
|
-
var
|
|
2347
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
2482
2348
|
function detectFormat(filePath) {
|
|
2483
|
-
const ext =
|
|
2349
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
2484
2350
|
if (ext === ".jsonl") return "jsonl";
|
|
2485
2351
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
2486
2352
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -2489,9 +2355,9 @@ function detectFormat(filePath) {
|
|
|
2489
2355
|
);
|
|
2490
2356
|
}
|
|
2491
2357
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
2492
|
-
const dir =
|
|
2493
|
-
const base =
|
|
2494
|
-
const sidecarPath =
|
|
2358
|
+
const dir = path6.dirname(jsonlPath);
|
|
2359
|
+
const base = path6.basename(jsonlPath, ".jsonl");
|
|
2360
|
+
const sidecarPath = path6.join(dir, `${base}.yaml`);
|
|
2495
2361
|
if (!await fileExists2(sidecarPath)) {
|
|
2496
2362
|
if (verbose) {
|
|
2497
2363
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -2499,8 +2365,8 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
2499
2365
|
return {};
|
|
2500
2366
|
}
|
|
2501
2367
|
try {
|
|
2502
|
-
const content = await
|
|
2503
|
-
const parsed = interpolateEnv(
|
|
2368
|
+
const content = await readFile5(sidecarPath, "utf8");
|
|
2369
|
+
const parsed = interpolateEnv(parseYaml(content), process.env);
|
|
2504
2370
|
if (!isJsonObject(parsed)) {
|
|
2505
2371
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
2506
2372
|
return {};
|
|
@@ -2540,15 +2406,15 @@ function parseJsonlContent(content, filePath) {
|
|
|
2540
2406
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
2541
2407
|
const verbose = options?.verbose ?? false;
|
|
2542
2408
|
const filterPattern = options?.filter;
|
|
2543
|
-
const absoluteTestPath =
|
|
2409
|
+
const absoluteTestPath = path6.resolve(evalFilePath);
|
|
2544
2410
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2545
2411
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2546
2412
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2547
2413
|
const guidelinePatterns = config?.guideline_patterns;
|
|
2548
2414
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
2549
|
-
const rawFile = await
|
|
2415
|
+
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
2550
2416
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
2551
|
-
const fallbackDataset =
|
|
2417
|
+
const fallbackDataset = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
2552
2418
|
const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
|
|
2553
2419
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
2554
2420
|
const globalExecution = sidecar.execution;
|
|
@@ -2647,7 +2513,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2647
2513
|
}
|
|
2648
2514
|
}
|
|
2649
2515
|
const allFilePaths = [
|
|
2650
|
-
...guidelinePaths.map((guidelinePath) =>
|
|
2516
|
+
...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
2651
2517
|
...userFilePaths
|
|
2652
2518
|
];
|
|
2653
2519
|
const testCase = {
|
|
@@ -2659,7 +2525,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2659
2525
|
input_segments: inputSegments,
|
|
2660
2526
|
expected_output: outputSegments,
|
|
2661
2527
|
reference_answer: referenceAnswer,
|
|
2662
|
-
guideline_paths: guidelinePaths.map((guidelinePath) =>
|
|
2528
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
2663
2529
|
guideline_patterns: guidelinePatterns,
|
|
2664
2530
|
file_paths: allFilePaths,
|
|
2665
2531
|
criteria: outcome ?? "",
|
|
@@ -2690,19 +2556,19 @@ function asString4(value) {
|
|
|
2690
2556
|
function logWarning4(message, details) {
|
|
2691
2557
|
if (details && details.length > 0) {
|
|
2692
2558
|
const detailBlock = details.join("\n");
|
|
2693
|
-
console.warn(`${
|
|
2694
|
-
${detailBlock}${
|
|
2559
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}
|
|
2560
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
2695
2561
|
} else {
|
|
2696
|
-
console.warn(`${
|
|
2562
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET6}`);
|
|
2697
2563
|
}
|
|
2698
2564
|
}
|
|
2699
2565
|
function logError2(message, details) {
|
|
2700
2566
|
if (details && details.length > 0) {
|
|
2701
2567
|
const detailBlock = details.join("\n");
|
|
2702
2568
|
console.error(`${ANSI_RED2}Error: ${message}
|
|
2703
|
-
${detailBlock}${
|
|
2569
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
2704
2570
|
} else {
|
|
2705
|
-
console.error(`${ANSI_RED2}Error: ${message}${
|
|
2571
|
+
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET6}`);
|
|
2706
2572
|
}
|
|
2707
2573
|
}
|
|
2708
2574
|
|
|
@@ -2737,24 +2603,24 @@ function parseMetadata(suite) {
|
|
|
2737
2603
|
}
|
|
2738
2604
|
|
|
2739
2605
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2740
|
-
import { readFile as
|
|
2741
|
-
import
|
|
2742
|
-
var
|
|
2743
|
-
var
|
|
2606
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
2607
|
+
import path7 from "node:path";
|
|
2608
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2609
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
2744
2610
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2745
2611
|
const guidelineParts = [];
|
|
2746
2612
|
for (const rawPath of testCase.guideline_paths) {
|
|
2747
|
-
const absolutePath =
|
|
2613
|
+
const absolutePath = path7.resolve(rawPath);
|
|
2748
2614
|
if (!await fileExists2(absolutePath)) {
|
|
2749
2615
|
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
2750
2616
|
continue;
|
|
2751
2617
|
}
|
|
2752
2618
|
try {
|
|
2753
|
-
const content = (await
|
|
2619
|
+
const content = (await readFile6(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
2754
2620
|
guidelineParts.push({
|
|
2755
2621
|
content,
|
|
2756
2622
|
isFile: true,
|
|
2757
|
-
displayPath:
|
|
2623
|
+
displayPath: path7.basename(absolutePath)
|
|
2758
2624
|
});
|
|
2759
2625
|
} catch (error) {
|
|
2760
2626
|
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
@@ -2952,13 +2818,13 @@ function asString5(value) {
|
|
|
2952
2818
|
return typeof value === "string" ? value : void 0;
|
|
2953
2819
|
}
|
|
2954
2820
|
function logWarning5(message) {
|
|
2955
|
-
console.warn(`${
|
|
2821
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
2956
2822
|
}
|
|
2957
2823
|
|
|
2958
2824
|
// src/evaluation/yaml-parser.ts
|
|
2959
|
-
var
|
|
2825
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
2960
2826
|
var ANSI_RED3 = "\x1B[31m";
|
|
2961
|
-
var
|
|
2827
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
2962
2828
|
function resolveTests(suite) {
|
|
2963
2829
|
if (suite.tests !== void 0) return suite.tests;
|
|
2964
2830
|
if (suite.eval_cases !== void 0) {
|
|
@@ -2973,8 +2839,8 @@ function resolveTests(suite) {
|
|
|
2973
2839
|
}
|
|
2974
2840
|
async function readTestSuiteMetadata(testFilePath) {
|
|
2975
2841
|
try {
|
|
2976
|
-
const absolutePath =
|
|
2977
|
-
const content = await
|
|
2842
|
+
const absolutePath = path8.resolve(testFilePath);
|
|
2843
|
+
const content = await readFile7(absolutePath, "utf8");
|
|
2978
2844
|
const parsed = interpolateEnv(parse2(content), process.env);
|
|
2979
2845
|
if (!isJsonObject(parsed)) {
|
|
2980
2846
|
return {};
|
|
@@ -3025,26 +2891,26 @@ var loadEvalCases = loadTests;
|
|
|
3025
2891
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
3026
2892
|
const verbose = options?.verbose ?? false;
|
|
3027
2893
|
const filterPattern = options?.filter;
|
|
3028
|
-
const absoluteTestPath =
|
|
2894
|
+
const absoluteTestPath = path8.resolve(evalFilePath);
|
|
3029
2895
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3030
2896
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
3031
2897
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
3032
2898
|
const guidelinePatterns = config?.guideline_patterns;
|
|
3033
|
-
const rawFile = await
|
|
2899
|
+
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
3034
2900
|
const interpolated = interpolateEnv(parse2(rawFile), process.env);
|
|
3035
2901
|
if (!isJsonObject(interpolated)) {
|
|
3036
2902
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
3037
2903
|
}
|
|
3038
2904
|
const suite = interpolated;
|
|
3039
2905
|
const datasetNameFromSuite = asString6(suite.dataset)?.trim();
|
|
3040
|
-
const fallbackDataset =
|
|
2906
|
+
const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
3041
2907
|
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
3042
2908
|
const rawTestcases = resolveTests(suite);
|
|
3043
2909
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
3044
|
-
const evalFileDir =
|
|
2910
|
+
const evalFileDir = path8.dirname(absoluteTestPath);
|
|
3045
2911
|
let expandedTestcases;
|
|
3046
2912
|
if (typeof rawTestcases === "string") {
|
|
3047
|
-
const externalPath =
|
|
2913
|
+
const externalPath = path8.resolve(evalFileDir, rawTestcases);
|
|
3048
2914
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
3049
2915
|
} else if (Array.isArray(rawTestcases)) {
|
|
3050
2916
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -3165,7 +3031,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3165
3031
|
}
|
|
3166
3032
|
}
|
|
3167
3033
|
const allFilePaths = [
|
|
3168
|
-
...guidelinePaths.map((guidelinePath) =>
|
|
3034
|
+
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
3169
3035
|
...userFilePaths
|
|
3170
3036
|
];
|
|
3171
3037
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
@@ -3181,7 +3047,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3181
3047
|
input_segments: inputSegments,
|
|
3182
3048
|
expected_output: outputSegments,
|
|
3183
3049
|
reference_answer: referenceAnswer,
|
|
3184
|
-
guideline_paths: guidelinePaths.map((guidelinePath) =>
|
|
3050
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
3185
3051
|
guideline_patterns: guidelinePatterns,
|
|
3186
3052
|
file_paths: allFilePaths,
|
|
3187
3053
|
criteria: outcome ?? "",
|
|
@@ -3231,8 +3097,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3231
3097
|
if (commandArr.length === 0) return void 0;
|
|
3232
3098
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
3233
3099
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
3234
|
-
if (cwd && !
|
|
3235
|
-
cwd =
|
|
3100
|
+
if (cwd && !path8.isAbsolute(cwd)) {
|
|
3101
|
+
cwd = path8.resolve(evalFileDir, cwd);
|
|
3236
3102
|
}
|
|
3237
3103
|
const config = { command: commandArr };
|
|
3238
3104
|
if (timeoutMs !== void 0) {
|
|
@@ -3322,10 +3188,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
3322
3188
|
}
|
|
3323
3189
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3324
3190
|
if (typeof raw === "string") {
|
|
3325
|
-
const workspaceFilePath =
|
|
3191
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
3326
3192
|
let content;
|
|
3327
3193
|
try {
|
|
3328
|
-
content = await
|
|
3194
|
+
content = await readFile7(workspaceFilePath, "utf8");
|
|
3329
3195
|
} catch {
|
|
3330
3196
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3331
3197
|
}
|
|
@@ -3335,7 +3201,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
3335
3201
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3336
3202
|
);
|
|
3337
3203
|
}
|
|
3338
|
-
const workspaceFileDir =
|
|
3204
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
3339
3205
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3340
3206
|
}
|
|
3341
3207
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -3355,8 +3221,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3355
3221
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
3356
3222
|
}
|
|
3357
3223
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
3358
|
-
if (template && !
|
|
3359
|
-
template =
|
|
3224
|
+
if (template && !path8.isAbsolute(template)) {
|
|
3225
|
+
template = path8.resolve(evalFileDir, template);
|
|
3360
3226
|
}
|
|
3361
3227
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3362
3228
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -3409,25 +3275,25 @@ function asString6(value) {
|
|
|
3409
3275
|
function logWarning6(message, details) {
|
|
3410
3276
|
if (details && details.length > 0) {
|
|
3411
3277
|
const detailBlock = details.join("\n");
|
|
3412
|
-
console.warn(`${
|
|
3413
|
-
${detailBlock}${
|
|
3278
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}
|
|
3279
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
3414
3280
|
} else {
|
|
3415
|
-
console.warn(`${
|
|
3281
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
3416
3282
|
}
|
|
3417
3283
|
}
|
|
3418
3284
|
function logError3(message, details) {
|
|
3419
3285
|
if (details && details.length > 0) {
|
|
3420
3286
|
const detailBlock = details.join("\n");
|
|
3421
3287
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
3422
|
-
${detailBlock}${
|
|
3288
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
3423
3289
|
} else {
|
|
3424
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
3290
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET8}`);
|
|
3425
3291
|
}
|
|
3426
3292
|
}
|
|
3427
3293
|
|
|
3428
3294
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3429
3295
|
import { readFileSync } from "node:fs";
|
|
3430
|
-
import
|
|
3296
|
+
import path9 from "node:path";
|
|
3431
3297
|
import { parse as parse3 } from "yaml";
|
|
3432
3298
|
function codeGraderInstruction(graderName, description) {
|
|
3433
3299
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -3672,7 +3538,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
3672
3538
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
3673
3539
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
3674
3540
|
const parsed = parse3(content);
|
|
3675
|
-
return transpileEvalYaml(parsed,
|
|
3541
|
+
return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
|
|
3676
3542
|
}
|
|
3677
3543
|
function getOutputFilenames(result) {
|
|
3678
3544
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -4113,7 +3979,7 @@ import { spawn } from "node:child_process";
|
|
|
4113
3979
|
import { randomUUID } from "node:crypto";
|
|
4114
3980
|
import { createWriteStream } from "node:fs";
|
|
4115
3981
|
import { mkdir } from "node:fs/promises";
|
|
4116
|
-
import
|
|
3982
|
+
import path11 from "node:path";
|
|
4117
3983
|
|
|
4118
3984
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
4119
3985
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -4169,7 +4035,7 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
4169
4035
|
}
|
|
4170
4036
|
|
|
4171
4037
|
// src/evaluation/providers/preread.ts
|
|
4172
|
-
import
|
|
4038
|
+
import path10 from "node:path";
|
|
4173
4039
|
function buildPromptDocument(request, inputFiles, options) {
|
|
4174
4040
|
const parts = [];
|
|
4175
4041
|
const guidelineFiles = collectGuidelineFiles(
|
|
@@ -4192,7 +4058,7 @@ function normalizeInputFiles(inputFiles) {
|
|
|
4192
4058
|
}
|
|
4193
4059
|
const deduped = /* @__PURE__ */ new Map();
|
|
4194
4060
|
for (const inputFile of inputFiles) {
|
|
4195
|
-
const absolutePath =
|
|
4061
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4196
4062
|
if (!deduped.has(absolutePath)) {
|
|
4197
4063
|
deduped.set(absolutePath, absolutePath);
|
|
4198
4064
|
}
|
|
@@ -4205,14 +4071,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
|
4205
4071
|
}
|
|
4206
4072
|
const unique = /* @__PURE__ */ new Map();
|
|
4207
4073
|
for (const inputFile of inputFiles) {
|
|
4208
|
-
const absolutePath =
|
|
4074
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4209
4075
|
if (overrides?.has(absolutePath)) {
|
|
4210
4076
|
if (!unique.has(absolutePath)) {
|
|
4211
4077
|
unique.set(absolutePath, absolutePath);
|
|
4212
4078
|
}
|
|
4213
4079
|
continue;
|
|
4214
4080
|
}
|
|
4215
|
-
const normalized = absolutePath.split(
|
|
4081
|
+
const normalized = absolutePath.split(path10.sep).join("/");
|
|
4216
4082
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
4217
4083
|
if (!unique.has(absolutePath)) {
|
|
4218
4084
|
unique.set(absolutePath, absolutePath);
|
|
@@ -4227,7 +4093,7 @@ function collectInputFiles(inputFiles) {
|
|
|
4227
4093
|
}
|
|
4228
4094
|
const unique = /* @__PURE__ */ new Map();
|
|
4229
4095
|
for (const inputFile of inputFiles) {
|
|
4230
|
-
const absolutePath =
|
|
4096
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4231
4097
|
if (!unique.has(absolutePath)) {
|
|
4232
4098
|
unique.set(absolutePath, absolutePath);
|
|
4233
4099
|
}
|
|
@@ -4239,7 +4105,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
|
|
|
4239
4105
|
return "";
|
|
4240
4106
|
}
|
|
4241
4107
|
const buildList = (files) => files.map((absolutePath) => {
|
|
4242
|
-
const fileName =
|
|
4108
|
+
const fileName = path10.basename(absolutePath);
|
|
4243
4109
|
const fileUri = pathToFileUri(absolutePath);
|
|
4244
4110
|
return `* [${fileName}](${fileUri})`;
|
|
4245
4111
|
});
|
|
@@ -4259,7 +4125,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
4259
4125
|
return sections.join("\n");
|
|
4260
4126
|
}
|
|
4261
4127
|
function pathToFileUri(filePath) {
|
|
4262
|
-
const absolutePath =
|
|
4128
|
+
const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
|
|
4263
4129
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
4264
4130
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
4265
4131
|
return `file:///${normalizedPath}`;
|
|
@@ -4406,10 +4272,10 @@ var ClaudeCliProvider = class {
|
|
|
4406
4272
|
}
|
|
4407
4273
|
resolveCwd(cwdOverride) {
|
|
4408
4274
|
if (cwdOverride) {
|
|
4409
|
-
return
|
|
4275
|
+
return path11.resolve(cwdOverride);
|
|
4410
4276
|
}
|
|
4411
4277
|
if (this.config.cwd) {
|
|
4412
|
-
return
|
|
4278
|
+
return path11.resolve(this.config.cwd);
|
|
4413
4279
|
}
|
|
4414
4280
|
return void 0;
|
|
4415
4281
|
}
|
|
@@ -4419,9 +4285,9 @@ var ClaudeCliProvider = class {
|
|
|
4419
4285
|
return void 0;
|
|
4420
4286
|
}
|
|
4421
4287
|
if (this.config.logDir) {
|
|
4422
|
-
return
|
|
4288
|
+
return path11.resolve(this.config.logDir);
|
|
4423
4289
|
}
|
|
4424
|
-
return
|
|
4290
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
4425
4291
|
}
|
|
4426
4292
|
async createStreamLogger(request) {
|
|
4427
4293
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4435,7 +4301,7 @@ var ClaudeCliProvider = class {
|
|
|
4435
4301
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
4436
4302
|
return void 0;
|
|
4437
4303
|
}
|
|
4438
|
-
const filePath =
|
|
4304
|
+
const filePath = path11.join(logDir, buildLogFilename(request, this.targetName));
|
|
4439
4305
|
try {
|
|
4440
4306
|
const logger = await ClaudeCliStreamLogger.create({
|
|
4441
4307
|
filePath,
|
|
@@ -4756,7 +4622,7 @@ function tryParseJson(line) {
|
|
|
4756
4622
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
4757
4623
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
4758
4624
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
4759
|
-
import
|
|
4625
|
+
import path12 from "node:path";
|
|
4760
4626
|
var claudeSdkModule = null;
|
|
4761
4627
|
async function loadClaudeSdk() {
|
|
4762
4628
|
if (!claudeSdkModule) {
|
|
@@ -4916,10 +4782,10 @@ var ClaudeSdkProvider = class {
|
|
|
4916
4782
|
}
|
|
4917
4783
|
resolveCwd(cwdOverride) {
|
|
4918
4784
|
if (cwdOverride) {
|
|
4919
|
-
return
|
|
4785
|
+
return path12.resolve(cwdOverride);
|
|
4920
4786
|
}
|
|
4921
4787
|
if (this.config.cwd) {
|
|
4922
|
-
return
|
|
4788
|
+
return path12.resolve(this.config.cwd);
|
|
4923
4789
|
}
|
|
4924
4790
|
return void 0;
|
|
4925
4791
|
}
|
|
@@ -4929,9 +4795,9 @@ var ClaudeSdkProvider = class {
|
|
|
4929
4795
|
return void 0;
|
|
4930
4796
|
}
|
|
4931
4797
|
if (this.config.logDir) {
|
|
4932
|
-
return
|
|
4798
|
+
return path12.resolve(this.config.logDir);
|
|
4933
4799
|
}
|
|
4934
|
-
return
|
|
4800
|
+
return path12.join(process.cwd(), ".agentv", "logs", "claude");
|
|
4935
4801
|
}
|
|
4936
4802
|
async createStreamLogger(request) {
|
|
4937
4803
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4945,7 +4811,7 @@ var ClaudeSdkProvider = class {
|
|
|
4945
4811
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
4946
4812
|
return void 0;
|
|
4947
4813
|
}
|
|
4948
|
-
const filePath =
|
|
4814
|
+
const filePath = path12.join(logDir, buildLogFilename2(request, this.targetName));
|
|
4949
4815
|
try {
|
|
4950
4816
|
const logger = await ClaudeStreamLogger.create({
|
|
4951
4817
|
filePath,
|
|
@@ -5152,7 +5018,7 @@ function formatElapsed2(startedAt) {
|
|
|
5152
5018
|
import { exec as execWithCallback } from "node:child_process";
|
|
5153
5019
|
import fs from "node:fs/promises";
|
|
5154
5020
|
import os from "node:os";
|
|
5155
|
-
import
|
|
5021
|
+
import path13 from "node:path";
|
|
5156
5022
|
import { promisify } from "node:util";
|
|
5157
5023
|
import { z as z2 } from "zod";
|
|
5158
5024
|
var ToolCallSchema = z2.object({
|
|
@@ -5657,7 +5523,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
5657
5523
|
}
|
|
5658
5524
|
const unique = /* @__PURE__ */ new Map();
|
|
5659
5525
|
for (const inputFile of inputFiles) {
|
|
5660
|
-
const absolutePath =
|
|
5526
|
+
const absolutePath = path13.resolve(inputFile);
|
|
5661
5527
|
if (!unique.has(absolutePath)) {
|
|
5662
5528
|
unique.set(absolutePath, absolutePath);
|
|
5663
5529
|
}
|
|
@@ -5671,7 +5537,7 @@ function formatFileList(files, template) {
|
|
|
5671
5537
|
const formatter = template ?? "{path}";
|
|
5672
5538
|
return files.map((filePath) => {
|
|
5673
5539
|
const escapedPath = shellEscape(filePath);
|
|
5674
|
-
const escapedName = shellEscape(
|
|
5540
|
+
const escapedName = shellEscape(path13.basename(filePath));
|
|
5675
5541
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
5676
5542
|
}).join(" ");
|
|
5677
5543
|
}
|
|
@@ -5695,7 +5561,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
5695
5561
|
const safeEvalId = evalCaseId || "unknown";
|
|
5696
5562
|
const timestamp = Date.now();
|
|
5697
5563
|
const random = Math.random().toString(36).substring(2, 9);
|
|
5698
|
-
return
|
|
5564
|
+
return path13.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
5699
5565
|
}
|
|
5700
5566
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
5701
5567
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -5709,7 +5575,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
5709
5575
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
5710
5576
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
5711
5577
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
5712
|
-
import
|
|
5578
|
+
import path14 from "node:path";
|
|
5713
5579
|
|
|
5714
5580
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
5715
5581
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -5944,10 +5810,10 @@ ${basePrompt}` : basePrompt;
|
|
|
5944
5810
|
}
|
|
5945
5811
|
resolveCwd(cwdOverride) {
|
|
5946
5812
|
if (cwdOverride) {
|
|
5947
|
-
return
|
|
5813
|
+
return path14.resolve(cwdOverride);
|
|
5948
5814
|
}
|
|
5949
5815
|
if (this.config.cwd) {
|
|
5950
|
-
return
|
|
5816
|
+
return path14.resolve(this.config.cwd);
|
|
5951
5817
|
}
|
|
5952
5818
|
return void 0;
|
|
5953
5819
|
}
|
|
@@ -5957,9 +5823,9 @@ ${basePrompt}` : basePrompt;
|
|
|
5957
5823
|
return void 0;
|
|
5958
5824
|
}
|
|
5959
5825
|
if (this.config.logDir) {
|
|
5960
|
-
return
|
|
5826
|
+
return path14.resolve(this.config.logDir);
|
|
5961
5827
|
}
|
|
5962
|
-
return
|
|
5828
|
+
return path14.join(process.cwd(), ".agentv", "logs", "codex");
|
|
5963
5829
|
}
|
|
5964
5830
|
async createStreamLogger(request) {
|
|
5965
5831
|
const logDir = this.resolveLogDirectory();
|
|
@@ -5973,7 +5839,7 @@ ${basePrompt}` : basePrompt;
|
|
|
5973
5839
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
5974
5840
|
return void 0;
|
|
5975
5841
|
}
|
|
5976
|
-
const filePath =
|
|
5842
|
+
const filePath = path14.join(logDir, buildLogFilename3(request, this.targetName));
|
|
5977
5843
|
try {
|
|
5978
5844
|
const logger = await CodexSdkStreamLogger.create({
|
|
5979
5845
|
filePath,
|
|
@@ -6117,7 +5983,7 @@ function formatElapsed3(startedAt) {
|
|
|
6117
5983
|
// src/evaluation/providers/copilot-cli.ts
|
|
6118
5984
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
6119
5985
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
6120
|
-
import
|
|
5986
|
+
import path16 from "node:path";
|
|
6121
5987
|
import { Readable, Writable } from "node:stream";
|
|
6122
5988
|
import { spawn as spawn2 } from "node:child_process";
|
|
6123
5989
|
import * as acp from "@agentclientprotocol/sdk";
|
|
@@ -6179,7 +6045,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6179
6045
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6180
6046
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6181
6047
|
import { arch, platform } from "node:os";
|
|
6182
|
-
import
|
|
6048
|
+
import path15 from "node:path";
|
|
6183
6049
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
6184
6050
|
function resolvePlatformCliPath() {
|
|
6185
6051
|
const os3 = platform();
|
|
@@ -6203,7 +6069,7 @@ function resolvePlatformCliPath() {
|
|
|
6203
6069
|
try {
|
|
6204
6070
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6205
6071
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
6206
|
-
const binaryPath =
|
|
6072
|
+
const binaryPath = path15.join(path15.dirname(packageJsonPath), binaryName);
|
|
6207
6073
|
if (existsSync(binaryPath)) {
|
|
6208
6074
|
return binaryPath;
|
|
6209
6075
|
}
|
|
@@ -6211,7 +6077,7 @@ function resolvePlatformCliPath() {
|
|
|
6211
6077
|
}
|
|
6212
6078
|
let searchDir = process.cwd();
|
|
6213
6079
|
for (let i = 0; i < 10; i++) {
|
|
6214
|
-
const standardPath =
|
|
6080
|
+
const standardPath = path15.join(
|
|
6215
6081
|
searchDir,
|
|
6216
6082
|
"node_modules",
|
|
6217
6083
|
...packageName.split("/"),
|
|
@@ -6220,13 +6086,13 @@ function resolvePlatformCliPath() {
|
|
|
6220
6086
|
if (existsSync(standardPath)) {
|
|
6221
6087
|
return standardPath;
|
|
6222
6088
|
}
|
|
6223
|
-
const bunDir =
|
|
6089
|
+
const bunDir = path15.join(searchDir, "node_modules", ".bun");
|
|
6224
6090
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6225
6091
|
try {
|
|
6226
6092
|
const entries = readdirSync(bunDir);
|
|
6227
6093
|
for (const entry of entries) {
|
|
6228
6094
|
if (entry.startsWith(prefix)) {
|
|
6229
|
-
const candidate =
|
|
6095
|
+
const candidate = path15.join(
|
|
6230
6096
|
bunDir,
|
|
6231
6097
|
entry,
|
|
6232
6098
|
"node_modules",
|
|
@@ -6241,7 +6107,7 @@ function resolvePlatformCliPath() {
|
|
|
6241
6107
|
}
|
|
6242
6108
|
} catch {
|
|
6243
6109
|
}
|
|
6244
|
-
const parent =
|
|
6110
|
+
const parent = path15.dirname(searchDir);
|
|
6245
6111
|
if (parent === searchDir) break;
|
|
6246
6112
|
searchDir = parent;
|
|
6247
6113
|
}
|
|
@@ -6579,10 +6445,10 @@ var CopilotCliProvider = class {
|
|
|
6579
6445
|
}
|
|
6580
6446
|
resolveCwd(cwdOverride) {
|
|
6581
6447
|
if (cwdOverride) {
|
|
6582
|
-
return
|
|
6448
|
+
return path16.resolve(cwdOverride);
|
|
6583
6449
|
}
|
|
6584
6450
|
if (this.config.cwd) {
|
|
6585
|
-
return
|
|
6451
|
+
return path16.resolve(this.config.cwd);
|
|
6586
6452
|
}
|
|
6587
6453
|
return void 0;
|
|
6588
6454
|
}
|
|
@@ -6601,9 +6467,9 @@ var CopilotCliProvider = class {
|
|
|
6601
6467
|
return void 0;
|
|
6602
6468
|
}
|
|
6603
6469
|
if (this.config.logDir) {
|
|
6604
|
-
return
|
|
6470
|
+
return path16.resolve(this.config.logDir);
|
|
6605
6471
|
}
|
|
6606
|
-
return
|
|
6472
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
6607
6473
|
}
|
|
6608
6474
|
async createStreamLogger(request) {
|
|
6609
6475
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6617,7 +6483,7 @@ var CopilotCliProvider = class {
|
|
|
6617
6483
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6618
6484
|
return void 0;
|
|
6619
6485
|
}
|
|
6620
|
-
const filePath =
|
|
6486
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
6621
6487
|
try {
|
|
6622
6488
|
const logger = await CopilotStreamLogger.create(
|
|
6623
6489
|
{
|
|
@@ -6712,7 +6578,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6712
6578
|
// src/evaluation/providers/copilot-sdk.ts
|
|
6713
6579
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
6714
6580
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
6715
|
-
import
|
|
6581
|
+
import path17 from "node:path";
|
|
6716
6582
|
|
|
6717
6583
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
6718
6584
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -6991,10 +6857,10 @@ var CopilotSdkProvider = class {
|
|
|
6991
6857
|
}
|
|
6992
6858
|
resolveCwd(cwdOverride) {
|
|
6993
6859
|
if (cwdOverride) {
|
|
6994
|
-
return
|
|
6860
|
+
return path17.resolve(cwdOverride);
|
|
6995
6861
|
}
|
|
6996
6862
|
if (this.config.cwd) {
|
|
6997
|
-
return
|
|
6863
|
+
return path17.resolve(this.config.cwd);
|
|
6998
6864
|
}
|
|
6999
6865
|
return void 0;
|
|
7000
6866
|
}
|
|
@@ -7003,9 +6869,9 @@ var CopilotSdkProvider = class {
|
|
|
7003
6869
|
return void 0;
|
|
7004
6870
|
}
|
|
7005
6871
|
if (this.config.logDir) {
|
|
7006
|
-
return
|
|
6872
|
+
return path17.resolve(this.config.logDir);
|
|
7007
6873
|
}
|
|
7008
|
-
return
|
|
6874
|
+
return path17.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
7009
6875
|
}
|
|
7010
6876
|
async createStreamLogger(request) {
|
|
7011
6877
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7019,7 +6885,7 @@ var CopilotSdkProvider = class {
|
|
|
7019
6885
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7020
6886
|
return void 0;
|
|
7021
6887
|
}
|
|
7022
|
-
const filePath =
|
|
6888
|
+
const filePath = path17.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
7023
6889
|
try {
|
|
7024
6890
|
const logger = await CopilotStreamLogger.create(
|
|
7025
6891
|
{
|
|
@@ -7375,7 +7241,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7375
7241
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
7376
7242
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7377
7243
|
import { tmpdir } from "node:os";
|
|
7378
|
-
import
|
|
7244
|
+
import path18 from "node:path";
|
|
7379
7245
|
|
|
7380
7246
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7381
7247
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -7456,7 +7322,7 @@ var PiCodingAgentProvider = class {
|
|
|
7456
7322
|
const workspaceRoot = await this.createWorkspace();
|
|
7457
7323
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
7458
7324
|
try {
|
|
7459
|
-
const promptFile =
|
|
7325
|
+
const promptFile = path18.join(workspaceRoot, PROMPT_FILENAME);
|
|
7460
7326
|
await writeFile(promptFile, request.question, "utf8");
|
|
7461
7327
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
7462
7328
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
@@ -7518,12 +7384,12 @@ var PiCodingAgentProvider = class {
|
|
|
7518
7384
|
}
|
|
7519
7385
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
7520
7386
|
if (cwdOverride) {
|
|
7521
|
-
return
|
|
7387
|
+
return path18.resolve(cwdOverride);
|
|
7522
7388
|
}
|
|
7523
7389
|
if (!this.config.cwd) {
|
|
7524
7390
|
return workspaceRoot;
|
|
7525
7391
|
}
|
|
7526
|
-
return
|
|
7392
|
+
return path18.resolve(this.config.cwd);
|
|
7527
7393
|
}
|
|
7528
7394
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
7529
7395
|
const args = [];
|
|
@@ -7612,7 +7478,7 @@ ${prompt}` : prompt;
|
|
|
7612
7478
|
return env;
|
|
7613
7479
|
}
|
|
7614
7480
|
async createWorkspace() {
|
|
7615
|
-
return await mkdtemp(
|
|
7481
|
+
return await mkdtemp(path18.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7616
7482
|
}
|
|
7617
7483
|
async cleanupWorkspace(workspaceRoot) {
|
|
7618
7484
|
try {
|
|
@@ -7622,9 +7488,9 @@ ${prompt}` : prompt;
|
|
|
7622
7488
|
}
|
|
7623
7489
|
resolveLogDirectory() {
|
|
7624
7490
|
if (this.config.logDir) {
|
|
7625
|
-
return
|
|
7491
|
+
return path18.resolve(this.config.logDir);
|
|
7626
7492
|
}
|
|
7627
|
-
return
|
|
7493
|
+
return path18.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7628
7494
|
}
|
|
7629
7495
|
async createStreamLogger(request) {
|
|
7630
7496
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7638,7 +7504,7 @@ ${prompt}` : prompt;
|
|
|
7638
7504
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7639
7505
|
return void 0;
|
|
7640
7506
|
}
|
|
7641
|
-
const filePath =
|
|
7507
|
+
const filePath = path18.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7642
7508
|
try {
|
|
7643
7509
|
const logger = await PiStreamLogger.create({
|
|
7644
7510
|
filePath,
|
|
@@ -8139,17 +8005,17 @@ var ProviderRegistry = class {
|
|
|
8139
8005
|
// src/evaluation/providers/vscode-provider.ts
|
|
8140
8006
|
import { exec as exec2 } from "node:child_process";
|
|
8141
8007
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
8142
|
-
import
|
|
8008
|
+
import path30 from "node:path";
|
|
8143
8009
|
import { promisify as promisify3 } from "node:util";
|
|
8144
8010
|
|
|
8145
8011
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
8146
8012
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
8147
|
-
import
|
|
8013
|
+
import path28 from "node:path";
|
|
8148
8014
|
|
|
8149
8015
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
8150
8016
|
import { constants as constants2 } from "node:fs";
|
|
8151
8017
|
import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8152
|
-
import
|
|
8018
|
+
import path19 from "node:path";
|
|
8153
8019
|
async function pathExists(target) {
|
|
8154
8020
|
try {
|
|
8155
8021
|
await access2(target, constants2.F_OK);
|
|
@@ -8165,7 +8031,7 @@ async function readDirEntries(target) {
|
|
|
8165
8031
|
const entries = await readdir(target, { withFileTypes: true });
|
|
8166
8032
|
return entries.map((entry) => ({
|
|
8167
8033
|
name: entry.name,
|
|
8168
|
-
absolutePath:
|
|
8034
|
+
absolutePath: path19.join(target, entry.name),
|
|
8169
8035
|
isDirectory: entry.isDirectory()
|
|
8170
8036
|
}));
|
|
8171
8037
|
}
|
|
@@ -8180,9 +8046,9 @@ async function removeIfExists(target) {
|
|
|
8180
8046
|
}
|
|
8181
8047
|
|
|
8182
8048
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
8183
|
-
import
|
|
8049
|
+
import path20 from "node:path";
|
|
8184
8050
|
function pathToFileUri2(filePath) {
|
|
8185
|
-
const absolutePath =
|
|
8051
|
+
const absolutePath = path20.isAbsolute(filePath) ? filePath : path20.resolve(filePath);
|
|
8186
8052
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8187
8053
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8188
8054
|
return `file:///${normalizedPath}`;
|
|
@@ -8191,7 +8057,7 @@ function pathToFileUri2(filePath) {
|
|
|
8191
8057
|
}
|
|
8192
8058
|
|
|
8193
8059
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
8194
|
-
import
|
|
8060
|
+
import path21 from "node:path";
|
|
8195
8061
|
|
|
8196
8062
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
8197
8063
|
function renderTemplate2(content, variables) {
|
|
@@ -8283,8 +8149,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
8283
8149
|
});
|
|
8284
8150
|
}
|
|
8285
8151
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
8286
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
8287
|
-
const responseList = responseFiles.map((file) => `"${
|
|
8152
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path21.basename(file)}`).join("\n");
|
|
8153
|
+
const responseList = responseFiles.map((file) => `"${path21.basename(file)}"`).join(", ");
|
|
8288
8154
|
return renderTemplate2(templateContent, {
|
|
8289
8155
|
requestFiles: requestLines,
|
|
8290
8156
|
responseList
|
|
@@ -8292,8 +8158,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
8292
8158
|
}
|
|
8293
8159
|
|
|
8294
8160
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
8295
|
-
import { readFile as
|
|
8296
|
-
import
|
|
8161
|
+
import { readFile as readFile8 } from "node:fs/promises";
|
|
8162
|
+
import path22 from "node:path";
|
|
8297
8163
|
|
|
8298
8164
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
8299
8165
|
function sleep2(ms) {
|
|
@@ -8331,7 +8197,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8331
8197
|
const maxAttempts = 10;
|
|
8332
8198
|
while (attempts < maxAttempts) {
|
|
8333
8199
|
try {
|
|
8334
|
-
const content = await
|
|
8200
|
+
const content = await readFile8(responseFileFinal, { encoding: "utf8" });
|
|
8335
8201
|
if (!silent) {
|
|
8336
8202
|
process.stdout.write(`${content}
|
|
8337
8203
|
`);
|
|
@@ -8352,7 +8218,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8352
8218
|
}
|
|
8353
8219
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8354
8220
|
if (!silent) {
|
|
8355
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8221
|
+
const fileList = responseFilesFinal.map((file) => path22.basename(file)).join(", ");
|
|
8356
8222
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8357
8223
|
}
|
|
8358
8224
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8361,7 +8227,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8361
8227
|
while (pending.size > 0) {
|
|
8362
8228
|
if (Date.now() >= deadline) {
|
|
8363
8229
|
if (!silent) {
|
|
8364
|
-
const remaining = [...pending].map((f) =>
|
|
8230
|
+
const remaining = [...pending].map((f) => path22.basename(f)).join(", ");
|
|
8365
8231
|
console.error(
|
|
8366
8232
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8367
8233
|
);
|
|
@@ -8388,7 +8254,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8388
8254
|
const maxAttempts = 10;
|
|
8389
8255
|
while (attempts < maxAttempts) {
|
|
8390
8256
|
try {
|
|
8391
|
-
const content = await
|
|
8257
|
+
const content = await readFile8(file, { encoding: "utf8" });
|
|
8392
8258
|
if (!silent) {
|
|
8393
8259
|
process.stdout.write(`${content}
|
|
8394
8260
|
`);
|
|
@@ -8412,15 +8278,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8412
8278
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8413
8279
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8414
8280
|
import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
|
|
8415
|
-
import
|
|
8281
|
+
import path25 from "node:path";
|
|
8416
8282
|
import { promisify as promisify2 } from "node:util";
|
|
8417
8283
|
|
|
8418
8284
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8419
|
-
import
|
|
8285
|
+
import path24 from "node:path";
|
|
8420
8286
|
|
|
8421
8287
|
// src/paths.ts
|
|
8422
8288
|
import os2 from "node:os";
|
|
8423
|
-
import
|
|
8289
|
+
import path23 from "node:path";
|
|
8424
8290
|
var logged = false;
|
|
8425
8291
|
function getAgentvHome() {
|
|
8426
8292
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8431,19 +8297,19 @@ function getAgentvHome() {
|
|
|
8431
8297
|
}
|
|
8432
8298
|
return envHome;
|
|
8433
8299
|
}
|
|
8434
|
-
return
|
|
8300
|
+
return path23.join(os2.homedir(), ".agentv");
|
|
8435
8301
|
}
|
|
8436
8302
|
function getWorkspacesRoot() {
|
|
8437
|
-
return
|
|
8303
|
+
return path23.join(getAgentvHome(), "workspaces");
|
|
8438
8304
|
}
|
|
8439
8305
|
function getSubagentsRoot() {
|
|
8440
|
-
return
|
|
8306
|
+
return path23.join(getAgentvHome(), "subagents");
|
|
8441
8307
|
}
|
|
8442
8308
|
function getTraceStateRoot() {
|
|
8443
|
-
return
|
|
8309
|
+
return path23.join(getAgentvHome(), "trace-state");
|
|
8444
8310
|
}
|
|
8445
8311
|
function getWorkspacePoolRoot() {
|
|
8446
|
-
return
|
|
8312
|
+
return path23.join(getAgentvHome(), "workspace-pool");
|
|
8447
8313
|
}
|
|
8448
8314
|
|
|
8449
8315
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8451,7 +8317,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8451
8317
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8452
8318
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
8453
8319
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
8454
|
-
return
|
|
8320
|
+
return path24.join(getSubagentsRoot(), folder);
|
|
8455
8321
|
}
|
|
8456
8322
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8457
8323
|
|
|
@@ -8518,11 +8384,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8518
8384
|
await raceSpawnError(child);
|
|
8519
8385
|
return true;
|
|
8520
8386
|
}
|
|
8521
|
-
const aliveFile =
|
|
8387
|
+
const aliveFile = path25.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8522
8388
|
await removeIfExists(aliveFile);
|
|
8523
|
-
const githubAgentsDir =
|
|
8389
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
8524
8390
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
8525
|
-
const wakeupDst =
|
|
8391
|
+
const wakeupDst = path25.join(githubAgentsDir, "wakeup.md");
|
|
8526
8392
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8527
8393
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8528
8394
|
label: "open-workspace"
|
|
@@ -8535,7 +8401,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8535
8401
|
"chat",
|
|
8536
8402
|
"-m",
|
|
8537
8403
|
wakeupChatId,
|
|
8538
|
-
`create a file named .alive in the ${
|
|
8404
|
+
`create a file named .alive in the ${path25.basename(subagentDir)} folder`
|
|
8539
8405
|
];
|
|
8540
8406
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8541
8407
|
await raceSpawnError(wakeupChild);
|
|
@@ -8550,10 +8416,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8550
8416
|
return true;
|
|
8551
8417
|
}
|
|
8552
8418
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8553
|
-
const workspacePath =
|
|
8554
|
-
const messagesDir =
|
|
8419
|
+
const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
|
|
8420
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
8555
8421
|
await mkdir8(messagesDir, { recursive: true });
|
|
8556
|
-
const reqFile =
|
|
8422
|
+
const reqFile = path25.join(messagesDir, `${timestamp}_req.md`);
|
|
8557
8423
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8558
8424
|
const reqUri = pathToFileUri2(reqFile);
|
|
8559
8425
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8561,16 +8427,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8561
8427
|
chatArgs.push("-a", attachment);
|
|
8562
8428
|
}
|
|
8563
8429
|
chatArgs.push("-a", reqFile);
|
|
8564
|
-
chatArgs.push(`Follow instructions in [${
|
|
8430
|
+
chatArgs.push(`Follow instructions in [${path25.basename(reqFile)}](${reqUri})`);
|
|
8565
8431
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8566
8432
|
workspacePath,
|
|
8567
|
-
|
|
8433
|
+
path25.basename(subagentDir),
|
|
8568
8434
|
subagentDir,
|
|
8569
8435
|
vscodeCmd
|
|
8570
8436
|
);
|
|
8571
8437
|
if (!workspaceReady) {
|
|
8572
8438
|
throw new Error(
|
|
8573
|
-
`VS Code workspace '${
|
|
8439
|
+
`VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8574
8440
|
);
|
|
8575
8441
|
}
|
|
8576
8442
|
await sleep2(500);
|
|
@@ -8578,8 +8444,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8578
8444
|
await raceSpawnError(child);
|
|
8579
8445
|
}
|
|
8580
8446
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8581
|
-
const workspacePath =
|
|
8582
|
-
const messagesDir =
|
|
8447
|
+
const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
|
|
8448
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
8583
8449
|
await mkdir8(messagesDir, { recursive: true });
|
|
8584
8450
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8585
8451
|
for (const attachment of attachmentPaths) {
|
|
@@ -8588,13 +8454,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8588
8454
|
chatArgs.push(chatInstruction);
|
|
8589
8455
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8590
8456
|
workspacePath,
|
|
8591
|
-
|
|
8457
|
+
path25.basename(subagentDir),
|
|
8592
8458
|
subagentDir,
|
|
8593
8459
|
vscodeCmd
|
|
8594
8460
|
);
|
|
8595
8461
|
if (!workspaceReady) {
|
|
8596
8462
|
throw new Error(
|
|
8597
|
-
`VS Code workspace '${
|
|
8463
|
+
`VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8598
8464
|
);
|
|
8599
8465
|
}
|
|
8600
8466
|
await sleep2(500);
|
|
@@ -8603,11 +8469,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8603
8469
|
}
|
|
8604
8470
|
|
|
8605
8471
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8606
|
-
import { copyFile, mkdir as mkdir9, readFile as
|
|
8607
|
-
import
|
|
8472
|
+
import { copyFile, mkdir as mkdir9, readFile as readFile9, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
8473
|
+
import path27 from "node:path";
|
|
8608
8474
|
|
|
8609
8475
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8610
|
-
import
|
|
8476
|
+
import path26 from "node:path";
|
|
8611
8477
|
import JSON5 from "json5";
|
|
8612
8478
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8613
8479
|
let workspace;
|
|
@@ -8624,10 +8490,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8624
8490
|
}
|
|
8625
8491
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8626
8492
|
const folderPath = folder.path;
|
|
8627
|
-
if (
|
|
8493
|
+
if (path26.isAbsolute(folderPath)) {
|
|
8628
8494
|
return folder;
|
|
8629
8495
|
}
|
|
8630
|
-
const absolutePath =
|
|
8496
|
+
const absolutePath = path26.resolve(templateDir, folderPath);
|
|
8631
8497
|
return {
|
|
8632
8498
|
...folder,
|
|
8633
8499
|
path: absolutePath
|
|
@@ -8649,19 +8515,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8649
8515
|
if (locationMap && typeof locationMap === "object") {
|
|
8650
8516
|
const transformedMap = {};
|
|
8651
8517
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8652
|
-
const isAbsolute =
|
|
8518
|
+
const isAbsolute = path26.isAbsolute(locationPath);
|
|
8653
8519
|
if (isAbsolute) {
|
|
8654
8520
|
transformedMap[locationPath] = value;
|
|
8655
8521
|
} else {
|
|
8656
8522
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8657
8523
|
if (firstGlobIndex === -1) {
|
|
8658
|
-
const resolvedPath =
|
|
8524
|
+
const resolvedPath = path26.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8659
8525
|
transformedMap[resolvedPath] = value;
|
|
8660
8526
|
} else {
|
|
8661
8527
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8662
8528
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8663
8529
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8664
|
-
const resolvedPath = (
|
|
8530
|
+
const resolvedPath = (path26.resolve(templateDir, basePath) + patternPath).replace(
|
|
8665
8531
|
/\\/g,
|
|
8666
8532
|
"/"
|
|
8667
8533
|
);
|
|
@@ -8702,7 +8568,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8702
8568
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8703
8569
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8704
8570
|
for (const subagent of subagents) {
|
|
8705
|
-
const lockFile =
|
|
8571
|
+
const lockFile = path27.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8706
8572
|
if (!await pathExists(lockFile)) {
|
|
8707
8573
|
return subagent.absolutePath;
|
|
8708
8574
|
}
|
|
@@ -8712,7 +8578,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8712
8578
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8713
8579
|
let workspaceContent;
|
|
8714
8580
|
if (workspaceTemplate) {
|
|
8715
|
-
const workspaceSrc =
|
|
8581
|
+
const workspaceSrc = path27.resolve(workspaceTemplate);
|
|
8716
8582
|
if (!await pathExists(workspaceSrc)) {
|
|
8717
8583
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8718
8584
|
}
|
|
@@ -8720,18 +8586,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8720
8586
|
if (!stats.isFile()) {
|
|
8721
8587
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
8722
8588
|
}
|
|
8723
|
-
const templateText = await
|
|
8589
|
+
const templateText = await readFile9(workspaceSrc, "utf8");
|
|
8724
8590
|
workspaceContent = JSON.parse(templateText);
|
|
8725
8591
|
} else {
|
|
8726
8592
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8727
8593
|
}
|
|
8728
|
-
const workspaceName = `${
|
|
8729
|
-
const workspaceDst =
|
|
8730
|
-
const templateDir = workspaceTemplate ?
|
|
8594
|
+
const workspaceName = `${path27.basename(subagentDir)}.code-workspace`;
|
|
8595
|
+
const workspaceDst = path27.join(subagentDir, workspaceName);
|
|
8596
|
+
const templateDir = workspaceTemplate ? path27.dirname(path27.resolve(workspaceTemplate)) : subagentDir;
|
|
8731
8597
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8732
8598
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8733
8599
|
if (cwd) {
|
|
8734
|
-
const absCwd =
|
|
8600
|
+
const absCwd = path27.resolve(cwd);
|
|
8735
8601
|
const parsed = JSON.parse(transformedContent);
|
|
8736
8602
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8737
8603
|
if (!alreadyPresent) {
|
|
@@ -8740,35 +8606,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8740
8606
|
}
|
|
8741
8607
|
}
|
|
8742
8608
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8743
|
-
const messagesDir =
|
|
8609
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8744
8610
|
await mkdir9(messagesDir, { recursive: true });
|
|
8745
8611
|
return { workspace: workspaceDst, messagesDir };
|
|
8746
8612
|
}
|
|
8747
8613
|
async function createSubagentLock(subagentDir) {
|
|
8748
|
-
const messagesDir =
|
|
8614
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8749
8615
|
if (await pathExists(messagesDir)) {
|
|
8750
8616
|
const files = await readdir2(messagesDir);
|
|
8751
8617
|
await Promise.all(
|
|
8752
8618
|
files.map(async (file) => {
|
|
8753
|
-
const target =
|
|
8619
|
+
const target = path27.join(messagesDir, file);
|
|
8754
8620
|
await removeIfExists(target);
|
|
8755
8621
|
})
|
|
8756
8622
|
);
|
|
8757
8623
|
}
|
|
8758
|
-
const githubAgentsDir =
|
|
8624
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8759
8625
|
if (await pathExists(githubAgentsDir)) {
|
|
8760
8626
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
8761
8627
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8762
8628
|
await Promise.all(
|
|
8763
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
8629
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path27.join(githubAgentsDir, file)))
|
|
8764
8630
|
);
|
|
8765
8631
|
}
|
|
8766
|
-
const lockFile =
|
|
8632
|
+
const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8767
8633
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8768
8634
|
return lockFile;
|
|
8769
8635
|
}
|
|
8770
8636
|
async function removeSubagentLock(subagentDir) {
|
|
8771
|
-
const lockFile =
|
|
8637
|
+
const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8772
8638
|
await removeIfExists(lockFile);
|
|
8773
8639
|
}
|
|
8774
8640
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8788,9 +8654,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8788
8654
|
return 1;
|
|
8789
8655
|
}
|
|
8790
8656
|
if (promptFile) {
|
|
8791
|
-
const githubAgentsDir =
|
|
8657
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8792
8658
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8793
|
-
const agentFile =
|
|
8659
|
+
const agentFile = path27.join(githubAgentsDir, `${chatId}.md`);
|
|
8794
8660
|
try {
|
|
8795
8661
|
await copyFile(promptFile, agentFile);
|
|
8796
8662
|
} catch (error) {
|
|
@@ -8809,7 +8675,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
8809
8675
|
if (!promptFile) {
|
|
8810
8676
|
return void 0;
|
|
8811
8677
|
}
|
|
8812
|
-
const resolvedPrompt =
|
|
8678
|
+
const resolvedPrompt = path28.resolve(promptFile);
|
|
8813
8679
|
if (!await pathExists(resolvedPrompt)) {
|
|
8814
8680
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8815
8681
|
}
|
|
@@ -8825,7 +8691,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8825
8691
|
}
|
|
8826
8692
|
const resolved = [];
|
|
8827
8693
|
for (const attachment of extraAttachments) {
|
|
8828
|
-
const resolvedPath =
|
|
8694
|
+
const resolvedPath = path28.resolve(attachment);
|
|
8829
8695
|
if (!await pathExists(resolvedPath)) {
|
|
8830
8696
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8831
8697
|
}
|
|
@@ -8867,7 +8733,7 @@ async function dispatchAgentSession(options) {
|
|
|
8867
8733
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8868
8734
|
};
|
|
8869
8735
|
}
|
|
8870
|
-
const subagentName =
|
|
8736
|
+
const subagentName = path28.basename(subagentDir);
|
|
8871
8737
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8872
8738
|
const preparationResult = await prepareSubagentDirectory(
|
|
8873
8739
|
subagentDir,
|
|
@@ -8895,9 +8761,9 @@ async function dispatchAgentSession(options) {
|
|
|
8895
8761
|
};
|
|
8896
8762
|
}
|
|
8897
8763
|
const timestamp = generateTimestamp();
|
|
8898
|
-
const messagesDir =
|
|
8899
|
-
const responseFileTmp =
|
|
8900
|
-
const responseFileFinal =
|
|
8764
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
8765
|
+
const responseFileTmp = path28.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
8766
|
+
const responseFileFinal = path28.join(messagesDir, `${timestamp}_res.md`);
|
|
8901
8767
|
const requestInstructions = createRequestPrompt(
|
|
8902
8768
|
userQuery,
|
|
8903
8769
|
responseFileTmp,
|
|
@@ -9002,7 +8868,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9002
8868
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
9003
8869
|
};
|
|
9004
8870
|
}
|
|
9005
|
-
subagentName =
|
|
8871
|
+
subagentName = path28.basename(subagentDir);
|
|
9006
8872
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
9007
8873
|
const preparationResult = await prepareSubagentDirectory(
|
|
9008
8874
|
subagentDir,
|
|
@@ -9033,17 +8899,17 @@ async function dispatchBatchAgent(options) {
|
|
|
9033
8899
|
};
|
|
9034
8900
|
}
|
|
9035
8901
|
const timestamp = generateTimestamp();
|
|
9036
|
-
const messagesDir =
|
|
8902
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
9037
8903
|
requestFiles = userQueries.map(
|
|
9038
|
-
(_, index) =>
|
|
8904
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
9039
8905
|
);
|
|
9040
8906
|
const responseTmpFiles = userQueries.map(
|
|
9041
|
-
(_, index) =>
|
|
8907
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
9042
8908
|
);
|
|
9043
8909
|
responseFilesFinal = userQueries.map(
|
|
9044
|
-
(_, index) =>
|
|
8910
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
9045
8911
|
);
|
|
9046
|
-
const orchestratorFile =
|
|
8912
|
+
const orchestratorFile = path28.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
9047
8913
|
if (!dryRun) {
|
|
9048
8914
|
await Promise.all(
|
|
9049
8915
|
userQueries.map((query, index) => {
|
|
@@ -9129,7 +8995,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9129
8995
|
|
|
9130
8996
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
9131
8997
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
9132
|
-
import
|
|
8998
|
+
import path29 from "node:path";
|
|
9133
8999
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
9134
9000
|
folders: [
|
|
9135
9001
|
{
|
|
@@ -9160,7 +9026,7 @@ async function provisionSubagents(options) {
|
|
|
9160
9026
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
9161
9027
|
throw new Error("subagents must be a positive integer");
|
|
9162
9028
|
}
|
|
9163
|
-
const targetPath =
|
|
9029
|
+
const targetPath = path29.resolve(targetRoot);
|
|
9164
9030
|
if (!dryRun) {
|
|
9165
9031
|
await ensureDir(targetPath);
|
|
9166
9032
|
}
|
|
@@ -9180,7 +9046,7 @@ async function provisionSubagents(options) {
|
|
|
9180
9046
|
continue;
|
|
9181
9047
|
}
|
|
9182
9048
|
highestNumber = Math.max(highestNumber, parsed);
|
|
9183
|
-
const lockFile =
|
|
9049
|
+
const lockFile = path29.join(entry.absolutePath, lockName);
|
|
9184
9050
|
const locked = await pathExists(lockFile);
|
|
9185
9051
|
if (locked) {
|
|
9186
9052
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -9197,10 +9063,10 @@ async function provisionSubagents(options) {
|
|
|
9197
9063
|
break;
|
|
9198
9064
|
}
|
|
9199
9065
|
const subagentDir = subagent.absolutePath;
|
|
9200
|
-
const githubAgentsDir =
|
|
9201
|
-
const lockFile =
|
|
9202
|
-
const workspaceDst =
|
|
9203
|
-
const wakeupDst =
|
|
9066
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9067
|
+
const lockFile = path29.join(subagentDir, lockName);
|
|
9068
|
+
const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
9069
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
9204
9070
|
const isLocked = await pathExists(lockFile);
|
|
9205
9071
|
if (isLocked && !force) {
|
|
9206
9072
|
continue;
|
|
@@ -9238,10 +9104,10 @@ async function provisionSubagents(options) {
|
|
|
9238
9104
|
let nextIndex = highestNumber;
|
|
9239
9105
|
while (subagentsProvisioned < subagents) {
|
|
9240
9106
|
nextIndex += 1;
|
|
9241
|
-
const subagentDir =
|
|
9242
|
-
const githubAgentsDir =
|
|
9243
|
-
const workspaceDst =
|
|
9244
|
-
const wakeupDst =
|
|
9107
|
+
const subagentDir = path29.join(targetPath, `subagent-${nextIndex}`);
|
|
9108
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9109
|
+
const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
9110
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
9245
9111
|
if (!dryRun) {
|
|
9246
9112
|
await ensureDir(subagentDir);
|
|
9247
9113
|
await ensureDir(githubAgentsDir);
|
|
@@ -9431,7 +9297,7 @@ var VSCodeProvider = class {
|
|
|
9431
9297
|
async function locateVSCodeExecutable(candidate) {
|
|
9432
9298
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9433
9299
|
if (includesPathSeparator) {
|
|
9434
|
-
const resolved =
|
|
9300
|
+
const resolved = path30.isAbsolute(candidate) ? candidate : path30.resolve(candidate);
|
|
9435
9301
|
try {
|
|
9436
9302
|
await access3(resolved, constants3.F_OK);
|
|
9437
9303
|
return resolved;
|
|
@@ -9460,7 +9326,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9460
9326
|
return void 0;
|
|
9461
9327
|
}
|
|
9462
9328
|
try {
|
|
9463
|
-
const stats = await stat4(
|
|
9329
|
+
const stats = await stat4(path30.resolve(template));
|
|
9464
9330
|
return stats.isFile() ? template : void 0;
|
|
9465
9331
|
} catch {
|
|
9466
9332
|
return template;
|
|
@@ -9486,7 +9352,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
9486
9352
|
return "";
|
|
9487
9353
|
}
|
|
9488
9354
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9489
|
-
const fileName =
|
|
9355
|
+
const fileName = path30.basename(absolutePath);
|
|
9490
9356
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9491
9357
|
return `* [${fileName}](${fileUri})`;
|
|
9492
9358
|
});
|
|
@@ -9511,8 +9377,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
9511
9377
|
}
|
|
9512
9378
|
const unique = /* @__PURE__ */ new Map();
|
|
9513
9379
|
for (const attachment of attachments) {
|
|
9514
|
-
const absolutePath =
|
|
9515
|
-
const normalized = absolutePath.split(
|
|
9380
|
+
const absolutePath = path30.resolve(attachment);
|
|
9381
|
+
const normalized = absolutePath.split(path30.sep).join("/");
|
|
9516
9382
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
9517
9383
|
if (!unique.has(absolutePath)) {
|
|
9518
9384
|
unique.set(absolutePath, absolutePath);
|
|
@@ -9527,7 +9393,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9527
9393
|
}
|
|
9528
9394
|
const unique = /* @__PURE__ */ new Map();
|
|
9529
9395
|
for (const attachment of attachments) {
|
|
9530
|
-
const absolutePath =
|
|
9396
|
+
const absolutePath = path30.resolve(attachment);
|
|
9531
9397
|
if (!unique.has(absolutePath)) {
|
|
9532
9398
|
unique.set(absolutePath, absolutePath);
|
|
9533
9399
|
}
|
|
@@ -9535,7 +9401,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9535
9401
|
return Array.from(unique.values());
|
|
9536
9402
|
}
|
|
9537
9403
|
function pathToFileUri3(filePath) {
|
|
9538
|
-
const absolutePath =
|
|
9404
|
+
const absolutePath = path30.isAbsolute(filePath) ? filePath : path30.resolve(filePath);
|
|
9539
9405
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9540
9406
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9541
9407
|
return `file:///${normalizedPath}`;
|
|
@@ -9548,7 +9414,7 @@ function normalizeAttachments(attachments) {
|
|
|
9548
9414
|
}
|
|
9549
9415
|
const deduped = /* @__PURE__ */ new Set();
|
|
9550
9416
|
for (const attachment of attachments) {
|
|
9551
|
-
deduped.add(
|
|
9417
|
+
deduped.add(path30.resolve(attachment));
|
|
9552
9418
|
}
|
|
9553
9419
|
return Array.from(deduped);
|
|
9554
9420
|
}
|
|
@@ -9557,7 +9423,7 @@ function mergeAttachments(all) {
|
|
|
9557
9423
|
for (const list of all) {
|
|
9558
9424
|
if (!list) continue;
|
|
9559
9425
|
for (const inputFile of list) {
|
|
9560
|
-
deduped.add(
|
|
9426
|
+
deduped.add(path30.resolve(inputFile));
|
|
9561
9427
|
}
|
|
9562
9428
|
}
|
|
9563
9429
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9605,8 +9471,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9605
9471
|
|
|
9606
9472
|
// src/evaluation/providers/targets-file.ts
|
|
9607
9473
|
import { constants as constants4 } from "node:fs";
|
|
9608
|
-
import { access as access4, readFile as
|
|
9609
|
-
import
|
|
9474
|
+
import { access as access4, readFile as readFile10 } from "node:fs/promises";
|
|
9475
|
+
import path31 from "node:path";
|
|
9610
9476
|
import { parse as parse4 } from "yaml";
|
|
9611
9477
|
function isRecord(value) {
|
|
9612
9478
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9643,11 +9509,11 @@ async function fileExists3(filePath) {
|
|
|
9643
9509
|
}
|
|
9644
9510
|
}
|
|
9645
9511
|
async function readTargetDefinitions(filePath) {
|
|
9646
|
-
const absolutePath =
|
|
9512
|
+
const absolutePath = path31.resolve(filePath);
|
|
9647
9513
|
if (!await fileExists3(absolutePath)) {
|
|
9648
9514
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9649
9515
|
}
|
|
9650
|
-
const raw = await
|
|
9516
|
+
const raw = await readFile10(absolutePath, "utf8");
|
|
9651
9517
|
const parsed = parse4(raw);
|
|
9652
9518
|
if (!isRecord(parsed)) {
|
|
9653
9519
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -9663,21 +9529,21 @@ function listTargetNames(definitions) {
|
|
|
9663
9529
|
}
|
|
9664
9530
|
|
|
9665
9531
|
// src/evaluation/providers/provider-discovery.ts
|
|
9666
|
-
import
|
|
9667
|
-
import
|
|
9532
|
+
import path32 from "node:path";
|
|
9533
|
+
import fg from "fast-glob";
|
|
9668
9534
|
async function discoverProviders(registry, baseDir) {
|
|
9669
9535
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9670
9536
|
const candidateDirs = [];
|
|
9671
|
-
let dir =
|
|
9672
|
-
const root =
|
|
9537
|
+
let dir = path32.resolve(baseDir);
|
|
9538
|
+
const root = path32.parse(dir).root;
|
|
9673
9539
|
while (dir !== root) {
|
|
9674
|
-
candidateDirs.push(
|
|
9675
|
-
dir =
|
|
9540
|
+
candidateDirs.push(path32.join(dir, ".agentv", "providers"));
|
|
9541
|
+
dir = path32.dirname(dir);
|
|
9676
9542
|
}
|
|
9677
9543
|
let files = [];
|
|
9678
9544
|
for (const providersDir of candidateDirs) {
|
|
9679
9545
|
try {
|
|
9680
|
-
const found = await
|
|
9546
|
+
const found = await fg(patterns, {
|
|
9681
9547
|
cwd: providersDir,
|
|
9682
9548
|
absolute: true,
|
|
9683
9549
|
onlyFiles: true
|
|
@@ -9688,7 +9554,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9688
9554
|
}
|
|
9689
9555
|
const discoveredKinds = [];
|
|
9690
9556
|
for (const filePath of files) {
|
|
9691
|
-
const basename =
|
|
9557
|
+
const basename = path32.basename(filePath);
|
|
9692
9558
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9693
9559
|
if (registry.has(kindName)) {
|
|
9694
9560
|
continue;
|
|
@@ -9897,15 +9763,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9897
9763
|
});
|
|
9898
9764
|
}
|
|
9899
9765
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9900
|
-
const { mkdir: mkdir15, readFile:
|
|
9766
|
+
const { mkdir: mkdir15, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9901
9767
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9902
|
-
const
|
|
9768
|
+
const path45 = await import("node:path");
|
|
9903
9769
|
const { randomUUID: randomUUID9 } = await import("node:crypto");
|
|
9904
|
-
const dir =
|
|
9770
|
+
const dir = path45.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
9905
9771
|
await mkdir15(dir, { recursive: true });
|
|
9906
|
-
const stdinPath =
|
|
9907
|
-
const stdoutPath =
|
|
9908
|
-
const stderrPath =
|
|
9772
|
+
const stdinPath = path45.join(dir, "stdin.txt");
|
|
9773
|
+
const stdoutPath = path45.join(dir, "stdout.txt");
|
|
9774
|
+
const stderrPath = path45.join(dir, "stderr.txt");
|
|
9909
9775
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9910
9776
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9911
9777
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -9935,8 +9801,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
9935
9801
|
resolve(code ?? 0);
|
|
9936
9802
|
});
|
|
9937
9803
|
});
|
|
9938
|
-
const stdout = (await
|
|
9939
|
-
const stderr = (await
|
|
9804
|
+
const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9805
|
+
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9940
9806
|
return { stdout, stderr, exitCode };
|
|
9941
9807
|
} finally {
|
|
9942
9808
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -10255,7 +10121,7 @@ var CodeEvaluator = class {
|
|
|
10255
10121
|
outputPath,
|
|
10256
10122
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
10257
10123
|
inputFiles: context.evalCase.file_paths.filter(
|
|
10258
|
-
(
|
|
10124
|
+
(path45) => !context.evalCase.guideline_paths.includes(path45)
|
|
10259
10125
|
),
|
|
10260
10126
|
input: context.evalCase.input,
|
|
10261
10127
|
trace: context.trace ?? null,
|
|
@@ -10387,7 +10253,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10387
10253
|
|
|
10388
10254
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10389
10255
|
import fs2 from "node:fs/promises";
|
|
10390
|
-
import
|
|
10256
|
+
import path33 from "node:path";
|
|
10391
10257
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10392
10258
|
import { z as z3 } from "zod";
|
|
10393
10259
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11219,8 +11085,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11219
11085
|
};
|
|
11220
11086
|
}
|
|
11221
11087
|
function resolveSandboxed(basePath, relativePath) {
|
|
11222
|
-
const resolved =
|
|
11223
|
-
if (!resolved.startsWith(basePath +
|
|
11088
|
+
const resolved = path33.resolve(basePath, relativePath);
|
|
11089
|
+
if (!resolved.startsWith(basePath + path33.sep) && resolved !== basePath) {
|
|
11224
11090
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
11225
11091
|
}
|
|
11226
11092
|
return resolved;
|
|
@@ -11310,11 +11176,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11310
11176
|
for (const entry of entries) {
|
|
11311
11177
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
11312
11178
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11313
|
-
const fullPath =
|
|
11179
|
+
const fullPath = path33.join(dirPath, entry.name);
|
|
11314
11180
|
if (entry.isDirectory()) {
|
|
11315
11181
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11316
11182
|
} else if (entry.isFile()) {
|
|
11317
|
-
const ext =
|
|
11183
|
+
const ext = path33.extname(entry.name).toLowerCase();
|
|
11318
11184
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11319
11185
|
try {
|
|
11320
11186
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -11326,7 +11192,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11326
11192
|
regex.lastIndex = 0;
|
|
11327
11193
|
if (regex.test(lines[i])) {
|
|
11328
11194
|
matches.push({
|
|
11329
|
-
file:
|
|
11195
|
+
file: path33.relative(workspacePath, fullPath),
|
|
11330
11196
|
line: i + 1,
|
|
11331
11197
|
text: lines[i].substring(0, 200)
|
|
11332
11198
|
});
|
|
@@ -11961,115 +11827,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11961
11827
|
* Evaluate a single field against the expected value.
|
|
11962
11828
|
*/
|
|
11963
11829
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11964
|
-
const { path:
|
|
11965
|
-
const candidateValue = resolvePath(candidateData,
|
|
11966
|
-
const expectedValue = resolvePath(expectedData,
|
|
11830
|
+
const { path: path45, match, required = true, weight = 1 } = fieldConfig;
|
|
11831
|
+
const candidateValue = resolvePath(candidateData, path45);
|
|
11832
|
+
const expectedValue = resolvePath(expectedData, path45);
|
|
11967
11833
|
if (expectedValue === void 0) {
|
|
11968
11834
|
return {
|
|
11969
|
-
path:
|
|
11835
|
+
path: path45,
|
|
11970
11836
|
score: 1,
|
|
11971
11837
|
// No expected value means no comparison needed
|
|
11972
11838
|
weight,
|
|
11973
11839
|
hit: true,
|
|
11974
|
-
message: `${
|
|
11840
|
+
message: `${path45}: no expected value`
|
|
11975
11841
|
};
|
|
11976
11842
|
}
|
|
11977
11843
|
if (candidateValue === void 0) {
|
|
11978
11844
|
if (required) {
|
|
11979
11845
|
return {
|
|
11980
|
-
path:
|
|
11846
|
+
path: path45,
|
|
11981
11847
|
score: 0,
|
|
11982
11848
|
weight,
|
|
11983
11849
|
hit: false,
|
|
11984
|
-
message: `${
|
|
11850
|
+
message: `${path45} (required, missing)`
|
|
11985
11851
|
};
|
|
11986
11852
|
}
|
|
11987
11853
|
return {
|
|
11988
|
-
path:
|
|
11854
|
+
path: path45,
|
|
11989
11855
|
score: 1,
|
|
11990
11856
|
// Don't penalize missing optional fields
|
|
11991
11857
|
weight: 0,
|
|
11992
11858
|
// Zero weight means it won't affect the score
|
|
11993
11859
|
hit: true,
|
|
11994
|
-
message: `${
|
|
11860
|
+
message: `${path45}: optional field missing`
|
|
11995
11861
|
};
|
|
11996
11862
|
}
|
|
11997
11863
|
switch (match) {
|
|
11998
11864
|
case "exact":
|
|
11999
|
-
return this.compareExact(
|
|
11865
|
+
return this.compareExact(path45, candidateValue, expectedValue, weight);
|
|
12000
11866
|
case "numeric_tolerance":
|
|
12001
11867
|
return this.compareNumericTolerance(
|
|
12002
|
-
|
|
11868
|
+
path45,
|
|
12003
11869
|
candidateValue,
|
|
12004
11870
|
expectedValue,
|
|
12005
11871
|
fieldConfig,
|
|
12006
11872
|
weight
|
|
12007
11873
|
);
|
|
12008
11874
|
case "date":
|
|
12009
|
-
return this.compareDate(
|
|
11875
|
+
return this.compareDate(path45, candidateValue, expectedValue, fieldConfig, weight);
|
|
12010
11876
|
default:
|
|
12011
11877
|
return {
|
|
12012
|
-
path:
|
|
11878
|
+
path: path45,
|
|
12013
11879
|
score: 0,
|
|
12014
11880
|
weight,
|
|
12015
11881
|
hit: false,
|
|
12016
|
-
message: `${
|
|
11882
|
+
message: `${path45}: unknown match type "${match}"`
|
|
12017
11883
|
};
|
|
12018
11884
|
}
|
|
12019
11885
|
}
|
|
12020
11886
|
/**
|
|
12021
11887
|
* Exact equality comparison.
|
|
12022
11888
|
*/
|
|
12023
|
-
compareExact(
|
|
11889
|
+
compareExact(path45, candidateValue, expectedValue, weight) {
|
|
12024
11890
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
12025
11891
|
return {
|
|
12026
|
-
path:
|
|
11892
|
+
path: path45,
|
|
12027
11893
|
score: 1,
|
|
12028
11894
|
weight,
|
|
12029
11895
|
hit: true,
|
|
12030
|
-
message:
|
|
11896
|
+
message: path45
|
|
12031
11897
|
};
|
|
12032
11898
|
}
|
|
12033
11899
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
12034
11900
|
return {
|
|
12035
|
-
path:
|
|
11901
|
+
path: path45,
|
|
12036
11902
|
score: 0,
|
|
12037
11903
|
weight,
|
|
12038
11904
|
hit: false,
|
|
12039
|
-
message: `${
|
|
11905
|
+
message: `${path45} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
12040
11906
|
};
|
|
12041
11907
|
}
|
|
12042
11908
|
return {
|
|
12043
|
-
path:
|
|
11909
|
+
path: path45,
|
|
12044
11910
|
score: 0,
|
|
12045
11911
|
weight,
|
|
12046
11912
|
hit: false,
|
|
12047
|
-
message: `${
|
|
11913
|
+
message: `${path45} (value mismatch)`
|
|
12048
11914
|
};
|
|
12049
11915
|
}
|
|
12050
11916
|
/**
|
|
12051
11917
|
* Numeric comparison with absolute or relative tolerance.
|
|
12052
11918
|
*/
|
|
12053
|
-
compareNumericTolerance(
|
|
11919
|
+
compareNumericTolerance(path45, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12054
11920
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
12055
11921
|
const candidateNum = toNumber(candidateValue);
|
|
12056
11922
|
const expectedNum = toNumber(expectedValue);
|
|
12057
11923
|
if (candidateNum === null || expectedNum === null) {
|
|
12058
11924
|
return {
|
|
12059
|
-
path:
|
|
11925
|
+
path: path45,
|
|
12060
11926
|
score: 0,
|
|
12061
11927
|
weight,
|
|
12062
11928
|
hit: false,
|
|
12063
|
-
message: `${
|
|
11929
|
+
message: `${path45} (non-numeric value)`
|
|
12064
11930
|
};
|
|
12065
11931
|
}
|
|
12066
11932
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
12067
11933
|
return {
|
|
12068
|
-
path:
|
|
11934
|
+
path: path45,
|
|
12069
11935
|
score: 0,
|
|
12070
11936
|
weight,
|
|
12071
11937
|
hit: false,
|
|
12072
|
-
message: `${
|
|
11938
|
+
message: `${path45} (invalid numeric value)`
|
|
12073
11939
|
};
|
|
12074
11940
|
}
|
|
12075
11941
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -12082,61 +11948,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
12082
11948
|
}
|
|
12083
11949
|
if (withinTolerance) {
|
|
12084
11950
|
return {
|
|
12085
|
-
path:
|
|
11951
|
+
path: path45,
|
|
12086
11952
|
score: 1,
|
|
12087
11953
|
weight,
|
|
12088
11954
|
hit: true,
|
|
12089
|
-
message: `${
|
|
11955
|
+
message: `${path45} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12090
11956
|
};
|
|
12091
11957
|
}
|
|
12092
11958
|
return {
|
|
12093
|
-
path:
|
|
11959
|
+
path: path45,
|
|
12094
11960
|
score: 0,
|
|
12095
11961
|
weight,
|
|
12096
11962
|
hit: false,
|
|
12097
|
-
message: `${
|
|
11963
|
+
message: `${path45} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12098
11964
|
};
|
|
12099
11965
|
}
|
|
12100
11966
|
/**
|
|
12101
11967
|
* Date comparison with format normalization.
|
|
12102
11968
|
*/
|
|
12103
|
-
compareDate(
|
|
11969
|
+
compareDate(path45, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12104
11970
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12105
11971
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12106
11972
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12107
11973
|
if (candidateDate === null) {
|
|
12108
11974
|
return {
|
|
12109
|
-
path:
|
|
11975
|
+
path: path45,
|
|
12110
11976
|
score: 0,
|
|
12111
11977
|
weight,
|
|
12112
11978
|
hit: false,
|
|
12113
|
-
message: `${
|
|
11979
|
+
message: `${path45} (unparseable candidate date)`
|
|
12114
11980
|
};
|
|
12115
11981
|
}
|
|
12116
11982
|
if (expectedDate === null) {
|
|
12117
11983
|
return {
|
|
12118
|
-
path:
|
|
11984
|
+
path: path45,
|
|
12119
11985
|
score: 0,
|
|
12120
11986
|
weight,
|
|
12121
11987
|
hit: false,
|
|
12122
|
-
message: `${
|
|
11988
|
+
message: `${path45} (unparseable expected date)`
|
|
12123
11989
|
};
|
|
12124
11990
|
}
|
|
12125
11991
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12126
11992
|
return {
|
|
12127
|
-
path:
|
|
11993
|
+
path: path45,
|
|
12128
11994
|
score: 1,
|
|
12129
11995
|
weight,
|
|
12130
11996
|
hit: true,
|
|
12131
|
-
message:
|
|
11997
|
+
message: path45
|
|
12132
11998
|
};
|
|
12133
11999
|
}
|
|
12134
12000
|
return {
|
|
12135
|
-
path:
|
|
12001
|
+
path: path45,
|
|
12136
12002
|
score: 0,
|
|
12137
12003
|
weight,
|
|
12138
12004
|
hit: false,
|
|
12139
|
-
message: `${
|
|
12005
|
+
message: `${path45} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12140
12006
|
};
|
|
12141
12007
|
}
|
|
12142
12008
|
/**
|
|
@@ -12169,11 +12035,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12169
12035
|
};
|
|
12170
12036
|
}
|
|
12171
12037
|
};
|
|
12172
|
-
function resolvePath(obj,
|
|
12173
|
-
if (!
|
|
12038
|
+
function resolvePath(obj, path45) {
|
|
12039
|
+
if (!path45 || !obj) {
|
|
12174
12040
|
return void 0;
|
|
12175
12041
|
}
|
|
12176
|
-
const parts =
|
|
12042
|
+
const parts = path45.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12177
12043
|
let current = obj;
|
|
12178
12044
|
for (const part of parts) {
|
|
12179
12045
|
if (current === null || current === void 0) {
|
|
@@ -12633,8 +12499,8 @@ var TokenUsageEvaluator = class {
|
|
|
12633
12499
|
};
|
|
12634
12500
|
|
|
12635
12501
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12636
|
-
function getNestedValue(obj,
|
|
12637
|
-
const parts =
|
|
12502
|
+
function getNestedValue(obj, path45) {
|
|
12503
|
+
const parts = path45.split(".");
|
|
12638
12504
|
let current = obj;
|
|
12639
12505
|
for (const part of parts) {
|
|
12640
12506
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13256,7 +13122,7 @@ function runEqualsAssertion(output, value) {
|
|
|
13256
13122
|
// src/evaluation/orchestrator.ts
|
|
13257
13123
|
import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
|
|
13258
13124
|
import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13259
|
-
import
|
|
13125
|
+
import path42 from "node:path";
|
|
13260
13126
|
import micromatch4 from "micromatch";
|
|
13261
13127
|
|
|
13262
13128
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -13470,7 +13336,7 @@ var InlineAssertEvaluator = class {
|
|
|
13470
13336
|
};
|
|
13471
13337
|
|
|
13472
13338
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13473
|
-
import
|
|
13339
|
+
import path34 from "node:path";
|
|
13474
13340
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13475
13341
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13476
13342
|
if (!context) {
|
|
@@ -13519,7 +13385,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13519
13385
|
};
|
|
13520
13386
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13521
13387
|
const scriptPath = script[script.length - 1];
|
|
13522
|
-
const cwd =
|
|
13388
|
+
const cwd = path34.dirname(scriptPath);
|
|
13523
13389
|
try {
|
|
13524
13390
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13525
13391
|
const prompt = stdout.trim();
|
|
@@ -13791,21 +13657,21 @@ function createBuiltinRegistry() {
|
|
|
13791
13657
|
}
|
|
13792
13658
|
|
|
13793
13659
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13794
|
-
import
|
|
13795
|
-
import
|
|
13660
|
+
import path35 from "node:path";
|
|
13661
|
+
import fg2 from "fast-glob";
|
|
13796
13662
|
async function discoverAssertions(registry, baseDir) {
|
|
13797
13663
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13798
13664
|
const candidateDirs = [];
|
|
13799
|
-
let dir =
|
|
13800
|
-
const root =
|
|
13665
|
+
let dir = path35.resolve(baseDir);
|
|
13666
|
+
const root = path35.parse(dir).root;
|
|
13801
13667
|
while (dir !== root) {
|
|
13802
|
-
candidateDirs.push(
|
|
13803
|
-
dir =
|
|
13668
|
+
candidateDirs.push(path35.join(dir, ".agentv", "assertions"));
|
|
13669
|
+
dir = path35.dirname(dir);
|
|
13804
13670
|
}
|
|
13805
13671
|
let files = [];
|
|
13806
13672
|
for (const assertionsDir of candidateDirs) {
|
|
13807
13673
|
try {
|
|
13808
|
-
const found = await
|
|
13674
|
+
const found = await fg2(patterns, {
|
|
13809
13675
|
cwd: assertionsDir,
|
|
13810
13676
|
absolute: true,
|
|
13811
13677
|
onlyFiles: true
|
|
@@ -13816,7 +13682,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13816
13682
|
}
|
|
13817
13683
|
const discoveredTypes = [];
|
|
13818
13684
|
for (const filePath of files) {
|
|
13819
|
-
const basename =
|
|
13685
|
+
const basename = path35.basename(filePath);
|
|
13820
13686
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13821
13687
|
if (registry.has(typeName)) {
|
|
13822
13688
|
continue;
|
|
@@ -13834,22 +13700,22 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13834
13700
|
}
|
|
13835
13701
|
|
|
13836
13702
|
// src/evaluation/registry/grader-discovery.ts
|
|
13837
|
-
import
|
|
13838
|
-
import
|
|
13703
|
+
import path36 from "node:path";
|
|
13704
|
+
import fg3 from "fast-glob";
|
|
13839
13705
|
async function discoverGraders(registry, baseDir) {
|
|
13840
13706
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13841
13707
|
const candidateDirs = [];
|
|
13842
|
-
let dir =
|
|
13843
|
-
const root =
|
|
13708
|
+
let dir = path36.resolve(baseDir);
|
|
13709
|
+
const root = path36.parse(dir).root;
|
|
13844
13710
|
while (dir !== root) {
|
|
13845
|
-
candidateDirs.push(
|
|
13846
|
-
candidateDirs.push(
|
|
13847
|
-
dir =
|
|
13711
|
+
candidateDirs.push(path36.join(dir, ".agentv", "graders"));
|
|
13712
|
+
candidateDirs.push(path36.join(dir, ".agentv", "judges"));
|
|
13713
|
+
dir = path36.dirname(dir);
|
|
13848
13714
|
}
|
|
13849
13715
|
let files = [];
|
|
13850
13716
|
for (const gradersDir of candidateDirs) {
|
|
13851
13717
|
try {
|
|
13852
|
-
const found = await
|
|
13718
|
+
const found = await fg3(patterns, {
|
|
13853
13719
|
cwd: gradersDir,
|
|
13854
13720
|
absolute: true,
|
|
13855
13721
|
onlyFiles: true
|
|
@@ -13860,7 +13726,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13860
13726
|
}
|
|
13861
13727
|
const discoveredTypes = [];
|
|
13862
13728
|
for (const filePath of files) {
|
|
13863
|
-
const basename =
|
|
13729
|
+
const basename = path36.basename(filePath);
|
|
13864
13730
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13865
13731
|
if (registry.has(typeName)) {
|
|
13866
13732
|
continue;
|
|
@@ -14020,7 +13886,7 @@ function getTCritical(df) {
|
|
|
14020
13886
|
// src/evaluation/workspace/file-changes.ts
|
|
14021
13887
|
import { exec as execCallback } from "node:child_process";
|
|
14022
13888
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
14023
|
-
import
|
|
13889
|
+
import path37 from "node:path";
|
|
14024
13890
|
import { promisify as promisify4 } from "node:util";
|
|
14025
13891
|
var execAsync4 = promisify4(execCallback);
|
|
14026
13892
|
function gitExecOpts(workspacePath) {
|
|
@@ -14054,10 +13920,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
14054
13920
|
}
|
|
14055
13921
|
for (const entry of entries) {
|
|
14056
13922
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
14057
|
-
const childPath =
|
|
13923
|
+
const childPath = path37.join(workspacePath, entry);
|
|
14058
13924
|
try {
|
|
14059
13925
|
if (!statSync(childPath).isDirectory()) continue;
|
|
14060
|
-
if (!statSync(
|
|
13926
|
+
if (!statSync(path37.join(childPath, ".git")).isDirectory()) continue;
|
|
14061
13927
|
} catch {
|
|
14062
13928
|
continue;
|
|
14063
13929
|
}
|
|
@@ -14068,7 +13934,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
14068
13934
|
|
|
14069
13935
|
// src/evaluation/workspace/manager.ts
|
|
14070
13936
|
import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
14071
|
-
import
|
|
13937
|
+
import path38 from "node:path";
|
|
14072
13938
|
var TemplateNotFoundError = class extends Error {
|
|
14073
13939
|
constructor(templatePath) {
|
|
14074
13940
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -14098,14 +13964,14 @@ async function isDirectory(filePath) {
|
|
|
14098
13964
|
}
|
|
14099
13965
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
14100
13966
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14101
|
-
return
|
|
13967
|
+
return path38.join(root, evalRunId, caseId);
|
|
14102
13968
|
}
|
|
14103
13969
|
async function copyDirectoryRecursive(src, dest) {
|
|
14104
13970
|
await mkdir11(dest, { recursive: true });
|
|
14105
13971
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
14106
13972
|
for (const entry of entries) {
|
|
14107
|
-
const srcPath =
|
|
14108
|
-
const destPath =
|
|
13973
|
+
const srcPath = path38.join(src, entry.name);
|
|
13974
|
+
const destPath = path38.join(dest, entry.name);
|
|
14109
13975
|
if (entry.name === ".git") {
|
|
14110
13976
|
continue;
|
|
14111
13977
|
}
|
|
@@ -14117,7 +13983,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
14117
13983
|
}
|
|
14118
13984
|
}
|
|
14119
13985
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
14120
|
-
const resolvedTemplatePath =
|
|
13986
|
+
const resolvedTemplatePath = path38.resolve(templatePath);
|
|
14121
13987
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
14122
13988
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
14123
13989
|
}
|
|
@@ -14166,7 +14032,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
14166
14032
|
}
|
|
14167
14033
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
14168
14034
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14169
|
-
const evalDir =
|
|
14035
|
+
const evalDir = path38.join(root, evalRunId);
|
|
14170
14036
|
if (await fileExists(evalDir)) {
|
|
14171
14037
|
await rm4(evalDir, { recursive: true, force: true });
|
|
14172
14038
|
}
|
|
@@ -14176,8 +14042,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
14176
14042
|
import { execFile } from "node:child_process";
|
|
14177
14043
|
import { createHash } from "node:crypto";
|
|
14178
14044
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14179
|
-
import { cp as cp2, mkdir as mkdir12, readFile as
|
|
14180
|
-
import
|
|
14045
|
+
import { cp as cp2, mkdir as mkdir12, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
14046
|
+
import path39 from "node:path";
|
|
14181
14047
|
import { promisify as promisify5 } from "node:util";
|
|
14182
14048
|
var execFileAsync = promisify5(execFile);
|
|
14183
14049
|
function gitEnv() {
|
|
@@ -14231,8 +14097,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
14231
14097
|
await mkdir12(dest, { recursive: true });
|
|
14232
14098
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
14233
14099
|
for (const entry of entries) {
|
|
14234
|
-
const srcPath =
|
|
14235
|
-
const destPath =
|
|
14100
|
+
const srcPath = path39.join(src, entry.name);
|
|
14101
|
+
const destPath = path39.join(dest, entry.name);
|
|
14236
14102
|
if (entry.name === ".git") {
|
|
14237
14103
|
continue;
|
|
14238
14104
|
}
|
|
@@ -14265,7 +14131,7 @@ var WorkspacePoolManager = class {
|
|
|
14265
14131
|
async acquireWorkspace(options) {
|
|
14266
14132
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
14267
14133
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
14268
|
-
const poolDir =
|
|
14134
|
+
const poolDir = path39.join(this.poolRoot, fingerprint);
|
|
14269
14135
|
await mkdir12(poolDir, { recursive: true });
|
|
14270
14136
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
14271
14137
|
if (drifted) {
|
|
@@ -14275,7 +14141,7 @@ var WorkspacePoolManager = class {
|
|
|
14275
14141
|
await this.removeAllSlots(poolDir);
|
|
14276
14142
|
}
|
|
14277
14143
|
for (let i = 0; i < maxSlots; i++) {
|
|
14278
|
-
const slotPath =
|
|
14144
|
+
const slotPath = path39.join(poolDir, `slot-${i}`);
|
|
14279
14145
|
const lockPath = `${slotPath}.lock`;
|
|
14280
14146
|
const locked = await this.tryLock(lockPath);
|
|
14281
14147
|
if (!locked) {
|
|
@@ -14337,7 +14203,7 @@ var WorkspacePoolManager = class {
|
|
|
14337
14203
|
throw err;
|
|
14338
14204
|
}
|
|
14339
14205
|
try {
|
|
14340
|
-
const pidStr = await
|
|
14206
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
14341
14207
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14342
14208
|
if (!Number.isNaN(pid)) {
|
|
14343
14209
|
try {
|
|
@@ -14362,9 +14228,9 @@ var WorkspacePoolManager = class {
|
|
|
14362
14228
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14363
14229
|
*/
|
|
14364
14230
|
async checkDrift(poolDir, fingerprint) {
|
|
14365
|
-
const metadataPath =
|
|
14231
|
+
const metadataPath = path39.join(poolDir, "metadata.json");
|
|
14366
14232
|
try {
|
|
14367
|
-
const raw = await
|
|
14233
|
+
const raw = await readFile11(metadataPath, "utf-8");
|
|
14368
14234
|
const metadata = JSON.parse(raw);
|
|
14369
14235
|
return metadata.fingerprint !== fingerprint;
|
|
14370
14236
|
} catch {
|
|
@@ -14379,17 +14245,17 @@ var WorkspacePoolManager = class {
|
|
|
14379
14245
|
repos,
|
|
14380
14246
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14381
14247
|
};
|
|
14382
|
-
await writeFile7(
|
|
14248
|
+
await writeFile7(path39.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14383
14249
|
}
|
|
14384
14250
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14385
14251
|
async removeAllSlots(poolDir) {
|
|
14386
14252
|
const entries = await readdir4(poolDir);
|
|
14387
14253
|
for (const entry of entries) {
|
|
14388
14254
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14389
|
-
const lockPath =
|
|
14255
|
+
const lockPath = path39.join(poolDir, `${entry}.lock`);
|
|
14390
14256
|
if (existsSync2(lockPath)) {
|
|
14391
14257
|
try {
|
|
14392
|
-
const pidStr = await
|
|
14258
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
14393
14259
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14394
14260
|
if (!Number.isNaN(pid)) {
|
|
14395
14261
|
try {
|
|
@@ -14402,12 +14268,12 @@ var WorkspacePoolManager = class {
|
|
|
14402
14268
|
} catch {
|
|
14403
14269
|
}
|
|
14404
14270
|
}
|
|
14405
|
-
await rm5(
|
|
14271
|
+
await rm5(path39.join(poolDir, entry), { recursive: true, force: true });
|
|
14406
14272
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14407
14273
|
});
|
|
14408
14274
|
}
|
|
14409
14275
|
}
|
|
14410
|
-
await rm5(
|
|
14276
|
+
await rm5(path39.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14411
14277
|
});
|
|
14412
14278
|
}
|
|
14413
14279
|
/**
|
|
@@ -14417,7 +14283,7 @@ var WorkspacePoolManager = class {
|
|
|
14417
14283
|
*/
|
|
14418
14284
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14419
14285
|
for (const repo of repos) {
|
|
14420
|
-
const repoDir =
|
|
14286
|
+
const repoDir = path39.join(slotPath, repo.path);
|
|
14421
14287
|
if (!existsSync2(repoDir)) {
|
|
14422
14288
|
continue;
|
|
14423
14289
|
}
|
|
@@ -14444,7 +14310,7 @@ var WorkspacePoolManager = class {
|
|
|
14444
14310
|
// src/evaluation/workspace/repo-manager.ts
|
|
14445
14311
|
import { execFile as execFile2 } from "node:child_process";
|
|
14446
14312
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14447
|
-
import
|
|
14313
|
+
import path40 from "node:path";
|
|
14448
14314
|
import { promisify as promisify6 } from "node:util";
|
|
14449
14315
|
var execFileAsync2 = promisify6(execFile2);
|
|
14450
14316
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14544,7 +14410,7 @@ ${lines.join("\n")}`;
|
|
|
14544
14410
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14545
14411
|
*/
|
|
14546
14412
|
async materialize(repo, workspacePath) {
|
|
14547
|
-
const targetDir =
|
|
14413
|
+
const targetDir = path40.join(workspacePath, repo.path);
|
|
14548
14414
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14549
14415
|
const startedAt = Date.now();
|
|
14550
14416
|
if (this.verbose) {
|
|
@@ -14635,7 +14501,7 @@ ${lines.join("\n")}`;
|
|
|
14635
14501
|
async reset(repos, workspacePath, reset) {
|
|
14636
14502
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14637
14503
|
for (const repo of repos) {
|
|
14638
|
-
const targetDir =
|
|
14504
|
+
const targetDir = path40.join(workspacePath, repo.path);
|
|
14639
14505
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14640
14506
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14641
14507
|
}
|
|
@@ -14644,16 +14510,16 @@ ${lines.join("\n")}`;
|
|
|
14644
14510
|
|
|
14645
14511
|
// src/evaluation/workspace/resolve.ts
|
|
14646
14512
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
14647
|
-
import
|
|
14513
|
+
import path41 from "node:path";
|
|
14648
14514
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14649
14515
|
if (!templatePath) {
|
|
14650
14516
|
return void 0;
|
|
14651
14517
|
}
|
|
14652
|
-
const resolved =
|
|
14518
|
+
const resolved = path41.resolve(templatePath);
|
|
14653
14519
|
const stats = await stat6(resolved);
|
|
14654
14520
|
if (stats.isFile()) {
|
|
14655
14521
|
return {
|
|
14656
|
-
dir:
|
|
14522
|
+
dir: path41.dirname(resolved),
|
|
14657
14523
|
workspaceFile: resolved
|
|
14658
14524
|
};
|
|
14659
14525
|
}
|
|
@@ -14665,14 +14531,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
14665
14531
|
if (workspaceFiles.length === 1) {
|
|
14666
14532
|
return {
|
|
14667
14533
|
dir: resolved,
|
|
14668
|
-
workspaceFile:
|
|
14534
|
+
workspaceFile: path41.join(resolved, workspaceFiles[0])
|
|
14669
14535
|
};
|
|
14670
14536
|
}
|
|
14671
14537
|
if (workspaceFiles.length > 1) {
|
|
14672
14538
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14673
14539
|
return {
|
|
14674
14540
|
dir: resolved,
|
|
14675
|
-
workspaceFile: conventionFile ?
|
|
14541
|
+
workspaceFile: conventionFile ? path41.join(resolved, conventionFile) : void 0
|
|
14676
14542
|
};
|
|
14677
14543
|
}
|
|
14678
14544
|
return { dir: resolved };
|
|
@@ -14876,7 +14742,7 @@ async function runEvaluation(options) {
|
|
|
14876
14742
|
];
|
|
14877
14743
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14878
14744
|
const typeRegistry = createBuiltinRegistry();
|
|
14879
|
-
const discoveryBaseDir = evalFilePath ?
|
|
14745
|
+
const discoveryBaseDir = evalFilePath ? path42.dirname(path42.resolve(evalFilePath)) : process.cwd();
|
|
14880
14746
|
const evalDir = discoveryBaseDir;
|
|
14881
14747
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14882
14748
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -15065,7 +14931,7 @@ async function runEvaluation(options) {
|
|
|
15065
14931
|
}
|
|
15066
14932
|
try {
|
|
15067
14933
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
15068
|
-
const copiedWorkspaceFile =
|
|
14934
|
+
const copiedWorkspaceFile = path42.join(sharedWorkspacePath, path42.basename(suiteWorkspaceFile));
|
|
15069
14935
|
try {
|
|
15070
14936
|
await stat7(copiedWorkspaceFile);
|
|
15071
14937
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -15178,7 +15044,7 @@ async function runEvaluation(options) {
|
|
|
15178
15044
|
dataset: evalCase.dataset,
|
|
15179
15045
|
score: 0,
|
|
15180
15046
|
assertions: [],
|
|
15181
|
-
|
|
15047
|
+
output: [],
|
|
15182
15048
|
target: target.name,
|
|
15183
15049
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
15184
15050
|
budgetExceeded: true,
|
|
@@ -15214,7 +15080,7 @@ async function runEvaluation(options) {
|
|
|
15214
15080
|
dataset: evalCase.dataset,
|
|
15215
15081
|
score: 0,
|
|
15216
15082
|
assertions: [],
|
|
15217
|
-
|
|
15083
|
+
output: [],
|
|
15218
15084
|
target: target.name,
|
|
15219
15085
|
error: errorMsg,
|
|
15220
15086
|
executionStatus: "execution_error",
|
|
@@ -15642,7 +15508,7 @@ async function runEvalCase(options) {
|
|
|
15642
15508
|
);
|
|
15643
15509
|
}
|
|
15644
15510
|
if (caseWorkspaceFile && workspacePath) {
|
|
15645
|
-
const copiedFile =
|
|
15511
|
+
const copiedFile = path42.join(workspacePath, path42.basename(caseWorkspaceFile));
|
|
15646
15512
|
try {
|
|
15647
15513
|
await stat7(copiedFile);
|
|
15648
15514
|
caseWorkspaceFile = copiedFile;
|
|
@@ -15702,10 +15568,10 @@ async function runEvalCase(options) {
|
|
|
15702
15568
|
const files = evalCase.metadata.agent_skills_files;
|
|
15703
15569
|
if (baseDir && files.length > 0) {
|
|
15704
15570
|
for (const relPath of files) {
|
|
15705
|
-
const srcPath =
|
|
15706
|
-
const destPath =
|
|
15571
|
+
const srcPath = path42.resolve(baseDir, relPath);
|
|
15572
|
+
const destPath = path42.resolve(workspacePath, relPath);
|
|
15707
15573
|
try {
|
|
15708
|
-
await mkdir13(
|
|
15574
|
+
await mkdir13(path42.dirname(destPath), { recursive: true });
|
|
15709
15575
|
await copyFile2(srcPath, destPath);
|
|
15710
15576
|
} catch (error) {
|
|
15711
15577
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -16181,7 +16047,6 @@ async function evaluateCandidate(options) {
|
|
|
16181
16047
|
conversationId: evalCase.conversation_id,
|
|
16182
16048
|
score: score.score,
|
|
16183
16049
|
assertions: score.assertions,
|
|
16184
|
-
outputText: candidate,
|
|
16185
16050
|
target: target.name,
|
|
16186
16051
|
tokenUsage,
|
|
16187
16052
|
costUsd,
|
|
@@ -16192,7 +16057,7 @@ async function evaluateCandidate(options) {
|
|
|
16192
16057
|
input,
|
|
16193
16058
|
scores,
|
|
16194
16059
|
trace,
|
|
16195
|
-
output,
|
|
16060
|
+
output: output ?? [{ role: "assistant", content: candidate }],
|
|
16196
16061
|
fileChanges,
|
|
16197
16062
|
executionStatus: classifyQualityStatus(score.score)
|
|
16198
16063
|
};
|
|
@@ -16326,7 +16191,7 @@ async function runEvaluatorList(options) {
|
|
|
16326
16191
|
fileChanges,
|
|
16327
16192
|
workspacePath
|
|
16328
16193
|
};
|
|
16329
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
16194
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path42.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
16330
16195
|
const dispatchContext = {
|
|
16331
16196
|
graderProvider,
|
|
16332
16197
|
targetResolver,
|
|
@@ -16357,7 +16222,7 @@ async function runEvaluatorList(options) {
|
|
|
16357
16222
|
weight,
|
|
16358
16223
|
verdict: score2.verdict,
|
|
16359
16224
|
assertions: score2.assertions,
|
|
16360
|
-
|
|
16225
|
+
input: score2.evaluatorRawRequest,
|
|
16361
16226
|
details: score2.details,
|
|
16362
16227
|
scores: mapChildResults(score2.scores),
|
|
16363
16228
|
tokenUsage: score2.tokenUsage,
|
|
@@ -16537,7 +16402,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16537
16402
|
conversationId: evalCase.conversation_id,
|
|
16538
16403
|
score: 0,
|
|
16539
16404
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
16540
|
-
|
|
16405
|
+
output: [{ role: "assistant", content: `Error occurred: ${message}` }],
|
|
16541
16406
|
target: targetName,
|
|
16542
16407
|
requests,
|
|
16543
16408
|
input,
|
|
@@ -16581,7 +16446,7 @@ function buildResultInput(promptInputs) {
|
|
|
16581
16446
|
content: message.content
|
|
16582
16447
|
}));
|
|
16583
16448
|
}
|
|
16584
|
-
return promptInputs.question;
|
|
16449
|
+
return [{ role: "user", content: promptInputs.question }];
|
|
16585
16450
|
}
|
|
16586
16451
|
function aggregateEvaluatorTokenUsage(scores) {
|
|
16587
16452
|
if (!scores || scores.length === 0) return void 0;
|
|
@@ -16647,7 +16512,7 @@ function mapChildResults(children) {
|
|
|
16647
16512
|
weight: child.weight,
|
|
16648
16513
|
verdict: child.verdict,
|
|
16649
16514
|
assertions: child.assertions,
|
|
16650
|
-
|
|
16515
|
+
input: child.evaluatorRawRequest,
|
|
16651
16516
|
scores: mapChildResults(child.scores),
|
|
16652
16517
|
details: child.details,
|
|
16653
16518
|
tokenUsage: child.tokenUsage
|
|
@@ -16666,7 +16531,7 @@ function computeWeightedMean(entries) {
|
|
|
16666
16531
|
|
|
16667
16532
|
// src/evaluation/evaluate.ts
|
|
16668
16533
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16669
|
-
import
|
|
16534
|
+
import path43 from "node:path";
|
|
16670
16535
|
|
|
16671
16536
|
// src/evaluation/providers/function-provider.ts
|
|
16672
16537
|
function createFunctionProvider(taskFn) {
|
|
@@ -16703,7 +16568,7 @@ async function evaluate(config) {
|
|
|
16703
16568
|
}
|
|
16704
16569
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16705
16570
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16706
|
-
const testFilePath = config.specFile ?
|
|
16571
|
+
const testFilePath = config.specFile ? path43.resolve(config.specFile) : path43.join(process.cwd(), "__programmatic__.yaml");
|
|
16707
16572
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16708
16573
|
let resolvedTarget;
|
|
16709
16574
|
let taskProvider;
|
|
@@ -16832,10 +16697,10 @@ function computeSummary(results, durationMs) {
|
|
|
16832
16697
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16833
16698
|
async function discoverDefaultTarget(repoRoot) {
|
|
16834
16699
|
const cwd = process.cwd();
|
|
16835
|
-
const chain = buildDirectoryChain(
|
|
16700
|
+
const chain = buildDirectoryChain(path43.join(cwd, "_placeholder"), repoRoot);
|
|
16836
16701
|
for (const dir of chain) {
|
|
16837
16702
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16838
|
-
const targetsPath =
|
|
16703
|
+
const targetsPath = path43.join(dir, candidate);
|
|
16839
16704
|
if (!existsSync4(targetsPath)) continue;
|
|
16840
16705
|
try {
|
|
16841
16706
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16852,7 +16717,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16852
16717
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16853
16718
|
const envFiles = [];
|
|
16854
16719
|
for (const dir of chain) {
|
|
16855
|
-
const envPath =
|
|
16720
|
+
const envPath = path43.join(dir, ".env");
|
|
16856
16721
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16857
16722
|
}
|
|
16858
16723
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -17033,8 +16898,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
17033
16898
|
}
|
|
17034
16899
|
|
|
17035
16900
|
// src/evaluation/cache/response-cache.ts
|
|
17036
|
-
import { mkdir as mkdir14, readFile as
|
|
17037
|
-
import
|
|
16901
|
+
import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
|
|
16902
|
+
import path44 from "node:path";
|
|
17038
16903
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
17039
16904
|
var ResponseCache = class {
|
|
17040
16905
|
cachePath;
|
|
@@ -17044,7 +16909,7 @@ var ResponseCache = class {
|
|
|
17044
16909
|
async get(key) {
|
|
17045
16910
|
const filePath = this.keyToPath(key);
|
|
17046
16911
|
try {
|
|
17047
|
-
const data = await
|
|
16912
|
+
const data = await readFile12(filePath, "utf8");
|
|
17048
16913
|
return JSON.parse(data);
|
|
17049
16914
|
} catch {
|
|
17050
16915
|
return void 0;
|
|
@@ -17052,13 +16917,13 @@ var ResponseCache = class {
|
|
|
17052
16917
|
}
|
|
17053
16918
|
async set(key, value) {
|
|
17054
16919
|
const filePath = this.keyToPath(key);
|
|
17055
|
-
const dir =
|
|
16920
|
+
const dir = path44.dirname(filePath);
|
|
17056
16921
|
await mkdir14(dir, { recursive: true });
|
|
17057
16922
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
17058
16923
|
}
|
|
17059
16924
|
keyToPath(key) {
|
|
17060
16925
|
const prefix = key.slice(0, 2);
|
|
17061
|
-
return
|
|
16926
|
+
return path44.join(this.cachePath, prefix, `${key}.json`);
|
|
17062
16927
|
}
|
|
17063
16928
|
};
|
|
17064
16929
|
function shouldEnableCache(params) {
|
|
@@ -17075,7 +16940,6 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
17075
16940
|
|
|
17076
16941
|
// src/evaluation/baseline.ts
|
|
17077
16942
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
17078
|
-
"outputText",
|
|
17079
16943
|
"requests",
|
|
17080
16944
|
"trace",
|
|
17081
16945
|
"workspacePath",
|
|
@@ -17092,7 +16956,7 @@ var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
|
17092
16956
|
"startTime",
|
|
17093
16957
|
"endTime"
|
|
17094
16958
|
]);
|
|
17095
|
-
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "
|
|
16959
|
+
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "input"]);
|
|
17096
16960
|
function trimEvaluatorResult(result) {
|
|
17097
16961
|
const trimmed = {};
|
|
17098
16962
|
for (const [key, value] of Object.entries(result)) {
|
|
@@ -17249,7 +17113,11 @@ var OtelTraceExporter = class {
|
|
|
17249
17113
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17250
17114
|
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
17251
17115
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17252
|
-
if (captureContent
|
|
17116
|
+
if (captureContent && result.output.length > 0) {
|
|
17117
|
+
const lastMsg = result.output[result.output.length - 1];
|
|
17118
|
+
const text = typeof lastMsg.content === "string" ? lastMsg.content : JSON.stringify(lastMsg.content);
|
|
17119
|
+
rootSpan.setAttribute("agentv.output_text", text);
|
|
17120
|
+
}
|
|
17253
17121
|
if (result.durationMs != null)
|
|
17254
17122
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
17255
17123
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|