@agentv/core 3.6.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-2IZOTQ25.js → chunk-3ZS3GCMI.js} +143 -3
- package/dist/chunk-3ZS3GCMI.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +227 -39
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +84 -5
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +23 -22
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -9
- package/dist/index.d.ts +6 -9
- package/dist/index.js +429 -562
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-2IZOTQ25.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -2,21 +2,24 @@ import {
|
|
|
2
2
|
TEST_MESSAGE_ROLES,
|
|
3
3
|
buildDirectoryChain,
|
|
4
4
|
buildSearchRoots,
|
|
5
|
+
expandFileReferences,
|
|
5
6
|
extractLastAssistantContent,
|
|
6
7
|
fileExists,
|
|
7
8
|
findGitRoot,
|
|
9
|
+
interpolateEnv,
|
|
8
10
|
isAgentProvider,
|
|
9
11
|
isEvaluatorKind,
|
|
10
12
|
isJsonObject,
|
|
11
13
|
isJsonValue,
|
|
12
14
|
isTestMessage,
|
|
13
15
|
isTestMessageRole,
|
|
16
|
+
loadCasesFromFile,
|
|
14
17
|
normalizeLineEndings,
|
|
15
18
|
readJsonFile,
|
|
16
19
|
readTextFile,
|
|
17
20
|
resolveFileReference,
|
|
18
21
|
resolveTargetDefinition
|
|
19
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-3ZS3GCMI.js";
|
|
20
23
|
import {
|
|
21
24
|
AgentvProvider
|
|
22
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -83,12 +86,10 @@ function computeTraceSummary(messages) {
|
|
|
83
86
|
}
|
|
84
87
|
}
|
|
85
88
|
}
|
|
86
|
-
const toolNames = Object.keys(toolCallCounts).sort();
|
|
87
89
|
return {
|
|
88
90
|
trace: {
|
|
89
91
|
eventCount: totalToolCalls,
|
|
90
|
-
|
|
91
|
-
toolCallsByName: toolCallCounts,
|
|
92
|
+
toolCalls: toolCallCounts,
|
|
92
93
|
errorCount: 0,
|
|
93
94
|
llmCallCount,
|
|
94
95
|
...hasAnyDuration ? { toolDurations } : {}
|
|
@@ -112,7 +113,7 @@ var DEFAULT_EXPLORATION_TOOLS = [
|
|
|
112
113
|
function explorationRatio(summary, explorationTools = DEFAULT_EXPLORATION_TOOLS) {
|
|
113
114
|
if (summary.eventCount === 0) return void 0;
|
|
114
115
|
const explorationCalls = explorationTools.reduce(
|
|
115
|
-
(sum, tool2) => sum + (summary.
|
|
116
|
+
(sum, tool2) => sum + (summary.toolCalls[tool2] ?? 0),
|
|
116
117
|
0
|
|
117
118
|
);
|
|
118
119
|
return explorationCalls / summary.eventCount;
|
|
@@ -148,30 +149,11 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
148
149
|
}
|
|
149
150
|
|
|
150
151
|
// src/evaluation/yaml-parser.ts
|
|
151
|
-
import { readFile as
|
|
152
|
-
import
|
|
152
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
153
|
+
import path8 from "node:path";
|
|
153
154
|
import micromatch3 from "micromatch";
|
|
154
155
|
import { parse as parse2 } from "yaml";
|
|
155
156
|
|
|
156
|
-
// src/evaluation/interpolation.ts
|
|
157
|
-
var ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
158
|
-
function interpolateEnv(value, env) {
|
|
159
|
-
if (typeof value === "string") {
|
|
160
|
-
return value.replace(ENV_VAR_PATTERN, (_, varName) => env[varName] ?? "");
|
|
161
|
-
}
|
|
162
|
-
if (Array.isArray(value)) {
|
|
163
|
-
return value.map((item) => interpolateEnv(item, env));
|
|
164
|
-
}
|
|
165
|
-
if (value !== null && typeof value === "object") {
|
|
166
|
-
const result = {};
|
|
167
|
-
for (const [key, val] of Object.entries(value)) {
|
|
168
|
-
result[key] = interpolateEnv(val, env);
|
|
169
|
-
}
|
|
170
|
-
return result;
|
|
171
|
-
}
|
|
172
|
-
return value;
|
|
173
|
-
}
|
|
174
|
-
|
|
175
157
|
// src/evaluation/loaders/agent-skills-parser.ts
|
|
176
158
|
import { readFile } from "node:fs/promises";
|
|
177
159
|
import path from "node:path";
|
|
@@ -254,134 +236,16 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
254
236
|
return tests;
|
|
255
237
|
}
|
|
256
238
|
|
|
257
|
-
// src/evaluation/loaders/case-file-loader.ts
|
|
258
|
-
import { readFile as readFile2 } from "node:fs/promises";
|
|
259
|
-
import path2 from "node:path";
|
|
260
|
-
import fg from "fast-glob";
|
|
261
|
-
import { parse as parseYaml } from "yaml";
|
|
262
|
-
var ANSI_YELLOW = "\x1B[33m";
|
|
263
|
-
var ANSI_RESET2 = "\x1B[0m";
|
|
264
|
-
var FILE_PROTOCOL = "file://";
|
|
265
|
-
function isFileReference(value) {
|
|
266
|
-
return typeof value === "string" && value.startsWith(FILE_PROTOCOL);
|
|
267
|
-
}
|
|
268
|
-
function extractFilePath(ref) {
|
|
269
|
-
return ref.slice(FILE_PROTOCOL.length);
|
|
270
|
-
}
|
|
271
|
-
function isGlobPattern(filePath) {
|
|
272
|
-
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
273
|
-
}
|
|
274
|
-
function parseYamlCases(content, filePath) {
|
|
275
|
-
const raw = parseYaml(content);
|
|
276
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
277
|
-
if (!Array.isArray(parsed)) {
|
|
278
|
-
throw new Error(
|
|
279
|
-
`External test file must contain a YAML array, got ${typeof parsed}: ${filePath}`
|
|
280
|
-
);
|
|
281
|
-
}
|
|
282
|
-
const results = [];
|
|
283
|
-
for (const item of parsed) {
|
|
284
|
-
if (!isJsonObject(item)) {
|
|
285
|
-
throw new Error(`External test file contains non-object entry: ${filePath}`);
|
|
286
|
-
}
|
|
287
|
-
results.push(item);
|
|
288
|
-
}
|
|
289
|
-
return results;
|
|
290
|
-
}
|
|
291
|
-
function parseJsonlCases(content, filePath) {
|
|
292
|
-
const lines = content.split("\n");
|
|
293
|
-
const results = [];
|
|
294
|
-
for (let i = 0; i < lines.length; i++) {
|
|
295
|
-
const line = lines[i].trim();
|
|
296
|
-
if (line === "") continue;
|
|
297
|
-
try {
|
|
298
|
-
const raw = JSON.parse(line);
|
|
299
|
-
const parsed = interpolateEnv(raw, process.env);
|
|
300
|
-
if (!isJsonObject(parsed)) {
|
|
301
|
-
throw new Error("Expected JSON object");
|
|
302
|
-
}
|
|
303
|
-
results.push(parsed);
|
|
304
|
-
} catch (error) {
|
|
305
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
306
|
-
throw new Error(`Malformed JSONL at line ${i + 1}: ${message}
|
|
307
|
-
File: ${filePath}`);
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
return results;
|
|
311
|
-
}
|
|
312
|
-
async function loadCasesFromFile(filePath) {
|
|
313
|
-
const ext = path2.extname(filePath).toLowerCase();
|
|
314
|
-
let content;
|
|
315
|
-
try {
|
|
316
|
-
content = await readFile2(filePath, "utf8");
|
|
317
|
-
} catch (error) {
|
|
318
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
319
|
-
throw new Error(`Cannot read external test file: ${filePath}
|
|
320
|
-
${message}`);
|
|
321
|
-
}
|
|
322
|
-
if (content.trim() === "") {
|
|
323
|
-
console.warn(
|
|
324
|
-
`${ANSI_YELLOW}Warning: External test file is empty, skipping: ${filePath}${ANSI_RESET2}`
|
|
325
|
-
);
|
|
326
|
-
return [];
|
|
327
|
-
}
|
|
328
|
-
if (ext === ".yaml" || ext === ".yml") {
|
|
329
|
-
return parseYamlCases(content, filePath);
|
|
330
|
-
}
|
|
331
|
-
if (ext === ".jsonl") {
|
|
332
|
-
return parseJsonlCases(content, filePath);
|
|
333
|
-
}
|
|
334
|
-
throw new Error(
|
|
335
|
-
`Unsupported external test file format '${ext}': ${filePath}. Supported: .yaml, .yml, .jsonl`
|
|
336
|
-
);
|
|
337
|
-
}
|
|
338
|
-
async function resolveFileReference2(ref, evalFileDir) {
|
|
339
|
-
const rawPath = extractFilePath(ref);
|
|
340
|
-
const absolutePattern = path2.resolve(evalFileDir, rawPath);
|
|
341
|
-
if (isGlobPattern(rawPath)) {
|
|
342
|
-
const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
|
|
343
|
-
onlyFiles: true,
|
|
344
|
-
absolute: true
|
|
345
|
-
});
|
|
346
|
-
if (matches.length === 0) {
|
|
347
|
-
console.warn(
|
|
348
|
-
`${ANSI_YELLOW}Warning: Glob pattern matched no files: ${ref} (resolved to ${absolutePattern})${ANSI_RESET2}`
|
|
349
|
-
);
|
|
350
|
-
return [];
|
|
351
|
-
}
|
|
352
|
-
matches.sort();
|
|
353
|
-
const allCases = [];
|
|
354
|
-
for (const match of matches) {
|
|
355
|
-
const cases = await loadCasesFromFile(match);
|
|
356
|
-
allCases.push(...cases);
|
|
357
|
-
}
|
|
358
|
-
return allCases;
|
|
359
|
-
}
|
|
360
|
-
return loadCasesFromFile(absolutePattern);
|
|
361
|
-
}
|
|
362
|
-
async function expandFileReferences(tests, evalFileDir) {
|
|
363
|
-
const expanded = [];
|
|
364
|
-
for (const entry of tests) {
|
|
365
|
-
if (isFileReference(entry)) {
|
|
366
|
-
const cases = await resolveFileReference2(entry, evalFileDir);
|
|
367
|
-
expanded.push(...cases);
|
|
368
|
-
} else {
|
|
369
|
-
expanded.push(entry);
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
return expanded;
|
|
373
|
-
}
|
|
374
|
-
|
|
375
239
|
// src/evaluation/loaders/config-loader.ts
|
|
376
|
-
import { readFile as
|
|
377
|
-
import
|
|
240
|
+
import { readFile as readFile2 } from "node:fs/promises";
|
|
241
|
+
import path3 from "node:path";
|
|
378
242
|
import micromatch from "micromatch";
|
|
379
243
|
import { parse } from "yaml";
|
|
380
244
|
|
|
381
245
|
// src/evaluation/loaders/file-resolver.ts
|
|
382
246
|
import { constants } from "node:fs";
|
|
383
247
|
import { access } from "node:fs/promises";
|
|
384
|
-
import
|
|
248
|
+
import path2 from "node:path";
|
|
385
249
|
import { fileURLToPath } from "node:url";
|
|
386
250
|
async function fileExists2(absolutePath) {
|
|
387
251
|
try {
|
|
@@ -399,15 +263,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
399
263
|
if (candidate.startsWith("file:")) {
|
|
400
264
|
return fileURLToPath(candidate);
|
|
401
265
|
}
|
|
402
|
-
return
|
|
266
|
+
return path2.resolve(candidate);
|
|
403
267
|
}
|
|
404
268
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
405
269
|
}
|
|
406
270
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
407
271
|
const directories = [];
|
|
408
272
|
const seen = /* @__PURE__ */ new Set();
|
|
409
|
-
const boundary =
|
|
410
|
-
let current =
|
|
273
|
+
const boundary = path2.resolve(repoRoot);
|
|
274
|
+
let current = path2.resolve(path2.dirname(filePath));
|
|
411
275
|
while (current !== void 0) {
|
|
412
276
|
if (!seen.has(current)) {
|
|
413
277
|
directories.push(current);
|
|
@@ -416,7 +280,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
416
280
|
if (current === boundary) {
|
|
417
281
|
break;
|
|
418
282
|
}
|
|
419
|
-
const parent =
|
|
283
|
+
const parent = path2.dirname(current);
|
|
420
284
|
if (parent === current) {
|
|
421
285
|
break;
|
|
422
286
|
}
|
|
@@ -430,16 +294,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
430
294
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
431
295
|
const uniqueRoots = [];
|
|
432
296
|
const addRoot = (root) => {
|
|
433
|
-
const normalized =
|
|
297
|
+
const normalized = path2.resolve(root);
|
|
434
298
|
if (!uniqueRoots.includes(normalized)) {
|
|
435
299
|
uniqueRoots.push(normalized);
|
|
436
300
|
}
|
|
437
301
|
};
|
|
438
|
-
let currentDir =
|
|
302
|
+
let currentDir = path2.dirname(evalPath);
|
|
439
303
|
let reachedBoundary = false;
|
|
440
304
|
while (!reachedBoundary) {
|
|
441
305
|
addRoot(currentDir);
|
|
442
|
-
const parentDir =
|
|
306
|
+
const parentDir = path2.dirname(currentDir);
|
|
443
307
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
444
308
|
reachedBoundary = true;
|
|
445
309
|
} else {
|
|
@@ -454,19 +318,19 @@ function trimLeadingSeparators(value) {
|
|
|
454
318
|
const trimmed = value.replace(/^[/\\]+/, "");
|
|
455
319
|
return trimmed.length > 0 ? trimmed : value;
|
|
456
320
|
}
|
|
457
|
-
async function
|
|
321
|
+
async function resolveFileReference2(rawValue, searchRoots) {
|
|
458
322
|
const displayPath = trimLeadingSeparators(rawValue);
|
|
459
323
|
const potentialPaths = [];
|
|
460
|
-
if (
|
|
461
|
-
potentialPaths.push(
|
|
324
|
+
if (path2.isAbsolute(rawValue)) {
|
|
325
|
+
potentialPaths.push(path2.normalize(rawValue));
|
|
462
326
|
}
|
|
463
327
|
for (const base of searchRoots) {
|
|
464
|
-
potentialPaths.push(
|
|
328
|
+
potentialPaths.push(path2.resolve(base, displayPath));
|
|
465
329
|
}
|
|
466
330
|
const attempted = [];
|
|
467
331
|
const seen = /* @__PURE__ */ new Set();
|
|
468
332
|
for (const candidate of potentialPaths) {
|
|
469
|
-
const absoluteCandidate =
|
|
333
|
+
const absoluteCandidate = path2.resolve(candidate);
|
|
470
334
|
if (seen.has(absoluteCandidate)) {
|
|
471
335
|
continue;
|
|
472
336
|
}
|
|
@@ -480,8 +344,8 @@ async function resolveFileReference3(rawValue, searchRoots) {
|
|
|
480
344
|
}
|
|
481
345
|
|
|
482
346
|
// src/evaluation/loaders/config-loader.ts
|
|
483
|
-
var
|
|
484
|
-
var
|
|
347
|
+
var ANSI_YELLOW = "\x1B[33m";
|
|
348
|
+
var ANSI_RESET2 = "\x1B[0m";
|
|
485
349
|
var DEFAULT_EVAL_PATTERNS = [
|
|
486
350
|
"**/evals/**/*.eval.yaml",
|
|
487
351
|
"**/evals/**/eval.yaml"
|
|
@@ -489,12 +353,12 @@ var DEFAULT_EVAL_PATTERNS = [
|
|
|
489
353
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
490
354
|
const directories = buildDirectoryChain2(evalFilePath, repoRoot);
|
|
491
355
|
for (const directory of directories) {
|
|
492
|
-
const configPath =
|
|
356
|
+
const configPath = path3.join(directory, ".agentv", "config.yaml");
|
|
493
357
|
if (!await fileExists2(configPath)) {
|
|
494
358
|
continue;
|
|
495
359
|
}
|
|
496
360
|
try {
|
|
497
|
-
const rawConfig = await
|
|
361
|
+
const rawConfig = await readFile2(configPath, "utf8");
|
|
498
362
|
const parsed = parse(rawConfig);
|
|
499
363
|
if (!isJsonObject(parsed)) {
|
|
500
364
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
@@ -731,14 +595,14 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
731
595
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
732
596
|
}
|
|
733
597
|
function logWarning(message) {
|
|
734
|
-
console.warn(`${
|
|
598
|
+
console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`);
|
|
735
599
|
}
|
|
736
600
|
|
|
737
601
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
738
|
-
import
|
|
602
|
+
import path4 from "node:path";
|
|
739
603
|
|
|
740
604
|
// src/evaluation/validation/prompt-validator.ts
|
|
741
|
-
import { readFile as
|
|
605
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
742
606
|
|
|
743
607
|
// src/evaluation/template-variables.ts
|
|
744
608
|
var TEMPLATE_VARIABLES = {
|
|
@@ -758,10 +622,10 @@ var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
|
758
622
|
]);
|
|
759
623
|
|
|
760
624
|
// src/evaluation/validation/prompt-validator.ts
|
|
761
|
-
var
|
|
762
|
-
var
|
|
625
|
+
var ANSI_YELLOW2 = "\x1B[33m";
|
|
626
|
+
var ANSI_RESET3 = "\x1B[0m";
|
|
763
627
|
async function validateCustomPromptContent(promptPath) {
|
|
764
|
-
const content = await
|
|
628
|
+
const content = await readFile3(promptPath, "utf8");
|
|
765
629
|
validateTemplateVariables(content, promptPath);
|
|
766
630
|
}
|
|
767
631
|
function validateTemplateVariables(content, source) {
|
|
@@ -788,16 +652,16 @@ function validateTemplateVariables(content, source) {
|
|
|
788
652
|
);
|
|
789
653
|
}
|
|
790
654
|
if (invalidVariables.length > 0) {
|
|
791
|
-
const warningMessage = `${
|
|
655
|
+
const warningMessage = `${ANSI_YELLOW2}Warning: Custom evaluator template at ${source}
|
|
792
656
|
Contains invalid variables: ${invalidVariables.map((v) => `{{ ${v} }}`).join(", ")}
|
|
793
|
-
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${
|
|
657
|
+
Valid variables: ${Array.from(VALID_TEMPLATE_VARIABLES).map((v) => `{{ ${v} }}`).join(", ")}${ANSI_RESET3}`;
|
|
794
658
|
console.warn(warningMessage);
|
|
795
659
|
}
|
|
796
660
|
}
|
|
797
661
|
|
|
798
662
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
799
|
-
var
|
|
800
|
-
var
|
|
663
|
+
var ANSI_YELLOW3 = "\x1B[33m";
|
|
664
|
+
var ANSI_RESET4 = "\x1B[0m";
|
|
801
665
|
function normalizeEvaluatorType(type) {
|
|
802
666
|
return type.replace(/_/g, "-");
|
|
803
667
|
}
|
|
@@ -899,7 +763,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
899
763
|
let command;
|
|
900
764
|
if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
|
|
901
765
|
console.warn(
|
|
902
|
-
`${
|
|
766
|
+
`${ANSI_YELLOW3}Warning: 'script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'command' instead.${ANSI_RESET4}`
|
|
903
767
|
);
|
|
904
768
|
}
|
|
905
769
|
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
@@ -925,9 +789,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
925
789
|
const cwd = asString(rawEvaluator.cwd);
|
|
926
790
|
let resolvedCwd;
|
|
927
791
|
if (cwd) {
|
|
928
|
-
const resolved = await
|
|
792
|
+
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
929
793
|
if (resolved.resolvedPath) {
|
|
930
|
-
resolvedCwd =
|
|
794
|
+
resolvedCwd = path4.resolve(resolved.resolvedPath);
|
|
931
795
|
} else {
|
|
932
796
|
logWarning2(
|
|
933
797
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -1083,9 +947,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1083
947
|
const aggregatorPrompt = asString(rawAggregator.prompt);
|
|
1084
948
|
let promptPath2;
|
|
1085
949
|
if (aggregatorPrompt) {
|
|
1086
|
-
const resolved = await
|
|
950
|
+
const resolved = await resolveFileReference2(aggregatorPrompt, searchRoots);
|
|
1087
951
|
if (resolved.resolvedPath) {
|
|
1088
|
-
promptPath2 =
|
|
952
|
+
promptPath2 = path4.resolve(resolved.resolvedPath);
|
|
1089
953
|
}
|
|
1090
954
|
}
|
|
1091
955
|
aggregator = {
|
|
@@ -1642,7 +1506,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1642
1506
|
if (isJsonObject2(rawPrompt)) {
|
|
1643
1507
|
if (rawPrompt.script !== void 0 && rawPrompt.command === void 0) {
|
|
1644
1508
|
console.warn(
|
|
1645
|
-
`${
|
|
1509
|
+
`${ANSI_YELLOW3}Warning: 'prompt.script' is deprecated in evaluator '${name}' in '${evalId}'. Use 'prompt.command' instead.${ANSI_RESET4}`
|
|
1646
1510
|
);
|
|
1647
1511
|
}
|
|
1648
1512
|
const commandArray = asStringArray(
|
|
@@ -1653,9 +1517,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1653
1517
|
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
|
|
1654
1518
|
}
|
|
1655
1519
|
const commandPath = commandArray[commandArray.length - 1];
|
|
1656
|
-
const resolved = await
|
|
1520
|
+
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
1657
1521
|
if (resolved.resolvedPath) {
|
|
1658
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
1522
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
|
|
1659
1523
|
} else {
|
|
1660
1524
|
throw new Error(
|
|
1661
1525
|
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -1666,9 +1530,9 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1666
1530
|
}
|
|
1667
1531
|
} else if (typeof rawPrompt === "string") {
|
|
1668
1532
|
prompt = rawPrompt;
|
|
1669
|
-
const resolved = await
|
|
1533
|
+
const resolved = await resolveFileReference2(prompt, searchRoots);
|
|
1670
1534
|
if (resolved.resolvedPath) {
|
|
1671
|
-
promptPath =
|
|
1535
|
+
promptPath = path4.resolve(resolved.resolvedPath);
|
|
1672
1536
|
try {
|
|
1673
1537
|
await validateCustomPromptContent(promptPath);
|
|
1674
1538
|
} catch (error) {
|
|
@@ -1868,10 +1732,10 @@ function warnUnconsumedCriteria(_criteria, _evaluators, _testId) {
|
|
|
1868
1732
|
function logWarning2(message, details) {
|
|
1869
1733
|
if (details && details.length > 0) {
|
|
1870
1734
|
const detailBlock = details.join("\n");
|
|
1871
|
-
console.warn(`${
|
|
1872
|
-
${detailBlock}${
|
|
1735
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}
|
|
1736
|
+
${detailBlock}${ANSI_RESET4}`);
|
|
1873
1737
|
} else {
|
|
1874
|
-
console.warn(`${
|
|
1738
|
+
console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
|
|
1875
1739
|
}
|
|
1876
1740
|
}
|
|
1877
1741
|
function parseRequired(value) {
|
|
@@ -2120,14 +1984,14 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
2120
1984
|
}
|
|
2121
1985
|
|
|
2122
1986
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2123
|
-
import { readFile as
|
|
2124
|
-
import
|
|
1987
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
1988
|
+
import path6 from "node:path";
|
|
2125
1989
|
import micromatch2 from "micromatch";
|
|
2126
|
-
import { parse as
|
|
1990
|
+
import { parse as parseYaml } from "yaml";
|
|
2127
1991
|
|
|
2128
1992
|
// src/evaluation/loaders/message-processor.ts
|
|
2129
|
-
import { readFile as
|
|
2130
|
-
import
|
|
1993
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
1994
|
+
import path5 from "node:path";
|
|
2131
1995
|
|
|
2132
1996
|
// src/evaluation/formatting/segment-formatter.ts
|
|
2133
1997
|
function formatFileContents(parts) {
|
|
@@ -2190,8 +2054,8 @@ function asString2(value) {
|
|
|
2190
2054
|
}
|
|
2191
2055
|
|
|
2192
2056
|
// src/evaluation/loaders/message-processor.ts
|
|
2193
|
-
var
|
|
2194
|
-
var
|
|
2057
|
+
var ANSI_YELLOW4 = "\x1B[33m";
|
|
2058
|
+
var ANSI_RESET5 = "\x1B[0m";
|
|
2195
2059
|
async function processMessages(options) {
|
|
2196
2060
|
const {
|
|
2197
2061
|
messages,
|
|
@@ -2235,7 +2099,7 @@ async function processMessages(options) {
|
|
|
2235
2099
|
if (!rawValue) {
|
|
2236
2100
|
continue;
|
|
2237
2101
|
}
|
|
2238
|
-
const { displayPath, resolvedPath, attempted } = await
|
|
2102
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2239
2103
|
rawValue,
|
|
2240
2104
|
searchRoots
|
|
2241
2105
|
);
|
|
@@ -2246,7 +2110,7 @@ async function processMessages(options) {
|
|
|
2246
2110
|
continue;
|
|
2247
2111
|
}
|
|
2248
2112
|
try {
|
|
2249
|
-
const fileContent = (await
|
|
2113
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2250
2114
|
const classifyAsGuideline = shouldTreatAsGuideline({
|
|
2251
2115
|
messageType,
|
|
2252
2116
|
resolvedPath,
|
|
@@ -2255,7 +2119,7 @@ async function processMessages(options) {
|
|
|
2255
2119
|
treatFileSegmentsAsGuidelines
|
|
2256
2120
|
});
|
|
2257
2121
|
if (classifyAsGuideline && guidelinePaths) {
|
|
2258
|
-
guidelinePaths.push(
|
|
2122
|
+
guidelinePaths.push(path5.resolve(resolvedPath));
|
|
2259
2123
|
if (verbose) {
|
|
2260
2124
|
console.log(` [Guideline] Found: ${displayPath}`);
|
|
2261
2125
|
console.log(` Resolved to: ${resolvedPath}`);
|
|
@@ -2266,7 +2130,7 @@ async function processMessages(options) {
|
|
|
2266
2130
|
type: "file",
|
|
2267
2131
|
path: displayPath,
|
|
2268
2132
|
text: fileContent,
|
|
2269
|
-
resolvedPath:
|
|
2133
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
2270
2134
|
});
|
|
2271
2135
|
if (verbose) {
|
|
2272
2136
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -2306,7 +2170,7 @@ function shouldTreatAsGuideline(options) {
|
|
|
2306
2170
|
if (!guidelinePatterns || guidelinePatterns.length === 0) {
|
|
2307
2171
|
return false;
|
|
2308
2172
|
}
|
|
2309
|
-
const relativeToRepo =
|
|
2173
|
+
const relativeToRepo = path5.relative(repoRootPath, resolvedPath);
|
|
2310
2174
|
return isGuidelineFile(relativeToRepo, guidelinePatterns);
|
|
2311
2175
|
}
|
|
2312
2176
|
function asString3(value) {
|
|
@@ -2334,10 +2198,10 @@ function cloneJsonValue(value) {
|
|
|
2334
2198
|
function logWarning3(message, details) {
|
|
2335
2199
|
if (details && details.length > 0) {
|
|
2336
2200
|
const detailBlock = details.join("\n");
|
|
2337
|
-
console.warn(`${
|
|
2338
|
-
${detailBlock}${
|
|
2201
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}
|
|
2202
|
+
${detailBlock}${ANSI_RESET5}`);
|
|
2339
2203
|
} else {
|
|
2340
|
-
console.warn(`${
|
|
2204
|
+
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
|
|
2341
2205
|
}
|
|
2342
2206
|
}
|
|
2343
2207
|
async function processExpectedMessages(options) {
|
|
@@ -2366,7 +2230,7 @@ async function processExpectedMessages(options) {
|
|
|
2366
2230
|
if (!rawValue) {
|
|
2367
2231
|
continue;
|
|
2368
2232
|
}
|
|
2369
|
-
const { displayPath, resolvedPath, attempted } = await
|
|
2233
|
+
const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
|
|
2370
2234
|
rawValue,
|
|
2371
2235
|
searchRoots
|
|
2372
2236
|
);
|
|
@@ -2376,12 +2240,12 @@ async function processExpectedMessages(options) {
|
|
|
2376
2240
|
continue;
|
|
2377
2241
|
}
|
|
2378
2242
|
try {
|
|
2379
|
-
const fileContent = (await
|
|
2243
|
+
const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2380
2244
|
processedContent.push({
|
|
2381
2245
|
type: "file",
|
|
2382
2246
|
path: displayPath,
|
|
2383
2247
|
text: fileContent,
|
|
2384
|
-
resolvedPath:
|
|
2248
|
+
resolvedPath: path5.resolve(resolvedPath)
|
|
2385
2249
|
});
|
|
2386
2250
|
if (verbose) {
|
|
2387
2251
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -2478,11 +2342,11 @@ function resolveExpectedMessages(raw) {
|
|
|
2478
2342
|
}
|
|
2479
2343
|
|
|
2480
2344
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2481
|
-
var
|
|
2345
|
+
var ANSI_YELLOW5 = "\x1B[33m";
|
|
2482
2346
|
var ANSI_RED2 = "\x1B[31m";
|
|
2483
|
-
var
|
|
2347
|
+
var ANSI_RESET6 = "\x1B[0m";
|
|
2484
2348
|
function detectFormat(filePath) {
|
|
2485
|
-
const ext =
|
|
2349
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
2486
2350
|
if (ext === ".jsonl") return "jsonl";
|
|
2487
2351
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
2488
2352
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -2491,9 +2355,9 @@ function detectFormat(filePath) {
|
|
|
2491
2355
|
);
|
|
2492
2356
|
}
|
|
2493
2357
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
2494
|
-
const dir =
|
|
2495
|
-
const base =
|
|
2496
|
-
const sidecarPath =
|
|
2358
|
+
const dir = path6.dirname(jsonlPath);
|
|
2359
|
+
const base = path6.basename(jsonlPath, ".jsonl");
|
|
2360
|
+
const sidecarPath = path6.join(dir, `${base}.yaml`);
|
|
2497
2361
|
if (!await fileExists2(sidecarPath)) {
|
|
2498
2362
|
if (verbose) {
|
|
2499
2363
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -2501,8 +2365,8 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
2501
2365
|
return {};
|
|
2502
2366
|
}
|
|
2503
2367
|
try {
|
|
2504
|
-
const content = await
|
|
2505
|
-
const parsed = interpolateEnv(
|
|
2368
|
+
const content = await readFile5(sidecarPath, "utf8");
|
|
2369
|
+
const parsed = interpolateEnv(parseYaml(content), process.env);
|
|
2506
2370
|
if (!isJsonObject(parsed)) {
|
|
2507
2371
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
2508
2372
|
return {};
|
|
@@ -2542,15 +2406,15 @@ function parseJsonlContent(content, filePath) {
|
|
|
2542
2406
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
2543
2407
|
const verbose = options?.verbose ?? false;
|
|
2544
2408
|
const filterPattern = options?.filter;
|
|
2545
|
-
const absoluteTestPath =
|
|
2409
|
+
const absoluteTestPath = path6.resolve(evalFilePath);
|
|
2546
2410
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2547
2411
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2548
2412
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
2549
2413
|
const guidelinePatterns = config?.guideline_patterns;
|
|
2550
2414
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
2551
|
-
const rawFile = await
|
|
2415
|
+
const rawFile = await readFile5(absoluteTestPath, "utf8");
|
|
2552
2416
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
2553
|
-
const fallbackDataset =
|
|
2417
|
+
const fallbackDataset = path6.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
2554
2418
|
const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
|
|
2555
2419
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
2556
2420
|
const globalExecution = sidecar.execution;
|
|
@@ -2649,7 +2513,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2649
2513
|
}
|
|
2650
2514
|
}
|
|
2651
2515
|
const allFilePaths = [
|
|
2652
|
-
...guidelinePaths.map((guidelinePath) =>
|
|
2516
|
+
...guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
2653
2517
|
...userFilePaths
|
|
2654
2518
|
];
|
|
2655
2519
|
const testCase = {
|
|
@@ -2661,7 +2525,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
|
2661
2525
|
input_segments: inputSegments,
|
|
2662
2526
|
expected_output: outputSegments,
|
|
2663
2527
|
reference_answer: referenceAnswer,
|
|
2664
|
-
guideline_paths: guidelinePaths.map((guidelinePath) =>
|
|
2528
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path6.resolve(guidelinePath)),
|
|
2665
2529
|
guideline_patterns: guidelinePatterns,
|
|
2666
2530
|
file_paths: allFilePaths,
|
|
2667
2531
|
criteria: outcome ?? "",
|
|
@@ -2692,19 +2556,19 @@ function asString4(value) {
|
|
|
2692
2556
|
function logWarning4(message, details) {
|
|
2693
2557
|
if (details && details.length > 0) {
|
|
2694
2558
|
const detailBlock = details.join("\n");
|
|
2695
|
-
console.warn(`${
|
|
2696
|
-
${detailBlock}${
|
|
2559
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}
|
|
2560
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
2697
2561
|
} else {
|
|
2698
|
-
console.warn(`${
|
|
2562
|
+
console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET6}`);
|
|
2699
2563
|
}
|
|
2700
2564
|
}
|
|
2701
2565
|
function logError2(message, details) {
|
|
2702
2566
|
if (details && details.length > 0) {
|
|
2703
2567
|
const detailBlock = details.join("\n");
|
|
2704
2568
|
console.error(`${ANSI_RED2}Error: ${message}
|
|
2705
|
-
${detailBlock}${
|
|
2569
|
+
${detailBlock}${ANSI_RESET6}`);
|
|
2706
2570
|
} else {
|
|
2707
|
-
console.error(`${ANSI_RED2}Error: ${message}${
|
|
2571
|
+
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET6}`);
|
|
2708
2572
|
}
|
|
2709
2573
|
}
|
|
2710
2574
|
|
|
@@ -2739,24 +2603,24 @@ function parseMetadata(suite) {
|
|
|
2739
2603
|
}
|
|
2740
2604
|
|
|
2741
2605
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2742
|
-
import { readFile as
|
|
2743
|
-
import
|
|
2744
|
-
var
|
|
2745
|
-
var
|
|
2606
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
2607
|
+
import path7 from "node:path";
|
|
2608
|
+
var ANSI_YELLOW6 = "\x1B[33m";
|
|
2609
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
2746
2610
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2747
2611
|
const guidelineParts = [];
|
|
2748
2612
|
for (const rawPath of testCase.guideline_paths) {
|
|
2749
|
-
const absolutePath =
|
|
2613
|
+
const absolutePath = path7.resolve(rawPath);
|
|
2750
2614
|
if (!await fileExists2(absolutePath)) {
|
|
2751
2615
|
logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
|
|
2752
2616
|
continue;
|
|
2753
2617
|
}
|
|
2754
2618
|
try {
|
|
2755
|
-
const content = (await
|
|
2619
|
+
const content = (await readFile6(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
|
|
2756
2620
|
guidelineParts.push({
|
|
2757
2621
|
content,
|
|
2758
2622
|
isFile: true,
|
|
2759
|
-
displayPath:
|
|
2623
|
+
displayPath: path7.basename(absolutePath)
|
|
2760
2624
|
});
|
|
2761
2625
|
} catch (error) {
|
|
2762
2626
|
logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
|
|
@@ -2954,13 +2818,13 @@ function asString5(value) {
|
|
|
2954
2818
|
return typeof value === "string" ? value : void 0;
|
|
2955
2819
|
}
|
|
2956
2820
|
function logWarning5(message) {
|
|
2957
|
-
console.warn(`${
|
|
2821
|
+
console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET7}`);
|
|
2958
2822
|
}
|
|
2959
2823
|
|
|
2960
2824
|
// src/evaluation/yaml-parser.ts
|
|
2961
|
-
var
|
|
2825
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
2962
2826
|
var ANSI_RED3 = "\x1B[31m";
|
|
2963
|
-
var
|
|
2827
|
+
var ANSI_RESET8 = "\x1B[0m";
|
|
2964
2828
|
function resolveTests(suite) {
|
|
2965
2829
|
if (suite.tests !== void 0) return suite.tests;
|
|
2966
2830
|
if (suite.eval_cases !== void 0) {
|
|
@@ -2975,8 +2839,8 @@ function resolveTests(suite) {
|
|
|
2975
2839
|
}
|
|
2976
2840
|
async function readTestSuiteMetadata(testFilePath) {
|
|
2977
2841
|
try {
|
|
2978
|
-
const absolutePath =
|
|
2979
|
-
const content = await
|
|
2842
|
+
const absolutePath = path8.resolve(testFilePath);
|
|
2843
|
+
const content = await readFile7(absolutePath, "utf8");
|
|
2980
2844
|
const parsed = interpolateEnv(parse2(content), process.env);
|
|
2981
2845
|
if (!isJsonObject(parsed)) {
|
|
2982
2846
|
return {};
|
|
@@ -3027,26 +2891,26 @@ var loadEvalCases = loadTests;
|
|
|
3027
2891
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
3028
2892
|
const verbose = options?.verbose ?? false;
|
|
3029
2893
|
const filterPattern = options?.filter;
|
|
3030
|
-
const absoluteTestPath =
|
|
2894
|
+
const absoluteTestPath = path8.resolve(evalFilePath);
|
|
3031
2895
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3032
2896
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
3033
2897
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
3034
2898
|
const guidelinePatterns = config?.guideline_patterns;
|
|
3035
|
-
const rawFile = await
|
|
2899
|
+
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
3036
2900
|
const interpolated = interpolateEnv(parse2(rawFile), process.env);
|
|
3037
2901
|
if (!isJsonObject(interpolated)) {
|
|
3038
2902
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
3039
2903
|
}
|
|
3040
2904
|
const suite = interpolated;
|
|
3041
2905
|
const datasetNameFromSuite = asString6(suite.dataset)?.trim();
|
|
3042
|
-
const fallbackDataset =
|
|
2906
|
+
const fallbackDataset = path8.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
|
|
3043
2907
|
const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
|
|
3044
2908
|
const rawTestcases = resolveTests(suite);
|
|
3045
2909
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
3046
|
-
const evalFileDir =
|
|
2910
|
+
const evalFileDir = path8.dirname(absoluteTestPath);
|
|
3047
2911
|
let expandedTestcases;
|
|
3048
2912
|
if (typeof rawTestcases === "string") {
|
|
3049
|
-
const externalPath =
|
|
2913
|
+
const externalPath = path8.resolve(evalFileDir, rawTestcases);
|
|
3050
2914
|
expandedTestcases = await loadCasesFromFile(externalPath);
|
|
3051
2915
|
} else if (Array.isArray(rawTestcases)) {
|
|
3052
2916
|
expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
|
|
@@ -3167,7 +3031,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3167
3031
|
}
|
|
3168
3032
|
}
|
|
3169
3033
|
const allFilePaths = [
|
|
3170
|
-
...guidelinePaths.map((guidelinePath) =>
|
|
3034
|
+
...guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
3171
3035
|
...userFilePaths
|
|
3172
3036
|
];
|
|
3173
3037
|
const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
|
|
@@ -3183,7 +3047,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3183
3047
|
input_segments: inputSegments,
|
|
3184
3048
|
expected_output: outputSegments,
|
|
3185
3049
|
reference_answer: referenceAnswer,
|
|
3186
|
-
guideline_paths: guidelinePaths.map((guidelinePath) =>
|
|
3050
|
+
guideline_paths: guidelinePaths.map((guidelinePath) => path8.resolve(guidelinePath)),
|
|
3187
3051
|
guideline_patterns: guidelinePatterns,
|
|
3188
3052
|
file_paths: allFilePaths,
|
|
3189
3053
|
criteria: outcome ?? "",
|
|
@@ -3233,8 +3097,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3233
3097
|
if (commandArr.length === 0) return void 0;
|
|
3234
3098
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
3235
3099
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
3236
|
-
if (cwd && !
|
|
3237
|
-
cwd =
|
|
3100
|
+
if (cwd && !path8.isAbsolute(cwd)) {
|
|
3101
|
+
cwd = path8.resolve(evalFileDir, cwd);
|
|
3238
3102
|
}
|
|
3239
3103
|
const config = { command: commandArr };
|
|
3240
3104
|
if (timeoutMs !== void 0) {
|
|
@@ -3324,10 +3188,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
3324
3188
|
}
|
|
3325
3189
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3326
3190
|
if (typeof raw === "string") {
|
|
3327
|
-
const workspaceFilePath =
|
|
3191
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
3328
3192
|
let content;
|
|
3329
3193
|
try {
|
|
3330
|
-
content = await
|
|
3194
|
+
content = await readFile7(workspaceFilePath, "utf8");
|
|
3331
3195
|
} catch {
|
|
3332
3196
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3333
3197
|
}
|
|
@@ -3337,7 +3201,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
3337
3201
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3338
3202
|
);
|
|
3339
3203
|
}
|
|
3340
|
-
const workspaceFileDir =
|
|
3204
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
3341
3205
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3342
3206
|
}
|
|
3343
3207
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -3357,8 +3221,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3357
3221
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
3358
3222
|
}
|
|
3359
3223
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
3360
|
-
if (template && !
|
|
3361
|
-
template =
|
|
3224
|
+
if (template && !path8.isAbsolute(template)) {
|
|
3225
|
+
template = path8.resolve(evalFileDir, template);
|
|
3362
3226
|
}
|
|
3363
3227
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3364
3228
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -3411,25 +3275,25 @@ function asString6(value) {
|
|
|
3411
3275
|
function logWarning6(message, details) {
|
|
3412
3276
|
if (details && details.length > 0) {
|
|
3413
3277
|
const detailBlock = details.join("\n");
|
|
3414
|
-
console.warn(`${
|
|
3415
|
-
${detailBlock}${
|
|
3278
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}
|
|
3279
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
3416
3280
|
} else {
|
|
3417
|
-
console.warn(`${
|
|
3281
|
+
console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
|
|
3418
3282
|
}
|
|
3419
3283
|
}
|
|
3420
3284
|
function logError3(message, details) {
|
|
3421
3285
|
if (details && details.length > 0) {
|
|
3422
3286
|
const detailBlock = details.join("\n");
|
|
3423
3287
|
console.error(`${ANSI_RED3}Error: ${message}
|
|
3424
|
-
${detailBlock}${
|
|
3288
|
+
${detailBlock}${ANSI_RESET8}`);
|
|
3425
3289
|
} else {
|
|
3426
|
-
console.error(`${ANSI_RED3}Error: ${message}${
|
|
3290
|
+
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET8}`);
|
|
3427
3291
|
}
|
|
3428
3292
|
}
|
|
3429
3293
|
|
|
3430
3294
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3431
3295
|
import { readFileSync } from "node:fs";
|
|
3432
|
-
import
|
|
3296
|
+
import path9 from "node:path";
|
|
3433
3297
|
import { parse as parse3 } from "yaml";
|
|
3434
3298
|
function codeGraderInstruction(graderName, description) {
|
|
3435
3299
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
@@ -3674,7 +3538,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
3674
3538
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
3675
3539
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
3676
3540
|
const parsed = parse3(content);
|
|
3677
|
-
return transpileEvalYaml(parsed,
|
|
3541
|
+
return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
|
|
3678
3542
|
}
|
|
3679
3543
|
function getOutputFilenames(result) {
|
|
3680
3544
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -4115,7 +3979,7 @@ import { spawn } from "node:child_process";
|
|
|
4115
3979
|
import { randomUUID } from "node:crypto";
|
|
4116
3980
|
import { createWriteStream } from "node:fs";
|
|
4117
3981
|
import { mkdir } from "node:fs/promises";
|
|
4118
|
-
import
|
|
3982
|
+
import path11 from "node:path";
|
|
4119
3983
|
|
|
4120
3984
|
// src/evaluation/providers/claude-log-tracker.ts
|
|
4121
3985
|
var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
|
|
@@ -4171,7 +4035,7 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
4171
4035
|
}
|
|
4172
4036
|
|
|
4173
4037
|
// src/evaluation/providers/preread.ts
|
|
4174
|
-
import
|
|
4038
|
+
import path10 from "node:path";
|
|
4175
4039
|
function buildPromptDocument(request, inputFiles, options) {
|
|
4176
4040
|
const parts = [];
|
|
4177
4041
|
const guidelineFiles = collectGuidelineFiles(
|
|
@@ -4194,7 +4058,7 @@ function normalizeInputFiles(inputFiles) {
|
|
|
4194
4058
|
}
|
|
4195
4059
|
const deduped = /* @__PURE__ */ new Map();
|
|
4196
4060
|
for (const inputFile of inputFiles) {
|
|
4197
|
-
const absolutePath =
|
|
4061
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4198
4062
|
if (!deduped.has(absolutePath)) {
|
|
4199
4063
|
deduped.set(absolutePath, absolutePath);
|
|
4200
4064
|
}
|
|
@@ -4207,14 +4071,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
|
4207
4071
|
}
|
|
4208
4072
|
const unique = /* @__PURE__ */ new Map();
|
|
4209
4073
|
for (const inputFile of inputFiles) {
|
|
4210
|
-
const absolutePath =
|
|
4074
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4211
4075
|
if (overrides?.has(absolutePath)) {
|
|
4212
4076
|
if (!unique.has(absolutePath)) {
|
|
4213
4077
|
unique.set(absolutePath, absolutePath);
|
|
4214
4078
|
}
|
|
4215
4079
|
continue;
|
|
4216
4080
|
}
|
|
4217
|
-
const normalized = absolutePath.split(
|
|
4081
|
+
const normalized = absolutePath.split(path10.sep).join("/");
|
|
4218
4082
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
4219
4083
|
if (!unique.has(absolutePath)) {
|
|
4220
4084
|
unique.set(absolutePath, absolutePath);
|
|
@@ -4229,7 +4093,7 @@ function collectInputFiles(inputFiles) {
|
|
|
4229
4093
|
}
|
|
4230
4094
|
const unique = /* @__PURE__ */ new Map();
|
|
4231
4095
|
for (const inputFile of inputFiles) {
|
|
4232
|
-
const absolutePath =
|
|
4096
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4233
4097
|
if (!unique.has(absolutePath)) {
|
|
4234
4098
|
unique.set(absolutePath, absolutePath);
|
|
4235
4099
|
}
|
|
@@ -4241,7 +4105,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
|
|
|
4241
4105
|
return "";
|
|
4242
4106
|
}
|
|
4243
4107
|
const buildList = (files) => files.map((absolutePath) => {
|
|
4244
|
-
const fileName =
|
|
4108
|
+
const fileName = path10.basename(absolutePath);
|
|
4245
4109
|
const fileUri = pathToFileUri(absolutePath);
|
|
4246
4110
|
return `* [${fileName}](${fileUri})`;
|
|
4247
4111
|
});
|
|
@@ -4261,7 +4125,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
4261
4125
|
return sections.join("\n");
|
|
4262
4126
|
}
|
|
4263
4127
|
function pathToFileUri(filePath) {
|
|
4264
|
-
const absolutePath =
|
|
4128
|
+
const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
|
|
4265
4129
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
4266
4130
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
4267
4131
|
return `file:///${normalizedPath}`;
|
|
@@ -4408,10 +4272,10 @@ var ClaudeCliProvider = class {
|
|
|
4408
4272
|
}
|
|
4409
4273
|
resolveCwd(cwdOverride) {
|
|
4410
4274
|
if (cwdOverride) {
|
|
4411
|
-
return
|
|
4275
|
+
return path11.resolve(cwdOverride);
|
|
4412
4276
|
}
|
|
4413
4277
|
if (this.config.cwd) {
|
|
4414
|
-
return
|
|
4278
|
+
return path11.resolve(this.config.cwd);
|
|
4415
4279
|
}
|
|
4416
4280
|
return void 0;
|
|
4417
4281
|
}
|
|
@@ -4421,9 +4285,9 @@ var ClaudeCliProvider = class {
|
|
|
4421
4285
|
return void 0;
|
|
4422
4286
|
}
|
|
4423
4287
|
if (this.config.logDir) {
|
|
4424
|
-
return
|
|
4288
|
+
return path11.resolve(this.config.logDir);
|
|
4425
4289
|
}
|
|
4426
|
-
return
|
|
4290
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
4427
4291
|
}
|
|
4428
4292
|
async createStreamLogger(request) {
|
|
4429
4293
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4437,7 +4301,7 @@ var ClaudeCliProvider = class {
|
|
|
4437
4301
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
4438
4302
|
return void 0;
|
|
4439
4303
|
}
|
|
4440
|
-
const filePath =
|
|
4304
|
+
const filePath = path11.join(logDir, buildLogFilename(request, this.targetName));
|
|
4441
4305
|
try {
|
|
4442
4306
|
const logger = await ClaudeCliStreamLogger.create({
|
|
4443
4307
|
filePath,
|
|
@@ -4758,7 +4622,7 @@ function tryParseJson(line) {
|
|
|
4758
4622
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
4759
4623
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
4760
4624
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
4761
|
-
import
|
|
4625
|
+
import path12 from "node:path";
|
|
4762
4626
|
var claudeSdkModule = null;
|
|
4763
4627
|
async function loadClaudeSdk() {
|
|
4764
4628
|
if (!claudeSdkModule) {
|
|
@@ -4918,10 +4782,10 @@ var ClaudeSdkProvider = class {
|
|
|
4918
4782
|
}
|
|
4919
4783
|
resolveCwd(cwdOverride) {
|
|
4920
4784
|
if (cwdOverride) {
|
|
4921
|
-
return
|
|
4785
|
+
return path12.resolve(cwdOverride);
|
|
4922
4786
|
}
|
|
4923
4787
|
if (this.config.cwd) {
|
|
4924
|
-
return
|
|
4788
|
+
return path12.resolve(this.config.cwd);
|
|
4925
4789
|
}
|
|
4926
4790
|
return void 0;
|
|
4927
4791
|
}
|
|
@@ -4931,9 +4795,9 @@ var ClaudeSdkProvider = class {
|
|
|
4931
4795
|
return void 0;
|
|
4932
4796
|
}
|
|
4933
4797
|
if (this.config.logDir) {
|
|
4934
|
-
return
|
|
4798
|
+
return path12.resolve(this.config.logDir);
|
|
4935
4799
|
}
|
|
4936
|
-
return
|
|
4800
|
+
return path12.join(process.cwd(), ".agentv", "logs", "claude");
|
|
4937
4801
|
}
|
|
4938
4802
|
async createStreamLogger(request) {
|
|
4939
4803
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4947,7 +4811,7 @@ var ClaudeSdkProvider = class {
|
|
|
4947
4811
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
4948
4812
|
return void 0;
|
|
4949
4813
|
}
|
|
4950
|
-
const filePath =
|
|
4814
|
+
const filePath = path12.join(logDir, buildLogFilename2(request, this.targetName));
|
|
4951
4815
|
try {
|
|
4952
4816
|
const logger = await ClaudeStreamLogger.create({
|
|
4953
4817
|
filePath,
|
|
@@ -5154,7 +5018,7 @@ function formatElapsed2(startedAt) {
|
|
|
5154
5018
|
import { exec as execWithCallback } from "node:child_process";
|
|
5155
5019
|
import fs from "node:fs/promises";
|
|
5156
5020
|
import os from "node:os";
|
|
5157
|
-
import
|
|
5021
|
+
import path13 from "node:path";
|
|
5158
5022
|
import { promisify } from "node:util";
|
|
5159
5023
|
import { z as z2 } from "zod";
|
|
5160
5024
|
var ToolCallSchema = z2.object({
|
|
@@ -5659,7 +5523,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
5659
5523
|
}
|
|
5660
5524
|
const unique = /* @__PURE__ */ new Map();
|
|
5661
5525
|
for (const inputFile of inputFiles) {
|
|
5662
|
-
const absolutePath =
|
|
5526
|
+
const absolutePath = path13.resolve(inputFile);
|
|
5663
5527
|
if (!unique.has(absolutePath)) {
|
|
5664
5528
|
unique.set(absolutePath, absolutePath);
|
|
5665
5529
|
}
|
|
@@ -5673,7 +5537,7 @@ function formatFileList(files, template) {
|
|
|
5673
5537
|
const formatter = template ?? "{path}";
|
|
5674
5538
|
return files.map((filePath) => {
|
|
5675
5539
|
const escapedPath = shellEscape(filePath);
|
|
5676
|
-
const escapedName = shellEscape(
|
|
5540
|
+
const escapedName = shellEscape(path13.basename(filePath));
|
|
5677
5541
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
5678
5542
|
}).join(" ");
|
|
5679
5543
|
}
|
|
@@ -5697,7 +5561,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
5697
5561
|
const safeEvalId = evalCaseId || "unknown";
|
|
5698
5562
|
const timestamp = Date.now();
|
|
5699
5563
|
const random = Math.random().toString(36).substring(2, 9);
|
|
5700
|
-
return
|
|
5564
|
+
return path13.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
5701
5565
|
}
|
|
5702
5566
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
5703
5567
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -5711,7 +5575,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
5711
5575
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
5712
5576
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
5713
5577
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
5714
|
-
import
|
|
5578
|
+
import path14 from "node:path";
|
|
5715
5579
|
|
|
5716
5580
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
5717
5581
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -5946,10 +5810,10 @@ ${basePrompt}` : basePrompt;
|
|
|
5946
5810
|
}
|
|
5947
5811
|
resolveCwd(cwdOverride) {
|
|
5948
5812
|
if (cwdOverride) {
|
|
5949
|
-
return
|
|
5813
|
+
return path14.resolve(cwdOverride);
|
|
5950
5814
|
}
|
|
5951
5815
|
if (this.config.cwd) {
|
|
5952
|
-
return
|
|
5816
|
+
return path14.resolve(this.config.cwd);
|
|
5953
5817
|
}
|
|
5954
5818
|
return void 0;
|
|
5955
5819
|
}
|
|
@@ -5959,9 +5823,9 @@ ${basePrompt}` : basePrompt;
|
|
|
5959
5823
|
return void 0;
|
|
5960
5824
|
}
|
|
5961
5825
|
if (this.config.logDir) {
|
|
5962
|
-
return
|
|
5826
|
+
return path14.resolve(this.config.logDir);
|
|
5963
5827
|
}
|
|
5964
|
-
return
|
|
5828
|
+
return path14.join(process.cwd(), ".agentv", "logs", "codex");
|
|
5965
5829
|
}
|
|
5966
5830
|
async createStreamLogger(request) {
|
|
5967
5831
|
const logDir = this.resolveLogDirectory();
|
|
@@ -5975,7 +5839,7 @@ ${basePrompt}` : basePrompt;
|
|
|
5975
5839
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
5976
5840
|
return void 0;
|
|
5977
5841
|
}
|
|
5978
|
-
const filePath =
|
|
5842
|
+
const filePath = path14.join(logDir, buildLogFilename3(request, this.targetName));
|
|
5979
5843
|
try {
|
|
5980
5844
|
const logger = await CodexSdkStreamLogger.create({
|
|
5981
5845
|
filePath,
|
|
@@ -6119,7 +5983,7 @@ function formatElapsed3(startedAt) {
|
|
|
6119
5983
|
// src/evaluation/providers/copilot-cli.ts
|
|
6120
5984
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
6121
5985
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
6122
|
-
import
|
|
5986
|
+
import path16 from "node:path";
|
|
6123
5987
|
import { Readable, Writable } from "node:stream";
|
|
6124
5988
|
import { spawn as spawn2 } from "node:child_process";
|
|
6125
5989
|
import * as acp from "@agentclientprotocol/sdk";
|
|
@@ -6181,7 +6045,7 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6181
6045
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6182
6046
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6183
6047
|
import { arch, platform } from "node:os";
|
|
6184
|
-
import
|
|
6048
|
+
import path15 from "node:path";
|
|
6185
6049
|
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
6186
6050
|
function resolvePlatformCliPath() {
|
|
6187
6051
|
const os3 = platform();
|
|
@@ -6205,7 +6069,7 @@ function resolvePlatformCliPath() {
|
|
|
6205
6069
|
try {
|
|
6206
6070
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6207
6071
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
|
|
6208
|
-
const binaryPath =
|
|
6072
|
+
const binaryPath = path15.join(path15.dirname(packageJsonPath), binaryName);
|
|
6209
6073
|
if (existsSync(binaryPath)) {
|
|
6210
6074
|
return binaryPath;
|
|
6211
6075
|
}
|
|
@@ -6213,7 +6077,7 @@ function resolvePlatformCliPath() {
|
|
|
6213
6077
|
}
|
|
6214
6078
|
let searchDir = process.cwd();
|
|
6215
6079
|
for (let i = 0; i < 10; i++) {
|
|
6216
|
-
const standardPath =
|
|
6080
|
+
const standardPath = path15.join(
|
|
6217
6081
|
searchDir,
|
|
6218
6082
|
"node_modules",
|
|
6219
6083
|
...packageName.split("/"),
|
|
@@ -6222,13 +6086,13 @@ function resolvePlatformCliPath() {
|
|
|
6222
6086
|
if (existsSync(standardPath)) {
|
|
6223
6087
|
return standardPath;
|
|
6224
6088
|
}
|
|
6225
|
-
const bunDir =
|
|
6089
|
+
const bunDir = path15.join(searchDir, "node_modules", ".bun");
|
|
6226
6090
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6227
6091
|
try {
|
|
6228
6092
|
const entries = readdirSync(bunDir);
|
|
6229
6093
|
for (const entry of entries) {
|
|
6230
6094
|
if (entry.startsWith(prefix)) {
|
|
6231
|
-
const candidate =
|
|
6095
|
+
const candidate = path15.join(
|
|
6232
6096
|
bunDir,
|
|
6233
6097
|
entry,
|
|
6234
6098
|
"node_modules",
|
|
@@ -6243,7 +6107,7 @@ function resolvePlatformCliPath() {
|
|
|
6243
6107
|
}
|
|
6244
6108
|
} catch {
|
|
6245
6109
|
}
|
|
6246
|
-
const parent =
|
|
6110
|
+
const parent = path15.dirname(searchDir);
|
|
6247
6111
|
if (parent === searchDir) break;
|
|
6248
6112
|
searchDir = parent;
|
|
6249
6113
|
}
|
|
@@ -6581,10 +6445,10 @@ var CopilotCliProvider = class {
|
|
|
6581
6445
|
}
|
|
6582
6446
|
resolveCwd(cwdOverride) {
|
|
6583
6447
|
if (cwdOverride) {
|
|
6584
|
-
return
|
|
6448
|
+
return path16.resolve(cwdOverride);
|
|
6585
6449
|
}
|
|
6586
6450
|
if (this.config.cwd) {
|
|
6587
|
-
return
|
|
6451
|
+
return path16.resolve(this.config.cwd);
|
|
6588
6452
|
}
|
|
6589
6453
|
return void 0;
|
|
6590
6454
|
}
|
|
@@ -6603,9 +6467,9 @@ var CopilotCliProvider = class {
|
|
|
6603
6467
|
return void 0;
|
|
6604
6468
|
}
|
|
6605
6469
|
if (this.config.logDir) {
|
|
6606
|
-
return
|
|
6470
|
+
return path16.resolve(this.config.logDir);
|
|
6607
6471
|
}
|
|
6608
|
-
return
|
|
6472
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
6609
6473
|
}
|
|
6610
6474
|
async createStreamLogger(request) {
|
|
6611
6475
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6619,7 +6483,7 @@ var CopilotCliProvider = class {
|
|
|
6619
6483
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6620
6484
|
return void 0;
|
|
6621
6485
|
}
|
|
6622
|
-
const filePath =
|
|
6486
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
6623
6487
|
try {
|
|
6624
6488
|
const logger = await CopilotStreamLogger.create(
|
|
6625
6489
|
{
|
|
@@ -6714,7 +6578,7 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6714
6578
|
// src/evaluation/providers/copilot-sdk.ts
|
|
6715
6579
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
6716
6580
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
6717
|
-
import
|
|
6581
|
+
import path17 from "node:path";
|
|
6718
6582
|
|
|
6719
6583
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
6720
6584
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -6993,10 +6857,10 @@ var CopilotSdkProvider = class {
|
|
|
6993
6857
|
}
|
|
6994
6858
|
resolveCwd(cwdOverride) {
|
|
6995
6859
|
if (cwdOverride) {
|
|
6996
|
-
return
|
|
6860
|
+
return path17.resolve(cwdOverride);
|
|
6997
6861
|
}
|
|
6998
6862
|
if (this.config.cwd) {
|
|
6999
|
-
return
|
|
6863
|
+
return path17.resolve(this.config.cwd);
|
|
7000
6864
|
}
|
|
7001
6865
|
return void 0;
|
|
7002
6866
|
}
|
|
@@ -7005,9 +6869,9 @@ var CopilotSdkProvider = class {
|
|
|
7005
6869
|
return void 0;
|
|
7006
6870
|
}
|
|
7007
6871
|
if (this.config.logDir) {
|
|
7008
|
-
return
|
|
6872
|
+
return path17.resolve(this.config.logDir);
|
|
7009
6873
|
}
|
|
7010
|
-
return
|
|
6874
|
+
return path17.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
7011
6875
|
}
|
|
7012
6876
|
async createStreamLogger(request) {
|
|
7013
6877
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7021,7 +6885,7 @@ var CopilotSdkProvider = class {
|
|
|
7021
6885
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7022
6886
|
return void 0;
|
|
7023
6887
|
}
|
|
7024
|
-
const filePath =
|
|
6888
|
+
const filePath = path17.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
7025
6889
|
try {
|
|
7026
6890
|
const logger = await CopilotStreamLogger.create(
|
|
7027
6891
|
{
|
|
@@ -7377,7 +7241,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7377
7241
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
7378
7242
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7379
7243
|
import { tmpdir } from "node:os";
|
|
7380
|
-
import
|
|
7244
|
+
import path18 from "node:path";
|
|
7381
7245
|
|
|
7382
7246
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7383
7247
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -7458,7 +7322,7 @@ var PiCodingAgentProvider = class {
|
|
|
7458
7322
|
const workspaceRoot = await this.createWorkspace();
|
|
7459
7323
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
7460
7324
|
try {
|
|
7461
|
-
const promptFile =
|
|
7325
|
+
const promptFile = path18.join(workspaceRoot, PROMPT_FILENAME);
|
|
7462
7326
|
await writeFile(promptFile, request.question, "utf8");
|
|
7463
7327
|
const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
|
|
7464
7328
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
@@ -7520,12 +7384,12 @@ var PiCodingAgentProvider = class {
|
|
|
7520
7384
|
}
|
|
7521
7385
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
7522
7386
|
if (cwdOverride) {
|
|
7523
|
-
return
|
|
7387
|
+
return path18.resolve(cwdOverride);
|
|
7524
7388
|
}
|
|
7525
7389
|
if (!this.config.cwd) {
|
|
7526
7390
|
return workspaceRoot;
|
|
7527
7391
|
}
|
|
7528
|
-
return
|
|
7392
|
+
return path18.resolve(this.config.cwd);
|
|
7529
7393
|
}
|
|
7530
7394
|
buildPiArgs(prompt, inputFiles, _captureFileChanges) {
|
|
7531
7395
|
const args = [];
|
|
@@ -7614,7 +7478,7 @@ ${prompt}` : prompt;
|
|
|
7614
7478
|
return env;
|
|
7615
7479
|
}
|
|
7616
7480
|
async createWorkspace() {
|
|
7617
|
-
return await mkdtemp(
|
|
7481
|
+
return await mkdtemp(path18.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7618
7482
|
}
|
|
7619
7483
|
async cleanupWorkspace(workspaceRoot) {
|
|
7620
7484
|
try {
|
|
@@ -7624,9 +7488,9 @@ ${prompt}` : prompt;
|
|
|
7624
7488
|
}
|
|
7625
7489
|
resolveLogDirectory() {
|
|
7626
7490
|
if (this.config.logDir) {
|
|
7627
|
-
return
|
|
7491
|
+
return path18.resolve(this.config.logDir);
|
|
7628
7492
|
}
|
|
7629
|
-
return
|
|
7493
|
+
return path18.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7630
7494
|
}
|
|
7631
7495
|
async createStreamLogger(request) {
|
|
7632
7496
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7640,7 +7504,7 @@ ${prompt}` : prompt;
|
|
|
7640
7504
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7641
7505
|
return void 0;
|
|
7642
7506
|
}
|
|
7643
|
-
const filePath =
|
|
7507
|
+
const filePath = path18.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7644
7508
|
try {
|
|
7645
7509
|
const logger = await PiStreamLogger.create({
|
|
7646
7510
|
filePath,
|
|
@@ -8141,17 +8005,17 @@ var ProviderRegistry = class {
|
|
|
8141
8005
|
// src/evaluation/providers/vscode-provider.ts
|
|
8142
8006
|
import { exec as exec2 } from "node:child_process";
|
|
8143
8007
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
8144
|
-
import
|
|
8008
|
+
import path30 from "node:path";
|
|
8145
8009
|
import { promisify as promisify3 } from "node:util";
|
|
8146
8010
|
|
|
8147
8011
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
8148
8012
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
8149
|
-
import
|
|
8013
|
+
import path28 from "node:path";
|
|
8150
8014
|
|
|
8151
8015
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
8152
8016
|
import { constants as constants2 } from "node:fs";
|
|
8153
8017
|
import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8154
|
-
import
|
|
8018
|
+
import path19 from "node:path";
|
|
8155
8019
|
async function pathExists(target) {
|
|
8156
8020
|
try {
|
|
8157
8021
|
await access2(target, constants2.F_OK);
|
|
@@ -8167,7 +8031,7 @@ async function readDirEntries(target) {
|
|
|
8167
8031
|
const entries = await readdir(target, { withFileTypes: true });
|
|
8168
8032
|
return entries.map((entry) => ({
|
|
8169
8033
|
name: entry.name,
|
|
8170
|
-
absolutePath:
|
|
8034
|
+
absolutePath: path19.join(target, entry.name),
|
|
8171
8035
|
isDirectory: entry.isDirectory()
|
|
8172
8036
|
}));
|
|
8173
8037
|
}
|
|
@@ -8182,9 +8046,9 @@ async function removeIfExists(target) {
|
|
|
8182
8046
|
}
|
|
8183
8047
|
|
|
8184
8048
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
8185
|
-
import
|
|
8049
|
+
import path20 from "node:path";
|
|
8186
8050
|
function pathToFileUri2(filePath) {
|
|
8187
|
-
const absolutePath =
|
|
8051
|
+
const absolutePath = path20.isAbsolute(filePath) ? filePath : path20.resolve(filePath);
|
|
8188
8052
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8189
8053
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8190
8054
|
return `file:///${normalizedPath}`;
|
|
@@ -8193,7 +8057,7 @@ function pathToFileUri2(filePath) {
|
|
|
8193
8057
|
}
|
|
8194
8058
|
|
|
8195
8059
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
8196
|
-
import
|
|
8060
|
+
import path21 from "node:path";
|
|
8197
8061
|
|
|
8198
8062
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
8199
8063
|
function renderTemplate2(content, variables) {
|
|
@@ -8285,8 +8149,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
8285
8149
|
});
|
|
8286
8150
|
}
|
|
8287
8151
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
8288
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
8289
|
-
const responseList = responseFiles.map((file) => `"${
|
|
8152
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path21.basename(file)}`).join("\n");
|
|
8153
|
+
const responseList = responseFiles.map((file) => `"${path21.basename(file)}"`).join(", ");
|
|
8290
8154
|
return renderTemplate2(templateContent, {
|
|
8291
8155
|
requestFiles: requestLines,
|
|
8292
8156
|
responseList
|
|
@@ -8294,8 +8158,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
8294
8158
|
}
|
|
8295
8159
|
|
|
8296
8160
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
8297
|
-
import { readFile as
|
|
8298
|
-
import
|
|
8161
|
+
import { readFile as readFile8 } from "node:fs/promises";
|
|
8162
|
+
import path22 from "node:path";
|
|
8299
8163
|
|
|
8300
8164
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
8301
8165
|
function sleep2(ms) {
|
|
@@ -8333,7 +8197,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8333
8197
|
const maxAttempts = 10;
|
|
8334
8198
|
while (attempts < maxAttempts) {
|
|
8335
8199
|
try {
|
|
8336
|
-
const content = await
|
|
8200
|
+
const content = await readFile8(responseFileFinal, { encoding: "utf8" });
|
|
8337
8201
|
if (!silent) {
|
|
8338
8202
|
process.stdout.write(`${content}
|
|
8339
8203
|
`);
|
|
@@ -8354,7 +8218,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8354
8218
|
}
|
|
8355
8219
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8356
8220
|
if (!silent) {
|
|
8357
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8221
|
+
const fileList = responseFilesFinal.map((file) => path22.basename(file)).join(", ");
|
|
8358
8222
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8359
8223
|
}
|
|
8360
8224
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8363,7 +8227,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8363
8227
|
while (pending.size > 0) {
|
|
8364
8228
|
if (Date.now() >= deadline) {
|
|
8365
8229
|
if (!silent) {
|
|
8366
|
-
const remaining = [...pending].map((f) =>
|
|
8230
|
+
const remaining = [...pending].map((f) => path22.basename(f)).join(", ");
|
|
8367
8231
|
console.error(
|
|
8368
8232
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8369
8233
|
);
|
|
@@ -8390,7 +8254,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8390
8254
|
const maxAttempts = 10;
|
|
8391
8255
|
while (attempts < maxAttempts) {
|
|
8392
8256
|
try {
|
|
8393
|
-
const content = await
|
|
8257
|
+
const content = await readFile8(file, { encoding: "utf8" });
|
|
8394
8258
|
if (!silent) {
|
|
8395
8259
|
process.stdout.write(`${content}
|
|
8396
8260
|
`);
|
|
@@ -8414,15 +8278,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8414
8278
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8415
8279
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8416
8280
|
import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
|
|
8417
|
-
import
|
|
8281
|
+
import path25 from "node:path";
|
|
8418
8282
|
import { promisify as promisify2 } from "node:util";
|
|
8419
8283
|
|
|
8420
8284
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8421
|
-
import
|
|
8285
|
+
import path24 from "node:path";
|
|
8422
8286
|
|
|
8423
8287
|
// src/paths.ts
|
|
8424
8288
|
import os2 from "node:os";
|
|
8425
|
-
import
|
|
8289
|
+
import path23 from "node:path";
|
|
8426
8290
|
var logged = false;
|
|
8427
8291
|
function getAgentvHome() {
|
|
8428
8292
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8433,19 +8297,19 @@ function getAgentvHome() {
|
|
|
8433
8297
|
}
|
|
8434
8298
|
return envHome;
|
|
8435
8299
|
}
|
|
8436
|
-
return
|
|
8300
|
+
return path23.join(os2.homedir(), ".agentv");
|
|
8437
8301
|
}
|
|
8438
8302
|
function getWorkspacesRoot() {
|
|
8439
|
-
return
|
|
8303
|
+
return path23.join(getAgentvHome(), "workspaces");
|
|
8440
8304
|
}
|
|
8441
8305
|
function getSubagentsRoot() {
|
|
8442
|
-
return
|
|
8306
|
+
return path23.join(getAgentvHome(), "subagents");
|
|
8443
8307
|
}
|
|
8444
8308
|
function getTraceStateRoot() {
|
|
8445
|
-
return
|
|
8309
|
+
return path23.join(getAgentvHome(), "trace-state");
|
|
8446
8310
|
}
|
|
8447
8311
|
function getWorkspacePoolRoot() {
|
|
8448
|
-
return
|
|
8312
|
+
return path23.join(getAgentvHome(), "workspace-pool");
|
|
8449
8313
|
}
|
|
8450
8314
|
|
|
8451
8315
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8453,7 +8317,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8453
8317
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8454
8318
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
8455
8319
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
8456
|
-
return
|
|
8320
|
+
return path24.join(getSubagentsRoot(), folder);
|
|
8457
8321
|
}
|
|
8458
8322
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8459
8323
|
|
|
@@ -8520,11 +8384,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8520
8384
|
await raceSpawnError(child);
|
|
8521
8385
|
return true;
|
|
8522
8386
|
}
|
|
8523
|
-
const aliveFile =
|
|
8387
|
+
const aliveFile = path25.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8524
8388
|
await removeIfExists(aliveFile);
|
|
8525
|
-
const githubAgentsDir =
|
|
8389
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
8526
8390
|
await mkdir8(githubAgentsDir, { recursive: true });
|
|
8527
|
-
const wakeupDst =
|
|
8391
|
+
const wakeupDst = path25.join(githubAgentsDir, "wakeup.md");
|
|
8528
8392
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8529
8393
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8530
8394
|
label: "open-workspace"
|
|
@@ -8537,7 +8401,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8537
8401
|
"chat",
|
|
8538
8402
|
"-m",
|
|
8539
8403
|
wakeupChatId,
|
|
8540
|
-
`create a file named .alive in the ${
|
|
8404
|
+
`create a file named .alive in the ${path25.basename(subagentDir)} folder`
|
|
8541
8405
|
];
|
|
8542
8406
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8543
8407
|
await raceSpawnError(wakeupChild);
|
|
@@ -8552,10 +8416,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8552
8416
|
return true;
|
|
8553
8417
|
}
|
|
8554
8418
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8555
|
-
const workspacePath =
|
|
8556
|
-
const messagesDir =
|
|
8419
|
+
const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
|
|
8420
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
8557
8421
|
await mkdir8(messagesDir, { recursive: true });
|
|
8558
|
-
const reqFile =
|
|
8422
|
+
const reqFile = path25.join(messagesDir, `${timestamp}_req.md`);
|
|
8559
8423
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8560
8424
|
const reqUri = pathToFileUri2(reqFile);
|
|
8561
8425
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8563,16 +8427,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8563
8427
|
chatArgs.push("-a", attachment);
|
|
8564
8428
|
}
|
|
8565
8429
|
chatArgs.push("-a", reqFile);
|
|
8566
|
-
chatArgs.push(`Follow instructions in [${
|
|
8430
|
+
chatArgs.push(`Follow instructions in [${path25.basename(reqFile)}](${reqUri})`);
|
|
8567
8431
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8568
8432
|
workspacePath,
|
|
8569
|
-
|
|
8433
|
+
path25.basename(subagentDir),
|
|
8570
8434
|
subagentDir,
|
|
8571
8435
|
vscodeCmd
|
|
8572
8436
|
);
|
|
8573
8437
|
if (!workspaceReady) {
|
|
8574
8438
|
throw new Error(
|
|
8575
|
-
`VS Code workspace '${
|
|
8439
|
+
`VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8576
8440
|
);
|
|
8577
8441
|
}
|
|
8578
8442
|
await sleep2(500);
|
|
@@ -8580,8 +8444,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8580
8444
|
await raceSpawnError(child);
|
|
8581
8445
|
}
|
|
8582
8446
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8583
|
-
const workspacePath =
|
|
8584
|
-
const messagesDir =
|
|
8447
|
+
const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
|
|
8448
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
8585
8449
|
await mkdir8(messagesDir, { recursive: true });
|
|
8586
8450
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8587
8451
|
for (const attachment of attachmentPaths) {
|
|
@@ -8590,13 +8454,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8590
8454
|
chatArgs.push(chatInstruction);
|
|
8591
8455
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8592
8456
|
workspacePath,
|
|
8593
|
-
|
|
8457
|
+
path25.basename(subagentDir),
|
|
8594
8458
|
subagentDir,
|
|
8595
8459
|
vscodeCmd
|
|
8596
8460
|
);
|
|
8597
8461
|
if (!workspaceReady) {
|
|
8598
8462
|
throw new Error(
|
|
8599
|
-
`VS Code workspace '${
|
|
8463
|
+
`VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8600
8464
|
);
|
|
8601
8465
|
}
|
|
8602
8466
|
await sleep2(500);
|
|
@@ -8605,11 +8469,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8605
8469
|
}
|
|
8606
8470
|
|
|
8607
8471
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8608
|
-
import { copyFile, mkdir as mkdir9, readFile as
|
|
8609
|
-
import
|
|
8472
|
+
import { copyFile, mkdir as mkdir9, readFile as readFile9, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
8473
|
+
import path27 from "node:path";
|
|
8610
8474
|
|
|
8611
8475
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8612
|
-
import
|
|
8476
|
+
import path26 from "node:path";
|
|
8613
8477
|
import JSON5 from "json5";
|
|
8614
8478
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8615
8479
|
let workspace;
|
|
@@ -8626,10 +8490,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8626
8490
|
}
|
|
8627
8491
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8628
8492
|
const folderPath = folder.path;
|
|
8629
|
-
if (
|
|
8493
|
+
if (path26.isAbsolute(folderPath)) {
|
|
8630
8494
|
return folder;
|
|
8631
8495
|
}
|
|
8632
|
-
const absolutePath =
|
|
8496
|
+
const absolutePath = path26.resolve(templateDir, folderPath);
|
|
8633
8497
|
return {
|
|
8634
8498
|
...folder,
|
|
8635
8499
|
path: absolutePath
|
|
@@ -8651,19 +8515,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8651
8515
|
if (locationMap && typeof locationMap === "object") {
|
|
8652
8516
|
const transformedMap = {};
|
|
8653
8517
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8654
|
-
const isAbsolute =
|
|
8518
|
+
const isAbsolute = path26.isAbsolute(locationPath);
|
|
8655
8519
|
if (isAbsolute) {
|
|
8656
8520
|
transformedMap[locationPath] = value;
|
|
8657
8521
|
} else {
|
|
8658
8522
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8659
8523
|
if (firstGlobIndex === -1) {
|
|
8660
|
-
const resolvedPath =
|
|
8524
|
+
const resolvedPath = path26.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8661
8525
|
transformedMap[resolvedPath] = value;
|
|
8662
8526
|
} else {
|
|
8663
8527
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8664
8528
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8665
8529
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8666
|
-
const resolvedPath = (
|
|
8530
|
+
const resolvedPath = (path26.resolve(templateDir, basePath) + patternPath).replace(
|
|
8667
8531
|
/\\/g,
|
|
8668
8532
|
"/"
|
|
8669
8533
|
);
|
|
@@ -8704,7 +8568,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8704
8568
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8705
8569
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8706
8570
|
for (const subagent of subagents) {
|
|
8707
|
-
const lockFile =
|
|
8571
|
+
const lockFile = path27.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8708
8572
|
if (!await pathExists(lockFile)) {
|
|
8709
8573
|
return subagent.absolutePath;
|
|
8710
8574
|
}
|
|
@@ -8714,7 +8578,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8714
8578
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8715
8579
|
let workspaceContent;
|
|
8716
8580
|
if (workspaceTemplate) {
|
|
8717
|
-
const workspaceSrc =
|
|
8581
|
+
const workspaceSrc = path27.resolve(workspaceTemplate);
|
|
8718
8582
|
if (!await pathExists(workspaceSrc)) {
|
|
8719
8583
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8720
8584
|
}
|
|
@@ -8722,18 +8586,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8722
8586
|
if (!stats.isFile()) {
|
|
8723
8587
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
8724
8588
|
}
|
|
8725
|
-
const templateText = await
|
|
8589
|
+
const templateText = await readFile9(workspaceSrc, "utf8");
|
|
8726
8590
|
workspaceContent = JSON.parse(templateText);
|
|
8727
8591
|
} else {
|
|
8728
8592
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8729
8593
|
}
|
|
8730
|
-
const workspaceName = `${
|
|
8731
|
-
const workspaceDst =
|
|
8732
|
-
const templateDir = workspaceTemplate ?
|
|
8594
|
+
const workspaceName = `${path27.basename(subagentDir)}.code-workspace`;
|
|
8595
|
+
const workspaceDst = path27.join(subagentDir, workspaceName);
|
|
8596
|
+
const templateDir = workspaceTemplate ? path27.dirname(path27.resolve(workspaceTemplate)) : subagentDir;
|
|
8733
8597
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8734
8598
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8735
8599
|
if (cwd) {
|
|
8736
|
-
const absCwd =
|
|
8600
|
+
const absCwd = path27.resolve(cwd);
|
|
8737
8601
|
const parsed = JSON.parse(transformedContent);
|
|
8738
8602
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8739
8603
|
if (!alreadyPresent) {
|
|
@@ -8742,35 +8606,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8742
8606
|
}
|
|
8743
8607
|
}
|
|
8744
8608
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8745
|
-
const messagesDir =
|
|
8609
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8746
8610
|
await mkdir9(messagesDir, { recursive: true });
|
|
8747
8611
|
return { workspace: workspaceDst, messagesDir };
|
|
8748
8612
|
}
|
|
8749
8613
|
async function createSubagentLock(subagentDir) {
|
|
8750
|
-
const messagesDir =
|
|
8614
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8751
8615
|
if (await pathExists(messagesDir)) {
|
|
8752
8616
|
const files = await readdir2(messagesDir);
|
|
8753
8617
|
await Promise.all(
|
|
8754
8618
|
files.map(async (file) => {
|
|
8755
|
-
const target =
|
|
8619
|
+
const target = path27.join(messagesDir, file);
|
|
8756
8620
|
await removeIfExists(target);
|
|
8757
8621
|
})
|
|
8758
8622
|
);
|
|
8759
8623
|
}
|
|
8760
|
-
const githubAgentsDir =
|
|
8624
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8761
8625
|
if (await pathExists(githubAgentsDir)) {
|
|
8762
8626
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
8763
8627
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8764
8628
|
await Promise.all(
|
|
8765
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
8629
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path27.join(githubAgentsDir, file)))
|
|
8766
8630
|
);
|
|
8767
8631
|
}
|
|
8768
|
-
const lockFile =
|
|
8632
|
+
const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8769
8633
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8770
8634
|
return lockFile;
|
|
8771
8635
|
}
|
|
8772
8636
|
async function removeSubagentLock(subagentDir) {
|
|
8773
|
-
const lockFile =
|
|
8637
|
+
const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8774
8638
|
await removeIfExists(lockFile);
|
|
8775
8639
|
}
|
|
8776
8640
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8790,9 +8654,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8790
8654
|
return 1;
|
|
8791
8655
|
}
|
|
8792
8656
|
if (promptFile) {
|
|
8793
|
-
const githubAgentsDir =
|
|
8657
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8794
8658
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8795
|
-
const agentFile =
|
|
8659
|
+
const agentFile = path27.join(githubAgentsDir, `${chatId}.md`);
|
|
8796
8660
|
try {
|
|
8797
8661
|
await copyFile(promptFile, agentFile);
|
|
8798
8662
|
} catch (error) {
|
|
@@ -8811,7 +8675,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
8811
8675
|
if (!promptFile) {
|
|
8812
8676
|
return void 0;
|
|
8813
8677
|
}
|
|
8814
|
-
const resolvedPrompt =
|
|
8678
|
+
const resolvedPrompt = path28.resolve(promptFile);
|
|
8815
8679
|
if (!await pathExists(resolvedPrompt)) {
|
|
8816
8680
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8817
8681
|
}
|
|
@@ -8827,7 +8691,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8827
8691
|
}
|
|
8828
8692
|
const resolved = [];
|
|
8829
8693
|
for (const attachment of extraAttachments) {
|
|
8830
|
-
const resolvedPath =
|
|
8694
|
+
const resolvedPath = path28.resolve(attachment);
|
|
8831
8695
|
if (!await pathExists(resolvedPath)) {
|
|
8832
8696
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8833
8697
|
}
|
|
@@ -8869,7 +8733,7 @@ async function dispatchAgentSession(options) {
|
|
|
8869
8733
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8870
8734
|
};
|
|
8871
8735
|
}
|
|
8872
|
-
const subagentName =
|
|
8736
|
+
const subagentName = path28.basename(subagentDir);
|
|
8873
8737
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8874
8738
|
const preparationResult = await prepareSubagentDirectory(
|
|
8875
8739
|
subagentDir,
|
|
@@ -8897,9 +8761,9 @@ async function dispatchAgentSession(options) {
|
|
|
8897
8761
|
};
|
|
8898
8762
|
}
|
|
8899
8763
|
const timestamp = generateTimestamp();
|
|
8900
|
-
const messagesDir =
|
|
8901
|
-
const responseFileTmp =
|
|
8902
|
-
const responseFileFinal =
|
|
8764
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
8765
|
+
const responseFileTmp = path28.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
8766
|
+
const responseFileFinal = path28.join(messagesDir, `${timestamp}_res.md`);
|
|
8903
8767
|
const requestInstructions = createRequestPrompt(
|
|
8904
8768
|
userQuery,
|
|
8905
8769
|
responseFileTmp,
|
|
@@ -9004,7 +8868,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9004
8868
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
9005
8869
|
};
|
|
9006
8870
|
}
|
|
9007
|
-
subagentName =
|
|
8871
|
+
subagentName = path28.basename(subagentDir);
|
|
9008
8872
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
9009
8873
|
const preparationResult = await prepareSubagentDirectory(
|
|
9010
8874
|
subagentDir,
|
|
@@ -9035,17 +8899,17 @@ async function dispatchBatchAgent(options) {
|
|
|
9035
8899
|
};
|
|
9036
8900
|
}
|
|
9037
8901
|
const timestamp = generateTimestamp();
|
|
9038
|
-
const messagesDir =
|
|
8902
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
9039
8903
|
requestFiles = userQueries.map(
|
|
9040
|
-
(_, index) =>
|
|
8904
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
9041
8905
|
);
|
|
9042
8906
|
const responseTmpFiles = userQueries.map(
|
|
9043
|
-
(_, index) =>
|
|
8907
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
9044
8908
|
);
|
|
9045
8909
|
responseFilesFinal = userQueries.map(
|
|
9046
|
-
(_, index) =>
|
|
8910
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
9047
8911
|
);
|
|
9048
|
-
const orchestratorFile =
|
|
8912
|
+
const orchestratorFile = path28.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
9049
8913
|
if (!dryRun) {
|
|
9050
8914
|
await Promise.all(
|
|
9051
8915
|
userQueries.map((query, index) => {
|
|
@@ -9131,7 +8995,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9131
8995
|
|
|
9132
8996
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
9133
8997
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
9134
|
-
import
|
|
8998
|
+
import path29 from "node:path";
|
|
9135
8999
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
9136
9000
|
folders: [
|
|
9137
9001
|
{
|
|
@@ -9162,7 +9026,7 @@ async function provisionSubagents(options) {
|
|
|
9162
9026
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
9163
9027
|
throw new Error("subagents must be a positive integer");
|
|
9164
9028
|
}
|
|
9165
|
-
const targetPath =
|
|
9029
|
+
const targetPath = path29.resolve(targetRoot);
|
|
9166
9030
|
if (!dryRun) {
|
|
9167
9031
|
await ensureDir(targetPath);
|
|
9168
9032
|
}
|
|
@@ -9182,7 +9046,7 @@ async function provisionSubagents(options) {
|
|
|
9182
9046
|
continue;
|
|
9183
9047
|
}
|
|
9184
9048
|
highestNumber = Math.max(highestNumber, parsed);
|
|
9185
|
-
const lockFile =
|
|
9049
|
+
const lockFile = path29.join(entry.absolutePath, lockName);
|
|
9186
9050
|
const locked = await pathExists(lockFile);
|
|
9187
9051
|
if (locked) {
|
|
9188
9052
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -9199,10 +9063,10 @@ async function provisionSubagents(options) {
|
|
|
9199
9063
|
break;
|
|
9200
9064
|
}
|
|
9201
9065
|
const subagentDir = subagent.absolutePath;
|
|
9202
|
-
const githubAgentsDir =
|
|
9203
|
-
const lockFile =
|
|
9204
|
-
const workspaceDst =
|
|
9205
|
-
const wakeupDst =
|
|
9066
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9067
|
+
const lockFile = path29.join(subagentDir, lockName);
|
|
9068
|
+
const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
9069
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
9206
9070
|
const isLocked = await pathExists(lockFile);
|
|
9207
9071
|
if (isLocked && !force) {
|
|
9208
9072
|
continue;
|
|
@@ -9240,10 +9104,10 @@ async function provisionSubagents(options) {
|
|
|
9240
9104
|
let nextIndex = highestNumber;
|
|
9241
9105
|
while (subagentsProvisioned < subagents) {
|
|
9242
9106
|
nextIndex += 1;
|
|
9243
|
-
const subagentDir =
|
|
9244
|
-
const githubAgentsDir =
|
|
9245
|
-
const workspaceDst =
|
|
9246
|
-
const wakeupDst =
|
|
9107
|
+
const subagentDir = path29.join(targetPath, `subagent-${nextIndex}`);
|
|
9108
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9109
|
+
const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
9110
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
9247
9111
|
if (!dryRun) {
|
|
9248
9112
|
await ensureDir(subagentDir);
|
|
9249
9113
|
await ensureDir(githubAgentsDir);
|
|
@@ -9433,7 +9297,7 @@ var VSCodeProvider = class {
|
|
|
9433
9297
|
async function locateVSCodeExecutable(candidate) {
|
|
9434
9298
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9435
9299
|
if (includesPathSeparator) {
|
|
9436
|
-
const resolved =
|
|
9300
|
+
const resolved = path30.isAbsolute(candidate) ? candidate : path30.resolve(candidate);
|
|
9437
9301
|
try {
|
|
9438
9302
|
await access3(resolved, constants3.F_OK);
|
|
9439
9303
|
return resolved;
|
|
@@ -9462,7 +9326,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9462
9326
|
return void 0;
|
|
9463
9327
|
}
|
|
9464
9328
|
try {
|
|
9465
|
-
const stats = await stat4(
|
|
9329
|
+
const stats = await stat4(path30.resolve(template));
|
|
9466
9330
|
return stats.isFile() ? template : void 0;
|
|
9467
9331
|
} catch {
|
|
9468
9332
|
return template;
|
|
@@ -9488,7 +9352,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
9488
9352
|
return "";
|
|
9489
9353
|
}
|
|
9490
9354
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9491
|
-
const fileName =
|
|
9355
|
+
const fileName = path30.basename(absolutePath);
|
|
9492
9356
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9493
9357
|
return `* [${fileName}](${fileUri})`;
|
|
9494
9358
|
});
|
|
@@ -9513,8 +9377,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
9513
9377
|
}
|
|
9514
9378
|
const unique = /* @__PURE__ */ new Map();
|
|
9515
9379
|
for (const attachment of attachments) {
|
|
9516
|
-
const absolutePath =
|
|
9517
|
-
const normalized = absolutePath.split(
|
|
9380
|
+
const absolutePath = path30.resolve(attachment);
|
|
9381
|
+
const normalized = absolutePath.split(path30.sep).join("/");
|
|
9518
9382
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
9519
9383
|
if (!unique.has(absolutePath)) {
|
|
9520
9384
|
unique.set(absolutePath, absolutePath);
|
|
@@ -9529,7 +9393,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9529
9393
|
}
|
|
9530
9394
|
const unique = /* @__PURE__ */ new Map();
|
|
9531
9395
|
for (const attachment of attachments) {
|
|
9532
|
-
const absolutePath =
|
|
9396
|
+
const absolutePath = path30.resolve(attachment);
|
|
9533
9397
|
if (!unique.has(absolutePath)) {
|
|
9534
9398
|
unique.set(absolutePath, absolutePath);
|
|
9535
9399
|
}
|
|
@@ -9537,7 +9401,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9537
9401
|
return Array.from(unique.values());
|
|
9538
9402
|
}
|
|
9539
9403
|
function pathToFileUri3(filePath) {
|
|
9540
|
-
const absolutePath =
|
|
9404
|
+
const absolutePath = path30.isAbsolute(filePath) ? filePath : path30.resolve(filePath);
|
|
9541
9405
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9542
9406
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9543
9407
|
return `file:///${normalizedPath}`;
|
|
@@ -9550,7 +9414,7 @@ function normalizeAttachments(attachments) {
|
|
|
9550
9414
|
}
|
|
9551
9415
|
const deduped = /* @__PURE__ */ new Set();
|
|
9552
9416
|
for (const attachment of attachments) {
|
|
9553
|
-
deduped.add(
|
|
9417
|
+
deduped.add(path30.resolve(attachment));
|
|
9554
9418
|
}
|
|
9555
9419
|
return Array.from(deduped);
|
|
9556
9420
|
}
|
|
@@ -9559,7 +9423,7 @@ function mergeAttachments(all) {
|
|
|
9559
9423
|
for (const list of all) {
|
|
9560
9424
|
if (!list) continue;
|
|
9561
9425
|
for (const inputFile of list) {
|
|
9562
|
-
deduped.add(
|
|
9426
|
+
deduped.add(path30.resolve(inputFile));
|
|
9563
9427
|
}
|
|
9564
9428
|
}
|
|
9565
9429
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9607,8 +9471,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9607
9471
|
|
|
9608
9472
|
// src/evaluation/providers/targets-file.ts
|
|
9609
9473
|
import { constants as constants4 } from "node:fs";
|
|
9610
|
-
import { access as access4, readFile as
|
|
9611
|
-
import
|
|
9474
|
+
import { access as access4, readFile as readFile10 } from "node:fs/promises";
|
|
9475
|
+
import path31 from "node:path";
|
|
9612
9476
|
import { parse as parse4 } from "yaml";
|
|
9613
9477
|
function isRecord(value) {
|
|
9614
9478
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9645,11 +9509,11 @@ async function fileExists3(filePath) {
|
|
|
9645
9509
|
}
|
|
9646
9510
|
}
|
|
9647
9511
|
async function readTargetDefinitions(filePath) {
|
|
9648
|
-
const absolutePath =
|
|
9512
|
+
const absolutePath = path31.resolve(filePath);
|
|
9649
9513
|
if (!await fileExists3(absolutePath)) {
|
|
9650
9514
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9651
9515
|
}
|
|
9652
|
-
const raw = await
|
|
9516
|
+
const raw = await readFile10(absolutePath, "utf8");
|
|
9653
9517
|
const parsed = parse4(raw);
|
|
9654
9518
|
if (!isRecord(parsed)) {
|
|
9655
9519
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -9665,21 +9529,21 @@ function listTargetNames(definitions) {
|
|
|
9665
9529
|
}
|
|
9666
9530
|
|
|
9667
9531
|
// src/evaluation/providers/provider-discovery.ts
|
|
9668
|
-
import
|
|
9669
|
-
import
|
|
9532
|
+
import path32 from "node:path";
|
|
9533
|
+
import fg from "fast-glob";
|
|
9670
9534
|
async function discoverProviders(registry, baseDir) {
|
|
9671
9535
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9672
9536
|
const candidateDirs = [];
|
|
9673
|
-
let dir =
|
|
9674
|
-
const root =
|
|
9537
|
+
let dir = path32.resolve(baseDir);
|
|
9538
|
+
const root = path32.parse(dir).root;
|
|
9675
9539
|
while (dir !== root) {
|
|
9676
|
-
candidateDirs.push(
|
|
9677
|
-
dir =
|
|
9540
|
+
candidateDirs.push(path32.join(dir, ".agentv", "providers"));
|
|
9541
|
+
dir = path32.dirname(dir);
|
|
9678
9542
|
}
|
|
9679
9543
|
let files = [];
|
|
9680
9544
|
for (const providersDir of candidateDirs) {
|
|
9681
9545
|
try {
|
|
9682
|
-
const found = await
|
|
9546
|
+
const found = await fg(patterns, {
|
|
9683
9547
|
cwd: providersDir,
|
|
9684
9548
|
absolute: true,
|
|
9685
9549
|
onlyFiles: true
|
|
@@ -9690,7 +9554,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9690
9554
|
}
|
|
9691
9555
|
const discoveredKinds = [];
|
|
9692
9556
|
for (const filePath of files) {
|
|
9693
|
-
const basename =
|
|
9557
|
+
const basename = path32.basename(filePath);
|
|
9694
9558
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9695
9559
|
if (registry.has(kindName)) {
|
|
9696
9560
|
continue;
|
|
@@ -9899,15 +9763,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9899
9763
|
});
|
|
9900
9764
|
}
|
|
9901
9765
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9902
|
-
const { mkdir: mkdir15, readFile:
|
|
9766
|
+
const { mkdir: mkdir15, readFile: readFile13, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9903
9767
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9904
|
-
const
|
|
9768
|
+
const path45 = await import("node:path");
|
|
9905
9769
|
const { randomUUID: randomUUID9 } = await import("node:crypto");
|
|
9906
|
-
const dir =
|
|
9770
|
+
const dir = path45.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
|
|
9907
9771
|
await mkdir15(dir, { recursive: true });
|
|
9908
|
-
const stdinPath =
|
|
9909
|
-
const stdoutPath =
|
|
9910
|
-
const stderrPath =
|
|
9772
|
+
const stdinPath = path45.join(dir, "stdin.txt");
|
|
9773
|
+
const stdoutPath = path45.join(dir, "stdout.txt");
|
|
9774
|
+
const stderrPath = path45.join(dir, "stderr.txt");
|
|
9911
9775
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9912
9776
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9913
9777
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -9937,8 +9801,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
9937
9801
|
resolve(code ?? 0);
|
|
9938
9802
|
});
|
|
9939
9803
|
});
|
|
9940
|
-
const stdout = (await
|
|
9941
|
-
const stderr = (await
|
|
9804
|
+
const stdout = (await readFile13(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9805
|
+
const stderr = (await readFile13(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9942
9806
|
return { stdout, stderr, exitCode };
|
|
9943
9807
|
} finally {
|
|
9944
9808
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -10257,7 +10121,7 @@ var CodeEvaluator = class {
|
|
|
10257
10121
|
outputPath,
|
|
10258
10122
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
10259
10123
|
inputFiles: context.evalCase.file_paths.filter(
|
|
10260
|
-
(
|
|
10124
|
+
(path45) => !context.evalCase.guideline_paths.includes(path45)
|
|
10261
10125
|
),
|
|
10262
10126
|
input: context.evalCase.input,
|
|
10263
10127
|
trace: context.trace ?? null,
|
|
@@ -10389,7 +10253,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10389
10253
|
|
|
10390
10254
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10391
10255
|
import fs2 from "node:fs/promises";
|
|
10392
|
-
import
|
|
10256
|
+
import path33 from "node:path";
|
|
10393
10257
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10394
10258
|
import { z as z3 } from "zod";
|
|
10395
10259
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11221,8 +11085,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11221
11085
|
};
|
|
11222
11086
|
}
|
|
11223
11087
|
function resolveSandboxed(basePath, relativePath) {
|
|
11224
|
-
const resolved =
|
|
11225
|
-
if (!resolved.startsWith(basePath +
|
|
11088
|
+
const resolved = path33.resolve(basePath, relativePath);
|
|
11089
|
+
if (!resolved.startsWith(basePath + path33.sep) && resolved !== basePath) {
|
|
11226
11090
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
11227
11091
|
}
|
|
11228
11092
|
return resolved;
|
|
@@ -11312,11 +11176,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11312
11176
|
for (const entry of entries) {
|
|
11313
11177
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
11314
11178
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11315
|
-
const fullPath =
|
|
11179
|
+
const fullPath = path33.join(dirPath, entry.name);
|
|
11316
11180
|
if (entry.isDirectory()) {
|
|
11317
11181
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11318
11182
|
} else if (entry.isFile()) {
|
|
11319
|
-
const ext =
|
|
11183
|
+
const ext = path33.extname(entry.name).toLowerCase();
|
|
11320
11184
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11321
11185
|
try {
|
|
11322
11186
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -11328,7 +11192,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11328
11192
|
regex.lastIndex = 0;
|
|
11329
11193
|
if (regex.test(lines[i])) {
|
|
11330
11194
|
matches.push({
|
|
11331
|
-
file:
|
|
11195
|
+
file: path33.relative(workspacePath, fullPath),
|
|
11332
11196
|
line: i + 1,
|
|
11333
11197
|
text: lines[i].substring(0, 200)
|
|
11334
11198
|
});
|
|
@@ -11963,115 +11827,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11963
11827
|
* Evaluate a single field against the expected value.
|
|
11964
11828
|
*/
|
|
11965
11829
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11966
|
-
const { path:
|
|
11967
|
-
const candidateValue = resolvePath(candidateData,
|
|
11968
|
-
const expectedValue = resolvePath(expectedData,
|
|
11830
|
+
const { path: path45, match, required = true, weight = 1 } = fieldConfig;
|
|
11831
|
+
const candidateValue = resolvePath(candidateData, path45);
|
|
11832
|
+
const expectedValue = resolvePath(expectedData, path45);
|
|
11969
11833
|
if (expectedValue === void 0) {
|
|
11970
11834
|
return {
|
|
11971
|
-
path:
|
|
11835
|
+
path: path45,
|
|
11972
11836
|
score: 1,
|
|
11973
11837
|
// No expected value means no comparison needed
|
|
11974
11838
|
weight,
|
|
11975
11839
|
hit: true,
|
|
11976
|
-
message: `${
|
|
11840
|
+
message: `${path45}: no expected value`
|
|
11977
11841
|
};
|
|
11978
11842
|
}
|
|
11979
11843
|
if (candidateValue === void 0) {
|
|
11980
11844
|
if (required) {
|
|
11981
11845
|
return {
|
|
11982
|
-
path:
|
|
11846
|
+
path: path45,
|
|
11983
11847
|
score: 0,
|
|
11984
11848
|
weight,
|
|
11985
11849
|
hit: false,
|
|
11986
|
-
message: `${
|
|
11850
|
+
message: `${path45} (required, missing)`
|
|
11987
11851
|
};
|
|
11988
11852
|
}
|
|
11989
11853
|
return {
|
|
11990
|
-
path:
|
|
11854
|
+
path: path45,
|
|
11991
11855
|
score: 1,
|
|
11992
11856
|
// Don't penalize missing optional fields
|
|
11993
11857
|
weight: 0,
|
|
11994
11858
|
// Zero weight means it won't affect the score
|
|
11995
11859
|
hit: true,
|
|
11996
|
-
message: `${
|
|
11860
|
+
message: `${path45}: optional field missing`
|
|
11997
11861
|
};
|
|
11998
11862
|
}
|
|
11999
11863
|
switch (match) {
|
|
12000
11864
|
case "exact":
|
|
12001
|
-
return this.compareExact(
|
|
11865
|
+
return this.compareExact(path45, candidateValue, expectedValue, weight);
|
|
12002
11866
|
case "numeric_tolerance":
|
|
12003
11867
|
return this.compareNumericTolerance(
|
|
12004
|
-
|
|
11868
|
+
path45,
|
|
12005
11869
|
candidateValue,
|
|
12006
11870
|
expectedValue,
|
|
12007
11871
|
fieldConfig,
|
|
12008
11872
|
weight
|
|
12009
11873
|
);
|
|
12010
11874
|
case "date":
|
|
12011
|
-
return this.compareDate(
|
|
11875
|
+
return this.compareDate(path45, candidateValue, expectedValue, fieldConfig, weight);
|
|
12012
11876
|
default:
|
|
12013
11877
|
return {
|
|
12014
|
-
path:
|
|
11878
|
+
path: path45,
|
|
12015
11879
|
score: 0,
|
|
12016
11880
|
weight,
|
|
12017
11881
|
hit: false,
|
|
12018
|
-
message: `${
|
|
11882
|
+
message: `${path45}: unknown match type "${match}"`
|
|
12019
11883
|
};
|
|
12020
11884
|
}
|
|
12021
11885
|
}
|
|
12022
11886
|
/**
|
|
12023
11887
|
* Exact equality comparison.
|
|
12024
11888
|
*/
|
|
12025
|
-
compareExact(
|
|
11889
|
+
compareExact(path45, candidateValue, expectedValue, weight) {
|
|
12026
11890
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
12027
11891
|
return {
|
|
12028
|
-
path:
|
|
11892
|
+
path: path45,
|
|
12029
11893
|
score: 1,
|
|
12030
11894
|
weight,
|
|
12031
11895
|
hit: true,
|
|
12032
|
-
message:
|
|
11896
|
+
message: path45
|
|
12033
11897
|
};
|
|
12034
11898
|
}
|
|
12035
11899
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
12036
11900
|
return {
|
|
12037
|
-
path:
|
|
11901
|
+
path: path45,
|
|
12038
11902
|
score: 0,
|
|
12039
11903
|
weight,
|
|
12040
11904
|
hit: false,
|
|
12041
|
-
message: `${
|
|
11905
|
+
message: `${path45} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
12042
11906
|
};
|
|
12043
11907
|
}
|
|
12044
11908
|
return {
|
|
12045
|
-
path:
|
|
11909
|
+
path: path45,
|
|
12046
11910
|
score: 0,
|
|
12047
11911
|
weight,
|
|
12048
11912
|
hit: false,
|
|
12049
|
-
message: `${
|
|
11913
|
+
message: `${path45} (value mismatch)`
|
|
12050
11914
|
};
|
|
12051
11915
|
}
|
|
12052
11916
|
/**
|
|
12053
11917
|
* Numeric comparison with absolute or relative tolerance.
|
|
12054
11918
|
*/
|
|
12055
|
-
compareNumericTolerance(
|
|
11919
|
+
compareNumericTolerance(path45, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12056
11920
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
12057
11921
|
const candidateNum = toNumber(candidateValue);
|
|
12058
11922
|
const expectedNum = toNumber(expectedValue);
|
|
12059
11923
|
if (candidateNum === null || expectedNum === null) {
|
|
12060
11924
|
return {
|
|
12061
|
-
path:
|
|
11925
|
+
path: path45,
|
|
12062
11926
|
score: 0,
|
|
12063
11927
|
weight,
|
|
12064
11928
|
hit: false,
|
|
12065
|
-
message: `${
|
|
11929
|
+
message: `${path45} (non-numeric value)`
|
|
12066
11930
|
};
|
|
12067
11931
|
}
|
|
12068
11932
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
12069
11933
|
return {
|
|
12070
|
-
path:
|
|
11934
|
+
path: path45,
|
|
12071
11935
|
score: 0,
|
|
12072
11936
|
weight,
|
|
12073
11937
|
hit: false,
|
|
12074
|
-
message: `${
|
|
11938
|
+
message: `${path45} (invalid numeric value)`
|
|
12075
11939
|
};
|
|
12076
11940
|
}
|
|
12077
11941
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -12084,61 +11948,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
12084
11948
|
}
|
|
12085
11949
|
if (withinTolerance) {
|
|
12086
11950
|
return {
|
|
12087
|
-
path:
|
|
11951
|
+
path: path45,
|
|
12088
11952
|
score: 1,
|
|
12089
11953
|
weight,
|
|
12090
11954
|
hit: true,
|
|
12091
|
-
message: `${
|
|
11955
|
+
message: `${path45} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12092
11956
|
};
|
|
12093
11957
|
}
|
|
12094
11958
|
return {
|
|
12095
|
-
path:
|
|
11959
|
+
path: path45,
|
|
12096
11960
|
score: 0,
|
|
12097
11961
|
weight,
|
|
12098
11962
|
hit: false,
|
|
12099
|
-
message: `${
|
|
11963
|
+
message: `${path45} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12100
11964
|
};
|
|
12101
11965
|
}
|
|
12102
11966
|
/**
|
|
12103
11967
|
* Date comparison with format normalization.
|
|
12104
11968
|
*/
|
|
12105
|
-
compareDate(
|
|
11969
|
+
compareDate(path45, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12106
11970
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12107
11971
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12108
11972
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12109
11973
|
if (candidateDate === null) {
|
|
12110
11974
|
return {
|
|
12111
|
-
path:
|
|
11975
|
+
path: path45,
|
|
12112
11976
|
score: 0,
|
|
12113
11977
|
weight,
|
|
12114
11978
|
hit: false,
|
|
12115
|
-
message: `${
|
|
11979
|
+
message: `${path45} (unparseable candidate date)`
|
|
12116
11980
|
};
|
|
12117
11981
|
}
|
|
12118
11982
|
if (expectedDate === null) {
|
|
12119
11983
|
return {
|
|
12120
|
-
path:
|
|
11984
|
+
path: path45,
|
|
12121
11985
|
score: 0,
|
|
12122
11986
|
weight,
|
|
12123
11987
|
hit: false,
|
|
12124
|
-
message: `${
|
|
11988
|
+
message: `${path45} (unparseable expected date)`
|
|
12125
11989
|
};
|
|
12126
11990
|
}
|
|
12127
11991
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12128
11992
|
return {
|
|
12129
|
-
path:
|
|
11993
|
+
path: path45,
|
|
12130
11994
|
score: 1,
|
|
12131
11995
|
weight,
|
|
12132
11996
|
hit: true,
|
|
12133
|
-
message:
|
|
11997
|
+
message: path45
|
|
12134
11998
|
};
|
|
12135
11999
|
}
|
|
12136
12000
|
return {
|
|
12137
|
-
path:
|
|
12001
|
+
path: path45,
|
|
12138
12002
|
score: 0,
|
|
12139
12003
|
weight,
|
|
12140
12004
|
hit: false,
|
|
12141
|
-
message: `${
|
|
12005
|
+
message: `${path45} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12142
12006
|
};
|
|
12143
12007
|
}
|
|
12144
12008
|
/**
|
|
@@ -12171,11 +12035,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12171
12035
|
};
|
|
12172
12036
|
}
|
|
12173
12037
|
};
|
|
12174
|
-
function resolvePath(obj,
|
|
12175
|
-
if (!
|
|
12038
|
+
function resolvePath(obj, path45) {
|
|
12039
|
+
if (!path45 || !obj) {
|
|
12176
12040
|
return void 0;
|
|
12177
12041
|
}
|
|
12178
|
-
const parts =
|
|
12042
|
+
const parts = path45.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12179
12043
|
let current = obj;
|
|
12180
12044
|
for (const part of parts) {
|
|
12181
12045
|
if (current === null || current === void 0) {
|
|
@@ -12635,8 +12499,8 @@ var TokenUsageEvaluator = class {
|
|
|
12635
12499
|
};
|
|
12636
12500
|
|
|
12637
12501
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12638
|
-
function getNestedValue(obj,
|
|
12639
|
-
const parts =
|
|
12502
|
+
function getNestedValue(obj, path45) {
|
|
12503
|
+
const parts = path45.split(".");
|
|
12640
12504
|
let current = obj;
|
|
12641
12505
|
for (const part of parts) {
|
|
12642
12506
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -12780,11 +12644,9 @@ var ToolTrajectoryEvaluator = class {
|
|
|
12780
12644
|
for (const call of toolCalls) {
|
|
12781
12645
|
toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
|
|
12782
12646
|
}
|
|
12783
|
-
const toolNames = Object.keys(toolCallsByName).sort();
|
|
12784
12647
|
return {
|
|
12785
12648
|
eventCount: toolCalls.length,
|
|
12786
|
-
|
|
12787
|
-
toolCallsByName,
|
|
12649
|
+
toolCalls: toolCallsByName,
|
|
12788
12650
|
errorCount: 0
|
|
12789
12651
|
};
|
|
12790
12652
|
}
|
|
@@ -12802,7 +12664,7 @@ var ToolTrajectoryEvaluator = class {
|
|
|
12802
12664
|
const assertions = [];
|
|
12803
12665
|
for (const toolName of toolNames) {
|
|
12804
12666
|
const required = minimums[toolName];
|
|
12805
|
-
const actual = summary.
|
|
12667
|
+
const actual = summary.toolCalls[toolName] ?? 0;
|
|
12806
12668
|
if (actual >= required) {
|
|
12807
12669
|
assertions.push({
|
|
12808
12670
|
text: `${toolName}: called ${actual} times (required >=${required})`,
|
|
@@ -13260,7 +13122,7 @@ function runEqualsAssertion(output, value) {
|
|
|
13260
13122
|
// src/evaluation/orchestrator.ts
|
|
13261
13123
|
import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
|
|
13262
13124
|
import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13263
|
-
import
|
|
13125
|
+
import path42 from "node:path";
|
|
13264
13126
|
import micromatch4 from "micromatch";
|
|
13265
13127
|
|
|
13266
13128
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -13474,7 +13336,7 @@ var InlineAssertEvaluator = class {
|
|
|
13474
13336
|
};
|
|
13475
13337
|
|
|
13476
13338
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13477
|
-
import
|
|
13339
|
+
import path34 from "node:path";
|
|
13478
13340
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13479
13341
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13480
13342
|
if (!context) {
|
|
@@ -13523,7 +13385,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13523
13385
|
};
|
|
13524
13386
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13525
13387
|
const scriptPath = script[script.length - 1];
|
|
13526
|
-
const cwd =
|
|
13388
|
+
const cwd = path34.dirname(scriptPath);
|
|
13527
13389
|
try {
|
|
13528
13390
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13529
13391
|
const prompt = stdout.trim();
|
|
@@ -13795,21 +13657,21 @@ function createBuiltinRegistry() {
|
|
|
13795
13657
|
}
|
|
13796
13658
|
|
|
13797
13659
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13798
|
-
import
|
|
13799
|
-
import
|
|
13660
|
+
import path35 from "node:path";
|
|
13661
|
+
import fg2 from "fast-glob";
|
|
13800
13662
|
async function discoverAssertions(registry, baseDir) {
|
|
13801
13663
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13802
13664
|
const candidateDirs = [];
|
|
13803
|
-
let dir =
|
|
13804
|
-
const root =
|
|
13665
|
+
let dir = path35.resolve(baseDir);
|
|
13666
|
+
const root = path35.parse(dir).root;
|
|
13805
13667
|
while (dir !== root) {
|
|
13806
|
-
candidateDirs.push(
|
|
13807
|
-
dir =
|
|
13668
|
+
candidateDirs.push(path35.join(dir, ".agentv", "assertions"));
|
|
13669
|
+
dir = path35.dirname(dir);
|
|
13808
13670
|
}
|
|
13809
13671
|
let files = [];
|
|
13810
13672
|
for (const assertionsDir of candidateDirs) {
|
|
13811
13673
|
try {
|
|
13812
|
-
const found = await
|
|
13674
|
+
const found = await fg2(patterns, {
|
|
13813
13675
|
cwd: assertionsDir,
|
|
13814
13676
|
absolute: true,
|
|
13815
13677
|
onlyFiles: true
|
|
@@ -13820,7 +13682,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13820
13682
|
}
|
|
13821
13683
|
const discoveredTypes = [];
|
|
13822
13684
|
for (const filePath of files) {
|
|
13823
|
-
const basename =
|
|
13685
|
+
const basename = path35.basename(filePath);
|
|
13824
13686
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13825
13687
|
if (registry.has(typeName)) {
|
|
13826
13688
|
continue;
|
|
@@ -13838,22 +13700,22 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13838
13700
|
}
|
|
13839
13701
|
|
|
13840
13702
|
// src/evaluation/registry/grader-discovery.ts
|
|
13841
|
-
import
|
|
13842
|
-
import
|
|
13703
|
+
import path36 from "node:path";
|
|
13704
|
+
import fg3 from "fast-glob";
|
|
13843
13705
|
async function discoverGraders(registry, baseDir) {
|
|
13844
13706
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13845
13707
|
const candidateDirs = [];
|
|
13846
|
-
let dir =
|
|
13847
|
-
const root =
|
|
13708
|
+
let dir = path36.resolve(baseDir);
|
|
13709
|
+
const root = path36.parse(dir).root;
|
|
13848
13710
|
while (dir !== root) {
|
|
13849
|
-
candidateDirs.push(
|
|
13850
|
-
candidateDirs.push(
|
|
13851
|
-
dir =
|
|
13711
|
+
candidateDirs.push(path36.join(dir, ".agentv", "graders"));
|
|
13712
|
+
candidateDirs.push(path36.join(dir, ".agentv", "judges"));
|
|
13713
|
+
dir = path36.dirname(dir);
|
|
13852
13714
|
}
|
|
13853
13715
|
let files = [];
|
|
13854
13716
|
for (const gradersDir of candidateDirs) {
|
|
13855
13717
|
try {
|
|
13856
|
-
const found = await
|
|
13718
|
+
const found = await fg3(patterns, {
|
|
13857
13719
|
cwd: gradersDir,
|
|
13858
13720
|
absolute: true,
|
|
13859
13721
|
onlyFiles: true
|
|
@@ -13864,7 +13726,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13864
13726
|
}
|
|
13865
13727
|
const discoveredTypes = [];
|
|
13866
13728
|
for (const filePath of files) {
|
|
13867
|
-
const basename =
|
|
13729
|
+
const basename = path36.basename(filePath);
|
|
13868
13730
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13869
13731
|
if (registry.has(typeName)) {
|
|
13870
13732
|
continue;
|
|
@@ -14024,7 +13886,7 @@ function getTCritical(df) {
|
|
|
14024
13886
|
// src/evaluation/workspace/file-changes.ts
|
|
14025
13887
|
import { exec as execCallback } from "node:child_process";
|
|
14026
13888
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
14027
|
-
import
|
|
13889
|
+
import path37 from "node:path";
|
|
14028
13890
|
import { promisify as promisify4 } from "node:util";
|
|
14029
13891
|
var execAsync4 = promisify4(execCallback);
|
|
14030
13892
|
function gitExecOpts(workspacePath) {
|
|
@@ -14058,10 +13920,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
14058
13920
|
}
|
|
14059
13921
|
for (const entry of entries) {
|
|
14060
13922
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
14061
|
-
const childPath =
|
|
13923
|
+
const childPath = path37.join(workspacePath, entry);
|
|
14062
13924
|
try {
|
|
14063
13925
|
if (!statSync(childPath).isDirectory()) continue;
|
|
14064
|
-
if (!statSync(
|
|
13926
|
+
if (!statSync(path37.join(childPath, ".git")).isDirectory()) continue;
|
|
14065
13927
|
} catch {
|
|
14066
13928
|
continue;
|
|
14067
13929
|
}
|
|
@@ -14072,7 +13934,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
14072
13934
|
|
|
14073
13935
|
// src/evaluation/workspace/manager.ts
|
|
14074
13936
|
import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
14075
|
-
import
|
|
13937
|
+
import path38 from "node:path";
|
|
14076
13938
|
var TemplateNotFoundError = class extends Error {
|
|
14077
13939
|
constructor(templatePath) {
|
|
14078
13940
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -14102,14 +13964,14 @@ async function isDirectory(filePath) {
|
|
|
14102
13964
|
}
|
|
14103
13965
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
14104
13966
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14105
|
-
return
|
|
13967
|
+
return path38.join(root, evalRunId, caseId);
|
|
14106
13968
|
}
|
|
14107
13969
|
async function copyDirectoryRecursive(src, dest) {
|
|
14108
13970
|
await mkdir11(dest, { recursive: true });
|
|
14109
13971
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
14110
13972
|
for (const entry of entries) {
|
|
14111
|
-
const srcPath =
|
|
14112
|
-
const destPath =
|
|
13973
|
+
const srcPath = path38.join(src, entry.name);
|
|
13974
|
+
const destPath = path38.join(dest, entry.name);
|
|
14113
13975
|
if (entry.name === ".git") {
|
|
14114
13976
|
continue;
|
|
14115
13977
|
}
|
|
@@ -14121,7 +13983,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
14121
13983
|
}
|
|
14122
13984
|
}
|
|
14123
13985
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
14124
|
-
const resolvedTemplatePath =
|
|
13986
|
+
const resolvedTemplatePath = path38.resolve(templatePath);
|
|
14125
13987
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
14126
13988
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
14127
13989
|
}
|
|
@@ -14170,7 +14032,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
14170
14032
|
}
|
|
14171
14033
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
14172
14034
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14173
|
-
const evalDir =
|
|
14035
|
+
const evalDir = path38.join(root, evalRunId);
|
|
14174
14036
|
if (await fileExists(evalDir)) {
|
|
14175
14037
|
await rm4(evalDir, { recursive: true, force: true });
|
|
14176
14038
|
}
|
|
@@ -14180,8 +14042,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
14180
14042
|
import { execFile } from "node:child_process";
|
|
14181
14043
|
import { createHash } from "node:crypto";
|
|
14182
14044
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14183
|
-
import { cp as cp2, mkdir as mkdir12, readFile as
|
|
14184
|
-
import
|
|
14045
|
+
import { cp as cp2, mkdir as mkdir12, readFile as readFile11, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
14046
|
+
import path39 from "node:path";
|
|
14185
14047
|
import { promisify as promisify5 } from "node:util";
|
|
14186
14048
|
var execFileAsync = promisify5(execFile);
|
|
14187
14049
|
function gitEnv() {
|
|
@@ -14235,8 +14097,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
14235
14097
|
await mkdir12(dest, { recursive: true });
|
|
14236
14098
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
14237
14099
|
for (const entry of entries) {
|
|
14238
|
-
const srcPath =
|
|
14239
|
-
const destPath =
|
|
14100
|
+
const srcPath = path39.join(src, entry.name);
|
|
14101
|
+
const destPath = path39.join(dest, entry.name);
|
|
14240
14102
|
if (entry.name === ".git") {
|
|
14241
14103
|
continue;
|
|
14242
14104
|
}
|
|
@@ -14269,7 +14131,7 @@ var WorkspacePoolManager = class {
|
|
|
14269
14131
|
async acquireWorkspace(options) {
|
|
14270
14132
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
14271
14133
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
14272
|
-
const poolDir =
|
|
14134
|
+
const poolDir = path39.join(this.poolRoot, fingerprint);
|
|
14273
14135
|
await mkdir12(poolDir, { recursive: true });
|
|
14274
14136
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
14275
14137
|
if (drifted) {
|
|
@@ -14279,7 +14141,7 @@ var WorkspacePoolManager = class {
|
|
|
14279
14141
|
await this.removeAllSlots(poolDir);
|
|
14280
14142
|
}
|
|
14281
14143
|
for (let i = 0; i < maxSlots; i++) {
|
|
14282
|
-
const slotPath =
|
|
14144
|
+
const slotPath = path39.join(poolDir, `slot-${i}`);
|
|
14283
14145
|
const lockPath = `${slotPath}.lock`;
|
|
14284
14146
|
const locked = await this.tryLock(lockPath);
|
|
14285
14147
|
if (!locked) {
|
|
@@ -14341,7 +14203,7 @@ var WorkspacePoolManager = class {
|
|
|
14341
14203
|
throw err;
|
|
14342
14204
|
}
|
|
14343
14205
|
try {
|
|
14344
|
-
const pidStr = await
|
|
14206
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
14345
14207
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14346
14208
|
if (!Number.isNaN(pid)) {
|
|
14347
14209
|
try {
|
|
@@ -14366,9 +14228,9 @@ var WorkspacePoolManager = class {
|
|
|
14366
14228
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14367
14229
|
*/
|
|
14368
14230
|
async checkDrift(poolDir, fingerprint) {
|
|
14369
|
-
const metadataPath =
|
|
14231
|
+
const metadataPath = path39.join(poolDir, "metadata.json");
|
|
14370
14232
|
try {
|
|
14371
|
-
const raw = await
|
|
14233
|
+
const raw = await readFile11(metadataPath, "utf-8");
|
|
14372
14234
|
const metadata = JSON.parse(raw);
|
|
14373
14235
|
return metadata.fingerprint !== fingerprint;
|
|
14374
14236
|
} catch {
|
|
@@ -14383,17 +14245,17 @@ var WorkspacePoolManager = class {
|
|
|
14383
14245
|
repos,
|
|
14384
14246
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14385
14247
|
};
|
|
14386
|
-
await writeFile7(
|
|
14248
|
+
await writeFile7(path39.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14387
14249
|
}
|
|
14388
14250
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14389
14251
|
async removeAllSlots(poolDir) {
|
|
14390
14252
|
const entries = await readdir4(poolDir);
|
|
14391
14253
|
for (const entry of entries) {
|
|
14392
14254
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14393
|
-
const lockPath =
|
|
14255
|
+
const lockPath = path39.join(poolDir, `${entry}.lock`);
|
|
14394
14256
|
if (existsSync2(lockPath)) {
|
|
14395
14257
|
try {
|
|
14396
|
-
const pidStr = await
|
|
14258
|
+
const pidStr = await readFile11(lockPath, "utf-8");
|
|
14397
14259
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14398
14260
|
if (!Number.isNaN(pid)) {
|
|
14399
14261
|
try {
|
|
@@ -14406,12 +14268,12 @@ var WorkspacePoolManager = class {
|
|
|
14406
14268
|
} catch {
|
|
14407
14269
|
}
|
|
14408
14270
|
}
|
|
14409
|
-
await rm5(
|
|
14271
|
+
await rm5(path39.join(poolDir, entry), { recursive: true, force: true });
|
|
14410
14272
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14411
14273
|
});
|
|
14412
14274
|
}
|
|
14413
14275
|
}
|
|
14414
|
-
await rm5(
|
|
14276
|
+
await rm5(path39.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14415
14277
|
});
|
|
14416
14278
|
}
|
|
14417
14279
|
/**
|
|
@@ -14421,7 +14283,7 @@ var WorkspacePoolManager = class {
|
|
|
14421
14283
|
*/
|
|
14422
14284
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14423
14285
|
for (const repo of repos) {
|
|
14424
|
-
const repoDir =
|
|
14286
|
+
const repoDir = path39.join(slotPath, repo.path);
|
|
14425
14287
|
if (!existsSync2(repoDir)) {
|
|
14426
14288
|
continue;
|
|
14427
14289
|
}
|
|
@@ -14448,7 +14310,7 @@ var WorkspacePoolManager = class {
|
|
|
14448
14310
|
// src/evaluation/workspace/repo-manager.ts
|
|
14449
14311
|
import { execFile as execFile2 } from "node:child_process";
|
|
14450
14312
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14451
|
-
import
|
|
14313
|
+
import path40 from "node:path";
|
|
14452
14314
|
import { promisify as promisify6 } from "node:util";
|
|
14453
14315
|
var execFileAsync2 = promisify6(execFile2);
|
|
14454
14316
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14548,7 +14410,7 @@ ${lines.join("\n")}`;
|
|
|
14548
14410
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14549
14411
|
*/
|
|
14550
14412
|
async materialize(repo, workspacePath) {
|
|
14551
|
-
const targetDir =
|
|
14413
|
+
const targetDir = path40.join(workspacePath, repo.path);
|
|
14552
14414
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14553
14415
|
const startedAt = Date.now();
|
|
14554
14416
|
if (this.verbose) {
|
|
@@ -14639,7 +14501,7 @@ ${lines.join("\n")}`;
|
|
|
14639
14501
|
async reset(repos, workspacePath, reset) {
|
|
14640
14502
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14641
14503
|
for (const repo of repos) {
|
|
14642
|
-
const targetDir =
|
|
14504
|
+
const targetDir = path40.join(workspacePath, repo.path);
|
|
14643
14505
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14644
14506
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14645
14507
|
}
|
|
@@ -14648,16 +14510,16 @@ ${lines.join("\n")}`;
|
|
|
14648
14510
|
|
|
14649
14511
|
// src/evaluation/workspace/resolve.ts
|
|
14650
14512
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
14651
|
-
import
|
|
14513
|
+
import path41 from "node:path";
|
|
14652
14514
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14653
14515
|
if (!templatePath) {
|
|
14654
14516
|
return void 0;
|
|
14655
14517
|
}
|
|
14656
|
-
const resolved =
|
|
14518
|
+
const resolved = path41.resolve(templatePath);
|
|
14657
14519
|
const stats = await stat6(resolved);
|
|
14658
14520
|
if (stats.isFile()) {
|
|
14659
14521
|
return {
|
|
14660
|
-
dir:
|
|
14522
|
+
dir: path41.dirname(resolved),
|
|
14661
14523
|
workspaceFile: resolved
|
|
14662
14524
|
};
|
|
14663
14525
|
}
|
|
@@ -14669,14 +14531,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
14669
14531
|
if (workspaceFiles.length === 1) {
|
|
14670
14532
|
return {
|
|
14671
14533
|
dir: resolved,
|
|
14672
|
-
workspaceFile:
|
|
14534
|
+
workspaceFile: path41.join(resolved, workspaceFiles[0])
|
|
14673
14535
|
};
|
|
14674
14536
|
}
|
|
14675
14537
|
if (workspaceFiles.length > 1) {
|
|
14676
14538
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14677
14539
|
return {
|
|
14678
14540
|
dir: resolved,
|
|
14679
|
-
workspaceFile: conventionFile ?
|
|
14541
|
+
workspaceFile: conventionFile ? path41.join(resolved, conventionFile) : void 0
|
|
14680
14542
|
};
|
|
14681
14543
|
}
|
|
14682
14544
|
return { dir: resolved };
|
|
@@ -14880,7 +14742,7 @@ async function runEvaluation(options) {
|
|
|
14880
14742
|
];
|
|
14881
14743
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14882
14744
|
const typeRegistry = createBuiltinRegistry();
|
|
14883
|
-
const discoveryBaseDir = evalFilePath ?
|
|
14745
|
+
const discoveryBaseDir = evalFilePath ? path42.dirname(path42.resolve(evalFilePath)) : process.cwd();
|
|
14884
14746
|
const evalDir = discoveryBaseDir;
|
|
14885
14747
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14886
14748
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -15069,7 +14931,7 @@ async function runEvaluation(options) {
|
|
|
15069
14931
|
}
|
|
15070
14932
|
try {
|
|
15071
14933
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
15072
|
-
const copiedWorkspaceFile =
|
|
14934
|
+
const copiedWorkspaceFile = path42.join(sharedWorkspacePath, path42.basename(suiteWorkspaceFile));
|
|
15073
14935
|
try {
|
|
15074
14936
|
await stat7(copiedWorkspaceFile);
|
|
15075
14937
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -15182,7 +15044,7 @@ async function runEvaluation(options) {
|
|
|
15182
15044
|
dataset: evalCase.dataset,
|
|
15183
15045
|
score: 0,
|
|
15184
15046
|
assertions: [],
|
|
15185
|
-
|
|
15047
|
+
output: [],
|
|
15186
15048
|
target: target.name,
|
|
15187
15049
|
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
15188
15050
|
budgetExceeded: true,
|
|
@@ -15218,7 +15080,7 @@ async function runEvaluation(options) {
|
|
|
15218
15080
|
dataset: evalCase.dataset,
|
|
15219
15081
|
score: 0,
|
|
15220
15082
|
assertions: [],
|
|
15221
|
-
|
|
15083
|
+
output: [],
|
|
15222
15084
|
target: target.name,
|
|
15223
15085
|
error: errorMsg,
|
|
15224
15086
|
executionStatus: "execution_error",
|
|
@@ -15483,7 +15345,7 @@ async function runBatchEvaluation(options) {
|
|
|
15483
15345
|
const providerResponse = batchResponse[i];
|
|
15484
15346
|
const output = providerResponse.output;
|
|
15485
15347
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
15486
|
-
const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0,
|
|
15348
|
+
const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolCalls: {}, errorCount: 0 } } : void 0;
|
|
15487
15349
|
const merged = computed ? mergeExecutionMetrics(computed, {
|
|
15488
15350
|
tokenUsage: providerResponse.tokenUsage,
|
|
15489
15351
|
costUsd: providerResponse.costUsd,
|
|
@@ -15646,7 +15508,7 @@ async function runEvalCase(options) {
|
|
|
15646
15508
|
);
|
|
15647
15509
|
}
|
|
15648
15510
|
if (caseWorkspaceFile && workspacePath) {
|
|
15649
|
-
const copiedFile =
|
|
15511
|
+
const copiedFile = path42.join(workspacePath, path42.basename(caseWorkspaceFile));
|
|
15650
15512
|
try {
|
|
15651
15513
|
await stat7(copiedFile);
|
|
15652
15514
|
caseWorkspaceFile = copiedFile;
|
|
@@ -15706,10 +15568,10 @@ async function runEvalCase(options) {
|
|
|
15706
15568
|
const files = evalCase.metadata.agent_skills_files;
|
|
15707
15569
|
if (baseDir && files.length > 0) {
|
|
15708
15570
|
for (const relPath of files) {
|
|
15709
|
-
const srcPath =
|
|
15710
|
-
const destPath =
|
|
15571
|
+
const srcPath = path42.resolve(baseDir, relPath);
|
|
15572
|
+
const destPath = path42.resolve(workspacePath, relPath);
|
|
15711
15573
|
try {
|
|
15712
|
-
await mkdir13(
|
|
15574
|
+
await mkdir13(path42.dirname(destPath), { recursive: true });
|
|
15713
15575
|
await copyFile2(srcPath, destPath);
|
|
15714
15576
|
} catch (error) {
|
|
15715
15577
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -15880,7 +15742,7 @@ async function runEvalCase(options) {
|
|
|
15880
15742
|
}
|
|
15881
15743
|
const output = providerResponse.output;
|
|
15882
15744
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
15883
|
-
const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0,
|
|
15745
|
+
const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolCalls: {}, errorCount: 0 } } : void 0;
|
|
15884
15746
|
const merged = computed ? mergeExecutionMetrics(computed, {
|
|
15885
15747
|
tokenUsage: providerResponse.tokenUsage,
|
|
15886
15748
|
costUsd: providerResponse.costUsd,
|
|
@@ -16185,7 +16047,6 @@ async function evaluateCandidate(options) {
|
|
|
16185
16047
|
conversationId: evalCase.conversation_id,
|
|
16186
16048
|
score: score.score,
|
|
16187
16049
|
assertions: score.assertions,
|
|
16188
|
-
outputText: candidate,
|
|
16189
16050
|
target: target.name,
|
|
16190
16051
|
tokenUsage,
|
|
16191
16052
|
costUsd,
|
|
@@ -16196,7 +16057,7 @@ async function evaluateCandidate(options) {
|
|
|
16196
16057
|
input,
|
|
16197
16058
|
scores,
|
|
16198
16059
|
trace,
|
|
16199
|
-
output,
|
|
16060
|
+
output: output ?? [{ role: "assistant", content: candidate }],
|
|
16200
16061
|
fileChanges,
|
|
16201
16062
|
executionStatus: classifyQualityStatus(score.score)
|
|
16202
16063
|
};
|
|
@@ -16330,7 +16191,7 @@ async function runEvaluatorList(options) {
|
|
|
16330
16191
|
fileChanges,
|
|
16331
16192
|
workspacePath
|
|
16332
16193
|
};
|
|
16333
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
16194
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path42.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
16334
16195
|
const dispatchContext = {
|
|
16335
16196
|
graderProvider,
|
|
16336
16197
|
targetResolver,
|
|
@@ -16361,7 +16222,7 @@ async function runEvaluatorList(options) {
|
|
|
16361
16222
|
weight,
|
|
16362
16223
|
verdict: score2.verdict,
|
|
16363
16224
|
assertions: score2.assertions,
|
|
16364
|
-
|
|
16225
|
+
input: score2.evaluatorRawRequest,
|
|
16365
16226
|
details: score2.details,
|
|
16366
16227
|
scores: mapChildResults(score2.scores),
|
|
16367
16228
|
tokenUsage: score2.tokenUsage,
|
|
@@ -16541,7 +16402,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
|
|
|
16541
16402
|
conversationId: evalCase.conversation_id,
|
|
16542
16403
|
score: 0,
|
|
16543
16404
|
assertions: [{ text: `Error: ${message}`, passed: false }],
|
|
16544
|
-
|
|
16405
|
+
output: [{ role: "assistant", content: `Error occurred: ${message}` }],
|
|
16545
16406
|
target: targetName,
|
|
16546
16407
|
requests,
|
|
16547
16408
|
input,
|
|
@@ -16585,7 +16446,7 @@ function buildResultInput(promptInputs) {
|
|
|
16585
16446
|
content: message.content
|
|
16586
16447
|
}));
|
|
16587
16448
|
}
|
|
16588
|
-
return promptInputs.question;
|
|
16449
|
+
return [{ role: "user", content: promptInputs.question }];
|
|
16589
16450
|
}
|
|
16590
16451
|
function aggregateEvaluatorTokenUsage(scores) {
|
|
16591
16452
|
if (!scores || scores.length === 0) return void 0;
|
|
@@ -16651,7 +16512,7 @@ function mapChildResults(children) {
|
|
|
16651
16512
|
weight: child.weight,
|
|
16652
16513
|
verdict: child.verdict,
|
|
16653
16514
|
assertions: child.assertions,
|
|
16654
|
-
|
|
16515
|
+
input: child.evaluatorRawRequest,
|
|
16655
16516
|
scores: mapChildResults(child.scores),
|
|
16656
16517
|
details: child.details,
|
|
16657
16518
|
tokenUsage: child.tokenUsage
|
|
@@ -16670,7 +16531,7 @@ function computeWeightedMean(entries) {
|
|
|
16670
16531
|
|
|
16671
16532
|
// src/evaluation/evaluate.ts
|
|
16672
16533
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16673
|
-
import
|
|
16534
|
+
import path43 from "node:path";
|
|
16674
16535
|
|
|
16675
16536
|
// src/evaluation/providers/function-provider.ts
|
|
16676
16537
|
function createFunctionProvider(taskFn) {
|
|
@@ -16707,7 +16568,7 @@ async function evaluate(config) {
|
|
|
16707
16568
|
}
|
|
16708
16569
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16709
16570
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16710
|
-
const testFilePath = config.specFile ?
|
|
16571
|
+
const testFilePath = config.specFile ? path43.resolve(config.specFile) : path43.join(process.cwd(), "__programmatic__.yaml");
|
|
16711
16572
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16712
16573
|
let resolvedTarget;
|
|
16713
16574
|
let taskProvider;
|
|
@@ -16836,10 +16697,10 @@ function computeSummary(results, durationMs) {
|
|
|
16836
16697
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16837
16698
|
async function discoverDefaultTarget(repoRoot) {
|
|
16838
16699
|
const cwd = process.cwd();
|
|
16839
|
-
const chain = buildDirectoryChain(
|
|
16700
|
+
const chain = buildDirectoryChain(path43.join(cwd, "_placeholder"), repoRoot);
|
|
16840
16701
|
for (const dir of chain) {
|
|
16841
16702
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16842
|
-
const targetsPath =
|
|
16703
|
+
const targetsPath = path43.join(dir, candidate);
|
|
16843
16704
|
if (!existsSync4(targetsPath)) continue;
|
|
16844
16705
|
try {
|
|
16845
16706
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16856,7 +16717,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16856
16717
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16857
16718
|
const envFiles = [];
|
|
16858
16719
|
for (const dir of chain) {
|
|
16859
|
-
const envPath =
|
|
16720
|
+
const envPath = path43.join(dir, ".env");
|
|
16860
16721
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16861
16722
|
}
|
|
16862
16723
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -17037,8 +16898,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
17037
16898
|
}
|
|
17038
16899
|
|
|
17039
16900
|
// src/evaluation/cache/response-cache.ts
|
|
17040
|
-
import { mkdir as mkdir14, readFile as
|
|
17041
|
-
import
|
|
16901
|
+
import { mkdir as mkdir14, readFile as readFile12, writeFile as writeFile8 } from "node:fs/promises";
|
|
16902
|
+
import path44 from "node:path";
|
|
17042
16903
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
17043
16904
|
var ResponseCache = class {
|
|
17044
16905
|
cachePath;
|
|
@@ -17048,7 +16909,7 @@ var ResponseCache = class {
|
|
|
17048
16909
|
async get(key) {
|
|
17049
16910
|
const filePath = this.keyToPath(key);
|
|
17050
16911
|
try {
|
|
17051
|
-
const data = await
|
|
16912
|
+
const data = await readFile12(filePath, "utf8");
|
|
17052
16913
|
return JSON.parse(data);
|
|
17053
16914
|
} catch {
|
|
17054
16915
|
return void 0;
|
|
@@ -17056,13 +16917,13 @@ var ResponseCache = class {
|
|
|
17056
16917
|
}
|
|
17057
16918
|
async set(key, value) {
|
|
17058
16919
|
const filePath = this.keyToPath(key);
|
|
17059
|
-
const dir =
|
|
16920
|
+
const dir = path44.dirname(filePath);
|
|
17060
16921
|
await mkdir14(dir, { recursive: true });
|
|
17061
16922
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
17062
16923
|
}
|
|
17063
16924
|
keyToPath(key) {
|
|
17064
16925
|
const prefix = key.slice(0, 2);
|
|
17065
|
-
return
|
|
16926
|
+
return path44.join(this.cachePath, prefix, `${key}.json`);
|
|
17066
16927
|
}
|
|
17067
16928
|
};
|
|
17068
16929
|
function shouldEnableCache(params) {
|
|
@@ -17079,7 +16940,6 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
17079
16940
|
|
|
17080
16941
|
// src/evaluation/baseline.ts
|
|
17081
16942
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
17082
|
-
"outputText",
|
|
17083
16943
|
"requests",
|
|
17084
16944
|
"trace",
|
|
17085
16945
|
"workspacePath",
|
|
@@ -17096,7 +16956,7 @@ var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
|
17096
16956
|
"startTime",
|
|
17097
16957
|
"endTime"
|
|
17098
16958
|
]);
|
|
17099
|
-
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "
|
|
16959
|
+
var STRIPPED_EVALUATOR_FIELDS = /* @__PURE__ */ new Set(["rawRequest", "input"]);
|
|
17100
16960
|
function trimEvaluatorResult(result) {
|
|
17101
16961
|
const trimmed = {};
|
|
17102
16962
|
for (const [key, value] of Object.entries(result)) {
|
|
@@ -17253,14 +17113,21 @@ var OtelTraceExporter = class {
|
|
|
17253
17113
|
rootSpan.setAttribute("agentv.target", result.target);
|
|
17254
17114
|
if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
|
|
17255
17115
|
rootSpan.setAttribute("agentv.score", result.score);
|
|
17256
|
-
if (captureContent
|
|
17116
|
+
if (captureContent && result.output.length > 0) {
|
|
17117
|
+
const lastMsg = result.output[result.output.length - 1];
|
|
17118
|
+
const text = typeof lastMsg.content === "string" ? lastMsg.content : JSON.stringify(lastMsg.content);
|
|
17119
|
+
rootSpan.setAttribute("agentv.output_text", text);
|
|
17120
|
+
}
|
|
17257
17121
|
if (result.durationMs != null)
|
|
17258
17122
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
17259
17123
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
17260
17124
|
if (result.trace) {
|
|
17261
17125
|
const t = result.trace;
|
|
17262
17126
|
rootSpan.setAttribute("agentv.trace.event_count", t.eventCount);
|
|
17263
|
-
rootSpan.setAttribute(
|
|
17127
|
+
rootSpan.setAttribute(
|
|
17128
|
+
"agentv.trace.tool_names",
|
|
17129
|
+
Object.keys(t.toolCalls).sort().join(",")
|
|
17130
|
+
);
|
|
17264
17131
|
if (t.llmCallCount != null)
|
|
17265
17132
|
rootSpan.setAttribute("agentv.trace.llm_call_count", t.llmCallCount);
|
|
17266
17133
|
}
|