agentv 4.38.1 → 4.40.1-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-MK5X5MSO.js → artifact-writer-GIAIMGPQ.js} +14 -11
- package/dist/{chunk-QOBQ5XYF.js → chunk-76FOHROU.js} +16 -4
- package/dist/chunk-76FOHROU.js.map +1 -0
- package/dist/{chunk-VBHHZQS6.js → chunk-BLXYBUU4.js} +1825 -333
- package/dist/chunk-BLXYBUU4.js.map +1 -0
- package/dist/{chunk-NLTIK3LV.js → chunk-I3SC4FOT.js} +499 -347
- package/dist/chunk-I3SC4FOT.js.map +1 -0
- package/dist/{chunk-OIN3MVOD.js → chunk-S2JJCLHV.js} +67 -68
- package/dist/chunk-S2JJCLHV.js.map +1 -0
- package/dist/chunk-TWQP7JYQ.js +494 -0
- package/dist/chunk-TWQP7JYQ.js.map +1 -0
- package/dist/{chunk-6M5S4IJW.js → chunk-WKA5QDNQ.js} +586 -183
- package/dist/chunk-WKA5QDNQ.js.map +1 -0
- package/dist/cli.js +6 -6
- package/dist/dashboard/assets/index-BnYCCJ7O.css +1 -0
- package/dist/dashboard/assets/index-DaueD7GO.js +118 -0
- package/dist/dashboard/assets/{index-SIl6NbIJ.js → index-_jpKSzIf.js} +1 -1
- package/dist/dashboard/index.html +2 -2
- package/dist/{dist-HVLBDG5F.js → dist-6Z4OSITR.js} +54 -16
- package/dist/index.js +6 -6
- package/dist/{interactive-45LPG2YJ.js → interactive-OUB3GZRC.js} +6 -6
- package/dist/{otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js → otlp-json-file-exporter-RY63S3IG-PZBQPVYY.js} +2 -2
- package/dist/skills/agentv-eval-writer/SKILL.md +49 -24
- package/dist/skills/agentv-eval-writer/references/custom-evaluators.md +21 -15
- package/dist/{ts-eval-loader-TJT6BGFF-DI7XNSO4.js → ts-eval-loader-NWH3B4HG-UXXCZKLP.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-6M5S4IJW.js.map +0 -1
- package/dist/chunk-DKUAETXE.js +0 -1362
- package/dist/chunk-DKUAETXE.js.map +0 -1
- package/dist/chunk-NLTIK3LV.js.map +0 -1
- package/dist/chunk-OIN3MVOD.js.map +0 -1
- package/dist/chunk-QOBQ5XYF.js.map +0 -1
- package/dist/chunk-VBHHZQS6.js.map +0 -1
- package/dist/dashboard/assets/index-BpnllKET.css +0 -1
- package/dist/dashboard/assets/index-Cm9SUopp.js +0 -118
- /package/dist/{artifact-writer-MK5X5MSO.js.map → artifact-writer-GIAIMGPQ.js.map} +0 -0
- /package/dist/{dist-HVLBDG5F.js.map → dist-6Z4OSITR.js.map} +0 -0
- /package/dist/{interactive-45LPG2YJ.js.map → interactive-OUB3GZRC.js.map} +0 -0
- /package/dist/{otlp-json-file-exporter-RJFPCKVK-T6N4OGWG.js.map → otlp-json-file-exporter-RY63S3IG-PZBQPVYY.js.map} +0 -0
- /package/dist/{ts-eval-loader-TJT6BGFF-DI7XNSO4.js.map → ts-eval-loader-NWH3B4HG-UXXCZKLP.js.map} +0 -0
|
@@ -1,22 +1,8 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
|
-
RESULT_INDEX_FILENAME,
|
|
4
|
-
RESULT_RUNS_DIRNAME,
|
|
5
|
-
aggregateRunDir,
|
|
6
|
-
buildDefaultRunDir,
|
|
7
|
-
buildTestTargetKey,
|
|
8
|
-
deduplicateByTestIdTarget,
|
|
9
|
-
isDirectoryPath,
|
|
10
|
-
normalizeExperimentName,
|
|
11
|
-
parseJsonlResults,
|
|
12
|
-
resolveExistingRunPrimaryPath,
|
|
13
|
-
resolveRunIndexPath,
|
|
14
|
-
resolveRunManifestPath,
|
|
15
|
-
resolveWorkspaceOrFilePath,
|
|
16
3
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
17
|
-
writeArtifactsFromResults
|
|
18
|
-
|
|
19
|
-
} from "./chunk-DKUAETXE.js";
|
|
4
|
+
writeArtifactsFromResults
|
|
5
|
+
} from "./chunk-TWQP7JYQ.js";
|
|
20
6
|
import {
|
|
21
7
|
RunBudgetTracker,
|
|
22
8
|
buildWipBranchName,
|
|
@@ -32,9 +18,8 @@ import {
|
|
|
32
18
|
pushWipCheckpoint,
|
|
33
19
|
resolveResultsRepoRunsDir,
|
|
34
20
|
setupWipWorktree,
|
|
35
|
-
syncResultsRepoForProject
|
|
36
|
-
|
|
37
|
-
} from "./chunk-NLTIK3LV.js";
|
|
21
|
+
syncResultsRepoForProject
|
|
22
|
+
} from "./chunk-I3SC4FOT.js";
|
|
38
23
|
import {
|
|
39
24
|
CLI_PLACEHOLDERS,
|
|
40
25
|
COMMON_TARGET_SETTINGS,
|
|
@@ -43,9 +28,13 @@ import {
|
|
|
43
28
|
KNOWN_PROVIDERS,
|
|
44
29
|
PROVIDER_ALIASES,
|
|
45
30
|
ResponseCache,
|
|
31
|
+
ResultRowSchemaError,
|
|
32
|
+
aggregateRunDir,
|
|
46
33
|
buildDirectoryChain,
|
|
47
34
|
buildSearchRoots,
|
|
35
|
+
buildTestTargetKey,
|
|
48
36
|
buildTraceFromMessages,
|
|
37
|
+
deduplicateByTestIdTarget,
|
|
49
38
|
ensureVSCodeSubagents,
|
|
50
39
|
findDeprecatedCamelCaseTargetWarnings,
|
|
51
40
|
findGitRoot,
|
|
@@ -59,6 +48,8 @@ import {
|
|
|
59
48
|
loadCasesFromFile,
|
|
60
49
|
loadConfig,
|
|
61
50
|
loadTestSuite,
|
|
51
|
+
normalizeResultRow,
|
|
52
|
+
parseJsonlResults,
|
|
62
53
|
parseYamlValue,
|
|
63
54
|
readTargetDefinitions,
|
|
64
55
|
readTestSuiteMetadata,
|
|
@@ -73,8 +64,10 @@ import {
|
|
|
73
64
|
subscribeToCopilotSdkLogEntries,
|
|
74
65
|
subscribeToPiLogEntries,
|
|
75
66
|
toCamelCaseDeep,
|
|
76
|
-
toSnakeCaseDeep
|
|
77
|
-
|
|
67
|
+
toSnakeCaseDeep,
|
|
68
|
+
traceFromTranscriptJsonLines,
|
|
69
|
+
writeInitialBenchmarkArtifact
|
|
70
|
+
} from "./chunk-BLXYBUU4.js";
|
|
78
71
|
|
|
79
72
|
// src/commands/eval/shared.ts
|
|
80
73
|
import { constants } from "node:fs";
|
|
@@ -190,7 +183,7 @@ async function findRepoRoot(start) {
|
|
|
190
183
|
// package.json
|
|
191
184
|
var package_default = {
|
|
192
185
|
name: "agentv",
|
|
193
|
-
version: "4.
|
|
186
|
+
version: "4.40.1-next.1",
|
|
194
187
|
description: "CLI entry point for AgentV",
|
|
195
188
|
type: "module",
|
|
196
189
|
repository: {
|
|
@@ -298,10 +291,10 @@ async function discoverTargetsFile(options) {
|
|
|
298
291
|
}
|
|
299
292
|
|
|
300
293
|
// src/commands/eval/run-eval.ts
|
|
301
|
-
import { constants as constants4, existsSync as
|
|
302
|
-
import { access as
|
|
303
|
-
import
|
|
304
|
-
import { pathToFileURL } from "node:url";
|
|
294
|
+
import { constants as constants4, existsSync as existsSync7, mkdirSync as mkdirSync2 } from "node:fs";
|
|
295
|
+
import { access as access6, readFile as readFile8 } from "node:fs/promises";
|
|
296
|
+
import path14 from "node:path";
|
|
297
|
+
import { pathToFileURL as pathToFileURL2 } from "node:url";
|
|
305
298
|
|
|
306
299
|
// src/version-check.ts
|
|
307
300
|
import { coerce, satisfies, validRange } from "semver";
|
|
@@ -355,19 +348,100 @@ function enforceRequiredVersion(requiredVersion, options) {
|
|
|
355
348
|
}
|
|
356
349
|
|
|
357
350
|
// src/commands/results/remote.ts
|
|
358
|
-
import { existsSync as
|
|
359
|
-
import
|
|
351
|
+
import { existsSync as existsSync5 } from "node:fs";
|
|
352
|
+
import path8 from "node:path";
|
|
360
353
|
|
|
361
354
|
// src/commands/inspect/utils.ts
|
|
362
|
-
import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
|
|
363
|
-
import
|
|
355
|
+
import { readFileSync as readFileSync2, readdirSync, statSync as statSync2 } from "node:fs";
|
|
356
|
+
import path5 from "node:path";
|
|
364
357
|
|
|
365
|
-
// src/commands/
|
|
366
|
-
import { existsSync,
|
|
358
|
+
// src/commands/eval/result-layout.ts
|
|
359
|
+
import { existsSync, statSync } from "node:fs";
|
|
367
360
|
import path3 from "node:path";
|
|
361
|
+
var RESULT_INDEX_FILENAME = "index.jsonl";
|
|
362
|
+
var RESULT_RUNS_DIRNAME = "runs";
|
|
363
|
+
var DEFAULT_EXPERIMENT_NAME = "default";
|
|
364
|
+
function normalizeExperimentName(experiment) {
|
|
365
|
+
const trimmed = experiment?.trim();
|
|
366
|
+
if (!trimmed) {
|
|
367
|
+
return DEFAULT_EXPERIMENT_NAME;
|
|
368
|
+
}
|
|
369
|
+
if (!/^[A-Za-z0-9._-]+$/.test(trimmed)) {
|
|
370
|
+
throw new Error(
|
|
371
|
+
`Invalid experiment name "${trimmed}". Use only letters, numbers, ".", "_" and "-".`
|
|
372
|
+
);
|
|
373
|
+
}
|
|
374
|
+
return trimmed;
|
|
375
|
+
}
|
|
376
|
+
function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
|
|
377
|
+
return timestamp.toISOString().replace(/[:.]/g, "-");
|
|
378
|
+
}
|
|
379
|
+
function buildDefaultRunDir(cwd, experiment, timestamp = /* @__PURE__ */ new Date()) {
|
|
380
|
+
return path3.join(
|
|
381
|
+
cwd,
|
|
382
|
+
".agentv",
|
|
383
|
+
"results",
|
|
384
|
+
RESULT_RUNS_DIRNAME,
|
|
385
|
+
normalizeExperimentName(experiment),
|
|
386
|
+
createRunDirName(timestamp)
|
|
387
|
+
);
|
|
388
|
+
}
|
|
389
|
+
function resolveRunIndexPath(runDir) {
|
|
390
|
+
return path3.join(runDir, RESULT_INDEX_FILENAME);
|
|
391
|
+
}
|
|
392
|
+
function isRunManifestPath(filePath) {
|
|
393
|
+
return path3.basename(filePath) === RESULT_INDEX_FILENAME;
|
|
394
|
+
}
|
|
395
|
+
function resolveExistingRunPrimaryPath(runDir) {
|
|
396
|
+
const indexPath = resolveRunIndexPath(runDir);
|
|
397
|
+
if (existsSync(indexPath)) {
|
|
398
|
+
return indexPath;
|
|
399
|
+
}
|
|
400
|
+
return void 0;
|
|
401
|
+
}
|
|
402
|
+
function isDirectoryPath(filePath) {
|
|
403
|
+
try {
|
|
404
|
+
return statSync(filePath).isDirectory();
|
|
405
|
+
} catch {
|
|
406
|
+
return false;
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
function resolveWorkspaceOrFilePath(filePath) {
|
|
410
|
+
if (!isDirectoryPath(filePath)) {
|
|
411
|
+
return filePath;
|
|
412
|
+
}
|
|
413
|
+
const existing = resolveExistingRunPrimaryPath(filePath);
|
|
414
|
+
if (!existing) {
|
|
415
|
+
throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
|
|
416
|
+
}
|
|
417
|
+
return existing;
|
|
418
|
+
}
|
|
419
|
+
function resolveRunManifestPath(filePath) {
|
|
420
|
+
if (isDirectoryPath(filePath)) {
|
|
421
|
+
return resolveWorkspaceOrFilePath(filePath);
|
|
422
|
+
}
|
|
423
|
+
if (!isRunManifestPath(filePath)) {
|
|
424
|
+
throw new Error(
|
|
425
|
+
`Expected a run workspace directory or ${RESULT_INDEX_FILENAME} manifest: ${filePath}`
|
|
426
|
+
);
|
|
427
|
+
}
|
|
428
|
+
return filePath;
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
// src/commands/results/manifest.ts
|
|
432
|
+
import { existsSync as existsSync2, readFileSync } from "node:fs";
|
|
433
|
+
import path4 from "node:path";
|
|
368
434
|
function parseJsonlLines(content) {
|
|
369
435
|
return content.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0).map((line) => JSON.parse(line));
|
|
370
436
|
}
|
|
437
|
+
function parseResultRows(content, sourceLabel) {
|
|
438
|
+
return content.split(/\r?\n/).map((line, index) => ({ line: line.trim(), lineNumber: index + 1 })).filter(({ line }) => line.length > 0).map(
|
|
439
|
+
({ line, lineNumber }) => normalizeResultRow(JSON.parse(line), {
|
|
440
|
+
lineNumber,
|
|
441
|
+
sourceLabel
|
|
442
|
+
})
|
|
443
|
+
);
|
|
444
|
+
}
|
|
371
445
|
function parseMarkdownMessages(content) {
|
|
372
446
|
const trimmed = content.trim();
|
|
373
447
|
if (!trimmed.startsWith("@[")) {
|
|
@@ -383,8 +457,8 @@ function readOptionalText(baseDir, relativePath) {
|
|
|
383
457
|
if (!relativePath) {
|
|
384
458
|
return void 0;
|
|
385
459
|
}
|
|
386
|
-
const absolutePath =
|
|
387
|
-
if (!
|
|
460
|
+
const absolutePath = path4.join(baseDir, relativePath);
|
|
461
|
+
if (!existsSync2(absolutePath)) {
|
|
388
462
|
return void 0;
|
|
389
463
|
}
|
|
390
464
|
return readFileSync(absolutePath, "utf8");
|
|
@@ -430,6 +504,7 @@ function hydrateTrace(baseDir, record) {
|
|
|
430
504
|
return buildTraceFromMessages({
|
|
431
505
|
input: hydrateInput(baseDir, record),
|
|
432
506
|
output: output ? [{ role: "assistant", content: output }] : [],
|
|
507
|
+
summary: record.trace ? toCamelCaseDeep(record.trace) : void 0,
|
|
433
508
|
finalOutput: output,
|
|
434
509
|
target: record.target,
|
|
435
510
|
testId: record.test_id
|
|
@@ -484,11 +559,11 @@ function hydrateManifestRecord(baseDir, record) {
|
|
|
484
559
|
};
|
|
485
560
|
}
|
|
486
561
|
function parseResultManifest(content) {
|
|
487
|
-
return
|
|
562
|
+
return parseResultRows(content);
|
|
488
563
|
}
|
|
489
564
|
function resolveResultSourcePath(source, cwd) {
|
|
490
|
-
const resolved =
|
|
491
|
-
if (isDirectoryPath(resolved) ||
|
|
565
|
+
const resolved = path4.isAbsolute(source) ? source : path4.resolve(cwd ?? process.cwd(), source);
|
|
566
|
+
if (isDirectoryPath(resolved) || path4.basename(resolved) === RESULT_INDEX_FILENAME) {
|
|
492
567
|
return resolveRunManifestPath(resolved);
|
|
493
568
|
}
|
|
494
569
|
return resolved;
|
|
@@ -496,8 +571,8 @@ function resolveResultSourcePath(source, cwd) {
|
|
|
496
571
|
function loadManifestResults(sourceFile) {
|
|
497
572
|
const resolvedSourceFile = resolveRunManifestPath(sourceFile);
|
|
498
573
|
const content = readFileSync(resolvedSourceFile, "utf8");
|
|
499
|
-
const records =
|
|
500
|
-
const baseDir =
|
|
574
|
+
const records = parseResultRows(content, resolvedSourceFile);
|
|
575
|
+
const baseDir = path4.dirname(resolvedSourceFile);
|
|
501
576
|
return records.map((record) => hydrateManifestRecord(baseDir, record));
|
|
502
577
|
}
|
|
503
578
|
function loadLightweightResults(sourceFile) {
|
|
@@ -544,10 +619,10 @@ function padLeft(str, len) {
|
|
|
544
619
|
}
|
|
545
620
|
function loadResultFile(filePath) {
|
|
546
621
|
const resolvedFilePath = resolveTraceResultPath(filePath);
|
|
547
|
-
if (
|
|
622
|
+
if (path5.extname(resolvedFilePath) === ".json") {
|
|
548
623
|
return loadOtlpTraceFile(resolvedFilePath);
|
|
549
624
|
}
|
|
550
|
-
if (
|
|
625
|
+
if (path5.basename(resolvedFilePath) === RESULT_INDEX_FILENAME) {
|
|
551
626
|
return loadManifestAsRawResults(resolvedFilePath);
|
|
552
627
|
}
|
|
553
628
|
return loadJsonlRecords(resolvedFilePath);
|
|
@@ -555,15 +630,33 @@ function loadResultFile(filePath) {
|
|
|
555
630
|
function resolveTraceResultPath(filePath) {
|
|
556
631
|
return resolveWorkspaceOrFilePath(filePath);
|
|
557
632
|
}
|
|
633
|
+
function isRecord(value) {
|
|
634
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
635
|
+
}
|
|
636
|
+
function shouldUseTraceScoreError(value) {
|
|
637
|
+
if (!isRecord(value)) {
|
|
638
|
+
return false;
|
|
639
|
+
}
|
|
640
|
+
return typeof value.test_id === "string" || typeof value.testId === "string" || Object.hasOwn(value, "score") || Object.hasOwn(value, "trace") || Object.hasOwn(value, "spans");
|
|
641
|
+
}
|
|
558
642
|
function loadJsonlRecords(filePath) {
|
|
559
643
|
const content = readFileSync2(filePath, "utf8");
|
|
560
644
|
const lines = content.trim().split("\n").filter((line) => line.trim());
|
|
561
645
|
return lines.map((line, i) => {
|
|
562
|
-
const
|
|
563
|
-
|
|
564
|
-
|
|
646
|
+
const parsed = JSON.parse(line);
|
|
647
|
+
try {
|
|
648
|
+
return normalizeResultRow(parsed, {
|
|
649
|
+
lineNumber: i + 1,
|
|
650
|
+
sourceLabel: filePath
|
|
651
|
+
});
|
|
652
|
+
} catch (error) {
|
|
653
|
+
if (error instanceof ResultRowSchemaError && shouldUseTraceScoreError(parsed)) {
|
|
654
|
+
throw new Error(
|
|
655
|
+
`Missing or invalid score in result at line ${i + 1}: ${line.slice(0, 100)}`
|
|
656
|
+
);
|
|
657
|
+
}
|
|
658
|
+
throw error;
|
|
565
659
|
}
|
|
566
|
-
return record;
|
|
567
660
|
});
|
|
568
661
|
}
|
|
569
662
|
function loadManifestAsRawResults(filePath) {
|
|
@@ -827,7 +920,7 @@ function toTraceSummary(result) {
|
|
|
827
920
|
return toCamelCaseDeep(rawTrace);
|
|
828
921
|
}
|
|
829
922
|
function buildRunId(relativeRunPath) {
|
|
830
|
-
const normalized = relativeRunPath.split(
|
|
923
|
+
const normalized = relativeRunPath.split(path5.sep).join("/");
|
|
831
924
|
const segments = normalized.split("/").filter(Boolean);
|
|
832
925
|
if (segments.length >= 2) {
|
|
833
926
|
const experiment = segments.slice(0, -1).join("/");
|
|
@@ -841,7 +934,7 @@ function buildRunId(relativeRunPath) {
|
|
|
841
934
|
}
|
|
842
935
|
function readRunDisplayName(runDir) {
|
|
843
936
|
try {
|
|
844
|
-
const benchmark = JSON.parse(readFileSync2(
|
|
937
|
+
const benchmark = JSON.parse(readFileSync2(path5.join(runDir, "benchmark.json"), "utf8"));
|
|
845
938
|
const displayName = benchmark.metadata?.display_name;
|
|
846
939
|
return typeof displayName === "string" && displayName.trim() ? displayName.trim() : void 0;
|
|
847
940
|
} catch {
|
|
@@ -851,8 +944,8 @@ function readRunDisplayName(runDir) {
|
|
|
851
944
|
function collectRunManifestPaths(runsDir, currentDir, files) {
|
|
852
945
|
const primaryPath = resolveExistingRunPrimaryPath(currentDir);
|
|
853
946
|
if (primaryPath) {
|
|
854
|
-
const relativeRunPath =
|
|
855
|
-
const sortName =
|
|
947
|
+
const relativeRunPath = path5.relative(runsDir, currentDir);
|
|
948
|
+
const sortName = path5.basename(currentDir);
|
|
856
949
|
files.push({
|
|
857
950
|
filePath: primaryPath,
|
|
858
951
|
displayName: readRunDisplayName(currentDir) ?? sortName,
|
|
@@ -864,7 +957,7 @@ function collectRunManifestPaths(runsDir, currentDir, files) {
|
|
|
864
957
|
const entries = readdirSync(currentDir, { withFileTypes: true });
|
|
865
958
|
for (const entry of entries) {
|
|
866
959
|
if (entry.isDirectory()) {
|
|
867
|
-
collectRunManifestPaths(runsDir,
|
|
960
|
+
collectRunManifestPaths(runsDir, path5.join(currentDir, entry.name), files);
|
|
868
961
|
}
|
|
869
962
|
}
|
|
870
963
|
}
|
|
@@ -874,7 +967,7 @@ function listResultFilesFromRunsDir(runsDir, limit) {
|
|
|
874
967
|
const entries = readdirSync(runsDir, { withFileTypes: true });
|
|
875
968
|
for (const entry of entries) {
|
|
876
969
|
if (entry.isDirectory()) {
|
|
877
|
-
collectRunManifestPaths(runsDir,
|
|
970
|
+
collectRunManifestPaths(runsDir, path5.join(runsDir, entry.name), files);
|
|
878
971
|
}
|
|
879
972
|
}
|
|
880
973
|
} catch {
|
|
@@ -884,7 +977,7 @@ function listResultFilesFromRunsDir(runsDir, limit) {
|
|
|
884
977
|
const metas = [];
|
|
885
978
|
for (const { filePath, displayName, runId } of limited) {
|
|
886
979
|
try {
|
|
887
|
-
const fileStat =
|
|
980
|
+
const fileStat = statSync2(filePath);
|
|
888
981
|
const results = loadResultFile(filePath);
|
|
889
982
|
const testCount = results.length;
|
|
890
983
|
const passCount = results.filter((r) => r.score >= DEFAULT_THRESHOLD).length;
|
|
@@ -909,7 +1002,7 @@ function listResultFilesFromRunsDir(runsDir, limit) {
|
|
|
909
1002
|
}
|
|
910
1003
|
function listResultFiles(cwd, limit) {
|
|
911
1004
|
return listResultFilesFromRunsDir(
|
|
912
|
-
|
|
1005
|
+
path5.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME),
|
|
913
1006
|
limit
|
|
914
1007
|
);
|
|
915
1008
|
}
|
|
@@ -945,21 +1038,21 @@ function formatScore(score) {
|
|
|
945
1038
|
|
|
946
1039
|
// src/commands/results/remote-metadata.ts
|
|
947
1040
|
import { execFileSync } from "node:child_process";
|
|
948
|
-
import { existsSync as
|
|
949
|
-
import
|
|
1041
|
+
import { existsSync as existsSync4, mkdirSync, readFileSync as readFileSync4, rmSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
1042
|
+
import path7 from "node:path";
|
|
950
1043
|
|
|
951
1044
|
// src/commands/results/run-tags.ts
|
|
952
|
-
import { existsSync as
|
|
953
|
-
import
|
|
1045
|
+
import { existsSync as existsSync3, readFileSync as readFileSync3, unlinkSync, writeFileSync } from "node:fs";
|
|
1046
|
+
import path6 from "node:path";
|
|
954
1047
|
var RUN_TAGS_FILENAME = "tags.json";
|
|
955
1048
|
var MAX_TAGS_PER_RUN = 20;
|
|
956
1049
|
var MAX_TAG_LENGTH = 60;
|
|
957
1050
|
function runTagsPath(manifestPath) {
|
|
958
|
-
return
|
|
1051
|
+
return path6.join(path6.dirname(manifestPath), RUN_TAGS_FILENAME);
|
|
959
1052
|
}
|
|
960
1053
|
function readRunTags(manifestPath) {
|
|
961
1054
|
const fp = runTagsPath(manifestPath);
|
|
962
|
-
if (!
|
|
1055
|
+
if (!existsSync3(fp)) return void 0;
|
|
963
1056
|
try {
|
|
964
1057
|
const parsed = JSON.parse(readFileSync3(fp, "utf8"));
|
|
965
1058
|
if (!parsed || typeof parsed !== "object") return void 0;
|
|
@@ -993,7 +1086,7 @@ function writeRunTags(manifestPath, tags) {
|
|
|
993
1086
|
}
|
|
994
1087
|
function deleteRunTags(manifestPath) {
|
|
995
1088
|
const fp = runTagsPath(manifestPath);
|
|
996
|
-
if (
|
|
1089
|
+
if (existsSync3(fp)) {
|
|
997
1090
|
unlinkSync(fp);
|
|
998
1091
|
}
|
|
999
1092
|
}
|
|
@@ -1026,8 +1119,8 @@ function normalizeTags(tags) {
|
|
|
1026
1119
|
}
|
|
1027
1120
|
|
|
1028
1121
|
// src/commands/results/remote-metadata.ts
|
|
1029
|
-
var RESULTS_RUNS_DIR =
|
|
1030
|
-
var REMOTE_METADATA_RUNS_DIR =
|
|
1122
|
+
var RESULTS_RUNS_DIR = path7.join(".agentv", "results", "runs");
|
|
1123
|
+
var REMOTE_METADATA_RUNS_DIR = path7.join(".agentv", "results", "metadata", "runs");
|
|
1031
1124
|
function cleanGitEnv() {
|
|
1032
1125
|
const env = {};
|
|
1033
1126
|
for (const [key, value] of Object.entries(process.env)) {
|
|
@@ -1053,10 +1146,10 @@ function tryRunGit(repoDir, args) {
|
|
|
1053
1146
|
}
|
|
1054
1147
|
}
|
|
1055
1148
|
function toGitPath(filePath) {
|
|
1056
|
-
return filePath.split(
|
|
1149
|
+
return filePath.split(path7.sep).join("/");
|
|
1057
1150
|
}
|
|
1058
1151
|
function readTagsFile(filePath) {
|
|
1059
|
-
if (!
|
|
1152
|
+
if (!existsSync4(filePath)) return void 0;
|
|
1060
1153
|
try {
|
|
1061
1154
|
return parseTagsFile(readFileSync4(filePath, "utf8"));
|
|
1062
1155
|
} catch {
|
|
@@ -1099,27 +1192,27 @@ function resolveComparisonRef(repoDir) {
|
|
|
1099
1192
|
return tryRunGit(repoDir, ["rev-parse", "--verify", "HEAD"]) ? "HEAD" : void 0;
|
|
1100
1193
|
}
|
|
1101
1194
|
function resolveRemoteRunMetadataPaths(repoDir, manifestPath) {
|
|
1102
|
-
const runsRoot =
|
|
1103
|
-
const manifestDir =
|
|
1104
|
-
const runRelativePath =
|
|
1105
|
-
if (runRelativePath.length === 0 || runRelativePath.startsWith("..") ||
|
|
1195
|
+
const runsRoot = path7.resolve(repoDir, RESULTS_RUNS_DIR);
|
|
1196
|
+
const manifestDir = path7.resolve(path7.dirname(manifestPath));
|
|
1197
|
+
const runRelativePath = path7.relative(runsRoot, manifestDir);
|
|
1198
|
+
if (runRelativePath.length === 0 || runRelativePath.startsWith("..") || path7.isAbsolute(runRelativePath)) {
|
|
1106
1199
|
throw new Error(
|
|
1107
1200
|
`Remote run manifest is outside the results repo runs directory: ${manifestPath}`
|
|
1108
1201
|
);
|
|
1109
1202
|
}
|
|
1110
|
-
const overlayTagsPath =
|
|
1203
|
+
const overlayTagsPath = path7.join(
|
|
1111
1204
|
repoDir,
|
|
1112
1205
|
REMOTE_METADATA_RUNS_DIR,
|
|
1113
1206
|
runRelativePath,
|
|
1114
1207
|
RUN_TAGS_FILENAME
|
|
1115
1208
|
);
|
|
1116
|
-
const artifactTagsPath =
|
|
1209
|
+
const artifactTagsPath = path7.join(runsRoot, runRelativePath, RUN_TAGS_FILENAME);
|
|
1117
1210
|
return {
|
|
1118
1211
|
runRelativePath,
|
|
1119
1212
|
artifactTagsPath,
|
|
1120
|
-
artifactTagsGitPath: toGitPath(
|
|
1213
|
+
artifactTagsGitPath: toGitPath(path7.relative(repoDir, artifactTagsPath)),
|
|
1121
1214
|
overlayTagsPath,
|
|
1122
|
-
overlayTagsGitPath: toGitPath(
|
|
1215
|
+
overlayTagsGitPath: toGitPath(path7.relative(repoDir, overlayTagsPath))
|
|
1123
1216
|
};
|
|
1124
1217
|
}
|
|
1125
1218
|
function readRemoteRunTagsContext(repoDir, manifestPath) {
|
|
@@ -1149,7 +1242,7 @@ function toRemoteRunTagState(context) {
|
|
|
1149
1242
|
};
|
|
1150
1243
|
}
|
|
1151
1244
|
function assertWritableResultsRepo(repoDir) {
|
|
1152
|
-
if (!
|
|
1245
|
+
if (!existsSync4(repoDir)) {
|
|
1153
1246
|
throw new Error("Writable results repo is not configured for remote metadata");
|
|
1154
1247
|
}
|
|
1155
1248
|
const insideWorkTree = tryRunGit(repoDir, ["rev-parse", "--is-inside-work-tree"]);
|
|
@@ -1174,7 +1267,7 @@ function writeRemoteRunTags(repoDir, manifestPath, tags) {
|
|
|
1174
1267
|
tags: cleaned,
|
|
1175
1268
|
updated_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
1176
1269
|
};
|
|
1177
|
-
mkdirSync(
|
|
1270
|
+
mkdirSync(path7.dirname(context.paths.overlayTagsPath), { recursive: true });
|
|
1178
1271
|
writeFileSync2(context.paths.overlayTagsPath, `${JSON.stringify(entry, null, 2)}
|
|
1179
1272
|
`, "utf8");
|
|
1180
1273
|
return readRemoteRunTags(repoDir, manifestPath);
|
|
@@ -1187,7 +1280,7 @@ function deleteRemoteRunTags(repoDir, manifestPath) {
|
|
|
1187
1280
|
var gitRunsCache = /* @__PURE__ */ new Map();
|
|
1188
1281
|
var GIT_RUNS_CACHE_TTL_MS = 6e4;
|
|
1189
1282
|
function getResultsStorageRef(config) {
|
|
1190
|
-
return config.branch
|
|
1283
|
+
return config.branch;
|
|
1191
1284
|
}
|
|
1192
1285
|
function cachedListGitRuns(repoDir, ref) {
|
|
1193
1286
|
const now = Date.now();
|
|
@@ -1220,13 +1313,13 @@ function getStatusMessage(error) {
|
|
|
1220
1313
|
return error instanceof Error ? error.message : String(error);
|
|
1221
1314
|
}
|
|
1222
1315
|
function getRelativeRunPath(cwd, runDir) {
|
|
1223
|
-
const relative =
|
|
1224
|
-
if (!relative.startsWith("..") && !
|
|
1316
|
+
const relative = path8.relative(path8.join(cwd, ".agentv", "results", "runs"), runDir);
|
|
1317
|
+
if (!relative.startsWith("..") && !path8.isAbsolute(relative)) {
|
|
1225
1318
|
return relative;
|
|
1226
1319
|
}
|
|
1227
|
-
const experiment =
|
|
1228
|
-
const runName =
|
|
1229
|
-
return experiment && experiment !== runName ?
|
|
1320
|
+
const experiment = path8.basename(path8.dirname(runDir));
|
|
1321
|
+
const runName = path8.basename(runDir);
|
|
1322
|
+
return experiment && experiment !== runName ? path8.join(experiment, runName) : runName;
|
|
1230
1323
|
}
|
|
1231
1324
|
function buildCommitTitle(payload) {
|
|
1232
1325
|
const passed = payload.results.filter((result) => result.score >= DEFAULT_THRESHOLD).length;
|
|
@@ -1242,23 +1335,65 @@ async function maybeWarnLargeArtifact(runDir) {
|
|
|
1242
1335
|
);
|
|
1243
1336
|
}
|
|
1244
1337
|
}
|
|
1245
|
-
async function loadNormalizedResultsConfig(cwd, projectId) {
|
|
1338
|
+
async function loadNormalizedResultsConfig(cwd, projectId, overrides) {
|
|
1246
1339
|
const repoRoot = await findRepoRoot(cwd) ?? cwd;
|
|
1247
|
-
const config = await loadConfig(
|
|
1340
|
+
const config = await loadConfig(path8.join(cwd, "_"), repoRoot);
|
|
1248
1341
|
const project = projectId !== void 0 ? getProject(projectId) : getProjectForPath(repoRoot) ?? getProjectForPath(cwd);
|
|
1249
1342
|
const projectResults = project?.results ? {
|
|
1250
1343
|
mode: "github",
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1344
|
+
...project.results.repoUrl !== void 0 && {
|
|
1345
|
+
repo: project.results.repoUrl,
|
|
1346
|
+
repo_url: project.results.repoUrl
|
|
1347
|
+
},
|
|
1348
|
+
...project.results.repoPath !== void 0 && { repo_path: project.results.repoPath },
|
|
1349
|
+
...project.results.branch !== void 0 && { branch: project.results.branch },
|
|
1350
|
+
...project.results.remote !== void 0 && { remote: project.results.remote },
|
|
1351
|
+
...project.results.path !== void 0 && { path: project.results.path },
|
|
1352
|
+
...(project.results.sync?.autoPush !== void 0 || project.results.sync?.requirePush !== void 0) && {
|
|
1353
|
+
sync: {
|
|
1354
|
+
...project.results.sync?.autoPush !== void 0 && {
|
|
1355
|
+
auto_push: project.results.sync.autoPush
|
|
1356
|
+
},
|
|
1357
|
+
...project.results.sync?.requirePush !== void 0 && {
|
|
1358
|
+
require_push: project.results.sync.requirePush
|
|
1359
|
+
}
|
|
1360
|
+
}
|
|
1361
|
+
},
|
|
1362
|
+
...project.results.branchPrefix !== void 0 && {
|
|
1363
|
+
branch_prefix: project.results.branchPrefix
|
|
1364
|
+
}
|
|
1256
1365
|
} : void 0;
|
|
1257
1366
|
const resultsConfig = projectResults ?? resolveResultsConfigForProject(config, project?.id);
|
|
1258
|
-
if (!resultsConfig) {
|
|
1367
|
+
if (!resultsConfig && !overrides) {
|
|
1259
1368
|
return void 0;
|
|
1260
1369
|
}
|
|
1261
|
-
|
|
1370
|
+
const baseConfig = resultsConfig ? normalizeResultsConfig(resultsConfig, { baseDir: project?.path ?? repoRoot }) : void 0;
|
|
1371
|
+
const repoOverride = overrides?.repo ?? overrides?.repo_url ?? overrides?.repo_path;
|
|
1372
|
+
if (!baseConfig && !repoOverride) {
|
|
1373
|
+
return void 0;
|
|
1374
|
+
}
|
|
1375
|
+
if (!overrides) {
|
|
1376
|
+
return baseConfig;
|
|
1377
|
+
}
|
|
1378
|
+
const merged = {
|
|
1379
|
+
mode: "github",
|
|
1380
|
+
...overrides.repo !== void 0 ? { repo: overrides.repo } : overrides.repo_url !== void 0 ? { repo_url: overrides.repo_url } : overrides.repo_path !== void 0 ? { repo_path: overrides.repo_path } : baseConfig?.repo_path ? { repo_path: baseConfig.repo_path } : baseConfig?.repo_url ? { repo_url: baseConfig.repo_url } : baseConfig?.repo ? { repo: baseConfig.repo } : {},
|
|
1381
|
+
...overrides.branch !== void 0 ? { branch: overrides.branch } : baseConfig?.branch ? { branch: baseConfig.branch } : {},
|
|
1382
|
+
...overrides.remote !== void 0 ? { remote: overrides.remote } : baseConfig?.remote ? { remote: baseConfig.remote } : {},
|
|
1383
|
+
...repoOverride === void 0 && baseConfig?.repo_path === void 0 && baseConfig?.path ? { path: baseConfig.path } : {},
|
|
1384
|
+
...(overrides.auto_push !== void 0 || overrides.require_push !== void 0 || baseConfig?.auto_push !== void 0 || baseConfig?.require_push !== void 0) && {
|
|
1385
|
+
sync: {
|
|
1386
|
+
...(overrides.auto_push ?? baseConfig?.auto_push) !== void 0 && {
|
|
1387
|
+
auto_push: overrides.auto_push ?? baseConfig?.auto_push
|
|
1388
|
+
},
|
|
1389
|
+
...(overrides.require_push ?? baseConfig?.require_push) !== void 0 && {
|
|
1390
|
+
require_push: overrides.require_push ?? baseConfig?.require_push
|
|
1391
|
+
}
|
|
1392
|
+
}
|
|
1393
|
+
},
|
|
1394
|
+
...baseConfig?.branch_prefix ? { branch_prefix: baseConfig.branch_prefix } : {}
|
|
1395
|
+
};
|
|
1396
|
+
return normalizeResultsConfig(merged, { baseDir: project?.path ?? repoRoot });
|
|
1262
1397
|
}
|
|
1263
1398
|
function encodeRemoteRunId(filename) {
|
|
1264
1399
|
return `${REMOTE_RUN_PREFIX}${filename}`;
|
|
@@ -1348,7 +1483,7 @@ async function listMergedResultFiles(cwd, limit, projectId) {
|
|
|
1348
1483
|
filename: encodeRemoteRunId(r.run_id),
|
|
1349
1484
|
raw_filename: r.run_id,
|
|
1350
1485
|
source: "remote",
|
|
1351
|
-
path:
|
|
1486
|
+
path: path8.join(config.path, r.manifest_path),
|
|
1352
1487
|
displayName: r.display_name,
|
|
1353
1488
|
timestamp: r.timestamp,
|
|
1354
1489
|
testCount: r.test_count,
|
|
@@ -1394,20 +1529,20 @@ async function findRunById(cwd, runId, projectId) {
|
|
|
1394
1529
|
return runs.find((run) => run.filename === runId);
|
|
1395
1530
|
}
|
|
1396
1531
|
async function ensureRemoteRunAvailable(cwd, meta, projectId) {
|
|
1397
|
-
if (meta.source !== "remote" ||
|
|
1532
|
+
if (meta.source !== "remote" || existsSync5(meta.path)) {
|
|
1398
1533
|
return;
|
|
1399
1534
|
}
|
|
1400
1535
|
const config = await loadNormalizedResultsConfig(cwd, projectId);
|
|
1401
1536
|
if (!config) {
|
|
1402
1537
|
throw new Error("Remote results are not configured");
|
|
1403
1538
|
}
|
|
1404
|
-
const relativeManifestPath =
|
|
1539
|
+
const relativeManifestPath = path8.relative(config.path, meta.path).split(path8.sep).join("/");
|
|
1405
1540
|
if (relativeManifestPath.length === 0 || relativeManifestPath === meta.path || relativeManifestPath.startsWith("../")) {
|
|
1406
1541
|
throw new Error(`Remote manifest path is outside the results repo clone: ${meta.path}`);
|
|
1407
1542
|
}
|
|
1408
|
-
const relativeRunPath =
|
|
1543
|
+
const relativeRunPath = path8.posix.relative(
|
|
1409
1544
|
".agentv/results/runs",
|
|
1410
|
-
|
|
1545
|
+
path8.posix.dirname(relativeManifestPath)
|
|
1411
1546
|
);
|
|
1412
1547
|
await materializeGitRun(config.path, relativeRunPath, getResultsStorageRef(config));
|
|
1413
1548
|
}
|
|
@@ -1444,8 +1579,12 @@ async function clearRemoteRunTags(cwd, meta, projectId) {
|
|
|
1444
1579
|
return deleteRemoteRunTags(config.path, meta.path);
|
|
1445
1580
|
}
|
|
1446
1581
|
async function maybeAutoExportRunArtifacts(payload) {
|
|
1447
|
-
const config = await loadNormalizedResultsConfig(
|
|
1448
|
-
|
|
1582
|
+
const config = await loadNormalizedResultsConfig(
|
|
1583
|
+
payload.cwd,
|
|
1584
|
+
void 0,
|
|
1585
|
+
payload.results_overrides
|
|
1586
|
+
);
|
|
1587
|
+
if (!config) {
|
|
1449
1588
|
return "disabled";
|
|
1450
1589
|
}
|
|
1451
1590
|
try {
|
|
@@ -1459,12 +1598,18 @@ async function maybeAutoExportRunArtifacts(payload) {
|
|
|
1459
1598
|
commitMessage: commitTitle
|
|
1460
1599
|
});
|
|
1461
1600
|
if (!pushed) {
|
|
1462
|
-
console.warn("Warning: results export produced no git changes.
|
|
1601
|
+
console.warn("Warning: results export produced no git changes.");
|
|
1463
1602
|
return "already_published";
|
|
1464
1603
|
}
|
|
1465
|
-
|
|
1604
|
+
const pushLabel = config.auto_push || config.require_push ? "pushed" : "published locally";
|
|
1605
|
+
console.log(
|
|
1606
|
+
`Results ${pushLabel} to ${config.repo} (${config.branch ?? "default branch"}:${relativeRunPath})`
|
|
1607
|
+
);
|
|
1466
1608
|
return "published";
|
|
1467
1609
|
} catch (error) {
|
|
1610
|
+
if (config.require_push) {
|
|
1611
|
+
throw error;
|
|
1612
|
+
}
|
|
1468
1613
|
console.warn(`Warning: skipping results export: ${getStatusMessage(error)}`);
|
|
1469
1614
|
console.warn("Warning: Run 'gh auth login' if GitHub authentication is missing.");
|
|
1470
1615
|
return "failed";
|
|
@@ -1474,13 +1619,13 @@ async function maybeAutoExportRunArtifacts(payload) {
|
|
|
1474
1619
|
// src/commands/eval/env.ts
|
|
1475
1620
|
import { constants as constants3 } from "node:fs";
|
|
1476
1621
|
import { access as access3 } from "node:fs/promises";
|
|
1477
|
-
import
|
|
1622
|
+
import path9 from "node:path";
|
|
1478
1623
|
import { config as loadDotenv } from "dotenv";
|
|
1479
1624
|
function uniqueDirs(directories) {
|
|
1480
1625
|
const seen = /* @__PURE__ */ new Set();
|
|
1481
1626
|
const result = [];
|
|
1482
1627
|
for (const dir of directories) {
|
|
1483
|
-
const absolute =
|
|
1628
|
+
const absolute = path9.resolve(dir);
|
|
1484
1629
|
if (seen.has(absolute)) {
|
|
1485
1630
|
continue;
|
|
1486
1631
|
}
|
|
@@ -1499,14 +1644,14 @@ async function fileExists2(filePath) {
|
|
|
1499
1644
|
}
|
|
1500
1645
|
function collectAncestorDirectories(start, boundary) {
|
|
1501
1646
|
const directories = [];
|
|
1502
|
-
const boundaryDir =
|
|
1503
|
-
let current =
|
|
1647
|
+
const boundaryDir = path9.resolve(boundary);
|
|
1648
|
+
let current = path9.resolve(start);
|
|
1504
1649
|
while (current !== void 0) {
|
|
1505
1650
|
directories.push(current);
|
|
1506
1651
|
if (current === boundaryDir) {
|
|
1507
1652
|
break;
|
|
1508
1653
|
}
|
|
1509
|
-
const parent =
|
|
1654
|
+
const parent = path9.dirname(current);
|
|
1510
1655
|
if (parent === current) {
|
|
1511
1656
|
break;
|
|
1512
1657
|
}
|
|
@@ -1516,12 +1661,12 @@ function collectAncestorDirectories(start, boundary) {
|
|
|
1516
1661
|
}
|
|
1517
1662
|
async function loadEnvFromHierarchy(options) {
|
|
1518
1663
|
const { testFilePath, repoRoot, verbose } = options;
|
|
1519
|
-
const testDir =
|
|
1664
|
+
const testDir = path9.dirname(path9.resolve(testFilePath));
|
|
1520
1665
|
const cwd = process.cwd();
|
|
1521
1666
|
const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
|
|
1522
1667
|
const envFiles = [];
|
|
1523
1668
|
for (const dir of searchDirs) {
|
|
1524
|
-
const candidate =
|
|
1669
|
+
const candidate = path9.join(dir, ".env");
|
|
1525
1670
|
if (await fileExists2(candidate)) {
|
|
1526
1671
|
envFiles.push(candidate);
|
|
1527
1672
|
}
|
|
@@ -1542,10 +1687,118 @@ async function loadEnvFromHierarchy(options) {
|
|
|
1542
1687
|
return envFiles[0];
|
|
1543
1688
|
}
|
|
1544
1689
|
|
|
1690
|
+
// src/commands/eval/otel-backends.ts
|
|
1691
|
+
import { access as access4 } from "node:fs/promises";
|
|
1692
|
+
import path10 from "node:path";
|
|
1693
|
+
import { pathToFileURL } from "node:url";
|
|
1694
|
+
// File extensions probed, in this order, when searching for a local resolver module.
var RESOLVER_EXTENSIONS = [".mjs", ".js"];
|
|
1695
|
+
/**
 * Built-in OTel backend resolvers.
 *
 * Each entry has a `name` and a `resolve({ env })` function that turns
 * environment variables into an `{ endpoint, headers }` pair.
 */
var builtinOtelBackendResolvers = [
  {
    name: "langfuse",
    // Basic auth built from the public/secret key pair; host defaults to Langfuse cloud.
    resolve({ env }) {
      const host = trimTrailingSlash(env.LANGFUSE_HOST ?? "https://cloud.langfuse.com");
      const credentials = `${env.LANGFUSE_PUBLIC_KEY ?? ""}:${env.LANGFUSE_SECRET_KEY ?? ""}`;
      const encoded = Buffer.from(credentials).toString("base64");
      return {
        endpoint: `${host}/api/public/otel/v1/traces`,
        headers: { Authorization: `Basic ${encoded}` }
      };
    }
  },
  {
    name: "braintrust",
    // Bearer auth; the optional x-bt-parent header comes from the first
    // non-nullish source: BRAINTRUST_PARENT, then project id, then project name.
    resolve({ env }) {
      let parent = env.BRAINTRUST_PARENT;
      if (parent === null || parent === undefined) {
        parent = env.BRAINTRUST_PROJECT_ID ? `project_id:${env.BRAINTRUST_PROJECT_ID}` : undefined;
      }
      if (parent === null || parent === undefined) {
        parent = env.BRAINTRUST_PROJECT ? `project_name:${env.BRAINTRUST_PROJECT}` : undefined;
      }
      const headers = { Authorization: `Bearer ${env.BRAINTRUST_API_KEY ?? ""}` };
      if (parent) {
        headers["x-bt-parent"] = parent;
      }
      return { endpoint: "https://api.braintrust.dev/otel/v1/traces", headers };
    }
  },
  {
    name: "confident",
    // API key is passed in a dedicated header.
    resolve({ env }) {
      const apiKey = env.CONFIDENT_API_KEY ?? "";
      return {
        endpoint: "https://otel.confident-ai.com/v1/traces",
        headers: { "x-confident-api-key": apiKey }
      };
    }
  }
];
|
|
1736
|
+
// Lookup table from resolver name to its built-in resolver object.
var builtinOtelBackendResolversByName = new Map();
for (const builtinResolver of builtinOtelBackendResolvers) {
  builtinOtelBackendResolversByName.set(builtinResolver.name, builtinResolver);
}
|
|
1739
|
+
/**
 * Resolves the named OTel backend using the given context.
 *
 * @param {string} name - Backend name to look up.
 * @param {object} context - Passed to the resolver; `context.cwd` is used for the lookup.
 * @returns {Promise<*>} The resolver's result, or `undefined` when no resolver is found.
 */
async function resolveOtelBackend(name, context) {
  const backend = await loadOtelBackendResolver(name, context.cwd);
  if (backend === null || backend === undefined) {
    return undefined;
  }
  return backend.resolve(context);
}
|
|
1743
|
+
/**
 * Loads the resolver for a backend name, preferring a local resolver file
 * over the built-in table.
 *
 * @param {string} name - Backend name.
 * @param {string} cwd - Directory from which local resolver files are searched.
 * @returns {Promise<object|undefined>} The resolver, or `undefined` when none matches.
 */
async function loadOtelBackendResolver(name, cwd) {
  const localPath = await findLocalOtelBackendResolver(name, cwd);
  return localPath
    ? importOtelBackendResolver(localPath, name)
    : builtinOtelBackendResolversByName.get(name);
}
|
|
1750
|
+
/**
 * Finds the first accessible local resolver file for a backend name.
 *
 * Candidates are `<dir>/<name><ext>` for every search directory (in order)
 * and every extension in RESOLVER_EXTENSIONS (in order).
 *
 * @param {string} name - Backend name; unsafe names are never looked up.
 * @param {string} cwd - Directory from which the search directories are derived.
 * @returns {Promise<string|undefined>} Path of the first accessible candidate, if any.
 */
async function findLocalOtelBackendResolver(name, cwd) {
  if (isSafeResolverName(name)) {
    const candidates = getResolverSearchDirs(cwd).flatMap(
      (searchDir) => RESOLVER_EXTENSIONS.map((extension) => path10.join(searchDir, `${name}${extension}`))
    );
    for (const filePath of candidates) {
      try {
        await access4(filePath);
        return filePath;
      } catch {
        // Not accessible: fall through to the next candidate.
      }
    }
  }
  return undefined;
}
|
|
1766
|
+
/**
 * Lists the `.agentv/otel-backends` directory of `cwd` and of each ancestor,
 * nearest first. The filesystem root itself is not included.
 *
 * @param {string} cwd - Starting directory (resolved to an absolute path).
 * @returns {string[]} Candidate directories, ordered from `cwd` upwards.
 */
function getResolverSearchDirs(cwd) {
  const start = path10.resolve(cwd);
  const { root } = path10.parse(start);
  const searchDirs = [];
  for (let dir = start; dir !== root; dir = path10.dirname(dir)) {
    searchDirs.push(path10.join(dir, ".agentv", "otel-backends"));
  }
  return searchDirs;
}
|
|
1776
|
+
/**
 * Checks that a resolver name is non-empty, contains no path separators,
 * and does not start with a dot.
 *
 * @param {string} name - Candidate resolver name.
 * @returns {boolean} `true` when the name passes all checks.
 */
function isSafeResolverName(name) {
  if (!(name.length > 0)) {
    return false;
  }
  if (name.includes("/") || name.includes("\\")) {
    return false;
  }
  return !name.startsWith(".");
}
|
|
1779
|
+
/**
 * Dynamically imports a resolver module and returns its resolver object.
 *
 * The first of the module's `default`, `otelBackend`, or `resolver` exports
 * that has a `resolve` function is used.
 *
 * @param {string} filePath - Path of the module file to import.
 * @param {string} fallbackName - Name used when the resolver has no non-empty string `name`.
 * @returns {Promise<object>} A copy of the resolver with `name` filled in.
 * @throws {Error} When none of the inspected exports has a `resolve` function.
 */
async function importOtelBackendResolver(filePath, fallbackName) {
  const moduleUrl = pathToFileURL(filePath).href;
  const loaded = await import(moduleUrl);
  let resolver;
  for (const exported of [loaded.default, loaded.otelBackend, loaded.resolver]) {
    if (exported && typeof exported.resolve === "function") {
      resolver = exported;
      break;
    }
  }
  if (!resolver) {
    throw new Error(
      `OTel backend resolver '${fallbackName}' from ${filePath} must export a resolver object`
    );
  }
  const hasOwnName = typeof resolver.name === "string" && resolver.name.length > 0;
  return { ...resolver, name: hasOwnName ? resolver.name : fallbackName };
}
|
|
1794
|
+
/**
 * Removes every trailing "/" from a string.
 *
 * @param {string} value - Input string.
 * @returns {string} The input without trailing slashes.
 */
function trimTrailingSlash(value) {
  let end = value.length;
  while (end > 0 && value[end - 1] === "/") {
    end -= 1;
  }
  return value.slice(0, end);
}
|
|
1797
|
+
|
|
1545
1798
|
// src/commands/eval/jsonl-writer.ts
|
|
1546
1799
|
import { createWriteStream } from "node:fs";
|
|
1547
1800
|
import { mkdir } from "node:fs/promises";
|
|
1548
|
-
import
|
|
1801
|
+
import path11 from "node:path";
|
|
1549
1802
|
import { finished } from "node:stream/promises";
|
|
1550
1803
|
|
|
1551
1804
|
// ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
|
|
@@ -1763,7 +2016,7 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
1763
2016
|
this.stream = stream;
|
|
1764
2017
|
}
|
|
1765
2018
|
static async open(filePath, options) {
|
|
1766
|
-
await mkdir(
|
|
2019
|
+
await mkdir(path11.dirname(filePath), { recursive: true });
|
|
1767
2020
|
const flags = options?.append ? "a" : "w";
|
|
1768
2021
|
const stream = createWriteStream(filePath, { flags, encoding: "utf8" });
|
|
1769
2022
|
return new _JsonlWriter(stream);
|
|
@@ -1888,12 +2141,12 @@ var ProgressDisplay = class {
|
|
|
1888
2141
|
}
|
|
1889
2142
|
addLogPaths(paths) {
|
|
1890
2143
|
const newPaths = [];
|
|
1891
|
-
for (const
|
|
1892
|
-
if (this.logPathSet.has(
|
|
2144
|
+
for (const path16 of paths) {
|
|
2145
|
+
if (this.logPathSet.has(path16)) {
|
|
1893
2146
|
continue;
|
|
1894
2147
|
}
|
|
1895
|
-
this.logPathSet.add(
|
|
1896
|
-
newPaths.push(
|
|
2148
|
+
this.logPathSet.add(path16);
|
|
2149
|
+
newPaths.push(path16);
|
|
1897
2150
|
}
|
|
1898
2151
|
if (newPaths.length === 0) {
|
|
1899
2152
|
return;
|
|
@@ -1946,9 +2199,9 @@ async function loadNonErrorResults(jsonlPath) {
|
|
|
1946
2199
|
}
|
|
1947
2200
|
|
|
1948
2201
|
// src/commands/eval/run-cache.ts
|
|
1949
|
-
import { existsSync as
|
|
2202
|
+
import { existsSync as existsSync6 } from "node:fs";
|
|
1950
2203
|
import { mkdir as mkdir2, readFile, writeFile } from "node:fs/promises";
|
|
1951
|
-
import
|
|
2204
|
+
import path12 from "node:path";
|
|
1952
2205
|
var CACHE_FILENAME = "cache.json";
|
|
1953
2206
|
function resolveRunCacheFile(cache) {
|
|
1954
2207
|
if (cache.lastRunDir) {
|
|
@@ -1957,7 +2210,7 @@ function resolveRunCacheFile(cache) {
|
|
|
1957
2210
|
return "";
|
|
1958
2211
|
}
|
|
1959
2212
|
/**
 * Builds the path of the run-cache file under `<cwd>/.agentv`.
 *
 * @param {string} cwd - Base directory.
 * @returns {string} Path to the cache file.
 */
function cachePath(cwd) {
  const agentvDir = path12.join(cwd, ".agentv");
  return path12.join(agentvDir, CACHE_FILENAME);
}
|
|
1962
2215
|
async function loadRunCache(cwd) {
|
|
1963
2216
|
try {
|
|
@@ -1970,17 +2223,17 @@ async function loadRunCache(cwd) {
|
|
|
1970
2223
|
/**
 * Returns the last run directory recorded in the run cache, provided it
 * still exists on disk.
 *
 * @param {string} cwd - Directory whose run cache is loaded.
 * @returns {Promise<string|undefined>} The cached directory, or `undefined`
 *   when there is no cache entry or the directory no longer exists.
 */
async function resolveCachedRunDir(cwd) {
  const cached = await loadRunCache(cwd);
  const lastRunDir = cached?.lastRunDir;
  if (lastRunDir && existsSync6(lastRunDir)) {
    return lastRunDir;
  }
  return undefined;
}
|
|
1976
2229
|
async function saveRunCache(cwd, resultPath) {
|
|
1977
|
-
if (
|
|
2230
|
+
if (path12.basename(resultPath) !== RESULT_INDEX_FILENAME) {
|
|
1978
2231
|
return;
|
|
1979
2232
|
}
|
|
1980
|
-
const dir =
|
|
2233
|
+
const dir = path12.join(cwd, ".agentv");
|
|
1981
2234
|
await mkdir2(dir, { recursive: true });
|
|
1982
2235
|
const cache = {
|
|
1983
|
-
lastRunDir:
|
|
2236
|
+
lastRunDir: path12.dirname(resultPath),
|
|
1984
2237
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
1985
2238
|
};
|
|
1986
2239
|
await writeFile(cachePath(cwd), `${JSON.stringify(cache, null, 2)}
|
|
@@ -2249,7 +2502,7 @@ function formatMatrixSummary(results) {
|
|
|
2249
2502
|
|
|
2250
2503
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
2251
2504
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
2252
|
-
import
|
|
2505
|
+
import path13 from "node:path";
|
|
2253
2506
|
import { readFile as readFile22, readdir, stat as stat2 } from "node:fs/promises";
|
|
2254
2507
|
import path22 from "node:path";
|
|
2255
2508
|
import { readFile as readFile3 } from "node:fs/promises";
|
|
@@ -2260,7 +2513,7 @@ import { readFile as readFile5 } from "node:fs/promises";
|
|
|
2260
2513
|
import path52 from "node:path";
|
|
2261
2514
|
import { readFile as readFile6 } from "node:fs/promises";
|
|
2262
2515
|
import path62 from "node:path";
|
|
2263
|
-
import { access as
|
|
2516
|
+
import { access as access5, readFile as readFile7 } from "node:fs/promises";
|
|
2264
2517
|
import path72 from "node:path";
|
|
2265
2518
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
2266
2519
|
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
|
|
@@ -2295,8 +2548,8 @@ async function detectFileType(filePath) {
|
|
|
2295
2548
|
}
|
|
2296
2549
|
}
|
|
2297
2550
|
function inferFileTypeFromPath(filePath) {
|
|
2298
|
-
const normalized =
|
|
2299
|
-
const basename =
|
|
2551
|
+
const normalized = path13.normalize(filePath).replace(/\\/g, "/");
|
|
2552
|
+
const basename = path13.basename(filePath);
|
|
2300
2553
|
if (normalized.includes("/.agentv/")) {
|
|
2301
2554
|
if (basename === "config.yaml" || basename === "config.yml") {
|
|
2302
2555
|
return "config";
|
|
@@ -3258,6 +3511,8 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
3258
3511
|
...COMMON_SETTINGS,
|
|
3259
3512
|
"cli_url",
|
|
3260
3513
|
"cli_path",
|
|
3514
|
+
"args",
|
|
3515
|
+
"arguments",
|
|
3261
3516
|
"github_token",
|
|
3262
3517
|
"model",
|
|
3263
3518
|
"cwd",
|
|
@@ -3266,8 +3521,12 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
3266
3521
|
"log_format",
|
|
3267
3522
|
"stream_log",
|
|
3268
3523
|
"system_prompt",
|
|
3269
|
-
"
|
|
3270
|
-
"
|
|
3524
|
+
"subprovider",
|
|
3525
|
+
"base_url",
|
|
3526
|
+
"api_key",
|
|
3527
|
+
"bearer_token",
|
|
3528
|
+
"api_version",
|
|
3529
|
+
"wire_api"
|
|
3271
3530
|
]);
|
|
3272
3531
|
var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
3273
3532
|
...COMMON_SETTINGS,
|
|
@@ -3283,7 +3542,12 @@ var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
3283
3542
|
"log_format",
|
|
3284
3543
|
"stream_log",
|
|
3285
3544
|
"system_prompt",
|
|
3286
|
-
"
|
|
3545
|
+
"subprovider",
|
|
3546
|
+
"base_url",
|
|
3547
|
+
"api_key",
|
|
3548
|
+
"bearer_token",
|
|
3549
|
+
"api_version",
|
|
3550
|
+
"wire_api"
|
|
3287
3551
|
]);
|
|
3288
3552
|
var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
|
|
3289
3553
|
...COMMON_SETTINGS,
|
|
@@ -3302,7 +3566,7 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
3302
3566
|
var REPLAY_SETTINGS = /* @__PURE__ */ new Set([
|
|
3303
3567
|
...COMMON_SETTINGS,
|
|
3304
3568
|
"fixtures",
|
|
3305
|
-
"
|
|
3569
|
+
"execution_traces",
|
|
3306
3570
|
"source_target",
|
|
3307
3571
|
"suite",
|
|
3308
3572
|
"eval_path",
|
|
@@ -3648,13 +3912,13 @@ async function validateTargetsFile(filePath) {
|
|
|
3648
3912
|
}
|
|
3649
3913
|
if (providerValue === "replay") {
|
|
3650
3914
|
const hasFixtures = isNonEmptyString(target.fixtures);
|
|
3651
|
-
const
|
|
3652
|
-
if (hasFixtures ===
|
|
3915
|
+
const hasExecutionTraces = isNonEmptyString(target.execution_traces);
|
|
3916
|
+
if (hasFixtures === hasExecutionTraces) {
|
|
3653
3917
|
errors.push({
|
|
3654
3918
|
severity: "error",
|
|
3655
3919
|
filePath: absolutePath,
|
|
3656
3920
|
location,
|
|
3657
|
-
message: "Replay provider requires exactly one replay source: 'fixtures' or '
|
|
3921
|
+
message: "Replay provider requires exactly one replay source: 'fixtures' or 'execution_traces'"
|
|
3658
3922
|
});
|
|
3659
3923
|
}
|
|
3660
3924
|
if (!isNonEmptyString(target.source_target)) {
|
|
@@ -3909,7 +4173,27 @@ function validateProjectResultsConfig(errors, filePath, rawResults, location) {
|
|
|
3909
4173
|
});
|
|
3910
4174
|
}
|
|
3911
4175
|
}
|
|
3912
|
-
|
|
4176
|
+
const hasRepoUrl = resultsRecord.repo_url !== void 0;
|
|
4177
|
+
const hasRepoPath = resultsRecord.repo_path !== void 0;
|
|
4178
|
+
if (hasRepoUrl && hasRepoPath) {
|
|
4179
|
+
errors.push({
|
|
4180
|
+
severity: "error",
|
|
4181
|
+
filePath,
|
|
4182
|
+
location,
|
|
4183
|
+
message: `Field '${location}' must set only one of repo_url or repo_path`
|
|
4184
|
+
});
|
|
4185
|
+
} else if (hasRepoUrl) {
|
|
4186
|
+
validateGitRemoteUrl(errors, filePath, resultsRecord.repo_url, `${location}.repo_url`);
|
|
4187
|
+
} else if (hasRepoPath) {
|
|
4188
|
+
validateRequiredString(errors, filePath, resultsRecord.repo_path, `${location}.repo_path`);
|
|
4189
|
+
} else {
|
|
4190
|
+
errors.push({
|
|
4191
|
+
severity: "error",
|
|
4192
|
+
filePath,
|
|
4193
|
+
location,
|
|
4194
|
+
message: `Field '${location}' must set repo_url or repo_path`
|
|
4195
|
+
});
|
|
4196
|
+
}
|
|
3913
4197
|
if (resultsRecord.branch !== void 0 && (typeof resultsRecord.branch !== "string" || resultsRecord.branch.trim().length === 0)) {
|
|
3914
4198
|
errors.push({
|
|
3915
4199
|
severity: "error",
|
|
@@ -3918,6 +4202,14 @@ function validateProjectResultsConfig(errors, filePath, rawResults, location) {
|
|
|
3918
4202
|
message: `Field '${location}.branch' must be a non-empty string`
|
|
3919
4203
|
});
|
|
3920
4204
|
}
|
|
4205
|
+
if (resultsRecord.remote !== void 0 && (typeof resultsRecord.remote !== "string" || resultsRecord.remote.trim().length === 0)) {
|
|
4206
|
+
errors.push({
|
|
4207
|
+
severity: "error",
|
|
4208
|
+
filePath,
|
|
4209
|
+
location: `${location}.remote`,
|
|
4210
|
+
message: `Field '${location}.remote' must be a non-empty string`
|
|
4211
|
+
});
|
|
4212
|
+
}
|
|
3921
4213
|
if (resultsRecord.path !== void 0) {
|
|
3922
4214
|
if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
|
|
3923
4215
|
errors.push({
|
|
@@ -3953,6 +4245,14 @@ function validateProjectResultsConfig(errors, filePath, rawResults, location) {
|
|
|
3953
4245
|
message: `Field '${location}.sync.auto_push' must be a boolean`
|
|
3954
4246
|
});
|
|
3955
4247
|
}
|
|
4248
|
+
if (syncRecord.require_push !== void 0 && typeof syncRecord.require_push !== "boolean") {
|
|
4249
|
+
errors.push({
|
|
4250
|
+
severity: "error",
|
|
4251
|
+
filePath,
|
|
4252
|
+
location: `${location}.sync.require_push`,
|
|
4253
|
+
message: `Field '${location}.sync.require_push' must be a boolean`
|
|
4254
|
+
});
|
|
4255
|
+
}
|
|
3956
4256
|
}
|
|
3957
4257
|
}
|
|
3958
4258
|
if (resultsRecord.branch_prefix !== void 0 && (typeof resultsRecord.branch_prefix !== "string" || resultsRecord.branch_prefix.trim().length === 0)) {
|
|
@@ -3978,7 +4278,7 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
|
|
|
3978
4278
|
return;
|
|
3979
4279
|
}
|
|
3980
4280
|
const resultsRecord = rawResults;
|
|
3981
|
-
if (resultsRecord.mode !== "github") {
|
|
4281
|
+
if (resultsRecord.mode !== void 0 && resultsRecord.mode !== "github") {
|
|
3982
4282
|
errors.push({
|
|
3983
4283
|
severity: "error",
|
|
3984
4284
|
filePath,
|
|
@@ -3986,7 +4286,31 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
|
|
|
3986
4286
|
message: `Field '${location}.mode' must be 'github'`
|
|
3987
4287
|
});
|
|
3988
4288
|
}
|
|
3989
|
-
|
|
4289
|
+
const hasLegacyRepo = resultsRecord.repo !== void 0;
|
|
4290
|
+
const hasRepoUrl = resultsRecord.repo_url !== void 0;
|
|
4291
|
+
const hasRepoPath = resultsRecord.repo_path !== void 0;
|
|
4292
|
+
const sourceCount = [hasLegacyRepo, hasRepoUrl, hasRepoPath].filter(Boolean).length;
|
|
4293
|
+
if (sourceCount === 0) {
|
|
4294
|
+
errors.push({
|
|
4295
|
+
severity: "error",
|
|
4296
|
+
filePath,
|
|
4297
|
+
location,
|
|
4298
|
+
message: `Field '${location}' must set repo_url/repo or repo_path`
|
|
4299
|
+
});
|
|
4300
|
+
} else if (sourceCount > 1) {
|
|
4301
|
+
errors.push({
|
|
4302
|
+
severity: "error",
|
|
4303
|
+
filePath,
|
|
4304
|
+
location,
|
|
4305
|
+
message: `Field '${location}' must set only one of repo_url/repo or repo_path`
|
|
4306
|
+
});
|
|
4307
|
+
} else if (hasLegacyRepo) {
|
|
4308
|
+
validateRequiredString(errors, filePath, resultsRecord.repo, `${location}.repo`);
|
|
4309
|
+
} else if (hasRepoUrl) {
|
|
4310
|
+
validateGitRemoteUrl(errors, filePath, resultsRecord.repo_url, `${location}.repo_url`);
|
|
4311
|
+
} else {
|
|
4312
|
+
validateRequiredString(errors, filePath, resultsRecord.repo_path, `${location}.repo_path`);
|
|
4313
|
+
}
|
|
3990
4314
|
if (resultsRecord.branch !== void 0 && (typeof resultsRecord.branch !== "string" || resultsRecord.branch.trim().length === 0)) {
|
|
3991
4315
|
errors.push({
|
|
3992
4316
|
severity: "error",
|
|
@@ -3995,6 +4319,14 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
|
|
|
3995
4319
|
message: `Field '${location}.branch' must be a non-empty string`
|
|
3996
4320
|
});
|
|
3997
4321
|
}
|
|
4322
|
+
if (resultsRecord.remote !== void 0 && (typeof resultsRecord.remote !== "string" || resultsRecord.remote.trim().length === 0)) {
|
|
4323
|
+
errors.push({
|
|
4324
|
+
severity: "error",
|
|
4325
|
+
filePath,
|
|
4326
|
+
location: `${location}.remote`,
|
|
4327
|
+
message: `Field '${location}.remote' must be a non-empty string`
|
|
4328
|
+
});
|
|
4329
|
+
}
|
|
3998
4330
|
if (resultsRecord.path !== void 0) {
|
|
3999
4331
|
if (typeof resultsRecord.path !== "string" || resultsRecord.path.trim().length === 0) {
|
|
4000
4332
|
errors.push({
|
|
@@ -4023,6 +4355,34 @@ function validateResultsConfig(errors, filePath, rawResults, location) {
|
|
|
4023
4355
|
message: `Field '${location}.auto_push' must be a boolean`
|
|
4024
4356
|
});
|
|
4025
4357
|
}
|
|
4358
|
+
if (resultsRecord.sync !== void 0) {
|
|
4359
|
+
if (typeof resultsRecord.sync !== "object" || resultsRecord.sync === null || Array.isArray(resultsRecord.sync)) {
|
|
4360
|
+
errors.push({
|
|
4361
|
+
severity: "error",
|
|
4362
|
+
filePath,
|
|
4363
|
+
location: `${location}.sync`,
|
|
4364
|
+
message: `Field '${location}.sync' must be an object`
|
|
4365
|
+
});
|
|
4366
|
+
} else {
|
|
4367
|
+
const syncRecord = resultsRecord.sync;
|
|
4368
|
+
if (syncRecord.auto_push !== void 0 && typeof syncRecord.auto_push !== "boolean") {
|
|
4369
|
+
errors.push({
|
|
4370
|
+
severity: "error",
|
|
4371
|
+
filePath,
|
|
4372
|
+
location: `${location}.sync.auto_push`,
|
|
4373
|
+
message: `Field '${location}.sync.auto_push' must be a boolean`
|
|
4374
|
+
});
|
|
4375
|
+
}
|
|
4376
|
+
if (syncRecord.require_push !== void 0 && typeof syncRecord.require_push !== "boolean") {
|
|
4377
|
+
errors.push({
|
|
4378
|
+
severity: "error",
|
|
4379
|
+
filePath,
|
|
4380
|
+
location: `${location}.sync.require_push`,
|
|
4381
|
+
message: `Field '${location}.sync.require_push' must be a boolean`
|
|
4382
|
+
});
|
|
4383
|
+
}
|
|
4384
|
+
}
|
|
4385
|
+
}
|
|
4026
4386
|
if (resultsRecord.branch_prefix !== void 0 && (typeof resultsRecord.branch_prefix !== "string" || resultsRecord.branch_prefix.trim().length === 0)) {
|
|
4027
4387
|
errors.push({
|
|
4028
4388
|
severity: "error",
|
|
@@ -4251,7 +4611,7 @@ function looksLikeFilePath(arg) {
|
|
|
4251
4611
|
}
|
|
4252
4612
|
async function fileExists3(filePath) {
|
|
4253
4613
|
try {
|
|
4254
|
-
await
|
|
4614
|
+
await access5(filePath);
|
|
4255
4615
|
return true;
|
|
4256
4616
|
} catch {
|
|
4257
4617
|
return false;
|
|
@@ -4609,6 +4969,15 @@ function normalizeString(value) {
|
|
|
4609
4969
|
const trimmed = value.trim();
|
|
4610
4970
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
4611
4971
|
}
|
|
4972
|
+
/**
 * Converts a results-repo option value into a results-config override.
 *
 * Values that look like a local filesystem path become `{ repo_path }`;
 * anything else is treated as a remote repository and becomes `{ repo }`.
 * The keyword "current" is shorthand for the path ".".
 *
 * Recognized path forms: ".", "..", a leading "./", "../", ".\" or "..\",
 * an absolute POSIX path, a leading "~/" or "~\", and a Windows drive path
 * such as "C:\..." or "C:/...".
 *
 * @param {string|undefined} value - Raw option value.
 * @returns {{}|{repo_path: string}|{repo: string}} Override fragment; empty when no value is given.
 */
function resultsRepoOverride(value) {
  if (!value) {
    return {};
  }
  if (value === "current") {
    return { repo_path: "." };
  }
  const looksLikeLocalPath =
    value === "." ||
    value === ".." ||
    // Relative prefixes with either separator, so Windows-style ".\" and
    // "..\" are handled like the "~\" and drive-letter forms below.
    /^\.{1,2}[/\\]/.test(value) ||
    value.startsWith("/") ||
    /^~[/\\]/.test(value) ||
    /^[A-Za-z]:[/\\]/.test(value);
  return looksLikeLocalPath ? { repo_path: value } : { repo: value };
}
|
|
4612
4981
|
function resolveTimestampPlaceholder(value) {
|
|
4613
4982
|
if (!value.includes("{timestamp}")) {
|
|
4614
4983
|
return value;
|
|
@@ -4664,13 +5033,13 @@ function normalizeSourceMetadataByEvalFile(value) {
|
|
|
4664
5033
|
const entries = [...value.entries()].filter(
|
|
4665
5034
|
(entry) => typeof entry[0] === "string" && typeof entry[1] === "object" && entry[1] !== null && !Array.isArray(entry[1])
|
|
4666
5035
|
);
|
|
4667
|
-
return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [
|
|
5036
|
+
return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [path14.resolve(key), metadata])) : void 0;
|
|
4668
5037
|
}
|
|
4669
5038
|
if (typeof value === "object" && value !== null && !Array.isArray(value)) {
|
|
4670
5039
|
const entries = Object.entries(value).filter(
|
|
4671
5040
|
(entry) => typeof entry[1] === "object" && entry[1] !== null && !Array.isArray(entry[1])
|
|
4672
5041
|
);
|
|
4673
|
-
return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [
|
|
5042
|
+
return entries.length > 0 ? new Map(entries.map(([key, metadata]) => [path14.resolve(key), metadata])) : void 0;
|
|
4674
5043
|
}
|
|
4675
5044
|
return void 0;
|
|
4676
5045
|
}
|
|
@@ -4684,10 +5053,10 @@ var LEGACY_OUTPUT_FILE_EXTENSIONS = /* @__PURE__ */ new Set([
|
|
|
4684
5053
|
".htm"
|
|
4685
5054
|
]);
|
|
4686
5055
|
/**
 * Tells whether a value's file extension (case-insensitive) is one of the
 * legacy output file extensions.
 *
 * @param {string} value - Path-like option value.
 * @returns {boolean} `true` when the lower-cased extension is in LEGACY_OUTPUT_FILE_EXTENSIONS.
 */
function looksLikeLegacyOutputFilePath(value) {
  const extension = path14.extname(value);
  const normalized = extension.toLowerCase();
  return LEGACY_OUTPUT_FILE_EXTENSIONS.has(normalized);
}
|
|
4689
5058
|
function outputFileMigrationMessage(value) {
|
|
4690
|
-
const ext =
|
|
5059
|
+
const ext = path14.extname(value).toLowerCase();
|
|
4691
5060
|
const removalHint = ext === ".xml" ? "JUnit XML export from agentv eval has been removed." : "Flat result file export from agentv eval has been removed.";
|
|
4692
5061
|
return `--output expects a run directory, not a file path: ${value}
|
|
4693
5062
|
${removalHint} Set --output <dir> for the canonical run artifacts; AgentV always writes <dir>/index.jsonl.`;
|
|
@@ -4695,7 +5064,7 @@ ${removalHint} Set --output <dir> for the canonical run artifacts; AgentV always
|
|
|
4695
5064
|
function artifactsMigrationMessage(artifactsDir, outputDir) {
|
|
4696
5065
|
const lines = [`--artifacts was removed from agentv eval. Use --output ${artifactsDir} instead.`];
|
|
4697
5066
|
if (outputDir && looksLikeLegacyOutputFilePath(outputDir)) {
|
|
4698
|
-
const ext =
|
|
5067
|
+
const ext = path14.extname(outputDir).toLowerCase();
|
|
4699
5068
|
lines.push(
|
|
4700
5069
|
ext === ".xml" ? "JUnit XML export from agentv eval has been removed." : "Flat result file export from agentv eval has been removed."
|
|
4701
5070
|
);
|
|
@@ -4774,6 +5143,21 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
4774
5143
|
const yamlWorkspacePath = normalizeString(yamlExecutionRecord?.workspace_path);
|
|
4775
5144
|
const workspacePath = cliWorkspacePath ?? yamlWorkspacePath;
|
|
4776
5145
|
const workspaceMode = cliWorkspacePath ? "static" : cliWorkspaceMode ?? yamlWorkspaceMode;
|
|
5146
|
+
const resultsRepo = normalizeString(rawOptions.resultsRepo);
|
|
5147
|
+
const resultsPush = normalizeBoolean(rawOptions.resultsPush);
|
|
5148
|
+
const resultsNoPush = normalizeBoolean(rawOptions.noResultsPush);
|
|
5149
|
+
const resultsRequirePush = normalizeBoolean(rawOptions.resultsRequirePush);
|
|
5150
|
+
const resultsOverrides = {
|
|
5151
|
+
...resultsRepoOverride(resultsRepo),
|
|
5152
|
+
...normalizeString(rawOptions.resultsBranch) !== void 0 && {
|
|
5153
|
+
branch: normalizeString(rawOptions.resultsBranch)
|
|
5154
|
+
},
|
|
5155
|
+
...normalizeString(rawOptions.resultsRemote) !== void 0 && {
|
|
5156
|
+
remote: normalizeString(rawOptions.resultsRemote)
|
|
5157
|
+
},
|
|
5158
|
+
...resultsPush || resultsNoPush ? { auto_push: resultsPush && !resultsNoPush } : {},
|
|
5159
|
+
...resultsRequirePush ? { require_push: true } : {}
|
|
5160
|
+
};
|
|
4777
5161
|
return {
|
|
4778
5162
|
target: singleTarget,
|
|
4779
5163
|
cliTargets,
|
|
@@ -4825,11 +5209,12 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
4825
5209
|
budgetUsd: normalizeOptionalNumber(rawOptions.budgetUsd),
|
|
4826
5210
|
sourceMetadataByEvalFile: normalizeSourceMetadataByEvalFile(
|
|
4827
5211
|
rawOptions.sourceMetadataByEvalFile
|
|
4828
|
-
)
|
|
5212
|
+
),
|
|
5213
|
+
resultsOverrides: Object.keys(resultsOverrides).length > 0 ? resultsOverrides : void 0
|
|
4829
5214
|
};
|
|
4830
5215
|
}
|
|
4831
5216
|
function withSourceMetadata(result, testFilePath, options) {
|
|
4832
|
-
const sourceMetadata = options.sourceMetadataByEvalFile?.get(
|
|
5217
|
+
const sourceMetadata = options.sourceMetadataByEvalFile?.get(path14.resolve(testFilePath));
|
|
4833
5218
|
if (!sourceMetadata) {
|
|
4834
5219
|
return result;
|
|
4835
5220
|
}
|
|
@@ -4843,7 +5228,7 @@ function withSourceMetadata(result, testFilePath, options) {
|
|
|
4843
5228
|
}
|
|
4844
5229
|
async function ensureFileExists(filePath, description) {
|
|
4845
5230
|
try {
|
|
4846
|
-
await
|
|
5231
|
+
await access6(filePath, constants4.F_OK);
|
|
4847
5232
|
} catch {
|
|
4848
5233
|
throw new Error(`${description} not found: ${filePath}`);
|
|
4849
5234
|
}
|
|
@@ -4851,7 +5236,7 @@ async function ensureFileExists(filePath, description) {
|
|
|
4851
5236
|
/**
 * Creates the default run directory for an experiment and returns the path
 * of its `index.jsonl` file.
 *
 * @param {string} cwd - Base directory passed to `buildDefaultRunDir`.
 * @param {*} experiment - Experiment identifier passed to `buildDefaultRunDir`.
 * @returns {string} Path of `index.jsonl` inside the (now existing) run directory.
 */
function buildDefaultOutputPathForExperiment(cwd, experiment) {
  const targetDir = buildDefaultRunDir(cwd, experiment);
  // Create the directory (and any missing parents) before handing out the path.
  mkdirSync2(targetDir, { recursive: true });
  const indexFile = path14.join(targetDir, "index.jsonl");
  return indexFile;
}
|
|
4856
5241
|
function createProgressReporter(maxWorkers, options) {
|
|
4857
5242
|
const display = new ProgressDisplay(maxWorkers, options);
|
|
@@ -4865,7 +5250,7 @@ function createProgressReporter(maxWorkers, options) {
|
|
|
4865
5250
|
};
|
|
4866
5251
|
}
|
|
4867
5252
|
/**
 * Builds a key for a test case from its absolute file path and test id,
 * joined by "::".
 *
 * @param {string} testFilePath - Path of the eval file (resolved to absolute).
 * @param {*} testId - Test identifier; interpolated into the key as a string.
 * @returns {string} Key of the form `<absolute path>::<testId>`.
 */
function makeTestCaseKey(testFilePath, testId) {
  const absoluteFile = path14.resolve(testFilePath);
  return `${absoluteFile}::${testId}`;
}
|
|
4870
5255
|
function resolveTargetLabel(requestedName, resolvedName) {
|
|
4871
5256
|
if (resolvedName !== requestedName) {
|
|
@@ -4927,7 +5312,7 @@ async function prepareFileMetadata(params) {
|
|
|
4927
5312
|
repoRoot,
|
|
4928
5313
|
verbose: options.verbose
|
|
4929
5314
|
});
|
|
4930
|
-
const relativePath =
|
|
5315
|
+
const relativePath = path14.relative(cwd, testFilePath);
|
|
4931
5316
|
const category = deriveCategory(relativePath);
|
|
4932
5317
|
const suite = await loadTestSuite(testFilePath, repoRoot, {
|
|
4933
5318
|
verbose: options.verbose,
|
|
@@ -4952,7 +5337,7 @@ async function prepareFileMetadata(params) {
|
|
|
4952
5337
|
selections = [
|
|
4953
5338
|
{
|
|
4954
5339
|
selection: transcriptSelection,
|
|
4955
|
-
inlineTargetLabel: `transcript (${
|
|
5340
|
+
inlineTargetLabel: `transcript (${path14.basename(options.transcript)})`
|
|
4956
5341
|
}
|
|
4957
5342
|
];
|
|
4958
5343
|
} else if (suite.inlineTarget && options.cliTargets.length === 0) {
|
|
@@ -5118,7 +5503,7 @@ async function runSingleEvalFile(params) {
|
|
|
5118
5503
|
} = params;
|
|
5119
5504
|
const targetName = selection.targetName;
|
|
5120
5505
|
const replayRecording = options.recordReplay ? {
|
|
5121
|
-
fixturesPath:
|
|
5506
|
+
fixturesPath: path14.resolve(options.recordReplay),
|
|
5122
5507
|
sourceTarget: targetName,
|
|
5123
5508
|
variant: options.recordReplayVariant
|
|
5124
5509
|
} : void 0;
|
|
@@ -5269,7 +5654,7 @@ async function runEvalCommand(input) {
|
|
|
5269
5654
|
);
|
|
5270
5655
|
}
|
|
5271
5656
|
const repoRoot = await findRepoRoot(cwd);
|
|
5272
|
-
const yamlConfig = await loadConfig(
|
|
5657
|
+
const yamlConfig = await loadConfig(path14.join(cwd, "_"), repoRoot);
|
|
5273
5658
|
let requiredVersionCheck;
|
|
5274
5659
|
if (yamlConfig?.required_version) {
|
|
5275
5660
|
requiredVersionCheck = await enforceRequiredVersion(yamlConfig.required_version, {
|
|
@@ -5305,7 +5690,7 @@ async function runEvalCommand(input) {
|
|
|
5305
5690
|
}
|
|
5306
5691
|
let retryNonErrorResults;
|
|
5307
5692
|
if (options.retryErrors) {
|
|
5308
|
-
const retryPath =
|
|
5693
|
+
const retryPath = path14.resolve(options.retryErrors);
|
|
5309
5694
|
await ensureFileExists(retryPath, "Retry-errors JSONL file");
|
|
5310
5695
|
const completedIds = await loadFullyCompletedTestIds(retryPath);
|
|
5311
5696
|
const errorIds = await loadErrorTestIds(retryPath);
|
|
@@ -5323,7 +5708,7 @@ async function runEvalCommand(input) {
|
|
|
5323
5708
|
if (cachedDir) {
|
|
5324
5709
|
options = { ...options, outputDir: cachedDir };
|
|
5325
5710
|
const flagLabel = options.rerunFailed ? "rerun-failed" : "resume";
|
|
5326
|
-
const displayDir =
|
|
5711
|
+
const displayDir = path14.relative(cwd, cachedDir) || cachedDir;
|
|
5327
5712
|
console.log(`Auto-detected last run dir for --${flagLabel}: ${displayDir}`);
|
|
5328
5713
|
}
|
|
5329
5714
|
}
|
|
@@ -5332,8 +5717,8 @@ async function runEvalCommand(input) {
|
|
|
5332
5717
|
if (options.resume && !options.retryErrors) {
|
|
5333
5718
|
const explicitResumeDir = options.outputDir;
|
|
5334
5719
|
if (explicitResumeDir) {
|
|
5335
|
-
const resumeIndexPath =
|
|
5336
|
-
if (
|
|
5720
|
+
const resumeIndexPath = path14.join(path14.resolve(explicitResumeDir), "index.jsonl");
|
|
5721
|
+
if (existsSync7(resumeIndexPath)) {
|
|
5337
5722
|
const content = await readFile8(resumeIndexPath, "utf8");
|
|
5338
5723
|
const existingResults = parseJsonlResults(content);
|
|
5339
5724
|
resumeSkipKeys = /* @__PURE__ */ new Set();
|
|
@@ -5357,7 +5742,7 @@ async function runEvalCommand(input) {
|
|
|
5357
5742
|
}
|
|
5358
5743
|
}
|
|
5359
5744
|
if (options.workspacePath) {
|
|
5360
|
-
const resolvedWorkspace =
|
|
5745
|
+
const resolvedWorkspace = path14.resolve(options.workspacePath);
|
|
5361
5746
|
try {
|
|
5362
5747
|
const { stat: stat3 } = await import("node:fs/promises");
|
|
5363
5748
|
const stats = await stat3(resolvedWorkspace);
|
|
@@ -5379,27 +5764,35 @@ async function runEvalCommand(input) {
|
|
|
5379
5764
|
let runDir;
|
|
5380
5765
|
let outputPath;
|
|
5381
5766
|
if (explicitDir) {
|
|
5382
|
-
runDir =
|
|
5767
|
+
runDir = path14.resolve(explicitDir);
|
|
5383
5768
|
mkdirSync2(runDir, { recursive: true });
|
|
5384
|
-
outputPath =
|
|
5769
|
+
outputPath = path14.join(runDir, "index.jsonl");
|
|
5385
5770
|
} else {
|
|
5386
5771
|
outputPath = buildDefaultOutputPathForExperiment(cwd, options.experiment);
|
|
5387
|
-
runDir =
|
|
5772
|
+
runDir = path14.dirname(outputPath);
|
|
5388
5773
|
}
|
|
5389
5774
|
let otelExporter = null;
|
|
5390
5775
|
const useFileExport = !!options.otelFile;
|
|
5391
5776
|
if (options.exportOtel || useFileExport) {
|
|
5392
5777
|
try {
|
|
5393
|
-
const { OtelTraceExporter
|
|
5778
|
+
const { OtelTraceExporter } = await import("./dist-6Z4OSITR.js");
|
|
5394
5779
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
5395
5780
|
let headers = {};
|
|
5781
|
+
let resourceAttributes = {};
|
|
5396
5782
|
if (options.otelBackend) {
|
|
5397
|
-
const
|
|
5398
|
-
|
|
5399
|
-
|
|
5400
|
-
|
|
5783
|
+
const resolvedBackend = await resolveOtelBackend(options.otelBackend, {
|
|
5784
|
+
env: process.env,
|
|
5785
|
+
cwd
|
|
5786
|
+
});
|
|
5787
|
+
if (resolvedBackend) {
|
|
5788
|
+
endpoint = resolvedBackend.endpoint;
|
|
5789
|
+
headers = { ...headers, ...resolvedBackend.headers };
|
|
5790
|
+
resourceAttributes = { ...resourceAttributes, ...resolvedBackend.resourceAttributes };
|
|
5791
|
+
for (const warning of resolvedBackend.warnings ?? []) {
|
|
5792
|
+
console.warn(warning);
|
|
5793
|
+
}
|
|
5401
5794
|
} else {
|
|
5402
|
-
console.warn(`Unknown OTel backend
|
|
5795
|
+
console.warn(`Unknown OTel backend resolver: ${options.otelBackend}`);
|
|
5403
5796
|
}
|
|
5404
5797
|
}
|
|
5405
5798
|
if (process.env.OTEL_EXPORTER_OTLP_HEADERS) {
|
|
@@ -5412,9 +5805,10 @@ async function runEvalCommand(input) {
|
|
|
5412
5805
|
otelExporter = new OtelTraceExporter({
|
|
5413
5806
|
endpoint,
|
|
5414
5807
|
headers,
|
|
5808
|
+
resourceAttributes,
|
|
5415
5809
|
captureContent,
|
|
5416
5810
|
groupTurns: options.otelGroupTurns,
|
|
5417
|
-
otlpFilePath: options.otelFile ?
|
|
5811
|
+
otlpFilePath: options.otelFile ? path14.resolve(options.otelFile) : void 0
|
|
5418
5812
|
});
|
|
5419
5813
|
const initialized = await otelExporter.init();
|
|
5420
5814
|
if (!initialized) {
|
|
@@ -5432,9 +5826,9 @@ async function runEvalCommand(input) {
|
|
|
5432
5826
|
}
|
|
5433
5827
|
const primaryWritePath = outputPath;
|
|
5434
5828
|
console.log(`Artifact directory: ${runDir}`);
|
|
5435
|
-
const resolvedTestFiles = input.testFiles.map((file) =>
|
|
5829
|
+
const resolvedTestFiles = input.testFiles.map((file) => path14.resolve(file));
|
|
5436
5830
|
if (options.otelFile) {
|
|
5437
|
-
console.log(`OTLP JSON file: ${
|
|
5831
|
+
console.log(`OTLP JSON file: ${path14.resolve(options.otelFile)}`);
|
|
5438
5832
|
}
|
|
5439
5833
|
const evaluationRunner = await resolveEvaluationRunner();
|
|
5440
5834
|
const allResults = [];
|
|
@@ -5462,7 +5856,7 @@ async function runEvalCommand(input) {
|
|
|
5462
5856
|
for (const [testFilePath, meta] of fileMetadata.entries()) {
|
|
5463
5857
|
if (!matchesTagFilters(meta.tags, options.tags, options.excludeTags)) {
|
|
5464
5858
|
fileMetadata.delete(testFilePath);
|
|
5465
|
-
skippedFiles.push(
|
|
5859
|
+
skippedFiles.push(path14.relative(cwd, testFilePath));
|
|
5466
5860
|
}
|
|
5467
5861
|
}
|
|
5468
5862
|
if (skippedFiles.length > 0 && options.verbose) {
|
|
@@ -5485,12 +5879,12 @@ async function runEvalCommand(input) {
|
|
|
5485
5879
|
tsConfigCache: options.tsConfigCache
|
|
5486
5880
|
});
|
|
5487
5881
|
const activeCachePath = options.cachePath ?? yamlCachePath ?? options.tsConfigCachePath;
|
|
5488
|
-
const cache = cacheEnabled ? new ResponseCache(activeCachePath ?
|
|
5882
|
+
const cache = cacheEnabled ? new ResponseCache(activeCachePath ? path14.resolve(activeCachePath) : void 0) : void 0;
|
|
5489
5883
|
if (cache) {
|
|
5490
5884
|
console.log(`Response cache: enabled (${cache.cachePath})`);
|
|
5491
5885
|
}
|
|
5492
5886
|
if (options.recordReplay) {
|
|
5493
|
-
console.log(`Replay recording: ${
|
|
5887
|
+
console.log(`Replay recording: ${path14.resolve(options.recordReplay)}`);
|
|
5494
5888
|
}
|
|
5495
5889
|
const yamlThreshold = firstMeta?.threshold;
|
|
5496
5890
|
const resolvedThreshold = options.threshold ?? yamlThreshold;
|
|
@@ -5586,7 +5980,7 @@ async function runEvalCommand(input) {
|
|
|
5586
5980
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
5587
5981
|
let transcriptProviderFactory;
|
|
5588
5982
|
if (options.transcript) {
|
|
5589
|
-
const { TranscriptProvider } = await import("./dist-
|
|
5983
|
+
const { TranscriptProvider } = await import("./dist-6Z4OSITR.js");
|
|
5590
5984
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
5591
5985
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
5592
5986
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -5614,7 +6008,11 @@ async function runEvalCommand(input) {
|
|
|
5614
6008
|
let wipCleanedUp = false;
|
|
5615
6009
|
let finalExportStatus = "disabled";
|
|
5616
6010
|
{
|
|
5617
|
-
const wipConfig = await loadNormalizedResultsConfig(
|
|
6011
|
+
const wipConfig = await loadNormalizedResultsConfig(
|
|
6012
|
+
cwd,
|
|
6013
|
+
void 0,
|
|
6014
|
+
options.resultsOverrides
|
|
6015
|
+
).catch(() => void 0);
|
|
5618
6016
|
if (wipConfig?.auto_push) {
|
|
5619
6017
|
wipLoop = new WipCheckpointLoop({
|
|
5620
6018
|
config: wipConfig,
|
|
@@ -5631,7 +6029,7 @@ async function runEvalCommand(input) {
|
|
|
5631
6029
|
if (!targetPrep2) continue;
|
|
5632
6030
|
const budgetMsg = `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`;
|
|
5633
6031
|
console.log(`
|
|
5634
|
-
\u26A0 ${budgetMsg} \u2014 skipping ${
|
|
6032
|
+
\u26A0 ${budgetMsg} \u2014 skipping ${path14.basename(testFilePath)}`);
|
|
5635
6033
|
for (const { selection } of targetPrep2.selections) {
|
|
5636
6034
|
const skippedResults = targetPrep2.testCases.map((testCase) => ({
|
|
5637
6035
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -5710,7 +6108,7 @@ async function runEvalCommand(input) {
|
|
|
5710
6108
|
threshold: resolvedThreshold,
|
|
5711
6109
|
providerFactory: transcriptProviderFactory ?? targetPrep.providerFactory
|
|
5712
6110
|
});
|
|
5713
|
-
const evalFile =
|
|
6111
|
+
const evalFile = path14.relative(cwd, testFilePath);
|
|
5714
6112
|
const existingSummary = remoteEvalSummaries.find(
|
|
5715
6113
|
(summary2) => summary2.evalFile === evalFile
|
|
5716
6114
|
);
|
|
@@ -5727,7 +6125,7 @@ async function runEvalCommand(input) {
|
|
|
5727
6125
|
const message = fileError instanceof Error ? fileError.message : String(fileError);
|
|
5728
6126
|
console.error(
|
|
5729
6127
|
`
|
|
5730
|
-
[ERROR] \u26A0 Eval file failed: ${
|
|
6128
|
+
[ERROR] \u26A0 Eval file failed: ${path14.basename(testFilePath)} \u2014 ${message}
|
|
5731
6129
|
`
|
|
5732
6130
|
);
|
|
5733
6131
|
const errorResults = filteredTestCases.map(
|
|
@@ -5806,7 +6204,7 @@ ${formatRequiredVersionFailureNote(requiredVersionCheck)}`);
|
|
|
5806
6204
|
);
|
|
5807
6205
|
const taskBundleTargets = buildTaskBundleTargetSelections(activeTestFiles, fileMetadata);
|
|
5808
6206
|
if (isResumeAppend) {
|
|
5809
|
-
const { writePerTestArtifacts } = await import("./artifact-writer-
|
|
6207
|
+
const { writePerTestArtifacts } = await import("./artifact-writer-GIAIMGPQ.js");
|
|
5810
6208
|
await writePerTestArtifacts(allResults, runDir, {
|
|
5811
6209
|
experiment: normalizeExperimentName(options.experiment),
|
|
5812
6210
|
cwd,
|
|
@@ -5818,7 +6216,7 @@ ${formatRequiredVersionFailureNote(requiredVersionCheck)}`);
|
|
|
5818
6216
|
runDir,
|
|
5819
6217
|
{ evalFile, experiment: normalizeExperimentName(options.experiment) }
|
|
5820
6218
|
);
|
|
5821
|
-
const indexPath =
|
|
6219
|
+
const indexPath = path14.join(runDir, "index.jsonl");
|
|
5822
6220
|
console.log(`Artifact workspace updated: ${runDir}`);
|
|
5823
6221
|
console.log(` Index: ${indexPath}`);
|
|
5824
6222
|
console.log(` Per-test artifacts: ${runDir} (${allResults.length} new test directories)`);
|
|
@@ -5879,13 +6277,14 @@ Results written to: ${outputPath}`);
|
|
|
5879
6277
|
status: result.executionStatus === "execution_error" || result.error ? "ERROR" : result.score >= DEFAULT_THRESHOLD ? "PASS" : "FAIL"
|
|
5880
6278
|
}))
|
|
5881
6279
|
})),
|
|
5882
|
-
experiment: normalizeExperimentName(options.experiment)
|
|
6280
|
+
experiment: normalizeExperimentName(options.experiment),
|
|
6281
|
+
results_overrides: options.resultsOverrides
|
|
5883
6282
|
});
|
|
5884
6283
|
}
|
|
5885
6284
|
if (summary.executionErrorCount > 0 && !options.retryErrors && !options.resume) {
|
|
5886
|
-
const evalFileArgs = activeTestFiles.map((f) =>
|
|
6285
|
+
const evalFileArgs = activeTestFiles.map((f) => path14.relative(cwd, f)).join(" ");
|
|
5887
6286
|
const targetFlag = options.target ? ` --target ${options.target}` : "";
|
|
5888
|
-
const relativeRunDir =
|
|
6287
|
+
const relativeRunDir = path14.relative(cwd, runDir);
|
|
5889
6288
|
console.log(
|
|
5890
6289
|
`
|
|
5891
6290
|
Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed tests with:
|
|
@@ -5934,8 +6333,8 @@ async function resolveEvaluationRunner() {
|
|
|
5934
6333
|
if (!overridePath) {
|
|
5935
6334
|
return runEvaluation;
|
|
5936
6335
|
}
|
|
5937
|
-
const resolved =
|
|
5938
|
-
const moduleUrl =
|
|
6336
|
+
const resolved = path14.isAbsolute(overridePath) ? overridePath : path14.resolve(process.cwd(), overridePath);
|
|
6337
|
+
const moduleUrl = pathToFileURL2(resolved).href;
|
|
5939
6338
|
const mod = await import(moduleUrl);
|
|
5940
6339
|
const candidate = mod.runEvaluation;
|
|
5941
6340
|
if (typeof candidate !== "function") {
|
|
@@ -5947,11 +6346,11 @@ async function resolveEvaluationRunner() {
|
|
|
5947
6346
|
}
|
|
5948
6347
|
|
|
5949
6348
|
// src/commands/eval/discover.ts
|
|
5950
|
-
import
|
|
6349
|
+
import path15 from "node:path";
|
|
5951
6350
|
import fg2 from "fast-glob";
|
|
5952
6351
|
async function discoverEvalFiles(cwd) {
|
|
5953
6352
|
const repoRoot = await findRepoRoot(cwd);
|
|
5954
|
-
const config = await loadConfig(
|
|
6353
|
+
const config = await loadConfig(path15.join(cwd, "_"), repoRoot);
|
|
5955
6354
|
const patterns = config?.eval_patterns && config.eval_patterns.length > 0 ? config.eval_patterns : DEFAULT_EVAL_PATTERNS;
|
|
5956
6355
|
const ignore = ["**/node_modules/**", "**/dist/**"];
|
|
5957
6356
|
const matches = await fg2(patterns, {
|
|
@@ -5963,7 +6362,7 @@ async function discoverEvalFiles(cwd) {
|
|
|
5963
6362
|
caseSensitiveMatch: false
|
|
5964
6363
|
});
|
|
5965
6364
|
const evalFiles = matches.map((absPath) => {
|
|
5966
|
-
const relativePath =
|
|
6365
|
+
const relativePath = path15.relative(cwd, absPath);
|
|
5967
6366
|
const category = deriveCategory(relativePath);
|
|
5968
6367
|
return { path: absPath, relativePath, category };
|
|
5969
6368
|
});
|
|
@@ -5987,6 +6386,10 @@ export {
|
|
|
5987
6386
|
resolveEvalPaths,
|
|
5988
6387
|
findRepoRoot,
|
|
5989
6388
|
package_default,
|
|
6389
|
+
RESULT_INDEX_FILENAME,
|
|
6390
|
+
createRunDirName,
|
|
6391
|
+
buildDefaultRunDir,
|
|
6392
|
+
resolveRunManifestPath,
|
|
5990
6393
|
parseResultManifest,
|
|
5991
6394
|
resolveResultSourcePath,
|
|
5992
6395
|
loadManifestResults,
|
|
@@ -6036,4 +6439,4 @@ export {
|
|
|
6036
6439
|
getCategories,
|
|
6037
6440
|
filterByCategory
|
|
6038
6441
|
};
|
|
6039
|
-
//# sourceMappingURL=chunk-
|
|
6442
|
+
//# sourceMappingURL=chunk-WKA5QDNQ.js.map
|