@agentv/core 4.3.4 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +192 -58
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +60 -1
- package/dist/index.d.ts +60 -1
- package/dist/index.js +177 -53
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1358,13 +1358,13 @@ function serializeAttributeValue(value) {
|
|
|
1358
1358
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1359
1359
|
return { stringValue: String(value) };
|
|
1360
1360
|
}
|
|
1361
|
-
var import_promises35,
|
|
1361
|
+
var import_promises35, import_node_path52, OtlpJsonFileExporter;
|
|
1362
1362
|
var init_otlp_json_file_exporter = __esm({
|
|
1363
1363
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1364
1364
|
"use strict";
|
|
1365
1365
|
init_cjs_shims();
|
|
1366
1366
|
import_promises35 = require("fs/promises");
|
|
1367
|
-
|
|
1367
|
+
import_node_path52 = require("path");
|
|
1368
1368
|
OtlpJsonFileExporter = class {
|
|
1369
1369
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1370
1370
|
spans = [];
|
|
@@ -1403,7 +1403,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1403
1403
|
}
|
|
1404
1404
|
async flush() {
|
|
1405
1405
|
if (this.spans.length === 0) return;
|
|
1406
|
-
await (0, import_promises35.mkdir)((0,
|
|
1406
|
+
await (0, import_promises35.mkdir)((0, import_node_path52.dirname)(this.filePath), { recursive: true });
|
|
1407
1407
|
const otlpJson = {
|
|
1408
1408
|
resourceSpans: [
|
|
1409
1409
|
{
|
|
@@ -1458,6 +1458,7 @@ __export(index_exports, {
|
|
|
1458
1458
|
ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
|
|
1459
1459
|
WorkspaceCreationError: () => WorkspaceCreationError,
|
|
1460
1460
|
WorkspacePoolManager: () => WorkspacePoolManager,
|
|
1461
|
+
addProject: () => addProject,
|
|
1461
1462
|
assembleLlmGraderPrompt: () => assembleLlmGraderPrompt,
|
|
1462
1463
|
assembleLlmJudgePrompt: () => assembleLlmGraderPrompt,
|
|
1463
1464
|
avgToolDurationMs: () => avgToolDurationMs,
|
|
@@ -1487,11 +1488,13 @@ __export(index_exports, {
|
|
|
1487
1488
|
deepEqual: () => deepEqual,
|
|
1488
1489
|
defineConfig: () => defineConfig,
|
|
1489
1490
|
deriveCategory: () => deriveCategory,
|
|
1491
|
+
deriveProjectId: () => deriveProjectId,
|
|
1490
1492
|
detectFormat: () => detectFormat,
|
|
1491
1493
|
discoverAssertions: () => discoverAssertions,
|
|
1492
1494
|
discoverCopilotSessions: () => discoverCopilotSessions,
|
|
1493
1495
|
discoverGraders: () => discoverGraders,
|
|
1494
1496
|
discoverJudges: () => discoverGraders,
|
|
1497
|
+
discoverProjects: () => discoverProjects,
|
|
1495
1498
|
discoverProviders: () => discoverProviders,
|
|
1496
1499
|
ensureVSCodeSubagents: () => ensureVSCodeSubagents,
|
|
1497
1500
|
evaluate: () => evaluate,
|
|
@@ -1515,6 +1518,8 @@ __export(index_exports, {
|
|
|
1515
1518
|
generateRubrics: () => generateRubrics,
|
|
1516
1519
|
getAgentvHome: () => getAgentvHome,
|
|
1517
1520
|
getOutputFilenames: () => getOutputFilenames,
|
|
1521
|
+
getProject: () => getProject,
|
|
1522
|
+
getProjectsRegistryPath: () => getProjectsRegistryPath,
|
|
1518
1523
|
getSubagentsRoot: () => getSubagentsRoot,
|
|
1519
1524
|
getTextContent: () => getTextContent,
|
|
1520
1525
|
getTraceStateRoot: () => getTraceStateRoot,
|
|
@@ -1536,6 +1541,7 @@ __export(index_exports, {
|
|
|
1536
1541
|
loadEvalCaseById: () => loadEvalCaseById,
|
|
1537
1542
|
loadEvalCases: () => loadEvalCases,
|
|
1538
1543
|
loadEvalSuite: () => loadEvalSuite,
|
|
1544
|
+
loadProjectRegistry: () => loadProjectRegistry,
|
|
1539
1545
|
loadTestById: () => loadTestById,
|
|
1540
1546
|
loadTestSuite: () => loadTestSuite,
|
|
1541
1547
|
loadTests: () => loadTests,
|
|
@@ -1550,6 +1556,7 @@ __export(index_exports, {
|
|
|
1550
1556
|
readTargetDefinitions: () => readTargetDefinitions,
|
|
1551
1557
|
readTestSuiteMetadata: () => readTestSuiteMetadata,
|
|
1552
1558
|
readTextFile: () => readTextFile,
|
|
1559
|
+
removeProject: () => removeProject,
|
|
1553
1560
|
resolveAndCreateProvider: () => resolveAndCreateProvider,
|
|
1554
1561
|
resolveFileReference: () => resolveFileReference3,
|
|
1555
1562
|
resolveTargetDefinition: () => resolveTargetDefinition,
|
|
@@ -1568,6 +1575,7 @@ __export(index_exports, {
|
|
|
1568
1575
|
runIsJsonAssertion: () => runIsJsonAssertion,
|
|
1569
1576
|
runRegexAssertion: () => runRegexAssertion,
|
|
1570
1577
|
runStartsWithAssertion: () => runStartsWithAssertion,
|
|
1578
|
+
saveProjectRegistry: () => saveProjectRegistry,
|
|
1571
1579
|
scoreToVerdict: () => scoreToVerdict,
|
|
1572
1580
|
shouldEnableCache: () => shouldEnableCache,
|
|
1573
1581
|
shouldSkipCacheForTemperature: () => shouldSkipCacheForTemperature,
|
|
@@ -1580,6 +1588,7 @@ __export(index_exports, {
|
|
|
1580
1588
|
toCamelCaseDeep: () => toCamelCaseDeep,
|
|
1581
1589
|
toSnakeCaseDeep: () => toSnakeCaseDeep,
|
|
1582
1590
|
tokensPerTool: () => tokensPerTool,
|
|
1591
|
+
touchProject: () => touchProject,
|
|
1583
1592
|
transpileEvalYaml: () => transpileEvalYaml,
|
|
1584
1593
|
transpileEvalYamlFile: () => transpileEvalYamlFile,
|
|
1585
1594
|
trimBaselineResult: () => trimBaselineResult
|
|
@@ -11556,8 +11565,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
11556
11565
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
11557
11566
|
if (!parseResult.success) {
|
|
11558
11567
|
const firstError = parseResult.error.errors[0];
|
|
11559
|
-
const
|
|
11560
|
-
const prefix =
|
|
11568
|
+
const path51 = firstError?.path.join(".") || "";
|
|
11569
|
+
const prefix = path51 ? `${target.name} ${path51}: ` : `${target.name}: `;
|
|
11561
11570
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
11562
11571
|
}
|
|
11563
11572
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -13622,13 +13631,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
13622
13631
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
13623
13632
|
const { mkdir: mkdir17, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
13624
13633
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
13625
|
-
const
|
|
13634
|
+
const path51 = await import("path");
|
|
13626
13635
|
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
13627
|
-
const dir =
|
|
13636
|
+
const dir = path51.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
13628
13637
|
await mkdir17(dir, { recursive: true });
|
|
13629
|
-
const stdinPath =
|
|
13630
|
-
const stdoutPath =
|
|
13631
|
-
const stderrPath =
|
|
13638
|
+
const stdinPath = path51.join(dir, "stdin.txt");
|
|
13639
|
+
const stdoutPath = path51.join(dir, "stdout.txt");
|
|
13640
|
+
const stderrPath = path51.join(dir, "stderr.txt");
|
|
13632
13641
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
13633
13642
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
13634
13643
|
const { spawn: spawn5 } = await import("child_process");
|
|
@@ -15848,115 +15857,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
15848
15857
|
* Evaluate a single field against the expected value.
|
|
15849
15858
|
*/
|
|
15850
15859
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
15851
|
-
const { path:
|
|
15852
|
-
const candidateValue = resolvePath(candidateData,
|
|
15853
|
-
const expectedValue = resolvePath(expectedData,
|
|
15860
|
+
const { path: path51, match, required = true, weight = 1 } = fieldConfig;
|
|
15861
|
+
const candidateValue = resolvePath(candidateData, path51);
|
|
15862
|
+
const expectedValue = resolvePath(expectedData, path51);
|
|
15854
15863
|
if (expectedValue === void 0) {
|
|
15855
15864
|
return {
|
|
15856
|
-
path:
|
|
15865
|
+
path: path51,
|
|
15857
15866
|
score: 1,
|
|
15858
15867
|
// No expected value means no comparison needed
|
|
15859
15868
|
weight,
|
|
15860
15869
|
hit: true,
|
|
15861
|
-
message: `${
|
|
15870
|
+
message: `${path51}: no expected value`
|
|
15862
15871
|
};
|
|
15863
15872
|
}
|
|
15864
15873
|
if (candidateValue === void 0) {
|
|
15865
15874
|
if (required) {
|
|
15866
15875
|
return {
|
|
15867
|
-
path:
|
|
15876
|
+
path: path51,
|
|
15868
15877
|
score: 0,
|
|
15869
15878
|
weight,
|
|
15870
15879
|
hit: false,
|
|
15871
|
-
message: `${
|
|
15880
|
+
message: `${path51} (required, missing)`
|
|
15872
15881
|
};
|
|
15873
15882
|
}
|
|
15874
15883
|
return {
|
|
15875
|
-
path:
|
|
15884
|
+
path: path51,
|
|
15876
15885
|
score: 1,
|
|
15877
15886
|
// Don't penalize missing optional fields
|
|
15878
15887
|
weight: 0,
|
|
15879
15888
|
// Zero weight means it won't affect the score
|
|
15880
15889
|
hit: true,
|
|
15881
|
-
message: `${
|
|
15890
|
+
message: `${path51}: optional field missing`
|
|
15882
15891
|
};
|
|
15883
15892
|
}
|
|
15884
15893
|
switch (match) {
|
|
15885
15894
|
case "exact":
|
|
15886
|
-
return this.compareExact(
|
|
15895
|
+
return this.compareExact(path51, candidateValue, expectedValue, weight);
|
|
15887
15896
|
case "numeric_tolerance":
|
|
15888
15897
|
return this.compareNumericTolerance(
|
|
15889
|
-
|
|
15898
|
+
path51,
|
|
15890
15899
|
candidateValue,
|
|
15891
15900
|
expectedValue,
|
|
15892
15901
|
fieldConfig,
|
|
15893
15902
|
weight
|
|
15894
15903
|
);
|
|
15895
15904
|
case "date":
|
|
15896
|
-
return this.compareDate(
|
|
15905
|
+
return this.compareDate(path51, candidateValue, expectedValue, fieldConfig, weight);
|
|
15897
15906
|
default:
|
|
15898
15907
|
return {
|
|
15899
|
-
path:
|
|
15908
|
+
path: path51,
|
|
15900
15909
|
score: 0,
|
|
15901
15910
|
weight,
|
|
15902
15911
|
hit: false,
|
|
15903
|
-
message: `${
|
|
15912
|
+
message: `${path51}: unknown match type "${match}"`
|
|
15904
15913
|
};
|
|
15905
15914
|
}
|
|
15906
15915
|
}
|
|
15907
15916
|
/**
|
|
15908
15917
|
* Exact equality comparison.
|
|
15909
15918
|
*/
|
|
15910
|
-
compareExact(
|
|
15919
|
+
compareExact(path51, candidateValue, expectedValue, weight) {
|
|
15911
15920
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
15912
15921
|
return {
|
|
15913
|
-
path:
|
|
15922
|
+
path: path51,
|
|
15914
15923
|
score: 1,
|
|
15915
15924
|
weight,
|
|
15916
15925
|
hit: true,
|
|
15917
|
-
message:
|
|
15926
|
+
message: path51
|
|
15918
15927
|
};
|
|
15919
15928
|
}
|
|
15920
15929
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
15921
15930
|
return {
|
|
15922
|
-
path:
|
|
15931
|
+
path: path51,
|
|
15923
15932
|
score: 0,
|
|
15924
15933
|
weight,
|
|
15925
15934
|
hit: false,
|
|
15926
|
-
message: `${
|
|
15935
|
+
message: `${path51} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
15927
15936
|
};
|
|
15928
15937
|
}
|
|
15929
15938
|
return {
|
|
15930
|
-
path:
|
|
15939
|
+
path: path51,
|
|
15931
15940
|
score: 0,
|
|
15932
15941
|
weight,
|
|
15933
15942
|
hit: false,
|
|
15934
|
-
message: `${
|
|
15943
|
+
message: `${path51} (value mismatch)`
|
|
15935
15944
|
};
|
|
15936
15945
|
}
|
|
15937
15946
|
/**
|
|
15938
15947
|
* Numeric comparison with absolute or relative tolerance.
|
|
15939
15948
|
*/
|
|
15940
|
-
compareNumericTolerance(
|
|
15949
|
+
compareNumericTolerance(path51, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15941
15950
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
15942
15951
|
const candidateNum = toNumber(candidateValue);
|
|
15943
15952
|
const expectedNum = toNumber(expectedValue);
|
|
15944
15953
|
if (candidateNum === null || expectedNum === null) {
|
|
15945
15954
|
return {
|
|
15946
|
-
path:
|
|
15955
|
+
path: path51,
|
|
15947
15956
|
score: 0,
|
|
15948
15957
|
weight,
|
|
15949
15958
|
hit: false,
|
|
15950
|
-
message: `${
|
|
15959
|
+
message: `${path51} (non-numeric value)`
|
|
15951
15960
|
};
|
|
15952
15961
|
}
|
|
15953
15962
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
15954
15963
|
return {
|
|
15955
|
-
path:
|
|
15964
|
+
path: path51,
|
|
15956
15965
|
score: 0,
|
|
15957
15966
|
weight,
|
|
15958
15967
|
hit: false,
|
|
15959
|
-
message: `${
|
|
15968
|
+
message: `${path51} (invalid numeric value)`
|
|
15960
15969
|
};
|
|
15961
15970
|
}
|
|
15962
15971
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -15969,61 +15978,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
15969
15978
|
}
|
|
15970
15979
|
if (withinTolerance) {
|
|
15971
15980
|
return {
|
|
15972
|
-
path:
|
|
15981
|
+
path: path51,
|
|
15973
15982
|
score: 1,
|
|
15974
15983
|
weight,
|
|
15975
15984
|
hit: true,
|
|
15976
|
-
message: `${
|
|
15985
|
+
message: `${path51} (within tolerance: diff=${diff.toFixed(2)})`
|
|
15977
15986
|
};
|
|
15978
15987
|
}
|
|
15979
15988
|
return {
|
|
15980
|
-
path:
|
|
15989
|
+
path: path51,
|
|
15981
15990
|
score: 0,
|
|
15982
15991
|
weight,
|
|
15983
15992
|
hit: false,
|
|
15984
|
-
message: `${
|
|
15993
|
+
message: `${path51} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
15985
15994
|
};
|
|
15986
15995
|
}
|
|
15987
15996
|
/**
|
|
15988
15997
|
* Date comparison with format normalization.
|
|
15989
15998
|
*/
|
|
15990
|
-
compareDate(
|
|
15999
|
+
compareDate(path51, candidateValue, expectedValue, fieldConfig, weight) {
|
|
15991
16000
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
15992
16001
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
15993
16002
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
15994
16003
|
if (candidateDate === null) {
|
|
15995
16004
|
return {
|
|
15996
|
-
path:
|
|
16005
|
+
path: path51,
|
|
15997
16006
|
score: 0,
|
|
15998
16007
|
weight,
|
|
15999
16008
|
hit: false,
|
|
16000
|
-
message: `${
|
|
16009
|
+
message: `${path51} (unparseable candidate date)`
|
|
16001
16010
|
};
|
|
16002
16011
|
}
|
|
16003
16012
|
if (expectedDate === null) {
|
|
16004
16013
|
return {
|
|
16005
|
-
path:
|
|
16014
|
+
path: path51,
|
|
16006
16015
|
score: 0,
|
|
16007
16016
|
weight,
|
|
16008
16017
|
hit: false,
|
|
16009
|
-
message: `${
|
|
16018
|
+
message: `${path51} (unparseable expected date)`
|
|
16010
16019
|
};
|
|
16011
16020
|
}
|
|
16012
16021
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
16013
16022
|
return {
|
|
16014
|
-
path:
|
|
16023
|
+
path: path51,
|
|
16015
16024
|
score: 1,
|
|
16016
16025
|
weight,
|
|
16017
16026
|
hit: true,
|
|
16018
|
-
message:
|
|
16027
|
+
message: path51
|
|
16019
16028
|
};
|
|
16020
16029
|
}
|
|
16021
16030
|
return {
|
|
16022
|
-
path:
|
|
16031
|
+
path: path51,
|
|
16023
16032
|
score: 0,
|
|
16024
16033
|
weight,
|
|
16025
16034
|
hit: false,
|
|
16026
|
-
message: `${
|
|
16035
|
+
message: `${path51} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
16027
16036
|
};
|
|
16028
16037
|
}
|
|
16029
16038
|
/**
|
|
@@ -16056,11 +16065,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
16056
16065
|
};
|
|
16057
16066
|
}
|
|
16058
16067
|
};
|
|
16059
|
-
function resolvePath(obj,
|
|
16060
|
-
if (!
|
|
16068
|
+
function resolvePath(obj, path51) {
|
|
16069
|
+
if (!path51 || !obj) {
|
|
16061
16070
|
return void 0;
|
|
16062
16071
|
}
|
|
16063
|
-
const parts =
|
|
16072
|
+
const parts = path51.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
16064
16073
|
let current = obj;
|
|
16065
16074
|
for (const part of parts) {
|
|
16066
16075
|
if (current === null || current === void 0) {
|
|
@@ -16549,8 +16558,8 @@ var TokenUsageEvaluator = class {
|
|
|
16549
16558
|
|
|
16550
16559
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
16551
16560
|
init_cjs_shims();
|
|
16552
|
-
function getNestedValue(obj,
|
|
16553
|
-
const parts =
|
|
16561
|
+
function getNestedValue(obj, path51) {
|
|
16562
|
+
const parts = path51.split(".");
|
|
16554
16563
|
let current = obj;
|
|
16555
16564
|
for (const part of parts) {
|
|
16556
16565
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -20817,7 +20826,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
20817
20826
|
return null;
|
|
20818
20827
|
}
|
|
20819
20828
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
20820
|
-
const { readFileSync:
|
|
20829
|
+
const { readFileSync: readFileSync3 } = await import("fs");
|
|
20821
20830
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
20822
20831
|
const envFiles = [];
|
|
20823
20832
|
for (const dir of chain) {
|
|
@@ -20826,7 +20835,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
20826
20835
|
}
|
|
20827
20836
|
for (let i = 0; i < envFiles.length; i++) {
|
|
20828
20837
|
try {
|
|
20829
|
-
const content =
|
|
20838
|
+
const content = readFileSync3(envFiles[i], "utf8");
|
|
20830
20839
|
for (const line of content.split("\n")) {
|
|
20831
20840
|
const trimmed = line.trim();
|
|
20832
20841
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -20899,12 +20908,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
20899
20908
|
".agentv/config.js"
|
|
20900
20909
|
];
|
|
20901
20910
|
async function loadTsConfig(projectRoot) {
|
|
20902
|
-
const { existsSync:
|
|
20911
|
+
const { existsSync: existsSync6 } = await import("fs");
|
|
20903
20912
|
const { pathToFileURL } = await import("url");
|
|
20904
20913
|
const { join: join2 } = await import("path");
|
|
20905
20914
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
20906
20915
|
const filePath = join2(projectRoot, fileName);
|
|
20907
|
-
if (!
|
|
20916
|
+
if (!existsSync6(filePath)) {
|
|
20908
20917
|
continue;
|
|
20909
20918
|
}
|
|
20910
20919
|
try {
|
|
@@ -21049,6 +21058,122 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
21049
21058
|
return false;
|
|
21050
21059
|
}
|
|
21051
21060
|
|
|
21061
|
+
// src/projects.ts
|
|
21062
|
+
init_cjs_shims();
|
|
21063
|
+
var import_node_fs16 = require("fs");
|
|
21064
|
+
var import_node_path51 = __toESM(require("path"), 1);
|
|
21065
|
+
var import_yaml8 = require("yaml");
|
|
21066
|
+
/**
 * Build the location of the projects registry file.
 *
 * @returns {string} `projects.yaml` joined onto the directory returned by
 *   `getAgentvHome()`.
 */
function getProjectsRegistryPath() {
  const homeDir = getAgentvHome();
  const registryFile = import_node_path51.default.join(homeDir, "projects.yaml");
  return registryFile;
}
|
|
21069
|
+
/**
 * Read the projects registry from disk.
 *
 * Best effort: an empty registry is returned when the file is missing, when
 * reading or parsing it throws, or when the parsed document has no
 * `projects` array.
 *
 * @returns {{ projects: Array }} The registry with the parsed `projects`
 *   array, or `{ projects: [] }` as the fallback.
 */
function loadProjectRegistry() {
  const registryFile = getProjectsRegistryPath();
  const fileIsPresent = (0, import_node_fs16.existsSync)(registryFile);
  if (fileIsPresent) {
    try {
      const yamlText = (0, import_node_fs16.readFileSync)(registryFile, "utf-8");
      const document = (0, import_yaml8.parse)(yamlText);
      if (document && Array.isArray(document.projects)) {
        return { projects: document.projects };
      }
    } catch {
      // Unreadable or malformed registry: fall through to the empty default.
    }
  }
  return { projects: [] };
}
|
|
21085
|
+
/**
 * Serialize the registry to YAML and write it to the registry file.
 *
 * The parent directory is created first. A recursive `mkdirSync` succeeds
 * when the directory already exists, so no separate existence check is
 * needed (and none is made, which avoids a check-then-create race).
 *
 * @param {object} registry - Registry object to persist; it is passed to the
 *   YAML `stringify` function as-is.
 * @returns {void}
 * @throws Propagates any error raised by `mkdirSync` or `writeFileSync`.
 */
function saveProjectRegistry(registry) {
  const registryPath = getProjectsRegistryPath();
  const parentDir = import_node_path51.default.dirname(registryPath);
  (0, import_node_fs16.mkdirSync)(parentDir, { recursive: true });
  (0, import_node_fs16.writeFileSync)(registryPath, (0, import_yaml8.stringify)(registry), "utf-8");
}
|
|
21093
|
+
/**
 * Derive a unique, URL-friendly project id from a directory path.
 *
 * The directory's basename is lower-cased, every character outside
 * `[a-z0-9-]` becomes `-`, runs of `-` are collapsed, and one leading and one
 * trailing `-` are stripped. If nothing is left, the stem `"project"` is used.
 * When the stem is already taken, `-2`, `-3`, ... is appended until the id is
 * free.
 *
 * @param {string} dirPath - Directory path whose basename seeds the id.
 * @param {Iterable<string>} existingIds - Ids that are already in use.
 * @returns {string} An id that does not occur in `existingIds`.
 */
function deriveProjectId(dirPath, existingIds) {
  const slug = import_node_path51.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
  // The fallback must also be the stem for suffixed candidates; building them
  // from the empty slug would produce ids such as "-2".
  const stem = slug || "project";
  // One Set lookup per candidate instead of rescanning the id list each time.
  const taken = new Set(existingIds);
  let candidate = stem;
  for (let suffix = 2; taken.has(candidate); suffix++) {
    candidate = `${stem}-${suffix}`;
  }
  return candidate;
}
|
|
21103
|
+
/**
 * Register a project directory in the projects registry.
 *
 * The path is resolved to an absolute path. If an entry with that exact path
 * is already registered it is returned unchanged; otherwise a new entry is
 * appended and the registry is saved.
 *
 * @param {string} projectPath - Path to the project directory.
 * @returns {object} The existing or newly created entry
 *   (`id`, `name`, `path`, `addedAt`, `lastOpenedAt`).
 * @throws {Error} If the path does not exist, or it contains no `.agentv`
 *   entry.
 */
function addProject(projectPath) {
  const resolvedDir = import_node_path51.default.resolve(projectPath);
  const dirExists = (0, import_node_fs16.existsSync)(resolvedDir);
  if (!dirExists) {
    throw new Error(`Directory not found: ${resolvedDir}`);
  }
  const markerPath = import_node_path51.default.join(resolvedDir, ".agentv");
  const markerExists = (0, import_node_fs16.existsSync)(markerPath);
  if (!markerExists) {
    throw new Error(`No .agentv/ directory found in ${resolvedDir}. Run an evaluation first.`);
  }

  const registry = loadProjectRegistry();
  for (const known of registry.projects) {
    if (known.path === resolvedDir) {
      return known;
    }
  }

  // Both timestamps start out identical for a freshly added project.
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  const takenIds = registry.projects.map((known) => known.id);
  const created = {
    id: deriveProjectId(resolvedDir, takenIds),
    name: import_node_path51.default.basename(resolvedDir),
    path: resolvedDir,
    addedAt: timestamp,
    lastOpenedAt: timestamp
  };
  registry.projects.push(created);
  saveProjectRegistry(registry);
  return created;
}
|
|
21131
|
+
/**
 * Remove the registry entry with the given id.
 *
 * The registry is saved only when an entry was actually removed.
 *
 * @param {string} projectId - Id of the entry to remove.
 * @returns {boolean} `true` if an entry was removed, `false` if no entry has
 *   that id.
 */
function removeProject(projectId) {
  const registry = loadProjectRegistry();
  const position = registry.projects.findIndex((candidate) => candidate.id === projectId);
  const found = position >= 0;
  if (found) {
    registry.projects.splice(position, 1);
    saveProjectRegistry(registry);
  }
  return found;
}
|
|
21139
|
+
/**
 * Look up a registry entry by id.
 *
 * @param {string} projectId - Id to search for.
 * @returns {object | undefined} The first entry whose `id` equals
 *   `projectId`, or `undefined` when there is none.
 */
function getProject(projectId) {
  const { projects } = loadProjectRegistry();
  const matchesId = (candidate) => candidate.id === projectId;
  return projects.find(matchesId);
}
|
|
21142
|
+
/**
 * Set `lastOpenedAt` of the entry with the given id to the current time
 * (ISO 8601 string) and save the registry.
 *
 * Does nothing when no entry has that id.
 *
 * @param {string} projectId - Id of the entry to update.
 * @returns {void}
 */
function touchProject(projectId) {
  const registry = loadProjectRegistry();
  const target = registry.projects.find((candidate) => candidate.id === projectId);
  if (!target) {
    return;
  }
  target.lastOpenedAt = (/* @__PURE__ */ new Date()).toISOString();
  saveProjectRegistry(registry);
}
|
|
21150
|
+
/**
 * Find directories under `rootDir` that contain a `.agentv` entry.
 *
 * The walk is depth-first, with `rootDir` itself at depth 0. A directory that
 * contains `.agentv` is reported and not descended into. Directories at
 * `maxDepth` are still checked for `.agentv` but their children are not
 * visited. Child directories whose name starts with `.` and directories named
 * `node_modules` are skipped. Children are visited in sorted name order so
 * the result does not depend on the order in which the file system lists
 * them.
 *
 * @param {string} rootDir - Directory to start from (resolved to absolute).
 * @param {number} [maxDepth=2] - Deepest level that is inspected.
 * @returns {string[]} Absolute paths of the discovered directories; empty
 *   when `rootDir` does not exist or is not a directory.
 */
function discoverProjects(rootDir, maxDepth = 2) {
  const absRoot = import_node_path51.default.resolve(rootDir);
  if (!(0, import_node_fs16.existsSync)(absRoot) || !(0, import_node_fs16.statSync)(absRoot).isDirectory()) {
    return [];
  }
  const results = [];
  function scan(dir, depth) {
    if (depth > maxDepth) return;
    if ((0, import_node_fs16.existsSync)(import_node_path51.default.join(dir, ".agentv"))) {
      results.push(dir);
      return;
    }
    if (depth === maxDepth) return;
    let childNames;
    try {
      childNames = (0, import_node_fs16.readdirSync)(dir, { withFileTypes: true }).filter((entry) => entry.isDirectory() && !entry.name.startsWith(".") && entry.name !== "node_modules").map((entry) => entry.name).sort();
    } catch {
      // Best effort: a directory that cannot be listed is skipped so one bad
      // directory does not abort the whole discovery.
      return;
    }
    for (const name of childNames) {
      scan(import_node_path51.default.join(dir, name), depth + 1);
    }
  }
  scan(absRoot, 0);
  return results;
}
|
|
21176
|
+
|
|
21052
21177
|
// src/evaluation/baseline.ts
|
|
21053
21178
|
init_cjs_shims();
|
|
21054
21179
|
var STRIPPED_TOP_LEVEL_FIELDS = /* @__PURE__ */ new Set([
|
|
@@ -21649,6 +21774,7 @@ function createAgentKernel() {
|
|
|
21649
21774
|
ToolTrajectoryEvaluator,
|
|
21650
21775
|
WorkspaceCreationError,
|
|
21651
21776
|
WorkspacePoolManager,
|
|
21777
|
+
addProject,
|
|
21652
21778
|
assembleLlmGraderPrompt,
|
|
21653
21779
|
assembleLlmJudgePrompt,
|
|
21654
21780
|
avgToolDurationMs,
|
|
@@ -21678,11 +21804,13 @@ function createAgentKernel() {
|
|
|
21678
21804
|
deepEqual,
|
|
21679
21805
|
defineConfig,
|
|
21680
21806
|
deriveCategory,
|
|
21807
|
+
deriveProjectId,
|
|
21681
21808
|
detectFormat,
|
|
21682
21809
|
discoverAssertions,
|
|
21683
21810
|
discoverCopilotSessions,
|
|
21684
21811
|
discoverGraders,
|
|
21685
21812
|
discoverJudges,
|
|
21813
|
+
discoverProjects,
|
|
21686
21814
|
discoverProviders,
|
|
21687
21815
|
ensureVSCodeSubagents,
|
|
21688
21816
|
evaluate,
|
|
@@ -21706,6 +21834,8 @@ function createAgentKernel() {
|
|
|
21706
21834
|
generateRubrics,
|
|
21707
21835
|
getAgentvHome,
|
|
21708
21836
|
getOutputFilenames,
|
|
21837
|
+
getProject,
|
|
21838
|
+
getProjectsRegistryPath,
|
|
21709
21839
|
getSubagentsRoot,
|
|
21710
21840
|
getTextContent,
|
|
21711
21841
|
getTraceStateRoot,
|
|
@@ -21727,6 +21857,7 @@ function createAgentKernel() {
|
|
|
21727
21857
|
loadEvalCaseById,
|
|
21728
21858
|
loadEvalCases,
|
|
21729
21859
|
loadEvalSuite,
|
|
21860
|
+
loadProjectRegistry,
|
|
21730
21861
|
loadTestById,
|
|
21731
21862
|
loadTestSuite,
|
|
21732
21863
|
loadTests,
|
|
@@ -21741,6 +21872,7 @@ function createAgentKernel() {
|
|
|
21741
21872
|
readTargetDefinitions,
|
|
21742
21873
|
readTestSuiteMetadata,
|
|
21743
21874
|
readTextFile,
|
|
21875
|
+
removeProject,
|
|
21744
21876
|
resolveAndCreateProvider,
|
|
21745
21877
|
resolveFileReference,
|
|
21746
21878
|
resolveTargetDefinition,
|
|
@@ -21759,6 +21891,7 @@ function createAgentKernel() {
|
|
|
21759
21891
|
runIsJsonAssertion,
|
|
21760
21892
|
runRegexAssertion,
|
|
21761
21893
|
runStartsWithAssertion,
|
|
21894
|
+
saveProjectRegistry,
|
|
21762
21895
|
scoreToVerdict,
|
|
21763
21896
|
shouldEnableCache,
|
|
21764
21897
|
shouldSkipCacheForTemperature,
|
|
@@ -21771,6 +21904,7 @@ function createAgentKernel() {
|
|
|
21771
21904
|
toCamelCaseDeep,
|
|
21772
21905
|
toSnakeCaseDeep,
|
|
21773
21906
|
tokensPerTool,
|
|
21907
|
+
touchProject,
|
|
21774
21908
|
transpileEvalYaml,
|
|
21775
21909
|
transpileEvalYamlFile,
|
|
21776
21910
|
trimBaselineResult
|