agentv 3.14.5 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-UBLKP2F4.js → chunk-E3VSJJI4.js} +568 -74
- package/dist/chunk-E3VSJJI4.js.map +1 -0
- package/dist/{chunk-GUXXTOYK.js → chunk-OT2J474N.js} +44 -18
- package/dist/chunk-OT2J474N.js.map +1 -0
- package/dist/{chunk-ELQEFMGO.js → chunk-OXBBWZOY.js} +592 -295
- package/dist/chunk-OXBBWZOY.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-5EEXTTC3.js → dist-3Z22B6SU.js} +18 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-WUIEXGWM.js → interactive-D5UTP72M.js} +4 -11
- package/dist/interactive-D5UTP72M.js.map +1 -0
- package/dist/studio/assets/index-BuKVkxFj.css +1 -0
- package/dist/studio/assets/index-CE3-mmv0.js +11 -0
- package/dist/studio/assets/index-DBU720Fm.js +71 -0
- package/dist/studio/index.html +13 -0
- package/dist/templates/.env.example +0 -3
- package/package.json +1 -1
- package/dist/chunk-ELQEFMGO.js.map +0 -1
- package/dist/chunk-GUXXTOYK.js.map +0 -1
- package/dist/chunk-UBLKP2F4.js.map +0 -1
- package/dist/interactive-WUIEXGWM.js.map +0 -1
- package/dist/{dist-5EEXTTC3.js.map → dist-3Z22B6SU.js.map} +0 -0
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
CLI_PLACEHOLDERS,
|
|
4
|
+
COMMON_TARGET_SETTINGS,
|
|
4
5
|
KNOWN_PROVIDERS,
|
|
5
6
|
PROVIDER_ALIASES,
|
|
6
7
|
ResponseCache,
|
|
7
8
|
buildDirectoryChain,
|
|
8
9
|
buildSearchRoots,
|
|
10
|
+
deriveCategory,
|
|
9
11
|
ensureVSCodeSubagents,
|
|
10
12
|
findGitRoot,
|
|
11
13
|
interpolateEnv,
|
|
@@ -27,12 +29,12 @@ import {
|
|
|
27
29
|
subscribeToCopilotCliLogEntries,
|
|
28
30
|
subscribeToCopilotSdkLogEntries,
|
|
29
31
|
subscribeToPiLogEntries
|
|
30
|
-
} from "./chunk-ELQEFMGO.js";
|
|
32
|
+
} from "./chunk-OXBBWZOY.js";
|
|
31
33
|
|
|
32
34
|
// package.json
|
|
33
35
|
var package_default = {
|
|
34
36
|
name: "agentv",
|
|
35
|
-
version: "3.14.5",
|
|
37
|
+
version: "4.0.0",
|
|
36
38
|
description: "CLI entry point for AgentV",
|
|
37
39
|
type: "module",
|
|
38
40
|
repository: {
|
|
@@ -110,6 +112,27 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
110
112
|
results.add(candidatePath);
|
|
111
113
|
continue;
|
|
112
114
|
}
|
|
115
|
+
if (stats.isDirectory()) {
|
|
116
|
+
const dirGlob = path.posix.join(
|
|
117
|
+
candidatePath.replace(/\\/g, "/"),
|
|
118
|
+
"**/*.eval.{yaml,yml}"
|
|
119
|
+
);
|
|
120
|
+
const dirMatches = await fg(dirGlob, {
|
|
121
|
+
absolute: true,
|
|
122
|
+
onlyFiles: true,
|
|
123
|
+
unique: true,
|
|
124
|
+
dot: true,
|
|
125
|
+
followSymbolicLinks: true
|
|
126
|
+
});
|
|
127
|
+
if (dirMatches.length === 0) {
|
|
128
|
+
unmatched.push(pattern);
|
|
129
|
+
} else {
|
|
130
|
+
for (const filePath of dirMatches) {
|
|
131
|
+
results.add(path.normalize(filePath));
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
continue;
|
|
135
|
+
}
|
|
113
136
|
} catch {
|
|
114
137
|
}
|
|
115
138
|
const globPattern = pattern.includes("\\") ? pattern.replace(/\\/g, "/") : pattern;
|
|
@@ -304,7 +327,7 @@ import path3 from "node:path";
|
|
|
304
327
|
var RESULT_INDEX_FILENAME = "index.jsonl";
|
|
305
328
|
var RESULT_RUNS_DIRNAME = "runs";
|
|
306
329
|
function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
|
|
307
|
-
return
|
|
330
|
+
return timestamp.toISOString().replace(/[:.]/g, "-");
|
|
308
331
|
}
|
|
309
332
|
function buildDefaultRunDir(cwd) {
|
|
310
333
|
return path3.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME, createRunDirName());
|
|
@@ -562,20 +585,16 @@ function safeArtifactPathSegment(value, fallback) {
|
|
|
562
585
|
function safeTestId(testId) {
|
|
563
586
|
return safeArtifactPathSegment(testId, "unknown");
|
|
564
587
|
}
|
|
565
|
-
function
|
|
566
|
-
return
|
|
567
|
-
}
|
|
568
|
-
function getEvalSet(result) {
|
|
569
|
-
const record = result;
|
|
570
|
-
return result.eval_set ?? record.evalSet;
|
|
588
|
+
function getDataset(result) {
|
|
589
|
+
return result.dataset;
|
|
571
590
|
}
|
|
572
591
|
function buildArtifactSubdir(result) {
|
|
573
592
|
const segments = [];
|
|
574
|
-
const evalSet =
|
|
593
|
+
const evalSet = getDataset(result);
|
|
575
594
|
if (evalSet) {
|
|
576
595
|
segments.push(safeArtifactPathSegment(evalSet, "default"));
|
|
577
596
|
}
|
|
578
|
-
segments.push(safeTestId(result.testId)
|
|
597
|
+
segments.push(safeTestId(result.testId));
|
|
579
598
|
return path4.posix.join(...segments);
|
|
580
599
|
}
|
|
581
600
|
function formatOutputMarkdown(output) {
|
|
@@ -598,7 +617,8 @@ function buildResultIndexArtifact(result) {
|
|
|
598
617
|
return {
|
|
599
618
|
timestamp: result.timestamp,
|
|
600
619
|
test_id: result.testId ?? "unknown",
|
|
601
|
-
|
|
620
|
+
dataset: getDataset(result),
|
|
621
|
+
category: result.category,
|
|
602
622
|
conversation_id: result.conversationId,
|
|
603
623
|
score: result.score,
|
|
604
624
|
target: result.target ?? "unknown",
|
|
@@ -1667,7 +1687,7 @@ var JunitWriter = class _JunitWriter {
|
|
|
1667
1687
|
this.closed = true;
|
|
1668
1688
|
const grouped = /* @__PURE__ */ new Map();
|
|
1669
1689
|
for (const result of this.results) {
|
|
1670
|
-
const suite = result.
|
|
1690
|
+
const suite = result.dataset ?? "default";
|
|
1671
1691
|
const existing = grouped.get(suite);
|
|
1672
1692
|
if (existing) {
|
|
1673
1693
|
existing.push(result);
|
|
@@ -1995,7 +2015,8 @@ function hydrateManifestRecord(baseDir, record) {
|
|
|
1995
2015
|
return {
|
|
1996
2016
|
timestamp: record.timestamp,
|
|
1997
2017
|
testId,
|
|
1998
|
-
|
|
2018
|
+
dataset: record.dataset,
|
|
2019
|
+
category: record.category,
|
|
1999
2020
|
target: record.target,
|
|
2000
2021
|
score: record.score,
|
|
2001
2022
|
executionStatus: record.execution_status,
|
|
@@ -2053,6 +2074,7 @@ function loadLightweightResults(sourceFile) {
|
|
|
2053
2074
|
return parseResultManifest(content).map((record) => ({
|
|
2054
2075
|
testId: record.test_id ?? record.eval_id ?? "unknown",
|
|
2055
2076
|
target: record.target,
|
|
2077
|
+
experiment: record.experiment,
|
|
2056
2078
|
score: record.score,
|
|
2057
2079
|
scores: record.scores,
|
|
2058
2080
|
executionStatus: record.execution_status,
|
|
@@ -3020,7 +3042,7 @@ function validateContentForRoleMarkers(content, location, filePath, errors) {
|
|
|
3020
3042
|
function isObject2(value) {
|
|
3021
3043
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
3022
3044
|
}
|
|
3023
|
-
var COMMON_SETTINGS =
|
|
3045
|
+
var COMMON_SETTINGS = new Set(COMMON_TARGET_SETTINGS);
|
|
3024
3046
|
var RETRY_SETTINGS = /* @__PURE__ */ new Set([
|
|
3025
3047
|
"max_retries",
|
|
3026
3048
|
"maxRetries",
|
|
@@ -4140,9 +4162,12 @@ async function prepareFileMetadata(params) {
|
|
|
4140
4162
|
repoRoot,
|
|
4141
4163
|
verbose: options.verbose
|
|
4142
4164
|
});
|
|
4165
|
+
const relativePath = path15.relative(cwd, testFilePath);
|
|
4166
|
+
const category = deriveCategory(relativePath);
|
|
4143
4167
|
const suite = await loadTestSuite(testFilePath, repoRoot, {
|
|
4144
4168
|
verbose: options.verbose,
|
|
4145
|
-
filter: options.filter
|
|
4169
|
+
filter: options.filter,
|
|
4170
|
+
category
|
|
4146
4171
|
});
|
|
4147
4172
|
const filteredIds = suite.tests.map((value) => value.id);
|
|
4148
4173
|
const cliTargets = options.cliTargets;
|
|
@@ -4421,7 +4446,7 @@ async function runEvalCommand(input) {
|
|
|
4421
4446
|
const useFileExport = !!options.otelFile;
|
|
4422
4447
|
if (options.exportOtel || useFileExport) {
|
|
4423
4448
|
try {
|
|
4424
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-5EEXTTC3.js");
|
|
4449
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-3Z22B6SU.js");
|
|
4425
4450
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4426
4451
|
let headers = {};
|
|
4427
4452
|
if (options.otelBackend) {
|
|
@@ -4794,6 +4819,7 @@ export {
|
|
|
4794
4819
|
resolveExistingRunPrimaryPath,
|
|
4795
4820
|
resolveWorkspaceOrFilePath,
|
|
4796
4821
|
writeArtifactsFromResults,
|
|
4822
|
+
parseResultManifest,
|
|
4797
4823
|
resolveResultSourcePath,
|
|
4798
4824
|
loadManifestResults,
|
|
4799
4825
|
loadLightweightResults,
|
|
@@ -4812,4 +4838,4 @@ export {
|
|
|
4812
4838
|
selectTarget,
|
|
4813
4839
|
runEvalCommand
|
|
4814
4840
|
};
|
|
4815
|
-
//# sourceMappingURL=chunk-GUXXTOYK.js.map
|
|
4841
|
+
//# sourceMappingURL=chunk-OT2J474N.js.map
|