agentv 4.15.3 → 4.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-P2UXBHIA.js → chunk-CK47W2B5.js} +9 -4
- package/dist/chunk-CK47W2B5.js.map +1 -0
- package/dist/{chunk-6X2UFVGI.js → chunk-K4QF5TNX.js} +5 -2
- package/dist/chunk-K4QF5TNX.js.map +1 -0
- package/dist/{chunk-NXZLEIAG.js → chunk-TT36K77B.js} +18 -12
- package/dist/chunk-TT36K77B.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-7ZYP7EBD.js → dist-ZVTERRJS.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-QOOK2KED.js → interactive-RXDJ5OXY.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-6X2UFVGI.js.map +0 -1
- package/dist/chunk-NXZLEIAG.js.map +0 -1
- package/dist/chunk-P2UXBHIA.js.map +0 -1
- package/dist/{dist-7ZYP7EBD.js.map → dist-ZVTERRJS.js.map} +0 -0
- package/dist/{interactive-QOOK2KED.js.map → interactive-RXDJ5OXY.js.map} +0 -0
|
@@ -42,12 +42,12 @@ import {
|
|
|
42
42
|
syncResultsRepo,
|
|
43
43
|
toCamelCaseDeep,
|
|
44
44
|
toTranscriptJsonLines
|
|
45
|
-
} from "./chunk-6X2UFVGI.js";
|
|
45
|
+
} from "./chunk-K4QF5TNX.js";
|
|
46
46
|
|
|
47
47
|
// package.json
|
|
48
48
|
var package_default = {
|
|
49
49
|
name: "agentv",
|
|
50
|
-
version: "4.15.3",
|
|
50
|
+
version: "4.15.4",
|
|
51
51
|
description: "CLI entry point for AgentV",
|
|
52
52
|
type: "module",
|
|
53
53
|
repository: {
|
|
@@ -4923,6 +4923,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
4923
4923
|
retryErrors: normalizeString(rawOptions.retryErrors),
|
|
4924
4924
|
workspaceMode,
|
|
4925
4925
|
workspacePath,
|
|
4926
|
+
// Precedence: CLI > YAML config > TS config
|
|
4927
|
+
keepWorkspaces: normalizeBoolean(rawOptions.keepWorkspaces) || yamlExecution?.keep_workspaces === true || config?.execution?.keepWorkspaces === true,
|
|
4926
4928
|
benchmarkJson: normalizeString(rawOptions.benchmarkJson),
|
|
4927
4929
|
artifacts: normalizeString(rawOptions.artifacts),
|
|
4928
4930
|
graderTarget: normalizeString(rawOptions.graderTarget),
|
|
@@ -5191,6 +5193,7 @@ async function runSingleEvalFile(params) {
|
|
|
5191
5193
|
maxConcurrency: resolvedWorkers,
|
|
5192
5194
|
workspaceMode: options.workspaceMode,
|
|
5193
5195
|
workspacePath: options.workspacePath,
|
|
5196
|
+
keepWorkspaces: options.keepWorkspaces,
|
|
5194
5197
|
trials: trialsConfig,
|
|
5195
5198
|
totalBudgetUsd,
|
|
5196
5199
|
failOnError,
|
|
@@ -5352,7 +5355,7 @@ async function runEvalCommand(input) {
|
|
|
5352
5355
|
const useFileExport = !!options.otelFile;
|
|
5353
5356
|
if (options.exportOtel || useFileExport) {
|
|
5354
5357
|
try {
|
|
5355
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-7ZYP7EBD.js");
|
|
5358
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-ZVTERRJS.js");
|
|
5356
5359
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
5357
5360
|
let headers = {};
|
|
5358
5361
|
if (options.otelBackend) {
|
|
@@ -5548,7 +5551,7 @@ async function runEvalCommand(input) {
|
|
|
5548
5551
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
5549
5552
|
let transcriptProviderFactory;
|
|
5550
5553
|
if (options.transcript) {
|
|
5551
|
-
const { TranscriptProvider } = await import("./dist-7ZYP7EBD.js");
|
|
5554
|
+
const { TranscriptProvider } = await import("./dist-ZVTERRJS.js");
|
|
5552
5555
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
5553
5556
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
5554
5557
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -5705,15 +5708,18 @@ async function runEvalCommand(input) {
|
|
|
5705
5708
|
`Export file(s) written: ${resolvedExportPaths.map((p) => path17.relative(cwd, p)).join(", ")}`
|
|
5706
5709
|
);
|
|
5707
5710
|
}
|
|
5708
|
-
const
|
|
5709
|
-
|
|
5710
|
-
)
|
|
5711
|
-
|
|
5712
|
-
|
|
5713
|
-
|
|
5714
|
-
console.log(` ${result.testId}: ${result.workspacePath}`);
|
|
5711
|
+
const resultsWithWorkspaces = allResults.filter((r) => r.workspacePath);
|
|
5712
|
+
const preservedWorkspaces = options.keepWorkspaces ? resultsWithWorkspaces : resultsWithWorkspaces.filter((r) => r.error || r.score < 0.5);
|
|
5713
|
+
if (preservedWorkspaces.length > 0) {
|
|
5714
|
+
console.log("\nPreserved workspaces:");
|
|
5715
|
+
for (const result of preservedWorkspaces) {
|
|
5716
|
+
console.log(` ${result.testId} -> ${result.workspacePath}`);
|
|
5715
5717
|
}
|
|
5716
5718
|
}
|
|
5719
|
+
const usedWorkspaces = resultsWithWorkspaces.length > 0 || options.workspaceMode && options.workspaceMode !== "static";
|
|
5720
|
+
if (!options.keepWorkspaces && usedWorkspaces) {
|
|
5721
|
+
console.log("Use --keep-workspaces to preserve all workspaces for inspection.");
|
|
5722
|
+
}
|
|
5717
5723
|
if (allResults.length > 0) {
|
|
5718
5724
|
console.log(`
|
|
5719
5725
|
Results written to: ${outputPath}`);
|
|
@@ -5870,4 +5876,4 @@ export {
|
|
|
5870
5876
|
getCategories,
|
|
5871
5877
|
filterByCategory
|
|
5872
5878
|
};
|
|
5873
|
-
//# sourceMappingURL=chunk-NXZLEIAG.js.map
|
|
5879
|
+
//# sourceMappingURL=chunk-TT36K77B.js.map
|