agentv 4.15.3-next.1 → 4.15.4-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,12 +42,12 @@ import {
42
42
  syncResultsRepo,
43
43
  toCamelCaseDeep,
44
44
  toTranscriptJsonLines
45
- } from "./chunk-6X2UFVGI.js";
45
+ } from "./chunk-K4QF5TNX.js";
46
46
 
47
47
  // package.json
48
48
  var package_default = {
49
49
  name: "agentv",
50
- version: "4.15.3-next.1",
50
+ version: "4.15.4-next.1",
51
51
  description: "CLI entry point for AgentV",
52
52
  type: "module",
53
53
  repository: {
@@ -4923,6 +4923,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4923
4923
  retryErrors: normalizeString(rawOptions.retryErrors),
4924
4924
  workspaceMode,
4925
4925
  workspacePath,
4926
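+ // Enabled when any source opts in (CLI flag, YAML config, or TS config); the three values are OR-ed, so a `true` in either config cannot be overridden by the CLI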
4927
+ keepWorkspaces: normalizeBoolean(rawOptions.keepWorkspaces) || yamlExecution?.keep_workspaces === true || config?.execution?.keepWorkspaces === true,
4926
4928
  benchmarkJson: normalizeString(rawOptions.benchmarkJson),
4927
4929
  artifacts: normalizeString(rawOptions.artifacts),
4928
4930
  graderTarget: normalizeString(rawOptions.graderTarget),
@@ -5191,6 +5193,7 @@ async function runSingleEvalFile(params) {
5191
5193
  maxConcurrency: resolvedWorkers,
5192
5194
  workspaceMode: options.workspaceMode,
5193
5195
  workspacePath: options.workspacePath,
5196
+ keepWorkspaces: options.keepWorkspaces,
5194
5197
  trials: trialsConfig,
5195
5198
  totalBudgetUsd,
5196
5199
  failOnError,
@@ -5352,7 +5355,7 @@ async function runEvalCommand(input) {
5352
5355
  const useFileExport = !!options.otelFile;
5353
5356
  if (options.exportOtel || useFileExport) {
5354
5357
  try {
5355
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-7ZYP7EBD.js");
5358
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-ZVTERRJS.js");
5356
5359
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5357
5360
  let headers = {};
5358
5361
  if (options.otelBackend) {
@@ -5548,7 +5551,7 @@ async function runEvalCommand(input) {
5548
5551
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5549
5552
  let transcriptProviderFactory;
5550
5553
  if (options.transcript) {
5551
- const { TranscriptProvider } = await import("./dist-7ZYP7EBD.js");
5554
+ const { TranscriptProvider } = await import("./dist-ZVTERRJS.js");
5552
5555
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5553
5556
  const totalTests = [...fileMetadata.values()].reduce(
5554
5557
  (sum, meta) => sum + meta.testCases.length,
@@ -5705,15 +5708,18 @@ async function runEvalCommand(input) {
5705
5708
  `Export file(s) written: ${resolvedExportPaths.map((p) => path17.relative(cwd, p)).join(", ")}`
5706
5709
  );
5707
5710
  }
5708
- const failedWithWorkspaces = allResults.filter(
5709
- (r) => r.workspacePath && (r.error || r.score < 0.5)
5710
- );
5711
- if (failedWithWorkspaces.length > 0) {
5712
- console.log("\nWorkspaces preserved for debugging:");
5713
- for (const result of failedWithWorkspaces) {
5714
- console.log(` ${result.testId}: ${result.workspacePath}`);
5711
+ const resultsWithWorkspaces = allResults.filter((r) => r.workspacePath);
5712
+ const preservedWorkspaces = options.keepWorkspaces ? resultsWithWorkspaces : resultsWithWorkspaces.filter((r) => r.error || r.score < 0.5);
5713
+ if (preservedWorkspaces.length > 0) {
5714
+ console.log("\nPreserved workspaces:");
5715
+ for (const result of preservedWorkspaces) {
5716
+ console.log(` ${result.testId} -> ${result.workspacePath}`);
5715
5717
  }
5716
5718
  }
5719
+ const usedWorkspaces = resultsWithWorkspaces.length > 0 || options.workspaceMode && options.workspaceMode !== "static";
5720
+ if (!options.keepWorkspaces && usedWorkspaces) {
5721
+ console.log("Use --keep-workspaces to preserve all workspaces for inspection.");
5722
+ }
5717
5723
  if (allResults.length > 0) {
5718
5724
  console.log(`
5719
5725
  Results written to: ${outputPath}`);
@@ -5870,4 +5876,4 @@ export {
5870
5876
  getCategories,
5871
5877
  filterByCategory
5872
5878
  };
5873
- //# sourceMappingURL=chunk-T4WCAGSF.js.map
5879
+ //# sourceMappingURL=chunk-4IDJVYC6.js.map