@agentv/core 4.15.9 → 4.16.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1857,6 +1857,7 @@ __export(index_exports, {
1857
1857
  extractJsonBlob: () => extractJsonBlob,
1858
1858
  extractLastAssistantContent: () => extractLastAssistantContent,
1859
1859
  extractTargetFromSuite: () => extractTargetFromSuite,
1860
+ extractTargetRefsFromSuite: () => extractTargetRefsFromSuite,
1860
1861
  extractTargetsFromSuite: () => extractTargetsFromSuite,
1861
1862
  extractTargetsFromTestCase: () => extractTargetsFromTestCase,
1862
1863
  extractThreshold: () => extractThreshold,
@@ -1866,6 +1867,7 @@ __export(index_exports, {
1866
1867
  findGitRoot: () => findGitRoot,
1867
1868
  freeformEvaluationSchema: () => freeformEvaluationSchema,
1868
1869
  generateRubrics: () => generateRubrics,
1870
+ getAgentvConfigDir: () => getAgentvConfigDir,
1869
1871
  getAgentvHome: () => getAgentvHome,
1870
1872
  getBenchmark: () => getBenchmark,
1871
1873
  getBenchmarksRegistryPath: () => getBenchmarksRegistryPath,
@@ -2655,17 +2657,76 @@ function extractTargetFromSuite(suite) {
2655
2657
  }
2656
2658
  return void 0;
2657
2659
  }
2658
- function extractTargetsFromSuite(suite) {
2660
+ function extractTargetRefsFromSuite(suite) {
2659
2661
  const execution = suite.execution;
2660
2662
  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
2661
2663
  return void 0;
2662
2664
  }
2663
2665
  const targets = execution.targets;
2664
- if (Array.isArray(targets)) {
2665
- const valid = targets.filter((t) => typeof t === "string" && t.trim().length > 0);
2666
- return valid.length > 0 ? valid.map((t) => t.trim()) : void 0;
2666
+ if (!Array.isArray(targets)) {
2667
+ return void 0;
2667
2668
  }
2668
- return void 0;
2669
+ const refs = [];
2670
+ for (const t of targets) {
2671
+ if (typeof t === "string" && t.trim().length > 0) {
2672
+ refs.push({ name: t.trim() });
2673
+ } else if (t && typeof t === "object" && !Array.isArray(t) && "name" in t) {
2674
+ const obj = t;
2675
+ const name = typeof obj.name === "string" ? obj.name.trim() : "";
2676
+ if (name.length === 0) continue;
2677
+ const useTarget = typeof obj.use_target === "string" ? obj.use_target.trim() : void 0;
2678
+ const hooks = parseTargetHooks(obj.hooks);
2679
+ refs.push({
2680
+ name,
2681
+ ...useTarget && { use_target: useTarget },
2682
+ ...hooks && { hooks }
2683
+ });
2684
+ }
2685
+ }
2686
+ return refs.length > 0 ? refs : void 0;
2687
+ }
2688
+ function extractTargetsFromSuite(suite) {
2689
+ const refs = extractTargetRefsFromSuite(suite);
2690
+ if (!refs) return void 0;
2691
+ const names = refs.map((r) => r.name);
2692
+ return names.length > 0 ? names : void 0;
2693
+ }
2694
+ function parseHookConfig(raw) {
2695
+ if (!raw || typeof raw !== "object") return void 0;
2696
+ const obj = raw;
2697
+ let command;
2698
+ if (typeof obj.command === "string") {
2699
+ command = ["sh", "-c", obj.command];
2700
+ } else if (Array.isArray(obj.command)) {
2701
+ command = obj.command.filter((s) => typeof s === "string");
2702
+ } else if (typeof obj.script === "string") {
2703
+ command = ["sh", "-c", obj.script];
2704
+ } else if (Array.isArray(obj.script)) {
2705
+ command = obj.script.filter((s) => typeof s === "string");
2706
+ }
2707
+ if (!command || command.length === 0) return void 0;
2708
+ const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : typeof obj.timeoutMs === "number" ? obj.timeoutMs : void 0;
2709
+ const cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
2710
+ return {
2711
+ command,
2712
+ ...timeoutMs !== void 0 && { timeout_ms: timeoutMs },
2713
+ ...cwd && { cwd }
2714
+ };
2715
+ }
2716
+ function parseTargetHooks(raw) {
2717
+ if (!raw || typeof raw !== "object") return void 0;
2718
+ const obj = raw;
2719
+ const beforeAll = parseHookConfig(obj.before_all);
2720
+ const beforeEach = parseHookConfig(obj.before_each);
2721
+ const afterEach = parseHookConfig(obj.after_each);
2722
+ const afterAll = parseHookConfig(obj.after_all);
2723
+ if (!beforeAll && !beforeEach && !afterEach && !afterAll) return void 0;
2724
+ return {
2725
+ ...beforeAll && { before_all: beforeAll },
2726
+ ...beforeEach && { before_each: beforeEach },
2727
+ ...afterEach && { after_each: afterEach },
2728
+ ...afterAll && { after_all: afterAll }
2729
+ };
2669
2730
  }
2670
2731
  function extractWorkersFromSuite(suite) {
2671
2732
  const execution = suite.execution;
@@ -3337,7 +3398,11 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
3337
3398
  }
3338
3399
  const placeholderIndex = result.indexOf(PLACEHOLDER);
3339
3400
  if (strings.length > 0 && placeholderIndex !== -1) {
3340
- result[placeholderIndex] = { type: "rubrics", criteria: strings };
3401
+ result[placeholderIndex] = {
3402
+ type: "rubrics",
3403
+ criteria: strings,
3404
+ weight: strings.length
3405
+ };
3341
3406
  } else if (placeholderIndex !== -1) {
3342
3407
  result.splice(placeholderIndex, 1);
3343
3408
  }
@@ -5739,6 +5804,7 @@ async function readTestSuiteMetadata(testFilePath) {
5739
5804
  return {
5740
5805
  target: extractTargetFromSuite(parsed),
5741
5806
  targets: extractTargetsFromSuite(parsed),
5807
+ targetRefs: extractTargetRefsFromSuite(parsed),
5742
5808
  trials: extractTrialsConfig(parsed)
5743
5809
  };
5744
5810
  } catch {
@@ -5765,6 +5831,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
5765
5831
  tests,
5766
5832
  trials: extractTrialsConfig(parsed),
5767
5833
  targets: extractTargetsFromSuite(parsed),
5834
+ targetRefs: extractTargetRefsFromSuite(parsed),
5768
5835
  workers: extractWorkersFromSuite(parsed),
5769
5836
  cacheConfig: extractCacheConfig(parsed),
5770
5837
  totalBudgetUsd: extractTotalBudgetUsd(parsed),
@@ -7519,7 +7586,7 @@ var ClaudeCliProvider = class {
7519
7586
  if (options.cwd) {
7520
7587
  spawnOptions.cwd = options.cwd;
7521
7588
  }
7522
- const child = (0, import_node_child_process.spawn)("claude", options.args, spawnOptions);
7589
+ const child = (0, import_node_child_process.spawn)(this.config.executable, options.args, spawnOptions);
7523
7590
  let stdout = "";
7524
7591
  let stderr = "";
7525
7592
  let timedOut = false;
@@ -7578,7 +7645,7 @@ var ClaudeCliProvider = class {
7578
7645
  if (err.code === "ENOENT") {
7579
7646
  reject(
7580
7647
  new Error(
7581
- `Claude CLI executable 'claude' was not found on PATH. Install claude-code or ensure it is in PATH.`
7648
+ `Claude CLI executable '${this.config.executable}' was not found on PATH. Install claude-code or ensure it is in PATH.`
7582
7649
  )
7583
7650
  );
7584
7651
  } else {
@@ -11671,6 +11738,9 @@ init_cjs_shims();
11671
11738
  var import_node_os7 = __toESM(require("os"), 1);
11672
11739
  var import_node_path24 = __toESM(require("path"), 1);
11673
11740
  var logged = false;
11741
+ function getAgentvConfigDir() {
11742
+ return import_node_path24.default.join(import_node_os7.default.homedir(), ".agentv");
11743
+ }
11674
11744
  function getAgentvHome() {
11675
11745
  const envHome = process.env.AGENTV_HOME;
11676
11746
  if (envHome && envHome !== "undefined") {
@@ -12343,6 +12413,8 @@ var ProviderRegistry = class {
12343
12413
 
12344
12414
  // src/evaluation/providers/targets.ts
12345
12415
  init_cjs_shims();
12416
+ var import_node_fs12 = require("fs");
12417
+ var import_node_os8 = require("os");
12346
12418
  var import_node_path26 = __toESM(require("path"), 1);
12347
12419
  var import_zod3 = require("zod");
12348
12420
  var CliHealthcheckHttpInputSchema = import_zod3.z.object({
@@ -12368,8 +12440,6 @@ var CliTargetInputSchema = import_zod3.z.object({
12368
12440
  attachments_format: import_zod3.z.string().optional(),
12369
12441
  // Working directory - optional
12370
12442
  cwd: import_zod3.z.string().optional(),
12371
- // Workspace template directory - optional (mutually exclusive with cwd)
12372
- workspace_template: import_zod3.z.string().optional(),
12373
12443
  // Timeout in seconds - optional
12374
12444
  timeout_seconds: import_zod3.z.number().positive().optional(),
12375
12445
  // Healthcheck configuration - optional
@@ -12404,7 +12474,6 @@ var CliTargetConfigSchema = import_zod3.z.object({
12404
12474
  command: import_zod3.z.string().min(1),
12405
12475
  filesFormat: import_zod3.z.string().optional(),
12406
12476
  cwd: import_zod3.z.string().optional(),
12407
- workspaceTemplate: import_zod3.z.string().optional(),
12408
12477
  timeoutMs: import_zod3.z.number().positive().optional(),
12409
12478
  healthcheck: CliHealthcheckSchema.optional(),
12410
12479
  verbose: import_zod3.z.boolean().optional(),
@@ -12447,19 +12516,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
12447
12516
  const command = resolveString(input.command, env, `${targetName} CLI command`, true);
12448
12517
  const filesFormatSource = input.files_format ?? input.attachments_format;
12449
12518
  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
12450
- const workspaceTemplateSource = input.workspace_template;
12451
- let workspaceTemplate = resolveOptionalString(
12452
- workspaceTemplateSource,
12453
- env,
12454
- `${targetName} workspace template`,
12455
- {
12456
- allowLiteral: true,
12457
- optionalEnv: true
12458
- }
12459
- );
12460
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
12461
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
12462
- }
12463
12519
  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
12464
12520
  allowLiteral: true,
12465
12521
  optionalEnv: true
@@ -12467,12 +12523,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
12467
12523
  if (cwd && evalFilePath && !import_node_path26.default.isAbsolute(cwd)) {
12468
12524
  cwd = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), cwd);
12469
12525
  }
12470
- if (cwd && workspaceTemplate) {
12471
- throw new Error(
12472
- `${targetName}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
12473
- );
12474
- }
12475
- if (!cwd && !workspaceTemplate && evalFilePath) {
12526
+ if (!cwd && evalFilePath) {
12476
12527
  cwd = import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath));
12477
12528
  }
12478
12529
  const timeoutSeconds = input.timeout_seconds;
@@ -12484,7 +12535,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
12484
12535
  command,
12485
12536
  filesFormat,
12486
12537
  cwd,
12487
- workspaceTemplate,
12488
12538
  timeoutMs,
12489
12539
  healthcheck,
12490
12540
  verbose,
@@ -12553,11 +12603,6 @@ function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
12553
12603
  return warnings;
12554
12604
  }
12555
12605
  function assertNoDeprecatedCamelCaseTargetFields(definition) {
12556
- if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
12557
- throw new Error(
12558
- `${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
12559
- );
12560
- }
12561
12606
  const warning = findDeprecatedCamelCaseTargetWarnings(
12562
12607
  definition,
12563
12608
  `target "${definition.name}"`
@@ -12607,7 +12652,6 @@ var BASE_TARGET_SCHEMA = import_zod3.z.object({
12607
12652
  judge_target: import_zod3.z.string().optional(),
12608
12653
  // backward compat
12609
12654
  workers: import_zod3.z.number().int().min(1).optional(),
12610
- workspace_template: import_zod3.z.string().optional(),
12611
12655
  subagent_mode_allowed: import_zod3.z.boolean().optional(),
12612
12656
  fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
12613
12657
  }).passthrough();
@@ -12704,11 +12748,6 @@ function resolveDelegatedTargetDefinition(name, definitions, env = process.env)
12704
12748
  function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
12705
12749
  assertNoDeprecatedCamelCaseTargetFields(definition);
12706
12750
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
12707
- if (parsed.workspace_template !== void 0) {
12708
- throw new Error(
12709
- `${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
12710
- );
12711
- }
12712
12751
  if (!parsed.provider) {
12713
12752
  throw new Error(
12714
12753
  `${parsed.name}: 'provider' is required (targets with use_target must be resolved before calling resolveTargetDefinition)`
@@ -12805,6 +12844,20 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath, op
12805
12844
  ...base,
12806
12845
  config: resolvePiCliConfig(parsed, env, evalFilePath)
12807
12846
  };
12847
+ case "cc-mirror": {
12848
+ const variantName = resolveOptionalString(parsed.variant, env, `${parsed.name} cc-mirror variant`, {
12849
+ allowLiteral: true,
12850
+ optionalEnv: true
12851
+ }) ?? parsed.name;
12852
+ if (!parsed.executable) {
12853
+ parsed.executable = resolveCcMirrorBinaryPath(variantName);
12854
+ }
12855
+ return {
12856
+ kind: "claude-cli",
12857
+ ...base,
12858
+ config: resolveClaudeConfig(parsed, env, evalFilePath)
12859
+ };
12860
+ }
12808
12861
  case "claude":
12809
12862
  case "claude-code":
12810
12863
  case "claude-cli":
@@ -12993,12 +13046,11 @@ function resolveGeminiConfig(target, env) {
12993
13046
  retry
12994
13047
  };
12995
13048
  }
12996
- function resolveCodexConfig(target, env, evalFilePath) {
13049
+ function resolveCodexConfig(target, env, _evalFilePath) {
12997
13050
  const modelSource = target.model;
12998
13051
  const executableSource = target.executable ?? target.command ?? target.binary;
12999
13052
  const argsSource = target.args ?? target.arguments;
13000
13053
  const cwdSource = target.cwd;
13001
- const workspaceTemplateSource = target.workspace_template;
13002
13054
  const timeoutSource = target.timeout_seconds;
13003
13055
  const logDirSource = target.log_dir ?? target.log_directory;
13004
13056
  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
@@ -13021,23 +13073,6 @@ function resolveCodexConfig(target, env, evalFilePath) {
13021
13073
  allowLiteral: true,
13022
13074
  optionalEnv: true
13023
13075
  });
13024
- let workspaceTemplate = resolveOptionalString(
13025
- workspaceTemplateSource,
13026
- env,
13027
- `${target.name} codex workspace template`,
13028
- {
13029
- allowLiteral: true,
13030
- optionalEnv: true
13031
- }
13032
- );
13033
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13034
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13035
- }
13036
- if (cwd && workspaceTemplate) {
13037
- throw new Error(
13038
- `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
13039
- );
13040
- }
13041
13076
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
13042
13077
  const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
13043
13078
  allowLiteral: true,
@@ -13050,7 +13085,6 @@ function resolveCodexConfig(target, env, evalFilePath) {
13050
13085
  executable,
13051
13086
  args,
13052
13087
  cwd,
13053
- workspaceTemplate,
13054
13088
  timeoutMs,
13055
13089
  logDir,
13056
13090
  logFormat,
@@ -13103,13 +13137,12 @@ function resolveStreamLog(target, envFallback) {
13103
13137
  deprecationWarning: `${target.name}: 'log_format' is deprecated and will be removed in v4.16. Use 'stream_log: ${streamLogEquivalent}' instead (log_format: '${normalized}' \u2192 stream_log: '${streamLogEquivalent}').`
13104
13138
  };
13105
13139
  }
13106
- function resolveCopilotSdkConfig(target, env, evalFilePath) {
13140
+ function resolveCopilotSdkConfig(target, env, _evalFilePath) {
13107
13141
  const cliUrlSource = target.cli_url;
13108
13142
  const cliPathSource = target.cli_path;
13109
13143
  const githubTokenSource = target.github_token;
13110
13144
  const modelSource = target.model;
13111
13145
  const cwdSource = target.cwd;
13112
- const workspaceTemplateSource = target.workspace_template;
13113
13146
  const timeoutSource = target.timeout_seconds;
13114
13147
  const logDirSource = target.log_dir ?? target.log_directory;
13115
13148
  const logFormatSource = target.log_format;
@@ -13144,23 +13177,6 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
13144
13177
  allowLiteral: true,
13145
13178
  optionalEnv: true
13146
13179
  });
13147
- let workspaceTemplate = resolveOptionalString(
13148
- workspaceTemplateSource,
13149
- env,
13150
- `${target.name} copilot-sdk workspace template`,
13151
- {
13152
- allowLiteral: true,
13153
- optionalEnv: true
13154
- }
13155
- );
13156
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13157
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13158
- }
13159
- if (cwd && workspaceTemplate) {
13160
- throw new Error(
13161
- `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
13162
- );
13163
- }
13164
13180
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-sdk timeout`);
13165
13181
  const logDir = resolveOptionalString(
13166
13182
  logDirSource,
@@ -13225,7 +13241,6 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
13225
13241
  githubToken,
13226
13242
  model,
13227
13243
  cwd,
13228
- workspaceTemplate,
13229
13244
  timeoutMs,
13230
13245
  logDir,
13231
13246
  logFormat,
@@ -13239,12 +13254,11 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
13239
13254
  byokWireApi
13240
13255
  };
13241
13256
  }
13242
- function resolveCopilotCliConfig(target, env, evalFilePath) {
13257
+ function resolveCopilotCliConfig(target, env, _evalFilePath) {
13243
13258
  const executableSource = target.executable ?? target.command ?? target.binary;
13244
13259
  const modelSource = target.model;
13245
13260
  const argsSource = target.args ?? target.arguments;
13246
13261
  const cwdSource = target.cwd;
13247
- const workspaceTemplateSource = target.workspace_template;
13248
13262
  const timeoutSource = target.timeout_seconds;
13249
13263
  const logDirSource = target.log_dir ?? target.log_directory;
13250
13264
  const logFormatSource = target.log_format;
@@ -13267,23 +13281,6 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
13267
13281
  allowLiteral: true,
13268
13282
  optionalEnv: true
13269
13283
  });
13270
- let workspaceTemplate = resolveOptionalString(
13271
- workspaceTemplateSource,
13272
- env,
13273
- `${target.name} copilot-cli workspace template`,
13274
- {
13275
- allowLiteral: true,
13276
- optionalEnv: true
13277
- }
13278
- );
13279
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13280
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13281
- }
13282
- if (cwd && workspaceTemplate) {
13283
- throw new Error(
13284
- `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
13285
- );
13286
- }
13287
13284
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-cli timeout`);
13288
13285
  const logDir = resolveOptionalString(
13289
13286
  logDirSource,
@@ -13301,7 +13298,6 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
13301
13298
  model,
13302
13299
  args,
13303
13300
  cwd,
13304
- workspaceTemplate,
13305
13301
  timeoutMs,
13306
13302
  logDir,
13307
13303
  logFormat,
@@ -13316,14 +13312,13 @@ function normalizeCopilotLogFormat(value) {
13316
13312
  if (normalized === "json" || normalized === "summary") return normalized;
13317
13313
  throw new Error("copilot log format must be 'summary' or 'json'");
13318
13314
  }
13319
- function resolvePiCodingAgentConfig(target, env, evalFilePath) {
13315
+ function resolvePiCodingAgentConfig(target, env, _evalFilePath) {
13320
13316
  const subproviderSource = target.subprovider;
13321
13317
  const modelSource = target.model ?? target.pi_model;
13322
13318
  const apiKeySource = target.api_key;
13323
13319
  const toolsSource = target.tools ?? target.pi_tools;
13324
13320
  const thinkingSource = target.thinking ?? target.pi_thinking;
13325
13321
  const cwdSource = target.cwd;
13326
- const workspaceTemplateSource = target.workspace_template;
13327
13322
  const timeoutSource = target.timeout_seconds;
13328
13323
  const logDirSource = target.log_dir ?? target.log_directory;
13329
13324
  const logFormatSource = target.log_format;
@@ -13367,23 +13362,6 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
13367
13362
  allowLiteral: true,
13368
13363
  optionalEnv: true
13369
13364
  });
13370
- let workspaceTemplate = resolveOptionalString(
13371
- workspaceTemplateSource,
13372
- env,
13373
- `${target.name} pi workspace template`,
13374
- {
13375
- allowLiteral: true,
13376
- optionalEnv: true
13377
- }
13378
- );
13379
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13380
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13381
- }
13382
- if (cwd && workspaceTemplate) {
13383
- throw new Error(
13384
- `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
13385
- );
13386
- }
13387
13365
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
13388
13366
  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
13389
13367
  allowLiteral: true,
@@ -13399,7 +13377,6 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
13399
13377
  tools,
13400
13378
  thinking,
13401
13379
  cwd,
13402
- workspaceTemplate,
13403
13380
  timeoutMs,
13404
13381
  logDir,
13405
13382
  logFormat,
@@ -13407,7 +13384,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
13407
13384
  systemPrompt
13408
13385
  };
13409
13386
  }
13410
- function resolvePiCliConfig(target, env, evalFilePath) {
13387
+ function resolvePiCliConfig(target, env, _evalFilePath) {
13411
13388
  const executableSource = target.executable ?? target.command ?? target.binary;
13412
13389
  const subproviderSource = target.subprovider;
13413
13390
  const modelSource = target.model ?? target.pi_model;
@@ -13415,7 +13392,6 @@ function resolvePiCliConfig(target, env, evalFilePath) {
13415
13392
  const toolsSource = target.tools ?? target.pi_tools;
13416
13393
  const thinkingSource = target.thinking ?? target.pi_thinking;
13417
13394
  const cwdSource = target.cwd;
13418
- const workspaceTemplateSource = target.workspace_template;
13419
13395
  const timeoutSource = target.timeout_seconds;
13420
13396
  const logDirSource = target.log_dir ?? target.log_directory;
13421
13397
  const logFormatSource = target.log_format;
@@ -13462,18 +13438,6 @@ function resolvePiCliConfig(target, env, evalFilePath) {
13462
13438
  allowLiteral: true,
13463
13439
  optionalEnv: true
13464
13440
  });
13465
- let workspaceTemplate = resolveOptionalString(
13466
- workspaceTemplateSource,
13467
- env,
13468
- `${target.name} pi-cli workspace template`,
13469
- { allowLiteral: true, optionalEnv: true }
13470
- );
13471
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13472
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13473
- }
13474
- if (cwd && workspaceTemplate) {
13475
- throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
13476
- }
13477
13441
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-cli timeout`);
13478
13442
  const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi-cli log directory`, {
13479
13443
  allowLiteral: true,
@@ -13491,7 +13455,6 @@ function resolvePiCliConfig(target, env, evalFilePath) {
13491
13455
  thinking,
13492
13456
  args,
13493
13457
  cwd,
13494
- workspaceTemplate,
13495
13458
  timeoutMs,
13496
13459
  logDir,
13497
13460
  logFormat,
@@ -13499,10 +13462,10 @@ function resolvePiCliConfig(target, env, evalFilePath) {
13499
13462
  systemPrompt
13500
13463
  };
13501
13464
  }
13502
- function resolveClaudeConfig(target, env, evalFilePath) {
13465
+ function resolveClaudeConfig(target, env, _evalFilePath) {
13466
+ const executableSource = target.executable ?? target.command ?? target.binary;
13503
13467
  const modelSource = target.model;
13504
13468
  const cwdSource = target.cwd;
13505
- const workspaceTemplateSource = target.workspace_template;
13506
13469
  const timeoutSource = target.timeout_seconds;
13507
13470
  const logDirSource = target.log_dir ?? target.log_directory;
13508
13471
  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
@@ -13512,6 +13475,10 @@ function resolveClaudeConfig(target, env, evalFilePath) {
13512
13475
  process.stderr.write(`[agentv] \u26A0 ${streamLogResult.deprecationWarning}
13513
13476
  `);
13514
13477
  }
13478
+ const executable = resolveOptionalString(executableSource, env, `${target.name} claude-cli executable`, {
13479
+ allowLiteral: true,
13480
+ optionalEnv: true
13481
+ }) ?? "claude";
13515
13482
  const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
13516
13483
  allowLiteral: true,
13517
13484
  optionalEnv: true
@@ -13520,23 +13487,6 @@ function resolveClaudeConfig(target, env, evalFilePath) {
13520
13487
  allowLiteral: true,
13521
13488
  optionalEnv: true
13522
13489
  });
13523
- let workspaceTemplate = resolveOptionalString(
13524
- workspaceTemplateSource,
13525
- env,
13526
- `${target.name} claude workspace template`,
13527
- {
13528
- allowLiteral: true,
13529
- optionalEnv: true
13530
- }
13531
- );
13532
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13533
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13534
- }
13535
- if (cwd && workspaceTemplate) {
13536
- throw new Error(
13537
- `${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
13538
- );
13539
- }
13540
13490
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} claude timeout`);
13541
13491
  const logDir = resolveOptionalString(logDirSource, env, `${target.name} claude log directory`, {
13542
13492
  allowLiteral: true,
@@ -13547,10 +13497,10 @@ function resolveClaudeConfig(target, env, evalFilePath) {
13547
13497
  const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
13548
13498
  const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
13549
13499
  return {
13500
+ executable,
13550
13501
  model,
13551
13502
  systemPrompt,
13552
13503
  cwd,
13553
- workspaceTemplate,
13554
13504
  timeoutMs,
13555
13505
  maxTurns,
13556
13506
  maxBudgetUsd,
@@ -13559,6 +13509,28 @@ function resolveClaudeConfig(target, env, evalFilePath) {
13559
13509
  streamLog: streamLogResult.streamLog
13560
13510
  };
13561
13511
  }
13512
+ function resolveCcMirrorBinaryPath(variant) {
13513
+ const variantJsonPath = import_node_path26.default.join((0, import_node_os8.homedir)(), ".cc-mirror", variant, "variant.json");
13514
+ if (!(0, import_node_fs12.existsSync)(variantJsonPath)) {
13515
+ throw new Error(
13516
+ `cc-mirror variant "${variant}": ${variantJsonPath} not found. Install the variant or set "executable" explicitly.`
13517
+ );
13518
+ }
13519
+ let parsed;
13520
+ try {
13521
+ parsed = JSON.parse((0, import_node_fs12.readFileSync)(variantJsonPath, "utf8"));
13522
+ } catch (e) {
13523
+ throw new Error(
13524
+ `cc-mirror variant "${variant}": failed to parse ${variantJsonPath}: ${e.message}`
13525
+ );
13526
+ }
13527
+ if (typeof parsed.binaryPath !== "string" || parsed.binaryPath.trim().length === 0) {
13528
+ throw new Error(
13529
+ `cc-mirror variant "${variant}": ${variantJsonPath} missing "binaryPath" field`
13530
+ );
13531
+ }
13532
+ return parsed.binaryPath;
13533
+ }
13562
13534
  function normalizeClaudeLogFormat(value) {
13563
13535
  if (value === void 0 || value === null) {
13564
13536
  return void 0;
@@ -13576,20 +13548,7 @@ function resolveMockConfig(target) {
13576
13548
  const response = typeof target.response === "string" ? target.response : void 0;
13577
13549
  return { response };
13578
13550
  }
13579
- function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
13580
- const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
13581
- let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
13582
- workspaceTemplateEnvVar,
13583
- env,
13584
- `${target.name} workspace template path`,
13585
- {
13586
- allowLiteral: true,
13587
- optionalEnv: true
13588
- }
13589
- ) : void 0;
13590
- if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
13591
- workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
13592
- }
13551
+ function resolveVSCodeConfig(target, env, insiders, _evalFilePath) {
13593
13552
  const executableSource = target.executable;
13594
13553
  const waitSource = target.wait;
13595
13554
  const dryRunSource = target.dry_run;
@@ -13609,7 +13568,6 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
13609
13568
  allowLiteral: true,
13610
13569
  optionalEnv: true
13611
13570
  }),
13612
- workspaceTemplate,
13613
13571
  timeoutMs
13614
13572
  };
13615
13573
  }
@@ -13891,12 +13849,12 @@ var import_node_path35 = __toESM(require("path"), 1);
13891
13849
 
13892
13850
  // src/evaluation/providers/vscode/utils/fs.ts
13893
13851
  init_cjs_shims();
13894
- var import_node_fs12 = require("fs");
13852
+ var import_node_fs13 = require("fs");
13895
13853
  var import_promises23 = require("fs/promises");
13896
13854
  var import_node_path27 = __toESM(require("path"), 1);
13897
13855
  async function pathExists(target) {
13898
13856
  try {
13899
- await (0, import_promises23.access)(target, import_node_fs12.constants.F_OK);
13857
+ await (0, import_promises23.access)(target, import_node_fs13.constants.F_OK);
13900
13858
  return true;
13901
13859
  } catch {
13902
13860
  return false;
@@ -15035,7 +14993,7 @@ var VSCodeProvider = class {
15035
14993
  await this.ensureEnvironmentReady();
15036
14994
  const inputFiles = normalizeAttachments(request.inputFiles);
15037
14995
  const promptContent = buildPromptDocument2(request, inputFiles);
15038
- const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
14996
+ const workspaceTemplate = request.workspaceFile;
15039
14997
  const startTime = Date.now();
15040
14998
  const session = await dispatchAgentSession({
15041
14999
  userQuery: promptContent,
@@ -15091,9 +15049,6 @@ var VSCodeProvider = class {
15091
15049
  const userQueries = normalizedRequests.map(
15092
15050
  ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
15093
15051
  );
15094
- const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
15095
- this.config.workspaceTemplate
15096
- );
15097
15052
  const startTime = Date.now();
15098
15053
  const session = await dispatchBatchAgent({
15099
15054
  userQueries,
@@ -15103,7 +15058,7 @@ var VSCodeProvider = class {
15103
15058
  dryRun: this.config.dryRun,
15104
15059
  vscodeCmd: this.config.executable,
15105
15060
  subagentRoot: this.config.subagentRoot,
15106
- workspaceTemplate: batchWorkspaceTemplate,
15061
+ workspaceTemplate: void 0,
15107
15062
  silent: true,
15108
15063
  timeoutMs: this.config.timeoutMs
15109
15064
  });
@@ -15183,17 +15138,6 @@ async function locateVSCodeExecutable(candidate) {
15183
15138
  `VS Code executable '${candidate}' was not found on PATH. Check the 'executable' setting in your target configuration.`
15184
15139
  );
15185
15140
  }
15186
- async function resolveWorkspaceTemplateFile(template) {
15187
- if (!template) {
15188
- return void 0;
15189
- }
15190
- try {
15191
- const stats = await (0, import_promises29.stat)(import_node_path37.default.resolve(template));
15192
- return stats.isFile() ? template : void 0;
15193
- } catch {
15194
- return template;
15195
- }
15196
- }
15197
15141
  function buildPromptDocument2(request, attachments) {
15198
15142
  const parts = [];
15199
15143
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
@@ -15356,7 +15300,7 @@ function isAgentProvider(provider) {
15356
15300
 
15357
15301
  // src/evaluation/providers/targets-file.ts
15358
15302
  init_cjs_shims();
15359
- var import_node_fs13 = require("fs");
15303
+ var import_node_fs14 = require("fs");
15360
15304
  var import_promises30 = require("fs/promises");
15361
15305
  var import_node_path38 = __toESM(require("path"), 1);
15362
15306
  var import_yaml8 = require("yaml");
@@ -15391,7 +15335,7 @@ function assertTargetDefinition(value, index, filePath) {
15391
15335
  }
15392
15336
  async function fileExists3(filePath) {
15393
15337
  try {
15394
- await (0, import_promises30.access)(filePath, import_node_fs13.constants.F_OK);
15338
+ await (0, import_promises30.access)(filePath, import_node_fs14.constants.F_OK);
15395
15339
  return true;
15396
15340
  } catch {
15397
15341
  return false;
@@ -15575,7 +15519,7 @@ function negateScore(score) {
15575
15519
  // src/evaluation/evaluators/code-evaluator.ts
15576
15520
  init_cjs_shims();
15577
15521
  var import_promises31 = require("fs/promises");
15578
- var import_node_os8 = require("os");
15522
+ var import_node_os9 = require("os");
15579
15523
  var import_node_path40 = require("path");
15580
15524
  init_exec();
15581
15525
 
@@ -15940,7 +15884,7 @@ var CodeEvaluator = class {
15940
15884
  let imageTmpDir;
15941
15885
  const getImageDir = async () => {
15942
15886
  if (!imageTmpDir) {
15943
- imageTmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os8.tmpdir)(), "agentv-img-"));
15887
+ imageTmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os9.tmpdir)(), "agentv-img-"));
15944
15888
  }
15945
15889
  return imageTmpDir;
15946
15890
  };
@@ -15953,7 +15897,7 @@ var CodeEvaluator = class {
15953
15897
  if (outputForPayload) {
15954
15898
  const serialized = JSON.stringify(outputForPayload);
15955
15899
  if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
15956
- const tmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os8.tmpdir)(), "agentv-grader-"));
15900
+ const tmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os9.tmpdir)(), "agentv-grader-"));
15957
15901
  outputPath = (0, import_node_path40.join)(tmpDir, "output.json");
15958
15902
  await (0, import_promises31.writeFile)(outputPath, serialized);
15959
15903
  outputForPayload = null;
@@ -16452,7 +16396,7 @@ ${context2.fileChanges}`;
16452
16396
  const workspacePath = context2.workspacePath;
16453
16397
  if (!workspacePath) {
16454
16398
  throw new Error(
16455
- "llm-grader built-in agent mode requires a workspace_template target (workspacePath is not set)"
16399
+ "llm-grader built-in agent mode requires a workspace (workspacePath is not set)"
16456
16400
  );
16457
16401
  }
16458
16402
  const systemPrompt = this.buildAgentSystemPrompt(context2);
@@ -17191,11 +17135,11 @@ function createFilesystemTools(workspacePath) {
17191
17135
  execute: async (input) => {
17192
17136
  try {
17193
17137
  const resolved = resolveSandboxed(workspacePath, input.path);
17194
- const stat13 = await import_promises32.default.stat(resolved);
17195
- if (stat13.isDirectory()) {
17138
+ const stat12 = await import_promises32.default.stat(resolved);
17139
+ if (stat12.isDirectory()) {
17196
17140
  return { error: `'${input.path}' is a directory, not a file` };
17197
17141
  }
17198
- const buffer = Buffer.alloc(Math.min(stat13.size, MAX_FILE_SIZE));
17142
+ const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
17199
17143
  const fd = await import_promises32.default.open(resolved, "r");
17200
17144
  try {
17201
17145
  await fd.read(buffer, 0, buffer.length, 0);
@@ -17203,8 +17147,8 @@ function createFilesystemTools(workspacePath) {
17203
17147
  await fd.close();
17204
17148
  }
17205
17149
  const content = buffer.toString("utf-8");
17206
- const truncated = stat13.size > MAX_FILE_SIZE;
17207
- return { content, truncated, size: stat13.size };
17150
+ const truncated = stat12.size > MAX_FILE_SIZE;
17151
+ return { content, truncated, size: stat12.size };
17208
17152
  } catch (error) {
17209
17153
  return { error: error instanceof Error ? error.message : String(error) };
17210
17154
  }
@@ -17255,8 +17199,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
17255
17199
  const ext = import_node_path41.default.extname(entry.name).toLowerCase();
17256
17200
  if (BINARY_EXTENSIONS.has(ext)) continue;
17257
17201
  try {
17258
- const stat13 = await import_promises32.default.stat(fullPath);
17259
- if (stat13.size > MAX_FILE_SIZE) continue;
17202
+ const stat12 = await import_promises32.default.stat(fullPath);
17203
+ if (stat12.size > MAX_FILE_SIZE) continue;
17260
17204
  const content = await import_promises32.default.readFile(fullPath, "utf-8");
17261
17205
  const lines = content.split("\n");
17262
17206
  for (let i = 0; i < lines.length; i++) {
@@ -19159,7 +19103,7 @@ function runEqualsAssertion(output, value) {
19159
19103
  init_cjs_shims();
19160
19104
  var import_node_child_process11 = require("child_process");
19161
19105
  var import_node_crypto11 = require("crypto");
19162
- var import_node_fs16 = require("fs");
19106
+ var import_node_fs17 = require("fs");
19163
19107
  var import_promises36 = require("fs/promises");
19164
19108
  var import_node_path49 = __toESM(require("path"), 1);
19165
19109
  var import_node_util7 = require("util");
@@ -20064,7 +20008,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
20064
20008
  init_cjs_shims();
20065
20009
  var import_node_child_process9 = require("child_process");
20066
20010
  var import_node_crypto10 = require("crypto");
20067
- var import_node_fs14 = require("fs");
20011
+ var import_node_fs15 = require("fs");
20068
20012
  var import_promises34 = require("fs/promises");
20069
20013
  var import_node_path46 = __toESM(require("path"), 1);
20070
20014
  var import_node_util5 = require("util");
@@ -20172,7 +20116,7 @@ var WorkspacePoolManager = class {
20172
20116
  if (!locked) {
20173
20117
  continue;
20174
20118
  }
20175
- const slotExists = (0, import_node_fs14.existsSync)(slotPath);
20119
+ const slotExists = (0, import_node_fs15.existsSync)(slotPath);
20176
20120
  if (slotExists) {
20177
20121
  await this.resetSlot(slotPath, templatePath, repos, poolReset);
20178
20122
  return {
@@ -20278,7 +20222,7 @@ var WorkspacePoolManager = class {
20278
20222
  for (const entry of entries) {
20279
20223
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
20280
20224
  const lockPath = import_node_path46.default.join(poolDir, `${entry}.lock`);
20281
- if ((0, import_node_fs14.existsSync)(lockPath)) {
20225
+ if ((0, import_node_fs15.existsSync)(lockPath)) {
20282
20226
  try {
20283
20227
  const pidStr = await (0, import_promises34.readFile)(lockPath, "utf-8");
20284
20228
  const pid = Number.parseInt(pidStr.trim(), 10);
@@ -20310,7 +20254,7 @@ var WorkspacePoolManager = class {
20310
20254
  for (const repo of repos) {
20311
20255
  if (!repo.path || !repo.source) continue;
20312
20256
  const repoDir = import_node_path46.default.join(slotPath, repo.path);
20313
- if (!(0, import_node_fs14.existsSync)(repoDir)) {
20257
+ if (!(0, import_node_fs15.existsSync)(repoDir)) {
20314
20258
  continue;
20315
20259
  }
20316
20260
  if (poolReset === "none") {
@@ -20346,7 +20290,7 @@ var WorkspacePoolManager = class {
20346
20290
  // src/evaluation/workspace/repo-manager.ts
20347
20291
  init_cjs_shims();
20348
20292
  var import_node_child_process10 = require("child_process");
20349
- var import_node_fs15 = require("fs");
20293
+ var import_node_fs16 = require("fs");
20350
20294
  var import_node_path47 = __toESM(require("path"), 1);
20351
20295
  var import_node_util6 = require("util");
20352
20296
  var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process10.execFile);
@@ -20398,7 +20342,7 @@ var RepoManager = class {
20398
20342
  resolvedSourcePath: sourcePath ?? "",
20399
20343
  reason: "empty_path"
20400
20344
  });
20401
- } else if (!(0, import_node_fs15.existsSync)(sourcePath)) {
20345
+ } else if (!(0, import_node_fs16.existsSync)(sourcePath)) {
20402
20346
  errors.push({
20403
20347
  repoPath: repo.path ?? "(none)",
20404
20348
  resolvedSourcePath: sourcePath,
@@ -20692,7 +20636,7 @@ function workspaceGitEnv() {
20692
20636
  };
20693
20637
  }
20694
20638
  async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
20695
- if (!(0, import_node_fs16.existsSync)(import_node_path49.default.join(workspacePath, ".git"))) {
20639
+ if (!(0, import_node_fs17.existsSync)(import_node_path49.default.join(workspacePath, ".git"))) {
20696
20640
  return false;
20697
20641
  }
20698
20642
  const cleanFlag = resetMode === "strict" ? "-fdx" : "-fd";
@@ -20706,13 +20650,6 @@ async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
20706
20650
  await execFileAsync3("git", ["clean", cleanFlag], opts);
20707
20651
  return true;
20708
20652
  }
20709
- function getWorkspaceTemplate(target) {
20710
- const config = target.config;
20711
- if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
20712
- return config.workspaceTemplate;
20713
- }
20714
- return void 0;
20715
- }
20716
20653
  function validateDependencyGraph(tests) {
20717
20654
  const ids = /* @__PURE__ */ new Set();
20718
20655
  for (const test of tests) {
@@ -20986,7 +20923,7 @@ async function runEvaluation(options) {
20986
20923
  }
20987
20924
  }
20988
20925
  const suiteWorkspace = filteredEvalCases[0]?.workspace;
20989
- const rawTemplate = suiteWorkspace?.template ?? getWorkspaceTemplate(target);
20926
+ const rawTemplate = suiteWorkspace?.template;
20990
20927
  const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
20991
20928
  const workspaceTemplate = resolvedTemplate?.dir;
20992
20929
  let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
@@ -21184,7 +21121,7 @@ async function runEvaluation(options) {
21184
21121
  for (const repo of suiteWorkspace.repos) {
21185
21122
  if (!repo.path || !repo.source) continue;
21186
21123
  const targetDir = import_node_path49.default.join(sharedWorkspacePath, repo.path);
21187
- if ((0, import_node_fs16.existsSync)(targetDir)) {
21124
+ if ((0, import_node_fs17.existsSync)(targetDir)) {
21188
21125
  setupLog(`reusing existing repo at: ${targetDir}`);
21189
21126
  continue;
21190
21127
  }
@@ -21274,6 +21211,54 @@ async function runEvaluation(options) {
21274
21211
  }
21275
21212
  }
21276
21213
  }
21214
+ const targetHooks = options.targetHooks;
21215
+ const targetBeforeAllHook = targetHooks?.before_all;
21216
+ if (sharedWorkspacePath && hasHookCommand(targetBeforeAllHook)) {
21217
+ const beforeAllCommand = (targetBeforeAllHook.command ?? []).join(" ");
21218
+ setupLog(`running target before_all command=${beforeAllCommand}`);
21219
+ const scriptContext = {
21220
+ workspacePath: sharedWorkspacePath,
21221
+ testId: "__target_before_all__",
21222
+ evalRunId,
21223
+ evalDir,
21224
+ workspaceFileDir: suiteWorkspace?.workspaceFileDir
21225
+ };
21226
+ try {
21227
+ await executeWorkspaceScript(
21228
+ toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
21229
+ scriptContext
21230
+ );
21231
+ setupLog("target before_all completed");
21232
+ } catch (error) {
21233
+ const message = error instanceof Error ? error.message : String(error);
21234
+ if (sharedWorkspacePath && !useStaticWorkspace) {
21235
+ await cleanupWorkspace(sharedWorkspacePath).catch(() => {
21236
+ });
21237
+ }
21238
+ throw new Error(`target before_all hook failed: ${message}`);
21239
+ }
21240
+ }
21241
+ if (availablePoolSlots.length > 0 && hasHookCommand(targetBeforeAllHook)) {
21242
+ for (const slot of availablePoolSlots) {
21243
+ setupLog(`running target before_all on pool slot ${slot.index}`);
21244
+ const scriptContext = {
21245
+ workspacePath: slot.path,
21246
+ testId: "__target_before_all__",
21247
+ evalRunId,
21248
+ evalDir,
21249
+ workspaceFileDir: suiteWorkspace?.workspaceFileDir
21250
+ };
21251
+ try {
21252
+ await executeWorkspaceScript(
21253
+ toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
21254
+ scriptContext
21255
+ );
21256
+ } catch (error) {
21257
+ const message = error instanceof Error ? error.message : String(error);
21258
+ throw new Error(`target before_all hook failed on pool slot ${slot.index}: ${message}`);
21259
+ }
21260
+ }
21261
+ }
21277
21262
  if (sharedWorkspacePath) {
21278
21263
  try {
21279
21264
  sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
@@ -21419,6 +21404,7 @@ async function runEvaluation(options) {
21419
21404
  evalDir,
21420
21405
  verbose,
21421
21406
  threshold: scoreThreshold,
21407
+ targetHooks: options.targetHooks,
21422
21408
  ...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
21423
21409
  };
21424
21410
  let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
@@ -21560,6 +21546,26 @@ async function runEvaluation(options) {
21560
21546
  }
21561
21547
  }
21562
21548
  const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
21549
+ const targetAfterAllHook = targetHooks?.after_all;
21550
+ if (afterAllWorkspaces.length > 0 && hasHookCommand(targetAfterAllHook)) {
21551
+ for (const wsPath of afterAllWorkspaces) {
21552
+ const scriptContext = {
21553
+ workspacePath: wsPath,
21554
+ testId: "__target_after_all__",
21555
+ evalRunId,
21556
+ evalDir,
21557
+ workspaceFileDir: suiteWorkspace?.workspaceFileDir
21558
+ };
21559
+ try {
21560
+ await executeWorkspaceScript(
21561
+ toScriptConfig(targetAfterAllHook, "after_all", "target hooks"),
21562
+ scriptContext,
21563
+ "warn"
21564
+ );
21565
+ } catch {
21566
+ }
21567
+ }
21568
+ }
21563
21569
  const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
21564
21570
  if (afterAllWorkspaces.length > 0 && suiteHooksEnabled && hasHookCommand(suiteAfterAllHook)) {
21565
21571
  const afterAllHook = suiteAfterAllHook;
@@ -21822,7 +21828,7 @@ async function runEvalCase(options) {
21822
21828
  let caseWorkspaceFile;
21823
21829
  const caseHooksEnabled = hooksEnabled(evalCase.workspace);
21824
21830
  if (!workspacePath) {
21825
- const rawCaseTemplate = evalCase.workspace?.template ?? getWorkspaceTemplate(target);
21831
+ const rawCaseTemplate = evalCase.workspace?.template;
21826
21832
  const resolvedCaseTemplate = await resolveWorkspaceTemplate(rawCaseTemplate);
21827
21833
  const caseWorkspaceTemplate = resolvedCaseTemplate?.dir;
21828
21834
  caseWorkspaceFile = resolvedCaseTemplate?.workspaceFile;
@@ -22040,6 +22046,38 @@ async function runEvalCase(options) {
22040
22046
  );
22041
22047
  }
22042
22048
  }
22049
+ const targetBeforeEachHook = options.targetHooks?.before_each;
22050
+ if (workspacePath && hasHookCommand(targetBeforeEachHook)) {
22051
+ const scriptContext = {
22052
+ workspacePath,
22053
+ testId: evalCase.id,
22054
+ evalRunId: evalRunId ?? "",
22055
+ caseInput: evalCase.question,
22056
+ caseMetadata: evalCase.metadata,
22057
+ evalDir,
22058
+ workspaceFileDir: evalCase.workspace?.workspaceFileDir
22059
+ };
22060
+ try {
22061
+ await executeWorkspaceScript(
22062
+ toScriptConfig(targetBeforeEachHook, "before_each", `target hook for '${evalCase.id}'`),
22063
+ scriptContext
22064
+ );
22065
+ beforeEachNeedsFreshBaseline = true;
22066
+ } catch (error) {
22067
+ const message = error instanceof Error ? error.message : String(error);
22068
+ return buildErrorResult(
22069
+ evalCase,
22070
+ target.name,
22071
+ nowFn(),
22072
+ new Error(`target before_each hook failed: ${message}`),
22073
+ promptInputs,
22074
+ provider,
22075
+ "setup",
22076
+ "script_error",
22077
+ verbose
22078
+ );
22079
+ }
22080
+ }
22043
22081
  let baselineCommit = beforeEachNeedsFreshBaseline ? void 0 : sharedBaselineCommit;
22044
22082
  if (!baselineCommit && workspacePath) {
22045
22083
  try {
@@ -22194,6 +22232,26 @@ async function runEvalCase(options) {
22194
22232
  ${providerFileChanges}` : providerFileChanges;
22195
22233
  }
22196
22234
  const providerError = extractProviderError(providerResponse);
22235
+ const targetAfterEachHook = options.targetHooks?.after_each;
22236
+ if (workspacePath && hasHookCommand(targetAfterEachHook)) {
22237
+ const scriptContext = {
22238
+ workspacePath,
22239
+ testId: evalCase.id,
22240
+ evalRunId: evalRunId ?? "",
22241
+ caseInput: evalCase.question,
22242
+ caseMetadata: evalCase.metadata,
22243
+ evalDir,
22244
+ workspaceFileDir: evalCase.workspace?.workspaceFileDir
22245
+ };
22246
+ try {
22247
+ await executeWorkspaceScript(
22248
+ toScriptConfig(targetAfterEachHook, "after_each", `target hook for '${evalCase.id}'`),
22249
+ scriptContext,
22250
+ "warn"
22251
+ );
22252
+ } catch {
22253
+ }
22254
+ }
22197
22255
  if (caseHooksEnabled && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none") {
22198
22256
  try {
22199
22257
  if (repoManager && evalCase.workspace.repos?.length) {
@@ -23324,7 +23382,7 @@ function computeWeightedMean(entries) {
23324
23382
 
23325
23383
  // src/evaluation/evaluate.ts
23326
23384
  init_cjs_shims();
23327
- var import_node_fs17 = require("fs");
23385
+ var import_node_fs18 = require("fs");
23328
23386
  var import_node_path50 = __toESM(require("path"), 1);
23329
23387
 
23330
23388
  // src/evaluation/providers/function-provider.ts
@@ -23482,7 +23540,7 @@ async function discoverDefaultTarget(repoRoot) {
23482
23540
  for (const dir of chain) {
23483
23541
  for (const candidate of TARGET_FILE_CANDIDATES) {
23484
23542
  const targetsPath = import_node_path50.default.join(dir, candidate);
23485
- if (!(0, import_node_fs17.existsSync)(targetsPath)) continue;
23543
+ if (!(0, import_node_fs18.existsSync)(targetsPath)) continue;
23486
23544
  try {
23487
23545
  const definitions = await readTargetDefinitions(targetsPath);
23488
23546
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -23494,16 +23552,16 @@ async function discoverDefaultTarget(repoRoot) {
23494
23552
  return null;
23495
23553
  }
23496
23554
  async function loadEnvHierarchy(repoRoot, startPath) {
23497
- const { readFileSync: readFileSync5 } = await import("fs");
23555
+ const { readFileSync: readFileSync6 } = await import("fs");
23498
23556
  const chain = buildDirectoryChain2(startPath, repoRoot);
23499
23557
  const envFiles = [];
23500
23558
  for (const dir of chain) {
23501
23559
  const envPath = import_node_path50.default.join(dir, ".env");
23502
- if ((0, import_node_fs17.existsSync)(envPath)) envFiles.push(envPath);
23560
+ if ((0, import_node_fs18.existsSync)(envPath)) envFiles.push(envPath);
23503
23561
  }
23504
23562
  for (let i = 0; i < envFiles.length; i++) {
23505
23563
  try {
23506
- const content = readFileSync5(envFiles[i], "utf8");
23564
+ const content = readFileSync6(envFiles[i], "utf8");
23507
23565
  for (const line of content.split("\n")) {
23508
23566
  const trimmed = line.trim();
23509
23567
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -23576,12 +23634,12 @@ var CONFIG_FILE_NAMES = [
23576
23634
  ".agentv/config.js"
23577
23635
  ];
23578
23636
  async function loadTsConfig(projectRoot) {
23579
- const { existsSync: existsSync9 } = await import("fs");
23637
+ const { existsSync: existsSync10 } = await import("fs");
23580
23638
  const { pathToFileURL: pathToFileURL2 } = await import("url");
23581
23639
  const { join: join2 } = await import("path");
23582
23640
  for (const fileName of CONFIG_FILE_NAMES) {
23583
23641
  const filePath = join2(projectRoot, fileName);
23584
- if (!existsSync9(filePath)) {
23642
+ if (!existsSync10(filePath)) {
23585
23643
  continue;
23586
23644
  }
23587
23645
  try {
@@ -23830,9 +23888,9 @@ function shouldSkipCacheForTemperature(targetConfig) {
23830
23888
  // src/evaluation/results-repo.ts
23831
23889
  init_cjs_shims();
23832
23890
  var import_node_child_process12 = require("child_process");
23833
- var import_node_fs18 = require("fs");
23891
+ var import_node_fs19 = require("fs");
23834
23892
  var import_promises39 = require("fs/promises");
23835
- var import_node_os9 = __toESM(require("os"), 1);
23893
+ var import_node_os10 = __toESM(require("os"), 1);
23836
23894
  var import_node_path53 = __toESM(require("path"), 1);
23837
23895
  var import_node_util8 = require("util");
23838
23896
  var execFileAsync4 = (0, import_node_util8.promisify)(import_node_child_process12.execFile);
@@ -23870,18 +23928,18 @@ function getResultsRepoCachePaths(repo) {
23870
23928
  };
23871
23929
  }
23872
23930
  function readPersistedStatus(statusFile) {
23873
- if (!(0, import_node_fs18.existsSync)(statusFile)) {
23931
+ if (!(0, import_node_fs19.existsSync)(statusFile)) {
23874
23932
  return {};
23875
23933
  }
23876
23934
  try {
23877
- return JSON.parse((0, import_node_fs18.readFileSync)(statusFile, "utf8"));
23935
+ return JSON.parse((0, import_node_fs19.readFileSync)(statusFile, "utf8"));
23878
23936
  } catch {
23879
23937
  return {};
23880
23938
  }
23881
23939
  }
23882
23940
  function writePersistedStatus(statusFile, status) {
23883
- (0, import_node_fs18.mkdirSync)(import_node_path53.default.dirname(statusFile), { recursive: true });
23884
- (0, import_node_fs18.writeFileSync)(statusFile, `${JSON.stringify(status, null, 2)}
23941
+ (0, import_node_fs19.mkdirSync)(import_node_path53.default.dirname(statusFile), { recursive: true });
23942
+ (0, import_node_fs19.writeFileSync)(statusFile, `${JSON.stringify(status, null, 2)}
23885
23943
  `, "utf8");
23886
23944
  }
23887
23945
  async function runCommand(executable, args, options) {
@@ -23943,8 +24001,8 @@ function updateStatusFile(config, patch) {
23943
24001
  async function ensureResultsRepoClone(config) {
23944
24002
  const normalized = normalizeResultsExportConfig(config);
23945
24003
  const cachePaths = getResultsRepoCachePaths(normalized.repo);
23946
- (0, import_node_fs18.mkdirSync)(cachePaths.rootDir, { recursive: true });
23947
- if (!(0, import_node_fs18.existsSync)(cachePaths.repoDir)) {
24004
+ (0, import_node_fs19.mkdirSync)(cachePaths.rootDir, { recursive: true });
24005
+ if (!(0, import_node_fs19.existsSync)(cachePaths.repoDir)) {
23948
24006
  try {
23949
24007
  await runGit([
23950
24008
  "clone",
@@ -23958,7 +24016,7 @@ async function ensureResultsRepoClone(config) {
23958
24016
  throw withFriendlyGitHubAuthError(error);
23959
24017
  }
23960
24018
  }
23961
- if (!(0, import_node_fs18.existsSync)(import_node_path53.default.join(cachePaths.repoDir, ".git"))) {
24019
+ if (!(0, import_node_fs19.existsSync)(import_node_path53.default.join(cachePaths.repoDir, ".git"))) {
23962
24020
  throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
23963
24021
  }
23964
24022
  return cachePaths.repoDir;
@@ -23977,7 +24035,7 @@ function getResultsRepoStatus(config) {
23977
24035
  const persisted = readPersistedStatus(cachePaths.statusFile);
23978
24036
  return {
23979
24037
  configured: true,
23980
- available: (0, import_node_fs18.existsSync)(cachePaths.repoDir),
24038
+ available: (0, import_node_fs19.existsSync)(cachePaths.repoDir),
23981
24039
  repo: normalized.repo,
23982
24040
  path: normalized.path,
23983
24041
  auto_push: normalized.auto_push,
@@ -24023,7 +24081,7 @@ async function prepareResultsRepoBranch(config, branchName) {
24023
24081
  const cloneDir = await ensureResultsRepoClone(normalized);
24024
24082
  const baseBranch = await resolveDefaultBranch(cloneDir);
24025
24083
  await updateCacheRepo(cloneDir, baseBranch);
24026
- const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path53.default.join(import_node_os9.default.tmpdir(), "agentv-results-repo-"));
24084
+ const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path53.default.join(import_node_os10.default.tmpdir(), "agentv-results-repo-"));
24027
24085
  const worktreeDir = import_node_path53.default.join(worktreeRoot, "repo");
24028
24086
  await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
24029
24087
  cwd: cloneDir
@@ -24042,8 +24100,8 @@ async function prepareResultsRepoBranch(config, branchName) {
24042
24100
  };
24043
24101
  }
24044
24102
  async function stageResultsArtifacts(params) {
24045
- (0, import_node_fs18.rmSync)(params.destinationDir, { recursive: true, force: true });
24046
- (0, import_node_fs18.mkdirSync)(import_node_path53.default.dirname(params.destinationDir), { recursive: true });
24103
+ (0, import_node_fs19.rmSync)(params.destinationDir, { recursive: true, force: true });
24104
+ (0, import_node_fs19.mkdirSync)(import_node_path53.default.dirname(params.destinationDir), { recursive: true });
24047
24105
  await (0, import_promises39.cp)(params.sourceDir, params.destinationDir, { recursive: true });
24048
24106
  }
24049
24107
  function resolveResultsRepoRunsDir(config) {
@@ -24111,19 +24169,31 @@ async function createDraftResultsPr(params) {
24111
24169
 
24112
24170
  // src/benchmarks.ts
24113
24171
  init_cjs_shims();
24114
- var import_node_fs19 = require("fs");
24172
+ var import_node_fs20 = require("fs");
24115
24173
  var import_node_path54 = __toESM(require("path"), 1);
24116
24174
  var import_yaml10 = require("yaml");
24117
24175
  function getBenchmarksRegistryPath() {
24118
- return import_node_path54.default.join(getAgentvHome(), "projects.yaml");
24176
+ return import_node_path54.default.join(getAgentvConfigDir(), "projects.yaml");
24177
+ }
24178
+ function migrateProjectsYaml(targetPath) {
24179
+ const dataHome = getAgentvHome();
24180
+ const configDir = getAgentvConfigDir();
24181
+ if (dataHome === configDir) return;
24182
+ const legacyPath = import_node_path54.default.join(dataHome, "projects.yaml");
24183
+ if (!(0, import_node_fs20.existsSync)(legacyPath)) return;
24184
+ (0, import_node_fs20.mkdirSync)(import_node_path54.default.dirname(targetPath), { recursive: true });
24185
+ (0, import_node_fs20.copyFileSync)(legacyPath, targetPath);
24119
24186
  }
24120
24187
  function loadBenchmarkRegistry() {
24121
24188
  const registryPath = getBenchmarksRegistryPath();
24122
- if (!(0, import_node_fs19.existsSync)(registryPath)) {
24189
+ if (!(0, import_node_fs20.existsSync)(registryPath)) {
24190
+ migrateProjectsYaml(registryPath);
24191
+ }
24192
+ if (!(0, import_node_fs20.existsSync)(registryPath)) {
24123
24193
  return { benchmarks: [] };
24124
24194
  }
24125
24195
  try {
24126
- const raw = (0, import_node_fs19.readFileSync)(registryPath, "utf-8");
24196
+ const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
24127
24197
  const parsed = (0, import_yaml10.parse)(raw);
24128
24198
  if (!parsed || !Array.isArray(parsed.benchmarks)) {
24129
24199
  return { benchmarks: [] };
@@ -24136,10 +24206,10 @@ function loadBenchmarkRegistry() {
24136
24206
  function saveBenchmarkRegistry(registry) {
24137
24207
  const registryPath = getBenchmarksRegistryPath();
24138
24208
  const dir = import_node_path54.default.dirname(registryPath);
24139
- if (!(0, import_node_fs19.existsSync)(dir)) {
24140
- (0, import_node_fs19.mkdirSync)(dir, { recursive: true });
24209
+ if (!(0, import_node_fs20.existsSync)(dir)) {
24210
+ (0, import_node_fs20.mkdirSync)(dir, { recursive: true });
24141
24211
  }
24142
- (0, import_node_fs19.writeFileSync)(registryPath, (0, import_yaml10.stringify)({ benchmarks: registry.benchmarks }), "utf-8");
24212
+ (0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml10.stringify)({ benchmarks: registry.benchmarks }), "utf-8");
24143
24213
  }
24144
24214
  function deriveBenchmarkId(dirPath, existingIds) {
24145
24215
  const base = import_node_path54.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
@@ -24153,10 +24223,10 @@ function deriveBenchmarkId(dirPath, existingIds) {
24153
24223
  }
24154
24224
  function addBenchmark(benchmarkPath) {
24155
24225
  const absPath = import_node_path54.default.resolve(benchmarkPath);
24156
- if (!(0, import_node_fs19.existsSync)(absPath)) {
24226
+ if (!(0, import_node_fs20.existsSync)(absPath)) {
24157
24227
  throw new Error(`Directory not found: ${absPath}`);
24158
24228
  }
24159
- if (!(0, import_node_fs19.existsSync)(import_node_path54.default.join(absPath, ".agentv"))) {
24229
+ if (!(0, import_node_fs20.existsSync)(import_node_path54.default.join(absPath, ".agentv"))) {
24160
24230
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
24161
24231
  }
24162
24232
  const registry = loadBenchmarkRegistry();
@@ -24200,19 +24270,19 @@ function touchBenchmark(benchmarkId) {
24200
24270
  }
24201
24271
  function discoverBenchmarks(rootDir, maxDepth = 2) {
24202
24272
  const absRoot = import_node_path54.default.resolve(rootDir);
24203
- if (!(0, import_node_fs19.existsSync)(absRoot) || !(0, import_node_fs19.statSync)(absRoot).isDirectory()) {
24273
+ if (!(0, import_node_fs20.existsSync)(absRoot) || !(0, import_node_fs20.statSync)(absRoot).isDirectory()) {
24204
24274
  return [];
24205
24275
  }
24206
24276
  const results = [];
24207
24277
  function scan(dir, depth) {
24208
24278
  if (depth > maxDepth) return;
24209
- if ((0, import_node_fs19.existsSync)(import_node_path54.default.join(dir, ".agentv"))) {
24279
+ if ((0, import_node_fs20.existsSync)(import_node_path54.default.join(dir, ".agentv"))) {
24210
24280
  results.push(dir);
24211
24281
  return;
24212
24282
  }
24213
24283
  if (depth === maxDepth) return;
24214
24284
  try {
24215
- const entries = (0, import_node_fs19.readdirSync)(dir, { withFileTypes: true });
24285
+ const entries = (0, import_node_fs20.readdirSync)(dir, { withFileTypes: true });
24216
24286
  for (const entry of entries) {
24217
24287
  if (!entry.isDirectory()) continue;
24218
24288
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
@@ -25153,9 +25223,9 @@ function extractResponseItemContent(content) {
25153
25223
  // src/import/codex-session-discovery.ts
25154
25224
  init_cjs_shims();
25155
25225
  var import_promises41 = require("fs/promises");
25156
- var import_node_os10 = require("os");
25226
+ var import_node_os11 = require("os");
25157
25227
  var import_node_path56 = __toESM(require("path"), 1);
25158
- var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0, import_node_os10.homedir)(), ".codex", "sessions");
25228
+ var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0, import_node_os11.homedir)(), ".codex", "sessions");
25159
25229
  async function discoverCodexSessions(opts) {
25160
25230
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
25161
25231
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
@@ -25219,9 +25289,9 @@ async function discoverCodexSessions(opts) {
25219
25289
  // src/import/session-discovery.ts
25220
25290
  init_cjs_shims();
25221
25291
  var import_promises42 = require("fs/promises");
25222
- var import_node_os11 = require("os");
25292
+ var import_node_os12 = require("os");
25223
25293
  var import_node_path57 = __toESM(require("path"), 1);
25224
- var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0, import_node_os11.homedir)(), ".claude", "projects");
25294
+ var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0, import_node_os12.homedir)(), ".claude", "projects");
25225
25295
  function encodeProjectPath(projectPath) {
25226
25296
  return projectPath.replace(/\//g, "-");
25227
25297
  }
@@ -25515,6 +25585,7 @@ function createAgentKernel() {
25515
25585
  extractJsonBlob,
25516
25586
  extractLastAssistantContent,
25517
25587
  extractTargetFromSuite,
25588
+ extractTargetRefsFromSuite,
25518
25589
  extractTargetsFromSuite,
25519
25590
  extractTargetsFromTestCase,
25520
25591
  extractThreshold,
@@ -25524,6 +25595,7 @@ function createAgentKernel() {
25524
25595
  findGitRoot,
25525
25596
  freeformEvaluationSchema,
25526
25597
  generateRubrics,
25598
+ getAgentvConfigDir,
25527
25599
  getAgentvHome,
25528
25600
  getBenchmark,
25529
25601
  getBenchmarksRegistryPath,