@agentv/core 4.13.0 → 4.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -25,7 +25,7 @@ import {
25
25
  resolveDelegatedTargetDefinition,
26
26
  resolveFileReference,
27
27
  resolveTargetDefinition
28
- } from "./chunk-SWLNU3I6.js";
28
+ } from "./chunk-A3HYVKTI.js";
29
29
  import {
30
30
  execFileWithStdin,
31
31
  execShellWithStdin
@@ -3568,7 +3568,11 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
3568
3568
  if (format === "agent-skills-json") {
3569
3569
  return { tests: await loadTestsFromAgentSkills(evalFilePath) };
3570
3570
  }
3571
- const { tests, parsed } = await loadTestsFromYaml(evalFilePath, repoRoot, options);
3571
+ const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
3572
+ evalFilePath,
3573
+ repoRoot,
3574
+ options
3575
+ );
3572
3576
  const metadata = parseMetadata(parsed);
3573
3577
  const failOnError = extractFailOnError(parsed);
3574
3578
  const threshold = extractThreshold(parsed);
@@ -3581,7 +3585,8 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
3581
3585
  totalBudgetUsd: extractTotalBudgetUsd(parsed),
3582
3586
  ...metadata !== void 0 && { metadata },
3583
3587
  ...failOnError !== void 0 && { failOnError },
3584
- ...threshold !== void 0 && { threshold }
3588
+ ...threshold !== void 0 && { threshold },
3589
+ ...suiteWorkspacePath !== void 0 && { workspacePath: suiteWorkspacePath }
3585
3590
  };
3586
3591
  }
3587
3592
  var loadEvalSuite = loadTestSuite;
@@ -3743,6 +3748,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3743
3748
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
3744
3749
  const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
3745
3750
  const caseTargets = extractTargetsFromTestCase(testCaseConfig);
3751
+ const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
3752
+ (v) => typeof v === "string"
3753
+ ) : void 0;
3754
+ const onDependencyFailureRaw = asString5(testCaseConfig.on_dependency_failure);
3755
+ const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
3746
3756
  const testCase = {
3747
3757
  id,
3748
3758
  suite: suiteName,
@@ -3760,11 +3770,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3760
3770
  workspace: mergedWorkspace,
3761
3771
  metadata,
3762
3772
  targets: caseTargets,
3763
- ...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
3773
+ ...caseThreshold !== void 0 ? { threshold: caseThreshold } : {},
3774
+ ...dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {},
3775
+ ...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}
3764
3776
  };
3765
3777
  results.push(testCase);
3766
3778
  }
3767
- return { tests: results, parsed: suite };
3779
+ return { tests: results, parsed: suite, suiteWorkspacePath: suiteWorkspace?.path };
3768
3780
  }
3769
3781
  async function loadTestById(evalFilePath, repoRoot, evalId) {
3770
3782
  const tests = await loadTests(evalFilePath, repoRoot);
@@ -6612,11 +6624,123 @@ function formatElapsed3(startedAt) {
6612
6624
  // src/evaluation/providers/copilot-cli.ts
6613
6625
  import { randomUUID as randomUUID5 } from "node:crypto";
6614
6626
  import { mkdir as mkdir4 } from "node:fs/promises";
6615
- import path16 from "node:path";
6627
+ import { homedir as homedir2 } from "node:os";
6628
+ import path17 from "node:path";
6616
6629
  import { Readable, Writable } from "node:stream";
6617
6630
  import { spawn as spawn2 } from "node:child_process";
6618
6631
  import * as acp from "@agentclientprotocol/sdk";
6619
6632
 
6633
+ // src/evaluation/workspace/file-changes.ts
6634
+ import { exec as execCallback } from "node:child_process";
6635
+ import { readdirSync, statSync } from "node:fs";
6636
+ import { readFile as readFile9, readdir, stat } from "node:fs/promises";
6637
+ import path15 from "node:path";
6638
+ import { promisify as promisify2 } from "node:util";
6639
+ var execAsync2 = promisify2(execCallback);
6640
+ var SNAPSHOT_MAX_FILE_BYTES = 512 * 1024;
6641
+ var SNAPSHOT_EXCLUDE_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".agentv", "__pycache__"]);
6642
+ function gitExecOpts(workspacePath) {
6643
+ const { GIT_DIR: _, GIT_WORK_TREE: __, ...env } = process.env;
6644
+ return { cwd: workspacePath, env };
6645
+ }
6646
+ async function initializeBaseline(workspacePath) {
6647
+ const opts = gitExecOpts(workspacePath);
6648
+ await execAsync2("git init", opts);
6649
+ await execAsync2("git add -A", opts);
6650
+ await execAsync2(
6651
+ 'git -c user.email=agentv@localhost -c user.name=agentv commit --allow-empty -m "agentv-baseline"',
6652
+ opts
6653
+ );
6654
+ const { stdout } = await execAsync2("git rev-parse HEAD", opts);
6655
+ return stdout.trim();
6656
+ }
6657
+ async function captureFileChanges(workspacePath, baselineCommit) {
6658
+ const opts = gitExecOpts(workspacePath);
6659
+ await stageNestedRepoChanges(workspacePath);
6660
+ await execAsync2("git add -A", opts);
6661
+ const { stdout } = await execAsync2(`git diff ${baselineCommit} --submodule=diff`, opts);
6662
+ return stdout.trim();
6663
+ }
6664
+ async function stageNestedRepoChanges(workspacePath) {
6665
+ let entries;
6666
+ try {
6667
+ entries = readdirSync(workspacePath);
6668
+ } catch {
6669
+ return;
6670
+ }
6671
+ for (const entry of entries) {
6672
+ if (entry === ".git" || entry === "node_modules") continue;
6673
+ const childPath = path15.join(workspacePath, entry);
6674
+ try {
6675
+ if (!statSync(childPath).isDirectory()) continue;
6676
+ if (!statSync(path15.join(childPath, ".git")).isDirectory()) continue;
6677
+ } catch {
6678
+ continue;
6679
+ }
6680
+ const childOpts = gitExecOpts(childPath);
6681
+ await execAsync2("git add -A", childOpts);
6682
+ }
6683
+ }
6684
+ async function captureSnapshot(dir) {
6685
+ const snapshot = /* @__PURE__ */ new Map();
6686
+ await walkDir(dir, dir, snapshot);
6687
+ return snapshot;
6688
+ }
6689
+ async function walkDir(rootDir, currentDir, snapshot) {
6690
+ let entries;
6691
+ try {
6692
+ entries = await readdir(currentDir);
6693
+ } catch {
6694
+ return;
6695
+ }
6696
+ for (const entry of entries) {
6697
+ if (SNAPSHOT_EXCLUDE_DIRS.has(entry)) continue;
6698
+ const fullPath = path15.join(currentDir, entry);
6699
+ let fileStat;
6700
+ try {
6701
+ fileStat = await stat(fullPath);
6702
+ } catch {
6703
+ continue;
6704
+ }
6705
+ if (fileStat.isDirectory()) {
6706
+ await walkDir(rootDir, fullPath, snapshot);
6707
+ } else if (fileStat.isFile()) {
6708
+ if (fileStat.size > SNAPSHOT_MAX_FILE_BYTES) continue;
6709
+ let content;
6710
+ try {
6711
+ content = await readFile9(fullPath, "utf8");
6712
+ if (content.includes("\0")) continue;
6713
+ } catch {
6714
+ continue;
6715
+ }
6716
+ const relativePath = path15.relative(rootDir, fullPath).replace(/\\/g, "/");
6717
+ snapshot.set(relativePath, content);
6718
+ }
6719
+ }
6720
+ }
6721
+ function generateNewFileDiff(relativePath, content) {
6722
+ const lines = content.endsWith("\n") ? content.slice(0, -1).split("\n") : content.split("\n");
6723
+ const addedLines = lines.map((l) => `+${l}`).join("\n");
6724
+ return [
6725
+ `diff --git a/${relativePath} b/${relativePath}`,
6726
+ "new file mode 100644",
6727
+ "--- /dev/null",
6728
+ `+++ b/${relativePath}`,
6729
+ `@@ -0,0 +1,${lines.length} @@`,
6730
+ addedLines
6731
+ ].join("\n");
6732
+ }
6733
+ async function captureSessionArtifacts(filesDir, pathPrefix = "") {
6734
+ const snapshot = await captureSnapshot(filesDir).catch(() => void 0);
6735
+ if (!snapshot || snapshot.size === 0) return void 0;
6736
+ const parts = [];
6737
+ for (const [relPath, content] of snapshot) {
6738
+ const displayPath = pathPrefix ? `${pathPrefix}/${relPath}` : relPath;
6739
+ parts.push(generateNewFileDiff(displayPath, content));
6740
+ }
6741
+ return parts.join("\n");
6742
+ }
6743
+
6620
6744
  // src/evaluation/providers/copilot-cli-log-tracker.ts
6621
6745
  var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.copilotCliLogs");
6622
6746
  var GLOBAL_SUBSCRIBERS_KEY3 = Symbol.for("agentv.copilotCliLogSubscribers");
@@ -6672,9 +6796,9 @@ function subscribeToCopilotCliLogEntries(listener) {
6672
6796
 
6673
6797
  // src/evaluation/providers/copilot-utils.ts
6674
6798
  import { randomUUID as randomUUID4 } from "node:crypto";
6675
- import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
6799
+ import { createWriteStream as createWriteStream4, existsSync, readdirSync as readdirSync2 } from "node:fs";
6676
6800
  import { arch, homedir, platform } from "node:os";
6677
- import path15 from "node:path";
6801
+ import path16 from "node:path";
6678
6802
  import { fileURLToPath as fileURLToPath3 } from "node:url";
6679
6803
  function resolvePlatformCliPath() {
6680
6804
  const os4 = platform();
@@ -6698,7 +6822,7 @@ function resolvePlatformCliPath() {
6698
6822
  try {
6699
6823
  const resolved = import.meta.resolve(`${packageName}/package.json`);
6700
6824
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath3(resolved) : resolved;
6701
- const binaryPath = path15.join(path15.dirname(packageJsonPath), binaryName);
6825
+ const binaryPath = path16.join(path16.dirname(packageJsonPath), binaryName);
6702
6826
  if (existsSync(binaryPath)) {
6703
6827
  return binaryPath;
6704
6828
  }
@@ -6706,7 +6830,7 @@ function resolvePlatformCliPath() {
6706
6830
  }
6707
6831
  let searchDir = process.cwd();
6708
6832
  for (let i = 0; i < 10; i++) {
6709
- const standardPath = path15.join(
6833
+ const standardPath = path16.join(
6710
6834
  searchDir,
6711
6835
  "node_modules",
6712
6836
  ...packageName.split("/"),
@@ -6715,13 +6839,13 @@ function resolvePlatformCliPath() {
6715
6839
  if (existsSync(standardPath)) {
6716
6840
  return standardPath;
6717
6841
  }
6718
- const bunDir = path15.join(searchDir, "node_modules", ".bun");
6842
+ const bunDir = path16.join(searchDir, "node_modules", ".bun");
6719
6843
  const prefix = `@github+copilot-${osPart}-${archPart}@`;
6720
6844
  try {
6721
- const entries = readdirSync(bunDir);
6845
+ const entries = readdirSync2(bunDir);
6722
6846
  for (const entry of entries) {
6723
6847
  if (entry.startsWith(prefix)) {
6724
- const candidate = path15.join(
6848
+ const candidate = path16.join(
6725
6849
  bunDir,
6726
6850
  entry,
6727
6851
  "node_modules",
@@ -6736,16 +6860,16 @@ function resolvePlatformCliPath() {
6736
6860
  }
6737
6861
  } catch {
6738
6862
  }
6739
- const parent = path15.dirname(searchDir);
6863
+ const parent = path16.dirname(searchDir);
6740
6864
  if (parent === searchDir) break;
6741
6865
  searchDir = parent;
6742
6866
  }
6743
6867
  for (const root of globalNpmRoots()) {
6744
- const hoisted = path15.join(root, "@github", `copilot-${osPart}-${archPart}`, binaryName);
6868
+ const hoisted = path16.join(root, "@github", `copilot-${osPart}-${archPart}`, binaryName);
6745
6869
  if (existsSync(hoisted)) {
6746
6870
  return hoisted;
6747
6871
  }
6748
- const nested = path15.join(
6872
+ const nested = path16.join(
6749
6873
  root,
6750
6874
  "@github",
6751
6875
  "copilot",
@@ -6766,20 +6890,20 @@ function globalNpmRoots() {
6766
6890
  const home = homedir();
6767
6891
  if (os4 === "win32") {
6768
6892
  if (process.env.APPDATA) {
6769
- roots.push(path15.join(process.env.APPDATA, "npm", "node_modules"));
6893
+ roots.push(path16.join(process.env.APPDATA, "npm", "node_modules"));
6770
6894
  }
6771
- roots.push(path15.join(home, "AppData", "Roaming", "npm", "node_modules"));
6895
+ roots.push(path16.join(home, "AppData", "Roaming", "npm", "node_modules"));
6772
6896
  } else {
6773
6897
  roots.push("/opt/homebrew/lib/node_modules");
6774
6898
  roots.push("/usr/local/lib/node_modules");
6775
6899
  roots.push("/usr/lib/node_modules");
6776
- roots.push(path15.join(home, ".npm-global", "lib", "node_modules"));
6777
- roots.push(path15.join(home, ".local", "lib", "node_modules"));
6900
+ roots.push(path16.join(home, ".npm-global", "lib", "node_modules"));
6901
+ roots.push(path16.join(home, ".local", "lib", "node_modules"));
6778
6902
  }
6779
6903
  if (process.env.npm_config_prefix) {
6780
6904
  const prefix = process.env.npm_config_prefix;
6781
6905
  roots.push(
6782
- os4 === "win32" ? path15.join(prefix, "node_modules") : path15.join(prefix, "lib", "node_modules")
6906
+ os4 === "win32" ? path16.join(prefix, "node_modules") : path16.join(prefix, "lib", "node_modules")
6783
6907
  );
6784
6908
  }
6785
6909
  return Array.from(new Set(roots));
@@ -6826,14 +6950,22 @@ var CopilotStreamLogger = class _CopilotStreamLogger {
6826
6950
  startedAt = Date.now();
6827
6951
  format;
6828
6952
  summarize;
6829
- constructor(filePath, format, summarize) {
6953
+ chunkExtractor;
6954
+ pendingText = "";
6955
+ constructor(filePath, format, summarize, chunkExtractor) {
6830
6956
  this.filePath = filePath;
6831
6957
  this.format = format;
6832
6958
  this.summarize = summarize;
6959
+ this.chunkExtractor = chunkExtractor;
6833
6960
  this.stream = createWriteStream4(filePath, { flags: "a" });
6834
6961
  }
6835
6962
  static async create(options, summarize) {
6836
- const logger = new _CopilotStreamLogger(options.filePath, options.format, summarize);
6963
+ const logger = new _CopilotStreamLogger(
6964
+ options.filePath,
6965
+ options.format,
6966
+ summarize,
6967
+ options.chunkExtractor
6968
+ );
6837
6969
  const header = [
6838
6970
  `# ${options.headerLabel} stream log`,
6839
6971
  `# target: ${options.targetName}`,
@@ -6849,19 +6981,42 @@ var CopilotStreamLogger = class _CopilotStreamLogger {
6849
6981
  return logger;
6850
6982
  }
6851
6983
  handleEvent(eventType, data) {
6852
- const elapsed = formatElapsed4(this.startedAt);
6853
6984
  if (this.format === "json") {
6854
- this.stream.write(`${JSON.stringify({ time: elapsed, event: eventType, data })}
6855
- `);
6856
- } else {
6857
- const summary = this.summarize(eventType, data);
6858
- if (summary) {
6859
- this.stream.write(`[+${elapsed}] [${eventType}] ${summary}
6985
+ const elapsed2 = formatElapsed4(this.startedAt);
6986
+ this.stream.write(`${JSON.stringify({ time: elapsed2, event: eventType, data })}
6860
6987
  `);
6988
+ return;
6989
+ }
6990
+ if (this.chunkExtractor) {
6991
+ const chunkText = this.chunkExtractor(eventType, data);
6992
+ if (chunkText === null) {
6993
+ this.pendingText = "";
6994
+ return;
6995
+ }
6996
+ if (chunkText !== void 0) {
6997
+ this.pendingText += chunkText;
6998
+ return;
6861
6999
  }
7000
+ this.flushPendingText();
7001
+ }
7002
+ const elapsed = formatElapsed4(this.startedAt);
7003
+ const summary = this.summarize(eventType, data);
7004
+ if (summary) {
7005
+ this.stream.write(`[+${elapsed}] [${eventType}] ${summary}
7006
+ `);
6862
7007
  }
6863
7008
  }
7009
+ flushPendingText() {
7010
+ if (!this.pendingText) return;
7011
+ const elapsed = formatElapsed4(this.startedAt);
7012
+ this.stream.write(`[+${elapsed}] [assistant_message] ${this.pendingText}
7013
+ `);
7014
+ this.pendingText = "";
7015
+ }
6864
7016
  async close() {
7017
+ if (this.format !== "json") {
7018
+ this.flushPendingText();
7019
+ }
6865
7020
  await new Promise((resolve, reject) => {
6866
7021
  this.stream.once("error", reject);
6867
7022
  this.stream.end(() => resolve());
@@ -7070,6 +7225,10 @@ var CopilotCliProvider = class {
7070
7225
  content: finalContent
7071
7226
  });
7072
7227
  }
7228
+ const sessionId = session.sessionId;
7229
+ const fileChanges = sessionId ? await captureSessionArtifacts(
7230
+ path17.join(homedir2(), ".copilot", "session-state", sessionId, "files")
7231
+ ).catch(() => void 0) : void 0;
7073
7232
  return {
7074
7233
  raw: {
7075
7234
  model: this.config.model,
@@ -7081,7 +7240,8 @@ var CopilotCliProvider = class {
7081
7240
  costUsd,
7082
7241
  durationMs,
7083
7242
  startTime,
7084
- endTime
7243
+ endTime,
7244
+ ...fileChanges ? { fileChanges } : {}
7085
7245
  };
7086
7246
  } finally {
7087
7247
  await logger?.close();
@@ -7122,10 +7282,10 @@ var CopilotCliProvider = class {
7122
7282
  }
7123
7283
  resolveCwd(cwdOverride) {
7124
7284
  if (cwdOverride) {
7125
- return path16.resolve(cwdOverride);
7285
+ return path17.resolve(cwdOverride);
7126
7286
  }
7127
7287
  if (this.config.cwd) {
7128
- return path16.resolve(this.config.cwd);
7288
+ return path17.resolve(this.config.cwd);
7129
7289
  }
7130
7290
  return void 0;
7131
7291
  }
@@ -7144,9 +7304,9 @@ var CopilotCliProvider = class {
7144
7304
  return void 0;
7145
7305
  }
7146
7306
  if (this.config.logDir) {
7147
- return path16.resolve(this.config.logDir);
7307
+ return path17.resolve(this.config.logDir);
7148
7308
  }
7149
- return path16.join(process.cwd(), ".agentv", "logs", "copilot-cli");
7309
+ return path17.join(process.cwd(), ".agentv", "logs", "copilot-cli");
7150
7310
  }
7151
7311
  async createStreamLogger(request) {
7152
7312
  const logDir = this.resolveLogDirectory();
@@ -7160,7 +7320,7 @@ var CopilotCliProvider = class {
7160
7320
  console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
7161
7321
  return void 0;
7162
7322
  }
7163
- const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
7323
+ const filePath = path17.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
7164
7324
  try {
7165
7325
  const logger = await CopilotStreamLogger.create(
7166
7326
  {
@@ -7169,7 +7329,8 @@ var CopilotCliProvider = class {
7169
7329
  evalCaseId: request.evalCaseId,
7170
7330
  attempt: request.attempt,
7171
7331
  format: this.config.logFormat ?? "summary",
7172
- headerLabel: "Copilot CLI (ACP)"
7332
+ headerLabel: "Copilot CLI (ACP)",
7333
+ chunkExtractor: extractAcpChunk
7173
7334
  },
7174
7335
  summarizeAcpEvent
7175
7336
  );
@@ -7228,6 +7389,14 @@ Fix options:
7228
7389
  - In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
7229
7390
  - In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
7230
7391
  }
7392
+ function extractAcpChunk(eventType, data) {
7393
+ if (eventType === "agent_thought_chunk") return null;
7394
+ if (eventType !== "agent_message_chunk") return void 0;
7395
+ if (!data || typeof data !== "object") return void 0;
7396
+ const d = data;
7397
+ const content = d.content;
7398
+ return content?.type === "text" && typeof content.text === "string" ? content.text : void 0;
7399
+ }
7231
7400
  function summarizeAcpEvent(eventType, data) {
7232
7401
  if (!data || typeof data !== "object") {
7233
7402
  return eventType;
@@ -7253,9 +7422,9 @@ function summarizeAcpEvent(eventType, data) {
7253
7422
  }
7254
7423
 
7255
7424
  // src/evaluation/providers/copilot-log.ts
7256
- import { readFile as readFile10 } from "node:fs/promises";
7257
- import { homedir as homedir3 } from "node:os";
7258
- import path18 from "node:path";
7425
+ import { readFile as readFile11 } from "node:fs/promises";
7426
+ import { homedir as homedir4 } from "node:os";
7427
+ import path19 from "node:path";
7259
7428
 
7260
7429
  // src/evaluation/providers/copilot-log-parser.ts
7261
7430
  function parseCopilotEvents(eventsJsonl) {
@@ -7387,32 +7556,32 @@ function parseCopilotEvents(eventsJsonl) {
7387
7556
  }
7388
7557
 
7389
7558
  // src/evaluation/providers/copilot-session-discovery.ts
7390
- import { readFile as readFile9, readdir, stat } from "node:fs/promises";
7391
- import { homedir as homedir2 } from "node:os";
7392
- import path17 from "node:path";
7559
+ import { readFile as readFile10, readdir as readdir2, stat as stat2 } from "node:fs/promises";
7560
+ import { homedir as homedir3 } from "node:os";
7561
+ import path18 from "node:path";
7393
7562
  import { parse as parseYaml2 } from "yaml";
7394
- var DEFAULT_SESSION_STATE_DIR = () => path17.join(homedir2(), ".copilot", "session-state");
7563
+ var DEFAULT_SESSION_STATE_DIR = () => path18.join(homedir3(), ".copilot", "session-state");
7395
7564
  async function discoverCopilotSessions(opts) {
7396
7565
  const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
7397
7566
  const limit = opts?.limit ?? 10;
7398
7567
  let entries;
7399
7568
  try {
7400
- entries = await readdir(sessionStateDir);
7569
+ entries = await readdir2(sessionStateDir);
7401
7570
  } catch {
7402
7571
  return [];
7403
7572
  }
7404
7573
  const sessions = [];
7405
7574
  for (const entry of entries) {
7406
- const sessionDir = path17.join(sessionStateDir, entry);
7407
- const workspacePath = path17.join(sessionDir, "workspace.yaml");
7408
- const eventsPath = path17.join(sessionDir, "events.jsonl");
7575
+ const sessionDir = path18.join(sessionStateDir, entry);
7576
+ const workspacePath = path18.join(sessionDir, "workspace.yaml");
7577
+ const eventsPath = path18.join(sessionDir, "events.jsonl");
7409
7578
  try {
7410
- const workspaceContent = await readFile9(workspacePath, "utf8");
7579
+ const workspaceContent = await readFile10(workspacePath, "utf8");
7411
7580
  const workspace = parseYaml2(workspaceContent) ?? {};
7412
7581
  const cwd = String(workspace.cwd ?? "");
7413
7582
  let updatedAt;
7414
7583
  try {
7415
- const eventsStat = await stat(eventsPath);
7584
+ const eventsStat = await stat2(eventsPath);
7416
7585
  updatedAt = eventsStat.mtime;
7417
7586
  } catch {
7418
7587
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -7466,21 +7635,24 @@ var CopilotLogProvider = class {
7466
7635
  }
7467
7636
  async invoke(_request) {
7468
7637
  const sessionDir = await this.resolveSessionDir();
7469
- const eventsPath = path18.join(sessionDir, "events.jsonl");
7638
+ const eventsPath = path19.join(sessionDir, "events.jsonl");
7470
7639
  let eventsContent;
7471
7640
  try {
7472
- eventsContent = await readFile10(eventsPath, "utf8");
7641
+ eventsContent = await readFile11(eventsPath, "utf8");
7473
7642
  } catch (err) {
7474
7643
  throw new Error(
7475
7644
  `Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
7476
7645
  );
7477
7646
  }
7478
7647
  const parsed = parseCopilotEvents(eventsContent);
7648
+ const filesDir = path19.join(sessionDir, "files");
7649
+ const fileChanges = await captureSessionArtifacts(filesDir).catch(() => void 0);
7479
7650
  return {
7480
7651
  output: parsed.messages,
7481
7652
  tokenUsage: parsed.tokenUsage,
7482
7653
  durationMs: parsed.durationMs,
7483
- startTime: parsed.meta.startedAt
7654
+ startTime: parsed.meta.startedAt,
7655
+ ...fileChanges ? { fileChanges } : {}
7484
7656
  };
7485
7657
  }
7486
7658
  async resolveSessionDir() {
@@ -7488,8 +7660,8 @@ var CopilotLogProvider = class {
7488
7660
  return this.config.sessionDir;
7489
7661
  }
7490
7662
  if (this.config.sessionId) {
7491
- const stateDir = this.config.sessionStateDir ?? path18.join(homedir3(), ".copilot", "session-state");
7492
- return path18.join(stateDir, this.config.sessionId);
7663
+ const stateDir = this.config.sessionStateDir ?? path19.join(homedir4(), ".copilot", "session-state");
7664
+ return path19.join(stateDir, this.config.sessionId);
7493
7665
  }
7494
7666
  if (this.config.discover === "latest") {
7495
7667
  const sessions = await discoverCopilotSessions({
@@ -7514,7 +7686,7 @@ var CopilotLogProvider = class {
7514
7686
  import { randomUUID as randomUUID6 } from "node:crypto";
7515
7687
  import { existsSync as existsSync2 } from "node:fs";
7516
7688
  import { mkdir as mkdir5 } from "node:fs/promises";
7517
- import path19 from "node:path";
7689
+ import path20 from "node:path";
7518
7690
 
7519
7691
  // src/evaluation/providers/copilot-sdk-log-tracker.ts
7520
7692
  var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
@@ -7760,6 +7932,10 @@ var CopilotSdkProvider = class {
7760
7932
  content: finalContent
7761
7933
  });
7762
7934
  }
7935
+ const sessionWorkspacePath = session.workspacePath;
7936
+ const fileChanges = sessionWorkspacePath ? await captureSessionArtifacts(path20.join(sessionWorkspacePath, "files")).catch(
7937
+ () => void 0
7938
+ ) : void 0;
7763
7939
  return {
7764
7940
  raw: {
7765
7941
  model: this.config.model,
@@ -7771,7 +7947,8 @@ var CopilotSdkProvider = class {
7771
7947
  costUsd,
7772
7948
  durationMs,
7773
7949
  startTime,
7774
- endTime
7950
+ endTime,
7951
+ ...fileChanges ? { fileChanges } : {}
7775
7952
  };
7776
7953
  } finally {
7777
7954
  unsubscribe();
@@ -7824,10 +8001,10 @@ var CopilotSdkProvider = class {
7824
8001
  }
7825
8002
  resolveCwd(cwdOverride) {
7826
8003
  if (cwdOverride) {
7827
- return path19.resolve(cwdOverride);
8004
+ return path20.resolve(cwdOverride);
7828
8005
  }
7829
8006
  if (this.config.cwd) {
7830
- return path19.resolve(this.config.cwd);
8007
+ return path20.resolve(this.config.cwd);
7831
8008
  }
7832
8009
  return void 0;
7833
8010
  }
@@ -7836,9 +8013,9 @@ var CopilotSdkProvider = class {
7836
8013
  return void 0;
7837
8014
  }
7838
8015
  if (this.config.logDir) {
7839
- return path19.resolve(this.config.logDir);
8016
+ return path20.resolve(this.config.logDir);
7840
8017
  }
7841
- return path19.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
8018
+ return path20.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
7842
8019
  }
7843
8020
  async createStreamLogger(request) {
7844
8021
  const logDir = this.resolveLogDirectory();
@@ -7852,7 +8029,7 @@ var CopilotSdkProvider = class {
7852
8029
  console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
7853
8030
  return void 0;
7854
8031
  }
7855
- const filePath = path19.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
8032
+ const filePath = path20.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
7856
8033
  try {
7857
8034
  const logger = await CopilotStreamLogger.create(
7858
8035
  {
@@ -7861,7 +8038,8 @@ var CopilotSdkProvider = class {
7861
8038
  evalCaseId: request.evalCaseId,
7862
8039
  attempt: request.attempt,
7863
8040
  format: this.config.logFormat ?? "summary",
7864
- headerLabel: "Copilot SDK"
8041
+ headerLabel: "Copilot SDK",
8042
+ chunkExtractor: extractSdkChunk
7865
8043
  },
7866
8044
  summarizeSdkEvent
7867
8045
  );
@@ -7881,9 +8059,9 @@ var CopilotSdkProvider = class {
7881
8059
  };
7882
8060
  function resolveSkillDirectories(cwd) {
7883
8061
  const candidates = [
7884
- path19.join(cwd, ".claude", "skills"),
7885
- path19.join(cwd, ".agents", "skills"),
7886
- path19.join(cwd, ".codex", "skills")
8062
+ path20.join(cwd, ".claude", "skills"),
8063
+ path20.join(cwd, ".agents", "skills"),
8064
+ path20.join(cwd, ".codex", "skills")
7887
8065
  ];
7888
8066
  return candidates.filter((dir) => existsSync2(dir));
7889
8067
  }
@@ -7897,6 +8075,12 @@ function normalizeByokBaseUrl(baseUrl, type) {
7897
8075
  }
7898
8076
  return trimmed;
7899
8077
  }
8078
+ function extractSdkChunk(eventType, data) {
8079
+ if (eventType !== "assistant.message_delta") return void 0;
8080
+ if (!data || typeof data !== "object") return void 0;
8081
+ const d = data;
8082
+ return typeof d.deltaContent === "string" ? d.deltaContent : void 0;
8083
+ }
7900
8084
  function summarizeSdkEvent(eventType, data) {
7901
8085
  if (!data || typeof data !== "object") {
7902
8086
  return eventType;
@@ -7967,7 +8151,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
7967
8151
  import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
7968
8152
  import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
7969
8153
  import { tmpdir } from "node:os";
7970
- import path20 from "node:path";
8154
+ import path21 from "node:path";
7971
8155
 
7972
8156
  // src/evaluation/providers/pi-log-tracker.ts
7973
8157
  var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
@@ -8173,7 +8357,7 @@ var PiCliProvider = class {
8173
8357
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
8174
8358
  const logger = await this.createStreamLogger(request).catch(() => void 0);
8175
8359
  try {
8176
- const promptFile = path20.join(cwd, PROMPT_FILENAME);
8360
+ const promptFile = path21.join(cwd, PROMPT_FILENAME);
8177
8361
  await writeFile(promptFile, request.question, "utf8");
8178
8362
  const args = this.buildPiArgs(request.question, inputFiles);
8179
8363
  const result = await this.executePi(args, cwd, request.signal, logger);
@@ -8236,10 +8420,10 @@ var PiCliProvider = class {
8236
8420
  }
8237
8421
  resolveCwd(workspaceRoot, cwdOverride) {
8238
8422
  if (cwdOverride) {
8239
- return path20.resolve(cwdOverride);
8423
+ return path21.resolve(cwdOverride);
8240
8424
  }
8241
8425
  if (this.config.cwd) {
8242
- return path20.resolve(this.config.cwd);
8426
+ return path21.resolve(this.config.cwd);
8243
8427
  }
8244
8428
  if (workspaceRoot) {
8245
8429
  return workspaceRoot;
@@ -8345,7 +8529,7 @@ ${prompt}` : prompt;
8345
8529
  return env;
8346
8530
  }
8347
8531
  async createWorkspace() {
8348
- return await mkdtemp(path20.join(tmpdir(), WORKSPACE_PREFIX));
8532
+ return await mkdtemp(path21.join(tmpdir(), WORKSPACE_PREFIX));
8349
8533
  }
8350
8534
  async cleanupWorkspace(workspaceRoot) {
8351
8535
  try {
@@ -8355,9 +8539,9 @@ ${prompt}` : prompt;
8355
8539
  }
8356
8540
  resolveLogDirectory() {
8357
8541
  if (this.config.logDir) {
8358
- return path20.resolve(this.config.logDir);
8542
+ return path21.resolve(this.config.logDir);
8359
8543
  }
8360
- return path20.join(process.cwd(), ".agentv", "logs", "pi-cli");
8544
+ return path21.join(process.cwd(), ".agentv", "logs", "pi-cli");
8361
8545
  }
8362
8546
  async createStreamLogger(request) {
8363
8547
  const logDir = this.resolveLogDirectory();
@@ -8371,7 +8555,7 @@ ${prompt}` : prompt;
8371
8555
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
8372
8556
  return void 0;
8373
8557
  }
8374
- const filePath = path20.join(logDir, buildLogFilename5(request, this.targetName));
8558
+ const filePath = path21.join(logDir, buildLogFilename5(request, this.targetName));
8375
8559
  try {
8376
8560
  const logger = await PiStreamLogger.create({
8377
8561
  filePath,
@@ -8842,8 +9026,8 @@ function resolveWindowsCmd(executable) {
8842
9026
  const content = readFileSync2(cmdPath, "utf-8");
8843
9027
  const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
8844
9028
  if (match) {
8845
- const dp0 = path20.dirname(path20.resolve(cmdPath));
8846
- const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path20.sep}`);
9029
+ const dp0 = path21.dirname(path21.resolve(cmdPath));
9030
+ const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path21.sep}`);
8847
9031
  try {
8848
9032
  accessSync(scriptPath);
8849
9033
  return ["node", [scriptPath]];
@@ -8922,13 +9106,13 @@ import { execSync as execSync2 } from "node:child_process";
8922
9106
  import { randomUUID as randomUUID8 } from "node:crypto";
8923
9107
  import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
8924
9108
  import { mkdir as mkdir7 } from "node:fs/promises";
8925
- import path22 from "node:path";
9109
+ import path23 from "node:path";
8926
9110
  import { createInterface } from "node:readline";
8927
9111
  import { fileURLToPath as fileURLToPath4, pathToFileURL } from "node:url";
8928
9112
 
8929
9113
  // src/paths.ts
8930
9114
  import os2 from "node:os";
8931
- import path21 from "node:path";
9115
+ import path22 from "node:path";
8932
9116
  var logged = false;
8933
9117
  function getAgentvHome() {
8934
9118
  const envHome = process.env.AGENTV_HOME;
@@ -8939,19 +9123,19 @@ function getAgentvHome() {
8939
9123
  }
8940
9124
  return envHome;
8941
9125
  }
8942
- return path21.join(os2.homedir(), ".agentv");
9126
+ return path22.join(os2.homedir(), ".agentv");
8943
9127
  }
8944
9128
  function getWorkspacesRoot() {
8945
- return path21.join(getAgentvHome(), "workspaces");
9129
+ return path22.join(getAgentvHome(), "workspaces");
8946
9130
  }
8947
9131
  function getSubagentsRoot() {
8948
- return path21.join(getAgentvHome(), "subagents");
9132
+ return path22.join(getAgentvHome(), "subagents");
8949
9133
  }
8950
9134
  function getTraceStateRoot() {
8951
- return path21.join(getAgentvHome(), "trace-state");
9135
+ return path22.join(getAgentvHome(), "trace-state");
8952
9136
  }
8953
9137
  function getWorkspacePoolRoot() {
8954
- return path21.join(getAgentvHome(), "workspace-pool");
9138
+ return path22.join(getAgentvHome(), "workspace-pool");
8955
9139
  }
8956
9140
 
8957
9141
  // src/evaluation/providers/pi-coding-agent.ts
@@ -8973,7 +9157,7 @@ async function promptInstall() {
8973
9157
  }
8974
9158
  }
8975
9159
  function findManagedSdkInstallRoot() {
8976
- return path22.join(getAgentvHome(), "deps", "pi-sdk");
9160
+ return path23.join(getAgentvHome(), "deps", "pi-sdk");
8977
9161
  }
8978
9162
  function resolveGlobalNpmRoot() {
8979
9163
  try {
@@ -8987,7 +9171,7 @@ function resolveGlobalNpmRoot() {
8987
9171
  }
8988
9172
  }
8989
9173
  function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
8990
- return path22.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
9174
+ return path23.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
8991
9175
  }
8992
9176
  function findAccessiblePath(paths) {
8993
9177
  for (const candidate of paths) {
@@ -9013,11 +9197,11 @@ async function tryImportLocalSdkModules() {
9013
9197
  async function tryImportManagedSdkModules() {
9014
9198
  const managedRoot = findManagedSdkInstallRoot();
9015
9199
  const piCodingAgentEntry = findAccessiblePath([
9016
- path22.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
9200
+ path23.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
9017
9201
  ]);
9018
9202
  const piAiEntry = findAccessiblePath([
9019
- path22.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
9020
- path22.join(
9203
+ path23.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
9204
+ path23.join(
9021
9205
  managedRoot,
9022
9206
  "node_modules",
9023
9207
  "@mariozechner",
@@ -9048,7 +9232,7 @@ async function tryImportGlobalSdkModules() {
9048
9232
  ]);
9049
9233
  const piAiEntry = findAccessiblePath([
9050
9234
  buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
9051
- path22.join(
9235
+ path23.join(
9052
9236
  globalNpmRoot,
9053
9237
  "@mariozechner",
9054
9238
  "pi-coding-agent",
@@ -9349,10 +9533,10 @@ ${fileList}`;
9349
9533
  }
9350
9534
  resolveCwd(cwdOverride) {
9351
9535
  if (cwdOverride) {
9352
- return path22.resolve(cwdOverride);
9536
+ return path23.resolve(cwdOverride);
9353
9537
  }
9354
9538
  if (this.config.cwd) {
9355
- return path22.resolve(this.config.cwd);
9539
+ return path23.resolve(this.config.cwd);
9356
9540
  }
9357
9541
  return process.cwd();
9358
9542
  }
@@ -9371,9 +9555,9 @@ ${fileList}`;
9371
9555
  }
9372
9556
  resolveLogDirectory() {
9373
9557
  if (this.config.logDir) {
9374
- return path22.resolve(this.config.logDir);
9558
+ return path23.resolve(this.config.logDir);
9375
9559
  }
9376
- return path22.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9560
+ return path23.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9377
9561
  }
9378
9562
  async createStreamLogger(request) {
9379
9563
  const logDir = this.resolveLogDirectory();
@@ -9387,7 +9571,7 @@ ${fileList}`;
9387
9571
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
9388
9572
  return void 0;
9389
9573
  }
9390
- const filePath = path22.join(logDir, buildLogFilename6(request, this.targetName));
9574
+ const filePath = path23.join(logDir, buildLogFilename6(request, this.targetName));
9391
9575
  try {
9392
9576
  const logger = await PiStreamLogger2.create({
9393
9577
  filePath,
@@ -9601,18 +9785,18 @@ var ProviderRegistry = class {
9601
9785
 
9602
9786
  // src/evaluation/providers/vscode-provider.ts
9603
9787
  import { exec as exec2 } from "node:child_process";
9604
- import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
9605
- import path33 from "node:path";
9606
- import { promisify as promisify3 } from "node:util";
9788
+ import { constants as constants3, access as access3, stat as stat6 } from "node:fs/promises";
9789
+ import path34 from "node:path";
9790
+ import { promisify as promisify4 } from "node:util";
9607
9791
 
9608
9792
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
9609
- import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
9610
- import path31 from "node:path";
9793
+ import { stat as stat5, writeFile as writeFile4 } from "node:fs/promises";
9794
+ import path32 from "node:path";
9611
9795
 
9612
9796
  // src/evaluation/providers/vscode/utils/fs.ts
9613
9797
  import { constants as constants2 } from "node:fs";
9614
- import { access as access2, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
9615
- import path23 from "node:path";
9798
+ import { access as access2, mkdir as mkdir8, readdir as readdir3, rm as rm2, stat as stat3 } from "node:fs/promises";
9799
+ import path24 from "node:path";
9616
9800
  async function pathExists(target) {
9617
9801
  try {
9618
9802
  await access2(target, constants2.F_OK);
@@ -9625,10 +9809,10 @@ async function ensureDir(target) {
9625
9809
  await mkdir8(target, { recursive: true });
9626
9810
  }
9627
9811
  async function readDirEntries(target) {
9628
- const entries = await readdir2(target, { withFileTypes: true });
9812
+ const entries = await readdir3(target, { withFileTypes: true });
9629
9813
  return entries.map((entry) => ({
9630
9814
  name: entry.name,
9631
- absolutePath: path23.join(target, entry.name),
9815
+ absolutePath: path24.join(target, entry.name),
9632
9816
  isDirectory: entry.isDirectory()
9633
9817
  }));
9634
9818
  }
@@ -9643,9 +9827,9 @@ async function removeIfExists(target) {
9643
9827
  }
9644
9828
 
9645
9829
  // src/evaluation/providers/vscode/utils/path.ts
9646
- import path24 from "node:path";
9830
+ import path25 from "node:path";
9647
9831
  function pathToFileUri2(filePath) {
9648
- const absolutePath = path24.isAbsolute(filePath) ? filePath : path24.resolve(filePath);
9832
+ const absolutePath = path25.isAbsolute(filePath) ? filePath : path25.resolve(filePath);
9649
9833
  const normalizedPath = absolutePath.replace(/\\/g, "/");
9650
9834
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
9651
9835
  return `file:///${normalizedPath}`;
@@ -9654,7 +9838,7 @@ function pathToFileUri2(filePath) {
9654
9838
  }
9655
9839
 
9656
9840
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
9657
- import path25 from "node:path";
9841
+ import path26 from "node:path";
9658
9842
 
9659
9843
  // src/evaluation/providers/vscode/utils/template.ts
9660
9844
  function renderTemplate2(content, variables) {
@@ -9746,8 +9930,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
9746
9930
  });
9747
9931
  }
9748
9932
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
9749
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path25.basename(file)}`).join("\n");
9750
- const responseList = responseFiles.map((file) => `"${path25.basename(file)}"`).join(", ");
9933
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path26.basename(file)}`).join("\n");
9934
+ const responseList = responseFiles.map((file) => `"${path26.basename(file)}"`).join(", ");
9751
9935
  return renderTemplate2(templateContent, {
9752
9936
  requestFiles: requestLines,
9753
9937
  responseList
@@ -9755,8 +9939,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
9755
9939
  }
9756
9940
 
9757
9941
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
9758
- import { readFile as readFile11 } from "node:fs/promises";
9759
- import path26 from "node:path";
9942
+ import { readFile as readFile12 } from "node:fs/promises";
9943
+ import path27 from "node:path";
9760
9944
 
9761
9945
  // src/evaluation/providers/vscode/utils/time.ts
9762
9946
  function sleep2(ms) {
@@ -9794,7 +9978,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
9794
9978
  const maxAttempts = 10;
9795
9979
  while (attempts < maxAttempts) {
9796
9980
  try {
9797
- const content = await readFile11(responseFileFinal, { encoding: "utf8" });
9981
+ const content = await readFile12(responseFileFinal, { encoding: "utf8" });
9798
9982
  if (!silent) {
9799
9983
  process.stdout.write(`${content}
9800
9984
  `);
@@ -9815,7 +9999,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
9815
9999
  }
9816
10000
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
9817
10001
  if (!silent) {
9818
- const fileList = responseFilesFinal.map((file) => path26.basename(file)).join(", ");
10002
+ const fileList = responseFilesFinal.map((file) => path27.basename(file)).join(", ");
9819
10003
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
9820
10004
  }
9821
10005
  const deadline = Date.now() + timeoutMs;
@@ -9824,7 +10008,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9824
10008
  while (pending.size > 0) {
9825
10009
  if (Date.now() >= deadline) {
9826
10010
  if (!silent) {
9827
- const remaining = [...pending].map((f) => path26.basename(f)).join(", ");
10011
+ const remaining = [...pending].map((f) => path27.basename(f)).join(", ");
9828
10012
  console.error(
9829
10013
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
9830
10014
  );
@@ -9851,7 +10035,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9851
10035
  const maxAttempts = 10;
9852
10036
  while (attempts < maxAttempts) {
9853
10037
  try {
9854
- const content = await readFile11(file, { encoding: "utf8" });
10038
+ const content = await readFile12(file, { encoding: "utf8" });
9855
10039
  if (!silent) {
9856
10040
  process.stdout.write(`${content}
9857
10041
  `);
@@ -9875,21 +10059,21 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9875
10059
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
9876
10060
  import { exec, spawn as spawn4 } from "node:child_process";
9877
10061
  import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
9878
- import path28 from "node:path";
9879
- import { promisify as promisify2 } from "node:util";
10062
+ import path29 from "node:path";
10063
+ import { promisify as promisify3 } from "node:util";
9880
10064
 
9881
10065
  // src/evaluation/providers/vscode/dispatch/constants.ts
9882
- import path27 from "node:path";
10066
+ import path28 from "node:path";
9883
10067
  var DEFAULT_LOCK_NAME = "subagent.lock";
9884
10068
  var DEFAULT_ALIVE_FILENAME = ".alive";
9885
10069
  function getDefaultSubagentRoot(vscodeCmd = "code") {
9886
10070
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
9887
- return path27.join(getSubagentsRoot(), folder);
10071
+ return path28.join(getSubagentsRoot(), folder);
9888
10072
  }
9889
10073
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
9890
10074
 
9891
10075
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
9892
- var execAsync2 = promisify2(exec);
10076
+ var execAsync3 = promisify3(exec);
9893
10077
  function shellQuote(cmd) {
9894
10078
  return cmd.includes(" ") ? `"${cmd}"` : cmd;
9895
10079
  }
@@ -9935,7 +10119,7 @@ async function raceSpawnError(child, graceMs = 200) {
9935
10119
  }
9936
10120
  async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
9937
10121
  try {
9938
- const { stdout } = await execAsync2(`${shellQuote(vscodeCmd)} --status`, {
10122
+ const { stdout } = await execAsync3(`${shellQuote(vscodeCmd)} --status`, {
9939
10123
  timeout: 1e4,
9940
10124
  windowsHide: true
9941
10125
  });
@@ -9951,11 +10135,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9951
10135
  await raceSpawnError(child);
9952
10136
  return true;
9953
10137
  }
9954
- const aliveFile = path28.join(subagentDir, DEFAULT_ALIVE_FILENAME);
10138
+ const aliveFile = path29.join(subagentDir, DEFAULT_ALIVE_FILENAME);
9955
10139
  await removeIfExists(aliveFile);
9956
- const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
10140
+ const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
9957
10141
  await mkdir9(githubAgentsDir, { recursive: true });
9958
- const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
10142
+ const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
9959
10143
  await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
9960
10144
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
9961
10145
  label: "open-workspace"
@@ -9968,7 +10152,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9968
10152
  "chat",
9969
10153
  "-m",
9970
10154
  wakeupChatId,
9971
- `create a file named .alive in the ${path28.basename(subagentDir)} folder`
10155
+ `create a file named .alive in the ${path29.basename(subagentDir)} folder`
9972
10156
  ];
9973
10157
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
9974
10158
  await raceSpawnError(wakeupChild);
@@ -9983,10 +10167,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9983
10167
  return true;
9984
10168
  }
9985
10169
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
9986
- const workspacePath = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
9987
- const messagesDir = path28.join(subagentDir, "messages");
10170
+ const workspacePath = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
10171
+ const messagesDir = path29.join(subagentDir, "messages");
9988
10172
  await mkdir9(messagesDir, { recursive: true });
9989
- const reqFile = path28.join(messagesDir, `${timestamp}_req.md`);
10173
+ const reqFile = path29.join(messagesDir, `${timestamp}_req.md`);
9990
10174
  await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
9991
10175
  const reqUri = pathToFileUri2(reqFile);
9992
10176
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -9994,16 +10178,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
9994
10178
  chatArgs.push("-a", attachment);
9995
10179
  }
9996
10180
  chatArgs.push("-a", reqFile);
9997
- chatArgs.push(`Follow instructions in [${path28.basename(reqFile)}](${reqUri})`);
10181
+ chatArgs.push(`Follow instructions in [${path29.basename(reqFile)}](${reqUri})`);
9998
10182
  const workspaceReady = await ensureWorkspaceFocused(
9999
10183
  workspacePath,
10000
- path28.basename(subagentDir),
10184
+ path29.basename(subagentDir),
10001
10185
  subagentDir,
10002
10186
  vscodeCmd
10003
10187
  );
10004
10188
  if (!workspaceReady) {
10005
10189
  throw new Error(
10006
- `VS Code workspace '${path28.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
10190
+ `VS Code workspace '${path29.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
10007
10191
  );
10008
10192
  }
10009
10193
  await sleep2(500);
@@ -10011,8 +10195,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
10011
10195
  await raceSpawnError(child);
10012
10196
  }
10013
10197
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
10014
- const workspacePath = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
10015
- const messagesDir = path28.join(subagentDir, "messages");
10198
+ const workspacePath = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
10199
+ const messagesDir = path29.join(subagentDir, "messages");
10016
10200
  await mkdir9(messagesDir, { recursive: true });
10017
10201
  const chatArgs = ["-r", "chat", "-m", chatId];
10018
10202
  for (const attachment of attachmentPaths) {
@@ -10021,13 +10205,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
10021
10205
  chatArgs.push(chatInstruction);
10022
10206
  const workspaceReady = await ensureWorkspaceFocused(
10023
10207
  workspacePath,
10024
- path28.basename(subagentDir),
10208
+ path29.basename(subagentDir),
10025
10209
  subagentDir,
10026
10210
  vscodeCmd
10027
10211
  );
10028
10212
  if (!workspaceReady) {
10029
10213
  throw new Error(
10030
- `VS Code workspace '${path28.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
10214
+ `VS Code workspace '${path29.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
10031
10215
  );
10032
10216
  }
10033
10217
  await sleep2(500);
@@ -10036,11 +10220,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
10036
10220
  }
10037
10221
 
10038
10222
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
10039
- import { copyFile, mkdir as mkdir10, readFile as readFile12, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
10040
- import path30 from "node:path";
10223
+ import { copyFile, mkdir as mkdir10, readFile as readFile13, readdir as readdir4, stat as stat4, writeFile as writeFile3 } from "node:fs/promises";
10224
+ import path31 from "node:path";
10041
10225
 
10042
10226
  // src/evaluation/providers/vscode/utils/workspace.ts
10043
- import path29 from "node:path";
10227
+ import path30 from "node:path";
10044
10228
  import JSON5 from "json5";
10045
10229
  function transformWorkspacePaths(workspaceContent, templateDir) {
10046
10230
  let workspace;
@@ -10057,10 +10241,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
10057
10241
  }
10058
10242
  const transformedFolders = workspace.folders.map((folder) => {
10059
10243
  const folderPath = folder.path;
10060
- if (path29.isAbsolute(folderPath)) {
10244
+ if (path30.isAbsolute(folderPath)) {
10061
10245
  return folder;
10062
10246
  }
10063
- const absolutePath = path29.resolve(templateDir, folderPath);
10247
+ const absolutePath = path30.resolve(templateDir, folderPath);
10064
10248
  return {
10065
10249
  ...folder,
10066
10250
  path: absolutePath
@@ -10082,19 +10266,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
10082
10266
  if (locationMap && typeof locationMap === "object") {
10083
10267
  const transformedMap = {};
10084
10268
  for (const [locationPath, value] of Object.entries(locationMap)) {
10085
- const isAbsolute = path29.isAbsolute(locationPath);
10269
+ const isAbsolute = path30.isAbsolute(locationPath);
10086
10270
  if (isAbsolute) {
10087
10271
  transformedMap[locationPath] = value;
10088
10272
  } else {
10089
10273
  const firstGlobIndex = locationPath.search(/[*]/);
10090
10274
  if (firstGlobIndex === -1) {
10091
- const resolvedPath = path29.resolve(templateDir, locationPath).replace(/\\/g, "/");
10275
+ const resolvedPath = path30.resolve(templateDir, locationPath).replace(/\\/g, "/");
10092
10276
  transformedMap[resolvedPath] = value;
10093
10277
  } else {
10094
10278
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
10095
10279
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
10096
10280
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
10097
- const resolvedPath = (path29.resolve(templateDir, basePath) + patternPath).replace(
10281
+ const resolvedPath = (path30.resolve(templateDir, basePath) + patternPath).replace(
10098
10282
  /\\/g,
10099
10283
  "/"
10100
10284
  );
@@ -10135,7 +10319,7 @@ async function findUnlockedSubagent(subagentRoot) {
10135
10319
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
10136
10320
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
10137
10321
  for (const subagent of subagents) {
10138
- const lockFile = path30.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
10322
+ const lockFile = path31.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
10139
10323
  if (!await pathExists(lockFile)) {
10140
10324
  return subagent.absolutePath;
10141
10325
  }
@@ -10145,26 +10329,26 @@ async function findUnlockedSubagent(subagentRoot) {
10145
10329
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
10146
10330
  let workspaceContent;
10147
10331
  if (workspaceTemplate) {
10148
- const workspaceSrc = path30.resolve(workspaceTemplate);
10332
+ const workspaceSrc = path31.resolve(workspaceTemplate);
10149
10333
  if (!await pathExists(workspaceSrc)) {
10150
10334
  throw new Error(`workspace template not found: ${workspaceSrc}`);
10151
10335
  }
10152
- const stats = await stat3(workspaceSrc);
10336
+ const stats = await stat4(workspaceSrc);
10153
10337
  if (!stats.isFile()) {
10154
10338
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
10155
10339
  }
10156
- const templateText = await readFile12(workspaceSrc, "utf8");
10340
+ const templateText = await readFile13(workspaceSrc, "utf8");
10157
10341
  workspaceContent = JSON.parse(templateText);
10158
10342
  } else {
10159
10343
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
10160
10344
  }
10161
- const workspaceName = `${path30.basename(subagentDir)}.code-workspace`;
10162
- const workspaceDst = path30.join(subagentDir, workspaceName);
10163
- const templateDir = workspaceTemplate ? path30.dirname(path30.resolve(workspaceTemplate)) : subagentDir;
10345
+ const workspaceName = `${path31.basename(subagentDir)}.code-workspace`;
10346
+ const workspaceDst = path31.join(subagentDir, workspaceName);
10347
+ const templateDir = workspaceTemplate ? path31.dirname(path31.resolve(workspaceTemplate)) : subagentDir;
10164
10348
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
10165
10349
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
10166
10350
  if (cwd) {
10167
- const absCwd = path30.resolve(cwd);
10351
+ const absCwd = path31.resolve(cwd);
10168
10352
  const parsed = JSON.parse(transformedContent);
10169
10353
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
10170
10354
  if (!alreadyPresent) {
@@ -10173,35 +10357,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
10173
10357
  }
10174
10358
  }
10175
10359
  await writeFile3(workspaceDst, transformedContent, "utf8");
10176
- const messagesDir = path30.join(subagentDir, "messages");
10360
+ const messagesDir = path31.join(subagentDir, "messages");
10177
10361
  await mkdir10(messagesDir, { recursive: true });
10178
10362
  return { workspace: workspaceDst, messagesDir };
10179
10363
  }
10180
10364
  async function createSubagentLock(subagentDir) {
10181
- const messagesDir = path30.join(subagentDir, "messages");
10365
+ const messagesDir = path31.join(subagentDir, "messages");
10182
10366
  if (await pathExists(messagesDir)) {
10183
- const files = await readdir3(messagesDir);
10367
+ const files = await readdir4(messagesDir);
10184
10368
  await Promise.all(
10185
10369
  files.map(async (file) => {
10186
- const target = path30.join(messagesDir, file);
10370
+ const target = path31.join(messagesDir, file);
10187
10371
  await removeIfExists(target);
10188
10372
  })
10189
10373
  );
10190
10374
  }
10191
- const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
10375
+ const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
10192
10376
  if (await pathExists(githubAgentsDir)) {
10193
- const agentFiles = await readdir3(githubAgentsDir);
10377
+ const agentFiles = await readdir4(githubAgentsDir);
10194
10378
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
10195
10379
  await Promise.all(
10196
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path30.join(githubAgentsDir, file)))
10380
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path31.join(githubAgentsDir, file)))
10197
10381
  );
10198
10382
  }
10199
- const lockFile = path30.join(subagentDir, DEFAULT_LOCK_NAME);
10383
+ const lockFile = path31.join(subagentDir, DEFAULT_LOCK_NAME);
10200
10384
  await writeFile3(lockFile, "", { encoding: "utf8" });
10201
10385
  return lockFile;
10202
10386
  }
10203
10387
  async function removeSubagentLock(subagentDir) {
10204
- const lockFile = path30.join(subagentDir, DEFAULT_LOCK_NAME);
10388
+ const lockFile = path31.join(subagentDir, DEFAULT_LOCK_NAME);
10205
10389
  await removeIfExists(lockFile);
10206
10390
  }
10207
10391
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -10221,9 +10405,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
10221
10405
  return 1;
10222
10406
  }
10223
10407
  if (promptFile) {
10224
- const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
10408
+ const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
10225
10409
  await mkdir10(githubAgentsDir, { recursive: true });
10226
- const agentFile = path30.join(githubAgentsDir, `${chatId}.md`);
10410
+ const agentFile = path31.join(githubAgentsDir, `${chatId}.md`);
10227
10411
  try {
10228
10412
  await copyFile(promptFile, agentFile);
10229
10413
  } catch (error) {
@@ -10242,11 +10426,11 @@ async function resolvePromptFile(promptFile) {
10242
10426
  if (!promptFile) {
10243
10427
  return void 0;
10244
10428
  }
10245
- const resolvedPrompt = path31.resolve(promptFile);
10429
+ const resolvedPrompt = path32.resolve(promptFile);
10246
10430
  if (!await pathExists(resolvedPrompt)) {
10247
10431
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
10248
10432
  }
10249
- const promptStats = await stat4(resolvedPrompt);
10433
+ const promptStats = await stat5(resolvedPrompt);
10250
10434
  if (!promptStats.isFile()) {
10251
10435
  throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
10252
10436
  }
@@ -10258,7 +10442,7 @@ async function resolveAttachments(extraAttachments) {
10258
10442
  }
10259
10443
  const resolved = [];
10260
10444
  for (const attachment of extraAttachments) {
10261
- const resolvedPath = path31.resolve(attachment);
10445
+ const resolvedPath = path32.resolve(attachment);
10262
10446
  if (!await pathExists(resolvedPath)) {
10263
10447
  throw new Error(`Attachment not found: ${resolvedPath}`);
10264
10448
  }
@@ -10300,7 +10484,7 @@ async function dispatchAgentSession(options) {
10300
10484
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
10301
10485
  };
10302
10486
  }
10303
- const subagentName = path31.basename(subagentDir);
10487
+ const subagentName = path32.basename(subagentDir);
10304
10488
  const chatId = Math.random().toString(16).slice(2, 10);
10305
10489
  const preparationResult = await prepareSubagentDirectory(
10306
10490
  subagentDir,
@@ -10328,9 +10512,9 @@ async function dispatchAgentSession(options) {
10328
10512
  };
10329
10513
  }
10330
10514
  const timestamp = generateTimestamp();
10331
- const messagesDir = path31.join(subagentDir, "messages");
10332
- const responseFileTmp = path31.join(messagesDir, `${timestamp}_res.tmp.md`);
10333
- const responseFileFinal = path31.join(messagesDir, `${timestamp}_res.md`);
10515
+ const messagesDir = path32.join(subagentDir, "messages");
10516
+ const responseFileTmp = path32.join(messagesDir, `${timestamp}_res.tmp.md`);
10517
+ const responseFileFinal = path32.join(messagesDir, `${timestamp}_res.md`);
10334
10518
  const requestInstructions = createRequestPrompt(
10335
10519
  userQuery,
10336
10520
  responseFileTmp,
@@ -10435,7 +10619,7 @@ async function dispatchBatchAgent(options) {
10435
10619
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
10436
10620
  };
10437
10621
  }
10438
- subagentName = path31.basename(subagentDir);
10622
+ subagentName = path32.basename(subagentDir);
10439
10623
  const chatId = Math.random().toString(16).slice(2, 10);
10440
10624
  const preparationResult = await prepareSubagentDirectory(
10441
10625
  subagentDir,
@@ -10466,17 +10650,17 @@ async function dispatchBatchAgent(options) {
10466
10650
  };
10467
10651
  }
10468
10652
  const timestamp = generateTimestamp();
10469
- const messagesDir = path31.join(subagentDir, "messages");
10653
+ const messagesDir = path32.join(subagentDir, "messages");
10470
10654
  requestFiles = userQueries.map(
10471
- (_, index) => path31.join(messagesDir, `${timestamp}_${index}_req.md`)
10655
+ (_, index) => path32.join(messagesDir, `${timestamp}_${index}_req.md`)
10472
10656
  );
10473
10657
  const responseTmpFiles = userQueries.map(
10474
- (_, index) => path31.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
10658
+ (_, index) => path32.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
10475
10659
  );
10476
10660
  responseFilesFinal = userQueries.map(
10477
- (_, index) => path31.join(messagesDir, `${timestamp}_${index}_res.md`)
10661
+ (_, index) => path32.join(messagesDir, `${timestamp}_${index}_res.md`)
10478
10662
  );
10479
- const orchestratorFile = path31.join(messagesDir, `${timestamp}_orchestrator.md`);
10663
+ const orchestratorFile = path32.join(messagesDir, `${timestamp}_orchestrator.md`);
10480
10664
  if (!dryRun) {
10481
10665
  await Promise.all(
10482
10666
  userQueries.map((query, index) => {
@@ -10562,7 +10746,7 @@ async function dispatchBatchAgent(options) {
10562
10746
 
10563
10747
  // src/evaluation/providers/vscode/dispatch/provision.ts
10564
10748
  import { writeFile as writeFile5 } from "node:fs/promises";
10565
- import path32 from "node:path";
10749
+ import path33 from "node:path";
10566
10750
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
10567
10751
  folders: [
10568
10752
  {
@@ -10593,7 +10777,7 @@ async function provisionSubagents(options) {
10593
10777
  if (!Number.isInteger(subagents) || subagents < 1) {
10594
10778
  throw new Error("subagents must be a positive integer");
10595
10779
  }
10596
- const targetPath = path32.resolve(targetRoot);
10780
+ const targetPath = path33.resolve(targetRoot);
10597
10781
  if (!dryRun) {
10598
10782
  await ensureDir(targetPath);
10599
10783
  }
@@ -10613,7 +10797,7 @@ async function provisionSubagents(options) {
10613
10797
  continue;
10614
10798
  }
10615
10799
  highestNumber = Math.max(highestNumber, parsed);
10616
- const lockFile = path32.join(entry.absolutePath, lockName);
10800
+ const lockFile = path33.join(entry.absolutePath, lockName);
10617
10801
  const locked = await pathExists(lockFile);
10618
10802
  if (locked) {
10619
10803
  lockedSubagents.add(entry.absolutePath);
@@ -10630,10 +10814,10 @@ async function provisionSubagents(options) {
10630
10814
  break;
10631
10815
  }
10632
10816
  const subagentDir = subagent.absolutePath;
10633
- const githubAgentsDir = path32.join(subagentDir, ".github", "agents");
10634
- const lockFile = path32.join(subagentDir, lockName);
10635
- const workspaceDst = path32.join(subagentDir, `${path32.basename(subagentDir)}.code-workspace`);
10636
- const wakeupDst = path32.join(githubAgentsDir, "wakeup.md");
10817
+ const githubAgentsDir = path33.join(subagentDir, ".github", "agents");
10818
+ const lockFile = path33.join(subagentDir, lockName);
10819
+ const workspaceDst = path33.join(subagentDir, `${path33.basename(subagentDir)}.code-workspace`);
10820
+ const wakeupDst = path33.join(githubAgentsDir, "wakeup.md");
10637
10821
  const isLocked = await pathExists(lockFile);
10638
10822
  if (isLocked && !force) {
10639
10823
  continue;
@@ -10671,10 +10855,10 @@ async function provisionSubagents(options) {
10671
10855
  let nextIndex = highestNumber;
10672
10856
  while (subagentsProvisioned < subagents) {
10673
10857
  nextIndex += 1;
10674
- const subagentDir = path32.join(targetPath, `subagent-${nextIndex}`);
10675
- const githubAgentsDir = path32.join(subagentDir, ".github", "agents");
10676
- const workspaceDst = path32.join(subagentDir, `${path32.basename(subagentDir)}.code-workspace`);
10677
- const wakeupDst = path32.join(githubAgentsDir, "wakeup.md");
10858
+ const subagentDir = path33.join(targetPath, `subagent-${nextIndex}`);
10859
+ const githubAgentsDir = path33.join(subagentDir, ".github", "agents");
10860
+ const workspaceDst = path33.join(subagentDir, `${path33.basename(subagentDir)}.code-workspace`);
10861
+ const wakeupDst = path33.join(githubAgentsDir, "wakeup.md");
10678
10862
  if (!dryRun) {
10679
10863
  await ensureDir(subagentDir);
10680
10864
  await ensureDir(githubAgentsDir);
@@ -10721,7 +10905,7 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
10721
10905
  `;
10722
10906
 
10723
10907
  // src/evaluation/providers/vscode-provider.ts
10724
- var execAsync3 = promisify3(exec2);
10908
+ var execAsync4 = promisify4(exec2);
10725
10909
  var VSCodeProvider = class {
10726
10910
  id;
10727
10911
  kind;
@@ -10864,7 +11048,7 @@ var VSCodeProvider = class {
10864
11048
  async function locateVSCodeExecutable(candidate) {
10865
11049
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
10866
11050
  if (includesPathSeparator) {
10867
- const resolved = path33.isAbsolute(candidate) ? candidate : path33.resolve(candidate);
11051
+ const resolved = path34.isAbsolute(candidate) ? candidate : path34.resolve(candidate);
10868
11052
  try {
10869
11053
  await access3(resolved, constants3.F_OK);
10870
11054
  return resolved;
@@ -10876,7 +11060,7 @@ async function locateVSCodeExecutable(candidate) {
10876
11060
  }
10877
11061
  const locator = process.platform === "win32" ? "where" : "which";
10878
11062
  try {
10879
- const { stdout } = await execAsync3(`${locator} ${candidate}`);
11063
+ const { stdout } = await execAsync4(`${locator} ${candidate}`);
10880
11064
  const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
10881
11065
  if (lines.length > 0 && lines[0]) {
10882
11066
  await access3(lines[0], constants3.F_OK);
@@ -10893,7 +11077,7 @@ async function resolveWorkspaceTemplateFile(template) {
10893
11077
  return void 0;
10894
11078
  }
10895
11079
  try {
10896
- const stats = await stat5(path33.resolve(template));
11080
+ const stats = await stat6(path34.resolve(template));
10897
11081
  return stats.isFile() ? template : void 0;
10898
11082
  } catch {
10899
11083
  return template;
@@ -10917,7 +11101,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
10917
11101
  return "";
10918
11102
  }
10919
11103
  const buildList = (files) => files.map((absolutePath) => {
10920
- const fileName = path33.basename(absolutePath);
11104
+ const fileName = path34.basename(absolutePath);
10921
11105
  const fileUri = pathToFileUri3(absolutePath);
10922
11106
  return `* [${fileName}](${fileUri})`;
10923
11107
  });
@@ -10938,7 +11122,7 @@ function collectAttachmentFiles(attachments) {
10938
11122
  }
10939
11123
  const unique = /* @__PURE__ */ new Map();
10940
11124
  for (const attachment of attachments) {
10941
- const absolutePath = path33.resolve(attachment);
11125
+ const absolutePath = path34.resolve(attachment);
10942
11126
  if (!unique.has(absolutePath)) {
10943
11127
  unique.set(absolutePath, absolutePath);
10944
11128
  }
@@ -10946,7 +11130,7 @@ function collectAttachmentFiles(attachments) {
10946
11130
  return Array.from(unique.values());
10947
11131
  }
10948
11132
  function pathToFileUri3(filePath) {
10949
- const absolutePath = path33.isAbsolute(filePath) ? filePath : path33.resolve(filePath);
11133
+ const absolutePath = path34.isAbsolute(filePath) ? filePath : path34.resolve(filePath);
10950
11134
  const normalizedPath = absolutePath.replace(/\\/g, "/");
10951
11135
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
10952
11136
  return `file:///${normalizedPath}`;
@@ -10959,7 +11143,7 @@ function normalizeAttachments(attachments) {
10959
11143
  }
10960
11144
  const deduped = /* @__PURE__ */ new Set();
10961
11145
  for (const attachment of attachments) {
10962
- deduped.add(path33.resolve(attachment));
11146
+ deduped.add(path34.resolve(attachment));
10963
11147
  }
10964
11148
  return Array.from(deduped);
10965
11149
  }
@@ -10968,7 +11152,7 @@ function mergeAttachments(all) {
10968
11152
  for (const list of all) {
10969
11153
  if (!list) continue;
10970
11154
  for (const inputFile of list) {
10971
- deduped.add(path33.resolve(inputFile));
11155
+ deduped.add(path34.resolve(inputFile));
10972
11156
  }
10973
11157
  }
10974
11158
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -11016,8 +11200,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
11016
11200
 
11017
11201
  // src/evaluation/providers/targets-file.ts
11018
11202
  import { constants as constants4 } from "node:fs";
11019
- import { access as access4, readFile as readFile13 } from "node:fs/promises";
11020
- import path34 from "node:path";
11203
+ import { access as access4, readFile as readFile14 } from "node:fs/promises";
11204
+ import path35 from "node:path";
11021
11205
  import { parse as parse5 } from "yaml";
11022
11206
  function isRecord(value) {
11023
11207
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -11057,11 +11241,11 @@ async function fileExists3(filePath) {
11057
11241
  }
11058
11242
  }
11059
11243
  async function readTargetDefinitions(filePath) {
11060
- const absolutePath = path34.resolve(filePath);
11244
+ const absolutePath = path35.resolve(filePath);
11061
11245
  if (!await fileExists3(absolutePath)) {
11062
11246
  throw new Error(`targets.yaml not found at ${absolutePath}`);
11063
11247
  }
11064
- const raw = await readFile13(absolutePath, "utf8");
11248
+ const raw = await readFile14(absolutePath, "utf8");
11065
11249
  const parsed = parse5(raw);
11066
11250
  if (!isRecord(parsed)) {
11067
11251
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -11077,16 +11261,16 @@ function listTargetNames(definitions) {
11077
11261
  }
11078
11262
 
11079
11263
  // src/evaluation/providers/provider-discovery.ts
11080
- import path35 from "node:path";
11264
+ import path36 from "node:path";
11081
11265
  import fg from "fast-glob";
11082
11266
  async function discoverProviders(registry, baseDir) {
11083
11267
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
11084
11268
  const candidateDirs = [];
11085
- let dir = path35.resolve(baseDir);
11086
- const root = path35.parse(dir).root;
11269
+ let dir = path36.resolve(baseDir);
11270
+ const root = path36.parse(dir).root;
11087
11271
  while (dir !== root) {
11088
- candidateDirs.push(path35.join(dir, ".agentv", "providers"));
11089
- dir = path35.dirname(dir);
11272
+ candidateDirs.push(path36.join(dir, ".agentv", "providers"));
11273
+ dir = path36.dirname(dir);
11090
11274
  }
11091
11275
  let files = [];
11092
11276
  for (const providersDir of candidateDirs) {
@@ -11102,7 +11286,7 @@ async function discoverProviders(registry, baseDir) {
11102
11286
  }
11103
11287
  const discoveredKinds = [];
11104
11288
  for (const filePath of files) {
11105
- const basename = path35.basename(filePath);
11289
+ const basename = path36.basename(filePath);
11106
11290
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
11107
11291
  if (registry.has(kindName)) {
11108
11292
  continue;
@@ -11767,7 +11951,7 @@ import { generateText as generateText3 } from "ai";
11767
11951
 
11768
11952
  // src/evaluation/evaluators/llm-grader.ts
11769
11953
  import fs2 from "node:fs/promises";
11770
- import path36 from "node:path";
11954
+ import path37 from "node:path";
11771
11955
  import { generateText as generateText2, stepCountIs, tool } from "ai";
11772
11956
  import { z as z3 } from "zod";
11773
11957
  var DEFAULT_MAX_STEPS = 10;
@@ -12801,8 +12985,8 @@ function toAiSdkImageParts(images) {
12801
12985
  }));
12802
12986
  }
12803
12987
  function resolveSandboxed(basePath, relativePath) {
12804
- const resolved = path36.resolve(basePath, relativePath);
12805
- if (!resolved.startsWith(basePath + path36.sep) && resolved !== basePath) {
12988
+ const resolved = path37.resolve(basePath, relativePath);
12989
+ if (!resolved.startsWith(basePath + path37.sep) && resolved !== basePath) {
12806
12990
  throw new Error(`Path '${relativePath}' is outside the workspace`);
12807
12991
  }
12808
12992
  return resolved;
@@ -12835,11 +13019,11 @@ function createFilesystemTools(workspacePath) {
12835
13019
  execute: async (input) => {
12836
13020
  try {
12837
13021
  const resolved = resolveSandboxed(workspacePath, input.path);
12838
- const stat12 = await fs2.stat(resolved);
12839
- if (stat12.isDirectory()) {
13022
+ const stat13 = await fs2.stat(resolved);
13023
+ if (stat13.isDirectory()) {
12840
13024
  return { error: `'${input.path}' is a directory, not a file` };
12841
13025
  }
12842
- const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
13026
+ const buffer = Buffer.alloc(Math.min(stat13.size, MAX_FILE_SIZE));
12843
13027
  const fd = await fs2.open(resolved, "r");
12844
13028
  try {
12845
13029
  await fd.read(buffer, 0, buffer.length, 0);
@@ -12847,8 +13031,8 @@ function createFilesystemTools(workspacePath) {
12847
13031
  await fd.close();
12848
13032
  }
12849
13033
  const content = buffer.toString("utf-8");
12850
- const truncated = stat12.size > MAX_FILE_SIZE;
12851
- return { content, truncated, size: stat12.size };
13034
+ const truncated = stat13.size > MAX_FILE_SIZE;
13035
+ return { content, truncated, size: stat13.size };
12852
13036
  } catch (error) {
12853
13037
  return { error: error instanceof Error ? error.message : String(error) };
12854
13038
  }
@@ -12892,15 +13076,15 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
12892
13076
  for (const entry of entries) {
12893
13077
  if (matches.length >= MAX_SEARCH_MATCHES) return;
12894
13078
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
12895
- const fullPath = path36.join(dirPath, entry.name);
13079
+ const fullPath = path37.join(dirPath, entry.name);
12896
13080
  if (entry.isDirectory()) {
12897
13081
  await searchDirectory(fullPath, workspacePath, regex, matches);
12898
13082
  } else if (entry.isFile()) {
12899
- const ext = path36.extname(entry.name).toLowerCase();
13083
+ const ext = path37.extname(entry.name).toLowerCase();
12900
13084
  if (BINARY_EXTENSIONS.has(ext)) continue;
12901
13085
  try {
12902
- const stat12 = await fs2.stat(fullPath);
12903
- if (stat12.size > MAX_FILE_SIZE) continue;
13086
+ const stat13 = await fs2.stat(fullPath);
13087
+ if (stat13.size > MAX_FILE_SIZE) continue;
12904
13088
  const content = await fs2.readFile(fullPath, "utf-8");
12905
13089
  const lines = content.split("\n");
12906
13090
  for (let i = 0; i < lines.length; i++) {
@@ -12908,7 +13092,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
12908
13092
  regex.lastIndex = 0;
12909
13093
  if (regex.test(lines[i])) {
12910
13094
  matches.push({
12911
- file: path36.relative(workspacePath, fullPath),
13095
+ file: path37.relative(workspacePath, fullPath),
12912
13096
  line: i + 1,
12913
13097
  text: lines[i].substring(0, 200)
12914
13098
  });
@@ -14868,7 +15052,7 @@ function runEqualsAssertion(output, value) {
14868
15052
  // src/evaluation/orchestrator.ts
14869
15053
  import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
14870
15054
  import { existsSync as existsSync5 } from "node:fs";
14871
- import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir7, stat as stat8 } from "node:fs/promises";
15055
+ import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir8, stat as stat9 } from "node:fs/promises";
14872
15056
  import path45 from "node:path";
14873
15057
  import micromatch3 from "micromatch";
14874
15058
 
@@ -15083,7 +15267,7 @@ var InlineAssertEvaluator = class {
15083
15267
  };
15084
15268
 
15085
15269
  // src/evaluation/evaluators/prompt-resolution.ts
15086
- import path37 from "node:path";
15270
+ import path38 from "node:path";
15087
15271
  async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
15088
15272
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
15089
15273
  if (!context) {
@@ -15135,7 +15319,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
15135
15319
  };
15136
15320
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
15137
15321
  const scriptPath = script[script.length - 1];
15138
- const cwd = path37.dirname(scriptPath);
15322
+ const cwd = path38.dirname(scriptPath);
15139
15323
  try {
15140
15324
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
15141
15325
  const prompt = stdout.trim();
@@ -15418,16 +15602,16 @@ function createBuiltinRegistry() {
15418
15602
  }
15419
15603
 
15420
15604
  // src/evaluation/registry/assertion-discovery.ts
15421
- import path38 from "node:path";
15605
+ import path39 from "node:path";
15422
15606
  import fg2 from "fast-glob";
15423
15607
  async function discoverAssertions(registry, baseDir) {
15424
15608
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
15425
15609
  const candidateDirs = [];
15426
- let dir = path38.resolve(baseDir);
15427
- const root = path38.parse(dir).root;
15610
+ let dir = path39.resolve(baseDir);
15611
+ const root = path39.parse(dir).root;
15428
15612
  while (dir !== root) {
15429
- candidateDirs.push(path38.join(dir, ".agentv", "assertions"));
15430
- dir = path38.dirname(dir);
15613
+ candidateDirs.push(path39.join(dir, ".agentv", "assertions"));
15614
+ dir = path39.dirname(dir);
15431
15615
  }
15432
15616
  let files = [];
15433
15617
  for (const assertionsDir of candidateDirs) {
@@ -15443,7 +15627,7 @@ async function discoverAssertions(registry, baseDir) {
15443
15627
  }
15444
15628
  const discoveredTypes = [];
15445
15629
  for (const filePath of files) {
15446
- const basename = path38.basename(filePath);
15630
+ const basename = path39.basename(filePath);
15447
15631
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
15448
15632
  if (registry.has(typeName)) {
15449
15633
  continue;
@@ -15461,17 +15645,17 @@ async function discoverAssertions(registry, baseDir) {
15461
15645
  }
15462
15646
 
15463
15647
  // src/evaluation/registry/grader-discovery.ts
15464
- import path39 from "node:path";
15648
+ import path40 from "node:path";
15465
15649
  import fg3 from "fast-glob";
15466
15650
  async function discoverGraders(registry, baseDir) {
15467
15651
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
15468
15652
  const candidateDirs = [];
15469
- let dir = path39.resolve(baseDir);
15470
- const root = path39.parse(dir).root;
15653
+ let dir = path40.resolve(baseDir);
15654
+ const root = path40.parse(dir).root;
15471
15655
  while (dir !== root) {
15472
- candidateDirs.push(path39.join(dir, ".agentv", "graders"));
15473
- candidateDirs.push(path39.join(dir, ".agentv", "judges"));
15474
- dir = path39.dirname(dir);
15656
+ candidateDirs.push(path40.join(dir, ".agentv", "graders"));
15657
+ candidateDirs.push(path40.join(dir, ".agentv", "judges"));
15658
+ dir = path40.dirname(dir);
15475
15659
  }
15476
15660
  let files = [];
15477
15661
  for (const gradersDir of candidateDirs) {
@@ -15487,7 +15671,7 @@ async function discoverGraders(registry, baseDir) {
15487
15671
  }
15488
15672
  const discoveredTypes = [];
15489
15673
  for (const filePath of files) {
15490
- const basename = path39.basename(filePath);
15674
+ const basename = path40.basename(filePath);
15491
15675
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
15492
15676
  if (registry.has(typeName)) {
15493
15677
  continue;
@@ -15644,57 +15828,8 @@ function getTCritical(df) {
15644
15828
  return T_TABLE_95[df - 1];
15645
15829
  }
15646
15830
 
15647
- // src/evaluation/workspace/file-changes.ts
15648
- import { exec as execCallback } from "node:child_process";
15649
- import { readdirSync as readdirSync2, statSync } from "node:fs";
15650
- import path40 from "node:path";
15651
- import { promisify as promisify4 } from "node:util";
15652
- var execAsync4 = promisify4(execCallback);
15653
- function gitExecOpts(workspacePath) {
15654
- const { GIT_DIR: _, GIT_WORK_TREE: __, ...env } = process.env;
15655
- return { cwd: workspacePath, env };
15656
- }
15657
- async function initializeBaseline(workspacePath) {
15658
- const opts = gitExecOpts(workspacePath);
15659
- await execAsync4("git init", opts);
15660
- await execAsync4("git add -A", opts);
15661
- await execAsync4(
15662
- 'git -c user.email=agentv@localhost -c user.name=agentv commit --allow-empty -m "agentv-baseline"',
15663
- opts
15664
- );
15665
- const { stdout } = await execAsync4("git rev-parse HEAD", opts);
15666
- return stdout.trim();
15667
- }
15668
- async function captureFileChanges(workspacePath, baselineCommit) {
15669
- const opts = gitExecOpts(workspacePath);
15670
- await stageNestedRepoChanges(workspacePath);
15671
- await execAsync4("git add -A", opts);
15672
- const { stdout } = await execAsync4(`git diff ${baselineCommit} --submodule=diff`, opts);
15673
- return stdout.trim();
15674
- }
15675
- async function stageNestedRepoChanges(workspacePath) {
15676
- let entries;
15677
- try {
15678
- entries = readdirSync2(workspacePath);
15679
- } catch {
15680
- return;
15681
- }
15682
- for (const entry of entries) {
15683
- if (entry === ".git" || entry === "node_modules") continue;
15684
- const childPath = path40.join(workspacePath, entry);
15685
- try {
15686
- if (!statSync(childPath).isDirectory()) continue;
15687
- if (!statSync(path40.join(childPath, ".git")).isDirectory()) continue;
15688
- } catch {
15689
- continue;
15690
- }
15691
- const childOpts = gitExecOpts(childPath);
15692
- await execAsync4("git add -A", childOpts);
15693
- }
15694
- }
15695
-
15696
15831
  // src/evaluation/workspace/manager.ts
15697
- import { cp, mkdir as mkdir12, readdir as readdir4, rm as rm4, stat as stat6 } from "node:fs/promises";
15832
+ import { cp, mkdir as mkdir12, readdir as readdir5, rm as rm4, stat as stat7 } from "node:fs/promises";
15698
15833
  import path41 from "node:path";
15699
15834
  var TemplateNotFoundError = class extends Error {
15700
15835
  constructor(templatePath) {
@@ -15717,7 +15852,7 @@ var WorkspaceCreationError = class extends Error {
15717
15852
  };
15718
15853
  async function isDirectory(filePath) {
15719
15854
  try {
15720
- const stats = await stat6(filePath);
15855
+ const stats = await stat7(filePath);
15721
15856
  return stats.isDirectory();
15722
15857
  } catch {
15723
15858
  return false;
@@ -15729,7 +15864,7 @@ function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
15729
15864
  }
15730
15865
  async function copyDirectoryRecursive(src, dest) {
15731
15866
  await mkdir12(dest, { recursive: true });
15732
- const entries = await readdir4(src, { withFileTypes: true });
15867
+ const entries = await readdir5(src, { withFileTypes: true });
15733
15868
  for (const entry of entries) {
15734
15869
  const srcPath = path41.join(src, entry.name);
15735
15870
  const destPath = path41.join(dest, entry.name);
@@ -15803,7 +15938,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
15803
15938
  import { execFile } from "node:child_process";
15804
15939
  import { createHash } from "node:crypto";
15805
15940
  import { existsSync as existsSync3 } from "node:fs";
15806
- import { cp as cp2, mkdir as mkdir13, readFile as readFile14, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
15941
+ import { cp as cp2, mkdir as mkdir13, readFile as readFile15, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
15807
15942
  import path42 from "node:path";
15808
15943
  import { promisify as promisify5 } from "node:util";
15809
15944
  var execFileAsync = promisify5(execFile);
@@ -15858,7 +15993,7 @@ function computeWorkspaceFingerprint(repos) {
15858
15993
  }
15859
15994
  async function copyDirectoryRecursive2(src, dest, skipDirs) {
15860
15995
  await mkdir13(dest, { recursive: true });
15861
- const entries = await readdir5(src, { withFileTypes: true });
15996
+ const entries = await readdir6(src, { withFileTypes: true });
15862
15997
  for (const entry of entries) {
15863
15998
  const srcPath = path42.join(src, entry.name);
15864
15999
  const destPath = path42.join(dest, entry.name);
@@ -15966,7 +16101,7 @@ var WorkspacePoolManager = class {
15966
16101
  throw err;
15967
16102
  }
15968
16103
  try {
15969
- const pidStr = await readFile14(lockPath, "utf-8");
16104
+ const pidStr = await readFile15(lockPath, "utf-8");
15970
16105
  const pid = Number.parseInt(pidStr.trim(), 10);
15971
16106
  if (!Number.isNaN(pid)) {
15972
16107
  try {
@@ -15993,7 +16128,7 @@ var WorkspacePoolManager = class {
15993
16128
  async checkDrift(poolDir, fingerprint) {
15994
16129
  const metadataPath = path42.join(poolDir, "metadata.json");
15995
16130
  try {
15996
- const raw = await readFile14(metadataPath, "utf-8");
16131
+ const raw = await readFile15(metadataPath, "utf-8");
15997
16132
  const metadata = JSON.parse(raw);
15998
16133
  return metadata.fingerprint !== fingerprint;
15999
16134
  } catch {
@@ -16012,13 +16147,13 @@ var WorkspacePoolManager = class {
16012
16147
  }
16013
16148
  /** Remove all slot directories and their lock files from a pool directory. */
16014
16149
  async removeAllSlots(poolDir) {
16015
- const entries = await readdir5(poolDir);
16150
+ const entries = await readdir6(poolDir);
16016
16151
  for (const entry of entries) {
16017
16152
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
16018
16153
  const lockPath = path42.join(poolDir, `${entry}.lock`);
16019
16154
  if (existsSync3(lockPath)) {
16020
16155
  try {
16021
- const pidStr = await readFile14(lockPath, "utf-8");
16156
+ const pidStr = await readFile15(lockPath, "utf-8");
16022
16157
  const pid = Number.parseInt(pidStr.trim(), 10);
16023
16158
  if (!Number.isNaN(pid)) {
16024
16159
  try {
@@ -16293,14 +16428,14 @@ ${lines.join("\n")}`;
16293
16428
  };
16294
16429
 
16295
16430
  // src/evaluation/workspace/resolve.ts
16296
- import { readdir as readdir6, stat as stat7 } from "node:fs/promises";
16431
+ import { readdir as readdir7, stat as stat8 } from "node:fs/promises";
16297
16432
  import path44 from "node:path";
16298
16433
  async function resolveWorkspaceTemplate(templatePath) {
16299
16434
  if (!templatePath) {
16300
16435
  return void 0;
16301
16436
  }
16302
16437
  const resolved = path44.resolve(templatePath);
16303
- const stats = await stat7(resolved);
16438
+ const stats = await stat8(resolved);
16304
16439
  if (stats.isFile()) {
16305
16440
  return {
16306
16441
  dir: path44.dirname(resolved),
@@ -16310,7 +16445,7 @@ async function resolveWorkspaceTemplate(templatePath) {
16310
16445
  if (!stats.isDirectory()) {
16311
16446
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
16312
16447
  }
16313
- const entries = await readdir6(resolved);
16448
+ const entries = await readdir7(resolved);
16314
16449
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
16315
16450
  if (workspaceFiles.length === 1) {
16316
16451
  return {
@@ -16416,6 +16551,100 @@ function getWorkspaceTemplate(target) {
16416
16551
  }
16417
16552
  return void 0;
16418
16553
  }
16554
/**
 * Checks that the `depends_on` declarations of a suite form a valid graph.
 *
 * Validation happens in three passes, and the first violation found throws:
 *   1. every test ID is unique;
 *   2. every dependency names an existing test and no test names itself;
 *   3. the dependency edges contain no cycle.
 *
 * @param {Array<{id: string, depends_on?: string[]}>} tests - Tests in suite order.
 * @returns {void}
 * @throws {Error} On a duplicate ID, an unknown dependency, a self-dependency,
 *   or a circular dependency (the message lists the cycle in traversal order).
 */
function validateDependencyGraph(tests) {
  // Pass 1: collect IDs, rejecting the first repeated one.
  const knownIds = /* @__PURE__ */ new Set();
  for (const test of tests) {
    if (knownIds.has(test.id)) {
      throw new Error(`Duplicate test ID '${test.id}' \u2014 each test must have a unique ID`);
    }
    knownIds.add(test.id);
  }

  // Pass 2: each declared dependency must resolve, and must not be the test itself.
  for (const test of tests) {
    if (!test.depends_on) continue;
    for (const dep of test.depends_on) {
      if (!knownIds.has(dep)) {
        throw new Error(
          `Test '${test.id}' depends on '${dep}', but no test with that ID exists in this suite`
        );
      }
      if (dep === test.id) {
        throw new Error(`Test '${test.id}' depends on itself`);
      }
    }
  }

  // Pass 3: depth-first search over the edges of tests that declare dependencies.
  const edges = /* @__PURE__ */ new Map();
  for (const test of tests) {
    if (test.depends_on && test.depends_on.length > 0) {
      edges.set(test.id, test.depends_on);
    }
  }
  const IN_PROGRESS = 1;
  const FINISHED = 2;
  const state = /* @__PURE__ */ new Map();
  // IDs on the current DFS stack; empty again whenever a top-level walk returns.
  const trail = [];
  const walk = (id) => {
    const mark = state.get(id);
    if (mark === IN_PROGRESS) {
      // Reaching a node that is still on the stack closes a cycle.
      const cycle = [...trail.slice(trail.indexOf(id)), id];
      throw new Error(`Circular dependency detected: ${cycle.join(" \u2192 ")}`);
    }
    if (mark === FINISHED) return;
    state.set(id, IN_PROGRESS);
    trail.push(id);
    for (const dep of edges.get(id) ?? []) {
      walk(dep);
    }
    trail.pop();
    state.set(id, FINISHED);
  };
  for (const test of tests) {
    walk(test.id);
  }
}
16602
/**
 * Groups tests into ordered "waves" so that every test appears in a later
 * wave than all of the tests it depends on.
 *
 * When no test declares dependencies, a single wave holding a shallow copy of
 * `tests` is returned. Otherwise waves are built level by level: a test is
 * released once its count of outstanding dependencies reaches zero.
 *
 * @param {Array<{id: string, depends_on?: string[]}>} tests - Tests in suite order.
 * @returns {Array<Array<object>>} Waves of test objects, in the order they were released.
 * @throws {Error} If some tests could never be released (for example a cycle
 *   that was not rejected beforehand).
 */
function computeWaves(tests) {
  const anyDependencies = tests.some((t) => t.depends_on && t.depends_on.length > 0);
  if (!anyDependencies) {
    return [tests.slice()];
  }

  const byId = /* @__PURE__ */ new Map();
  // Number of dependencies each test is still waiting for.
  const outstanding = /* @__PURE__ */ new Map();
  // Reverse edges: dependency ID -> IDs of the tests waiting on it.
  const waiters = /* @__PURE__ */ new Map();
  for (const test of tests) {
    byId.set(test.id, test);
    outstanding.set(test.id, 0);
  }
  for (const test of tests) {
    if (!test.depends_on) continue;
    outstanding.set(test.id, test.depends_on.length);
    for (const dep of test.depends_on) {
      if (!waiters.has(dep)) {
        waiters.set(dep, []);
      }
      waiters.get(dep).push(test.id);
    }
  }

  const waves = [];
  let scheduledCount = 0;
  let current = tests.filter((t) => (outstanding.get(t.id) ?? 0) === 0);
  while (current.length > 0) {
    waves.push(current);
    scheduledCount += current.length;
    const upcoming = [];
    for (const finished of current) {
      for (const waiterId of waiters.get(finished.id) ?? []) {
        const left = (outstanding.get(waiterId) ?? 1) - 1;
        outstanding.set(waiterId, left);
        if (left !== 0) continue;
        const waiter = byId.get(waiterId);
        if (waiter) upcoming.push(waiter);
      }
    }
    current = upcoming;
  }

  // Anything left over never reached zero outstanding dependencies.
  if (scheduledCount !== tests.length) {
    throw new Error(
      `Internal error: ${tests.length - scheduledCount} tests were not scheduled (possible undetected cycle)`
    );
  }
  return waves;
}
16419
16648
  async function runEvaluation(options) {
16420
16649
  const {
16421
16650
  testFilePath: evalFilePath,
@@ -16683,11 +16912,11 @@ async function runEvaluation(options) {
16683
16912
  let staticMaterialised = false;
16684
16913
  const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
16685
16914
  if (useStaticWorkspace && configuredStaticPath) {
16686
- const dirExists = await stat8(configuredStaticPath).then(
16915
+ const dirExists = await stat9(configuredStaticPath).then(
16687
16916
  (s) => s.isDirectory(),
16688
16917
  () => false
16689
16918
  );
16690
- const isEmpty = dirExists ? (await readdir7(configuredStaticPath)).length === 0 : false;
16919
+ const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
16691
16920
  if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
16692
16921
  if (!dirExists) {
16693
16922
  await mkdir14(configuredStaticPath, { recursive: true });
@@ -16739,10 +16968,41 @@ async function runEvaluation(options) {
16739
16968
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
16740
16969
  }
16741
16970
  try {
16971
+ let toDependencyResult2 = function(r) {
16972
+ const outputText = extractLastAssistantContent(r.output);
16973
+ return {
16974
+ score: r.score,
16975
+ output: outputText,
16976
+ workspace_path: r.workspacePath,
16977
+ details: r.scores ? Object.fromEntries(
16978
+ r.scores.map((s) => [s.name, { score: s.score, verdict: s.verdict }])
16979
+ ) : void 0,
16980
+ status: r.executionStatus === "ok" ? "passed" : r.executionStatus === "execution_error" ? "error" : "failed"
16981
+ };
16982
+ }, checkDependencies2 = function(evalCase) {
16983
+ const depResults = {};
16984
+ if (!evalCase.depends_on || evalCase.depends_on.length === 0) {
16985
+ return { ok: true, depResults };
16986
+ }
16987
+ let allPassed = true;
16988
+ for (const depId of evalCase.depends_on) {
16989
+ const depResult = completedResults.get(depId);
16990
+ if (depResult) {
16991
+ depResults[depId] = toDependencyResult2(depResult);
16992
+ if (depResult.executionStatus === "execution_error") {
16993
+ allPassed = false;
16994
+ }
16995
+ } else {
16996
+ allPassed = false;
16997
+ }
16998
+ }
16999
+ return { ok: allPassed, depResults };
17000
+ };
17001
+ var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2;
16742
17002
  if (suiteWorkspaceFile && sharedWorkspacePath) {
16743
17003
  const copiedWorkspaceFile = path45.join(sharedWorkspacePath, path45.basename(suiteWorkspaceFile));
16744
17004
  try {
16745
- await stat8(copiedWorkspaceFile);
17005
+ await stat9(copiedWorkspaceFile);
16746
17006
  suiteWorkspaceFile = copiedWorkspaceFile;
16747
17007
  } catch {
16748
17008
  }
@@ -16849,8 +17109,9 @@ async function runEvaluation(options) {
16849
17109
  try {
16850
17110
  sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
16851
17111
  setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
16852
- } catch {
16853
- setupLog("shared baseline initialization skipped (non-fatal)");
17112
+ } catch (error) {
17113
+ const message = error instanceof Error ? error.message : String(error);
17114
+ setupLog(`shared baseline initialization failed (file_changes unavailable): ${message}`);
16854
17115
  }
16855
17116
  }
16856
17117
  if (availablePoolSlots.length > 0) {
@@ -16859,8 +17120,11 @@ async function runEvaluation(options) {
16859
17120
  const baseline = await initializeBaseline(slot.path);
16860
17121
  poolSlotBaselines.set(slot.path, baseline);
16861
17122
  setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
16862
- } catch {
16863
- setupLog(`pool slot ${slot.index} baseline initialization skipped (non-fatal)`);
17123
+ } catch (error) {
17124
+ const message = error instanceof Error ? error.message : String(error);
17125
+ setupLog(
17126
+ `pool slot ${slot.index} baseline initialization failed (file_changes unavailable): ${message}`
17127
+ );
16864
17128
  }
16865
17129
  }
16866
17130
  }
@@ -16870,204 +17134,259 @@ async function runEvaluation(options) {
16870
17134
  let cumulativeBudgetCost = 0;
16871
17135
  let budgetExhausted = false;
16872
17136
  let failOnErrorTriggered = false;
16873
- const promises = filteredEvalCases.map(
16874
- (evalCase) => limit(async () => {
16875
- const workerId = nextWorkerId++;
16876
- workerIdByEvalId.set(evalCase.id, workerId);
16877
- if (totalBudgetUsd !== void 0 && budgetExhausted) {
16878
- const budgetResult = {
16879
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
17137
+ validateDependencyGraph(filteredEvalCases);
17138
+ const waves = computeWaves(filteredEvalCases);
17139
+ const completedResults = /* @__PURE__ */ new Map();
17140
+ const results = [];
17141
+ async function dispatchTest(evalCase, depResults) {
17142
+ const workerId = nextWorkerId++;
17143
+ workerIdByEvalId.set(evalCase.id, workerId);
17144
+ if (totalBudgetUsd !== void 0 && budgetExhausted) {
17145
+ const budgetResult = {
17146
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
17147
+ testId: evalCase.id,
17148
+ suite: evalCase.suite,
17149
+ category: evalCase.category,
17150
+ score: 0,
17151
+ assertions: [],
17152
+ output: [],
17153
+ target: target.name,
17154
+ error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
17155
+ budgetExceeded: true,
17156
+ executionStatus: "execution_error",
17157
+ failureStage: "setup",
17158
+ failureReasonCode: "budget_exceeded",
17159
+ executionError: {
17160
+ message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
17161
+ stage: "setup"
17162
+ }
17163
+ };
17164
+ if (onProgress) {
17165
+ await onProgress({
17166
+ workerId,
16880
17167
  testId: evalCase.id,
16881
- suite: evalCase.suite,
16882
- category: evalCase.category,
16883
- score: 0,
16884
- assertions: [],
16885
- output: [],
16886
- target: target.name,
16887
- error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
16888
- budgetExceeded: true,
16889
- executionStatus: "execution_error",
16890
- failureStage: "setup",
16891
- failureReasonCode: "budget_exceeded",
16892
- executionError: {
16893
- message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
16894
- stage: "setup"
17168
+ status: "failed",
17169
+ completedAt: Date.now(),
17170
+ error: budgetResult.error,
17171
+ score: budgetResult.score,
17172
+ executionStatus: budgetResult.executionStatus
17173
+ });
17174
+ }
17175
+ if (onResult) {
17176
+ await onResult(budgetResult);
17177
+ }
17178
+ return budgetResult;
17179
+ }
17180
+ if (failOnError === true && failOnErrorTriggered) {
17181
+ const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
17182
+ const haltResult = {
17183
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
17184
+ testId: evalCase.id,
17185
+ suite: evalCase.suite,
17186
+ category: evalCase.category,
17187
+ score: 0,
17188
+ assertions: [],
17189
+ output: [],
17190
+ target: target.name,
17191
+ error: errorMsg,
17192
+ executionStatus: "execution_error",
17193
+ failureStage: "setup",
17194
+ failureReasonCode: "error_threshold_exceeded",
17195
+ executionError: { message: errorMsg, stage: "setup" }
17196
+ };
17197
+ if (onProgress) {
17198
+ await onProgress({
17199
+ workerId,
17200
+ testId: evalCase.id,
17201
+ status: "failed",
17202
+ completedAt: Date.now(),
17203
+ error: haltResult.error,
17204
+ score: haltResult.score,
17205
+ executionStatus: haltResult.executionStatus
17206
+ });
17207
+ }
17208
+ if (onResult) {
17209
+ await onResult(haltResult);
17210
+ }
17211
+ return haltResult;
17212
+ }
17213
+ if (onProgress) {
17214
+ await onProgress({
17215
+ workerId,
17216
+ testId: evalCase.id,
17217
+ status: "running",
17218
+ startedAt: Date.now()
17219
+ });
17220
+ }
17221
+ const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
17222
+ const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
17223
+ const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
17224
+ try {
17225
+ const graderProvider = await resolveGraderProvider(target);
17226
+ const runCaseOptions = {
17227
+ evalCase,
17228
+ provider: primaryProvider,
17229
+ target,
17230
+ evaluators: evaluatorRegistry,
17231
+ maxRetries,
17232
+ agentTimeoutMs,
17233
+ cache,
17234
+ useCache,
17235
+ now,
17236
+ graderProvider,
17237
+ targetResolver,
17238
+ availableTargets,
17239
+ evalRunId,
17240
+ keepWorkspaces,
17241
+ cleanupWorkspaces,
17242
+ retainOnSuccess: resolvedRetainOnSuccess,
17243
+ retainOnFailure: resolvedRetainOnFailure,
17244
+ sharedWorkspacePath: testWorkspacePath,
17245
+ sharedBaselineCommit: testBaselineCommit,
17246
+ suiteWorkspaceFile,
17247
+ streamCallbacks,
17248
+ typeRegistry,
17249
+ repoManager,
17250
+ evalDir,
17251
+ verbose,
17252
+ threshold: scoreThreshold,
17253
+ ...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
17254
+ };
17255
+ let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
17256
+ if (totalBudgetUsd !== void 0) {
17257
+ let caseCost;
17258
+ if (result.trials && result.trials.length > 0) {
17259
+ const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
17260
+ if (trialCostSum > 0) {
17261
+ caseCost = trialCostSum;
16895
17262
  }
16896
- };
16897
- if (onProgress) {
16898
- await onProgress({
16899
- workerId,
16900
- testId: evalCase.id,
16901
- status: "failed",
16902
- completedAt: Date.now(),
16903
- error: budgetResult.error,
16904
- score: budgetResult.score,
16905
- executionStatus: budgetResult.executionStatus
16906
- });
17263
+ } else {
17264
+ caseCost = result.costUsd;
16907
17265
  }
16908
- if (onResult) {
16909
- await onResult(budgetResult);
17266
+ if (caseCost !== void 0) {
17267
+ cumulativeBudgetCost += caseCost;
17268
+ if (cumulativeBudgetCost >= totalBudgetUsd) {
17269
+ budgetExhausted = true;
17270
+ }
16910
17271
  }
16911
- return budgetResult;
16912
17272
  }
16913
- if (failOnError === true && failOnErrorTriggered) {
16914
- const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
16915
- const haltResult = {
16916
- timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
17273
+ if (failOnError === true && result.executionStatus === "execution_error") {
17274
+ failOnErrorTriggered = true;
17275
+ }
17276
+ if (beforeAllOutput && !beforeAllOutputAttached) {
17277
+ result = { ...result, beforeAllOutput };
17278
+ beforeAllOutputAttached = true;
17279
+ }
17280
+ if (onProgress) {
17281
+ await onProgress({
17282
+ workerId,
16917
17283
  testId: evalCase.id,
16918
- suite: evalCase.suite,
16919
- category: evalCase.category,
16920
- score: 0,
16921
- assertions: [],
16922
- output: [],
16923
- target: target.name,
16924
- error: errorMsg,
16925
- executionStatus: "execution_error",
16926
- failureStage: "setup",
16927
- failureReasonCode: "error_threshold_exceeded",
16928
- executionError: { message: errorMsg, stage: "setup" }
16929
- };
16930
- if (onProgress) {
16931
- await onProgress({
16932
- workerId,
16933
- testId: evalCase.id,
16934
- status: "failed",
16935
- completedAt: Date.now(),
16936
- error: haltResult.error,
16937
- score: haltResult.score,
16938
- executionStatus: haltResult.executionStatus
16939
- });
16940
- }
16941
- if (onResult) {
16942
- await onResult(haltResult);
16943
- }
16944
- return haltResult;
17284
+ status: result.error ? "failed" : "completed",
17285
+ startedAt: 0,
17286
+ // Not used for completed status
17287
+ completedAt: Date.now(),
17288
+ error: result.error,
17289
+ score: result.score,
17290
+ executionStatus: result.executionStatus
17291
+ });
16945
17292
  }
17293
+ if (onResult) {
17294
+ await onResult(result);
17295
+ }
17296
+ return result;
17297
+ } catch (error) {
16946
17298
  if (onProgress) {
16947
17299
  await onProgress({
16948
17300
  workerId,
16949
17301
  testId: evalCase.id,
16950
- status: "running",
16951
- startedAt: Date.now()
17302
+ status: "failed",
17303
+ completedAt: Date.now(),
17304
+ error: error instanceof Error ? error.message : String(error)
16952
17305
  });
16953
17306
  }
16954
- const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
16955
- const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
16956
- const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
16957
- try {
16958
- const graderProvider = await resolveGraderProvider(target);
16959
- const runCaseOptions = {
16960
- evalCase,
16961
- provider: primaryProvider,
16962
- target,
16963
- evaluators: evaluatorRegistry,
16964
- maxRetries,
16965
- agentTimeoutMs,
16966
- cache,
16967
- useCache,
16968
- now,
16969
- graderProvider,
16970
- targetResolver,
16971
- availableTargets,
16972
- evalRunId,
16973
- keepWorkspaces,
16974
- cleanupWorkspaces,
16975
- retainOnSuccess: resolvedRetainOnSuccess,
16976
- retainOnFailure: resolvedRetainOnFailure,
16977
- sharedWorkspacePath: testWorkspacePath,
16978
- sharedBaselineCommit: testBaselineCommit,
16979
- suiteWorkspaceFile,
16980
- streamCallbacks,
16981
- typeRegistry,
16982
- repoManager,
16983
- evalDir,
16984
- verbose,
16985
- threshold: scoreThreshold
16986
- };
16987
- let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
16988
- if (totalBudgetUsd !== void 0) {
16989
- let caseCost;
16990
- if (result.trials && result.trials.length > 0) {
16991
- const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
16992
- if (trialCostSum > 0) {
16993
- caseCost = trialCostSum;
16994
- }
16995
- } else {
16996
- caseCost = result.costUsd;
16997
- }
16998
- if (caseCost !== void 0) {
16999
- cumulativeBudgetCost += caseCost;
17000
- if (cumulativeBudgetCost >= totalBudgetUsd) {
17001
- budgetExhausted = true;
17307
+ throw error;
17308
+ } finally {
17309
+ if (testPoolSlot) {
17310
+ availablePoolSlots.push(testPoolSlot);
17311
+ }
17312
+ }
17313
+ }
17314
+ for (const wave of waves) {
17315
+ const wavePromises = wave.map(
17316
+ (evalCase) => limit(async () => {
17317
+ if (evalCase.depends_on && evalCase.depends_on.length > 0) {
17318
+ const { ok, depResults } = checkDependencies2(evalCase);
17319
+ if (!ok) {
17320
+ const policy = evalCase.on_dependency_failure ?? "skip";
17321
+ if (policy === "skip" || policy === "fail") {
17322
+ const failedDeps = evalCase.depends_on.filter(
17323
+ (d) => completedResults.get(d)?.executionStatus === "execution_error"
17324
+ );
17325
+ const prefix = policy === "skip" ? "Skipped" : "Failed";
17326
+ const errorMsg = `${prefix}: dependency failed (${failedDeps.join(", ")})`;
17327
+ const depFailResult = {
17328
+ timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
17329
+ testId: evalCase.id,
17330
+ suite: evalCase.suite,
17331
+ category: evalCase.category,
17332
+ score: 0,
17333
+ assertions: [],
17334
+ output: [],
17335
+ target: target.name,
17336
+ error: errorMsg,
17337
+ executionStatus: "execution_error",
17338
+ failureStage: "setup",
17339
+ failureReasonCode: "dependency_failed",
17340
+ executionError: { message: errorMsg, stage: "setup" }
17341
+ };
17342
+ if (onProgress) {
17343
+ await onProgress({
17344
+ workerId: nextWorkerId++,
17345
+ testId: evalCase.id,
17346
+ status: "failed",
17347
+ completedAt: Date.now(),
17348
+ error: depFailResult.error,
17349
+ score: 0,
17350
+ executionStatus: depFailResult.executionStatus
17351
+ });
17352
+ }
17353
+ if (onResult) {
17354
+ await onResult(depFailResult);
17355
+ }
17356
+ return depFailResult;
17002
17357
  }
17003
17358
  }
17359
+ return dispatchTest(evalCase, depResults);
17004
17360
  }
17005
- if (failOnError === true && result.executionStatus === "execution_error") {
17006
- failOnErrorTriggered = true;
17007
- }
17008
- if (beforeAllOutput && !beforeAllOutputAttached) {
17009
- result = { ...result, beforeAllOutput };
17010
- beforeAllOutputAttached = true;
17011
- }
17012
- if (onProgress) {
17013
- await onProgress({
17014
- workerId,
17015
- testId: evalCase.id,
17016
- status: result.error ? "failed" : "completed",
17017
- startedAt: 0,
17018
- // Not used for completed status
17019
- completedAt: Date.now(),
17020
- error: result.error,
17021
- score: result.score,
17022
- executionStatus: result.executionStatus
17023
- });
17024
- }
17361
+ return dispatchTest(evalCase);
17362
+ })
17363
+ );
17364
+ const settled = await Promise.allSettled(wavePromises);
17365
+ for (let i = 0; i < settled.length; i++) {
17366
+ const outcome = settled[i];
17367
+ const evalCase = wave[i];
17368
+ if (outcome.status === "fulfilled") {
17369
+ completedResults.set(evalCase.id, outcome.value);
17370
+ results.push(outcome.value);
17371
+ } else {
17372
+ const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
17373
+ const promptInputs = await buildPromptInputs(evalCase, formattingMode);
17374
+ const errorResult = buildErrorResult(
17375
+ evalCase,
17376
+ target.name,
17377
+ (now ?? (() => /* @__PURE__ */ new Date()))(),
17378
+ outcome.reason,
17379
+ promptInputs,
17380
+ primaryProvider,
17381
+ "agent",
17382
+ "provider_error",
17383
+ verbose
17384
+ );
17385
+ completedResults.set(evalCase.id, errorResult);
17386
+ results.push(errorResult);
17025
17387
  if (onResult) {
17026
- await onResult(result);
17388
+ await onResult(errorResult);
17027
17389
  }
17028
- return result;
17029
- } catch (error) {
17030
- if (onProgress) {
17031
- await onProgress({
17032
- workerId,
17033
- testId: evalCase.id,
17034
- status: "failed",
17035
- completedAt: Date.now(),
17036
- error: error instanceof Error ? error.message : String(error)
17037
- });
17038
- }
17039
- throw error;
17040
- } finally {
17041
- if (testPoolSlot) {
17042
- availablePoolSlots.push(testPoolSlot);
17043
- }
17044
- }
17045
- })
17046
- );
17047
- const settled = await Promise.allSettled(promises);
17048
- const results = [];
17049
- for (let i = 0; i < settled.length; i++) {
17050
- const outcome = settled[i];
17051
- if (outcome.status === "fulfilled") {
17052
- results.push(outcome.value);
17053
- } else {
17054
- const evalCase = filteredEvalCases[i];
17055
- const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
17056
- const promptInputs = await buildPromptInputs(evalCase, formattingMode);
17057
- const errorResult = buildErrorResult(
17058
- evalCase,
17059
- target.name,
17060
- (now ?? (() => /* @__PURE__ */ new Date()))(),
17061
- outcome.reason,
17062
- promptInputs,
17063
- primaryProvider,
17064
- "agent",
17065
- "provider_error",
17066
- verbose
17067
- );
17068
- results.push(errorResult);
17069
- if (onResult) {
17070
- await onResult(errorResult);
17071
17390
  }
17072
17391
  }
17073
17392
  }
@@ -17312,7 +17631,8 @@ async function runEvalCase(options) {
17312
17631
  repoManager,
17313
17632
  evalDir,
17314
17633
  verbose,
17315
- threshold: caseThreshold
17634
+ threshold: caseThreshold,
17635
+ dependencyResults
17316
17636
  } = options;
17317
17637
  const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
17318
17638
  const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
@@ -17356,7 +17676,7 @@ async function runEvalCase(options) {
17356
17676
  if (caseWorkspaceFile && workspacePath) {
17357
17677
  const copiedFile = path45.join(workspacePath, path45.basename(caseWorkspaceFile));
17358
17678
  try {
17359
- await stat8(copiedFile);
17679
+ await stat9(copiedFile);
17360
17680
  caseWorkspaceFile = copiedFile;
17361
17681
  } catch {
17362
17682
  }
@@ -17520,7 +17840,11 @@ async function runEvalCase(options) {
17520
17840
  if (!baselineCommit && workspacePath) {
17521
17841
  try {
17522
17842
  baselineCommit = await initializeBaseline(workspacePath);
17523
- } catch {
17843
+ } catch (error) {
17844
+ const message = error instanceof Error ? error.message : String(error);
17845
+ if (verbose) {
17846
+ console.warn(`[setup] test=${evalCase.id} baseline initialization failed: ${message}`);
17847
+ }
17524
17848
  }
17525
17849
  }
17526
17850
  const caseStartMs = Date.now();
@@ -17631,6 +17955,11 @@ async function runEvalCase(options) {
17631
17955
  } catch {
17632
17956
  }
17633
17957
  }
17958
+ const providerFileChanges = providerResponse?.fileChanges;
17959
+ if (providerFileChanges) {
17960
+ fileChanges = fileChanges ? `${fileChanges}
17961
+ ${providerFileChanges}` : providerFileChanges;
17962
+ }
17634
17963
  const providerError = extractProviderError(providerResponse);
17635
17964
  if (caseHooksEnabled && repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
17636
17965
  try {
@@ -17688,7 +18017,8 @@ async function runEvalCase(options) {
17688
18017
  workspacePath,
17689
18018
  dockerConfig: evalCase.workspace?.docker,
17690
18019
  verbose,
17691
- threshold: evalCase.threshold ?? caseThreshold
18020
+ threshold: evalCase.threshold ?? caseThreshold,
18021
+ dependencyResults
17692
18022
  });
17693
18023
  const effectiveThreshold = evalCase.threshold ?? caseThreshold;
17694
18024
  const totalDurationMs = Date.now() - caseStartMs;
@@ -17881,7 +18211,8 @@ async function evaluateCandidate(options) {
17881
18211
  fileChanges,
17882
18212
  workspacePath,
17883
18213
  dockerConfig,
17884
- threshold: evalThreshold
18214
+ threshold: evalThreshold,
18215
+ dependencyResults
17885
18216
  } = options;
17886
18217
  const gradeTimestamp = nowFn();
17887
18218
  const { score, scores } = await runEvaluatorsForCase({
@@ -17908,7 +18239,8 @@ async function evaluateCandidate(options) {
17908
18239
  fileChanges,
17909
18240
  workspacePath,
17910
18241
  dockerConfig,
17911
- threshold: evalThreshold
18242
+ threshold: evalThreshold,
18243
+ dependencyResults
17912
18244
  });
17913
18245
  const completedAt = nowFn();
17914
18246
  let agentRequest;
@@ -17984,7 +18316,8 @@ async function runEvaluatorsForCase(options) {
17984
18316
  fileChanges,
17985
18317
  workspacePath,
17986
18318
  dockerConfig,
17987
- threshold
18319
+ threshold,
18320
+ dependencyResults
17988
18321
  } = options;
17989
18322
  if (evalCase.assertions && evalCase.assertions.length > 0) {
17990
18323
  return runEvaluatorList({
@@ -18012,7 +18345,8 @@ async function runEvaluatorsForCase(options) {
18012
18345
  fileChanges,
18013
18346
  workspacePath,
18014
18347
  dockerConfig,
18015
- threshold
18348
+ threshold,
18349
+ dependencyResults
18016
18350
  });
18017
18351
  }
18018
18352
  const evaluatorKind = evalCase.evaluator ?? "llm-grader";
@@ -18042,6 +18376,7 @@ async function runEvaluatorsForCase(options) {
18042
18376
  fileChanges,
18043
18377
  workspacePath,
18044
18378
  dockerConfig,
18379
+ dependencyResults,
18045
18380
  ...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
18046
18381
  });
18047
18382
  return { score };
@@ -18081,7 +18416,8 @@ async function runEvaluatorList(options) {
18081
18416
  availableTargets,
18082
18417
  fileChanges,
18083
18418
  workspacePath,
18084
- dockerConfig
18419
+ dockerConfig,
18420
+ dependencyResults
18085
18421
  } = options;
18086
18422
  const scored = [];
18087
18423
  const scores = [];
@@ -18105,7 +18441,8 @@ async function runEvaluatorList(options) {
18105
18441
  availableTargets,
18106
18442
  fileChanges,
18107
18443
  workspacePath,
18108
- dockerConfig
18444
+ dockerConfig,
18445
+ dependencyResults
18109
18446
  };
18110
18447
  const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
18111
18448
  const dispatchContext = {
@@ -18824,7 +19161,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
18824
19161
  }
18825
19162
 
18826
19163
  // src/evaluation/workspace/deps-scanner.ts
18827
- import { readFile as readFile15 } from "node:fs/promises";
19164
+ import { readFile as readFile16 } from "node:fs/promises";
18828
19165
  import path47 from "node:path";
18829
19166
  import { parse as parse6 } from "yaml";
18830
19167
  function normalizeGitUrl(url) {
@@ -18872,7 +19209,7 @@ async function scanRepoDeps(evalFilePaths) {
18872
19209
  return { repos: [...seen.values()], errors };
18873
19210
  }
18874
19211
  async function extractReposFromEvalFile(filePath) {
18875
- const content = await readFile15(filePath, "utf8");
19212
+ const content = await readFile16(filePath, "utf8");
18876
19213
  const parsed = interpolateEnv(parse6(content), process.env);
18877
19214
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
18878
19215
  const obj = parsed;
@@ -18893,7 +19230,7 @@ async function extractReposFromEvalFile(filePath) {
18893
19230
  async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
18894
19231
  if (typeof raw === "string") {
18895
19232
  const workspaceFilePath = path47.resolve(evalFileDir, raw);
18896
- const content = await readFile15(workspaceFilePath, "utf8");
19233
+ const content = await readFile16(workspaceFilePath, "utf8");
18897
19234
  const parsed = interpolateEnv(parse6(content), process.env);
18898
19235
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
18899
19236
  return extractReposFromObject(parsed);
@@ -18921,7 +19258,7 @@ function extractReposFromObject(obj) {
18921
19258
  }
18922
19259
 
18923
19260
  // src/evaluation/cache/response-cache.ts
18924
- import { mkdir as mkdir15, readFile as readFile16, writeFile as writeFile8 } from "node:fs/promises";
19261
+ import { mkdir as mkdir15, readFile as readFile17, writeFile as writeFile8 } from "node:fs/promises";
18925
19262
  import path48 from "node:path";
18926
19263
  var DEFAULT_CACHE_PATH = ".agentv/cache";
18927
19264
  var ResponseCache = class {
@@ -18932,7 +19269,7 @@ var ResponseCache = class {
18932
19269
  async get(key) {
18933
19270
  const filePath = this.keyToPath(key);
18934
19271
  try {
18935
- const data = await readFile16(filePath, "utf8");
19272
+ const data = await readFile17(filePath, "utf8");
18936
19273
  return JSON.parse(data);
18937
19274
  } catch {
18938
19275
  return void 0;
@@ -18964,7 +19301,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
18964
19301
  // src/evaluation/results-repo.ts
18965
19302
  import { execFile as execFile3 } from "node:child_process";
18966
19303
  import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync } from "node:fs";
18967
- import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir8, rm as rm6, stat as stat9 } from "node:fs/promises";
19304
+ import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir9, rm as rm6, stat as stat10 } from "node:fs/promises";
18968
19305
  import os3 from "node:os";
18969
19306
  import path49 from "node:path";
18970
19307
  import { promisify as promisify7 } from "node:util";
@@ -19187,12 +19524,12 @@ function resolveResultsRepoRunsDir(config) {
19187
19524
  );
19188
19525
  }
19189
19526
  async function directorySizeBytes(targetPath) {
19190
- const entry = await stat9(targetPath);
19527
+ const entry = await stat10(targetPath);
19191
19528
  if (entry.isFile()) {
19192
19529
  return entry.size;
19193
19530
  }
19194
19531
  let total = 0;
19195
- for (const child of await readdir8(targetPath, { withFileTypes: true })) {
19532
+ for (const child of await readdir9(targetPath, { withFileTypes: true })) {
19196
19533
  total += await directorySizeBytes(path49.join(targetPath, child.name));
19197
19534
  }
19198
19535
  return total;
@@ -20259,17 +20596,17 @@ function extractResponseItemContent(content) {
20259
20596
  }
20260
20597
 
20261
20598
  // src/import/codex-session-discovery.ts
20262
- import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
20263
- import { homedir as homedir4 } from "node:os";
20599
+ import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
20600
+ import { homedir as homedir5 } from "node:os";
20264
20601
  import path51 from "node:path";
20265
- var DEFAULT_SESSIONS_DIR = () => path51.join(homedir4(), ".codex", "sessions");
20602
+ var DEFAULT_SESSIONS_DIR = () => path51.join(homedir5(), ".codex", "sessions");
20266
20603
  async function discoverCodexSessions(opts) {
20267
20604
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
20268
20605
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
20269
20606
  const sessions = [];
20270
20607
  let yearDirs;
20271
20608
  try {
20272
- yearDirs = await readdir9(sessionsDir);
20609
+ yearDirs = await readdir10(sessionsDir);
20273
20610
  } catch {
20274
20611
  return [];
20275
20612
  }
@@ -20277,7 +20614,7 @@ async function discoverCodexSessions(opts) {
20277
20614
  const yearPath = path51.join(sessionsDir, year);
20278
20615
  let monthDirs;
20279
20616
  try {
20280
- monthDirs = await readdir9(yearPath);
20617
+ monthDirs = await readdir10(yearPath);
20281
20618
  } catch {
20282
20619
  continue;
20283
20620
  }
@@ -20285,7 +20622,7 @@ async function discoverCodexSessions(opts) {
20285
20622
  const monthPath = path51.join(yearPath, month);
20286
20623
  let dayDirs;
20287
20624
  try {
20288
- dayDirs = await readdir9(monthPath);
20625
+ dayDirs = await readdir10(monthPath);
20289
20626
  } catch {
20290
20627
  continue;
20291
20628
  }
@@ -20297,7 +20634,7 @@ async function discoverCodexSessions(opts) {
20297
20634
  const dayPath = path51.join(monthPath, day);
20298
20635
  let files;
20299
20636
  try {
20300
- files = await readdir9(dayPath);
20637
+ files = await readdir10(dayPath);
20301
20638
  } catch {
20302
20639
  continue;
20303
20640
  }
@@ -20309,7 +20646,7 @@ async function discoverCodexSessions(opts) {
20309
20646
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
20310
20647
  let updatedAt;
20311
20648
  try {
20312
- const fileStat = await stat10(filePath);
20649
+ const fileStat = await stat11(filePath);
20313
20650
  updatedAt = fileStat.mtime;
20314
20651
  } catch {
20315
20652
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -20324,10 +20661,10 @@ async function discoverCodexSessions(opts) {
20324
20661
  }
20325
20662
 
20326
20663
  // src/import/session-discovery.ts
20327
- import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
20328
- import { homedir as homedir5 } from "node:os";
20664
+ import { readdir as readdir11, stat as stat12 } from "node:fs/promises";
20665
+ import { homedir as homedir6 } from "node:os";
20329
20666
  import path52 from "node:path";
20330
- var DEFAULT_PROJECTS_DIR = () => path52.join(homedir5(), ".claude", "projects");
20667
+ var DEFAULT_PROJECTS_DIR = () => path52.join(homedir6(), ".claude", "projects");
20331
20668
  function encodeProjectPath(projectPath) {
20332
20669
  return projectPath.replace(/\//g, "-");
20333
20670
  }
@@ -20336,7 +20673,7 @@ async function discoverClaudeSessions(opts) {
20336
20673
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
20337
20674
  let projectDirs;
20338
20675
  try {
20339
- projectDirs = await readdir10(projectsDir);
20676
+ projectDirs = await readdir11(projectsDir);
20340
20677
  } catch {
20341
20678
  return [];
20342
20679
  }
@@ -20349,7 +20686,7 @@ async function discoverClaudeSessions(opts) {
20349
20686
  const dirPath = path52.join(projectsDir, projectDir);
20350
20687
  let entries;
20351
20688
  try {
20352
- entries = await readdir10(dirPath);
20689
+ entries = await readdir11(dirPath);
20353
20690
  } catch {
20354
20691
  continue;
20355
20692
  }
@@ -20360,7 +20697,7 @@ async function discoverClaudeSessions(opts) {
20360
20697
  const filePath = path52.join(dirPath, entry);
20361
20698
  let updatedAt;
20362
20699
  try {
20363
- const fileStat = await stat11(filePath);
20700
+ const fileStat = await stat12(filePath);
20364
20701
  updatedAt = fileStat.mtime;
20365
20702
  } catch {
20366
20703
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -20378,7 +20715,7 @@ async function discoverClaudeSessions(opts) {
20378
20715
  }
20379
20716
 
20380
20717
  // src/import/types.ts
20381
- import { readFile as readFile17 } from "node:fs/promises";
20718
+ import { readFile as readFile18 } from "node:fs/promises";
20382
20719
  function toTranscriptJsonLine(entry) {
20383
20720
  const firstUserMessage = entry.messages.find((m) => m.role === "user");
20384
20721
  const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
@@ -20404,11 +20741,11 @@ function toTranscriptJsonLine(entry) {
20404
20741
  };
20405
20742
  }
20406
20743
  async function readTranscriptJsonl(filePath) {
20407
- const text = await readFile17(filePath, "utf8");
20744
+ const text = await readFile18(filePath, "utf8");
20408
20745
  return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
20409
20746
  }
20410
20747
  async function readTranscriptFile(filePath) {
20411
- return readFile17(filePath, "utf8");
20748
+ return readFile18(filePath, "utf8");
20412
20749
  }
20413
20750
 
20414
20751
  // src/import/transcript-provider.ts