agentv 4.10.0 → 4.11.2-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/{chunk-XOSNETAV.js → chunk-BAUNAXHT.js} +1 -1
  2. package/dist/chunk-BPGJ4HBU.js +183 -0
  3. package/dist/chunk-BPGJ4HBU.js.map +1 -0
  4. package/dist/{chunk-KF6BABQ5.js → chunk-DHWFLK6T.js} +1090 -303
  5. package/dist/chunk-DHWFLK6T.js.map +1 -0
  6. package/dist/{chunk-SE73HJZG.js → chunk-FQGY6QXQ.js} +780 -346
  7. package/dist/chunk-FQGY6QXQ.js.map +1 -0
  8. package/dist/chunk-NPVGBFF6.js +151 -0
  9. package/dist/chunk-NPVGBFF6.js.map +1 -0
  10. package/dist/{chunk-VA64NETD.js → chunk-YLVQNF23.js} +1120 -731
  11. package/dist/chunk-YLVQNF23.js.map +1 -0
  12. package/dist/cli.js +6 -4
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{dist-XDNB4WDT.js → dist-HNSXNRVK.js} +36 -3
  15. package/dist/docker-workspace-RPPXBT27-B4AQHVWA.js +11 -0
  16. package/dist/{esm-CZAWIY6F.js → esm-UYZ3HJBU.js} +2 -2
  17. package/dist/esm-UYZ3HJBU.js.map +1 -0
  18. package/dist/exec-AR6JUUN5-6MBPURPR.js +11 -0
  19. package/dist/exec-AR6JUUN5-6MBPURPR.js.map +1 -0
  20. package/dist/index.js +6 -4
  21. package/dist/{interactive-SNKK6VCV.js → interactive-OPHUF2UP.js} +6 -4
  22. package/dist/{interactive-SNKK6VCV.js.map → interactive-OPHUF2UP.js.map} +1 -1
  23. package/dist/{src-ML4D2MC2.js → src-PXDA7QIS.js} +2 -2
  24. package/dist/studio/assets/index-Bi-KHfNm.js +65 -0
  25. package/dist/studio/assets/index-D_j-w4UO.css +1 -0
  26. package/dist/studio/assets/{index-DcwjOyrk.js → index-VyDFrnoK.js} +1 -1
  27. package/dist/studio/index.html +2 -2
  28. package/package.json +1 -1
  29. package/dist/chunk-KF6BABQ5.js.map +0 -1
  30. package/dist/chunk-SE73HJZG.js.map +0 -1
  31. package/dist/chunk-VA64NETD.js.map +0 -1
  32. package/dist/studio/assets/index-DHxVz6M9.css +0 -1
  33. package/dist/studio/assets/index-Y5InSvcS.js +0 -65
  34. /package/dist/{chunk-XOSNETAV.js.map → chunk-BAUNAXHT.js.map} +0 -0
  35. /package/dist/{dist-XDNB4WDT.js.map → dist-HNSXNRVK.js.map} +0 -0
  36. /package/dist/{esm-CZAWIY6F.js.map → docker-workspace-RPPXBT27-B4AQHVWA.js.map} +0 -0
  37. /package/dist/{src-ML4D2MC2.js.map → src-PXDA7QIS.js.map} +0 -0
@@ -1,7 +1,17 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
+ import {
3
+ execFileWithStdin,
4
+ execShellWithStdin
5
+ } from "./chunk-NPVGBFF6.js";
2
6
  import {
3
7
  require_token_error
4
8
  } from "./chunk-HQDCIXVH.js";
9
+ import {
10
+ SpanStatusCode,
11
+ context,
12
+ init_esm,
13
+ trace
14
+ } from "./chunk-LRULMAAA.js";
5
15
  import {
6
16
  AISDKError,
7
17
  APICallError,
@@ -150,12 +160,6 @@ import {
150
160
  withoutTrailingSlash,
151
161
  zodSchema
152
162
  } from "./chunk-ZKO2LGRR.js";
153
- import {
154
- SpanStatusCode,
155
- context,
156
- init_esm,
157
- trace
158
- } from "./chunk-LRULMAAA.js";
159
163
  import {
160
164
  __commonJS,
161
165
  __export,
@@ -301,7 +305,7 @@ var require_dist = __commonJS({
301
305
  }
302
306
  });
303
307
 
304
- // ../../packages/core/dist/chunk-BWHUWLGW.js
308
+ // ../../packages/core/dist/chunk-5POFMJJ7.js
305
309
  import { constants } from "node:fs";
306
310
  import { access, readFile } from "node:fs/promises";
307
311
  import path from "node:path";
@@ -419,7 +423,7 @@ __export(external_exports2, {
419
423
  void: () => voidType
420
424
  });
421
425
 
422
- // ../../packages/core/dist/chunk-BWHUWLGW.js
426
+ // ../../packages/core/dist/chunk-5POFMJJ7.js
423
427
  import { readFile as readFile2 } from "node:fs/promises";
424
428
  import path3 from "node:path";
425
429
  import fg from "fast-glob";
@@ -2308,10 +2312,10 @@ async function expandFileReferences(tests, evalFileDir) {
2308
2312
  }
2309
2313
 
2310
2314
  // ../../packages/core/dist/index.js
2311
- import { readFile as readFile7 } from "node:fs/promises";
2315
+ import { readFile as readFile8 } from "node:fs/promises";
2312
2316
  import path8 from "node:path";
2313
2317
  import micromatch2 from "micromatch";
2314
- import { parse as parse2 } from "yaml";
2318
+ import { parse as parse3 } from "yaml";
2315
2319
  import { readFile as readFile3 } from "node:fs/promises";
2316
2320
  import path4 from "node:path";
2317
2321
  import { readFile as readFile22 } from "node:fs/promises";
@@ -2321,20 +2325,22 @@ import { constants as constants2 } from "node:fs";
2321
2325
  import { access as access2 } from "node:fs/promises";
2322
2326
  import path22 from "node:path";
2323
2327
  import { fileURLToPath } from "node:url";
2328
+ import { readFile as readFile5 } from "node:fs/promises";
2324
2329
  import path5 from "node:path";
2330
+ import { parse as parse2 } from "yaml";
2325
2331
  import { readFile as readFile32 } from "node:fs/promises";
2326
2332
  import path42 from "node:path";
2327
2333
  import { fileURLToPath as fileURLToPath2 } from "node:url";
2328
2334
  import { readFile as readFile4 } from "node:fs/promises";
2329
- import { readFile as readFile6 } from "node:fs/promises";
2335
+ import { readFile as readFile7 } from "node:fs/promises";
2330
2336
  import path7 from "node:path";
2331
2337
  import micromatch from "micromatch";
2332
2338
  import { parse as parseYaml2 } from "yaml";
2333
- import { readFile as readFile5 } from "node:fs/promises";
2339
+ import { readFile as readFile6 } from "node:fs/promises";
2334
2340
  import path6 from "node:path";
2335
2341
  import { readFileSync } from "node:fs";
2336
2342
  import path9 from "node:path";
2337
- import { parse as parse3 } from "yaml";
2343
+ import { parse as parse4 } from "yaml";
2338
2344
  import { createOpenAI } from "@ai-sdk/openai";
2339
2345
 
2340
2346
  // ../../node_modules/.bun/@openrouter+ai-sdk-provider@2.3.3+3ab978b6804fd9e7/node_modules/@openrouter/ai-sdk-provider/dist/index.mjs
@@ -6944,7 +6950,7 @@ function createOpenRouter(options = {}) {
6944
6950
  );
6945
6951
  const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
6946
6952
  provider: "openrouter.chat",
6947
- url: ({ path: path52 }) => `${baseURL}${path52}`,
6953
+ url: ({ path: path53 }) => `${baseURL}${path53}`,
6948
6954
  headers: getHeaders,
6949
6955
  compatibility,
6950
6956
  fetch: options.fetch,
@@ -6952,7 +6958,7 @@ function createOpenRouter(options = {}) {
6952
6958
  });
6953
6959
  const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
6954
6960
  provider: "openrouter.completion",
6955
- url: ({ path: path52 }) => `${baseURL}${path52}`,
6961
+ url: ({ path: path53 }) => `${baseURL}${path53}`,
6956
6962
  headers: getHeaders,
6957
6963
  compatibility,
6958
6964
  fetch: options.fetch,
@@ -6960,14 +6966,14 @@ function createOpenRouter(options = {}) {
6960
6966
  });
6961
6967
  const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
6962
6968
  provider: "openrouter.embedding",
6963
- url: ({ path: path52 }) => `${baseURL}${path52}`,
6969
+ url: ({ path: path53 }) => `${baseURL}${path53}`,
6964
6970
  headers: getHeaders,
6965
6971
  fetch: options.fetch,
6966
6972
  extraBody: options.extraBody
6967
6973
  });
6968
6974
  const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
6969
6975
  provider: "openrouter.image",
6970
- url: ({ path: path52 }) => `${baseURL}${path52}`,
6976
+ url: ({ path: path53 }) => `${baseURL}${path53}`,
6971
6977
  headers: getHeaders,
6972
6978
  fetch: options.fetch,
6973
6979
  extraBody: options.extraBody
@@ -14467,10 +14473,10 @@ import { createWriteStream as createWriteStream4, existsSync, readdirSync } from
14467
14473
  import { arch, platform } from "node:os";
14468
14474
  import path15 from "node:path";
14469
14475
  import { fileURLToPath as fileURLToPath3 } from "node:url";
14470
- import { readFile as readFile9 } from "node:fs/promises";
14476
+ import { readFile as readFile10 } from "node:fs/promises";
14471
14477
  import { homedir as homedir2 } from "node:os";
14472
14478
  import path18 from "node:path";
14473
- import { readFile as readFile8, readdir, stat } from "node:fs/promises";
14479
+ import { readFile as readFile9, readdir, stat } from "node:fs/promises";
14474
14480
  import { homedir } from "node:os";
14475
14481
  import path17 from "node:path";
14476
14482
  import { parse as parseYaml22 } from "yaml";
@@ -14504,23 +14510,23 @@ import { access as access22, mkdir as mkdir8, readdir as readdir2, rm as rm2, st
14504
14510
  import path23 from "node:path";
14505
14511
  import path24 from "node:path";
14506
14512
  import path25 from "node:path";
14507
- import { readFile as readFile10 } from "node:fs/promises";
14513
+ import { readFile as readFile11 } from "node:fs/promises";
14508
14514
  import path26 from "node:path";
14509
14515
  import { exec, spawn as spawn4 } from "node:child_process";
14510
14516
  import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
14511
14517
  import path28 from "node:path";
14512
14518
  import { promisify as promisify2 } from "node:util";
14513
14519
  import path27 from "node:path";
14514
- import { copyFile, mkdir as mkdir10, readFile as readFile11, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
14520
+ import { copyFile, mkdir as mkdir10, readFile as readFile12, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
14515
14521
  import path30 from "node:path";
14516
14522
  import path29 from "node:path";
14517
14523
  import JSON5 from "json5";
14518
14524
  import { writeFile as writeFile5 } from "node:fs/promises";
14519
14525
  import path322 from "node:path";
14520
14526
  import { constants as constants4 } from "node:fs";
14521
- import { access as access4, readFile as readFile12 } from "node:fs/promises";
14527
+ import { access as access4, readFile as readFile13 } from "node:fs/promises";
14522
14528
  import path34 from "node:path";
14523
- import { parse as parse4 } from "yaml";
14529
+ import { parse as parse5 } from "yaml";
14524
14530
  import path35 from "node:path";
14525
14531
  import fg2 from "fast-glob";
14526
14532
  import { mkdtemp as mkdtemp2, rm as rm3, writeFile as writeFile6 } from "node:fs/promises";
@@ -14549,7 +14555,7 @@ import path41 from "node:path";
14549
14555
  import { execFile } from "node:child_process";
14550
14556
  import { createHash } from "node:crypto";
14551
14557
  import { existsSync as existsSync3 } from "node:fs";
14552
- import { cp as cp2, mkdir as mkdir13, readFile as readFile13, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
14558
+ import { cp as cp2, mkdir as mkdir13, readFile as readFile14, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
14553
14559
  import path422 from "node:path";
14554
14560
  import { promisify as promisify5 } from "node:util";
14555
14561
  import { execFile as execFile2 } from "node:child_process";
@@ -14560,21 +14566,27 @@ import { readdir as readdir6, stat as stat7 } from "node:fs/promises";
14560
14566
  import path44 from "node:path";
14561
14567
  import { existsSync as existsSync6 } from "node:fs";
14562
14568
  import path46 from "node:path";
14563
- import { readFile as readFile14 } from "node:fs/promises";
14569
+ import { readFile as readFile15 } from "node:fs/promises";
14564
14570
  import path47 from "node:path";
14565
- import { parse as parse5 } from "yaml";
14566
- import { mkdir as mkdir15, readFile as readFile15, writeFile as writeFile8 } from "node:fs/promises";
14571
+ import { parse as parse6 } from "yaml";
14572
+ import { mkdir as mkdir15, readFile as readFile16, writeFile as writeFile8 } from "node:fs/promises";
14567
14573
  import path48 from "node:path";
14568
- import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
14574
+ import { execFile as execFile3 } from "node:child_process";
14575
+ import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync } from "node:fs";
14576
+ import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir8, rm as rm6, stat as stat9 } from "node:fs/promises";
14577
+ import os3 from "node:os";
14569
14578
  import path49 from "node:path";
14570
- import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
14571
- import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
14572
- import { homedir as homedir3 } from "node:os";
14579
+ import { promisify as promisify7 } from "node:util";
14580
+ import { existsSync as existsSync8, mkdirSync as mkdirSync3, readFileSync as readFileSync4, readdirSync as readdirSync3, statSync as statSync2, writeFileSync as writeFileSync2 } from "node:fs";
14573
14581
  import path50 from "node:path";
14582
+ import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
14574
14583
  import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
14575
- import { homedir as homedir4 } from "node:os";
14584
+ import { homedir as homedir3 } from "node:os";
14576
14585
  import path51 from "node:path";
14577
- import { readFile as readFile16 } from "node:fs/promises";
14586
+ import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
14587
+ import { homedir as homedir4 } from "node:os";
14588
+ import path52 from "node:path";
14589
+ import { readFile as readFile17 } from "node:fs/promises";
14578
14590
  function computeTraceSummary(messages) {
14579
14591
  const toolCallCounts = {};
14580
14592
  const toolDurations = {};
@@ -14957,10 +14969,12 @@ async function loadConfig(evalFilePath, repoRoot) {
14957
14969
  parsed.execution,
14958
14970
  configPath
14959
14971
  );
14972
+ const results = parseResultsConfig(parsed.results, configPath);
14960
14973
  return {
14961
14974
  required_version: requiredVersion,
14962
14975
  eval_patterns: evalPatterns,
14963
- execution: executionDefaults
14976
+ execution: executionDefaults,
14977
+ results
14964
14978
  };
14965
14979
  } catch (error) {
14966
14980
  logWarning(
@@ -15195,150 +15209,63 @@ function parseExecutionDefaults(raw, configPath) {
15195
15209
  }
15196
15210
  return Object.keys(result).length > 0 ? result : void 0;
15197
15211
  }
15198
- function logWarning(message) {
15199
- console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET22}`);
15200
- }
15201
- function shellEscapePath(value) {
15202
- if (process.platform === "win32") {
15203
- return `"${value.replaceAll('"', '""')}"`;
15212
+ function parseResultsConfig(raw, configPath) {
15213
+ if (raw === void 0 || raw === null) {
15214
+ return void 0;
15204
15215
  }
15205
- return `'${value.replaceAll("'", `'"'"'`)}'`;
15206
- }
15207
- async function execFileWithStdin(argv, stdinPayload, options = {}) {
15208
- if (argv.length === 0) {
15209
- throw new Error("Executable argv must include at least one entry");
15216
+ if (typeof raw !== "object" || Array.isArray(raw)) {
15217
+ logWarning(`Invalid results in ${configPath}, expected object`);
15218
+ return void 0;
15210
15219
  }
15211
- if (typeof Bun !== "undefined") {
15212
- return execFileWithStdinBun(argv, stdinPayload, options);
15220
+ const obj = raw;
15221
+ const exportConfig = parseResultsExportConfig(obj.export, configPath);
15222
+ if (!exportConfig) {
15223
+ return void 0;
15213
15224
  }
15214
- return execFileWithStdinNode(argv, stdinPayload, options);
15225
+ return { export: exportConfig };
15215
15226
  }
15216
- async function execFileWithStdinBun(argv, stdinPayload, options) {
15217
- const command = [...argv];
15218
- const encoder = new TextEncoder();
15219
- const proc = Bun.spawn(command, {
15220
- cwd: options.cwd,
15221
- stdin: encoder.encode(stdinPayload),
15222
- stdout: "pipe",
15223
- stderr: "pipe",
15224
- // Merge additional env vars with process.env
15225
- env: options.env ? { ...process.env, ...options.env } : process.env
15226
- });
15227
- let timedOut = false;
15228
- const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
15229
- timedOut = true;
15230
- proc.kill("SIGKILL");
15231
- }, options.timeoutMs) : void 0;
15232
- try {
15233
- const stdoutPromise = proc.stdout ? new Response(proc.stdout).text() : Promise.resolve("");
15234
- const stderrPromise = proc.stderr ? new Response(proc.stderr).text() : Promise.resolve("");
15235
- const [stdout, stderr, exitCode] = await Promise.all([
15236
- stdoutPromise,
15237
- stderrPromise,
15238
- proc.exited
15239
- ]);
15240
- if (timedOut) {
15241
- throw new Error(`Process timed out after ${options.timeoutMs}ms`);
15242
- }
15243
- return {
15244
- stdout: stdout.replace(/\r\n/g, "\n"),
15245
- stderr: stderr.replace(/\r\n/g, "\n"),
15246
- exitCode
15247
- };
15248
- } finally {
15249
- if (timeout !== void 0) {
15250
- clearTimeout(timeout);
15251
- }
15227
+ function parseResultsExportConfig(raw, configPath) {
15228
+ if (raw === void 0 || raw === null) {
15229
+ return void 0;
15252
15230
  }
15253
- }
15254
- async function execFileWithStdinNode(argv, stdinPayload, options) {
15255
- const { spawn: spawn5 } = await import("node:child_process");
15256
- return new Promise((resolve2, reject) => {
15257
- const [cmd, ...args] = argv;
15258
- const child = spawn5(cmd, args, {
15259
- cwd: options.cwd,
15260
- stdio: ["pipe", "pipe", "pipe"],
15261
- // Merge additional env vars with process.env
15262
- env: options.env ? { ...process.env, ...options.env } : process.env
15263
- });
15264
- const stdoutChunks = [];
15265
- const stderrChunks = [];
15266
- child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
15267
- child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
15268
- let timedOut = false;
15269
- const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
15270
- timedOut = true;
15271
- child.kill("SIGKILL");
15272
- }, options.timeoutMs) : void 0;
15273
- child.on("error", (error) => {
15274
- if (timeout !== void 0) clearTimeout(timeout);
15275
- reject(error);
15276
- });
15277
- child.on("close", (code) => {
15278
- if (timeout !== void 0) clearTimeout(timeout);
15279
- if (timedOut) {
15280
- reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
15281
- return;
15282
- }
15283
- const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
15284
- const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
15285
- resolve2({
15286
- stdout,
15287
- stderr,
15288
- exitCode: code ?? 0
15289
- });
15290
- });
15291
- if (child.stdin) {
15292
- child.stdin.write(stdinPayload);
15293
- child.stdin.end();
15231
+ if (typeof raw !== "object" || Array.isArray(raw)) {
15232
+ logWarning(`Invalid results.export in ${configPath}, expected object`);
15233
+ return void 0;
15234
+ }
15235
+ const obj = raw;
15236
+ const repo = typeof obj.repo === "string" ? obj.repo.trim() : "";
15237
+ const exportPath = typeof obj.path === "string" ? obj.path.trim() : "";
15238
+ if (!repo) {
15239
+ logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`);
15240
+ return void 0;
15241
+ }
15242
+ if (!exportPath) {
15243
+ logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`);
15244
+ return void 0;
15245
+ }
15246
+ if (obj.auto_push !== void 0 && typeof obj.auto_push !== "boolean") {
15247
+ logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`);
15248
+ return void 0;
15249
+ }
15250
+ let branchPrefix;
15251
+ if (obj.branch_prefix !== void 0) {
15252
+ if (typeof obj.branch_prefix !== "string" || obj.branch_prefix.trim().length === 0) {
15253
+ logWarning(
15254
+ `Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`
15255
+ );
15256
+ return void 0;
15294
15257
  }
15295
- });
15296
- }
15297
- async function execShellWithStdin(command, stdinPayload, options = {}) {
15298
- const { mkdir: mkdir16, readFile: readFile17, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
15299
- const { tmpdir: tmpdir3 } = await import("node:os");
15300
- const path52 = await import("node:path");
15301
- const { randomUUID: randomUUID10 } = await import("node:crypto");
15302
- const dir = path52.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
15303
- await mkdir16(dir, { recursive: true });
15304
- const stdinPath = path52.join(dir, "stdin.txt");
15305
- const stdoutPath = path52.join(dir, "stdout.txt");
15306
- const stderrPath = path52.join(dir, "stderr.txt");
15307
- await writeFile9(stdinPath, stdinPayload, "utf8");
15308
- const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
15309
- const { spawn: spawn5 } = await import("node:child_process");
15310
- try {
15311
- const exitCode = await new Promise((resolve2, reject) => {
15312
- const child = spawn5(wrappedCommand, {
15313
- shell: true,
15314
- cwd: options.cwd,
15315
- stdio: ["ignore", "ignore", "ignore"],
15316
- // Merge additional env vars with process.env
15317
- env: options.env ? { ...process.env, ...options.env } : process.env
15318
- });
15319
- const timeout = options.timeoutMs ? setTimeout(() => {
15320
- child.kill();
15321
- reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
15322
- }, options.timeoutMs) : void 0;
15323
- child.on("error", (error) => {
15324
- if (timeout !== void 0) {
15325
- clearTimeout(timeout);
15326
- }
15327
- reject(error);
15328
- });
15329
- child.on("exit", (code) => {
15330
- if (timeout !== void 0) {
15331
- clearTimeout(timeout);
15332
- }
15333
- resolve2(code ?? 0);
15334
- });
15335
- });
15336
- const stdout = (await readFile17(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
15337
- const stderr = (await readFile17(stderrPath, "utf8")).replace(/\r\n/g, "\n");
15338
- return { stdout, stderr, exitCode };
15339
- } finally {
15340
- await rm6(dir, { recursive: true, force: true });
15258
+ branchPrefix = obj.branch_prefix.trim();
15341
15259
  }
15260
+ return {
15261
+ repo,
15262
+ path: exportPath,
15263
+ ...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
15264
+ ...branchPrefix && { branch_prefix: branchPrefix }
15265
+ };
15266
+ }
15267
+ function logWarning(message) {
15268
+ console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET22}`);
15342
15269
  }
15343
15270
  var MIME_TYPE_ALIASES = {
15344
15271
  csv: "text/csv",
@@ -15571,6 +15498,7 @@ function validateTemplateVariables(content, source) {
15571
15498
  }
15572
15499
  var ANSI_YELLOW3 = "\x1B[33m";
15573
15500
  var ANSI_RESET4 = "\x1B[0m";
15501
+ var MAX_ASSERTION_INCLUDE_DEPTH = 3;
15574
15502
  var PROMPT_FILE_PREFIX = "file://";
15575
15503
  function normalizeEvaluatorType(type) {
15576
15504
  return type.replace(/_/g, "-");
@@ -15603,7 +15531,79 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
15603
15531
  const evaluators = [...parsedCase ?? [], ...parsedRoot ?? []];
15604
15532
  return evaluators.length > 0 ? evaluators : void 0;
15605
15533
  }
15606
- async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
15534
+ function isIncludeEntry(value) {
15535
+ return isJsonObject2(value) && typeof value.include === "string" && Object.keys(value).length === 1;
15536
+ }
15537
+ function isTemplateReference(value) {
15538
+ return !value.startsWith(".") && !value.includes("/") && !value.includes("\\");
15539
+ }
15540
+ async function resolveAssertionTemplateReference(include, searchRoots) {
15541
+ const templateCandidates = isTemplateReference(include) ? [
15542
+ path5.join(".agentv", "templates", `${include}.yaml`),
15543
+ path5.join(".agentv", "templates", `${include}.yml`)
15544
+ ] : [include];
15545
+ const attempted = [];
15546
+ for (const candidate of templateCandidates) {
15547
+ const resolved = await resolveFileReference22(candidate, searchRoots);
15548
+ attempted.push(...resolved.attempted);
15549
+ if (resolved.resolvedPath) {
15550
+ return {
15551
+ displayPath: resolved.displayPath,
15552
+ resolvedPath: resolved.resolvedPath,
15553
+ attempted
15554
+ };
15555
+ }
15556
+ }
15557
+ return {
15558
+ displayPath: templateCandidates[0] ?? include,
15559
+ resolvedPath: "",
15560
+ attempted
15561
+ };
15562
+ }
15563
+ async function loadAssertionTemplateEntries(include, searchRoots, evalId, includeContext) {
15564
+ const nextDepth = includeContext.depth + 1;
15565
+ if (nextDepth > MAX_ASSERTION_INCLUDE_DEPTH) {
15566
+ const chain = [...includeContext.chain, include].join(" -> ");
15567
+ throw new Error(
15568
+ `Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${chain}`
15569
+ );
15570
+ }
15571
+ const resolved = await resolveAssertionTemplateReference(include, searchRoots);
15572
+ if (!resolved.resolvedPath) {
15573
+ const attempted = resolved.attempted.length > 0 ? `
15574
+ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
15575
+ throw new Error(
15576
+ `Assertion template not found in '${evalId}': ${resolved.displayPath}${attempted}`
15577
+ );
15578
+ }
15579
+ if (includeContext.chain.includes(resolved.resolvedPath)) {
15580
+ const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
15581
+ throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
15582
+ }
15583
+ const content = await readFile5(resolved.resolvedPath, "utf8");
15584
+ const parsed = interpolateEnv(parse2(content), process.env);
15585
+ if (!isJsonObject2(parsed)) {
15586
+ throw new Error(
15587
+ `Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} (expected a YAML object with an assertions array)`
15588
+ );
15589
+ }
15590
+ const assertions = parsed.assertions;
15591
+ if (!Array.isArray(assertions)) {
15592
+ throw new Error(
15593
+ `Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
15594
+ );
15595
+ }
15596
+ const templateDir = path5.dirname(resolved.resolvedPath);
15597
+ const nestedSearchRoots = [
15598
+ templateDir,
15599
+ ...searchRoots.filter((root) => path5.resolve(root) !== templateDir)
15600
+ ];
15601
+ return await expandEvaluatorEntries(assertions, nestedSearchRoots, evalId, {
15602
+ depth: nextDepth,
15603
+ chain: [...includeContext.chain, resolved.resolvedPath]
15604
+ }) ?? [];
15605
+ }
15606
+ async function expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
15607
15607
  if (candidateEvaluators === void 0) {
15608
15608
  return void 0;
15609
15609
  }
@@ -15611,13 +15611,34 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
15611
15611
  logWarning2(`Skipping evaluators for '${evalId}': expected array`);
15612
15612
  return void 0;
15613
15613
  }
15614
- const firstStringIndex = candidateEvaluators.findIndex((e) => typeof e === "string");
15615
- const processedEvaluators = firstStringIndex === -1 ? [...candidateEvaluators] : (() => {
15614
+ const expanded = [];
15615
+ for (const rawEvaluator of candidateEvaluators) {
15616
+ if (isIncludeEntry(rawEvaluator)) {
15617
+ const included = await loadAssertionTemplateEntries(
15618
+ rawEvaluator.include,
15619
+ searchRoots,
15620
+ evalId,
15621
+ includeContext
15622
+ );
15623
+ expanded.push(...included);
15624
+ continue;
15625
+ }
15626
+ expanded.push(rawEvaluator);
15627
+ }
15628
+ return expanded;
15629
+ }
15630
+ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
15631
+ const expandedEvaluators = await expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId);
15632
+ if (!expandedEvaluators) {
15633
+ return void 0;
15634
+ }
15635
+ const firstStringIndex = expandedEvaluators.findIndex((e) => typeof e === "string");
15636
+ const processedEvaluators = firstStringIndex === -1 ? [...expandedEvaluators] : (() => {
15616
15637
  const PLACEHOLDER = Symbol("rubric-placeholder");
15617
15638
  const strings = [];
15618
15639
  const result = [];
15619
15640
  let rubricInserted = false;
15620
- for (const item of candidateEvaluators) {
15641
+ for (const item of expandedEvaluators) {
15621
15642
  if (typeof item === "string") {
15622
15643
  const trimmed = item.trim();
15623
15644
  if (trimmed.length === 0) {
@@ -15832,8 +15853,16 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
15832
15853
  );
15833
15854
  continue;
15834
15855
  }
15856
+ const expandedMembers = await expandEvaluatorEntries(
15857
+ rawMembers,
15858
+ searchRoots,
15859
+ `${evalId}:${name21}`
15860
+ );
15861
+ if (!expandedMembers) {
15862
+ continue;
15863
+ }
15835
15864
  const memberEvaluators = [];
15836
- for (const rawMember of rawMembers) {
15865
+ for (const rawMember of expandedMembers) {
15837
15866
  if (!isJsonObject2(rawMember)) {
15838
15867
  logWarning2(`Skipping invalid member evaluator in composite '${name21}' (expected object)`);
15839
15868
  continue;
@@ -17268,7 +17297,7 @@ async function processMessages(options) {
17268
17297
  continue;
17269
17298
  }
17270
17299
  try {
17271
- const fileContent = (await readFile5(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
17300
+ const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
17272
17301
  processedContent.push({
17273
17302
  ...cloneJsonObject(rawSegment),
17274
17303
  path: displayPath,
@@ -17309,7 +17338,7 @@ async function processMessages(options) {
17309
17338
  continue;
17310
17339
  }
17311
17340
  try {
17312
- const imageBuffer = await readFile5(resolvedPath);
17341
+ const imageBuffer = await readFile6(resolvedPath);
17313
17342
  const base64 = imageBuffer.toString("base64");
17314
17343
  processedContent.push({
17315
17344
  type: "image",
@@ -17386,7 +17415,7 @@ async function processExpectedMessages(options) {
17386
17415
  continue;
17387
17416
  }
17388
17417
  try {
17389
- const fileContent = (await readFile5(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
17418
+ const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
17390
17419
  processedContent.push({
17391
17420
  type: "file",
17392
17421
  path: displayPath,
@@ -17426,7 +17455,7 @@ async function processExpectedMessages(options) {
17426
17455
  continue;
17427
17456
  }
17428
17457
  try {
17429
- const imageBuffer = await readFile5(resolvedPath);
17458
+ const imageBuffer = await readFile6(resolvedPath);
17430
17459
  const base64 = imageBuffer.toString("base64");
17431
17460
  processedContent.push({
17432
17461
  type: "image",
@@ -17550,7 +17579,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
17550
17579
  return {};
17551
17580
  }
17552
17581
  try {
17553
- const content = await readFile6(sidecarPath, "utf8");
17582
+ const content = await readFile7(sidecarPath, "utf8");
17554
17583
  const parsed = interpolateEnv(parseYaml2(content), process.env);
17555
17584
  if (!isJsonObject(parsed)) {
17556
17585
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
@@ -17595,7 +17624,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
17595
17624
  const repoRootPath = resolveToAbsolutePath(repoRoot);
17596
17625
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
17597
17626
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
17598
- const rawFile = await readFile6(absoluteTestPath, "utf8");
17627
+ const rawFile = await readFile7(absoluteTestPath, "utf8");
17599
17628
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
17600
17629
  const fallbackSuiteName = path7.basename(absoluteTestPath, ".jsonl") || "eval";
17601
17630
  const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
@@ -17772,11 +17801,13 @@ function parseRepoCheckout(raw) {
17772
17801
  if (!isJsonObject(raw)) return void 0;
17773
17802
  const obj = raw;
17774
17803
  const ref = typeof obj.ref === "string" ? obj.ref : void 0;
17804
+ const baseCommit = typeof obj.base_commit === "string" ? obj.base_commit : void 0;
17775
17805
  const resolve2 = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
17776
17806
  const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
17777
- if (!ref && !resolve2 && ancestor === void 0) return void 0;
17807
+ if (!ref && !baseCommit && !resolve2 && ancestor === void 0) return void 0;
17778
17808
  return {
17779
17809
  ...ref !== void 0 && { ref },
17810
+ ...baseCommit !== void 0 && { base_commit: baseCommit },
17780
17811
  ...resolve2 !== void 0 && { resolve: resolve2 },
17781
17812
  ...ancestor !== void 0 && { ancestor }
17782
17813
  };
@@ -17799,12 +17830,12 @@ function parseRepoConfig(raw) {
17799
17830
  const obj = raw;
17800
17831
  const repoPath = typeof obj.path === "string" ? obj.path : void 0;
17801
17832
  const source = parseRepoSource(obj.source);
17802
- if (!repoPath || !source) return void 0;
17803
17833
  const checkout = parseRepoCheckout(obj.checkout);
17804
17834
  const clone = parseRepoClone(obj.clone);
17835
+ if (!repoPath && !source && !checkout && !clone) return void 0;
17805
17836
  return {
17806
- path: repoPath,
17807
- source,
17837
+ ...repoPath !== void 0 && { path: repoPath },
17838
+ ...source !== void 0 && { source },
17808
17839
  ...checkout !== void 0 && { checkout },
17809
17840
  ...clone !== void 0 && { clone }
17810
17841
  };
@@ -17853,7 +17884,8 @@ ${messageContent}`);
17853
17884
  segmentsByMessage,
17854
17885
  mode
17855
17886
  }) : void 0;
17856
- return { question, chatPrompt };
17887
+ const systemMessage = extractSystemMessage(testCase.input, segmentsByMessage, mode);
17888
+ return { question, chatPrompt, systemMessage };
17857
17889
  }
17858
17890
  function needsRoleMarkers(messages, processedSegmentsByMessage) {
17859
17891
  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
@@ -17867,6 +17899,26 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
17867
17899
  }
17868
17900
  return messagesWithContent > 1;
17869
17901
  }
17902
+ function extractSystemMessage(messages, segmentsByMessage, mode) {
17903
+ const systemParts = [];
17904
+ for (let i = 0; i < messages.length; i++) {
17905
+ if (messages[i].role !== "system") {
17906
+ break;
17907
+ }
17908
+ const segments = segmentsByMessage[i];
17909
+ const contentParts = [];
17910
+ for (const segment of segments) {
17911
+ const formatted = formatSegment(segment, mode);
17912
+ if (formatted) {
17913
+ contentParts.push(formatted);
17914
+ }
17915
+ }
17916
+ if (contentParts.length > 0) {
17917
+ systemParts.push(contentParts.join("\n"));
17918
+ }
17919
+ }
17920
+ return systemParts.length > 0 ? systemParts.join("\n\n") : void 0;
17921
+ }
17870
17922
  function buildChatPromptFromSegments(options) {
17871
17923
  const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
17872
17924
  if (messages.length === 0) {
@@ -17948,8 +18000,8 @@ function resolveTests(suite) {
17948
18000
  async function readTestSuiteMetadata(testFilePath) {
17949
18001
  try {
17950
18002
  const absolutePath = path8.resolve(testFilePath);
17951
- const content = await readFile7(absolutePath, "utf8");
17952
- const parsed = interpolateEnv(parse2(content), process.env);
18003
+ const content = await readFile8(absolutePath, "utf8");
18004
+ const parsed = interpolateEnv(parse3(content), process.env);
17953
18005
  if (!isJsonObject(parsed)) {
17954
18006
  return {};
17955
18007
  }
@@ -18006,8 +18058,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
18006
18058
  const repoRootPath = resolveToAbsolutePath(repoRoot);
18007
18059
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
18008
18060
  const config = await loadConfig(absoluteTestPath, repoRootPath);
18009
- const rawFile = await readFile7(absoluteTestPath, "utf8");
18010
- const interpolated = interpolateEnv(parse2(rawFile), process.env);
18061
+ const rawFile = await readFile8(absoluteTestPath, "utf8");
18062
+ const interpolated = interpolateEnv(parse3(rawFile), process.env);
18011
18063
  if (!isJsonObject(interpolated)) {
18012
18064
  throw new Error(`Invalid test file format: ${evalFilePath}`);
18013
18065
  }
@@ -18148,7 +18200,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
18148
18200
  const testCase = {
18149
18201
  id,
18150
18202
  suite: suiteName,
18151
- category: options?.category,
18203
+ category: suite.category ?? options?.category,
18152
18204
  conversation_id: conversationId,
18153
18205
  question,
18154
18206
  input: inputMessages,
@@ -18241,11 +18293,11 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
18241
18293
  const workspaceFilePath = path8.resolve(evalFileDir, raw);
18242
18294
  let content;
18243
18295
  try {
18244
- content = await readFile7(workspaceFilePath, "utf8");
18296
+ content = await readFile8(workspaceFilePath, "utf8");
18245
18297
  } catch {
18246
18298
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
18247
18299
  }
18248
- const parsed = interpolateEnv(parse2(content), process.env);
18300
+ const parsed = interpolateEnv(parse3(content), process.env);
18249
18301
  if (!isJsonObject(parsed)) {
18250
18302
  throw new Error(
18251
18303
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
@@ -18280,14 +18332,28 @@ function parseWorkspaceConfig(raw, evalFileDir) {
18280
18332
  const explicitMode = obj.mode === "pooled" || obj.mode === "temp" || obj.mode === "static" ? obj.mode : void 0;
18281
18333
  const workspacePath = typeof obj.path === "string" ? obj.path : void 0;
18282
18334
  const mode = explicitMode ?? (workspacePath ? "static" : void 0);
18283
- if (!template && !isolation && !repos && !hooks && !mode && !workspacePath) return void 0;
18335
+ const docker = parseDockerWorkspaceConfig(obj.docker);
18336
+ if (!template && !isolation && !repos && !hooks && !mode && !workspacePath && !docker)
18337
+ return void 0;
18284
18338
  return {
18285
18339
  ...template !== void 0 && { template },
18286
18340
  ...isolation !== void 0 && { isolation },
18287
18341
  ...repos !== void 0 && { repos },
18288
18342
  ...hooks !== void 0 && { hooks },
18289
18343
  ...mode !== void 0 && { mode },
18290
- ...workspacePath !== void 0 && { path: workspacePath }
18344
+ ...workspacePath !== void 0 && { path: workspacePath },
18345
+ ...docker !== void 0 && { docker }
18346
+ };
18347
+ }
18348
+ function parseDockerWorkspaceConfig(raw) {
18349
+ if (!isJsonObject(raw)) return void 0;
18350
+ const obj = raw;
18351
+ if (typeof obj.image !== "string") return void 0;
18352
+ return {
18353
+ image: obj.image,
18354
+ ...typeof obj.timeout === "number" && { timeout: obj.timeout },
18355
+ ...typeof obj.memory === "string" && { memory: obj.memory },
18356
+ ...typeof obj.cpus === "number" && { cpus: obj.cpus }
18291
18357
  };
18292
18358
  }
18293
18359
  function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
@@ -18316,7 +18382,8 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
18316
18382
  repos: caseLevel.repos ?? suiteLevel.repos,
18317
18383
  ...hasHooks && { hooks: mergedHooks },
18318
18384
  mode: caseLevel.mode ?? suiteLevel.mode,
18319
- path: caseLevel.path ?? suiteLevel.path
18385
+ path: caseLevel.path ?? suiteLevel.path,
18386
+ docker: caseLevel.docker ?? suiteLevel.docker
18320
18387
  };
18321
18388
  }
18322
18389
  function asString5(value) {
@@ -18578,7 +18645,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
18578
18645
  }
18579
18646
  function transpileEvalYamlFile(evalYamlPath) {
18580
18647
  const content = readFileSync(evalYamlPath, "utf8");
18581
- const parsed = parse3(content);
18648
+ const parsed = parse4(content);
18582
18649
  return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
18583
18650
  }
18584
18651
  function getOutputFilenames(result) {
@@ -20994,7 +21061,7 @@ function subscribeToCopilotCliLogEntries(listener) {
20994
21061
  };
20995
21062
  }
20996
21063
  function resolvePlatformCliPath() {
20997
- const os3 = platform();
21064
+ const os4 = platform();
20998
21065
  const cpu = arch();
20999
21066
  const platformMap = {
21000
21067
  linux: "linux",
@@ -21005,13 +21072,13 @@ function resolvePlatformCliPath() {
21005
21072
  x64: "x64",
21006
21073
  arm64: "arm64"
21007
21074
  };
21008
- const osPart = platformMap[os3];
21075
+ const osPart = platformMap[os4];
21009
21076
  const archPart = archMap[cpu];
21010
21077
  if (!osPart || !archPart) {
21011
21078
  return void 0;
21012
21079
  }
21013
21080
  const packageName = `@github/copilot-${osPart}-${archPart}`;
21014
- const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
21081
+ const binaryName = os4 === "win32" ? "copilot.exe" : "copilot";
21015
21082
  try {
21016
21083
  const resolved = import.meta.resolve(`${packageName}/package.json`);
21017
21084
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath3(resolved) : resolved;
@@ -21667,7 +21734,7 @@ async function discoverCopilotSessions(opts) {
21667
21734
  const workspacePath = path17.join(sessionDir, "workspace.yaml");
21668
21735
  const eventsPath = path17.join(sessionDir, "events.jsonl");
21669
21736
  try {
21670
- const workspaceContent = await readFile8(workspacePath, "utf8");
21737
+ const workspaceContent = await readFile9(workspacePath, "utf8");
21671
21738
  const workspace = parseYaml22(workspaceContent) ?? {};
21672
21739
  const cwd = String(workspace.cwd ?? "");
21673
21740
  let updatedAt;
@@ -21727,7 +21794,7 @@ var CopilotLogProvider = class {
21727
21794
  const eventsPath = path18.join(sessionDir, "events.jsonl");
21728
21795
  let eventsContent;
21729
21796
  try {
21730
- eventsContent = await readFile9(eventsPath, "utf8");
21797
+ eventsContent = await readFile10(eventsPath, "utf8");
21731
21798
  } catch (err) {
21732
21799
  throw new Error(
21733
21800
  `Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
@@ -23972,7 +24039,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
23972
24039
  const maxAttempts = 10;
23973
24040
  while (attempts < maxAttempts) {
23974
24041
  try {
23975
- const content = await readFile10(responseFileFinal, { encoding: "utf8" });
24042
+ const content = await readFile11(responseFileFinal, { encoding: "utf8" });
23976
24043
  if (!silent) {
23977
24044
  process.stdout.write(`${content}
23978
24045
  `);
@@ -24029,7 +24096,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
24029
24096
  const maxAttempts = 10;
24030
24097
  while (attempts < maxAttempts) {
24031
24098
  try {
24032
- const content = await readFile10(file, { encoding: "utf8" });
24099
+ const content = await readFile11(file, { encoding: "utf8" });
24033
24100
  if (!silent) {
24034
24101
  process.stdout.write(`${content}
24035
24102
  `);
@@ -24310,7 +24377,7 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
24310
24377
  if (!stats.isFile()) {
24311
24378
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
24312
24379
  }
24313
- const templateText = await readFile11(workspaceSrc, "utf8");
24380
+ const templateText = await readFile12(workspaceSrc, "utf8");
24314
24381
  workspaceContent = JSON.parse(templateText);
24315
24382
  } else {
24316
24383
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
@@ -25202,8 +25269,8 @@ async function readTargetDefinitions(filePath) {
25202
25269
  if (!await fileExists3(absolutePath)) {
25203
25270
  throw new Error(`targets.yaml not found at ${absolutePath}`);
25204
25271
  }
25205
- const raw = await readFile12(absolutePath, "utf8");
25206
- const parsed = parse4(raw);
25272
+ const raw = await readFile13(absolutePath, "utf8");
25273
+ const parsed = parse5(raw);
25207
25274
  if (!isRecord(parsed)) {
25208
25275
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
25209
25276
  }
@@ -25625,6 +25692,16 @@ function toCamelCaseDeep(obj) {
25625
25692
  }
25626
25693
  return obj;
25627
25694
  }
25695
+ function getRepoCheckoutRef(checkout) {
25696
+ return checkout?.base_commit ?? checkout?.ref ?? "HEAD";
25697
+ }
25698
+ function getRepoCheckoutTargets(repos) {
25699
+ if (!repos) return [];
25700
+ return repos.filter((repo) => repo.checkout?.base_commit || repo.checkout?.ref).map((repo) => ({
25701
+ path: repo.path,
25702
+ ref: getRepoCheckoutRef(repo.checkout)
25703
+ }));
25704
+ }
25628
25705
  var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
25629
25706
  var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
25630
25707
  async function materializeContentForGrader(messages, getWorkDir) {
@@ -25758,13 +25835,31 @@ var CodeEvaluator = class {
25758
25835
  const workspaceEnv = context2.workspacePath ? { AGENTV_WORKSPACE_PATH: context2.workspacePath } : void 0;
25759
25836
  const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
25760
25837
  try {
25761
- const stdout = await executeScript(
25762
- this.command,
25763
- inputPayload,
25764
- this.agentTimeoutMs,
25765
- this.cwd,
25766
- env
25767
- );
25838
+ let stdout;
25839
+ if (context2.dockerConfig) {
25840
+ const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27-B4AQHVWA.js");
25841
+ const dockerProvider = new DockerWorkspaceProvider2(context2.dockerConfig);
25842
+ const result = await dockerProvider.runGraderInContainer({
25843
+ command: [...this.command],
25844
+ stdin: inputPayload,
25845
+ repoCheckouts: getRepoCheckoutTargets(context2.evalCase.workspace?.repos)
25846
+ });
25847
+ if (result.exitCode !== 0) {
25848
+ const trimmedErr = result.stderr.trim();
25849
+ throw new Error(
25850
+ trimmedErr.length > 0 ? `Code evaluator exited with code ${result.exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${result.exitCode}`
25851
+ );
25852
+ }
25853
+ stdout = result.stdout.trim();
25854
+ } else {
25855
+ stdout = await executeScript(
25856
+ this.command,
25857
+ inputPayload,
25858
+ this.agentTimeoutMs,
25859
+ this.cwd,
25860
+ env
25861
+ );
25862
+ }
25768
25863
  const parsed = parseJsonSafe(stdout);
25769
25864
  const score = clampScore(typeof parsed?.score === "number" ? parsed.score : 0);
25770
25865
  const assertions = Array.isArray(parsed?.assertions) ? parsed.assertions.filter(
@@ -26916,11 +27011,11 @@ function createFilesystemTools(workspacePath) {
26916
27011
  execute: async (input) => {
26917
27012
  try {
26918
27013
  const resolved = resolveSandboxed(workspacePath, input.path);
26919
- const stat11 = await fs2.stat(resolved);
26920
- if (stat11.isDirectory()) {
27014
+ const stat12 = await fs2.stat(resolved);
27015
+ if (stat12.isDirectory()) {
26921
27016
  return { error: `'${input.path}' is a directory, not a file` };
26922
27017
  }
26923
- const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
27018
+ const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
26924
27019
  const fd = await fs2.open(resolved, "r");
26925
27020
  try {
26926
27021
  await fd.read(buffer, 0, buffer.length, 0);
@@ -26928,8 +27023,8 @@ function createFilesystemTools(workspacePath) {
26928
27023
  await fd.close();
26929
27024
  }
26930
27025
  const content = buffer.toString("utf-8");
26931
- const truncated = stat11.size > MAX_FILE_SIZE;
26932
- return { content, truncated, size: stat11.size };
27026
+ const truncated = stat12.size > MAX_FILE_SIZE;
27027
+ return { content, truncated, size: stat12.size };
26933
27028
  } catch (error) {
26934
27029
  return { error: error instanceof Error ? error.message : String(error) };
26935
27030
  }
@@ -26980,8 +27075,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
26980
27075
  const ext = path36.extname(entry.name).toLowerCase();
26981
27076
  if (BINARY_EXTENSIONS.has(ext)) continue;
26982
27077
  try {
26983
- const stat11 = await fs2.stat(fullPath);
26984
- if (stat11.size > MAX_FILE_SIZE) continue;
27078
+ const stat12 = await fs2.stat(fullPath);
27079
+ if (stat12.size > MAX_FILE_SIZE) continue;
26985
27080
  const content = await fs2.readFile(fullPath, "utf-8");
26986
27081
  const lines = content.split("\n");
26987
27082
  for (let i = 0; i < lines.length; i++) {
@@ -27614,115 +27709,115 @@ var FieldAccuracyEvaluator = class {
27614
27709
  * Evaluate a single field against the expected value.
27615
27710
  */
27616
27711
  evaluateField(fieldConfig, candidateData, expectedData) {
27617
- const { path: path52, match, required = true, weight = 1 } = fieldConfig;
27618
- const candidateValue = resolvePath(candidateData, path52);
27619
- const expectedValue = resolvePath(expectedData, path52);
27712
+ const { path: path53, match, required = true, weight = 1 } = fieldConfig;
27713
+ const candidateValue = resolvePath(candidateData, path53);
27714
+ const expectedValue = resolvePath(expectedData, path53);
27620
27715
  if (expectedValue === void 0) {
27621
27716
  return {
27622
- path: path52,
27717
+ path: path53,
27623
27718
  score: 1,
27624
27719
  // No expected value means no comparison needed
27625
27720
  weight,
27626
27721
  hit: true,
27627
- message: `${path52}: no expected value`
27722
+ message: `${path53}: no expected value`
27628
27723
  };
27629
27724
  }
27630
27725
  if (candidateValue === void 0) {
27631
27726
  if (required) {
27632
27727
  return {
27633
- path: path52,
27728
+ path: path53,
27634
27729
  score: 0,
27635
27730
  weight,
27636
27731
  hit: false,
27637
- message: `${path52} (required, missing)`
27732
+ message: `${path53} (required, missing)`
27638
27733
  };
27639
27734
  }
27640
27735
  return {
27641
- path: path52,
27736
+ path: path53,
27642
27737
  score: 1,
27643
27738
  // Don't penalize missing optional fields
27644
27739
  weight: 0,
27645
27740
  // Zero weight means it won't affect the score
27646
27741
  hit: true,
27647
- message: `${path52}: optional field missing`
27742
+ message: `${path53}: optional field missing`
27648
27743
  };
27649
27744
  }
27650
27745
  switch (match) {
27651
27746
  case "exact":
27652
- return this.compareExact(path52, candidateValue, expectedValue, weight);
27747
+ return this.compareExact(path53, candidateValue, expectedValue, weight);
27653
27748
  case "numeric_tolerance":
27654
27749
  return this.compareNumericTolerance(
27655
- path52,
27750
+ path53,
27656
27751
  candidateValue,
27657
27752
  expectedValue,
27658
27753
  fieldConfig,
27659
27754
  weight
27660
27755
  );
27661
27756
  case "date":
27662
- return this.compareDate(path52, candidateValue, expectedValue, fieldConfig, weight);
27757
+ return this.compareDate(path53, candidateValue, expectedValue, fieldConfig, weight);
27663
27758
  default:
27664
27759
  return {
27665
- path: path52,
27760
+ path: path53,
27666
27761
  score: 0,
27667
27762
  weight,
27668
27763
  hit: false,
27669
- message: `${path52}: unknown match type "${match}"`
27764
+ message: `${path53}: unknown match type "${match}"`
27670
27765
  };
27671
27766
  }
27672
27767
  }
27673
27768
  /**
27674
27769
  * Exact equality comparison.
27675
27770
  */
27676
- compareExact(path52, candidateValue, expectedValue, weight) {
27771
+ compareExact(path53, candidateValue, expectedValue, weight) {
27677
27772
  if (deepEqual(candidateValue, expectedValue)) {
27678
27773
  return {
27679
- path: path52,
27774
+ path: path53,
27680
27775
  score: 1,
27681
27776
  weight,
27682
27777
  hit: true,
27683
- message: path52
27778
+ message: path53
27684
27779
  };
27685
27780
  }
27686
27781
  if (typeof candidateValue !== typeof expectedValue) {
27687
27782
  return {
27688
- path: path52,
27783
+ path: path53,
27689
27784
  score: 0,
27690
27785
  weight,
27691
27786
  hit: false,
27692
- message: `${path52} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
27787
+ message: `${path53} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
27693
27788
  };
27694
27789
  }
27695
27790
  return {
27696
- path: path52,
27791
+ path: path53,
27697
27792
  score: 0,
27698
27793
  weight,
27699
27794
  hit: false,
27700
- message: `${path52} (value mismatch)`
27795
+ message: `${path53} (value mismatch)`
27701
27796
  };
27702
27797
  }
27703
27798
  /**
27704
27799
  * Numeric comparison with absolute or relative tolerance.
27705
27800
  */
27706
- compareNumericTolerance(path52, candidateValue, expectedValue, fieldConfig, weight) {
27801
+ compareNumericTolerance(path53, candidateValue, expectedValue, fieldConfig, weight) {
27707
27802
  const { tolerance = 0, relative = false } = fieldConfig;
27708
27803
  const candidateNum = toNumber(candidateValue);
27709
27804
  const expectedNum = toNumber(expectedValue);
27710
27805
  if (candidateNum === null || expectedNum === null) {
27711
27806
  return {
27712
- path: path52,
27807
+ path: path53,
27713
27808
  score: 0,
27714
27809
  weight,
27715
27810
  hit: false,
27716
- message: `${path52} (non-numeric value)`
27811
+ message: `${path53} (non-numeric value)`
27717
27812
  };
27718
27813
  }
27719
27814
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
27720
27815
  return {
27721
- path: path52,
27816
+ path: path53,
27722
27817
  score: 0,
27723
27818
  weight,
27724
27819
  hit: false,
27725
- message: `${path52} (invalid numeric value)`
27820
+ message: `${path53} (invalid numeric value)`
27726
27821
  };
27727
27822
  }
27728
27823
  const diff = Math.abs(candidateNum - expectedNum);
@@ -27735,61 +27830,61 @@ var FieldAccuracyEvaluator = class {
27735
27830
  }
27736
27831
  if (withinTolerance) {
27737
27832
  return {
27738
- path: path52,
27833
+ path: path53,
27739
27834
  score: 1,
27740
27835
  weight,
27741
27836
  hit: true,
27742
- message: `${path52} (within tolerance: diff=${diff.toFixed(2)})`
27837
+ message: `${path53} (within tolerance: diff=${diff.toFixed(2)})`
27743
27838
  };
27744
27839
  }
27745
27840
  return {
27746
- path: path52,
27841
+ path: path53,
27747
27842
  score: 0,
27748
27843
  weight,
27749
27844
  hit: false,
27750
- message: `${path52} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
27845
+ message: `${path53} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
27751
27846
  };
27752
27847
  }
27753
27848
  /**
27754
27849
  * Date comparison with format normalization.
27755
27850
  */
27756
- compareDate(path52, candidateValue, expectedValue, fieldConfig, weight) {
27851
+ compareDate(path53, candidateValue, expectedValue, fieldConfig, weight) {
27757
27852
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
27758
27853
  const candidateDate = parseDate(String(candidateValue), formats);
27759
27854
  const expectedDate = parseDate(String(expectedValue), formats);
27760
27855
  if (candidateDate === null) {
27761
27856
  return {
27762
- path: path52,
27857
+ path: path53,
27763
27858
  score: 0,
27764
27859
  weight,
27765
27860
  hit: false,
27766
- message: `${path52} (unparseable candidate date)`
27861
+ message: `${path53} (unparseable candidate date)`
27767
27862
  };
27768
27863
  }
27769
27864
  if (expectedDate === null) {
27770
27865
  return {
27771
- path: path52,
27866
+ path: path53,
27772
27867
  score: 0,
27773
27868
  weight,
27774
27869
  hit: false,
27775
- message: `${path52} (unparseable expected date)`
27870
+ message: `${path53} (unparseable expected date)`
27776
27871
  };
27777
27872
  }
27778
27873
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
27779
27874
  return {
27780
- path: path52,
27875
+ path: path53,
27781
27876
  score: 1,
27782
27877
  weight,
27783
27878
  hit: true,
27784
- message: path52
27879
+ message: path53
27785
27880
  };
27786
27881
  }
27787
27882
  return {
27788
- path: path52,
27883
+ path: path53,
27789
27884
  score: 0,
27790
27885
  weight,
27791
27886
  hit: false,
27792
- message: `${path52} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
27887
+ message: `${path53} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
27793
27888
  };
27794
27889
  }
27795
27890
  /**
@@ -27822,11 +27917,11 @@ var FieldAccuracyEvaluator = class {
27822
27917
  };
27823
27918
  }
27824
27919
  };
27825
- function resolvePath(obj, path52) {
27826
- if (!path52 || !obj) {
27920
+ function resolvePath(obj, path53) {
27921
+ if (!path53 || !obj) {
27827
27922
  return void 0;
27828
27923
  }
27829
- const parts = path52.split(/\.|\[|\]/).filter((p) => p.length > 0);
27924
+ const parts = path53.split(/\.|\[|\]/).filter((p) => p.length > 0);
27830
27925
  let current = obj;
27831
27926
  for (const part of parts) {
27832
27927
  if (current === null || current === void 0) {
@@ -28308,8 +28403,8 @@ var TokenUsageEvaluator = class {
28308
28403
  };
28309
28404
  }
28310
28405
  };
28311
- function getNestedValue(obj, path52) {
28312
- const parts = path52.split(".");
28406
+ function getNestedValue(obj, path53) {
28407
+ const parts = path53.split(".");
28313
28408
  let current = obj;
28314
28409
  for (const part of parts) {
28315
28410
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -29155,6 +29250,15 @@ async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
29155
29250
  }
29156
29251
  return void 0;
29157
29252
  }
29253
+ function containsTemplateVariables(text2) {
29254
+ const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
29255
+ for (const match of text2.matchAll(variablePattern)) {
29256
+ if (VALID_TEMPLATE_VARIABLES.has(match[1])) {
29257
+ return true;
29258
+ }
29259
+ }
29260
+ return false;
29261
+ }
29158
29262
  async function executePromptTemplate(script, context2, config, timeoutMs) {
29159
29263
  const payload = {
29160
29264
  criteria: context2.evalCase.criteria,
@@ -29225,9 +29329,20 @@ var llmGraderFactory = (config, context2) => {
29225
29329
  },
29226
29330
  agentTimeoutMs
29227
29331
  );
29332
+ const isFromInlinePrompt = !c.resolvedPromptScript?.length && !c.resolvedPromptPath && !c.promptPath;
29333
+ let evaluatorTemplateOverride;
29334
+ let evalCase = evalContext.evalCase;
29335
+ if (customPrompt) {
29336
+ if (!isFromInlinePrompt || containsTemplateVariables(customPrompt)) {
29337
+ evaluatorTemplateOverride = customPrompt;
29338
+ } else {
29339
+ evalCase = { ...evalCase, criteria: customPrompt };
29340
+ }
29341
+ }
29228
29342
  return evaluator.evaluate({
29229
29343
  ...evalContext,
29230
- evaluatorTemplateOverride: customPrompt,
29344
+ evalCase,
29345
+ evaluatorTemplateOverride,
29231
29346
  evaluator: c
29232
29347
  });
29233
29348
  }
@@ -29824,12 +29939,14 @@ async function git(args, opts) {
29824
29939
  return stdout.trim();
29825
29940
  }
29826
29941
  function normalizeRepoForFingerprint(repo) {
29827
- const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
29828
- const result = {
29829
- path: repo.path,
29830
- source,
29831
- ref: repo.checkout?.ref ?? "HEAD"
29832
- };
29942
+ const result = {};
29943
+ if (repo.path) {
29944
+ result.path = repo.path;
29945
+ }
29946
+ if (repo.source) {
29947
+ result.source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
29948
+ }
29949
+ result.ref = getRepoCheckoutRef(repo.checkout);
29833
29950
  if (repo.clone?.depth !== void 0) {
29834
29951
  result.depth = repo.clone.depth;
29835
29952
  }
@@ -29843,7 +29960,7 @@ function normalizeRepoForFingerprint(repo) {
29843
29960
  }
29844
29961
  function computeWorkspaceFingerprint(repos) {
29845
29962
  const canonical = {
29846
- repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
29963
+ repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
29847
29964
  };
29848
29965
  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
29849
29966
  }
@@ -29957,7 +30074,7 @@ var WorkspacePoolManager = class {
29957
30074
  throw err;
29958
30075
  }
29959
30076
  try {
29960
- const pidStr = await readFile13(lockPath, "utf-8");
30077
+ const pidStr = await readFile14(lockPath, "utf-8");
29961
30078
  const pid = Number.parseInt(pidStr.trim(), 10);
29962
30079
  if (!Number.isNaN(pid)) {
29963
30080
  try {
@@ -29984,7 +30101,7 @@ var WorkspacePoolManager = class {
29984
30101
  async checkDrift(poolDir, fingerprint) {
29985
30102
  const metadataPath = path422.join(poolDir, "metadata.json");
29986
30103
  try {
29987
- const raw = await readFile13(metadataPath, "utf-8");
30104
+ const raw = await readFile14(metadataPath, "utf-8");
29988
30105
  const metadata = JSON.parse(raw);
29989
30106
  return metadata.fingerprint !== fingerprint;
29990
30107
  } catch {
@@ -30009,7 +30126,7 @@ var WorkspacePoolManager = class {
30009
30126
  const lockPath = path422.join(poolDir, `${entry}.lock`);
30010
30127
  if (existsSync3(lockPath)) {
30011
30128
  try {
30012
- const pidStr = await readFile13(lockPath, "utf-8");
30129
+ const pidStr = await readFile14(lockPath, "utf-8");
30013
30130
  const pid = Number.parseInt(pidStr.trim(), 10);
30014
30131
  if (!Number.isNaN(pid)) {
30015
30132
  try {
@@ -30037,6 +30154,7 @@ var WorkspacePoolManager = class {
30037
30154
  */
30038
30155
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
30039
30156
  for (const repo of repos) {
30157
+ if (!repo.path || !repo.source) continue;
30040
30158
  const repoDir = path422.join(slotPath, repo.path);
30041
30159
  if (!existsSync3(repoDir)) {
30042
30160
  continue;
@@ -30044,7 +30162,7 @@ var WorkspacePoolManager = class {
30044
30162
  if (poolReset === "none") {
30045
30163
  continue;
30046
30164
  }
30047
- const ref = repo.checkout?.ref ?? "HEAD";
30165
+ const ref = getRepoCheckoutRef(repo.checkout);
30048
30166
  const resolve2 = repo.checkout?.resolve ?? "remote";
30049
30167
  if (resolve2 === "remote") {
30050
30168
  const fetchArgs = ["fetch", "origin", ref];
@@ -30061,8 +30179,8 @@ var WorkspacePoolManager = class {
30061
30179
  }
30062
30180
  if (templatePath) {
30063
30181
  const repoDirNames = new Set(
30064
- repos.map((r) => {
30065
- const normalized = r.path.replace(/^\.\//, "");
30182
+ repos.filter((r) => r.path).map((r) => {
30183
+ const normalized = (r.path ?? "").replace(/^\.\//, "");
30066
30184
  return normalized.split("/")[0];
30067
30185
  })
30068
30186
  );
@@ -30111,17 +30229,17 @@ var RepoManager = class {
30111
30229
  static validateLocalPaths(repos) {
30112
30230
  const errors = [];
30113
30231
  for (const repo of repos) {
30114
- if (repo.source.type !== "local") continue;
30232
+ if (!repo.source || repo.source.type !== "local") continue;
30115
30233
  const sourcePath = repo.source.path;
30116
30234
  if (!sourcePath || sourcePath.trim() === "") {
30117
30235
  errors.push({
30118
- repoPath: repo.path,
30236
+ repoPath: repo.path ?? "(none)",
30119
30237
  resolvedSourcePath: sourcePath ?? "",
30120
30238
  reason: "empty_path"
30121
30239
  });
30122
30240
  } else if (!existsSync4(sourcePath)) {
30123
30241
  errors.push({
30124
- repoPath: repo.path,
30242
+ repoPath: repo.path ?? "(none)",
30125
30243
  resolvedSourcePath: sourcePath,
30126
30244
  reason: "not_found"
30127
30245
  });
@@ -30168,6 +30286,12 @@ ${lines.join("\n")}`;
30168
30286
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
30169
30287
  */
30170
30288
  async materialize(repo, workspacePath) {
30289
+ if (!repo.source || !repo.path) {
30290
+ if (this.verbose) {
30291
+ console.log(`[repo] materialize skip path=${repo.path ?? "(none)"} (no source or path)`);
30292
+ }
30293
+ return;
30294
+ }
30171
30295
  const targetDir = path43.join(workspacePath, repo.path);
30172
30296
  const sourceUrl = getSourceUrl(repo.source);
30173
30297
  const startedAt = Date.now();
@@ -30191,7 +30315,7 @@ ${lines.join("\n")}`;
30191
30315
  await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
30192
30316
  await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
30193
30317
  }
30194
- const ref = repo.checkout?.ref ?? "HEAD";
30318
+ const ref = getRepoCheckoutRef(repo.checkout);
30195
30319
  const resolve2 = repo.checkout?.resolve ?? "remote";
30196
30320
  let resolvedSha;
30197
30321
  if (resolve2 === "remote" && repo.source.type === "git") {
@@ -30243,22 +30367,26 @@ ${lines.join("\n")}`;
30243
30367
  );
30244
30368
  }
30245
30369
  }
30246
- /** Materialize all repos into the workspace. */
30370
+ /** Materialize all repos into the workspace. Skips repos without source (Docker-only repos). */
30247
30371
  async materializeAll(repos, workspacePath) {
30372
+ const materializableRepos = repos.filter((r) => r.source);
30248
30373
  if (this.verbose) {
30249
- console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`);
30374
+ console.log(
30375
+ `[repo] materializeAll count=${materializableRepos.length} (${repos.length - materializableRepos.length} skipped, no source) workspace=${workspacePath}`
30376
+ );
30250
30377
  }
30251
- for (const repo of repos) {
30378
+ for (const repo of materializableRepos) {
30252
30379
  await this.materialize(repo, workspacePath);
30253
30380
  }
30254
30381
  if (this.verbose) {
30255
30382
  console.log("[repo] materializeAll complete");
30256
30383
  }
30257
30384
  }
30258
- /** Reset repos in workspace to their checkout state. */
30385
+ /** Reset repos in workspace to their checkout state. Skips repos without path or source. */
30259
30386
  async reset(repos, workspacePath, reset) {
30260
30387
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
30261
30388
  for (const repo of repos) {
30389
+ if (!repo.path || !repo.source) continue;
30262
30390
  const targetDir = path43.join(workspacePath, repo.path);
30263
30391
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
30264
30392
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
@@ -30574,7 +30702,8 @@ async function runEvaluation(options) {
30574
30702
  for (const ec of filteredEvalCases) {
30575
30703
  if (ec.workspace?.repos) {
30576
30704
  for (const repo of ec.workspace.repos) {
30577
- const key = `${repo.path}::${repo.source.type === "local" ? repo.source.path : ""}`;
30705
+ if (!repo.source) continue;
30706
+ const key = `${repo.path ?? ""}::${repo.source.type === "local" ? repo.source.path : ""}`;
30578
30707
  if (!allRepos.has(key)) {
30579
30708
  allRepos.set(key, repo);
30580
30709
  }
@@ -30587,7 +30716,7 @@ async function runEvaluation(options) {
30587
30716
  const message = RepoManager.formatValidationErrors(localPathErrors);
30588
30717
  console.warn(`Warning: ${message}`);
30589
30718
  const invalidLocalRepoPaths = new Set(localPathErrors.map((e) => e.repoPath));
30590
- if (suiteWorkspace?.repos?.some((r) => invalidLocalRepoPaths.has(r.path))) {
30719
+ if (suiteWorkspace?.repos?.some((r) => r.path && invalidLocalRepoPaths.has(r.path))) {
30591
30720
  throw new Error(message);
30592
30721
  }
30593
30722
  }
@@ -30720,6 +30849,7 @@ async function runEvaluation(options) {
30720
30849
  try {
30721
30850
  if (needsPerRepoCheck) {
30722
30851
  for (const repo of suiteWorkspace.repos) {
30852
+ if (!repo.path || !repo.source) continue;
30723
30853
  const targetDir = path45.join(sharedWorkspacePath, repo.path);
30724
30854
  if (existsSync5(targetDir)) {
30725
30855
  setupLog(`reusing existing repo at: ${targetDir}`);
@@ -30744,6 +30874,19 @@ async function runEvaluation(options) {
30744
30874
  throw new Error(`Failed to materialize repos: ${message}`);
30745
30875
  }
30746
30876
  }
30877
+ const suiteDockerConfig = suiteWorkspace?.docker;
30878
+ if (suiteDockerConfig) {
30879
+ setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
30880
+ const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27-B4AQHVWA.js");
30881
+ const dockerSetup = new DockerWorkspaceProvider2(suiteDockerConfig);
30882
+ if (!await dockerSetup.isDockerAvailable()) {
30883
+ throw new Error(
30884
+ "Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
30885
+ );
30886
+ }
30887
+ await dockerSetup.pullImage();
30888
+ setupLog("Docker image pull complete");
30889
+ }
30747
30890
  const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
30748
30891
  const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
30749
30892
  if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
@@ -31104,11 +31247,9 @@ async function runBatchEvaluation(options) {
31104
31247
  const promptInputs = promptInputsList[index];
31105
31248
  return {
31106
31249
  question: promptInputs.question,
31250
+ systemPrompt: promptInputs.systemMessage,
31107
31251
  inputFiles: evalCase.file_paths,
31108
- evalCaseId: evalCase.id,
31109
- metadata: {
31110
- systemPrompt: promptInputs.systemMessage ?? ""
31111
- }
31252
+ evalCaseId: evalCase.id
31112
31253
  };
31113
31254
  });
31114
31255
  const batchResponse = await provider.invokeBatch?.(batchRequests);
@@ -31639,6 +31780,7 @@ async function runEvalCase(options) {
31639
31780
  availableTargets,
31640
31781
  fileChanges,
31641
31782
  workspacePath,
31783
+ dockerConfig: evalCase.workspace?.docker,
31642
31784
  verbose,
31643
31785
  threshold: evalCase.threshold ?? caseThreshold
31644
31786
  });
@@ -31832,6 +31974,7 @@ async function evaluateCandidate(options) {
31832
31974
  availableTargets,
31833
31975
  fileChanges,
31834
31976
  workspacePath,
31977
+ dockerConfig,
31835
31978
  threshold: evalThreshold
31836
31979
  } = options;
31837
31980
  const gradeTimestamp = nowFn();
@@ -31858,6 +32001,7 @@ async function evaluateCandidate(options) {
31858
32001
  availableTargets,
31859
32002
  fileChanges,
31860
32003
  workspacePath,
32004
+ dockerConfig,
31861
32005
  threshold: evalThreshold
31862
32006
  });
31863
32007
  const completedAt = nowFn();
@@ -31933,6 +32077,7 @@ async function runEvaluatorsForCase(options) {
31933
32077
  availableTargets,
31934
32078
  fileChanges,
31935
32079
  workspacePath,
32080
+ dockerConfig,
31936
32081
  threshold
31937
32082
  } = options;
31938
32083
  if (evalCase.assertions && evalCase.assertions.length > 0) {
@@ -31960,6 +32105,7 @@ async function runEvaluatorsForCase(options) {
31960
32105
  availableTargets,
31961
32106
  fileChanges,
31962
32107
  workspacePath,
32108
+ dockerConfig,
31963
32109
  threshold
31964
32110
  });
31965
32111
  }
@@ -31989,6 +32135,7 @@ async function runEvaluatorsForCase(options) {
31989
32135
  availableTargets,
31990
32136
  fileChanges,
31991
32137
  workspacePath,
32138
+ dockerConfig,
31992
32139
  ...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
31993
32140
  });
31994
32141
  return { score };
@@ -32027,7 +32174,8 @@ async function runEvaluatorList(options) {
32027
32174
  targetResolver,
32028
32175
  availableTargets,
32029
32176
  fileChanges,
32030
- workspacePath
32177
+ workspacePath,
32178
+ dockerConfig
32031
32179
  } = options;
32032
32180
  const scored = [];
32033
32181
  const scores = [];
@@ -32050,7 +32198,8 @@ async function runEvaluatorList(options) {
32050
32198
  targetResolver,
32051
32199
  availableTargets,
32052
32200
  fileChanges,
32053
- workspacePath
32201
+ workspacePath,
32202
+ dockerConfig
32054
32203
  };
32055
32204
  const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
32056
32205
  const dispatchContext = {
@@ -32212,13 +32361,11 @@ async function invokeProvider(provider, options) {
32212
32361
  const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
32213
32362
  return await provider.invoke({
32214
32363
  question: promptInputs.question,
32364
+ systemPrompt: promptInputs.systemMessage,
32215
32365
  chatPrompt: promptInputs.chatPrompt,
32216
32366
  inputFiles: evalCase.file_paths,
32217
32367
  evalCaseId: evalCase.id,
32218
32368
  attempt,
32219
- metadata: {
32220
- systemPrompt: promptInputs.systemMessage ?? ""
32221
- },
32222
32369
  signal: controller.signal,
32223
32370
  cwd,
32224
32371
  workspaceFile,
@@ -32580,7 +32727,7 @@ async function discoverDefaultTarget(repoRoot) {
32580
32727
  return null;
32581
32728
  }
32582
32729
  async function loadEnvHierarchy(repoRoot, startPath) {
32583
- const { readFileSync: readFileSync4 } = await import("node:fs");
32730
+ const { readFileSync: readFileSync5 } = await import("node:fs");
32584
32731
  const chain = buildDirectoryChain(startPath, repoRoot);
32585
32732
  const envFiles = [];
32586
32733
  for (const dir of chain) {
@@ -32589,7 +32736,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
32589
32736
  }
32590
32737
  for (let i = 0; i < envFiles.length; i++) {
32591
32738
  try {
32592
- const content = readFileSync4(envFiles[i], "utf8");
32739
+ const content = readFileSync5(envFiles[i], "utf8");
32593
32740
  for (const line of content.split("\n")) {
32594
32741
  const trimmed = line.trim();
32595
32742
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -32658,12 +32805,12 @@ var CONFIG_FILE_NAMES = [
32658
32805
  ".agentv/config.js"
32659
32806
  ];
32660
32807
  async function loadTsConfig(projectRoot) {
32661
- const { existsSync: existsSync8 } = await import("node:fs");
32808
+ const { existsSync: existsSync9 } = await import("node:fs");
32662
32809
  const { pathToFileURL: pathToFileURL2 } = await import("node:url");
32663
32810
  const { join: join2 } = await import("node:path");
32664
32811
  for (const fileName of CONFIG_FILE_NAMES) {
32665
32812
  const filePath = join2(projectRoot, fileName);
32666
- if (!existsSync8(filePath)) {
32813
+ if (!existsSync9(filePath)) {
32667
32814
  continue;
32668
32815
  }
32669
32816
  try {
@@ -32771,7 +32918,7 @@ async function scanRepoDeps(evalFilePaths) {
32771
32918
  try {
32772
32919
  const repos = await extractReposFromEvalFile(filePath);
32773
32920
  for (const repo of repos) {
32774
- if (repo.source.type !== "git") continue;
32921
+ if (!repo.source || repo.source.type !== "git") continue;
32775
32922
  const ref = repo.checkout?.ref;
32776
32923
  const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
32777
32924
  const existing = seen.get(key);
@@ -32799,8 +32946,8 @@ async function scanRepoDeps(evalFilePaths) {
32799
32946
  return { repos: [...seen.values()], errors };
32800
32947
  }
32801
32948
  async function extractReposFromEvalFile(filePath) {
32802
- const content = await readFile14(filePath, "utf8");
32803
- const parsed = interpolateEnv(parse5(content), process.env);
32949
+ const content = await readFile15(filePath, "utf8");
32950
+ const parsed = interpolateEnv(parse6(content), process.env);
32804
32951
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
32805
32952
  const obj = parsed;
32806
32953
  const evalFileDir = path47.dirname(path47.resolve(filePath));
@@ -32820,8 +32967,8 @@ async function extractReposFromEvalFile(filePath) {
32820
32967
  async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
32821
32968
  if (typeof raw === "string") {
32822
32969
  const workspaceFilePath = path47.resolve(evalFileDir, raw);
32823
- const content = await readFile14(workspaceFilePath, "utf8");
32824
- const parsed = interpolateEnv(parse5(content), process.env);
32970
+ const content = await readFile15(workspaceFilePath, "utf8");
32971
+ const parsed = interpolateEnv(parse6(content), process.env);
32825
32972
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
32826
32973
  return extractReposFromObject(parsed);
32827
32974
  }
@@ -32855,7 +33002,7 @@ var ResponseCache = class {
32855
33002
  async get(key) {
32856
33003
  const filePath = this.keyToPath(key);
32857
33004
  try {
32858
- const data = await readFile15(filePath, "utf8");
33005
+ const data = await readFile16(filePath, "utf8");
32859
33006
  return JSON.parse(data);
32860
33007
  } catch {
32861
33008
  return void 0;
@@ -32883,16 +33030,289 @@ function shouldSkipCacheForTemperature(targetConfig) {
32883
33030
  }
32884
33031
  return false;
32885
33032
  }
33033
+ var execFileAsync3 = promisify7(execFile3);
33034
/**
 * Converts a results-repo identifier into a string usable as a single
 * directory name.
 *
 * Surrounding whitespace is removed and every run of characters outside
 * `[A-Za-z0-9._-]` is collapsed into one `-`.
 *
 * @param {string} repo - Repository identifier (shorthand or URL).
 * @returns {string} Directory-safe slug; never `""`, `"."` or `".."`.
 */
function sanitizeRepoSlug(repo) {
  const slug = repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
  // "", "." and ".." survive the character filter but are not real directory
  // names: joined onto a parent path they resolve to the parent itself or to
  // the parent's parent, so substitute a fixed placeholder instead.
  if (slug === "" || slug === "." || slug === "..") {
    return "unnamed-repo";
  }
  return slug;
}
33037
/**
 * Normalizes any thrown value into an `Error`, appending a `gh auth login`
 * hint when the message looks like a Git/GitHub authentication failure.
 *
 * The function is idempotent: a message that already carries the hint is not
 * extended again, so an error may safely pass through several wrapping layers.
 *
 * @param {unknown} error - The caught value (an `Error` or anything else).
 * @returns {Error} A new error; when `error` was an `Error` it is attached as
 *   `cause` so the original stack is not lost.
 */
function withFriendlyGitHubAuthError(error) {
  const hint = "Run 'gh auth login' to authenticate.";
  const message = error instanceof Error ? error.message : String(error);
  const options = error instanceof Error ? { cause: error } : void 0;
  // Already wrapped once: return the same text instead of stacking hints.
  if (message.includes(hint)) {
    return new Error(message, options);
  }
  const lower = message.toLowerCase();
  const authMarkers = [
    "authentication failed",
    "could not read username",
    "permission denied",
    "not logged into any github hosts"
  ];
  if (authMarkers.some((marker) => lower.includes(marker))) {
    return new Error(`${message}. ${hint}`, options);
  }
  return new Error(message, options);
}
33045
/**
 * Produces a canonical copy of a results-export config.
 *
 * - `repo` is trimmed.
 * - `path` is trimmed, then stripped of leading and trailing slashes.
 * - `auto_push` is `true` only when the input value is exactly `true`.
 * - `branch_prefix` is trimmed and defaults to `"eval-results"` when missing
 *   or blank.
 *
 * @param {{repo: string, path: string, auto_push?: unknown, branch_prefix?: string}} config
 * @returns {{repo: string, path: string, auto_push: boolean, branch_prefix: string}}
 */
function normalizeResultsExportConfig(config) {
  const repo = config.repo.trim();
  const trimmedPath = config.path.trim();
  const path = trimmedPath.replace(/^\/+|\/+$/g, "");
  const autoPush = config.auto_push === true;
  const branchPrefix = config.branch_prefix?.trim() || "eval-results";
  return {
    repo,
    path,
    auto_push: autoPush,
    branch_prefix: branchPrefix
  };
}
33053
/**
 * Resolves a results-repo identifier to a clone URL.
 *
 * Values that contain `://` or start with `git@` are returned unchanged.
 * Anything else is treated as a GitHub shorthand and expanded to
 * `https://github.com/<repo>.git`.
 *
 * @param {string} repo - Full URL, `git@…` address, or shorthand.
 * @returns {string} URL to pass to `git clone`.
 */
function resolveResultsRepoUrl(repo) {
  if (repo.includes("://") || repo.startsWith("git@")) {
    return repo;
  }
  // A shorthand that already ends in ".git" must not end up as ".git.git".
  const slug = repo.replace(/\.git$/, "");
  return `https://github.com/${slug}.git`;
}
33059
/**
 * Computes the local cache locations used for a results repository.
 *
 * Everything lives under `<agentv home>/cache/results-repo/<slug>`, where
 * `<slug>` is `sanitizeRepoSlug(repo)`.
 *
 * @param {string} repo - Repository identifier.
 * @returns {{rootDir: string, repoDir: string, statusFile: string}}
 *   `rootDir` is the per-repo cache root, `repoDir` is `<rootDir>/repo`, and
 *   `statusFile` is `<rootDir>/status.json`.
 */
function getResultsRepoCachePaths(repo) {
  const agentvHome = getAgentvHome();
  const slug = sanitizeRepoSlug(repo);
  const rootDir = path49.join(agentvHome, "cache", "results-repo", slug);
  const repoDir = path49.join(rootDir, "repo");
  const statusFile = path49.join(rootDir, "status.json");
  return { rootDir, repoDir, statusFile };
}
33067
/**
 * Reads the persisted status record for a results repository.
 *
 * This is a best-effort read: a missing file, an unreadable file, invalid
 * JSON, or JSON that is not a plain object all yield an empty record rather
 * than an error.
 *
 * @param {string} statusFile - Path of the status JSON file.
 * @returns {object} The parsed status object, or `{}` when none is usable.
 */
function readPersistedStatus(statusFile) {
  if (!existsSync7(statusFile)) {
    return {};
  }
  try {
    const parsed = JSON.parse(readFileSync3(statusFile, "utf8"));
    // `null`, arrays and primitives are valid JSON but not a status record;
    // callers read properties off the result and spread it into a new object.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return {};
    }
    return parsed;
  } catch {
    // Unreadable or corrupt status file: treat as "no status recorded".
    return {};
  }
}
33077
/**
 * Writes a status record to disk as pretty-printed JSON (2-space indent)
 * followed by a trailing newline, creating the parent directory if needed.
 *
 * @param {string} statusFile - Destination path.
 * @param {object} status - Record to serialize.
 * @returns {void}
 */
function writePersistedStatus(statusFile, status) {
  const parentDir = path49.dirname(statusFile);
  mkdirSync2(parentDir, { recursive: true });
  const serialized = JSON.stringify(status, null, 2);
  writeFileSync(statusFile, `${serialized}\n`, "utf8");
}
33082
/**
 * Runs an executable with the given arguments and collects its output.
 *
 * The child inherits `process.env` and runs in `options.cwd` when provided.
 *
 * @param {string} executable - Program to run.
 * @param {Iterable<string>} args - Arguments; copied before being passed on.
 * @param {{cwd?: string, check?: boolean}} [options] - With `check: false`, a
 *   command that ran but failed resolves with whatever output was captured
 *   instead of rejecting.
 * @returns {Promise<{stdout: string, stderr: string}>}
 * @throws {Error} The failure passed through `withFriendlyGitHubAuthError`
 *   when the command fails and `check` is not `false`, or when the command
 *   could not be run at all.
 */
async function runCommand(executable, args, options) {
  try {
    const { stdout, stderr } = await execFileAsync3(executable, [...args], {
      cwd: options?.cwd,
      env: process.env
    });
    return { stdout, stderr };
  } catch (error) {
    const execError = error && typeof error === "object" ? error : void 0;
    // For a process that ran and exited non-zero, `code` is the numeric exit
    // status. A string `code` (ENOENT, EACCES, ERR_CHILD_PROCESS_STDIO_MAXBUFFER…)
    // means the command never ran or its output is incomplete. `check: false`
    // must not hide that, otherwise callers see empty or truncated output and
    // take it for a real result.
    const failedToRun = typeof execError?.code === "string";
    if (options?.check === false && execError && !failedToRun) {
      return {
        stdout: execError.stdout ?? "",
        stderr: execError.stderr ?? ""
      };
    }
    throw withFriendlyGitHubAuthError(error);
  }
}
33100
/**
 * Runs `git` with the given arguments via `runCommand`.
 *
 * @param {Iterable<string>} args - Arguments passed to `git`.
 * @param {{cwd?: string, check?: boolean}} [options] - Forwarded unchanged.
 * @returns {Promise<{stdout: string, stderr: string}>}
 */
async function runGit(args, options) {
  const result = await runCommand("git", args, options);
  return result;
}
33103
/**
 * Runs the `gh` CLI with the given arguments via `runCommand`.
 *
 * @param {Iterable<string>} args - Arguments passed to `gh`.
 * @param {{cwd?: string, check?: boolean}} [options] - Forwarded unchanged.
 * @returns {Promise<{stdout: string, stderr: string}>}
 */
async function runGh(args, options) {
  const result = await runCommand("gh", args, options);
  return result;
}
33106
/**
 * Determines the default branch of the `origin` remote for a local clone.
 *
 * Resolution order:
 *   1. the target of `refs/remotes/origin/HEAD`, when it points below
 *      `refs/remotes/origin/`;
 *   2. the first of `main`, `master` for which `origin/<name>` verifies;
 *   3. the literal `"main"`.
 *
 * Failures of individual git commands are ignored and simply move on to the
 * next step.
 *
 * @param {string} repoDir - Working directory of the clone.
 * @returns {Promise<string>} Branch name without the `origin/` prefix.
 */
async function resolveDefaultBranch(repoDir) {
  const remotePrefix = "refs/remotes/origin/";
  let headRef = "";
  try {
    const result = await runGit(["symbolic-ref", "refs/remotes/origin/HEAD"], { cwd: repoDir });
    headRef = result.stdout.trim();
  } catch {
    // origin/HEAD is not set (or git failed): probe well-known names below.
  }
  if (headRef.startsWith(remotePrefix)) {
    return headRef.slice(remotePrefix.length);
  }

  const wellKnown = ["main", "master"];
  for (let index = 0; index < wellKnown.length; index += 1) {
    const branch = wellKnown[index];
    let verified = false;
    try {
      await runGit(["rev-parse", "--verify", `origin/${branch}`], { cwd: repoDir });
      verified = true;
    } catch {
      // This candidate does not exist on origin; try the next one.
    }
    if (verified) {
      return branch;
    }
  }
  return "main";
}
33125
/**
 * Brings the cached clone up to date with `origin/<baseBranch>`.
 *
 * Runs, in order and stopping at the first failure:
 * `git fetch origin --prune`, `git checkout <baseBranch>`,
 * `git pull --ff-only origin <baseBranch>`.
 *
 * @param {string} repoDir - Working directory of the clone.
 * @param {string} baseBranch - Branch to check out and fast-forward.
 * @returns {Promise<void>}
 */
async function updateCacheRepo(repoDir, baseBranch) {
  const steps = [
    ["fetch", "origin", "--prune"],
    ["checkout", baseBranch],
    ["pull", "--ff-only", "origin", baseBranch]
  ];
  for (const gitArgs of steps) {
    await runGit(gitArgs, { cwd: repoDir });
  }
}
33130
/**
 * Merges `patch` into the persisted status record of the configured results
 * repository and writes the result back.
 *
 * Keys in `patch` override the stored values.
 *
 * @param {{repo: string}} config - Config whose `repo` selects the cache.
 * @param {object} patch - Fields to overwrite.
 * @returns {void}
 */
function updateStatusFile(config, patch) {
  const { statusFile } = getResultsRepoCachePaths(config.repo);
  const merged = { ...readPersistedStatus(statusFile), ...patch };
  writePersistedStatus(statusFile, merged);
}
33138
/**
 * Makes sure a local clone of the results repository exists in the cache.
 *
 * When the cache directory for the clone is absent, the repository is cloned
 * with `--filter=blob:none`. When it is present it is only checked for a
 * `.git` entry; it is not updated here.
 *
 * @param {object} config - Results export config (normalized internally).
 * @returns {Promise<string>} Path of the cached clone.
 * @throws {Error} If cloning fails (the message is also stored as
 *   `last_error` in the status file), or if the cache directory exists but
 *   has no `.git` entry.
 */
async function ensureResultsRepoClone(config) {
  const normalized = normalizeResultsExportConfig(config);
  const { rootDir, repoDir } = getResultsRepoCachePaths(normalized.repo);
  mkdirSync2(rootDir, { recursive: true });

  if (existsSync7(repoDir)) {
    if (!existsSync7(path49.join(repoDir, ".git"))) {
      throw new Error(`Results repo cache is not a git repository: ${repoDir}`);
    }
    return repoDir;
  }

  try {
    const cloneArgs = ["clone", "--filter=blob:none", resolveResultsRepoUrl(normalized.repo), repoDir];
    await runGit(cloneArgs);
  } catch (error) {
    // Record the failure first so a later status query can surface it.
    updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
    throw withFriendlyGitHubAuthError(error);
  }
  return repoDir;
}
33161
/**
 * Describes the configuration and local cache state of the results repo.
 *
 * Performs no git or network work: it only normalizes the config, checks
 * whether the cached clone directory exists, and reads the status file.
 *
 * @param {object} [config] - Results export config; a falsy value means the
 *   feature is not configured.
 * @returns {object} With no config: `{configured: false, available: false,
 *   repo: "", cache_dir: ""}`. Otherwise the normalized settings plus
 *   `available`, `cache_dir`, `last_synced_at` and `last_error`.
 */
function getResultsRepoStatus(config) {
  if (config) {
    const normalized = normalizeResultsExportConfig(config);
    const { repoDir, statusFile } = getResultsRepoCachePaths(normalized.repo);
    const { last_synced_at, last_error } = readPersistedStatus(statusFile);
    return {
      configured: true,
      available: existsSync7(repoDir),
      repo: normalized.repo,
      path: normalized.path,
      auto_push: normalized.auto_push,
      branch_prefix: normalized.branch_prefix,
      cache_dir: repoDir,
      last_synced_at,
      last_error
    };
  }
  return {
    configured: false,
    available: false,
    repo: "",
    cache_dir: ""
  };
}
33185
/**
 * Clones the results repository if needed, updates it to the remote default
 * branch, and records the outcome in the status file.
 *
 * On success `last_synced_at` is set to the current time and `last_error` is
 * cleared. On failure `last_error` is stored and the error is rethrown.
 *
 * @param {object} config - Results export config (normalized internally).
 * @returns {Promise<object>} The value of `getResultsRepoStatus` after sync.
 * @throws {Error} The failure, passed through `withFriendlyGitHubAuthError`.
 */
async function syncResultsRepo(config) {
  const normalized = normalizeResultsExportConfig(config);
  try {
    const cloneDir = await ensureResultsRepoClone(normalized);
    const defaultBranch = await resolveDefaultBranch(cloneDir);
    await updateCacheRepo(cloneDir, defaultBranch);
    const successPatch = {
      last_synced_at: new Date().toISOString(),
      last_error: void 0
    };
    updateStatusFile(normalized, successPatch);
  } catch (error) {
    const failurePatch = { last_error: withFriendlyGitHubAuthError(error).message };
    updateStatusFile(normalized, failurePatch);
    throw withFriendlyGitHubAuthError(error);
  }
  return getResultsRepoStatus(normalized);
}
33203
/**
 * Checks out `branchName` inside the cached clone, creating or resetting it
 * (`git checkout -B`) to point at `origin/<default branch>`.
 *
 * The clone is created if missing and updated first; `last_error` in the
 * status file is cleared afterwards.
 *
 * @param {object} config - Results export config (normalized internally).
 * @param {string} branchName - Branch to create or reset.
 * @returns {Promise<{branchName: string, baseBranch: string, repoDir: string}>}
 */
async function checkoutResultsRepoBranch(config, branchName) {
  const normalized = normalizeResultsExportConfig(config);
  const repoDir = await ensureResultsRepoClone(normalized);
  const baseBranch = await resolveDefaultBranch(repoDir);
  await updateCacheRepo(repoDir, baseBranch);

  const startPoint = `origin/${baseBranch}`;
  await runGit(["checkout", "-B", branchName, startPoint], { cwd: repoDir });

  updateStatusFile(normalized, { last_error: void 0 });
  return { branchName, baseBranch, repoDir };
}
33216
/**
 * Creates a temporary git worktree of the cached results clone with
 * `branchName` created or reset (`-B`) at `origin/<default branch>`.
 *
 * The worktree lives in `<tmp>/agentv-results-repo-XXXXXX/repo`. The caller
 * owns it and must invoke the returned `cleanup()` when done.
 *
 * @param {object} config - Results export config (normalized internally).
 * @param {string} branchName - Branch to create or reset in the worktree.
 * @returns {Promise<{branchName: string, baseBranch: string, repoDir: string,
 *   cleanup: () => Promise<void>}>} `repoDir` is the worktree directory.
 *   `cleanup` removes the worktree registration and always deletes the
 *   temporary directory, even if `git worktree remove` fails.
 * @throws {Error} If cloning, updating or `git worktree add` fails. In the
 *   last case the temporary directory is removed before rethrowing.
 */
async function prepareResultsRepoBranch(config, branchName) {
  const normalized = normalizeResultsExportConfig(config);
  const cloneDir = await ensureResultsRepoClone(normalized);
  const baseBranch = await resolveDefaultBranch(cloneDir);
  await updateCacheRepo(cloneDir, baseBranch);
  const worktreeRoot = await mkdtemp3(path49.join(os3.tmpdir(), "agentv-results-repo-"));
  const worktreeDir = path49.join(worktreeRoot, "repo");
  try {
    await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
      cwd: cloneDir
    });
  } catch (error) {
    // No cleanup handle is returned on this path, so the temp directory
    // created above would otherwise be leaked.
    await rm6(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
    throw error;
  }
  return {
    branchName,
    baseBranch,
    repoDir: worktreeDir,
    cleanup: async () => {
      try {
        await runGit(["worktree", "remove", "--force", worktreeDir], { cwd: cloneDir });
      } finally {
        await rm6(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
      }
    }
  };
}
33239
/**
 * Replaces `destinationDir` with a recursive copy of `sourceDir`.
 *
 * Any existing destination is deleted first, its parent directory is created
 * if needed, and then the source tree is copied in.
 *
 * @param {{sourceDir: string, destinationDir: string}} params
 * @returns {Promise<void>}
 */
async function stageResultsArtifacts(params) {
  const { sourceDir, destinationDir } = params;
  // Start from a clean slate so files from an earlier staging do not linger.
  rmSync(destinationDir, { recursive: true, force: true });
  const parentDir = path49.dirname(destinationDir);
  mkdirSync2(parentDir, { recursive: true });
  await cp3(sourceDir, destinationDir, { recursive: true });
}
33244
/**
 * Resolves the directory inside the cached results clone that corresponds to
 * the configured `path`.
 *
 * @param {object} config - Results export config (normalized internally).
 * @returns {string} `<cached clone dir>/<config.path>` using platform
 *   separators; the clone directory itself when `path` is empty.
 * @throws {Error} If the configured path resolves outside the cached clone
 *   (for example through `..` segments).
 */
function resolveResultsRepoRunsDir(config) {
  const normalized = normalizeResultsExportConfig(config);
  const repoDir = getResultsRepoCachePaths(normalized.repo).repoDir;
  const runsDir = path49.join(repoDir, ...normalized.path.split("/"));
  // `path.join` collapses ".." segments, so a path such as "../../x" would
  // silently point outside the clone. Reject it rather than hand callers a
  // location in an unrelated part of the filesystem.
  const relative = path49.relative(repoDir, runsDir);
  const escapes = relative === ".." || relative.startsWith(`..${path49.sep}`) || path49.isAbsolute(relative);
  if (escapes) {
    throw new Error(`Results repo path escapes the repository cache: ${normalized.path}`);
  }
  return runsDir;
}
33251
/**
 * Computes the total size in bytes of a file or directory tree.
 *
 * Regular files contribute their size and directories are walked
 * recursively. Symbolic links found inside a directory are not followed, and
 * other entry types contribute nothing.
 *
 * @param {string} targetPath - File or directory to measure.
 * @returns {Promise<number>} Sum of the sizes of the regular files found.
 * @throws {Error} If `targetPath` or a non-link entry below it cannot be
 *   stat'ed or listed.
 */
async function directorySizeBytes(targetPath) {
  const entry = await stat9(targetPath);
  if (entry.isFile()) {
    return entry.size;
  }
  if (!entry.isDirectory()) {
    // Sockets, FIFOs, devices: nothing to sum, and readdir would fail on them.
    return 0;
  }
  let total = 0;
  for (const child of await readdir8(targetPath, { withFileTypes: true })) {
    // Following links would recurse until the OS errors on a link cycle and
    // would throw on a dangling link, so links are skipped.
    if (child.isSymbolicLink()) {
      continue;
    }
    total += await directorySizeBytes(path49.join(targetPath, child.name));
  }
  return total;
}
33262
/**
 * Stages everything in `params.repoDir`, and if that leaves anything to
 * commit, commits it with `params.commitMessage` and pushes
 * `params.branchName` to `origin` with upstream tracking.
 *
 * @param {{repoDir: string, commitMessage: string, branchName: string}} params
 * @returns {Promise<boolean>} `false` when `git status --porcelain` printed
 *   nothing (no commit or push happens), `true` after a commit and push.
 */
async function commitAndPushResultsBranch(params) {
  const { repoDir, commitMessage, branchName } = params;
  await runGit(["add", "--all"], { cwd: repoDir });

  const status = await runGit(["status", "--porcelain"], { cwd: repoDir, check: false });
  const hasChanges = status.stdout.trim().length !== 0;
  if (!hasChanges) {
    return false;
  }

  await runGit(["commit", "-m", commitMessage], { cwd: repoDir });
  await runGit(["push", "-u", "origin", branchName], { cwd: repoDir });
  return true;
}
33275
/**
 * Pushes `branchName` to `origin` with upstream tracking, then records the
 * sync time and clears `last_error` in the status file.
 *
 * @param {object} config - Results export config (normalized internally).
 * @param {string} branchName - Branch to push.
 * @param {string} [cwd] - Directory to run git in; defaults to the cached
 *   clone of the configured repository when `null` or `undefined`.
 * @returns {Promise<void>}
 */
async function pushResultsRepoBranch(config, branchName, cwd) {
  const normalized = normalizeResultsExportConfig(config);
  let pushDir = cwd;
  if (pushDir === null || pushDir === void 0) {
    pushDir = getResultsRepoCachePaths(normalized.repo).repoDir;
  }
  await runGit(["push", "-u", "origin", branchName], { cwd: pushDir });

  const syncedAt = new Date().toISOString();
  updateStatusFile(normalized, {
    last_synced_at: syncedAt,
    last_error: void 0
  });
}
33285
/**
 * Opens a draft pull request with `gh pr create --draft`.
 *
 * @param {{repo: string, baseBranch: string, branchName: string, title: string,
 *   body: string, repoDir: string}} params - `repo` is passed to `--repo`,
 *   `baseBranch` to `--base`, `branchName` to `--head`; `repoDir` is the
 *   working directory for the `gh` call.
 * @returns {Promise<string>} Trimmed stdout of the `gh` command (presumably
 *   the URL of the created PR — verify against the `gh` documentation).
 */
async function createDraftResultsPr(params) {
  const { repo, baseBranch, branchName, title, body, repoDir } = params;
  const ghArgs = [
    "pr",
    "create",
    "--draft",
    "--repo",
    repo,
    "--base",
    baseBranch,
    "--head",
    branchName,
    "--title",
    title,
    "--body",
    body
  ];
  const result = await runGh(ghArgs, { cwd: repoDir });
  return result.stdout.trim();
}
32886
33306
  function getProjectsRegistryPath() {
32887
- return path49.join(getAgentvHome(), "projects.yaml");
33307
+ return path50.join(getAgentvHome(), "projects.yaml");
32888
33308
  }
32889
33309
  function loadProjectRegistry() {
32890
33310
  const registryPath = getProjectsRegistryPath();
32891
- if (!existsSync7(registryPath)) {
33311
+ if (!existsSync8(registryPath)) {
32892
33312
  return { projects: [] };
32893
33313
  }
32894
33314
  try {
32895
- const raw = readFileSync3(registryPath, "utf-8");
33315
+ const raw = readFileSync4(registryPath, "utf-8");
32896
33316
  const parsed = parseYaml3(raw);
32897
33317
  if (!parsed || !Array.isArray(parsed.projects)) {
32898
33318
  return { projects: [] };
@@ -32904,14 +33324,14 @@ function loadProjectRegistry() {
32904
33324
  }
32905
33325
  function saveProjectRegistry(registry) {
32906
33326
  const registryPath = getProjectsRegistryPath();
32907
- const dir = path49.dirname(registryPath);
32908
- if (!existsSync7(dir)) {
32909
- mkdirSync2(dir, { recursive: true });
33327
+ const dir = path50.dirname(registryPath);
33328
+ if (!existsSync8(dir)) {
33329
+ mkdirSync3(dir, { recursive: true });
32910
33330
  }
32911
- writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
33331
+ writeFileSync2(registryPath, stringifyYaml(registry), "utf-8");
32912
33332
  }
32913
33333
  function deriveProjectId(dirPath, existingIds) {
32914
- const base = path49.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
33334
+ const base = path50.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
32915
33335
  let candidate = base || "project";
32916
33336
  let suffix = 2;
32917
33337
  while (existingIds.includes(candidate)) {
@@ -32921,11 +33341,11 @@ function deriveProjectId(dirPath, existingIds) {
32921
33341
  return candidate;
32922
33342
  }
32923
33343
  function addProject(projectPath) {
32924
- const absPath = path49.resolve(projectPath);
32925
- if (!existsSync7(absPath)) {
33344
+ const absPath = path50.resolve(projectPath);
33345
+ if (!existsSync8(absPath)) {
32926
33346
  throw new Error(`Directory not found: ${absPath}`);
32927
33347
  }
32928
- if (!existsSync7(path49.join(absPath, ".agentv"))) {
33348
+ if (!existsSync8(path50.join(absPath, ".agentv"))) {
32929
33349
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
32930
33350
  }
32931
33351
  const registry = loadProjectRegistry();
@@ -32939,7 +33359,7 @@ function addProject(projectPath) {
32939
33359
  absPath,
32940
33360
  registry.projects.map((p) => p.id)
32941
33361
  ),
32942
- name: path49.basename(absPath),
33362
+ name: path50.basename(absPath),
32943
33363
  path: absPath,
32944
33364
  addedAt: now2,
32945
33365
  lastOpenedAt: now2
@@ -32968,14 +33388,14 @@ function touchProject(projectId) {
32968
33388
  }
32969
33389
  }
32970
33390
  function discoverProjects(rootDir, maxDepth = 2) {
32971
- const absRoot = path49.resolve(rootDir);
32972
- if (!existsSync7(absRoot) || !statSync2(absRoot).isDirectory()) {
33391
+ const absRoot = path50.resolve(rootDir);
33392
+ if (!existsSync8(absRoot) || !statSync2(absRoot).isDirectory()) {
32973
33393
  return [];
32974
33394
  }
32975
33395
  const results = [];
32976
33396
  function scan(dir, depth) {
32977
33397
  if (depth > maxDepth) return;
32978
- if (existsSync7(path49.join(dir, ".agentv"))) {
33398
+ if (existsSync8(path50.join(dir, ".agentv"))) {
32979
33399
  results.push(dir);
32980
33400
  return;
32981
33401
  }
@@ -32985,7 +33405,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
32985
33405
  for (const entry of entries) {
32986
33406
  if (!entry.isDirectory()) continue;
32987
33407
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
32988
- scan(path49.join(dir, entry.name), depth + 1);
33408
+ scan(path50.join(dir, entry.name), depth + 1);
32989
33409
  }
32990
33410
  } catch {
32991
33411
  }
@@ -33089,8 +33509,8 @@ var OtelTraceExporter = class {
33089
33509
  async init() {
33090
33510
  try {
33091
33511
  const [sdkTraceNode, resourcesMod, semconvMod, api, coreMod] = await Promise.all([
33092
- import("./src-ML4D2MC2.js"),
33093
- import("./esm-CZAWIY6F.js"),
33512
+ import("./src-PXDA7QIS.js"),
33513
+ import("./esm-UYZ3HJBU.js"),
33094
33514
  import("./esm-RVQPUGWH.js"),
33095
33515
  import("./esm-R77SNOF5.js"),
33096
33516
  import("./esm-ZADQ4XQH-5LX2IKZV.js").catch(() => null)
@@ -33884,30 +34304,30 @@ function extractResponseItemContent(content) {
33884
34304
  }
33885
34305
  return parts.length > 0 ? parts.join("") : void 0;
33886
34306
  }
33887
- var DEFAULT_SESSIONS_DIR = () => path50.join(homedir3(), ".codex", "sessions");
34307
+ var DEFAULT_SESSIONS_DIR = () => path51.join(homedir3(), ".codex", "sessions");
33888
34308
  async function discoverCodexSessions(opts) {
33889
34309
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
33890
34310
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
33891
34311
  const sessions = [];
33892
34312
  let yearDirs;
33893
34313
  try {
33894
- yearDirs = await readdir8(sessionsDir);
34314
+ yearDirs = await readdir9(sessionsDir);
33895
34315
  } catch {
33896
34316
  return [];
33897
34317
  }
33898
34318
  for (const year of yearDirs) {
33899
- const yearPath = path50.join(sessionsDir, year);
34319
+ const yearPath = path51.join(sessionsDir, year);
33900
34320
  let monthDirs;
33901
34321
  try {
33902
- monthDirs = await readdir8(yearPath);
34322
+ monthDirs = await readdir9(yearPath);
33903
34323
  } catch {
33904
34324
  continue;
33905
34325
  }
33906
34326
  for (const month of monthDirs) {
33907
- const monthPath = path50.join(yearPath, month);
34327
+ const monthPath = path51.join(yearPath, month);
33908
34328
  let dayDirs;
33909
34329
  try {
33910
- dayDirs = await readdir8(monthPath);
34330
+ dayDirs = await readdir9(monthPath);
33911
34331
  } catch {
33912
34332
  continue;
33913
34333
  }
@@ -33916,22 +34336,22 @@ async function discoverCodexSessions(opts) {
33916
34336
  const dirDate = `${year}-${month}-${day}`;
33917
34337
  if (dirDate !== opts.date) continue;
33918
34338
  }
33919
- const dayPath = path50.join(monthPath, day);
34339
+ const dayPath = path51.join(monthPath, day);
33920
34340
  let files;
33921
34341
  try {
33922
- files = await readdir8(dayPath);
34342
+ files = await readdir9(dayPath);
33923
34343
  } catch {
33924
34344
  continue;
33925
34345
  }
33926
34346
  for (const file of files) {
33927
34347
  if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
33928
- const filePath = path50.join(dayPath, file);
34348
+ const filePath = path51.join(dayPath, file);
33929
34349
  const nameWithoutExt = file.replace(/\.jsonl$/, "");
33930
34350
  const parts = nameWithoutExt.split("-");
33931
34351
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
33932
34352
  let updatedAt;
33933
34353
  try {
33934
- const fileStat = await stat9(filePath);
34354
+ const fileStat = await stat10(filePath);
33935
34355
  updatedAt = fileStat.mtime;
33936
34356
  } catch {
33937
34357
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -33944,7 +34364,7 @@ async function discoverCodexSessions(opts) {
33944
34364
  sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
33945
34365
  return sessions.slice(0, limit);
33946
34366
  }
33947
- var DEFAULT_PROJECTS_DIR = () => path51.join(homedir4(), ".claude", "projects");
34367
+ var DEFAULT_PROJECTS_DIR = () => path52.join(homedir4(), ".claude", "projects");
33948
34368
  function encodeProjectPath(projectPath) {
33949
34369
  return projectPath.replace(/\//g, "-");
33950
34370
  }
@@ -33953,7 +34373,7 @@ async function discoverClaudeSessions(opts) {
33953
34373
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
33954
34374
  let projectDirs;
33955
34375
  try {
33956
- projectDirs = await readdir9(projectsDir);
34376
+ projectDirs = await readdir10(projectsDir);
33957
34377
  } catch {
33958
34378
  return [];
33959
34379
  }
@@ -33963,10 +34383,10 @@ async function discoverClaudeSessions(opts) {
33963
34383
  }
33964
34384
  const sessions = [];
33965
34385
  for (const projectDir of projectDirs) {
33966
- const dirPath = path51.join(projectsDir, projectDir);
34386
+ const dirPath = path52.join(projectsDir, projectDir);
33967
34387
  let entries;
33968
34388
  try {
33969
- entries = await readdir9(dirPath);
34389
+ entries = await readdir10(dirPath);
33970
34390
  } catch {
33971
34391
  continue;
33972
34392
  }
@@ -33974,10 +34394,10 @@ async function discoverClaudeSessions(opts) {
33974
34394
  if (!entry.endsWith(".jsonl")) continue;
33975
34395
  const sessionId = entry.replace(/\.jsonl$/, "");
33976
34396
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
33977
- const filePath = path51.join(dirPath, entry);
34397
+ const filePath = path52.join(dirPath, entry);
33978
34398
  let updatedAt;
33979
34399
  try {
33980
- const fileStat = await stat10(filePath);
34400
+ const fileStat = await stat11(filePath);
33981
34401
  updatedAt = fileStat.mtime;
33982
34402
  } catch {
33983
34403
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -34018,11 +34438,11 @@ function toTranscriptJsonLine(entry) {
34018
34438
  };
34019
34439
  }
34020
34440
  async function readTranscriptJsonl(filePath) {
34021
- const text2 = await readFile16(filePath, "utf8");
34441
+ const text2 = await readFile17(filePath, "utf8");
34022
34442
  return text2.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
34023
34443
  }
34024
34444
  async function readTranscriptFile(filePath) {
34025
- return readFile16(filePath, "utf8");
34445
+ return readFile17(filePath, "utf8");
34026
34446
  }
34027
34447
  var TranscriptProvider = class _TranscriptProvider {
34028
34448
  id;
@@ -34229,6 +34649,20 @@ export {
34229
34649
  ResponseCache,
34230
34650
  shouldEnableCache,
34231
34651
  shouldSkipCacheForTemperature,
34652
+ normalizeResultsExportConfig,
34653
+ resolveResultsRepoUrl,
34654
+ getResultsRepoCachePaths,
34655
+ ensureResultsRepoClone,
34656
+ getResultsRepoStatus,
34657
+ syncResultsRepo,
34658
+ checkoutResultsRepoBranch,
34659
+ prepareResultsRepoBranch,
34660
+ stageResultsArtifacts,
34661
+ resolveResultsRepoRunsDir,
34662
+ directorySizeBytes,
34663
+ commitAndPushResultsBranch,
34664
+ pushResultsRepoBranch,
34665
+ createDraftResultsPr,
34232
34666
  getProjectsRegistryPath,
34233
34667
  loadProjectRegistry,
34234
34668
  saveProjectRegistry,
@@ -34254,4 +34688,4 @@ export {
34254
34688
  TranscriptProvider,
34255
34689
  createAgentKernel
34256
34690
  };
34257
- //# sourceMappingURL=chunk-SE73HJZG.js.map
34691
+ //# sourceMappingURL=chunk-FQGY6QXQ.js.map