@agentv/core 3.8.0 → 3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1315,12 +1315,12 @@ function serializeAttributeValue(value) {
1315
1315
  if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
1316
1316
  return { stringValue: String(value) };
1317
1317
  }
1318
- var import_promises33, import_node_path49, OtlpJsonFileExporter;
1318
+ var import_promises32, import_node_path48, OtlpJsonFileExporter;
1319
1319
  var init_otlp_json_file_exporter = __esm({
1320
1320
  "src/observability/otlp-json-file-exporter.ts"() {
1321
1321
  "use strict";
1322
- import_promises33 = require("fs/promises");
1323
- import_node_path49 = require("path");
1322
+ import_promises32 = require("fs/promises");
1323
+ import_node_path48 = require("path");
1324
1324
  OtlpJsonFileExporter = class {
1325
1325
  // biome-ignore lint/suspicious/noExplicitAny: serialized span data
1326
1326
  spans = [];
@@ -1359,7 +1359,7 @@ var init_otlp_json_file_exporter = __esm({
1359
1359
  }
1360
1360
  async flush() {
1361
1361
  if (this.spans.length === 0) return;
1362
- await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
1362
+ await (0, import_promises32.mkdir)((0, import_node_path48.dirname)(this.filePath), { recursive: true });
1363
1363
  const otlpJson = {
1364
1364
  resourceSpans: [
1365
1365
  {
@@ -1390,13 +1390,13 @@ function hrTimeDiffMs(start, end) {
1390
1390
  const diffNano = end[1] - start[1];
1391
1391
  return Math.round(diffSec * 1e3 + diffNano / 1e6);
1392
1392
  }
1393
- var import_node_fs15, import_promises34, import_node_path50, SimpleTraceFileExporter;
1393
+ var import_node_fs15, import_promises33, import_node_path49, SimpleTraceFileExporter;
1394
1394
  var init_simple_trace_file_exporter = __esm({
1395
1395
  "src/observability/simple-trace-file-exporter.ts"() {
1396
1396
  "use strict";
1397
1397
  import_node_fs15 = require("fs");
1398
- import_promises34 = require("fs/promises");
1399
- import_node_path50 = require("path");
1398
+ import_promises33 = require("fs/promises");
1399
+ import_node_path49 = require("path");
1400
1400
  SimpleTraceFileExporter = class {
1401
1401
  stream = null;
1402
1402
  filePath;
@@ -1409,7 +1409,7 @@ var init_simple_trace_file_exporter = __esm({
1409
1409
  async ensureStream() {
1410
1410
  if (!this.streamReady) {
1411
1411
  this.streamReady = (async () => {
1412
- await (0, import_promises34.mkdir)((0, import_node_path50.dirname)(this.filePath), { recursive: true });
1412
+ await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
1413
1413
  this.stream = (0, import_node_fs15.createWriteStream)(this.filePath, { flags: "w" });
1414
1414
  return this.stream;
1415
1415
  })();
@@ -1575,6 +1575,7 @@ __export(index_exports, {
1575
1575
  extractTargetsFromSuite: () => extractTargetsFromSuite,
1576
1576
  extractTargetsFromTestCase: () => extractTargetsFromTestCase,
1577
1577
  extractTrialsConfig: () => extractTrialsConfig,
1578
+ extractWorkersFromSuite: () => extractWorkersFromSuite,
1578
1579
  fileExists: () => fileExists2,
1579
1580
  findGitRoot: () => findGitRoot,
1580
1581
  freeformEvaluationSchema: () => freeformEvaluationSchema,
@@ -1589,7 +1590,6 @@ __export(index_exports, {
1589
1590
  initializeBaseline: () => initializeBaseline,
1590
1591
  isAgentSkillsFormat: () => isAgentSkillsFormat,
1591
1592
  isEvaluatorKind: () => isEvaluatorKind,
1592
- isGuidelineFile: () => isGuidelineFile,
1593
1593
  isJsonObject: () => isJsonObject,
1594
1594
  isJsonValue: () => isJsonValue,
1595
1595
  isNonEmptyString: () => isNonEmptyString,
@@ -1849,9 +1849,9 @@ function mergeExecutionMetrics(computed, metrics) {
1849
1849
  }
1850
1850
 
1851
1851
  // src/evaluation/yaml-parser.ts
1852
- var import_promises9 = require("fs/promises");
1853
- var import_node_path9 = __toESM(require("path"), 1);
1854
- var import_micromatch3 = __toESM(require("micromatch"), 1);
1852
+ var import_promises8 = require("fs/promises");
1853
+ var import_node_path8 = __toESM(require("path"), 1);
1854
+ var import_micromatch2 = __toESM(require("micromatch"), 1);
1855
1855
  var import_yaml4 = require("yaml");
1856
1856
 
1857
1857
  // src/evaluation/interpolation.ts
@@ -1944,7 +1944,6 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
1944
1944
  input_segments: [{ type: "text", value: prompt }],
1945
1945
  expected_output: evalCase.expected_output ? [{ role: "assistant", content: evalCase.expected_output }] : [],
1946
1946
  reference_answer: evalCase.expected_output,
1947
- guideline_paths: [],
1948
1947
  file_paths: filePaths,
1949
1948
  criteria: evalCase.expected_output ?? "",
1950
1949
  assertions,
@@ -2076,7 +2075,6 @@ async function expandFileReferences(tests, evalFileDir) {
2076
2075
  // src/evaluation/loaders/config-loader.ts
2077
2076
  var import_promises4 = require("fs/promises");
2078
2077
  var import_node_path4 = __toESM(require("path"), 1);
2079
- var import_micromatch = __toESM(require("micromatch"), 1);
2080
2078
  var import_yaml2 = require("yaml");
2081
2079
 
2082
2080
  // src/evaluation/loaders/file-resolver.ts
@@ -2207,15 +2205,6 @@ async function loadConfig(evalFilePath, repoRoot) {
2207
2205
  logWarning(`Invalid required_version in ${configPath}, expected string`);
2208
2206
  continue;
2209
2207
  }
2210
- const guidelinePatterns = config.guideline_patterns;
2211
- if (guidelinePatterns !== void 0 && !Array.isArray(guidelinePatterns)) {
2212
- logWarning(`Invalid guideline_patterns in ${configPath}, expected array`);
2213
- continue;
2214
- }
2215
- if (Array.isArray(guidelinePatterns) && !guidelinePatterns.every((p) => typeof p === "string")) {
2216
- logWarning(`Invalid guideline_patterns in ${configPath}, all entries must be strings`);
2217
- continue;
2218
- }
2219
2208
  const evalPatterns = config.eval_patterns;
2220
2209
  if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
2221
2210
  logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
@@ -2231,7 +2220,6 @@ async function loadConfig(evalFilePath, repoRoot) {
2231
2220
  );
2232
2221
  return {
2233
2222
  required_version: requiredVersion,
2234
- guideline_patterns: guidelinePatterns,
2235
2223
  eval_patterns: evalPatterns,
2236
2224
  execution: executionDefaults
2237
2225
  };
@@ -2243,11 +2231,6 @@ async function loadConfig(evalFilePath, repoRoot) {
2243
2231
  }
2244
2232
  return null;
2245
2233
  }
2246
- function isGuidelineFile(filePath, patterns) {
2247
- const normalized = filePath.split("\\").join("/");
2248
- const patternsToUse = patterns ?? [];
2249
- return import_micromatch.default.isMatch(normalized, patternsToUse);
2250
- }
2251
2234
  function extractTargetFromSuite(suite) {
2252
2235
  const execution = suite.execution;
2253
2236
  if (execution && typeof execution === "object" && !Array.isArray(execution)) {
@@ -2274,6 +2257,17 @@ function extractTargetsFromSuite(suite) {
2274
2257
  }
2275
2258
  return void 0;
2276
2259
  }
2260
+ function extractWorkersFromSuite(suite) {
2261
+ const execution = suite.execution;
2262
+ if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
2263
+ return void 0;
2264
+ }
2265
+ const workers = execution.workers;
2266
+ if (typeof workers === "number" && Number.isInteger(workers) && workers >= 1 && workers <= 50) {
2267
+ return workers;
2268
+ }
2269
+ return void 0;
2270
+ }
2277
2271
  function extractTargetsFromTestCase(testCase) {
2278
2272
  const execution = testCase.execution;
2279
2273
  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
@@ -3823,7 +3817,7 @@ function parseInlineRubrics(rawRubrics) {
3823
3817
  // src/evaluation/loaders/jsonl-parser.ts
3824
3818
  var import_promises7 = require("fs/promises");
3825
3819
  var import_node_path7 = __toESM(require("path"), 1);
3826
- var import_micromatch2 = __toESM(require("micromatch"), 1);
3820
+ var import_micromatch = __toESM(require("micromatch"), 1);
3827
3821
  var import_yaml3 = require("yaml");
3828
3822
 
3829
3823
  // src/evaluation/loaders/message-processor.ts
@@ -3850,10 +3844,6 @@ function formatSegment(segment, mode = "lm") {
3850
3844
  if (type === "text") {
3851
3845
  return asString2(segment.value);
3852
3846
  }
3853
- if (type === "guideline_ref") {
3854
- const refPath = asString2(segment.path);
3855
- return refPath ? `<Attached: ${refPath}>` : void 0;
3856
- }
3857
3847
  if (type === "file") {
3858
3848
  const filePath = asString2(segment.path);
3859
3849
  if (!filePath) {
@@ -3876,9 +3866,6 @@ function hasVisibleContent(segments) {
3876
3866
  const value = asString2(segment.value);
3877
3867
  return value !== void 0 && value.trim().length > 0;
3878
3868
  }
3879
- if (type === "guideline_ref") {
3880
- return false;
3881
- }
3882
3869
  if (type === "file") {
3883
3870
  const text = asString2(segment.text);
3884
3871
  return text !== void 0 && text.trim().length > 0;
@@ -3894,17 +3881,7 @@ function asString2(value) {
3894
3881
  var ANSI_YELLOW5 = "\x1B[33m";
3895
3882
  var ANSI_RESET6 = "\x1B[0m";
3896
3883
  async function processMessages(options) {
3897
- const {
3898
- messages,
3899
- searchRoots,
3900
- repoRootPath,
3901
- guidelinePatterns,
3902
- guidelinePaths,
3903
- treatFileSegmentsAsGuidelines,
3904
- textParts,
3905
- messageType,
3906
- verbose
3907
- } = options;
3884
+ const { messages, searchRoots, repoRootPath, textParts, messageType, verbose } = options;
3908
3885
  const segments = [];
3909
3886
  for (const message of messages) {
3910
3887
  const content = message.content;
@@ -3948,21 +3925,6 @@ async function processMessages(options) {
3948
3925
  }
3949
3926
  try {
3950
3927
  const fileContent = (await (0, import_promises6.readFile)(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
3951
- const classifyAsGuideline = shouldTreatAsGuideline({
3952
- messageType,
3953
- resolvedPath,
3954
- repoRootPath,
3955
- guidelinePatterns,
3956
- treatFileSegmentsAsGuidelines
3957
- });
3958
- if (classifyAsGuideline && guidelinePaths) {
3959
- guidelinePaths.push(import_node_path6.default.resolve(resolvedPath));
3960
- if (verbose) {
3961
- console.log(` [Guideline] Found: ${displayPath}`);
3962
- console.log(` Resolved to: ${resolvedPath}`);
3963
- }
3964
- continue;
3965
- }
3966
3928
  segments.push({
3967
3929
  type: "file",
3968
3930
  path: displayPath,
@@ -3990,26 +3952,6 @@ async function processMessages(options) {
3990
3952
  }
3991
3953
  return segments;
3992
3954
  }
3993
- function shouldTreatAsGuideline(options) {
3994
- const {
3995
- messageType,
3996
- resolvedPath,
3997
- repoRootPath,
3998
- guidelinePatterns,
3999
- treatFileSegmentsAsGuidelines
4000
- } = options;
4001
- if (messageType !== "input") {
4002
- return false;
4003
- }
4004
- if (treatFileSegmentsAsGuidelines) {
4005
- return true;
4006
- }
4007
- if (!guidelinePatterns || guidelinePatterns.length === 0) {
4008
- return false;
4009
- }
4010
- const relativeToRepo = import_node_path6.default.relative(repoRootPath, resolvedPath);
4011
- return isGuidelineFile(relativeToRepo, guidelinePatterns);
4012
- }
4013
3955
  function asString3(value) {
4014
3956
  return typeof value === "string" ? value : void 0;
4015
3957
  }
@@ -4210,7 +4152,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
4210
4152
  }
4211
4153
  return {
4212
4154
  description: asString4(parsed.description),
4213
- dataset: asString4(parsed.dataset),
4155
+ name: asString4(parsed.name),
4214
4156
  execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
4215
4157
  evaluator: parsed.evaluator
4216
4158
  };
@@ -4246,20 +4188,18 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4246
4188
  const absoluteTestPath = import_node_path7.default.resolve(evalFilePath);
4247
4189
  const repoRootPath = resolveToAbsolutePath(repoRoot);
4248
4190
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
4249
- const config = await loadConfig(absoluteTestPath, repoRootPath);
4250
- const guidelinePatterns = config?.guideline_patterns;
4251
4191
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
4252
4192
  const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
4253
4193
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
4254
- const fallbackDataset = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
4255
- const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
4194
+ const fallbackEvalSet = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
4195
+ const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
4256
4196
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
4257
4197
  const globalExecution = sidecar.execution;
4258
4198
  if (verbose) {
4259
4199
  console.log(`
4260
4200
  [JSONL Dataset: ${evalFilePath}]`);
4261
4201
  console.log(` Cases: ${rawCases.length}`);
4262
- console.log(` Dataset name: ${datasetName}`);
4202
+ console.log(` Eval set: ${evalSetName}`);
4263
4203
  if (sidecar.description) {
4264
4204
  console.log(` Description: ${sidecar.description}`);
4265
4205
  }
@@ -4269,7 +4209,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4269
4209
  const evalcase = rawCases[lineIndex];
4270
4210
  const lineNumber = lineIndex + 1;
4271
4211
  const id = asString4(evalcase.id);
4272
- if (filterPattern && (!id || !import_micromatch2.default.isMatch(id, filterPattern))) {
4212
+ if (filterPattern && (!id || !import_micromatch.default.isMatch(id, filterPattern))) {
4273
4213
  continue;
4274
4214
  }
4275
4215
  const conversationId = asString4(evalcase.conversation_id);
@@ -4292,14 +4232,11 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4292
4232
  continue;
4293
4233
  }
4294
4234
  const hasExpectedMessages = expectedMessages.length > 0;
4295
- const guidelinePaths = [];
4296
4235
  const inputTextParts = [];
4297
4236
  const inputSegments = await processMessages({
4298
4237
  messages: inputMessages,
4299
4238
  searchRoots,
4300
4239
  repoRootPath,
4301
- guidelinePatterns,
4302
- guidelinePaths,
4303
4240
  textParts: inputTextParts,
4304
4241
  messageType: "input",
4305
4242
  verbose
@@ -4349,40 +4286,20 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
4349
4286
  userFilePaths.push(segment.resolvedPath);
4350
4287
  }
4351
4288
  }
4352
- const allFilePaths = [
4353
- ...guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
4354
- ...userFilePaths
4355
- ];
4356
4289
  const testCase = {
4357
4290
  id,
4358
- dataset: datasetName,
4291
+ eval_set: evalSetName,
4359
4292
  conversation_id: conversationId,
4360
4293
  question,
4361
4294
  input: inputMessages,
4362
4295
  input_segments: inputSegments,
4363
4296
  expected_output: outputSegments,
4364
4297
  reference_answer: referenceAnswer,
4365
- guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path7.default.resolve(guidelinePath)),
4366
- guideline_patterns: guidelinePatterns,
4367
- file_paths: allFilePaths,
4298
+ file_paths: userFilePaths,
4368
4299
  criteria: outcome ?? "",
4369
4300
  evaluator: evalCaseEvaluatorKind,
4370
4301
  assertions: evaluators
4371
4302
  };
4372
- if (verbose) {
4373
- console.log(`
4374
- [Test: ${id}]`);
4375
- if (testCase.guideline_paths.length > 0) {
4376
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
4377
- for (const guidelinePath of testCase.guideline_paths) {
4378
- console.log(` - ${guidelinePath}`);
4379
- }
4380
- } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
4381
- console.log(" No guidelines found (guideline_patterns not configured)");
4382
- } else {
4383
- console.log(" No guidelines found");
4384
- }
4385
- }
4386
4303
  results.push(testCase);
4387
4304
  }
4388
4305
  return results;
@@ -4440,30 +4357,7 @@ function parseMetadata(suite) {
4440
4357
  }
4441
4358
 
4442
4359
  // src/evaluation/formatting/prompt-builder.ts
4443
- var import_promises8 = require("fs/promises");
4444
- var import_node_path8 = __toESM(require("path"), 1);
4445
- var ANSI_YELLOW7 = "\x1B[33m";
4446
- var ANSI_RESET8 = "\x1B[0m";
4447
4360
  async function buildPromptInputs(testCase, mode = "lm") {
4448
- const guidelineParts = [];
4449
- for (const rawPath of testCase.guideline_paths) {
4450
- const absolutePath = import_node_path8.default.resolve(rawPath);
4451
- if (!await fileExists(absolutePath)) {
4452
- logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
4453
- continue;
4454
- }
4455
- try {
4456
- const content = (await (0, import_promises8.readFile)(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
4457
- guidelineParts.push({
4458
- content,
4459
- isFile: true,
4460
- displayPath: import_node_path8.default.basename(absolutePath)
4461
- });
4462
- } catch (error) {
4463
- logWarning5(`Could not read guideline file ${absolutePath}: ${error.message}`);
4464
- }
4465
- }
4466
- const guidelines = formatFileContents(guidelineParts);
4467
4361
  const segmentsByMessage = [];
4468
4362
  const fileContentsByPath = /* @__PURE__ */ new Map();
4469
4363
  for (const segment of testCase.input_segments) {
@@ -4488,10 +4382,6 @@ async function buildPromptInputs(testCase, mode = "lm") {
4488
4382
  if (type === "file") {
4489
4383
  const value = asString5(segment.value);
4490
4384
  if (!value) continue;
4491
- if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
4492
- messageSegments.push({ type: "guideline_ref", path: value });
4493
- continue;
4494
- }
4495
4385
  const fileText = fileContentsByPath.get(value);
4496
4386
  if (fileText !== void 0) {
4497
4387
  messageSegments.push({ type: "file", text: fileText, path: value });
@@ -4540,10 +4430,6 @@ ${messageContent}`);
4540
4430
  } else {
4541
4431
  const questionParts = [];
4542
4432
  for (const segment of testCase.input_segments) {
4543
- if (segment.type === "file" && typeof segment.path === "string" && testCase.guideline_patterns && isGuidelineFile(segment.path, testCase.guideline_patterns)) {
4544
- questionParts.push(`<Attached: ${segment.path}>`);
4545
- continue;
4546
- }
4547
4433
  const formattedContent = formatSegment(segment, mode);
4548
4434
  if (formattedContent) {
4549
4435
  questionParts.push(formattedContent);
@@ -4554,11 +4440,9 @@ ${messageContent}`);
4554
4440
  const chatPrompt = useRoleMarkers ? buildChatPromptFromSegments({
4555
4441
  messages: testCase.input,
4556
4442
  segmentsByMessage,
4557
- guidelinePatterns: testCase.guideline_patterns,
4558
- guidelineContent: guidelines,
4559
4443
  mode
4560
4444
  }) : void 0;
4561
- return { question, guidelines, chatPrompt };
4445
+ return { question, chatPrompt };
4562
4446
  }
4563
4447
  function needsRoleMarkers(messages, processedSegmentsByMessage) {
4564
4448
  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
@@ -4573,14 +4457,7 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
4573
4457
  return messagesWithContent > 1;
4574
4458
  }
4575
4459
  function buildChatPromptFromSegments(options) {
4576
- const {
4577
- messages,
4578
- segmentsByMessage,
4579
- guidelinePatterns,
4580
- guidelineContent,
4581
- systemPrompt,
4582
- mode = "lm"
4583
- } = options;
4460
+ const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
4584
4461
  if (messages.length === 0) {
4585
4462
  return void 0;
4586
4463
  }
@@ -4588,11 +4465,6 @@ function buildChatPromptFromSegments(options) {
4588
4465
  if (systemPrompt && systemPrompt.trim().length > 0) {
4589
4466
  systemSegments.push(systemPrompt.trim());
4590
4467
  }
4591
- if (guidelineContent && guidelineContent.trim().length > 0) {
4592
- systemSegments.push(`[[ ## Guidelines ## ]]
4593
-
4594
- ${guidelineContent.trim()}`);
4595
- }
4596
4468
  let startIndex = 0;
4597
4469
  while (startIndex < messages.length && messages[startIndex].role === "system") {
4598
4470
  const segments = segmentsByMessage[startIndex];
@@ -4628,15 +4500,8 @@ ${guidelineContent.trim()}`);
4628
4500
  contentParts.push("@[Tool]:");
4629
4501
  }
4630
4502
  for (const segment of segments) {
4631
- if (segment.type === "guideline_ref") {
4632
- continue;
4633
- }
4634
4503
  const formatted = formatSegment(segment, mode);
4635
4504
  if (formatted) {
4636
- const isGuidelineRef = segment.type === "file" && typeof segment.path === "string" && guidelinePatterns && isGuidelineFile(segment.path, guidelinePatterns);
4637
- if (isGuidelineRef) {
4638
- continue;
4639
- }
4640
4505
  contentParts.push(formatted);
4641
4506
  }
4642
4507
  }
@@ -4654,30 +4519,27 @@ ${guidelineContent.trim()}`);
4654
4519
  function asString5(value) {
4655
4520
  return typeof value === "string" ? value : void 0;
4656
4521
  }
4657
- function logWarning5(message) {
4658
- console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
4659
- }
4660
4522
 
4661
4523
  // src/evaluation/yaml-parser.ts
4662
- var ANSI_YELLOW8 = "\x1B[33m";
4524
+ var ANSI_YELLOW7 = "\x1B[33m";
4663
4525
  var ANSI_RED3 = "\x1B[31m";
4664
- var ANSI_RESET9 = "\x1B[0m";
4526
+ var ANSI_RESET8 = "\x1B[0m";
4665
4527
  function resolveTests(suite) {
4666
4528
  if (suite.tests !== void 0) return suite.tests;
4667
4529
  if (suite.eval_cases !== void 0) {
4668
- logWarning6("'eval_cases' is deprecated. Use 'tests' instead.");
4530
+ logWarning5("'eval_cases' is deprecated. Use 'tests' instead.");
4669
4531
  return suite.eval_cases;
4670
4532
  }
4671
4533
  if (suite.evalcases !== void 0) {
4672
- logWarning6("'evalcases' is deprecated. Use 'tests' instead.");
4534
+ logWarning5("'evalcases' is deprecated. Use 'tests' instead.");
4673
4535
  return suite.evalcases;
4674
4536
  }
4675
4537
  return void 0;
4676
4538
  }
4677
4539
  async function readTestSuiteMetadata(testFilePath) {
4678
4540
  try {
4679
- const absolutePath = import_node_path9.default.resolve(testFilePath);
4680
- const content = await (0, import_promises9.readFile)(absolutePath, "utf8");
4541
+ const absolutePath = import_node_path8.default.resolve(testFilePath);
4542
+ const content = await (0, import_promises8.readFile)(absolutePath, "utf8");
4681
4543
  const parsed = interpolateEnv((0, import_yaml4.parse)(content), process.env);
4682
4544
  if (!isJsonObject(parsed)) {
4683
4545
  return {};
@@ -4706,6 +4568,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
4706
4568
  tests,
4707
4569
  trials: extractTrialsConfig(parsed),
4708
4570
  targets: extractTargetsFromSuite(parsed),
4571
+ workers: extractWorkersFromSuite(parsed),
4709
4572
  cacheConfig: extractCacheConfig(parsed),
4710
4573
  totalBudgetUsd: extractTotalBudgetUsd(parsed),
4711
4574
  ...metadata !== void 0 && { metadata },
@@ -4728,26 +4591,25 @@ var loadEvalCases = loadTests;
4728
4591
  async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4729
4592
  const verbose = options?.verbose ?? false;
4730
4593
  const filterPattern = options?.filter;
4731
- const absoluteTestPath = import_node_path9.default.resolve(evalFilePath);
4594
+ const absoluteTestPath = import_node_path8.default.resolve(evalFilePath);
4732
4595
  const repoRootPath = resolveToAbsolutePath(repoRoot);
4733
4596
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
4734
4597
  const config = await loadConfig(absoluteTestPath, repoRootPath);
4735
- const guidelinePatterns = config?.guideline_patterns;
4736
- const rawFile = await (0, import_promises9.readFile)(absoluteTestPath, "utf8");
4598
+ const rawFile = await (0, import_promises8.readFile)(absoluteTestPath, "utf8");
4737
4599
  const interpolated = interpolateEnv((0, import_yaml4.parse)(rawFile), process.env);
4738
4600
  if (!isJsonObject(interpolated)) {
4739
4601
  throw new Error(`Invalid test file format: ${evalFilePath}`);
4740
4602
  }
4741
4603
  const suite = interpolated;
4742
- const datasetNameFromSuite = asString6(suite.dataset)?.trim();
4743
- const fallbackDataset = import_node_path9.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
4744
- const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
4604
+ const evalSetNameFromSuite = asString6(suite.name)?.trim();
4605
+ const fallbackEvalSet = import_node_path8.default.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
4606
+ const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
4745
4607
  const rawTestcases = resolveTests(suite);
4746
4608
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
4747
- const evalFileDir = import_node_path9.default.dirname(absoluteTestPath);
4609
+ const evalFileDir = import_node_path8.default.dirname(absoluteTestPath);
4748
4610
  let expandedTestcases;
4749
4611
  if (typeof rawTestcases === "string") {
4750
- const externalPath = import_node_path9.default.resolve(evalFileDir, rawTestcases);
4612
+ const externalPath = import_node_path8.default.resolve(evalFileDir, rawTestcases);
4751
4613
  expandedTestcases = await loadCasesFromFile(externalPath);
4752
4614
  } else if (Array.isArray(rawTestcases)) {
4753
4615
  expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
@@ -4761,18 +4623,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4761
4623
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
4762
4624
  const suiteAssertions = suite.assertions ?? suite.assert;
4763
4625
  if (suite.assert !== void 0 && suite.assertions === void 0) {
4764
- logWarning6("'assert' is deprecated at the suite level. Use 'assertions' instead.");
4626
+ logWarning5("'assert' is deprecated at the suite level. Use 'assertions' instead.");
4765
4627
  }
4766
4628
  const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
4767
4629
  const results = [];
4768
4630
  for (const rawEvalcase of expandedTestcases) {
4769
4631
  if (!isJsonObject(rawEvalcase)) {
4770
- logWarning6("Skipping invalid test entry (expected object)");
4632
+ logWarning5("Skipping invalid test entry (expected object)");
4771
4633
  continue;
4772
4634
  }
4773
4635
  const evalcase = rawEvalcase;
4774
4636
  const id = asString6(evalcase.id);
4775
- if (filterPattern && (!id || !import_micromatch3.default.isMatch(id, filterPattern))) {
4637
+ if (filterPattern && (!id || !import_micromatch2.default.isMatch(id, filterPattern))) {
4776
4638
  continue;
4777
4639
  }
4778
4640
  const conversationId = asString6(evalcase.conversation_id);
@@ -4780,7 +4642,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4780
4642
  if (!outcome && evalcase.expected_outcome !== void 0) {
4781
4643
  outcome = asString6(evalcase.expected_outcome);
4782
4644
  if (outcome) {
4783
- logWarning6(
4645
+ logWarning5(
4784
4646
  `Test '${asString6(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
4785
4647
  );
4786
4648
  }
@@ -4800,15 +4662,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4800
4662
  const effectiveSuiteInputMessages = suiteInputMessages && !skipDefaults ? suiteInputMessages : void 0;
4801
4663
  const inputMessages = effectiveSuiteInputMessages ? [...effectiveSuiteInputMessages, ...testInputMessages] : testInputMessages;
4802
4664
  const hasExpectedMessages = expectedMessages.length > 0;
4803
- const guidelinePaths = [];
4804
4665
  const inputTextParts = [];
4805
4666
  const suiteInputSegments = effectiveSuiteInputMessages ? await processMessages({
4806
4667
  messages: effectiveSuiteInputMessages,
4807
4668
  searchRoots,
4808
4669
  repoRootPath,
4809
- guidelinePatterns,
4810
- guidelinePaths,
4811
- treatFileSegmentsAsGuidelines: true,
4812
4670
  textParts: inputTextParts,
4813
4671
  messageType: "input",
4814
4672
  verbose
@@ -4817,8 +4675,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4817
4675
  messages: testInputMessages,
4818
4676
  searchRoots,
4819
4677
  repoRootPath,
4820
- guidelinePatterns,
4821
- guidelinePaths,
4822
4678
  textParts: inputTextParts,
4823
4679
  messageType: "input",
4824
4680
  verbose
@@ -4867,26 +4723,20 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4867
4723
  userFilePaths.push(segment.resolvedPath);
4868
4724
  }
4869
4725
  }
4870
- const allFilePaths = [
4871
- ...guidelinePaths.map((guidelinePath) => import_node_path9.default.resolve(guidelinePath)),
4872
- ...userFilePaths
4873
- ];
4874
4726
  const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
4875
4727
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
4876
4728
  const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
4877
4729
  const caseTargets = extractTargetsFromTestCase(evalcase);
4878
4730
  const testCase = {
4879
4731
  id,
4880
- dataset: datasetName,
4732
+ eval_set: evalSetName,
4881
4733
  conversation_id: conversationId,
4882
4734
  question,
4883
4735
  input: inputMessages,
4884
4736
  input_segments: inputSegments,
4885
4737
  expected_output: outputSegments,
4886
4738
  reference_answer: referenceAnswer,
4887
- guideline_paths: guidelinePaths.map((guidelinePath) => import_node_path9.default.resolve(guidelinePath)),
4888
- guideline_patterns: guidelinePatterns,
4889
- file_paths: allFilePaths,
4739
+ file_paths: userFilePaths,
4890
4740
  criteria: outcome ?? "",
4891
4741
  evaluator: evalCaseEvaluatorKind,
4892
4742
  assertions: evaluators,
@@ -4894,20 +4744,6 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4894
4744
  metadata,
4895
4745
  targets: caseTargets
4896
4746
  };
4897
- if (verbose) {
4898
- console.log(`
4899
- [Test: ${id}]`);
4900
- if (testCase.guideline_paths.length > 0) {
4901
- console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
4902
- for (const guidelinePath of testCase.guideline_paths) {
4903
- console.log(` - ${guidelinePath}`);
4904
- }
4905
- } else if (!guidelinePatterns || guidelinePatterns.length === 0) {
4906
- console.log(" No guidelines found (guideline_patterns not configured)");
4907
- } else {
4908
- console.log(" No guidelines found");
4909
- }
4910
- }
4911
4747
  results.push(testCase);
4912
4748
  }
4913
4749
  return { tests: results, parsed: suite };
@@ -4926,7 +4762,7 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
4926
4762
  if (!isJsonObject(raw)) return void 0;
4927
4763
  const obj = raw;
4928
4764
  if (obj.script !== void 0 && obj.command === void 0) {
4929
- logWarning6("'script' is deprecated. Use 'command' instead.");
4765
+ logWarning5("'script' is deprecated. Use 'command' instead.");
4930
4766
  }
4931
4767
  const commandSource = obj.command ?? obj.script;
4932
4768
  if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
@@ -4934,8 +4770,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
4934
4770
  if (commandArr.length === 0) return void 0;
4935
4771
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
4936
4772
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
4937
- if (cwd && !import_node_path9.default.isAbsolute(cwd)) {
4938
- cwd = import_node_path9.default.resolve(evalFileDir, cwd);
4773
+ if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
4774
+ cwd = import_node_path8.default.resolve(evalFileDir, cwd);
4939
4775
  }
4940
4776
  const config = { command: commandArr };
4941
4777
  if (timeoutMs !== void 0) {
@@ -5025,10 +4861,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
5025
4861
  }
5026
4862
  async function resolveWorkspaceConfig(raw, evalFileDir) {
5027
4863
  if (typeof raw === "string") {
5028
- const workspaceFilePath = import_node_path9.default.resolve(evalFileDir, raw);
4864
+ const workspaceFilePath = import_node_path8.default.resolve(evalFileDir, raw);
5029
4865
  let content;
5030
4866
  try {
5031
- content = await (0, import_promises9.readFile)(workspaceFilePath, "utf8");
4867
+ content = await (0, import_promises8.readFile)(workspaceFilePath, "utf8");
5032
4868
  } catch {
5033
4869
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
5034
4870
  }
@@ -5038,7 +4874,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
5038
4874
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
5039
4875
  );
5040
4876
  }
5041
- const workspaceFileDir = import_node_path9.default.dirname(workspaceFilePath);
4877
+ const workspaceFileDir = import_node_path8.default.dirname(workspaceFilePath);
5042
4878
  return parseWorkspaceConfig(parsed, workspaceFileDir);
5043
4879
  }
5044
4880
  return parseWorkspaceConfig(raw, evalFileDir);
@@ -5058,8 +4894,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
5058
4894
  throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
5059
4895
  }
5060
4896
  let template = typeof obj.template === "string" ? obj.template : void 0;
5061
- if (template && !import_node_path9.default.isAbsolute(template)) {
5062
- template = import_node_path9.default.resolve(evalFileDir, template);
4897
+ if (template && !import_node_path8.default.isAbsolute(template)) {
4898
+ template = import_node_path8.default.resolve(evalFileDir, template);
5063
4899
  }
5064
4900
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
5065
4901
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
@@ -5109,28 +4945,28 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
5109
4945
  function asString6(value) {
5110
4946
  return typeof value === "string" ? value : void 0;
5111
4947
  }
5112
- function logWarning6(message, details) {
4948
+ function logWarning5(message, details) {
5113
4949
  if (details && details.length > 0) {
5114
4950
  const detailBlock = details.join("\n");
5115
- console.warn(`${ANSI_YELLOW8}Warning: ${message}
5116
- ${detailBlock}${ANSI_RESET9}`);
4951
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}
4952
+ ${detailBlock}${ANSI_RESET8}`);
5117
4953
  } else {
5118
- console.warn(`${ANSI_YELLOW8}Warning: ${message}${ANSI_RESET9}`);
4954
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET8}`);
5119
4955
  }
5120
4956
  }
5121
4957
  function logError3(message, details) {
5122
4958
  if (details && details.length > 0) {
5123
4959
  const detailBlock = details.join("\n");
5124
4960
  console.error(`${ANSI_RED3}Error: ${message}
5125
- ${detailBlock}${ANSI_RESET9}`);
4961
+ ${detailBlock}${ANSI_RESET8}`);
5126
4962
  } else {
5127
- console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET9}`);
4963
+ console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET8}`);
5128
4964
  }
5129
4965
  }
5130
4966
 
5131
4967
  // src/evaluation/loaders/eval-yaml-transpiler.ts
5132
4968
  var import_node_fs2 = require("fs");
5133
- var import_node_path10 = __toESM(require("path"), 1);
4969
+ var import_node_path9 = __toESM(require("path"), 1);
5134
4970
  var import_yaml5 = require("yaml");
5135
4971
  function codeGraderInstruction(graderName, description) {
5136
4972
  const desc = description ? ` This grader: ${description}.` : "";
@@ -5375,7 +5211,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
5375
5211
  function transpileEvalYamlFile(evalYamlPath) {
5376
5212
  const content = (0, import_node_fs2.readFileSync)(evalYamlPath, "utf8");
5377
5213
  const parsed = (0, import_yaml5.parse)(content);
5378
- return transpileEvalYaml(parsed, import_node_path10.default.basename(evalYamlPath));
5214
+ return transpileEvalYaml(parsed, import_node_path9.default.basename(evalYamlPath));
5379
5215
  }
5380
5216
  function getOutputFilenames(result) {
5381
5217
  const names = /* @__PURE__ */ new Map();
@@ -5394,11 +5230,11 @@ function getOutputFilenames(result) {
5394
5230
 
5395
5231
  // src/evaluation/file-utils.ts
5396
5232
  var import_node_fs3 = require("fs");
5397
- var import_promises10 = require("fs/promises");
5398
- var import_node_path11 = __toESM(require("path"), 1);
5233
+ var import_promises9 = require("fs/promises");
5234
+ var import_node_path10 = __toESM(require("path"), 1);
5399
5235
  async function fileExists2(filePath) {
5400
5236
  try {
5401
- await (0, import_promises10.access)(filePath, import_node_fs3.constants.F_OK);
5237
+ await (0, import_promises9.access)(filePath, import_node_fs3.constants.F_OK);
5402
5238
  return true;
5403
5239
  } catch {
5404
5240
  return false;
@@ -5408,22 +5244,22 @@ function normalizeLineEndings(content) {
5408
5244
  return content.replace(/\r\n/g, "\n");
5409
5245
  }
5410
5246
  async function readTextFile(filePath) {
5411
- const content = await (0, import_promises10.readFile)(filePath, "utf8");
5247
+ const content = await (0, import_promises9.readFile)(filePath, "utf8");
5412
5248
  return normalizeLineEndings(content);
5413
5249
  }
5414
5250
  async function readJsonFile(filePath) {
5415
- const content = await (0, import_promises10.readFile)(filePath, "utf8");
5251
+ const content = await (0, import_promises9.readFile)(filePath, "utf8");
5416
5252
  return JSON.parse(content);
5417
5253
  }
5418
5254
  async function findGitRoot(startPath) {
5419
- let currentDir = import_node_path11.default.dirname(import_node_path11.default.resolve(startPath));
5420
- const root = import_node_path11.default.parse(currentDir).root;
5255
+ let currentDir = import_node_path10.default.dirname(import_node_path10.default.resolve(startPath));
5256
+ const root = import_node_path10.default.parse(currentDir).root;
5421
5257
  while (currentDir !== root) {
5422
- const gitPath = import_node_path11.default.join(currentDir, ".git");
5258
+ const gitPath = import_node_path10.default.join(currentDir, ".git");
5423
5259
  if (await fileExists2(gitPath)) {
5424
5260
  return currentDir;
5425
5261
  }
5426
- const parentDir = import_node_path11.default.dirname(currentDir);
5262
+ const parentDir = import_node_path10.default.dirname(currentDir);
5427
5263
  if (parentDir === currentDir) {
5428
5264
  break;
5429
5265
  }
@@ -5434,8 +5270,8 @@ async function findGitRoot(startPath) {
5434
5270
  function buildDirectoryChain2(filePath, repoRoot) {
5435
5271
  const directories = [];
5436
5272
  const seen = /* @__PURE__ */ new Set();
5437
- const boundary = import_node_path11.default.resolve(repoRoot);
5438
- let current = import_node_path11.default.resolve(import_node_path11.default.dirname(filePath));
5273
+ const boundary = import_node_path10.default.resolve(repoRoot);
5274
+ let current = import_node_path10.default.resolve(import_node_path10.default.dirname(filePath));
5439
5275
  while (current !== void 0) {
5440
5276
  if (!seen.has(current)) {
5441
5277
  directories.push(current);
@@ -5444,7 +5280,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
5444
5280
  if (current === boundary) {
5445
5281
  break;
5446
5282
  }
5447
- const parent = import_node_path11.default.dirname(current);
5283
+ const parent = import_node_path10.default.dirname(current);
5448
5284
  if (parent === current) {
5449
5285
  break;
5450
5286
  }
@@ -5458,16 +5294,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
5458
5294
  function buildSearchRoots2(evalPath, repoRoot) {
5459
5295
  const uniqueRoots = [];
5460
5296
  const addRoot = (root) => {
5461
- const normalized = import_node_path11.default.resolve(root);
5297
+ const normalized = import_node_path10.default.resolve(root);
5462
5298
  if (!uniqueRoots.includes(normalized)) {
5463
5299
  uniqueRoots.push(normalized);
5464
5300
  }
5465
5301
  };
5466
- let currentDir = import_node_path11.default.dirname(evalPath);
5302
+ let currentDir = import_node_path10.default.dirname(evalPath);
5467
5303
  let reachedBoundary = false;
5468
5304
  while (!reachedBoundary) {
5469
5305
  addRoot(currentDir);
5470
- const parentDir = import_node_path11.default.dirname(currentDir);
5306
+ const parentDir = import_node_path10.default.dirname(currentDir);
5471
5307
  if (currentDir === repoRoot || parentDir === currentDir) {
5472
5308
  reachedBoundary = true;
5473
5309
  } else {
@@ -5485,16 +5321,16 @@ function trimLeadingSeparators2(value) {
5485
5321
  async function resolveFileReference3(rawValue, searchRoots) {
5486
5322
  const displayPath = trimLeadingSeparators2(rawValue);
5487
5323
  const potentialPaths = [];
5488
- if (import_node_path11.default.isAbsolute(rawValue)) {
5489
- potentialPaths.push(import_node_path11.default.normalize(rawValue));
5324
+ if (import_node_path10.default.isAbsolute(rawValue)) {
5325
+ potentialPaths.push(import_node_path10.default.normalize(rawValue));
5490
5326
  }
5491
5327
  for (const base of searchRoots) {
5492
- potentialPaths.push(import_node_path11.default.resolve(base, displayPath));
5328
+ potentialPaths.push(import_node_path10.default.resolve(base, displayPath));
5493
5329
  }
5494
5330
  const attempted = [];
5495
5331
  const seen = /* @__PURE__ */ new Set();
5496
5332
  for (const candidate of potentialPaths) {
5497
- const absoluteCandidate = import_node_path11.default.resolve(candidate);
5333
+ const absoluteCandidate = import_node_path10.default.resolve(candidate);
5498
5334
  if (seen.has(absoluteCandidate)) {
5499
5335
  continue;
5500
5336
  }
@@ -5728,10 +5564,10 @@ function buildChatPrompt(request) {
5728
5564
  if (hasSystemMessage) {
5729
5565
  return provided;
5730
5566
  }
5731
- const systemContent2 = resolveSystemContent(request, false);
5567
+ const systemContent2 = resolveSystemContent(request);
5732
5568
  return [{ role: "system", content: systemContent2 }, ...provided];
5733
5569
  }
5734
- const systemContent = resolveSystemContent(request, true);
5570
+ const systemContent = resolveSystemContent(request);
5735
5571
  const userContent = request.question.trim();
5736
5572
  const prompt = [
5737
5573
  { role: "system", content: systemContent },
@@ -5739,18 +5575,13 @@ function buildChatPrompt(request) {
5739
5575
  ];
5740
5576
  return prompt;
5741
5577
  }
5742
- function resolveSystemContent(request, includeGuidelines) {
5578
+ function resolveSystemContent(request) {
5743
5579
  const systemSegments = [];
5744
5580
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
5745
5581
  systemSegments.push(request.systemPrompt.trim());
5746
5582
  } else {
5747
5583
  systemSegments.push(DEFAULT_SYSTEM_PROMPT);
5748
5584
  }
5749
- if (includeGuidelines && request.guidelines && request.guidelines.trim().length > 0) {
5750
- systemSegments.push(`[[ ## Guidelines ## ]]
5751
-
5752
- ${request.guidelines.trim()}`);
5753
- }
5754
5585
  return systemSegments.join("\n\n");
5755
5586
  }
5756
5587
  function toModelMessages(chatPrompt) {
@@ -5933,8 +5764,8 @@ async function withRetry(fn, retryConfig, signal) {
5933
5764
  var import_node_child_process = require("child_process");
5934
5765
  var import_node_crypto = require("crypto");
5935
5766
  var import_node_fs4 = require("fs");
5936
- var import_promises11 = require("fs/promises");
5937
- var import_node_path13 = __toESM(require("path"), 1);
5767
+ var import_promises10 = require("fs/promises");
5768
+ var import_node_path12 = __toESM(require("path"), 1);
5938
5769
 
5939
5770
  // src/evaluation/providers/claude-log-tracker.ts
5940
5771
  var GLOBAL_LOGS_KEY = Symbol.for("agentv.claudeLogs");
@@ -5990,17 +5821,11 @@ function subscribeToClaudeLogEntries(listener) {
5990
5821
  }
5991
5822
 
5992
5823
  // src/evaluation/providers/preread.ts
5993
- var import_node_path12 = __toESM(require("path"), 1);
5994
- function buildPromptDocument(request, inputFiles, options) {
5824
+ var import_node_path11 = __toESM(require("path"), 1);
5825
+ function buildPromptDocument(request, inputFiles) {
5995
5826
  const parts = [];
5996
- const guidelineFiles = collectGuidelineFiles(
5997
- inputFiles,
5998
- options?.guidelinePatterns ?? request.guideline_patterns,
5999
- options?.guidelineOverrides
6000
- );
6001
5827
  const inputFilesList = collectInputFiles(inputFiles);
6002
- const nonGuidelineInputFiles = inputFilesList.filter((file) => !guidelineFiles.includes(file));
6003
- const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
5828
+ const prereadBlock = buildMandatoryPrereadBlock(inputFilesList);
6004
5829
  if (prereadBlock.length > 0) {
6005
5830
  parts.push("\n", prereadBlock);
6006
5831
  }
@@ -6013,62 +5838,36 @@ function normalizeInputFiles(inputFiles) {
6013
5838
  }
6014
5839
  const deduped = /* @__PURE__ */ new Map();
6015
5840
  for (const inputFile of inputFiles) {
6016
- const absolutePath = import_node_path12.default.resolve(inputFile);
5841
+ const absolutePath = import_node_path11.default.resolve(inputFile);
6017
5842
  if (!deduped.has(absolutePath)) {
6018
5843
  deduped.set(absolutePath, absolutePath);
6019
5844
  }
6020
5845
  }
6021
5846
  return Array.from(deduped.values());
6022
5847
  }
6023
- function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
6024
- if (!inputFiles || inputFiles.length === 0) {
6025
- return [];
6026
- }
6027
- const unique = /* @__PURE__ */ new Map();
6028
- for (const inputFile of inputFiles) {
6029
- const absolutePath = import_node_path12.default.resolve(inputFile);
6030
- if (overrides?.has(absolutePath)) {
6031
- if (!unique.has(absolutePath)) {
6032
- unique.set(absolutePath, absolutePath);
6033
- }
6034
- continue;
6035
- }
6036
- const normalized = absolutePath.split(import_node_path12.default.sep).join("/");
6037
- if (isGuidelineFile(normalized, guidelinePatterns)) {
6038
- if (!unique.has(absolutePath)) {
6039
- unique.set(absolutePath, absolutePath);
6040
- }
6041
- }
6042
- }
6043
- return Array.from(unique.values());
6044
- }
6045
5848
  function collectInputFiles(inputFiles) {
6046
5849
  if (!inputFiles || inputFiles.length === 0) {
6047
5850
  return [];
6048
5851
  }
6049
5852
  const unique = /* @__PURE__ */ new Map();
6050
5853
  for (const inputFile of inputFiles) {
6051
- const absolutePath = import_node_path12.default.resolve(inputFile);
5854
+ const absolutePath = import_node_path11.default.resolve(inputFile);
6052
5855
  if (!unique.has(absolutePath)) {
6053
5856
  unique.set(absolutePath, absolutePath);
6054
5857
  }
6055
5858
  }
6056
5859
  return Array.from(unique.values());
6057
5860
  }
6058
- function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
6059
- if (guidelineFiles.length === 0 && inputFiles.length === 0) {
5861
+ function buildMandatoryPrereadBlock(inputFiles) {
5862
+ if (inputFiles.length === 0) {
6060
5863
  return "";
6061
5864
  }
6062
5865
  const buildList = (files) => files.map((absolutePath) => {
6063
- const fileName = import_node_path12.default.basename(absolutePath);
5866
+ const fileName = import_node_path11.default.basename(absolutePath);
6064
5867
  const fileUri = pathToFileUri(absolutePath);
6065
5868
  return `* [${fileName}](${fileUri})`;
6066
5869
  });
6067
5870
  const sections = [];
6068
- if (guidelineFiles.length > 0) {
6069
- sections.push(`Read all guideline files:
6070
- ${buildList(guidelineFiles).join("\n")}.`);
6071
- }
6072
5871
  if (inputFiles.length > 0) {
6073
5872
  sections.push(`Read all input files:
6074
5873
  ${buildList(inputFiles).join("\n")}.`);
@@ -6080,7 +5879,7 @@ ${buildList(inputFiles).join("\n")}.`);
6080
5879
  return sections.join("\n");
6081
5880
  }
6082
5881
  function pathToFileUri(filePath) {
6083
- const absolutePath = import_node_path12.default.isAbsolute(filePath) ? filePath : import_node_path12.default.resolve(filePath);
5882
+ const absolutePath = import_node_path11.default.isAbsolute(filePath) ? filePath : import_node_path11.default.resolve(filePath);
6084
5883
  const normalizedPath = absolutePath.replace(/\\/g, "/");
6085
5884
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
6086
5885
  return `file:///${normalizedPath}`;
@@ -6227,10 +6026,10 @@ var ClaudeCliProvider = class {
6227
6026
  }
6228
6027
  resolveCwd(cwdOverride) {
6229
6028
  if (cwdOverride) {
6230
- return import_node_path13.default.resolve(cwdOverride);
6029
+ return import_node_path12.default.resolve(cwdOverride);
6231
6030
  }
6232
6031
  if (this.config.cwd) {
6233
- return import_node_path13.default.resolve(this.config.cwd);
6032
+ return import_node_path12.default.resolve(this.config.cwd);
6234
6033
  }
6235
6034
  return void 0;
6236
6035
  }
@@ -6240,9 +6039,9 @@ var ClaudeCliProvider = class {
6240
6039
  return void 0;
6241
6040
  }
6242
6041
  if (this.config.logDir) {
6243
- return import_node_path13.default.resolve(this.config.logDir);
6042
+ return import_node_path12.default.resolve(this.config.logDir);
6244
6043
  }
6245
- return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "claude-cli");
6044
+ return import_node_path12.default.join(process.cwd(), ".agentv", "logs", "claude-cli");
6246
6045
  }
6247
6046
  async createStreamLogger(request) {
6248
6047
  const logDir = this.resolveLogDirectory();
@@ -6250,13 +6049,13 @@ var ClaudeCliProvider = class {
6250
6049
  return void 0;
6251
6050
  }
6252
6051
  try {
6253
- await (0, import_promises11.mkdir)(logDir, { recursive: true });
6052
+ await (0, import_promises10.mkdir)(logDir, { recursive: true });
6254
6053
  } catch (error) {
6255
6054
  const message = error instanceof Error ? error.message : String(error);
6256
6055
  console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
6257
6056
  return void 0;
6258
6057
  }
6259
- const filePath = import_node_path13.default.join(logDir, buildLogFilename(request, this.targetName));
6058
+ const filePath = import_node_path12.default.join(logDir, buildLogFilename(request, this.targetName));
6260
6059
  try {
6261
6060
  const logger = await ClaudeCliStreamLogger.create({
6262
6061
  filePath,
@@ -6576,8 +6375,8 @@ function tryParseJson(line) {
6576
6375
  // src/evaluation/providers/claude-sdk.ts
6577
6376
  var import_node_crypto2 = require("crypto");
6578
6377
  var import_node_fs5 = require("fs");
6579
- var import_promises12 = require("fs/promises");
6580
- var import_node_path14 = __toESM(require("path"), 1);
6378
+ var import_promises11 = require("fs/promises");
6379
+ var import_node_path13 = __toESM(require("path"), 1);
6581
6380
  var claudeSdkModule = null;
6582
6381
  async function loadClaudeSdk() {
6583
6382
  if (!claudeSdkModule) {
@@ -6737,10 +6536,10 @@ var ClaudeSdkProvider = class {
6737
6536
  }
6738
6537
  resolveCwd(cwdOverride) {
6739
6538
  if (cwdOverride) {
6740
- return import_node_path14.default.resolve(cwdOverride);
6539
+ return import_node_path13.default.resolve(cwdOverride);
6741
6540
  }
6742
6541
  if (this.config.cwd) {
6743
- return import_node_path14.default.resolve(this.config.cwd);
6542
+ return import_node_path13.default.resolve(this.config.cwd);
6744
6543
  }
6745
6544
  return void 0;
6746
6545
  }
@@ -6750,9 +6549,9 @@ var ClaudeSdkProvider = class {
6750
6549
  return void 0;
6751
6550
  }
6752
6551
  if (this.config.logDir) {
6753
- return import_node_path14.default.resolve(this.config.logDir);
6552
+ return import_node_path13.default.resolve(this.config.logDir);
6754
6553
  }
6755
- return import_node_path14.default.join(process.cwd(), ".agentv", "logs", "claude");
6554
+ return import_node_path13.default.join(process.cwd(), ".agentv", "logs", "claude");
6756
6555
  }
6757
6556
  async createStreamLogger(request) {
6758
6557
  const logDir = this.resolveLogDirectory();
@@ -6760,13 +6559,13 @@ var ClaudeSdkProvider = class {
6760
6559
  return void 0;
6761
6560
  }
6762
6561
  try {
6763
- await (0, import_promises12.mkdir)(logDir, { recursive: true });
6562
+ await (0, import_promises11.mkdir)(logDir, { recursive: true });
6764
6563
  } catch (error) {
6765
6564
  const message = error instanceof Error ? error.message : String(error);
6766
6565
  console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
6767
6566
  return void 0;
6768
6567
  }
6769
- const filePath = import_node_path14.default.join(logDir, buildLogFilename2(request, this.targetName));
6568
+ const filePath = import_node_path13.default.join(logDir, buildLogFilename2(request, this.targetName));
6770
6569
  try {
6771
6570
  const logger = await ClaudeStreamLogger.create({
6772
6571
  filePath,
@@ -6971,9 +6770,9 @@ function formatElapsed2(startedAt) {
6971
6770
 
6972
6771
  // src/evaluation/providers/cli.ts
6973
6772
  var import_node_child_process2 = require("child_process");
6974
- var import_promises13 = __toESM(require("fs/promises"), 1);
6773
+ var import_promises12 = __toESM(require("fs/promises"), 1);
6975
6774
  var import_node_os = __toESM(require("os"), 1);
6976
- var import_node_path15 = __toESM(require("path"), 1);
6775
+ var import_node_path14 = __toESM(require("path"), 1);
6977
6776
  var import_node_util = require("util");
6978
6777
  var import_zod2 = require("zod");
6979
6778
  var ToolCallSchema = import_zod2.z.object({
@@ -7182,7 +6981,6 @@ var CliProvider = class {
7182
6981
  const { values: templateValues, promptFilePath } = await buildTemplateValues(
7183
6982
  {
7184
6983
  question: "",
7185
- guidelines: "",
7186
6984
  inputFiles: batchInputFiles,
7187
6985
  evalCaseId: "batch",
7188
6986
  attempt: 0
@@ -7370,7 +7168,7 @@ var CliProvider = class {
7370
7168
  throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
7371
7169
  } finally {
7372
7170
  if (!this.keepTempFiles) {
7373
- await import_promises13.default.unlink(filePath).catch(() => {
7171
+ await import_promises12.default.unlink(filePath).catch(() => {
7374
7172
  });
7375
7173
  }
7376
7174
  }
@@ -7415,7 +7213,6 @@ var CliProvider = class {
7415
7213
  const { values: templateValues, promptFilePath } = await buildTemplateValues(
7416
7214
  {
7417
7215
  question: "",
7418
- guidelines: "",
7419
7216
  inputFiles: [],
7420
7217
  evalCaseId: "healthcheck",
7421
7218
  attempt: 0
@@ -7451,12 +7248,11 @@ var CliProvider = class {
7451
7248
  async function buildTemplateValues(request, config, outputFilePath) {
7452
7249
  const inputFiles = normalizeInputFiles2(request.inputFiles);
7453
7250
  const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
7454
- await import_promises13.default.writeFile(promptFilePath, request.question ?? "", "utf8");
7251
+ await import_promises12.default.writeFile(promptFilePath, request.question ?? "", "utf8");
7455
7252
  return {
7456
7253
  values: {
7457
7254
  PROMPT: shellEscape(request.question ?? ""),
7458
7255
  PROMPT_FILE: shellEscape(promptFilePath),
7459
- GUIDELINES: shellEscape(request.guidelines ?? ""),
7460
7256
  EVAL_ID: shellEscape(request.evalCaseId ?? ""),
7461
7257
  ATTEMPT: shellEscape(String(request.attempt ?? 0)),
7462
7258
  FILES: formatFileList(inputFiles, config.filesFormat),
@@ -7469,7 +7265,7 @@ async function cleanupTempFile(filePath, keepTempFiles) {
7469
7265
  if (!filePath || keepTempFiles) {
7470
7266
  return;
7471
7267
  }
7472
- await import_promises13.default.unlink(filePath).catch(() => {
7268
+ await import_promises12.default.unlink(filePath).catch(() => {
7473
7269
  });
7474
7270
  }
7475
7271
  function normalizeInputFiles2(inputFiles) {
@@ -7478,7 +7274,7 @@ function normalizeInputFiles2(inputFiles) {
7478
7274
  }
7479
7275
  const unique = /* @__PURE__ */ new Map();
7480
7276
  for (const inputFile of inputFiles) {
7481
- const absolutePath = import_node_path15.default.resolve(inputFile);
7277
+ const absolutePath = import_node_path14.default.resolve(inputFile);
7482
7278
  if (!unique.has(absolutePath)) {
7483
7279
  unique.set(absolutePath, absolutePath);
7484
7280
  }
@@ -7492,7 +7288,7 @@ function formatFileList(files, template) {
7492
7288
  const formatter = template ?? "{path}";
7493
7289
  return files.map((filePath) => {
7494
7290
  const escapedPath = shellEscape(filePath);
7495
- const escapedName = shellEscape(import_node_path15.default.basename(filePath));
7291
+ const escapedName = shellEscape(import_node_path14.default.basename(filePath));
7496
7292
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
7497
7293
  }).join(" ");
7498
7294
  }
@@ -7516,7 +7312,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
7516
7312
  const safeEvalId = evalCaseId || "unknown";
7517
7313
  const timestamp = Date.now();
7518
7314
  const random = Math.random().toString(36).substring(2, 9);
7519
- return import_node_path15.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
7315
+ return import_node_path14.default.join(import_node_os.default.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
7520
7316
  }
7521
7317
  function formatTimeoutSuffix2(timeoutMs) {
7522
7318
  if (!timeoutMs || timeoutMs <= 0) {
@@ -7529,8 +7325,8 @@ function formatTimeoutSuffix2(timeoutMs) {
7529
7325
  // src/evaluation/providers/codex.ts
7530
7326
  var import_node_crypto3 = require("crypto");
7531
7327
  var import_node_fs6 = require("fs");
7532
- var import_promises14 = require("fs/promises");
7533
- var import_node_path16 = __toESM(require("path"), 1);
7328
+ var import_promises13 = require("fs/promises");
7329
+ var import_node_path15 = __toESM(require("path"), 1);
7534
7330
 
7535
7331
  // src/evaluation/providers/codex-log-tracker.ts
7536
7332
  var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
@@ -7765,10 +7561,10 @@ ${basePrompt}` : basePrompt;
7765
7561
  }
7766
7562
  resolveCwd(cwdOverride) {
7767
7563
  if (cwdOverride) {
7768
- return import_node_path16.default.resolve(cwdOverride);
7564
+ return import_node_path15.default.resolve(cwdOverride);
7769
7565
  }
7770
7566
  if (this.config.cwd) {
7771
- return import_node_path16.default.resolve(this.config.cwd);
7567
+ return import_node_path15.default.resolve(this.config.cwd);
7772
7568
  }
7773
7569
  return void 0;
7774
7570
  }
@@ -7778,9 +7574,9 @@ ${basePrompt}` : basePrompt;
7778
7574
  return void 0;
7779
7575
  }
7780
7576
  if (this.config.logDir) {
7781
- return import_node_path16.default.resolve(this.config.logDir);
7577
+ return import_node_path15.default.resolve(this.config.logDir);
7782
7578
  }
7783
- return import_node_path16.default.join(process.cwd(), ".agentv", "logs", "codex");
7579
+ return import_node_path15.default.join(process.cwd(), ".agentv", "logs", "codex");
7784
7580
  }
7785
7581
  async createStreamLogger(request) {
7786
7582
  const logDir = this.resolveLogDirectory();
@@ -7788,13 +7584,13 @@ ${basePrompt}` : basePrompt;
7788
7584
  return void 0;
7789
7585
  }
7790
7586
  try {
7791
- await (0, import_promises14.mkdir)(logDir, { recursive: true });
7587
+ await (0, import_promises13.mkdir)(logDir, { recursive: true });
7792
7588
  } catch (error) {
7793
7589
  const message = error instanceof Error ? error.message : String(error);
7794
7590
  console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
7795
7591
  return void 0;
7796
7592
  }
7797
- const filePath = import_node_path16.default.join(logDir, buildLogFilename3(request, this.targetName));
7593
+ const filePath = import_node_path15.default.join(logDir, buildLogFilename3(request, this.targetName));
7798
7594
  try {
7799
7595
  const logger = await CodexSdkStreamLogger.create({
7800
7596
  filePath,
@@ -7937,8 +7733,8 @@ function formatElapsed3(startedAt) {
7937
7733
 
7938
7734
  // src/evaluation/providers/copilot-cli.ts
7939
7735
  var import_node_crypto5 = require("crypto");
7940
- var import_promises15 = require("fs/promises");
7941
- var import_node_path18 = __toESM(require("path"), 1);
7736
+ var import_promises14 = require("fs/promises");
7737
+ var import_node_path17 = __toESM(require("path"), 1);
7942
7738
  var import_node_stream = require("stream");
7943
7739
  var import_node_child_process3 = require("child_process");
7944
7740
  var acp = __toESM(require("@agentclientprotocol/sdk"), 1);
@@ -8000,7 +7796,7 @@ function subscribeToCopilotCliLogEntries(listener) {
8000
7796
  var import_node_crypto4 = require("crypto");
8001
7797
  var import_node_fs7 = require("fs");
8002
7798
  var import_node_os2 = require("os");
8003
- var import_node_path17 = __toESM(require("path"), 1);
7799
+ var import_node_path16 = __toESM(require("path"), 1);
8004
7800
  var import_node_url2 = require("url");
8005
7801
  var import_meta = {};
8006
7802
  function resolvePlatformCliPath() {
@@ -8025,7 +7821,7 @@ function resolvePlatformCliPath() {
8025
7821
  try {
8026
7822
  const resolved = import_meta.resolve(`${packageName}/package.json`);
8027
7823
  const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url2.fileURLToPath)(resolved) : resolved;
8028
- const binaryPath = import_node_path17.default.join(import_node_path17.default.dirname(packageJsonPath), binaryName);
7824
+ const binaryPath = import_node_path16.default.join(import_node_path16.default.dirname(packageJsonPath), binaryName);
8029
7825
  if ((0, import_node_fs7.existsSync)(binaryPath)) {
8030
7826
  return binaryPath;
8031
7827
  }
@@ -8033,7 +7829,7 @@ function resolvePlatformCliPath() {
8033
7829
  }
8034
7830
  let searchDir = process.cwd();
8035
7831
  for (let i = 0; i < 10; i++) {
8036
- const standardPath = import_node_path17.default.join(
7832
+ const standardPath = import_node_path16.default.join(
8037
7833
  searchDir,
8038
7834
  "node_modules",
8039
7835
  ...packageName.split("/"),
@@ -8042,13 +7838,13 @@ function resolvePlatformCliPath() {
8042
7838
  if ((0, import_node_fs7.existsSync)(standardPath)) {
8043
7839
  return standardPath;
8044
7840
  }
8045
- const bunDir = import_node_path17.default.join(searchDir, "node_modules", ".bun");
7841
+ const bunDir = import_node_path16.default.join(searchDir, "node_modules", ".bun");
8046
7842
  const prefix = `@github+copilot-${osPart}-${archPart}@`;
8047
7843
  try {
8048
7844
  const entries = (0, import_node_fs7.readdirSync)(bunDir);
8049
7845
  for (const entry of entries) {
8050
7846
  if (entry.startsWith(prefix)) {
8051
- const candidate = import_node_path17.default.join(
7847
+ const candidate = import_node_path16.default.join(
8052
7848
  bunDir,
8053
7849
  entry,
8054
7850
  "node_modules",
@@ -8063,7 +7859,7 @@ function resolvePlatformCliPath() {
8063
7859
  }
8064
7860
  } catch {
8065
7861
  }
8066
- const parent = import_node_path17.default.dirname(searchDir);
7862
+ const parent = import_node_path16.default.dirname(searchDir);
8067
7863
  if (parent === searchDir) break;
8068
7864
  searchDir = parent;
8069
7865
  }
@@ -8401,10 +8197,10 @@ var CopilotCliProvider = class {
8401
8197
  }
8402
8198
  resolveCwd(cwdOverride) {
8403
8199
  if (cwdOverride) {
8404
- return import_node_path18.default.resolve(cwdOverride);
8200
+ return import_node_path17.default.resolve(cwdOverride);
8405
8201
  }
8406
8202
  if (this.config.cwd) {
8407
- return import_node_path18.default.resolve(this.config.cwd);
8203
+ return import_node_path17.default.resolve(this.config.cwd);
8408
8204
  }
8409
8205
  return void 0;
8410
8206
  }
@@ -8423,9 +8219,9 @@ var CopilotCliProvider = class {
8423
8219
  return void 0;
8424
8220
  }
8425
8221
  if (this.config.logDir) {
8426
- return import_node_path18.default.resolve(this.config.logDir);
8222
+ return import_node_path17.default.resolve(this.config.logDir);
8427
8223
  }
8428
- return import_node_path18.default.join(process.cwd(), ".agentv", "logs", "copilot-cli");
8224
+ return import_node_path17.default.join(process.cwd(), ".agentv", "logs", "copilot-cli");
8429
8225
  }
8430
8226
  async createStreamLogger(request) {
8431
8227
  const logDir = this.resolveLogDirectory();
@@ -8433,13 +8229,13 @@ var CopilotCliProvider = class {
8433
8229
  return void 0;
8434
8230
  }
8435
8231
  try {
8436
- await (0, import_promises15.mkdir)(logDir, { recursive: true });
8232
+ await (0, import_promises14.mkdir)(logDir, { recursive: true });
8437
8233
  } catch (error) {
8438
8234
  const message = error instanceof Error ? error.message : String(error);
8439
8235
  console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
8440
8236
  return void 0;
8441
8237
  }
8442
- const filePath = import_node_path18.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
8238
+ const filePath = import_node_path17.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
8443
8239
  try {
8444
8240
  const logger = await CopilotStreamLogger.create(
8445
8241
  {
@@ -8533,8 +8329,8 @@ function summarizeAcpEvent(eventType, data) {
8533
8329
 
8534
8330
  // src/evaluation/providers/copilot-sdk.ts
8535
8331
  var import_node_crypto6 = require("crypto");
8536
- var import_promises16 = require("fs/promises");
8537
- var import_node_path19 = __toESM(require("path"), 1);
8332
+ var import_promises15 = require("fs/promises");
8333
+ var import_node_path18 = __toESM(require("path"), 1);
8538
8334
 
8539
8335
  // src/evaluation/providers/copilot-sdk-log-tracker.ts
8540
8336
  var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
@@ -8813,10 +8609,10 @@ var CopilotSdkProvider = class {
8813
8609
  }
8814
8610
  resolveCwd(cwdOverride) {
8815
8611
  if (cwdOverride) {
8816
- return import_node_path19.default.resolve(cwdOverride);
8612
+ return import_node_path18.default.resolve(cwdOverride);
8817
8613
  }
8818
8614
  if (this.config.cwd) {
8819
- return import_node_path19.default.resolve(this.config.cwd);
8615
+ return import_node_path18.default.resolve(this.config.cwd);
8820
8616
  }
8821
8617
  return void 0;
8822
8618
  }
@@ -8825,9 +8621,9 @@ var CopilotSdkProvider = class {
8825
8621
  return void 0;
8826
8622
  }
8827
8623
  if (this.config.logDir) {
8828
- return import_node_path19.default.resolve(this.config.logDir);
8624
+ return import_node_path18.default.resolve(this.config.logDir);
8829
8625
  }
8830
- return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
8626
+ return import_node_path18.default.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
8831
8627
  }
8832
8628
  async createStreamLogger(request) {
8833
8629
  const logDir = this.resolveLogDirectory();
@@ -8835,13 +8631,13 @@ var CopilotSdkProvider = class {
8835
8631
  return void 0;
8836
8632
  }
8837
8633
  try {
8838
- await (0, import_promises16.mkdir)(logDir, { recursive: true });
8634
+ await (0, import_promises15.mkdir)(logDir, { recursive: true });
8839
8635
  } catch (error) {
8840
8636
  const message = error instanceof Error ? error.message : String(error);
8841
8637
  console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
8842
8638
  return void 0;
8843
8639
  }
8844
- const filePath = import_node_path19.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
8640
+ const filePath = import_node_path18.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
8845
8641
  try {
8846
8642
  const logger = await CopilotStreamLogger.create(
8847
8643
  {
@@ -8918,8 +8714,7 @@ var MockProvider = class {
8918
8714
  return {
8919
8715
  output: [{ role: "assistant", content: this.cannedResponse }],
8920
8716
  raw: {
8921
- question: request.question,
8922
- guidelines: request.guidelines
8717
+ question: request.question
8923
8718
  }
8924
8719
  };
8925
8720
  }
@@ -9195,9 +8990,9 @@ function extractToolCalls3(content, toolTrackers, completedToolResults) {
9195
8990
  var import_node_child_process4 = require("child_process");
9196
8991
  var import_node_crypto7 = require("crypto");
9197
8992
  var import_node_fs8 = require("fs");
9198
- var import_promises17 = require("fs/promises");
8993
+ var import_promises16 = require("fs/promises");
9199
8994
  var import_node_os3 = require("os");
9200
- var import_node_path20 = __toESM(require("path"), 1);
8995
+ var import_node_path19 = __toESM(require("path"), 1);
9201
8996
 
9202
8997
  // src/evaluation/providers/pi-log-tracker.ts
9203
8998
  var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
@@ -9278,8 +9073,8 @@ var PiCodingAgentProvider = class {
9278
9073
  const workspaceRoot = await this.createWorkspace();
9279
9074
  const logger = await this.createStreamLogger(request).catch(() => void 0);
9280
9075
  try {
9281
- const promptFile = import_node_path20.default.join(workspaceRoot, PROMPT_FILENAME);
9282
- await (0, import_promises17.writeFile)(promptFile, request.question, "utf8");
9076
+ const promptFile = import_node_path19.default.join(workspaceRoot, PROMPT_FILENAME);
9077
+ await (0, import_promises16.writeFile)(promptFile, request.question, "utf8");
9283
9078
  const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
9284
9079
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
9285
9080
  const result = await this.executePi(args, cwd, request.signal, logger);
@@ -9340,12 +9135,12 @@ var PiCodingAgentProvider = class {
9340
9135
  }
9341
9136
  resolveCwd(workspaceRoot, cwdOverride) {
9342
9137
  if (cwdOverride) {
9343
- return import_node_path20.default.resolve(cwdOverride);
9138
+ return import_node_path19.default.resolve(cwdOverride);
9344
9139
  }
9345
9140
  if (!this.config.cwd) {
9346
9141
  return workspaceRoot;
9347
9142
  }
9348
- return import_node_path20.default.resolve(this.config.cwd);
9143
+ return import_node_path19.default.resolve(this.config.cwd);
9349
9144
  }
9350
9145
  buildPiArgs(prompt, inputFiles, _captureFileChanges) {
9351
9146
  const args = [];
@@ -9434,19 +9229,19 @@ ${prompt}` : prompt;
9434
9229
  return env;
9435
9230
  }
9436
9231
  async createWorkspace() {
9437
- return await (0, import_promises17.mkdtemp)(import_node_path20.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX));
9232
+ return await (0, import_promises16.mkdtemp)(import_node_path19.default.join((0, import_node_os3.tmpdir)(), WORKSPACE_PREFIX));
9438
9233
  }
9439
9234
  async cleanupWorkspace(workspaceRoot) {
9440
9235
  try {
9441
- await (0, import_promises17.rm)(workspaceRoot, { recursive: true, force: true });
9236
+ await (0, import_promises16.rm)(workspaceRoot, { recursive: true, force: true });
9442
9237
  } catch {
9443
9238
  }
9444
9239
  }
9445
9240
  resolveLogDirectory() {
9446
9241
  if (this.config.logDir) {
9447
- return import_node_path20.default.resolve(this.config.logDir);
9242
+ return import_node_path19.default.resolve(this.config.logDir);
9448
9243
  }
9449
- return import_node_path20.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9244
+ return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9450
9245
  }
9451
9246
  async createStreamLogger(request) {
9452
9247
  const logDir = this.resolveLogDirectory();
@@ -9454,13 +9249,13 @@ ${prompt}` : prompt;
9454
9249
  return void 0;
9455
9250
  }
9456
9251
  try {
9457
- await (0, import_promises17.mkdir)(logDir, { recursive: true });
9252
+ await (0, import_promises16.mkdir)(logDir, { recursive: true });
9458
9253
  } catch (error) {
9459
9254
  const message = error instanceof Error ? error.message : String(error);
9460
9255
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
9461
9256
  return void 0;
9462
9257
  }
9463
- const filePath = import_node_path20.default.join(logDir, buildLogFilename5(request, this.targetName));
9258
+ const filePath = import_node_path19.default.join(logDir, buildLogFilename5(request, this.targetName));
9464
9259
  try {
9465
9260
  const logger = await PiStreamLogger.create({
9466
9261
  filePath,
@@ -9959,7 +9754,7 @@ var ProviderRegistry = class {
9959
9754
  };
9960
9755
 
9961
9756
  // src/evaluation/providers/targets.ts
9962
- var import_node_path21 = __toESM(require("path"), 1);
9757
+ var import_node_path20 = __toESM(require("path"), 1);
9963
9758
  var import_zod3 = require("zod");
9964
9759
  var CliHealthcheckHttpInputSchema = import_zod3.z.object({
9965
9760
  url: import_zod3.z.string().min(1, "healthcheck URL is required"),
@@ -10056,11 +9851,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
10056
9851
  allowLiteral: true,
10057
9852
  optionalEnv: true
10058
9853
  });
10059
- if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
10060
- cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
9854
+ if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
9855
+ cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
10061
9856
  }
10062
9857
  if (!cwd && evalFilePath) {
10063
- cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
9858
+ cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
10064
9859
  }
10065
9860
  return {
10066
9861
  command,
@@ -10083,15 +9878,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
10083
9878
  optionalEnv: true
10084
9879
  }
10085
9880
  );
10086
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10087
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
9881
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
9882
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10088
9883
  }
10089
9884
  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
10090
9885
  allowLiteral: true,
10091
9886
  optionalEnv: true
10092
9887
  });
10093
- if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
10094
- cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
9888
+ if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
9889
+ cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
10095
9890
  }
10096
9891
  if (cwd && workspaceTemplate) {
10097
9892
  throw new Error(
@@ -10099,7 +9894,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
10099
9894
  );
10100
9895
  }
10101
9896
  if (!cwd && !workspaceTemplate && evalFilePath) {
10102
- cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
9897
+ cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
10103
9898
  }
10104
9899
  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
10105
9900
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
@@ -10122,7 +9917,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
10122
9917
  var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
10123
9918
  "PROMPT",
10124
9919
  "PROMPT_FILE",
10125
- "GUIDELINES",
10126
9920
  "EVAL_ID",
10127
9921
  "ATTEMPT",
10128
9922
  "FILES",
@@ -10517,8 +10311,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
10517
10311
  optionalEnv: true
10518
10312
  }
10519
10313
  );
10520
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10521
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10314
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10315
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10522
10316
  }
10523
10317
  if (cwd && workspaceTemplate) {
10524
10318
  throw new Error(
@@ -10602,8 +10396,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
10602
10396
  optionalEnv: true
10603
10397
  }
10604
10398
  );
10605
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10606
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10399
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10400
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10607
10401
  }
10608
10402
  if (cwd && workspaceTemplate) {
10609
10403
  throw new Error(
@@ -10667,8 +10461,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
10667
10461
  optionalEnv: true
10668
10462
  }
10669
10463
  );
10670
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10671
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10464
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10465
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10672
10466
  }
10673
10467
  if (cwd && workspaceTemplate) {
10674
10468
  throw new Error(
@@ -10758,8 +10552,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10758
10552
  optionalEnv: true
10759
10553
  }
10760
10554
  );
10761
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10762
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10555
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10556
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10763
10557
  }
10764
10558
  if (cwd && workspaceTemplate) {
10765
10559
  throw new Error(
@@ -10847,8 +10641,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
10847
10641
  optionalEnv: true
10848
10642
  }
10849
10643
  );
10850
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10851
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10644
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10645
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10852
10646
  }
10853
10647
  if (cwd && workspaceTemplate) {
10854
10648
  throw new Error(
@@ -10906,8 +10700,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
10906
10700
  optionalEnv: true
10907
10701
  }
10908
10702
  ) : void 0;
10909
- if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10910
- workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10703
+ if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10704
+ workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10911
10705
  }
10912
10706
  const executableSource = target.executable;
10913
10707
  const waitSource = target.wait;
@@ -10948,8 +10742,8 @@ function resolveCliConfig(target, env, evalFilePath) {
10948
10742
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
10949
10743
  if (!parseResult.success) {
10950
10744
  const firstError = parseResult.error.errors[0];
10951
- const path48 = firstError?.path.join(".") || "";
10952
- const prefix = path48 ? `${target.name} ${path48}: ` : `${target.name}: `;
10745
+ const path47 = firstError?.path.join(".") || "";
10746
+ const prefix = path47 ? `${target.name} ${path47}: ` : `${target.name}: `;
10953
10747
  throw new Error(`${prefix}${firstError?.message}`);
10954
10748
  }
10955
10749
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -10970,11 +10764,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
10970
10764
  allowLiteral: true,
10971
10765
  optionalEnv: true
10972
10766
  });
10973
- if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
10974
- cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
10767
+ if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
10768
+ cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
10975
10769
  }
10976
10770
  if (!cwd && evalFilePath) {
10977
- cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
10771
+ cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
10978
10772
  }
10979
10773
  return {
10980
10774
  command,
@@ -11158,40 +10952,40 @@ function resolveOptionalNumberArray(source, description) {
11158
10952
 
11159
10953
  // src/evaluation/providers/vscode-provider.ts
11160
10954
  var import_node_child_process6 = require("child_process");
11161
- var import_promises24 = require("fs/promises");
11162
- var import_node_path33 = __toESM(require("path"), 1);
10955
+ var import_promises23 = require("fs/promises");
10956
+ var import_node_path32 = __toESM(require("path"), 1);
11163
10957
  var import_node_util3 = require("util");
11164
10958
 
11165
10959
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
11166
- var import_promises22 = require("fs/promises");
11167
- var import_node_path31 = __toESM(require("path"), 1);
10960
+ var import_promises21 = require("fs/promises");
10961
+ var import_node_path30 = __toESM(require("path"), 1);
11168
10962
 
11169
10963
  // src/evaluation/providers/vscode/utils/fs.ts
11170
10964
  var import_node_fs9 = require("fs");
11171
- var import_promises18 = require("fs/promises");
11172
- var import_node_path22 = __toESM(require("path"), 1);
10965
+ var import_promises17 = require("fs/promises");
10966
+ var import_node_path21 = __toESM(require("path"), 1);
11173
10967
  async function pathExists(target) {
11174
10968
  try {
11175
- await (0, import_promises18.access)(target, import_node_fs9.constants.F_OK);
10969
+ await (0, import_promises17.access)(target, import_node_fs9.constants.F_OK);
11176
10970
  return true;
11177
10971
  } catch {
11178
10972
  return false;
11179
10973
  }
11180
10974
  }
11181
10975
  async function ensureDir(target) {
11182
- await (0, import_promises18.mkdir)(target, { recursive: true });
10976
+ await (0, import_promises17.mkdir)(target, { recursive: true });
11183
10977
  }
11184
10978
  async function readDirEntries(target) {
11185
- const entries = await (0, import_promises18.readdir)(target, { withFileTypes: true });
10979
+ const entries = await (0, import_promises17.readdir)(target, { withFileTypes: true });
11186
10980
  return entries.map((entry) => ({
11187
10981
  name: entry.name,
11188
- absolutePath: import_node_path22.default.join(target, entry.name),
10982
+ absolutePath: import_node_path21.default.join(target, entry.name),
11189
10983
  isDirectory: entry.isDirectory()
11190
10984
  }));
11191
10985
  }
11192
10986
  async function removeIfExists(target) {
11193
10987
  try {
11194
- await (0, import_promises18.rm)(target, { force: true, recursive: false });
10988
+ await (0, import_promises17.rm)(target, { force: true, recursive: false });
11195
10989
  } catch (error) {
11196
10990
  if (error.code !== "ENOENT") {
11197
10991
  throw error;
@@ -11200,9 +10994,9 @@ async function removeIfExists(target) {
11200
10994
  }
11201
10995
 
11202
10996
  // src/evaluation/providers/vscode/utils/path.ts
11203
- var import_node_path23 = __toESM(require("path"), 1);
10997
+ var import_node_path22 = __toESM(require("path"), 1);
11204
10998
  function pathToFileUri2(filePath) {
11205
- const absolutePath = import_node_path23.default.isAbsolute(filePath) ? filePath : import_node_path23.default.resolve(filePath);
10999
+ const absolutePath = import_node_path22.default.isAbsolute(filePath) ? filePath : import_node_path22.default.resolve(filePath);
11206
11000
  const normalizedPath = absolutePath.replace(/\\/g, "/");
11207
11001
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
11208
11002
  return `file:///${normalizedPath}`;
@@ -11211,7 +11005,7 @@ function pathToFileUri2(filePath) {
11211
11005
  }
11212
11006
 
11213
11007
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
11214
- var import_node_path24 = __toESM(require("path"), 1);
11008
+ var import_node_path23 = __toESM(require("path"), 1);
11215
11009
 
11216
11010
  // src/evaluation/providers/vscode/utils/template.ts
11217
11011
  function renderTemplate2(content, variables) {
@@ -11303,8 +11097,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
11303
11097
  });
11304
11098
  }
11305
11099
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
11306
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path24.default.basename(file)}`).join("\n");
11307
- const responseList = responseFiles.map((file) => `"${import_node_path24.default.basename(file)}"`).join(", ");
11100
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path23.default.basename(file)}`).join("\n");
11101
+ const responseList = responseFiles.map((file) => `"${import_node_path23.default.basename(file)}"`).join(", ");
11308
11102
  return renderTemplate2(templateContent, {
11309
11103
  requestFiles: requestLines,
11310
11104
  responseList
@@ -11312,8 +11106,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
11312
11106
  }
11313
11107
 
11314
11108
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
11315
- var import_promises19 = require("fs/promises");
11316
- var import_node_path25 = __toESM(require("path"), 1);
11109
+ var import_promises18 = require("fs/promises");
11110
+ var import_node_path24 = __toESM(require("path"), 1);
11317
11111
 
11318
11112
  // src/evaluation/providers/vscode/utils/time.ts
11319
11113
  function sleep2(ms) {
@@ -11351,7 +11145,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
11351
11145
  const maxAttempts = 10;
11352
11146
  while (attempts < maxAttempts) {
11353
11147
  try {
11354
- const content = await (0, import_promises19.readFile)(responseFileFinal, { encoding: "utf8" });
11148
+ const content = await (0, import_promises18.readFile)(responseFileFinal, { encoding: "utf8" });
11355
11149
  if (!silent) {
11356
11150
  process.stdout.write(`${content}
11357
11151
  `);
@@ -11372,7 +11166,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
11372
11166
  }
11373
11167
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
11374
11168
  if (!silent) {
11375
- const fileList = responseFilesFinal.map((file) => import_node_path25.default.basename(file)).join(", ");
11169
+ const fileList = responseFilesFinal.map((file) => import_node_path24.default.basename(file)).join(", ");
11376
11170
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
11377
11171
  }
11378
11172
  const deadline = Date.now() + timeoutMs;
@@ -11381,7 +11175,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
11381
11175
  while (pending.size > 0) {
11382
11176
  if (Date.now() >= deadline) {
11383
11177
  if (!silent) {
11384
- const remaining = [...pending].map((f) => import_node_path25.default.basename(f)).join(", ");
11178
+ const remaining = [...pending].map((f) => import_node_path24.default.basename(f)).join(", ");
11385
11179
  console.error(
11386
11180
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
11387
11181
  );
@@ -11408,7 +11202,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
11408
11202
  const maxAttempts = 10;
11409
11203
  while (attempts < maxAttempts) {
11410
11204
  try {
11411
- const content = await (0, import_promises19.readFile)(file, { encoding: "utf8" });
11205
+ const content = await (0, import_promises18.readFile)(file, { encoding: "utf8" });
11412
11206
  if (!silent) {
11413
11207
  process.stdout.write(`${content}
11414
11208
  `);
@@ -11431,16 +11225,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
11431
11225
 
11432
11226
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
11433
11227
  var import_node_child_process5 = require("child_process");
11434
- var import_promises20 = require("fs/promises");
11435
- var import_node_path28 = __toESM(require("path"), 1);
11228
+ var import_promises19 = require("fs/promises");
11229
+ var import_node_path27 = __toESM(require("path"), 1);
11436
11230
  var import_node_util2 = require("util");
11437
11231
 
11438
11232
  // src/evaluation/providers/vscode/dispatch/constants.ts
11439
- var import_node_path27 = __toESM(require("path"), 1);
11233
+ var import_node_path26 = __toESM(require("path"), 1);
11440
11234
 
11441
11235
  // src/paths.ts
11442
11236
  var import_node_os4 = __toESM(require("os"), 1);
11443
- var import_node_path26 = __toESM(require("path"), 1);
11237
+ var import_node_path25 = __toESM(require("path"), 1);
11444
11238
  var logged = false;
11445
11239
  function getAgentvHome() {
11446
11240
  const envHome = process.env.AGENTV_HOME;
@@ -11451,19 +11245,19 @@ function getAgentvHome() {
11451
11245
  }
11452
11246
  return envHome;
11453
11247
  }
11454
- return import_node_path26.default.join(import_node_os4.default.homedir(), ".agentv");
11248
+ return import_node_path25.default.join(import_node_os4.default.homedir(), ".agentv");
11455
11249
  }
11456
11250
  function getWorkspacesRoot() {
11457
- return import_node_path26.default.join(getAgentvHome(), "workspaces");
11251
+ return import_node_path25.default.join(getAgentvHome(), "workspaces");
11458
11252
  }
11459
11253
  function getSubagentsRoot() {
11460
- return import_node_path26.default.join(getAgentvHome(), "subagents");
11254
+ return import_node_path25.default.join(getAgentvHome(), "subagents");
11461
11255
  }
11462
11256
  function getTraceStateRoot() {
11463
- return import_node_path26.default.join(getAgentvHome(), "trace-state");
11257
+ return import_node_path25.default.join(getAgentvHome(), "trace-state");
11464
11258
  }
11465
11259
  function getWorkspacePoolRoot() {
11466
- return import_node_path26.default.join(getAgentvHome(), "workspace-pool");
11260
+ return import_node_path25.default.join(getAgentvHome(), "workspace-pool");
11467
11261
  }
11468
11262
 
11469
11263
  // src/evaluation/providers/vscode/dispatch/constants.ts
@@ -11471,7 +11265,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
11471
11265
  var DEFAULT_ALIVE_FILENAME = ".alive";
11472
11266
  function getDefaultSubagentRoot(vscodeCmd = "code") {
11473
11267
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
11474
- return import_node_path27.default.join(getSubagentsRoot(), folder);
11268
+ return import_node_path26.default.join(getSubagentsRoot(), folder);
11475
11269
  }
11476
11270
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
11477
11271
 
@@ -11538,12 +11332,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
11538
11332
  await raceSpawnError(child);
11539
11333
  return true;
11540
11334
  }
11541
- const aliveFile = import_node_path28.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
11335
+ const aliveFile = import_node_path27.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
11542
11336
  await removeIfExists(aliveFile);
11543
- const githubAgentsDir = import_node_path28.default.join(subagentDir, ".github", "agents");
11544
- await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
11545
- const wakeupDst = import_node_path28.default.join(githubAgentsDir, "wakeup.md");
11546
- await (0, import_promises20.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
11337
+ const githubAgentsDir = import_node_path27.default.join(subagentDir, ".github", "agents");
11338
+ await (0, import_promises19.mkdir)(githubAgentsDir, { recursive: true });
11339
+ const wakeupDst = import_node_path27.default.join(githubAgentsDir, "wakeup.md");
11340
+ await (0, import_promises19.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
11547
11341
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
11548
11342
  label: "open-workspace"
11549
11343
  });
@@ -11555,7 +11349,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
11555
11349
  "chat",
11556
11350
  "-m",
11557
11351
  wakeupChatId,
11558
- `create a file named .alive in the ${import_node_path28.default.basename(subagentDir)} folder`
11352
+ `create a file named .alive in the ${import_node_path27.default.basename(subagentDir)} folder`
11559
11353
  ];
11560
11354
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
11561
11355
  await raceSpawnError(wakeupChild);
@@ -11570,27 +11364,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
11570
11364
  return true;
11571
11365
  }
11572
11366
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
11573
- const workspacePath = import_node_path28.default.join(subagentDir, `${import_node_path28.default.basename(subagentDir)}.code-workspace`);
11574
- const messagesDir = import_node_path28.default.join(subagentDir, "messages");
11575
- await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
11576
- const reqFile = import_node_path28.default.join(messagesDir, `${timestamp}_req.md`);
11577
- await (0, import_promises20.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
11367
+ const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
11368
+ const messagesDir = import_node_path27.default.join(subagentDir, "messages");
11369
+ await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
11370
+ const reqFile = import_node_path27.default.join(messagesDir, `${timestamp}_req.md`);
11371
+ await (0, import_promises19.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
11578
11372
  const reqUri = pathToFileUri2(reqFile);
11579
11373
  const chatArgs = ["-r", "chat", "-m", chatId];
11580
11374
  for (const attachment of attachmentPaths) {
11581
11375
  chatArgs.push("-a", attachment);
11582
11376
  }
11583
11377
  chatArgs.push("-a", reqFile);
11584
- chatArgs.push(`Follow instructions in [${import_node_path28.default.basename(reqFile)}](${reqUri})`);
11378
+ chatArgs.push(`Follow instructions in [${import_node_path27.default.basename(reqFile)}](${reqUri})`);
11585
11379
  const workspaceReady = await ensureWorkspaceFocused(
11586
11380
  workspacePath,
11587
- import_node_path28.default.basename(subagentDir),
11381
+ import_node_path27.default.basename(subagentDir),
11588
11382
  subagentDir,
11589
11383
  vscodeCmd
11590
11384
  );
11591
11385
  if (!workspaceReady) {
11592
11386
  throw new Error(
11593
- `VS Code workspace '${import_node_path28.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11387
+ `VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11594
11388
  );
11595
11389
  }
11596
11390
  await sleep2(500);
@@ -11598,9 +11392,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
11598
11392
  await raceSpawnError(child);
11599
11393
  }
11600
11394
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
11601
- const workspacePath = import_node_path28.default.join(subagentDir, `${import_node_path28.default.basename(subagentDir)}.code-workspace`);
11602
- const messagesDir = import_node_path28.default.join(subagentDir, "messages");
11603
- await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
11395
+ const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
11396
+ const messagesDir = import_node_path27.default.join(subagentDir, "messages");
11397
+ await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
11604
11398
  const chatArgs = ["-r", "chat", "-m", chatId];
11605
11399
  for (const attachment of attachmentPaths) {
11606
11400
  chatArgs.push("-a", attachment);
@@ -11608,13 +11402,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
11608
11402
  chatArgs.push(chatInstruction);
11609
11403
  const workspaceReady = await ensureWorkspaceFocused(
11610
11404
  workspacePath,
11611
- import_node_path28.default.basename(subagentDir),
11405
+ import_node_path27.default.basename(subagentDir),
11612
11406
  subagentDir,
11613
11407
  vscodeCmd
11614
11408
  );
11615
11409
  if (!workspaceReady) {
11616
11410
  throw new Error(
11617
- `VS Code workspace '${import_node_path28.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11411
+ `VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11618
11412
  );
11619
11413
  }
11620
11414
  await sleep2(500);
@@ -11623,11 +11417,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
11623
11417
  }
11624
11418
 
11625
11419
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
11626
- var import_promises21 = require("fs/promises");
11627
- var import_node_path30 = __toESM(require("path"), 1);
11420
+ var import_promises20 = require("fs/promises");
11421
+ var import_node_path29 = __toESM(require("path"), 1);
11628
11422
 
11629
11423
  // src/evaluation/providers/vscode/utils/workspace.ts
11630
- var import_node_path29 = __toESM(require("path"), 1);
11424
+ var import_node_path28 = __toESM(require("path"), 1);
11631
11425
  var import_json5 = __toESM(require("json5"), 1);
11632
11426
  function transformWorkspacePaths(workspaceContent, templateDir) {
11633
11427
  let workspace;
@@ -11644,10 +11438,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
11644
11438
  }
11645
11439
  const transformedFolders = workspace.folders.map((folder) => {
11646
11440
  const folderPath = folder.path;
11647
- if (import_node_path29.default.isAbsolute(folderPath)) {
11441
+ if (import_node_path28.default.isAbsolute(folderPath)) {
11648
11442
  return folder;
11649
11443
  }
11650
- const absolutePath = import_node_path29.default.resolve(templateDir, folderPath);
11444
+ const absolutePath = import_node_path28.default.resolve(templateDir, folderPath);
11651
11445
  return {
11652
11446
  ...folder,
11653
11447
  path: absolutePath
@@ -11669,19 +11463,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
11669
11463
  if (locationMap && typeof locationMap === "object") {
11670
11464
  const transformedMap = {};
11671
11465
  for (const [locationPath, value] of Object.entries(locationMap)) {
11672
- const isAbsolute = import_node_path29.default.isAbsolute(locationPath);
11466
+ const isAbsolute = import_node_path28.default.isAbsolute(locationPath);
11673
11467
  if (isAbsolute) {
11674
11468
  transformedMap[locationPath] = value;
11675
11469
  } else {
11676
11470
  const firstGlobIndex = locationPath.search(/[*]/);
11677
11471
  if (firstGlobIndex === -1) {
11678
- const resolvedPath = import_node_path29.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
11472
+ const resolvedPath = import_node_path28.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
11679
11473
  transformedMap[resolvedPath] = value;
11680
11474
  } else {
11681
11475
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
11682
11476
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
11683
11477
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
11684
- const resolvedPath = (import_node_path29.default.resolve(templateDir, basePath) + patternPath).replace(
11478
+ const resolvedPath = (import_node_path28.default.resolve(templateDir, basePath) + patternPath).replace(
11685
11479
  /\\/g,
11686
11480
  "/"
11687
11481
  );
@@ -11722,7 +11516,7 @@ async function findUnlockedSubagent(subagentRoot) {
11722
11516
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
11723
11517
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
11724
11518
  for (const subagent of subagents) {
11725
- const lockFile = import_node_path30.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
11519
+ const lockFile = import_node_path29.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
11726
11520
  if (!await pathExists(lockFile)) {
11727
11521
  return subagent.absolutePath;
11728
11522
  }
@@ -11732,26 +11526,26 @@ async function findUnlockedSubagent(subagentRoot) {
11732
11526
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
11733
11527
  let workspaceContent;
11734
11528
  if (workspaceTemplate) {
11735
- const workspaceSrc = import_node_path30.default.resolve(workspaceTemplate);
11529
+ const workspaceSrc = import_node_path29.default.resolve(workspaceTemplate);
11736
11530
  if (!await pathExists(workspaceSrc)) {
11737
11531
  throw new Error(`workspace template not found: ${workspaceSrc}`);
11738
11532
  }
11739
- const stats = await (0, import_promises21.stat)(workspaceSrc);
11533
+ const stats = await (0, import_promises20.stat)(workspaceSrc);
11740
11534
  if (!stats.isFile()) {
11741
11535
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
11742
11536
  }
11743
- const templateText = await (0, import_promises21.readFile)(workspaceSrc, "utf8");
11537
+ const templateText = await (0, import_promises20.readFile)(workspaceSrc, "utf8");
11744
11538
  workspaceContent = JSON.parse(templateText);
11745
11539
  } else {
11746
11540
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
11747
11541
  }
11748
- const workspaceName = `${import_node_path30.default.basename(subagentDir)}.code-workspace`;
11749
- const workspaceDst = import_node_path30.default.join(subagentDir, workspaceName);
11750
- const templateDir = workspaceTemplate ? import_node_path30.default.dirname(import_node_path30.default.resolve(workspaceTemplate)) : subagentDir;
11542
+ const workspaceName = `${import_node_path29.default.basename(subagentDir)}.code-workspace`;
11543
+ const workspaceDst = import_node_path29.default.join(subagentDir, workspaceName);
11544
+ const templateDir = workspaceTemplate ? import_node_path29.default.dirname(import_node_path29.default.resolve(workspaceTemplate)) : subagentDir;
11751
11545
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
11752
11546
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
11753
11547
  if (cwd) {
11754
- const absCwd = import_node_path30.default.resolve(cwd);
11548
+ const absCwd = import_node_path29.default.resolve(cwd);
11755
11549
  const parsed = JSON.parse(transformedContent);
11756
11550
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
11757
11551
  if (!alreadyPresent) {
@@ -11759,36 +11553,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
11759
11553
  transformedContent = JSON.stringify(parsed, null, 2);
11760
11554
  }
11761
11555
  }
11762
- await (0, import_promises21.writeFile)(workspaceDst, transformedContent, "utf8");
11763
- const messagesDir = import_node_path30.default.join(subagentDir, "messages");
11764
- await (0, import_promises21.mkdir)(messagesDir, { recursive: true });
11556
+ await (0, import_promises20.writeFile)(workspaceDst, transformedContent, "utf8");
11557
+ const messagesDir = import_node_path29.default.join(subagentDir, "messages");
11558
+ await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
11765
11559
  return { workspace: workspaceDst, messagesDir };
11766
11560
  }
11767
11561
  async function createSubagentLock(subagentDir) {
11768
- const messagesDir = import_node_path30.default.join(subagentDir, "messages");
11562
+ const messagesDir = import_node_path29.default.join(subagentDir, "messages");
11769
11563
  if (await pathExists(messagesDir)) {
11770
- const files = await (0, import_promises21.readdir)(messagesDir);
11564
+ const files = await (0, import_promises20.readdir)(messagesDir);
11771
11565
  await Promise.all(
11772
11566
  files.map(async (file) => {
11773
- const target = import_node_path30.default.join(messagesDir, file);
11567
+ const target = import_node_path29.default.join(messagesDir, file);
11774
11568
  await removeIfExists(target);
11775
11569
  })
11776
11570
  );
11777
11571
  }
11778
- const githubAgentsDir = import_node_path30.default.join(subagentDir, ".github", "agents");
11572
+ const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
11779
11573
  if (await pathExists(githubAgentsDir)) {
11780
- const agentFiles = await (0, import_promises21.readdir)(githubAgentsDir);
11574
+ const agentFiles = await (0, import_promises20.readdir)(githubAgentsDir);
11781
11575
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
11782
11576
  await Promise.all(
11783
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path30.default.join(githubAgentsDir, file)))
11577
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path29.default.join(githubAgentsDir, file)))
11784
11578
  );
11785
11579
  }
11786
- const lockFile = import_node_path30.default.join(subagentDir, DEFAULT_LOCK_NAME);
11787
- await (0, import_promises21.writeFile)(lockFile, "", { encoding: "utf8" });
11580
+ const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
11581
+ await (0, import_promises20.writeFile)(lockFile, "", { encoding: "utf8" });
11788
11582
  return lockFile;
11789
11583
  }
11790
11584
  async function removeSubagentLock(subagentDir) {
11791
- const lockFile = import_node_path30.default.join(subagentDir, DEFAULT_LOCK_NAME);
11585
+ const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
11792
11586
  await removeIfExists(lockFile);
11793
11587
  }
11794
11588
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -11808,11 +11602,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
11808
11602
  return 1;
11809
11603
  }
11810
11604
  if (promptFile) {
11811
- const githubAgentsDir = import_node_path30.default.join(subagentDir, ".github", "agents");
11812
- await (0, import_promises21.mkdir)(githubAgentsDir, { recursive: true });
11813
- const agentFile = import_node_path30.default.join(githubAgentsDir, `${chatId}.md`);
11605
+ const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
11606
+ await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
11607
+ const agentFile = import_node_path29.default.join(githubAgentsDir, `${chatId}.md`);
11814
11608
  try {
11815
- await (0, import_promises21.copyFile)(promptFile, agentFile);
11609
+ await (0, import_promises20.copyFile)(promptFile, agentFile);
11816
11610
  } catch (error) {
11817
11611
  console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
11818
11612
  return 1;
@@ -11829,11 +11623,11 @@ async function resolvePromptFile(promptFile) {
11829
11623
  if (!promptFile) {
11830
11624
  return void 0;
11831
11625
  }
11832
- const resolvedPrompt = import_node_path31.default.resolve(promptFile);
11626
+ const resolvedPrompt = import_node_path30.default.resolve(promptFile);
11833
11627
  if (!await pathExists(resolvedPrompt)) {
11834
11628
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
11835
11629
  }
11836
- const promptStats = await (0, import_promises22.stat)(resolvedPrompt);
11630
+ const promptStats = await (0, import_promises21.stat)(resolvedPrompt);
11837
11631
  if (!promptStats.isFile()) {
11838
11632
  throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
11839
11633
  }
@@ -11845,7 +11639,7 @@ async function resolveAttachments(extraAttachments) {
11845
11639
  }
11846
11640
  const resolved = [];
11847
11641
  for (const attachment of extraAttachments) {
11848
- const resolvedPath = import_node_path31.default.resolve(attachment);
11642
+ const resolvedPath = import_node_path30.default.resolve(attachment);
11849
11643
  if (!await pathExists(resolvedPath)) {
11850
11644
  throw new Error(`Attachment not found: ${resolvedPath}`);
11851
11645
  }
@@ -11887,7 +11681,7 @@ async function dispatchAgentSession(options) {
11887
11681
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
11888
11682
  };
11889
11683
  }
11890
- const subagentName = import_node_path31.default.basename(subagentDir);
11684
+ const subagentName = import_node_path30.default.basename(subagentDir);
11891
11685
  const chatId = Math.random().toString(16).slice(2, 10);
11892
11686
  const preparationResult = await prepareSubagentDirectory(
11893
11687
  subagentDir,
@@ -11915,9 +11709,9 @@ async function dispatchAgentSession(options) {
11915
11709
  };
11916
11710
  }
11917
11711
  const timestamp = generateTimestamp();
11918
- const messagesDir = import_node_path31.default.join(subagentDir, "messages");
11919
- const responseFileTmp = import_node_path31.default.join(messagesDir, `${timestamp}_res.tmp.md`);
11920
- const responseFileFinal = import_node_path31.default.join(messagesDir, `${timestamp}_res.md`);
11712
+ const messagesDir = import_node_path30.default.join(subagentDir, "messages");
11713
+ const responseFileTmp = import_node_path30.default.join(messagesDir, `${timestamp}_res.tmp.md`);
11714
+ const responseFileFinal = import_node_path30.default.join(messagesDir, `${timestamp}_res.md`);
11921
11715
  const requestInstructions = createRequestPrompt(
11922
11716
  userQuery,
11923
11717
  responseFileTmp,
@@ -12022,7 +11816,7 @@ async function dispatchBatchAgent(options) {
12022
11816
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
12023
11817
  };
12024
11818
  }
12025
- subagentName = import_node_path31.default.basename(subagentDir);
11819
+ subagentName = import_node_path30.default.basename(subagentDir);
12026
11820
  const chatId = Math.random().toString(16).slice(2, 10);
12027
11821
  const preparationResult = await prepareSubagentDirectory(
12028
11822
  subagentDir,
@@ -12053,24 +11847,24 @@ async function dispatchBatchAgent(options) {
12053
11847
  };
12054
11848
  }
12055
11849
  const timestamp = generateTimestamp();
12056
- const messagesDir = import_node_path31.default.join(subagentDir, "messages");
11850
+ const messagesDir = import_node_path30.default.join(subagentDir, "messages");
12057
11851
  requestFiles = userQueries.map(
12058
- (_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_req.md`)
11852
+ (_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_req.md`)
12059
11853
  );
12060
11854
  const responseTmpFiles = userQueries.map(
12061
- (_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
11855
+ (_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
12062
11856
  );
12063
11857
  responseFilesFinal = userQueries.map(
12064
- (_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_res.md`)
11858
+ (_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.md`)
12065
11859
  );
12066
- const orchestratorFile = import_node_path31.default.join(messagesDir, `${timestamp}_orchestrator.md`);
11860
+ const orchestratorFile = import_node_path30.default.join(messagesDir, `${timestamp}_orchestrator.md`);
12067
11861
  if (!dryRun) {
12068
11862
  await Promise.all(
12069
11863
  userQueries.map((query, index) => {
12070
11864
  const reqFile = requestFiles[index];
12071
11865
  const tmpFile = responseTmpFiles[index];
12072
11866
  const finalFile = responseFilesFinal[index];
12073
- return (0, import_promises22.writeFile)(
11867
+ return (0, import_promises21.writeFile)(
12074
11868
  reqFile,
12075
11869
  createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
12076
11870
  { encoding: "utf8" }
@@ -12082,7 +11876,7 @@ async function dispatchBatchAgent(options) {
12082
11876
  responseFilesFinal,
12083
11877
  orchestratorTemplateContent
12084
11878
  );
12085
- await (0, import_promises22.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
11879
+ await (0, import_promises21.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
12086
11880
  }
12087
11881
  const chatAttachments = [orchestratorFile, ...attachments];
12088
11882
  const orchestratorUri = pathToFileUri2(orchestratorFile);
@@ -12148,8 +11942,8 @@ async function dispatchBatchAgent(options) {
12148
11942
  }
12149
11943
 
12150
11944
  // src/evaluation/providers/vscode/dispatch/provision.ts
12151
- var import_promises23 = require("fs/promises");
12152
- var import_node_path32 = __toESM(require("path"), 1);
11945
+ var import_promises22 = require("fs/promises");
11946
+ var import_node_path31 = __toESM(require("path"), 1);
12153
11947
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
12154
11948
  folders: [
12155
11949
  {
@@ -12180,7 +11974,7 @@ async function provisionSubagents(options) {
12180
11974
  if (!Number.isInteger(subagents) || subagents < 1) {
12181
11975
  throw new Error("subagents must be a positive integer");
12182
11976
  }
12183
- const targetPath = import_node_path32.default.resolve(targetRoot);
11977
+ const targetPath = import_node_path31.default.resolve(targetRoot);
12184
11978
  if (!dryRun) {
12185
11979
  await ensureDir(targetPath);
12186
11980
  }
@@ -12200,7 +11994,7 @@ async function provisionSubagents(options) {
12200
11994
  continue;
12201
11995
  }
12202
11996
  highestNumber = Math.max(highestNumber, parsed);
12203
- const lockFile = import_node_path32.default.join(entry.absolutePath, lockName);
11997
+ const lockFile = import_node_path31.default.join(entry.absolutePath, lockName);
12204
11998
  const locked = await pathExists(lockFile);
12205
11999
  if (locked) {
12206
12000
  lockedSubagents.add(entry.absolutePath);
@@ -12217,10 +12011,10 @@ async function provisionSubagents(options) {
12217
12011
  break;
12218
12012
  }
12219
12013
  const subagentDir = subagent.absolutePath;
12220
- const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
12221
- const lockFile = import_node_path32.default.join(subagentDir, lockName);
12222
- const workspaceDst = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
12223
- const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
12014
+ const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
12015
+ const lockFile = import_node_path31.default.join(subagentDir, lockName);
12016
+ const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
12017
+ const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
12224
12018
  const isLocked = await pathExists(lockFile);
12225
12019
  if (isLocked && !force) {
12226
12020
  continue;
@@ -12229,8 +12023,8 @@ async function provisionSubagents(options) {
12229
12023
  if (!dryRun) {
12230
12024
  await removeIfExists(lockFile);
12231
12025
  await ensureDir(githubAgentsDir);
12232
- await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12233
- await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12026
+ await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12027
+ await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12234
12028
  }
12235
12029
  created.push(subagentDir);
12236
12030
  lockedSubagents.delete(subagentDir);
@@ -12240,8 +12034,8 @@ async function provisionSubagents(options) {
12240
12034
  if (!isLocked && force) {
12241
12035
  if (!dryRun) {
12242
12036
  await ensureDir(githubAgentsDir);
12243
- await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12244
- await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12037
+ await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12038
+ await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12245
12039
  }
12246
12040
  created.push(subagentDir);
12247
12041
  subagentsProvisioned += 1;
@@ -12249,8 +12043,8 @@ async function provisionSubagents(options) {
12249
12043
  }
12250
12044
  if (!dryRun && !await pathExists(workspaceDst)) {
12251
12045
  await ensureDir(githubAgentsDir);
12252
- await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12253
- await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12046
+ await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12047
+ await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12254
12048
  }
12255
12049
  skippedExisting.push(subagentDir);
12256
12050
  subagentsProvisioned += 1;
@@ -12258,15 +12052,15 @@ async function provisionSubagents(options) {
12258
12052
  let nextIndex = highestNumber;
12259
12053
  while (subagentsProvisioned < subagents) {
12260
12054
  nextIndex += 1;
12261
- const subagentDir = import_node_path32.default.join(targetPath, `subagent-${nextIndex}`);
12262
- const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
12263
- const workspaceDst = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
12264
- const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
12055
+ const subagentDir = import_node_path31.default.join(targetPath, `subagent-${nextIndex}`);
12056
+ const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
12057
+ const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
12058
+ const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
12265
12059
  if (!dryRun) {
12266
12060
  await ensureDir(subagentDir);
12267
12061
  await ensureDir(githubAgentsDir);
12268
- await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12269
- await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12062
+ await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12063
+ await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12270
12064
  }
12271
12065
  created.push(subagentDir);
12272
12066
  subagentsProvisioned += 1;
@@ -12328,7 +12122,7 @@ var VSCodeProvider = class {
12328
12122
  }
12329
12123
  await this.ensureEnvironmentReady();
12330
12124
  const inputFiles = normalizeAttachments(request.inputFiles);
12331
- const promptContent = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
12125
+ const promptContent = buildPromptDocument2(request, inputFiles);
12332
12126
  const workspaceTemplate = request.workspaceFile ?? await resolveWorkspaceTemplateFile(this.config.workspaceTemplate);
12333
12127
  const startTime = Date.now();
12334
12128
  const session = await dispatchAgentSession({
@@ -12382,7 +12176,7 @@ var VSCodeProvider = class {
12382
12176
  normalizedRequests.map(({ inputFiles }) => inputFiles)
12383
12177
  );
12384
12178
  const userQueries = normalizedRequests.map(
12385
- ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles, request.guideline_patterns)
12179
+ ({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
12386
12180
  );
12387
12181
  const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
12388
12182
  this.config.workspaceTemplate
@@ -12451,9 +12245,9 @@ var VSCodeProvider = class {
12451
12245
  async function locateVSCodeExecutable(candidate) {
12452
12246
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
12453
12247
  if (includesPathSeparator) {
12454
- const resolved = import_node_path33.default.isAbsolute(candidate) ? candidate : import_node_path33.default.resolve(candidate);
12248
+ const resolved = import_node_path32.default.isAbsolute(candidate) ? candidate : import_node_path32.default.resolve(candidate);
12455
12249
  try {
12456
- await (0, import_promises24.access)(resolved, import_promises24.constants.F_OK);
12250
+ await (0, import_promises23.access)(resolved, import_promises23.constants.F_OK);
12457
12251
  return resolved;
12458
12252
  } catch {
12459
12253
  throw new Error(
@@ -12466,7 +12260,7 @@ async function locateVSCodeExecutable(candidate) {
12466
12260
  const { stdout } = await execAsync3(`${locator} ${candidate}`);
12467
12261
  const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
12468
12262
  if (lines.length > 0 && lines[0]) {
12469
- await (0, import_promises24.access)(lines[0], import_promises24.constants.F_OK);
12263
+ await (0, import_promises23.access)(lines[0], import_promises23.constants.F_OK);
12470
12264
  return lines[0];
12471
12265
  }
12472
12266
  } catch {
@@ -12480,41 +12274,35 @@ async function resolveWorkspaceTemplateFile(template) {
12480
12274
  return void 0;
12481
12275
  }
12482
12276
  try {
12483
- const stats = await (0, import_promises24.stat)(import_node_path33.default.resolve(template));
12277
+ const stats = await (0, import_promises23.stat)(import_node_path32.default.resolve(template));
12484
12278
  return stats.isFile() ? template : void 0;
12485
12279
  } catch {
12486
12280
  return template;
12487
12281
  }
12488
12282
  }
12489
- function buildPromptDocument2(request, attachments, guidelinePatterns) {
12283
+ function buildPromptDocument2(request, attachments) {
12490
12284
  const parts = [];
12491
12285
  if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
12492
12286
  parts.push(request.systemPrompt.trim());
12493
12287
  }
12494
- const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
12495
12288
  const attachmentFiles = collectAttachmentFiles(attachments);
12496
- const nonGuidelineAttachments = attachmentFiles.filter((file) => !guidelineFiles.includes(file));
12497
- const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
12289
+ const prereadBlock = buildMandatoryPrereadBlock2(attachmentFiles);
12498
12290
  if (prereadBlock.length > 0) {
12499
12291
  parts.push("\n", prereadBlock);
12500
12292
  }
12501
12293
  parts.push("\n[[ ## user_query ## ]]\n", request.question.trim());
12502
12294
  return parts.join("\n").trim();
12503
12295
  }
12504
- function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
12505
- if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
12296
+ function buildMandatoryPrereadBlock2(attachmentFiles) {
12297
+ if (attachmentFiles.length === 0) {
12506
12298
  return "";
12507
12299
  }
12508
12300
  const buildList = (files) => files.map((absolutePath) => {
12509
- const fileName = import_node_path33.default.basename(absolutePath);
12301
+ const fileName = import_node_path32.default.basename(absolutePath);
12510
12302
  const fileUri = pathToFileUri3(absolutePath);
12511
12303
  return `* [${fileName}](${fileUri})`;
12512
12304
  });
12513
12305
  const sections = [];
12514
- if (guidelineFiles.length > 0) {
12515
- sections.push(`Read all guideline files:
12516
- ${buildList(guidelineFiles).join("\n")}.`);
12517
- }
12518
12306
  if (attachmentFiles.length > 0) {
12519
12307
  sections.push(`Read all attachment files:
12520
12308
  ${buildList(attachmentFiles).join("\n")}.`);
@@ -12525,29 +12313,13 @@ ${buildList(attachmentFiles).join("\n")}.`);
12525
12313
  );
12526
12314
  return sections.join("\n");
12527
12315
  }
12528
- function collectGuidelineFiles2(attachments, guidelinePatterns) {
12529
- if (!attachments || attachments.length === 0) {
12530
- return [];
12531
- }
12532
- const unique = /* @__PURE__ */ new Map();
12533
- for (const attachment of attachments) {
12534
- const absolutePath = import_node_path33.default.resolve(attachment);
12535
- const normalized = absolutePath.split(import_node_path33.default.sep).join("/");
12536
- if (isGuidelineFile(normalized, guidelinePatterns)) {
12537
- if (!unique.has(absolutePath)) {
12538
- unique.set(absolutePath, absolutePath);
12539
- }
12540
- }
12541
- }
12542
- return Array.from(unique.values());
12543
- }
12544
12316
  function collectAttachmentFiles(attachments) {
12545
12317
  if (!attachments || attachments.length === 0) {
12546
12318
  return [];
12547
12319
  }
12548
12320
  const unique = /* @__PURE__ */ new Map();
12549
12321
  for (const attachment of attachments) {
12550
- const absolutePath = import_node_path33.default.resolve(attachment);
12322
+ const absolutePath = import_node_path32.default.resolve(attachment);
12551
12323
  if (!unique.has(absolutePath)) {
12552
12324
  unique.set(absolutePath, absolutePath);
12553
12325
  }
@@ -12555,7 +12327,7 @@ function collectAttachmentFiles(attachments) {
12555
12327
  return Array.from(unique.values());
12556
12328
  }
12557
12329
  function pathToFileUri3(filePath) {
12558
- const absolutePath = import_node_path33.default.isAbsolute(filePath) ? filePath : import_node_path33.default.resolve(filePath);
12330
+ const absolutePath = import_node_path32.default.isAbsolute(filePath) ? filePath : import_node_path32.default.resolve(filePath);
12559
12331
  const normalizedPath = absolutePath.replace(/\\/g, "/");
12560
12332
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
12561
12333
  return `file:///${normalizedPath}`;
@@ -12568,7 +12340,7 @@ function normalizeAttachments(attachments) {
12568
12340
  }
12569
12341
  const deduped = /* @__PURE__ */ new Set();
12570
12342
  for (const attachment of attachments) {
12571
- deduped.add(import_node_path33.default.resolve(attachment));
12343
+ deduped.add(import_node_path32.default.resolve(attachment));
12572
12344
  }
12573
12345
  return Array.from(deduped);
12574
12346
  }
@@ -12577,7 +12349,7 @@ function mergeAttachments(all) {
12577
12349
  for (const list of all) {
12578
12350
  if (!list) continue;
12579
12351
  for (const inputFile of list) {
12580
- deduped.add(import_node_path33.default.resolve(inputFile));
12352
+ deduped.add(import_node_path32.default.resolve(inputFile));
12581
12353
  }
12582
12354
  }
12583
12355
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -12625,8 +12397,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
12625
12397
 
12626
12398
  // src/evaluation/providers/targets-file.ts
12627
12399
  var import_node_fs10 = require("fs");
12628
- var import_promises25 = require("fs/promises");
12629
- var import_node_path34 = __toESM(require("path"), 1);
12400
+ var import_promises24 = require("fs/promises");
12401
+ var import_node_path33 = __toESM(require("path"), 1);
12630
12402
  var import_yaml6 = require("yaml");
12631
12403
  function isRecord(value) {
12632
12404
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -12656,18 +12428,18 @@ function assertTargetDefinition(value, index, filePath) {
12656
12428
  }
12657
12429
  async function fileExists3(filePath) {
12658
12430
  try {
12659
- await (0, import_promises25.access)(filePath, import_node_fs10.constants.F_OK);
12431
+ await (0, import_promises24.access)(filePath, import_node_fs10.constants.F_OK);
12660
12432
  return true;
12661
12433
  } catch {
12662
12434
  return false;
12663
12435
  }
12664
12436
  }
12665
12437
  async function readTargetDefinitions(filePath) {
12666
- const absolutePath = import_node_path34.default.resolve(filePath);
12438
+ const absolutePath = import_node_path33.default.resolve(filePath);
12667
12439
  if (!await fileExists3(absolutePath)) {
12668
12440
  throw new Error(`targets.yaml not found at ${absolutePath}`);
12669
12441
  }
12670
- const raw = await (0, import_promises25.readFile)(absolutePath, "utf8");
12442
+ const raw = await (0, import_promises24.readFile)(absolutePath, "utf8");
12671
12443
  const parsed = (0, import_yaml6.parse)(raw);
12672
12444
  if (!isRecord(parsed)) {
12673
12445
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -12683,16 +12455,16 @@ function listTargetNames(definitions) {
12683
12455
  }
12684
12456
 
12685
12457
  // src/evaluation/providers/provider-discovery.ts
12686
- var import_node_path35 = __toESM(require("path"), 1);
12458
+ var import_node_path34 = __toESM(require("path"), 1);
12687
12459
  var import_fast_glob2 = __toESM(require("fast-glob"), 1);
12688
12460
  async function discoverProviders(registry, baseDir) {
12689
12461
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
12690
12462
  const candidateDirs = [];
12691
- let dir = import_node_path35.default.resolve(baseDir);
12692
- const root = import_node_path35.default.parse(dir).root;
12463
+ let dir = import_node_path34.default.resolve(baseDir);
12464
+ const root = import_node_path34.default.parse(dir).root;
12693
12465
  while (dir !== root) {
12694
- candidateDirs.push(import_node_path35.default.join(dir, ".agentv", "providers"));
12695
- dir = import_node_path35.default.dirname(dir);
12466
+ candidateDirs.push(import_node_path34.default.join(dir, ".agentv", "providers"));
12467
+ dir = import_node_path34.default.dirname(dir);
12696
12468
  }
12697
12469
  let files = [];
12698
12470
  for (const providersDir of candidateDirs) {
@@ -12708,7 +12480,7 @@ async function discoverProviders(registry, baseDir) {
12708
12480
  }
12709
12481
  const discoveredKinds = [];
12710
12482
  for (const filePath of files) {
12711
- const basename = import_node_path35.default.basename(filePath);
12483
+ const basename = import_node_path34.default.basename(filePath);
12712
12484
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
12713
12485
  if (registry.has(kindName)) {
12714
12486
  continue;
@@ -12815,9 +12587,9 @@ function negateScore(score) {
12815
12587
  }
12816
12588
 
12817
12589
  // src/evaluation/evaluators/code-evaluator.ts
12818
- var import_promises26 = require("fs/promises");
12590
+ var import_promises25 = require("fs/promises");
12819
12591
  var import_node_os5 = require("os");
12820
- var import_node_path36 = require("path");
12592
+ var import_node_path35 = require("path");
12821
12593
 
12822
12594
  // src/runtime/exec.ts
12823
12595
  function shellEscapePath(value) {
@@ -12917,15 +12689,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
12917
12689
  });
12918
12690
  }
12919
12691
  async function execShellWithStdin(command, stdinPayload, options = {}) {
12920
- const { mkdir: mkdir17, readFile: readFile15, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
12692
+ const { mkdir: mkdir17, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
12921
12693
  const { tmpdir: tmpdir3 } = await import("os");
12922
- const path48 = await import("path");
12694
+ const path47 = await import("path");
12923
12695
  const { randomUUID: randomUUID9 } = await import("crypto");
12924
- const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
12696
+ const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
12925
12697
  await mkdir17(dir, { recursive: true });
12926
- const stdinPath = path48.join(dir, "stdin.txt");
12927
- const stdoutPath = path48.join(dir, "stdout.txt");
12928
- const stderrPath = path48.join(dir, "stderr.txt");
12698
+ const stdinPath = path47.join(dir, "stdin.txt");
12699
+ const stdoutPath = path47.join(dir, "stdout.txt");
12700
+ const stderrPath = path47.join(dir, "stderr.txt");
12929
12701
  await writeFile9(stdinPath, stdinPayload, "utf8");
12930
12702
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
12931
12703
  const { spawn: spawn5 } = await import("child_process");
@@ -12955,8 +12727,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
12955
12727
  resolve(code ?? 0);
12956
12728
  });
12957
12729
  });
12958
- const stdout = (await readFile15(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
12959
- const stderr = (await readFile15(stderrPath, "utf8")).replace(/\r\n/g, "\n");
12730
+ const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
12731
+ const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
12960
12732
  return { stdout, stderr, exitCode };
12961
12733
  } finally {
12962
12734
  await rm6(dir, { recursive: true, force: true });
@@ -13261,9 +13033,9 @@ var CodeEvaluator = class {
13261
13033
  if (outputForPayload) {
13262
13034
  const serialized = JSON.stringify(outputForPayload);
13263
13035
  if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
13264
- const tmpDir = await (0, import_promises26.mkdtemp)((0, import_node_path36.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
13265
- outputPath = (0, import_node_path36.join)(tmpDir, "output.json");
13266
- await (0, import_promises26.writeFile)(outputPath, serialized);
13036
+ const tmpDir = await (0, import_promises25.mkdtemp)((0, import_node_path35.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
13037
+ outputPath = (0, import_node_path35.join)(tmpDir, "output.json");
13038
+ await (0, import_promises25.writeFile)(outputPath, serialized);
13267
13039
  outputForPayload = null;
13268
13040
  }
13269
13041
  }
@@ -13273,10 +13045,7 @@ var CodeEvaluator = class {
13273
13045
  outputText: context2.candidate,
13274
13046
  output: outputForPayload,
13275
13047
  outputPath,
13276
- guidelineFiles: context2.evalCase.guideline_paths,
13277
- inputFiles: context2.evalCase.file_paths.filter(
13278
- (path48) => !context2.evalCase.guideline_paths.includes(path48)
13279
- ),
13048
+ inputFiles: context2.evalCase.file_paths,
13280
13049
  input: context2.evalCase.input,
13281
13050
  trace: context2.trace ?? null,
13282
13051
  tokenUsage: context2.tokenUsage ?? null,
@@ -13375,7 +13144,7 @@ var CodeEvaluator = class {
13375
13144
  await proxyShutdown();
13376
13145
  }
13377
13146
  if (outputPath) {
13378
- await (0, import_promises26.rm)((0, import_node_path36.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
13147
+ await (0, import_promises25.rm)((0, import_node_path35.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
13379
13148
  });
13380
13149
  }
13381
13150
  }
@@ -13438,8 +13207,8 @@ function isAgentProvider(provider) {
13438
13207
  }
13439
13208
 
13440
13209
  // src/evaluation/evaluators/llm-grader.ts
13441
- var import_promises27 = __toESM(require("fs/promises"), 1);
13442
- var import_node_path37 = __toESM(require("path"), 1);
13210
+ var import_promises26 = __toESM(require("fs/promises"), 1);
13211
+ var import_node_path36 = __toESM(require("path"), 1);
13443
13212
  var import_ai2 = require("ai");
13444
13213
  var import_zod4 = require("zod");
13445
13214
  var DEFAULT_MAX_STEPS = 10;
@@ -14271,8 +14040,8 @@ function calculateScoreRangeResult(result, rubrics) {
14271
14040
  };
14272
14041
  }
14273
14042
  function resolveSandboxed(basePath, relativePath) {
14274
- const resolved = import_node_path37.default.resolve(basePath, relativePath);
14275
- if (!resolved.startsWith(basePath + import_node_path37.default.sep) && resolved !== basePath) {
14043
+ const resolved = import_node_path36.default.resolve(basePath, relativePath);
14044
+ if (!resolved.startsWith(basePath + import_node_path36.default.sep) && resolved !== basePath) {
14276
14045
  throw new Error(`Path '${relativePath}' is outside the workspace`);
14277
14046
  }
14278
14047
  return resolved;
@@ -14287,7 +14056,7 @@ function createFilesystemTools(workspacePath) {
14287
14056
  execute: async (input) => {
14288
14057
  try {
14289
14058
  const resolved = resolveSandboxed(workspacePath, input.path);
14290
- const entries = await import_promises27.default.readdir(resolved, { withFileTypes: true });
14059
+ const entries = await import_promises26.default.readdir(resolved, { withFileTypes: true });
14291
14060
  return entries.map((e) => ({
14292
14061
  name: e.name,
14293
14062
  type: e.isDirectory() ? "directory" : "file"
@@ -14305,12 +14074,12 @@ function createFilesystemTools(workspacePath) {
14305
14074
  execute: async (input) => {
14306
14075
  try {
14307
14076
  const resolved = resolveSandboxed(workspacePath, input.path);
14308
- const stat8 = await import_promises27.default.stat(resolved);
14077
+ const stat8 = await import_promises26.default.stat(resolved);
14309
14078
  if (stat8.isDirectory()) {
14310
14079
  return { error: `'${input.path}' is a directory, not a file` };
14311
14080
  }
14312
14081
  const buffer = Buffer.alloc(Math.min(stat8.size, MAX_FILE_SIZE));
14313
- const fd = await import_promises27.default.open(resolved, "r");
14082
+ const fd = await import_promises26.default.open(resolved, "r");
14314
14083
  try {
14315
14084
  await fd.read(buffer, 0, buffer.length, 0);
14316
14085
  } finally {
@@ -14355,30 +14124,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
14355
14124
  if (matches.length >= MAX_SEARCH_MATCHES) return;
14356
14125
  let entries;
14357
14126
  try {
14358
- entries = await import_promises27.default.readdir(dirPath, { withFileTypes: true });
14127
+ entries = await import_promises26.default.readdir(dirPath, { withFileTypes: true });
14359
14128
  } catch {
14360
14129
  return;
14361
14130
  }
14362
14131
  for (const entry of entries) {
14363
14132
  if (matches.length >= MAX_SEARCH_MATCHES) return;
14364
14133
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
14365
- const fullPath = import_node_path37.default.join(dirPath, entry.name);
14134
+ const fullPath = import_node_path36.default.join(dirPath, entry.name);
14366
14135
  if (entry.isDirectory()) {
14367
14136
  await searchDirectory(fullPath, workspacePath, regex, matches);
14368
14137
  } else if (entry.isFile()) {
14369
- const ext = import_node_path37.default.extname(entry.name).toLowerCase();
14138
+ const ext = import_node_path36.default.extname(entry.name).toLowerCase();
14370
14139
  if (BINARY_EXTENSIONS.has(ext)) continue;
14371
14140
  try {
14372
- const stat8 = await import_promises27.default.stat(fullPath);
14141
+ const stat8 = await import_promises26.default.stat(fullPath);
14373
14142
  if (stat8.size > MAX_FILE_SIZE) continue;
14374
- const content = await import_promises27.default.readFile(fullPath, "utf-8");
14143
+ const content = await import_promises26.default.readFile(fullPath, "utf-8");
14375
14144
  const lines = content.split("\n");
14376
14145
  for (let i = 0; i < lines.length; i++) {
14377
14146
  if (matches.length >= MAX_SEARCH_MATCHES) return;
14378
14147
  regex.lastIndex = 0;
14379
14148
  if (regex.test(lines[i])) {
14380
14149
  matches.push({
14381
- file: import_node_path37.default.relative(workspacePath, fullPath),
14150
+ file: import_node_path36.default.relative(workspacePath, fullPath),
14382
14151
  line: i + 1,
14383
14152
  text: lines[i].substring(0, 200)
14384
14153
  });
@@ -15013,115 +14782,115 @@ var FieldAccuracyEvaluator = class {
15013
14782
  * Evaluate a single field against the expected value.
15014
14783
  */
15015
14784
  evaluateField(fieldConfig, candidateData, expectedData) {
15016
- const { path: path48, match, required = true, weight = 1 } = fieldConfig;
15017
- const candidateValue = resolvePath(candidateData, path48);
15018
- const expectedValue = resolvePath(expectedData, path48);
14785
+ const { path: path47, match, required = true, weight = 1 } = fieldConfig;
14786
+ const candidateValue = resolvePath(candidateData, path47);
14787
+ const expectedValue = resolvePath(expectedData, path47);
15019
14788
  if (expectedValue === void 0) {
15020
14789
  return {
15021
- path: path48,
14790
+ path: path47,
15022
14791
  score: 1,
15023
14792
  // No expected value means no comparison needed
15024
14793
  weight,
15025
14794
  hit: true,
15026
- message: `${path48}: no expected value`
14795
+ message: `${path47}: no expected value`
15027
14796
  };
15028
14797
  }
15029
14798
  if (candidateValue === void 0) {
15030
14799
  if (required) {
15031
14800
  return {
15032
- path: path48,
14801
+ path: path47,
15033
14802
  score: 0,
15034
14803
  weight,
15035
14804
  hit: false,
15036
- message: `${path48} (required, missing)`
14805
+ message: `${path47} (required, missing)`
15037
14806
  };
15038
14807
  }
15039
14808
  return {
15040
- path: path48,
14809
+ path: path47,
15041
14810
  score: 1,
15042
14811
  // Don't penalize missing optional fields
15043
14812
  weight: 0,
15044
14813
  // Zero weight means it won't affect the score
15045
14814
  hit: true,
15046
- message: `${path48}: optional field missing`
14815
+ message: `${path47}: optional field missing`
15047
14816
  };
15048
14817
  }
15049
14818
  switch (match) {
15050
14819
  case "exact":
15051
- return this.compareExact(path48, candidateValue, expectedValue, weight);
14820
+ return this.compareExact(path47, candidateValue, expectedValue, weight);
15052
14821
  case "numeric_tolerance":
15053
14822
  return this.compareNumericTolerance(
15054
- path48,
14823
+ path47,
15055
14824
  candidateValue,
15056
14825
  expectedValue,
15057
14826
  fieldConfig,
15058
14827
  weight
15059
14828
  );
15060
14829
  case "date":
15061
- return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
14830
+ return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
15062
14831
  default:
15063
14832
  return {
15064
- path: path48,
14833
+ path: path47,
15065
14834
  score: 0,
15066
14835
  weight,
15067
14836
  hit: false,
15068
- message: `${path48}: unknown match type "${match}"`
14837
+ message: `${path47}: unknown match type "${match}"`
15069
14838
  };
15070
14839
  }
15071
14840
  }
15072
14841
  /**
15073
14842
  * Exact equality comparison.
15074
14843
  */
15075
- compareExact(path48, candidateValue, expectedValue, weight) {
14844
+ compareExact(path47, candidateValue, expectedValue, weight) {
15076
14845
  if (deepEqual(candidateValue, expectedValue)) {
15077
14846
  return {
15078
- path: path48,
14847
+ path: path47,
15079
14848
  score: 1,
15080
14849
  weight,
15081
14850
  hit: true,
15082
- message: path48
14851
+ message: path47
15083
14852
  };
15084
14853
  }
15085
14854
  if (typeof candidateValue !== typeof expectedValue) {
15086
14855
  return {
15087
- path: path48,
14856
+ path: path47,
15088
14857
  score: 0,
15089
14858
  weight,
15090
14859
  hit: false,
15091
- message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
14860
+ message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
15092
14861
  };
15093
14862
  }
15094
14863
  return {
15095
- path: path48,
14864
+ path: path47,
15096
14865
  score: 0,
15097
14866
  weight,
15098
14867
  hit: false,
15099
- message: `${path48} (value mismatch)`
14868
+ message: `${path47} (value mismatch)`
15100
14869
  };
15101
14870
  }
15102
14871
  /**
15103
14872
  * Numeric comparison with absolute or relative tolerance.
15104
14873
  */
15105
- compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
14874
+ compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
15106
14875
  const { tolerance = 0, relative = false } = fieldConfig;
15107
14876
  const candidateNum = toNumber(candidateValue);
15108
14877
  const expectedNum = toNumber(expectedValue);
15109
14878
  if (candidateNum === null || expectedNum === null) {
15110
14879
  return {
15111
- path: path48,
14880
+ path: path47,
15112
14881
  score: 0,
15113
14882
  weight,
15114
14883
  hit: false,
15115
- message: `${path48} (non-numeric value)`
14884
+ message: `${path47} (non-numeric value)`
15116
14885
  };
15117
14886
  }
15118
14887
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
15119
14888
  return {
15120
- path: path48,
14889
+ path: path47,
15121
14890
  score: 0,
15122
14891
  weight,
15123
14892
  hit: false,
15124
- message: `${path48} (invalid numeric value)`
14893
+ message: `${path47} (invalid numeric value)`
15125
14894
  };
15126
14895
  }
15127
14896
  const diff = Math.abs(candidateNum - expectedNum);
@@ -15134,61 +14903,61 @@ var FieldAccuracyEvaluator = class {
15134
14903
  }
15135
14904
  if (withinTolerance) {
15136
14905
  return {
15137
- path: path48,
14906
+ path: path47,
15138
14907
  score: 1,
15139
14908
  weight,
15140
14909
  hit: true,
15141
- message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
14910
+ message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
15142
14911
  };
15143
14912
  }
15144
14913
  return {
15145
- path: path48,
14914
+ path: path47,
15146
14915
  score: 0,
15147
14916
  weight,
15148
14917
  hit: false,
15149
- message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
14918
+ message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
15150
14919
  };
15151
14920
  }
15152
14921
  /**
15153
14922
  * Date comparison with format normalization.
15154
14923
  */
15155
- compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
14924
+ compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
15156
14925
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
15157
14926
  const candidateDate = parseDate(String(candidateValue), formats);
15158
14927
  const expectedDate = parseDate(String(expectedValue), formats);
15159
14928
  if (candidateDate === null) {
15160
14929
  return {
15161
- path: path48,
14930
+ path: path47,
15162
14931
  score: 0,
15163
14932
  weight,
15164
14933
  hit: false,
15165
- message: `${path48} (unparseable candidate date)`
14934
+ message: `${path47} (unparseable candidate date)`
15166
14935
  };
15167
14936
  }
15168
14937
  if (expectedDate === null) {
15169
14938
  return {
15170
- path: path48,
14939
+ path: path47,
15171
14940
  score: 0,
15172
14941
  weight,
15173
14942
  hit: false,
15174
- message: `${path48} (unparseable expected date)`
14943
+ message: `${path47} (unparseable expected date)`
15175
14944
  };
15176
14945
  }
15177
14946
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
15178
14947
  return {
15179
- path: path48,
14948
+ path: path47,
15180
14949
  score: 1,
15181
14950
  weight,
15182
14951
  hit: true,
15183
- message: path48
14952
+ message: path47
15184
14953
  };
15185
14954
  }
15186
14955
  return {
15187
- path: path48,
14956
+ path: path47,
15188
14957
  score: 0,
15189
14958
  weight,
15190
14959
  hit: false,
15191
- message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
14960
+ message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
15192
14961
  };
15193
14962
  }
15194
14963
  /**
@@ -15221,11 +14990,11 @@ var FieldAccuracyEvaluator = class {
15221
14990
  };
15222
14991
  }
15223
14992
  };
15224
- function resolvePath(obj, path48) {
15225
- if (!path48 || !obj) {
14993
+ function resolvePath(obj, path47) {
14994
+ if (!path47 || !obj) {
15226
14995
  return void 0;
15227
14996
  }
15228
- const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
14997
+ const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
15229
14998
  let current = obj;
15230
14999
  for (const part of parts) {
15231
15000
  if (current === null || current === void 0) {
@@ -15685,8 +15454,8 @@ var TokenUsageEvaluator = class {
15685
15454
  };
15686
15455
 
15687
15456
  // src/evaluation/evaluators/tool-trajectory.ts
15688
- function getNestedValue(obj, path48) {
15689
- const parts = path48.split(".");
15457
+ function getNestedValue(obj, path47) {
15458
+ const parts = path47.split(".");
15690
15459
  let current = obj;
15691
15460
  for (const part of parts) {
15692
15461
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -16307,9 +16076,9 @@ function runEqualsAssertion(output, value) {
16307
16076
 
16308
16077
  // src/evaluation/orchestrator.ts
16309
16078
  var import_node_crypto10 = require("crypto");
16310
- var import_promises31 = require("fs/promises");
16311
- var import_node_path46 = __toESM(require("path"), 1);
16312
- var import_micromatch4 = __toESM(require("micromatch"), 1);
16079
+ var import_promises30 = require("fs/promises");
16080
+ var import_node_path45 = __toESM(require("path"), 1);
16081
+ var import_micromatch3 = __toESM(require("micromatch"), 1);
16313
16082
 
16314
16083
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
16315
16084
  var Node = class {
@@ -16522,7 +16291,7 @@ var InlineAssertEvaluator = class {
16522
16291
  };
16523
16292
 
16524
16293
  // src/evaluation/evaluators/prompt-resolution.ts
16525
- var import_node_path38 = __toESM(require("path"), 1);
16294
+ var import_node_path37 = __toESM(require("path"), 1);
16526
16295
  async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
16527
16296
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
16528
16297
  if (!context2) {
@@ -16557,10 +16326,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
16557
16326
  expectedOutput: context2.evalCase.expected_output,
16558
16327
  outputText: context2.candidate,
16559
16328
  output: context2.output ?? null,
16560
- guidelineFiles: context2.evalCase.guideline_paths,
16561
- inputFiles: context2.evalCase.file_paths.filter(
16562
- (p) => !context2.evalCase.guideline_paths.includes(p)
16563
- ),
16329
+ inputFiles: context2.evalCase.file_paths,
16564
16330
  input: context2.evalCase.input,
16565
16331
  trace: context2.trace ?? null,
16566
16332
  fileChanges: context2.fileChanges ?? null,
@@ -16571,7 +16337,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
16571
16337
  };
16572
16338
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
16573
16339
  const scriptPath = script[script.length - 1];
16574
- const cwd = import_node_path38.default.dirname(scriptPath);
16340
+ const cwd = import_node_path37.default.dirname(scriptPath);
16575
16341
  try {
16576
16342
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
16577
16343
  const prompt = stdout.trim();
@@ -16843,16 +16609,16 @@ function createBuiltinRegistry() {
16843
16609
  }
16844
16610
 
16845
16611
  // src/evaluation/registry/assertion-discovery.ts
16846
- var import_node_path39 = __toESM(require("path"), 1);
16612
+ var import_node_path38 = __toESM(require("path"), 1);
16847
16613
  var import_fast_glob3 = __toESM(require("fast-glob"), 1);
16848
16614
  async function discoverAssertions(registry, baseDir) {
16849
16615
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
16850
16616
  const candidateDirs = [];
16851
- let dir = import_node_path39.default.resolve(baseDir);
16852
- const root = import_node_path39.default.parse(dir).root;
16617
+ let dir = import_node_path38.default.resolve(baseDir);
16618
+ const root = import_node_path38.default.parse(dir).root;
16853
16619
  while (dir !== root) {
16854
- candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "assertions"));
16855
- dir = import_node_path39.default.dirname(dir);
16620
+ candidateDirs.push(import_node_path38.default.join(dir, ".agentv", "assertions"));
16621
+ dir = import_node_path38.default.dirname(dir);
16856
16622
  }
16857
16623
  let files = [];
16858
16624
  for (const assertionsDir of candidateDirs) {
@@ -16868,7 +16634,7 @@ async function discoverAssertions(registry, baseDir) {
16868
16634
  }
16869
16635
  const discoveredTypes = [];
16870
16636
  for (const filePath of files) {
16871
- const basename = import_node_path39.default.basename(filePath);
16637
+ const basename = import_node_path38.default.basename(filePath);
16872
16638
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
16873
16639
  if (registry.has(typeName)) {
16874
16640
  continue;
@@ -16886,17 +16652,17 @@ async function discoverAssertions(registry, baseDir) {
16886
16652
  }
16887
16653
 
16888
16654
  // src/evaluation/registry/grader-discovery.ts
16889
- var import_node_path40 = __toESM(require("path"), 1);
16655
+ var import_node_path39 = __toESM(require("path"), 1);
16890
16656
  var import_fast_glob4 = __toESM(require("fast-glob"), 1);
16891
16657
  async function discoverGraders(registry, baseDir) {
16892
16658
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
16893
16659
  const candidateDirs = [];
16894
- let dir = import_node_path40.default.resolve(baseDir);
16895
- const root = import_node_path40.default.parse(dir).root;
16660
+ let dir = import_node_path39.default.resolve(baseDir);
16661
+ const root = import_node_path39.default.parse(dir).root;
16896
16662
  while (dir !== root) {
16897
- candidateDirs.push(import_node_path40.default.join(dir, ".agentv", "graders"));
16898
- candidateDirs.push(import_node_path40.default.join(dir, ".agentv", "judges"));
16899
- dir = import_node_path40.default.dirname(dir);
16663
+ candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "graders"));
16664
+ candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "judges"));
16665
+ dir = import_node_path39.default.dirname(dir);
16900
16666
  }
16901
16667
  let files = [];
16902
16668
  for (const gradersDir of candidateDirs) {
@@ -16912,7 +16678,7 @@ async function discoverGraders(registry, baseDir) {
16912
16678
  }
16913
16679
  const discoveredTypes = [];
16914
16680
  for (const filePath of files) {
16915
- const basename = import_node_path40.default.basename(filePath);
16681
+ const basename = import_node_path39.default.basename(filePath);
16916
16682
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
16917
16683
  if (registry.has(typeName)) {
16918
16684
  continue;
@@ -17072,7 +16838,7 @@ function getTCritical(df) {
17072
16838
  // src/evaluation/workspace/file-changes.ts
17073
16839
  var import_node_child_process7 = require("child_process");
17074
16840
  var import_node_fs11 = require("fs");
17075
- var import_node_path41 = __toESM(require("path"), 1);
16841
+ var import_node_path40 = __toESM(require("path"), 1);
17076
16842
  var import_node_util4 = require("util");
17077
16843
  var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.exec);
17078
16844
  function gitExecOpts(workspacePath) {
@@ -17106,10 +16872,10 @@ async function stageNestedRepoChanges(workspacePath) {
17106
16872
  }
17107
16873
  for (const entry of entries) {
17108
16874
  if (entry === ".git" || entry === "node_modules") continue;
17109
- const childPath = import_node_path41.default.join(workspacePath, entry);
16875
+ const childPath = import_node_path40.default.join(workspacePath, entry);
17110
16876
  try {
17111
16877
  if (!(0, import_node_fs11.statSync)(childPath).isDirectory()) continue;
17112
- if (!(0, import_node_fs11.statSync)(import_node_path41.default.join(childPath, ".git")).isDirectory()) continue;
16878
+ if (!(0, import_node_fs11.statSync)(import_node_path40.default.join(childPath, ".git")).isDirectory()) continue;
17113
16879
  } catch {
17114
16880
  continue;
17115
16881
  }
@@ -17119,8 +16885,8 @@ async function stageNestedRepoChanges(workspacePath) {
17119
16885
  }
17120
16886
 
17121
16887
  // src/evaluation/workspace/manager.ts
17122
- var import_promises28 = require("fs/promises");
17123
- var import_node_path42 = __toESM(require("path"), 1);
16888
+ var import_promises27 = require("fs/promises");
16889
+ var import_node_path41 = __toESM(require("path"), 1);
17124
16890
  var TemplateNotFoundError = class extends Error {
17125
16891
  constructor(templatePath) {
17126
16892
  super(`Workspace template not found: ${templatePath}`);
@@ -17142,7 +16908,7 @@ var WorkspaceCreationError = class extends Error {
17142
16908
  };
17143
16909
  async function isDirectory(filePath) {
17144
16910
  try {
17145
- const stats = await (0, import_promises28.stat)(filePath);
16911
+ const stats = await (0, import_promises27.stat)(filePath);
17146
16912
  return stats.isDirectory();
17147
16913
  } catch {
17148
16914
  return false;
@@ -17150,26 +16916,26 @@ async function isDirectory(filePath) {
17150
16916
  }
17151
16917
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
17152
16918
  const root = workspaceRoot ?? getWorkspacesRoot();
17153
- return import_node_path42.default.join(root, evalRunId, caseId);
16919
+ return import_node_path41.default.join(root, evalRunId, caseId);
17154
16920
  }
17155
16921
  async function copyDirectoryRecursive(src, dest) {
17156
- await (0, import_promises28.mkdir)(dest, { recursive: true });
17157
- const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
16922
+ await (0, import_promises27.mkdir)(dest, { recursive: true });
16923
+ const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
17158
16924
  for (const entry of entries) {
17159
- const srcPath = import_node_path42.default.join(src, entry.name);
17160
- const destPath = import_node_path42.default.join(dest, entry.name);
16925
+ const srcPath = import_node_path41.default.join(src, entry.name);
16926
+ const destPath = import_node_path41.default.join(dest, entry.name);
17161
16927
  if (entry.name === ".git") {
17162
16928
  continue;
17163
16929
  }
17164
16930
  if (entry.isDirectory()) {
17165
16931
  await copyDirectoryRecursive(srcPath, destPath);
17166
16932
  } else {
17167
- await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true });
16933
+ await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true });
17168
16934
  }
17169
16935
  }
17170
16936
  }
17171
16937
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
17172
- const resolvedTemplatePath = import_node_path42.default.resolve(templatePath);
16938
+ const resolvedTemplatePath = import_node_path41.default.resolve(templatePath);
17173
16939
  if (!await fileExists2(resolvedTemplatePath)) {
17174
16940
  throw new TemplateNotFoundError(resolvedTemplatePath);
17175
16941
  }
@@ -17179,7 +16945,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
17179
16945
  const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
17180
16946
  try {
17181
16947
  if (await fileExists2(workspacePath)) {
17182
- await (0, import_promises28.rm)(workspacePath, { recursive: true, force: true });
16948
+ await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
17183
16949
  }
17184
16950
  await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
17185
16951
  return workspacePath;
@@ -17213,14 +16979,14 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
17213
16979
  }
17214
16980
  async function cleanupWorkspace(workspacePath) {
17215
16981
  if (await fileExists2(workspacePath)) {
17216
- await (0, import_promises28.rm)(workspacePath, { recursive: true, force: true });
16982
+ await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
17217
16983
  }
17218
16984
  }
17219
16985
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
17220
16986
  const root = workspaceRoot ?? getWorkspacesRoot();
17221
- const evalDir = import_node_path42.default.join(root, evalRunId);
16987
+ const evalDir = import_node_path41.default.join(root, evalRunId);
17222
16988
  if (await fileExists2(evalDir)) {
17223
- await (0, import_promises28.rm)(evalDir, { recursive: true, force: true });
16989
+ await (0, import_promises27.rm)(evalDir, { recursive: true, force: true });
17224
16990
  }
17225
16991
  }
17226
16992
 
@@ -17228,8 +16994,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
17228
16994
  var import_node_child_process8 = require("child_process");
17229
16995
  var import_node_crypto9 = require("crypto");
17230
16996
  var import_node_fs12 = require("fs");
17231
- var import_promises29 = require("fs/promises");
17232
- var import_node_path43 = __toESM(require("path"), 1);
16997
+ var import_promises28 = require("fs/promises");
16998
+ var import_node_path42 = __toESM(require("path"), 1);
17233
16999
  var import_node_util5 = require("util");
17234
17000
  var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
17235
17001
  function gitEnv() {
@@ -17280,11 +17046,11 @@ function computeWorkspaceFingerprint(repos) {
17280
17046
  return (0, import_node_crypto9.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
17281
17047
  }
17282
17048
  async function copyDirectoryRecursive2(src, dest, skipDirs) {
17283
- await (0, import_promises29.mkdir)(dest, { recursive: true });
17284
- const entries = await (0, import_promises29.readdir)(src, { withFileTypes: true });
17049
+ await (0, import_promises28.mkdir)(dest, { recursive: true });
17050
+ const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
17285
17051
  for (const entry of entries) {
17286
- const srcPath = import_node_path43.default.join(src, entry.name);
17287
- const destPath = import_node_path43.default.join(dest, entry.name);
17052
+ const srcPath = import_node_path42.default.join(src, entry.name);
17053
+ const destPath = import_node_path42.default.join(dest, entry.name);
17288
17054
  if (entry.name === ".git") {
17289
17055
  continue;
17290
17056
  }
@@ -17294,7 +17060,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
17294
17060
  }
17295
17061
  await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
17296
17062
  } else {
17297
- await (0, import_promises29.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
17063
+ await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
17298
17064
  }
17299
17065
  }
17300
17066
  }
@@ -17317,8 +17083,8 @@ var WorkspacePoolManager = class {
17317
17083
  async acquireWorkspace(options) {
17318
17084
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
17319
17085
  const fingerprint = computeWorkspaceFingerprint(repos);
17320
- const poolDir = import_node_path43.default.join(this.poolRoot, fingerprint);
17321
- await (0, import_promises29.mkdir)(poolDir, { recursive: true });
17086
+ const poolDir = import_node_path42.default.join(this.poolRoot, fingerprint);
17087
+ await (0, import_promises28.mkdir)(poolDir, { recursive: true });
17322
17088
  const drifted = await this.checkDrift(poolDir, fingerprint);
17323
17089
  if (drifted) {
17324
17090
  console.warn(
@@ -17327,7 +17093,7 @@ var WorkspacePoolManager = class {
17327
17093
  await this.removeAllSlots(poolDir);
17328
17094
  }
17329
17095
  for (let i = 0; i < maxSlots; i++) {
17330
- const slotPath = import_node_path43.default.join(poolDir, `slot-${i}`);
17096
+ const slotPath = import_node_path42.default.join(poolDir, `slot-${i}`);
17331
17097
  const lockPath = `${slotPath}.lock`;
17332
17098
  const locked = await this.tryLock(lockPath);
17333
17099
  if (!locked) {
@@ -17345,7 +17111,7 @@ var WorkspacePoolManager = class {
17345
17111
  poolDir
17346
17112
  };
17347
17113
  }
17348
- await (0, import_promises29.mkdir)(slotPath, { recursive: true });
17114
+ await (0, import_promises28.mkdir)(slotPath, { recursive: true });
17349
17115
  if (templatePath) {
17350
17116
  await copyDirectoryRecursive2(templatePath, slotPath);
17351
17117
  }
@@ -17369,7 +17135,7 @@ var WorkspacePoolManager = class {
17369
17135
  /** Remove lock file to release a slot. */
17370
17136
  async releaseSlot(slot) {
17371
17137
  try {
17372
- await (0, import_promises29.unlink)(slot.lockPath);
17138
+ await (0, import_promises28.unlink)(slot.lockPath);
17373
17139
  } catch {
17374
17140
  }
17375
17141
  }
@@ -17382,21 +17148,21 @@ var WorkspacePoolManager = class {
17382
17148
  async tryLock(lockPath) {
17383
17149
  for (let attempt = 0; attempt < 3; attempt++) {
17384
17150
  try {
17385
- await (0, import_promises29.writeFile)(lockPath, String(process.pid), { flag: "wx" });
17151
+ await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
17386
17152
  return true;
17387
17153
  } catch (err) {
17388
17154
  if (err.code !== "EEXIST") {
17389
17155
  throw err;
17390
17156
  }
17391
17157
  try {
17392
- const pidStr = await (0, import_promises29.readFile)(lockPath, "utf-8");
17158
+ const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
17393
17159
  const pid = Number.parseInt(pidStr.trim(), 10);
17394
17160
  if (!Number.isNaN(pid)) {
17395
17161
  try {
17396
17162
  process.kill(pid, 0);
17397
17163
  return false;
17398
17164
  } catch {
17399
- await (0, import_promises29.unlink)(lockPath).catch(() => {
17165
+ await (0, import_promises28.unlink)(lockPath).catch(() => {
17400
17166
  });
17401
17167
  continue;
17402
17168
  }
@@ -17414,9 +17180,9 @@ var WorkspacePoolManager = class {
17414
17180
  * Returns false (no drift) if metadata.json doesn't exist (first use).
17415
17181
  */
17416
17182
  async checkDrift(poolDir, fingerprint) {
17417
- const metadataPath = import_node_path43.default.join(poolDir, "metadata.json");
17183
+ const metadataPath = import_node_path42.default.join(poolDir, "metadata.json");
17418
17184
  try {
17419
- const raw = await (0, import_promises29.readFile)(metadataPath, "utf-8");
17185
+ const raw = await (0, import_promises28.readFile)(metadataPath, "utf-8");
17420
17186
  const metadata = JSON.parse(raw);
17421
17187
  return metadata.fingerprint !== fingerprint;
17422
17188
  } catch {
@@ -17431,17 +17197,17 @@ var WorkspacePoolManager = class {
17431
17197
  repos,
17432
17198
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
17433
17199
  };
17434
- await (0, import_promises29.writeFile)(import_node_path43.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
17200
+ await (0, import_promises28.writeFile)(import_node_path42.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
17435
17201
  }
17436
17202
  /** Remove all slot directories and their lock files from a pool directory. */
17437
17203
  async removeAllSlots(poolDir) {
17438
- const entries = await (0, import_promises29.readdir)(poolDir);
17204
+ const entries = await (0, import_promises28.readdir)(poolDir);
17439
17205
  for (const entry of entries) {
17440
17206
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
17441
- const lockPath = import_node_path43.default.join(poolDir, `${entry}.lock`);
17207
+ const lockPath = import_node_path42.default.join(poolDir, `${entry}.lock`);
17442
17208
  if ((0, import_node_fs12.existsSync)(lockPath)) {
17443
17209
  try {
17444
- const pidStr = await (0, import_promises29.readFile)(lockPath, "utf-8");
17210
+ const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
17445
17211
  const pid = Number.parseInt(pidStr.trim(), 10);
17446
17212
  if (!Number.isNaN(pid)) {
17447
17213
  try {
@@ -17454,12 +17220,12 @@ var WorkspacePoolManager = class {
17454
17220
  } catch {
17455
17221
  }
17456
17222
  }
17457
- await (0, import_promises29.rm)(import_node_path43.default.join(poolDir, entry), { recursive: true, force: true });
17458
- await (0, import_promises29.rm)(lockPath, { force: true }).catch(() => {
17223
+ await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, entry), { recursive: true, force: true });
17224
+ await (0, import_promises28.rm)(lockPath, { force: true }).catch(() => {
17459
17225
  });
17460
17226
  }
17461
17227
  }
17462
- await (0, import_promises29.rm)(import_node_path43.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
17228
+ await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
17463
17229
  });
17464
17230
  }
17465
17231
  /**
@@ -17469,7 +17235,7 @@ var WorkspacePoolManager = class {
17469
17235
  */
17470
17236
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
17471
17237
  for (const repo of repos) {
17472
- const repoDir = import_node_path43.default.join(slotPath, repo.path);
17238
+ const repoDir = import_node_path42.default.join(slotPath, repo.path);
17473
17239
  if (!(0, import_node_fs12.existsSync)(repoDir)) {
17474
17240
  continue;
17475
17241
  }
@@ -17496,7 +17262,7 @@ var WorkspacePoolManager = class {
17496
17262
  // src/evaluation/workspace/repo-manager.ts
17497
17263
  var import_node_child_process9 = require("child_process");
17498
17264
  var import_node_fs13 = require("fs");
17499
- var import_node_path44 = __toESM(require("path"), 1);
17265
+ var import_node_path43 = __toESM(require("path"), 1);
17500
17266
  var import_node_util6 = require("util");
17501
17267
  var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process9.execFile);
17502
17268
  var DEFAULT_TIMEOUT_MS2 = 3e5;
@@ -17596,7 +17362,7 @@ ${lines.join("\n")}`;
17596
17362
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
17597
17363
  */
17598
17364
  async materialize(repo, workspacePath) {
17599
- const targetDir = import_node_path44.default.join(workspacePath, repo.path);
17365
+ const targetDir = import_node_path43.default.join(workspacePath, repo.path);
17600
17366
  const sourceUrl = getSourceUrl(repo.source);
17601
17367
  const startedAt = Date.now();
17602
17368
  if (this.verbose) {
@@ -17687,7 +17453,7 @@ ${lines.join("\n")}`;
17687
17453
  async reset(repos, workspacePath, reset) {
17688
17454
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
17689
17455
  for (const repo of repos) {
17690
- const targetDir = import_node_path44.default.join(workspacePath, repo.path);
17456
+ const targetDir = import_node_path43.default.join(workspacePath, repo.path);
17691
17457
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
17692
17458
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
17693
17459
  }
@@ -17695,36 +17461,36 @@ ${lines.join("\n")}`;
17695
17461
  };
17696
17462
 
17697
17463
  // src/evaluation/workspace/resolve.ts
17698
- var import_promises30 = require("fs/promises");
17699
- var import_node_path45 = __toESM(require("path"), 1);
17464
+ var import_promises29 = require("fs/promises");
17465
+ var import_node_path44 = __toESM(require("path"), 1);
17700
17466
  async function resolveWorkspaceTemplate(templatePath) {
17701
17467
  if (!templatePath) {
17702
17468
  return void 0;
17703
17469
  }
17704
- const resolved = import_node_path45.default.resolve(templatePath);
17705
- const stats = await (0, import_promises30.stat)(resolved);
17470
+ const resolved = import_node_path44.default.resolve(templatePath);
17471
+ const stats = await (0, import_promises29.stat)(resolved);
17706
17472
  if (stats.isFile()) {
17707
17473
  return {
17708
- dir: import_node_path45.default.dirname(resolved),
17474
+ dir: import_node_path44.default.dirname(resolved),
17709
17475
  workspaceFile: resolved
17710
17476
  };
17711
17477
  }
17712
17478
  if (!stats.isDirectory()) {
17713
17479
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
17714
17480
  }
17715
- const entries = await (0, import_promises30.readdir)(resolved);
17481
+ const entries = await (0, import_promises29.readdir)(resolved);
17716
17482
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
17717
17483
  if (workspaceFiles.length === 1) {
17718
17484
  return {
17719
17485
  dir: resolved,
17720
- workspaceFile: import_node_path45.default.join(resolved, workspaceFiles[0])
17486
+ workspaceFile: import_node_path44.default.join(resolved, workspaceFiles[0])
17721
17487
  };
17722
17488
  }
17723
17489
  if (workspaceFiles.length > 1) {
17724
17490
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
17725
17491
  return {
17726
17492
  dir: resolved,
17727
- workspaceFile: conventionFile ? import_node_path45.default.join(resolved, conventionFile) : void 0
17493
+ workspaceFile: conventionFile ? import_node_path44.default.join(resolved, conventionFile) : void 0
17728
17494
  };
17729
17495
  }
17730
17496
  return { dir: resolved };
@@ -17928,7 +17694,7 @@ async function runEvaluation(options) {
17928
17694
  ];
17929
17695
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
17930
17696
  const typeRegistry = createBuiltinRegistry();
17931
- const discoveryBaseDir = evalFilePath ? import_node_path46.default.dirname(import_node_path46.default.resolve(evalFilePath)) : process.cwd();
17697
+ const discoveryBaseDir = evalFilePath ? import_node_path45.default.dirname(import_node_path45.default.resolve(evalFilePath)) : process.cwd();
17932
17698
  const evalDir = discoveryBaseDir;
17933
17699
  await discoverAssertions(typeRegistry, discoveryBaseDir);
17934
17700
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -18037,14 +17803,22 @@ async function runEvaluation(options) {
18037
17803
  const usePool = poolEnabled !== false && !!suiteWorkspace?.repos?.length && !isPerTestIsolation && !useStaticWorkspace;
18038
17804
  const resolvedRetainOnSuccess = retainOnSuccess ?? (keepWorkspaces ? "keep" : "cleanup");
18039
17805
  const resolvedRetainOnFailure = retainOnFailure ?? (cleanupWorkspaces ? "cleanup" : "keep");
18040
- const requestedWorkers = options.maxConcurrency ?? target.workers ?? 1;
18041
- const workers = hasSharedWorkspace && !usePool ? 1 : requestedWorkers;
17806
+ const workers = options.maxConcurrency ?? target.workers ?? 1;
18042
17807
  setupLog(
18043
- `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} requestedWorkers=${requestedWorkers} effectiveWorkers=${workers}`
17808
+ `sharedWorkspace=${hasSharedWorkspace} perTestIsolation=${isPerTestIsolation} usePool=${usePool} workers=${workers}`
18044
17809
  );
18045
- if (hasSharedWorkspace && !usePool && requestedWorkers > 1) {
17810
+ if (hasSharedWorkspace && !usePool && workers > 1) {
18046
17811
  console.warn(
18047
- `Warning: Shared workspace requires sequential execution. Overriding workers from ${requestedWorkers} to 1.`
17812
+ [
17813
+ `Warning: This eval uses a shared workspace with ${workers} workers.`,
17814
+ "If the agent under test makes file edits, concurrent runs may corrupt each other.",
17815
+ "To limit concurrency, add this to your eval YAML:",
17816
+ "",
17817
+ " execution:",
17818
+ " workers: 1",
17819
+ "",
17820
+ "Or pass --workers 1 on the command line."
17821
+ ].join("\n")
18048
17822
  );
18049
17823
  }
18050
17824
  const limit = pLimit(workers);
@@ -18060,14 +17834,14 @@ async function runEvaluation(options) {
18060
17834
  let staticMaterialised = false;
18061
17835
  if (useStaticWorkspace && configuredStaticPath) {
18062
17836
  const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
18063
- const dirExists = await (0, import_promises31.stat)(configuredStaticPath).then(
17837
+ const dirExists = await (0, import_promises30.stat)(configuredStaticPath).then(
18064
17838
  (s) => s.isDirectory(),
18065
17839
  () => false
18066
17840
  );
18067
- const isEmpty = dirExists ? (await (0, import_promises31.readdir)(configuredStaticPath)).length === 0 : false;
17841
+ const isEmpty = dirExists ? (await (0, import_promises30.readdir)(configuredStaticPath)).length === 0 : false;
18068
17842
  if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
18069
17843
  if (!dirExists) {
18070
- await (0, import_promises31.mkdir)(configuredStaticPath, { recursive: true });
17844
+ await (0, import_promises30.mkdir)(configuredStaticPath, { recursive: true });
18071
17845
  }
18072
17846
  if (workspaceTemplate) {
18073
17847
  await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
@@ -18112,14 +17886,14 @@ async function runEvaluation(options) {
18112
17886
  }
18113
17887
  } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
18114
17888
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
18115
- await (0, import_promises31.mkdir)(sharedWorkspacePath, { recursive: true });
17889
+ await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
18116
17890
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
18117
17891
  }
18118
17892
  try {
18119
17893
  if (suiteWorkspaceFile && sharedWorkspacePath) {
18120
- const copiedWorkspaceFile = import_node_path46.default.join(sharedWorkspacePath, import_node_path46.default.basename(suiteWorkspaceFile));
17894
+ const copiedWorkspaceFile = import_node_path45.default.join(sharedWorkspacePath, import_node_path45.default.basename(suiteWorkspaceFile));
18121
17895
  try {
18122
- await (0, import_promises31.stat)(copiedWorkspaceFile);
17896
+ await (0, import_promises30.stat)(copiedWorkspaceFile);
18123
17897
  suiteWorkspaceFile = copiedWorkspaceFile;
18124
17898
  } catch {
18125
17899
  }
@@ -18227,7 +18001,7 @@ async function runEvaluation(options) {
18227
18001
  const budgetResult = {
18228
18002
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
18229
18003
  testId: evalCase.id,
18230
- dataset: evalCase.dataset,
18004
+ eval_set: evalCase.eval_set,
18231
18005
  score: 0,
18232
18006
  assertions: [],
18233
18007
  output: [],
@@ -18263,7 +18037,7 @@ async function runEvaluation(options) {
18263
18037
  const haltResult = {
18264
18038
  timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
18265
18039
  testId: evalCase.id,
18266
- dataset: evalCase.dataset,
18040
+ eval_set: evalCase.eval_set,
18267
18041
  score: 0,
18268
18042
  assertions: [],
18269
18043
  output: [],
@@ -18495,8 +18269,6 @@ async function runBatchEvaluation(options) {
18495
18269
  const promptInputs = promptInputsList[index];
18496
18270
  return {
18497
18271
  question: promptInputs.question,
18498
- guidelines: promptInputs.guidelines,
18499
- guideline_patterns: evalCase.guideline_patterns,
18500
18272
  inputFiles: evalCase.file_paths,
18501
18273
  evalCaseId: evalCase.id,
18502
18274
  metadata: {
@@ -18694,9 +18466,9 @@ async function runEvalCase(options) {
18694
18466
  );
18695
18467
  }
18696
18468
  if (caseWorkspaceFile && workspacePath) {
18697
- const copiedFile = import_node_path46.default.join(workspacePath, import_node_path46.default.basename(caseWorkspaceFile));
18469
+ const copiedFile = import_node_path45.default.join(workspacePath, import_node_path45.default.basename(caseWorkspaceFile));
18698
18470
  try {
18699
- await (0, import_promises31.stat)(copiedFile);
18471
+ await (0, import_promises30.stat)(copiedFile);
18700
18472
  caseWorkspaceFile = copiedFile;
18701
18473
  } catch {
18702
18474
  }
@@ -18704,7 +18476,7 @@ async function runEvalCase(options) {
18704
18476
  }
18705
18477
  if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
18706
18478
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
18707
- await (0, import_promises31.mkdir)(workspacePath, { recursive: true });
18479
+ await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
18708
18480
  }
18709
18481
  if (evalCase.workspace?.repos?.length && workspacePath) {
18710
18482
  const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
@@ -18754,11 +18526,11 @@ async function runEvalCase(options) {
18754
18526
  const files = evalCase.metadata.agent_skills_files;
18755
18527
  if (baseDir && files.length > 0) {
18756
18528
  for (const relPath of files) {
18757
- const srcPath = import_node_path46.default.resolve(baseDir, relPath);
18758
- const destPath = import_node_path46.default.resolve(workspacePath, relPath);
18529
+ const srcPath = import_node_path45.default.resolve(baseDir, relPath);
18530
+ const destPath = import_node_path45.default.resolve(workspacePath, relPath);
18759
18531
  try {
18760
- await (0, import_promises31.mkdir)(import_node_path46.default.dirname(destPath), { recursive: true });
18761
- await (0, import_promises31.copyFile)(srcPath, destPath);
18532
+ await (0, import_promises30.mkdir)(import_node_path45.default.dirname(destPath), { recursive: true });
18533
+ await (0, import_promises30.copyFile)(srcPath, destPath);
18762
18534
  } catch (error) {
18763
18535
  const message = error instanceof Error ? error.message : String(error);
18764
18536
  return buildErrorResult(
@@ -19204,8 +18976,7 @@ async function evaluateCandidate(options) {
19204
18976
  let lmRequest;
19205
18977
  if (isAgentProvider(provider)) {
19206
18978
  agentRequest = {
19207
- question: promptInputs.question,
19208
- guideline_paths: evalCase.guideline_paths
18979
+ question: promptInputs.question
19209
18980
  };
19210
18981
  } else {
19211
18982
  if (promptInputs.chatPrompt) {
@@ -19214,8 +18985,7 @@ async function evaluateCandidate(options) {
19214
18985
  };
19215
18986
  } else {
19216
18987
  lmRequest = {
19217
- question: promptInputs.question,
19218
- guidelines: promptInputs.guidelines
18988
+ question: promptInputs.question
19219
18989
  };
19220
18990
  }
19221
18991
  }
@@ -19229,7 +18999,7 @@ async function evaluateCandidate(options) {
19229
18999
  return {
19230
19000
  timestamp: completedAt.toISOString(),
19231
19001
  testId: evalCase.id,
19232
- dataset: evalCase.dataset,
19002
+ eval_set: evalCase.eval_set,
19233
19003
  conversationId: evalCase.conversation_id,
19234
19004
  score: score.score,
19235
19005
  assertions: score.assertions,
@@ -19377,7 +19147,7 @@ async function runEvaluatorList(options) {
19377
19147
  fileChanges,
19378
19148
  workspacePath
19379
19149
  };
19380
- const evalFileDir = evalCase.guideline_paths[0] ? import_node_path46.default.dirname(evalCase.guideline_paths[0]) : process.cwd();
19150
+ const evalFileDir = evalCase.file_paths[0] ? import_node_path45.default.dirname(evalCase.file_paths[0]) : process.cwd();
19381
19151
  const dispatchContext = {
19382
19152
  graderProvider,
19383
19153
  targetResolver,
@@ -19491,7 +19261,7 @@ function filterEvalCases(evalCases, filter) {
19491
19261
  if (!filter) {
19492
19262
  return evalCases;
19493
19263
  }
19494
- return evalCases.filter((evalCase) => import_micromatch4.default.isMatch(evalCase.id, filter));
19264
+ return evalCases.filter((evalCase) => import_micromatch3.default.isMatch(evalCase.id, filter));
19495
19265
  }
19496
19266
  function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
19497
19267
  const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
@@ -19528,8 +19298,6 @@ async function invokeProvider(provider, options) {
19528
19298
  const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
19529
19299
  return await provider.invoke({
19530
19300
  question: promptInputs.question,
19531
- guidelines: promptInputs.guidelines,
19532
- guideline_patterns: evalCase.guideline_patterns,
19533
19301
  chatPrompt: promptInputs.chatPrompt,
19534
19302
  inputFiles: evalCase.file_paths,
19535
19303
  evalCaseId: evalCase.id,
@@ -19557,21 +19325,17 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
19557
19325
  if (isAgentProvider(provider)) {
19558
19326
  agentRequest = {
19559
19327
  question: promptInputs.question,
19560
- guideline_paths: evalCase.guideline_paths,
19561
19328
  error: message
19562
19329
  };
19563
19330
  } else {
19564
19331
  if (promptInputs.chatPrompt) {
19565
19332
  lmRequest = {
19566
19333
  chat_prompt: promptInputs.chatPrompt,
19567
- guideline_paths: evalCase.guideline_paths,
19568
19334
  error: message
19569
19335
  };
19570
19336
  } else {
19571
19337
  lmRequest = {
19572
19338
  question: promptInputs.question,
19573
- guidelines: promptInputs.guidelines,
19574
- guideline_paths: evalCase.guideline_paths,
19575
19339
  error: message
19576
19340
  };
19577
19341
  }
@@ -19584,7 +19348,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
19584
19348
  return {
19585
19349
  timestamp: timestamp.toISOString(),
19586
19350
  testId: evalCase.id,
19587
- dataset: evalCase.dataset,
19351
+ eval_set: evalCase.eval_set,
19588
19352
  conversationId: evalCase.conversation_id,
19589
19353
  score: 0,
19590
19354
  assertions: [{ text: `Error: ${message}`, passed: false }],
@@ -19617,7 +19381,6 @@ function createCacheKey(provider, target, evalCase, promptInputs) {
19617
19381
  hash.update(target.name);
19618
19382
  hash.update(evalCase.id);
19619
19383
  hash.update(promptInputs.question);
19620
- hash.update(promptInputs.guidelines);
19621
19384
  hash.update(promptInputs.systemMessage ?? "");
19622
19385
  if (promptInputs.chatPrompt) {
19623
19386
  hash.update(JSON.stringify(promptInputs.chatPrompt));
@@ -19717,7 +19480,7 @@ function computeWeightedMean(entries) {
19717
19480
 
19718
19481
  // src/evaluation/evaluate.ts
19719
19482
  var import_node_fs14 = require("fs");
19720
- var import_node_path47 = __toESM(require("path"), 1);
19483
+ var import_node_path46 = __toESM(require("path"), 1);
19721
19484
 
19722
19485
  // src/evaluation/providers/function-provider.ts
19723
19486
  function createFunctionProvider(taskFn) {
@@ -19754,7 +19517,7 @@ async function evaluate(config) {
19754
19517
  }
19755
19518
  const gitRoot = await findGitRoot(process.cwd());
19756
19519
  const repoRoot = gitRoot ?? process.cwd();
19757
- const testFilePath = config.specFile ? import_node_path47.default.resolve(config.specFile) : import_node_path47.default.join(process.cwd(), "__programmatic__.yaml");
19520
+ const testFilePath = config.specFile ? import_node_path46.default.resolve(config.specFile) : import_node_path46.default.join(process.cwd(), "__programmatic__.yaml");
19758
19521
  await loadEnvHierarchy(repoRoot, testFilePath);
19759
19522
  let resolvedTarget;
19760
19523
  let taskProvider;
@@ -19820,8 +19583,6 @@ async function evaluate(config) {
19820
19583
  input_segments: inputSegments,
19821
19584
  expected_output: expectedOutput,
19822
19585
  reference_answer: expectedOutputValue,
19823
- guideline_paths: [],
19824
- guideline_patterns: [],
19825
19586
  file_paths: [],
19826
19587
  assertions: assertConfigs.length > 0 ? assertConfigs : void 0,
19827
19588
  metadata: test.metadata
@@ -19883,10 +19644,10 @@ function computeSummary(results, durationMs) {
19883
19644
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
19884
19645
  async function discoverDefaultTarget(repoRoot) {
19885
19646
  const cwd = process.cwd();
19886
- const chain = buildDirectoryChain2(import_node_path47.default.join(cwd, "_placeholder"), repoRoot);
19647
+ const chain = buildDirectoryChain2(import_node_path46.default.join(cwd, "_placeholder"), repoRoot);
19887
19648
  for (const dir of chain) {
19888
19649
  for (const candidate of TARGET_FILE_CANDIDATES) {
19889
- const targetsPath = import_node_path47.default.join(dir, candidate);
19650
+ const targetsPath = import_node_path46.default.join(dir, candidate);
19890
19651
  if (!(0, import_node_fs14.existsSync)(targetsPath)) continue;
19891
19652
  try {
19892
19653
  const definitions = await readTargetDefinitions(targetsPath);
@@ -19903,7 +19664,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
19903
19664
  const chain = buildDirectoryChain2(startPath, repoRoot);
19904
19665
  const envFiles = [];
19905
19666
  for (const dir of chain) {
19906
- const envPath = import_node_path47.default.join(dir, ".env");
19667
+ const envPath = import_node_path46.default.join(dir, ".env");
19907
19668
  if ((0, import_node_fs14.existsSync)(envPath)) envFiles.push(envPath);
19908
19669
  }
19909
19670
  for (let i = 0; i < envFiles.length; i++) {
@@ -20084,8 +19845,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
20084
19845
  }
20085
19846
 
20086
19847
  // src/evaluation/cache/response-cache.ts
20087
- var import_promises32 = require("fs/promises");
20088
- var import_node_path48 = __toESM(require("path"), 1);
19848
+ var import_promises31 = require("fs/promises");
19849
+ var import_node_path47 = __toESM(require("path"), 1);
20089
19850
  var DEFAULT_CACHE_PATH = ".agentv/cache";
20090
19851
  var ResponseCache = class {
20091
19852
  cachePath;
@@ -20095,7 +19856,7 @@ var ResponseCache = class {
20095
19856
  async get(key) {
20096
19857
  const filePath = this.keyToPath(key);
20097
19858
  try {
20098
- const data = await (0, import_promises32.readFile)(filePath, "utf8");
19859
+ const data = await (0, import_promises31.readFile)(filePath, "utf8");
20099
19860
  return JSON.parse(data);
20100
19861
  } catch {
20101
19862
  return void 0;
@@ -20103,13 +19864,13 @@ var ResponseCache = class {
20103
19864
  }
20104
19865
  async set(key, value) {
20105
19866
  const filePath = this.keyToPath(key);
20106
- const dir = import_node_path48.default.dirname(filePath);
20107
- await (0, import_promises32.mkdir)(dir, { recursive: true });
20108
- await (0, import_promises32.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
19867
+ const dir = import_node_path47.default.dirname(filePath);
19868
+ await (0, import_promises31.mkdir)(dir, { recursive: true });
19869
+ await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
20109
19870
  }
20110
19871
  keyToPath(key) {
20111
19872
  const prefix = key.slice(0, 2);
20112
- return import_node_path48.default.join(this.cachePath, prefix, `${key}.json`);
19873
+ return import_node_path47.default.join(this.cachePath, prefix, `${key}.json`);
20113
19874
  }
20114
19875
  };
20115
19876
  function shouldEnableCache(params) {
@@ -20297,7 +20058,7 @@ var OtelTraceExporter = class {
20297
20058
  rootSpan.setAttribute("gen_ai.system", "agentv");
20298
20059
  rootSpan.setAttribute("agentv.test_id", result.testId);
20299
20060
  rootSpan.setAttribute("agentv.target", result.target);
20300
- if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
20061
+ if (result.eval_set) rootSpan.setAttribute("agentv.eval_set", result.eval_set);
20301
20062
  rootSpan.setAttribute("agentv.score", result.score);
20302
20063
  if (captureContent && result.output.length > 0) {
20303
20064
  const lastMsg = result.output[result.output.length - 1];
@@ -20482,14 +20243,14 @@ var OtelStreamingObserver = class {
20482
20243
  // biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
20483
20244
  rootCtx = null;
20484
20245
  /** Create root eval span immediately (visible in backend right away) */
20485
- startEvalCase(testId, target, dataset) {
20246
+ startEvalCase(testId, target, evalSet) {
20486
20247
  const ctx = this.parentCtx ?? this.api.context.active();
20487
20248
  this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
20488
20249
  this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
20489
20250
  this.rootSpan.setAttribute("gen_ai.system", "agentv");
20490
20251
  this.rootSpan.setAttribute("agentv.test_id", testId);
20491
20252
  this.rootSpan.setAttribute("agentv.target", target);
20492
- if (dataset) this.rootSpan.setAttribute("agentv.dataset", dataset);
20253
+ if (evalSet) this.rootSpan.setAttribute("agentv.eval_set", evalSet);
20493
20254
  this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
20494
20255
  }
20495
20256
  /** Create and immediately export a tool span */
@@ -20668,6 +20429,7 @@ function createAgentKernel() {
20668
20429
  extractTargetsFromSuite,
20669
20430
  extractTargetsFromTestCase,
20670
20431
  extractTrialsConfig,
20432
+ extractWorkersFromSuite,
20671
20433
  fileExists,
20672
20434
  findGitRoot,
20673
20435
  freeformEvaluationSchema,
@@ -20682,7 +20444,6 @@ function createAgentKernel() {
20682
20444
  initializeBaseline,
20683
20445
  isAgentSkillsFormat,
20684
20446
  isEvaluatorKind,
20685
- isGuidelineFile,
20686
20447
  isJsonObject,
20687
20448
  isJsonValue,
20688
20449
  isNonEmptyString,