agentv 0.23.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -146,7 +146,7 @@ import { binary, run, subcommands as subcommands2 } from "cmd-ts";
146
146
 
147
147
  // src/commands/eval/index.ts
148
148
  import { stat as stat4 } from "node:fs/promises";
149
- import path19 from "node:path";
149
+ import path20 from "node:path";
150
150
  import {
151
151
  command,
152
152
  flag,
@@ -161,13 +161,14 @@ import fg from "fast-glob";
161
161
  // src/commands/eval/run-eval.ts
162
162
  import { constants as constants6 } from "node:fs";
163
163
  import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
164
- import path18 from "node:path";
164
+ import path19 from "node:path";
165
165
  import { pathToFileURL } from "node:url";
166
166
 
167
- // ../../packages/core/dist/chunk-B2J23S7D.js
167
+ // ../../packages/core/dist/chunk-NDEN3H2B.js
168
168
  import { constants } from "node:fs";
169
169
  import { access, readFile } from "node:fs/promises";
170
170
  import path from "node:path";
171
+ import path2 from "node:path";
171
172
 
172
173
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/external.js
173
174
  var external_exports = {};
@@ -647,8 +648,8 @@ function getErrorMap() {
647
648
 
648
649
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
649
650
  var makeIssue = (params) => {
650
- const { data, path: path26, errorMaps, issueData } = params;
651
- const fullPath = [...path26, ...issueData.path || []];
651
+ const { data, path: path27, errorMaps, issueData } = params;
652
+ const fullPath = [...path27, ...issueData.path || []];
652
653
  const fullIssue = {
653
654
  ...issueData,
654
655
  path: fullPath
@@ -764,11 +765,11 @@ var errorUtil;
764
765
 
765
766
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
766
767
  var ParseInputLazyPath = class {
767
- constructor(parent, value, path26, key2) {
768
+ constructor(parent, value, path27, key2) {
768
769
  this._cachedPath = [];
769
770
  this.parent = parent;
770
771
  this.data = value;
771
- this._path = path26;
772
+ this._path = path27;
772
773
  this._key = key2;
773
774
  }
774
775
  get path() {
@@ -4210,7 +4211,7 @@ var coerce = {
4210
4211
  };
4211
4212
  var NEVER = INVALID;
4212
4213
 
4213
- // ../../packages/core/dist/chunk-B2J23S7D.js
4214
+ // ../../packages/core/dist/chunk-NDEN3H2B.js
4214
4215
  async function fileExists(filePath) {
4215
4216
  try {
4216
4217
  await access(filePath, constants.F_OK);
@@ -4226,6 +4227,10 @@ async function readTextFile(filePath) {
4226
4227
  const content = await readFile(filePath, "utf8");
4227
4228
  return normalizeLineEndings(content);
4228
4229
  }
4230
+ async function readJsonFile(filePath) {
4231
+ const content = await readFile(filePath, "utf8");
4232
+ return JSON.parse(content);
4233
+ }
4229
4234
  async function findGitRoot(startPath) {
4230
4235
  let currentDir = path.dirname(path.resolve(startPath));
4231
4236
  const root2 = path.parse(currentDir).root;
@@ -4331,7 +4336,7 @@ var BASE_TARGET_SCHEMA = external_exports.object({
4331
4336
  judge_target: external_exports.string().optional(),
4332
4337
  workers: external_exports.number().int().min(1).optional()
4333
4338
  }).passthrough();
4334
- var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
4339
+ var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
4335
4340
  function normalizeAzureApiVersion(value) {
4336
4341
  if (!value) {
4337
4342
  return DEFAULT_AZURE_API_VERSION;
@@ -4375,7 +4380,7 @@ function resolveRetryConfig(target) {
4375
4380
  retryableStatusCodes
4376
4381
  };
4377
4382
  }
4378
- function resolveTargetDefinition(definition, env = process.env) {
4383
+ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
4379
4384
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
4380
4385
  const provider = parsed.provider.toLowerCase();
4381
4386
  const providerBatching = resolveOptionalBoolean(
@@ -4448,7 +4453,7 @@ function resolveTargetDefinition(definition, env = process.env) {
4448
4453
  judgeTarget: parsed.judge_target,
4449
4454
  workers: parsed.workers,
4450
4455
  providerBatching,
4451
- config: resolveCliConfig(parsed, env)
4456
+ config: resolveCliConfig(parsed, env, evalFilePath)
4452
4457
  };
4453
4458
  default:
4454
4459
  throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
@@ -4465,7 +4470,10 @@ function resolveAzureConfig(target, env) {
4465
4470
  const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
4466
4471
  const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
4467
4472
  const version2 = normalizeAzureApiVersion(
4468
- resolveOptionalString(versionSource, env, `${target.name} api version`)
4473
+ resolveOptionalString(versionSource, env, `${target.name} api version`, {
4474
+ allowLiteral: true,
4475
+ optionalEnv: true
4476
+ })
4469
4477
  );
4470
4478
  const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
4471
4479
  const maxOutputTokens = resolveOptionalNumber(
@@ -4566,7 +4574,8 @@ function normalizeCodexLogFormat(value) {
4566
4574
  }
4567
4575
  function resolveMockConfig(target) {
4568
4576
  const response = typeof target.response === "string" ? target.response : void 0;
4569
- return { response };
4577
+ const trace2 = Array.isArray(target.trace) ? target.trace : void 0;
4578
+ return { response, trace: trace2 };
4570
4579
  }
4571
4580
  function resolveVSCodeConfig(target, env, insiders) {
4572
4581
  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
@@ -4598,15 +4607,18 @@ function resolveVSCodeConfig(target, env, insiders) {
4598
4607
  workspaceTemplate
4599
4608
  };
4600
4609
  }
4601
- function resolveCliConfig(target, env) {
4610
+ function resolveCliConfig(target, env, evalFilePath) {
4602
4611
  const commandTemplateSource = target.command_template ?? target.commandTemplate;
4603
4612
  const filesFormat = resolveOptionalLiteralString(
4604
4613
  target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
4605
4614
  );
4606
- const cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
4615
+ let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
4607
4616
  allowLiteral: true,
4608
4617
  optionalEnv: true
4609
4618
  });
4619
+ if (!cwd && evalFilePath) {
4620
+ cwd = path2.dirname(path2.resolve(evalFilePath));
4621
+ }
4610
4622
  const timeoutMs = resolveTimeoutMs(
4611
4623
  target.timeout_seconds ?? target.timeoutSeconds,
4612
4624
  `${target.name} timeout`
@@ -4724,17 +4736,15 @@ function resolveOptionalString(source2, env, description, options) {
4724
4736
  if (envVarMatch) {
4725
4737
  const varName = envVarMatch[1];
4726
4738
  const envValue = env[varName];
4727
- if (envValue !== void 0) {
4728
- if (envValue.trim().length === 0) {
4729
- throw new Error(`Environment variable '${varName}' for ${description} is empty`);
4730
- }
4731
- return envValue;
4732
- }
4733
4739
  const optionalEnv = options?.optionalEnv ?? false;
4734
- if (optionalEnv) {
4735
- return void 0;
4740
+ if (envValue === void 0 || envValue.trim().length === 0) {
4741
+ if (optionalEnv) {
4742
+ return void 0;
4743
+ }
4744
+ const status = envValue === void 0 ? "is not set" : "is empty";
4745
+ throw new Error(`Environment variable '${varName}' required for ${description} ${status}`);
4736
4746
  }
4737
- throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
4747
+ return envValue;
4738
4748
  }
4739
4749
  const allowLiteral = options?.allowLiteral ?? false;
4740
4750
  if (!allowLiteral) {
@@ -4889,7 +4899,7 @@ import micromatch from "micromatch";
4889
4899
  import { parse as parse5 } from "yaml";
4890
4900
  import { constants as constants3 } from "node:fs";
4891
4901
  import { access as access3 } from "node:fs/promises";
4892
- import path12 from "node:path";
4902
+ import path13 from "node:path";
4893
4903
  import path32 from "node:path";
4894
4904
  import { readFile as readFile22 } from "node:fs/promises";
4895
4905
  import { readFile as readFile32 } from "node:fs/promises";
@@ -5985,10 +5995,10 @@ function assignProp(target, prop, value) {
5985
5995
  configurable: true
5986
5996
  });
5987
5997
  }
5988
- function getElementAtPath(obj, path26) {
5989
- if (!path26)
5998
+ function getElementAtPath(obj, path27) {
5999
+ if (!path27)
5990
6000
  return obj;
5991
- return path26.reduce((acc, key2) => acc?.[key2], obj);
6001
+ return path27.reduce((acc, key2) => acc?.[key2], obj);
5992
6002
  }
5993
6003
  function promiseAllObject(promisesObj) {
5994
6004
  const keys = Object.keys(promisesObj);
@@ -6308,11 +6318,11 @@ function aborted(x, startIndex = 0) {
6308
6318
  }
6309
6319
  return false;
6310
6320
  }
6311
- function prefixIssues(path26, issues) {
6321
+ function prefixIssues(path27, issues) {
6312
6322
  return issues.map((iss) => {
6313
6323
  var _a17;
6314
6324
  (_a17 = iss).path ?? (_a17.path = []);
6315
- iss.path.unshift(path26);
6325
+ iss.path.unshift(path27);
6316
6326
  return iss;
6317
6327
  });
6318
6328
  }
@@ -6449,7 +6459,7 @@ function treeifyError(error40, _mapper) {
6449
6459
  return issue2.message;
6450
6460
  };
6451
6461
  const result = { errors: [] };
6452
- const processError = (error41, path26 = []) => {
6462
+ const processError = (error41, path27 = []) => {
6453
6463
  var _a17, _b8;
6454
6464
  for (const issue2 of error41.issues) {
6455
6465
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6459,7 +6469,7 @@ function treeifyError(error40, _mapper) {
6459
6469
  } else if (issue2.code === "invalid_element") {
6460
6470
  processError({ issues: issue2.issues }, issue2.path);
6461
6471
  } else {
6462
- const fullpath = [...path26, ...issue2.path];
6472
+ const fullpath = [...path27, ...issue2.path];
6463
6473
  if (fullpath.length === 0) {
6464
6474
  result.errors.push(mapper(issue2));
6465
6475
  continue;
@@ -6489,9 +6499,9 @@ function treeifyError(error40, _mapper) {
6489
6499
  processError(error40);
6490
6500
  return result;
6491
6501
  }
6492
- function toDotPath(path26) {
6502
+ function toDotPath(path27) {
6493
6503
  const segs = [];
6494
- for (const seg of path26) {
6504
+ for (const seg of path27) {
6495
6505
  if (typeof seg === "number")
6496
6506
  segs.push(`[${seg}]`);
6497
6507
  else if (typeof seg === "symbol")
@@ -26044,14 +26054,14 @@ function createAzure(options = {}) {
26044
26054
  description: "Azure OpenAI resource name"
26045
26055
  });
26046
26056
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26047
- const url2 = ({ path: path26, modelId }) => {
26057
+ const url2 = ({ path: path27, modelId }) => {
26048
26058
  var _a24;
26049
26059
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26050
26060
  let fullUrl;
26051
26061
  if (options.useDeploymentBasedUrls) {
26052
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path26}`);
26062
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path27}`);
26053
26063
  } else {
26054
- fullUrl = new URL(`${baseUrlPrefix}/v1${path26}`);
26064
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path27}`);
26055
26065
  }
26056
26066
  fullUrl.searchParams.set("api-version", apiVersion);
26057
26067
  return fullUrl.toString();
@@ -32499,12 +32509,12 @@ import path102 from "node:path";
32499
32509
 
32500
32510
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/agentDispatch.js
32501
32511
  import { stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
32502
- import path10 from "node:path";
32512
+ import path11 from "node:path";
32503
32513
 
32504
32514
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/fs.js
32505
32515
  import { constants as constants2 } from "node:fs";
32506
32516
  import { access as access2, mkdir, readdir, rm, stat } from "node:fs/promises";
32507
- import path2 from "node:path";
32517
+ import path3 from "node:path";
32508
32518
  async function pathExists(target) {
32509
32519
  try {
32510
32520
  await access2(target, constants2.F_OK);
@@ -32520,7 +32530,7 @@ async function readDirEntries(target) {
32520
32530
  const entries = await readdir(target, { withFileTypes: true });
32521
32531
  return entries.map((entry) => ({
32522
32532
  name: entry.name,
32523
- absolutePath: path2.join(target, entry.name),
32533
+ absolutePath: path3.join(target, entry.name),
32524
32534
  isDirectory: entry.isDirectory()
32525
32535
  }));
32526
32536
  }
@@ -32535,9 +32545,9 @@ async function removeIfExists(target) {
32535
32545
  }
32536
32546
 
32537
32547
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/path.js
32538
- import path3 from "node:path";
32548
+ import path4 from "node:path";
32539
32549
  function pathToFileUri(filePath) {
32540
- const absolutePath = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
32550
+ const absolutePath = path4.isAbsolute(filePath) ? filePath : path4.resolve(filePath);
32541
32551
  const normalizedPath = absolutePath.replace(/\\/g, "/");
32542
32552
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
32543
32553
  return `file:///${normalizedPath}`;
@@ -32546,7 +32556,7 @@ function pathToFileUri(filePath) {
32546
32556
  }
32547
32557
 
32548
32558
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/promptBuilder.js
32549
- import path4 from "node:path";
32559
+ import path5 from "node:path";
32550
32560
 
32551
32561
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/template.js
32552
32562
  function renderTemplate(content, variables) {
@@ -32636,8 +32646,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
32636
32646
  });
32637
32647
  }
32638
32648
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
32639
- const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${path4.basename(file2)}`).join("\n");
32640
- const responseList = responseFiles.map((file2) => `"${path4.basename(file2)}"`).join(", ");
32649
+ const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${path5.basename(file2)}`).join("\n");
32650
+ const responseList = responseFiles.map((file2) => `"${path5.basename(file2)}"`).join(", ");
32641
32651
  return renderTemplate(templateContent, {
32642
32652
  requestFiles: requestLines,
32643
32653
  responseList
@@ -32646,7 +32656,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
32646
32656
 
32647
32657
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/responseWaiter.js
32648
32658
  import { readFile as readFile2 } from "node:fs/promises";
32649
- import path5 from "node:path";
32659
+ import path6 from "node:path";
32650
32660
 
32651
32661
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/time.js
32652
32662
  function sleep(ms) {
@@ -32695,7 +32705,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
32695
32705
  }
32696
32706
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false) {
32697
32707
  if (!silent) {
32698
- const fileList = responseFilesFinal.map((file2) => path5.basename(file2)).join(", ");
32708
+ const fileList = responseFilesFinal.map((file2) => path6.basename(file2)).join(", ");
32699
32709
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
32700
32710
  }
32701
32711
  try {
@@ -32745,17 +32755,17 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
32745
32755
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/vscodeProcess.js
32746
32756
  import { exec, spawn } from "node:child_process";
32747
32757
  import { mkdir as mkdir2, writeFile } from "node:fs/promises";
32748
- import path7 from "node:path";
32758
+ import path8 from "node:path";
32749
32759
  import { promisify } from "node:util";
32750
32760
 
32751
32761
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/constants.js
32752
32762
  import os from "node:os";
32753
- import path6 from "node:path";
32763
+ import path7 from "node:path";
32754
32764
  var DEFAULT_LOCK_NAME = "subagent.lock";
32755
32765
  var DEFAULT_ALIVE_FILENAME = ".alive";
32756
32766
  function getDefaultSubagentRoot(vscodeCmd = "code") {
32757
32767
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
32758
- return path6.join(os.homedir(), ".subagent", folder);
32768
+ return path7.join(os.homedir(), ".subagent", folder);
32759
32769
  }
32760
32770
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
32761
32771
 
@@ -32782,11 +32792,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
32782
32792
  spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
32783
32793
  return true;
32784
32794
  }
32785
- const aliveFile = path7.join(subagentDir, DEFAULT_ALIVE_FILENAME);
32795
+ const aliveFile = path8.join(subagentDir, DEFAULT_ALIVE_FILENAME);
32786
32796
  await removeIfExists(aliveFile);
32787
- const githubAgentsDir = path7.join(subagentDir, ".github", "agents");
32797
+ const githubAgentsDir = path8.join(subagentDir, ".github", "agents");
32788
32798
  await mkdir2(githubAgentsDir, { recursive: true });
32789
- const wakeupDst = path7.join(githubAgentsDir, "wakeup.md");
32799
+ const wakeupDst = path8.join(githubAgentsDir, "wakeup.md");
32790
32800
  await writeFile(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
32791
32801
  spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
32792
32802
  await sleep(100);
@@ -32796,7 +32806,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
32796
32806
  "chat",
32797
32807
  "-m",
32798
32808
  wakeupChatId,
32799
- `create a file named .alive in the ${path7.basename(subagentDir)} folder`
32809
+ `create a file named .alive in the ${path8.basename(subagentDir)} folder`
32800
32810
  ];
32801
32811
  spawn(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
32802
32812
  const start = Date.now();
@@ -32811,10 +32821,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
32811
32821
  }
32812
32822
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
32813
32823
  try {
32814
- const workspacePath = path7.join(subagentDir, `${path7.basename(subagentDir)}.code-workspace`);
32815
- const messagesDir = path7.join(subagentDir, "messages");
32824
+ const workspacePath = path8.join(subagentDir, `${path8.basename(subagentDir)}.code-workspace`);
32825
+ const messagesDir = path8.join(subagentDir, "messages");
32816
32826
  await mkdir2(messagesDir, { recursive: true });
32817
- const reqFile = path7.join(messagesDir, `${timestamp}_req.md`);
32827
+ const reqFile = path8.join(messagesDir, `${timestamp}_req.md`);
32818
32828
  await writeFile(reqFile, requestInstructions, { encoding: "utf8" });
32819
32829
  const reqUri = pathToFileUri(reqFile);
32820
32830
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -32822,8 +32832,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
32822
32832
  chatArgs.push("-a", attachment);
32823
32833
  }
32824
32834
  chatArgs.push("-a", reqFile);
32825
- chatArgs.push(`Follow instructions in [${path7.basename(reqFile)}](${reqUri})`);
32826
- const workspaceReady = await ensureWorkspaceFocused(workspacePath, path7.basename(subagentDir), subagentDir, vscodeCmd);
32835
+ chatArgs.push(`Follow instructions in [${path8.basename(reqFile)}](${reqUri})`);
32836
+ const workspaceReady = await ensureWorkspaceFocused(workspacePath, path8.basename(subagentDir), subagentDir, vscodeCmd);
32827
32837
  if (!workspaceReady) {
32828
32838
  console.error("warning: Workspace may not be fully ready");
32829
32839
  }
@@ -32837,15 +32847,15 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
32837
32847
  }
32838
32848
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
32839
32849
  try {
32840
- const workspacePath = path7.join(subagentDir, `${path7.basename(subagentDir)}.code-workspace`);
32841
- const messagesDir = path7.join(subagentDir, "messages");
32850
+ const workspacePath = path8.join(subagentDir, `${path8.basename(subagentDir)}.code-workspace`);
32851
+ const messagesDir = path8.join(subagentDir, "messages");
32842
32852
  await mkdir2(messagesDir, { recursive: true });
32843
32853
  const chatArgs = ["-r", "chat", "-m", chatId];
32844
32854
  for (const attachment of attachmentPaths) {
32845
32855
  chatArgs.push("-a", attachment);
32846
32856
  }
32847
32857
  chatArgs.push(chatInstruction);
32848
- const workspaceReady = await ensureWorkspaceFocused(workspacePath, path7.basename(subagentDir), subagentDir, vscodeCmd);
32858
+ const workspaceReady = await ensureWorkspaceFocused(workspacePath, path8.basename(subagentDir), subagentDir, vscodeCmd);
32849
32859
  if (!workspaceReady) {
32850
32860
  console.error("warning: Workspace may not be fully ready");
32851
32861
  }
@@ -32860,10 +32870,10 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
32860
32870
 
32861
32871
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/workspaceManager.js
32862
32872
  import { copyFile, mkdir as mkdir3, readFile as readFile3, readdir as readdir2, stat as stat2, writeFile as writeFile2 } from "node:fs/promises";
32863
- import path9 from "node:path";
32873
+ import path10 from "node:path";
32864
32874
 
32865
32875
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/workspace.js
32866
- import path8 from "node:path";
32876
+ import path9 from "node:path";
32867
32877
 
32868
32878
  // ../../node_modules/.bun/json5@2.2.3/node_modules/json5/dist/index.mjs
32869
32879
  var Space_Separator = /[\u1680\u2000-\u200A\u202F\u205F\u3000]/;
@@ -33966,10 +33976,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
33966
33976
  }
33967
33977
  const transformedFolders = workspace.folders.map((folder) => {
33968
33978
  const folderPath = folder.path;
33969
- if (path8.isAbsolute(folderPath)) {
33979
+ if (path9.isAbsolute(folderPath)) {
33970
33980
  return folder;
33971
33981
  }
33972
- const absolutePath = path8.resolve(templateDir, folderPath);
33982
+ const absolutePath = path9.resolve(templateDir, folderPath);
33973
33983
  return {
33974
33984
  ...folder,
33975
33985
  path: absolutePath
@@ -33991,19 +34001,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
33991
34001
  if (locationMap && typeof locationMap === "object") {
33992
34002
  const transformedMap = {};
33993
34003
  for (const [locationPath, value] of Object.entries(locationMap)) {
33994
- const isAbsolute = path8.isAbsolute(locationPath);
34004
+ const isAbsolute = path9.isAbsolute(locationPath);
33995
34005
  if (isAbsolute) {
33996
34006
  transformedMap[locationPath] = value;
33997
34007
  } else {
33998
34008
  const firstGlobIndex = locationPath.search(/[*]/);
33999
34009
  if (firstGlobIndex === -1) {
34000
- const resolvedPath = path8.resolve(templateDir, locationPath).replace(/\\/g, "/");
34010
+ const resolvedPath = path9.resolve(templateDir, locationPath).replace(/\\/g, "/");
34001
34011
  transformedMap[resolvedPath] = value;
34002
34012
  } else {
34003
34013
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
34004
34014
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
34005
34015
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
34006
- const resolvedPath = (path8.resolve(templateDir, basePath) + patternPath).replace(/\\/g, "/");
34016
+ const resolvedPath = (path9.resolve(templateDir, basePath) + patternPath).replace(/\\/g, "/");
34007
34017
  transformedMap[resolvedPath] = value;
34008
34018
  }
34009
34019
  }
@@ -34041,7 +34051,7 @@ async function findUnlockedSubagent(subagentRoot) {
34041
34051
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
34042
34052
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
34043
34053
  for (const subagent of subagents) {
34044
- const lockFile = path9.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
34054
+ const lockFile = path10.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
34045
34055
  if (!await pathExists(lockFile)) {
34046
34056
  return subagent.absolutePath;
34047
34057
  }
@@ -34051,7 +34061,7 @@ async function findUnlockedSubagent(subagentRoot) {
34051
34061
  async function copyAgentConfig(subagentDir, workspaceTemplate) {
34052
34062
  let workspaceContent;
34053
34063
  if (workspaceTemplate) {
34054
- const workspaceSrc = path9.resolve(workspaceTemplate);
34064
+ const workspaceSrc = path10.resolve(workspaceTemplate);
34055
34065
  if (!await pathExists(workspaceSrc)) {
34056
34066
  throw new Error(`workspace template not found: ${workspaceSrc}`);
34057
34067
  }
@@ -34064,37 +34074,37 @@ async function copyAgentConfig(subagentDir, workspaceTemplate) {
34064
34074
  } else {
34065
34075
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
34066
34076
  }
34067
- const workspaceName = `${path9.basename(subagentDir)}.code-workspace`;
34068
- const workspaceDst = path9.join(subagentDir, workspaceName);
34069
- const templateDir = workspaceTemplate ? path9.dirname(path9.resolve(workspaceTemplate)) : subagentDir;
34077
+ const workspaceName = `${path10.basename(subagentDir)}.code-workspace`;
34078
+ const workspaceDst = path10.join(subagentDir, workspaceName);
34079
+ const templateDir = workspaceTemplate ? path10.dirname(path10.resolve(workspaceTemplate)) : subagentDir;
34070
34080
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
34071
34081
  const transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
34072
34082
  await writeFile2(workspaceDst, transformedContent, "utf8");
34073
- const messagesDir = path9.join(subagentDir, "messages");
34083
+ const messagesDir = path10.join(subagentDir, "messages");
34074
34084
  await mkdir3(messagesDir, { recursive: true });
34075
34085
  return { workspace: workspaceDst, messagesDir };
34076
34086
  }
34077
34087
  async function createSubagentLock(subagentDir) {
34078
- const messagesDir = path9.join(subagentDir, "messages");
34088
+ const messagesDir = path10.join(subagentDir, "messages");
34079
34089
  if (await pathExists(messagesDir)) {
34080
34090
  const files = await readdir2(messagesDir);
34081
34091
  await Promise.all(files.map(async (file2) => {
34082
- const target = path9.join(messagesDir, file2);
34092
+ const target = path10.join(messagesDir, file2);
34083
34093
  await removeIfExists(target);
34084
34094
  }));
34085
34095
  }
34086
- const githubAgentsDir = path9.join(subagentDir, ".github", "agents");
34096
+ const githubAgentsDir = path10.join(subagentDir, ".github", "agents");
34087
34097
  if (await pathExists(githubAgentsDir)) {
34088
34098
  const agentFiles = await readdir2(githubAgentsDir);
34089
34099
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
34090
- await Promise.all(agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path9.join(githubAgentsDir, file2))));
34100
+ await Promise.all(agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path10.join(githubAgentsDir, file2))));
34091
34101
  }
34092
- const lockFile = path9.join(subagentDir, DEFAULT_LOCK_NAME);
34102
+ const lockFile = path10.join(subagentDir, DEFAULT_LOCK_NAME);
34093
34103
  await writeFile2(lockFile, "", { encoding: "utf8" });
34094
34104
  return lockFile;
34095
34105
  }
34096
34106
  async function removeSubagentLock(subagentDir) {
34097
- const lockFile = path9.join(subagentDir, DEFAULT_LOCK_NAME);
34107
+ const lockFile = path10.join(subagentDir, DEFAULT_LOCK_NAME);
34098
34108
  await removeIfExists(lockFile);
34099
34109
  }
34100
34110
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun) {
@@ -34114,9 +34124,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
34114
34124
  return 1;
34115
34125
  }
34116
34126
  if (promptFile) {
34117
- const githubAgentsDir = path9.join(subagentDir, ".github", "agents");
34127
+ const githubAgentsDir = path10.join(subagentDir, ".github", "agents");
34118
34128
  await mkdir3(githubAgentsDir, { recursive: true });
34119
- const agentFile = path9.join(githubAgentsDir, `${chatId}.md`);
34129
+ const agentFile = path10.join(githubAgentsDir, `${chatId}.md`);
34120
34130
  try {
34121
34131
  await copyFile(promptFile, agentFile);
34122
34132
  } catch (error40) {
@@ -34135,7 +34145,7 @@ async function resolvePromptFile(promptFile) {
34135
34145
  if (!promptFile) {
34136
34146
  return void 0;
34137
34147
  }
34138
- const resolvedPrompt = path10.resolve(promptFile);
34148
+ const resolvedPrompt = path11.resolve(promptFile);
34139
34149
  if (!await pathExists(resolvedPrompt)) {
34140
34150
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
34141
34151
  }
@@ -34151,7 +34161,7 @@ async function resolveAttachments(extraAttachments) {
34151
34161
  }
34152
34162
  const resolved = [];
34153
34163
  for (const attachment of extraAttachments) {
34154
- const resolvedPath = path10.resolve(attachment);
34164
+ const resolvedPath = path11.resolve(attachment);
34155
34165
  if (!await pathExists(resolvedPath)) {
34156
34166
  throw new Error(`Attachment not found: ${resolvedPath}`);
34157
34167
  }
@@ -34180,7 +34190,7 @@ async function dispatchAgentSession(options) {
34180
34190
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
34181
34191
  };
34182
34192
  }
34183
- const subagentName = path10.basename(subagentDir);
34193
+ const subagentName = path11.basename(subagentDir);
34184
34194
  const chatId = Math.random().toString(16).slice(2, 10);
34185
34195
  const preparationResult = await prepareSubagentDirectory(subagentDir, resolvedPrompt, chatId, workspaceTemplate, dryRun);
34186
34196
  if (preparationResult !== 0) {
@@ -34201,9 +34211,9 @@ async function dispatchAgentSession(options) {
34201
34211
  };
34202
34212
  }
34203
34213
  const timestamp = generateTimestamp();
34204
- const messagesDir = path10.join(subagentDir, "messages");
34205
- const responseFileTmp = path10.join(messagesDir, `${timestamp}_res.tmp.md`);
34206
- const responseFileFinal = path10.join(messagesDir, `${timestamp}_res.md`);
34214
+ const messagesDir = path11.join(subagentDir, "messages");
34215
+ const responseFileTmp = path11.join(messagesDir, `${timestamp}_res.tmp.md`);
34216
+ const responseFileFinal = path11.join(messagesDir, `${timestamp}_res.md`);
34207
34217
  const requestInstructions = createRequestPrompt(userQuery, responseFileTmp, responseFileFinal, templateContent);
34208
34218
  if (dryRun) {
34209
34219
  return {
@@ -34293,7 +34303,7 @@ async function dispatchBatchAgent(options) {
34293
34303
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
34294
34304
  };
34295
34305
  }
34296
- subagentName = path10.basename(subagentDir);
34306
+ subagentName = path11.basename(subagentDir);
34297
34307
  const chatId = Math.random().toString(16).slice(2, 10);
34298
34308
  const preparationResult = await prepareSubagentDirectory(subagentDir, resolvedPrompt, chatId, workspaceTemplate, dryRun);
34299
34309
  if (preparationResult !== 0) {
@@ -34318,11 +34328,11 @@ async function dispatchBatchAgent(options) {
34318
34328
  };
34319
34329
  }
34320
34330
  const timestamp = generateTimestamp();
34321
- const messagesDir = path10.join(subagentDir, "messages");
34322
- requestFiles = userQueries.map((_, index) => path10.join(messagesDir, `${timestamp}_${index}_req.md`));
34323
- const responseTmpFiles = userQueries.map((_, index) => path10.join(messagesDir, `${timestamp}_${index}_res.tmp.md`));
34324
- responseFilesFinal = userQueries.map((_, index) => path10.join(messagesDir, `${timestamp}_${index}_res.md`));
34325
- const orchestratorFile = path10.join(messagesDir, `${timestamp}_orchestrator.md`);
34331
+ const messagesDir = path11.join(subagentDir, "messages");
34332
+ requestFiles = userQueries.map((_, index) => path11.join(messagesDir, `${timestamp}_${index}_req.md`));
34333
+ const responseTmpFiles = userQueries.map((_, index) => path11.join(messagesDir, `${timestamp}_${index}_res.tmp.md`));
34334
+ responseFilesFinal = userQueries.map((_, index) => path11.join(messagesDir, `${timestamp}_${index}_res.md`));
34335
+ const orchestratorFile = path11.join(messagesDir, `${timestamp}_orchestrator.md`);
34326
34336
  if (!dryRun) {
34327
34337
  await Promise.all(userQueries.map((query, index) => writeFile3(requestFiles[index], createBatchRequestPrompt(query, responseTmpFiles[index], responseFilesFinal[index], batchRequestTemplateContent), { encoding: "utf8" })));
34328
34338
  const orchestratorContent = createBatchOrchestratorPrompt(requestFiles, responseFilesFinal, orchestratorTemplateContent);
@@ -34391,7 +34401,7 @@ async function dispatchBatchAgent(options) {
34391
34401
 
34392
34402
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/provision.js
34393
34403
  import { writeFile as writeFile4 } from "node:fs/promises";
34394
- import path11 from "node:path";
34404
+ import path12 from "node:path";
34395
34405
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
34396
34406
  folders: [
34397
34407
  {
@@ -34414,7 +34424,7 @@ async function provisionSubagents(options) {
34414
34424
  if (!Number.isInteger(subagents) || subagents < 1) {
34415
34425
  throw new Error("subagents must be a positive integer");
34416
34426
  }
34417
- const targetPath = path11.resolve(targetRoot);
34427
+ const targetPath = path12.resolve(targetRoot);
34418
34428
  if (!dryRun) {
34419
34429
  await ensureDir(targetPath);
34420
34430
  }
@@ -34435,7 +34445,7 @@ async function provisionSubagents(options) {
34435
34445
  continue;
34436
34446
  }
34437
34447
  highestNumber = Math.max(highestNumber, parsed);
34438
- const lockFile = path11.join(entry.absolutePath, lockName);
34448
+ const lockFile = path12.join(entry.absolutePath, lockName);
34439
34449
  const locked = await pathExists(lockFile);
34440
34450
  if (locked) {
34441
34451
  lockedSubagents.add(entry.absolutePath);
@@ -34452,11 +34462,11 @@ async function provisionSubagents(options) {
34452
34462
  break;
34453
34463
  }
34454
34464
  const subagentDir = subagent.absolutePath;
34455
- const githubAgentsDir = path11.join(subagentDir, ".github", "agents");
34456
- const lockFile = path11.join(subagentDir, lockName);
34457
- const workspaceDst = path11.join(subagentDir, `${path11.basename(subagentDir)}.code-workspace`);
34458
- const wakeupDst = path11.join(githubAgentsDir, "wakeup.md");
34459
- const subagentDst = path11.join(githubAgentsDir, "subagent.md");
34465
+ const githubAgentsDir = path12.join(subagentDir, ".github", "agents");
34466
+ const lockFile = path12.join(subagentDir, lockName);
34467
+ const workspaceDst = path12.join(subagentDir, `${path12.basename(subagentDir)}.code-workspace`);
34468
+ const wakeupDst = path12.join(githubAgentsDir, "wakeup.md");
34469
+ const subagentDst = path12.join(githubAgentsDir, "subagent.md");
34460
34470
  const isLocked = await pathExists(lockFile);
34461
34471
  if (isLocked && !force) {
34462
34472
  continue;
@@ -34494,11 +34504,11 @@ async function provisionSubagents(options) {
34494
34504
  let nextIndex = highestNumber;
34495
34505
  while (subagentsProvisioned < subagents) {
34496
34506
  nextIndex += 1;
34497
- const subagentDir = path11.join(targetPath, `subagent-${nextIndex}`);
34498
- const githubAgentsDir = path11.join(subagentDir, ".github", "agents");
34499
- const workspaceDst = path11.join(subagentDir, `${path11.basename(subagentDir)}.code-workspace`);
34500
- const wakeupDst = path11.join(githubAgentsDir, "wakeup.md");
34501
- const subagentDst = path11.join(githubAgentsDir, "subagent.md");
34507
+ const subagentDir = path12.join(targetPath, `subagent-${nextIndex}`);
34508
+ const githubAgentsDir = path12.join(subagentDir, ".github", "agents");
34509
+ const workspaceDst = path12.join(subagentDir, `${path12.basename(subagentDir)}.code-workspace`);
34510
+ const wakeupDst = path12.join(githubAgentsDir, "wakeup.md");
34511
+ const subagentDst = path12.join(githubAgentsDir, "subagent.md");
34502
34512
  if (!dryRun) {
34503
34513
  await ensureDir(subagentDir);
34504
34514
  await ensureDir(githubAgentsDir);
@@ -34562,11 +34572,47 @@ function isTestMessage(value) {
34562
34572
  }
34563
34573
  return candidate.content.every(isJsonObject);
34564
34574
  }
34565
// Every evaluator `type` string that an eval definition may use.
var EVALUATOR_KIND_VALUES = [
  "code_judge",
  "llm_judge",
  "rubric",
  "composite",
  "tool_trajectory",
  "expected_messages"
];
// Set view of the list above, used for membership checks.
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
/**
 * Report whether `value` is one of the known evaluator kind strings.
 *
 * @param {unknown} value - Candidate value of any type.
 * @returns {boolean} `true` only for a string contained in EVALUATOR_KIND_SET.
 */
function isEvaluatorKind(value) {
  if (typeof value !== "string") {
    return false;
  }
  return EVALUATOR_KIND_SET.has(value);
}
34587
/**
 * Report whether `value` is one of the recognised trace event type strings:
 * "model_step", "tool_call", "tool_result", "message" or "error".
 *
 * @param {unknown} value - Candidate value of any type.
 * @returns {boolean} `true` only for one of the five strings above.
 */
function isTraceEventType(value) {
  // `switch` compares with strict equality, so non-string values never match.
  switch (value) {
    case "model_step":
    case "tool_call":
    case "tool_result":
    case "message":
    case "error":
      return true;
    default:
      return false;
  }
}
34590
/**
 * Structural check for a trace event: a non-null object whose `type` passes
 * isTraceEventType and whose `timestamp` is a string. No other field is inspected.
 *
 * @param {unknown} value - Candidate value of any type.
 * @returns {boolean} `true` when `value` has the shape described above.
 */
function isTraceEvent(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  // The type is checked first; `timestamp` is only read once the type is valid.
  if (!isTraceEventType(value.type)) {
    return false;
  }
  return typeof value.timestamp === "string";
}
34597
/**
 * Build aggregate statistics for a trace.
 *
 * @param {Array<object>} trace2 - Trace events; only `type` and `name` are read.
 * @returns {{eventCount: number, toolNames: string[], toolCallsByName: Object<string, number>, errorCount: number}}
 *   `eventCount` is the number of events of any type; `toolCallsByName` counts
 *   "tool_call" events that carry a truthy `name`; `toolNames` lists those names
 *   sorted; `errorCount` counts "error" events.
 */
function computeTraceSummary(trace2) {
  // Count in a Map rather than a plain object. With `{}` a tool called
  // "constructor" or "toString" would start from the inherited member instead
  // of 0, and "__proto__" would be swallowed by the prototype setter.
  const callCounts = new Map();
  let errorCount = 0;
  for (const event of trace2) {
    if (event.type === "tool_call" && event.name) {
      // String() mirrors the property-key coercion the object-based version applied.
      const toolName = String(event.name);
      callCounts.set(toolName, (callCounts.get(toolName) ?? 0) + 1);
    }
    if (event.type === "error") {
      errorCount++;
    }
  }
  // Object.fromEntries defines own data properties, so every counted name
  // (including "__proto__") ends up as a real key of the returned object.
  const toolCallsByName = Object.fromEntries(callCounts);
  return {
    eventCount: trace2.length,
    toolNames: Object.keys(toolCallsByName).sort(),
    toolCallsByName,
    errorCount
  };
}
34570
34616
  function extractCodeBlocks(segments) {
34571
34617
  const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
34572
34618
  const codeBlocks = [];
@@ -34660,15 +34706,15 @@ function resolveToAbsolutePath(candidate) {
34660
34706
  if (candidate.startsWith("file://")) {
34661
34707
  return new URL(candidate).pathname;
34662
34708
  }
34663
- return path12.resolve(candidate);
34709
+ return path13.resolve(candidate);
34664
34710
  }
34665
34711
  throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
34666
34712
  }
34667
34713
  function buildDirectoryChain2(filePath, repoRoot) {
34668
34714
  const directories = [];
34669
34715
  const seen = /* @__PURE__ */ new Set();
34670
- const boundary = path12.resolve(repoRoot);
34671
- let current = path12.resolve(path12.dirname(filePath));
34716
+ const boundary = path13.resolve(repoRoot);
34717
+ let current = path13.resolve(path13.dirname(filePath));
34672
34718
  while (current !== void 0) {
34673
34719
  if (!seen.has(current)) {
34674
34720
  directories.push(current);
@@ -34677,7 +34723,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
34677
34723
  if (current === boundary) {
34678
34724
  break;
34679
34725
  }
34680
- const parent = path12.dirname(current);
34726
+ const parent = path13.dirname(current);
34681
34727
  if (parent === current) {
34682
34728
  break;
34683
34729
  }
@@ -34691,16 +34737,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
34691
34737
  function buildSearchRoots2(evalPath, repoRoot) {
34692
34738
  const uniqueRoots = [];
34693
34739
  const addRoot = (root2) => {
34694
- const normalized = path12.resolve(root2);
34740
+ const normalized = path13.resolve(root2);
34695
34741
  if (!uniqueRoots.includes(normalized)) {
34696
34742
  uniqueRoots.push(normalized);
34697
34743
  }
34698
34744
  };
34699
- let currentDir = path12.dirname(evalPath);
34745
+ let currentDir = path13.dirname(evalPath);
34700
34746
  let reachedBoundary = false;
34701
34747
  while (!reachedBoundary) {
34702
34748
  addRoot(currentDir);
34703
- const parentDir = path12.dirname(currentDir);
34749
+ const parentDir = path13.dirname(currentDir);
34704
34750
  if (currentDir === repoRoot || parentDir === currentDir) {
34705
34751
  reachedBoundary = true;
34706
34752
  } else {
@@ -34718,16 +34764,16 @@ function trimLeadingSeparators2(value) {
34718
34764
  async function resolveFileReference2(rawValue, searchRoots) {
34719
34765
  const displayPath = trimLeadingSeparators2(rawValue);
34720
34766
  const potentialPaths = [];
34721
- if (path12.isAbsolute(rawValue)) {
34722
- potentialPaths.push(path12.normalize(rawValue));
34767
+ if (path13.isAbsolute(rawValue)) {
34768
+ potentialPaths.push(path13.normalize(rawValue));
34723
34769
  }
34724
34770
  for (const base of searchRoots) {
34725
- potentialPaths.push(path12.resolve(base, displayPath));
34771
+ potentialPaths.push(path13.resolve(base, displayPath));
34726
34772
  }
34727
34773
  const attempted = [];
34728
34774
  const seen = /* @__PURE__ */ new Set();
34729
34775
  for (const candidate of potentialPaths) {
34730
- const absoluteCandidate = path12.resolve(candidate);
34776
+ const absoluteCandidate = path13.resolve(candidate);
34731
34777
  if (seen.has(absoluteCandidate)) {
34732
34778
  continue;
34733
34779
  }
@@ -34885,6 +34931,7 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
34885
34931
  logWarning2(`Skipping code_judge evaluator '${name16}' in '${evalId}': missing script`);
34886
34932
  continue;
34887
34933
  }
34934
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
34888
34935
  const cwd = asString2(rawEvaluator.cwd);
34889
34936
  let resolvedCwd;
34890
34937
  if (cwd) {
@@ -34905,7 +34952,8 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
34905
34952
  type: "code",
34906
34953
  script,
34907
34954
  cwd,
34908
- resolvedCwd
34955
+ resolvedCwd,
34956
+ ...weight2 !== void 0 ? { weight: weight2 } : {}
34909
34957
  });
34910
34958
  continue;
34911
34959
  }
@@ -35000,14 +35048,89 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35000
35048
  ...promptPath2 ? { promptPath: promptPath2 } : {}
35001
35049
  };
35002
35050
  }
35051
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35003
35052
  evaluators.push({
35004
35053
  name: name16,
35005
35054
  type: "composite",
35006
35055
  evaluators: memberEvaluators,
35007
- aggregator
35056
+ aggregator,
35057
+ ...weight2 !== void 0 ? { weight: weight2 } : {}
35058
+ });
35059
+ continue;
35060
+ }
35061
+ if (typeValue === "expected_messages") {
35062
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35063
+ evaluators.push({
35064
+ name: name16,
35065
+ type: "expected_messages",
35066
+ ...weight2 !== void 0 ? { weight: weight2 } : {}
35008
35067
  });
35009
35068
  continue;
35010
35069
  }
35070
+ if (typeValue === "tool_trajectory") {
35071
+ const mode = asString2(rawEvaluator.mode);
35072
+ if (mode !== "any_order" && mode !== "in_order" && mode !== "exact") {
35073
+ logWarning2(
35074
+ `Skipping tool_trajectory evaluator '${name16}' in '${evalId}': invalid mode '${mode}' (must be any_order, in_order, or exact)`
35075
+ );
35076
+ continue;
35077
+ }
35078
+ const rawMinimums = rawEvaluator.minimums;
35079
+ let minimums;
35080
+ if (rawMinimums !== void 0) {
35081
+ if (!isJsonObject2(rawMinimums)) {
35082
+ logWarning2(
35083
+ `Skipping tool_trajectory evaluator '${name16}' in '${evalId}': minimums must be an object`
35084
+ );
35085
+ continue;
35086
+ }
35087
+ minimums = {};
35088
+ for (const [toolName, count] of Object.entries(rawMinimums)) {
35089
+ if (typeof count === "number" && count >= 0) {
35090
+ minimums[toolName] = count;
35091
+ }
35092
+ }
35093
+ }
35094
+ const rawExpected = rawEvaluator.expected;
35095
+ let expected;
35096
+ if (rawExpected !== void 0) {
35097
+ if (!Array.isArray(rawExpected)) {
35098
+ logWarning2(
35099
+ `Skipping tool_trajectory evaluator '${name16}' in '${evalId}': expected must be an array`
35100
+ );
35101
+ continue;
35102
+ }
35103
+ expected = [];
35104
+ for (const item of rawExpected) {
35105
+ if (isJsonObject2(item) && typeof item.tool === "string") {
35106
+ expected.push({ tool: item.tool });
35107
+ }
35108
+ }
35109
+ }
35110
+ if (mode === "any_order" && !minimums) {
35111
+ logWarning2(
35112
+ `Skipping tool_trajectory evaluator '${name16}' in '${evalId}': any_order mode requires minimums`
35113
+ );
35114
+ continue;
35115
+ }
35116
+ if ((mode === "in_order" || mode === "exact") && !expected) {
35117
+ logWarning2(
35118
+ `Skipping tool_trajectory evaluator '${name16}' in '${evalId}': ${mode} mode requires expected`
35119
+ );
35120
+ continue;
35121
+ }
35122
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35123
+ const config2 = {
35124
+ name: name16,
35125
+ type: "tool_trajectory",
35126
+ mode,
35127
+ ...minimums ? { minimums } : {},
35128
+ ...expected ? { expected } : {},
35129
+ ...weight2 !== void 0 ? { weight: weight2 } : {}
35130
+ };
35131
+ evaluators.push(config2);
35132
+ continue;
35133
+ }
35011
35134
  const prompt = asString2(rawEvaluator.prompt);
35012
35135
  let promptPath;
35013
35136
  if (prompt) {
@@ -35044,19 +35167,23 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35044
35167
  logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
35045
35168
  continue;
35046
35169
  }
35170
+ const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35047
35171
  evaluators.push({
35048
35172
  name: name16,
35049
35173
  type: "llm_judge",
35050
- rubrics: parsedRubrics
35174
+ rubrics: parsedRubrics,
35175
+ ...weight2 !== void 0 ? { weight: weight2 } : {}
35051
35176
  });
35052
35177
  continue;
35053
35178
  }
35179
+ const weight = validateWeight(rawEvaluator.weight, name16, evalId);
35054
35180
  evaluators.push({
35055
35181
  name: name16,
35056
35182
  type: "llm_judge",
35057
35183
  prompt,
35058
35184
  promptPath,
35059
- ...parsedRubrics && parsedRubrics.length > 0 ? { rubrics: parsedRubrics } : {}
35185
+ ...parsedRubrics && parsedRubrics.length > 0 ? { rubrics: parsedRubrics } : {},
35186
+ ...weight !== void 0 ? { weight } : {}
35060
35187
  });
35061
35188
  }
35062
35189
  return evaluators.length > 0 ? evaluators : void 0;
@@ -35086,6 +35213,27 @@ ${detailBlock}${ANSI_RESET3}`);
35086
35213
  console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET3}`);
35087
35214
  }
35088
35215
  }
35216
/**
 * Validate the optional `weight` of an evaluator definition.
 *
 * @param {unknown} rawWeight - Value read from the evaluator entry.
 * @param {string} evaluatorName - Evaluator name, used in error messages.
 * @param {string} evalId - Eval case id, used in error messages.
 * @returns {number|undefined} `undefined` when no weight was given, otherwise the weight itself.
 * @throws {Error} When the weight is not a number, is not finite, or is negative.
 */
function validateWeight(rawWeight, evaluatorName, evalId) {
  if (rawWeight === void 0) {
    return void 0;
  }
  // Checks run from most to least fundamental; only the first failure is reported.
  let failure;
  if (typeof rawWeight !== "number") {
    failure = `Invalid weight for evaluator '${evaluatorName}' in '${evalId}': must be a number`;
  } else if (!Number.isFinite(rawWeight)) {
    failure = `Invalid weight for evaluator '${evaluatorName}' in '${evalId}': must be finite (got ${rawWeight})`;
  } else if (rawWeight < 0) {
    failure = `Invalid weight for evaluator '${evaluatorName}' in '${evalId}': must be non-negative (got ${rawWeight})`;
  }
  if (failure !== void 0) {
    throw new Error(failure);
  }
  return rawWeight;
}
35089
35237
  var ANSI_YELLOW4 = "\x1B[33m";
35090
35238
  var ANSI_RESET4 = "\x1B[0m";
35091
35239
  async function processMessages(options) {
@@ -35257,6 +35405,67 @@ ${detailBlock}${ANSI_RESET4}`);
35257
35405
  console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
35258
35406
  }
35259
35407
  }
35408
/**
 * Convert `expected_messages` entries into output segments.
 *
 * Each message yields one segment carrying its `role`. `tool_calls` is copied
 * only for assistant messages that define it. String content is copied as-is.
 * Array content is processed entry by entry: non-object entries are dropped,
 * `type: "file"` entries are resolved against `searchRoots` and replaced by the
 * file's text (CRLF normalised to LF), and every other entry is cloned.
 * Content of any other type leaves the segment without a `content` field.
 *
 * File references that cannot be resolved or read are logged and skipped;
 * they never abort processing.
 *
 * @param {object} options
 * @param {Array<object>} options.messages - Expected messages to convert.
 * @param {Array<string>} options.searchRoots - Passed to resolveFileReference2.
 * @param {boolean} [options.verbose] - Log each file that was loaded.
 *   (`options.repoRootPath` may be supplied by callers but is not used here.)
 * @returns {Promise<Array<object>>} One segment per input message, in order.
 */
async function processExpectedMessages(options) {
  const { messages, searchRoots, verbose } = options;
  const segments = [];
  for (const message of messages) {
    const segment = {
      role: message.role
    };
    if (message.role === "assistant" && message.tool_calls !== void 0) {
      segment.tool_calls = message.tool_calls;
    }
    const content = message.content;
    if (typeof content === "string") {
      segment.content = content;
    } else if (Array.isArray(content)) {
      const processedContent = [];
      for (const rawSegment of content) {
        if (!isJsonObject(rawSegment)) {
          continue;
        }
        if (asString3(rawSegment.type) !== "file") {
          processedContent.push(cloneJsonObject(rawSegment));
          continue;
        }
        const rawValue = asString3(rawSegment.value);
        if (!rawValue) {
          continue;
        }
        const { displayPath, resolvedPath, attempted } = await resolveFileReference2(
          rawValue,
          searchRoots
        );
        if (!resolvedPath) {
          const attempts = attempted.length ? [" Tried:", ...attempted.map((candidate) => ` ${candidate}`)] : void 0;
          logWarning3(`File not found in expected_messages: ${displayPath}`, attempts);
          continue;
        }
        try {
          const fileContent = (await readFile32(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
          processedContent.push({
            type: "file",
            path: displayPath,
            text: fileContent,
            resolvedPath: path42.resolve(resolvedPath)
          });
          if (verbose) {
            console.log(` [Expected Output File] Found: ${displayPath}`);
            console.log(` Resolved to: ${resolvedPath}`);
          }
        } catch (error40) {
          // A rejection is not guaranteed to be an Error. Fall back to String()
          // so the warning stays meaningful and the handler itself cannot throw.
          const reason = typeof error40?.message === "string" ? error40.message : String(error40);
          logWarning3(
            `Could not read expected output file ${resolvedPath}: ${reason}`
          );
        }
      }
      segment.content = processedContent;
    }
    segments.push(segment);
  }
  return segments;
}
35260
35469
  var ANSI_YELLOW5 = "\x1B[33m";
35261
35470
  var ANSI_RESET5 = "\x1B[0m";
35262
35471
  async function buildPromptInputs(testCase, mode = "lm") {
@@ -35555,12 +35764,10 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35555
35764
  messageType: "input",
35556
35765
  verbose
35557
35766
  });
35558
- const outputSegments = hasExpectedMessages ? await processMessages({
35767
+ const outputSegments = hasExpectedMessages ? await processExpectedMessages({
35559
35768
  messages: expectedMessages,
35560
35769
  searchRoots,
35561
35770
  repoRootPath,
35562
- guidelinePatterns,
35563
- messageType: "output",
35564
35771
  verbose
35565
35772
  }) : [];
35566
35773
  const codeSnippets = extractCodeBlocks(inputSegments);
@@ -36071,9 +36278,11 @@ var CliProvider = class {
36071
36278
  const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
36072
36279
  throw new Error(message);
36073
36280
  }
36074
- const responseText = await this.readAndCleanupOutputFile(outputFilePath);
36281
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
36282
+ const parsed = this.parseOutputContent(responseContent);
36075
36283
  return {
36076
- text: responseText,
36284
+ text: parsed.text,
36285
+ trace: parsed.trace,
36077
36286
  raw: {
36078
36287
  command: renderedCommand,
36079
36288
  stderr: result.stderr,
@@ -36083,6 +36292,31 @@ var CliProvider = class {
36083
36292
  }
36084
36293
  };
36085
36294
  }
36295
+ /**
36296
+ * Parse output content from CLI.
36297
+ * If the content is valid JSON with a 'text' field, extract text and optional trace.
36298
+ * Otherwise, treat the entire content as plain text.
36299
+ */
36300
+ parseOutputContent(content) {
36301
+ try {
36302
+ const parsed = JSON.parse(content);
36303
+ if (typeof parsed === "object" && parsed !== null && "text" in parsed) {
36304
+ const obj = parsed;
36305
+ const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
36306
+ const trace2 = this.parseTrace(obj.trace);
36307
+ return { text: text2, trace: trace2 };
36308
+ }
36309
+ } catch {
36310
+ }
36311
+ return { text: content };
36312
+ }
36313
+ parseTrace(trace2) {
36314
+ if (!Array.isArray(trace2)) {
36315
+ return void 0;
36316
+ }
36317
+ const validEvents = trace2.filter(isTraceEvent);
36318
+ return validEvents.length > 0 ? validEvents : void 0;
36319
+ }
36086
36320
  async readAndCleanupOutputFile(filePath) {
36087
36321
  try {
36088
36322
  const content = await readTextFile(filePath);
@@ -37044,6 +37278,7 @@ var MockProvider = class {
37044
37278
  delayMs;
37045
37279
  delayMinMs;
37046
37280
  delayMaxMs;
37281
+ trace;
37047
37282
  constructor(targetName, config2) {
37048
37283
  this.id = `mock:${targetName}`;
37049
37284
  this.targetName = targetName;
@@ -37051,6 +37286,7 @@ var MockProvider = class {
37051
37286
  this.delayMs = config2.delayMs ?? 0;
37052
37287
  this.delayMinMs = config2.delayMinMs ?? 0;
37053
37288
  this.delayMaxMs = config2.delayMaxMs ?? 0;
37289
+ this.trace = config2.trace;
37054
37290
  }
37055
37291
  async invoke(request) {
37056
37292
  const delay2 = this.calculateDelay();
@@ -37062,7 +37298,8 @@ var MockProvider = class {
37062
37298
  raw: {
37063
37299
  question: request.question,
37064
37300
  guidelines: request.guidelines
37065
- }
37301
+ },
37302
+ trace: this.trace
37066
37303
  };
37067
37304
  }
37068
37305
  calculateDelay() {
@@ -37705,9 +37942,11 @@ var CodeEvaluator = class {
37705
37942
  expected_outcome: context.evalCase.expected_outcome,
37706
37943
  reference_answer: context.evalCase.reference_answer,
37707
37944
  candidate_answer: context.candidate,
37708
- guideline_paths: context.evalCase.guideline_paths,
37709
- input_files: context.evalCase.file_paths,
37710
- input_segments: context.evalCase.input_segments
37945
+ guideline_files: context.evalCase.guideline_paths,
37946
+ input_files: context.evalCase.file_paths.filter(
37947
+ (path132) => !context.evalCase.guideline_paths.includes(path132)
37948
+ ),
37949
+ input_messages: context.evalCase.input_messages
37711
37950
  },
37712
37951
  null,
37713
37952
  2
@@ -37827,6 +38066,251 @@ function substituteVariables(template, variables) {
37827
38066
  return variables[varName] ?? match;
37828
38067
  });
37829
38068
  }
38069
/**
 * Scores a candidate's tool usage against a configured expectation.
 *
 * `config.mode` selects the comparison:
 *  - "any_order": each tool listed in `config.minimums` must have been called
 *    at least the given number of times (read from the trace summary).
 *  - "in_order": the tools in `config.expected` must appear in the trace in
 *    that relative order; other calls may sit in between.
 *  - "exact": the trace's tool calls must match `config.expected` position by
 *    position, with no additional calls.
 *
 * Every result has the shape
 * `{ score, verdict, hits, misses, expectedAspectCount }`.
 */
var ToolTrajectoryEvaluator = class {
  kind = "tool_trajectory";
  config;
  /**
   * @param {{config: object}} options - `options.config` holds `mode` plus
   *   `minimums` and/or `expected`.
   */
  constructor(options) {
    this.config = options.config;
  }
  /**
   * Run the configured comparison.
   *
   * @param {object} context - Reads `candidateTrace` and `candidateTraceSummary`.
   * @returns {object} A failing result when either value is missing or the mode
   *   is unknown; otherwise the result of the mode-specific check.
   */
  evaluate(context) {
    const { candidateTrace, candidateTraceSummary } = context;
    if (!candidateTrace || !candidateTraceSummary) {
      return {
        score: 0,
        verdict: "fail",
        hits: [],
        misses: ["No trace available for evaluation"],
        expectedAspectCount: 1
      };
    }
    switch (this.config.mode) {
      case "any_order":
        return this.evaluateAnyOrder(candidateTraceSummary);
      case "in_order":
        return this.evaluateInOrder(candidateTrace);
      case "exact":
        return this.evaluateExact(candidateTrace);
      default:
        return {
          score: 0,
          verdict: "fail",
          hits: [],
          misses: [`Unknown mode: ${this.config.mode}`],
          expectedAspectCount: 1
        };
    }
  }
  /**
   * Check per-tool minimum call counts; the score is the fraction of tools
   * whose minimum was reached.
   *
   * @param {{toolCallsByName: Object<string, number>}} summary
   */
  evaluateAnyOrder(summary) {
    const minimums = this.config.minimums ?? {};
    const toolNames = Object.keys(minimums);
    if (toolNames.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool requirements specified"],
        misses: [],
        expectedAspectCount: 0
      };
    }
    const hits = [];
    const misses = [];
    for (const toolName of toolNames) {
      const required2 = minimums[toolName];
      // Only own properties count: a tool named e.g. "toString" that was never
      // called must read as 0, not as the member inherited from Object.prototype.
      const wasCalled = Object.prototype.hasOwnProperty.call(summary.toolCallsByName, toolName);
      const actual = wasCalled ? summary.toolCallsByName[toolName] ?? 0 : 0;
      if (actual >= required2) {
        hits.push(`${toolName}: called ${actual} times (required \u2265${required2})`);
      } else {
        misses.push(`${toolName}: called ${actual} times (required \u2265${required2})`);
      }
    }
    const score = hits.length / toolNames.length;
    return {
      score,
      verdict: scoreToVerdict(score),
      hits,
      misses,
      expectedAspectCount: toolNames.length
    };
  }
  /**
   * Check that the expected tools occur in order; the score is the fraction of
   * expected tools that were found.
   *
   * @param {Array<object>} trace2 - Full trace; only named "tool_call" events are used.
   */
  evaluateInOrder(trace2) {
    const expected = this.config.expected ?? [];
    if (expected.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool sequence specified"],
        misses: [],
        expectedAspectCount: 0
      };
    }
    const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
    const hits = [];
    const misses = [];
    // Index of the first actual call not yet consumed by a match.
    let searchFrom = 0;
    for (let i = 0; i < expected.length; i++) {
      const expectedTool = expected[i].tool;
      let matchIndex = -1;
      for (let j = searchFrom; j < actualToolCalls.length; j++) {
        if (actualToolCalls[j].name === expectedTool) {
          matchIndex = j;
          break;
        }
      }
      if (matchIndex === -1) {
        // Leave `searchFrom` untouched: a missing tool must not use up the rest
        // of the trace, otherwise every later expected tool would be reported
        // missing as well.
        misses.push(`Expected ${expectedTool} at position ${i}, not found in remaining trace`);
        continue;
      }
      hits.push(`Found ${expectedTool} at position ${matchIndex}`);
      searchFrom = matchIndex + 1;
    }
    const score = hits.length / expected.length;
    return {
      score,
      verdict: scoreToVerdict(score),
      hits,
      misses,
      expectedAspectCount: expected.length
    };
  }
  /**
   * Check that the tool calls match the expected sequence position by position.
   *
   * @param {Array<object>} trace2 - Full trace; only named "tool_call" events are used.
   */
  evaluateExact(trace2) {
    const expected = this.config.expected ?? [];
    if (expected.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool sequence specified"],
        misses: [],
        expectedAspectCount: 0
      };
    }
    const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
    const hits = [];
    const misses = [];
    if (actualToolCalls.length !== expected.length) {
      misses.push(`Expected ${expected.length} tool calls, got ${actualToolCalls.length}`);
    }
    const checkLength = Math.min(expected.length, actualToolCalls.length);
    for (let i = 0; i < checkLength; i++) {
      const expectedTool = expected[i].tool;
      const actualTool = actualToolCalls[i].name;
      if (actualTool === expectedTool) {
        hits.push(`Position ${i}: ${expectedTool} \u2713`);
      } else {
        misses.push(`Position ${i}: expected ${expectedTool}, got ${actualTool}`);
      }
    }
    for (let i = checkLength; i < expected.length; i++) {
      misses.push(`Position ${i}: expected ${expected[i].tool}, got nothing`);
    }
    // Divide by the longer of the two sequences so that calls beyond the
    // expected ones lower the score; a trace with extra calls is not an exact
    // match and must not reach 1.0.
    const score = hits.length / Math.max(expected.length, actualToolCalls.length);
    return {
      score,
      verdict: scoreToVerdict(score),
      hits,
      misses,
      expectedAspectCount: expected.length
    };
  }
};
38215
/**
 * Validates the tool calls in a candidate trace against the `tool_calls`
 * declared on assistant entries of the eval case's `expected_segments`.
 *
 * Expected and actual calls are compared position by position: tool names
 * must match, and when an expected call declares `input` it must be deeply
 * equal to the actual call's input. Every result has the shape
 * `{ score, verdict, hits, misses, expectedAspectCount }`.
 */
var ExpectedMessagesEvaluator = class {
  kind = "expected_messages";
  /**
   * @param {object} context - Reads `candidateTrace` and `evalCase.expected_segments`.
   * @returns {object} A pass when no tool calls are expected, a fail when calls
   *   are expected but the trace is missing or empty, otherwise the positional
   *   comparison result.
   */
  evaluate(context) {
    const { candidateTrace, evalCase } = context;
    const expectedSegments = evalCase.expected_segments;
    const expectedToolCalls = this.extractExpectedToolCalls(expectedSegments);
    if (expectedToolCalls.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool_calls specified in expected_messages"],
        misses: [],
        expectedAspectCount: 1
      };
    }
    if (!candidateTrace || candidateTrace.length === 0) {
      return {
        score: 0,
        verdict: "fail",
        hits: [],
        misses: ["No trace available to validate tool_calls"],
        expectedAspectCount: expectedToolCalls.length
      };
    }
    const actualToolCalls = candidateTrace.filter((e) => e.type === "tool_call");
    return this.validateToolCalls(expectedToolCalls, actualToolCalls);
  }
  /**
   * Collect `{ tool, input }` pairs from the `tool_calls` arrays of assistant
   * segments, in order. Entries without a string `tool` are ignored.
   *
   * @param {Array<object>|undefined} segments
   * @returns {Array<{tool: string, input: unknown}>}
   */
  extractExpectedToolCalls(segments) {
    if (!segments) {
      return [];
    }
    const toolCalls = [];
    for (const segment of segments) {
      const role = segment.role;
      const segmentToolCalls = segment.tool_calls;
      if (role === "assistant" && Array.isArray(segmentToolCalls)) {
        for (const tc of segmentToolCalls) {
          if (typeof tc === "object" && tc !== null && typeof tc.tool === "string") {
            const toolCall = tc;
            toolCalls.push({ tool: toolCall.tool, input: toolCall.input });
          }
        }
      }
    }
    return toolCalls;
  }
  /**
   * Compare expected and actual tool calls index by index. Actual calls beyond
   * the expected ones are not inspected.
   *
   * @param {Array<{tool: string, input: unknown}>} expected
   * @param {Array<object>} actual - Trace events of type "tool_call".
   */
  validateToolCalls(expected, actual) {
    const hits = [];
    const misses = [];
    for (let i = 0; i < expected.length; i++) {
      const expectedCall = expected[i];
      const actualCall = actual[i];
      if (!actualCall) {
        misses.push(
          `tool_calls[${i}]: expected ${expectedCall.tool}, but no more tool calls in trace`
        );
        continue;
      }
      if (actualCall.name !== expectedCall.tool) {
        misses.push(
          `tool_calls[${i}]: expected ${expectedCall.tool}, got ${actualCall.name ?? "unknown"}`
        );
        continue;
      }
      // Input is only compared when the expectation declares one.
      if (expectedCall.input !== void 0) {
        if (!this.deepEquals(expectedCall.input, actualCall.input)) {
          misses.push(`tool_calls[${i}]: ${expectedCall.tool} input mismatch`);
          continue;
        }
      }
      hits.push(`tool_calls[${i}]: ${expectedCall.tool} matched`);
    }
    // Guard against division by zero if called with an empty expectation.
    const totalChecks = expected.length || 1;
    const score = hits.length / totalChecks;
    return {
      score,
      verdict: score >= 0.8 ? "pass" : score >= 0.6 ? "borderline" : "fail",
      hits,
      misses,
      expectedAspectCount: totalChecks
    };
  }
  /**
   * Structural equality over primitives, arrays and plain objects.
   *
   * @param {unknown} a
   * @param {unknown} b
   * @returns {boolean}
   */
  deepEquals(a, b) {
    if (a === b) return true;
    if (typeof a !== typeof b) return false;
    if (typeof a !== "object" || a === null || b === null) return false;
    if (Array.isArray(a) && Array.isArray(b)) {
      if (a.length !== b.length) return false;
      return a.every((val, i) => this.deepEquals(val, b[i]));
    }
    if (Array.isArray(a) || Array.isArray(b)) return false;
    const aObj = a;
    const bObj = b;
    const aKeys = Object.keys(aObj);
    const bKeys = Object.keys(bObj);
    if (aKeys.length !== bKeys.length) return false;
    // Require `b` to own each key as well: with equal key counts, a key present
    // only in `a` with value `undefined` would otherwise compare equal to the
    // missing property on `b`.
    return aKeys.every(
      (key2) => Object.prototype.hasOwnProperty.call(bObj, key2) && this.deepEquals(aObj[key2], bObj[key2])
    );
  }
};
37830
38314
  var DEFAULT_COMPOSITE_AGGREGATOR_PROMPT = `Review the following evaluation results:
37831
38315
  {{EVALUATOR_RESULTS_JSON}}
37832
38316
 
@@ -38239,7 +38723,7 @@ async function runEvaluation(options) {
38239
38723
  if (!definition) {
38240
38724
  return void 0;
38241
38725
  }
38242
- const resolved = resolveTargetDefinition(definition, envLookup);
38726
+ const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
38243
38727
  resolvedTargetsByName.set(name16, resolved);
38244
38728
  return resolved;
38245
38729
  };
@@ -38553,6 +39037,17 @@ async function runEvalCase(options) {
38553
39037
  if (cacheKey && cache && !cachedResponse) {
38554
39038
  await cache.set(cacheKey, providerResponse);
38555
39039
  }
39040
+ let candidateTrace = providerResponse.trace;
39041
+ if (!candidateTrace && providerResponse.traceRef) {
39042
+ try {
39043
+ const rawTrace = await readJsonFile(providerResponse.traceRef);
39044
+ if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
39045
+ candidateTrace = rawTrace;
39046
+ }
39047
+ } catch {
39048
+ }
39049
+ }
39050
+ const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
38556
39051
  try {
38557
39052
  return await evaluateCandidate({
38558
39053
  evalCase,
@@ -38564,7 +39059,9 @@ async function runEvalCase(options) {
38564
39059
  nowFn,
38565
39060
  attempt,
38566
39061
  judgeProvider,
38567
- agentTimeoutMs
39062
+ agentTimeoutMs,
39063
+ candidateTrace,
39064
+ candidateTraceSummary
38568
39065
  });
38569
39066
  } catch (error40) {
38570
39067
  return buildErrorResult(evalCase, target.name, nowFn(), error40, promptInputs, provider);
@@ -38581,7 +39078,9 @@ async function evaluateCandidate(options) {
38581
39078
  nowFn,
38582
39079
  attempt,
38583
39080
  judgeProvider,
38584
- agentTimeoutMs
39081
+ agentTimeoutMs,
39082
+ candidateTrace,
39083
+ candidateTraceSummary
38585
39084
  } = options;
38586
39085
  const gradeTimestamp = nowFn();
38587
39086
  const { score, evaluatorResults } = await runEvaluatorsForCase({
@@ -38594,7 +39093,9 @@ async function evaluateCandidate(options) {
38594
39093
  promptInputs,
38595
39094
  now: gradeTimestamp,
38596
39095
  judgeProvider,
38597
- agentTimeoutMs
39096
+ agentTimeoutMs,
39097
+ candidateTrace,
39098
+ candidateTraceSummary
38598
39099
  });
38599
39100
  const completedAt = nowFn();
38600
39101
  let agentProviderRequest;
@@ -38607,14 +39108,12 @@ async function evaluateCandidate(options) {
38607
39108
  } else {
38608
39109
  if (promptInputs.chatPrompt) {
38609
39110
  lmProviderRequest = {
38610
- chat_prompt: promptInputs.chatPrompt,
38611
- guideline_paths: evalCase.guideline_paths
39111
+ chat_prompt: promptInputs.chatPrompt
38612
39112
  };
38613
39113
  } else {
38614
39114
  lmProviderRequest = {
38615
39115
  question: promptInputs.question,
38616
- guidelines: promptInputs.guidelines,
38617
- guideline_paths: evalCase.guideline_paths
39116
+ guidelines: promptInputs.guidelines
38618
39117
  };
38619
39118
  }
38620
39119
  }
@@ -38633,7 +39132,8 @@ async function evaluateCandidate(options) {
38633
39132
  agent_provider_request: agentProviderRequest,
38634
39133
  lm_provider_request: lmProviderRequest,
38635
39134
  evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
38636
- evaluator_results: evaluatorResults
39135
+ evaluator_results: evaluatorResults,
39136
+ trace_summary: candidateTraceSummary
38637
39137
  };
38638
39138
  }
38639
39139
  async function runEvaluatorsForCase(options) {
@@ -38647,7 +39147,9 @@ async function runEvaluatorsForCase(options) {
38647
39147
  promptInputs,
38648
39148
  now,
38649
39149
  judgeProvider,
38650
- agentTimeoutMs
39150
+ agentTimeoutMs,
39151
+ candidateTrace,
39152
+ candidateTraceSummary
38651
39153
  } = options;
38652
39154
  if (evalCase.evaluators && evalCase.evaluators.length > 0) {
38653
39155
  return runEvaluatorList({
@@ -38661,7 +39163,9 @@ async function runEvaluatorsForCase(options) {
38661
39163
  promptInputs,
38662
39164
  now,
38663
39165
  judgeProvider,
38664
- agentTimeoutMs
39166
+ agentTimeoutMs,
39167
+ candidateTrace,
39168
+ candidateTraceSummary
38665
39169
  });
38666
39170
  }
38667
39171
  const evaluatorKind = evalCase.evaluator ?? "llm_judge";
@@ -38677,7 +39181,9 @@ async function runEvaluatorsForCase(options) {
38677
39181
  attempt,
38678
39182
  promptInputs,
38679
39183
  now,
38680
- judgeProvider
39184
+ judgeProvider,
39185
+ candidateTrace,
39186
+ candidateTraceSummary
38681
39187
  });
38682
39188
  return { score };
38683
39189
  }
@@ -38693,7 +39199,9 @@ async function runEvaluatorList(options) {
38693
39199
  promptInputs,
38694
39200
  now,
38695
39201
  judgeProvider,
38696
- agentTimeoutMs
39202
+ agentTimeoutMs,
39203
+ candidateTrace,
39204
+ candidateTraceSummary
38697
39205
  } = options;
38698
39206
  const scored = [];
38699
39207
  const evaluatorResults = [];
@@ -38712,11 +39220,13 @@ async function runEvaluatorList(options) {
38712
39220
  now,
38713
39221
  judgeProvider
38714
39222
  });
38715
- scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
39223
+ const weight = evaluator.weight ?? 1;
39224
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
38716
39225
  evaluatorResults.push({
38717
39226
  name: evaluator.name,
38718
39227
  type: evaluator.type,
38719
39228
  score: score2.score,
39229
+ weight,
38720
39230
  verdict: score2.verdict,
38721
39231
  hits: score2.hits,
38722
39232
  misses: score2.misses,
@@ -38739,11 +39249,13 @@ async function runEvaluatorList(options) {
38739
39249
  promptInputs,
38740
39250
  now
38741
39251
  });
38742
- scored.push({ score: score2, name: evaluator.name, type: "code_judge" });
39252
+ const weight = evaluator.weight ?? 1;
39253
+ scored.push({ score: score2, name: evaluator.name, type: "code_judge", weight });
38743
39254
  evaluatorResults.push({
38744
39255
  name: evaluator.name,
38745
39256
  type: "code_judge",
38746
39257
  score: score2.score,
39258
+ weight,
38747
39259
  verdict: score2.verdict,
38748
39260
  hits: score2.hits,
38749
39261
  misses: score2.misses,
@@ -38769,6 +39281,12 @@ async function runEvaluatorList(options) {
38769
39281
  cwd: evalFileDir,
38770
39282
  evaluatorFactory: { create: createEvaluator }
38771
39283
  });
39284
+ case "tool_trajectory":
39285
+ return new ToolTrajectoryEvaluator({
39286
+ config: memberConfig
39287
+ });
39288
+ case "expected_messages":
39289
+ return new ExpectedMessagesEvaluator();
38772
39290
  default: {
38773
39291
  const unknownConfig = memberConfig;
38774
39292
  throw new Error(`Unsupported evaluator type in composite: ${unknownConfig.type}`);
@@ -38790,11 +39308,13 @@ async function runEvaluatorList(options) {
38790
39308
  now,
38791
39309
  judgeProvider
38792
39310
  });
38793
- scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
39311
+ const weight = evaluator.weight ?? 1;
39312
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
38794
39313
  evaluatorResults.push({
38795
39314
  name: evaluator.name,
38796
39315
  type: evaluator.type,
38797
39316
  score: score2.score,
39317
+ weight,
38798
39318
  verdict: score2.verdict,
38799
39319
  hits: score2.hits,
38800
39320
  misses: score2.misses,
@@ -38803,6 +39323,60 @@ async function runEvaluatorList(options) {
38803
39323
  evaluator_results: mapChildResults(score2.evaluatorResults)
38804
39324
  });
38805
39325
  }
39326
+ if (evaluator.type === "tool_trajectory") {
39327
+ const trajectoryEvaluator = new ToolTrajectoryEvaluator({
39328
+ config: evaluator
39329
+ });
39330
+ const score2 = trajectoryEvaluator.evaluate({
39331
+ evalCase,
39332
+ candidate,
39333
+ target,
39334
+ provider,
39335
+ attempt,
39336
+ promptInputs,
39337
+ now,
39338
+ candidateTrace,
39339
+ candidateTraceSummary
39340
+ });
39341
+ const weight = evaluator.weight ?? 1;
39342
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
39343
+ evaluatorResults.push({
39344
+ name: evaluator.name,
39345
+ type: evaluator.type,
39346
+ score: score2.score,
39347
+ weight,
39348
+ verdict: score2.verdict,
39349
+ hits: score2.hits,
39350
+ misses: score2.misses,
39351
+ reasoning: score2.reasoning
39352
+ });
39353
+ }
39354
+ if (evaluator.type === "expected_messages") {
39355
+ const expectedMessagesEvaluator = new ExpectedMessagesEvaluator();
39356
+ const score2 = expectedMessagesEvaluator.evaluate({
39357
+ evalCase,
39358
+ candidate,
39359
+ target,
39360
+ provider,
39361
+ attempt,
39362
+ promptInputs,
39363
+ now,
39364
+ candidateTrace,
39365
+ candidateTraceSummary
39366
+ });
39367
+ const weight = evaluator.weight ?? 1;
39368
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
39369
+ evaluatorResults.push({
39370
+ name: evaluator.name,
39371
+ type: evaluator.type,
39372
+ score: score2.score,
39373
+ weight,
39374
+ verdict: score2.verdict,
39375
+ hits: score2.hits,
39376
+ misses: score2.misses,
39377
+ reasoning: score2.reasoning
39378
+ });
39379
+ }
38806
39380
  } catch (error40) {
38807
39381
  const message = error40 instanceof Error ? error40.message : String(error40);
38808
39382
  const fallbackScore = {
@@ -38814,15 +39388,18 @@ async function runEvaluatorList(options) {
38814
39388
  reasoning: message
38815
39389
  };
38816
39390
  const resultType = evaluator.type === "code" ? "code_judge" : evaluator.type;
39391
+ const weight = evaluator.weight ?? 1;
38817
39392
  scored.push({
38818
39393
  score: fallbackScore,
38819
39394
  name: evaluator.name ?? "unknown",
38820
- type: resultType ?? "llm_judge"
39395
+ type: resultType ?? "llm_judge",
39396
+ weight
38821
39397
  });
38822
39398
  evaluatorResults.push({
38823
39399
  name: evaluator.name ?? "unknown",
38824
39400
  type: resultType ?? "llm_judge",
38825
39401
  score: 0,
39402
+ weight,
38826
39403
  verdict: "fail",
38827
39404
  hits: [],
38828
39405
  misses: [`Evaluator '${evaluator.name ?? "unknown"}' failed: ${message}`],
@@ -38830,7 +39407,9 @@ async function runEvaluatorList(options) {
38830
39407
  });
38831
39408
  }
38832
39409
  }
38833
- const aggregateScore = scored.length > 0 ? scored.reduce((total, entry) => total + entry.score.score, 0) / scored.length : 0;
39410
+ const aggregateScore = scored.length > 0 ? computeWeightedMean(
39411
+ scored.map((entry) => ({ score: entry.score.score, weight: entry.weight }))
39412
+ ) : 0;
38834
39413
  const hits = scored.flatMap((entry) => entry.score.hits);
38835
39414
  const misses = scored.flatMap((entry) => entry.score.misses);
38836
39415
  const expectedAspectCount = scored.reduce(
@@ -39056,6 +39635,16 @@ function mapChildResults(children) {
39056
39635
  evaluator_results: mapChildResults(child.evaluatorResults)
39057
39636
  }));
39058
39637
  }
39638
/**
 * Computes the weighted arithmetic mean of a list of scores.
 *
 * @param {Array<{score: number, weight?: number | null}>} entries - Scores
 *   with optional weights; a missing (null/undefined) weight counts as 1.
 * @returns {number} The weighted mean, or 0 when there are no entries or no
 *   entry carries a usable weight.
 */
function computeWeightedMean(entries) {
  let totalWeight = 0;
  let weightedSum = 0;
  for (const entry of entries) {
    const weight = entry.weight ?? 1;
    // Ignore weights that are not finite positive numbers. A negative or NaN
    // weight would otherwise push the mean outside the range of the input
    // scores or poison the totals; a zero weight contributes nothing anyway.
    if (!Number.isFinite(weight) || weight <= 0) {
      continue;
    }
    totalWeight += weight;
    weightedSum += entry.score * weight;
  }
  return totalWeight > 0 ? weightedSum / totalWeight : 0;
}
39059
39648
  var rubricItemSchema = external_exports.object({
39060
39649
  id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
39061
39650
  description: external_exports.string().describe("What this rubric checks for"),
@@ -39136,13 +39725,13 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
39136
39725
  // src/commands/eval/env.ts
39137
39726
  import { constants as constants4 } from "node:fs";
39138
39727
  import { access as access4 } from "node:fs/promises";
39139
- import path13 from "node:path";
39728
+ import path14 from "node:path";
39140
39729
  import { config as loadDotenv } from "dotenv";
39141
39730
  function uniqueDirs(directories) {
39142
39731
  const seen = /* @__PURE__ */ new Set();
39143
39732
  const result = [];
39144
39733
  for (const dir of directories) {
39145
- const absolute = path13.resolve(dir);
39734
+ const absolute = path14.resolve(dir);
39146
39735
  if (seen.has(absolute)) {
39147
39736
  continue;
39148
39737
  }
@@ -39161,14 +39750,14 @@ async function fileExists4(filePath) {
39161
39750
  }
39162
39751
  function collectAncestorDirectories(start, boundary) {
39163
39752
  const directories = [];
39164
- const boundaryDir = path13.resolve(boundary);
39165
- let current = path13.resolve(start);
39753
+ const boundaryDir = path14.resolve(boundary);
39754
+ let current = path14.resolve(start);
39166
39755
  while (current !== void 0) {
39167
39756
  directories.push(current);
39168
39757
  if (current === boundaryDir) {
39169
39758
  break;
39170
39759
  }
39171
- const parent = path13.dirname(current);
39760
+ const parent = path14.dirname(current);
39172
39761
  if (parent === current) {
39173
39762
  break;
39174
39763
  }
@@ -39178,29 +39767,36 @@ function collectAncestorDirectories(start, boundary) {
39178
39767
  }
39179
39768
  async function loadEnvFromHierarchy(options) {
39180
39769
  const { testFilePath, repoRoot, verbose } = options;
39181
- const testDir = path13.dirname(path13.resolve(testFilePath));
39770
+ const testDir = path14.dirname(path14.resolve(testFilePath));
39182
39771
  const cwd = process.cwd();
39183
39772
  const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
39773
+ const envFiles = [];
39184
39774
  for (const dir of searchDirs) {
39185
- const candidate = path13.join(dir, ".env");
39775
+ const candidate = path14.join(dir, ".env");
39186
39776
  if (await fileExists4(candidate)) {
39187
- loadDotenv({ path: candidate, override: false });
39188
- if (verbose) {
39189
- console.log(`Loaded environment from: ${candidate}`);
39190
- }
39191
- return candidate;
39777
+ envFiles.push(candidate);
39192
39778
  }
39193
39779
  }
39194
- if (verbose) {
39195
- console.log("No .env file found in hierarchy");
39780
+ if (envFiles.length === 0) {
39781
+ if (verbose) {
39782
+ console.log("No .env file found in hierarchy");
39783
+ }
39784
+ return void 0;
39196
39785
  }
39197
- return void 0;
39786
+ for (let i = envFiles.length - 1; i >= 0; i--) {
39787
+ const envFile = envFiles[i];
39788
+ loadDotenv({ path: envFile, override: false });
39789
+ if (verbose) {
39790
+ console.log(`Loaded environment from: ${envFile}`);
39791
+ }
39792
+ }
39793
+ return envFiles[0];
39198
39794
  }
39199
39795
 
39200
39796
  // src/commands/eval/jsonl-writer.ts
39201
39797
  import { createWriteStream as createWriteStream2 } from "node:fs";
39202
39798
  import { mkdir as mkdir5 } from "node:fs/promises";
39203
- import path14 from "node:path";
39799
+ import path15 from "node:path";
39204
39800
  import { finished } from "node:stream/promises";
39205
39801
 
39206
39802
  // ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
@@ -39418,7 +40014,7 @@ var JsonlWriter = class _JsonlWriter {
39418
40014
  this.stream = stream;
39419
40015
  }
39420
40016
  static async open(filePath) {
39421
- await mkdir5(path14.dirname(filePath), { recursive: true });
40017
+ await mkdir5(path15.dirname(filePath), { recursive: true });
39422
40018
  const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
39423
40019
  return new _JsonlWriter(stream);
39424
40020
  }
@@ -39450,7 +40046,7 @@ var JsonlWriter = class _JsonlWriter {
39450
40046
  // src/commands/eval/yaml-writer.ts
39451
40047
  import { createWriteStream as createWriteStream3 } from "node:fs";
39452
40048
  import { mkdir as mkdir6 } from "node:fs/promises";
39453
- import path15 from "node:path";
40049
+ import path16 from "node:path";
39454
40050
  import { finished as finished2 } from "node:stream/promises";
39455
40051
  import { stringify as stringifyYaml } from "yaml";
39456
40052
  var YamlWriter = class _YamlWriter {
@@ -39462,7 +40058,7 @@ var YamlWriter = class _YamlWriter {
39462
40058
  this.stream = stream;
39463
40059
  }
39464
40060
  static async open(filePath) {
39465
- await mkdir6(path15.dirname(filePath), { recursive: true });
40061
+ await mkdir6(path16.dirname(filePath), { recursive: true });
39466
40062
  const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
39467
40063
  return new _YamlWriter(stream);
39468
40064
  }
@@ -39586,12 +40182,12 @@ var ProgressDisplay = class {
39586
40182
  }
39587
40183
  addLogPaths(paths) {
39588
40184
  const newPaths = [];
39589
- for (const path26 of paths) {
39590
- if (this.logPathSet.has(path26)) {
40185
+ for (const path27 of paths) {
40186
+ if (this.logPathSet.has(path27)) {
39591
40187
  continue;
39592
40188
  }
39593
- this.logPathSet.add(path26);
39594
- newPaths.push(path26);
40189
+ this.logPathSet.add(path27);
40190
+ newPaths.push(path27);
39595
40191
  }
39596
40192
  if (newPaths.length === 0) {
39597
40193
  return;
@@ -39607,8 +40203,8 @@ var ProgressDisplay = class {
39607
40203
  this.hasPrintedLogHeader = true;
39608
40204
  }
39609
40205
  const startIndex = this.logPaths.length - newPaths.length;
39610
- newPaths.forEach((path26, offset) => {
39611
- console.log(`${startIndex + offset + 1}. ${path26}`);
40206
+ newPaths.forEach((path27, offset) => {
40207
+ console.log(`${startIndex + offset + 1}. ${path27}`);
39612
40208
  });
39613
40209
  }
39614
40210
  scheduleRender() {
@@ -39656,8 +40252,8 @@ var ProgressDisplay = class {
39656
40252
  if (this.logPaths.length > 0) {
39657
40253
  lines.push("");
39658
40254
  lines.push("Codex CLI logs:");
39659
- this.logPaths.forEach((path26, index) => {
39660
- lines.push(`${index + 1}. ${path26}`);
40255
+ this.logPaths.forEach((path27, index) => {
40256
+ lines.push(`${index + 1}. ${path27}`);
39661
40257
  });
39662
40258
  }
39663
40259
  const rowCount = this.getRenderedRowCount(lines);
@@ -39864,7 +40460,7 @@ function formatEvaluationSummary(summary) {
39864
40460
 
39865
40461
  // ../../packages/core/dist/evaluation/validation/index.js
39866
40462
  import { readFile as readFile7 } from "node:fs/promises";
39867
- import path16 from "node:path";
40463
+ import path17 from "node:path";
39868
40464
  import { parse as parse6 } from "yaml";
39869
40465
  import { readFile as readFile23 } from "node:fs/promises";
39870
40466
  import path23 from "node:path";
@@ -39907,8 +40503,8 @@ async function detectFileType(filePath) {
39907
40503
  }
39908
40504
  }
39909
40505
  function inferFileTypeFromPath(filePath) {
39910
- const normalized = path16.normalize(filePath).replace(/\\/g, "/");
39911
- const basename = path16.basename(filePath);
40506
+ const normalized = path17.normalize(filePath).replace(/\\/g, "/");
40507
+ const basename = path17.basename(filePath);
39912
40508
  if (normalized.includes("/.agentv/")) {
39913
40509
  if (basename === "config.yaml" || basename === "config.yml") {
39914
40510
  return "config";
@@ -40053,6 +40649,26 @@ function validateMessages(messages, location, filePath, errors) {
40053
40649
  message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
40054
40650
  });
40055
40651
  }
40652
+ const toolCalls = message.tool_calls;
40653
+ if (toolCalls !== void 0) {
40654
+ if (role !== "assistant") {
40655
+ errors.push({
40656
+ severity: "error",
40657
+ filePath,
40658
+ location: `${msgLocation}.tool_calls`,
40659
+ message: "tool_calls can only be specified on assistant messages"
40660
+ });
40661
+ } else if (!Array.isArray(toolCalls)) {
40662
+ errors.push({
40663
+ severity: "error",
40664
+ filePath,
40665
+ location: `${msgLocation}.tool_calls`,
40666
+ message: "tool_calls must be an array"
40667
+ });
40668
+ } else {
40669
+ validateToolCalls(toolCalls, `${msgLocation}.tool_calls`, filePath, errors);
40670
+ }
40671
+ }
40056
40672
  const content = message.content;
40057
40673
  if (typeof content === "string") {
40058
40674
  validateContentForRoleMarkers(content, `${msgLocation}.content`, filePath, errors);
@@ -40117,6 +40733,30 @@ function validateContentForRoleMarkers(content, location, filePath, errors) {
40117
40733
  }
40118
40734
  }
40119
40735
  }
40736
/**
 * Validates each entry of a `tool_calls` array and appends any problems to
 * `errors`.
 *
 * An entry must pass `isObject2` and carry a `tool` field that is a string
 * with at least one non-whitespace character.
 *
 * @param {unknown[]} toolCalls - Entries to validate.
 * @param {string} location - Location prefix used in reported errors.
 * @param {string} filePath - File the entries were read from.
 * @param {object[]} errors - Collector that receives validation errors.
 */
function validateToolCalls(toolCalls, location, filePath, errors) {
  for (const [index, entry] of toolCalls.entries()) {
    const entryLocation = `${location}[${index}]`;
    if (!isObject2(entry)) {
      errors.push({
        severity: "error",
        filePath,
        location: entryLocation,
        message: "Tool call must be an object"
      });
      continue;
    }
    const toolName = entry.tool;
    const hasUsableName = typeof toolName === "string" && toolName.trim().length > 0;
    if (hasUsableName) {
      continue;
    }
    errors.push({
      severity: "error",
      filePath,
      location: `${entryLocation}.tool`,
      message: "Missing or invalid 'tool' field (must be a non-empty string)"
    });
  }
}
40120
40760
  function isObject22(value) {
40121
40761
  return typeof value === "object" && value !== null && !Array.isArray(value);
40122
40762
  }
@@ -40212,7 +40852,9 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
40212
40852
  "response",
40213
40853
  "delayMs",
40214
40854
  "delayMinMs",
40215
- "delayMaxMs"
40855
+ "delayMaxMs",
40856
+ "trace"
40857
+ // For testing tool_trajectory evaluator
40216
40858
  ]);
40217
40859
  var CLI_SETTINGS = /* @__PURE__ */ new Set([
40218
40860
  ...COMMON_SETTINGS,
@@ -40735,12 +41377,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
40735
41377
  // src/utils/targets.ts
40736
41378
  import { constants as constants5 } from "node:fs";
40737
41379
  import { access as access5 } from "node:fs/promises";
40738
- import path17 from "node:path";
41380
+ import path18 from "node:path";
40739
41381
  var TARGET_FILE_CANDIDATES = [
40740
41382
  "targets.yaml",
40741
41383
  "targets.yml",
40742
- path17.join(".agentv", "targets.yaml"),
40743
- path17.join(".agentv", "targets.yml")
41384
+ path18.join(".agentv", "targets.yaml"),
41385
+ path18.join(".agentv", "targets.yml")
40744
41386
  ];
40745
41387
  async function fileExists5(filePath) {
40746
41388
  try {
@@ -40753,12 +41395,12 @@ async function fileExists5(filePath) {
40753
41395
  async function discoverTargetsFile(options) {
40754
41396
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
40755
41397
  if (explicitPath) {
40756
- const resolvedExplicit = path17.resolve(explicitPath);
41398
+ const resolvedExplicit = path18.resolve(explicitPath);
40757
41399
  if (await fileExists5(resolvedExplicit)) {
40758
41400
  return resolvedExplicit;
40759
41401
  }
40760
41402
  for (const candidate of TARGET_FILE_CANDIDATES) {
40761
- const nested = path17.join(resolvedExplicit, candidate);
41403
+ const nested = path18.join(resolvedExplicit, candidate);
40762
41404
  if (await fileExists5(nested)) {
40763
41405
  return nested;
40764
41406
  }
@@ -40766,13 +41408,13 @@ async function discoverTargetsFile(options) {
40766
41408
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
40767
41409
  }
40768
41410
  const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
40769
- const resolvedCwd = path17.resolve(cwd);
41411
+ const resolvedCwd = path18.resolve(cwd);
40770
41412
  if (!directories.includes(resolvedCwd)) {
40771
41413
  directories.push(resolvedCwd);
40772
41414
  }
40773
41415
  for (const directory of directories) {
40774
41416
  for (const candidate of TARGET_FILE_CANDIDATES) {
40775
- const fullPath = path17.join(directory, candidate);
41417
+ const fullPath = path18.join(directory, candidate);
40776
41418
  if (await fileExists5(fullPath)) {
40777
41419
  return fullPath;
40778
41420
  }
@@ -40881,7 +41523,7 @@ Errors in ${targetsFilePath}:`);
40881
41523
  };
40882
41524
  }
40883
41525
  try {
40884
- const resolvedTarget = resolveTargetDefinition(targetDefinition, env);
41526
+ const resolvedTarget = resolveTargetDefinition(targetDefinition, env, testFilePath);
40885
41527
  return {
40886
41528
  definitions,
40887
41529
  resolvedTarget,
@@ -40938,7 +41580,9 @@ function normalizeOptions(rawOptions) {
40938
41580
  maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
40939
41581
  cache: normalizeBoolean(rawOptions.cache),
40940
41582
  verbose: normalizeBoolean(rawOptions.verbose),
40941
- dumpPrompts: rawOptions.dumpPrompts
41583
+ dumpPrompts: rawOptions.dumpPrompts,
41584
+ dumpTraces: normalizeBoolean(rawOptions.dumpTraces),
41585
+ includeTrace: normalizeBoolean(rawOptions.includeTrace)
40942
41586
  };
40943
41587
  }
40944
41588
  async function ensureFileExists(filePath, description) {
@@ -40949,15 +41593,15 @@ async function ensureFileExists(filePath, description) {
40949
41593
  }
40950
41594
  }
40951
41595
  async function findRepoRoot(start) {
40952
- const fallback = path18.resolve(start);
41596
+ const fallback = path19.resolve(start);
40953
41597
  let current = fallback;
40954
41598
  while (current !== void 0) {
40955
- const candidate = path18.join(current, ".git");
41599
+ const candidate = path19.join(current, ".git");
40956
41600
  try {
40957
41601
  await access6(candidate, constants6.F_OK);
40958
41602
  return current;
40959
41603
  } catch {
40960
- const parent = path18.dirname(current);
41604
+ const parent = path19.dirname(current);
40961
41605
  if (parent === current) {
40962
41606
  break;
40963
41607
  }
@@ -40970,16 +41614,16 @@ function buildDefaultOutputPath(cwd, format) {
40970
41614
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
40971
41615
  const baseName = "eval";
40972
41616
  const extension = getDefaultExtension(format);
40973
- return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
41617
+ return path19.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
40974
41618
  }
40975
41619
  function resolvePromptDirectory(option4, cwd) {
40976
41620
  if (option4 === void 0) {
40977
41621
  return void 0;
40978
41622
  }
40979
41623
  if (typeof option4 === "string" && option4.trim().length > 0) {
40980
- return path18.resolve(cwd, option4);
41624
+ return path19.resolve(cwd, option4);
40981
41625
  }
40982
- return path18.join(cwd, ".agentv", "prompts");
41626
+ return path19.join(cwd, ".agentv", "prompts");
40983
41627
  }
40984
41628
  function createEvaluationCache() {
40985
41629
  const store = /* @__PURE__ */ new Map();
@@ -41004,7 +41648,7 @@ function createProgressReporter(maxWorkers) {
41004
41648
  };
41005
41649
  }
41006
41650
  function makeEvalKey(testFilePath, evalId) {
41007
- return `${path18.resolve(testFilePath)}::${evalId}`;
41651
+ return `${path19.resolve(testFilePath)}::${evalId}`;
41008
41652
  }
41009
41653
  function createDisplayIdTracker() {
41010
41654
  const map2 = /* @__PURE__ */ new Map();
@@ -41108,10 +41752,6 @@ async function runSingleEvalFile(params) {
41108
41752
  );
41109
41753
  resolvedWorkers = 1;
41110
41754
  }
41111
- if (options.verbose) {
41112
- const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
41113
- console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
41114
- }
41115
41755
  if (isVSCodeProvider && !options.dryRun) {
41116
41756
  await ensureVSCodeSubagents({
41117
41757
  kind: resolvedTargetSelection.resolvedTarget.kind,
@@ -41164,7 +41804,7 @@ async function runEvalCommand(input) {
41164
41804
  if (options.verbose) {
41165
41805
  console.log(`Repository root: ${repoRoot}`);
41166
41806
  }
41167
- const outputPath = options.outPath ? path18.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
41807
+ const outputPath = options.outPath ? path19.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
41168
41808
  console.log(`Output path: ${outputPath}`);
41169
41809
  const outputWriter = await createOutputWriter(outputPath, options.format);
41170
41810
  const cache = options.cache ? createEvaluationCache() : void 0;
@@ -41172,7 +41812,7 @@ async function runEvalCommand(input) {
41172
41812
  const allResults = [];
41173
41813
  let lastPromptDumpDir;
41174
41814
  const seenEvalCases = /* @__PURE__ */ new Set();
41175
- const resolvedTestFiles = input.testFiles.map((file2) => path18.resolve(file2));
41815
+ const resolvedTestFiles = input.testFiles.map((file2) => path19.resolve(file2));
41176
41816
  const displayIdTracker = createDisplayIdTracker();
41177
41817
  const totalWorkers = options.workers ?? DEFAULT_WORKERS;
41178
41818
  const fileConcurrency = Math.min(
@@ -41268,7 +41908,7 @@ async function resolveEvaluationRunner() {
41268
41908
  if (!overridePath) {
41269
41909
  return runEvaluation;
41270
41910
  }
41271
- const resolved = path18.isAbsolute(overridePath) ? overridePath : path18.resolve(process.cwd(), overridePath);
41911
+ const resolved = path19.isAbsolute(overridePath) ? overridePath : path19.resolve(process.cwd(), overridePath);
41272
41912
  const moduleUrl = pathToFileURL(resolved).href;
41273
41913
  const mod = await import(moduleUrl);
41274
41914
  const candidate = mod.runEvaluation;
@@ -41369,6 +42009,14 @@ var evalCommand = command({
41369
42009
  type: optional2(string4),
41370
42010
  long: "dump-prompts",
41371
42011
  description: "Directory path for persisting prompt payloads for debugging"
42012
+ }),
42013
+ dumpTraces: flag({
42014
+ long: "dump-traces",
42015
+ description: "Write trace files to .agentv/traces/"
42016
+ }),
42017
+ includeTrace: flag({
42018
+ long: "include-trace",
42019
+ description: "Include full trace in result output (verbose)"
41372
42020
  })
41373
42021
  },
41374
42022
  handler: async (args) => {
@@ -41389,7 +42037,9 @@ var evalCommand = command({
41389
42037
  maxRetries: args.maxRetries,
41390
42038
  cache: args.cache,
41391
42039
  verbose: args.verbose,
41392
- dumpPrompts
42040
+ dumpPrompts,
42041
+ dumpTraces: args.dumpTraces,
42042
+ includeTrace: args.includeTrace
41393
42043
  };
41394
42044
  await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
41395
42045
  }
@@ -41402,7 +42052,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
41402
42052
  const unmatched = [];
41403
42053
  const results = /* @__PURE__ */ new Set();
41404
42054
  for (const pattern of normalizedInputs) {
41405
- const candidatePath = path19.isAbsolute(pattern) ? path19.normalize(pattern) : path19.resolve(cwd, pattern);
42055
+ const candidatePath = path20.isAbsolute(pattern) ? path20.normalize(pattern) : path20.resolve(cwd, pattern);
41406
42056
  try {
41407
42057
  const stats = await stat4(candidatePath);
41408
42058
  if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
@@ -41426,7 +42076,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
41426
42076
  continue;
41427
42077
  }
41428
42078
  for (const filePath of yamlMatches) {
41429
- results.add(path19.normalize(filePath));
42079
+ results.add(path20.normalize(filePath));
41430
42080
  }
41431
42081
  }
41432
42082
  if (unmatched.length > 0) {
@@ -41446,7 +42096,7 @@ import { command as command2, flag as flag2, option as option2, optional as opti
41446
42096
 
41447
42097
  // src/commands/generate/rubrics.ts
41448
42098
  import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
41449
- import path20 from "node:path";
42099
+ import path21 from "node:path";
41450
42100
  import { pathToFileURL as pathToFileURL2 } from "node:url";
41451
42101
  import { isMap, isSeq, parseDocument } from "yaml";
41452
42102
  function isJsonObject3(value) {
@@ -41458,7 +42108,7 @@ function asString6(value) {
41458
42108
  async function loadRubricGenerator() {
41459
42109
  const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
41460
42110
  if (customGenerator) {
41461
- const generatorPath = path20.resolve(customGenerator);
42111
+ const generatorPath = path21.resolve(customGenerator);
41462
42112
  const generatorUrl = pathToFileURL2(generatorPath).href;
41463
42113
  const module = await import(generatorUrl);
41464
42114
  return module.generateRubrics;
@@ -41468,7 +42118,7 @@ async function loadRubricGenerator() {
41468
42118
  async function generateRubricsCommand(options) {
41469
42119
  const { file: file2, target: targetOverride, verbose } = options;
41470
42120
  console.log(`Generating rubrics for: ${file2}`);
41471
- const absolutePath = path20.resolve(file2);
42121
+ const absolutePath = path21.resolve(file2);
41472
42122
  const content = await readFile8(absolutePath, "utf8");
41473
42123
  const doc = parseDocument(content);
41474
42124
  const parsed = doc.toJSON();
@@ -41629,13 +42279,13 @@ var generateCommand = subcommands({
41629
42279
 
41630
42280
  // src/commands/init/index.ts
41631
42281
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
41632
- import path24 from "node:path";
42282
+ import path25 from "node:path";
41633
42283
  import * as readline from "node:readline/promises";
41634
42284
  import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
41635
42285
 
41636
42286
  // src/templates/index.ts
41637
42287
  import { readFileSync, readdirSync, statSync } from "node:fs";
41638
- import path21 from "node:path";
42288
+ import path24 from "node:path";
41639
42289
  import { fileURLToPath } from "node:url";
41640
42290
  function getGithubTemplates() {
41641
42291
  return getTemplatesFromDir(".github");
@@ -41647,12 +42297,12 @@ function getClaudeTemplates() {
41647
42297
  return getTemplatesFromDir(".claude");
41648
42298
  }
41649
42299
  function getTemplatesFromDir(subdir) {
41650
- const currentDir = path21.dirname(fileURLToPath(import.meta.url));
42300
+ const currentDir = path24.dirname(fileURLToPath(import.meta.url));
41651
42301
  let templatesDir;
41652
- if (currentDir.includes(`${path21.sep}dist`)) {
41653
- templatesDir = path21.join(currentDir, "templates", subdir);
42302
+ if (currentDir.includes(`${path24.sep}dist`)) {
42303
+ templatesDir = path24.join(currentDir, "templates", subdir);
41654
42304
  } else {
41655
- templatesDir = path21.join(currentDir, subdir);
42305
+ templatesDir = path24.join(currentDir, subdir);
41656
42306
  }
41657
42307
  return readTemplatesRecursively(templatesDir, "");
41658
42308
  }
@@ -41660,15 +42310,15 @@ function readTemplatesRecursively(dir, relativePath) {
41660
42310
  const templates = [];
41661
42311
  const entries = readdirSync(dir);
41662
42312
  for (const entry of entries) {
41663
- const fullPath = path21.join(dir, entry);
42313
+ const fullPath = path24.join(dir, entry);
41664
42314
  const stat6 = statSync(fullPath);
41665
- const entryRelativePath = relativePath ? path21.join(relativePath, entry) : entry;
42315
+ const entryRelativePath = relativePath ? path24.join(relativePath, entry) : entry;
41666
42316
  if (stat6.isDirectory()) {
41667
42317
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
41668
42318
  } else {
41669
42319
  const content = readFileSync(fullPath, "utf-8");
41670
42320
  templates.push({
41671
- path: entryRelativePath.split(path21.sep).join("/"),
42321
+ path: entryRelativePath.split(path24.sep).join("/"),
41672
42322
  // Normalize to forward slashes
41673
42323
  content
41674
42324
  });
@@ -41691,10 +42341,10 @@ async function promptYesNo(message) {
41691
42341
  }
41692
42342
  }
41693
42343
  async function initCommand(options = {}) {
41694
- const targetPath = path24.resolve(options.targetPath ?? ".");
41695
- const githubDir = path24.join(targetPath, ".github");
41696
- const agentvDir = path24.join(targetPath, ".agentv");
41697
- const claudeDir = path24.join(targetPath, ".claude");
42344
+ const targetPath = path25.resolve(options.targetPath ?? ".");
42345
+ const githubDir = path25.join(targetPath, ".github");
42346
+ const agentvDir = path25.join(targetPath, ".agentv");
42347
+ const claudeDir = path25.join(targetPath, ".claude");
41698
42348
  const githubTemplates = getGithubTemplates();
41699
42349
  const agentvTemplates = getAgentvTemplates();
41700
42350
  const claudeTemplates = getClaudeTemplates();
@@ -41702,32 +42352,32 @@ async function initCommand(options = {}) {
41702
42352
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
41703
42353
  const existingFiles = [];
41704
42354
  if (envTemplate) {
41705
- const envFilePath = path24.join(targetPath, ".env.template");
42355
+ const envFilePath = path25.join(targetPath, ".env.template");
41706
42356
  if (existsSync(envFilePath)) {
41707
42357
  existingFiles.push(".env.template");
41708
42358
  }
41709
42359
  }
41710
42360
  if (existsSync(githubDir)) {
41711
42361
  for (const template of githubTemplates) {
41712
- const targetFilePath = path24.join(githubDir, template.path);
42362
+ const targetFilePath = path25.join(githubDir, template.path);
41713
42363
  if (existsSync(targetFilePath)) {
41714
- existingFiles.push(path24.relative(targetPath, targetFilePath));
42364
+ existingFiles.push(path25.relative(targetPath, targetFilePath));
41715
42365
  }
41716
42366
  }
41717
42367
  }
41718
42368
  if (existsSync(agentvDir)) {
41719
42369
  for (const template of otherAgentvTemplates) {
41720
- const targetFilePath = path24.join(agentvDir, template.path);
42370
+ const targetFilePath = path25.join(agentvDir, template.path);
41721
42371
  if (existsSync(targetFilePath)) {
41722
- existingFiles.push(path24.relative(targetPath, targetFilePath));
42372
+ existingFiles.push(path25.relative(targetPath, targetFilePath));
41723
42373
  }
41724
42374
  }
41725
42375
  }
41726
42376
  if (existsSync(claudeDir)) {
41727
42377
  for (const template of claudeTemplates) {
41728
- const targetFilePath = path24.join(claudeDir, template.path);
42378
+ const targetFilePath = path25.join(claudeDir, template.path);
41729
42379
  if (existsSync(targetFilePath)) {
41730
- existingFiles.push(path24.relative(targetPath, targetFilePath));
42380
+ existingFiles.push(path25.relative(targetPath, targetFilePath));
41731
42381
  }
41732
42382
  }
41733
42383
  }
@@ -41754,36 +42404,36 @@ async function initCommand(options = {}) {
41754
42404
  mkdirSync(claudeDir, { recursive: true });
41755
42405
  }
41756
42406
  if (envTemplate) {
41757
- const envFilePath = path24.join(targetPath, ".env.template");
42407
+ const envFilePath = path25.join(targetPath, ".env.template");
41758
42408
  writeFileSync(envFilePath, envTemplate.content, "utf-8");
41759
42409
  console.log("Created .env.template");
41760
42410
  }
41761
42411
  for (const template of githubTemplates) {
41762
- const targetFilePath = path24.join(githubDir, template.path);
41763
- const targetDirPath = path24.dirname(targetFilePath);
42412
+ const targetFilePath = path25.join(githubDir, template.path);
42413
+ const targetDirPath = path25.dirname(targetFilePath);
41764
42414
  if (!existsSync(targetDirPath)) {
41765
42415
  mkdirSync(targetDirPath, { recursive: true });
41766
42416
  }
41767
42417
  writeFileSync(targetFilePath, template.content, "utf-8");
41768
- console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
42418
+ console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
41769
42419
  }
41770
42420
  for (const template of otherAgentvTemplates) {
41771
- const targetFilePath = path24.join(agentvDir, template.path);
41772
- const targetDirPath = path24.dirname(targetFilePath);
42421
+ const targetFilePath = path25.join(agentvDir, template.path);
42422
+ const targetDirPath = path25.dirname(targetFilePath);
41773
42423
  if (!existsSync(targetDirPath)) {
41774
42424
  mkdirSync(targetDirPath, { recursive: true });
41775
42425
  }
41776
42426
  writeFileSync(targetFilePath, template.content, "utf-8");
41777
- console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
42427
+ console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
41778
42428
  }
41779
42429
  for (const template of claudeTemplates) {
41780
- const targetFilePath = path24.join(claudeDir, template.path);
41781
- const targetDirPath = path24.dirname(targetFilePath);
42430
+ const targetFilePath = path25.join(claudeDir, template.path);
42431
+ const targetDirPath = path25.dirname(targetFilePath);
41782
42432
  if (!existsSync(targetDirPath)) {
41783
42433
  mkdirSync(targetDirPath, { recursive: true });
41784
42434
  }
41785
42435
  writeFileSync(targetFilePath, template.content, "utf-8");
41786
- console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
42436
+ console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
41787
42437
  }
41788
42438
  console.log("\nAgentV initialized successfully!");
41789
42439
  console.log("\nFiles installed to root:");
@@ -41791,17 +42441,17 @@ async function initCommand(options = {}) {
41791
42441
  console.log(" - .env.template");
41792
42442
  }
41793
42443
  console.log(`
41794
- Files installed to ${path24.relative(targetPath, githubDir)}:`);
42444
+ Files installed to ${path25.relative(targetPath, githubDir)}:`);
41795
42445
  for (const t of githubTemplates) {
41796
42446
  console.log(` - ${t.path}`);
41797
42447
  }
41798
42448
  console.log(`
41799
- Files installed to ${path24.relative(targetPath, agentvDir)}:`);
42449
+ Files installed to ${path25.relative(targetPath, agentvDir)}:`);
41800
42450
  for (const t of otherAgentvTemplates) {
41801
42451
  console.log(` - ${t.path}`);
41802
42452
  }
41803
42453
  console.log(`
41804
- Files installed to ${path24.relative(targetPath, claudeDir)}:`);
42454
+ Files installed to ${path25.relative(targetPath, claudeDir)}:`);
41805
42455
  for (const t of claudeTemplates) {
41806
42456
  console.log(` - ${t.path}`);
41807
42457
  }
@@ -41916,7 +42566,7 @@ function isTTY2() {
41916
42566
  // src/commands/validate/validate-files.ts
41917
42567
  import { constants as constants7 } from "node:fs";
41918
42568
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
41919
- import path25 from "node:path";
42569
+ import path26 from "node:path";
41920
42570
  async function validateFiles(paths) {
41921
42571
  const filePaths = await expandPaths(paths);
41922
42572
  const results = [];
@@ -41934,7 +42584,7 @@ async function validateFiles(paths) {
41934
42584
  };
41935
42585
  }
41936
42586
  async function validateSingleFile(filePath) {
41937
- const absolutePath = path25.resolve(filePath);
42587
+ const absolutePath = path26.resolve(filePath);
41938
42588
  const fileType = await detectFileType(absolutePath);
41939
42589
  let result;
41940
42590
  if (fileType === "eval") {
@@ -41959,7 +42609,7 @@ async function validateSingleFile(filePath) {
41959
42609
  async function expandPaths(paths) {
41960
42610
  const expanded = [];
41961
42611
  for (const inputPath of paths) {
41962
- const absolutePath = path25.resolve(inputPath);
42612
+ const absolutePath = path26.resolve(inputPath);
41963
42613
  try {
41964
42614
  await access7(absolutePath, constants7.F_OK);
41965
42615
  } catch {
@@ -41983,7 +42633,7 @@ async function findYamlFiles(dirPath) {
41983
42633
  try {
41984
42634
  const entries = await readdir3(dirPath, { withFileTypes: true });
41985
42635
  for (const entry of entries) {
41986
- const fullPath = path25.join(dirPath, entry.name);
42636
+ const fullPath = path26.join(dirPath, entry.name);
41987
42637
  if (entry.isDirectory()) {
41988
42638
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
41989
42639
  continue;
@@ -42000,7 +42650,7 @@ async function findYamlFiles(dirPath) {
42000
42650
  return results;
42001
42651
  }
42002
42652
  function isYamlFile(filePath) {
42003
- const ext = path25.extname(filePath).toLowerCase();
42653
+ const ext = path26.extname(filePath).toLowerCase();
42004
42654
  return ext === ".yaml" || ext === ".yml";
42005
42655
  }
42006
42656
 
@@ -42058,4 +42708,4 @@ export {
42058
42708
  app,
42059
42709
  runCli
42060
42710
  };
42061
- //# sourceMappingURL=chunk-4T62HFF4.js.map
42711
+ //# sourceMappingURL=chunk-6ZM7WVSC.js.map